1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/sysent.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/sysproto.h> 55 #include <sys/namei.h> 56 #include <sys/filedesc.h> 57 #include <sys/kernel.h> 58 #include <sys/fcntl.h> 59 #include <sys/file.h> 60 #include <sys/filio.h> 61 #include <sys/limits.h> 62 #include <sys/linker.h> 63 #include <sys/rwlock.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97 static int kern_chflagsat(struct thread *td, int fd, const char *path, 98 enum uio_seg pathseg, u_long flags, int atflag); 99 static int setfflags(struct thread *td, struct vnode *, u_long); 100 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 101 static int getutimens(const struct timespec *, enum uio_seg, 102 struct timespec *, int *); 103 static int setutimes(struct thread *td, struct vnode *, 104 const struct timespec *, int, int); 105 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 106 struct thread *td); 107 108 /* 109 * Sync each mounted filesystem. 110 */ 111 #ifndef _SYS_SYSPROTO_H_ 112 struct sync_args { 113 int dummy; 114 }; 115 #endif 116 /* ARGSUSED */ 117 int 118 sys_sync(td, uap) 119 struct thread *td; 120 struct sync_args *uap; 121 { 122 struct mount *mp, *nmp; 123 int save; 124 125 mtx_lock(&mountlist_mtx); 126 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 127 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 128 nmp = TAILQ_NEXT(mp, mnt_list); 129 continue; 130 } 131 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 132 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 133 save = curthread_pflags_set(TDP_SYNCIO); 134 vfs_msync(mp, MNT_NOWAIT); 135 VFS_SYNC(mp, MNT_NOWAIT); 136 curthread_pflags_restore(save); 137 vn_finished_write(mp); 138 } 139 mtx_lock(&mountlist_mtx); 140 nmp = TAILQ_NEXT(mp, mnt_list); 141 vfs_unbusy(mp); 142 } 143 mtx_unlock(&mountlist_mtx); 144 return (0); 145 } 146 147 /* 148 * Change filesystem quotas. 149 */ 150 #ifndef _SYS_SYSPROTO_H_ 151 struct quotactl_args { 152 char *path; 153 int cmd; 154 int uid; 155 caddr_t arg; 156 }; 157 #endif 158 int 159 sys_quotactl(td, uap) 160 struct thread *td; 161 register struct quotactl_args /* { 162 char *path; 163 int cmd; 164 int uid; 165 caddr_t arg; 166 } */ *uap; 167 { 168 struct mount *mp; 169 struct nameidata nd; 170 int error; 171 172 AUDIT_ARG_CMD(uap->cmd); 173 AUDIT_ARG_UID(uap->uid); 174 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 175 return (EPERM); 176 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 177 uap->path, td); 178 if ((error = namei(&nd)) != 0) 179 return (error); 180 NDFREE(&nd, NDF_ONLY_PNBUF); 181 mp = nd.ni_vp->v_mount; 182 vfs_ref(mp); 183 vput(nd.ni_vp); 184 error = vfs_busy(mp, 0); 185 vfs_rel(mp); 186 if (error != 0) 187 return (error); 188 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 189 190 /* 191 * Since quota on operation typically needs to open quota 192 * file, the Q_QUOTAON handler needs to unbusy the mount point 193 * before calling into namei. Otherwise, unmount might be 194 * started between two vfs_busy() invocations (first is our, 195 * second is from mount point cross-walk code in lookup()), 196 * causing deadlock. 197 * 198 * Require that Q_QUOTAON handles the vfs_busy() reference on 199 * its own, always returning with ubusied mount point. 200 */ 201 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 202 vfs_unbusy(mp); 203 return (error); 204 } 205 206 /* 207 * Used by statfs conversion routines to scale the block size up if 208 * necessary so that all of the block counts are <= 'max_size'. Note 209 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 210 * value of 'n'. 211 */ 212 void 213 statfs_scale_blocks(struct statfs *sf, long max_size) 214 { 215 uint64_t count; 216 int shift; 217 218 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 219 220 /* 221 * Attempt to scale the block counts to give a more accurate 222 * overview to userland of the ratio of free space to used 223 * space. To do this, find the largest block count and compute 224 * a divisor that lets it fit into a signed integer <= max_size. 225 */ 226 if (sf->f_bavail < 0) 227 count = -sf->f_bavail; 228 else 229 count = sf->f_bavail; 230 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 231 if (count <= max_size) 232 return; 233 234 count >>= flsl(max_size); 235 shift = 0; 236 while (count > 0) { 237 shift++; 238 count >>=1; 239 } 240 241 sf->f_bsize <<= shift; 242 sf->f_blocks >>= shift; 243 sf->f_bfree >>= shift; 244 sf->f_bavail >>= shift; 245 } 246 247 /* 248 * Get filesystem statistics. 249 */ 250 #ifndef _SYS_SYSPROTO_H_ 251 struct statfs_args { 252 char *path; 253 struct statfs *buf; 254 }; 255 #endif 256 int 257 sys_statfs(td, uap) 258 struct thread *td; 259 register struct statfs_args /* { 260 char *path; 261 struct statfs *buf; 262 } */ *uap; 263 { 264 struct statfs sf; 265 int error; 266 267 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 268 if (error == 0) 269 error = copyout(&sf, uap->buf, sizeof(sf)); 270 return (error); 271 } 272 273 int 274 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 275 struct statfs *buf) 276 { 277 struct mount *mp; 278 struct statfs *sp, sb; 279 struct nameidata nd; 280 int error; 281 282 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 283 pathseg, path, td); 284 error = namei(&nd); 285 if (error != 0) 286 return (error); 287 mp = nd.ni_vp->v_mount; 288 vfs_ref(mp); 289 NDFREE(&nd, NDF_ONLY_PNBUF); 290 vput(nd.ni_vp); 291 error = vfs_busy(mp, 0); 292 vfs_rel(mp); 293 if (error != 0) 294 return (error); 295 #ifdef MAC 296 error = mac_mount_check_stat(td->td_ucred, mp); 297 if (error != 0) 298 goto out; 299 #endif 300 /* 301 * Set these in case the underlying filesystem fails to do so. 302 */ 303 sp = &mp->mnt_stat; 304 sp->f_version = STATFS_VERSION; 305 sp->f_namemax = NAME_MAX; 306 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 307 error = VFS_STATFS(mp, sp); 308 if (error != 0) 309 goto out; 310 if (priv_check(td, PRIV_VFS_GENERATION)) { 311 bcopy(sp, &sb, sizeof(sb)); 312 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 313 prison_enforce_statfs(td->td_ucred, mp, &sb); 314 sp = &sb; 315 } 316 *buf = *sp; 317 out: 318 vfs_unbusy(mp); 319 return (error); 320 } 321 322 /* 323 * Get filesystem statistics. 324 */ 325 #ifndef _SYS_SYSPROTO_H_ 326 struct fstatfs_args { 327 int fd; 328 struct statfs *buf; 329 }; 330 #endif 331 int 332 sys_fstatfs(td, uap) 333 struct thread *td; 334 register struct fstatfs_args /* { 335 int fd; 336 struct statfs *buf; 337 } */ *uap; 338 { 339 struct statfs sf; 340 int error; 341 342 error = kern_fstatfs(td, uap->fd, &sf); 343 if (error == 0) 344 error = copyout(&sf, uap->buf, sizeof(sf)); 345 return (error); 346 } 347 348 int 349 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 350 { 351 struct file *fp; 352 struct mount *mp; 353 struct statfs *sp, sb; 354 struct vnode *vp; 355 cap_rights_t rights; 356 int error; 357 358 AUDIT_ARG_FD(fd); 359 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp); 360 if (error != 0) 361 return (error); 362 vp = fp->f_vnode; 363 vn_lock(vp, LK_SHARED | LK_RETRY); 364 #ifdef AUDIT 365 AUDIT_ARG_VNODE1(vp); 366 #endif 367 mp = vp->v_mount; 368 if (mp) 369 vfs_ref(mp); 370 VOP_UNLOCK(vp, 0); 371 fdrop(fp, td); 372 if (mp == NULL) { 373 error = EBADF; 374 goto out; 375 } 376 error = vfs_busy(mp, 0); 377 vfs_rel(mp); 378 if (error != 0) 379 return (error); 380 #ifdef MAC 381 error = mac_mount_check_stat(td->td_ucred, mp); 382 if (error != 0) 383 goto out; 384 #endif 385 /* 386 * Set these in case the underlying filesystem fails to do so. 387 */ 388 sp = &mp->mnt_stat; 389 sp->f_version = STATFS_VERSION; 390 sp->f_namemax = NAME_MAX; 391 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 392 error = VFS_STATFS(mp, sp); 393 if (error != 0) 394 goto out; 395 if (priv_check(td, PRIV_VFS_GENERATION)) { 396 bcopy(sp, &sb, sizeof(sb)); 397 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 398 prison_enforce_statfs(td->td_ucred, mp, &sb); 399 sp = &sb; 400 } 401 *buf = *sp; 402 out: 403 if (mp) 404 vfs_unbusy(mp); 405 return (error); 406 } 407 408 /* 409 * Get statistics on all filesystems. 410 */ 411 #ifndef _SYS_SYSPROTO_H_ 412 struct getfsstat_args { 413 struct statfs *buf; 414 long bufsize; 415 int flags; 416 }; 417 #endif 418 int 419 sys_getfsstat(td, uap) 420 struct thread *td; 421 register struct getfsstat_args /* { 422 struct statfs *buf; 423 long bufsize; 424 int flags; 425 } */ *uap; 426 { 427 size_t count; 428 int error; 429 430 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 431 return (EINVAL); 432 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 433 UIO_USERSPACE, uap->flags); 434 if (error == 0) 435 td->td_retval[0] = count; 436 return (error); 437 } 438 439 /* 440 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 441 * The caller is responsible for freeing memory which will be allocated 442 * in '*buf'. 443 */ 444 int 445 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 446 size_t *countp, enum uio_seg bufseg, int flags) 447 { 448 struct mount *mp, *nmp; 449 struct statfs *sfsp, *sp, sb; 450 size_t count, maxcount; 451 int error; 452 453 maxcount = bufsize / sizeof(struct statfs); 454 if (bufsize == 0) 455 sfsp = NULL; 456 else if (bufseg == UIO_USERSPACE) 457 sfsp = *buf; 458 else /* if (bufseg == UIO_SYSSPACE) */ { 459 count = 0; 460 mtx_lock(&mountlist_mtx); 461 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 462 count++; 463 } 464 mtx_unlock(&mountlist_mtx); 465 if (maxcount > count) 466 maxcount = count; 467 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 468 M_WAITOK); 469 } 470 count = 0; 471 mtx_lock(&mountlist_mtx); 472 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 473 if (prison_canseemount(td->td_ucred, mp) != 0) { 474 nmp = TAILQ_NEXT(mp, mnt_list); 475 continue; 476 } 477 #ifdef MAC 478 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 479 nmp = TAILQ_NEXT(mp, mnt_list); 480 continue; 481 } 482 #endif 483 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 484 nmp = TAILQ_NEXT(mp, mnt_list); 485 continue; 486 } 487 if (sfsp && count < maxcount) { 488 sp = &mp->mnt_stat; 489 /* 490 * Set these in case the underlying filesystem 491 * fails to do so. 492 */ 493 sp->f_version = STATFS_VERSION; 494 sp->f_namemax = NAME_MAX; 495 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 496 /* 497 * If MNT_NOWAIT or MNT_LAZY is specified, do not 498 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 499 * overrides MNT_WAIT. 500 */ 501 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 502 (flags & MNT_WAIT)) && 503 (error = VFS_STATFS(mp, sp))) { 504 mtx_lock(&mountlist_mtx); 505 nmp = TAILQ_NEXT(mp, mnt_list); 506 vfs_unbusy(mp); 507 continue; 508 } 509 if (priv_check(td, PRIV_VFS_GENERATION)) { 510 bcopy(sp, &sb, sizeof(sb)); 511 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 512 prison_enforce_statfs(td->td_ucred, mp, &sb); 513 sp = &sb; 514 } 515 if (bufseg == UIO_SYSSPACE) 516 bcopy(sp, sfsp, sizeof(*sp)); 517 else /* if (bufseg == UIO_USERSPACE) */ { 518 error = copyout(sp, sfsp, sizeof(*sp)); 519 if (error != 0) { 520 vfs_unbusy(mp); 521 return (error); 522 } 523 } 524 sfsp++; 525 } 526 count++; 527 mtx_lock(&mountlist_mtx); 528 nmp = TAILQ_NEXT(mp, mnt_list); 529 vfs_unbusy(mp); 530 } 531 mtx_unlock(&mountlist_mtx); 532 if (sfsp && count > maxcount) 533 *countp = maxcount; 534 else 535 *countp = count; 536 return (0); 537 } 538 539 #ifdef COMPAT_FREEBSD4 540 /* 541 * Get old format filesystem statistics. 542 */ 543 static void cvtstatfs(struct statfs *, struct ostatfs *); 544 545 #ifndef _SYS_SYSPROTO_H_ 546 struct freebsd4_statfs_args { 547 char *path; 548 struct ostatfs *buf; 549 }; 550 #endif 551 int 552 freebsd4_statfs(td, uap) 553 struct thread *td; 554 struct freebsd4_statfs_args /* { 555 char *path; 556 struct ostatfs *buf; 557 } */ *uap; 558 { 559 struct ostatfs osb; 560 struct statfs sf; 561 int error; 562 563 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 564 if (error != 0) 565 return (error); 566 cvtstatfs(&sf, &osb); 567 return (copyout(&osb, uap->buf, sizeof(osb))); 568 } 569 570 /* 571 * Get filesystem statistics. 572 */ 573 #ifndef _SYS_SYSPROTO_H_ 574 struct freebsd4_fstatfs_args { 575 int fd; 576 struct ostatfs *buf; 577 }; 578 #endif 579 int 580 freebsd4_fstatfs(td, uap) 581 struct thread *td; 582 struct freebsd4_fstatfs_args /* { 583 int fd; 584 struct ostatfs *buf; 585 } */ *uap; 586 { 587 struct ostatfs osb; 588 struct statfs sf; 589 int error; 590 591 error = kern_fstatfs(td, uap->fd, &sf); 592 if (error != 0) 593 return (error); 594 cvtstatfs(&sf, &osb); 595 return (copyout(&osb, uap->buf, sizeof(osb))); 596 } 597 598 /* 599 * Get statistics on all filesystems. 600 */ 601 #ifndef _SYS_SYSPROTO_H_ 602 struct freebsd4_getfsstat_args { 603 struct ostatfs *buf; 604 long bufsize; 605 int flags; 606 }; 607 #endif 608 int 609 freebsd4_getfsstat(td, uap) 610 struct thread *td; 611 register struct freebsd4_getfsstat_args /* { 612 struct ostatfs *buf; 613 long bufsize; 614 int flags; 615 } */ *uap; 616 { 617 struct statfs *buf, *sp; 618 struct ostatfs osb; 619 size_t count, size; 620 int error; 621 622 if (uap->bufsize < 0) 623 return (EINVAL); 624 count = uap->bufsize / sizeof(struct ostatfs); 625 if (count > SIZE_MAX / sizeof(struct statfs)) 626 return (EINVAL); 627 size = count * sizeof(struct statfs); 628 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 629 uap->flags); 630 td->td_retval[0] = count; 631 if (size != 0) { 632 sp = buf; 633 while (count != 0 && error == 0) { 634 cvtstatfs(sp, &osb); 635 error = copyout(&osb, uap->buf, sizeof(osb)); 636 sp++; 637 uap->buf++; 638 count--; 639 } 640 free(buf, M_TEMP); 641 } 642 return (error); 643 } 644 645 /* 646 * Implement fstatfs() for (NFS) file handles. 647 */ 648 #ifndef _SYS_SYSPROTO_H_ 649 struct freebsd4_fhstatfs_args { 650 struct fhandle *u_fhp; 651 struct ostatfs *buf; 652 }; 653 #endif 654 int 655 freebsd4_fhstatfs(td, uap) 656 struct thread *td; 657 struct freebsd4_fhstatfs_args /* { 658 struct fhandle *u_fhp; 659 struct ostatfs *buf; 660 } */ *uap; 661 { 662 struct ostatfs osb; 663 struct statfs sf; 664 fhandle_t fh; 665 int error; 666 667 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 668 if (error != 0) 669 return (error); 670 error = kern_fhstatfs(td, fh, &sf); 671 if (error != 0) 672 return (error); 673 cvtstatfs(&sf, &osb); 674 return (copyout(&osb, uap->buf, sizeof(osb))); 675 } 676 677 /* 678 * Convert a new format statfs structure to an old format statfs structure. 679 */ 680 static void 681 cvtstatfs(nsp, osp) 682 struct statfs *nsp; 683 struct ostatfs *osp; 684 { 685 686 statfs_scale_blocks(nsp, LONG_MAX); 687 bzero(osp, sizeof(*osp)); 688 osp->f_bsize = nsp->f_bsize; 689 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 690 osp->f_blocks = nsp->f_blocks; 691 osp->f_bfree = nsp->f_bfree; 692 osp->f_bavail = nsp->f_bavail; 693 osp->f_files = MIN(nsp->f_files, LONG_MAX); 694 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 695 osp->f_owner = nsp->f_owner; 696 osp->f_type = nsp->f_type; 697 osp->f_flags = nsp->f_flags; 698 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 699 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 700 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 701 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 702 strlcpy(osp->f_fstypename, nsp->f_fstypename, 703 MIN(MFSNAMELEN, OMFSNAMELEN)); 704 strlcpy(osp->f_mntonname, nsp->f_mntonname, 705 MIN(MNAMELEN, OMNAMELEN)); 706 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 707 MIN(MNAMELEN, OMNAMELEN)); 708 osp->f_fsid = nsp->f_fsid; 709 } 710 #endif /* COMPAT_FREEBSD4 */ 711 712 /* 713 * Change current working directory to a given file descriptor. 714 */ 715 #ifndef _SYS_SYSPROTO_H_ 716 struct fchdir_args { 717 int fd; 718 }; 719 #endif 720 int 721 sys_fchdir(td, uap) 722 struct thread *td; 723 struct fchdir_args /* { 724 int fd; 725 } */ *uap; 726 { 727 struct vnode *vp, *tdp; 728 struct mount *mp; 729 struct file *fp; 730 cap_rights_t rights; 731 int error; 732 733 AUDIT_ARG_FD(uap->fd); 734 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 735 &fp); 736 if (error != 0) 737 return (error); 738 vp = fp->f_vnode; 739 VREF(vp); 740 fdrop(fp, td); 741 vn_lock(vp, LK_SHARED | LK_RETRY); 742 AUDIT_ARG_VNODE1(vp); 743 error = change_dir(vp, td); 744 while (!error && (mp = vp->v_mountedhere) != NULL) { 745 if (vfs_busy(mp, 0)) 746 continue; 747 error = VFS_ROOT(mp, LK_SHARED, &tdp); 748 vfs_unbusy(mp); 749 if (error != 0) 750 break; 751 vput(vp); 752 vp = tdp; 753 } 754 if (error != 0) { 755 vput(vp); 756 return (error); 757 } 758 VOP_UNLOCK(vp, 0); 759 pwd_chdir(td, vp); 760 return (0); 761 } 762 763 /* 764 * Change current working directory (``.''). 765 */ 766 #ifndef _SYS_SYSPROTO_H_ 767 struct chdir_args { 768 char *path; 769 }; 770 #endif 771 int 772 sys_chdir(td, uap) 773 struct thread *td; 774 struct chdir_args /* { 775 char *path; 776 } */ *uap; 777 { 778 779 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 780 } 781 782 int 783 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 784 { 785 struct nameidata nd; 786 int error; 787 788 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 789 pathseg, path, td); 790 if ((error = namei(&nd)) != 0) 791 return (error); 792 if ((error = change_dir(nd.ni_vp, td)) != 0) { 793 vput(nd.ni_vp); 794 NDFREE(&nd, NDF_ONLY_PNBUF); 795 return (error); 796 } 797 VOP_UNLOCK(nd.ni_vp, 0); 798 NDFREE(&nd, NDF_ONLY_PNBUF); 799 pwd_chdir(td, nd.ni_vp); 800 return (0); 801 } 802 803 /* 804 * Change notion of root (``/'') directory. 805 */ 806 #ifndef _SYS_SYSPROTO_H_ 807 struct chroot_args { 808 char *path; 809 }; 810 #endif 811 int 812 sys_chroot(td, uap) 813 struct thread *td; 814 struct chroot_args /* { 815 char *path; 816 } */ *uap; 817 { 818 struct nameidata nd; 819 int error; 820 821 error = priv_check(td, PRIV_VFS_CHROOT); 822 if (error != 0) 823 return (error); 824 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 825 UIO_USERSPACE, uap->path, td); 826 error = namei(&nd); 827 if (error != 0) 828 goto error; 829 error = change_dir(nd.ni_vp, td); 830 if (error != 0) 831 goto e_vunlock; 832 #ifdef MAC 833 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 834 if (error != 0) 835 goto e_vunlock; 836 #endif 837 VOP_UNLOCK(nd.ni_vp, 0); 838 error = pwd_chroot(td, nd.ni_vp); 839 vrele(nd.ni_vp); 840 NDFREE(&nd, NDF_ONLY_PNBUF); 841 return (error); 842 e_vunlock: 843 vput(nd.ni_vp); 844 error: 845 NDFREE(&nd, NDF_ONLY_PNBUF); 846 return (error); 847 } 848 849 /* 850 * Common routine for chroot and chdir. Callers must provide a locked vnode 851 * instance. 852 */ 853 int 854 change_dir(vp, td) 855 struct vnode *vp; 856 struct thread *td; 857 { 858 #ifdef MAC 859 int error; 860 #endif 861 862 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 863 if (vp->v_type != VDIR) 864 return (ENOTDIR); 865 #ifdef MAC 866 error = mac_vnode_check_chdir(td->td_ucred, vp); 867 if (error != 0) 868 return (error); 869 #endif 870 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 871 } 872 873 static __inline void 874 flags_to_rights(int flags, cap_rights_t *rightsp) 875 { 876 877 if (flags & O_EXEC) { 878 cap_rights_set(rightsp, CAP_FEXECVE); 879 } else { 880 switch ((flags & O_ACCMODE)) { 881 case O_RDONLY: 882 cap_rights_set(rightsp, CAP_READ); 883 break; 884 case O_RDWR: 885 cap_rights_set(rightsp, CAP_READ); 886 /* FALLTHROUGH */ 887 case O_WRONLY: 888 cap_rights_set(rightsp, CAP_WRITE); 889 if (!(flags & (O_APPEND | O_TRUNC))) 890 cap_rights_set(rightsp, CAP_SEEK); 891 break; 892 } 893 } 894 895 if (flags & O_CREAT) 896 cap_rights_set(rightsp, CAP_CREATE); 897 898 if (flags & O_TRUNC) 899 cap_rights_set(rightsp, CAP_FTRUNCATE); 900 901 if (flags & (O_SYNC | O_FSYNC)) 902 cap_rights_set(rightsp, CAP_FSYNC); 903 904 if (flags & (O_EXLOCK | O_SHLOCK)) 905 cap_rights_set(rightsp, CAP_FLOCK); 906 } 907 908 /* 909 * Check permissions, allocate an open file structure, and call the device 910 * open routine if any. 911 */ 912 #ifndef _SYS_SYSPROTO_H_ 913 struct open_args { 914 char *path; 915 int flags; 916 int mode; 917 }; 918 #endif 919 int 920 sys_open(td, uap) 921 struct thread *td; 922 register struct open_args /* { 923 char *path; 924 int flags; 925 int mode; 926 } */ *uap; 927 { 928 929 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 930 uap->flags, uap->mode)); 931 } 932 933 #ifndef _SYS_SYSPROTO_H_ 934 struct openat_args { 935 int fd; 936 char *path; 937 int flag; 938 int mode; 939 }; 940 #endif 941 int 942 sys_openat(struct thread *td, struct openat_args *uap) 943 { 944 945 AUDIT_ARG_FD(uap->fd); 946 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 947 uap->mode)); 948 } 949 950 int 951 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 952 int flags, int mode) 953 { 954 struct proc *p = td->td_proc; 955 struct filedesc *fdp = p->p_fd; 956 struct file *fp; 957 struct vnode *vp; 958 struct nameidata nd; 959 cap_rights_t rights; 960 int cmode, error, indx; 961 962 indx = -1; 963 964 AUDIT_ARG_FFLAGS(flags); 965 AUDIT_ARG_MODE(mode); 966 cap_rights_init(&rights, CAP_LOOKUP); 967 flags_to_rights(flags, &rights); 968 /* 969 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 970 * may be specified. 971 */ 972 if (flags & O_EXEC) { 973 if (flags & O_ACCMODE) 974 return (EINVAL); 975 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 976 return (EINVAL); 977 } else { 978 flags = FFLAGS(flags); 979 } 980 981 /* 982 * Allocate a file structure. The descriptor to reference it 983 * is allocated and set by finstall() below. 984 */ 985 error = falloc_noinstall(td, &fp); 986 if (error != 0) 987 return (error); 988 /* 989 * An extra reference on `fp' has been held for us by 990 * falloc_noinstall(). 991 */ 992 /* Set the flags early so the finit in devfs can pick them up. */ 993 fp->f_flag = flags & FMASK; 994 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 995 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 996 &rights, td); 997 td->td_dupfd = -1; /* XXX check for fdopen */ 998 error = vn_open(&nd, &flags, cmode, fp); 999 if (error != 0) { 1000 /* 1001 * If the vn_open replaced the method vector, something 1002 * wonderous happened deep below and we just pass it up 1003 * pretending we know what we do. 1004 */ 1005 if (error == ENXIO && fp->f_ops != &badfileops) 1006 goto success; 1007 1008 /* 1009 * Handle special fdopen() case. bleh. 1010 * 1011 * Don't do this for relative (capability) lookups; we don't 1012 * understand exactly what would happen, and we don't think 1013 * that it ever should. 1014 */ 1015 if (nd.ni_strictrelative == 0 && 1016 (error == ENODEV || error == ENXIO) && 1017 td->td_dupfd >= 0) { 1018 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1019 &indx); 1020 if (error == 0) 1021 goto success; 1022 } 1023 1024 goto bad; 1025 } 1026 td->td_dupfd = 0; 1027 NDFREE(&nd, NDF_ONLY_PNBUF); 1028 vp = nd.ni_vp; 1029 1030 /* 1031 * Store the vnode, for any f_type. Typically, the vnode use 1032 * count is decremented by direct call to vn_closefile() for 1033 * files that switched type in the cdevsw fdopen() method. 1034 */ 1035 fp->f_vnode = vp; 1036 /* 1037 * If the file wasn't claimed by devfs bind it to the normal 1038 * vnode operations here. 1039 */ 1040 if (fp->f_ops == &badfileops) { 1041 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1042 fp->f_seqcount = 1; 1043 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1044 DTYPE_VNODE, vp, &vnops); 1045 } 1046 1047 VOP_UNLOCK(vp, 0); 1048 if (flags & O_TRUNC) { 1049 error = fo_truncate(fp, 0, td->td_ucred, td); 1050 if (error != 0) 1051 goto bad; 1052 } 1053 success: 1054 /* 1055 * If we haven't already installed the FD (for dupfdopen), do so now. 1056 */ 1057 if (indx == -1) { 1058 struct filecaps *fcaps; 1059 1060 #ifdef CAPABILITIES 1061 if (nd.ni_strictrelative == 1) 1062 fcaps = &nd.ni_filecaps; 1063 else 1064 #endif 1065 fcaps = NULL; 1066 error = finstall(td, fp, &indx, flags, fcaps); 1067 /* On success finstall() consumes fcaps. */ 1068 if (error != 0) { 1069 filecaps_free(&nd.ni_filecaps); 1070 goto bad; 1071 } 1072 } else { 1073 filecaps_free(&nd.ni_filecaps); 1074 } 1075 1076 /* 1077 * Release our private reference, leaving the one associated with 1078 * the descriptor table intact. 1079 */ 1080 fdrop(fp, td); 1081 td->td_retval[0] = indx; 1082 return (0); 1083 bad: 1084 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1085 fdrop(fp, td); 1086 return (error); 1087 } 1088 1089 #ifdef COMPAT_43 1090 /* 1091 * Create a file. 1092 */ 1093 #ifndef _SYS_SYSPROTO_H_ 1094 struct ocreat_args { 1095 char *path; 1096 int mode; 1097 }; 1098 #endif 1099 int 1100 ocreat(td, uap) 1101 struct thread *td; 1102 register struct ocreat_args /* { 1103 char *path; 1104 int mode; 1105 } */ *uap; 1106 { 1107 1108 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1109 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1110 } 1111 #endif /* COMPAT_43 */ 1112 1113 /* 1114 * Create a special file. 1115 */ 1116 #ifndef _SYS_SYSPROTO_H_ 1117 struct mknod_args { 1118 char *path; 1119 int mode; 1120 int dev; 1121 }; 1122 #endif 1123 int 1124 sys_mknod(td, uap) 1125 struct thread *td; 1126 register struct mknod_args /* { 1127 char *path; 1128 int mode; 1129 int dev; 1130 } */ *uap; 1131 { 1132 1133 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1134 uap->mode, uap->dev)); 1135 } 1136 1137 #ifndef _SYS_SYSPROTO_H_ 1138 struct mknodat_args { 1139 int fd; 1140 char *path; 1141 mode_t mode; 1142 dev_t dev; 1143 }; 1144 #endif 1145 int 1146 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1147 { 1148 1149 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1150 uap->dev)); 1151 } 1152 1153 int 1154 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1155 int mode, int dev) 1156 { 1157 struct vnode *vp; 1158 struct mount *mp; 1159 struct vattr vattr; 1160 struct nameidata nd; 1161 cap_rights_t rights; 1162 int error, whiteout = 0; 1163 1164 AUDIT_ARG_MODE(mode); 1165 AUDIT_ARG_DEV(dev); 1166 switch (mode & S_IFMT) { 1167 case S_IFCHR: 1168 case S_IFBLK: 1169 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1170 if (error == 0 && dev == VNOVAL) 1171 error = EINVAL; 1172 break; 1173 case S_IFMT: 1174 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1175 break; 1176 case S_IFWHT: 1177 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1178 break; 1179 case S_IFIFO: 1180 if (dev == 0) 1181 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1182 /* FALLTHROUGH */ 1183 default: 1184 error = EINVAL; 1185 break; 1186 } 1187 if (error != 0) 1188 return (error); 1189 restart: 1190 bwillwrite(); 1191 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1192 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), 1193 td); 1194 if ((error = namei(&nd)) != 0) 1195 return (error); 1196 vp = nd.ni_vp; 1197 if (vp != NULL) { 1198 NDFREE(&nd, NDF_ONLY_PNBUF); 1199 if (vp == nd.ni_dvp) 1200 vrele(nd.ni_dvp); 1201 else 1202 vput(nd.ni_dvp); 1203 vrele(vp); 1204 return (EEXIST); 1205 } else { 1206 VATTR_NULL(&vattr); 1207 vattr.va_mode = (mode & ALLPERMS) & 1208 ~td->td_proc->p_fd->fd_cmask; 1209 vattr.va_rdev = dev; 1210 whiteout = 0; 1211 1212 switch (mode & S_IFMT) { 1213 case S_IFMT: /* used by badsect to flag bad sectors */ 1214 vattr.va_type = VBAD; 1215 break; 1216 case S_IFCHR: 1217 vattr.va_type = VCHR; 1218 break; 1219 case S_IFBLK: 1220 vattr.va_type = VBLK; 1221 break; 1222 case S_IFWHT: 1223 whiteout = 1; 1224 break; 1225 default: 1226 panic("kern_mknod: invalid mode"); 1227 } 1228 } 1229 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1230 NDFREE(&nd, NDF_ONLY_PNBUF); 1231 vput(nd.ni_dvp); 1232 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1233 return (error); 1234 goto restart; 1235 } 1236 #ifdef MAC 1237 if (error == 0 && !whiteout) 1238 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1239 &nd.ni_cnd, &vattr); 1240 #endif 1241 if (error == 0) { 1242 if (whiteout) 1243 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1244 else { 1245 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1246 &nd.ni_cnd, &vattr); 1247 if (error == 0) 1248 vput(nd.ni_vp); 1249 } 1250 } 1251 NDFREE(&nd, NDF_ONLY_PNBUF); 1252 vput(nd.ni_dvp); 1253 vn_finished_write(mp); 1254 return (error); 1255 } 1256 1257 /* 1258 * Create a named pipe. 1259 */ 1260 #ifndef _SYS_SYSPROTO_H_ 1261 struct mkfifo_args { 1262 char *path; 1263 int mode; 1264 }; 1265 #endif 1266 int 1267 sys_mkfifo(td, uap) 1268 struct thread *td; 1269 register struct mkfifo_args /* { 1270 char *path; 1271 int mode; 1272 } */ *uap; 1273 { 1274 1275 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1276 uap->mode)); 1277 } 1278 1279 #ifndef _SYS_SYSPROTO_H_ 1280 struct mkfifoat_args { 1281 int fd; 1282 char *path; 1283 mode_t mode; 1284 }; 1285 #endif 1286 int 1287 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1288 { 1289 1290 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1291 uap->mode)); 1292 } 1293 1294 int 1295 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1296 int mode) 1297 { 1298 struct mount *mp; 1299 struct vattr vattr; 1300 struct nameidata nd; 1301 cap_rights_t rights; 1302 int error; 1303 1304 AUDIT_ARG_MODE(mode); 1305 restart: 1306 bwillwrite(); 1307 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1308 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), 1309 td); 1310 if ((error = namei(&nd)) != 0) 1311 return (error); 1312 if (nd.ni_vp != NULL) { 1313 NDFREE(&nd, NDF_ONLY_PNBUF); 1314 if (nd.ni_vp == nd.ni_dvp) 1315 vrele(nd.ni_dvp); 1316 else 1317 vput(nd.ni_dvp); 1318 vrele(nd.ni_vp); 1319 return (EEXIST); 1320 } 1321 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1322 NDFREE(&nd, NDF_ONLY_PNBUF); 1323 vput(nd.ni_dvp); 1324 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1325 return (error); 1326 goto restart; 1327 } 1328 VATTR_NULL(&vattr); 1329 vattr.va_type = VFIFO; 1330 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1331 #ifdef MAC 1332 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1333 &vattr); 1334 if (error != 0) 1335 goto out; 1336 #endif 1337 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1338 if (error == 0) 1339 vput(nd.ni_vp); 1340 #ifdef MAC 1341 out: 1342 #endif 1343 vput(nd.ni_dvp); 1344 vn_finished_write(mp); 1345 NDFREE(&nd, NDF_ONLY_PNBUF); 1346 return (error); 1347 } 1348 1349 /* 1350 * Make a hard file link. 1351 */ 1352 #ifndef _SYS_SYSPROTO_H_ 1353 struct link_args { 1354 char *path; 1355 char *link; 1356 }; 1357 #endif 1358 int 1359 sys_link(td, uap) 1360 struct thread *td; 1361 register struct link_args /* { 1362 char *path; 1363 char *link; 1364 } */ *uap; 1365 { 1366 1367 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1368 UIO_USERSPACE, FOLLOW)); 1369 } 1370 1371 #ifndef _SYS_SYSPROTO_H_ 1372 struct linkat_args { 1373 int fd1; 1374 char *path1; 1375 int fd2; 1376 char *path2; 1377 int flag; 1378 }; 1379 #endif 1380 int 1381 sys_linkat(struct thread *td, struct linkat_args *uap) 1382 { 1383 int flag; 1384 1385 flag = uap->flag; 1386 if (flag & ~AT_SYMLINK_FOLLOW) 1387 return (EINVAL); 1388 1389 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1390 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); 1391 } 1392 1393 int hardlink_check_uid = 0; 1394 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1395 &hardlink_check_uid, 0, 1396 "Unprivileged processes cannot create hard links to files owned by other " 1397 "users"); 1398 static int hardlink_check_gid = 0; 1399 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1400 &hardlink_check_gid, 0, 1401 "Unprivileged processes cannot create hard links to files owned by other " 1402 "groups"); 1403 1404 static int 1405 can_hardlink(struct vnode *vp, struct ucred *cred) 1406 { 1407 struct vattr va; 1408 int error; 1409 1410 if (!hardlink_check_uid && !hardlink_check_gid) 1411 return (0); 1412 1413 error = VOP_GETATTR(vp, &va, cred); 1414 if (error != 0) 1415 return (error); 1416 1417 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1418 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1419 if (error != 0) 1420 return (error); 1421 } 1422 1423 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1424 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1425 if (error != 0) 1426 return (error); 1427 } 1428 1429 return (0); 1430 } 1431 1432 int 1433 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1434 enum uio_seg segflg, int follow) 1435 { 1436 struct vnode *vp; 1437 struct mount *mp; 1438 struct nameidata nd; 1439 cap_rights_t rights; 1440 int error; 1441 1442 again: 1443 bwillwrite(); 1444 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, 1445 cap_rights_init(&rights, CAP_LINKAT_SOURCE), td); 1446 1447 if ((error = namei(&nd)) != 0) 1448 return (error); 1449 NDFREE(&nd, NDF_ONLY_PNBUF); 1450 vp = nd.ni_vp; 1451 if (vp->v_type == VDIR) { 1452 vrele(vp); 1453 return (EPERM); /* POSIX */ 1454 } 1455 NDINIT_ATRIGHTS(&nd, CREATE, 1456 LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2, 1457 cap_rights_init(&rights, CAP_LINKAT_TARGET), td); 1458 if ((error = namei(&nd)) == 0) { 1459 if (nd.ni_vp != NULL) { 1460 NDFREE(&nd, NDF_ONLY_PNBUF); 1461 if (nd.ni_dvp == nd.ni_vp) 1462 vrele(nd.ni_dvp); 1463 else 1464 vput(nd.ni_dvp); 1465 vrele(nd.ni_vp); 1466 vrele(vp); 1467 return (EEXIST); 1468 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1469 /* 1470 * Cross-device link. No need to recheck 1471 * vp->v_type, since it cannot change, except 1472 * to VBAD. 1473 */ 1474 NDFREE(&nd, NDF_ONLY_PNBUF); 1475 vput(nd.ni_dvp); 1476 vrele(vp); 1477 return (EXDEV); 1478 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1479 error = can_hardlink(vp, td->td_ucred); 1480 #ifdef MAC 1481 if (error == 0) 1482 error = mac_vnode_check_link(td->td_ucred, 1483 nd.ni_dvp, vp, &nd.ni_cnd); 1484 #endif 1485 if (error != 0) { 1486 vput(vp); 1487 vput(nd.ni_dvp); 1488 NDFREE(&nd, NDF_ONLY_PNBUF); 1489 return (error); 1490 } 1491 error = vn_start_write(vp, &mp, V_NOWAIT); 1492 if (error != 0) { 1493 vput(vp); 1494 vput(nd.ni_dvp); 1495 NDFREE(&nd, NDF_ONLY_PNBUF); 1496 error = vn_start_write(NULL, &mp, 1497 V_XSLEEP | PCATCH); 1498 if (error != 0) 1499 return (error); 1500 goto again; 1501 } 1502 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1503 VOP_UNLOCK(vp, 0); 1504 vput(nd.ni_dvp); 1505 vn_finished_write(mp); 1506 NDFREE(&nd, NDF_ONLY_PNBUF); 1507 } else { 1508 vput(nd.ni_dvp); 1509 NDFREE(&nd, NDF_ONLY_PNBUF); 1510 vrele(vp); 1511 goto again; 1512 } 1513 } 1514 vrele(vp); 1515 return (error); 1516 } 1517 1518 /* 1519 * Make a symbolic link. 1520 */ 1521 #ifndef _SYS_SYSPROTO_H_ 1522 struct symlink_args { 1523 char *path; 1524 char *link; 1525 }; 1526 #endif 1527 int 1528 sys_symlink(td, uap) 1529 struct thread *td; 1530 register struct symlink_args /* { 1531 char *path; 1532 char *link; 1533 } */ *uap; 1534 { 1535 1536 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1537 UIO_USERSPACE)); 1538 } 1539 1540 #ifndef _SYS_SYSPROTO_H_ 1541 struct symlinkat_args { 1542 char *path; 1543 int fd; 1544 char *path2; 1545 }; 1546 #endif 1547 int 1548 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1549 { 1550 1551 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1552 UIO_USERSPACE)); 1553 } 1554 1555 int 1556 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1557 enum uio_seg segflg) 1558 { 1559 struct mount *mp; 1560 struct vattr vattr; 1561 char *syspath; 1562 struct nameidata nd; 1563 int error; 1564 cap_rights_t rights; 1565 1566 if (segflg == UIO_SYSSPACE) { 1567 syspath = path1; 1568 } else { 1569 syspath = uma_zalloc(namei_zone, M_WAITOK); 1570 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1571 goto out; 1572 } 1573 AUDIT_ARG_TEXT(syspath); 1574 restart: 1575 bwillwrite(); 1576 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1577 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1578 td); 1579 if ((error = namei(&nd)) != 0) 1580 goto out; 1581 if (nd.ni_vp) { 1582 NDFREE(&nd, NDF_ONLY_PNBUF); 1583 if (nd.ni_vp == nd.ni_dvp) 1584 vrele(nd.ni_dvp); 1585 else 1586 vput(nd.ni_dvp); 1587 vrele(nd.ni_vp); 1588 error = EEXIST; 1589 goto out; 1590 } 1591 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1592 NDFREE(&nd, NDF_ONLY_PNBUF); 1593 vput(nd.ni_dvp); 1594 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1595 goto out; 1596 goto restart; 1597 } 1598 VATTR_NULL(&vattr); 1599 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1600 #ifdef MAC 1601 vattr.va_type = VLNK; 1602 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1603 &vattr); 1604 if (error != 0) 1605 goto out2; 1606 #endif 1607 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1608 if (error == 0) 1609 vput(nd.ni_vp); 1610 #ifdef MAC 1611 out2: 1612 #endif 1613 NDFREE(&nd, NDF_ONLY_PNBUF); 1614 vput(nd.ni_dvp); 1615 vn_finished_write(mp); 1616 out: 1617 if (segflg != UIO_SYSSPACE) 1618 uma_zfree(namei_zone, syspath); 1619 return (error); 1620 } 1621 1622 /* 1623 * Delete a whiteout from the filesystem. 1624 */ 1625 int 1626 sys_undelete(td, uap) 1627 struct thread *td; 1628 register struct undelete_args /* { 1629 char *path; 1630 } */ *uap; 1631 { 1632 struct mount *mp; 1633 struct nameidata nd; 1634 int error; 1635 1636 restart: 1637 bwillwrite(); 1638 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1639 UIO_USERSPACE, uap->path, td); 1640 error = namei(&nd); 1641 if (error != 0) 1642 return (error); 1643 1644 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1645 NDFREE(&nd, NDF_ONLY_PNBUF); 1646 if (nd.ni_vp == nd.ni_dvp) 1647 vrele(nd.ni_dvp); 1648 else 1649 vput(nd.ni_dvp); 1650 if (nd.ni_vp) 1651 vrele(nd.ni_vp); 1652 return (EEXIST); 1653 } 1654 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1655 NDFREE(&nd, NDF_ONLY_PNBUF); 1656 vput(nd.ni_dvp); 1657 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1658 return (error); 1659 goto restart; 1660 } 1661 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1662 NDFREE(&nd, NDF_ONLY_PNBUF); 1663 vput(nd.ni_dvp); 1664 vn_finished_write(mp); 1665 return (error); 1666 } 1667 1668 /* 1669 * Delete a name from the filesystem. 1670 */ 1671 #ifndef _SYS_SYSPROTO_H_ 1672 struct unlink_args { 1673 char *path; 1674 }; 1675 #endif 1676 int 1677 sys_unlink(td, uap) 1678 struct thread *td; 1679 struct unlink_args /* { 1680 char *path; 1681 } */ *uap; 1682 { 1683 1684 return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); 1685 } 1686 1687 #ifndef _SYS_SYSPROTO_H_ 1688 struct unlinkat_args { 1689 int fd; 1690 char *path; 1691 int flag; 1692 }; 1693 #endif 1694 int 1695 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1696 { 1697 int flag = uap->flag; 1698 int fd = uap->fd; 1699 char *path = uap->path; 1700 1701 if (flag & ~AT_REMOVEDIR) 1702 return (EINVAL); 1703 1704 if (flag & AT_REMOVEDIR) 1705 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1706 else 1707 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1708 } 1709 1710 int 1711 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1712 ino_t oldinum) 1713 { 1714 struct mount *mp; 1715 struct vnode *vp; 1716 struct nameidata nd; 1717 struct stat sb; 1718 cap_rights_t rights; 1719 int error; 1720 1721 restart: 1722 bwillwrite(); 1723 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1724 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1725 if ((error = namei(&nd)) != 0) 1726 return (error == EINVAL ? EPERM : error); 1727 vp = nd.ni_vp; 1728 if (vp->v_type == VDIR && oldinum == 0) { 1729 error = EPERM; /* POSIX */ 1730 } else if (oldinum != 0 && 1731 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1732 sb.st_ino != oldinum) { 1733 error = EIDRM; /* Identifier removed */ 1734 } else { 1735 /* 1736 * The root of a mounted filesystem cannot be deleted. 1737 * 1738 * XXX: can this only be a VDIR case? 1739 */ 1740 if (vp->v_vflag & VV_ROOT) 1741 error = EBUSY; 1742 } 1743 if (error == 0) { 1744 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1745 NDFREE(&nd, NDF_ONLY_PNBUF); 1746 vput(nd.ni_dvp); 1747 if (vp == nd.ni_dvp) 1748 vrele(vp); 1749 else 1750 vput(vp); 1751 if ((error = vn_start_write(NULL, &mp, 1752 V_XSLEEP | PCATCH)) != 0) 1753 return (error); 1754 goto restart; 1755 } 1756 #ifdef MAC 1757 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1758 &nd.ni_cnd); 1759 if (error != 0) 1760 goto out; 1761 #endif 1762 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1763 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1764 #ifdef MAC 1765 out: 1766 #endif 1767 vn_finished_write(mp); 1768 } 1769 NDFREE(&nd, NDF_ONLY_PNBUF); 1770 vput(nd.ni_dvp); 1771 if (vp == nd.ni_dvp) 1772 vrele(vp); 1773 else 1774 vput(vp); 1775 return (error); 1776 } 1777 1778 /* 1779 * Reposition read/write file offset. 1780 */ 1781 #ifndef _SYS_SYSPROTO_H_ 1782 struct lseek_args { 1783 int fd; 1784 int pad; 1785 off_t offset; 1786 int whence; 1787 }; 1788 #endif 1789 int 1790 sys_lseek(td, uap) 1791 struct thread *td; 1792 register struct lseek_args /* { 1793 int fd; 1794 int pad; 1795 off_t offset; 1796 int whence; 1797 } */ *uap; 1798 { 1799 struct file *fp; 1800 cap_rights_t rights; 1801 int error; 1802 1803 AUDIT_ARG_FD(uap->fd); 1804 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1805 if (error != 0) 1806 return (error); 1807 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1808 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1809 fdrop(fp, td); 1810 return (error); 1811 } 1812 1813 #if defined(COMPAT_43) 1814 /* 1815 * Reposition read/write file offset. 1816 */ 1817 #ifndef _SYS_SYSPROTO_H_ 1818 struct olseek_args { 1819 int fd; 1820 long offset; 1821 int whence; 1822 }; 1823 #endif 1824 int 1825 olseek(td, uap) 1826 struct thread *td; 1827 register struct olseek_args /* { 1828 int fd; 1829 long offset; 1830 int whence; 1831 } */ *uap; 1832 { 1833 struct lseek_args /* { 1834 int fd; 1835 int pad; 1836 off_t offset; 1837 int whence; 1838 } */ nuap; 1839 1840 nuap.fd = uap->fd; 1841 nuap.offset = uap->offset; 1842 nuap.whence = uap->whence; 1843 return (sys_lseek(td, &nuap)); 1844 } 1845 #endif /* COMPAT_43 */ 1846 1847 #if defined(COMPAT_FREEBSD6) 1848 /* Version with the 'pad' argument */ 1849 int 1850 freebsd6_lseek(td, uap) 1851 struct thread *td; 1852 register struct freebsd6_lseek_args *uap; 1853 { 1854 struct lseek_args ouap; 1855 1856 ouap.fd = uap->fd; 1857 ouap.offset = uap->offset; 1858 ouap.whence = uap->whence; 1859 return (sys_lseek(td, &ouap)); 1860 } 1861 #endif 1862 1863 /* 1864 * Check access permissions using passed credentials. 1865 */ 1866 static int 1867 vn_access(vp, user_flags, cred, td) 1868 struct vnode *vp; 1869 int user_flags; 1870 struct ucred *cred; 1871 struct thread *td; 1872 { 1873 accmode_t accmode; 1874 int error; 1875 1876 /* Flags == 0 means only check for existence. */ 1877 if (user_flags == 0) 1878 return (0); 1879 1880 accmode = 0; 1881 if (user_flags & R_OK) 1882 accmode |= VREAD; 1883 if (user_flags & W_OK) 1884 accmode |= VWRITE; 1885 if (user_flags & X_OK) 1886 accmode |= VEXEC; 1887 #ifdef MAC 1888 error = mac_vnode_check_access(cred, vp, accmode); 1889 if (error != 0) 1890 return (error); 1891 #endif 1892 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1893 error = VOP_ACCESS(vp, accmode, cred, td); 1894 return (error); 1895 } 1896 1897 /* 1898 * Check access permissions using "real" credentials. 1899 */ 1900 #ifndef _SYS_SYSPROTO_H_ 1901 struct access_args { 1902 char *path; 1903 int amode; 1904 }; 1905 #endif 1906 int 1907 sys_access(td, uap) 1908 struct thread *td; 1909 register struct access_args /* { 1910 char *path; 1911 int amode; 1912 } */ *uap; 1913 { 1914 1915 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1916 0, uap->amode)); 1917 } 1918 1919 #ifndef _SYS_SYSPROTO_H_ 1920 struct faccessat_args { 1921 int dirfd; 1922 char *path; 1923 int amode; 1924 int flag; 1925 } 1926 #endif 1927 int 1928 sys_faccessat(struct thread *td, struct faccessat_args *uap) 1929 { 1930 1931 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1932 uap->amode)); 1933 } 1934 1935 int 1936 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1937 int flag, int amode) 1938 { 1939 struct ucred *cred, *usecred; 1940 struct vnode *vp; 1941 struct nameidata nd; 1942 cap_rights_t rights; 1943 int error; 1944 1945 if (flag & ~AT_EACCESS) 1946 return (EINVAL); 1947 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 1948 return (EINVAL); 1949 1950 /* 1951 * Create and modify a temporary credential instead of one that 1952 * is potentially shared (if we need one). 1953 */ 1954 cred = td->td_ucred; 1955 if ((flag & AT_EACCESS) == 0 && 1956 ((cred->cr_uid != cred->cr_ruid || 1957 cred->cr_rgid != cred->cr_groups[0]))) { 1958 usecred = crdup(cred); 1959 usecred->cr_uid = cred->cr_ruid; 1960 usecred->cr_groups[0] = cred->cr_rgid; 1961 td->td_ucred = usecred; 1962 } else 1963 usecred = cred; 1964 AUDIT_ARG_VALUE(amode); 1965 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 1966 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 1967 td); 1968 if ((error = namei(&nd)) != 0) 1969 goto out; 1970 vp = nd.ni_vp; 1971 1972 error = vn_access(vp, amode, usecred, td); 1973 NDFREE(&nd, NDF_ONLY_PNBUF); 1974 vput(vp); 1975 out: 1976 if (usecred != cred) { 1977 td->td_ucred = cred; 1978 crfree(usecred); 1979 } 1980 return (error); 1981 } 1982 1983 /* 1984 * Check access permissions using "effective" credentials. 1985 */ 1986 #ifndef _SYS_SYSPROTO_H_ 1987 struct eaccess_args { 1988 char *path; 1989 int amode; 1990 }; 1991 #endif 1992 int 1993 sys_eaccess(td, uap) 1994 struct thread *td; 1995 register struct eaccess_args /* { 1996 char *path; 1997 int amode; 1998 } */ *uap; 1999 { 2000 2001 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2002 AT_EACCESS, uap->amode)); 2003 } 2004 2005 #if defined(COMPAT_43) 2006 /* 2007 * Get file status; this version follows links. 2008 */ 2009 #ifndef _SYS_SYSPROTO_H_ 2010 struct ostat_args { 2011 char *path; 2012 struct ostat *ub; 2013 }; 2014 #endif 2015 int 2016 ostat(td, uap) 2017 struct thread *td; 2018 register struct ostat_args /* { 2019 char *path; 2020 struct ostat *ub; 2021 } */ *uap; 2022 { 2023 struct stat sb; 2024 struct ostat osb; 2025 int error; 2026 2027 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2028 &sb, NULL); 2029 if (error != 0) 2030 return (error); 2031 cvtstat(&sb, &osb); 2032 return (copyout(&osb, uap->ub, sizeof (osb))); 2033 } 2034 2035 /* 2036 * Get file status; this version does not follow links. 2037 */ 2038 #ifndef _SYS_SYSPROTO_H_ 2039 struct olstat_args { 2040 char *path; 2041 struct ostat *ub; 2042 }; 2043 #endif 2044 int 2045 olstat(td, uap) 2046 struct thread *td; 2047 register struct olstat_args /* { 2048 char *path; 2049 struct ostat *ub; 2050 } */ *uap; 2051 { 2052 struct stat sb; 2053 struct ostat osb; 2054 int error; 2055 2056 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2057 UIO_USERSPACE, &sb, NULL); 2058 if (error != 0) 2059 return (error); 2060 cvtstat(&sb, &osb); 2061 return (copyout(&osb, uap->ub, sizeof (osb))); 2062 } 2063 2064 /* 2065 * Convert from an old to a new stat structure. 2066 */ 2067 void 2068 cvtstat(st, ost) 2069 struct stat *st; 2070 struct ostat *ost; 2071 { 2072 2073 bzero(ost, sizeof(*ost)); 2074 ost->st_dev = st->st_dev; 2075 ost->st_ino = st->st_ino; 2076 ost->st_mode = st->st_mode; 2077 ost->st_nlink = st->st_nlink; 2078 ost->st_uid = st->st_uid; 2079 ost->st_gid = st->st_gid; 2080 ost->st_rdev = st->st_rdev; 2081 if (st->st_size < (quad_t)1 << 32) 2082 ost->st_size = st->st_size; 2083 else 2084 ost->st_size = -2; 2085 ost->st_atim = st->st_atim; 2086 ost->st_mtim = st->st_mtim; 2087 ost->st_ctim = st->st_ctim; 2088 ost->st_blksize = st->st_blksize; 2089 ost->st_blocks = st->st_blocks; 2090 ost->st_flags = st->st_flags; 2091 ost->st_gen = st->st_gen; 2092 } 2093 #endif /* COMPAT_43 */ 2094 2095 /* 2096 * Get file status; this version follows links. 2097 */ 2098 #ifndef _SYS_SYSPROTO_H_ 2099 struct stat_args { 2100 char *path; 2101 struct stat *ub; 2102 }; 2103 #endif 2104 int 2105 sys_stat(td, uap) 2106 struct thread *td; 2107 register struct stat_args /* { 2108 char *path; 2109 struct stat *ub; 2110 } */ *uap; 2111 { 2112 struct stat sb; 2113 int error; 2114 2115 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2116 &sb, NULL); 2117 if (error == 0) 2118 error = copyout(&sb, uap->ub, sizeof (sb)); 2119 return (error); 2120 } 2121 2122 #ifndef _SYS_SYSPROTO_H_ 2123 struct fstatat_args { 2124 int fd; 2125 char *path; 2126 struct stat *buf; 2127 int flag; 2128 } 2129 #endif 2130 int 2131 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2132 { 2133 struct stat sb; 2134 int error; 2135 2136 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2137 UIO_USERSPACE, &sb, NULL); 2138 if (error == 0) 2139 error = copyout(&sb, uap->buf, sizeof (sb)); 2140 return (error); 2141 } 2142 2143 int 2144 kern_statat(struct thread *td, int flag, int fd, char *path, 2145 enum uio_seg pathseg, struct stat *sbp, 2146 void (*hook)(struct vnode *vp, struct stat *sbp)) 2147 { 2148 struct nameidata nd; 2149 struct stat sb; 2150 cap_rights_t rights; 2151 int error; 2152 2153 if (flag & ~AT_SYMLINK_NOFOLLOW) 2154 return (EINVAL); 2155 2156 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2157 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2158 cap_rights_init(&rights, CAP_FSTAT), td); 2159 2160 if ((error = namei(&nd)) != 0) 2161 return (error); 2162 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2163 if (error == 0) { 2164 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode); 2165 if (S_ISREG(sb.st_mode)) 2166 SDT_PROBE2(vfs, , stat, reg, path, pathseg); 2167 if (__predict_false(hook != NULL)) 2168 hook(nd.ni_vp, &sb); 2169 } 2170 NDFREE(&nd, NDF_ONLY_PNBUF); 2171 vput(nd.ni_vp); 2172 if (error != 0) 2173 return (error); 2174 *sbp = sb; 2175 #ifdef KTRACE 2176 if (KTRPOINT(td, KTR_STRUCT)) 2177 ktrstat(&sb); 2178 #endif 2179 return (0); 2180 } 2181 2182 /* 2183 * Get file status; this version does not follow links. 2184 */ 2185 #ifndef _SYS_SYSPROTO_H_ 2186 struct lstat_args { 2187 char *path; 2188 struct stat *ub; 2189 }; 2190 #endif 2191 int 2192 sys_lstat(td, uap) 2193 struct thread *td; 2194 register struct lstat_args /* { 2195 char *path; 2196 struct stat *ub; 2197 } */ *uap; 2198 { 2199 struct stat sb; 2200 int error; 2201 2202 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2203 UIO_USERSPACE, &sb, NULL); 2204 if (error == 0) 2205 error = copyout(&sb, uap->ub, sizeof (sb)); 2206 return (error); 2207 } 2208 2209 /* 2210 * Implementation of the NetBSD [l]stat() functions. 2211 */ 2212 void 2213 cvtnstat(sb, nsb) 2214 struct stat *sb; 2215 struct nstat *nsb; 2216 { 2217 2218 bzero(nsb, sizeof *nsb); 2219 nsb->st_dev = sb->st_dev; 2220 nsb->st_ino = sb->st_ino; 2221 nsb->st_mode = sb->st_mode; 2222 nsb->st_nlink = sb->st_nlink; 2223 nsb->st_uid = sb->st_uid; 2224 nsb->st_gid = sb->st_gid; 2225 nsb->st_rdev = sb->st_rdev; 2226 nsb->st_atim = sb->st_atim; 2227 nsb->st_mtim = sb->st_mtim; 2228 nsb->st_ctim = sb->st_ctim; 2229 nsb->st_size = sb->st_size; 2230 nsb->st_blocks = sb->st_blocks; 2231 nsb->st_blksize = sb->st_blksize; 2232 nsb->st_flags = sb->st_flags; 2233 nsb->st_gen = sb->st_gen; 2234 nsb->st_birthtim = sb->st_birthtim; 2235 } 2236 2237 #ifndef _SYS_SYSPROTO_H_ 2238 struct nstat_args { 2239 char *path; 2240 struct nstat *ub; 2241 }; 2242 #endif 2243 int 2244 sys_nstat(td, uap) 2245 struct thread *td; 2246 register struct nstat_args /* { 2247 char *path; 2248 struct nstat *ub; 2249 } */ *uap; 2250 { 2251 struct stat sb; 2252 struct nstat nsb; 2253 int error; 2254 2255 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2256 &sb, NULL); 2257 if (error != 0) 2258 return (error); 2259 cvtnstat(&sb, &nsb); 2260 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2261 } 2262 2263 /* 2264 * NetBSD lstat. Get file status; this version does not follow links. 2265 */ 2266 #ifndef _SYS_SYSPROTO_H_ 2267 struct lstat_args { 2268 char *path; 2269 struct stat *ub; 2270 }; 2271 #endif 2272 int 2273 sys_nlstat(td, uap) 2274 struct thread *td; 2275 register struct nlstat_args /* { 2276 char *path; 2277 struct nstat *ub; 2278 } */ *uap; 2279 { 2280 struct stat sb; 2281 struct nstat nsb; 2282 int error; 2283 2284 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2285 UIO_USERSPACE, &sb, NULL); 2286 if (error != 0) 2287 return (error); 2288 cvtnstat(&sb, &nsb); 2289 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2290 } 2291 2292 /* 2293 * Get configurable pathname variables. 2294 */ 2295 #ifndef _SYS_SYSPROTO_H_ 2296 struct pathconf_args { 2297 char *path; 2298 int name; 2299 }; 2300 #endif 2301 int 2302 sys_pathconf(td, uap) 2303 struct thread *td; 2304 register struct pathconf_args /* { 2305 char *path; 2306 int name; 2307 } */ *uap; 2308 { 2309 2310 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2311 } 2312 2313 #ifndef _SYS_SYSPROTO_H_ 2314 struct lpathconf_args { 2315 char *path; 2316 int name; 2317 }; 2318 #endif 2319 int 2320 sys_lpathconf(td, uap) 2321 struct thread *td; 2322 register struct lpathconf_args /* { 2323 char *path; 2324 int name; 2325 } */ *uap; 2326 { 2327 2328 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2329 NOFOLLOW)); 2330 } 2331 2332 int 2333 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2334 u_long flags) 2335 { 2336 struct nameidata nd; 2337 int error; 2338 2339 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2340 pathseg, path, td); 2341 if ((error = namei(&nd)) != 0) 2342 return (error); 2343 NDFREE(&nd, NDF_ONLY_PNBUF); 2344 2345 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2346 vput(nd.ni_vp); 2347 return (error); 2348 } 2349 2350 /* 2351 * Return target name of a symbolic link. 2352 */ 2353 #ifndef _SYS_SYSPROTO_H_ 2354 struct readlink_args { 2355 char *path; 2356 char *buf; 2357 size_t count; 2358 }; 2359 #endif 2360 int 2361 sys_readlink(td, uap) 2362 struct thread *td; 2363 register struct readlink_args /* { 2364 char *path; 2365 char *buf; 2366 size_t count; 2367 } */ *uap; 2368 { 2369 2370 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2371 uap->buf, UIO_USERSPACE, uap->count)); 2372 } 2373 #ifndef _SYS_SYSPROTO_H_ 2374 struct readlinkat_args { 2375 int fd; 2376 char *path; 2377 char *buf; 2378 size_t bufsize; 2379 }; 2380 #endif 2381 int 2382 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2383 { 2384 2385 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2386 uap->buf, UIO_USERSPACE, uap->bufsize)); 2387 } 2388 2389 int 2390 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2391 char *buf, enum uio_seg bufseg, size_t count) 2392 { 2393 struct vnode *vp; 2394 struct iovec aiov; 2395 struct uio auio; 2396 struct nameidata nd; 2397 int error; 2398 2399 if (count > IOSIZE_MAX) 2400 return (EINVAL); 2401 2402 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2403 pathseg, path, fd, td); 2404 2405 if ((error = namei(&nd)) != 0) 2406 return (error); 2407 NDFREE(&nd, NDF_ONLY_PNBUF); 2408 vp = nd.ni_vp; 2409 #ifdef MAC 2410 error = mac_vnode_check_readlink(td->td_ucred, vp); 2411 if (error != 0) { 2412 vput(vp); 2413 return (error); 2414 } 2415 #endif 2416 if (vp->v_type != VLNK) 2417 error = EINVAL; 2418 else { 2419 aiov.iov_base = buf; 2420 aiov.iov_len = count; 2421 auio.uio_iov = &aiov; 2422 auio.uio_iovcnt = 1; 2423 auio.uio_offset = 0; 2424 auio.uio_rw = UIO_READ; 2425 auio.uio_segflg = bufseg; 2426 auio.uio_td = td; 2427 auio.uio_resid = count; 2428 error = VOP_READLINK(vp, &auio, td->td_ucred); 2429 td->td_retval[0] = count - auio.uio_resid; 2430 } 2431 vput(vp); 2432 return (error); 2433 } 2434 2435 /* 2436 * Common implementation code for chflags() and fchflags(). 2437 */ 2438 static int 2439 setfflags(td, vp, flags) 2440 struct thread *td; 2441 struct vnode *vp; 2442 u_long flags; 2443 { 2444 struct mount *mp; 2445 struct vattr vattr; 2446 int error; 2447 2448 /* We can't support the value matching VNOVAL. */ 2449 if (flags == VNOVAL) 2450 return (EOPNOTSUPP); 2451 2452 /* 2453 * Prevent non-root users from setting flags on devices. When 2454 * a device is reused, users can retain ownership of the device 2455 * if they are allowed to set flags and programs assume that 2456 * chown can't fail when done as root. 2457 */ 2458 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2459 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2460 if (error != 0) 2461 return (error); 2462 } 2463 2464 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2465 return (error); 2466 VATTR_NULL(&vattr); 2467 vattr.va_flags = flags; 2468 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2469 #ifdef MAC 2470 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2471 if (error == 0) 2472 #endif 2473 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2474 VOP_UNLOCK(vp, 0); 2475 vn_finished_write(mp); 2476 return (error); 2477 } 2478 2479 /* 2480 * Change flags of a file given a path name. 2481 */ 2482 #ifndef _SYS_SYSPROTO_H_ 2483 struct chflags_args { 2484 const char *path; 2485 u_long flags; 2486 }; 2487 #endif 2488 int 2489 sys_chflags(td, uap) 2490 struct thread *td; 2491 register struct chflags_args /* { 2492 const char *path; 2493 u_long flags; 2494 } */ *uap; 2495 { 2496 2497 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2498 uap->flags, 0)); 2499 } 2500 2501 #ifndef _SYS_SYSPROTO_H_ 2502 struct chflagsat_args { 2503 int fd; 2504 const char *path; 2505 u_long flags; 2506 int atflag; 2507 } 2508 #endif 2509 int 2510 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2511 { 2512 int fd = uap->fd; 2513 const char *path = uap->path; 2514 u_long flags = uap->flags; 2515 int atflag = uap->atflag; 2516 2517 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2518 return (EINVAL); 2519 2520 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2521 } 2522 2523 /* 2524 * Same as chflags() but doesn't follow symlinks. 2525 */ 2526 int 2527 sys_lchflags(td, uap) 2528 struct thread *td; 2529 register struct lchflags_args /* { 2530 const char *path; 2531 u_long flags; 2532 } */ *uap; 2533 { 2534 2535 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2536 uap->flags, AT_SYMLINK_NOFOLLOW)); 2537 } 2538 2539 static int 2540 kern_chflagsat(struct thread *td, int fd, const char *path, 2541 enum uio_seg pathseg, u_long flags, int atflag) 2542 { 2543 struct nameidata nd; 2544 cap_rights_t rights; 2545 int error, follow; 2546 2547 AUDIT_ARG_FFLAGS(flags); 2548 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2549 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2550 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2551 if ((error = namei(&nd)) != 0) 2552 return (error); 2553 NDFREE(&nd, NDF_ONLY_PNBUF); 2554 error = setfflags(td, nd.ni_vp, flags); 2555 vrele(nd.ni_vp); 2556 return (error); 2557 } 2558 2559 /* 2560 * Change flags of a file given a file descriptor. 2561 */ 2562 #ifndef _SYS_SYSPROTO_H_ 2563 struct fchflags_args { 2564 int fd; 2565 u_long flags; 2566 }; 2567 #endif 2568 int 2569 sys_fchflags(td, uap) 2570 struct thread *td; 2571 register struct fchflags_args /* { 2572 int fd; 2573 u_long flags; 2574 } */ *uap; 2575 { 2576 struct file *fp; 2577 cap_rights_t rights; 2578 int error; 2579 2580 AUDIT_ARG_FD(uap->fd); 2581 AUDIT_ARG_FFLAGS(uap->flags); 2582 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS), 2583 &fp); 2584 if (error != 0) 2585 return (error); 2586 #ifdef AUDIT 2587 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2588 AUDIT_ARG_VNODE1(fp->f_vnode); 2589 VOP_UNLOCK(fp->f_vnode, 0); 2590 #endif 2591 error = setfflags(td, fp->f_vnode, uap->flags); 2592 fdrop(fp, td); 2593 return (error); 2594 } 2595 2596 /* 2597 * Common implementation code for chmod(), lchmod() and fchmod(). 2598 */ 2599 int 2600 setfmode(td, cred, vp, mode) 2601 struct thread *td; 2602 struct ucred *cred; 2603 struct vnode *vp; 2604 int mode; 2605 { 2606 struct mount *mp; 2607 struct vattr vattr; 2608 int error; 2609 2610 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2611 return (error); 2612 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2613 VATTR_NULL(&vattr); 2614 vattr.va_mode = mode & ALLPERMS; 2615 #ifdef MAC 2616 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2617 if (error == 0) 2618 #endif 2619 error = VOP_SETATTR(vp, &vattr, cred); 2620 VOP_UNLOCK(vp, 0); 2621 vn_finished_write(mp); 2622 return (error); 2623 } 2624 2625 /* 2626 * Change mode of a file given path name. 2627 */ 2628 #ifndef _SYS_SYSPROTO_H_ 2629 struct chmod_args { 2630 char *path; 2631 int mode; 2632 }; 2633 #endif 2634 int 2635 sys_chmod(td, uap) 2636 struct thread *td; 2637 register struct chmod_args /* { 2638 char *path; 2639 int mode; 2640 } */ *uap; 2641 { 2642 2643 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2644 uap->mode, 0)); 2645 } 2646 2647 #ifndef _SYS_SYSPROTO_H_ 2648 struct fchmodat_args { 2649 int dirfd; 2650 char *path; 2651 mode_t mode; 2652 int flag; 2653 } 2654 #endif 2655 int 2656 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2657 { 2658 int flag = uap->flag; 2659 int fd = uap->fd; 2660 char *path = uap->path; 2661 mode_t mode = uap->mode; 2662 2663 if (flag & ~AT_SYMLINK_NOFOLLOW) 2664 return (EINVAL); 2665 2666 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2667 } 2668 2669 /* 2670 * Change mode of a file given path name (don't follow links.) 2671 */ 2672 #ifndef _SYS_SYSPROTO_H_ 2673 struct lchmod_args { 2674 char *path; 2675 int mode; 2676 }; 2677 #endif 2678 int 2679 sys_lchmod(td, uap) 2680 struct thread *td; 2681 register struct lchmod_args /* { 2682 char *path; 2683 int mode; 2684 } */ *uap; 2685 { 2686 2687 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2688 uap->mode, AT_SYMLINK_NOFOLLOW)); 2689 } 2690 2691 int 2692 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2693 mode_t mode, int flag) 2694 { 2695 struct nameidata nd; 2696 cap_rights_t rights; 2697 int error, follow; 2698 2699 AUDIT_ARG_MODE(mode); 2700 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2701 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2702 cap_rights_init(&rights, CAP_FCHMOD), td); 2703 if ((error = namei(&nd)) != 0) 2704 return (error); 2705 NDFREE(&nd, NDF_ONLY_PNBUF); 2706 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2707 vrele(nd.ni_vp); 2708 return (error); 2709 } 2710 2711 /* 2712 * Change mode of a file given a file descriptor. 2713 */ 2714 #ifndef _SYS_SYSPROTO_H_ 2715 struct fchmod_args { 2716 int fd; 2717 int mode; 2718 }; 2719 #endif 2720 int 2721 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2722 { 2723 struct file *fp; 2724 cap_rights_t rights; 2725 int error; 2726 2727 AUDIT_ARG_FD(uap->fd); 2728 AUDIT_ARG_MODE(uap->mode); 2729 2730 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2731 if (error != 0) 2732 return (error); 2733 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2734 fdrop(fp, td); 2735 return (error); 2736 } 2737 2738 /* 2739 * Common implementation for chown(), lchown(), and fchown() 2740 */ 2741 int 2742 setfown(td, cred, vp, uid, gid) 2743 struct thread *td; 2744 struct ucred *cred; 2745 struct vnode *vp; 2746 uid_t uid; 2747 gid_t gid; 2748 { 2749 struct mount *mp; 2750 struct vattr vattr; 2751 int error; 2752 2753 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2754 return (error); 2755 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2756 VATTR_NULL(&vattr); 2757 vattr.va_uid = uid; 2758 vattr.va_gid = gid; 2759 #ifdef MAC 2760 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2761 vattr.va_gid); 2762 if (error == 0) 2763 #endif 2764 error = VOP_SETATTR(vp, &vattr, cred); 2765 VOP_UNLOCK(vp, 0); 2766 vn_finished_write(mp); 2767 return (error); 2768 } 2769 2770 /* 2771 * Set ownership given a path name. 2772 */ 2773 #ifndef _SYS_SYSPROTO_H_ 2774 struct chown_args { 2775 char *path; 2776 int uid; 2777 int gid; 2778 }; 2779 #endif 2780 int 2781 sys_chown(td, uap) 2782 struct thread *td; 2783 register struct chown_args /* { 2784 char *path; 2785 int uid; 2786 int gid; 2787 } */ *uap; 2788 { 2789 2790 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2791 uap->gid, 0)); 2792 } 2793 2794 #ifndef _SYS_SYSPROTO_H_ 2795 struct fchownat_args { 2796 int fd; 2797 const char * path; 2798 uid_t uid; 2799 gid_t gid; 2800 int flag; 2801 }; 2802 #endif 2803 int 2804 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2805 { 2806 int flag; 2807 2808 flag = uap->flag; 2809 if (flag & ~AT_SYMLINK_NOFOLLOW) 2810 return (EINVAL); 2811 2812 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2813 uap->gid, uap->flag)); 2814 } 2815 2816 int 2817 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2818 int uid, int gid, int flag) 2819 { 2820 struct nameidata nd; 2821 cap_rights_t rights; 2822 int error, follow; 2823 2824 AUDIT_ARG_OWNER(uid, gid); 2825 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2826 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2827 cap_rights_init(&rights, CAP_FCHOWN), td); 2828 2829 if ((error = namei(&nd)) != 0) 2830 return (error); 2831 NDFREE(&nd, NDF_ONLY_PNBUF); 2832 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2833 vrele(nd.ni_vp); 2834 return (error); 2835 } 2836 2837 /* 2838 * Set ownership given a path name, do not cross symlinks. 2839 */ 2840 #ifndef _SYS_SYSPROTO_H_ 2841 struct lchown_args { 2842 char *path; 2843 int uid; 2844 int gid; 2845 }; 2846 #endif 2847 int 2848 sys_lchown(td, uap) 2849 struct thread *td; 2850 register struct lchown_args /* { 2851 char *path; 2852 int uid; 2853 int gid; 2854 } */ *uap; 2855 { 2856 2857 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2858 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2859 } 2860 2861 /* 2862 * Set ownership given a file descriptor. 2863 */ 2864 #ifndef _SYS_SYSPROTO_H_ 2865 struct fchown_args { 2866 int fd; 2867 int uid; 2868 int gid; 2869 }; 2870 #endif 2871 int 2872 sys_fchown(td, uap) 2873 struct thread *td; 2874 register struct fchown_args /* { 2875 int fd; 2876 int uid; 2877 int gid; 2878 } */ *uap; 2879 { 2880 struct file *fp; 2881 cap_rights_t rights; 2882 int error; 2883 2884 AUDIT_ARG_FD(uap->fd); 2885 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2886 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 2887 if (error != 0) 2888 return (error); 2889 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2890 fdrop(fp, td); 2891 return (error); 2892 } 2893 2894 /* 2895 * Common implementation code for utimes(), lutimes(), and futimes(). 2896 */ 2897 static int 2898 getutimes(usrtvp, tvpseg, tsp) 2899 const struct timeval *usrtvp; 2900 enum uio_seg tvpseg; 2901 struct timespec *tsp; 2902 { 2903 struct timeval tv[2]; 2904 const struct timeval *tvp; 2905 int error; 2906 2907 if (usrtvp == NULL) { 2908 vfs_timestamp(&tsp[0]); 2909 tsp[1] = tsp[0]; 2910 } else { 2911 if (tvpseg == UIO_SYSSPACE) { 2912 tvp = usrtvp; 2913 } else { 2914 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 2915 return (error); 2916 tvp = tv; 2917 } 2918 2919 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 2920 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 2921 return (EINVAL); 2922 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2923 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2924 } 2925 return (0); 2926 } 2927 2928 /* 2929 * Common implementation code for futimens(), utimensat(). 2930 */ 2931 #define UTIMENS_NULL 0x1 2932 #define UTIMENS_EXIT 0x2 2933 static int 2934 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 2935 struct timespec *tsp, int *retflags) 2936 { 2937 struct timespec tsnow; 2938 int error; 2939 2940 vfs_timestamp(&tsnow); 2941 *retflags = 0; 2942 if (usrtsp == NULL) { 2943 tsp[0] = tsnow; 2944 tsp[1] = tsnow; 2945 *retflags |= UTIMENS_NULL; 2946 return (0); 2947 } 2948 if (tspseg == UIO_SYSSPACE) { 2949 tsp[0] = usrtsp[0]; 2950 tsp[1] = usrtsp[1]; 2951 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 2952 return (error); 2953 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 2954 *retflags |= UTIMENS_EXIT; 2955 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 2956 *retflags |= UTIMENS_NULL; 2957 if (tsp[0].tv_nsec == UTIME_OMIT) 2958 tsp[0].tv_sec = VNOVAL; 2959 else if (tsp[0].tv_nsec == UTIME_NOW) 2960 tsp[0] = tsnow; 2961 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 2962 return (EINVAL); 2963 if (tsp[1].tv_nsec == UTIME_OMIT) 2964 tsp[1].tv_sec = VNOVAL; 2965 else if (tsp[1].tv_nsec == UTIME_NOW) 2966 tsp[1] = tsnow; 2967 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 2968 return (EINVAL); 2969 2970 return (0); 2971 } 2972 2973 /* 2974 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 2975 * and utimensat(). 2976 */ 2977 static int 2978 setutimes(td, vp, ts, numtimes, nullflag) 2979 struct thread *td; 2980 struct vnode *vp; 2981 const struct timespec *ts; 2982 int numtimes; 2983 int nullflag; 2984 { 2985 struct mount *mp; 2986 struct vattr vattr; 2987 int error, setbirthtime; 2988 2989 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2990 return (error); 2991 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2992 setbirthtime = 0; 2993 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 2994 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 2995 setbirthtime = 1; 2996 VATTR_NULL(&vattr); 2997 vattr.va_atime = ts[0]; 2998 vattr.va_mtime = ts[1]; 2999 if (setbirthtime) 3000 vattr.va_birthtime = ts[1]; 3001 if (numtimes > 2) 3002 vattr.va_birthtime = ts[2]; 3003 if (nullflag) 3004 vattr.va_vaflags |= VA_UTIMES_NULL; 3005 #ifdef MAC 3006 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3007 vattr.va_mtime); 3008 #endif 3009 if (error == 0) 3010 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3011 VOP_UNLOCK(vp, 0); 3012 vn_finished_write(mp); 3013 return (error); 3014 } 3015 3016 /* 3017 * Set the access and modification times of a file. 3018 */ 3019 #ifndef _SYS_SYSPROTO_H_ 3020 struct utimes_args { 3021 char *path; 3022 struct timeval *tptr; 3023 }; 3024 #endif 3025 int 3026 sys_utimes(td, uap) 3027 struct thread *td; 3028 register struct utimes_args /* { 3029 char *path; 3030 struct timeval *tptr; 3031 } */ *uap; 3032 { 3033 3034 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3035 uap->tptr, UIO_USERSPACE)); 3036 } 3037 3038 #ifndef _SYS_SYSPROTO_H_ 3039 struct futimesat_args { 3040 int fd; 3041 const char * path; 3042 const struct timeval * times; 3043 }; 3044 #endif 3045 int 3046 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3047 { 3048 3049 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3050 uap->times, UIO_USERSPACE)); 3051 } 3052 3053 int 3054 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3055 struct timeval *tptr, enum uio_seg tptrseg) 3056 { 3057 struct nameidata nd; 3058 struct timespec ts[2]; 3059 cap_rights_t rights; 3060 int error; 3061 3062 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3063 return (error); 3064 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3065 cap_rights_init(&rights, CAP_FUTIMES), td); 3066 3067 if ((error = namei(&nd)) != 0) 3068 return (error); 3069 NDFREE(&nd, NDF_ONLY_PNBUF); 3070 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3071 vrele(nd.ni_vp); 3072 return (error); 3073 } 3074 3075 /* 3076 * Set the access and modification times of a file. 3077 */ 3078 #ifndef _SYS_SYSPROTO_H_ 3079 struct lutimes_args { 3080 char *path; 3081 struct timeval *tptr; 3082 }; 3083 #endif 3084 int 3085 sys_lutimes(td, uap) 3086 struct thread *td; 3087 register struct lutimes_args /* { 3088 char *path; 3089 struct timeval *tptr; 3090 } */ *uap; 3091 { 3092 3093 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3094 UIO_USERSPACE)); 3095 } 3096 3097 int 3098 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3099 struct timeval *tptr, enum uio_seg tptrseg) 3100 { 3101 struct timespec ts[2]; 3102 struct nameidata nd; 3103 int error; 3104 3105 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3106 return (error); 3107 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3108 if ((error = namei(&nd)) != 0) 3109 return (error); 3110 NDFREE(&nd, NDF_ONLY_PNBUF); 3111 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3112 vrele(nd.ni_vp); 3113 return (error); 3114 } 3115 3116 /* 3117 * Set the access and modification times of a file. 3118 */ 3119 #ifndef _SYS_SYSPROTO_H_ 3120 struct futimes_args { 3121 int fd; 3122 struct timeval *tptr; 3123 }; 3124 #endif 3125 int 3126 sys_futimes(td, uap) 3127 struct thread *td; 3128 register struct futimes_args /* { 3129 int fd; 3130 struct timeval *tptr; 3131 } */ *uap; 3132 { 3133 3134 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3135 } 3136 3137 int 3138 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3139 enum uio_seg tptrseg) 3140 { 3141 struct timespec ts[2]; 3142 struct file *fp; 3143 cap_rights_t rights; 3144 int error; 3145 3146 AUDIT_ARG_FD(fd); 3147 error = getutimes(tptr, tptrseg, ts); 3148 if (error != 0) 3149 return (error); 3150 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3151 if (error != 0) 3152 return (error); 3153 #ifdef AUDIT 3154 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3155 AUDIT_ARG_VNODE1(fp->f_vnode); 3156 VOP_UNLOCK(fp->f_vnode, 0); 3157 #endif 3158 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3159 fdrop(fp, td); 3160 return (error); 3161 } 3162 3163 int 3164 sys_futimens(struct thread *td, struct futimens_args *uap) 3165 { 3166 3167 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3168 } 3169 3170 int 3171 kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3172 enum uio_seg tptrseg) 3173 { 3174 struct timespec ts[2]; 3175 struct file *fp; 3176 cap_rights_t rights; 3177 int error, flags; 3178 3179 AUDIT_ARG_FD(fd); 3180 error = getutimens(tptr, tptrseg, ts, &flags); 3181 if (error != 0) 3182 return (error); 3183 if (flags & UTIMENS_EXIT) 3184 return (0); 3185 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3186 if (error != 0) 3187 return (error); 3188 #ifdef AUDIT 3189 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3190 AUDIT_ARG_VNODE1(fp->f_vnode); 3191 VOP_UNLOCK(fp->f_vnode, 0); 3192 #endif 3193 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3194 fdrop(fp, td); 3195 return (error); 3196 } 3197 3198 int 3199 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3200 { 3201 3202 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3203 uap->times, UIO_USERSPACE, uap->flag)); 3204 } 3205 3206 int 3207 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3208 struct timespec *tptr, enum uio_seg tptrseg, int flag) 3209 { 3210 struct nameidata nd; 3211 struct timespec ts[2]; 3212 cap_rights_t rights; 3213 int error, flags; 3214 3215 if (flag & ~AT_SYMLINK_NOFOLLOW) 3216 return (EINVAL); 3217 3218 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3219 return (error); 3220 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 3221 FOLLOW) | AUDITVNODE1, pathseg, path, fd, 3222 cap_rights_init(&rights, CAP_FUTIMES), td); 3223 if ((error = namei(&nd)) != 0) 3224 return (error); 3225 /* 3226 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3227 * POSIX states: 3228 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3229 * "Search permission is denied by a component of the path prefix." 3230 */ 3231 NDFREE(&nd, NDF_ONLY_PNBUF); 3232 if ((flags & UTIMENS_EXIT) == 0) 3233 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3234 vrele(nd.ni_vp); 3235 return (error); 3236 } 3237 3238 /* 3239 * Truncate a file given its path name. 3240 */ 3241 #ifndef _SYS_SYSPROTO_H_ 3242 struct truncate_args { 3243 char *path; 3244 int pad; 3245 off_t length; 3246 }; 3247 #endif 3248 int 3249 sys_truncate(td, uap) 3250 struct thread *td; 3251 register struct truncate_args /* { 3252 char *path; 3253 int pad; 3254 off_t length; 3255 } */ *uap; 3256 { 3257 3258 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3259 } 3260 3261 int 3262 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3263 { 3264 struct mount *mp; 3265 struct vnode *vp; 3266 void *rl_cookie; 3267 struct vattr vattr; 3268 struct nameidata nd; 3269 int error; 3270 3271 if (length < 0) 3272 return(EINVAL); 3273 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3274 if ((error = namei(&nd)) != 0) 3275 return (error); 3276 vp = nd.ni_vp; 3277 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3278 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3279 vn_rangelock_unlock(vp, rl_cookie); 3280 vrele(vp); 3281 return (error); 3282 } 3283 NDFREE(&nd, NDF_ONLY_PNBUF); 3284 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3285 if (vp->v_type == VDIR) 3286 error = EISDIR; 3287 #ifdef MAC 3288 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3289 } 3290 #endif 3291 else if ((error = vn_writechk(vp)) == 0 && 3292 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3293 VATTR_NULL(&vattr); 3294 vattr.va_size = length; 3295 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3296 } 3297 VOP_UNLOCK(vp, 0); 3298 vn_finished_write(mp); 3299 vn_rangelock_unlock(vp, rl_cookie); 3300 vrele(vp); 3301 return (error); 3302 } 3303 3304 #if defined(COMPAT_43) 3305 /* 3306 * Truncate a file given its path name. 3307 */ 3308 #ifndef _SYS_SYSPROTO_H_ 3309 struct otruncate_args { 3310 char *path; 3311 long length; 3312 }; 3313 #endif 3314 int 3315 otruncate(td, uap) 3316 struct thread *td; 3317 register struct otruncate_args /* { 3318 char *path; 3319 long length; 3320 } */ *uap; 3321 { 3322 struct truncate_args /* { 3323 char *path; 3324 int pad; 3325 off_t length; 3326 } */ nuap; 3327 3328 nuap.path = uap->path; 3329 nuap.length = uap->length; 3330 return (sys_truncate(td, &nuap)); 3331 } 3332 #endif /* COMPAT_43 */ 3333 3334 #if defined(COMPAT_FREEBSD6) 3335 /* Versions with the pad argument */ 3336 int 3337 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3338 { 3339 struct truncate_args ouap; 3340 3341 ouap.path = uap->path; 3342 ouap.length = uap->length; 3343 return (sys_truncate(td, &ouap)); 3344 } 3345 3346 int 3347 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3348 { 3349 struct ftruncate_args ouap; 3350 3351 ouap.fd = uap->fd; 3352 ouap.length = uap->length; 3353 return (sys_ftruncate(td, &ouap)); 3354 } 3355 #endif 3356 3357 /* 3358 * Sync an open file. 3359 */ 3360 #ifndef _SYS_SYSPROTO_H_ 3361 struct fsync_args { 3362 int fd; 3363 }; 3364 #endif 3365 int 3366 sys_fsync(td, uap) 3367 struct thread *td; 3368 struct fsync_args /* { 3369 int fd; 3370 } */ *uap; 3371 { 3372 struct vnode *vp; 3373 struct mount *mp; 3374 struct file *fp; 3375 cap_rights_t rights; 3376 int error, lock_flags; 3377 3378 AUDIT_ARG_FD(uap->fd); 3379 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FSYNC), &fp); 3380 if (error != 0) 3381 return (error); 3382 vp = fp->f_vnode; 3383 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3384 if (error != 0) 3385 goto drop; 3386 if (MNT_SHARED_WRITES(mp) || 3387 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3388 lock_flags = LK_SHARED; 3389 } else { 3390 lock_flags = LK_EXCLUSIVE; 3391 } 3392 vn_lock(vp, lock_flags | LK_RETRY); 3393 AUDIT_ARG_VNODE1(vp); 3394 if (vp->v_object != NULL) { 3395 VM_OBJECT_WLOCK(vp->v_object); 3396 vm_object_page_clean(vp->v_object, 0, 0, 0); 3397 VM_OBJECT_WUNLOCK(vp->v_object); 3398 } 3399 error = VOP_FSYNC(vp, MNT_WAIT, td); 3400 3401 VOP_UNLOCK(vp, 0); 3402 vn_finished_write(mp); 3403 drop: 3404 fdrop(fp, td); 3405 return (error); 3406 } 3407 3408 /* 3409 * Rename files. Source and destination must either both be directories, or 3410 * both not be directories. If target is a directory, it must be empty. 3411 */ 3412 #ifndef _SYS_SYSPROTO_H_ 3413 struct rename_args { 3414 char *from; 3415 char *to; 3416 }; 3417 #endif 3418 int 3419 sys_rename(td, uap) 3420 struct thread *td; 3421 register struct rename_args /* { 3422 char *from; 3423 char *to; 3424 } */ *uap; 3425 { 3426 3427 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3428 uap->to, UIO_USERSPACE)); 3429 } 3430 3431 #ifndef _SYS_SYSPROTO_H_ 3432 struct renameat_args { 3433 int oldfd; 3434 char *old; 3435 int newfd; 3436 char *new; 3437 }; 3438 #endif 3439 int 3440 sys_renameat(struct thread *td, struct renameat_args *uap) 3441 { 3442 3443 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3444 UIO_USERSPACE)); 3445 } 3446 3447 int 3448 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3449 enum uio_seg pathseg) 3450 { 3451 struct mount *mp = NULL; 3452 struct vnode *tvp, *fvp, *tdvp; 3453 struct nameidata fromnd, tond; 3454 cap_rights_t rights; 3455 int error; 3456 3457 again: 3458 bwillwrite(); 3459 #ifdef MAC 3460 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3461 AUDITVNODE1, pathseg, old, oldfd, 3462 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3463 #else 3464 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3465 pathseg, old, oldfd, 3466 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3467 #endif 3468 3469 if ((error = namei(&fromnd)) != 0) 3470 return (error); 3471 #ifdef MAC 3472 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3473 fromnd.ni_vp, &fromnd.ni_cnd); 3474 VOP_UNLOCK(fromnd.ni_dvp, 0); 3475 if (fromnd.ni_dvp != fromnd.ni_vp) 3476 VOP_UNLOCK(fromnd.ni_vp, 0); 3477 #endif 3478 fvp = fromnd.ni_vp; 3479 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3480 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3481 cap_rights_init(&rights, CAP_RENAMEAT_TARGET), td); 3482 if (fromnd.ni_vp->v_type == VDIR) 3483 tond.ni_cnd.cn_flags |= WILLBEDIR; 3484 if ((error = namei(&tond)) != 0) { 3485 /* Translate error code for rename("dir1", "dir2/."). */ 3486 if (error == EISDIR && fvp->v_type == VDIR) 3487 error = EINVAL; 3488 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3489 vrele(fromnd.ni_dvp); 3490 vrele(fvp); 3491 goto out1; 3492 } 3493 tdvp = tond.ni_dvp; 3494 tvp = tond.ni_vp; 3495 error = vn_start_write(fvp, &mp, V_NOWAIT); 3496 if (error != 0) { 3497 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3498 NDFREE(&tond, NDF_ONLY_PNBUF); 3499 if (tvp != NULL) 3500 vput(tvp); 3501 if (tdvp == tvp) 3502 vrele(tdvp); 3503 else 3504 vput(tdvp); 3505 vrele(fromnd.ni_dvp); 3506 vrele(fvp); 3507 vrele(tond.ni_startdir); 3508 if (fromnd.ni_startdir != NULL) 3509 vrele(fromnd.ni_startdir); 3510 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3511 if (error != 0) 3512 return (error); 3513 goto again; 3514 } 3515 if (tvp != NULL) { 3516 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3517 error = ENOTDIR; 3518 goto out; 3519 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3520 error = EISDIR; 3521 goto out; 3522 } 3523 #ifdef CAPABILITIES 3524 if (newfd != AT_FDCWD) { 3525 /* 3526 * If the target already exists we require CAP_UNLINKAT 3527 * from 'newfd'. 3528 */ 3529 error = cap_check(&tond.ni_filecaps.fc_rights, 3530 cap_rights_init(&rights, CAP_UNLINKAT)); 3531 if (error != 0) 3532 goto out; 3533 } 3534 #endif 3535 } 3536 if (fvp == tdvp) { 3537 error = EINVAL; 3538 goto out; 3539 } 3540 /* 3541 * If the source is the same as the destination (that is, if they 3542 * are links to the same vnode), then there is nothing to do. 3543 */ 3544 if (fvp == tvp) 3545 error = -1; 3546 #ifdef MAC 3547 else 3548 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3549 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3550 #endif 3551 out: 3552 if (error == 0) { 3553 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3554 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3555 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3556 NDFREE(&tond, NDF_ONLY_PNBUF); 3557 } else { 3558 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3559 NDFREE(&tond, NDF_ONLY_PNBUF); 3560 if (tvp != NULL) 3561 vput(tvp); 3562 if (tdvp == tvp) 3563 vrele(tdvp); 3564 else 3565 vput(tdvp); 3566 vrele(fromnd.ni_dvp); 3567 vrele(fvp); 3568 } 3569 vrele(tond.ni_startdir); 3570 vn_finished_write(mp); 3571 out1: 3572 if (fromnd.ni_startdir) 3573 vrele(fromnd.ni_startdir); 3574 if (error == -1) 3575 return (0); 3576 return (error); 3577 } 3578 3579 /* 3580 * Make a directory file. 3581 */ 3582 #ifndef _SYS_SYSPROTO_H_ 3583 struct mkdir_args { 3584 char *path; 3585 int mode; 3586 }; 3587 #endif 3588 int 3589 sys_mkdir(td, uap) 3590 struct thread *td; 3591 register struct mkdir_args /* { 3592 char *path; 3593 int mode; 3594 } */ *uap; 3595 { 3596 3597 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3598 uap->mode)); 3599 } 3600 3601 #ifndef _SYS_SYSPROTO_H_ 3602 struct mkdirat_args { 3603 int fd; 3604 char *path; 3605 mode_t mode; 3606 }; 3607 #endif 3608 int 3609 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3610 { 3611 3612 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3613 } 3614 3615 int 3616 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3617 int mode) 3618 { 3619 struct mount *mp; 3620 struct vnode *vp; 3621 struct vattr vattr; 3622 struct nameidata nd; 3623 cap_rights_t rights; 3624 int error; 3625 3626 AUDIT_ARG_MODE(mode); 3627 restart: 3628 bwillwrite(); 3629 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3630 NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), 3631 td); 3632 nd.ni_cnd.cn_flags |= WILLBEDIR; 3633 if ((error = namei(&nd)) != 0) 3634 return (error); 3635 vp = nd.ni_vp; 3636 if (vp != NULL) { 3637 NDFREE(&nd, NDF_ONLY_PNBUF); 3638 /* 3639 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3640 * the strange behaviour of leaving the vnode unlocked 3641 * if the target is the same vnode as the parent. 3642 */ 3643 if (vp == nd.ni_dvp) 3644 vrele(nd.ni_dvp); 3645 else 3646 vput(nd.ni_dvp); 3647 vrele(vp); 3648 return (EEXIST); 3649 } 3650 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3651 NDFREE(&nd, NDF_ONLY_PNBUF); 3652 vput(nd.ni_dvp); 3653 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3654 return (error); 3655 goto restart; 3656 } 3657 VATTR_NULL(&vattr); 3658 vattr.va_type = VDIR; 3659 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3660 #ifdef MAC 3661 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3662 &vattr); 3663 if (error != 0) 3664 goto out; 3665 #endif 3666 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3667 #ifdef MAC 3668 out: 3669 #endif 3670 NDFREE(&nd, NDF_ONLY_PNBUF); 3671 vput(nd.ni_dvp); 3672 if (error == 0) 3673 vput(nd.ni_vp); 3674 vn_finished_write(mp); 3675 return (error); 3676 } 3677 3678 /* 3679 * Remove a directory file. 3680 */ 3681 #ifndef _SYS_SYSPROTO_H_ 3682 struct rmdir_args { 3683 char *path; 3684 }; 3685 #endif 3686 int 3687 sys_rmdir(td, uap) 3688 struct thread *td; 3689 struct rmdir_args /* { 3690 char *path; 3691 } */ *uap; 3692 { 3693 3694 return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); 3695 } 3696 3697 int 3698 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3699 { 3700 struct mount *mp; 3701 struct vnode *vp; 3702 struct nameidata nd; 3703 cap_rights_t rights; 3704 int error; 3705 3706 restart: 3707 bwillwrite(); 3708 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3709 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3710 if ((error = namei(&nd)) != 0) 3711 return (error); 3712 vp = nd.ni_vp; 3713 if (vp->v_type != VDIR) { 3714 error = ENOTDIR; 3715 goto out; 3716 } 3717 /* 3718 * No rmdir "." please. 3719 */ 3720 if (nd.ni_dvp == vp) { 3721 error = EINVAL; 3722 goto out; 3723 } 3724 /* 3725 * The root of a mounted filesystem cannot be deleted. 3726 */ 3727 if (vp->v_vflag & VV_ROOT) { 3728 error = EBUSY; 3729 goto out; 3730 } 3731 #ifdef MAC 3732 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3733 &nd.ni_cnd); 3734 if (error != 0) 3735 goto out; 3736 #endif 3737 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3738 NDFREE(&nd, NDF_ONLY_PNBUF); 3739 vput(vp); 3740 if (nd.ni_dvp == vp) 3741 vrele(nd.ni_dvp); 3742 else 3743 vput(nd.ni_dvp); 3744 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3745 return (error); 3746 goto restart; 3747 } 3748 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3749 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3750 vn_finished_write(mp); 3751 out: 3752 NDFREE(&nd, NDF_ONLY_PNBUF); 3753 vput(vp); 3754 if (nd.ni_dvp == vp) 3755 vrele(nd.ni_dvp); 3756 else 3757 vput(nd.ni_dvp); 3758 return (error); 3759 } 3760 3761 #ifdef COMPAT_43 3762 /* 3763 * Read a block of directory entries in a filesystem independent format. 3764 */ 3765 #ifndef _SYS_SYSPROTO_H_ 3766 struct ogetdirentries_args { 3767 int fd; 3768 char *buf; 3769 u_int count; 3770 long *basep; 3771 }; 3772 #endif 3773 int 3774 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3775 { 3776 long loff; 3777 int error; 3778 3779 error = kern_ogetdirentries(td, uap, &loff); 3780 if (error == 0) 3781 error = copyout(&loff, uap->basep, sizeof(long)); 3782 return (error); 3783 } 3784 3785 int 3786 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3787 long *ploff) 3788 { 3789 struct vnode *vp; 3790 struct file *fp; 3791 struct uio auio, kuio; 3792 struct iovec aiov, kiov; 3793 struct dirent *dp, *edp; 3794 cap_rights_t rights; 3795 caddr_t dirbuf; 3796 int error, eofflag, readcnt; 3797 long loff; 3798 off_t foffset; 3799 3800 /* XXX arbitrary sanity limit on `count'. */ 3801 if (uap->count > 64 * 1024) 3802 return (EINVAL); 3803 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); 3804 if (error != 0) 3805 return (error); 3806 if ((fp->f_flag & FREAD) == 0) { 3807 fdrop(fp, td); 3808 return (EBADF); 3809 } 3810 vp = fp->f_vnode; 3811 foffset = foffset_lock(fp, 0); 3812 unionread: 3813 if (vp->v_type != VDIR) { 3814 foffset_unlock(fp, foffset, 0); 3815 fdrop(fp, td); 3816 return (EINVAL); 3817 } 3818 aiov.iov_base = uap->buf; 3819 aiov.iov_len = uap->count; 3820 auio.uio_iov = &aiov; 3821 auio.uio_iovcnt = 1; 3822 auio.uio_rw = UIO_READ; 3823 auio.uio_segflg = UIO_USERSPACE; 3824 auio.uio_td = td; 3825 auio.uio_resid = uap->count; 3826 vn_lock(vp, LK_SHARED | LK_RETRY); 3827 loff = auio.uio_offset = foffset; 3828 #ifdef MAC 3829 error = mac_vnode_check_readdir(td->td_ucred, vp); 3830 if (error != 0) { 3831 VOP_UNLOCK(vp, 0); 3832 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3833 fdrop(fp, td); 3834 return (error); 3835 } 3836 #endif 3837 # if (BYTE_ORDER != LITTLE_ENDIAN) 3838 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3839 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3840 NULL, NULL); 3841 foffset = auio.uio_offset; 3842 } else 3843 # endif 3844 { 3845 kuio = auio; 3846 kuio.uio_iov = &kiov; 3847 kuio.uio_segflg = UIO_SYSSPACE; 3848 kiov.iov_len = uap->count; 3849 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3850 kiov.iov_base = dirbuf; 3851 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3852 NULL, NULL); 3853 foffset = kuio.uio_offset; 3854 if (error == 0) { 3855 readcnt = uap->count - kuio.uio_resid; 3856 edp = (struct dirent *)&dirbuf[readcnt]; 3857 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3858 # if (BYTE_ORDER == LITTLE_ENDIAN) 3859 /* 3860 * The expected low byte of 3861 * dp->d_namlen is our dp->d_type. 3862 * The high MBZ byte of dp->d_namlen 3863 * is our dp->d_namlen. 3864 */ 3865 dp->d_type = dp->d_namlen; 3866 dp->d_namlen = 0; 3867 # else 3868 /* 3869 * The dp->d_type is the high byte 3870 * of the expected dp->d_namlen, 3871 * so must be zero'ed. 3872 */ 3873 dp->d_type = 0; 3874 # endif 3875 if (dp->d_reclen > 0) { 3876 dp = (struct dirent *) 3877 ((char *)dp + dp->d_reclen); 3878 } else { 3879 error = EIO; 3880 break; 3881 } 3882 } 3883 if (dp >= edp) 3884 error = uiomove(dirbuf, readcnt, &auio); 3885 } 3886 free(dirbuf, M_TEMP); 3887 } 3888 if (error != 0) { 3889 VOP_UNLOCK(vp, 0); 3890 foffset_unlock(fp, foffset, 0); 3891 fdrop(fp, td); 3892 return (error); 3893 } 3894 if (uap->count == auio.uio_resid && 3895 (vp->v_vflag & VV_ROOT) && 3896 (vp->v_mount->mnt_flag & MNT_UNION)) { 3897 struct vnode *tvp = vp; 3898 vp = vp->v_mount->mnt_vnodecovered; 3899 VREF(vp); 3900 fp->f_vnode = vp; 3901 fp->f_data = vp; 3902 foffset = 0; 3903 vput(tvp); 3904 goto unionread; 3905 } 3906 VOP_UNLOCK(vp, 0); 3907 foffset_unlock(fp, foffset, 0); 3908 fdrop(fp, td); 3909 td->td_retval[0] = uap->count - auio.uio_resid; 3910 if (error == 0) 3911 *ploff = loff; 3912 return (error); 3913 } 3914 #endif /* COMPAT_43 */ 3915 3916 /* 3917 * Read a block of directory entries in a filesystem independent format. 3918 */ 3919 #ifndef _SYS_SYSPROTO_H_ 3920 struct getdirentries_args { 3921 int fd; 3922 char *buf; 3923 u_int count; 3924 long *basep; 3925 }; 3926 #endif 3927 int 3928 sys_getdirentries(td, uap) 3929 struct thread *td; 3930 register struct getdirentries_args /* { 3931 int fd; 3932 char *buf; 3933 u_int count; 3934 long *basep; 3935 } */ *uap; 3936 { 3937 long base; 3938 int error; 3939 3940 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3941 NULL, UIO_USERSPACE); 3942 if (error != 0) 3943 return (error); 3944 if (uap->basep != NULL) 3945 error = copyout(&base, uap->basep, sizeof(long)); 3946 return (error); 3947 } 3948 3949 int 3950 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 3951 long *basep, ssize_t *residp, enum uio_seg bufseg) 3952 { 3953 struct vnode *vp; 3954 struct file *fp; 3955 struct uio auio; 3956 struct iovec aiov; 3957 cap_rights_t rights; 3958 long loff; 3959 int error, eofflag; 3960 off_t foffset; 3961 3962 AUDIT_ARG_FD(fd); 3963 if (count > IOSIZE_MAX) 3964 return (EINVAL); 3965 auio.uio_resid = count; 3966 error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp); 3967 if (error != 0) 3968 return (error); 3969 if ((fp->f_flag & FREAD) == 0) { 3970 fdrop(fp, td); 3971 return (EBADF); 3972 } 3973 vp = fp->f_vnode; 3974 foffset = foffset_lock(fp, 0); 3975 unionread: 3976 if (vp->v_type != VDIR) { 3977 error = EINVAL; 3978 goto fail; 3979 } 3980 aiov.iov_base = buf; 3981 aiov.iov_len = count; 3982 auio.uio_iov = &aiov; 3983 auio.uio_iovcnt = 1; 3984 auio.uio_rw = UIO_READ; 3985 auio.uio_segflg = bufseg; 3986 auio.uio_td = td; 3987 vn_lock(vp, LK_SHARED | LK_RETRY); 3988 AUDIT_ARG_VNODE1(vp); 3989 loff = auio.uio_offset = foffset; 3990 #ifdef MAC 3991 error = mac_vnode_check_readdir(td->td_ucred, vp); 3992 if (error == 0) 3993 #endif 3994 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 3995 NULL); 3996 foffset = auio.uio_offset; 3997 if (error != 0) { 3998 VOP_UNLOCK(vp, 0); 3999 goto fail; 4000 } 4001 if (count == auio.uio_resid && 4002 (vp->v_vflag & VV_ROOT) && 4003 (vp->v_mount->mnt_flag & MNT_UNION)) { 4004 struct vnode *tvp = vp; 4005 4006 vp = vp->v_mount->mnt_vnodecovered; 4007 VREF(vp); 4008 fp->f_vnode = vp; 4009 fp->f_data = vp; 4010 foffset = 0; 4011 vput(tvp); 4012 goto unionread; 4013 } 4014 VOP_UNLOCK(vp, 0); 4015 *basep = loff; 4016 if (residp != NULL) 4017 *residp = auio.uio_resid; 4018 td->td_retval[0] = count - auio.uio_resid; 4019 fail: 4020 foffset_unlock(fp, foffset, 0); 4021 fdrop(fp, td); 4022 return (error); 4023 } 4024 4025 #ifndef _SYS_SYSPROTO_H_ 4026 struct getdents_args { 4027 int fd; 4028 char *buf; 4029 size_t count; 4030 }; 4031 #endif 4032 int 4033 sys_getdents(td, uap) 4034 struct thread *td; 4035 register struct getdents_args /* { 4036 int fd; 4037 char *buf; 4038 u_int count; 4039 } */ *uap; 4040 { 4041 struct getdirentries_args ap; 4042 4043 ap.fd = uap->fd; 4044 ap.buf = uap->buf; 4045 ap.count = uap->count; 4046 ap.basep = NULL; 4047 return (sys_getdirentries(td, &ap)); 4048 } 4049 4050 /* 4051 * Set the mode mask for creation of filesystem nodes. 4052 */ 4053 #ifndef _SYS_SYSPROTO_H_ 4054 struct umask_args { 4055 int newmask; 4056 }; 4057 #endif 4058 int 4059 sys_umask(td, uap) 4060 struct thread *td; 4061 struct umask_args /* { 4062 int newmask; 4063 } */ *uap; 4064 { 4065 struct filedesc *fdp; 4066 4067 fdp = td->td_proc->p_fd; 4068 FILEDESC_XLOCK(fdp); 4069 td->td_retval[0] = fdp->fd_cmask; 4070 fdp->fd_cmask = uap->newmask & ALLPERMS; 4071 FILEDESC_XUNLOCK(fdp); 4072 return (0); 4073 } 4074 4075 /* 4076 * Void all references to file by ripping underlying filesystem away from 4077 * vnode. 4078 */ 4079 #ifndef _SYS_SYSPROTO_H_ 4080 struct revoke_args { 4081 char *path; 4082 }; 4083 #endif 4084 int 4085 sys_revoke(td, uap) 4086 struct thread *td; 4087 register struct revoke_args /* { 4088 char *path; 4089 } */ *uap; 4090 { 4091 struct vnode *vp; 4092 struct vattr vattr; 4093 struct nameidata nd; 4094 int error; 4095 4096 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4097 uap->path, td); 4098 if ((error = namei(&nd)) != 0) 4099 return (error); 4100 vp = nd.ni_vp; 4101 NDFREE(&nd, NDF_ONLY_PNBUF); 4102 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4103 error = EINVAL; 4104 goto out; 4105 } 4106 #ifdef MAC 4107 error = mac_vnode_check_revoke(td->td_ucred, vp); 4108 if (error != 0) 4109 goto out; 4110 #endif 4111 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4112 if (error != 0) 4113 goto out; 4114 if (td->td_ucred->cr_uid != vattr.va_uid) { 4115 error = priv_check(td, PRIV_VFS_ADMIN); 4116 if (error != 0) 4117 goto out; 4118 } 4119 if (vcount(vp) > 1) 4120 VOP_REVOKE(vp, REVOKEALL); 4121 out: 4122 vput(vp); 4123 return (error); 4124 } 4125 4126 /* 4127 * Convert a user file descriptor to a kernel file entry and check that, if it 4128 * is a capability, the correct rights are present. A reference on the file 4129 * entry is held upon returning. 4130 */ 4131 int 4132 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4133 { 4134 struct file *fp; 4135 int error; 4136 4137 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); 4138 if (error != 0) 4139 return (error); 4140 4141 /* 4142 * The file could be not of the vnode type, or it may be not 4143 * yet fully initialized, in which case the f_vnode pointer 4144 * may be set, but f_ops is still badfileops. E.g., 4145 * devfs_open() transiently create such situation to 4146 * facilitate csw d_fdopen(). 4147 * 4148 * Dupfdopen() handling in kern_openat() installs the 4149 * half-baked file into the process descriptor table, allowing 4150 * other thread to dereference it. Guard against the race by 4151 * checking f_ops. 4152 */ 4153 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4154 fdrop(fp, td); 4155 return (EINVAL); 4156 } 4157 *fpp = fp; 4158 return (0); 4159 } 4160 4161 4162 /* 4163 * Get an (NFS) file handle. 4164 */ 4165 #ifndef _SYS_SYSPROTO_H_ 4166 struct lgetfh_args { 4167 char *fname; 4168 fhandle_t *fhp; 4169 }; 4170 #endif 4171 int 4172 sys_lgetfh(td, uap) 4173 struct thread *td; 4174 register struct lgetfh_args *uap; 4175 { 4176 struct nameidata nd; 4177 fhandle_t fh; 4178 register struct vnode *vp; 4179 int error; 4180 4181 error = priv_check(td, PRIV_VFS_GETFH); 4182 if (error != 0) 4183 return (error); 4184 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4185 uap->fname, td); 4186 error = namei(&nd); 4187 if (error != 0) 4188 return (error); 4189 NDFREE(&nd, NDF_ONLY_PNBUF); 4190 vp = nd.ni_vp; 4191 bzero(&fh, sizeof(fh)); 4192 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4193 error = VOP_VPTOFH(vp, &fh.fh_fid); 4194 vput(vp); 4195 if (error == 0) 4196 error = copyout(&fh, uap->fhp, sizeof (fh)); 4197 return (error); 4198 } 4199 4200 #ifndef _SYS_SYSPROTO_H_ 4201 struct getfh_args { 4202 char *fname; 4203 fhandle_t *fhp; 4204 }; 4205 #endif 4206 int 4207 sys_getfh(td, uap) 4208 struct thread *td; 4209 register struct getfh_args *uap; 4210 { 4211 struct nameidata nd; 4212 fhandle_t fh; 4213 register struct vnode *vp; 4214 int error; 4215 4216 error = priv_check(td, PRIV_VFS_GETFH); 4217 if (error != 0) 4218 return (error); 4219 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4220 uap->fname, td); 4221 error = namei(&nd); 4222 if (error != 0) 4223 return (error); 4224 NDFREE(&nd, NDF_ONLY_PNBUF); 4225 vp = nd.ni_vp; 4226 bzero(&fh, sizeof(fh)); 4227 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4228 error = VOP_VPTOFH(vp, &fh.fh_fid); 4229 vput(vp); 4230 if (error == 0) 4231 error = copyout(&fh, uap->fhp, sizeof (fh)); 4232 return (error); 4233 } 4234 4235 /* 4236 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4237 * open descriptor. 4238 * 4239 * warning: do not remove the priv_check() call or this becomes one giant 4240 * security hole. 4241 */ 4242 #ifndef _SYS_SYSPROTO_H_ 4243 struct fhopen_args { 4244 const struct fhandle *u_fhp; 4245 int flags; 4246 }; 4247 #endif 4248 int 4249 sys_fhopen(td, uap) 4250 struct thread *td; 4251 struct fhopen_args /* { 4252 const struct fhandle *u_fhp; 4253 int flags; 4254 } */ *uap; 4255 { 4256 struct mount *mp; 4257 struct vnode *vp; 4258 struct fhandle fhp; 4259 struct file *fp; 4260 int fmode, error; 4261 int indx; 4262 4263 error = priv_check(td, PRIV_VFS_FHOPEN); 4264 if (error != 0) 4265 return (error); 4266 indx = -1; 4267 fmode = FFLAGS(uap->flags); 4268 /* why not allow a non-read/write open for our lockd? */ 4269 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4270 return (EINVAL); 4271 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4272 if (error != 0) 4273 return(error); 4274 /* find the mount point */ 4275 mp = vfs_busyfs(&fhp.fh_fsid); 4276 if (mp == NULL) 4277 return (ESTALE); 4278 /* now give me my vnode, it gets returned to me locked */ 4279 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4280 vfs_unbusy(mp); 4281 if (error != 0) 4282 return (error); 4283 4284 error = falloc_noinstall(td, &fp); 4285 if (error != 0) { 4286 vput(vp); 4287 return (error); 4288 } 4289 /* 4290 * An extra reference on `fp' has been held for us by 4291 * falloc_noinstall(). 4292 */ 4293 4294 #ifdef INVARIANTS 4295 td->td_dupfd = -1; 4296 #endif 4297 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4298 if (error != 0) { 4299 KASSERT(fp->f_ops == &badfileops, 4300 ("VOP_OPEN in fhopen() set f_ops")); 4301 KASSERT(td->td_dupfd < 0, 4302 ("fhopen() encountered fdopen()")); 4303 4304 vput(vp); 4305 goto bad; 4306 } 4307 #ifdef INVARIANTS 4308 td->td_dupfd = 0; 4309 #endif 4310 fp->f_vnode = vp; 4311 fp->f_seqcount = 1; 4312 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4313 &vnops); 4314 VOP_UNLOCK(vp, 0); 4315 if ((fmode & O_TRUNC) != 0) { 4316 error = fo_truncate(fp, 0, td->td_ucred, td); 4317 if (error != 0) 4318 goto bad; 4319 } 4320 4321 error = finstall(td, fp, &indx, fmode, NULL); 4322 bad: 4323 fdrop(fp, td); 4324 td->td_retval[0] = indx; 4325 return (error); 4326 } 4327 4328 /* 4329 * Stat an (NFS) file handle. 4330 */ 4331 #ifndef _SYS_SYSPROTO_H_ 4332 struct fhstat_args { 4333 struct fhandle *u_fhp; 4334 struct stat *sb; 4335 }; 4336 #endif 4337 int 4338 sys_fhstat(td, uap) 4339 struct thread *td; 4340 register struct fhstat_args /* { 4341 struct fhandle *u_fhp; 4342 struct stat *sb; 4343 } */ *uap; 4344 { 4345 struct stat sb; 4346 struct fhandle fh; 4347 int error; 4348 4349 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4350 if (error != 0) 4351 return (error); 4352 error = kern_fhstat(td, fh, &sb); 4353 if (error == 0) 4354 error = copyout(&sb, uap->sb, sizeof(sb)); 4355 return (error); 4356 } 4357 4358 int 4359 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4360 { 4361 struct mount *mp; 4362 struct vnode *vp; 4363 int error; 4364 4365 error = priv_check(td, PRIV_VFS_FHSTAT); 4366 if (error != 0) 4367 return (error); 4368 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4369 return (ESTALE); 4370 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4371 vfs_unbusy(mp); 4372 if (error != 0) 4373 return (error); 4374 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4375 vput(vp); 4376 return (error); 4377 } 4378 4379 /* 4380 * Implement fstatfs() for (NFS) file handles. 4381 */ 4382 #ifndef _SYS_SYSPROTO_H_ 4383 struct fhstatfs_args { 4384 struct fhandle *u_fhp; 4385 struct statfs *buf; 4386 }; 4387 #endif 4388 int 4389 sys_fhstatfs(td, uap) 4390 struct thread *td; 4391 struct fhstatfs_args /* { 4392 struct fhandle *u_fhp; 4393 struct statfs *buf; 4394 } */ *uap; 4395 { 4396 struct statfs sf; 4397 fhandle_t fh; 4398 int error; 4399 4400 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4401 if (error != 0) 4402 return (error); 4403 error = kern_fhstatfs(td, fh, &sf); 4404 if (error != 0) 4405 return (error); 4406 return (copyout(&sf, uap->buf, sizeof(sf))); 4407 } 4408 4409 int 4410 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4411 { 4412 struct statfs *sp; 4413 struct mount *mp; 4414 struct vnode *vp; 4415 int error; 4416 4417 error = priv_check(td, PRIV_VFS_FHSTATFS); 4418 if (error != 0) 4419 return (error); 4420 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4421 return (ESTALE); 4422 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4423 if (error != 0) { 4424 vfs_unbusy(mp); 4425 return (error); 4426 } 4427 vput(vp); 4428 error = prison_canseemount(td->td_ucred, mp); 4429 if (error != 0) 4430 goto out; 4431 #ifdef MAC 4432 error = mac_mount_check_stat(td->td_ucred, mp); 4433 if (error != 0) 4434 goto out; 4435 #endif 4436 /* 4437 * Set these in case the underlying filesystem fails to do so. 4438 */ 4439 sp = &mp->mnt_stat; 4440 sp->f_version = STATFS_VERSION; 4441 sp->f_namemax = NAME_MAX; 4442 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4443 error = VFS_STATFS(mp, sp); 4444 if (error == 0) 4445 *buf = *sp; 4446 out: 4447 vfs_unbusy(mp); 4448 return (error); 4449 } 4450 4451 int 4452 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4453 { 4454 struct file *fp; 4455 struct mount *mp; 4456 struct vnode *vp; 4457 cap_rights_t rights; 4458 off_t olen, ooffset; 4459 int error; 4460 4461 if (offset < 0 || len <= 0) 4462 return (EINVAL); 4463 /* Check for wrap. */ 4464 if (offset > OFF_MAX - len) 4465 return (EFBIG); 4466 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4467 if (error != 0) 4468 return (error); 4469 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4470 error = ESPIPE; 4471 goto out; 4472 } 4473 if ((fp->f_flag & FWRITE) == 0) { 4474 error = EBADF; 4475 goto out; 4476 } 4477 if (fp->f_type != DTYPE_VNODE) { 4478 error = ENODEV; 4479 goto out; 4480 } 4481 vp = fp->f_vnode; 4482 if (vp->v_type != VREG) { 4483 error = ENODEV; 4484 goto out; 4485 } 4486 4487 /* Allocating blocks may take a long time, so iterate. */ 4488 for (;;) { 4489 olen = len; 4490 ooffset = offset; 4491 4492 bwillwrite(); 4493 mp = NULL; 4494 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4495 if (error != 0) 4496 break; 4497 error = vn_lock(vp, LK_EXCLUSIVE); 4498 if (error != 0) { 4499 vn_finished_write(mp); 4500 break; 4501 } 4502 #ifdef MAC 4503 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4504 if (error == 0) 4505 #endif 4506 error = VOP_ALLOCATE(vp, &offset, &len); 4507 VOP_UNLOCK(vp, 0); 4508 vn_finished_write(mp); 4509 4510 if (olen + ooffset != offset + len) { 4511 panic("offset + len changed from %jx/%jx to %jx/%jx", 4512 ooffset, olen, offset, len); 4513 } 4514 if (error != 0 || len == 0) 4515 break; 4516 KASSERT(olen > len, ("Iteration did not make progress?")); 4517 maybe_yield(); 4518 } 4519 out: 4520 fdrop(fp, td); 4521 return (error); 4522 } 4523 4524 int 4525 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4526 { 4527 int error; 4528 4529 error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len); 4530 return (kern_posix_error(td, error)); 4531 } 4532 4533 /* 4534 * Unlike madvise(2), we do not make a best effort to remember every 4535 * possible caching hint. Instead, we remember the last setting with 4536 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4537 * region of any current setting. 4538 */ 4539 int 4540 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4541 int advice) 4542 { 4543 struct fadvise_info *fa, *new; 4544 struct file *fp; 4545 struct vnode *vp; 4546 cap_rights_t rights; 4547 off_t end; 4548 int error; 4549 4550 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4551 return (EINVAL); 4552 switch (advice) { 4553 case POSIX_FADV_SEQUENTIAL: 4554 case POSIX_FADV_RANDOM: 4555 case POSIX_FADV_NOREUSE: 4556 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4557 break; 4558 case POSIX_FADV_NORMAL: 4559 case POSIX_FADV_WILLNEED: 4560 case POSIX_FADV_DONTNEED: 4561 new = NULL; 4562 break; 4563 default: 4564 return (EINVAL); 4565 } 4566 /* XXX: CAP_POSIX_FADVISE? */ 4567 error = fget(td, fd, cap_rights_init(&rights), &fp); 4568 if (error != 0) 4569 goto out; 4570 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4571 error = ESPIPE; 4572 goto out; 4573 } 4574 if (fp->f_type != DTYPE_VNODE) { 4575 error = ENODEV; 4576 goto out; 4577 } 4578 vp = fp->f_vnode; 4579 if (vp->v_type != VREG) { 4580 error = ENODEV; 4581 goto out; 4582 } 4583 if (len == 0) 4584 end = OFF_MAX; 4585 else 4586 end = offset + len - 1; 4587 switch (advice) { 4588 case POSIX_FADV_SEQUENTIAL: 4589 case POSIX_FADV_RANDOM: 4590 case POSIX_FADV_NOREUSE: 4591 /* 4592 * Try to merge any existing non-standard region with 4593 * this new region if possible, otherwise create a new 4594 * non-standard region for this request. 4595 */ 4596 mtx_pool_lock(mtxpool_sleep, fp); 4597 fa = fp->f_advice; 4598 if (fa != NULL && fa->fa_advice == advice && 4599 ((fa->fa_start <= end && fa->fa_end >= offset) || 4600 (end != OFF_MAX && fa->fa_start == end + 1) || 4601 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4602 if (offset < fa->fa_start) 4603 fa->fa_start = offset; 4604 if (end > fa->fa_end) 4605 fa->fa_end = end; 4606 } else { 4607 new->fa_advice = advice; 4608 new->fa_start = offset; 4609 new->fa_end = end; 4610 fp->f_advice = new; 4611 new = fa; 4612 } 4613 mtx_pool_unlock(mtxpool_sleep, fp); 4614 break; 4615 case POSIX_FADV_NORMAL: 4616 /* 4617 * If a the "normal" region overlaps with an existing 4618 * non-standard region, trim or remove the 4619 * non-standard region. 4620 */ 4621 mtx_pool_lock(mtxpool_sleep, fp); 4622 fa = fp->f_advice; 4623 if (fa != NULL) { 4624 if (offset <= fa->fa_start && end >= fa->fa_end) { 4625 new = fa; 4626 fp->f_advice = NULL; 4627 } else if (offset <= fa->fa_start && 4628 end >= fa->fa_start) 4629 fa->fa_start = end + 1; 4630 else if (offset <= fa->fa_end && end >= fa->fa_end) 4631 fa->fa_end = offset - 1; 4632 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4633 /* 4634 * If the "normal" region is a middle 4635 * portion of the existing 4636 * non-standard region, just remove 4637 * the whole thing rather than picking 4638 * one side or the other to 4639 * preserve. 4640 */ 4641 new = fa; 4642 fp->f_advice = NULL; 4643 } 4644 } 4645 mtx_pool_unlock(mtxpool_sleep, fp); 4646 break; 4647 case POSIX_FADV_WILLNEED: 4648 case POSIX_FADV_DONTNEED: 4649 error = VOP_ADVISE(vp, offset, end, advice); 4650 break; 4651 } 4652 out: 4653 if (fp != NULL) 4654 fdrop(fp, td); 4655 free(new, M_FADVISE); 4656 return (error); 4657 } 4658 4659 int 4660 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4661 { 4662 int error; 4663 4664 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4665 uap->advice); 4666 return (kern_posix_error(td, error)); 4667 } 4668