1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/sysent.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/sysproto.h> 55 #include <sys/namei.h> 56 #include <sys/filedesc.h> 57 #include <sys/kernel.h> 58 #include <sys/fcntl.h> 59 #include <sys/file.h> 60 #include <sys/filio.h> 61 #include <sys/limits.h> 62 #include <sys/linker.h> 63 #include <sys/rwlock.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97 static int kern_chflagsat(struct thread *td, int fd, const char *path, 98 enum uio_seg pathseg, u_long flags, int atflag); 99 static int setfflags(struct thread *td, struct vnode *, u_long); 100 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 101 static int getutimens(const struct timespec *, enum uio_seg, 102 struct timespec *, int *); 103 static int setutimes(struct thread *td, struct vnode *, 104 const 
struct timespec *, int, int); 105 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 106 struct thread *td); 107 108 /* 109 * Sync each mounted filesystem. 110 */ 111 #ifndef _SYS_SYSPROTO_H_ 112 struct sync_args { 113 int dummy; 114 }; 115 #endif 116 /* ARGSUSED */ 117 int 118 sys_sync(td, uap) 119 struct thread *td; 120 struct sync_args *uap; 121 { 122 struct mount *mp, *nmp; 123 int save; 124 125 mtx_lock(&mountlist_mtx); 126 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 127 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 128 nmp = TAILQ_NEXT(mp, mnt_list); 129 continue; 130 } 131 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 132 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 133 save = curthread_pflags_set(TDP_SYNCIO); 134 vfs_msync(mp, MNT_NOWAIT); 135 VFS_SYNC(mp, MNT_NOWAIT); 136 curthread_pflags_restore(save); 137 vn_finished_write(mp); 138 } 139 mtx_lock(&mountlist_mtx); 140 nmp = TAILQ_NEXT(mp, mnt_list); 141 vfs_unbusy(mp); 142 } 143 mtx_unlock(&mountlist_mtx); 144 return (0); 145 } 146 147 /* 148 * Change filesystem quotas. 
149 */ 150 #ifndef _SYS_SYSPROTO_H_ 151 struct quotactl_args { 152 char *path; 153 int cmd; 154 int uid; 155 caddr_t arg; 156 }; 157 #endif 158 int 159 sys_quotactl(td, uap) 160 struct thread *td; 161 register struct quotactl_args /* { 162 char *path; 163 int cmd; 164 int uid; 165 caddr_t arg; 166 } */ *uap; 167 { 168 struct mount *mp; 169 struct nameidata nd; 170 int error; 171 172 AUDIT_ARG_CMD(uap->cmd); 173 AUDIT_ARG_UID(uap->uid); 174 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 175 return (EPERM); 176 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 177 uap->path, td); 178 if ((error = namei(&nd)) != 0) 179 return (error); 180 NDFREE(&nd, NDF_ONLY_PNBUF); 181 mp = nd.ni_vp->v_mount; 182 vfs_ref(mp); 183 vput(nd.ni_vp); 184 error = vfs_busy(mp, 0); 185 vfs_rel(mp); 186 if (error != 0) 187 return (error); 188 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 189 190 /* 191 * Since quota on operation typically needs to open quota 192 * file, the Q_QUOTAON handler needs to unbusy the mount point 193 * before calling into namei. Otherwise, unmount might be 194 * started between two vfs_busy() invocations (first is our, 195 * second is from mount point cross-walk code in lookup()), 196 * causing deadlock. 197 * 198 * Require that Q_QUOTAON handles the vfs_busy() reference on 199 * its own, always returning with ubusied mount point. 200 */ 201 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 202 vfs_unbusy(mp); 203 return (error); 204 } 205 206 /* 207 * Used by statfs conversion routines to scale the block size up if 208 * necessary so that all of the block counts are <= 'max_size'. Note 209 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 210 * value of 'n'. 
211 */ 212 void 213 statfs_scale_blocks(struct statfs *sf, long max_size) 214 { 215 uint64_t count; 216 int shift; 217 218 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 219 220 /* 221 * Attempt to scale the block counts to give a more accurate 222 * overview to userland of the ratio of free space to used 223 * space. To do this, find the largest block count and compute 224 * a divisor that lets it fit into a signed integer <= max_size. 225 */ 226 if (sf->f_bavail < 0) 227 count = -sf->f_bavail; 228 else 229 count = sf->f_bavail; 230 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 231 if (count <= max_size) 232 return; 233 234 count >>= flsl(max_size); 235 shift = 0; 236 while (count > 0) { 237 shift++; 238 count >>=1; 239 } 240 241 sf->f_bsize <<= shift; 242 sf->f_blocks >>= shift; 243 sf->f_bfree >>= shift; 244 sf->f_bavail >>= shift; 245 } 246 247 /* 248 * Get filesystem statistics. 249 */ 250 #ifndef _SYS_SYSPROTO_H_ 251 struct statfs_args { 252 char *path; 253 struct statfs *buf; 254 }; 255 #endif 256 int 257 sys_statfs(td, uap) 258 struct thread *td; 259 register struct statfs_args /* { 260 char *path; 261 struct statfs *buf; 262 } */ *uap; 263 { 264 struct statfs sf; 265 int error; 266 267 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 268 if (error == 0) 269 error = copyout(&sf, uap->buf, sizeof(sf)); 270 return (error); 271 } 272 273 int 274 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 275 struct statfs *buf) 276 { 277 struct mount *mp; 278 struct statfs *sp, sb; 279 struct nameidata nd; 280 int error; 281 282 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 283 pathseg, path, td); 284 error = namei(&nd); 285 if (error != 0) 286 return (error); 287 mp = nd.ni_vp->v_mount; 288 vfs_ref(mp); 289 NDFREE(&nd, NDF_ONLY_PNBUF); 290 vput(nd.ni_vp); 291 error = vfs_busy(mp, 0); 292 vfs_rel(mp); 293 if (error != 0) 294 return (error); 295 #ifdef MAC 296 error = mac_mount_check_stat(td->td_ucred, 
mp); 297 if (error != 0) 298 goto out; 299 #endif 300 /* 301 * Set these in case the underlying filesystem fails to do so. 302 */ 303 sp = &mp->mnt_stat; 304 sp->f_version = STATFS_VERSION; 305 sp->f_namemax = NAME_MAX; 306 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 307 error = VFS_STATFS(mp, sp); 308 if (error != 0) 309 goto out; 310 if (priv_check(td, PRIV_VFS_GENERATION)) { 311 bcopy(sp, &sb, sizeof(sb)); 312 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 313 prison_enforce_statfs(td->td_ucred, mp, &sb); 314 sp = &sb; 315 } 316 *buf = *sp; 317 out: 318 vfs_unbusy(mp); 319 return (error); 320 } 321 322 /* 323 * Get filesystem statistics. 324 */ 325 #ifndef _SYS_SYSPROTO_H_ 326 struct fstatfs_args { 327 int fd; 328 struct statfs *buf; 329 }; 330 #endif 331 int 332 sys_fstatfs(td, uap) 333 struct thread *td; 334 register struct fstatfs_args /* { 335 int fd; 336 struct statfs *buf; 337 } */ *uap; 338 { 339 struct statfs sf; 340 int error; 341 342 error = kern_fstatfs(td, uap->fd, &sf); 343 if (error == 0) 344 error = copyout(&sf, uap->buf, sizeof(sf)); 345 return (error); 346 } 347 348 int 349 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 350 { 351 struct file *fp; 352 struct mount *mp; 353 struct statfs *sp, sb; 354 struct vnode *vp; 355 cap_rights_t rights; 356 int error; 357 358 AUDIT_ARG_FD(fd); 359 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp); 360 if (error != 0) 361 return (error); 362 vp = fp->f_vnode; 363 vn_lock(vp, LK_SHARED | LK_RETRY); 364 #ifdef AUDIT 365 AUDIT_ARG_VNODE1(vp); 366 #endif 367 mp = vp->v_mount; 368 if (mp) 369 vfs_ref(mp); 370 VOP_UNLOCK(vp, 0); 371 fdrop(fp, td); 372 if (mp == NULL) { 373 error = EBADF; 374 goto out; 375 } 376 error = vfs_busy(mp, 0); 377 vfs_rel(mp); 378 if (error != 0) 379 return (error); 380 #ifdef MAC 381 error = mac_mount_check_stat(td->td_ucred, mp); 382 if (error != 0) 383 goto out; 384 #endif 385 /* 386 * Set these in case the underlying filesystem fails to do so. 
387 */ 388 sp = &mp->mnt_stat; 389 sp->f_version = STATFS_VERSION; 390 sp->f_namemax = NAME_MAX; 391 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 392 error = VFS_STATFS(mp, sp); 393 if (error != 0) 394 goto out; 395 if (priv_check(td, PRIV_VFS_GENERATION)) { 396 bcopy(sp, &sb, sizeof(sb)); 397 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 398 prison_enforce_statfs(td->td_ucred, mp, &sb); 399 sp = &sb; 400 } 401 *buf = *sp; 402 out: 403 if (mp) 404 vfs_unbusy(mp); 405 return (error); 406 } 407 408 /* 409 * Get statistics on all filesystems. 410 */ 411 #ifndef _SYS_SYSPROTO_H_ 412 struct getfsstat_args { 413 struct statfs *buf; 414 long bufsize; 415 int flags; 416 }; 417 #endif 418 int 419 sys_getfsstat(td, uap) 420 struct thread *td; 421 register struct getfsstat_args /* { 422 struct statfs *buf; 423 long bufsize; 424 int flags; 425 } */ *uap; 426 { 427 size_t count; 428 int error; 429 430 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 431 return (EINVAL); 432 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 433 UIO_USERSPACE, uap->flags); 434 if (error == 0) 435 td->td_retval[0] = count; 436 return (error); 437 } 438 439 /* 440 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 441 * The caller is responsible for freeing memory which will be allocated 442 * in '*buf'. 
 */
/*
 * Collect statistics for the mounted filesystems.
 *
 * td      - calling thread; its credential drives the visibility checks.
 * buf     - for UIO_USERSPACE, '*buf' is the destination; for
 *           UIO_SYSSPACE with a non-zero bufsize, '*buf' is set to a
 *           buffer allocated here from M_TEMP.
 * bufsize - size of the destination in bytes; 0 means "count only".
 * countp  - receives the number of entries (see the end of the function).
 * bufseg  - address space of the destination buffer.
 * flags   - MNT_WAIT / MNT_NOWAIT / MNT_LAZY bits controlling whether
 *           VFS_STATFS() is called to refresh each mount's data.
 *
 * Returns 0, or the copyout() error for a UIO_USERSPACE destination.
 */
int
kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
    size_t *countp, enum uio_seg bufseg, int flags)
{
	struct mount *mp, *nmp;
	struct statfs *sfsp, *sp, sb;
	size_t count, maxcount;
	int error;

	/* Number of whole entries that fit in the caller's buffer. */
	maxcount = bufsize / sizeof(struct statfs);
	if (bufsize == 0)
		sfsp = NULL;
	else if (bufseg == UIO_USERSPACE)
		sfsp = *buf;
	else /* if (bufseg == UIO_SYSSPACE) */ {
		/*
		 * Count the mounts so that no more than that many entries
		 * are allocated.  The list can change once the lock is
		 * dropped, which is why the copy loop below still checks
		 * 'count < maxcount'.
		 */
		count = 0;
		mtx_lock(&mountlist_mtx);
		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
			count++;
		}
		mtx_unlock(&mountlist_mtx);
		if (maxcount > count)
			maxcount = count;
		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
		    M_WAITOK);
	}
	count = 0;
	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		/*
		 * Skip, without counting, any mount for which
		 * prison_canseemount() or the MAC check returns non-zero,
		 * or which cannot be busied without sleeping.
		 */
		if (prison_canseemount(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#ifdef MAC
		if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#endif
		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
		/* From here on mountlist_mtx is re-taken before advancing. */
		if (sfsp && count < maxcount) {
			sp = &mp->mnt_stat;
			/*
			 * Set these in case the underlying filesystem
			 * fails to do so.
			 */
			sp->f_version = STATFS_VERSION;
			sp->f_namemax = NAME_MAX;
			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
			/*
			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
			 * overrides MNT_WAIT.
			 *
			 * NOTE(review): as written, the refresh also runs
			 * whenever (flags & MNT_WAIT) is non-zero, whatever
			 * the other bits are - confirm this against the
			 * numeric values of the MNT_* constants.
			 *
			 * A mount whose VFS_STATFS() fails is skipped and
			 * not counted.
			 */
			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
			    (flags & MNT_WAIT)) &&
			    (error = VFS_STATFS(mp, sp))) {
				mtx_lock(&mountlist_mtx);
				nmp = TAILQ_NEXT(mp, mnt_list);
				vfs_unbusy(mp);
				continue;
			}
			/*
			 * When priv_check() fails, hand out a filtered copy
			 * with a zeroed f_fsid rather than the mount's data.
			 */
			if (priv_check(td, PRIV_VFS_GENERATION)) {
				bcopy(sp, &sb, sizeof(sb));
				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
				prison_enforce_statfs(td->td_ucred, mp, &sb);
				sp = &sb;
			}
			if (bufseg == UIO_SYSSPACE)
				bcopy(sp, sfsp, sizeof(*sp));
			else /* if (bufseg == UIO_USERSPACE) */ {
				error = copyout(sp, sfsp, sizeof(*sp));
				if (error != 0) {
					/* mountlist_mtx is not held here. */
					vfs_unbusy(mp);
					return (error);
				}
			}
			sfsp++;
		}
		/* Counted even when the entry did not fit in the buffer. */
		count++;
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	mtx_unlock(&mountlist_mtx);
	/*
	 * With a buffer, report at most the number of entries that fit;
	 * without one, report the total that was counted.
	 */
	if (sfsp && count > maxcount)
		*countp = maxcount;
	else
		*countp = count;
	return (0);
}

#ifdef COMPAT_FREEBSD4
/*
 * Get old format filesystem statistics.
 */
static void cvtstatfs(struct statfs *, struct ostatfs *);

#ifndef _SYS_SYSPROTO_H_
struct freebsd4_statfs_args {
	char *path;
	struct ostatfs *buf;
};
#endif
/*
 * Compat wrapper: fetch the statistics for uap->path with kern_statfs(),
 * convert them with cvtstatfs() and copy the old-format result out to
 * uap->buf.
 */
int
freebsd4_statfs(td, uap)
	struct thread *td;
	struct freebsd4_statfs_args /* {
		char *path;
		struct ostatfs *buf;
	} */ *uap;
{
	struct ostatfs osb;
	struct statfs sf;
	int error;

	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
	if (error != 0)
		return (error);
	cvtstatfs(&sf, &osb);
	return (copyout(&osb, uap->buf, sizeof(osb)));
}

/*
 * Get filesystem statistics.
572 */ 573 #ifndef _SYS_SYSPROTO_H_ 574 struct freebsd4_fstatfs_args { 575 int fd; 576 struct ostatfs *buf; 577 }; 578 #endif 579 int 580 freebsd4_fstatfs(td, uap) 581 struct thread *td; 582 struct freebsd4_fstatfs_args /* { 583 int fd; 584 struct ostatfs *buf; 585 } */ *uap; 586 { 587 struct ostatfs osb; 588 struct statfs sf; 589 int error; 590 591 error = kern_fstatfs(td, uap->fd, &sf); 592 if (error != 0) 593 return (error); 594 cvtstatfs(&sf, &osb); 595 return (copyout(&osb, uap->buf, sizeof(osb))); 596 } 597 598 /* 599 * Get statistics on all filesystems. 600 */ 601 #ifndef _SYS_SYSPROTO_H_ 602 struct freebsd4_getfsstat_args { 603 struct ostatfs *buf; 604 long bufsize; 605 int flags; 606 }; 607 #endif 608 int 609 freebsd4_getfsstat(td, uap) 610 struct thread *td; 611 register struct freebsd4_getfsstat_args /* { 612 struct ostatfs *buf; 613 long bufsize; 614 int flags; 615 } */ *uap; 616 { 617 struct statfs *buf, *sp; 618 struct ostatfs osb; 619 size_t count, size; 620 int error; 621 622 if (uap->bufsize < 0) 623 return (EINVAL); 624 count = uap->bufsize / sizeof(struct ostatfs); 625 if (count > SIZE_MAX / sizeof(struct statfs)) 626 return (EINVAL); 627 size = count * sizeof(struct statfs); 628 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 629 uap->flags); 630 td->td_retval[0] = count; 631 if (size != 0) { 632 sp = buf; 633 while (count != 0 && error == 0) { 634 cvtstatfs(sp, &osb); 635 error = copyout(&osb, uap->buf, sizeof(osb)); 636 sp++; 637 uap->buf++; 638 count--; 639 } 640 free(buf, M_TEMP); 641 } 642 return (error); 643 } 644 645 /* 646 * Implement fstatfs() for (NFS) file handles. 
647 */ 648 #ifndef _SYS_SYSPROTO_H_ 649 struct freebsd4_fhstatfs_args { 650 struct fhandle *u_fhp; 651 struct ostatfs *buf; 652 }; 653 #endif 654 int 655 freebsd4_fhstatfs(td, uap) 656 struct thread *td; 657 struct freebsd4_fhstatfs_args /* { 658 struct fhandle *u_fhp; 659 struct ostatfs *buf; 660 } */ *uap; 661 { 662 struct ostatfs osb; 663 struct statfs sf; 664 fhandle_t fh; 665 int error; 666 667 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 668 if (error != 0) 669 return (error); 670 error = kern_fhstatfs(td, fh, &sf); 671 if (error != 0) 672 return (error); 673 cvtstatfs(&sf, &osb); 674 return (copyout(&osb, uap->buf, sizeof(osb))); 675 } 676 677 /* 678 * Convert a new format statfs structure to an old format statfs structure. 679 */ 680 static void 681 cvtstatfs(nsp, osp) 682 struct statfs *nsp; 683 struct ostatfs *osp; 684 { 685 686 statfs_scale_blocks(nsp, LONG_MAX); 687 bzero(osp, sizeof(*osp)); 688 osp->f_bsize = nsp->f_bsize; 689 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 690 osp->f_blocks = nsp->f_blocks; 691 osp->f_bfree = nsp->f_bfree; 692 osp->f_bavail = nsp->f_bavail; 693 osp->f_files = MIN(nsp->f_files, LONG_MAX); 694 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 695 osp->f_owner = nsp->f_owner; 696 osp->f_type = nsp->f_type; 697 osp->f_flags = nsp->f_flags; 698 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 699 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 700 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 701 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 702 strlcpy(osp->f_fstypename, nsp->f_fstypename, 703 MIN(MFSNAMELEN, OMFSNAMELEN)); 704 strlcpy(osp->f_mntonname, nsp->f_mntonname, 705 MIN(MNAMELEN, OMNAMELEN)); 706 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 707 MIN(MNAMELEN, OMNAMELEN)); 708 osp->f_fsid = nsp->f_fsid; 709 } 710 #endif /* COMPAT_FREEBSD4 */ 711 712 /* 713 * Change current working directory to a given file descriptor. 
714 */ 715 #ifndef _SYS_SYSPROTO_H_ 716 struct fchdir_args { 717 int fd; 718 }; 719 #endif 720 int 721 sys_fchdir(td, uap) 722 struct thread *td; 723 struct fchdir_args /* { 724 int fd; 725 } */ *uap; 726 { 727 struct vnode *vp, *tdp; 728 struct mount *mp; 729 struct file *fp; 730 cap_rights_t rights; 731 int error; 732 733 AUDIT_ARG_FD(uap->fd); 734 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 735 &fp); 736 if (error != 0) 737 return (error); 738 vp = fp->f_vnode; 739 VREF(vp); 740 fdrop(fp, td); 741 vn_lock(vp, LK_SHARED | LK_RETRY); 742 AUDIT_ARG_VNODE1(vp); 743 error = change_dir(vp, td); 744 while (!error && (mp = vp->v_mountedhere) != NULL) { 745 if (vfs_busy(mp, 0)) 746 continue; 747 error = VFS_ROOT(mp, LK_SHARED, &tdp); 748 vfs_unbusy(mp); 749 if (error != 0) 750 break; 751 vput(vp); 752 vp = tdp; 753 } 754 if (error != 0) { 755 vput(vp); 756 return (error); 757 } 758 VOP_UNLOCK(vp, 0); 759 pwd_chdir(td, vp); 760 return (0); 761 } 762 763 /* 764 * Change current working directory (``.''). 765 */ 766 #ifndef _SYS_SYSPROTO_H_ 767 struct chdir_args { 768 char *path; 769 }; 770 #endif 771 int 772 sys_chdir(td, uap) 773 struct thread *td; 774 struct chdir_args /* { 775 char *path; 776 } */ *uap; 777 { 778 779 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 780 } 781 782 int 783 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 784 { 785 struct nameidata nd; 786 int error; 787 788 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 789 pathseg, path, td); 790 if ((error = namei(&nd)) != 0) 791 return (error); 792 if ((error = change_dir(nd.ni_vp, td)) != 0) { 793 vput(nd.ni_vp); 794 NDFREE(&nd, NDF_ONLY_PNBUF); 795 return (error); 796 } 797 VOP_UNLOCK(nd.ni_vp, 0); 798 NDFREE(&nd, NDF_ONLY_PNBUF); 799 pwd_chdir(td, nd.ni_vp); 800 return (0); 801 } 802 803 /* 804 * Change notion of root (``/'') directory. 
805 */ 806 #ifndef _SYS_SYSPROTO_H_ 807 struct chroot_args { 808 char *path; 809 }; 810 #endif 811 int 812 sys_chroot(td, uap) 813 struct thread *td; 814 struct chroot_args /* { 815 char *path; 816 } */ *uap; 817 { 818 struct nameidata nd; 819 int error; 820 821 error = priv_check(td, PRIV_VFS_CHROOT); 822 if (error != 0) 823 return (error); 824 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 825 UIO_USERSPACE, uap->path, td); 826 error = namei(&nd); 827 if (error != 0) 828 goto error; 829 error = change_dir(nd.ni_vp, td); 830 if (error != 0) 831 goto e_vunlock; 832 #ifdef MAC 833 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 834 if (error != 0) 835 goto e_vunlock; 836 #endif 837 VOP_UNLOCK(nd.ni_vp, 0); 838 error = pwd_chroot(td, nd.ni_vp); 839 vrele(nd.ni_vp); 840 NDFREE(&nd, NDF_ONLY_PNBUF); 841 return (error); 842 e_vunlock: 843 vput(nd.ni_vp); 844 error: 845 NDFREE(&nd, NDF_ONLY_PNBUF); 846 return (error); 847 } 848 849 /* 850 * Common routine for chroot and chdir. Callers must provide a locked vnode 851 * instance. 
852 */ 853 int 854 change_dir(vp, td) 855 struct vnode *vp; 856 struct thread *td; 857 { 858 #ifdef MAC 859 int error; 860 #endif 861 862 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 863 if (vp->v_type != VDIR) 864 return (ENOTDIR); 865 #ifdef MAC 866 error = mac_vnode_check_chdir(td->td_ucred, vp); 867 if (error != 0) 868 return (error); 869 #endif 870 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 871 } 872 873 static __inline void 874 flags_to_rights(int flags, cap_rights_t *rightsp) 875 { 876 877 if (flags & O_EXEC) { 878 cap_rights_set(rightsp, CAP_FEXECVE); 879 } else { 880 switch ((flags & O_ACCMODE)) { 881 case O_RDONLY: 882 cap_rights_set(rightsp, CAP_READ); 883 break; 884 case O_RDWR: 885 cap_rights_set(rightsp, CAP_READ); 886 /* FALLTHROUGH */ 887 case O_WRONLY: 888 cap_rights_set(rightsp, CAP_WRITE); 889 if (!(flags & (O_APPEND | O_TRUNC))) 890 cap_rights_set(rightsp, CAP_SEEK); 891 break; 892 } 893 } 894 895 if (flags & O_CREAT) 896 cap_rights_set(rightsp, CAP_CREATE); 897 898 if (flags & O_TRUNC) 899 cap_rights_set(rightsp, CAP_FTRUNCATE); 900 901 if (flags & (O_SYNC | O_FSYNC)) 902 cap_rights_set(rightsp, CAP_FSYNC); 903 904 if (flags & (O_EXLOCK | O_SHLOCK)) 905 cap_rights_set(rightsp, CAP_FLOCK); 906 } 907 908 /* 909 * Check permissions, allocate an open file structure, and call the device 910 * open routine if any. 
 */
#ifndef _SYS_SYSPROTO_H_
struct open_args {
	char	*path;
	int	flags;
	int	mode;
};
#endif
/*
 * open(2): kern_openat() relative to AT_FDCWD with a user-space path.
 */
int
sys_open(td, uap)
	struct thread *td;
	register struct open_args /* {
		char *path;
		int flags;
		int mode;
	} */ *uap;
{

	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->flags, uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct openat_args {
	int	fd;
	char	*path;
	int	flag;
	int	mode;
};
#endif
/*
 * openat(2): audits the directory descriptor, then calls kern_openat().
 */
int
sys_openat(struct thread *td, struct openat_args *uap)
{

	AUDIT_ARG_FD(uap->fd);
	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
	    uap->mode));
}

/*
 * Open 'path', resolved relative to descriptor 'fd', and install a new
 * file descriptor for it.
 *
 * td      - calling thread.
 * fd      - directory descriptor for the lookup (callers above pass
 *           AT_FDCWD for the plain open(2) case).
 * path    - path name, located in address space 'pathseg'.
 * flags   - open(2) flags; converted with FFLAGS() unless O_EXEC is set.
 * mode    - creation mode; the process umask (fd_cmask) and S_ISTXT are
 *           masked out before it is passed to vn_open().
 *
 * On success returns 0 and stores the new descriptor number in
 * td->td_retval[0].  Returns EINVAL when O_EXEC is combined with any
 * O_ACCMODE bit or when every O_ACCMODE bit is set, otherwise the error
 * from falloc_noinstall(), vn_open(), dupfdopen(), fo_truncate() or
 * finstall().
 */
int
kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    int flags, int mode)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	struct nameidata nd;
	cap_rights_t rights;
	int cmode, error, indx;

	/* -1 means "no descriptor installed yet"; see the 'bad' label. */
	indx = -1;

	AUDIT_ARG_FFLAGS(flags);
	AUDIT_ARG_MODE(mode);
	/* Rights the lookup needs on 'fd': CAP_LOOKUP plus flag-derived. */
	cap_rights_init(&rights, CAP_LOOKUP);
	flags_to_rights(flags, &rights);
	/*
	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
	 * may be specified.
	 */
	if (flags & O_EXEC) {
		if (flags & O_ACCMODE)
			return (EINVAL);
	} else if ((flags & O_ACCMODE) == O_ACCMODE) {
		return (EINVAL);
	} else {
		flags = FFLAGS(flags);
	}

	/*
	 * Allocate a file structure. The descriptor to reference it
	 * is allocated and set by finstall() below.
	 */
	error = falloc_noinstall(td, &fp);
	if (error != 0)
		return (error);
	/*
	 * An extra reference on `fp' has been held for us by
	 * falloc_noinstall().
	 */
	/* Set the flags early so the finit in devfs can pick them up. */
	fp->f_flag = flags & FMASK;
	cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
	    &rights, td);
	td->td_dupfd = -1;		/* XXX check for fdopen */
	error = vn_open(&nd, &flags, cmode, fp);
	if (error != 0) {
		/*
		 * If the vn_open replaced the method vector, something
		 * wonderous happened deep below and we just pass it up
		 * pretending we know what we do.
		 */
		if (error == ENXIO && fp->f_ops != &badfileops)
			goto success;

		/*
		 * Handle special fdopen() case. bleh.
		 *
		 * Don't do this for relative (capability) lookups; we don't
		 * understand exactly what would happen, and we don't think
		 * that it ever should.
		 */
		if (nd.ni_strictrelative == 0 &&
		    (error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0) {
			/* On success dupfdopen() fills in 'indx'. */
			error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
			    &indx);
			if (error == 0)
				goto success;
		}

		goto bad;
	}
	td->td_dupfd = 0;
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;

	/*
	 * Store the vnode, for any f_type. Typically, the vnode use
	 * count is decremented by direct call to vn_closefile() for
	 * files that switched type in the cdevsw fdopen() method.
	 */
	fp->f_vnode = vp;
	/*
	 * If the file wasn't claimed by devfs bind it to the normal
	 * vnode operations here.
	 */
	if (fp->f_ops == &badfileops) {
		KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
		fp->f_seqcount = 1;
		finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK),
		    DTYPE_VNODE, vp, &vnops);
	}

	VOP_UNLOCK(vp, 0);
	/* Truncation goes through the file ops once the vnode is unlocked. */
	if (flags & O_TRUNC) {
		error = fo_truncate(fp, 0, td->td_ucred, td);
		if (error != 0)
			goto bad;
	}
success:
	/*
	 * If we haven't already installed the FD (for dupfdopen), do so now.
	 */
	if (indx == -1) {
		struct filecaps *fcaps;

#ifdef CAPABILITIES
		if (nd.ni_strictrelative == 1)
			fcaps = &nd.ni_filecaps;
		else
#endif
			fcaps = NULL;
		error = finstall(td, fp, &indx, flags, fcaps);
		/* On success finstall() consumes fcaps. */
		if (error != 0) {
			filecaps_free(&nd.ni_filecaps);
			goto bad;
		}
	} else {
		filecaps_free(&nd.ni_filecaps);
	}

	/*
	 * Release our private reference, leaving the one associated with
	 * the descriptor table intact.
	 */
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (0);
bad:
	/* Error exit: no descriptor was installed, so only drop 'fp'. */
	KASSERT(indx == -1, ("indx=%d, should be -1", indx));
	fdrop(fp, td);
	return (error);
}

#ifdef COMPAT_43
/*
 * Create a file.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
/*
 * Old creat(): kern_openat() with O_WRONLY | O_CREAT | O_TRUNC relative
 * to AT_FDCWD.
 */
int
ocreat(td, uap)
	struct thread *td;
	register struct ocreat_args /* {
		char *path;
		int mode;
	} */ *uap;
{

	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
}
#endif /* COMPAT_43 */

/*
 * Create a special file.
 */
#ifndef _SYS_SYSPROTO_H_
struct mknod_args {
	char	*path;
	int	mode;
	int	dev;
};
#endif
/*
 * mknod(2): kern_mknodat() relative to AT_FDCWD with a user-space path.
 */
int
sys_mknod(td, uap)
	struct thread *td;
	register struct mknod_args /* {
		char *path;
		int mode;
		int dev;
	} */ *uap;
{

	return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->mode, uap->dev));
}

#ifndef _SYS_SYSPROTO_H_
struct mknodat_args {
	int	fd;
	char	*path;
	mode_t	mode;
	dev_t	dev;
};
#endif
/*
 * mknodat(2): kern_mknodat() relative to the supplied descriptor.
 */
int
sys_mknodat(struct thread *td, struct mknodat_args *uap)
{

	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
	    uap->dev));
}

/*
 * Create a device node, a "bad" node or a whiteout named 'path',
 * resolved relative to descriptor 'fd'.
 *
 * The file type bits of 'mode' select the operation and the privilege
 * that is checked first.  An S_IFIFO request with dev == 0 is passed on
 * to kern_mkfifoat(); any other type not listed in the switch below
 * yields EINVAL.
 *
 * Returns 0, EEXIST when the name already exists, EINVAL as described
 * (also for a device node with dev == VNOVAL), or the error from the
 * privilege check, namei(), vn_start_write(), the MAC check,
 * VOP_WHITEOUT() or VOP_MKNOD().
 */
int
kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    int mode, int dev)
{
	struct vnode *vp;
	struct mount *mp;
	struct vattr vattr;
	struct nameidata nd;
	cap_rights_t rights;
	int error, whiteout = 0;

	AUDIT_ARG_MODE(mode);
	AUDIT_ARG_DEV(dev);
	/* Privilege and argument checks, done once before any lookup. */
	switch (mode & S_IFMT) {
	case S_IFCHR:
	case S_IFBLK:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		if (error == 0 && dev == VNOVAL)
			error = EINVAL;
		break;
	case S_IFMT:
		error = priv_check(td, PRIV_VFS_MKNOD_BAD);
		break;
	case S_IFWHT:
		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
		break;
	case S_IFIFO:
		if (dev == 0)
			return (kern_mkfifoat(td, fd, path, pathseg, mode));
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error != 0)
		return (error);
restart:
	/* Re-entered from below after waiting in vn_start_write(). */
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
	    NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT),
	    td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp != NULL) {
		/*
		 * The name already exists.  When the result is the parent
		 * itself, only a reference is dropped on the parent;
		 * otherwise the parent is vput().
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		return (EEXIST);
	} else {
		VATTR_NULL(&vattr);
		vattr.va_mode = (mode & ALLPERMS) &
		    ~td->td_proc->p_fd->fd_cmask;
		vattr.va_rdev = dev;
		whiteout = 0;

		/*
		 * Map the file type to a vnode type.  Types rejected by the
		 * first switch cannot reach this point, hence the panic.
		 */
		switch (mode & S_IFMT) {
		case S_IFMT:	/* used by badsect to flag bad sectors */
			vattr.va_type = VBAD;
			break;
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFWHT:
			whiteout = 1;
			break;
		default:
			panic("kern_mknod: invalid mode");
		}
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/*
		 * Writes cannot start right now: release the lookup
		 * results, wait via vn_start_write(), and redo the lookup.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
#ifdef MAC
	/* The MAC create check is applied to everything except whiteouts. */
	if (error == 0 && !whiteout)
		error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
		    &nd.ni_cnd, &vattr);
#endif
	if (error == 0) {
		if (whiteout)
			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
		else {
			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
			    &nd.ni_cnd, &vattr);
			/* The new vnode is not needed by this function. */
			if (error == 0)
				vput(nd.ni_vp);
		}
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	return (error);
}

/*
 * Create a named pipe.
 */
#ifndef _SYS_SYSPROTO_H_
struct mkfifo_args {
	char	*path;
	int	mode;
};
#endif
/*
 * mkfifo(2): kern_mkfifoat() relative to AT_FDCWD with a user-space path.
 */
int
sys_mkfifo(td, uap)
	struct thread *td;
	register struct mkfifo_args /* {
		char *path;
		int mode;
	} */ *uap;
{

	return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct mkfifoat_args {
	int	fd;
	char	*path;
	mode_t	mode;
};
#endif
/*
 * mkfifoat(2): kern_mkfifoat() relative to the supplied descriptor.
 */
int
sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
{

	return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
	    uap->mode));
}

/*
 * Create a FIFO named 'path', resolved relative to descriptor 'fd'.  The
 * permission bits of 'mode' are filtered through the process umask
 * (fd_cmask).
 *
 * Returns 0, EEXIST when the name already exists, or the error from
 * namei(), vn_start_write(), the MAC check or VOP_MKNOD().
 */
int
kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    int mode)
{
	struct mount *mp;
	struct vattr vattr;
	struct nameidata nd;
	cap_rights_t rights;
	int error;

	AUDIT_ARG_MODE(mode);
restart:
	/* Re-entered from below after waiting in vn_start_write(). */
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
	    NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT),
	    td);
	if ((error = namei(&nd)) != 0)
		return (error);
	if (nd.ni_vp != NULL) {
		/* The name already exists: release both vnodes and fail. */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(nd.ni_vp);
		return (EEXIST);
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/*
		 * Writes cannot start right now: release the lookup
		 * results, wait via vn_start_write(), and redo the lookup.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
#ifdef MAC
	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error != 0)
		goto out;
#endif
	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	/* The new vnode is not needed by this function. */
	if (error == 0)
		vput(nd.ni_vp);
#ifdef MAC
out:
#endif
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	return (error);
}

/*
 * Make a hard file link.
 */
#ifndef _SYS_SYSPROTO_H_
struct link_args {
	char	*path;
	char	*link;
};
#endif
/*
 * link(2): kern_linkat() with both paths relative to AT_FDCWD and the
 * FOLLOW lookup flag for the source.
 */
int
sys_link(td, uap)
	struct thread *td;
	register struct link_args /* {
		char *path;
		char *link;
	} */ *uap;
{

	return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link,
	    UIO_USERSPACE, FOLLOW));
}

#ifndef _SYS_SYSPROTO_H_
struct linkat_args {
	int	fd1;
	char	*path1;
	int	fd2;
	char	*path2;
	int	flag;
};
#endif
/*
 * linkat(2): any flag bit other than AT_SYMLINK_FOLLOW yields EINVAL.
 * AT_SYMLINK_FOLLOW selects FOLLOW for the source lookup, otherwise
 * NOFOLLOW is used.
 */
int
sys_linkat(struct thread *td, struct linkat_args *uap)
{
	int flag;

	flag = uap->flag;
	if (flag & ~AT_SYMLINK_FOLLOW)
		return (EINVAL);

	return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
	    UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
}

/* Tunables consulted by can_hardlink(); both default to off. */
int hardlink_check_uid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Decide whether credential 'cred' may create a hard link to 'vp'.
 *
 * Returns 0 at once when both checks are disabled.  Otherwise the
 * vnode's attributes are fetched and, for each enabled check whose
 * ownership test fails (uid differs from the owner / not a member of
 * the owning group), PRIV_VFS_LINK is required.  Returns 0 when the link
 * is permitted, else the VOP_GETATTR() or priv_check_cred() error.
 */
static int
can_hardlink(struct vnode *vp, struct ucred *cred)
{
	struct vattr va;
	int error;

	if (!hardlink_check_uid && !hardlink_check_gid)
		return (0);

	error = VOP_GETATTR(vp, &va, cred);
	if (error != 0)
		return (error);

	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
		if (error != 0)
			return (error);
	}

	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
		if (error != 0)
			return (error);
	}

	return (0);
}

int
kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
    enum uio_seg segflg, int follow)
{
	struct vnode *vp;
	struct mount *mp;
	struct nameidata nd;
	cap_rights_t rights;
	int error;

again:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1,
	    cap_rights_init(&rights, CAP_LINKAT_SOURCE), td);

	if ((error = namei(&nd)) != 0)
		return (error);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;
	if (vp->v_type == VDIR) {
		vrele(vp);
		return (EPERM);		/* POSIX */
	}
	NDINIT_ATRIGHTS(&nd, CREATE,
	    LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2,
	    cap_rights_init(&rights, CAP_LINKAT_TARGET), td);
	if ((error = namei(&nd)) == 0) {
		if (nd.ni_vp !=
NULL) { 1460 NDFREE(&nd, NDF_ONLY_PNBUF); 1461 if (nd.ni_dvp == nd.ni_vp) 1462 vrele(nd.ni_dvp); 1463 else 1464 vput(nd.ni_dvp); 1465 vrele(nd.ni_vp); 1466 vrele(vp); 1467 return (EEXIST); 1468 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1469 /* 1470 * Cross-device link. No need to recheck 1471 * vp->v_type, since it cannot change, except 1472 * to VBAD. 1473 */ 1474 NDFREE(&nd, NDF_ONLY_PNBUF); 1475 vput(nd.ni_dvp); 1476 vrele(vp); 1477 return (EXDEV); 1478 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1479 error = can_hardlink(vp, td->td_ucred); 1480 #ifdef MAC 1481 if (error == 0) 1482 error = mac_vnode_check_link(td->td_ucred, 1483 nd.ni_dvp, vp, &nd.ni_cnd); 1484 #endif 1485 if (error != 0) { 1486 vput(vp); 1487 vput(nd.ni_dvp); 1488 NDFREE(&nd, NDF_ONLY_PNBUF); 1489 return (error); 1490 } 1491 error = vn_start_write(vp, &mp, V_NOWAIT); 1492 if (error != 0) { 1493 vput(vp); 1494 vput(nd.ni_dvp); 1495 NDFREE(&nd, NDF_ONLY_PNBUF); 1496 error = vn_start_write(NULL, &mp, 1497 V_XSLEEP | PCATCH); 1498 if (error != 0) 1499 return (error); 1500 goto again; 1501 } 1502 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1503 VOP_UNLOCK(vp, 0); 1504 vput(nd.ni_dvp); 1505 vn_finished_write(mp); 1506 NDFREE(&nd, NDF_ONLY_PNBUF); 1507 } else { 1508 vput(nd.ni_dvp); 1509 NDFREE(&nd, NDF_ONLY_PNBUF); 1510 vrele(vp); 1511 goto again; 1512 } 1513 } 1514 vrele(vp); 1515 return (error); 1516 } 1517 1518 /* 1519 * Make a symbolic link. 
1520 */ 1521 #ifndef _SYS_SYSPROTO_H_ 1522 struct symlink_args { 1523 char *path; 1524 char *link; 1525 }; 1526 #endif 1527 int 1528 sys_symlink(td, uap) 1529 struct thread *td; 1530 register struct symlink_args /* { 1531 char *path; 1532 char *link; 1533 } */ *uap; 1534 { 1535 1536 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1537 UIO_USERSPACE)); 1538 } 1539 1540 #ifndef _SYS_SYSPROTO_H_ 1541 struct symlinkat_args { 1542 char *path; 1543 int fd; 1544 char *path2; 1545 }; 1546 #endif 1547 int 1548 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1549 { 1550 1551 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1552 UIO_USERSPACE)); 1553 } 1554 1555 int 1556 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1557 enum uio_seg segflg) 1558 { 1559 struct mount *mp; 1560 struct vattr vattr; 1561 char *syspath; 1562 struct nameidata nd; 1563 int error; 1564 cap_rights_t rights; 1565 1566 if (segflg == UIO_SYSSPACE) { 1567 syspath = path1; 1568 } else { 1569 syspath = uma_zalloc(namei_zone, M_WAITOK); 1570 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1571 goto out; 1572 } 1573 AUDIT_ARG_TEXT(syspath); 1574 restart: 1575 bwillwrite(); 1576 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1577 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1578 td); 1579 if ((error = namei(&nd)) != 0) 1580 goto out; 1581 if (nd.ni_vp) { 1582 NDFREE(&nd, NDF_ONLY_PNBUF); 1583 if (nd.ni_vp == nd.ni_dvp) 1584 vrele(nd.ni_dvp); 1585 else 1586 vput(nd.ni_dvp); 1587 vrele(nd.ni_vp); 1588 error = EEXIST; 1589 goto out; 1590 } 1591 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1592 NDFREE(&nd, NDF_ONLY_PNBUF); 1593 vput(nd.ni_dvp); 1594 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1595 goto out; 1596 goto restart; 1597 } 1598 VATTR_NULL(&vattr); 1599 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1600 #ifdef MAC 1601 vattr.va_type = VLNK; 1602 
error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1603 &vattr); 1604 if (error != 0) 1605 goto out2; 1606 #endif 1607 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1608 if (error == 0) 1609 vput(nd.ni_vp); 1610 #ifdef MAC 1611 out2: 1612 #endif 1613 NDFREE(&nd, NDF_ONLY_PNBUF); 1614 vput(nd.ni_dvp); 1615 vn_finished_write(mp); 1616 out: 1617 if (segflg != UIO_SYSSPACE) 1618 uma_zfree(namei_zone, syspath); 1619 return (error); 1620 } 1621 1622 /* 1623 * Delete a whiteout from the filesystem. 1624 */ 1625 int 1626 sys_undelete(td, uap) 1627 struct thread *td; 1628 register struct undelete_args /* { 1629 char *path; 1630 } */ *uap; 1631 { 1632 struct mount *mp; 1633 struct nameidata nd; 1634 int error; 1635 1636 restart: 1637 bwillwrite(); 1638 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1639 UIO_USERSPACE, uap->path, td); 1640 error = namei(&nd); 1641 if (error != 0) 1642 return (error); 1643 1644 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1645 NDFREE(&nd, NDF_ONLY_PNBUF); 1646 if (nd.ni_vp == nd.ni_dvp) 1647 vrele(nd.ni_dvp); 1648 else 1649 vput(nd.ni_dvp); 1650 if (nd.ni_vp) 1651 vrele(nd.ni_vp); 1652 return (EEXIST); 1653 } 1654 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1655 NDFREE(&nd, NDF_ONLY_PNBUF); 1656 vput(nd.ni_dvp); 1657 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1658 return (error); 1659 goto restart; 1660 } 1661 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1662 NDFREE(&nd, NDF_ONLY_PNBUF); 1663 vput(nd.ni_dvp); 1664 vn_finished_write(mp); 1665 return (error); 1666 } 1667 1668 /* 1669 * Delete a name from the filesystem. 
1670 */ 1671 #ifndef _SYS_SYSPROTO_H_ 1672 struct unlink_args { 1673 char *path; 1674 }; 1675 #endif 1676 int 1677 sys_unlink(td, uap) 1678 struct thread *td; 1679 struct unlink_args /* { 1680 char *path; 1681 } */ *uap; 1682 { 1683 1684 return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); 1685 } 1686 1687 #ifndef _SYS_SYSPROTO_H_ 1688 struct unlinkat_args { 1689 int fd; 1690 char *path; 1691 int flag; 1692 }; 1693 #endif 1694 int 1695 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1696 { 1697 int flag = uap->flag; 1698 int fd = uap->fd; 1699 char *path = uap->path; 1700 1701 if (flag & ~AT_REMOVEDIR) 1702 return (EINVAL); 1703 1704 if (flag & AT_REMOVEDIR) 1705 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1706 else 1707 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1708 } 1709 1710 int 1711 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1712 ino_t oldinum) 1713 { 1714 struct mount *mp; 1715 struct vnode *vp; 1716 struct nameidata nd; 1717 struct stat sb; 1718 cap_rights_t rights; 1719 int error; 1720 1721 restart: 1722 bwillwrite(); 1723 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1724 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1725 if ((error = namei(&nd)) != 0) 1726 return (error == EINVAL ? EPERM : error); 1727 vp = nd.ni_vp; 1728 if (vp->v_type == VDIR && oldinum == 0) { 1729 error = EPERM; /* POSIX */ 1730 } else if (oldinum != 0 && 1731 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1732 sb.st_ino != oldinum) { 1733 error = EIDRM; /* Identifier removed */ 1734 } else { 1735 /* 1736 * The root of a mounted filesystem cannot be deleted. 1737 * 1738 * XXX: can this only be a VDIR case? 
1739 */ 1740 if (vp->v_vflag & VV_ROOT) 1741 error = EBUSY; 1742 } 1743 if (error == 0) { 1744 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1745 NDFREE(&nd, NDF_ONLY_PNBUF); 1746 vput(nd.ni_dvp); 1747 if (vp == nd.ni_dvp) 1748 vrele(vp); 1749 else 1750 vput(vp); 1751 if ((error = vn_start_write(NULL, &mp, 1752 V_XSLEEP | PCATCH)) != 0) 1753 return (error); 1754 goto restart; 1755 } 1756 #ifdef MAC 1757 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1758 &nd.ni_cnd); 1759 if (error != 0) 1760 goto out; 1761 #endif 1762 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1763 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1764 #ifdef MAC 1765 out: 1766 #endif 1767 vn_finished_write(mp); 1768 } 1769 NDFREE(&nd, NDF_ONLY_PNBUF); 1770 vput(nd.ni_dvp); 1771 if (vp == nd.ni_dvp) 1772 vrele(vp); 1773 else 1774 vput(vp); 1775 return (error); 1776 } 1777 1778 /* 1779 * Reposition read/write file offset. 1780 */ 1781 #ifndef _SYS_SYSPROTO_H_ 1782 struct lseek_args { 1783 int fd; 1784 int pad; 1785 off_t offset; 1786 int whence; 1787 }; 1788 #endif 1789 int 1790 sys_lseek(td, uap) 1791 struct thread *td; 1792 register struct lseek_args /* { 1793 int fd; 1794 int pad; 1795 off_t offset; 1796 int whence; 1797 } */ *uap; 1798 { 1799 struct file *fp; 1800 cap_rights_t rights; 1801 int error; 1802 1803 AUDIT_ARG_FD(uap->fd); 1804 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1805 if (error != 0) 1806 return (error); 1807 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1808 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1809 fdrop(fp, td); 1810 return (error); 1811 } 1812 1813 #if defined(COMPAT_43) 1814 /* 1815 * Reposition read/write file offset. 
1816 */ 1817 #ifndef _SYS_SYSPROTO_H_ 1818 struct olseek_args { 1819 int fd; 1820 long offset; 1821 int whence; 1822 }; 1823 #endif 1824 int 1825 olseek(td, uap) 1826 struct thread *td; 1827 register struct olseek_args /* { 1828 int fd; 1829 long offset; 1830 int whence; 1831 } */ *uap; 1832 { 1833 struct lseek_args /* { 1834 int fd; 1835 int pad; 1836 off_t offset; 1837 int whence; 1838 } */ nuap; 1839 1840 nuap.fd = uap->fd; 1841 nuap.offset = uap->offset; 1842 nuap.whence = uap->whence; 1843 return (sys_lseek(td, &nuap)); 1844 } 1845 #endif /* COMPAT_43 */ 1846 1847 #if defined(COMPAT_FREEBSD6) 1848 /* Version with the 'pad' argument */ 1849 int 1850 freebsd6_lseek(td, uap) 1851 struct thread *td; 1852 register struct freebsd6_lseek_args *uap; 1853 { 1854 struct lseek_args ouap; 1855 1856 ouap.fd = uap->fd; 1857 ouap.offset = uap->offset; 1858 ouap.whence = uap->whence; 1859 return (sys_lseek(td, &ouap)); 1860 } 1861 #endif 1862 1863 /* 1864 * Check access permissions using passed credentials. 1865 */ 1866 static int 1867 vn_access(vp, user_flags, cred, td) 1868 struct vnode *vp; 1869 int user_flags; 1870 struct ucred *cred; 1871 struct thread *td; 1872 { 1873 accmode_t accmode; 1874 int error; 1875 1876 /* Flags == 0 means only check for existence. */ 1877 if (user_flags == 0) 1878 return (0); 1879 1880 accmode = 0; 1881 if (user_flags & R_OK) 1882 accmode |= VREAD; 1883 if (user_flags & W_OK) 1884 accmode |= VWRITE; 1885 if (user_flags & X_OK) 1886 accmode |= VEXEC; 1887 #ifdef MAC 1888 error = mac_vnode_check_access(cred, vp, accmode); 1889 if (error != 0) 1890 return (error); 1891 #endif 1892 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1893 error = VOP_ACCESS(vp, accmode, cred, td); 1894 return (error); 1895 } 1896 1897 /* 1898 * Check access permissions using "real" credentials. 
1899 */ 1900 #ifndef _SYS_SYSPROTO_H_ 1901 struct access_args { 1902 char *path; 1903 int amode; 1904 }; 1905 #endif 1906 int 1907 sys_access(td, uap) 1908 struct thread *td; 1909 register struct access_args /* { 1910 char *path; 1911 int amode; 1912 } */ *uap; 1913 { 1914 1915 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1916 0, uap->amode)); 1917 } 1918 1919 #ifndef _SYS_SYSPROTO_H_ 1920 struct faccessat_args { 1921 int dirfd; 1922 char *path; 1923 int amode; 1924 int flag; 1925 } 1926 #endif 1927 int 1928 sys_faccessat(struct thread *td, struct faccessat_args *uap) 1929 { 1930 1931 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1932 uap->amode)); 1933 } 1934 1935 int 1936 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1937 int flag, int amode) 1938 { 1939 struct ucred *cred, *usecred; 1940 struct vnode *vp; 1941 struct nameidata nd; 1942 cap_rights_t rights; 1943 int error; 1944 1945 if (flag & ~AT_EACCESS) 1946 return (EINVAL); 1947 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 1948 return (EINVAL); 1949 1950 /* 1951 * Create and modify a temporary credential instead of one that 1952 * is potentially shared (if we need one). 
1953 */ 1954 cred = td->td_ucred; 1955 if ((flag & AT_EACCESS) == 0 && 1956 ((cred->cr_uid != cred->cr_ruid || 1957 cred->cr_rgid != cred->cr_groups[0]))) { 1958 usecred = crdup(cred); 1959 usecred->cr_uid = cred->cr_ruid; 1960 usecred->cr_groups[0] = cred->cr_rgid; 1961 td->td_ucred = usecred; 1962 } else 1963 usecred = cred; 1964 AUDIT_ARG_VALUE(amode); 1965 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 1966 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 1967 td); 1968 if ((error = namei(&nd)) != 0) 1969 goto out; 1970 vp = nd.ni_vp; 1971 1972 error = vn_access(vp, amode, usecred, td); 1973 NDFREE(&nd, NDF_ONLY_PNBUF); 1974 vput(vp); 1975 out: 1976 if (usecred != cred) { 1977 td->td_ucred = cred; 1978 crfree(usecred); 1979 } 1980 return (error); 1981 } 1982 1983 /* 1984 * Check access permissions using "effective" credentials. 1985 */ 1986 #ifndef _SYS_SYSPROTO_H_ 1987 struct eaccess_args { 1988 char *path; 1989 int amode; 1990 }; 1991 #endif 1992 int 1993 sys_eaccess(td, uap) 1994 struct thread *td; 1995 register struct eaccess_args /* { 1996 char *path; 1997 int amode; 1998 } */ *uap; 1999 { 2000 2001 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2002 AT_EACCESS, uap->amode)); 2003 } 2004 2005 #if defined(COMPAT_43) 2006 /* 2007 * Get file status; this version follows links. 2008 */ 2009 #ifndef _SYS_SYSPROTO_H_ 2010 struct ostat_args { 2011 char *path; 2012 struct ostat *ub; 2013 }; 2014 #endif 2015 int 2016 ostat(td, uap) 2017 struct thread *td; 2018 register struct ostat_args /* { 2019 char *path; 2020 struct ostat *ub; 2021 } */ *uap; 2022 { 2023 struct stat sb; 2024 struct ostat osb; 2025 int error; 2026 2027 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2028 &sb, NULL); 2029 if (error != 0) 2030 return (error); 2031 cvtstat(&sb, &osb); 2032 return (copyout(&osb, uap->ub, sizeof (osb))); 2033 } 2034 2035 /* 2036 * Get file status; this version does not follow links. 
2037 */ 2038 #ifndef _SYS_SYSPROTO_H_ 2039 struct olstat_args { 2040 char *path; 2041 struct ostat *ub; 2042 }; 2043 #endif 2044 int 2045 olstat(td, uap) 2046 struct thread *td; 2047 register struct olstat_args /* { 2048 char *path; 2049 struct ostat *ub; 2050 } */ *uap; 2051 { 2052 struct stat sb; 2053 struct ostat osb; 2054 int error; 2055 2056 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2057 UIO_USERSPACE, &sb, NULL); 2058 if (error != 0) 2059 return (error); 2060 cvtstat(&sb, &osb); 2061 return (copyout(&osb, uap->ub, sizeof (osb))); 2062 } 2063 2064 /* 2065 * Convert from an old to a new stat structure. 2066 */ 2067 void 2068 cvtstat(st, ost) 2069 struct stat *st; 2070 struct ostat *ost; 2071 { 2072 2073 bzero(ost, sizeof(*ost)); 2074 ost->st_dev = st->st_dev; 2075 ost->st_ino = st->st_ino; 2076 ost->st_mode = st->st_mode; 2077 ost->st_nlink = st->st_nlink; 2078 ost->st_uid = st->st_uid; 2079 ost->st_gid = st->st_gid; 2080 ost->st_rdev = st->st_rdev; 2081 if (st->st_size < (quad_t)1 << 32) 2082 ost->st_size = st->st_size; 2083 else 2084 ost->st_size = -2; 2085 ost->st_atim = st->st_atim; 2086 ost->st_mtim = st->st_mtim; 2087 ost->st_ctim = st->st_ctim; 2088 ost->st_blksize = st->st_blksize; 2089 ost->st_blocks = st->st_blocks; 2090 ost->st_flags = st->st_flags; 2091 ost->st_gen = st->st_gen; 2092 } 2093 #endif /* COMPAT_43 */ 2094 2095 /* 2096 * Get file status; this version follows links. 
2097 */ 2098 #ifndef _SYS_SYSPROTO_H_ 2099 struct stat_args { 2100 char *path; 2101 struct stat *ub; 2102 }; 2103 #endif 2104 int 2105 sys_stat(td, uap) 2106 struct thread *td; 2107 register struct stat_args /* { 2108 char *path; 2109 struct stat *ub; 2110 } */ *uap; 2111 { 2112 struct stat sb; 2113 int error; 2114 2115 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2116 &sb, NULL); 2117 if (error == 0) 2118 error = copyout(&sb, uap->ub, sizeof (sb)); 2119 return (error); 2120 } 2121 2122 #ifndef _SYS_SYSPROTO_H_ 2123 struct fstatat_args { 2124 int fd; 2125 char *path; 2126 struct stat *buf; 2127 int flag; 2128 } 2129 #endif 2130 int 2131 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2132 { 2133 struct stat sb; 2134 int error; 2135 2136 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2137 UIO_USERSPACE, &sb, NULL); 2138 if (error == 0) 2139 error = copyout(&sb, uap->buf, sizeof (sb)); 2140 return (error); 2141 } 2142 2143 int 2144 kern_statat(struct thread *td, int flag, int fd, char *path, 2145 enum uio_seg pathseg, struct stat *sbp, 2146 void (*hook)(struct vnode *vp, struct stat *sbp)) 2147 { 2148 struct nameidata nd; 2149 struct stat sb; 2150 cap_rights_t rights; 2151 int error; 2152 2153 if (flag & ~AT_SYMLINK_NOFOLLOW) 2154 return (EINVAL); 2155 2156 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : 2157 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2158 cap_rights_init(&rights, CAP_FSTAT), td); 2159 2160 if ((error = namei(&nd)) != 0) 2161 return (error); 2162 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2163 if (error == 0) { 2164 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode); 2165 if (S_ISREG(sb.st_mode)) 2166 SDT_PROBE2(vfs, , stat, reg, path, pathseg); 2167 if (__predict_false(hook != NULL)) 2168 hook(nd.ni_vp, &sb); 2169 } 2170 NDFREE(&nd, NDF_ONLY_PNBUF); 2171 vput(nd.ni_vp); 2172 if (error != 0) 2173 return (error); 2174 *sbp = sb; 2175 #ifdef KTRACE 2176 if (KTRPOINT(td, KTR_STRUCT)) 2177 ktrstat(&sb); 2178 #endif 2179 return (0); 2180 } 2181 2182 /* 2183 * Get file status; this version does not follow links. 2184 */ 2185 #ifndef _SYS_SYSPROTO_H_ 2186 struct lstat_args { 2187 char *path; 2188 struct stat *ub; 2189 }; 2190 #endif 2191 int 2192 sys_lstat(td, uap) 2193 struct thread *td; 2194 register struct lstat_args /* { 2195 char *path; 2196 struct stat *ub; 2197 } */ *uap; 2198 { 2199 struct stat sb; 2200 int error; 2201 2202 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2203 UIO_USERSPACE, &sb, NULL); 2204 if (error == 0) 2205 error = copyout(&sb, uap->ub, sizeof (sb)); 2206 return (error); 2207 } 2208 2209 /* 2210 * Implementation of the NetBSD [l]stat() functions. 
2211 */ 2212 void 2213 cvtnstat(sb, nsb) 2214 struct stat *sb; 2215 struct nstat *nsb; 2216 { 2217 2218 bzero(nsb, sizeof *nsb); 2219 nsb->st_dev = sb->st_dev; 2220 nsb->st_ino = sb->st_ino; 2221 nsb->st_mode = sb->st_mode; 2222 nsb->st_nlink = sb->st_nlink; 2223 nsb->st_uid = sb->st_uid; 2224 nsb->st_gid = sb->st_gid; 2225 nsb->st_rdev = sb->st_rdev; 2226 nsb->st_atim = sb->st_atim; 2227 nsb->st_mtim = sb->st_mtim; 2228 nsb->st_ctim = sb->st_ctim; 2229 nsb->st_size = sb->st_size; 2230 nsb->st_blocks = sb->st_blocks; 2231 nsb->st_blksize = sb->st_blksize; 2232 nsb->st_flags = sb->st_flags; 2233 nsb->st_gen = sb->st_gen; 2234 nsb->st_birthtim = sb->st_birthtim; 2235 } 2236 2237 #ifndef _SYS_SYSPROTO_H_ 2238 struct nstat_args { 2239 char *path; 2240 struct nstat *ub; 2241 }; 2242 #endif 2243 int 2244 sys_nstat(td, uap) 2245 struct thread *td; 2246 register struct nstat_args /* { 2247 char *path; 2248 struct nstat *ub; 2249 } */ *uap; 2250 { 2251 struct stat sb; 2252 struct nstat nsb; 2253 int error; 2254 2255 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2256 &sb, NULL); 2257 if (error != 0) 2258 return (error); 2259 cvtnstat(&sb, &nsb); 2260 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2261 } 2262 2263 /* 2264 * NetBSD lstat. Get file status; this version does not follow links. 2265 */ 2266 #ifndef _SYS_SYSPROTO_H_ 2267 struct lstat_args { 2268 char *path; 2269 struct stat *ub; 2270 }; 2271 #endif 2272 int 2273 sys_nlstat(td, uap) 2274 struct thread *td; 2275 register struct nlstat_args /* { 2276 char *path; 2277 struct nstat *ub; 2278 } */ *uap; 2279 { 2280 struct stat sb; 2281 struct nstat nsb; 2282 int error; 2283 2284 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2285 UIO_USERSPACE, &sb, NULL); 2286 if (error != 0) 2287 return (error); 2288 cvtnstat(&sb, &nsb); 2289 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2290 } 2291 2292 /* 2293 * Get configurable pathname variables. 
2294 */ 2295 #ifndef _SYS_SYSPROTO_H_ 2296 struct pathconf_args { 2297 char *path; 2298 int name; 2299 }; 2300 #endif 2301 int 2302 sys_pathconf(td, uap) 2303 struct thread *td; 2304 register struct pathconf_args /* { 2305 char *path; 2306 int name; 2307 } */ *uap; 2308 { 2309 2310 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2311 } 2312 2313 #ifndef _SYS_SYSPROTO_H_ 2314 struct lpathconf_args { 2315 char *path; 2316 int name; 2317 }; 2318 #endif 2319 int 2320 sys_lpathconf(td, uap) 2321 struct thread *td; 2322 register struct lpathconf_args /* { 2323 char *path; 2324 int name; 2325 } */ *uap; 2326 { 2327 2328 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2329 NOFOLLOW)); 2330 } 2331 2332 int 2333 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2334 u_long flags) 2335 { 2336 struct nameidata nd; 2337 int error; 2338 2339 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2340 pathseg, path, td); 2341 if ((error = namei(&nd)) != 0) 2342 return (error); 2343 NDFREE(&nd, NDF_ONLY_PNBUF); 2344 2345 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2346 vput(nd.ni_vp); 2347 return (error); 2348 } 2349 2350 /* 2351 * Return target name of a symbolic link. 
2352 */ 2353 #ifndef _SYS_SYSPROTO_H_ 2354 struct readlink_args { 2355 char *path; 2356 char *buf; 2357 size_t count; 2358 }; 2359 #endif 2360 int 2361 sys_readlink(td, uap) 2362 struct thread *td; 2363 register struct readlink_args /* { 2364 char *path; 2365 char *buf; 2366 size_t count; 2367 } */ *uap; 2368 { 2369 2370 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2371 uap->buf, UIO_USERSPACE, uap->count)); 2372 } 2373 #ifndef _SYS_SYSPROTO_H_ 2374 struct readlinkat_args { 2375 int fd; 2376 char *path; 2377 char *buf; 2378 size_t bufsize; 2379 }; 2380 #endif 2381 int 2382 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2383 { 2384 2385 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2386 uap->buf, UIO_USERSPACE, uap->bufsize)); 2387 } 2388 2389 int 2390 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2391 char *buf, enum uio_seg bufseg, size_t count) 2392 { 2393 struct vnode *vp; 2394 struct iovec aiov; 2395 struct uio auio; 2396 struct nameidata nd; 2397 int error; 2398 2399 if (count > IOSIZE_MAX) 2400 return (EINVAL); 2401 2402 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2403 pathseg, path, fd, td); 2404 2405 if ((error = namei(&nd)) != 0) 2406 return (error); 2407 NDFREE(&nd, NDF_ONLY_PNBUF); 2408 vp = nd.ni_vp; 2409 #ifdef MAC 2410 error = mac_vnode_check_readlink(td->td_ucred, vp); 2411 if (error != 0) { 2412 vput(vp); 2413 return (error); 2414 } 2415 #endif 2416 if (vp->v_type != VLNK) 2417 error = EINVAL; 2418 else { 2419 aiov.iov_base = buf; 2420 aiov.iov_len = count; 2421 auio.uio_iov = &aiov; 2422 auio.uio_iovcnt = 1; 2423 auio.uio_offset = 0; 2424 auio.uio_rw = UIO_READ; 2425 auio.uio_segflg = bufseg; 2426 auio.uio_td = td; 2427 auio.uio_resid = count; 2428 error = VOP_READLINK(vp, &auio, td->td_ucred); 2429 td->td_retval[0] = count - auio.uio_resid; 2430 } 2431 vput(vp); 2432 return (error); 2433 } 2434 2435 /* 2436 * Common implementation code for 
chflags() and fchflags(). 2437 */ 2438 static int 2439 setfflags(td, vp, flags) 2440 struct thread *td; 2441 struct vnode *vp; 2442 u_long flags; 2443 { 2444 struct mount *mp; 2445 struct vattr vattr; 2446 int error; 2447 2448 /* We can't support the value matching VNOVAL. */ 2449 if (flags == VNOVAL) 2450 return (EOPNOTSUPP); 2451 2452 /* 2453 * Prevent non-root users from setting flags on devices. When 2454 * a device is reused, users can retain ownership of the device 2455 * if they are allowed to set flags and programs assume that 2456 * chown can't fail when done as root. 2457 */ 2458 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2459 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2460 if (error != 0) 2461 return (error); 2462 } 2463 2464 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2465 return (error); 2466 VATTR_NULL(&vattr); 2467 vattr.va_flags = flags; 2468 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2469 #ifdef MAC 2470 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2471 if (error == 0) 2472 #endif 2473 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2474 VOP_UNLOCK(vp, 0); 2475 vn_finished_write(mp); 2476 return (error); 2477 } 2478 2479 /* 2480 * Change flags of a file given a path name. 
2481 */ 2482 #ifndef _SYS_SYSPROTO_H_ 2483 struct chflags_args { 2484 const char *path; 2485 u_long flags; 2486 }; 2487 #endif 2488 int 2489 sys_chflags(td, uap) 2490 struct thread *td; 2491 register struct chflags_args /* { 2492 const char *path; 2493 u_long flags; 2494 } */ *uap; 2495 { 2496 2497 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2498 uap->flags, 0)); 2499 } 2500 2501 #ifndef _SYS_SYSPROTO_H_ 2502 struct chflagsat_args { 2503 int fd; 2504 const char *path; 2505 u_long flags; 2506 int atflag; 2507 } 2508 #endif 2509 int 2510 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2511 { 2512 int fd = uap->fd; 2513 const char *path = uap->path; 2514 u_long flags = uap->flags; 2515 int atflag = uap->atflag; 2516 2517 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2518 return (EINVAL); 2519 2520 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2521 } 2522 2523 /* 2524 * Same as chflags() but doesn't follow symlinks. 2525 */ 2526 int 2527 sys_lchflags(td, uap) 2528 struct thread *td; 2529 register struct lchflags_args /* { 2530 const char *path; 2531 u_long flags; 2532 } */ *uap; 2533 { 2534 2535 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2536 uap->flags, AT_SYMLINK_NOFOLLOW)); 2537 } 2538 2539 static int 2540 kern_chflagsat(struct thread *td, int fd, const char *path, 2541 enum uio_seg pathseg, u_long flags, int atflag) 2542 { 2543 struct nameidata nd; 2544 cap_rights_t rights; 2545 int error, follow; 2546 2547 AUDIT_ARG_FFLAGS(flags); 2548 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2549 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2550 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2551 if ((error = namei(&nd)) != 0) 2552 return (error); 2553 NDFREE(&nd, NDF_ONLY_PNBUF); 2554 error = setfflags(td, nd.ni_vp, flags); 2555 vrele(nd.ni_vp); 2556 return (error); 2557 } 2558 2559 /* 2560 * Change flags of a file given a file descriptor. 
2561 */ 2562 #ifndef _SYS_SYSPROTO_H_ 2563 struct fchflags_args { 2564 int fd; 2565 u_long flags; 2566 }; 2567 #endif 2568 int 2569 sys_fchflags(td, uap) 2570 struct thread *td; 2571 register struct fchflags_args /* { 2572 int fd; 2573 u_long flags; 2574 } */ *uap; 2575 { 2576 struct file *fp; 2577 cap_rights_t rights; 2578 int error; 2579 2580 AUDIT_ARG_FD(uap->fd); 2581 AUDIT_ARG_FFLAGS(uap->flags); 2582 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS), 2583 &fp); 2584 if (error != 0) 2585 return (error); 2586 #ifdef AUDIT 2587 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2588 AUDIT_ARG_VNODE1(fp->f_vnode); 2589 VOP_UNLOCK(fp->f_vnode, 0); 2590 #endif 2591 error = setfflags(td, fp->f_vnode, uap->flags); 2592 fdrop(fp, td); 2593 return (error); 2594 } 2595 2596 /* 2597 * Common implementation code for chmod(), lchmod() and fchmod(). 2598 */ 2599 int 2600 setfmode(td, cred, vp, mode) 2601 struct thread *td; 2602 struct ucred *cred; 2603 struct vnode *vp; 2604 int mode; 2605 { 2606 struct mount *mp; 2607 struct vattr vattr; 2608 int error; 2609 2610 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2611 return (error); 2612 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2613 VATTR_NULL(&vattr); 2614 vattr.va_mode = mode & ALLPERMS; 2615 #ifdef MAC 2616 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2617 if (error == 0) 2618 #endif 2619 error = VOP_SETATTR(vp, &vattr, cred); 2620 VOP_UNLOCK(vp, 0); 2621 vn_finished_write(mp); 2622 return (error); 2623 } 2624 2625 /* 2626 * Change mode of a file given path name. 
2627 */ 2628 #ifndef _SYS_SYSPROTO_H_ 2629 struct chmod_args { 2630 char *path; 2631 int mode; 2632 }; 2633 #endif 2634 int 2635 sys_chmod(td, uap) 2636 struct thread *td; 2637 register struct chmod_args /* { 2638 char *path; 2639 int mode; 2640 } */ *uap; 2641 { 2642 2643 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2644 uap->mode, 0)); 2645 } 2646 2647 #ifndef _SYS_SYSPROTO_H_ 2648 struct fchmodat_args { 2649 int dirfd; 2650 char *path; 2651 mode_t mode; 2652 int flag; 2653 } 2654 #endif 2655 int 2656 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2657 { 2658 int flag = uap->flag; 2659 int fd = uap->fd; 2660 char *path = uap->path; 2661 mode_t mode = uap->mode; 2662 2663 if (flag & ~AT_SYMLINK_NOFOLLOW) 2664 return (EINVAL); 2665 2666 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2667 } 2668 2669 /* 2670 * Change mode of a file given path name (don't follow links.) 2671 */ 2672 #ifndef _SYS_SYSPROTO_H_ 2673 struct lchmod_args { 2674 char *path; 2675 int mode; 2676 }; 2677 #endif 2678 int 2679 sys_lchmod(td, uap) 2680 struct thread *td; 2681 register struct lchmod_args /* { 2682 char *path; 2683 int mode; 2684 } */ *uap; 2685 { 2686 2687 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2688 uap->mode, AT_SYMLINK_NOFOLLOW)); 2689 } 2690 2691 int 2692 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2693 mode_t mode, int flag) 2694 { 2695 struct nameidata nd; 2696 cap_rights_t rights; 2697 int error, follow; 2698 2699 AUDIT_ARG_MODE(mode); 2700 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; 2701 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2702 cap_rights_init(&rights, CAP_FCHMOD), td); 2703 if ((error = namei(&nd)) != 0) 2704 return (error); 2705 NDFREE(&nd, NDF_ONLY_PNBUF); 2706 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2707 vrele(nd.ni_vp); 2708 return (error); 2709 } 2710 2711 /* 2712 * Change mode of a file given a file descriptor. 2713 */ 2714 #ifndef _SYS_SYSPROTO_H_ 2715 struct fchmod_args { 2716 int fd; 2717 int mode; 2718 }; 2719 #endif 2720 int 2721 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2722 { 2723 struct file *fp; 2724 cap_rights_t rights; 2725 int error; 2726 2727 AUDIT_ARG_FD(uap->fd); 2728 AUDIT_ARG_MODE(uap->mode); 2729 2730 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2731 if (error != 0) 2732 return (error); 2733 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2734 fdrop(fp, td); 2735 return (error); 2736 } 2737 2738 /* 2739 * Common implementation for chown(), lchown(), and fchown() 2740 */ 2741 int 2742 setfown(td, cred, vp, uid, gid) 2743 struct thread *td; 2744 struct ucred *cred; 2745 struct vnode *vp; 2746 uid_t uid; 2747 gid_t gid; 2748 { 2749 struct mount *mp; 2750 struct vattr vattr; 2751 int error; 2752 2753 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2754 return (error); 2755 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2756 VATTR_NULL(&vattr); 2757 vattr.va_uid = uid; 2758 vattr.va_gid = gid; 2759 #ifdef MAC 2760 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2761 vattr.va_gid); 2762 if (error == 0) 2763 #endif 2764 error = VOP_SETATTR(vp, &vattr, cred); 2765 VOP_UNLOCK(vp, 0); 2766 vn_finished_write(mp); 2767 return (error); 2768 } 2769 2770 /* 2771 * Set ownership given a path name. 
2772 */ 2773 #ifndef _SYS_SYSPROTO_H_ 2774 struct chown_args { 2775 char *path; 2776 int uid; 2777 int gid; 2778 }; 2779 #endif 2780 int 2781 sys_chown(td, uap) 2782 struct thread *td; 2783 register struct chown_args /* { 2784 char *path; 2785 int uid; 2786 int gid; 2787 } */ *uap; 2788 { 2789 2790 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2791 uap->gid, 0)); 2792 } 2793 2794 #ifndef _SYS_SYSPROTO_H_ 2795 struct fchownat_args { 2796 int fd; 2797 const char * path; 2798 uid_t uid; 2799 gid_t gid; 2800 int flag; 2801 }; 2802 #endif 2803 int 2804 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2805 { 2806 int flag; 2807 2808 flag = uap->flag; 2809 if (flag & ~AT_SYMLINK_NOFOLLOW) 2810 return (EINVAL); 2811 2812 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2813 uap->gid, uap->flag)); 2814 } 2815 2816 int 2817 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2818 int uid, int gid, int flag) 2819 { 2820 struct nameidata nd; 2821 cap_rights_t rights; 2822 int error, follow; 2823 2824 AUDIT_ARG_OWNER(uid, gid); 2825 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2826 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2827 cap_rights_init(&rights, CAP_FCHOWN), td); 2828 2829 if ((error = namei(&nd)) != 0) 2830 return (error); 2831 NDFREE(&nd, NDF_ONLY_PNBUF); 2832 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2833 vrele(nd.ni_vp); 2834 return (error); 2835 } 2836 2837 /* 2838 * Set ownership given a path name, do not cross symlinks. 
2839 */ 2840 #ifndef _SYS_SYSPROTO_H_ 2841 struct lchown_args { 2842 char *path; 2843 int uid; 2844 int gid; 2845 }; 2846 #endif 2847 int 2848 sys_lchown(td, uap) 2849 struct thread *td; 2850 register struct lchown_args /* { 2851 char *path; 2852 int uid; 2853 int gid; 2854 } */ *uap; 2855 { 2856 2857 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2858 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2859 } 2860 2861 /* 2862 * Set ownership given a file descriptor. 2863 */ 2864 #ifndef _SYS_SYSPROTO_H_ 2865 struct fchown_args { 2866 int fd; 2867 int uid; 2868 int gid; 2869 }; 2870 #endif 2871 int 2872 sys_fchown(td, uap) 2873 struct thread *td; 2874 register struct fchown_args /* { 2875 int fd; 2876 int uid; 2877 int gid; 2878 } */ *uap; 2879 { 2880 struct file *fp; 2881 cap_rights_t rights; 2882 int error; 2883 2884 AUDIT_ARG_FD(uap->fd); 2885 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2886 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 2887 if (error != 0) 2888 return (error); 2889 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2890 fdrop(fp, td); 2891 return (error); 2892 } 2893 2894 /* 2895 * Common implementation code for utimes(), lutimes(), and futimes(). 
2896 */ 2897 static int 2898 getutimes(usrtvp, tvpseg, tsp) 2899 const struct timeval *usrtvp; 2900 enum uio_seg tvpseg; 2901 struct timespec *tsp; 2902 { 2903 struct timeval tv[2]; 2904 const struct timeval *tvp; 2905 int error; 2906 2907 if (usrtvp == NULL) { 2908 vfs_timestamp(&tsp[0]); 2909 tsp[1] = tsp[0]; 2910 } else { 2911 if (tvpseg == UIO_SYSSPACE) { 2912 tvp = usrtvp; 2913 } else { 2914 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 2915 return (error); 2916 tvp = tv; 2917 } 2918 2919 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 2920 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 2921 return (EINVAL); 2922 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2923 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2924 } 2925 return (0); 2926 } 2927 2928 /* 2929 * Common implementation code for futimens(), utimensat(). 2930 */ 2931 #define UTIMENS_NULL 0x1 2932 #define UTIMENS_EXIT 0x2 2933 static int 2934 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 2935 struct timespec *tsp, int *retflags) 2936 { 2937 struct timespec tsnow; 2938 int error; 2939 2940 vfs_timestamp(&tsnow); 2941 *retflags = 0; 2942 if (usrtsp == NULL) { 2943 tsp[0] = tsnow; 2944 tsp[1] = tsnow; 2945 *retflags |= UTIMENS_NULL; 2946 return (0); 2947 } 2948 if (tspseg == UIO_SYSSPACE) { 2949 tsp[0] = usrtsp[0]; 2950 tsp[1] = usrtsp[1]; 2951 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 2952 return (error); 2953 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 2954 *retflags |= UTIMENS_EXIT; 2955 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 2956 *retflags |= UTIMENS_NULL; 2957 if (tsp[0].tv_nsec == UTIME_OMIT) 2958 tsp[0].tv_sec = VNOVAL; 2959 else if (tsp[0].tv_nsec == UTIME_NOW) 2960 tsp[0] = tsnow; 2961 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 2962 return (EINVAL); 2963 if (tsp[1].tv_nsec == UTIME_OMIT) 2964 tsp[1].tv_sec = VNOVAL; 2965 else if (tsp[1].tv_nsec == UTIME_NOW) 2966 tsp[1] = tsnow; 2967 else if 
(tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 2968 return (EINVAL); 2969 2970 return (0); 2971 } 2972 2973 /* 2974 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 2975 * and utimensat(). 2976 */ 2977 static int 2978 setutimes(td, vp, ts, numtimes, nullflag) 2979 struct thread *td; 2980 struct vnode *vp; 2981 const struct timespec *ts; 2982 int numtimes; 2983 int nullflag; 2984 { 2985 struct mount *mp; 2986 struct vattr vattr; 2987 int error, setbirthtime; 2988 2989 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2990 return (error); 2991 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2992 setbirthtime = 0; 2993 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 2994 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 2995 setbirthtime = 1; 2996 VATTR_NULL(&vattr); 2997 vattr.va_atime = ts[0]; 2998 vattr.va_mtime = ts[1]; 2999 if (setbirthtime) 3000 vattr.va_birthtime = ts[1]; 3001 if (numtimes > 2) 3002 vattr.va_birthtime = ts[2]; 3003 if (nullflag) 3004 vattr.va_vaflags |= VA_UTIMES_NULL; 3005 #ifdef MAC 3006 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3007 vattr.va_mtime); 3008 #endif 3009 if (error == 0) 3010 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3011 VOP_UNLOCK(vp, 0); 3012 vn_finished_write(mp); 3013 return (error); 3014 } 3015 3016 /* 3017 * Set the access and modification times of a file. 
3018 */ 3019 #ifndef _SYS_SYSPROTO_H_ 3020 struct utimes_args { 3021 char *path; 3022 struct timeval *tptr; 3023 }; 3024 #endif 3025 int 3026 sys_utimes(td, uap) 3027 struct thread *td; 3028 register struct utimes_args /* { 3029 char *path; 3030 struct timeval *tptr; 3031 } */ *uap; 3032 { 3033 3034 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3035 uap->tptr, UIO_USERSPACE)); 3036 } 3037 3038 #ifndef _SYS_SYSPROTO_H_ 3039 struct futimesat_args { 3040 int fd; 3041 const char * path; 3042 const struct timeval * times; 3043 }; 3044 #endif 3045 int 3046 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3047 { 3048 3049 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3050 uap->times, UIO_USERSPACE)); 3051 } 3052 3053 int 3054 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3055 struct timeval *tptr, enum uio_seg tptrseg) 3056 { 3057 struct nameidata nd; 3058 struct timespec ts[2]; 3059 cap_rights_t rights; 3060 int error; 3061 3062 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3063 return (error); 3064 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3065 cap_rights_init(&rights, CAP_FUTIMES), td); 3066 3067 if ((error = namei(&nd)) != 0) 3068 return (error); 3069 NDFREE(&nd, NDF_ONLY_PNBUF); 3070 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3071 vrele(nd.ni_vp); 3072 return (error); 3073 } 3074 3075 /* 3076 * Set the access and modification times of a file. 
3077 */ 3078 #ifndef _SYS_SYSPROTO_H_ 3079 struct lutimes_args { 3080 char *path; 3081 struct timeval *tptr; 3082 }; 3083 #endif 3084 int 3085 sys_lutimes(td, uap) 3086 struct thread *td; 3087 register struct lutimes_args /* { 3088 char *path; 3089 struct timeval *tptr; 3090 } */ *uap; 3091 { 3092 3093 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3094 UIO_USERSPACE)); 3095 } 3096 3097 int 3098 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3099 struct timeval *tptr, enum uio_seg tptrseg) 3100 { 3101 struct timespec ts[2]; 3102 struct nameidata nd; 3103 int error; 3104 3105 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3106 return (error); 3107 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3108 if ((error = namei(&nd)) != 0) 3109 return (error); 3110 NDFREE(&nd, NDF_ONLY_PNBUF); 3111 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3112 vrele(nd.ni_vp); 3113 return (error); 3114 } 3115 3116 /* 3117 * Set the access and modification times of a file. 
3118 */ 3119 #ifndef _SYS_SYSPROTO_H_ 3120 struct futimes_args { 3121 int fd; 3122 struct timeval *tptr; 3123 }; 3124 #endif 3125 int 3126 sys_futimes(td, uap) 3127 struct thread *td; 3128 register struct futimes_args /* { 3129 int fd; 3130 struct timeval *tptr; 3131 } */ *uap; 3132 { 3133 3134 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3135 } 3136 3137 int 3138 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3139 enum uio_seg tptrseg) 3140 { 3141 struct timespec ts[2]; 3142 struct file *fp; 3143 cap_rights_t rights; 3144 int error; 3145 3146 AUDIT_ARG_FD(fd); 3147 error = getutimes(tptr, tptrseg, ts); 3148 if (error != 0) 3149 return (error); 3150 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3151 if (error != 0) 3152 return (error); 3153 #ifdef AUDIT 3154 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3155 AUDIT_ARG_VNODE1(fp->f_vnode); 3156 VOP_UNLOCK(fp->f_vnode, 0); 3157 #endif 3158 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3159 fdrop(fp, td); 3160 return (error); 3161 } 3162 3163 int 3164 sys_futimens(struct thread *td, struct futimens_args *uap) 3165 { 3166 3167 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3168 } 3169 3170 int 3171 kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3172 enum uio_seg tptrseg) 3173 { 3174 struct timespec ts[2]; 3175 struct file *fp; 3176 cap_rights_t rights; 3177 int error, flags; 3178 3179 AUDIT_ARG_FD(fd); 3180 error = getutimens(tptr, tptrseg, ts, &flags); 3181 if (error != 0) 3182 return (error); 3183 if (flags & UTIMENS_EXIT) 3184 return (0); 3185 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3186 if (error != 0) 3187 return (error); 3188 #ifdef AUDIT 3189 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3190 AUDIT_ARG_VNODE1(fp->f_vnode); 3191 VOP_UNLOCK(fp->f_vnode, 0); 3192 #endif 3193 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3194 fdrop(fp, td); 3195 return (error); 3196 } 3197 
3198 int 3199 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3200 { 3201 3202 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3203 uap->times, UIO_USERSPACE, uap->flag)); 3204 } 3205 3206 int 3207 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3208 struct timespec *tptr, enum uio_seg tptrseg, int flag) 3209 { 3210 struct nameidata nd; 3211 struct timespec ts[2]; 3212 cap_rights_t rights; 3213 int error, flags; 3214 3215 if (flag & ~AT_SYMLINK_NOFOLLOW) 3216 return (EINVAL); 3217 3218 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3219 return (error); 3220 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 3221 FOLLOW) | AUDITVNODE1, pathseg, path, fd, 3222 cap_rights_init(&rights, CAP_FUTIMES), td); 3223 if ((error = namei(&nd)) != 0) 3224 return (error); 3225 /* 3226 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3227 * POSIX states: 3228 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3229 * "Search permission is denied by a component of the path prefix." 3230 */ 3231 NDFREE(&nd, NDF_ONLY_PNBUF); 3232 if ((flags & UTIMENS_EXIT) == 0) 3233 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3234 vrele(nd.ni_vp); 3235 return (error); 3236 } 3237 3238 /* 3239 * Truncate a file given its path name. 
3240 */ 3241 #ifndef _SYS_SYSPROTO_H_ 3242 struct truncate_args { 3243 char *path; 3244 int pad; 3245 off_t length; 3246 }; 3247 #endif 3248 int 3249 sys_truncate(td, uap) 3250 struct thread *td; 3251 register struct truncate_args /* { 3252 char *path; 3253 int pad; 3254 off_t length; 3255 } */ *uap; 3256 { 3257 3258 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3259 } 3260 3261 int 3262 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3263 { 3264 struct mount *mp; 3265 struct vnode *vp; 3266 void *rl_cookie; 3267 struct vattr vattr; 3268 struct nameidata nd; 3269 int error; 3270 3271 if (length < 0) 3272 return(EINVAL); 3273 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3274 if ((error = namei(&nd)) != 0) 3275 return (error); 3276 vp = nd.ni_vp; 3277 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3278 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3279 vn_rangelock_unlock(vp, rl_cookie); 3280 vrele(vp); 3281 return (error); 3282 } 3283 NDFREE(&nd, NDF_ONLY_PNBUF); 3284 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3285 if (vp->v_type == VDIR) 3286 error = EISDIR; 3287 #ifdef MAC 3288 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3289 } 3290 #endif 3291 else if ((error = vn_writechk(vp)) == 0 && 3292 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3293 VATTR_NULL(&vattr); 3294 vattr.va_size = length; 3295 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3296 } 3297 VOP_UNLOCK(vp, 0); 3298 vn_finished_write(mp); 3299 vn_rangelock_unlock(vp, rl_cookie); 3300 vrele(vp); 3301 return (error); 3302 } 3303 3304 #if defined(COMPAT_43) 3305 /* 3306 * Truncate a file given its path name. 
3307 */ 3308 #ifndef _SYS_SYSPROTO_H_ 3309 struct otruncate_args { 3310 char *path; 3311 long length; 3312 }; 3313 #endif 3314 int 3315 otruncate(td, uap) 3316 struct thread *td; 3317 register struct otruncate_args /* { 3318 char *path; 3319 long length; 3320 } */ *uap; 3321 { 3322 struct truncate_args /* { 3323 char *path; 3324 int pad; 3325 off_t length; 3326 } */ nuap; 3327 3328 nuap.path = uap->path; 3329 nuap.length = uap->length; 3330 return (sys_truncate(td, &nuap)); 3331 } 3332 #endif /* COMPAT_43 */ 3333 3334 #if defined(COMPAT_FREEBSD6) 3335 /* Versions with the pad argument */ 3336 int 3337 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3338 { 3339 struct truncate_args ouap; 3340 3341 ouap.path = uap->path; 3342 ouap.length = uap->length; 3343 return (sys_truncate(td, &ouap)); 3344 } 3345 3346 int 3347 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3348 { 3349 struct ftruncate_args ouap; 3350 3351 ouap.fd = uap->fd; 3352 ouap.length = uap->length; 3353 return (sys_ftruncate(td, &ouap)); 3354 } 3355 #endif 3356 3357 int 3358 kern_fsync(struct thread *td, int fd, bool fullsync) 3359 { 3360 struct vnode *vp; 3361 struct mount *mp; 3362 struct file *fp; 3363 cap_rights_t rights; 3364 int error, lock_flags; 3365 3366 AUDIT_ARG_FD(fd); 3367 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSYNC), &fp); 3368 if (error != 0) 3369 return (error); 3370 vp = fp->f_vnode; 3371 #if 0 3372 if (!fullsync) 3373 /* XXXKIB: compete outstanding aio writes */; 3374 #endif 3375 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3376 if (error != 0) 3377 goto drop; 3378 if (MNT_SHARED_WRITES(mp) || 3379 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3380 lock_flags = LK_SHARED; 3381 } else { 3382 lock_flags = LK_EXCLUSIVE; 3383 } 3384 vn_lock(vp, lock_flags | LK_RETRY); 3385 AUDIT_ARG_VNODE1(vp); 3386 if (vp->v_object != NULL) { 3387 VM_OBJECT_WLOCK(vp->v_object); 3388 vm_object_page_clean(vp->v_object, 0, 0, 0); 
3389 VM_OBJECT_WUNLOCK(vp->v_object); 3390 } 3391 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3392 VOP_UNLOCK(vp, 0); 3393 vn_finished_write(mp); 3394 drop: 3395 fdrop(fp, td); 3396 return (error); 3397 } 3398 3399 /* 3400 * Sync an open file. 3401 */ 3402 #ifndef _SYS_SYSPROTO_H_ 3403 struct fsync_args { 3404 int fd; 3405 }; 3406 #endif 3407 int 3408 sys_fsync(struct thread *td, struct fsync_args *uap) 3409 { 3410 3411 return (kern_fsync(td, uap->fd, true)); 3412 } 3413 3414 int 3415 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3416 { 3417 3418 return (kern_fsync(td, uap->fd, false)); 3419 } 3420 3421 /* 3422 * Rename files. Source and destination must either both be directories, or 3423 * both not be directories. If target is a directory, it must be empty. 3424 */ 3425 #ifndef _SYS_SYSPROTO_H_ 3426 struct rename_args { 3427 char *from; 3428 char *to; 3429 }; 3430 #endif 3431 int 3432 sys_rename(td, uap) 3433 struct thread *td; 3434 register struct rename_args /* { 3435 char *from; 3436 char *to; 3437 } */ *uap; 3438 { 3439 3440 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3441 uap->to, UIO_USERSPACE)); 3442 } 3443 3444 #ifndef _SYS_SYSPROTO_H_ 3445 struct renameat_args { 3446 int oldfd; 3447 char *old; 3448 int newfd; 3449 char *new; 3450 }; 3451 #endif 3452 int 3453 sys_renameat(struct thread *td, struct renameat_args *uap) 3454 { 3455 3456 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3457 UIO_USERSPACE)); 3458 } 3459 3460 int 3461 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3462 enum uio_seg pathseg) 3463 { 3464 struct mount *mp = NULL; 3465 struct vnode *tvp, *fvp, *tdvp; 3466 struct nameidata fromnd, tond; 3467 cap_rights_t rights; 3468 int error; 3469 3470 again: 3471 bwillwrite(); 3472 #ifdef MAC 3473 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3474 AUDITVNODE1, pathseg, old, oldfd, 3475 cap_rights_init(&rights, 
CAP_RENAMEAT_SOURCE), td); 3476 #else 3477 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3478 pathseg, old, oldfd, 3479 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3480 #endif 3481 3482 if ((error = namei(&fromnd)) != 0) 3483 return (error); 3484 #ifdef MAC 3485 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3486 fromnd.ni_vp, &fromnd.ni_cnd); 3487 VOP_UNLOCK(fromnd.ni_dvp, 0); 3488 if (fromnd.ni_dvp != fromnd.ni_vp) 3489 VOP_UNLOCK(fromnd.ni_vp, 0); 3490 #endif 3491 fvp = fromnd.ni_vp; 3492 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3493 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3494 cap_rights_init(&rights, CAP_RENAMEAT_TARGET), td); 3495 if (fromnd.ni_vp->v_type == VDIR) 3496 tond.ni_cnd.cn_flags |= WILLBEDIR; 3497 if ((error = namei(&tond)) != 0) { 3498 /* Translate error code for rename("dir1", "dir2/."). */ 3499 if (error == EISDIR && fvp->v_type == VDIR) 3500 error = EINVAL; 3501 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3502 vrele(fromnd.ni_dvp); 3503 vrele(fvp); 3504 goto out1; 3505 } 3506 tdvp = tond.ni_dvp; 3507 tvp = tond.ni_vp; 3508 error = vn_start_write(fvp, &mp, V_NOWAIT); 3509 if (error != 0) { 3510 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3511 NDFREE(&tond, NDF_ONLY_PNBUF); 3512 if (tvp != NULL) 3513 vput(tvp); 3514 if (tdvp == tvp) 3515 vrele(tdvp); 3516 else 3517 vput(tdvp); 3518 vrele(fromnd.ni_dvp); 3519 vrele(fvp); 3520 vrele(tond.ni_startdir); 3521 if (fromnd.ni_startdir != NULL) 3522 vrele(fromnd.ni_startdir); 3523 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3524 if (error != 0) 3525 return (error); 3526 goto again; 3527 } 3528 if (tvp != NULL) { 3529 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3530 error = ENOTDIR; 3531 goto out; 3532 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3533 error = EISDIR; 3534 goto out; 3535 } 3536 #ifdef CAPABILITIES 3537 if (newfd != AT_FDCWD) { 3538 /* 3539 * If the target already exists we require CAP_UNLINKAT 3540 * from 
'newfd'. 3541 */ 3542 error = cap_check(&tond.ni_filecaps.fc_rights, 3543 cap_rights_init(&rights, CAP_UNLINKAT)); 3544 if (error != 0) 3545 goto out; 3546 } 3547 #endif 3548 } 3549 if (fvp == tdvp) { 3550 error = EINVAL; 3551 goto out; 3552 } 3553 /* 3554 * If the source is the same as the destination (that is, if they 3555 * are links to the same vnode), then there is nothing to do. 3556 */ 3557 if (fvp == tvp) 3558 error = -1; 3559 #ifdef MAC 3560 else 3561 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3562 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3563 #endif 3564 out: 3565 if (error == 0) { 3566 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3567 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3568 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3569 NDFREE(&tond, NDF_ONLY_PNBUF); 3570 } else { 3571 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3572 NDFREE(&tond, NDF_ONLY_PNBUF); 3573 if (tvp != NULL) 3574 vput(tvp); 3575 if (tdvp == tvp) 3576 vrele(tdvp); 3577 else 3578 vput(tdvp); 3579 vrele(fromnd.ni_dvp); 3580 vrele(fvp); 3581 } 3582 vrele(tond.ni_startdir); 3583 vn_finished_write(mp); 3584 out1: 3585 if (fromnd.ni_startdir) 3586 vrele(fromnd.ni_startdir); 3587 if (error == -1) 3588 return (0); 3589 return (error); 3590 } 3591 3592 /* 3593 * Make a directory file. 
3594 */ 3595 #ifndef _SYS_SYSPROTO_H_ 3596 struct mkdir_args { 3597 char *path; 3598 int mode; 3599 }; 3600 #endif 3601 int 3602 sys_mkdir(td, uap) 3603 struct thread *td; 3604 register struct mkdir_args /* { 3605 char *path; 3606 int mode; 3607 } */ *uap; 3608 { 3609 3610 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3611 uap->mode)); 3612 } 3613 3614 #ifndef _SYS_SYSPROTO_H_ 3615 struct mkdirat_args { 3616 int fd; 3617 char *path; 3618 mode_t mode; 3619 }; 3620 #endif 3621 int 3622 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3623 { 3624 3625 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3626 } 3627 3628 int 3629 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3630 int mode) 3631 { 3632 struct mount *mp; 3633 struct vnode *vp; 3634 struct vattr vattr; 3635 struct nameidata nd; 3636 cap_rights_t rights; 3637 int error; 3638 3639 AUDIT_ARG_MODE(mode); 3640 restart: 3641 bwillwrite(); 3642 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3643 NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), 3644 td); 3645 nd.ni_cnd.cn_flags |= WILLBEDIR; 3646 if ((error = namei(&nd)) != 0) 3647 return (error); 3648 vp = nd.ni_vp; 3649 if (vp != NULL) { 3650 NDFREE(&nd, NDF_ONLY_PNBUF); 3651 /* 3652 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3653 * the strange behaviour of leaving the vnode unlocked 3654 * if the target is the same vnode as the parent. 
3655 */ 3656 if (vp == nd.ni_dvp) 3657 vrele(nd.ni_dvp); 3658 else 3659 vput(nd.ni_dvp); 3660 vrele(vp); 3661 return (EEXIST); 3662 } 3663 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3664 NDFREE(&nd, NDF_ONLY_PNBUF); 3665 vput(nd.ni_dvp); 3666 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3667 return (error); 3668 goto restart; 3669 } 3670 VATTR_NULL(&vattr); 3671 vattr.va_type = VDIR; 3672 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3673 #ifdef MAC 3674 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3675 &vattr); 3676 if (error != 0) 3677 goto out; 3678 #endif 3679 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3680 #ifdef MAC 3681 out: 3682 #endif 3683 NDFREE(&nd, NDF_ONLY_PNBUF); 3684 vput(nd.ni_dvp); 3685 if (error == 0) 3686 vput(nd.ni_vp); 3687 vn_finished_write(mp); 3688 return (error); 3689 } 3690 3691 /* 3692 * Remove a directory file. 3693 */ 3694 #ifndef _SYS_SYSPROTO_H_ 3695 struct rmdir_args { 3696 char *path; 3697 }; 3698 #endif 3699 int 3700 sys_rmdir(td, uap) 3701 struct thread *td; 3702 struct rmdir_args /* { 3703 char *path; 3704 } */ *uap; 3705 { 3706 3707 return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); 3708 } 3709 3710 int 3711 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3712 { 3713 struct mount *mp; 3714 struct vnode *vp; 3715 struct nameidata nd; 3716 cap_rights_t rights; 3717 int error; 3718 3719 restart: 3720 bwillwrite(); 3721 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3722 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3723 if ((error = namei(&nd)) != 0) 3724 return (error); 3725 vp = nd.ni_vp; 3726 if (vp->v_type != VDIR) { 3727 error = ENOTDIR; 3728 goto out; 3729 } 3730 /* 3731 * No rmdir "." please. 3732 */ 3733 if (nd.ni_dvp == vp) { 3734 error = EINVAL; 3735 goto out; 3736 } 3737 /* 3738 * The root of a mounted filesystem cannot be deleted. 
3739 */ 3740 if (vp->v_vflag & VV_ROOT) { 3741 error = EBUSY; 3742 goto out; 3743 } 3744 #ifdef MAC 3745 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3746 &nd.ni_cnd); 3747 if (error != 0) 3748 goto out; 3749 #endif 3750 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3751 NDFREE(&nd, NDF_ONLY_PNBUF); 3752 vput(vp); 3753 if (nd.ni_dvp == vp) 3754 vrele(nd.ni_dvp); 3755 else 3756 vput(nd.ni_dvp); 3757 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3758 return (error); 3759 goto restart; 3760 } 3761 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3762 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3763 vn_finished_write(mp); 3764 out: 3765 NDFREE(&nd, NDF_ONLY_PNBUF); 3766 vput(vp); 3767 if (nd.ni_dvp == vp) 3768 vrele(nd.ni_dvp); 3769 else 3770 vput(nd.ni_dvp); 3771 return (error); 3772 } 3773 3774 #ifdef COMPAT_43 3775 /* 3776 * Read a block of directory entries in a filesystem independent format. 3777 */ 3778 #ifndef _SYS_SYSPROTO_H_ 3779 struct ogetdirentries_args { 3780 int fd; 3781 char *buf; 3782 u_int count; 3783 long *basep; 3784 }; 3785 #endif 3786 int 3787 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3788 { 3789 long loff; 3790 int error; 3791 3792 error = kern_ogetdirentries(td, uap, &loff); 3793 if (error == 0) 3794 error = copyout(&loff, uap->basep, sizeof(long)); 3795 return (error); 3796 } 3797 3798 int 3799 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3800 long *ploff) 3801 { 3802 struct vnode *vp; 3803 struct file *fp; 3804 struct uio auio, kuio; 3805 struct iovec aiov, kiov; 3806 struct dirent *dp, *edp; 3807 cap_rights_t rights; 3808 caddr_t dirbuf; 3809 int error, eofflag, readcnt; 3810 long loff; 3811 off_t foffset; 3812 3813 /* XXX arbitrary sanity limit on `count'. 
*/ 3814 if (uap->count > 64 * 1024) 3815 return (EINVAL); 3816 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); 3817 if (error != 0) 3818 return (error); 3819 if ((fp->f_flag & FREAD) == 0) { 3820 fdrop(fp, td); 3821 return (EBADF); 3822 } 3823 vp = fp->f_vnode; 3824 foffset = foffset_lock(fp, 0); 3825 unionread: 3826 if (vp->v_type != VDIR) { 3827 foffset_unlock(fp, foffset, 0); 3828 fdrop(fp, td); 3829 return (EINVAL); 3830 } 3831 aiov.iov_base = uap->buf; 3832 aiov.iov_len = uap->count; 3833 auio.uio_iov = &aiov; 3834 auio.uio_iovcnt = 1; 3835 auio.uio_rw = UIO_READ; 3836 auio.uio_segflg = UIO_USERSPACE; 3837 auio.uio_td = td; 3838 auio.uio_resid = uap->count; 3839 vn_lock(vp, LK_SHARED | LK_RETRY); 3840 loff = auio.uio_offset = foffset; 3841 #ifdef MAC 3842 error = mac_vnode_check_readdir(td->td_ucred, vp); 3843 if (error != 0) { 3844 VOP_UNLOCK(vp, 0); 3845 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3846 fdrop(fp, td); 3847 return (error); 3848 } 3849 #endif 3850 # if (BYTE_ORDER != LITTLE_ENDIAN) 3851 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3852 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3853 NULL, NULL); 3854 foffset = auio.uio_offset; 3855 } else 3856 # endif 3857 { 3858 kuio = auio; 3859 kuio.uio_iov = &kiov; 3860 kuio.uio_segflg = UIO_SYSSPACE; 3861 kiov.iov_len = uap->count; 3862 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3863 kiov.iov_base = dirbuf; 3864 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3865 NULL, NULL); 3866 foffset = kuio.uio_offset; 3867 if (error == 0) { 3868 readcnt = uap->count - kuio.uio_resid; 3869 edp = (struct dirent *)&dirbuf[readcnt]; 3870 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3871 # if (BYTE_ORDER == LITTLE_ENDIAN) 3872 /* 3873 * The expected low byte of 3874 * dp->d_namlen is our dp->d_type. 3875 * The high MBZ byte of dp->d_namlen 3876 * is our dp->d_namlen. 
3877 */ 3878 dp->d_type = dp->d_namlen; 3879 dp->d_namlen = 0; 3880 # else 3881 /* 3882 * The dp->d_type is the high byte 3883 * of the expected dp->d_namlen, 3884 * so must be zero'ed. 3885 */ 3886 dp->d_type = 0; 3887 # endif 3888 if (dp->d_reclen > 0) { 3889 dp = (struct dirent *) 3890 ((char *)dp + dp->d_reclen); 3891 } else { 3892 error = EIO; 3893 break; 3894 } 3895 } 3896 if (dp >= edp) 3897 error = uiomove(dirbuf, readcnt, &auio); 3898 } 3899 free(dirbuf, M_TEMP); 3900 } 3901 if (error != 0) { 3902 VOP_UNLOCK(vp, 0); 3903 foffset_unlock(fp, foffset, 0); 3904 fdrop(fp, td); 3905 return (error); 3906 } 3907 if (uap->count == auio.uio_resid && 3908 (vp->v_vflag & VV_ROOT) && 3909 (vp->v_mount->mnt_flag & MNT_UNION)) { 3910 struct vnode *tvp = vp; 3911 vp = vp->v_mount->mnt_vnodecovered; 3912 VREF(vp); 3913 fp->f_vnode = vp; 3914 fp->f_data = vp; 3915 foffset = 0; 3916 vput(tvp); 3917 goto unionread; 3918 } 3919 VOP_UNLOCK(vp, 0); 3920 foffset_unlock(fp, foffset, 0); 3921 fdrop(fp, td); 3922 td->td_retval[0] = uap->count - auio.uio_resid; 3923 if (error == 0) 3924 *ploff = loff; 3925 return (error); 3926 } 3927 #endif /* COMPAT_43 */ 3928 3929 /* 3930 * Read a block of directory entries in a filesystem independent format. 
3931 */ 3932 #ifndef _SYS_SYSPROTO_H_ 3933 struct getdirentries_args { 3934 int fd; 3935 char *buf; 3936 u_int count; 3937 long *basep; 3938 }; 3939 #endif 3940 int 3941 sys_getdirentries(td, uap) 3942 struct thread *td; 3943 register struct getdirentries_args /* { 3944 int fd; 3945 char *buf; 3946 u_int count; 3947 long *basep; 3948 } */ *uap; 3949 { 3950 long base; 3951 int error; 3952 3953 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3954 NULL, UIO_USERSPACE); 3955 if (error != 0) 3956 return (error); 3957 if (uap->basep != NULL) 3958 error = copyout(&base, uap->basep, sizeof(long)); 3959 return (error); 3960 } 3961 3962 int 3963 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 3964 long *basep, ssize_t *residp, enum uio_seg bufseg) 3965 { 3966 struct vnode *vp; 3967 struct file *fp; 3968 struct uio auio; 3969 struct iovec aiov; 3970 cap_rights_t rights; 3971 long loff; 3972 int error, eofflag; 3973 off_t foffset; 3974 3975 AUDIT_ARG_FD(fd); 3976 if (count > IOSIZE_MAX) 3977 return (EINVAL); 3978 auio.uio_resid = count; 3979 error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp); 3980 if (error != 0) 3981 return (error); 3982 if ((fp->f_flag & FREAD) == 0) { 3983 fdrop(fp, td); 3984 return (EBADF); 3985 } 3986 vp = fp->f_vnode; 3987 foffset = foffset_lock(fp, 0); 3988 unionread: 3989 if (vp->v_type != VDIR) { 3990 error = EINVAL; 3991 goto fail; 3992 } 3993 aiov.iov_base = buf; 3994 aiov.iov_len = count; 3995 auio.uio_iov = &aiov; 3996 auio.uio_iovcnt = 1; 3997 auio.uio_rw = UIO_READ; 3998 auio.uio_segflg = bufseg; 3999 auio.uio_td = td; 4000 vn_lock(vp, LK_SHARED | LK_RETRY); 4001 AUDIT_ARG_VNODE1(vp); 4002 loff = auio.uio_offset = foffset; 4003 #ifdef MAC 4004 error = mac_vnode_check_readdir(td->td_ucred, vp); 4005 if (error == 0) 4006 #endif 4007 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4008 NULL); 4009 foffset = auio.uio_offset; 4010 if (error != 0) { 4011 VOP_UNLOCK(vp, 0); 4012 goto 
fail; 4013 } 4014 if (count == auio.uio_resid && 4015 (vp->v_vflag & VV_ROOT) && 4016 (vp->v_mount->mnt_flag & MNT_UNION)) { 4017 struct vnode *tvp = vp; 4018 4019 vp = vp->v_mount->mnt_vnodecovered; 4020 VREF(vp); 4021 fp->f_vnode = vp; 4022 fp->f_data = vp; 4023 foffset = 0; 4024 vput(tvp); 4025 goto unionread; 4026 } 4027 VOP_UNLOCK(vp, 0); 4028 *basep = loff; 4029 if (residp != NULL) 4030 *residp = auio.uio_resid; 4031 td->td_retval[0] = count - auio.uio_resid; 4032 fail: 4033 foffset_unlock(fp, foffset, 0); 4034 fdrop(fp, td); 4035 return (error); 4036 } 4037 4038 #ifndef _SYS_SYSPROTO_H_ 4039 struct getdents_args { 4040 int fd; 4041 char *buf; 4042 size_t count; 4043 }; 4044 #endif 4045 int 4046 sys_getdents(td, uap) 4047 struct thread *td; 4048 register struct getdents_args /* { 4049 int fd; 4050 char *buf; 4051 u_int count; 4052 } */ *uap; 4053 { 4054 struct getdirentries_args ap; 4055 4056 ap.fd = uap->fd; 4057 ap.buf = uap->buf; 4058 ap.count = uap->count; 4059 ap.basep = NULL; 4060 return (sys_getdirentries(td, &ap)); 4061 } 4062 4063 /* 4064 * Set the mode mask for creation of filesystem nodes. 4065 */ 4066 #ifndef _SYS_SYSPROTO_H_ 4067 struct umask_args { 4068 int newmask; 4069 }; 4070 #endif 4071 int 4072 sys_umask(td, uap) 4073 struct thread *td; 4074 struct umask_args /* { 4075 int newmask; 4076 } */ *uap; 4077 { 4078 struct filedesc *fdp; 4079 4080 fdp = td->td_proc->p_fd; 4081 FILEDESC_XLOCK(fdp); 4082 td->td_retval[0] = fdp->fd_cmask; 4083 fdp->fd_cmask = uap->newmask & ALLPERMS; 4084 FILEDESC_XUNLOCK(fdp); 4085 return (0); 4086 } 4087 4088 /* 4089 * Void all references to file by ripping underlying filesystem away from 4090 * vnode. 
4091 */ 4092 #ifndef _SYS_SYSPROTO_H_ 4093 struct revoke_args { 4094 char *path; 4095 }; 4096 #endif 4097 int 4098 sys_revoke(td, uap) 4099 struct thread *td; 4100 register struct revoke_args /* { 4101 char *path; 4102 } */ *uap; 4103 { 4104 struct vnode *vp; 4105 struct vattr vattr; 4106 struct nameidata nd; 4107 int error; 4108 4109 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4110 uap->path, td); 4111 if ((error = namei(&nd)) != 0) 4112 return (error); 4113 vp = nd.ni_vp; 4114 NDFREE(&nd, NDF_ONLY_PNBUF); 4115 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4116 error = EINVAL; 4117 goto out; 4118 } 4119 #ifdef MAC 4120 error = mac_vnode_check_revoke(td->td_ucred, vp); 4121 if (error != 0) 4122 goto out; 4123 #endif 4124 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4125 if (error != 0) 4126 goto out; 4127 if (td->td_ucred->cr_uid != vattr.va_uid) { 4128 error = priv_check(td, PRIV_VFS_ADMIN); 4129 if (error != 0) 4130 goto out; 4131 } 4132 if (vcount(vp) > 1) 4133 VOP_REVOKE(vp, REVOKEALL); 4134 out: 4135 vput(vp); 4136 return (error); 4137 } 4138 4139 /* 4140 * Convert a user file descriptor to a kernel file entry and check that, if it 4141 * is a capability, the correct rights are present. A reference on the file 4142 * entry is held upon returning. 4143 */ 4144 int 4145 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4146 { 4147 struct file *fp; 4148 int error; 4149 4150 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); 4151 if (error != 0) 4152 return (error); 4153 4154 /* 4155 * The file could be not of the vnode type, or it may be not 4156 * yet fully initialized, in which case the f_vnode pointer 4157 * may be set, but f_ops is still badfileops. E.g., 4158 * devfs_open() transiently create such situation to 4159 * facilitate csw d_fdopen(). 
4160 * 4161 * Dupfdopen() handling in kern_openat() installs the 4162 * half-baked file into the process descriptor table, allowing 4163 * other thread to dereference it. Guard against the race by 4164 * checking f_ops. 4165 */ 4166 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4167 fdrop(fp, td); 4168 return (EINVAL); 4169 } 4170 *fpp = fp; 4171 return (0); 4172 } 4173 4174 4175 /* 4176 * Get an (NFS) file handle. 4177 */ 4178 #ifndef _SYS_SYSPROTO_H_ 4179 struct lgetfh_args { 4180 char *fname; 4181 fhandle_t *fhp; 4182 }; 4183 #endif 4184 int 4185 sys_lgetfh(td, uap) 4186 struct thread *td; 4187 register struct lgetfh_args *uap; 4188 { 4189 struct nameidata nd; 4190 fhandle_t fh; 4191 register struct vnode *vp; 4192 int error; 4193 4194 error = priv_check(td, PRIV_VFS_GETFH); 4195 if (error != 0) 4196 return (error); 4197 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4198 uap->fname, td); 4199 error = namei(&nd); 4200 if (error != 0) 4201 return (error); 4202 NDFREE(&nd, NDF_ONLY_PNBUF); 4203 vp = nd.ni_vp; 4204 bzero(&fh, sizeof(fh)); 4205 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4206 error = VOP_VPTOFH(vp, &fh.fh_fid); 4207 vput(vp); 4208 if (error == 0) 4209 error = copyout(&fh, uap->fhp, sizeof (fh)); 4210 return (error); 4211 } 4212 4213 #ifndef _SYS_SYSPROTO_H_ 4214 struct getfh_args { 4215 char *fname; 4216 fhandle_t *fhp; 4217 }; 4218 #endif 4219 int 4220 sys_getfh(td, uap) 4221 struct thread *td; 4222 register struct getfh_args *uap; 4223 { 4224 struct nameidata nd; 4225 fhandle_t fh; 4226 register struct vnode *vp; 4227 int error; 4228 4229 error = priv_check(td, PRIV_VFS_GETFH); 4230 if (error != 0) 4231 return (error); 4232 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4233 uap->fname, td); 4234 error = namei(&nd); 4235 if (error != 0) 4236 return (error); 4237 NDFREE(&nd, NDF_ONLY_PNBUF); 4238 vp = nd.ni_vp; 4239 bzero(&fh, sizeof(fh)); 4240 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4241 error 
= VOP_VPTOFH(vp, &fh.fh_fid); 4242 vput(vp); 4243 if (error == 0) 4244 error = copyout(&fh, uap->fhp, sizeof (fh)); 4245 return (error); 4246 } 4247 4248 /* 4249 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4250 * open descriptor. 4251 * 4252 * warning: do not remove the priv_check() call or this becomes one giant 4253 * security hole. 4254 */ 4255 #ifndef _SYS_SYSPROTO_H_ 4256 struct fhopen_args { 4257 const struct fhandle *u_fhp; 4258 int flags; 4259 }; 4260 #endif 4261 int 4262 sys_fhopen(td, uap) 4263 struct thread *td; 4264 struct fhopen_args /* { 4265 const struct fhandle *u_fhp; 4266 int flags; 4267 } */ *uap; 4268 { 4269 struct mount *mp; 4270 struct vnode *vp; 4271 struct fhandle fhp; 4272 struct file *fp; 4273 int fmode, error; 4274 int indx; 4275 4276 error = priv_check(td, PRIV_VFS_FHOPEN); 4277 if (error != 0) 4278 return (error); 4279 indx = -1; 4280 fmode = FFLAGS(uap->flags); 4281 /* why not allow a non-read/write open for our lockd? */ 4282 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4283 return (EINVAL); 4284 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4285 if (error != 0) 4286 return(error); 4287 /* find the mount point */ 4288 mp = vfs_busyfs(&fhp.fh_fsid); 4289 if (mp == NULL) 4290 return (ESTALE); 4291 /* now give me my vnode, it gets returned to me locked */ 4292 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4293 vfs_unbusy(mp); 4294 if (error != 0) 4295 return (error); 4296 4297 error = falloc_noinstall(td, &fp); 4298 if (error != 0) { 4299 vput(vp); 4300 return (error); 4301 } 4302 /* 4303 * An extra reference on `fp' has been held for us by 4304 * falloc_noinstall(). 
4305 */ 4306 4307 #ifdef INVARIANTS 4308 td->td_dupfd = -1; 4309 #endif 4310 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4311 if (error != 0) { 4312 KASSERT(fp->f_ops == &badfileops, 4313 ("VOP_OPEN in fhopen() set f_ops")); 4314 KASSERT(td->td_dupfd < 0, 4315 ("fhopen() encountered fdopen()")); 4316 4317 vput(vp); 4318 goto bad; 4319 } 4320 #ifdef INVARIANTS 4321 td->td_dupfd = 0; 4322 #endif 4323 fp->f_vnode = vp; 4324 fp->f_seqcount = 1; 4325 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4326 &vnops); 4327 VOP_UNLOCK(vp, 0); 4328 if ((fmode & O_TRUNC) != 0) { 4329 error = fo_truncate(fp, 0, td->td_ucred, td); 4330 if (error != 0) 4331 goto bad; 4332 } 4333 4334 error = finstall(td, fp, &indx, fmode, NULL); 4335 bad: 4336 fdrop(fp, td); 4337 td->td_retval[0] = indx; 4338 return (error); 4339 } 4340 4341 /* 4342 * Stat an (NFS) file handle. 4343 */ 4344 #ifndef _SYS_SYSPROTO_H_ 4345 struct fhstat_args { 4346 struct fhandle *u_fhp; 4347 struct stat *sb; 4348 }; 4349 #endif 4350 int 4351 sys_fhstat(td, uap) 4352 struct thread *td; 4353 register struct fhstat_args /* { 4354 struct fhandle *u_fhp; 4355 struct stat *sb; 4356 } */ *uap; 4357 { 4358 struct stat sb; 4359 struct fhandle fh; 4360 int error; 4361 4362 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4363 if (error != 0) 4364 return (error); 4365 error = kern_fhstat(td, fh, &sb); 4366 if (error == 0) 4367 error = copyout(&sb, uap->sb, sizeof(sb)); 4368 return (error); 4369 } 4370 4371 int 4372 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4373 { 4374 struct mount *mp; 4375 struct vnode *vp; 4376 int error; 4377 4378 error = priv_check(td, PRIV_VFS_FHSTAT); 4379 if (error != 0) 4380 return (error); 4381 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4382 return (ESTALE); 4383 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4384 vfs_unbusy(mp); 4385 if (error != 0) 4386 return (error); 4387 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4388 vput(vp); 
4389 return (error); 4390 } 4391 4392 /* 4393 * Implement fstatfs() for (NFS) file handles. 4394 */ 4395 #ifndef _SYS_SYSPROTO_H_ 4396 struct fhstatfs_args { 4397 struct fhandle *u_fhp; 4398 struct statfs *buf; 4399 }; 4400 #endif 4401 int 4402 sys_fhstatfs(td, uap) 4403 struct thread *td; 4404 struct fhstatfs_args /* { 4405 struct fhandle *u_fhp; 4406 struct statfs *buf; 4407 } */ *uap; 4408 { 4409 struct statfs sf; 4410 fhandle_t fh; 4411 int error; 4412 4413 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4414 if (error != 0) 4415 return (error); 4416 error = kern_fhstatfs(td, fh, &sf); 4417 if (error != 0) 4418 return (error); 4419 return (copyout(&sf, uap->buf, sizeof(sf))); 4420 } 4421 4422 int 4423 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4424 { 4425 struct statfs *sp; 4426 struct mount *mp; 4427 struct vnode *vp; 4428 int error; 4429 4430 error = priv_check(td, PRIV_VFS_FHSTATFS); 4431 if (error != 0) 4432 return (error); 4433 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4434 return (ESTALE); 4435 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4436 if (error != 0) { 4437 vfs_unbusy(mp); 4438 return (error); 4439 } 4440 vput(vp); 4441 error = prison_canseemount(td->td_ucred, mp); 4442 if (error != 0) 4443 goto out; 4444 #ifdef MAC 4445 error = mac_mount_check_stat(td->td_ucred, mp); 4446 if (error != 0) 4447 goto out; 4448 #endif 4449 /* 4450 * Set these in case the underlying filesystem fails to do so. 
4451 */ 4452 sp = &mp->mnt_stat; 4453 sp->f_version = STATFS_VERSION; 4454 sp->f_namemax = NAME_MAX; 4455 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4456 error = VFS_STATFS(mp, sp); 4457 if (error == 0) 4458 *buf = *sp; 4459 out: 4460 vfs_unbusy(mp); 4461 return (error); 4462 } 4463 4464 int 4465 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4466 { 4467 struct file *fp; 4468 struct mount *mp; 4469 struct vnode *vp; 4470 cap_rights_t rights; 4471 off_t olen, ooffset; 4472 int error; 4473 4474 if (offset < 0 || len <= 0) 4475 return (EINVAL); 4476 /* Check for wrap. */ 4477 if (offset > OFF_MAX - len) 4478 return (EFBIG); 4479 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4480 if (error != 0) 4481 return (error); 4482 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4483 error = ESPIPE; 4484 goto out; 4485 } 4486 if ((fp->f_flag & FWRITE) == 0) { 4487 error = EBADF; 4488 goto out; 4489 } 4490 if (fp->f_type != DTYPE_VNODE) { 4491 error = ENODEV; 4492 goto out; 4493 } 4494 vp = fp->f_vnode; 4495 if (vp->v_type != VREG) { 4496 error = ENODEV; 4497 goto out; 4498 } 4499 4500 /* Allocating blocks may take a long time, so iterate. 
*/ 4501 for (;;) { 4502 olen = len; 4503 ooffset = offset; 4504 4505 bwillwrite(); 4506 mp = NULL; 4507 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4508 if (error != 0) 4509 break; 4510 error = vn_lock(vp, LK_EXCLUSIVE); 4511 if (error != 0) { 4512 vn_finished_write(mp); 4513 break; 4514 } 4515 #ifdef MAC 4516 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4517 if (error == 0) 4518 #endif 4519 error = VOP_ALLOCATE(vp, &offset, &len); 4520 VOP_UNLOCK(vp, 0); 4521 vn_finished_write(mp); 4522 4523 if (olen + ooffset != offset + len) { 4524 panic("offset + len changed from %jx/%jx to %jx/%jx", 4525 ooffset, olen, offset, len); 4526 } 4527 if (error != 0 || len == 0) 4528 break; 4529 KASSERT(olen > len, ("Iteration did not make progress?")); 4530 maybe_yield(); 4531 } 4532 out: 4533 fdrop(fp, td); 4534 return (error); 4535 } 4536 4537 int 4538 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4539 { 4540 int error; 4541 4542 error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len); 4543 return (kern_posix_error(td, error)); 4544 } 4545 4546 /* 4547 * Unlike madvise(2), we do not make a best effort to remember every 4548 * possible caching hint. Instead, we remember the last setting with 4549 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4550 * region of any current setting. 
4551 */ 4552 int 4553 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4554 int advice) 4555 { 4556 struct fadvise_info *fa, *new; 4557 struct file *fp; 4558 struct vnode *vp; 4559 cap_rights_t rights; 4560 off_t end; 4561 int error; 4562 4563 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4564 return (EINVAL); 4565 switch (advice) { 4566 case POSIX_FADV_SEQUENTIAL: 4567 case POSIX_FADV_RANDOM: 4568 case POSIX_FADV_NOREUSE: 4569 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4570 break; 4571 case POSIX_FADV_NORMAL: 4572 case POSIX_FADV_WILLNEED: 4573 case POSIX_FADV_DONTNEED: 4574 new = NULL; 4575 break; 4576 default: 4577 return (EINVAL); 4578 } 4579 /* XXX: CAP_POSIX_FADVISE? */ 4580 error = fget(td, fd, cap_rights_init(&rights), &fp); 4581 if (error != 0) 4582 goto out; 4583 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4584 error = ESPIPE; 4585 goto out; 4586 } 4587 if (fp->f_type != DTYPE_VNODE) { 4588 error = ENODEV; 4589 goto out; 4590 } 4591 vp = fp->f_vnode; 4592 if (vp->v_type != VREG) { 4593 error = ENODEV; 4594 goto out; 4595 } 4596 if (len == 0) 4597 end = OFF_MAX; 4598 else 4599 end = offset + len - 1; 4600 switch (advice) { 4601 case POSIX_FADV_SEQUENTIAL: 4602 case POSIX_FADV_RANDOM: 4603 case POSIX_FADV_NOREUSE: 4604 /* 4605 * Try to merge any existing non-standard region with 4606 * this new region if possible, otherwise create a new 4607 * non-standard region for this request. 
4608 */ 4609 mtx_pool_lock(mtxpool_sleep, fp); 4610 fa = fp->f_advice; 4611 if (fa != NULL && fa->fa_advice == advice && 4612 ((fa->fa_start <= end && fa->fa_end >= offset) || 4613 (end != OFF_MAX && fa->fa_start == end + 1) || 4614 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4615 if (offset < fa->fa_start) 4616 fa->fa_start = offset; 4617 if (end > fa->fa_end) 4618 fa->fa_end = end; 4619 } else { 4620 new->fa_advice = advice; 4621 new->fa_start = offset; 4622 new->fa_end = end; 4623 fp->f_advice = new; 4624 new = fa; 4625 } 4626 mtx_pool_unlock(mtxpool_sleep, fp); 4627 break; 4628 case POSIX_FADV_NORMAL: 4629 /* 4630 * If a the "normal" region overlaps with an existing 4631 * non-standard region, trim or remove the 4632 * non-standard region. 4633 */ 4634 mtx_pool_lock(mtxpool_sleep, fp); 4635 fa = fp->f_advice; 4636 if (fa != NULL) { 4637 if (offset <= fa->fa_start && end >= fa->fa_end) { 4638 new = fa; 4639 fp->f_advice = NULL; 4640 } else if (offset <= fa->fa_start && 4641 end >= fa->fa_start) 4642 fa->fa_start = end + 1; 4643 else if (offset <= fa->fa_end && end >= fa->fa_end) 4644 fa->fa_end = offset - 1; 4645 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4646 /* 4647 * If the "normal" region is a middle 4648 * portion of the existing 4649 * non-standard region, just remove 4650 * the whole thing rather than picking 4651 * one side or the other to 4652 * preserve. 
4653 */ 4654 new = fa; 4655 fp->f_advice = NULL; 4656 } 4657 } 4658 mtx_pool_unlock(mtxpool_sleep, fp); 4659 break; 4660 case POSIX_FADV_WILLNEED: 4661 case POSIX_FADV_DONTNEED: 4662 error = VOP_ADVISE(vp, offset, end, advice); 4663 break; 4664 } 4665 out: 4666 if (fp != NULL) 4667 fdrop(fp, td); 4668 free(new, M_FADVISE); 4669 return (error); 4670 } 4671 4672 int 4673 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4674 { 4675 int error; 4676 4677 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4678 uap->advice); 4679 return (kern_posix_error(td, error)); 4680 } 4681