1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/sysent.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/sysproto.h> 55 #include <sys/namei.h> 56 #include <sys/filedesc.h> 57 #include <sys/kernel.h> 58 #include <sys/fcntl.h> 59 #include <sys/file.h> 60 #include <sys/filio.h> 61 #include <sys/limits.h> 62 #include <sys/linker.h> 63 #include <sys/rwlock.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97 static int kern_chflagsat(struct thread *td, int fd, const char *path, 98 enum uio_seg pathseg, u_long flags, int atflag); 99 static int setfflags(struct thread *td, struct vnode *, u_long); 100 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 101 static int getutimens(const struct timespec *, enum uio_seg, 102 struct timespec *, int *); 103 static int setutimes(struct thread *td, struct vnode *, 104 const 
struct timespec *, int, int); 105 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 106 struct thread *td); 107 108 /* 109 * Sync each mounted filesystem. 110 */ 111 #ifndef _SYS_SYSPROTO_H_ 112 struct sync_args { 113 int dummy; 114 }; 115 #endif 116 /* ARGSUSED */ 117 int 118 sys_sync(td, uap) 119 struct thread *td; 120 struct sync_args *uap; 121 { 122 struct mount *mp, *nmp; 123 int save; 124 125 mtx_lock(&mountlist_mtx); 126 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 127 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 128 nmp = TAILQ_NEXT(mp, mnt_list); 129 continue; 130 } 131 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 132 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 133 save = curthread_pflags_set(TDP_SYNCIO); 134 vfs_msync(mp, MNT_NOWAIT); 135 VFS_SYNC(mp, MNT_NOWAIT); 136 curthread_pflags_restore(save); 137 vn_finished_write(mp); 138 } 139 mtx_lock(&mountlist_mtx); 140 nmp = TAILQ_NEXT(mp, mnt_list); 141 vfs_unbusy(mp); 142 } 143 mtx_unlock(&mountlist_mtx); 144 return (0); 145 } 146 147 /* 148 * Change filesystem quotas. 
149 */ 150 #ifndef _SYS_SYSPROTO_H_ 151 struct quotactl_args { 152 char *path; 153 int cmd; 154 int uid; 155 caddr_t arg; 156 }; 157 #endif 158 int 159 sys_quotactl(td, uap) 160 struct thread *td; 161 register struct quotactl_args /* { 162 char *path; 163 int cmd; 164 int uid; 165 caddr_t arg; 166 } */ *uap; 167 { 168 struct mount *mp; 169 struct nameidata nd; 170 int error; 171 172 AUDIT_ARG_CMD(uap->cmd); 173 AUDIT_ARG_UID(uap->uid); 174 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 175 return (EPERM); 176 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 177 uap->path, td); 178 if ((error = namei(&nd)) != 0) 179 return (error); 180 NDFREE(&nd, NDF_ONLY_PNBUF); 181 mp = nd.ni_vp->v_mount; 182 vfs_ref(mp); 183 vput(nd.ni_vp); 184 error = vfs_busy(mp, 0); 185 vfs_rel(mp); 186 if (error != 0) 187 return (error); 188 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 189 190 /* 191 * Since quota on operation typically needs to open quota 192 * file, the Q_QUOTAON handler needs to unbusy the mount point 193 * before calling into namei. Otherwise, unmount might be 194 * started between two vfs_busy() invocations (first is our, 195 * second is from mount point cross-walk code in lookup()), 196 * causing deadlock. 197 * 198 * Require that Q_QUOTAON handles the vfs_busy() reference on 199 * its own, always returning with ubusied mount point. 200 */ 201 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 202 vfs_unbusy(mp); 203 return (error); 204 } 205 206 /* 207 * Used by statfs conversion routines to scale the block size up if 208 * necessary so that all of the block counts are <= 'max_size'. Note 209 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 210 * value of 'n'. 
211 */ 212 void 213 statfs_scale_blocks(struct statfs *sf, long max_size) 214 { 215 uint64_t count; 216 int shift; 217 218 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 219 220 /* 221 * Attempt to scale the block counts to give a more accurate 222 * overview to userland of the ratio of free space to used 223 * space. To do this, find the largest block count and compute 224 * a divisor that lets it fit into a signed integer <= max_size. 225 */ 226 if (sf->f_bavail < 0) 227 count = -sf->f_bavail; 228 else 229 count = sf->f_bavail; 230 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 231 if (count <= max_size) 232 return; 233 234 count >>= flsl(max_size); 235 shift = 0; 236 while (count > 0) { 237 shift++; 238 count >>=1; 239 } 240 241 sf->f_bsize <<= shift; 242 sf->f_blocks >>= shift; 243 sf->f_bfree >>= shift; 244 sf->f_bavail >>= shift; 245 } 246 247 static int 248 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 249 { 250 struct statfs *sp; 251 int error; 252 253 if (mp == NULL) 254 return (EBADF); 255 error = vfs_busy(mp, 0); 256 vfs_rel(mp); 257 if (error != 0) 258 return (error); 259 #ifdef MAC 260 error = mac_mount_check_stat(td->td_ucred, mp); 261 if (error != 0) 262 goto out; 263 #endif 264 /* 265 * Set these in case the underlying filesystem fails to do so. 266 */ 267 sp = &mp->mnt_stat; 268 sp->f_version = STATFS_VERSION; 269 sp->f_namemax = NAME_MAX; 270 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 271 error = VFS_STATFS(mp, sp); 272 if (error != 0) 273 goto out; 274 *buf = *sp; 275 if (priv_check(td, PRIV_VFS_GENERATION)) { 276 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 277 prison_enforce_statfs(td->td_ucred, mp, buf); 278 } 279 out: 280 vfs_unbusy(mp); 281 return (error); 282 } 283 284 /* 285 * Get filesystem statistics. 
286 */ 287 #ifndef _SYS_SYSPROTO_H_ 288 struct statfs_args { 289 char *path; 290 struct statfs *buf; 291 }; 292 #endif 293 int 294 sys_statfs(td, uap) 295 struct thread *td; 296 register struct statfs_args /* { 297 char *path; 298 struct statfs *buf; 299 } */ *uap; 300 { 301 struct statfs sf; 302 int error; 303 304 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 305 if (error == 0) 306 error = copyout(&sf, uap->buf, sizeof(sf)); 307 return (error); 308 } 309 310 int 311 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 312 struct statfs *buf) 313 { 314 struct mount *mp; 315 struct nameidata nd; 316 int error; 317 318 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 319 pathseg, path, td); 320 error = namei(&nd); 321 if (error != 0) 322 return (error); 323 mp = nd.ni_vp->v_mount; 324 vfs_ref(mp); 325 NDFREE(&nd, NDF_ONLY_PNBUF); 326 vput(nd.ni_vp); 327 return (kern_do_statfs(td, mp, buf)); 328 } 329 330 /* 331 * Get filesystem statistics. 332 */ 333 #ifndef _SYS_SYSPROTO_H_ 334 struct fstatfs_args { 335 int fd; 336 struct statfs *buf; 337 }; 338 #endif 339 int 340 sys_fstatfs(td, uap) 341 struct thread *td; 342 register struct fstatfs_args /* { 343 int fd; 344 struct statfs *buf; 345 } */ *uap; 346 { 347 struct statfs sf; 348 int error; 349 350 error = kern_fstatfs(td, uap->fd, &sf); 351 if (error == 0) 352 error = copyout(&sf, uap->buf, sizeof(sf)); 353 return (error); 354 } 355 356 int 357 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 358 { 359 struct file *fp; 360 struct mount *mp; 361 struct vnode *vp; 362 cap_rights_t rights; 363 int error; 364 365 AUDIT_ARG_FD(fd); 366 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp); 367 if (error != 0) 368 return (error); 369 vp = fp->f_vnode; 370 vn_lock(vp, LK_SHARED | LK_RETRY); 371 #ifdef AUDIT 372 AUDIT_ARG_VNODE1(vp); 373 #endif 374 mp = vp->v_mount; 375 if (mp != NULL) 376 vfs_ref(mp); 377 VOP_UNLOCK(vp, 0); 378 fdrop(fp, td); 379 return 
(kern_do_statfs(td, mp, buf)); 380 } 381 382 /* 383 * Get statistics on all filesystems. 384 */ 385 #ifndef _SYS_SYSPROTO_H_ 386 struct getfsstat_args { 387 struct statfs *buf; 388 long bufsize; 389 int mode; 390 }; 391 #endif 392 int 393 sys_getfsstat(td, uap) 394 struct thread *td; 395 register struct getfsstat_args /* { 396 struct statfs *buf; 397 long bufsize; 398 int mode; 399 } */ *uap; 400 { 401 size_t count; 402 int error; 403 404 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 405 return (EINVAL); 406 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 407 UIO_USERSPACE, uap->mode); 408 if (error == 0) 409 td->td_retval[0] = count; 410 return (error); 411 } 412 413 /* 414 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 415 * The caller is responsible for freeing memory which will be allocated 416 * in '*buf'. 417 */ 418 int 419 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 420 size_t *countp, enum uio_seg bufseg, int mode) 421 { 422 struct mount *mp, *nmp; 423 struct statfs *sfsp, *sp, sb, *tofree; 424 size_t count, maxcount; 425 int error; 426 427 switch (mode) { 428 case MNT_WAIT: 429 case MNT_NOWAIT: 430 break; 431 default: 432 return (EINVAL); 433 } 434 restart: 435 maxcount = bufsize / sizeof(struct statfs); 436 if (bufsize == 0) { 437 sfsp = NULL; 438 tofree = NULL; 439 } else if (bufseg == UIO_USERSPACE) { 440 sfsp = *buf; 441 tofree = NULL; 442 } else /* if (bufseg == UIO_SYSSPACE) */ { 443 count = 0; 444 mtx_lock(&mountlist_mtx); 445 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 446 count++; 447 } 448 mtx_unlock(&mountlist_mtx); 449 if (maxcount > count) 450 maxcount = count; 451 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 452 M_TEMP, M_WAITOK); 453 } 454 count = 0; 455 mtx_lock(&mountlist_mtx); 456 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 457 if (prison_canseemount(td->td_ucred, mp) != 0) { 458 nmp = TAILQ_NEXT(mp, mnt_list); 459 continue; 460 } 461 #ifdef MAC 462 if 
(mac_mount_check_stat(td->td_ucred, mp) != 0) { 463 nmp = TAILQ_NEXT(mp, mnt_list); 464 continue; 465 } 466 #endif 467 if (mode == MNT_WAIT) { 468 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 469 /* 470 * If vfs_busy() failed, and MBF_NOWAIT 471 * wasn't passed, then the mp is gone. 472 * Furthermore, because of MBF_MNTLSTLOCK, 473 * the mountlist_mtx was dropped. We have 474 * no other choice than to start over. 475 */ 476 mtx_unlock(&mountlist_mtx); 477 free(tofree, M_TEMP); 478 goto restart; 479 } 480 } else { 481 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 482 nmp = TAILQ_NEXT(mp, mnt_list); 483 continue; 484 } 485 } 486 if (sfsp && count < maxcount) { 487 sp = &mp->mnt_stat; 488 /* 489 * Set these in case the underlying filesystem 490 * fails to do so. 491 */ 492 sp->f_version = STATFS_VERSION; 493 sp->f_namemax = NAME_MAX; 494 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 495 /* 496 * If MNT_NOWAIT is specified, do not refresh 497 * the fsstat cache. 498 */ 499 if (mode != MNT_NOWAIT) { 500 error = VFS_STATFS(mp, sp); 501 if (error != 0) { 502 mtx_lock(&mountlist_mtx); 503 nmp = TAILQ_NEXT(mp, mnt_list); 504 vfs_unbusy(mp); 505 continue; 506 } 507 } 508 if (priv_check(td, PRIV_VFS_GENERATION)) { 509 bcopy(sp, &sb, sizeof(sb)); 510 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 511 prison_enforce_statfs(td->td_ucred, mp, &sb); 512 sp = &sb; 513 } 514 if (bufseg == UIO_SYSSPACE) 515 bcopy(sp, sfsp, sizeof(*sp)); 516 else /* if (bufseg == UIO_USERSPACE) */ { 517 error = copyout(sp, sfsp, sizeof(*sp)); 518 if (error != 0) { 519 vfs_unbusy(mp); 520 return (error); 521 } 522 } 523 sfsp++; 524 } 525 count++; 526 mtx_lock(&mountlist_mtx); 527 nmp = TAILQ_NEXT(mp, mnt_list); 528 vfs_unbusy(mp); 529 } 530 mtx_unlock(&mountlist_mtx); 531 if (sfsp && count > maxcount) 532 *countp = maxcount; 533 else 534 *countp = count; 535 return (0); 536 } 537 538 #ifdef COMPAT_FREEBSD4 539 /* 540 * Get old format filesystem statistics. 
541 */ 542 static void cvtstatfs(struct statfs *, struct ostatfs *); 543 544 #ifndef _SYS_SYSPROTO_H_ 545 struct freebsd4_statfs_args { 546 char *path; 547 struct ostatfs *buf; 548 }; 549 #endif 550 int 551 freebsd4_statfs(td, uap) 552 struct thread *td; 553 struct freebsd4_statfs_args /* { 554 char *path; 555 struct ostatfs *buf; 556 } */ *uap; 557 { 558 struct ostatfs osb; 559 struct statfs sf; 560 int error; 561 562 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 563 if (error != 0) 564 return (error); 565 cvtstatfs(&sf, &osb); 566 return (copyout(&osb, uap->buf, sizeof(osb))); 567 } 568 569 /* 570 * Get filesystem statistics. 571 */ 572 #ifndef _SYS_SYSPROTO_H_ 573 struct freebsd4_fstatfs_args { 574 int fd; 575 struct ostatfs *buf; 576 }; 577 #endif 578 int 579 freebsd4_fstatfs(td, uap) 580 struct thread *td; 581 struct freebsd4_fstatfs_args /* { 582 int fd; 583 struct ostatfs *buf; 584 } */ *uap; 585 { 586 struct ostatfs osb; 587 struct statfs sf; 588 int error; 589 590 error = kern_fstatfs(td, uap->fd, &sf); 591 if (error != 0) 592 return (error); 593 cvtstatfs(&sf, &osb); 594 return (copyout(&osb, uap->buf, sizeof(osb))); 595 } 596 597 /* 598 * Get statistics on all filesystems. 
599 */ 600 #ifndef _SYS_SYSPROTO_H_ 601 struct freebsd4_getfsstat_args { 602 struct ostatfs *buf; 603 long bufsize; 604 int mode; 605 }; 606 #endif 607 int 608 freebsd4_getfsstat(td, uap) 609 struct thread *td; 610 register struct freebsd4_getfsstat_args /* { 611 struct ostatfs *buf; 612 long bufsize; 613 int mode; 614 } */ *uap; 615 { 616 struct statfs *buf, *sp; 617 struct ostatfs osb; 618 size_t count, size; 619 int error; 620 621 if (uap->bufsize < 0) 622 return (EINVAL); 623 count = uap->bufsize / sizeof(struct ostatfs); 624 if (count > SIZE_MAX / sizeof(struct statfs)) 625 return (EINVAL); 626 size = count * sizeof(struct statfs); 627 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 628 uap->mode); 629 td->td_retval[0] = count; 630 if (size != 0) { 631 sp = buf; 632 while (count != 0 && error == 0) { 633 cvtstatfs(sp, &osb); 634 error = copyout(&osb, uap->buf, sizeof(osb)); 635 sp++; 636 uap->buf++; 637 count--; 638 } 639 free(buf, M_TEMP); 640 } 641 return (error); 642 } 643 644 /* 645 * Implement fstatfs() for (NFS) file handles. 646 */ 647 #ifndef _SYS_SYSPROTO_H_ 648 struct freebsd4_fhstatfs_args { 649 struct fhandle *u_fhp; 650 struct ostatfs *buf; 651 }; 652 #endif 653 int 654 freebsd4_fhstatfs(td, uap) 655 struct thread *td; 656 struct freebsd4_fhstatfs_args /* { 657 struct fhandle *u_fhp; 658 struct ostatfs *buf; 659 } */ *uap; 660 { 661 struct ostatfs osb; 662 struct statfs sf; 663 fhandle_t fh; 664 int error; 665 666 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 667 if (error != 0) 668 return (error); 669 error = kern_fhstatfs(td, fh, &sf); 670 if (error != 0) 671 return (error); 672 cvtstatfs(&sf, &osb); 673 return (copyout(&osb, uap->buf, sizeof(osb))); 674 } 675 676 /* 677 * Convert a new format statfs structure to an old format statfs structure. 
678 */ 679 static void 680 cvtstatfs(nsp, osp) 681 struct statfs *nsp; 682 struct ostatfs *osp; 683 { 684 685 statfs_scale_blocks(nsp, LONG_MAX); 686 bzero(osp, sizeof(*osp)); 687 osp->f_bsize = nsp->f_bsize; 688 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 689 osp->f_blocks = nsp->f_blocks; 690 osp->f_bfree = nsp->f_bfree; 691 osp->f_bavail = nsp->f_bavail; 692 osp->f_files = MIN(nsp->f_files, LONG_MAX); 693 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 694 osp->f_owner = nsp->f_owner; 695 osp->f_type = nsp->f_type; 696 osp->f_flags = nsp->f_flags; 697 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 698 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 699 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 700 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 701 strlcpy(osp->f_fstypename, nsp->f_fstypename, 702 MIN(MFSNAMELEN, OMFSNAMELEN)); 703 strlcpy(osp->f_mntonname, nsp->f_mntonname, 704 MIN(MNAMELEN, OMNAMELEN)); 705 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 706 MIN(MNAMELEN, OMNAMELEN)); 707 osp->f_fsid = nsp->f_fsid; 708 } 709 #endif /* COMPAT_FREEBSD4 */ 710 711 /* 712 * Change current working directory to a given file descriptor. 
713 */ 714 #ifndef _SYS_SYSPROTO_H_ 715 struct fchdir_args { 716 int fd; 717 }; 718 #endif 719 int 720 sys_fchdir(td, uap) 721 struct thread *td; 722 struct fchdir_args /* { 723 int fd; 724 } */ *uap; 725 { 726 struct vnode *vp, *tdp; 727 struct mount *mp; 728 struct file *fp; 729 cap_rights_t rights; 730 int error; 731 732 AUDIT_ARG_FD(uap->fd); 733 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 734 &fp); 735 if (error != 0) 736 return (error); 737 vp = fp->f_vnode; 738 vrefact(vp); 739 fdrop(fp, td); 740 vn_lock(vp, LK_SHARED | LK_RETRY); 741 AUDIT_ARG_VNODE1(vp); 742 error = change_dir(vp, td); 743 while (!error && (mp = vp->v_mountedhere) != NULL) { 744 if (vfs_busy(mp, 0)) 745 continue; 746 error = VFS_ROOT(mp, LK_SHARED, &tdp); 747 vfs_unbusy(mp); 748 if (error != 0) 749 break; 750 vput(vp); 751 vp = tdp; 752 } 753 if (error != 0) { 754 vput(vp); 755 return (error); 756 } 757 VOP_UNLOCK(vp, 0); 758 pwd_chdir(td, vp); 759 return (0); 760 } 761 762 /* 763 * Change current working directory (``.''). 764 */ 765 #ifndef _SYS_SYSPROTO_H_ 766 struct chdir_args { 767 char *path; 768 }; 769 #endif 770 int 771 sys_chdir(td, uap) 772 struct thread *td; 773 struct chdir_args /* { 774 char *path; 775 } */ *uap; 776 { 777 778 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 779 } 780 781 int 782 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 783 { 784 struct nameidata nd; 785 int error; 786 787 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 788 pathseg, path, td); 789 if ((error = namei(&nd)) != 0) 790 return (error); 791 if ((error = change_dir(nd.ni_vp, td)) != 0) { 792 vput(nd.ni_vp); 793 NDFREE(&nd, NDF_ONLY_PNBUF); 794 return (error); 795 } 796 VOP_UNLOCK(nd.ni_vp, 0); 797 NDFREE(&nd, NDF_ONLY_PNBUF); 798 pwd_chdir(td, nd.ni_vp); 799 return (0); 800 } 801 802 /* 803 * Change notion of root (``/'') directory. 
804 */ 805 #ifndef _SYS_SYSPROTO_H_ 806 struct chroot_args { 807 char *path; 808 }; 809 #endif 810 int 811 sys_chroot(td, uap) 812 struct thread *td; 813 struct chroot_args /* { 814 char *path; 815 } */ *uap; 816 { 817 struct nameidata nd; 818 int error; 819 820 error = priv_check(td, PRIV_VFS_CHROOT); 821 if (error != 0) 822 return (error); 823 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 824 UIO_USERSPACE, uap->path, td); 825 error = namei(&nd); 826 if (error != 0) 827 goto error; 828 error = change_dir(nd.ni_vp, td); 829 if (error != 0) 830 goto e_vunlock; 831 #ifdef MAC 832 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 833 if (error != 0) 834 goto e_vunlock; 835 #endif 836 VOP_UNLOCK(nd.ni_vp, 0); 837 error = pwd_chroot(td, nd.ni_vp); 838 vrele(nd.ni_vp); 839 NDFREE(&nd, NDF_ONLY_PNBUF); 840 return (error); 841 e_vunlock: 842 vput(nd.ni_vp); 843 error: 844 NDFREE(&nd, NDF_ONLY_PNBUF); 845 return (error); 846 } 847 848 /* 849 * Common routine for chroot and chdir. Callers must provide a locked vnode 850 * instance. 
851 */ 852 int 853 change_dir(vp, td) 854 struct vnode *vp; 855 struct thread *td; 856 { 857 #ifdef MAC 858 int error; 859 #endif 860 861 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 862 if (vp->v_type != VDIR) 863 return (ENOTDIR); 864 #ifdef MAC 865 error = mac_vnode_check_chdir(td->td_ucred, vp); 866 if (error != 0) 867 return (error); 868 #endif 869 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 870 } 871 872 static __inline void 873 flags_to_rights(int flags, cap_rights_t *rightsp) 874 { 875 876 if (flags & O_EXEC) { 877 cap_rights_set(rightsp, CAP_FEXECVE); 878 } else { 879 switch ((flags & O_ACCMODE)) { 880 case O_RDONLY: 881 cap_rights_set(rightsp, CAP_READ); 882 break; 883 case O_RDWR: 884 cap_rights_set(rightsp, CAP_READ); 885 /* FALLTHROUGH */ 886 case O_WRONLY: 887 cap_rights_set(rightsp, CAP_WRITE); 888 if (!(flags & (O_APPEND | O_TRUNC))) 889 cap_rights_set(rightsp, CAP_SEEK); 890 break; 891 } 892 } 893 894 if (flags & O_CREAT) 895 cap_rights_set(rightsp, CAP_CREATE); 896 897 if (flags & O_TRUNC) 898 cap_rights_set(rightsp, CAP_FTRUNCATE); 899 900 if (flags & (O_SYNC | O_FSYNC)) 901 cap_rights_set(rightsp, CAP_FSYNC); 902 903 if (flags & (O_EXLOCK | O_SHLOCK)) 904 cap_rights_set(rightsp, CAP_FLOCK); 905 } 906 907 /* 908 * Check permissions, allocate an open file structure, and call the device 909 * open routine if any. 
910 */ 911 #ifndef _SYS_SYSPROTO_H_ 912 struct open_args { 913 char *path; 914 int flags; 915 int mode; 916 }; 917 #endif 918 int 919 sys_open(td, uap) 920 struct thread *td; 921 register struct open_args /* { 922 char *path; 923 int flags; 924 int mode; 925 } */ *uap; 926 { 927 928 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 929 uap->flags, uap->mode)); 930 } 931 932 #ifndef _SYS_SYSPROTO_H_ 933 struct openat_args { 934 int fd; 935 char *path; 936 int flag; 937 int mode; 938 }; 939 #endif 940 int 941 sys_openat(struct thread *td, struct openat_args *uap) 942 { 943 944 AUDIT_ARG_FD(uap->fd); 945 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 946 uap->mode)); 947 } 948 949 int 950 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 951 int flags, int mode) 952 { 953 struct proc *p = td->td_proc; 954 struct filedesc *fdp = p->p_fd; 955 struct file *fp; 956 struct vnode *vp; 957 struct nameidata nd; 958 cap_rights_t rights; 959 int cmode, error, indx; 960 961 indx = -1; 962 963 AUDIT_ARG_FFLAGS(flags); 964 AUDIT_ARG_MODE(mode); 965 cap_rights_init(&rights, CAP_LOOKUP); 966 flags_to_rights(flags, &rights); 967 /* 968 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 969 * may be specified. 970 */ 971 if (flags & O_EXEC) { 972 if (flags & O_ACCMODE) 973 return (EINVAL); 974 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 975 return (EINVAL); 976 } else { 977 flags = FFLAGS(flags); 978 } 979 980 /* 981 * Allocate a file structure. The descriptor to reference it 982 * is allocated and set by finstall() below. 983 */ 984 error = falloc_noinstall(td, &fp); 985 if (error != 0) 986 return (error); 987 /* 988 * An extra reference on `fp' has been held for us by 989 * falloc_noinstall(). 990 */ 991 /* Set the flags early so the finit in devfs can pick them up. 
*/ 992 fp->f_flag = flags & FMASK; 993 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 994 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 995 &rights, td); 996 td->td_dupfd = -1; /* XXX check for fdopen */ 997 error = vn_open(&nd, &flags, cmode, fp); 998 if (error != 0) { 999 /* 1000 * If the vn_open replaced the method vector, something 1001 * wonderous happened deep below and we just pass it up 1002 * pretending we know what we do. 1003 */ 1004 if (error == ENXIO && fp->f_ops != &badfileops) 1005 goto success; 1006 1007 /* 1008 * Handle special fdopen() case. bleh. 1009 * 1010 * Don't do this for relative (capability) lookups; we don't 1011 * understand exactly what would happen, and we don't think 1012 * that it ever should. 1013 */ 1014 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) == 0 && 1015 (error == ENODEV || error == ENXIO) && 1016 td->td_dupfd >= 0) { 1017 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1018 &indx); 1019 if (error == 0) 1020 goto success; 1021 } 1022 1023 goto bad; 1024 } 1025 td->td_dupfd = 0; 1026 NDFREE(&nd, NDF_ONLY_PNBUF); 1027 vp = nd.ni_vp; 1028 1029 /* 1030 * Store the vnode, for any f_type. Typically, the vnode use 1031 * count is decremented by direct call to vn_closefile() for 1032 * files that switched type in the cdevsw fdopen() method. 1033 */ 1034 fp->f_vnode = vp; 1035 /* 1036 * If the file wasn't claimed by devfs bind it to the normal 1037 * vnode operations here. 1038 */ 1039 if (fp->f_ops == &badfileops) { 1040 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1041 fp->f_seqcount = 1; 1042 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1043 DTYPE_VNODE, vp, &vnops); 1044 } 1045 1046 VOP_UNLOCK(vp, 0); 1047 if (flags & O_TRUNC) { 1048 error = fo_truncate(fp, 0, td->td_ucred, td); 1049 if (error != 0) 1050 goto bad; 1051 } 1052 success: 1053 /* 1054 * If we haven't already installed the FD (for dupfdopen), do so now. 
1055 */ 1056 if (indx == -1) { 1057 struct filecaps *fcaps; 1058 1059 #ifdef CAPABILITIES 1060 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) != 0) 1061 fcaps = &nd.ni_filecaps; 1062 else 1063 #endif 1064 fcaps = NULL; 1065 error = finstall(td, fp, &indx, flags, fcaps); 1066 /* On success finstall() consumes fcaps. */ 1067 if (error != 0) { 1068 filecaps_free(&nd.ni_filecaps); 1069 goto bad; 1070 } 1071 } else { 1072 filecaps_free(&nd.ni_filecaps); 1073 } 1074 1075 /* 1076 * Release our private reference, leaving the one associated with 1077 * the descriptor table intact. 1078 */ 1079 fdrop(fp, td); 1080 td->td_retval[0] = indx; 1081 return (0); 1082 bad: 1083 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1084 fdrop(fp, td); 1085 return (error); 1086 } 1087 1088 #ifdef COMPAT_43 1089 /* 1090 * Create a file. 1091 */ 1092 #ifndef _SYS_SYSPROTO_H_ 1093 struct ocreat_args { 1094 char *path; 1095 int mode; 1096 }; 1097 #endif 1098 int 1099 ocreat(td, uap) 1100 struct thread *td; 1101 register struct ocreat_args /* { 1102 char *path; 1103 int mode; 1104 } */ *uap; 1105 { 1106 1107 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1108 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1109 } 1110 #endif /* COMPAT_43 */ 1111 1112 /* 1113 * Create a special file. 
1114 */ 1115 #ifndef _SYS_SYSPROTO_H_ 1116 struct mknod_args { 1117 char *path; 1118 int mode; 1119 int dev; 1120 }; 1121 #endif 1122 int 1123 sys_mknod(td, uap) 1124 struct thread *td; 1125 register struct mknod_args /* { 1126 char *path; 1127 int mode; 1128 int dev; 1129 } */ *uap; 1130 { 1131 1132 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1133 uap->mode, uap->dev)); 1134 } 1135 1136 #ifndef _SYS_SYSPROTO_H_ 1137 struct mknodat_args { 1138 int fd; 1139 char *path; 1140 mode_t mode; 1141 dev_t dev; 1142 }; 1143 #endif 1144 int 1145 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1146 { 1147 1148 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1149 uap->dev)); 1150 } 1151 1152 int 1153 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1154 int mode, int dev) 1155 { 1156 struct vnode *vp; 1157 struct mount *mp; 1158 struct vattr vattr; 1159 struct nameidata nd; 1160 cap_rights_t rights; 1161 int error, whiteout = 0; 1162 1163 AUDIT_ARG_MODE(mode); 1164 AUDIT_ARG_DEV(dev); 1165 switch (mode & S_IFMT) { 1166 case S_IFCHR: 1167 case S_IFBLK: 1168 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1169 if (error == 0 && dev == VNOVAL) 1170 error = EINVAL; 1171 break; 1172 case S_IFMT: 1173 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1174 break; 1175 case S_IFWHT: 1176 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1177 break; 1178 case S_IFIFO: 1179 if (dev == 0) 1180 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1181 /* FALLTHROUGH */ 1182 default: 1183 error = EINVAL; 1184 break; 1185 } 1186 if (error != 0) 1187 return (error); 1188 restart: 1189 bwillwrite(); 1190 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1191 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), 1192 td); 1193 if ((error = namei(&nd)) != 0) 1194 return (error); 1195 vp = nd.ni_vp; 1196 if (vp != NULL) { 1197 NDFREE(&nd, NDF_ONLY_PNBUF); 1198 if (vp == nd.ni_dvp) 1199 vrele(nd.ni_dvp); 1200 
else 1201 vput(nd.ni_dvp); 1202 vrele(vp); 1203 return (EEXIST); 1204 } else { 1205 VATTR_NULL(&vattr); 1206 vattr.va_mode = (mode & ALLPERMS) & 1207 ~td->td_proc->p_fd->fd_cmask; 1208 vattr.va_rdev = dev; 1209 whiteout = 0; 1210 1211 switch (mode & S_IFMT) { 1212 case S_IFMT: /* used by badsect to flag bad sectors */ 1213 vattr.va_type = VBAD; 1214 break; 1215 case S_IFCHR: 1216 vattr.va_type = VCHR; 1217 break; 1218 case S_IFBLK: 1219 vattr.va_type = VBLK; 1220 break; 1221 case S_IFWHT: 1222 whiteout = 1; 1223 break; 1224 default: 1225 panic("kern_mknod: invalid mode"); 1226 } 1227 } 1228 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1229 NDFREE(&nd, NDF_ONLY_PNBUF); 1230 vput(nd.ni_dvp); 1231 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1232 return (error); 1233 goto restart; 1234 } 1235 #ifdef MAC 1236 if (error == 0 && !whiteout) 1237 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1238 &nd.ni_cnd, &vattr); 1239 #endif 1240 if (error == 0) { 1241 if (whiteout) 1242 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1243 else { 1244 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1245 &nd.ni_cnd, &vattr); 1246 if (error == 0) 1247 vput(nd.ni_vp); 1248 } 1249 } 1250 NDFREE(&nd, NDF_ONLY_PNBUF); 1251 vput(nd.ni_dvp); 1252 vn_finished_write(mp); 1253 return (error); 1254 } 1255 1256 /* 1257 * Create a named pipe. 
1258 */ 1259 #ifndef _SYS_SYSPROTO_H_ 1260 struct mkfifo_args { 1261 char *path; 1262 int mode; 1263 }; 1264 #endif 1265 int 1266 sys_mkfifo(td, uap) 1267 struct thread *td; 1268 register struct mkfifo_args /* { 1269 char *path; 1270 int mode; 1271 } */ *uap; 1272 { 1273 1274 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1275 uap->mode)); 1276 } 1277 1278 #ifndef _SYS_SYSPROTO_H_ 1279 struct mkfifoat_args { 1280 int fd; 1281 char *path; 1282 mode_t mode; 1283 }; 1284 #endif 1285 int 1286 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1287 { 1288 1289 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1290 uap->mode)); 1291 } 1292 1293 int 1294 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1295 int mode) 1296 { 1297 struct mount *mp; 1298 struct vattr vattr; 1299 struct nameidata nd; 1300 cap_rights_t rights; 1301 int error; 1302 1303 AUDIT_ARG_MODE(mode); 1304 restart: 1305 bwillwrite(); 1306 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1307 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), 1308 td); 1309 if ((error = namei(&nd)) != 0) 1310 return (error); 1311 if (nd.ni_vp != NULL) { 1312 NDFREE(&nd, NDF_ONLY_PNBUF); 1313 if (nd.ni_vp == nd.ni_dvp) 1314 vrele(nd.ni_dvp); 1315 else 1316 vput(nd.ni_dvp); 1317 vrele(nd.ni_vp); 1318 return (EEXIST); 1319 } 1320 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1321 NDFREE(&nd, NDF_ONLY_PNBUF); 1322 vput(nd.ni_dvp); 1323 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1324 return (error); 1325 goto restart; 1326 } 1327 VATTR_NULL(&vattr); 1328 vattr.va_type = VFIFO; 1329 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1330 #ifdef MAC 1331 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1332 &vattr); 1333 if (error != 0) 1334 goto out; 1335 #endif 1336 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1337 if (error == 0) 1338 vput(nd.ni_vp); 1339 
#ifdef MAC 1340 out: 1341 #endif 1342 vput(nd.ni_dvp); 1343 vn_finished_write(mp); 1344 NDFREE(&nd, NDF_ONLY_PNBUF); 1345 return (error); 1346 } 1347 1348 /* 1349 * Make a hard file link. 1350 */ 1351 #ifndef _SYS_SYSPROTO_H_ 1352 struct link_args { 1353 char *path; 1354 char *link; 1355 }; 1356 #endif 1357 int 1358 sys_link(td, uap) 1359 struct thread *td; 1360 register struct link_args /* { 1361 char *path; 1362 char *link; 1363 } */ *uap; 1364 { 1365 1366 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1367 UIO_USERSPACE, FOLLOW)); 1368 } 1369 1370 #ifndef _SYS_SYSPROTO_H_ 1371 struct linkat_args { 1372 int fd1; 1373 char *path1; 1374 int fd2; 1375 char *path2; 1376 int flag; 1377 }; 1378 #endif 1379 int 1380 sys_linkat(struct thread *td, struct linkat_args *uap) 1381 { 1382 int flag; 1383 1384 flag = uap->flag; 1385 if (flag & ~AT_SYMLINK_FOLLOW) 1386 return (EINVAL); 1387 1388 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1389 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? 
FOLLOW : NOFOLLOW)); 1390 } 1391 1392 int hardlink_check_uid = 0; 1393 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1394 &hardlink_check_uid, 0, 1395 "Unprivileged processes cannot create hard links to files owned by other " 1396 "users"); 1397 static int hardlink_check_gid = 0; 1398 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1399 &hardlink_check_gid, 0, 1400 "Unprivileged processes cannot create hard links to files owned by other " 1401 "groups"); 1402 1403 static int 1404 can_hardlink(struct vnode *vp, struct ucred *cred) 1405 { 1406 struct vattr va; 1407 int error; 1408 1409 if (!hardlink_check_uid && !hardlink_check_gid) 1410 return (0); 1411 1412 error = VOP_GETATTR(vp, &va, cred); 1413 if (error != 0) 1414 return (error); 1415 1416 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1417 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1418 if (error != 0) 1419 return (error); 1420 } 1421 1422 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1423 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1424 if (error != 0) 1425 return (error); 1426 } 1427 1428 return (0); 1429 } 1430 1431 int 1432 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1433 enum uio_seg segflg, int follow) 1434 { 1435 struct vnode *vp; 1436 struct mount *mp; 1437 struct nameidata nd; 1438 cap_rights_t rights; 1439 int error; 1440 1441 again: 1442 bwillwrite(); 1443 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, 1444 cap_rights_init(&rights, CAP_LINKAT_SOURCE), td); 1445 1446 if ((error = namei(&nd)) != 0) 1447 return (error); 1448 NDFREE(&nd, NDF_ONLY_PNBUF); 1449 vp = nd.ni_vp; 1450 if (vp->v_type == VDIR) { 1451 vrele(vp); 1452 return (EPERM); /* POSIX */ 1453 } 1454 NDINIT_ATRIGHTS(&nd, CREATE, 1455 LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2, 1456 cap_rights_init(&rights, CAP_LINKAT_TARGET), td); 1457 if ((error = namei(&nd)) == 0) { 1458 if (nd.ni_vp != 
NULL) { 1459 NDFREE(&nd, NDF_ONLY_PNBUF); 1460 if (nd.ni_dvp == nd.ni_vp) 1461 vrele(nd.ni_dvp); 1462 else 1463 vput(nd.ni_dvp); 1464 vrele(nd.ni_vp); 1465 vrele(vp); 1466 return (EEXIST); 1467 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1468 /* 1469 * Cross-device link. No need to recheck 1470 * vp->v_type, since it cannot change, except 1471 * to VBAD. 1472 */ 1473 NDFREE(&nd, NDF_ONLY_PNBUF); 1474 vput(nd.ni_dvp); 1475 vrele(vp); 1476 return (EXDEV); 1477 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1478 error = can_hardlink(vp, td->td_ucred); 1479 #ifdef MAC 1480 if (error == 0) 1481 error = mac_vnode_check_link(td->td_ucred, 1482 nd.ni_dvp, vp, &nd.ni_cnd); 1483 #endif 1484 if (error != 0) { 1485 vput(vp); 1486 vput(nd.ni_dvp); 1487 NDFREE(&nd, NDF_ONLY_PNBUF); 1488 return (error); 1489 } 1490 error = vn_start_write(vp, &mp, V_NOWAIT); 1491 if (error != 0) { 1492 vput(vp); 1493 vput(nd.ni_dvp); 1494 NDFREE(&nd, NDF_ONLY_PNBUF); 1495 error = vn_start_write(NULL, &mp, 1496 V_XSLEEP | PCATCH); 1497 if (error != 0) 1498 return (error); 1499 goto again; 1500 } 1501 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1502 VOP_UNLOCK(vp, 0); 1503 vput(nd.ni_dvp); 1504 vn_finished_write(mp); 1505 NDFREE(&nd, NDF_ONLY_PNBUF); 1506 } else { 1507 vput(nd.ni_dvp); 1508 NDFREE(&nd, NDF_ONLY_PNBUF); 1509 vrele(vp); 1510 goto again; 1511 } 1512 } 1513 vrele(vp); 1514 return (error); 1515 } 1516 1517 /* 1518 * Make a symbolic link. 
1519 */ 1520 #ifndef _SYS_SYSPROTO_H_ 1521 struct symlink_args { 1522 char *path; 1523 char *link; 1524 }; 1525 #endif 1526 int 1527 sys_symlink(td, uap) 1528 struct thread *td; 1529 register struct symlink_args /* { 1530 char *path; 1531 char *link; 1532 } */ *uap; 1533 { 1534 1535 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1536 UIO_USERSPACE)); 1537 } 1538 1539 #ifndef _SYS_SYSPROTO_H_ 1540 struct symlinkat_args { 1541 char *path; 1542 int fd; 1543 char *path2; 1544 }; 1545 #endif 1546 int 1547 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1548 { 1549 1550 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1551 UIO_USERSPACE)); 1552 } 1553 1554 int 1555 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1556 enum uio_seg segflg) 1557 { 1558 struct mount *mp; 1559 struct vattr vattr; 1560 char *syspath; 1561 struct nameidata nd; 1562 int error; 1563 cap_rights_t rights; 1564 1565 if (segflg == UIO_SYSSPACE) { 1566 syspath = path1; 1567 } else { 1568 syspath = uma_zalloc(namei_zone, M_WAITOK); 1569 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1570 goto out; 1571 } 1572 AUDIT_ARG_TEXT(syspath); 1573 restart: 1574 bwillwrite(); 1575 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1576 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1577 td); 1578 if ((error = namei(&nd)) != 0) 1579 goto out; 1580 if (nd.ni_vp) { 1581 NDFREE(&nd, NDF_ONLY_PNBUF); 1582 if (nd.ni_vp == nd.ni_dvp) 1583 vrele(nd.ni_dvp); 1584 else 1585 vput(nd.ni_dvp); 1586 vrele(nd.ni_vp); 1587 error = EEXIST; 1588 goto out; 1589 } 1590 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1591 NDFREE(&nd, NDF_ONLY_PNBUF); 1592 vput(nd.ni_dvp); 1593 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1594 goto out; 1595 goto restart; 1596 } 1597 VATTR_NULL(&vattr); 1598 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1599 #ifdef MAC 1600 vattr.va_type = VLNK; 1601 
error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1602 &vattr); 1603 if (error != 0) 1604 goto out2; 1605 #endif 1606 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1607 if (error == 0) 1608 vput(nd.ni_vp); 1609 #ifdef MAC 1610 out2: 1611 #endif 1612 NDFREE(&nd, NDF_ONLY_PNBUF); 1613 vput(nd.ni_dvp); 1614 vn_finished_write(mp); 1615 out: 1616 if (segflg != UIO_SYSSPACE) 1617 uma_zfree(namei_zone, syspath); 1618 return (error); 1619 } 1620 1621 /* 1622 * Delete a whiteout from the filesystem. 1623 */ 1624 int 1625 sys_undelete(td, uap) 1626 struct thread *td; 1627 register struct undelete_args /* { 1628 char *path; 1629 } */ *uap; 1630 { 1631 struct mount *mp; 1632 struct nameidata nd; 1633 int error; 1634 1635 restart: 1636 bwillwrite(); 1637 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1638 UIO_USERSPACE, uap->path, td); 1639 error = namei(&nd); 1640 if (error != 0) 1641 return (error); 1642 1643 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1644 NDFREE(&nd, NDF_ONLY_PNBUF); 1645 if (nd.ni_vp == nd.ni_dvp) 1646 vrele(nd.ni_dvp); 1647 else 1648 vput(nd.ni_dvp); 1649 if (nd.ni_vp) 1650 vrele(nd.ni_vp); 1651 return (EEXIST); 1652 } 1653 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1654 NDFREE(&nd, NDF_ONLY_PNBUF); 1655 vput(nd.ni_dvp); 1656 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1657 return (error); 1658 goto restart; 1659 } 1660 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1661 NDFREE(&nd, NDF_ONLY_PNBUF); 1662 vput(nd.ni_dvp); 1663 vn_finished_write(mp); 1664 return (error); 1665 } 1666 1667 /* 1668 * Delete a name from the filesystem. 
1669 */ 1670 #ifndef _SYS_SYSPROTO_H_ 1671 struct unlink_args { 1672 char *path; 1673 }; 1674 #endif 1675 int 1676 sys_unlink(td, uap) 1677 struct thread *td; 1678 struct unlink_args /* { 1679 char *path; 1680 } */ *uap; 1681 { 1682 1683 return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); 1684 } 1685 1686 #ifndef _SYS_SYSPROTO_H_ 1687 struct unlinkat_args { 1688 int fd; 1689 char *path; 1690 int flag; 1691 }; 1692 #endif 1693 int 1694 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1695 { 1696 int flag = uap->flag; 1697 int fd = uap->fd; 1698 char *path = uap->path; 1699 1700 if (flag & ~AT_REMOVEDIR) 1701 return (EINVAL); 1702 1703 if (flag & AT_REMOVEDIR) 1704 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1705 else 1706 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1707 } 1708 1709 int 1710 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1711 ino_t oldinum) 1712 { 1713 struct mount *mp; 1714 struct vnode *vp; 1715 struct nameidata nd; 1716 struct stat sb; 1717 cap_rights_t rights; 1718 int error; 1719 1720 restart: 1721 bwillwrite(); 1722 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1723 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1724 if ((error = namei(&nd)) != 0) 1725 return (error == EINVAL ? EPERM : error); 1726 vp = nd.ni_vp; 1727 if (vp->v_type == VDIR && oldinum == 0) { 1728 error = EPERM; /* POSIX */ 1729 } else if (oldinum != 0 && 1730 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1731 sb.st_ino != oldinum) { 1732 error = EIDRM; /* Identifier removed */ 1733 } else { 1734 /* 1735 * The root of a mounted filesystem cannot be deleted. 1736 * 1737 * XXX: can this only be a VDIR case? 
1738 */ 1739 if (vp->v_vflag & VV_ROOT) 1740 error = EBUSY; 1741 } 1742 if (error == 0) { 1743 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1744 NDFREE(&nd, NDF_ONLY_PNBUF); 1745 vput(nd.ni_dvp); 1746 if (vp == nd.ni_dvp) 1747 vrele(vp); 1748 else 1749 vput(vp); 1750 if ((error = vn_start_write(NULL, &mp, 1751 V_XSLEEP | PCATCH)) != 0) 1752 return (error); 1753 goto restart; 1754 } 1755 #ifdef MAC 1756 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1757 &nd.ni_cnd); 1758 if (error != 0) 1759 goto out; 1760 #endif 1761 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1762 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1763 #ifdef MAC 1764 out: 1765 #endif 1766 vn_finished_write(mp); 1767 } 1768 NDFREE(&nd, NDF_ONLY_PNBUF); 1769 vput(nd.ni_dvp); 1770 if (vp == nd.ni_dvp) 1771 vrele(vp); 1772 else 1773 vput(vp); 1774 return (error); 1775 } 1776 1777 /* 1778 * Reposition read/write file offset. 1779 */ 1780 #ifndef _SYS_SYSPROTO_H_ 1781 struct lseek_args { 1782 int fd; 1783 int pad; 1784 off_t offset; 1785 int whence; 1786 }; 1787 #endif 1788 int 1789 sys_lseek(td, uap) 1790 struct thread *td; 1791 register struct lseek_args /* { 1792 int fd; 1793 int pad; 1794 off_t offset; 1795 int whence; 1796 } */ *uap; 1797 { 1798 struct file *fp; 1799 cap_rights_t rights; 1800 int error; 1801 1802 AUDIT_ARG_FD(uap->fd); 1803 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1804 if (error != 0) 1805 return (error); 1806 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1807 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1808 fdrop(fp, td); 1809 return (error); 1810 } 1811 1812 #if defined(COMPAT_43) 1813 /* 1814 * Reposition read/write file offset. 
1815 */ 1816 #ifndef _SYS_SYSPROTO_H_ 1817 struct olseek_args { 1818 int fd; 1819 long offset; 1820 int whence; 1821 }; 1822 #endif 1823 int 1824 olseek(td, uap) 1825 struct thread *td; 1826 register struct olseek_args /* { 1827 int fd; 1828 long offset; 1829 int whence; 1830 } */ *uap; 1831 { 1832 struct lseek_args /* { 1833 int fd; 1834 int pad; 1835 off_t offset; 1836 int whence; 1837 } */ nuap; 1838 1839 nuap.fd = uap->fd; 1840 nuap.offset = uap->offset; 1841 nuap.whence = uap->whence; 1842 return (sys_lseek(td, &nuap)); 1843 } 1844 #endif /* COMPAT_43 */ 1845 1846 #if defined(COMPAT_FREEBSD6) 1847 /* Version with the 'pad' argument */ 1848 int 1849 freebsd6_lseek(td, uap) 1850 struct thread *td; 1851 register struct freebsd6_lseek_args *uap; 1852 { 1853 struct lseek_args ouap; 1854 1855 ouap.fd = uap->fd; 1856 ouap.offset = uap->offset; 1857 ouap.whence = uap->whence; 1858 return (sys_lseek(td, &ouap)); 1859 } 1860 #endif 1861 1862 /* 1863 * Check access permissions using passed credentials. 1864 */ 1865 static int 1866 vn_access(vp, user_flags, cred, td) 1867 struct vnode *vp; 1868 int user_flags; 1869 struct ucred *cred; 1870 struct thread *td; 1871 { 1872 accmode_t accmode; 1873 int error; 1874 1875 /* Flags == 0 means only check for existence. */ 1876 if (user_flags == 0) 1877 return (0); 1878 1879 accmode = 0; 1880 if (user_flags & R_OK) 1881 accmode |= VREAD; 1882 if (user_flags & W_OK) 1883 accmode |= VWRITE; 1884 if (user_flags & X_OK) 1885 accmode |= VEXEC; 1886 #ifdef MAC 1887 error = mac_vnode_check_access(cred, vp, accmode); 1888 if (error != 0) 1889 return (error); 1890 #endif 1891 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1892 error = VOP_ACCESS(vp, accmode, cred, td); 1893 return (error); 1894 } 1895 1896 /* 1897 * Check access permissions using "real" credentials. 
1898 */ 1899 #ifndef _SYS_SYSPROTO_H_ 1900 struct access_args { 1901 char *path; 1902 int amode; 1903 }; 1904 #endif 1905 int 1906 sys_access(td, uap) 1907 struct thread *td; 1908 register struct access_args /* { 1909 char *path; 1910 int amode; 1911 } */ *uap; 1912 { 1913 1914 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1915 0, uap->amode)); 1916 } 1917 1918 #ifndef _SYS_SYSPROTO_H_ 1919 struct faccessat_args { 1920 int dirfd; 1921 char *path; 1922 int amode; 1923 int flag; 1924 } 1925 #endif 1926 int 1927 sys_faccessat(struct thread *td, struct faccessat_args *uap) 1928 { 1929 1930 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1931 uap->amode)); 1932 } 1933 1934 int 1935 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1936 int flag, int amode) 1937 { 1938 struct ucred *cred, *usecred; 1939 struct vnode *vp; 1940 struct nameidata nd; 1941 cap_rights_t rights; 1942 int error; 1943 1944 if (flag & ~AT_EACCESS) 1945 return (EINVAL); 1946 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 1947 return (EINVAL); 1948 1949 /* 1950 * Create and modify a temporary credential instead of one that 1951 * is potentially shared (if we need one). 
1952 */ 1953 cred = td->td_ucred; 1954 if ((flag & AT_EACCESS) == 0 && 1955 ((cred->cr_uid != cred->cr_ruid || 1956 cred->cr_rgid != cred->cr_groups[0]))) { 1957 usecred = crdup(cred); 1958 usecred->cr_uid = cred->cr_ruid; 1959 usecred->cr_groups[0] = cred->cr_rgid; 1960 td->td_ucred = usecred; 1961 } else 1962 usecred = cred; 1963 AUDIT_ARG_VALUE(amode); 1964 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 1965 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 1966 td); 1967 if ((error = namei(&nd)) != 0) 1968 goto out; 1969 vp = nd.ni_vp; 1970 1971 error = vn_access(vp, amode, usecred, td); 1972 NDFREE(&nd, NDF_ONLY_PNBUF); 1973 vput(vp); 1974 out: 1975 if (usecred != cred) { 1976 td->td_ucred = cred; 1977 crfree(usecred); 1978 } 1979 return (error); 1980 } 1981 1982 /* 1983 * Check access permissions using "effective" credentials. 1984 */ 1985 #ifndef _SYS_SYSPROTO_H_ 1986 struct eaccess_args { 1987 char *path; 1988 int amode; 1989 }; 1990 #endif 1991 int 1992 sys_eaccess(td, uap) 1993 struct thread *td; 1994 register struct eaccess_args /* { 1995 char *path; 1996 int amode; 1997 } */ *uap; 1998 { 1999 2000 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2001 AT_EACCESS, uap->amode)); 2002 } 2003 2004 #if defined(COMPAT_43) 2005 /* 2006 * Get file status; this version follows links. 2007 */ 2008 #ifndef _SYS_SYSPROTO_H_ 2009 struct ostat_args { 2010 char *path; 2011 struct ostat *ub; 2012 }; 2013 #endif 2014 int 2015 ostat(td, uap) 2016 struct thread *td; 2017 register struct ostat_args /* { 2018 char *path; 2019 struct ostat *ub; 2020 } */ *uap; 2021 { 2022 struct stat sb; 2023 struct ostat osb; 2024 int error; 2025 2026 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2027 &sb, NULL); 2028 if (error != 0) 2029 return (error); 2030 cvtstat(&sb, &osb); 2031 return (copyout(&osb, uap->ub, sizeof (osb))); 2032 } 2033 2034 /* 2035 * Get file status; this version does not follow links. 
2036 */ 2037 #ifndef _SYS_SYSPROTO_H_ 2038 struct olstat_args { 2039 char *path; 2040 struct ostat *ub; 2041 }; 2042 #endif 2043 int 2044 olstat(td, uap) 2045 struct thread *td; 2046 register struct olstat_args /* { 2047 char *path; 2048 struct ostat *ub; 2049 } */ *uap; 2050 { 2051 struct stat sb; 2052 struct ostat osb; 2053 int error; 2054 2055 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2056 UIO_USERSPACE, &sb, NULL); 2057 if (error != 0) 2058 return (error); 2059 cvtstat(&sb, &osb); 2060 return (copyout(&osb, uap->ub, sizeof (osb))); 2061 } 2062 2063 /* 2064 * Convert from an old to a new stat structure. 2065 */ 2066 void 2067 cvtstat(st, ost) 2068 struct stat *st; 2069 struct ostat *ost; 2070 { 2071 2072 bzero(ost, sizeof(*ost)); 2073 ost->st_dev = st->st_dev; 2074 ost->st_ino = st->st_ino; 2075 ost->st_mode = st->st_mode; 2076 ost->st_nlink = st->st_nlink; 2077 ost->st_uid = st->st_uid; 2078 ost->st_gid = st->st_gid; 2079 ost->st_rdev = st->st_rdev; 2080 if (st->st_size < (quad_t)1 << 32) 2081 ost->st_size = st->st_size; 2082 else 2083 ost->st_size = -2; 2084 ost->st_atim = st->st_atim; 2085 ost->st_mtim = st->st_mtim; 2086 ost->st_ctim = st->st_ctim; 2087 ost->st_blksize = st->st_blksize; 2088 ost->st_blocks = st->st_blocks; 2089 ost->st_flags = st->st_flags; 2090 ost->st_gen = st->st_gen; 2091 } 2092 #endif /* COMPAT_43 */ 2093 2094 /* 2095 * Get file status; this version follows links. 
2096 */ 2097 #ifndef _SYS_SYSPROTO_H_ 2098 struct stat_args { 2099 char *path; 2100 struct stat *ub; 2101 }; 2102 #endif 2103 int 2104 sys_stat(td, uap) 2105 struct thread *td; 2106 register struct stat_args /* { 2107 char *path; 2108 struct stat *ub; 2109 } */ *uap; 2110 { 2111 struct stat sb; 2112 int error; 2113 2114 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2115 &sb, NULL); 2116 if (error == 0) 2117 error = copyout(&sb, uap->ub, sizeof (sb)); 2118 return (error); 2119 } 2120 2121 #ifndef _SYS_SYSPROTO_H_ 2122 struct fstatat_args { 2123 int fd; 2124 char *path; 2125 struct stat *buf; 2126 int flag; 2127 } 2128 #endif 2129 int 2130 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2131 { 2132 struct stat sb; 2133 int error; 2134 2135 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2136 UIO_USERSPACE, &sb, NULL); 2137 if (error == 0) 2138 error = copyout(&sb, uap->buf, sizeof (sb)); 2139 return (error); 2140 } 2141 2142 int 2143 kern_statat(struct thread *td, int flag, int fd, char *path, 2144 enum uio_seg pathseg, struct stat *sbp, 2145 void (*hook)(struct vnode *vp, struct stat *sbp)) 2146 { 2147 struct nameidata nd; 2148 struct stat sb; 2149 cap_rights_t rights; 2150 int error; 2151 2152 if (flag & ~AT_SYMLINK_NOFOLLOW) 2153 return (EINVAL); 2154 2155 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : 2156 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2157 cap_rights_init(&rights, CAP_FSTAT), td); 2158 2159 if ((error = namei(&nd)) != 0) 2160 return (error); 2161 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2162 if (error == 0) { 2163 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode); 2164 if (S_ISREG(sb.st_mode)) 2165 SDT_PROBE2(vfs, , stat, reg, path, pathseg); 2166 if (__predict_false(hook != NULL)) 2167 hook(nd.ni_vp, &sb); 2168 } 2169 NDFREE(&nd, NDF_ONLY_PNBUF); 2170 vput(nd.ni_vp); 2171 if (error != 0) 2172 return (error); 2173 *sbp = sb; 2174 #ifdef KTRACE 2175 if (KTRPOINT(td, KTR_STRUCT)) 2176 ktrstat(&sb); 2177 #endif 2178 return (0); 2179 } 2180 2181 /* 2182 * Get file status; this version does not follow links. 2183 */ 2184 #ifndef _SYS_SYSPROTO_H_ 2185 struct lstat_args { 2186 char *path; 2187 struct stat *ub; 2188 }; 2189 #endif 2190 int 2191 sys_lstat(td, uap) 2192 struct thread *td; 2193 register struct lstat_args /* { 2194 char *path; 2195 struct stat *ub; 2196 } */ *uap; 2197 { 2198 struct stat sb; 2199 int error; 2200 2201 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2202 UIO_USERSPACE, &sb, NULL); 2203 if (error == 0) 2204 error = copyout(&sb, uap->ub, sizeof (sb)); 2205 return (error); 2206 } 2207 2208 /* 2209 * Implementation of the NetBSD [l]stat() functions. 
2210 */ 2211 void 2212 cvtnstat(sb, nsb) 2213 struct stat *sb; 2214 struct nstat *nsb; 2215 { 2216 2217 bzero(nsb, sizeof *nsb); 2218 nsb->st_dev = sb->st_dev; 2219 nsb->st_ino = sb->st_ino; 2220 nsb->st_mode = sb->st_mode; 2221 nsb->st_nlink = sb->st_nlink; 2222 nsb->st_uid = sb->st_uid; 2223 nsb->st_gid = sb->st_gid; 2224 nsb->st_rdev = sb->st_rdev; 2225 nsb->st_atim = sb->st_atim; 2226 nsb->st_mtim = sb->st_mtim; 2227 nsb->st_ctim = sb->st_ctim; 2228 nsb->st_size = sb->st_size; 2229 nsb->st_blocks = sb->st_blocks; 2230 nsb->st_blksize = sb->st_blksize; 2231 nsb->st_flags = sb->st_flags; 2232 nsb->st_gen = sb->st_gen; 2233 nsb->st_birthtim = sb->st_birthtim; 2234 } 2235 2236 #ifndef _SYS_SYSPROTO_H_ 2237 struct nstat_args { 2238 char *path; 2239 struct nstat *ub; 2240 }; 2241 #endif 2242 int 2243 sys_nstat(td, uap) 2244 struct thread *td; 2245 register struct nstat_args /* { 2246 char *path; 2247 struct nstat *ub; 2248 } */ *uap; 2249 { 2250 struct stat sb; 2251 struct nstat nsb; 2252 int error; 2253 2254 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2255 &sb, NULL); 2256 if (error != 0) 2257 return (error); 2258 cvtnstat(&sb, &nsb); 2259 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2260 } 2261 2262 /* 2263 * NetBSD lstat. Get file status; this version does not follow links. 2264 */ 2265 #ifndef _SYS_SYSPROTO_H_ 2266 struct lstat_args { 2267 char *path; 2268 struct stat *ub; 2269 }; 2270 #endif 2271 int 2272 sys_nlstat(td, uap) 2273 struct thread *td; 2274 register struct nlstat_args /* { 2275 char *path; 2276 struct nstat *ub; 2277 } */ *uap; 2278 { 2279 struct stat sb; 2280 struct nstat nsb; 2281 int error; 2282 2283 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2284 UIO_USERSPACE, &sb, NULL); 2285 if (error != 0) 2286 return (error); 2287 cvtnstat(&sb, &nsb); 2288 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2289 } 2290 2291 /* 2292 * Get configurable pathname variables. 
2293 */ 2294 #ifndef _SYS_SYSPROTO_H_ 2295 struct pathconf_args { 2296 char *path; 2297 int name; 2298 }; 2299 #endif 2300 int 2301 sys_pathconf(td, uap) 2302 struct thread *td; 2303 register struct pathconf_args /* { 2304 char *path; 2305 int name; 2306 } */ *uap; 2307 { 2308 2309 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2310 } 2311 2312 #ifndef _SYS_SYSPROTO_H_ 2313 struct lpathconf_args { 2314 char *path; 2315 int name; 2316 }; 2317 #endif 2318 int 2319 sys_lpathconf(td, uap) 2320 struct thread *td; 2321 register struct lpathconf_args /* { 2322 char *path; 2323 int name; 2324 } */ *uap; 2325 { 2326 2327 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2328 NOFOLLOW)); 2329 } 2330 2331 int 2332 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2333 u_long flags) 2334 { 2335 struct nameidata nd; 2336 int error; 2337 2338 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2339 pathseg, path, td); 2340 if ((error = namei(&nd)) != 0) 2341 return (error); 2342 NDFREE(&nd, NDF_ONLY_PNBUF); 2343 2344 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2345 vput(nd.ni_vp); 2346 return (error); 2347 } 2348 2349 /* 2350 * Return target name of a symbolic link. 
2351 */ 2352 #ifndef _SYS_SYSPROTO_H_ 2353 struct readlink_args { 2354 char *path; 2355 char *buf; 2356 size_t count; 2357 }; 2358 #endif 2359 int 2360 sys_readlink(td, uap) 2361 struct thread *td; 2362 register struct readlink_args /* { 2363 char *path; 2364 char *buf; 2365 size_t count; 2366 } */ *uap; 2367 { 2368 2369 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2370 uap->buf, UIO_USERSPACE, uap->count)); 2371 } 2372 #ifndef _SYS_SYSPROTO_H_ 2373 struct readlinkat_args { 2374 int fd; 2375 char *path; 2376 char *buf; 2377 size_t bufsize; 2378 }; 2379 #endif 2380 int 2381 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2382 { 2383 2384 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2385 uap->buf, UIO_USERSPACE, uap->bufsize)); 2386 } 2387 2388 int 2389 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2390 char *buf, enum uio_seg bufseg, size_t count) 2391 { 2392 struct vnode *vp; 2393 struct iovec aiov; 2394 struct uio auio; 2395 struct nameidata nd; 2396 int error; 2397 2398 if (count > IOSIZE_MAX) 2399 return (EINVAL); 2400 2401 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2402 pathseg, path, fd, td); 2403 2404 if ((error = namei(&nd)) != 0) 2405 return (error); 2406 NDFREE(&nd, NDF_ONLY_PNBUF); 2407 vp = nd.ni_vp; 2408 #ifdef MAC 2409 error = mac_vnode_check_readlink(td->td_ucred, vp); 2410 if (error != 0) { 2411 vput(vp); 2412 return (error); 2413 } 2414 #endif 2415 if (vp->v_type != VLNK) 2416 error = EINVAL; 2417 else { 2418 aiov.iov_base = buf; 2419 aiov.iov_len = count; 2420 auio.uio_iov = &aiov; 2421 auio.uio_iovcnt = 1; 2422 auio.uio_offset = 0; 2423 auio.uio_rw = UIO_READ; 2424 auio.uio_segflg = bufseg; 2425 auio.uio_td = td; 2426 auio.uio_resid = count; 2427 error = VOP_READLINK(vp, &auio, td->td_ucred); 2428 td->td_retval[0] = count - auio.uio_resid; 2429 } 2430 vput(vp); 2431 return (error); 2432 } 2433 2434 /* 2435 * Common implementation code for 
chflags() and fchflags(). 2436 */ 2437 static int 2438 setfflags(td, vp, flags) 2439 struct thread *td; 2440 struct vnode *vp; 2441 u_long flags; 2442 { 2443 struct mount *mp; 2444 struct vattr vattr; 2445 int error; 2446 2447 /* We can't support the value matching VNOVAL. */ 2448 if (flags == VNOVAL) 2449 return (EOPNOTSUPP); 2450 2451 /* 2452 * Prevent non-root users from setting flags on devices. When 2453 * a device is reused, users can retain ownership of the device 2454 * if they are allowed to set flags and programs assume that 2455 * chown can't fail when done as root. 2456 */ 2457 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2458 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2459 if (error != 0) 2460 return (error); 2461 } 2462 2463 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2464 return (error); 2465 VATTR_NULL(&vattr); 2466 vattr.va_flags = flags; 2467 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2468 #ifdef MAC 2469 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2470 if (error == 0) 2471 #endif 2472 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2473 VOP_UNLOCK(vp, 0); 2474 vn_finished_write(mp); 2475 return (error); 2476 } 2477 2478 /* 2479 * Change flags of a file given a path name. 
2480 */ 2481 #ifndef _SYS_SYSPROTO_H_ 2482 struct chflags_args { 2483 const char *path; 2484 u_long flags; 2485 }; 2486 #endif 2487 int 2488 sys_chflags(td, uap) 2489 struct thread *td; 2490 register struct chflags_args /* { 2491 const char *path; 2492 u_long flags; 2493 } */ *uap; 2494 { 2495 2496 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2497 uap->flags, 0)); 2498 } 2499 2500 #ifndef _SYS_SYSPROTO_H_ 2501 struct chflagsat_args { 2502 int fd; 2503 const char *path; 2504 u_long flags; 2505 int atflag; 2506 } 2507 #endif 2508 int 2509 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2510 { 2511 int fd = uap->fd; 2512 const char *path = uap->path; 2513 u_long flags = uap->flags; 2514 int atflag = uap->atflag; 2515 2516 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2517 return (EINVAL); 2518 2519 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2520 } 2521 2522 /* 2523 * Same as chflags() but doesn't follow symlinks. 2524 */ 2525 int 2526 sys_lchflags(td, uap) 2527 struct thread *td; 2528 register struct lchflags_args /* { 2529 const char *path; 2530 u_long flags; 2531 } */ *uap; 2532 { 2533 2534 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2535 uap->flags, AT_SYMLINK_NOFOLLOW)); 2536 } 2537 2538 static int 2539 kern_chflagsat(struct thread *td, int fd, const char *path, 2540 enum uio_seg pathseg, u_long flags, int atflag) 2541 { 2542 struct nameidata nd; 2543 cap_rights_t rights; 2544 int error, follow; 2545 2546 AUDIT_ARG_FFLAGS(flags); 2547 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2548 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2549 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2550 if ((error = namei(&nd)) != 0) 2551 return (error); 2552 NDFREE(&nd, NDF_ONLY_PNBUF); 2553 error = setfflags(td, nd.ni_vp, flags); 2554 vrele(nd.ni_vp); 2555 return (error); 2556 } 2557 2558 /* 2559 * Change flags of a file given a file descriptor. 
2560 */ 2561 #ifndef _SYS_SYSPROTO_H_ 2562 struct fchflags_args { 2563 int fd; 2564 u_long flags; 2565 }; 2566 #endif 2567 int 2568 sys_fchflags(td, uap) 2569 struct thread *td; 2570 register struct fchflags_args /* { 2571 int fd; 2572 u_long flags; 2573 } */ *uap; 2574 { 2575 struct file *fp; 2576 cap_rights_t rights; 2577 int error; 2578 2579 AUDIT_ARG_FD(uap->fd); 2580 AUDIT_ARG_FFLAGS(uap->flags); 2581 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS), 2582 &fp); 2583 if (error != 0) 2584 return (error); 2585 #ifdef AUDIT 2586 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2587 AUDIT_ARG_VNODE1(fp->f_vnode); 2588 VOP_UNLOCK(fp->f_vnode, 0); 2589 #endif 2590 error = setfflags(td, fp->f_vnode, uap->flags); 2591 fdrop(fp, td); 2592 return (error); 2593 } 2594 2595 /* 2596 * Common implementation code for chmod(), lchmod() and fchmod(). 2597 */ 2598 int 2599 setfmode(td, cred, vp, mode) 2600 struct thread *td; 2601 struct ucred *cred; 2602 struct vnode *vp; 2603 int mode; 2604 { 2605 struct mount *mp; 2606 struct vattr vattr; 2607 int error; 2608 2609 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2610 return (error); 2611 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2612 VATTR_NULL(&vattr); 2613 vattr.va_mode = mode & ALLPERMS; 2614 #ifdef MAC 2615 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2616 if (error == 0) 2617 #endif 2618 error = VOP_SETATTR(vp, &vattr, cred); 2619 VOP_UNLOCK(vp, 0); 2620 vn_finished_write(mp); 2621 return (error); 2622 } 2623 2624 /* 2625 * Change mode of a file given path name. 
2626 */ 2627 #ifndef _SYS_SYSPROTO_H_ 2628 struct chmod_args { 2629 char *path; 2630 int mode; 2631 }; 2632 #endif 2633 int 2634 sys_chmod(td, uap) 2635 struct thread *td; 2636 register struct chmod_args /* { 2637 char *path; 2638 int mode; 2639 } */ *uap; 2640 { 2641 2642 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2643 uap->mode, 0)); 2644 } 2645 2646 #ifndef _SYS_SYSPROTO_H_ 2647 struct fchmodat_args { 2648 int dirfd; 2649 char *path; 2650 mode_t mode; 2651 int flag; 2652 } 2653 #endif 2654 int 2655 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2656 { 2657 int flag = uap->flag; 2658 int fd = uap->fd; 2659 char *path = uap->path; 2660 mode_t mode = uap->mode; 2661 2662 if (flag & ~AT_SYMLINK_NOFOLLOW) 2663 return (EINVAL); 2664 2665 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2666 } 2667 2668 /* 2669 * Change mode of a file given path name (don't follow links.) 2670 */ 2671 #ifndef _SYS_SYSPROTO_H_ 2672 struct lchmod_args { 2673 char *path; 2674 int mode; 2675 }; 2676 #endif 2677 int 2678 sys_lchmod(td, uap) 2679 struct thread *td; 2680 register struct lchmod_args /* { 2681 char *path; 2682 int mode; 2683 } */ *uap; 2684 { 2685 2686 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2687 uap->mode, AT_SYMLINK_NOFOLLOW)); 2688 } 2689 2690 int 2691 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2692 mode_t mode, int flag) 2693 { 2694 struct nameidata nd; 2695 cap_rights_t rights; 2696 int error, follow; 2697 2698 AUDIT_ARG_MODE(mode); 2699 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; 2700 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2701 cap_rights_init(&rights, CAP_FCHMOD), td); 2702 if ((error = namei(&nd)) != 0) 2703 return (error); 2704 NDFREE(&nd, NDF_ONLY_PNBUF); 2705 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2706 vrele(nd.ni_vp); 2707 return (error); 2708 } 2709 2710 /* 2711 * Change mode of a file given a file descriptor. 2712 */ 2713 #ifndef _SYS_SYSPROTO_H_ 2714 struct fchmod_args { 2715 int fd; 2716 int mode; 2717 }; 2718 #endif 2719 int 2720 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2721 { 2722 struct file *fp; 2723 cap_rights_t rights; 2724 int error; 2725 2726 AUDIT_ARG_FD(uap->fd); 2727 AUDIT_ARG_MODE(uap->mode); 2728 2729 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2730 if (error != 0) 2731 return (error); 2732 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2733 fdrop(fp, td); 2734 return (error); 2735 } 2736 2737 /* 2738 * Common implementation for chown(), lchown(), and fchown() 2739 */ 2740 int 2741 setfown(td, cred, vp, uid, gid) 2742 struct thread *td; 2743 struct ucred *cred; 2744 struct vnode *vp; 2745 uid_t uid; 2746 gid_t gid; 2747 { 2748 struct mount *mp; 2749 struct vattr vattr; 2750 int error; 2751 2752 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2753 return (error); 2754 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2755 VATTR_NULL(&vattr); 2756 vattr.va_uid = uid; 2757 vattr.va_gid = gid; 2758 #ifdef MAC 2759 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2760 vattr.va_gid); 2761 if (error == 0) 2762 #endif 2763 error = VOP_SETATTR(vp, &vattr, cred); 2764 VOP_UNLOCK(vp, 0); 2765 vn_finished_write(mp); 2766 return (error); 2767 } 2768 2769 /* 2770 * Set ownership given a path name. 
2771 */ 2772 #ifndef _SYS_SYSPROTO_H_ 2773 struct chown_args { 2774 char *path; 2775 int uid; 2776 int gid; 2777 }; 2778 #endif 2779 int 2780 sys_chown(td, uap) 2781 struct thread *td; 2782 register struct chown_args /* { 2783 char *path; 2784 int uid; 2785 int gid; 2786 } */ *uap; 2787 { 2788 2789 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2790 uap->gid, 0)); 2791 } 2792 2793 #ifndef _SYS_SYSPROTO_H_ 2794 struct fchownat_args { 2795 int fd; 2796 const char * path; 2797 uid_t uid; 2798 gid_t gid; 2799 int flag; 2800 }; 2801 #endif 2802 int 2803 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2804 { 2805 int flag; 2806 2807 flag = uap->flag; 2808 if (flag & ~AT_SYMLINK_NOFOLLOW) 2809 return (EINVAL); 2810 2811 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2812 uap->gid, uap->flag)); 2813 } 2814 2815 int 2816 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2817 int uid, int gid, int flag) 2818 { 2819 struct nameidata nd; 2820 cap_rights_t rights; 2821 int error, follow; 2822 2823 AUDIT_ARG_OWNER(uid, gid); 2824 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2825 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2826 cap_rights_init(&rights, CAP_FCHOWN), td); 2827 2828 if ((error = namei(&nd)) != 0) 2829 return (error); 2830 NDFREE(&nd, NDF_ONLY_PNBUF); 2831 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2832 vrele(nd.ni_vp); 2833 return (error); 2834 } 2835 2836 /* 2837 * Set ownership given a path name, do not cross symlinks. 
2838 */ 2839 #ifndef _SYS_SYSPROTO_H_ 2840 struct lchown_args { 2841 char *path; 2842 int uid; 2843 int gid; 2844 }; 2845 #endif 2846 int 2847 sys_lchown(td, uap) 2848 struct thread *td; 2849 register struct lchown_args /* { 2850 char *path; 2851 int uid; 2852 int gid; 2853 } */ *uap; 2854 { 2855 2856 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2857 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2858 } 2859 2860 /* 2861 * Set ownership given a file descriptor. 2862 */ 2863 #ifndef _SYS_SYSPROTO_H_ 2864 struct fchown_args { 2865 int fd; 2866 int uid; 2867 int gid; 2868 }; 2869 #endif 2870 int 2871 sys_fchown(td, uap) 2872 struct thread *td; 2873 register struct fchown_args /* { 2874 int fd; 2875 int uid; 2876 int gid; 2877 } */ *uap; 2878 { 2879 struct file *fp; 2880 cap_rights_t rights; 2881 int error; 2882 2883 AUDIT_ARG_FD(uap->fd); 2884 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2885 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 2886 if (error != 0) 2887 return (error); 2888 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2889 fdrop(fp, td); 2890 return (error); 2891 } 2892 2893 /* 2894 * Common implementation code for utimes(), lutimes(), and futimes(). 
2895 */ 2896 static int 2897 getutimes(usrtvp, tvpseg, tsp) 2898 const struct timeval *usrtvp; 2899 enum uio_seg tvpseg; 2900 struct timespec *tsp; 2901 { 2902 struct timeval tv[2]; 2903 const struct timeval *tvp; 2904 int error; 2905 2906 if (usrtvp == NULL) { 2907 vfs_timestamp(&tsp[0]); 2908 tsp[1] = tsp[0]; 2909 } else { 2910 if (tvpseg == UIO_SYSSPACE) { 2911 tvp = usrtvp; 2912 } else { 2913 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 2914 return (error); 2915 tvp = tv; 2916 } 2917 2918 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 2919 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 2920 return (EINVAL); 2921 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2922 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2923 } 2924 return (0); 2925 } 2926 2927 /* 2928 * Common implementation code for futimens(), utimensat(). 2929 */ 2930 #define UTIMENS_NULL 0x1 2931 #define UTIMENS_EXIT 0x2 2932 static int 2933 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 2934 struct timespec *tsp, int *retflags) 2935 { 2936 struct timespec tsnow; 2937 int error; 2938 2939 vfs_timestamp(&tsnow); 2940 *retflags = 0; 2941 if (usrtsp == NULL) { 2942 tsp[0] = tsnow; 2943 tsp[1] = tsnow; 2944 *retflags |= UTIMENS_NULL; 2945 return (0); 2946 } 2947 if (tspseg == UIO_SYSSPACE) { 2948 tsp[0] = usrtsp[0]; 2949 tsp[1] = usrtsp[1]; 2950 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 2951 return (error); 2952 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 2953 *retflags |= UTIMENS_EXIT; 2954 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 2955 *retflags |= UTIMENS_NULL; 2956 if (tsp[0].tv_nsec == UTIME_OMIT) 2957 tsp[0].tv_sec = VNOVAL; 2958 else if (tsp[0].tv_nsec == UTIME_NOW) 2959 tsp[0] = tsnow; 2960 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 2961 return (EINVAL); 2962 if (tsp[1].tv_nsec == UTIME_OMIT) 2963 tsp[1].tv_sec = VNOVAL; 2964 else if (tsp[1].tv_nsec == UTIME_NOW) 2965 tsp[1] = tsnow; 2966 else if 
(tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 2967 return (EINVAL); 2968 2969 return (0); 2970 } 2971 2972 /* 2973 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 2974 * and utimensat(). 2975 */ 2976 static int 2977 setutimes(td, vp, ts, numtimes, nullflag) 2978 struct thread *td; 2979 struct vnode *vp; 2980 const struct timespec *ts; 2981 int numtimes; 2982 int nullflag; 2983 { 2984 struct mount *mp; 2985 struct vattr vattr; 2986 int error, setbirthtime; 2987 2988 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2989 return (error); 2990 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2991 setbirthtime = 0; 2992 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 2993 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 2994 setbirthtime = 1; 2995 VATTR_NULL(&vattr); 2996 vattr.va_atime = ts[0]; 2997 vattr.va_mtime = ts[1]; 2998 if (setbirthtime) 2999 vattr.va_birthtime = ts[1]; 3000 if (numtimes > 2) 3001 vattr.va_birthtime = ts[2]; 3002 if (nullflag) 3003 vattr.va_vaflags |= VA_UTIMES_NULL; 3004 #ifdef MAC 3005 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3006 vattr.va_mtime); 3007 #endif 3008 if (error == 0) 3009 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3010 VOP_UNLOCK(vp, 0); 3011 vn_finished_write(mp); 3012 return (error); 3013 } 3014 3015 /* 3016 * Set the access and modification times of a file. 
3017 */ 3018 #ifndef _SYS_SYSPROTO_H_ 3019 struct utimes_args { 3020 char *path; 3021 struct timeval *tptr; 3022 }; 3023 #endif 3024 int 3025 sys_utimes(td, uap) 3026 struct thread *td; 3027 register struct utimes_args /* { 3028 char *path; 3029 struct timeval *tptr; 3030 } */ *uap; 3031 { 3032 3033 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3034 uap->tptr, UIO_USERSPACE)); 3035 } 3036 3037 #ifndef _SYS_SYSPROTO_H_ 3038 struct futimesat_args { 3039 int fd; 3040 const char * path; 3041 const struct timeval * times; 3042 }; 3043 #endif 3044 int 3045 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3046 { 3047 3048 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3049 uap->times, UIO_USERSPACE)); 3050 } 3051 3052 int 3053 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3054 struct timeval *tptr, enum uio_seg tptrseg) 3055 { 3056 struct nameidata nd; 3057 struct timespec ts[2]; 3058 cap_rights_t rights; 3059 int error; 3060 3061 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3062 return (error); 3063 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3064 cap_rights_init(&rights, CAP_FUTIMES), td); 3065 3066 if ((error = namei(&nd)) != 0) 3067 return (error); 3068 NDFREE(&nd, NDF_ONLY_PNBUF); 3069 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3070 vrele(nd.ni_vp); 3071 return (error); 3072 } 3073 3074 /* 3075 * Set the access and modification times of a file. 
3076 */ 3077 #ifndef _SYS_SYSPROTO_H_ 3078 struct lutimes_args { 3079 char *path; 3080 struct timeval *tptr; 3081 }; 3082 #endif 3083 int 3084 sys_lutimes(td, uap) 3085 struct thread *td; 3086 register struct lutimes_args /* { 3087 char *path; 3088 struct timeval *tptr; 3089 } */ *uap; 3090 { 3091 3092 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3093 UIO_USERSPACE)); 3094 } 3095 3096 int 3097 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3098 struct timeval *tptr, enum uio_seg tptrseg) 3099 { 3100 struct timespec ts[2]; 3101 struct nameidata nd; 3102 int error; 3103 3104 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3105 return (error); 3106 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3107 if ((error = namei(&nd)) != 0) 3108 return (error); 3109 NDFREE(&nd, NDF_ONLY_PNBUF); 3110 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3111 vrele(nd.ni_vp); 3112 return (error); 3113 } 3114 3115 /* 3116 * Set the access and modification times of a file. 
3117 */ 3118 #ifndef _SYS_SYSPROTO_H_ 3119 struct futimes_args { 3120 int fd; 3121 struct timeval *tptr; 3122 }; 3123 #endif 3124 int 3125 sys_futimes(td, uap) 3126 struct thread *td; 3127 register struct futimes_args /* { 3128 int fd; 3129 struct timeval *tptr; 3130 } */ *uap; 3131 { 3132 3133 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3134 } 3135 3136 int 3137 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3138 enum uio_seg tptrseg) 3139 { 3140 struct timespec ts[2]; 3141 struct file *fp; 3142 cap_rights_t rights; 3143 int error; 3144 3145 AUDIT_ARG_FD(fd); 3146 error = getutimes(tptr, tptrseg, ts); 3147 if (error != 0) 3148 return (error); 3149 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3150 if (error != 0) 3151 return (error); 3152 #ifdef AUDIT 3153 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3154 AUDIT_ARG_VNODE1(fp->f_vnode); 3155 VOP_UNLOCK(fp->f_vnode, 0); 3156 #endif 3157 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3158 fdrop(fp, td); 3159 return (error); 3160 } 3161 3162 int 3163 sys_futimens(struct thread *td, struct futimens_args *uap) 3164 { 3165 3166 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3167 } 3168 3169 int 3170 kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3171 enum uio_seg tptrseg) 3172 { 3173 struct timespec ts[2]; 3174 struct file *fp; 3175 cap_rights_t rights; 3176 int error, flags; 3177 3178 AUDIT_ARG_FD(fd); 3179 error = getutimens(tptr, tptrseg, ts, &flags); 3180 if (error != 0) 3181 return (error); 3182 if (flags & UTIMENS_EXIT) 3183 return (0); 3184 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3185 if (error != 0) 3186 return (error); 3187 #ifdef AUDIT 3188 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3189 AUDIT_ARG_VNODE1(fp->f_vnode); 3190 VOP_UNLOCK(fp->f_vnode, 0); 3191 #endif 3192 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3193 fdrop(fp, td); 3194 return (error); 3195 } 3196 
3197 int 3198 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3199 { 3200 3201 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3202 uap->times, UIO_USERSPACE, uap->flag)); 3203 } 3204 3205 int 3206 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3207 struct timespec *tptr, enum uio_seg tptrseg, int flag) 3208 { 3209 struct nameidata nd; 3210 struct timespec ts[2]; 3211 cap_rights_t rights; 3212 int error, flags; 3213 3214 if (flag & ~AT_SYMLINK_NOFOLLOW) 3215 return (EINVAL); 3216 3217 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3218 return (error); 3219 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 3220 FOLLOW) | AUDITVNODE1, pathseg, path, fd, 3221 cap_rights_init(&rights, CAP_FUTIMES), td); 3222 if ((error = namei(&nd)) != 0) 3223 return (error); 3224 /* 3225 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3226 * POSIX states: 3227 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3228 * "Search permission is denied by a component of the path prefix." 3229 */ 3230 NDFREE(&nd, NDF_ONLY_PNBUF); 3231 if ((flags & UTIMENS_EXIT) == 0) 3232 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3233 vrele(nd.ni_vp); 3234 return (error); 3235 } 3236 3237 /* 3238 * Truncate a file given its path name. 
 */
#ifndef _SYS_SYSPROTO_H_
struct truncate_args {
	char	*path;
	int	pad;
	off_t	length;
};
#endif
int
sys_truncate(td, uap)
	struct thread *td;
	register struct truncate_args /* {
		char *path;
		int pad;
		off_t length;
	} */ *uap;
{

	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
}

/*
 * Set the size of the file named by path to length.
 *
 * Returns EINVAL for a negative length, EISDIR when the path names a
 * directory, otherwise the first error reported by the lookup, the MAC
 * write check (when MAC is configured), vn_writechk(), VOP_ACCESS() for
 * VWRITE, or VOP_SETATTR().
 */
int
kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
{
	struct mount *mp;
	struct vnode *vp;
	void *rl_cookie;
	struct vattr vattr;
	struct nameidata nd;
	int error;

	if (length < 0)
		return(EINVAL);
	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	/* Write-lock the whole byte range [0, OFF_MAX) of the file. */
	rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
		/* Undo the range lock and the lookup reference. */
		vn_rangelock_unlock(vp, rl_cookie);
		vrele(vp);
		return (error);
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (vp->v_type == VDIR)
		error = EISDIR;
#ifdef MAC
	/* Empty body: a non-zero MAC result just ends the else-if chain. */
	else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
	}
#endif
	else if ((error = vn_writechk(vp)) == 0 &&
	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_size = length;
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
	}
	/* Release in reverse order of acquisition. */
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	vn_rangelock_unlock(vp, rl_cookie);
	vrele(vp);
	return (error);
}

#if defined(COMPAT_43)
/*
 * Truncate a file given its path name.
3306 */ 3307 #ifndef _SYS_SYSPROTO_H_ 3308 struct otruncate_args { 3309 char *path; 3310 long length; 3311 }; 3312 #endif 3313 int 3314 otruncate(td, uap) 3315 struct thread *td; 3316 register struct otruncate_args /* { 3317 char *path; 3318 long length; 3319 } */ *uap; 3320 { 3321 struct truncate_args /* { 3322 char *path; 3323 int pad; 3324 off_t length; 3325 } */ nuap; 3326 3327 nuap.path = uap->path; 3328 nuap.length = uap->length; 3329 return (sys_truncate(td, &nuap)); 3330 } 3331 #endif /* COMPAT_43 */ 3332 3333 #if defined(COMPAT_FREEBSD6) 3334 /* Versions with the pad argument */ 3335 int 3336 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3337 { 3338 struct truncate_args ouap; 3339 3340 ouap.path = uap->path; 3341 ouap.length = uap->length; 3342 return (sys_truncate(td, &ouap)); 3343 } 3344 3345 int 3346 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3347 { 3348 struct ftruncate_args ouap; 3349 3350 ouap.fd = uap->fd; 3351 ouap.length = uap->length; 3352 return (sys_ftruncate(td, &ouap)); 3353 } 3354 #endif 3355 3356 int 3357 kern_fsync(struct thread *td, int fd, bool fullsync) 3358 { 3359 struct vnode *vp; 3360 struct mount *mp; 3361 struct file *fp; 3362 cap_rights_t rights; 3363 int error, lock_flags; 3364 3365 AUDIT_ARG_FD(fd); 3366 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSYNC), &fp); 3367 if (error != 0) 3368 return (error); 3369 vp = fp->f_vnode; 3370 #if 0 3371 if (!fullsync) 3372 /* XXXKIB: compete outstanding aio writes */; 3373 #endif 3374 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3375 if (error != 0) 3376 goto drop; 3377 if (MNT_SHARED_WRITES(mp) || 3378 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3379 lock_flags = LK_SHARED; 3380 } else { 3381 lock_flags = LK_EXCLUSIVE; 3382 } 3383 vn_lock(vp, lock_flags | LK_RETRY); 3384 AUDIT_ARG_VNODE1(vp); 3385 if (vp->v_object != NULL) { 3386 VM_OBJECT_WLOCK(vp->v_object); 3387 vm_object_page_clean(vp->v_object, 0, 0, 0); 
3388 VM_OBJECT_WUNLOCK(vp->v_object); 3389 } 3390 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3391 VOP_UNLOCK(vp, 0); 3392 vn_finished_write(mp); 3393 drop: 3394 fdrop(fp, td); 3395 return (error); 3396 } 3397 3398 /* 3399 * Sync an open file. 3400 */ 3401 #ifndef _SYS_SYSPROTO_H_ 3402 struct fsync_args { 3403 int fd; 3404 }; 3405 #endif 3406 int 3407 sys_fsync(struct thread *td, struct fsync_args *uap) 3408 { 3409 3410 return (kern_fsync(td, uap->fd, true)); 3411 } 3412 3413 int 3414 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3415 { 3416 3417 return (kern_fsync(td, uap->fd, false)); 3418 } 3419 3420 /* 3421 * Rename files. Source and destination must either both be directories, or 3422 * both not be directories. If target is a directory, it must be empty. 3423 */ 3424 #ifndef _SYS_SYSPROTO_H_ 3425 struct rename_args { 3426 char *from; 3427 char *to; 3428 }; 3429 #endif 3430 int 3431 sys_rename(td, uap) 3432 struct thread *td; 3433 register struct rename_args /* { 3434 char *from; 3435 char *to; 3436 } */ *uap; 3437 { 3438 3439 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3440 uap->to, UIO_USERSPACE)); 3441 } 3442 3443 #ifndef _SYS_SYSPROTO_H_ 3444 struct renameat_args { 3445 int oldfd; 3446 char *old; 3447 int newfd; 3448 char *new; 3449 }; 3450 #endif 3451 int 3452 sys_renameat(struct thread *td, struct renameat_args *uap) 3453 { 3454 3455 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3456 UIO_USERSPACE)); 3457 } 3458 3459 int 3460 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3461 enum uio_seg pathseg) 3462 { 3463 struct mount *mp = NULL; 3464 struct vnode *tvp, *fvp, *tdvp; 3465 struct nameidata fromnd, tond; 3466 cap_rights_t rights; 3467 int error; 3468 3469 again: 3470 bwillwrite(); 3471 #ifdef MAC 3472 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3473 AUDITVNODE1, pathseg, old, oldfd, 3474 cap_rights_init(&rights, 
CAP_RENAMEAT_SOURCE), td); 3475 #else 3476 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3477 pathseg, old, oldfd, 3478 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3479 #endif 3480 3481 if ((error = namei(&fromnd)) != 0) 3482 return (error); 3483 #ifdef MAC 3484 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3485 fromnd.ni_vp, &fromnd.ni_cnd); 3486 VOP_UNLOCK(fromnd.ni_dvp, 0); 3487 if (fromnd.ni_dvp != fromnd.ni_vp) 3488 VOP_UNLOCK(fromnd.ni_vp, 0); 3489 #endif 3490 fvp = fromnd.ni_vp; 3491 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3492 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3493 cap_rights_init(&rights, CAP_RENAMEAT_TARGET), td); 3494 if (fromnd.ni_vp->v_type == VDIR) 3495 tond.ni_cnd.cn_flags |= WILLBEDIR; 3496 if ((error = namei(&tond)) != 0) { 3497 /* Translate error code for rename("dir1", "dir2/."). */ 3498 if (error == EISDIR && fvp->v_type == VDIR) 3499 error = EINVAL; 3500 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3501 vrele(fromnd.ni_dvp); 3502 vrele(fvp); 3503 goto out1; 3504 } 3505 tdvp = tond.ni_dvp; 3506 tvp = tond.ni_vp; 3507 error = vn_start_write(fvp, &mp, V_NOWAIT); 3508 if (error != 0) { 3509 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3510 NDFREE(&tond, NDF_ONLY_PNBUF); 3511 if (tvp != NULL) 3512 vput(tvp); 3513 if (tdvp == tvp) 3514 vrele(tdvp); 3515 else 3516 vput(tdvp); 3517 vrele(fromnd.ni_dvp); 3518 vrele(fvp); 3519 vrele(tond.ni_startdir); 3520 if (fromnd.ni_startdir != NULL) 3521 vrele(fromnd.ni_startdir); 3522 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3523 if (error != 0) 3524 return (error); 3525 goto again; 3526 } 3527 if (tvp != NULL) { 3528 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3529 error = ENOTDIR; 3530 goto out; 3531 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3532 error = EISDIR; 3533 goto out; 3534 } 3535 #ifdef CAPABILITIES 3536 if (newfd != AT_FDCWD) { 3537 /* 3538 * If the target already exists we require CAP_UNLINKAT 3539 * from 
'newfd'. 3540 */ 3541 error = cap_check(&tond.ni_filecaps.fc_rights, 3542 cap_rights_init(&rights, CAP_UNLINKAT)); 3543 if (error != 0) 3544 goto out; 3545 } 3546 #endif 3547 } 3548 if (fvp == tdvp) { 3549 error = EINVAL; 3550 goto out; 3551 } 3552 /* 3553 * If the source is the same as the destination (that is, if they 3554 * are links to the same vnode), then there is nothing to do. 3555 */ 3556 if (fvp == tvp) 3557 error = -1; 3558 #ifdef MAC 3559 else 3560 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3561 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3562 #endif 3563 out: 3564 if (error == 0) { 3565 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3566 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3567 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3568 NDFREE(&tond, NDF_ONLY_PNBUF); 3569 } else { 3570 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3571 NDFREE(&tond, NDF_ONLY_PNBUF); 3572 if (tvp != NULL) 3573 vput(tvp); 3574 if (tdvp == tvp) 3575 vrele(tdvp); 3576 else 3577 vput(tdvp); 3578 vrele(fromnd.ni_dvp); 3579 vrele(fvp); 3580 } 3581 vrele(tond.ni_startdir); 3582 vn_finished_write(mp); 3583 out1: 3584 if (fromnd.ni_startdir) 3585 vrele(fromnd.ni_startdir); 3586 if (error == -1) 3587 return (0); 3588 return (error); 3589 } 3590 3591 /* 3592 * Make a directory file. 
 */
#ifndef _SYS_SYSPROTO_H_
struct mkdir_args {
	char	*path;
	int	mode;
};
#endif
int
sys_mkdir(td, uap)
	struct thread *td;
	register struct mkdir_args /* {
		char *path;
		int mode;
	} */ *uap;
{

	return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct mkdirat_args {
	int	fd;
	char	*path;
	mode_t	mode;
};
#endif
int
sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
{

	return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
}

/*
 * Create the directory named by path (looked up with fd, CAP_MKDIRAT).
 *
 * Returns EEXIST when the name already resolves to a vnode; otherwise the
 * result of the lookup, the MAC create check (when configured) or
 * VOP_MKDIR().  The new directory's mode is the requested mode limited to
 * ACCESSPERMS with the bits of the process's fd_cmask cleared.
 */
int
kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
    int mode)
{
	struct mount *mp;
	struct vnode *vp;
	struct vattr vattr;
	struct nameidata nd;
	cap_rights_t rights;
	int error;

	AUDIT_ARG_MODE(mode);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
	    NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT),
	    td);
	nd.ni_cnd.cn_flags |= WILLBEDIR;
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* The name already exists. */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		/*
		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
		 * the strange behaviour of leaving the vnode unlocked
		 * if the target is the same vnode as the parent.
		 */
		if (vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		return (EEXIST);
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/*
		 * The write could not be started without sleeping: give up
		 * the parent, wait, and redo the lookup from scratch.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VDIR;
	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
#ifdef MAC
	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error != 0)
		goto out;
#endif
	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
#ifdef MAC
out:
#endif
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	/* On success VOP_MKDIR() stored the new vnode in nd.ni_vp. */
	if (error == 0)
		vput(nd.ni_vp);
	vn_finished_write(mp);
	return (error);
}

/*
 * Remove a directory file.
 */
#ifndef _SYS_SYSPROTO_H_
struct rmdir_args {
	char	*path;
};
#endif
int
sys_rmdir(td, uap)
	struct thread *td;
	struct rmdir_args /* {
		char *path;
	} */ *uap;
{

	return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE));
}

/*
 * Remove the directory named by path (looked up with fd, CAP_UNLINKAT).
 *
 * Returns ENOTDIR if the target is not a directory, EINVAL if it is the
 * same vnode as its parent, EBUSY if it has VV_ROOT set, otherwise the
 * result of the lookup, the MAC unlink check (when configured) or
 * VOP_RMDIR().
 */
int
kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
{
	struct mount *mp;
	struct vnode *vp;
	struct nameidata nd;
	cap_rights_t rights;
	int error;

restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
	    pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}
	/*
	 * No rmdir "." please.
	 */
	if (nd.ni_dvp == vp) {
		error = EINVAL;
		goto out;
	}
	/*
	 * The root of a mounted filesystem cannot be deleted.
	 */
	if (vp->v_vflag & VV_ROOT) {
		error = EBUSY;
		goto out;
	}
#ifdef MAC
	error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
	    &nd.ni_cnd);
	if (error != 0)
		goto out;
#endif
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/*
		 * The write could not be started without sleeping: release
		 * both vnodes, wait, and redo the lookup.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(vp);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
	vn_finished_write(mp);
out:
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(vp);
	if (nd.ni_dvp == vp)
		vrele(nd.ni_dvp);
	else
		vput(nd.ni_dvp);
	return (error);
}

#ifdef COMPAT_43
/*
 * Read a block of directory entries in a filesystem independent format.
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
{
	long loff;
	int error;

	error = kern_ogetdirentries(td, uap, &loff);
	/* On success copy the starting offset out to the caller's basep. */
	if (error == 0)
		error = copyout(&loff, uap->basep, sizeof(long));
	return (error);
}

/*
 * Worker for ogetdirentries().  Reads directory entries from the directory
 * open on uap->fd into uap->buf, rewriting the d_type/d_namlen bytes of
 * each entry for the old layout when the entries are staged through a
 * kernel buffer.  On success stores the offset the read started at in
 * *ploff and the number of bytes transferred in td->td_retval[0].
 */
int
kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
    long *ploff)
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	cap_rights_t rights;
	caddr_t dirbuf;
	int error, eofflag, readcnt;
	long loff;
	off_t foffset;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp);
	if (error != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->f_vnode;
	foffset = foffset_lock(fp, 0);
unionread:
	if (vp->v_type != VDIR) {
		foffset_unlock(fp, foffset, 0);
		fdrop(fp, td);
		return (EINVAL);
	}
	/* Describe the caller's buffer as the destination of the read. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	loff = auio.uio_offset = foffset;
#ifdef MAC
	error = mac_vnode_check_readdir(td->td_ucred, vp);
	if (error != 0) {
		VOP_UNLOCK(vp, 0);
		foffset_unlock(fp, foffset, FOF_NOUPDATE);
		fdrop(fp, td);
		return (error);
	}
#endif
# if (BYTE_ORDER != LITTLE_ENDIAN)
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			/* Read straight into the caller's buffer. */
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			foffset = auio.uio_offset;
		} else
# endif
	{
		/*
		 * Read into a temporary kernel buffer so each entry can be
		 * patched before it is copied to the caller.
		 */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = uap->count;
		dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		foffset = kuio.uio_offset;
		if (error == 0) {
			readcnt = uap->count - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
# if (BYTE_ORDER == LITTLE_ENDIAN)
				/*
				 * The expected low byte of
				 * dp->d_namlen is our dp->d_type.
				 * The high MBZ byte of dp->d_namlen
				 * is our dp->d_namlen.
				 */
				dp->d_type = dp->d_namlen;
				dp->d_namlen = 0;
# else
				/*
				 * The dp->d_type is the high byte
				 * of the expected dp->d_namlen,
				 * so must be zero'ed.
				 */
				dp->d_type = 0;
# endif
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					/* A zero record length would loop forever. */
					error = EIO;
					break;
				}
			}
			/* Copy out only if every entry was walked. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		free(dirbuf, M_TEMP);
	}
	if (error != 0) {
		VOP_UNLOCK(vp, 0);
		foffset_unlock(fp, foffset, 0);
		fdrop(fp, td);
		return (error);
	}
	/*
	 * Nothing was read from the root of a union mount: switch the file
	 * over to the covered vnode and read that directory instead.
	 */
	if (uap->count == auio.uio_resid &&
	    (vp->v_vflag & VV_ROOT) &&
	    (vp->v_mount->mnt_flag & MNT_UNION)) {
		struct vnode *tvp = vp;
		vp = vp->v_mount->mnt_vnodecovered;
		VREF(vp);
		fp->f_vnode = vp;
		fp->f_data = vp;
		foffset = 0;
		vput(tvp);
		goto unionread;
	}
	VOP_UNLOCK(vp, 0);
	foffset_unlock(fp, foffset, 0);
	fdrop(fp, td);
	td->td_retval[0] = uap->count - auio.uio_resid;
	if (error == 0)
		*ploff = loff;
	return (error);
}
#endif /* COMPAT_43 */

/*
 * Read a block of directory entries in a filesystem independent format.
 */
#ifndef _SYS_SYSPROTO_H_
struct getdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
sys_getdirentries(td, uap)
	struct thread *td;
	register struct getdirentries_args /* {
		int fd;
		char *buf;
		u_int count;
		long *basep;
	} */ *uap;
{
	long base;
	int error;

	error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base,
	    NULL, UIO_USERSPACE);
	if (error != 0)
		return (error);
	/* basep is optional; the starting offset is copied out only if set. */
	if (uap->basep != NULL)
		error = copyout(&base, uap->basep, sizeof(long));
	return (error);
}

/*
 * Read directory entries from the directory open on fd into buf.
 *
 * count is the buffer size (EINVAL if it exceeds IOSIZE_MAX) and bufseg
 * says which address space buf lives in.  On success *basep receives the
 * file offset the read started at, *residp (if not NULL) the residual
 * count, and td->td_retval[0] the number of bytes transferred.
 * Returns EBADF if the file is not open for reading and EINVAL if it is
 * not a directory.
 */
int
kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
    long *basep, ssize_t *residp, enum uio_seg bufseg)
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	cap_rights_t rights;
	long loff;
	int error, eofflag;
	off_t foffset;

	AUDIT_ARG_FD(fd);
	if (count > IOSIZE_MAX)
		return (EINVAL);
	auio.uio_resid = count;
	error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp);
	if (error != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->f_vnode;
	foffset = foffset_lock(fp, 0);
unionread:
	if (vp->v_type != VDIR) {
		error = EINVAL;
		goto fail;
	}
	aiov.iov_base = buf;
	aiov.iov_len = count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = bufseg;
	auio.uio_td = td;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	AUDIT_ARG_VNODE1(vp);
	loff = auio.uio_offset = foffset;
#ifdef MAC
	error = mac_vnode_check_readdir(td->td_ucred, vp);
	if (error == 0)
#endif
		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
		    NULL);
	foffset = auio.uio_offset;
	if (error != 0) {
		VOP_UNLOCK(vp, 0);
		goto fail;
	}
	/*
	 * Nothing was read from the root of a union mount: switch the file
	 * over to the covered vnode and read that directory instead.
	 */
	if (count == auio.uio_resid &&
	    (vp->v_vflag & VV_ROOT) &&
	    (vp->v_mount->mnt_flag & MNT_UNION)) {
		struct vnode *tvp = vp;

		vp = vp->v_mount->mnt_vnodecovered;
		VREF(vp);
		fp->f_vnode = vp;
		fp->f_data = vp;
		foffset = 0;
		vput(tvp);
		goto unionread;
	}
	VOP_UNLOCK(vp, 0);
	*basep = loff;
	if (residp != NULL)
		*residp = auio.uio_resid;
	td->td_retval[0] = count - auio.uio_resid;
fail:
	/* Shared exit: release the offset lock and the file reference. */
	foffset_unlock(fp, foffset, 0);
	fdrop(fp, td);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct getdents_args {
	int fd;
	char *buf;
	size_t count;
};
#endif
/*
 * getdents() is getdirentries() without the base pointer: the arguments
 * are repackaged with basep set to NULL and passed to sys_getdirentries().
 */
int
sys_getdents(td, uap)
	struct thread *td;
	register struct getdents_args /* {
		int fd;
		char *buf;
		size_t count;
	} */ *uap;
{
	struct getdirentries_args ap;

	ap.fd = uap->fd;
	ap.buf = uap->buf;
	ap.count = uap->count;
	ap.basep = NULL;
	return (sys_getdirentries(td, &ap));
}

/*
 * Set the mode mask for creation of filesystem nodes.
 */
#ifndef _SYS_SYSPROTO_H_
struct umask_args {
	int	newmask;
};
#endif
int
sys_umask(td, uap)
	struct thread *td;
	struct umask_args /* {
		int newmask;
	} */ *uap;
{
	struct filedesc *fdp;

	fdp = td->td_proc->p_fd;
	/* Return the previous mask and install the new one under the lock. */
	FILEDESC_XLOCK(fdp);
	td->td_retval[0] = fdp->fd_cmask;
	fdp->fd_cmask = uap->newmask & ALLPERMS;
	FILEDESC_XUNLOCK(fdp);
	return (0);
}

/*
 * Void all references to file by ripping underlying filesystem away from
 * vnode.
4090 */ 4091 #ifndef _SYS_SYSPROTO_H_ 4092 struct revoke_args { 4093 char *path; 4094 }; 4095 #endif 4096 int 4097 sys_revoke(td, uap) 4098 struct thread *td; 4099 register struct revoke_args /* { 4100 char *path; 4101 } */ *uap; 4102 { 4103 struct vnode *vp; 4104 struct vattr vattr; 4105 struct nameidata nd; 4106 int error; 4107 4108 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4109 uap->path, td); 4110 if ((error = namei(&nd)) != 0) 4111 return (error); 4112 vp = nd.ni_vp; 4113 NDFREE(&nd, NDF_ONLY_PNBUF); 4114 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4115 error = EINVAL; 4116 goto out; 4117 } 4118 #ifdef MAC 4119 error = mac_vnode_check_revoke(td->td_ucred, vp); 4120 if (error != 0) 4121 goto out; 4122 #endif 4123 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4124 if (error != 0) 4125 goto out; 4126 if (td->td_ucred->cr_uid != vattr.va_uid) { 4127 error = priv_check(td, PRIV_VFS_ADMIN); 4128 if (error != 0) 4129 goto out; 4130 } 4131 if (vcount(vp) > 1) 4132 VOP_REVOKE(vp, REVOKEALL); 4133 out: 4134 vput(vp); 4135 return (error); 4136 } 4137 4138 /* 4139 * Convert a user file descriptor to a kernel file entry and check that, if it 4140 * is a capability, the correct rights are present. A reference on the file 4141 * entry is held upon returning. 4142 */ 4143 int 4144 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4145 { 4146 struct file *fp; 4147 int error; 4148 4149 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); 4150 if (error != 0) 4151 return (error); 4152 4153 /* 4154 * The file could be not of the vnode type, or it may be not 4155 * yet fully initialized, in which case the f_vnode pointer 4156 * may be set, but f_ops is still badfileops. E.g., 4157 * devfs_open() transiently create such situation to 4158 * facilitate csw d_fdopen(). 
4159 * 4160 * Dupfdopen() handling in kern_openat() installs the 4161 * half-baked file into the process descriptor table, allowing 4162 * other thread to dereference it. Guard against the race by 4163 * checking f_ops. 4164 */ 4165 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4166 fdrop(fp, td); 4167 return (EINVAL); 4168 } 4169 *fpp = fp; 4170 return (0); 4171 } 4172 4173 4174 /* 4175 * Get an (NFS) file handle. 4176 */ 4177 #ifndef _SYS_SYSPROTO_H_ 4178 struct lgetfh_args { 4179 char *fname; 4180 fhandle_t *fhp; 4181 }; 4182 #endif 4183 int 4184 sys_lgetfh(td, uap) 4185 struct thread *td; 4186 register struct lgetfh_args *uap; 4187 { 4188 struct nameidata nd; 4189 fhandle_t fh; 4190 register struct vnode *vp; 4191 int error; 4192 4193 error = priv_check(td, PRIV_VFS_GETFH); 4194 if (error != 0) 4195 return (error); 4196 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4197 uap->fname, td); 4198 error = namei(&nd); 4199 if (error != 0) 4200 return (error); 4201 NDFREE(&nd, NDF_ONLY_PNBUF); 4202 vp = nd.ni_vp; 4203 bzero(&fh, sizeof(fh)); 4204 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4205 error = VOP_VPTOFH(vp, &fh.fh_fid); 4206 vput(vp); 4207 if (error == 0) 4208 error = copyout(&fh, uap->fhp, sizeof (fh)); 4209 return (error); 4210 } 4211 4212 #ifndef _SYS_SYSPROTO_H_ 4213 struct getfh_args { 4214 char *fname; 4215 fhandle_t *fhp; 4216 }; 4217 #endif 4218 int 4219 sys_getfh(td, uap) 4220 struct thread *td; 4221 register struct getfh_args *uap; 4222 { 4223 struct nameidata nd; 4224 fhandle_t fh; 4225 register struct vnode *vp; 4226 int error; 4227 4228 error = priv_check(td, PRIV_VFS_GETFH); 4229 if (error != 0) 4230 return (error); 4231 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4232 uap->fname, td); 4233 error = namei(&nd); 4234 if (error != 0) 4235 return (error); 4236 NDFREE(&nd, NDF_ONLY_PNBUF); 4237 vp = nd.ni_vp; 4238 bzero(&fh, sizeof(fh)); 4239 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4240 error 
= VOP_VPTOFH(vp, &fh.fh_fid); 4241 vput(vp); 4242 if (error == 0) 4243 error = copyout(&fh, uap->fhp, sizeof (fh)); 4244 return (error); 4245 } 4246 4247 /* 4248 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4249 * open descriptor. 4250 * 4251 * warning: do not remove the priv_check() call or this becomes one giant 4252 * security hole. 4253 */ 4254 #ifndef _SYS_SYSPROTO_H_ 4255 struct fhopen_args { 4256 const struct fhandle *u_fhp; 4257 int flags; 4258 }; 4259 #endif 4260 int 4261 sys_fhopen(td, uap) 4262 struct thread *td; 4263 struct fhopen_args /* { 4264 const struct fhandle *u_fhp; 4265 int flags; 4266 } */ *uap; 4267 { 4268 struct mount *mp; 4269 struct vnode *vp; 4270 struct fhandle fhp; 4271 struct file *fp; 4272 int fmode, error; 4273 int indx; 4274 4275 error = priv_check(td, PRIV_VFS_FHOPEN); 4276 if (error != 0) 4277 return (error); 4278 indx = -1; 4279 fmode = FFLAGS(uap->flags); 4280 /* why not allow a non-read/write open for our lockd? */ 4281 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4282 return (EINVAL); 4283 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4284 if (error != 0) 4285 return(error); 4286 /* find the mount point */ 4287 mp = vfs_busyfs(&fhp.fh_fsid); 4288 if (mp == NULL) 4289 return (ESTALE); 4290 /* now give me my vnode, it gets returned to me locked */ 4291 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4292 vfs_unbusy(mp); 4293 if (error != 0) 4294 return (error); 4295 4296 error = falloc_noinstall(td, &fp); 4297 if (error != 0) { 4298 vput(vp); 4299 return (error); 4300 } 4301 /* 4302 * An extra reference on `fp' has been held for us by 4303 * falloc_noinstall(). 
4304 */ 4305 4306 #ifdef INVARIANTS 4307 td->td_dupfd = -1; 4308 #endif 4309 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4310 if (error != 0) { 4311 KASSERT(fp->f_ops == &badfileops, 4312 ("VOP_OPEN in fhopen() set f_ops")); 4313 KASSERT(td->td_dupfd < 0, 4314 ("fhopen() encountered fdopen()")); 4315 4316 vput(vp); 4317 goto bad; 4318 } 4319 #ifdef INVARIANTS 4320 td->td_dupfd = 0; 4321 #endif 4322 fp->f_vnode = vp; 4323 fp->f_seqcount = 1; 4324 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4325 &vnops); 4326 VOP_UNLOCK(vp, 0); 4327 if ((fmode & O_TRUNC) != 0) { 4328 error = fo_truncate(fp, 0, td->td_ucred, td); 4329 if (error != 0) 4330 goto bad; 4331 } 4332 4333 error = finstall(td, fp, &indx, fmode, NULL); 4334 bad: 4335 fdrop(fp, td); 4336 td->td_retval[0] = indx; 4337 return (error); 4338 } 4339 4340 /* 4341 * Stat an (NFS) file handle. 4342 */ 4343 #ifndef _SYS_SYSPROTO_H_ 4344 struct fhstat_args { 4345 struct fhandle *u_fhp; 4346 struct stat *sb; 4347 }; 4348 #endif 4349 int 4350 sys_fhstat(td, uap) 4351 struct thread *td; 4352 register struct fhstat_args /* { 4353 struct fhandle *u_fhp; 4354 struct stat *sb; 4355 } */ *uap; 4356 { 4357 struct stat sb; 4358 struct fhandle fh; 4359 int error; 4360 4361 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4362 if (error != 0) 4363 return (error); 4364 error = kern_fhstat(td, fh, &sb); 4365 if (error == 0) 4366 error = copyout(&sb, uap->sb, sizeof(sb)); 4367 return (error); 4368 } 4369 4370 int 4371 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4372 { 4373 struct mount *mp; 4374 struct vnode *vp; 4375 int error; 4376 4377 error = priv_check(td, PRIV_VFS_FHSTAT); 4378 if (error != 0) 4379 return (error); 4380 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4381 return (ESTALE); 4382 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4383 vfs_unbusy(mp); 4384 if (error != 0) 4385 return (error); 4386 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4387 vput(vp); 
4388 return (error); 4389 } 4390 4391 /* 4392 * Implement fstatfs() for (NFS) file handles. 4393 */ 4394 #ifndef _SYS_SYSPROTO_H_ 4395 struct fhstatfs_args { 4396 struct fhandle *u_fhp; 4397 struct statfs *buf; 4398 }; 4399 #endif 4400 int 4401 sys_fhstatfs(td, uap) 4402 struct thread *td; 4403 struct fhstatfs_args /* { 4404 struct fhandle *u_fhp; 4405 struct statfs *buf; 4406 } */ *uap; 4407 { 4408 struct statfs sf; 4409 fhandle_t fh; 4410 int error; 4411 4412 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4413 if (error != 0) 4414 return (error); 4415 error = kern_fhstatfs(td, fh, &sf); 4416 if (error != 0) 4417 return (error); 4418 return (copyout(&sf, uap->buf, sizeof(sf))); 4419 } 4420 4421 int 4422 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4423 { 4424 struct statfs *sp; 4425 struct mount *mp; 4426 struct vnode *vp; 4427 int error; 4428 4429 error = priv_check(td, PRIV_VFS_FHSTATFS); 4430 if (error != 0) 4431 return (error); 4432 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4433 return (ESTALE); 4434 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4435 if (error != 0) { 4436 vfs_unbusy(mp); 4437 return (error); 4438 } 4439 vput(vp); 4440 error = prison_canseemount(td->td_ucred, mp); 4441 if (error != 0) 4442 goto out; 4443 #ifdef MAC 4444 error = mac_mount_check_stat(td->td_ucred, mp); 4445 if (error != 0) 4446 goto out; 4447 #endif 4448 /* 4449 * Set these in case the underlying filesystem fails to do so. 
4450 */ 4451 sp = &mp->mnt_stat; 4452 sp->f_version = STATFS_VERSION; 4453 sp->f_namemax = NAME_MAX; 4454 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4455 error = VFS_STATFS(mp, sp); 4456 if (error == 0) 4457 *buf = *sp; 4458 out: 4459 vfs_unbusy(mp); 4460 return (error); 4461 } 4462 4463 int 4464 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4465 { 4466 struct file *fp; 4467 struct mount *mp; 4468 struct vnode *vp; 4469 cap_rights_t rights; 4470 off_t olen, ooffset; 4471 int error; 4472 4473 if (offset < 0 || len <= 0) 4474 return (EINVAL); 4475 /* Check for wrap. */ 4476 if (offset > OFF_MAX - len) 4477 return (EFBIG); 4478 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4479 if (error != 0) 4480 return (error); 4481 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4482 error = ESPIPE; 4483 goto out; 4484 } 4485 if ((fp->f_flag & FWRITE) == 0) { 4486 error = EBADF; 4487 goto out; 4488 } 4489 if (fp->f_type != DTYPE_VNODE) { 4490 error = ENODEV; 4491 goto out; 4492 } 4493 vp = fp->f_vnode; 4494 if (vp->v_type != VREG) { 4495 error = ENODEV; 4496 goto out; 4497 } 4498 4499 /* Allocating blocks may take a long time, so iterate. 
*/ 4500 for (;;) { 4501 olen = len; 4502 ooffset = offset; 4503 4504 bwillwrite(); 4505 mp = NULL; 4506 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4507 if (error != 0) 4508 break; 4509 error = vn_lock(vp, LK_EXCLUSIVE); 4510 if (error != 0) { 4511 vn_finished_write(mp); 4512 break; 4513 } 4514 #ifdef MAC 4515 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4516 if (error == 0) 4517 #endif 4518 error = VOP_ALLOCATE(vp, &offset, &len); 4519 VOP_UNLOCK(vp, 0); 4520 vn_finished_write(mp); 4521 4522 if (olen + ooffset != offset + len) { 4523 panic("offset + len changed from %jx/%jx to %jx/%jx", 4524 ooffset, olen, offset, len); 4525 } 4526 if (error != 0 || len == 0) 4527 break; 4528 KASSERT(olen > len, ("Iteration did not make progress?")); 4529 maybe_yield(); 4530 } 4531 out: 4532 fdrop(fp, td); 4533 return (error); 4534 } 4535 4536 int 4537 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4538 { 4539 int error; 4540 4541 error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len); 4542 return (kern_posix_error(td, error)); 4543 } 4544 4545 /* 4546 * Unlike madvise(2), we do not make a best effort to remember every 4547 * possible caching hint. Instead, we remember the last setting with 4548 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4549 * region of any current setting. 
4550 */ 4551 int 4552 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4553 int advice) 4554 { 4555 struct fadvise_info *fa, *new; 4556 struct file *fp; 4557 struct vnode *vp; 4558 cap_rights_t rights; 4559 off_t end; 4560 int error; 4561 4562 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4563 return (EINVAL); 4564 switch (advice) { 4565 case POSIX_FADV_SEQUENTIAL: 4566 case POSIX_FADV_RANDOM: 4567 case POSIX_FADV_NOREUSE: 4568 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4569 break; 4570 case POSIX_FADV_NORMAL: 4571 case POSIX_FADV_WILLNEED: 4572 case POSIX_FADV_DONTNEED: 4573 new = NULL; 4574 break; 4575 default: 4576 return (EINVAL); 4577 } 4578 /* XXX: CAP_POSIX_FADVISE? */ 4579 error = fget(td, fd, cap_rights_init(&rights), &fp); 4580 if (error != 0) 4581 goto out; 4582 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4583 error = ESPIPE; 4584 goto out; 4585 } 4586 if (fp->f_type != DTYPE_VNODE) { 4587 error = ENODEV; 4588 goto out; 4589 } 4590 vp = fp->f_vnode; 4591 if (vp->v_type != VREG) { 4592 error = ENODEV; 4593 goto out; 4594 } 4595 if (len == 0) 4596 end = OFF_MAX; 4597 else 4598 end = offset + len - 1; 4599 switch (advice) { 4600 case POSIX_FADV_SEQUENTIAL: 4601 case POSIX_FADV_RANDOM: 4602 case POSIX_FADV_NOREUSE: 4603 /* 4604 * Try to merge any existing non-standard region with 4605 * this new region if possible, otherwise create a new 4606 * non-standard region for this request. 
4607 */ 4608 mtx_pool_lock(mtxpool_sleep, fp); 4609 fa = fp->f_advice; 4610 if (fa != NULL && fa->fa_advice == advice && 4611 ((fa->fa_start <= end && fa->fa_end >= offset) || 4612 (end != OFF_MAX && fa->fa_start == end + 1) || 4613 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4614 if (offset < fa->fa_start) 4615 fa->fa_start = offset; 4616 if (end > fa->fa_end) 4617 fa->fa_end = end; 4618 } else { 4619 new->fa_advice = advice; 4620 new->fa_start = offset; 4621 new->fa_end = end; 4622 fp->f_advice = new; 4623 new = fa; 4624 } 4625 mtx_pool_unlock(mtxpool_sleep, fp); 4626 break; 4627 case POSIX_FADV_NORMAL: 4628 /* 4629 * If a the "normal" region overlaps with an existing 4630 * non-standard region, trim or remove the 4631 * non-standard region. 4632 */ 4633 mtx_pool_lock(mtxpool_sleep, fp); 4634 fa = fp->f_advice; 4635 if (fa != NULL) { 4636 if (offset <= fa->fa_start && end >= fa->fa_end) { 4637 new = fa; 4638 fp->f_advice = NULL; 4639 } else if (offset <= fa->fa_start && 4640 end >= fa->fa_start) 4641 fa->fa_start = end + 1; 4642 else if (offset <= fa->fa_end && end >= fa->fa_end) 4643 fa->fa_end = offset - 1; 4644 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4645 /* 4646 * If the "normal" region is a middle 4647 * portion of the existing 4648 * non-standard region, just remove 4649 * the whole thing rather than picking 4650 * one side or the other to 4651 * preserve. 
4652 */ 4653 new = fa; 4654 fp->f_advice = NULL; 4655 } 4656 } 4657 mtx_pool_unlock(mtxpool_sleep, fp); 4658 break; 4659 case POSIX_FADV_WILLNEED: 4660 case POSIX_FADV_DONTNEED: 4661 error = VOP_ADVISE(vp, offset, end, advice); 4662 break; 4663 } 4664 out: 4665 if (fp != NULL) 4666 fdrop(fp, td); 4667 free(new, M_FADVISE); 4668 return (error); 4669 } 4670 4671 int 4672 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4673 { 4674 int error; 4675 4676 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4677 uap->advice); 4678 return (kern_posix_error(td, error)); 4679 } 4680