1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/sysent.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/sysproto.h> 55 #include <sys/namei.h> 56 #include <sys/filedesc.h> 57 #include <sys/kernel.h> 58 #include <sys/fcntl.h> 59 #include <sys/file.h> 60 #include <sys/filio.h> 61 #include <sys/limits.h> 62 #include <sys/linker.h> 63 #include <sys/rwlock.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97 static int kern_chflagsat(struct thread *td, int fd, const char *path, 98 enum uio_seg pathseg, u_long flags, int atflag); 99 static int setfflags(struct thread *td, struct vnode *, u_long); 100 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 101 static int getutimens(const struct timespec *, enum uio_seg, 102 struct timespec *, int *); 103 static int setutimes(struct thread *td, struct vnode *, 104 const struct timespec *, int, int); 105 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 106 struct thread *td); 107 108 /* 109 * The module initialization routine for POSIX asynchronous I/O will 110 * set this to the version of AIO that it implements. (Zero means 111 * that it is not implemented.) This value is used here by pathconf() 112 * and in kern_descrip.c by fpathconf(). 113 */ 114 int async_io_version; 115 116 /* 117 * Sync each mounted filesystem. 118 */ 119 #ifndef _SYS_SYSPROTO_H_ 120 struct sync_args { 121 int dummy; 122 }; 123 #endif 124 /* ARGSUSED */ 125 int 126 sys_sync(td, uap) 127 struct thread *td; 128 struct sync_args *uap; 129 { 130 struct mount *mp, *nmp; 131 int save; 132 133 mtx_lock(&mountlist_mtx); 134 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 135 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 136 nmp = TAILQ_NEXT(mp, mnt_list); 137 continue; 138 } 139 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 140 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 141 save = curthread_pflags_set(TDP_SYNCIO); 142 vfs_msync(mp, MNT_NOWAIT); 143 VFS_SYNC(mp, MNT_NOWAIT); 144 curthread_pflags_restore(save); 145 vn_finished_write(mp); 146 } 147 mtx_lock(&mountlist_mtx); 148 nmp = TAILQ_NEXT(mp, mnt_list); 149 vfs_unbusy(mp); 150 } 151 mtx_unlock(&mountlist_mtx); 152 return (0); 153 } 154 155 /* 156 * Change filesystem quotas. 157 */ 158 #ifndef _SYS_SYSPROTO_H_ 159 struct quotactl_args { 160 char *path; 161 int cmd; 162 int uid; 163 caddr_t arg; 164 }; 165 #endif 166 int 167 sys_quotactl(td, uap) 168 struct thread *td; 169 register struct quotactl_args /* { 170 char *path; 171 int cmd; 172 int uid; 173 caddr_t arg; 174 } */ *uap; 175 { 176 struct mount *mp; 177 struct nameidata nd; 178 int error; 179 180 AUDIT_ARG_CMD(uap->cmd); 181 AUDIT_ARG_UID(uap->uid); 182 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 183 return (EPERM); 184 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 185 uap->path, td); 186 if ((error = namei(&nd)) != 0) 187 return (error); 188 NDFREE(&nd, NDF_ONLY_PNBUF); 189 mp = nd.ni_vp->v_mount; 190 vfs_ref(mp); 191 vput(nd.ni_vp); 192 error = vfs_busy(mp, 0); 193 vfs_rel(mp); 194 if (error != 0) 195 return (error); 196 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 197 198 /* 199 * Since quota on operation typically needs to open quota 200 * file, the Q_QUOTAON handler needs to unbusy the mount point 201 * before calling into namei. Otherwise, unmount might be 202 * started between two vfs_busy() invocations (first is our, 203 * second is from mount point cross-walk code in lookup()), 204 * causing deadlock. 205 * 206 * Require that Q_QUOTAON handles the vfs_busy() reference on 207 * its own, always returning with ubusied mount point. 208 */ 209 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 210 vfs_unbusy(mp); 211 return (error); 212 } 213 214 /* 215 * Used by statfs conversion routines to scale the block size up if 216 * necessary so that all of the block counts are <= 'max_size'. Note 217 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 218 * value of 'n'. 219 */ 220 void 221 statfs_scale_blocks(struct statfs *sf, long max_size) 222 { 223 uint64_t count; 224 int shift; 225 226 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 227 228 /* 229 * Attempt to scale the block counts to give a more accurate 230 * overview to userland of the ratio of free space to used 231 * space. To do this, find the largest block count and compute 232 * a divisor that lets it fit into a signed integer <= max_size. 233 */ 234 if (sf->f_bavail < 0) 235 count = -sf->f_bavail; 236 else 237 count = sf->f_bavail; 238 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 239 if (count <= max_size) 240 return; 241 242 count >>= flsl(max_size); 243 shift = 0; 244 while (count > 0) { 245 shift++; 246 count >>=1; 247 } 248 249 sf->f_bsize <<= shift; 250 sf->f_blocks >>= shift; 251 sf->f_bfree >>= shift; 252 sf->f_bavail >>= shift; 253 } 254 255 /* 256 * Get filesystem statistics. 257 */ 258 #ifndef _SYS_SYSPROTO_H_ 259 struct statfs_args { 260 char *path; 261 struct statfs *buf; 262 }; 263 #endif 264 int 265 sys_statfs(td, uap) 266 struct thread *td; 267 register struct statfs_args /* { 268 char *path; 269 struct statfs *buf; 270 } */ *uap; 271 { 272 struct statfs sf; 273 int error; 274 275 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 276 if (error == 0) 277 error = copyout(&sf, uap->buf, sizeof(sf)); 278 return (error); 279 } 280 281 int 282 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 283 struct statfs *buf) 284 { 285 struct mount *mp; 286 struct statfs *sp, sb; 287 struct nameidata nd; 288 int error; 289 290 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 291 pathseg, path, td); 292 error = namei(&nd); 293 if (error != 0) 294 return (error); 295 mp = nd.ni_vp->v_mount; 296 vfs_ref(mp); 297 NDFREE(&nd, NDF_ONLY_PNBUF); 298 vput(nd.ni_vp); 299 error = vfs_busy(mp, 0); 300 vfs_rel(mp); 301 if (error != 0) 302 return (error); 303 #ifdef MAC 304 error = mac_mount_check_stat(td->td_ucred, mp); 305 if (error != 0) 306 goto out; 307 #endif 308 /* 309 * Set these in case the underlying filesystem fails to do so. 310 */ 311 sp = &mp->mnt_stat; 312 sp->f_version = STATFS_VERSION; 313 sp->f_namemax = NAME_MAX; 314 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 315 error = VFS_STATFS(mp, sp); 316 if (error != 0) 317 goto out; 318 if (priv_check(td, PRIV_VFS_GENERATION)) { 319 bcopy(sp, &sb, sizeof(sb)); 320 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 321 prison_enforce_statfs(td->td_ucred, mp, &sb); 322 sp = &sb; 323 } 324 *buf = *sp; 325 out: 326 vfs_unbusy(mp); 327 return (error); 328 } 329 330 /* 331 * Get filesystem statistics. 332 */ 333 #ifndef _SYS_SYSPROTO_H_ 334 struct fstatfs_args { 335 int fd; 336 struct statfs *buf; 337 }; 338 #endif 339 int 340 sys_fstatfs(td, uap) 341 struct thread *td; 342 register struct fstatfs_args /* { 343 int fd; 344 struct statfs *buf; 345 } */ *uap; 346 { 347 struct statfs sf; 348 int error; 349 350 error = kern_fstatfs(td, uap->fd, &sf); 351 if (error == 0) 352 error = copyout(&sf, uap->buf, sizeof(sf)); 353 return (error); 354 } 355 356 int 357 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 358 { 359 struct file *fp; 360 struct mount *mp; 361 struct statfs *sp, sb; 362 struct vnode *vp; 363 cap_rights_t rights; 364 int error; 365 366 AUDIT_ARG_FD(fd); 367 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp); 368 if (error != 0) 369 return (error); 370 vp = fp->f_vnode; 371 vn_lock(vp, LK_SHARED | LK_RETRY); 372 #ifdef AUDIT 373 AUDIT_ARG_VNODE1(vp); 374 #endif 375 mp = vp->v_mount; 376 if (mp) 377 vfs_ref(mp); 378 VOP_UNLOCK(vp, 0); 379 fdrop(fp, td); 380 if (mp == NULL) { 381 error = EBADF; 382 goto out; 383 } 384 error = vfs_busy(mp, 0); 385 vfs_rel(mp); 386 if (error != 0) 387 return (error); 388 #ifdef MAC 389 error = mac_mount_check_stat(td->td_ucred, mp); 390 if (error != 0) 391 goto out; 392 #endif 393 /* 394 * Set these in case the underlying filesystem fails to do so. 395 */ 396 sp = &mp->mnt_stat; 397 sp->f_version = STATFS_VERSION; 398 sp->f_namemax = NAME_MAX; 399 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 400 error = VFS_STATFS(mp, sp); 401 if (error != 0) 402 goto out; 403 if (priv_check(td, PRIV_VFS_GENERATION)) { 404 bcopy(sp, &sb, sizeof(sb)); 405 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 406 prison_enforce_statfs(td->td_ucred, mp, &sb); 407 sp = &sb; 408 } 409 *buf = *sp; 410 out: 411 if (mp) 412 vfs_unbusy(mp); 413 return (error); 414 } 415 416 /* 417 * Get statistics on all filesystems. 418 */ 419 #ifndef _SYS_SYSPROTO_H_ 420 struct getfsstat_args { 421 struct statfs *buf; 422 long bufsize; 423 int flags; 424 }; 425 #endif 426 int 427 sys_getfsstat(td, uap) 428 struct thread *td; 429 register struct getfsstat_args /* { 430 struct statfs *buf; 431 long bufsize; 432 int flags; 433 } */ *uap; 434 { 435 size_t count; 436 int error; 437 438 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 439 UIO_USERSPACE, uap->flags); 440 if (error == 0) 441 td->td_retval[0] = count; 442 return (error); 443 } 444 445 /* 446 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 447 * The caller is responsible for freeing memory which will be allocated 448 * in '*buf'. 449 */ 450 int 451 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 452 size_t *countp, enum uio_seg bufseg, int flags) 453 { 454 struct mount *mp, *nmp; 455 struct statfs *sfsp, *sp, sb; 456 size_t count, maxcount; 457 int error; 458 459 maxcount = bufsize / sizeof(struct statfs); 460 if (bufsize == 0) 461 sfsp = NULL; 462 else if (bufseg == UIO_USERSPACE) 463 sfsp = *buf; 464 else /* if (bufseg == UIO_SYSSPACE) */ { 465 count = 0; 466 mtx_lock(&mountlist_mtx); 467 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 468 count++; 469 } 470 mtx_unlock(&mountlist_mtx); 471 if (maxcount > count) 472 maxcount = count; 473 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 474 M_WAITOK); 475 } 476 count = 0; 477 mtx_lock(&mountlist_mtx); 478 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 479 if (prison_canseemount(td->td_ucred, mp) != 0) { 480 nmp = TAILQ_NEXT(mp, mnt_list); 481 continue; 482 } 483 #ifdef MAC 484 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 485 nmp = TAILQ_NEXT(mp, mnt_list); 486 continue; 487 } 488 #endif 489 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 490 nmp = TAILQ_NEXT(mp, mnt_list); 491 continue; 492 } 493 if (sfsp && count < maxcount) { 494 sp = &mp->mnt_stat; 495 /* 496 * Set these in case the underlying filesystem 497 * fails to do so. 498 */ 499 sp->f_version = STATFS_VERSION; 500 sp->f_namemax = NAME_MAX; 501 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 502 /* 503 * If MNT_NOWAIT or MNT_LAZY is specified, do not 504 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 505 * overrides MNT_WAIT. 506 */ 507 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 508 (flags & MNT_WAIT)) && 509 (error = VFS_STATFS(mp, sp))) { 510 mtx_lock(&mountlist_mtx); 511 nmp = TAILQ_NEXT(mp, mnt_list); 512 vfs_unbusy(mp); 513 continue; 514 } 515 if (priv_check(td, PRIV_VFS_GENERATION)) { 516 bcopy(sp, &sb, sizeof(sb)); 517 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 518 prison_enforce_statfs(td->td_ucred, mp, &sb); 519 sp = &sb; 520 } 521 if (bufseg == UIO_SYSSPACE) 522 bcopy(sp, sfsp, sizeof(*sp)); 523 else /* if (bufseg == UIO_USERSPACE) */ { 524 error = copyout(sp, sfsp, sizeof(*sp)); 525 if (error != 0) { 526 vfs_unbusy(mp); 527 return (error); 528 } 529 } 530 sfsp++; 531 } 532 count++; 533 mtx_lock(&mountlist_mtx); 534 nmp = TAILQ_NEXT(mp, mnt_list); 535 vfs_unbusy(mp); 536 } 537 mtx_unlock(&mountlist_mtx); 538 if (sfsp && count > maxcount) 539 *countp = maxcount; 540 else 541 *countp = count; 542 return (0); 543 } 544 545 #ifdef COMPAT_FREEBSD4 546 /* 547 * Get old format filesystem statistics. 548 */ 549 static void cvtstatfs(struct statfs *, struct ostatfs *); 550 551 #ifndef _SYS_SYSPROTO_H_ 552 struct freebsd4_statfs_args { 553 char *path; 554 struct ostatfs *buf; 555 }; 556 #endif 557 int 558 freebsd4_statfs(td, uap) 559 struct thread *td; 560 struct freebsd4_statfs_args /* { 561 char *path; 562 struct ostatfs *buf; 563 } */ *uap; 564 { 565 struct ostatfs osb; 566 struct statfs sf; 567 int error; 568 569 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 570 if (error != 0) 571 return (error); 572 cvtstatfs(&sf, &osb); 573 return (copyout(&osb, uap->buf, sizeof(osb))); 574 } 575 576 /* 577 * Get filesystem statistics. 578 */ 579 #ifndef _SYS_SYSPROTO_H_ 580 struct freebsd4_fstatfs_args { 581 int fd; 582 struct ostatfs *buf; 583 }; 584 #endif 585 int 586 freebsd4_fstatfs(td, uap) 587 struct thread *td; 588 struct freebsd4_fstatfs_args /* { 589 int fd; 590 struct ostatfs *buf; 591 } */ *uap; 592 { 593 struct ostatfs osb; 594 struct statfs sf; 595 int error; 596 597 error = kern_fstatfs(td, uap->fd, &sf); 598 if (error != 0) 599 return (error); 600 cvtstatfs(&sf, &osb); 601 return (copyout(&osb, uap->buf, sizeof(osb))); 602 } 603 604 /* 605 * Get statistics on all filesystems. 606 */ 607 #ifndef _SYS_SYSPROTO_H_ 608 struct freebsd4_getfsstat_args { 609 struct ostatfs *buf; 610 long bufsize; 611 int flags; 612 }; 613 #endif 614 int 615 freebsd4_getfsstat(td, uap) 616 struct thread *td; 617 register struct freebsd4_getfsstat_args /* { 618 struct ostatfs *buf; 619 long bufsize; 620 int flags; 621 } */ *uap; 622 { 623 struct statfs *buf, *sp; 624 struct ostatfs osb; 625 size_t count, size; 626 int error; 627 628 count = uap->bufsize / sizeof(struct ostatfs); 629 size = count * sizeof(struct statfs); 630 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 631 uap->flags); 632 if (size > 0) { 633 sp = buf; 634 while (count > 0 && error == 0) { 635 cvtstatfs(sp, &osb); 636 error = copyout(&osb, uap->buf, sizeof(osb)); 637 sp++; 638 uap->buf++; 639 count--; 640 } 641 free(buf, M_TEMP); 642 } 643 if (error == 0) 644 td->td_retval[0] = count; 645 return (error); 646 } 647 648 /* 649 * Implement fstatfs() for (NFS) file handles. 650 */ 651 #ifndef _SYS_SYSPROTO_H_ 652 struct freebsd4_fhstatfs_args { 653 struct fhandle *u_fhp; 654 struct ostatfs *buf; 655 }; 656 #endif 657 int 658 freebsd4_fhstatfs(td, uap) 659 struct thread *td; 660 struct freebsd4_fhstatfs_args /* { 661 struct fhandle *u_fhp; 662 struct ostatfs *buf; 663 } */ *uap; 664 { 665 struct ostatfs osb; 666 struct statfs sf; 667 fhandle_t fh; 668 int error; 669 670 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 671 if (error != 0) 672 return (error); 673 error = kern_fhstatfs(td, fh, &sf); 674 if (error != 0) 675 return (error); 676 cvtstatfs(&sf, &osb); 677 return (copyout(&osb, uap->buf, sizeof(osb))); 678 } 679 680 /* 681 * Convert a new format statfs structure to an old format statfs structure. 682 */ 683 static void 684 cvtstatfs(nsp, osp) 685 struct statfs *nsp; 686 struct ostatfs *osp; 687 { 688 689 statfs_scale_blocks(nsp, LONG_MAX); 690 bzero(osp, sizeof(*osp)); 691 osp->f_bsize = nsp->f_bsize; 692 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 693 osp->f_blocks = nsp->f_blocks; 694 osp->f_bfree = nsp->f_bfree; 695 osp->f_bavail = nsp->f_bavail; 696 osp->f_files = MIN(nsp->f_files, LONG_MAX); 697 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 698 osp->f_owner = nsp->f_owner; 699 osp->f_type = nsp->f_type; 700 osp->f_flags = nsp->f_flags; 701 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 702 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 703 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 704 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 705 strlcpy(osp->f_fstypename, nsp->f_fstypename, 706 MIN(MFSNAMELEN, OMFSNAMELEN)); 707 strlcpy(osp->f_mntonname, nsp->f_mntonname, 708 MIN(MNAMELEN, OMNAMELEN)); 709 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 710 MIN(MNAMELEN, OMNAMELEN)); 711 osp->f_fsid = nsp->f_fsid; 712 } 713 #endif /* COMPAT_FREEBSD4 */ 714 715 /* 716 * Change current working directory to a given file descriptor. 717 */ 718 #ifndef _SYS_SYSPROTO_H_ 719 struct fchdir_args { 720 int fd; 721 }; 722 #endif 723 int 724 sys_fchdir(td, uap) 725 struct thread *td; 726 struct fchdir_args /* { 727 int fd; 728 } */ *uap; 729 { 730 struct vnode *vp, *tdp; 731 struct mount *mp; 732 struct file *fp; 733 cap_rights_t rights; 734 int error; 735 736 AUDIT_ARG_FD(uap->fd); 737 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 738 &fp); 739 if (error != 0) 740 return (error); 741 vp = fp->f_vnode; 742 VREF(vp); 743 fdrop(fp, td); 744 vn_lock(vp, LK_SHARED | LK_RETRY); 745 AUDIT_ARG_VNODE1(vp); 746 error = change_dir(vp, td); 747 while (!error && (mp = vp->v_mountedhere) != NULL) { 748 if (vfs_busy(mp, 0)) 749 continue; 750 error = VFS_ROOT(mp, LK_SHARED, &tdp); 751 vfs_unbusy(mp); 752 if (error != 0) 753 break; 754 vput(vp); 755 vp = tdp; 756 } 757 if (error != 0) { 758 vput(vp); 759 return (error); 760 } 761 VOP_UNLOCK(vp, 0); 762 pwd_chdir(td, vp); 763 return (0); 764 } 765 766 /* 767 * Change current working directory (``.''). 768 */ 769 #ifndef _SYS_SYSPROTO_H_ 770 struct chdir_args { 771 char *path; 772 }; 773 #endif 774 int 775 sys_chdir(td, uap) 776 struct thread *td; 777 struct chdir_args /* { 778 char *path; 779 } */ *uap; 780 { 781 782 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 783 } 784 785 int 786 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 787 { 788 struct nameidata nd; 789 int error; 790 791 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 792 pathseg, path, td); 793 if ((error = namei(&nd)) != 0) 794 return (error); 795 if ((error = change_dir(nd.ni_vp, td)) != 0) { 796 vput(nd.ni_vp); 797 NDFREE(&nd, NDF_ONLY_PNBUF); 798 return (error); 799 } 800 VOP_UNLOCK(nd.ni_vp, 0); 801 NDFREE(&nd, NDF_ONLY_PNBUF); 802 pwd_chdir(td, nd.ni_vp); 803 return (0); 804 } 805 806 /* 807 * Change notion of root (``/'') directory. 808 */ 809 #ifndef _SYS_SYSPROTO_H_ 810 struct chroot_args { 811 char *path; 812 }; 813 #endif 814 int 815 sys_chroot(td, uap) 816 struct thread *td; 817 struct chroot_args /* { 818 char *path; 819 } */ *uap; 820 { 821 struct nameidata nd; 822 int error; 823 824 error = priv_check(td, PRIV_VFS_CHROOT); 825 if (error != 0) 826 return (error); 827 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 828 UIO_USERSPACE, uap->path, td); 829 error = namei(&nd); 830 if (error != 0) 831 goto error; 832 error = change_dir(nd.ni_vp, td); 833 if (error != 0) 834 goto e_vunlock; 835 #ifdef MAC 836 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 837 if (error != 0) 838 goto e_vunlock; 839 #endif 840 VOP_UNLOCK(nd.ni_vp, 0); 841 error = pwd_chroot(td, nd.ni_vp); 842 vrele(nd.ni_vp); 843 NDFREE(&nd, NDF_ONLY_PNBUF); 844 return (error); 845 e_vunlock: 846 vput(nd.ni_vp); 847 error: 848 NDFREE(&nd, NDF_ONLY_PNBUF); 849 return (error); 850 } 851 852 /* 853 * Common routine for chroot and chdir. Callers must provide a locked vnode 854 * instance. 855 */ 856 int 857 change_dir(vp, td) 858 struct vnode *vp; 859 struct thread *td; 860 { 861 #ifdef MAC 862 int error; 863 #endif 864 865 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 866 if (vp->v_type != VDIR) 867 return (ENOTDIR); 868 #ifdef MAC 869 error = mac_vnode_check_chdir(td->td_ucred, vp); 870 if (error != 0) 871 return (error); 872 #endif 873 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 874 } 875 876 static __inline void 877 flags_to_rights(int flags, cap_rights_t *rightsp) 878 { 879 880 if (flags & O_EXEC) { 881 cap_rights_set(rightsp, CAP_FEXECVE); 882 } else { 883 switch ((flags & O_ACCMODE)) { 884 case O_RDONLY: 885 cap_rights_set(rightsp, CAP_READ); 886 break; 887 case O_RDWR: 888 cap_rights_set(rightsp, CAP_READ); 889 /* FALLTHROUGH */ 890 case O_WRONLY: 891 cap_rights_set(rightsp, CAP_WRITE); 892 if (!(flags & (O_APPEND | O_TRUNC))) 893 cap_rights_set(rightsp, CAP_SEEK); 894 break; 895 } 896 } 897 898 if (flags & O_CREAT) 899 cap_rights_set(rightsp, CAP_CREATE); 900 901 if (flags & O_TRUNC) 902 cap_rights_set(rightsp, CAP_FTRUNCATE); 903 904 if (flags & (O_SYNC | O_FSYNC)) 905 cap_rights_set(rightsp, CAP_FSYNC); 906 907 if (flags & (O_EXLOCK | O_SHLOCK)) 908 cap_rights_set(rightsp, CAP_FLOCK); 909 } 910 911 /* 912 * Check permissions, allocate an open file structure, and call the device 913 * open routine if any. 914 */ 915 #ifndef _SYS_SYSPROTO_H_ 916 struct open_args { 917 char *path; 918 int flags; 919 int mode; 920 }; 921 #endif 922 int 923 sys_open(td, uap) 924 struct thread *td; 925 register struct open_args /* { 926 char *path; 927 int flags; 928 int mode; 929 } */ *uap; 930 { 931 932 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 933 uap->flags, uap->mode)); 934 } 935 936 #ifndef _SYS_SYSPROTO_H_ 937 struct openat_args { 938 int fd; 939 char *path; 940 int flag; 941 int mode; 942 }; 943 #endif 944 int 945 sys_openat(struct thread *td, struct openat_args *uap) 946 { 947 948 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 949 uap->mode)); 950 } 951 952 int 953 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 954 int flags, int mode) 955 { 956 struct proc *p = td->td_proc; 957 struct filedesc *fdp = p->p_fd; 958 struct file *fp; 959 struct vnode *vp; 960 struct nameidata nd; 961 cap_rights_t rights; 962 int cmode, error, indx; 963 964 indx = -1; 965 966 AUDIT_ARG_FFLAGS(flags); 967 AUDIT_ARG_MODE(mode); 968 /* XXX: audit dirfd */ 969 cap_rights_init(&rights, CAP_LOOKUP); 970 flags_to_rights(flags, &rights); 971 /* 972 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 973 * may be specified. 974 */ 975 if (flags & O_EXEC) { 976 if (flags & O_ACCMODE) 977 return (EINVAL); 978 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 979 return (EINVAL); 980 } else { 981 flags = FFLAGS(flags); 982 } 983 984 /* 985 * Allocate the file descriptor, but don't install a descriptor yet. 986 */ 987 error = falloc_noinstall(td, &fp); 988 if (error != 0) 989 return (error); 990 /* 991 * An extra reference on `fp' has been held for us by 992 * falloc_noinstall(). 993 */ 994 /* Set the flags early so the finit in devfs can pick them up. */ 995 fp->f_flag = flags & FMASK; 996 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 997 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 998 &rights, td); 999 td->td_dupfd = -1; /* XXX check for fdopen */ 1000 error = vn_open(&nd, &flags, cmode, fp); 1001 if (error != 0) { 1002 /* 1003 * If the vn_open replaced the method vector, something 1004 * wonderous happened deep below and we just pass it up 1005 * pretending we know what we do. 1006 */ 1007 if (error == ENXIO && fp->f_ops != &badfileops) 1008 goto success; 1009 1010 /* 1011 * Handle special fdopen() case. bleh. 1012 * 1013 * Don't do this for relative (capability) lookups; we don't 1014 * understand exactly what would happen, and we don't think 1015 * that it ever should. 1016 */ 1017 if (nd.ni_strictrelative == 0 && 1018 (error == ENODEV || error == ENXIO) && 1019 td->td_dupfd >= 0) { 1020 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1021 &indx); 1022 if (error == 0) 1023 goto success; 1024 } 1025 1026 goto bad; 1027 } 1028 td->td_dupfd = 0; 1029 NDFREE(&nd, NDF_ONLY_PNBUF); 1030 vp = nd.ni_vp; 1031 1032 /* 1033 * Store the vnode, for any f_type. Typically, the vnode use 1034 * count is decremented by direct call to vn_closefile() for 1035 * files that switched type in the cdevsw fdopen() method. 1036 */ 1037 fp->f_vnode = vp; 1038 /* 1039 * If the file wasn't claimed by devfs bind it to the normal 1040 * vnode operations here. 1041 */ 1042 if (fp->f_ops == &badfileops) { 1043 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1044 fp->f_seqcount = 1; 1045 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1046 DTYPE_VNODE, vp, &vnops); 1047 } 1048 1049 VOP_UNLOCK(vp, 0); 1050 if (flags & O_TRUNC) { 1051 error = fo_truncate(fp, 0, td->td_ucred, td); 1052 if (error != 0) 1053 goto bad; 1054 } 1055 success: 1056 /* 1057 * If we haven't already installed the FD (for dupfdopen), do so now. 1058 */ 1059 if (indx == -1) { 1060 struct filecaps *fcaps; 1061 1062 #ifdef CAPABILITIES 1063 if (nd.ni_strictrelative == 1) 1064 fcaps = &nd.ni_filecaps; 1065 else 1066 #endif 1067 fcaps = NULL; 1068 error = finstall(td, fp, &indx, flags, fcaps); 1069 /* On success finstall() consumes fcaps. */ 1070 if (error != 0) { 1071 filecaps_free(&nd.ni_filecaps); 1072 goto bad; 1073 } 1074 } else { 1075 filecaps_free(&nd.ni_filecaps); 1076 } 1077 1078 /* 1079 * Release our private reference, leaving the one associated with 1080 * the descriptor table intact. 1081 */ 1082 fdrop(fp, td); 1083 td->td_retval[0] = indx; 1084 return (0); 1085 bad: 1086 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1087 fdrop(fp, td); 1088 return (error); 1089 } 1090 1091 #ifdef COMPAT_43 1092 /* 1093 * Create a file. 1094 */ 1095 #ifndef _SYS_SYSPROTO_H_ 1096 struct ocreat_args { 1097 char *path; 1098 int mode; 1099 }; 1100 #endif 1101 int 1102 ocreat(td, uap) 1103 struct thread *td; 1104 register struct ocreat_args /* { 1105 char *path; 1106 int mode; 1107 } */ *uap; 1108 { 1109 1110 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1111 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1112 } 1113 #endif /* COMPAT_43 */ 1114 1115 /* 1116 * Create a special file. 1117 */ 1118 #ifndef _SYS_SYSPROTO_H_ 1119 struct mknod_args { 1120 char *path; 1121 int mode; 1122 int dev; 1123 }; 1124 #endif 1125 int 1126 sys_mknod(td, uap) 1127 struct thread *td; 1128 register struct mknod_args /* { 1129 char *path; 1130 int mode; 1131 int dev; 1132 } */ *uap; 1133 { 1134 1135 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1136 uap->mode, uap->dev)); 1137 } 1138 1139 #ifndef _SYS_SYSPROTO_H_ 1140 struct mknodat_args { 1141 int fd; 1142 char *path; 1143 mode_t mode; 1144 dev_t dev; 1145 }; 1146 #endif 1147 int 1148 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1149 { 1150 1151 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1152 uap->dev)); 1153 } 1154 1155 int 1156 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1157 int mode, int dev) 1158 { 1159 struct vnode *vp; 1160 struct mount *mp; 1161 struct vattr vattr; 1162 struct nameidata nd; 1163 cap_rights_t rights; 1164 int error, whiteout = 0; 1165 1166 AUDIT_ARG_MODE(mode); 1167 AUDIT_ARG_DEV(dev); 1168 switch (mode & S_IFMT) { 1169 case S_IFCHR: 1170 case S_IFBLK: 1171 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1172 break; 1173 case S_IFMT: 1174 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1175 break; 1176 case S_IFWHT: 1177 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1178 break; 1179 case S_IFIFO: 1180 if (dev == 0) 1181 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1182 /* FALLTHROUGH */ 1183 default: 1184 error = EINVAL; 1185 break; 1186 } 1187 if (error != 0) 1188 return (error); 1189 restart: 1190 bwillwrite(); 1191 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1192 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), 1193 td); 1194 if ((error = namei(&nd)) != 0) 1195 return (error); 1196 vp = nd.ni_vp; 1197 if (vp != NULL) { 1198 NDFREE(&nd, NDF_ONLY_PNBUF); 1199 if (vp == nd.ni_dvp) 1200 vrele(nd.ni_dvp); 1201 else 1202 vput(nd.ni_dvp); 1203 vrele(vp); 1204 return (EEXIST); 1205 } else { 1206 VATTR_NULL(&vattr); 1207 vattr.va_mode = (mode & ALLPERMS) & 1208 ~td->td_proc->p_fd->fd_cmask; 1209 vattr.va_rdev = dev; 1210 whiteout = 0; 1211 1212 switch (mode & S_IFMT) { 1213 case S_IFMT: /* used by badsect to flag bad sectors */ 1214 vattr.va_type = VBAD; 1215 break; 1216 case S_IFCHR: 1217 vattr.va_type = VCHR; 1218 break; 1219 case S_IFBLK: 1220 vattr.va_type = VBLK; 1221 break; 1222 case S_IFWHT: 1223 whiteout = 1; 1224 break; 1225 default: 1226 panic("kern_mknod: invalid mode"); 1227 } 1228 } 1229 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1230 NDFREE(&nd, NDF_ONLY_PNBUF); 1231 vput(nd.ni_dvp); 1232 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1233 return (error); 1234 goto restart; 1235 } 1236 #ifdef MAC 1237 if (error == 0 && !whiteout) 1238 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1239 &nd.ni_cnd, &vattr); 1240 #endif 1241 if (error == 0) { 1242 if (whiteout) 1243 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1244 else { 1245 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1246 &nd.ni_cnd, &vattr); 1247 if (error == 0) 1248 vput(nd.ni_vp); 1249 } 1250 } 1251 NDFREE(&nd, NDF_ONLY_PNBUF); 1252 vput(nd.ni_dvp); 1253 vn_finished_write(mp); 1254 return (error); 1255 } 1256 1257 /* 1258 * Create a named pipe. 1259 */ 1260 #ifndef _SYS_SYSPROTO_H_ 1261 struct mkfifo_args { 1262 char *path; 1263 int mode; 1264 }; 1265 #endif 1266 int 1267 sys_mkfifo(td, uap) 1268 struct thread *td; 1269 register struct mkfifo_args /* { 1270 char *path; 1271 int mode; 1272 } */ *uap; 1273 { 1274 1275 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1276 uap->mode)); 1277 } 1278 1279 #ifndef _SYS_SYSPROTO_H_ 1280 struct mkfifoat_args { 1281 int fd; 1282 char *path; 1283 mode_t mode; 1284 }; 1285 #endif 1286 int 1287 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1288 { 1289 1290 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1291 uap->mode)); 1292 } 1293 1294 int 1295 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1296 int mode) 1297 { 1298 struct mount *mp; 1299 struct vattr vattr; 1300 struct nameidata nd; 1301 cap_rights_t rights; 1302 int error; 1303 1304 AUDIT_ARG_MODE(mode); 1305 restart: 1306 bwillwrite(); 1307 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1308 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), 1309 td); 1310 if ((error = namei(&nd)) != 0) 1311 return (error); 1312 if (nd.ni_vp != NULL) { 1313 NDFREE(&nd, NDF_ONLY_PNBUF); 1314 if (nd.ni_vp == nd.ni_dvp) 1315 vrele(nd.ni_dvp); 1316 else 1317 vput(nd.ni_dvp); 1318 vrele(nd.ni_vp); 1319 return (EEXIST); 1320 } 1321 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1322 NDFREE(&nd, NDF_ONLY_PNBUF); 1323 vput(nd.ni_dvp); 1324 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1325 return (error); 1326 goto restart; 1327 } 1328 VATTR_NULL(&vattr); 1329 vattr.va_type = VFIFO; 1330 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1331 #ifdef MAC 1332 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1333 &vattr); 1334 if (error != 0) 1335 goto out; 1336 #endif 1337 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1338 if (error == 0) 1339 vput(nd.ni_vp); 1340 #ifdef MAC 1341 out: 1342 #endif 1343 vput(nd.ni_dvp); 1344 vn_finished_write(mp); 1345 NDFREE(&nd, NDF_ONLY_PNBUF); 1346 return (error); 1347 } 1348 1349 /* 1350 * Make a hard file link. 1351 */ 1352 #ifndef _SYS_SYSPROTO_H_ 1353 struct link_args { 1354 char *path; 1355 char *link; 1356 }; 1357 #endif 1358 int 1359 sys_link(td, uap) 1360 struct thread *td; 1361 register struct link_args /* { 1362 char *path; 1363 char *link; 1364 } */ *uap; 1365 { 1366 1367 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1368 UIO_USERSPACE, FOLLOW)); 1369 } 1370 1371 #ifndef _SYS_SYSPROTO_H_ 1372 struct linkat_args { 1373 int fd1; 1374 char *path1; 1375 int fd2; 1376 char *path2; 1377 int flag; 1378 }; 1379 #endif 1380 int 1381 sys_linkat(struct thread *td, struct linkat_args *uap) 1382 { 1383 int flag; 1384 1385 flag = uap->flag; 1386 if (flag & ~AT_SYMLINK_FOLLOW) 1387 return (EINVAL); 1388 1389 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1390 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); 1391 } 1392 1393 int hardlink_check_uid = 0; 1394 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1395 &hardlink_check_uid, 0, 1396 "Unprivileged processes cannot create hard links to files owned by other " 1397 "users"); 1398 static int hardlink_check_gid = 0; 1399 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1400 &hardlink_check_gid, 0, 1401 "Unprivileged processes cannot create hard links to files owned by other " 1402 "groups"); 1403 1404 static int 1405 can_hardlink(struct vnode *vp, struct ucred *cred) 1406 { 1407 struct vattr va; 1408 int error; 1409 1410 if (!hardlink_check_uid && !hardlink_check_gid) 1411 return (0); 1412 1413 error = VOP_GETATTR(vp, &va, cred); 1414 if (error != 0) 1415 return (error); 1416 1417 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1418 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1419 if (error != 0) 1420 return (error); 1421 } 1422 1423 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1424 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1425 if (error != 0) 1426 return (error); 1427 } 1428 1429 return (0); 1430 } 1431 1432 int 1433 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1434 enum uio_seg segflg, int follow) 1435 { 1436 struct vnode *vp; 1437 struct mount *mp; 1438 struct nameidata nd; 1439 cap_rights_t rights; 1440 int error; 1441 1442 again: 1443 bwillwrite(); 1444 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, 1445 cap_rights_init(&rights, CAP_LINKAT_SOURCE), td); 1446 1447 if ((error = namei(&nd)) != 0) 1448 return (error); 1449 NDFREE(&nd, NDF_ONLY_PNBUF); 1450 vp = nd.ni_vp; 1451 if (vp->v_type == VDIR) { 1452 vrele(vp); 1453 return (EPERM); /* POSIX */ 1454 } 1455 NDINIT_ATRIGHTS(&nd, CREATE, 1456 LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2, 1457 cap_rights_init(&rights, CAP_LINKAT_TARGET), td); 1458 if ((error = namei(&nd)) == 0) { 1459 if (nd.ni_vp != NULL) { 1460 NDFREE(&nd, NDF_ONLY_PNBUF); 1461 if (nd.ni_dvp == nd.ni_vp) 1462 vrele(nd.ni_dvp); 1463 else 1464 vput(nd.ni_dvp); 1465 vrele(nd.ni_vp); 1466 vrele(vp); 1467 return (EEXIST); 1468 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1469 /* 1470 * Cross-device link. No need to recheck 1471 * vp->v_type, since it cannot change, except 1472 * to VBAD. 1473 */ 1474 NDFREE(&nd, NDF_ONLY_PNBUF); 1475 vput(nd.ni_dvp); 1476 vrele(vp); 1477 return (EXDEV); 1478 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1479 error = can_hardlink(vp, td->td_ucred); 1480 #ifdef MAC 1481 if (error == 0) 1482 error = mac_vnode_check_link(td->td_ucred, 1483 nd.ni_dvp, vp, &nd.ni_cnd); 1484 #endif 1485 if (error != 0) { 1486 vput(vp); 1487 vput(nd.ni_dvp); 1488 NDFREE(&nd, NDF_ONLY_PNBUF); 1489 return (error); 1490 } 1491 error = vn_start_write(vp, &mp, V_NOWAIT); 1492 if (error != 0) { 1493 vput(vp); 1494 vput(nd.ni_dvp); 1495 NDFREE(&nd, NDF_ONLY_PNBUF); 1496 error = vn_start_write(NULL, &mp, 1497 V_XSLEEP | PCATCH); 1498 if (error != 0) 1499 return (error); 1500 goto again; 1501 } 1502 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1503 VOP_UNLOCK(vp, 0); 1504 vput(nd.ni_dvp); 1505 vn_finished_write(mp); 1506 NDFREE(&nd, NDF_ONLY_PNBUF); 1507 } else { 1508 vput(nd.ni_dvp); 1509 NDFREE(&nd, NDF_ONLY_PNBUF); 1510 vrele(vp); 1511 goto again; 1512 } 1513 } 1514 vrele(vp); 1515 return (error); 1516 } 1517 1518 /* 1519 * Make a symbolic link. 1520 */ 1521 #ifndef _SYS_SYSPROTO_H_ 1522 struct symlink_args { 1523 char *path; 1524 char *link; 1525 }; 1526 #endif 1527 int 1528 sys_symlink(td, uap) 1529 struct thread *td; 1530 register struct symlink_args /* { 1531 char *path; 1532 char *link; 1533 } */ *uap; 1534 { 1535 1536 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1537 UIO_USERSPACE)); 1538 } 1539 1540 #ifndef _SYS_SYSPROTO_H_ 1541 struct symlinkat_args { 1542 char *path; 1543 int fd; 1544 char *path2; 1545 }; 1546 #endif 1547 int 1548 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1549 { 1550 1551 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1552 UIO_USERSPACE)); 1553 } 1554 1555 int 1556 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1557 enum uio_seg segflg) 1558 { 1559 struct mount *mp; 1560 struct vattr vattr; 1561 char *syspath; 1562 struct nameidata nd; 1563 int error; 1564 cap_rights_t rights; 1565 1566 if (segflg == UIO_SYSSPACE) { 1567 syspath = path1; 1568 } else { 1569 syspath = uma_zalloc(namei_zone, M_WAITOK); 1570 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1571 goto out; 1572 } 1573 AUDIT_ARG_TEXT(syspath); 1574 restart: 1575 bwillwrite(); 1576 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1577 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1578 td); 1579 if ((error = namei(&nd)) != 0) 1580 goto out; 1581 if (nd.ni_vp) { 1582 NDFREE(&nd, NDF_ONLY_PNBUF); 1583 if (nd.ni_vp == nd.ni_dvp) 1584 vrele(nd.ni_dvp); 1585 else 1586 vput(nd.ni_dvp); 1587 vrele(nd.ni_vp); 1588 error = EEXIST; 1589 goto out; 1590 } 1591 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1592 NDFREE(&nd, NDF_ONLY_PNBUF); 1593 vput(nd.ni_dvp); 1594 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1595 goto out; 1596 goto restart; 1597 } 1598 VATTR_NULL(&vattr); 1599 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1600 #ifdef MAC 1601 vattr.va_type = VLNK; 1602 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1603 &vattr); 1604 if (error != 0) 1605 goto out2; 1606 #endif 1607 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1608 if (error == 0) 1609 vput(nd.ni_vp); 1610 #ifdef MAC 1611 out2: 1612 #endif 1613 NDFREE(&nd, NDF_ONLY_PNBUF); 1614 vput(nd.ni_dvp); 1615 vn_finished_write(mp); 1616 out: 1617 if (segflg != UIO_SYSSPACE) 1618 uma_zfree(namei_zone, syspath); 1619 return (error); 1620 } 1621 1622 /* 1623 * Delete a whiteout from the filesystem. 1624 */ 1625 int 1626 sys_undelete(td, uap) 1627 struct thread *td; 1628 register struct undelete_args /* { 1629 char *path; 1630 } */ *uap; 1631 { 1632 struct mount *mp; 1633 struct nameidata nd; 1634 int error; 1635 1636 restart: 1637 bwillwrite(); 1638 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1639 UIO_USERSPACE, uap->path, td); 1640 error = namei(&nd); 1641 if (error != 0) 1642 return (error); 1643 1644 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1645 NDFREE(&nd, NDF_ONLY_PNBUF); 1646 if (nd.ni_vp == nd.ni_dvp) 1647 vrele(nd.ni_dvp); 1648 else 1649 vput(nd.ni_dvp); 1650 if (nd.ni_vp) 1651 vrele(nd.ni_vp); 1652 return (EEXIST); 1653 } 1654 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1655 NDFREE(&nd, NDF_ONLY_PNBUF); 1656 vput(nd.ni_dvp); 1657 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1658 return (error); 1659 goto restart; 1660 } 1661 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1662 NDFREE(&nd, NDF_ONLY_PNBUF); 1663 vput(nd.ni_dvp); 1664 vn_finished_write(mp); 1665 return (error); 1666 } 1667 1668 /* 1669 * Delete a name from the filesystem. 1670 */ 1671 #ifndef _SYS_SYSPROTO_H_ 1672 struct unlink_args { 1673 char *path; 1674 }; 1675 #endif 1676 int 1677 sys_unlink(td, uap) 1678 struct thread *td; 1679 struct unlink_args /* { 1680 char *path; 1681 } */ *uap; 1682 { 1683 1684 return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); 1685 } 1686 1687 #ifndef _SYS_SYSPROTO_H_ 1688 struct unlinkat_args { 1689 int fd; 1690 char *path; 1691 int flag; 1692 }; 1693 #endif 1694 int 1695 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1696 { 1697 int flag = uap->flag; 1698 int fd = uap->fd; 1699 char *path = uap->path; 1700 1701 if (flag & ~AT_REMOVEDIR) 1702 return (EINVAL); 1703 1704 if (flag & AT_REMOVEDIR) 1705 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1706 else 1707 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1708 } 1709 1710 int 1711 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1712 ino_t oldinum) 1713 { 1714 struct mount *mp; 1715 struct vnode *vp; 1716 struct nameidata nd; 1717 struct stat sb; 1718 cap_rights_t rights; 1719 int error; 1720 1721 restart: 1722 bwillwrite(); 1723 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1724 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1725 if ((error = namei(&nd)) != 0) 1726 return (error == EINVAL ? EPERM : error); 1727 vp = nd.ni_vp; 1728 if (vp->v_type == VDIR && oldinum == 0) { 1729 error = EPERM; /* POSIX */ 1730 } else if (oldinum != 0 && 1731 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1732 sb.st_ino != oldinum) { 1733 error = EIDRM; /* Identifier removed */ 1734 } else { 1735 /* 1736 * The root of a mounted filesystem cannot be deleted. 1737 * 1738 * XXX: can this only be a VDIR case? 1739 */ 1740 if (vp->v_vflag & VV_ROOT) 1741 error = EBUSY; 1742 } 1743 if (error == 0) { 1744 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1745 NDFREE(&nd, NDF_ONLY_PNBUF); 1746 vput(nd.ni_dvp); 1747 if (vp == nd.ni_dvp) 1748 vrele(vp); 1749 else 1750 vput(vp); 1751 if ((error = vn_start_write(NULL, &mp, 1752 V_XSLEEP | PCATCH)) != 0) 1753 return (error); 1754 goto restart; 1755 } 1756 #ifdef MAC 1757 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1758 &nd.ni_cnd); 1759 if (error != 0) 1760 goto out; 1761 #endif 1762 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1763 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1764 #ifdef MAC 1765 out: 1766 #endif 1767 vn_finished_write(mp); 1768 } 1769 NDFREE(&nd, NDF_ONLY_PNBUF); 1770 vput(nd.ni_dvp); 1771 if (vp == nd.ni_dvp) 1772 vrele(vp); 1773 else 1774 vput(vp); 1775 return (error); 1776 } 1777 1778 /* 1779 * Reposition read/write file offset. 1780 */ 1781 #ifndef _SYS_SYSPROTO_H_ 1782 struct lseek_args { 1783 int fd; 1784 int pad; 1785 off_t offset; 1786 int whence; 1787 }; 1788 #endif 1789 int 1790 sys_lseek(td, uap) 1791 struct thread *td; 1792 register struct lseek_args /* { 1793 int fd; 1794 int pad; 1795 off_t offset; 1796 int whence; 1797 } */ *uap; 1798 { 1799 struct file *fp; 1800 cap_rights_t rights; 1801 int error; 1802 1803 AUDIT_ARG_FD(uap->fd); 1804 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1805 if (error != 0) 1806 return (error); 1807 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1808 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1809 fdrop(fp, td); 1810 return (error); 1811 } 1812 1813 #if defined(COMPAT_43) 1814 /* 1815 * Reposition read/write file offset. 1816 */ 1817 #ifndef _SYS_SYSPROTO_H_ 1818 struct olseek_args { 1819 int fd; 1820 long offset; 1821 int whence; 1822 }; 1823 #endif 1824 int 1825 olseek(td, uap) 1826 struct thread *td; 1827 register struct olseek_args /* { 1828 int fd; 1829 long offset; 1830 int whence; 1831 } */ *uap; 1832 { 1833 struct lseek_args /* { 1834 int fd; 1835 int pad; 1836 off_t offset; 1837 int whence; 1838 } */ nuap; 1839 1840 nuap.fd = uap->fd; 1841 nuap.offset = uap->offset; 1842 nuap.whence = uap->whence; 1843 return (sys_lseek(td, &nuap)); 1844 } 1845 #endif /* COMPAT_43 */ 1846 1847 #if defined(COMPAT_FREEBSD6) 1848 /* Version with the 'pad' argument */ 1849 int 1850 freebsd6_lseek(td, uap) 1851 struct thread *td; 1852 register struct freebsd6_lseek_args *uap; 1853 { 1854 struct lseek_args ouap; 1855 1856 ouap.fd = uap->fd; 1857 ouap.offset = uap->offset; 1858 ouap.whence = uap->whence; 1859 return (sys_lseek(td, &ouap)); 1860 } 1861 #endif 1862 1863 /* 1864 * Check access permissions using passed credentials. 1865 */ 1866 static int 1867 vn_access(vp, user_flags, cred, td) 1868 struct vnode *vp; 1869 int user_flags; 1870 struct ucred *cred; 1871 struct thread *td; 1872 { 1873 accmode_t accmode; 1874 int error; 1875 1876 /* Flags == 0 means only check for existence. */ 1877 if (user_flags == 0) 1878 return (0); 1879 1880 accmode = 0; 1881 if (user_flags & R_OK) 1882 accmode |= VREAD; 1883 if (user_flags & W_OK) 1884 accmode |= VWRITE; 1885 if (user_flags & X_OK) 1886 accmode |= VEXEC; 1887 #ifdef MAC 1888 error = mac_vnode_check_access(cred, vp, accmode); 1889 if (error != 0) 1890 return (error); 1891 #endif 1892 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1893 error = VOP_ACCESS(vp, accmode, cred, td); 1894 return (error); 1895 } 1896 1897 /* 1898 * Check access permissions using "real" credentials. 1899 */ 1900 #ifndef _SYS_SYSPROTO_H_ 1901 struct access_args { 1902 char *path; 1903 int amode; 1904 }; 1905 #endif 1906 int 1907 sys_access(td, uap) 1908 struct thread *td; 1909 register struct access_args /* { 1910 char *path; 1911 int amode; 1912 } */ *uap; 1913 { 1914 1915 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1916 0, uap->amode)); 1917 } 1918 1919 #ifndef _SYS_SYSPROTO_H_ 1920 struct faccessat_args { 1921 int dirfd; 1922 char *path; 1923 int amode; 1924 int flag; 1925 } 1926 #endif 1927 int 1928 sys_faccessat(struct thread *td, struct faccessat_args *uap) 1929 { 1930 1931 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1932 uap->amode)); 1933 } 1934 1935 int 1936 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1937 int flag, int amode) 1938 { 1939 struct ucred *cred, *usecred; 1940 struct vnode *vp; 1941 struct nameidata nd; 1942 cap_rights_t rights; 1943 int error; 1944 1945 if (flag & ~AT_EACCESS) 1946 return (EINVAL); 1947 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 1948 return (EINVAL); 1949 1950 /* 1951 * Create and modify a temporary credential instead of one that 1952 * is potentially shared (if we need one). 1953 */ 1954 cred = td->td_ucred; 1955 if ((flag & AT_EACCESS) == 0 && 1956 ((cred->cr_uid != cred->cr_ruid || 1957 cred->cr_rgid != cred->cr_groups[0]))) { 1958 usecred = crdup(cred); 1959 usecred->cr_uid = cred->cr_ruid; 1960 usecred->cr_groups[0] = cred->cr_rgid; 1961 td->td_ucred = usecred; 1962 } else 1963 usecred = cred; 1964 AUDIT_ARG_VALUE(amode); 1965 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 1966 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 1967 td); 1968 if ((error = namei(&nd)) != 0) 1969 goto out; 1970 vp = nd.ni_vp; 1971 1972 error = vn_access(vp, amode, usecred, td); 1973 NDFREE(&nd, NDF_ONLY_PNBUF); 1974 vput(vp); 1975 out: 1976 if (usecred != cred) { 1977 td->td_ucred = cred; 1978 crfree(usecred); 1979 } 1980 return (error); 1981 } 1982 1983 /* 1984 * Check access permissions using "effective" credentials. 1985 */ 1986 #ifndef _SYS_SYSPROTO_H_ 1987 struct eaccess_args { 1988 char *path; 1989 int amode; 1990 }; 1991 #endif 1992 int 1993 sys_eaccess(td, uap) 1994 struct thread *td; 1995 register struct eaccess_args /* { 1996 char *path; 1997 int amode; 1998 } */ *uap; 1999 { 2000 2001 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2002 AT_EACCESS, uap->amode)); 2003 } 2004 2005 #if defined(COMPAT_43) 2006 /* 2007 * Get file status; this version follows links. 2008 */ 2009 #ifndef _SYS_SYSPROTO_H_ 2010 struct ostat_args { 2011 char *path; 2012 struct ostat *ub; 2013 }; 2014 #endif 2015 int 2016 ostat(td, uap) 2017 struct thread *td; 2018 register struct ostat_args /* { 2019 char *path; 2020 struct ostat *ub; 2021 } */ *uap; 2022 { 2023 struct stat sb; 2024 struct ostat osb; 2025 int error; 2026 2027 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2028 &sb, NULL); 2029 if (error != 0) 2030 return (error); 2031 cvtstat(&sb, &osb); 2032 return (copyout(&osb, uap->ub, sizeof (osb))); 2033 } 2034 2035 /* 2036 * Get file status; this version does not follow links. 2037 */ 2038 #ifndef _SYS_SYSPROTO_H_ 2039 struct olstat_args { 2040 char *path; 2041 struct ostat *ub; 2042 }; 2043 #endif 2044 int 2045 olstat(td, uap) 2046 struct thread *td; 2047 register struct olstat_args /* { 2048 char *path; 2049 struct ostat *ub; 2050 } */ *uap; 2051 { 2052 struct stat sb; 2053 struct ostat osb; 2054 int error; 2055 2056 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2057 UIO_USERSPACE, &sb, NULL); 2058 if (error != 0) 2059 return (error); 2060 cvtstat(&sb, &osb); 2061 return (copyout(&osb, uap->ub, sizeof (osb))); 2062 } 2063 2064 /* 2065 * Convert from an old to a new stat structure. 2066 */ 2067 void 2068 cvtstat(st, ost) 2069 struct stat *st; 2070 struct ostat *ost; 2071 { 2072 2073 ost->st_dev = st->st_dev; 2074 ost->st_ino = st->st_ino; 2075 ost->st_mode = st->st_mode; 2076 ost->st_nlink = st->st_nlink; 2077 ost->st_uid = st->st_uid; 2078 ost->st_gid = st->st_gid; 2079 ost->st_rdev = st->st_rdev; 2080 if (st->st_size < (quad_t)1 << 32) 2081 ost->st_size = st->st_size; 2082 else 2083 ost->st_size = -2; 2084 ost->st_atim = st->st_atim; 2085 ost->st_mtim = st->st_mtim; 2086 ost->st_ctim = st->st_ctim; 2087 ost->st_blksize = st->st_blksize; 2088 ost->st_blocks = st->st_blocks; 2089 ost->st_flags = st->st_flags; 2090 ost->st_gen = st->st_gen; 2091 } 2092 #endif /* COMPAT_43 */ 2093 2094 /* 2095 * Get file status; this version follows links. 2096 */ 2097 #ifndef _SYS_SYSPROTO_H_ 2098 struct stat_args { 2099 char *path; 2100 struct stat *ub; 2101 }; 2102 #endif 2103 int 2104 sys_stat(td, uap) 2105 struct thread *td; 2106 register struct stat_args /* { 2107 char *path; 2108 struct stat *ub; 2109 } */ *uap; 2110 { 2111 struct stat sb; 2112 int error; 2113 2114 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2115 &sb, NULL); 2116 if (error == 0) 2117 error = copyout(&sb, uap->ub, sizeof (sb)); 2118 return (error); 2119 } 2120 2121 #ifndef _SYS_SYSPROTO_H_ 2122 struct fstatat_args { 2123 int fd; 2124 char *path; 2125 struct stat *buf; 2126 int flag; 2127 } 2128 #endif 2129 int 2130 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2131 { 2132 struct stat sb; 2133 int error; 2134 2135 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2136 UIO_USERSPACE, &sb, NULL); 2137 if (error == 0) 2138 error = copyout(&sb, uap->buf, sizeof (sb)); 2139 return (error); 2140 } 2141 2142 int 2143 kern_statat(struct thread *td, int flag, int fd, char *path, 2144 enum uio_seg pathseg, struct stat *sbp, 2145 void (*hook)(struct vnode *vp, struct stat *sbp)) 2146 { 2147 struct nameidata nd; 2148 struct stat sb; 2149 cap_rights_t rights; 2150 int error; 2151 2152 if (flag & ~AT_SYMLINK_NOFOLLOW) 2153 return (EINVAL); 2154 2155 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2156 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2157 cap_rights_init(&rights, CAP_FSTAT), td); 2158 2159 if ((error = namei(&nd)) != 0) 2160 return (error); 2161 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2162 if (error == 0) { 2163 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode); 2164 if (S_ISREG(sb.st_mode)) 2165 SDT_PROBE2(vfs, , stat, reg, path, pathseg); 2166 if (__predict_false(hook != NULL)) 2167 hook(nd.ni_vp, &sb); 2168 } 2169 NDFREE(&nd, NDF_ONLY_PNBUF); 2170 vput(nd.ni_vp); 2171 if (error != 0) 2172 return (error); 2173 *sbp = sb; 2174 #ifdef KTRACE 2175 if (KTRPOINT(td, KTR_STRUCT)) 2176 ktrstat(&sb); 2177 #endif 2178 return (0); 2179 } 2180 2181 /* 2182 * Get file status; this version does not follow links. 2183 */ 2184 #ifndef _SYS_SYSPROTO_H_ 2185 struct lstat_args { 2186 char *path; 2187 struct stat *ub; 2188 }; 2189 #endif 2190 int 2191 sys_lstat(td, uap) 2192 struct thread *td; 2193 register struct lstat_args /* { 2194 char *path; 2195 struct stat *ub; 2196 } */ *uap; 2197 { 2198 struct stat sb; 2199 int error; 2200 2201 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2202 UIO_USERSPACE, &sb, NULL); 2203 if (error == 0) 2204 error = copyout(&sb, uap->ub, sizeof (sb)); 2205 return (error); 2206 } 2207 2208 /* 2209 * Implementation of the NetBSD [l]stat() functions. 2210 */ 2211 void 2212 cvtnstat(sb, nsb) 2213 struct stat *sb; 2214 struct nstat *nsb; 2215 { 2216 2217 bzero(nsb, sizeof *nsb); 2218 nsb->st_dev = sb->st_dev; 2219 nsb->st_ino = sb->st_ino; 2220 nsb->st_mode = sb->st_mode; 2221 nsb->st_nlink = sb->st_nlink; 2222 nsb->st_uid = sb->st_uid; 2223 nsb->st_gid = sb->st_gid; 2224 nsb->st_rdev = sb->st_rdev; 2225 nsb->st_atim = sb->st_atim; 2226 nsb->st_mtim = sb->st_mtim; 2227 nsb->st_ctim = sb->st_ctim; 2228 nsb->st_size = sb->st_size; 2229 nsb->st_blocks = sb->st_blocks; 2230 nsb->st_blksize = sb->st_blksize; 2231 nsb->st_flags = sb->st_flags; 2232 nsb->st_gen = sb->st_gen; 2233 nsb->st_birthtim = sb->st_birthtim; 2234 } 2235 2236 #ifndef _SYS_SYSPROTO_H_ 2237 struct nstat_args { 2238 char *path; 2239 struct nstat *ub; 2240 }; 2241 #endif 2242 int 2243 sys_nstat(td, uap) 2244 struct thread *td; 2245 register struct nstat_args /* { 2246 char *path; 2247 struct nstat *ub; 2248 } */ *uap; 2249 { 2250 struct stat sb; 2251 struct nstat nsb; 2252 int error; 2253 2254 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2255 &sb, NULL); 2256 if (error != 0) 2257 return (error); 2258 cvtnstat(&sb, &nsb); 2259 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2260 } 2261 2262 /* 2263 * NetBSD lstat. Get file status; this version does not follow links. 2264 */ 2265 #ifndef _SYS_SYSPROTO_H_ 2266 struct lstat_args { 2267 char *path; 2268 struct stat *ub; 2269 }; 2270 #endif 2271 int 2272 sys_nlstat(td, uap) 2273 struct thread *td; 2274 register struct nlstat_args /* { 2275 char *path; 2276 struct nstat *ub; 2277 } */ *uap; 2278 { 2279 struct stat sb; 2280 struct nstat nsb; 2281 int error; 2282 2283 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2284 UIO_USERSPACE, &sb, NULL); 2285 if (error != 0) 2286 return (error); 2287 cvtnstat(&sb, &nsb); 2288 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2289 } 2290 2291 /* 2292 * Get configurable pathname variables. 2293 */ 2294 #ifndef _SYS_SYSPROTO_H_ 2295 struct pathconf_args { 2296 char *path; 2297 int name; 2298 }; 2299 #endif 2300 int 2301 sys_pathconf(td, uap) 2302 struct thread *td; 2303 register struct pathconf_args /* { 2304 char *path; 2305 int name; 2306 } */ *uap; 2307 { 2308 2309 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2310 } 2311 2312 #ifndef _SYS_SYSPROTO_H_ 2313 struct lpathconf_args { 2314 char *path; 2315 int name; 2316 }; 2317 #endif 2318 int 2319 sys_lpathconf(td, uap) 2320 struct thread *td; 2321 register struct lpathconf_args /* { 2322 char *path; 2323 int name; 2324 } */ *uap; 2325 { 2326 2327 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2328 NOFOLLOW)); 2329 } 2330 2331 int 2332 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2333 u_long flags) 2334 { 2335 struct nameidata nd; 2336 int error; 2337 2338 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2339 pathseg, path, td); 2340 if ((error = namei(&nd)) != 0) 2341 return (error); 2342 NDFREE(&nd, NDF_ONLY_PNBUF); 2343 2344 /* If asynchronous I/O is available, it works for all files. */ 2345 if (name == _PC_ASYNC_IO) 2346 td->td_retval[0] = async_io_version; 2347 else 2348 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2349 vput(nd.ni_vp); 2350 return (error); 2351 } 2352 2353 /* 2354 * Return target name of a symbolic link. 2355 */ 2356 #ifndef _SYS_SYSPROTO_H_ 2357 struct readlink_args { 2358 char *path; 2359 char *buf; 2360 size_t count; 2361 }; 2362 #endif 2363 int 2364 sys_readlink(td, uap) 2365 struct thread *td; 2366 register struct readlink_args /* { 2367 char *path; 2368 char *buf; 2369 size_t count; 2370 } */ *uap; 2371 { 2372 2373 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2374 uap->buf, UIO_USERSPACE, uap->count)); 2375 } 2376 #ifndef _SYS_SYSPROTO_H_ 2377 struct readlinkat_args { 2378 int fd; 2379 char *path; 2380 char *buf; 2381 size_t bufsize; 2382 }; 2383 #endif 2384 int 2385 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2386 { 2387 2388 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2389 uap->buf, UIO_USERSPACE, uap->bufsize)); 2390 } 2391 2392 int 2393 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2394 char *buf, enum uio_seg bufseg, size_t count) 2395 { 2396 struct vnode *vp; 2397 struct iovec aiov; 2398 struct uio auio; 2399 struct nameidata nd; 2400 int error; 2401 2402 if (count > IOSIZE_MAX) 2403 return (EINVAL); 2404 2405 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2406 pathseg, path, fd, td); 2407 2408 if ((error = namei(&nd)) != 0) 2409 return (error); 2410 NDFREE(&nd, NDF_ONLY_PNBUF); 2411 vp = nd.ni_vp; 2412 #ifdef MAC 2413 error = mac_vnode_check_readlink(td->td_ucred, vp); 2414 if (error != 0) { 2415 vput(vp); 2416 return (error); 2417 } 2418 #endif 2419 if (vp->v_type != VLNK) 2420 error = EINVAL; 2421 else { 2422 aiov.iov_base = buf; 2423 aiov.iov_len = count; 2424 auio.uio_iov = &aiov; 2425 auio.uio_iovcnt = 1; 2426 auio.uio_offset = 0; 2427 auio.uio_rw = UIO_READ; 2428 auio.uio_segflg = bufseg; 2429 auio.uio_td = td; 2430 auio.uio_resid = count; 2431 error = VOP_READLINK(vp, &auio, td->td_ucred); 2432 td->td_retval[0] = count - auio.uio_resid; 2433 } 2434 vput(vp); 2435 return (error); 2436 } 2437 2438 /* 2439 * Common implementation code for chflags() and fchflags(). 2440 */ 2441 static int 2442 setfflags(td, vp, flags) 2443 struct thread *td; 2444 struct vnode *vp; 2445 u_long flags; 2446 { 2447 struct mount *mp; 2448 struct vattr vattr; 2449 int error; 2450 2451 /* We can't support the value matching VNOVAL. */ 2452 if (flags == VNOVAL) 2453 return (EOPNOTSUPP); 2454 2455 /* 2456 * Prevent non-root users from setting flags on devices. When 2457 * a device is reused, users can retain ownership of the device 2458 * if they are allowed to set flags and programs assume that 2459 * chown can't fail when done as root. 2460 */ 2461 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2462 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2463 if (error != 0) 2464 return (error); 2465 } 2466 2467 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2468 return (error); 2469 VATTR_NULL(&vattr); 2470 vattr.va_flags = flags; 2471 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2472 #ifdef MAC 2473 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2474 if (error == 0) 2475 #endif 2476 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2477 VOP_UNLOCK(vp, 0); 2478 vn_finished_write(mp); 2479 return (error); 2480 } 2481 2482 /* 2483 * Change flags of a file given a path name. 2484 */ 2485 #ifndef _SYS_SYSPROTO_H_ 2486 struct chflags_args { 2487 const char *path; 2488 u_long flags; 2489 }; 2490 #endif 2491 int 2492 sys_chflags(td, uap) 2493 struct thread *td; 2494 register struct chflags_args /* { 2495 const char *path; 2496 u_long flags; 2497 } */ *uap; 2498 { 2499 2500 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2501 uap->flags, 0)); 2502 } 2503 2504 #ifndef _SYS_SYSPROTO_H_ 2505 struct chflagsat_args { 2506 int fd; 2507 const char *path; 2508 u_long flags; 2509 int atflag; 2510 } 2511 #endif 2512 int 2513 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2514 { 2515 int fd = uap->fd; 2516 const char *path = uap->path; 2517 u_long flags = uap->flags; 2518 int atflag = uap->atflag; 2519 2520 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2521 return (EINVAL); 2522 2523 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2524 } 2525 2526 /* 2527 * Same as chflags() but doesn't follow symlinks. 2528 */ 2529 int 2530 sys_lchflags(td, uap) 2531 struct thread *td; 2532 register struct lchflags_args /* { 2533 const char *path; 2534 u_long flags; 2535 } */ *uap; 2536 { 2537 2538 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2539 uap->flags, AT_SYMLINK_NOFOLLOW)); 2540 } 2541 2542 static int 2543 kern_chflagsat(struct thread *td, int fd, const char *path, 2544 enum uio_seg pathseg, u_long flags, int atflag) 2545 { 2546 struct nameidata nd; 2547 cap_rights_t rights; 2548 int error, follow; 2549 2550 AUDIT_ARG_FFLAGS(flags); 2551 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2552 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2553 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2554 if ((error = namei(&nd)) != 0) 2555 return (error); 2556 NDFREE(&nd, NDF_ONLY_PNBUF); 2557 error = setfflags(td, nd.ni_vp, flags); 2558 vrele(nd.ni_vp); 2559 return (error); 2560 } 2561 2562 /* 2563 * Change flags of a file given a file descriptor. 2564 */ 2565 #ifndef _SYS_SYSPROTO_H_ 2566 struct fchflags_args { 2567 int fd; 2568 u_long flags; 2569 }; 2570 #endif 2571 int 2572 sys_fchflags(td, uap) 2573 struct thread *td; 2574 register struct fchflags_args /* { 2575 int fd; 2576 u_long flags; 2577 } */ *uap; 2578 { 2579 struct file *fp; 2580 cap_rights_t rights; 2581 int error; 2582 2583 AUDIT_ARG_FD(uap->fd); 2584 AUDIT_ARG_FFLAGS(uap->flags); 2585 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS), 2586 &fp); 2587 if (error != 0) 2588 return (error); 2589 #ifdef AUDIT 2590 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2591 AUDIT_ARG_VNODE1(fp->f_vnode); 2592 VOP_UNLOCK(fp->f_vnode, 0); 2593 #endif 2594 error = setfflags(td, fp->f_vnode, uap->flags); 2595 fdrop(fp, td); 2596 return (error); 2597 } 2598 2599 /* 2600 * Common implementation code for chmod(), lchmod() and fchmod(). 2601 */ 2602 int 2603 setfmode(td, cred, vp, mode) 2604 struct thread *td; 2605 struct ucred *cred; 2606 struct vnode *vp; 2607 int mode; 2608 { 2609 struct mount *mp; 2610 struct vattr vattr; 2611 int error; 2612 2613 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2614 return (error); 2615 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2616 VATTR_NULL(&vattr); 2617 vattr.va_mode = mode & ALLPERMS; 2618 #ifdef MAC 2619 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2620 if (error == 0) 2621 #endif 2622 error = VOP_SETATTR(vp, &vattr, cred); 2623 VOP_UNLOCK(vp, 0); 2624 vn_finished_write(mp); 2625 return (error); 2626 } 2627 2628 /* 2629 * Change mode of a file given path name. 2630 */ 2631 #ifndef _SYS_SYSPROTO_H_ 2632 struct chmod_args { 2633 char *path; 2634 int mode; 2635 }; 2636 #endif 2637 int 2638 sys_chmod(td, uap) 2639 struct thread *td; 2640 register struct chmod_args /* { 2641 char *path; 2642 int mode; 2643 } */ *uap; 2644 { 2645 2646 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2647 uap->mode, 0)); 2648 } 2649 2650 #ifndef _SYS_SYSPROTO_H_ 2651 struct fchmodat_args { 2652 int dirfd; 2653 char *path; 2654 mode_t mode; 2655 int flag; 2656 } 2657 #endif 2658 int 2659 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2660 { 2661 int flag = uap->flag; 2662 int fd = uap->fd; 2663 char *path = uap->path; 2664 mode_t mode = uap->mode; 2665 2666 if (flag & ~AT_SYMLINK_NOFOLLOW) 2667 return (EINVAL); 2668 2669 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2670 } 2671 2672 /* 2673 * Change mode of a file given path name (don't follow links.) 2674 */ 2675 #ifndef _SYS_SYSPROTO_H_ 2676 struct lchmod_args { 2677 char *path; 2678 int mode; 2679 }; 2680 #endif 2681 int 2682 sys_lchmod(td, uap) 2683 struct thread *td; 2684 register struct lchmod_args /* { 2685 char *path; 2686 int mode; 2687 } */ *uap; 2688 { 2689 2690 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2691 uap->mode, AT_SYMLINK_NOFOLLOW)); 2692 } 2693 2694 int 2695 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2696 mode_t mode, int flag) 2697 { 2698 struct nameidata nd; 2699 cap_rights_t rights; 2700 int error, follow; 2701 2702 AUDIT_ARG_MODE(mode); 2703 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2704 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2705 cap_rights_init(&rights, CAP_FCHMOD), td); 2706 if ((error = namei(&nd)) != 0) 2707 return (error); 2708 NDFREE(&nd, NDF_ONLY_PNBUF); 2709 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2710 vrele(nd.ni_vp); 2711 return (error); 2712 } 2713 2714 /* 2715 * Change mode of a file given a file descriptor. 2716 */ 2717 #ifndef _SYS_SYSPROTO_H_ 2718 struct fchmod_args { 2719 int fd; 2720 int mode; 2721 }; 2722 #endif 2723 int 2724 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2725 { 2726 struct file *fp; 2727 cap_rights_t rights; 2728 int error; 2729 2730 AUDIT_ARG_FD(uap->fd); 2731 AUDIT_ARG_MODE(uap->mode); 2732 2733 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2734 if (error != 0) 2735 return (error); 2736 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2737 fdrop(fp, td); 2738 return (error); 2739 } 2740 2741 /* 2742 * Common implementation for chown(), lchown(), and fchown() 2743 */ 2744 int 2745 setfown(td, cred, vp, uid, gid) 2746 struct thread *td; 2747 struct ucred *cred; 2748 struct vnode *vp; 2749 uid_t uid; 2750 gid_t gid; 2751 { 2752 struct mount *mp; 2753 struct vattr vattr; 2754 int error; 2755 2756 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2757 return (error); 2758 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2759 VATTR_NULL(&vattr); 2760 vattr.va_uid = uid; 2761 vattr.va_gid = gid; 2762 #ifdef MAC 2763 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2764 vattr.va_gid); 2765 if (error == 0) 2766 #endif 2767 error = VOP_SETATTR(vp, &vattr, cred); 2768 VOP_UNLOCK(vp, 0); 2769 vn_finished_write(mp); 2770 return (error); 2771 } 2772 2773 /* 2774 * Set ownership given a path name. 2775 */ 2776 #ifndef _SYS_SYSPROTO_H_ 2777 struct chown_args { 2778 char *path; 2779 int uid; 2780 int gid; 2781 }; 2782 #endif 2783 int 2784 sys_chown(td, uap) 2785 struct thread *td; 2786 register struct chown_args /* { 2787 char *path; 2788 int uid; 2789 int gid; 2790 } */ *uap; 2791 { 2792 2793 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2794 uap->gid, 0)); 2795 } 2796 2797 #ifndef _SYS_SYSPROTO_H_ 2798 struct fchownat_args { 2799 int fd; 2800 const char * path; 2801 uid_t uid; 2802 gid_t gid; 2803 int flag; 2804 }; 2805 #endif 2806 int 2807 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2808 { 2809 int flag; 2810 2811 flag = uap->flag; 2812 if (flag & ~AT_SYMLINK_NOFOLLOW) 2813 return (EINVAL); 2814 2815 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2816 uap->gid, uap->flag)); 2817 } 2818 2819 int 2820 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2821 int uid, int gid, int flag) 2822 { 2823 struct nameidata nd; 2824 cap_rights_t rights; 2825 int error, follow; 2826 2827 AUDIT_ARG_OWNER(uid, gid); 2828 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2829 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2830 cap_rights_init(&rights, CAP_FCHOWN), td); 2831 2832 if ((error = namei(&nd)) != 0) 2833 return (error); 2834 NDFREE(&nd, NDF_ONLY_PNBUF); 2835 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2836 vrele(nd.ni_vp); 2837 return (error); 2838 } 2839 2840 /* 2841 * Set ownership given a path name, do not cross symlinks. 2842 */ 2843 #ifndef _SYS_SYSPROTO_H_ 2844 struct lchown_args { 2845 char *path; 2846 int uid; 2847 int gid; 2848 }; 2849 #endif 2850 int 2851 sys_lchown(td, uap) 2852 struct thread *td; 2853 register struct lchown_args /* { 2854 char *path; 2855 int uid; 2856 int gid; 2857 } */ *uap; 2858 { 2859 2860 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2861 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2862 } 2863 2864 /* 2865 * Set ownership given a file descriptor. 2866 */ 2867 #ifndef _SYS_SYSPROTO_H_ 2868 struct fchown_args { 2869 int fd; 2870 int uid; 2871 int gid; 2872 }; 2873 #endif 2874 int 2875 sys_fchown(td, uap) 2876 struct thread *td; 2877 register struct fchown_args /* { 2878 int fd; 2879 int uid; 2880 int gid; 2881 } */ *uap; 2882 { 2883 struct file *fp; 2884 cap_rights_t rights; 2885 int error; 2886 2887 AUDIT_ARG_FD(uap->fd); 2888 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2889 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 2890 if (error != 0) 2891 return (error); 2892 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2893 fdrop(fp, td); 2894 return (error); 2895 } 2896 2897 /* 2898 * Common implementation code for utimes(), lutimes(), and futimes(). 2899 */ 2900 static int 2901 getutimes(usrtvp, tvpseg, tsp) 2902 const struct timeval *usrtvp; 2903 enum uio_seg tvpseg; 2904 struct timespec *tsp; 2905 { 2906 struct timeval tv[2]; 2907 const struct timeval *tvp; 2908 int error; 2909 2910 if (usrtvp == NULL) { 2911 vfs_timestamp(&tsp[0]); 2912 tsp[1] = tsp[0]; 2913 } else { 2914 if (tvpseg == UIO_SYSSPACE) { 2915 tvp = usrtvp; 2916 } else { 2917 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 2918 return (error); 2919 tvp = tv; 2920 } 2921 2922 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 2923 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 2924 return (EINVAL); 2925 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2926 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2927 } 2928 return (0); 2929 } 2930 2931 /* 2932 * Common implementation code for futimens(), utimensat(). 2933 */ 2934 #define UTIMENS_NULL 0x1 2935 #define UTIMENS_EXIT 0x2 2936 static int 2937 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 2938 struct timespec *tsp, int *retflags) 2939 { 2940 struct timespec tsnow; 2941 int error; 2942 2943 vfs_timestamp(&tsnow); 2944 *retflags = 0; 2945 if (usrtsp == NULL) { 2946 tsp[0] = tsnow; 2947 tsp[1] = tsnow; 2948 *retflags |= UTIMENS_NULL; 2949 return (0); 2950 } 2951 if (tspseg == UIO_SYSSPACE) { 2952 tsp[0] = usrtsp[0]; 2953 tsp[1] = usrtsp[1]; 2954 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 2955 return (error); 2956 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 2957 *retflags |= UTIMENS_EXIT; 2958 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 2959 *retflags |= UTIMENS_NULL; 2960 if (tsp[0].tv_nsec == UTIME_OMIT) 2961 tsp[0].tv_sec = VNOVAL; 2962 else if (tsp[0].tv_nsec == UTIME_NOW) 2963 tsp[0] = tsnow; 2964 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 2965 return (EINVAL); 2966 if (tsp[1].tv_nsec == UTIME_OMIT) 2967 tsp[1].tv_sec = VNOVAL; 2968 else if (tsp[1].tv_nsec == UTIME_NOW) 2969 tsp[1] = tsnow; 2970 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 2971 return (EINVAL); 2972 2973 return (0); 2974 } 2975 2976 /* 2977 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 2978 * and utimensat(). 2979 */ 2980 static int 2981 setutimes(td, vp, ts, numtimes, nullflag) 2982 struct thread *td; 2983 struct vnode *vp; 2984 const struct timespec *ts; 2985 int numtimes; 2986 int nullflag; 2987 { 2988 struct mount *mp; 2989 struct vattr vattr; 2990 int error, setbirthtime; 2991 2992 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2993 return (error); 2994 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2995 setbirthtime = 0; 2996 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 2997 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 2998 setbirthtime = 1; 2999 VATTR_NULL(&vattr); 3000 vattr.va_atime = ts[0]; 3001 vattr.va_mtime = ts[1]; 3002 if (setbirthtime) 3003 vattr.va_birthtime = ts[1]; 3004 if (numtimes > 2) 3005 vattr.va_birthtime = ts[2]; 3006 if (nullflag) 3007 vattr.va_vaflags |= VA_UTIMES_NULL; 3008 #ifdef MAC 3009 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3010 vattr.va_mtime); 3011 #endif 3012 if (error == 0) 3013 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3014 VOP_UNLOCK(vp, 0); 3015 vn_finished_write(mp); 3016 return (error); 3017 } 3018 3019 /* 3020 * Set the access and modification times of a file. 3021 */ 3022 #ifndef _SYS_SYSPROTO_H_ 3023 struct utimes_args { 3024 char *path; 3025 struct timeval *tptr; 3026 }; 3027 #endif 3028 int 3029 sys_utimes(td, uap) 3030 struct thread *td; 3031 register struct utimes_args /* { 3032 char *path; 3033 struct timeval *tptr; 3034 } */ *uap; 3035 { 3036 3037 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3038 uap->tptr, UIO_USERSPACE)); 3039 } 3040 3041 #ifndef _SYS_SYSPROTO_H_ 3042 struct futimesat_args { 3043 int fd; 3044 const char * path; 3045 const struct timeval * times; 3046 }; 3047 #endif 3048 int 3049 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3050 { 3051 3052 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3053 uap->times, UIO_USERSPACE)); 3054 } 3055 3056 int 3057 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3058 struct timeval *tptr, enum uio_seg tptrseg) 3059 { 3060 struct nameidata nd; 3061 struct timespec ts[2]; 3062 cap_rights_t rights; 3063 int error; 3064 3065 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3066 return (error); 3067 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3068 cap_rights_init(&rights, CAP_FUTIMES), td); 3069 3070 if ((error = namei(&nd)) != 0) 3071 return (error); 3072 NDFREE(&nd, NDF_ONLY_PNBUF); 3073 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3074 vrele(nd.ni_vp); 3075 return (error); 3076 } 3077 3078 /* 3079 * Set the access and modification times of a file. 3080 */ 3081 #ifndef _SYS_SYSPROTO_H_ 3082 struct lutimes_args { 3083 char *path; 3084 struct timeval *tptr; 3085 }; 3086 #endif 3087 int 3088 sys_lutimes(td, uap) 3089 struct thread *td; 3090 register struct lutimes_args /* { 3091 char *path; 3092 struct timeval *tptr; 3093 } */ *uap; 3094 { 3095 3096 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3097 UIO_USERSPACE)); 3098 } 3099 3100 int 3101 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3102 struct timeval *tptr, enum uio_seg tptrseg) 3103 { 3104 struct timespec ts[2]; 3105 struct nameidata nd; 3106 int error; 3107 3108 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3109 return (error); 3110 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3111 if ((error = namei(&nd)) != 0) 3112 return (error); 3113 NDFREE(&nd, NDF_ONLY_PNBUF); 3114 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3115 vrele(nd.ni_vp); 3116 return (error); 3117 } 3118 3119 /* 3120 * Set the access and modification times of a file. 3121 */ 3122 #ifndef _SYS_SYSPROTO_H_ 3123 struct futimes_args { 3124 int fd; 3125 struct timeval *tptr; 3126 }; 3127 #endif 3128 int 3129 sys_futimes(td, uap) 3130 struct thread *td; 3131 register struct futimes_args /* { 3132 int fd; 3133 struct timeval *tptr; 3134 } */ *uap; 3135 { 3136 3137 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3138 } 3139 3140 int 3141 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3142 enum uio_seg tptrseg) 3143 { 3144 struct timespec ts[2]; 3145 struct file *fp; 3146 cap_rights_t rights; 3147 int error; 3148 3149 AUDIT_ARG_FD(fd); 3150 error = getutimes(tptr, tptrseg, ts); 3151 if (error != 0) 3152 return (error); 3153 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3154 if (error != 0) 3155 return (error); 3156 #ifdef AUDIT 3157 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3158 AUDIT_ARG_VNODE1(fp->f_vnode); 3159 VOP_UNLOCK(fp->f_vnode, 0); 3160 #endif 3161 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3162 fdrop(fp, td); 3163 return (error); 3164 } 3165 3166 int 3167 sys_futimens(struct thread *td, struct futimens_args *uap) 3168 { 3169 3170 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3171 } 3172 3173 int 3174 kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3175 enum uio_seg tptrseg) 3176 { 3177 struct timespec ts[2]; 3178 struct file *fp; 3179 cap_rights_t rights; 3180 int error, flags; 3181 3182 AUDIT_ARG_FD(fd); 3183 error = getutimens(tptr, tptrseg, ts, &flags); 3184 if (error != 0) 3185 return (error); 3186 if (flags & UTIMENS_EXIT) 3187 return (0); 3188 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3189 if (error != 0) 3190 return (error); 3191 #ifdef AUDIT 3192 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3193 AUDIT_ARG_VNODE1(fp->f_vnode); 3194 VOP_UNLOCK(fp->f_vnode, 0); 3195 #endif 3196 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3197 fdrop(fp, td); 3198 return (error); 3199 } 3200 3201 int 3202 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3203 { 3204 3205 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3206 uap->times, UIO_USERSPACE, uap->flag)); 3207 } 3208 3209 int 3210 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3211 struct timespec *tptr, enum uio_seg tptrseg, int flag) 3212 { 3213 struct nameidata nd; 3214 struct timespec ts[2]; 3215 cap_rights_t rights; 3216 int error, flags; 3217 3218 if (flag & ~AT_SYMLINK_NOFOLLOW) 3219 return (EINVAL); 3220 3221 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3222 return (error); 3223 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 3224 FOLLOW) | AUDITVNODE1, pathseg, path, fd, 3225 cap_rights_init(&rights, CAP_FUTIMES), td); 3226 if ((error = namei(&nd)) != 0) 3227 return (error); 3228 /* 3229 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3230 * POSIX states: 3231 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3232 * "Search permission is denied by a component of the path prefix." 3233 */ 3234 NDFREE(&nd, NDF_ONLY_PNBUF); 3235 if ((flags & UTIMENS_EXIT) == 0) 3236 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3237 vrele(nd.ni_vp); 3238 return (error); 3239 } 3240 3241 /* 3242 * Truncate a file given its path name. 3243 */ 3244 #ifndef _SYS_SYSPROTO_H_ 3245 struct truncate_args { 3246 char *path; 3247 int pad; 3248 off_t length; 3249 }; 3250 #endif 3251 int 3252 sys_truncate(td, uap) 3253 struct thread *td; 3254 register struct truncate_args /* { 3255 char *path; 3256 int pad; 3257 off_t length; 3258 } */ *uap; 3259 { 3260 3261 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3262 } 3263 3264 int 3265 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3266 { 3267 struct mount *mp; 3268 struct vnode *vp; 3269 void *rl_cookie; 3270 struct vattr vattr; 3271 struct nameidata nd; 3272 int error; 3273 3274 if (length < 0) 3275 return(EINVAL); 3276 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3277 if ((error = namei(&nd)) != 0) 3278 return (error); 3279 vp = nd.ni_vp; 3280 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3281 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3282 vn_rangelock_unlock(vp, rl_cookie); 3283 vrele(vp); 3284 return (error); 3285 } 3286 NDFREE(&nd, NDF_ONLY_PNBUF); 3287 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3288 if (vp->v_type == VDIR) 3289 error = EISDIR; 3290 #ifdef MAC 3291 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3292 } 3293 #endif 3294 else if ((error = vn_writechk(vp)) == 0 && 3295 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3296 VATTR_NULL(&vattr); 3297 vattr.va_size = length; 3298 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3299 } 3300 VOP_UNLOCK(vp, 0); 3301 vn_finished_write(mp); 3302 vn_rangelock_unlock(vp, rl_cookie); 3303 vrele(vp); 3304 return (error); 3305 } 3306 3307 #if defined(COMPAT_43) 3308 /* 3309 * Truncate a file given its path name. 3310 */ 3311 #ifndef _SYS_SYSPROTO_H_ 3312 struct otruncate_args { 3313 char *path; 3314 long length; 3315 }; 3316 #endif 3317 int 3318 otruncate(td, uap) 3319 struct thread *td; 3320 register struct otruncate_args /* { 3321 char *path; 3322 long length; 3323 } */ *uap; 3324 { 3325 struct truncate_args /* { 3326 char *path; 3327 int pad; 3328 off_t length; 3329 } */ nuap; 3330 3331 nuap.path = uap->path; 3332 nuap.length = uap->length; 3333 return (sys_truncate(td, &nuap)); 3334 } 3335 #endif /* COMPAT_43 */ 3336 3337 #if defined(COMPAT_FREEBSD6) 3338 /* Versions with the pad argument */ 3339 int 3340 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3341 { 3342 struct truncate_args ouap; 3343 3344 ouap.path = uap->path; 3345 ouap.length = uap->length; 3346 return (sys_truncate(td, &ouap)); 3347 } 3348 3349 int 3350 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3351 { 3352 struct ftruncate_args ouap; 3353 3354 ouap.fd = uap->fd; 3355 ouap.length = uap->length; 3356 return (sys_ftruncate(td, &ouap)); 3357 } 3358 #endif 3359 3360 /* 3361 * Sync an open file. 3362 */ 3363 #ifndef _SYS_SYSPROTO_H_ 3364 struct fsync_args { 3365 int fd; 3366 }; 3367 #endif 3368 int 3369 sys_fsync(td, uap) 3370 struct thread *td; 3371 struct fsync_args /* { 3372 int fd; 3373 } */ *uap; 3374 { 3375 struct vnode *vp; 3376 struct mount *mp; 3377 struct file *fp; 3378 cap_rights_t rights; 3379 int error, lock_flags; 3380 3381 AUDIT_ARG_FD(uap->fd); 3382 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FSYNC), &fp); 3383 if (error != 0) 3384 return (error); 3385 vp = fp->f_vnode; 3386 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3387 if (error != 0) 3388 goto drop; 3389 if (MNT_SHARED_WRITES(mp) || 3390 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3391 lock_flags = LK_SHARED; 3392 } else { 3393 lock_flags = LK_EXCLUSIVE; 3394 } 3395 vn_lock(vp, lock_flags | LK_RETRY); 3396 AUDIT_ARG_VNODE1(vp); 3397 if (vp->v_object != NULL) { 3398 VM_OBJECT_WLOCK(vp->v_object); 3399 vm_object_page_clean(vp->v_object, 0, 0, 0); 3400 VM_OBJECT_WUNLOCK(vp->v_object); 3401 } 3402 error = VOP_FSYNC(vp, MNT_WAIT, td); 3403 3404 VOP_UNLOCK(vp, 0); 3405 vn_finished_write(mp); 3406 drop: 3407 fdrop(fp, td); 3408 return (error); 3409 } 3410 3411 /* 3412 * Rename files. Source and destination must either both be directories, or 3413 * both not be directories. If target is a directory, it must be empty. 3414 */ 3415 #ifndef _SYS_SYSPROTO_H_ 3416 struct rename_args { 3417 char *from; 3418 char *to; 3419 }; 3420 #endif 3421 int 3422 sys_rename(td, uap) 3423 struct thread *td; 3424 register struct rename_args /* { 3425 char *from; 3426 char *to; 3427 } */ *uap; 3428 { 3429 3430 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3431 uap->to, UIO_USERSPACE)); 3432 } 3433 3434 #ifndef _SYS_SYSPROTO_H_ 3435 struct renameat_args { 3436 int oldfd; 3437 char *old; 3438 int newfd; 3439 char *new; 3440 }; 3441 #endif 3442 int 3443 sys_renameat(struct thread *td, struct renameat_args *uap) 3444 { 3445 3446 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3447 UIO_USERSPACE)); 3448 } 3449 3450 int 3451 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3452 enum uio_seg pathseg) 3453 { 3454 struct mount *mp = NULL; 3455 struct vnode *tvp, *fvp, *tdvp; 3456 struct nameidata fromnd, tond; 3457 cap_rights_t rights; 3458 int error; 3459 3460 again: 3461 bwillwrite(); 3462 #ifdef MAC 3463 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3464 AUDITVNODE1, pathseg, old, oldfd, 3465 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3466 #else 3467 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3468 pathseg, old, oldfd, 3469 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3470 #endif 3471 3472 if ((error = namei(&fromnd)) != 0) 3473 return (error); 3474 #ifdef MAC 3475 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3476 fromnd.ni_vp, &fromnd.ni_cnd); 3477 VOP_UNLOCK(fromnd.ni_dvp, 0); 3478 if (fromnd.ni_dvp != fromnd.ni_vp) 3479 VOP_UNLOCK(fromnd.ni_vp, 0); 3480 #endif 3481 fvp = fromnd.ni_vp; 3482 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3483 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3484 cap_rights_init(&rights, CAP_RENAMEAT_TARGET), td); 3485 if (fromnd.ni_vp->v_type == VDIR) 3486 tond.ni_cnd.cn_flags |= WILLBEDIR; 3487 if ((error = namei(&tond)) != 0) { 3488 /* Translate error code for rename("dir1", "dir2/."). */ 3489 if (error == EISDIR && fvp->v_type == VDIR) 3490 error = EINVAL; 3491 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3492 vrele(fromnd.ni_dvp); 3493 vrele(fvp); 3494 goto out1; 3495 } 3496 tdvp = tond.ni_dvp; 3497 tvp = tond.ni_vp; 3498 error = vn_start_write(fvp, &mp, V_NOWAIT); 3499 if (error != 0) { 3500 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3501 NDFREE(&tond, NDF_ONLY_PNBUF); 3502 if (tvp != NULL) 3503 vput(tvp); 3504 if (tdvp == tvp) 3505 vrele(tdvp); 3506 else 3507 vput(tdvp); 3508 vrele(fromnd.ni_dvp); 3509 vrele(fvp); 3510 vrele(tond.ni_startdir); 3511 if (fromnd.ni_startdir != NULL) 3512 vrele(fromnd.ni_startdir); 3513 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3514 if (error != 0) 3515 return (error); 3516 goto again; 3517 } 3518 if (tvp != NULL) { 3519 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3520 error = ENOTDIR; 3521 goto out; 3522 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3523 error = EISDIR; 3524 goto out; 3525 } 3526 #ifdef CAPABILITIES 3527 if (newfd != AT_FDCWD) { 3528 /* 3529 * If the target already exists we require CAP_UNLINKAT 3530 * from 'newfd'. 3531 */ 3532 error = cap_check(&tond.ni_filecaps.fc_rights, 3533 cap_rights_init(&rights, CAP_UNLINKAT)); 3534 if (error != 0) 3535 goto out; 3536 } 3537 #endif 3538 } 3539 if (fvp == tdvp) { 3540 error = EINVAL; 3541 goto out; 3542 } 3543 /* 3544 * If the source is the same as the destination (that is, if they 3545 * are links to the same vnode), then there is nothing to do. 3546 */ 3547 if (fvp == tvp) 3548 error = -1; 3549 #ifdef MAC 3550 else 3551 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3552 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3553 #endif 3554 out: 3555 if (error == 0) { 3556 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3557 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3558 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3559 NDFREE(&tond, NDF_ONLY_PNBUF); 3560 } else { 3561 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3562 NDFREE(&tond, NDF_ONLY_PNBUF); 3563 if (tvp != NULL) 3564 vput(tvp); 3565 if (tdvp == tvp) 3566 vrele(tdvp); 3567 else 3568 vput(tdvp); 3569 vrele(fromnd.ni_dvp); 3570 vrele(fvp); 3571 } 3572 vrele(tond.ni_startdir); 3573 vn_finished_write(mp); 3574 out1: 3575 if (fromnd.ni_startdir) 3576 vrele(fromnd.ni_startdir); 3577 if (error == -1) 3578 return (0); 3579 return (error); 3580 } 3581 3582 /* 3583 * Make a directory file. 3584 */ 3585 #ifndef _SYS_SYSPROTO_H_ 3586 struct mkdir_args { 3587 char *path; 3588 int mode; 3589 }; 3590 #endif 3591 int 3592 sys_mkdir(td, uap) 3593 struct thread *td; 3594 register struct mkdir_args /* { 3595 char *path; 3596 int mode; 3597 } */ *uap; 3598 { 3599 3600 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3601 uap->mode)); 3602 } 3603 3604 #ifndef _SYS_SYSPROTO_H_ 3605 struct mkdirat_args { 3606 int fd; 3607 char *path; 3608 mode_t mode; 3609 }; 3610 #endif 3611 int 3612 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3613 { 3614 3615 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3616 } 3617 3618 int 3619 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3620 int mode) 3621 { 3622 struct mount *mp; 3623 struct vnode *vp; 3624 struct vattr vattr; 3625 struct nameidata nd; 3626 cap_rights_t rights; 3627 int error; 3628 3629 AUDIT_ARG_MODE(mode); 3630 restart: 3631 bwillwrite(); 3632 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3633 NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), 3634 td); 3635 nd.ni_cnd.cn_flags |= WILLBEDIR; 3636 if ((error = namei(&nd)) != 0) 3637 return (error); 3638 vp = nd.ni_vp; 3639 if (vp != NULL) { 3640 NDFREE(&nd, NDF_ONLY_PNBUF); 3641 /* 3642 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3643 * the strange behaviour of leaving the vnode unlocked 3644 * if the target is the same vnode as the parent. 3645 */ 3646 if (vp == nd.ni_dvp) 3647 vrele(nd.ni_dvp); 3648 else 3649 vput(nd.ni_dvp); 3650 vrele(vp); 3651 return (EEXIST); 3652 } 3653 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3654 NDFREE(&nd, NDF_ONLY_PNBUF); 3655 vput(nd.ni_dvp); 3656 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3657 return (error); 3658 goto restart; 3659 } 3660 VATTR_NULL(&vattr); 3661 vattr.va_type = VDIR; 3662 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3663 #ifdef MAC 3664 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3665 &vattr); 3666 if (error != 0) 3667 goto out; 3668 #endif 3669 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3670 #ifdef MAC 3671 out: 3672 #endif 3673 NDFREE(&nd, NDF_ONLY_PNBUF); 3674 vput(nd.ni_dvp); 3675 if (error == 0) 3676 vput(nd.ni_vp); 3677 vn_finished_write(mp); 3678 return (error); 3679 } 3680 3681 /* 3682 * Remove a directory file. 3683 */ 3684 #ifndef _SYS_SYSPROTO_H_ 3685 struct rmdir_args { 3686 char *path; 3687 }; 3688 #endif 3689 int 3690 sys_rmdir(td, uap) 3691 struct thread *td; 3692 struct rmdir_args /* { 3693 char *path; 3694 } */ *uap; 3695 { 3696 3697 return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); 3698 } 3699 3700 int 3701 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3702 { 3703 struct mount *mp; 3704 struct vnode *vp; 3705 struct nameidata nd; 3706 cap_rights_t rights; 3707 int error; 3708 3709 restart: 3710 bwillwrite(); 3711 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3712 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3713 if ((error = namei(&nd)) != 0) 3714 return (error); 3715 vp = nd.ni_vp; 3716 if (vp->v_type != VDIR) { 3717 error = ENOTDIR; 3718 goto out; 3719 } 3720 /* 3721 * No rmdir "." please. 3722 */ 3723 if (nd.ni_dvp == vp) { 3724 error = EINVAL; 3725 goto out; 3726 } 3727 /* 3728 * The root of a mounted filesystem cannot be deleted. 3729 */ 3730 if (vp->v_vflag & VV_ROOT) { 3731 error = EBUSY; 3732 goto out; 3733 } 3734 #ifdef MAC 3735 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3736 &nd.ni_cnd); 3737 if (error != 0) 3738 goto out; 3739 #endif 3740 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3741 NDFREE(&nd, NDF_ONLY_PNBUF); 3742 vput(vp); 3743 if (nd.ni_dvp == vp) 3744 vrele(nd.ni_dvp); 3745 else 3746 vput(nd.ni_dvp); 3747 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3748 return (error); 3749 goto restart; 3750 } 3751 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3752 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3753 vn_finished_write(mp); 3754 out: 3755 NDFREE(&nd, NDF_ONLY_PNBUF); 3756 vput(vp); 3757 if (nd.ni_dvp == vp) 3758 vrele(nd.ni_dvp); 3759 else 3760 vput(nd.ni_dvp); 3761 return (error); 3762 } 3763 3764 #ifdef COMPAT_43 3765 /* 3766 * Read a block of directory entries in a filesystem independent format. 3767 */ 3768 #ifndef _SYS_SYSPROTO_H_ 3769 struct ogetdirentries_args { 3770 int fd; 3771 char *buf; 3772 u_int count; 3773 long *basep; 3774 }; 3775 #endif 3776 int 3777 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3778 { 3779 long loff; 3780 int error; 3781 3782 error = kern_ogetdirentries(td, uap, &loff); 3783 if (error == 0) 3784 error = copyout(&loff, uap->basep, sizeof(long)); 3785 return (error); 3786 } 3787 3788 int 3789 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3790 long *ploff) 3791 { 3792 struct vnode *vp; 3793 struct file *fp; 3794 struct uio auio, kuio; 3795 struct iovec aiov, kiov; 3796 struct dirent *dp, *edp; 3797 cap_rights_t rights; 3798 caddr_t dirbuf; 3799 int error, eofflag, readcnt; 3800 long loff; 3801 off_t foffset; 3802 3803 /* XXX arbitrary sanity limit on `count'. */ 3804 if (uap->count > 64 * 1024) 3805 return (EINVAL); 3806 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); 3807 if (error != 0) 3808 return (error); 3809 if ((fp->f_flag & FREAD) == 0) { 3810 fdrop(fp, td); 3811 return (EBADF); 3812 } 3813 vp = fp->f_vnode; 3814 foffset = foffset_lock(fp, 0); 3815 unionread: 3816 if (vp->v_type != VDIR) { 3817 foffset_unlock(fp, foffset, 0); 3818 fdrop(fp, td); 3819 return (EINVAL); 3820 } 3821 aiov.iov_base = uap->buf; 3822 aiov.iov_len = uap->count; 3823 auio.uio_iov = &aiov; 3824 auio.uio_iovcnt = 1; 3825 auio.uio_rw = UIO_READ; 3826 auio.uio_segflg = UIO_USERSPACE; 3827 auio.uio_td = td; 3828 auio.uio_resid = uap->count; 3829 vn_lock(vp, LK_SHARED | LK_RETRY); 3830 loff = auio.uio_offset = foffset; 3831 #ifdef MAC 3832 error = mac_vnode_check_readdir(td->td_ucred, vp); 3833 if (error != 0) { 3834 VOP_UNLOCK(vp, 0); 3835 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3836 fdrop(fp, td); 3837 return (error); 3838 } 3839 #endif 3840 # if (BYTE_ORDER != LITTLE_ENDIAN) 3841 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3842 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3843 NULL, NULL); 3844 foffset = auio.uio_offset; 3845 } else 3846 # endif 3847 { 3848 kuio = auio; 3849 kuio.uio_iov = &kiov; 3850 kuio.uio_segflg = UIO_SYSSPACE; 3851 kiov.iov_len = uap->count; 3852 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3853 kiov.iov_base = dirbuf; 3854 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3855 NULL, NULL); 3856 foffset = kuio.uio_offset; 3857 if (error == 0) { 3858 readcnt = uap->count - kuio.uio_resid; 3859 edp = (struct dirent *)&dirbuf[readcnt]; 3860 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3861 # if (BYTE_ORDER == LITTLE_ENDIAN) 3862 /* 3863 * The expected low byte of 3864 * dp->d_namlen is our dp->d_type. 3865 * The high MBZ byte of dp->d_namlen 3866 * is our dp->d_namlen. 3867 */ 3868 dp->d_type = dp->d_namlen; 3869 dp->d_namlen = 0; 3870 # else 3871 /* 3872 * The dp->d_type is the high byte 3873 * of the expected dp->d_namlen, 3874 * so must be zero'ed. 3875 */ 3876 dp->d_type = 0; 3877 # endif 3878 if (dp->d_reclen > 0) { 3879 dp = (struct dirent *) 3880 ((char *)dp + dp->d_reclen); 3881 } else { 3882 error = EIO; 3883 break; 3884 } 3885 } 3886 if (dp >= edp) 3887 error = uiomove(dirbuf, readcnt, &auio); 3888 } 3889 free(dirbuf, M_TEMP); 3890 } 3891 if (error != 0) { 3892 VOP_UNLOCK(vp, 0); 3893 foffset_unlock(fp, foffset, 0); 3894 fdrop(fp, td); 3895 return (error); 3896 } 3897 if (uap->count == auio.uio_resid && 3898 (vp->v_vflag & VV_ROOT) && 3899 (vp->v_mount->mnt_flag & MNT_UNION)) { 3900 struct vnode *tvp = vp; 3901 vp = vp->v_mount->mnt_vnodecovered; 3902 VREF(vp); 3903 fp->f_vnode = vp; 3904 fp->f_data = vp; 3905 foffset = 0; 3906 vput(tvp); 3907 goto unionread; 3908 } 3909 VOP_UNLOCK(vp, 0); 3910 foffset_unlock(fp, foffset, 0); 3911 fdrop(fp, td); 3912 td->td_retval[0] = uap->count - auio.uio_resid; 3913 if (error == 0) 3914 *ploff = loff; 3915 return (error); 3916 } 3917 #endif /* COMPAT_43 */ 3918 3919 /* 3920 * Read a block of directory entries in a filesystem independent format. 3921 */ 3922 #ifndef _SYS_SYSPROTO_H_ 3923 struct getdirentries_args { 3924 int fd; 3925 char *buf; 3926 u_int count; 3927 long *basep; 3928 }; 3929 #endif 3930 int 3931 sys_getdirentries(td, uap) 3932 struct thread *td; 3933 register struct getdirentries_args /* { 3934 int fd; 3935 char *buf; 3936 u_int count; 3937 long *basep; 3938 } */ *uap; 3939 { 3940 long base; 3941 int error; 3942 3943 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3944 NULL, UIO_USERSPACE); 3945 if (error != 0) 3946 return (error); 3947 if (uap->basep != NULL) 3948 error = copyout(&base, uap->basep, sizeof(long)); 3949 return (error); 3950 } 3951 3952 int 3953 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 3954 long *basep, ssize_t *residp, enum uio_seg bufseg) 3955 { 3956 struct vnode *vp; 3957 struct file *fp; 3958 struct uio auio; 3959 struct iovec aiov; 3960 cap_rights_t rights; 3961 long loff; 3962 int error, eofflag; 3963 off_t foffset; 3964 3965 AUDIT_ARG_FD(fd); 3966 if (count > IOSIZE_MAX) 3967 return (EINVAL); 3968 auio.uio_resid = count; 3969 error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp); 3970 if (error != 0) 3971 return (error); 3972 if ((fp->f_flag & FREAD) == 0) { 3973 fdrop(fp, td); 3974 return (EBADF); 3975 } 3976 vp = fp->f_vnode; 3977 foffset = foffset_lock(fp, 0); 3978 unionread: 3979 if (vp->v_type != VDIR) { 3980 error = EINVAL; 3981 goto fail; 3982 } 3983 aiov.iov_base = buf; 3984 aiov.iov_len = count; 3985 auio.uio_iov = &aiov; 3986 auio.uio_iovcnt = 1; 3987 auio.uio_rw = UIO_READ; 3988 auio.uio_segflg = bufseg; 3989 auio.uio_td = td; 3990 vn_lock(vp, LK_SHARED | LK_RETRY); 3991 AUDIT_ARG_VNODE1(vp); 3992 loff = auio.uio_offset = foffset; 3993 #ifdef MAC 3994 error = mac_vnode_check_readdir(td->td_ucred, vp); 3995 if (error == 0) 3996 #endif 3997 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 3998 NULL); 3999 foffset = auio.uio_offset; 4000 if (error != 0) { 4001 VOP_UNLOCK(vp, 0); 4002 goto fail; 4003 } 4004 if (count == auio.uio_resid && 4005 (vp->v_vflag & VV_ROOT) && 4006 (vp->v_mount->mnt_flag & MNT_UNION)) { 4007 struct vnode *tvp = vp; 4008 4009 vp = vp->v_mount->mnt_vnodecovered; 4010 VREF(vp); 4011 fp->f_vnode = vp; 4012 fp->f_data = vp; 4013 foffset = 0; 4014 vput(tvp); 4015 goto unionread; 4016 } 4017 VOP_UNLOCK(vp, 0); 4018 *basep = loff; 4019 if (residp != NULL) 4020 *residp = auio.uio_resid; 4021 td->td_retval[0] = count - auio.uio_resid; 4022 fail: 4023 foffset_unlock(fp, foffset, 0); 4024 fdrop(fp, td); 4025 return (error); 4026 } 4027 4028 #ifndef _SYS_SYSPROTO_H_ 4029 struct getdents_args { 4030 int fd; 4031 char *buf; 4032 size_t count; 4033 }; 4034 #endif 4035 int 4036 sys_getdents(td, uap) 4037 struct thread *td; 4038 register struct getdents_args /* { 4039 int fd; 4040 char *buf; 4041 u_int count; 4042 } */ *uap; 4043 { 4044 struct getdirentries_args ap; 4045 4046 ap.fd = uap->fd; 4047 ap.buf = uap->buf; 4048 ap.count = uap->count; 4049 ap.basep = NULL; 4050 return (sys_getdirentries(td, &ap)); 4051 } 4052 4053 /* 4054 * Set the mode mask for creation of filesystem nodes. 4055 */ 4056 #ifndef _SYS_SYSPROTO_H_ 4057 struct umask_args { 4058 int newmask; 4059 }; 4060 #endif 4061 int 4062 sys_umask(td, uap) 4063 struct thread *td; 4064 struct umask_args /* { 4065 int newmask; 4066 } */ *uap; 4067 { 4068 struct filedesc *fdp; 4069 4070 fdp = td->td_proc->p_fd; 4071 FILEDESC_XLOCK(fdp); 4072 td->td_retval[0] = fdp->fd_cmask; 4073 fdp->fd_cmask = uap->newmask & ALLPERMS; 4074 FILEDESC_XUNLOCK(fdp); 4075 return (0); 4076 } 4077 4078 /* 4079 * Void all references to file by ripping underlying filesystem away from 4080 * vnode. 4081 */ 4082 #ifndef _SYS_SYSPROTO_H_ 4083 struct revoke_args { 4084 char *path; 4085 }; 4086 #endif 4087 int 4088 sys_revoke(td, uap) 4089 struct thread *td; 4090 register struct revoke_args /* { 4091 char *path; 4092 } */ *uap; 4093 { 4094 struct vnode *vp; 4095 struct vattr vattr; 4096 struct nameidata nd; 4097 int error; 4098 4099 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4100 uap->path, td); 4101 if ((error = namei(&nd)) != 0) 4102 return (error); 4103 vp = nd.ni_vp; 4104 NDFREE(&nd, NDF_ONLY_PNBUF); 4105 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4106 error = EINVAL; 4107 goto out; 4108 } 4109 #ifdef MAC 4110 error = mac_vnode_check_revoke(td->td_ucred, vp); 4111 if (error != 0) 4112 goto out; 4113 #endif 4114 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4115 if (error != 0) 4116 goto out; 4117 if (td->td_ucred->cr_uid != vattr.va_uid) { 4118 error = priv_check(td, PRIV_VFS_ADMIN); 4119 if (error != 0) 4120 goto out; 4121 } 4122 if (vcount(vp) > 1) 4123 VOP_REVOKE(vp, REVOKEALL); 4124 out: 4125 vput(vp); 4126 return (error); 4127 } 4128 4129 /* 4130 * Convert a user file descriptor to a kernel file entry and check that, if it 4131 * is a capability, the correct rights are present. A reference on the file 4132 * entry is held upon returning. 4133 */ 4134 int 4135 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4136 { 4137 struct file *fp; 4138 int error; 4139 4140 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); 4141 if (error != 0) 4142 return (error); 4143 4144 /* 4145 * The file could be not of the vnode type, or it may be not 4146 * yet fully initialized, in which case the f_vnode pointer 4147 * may be set, but f_ops is still badfileops. E.g., 4148 * devfs_open() transiently create such situation to 4149 * facilitate csw d_fdopen(). 4150 * 4151 * Dupfdopen() handling in kern_openat() installs the 4152 * half-baked file into the process descriptor table, allowing 4153 * other thread to dereference it. Guard against the race by 4154 * checking f_ops. 4155 */ 4156 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4157 fdrop(fp, td); 4158 return (EINVAL); 4159 } 4160 *fpp = fp; 4161 return (0); 4162 } 4163 4164 4165 /* 4166 * Get an (NFS) file handle. 4167 */ 4168 #ifndef _SYS_SYSPROTO_H_ 4169 struct lgetfh_args { 4170 char *fname; 4171 fhandle_t *fhp; 4172 }; 4173 #endif 4174 int 4175 sys_lgetfh(td, uap) 4176 struct thread *td; 4177 register struct lgetfh_args *uap; 4178 { 4179 struct nameidata nd; 4180 fhandle_t fh; 4181 register struct vnode *vp; 4182 int error; 4183 4184 error = priv_check(td, PRIV_VFS_GETFH); 4185 if (error != 0) 4186 return (error); 4187 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4188 uap->fname, td); 4189 error = namei(&nd); 4190 if (error != 0) 4191 return (error); 4192 NDFREE(&nd, NDF_ONLY_PNBUF); 4193 vp = nd.ni_vp; 4194 bzero(&fh, sizeof(fh)); 4195 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4196 error = VOP_VPTOFH(vp, &fh.fh_fid); 4197 vput(vp); 4198 if (error == 0) 4199 error = copyout(&fh, uap->fhp, sizeof (fh)); 4200 return (error); 4201 } 4202 4203 #ifndef _SYS_SYSPROTO_H_ 4204 struct getfh_args { 4205 char *fname; 4206 fhandle_t *fhp; 4207 }; 4208 #endif 4209 int 4210 sys_getfh(td, uap) 4211 struct thread *td; 4212 register struct getfh_args *uap; 4213 { 4214 struct nameidata nd; 4215 fhandle_t fh; 4216 register struct vnode *vp; 4217 int error; 4218 4219 error = priv_check(td, PRIV_VFS_GETFH); 4220 if (error != 0) 4221 return (error); 4222 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4223 uap->fname, td); 4224 error = namei(&nd); 4225 if (error != 0) 4226 return (error); 4227 NDFREE(&nd, NDF_ONLY_PNBUF); 4228 vp = nd.ni_vp; 4229 bzero(&fh, sizeof(fh)); 4230 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4231 error = VOP_VPTOFH(vp, &fh.fh_fid); 4232 vput(vp); 4233 if (error == 0) 4234 error = copyout(&fh, uap->fhp, sizeof (fh)); 4235 return (error); 4236 } 4237 4238 /* 4239 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4240 * open descriptor. 4241 * 4242 * warning: do not remove the priv_check() call or this becomes one giant 4243 * security hole. 4244 */ 4245 #ifndef _SYS_SYSPROTO_H_ 4246 struct fhopen_args { 4247 const struct fhandle *u_fhp; 4248 int flags; 4249 }; 4250 #endif 4251 int 4252 sys_fhopen(td, uap) 4253 struct thread *td; 4254 struct fhopen_args /* { 4255 const struct fhandle *u_fhp; 4256 int flags; 4257 } */ *uap; 4258 { 4259 struct mount *mp; 4260 struct vnode *vp; 4261 struct fhandle fhp; 4262 struct file *fp; 4263 int fmode, error; 4264 int indx; 4265 4266 error = priv_check(td, PRIV_VFS_FHOPEN); 4267 if (error != 0) 4268 return (error); 4269 indx = -1; 4270 fmode = FFLAGS(uap->flags); 4271 /* why not allow a non-read/write open for our lockd? */ 4272 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4273 return (EINVAL); 4274 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4275 if (error != 0) 4276 return(error); 4277 /* find the mount point */ 4278 mp = vfs_busyfs(&fhp.fh_fsid); 4279 if (mp == NULL) 4280 return (ESTALE); 4281 /* now give me my vnode, it gets returned to me locked */ 4282 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4283 vfs_unbusy(mp); 4284 if (error != 0) 4285 return (error); 4286 4287 error = falloc_noinstall(td, &fp); 4288 if (error != 0) { 4289 vput(vp); 4290 return (error); 4291 } 4292 /* 4293 * An extra reference on `fp' has been held for us by 4294 * falloc_noinstall(). 4295 */ 4296 4297 #ifdef INVARIANTS 4298 td->td_dupfd = -1; 4299 #endif 4300 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4301 if (error != 0) { 4302 KASSERT(fp->f_ops == &badfileops, 4303 ("VOP_OPEN in fhopen() set f_ops")); 4304 KASSERT(td->td_dupfd < 0, 4305 ("fhopen() encountered fdopen()")); 4306 4307 vput(vp); 4308 goto bad; 4309 } 4310 #ifdef INVARIANTS 4311 td->td_dupfd = 0; 4312 #endif 4313 fp->f_vnode = vp; 4314 fp->f_seqcount = 1; 4315 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4316 &vnops); 4317 VOP_UNLOCK(vp, 0); 4318 if ((fmode & O_TRUNC) != 0) { 4319 error = fo_truncate(fp, 0, td->td_ucred, td); 4320 if (error != 0) 4321 goto bad; 4322 } 4323 4324 error = finstall(td, fp, &indx, fmode, NULL); 4325 bad: 4326 fdrop(fp, td); 4327 td->td_retval[0] = indx; 4328 return (error); 4329 } 4330 4331 /* 4332 * Stat an (NFS) file handle. 4333 */ 4334 #ifndef _SYS_SYSPROTO_H_ 4335 struct fhstat_args { 4336 struct fhandle *u_fhp; 4337 struct stat *sb; 4338 }; 4339 #endif 4340 int 4341 sys_fhstat(td, uap) 4342 struct thread *td; 4343 register struct fhstat_args /* { 4344 struct fhandle *u_fhp; 4345 struct stat *sb; 4346 } */ *uap; 4347 { 4348 struct stat sb; 4349 struct fhandle fh; 4350 int error; 4351 4352 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4353 if (error != 0) 4354 return (error); 4355 error = kern_fhstat(td, fh, &sb); 4356 if (error == 0) 4357 error = copyout(&sb, uap->sb, sizeof(sb)); 4358 return (error); 4359 } 4360 4361 int 4362 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4363 { 4364 struct mount *mp; 4365 struct vnode *vp; 4366 int error; 4367 4368 error = priv_check(td, PRIV_VFS_FHSTAT); 4369 if (error != 0) 4370 return (error); 4371 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4372 return (ESTALE); 4373 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4374 vfs_unbusy(mp); 4375 if (error != 0) 4376 return (error); 4377 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4378 vput(vp); 4379 return (error); 4380 } 4381 4382 /* 4383 * Implement fstatfs() for (NFS) file handles. 4384 */ 4385 #ifndef _SYS_SYSPROTO_H_ 4386 struct fhstatfs_args { 4387 struct fhandle *u_fhp; 4388 struct statfs *buf; 4389 }; 4390 #endif 4391 int 4392 sys_fhstatfs(td, uap) 4393 struct thread *td; 4394 struct fhstatfs_args /* { 4395 struct fhandle *u_fhp; 4396 struct statfs *buf; 4397 } */ *uap; 4398 { 4399 struct statfs sf; 4400 fhandle_t fh; 4401 int error; 4402 4403 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4404 if (error != 0) 4405 return (error); 4406 error = kern_fhstatfs(td, fh, &sf); 4407 if (error != 0) 4408 return (error); 4409 return (copyout(&sf, uap->buf, sizeof(sf))); 4410 } 4411 4412 int 4413 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4414 { 4415 struct statfs *sp; 4416 struct mount *mp; 4417 struct vnode *vp; 4418 int error; 4419 4420 error = priv_check(td, PRIV_VFS_FHSTATFS); 4421 if (error != 0) 4422 return (error); 4423 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4424 return (ESTALE); 4425 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4426 if (error != 0) { 4427 vfs_unbusy(mp); 4428 return (error); 4429 } 4430 vput(vp); 4431 error = prison_canseemount(td->td_ucred, mp); 4432 if (error != 0) 4433 goto out; 4434 #ifdef MAC 4435 error = mac_mount_check_stat(td->td_ucred, mp); 4436 if (error != 0) 4437 goto out; 4438 #endif 4439 /* 4440 * Set these in case the underlying filesystem fails to do so. 4441 */ 4442 sp = &mp->mnt_stat; 4443 sp->f_version = STATFS_VERSION; 4444 sp->f_namemax = NAME_MAX; 4445 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4446 error = VFS_STATFS(mp, sp); 4447 if (error == 0) 4448 *buf = *sp; 4449 out: 4450 vfs_unbusy(mp); 4451 return (error); 4452 } 4453 4454 int 4455 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4456 { 4457 struct file *fp; 4458 struct mount *mp; 4459 struct vnode *vp; 4460 cap_rights_t rights; 4461 off_t olen, ooffset; 4462 int error; 4463 4464 if (offset < 0 || len <= 0) 4465 return (EINVAL); 4466 /* Check for wrap. */ 4467 if (offset > OFF_MAX - len) 4468 return (EFBIG); 4469 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4470 if (error != 0) 4471 return (error); 4472 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4473 error = ESPIPE; 4474 goto out; 4475 } 4476 if ((fp->f_flag & FWRITE) == 0) { 4477 error = EBADF; 4478 goto out; 4479 } 4480 if (fp->f_type != DTYPE_VNODE) { 4481 error = ENODEV; 4482 goto out; 4483 } 4484 vp = fp->f_vnode; 4485 if (vp->v_type != VREG) { 4486 error = ENODEV; 4487 goto out; 4488 } 4489 4490 /* Allocating blocks may take a long time, so iterate. */ 4491 for (;;) { 4492 olen = len; 4493 ooffset = offset; 4494 4495 bwillwrite(); 4496 mp = NULL; 4497 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4498 if (error != 0) 4499 break; 4500 error = vn_lock(vp, LK_EXCLUSIVE); 4501 if (error != 0) { 4502 vn_finished_write(mp); 4503 break; 4504 } 4505 #ifdef MAC 4506 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4507 if (error == 0) 4508 #endif 4509 error = VOP_ALLOCATE(vp, &offset, &len); 4510 VOP_UNLOCK(vp, 0); 4511 vn_finished_write(mp); 4512 4513 if (olen + ooffset != offset + len) { 4514 panic("offset + len changed from %jx/%jx to %jx/%jx", 4515 ooffset, olen, offset, len); 4516 } 4517 if (error != 0 || len == 0) 4518 break; 4519 KASSERT(olen > len, ("Iteration did not make progress?")); 4520 maybe_yield(); 4521 } 4522 out: 4523 fdrop(fp, td); 4524 return (error); 4525 } 4526 4527 int 4528 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4529 { 4530 4531 td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset, 4532 uap->len); 4533 return (0); 4534 } 4535 4536 /* 4537 * Unlike madvise(2), we do not make a best effort to remember every 4538 * possible caching hint. Instead, we remember the last setting with 4539 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4540 * region of any current setting. 4541 */ 4542 int 4543 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4544 int advice) 4545 { 4546 struct fadvise_info *fa, *new; 4547 struct file *fp; 4548 struct vnode *vp; 4549 cap_rights_t rights; 4550 off_t end; 4551 int error; 4552 4553 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4554 return (EINVAL); 4555 switch (advice) { 4556 case POSIX_FADV_SEQUENTIAL: 4557 case POSIX_FADV_RANDOM: 4558 case POSIX_FADV_NOREUSE: 4559 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4560 break; 4561 case POSIX_FADV_NORMAL: 4562 case POSIX_FADV_WILLNEED: 4563 case POSIX_FADV_DONTNEED: 4564 new = NULL; 4565 break; 4566 default: 4567 return (EINVAL); 4568 } 4569 /* XXX: CAP_POSIX_FADVISE? */ 4570 error = fget(td, fd, cap_rights_init(&rights), &fp); 4571 if (error != 0) 4572 goto out; 4573 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4574 error = ESPIPE; 4575 goto out; 4576 } 4577 if (fp->f_type != DTYPE_VNODE) { 4578 error = ENODEV; 4579 goto out; 4580 } 4581 vp = fp->f_vnode; 4582 if (vp->v_type != VREG) { 4583 error = ENODEV; 4584 goto out; 4585 } 4586 if (len == 0) 4587 end = OFF_MAX; 4588 else 4589 end = offset + len - 1; 4590 switch (advice) { 4591 case POSIX_FADV_SEQUENTIAL: 4592 case POSIX_FADV_RANDOM: 4593 case POSIX_FADV_NOREUSE: 4594 /* 4595 * Try to merge any existing non-standard region with 4596 * this new region if possible, otherwise create a new 4597 * non-standard region for this request. 4598 */ 4599 mtx_pool_lock(mtxpool_sleep, fp); 4600 fa = fp->f_advice; 4601 if (fa != NULL && fa->fa_advice == advice && 4602 ((fa->fa_start <= end && fa->fa_end >= offset) || 4603 (end != OFF_MAX && fa->fa_start == end + 1) || 4604 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4605 if (offset < fa->fa_start) 4606 fa->fa_start = offset; 4607 if (end > fa->fa_end) 4608 fa->fa_end = end; 4609 } else { 4610 new->fa_advice = advice; 4611 new->fa_start = offset; 4612 new->fa_end = end; 4613 new->fa_prevstart = 0; 4614 new->fa_prevend = 0; 4615 fp->f_advice = new; 4616 new = fa; 4617 } 4618 mtx_pool_unlock(mtxpool_sleep, fp); 4619 break; 4620 case POSIX_FADV_NORMAL: 4621 /* 4622 * If a the "normal" region overlaps with an existing 4623 * non-standard region, trim or remove the 4624 * non-standard region. 4625 */ 4626 mtx_pool_lock(mtxpool_sleep, fp); 4627 fa = fp->f_advice; 4628 if (fa != NULL) { 4629 if (offset <= fa->fa_start && end >= fa->fa_end) { 4630 new = fa; 4631 fp->f_advice = NULL; 4632 } else if (offset <= fa->fa_start && 4633 end >= fa->fa_start) 4634 fa->fa_start = end + 1; 4635 else if (offset <= fa->fa_end && end >= fa->fa_end) 4636 fa->fa_end = offset - 1; 4637 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4638 /* 4639 * If the "normal" region is a middle 4640 * portion of the existing 4641 * non-standard region, just remove 4642 * the whole thing rather than picking 4643 * one side or the other to 4644 * preserve. 4645 */ 4646 new = fa; 4647 fp->f_advice = NULL; 4648 } 4649 } 4650 mtx_pool_unlock(mtxpool_sleep, fp); 4651 break; 4652 case POSIX_FADV_WILLNEED: 4653 case POSIX_FADV_DONTNEED: 4654 error = VOP_ADVISE(vp, offset, end, advice); 4655 break; 4656 } 4657 out: 4658 if (fp != NULL) 4659 fdrop(fp, td); 4660 free(new, M_FADVISE); 4661 return (error); 4662 } 4663 4664 int 4665 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4666 { 4667 4668 td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset, 4669 uap->len, uap->advice); 4670 return (0); 4671 } 4672