1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_kdtrace.h" 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/bio.h> 48 #include <sys/buf.h> 49 #include <sys/capability.h> 50 #include <sys/disk.h> 51 #include <sys/sysent.h> 52 #include <sys/malloc.h> 53 #include <sys/mount.h> 54 #include <sys/mutex.h> 55 #include <sys/sysproto.h> 56 #include <sys/namei.h> 57 #include <sys/filedesc.h> 58 #include <sys/kernel.h> 59 #include <sys/fcntl.h> 60 #include <sys/file.h> 61 #include <sys/filio.h> 62 #include <sys/limits.h> 63 #include <sys/linker.h> 64 #include <sys/rwlock.h> 65 #include <sys/sdt.h> 66 #include <sys/stat.h> 67 #include <sys/sx.h> 68 #include <sys/unistd.h> 69 #include <sys/vnode.h> 70 #include <sys/priv.h> 71 #include <sys/proc.h> 72 #include <sys/dirent.h> 73 #include <sys/jail.h> 74 #include <sys/syscallsubr.h> 75 #include <sys/sysctl.h> 76 #ifdef KTRACE 77 #include <sys/ktrace.h> 78 #endif 79 80 #include <machine/stdarg.h> 81 82 #include <security/audit/audit.h> 83 #include <security/mac/mac_framework.h> 84 85 #include <vm/vm.h> 86 #include <vm/vm_object.h> 87 #include <vm/vm_page.h> 88 #include <vm/uma.h> 89 90 #include <ufs/ufs/quota.h> 91 92 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 93 94 SDT_PROVIDER_DEFINE(vfs); 95 SDT_PROBE_DEFINE2(vfs, , stat, mode, mode, "char *", "int"); 96 SDT_PROBE_DEFINE2(vfs, , stat, reg, reg, "char *", "int"); 97 98 static int chroot_refuse_vdir_fds(struct filedesc *fdp); 99 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 100 static int kern_chflags(struct thread *td, const char *path, 101 enum uio_seg pathseg, u_long flags); 102 static int kern_chflagsat(struct thread *td, int fd, const char *path, 103 enum uio_seg pathseg, u_long flags, int atflag); 104 static int setfflags(struct 
thread *td, struct vnode *, u_long); 105 static int setutimes(struct thread *td, struct vnode *, 106 const struct timespec *, int, int); 107 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 108 struct thread *td); 109 110 /* 111 * The module initialization routine for POSIX asynchronous I/O will 112 * set this to the version of AIO that it implements. (Zero means 113 * that it is not implemented.) This value is used here by pathconf() 114 * and in kern_descrip.c by fpathconf(). 115 */ 116 int async_io_version; 117 118 #ifdef DEBUG 119 static int syncprt = 0; 120 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 121 #endif 122 123 /* 124 * Sync each mounted filesystem. 125 */ 126 #ifndef _SYS_SYSPROTO_H_ 127 struct sync_args { 128 int dummy; 129 }; 130 #endif 131 /* ARGSUSED */ 132 int 133 sys_sync(td, uap) 134 struct thread *td; 135 struct sync_args *uap; 136 { 137 struct mount *mp, *nmp; 138 int save; 139 140 mtx_lock(&mountlist_mtx); 141 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 142 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 143 nmp = TAILQ_NEXT(mp, mnt_list); 144 continue; 145 } 146 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 147 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 148 save = curthread_pflags_set(TDP_SYNCIO); 149 vfs_msync(mp, MNT_NOWAIT); 150 VFS_SYNC(mp, MNT_NOWAIT); 151 curthread_pflags_restore(save); 152 vn_finished_write(mp); 153 } 154 mtx_lock(&mountlist_mtx); 155 nmp = TAILQ_NEXT(mp, mnt_list); 156 vfs_unbusy(mp); 157 } 158 mtx_unlock(&mountlist_mtx); 159 return (0); 160 } 161 162 /* 163 * Change filesystem quotas. 
164 */ 165 #ifndef _SYS_SYSPROTO_H_ 166 struct quotactl_args { 167 char *path; 168 int cmd; 169 int uid; 170 caddr_t arg; 171 }; 172 #endif 173 int 174 sys_quotactl(td, uap) 175 struct thread *td; 176 register struct quotactl_args /* { 177 char *path; 178 int cmd; 179 int uid; 180 caddr_t arg; 181 } */ *uap; 182 { 183 struct mount *mp; 184 int error; 185 struct nameidata nd; 186 187 AUDIT_ARG_CMD(uap->cmd); 188 AUDIT_ARG_UID(uap->uid); 189 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 190 return (EPERM); 191 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 192 uap->path, td); 193 if ((error = namei(&nd)) != 0) 194 return (error); 195 NDFREE(&nd, NDF_ONLY_PNBUF); 196 mp = nd.ni_vp->v_mount; 197 vfs_ref(mp); 198 vput(nd.ni_vp); 199 error = vfs_busy(mp, 0); 200 vfs_rel(mp); 201 if (error) 202 return (error); 203 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 204 205 /* 206 * Since quota on operation typically needs to open quota 207 * file, the Q_QUOTAON handler needs to unbusy the mount point 208 * before calling into namei. Otherwise, unmount might be 209 * started between two vfs_busy() invocations (first is our, 210 * second is from mount point cross-walk code in lookup()), 211 * causing deadlock. 212 * 213 * Require that Q_QUOTAON handles the vfs_busy() reference on 214 * its own, always returning with ubusied mount point. 215 */ 216 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 217 vfs_unbusy(mp); 218 return (error); 219 } 220 221 /* 222 * Used by statfs conversion routines to scale the block size up if 223 * necessary so that all of the block counts are <= 'max_size'. Note 224 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 225 * value of 'n'. 
226 */ 227 void 228 statfs_scale_blocks(struct statfs *sf, long max_size) 229 { 230 uint64_t count; 231 int shift; 232 233 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 234 235 /* 236 * Attempt to scale the block counts to give a more accurate 237 * overview to userland of the ratio of free space to used 238 * space. To do this, find the largest block count and compute 239 * a divisor that lets it fit into a signed integer <= max_size. 240 */ 241 if (sf->f_bavail < 0) 242 count = -sf->f_bavail; 243 else 244 count = sf->f_bavail; 245 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 246 if (count <= max_size) 247 return; 248 249 count >>= flsl(max_size); 250 shift = 0; 251 while (count > 0) { 252 shift++; 253 count >>=1; 254 } 255 256 sf->f_bsize <<= shift; 257 sf->f_blocks >>= shift; 258 sf->f_bfree >>= shift; 259 sf->f_bavail >>= shift; 260 } 261 262 /* 263 * Get filesystem statistics. 264 */ 265 #ifndef _SYS_SYSPROTO_H_ 266 struct statfs_args { 267 char *path; 268 struct statfs *buf; 269 }; 270 #endif 271 int 272 sys_statfs(td, uap) 273 struct thread *td; 274 register struct statfs_args /* { 275 char *path; 276 struct statfs *buf; 277 } */ *uap; 278 { 279 struct statfs sf; 280 int error; 281 282 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 283 if (error == 0) 284 error = copyout(&sf, uap->buf, sizeof(sf)); 285 return (error); 286 } 287 288 int 289 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 290 struct statfs *buf) 291 { 292 struct mount *mp; 293 struct statfs *sp, sb; 294 int error; 295 struct nameidata nd; 296 297 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 298 pathseg, path, td); 299 error = namei(&nd); 300 if (error) 301 return (error); 302 mp = nd.ni_vp->v_mount; 303 vfs_ref(mp); 304 NDFREE(&nd, NDF_ONLY_PNBUF); 305 vput(nd.ni_vp); 306 error = vfs_busy(mp, 0); 307 vfs_rel(mp); 308 if (error) 309 return (error); 310 #ifdef MAC 311 error = mac_mount_check_stat(td->td_ucred, mp); 312 
if (error) 313 goto out; 314 #endif 315 /* 316 * Set these in case the underlying filesystem fails to do so. 317 */ 318 sp = &mp->mnt_stat; 319 sp->f_version = STATFS_VERSION; 320 sp->f_namemax = NAME_MAX; 321 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 322 error = VFS_STATFS(mp, sp); 323 if (error) 324 goto out; 325 if (priv_check(td, PRIV_VFS_GENERATION)) { 326 bcopy(sp, &sb, sizeof(sb)); 327 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 328 prison_enforce_statfs(td->td_ucred, mp, &sb); 329 sp = &sb; 330 } 331 *buf = *sp; 332 out: 333 vfs_unbusy(mp); 334 return (error); 335 } 336 337 /* 338 * Get filesystem statistics. 339 */ 340 #ifndef _SYS_SYSPROTO_H_ 341 struct fstatfs_args { 342 int fd; 343 struct statfs *buf; 344 }; 345 #endif 346 int 347 sys_fstatfs(td, uap) 348 struct thread *td; 349 register struct fstatfs_args /* { 350 int fd; 351 struct statfs *buf; 352 } */ *uap; 353 { 354 struct statfs sf; 355 int error; 356 357 error = kern_fstatfs(td, uap->fd, &sf); 358 if (error == 0) 359 error = copyout(&sf, uap->buf, sizeof(sf)); 360 return (error); 361 } 362 363 int 364 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 365 { 366 struct file *fp; 367 struct mount *mp; 368 struct statfs *sp, sb; 369 struct vnode *vp; 370 int error; 371 372 AUDIT_ARG_FD(fd); 373 error = getvnode(td->td_proc->p_fd, fd, CAP_FSTATFS, &fp); 374 if (error) 375 return (error); 376 vp = fp->f_vnode; 377 vn_lock(vp, LK_SHARED | LK_RETRY); 378 #ifdef AUDIT 379 AUDIT_ARG_VNODE1(vp); 380 #endif 381 mp = vp->v_mount; 382 if (mp) 383 vfs_ref(mp); 384 VOP_UNLOCK(vp, 0); 385 fdrop(fp, td); 386 if (mp == NULL) { 387 error = EBADF; 388 goto out; 389 } 390 error = vfs_busy(mp, 0); 391 vfs_rel(mp); 392 if (error) 393 return (error); 394 #ifdef MAC 395 error = mac_mount_check_stat(td->td_ucred, mp); 396 if (error) 397 goto out; 398 #endif 399 /* 400 * Set these in case the underlying filesystem fails to do so. 
401 */ 402 sp = &mp->mnt_stat; 403 sp->f_version = STATFS_VERSION; 404 sp->f_namemax = NAME_MAX; 405 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 406 error = VFS_STATFS(mp, sp); 407 if (error) 408 goto out; 409 if (priv_check(td, PRIV_VFS_GENERATION)) { 410 bcopy(sp, &sb, sizeof(sb)); 411 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 412 prison_enforce_statfs(td->td_ucred, mp, &sb); 413 sp = &sb; 414 } 415 *buf = *sp; 416 out: 417 if (mp) 418 vfs_unbusy(mp); 419 return (error); 420 } 421 422 /* 423 * Get statistics on all filesystems. 424 */ 425 #ifndef _SYS_SYSPROTO_H_ 426 struct getfsstat_args { 427 struct statfs *buf; 428 long bufsize; 429 int flags; 430 }; 431 #endif 432 int 433 sys_getfsstat(td, uap) 434 struct thread *td; 435 register struct getfsstat_args /* { 436 struct statfs *buf; 437 long bufsize; 438 int flags; 439 } */ *uap; 440 { 441 442 return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE, 443 uap->flags)); 444 } 445 446 /* 447 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 448 * The caller is responsible for freeing memory which will be allocated 449 * in '*buf'. 
 */
/*
 * Collect statfs records for the mounted filesystems.
 *
 * 'bufsize' is in bytes and is converted to a record count (maxcount).
 *   - bufsize == 0: nothing is copied; only the mounts are counted.
 *   - UIO_USERSPACE: records are copied out to the user buffer '*buf'.
 *   - otherwise (UIO_SYSSPACE): a buffer sized for at most the current
 *     number of mounts is allocated from M_TEMP and stored in '*buf'.
 *
 * Mounts rejected by prison_canseemount(), by the MAC stat check, or that
 * cannot be busied without sleeping are skipped and not counted.
 *
 * On success td->td_retval[0] holds the number of mounts counted, capped
 * at maxcount when a buffer was supplied.  The only error returned is a
 * copyout() failure.
 */
int
kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
    enum uio_seg bufseg, int flags)
{
	struct mount *mp, *nmp;
	struct statfs *sfsp, *sp, sb;
	size_t count, maxcount;
	int error;

	maxcount = bufsize / sizeof(struct statfs);
	if (bufsize == 0)
		sfsp = NULL;
	else if (bufseg == UIO_USERSPACE)
		sfsp = *buf;
	else /* if (bufseg == UIO_SYSSPACE) */ {
		/* Never allocate more records than there are mounts now. */
		count = 0;
		mtx_lock(&mountlist_mtx);
		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
			count++;
		}
		mtx_unlock(&mountlist_mtx);
		if (maxcount > count)
			maxcount = count;
		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
		    M_WAITOK);
	}
	count = 0;
	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		if (prison_canseemount(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#ifdef MAC
		if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#endif
		/*
		 * On failure the list is walked on directly; on success
		 * mountlist_mtx is re-acquired below before the successor
		 * is read (presumably vfs_busy() dropped it because of
		 * MBF_MNTLSTLOCK -- confirm against vfs_busy()).
		 */
		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
		if (sfsp && count < maxcount) {
			sp = &mp->mnt_stat;
			/*
			 * Set these in case the underlying filesystem
			 * fails to do so.
			 */
			sp->f_version = STATFS_VERSION;
			sp->f_namemax = NAME_MAX;
			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
			/*
			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
			 * overrides MNT_WAIT.
			 *
			 * NOTE(review): as written, the test below refreshes
			 * whenever the MNT_WAIT bit is set in 'flags', even
			 * if MNT_NOWAIT/MNT_LAZY bits are set as well, which
			 * appears to contradict the sentence above -- confirm
			 * against the MNT_* definitions.
			 *
			 * A mount whose VFS_STATFS() fails is skipped and not
			 * counted.
			 */
			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
			    (flags & MNT_WAIT)) &&
			    (error = VFS_STATFS(mp, sp))) {
				mtx_lock(&mountlist_mtx);
				nmp = TAILQ_NEXT(mp, mnt_list);
				vfs_unbusy(mp);
				continue;
			}
			if (priv_check(td, PRIV_VFS_GENERATION)) {
				/*
				 * Without the privilege, hand out a copy with
				 * the fsid cleared and prison policy applied.
				 */
				bcopy(sp, &sb, sizeof(sb));
				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
				prison_enforce_statfs(td->td_ucred, mp, &sb);
				sp = &sb;
			}
			if (bufseg == UIO_SYSSPACE)
				bcopy(sp, sfsp, sizeof(*sp));
			else /* if (bufseg == UIO_USERSPACE) */ {
				error = copyout(sp, sfsp, sizeof(*sp));
				if (error) {
					/* mountlist_mtx is not held here. */
					vfs_unbusy(mp);
					return (error);
				}
			}
			sfsp++;
		}
		/* Counted even when there was no room to store the record. */
		count++;
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	mtx_unlock(&mountlist_mtx);
	if (sfsp && count > maxcount)
		td->td_retval[0] = maxcount;
	else
		td->td_retval[0] = count;
	return (0);
}

#ifdef COMPAT_FREEBSD4
/*
 * Get old format filesystem statistics.
 */
static void cvtstatfs(struct statfs *, struct ostatfs *);

#ifndef _SYS_SYSPROTO_H_
struct freebsd4_statfs_args {
	char *path;
	struct ostatfs *buf;
};
#endif
/*
 * FreeBSD 4 compatible statfs(): fetch the statistics with kern_statfs(),
 * convert them to the old 'struct ostatfs' layout and copy that out to
 * uap->buf.
 */
int
freebsd4_statfs(td, uap)
	struct thread *td;
	struct freebsd4_statfs_args /* {
		char *path;
		struct ostatfs *buf;
	} */ *uap;
{
	struct ostatfs osb;
	struct statfs sf;
	int error;

	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
	if (error)
		return (error);
	cvtstatfs(&sf, &osb);
	return (copyout(&osb, uap->buf, sizeof(osb)));
}

/*
 * Get filesystem statistics.
579 */ 580 #ifndef _SYS_SYSPROTO_H_ 581 struct freebsd4_fstatfs_args { 582 int fd; 583 struct ostatfs *buf; 584 }; 585 #endif 586 int 587 freebsd4_fstatfs(td, uap) 588 struct thread *td; 589 struct freebsd4_fstatfs_args /* { 590 int fd; 591 struct ostatfs *buf; 592 } */ *uap; 593 { 594 struct ostatfs osb; 595 struct statfs sf; 596 int error; 597 598 error = kern_fstatfs(td, uap->fd, &sf); 599 if (error) 600 return (error); 601 cvtstatfs(&sf, &osb); 602 return (copyout(&osb, uap->buf, sizeof(osb))); 603 } 604 605 /* 606 * Get statistics on all filesystems. 607 */ 608 #ifndef _SYS_SYSPROTO_H_ 609 struct freebsd4_getfsstat_args { 610 struct ostatfs *buf; 611 long bufsize; 612 int flags; 613 }; 614 #endif 615 int 616 freebsd4_getfsstat(td, uap) 617 struct thread *td; 618 register struct freebsd4_getfsstat_args /* { 619 struct ostatfs *buf; 620 long bufsize; 621 int flags; 622 } */ *uap; 623 { 624 struct statfs *buf, *sp; 625 struct ostatfs osb; 626 size_t count, size; 627 int error; 628 629 count = uap->bufsize / sizeof(struct ostatfs); 630 size = count * sizeof(struct statfs); 631 error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags); 632 if (size > 0) { 633 count = td->td_retval[0]; 634 sp = buf; 635 while (count > 0 && error == 0) { 636 cvtstatfs(sp, &osb); 637 error = copyout(&osb, uap->buf, sizeof(osb)); 638 sp++; 639 uap->buf++; 640 count--; 641 } 642 free(buf, M_TEMP); 643 } 644 return (error); 645 } 646 647 /* 648 * Implement fstatfs() for (NFS) file handles. 
649 */ 650 #ifndef _SYS_SYSPROTO_H_ 651 struct freebsd4_fhstatfs_args { 652 struct fhandle *u_fhp; 653 struct ostatfs *buf; 654 }; 655 #endif 656 int 657 freebsd4_fhstatfs(td, uap) 658 struct thread *td; 659 struct freebsd4_fhstatfs_args /* { 660 struct fhandle *u_fhp; 661 struct ostatfs *buf; 662 } */ *uap; 663 { 664 struct ostatfs osb; 665 struct statfs sf; 666 fhandle_t fh; 667 int error; 668 669 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 670 if (error) 671 return (error); 672 error = kern_fhstatfs(td, fh, &sf); 673 if (error) 674 return (error); 675 cvtstatfs(&sf, &osb); 676 return (copyout(&osb, uap->buf, sizeof(osb))); 677 } 678 679 /* 680 * Convert a new format statfs structure to an old format statfs structure. 681 */ 682 static void 683 cvtstatfs(nsp, osp) 684 struct statfs *nsp; 685 struct ostatfs *osp; 686 { 687 688 statfs_scale_blocks(nsp, LONG_MAX); 689 bzero(osp, sizeof(*osp)); 690 osp->f_bsize = nsp->f_bsize; 691 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 692 osp->f_blocks = nsp->f_blocks; 693 osp->f_bfree = nsp->f_bfree; 694 osp->f_bavail = nsp->f_bavail; 695 osp->f_files = MIN(nsp->f_files, LONG_MAX); 696 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 697 osp->f_owner = nsp->f_owner; 698 osp->f_type = nsp->f_type; 699 osp->f_flags = nsp->f_flags; 700 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 701 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 702 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 703 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 704 strlcpy(osp->f_fstypename, nsp->f_fstypename, 705 MIN(MFSNAMELEN, OMFSNAMELEN)); 706 strlcpy(osp->f_mntonname, nsp->f_mntonname, 707 MIN(MNAMELEN, OMNAMELEN)); 708 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 709 MIN(MNAMELEN, OMNAMELEN)); 710 osp->f_fsid = nsp->f_fsid; 711 } 712 #endif /* COMPAT_FREEBSD4 */ 713 714 /* 715 * Change current working directory to a given file descriptor. 
716 */ 717 #ifndef _SYS_SYSPROTO_H_ 718 struct fchdir_args { 719 int fd; 720 }; 721 #endif 722 int 723 sys_fchdir(td, uap) 724 struct thread *td; 725 struct fchdir_args /* { 726 int fd; 727 } */ *uap; 728 { 729 register struct filedesc *fdp = td->td_proc->p_fd; 730 struct vnode *vp, *tdp, *vpold; 731 struct mount *mp; 732 struct file *fp; 733 int error; 734 735 AUDIT_ARG_FD(uap->fd); 736 if ((error = getvnode(fdp, uap->fd, CAP_FCHDIR, &fp)) != 0) 737 return (error); 738 vp = fp->f_vnode; 739 VREF(vp); 740 fdrop(fp, td); 741 vn_lock(vp, LK_SHARED | LK_RETRY); 742 AUDIT_ARG_VNODE1(vp); 743 error = change_dir(vp, td); 744 while (!error && (mp = vp->v_mountedhere) != NULL) { 745 if (vfs_busy(mp, 0)) 746 continue; 747 error = VFS_ROOT(mp, LK_SHARED, &tdp); 748 vfs_unbusy(mp); 749 if (error) 750 break; 751 vput(vp); 752 vp = tdp; 753 } 754 if (error) { 755 vput(vp); 756 return (error); 757 } 758 VOP_UNLOCK(vp, 0); 759 FILEDESC_XLOCK(fdp); 760 vpold = fdp->fd_cdir; 761 fdp->fd_cdir = vp; 762 FILEDESC_XUNLOCK(fdp); 763 vrele(vpold); 764 return (0); 765 } 766 767 /* 768 * Change current working directory (``.''). 
769 */ 770 #ifndef _SYS_SYSPROTO_H_ 771 struct chdir_args { 772 char *path; 773 }; 774 #endif 775 int 776 sys_chdir(td, uap) 777 struct thread *td; 778 struct chdir_args /* { 779 char *path; 780 } */ *uap; 781 { 782 783 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 784 } 785 786 int 787 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 788 { 789 register struct filedesc *fdp = td->td_proc->p_fd; 790 int error; 791 struct nameidata nd; 792 struct vnode *vp; 793 794 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 795 pathseg, path, td); 796 if ((error = namei(&nd)) != 0) 797 return (error); 798 if ((error = change_dir(nd.ni_vp, td)) != 0) { 799 vput(nd.ni_vp); 800 NDFREE(&nd, NDF_ONLY_PNBUF); 801 return (error); 802 } 803 VOP_UNLOCK(nd.ni_vp, 0); 804 NDFREE(&nd, NDF_ONLY_PNBUF); 805 FILEDESC_XLOCK(fdp); 806 vp = fdp->fd_cdir; 807 fdp->fd_cdir = nd.ni_vp; 808 FILEDESC_XUNLOCK(fdp); 809 vrele(vp); 810 return (0); 811 } 812 813 /* 814 * Helper function for raised chroot(2) security function: Refuse if 815 * any filedescriptors are open directories. 816 */ 817 static int 818 chroot_refuse_vdir_fds(fdp) 819 struct filedesc *fdp; 820 { 821 struct vnode *vp; 822 struct file *fp; 823 int fd; 824 825 FILEDESC_LOCK_ASSERT(fdp); 826 827 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 828 fp = fget_locked(fdp, fd); 829 if (fp == NULL) 830 continue; 831 if (fp->f_type == DTYPE_VNODE) { 832 vp = fp->f_vnode; 833 if (vp->v_type == VDIR) 834 return (EPERM); 835 } 836 } 837 return (0); 838 } 839 840 /* 841 * This sysctl determines if we will allow a process to chroot(2) if it 842 * has a directory open: 843 * 0: disallowed for all processes. 844 * 1: allowed for processes that were not already chroot(2)'ed. 845 * 2: allowed for all processes. 
846 */ 847 848 static int chroot_allow_open_directories = 1; 849 850 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 851 &chroot_allow_open_directories, 0, 852 "Allow a process to chroot(2) if it has a directory open"); 853 854 /* 855 * Change notion of root (``/'') directory. 856 */ 857 #ifndef _SYS_SYSPROTO_H_ 858 struct chroot_args { 859 char *path; 860 }; 861 #endif 862 int 863 sys_chroot(td, uap) 864 struct thread *td; 865 struct chroot_args /* { 866 char *path; 867 } */ *uap; 868 { 869 int error; 870 struct nameidata nd; 871 872 error = priv_check(td, PRIV_VFS_CHROOT); 873 if (error) 874 return (error); 875 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 876 UIO_USERSPACE, uap->path, td); 877 error = namei(&nd); 878 if (error) 879 goto error; 880 if ((error = change_dir(nd.ni_vp, td)) != 0) 881 goto e_vunlock; 882 #ifdef MAC 883 if ((error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp))) 884 goto e_vunlock; 885 #endif 886 VOP_UNLOCK(nd.ni_vp, 0); 887 error = change_root(nd.ni_vp, td); 888 vrele(nd.ni_vp); 889 NDFREE(&nd, NDF_ONLY_PNBUF); 890 return (error); 891 e_vunlock: 892 vput(nd.ni_vp); 893 error: 894 NDFREE(&nd, NDF_ONLY_PNBUF); 895 return (error); 896 } 897 898 /* 899 * Common routine for chroot and chdir. Callers must provide a locked vnode 900 * instance. 901 */ 902 int 903 change_dir(vp, td) 904 struct vnode *vp; 905 struct thread *td; 906 { 907 int error; 908 909 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 910 if (vp->v_type != VDIR) 911 return (ENOTDIR); 912 #ifdef MAC 913 error = mac_vnode_check_chdir(td->td_ucred, vp); 914 if (error) 915 return (error); 916 #endif 917 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 918 return (error); 919 } 920 921 /* 922 * Common routine for kern_chroot() and jail_attach(). The caller is 923 * responsible for invoking priv_check() and mac_vnode_check_chroot() to 924 * authorize this operation. 
925 */ 926 int 927 change_root(vp, td) 928 struct vnode *vp; 929 struct thread *td; 930 { 931 struct filedesc *fdp; 932 struct vnode *oldvp; 933 int error; 934 935 fdp = td->td_proc->p_fd; 936 FILEDESC_XLOCK(fdp); 937 if (chroot_allow_open_directories == 0 || 938 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 939 error = chroot_refuse_vdir_fds(fdp); 940 if (error) { 941 FILEDESC_XUNLOCK(fdp); 942 return (error); 943 } 944 } 945 oldvp = fdp->fd_rdir; 946 fdp->fd_rdir = vp; 947 VREF(fdp->fd_rdir); 948 if (!fdp->fd_jdir) { 949 fdp->fd_jdir = vp; 950 VREF(fdp->fd_jdir); 951 } 952 FILEDESC_XUNLOCK(fdp); 953 vrele(oldvp); 954 return (0); 955 } 956 957 static __inline cap_rights_t 958 flags_to_rights(int flags) 959 { 960 cap_rights_t rights = 0; 961 962 if (flags & O_EXEC) { 963 rights |= CAP_FEXECVE; 964 } else { 965 switch ((flags & O_ACCMODE)) { 966 case O_RDONLY: 967 rights |= CAP_READ; 968 break; 969 case O_RDWR: 970 rights |= CAP_READ; 971 /* FALLTHROUGH */ 972 case O_WRONLY: 973 rights |= CAP_WRITE; 974 if (!(flags & (O_APPEND | O_TRUNC))) 975 rights |= CAP_SEEK; 976 break; 977 } 978 } 979 980 if (flags & O_CREAT) 981 rights |= CAP_CREATE; 982 983 if (flags & O_TRUNC) 984 rights |= CAP_FTRUNCATE; 985 986 if (flags & (O_SYNC | O_FSYNC)) 987 rights |= CAP_FSYNC; 988 989 if (flags & (O_EXLOCK | O_SHLOCK)) 990 rights |= CAP_FLOCK; 991 992 return (rights); 993 } 994 995 /* 996 * Check permissions, allocate an open file structure, and call the device 997 * open routine if any. 
 */
#ifndef _SYS_SYSPROTO_H_
struct open_args {
	char	*path;
	int	flags;
	int	mode;
};
#endif
/* open(2): open relative to the current working directory. */
int
sys_open(td, uap)
	struct thread *td;
	register struct open_args /* {
		char *path;
		int flags;
		int mode;
	} */ *uap;
{

	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct openat_args {
	int	fd;
	char	*path;
	int	flag;
	int	mode;
};
#endif
/* openat(2): open relative to the directory descriptor uap->fd. */
int
sys_openat(struct thread *td, struct openat_args *uap)
{

	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
	    uap->mode));
}

/* In-kernel open(): kern_openat() with AT_FDCWD as the directory. */
int
kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
    int mode)
{

	return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode));
}

/*
 * Open 'path' relative to 'fd' and install a new descriptor for it.
 *
 * The file structure is allocated first without a descriptor, vn_open()
 * does the lookup and open, and the descriptor is installed only once
 * everything has succeeded.  On success the new descriptor number is
 * stored in td->td_retval[0] and 0 is returned; otherwise an errno value
 * is returned.
 */
int
kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    int flags, int mode)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	int cmode;
	int indx = -1, error;
	struct nameidata nd;
	cap_rights_t rights_needed = CAP_LOOKUP;

	AUDIT_ARG_FFLAGS(flags);
	AUDIT_ARG_MODE(mode);
	/* XXX: audit dirfd */
	/* Rights are derived from the flags before FFLAGS() converts them. */
	rights_needed |= flags_to_rights(flags);
	/*
	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
	 * may be specified.
	 */
	if (flags & O_EXEC) {
		if (flags & O_ACCMODE)
			return (EINVAL);
	} else if ((flags & O_ACCMODE) == O_ACCMODE) {
		return (EINVAL);
	} else {
		flags = FFLAGS(flags);
	}

	/*
	 * Allocate the file descriptor, but don't install a descriptor yet.
	 */
	error = falloc_noinstall(td, &fp);
	if (error)
		return (error);
	/*
	 * An extra reference on `fp' has been held for us by
	 * falloc_noinstall().
	 */
	/* Set the flags early so the finit in devfs can pick them up. */
	fp->f_flag = flags & FMASK;
	/* Apply the process umask and strip the sticky bit. */
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
	    rights_needed, td);
	td->td_dupfd = -1;		/* XXX check for fdopen */
	error = vn_open(&nd, &flags, cmode, fp);
	if (error) {
		/*
		 * If the vn_open replaced the method vector, something
		 * wondrous happened deep below and we just pass it up
		 * pretending we know what we do.
		 */
		if (error == ENXIO && fp->f_ops != &badfileops)
			goto success;

		/*
		 * Handle special fdopen() case. bleh.
		 *
		 * Don't do this for relative (capability) lookups; we don't
		 * understand exactly what would happen, and we don't think
		 * that it ever should.
		 */
		if (nd.ni_strictrelative == 0 &&
		    (error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0) {
			/* On success dupfdopen() has set indx itself. */
			error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
			    &indx);
			if (error == 0)
				goto success;
		}

		goto bad;
	}
	td->td_dupfd = 0;
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;

	/*
	 * Store the vnode, for any f_type. Typically, the vnode use
	 * count is decremented by direct call to vn_closefile() for
	 * files that switched type in the cdevsw fdopen() method.
	 */
	fp->f_vnode = vp;
	/*
	 * If the file wasn't claimed by devfs bind it to the normal
	 * vnode operations here.
	 */
	if (fp->f_ops == &badfileops) {
		KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
		fp->f_seqcount = 1;
		finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE,
		    vp, &vnops);
	}

	VOP_UNLOCK(vp, 0);
	if (flags & O_TRUNC) {
		error = fo_truncate(fp, 0, td->td_ucred, td);
		/*
		 * NOTE(review): this path reaches 'bad' without calling
		 * filecaps_free(&nd.ni_filecaps), unlike the finstall()
		 * failure path below -- confirm that this is intended.
		 */
		if (error)
			goto bad;
	}
success:
	/*
	 * If we haven't already installed the FD (for dupfdopen), do so now.
	 */
	if (indx == -1) {
		struct filecaps *fcaps;

#ifdef CAPABILITIES
		if (nd.ni_strictrelative == 1)
			fcaps = &nd.ni_filecaps;
		else
#endif
			fcaps = NULL;
		error = finstall(td, fp, &indx, flags, fcaps);
		/* On success finstall() consumes fcaps. */
		if (error != 0) {
			filecaps_free(&nd.ni_filecaps);
			goto bad;
		}
	} else {
		filecaps_free(&nd.ni_filecaps);
	}

	/*
	 * Release our private reference, leaving the one associated with
	 * the descriptor table intact.
	 */
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (0);
bad:
	/* No descriptor was installed; drop the only reference to fp. */
	KASSERT(indx == -1, ("indx=%d, should be -1", indx));
	fdrop(fp, td);
	return (error);
}

#ifdef COMPAT_43
/*
 * Create a file.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
/* Old creat(): an open with O_WRONLY | O_CREAT | O_TRUNC. */
int
ocreat(td, uap)
	struct thread *td;
	register struct ocreat_args /* {
		char *path;
		int mode;
	} */ *uap;
{

	return (kern_open(td, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
}
#endif /* COMPAT_43 */

/*
 * Create a special file.
1206 */ 1207 #ifndef _SYS_SYSPROTO_H_ 1208 struct mknod_args { 1209 char *path; 1210 int mode; 1211 int dev; 1212 }; 1213 #endif 1214 int 1215 sys_mknod(td, uap) 1216 struct thread *td; 1217 register struct mknod_args /* { 1218 char *path; 1219 int mode; 1220 int dev; 1221 } */ *uap; 1222 { 1223 1224 return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); 1225 } 1226 1227 #ifndef _SYS_SYSPROTO_H_ 1228 struct mknodat_args { 1229 int fd; 1230 char *path; 1231 mode_t mode; 1232 dev_t dev; 1233 }; 1234 #endif 1235 int 1236 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1237 { 1238 1239 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1240 uap->dev)); 1241 } 1242 1243 int 1244 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode, 1245 int dev) 1246 { 1247 1248 return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev)); 1249 } 1250 1251 int 1252 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1253 int mode, int dev) 1254 { 1255 struct vnode *vp; 1256 struct mount *mp; 1257 struct vattr vattr; 1258 int error; 1259 int whiteout = 0; 1260 struct nameidata nd; 1261 1262 AUDIT_ARG_MODE(mode); 1263 AUDIT_ARG_DEV(dev); 1264 switch (mode & S_IFMT) { 1265 case S_IFCHR: 1266 case S_IFBLK: 1267 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1268 break; 1269 case S_IFMT: 1270 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1271 break; 1272 case S_IFWHT: 1273 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1274 break; 1275 case S_IFIFO: 1276 if (dev == 0) 1277 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1278 /* FALLTHROUGH */ 1279 default: 1280 error = EINVAL; 1281 break; 1282 } 1283 if (error) 1284 return (error); 1285 restart: 1286 bwillwrite(); 1287 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1288 pathseg, path, fd, CAP_MKNODAT, td); 1289 if ((error = namei(&nd)) != 0) 1290 return (error); 1291 vp = nd.ni_vp; 1292 if (vp != NULL) { 1293 NDFREE(&nd, NDF_ONLY_PNBUF); 
1294 if (vp == nd.ni_dvp) 1295 vrele(nd.ni_dvp); 1296 else 1297 vput(nd.ni_dvp); 1298 vrele(vp); 1299 return (EEXIST); 1300 } else { 1301 VATTR_NULL(&vattr); 1302 vattr.va_mode = (mode & ALLPERMS) & 1303 ~td->td_proc->p_fd->fd_cmask; 1304 vattr.va_rdev = dev; 1305 whiteout = 0; 1306 1307 switch (mode & S_IFMT) { 1308 case S_IFMT: /* used by badsect to flag bad sectors */ 1309 vattr.va_type = VBAD; 1310 break; 1311 case S_IFCHR: 1312 vattr.va_type = VCHR; 1313 break; 1314 case S_IFBLK: 1315 vattr.va_type = VBLK; 1316 break; 1317 case S_IFWHT: 1318 whiteout = 1; 1319 break; 1320 default: 1321 panic("kern_mknod: invalid mode"); 1322 } 1323 } 1324 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1325 NDFREE(&nd, NDF_ONLY_PNBUF); 1326 vput(nd.ni_dvp); 1327 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1328 return (error); 1329 goto restart; 1330 } 1331 #ifdef MAC 1332 if (error == 0 && !whiteout) 1333 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1334 &nd.ni_cnd, &vattr); 1335 #endif 1336 if (!error) { 1337 if (whiteout) 1338 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1339 else { 1340 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1341 &nd.ni_cnd, &vattr); 1342 if (error == 0) 1343 vput(nd.ni_vp); 1344 } 1345 } 1346 NDFREE(&nd, NDF_ONLY_PNBUF); 1347 vput(nd.ni_dvp); 1348 vn_finished_write(mp); 1349 return (error); 1350 } 1351 1352 /* 1353 * Create a named pipe. 
1354 */ 1355 #ifndef _SYS_SYSPROTO_H_ 1356 struct mkfifo_args { 1357 char *path; 1358 int mode; 1359 }; 1360 #endif 1361 int 1362 sys_mkfifo(td, uap) 1363 struct thread *td; 1364 register struct mkfifo_args /* { 1365 char *path; 1366 int mode; 1367 } */ *uap; 1368 { 1369 1370 return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode)); 1371 } 1372 1373 #ifndef _SYS_SYSPROTO_H_ 1374 struct mkfifoat_args { 1375 int fd; 1376 char *path; 1377 mode_t mode; 1378 }; 1379 #endif 1380 int 1381 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1382 { 1383 1384 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1385 uap->mode)); 1386 } 1387 1388 int 1389 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode) 1390 { 1391 1392 return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode)); 1393 } 1394 1395 int 1396 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1397 int mode) 1398 { 1399 struct mount *mp; 1400 struct vattr vattr; 1401 int error; 1402 struct nameidata nd; 1403 1404 AUDIT_ARG_MODE(mode); 1405 restart: 1406 bwillwrite(); 1407 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1408 pathseg, path, fd, CAP_MKFIFOAT, td); 1409 if ((error = namei(&nd)) != 0) 1410 return (error); 1411 if (nd.ni_vp != NULL) { 1412 NDFREE(&nd, NDF_ONLY_PNBUF); 1413 if (nd.ni_vp == nd.ni_dvp) 1414 vrele(nd.ni_dvp); 1415 else 1416 vput(nd.ni_dvp); 1417 vrele(nd.ni_vp); 1418 return (EEXIST); 1419 } 1420 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1421 NDFREE(&nd, NDF_ONLY_PNBUF); 1422 vput(nd.ni_dvp); 1423 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1424 return (error); 1425 goto restart; 1426 } 1427 VATTR_NULL(&vattr); 1428 vattr.va_type = VFIFO; 1429 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1430 #ifdef MAC 1431 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1432 &vattr); 1433 if (error) 1434 goto out; 1435 #endif 1436 error = 
VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1437 if (error == 0) 1438 vput(nd.ni_vp); 1439 #ifdef MAC 1440 out: 1441 #endif 1442 vput(nd.ni_dvp); 1443 vn_finished_write(mp); 1444 NDFREE(&nd, NDF_ONLY_PNBUF); 1445 return (error); 1446 } 1447 1448 /* 1449 * Make a hard file link. 1450 */ 1451 #ifndef _SYS_SYSPROTO_H_ 1452 struct link_args { 1453 char *path; 1454 char *link; 1455 }; 1456 #endif 1457 int 1458 sys_link(td, uap) 1459 struct thread *td; 1460 register struct link_args /* { 1461 char *path; 1462 char *link; 1463 } */ *uap; 1464 { 1465 1466 return (kern_link(td, uap->path, uap->link, UIO_USERSPACE)); 1467 } 1468 1469 #ifndef _SYS_SYSPROTO_H_ 1470 struct linkat_args { 1471 int fd1; 1472 char *path1; 1473 int fd2; 1474 char *path2; 1475 int flag; 1476 }; 1477 #endif 1478 int 1479 sys_linkat(struct thread *td, struct linkat_args *uap) 1480 { 1481 int flag; 1482 1483 flag = uap->flag; 1484 if (flag & ~AT_SYMLINK_FOLLOW) 1485 return (EINVAL); 1486 1487 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1488 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? 
FOLLOW : NOFOLLOW)); 1489 } 1490 1491 int hardlink_check_uid = 0; 1492 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1493 &hardlink_check_uid, 0, 1494 "Unprivileged processes cannot create hard links to files owned by other " 1495 "users"); 1496 static int hardlink_check_gid = 0; 1497 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1498 &hardlink_check_gid, 0, 1499 "Unprivileged processes cannot create hard links to files owned by other " 1500 "groups"); 1501 1502 static int 1503 can_hardlink(struct vnode *vp, struct ucred *cred) 1504 { 1505 struct vattr va; 1506 int error; 1507 1508 if (!hardlink_check_uid && !hardlink_check_gid) 1509 return (0); 1510 1511 error = VOP_GETATTR(vp, &va, cred); 1512 if (error != 0) 1513 return (error); 1514 1515 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1516 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1517 if (error) 1518 return (error); 1519 } 1520 1521 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1522 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1523 if (error) 1524 return (error); 1525 } 1526 1527 return (0); 1528 } 1529 1530 int 1531 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg) 1532 { 1533 1534 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW)); 1535 } 1536 1537 int 1538 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1539 enum uio_seg segflg, int follow) 1540 { 1541 struct vnode *vp; 1542 struct mount *mp; 1543 struct nameidata nd; 1544 int error; 1545 1546 bwillwrite(); 1547 NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td); 1548 1549 if ((error = namei(&nd)) != 0) 1550 return (error); 1551 NDFREE(&nd, NDF_ONLY_PNBUF); 1552 vp = nd.ni_vp; 1553 if (vp->v_type == VDIR) { 1554 vrele(vp); 1555 return (EPERM); /* POSIX */ 1556 } 1557 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 1558 vrele(vp); 1559 return (error); 1560 } 1561 
NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2, 1562 segflg, path2, fd2, CAP_LINKAT, td); 1563 if ((error = namei(&nd)) == 0) { 1564 if (nd.ni_vp != NULL) { 1565 if (nd.ni_dvp == nd.ni_vp) 1566 vrele(nd.ni_dvp); 1567 else 1568 vput(nd.ni_dvp); 1569 vrele(nd.ni_vp); 1570 error = EEXIST; 1571 } else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) 1572 == 0) { 1573 error = can_hardlink(vp, td->td_ucred); 1574 if (error == 0) 1575 #ifdef MAC 1576 error = mac_vnode_check_link(td->td_ucred, 1577 nd.ni_dvp, vp, &nd.ni_cnd); 1578 if (error == 0) 1579 #endif 1580 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1581 VOP_UNLOCK(vp, 0); 1582 vput(nd.ni_dvp); 1583 } 1584 NDFREE(&nd, NDF_ONLY_PNBUF); 1585 } 1586 vrele(vp); 1587 vn_finished_write(mp); 1588 return (error); 1589 } 1590 1591 /* 1592 * Make a symbolic link. 1593 */ 1594 #ifndef _SYS_SYSPROTO_H_ 1595 struct symlink_args { 1596 char *path; 1597 char *link; 1598 }; 1599 #endif 1600 int 1601 sys_symlink(td, uap) 1602 struct thread *td; 1603 register struct symlink_args /* { 1604 char *path; 1605 char *link; 1606 } */ *uap; 1607 { 1608 1609 return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE)); 1610 } 1611 1612 #ifndef _SYS_SYSPROTO_H_ 1613 struct symlinkat_args { 1614 char *path; 1615 int fd; 1616 char *path2; 1617 }; 1618 #endif 1619 int 1620 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1621 { 1622 1623 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1624 UIO_USERSPACE)); 1625 } 1626 1627 int 1628 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg) 1629 { 1630 1631 return (kern_symlinkat(td, path, AT_FDCWD, link, segflg)); 1632 } 1633 1634 int 1635 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1636 enum uio_seg segflg) 1637 { 1638 struct mount *mp; 1639 struct vattr vattr; 1640 char *syspath; 1641 int error; 1642 struct nameidata nd; 1643 1644 if (segflg == UIO_SYSSPACE) { 1645 syspath = path1; 1646 } else { 1647 
syspath = uma_zalloc(namei_zone, M_WAITOK); 1648 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1649 goto out; 1650 } 1651 AUDIT_ARG_TEXT(syspath); 1652 restart: 1653 bwillwrite(); 1654 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1655 segflg, path2, fd, CAP_SYMLINKAT, td); 1656 if ((error = namei(&nd)) != 0) 1657 goto out; 1658 if (nd.ni_vp) { 1659 NDFREE(&nd, NDF_ONLY_PNBUF); 1660 if (nd.ni_vp == nd.ni_dvp) 1661 vrele(nd.ni_dvp); 1662 else 1663 vput(nd.ni_dvp); 1664 vrele(nd.ni_vp); 1665 error = EEXIST; 1666 goto out; 1667 } 1668 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1669 NDFREE(&nd, NDF_ONLY_PNBUF); 1670 vput(nd.ni_dvp); 1671 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1672 goto out; 1673 goto restart; 1674 } 1675 VATTR_NULL(&vattr); 1676 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1677 #ifdef MAC 1678 vattr.va_type = VLNK; 1679 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1680 &vattr); 1681 if (error) 1682 goto out2; 1683 #endif 1684 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1685 if (error == 0) 1686 vput(nd.ni_vp); 1687 #ifdef MAC 1688 out2: 1689 #endif 1690 NDFREE(&nd, NDF_ONLY_PNBUF); 1691 vput(nd.ni_dvp); 1692 vn_finished_write(mp); 1693 out: 1694 if (segflg != UIO_SYSSPACE) 1695 uma_zfree(namei_zone, syspath); 1696 return (error); 1697 } 1698 1699 /* 1700 * Delete a whiteout from the filesystem. 
1701 */ 1702 int 1703 sys_undelete(td, uap) 1704 struct thread *td; 1705 register struct undelete_args /* { 1706 char *path; 1707 } */ *uap; 1708 { 1709 int error; 1710 struct mount *mp; 1711 struct nameidata nd; 1712 1713 restart: 1714 bwillwrite(); 1715 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1716 UIO_USERSPACE, uap->path, td); 1717 error = namei(&nd); 1718 if (error) 1719 return (error); 1720 1721 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1722 NDFREE(&nd, NDF_ONLY_PNBUF); 1723 if (nd.ni_vp == nd.ni_dvp) 1724 vrele(nd.ni_dvp); 1725 else 1726 vput(nd.ni_dvp); 1727 if (nd.ni_vp) 1728 vrele(nd.ni_vp); 1729 return (EEXIST); 1730 } 1731 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1732 NDFREE(&nd, NDF_ONLY_PNBUF); 1733 vput(nd.ni_dvp); 1734 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1735 return (error); 1736 goto restart; 1737 } 1738 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1739 NDFREE(&nd, NDF_ONLY_PNBUF); 1740 vput(nd.ni_dvp); 1741 vn_finished_write(mp); 1742 return (error); 1743 } 1744 1745 /* 1746 * Delete a name from the filesystem. 
1747 */ 1748 #ifndef _SYS_SYSPROTO_H_ 1749 struct unlink_args { 1750 char *path; 1751 }; 1752 #endif 1753 int 1754 sys_unlink(td, uap) 1755 struct thread *td; 1756 struct unlink_args /* { 1757 char *path; 1758 } */ *uap; 1759 { 1760 1761 return (kern_unlink(td, uap->path, UIO_USERSPACE)); 1762 } 1763 1764 #ifndef _SYS_SYSPROTO_H_ 1765 struct unlinkat_args { 1766 int fd; 1767 char *path; 1768 int flag; 1769 }; 1770 #endif 1771 int 1772 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1773 { 1774 int flag = uap->flag; 1775 int fd = uap->fd; 1776 char *path = uap->path; 1777 1778 if (flag & ~AT_REMOVEDIR) 1779 return (EINVAL); 1780 1781 if (flag & AT_REMOVEDIR) 1782 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1783 else 1784 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1785 } 1786 1787 int 1788 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg) 1789 { 1790 1791 return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0)); 1792 } 1793 1794 int 1795 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1796 ino_t oldinum) 1797 { 1798 struct mount *mp; 1799 struct vnode *vp; 1800 int error; 1801 struct nameidata nd; 1802 struct stat sb; 1803 1804 restart: 1805 bwillwrite(); 1806 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1807 pathseg, path, fd, CAP_UNLINKAT, td); 1808 if ((error = namei(&nd)) != 0) 1809 return (error == EINVAL ? EPERM : error); 1810 vp = nd.ni_vp; 1811 if (vp->v_type == VDIR && oldinum == 0) { 1812 error = EPERM; /* POSIX */ 1813 } else if (oldinum != 0 && 1814 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1815 sb.st_ino != oldinum) { 1816 error = EIDRM; /* Identifier removed */ 1817 } else { 1818 /* 1819 * The root of a mounted filesystem cannot be deleted. 1820 * 1821 * XXX: can this only be a VDIR case? 
1822 */ 1823 if (vp->v_vflag & VV_ROOT) 1824 error = EBUSY; 1825 } 1826 if (error == 0) { 1827 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1828 NDFREE(&nd, NDF_ONLY_PNBUF); 1829 vput(nd.ni_dvp); 1830 if (vp == nd.ni_dvp) 1831 vrele(vp); 1832 else 1833 vput(vp); 1834 if ((error = vn_start_write(NULL, &mp, 1835 V_XSLEEP | PCATCH)) != 0) 1836 return (error); 1837 goto restart; 1838 } 1839 #ifdef MAC 1840 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1841 &nd.ni_cnd); 1842 if (error) 1843 goto out; 1844 #endif 1845 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1846 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1847 #ifdef MAC 1848 out: 1849 #endif 1850 vn_finished_write(mp); 1851 } 1852 NDFREE(&nd, NDF_ONLY_PNBUF); 1853 vput(nd.ni_dvp); 1854 if (vp == nd.ni_dvp) 1855 vrele(vp); 1856 else 1857 vput(vp); 1858 return (error); 1859 } 1860 1861 /* 1862 * Reposition read/write file offset. 1863 */ 1864 #ifndef _SYS_SYSPROTO_H_ 1865 struct lseek_args { 1866 int fd; 1867 int pad; 1868 off_t offset; 1869 int whence; 1870 }; 1871 #endif 1872 int 1873 sys_lseek(td, uap) 1874 struct thread *td; 1875 register struct lseek_args /* { 1876 int fd; 1877 int pad; 1878 off_t offset; 1879 int whence; 1880 } */ *uap; 1881 { 1882 struct file *fp; 1883 int error; 1884 1885 AUDIT_ARG_FD(uap->fd); 1886 if ((error = fget(td, uap->fd, CAP_SEEK, &fp)) != 0) 1887 return (error); 1888 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1889 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1890 fdrop(fp, td); 1891 return (error); 1892 } 1893 1894 #if defined(COMPAT_43) 1895 /* 1896 * Reposition read/write file offset. 
1897 */ 1898 #ifndef _SYS_SYSPROTO_H_ 1899 struct olseek_args { 1900 int fd; 1901 long offset; 1902 int whence; 1903 }; 1904 #endif 1905 int 1906 olseek(td, uap) 1907 struct thread *td; 1908 register struct olseek_args /* { 1909 int fd; 1910 long offset; 1911 int whence; 1912 } */ *uap; 1913 { 1914 struct lseek_args /* { 1915 int fd; 1916 int pad; 1917 off_t offset; 1918 int whence; 1919 } */ nuap; 1920 1921 nuap.fd = uap->fd; 1922 nuap.offset = uap->offset; 1923 nuap.whence = uap->whence; 1924 return (sys_lseek(td, &nuap)); 1925 } 1926 #endif /* COMPAT_43 */ 1927 1928 /* Version with the 'pad' argument */ 1929 int 1930 freebsd6_lseek(td, uap) 1931 struct thread *td; 1932 register struct freebsd6_lseek_args *uap; 1933 { 1934 struct lseek_args ouap; 1935 1936 ouap.fd = uap->fd; 1937 ouap.offset = uap->offset; 1938 ouap.whence = uap->whence; 1939 return (sys_lseek(td, &ouap)); 1940 } 1941 1942 /* 1943 * Check access permissions using passed credentials. 1944 */ 1945 static int 1946 vn_access(vp, user_flags, cred, td) 1947 struct vnode *vp; 1948 int user_flags; 1949 struct ucred *cred; 1950 struct thread *td; 1951 { 1952 int error; 1953 accmode_t accmode; 1954 1955 /* Flags == 0 means only check for existence. */ 1956 error = 0; 1957 if (user_flags) { 1958 accmode = 0; 1959 if (user_flags & R_OK) 1960 accmode |= VREAD; 1961 if (user_flags & W_OK) 1962 accmode |= VWRITE; 1963 if (user_flags & X_OK) 1964 accmode |= VEXEC; 1965 #ifdef MAC 1966 error = mac_vnode_check_access(cred, vp, accmode); 1967 if (error) 1968 return (error); 1969 #endif 1970 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1971 error = VOP_ACCESS(vp, accmode, cred, td); 1972 } 1973 return (error); 1974 } 1975 1976 /* 1977 * Check access permissions using "real" credentials. 
1978 */ 1979 #ifndef _SYS_SYSPROTO_H_ 1980 struct access_args { 1981 char *path; 1982 int amode; 1983 }; 1984 #endif 1985 int 1986 sys_access(td, uap) 1987 struct thread *td; 1988 register struct access_args /* { 1989 char *path; 1990 int amode; 1991 } */ *uap; 1992 { 1993 1994 return (kern_access(td, uap->path, UIO_USERSPACE, uap->amode)); 1995 } 1996 1997 #ifndef _SYS_SYSPROTO_H_ 1998 struct faccessat_args { 1999 int dirfd; 2000 char *path; 2001 int amode; 2002 int flag; 2003 } 2004 #endif 2005 int 2006 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2007 { 2008 2009 if (uap->flag & ~AT_EACCESS) 2010 return (EINVAL); 2011 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2012 uap->amode)); 2013 } 2014 2015 int 2016 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2017 { 2018 2019 return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, amode)); 2020 } 2021 2022 int 2023 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2024 int flag, int amode) 2025 { 2026 struct ucred *cred, *tmpcred; 2027 struct vnode *vp; 2028 struct nameidata nd; 2029 int error; 2030 2031 /* 2032 * Create and modify a temporary credential instead of one that 2033 * is potentially shared. 
2034 */ 2035 if (!(flag & AT_EACCESS)) { 2036 cred = td->td_ucred; 2037 tmpcred = crdup(cred); 2038 tmpcred->cr_uid = cred->cr_ruid; 2039 tmpcred->cr_groups[0] = cred->cr_rgid; 2040 td->td_ucred = tmpcred; 2041 } else 2042 cred = tmpcred = td->td_ucred; 2043 AUDIT_ARG_VALUE(amode); 2044 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2045 AUDITVNODE1, pathseg, path, fd, CAP_FSTAT, td); 2046 if ((error = namei(&nd)) != 0) 2047 goto out1; 2048 vp = nd.ni_vp; 2049 2050 error = vn_access(vp, amode, tmpcred, td); 2051 NDFREE(&nd, NDF_ONLY_PNBUF); 2052 vput(vp); 2053 out1: 2054 if (!(flag & AT_EACCESS)) { 2055 td->td_ucred = cred; 2056 crfree(tmpcred); 2057 } 2058 return (error); 2059 } 2060 2061 /* 2062 * Check access permissions using "effective" credentials. 2063 */ 2064 #ifndef _SYS_SYSPROTO_H_ 2065 struct eaccess_args { 2066 char *path; 2067 int amode; 2068 }; 2069 #endif 2070 int 2071 sys_eaccess(td, uap) 2072 struct thread *td; 2073 register struct eaccess_args /* { 2074 char *path; 2075 int amode; 2076 } */ *uap; 2077 { 2078 2079 return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->amode)); 2080 } 2081 2082 int 2083 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2084 { 2085 2086 return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, amode)); 2087 } 2088 2089 #if defined(COMPAT_43) 2090 /* 2091 * Get file status; this version follows links. 
2092 */ 2093 #ifndef _SYS_SYSPROTO_H_ 2094 struct ostat_args { 2095 char *path; 2096 struct ostat *ub; 2097 }; 2098 #endif 2099 int 2100 ostat(td, uap) 2101 struct thread *td; 2102 register struct ostat_args /* { 2103 char *path; 2104 struct ostat *ub; 2105 } */ *uap; 2106 { 2107 struct stat sb; 2108 struct ostat osb; 2109 int error; 2110 2111 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2112 if (error) 2113 return (error); 2114 cvtstat(&sb, &osb); 2115 error = copyout(&osb, uap->ub, sizeof (osb)); 2116 return (error); 2117 } 2118 2119 /* 2120 * Get file status; this version does not follow links. 2121 */ 2122 #ifndef _SYS_SYSPROTO_H_ 2123 struct olstat_args { 2124 char *path; 2125 struct ostat *ub; 2126 }; 2127 #endif 2128 int 2129 olstat(td, uap) 2130 struct thread *td; 2131 register struct olstat_args /* { 2132 char *path; 2133 struct ostat *ub; 2134 } */ *uap; 2135 { 2136 struct stat sb; 2137 struct ostat osb; 2138 int error; 2139 2140 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2141 if (error) 2142 return (error); 2143 cvtstat(&sb, &osb); 2144 error = copyout(&osb, uap->ub, sizeof (osb)); 2145 return (error); 2146 } 2147 2148 /* 2149 * Convert from an old to a new stat structure. 2150 */ 2151 void 2152 cvtstat(st, ost) 2153 struct stat *st; 2154 struct ostat *ost; 2155 { 2156 2157 ost->st_dev = st->st_dev; 2158 ost->st_ino = st->st_ino; 2159 ost->st_mode = st->st_mode; 2160 ost->st_nlink = st->st_nlink; 2161 ost->st_uid = st->st_uid; 2162 ost->st_gid = st->st_gid; 2163 ost->st_rdev = st->st_rdev; 2164 if (st->st_size < (quad_t)1 << 32) 2165 ost->st_size = st->st_size; 2166 else 2167 ost->st_size = -2; 2168 ost->st_atim = st->st_atim; 2169 ost->st_mtim = st->st_mtim; 2170 ost->st_ctim = st->st_ctim; 2171 ost->st_blksize = st->st_blksize; 2172 ost->st_blocks = st->st_blocks; 2173 ost->st_flags = st->st_flags; 2174 ost->st_gen = st->st_gen; 2175 } 2176 #endif /* COMPAT_43 */ 2177 2178 /* 2179 * Get file status; this version follows links. 
2180 */ 2181 #ifndef _SYS_SYSPROTO_H_ 2182 struct stat_args { 2183 char *path; 2184 struct stat *ub; 2185 }; 2186 #endif 2187 int 2188 sys_stat(td, uap) 2189 struct thread *td; 2190 register struct stat_args /* { 2191 char *path; 2192 struct stat *ub; 2193 } */ *uap; 2194 { 2195 struct stat sb; 2196 int error; 2197 2198 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2199 if (error == 0) 2200 error = copyout(&sb, uap->ub, sizeof (sb)); 2201 return (error); 2202 } 2203 2204 #ifndef _SYS_SYSPROTO_H_ 2205 struct fstatat_args { 2206 int fd; 2207 char *path; 2208 struct stat *buf; 2209 int flag; 2210 } 2211 #endif 2212 int 2213 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2214 { 2215 struct stat sb; 2216 int error; 2217 2218 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2219 UIO_USERSPACE, &sb); 2220 if (error == 0) 2221 error = copyout(&sb, uap->buf, sizeof (sb)); 2222 return (error); 2223 } 2224 2225 int 2226 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2227 { 2228 2229 return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp)); 2230 } 2231 2232 int 2233 kern_statat(struct thread *td, int flag, int fd, char *path, 2234 enum uio_seg pathseg, struct stat *sbp) 2235 { 2236 2237 return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL)); 2238 } 2239 2240 int 2241 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path, 2242 enum uio_seg pathseg, struct stat *sbp, 2243 void (*hook)(struct vnode *vp, struct stat *sbp)) 2244 { 2245 struct nameidata nd; 2246 struct stat sb; 2247 int error; 2248 2249 if (flag & ~AT_SYMLINK_NOFOLLOW) 2250 return (EINVAL); 2251 2252 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : 2253 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2254 CAP_FSTAT, td); 2255 2256 if ((error = namei(&nd)) != 0) 2257 return (error); 2258 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2259 if (!error) { 2260 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0); 2261 if (S_ISREG(sb.st_mode)) 2262 SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0); 2263 if (__predict_false(hook != NULL)) 2264 hook(nd.ni_vp, &sb); 2265 } 2266 NDFREE(&nd, NDF_ONLY_PNBUF); 2267 vput(nd.ni_vp); 2268 if (error) 2269 return (error); 2270 *sbp = sb; 2271 #ifdef KTRACE 2272 if (KTRPOINT(td, KTR_STRUCT)) 2273 ktrstat(&sb); 2274 #endif 2275 return (0); 2276 } 2277 2278 /* 2279 * Get file status; this version does not follow links. 2280 */ 2281 #ifndef _SYS_SYSPROTO_H_ 2282 struct lstat_args { 2283 char *path; 2284 struct stat *ub; 2285 }; 2286 #endif 2287 int 2288 sys_lstat(td, uap) 2289 struct thread *td; 2290 register struct lstat_args /* { 2291 char *path; 2292 struct stat *ub; 2293 } */ *uap; 2294 { 2295 struct stat sb; 2296 int error; 2297 2298 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2299 if (error == 0) 2300 error = copyout(&sb, uap->ub, sizeof (sb)); 2301 return (error); 2302 } 2303 2304 int 2305 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2306 { 2307 2308 return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg, 2309 sbp)); 2310 } 2311 2312 /* 2313 * Implementation of the NetBSD [l]stat() functions. 
2314 */ 2315 void 2316 cvtnstat(sb, nsb) 2317 struct stat *sb; 2318 struct nstat *nsb; 2319 { 2320 bzero(nsb, sizeof *nsb); 2321 nsb->st_dev = sb->st_dev; 2322 nsb->st_ino = sb->st_ino; 2323 nsb->st_mode = sb->st_mode; 2324 nsb->st_nlink = sb->st_nlink; 2325 nsb->st_uid = sb->st_uid; 2326 nsb->st_gid = sb->st_gid; 2327 nsb->st_rdev = sb->st_rdev; 2328 nsb->st_atim = sb->st_atim; 2329 nsb->st_mtim = sb->st_mtim; 2330 nsb->st_ctim = sb->st_ctim; 2331 nsb->st_size = sb->st_size; 2332 nsb->st_blocks = sb->st_blocks; 2333 nsb->st_blksize = sb->st_blksize; 2334 nsb->st_flags = sb->st_flags; 2335 nsb->st_gen = sb->st_gen; 2336 nsb->st_birthtim = sb->st_birthtim; 2337 } 2338 2339 #ifndef _SYS_SYSPROTO_H_ 2340 struct nstat_args { 2341 char *path; 2342 struct nstat *ub; 2343 }; 2344 #endif 2345 int 2346 sys_nstat(td, uap) 2347 struct thread *td; 2348 register struct nstat_args /* { 2349 char *path; 2350 struct nstat *ub; 2351 } */ *uap; 2352 { 2353 struct stat sb; 2354 struct nstat nsb; 2355 int error; 2356 2357 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2358 if (error) 2359 return (error); 2360 cvtnstat(&sb, &nsb); 2361 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2362 return (error); 2363 } 2364 2365 /* 2366 * NetBSD lstat. Get file status; this version does not follow links. 2367 */ 2368 #ifndef _SYS_SYSPROTO_H_ 2369 struct lstat_args { 2370 char *path; 2371 struct stat *ub; 2372 }; 2373 #endif 2374 int 2375 sys_nlstat(td, uap) 2376 struct thread *td; 2377 register struct nlstat_args /* { 2378 char *path; 2379 struct nstat *ub; 2380 } */ *uap; 2381 { 2382 struct stat sb; 2383 struct nstat nsb; 2384 int error; 2385 2386 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2387 if (error) 2388 return (error); 2389 cvtnstat(&sb, &nsb); 2390 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2391 return (error); 2392 } 2393 2394 /* 2395 * Get configurable pathname variables. 
2396 */ 2397 #ifndef _SYS_SYSPROTO_H_ 2398 struct pathconf_args { 2399 char *path; 2400 int name; 2401 }; 2402 #endif 2403 int 2404 sys_pathconf(td, uap) 2405 struct thread *td; 2406 register struct pathconf_args /* { 2407 char *path; 2408 int name; 2409 } */ *uap; 2410 { 2411 2412 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2413 } 2414 2415 #ifndef _SYS_SYSPROTO_H_ 2416 struct lpathconf_args { 2417 char *path; 2418 int name; 2419 }; 2420 #endif 2421 int 2422 sys_lpathconf(td, uap) 2423 struct thread *td; 2424 register struct lpathconf_args /* { 2425 char *path; 2426 int name; 2427 } */ *uap; 2428 { 2429 2430 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2431 NOFOLLOW)); 2432 } 2433 2434 int 2435 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2436 u_long flags) 2437 { 2438 struct nameidata nd; 2439 int error; 2440 2441 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2442 pathseg, path, td); 2443 if ((error = namei(&nd)) != 0) 2444 return (error); 2445 NDFREE(&nd, NDF_ONLY_PNBUF); 2446 2447 /* If asynchronous I/O is available, it works for all files. */ 2448 if (name == _PC_ASYNC_IO) 2449 td->td_retval[0] = async_io_version; 2450 else 2451 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2452 vput(nd.ni_vp); 2453 return (error); 2454 } 2455 2456 /* 2457 * Return target name of a symbolic link. 
2458 */ 2459 #ifndef _SYS_SYSPROTO_H_ 2460 struct readlink_args { 2461 char *path; 2462 char *buf; 2463 size_t count; 2464 }; 2465 #endif 2466 int 2467 sys_readlink(td, uap) 2468 struct thread *td; 2469 register struct readlink_args /* { 2470 char *path; 2471 char *buf; 2472 size_t count; 2473 } */ *uap; 2474 { 2475 2476 return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf, 2477 UIO_USERSPACE, uap->count)); 2478 } 2479 #ifndef _SYS_SYSPROTO_H_ 2480 struct readlinkat_args { 2481 int fd; 2482 char *path; 2483 char *buf; 2484 size_t bufsize; 2485 }; 2486 #endif 2487 int 2488 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2489 { 2490 2491 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2492 uap->buf, UIO_USERSPACE, uap->bufsize)); 2493 } 2494 2495 int 2496 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf, 2497 enum uio_seg bufseg, size_t count) 2498 { 2499 2500 return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg, 2501 count)); 2502 } 2503 2504 int 2505 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2506 char *buf, enum uio_seg bufseg, size_t count) 2507 { 2508 struct vnode *vp; 2509 struct iovec aiov; 2510 struct uio auio; 2511 int error; 2512 struct nameidata nd; 2513 2514 if (count > IOSIZE_MAX) 2515 return (EINVAL); 2516 2517 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2518 pathseg, path, fd, td); 2519 2520 if ((error = namei(&nd)) != 0) 2521 return (error); 2522 NDFREE(&nd, NDF_ONLY_PNBUF); 2523 vp = nd.ni_vp; 2524 #ifdef MAC 2525 error = mac_vnode_check_readlink(td->td_ucred, vp); 2526 if (error) { 2527 vput(vp); 2528 return (error); 2529 } 2530 #endif 2531 if (vp->v_type != VLNK) 2532 error = EINVAL; 2533 else { 2534 aiov.iov_base = buf; 2535 aiov.iov_len = count; 2536 auio.uio_iov = &aiov; 2537 auio.uio_iovcnt = 1; 2538 auio.uio_offset = 0; 2539 auio.uio_rw = UIO_READ; 2540 auio.uio_segflg = bufseg; 2541 auio.uio_td = td; 2542 
auio.uio_resid = count; 2543 error = VOP_READLINK(vp, &auio, td->td_ucred); 2544 } 2545 vput(vp); 2546 td->td_retval[0] = count - auio.uio_resid; 2547 return (error); 2548 } 2549 2550 /* 2551 * Common implementation code for chflags() and fchflags(). 2552 */ 2553 static int 2554 setfflags(td, vp, flags) 2555 struct thread *td; 2556 struct vnode *vp; 2557 u_long flags; 2558 { 2559 int error; 2560 struct mount *mp; 2561 struct vattr vattr; 2562 2563 /* We can't support the value matching VNOVAL. */ 2564 if (flags == VNOVAL) 2565 return (EOPNOTSUPP); 2566 2567 /* 2568 * Prevent non-root users from setting flags on devices. When 2569 * a device is reused, users can retain ownership of the device 2570 * if they are allowed to set flags and programs assume that 2571 * chown can't fail when done as root. 2572 */ 2573 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2574 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2575 if (error) 2576 return (error); 2577 } 2578 2579 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2580 return (error); 2581 VATTR_NULL(&vattr); 2582 vattr.va_flags = flags; 2583 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2584 #ifdef MAC 2585 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2586 if (error == 0) 2587 #endif 2588 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2589 VOP_UNLOCK(vp, 0); 2590 vn_finished_write(mp); 2591 return (error); 2592 } 2593 2594 /* 2595 * Change flags of a file given a path name. 
2596 */ 2597 #ifndef _SYS_SYSPROTO_H_ 2598 struct chflags_args { 2599 const char *path; 2600 u_long flags; 2601 }; 2602 #endif 2603 int 2604 sys_chflags(td, uap) 2605 struct thread *td; 2606 register struct chflags_args /* { 2607 const char *path; 2608 u_long flags; 2609 } */ *uap; 2610 { 2611 2612 return (kern_chflags(td, uap->path, UIO_USERSPACE, uap->flags)); 2613 } 2614 2615 #ifndef _SYS_SYSPROTO_H_ 2616 struct chflagsat_args { 2617 int fd; 2618 const char *path; 2619 u_long flags; 2620 int atflag; 2621 } 2622 #endif 2623 int 2624 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2625 { 2626 int fd = uap->fd; 2627 const char *path = uap->path; 2628 u_long flags = uap->flags; 2629 int atflag = uap->atflag; 2630 2631 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2632 return (EINVAL); 2633 2634 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2635 } 2636 2637 static int 2638 kern_chflags(struct thread *td, const char *path, enum uio_seg pathseg, 2639 u_long flags) 2640 { 2641 2642 return (kern_chflagsat(td, AT_FDCWD, path, pathseg, flags, 0)); 2643 } 2644 2645 /* 2646 * Same as chflags() but doesn't follow symlinks. 2647 */ 2648 int 2649 sys_lchflags(td, uap) 2650 struct thread *td; 2651 register struct lchflags_args /* { 2652 const char *path; 2653 u_long flags; 2654 } */ *uap; 2655 { 2656 2657 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2658 uap->flags, AT_SYMLINK_NOFOLLOW)); 2659 } 2660 2661 static int 2662 kern_chflagsat(struct thread *td, int fd, const char *path, 2663 enum uio_seg pathseg, u_long flags, int atflag) 2664 { 2665 struct nameidata nd; 2666 int error, follow; 2667 2668 AUDIT_ARG_FFLAGS(flags); 2669 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; 2670 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2671 CAP_FCHFLAGS, td); 2672 if ((error = namei(&nd)) != 0) 2673 return (error); 2674 NDFREE(&nd, NDF_ONLY_PNBUF); 2675 error = setfflags(td, nd.ni_vp, flags); 2676 vrele(nd.ni_vp); 2677 return (error); 2678 } 2679 2680 /* 2681 * Change flags of a file given a file descriptor. 2682 */ 2683 #ifndef _SYS_SYSPROTO_H_ 2684 struct fchflags_args { 2685 int fd; 2686 u_long flags; 2687 }; 2688 #endif 2689 int 2690 sys_fchflags(td, uap) 2691 struct thread *td; 2692 register struct fchflags_args /* { 2693 int fd; 2694 u_long flags; 2695 } */ *uap; 2696 { 2697 struct file *fp; 2698 int error; 2699 2700 AUDIT_ARG_FD(uap->fd); 2701 AUDIT_ARG_FFLAGS(uap->flags); 2702 if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FCHFLAGS, 2703 &fp)) != 0) 2704 return (error); 2705 #ifdef AUDIT 2706 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2707 AUDIT_ARG_VNODE1(fp->f_vnode); 2708 VOP_UNLOCK(fp->f_vnode, 0); 2709 #endif 2710 error = setfflags(td, fp->f_vnode, uap->flags); 2711 fdrop(fp, td); 2712 return (error); 2713 } 2714 2715 /* 2716 * Common implementation code for chmod(), lchmod() and fchmod(). 2717 */ 2718 int 2719 setfmode(td, cred, vp, mode) 2720 struct thread *td; 2721 struct ucred *cred; 2722 struct vnode *vp; 2723 int mode; 2724 { 2725 int error; 2726 struct mount *mp; 2727 struct vattr vattr; 2728 2729 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2730 return (error); 2731 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2732 VATTR_NULL(&vattr); 2733 vattr.va_mode = mode & ALLPERMS; 2734 #ifdef MAC 2735 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2736 if (error == 0) 2737 #endif 2738 error = VOP_SETATTR(vp, &vattr, cred); 2739 VOP_UNLOCK(vp, 0); 2740 vn_finished_write(mp); 2741 return (error); 2742 } 2743 2744 /* 2745 * Change mode of a file given path name. 
2746 */ 2747 #ifndef _SYS_SYSPROTO_H_ 2748 struct chmod_args { 2749 char *path; 2750 int mode; 2751 }; 2752 #endif 2753 int 2754 sys_chmod(td, uap) 2755 struct thread *td; 2756 register struct chmod_args /* { 2757 char *path; 2758 int mode; 2759 } */ *uap; 2760 { 2761 2762 return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode)); 2763 } 2764 2765 #ifndef _SYS_SYSPROTO_H_ 2766 struct fchmodat_args { 2767 int dirfd; 2768 char *path; 2769 mode_t mode; 2770 int flag; 2771 } 2772 #endif 2773 int 2774 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2775 { 2776 int flag = uap->flag; 2777 int fd = uap->fd; 2778 char *path = uap->path; 2779 mode_t mode = uap->mode; 2780 2781 if (flag & ~AT_SYMLINK_NOFOLLOW) 2782 return (EINVAL); 2783 2784 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2785 } 2786 2787 int 2788 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode) 2789 { 2790 2791 return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0)); 2792 } 2793 2794 /* 2795 * Change mode of a file given path name (don't follow links.) 2796 */ 2797 #ifndef _SYS_SYSPROTO_H_ 2798 struct lchmod_args { 2799 char *path; 2800 int mode; 2801 }; 2802 #endif 2803 int 2804 sys_lchmod(td, uap) 2805 struct thread *td; 2806 register struct lchmod_args /* { 2807 char *path; 2808 int mode; 2809 } */ *uap; 2810 { 2811 2812 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2813 uap->mode, AT_SYMLINK_NOFOLLOW)); 2814 } 2815 2816 int 2817 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2818 mode_t mode, int flag) 2819 { 2820 int error; 2821 struct nameidata nd; 2822 int follow; 2823 2824 AUDIT_ARG_MODE(mode); 2825 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; 2826 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2827 CAP_FCHMOD, td); 2828 if ((error = namei(&nd)) != 0) 2829 return (error); 2830 NDFREE(&nd, NDF_ONLY_PNBUF); 2831 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2832 vrele(nd.ni_vp); 2833 return (error); 2834 } 2835 2836 /* 2837 * Change mode of a file given a file descriptor. 2838 */ 2839 #ifndef _SYS_SYSPROTO_H_ 2840 struct fchmod_args { 2841 int fd; 2842 int mode; 2843 }; 2844 #endif 2845 int 2846 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2847 { 2848 struct file *fp; 2849 int error; 2850 2851 AUDIT_ARG_FD(uap->fd); 2852 AUDIT_ARG_MODE(uap->mode); 2853 2854 error = fget(td, uap->fd, CAP_FCHMOD, &fp); 2855 if (error != 0) 2856 return (error); 2857 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2858 fdrop(fp, td); 2859 return (error); 2860 } 2861 2862 /* 2863 * Common implementation for chown(), lchown(), and fchown() 2864 */ 2865 int 2866 setfown(td, cred, vp, uid, gid) 2867 struct thread *td; 2868 struct ucred *cred; 2869 struct vnode *vp; 2870 uid_t uid; 2871 gid_t gid; 2872 { 2873 int error; 2874 struct mount *mp; 2875 struct vattr vattr; 2876 2877 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2878 return (error); 2879 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2880 VATTR_NULL(&vattr); 2881 vattr.va_uid = uid; 2882 vattr.va_gid = gid; 2883 #ifdef MAC 2884 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2885 vattr.va_gid); 2886 if (error == 0) 2887 #endif 2888 error = VOP_SETATTR(vp, &vattr, cred); 2889 VOP_UNLOCK(vp, 0); 2890 vn_finished_write(mp); 2891 return (error); 2892 } 2893 2894 /* 2895 * Set ownership given a path name. 
2896 */ 2897 #ifndef _SYS_SYSPROTO_H_ 2898 struct chown_args { 2899 char *path; 2900 int uid; 2901 int gid; 2902 }; 2903 #endif 2904 int 2905 sys_chown(td, uap) 2906 struct thread *td; 2907 register struct chown_args /* { 2908 char *path; 2909 int uid; 2910 int gid; 2911 } */ *uap; 2912 { 2913 2914 return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 2915 } 2916 2917 #ifndef _SYS_SYSPROTO_H_ 2918 struct fchownat_args { 2919 int fd; 2920 const char * path; 2921 uid_t uid; 2922 gid_t gid; 2923 int flag; 2924 }; 2925 #endif 2926 int 2927 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2928 { 2929 int flag; 2930 2931 flag = uap->flag; 2932 if (flag & ~AT_SYMLINK_NOFOLLOW) 2933 return (EINVAL); 2934 2935 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2936 uap->gid, uap->flag)); 2937 } 2938 2939 int 2940 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 2941 int gid) 2942 { 2943 2944 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0)); 2945 } 2946 2947 int 2948 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2949 int uid, int gid, int flag) 2950 { 2951 struct nameidata nd; 2952 int error, follow; 2953 2954 AUDIT_ARG_OWNER(uid, gid); 2955 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2956 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2957 CAP_FCHOWN, td); 2958 2959 if ((error = namei(&nd)) != 0) 2960 return (error); 2961 NDFREE(&nd, NDF_ONLY_PNBUF); 2962 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2963 vrele(nd.ni_vp); 2964 return (error); 2965 } 2966 2967 /* 2968 * Set ownership given a path name, do not cross symlinks. 
2969 */ 2970 #ifndef _SYS_SYSPROTO_H_ 2971 struct lchown_args { 2972 char *path; 2973 int uid; 2974 int gid; 2975 }; 2976 #endif 2977 int 2978 sys_lchown(td, uap) 2979 struct thread *td; 2980 register struct lchown_args /* { 2981 char *path; 2982 int uid; 2983 int gid; 2984 } */ *uap; 2985 { 2986 2987 return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 2988 } 2989 2990 int 2991 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 2992 int gid) 2993 { 2994 2995 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 2996 AT_SYMLINK_NOFOLLOW)); 2997 } 2998 2999 /* 3000 * Set ownership given a file descriptor. 3001 */ 3002 #ifndef _SYS_SYSPROTO_H_ 3003 struct fchown_args { 3004 int fd; 3005 int uid; 3006 int gid; 3007 }; 3008 #endif 3009 int 3010 sys_fchown(td, uap) 3011 struct thread *td; 3012 register struct fchown_args /* { 3013 int fd; 3014 int uid; 3015 int gid; 3016 } */ *uap; 3017 { 3018 struct file *fp; 3019 int error; 3020 3021 AUDIT_ARG_FD(uap->fd); 3022 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3023 error = fget(td, uap->fd, CAP_FCHOWN, &fp); 3024 if (error != 0) 3025 return (error); 3026 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3027 fdrop(fp, td); 3028 return (error); 3029 } 3030 3031 /* 3032 * Common implementation code for utimes(), lutimes(), and futimes(). 
3033 */ 3034 static int 3035 getutimes(usrtvp, tvpseg, tsp) 3036 const struct timeval *usrtvp; 3037 enum uio_seg tvpseg; 3038 struct timespec *tsp; 3039 { 3040 struct timeval tv[2]; 3041 const struct timeval *tvp; 3042 int error; 3043 3044 if (usrtvp == NULL) { 3045 vfs_timestamp(&tsp[0]); 3046 tsp[1] = tsp[0]; 3047 } else { 3048 if (tvpseg == UIO_SYSSPACE) { 3049 tvp = usrtvp; 3050 } else { 3051 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3052 return (error); 3053 tvp = tv; 3054 } 3055 3056 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3057 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3058 return (EINVAL); 3059 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3060 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3061 } 3062 return (0); 3063 } 3064 3065 /* 3066 * Common implementation code for utimes(), lutimes(), and futimes(). 3067 */ 3068 static int 3069 setutimes(td, vp, ts, numtimes, nullflag) 3070 struct thread *td; 3071 struct vnode *vp; 3072 const struct timespec *ts; 3073 int numtimes; 3074 int nullflag; 3075 { 3076 int error, setbirthtime; 3077 struct mount *mp; 3078 struct vattr vattr; 3079 3080 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3081 return (error); 3082 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3083 setbirthtime = 0; 3084 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3085 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3086 setbirthtime = 1; 3087 VATTR_NULL(&vattr); 3088 vattr.va_atime = ts[0]; 3089 vattr.va_mtime = ts[1]; 3090 if (setbirthtime) 3091 vattr.va_birthtime = ts[1]; 3092 if (numtimes > 2) 3093 vattr.va_birthtime = ts[2]; 3094 if (nullflag) 3095 vattr.va_vaflags |= VA_UTIMES_NULL; 3096 #ifdef MAC 3097 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3098 vattr.va_mtime); 3099 #endif 3100 if (error == 0) 3101 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3102 VOP_UNLOCK(vp, 0); 3103 vn_finished_write(mp); 3104 return (error); 3105 } 3106 3107 /* 3108 * Set the access and modification 
times of a file. 3109 */ 3110 #ifndef _SYS_SYSPROTO_H_ 3111 struct utimes_args { 3112 char *path; 3113 struct timeval *tptr; 3114 }; 3115 #endif 3116 int 3117 sys_utimes(td, uap) 3118 struct thread *td; 3119 register struct utimes_args /* { 3120 char *path; 3121 struct timeval *tptr; 3122 } */ *uap; 3123 { 3124 3125 return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3126 UIO_USERSPACE)); 3127 } 3128 3129 #ifndef _SYS_SYSPROTO_H_ 3130 struct futimesat_args { 3131 int fd; 3132 const char * path; 3133 const struct timeval * times; 3134 }; 3135 #endif 3136 int 3137 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3138 { 3139 3140 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3141 uap->times, UIO_USERSPACE)); 3142 } 3143 3144 int 3145 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg, 3146 struct timeval *tptr, enum uio_seg tptrseg) 3147 { 3148 3149 return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg)); 3150 } 3151 3152 int 3153 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3154 struct timeval *tptr, enum uio_seg tptrseg) 3155 { 3156 struct nameidata nd; 3157 struct timespec ts[2]; 3158 int error; 3159 3160 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3161 return (error); 3162 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3163 CAP_FUTIMES, td); 3164 3165 if ((error = namei(&nd)) != 0) 3166 return (error); 3167 NDFREE(&nd, NDF_ONLY_PNBUF); 3168 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3169 vrele(nd.ni_vp); 3170 return (error); 3171 } 3172 3173 /* 3174 * Set the access and modification times of a file. 
3175 */ 3176 #ifndef _SYS_SYSPROTO_H_ 3177 struct lutimes_args { 3178 char *path; 3179 struct timeval *tptr; 3180 }; 3181 #endif 3182 int 3183 sys_lutimes(td, uap) 3184 struct thread *td; 3185 register struct lutimes_args /* { 3186 char *path; 3187 struct timeval *tptr; 3188 } */ *uap; 3189 { 3190 3191 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3192 UIO_USERSPACE)); 3193 } 3194 3195 int 3196 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3197 struct timeval *tptr, enum uio_seg tptrseg) 3198 { 3199 struct timespec ts[2]; 3200 int error; 3201 struct nameidata nd; 3202 3203 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3204 return (error); 3205 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3206 if ((error = namei(&nd)) != 0) 3207 return (error); 3208 NDFREE(&nd, NDF_ONLY_PNBUF); 3209 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3210 vrele(nd.ni_vp); 3211 return (error); 3212 } 3213 3214 /* 3215 * Set the access and modification times of a file. 
3216 */ 3217 #ifndef _SYS_SYSPROTO_H_ 3218 struct futimes_args { 3219 int fd; 3220 struct timeval *tptr; 3221 }; 3222 #endif 3223 int 3224 sys_futimes(td, uap) 3225 struct thread *td; 3226 register struct futimes_args /* { 3227 int fd; 3228 struct timeval *tptr; 3229 } */ *uap; 3230 { 3231 3232 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3233 } 3234 3235 int 3236 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3237 enum uio_seg tptrseg) 3238 { 3239 struct timespec ts[2]; 3240 struct file *fp; 3241 int error; 3242 3243 AUDIT_ARG_FD(fd); 3244 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3245 return (error); 3246 if ((error = getvnode(td->td_proc->p_fd, fd, CAP_FUTIMES, &fp)) != 0) 3247 return (error); 3248 #ifdef AUDIT 3249 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3250 AUDIT_ARG_VNODE1(fp->f_vnode); 3251 VOP_UNLOCK(fp->f_vnode, 0); 3252 #endif 3253 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3254 fdrop(fp, td); 3255 return (error); 3256 } 3257 3258 /* 3259 * Truncate a file given its path name. 
3260 */ 3261 #ifndef _SYS_SYSPROTO_H_ 3262 struct truncate_args { 3263 char *path; 3264 int pad; 3265 off_t length; 3266 }; 3267 #endif 3268 int 3269 sys_truncate(td, uap) 3270 struct thread *td; 3271 register struct truncate_args /* { 3272 char *path; 3273 int pad; 3274 off_t length; 3275 } */ *uap; 3276 { 3277 3278 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3279 } 3280 3281 int 3282 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3283 { 3284 struct mount *mp; 3285 struct vnode *vp; 3286 void *rl_cookie; 3287 struct vattr vattr; 3288 struct nameidata nd; 3289 int error; 3290 3291 if (length < 0) 3292 return(EINVAL); 3293 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3294 if ((error = namei(&nd)) != 0) 3295 return (error); 3296 vp = nd.ni_vp; 3297 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3298 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3299 vn_rangelock_unlock(vp, rl_cookie); 3300 vrele(vp); 3301 return (error); 3302 } 3303 NDFREE(&nd, NDF_ONLY_PNBUF); 3304 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3305 if (vp->v_type == VDIR) 3306 error = EISDIR; 3307 #ifdef MAC 3308 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3309 } 3310 #endif 3311 else if ((error = vn_writechk(vp)) == 0 && 3312 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3313 VATTR_NULL(&vattr); 3314 vattr.va_size = length; 3315 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3316 } 3317 VOP_UNLOCK(vp, 0); 3318 vn_finished_write(mp); 3319 vn_rangelock_unlock(vp, rl_cookie); 3320 vrele(vp); 3321 return (error); 3322 } 3323 3324 #if defined(COMPAT_43) 3325 /* 3326 * Truncate a file given its path name. 
3327 */ 3328 #ifndef _SYS_SYSPROTO_H_ 3329 struct otruncate_args { 3330 char *path; 3331 long length; 3332 }; 3333 #endif 3334 int 3335 otruncate(td, uap) 3336 struct thread *td; 3337 register struct otruncate_args /* { 3338 char *path; 3339 long length; 3340 } */ *uap; 3341 { 3342 struct truncate_args /* { 3343 char *path; 3344 int pad; 3345 off_t length; 3346 } */ nuap; 3347 3348 nuap.path = uap->path; 3349 nuap.length = uap->length; 3350 return (sys_truncate(td, &nuap)); 3351 } 3352 #endif /* COMPAT_43 */ 3353 3354 /* Versions with the pad argument */ 3355 int 3356 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3357 { 3358 struct truncate_args ouap; 3359 3360 ouap.path = uap->path; 3361 ouap.length = uap->length; 3362 return (sys_truncate(td, &ouap)); 3363 } 3364 3365 int 3366 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3367 { 3368 struct ftruncate_args ouap; 3369 3370 ouap.fd = uap->fd; 3371 ouap.length = uap->length; 3372 return (sys_ftruncate(td, &ouap)); 3373 } 3374 3375 /* 3376 * Sync an open file. 
3377 */ 3378 #ifndef _SYS_SYSPROTO_H_ 3379 struct fsync_args { 3380 int fd; 3381 }; 3382 #endif 3383 int 3384 sys_fsync(td, uap) 3385 struct thread *td; 3386 struct fsync_args /* { 3387 int fd; 3388 } */ *uap; 3389 { 3390 struct vnode *vp; 3391 struct mount *mp; 3392 struct file *fp; 3393 int error, lock_flags; 3394 3395 AUDIT_ARG_FD(uap->fd); 3396 if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FSYNC, &fp)) != 0) 3397 return (error); 3398 vp = fp->f_vnode; 3399 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3400 goto drop; 3401 if (MNT_SHARED_WRITES(mp) || 3402 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3403 lock_flags = LK_SHARED; 3404 } else { 3405 lock_flags = LK_EXCLUSIVE; 3406 } 3407 vn_lock(vp, lock_flags | LK_RETRY); 3408 AUDIT_ARG_VNODE1(vp); 3409 if (vp->v_object != NULL) { 3410 VM_OBJECT_WLOCK(vp->v_object); 3411 vm_object_page_clean(vp->v_object, 0, 0, 0); 3412 VM_OBJECT_WUNLOCK(vp->v_object); 3413 } 3414 error = VOP_FSYNC(vp, MNT_WAIT, td); 3415 3416 VOP_UNLOCK(vp, 0); 3417 vn_finished_write(mp); 3418 drop: 3419 fdrop(fp, td); 3420 return (error); 3421 } 3422 3423 /* 3424 * Rename files. Source and destination must either both be directories, or 3425 * both not be directories. If target is a directory, it must be empty. 
3426 */ 3427 #ifndef _SYS_SYSPROTO_H_ 3428 struct rename_args { 3429 char *from; 3430 char *to; 3431 }; 3432 #endif 3433 int 3434 sys_rename(td, uap) 3435 struct thread *td; 3436 register struct rename_args /* { 3437 char *from; 3438 char *to; 3439 } */ *uap; 3440 { 3441 3442 return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE)); 3443 } 3444 3445 #ifndef _SYS_SYSPROTO_H_ 3446 struct renameat_args { 3447 int oldfd; 3448 char *old; 3449 int newfd; 3450 char *new; 3451 }; 3452 #endif 3453 int 3454 sys_renameat(struct thread *td, struct renameat_args *uap) 3455 { 3456 3457 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3458 UIO_USERSPACE)); 3459 } 3460 3461 int 3462 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg) 3463 { 3464 3465 return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg)); 3466 } 3467 3468 int 3469 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3470 enum uio_seg pathseg) 3471 { 3472 struct mount *mp = NULL; 3473 struct vnode *tvp, *fvp, *tdvp; 3474 struct nameidata fromnd, tond; 3475 int error; 3476 3477 bwillwrite(); 3478 #ifdef MAC 3479 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3480 AUDITVNODE1, pathseg, old, oldfd, CAP_RENAMEAT, td); 3481 #else 3482 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3483 pathseg, old, oldfd, CAP_RENAMEAT, td); 3484 #endif 3485 3486 if ((error = namei(&fromnd)) != 0) 3487 return (error); 3488 #ifdef MAC 3489 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3490 fromnd.ni_vp, &fromnd.ni_cnd); 3491 VOP_UNLOCK(fromnd.ni_dvp, 0); 3492 if (fromnd.ni_dvp != fromnd.ni_vp) 3493 VOP_UNLOCK(fromnd.ni_vp, 0); 3494 #endif 3495 fvp = fromnd.ni_vp; 3496 if (error == 0) 3497 error = vn_start_write(fvp, &mp, V_WAIT | PCATCH); 3498 if (error != 0) { 3499 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3500 vrele(fromnd.ni_dvp); 3501 vrele(fvp); 3502 goto out1; 3503 } 3504 NDINIT_ATRIGHTS(&tond, 
RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3505 SAVESTART | AUDITVNODE2, pathseg, new, newfd, CAP_LINKAT, td); 3506 if (fromnd.ni_vp->v_type == VDIR) 3507 tond.ni_cnd.cn_flags |= WILLBEDIR; 3508 if ((error = namei(&tond)) != 0) { 3509 /* Translate error code for rename("dir1", "dir2/."). */ 3510 if (error == EISDIR && fvp->v_type == VDIR) 3511 error = EINVAL; 3512 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3513 vrele(fromnd.ni_dvp); 3514 vrele(fvp); 3515 vn_finished_write(mp); 3516 goto out1; 3517 } 3518 tdvp = tond.ni_dvp; 3519 tvp = tond.ni_vp; 3520 if (tvp != NULL) { 3521 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3522 error = ENOTDIR; 3523 goto out; 3524 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3525 error = EISDIR; 3526 goto out; 3527 } 3528 #ifdef CAPABILITIES 3529 if (newfd != AT_FDCWD) { 3530 /* 3531 * If the target already exists we require CAP_UNLINKAT 3532 * from 'newfd'. 3533 */ 3534 error = cap_check(tond.ni_filecaps.fc_rights, 3535 CAP_UNLINKAT); 3536 if (error != 0) 3537 goto out; 3538 } 3539 #endif 3540 } 3541 if (fvp == tdvp) { 3542 error = EINVAL; 3543 goto out; 3544 } 3545 /* 3546 * If the source is the same as the destination (that is, if they 3547 * are links to the same vnode), then there is nothing to do. 
3548 */ 3549 if (fvp == tvp) 3550 error = -1; 3551 #ifdef MAC 3552 else 3553 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3554 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3555 #endif 3556 out: 3557 if (!error) { 3558 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3559 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3560 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3561 NDFREE(&tond, NDF_ONLY_PNBUF); 3562 } else { 3563 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3564 NDFREE(&tond, NDF_ONLY_PNBUF); 3565 if (tvp) 3566 vput(tvp); 3567 if (tdvp == tvp) 3568 vrele(tdvp); 3569 else 3570 vput(tdvp); 3571 vrele(fromnd.ni_dvp); 3572 vrele(fvp); 3573 } 3574 vrele(tond.ni_startdir); 3575 vn_finished_write(mp); 3576 out1: 3577 if (fromnd.ni_startdir) 3578 vrele(fromnd.ni_startdir); 3579 if (error == -1) 3580 return (0); 3581 return (error); 3582 } 3583 3584 /* 3585 * Make a directory file. 3586 */ 3587 #ifndef _SYS_SYSPROTO_H_ 3588 struct mkdir_args { 3589 char *path; 3590 int mode; 3591 }; 3592 #endif 3593 int 3594 sys_mkdir(td, uap) 3595 struct thread *td; 3596 register struct mkdir_args /* { 3597 char *path; 3598 int mode; 3599 } */ *uap; 3600 { 3601 3602 return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode)); 3603 } 3604 3605 #ifndef _SYS_SYSPROTO_H_ 3606 struct mkdirat_args { 3607 int fd; 3608 char *path; 3609 mode_t mode; 3610 }; 3611 #endif 3612 int 3613 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3614 { 3615 3616 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3617 } 3618 3619 int 3620 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode) 3621 { 3622 3623 return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode)); 3624 } 3625 3626 int 3627 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3628 int mode) 3629 { 3630 struct mount *mp; 3631 struct vnode *vp; 3632 struct vattr vattr; 3633 int error; 3634 struct nameidata nd; 3635 3636 AUDIT_ARG_MODE(mode); 3637 restart: 3638 bwillwrite(); 
3639 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 3640 segflg, path, fd, CAP_MKDIRAT, td); 3641 nd.ni_cnd.cn_flags |= WILLBEDIR; 3642 if ((error = namei(&nd)) != 0) 3643 return (error); 3644 vp = nd.ni_vp; 3645 if (vp != NULL) { 3646 NDFREE(&nd, NDF_ONLY_PNBUF); 3647 /* 3648 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3649 * the strange behaviour of leaving the vnode unlocked 3650 * if the target is the same vnode as the parent. 3651 */ 3652 if (vp == nd.ni_dvp) 3653 vrele(nd.ni_dvp); 3654 else 3655 vput(nd.ni_dvp); 3656 vrele(vp); 3657 return (EEXIST); 3658 } 3659 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3660 NDFREE(&nd, NDF_ONLY_PNBUF); 3661 vput(nd.ni_dvp); 3662 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3663 return (error); 3664 goto restart; 3665 } 3666 VATTR_NULL(&vattr); 3667 vattr.va_type = VDIR; 3668 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3669 #ifdef MAC 3670 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3671 &vattr); 3672 if (error) 3673 goto out; 3674 #endif 3675 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3676 #ifdef MAC 3677 out: 3678 #endif 3679 NDFREE(&nd, NDF_ONLY_PNBUF); 3680 vput(nd.ni_dvp); 3681 if (!error) 3682 vput(nd.ni_vp); 3683 vn_finished_write(mp); 3684 return (error); 3685 } 3686 3687 /* 3688 * Remove a directory file. 
3689 */ 3690 #ifndef _SYS_SYSPROTO_H_ 3691 struct rmdir_args { 3692 char *path; 3693 }; 3694 #endif 3695 int 3696 sys_rmdir(td, uap) 3697 struct thread *td; 3698 struct rmdir_args /* { 3699 char *path; 3700 } */ *uap; 3701 { 3702 3703 return (kern_rmdir(td, uap->path, UIO_USERSPACE)); 3704 } 3705 3706 int 3707 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg) 3708 { 3709 3710 return (kern_rmdirat(td, AT_FDCWD, path, pathseg)); 3711 } 3712 3713 int 3714 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3715 { 3716 struct mount *mp; 3717 struct vnode *vp; 3718 int error; 3719 struct nameidata nd; 3720 3721 restart: 3722 bwillwrite(); 3723 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3724 pathseg, path, fd, CAP_UNLINKAT, td); 3725 if ((error = namei(&nd)) != 0) 3726 return (error); 3727 vp = nd.ni_vp; 3728 if (vp->v_type != VDIR) { 3729 error = ENOTDIR; 3730 goto out; 3731 } 3732 /* 3733 * No rmdir "." please. 3734 */ 3735 if (nd.ni_dvp == vp) { 3736 error = EINVAL; 3737 goto out; 3738 } 3739 /* 3740 * The root of a mounted filesystem cannot be deleted. 
3741 */ 3742 if (vp->v_vflag & VV_ROOT) { 3743 error = EBUSY; 3744 goto out; 3745 } 3746 #ifdef MAC 3747 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3748 &nd.ni_cnd); 3749 if (error) 3750 goto out; 3751 #endif 3752 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3753 NDFREE(&nd, NDF_ONLY_PNBUF); 3754 vput(vp); 3755 if (nd.ni_dvp == vp) 3756 vrele(nd.ni_dvp); 3757 else 3758 vput(nd.ni_dvp); 3759 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3760 return (error); 3761 goto restart; 3762 } 3763 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3764 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3765 vn_finished_write(mp); 3766 out: 3767 NDFREE(&nd, NDF_ONLY_PNBUF); 3768 vput(vp); 3769 if (nd.ni_dvp == vp) 3770 vrele(nd.ni_dvp); 3771 else 3772 vput(nd.ni_dvp); 3773 return (error); 3774 } 3775 3776 #ifdef COMPAT_43 3777 /* 3778 * Read a block of directory entries in a filesystem independent format. 3779 */ 3780 #ifndef _SYS_SYSPROTO_H_ 3781 struct ogetdirentries_args { 3782 int fd; 3783 char *buf; 3784 u_int count; 3785 long *basep; 3786 }; 3787 #endif 3788 int 3789 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3790 { 3791 long loff; 3792 int error; 3793 3794 error = kern_ogetdirentries(td, uap, &loff); 3795 if (error == 0) 3796 error = copyout(&loff, uap->basep, sizeof(long)); 3797 return (error); 3798 } 3799 3800 int 3801 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3802 long *ploff) 3803 { 3804 struct vnode *vp; 3805 struct file *fp; 3806 struct uio auio, kuio; 3807 struct iovec aiov, kiov; 3808 struct dirent *dp, *edp; 3809 caddr_t dirbuf; 3810 int error, eofflag, readcnt; 3811 long loff; 3812 off_t foffset; 3813 3814 /* XXX arbitrary sanity limit on `count'. 
*/ 3815 if (uap->count > 64 * 1024) 3816 return (EINVAL); 3817 if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ, &fp)) != 0) 3818 return (error); 3819 if ((fp->f_flag & FREAD) == 0) { 3820 fdrop(fp, td); 3821 return (EBADF); 3822 } 3823 vp = fp->f_vnode; 3824 foffset = foffset_lock(fp, 0); 3825 unionread: 3826 if (vp->v_type != VDIR) { 3827 foffset_unlock(fp, foffset, 0); 3828 fdrop(fp, td); 3829 return (EINVAL); 3830 } 3831 aiov.iov_base = uap->buf; 3832 aiov.iov_len = uap->count; 3833 auio.uio_iov = &aiov; 3834 auio.uio_iovcnt = 1; 3835 auio.uio_rw = UIO_READ; 3836 auio.uio_segflg = UIO_USERSPACE; 3837 auio.uio_td = td; 3838 auio.uio_resid = uap->count; 3839 vn_lock(vp, LK_SHARED | LK_RETRY); 3840 loff = auio.uio_offset = foffset; 3841 #ifdef MAC 3842 error = mac_vnode_check_readdir(td->td_ucred, vp); 3843 if (error) { 3844 VOP_UNLOCK(vp, 0); 3845 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3846 fdrop(fp, td); 3847 return (error); 3848 } 3849 #endif 3850 # if (BYTE_ORDER != LITTLE_ENDIAN) 3851 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3852 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3853 NULL, NULL); 3854 foffset = auio.uio_offset; 3855 } else 3856 # endif 3857 { 3858 kuio = auio; 3859 kuio.uio_iov = &kiov; 3860 kuio.uio_segflg = UIO_SYSSPACE; 3861 kiov.iov_len = uap->count; 3862 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3863 kiov.iov_base = dirbuf; 3864 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3865 NULL, NULL); 3866 foffset = kuio.uio_offset; 3867 if (error == 0) { 3868 readcnt = uap->count - kuio.uio_resid; 3869 edp = (struct dirent *)&dirbuf[readcnt]; 3870 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3871 # if (BYTE_ORDER == LITTLE_ENDIAN) 3872 /* 3873 * The expected low byte of 3874 * dp->d_namlen is our dp->d_type. 3875 * The high MBZ byte of dp->d_namlen 3876 * is our dp->d_namlen. 
3877 */ 3878 dp->d_type = dp->d_namlen; 3879 dp->d_namlen = 0; 3880 # else 3881 /* 3882 * The dp->d_type is the high byte 3883 * of the expected dp->d_namlen, 3884 * so must be zero'ed. 3885 */ 3886 dp->d_type = 0; 3887 # endif 3888 if (dp->d_reclen > 0) { 3889 dp = (struct dirent *) 3890 ((char *)dp + dp->d_reclen); 3891 } else { 3892 error = EIO; 3893 break; 3894 } 3895 } 3896 if (dp >= edp) 3897 error = uiomove(dirbuf, readcnt, &auio); 3898 } 3899 free(dirbuf, M_TEMP); 3900 } 3901 if (error) { 3902 VOP_UNLOCK(vp, 0); 3903 foffset_unlock(fp, foffset, 0); 3904 fdrop(fp, td); 3905 return (error); 3906 } 3907 if (uap->count == auio.uio_resid && 3908 (vp->v_vflag & VV_ROOT) && 3909 (vp->v_mount->mnt_flag & MNT_UNION)) { 3910 struct vnode *tvp = vp; 3911 vp = vp->v_mount->mnt_vnodecovered; 3912 VREF(vp); 3913 fp->f_vnode = vp; 3914 fp->f_data = vp; 3915 foffset = 0; 3916 vput(tvp); 3917 goto unionread; 3918 } 3919 VOP_UNLOCK(vp, 0); 3920 foffset_unlock(fp, foffset, 0); 3921 fdrop(fp, td); 3922 td->td_retval[0] = uap->count - auio.uio_resid; 3923 if (error == 0) 3924 *ploff = loff; 3925 return (error); 3926 } 3927 #endif /* COMPAT_43 */ 3928 3929 /* 3930 * Read a block of directory entries in a filesystem independent format. 
3931 */ 3932 #ifndef _SYS_SYSPROTO_H_ 3933 struct getdirentries_args { 3934 int fd; 3935 char *buf; 3936 u_int count; 3937 long *basep; 3938 }; 3939 #endif 3940 int 3941 sys_getdirentries(td, uap) 3942 struct thread *td; 3943 register struct getdirentries_args /* { 3944 int fd; 3945 char *buf; 3946 u_int count; 3947 long *basep; 3948 } */ *uap; 3949 { 3950 long base; 3951 int error; 3952 3953 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3954 NULL, UIO_USERSPACE); 3955 if (error) 3956 return (error); 3957 if (uap->basep != NULL) 3958 error = copyout(&base, uap->basep, sizeof(long)); 3959 return (error); 3960 } 3961 3962 int 3963 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 3964 long *basep, ssize_t *residp, enum uio_seg bufseg) 3965 { 3966 struct vnode *vp; 3967 struct file *fp; 3968 struct uio auio; 3969 struct iovec aiov; 3970 long loff; 3971 int error, eofflag; 3972 off_t foffset; 3973 3974 AUDIT_ARG_FD(fd); 3975 if (count > IOSIZE_MAX) 3976 return (EINVAL); 3977 auio.uio_resid = count; 3978 if ((error = getvnode(td->td_proc->p_fd, fd, CAP_READ, &fp)) != 0) 3979 return (error); 3980 if ((fp->f_flag & FREAD) == 0) { 3981 fdrop(fp, td); 3982 return (EBADF); 3983 } 3984 vp = fp->f_vnode; 3985 foffset = foffset_lock(fp, 0); 3986 unionread: 3987 if (vp->v_type != VDIR) { 3988 error = EINVAL; 3989 goto fail; 3990 } 3991 aiov.iov_base = buf; 3992 aiov.iov_len = count; 3993 auio.uio_iov = &aiov; 3994 auio.uio_iovcnt = 1; 3995 auio.uio_rw = UIO_READ; 3996 auio.uio_segflg = bufseg; 3997 auio.uio_td = td; 3998 vn_lock(vp, LK_SHARED | LK_RETRY); 3999 AUDIT_ARG_VNODE1(vp); 4000 loff = auio.uio_offset = foffset; 4001 #ifdef MAC 4002 error = mac_vnode_check_readdir(td->td_ucred, vp); 4003 if (error == 0) 4004 #endif 4005 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4006 NULL); 4007 foffset = auio.uio_offset; 4008 if (error) { 4009 VOP_UNLOCK(vp, 0); 4010 goto fail; 4011 } 4012 if (count == auio.uio_resid && 4013 
(vp->v_vflag & VV_ROOT) && 4014 (vp->v_mount->mnt_flag & MNT_UNION)) { 4015 struct vnode *tvp = vp; 4016 vp = vp->v_mount->mnt_vnodecovered; 4017 VREF(vp); 4018 fp->f_vnode = vp; 4019 fp->f_data = vp; 4020 foffset = 0; 4021 vput(tvp); 4022 goto unionread; 4023 } 4024 VOP_UNLOCK(vp, 0); 4025 *basep = loff; 4026 if (residp != NULL) 4027 *residp = auio.uio_resid; 4028 td->td_retval[0] = count - auio.uio_resid; 4029 fail: 4030 foffset_unlock(fp, foffset, 0); 4031 fdrop(fp, td); 4032 return (error); 4033 } 4034 4035 #ifndef _SYS_SYSPROTO_H_ 4036 struct getdents_args { 4037 int fd; 4038 char *buf; 4039 size_t count; 4040 }; 4041 #endif 4042 int 4043 sys_getdents(td, uap) 4044 struct thread *td; 4045 register struct getdents_args /* { 4046 int fd; 4047 char *buf; 4048 u_int count; 4049 } */ *uap; 4050 { 4051 struct getdirentries_args ap; 4052 ap.fd = uap->fd; 4053 ap.buf = uap->buf; 4054 ap.count = uap->count; 4055 ap.basep = NULL; 4056 return (sys_getdirentries(td, &ap)); 4057 } 4058 4059 /* 4060 * Set the mode mask for creation of filesystem nodes. 4061 */ 4062 #ifndef _SYS_SYSPROTO_H_ 4063 struct umask_args { 4064 int newmask; 4065 }; 4066 #endif 4067 int 4068 sys_umask(td, uap) 4069 struct thread *td; 4070 struct umask_args /* { 4071 int newmask; 4072 } */ *uap; 4073 { 4074 register struct filedesc *fdp; 4075 4076 FILEDESC_XLOCK(td->td_proc->p_fd); 4077 fdp = td->td_proc->p_fd; 4078 td->td_retval[0] = fdp->fd_cmask; 4079 fdp->fd_cmask = uap->newmask & ALLPERMS; 4080 FILEDESC_XUNLOCK(td->td_proc->p_fd); 4081 return (0); 4082 } 4083 4084 /* 4085 * Void all references to file by ripping underlying filesystem away from 4086 * vnode. 
4087 */ 4088 #ifndef _SYS_SYSPROTO_H_ 4089 struct revoke_args { 4090 char *path; 4091 }; 4092 #endif 4093 int 4094 sys_revoke(td, uap) 4095 struct thread *td; 4096 register struct revoke_args /* { 4097 char *path; 4098 } */ *uap; 4099 { 4100 struct vnode *vp; 4101 struct vattr vattr; 4102 int error; 4103 struct nameidata nd; 4104 4105 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4106 uap->path, td); 4107 if ((error = namei(&nd)) != 0) 4108 return (error); 4109 vp = nd.ni_vp; 4110 NDFREE(&nd, NDF_ONLY_PNBUF); 4111 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4112 error = EINVAL; 4113 goto out; 4114 } 4115 #ifdef MAC 4116 error = mac_vnode_check_revoke(td->td_ucred, vp); 4117 if (error) 4118 goto out; 4119 #endif 4120 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4121 if (error) 4122 goto out; 4123 if (td->td_ucred->cr_uid != vattr.va_uid) { 4124 error = priv_check(td, PRIV_VFS_ADMIN); 4125 if (error) 4126 goto out; 4127 } 4128 if (vcount(vp) > 1) 4129 VOP_REVOKE(vp, REVOKEALL); 4130 out: 4131 vput(vp); 4132 return (error); 4133 } 4134 4135 /* 4136 * Convert a user file descriptor to a kernel file entry and check that, if it 4137 * is a capability, the correct rights are present. A reference on the file 4138 * entry is held upon returning. 4139 */ 4140 int 4141 getvnode(struct filedesc *fdp, int fd, cap_rights_t rights, struct file **fpp) 4142 { 4143 struct file *fp; 4144 int error; 4145 4146 error = fget_unlocked(fdp, fd, rights, 0, &fp, NULL); 4147 if (error != 0) 4148 return (error); 4149 4150 /* 4151 * The file could be not of the vnode type, or it may be not 4152 * yet fully initialized, in which case the f_vnode pointer 4153 * may be set, but f_ops is still badfileops. E.g., 4154 * devfs_open() transiently create such situation to 4155 * facilitate csw d_fdopen(). 4156 * 4157 * Dupfdopen() handling in kern_openat() installs the 4158 * half-baked file into the process descriptor table, allowing 4159 * other thread to dereference it. 
Guard against the race by 4160 * checking f_ops. 4161 */ 4162 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4163 fdrop(fp, curthread); 4164 return (EINVAL); 4165 } 4166 *fpp = fp; 4167 return (0); 4168 } 4169 4170 4171 /* 4172 * Get an (NFS) file handle. 4173 */ 4174 #ifndef _SYS_SYSPROTO_H_ 4175 struct lgetfh_args { 4176 char *fname; 4177 fhandle_t *fhp; 4178 }; 4179 #endif 4180 int 4181 sys_lgetfh(td, uap) 4182 struct thread *td; 4183 register struct lgetfh_args *uap; 4184 { 4185 struct nameidata nd; 4186 fhandle_t fh; 4187 register struct vnode *vp; 4188 int error; 4189 4190 error = priv_check(td, PRIV_VFS_GETFH); 4191 if (error) 4192 return (error); 4193 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4194 uap->fname, td); 4195 error = namei(&nd); 4196 if (error) 4197 return (error); 4198 NDFREE(&nd, NDF_ONLY_PNBUF); 4199 vp = nd.ni_vp; 4200 bzero(&fh, sizeof(fh)); 4201 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4202 error = VOP_VPTOFH(vp, &fh.fh_fid); 4203 vput(vp); 4204 if (error) 4205 return (error); 4206 error = copyout(&fh, uap->fhp, sizeof (fh)); 4207 return (error); 4208 } 4209 4210 #ifndef _SYS_SYSPROTO_H_ 4211 struct getfh_args { 4212 char *fname; 4213 fhandle_t *fhp; 4214 }; 4215 #endif 4216 int 4217 sys_getfh(td, uap) 4218 struct thread *td; 4219 register struct getfh_args *uap; 4220 { 4221 struct nameidata nd; 4222 fhandle_t fh; 4223 register struct vnode *vp; 4224 int error; 4225 4226 error = priv_check(td, PRIV_VFS_GETFH); 4227 if (error) 4228 return (error); 4229 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4230 uap->fname, td); 4231 error = namei(&nd); 4232 if (error) 4233 return (error); 4234 NDFREE(&nd, NDF_ONLY_PNBUF); 4235 vp = nd.ni_vp; 4236 bzero(&fh, sizeof(fh)); 4237 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4238 error = VOP_VPTOFH(vp, &fh.fh_fid); 4239 vput(vp); 4240 if (error) 4241 return (error); 4242 error = copyout(&fh, uap->fhp, sizeof (fh)); 4243 return (error); 4244 } 4245 
4246 /* 4247 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4248 * open descriptor. 4249 * 4250 * warning: do not remove the priv_check() call or this becomes one giant 4251 * security hole. 4252 */ 4253 #ifndef _SYS_SYSPROTO_H_ 4254 struct fhopen_args { 4255 const struct fhandle *u_fhp; 4256 int flags; 4257 }; 4258 #endif 4259 int 4260 sys_fhopen(td, uap) 4261 struct thread *td; 4262 struct fhopen_args /* { 4263 const struct fhandle *u_fhp; 4264 int flags; 4265 } */ *uap; 4266 { 4267 struct mount *mp; 4268 struct vnode *vp; 4269 struct fhandle fhp; 4270 struct file *fp; 4271 int fmode, error; 4272 int indx; 4273 4274 error = priv_check(td, PRIV_VFS_FHOPEN); 4275 if (error) 4276 return (error); 4277 indx = -1; 4278 fmode = FFLAGS(uap->flags); 4279 /* why not allow a non-read/write open for our lockd? */ 4280 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4281 return (EINVAL); 4282 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4283 if (error) 4284 return(error); 4285 /* find the mount point */ 4286 mp = vfs_busyfs(&fhp.fh_fsid); 4287 if (mp == NULL) 4288 return (ESTALE); 4289 /* now give me my vnode, it gets returned to me locked */ 4290 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4291 vfs_unbusy(mp); 4292 if (error) 4293 return (error); 4294 4295 error = falloc_noinstall(td, &fp); 4296 if (error) { 4297 vput(vp); 4298 return (error); 4299 } 4300 /* 4301 * An extra reference on `fp' has been held for us by 4302 * falloc_noinstall(). 
4303 */ 4304 4305 #ifdef INVARIANTS 4306 td->td_dupfd = -1; 4307 #endif 4308 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4309 if (error) { 4310 KASSERT(fp->f_ops == &badfileops, 4311 ("VOP_OPEN in fhopen() set f_ops")); 4312 KASSERT(td->td_dupfd < 0, 4313 ("fhopen() encountered fdopen()")); 4314 4315 vput(vp); 4316 goto bad; 4317 } 4318 #ifdef INVARIANTS 4319 td->td_dupfd = 0; 4320 #endif 4321 fp->f_vnode = vp; 4322 fp->f_seqcount = 1; 4323 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4324 &vnops); 4325 VOP_UNLOCK(vp, 0); 4326 if (fmode & O_TRUNC) { 4327 error = fo_truncate(fp, 0, td->td_ucred, td); 4328 if (error) 4329 goto bad; 4330 } 4331 4332 error = finstall(td, fp, &indx, fmode, NULL); 4333 bad: 4334 fdrop(fp, td); 4335 td->td_retval[0] = indx; 4336 return (error); 4337 } 4338 4339 /* 4340 * Stat an (NFS) file handle. 4341 */ 4342 #ifndef _SYS_SYSPROTO_H_ 4343 struct fhstat_args { 4344 struct fhandle *u_fhp; 4345 struct stat *sb; 4346 }; 4347 #endif 4348 int 4349 sys_fhstat(td, uap) 4350 struct thread *td; 4351 register struct fhstat_args /* { 4352 struct fhandle *u_fhp; 4353 struct stat *sb; 4354 } */ *uap; 4355 { 4356 struct stat sb; 4357 struct fhandle fh; 4358 int error; 4359 4360 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4361 if (error != 0) 4362 return (error); 4363 error = kern_fhstat(td, fh, &sb); 4364 if (error != 0) 4365 return (error); 4366 error = copyout(&sb, uap->sb, sizeof(sb)); 4367 return (error); 4368 } 4369 4370 int 4371 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4372 { 4373 struct mount *mp; 4374 struct vnode *vp; 4375 int error; 4376 4377 error = priv_check(td, PRIV_VFS_FHSTAT); 4378 if (error) 4379 return (error); 4380 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4381 return (ESTALE); 4382 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4383 vfs_unbusy(mp); 4384 if (error) 4385 return (error); 4386 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4387 vput(vp); 4388 
return (error); 4389 } 4390 4391 /* 4392 * Implement fstatfs() for (NFS) file handles. 4393 */ 4394 #ifndef _SYS_SYSPROTO_H_ 4395 struct fhstatfs_args { 4396 struct fhandle *u_fhp; 4397 struct statfs *buf; 4398 }; 4399 #endif 4400 int 4401 sys_fhstatfs(td, uap) 4402 struct thread *td; 4403 struct fhstatfs_args /* { 4404 struct fhandle *u_fhp; 4405 struct statfs *buf; 4406 } */ *uap; 4407 { 4408 struct statfs sf; 4409 fhandle_t fh; 4410 int error; 4411 4412 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4413 if (error) 4414 return (error); 4415 error = kern_fhstatfs(td, fh, &sf); 4416 if (error) 4417 return (error); 4418 return (copyout(&sf, uap->buf, sizeof(sf))); 4419 } 4420 4421 int 4422 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4423 { 4424 struct statfs *sp; 4425 struct mount *mp; 4426 struct vnode *vp; 4427 int error; 4428 4429 error = priv_check(td, PRIV_VFS_FHSTATFS); 4430 if (error) 4431 return (error); 4432 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4433 return (ESTALE); 4434 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4435 if (error) { 4436 vfs_unbusy(mp); 4437 return (error); 4438 } 4439 vput(vp); 4440 error = prison_canseemount(td->td_ucred, mp); 4441 if (error) 4442 goto out; 4443 #ifdef MAC 4444 error = mac_mount_check_stat(td->td_ucred, mp); 4445 if (error) 4446 goto out; 4447 #endif 4448 /* 4449 * Set these in case the underlying filesystem fails to do so. 
4450 */ 4451 sp = &mp->mnt_stat; 4452 sp->f_version = STATFS_VERSION; 4453 sp->f_namemax = NAME_MAX; 4454 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4455 error = VFS_STATFS(mp, sp); 4456 if (error == 0) 4457 *buf = *sp; 4458 out: 4459 vfs_unbusy(mp); 4460 return (error); 4461 } 4462 4463 int 4464 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4465 { 4466 struct file *fp; 4467 struct mount *mp; 4468 struct vnode *vp; 4469 off_t olen, ooffset; 4470 int error; 4471 4472 fp = NULL; 4473 error = fget(td, fd, CAP_WRITE, &fp); 4474 if (error != 0) 4475 goto out; 4476 4477 switch (fp->f_type) { 4478 case DTYPE_VNODE: 4479 break; 4480 case DTYPE_PIPE: 4481 case DTYPE_FIFO: 4482 error = ESPIPE; 4483 goto out; 4484 default: 4485 error = ENODEV; 4486 goto out; 4487 } 4488 if ((fp->f_flag & FWRITE) == 0) { 4489 error = EBADF; 4490 goto out; 4491 } 4492 vp = fp->f_vnode; 4493 if (vp->v_type != VREG) { 4494 error = ENODEV; 4495 goto out; 4496 } 4497 if (offset < 0 || len <= 0) { 4498 error = EINVAL; 4499 goto out; 4500 } 4501 /* Check for wrap. */ 4502 if (offset > OFF_MAX - len) { 4503 error = EFBIG; 4504 goto out; 4505 } 4506 4507 /* Allocating blocks may take a long time, so iterate. 
*/ 4508 for (;;) { 4509 olen = len; 4510 ooffset = offset; 4511 4512 bwillwrite(); 4513 mp = NULL; 4514 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4515 if (error != 0) 4516 break; 4517 error = vn_lock(vp, LK_EXCLUSIVE); 4518 if (error != 0) { 4519 vn_finished_write(mp); 4520 break; 4521 } 4522 #ifdef MAC 4523 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4524 if (error == 0) 4525 #endif 4526 error = VOP_ALLOCATE(vp, &offset, &len); 4527 VOP_UNLOCK(vp, 0); 4528 vn_finished_write(mp); 4529 4530 if (olen + ooffset != offset + len) { 4531 panic("offset + len changed from %jx/%jx to %jx/%jx", 4532 ooffset, olen, offset, len); 4533 } 4534 if (error != 0 || len == 0) 4535 break; 4536 KASSERT(olen > len, ("Iteration did not make progress?")); 4537 maybe_yield(); 4538 } 4539 out: 4540 if (fp != NULL) 4541 fdrop(fp, td); 4542 return (error); 4543 } 4544 4545 int 4546 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4547 { 4548 4549 return (kern_posix_fallocate(td, uap->fd, uap->offset, uap->len)); 4550 } 4551 4552 /* 4553 * Unlike madvise(2), we do not make a best effort to remember every 4554 * possible caching hint. Instead, we remember the last setting with 4555 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4556 * region of any current setting. 
4557 */ 4558 int 4559 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4560 int advice) 4561 { 4562 struct fadvise_info *fa, *new; 4563 struct file *fp; 4564 struct vnode *vp; 4565 off_t end; 4566 int error; 4567 4568 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4569 return (EINVAL); 4570 switch (advice) { 4571 case POSIX_FADV_SEQUENTIAL: 4572 case POSIX_FADV_RANDOM: 4573 case POSIX_FADV_NOREUSE: 4574 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4575 break; 4576 case POSIX_FADV_NORMAL: 4577 case POSIX_FADV_WILLNEED: 4578 case POSIX_FADV_DONTNEED: 4579 new = NULL; 4580 break; 4581 default: 4582 return (EINVAL); 4583 } 4584 /* XXX: CAP_POSIX_FADVISE? */ 4585 error = fget(td, fd, CAP_NONE, &fp); 4586 if (error != 0) 4587 goto out; 4588 4589 switch (fp->f_type) { 4590 case DTYPE_VNODE: 4591 break; 4592 case DTYPE_PIPE: 4593 case DTYPE_FIFO: 4594 error = ESPIPE; 4595 goto out; 4596 default: 4597 error = ENODEV; 4598 goto out; 4599 } 4600 vp = fp->f_vnode; 4601 if (vp->v_type != VREG) { 4602 error = ENODEV; 4603 goto out; 4604 } 4605 if (len == 0) 4606 end = OFF_MAX; 4607 else 4608 end = offset + len - 1; 4609 switch (advice) { 4610 case POSIX_FADV_SEQUENTIAL: 4611 case POSIX_FADV_RANDOM: 4612 case POSIX_FADV_NOREUSE: 4613 /* 4614 * Try to merge any existing non-standard region with 4615 * this new region if possible, otherwise create a new 4616 * non-standard region for this request. 
4617 */ 4618 mtx_pool_lock(mtxpool_sleep, fp); 4619 fa = fp->f_advice; 4620 if (fa != NULL && fa->fa_advice == advice && 4621 ((fa->fa_start <= end && fa->fa_end >= offset) || 4622 (end != OFF_MAX && fa->fa_start == end + 1) || 4623 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4624 if (offset < fa->fa_start) 4625 fa->fa_start = offset; 4626 if (end > fa->fa_end) 4627 fa->fa_end = end; 4628 } else { 4629 new->fa_advice = advice; 4630 new->fa_start = offset; 4631 new->fa_end = end; 4632 new->fa_prevstart = 0; 4633 new->fa_prevend = 0; 4634 fp->f_advice = new; 4635 new = fa; 4636 } 4637 mtx_pool_unlock(mtxpool_sleep, fp); 4638 break; 4639 case POSIX_FADV_NORMAL: 4640 /* 4641 * If a the "normal" region overlaps with an existing 4642 * non-standard region, trim or remove the 4643 * non-standard region. 4644 */ 4645 mtx_pool_lock(mtxpool_sleep, fp); 4646 fa = fp->f_advice; 4647 if (fa != NULL) { 4648 if (offset <= fa->fa_start && end >= fa->fa_end) { 4649 new = fa; 4650 fp->f_advice = NULL; 4651 } else if (offset <= fa->fa_start && 4652 end >= fa->fa_start) 4653 fa->fa_start = end + 1; 4654 else if (offset <= fa->fa_end && end >= fa->fa_end) 4655 fa->fa_end = offset - 1; 4656 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4657 /* 4658 * If the "normal" region is a middle 4659 * portion of the existing 4660 * non-standard region, just remove 4661 * the whole thing rather than picking 4662 * one side or the other to 4663 * preserve. 
4664 */ 4665 new = fa; 4666 fp->f_advice = NULL; 4667 } 4668 } 4669 mtx_pool_unlock(mtxpool_sleep, fp); 4670 break; 4671 case POSIX_FADV_WILLNEED: 4672 case POSIX_FADV_DONTNEED: 4673 error = VOP_ADVISE(vp, offset, end, advice); 4674 break; 4675 } 4676 out: 4677 if (fp != NULL) 4678 fdrop(fp, td); 4679 free(new, M_FADVISE); 4680 return (error); 4681 } 4682 4683 int 4684 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4685 { 4686 4687 return (kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4688 uap->advice)); 4689 } 4690