1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/sysent.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/sysproto.h> 55 #include <sys/namei.h> 56 #include <sys/filedesc.h> 57 #include <sys/kernel.h> 58 #include <sys/fcntl.h> 59 #include <sys/file.h> 60 #include <sys/filio.h> 61 #include <sys/limits.h> 62 #include <sys/linker.h> 63 #include <sys/rwlock.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97 static int chroot_refuse_vdir_fds(struct filedesc *fdp); 98 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 99 static int kern_chflagsat(struct thread *td, int fd, const char *path, 100 enum uio_seg pathseg, u_long flags, int atflag); 101 static int setfflags(struct thread *td, struct vnode *, u_long); 102 static int setutimes(struct thread *td, struct vnode *, 103 const struct timespec *, int, int); 104 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 105 struct thread *td); 106 107 /* 108 * The module initialization routine for POSIX asynchronous I/O will 109 * set this to the version of AIO that it implements. (Zero means 110 * that it is not implemented.) This value is used here by pathconf() 111 * and in kern_descrip.c by fpathconf(). 112 */ 113 int async_io_version; 114 115 /* 116 * Sync each mounted filesystem. 117 */ 118 #ifndef _SYS_SYSPROTO_H_ 119 struct sync_args { 120 int dummy; 121 }; 122 #endif 123 /* ARGSUSED */ 124 int 125 sys_sync(td, uap) 126 struct thread *td; 127 struct sync_args *uap; 128 { 129 struct mount *mp, *nmp; 130 int save; 131 132 mtx_lock(&mountlist_mtx); 133 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 134 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 135 nmp = TAILQ_NEXT(mp, mnt_list); 136 continue; 137 } 138 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 139 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 140 save = curthread_pflags_set(TDP_SYNCIO); 141 vfs_msync(mp, MNT_NOWAIT); 142 VFS_SYNC(mp, MNT_NOWAIT); 143 curthread_pflags_restore(save); 144 vn_finished_write(mp); 145 } 146 mtx_lock(&mountlist_mtx); 147 nmp = TAILQ_NEXT(mp, mnt_list); 148 vfs_unbusy(mp); 149 } 150 mtx_unlock(&mountlist_mtx); 151 return (0); 152 } 153 154 /* 155 * Change filesystem quotas. 156 */ 157 #ifndef _SYS_SYSPROTO_H_ 158 struct quotactl_args { 159 char *path; 160 int cmd; 161 int uid; 162 caddr_t arg; 163 }; 164 #endif 165 int 166 sys_quotactl(td, uap) 167 struct thread *td; 168 register struct quotactl_args /* { 169 char *path; 170 int cmd; 171 int uid; 172 caddr_t arg; 173 } */ *uap; 174 { 175 struct mount *mp; 176 struct nameidata nd; 177 int error; 178 179 AUDIT_ARG_CMD(uap->cmd); 180 AUDIT_ARG_UID(uap->uid); 181 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 182 return (EPERM); 183 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 184 uap->path, td); 185 if ((error = namei(&nd)) != 0) 186 return (error); 187 NDFREE(&nd, NDF_ONLY_PNBUF); 188 mp = nd.ni_vp->v_mount; 189 vfs_ref(mp); 190 vput(nd.ni_vp); 191 error = vfs_busy(mp, 0); 192 vfs_rel(mp); 193 if (error != 0) 194 return (error); 195 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 196 197 /* 198 * Since quota on operation typically needs to open quota 199 * file, the Q_QUOTAON handler needs to unbusy the mount point 200 * before calling into namei. Otherwise, unmount might be 201 * started between two vfs_busy() invocations (first is our, 202 * second is from mount point cross-walk code in lookup()), 203 * causing deadlock. 204 * 205 * Require that Q_QUOTAON handles the vfs_busy() reference on 206 * its own, always returning with ubusied mount point. 207 */ 208 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 209 vfs_unbusy(mp); 210 return (error); 211 } 212 213 /* 214 * Used by statfs conversion routines to scale the block size up if 215 * necessary so that all of the block counts are <= 'max_size'. Note 216 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 217 * value of 'n'. 218 */ 219 void 220 statfs_scale_blocks(struct statfs *sf, long max_size) 221 { 222 uint64_t count; 223 int shift; 224 225 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 226 227 /* 228 * Attempt to scale the block counts to give a more accurate 229 * overview to userland of the ratio of free space to used 230 * space. To do this, find the largest block count and compute 231 * a divisor that lets it fit into a signed integer <= max_size. 232 */ 233 if (sf->f_bavail < 0) 234 count = -sf->f_bavail; 235 else 236 count = sf->f_bavail; 237 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 238 if (count <= max_size) 239 return; 240 241 count >>= flsl(max_size); 242 shift = 0; 243 while (count > 0) { 244 shift++; 245 count >>=1; 246 } 247 248 sf->f_bsize <<= shift; 249 sf->f_blocks >>= shift; 250 sf->f_bfree >>= shift; 251 sf->f_bavail >>= shift; 252 } 253 254 /* 255 * Get filesystem statistics. 256 */ 257 #ifndef _SYS_SYSPROTO_H_ 258 struct statfs_args { 259 char *path; 260 struct statfs *buf; 261 }; 262 #endif 263 int 264 sys_statfs(td, uap) 265 struct thread *td; 266 register struct statfs_args /* { 267 char *path; 268 struct statfs *buf; 269 } */ *uap; 270 { 271 struct statfs sf; 272 int error; 273 274 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 275 if (error == 0) 276 error = copyout(&sf, uap->buf, sizeof(sf)); 277 return (error); 278 } 279 280 int 281 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 282 struct statfs *buf) 283 { 284 struct mount *mp; 285 struct statfs *sp, sb; 286 struct nameidata nd; 287 int error; 288 289 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 290 pathseg, path, td); 291 error = namei(&nd); 292 if (error != 0) 293 return (error); 294 mp = nd.ni_vp->v_mount; 295 vfs_ref(mp); 296 NDFREE(&nd, NDF_ONLY_PNBUF); 297 vput(nd.ni_vp); 298 error = vfs_busy(mp, 0); 299 vfs_rel(mp); 300 if (error != 0) 301 return (error); 302 #ifdef MAC 303 error = mac_mount_check_stat(td->td_ucred, mp); 304 if (error != 0) 305 goto out; 306 #endif 307 /* 308 * Set these in case the underlying filesystem fails to do so. 309 */ 310 sp = &mp->mnt_stat; 311 sp->f_version = STATFS_VERSION; 312 sp->f_namemax = NAME_MAX; 313 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 314 error = VFS_STATFS(mp, sp); 315 if (error != 0) 316 goto out; 317 if (priv_check(td, PRIV_VFS_GENERATION)) { 318 bcopy(sp, &sb, sizeof(sb)); 319 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 320 prison_enforce_statfs(td->td_ucred, mp, &sb); 321 sp = &sb; 322 } 323 *buf = *sp; 324 out: 325 vfs_unbusy(mp); 326 return (error); 327 } 328 329 /* 330 * Get filesystem statistics. 331 */ 332 #ifndef _SYS_SYSPROTO_H_ 333 struct fstatfs_args { 334 int fd; 335 struct statfs *buf; 336 }; 337 #endif 338 int 339 sys_fstatfs(td, uap) 340 struct thread *td; 341 register struct fstatfs_args /* { 342 int fd; 343 struct statfs *buf; 344 } */ *uap; 345 { 346 struct statfs sf; 347 int error; 348 349 error = kern_fstatfs(td, uap->fd, &sf); 350 if (error == 0) 351 error = copyout(&sf, uap->buf, sizeof(sf)); 352 return (error); 353 } 354 355 int 356 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 357 { 358 struct file *fp; 359 struct mount *mp; 360 struct statfs *sp, sb; 361 struct vnode *vp; 362 cap_rights_t rights; 363 int error; 364 365 AUDIT_ARG_FD(fd); 366 error = getvnode(td->td_proc->p_fd, fd, 367 cap_rights_init(&rights, CAP_FSTATFS), &fp); 368 if (error != 0) 369 return (error); 370 vp = fp->f_vnode; 371 vn_lock(vp, LK_SHARED | LK_RETRY); 372 #ifdef AUDIT 373 AUDIT_ARG_VNODE1(vp); 374 #endif 375 mp = vp->v_mount; 376 if (mp) 377 vfs_ref(mp); 378 VOP_UNLOCK(vp, 0); 379 fdrop(fp, td); 380 if (mp == NULL) { 381 error = EBADF; 382 goto out; 383 } 384 error = vfs_busy(mp, 0); 385 vfs_rel(mp); 386 if (error != 0) 387 return (error); 388 #ifdef MAC 389 error = mac_mount_check_stat(td->td_ucred, mp); 390 if (error != 0) 391 goto out; 392 #endif 393 /* 394 * Set these in case the underlying filesystem fails to do so. 395 */ 396 sp = &mp->mnt_stat; 397 sp->f_version = STATFS_VERSION; 398 sp->f_namemax = NAME_MAX; 399 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 400 error = VFS_STATFS(mp, sp); 401 if (error != 0) 402 goto out; 403 if (priv_check(td, PRIV_VFS_GENERATION)) { 404 bcopy(sp, &sb, sizeof(sb)); 405 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 406 prison_enforce_statfs(td->td_ucred, mp, &sb); 407 sp = &sb; 408 } 409 *buf = *sp; 410 out: 411 if (mp) 412 vfs_unbusy(mp); 413 return (error); 414 } 415 416 /* 417 * Get statistics on all filesystems. 418 */ 419 #ifndef _SYS_SYSPROTO_H_ 420 struct getfsstat_args { 421 struct statfs *buf; 422 long bufsize; 423 int flags; 424 }; 425 #endif 426 int 427 sys_getfsstat(td, uap) 428 struct thread *td; 429 register struct getfsstat_args /* { 430 struct statfs *buf; 431 long bufsize; 432 int flags; 433 } */ *uap; 434 { 435 436 return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE, 437 uap->flags)); 438 } 439 440 /* 441 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 442 * The caller is responsible for freeing memory which will be allocated 443 * in '*buf'. 444 */ 445 int 446 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 447 enum uio_seg bufseg, int flags) 448 { 449 struct mount *mp, *nmp; 450 struct statfs *sfsp, *sp, sb; 451 size_t count, maxcount; 452 int error; 453 454 maxcount = bufsize / sizeof(struct statfs); 455 if (bufsize == 0) 456 sfsp = NULL; 457 else if (bufseg == UIO_USERSPACE) 458 sfsp = *buf; 459 else /* if (bufseg == UIO_SYSSPACE) */ { 460 count = 0; 461 mtx_lock(&mountlist_mtx); 462 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 463 count++; 464 } 465 mtx_unlock(&mountlist_mtx); 466 if (maxcount > count) 467 maxcount = count; 468 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 469 M_WAITOK); 470 } 471 count = 0; 472 mtx_lock(&mountlist_mtx); 473 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 474 if (prison_canseemount(td->td_ucred, mp) != 0) { 475 nmp = TAILQ_NEXT(mp, mnt_list); 476 continue; 477 } 478 #ifdef MAC 479 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 480 nmp = TAILQ_NEXT(mp, mnt_list); 481 continue; 482 } 483 #endif 484 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 485 nmp = TAILQ_NEXT(mp, mnt_list); 486 continue; 487 } 488 if (sfsp && count < maxcount) { 489 sp = &mp->mnt_stat; 490 /* 491 * Set these in case the underlying filesystem 492 * fails to do so. 493 */ 494 sp->f_version = STATFS_VERSION; 495 sp->f_namemax = NAME_MAX; 496 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 497 /* 498 * If MNT_NOWAIT or MNT_LAZY is specified, do not 499 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 500 * overrides MNT_WAIT. 501 */ 502 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 503 (flags & MNT_WAIT)) && 504 (error = VFS_STATFS(mp, sp))) { 505 mtx_lock(&mountlist_mtx); 506 nmp = TAILQ_NEXT(mp, mnt_list); 507 vfs_unbusy(mp); 508 continue; 509 } 510 if (priv_check(td, PRIV_VFS_GENERATION)) { 511 bcopy(sp, &sb, sizeof(sb)); 512 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 513 prison_enforce_statfs(td->td_ucred, mp, &sb); 514 sp = &sb; 515 } 516 if (bufseg == UIO_SYSSPACE) 517 bcopy(sp, sfsp, sizeof(*sp)); 518 else /* if (bufseg == UIO_USERSPACE) */ { 519 error = copyout(sp, sfsp, sizeof(*sp)); 520 if (error != 0) { 521 vfs_unbusy(mp); 522 return (error); 523 } 524 } 525 sfsp++; 526 } 527 count++; 528 mtx_lock(&mountlist_mtx); 529 nmp = TAILQ_NEXT(mp, mnt_list); 530 vfs_unbusy(mp); 531 } 532 mtx_unlock(&mountlist_mtx); 533 if (sfsp && count > maxcount) 534 td->td_retval[0] = maxcount; 535 else 536 td->td_retval[0] = count; 537 return (0); 538 } 539 540 #ifdef COMPAT_FREEBSD4 541 /* 542 * Get old format filesystem statistics. 543 */ 544 static void cvtstatfs(struct statfs *, struct ostatfs *); 545 546 #ifndef _SYS_SYSPROTO_H_ 547 struct freebsd4_statfs_args { 548 char *path; 549 struct ostatfs *buf; 550 }; 551 #endif 552 int 553 freebsd4_statfs(td, uap) 554 struct thread *td; 555 struct freebsd4_statfs_args /* { 556 char *path; 557 struct ostatfs *buf; 558 } */ *uap; 559 { 560 struct ostatfs osb; 561 struct statfs sf; 562 int error; 563 564 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 565 if (error != 0) 566 return (error); 567 cvtstatfs(&sf, &osb); 568 return (copyout(&osb, uap->buf, sizeof(osb))); 569 } 570 571 /* 572 * Get filesystem statistics. 573 */ 574 #ifndef _SYS_SYSPROTO_H_ 575 struct freebsd4_fstatfs_args { 576 int fd; 577 struct ostatfs *buf; 578 }; 579 #endif 580 int 581 freebsd4_fstatfs(td, uap) 582 struct thread *td; 583 struct freebsd4_fstatfs_args /* { 584 int fd; 585 struct ostatfs *buf; 586 } */ *uap; 587 { 588 struct ostatfs osb; 589 struct statfs sf; 590 int error; 591 592 error = kern_fstatfs(td, uap->fd, &sf); 593 if (error != 0) 594 return (error); 595 cvtstatfs(&sf, &osb); 596 return (copyout(&osb, uap->buf, sizeof(osb))); 597 } 598 599 /* 600 * Get statistics on all filesystems. 601 */ 602 #ifndef _SYS_SYSPROTO_H_ 603 struct freebsd4_getfsstat_args { 604 struct ostatfs *buf; 605 long bufsize; 606 int flags; 607 }; 608 #endif 609 int 610 freebsd4_getfsstat(td, uap) 611 struct thread *td; 612 register struct freebsd4_getfsstat_args /* { 613 struct ostatfs *buf; 614 long bufsize; 615 int flags; 616 } */ *uap; 617 { 618 struct statfs *buf, *sp; 619 struct ostatfs osb; 620 size_t count, size; 621 int error; 622 623 count = uap->bufsize / sizeof(struct ostatfs); 624 size = count * sizeof(struct statfs); 625 error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags); 626 if (size > 0) { 627 count = td->td_retval[0]; 628 sp = buf; 629 while (count > 0 && error == 0) { 630 cvtstatfs(sp, &osb); 631 error = copyout(&osb, uap->buf, sizeof(osb)); 632 sp++; 633 uap->buf++; 634 count--; 635 } 636 free(buf, M_TEMP); 637 } 638 return (error); 639 } 640 641 /* 642 * Implement fstatfs() for (NFS) file handles. 643 */ 644 #ifndef _SYS_SYSPROTO_H_ 645 struct freebsd4_fhstatfs_args { 646 struct fhandle *u_fhp; 647 struct ostatfs *buf; 648 }; 649 #endif 650 int 651 freebsd4_fhstatfs(td, uap) 652 struct thread *td; 653 struct freebsd4_fhstatfs_args /* { 654 struct fhandle *u_fhp; 655 struct ostatfs *buf; 656 } */ *uap; 657 { 658 struct ostatfs osb; 659 struct statfs sf; 660 fhandle_t fh; 661 int error; 662 663 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 664 if (error != 0) 665 return (error); 666 error = kern_fhstatfs(td, fh, &sf); 667 if (error != 0) 668 return (error); 669 cvtstatfs(&sf, &osb); 670 return (copyout(&osb, uap->buf, sizeof(osb))); 671 } 672 673 /* 674 * Convert a new format statfs structure to an old format statfs structure. 675 */ 676 static void 677 cvtstatfs(nsp, osp) 678 struct statfs *nsp; 679 struct ostatfs *osp; 680 { 681 682 statfs_scale_blocks(nsp, LONG_MAX); 683 bzero(osp, sizeof(*osp)); 684 osp->f_bsize = nsp->f_bsize; 685 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 686 osp->f_blocks = nsp->f_blocks; 687 osp->f_bfree = nsp->f_bfree; 688 osp->f_bavail = nsp->f_bavail; 689 osp->f_files = MIN(nsp->f_files, LONG_MAX); 690 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 691 osp->f_owner = nsp->f_owner; 692 osp->f_type = nsp->f_type; 693 osp->f_flags = nsp->f_flags; 694 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 695 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 696 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 697 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 698 strlcpy(osp->f_fstypename, nsp->f_fstypename, 699 MIN(MFSNAMELEN, OMFSNAMELEN)); 700 strlcpy(osp->f_mntonname, nsp->f_mntonname, 701 MIN(MNAMELEN, OMNAMELEN)); 702 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 703 MIN(MNAMELEN, OMNAMELEN)); 704 osp->f_fsid = nsp->f_fsid; 705 } 706 #endif /* COMPAT_FREEBSD4 */ 707 708 /* 709 * Change current working directory to a given file descriptor. 710 */ 711 #ifndef _SYS_SYSPROTO_H_ 712 struct fchdir_args { 713 int fd; 714 }; 715 #endif 716 int 717 sys_fchdir(td, uap) 718 struct thread *td; 719 struct fchdir_args /* { 720 int fd; 721 } */ *uap; 722 { 723 register struct filedesc *fdp = td->td_proc->p_fd; 724 struct vnode *vp, *tdp, *vpold; 725 struct mount *mp; 726 struct file *fp; 727 cap_rights_t rights; 728 int error; 729 730 AUDIT_ARG_FD(uap->fd); 731 error = getvnode(fdp, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 732 &fp); 733 if (error != 0) 734 return (error); 735 vp = fp->f_vnode; 736 VREF(vp); 737 fdrop(fp, td); 738 vn_lock(vp, LK_SHARED | LK_RETRY); 739 AUDIT_ARG_VNODE1(vp); 740 error = change_dir(vp, td); 741 while (!error && (mp = vp->v_mountedhere) != NULL) { 742 if (vfs_busy(mp, 0)) 743 continue; 744 error = VFS_ROOT(mp, LK_SHARED, &tdp); 745 vfs_unbusy(mp); 746 if (error != 0) 747 break; 748 vput(vp); 749 vp = tdp; 750 } 751 if (error != 0) { 752 vput(vp); 753 return (error); 754 } 755 VOP_UNLOCK(vp, 0); 756 FILEDESC_XLOCK(fdp); 757 vpold = fdp->fd_cdir; 758 fdp->fd_cdir = vp; 759 FILEDESC_XUNLOCK(fdp); 760 vrele(vpold); 761 return (0); 762 } 763 764 /* 765 * Change current working directory (``.''). 766 */ 767 #ifndef _SYS_SYSPROTO_H_ 768 struct chdir_args { 769 char *path; 770 }; 771 #endif 772 int 773 sys_chdir(td, uap) 774 struct thread *td; 775 struct chdir_args /* { 776 char *path; 777 } */ *uap; 778 { 779 780 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 781 } 782 783 int 784 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 785 { 786 register struct filedesc *fdp = td->td_proc->p_fd; 787 struct nameidata nd; 788 struct vnode *vp; 789 int error; 790 791 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 792 pathseg, path, td); 793 if ((error = namei(&nd)) != 0) 794 return (error); 795 if ((error = change_dir(nd.ni_vp, td)) != 0) { 796 vput(nd.ni_vp); 797 NDFREE(&nd, NDF_ONLY_PNBUF); 798 return (error); 799 } 800 VOP_UNLOCK(nd.ni_vp, 0); 801 NDFREE(&nd, NDF_ONLY_PNBUF); 802 FILEDESC_XLOCK(fdp); 803 vp = fdp->fd_cdir; 804 fdp->fd_cdir = nd.ni_vp; 805 FILEDESC_XUNLOCK(fdp); 806 vrele(vp); 807 return (0); 808 } 809 810 /* 811 * Helper function for raised chroot(2) security function: Refuse if 812 * any filedescriptors are open directories. 813 */ 814 static int 815 chroot_refuse_vdir_fds(fdp) 816 struct filedesc *fdp; 817 { 818 struct vnode *vp; 819 struct file *fp; 820 int fd; 821 822 FILEDESC_LOCK_ASSERT(fdp); 823 824 for (fd = 0; fd <= fdp->fd_lastfile; fd++) { 825 fp = fget_locked(fdp, fd); 826 if (fp == NULL) 827 continue; 828 if (fp->f_type == DTYPE_VNODE) { 829 vp = fp->f_vnode; 830 if (vp->v_type == VDIR) 831 return (EPERM); 832 } 833 } 834 return (0); 835 } 836 837 /* 838 * This sysctl determines if we will allow a process to chroot(2) if it 839 * has a directory open: 840 * 0: disallowed for all processes. 841 * 1: allowed for processes that were not already chroot(2)'ed. 842 * 2: allowed for all processes. 843 */ 844 845 static int chroot_allow_open_directories = 1; 846 847 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 848 &chroot_allow_open_directories, 0, 849 "Allow a process to chroot(2) if it has a directory open"); 850 851 /* 852 * Change notion of root (``/'') directory. 853 */ 854 #ifndef _SYS_SYSPROTO_H_ 855 struct chroot_args { 856 char *path; 857 }; 858 #endif 859 int 860 sys_chroot(td, uap) 861 struct thread *td; 862 struct chroot_args /* { 863 char *path; 864 } */ *uap; 865 { 866 struct nameidata nd; 867 int error; 868 869 error = priv_check(td, PRIV_VFS_CHROOT); 870 if (error != 0) 871 return (error); 872 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 873 UIO_USERSPACE, uap->path, td); 874 error = namei(&nd); 875 if (error != 0) 876 goto error; 877 error = change_dir(nd.ni_vp, td); 878 if (error != 0) 879 goto e_vunlock; 880 #ifdef MAC 881 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 882 if (error != 0) 883 goto e_vunlock; 884 #endif 885 VOP_UNLOCK(nd.ni_vp, 0); 886 error = change_root(nd.ni_vp, td); 887 vrele(nd.ni_vp); 888 NDFREE(&nd, NDF_ONLY_PNBUF); 889 return (error); 890 e_vunlock: 891 vput(nd.ni_vp); 892 error: 893 NDFREE(&nd, NDF_ONLY_PNBUF); 894 return (error); 895 } 896 897 /* 898 * Common routine for chroot and chdir. Callers must provide a locked vnode 899 * instance. 900 */ 901 int 902 change_dir(vp, td) 903 struct vnode *vp; 904 struct thread *td; 905 { 906 #ifdef MAC 907 int error; 908 #endif 909 910 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 911 if (vp->v_type != VDIR) 912 return (ENOTDIR); 913 #ifdef MAC 914 error = mac_vnode_check_chdir(td->td_ucred, vp); 915 if (error != 0) 916 return (error); 917 #endif 918 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 919 } 920 921 /* 922 * Common routine for kern_chroot() and jail_attach(). The caller is 923 * responsible for invoking priv_check() and mac_vnode_check_chroot() to 924 * authorize this operation. 925 */ 926 int 927 change_root(vp, td) 928 struct vnode *vp; 929 struct thread *td; 930 { 931 struct filedesc *fdp; 932 struct vnode *oldvp; 933 int error; 934 935 fdp = td->td_proc->p_fd; 936 FILEDESC_XLOCK(fdp); 937 if (chroot_allow_open_directories == 0 || 938 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 939 error = chroot_refuse_vdir_fds(fdp); 940 if (error != 0) { 941 FILEDESC_XUNLOCK(fdp); 942 return (error); 943 } 944 } 945 oldvp = fdp->fd_rdir; 946 fdp->fd_rdir = vp; 947 VREF(fdp->fd_rdir); 948 if (!fdp->fd_jdir) { 949 fdp->fd_jdir = vp; 950 VREF(fdp->fd_jdir); 951 } 952 FILEDESC_XUNLOCK(fdp); 953 vrele(oldvp); 954 return (0); 955 } 956 957 static __inline void 958 flags_to_rights(int flags, cap_rights_t *rightsp) 959 { 960 961 if (flags & O_EXEC) { 962 cap_rights_set(rightsp, CAP_FEXECVE); 963 } else { 964 switch ((flags & O_ACCMODE)) { 965 case O_RDONLY: 966 cap_rights_set(rightsp, CAP_READ); 967 break; 968 case O_RDWR: 969 cap_rights_set(rightsp, CAP_READ); 970 /* FALLTHROUGH */ 971 case O_WRONLY: 972 cap_rights_set(rightsp, CAP_WRITE); 973 if (!(flags & (O_APPEND | O_TRUNC))) 974 cap_rights_set(rightsp, CAP_SEEK); 975 break; 976 } 977 } 978 979 if (flags & O_CREAT) 980 cap_rights_set(rightsp, CAP_CREATE); 981 982 if (flags & O_TRUNC) 983 cap_rights_set(rightsp, CAP_FTRUNCATE); 984 985 if (flags & (O_SYNC | O_FSYNC)) 986 cap_rights_set(rightsp, CAP_FSYNC); 987 988 if (flags & (O_EXLOCK | O_SHLOCK)) 989 cap_rights_set(rightsp, CAP_FLOCK); 990 } 991 992 /* 993 * Check permissions, allocate an open file structure, and call the device 994 * open routine if any. 995 */ 996 #ifndef _SYS_SYSPROTO_H_ 997 struct open_args { 998 char *path; 999 int flags; 1000 int mode; 1001 }; 1002 #endif 1003 int 1004 sys_open(td, uap) 1005 struct thread *td; 1006 register struct open_args /* { 1007 char *path; 1008 int flags; 1009 int mode; 1010 } */ *uap; 1011 { 1012 1013 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1014 uap->flags, uap->mode)); 1015 } 1016 1017 #ifndef _SYS_SYSPROTO_H_ 1018 struct openat_args { 1019 int fd; 1020 char *path; 1021 int flag; 1022 int mode; 1023 }; 1024 #endif 1025 int 1026 sys_openat(struct thread *td, struct openat_args *uap) 1027 { 1028 1029 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1030 uap->mode)); 1031 } 1032 1033 int 1034 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1035 int flags, int mode) 1036 { 1037 struct proc *p = td->td_proc; 1038 struct filedesc *fdp = p->p_fd; 1039 struct file *fp; 1040 struct vnode *vp; 1041 struct nameidata nd; 1042 cap_rights_t rights; 1043 int cmode, error, indx; 1044 1045 indx = -1; 1046 1047 AUDIT_ARG_FFLAGS(flags); 1048 AUDIT_ARG_MODE(mode); 1049 /* XXX: audit dirfd */ 1050 cap_rights_init(&rights, CAP_LOOKUP); 1051 flags_to_rights(flags, &rights); 1052 /* 1053 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1054 * may be specified. 1055 */ 1056 if (flags & O_EXEC) { 1057 if (flags & O_ACCMODE) 1058 return (EINVAL); 1059 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1060 return (EINVAL); 1061 } else { 1062 flags = FFLAGS(flags); 1063 } 1064 1065 /* 1066 * Allocate the file descriptor, but don't install a descriptor yet. 1067 */ 1068 error = falloc_noinstall(td, &fp); 1069 if (error != 0) 1070 return (error); 1071 /* 1072 * An extra reference on `fp' has been held for us by 1073 * falloc_noinstall(). 1074 */ 1075 /* Set the flags early so the finit in devfs can pick them up. */ 1076 fp->f_flag = flags & FMASK; 1077 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1078 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1079 &rights, td); 1080 td->td_dupfd = -1; /* XXX check for fdopen */ 1081 error = vn_open(&nd, &flags, cmode, fp); 1082 if (error != 0) { 1083 /* 1084 * If the vn_open replaced the method vector, something 1085 * wonderous happened deep below and we just pass it up 1086 * pretending we know what we do. 1087 */ 1088 if (error == ENXIO && fp->f_ops != &badfileops) 1089 goto success; 1090 1091 /* 1092 * Handle special fdopen() case. bleh. 1093 * 1094 * Don't do this for relative (capability) lookups; we don't 1095 * understand exactly what would happen, and we don't think 1096 * that it ever should. 1097 */ 1098 if (nd.ni_strictrelative == 0 && 1099 (error == ENODEV || error == ENXIO) && 1100 td->td_dupfd >= 0) { 1101 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1102 &indx); 1103 if (error == 0) 1104 goto success; 1105 } 1106 1107 goto bad; 1108 } 1109 td->td_dupfd = 0; 1110 NDFREE(&nd, NDF_ONLY_PNBUF); 1111 vp = nd.ni_vp; 1112 1113 /* 1114 * Store the vnode, for any f_type. Typically, the vnode use 1115 * count is decremented by direct call to vn_closefile() for 1116 * files that switched type in the cdevsw fdopen() method. 1117 */ 1118 fp->f_vnode = vp; 1119 /* 1120 * If the file wasn't claimed by devfs bind it to the normal 1121 * vnode operations here. 1122 */ 1123 if (fp->f_ops == &badfileops) { 1124 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1125 fp->f_seqcount = 1; 1126 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1127 DTYPE_VNODE, vp, &vnops); 1128 } 1129 1130 VOP_UNLOCK(vp, 0); 1131 if (flags & O_TRUNC) { 1132 error = fo_truncate(fp, 0, td->td_ucred, td); 1133 if (error != 0) 1134 goto bad; 1135 } 1136 success: 1137 /* 1138 * If we haven't already installed the FD (for dupfdopen), do so now. 1139 */ 1140 if (indx == -1) { 1141 struct filecaps *fcaps; 1142 1143 #ifdef CAPABILITIES 1144 if (nd.ni_strictrelative == 1) 1145 fcaps = &nd.ni_filecaps; 1146 else 1147 #endif 1148 fcaps = NULL; 1149 error = finstall(td, fp, &indx, flags, fcaps); 1150 /* On success finstall() consumes fcaps. */ 1151 if (error != 0) { 1152 filecaps_free(&nd.ni_filecaps); 1153 goto bad; 1154 } 1155 } else { 1156 filecaps_free(&nd.ni_filecaps); 1157 } 1158 1159 /* 1160 * Release our private reference, leaving the one associated with 1161 * the descriptor table intact. 1162 */ 1163 fdrop(fp, td); 1164 td->td_retval[0] = indx; 1165 return (0); 1166 bad: 1167 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1168 fdrop(fp, td); 1169 return (error); 1170 } 1171 1172 #ifdef COMPAT_43 1173 /* 1174 * Create a file. 1175 */ 1176 #ifndef _SYS_SYSPROTO_H_ 1177 struct ocreat_args { 1178 char *path; 1179 int mode; 1180 }; 1181 #endif 1182 int 1183 ocreat(td, uap) 1184 struct thread *td; 1185 register struct ocreat_args /* { 1186 char *path; 1187 int mode; 1188 } */ *uap; 1189 { 1190 1191 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1192 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1193 } 1194 #endif /* COMPAT_43 */ 1195 1196 /* 1197 * Create a special file. 1198 */ 1199 #ifndef _SYS_SYSPROTO_H_ 1200 struct mknod_args { 1201 char *path; 1202 int mode; 1203 int dev; 1204 }; 1205 #endif 1206 int 1207 sys_mknod(td, uap) 1208 struct thread *td; 1209 register struct mknod_args /* { 1210 char *path; 1211 int mode; 1212 int dev; 1213 } */ *uap; 1214 { 1215 1216 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1217 uap->mode, uap->dev)); 1218 } 1219 1220 #ifndef _SYS_SYSPROTO_H_ 1221 struct mknodat_args { 1222 int fd; 1223 char *path; 1224 mode_t mode; 1225 dev_t dev; 1226 }; 1227 #endif 1228 int 1229 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1230 { 1231 1232 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1233 uap->dev)); 1234 } 1235 1236 int 1237 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1238 int mode, int dev) 1239 { 1240 struct vnode *vp; 1241 struct mount *mp; 1242 struct vattr vattr; 1243 struct nameidata nd; 1244 cap_rights_t rights; 1245 int error, whiteout = 0; 1246 1247 AUDIT_ARG_MODE(mode); 1248 AUDIT_ARG_DEV(dev); 1249 switch (mode & S_IFMT) { 1250 case S_IFCHR: 1251 case S_IFBLK: 1252 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1253 break; 1254 case S_IFMT: 1255 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1256 break; 1257 case S_IFWHT: 1258 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1259 break; 1260 case S_IFIFO: 1261 if (dev == 0) 1262 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1263 /* FALLTHROUGH */ 1264 default: 1265 error = EINVAL; 1266 break; 1267 } 1268 if (error != 0) 1269 return (error); 1270 restart: 1271 bwillwrite(); 1272 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1273 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), 1274 td); 1275 if ((error = namei(&nd)) != 0) 1276 return (error); 1277 vp = nd.ni_vp; 1278 if (vp != NULL) { 1279 NDFREE(&nd, NDF_ONLY_PNBUF); 1280 if (vp == nd.ni_dvp) 1281 vrele(nd.ni_dvp); 1282 else 1283 vput(nd.ni_dvp); 1284 vrele(vp); 1285 return (EEXIST); 1286 } else { 1287 VATTR_NULL(&vattr); 1288 vattr.va_mode = (mode & ALLPERMS) & 1289 ~td->td_proc->p_fd->fd_cmask; 1290 vattr.va_rdev = dev; 1291 whiteout = 0; 1292 1293 switch (mode & S_IFMT) { 1294 case S_IFMT: /* used by badsect to flag bad sectors */ 1295 vattr.va_type = VBAD; 1296 break; 1297 case S_IFCHR: 1298 vattr.va_type = VCHR; 1299 break; 1300 case S_IFBLK: 1301 vattr.va_type = VBLK; 1302 break; 1303 case S_IFWHT: 1304 whiteout = 1; 1305 break; 1306 default: 1307 panic("kern_mknod: invalid mode"); 1308 } 1309 } 1310 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1311 NDFREE(&nd, NDF_ONLY_PNBUF); 1312 vput(nd.ni_dvp); 1313 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1314 return (error); 1315 goto restart; 1316 } 1317 #ifdef MAC 1318 if (error == 0 && !whiteout) 1319 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1320 &nd.ni_cnd, &vattr); 1321 #endif 1322 if (error == 0) { 1323 if (whiteout) 1324 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1325 else { 1326 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1327 &nd.ni_cnd, &vattr); 1328 if (error == 0) 1329 vput(nd.ni_vp); 1330 } 1331 } 1332 NDFREE(&nd, NDF_ONLY_PNBUF); 1333 vput(nd.ni_dvp); 1334 vn_finished_write(mp); 1335 return (error); 1336 } 1337 1338 /* 1339 * Create a named pipe. 1340 */ 1341 #ifndef _SYS_SYSPROTO_H_ 1342 struct mkfifo_args { 1343 char *path; 1344 int mode; 1345 }; 1346 #endif 1347 int 1348 sys_mkfifo(td, uap) 1349 struct thread *td; 1350 register struct mkfifo_args /* { 1351 char *path; 1352 int mode; 1353 } */ *uap; 1354 { 1355 1356 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1357 uap->mode)); 1358 } 1359 1360 #ifndef _SYS_SYSPROTO_H_ 1361 struct mkfifoat_args { 1362 int fd; 1363 char *path; 1364 mode_t mode; 1365 }; 1366 #endif 1367 int 1368 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1369 { 1370 1371 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1372 uap->mode)); 1373 } 1374 1375 int 1376 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1377 int mode) 1378 { 1379 struct mount *mp; 1380 struct vattr vattr; 1381 struct nameidata nd; 1382 cap_rights_t rights; 1383 int error; 1384 1385 AUDIT_ARG_MODE(mode); 1386 restart: 1387 bwillwrite(); 1388 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1389 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), 1390 td); 1391 if ((error = namei(&nd)) != 0) 1392 return (error); 1393 if (nd.ni_vp != NULL) { 1394 NDFREE(&nd, NDF_ONLY_PNBUF); 1395 if (nd.ni_vp == nd.ni_dvp) 1396 vrele(nd.ni_dvp); 1397 else 1398 vput(nd.ni_dvp); 1399 vrele(nd.ni_vp); 1400 return (EEXIST); 1401 } 1402 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1403 NDFREE(&nd, NDF_ONLY_PNBUF); 1404 vput(nd.ni_dvp); 1405 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1406 return (error); 1407 goto restart; 1408 } 1409 VATTR_NULL(&vattr); 1410 vattr.va_type = VFIFO; 1411 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1412 #ifdef MAC 1413 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1414 &vattr); 1415 if (error != 0) 1416 goto out; 1417 #endif 1418 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1419 if (error == 0) 1420 vput(nd.ni_vp); 1421 #ifdef MAC 1422 out: 1423 #endif 1424 vput(nd.ni_dvp); 1425 vn_finished_write(mp); 1426 NDFREE(&nd, NDF_ONLY_PNBUF); 1427 return (error); 1428 } 1429 1430 /* 1431 * Make a hard file link. 1432 */ 1433 #ifndef _SYS_SYSPROTO_H_ 1434 struct link_args { 1435 char *path; 1436 char *link; 1437 }; 1438 #endif 1439 int 1440 sys_link(td, uap) 1441 struct thread *td; 1442 register struct link_args /* { 1443 char *path; 1444 char *link; 1445 } */ *uap; 1446 { 1447 1448 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1449 UIO_USERSPACE, FOLLOW)); 1450 } 1451 1452 #ifndef _SYS_SYSPROTO_H_ 1453 struct linkat_args { 1454 int fd1; 1455 char *path1; 1456 int fd2; 1457 char *path2; 1458 int flag; 1459 }; 1460 #endif 1461 int 1462 sys_linkat(struct thread *td, struct linkat_args *uap) 1463 { 1464 int flag; 1465 1466 flag = uap->flag; 1467 if (flag & ~AT_SYMLINK_FOLLOW) 1468 return (EINVAL); 1469 1470 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1471 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); 1472 } 1473 1474 int hardlink_check_uid = 0; 1475 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1476 &hardlink_check_uid, 0, 1477 "Unprivileged processes cannot create hard links to files owned by other " 1478 "users"); 1479 static int hardlink_check_gid = 0; 1480 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1481 &hardlink_check_gid, 0, 1482 "Unprivileged processes cannot create hard links to files owned by other " 1483 "groups"); 1484 1485 static int 1486 can_hardlink(struct vnode *vp, struct ucred *cred) 1487 { 1488 struct vattr va; 1489 int error; 1490 1491 if (!hardlink_check_uid && !hardlink_check_gid) 1492 return (0); 1493 1494 error = VOP_GETATTR(vp, &va, cred); 1495 if (error != 0) 1496 return (error); 1497 1498 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1499 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1500 if (error != 0) 1501 return (error); 1502 } 1503 1504 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1505 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1506 if (error != 0) 1507 return (error); 1508 } 1509 1510 return (0); 1511 } 1512 1513 int 1514 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1515 enum uio_seg segflg, int follow) 1516 { 1517 struct vnode *vp; 1518 struct mount *mp; 1519 struct nameidata nd; 1520 cap_rights_t rights; 1521 int error; 1522 1523 again: 1524 bwillwrite(); 1525 NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td); 1526 1527 if ((error = namei(&nd)) != 0) 1528 return (error); 1529 NDFREE(&nd, NDF_ONLY_PNBUF); 1530 vp = nd.ni_vp; 1531 if (vp->v_type == VDIR) { 1532 vrele(vp); 1533 return (EPERM); /* POSIX */ 1534 } 1535 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2 | 1536 NOCACHE, segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT), 1537 td); 1538 if ((error = namei(&nd)) == 0) { 1539 if (nd.ni_vp != NULL) { 1540 NDFREE(&nd, NDF_ONLY_PNBUF); 1541 if (nd.ni_dvp == nd.ni_vp) 1542 vrele(nd.ni_dvp); 1543 else 1544 vput(nd.ni_dvp); 1545 vrele(nd.ni_vp); 1546 vrele(vp); 1547 return (EEXIST); 1548 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1549 /* 1550 * Cross-device link. No need to recheck 1551 * vp->v_type, since it cannot change, except 1552 * to VBAD. 1553 */ 1554 NDFREE(&nd, NDF_ONLY_PNBUF); 1555 vput(nd.ni_dvp); 1556 vrele(vp); 1557 return (EXDEV); 1558 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1559 error = can_hardlink(vp, td->td_ucred); 1560 #ifdef MAC 1561 if (error == 0) 1562 error = mac_vnode_check_link(td->td_ucred, 1563 nd.ni_dvp, vp, &nd.ni_cnd); 1564 #endif 1565 if (error != 0) { 1566 vput(vp); 1567 vput(nd.ni_dvp); 1568 NDFREE(&nd, NDF_ONLY_PNBUF); 1569 return (error); 1570 } 1571 error = vn_start_write(vp, &mp, V_NOWAIT); 1572 if (error != 0) { 1573 vput(vp); 1574 vput(nd.ni_dvp); 1575 NDFREE(&nd, NDF_ONLY_PNBUF); 1576 error = vn_start_write(NULL, &mp, 1577 V_XSLEEP | PCATCH); 1578 if (error != 0) 1579 return (error); 1580 goto again; 1581 } 1582 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1583 VOP_UNLOCK(vp, 0); 1584 vput(nd.ni_dvp); 1585 vn_finished_write(mp); 1586 NDFREE(&nd, NDF_ONLY_PNBUF); 1587 } else { 1588 vput(nd.ni_dvp); 1589 NDFREE(&nd, NDF_ONLY_PNBUF); 1590 vrele(vp); 1591 goto again; 1592 } 1593 } 1594 vrele(vp); 1595 return (error); 1596 } 1597 1598 /* 1599 * Make a symbolic link. 1600 */ 1601 #ifndef _SYS_SYSPROTO_H_ 1602 struct symlink_args { 1603 char *path; 1604 char *link; 1605 }; 1606 #endif 1607 int 1608 sys_symlink(td, uap) 1609 struct thread *td; 1610 register struct symlink_args /* { 1611 char *path; 1612 char *link; 1613 } */ *uap; 1614 { 1615 1616 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1617 UIO_USERSPACE)); 1618 } 1619 1620 #ifndef _SYS_SYSPROTO_H_ 1621 struct symlinkat_args { 1622 char *path; 1623 int fd; 1624 char *path2; 1625 }; 1626 #endif 1627 int 1628 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1629 { 1630 1631 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1632 UIO_USERSPACE)); 1633 } 1634 1635 int 1636 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1637 enum uio_seg segflg) 1638 { 1639 struct mount *mp; 1640 struct vattr vattr; 1641 char *syspath; 1642 struct nameidata nd; 1643 int error; 1644 cap_rights_t rights; 1645 1646 if (segflg == UIO_SYSSPACE) { 1647 syspath = path1; 1648 } else { 1649 syspath = uma_zalloc(namei_zone, M_WAITOK); 1650 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1651 goto out; 1652 } 1653 AUDIT_ARG_TEXT(syspath); 1654 restart: 1655 bwillwrite(); 1656 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1657 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1658 td); 1659 if ((error = namei(&nd)) != 0) 1660 goto out; 1661 if (nd.ni_vp) { 1662 NDFREE(&nd, NDF_ONLY_PNBUF); 1663 if (nd.ni_vp == nd.ni_dvp) 1664 vrele(nd.ni_dvp); 1665 else 1666 vput(nd.ni_dvp); 1667 vrele(nd.ni_vp); 1668 error = EEXIST; 1669 goto out; 1670 } 1671 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1672 NDFREE(&nd, NDF_ONLY_PNBUF); 1673 vput(nd.ni_dvp); 1674 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1675 goto out; 1676 goto restart; 1677 } 1678 VATTR_NULL(&vattr); 1679 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1680 #ifdef MAC 1681 vattr.va_type = VLNK; 1682 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1683 &vattr); 1684 if (error != 0) 1685 goto out2; 1686 #endif 1687 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1688 if (error == 0) 1689 vput(nd.ni_vp); 1690 #ifdef MAC 1691 out2: 1692 #endif 1693 NDFREE(&nd, NDF_ONLY_PNBUF); 1694 vput(nd.ni_dvp); 1695 vn_finished_write(mp); 1696 out: 1697 if (segflg != UIO_SYSSPACE) 1698 uma_zfree(namei_zone, syspath); 1699 return (error); 1700 } 1701 1702 /* 1703 * Delete a whiteout from the filesystem. 1704 */ 1705 int 1706 sys_undelete(td, uap) 1707 struct thread *td; 1708 register struct undelete_args /* { 1709 char *path; 1710 } */ *uap; 1711 { 1712 struct mount *mp; 1713 struct nameidata nd; 1714 int error; 1715 1716 restart: 1717 bwillwrite(); 1718 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1719 UIO_USERSPACE, uap->path, td); 1720 error = namei(&nd); 1721 if (error != 0) 1722 return (error); 1723 1724 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1725 NDFREE(&nd, NDF_ONLY_PNBUF); 1726 if (nd.ni_vp == nd.ni_dvp) 1727 vrele(nd.ni_dvp); 1728 else 1729 vput(nd.ni_dvp); 1730 if (nd.ni_vp) 1731 vrele(nd.ni_vp); 1732 return (EEXIST); 1733 } 1734 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1735 NDFREE(&nd, NDF_ONLY_PNBUF); 1736 vput(nd.ni_dvp); 1737 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1738 return (error); 1739 goto restart; 1740 } 1741 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1742 NDFREE(&nd, NDF_ONLY_PNBUF); 1743 vput(nd.ni_dvp); 1744 vn_finished_write(mp); 1745 return (error); 1746 } 1747 1748 /* 1749 * Delete a name from the filesystem. 1750 */ 1751 #ifndef _SYS_SYSPROTO_H_ 1752 struct unlink_args { 1753 char *path; 1754 }; 1755 #endif 1756 int 1757 sys_unlink(td, uap) 1758 struct thread *td; 1759 struct unlink_args /* { 1760 char *path; 1761 } */ *uap; 1762 { 1763 1764 return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); 1765 } 1766 1767 #ifndef _SYS_SYSPROTO_H_ 1768 struct unlinkat_args { 1769 int fd; 1770 char *path; 1771 int flag; 1772 }; 1773 #endif 1774 int 1775 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1776 { 1777 int flag = uap->flag; 1778 int fd = uap->fd; 1779 char *path = uap->path; 1780 1781 if (flag & ~AT_REMOVEDIR) 1782 return (EINVAL); 1783 1784 if (flag & AT_REMOVEDIR) 1785 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1786 else 1787 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1788 } 1789 1790 int 1791 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1792 ino_t oldinum) 1793 { 1794 struct mount *mp; 1795 struct vnode *vp; 1796 struct nameidata nd; 1797 struct stat sb; 1798 cap_rights_t rights; 1799 int error; 1800 1801 restart: 1802 bwillwrite(); 1803 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1804 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1805 if ((error = namei(&nd)) != 0) 1806 return (error == EINVAL ? EPERM : error); 1807 vp = nd.ni_vp; 1808 if (vp->v_type == VDIR && oldinum == 0) { 1809 error = EPERM; /* POSIX */ 1810 } else if (oldinum != 0 && 1811 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1812 sb.st_ino != oldinum) { 1813 error = EIDRM; /* Identifier removed */ 1814 } else { 1815 /* 1816 * The root of a mounted filesystem cannot be deleted. 1817 * 1818 * XXX: can this only be a VDIR case? 1819 */ 1820 if (vp->v_vflag & VV_ROOT) 1821 error = EBUSY; 1822 } 1823 if (error == 0) { 1824 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1825 NDFREE(&nd, NDF_ONLY_PNBUF); 1826 vput(nd.ni_dvp); 1827 if (vp == nd.ni_dvp) 1828 vrele(vp); 1829 else 1830 vput(vp); 1831 if ((error = vn_start_write(NULL, &mp, 1832 V_XSLEEP | PCATCH)) != 0) 1833 return (error); 1834 goto restart; 1835 } 1836 #ifdef MAC 1837 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1838 &nd.ni_cnd); 1839 if (error != 0) 1840 goto out; 1841 #endif 1842 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1843 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1844 #ifdef MAC 1845 out: 1846 #endif 1847 vn_finished_write(mp); 1848 } 1849 NDFREE(&nd, NDF_ONLY_PNBUF); 1850 vput(nd.ni_dvp); 1851 if (vp == nd.ni_dvp) 1852 vrele(vp); 1853 else 1854 vput(vp); 1855 return (error); 1856 } 1857 1858 /* 1859 * Reposition read/write file offset. 1860 */ 1861 #ifndef _SYS_SYSPROTO_H_ 1862 struct lseek_args { 1863 int fd; 1864 int pad; 1865 off_t offset; 1866 int whence; 1867 }; 1868 #endif 1869 int 1870 sys_lseek(td, uap) 1871 struct thread *td; 1872 register struct lseek_args /* { 1873 int fd; 1874 int pad; 1875 off_t offset; 1876 int whence; 1877 } */ *uap; 1878 { 1879 struct file *fp; 1880 cap_rights_t rights; 1881 int error; 1882 1883 AUDIT_ARG_FD(uap->fd); 1884 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1885 if (error != 0) 1886 return (error); 1887 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1888 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1889 fdrop(fp, td); 1890 return (error); 1891 } 1892 1893 #if defined(COMPAT_43) 1894 /* 1895 * Reposition read/write file offset. 1896 */ 1897 #ifndef _SYS_SYSPROTO_H_ 1898 struct olseek_args { 1899 int fd; 1900 long offset; 1901 int whence; 1902 }; 1903 #endif 1904 int 1905 olseek(td, uap) 1906 struct thread *td; 1907 register struct olseek_args /* { 1908 int fd; 1909 long offset; 1910 int whence; 1911 } */ *uap; 1912 { 1913 struct lseek_args /* { 1914 int fd; 1915 int pad; 1916 off_t offset; 1917 int whence; 1918 } */ nuap; 1919 1920 nuap.fd = uap->fd; 1921 nuap.offset = uap->offset; 1922 nuap.whence = uap->whence; 1923 return (sys_lseek(td, &nuap)); 1924 } 1925 #endif /* COMPAT_43 */ 1926 1927 /* Version with the 'pad' argument */ 1928 int 1929 freebsd6_lseek(td, uap) 1930 struct thread *td; 1931 register struct freebsd6_lseek_args *uap; 1932 { 1933 struct lseek_args ouap; 1934 1935 ouap.fd = uap->fd; 1936 ouap.offset = uap->offset; 1937 ouap.whence = uap->whence; 1938 return (sys_lseek(td, &ouap)); 1939 } 1940 1941 /* 1942 * Check access permissions using passed credentials. 1943 */ 1944 static int 1945 vn_access(vp, user_flags, cred, td) 1946 struct vnode *vp; 1947 int user_flags; 1948 struct ucred *cred; 1949 struct thread *td; 1950 { 1951 accmode_t accmode; 1952 int error; 1953 1954 /* Flags == 0 means only check for existence. */ 1955 if (user_flags == 0) 1956 return (0); 1957 1958 accmode = 0; 1959 if (user_flags & R_OK) 1960 accmode |= VREAD; 1961 if (user_flags & W_OK) 1962 accmode |= VWRITE; 1963 if (user_flags & X_OK) 1964 accmode |= VEXEC; 1965 #ifdef MAC 1966 error = mac_vnode_check_access(cred, vp, accmode); 1967 if (error != 0) 1968 return (error); 1969 #endif 1970 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1971 error = VOP_ACCESS(vp, accmode, cred, td); 1972 return (error); 1973 } 1974 1975 /* 1976 * Check access permissions using "real" credentials. 1977 */ 1978 #ifndef _SYS_SYSPROTO_H_ 1979 struct access_args { 1980 char *path; 1981 int amode; 1982 }; 1983 #endif 1984 int 1985 sys_access(td, uap) 1986 struct thread *td; 1987 register struct access_args /* { 1988 char *path; 1989 int amode; 1990 } */ *uap; 1991 { 1992 1993 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1994 0, uap->amode)); 1995 } 1996 1997 #ifndef _SYS_SYSPROTO_H_ 1998 struct faccessat_args { 1999 int dirfd; 2000 char *path; 2001 int amode; 2002 int flag; 2003 } 2004 #endif 2005 int 2006 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2007 { 2008 2009 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2010 uap->amode)); 2011 } 2012 2013 int 2014 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2015 int flag, int amode) 2016 { 2017 struct ucred *cred, *usecred; 2018 struct vnode *vp; 2019 struct nameidata nd; 2020 cap_rights_t rights; 2021 int error; 2022 2023 if (flag & ~AT_EACCESS) 2024 return (EINVAL); 2025 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2026 return (EINVAL); 2027 2028 /* 2029 * Create and modify a temporary credential instead of one that 2030 * is potentially shared (if we need one). 2031 */ 2032 cred = td->td_ucred; 2033 if ((flag & AT_EACCESS) == 0 && 2034 ((cred->cr_uid != cred->cr_ruid || 2035 cred->cr_rgid != cred->cr_groups[0]))) { 2036 usecred = crdup(cred); 2037 usecred->cr_uid = cred->cr_ruid; 2038 usecred->cr_groups[0] = cred->cr_rgid; 2039 td->td_ucred = usecred; 2040 } else 2041 usecred = cred; 2042 AUDIT_ARG_VALUE(amode); 2043 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2044 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 2045 td); 2046 if ((error = namei(&nd)) != 0) 2047 goto out; 2048 vp = nd.ni_vp; 2049 2050 error = vn_access(vp, amode, usecred, td); 2051 NDFREE(&nd, NDF_ONLY_PNBUF); 2052 vput(vp); 2053 out: 2054 if (usecred != cred) { 2055 td->td_ucred = cred; 2056 crfree(usecred); 2057 } 2058 return (error); 2059 } 2060 2061 /* 2062 * Check access permissions using "effective" credentials. 2063 */ 2064 #ifndef _SYS_SYSPROTO_H_ 2065 struct eaccess_args { 2066 char *path; 2067 int amode; 2068 }; 2069 #endif 2070 int 2071 sys_eaccess(td, uap) 2072 struct thread *td; 2073 register struct eaccess_args /* { 2074 char *path; 2075 int amode; 2076 } */ *uap; 2077 { 2078 2079 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2080 AT_EACCESS, uap->amode)); 2081 } 2082 2083 #if defined(COMPAT_43) 2084 /* 2085 * Get file status; this version follows links. 2086 */ 2087 #ifndef _SYS_SYSPROTO_H_ 2088 struct ostat_args { 2089 char *path; 2090 struct ostat *ub; 2091 }; 2092 #endif 2093 int 2094 ostat(td, uap) 2095 struct thread *td; 2096 register struct ostat_args /* { 2097 char *path; 2098 struct ostat *ub; 2099 } */ *uap; 2100 { 2101 struct stat sb; 2102 struct ostat osb; 2103 int error; 2104 2105 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2106 &sb, NULL); 2107 if (error != 0) 2108 return (error); 2109 cvtstat(&sb, &osb); 2110 return (copyout(&osb, uap->ub, sizeof (osb))); 2111 } 2112 2113 /* 2114 * Get file status; this version does not follow links. 2115 */ 2116 #ifndef _SYS_SYSPROTO_H_ 2117 struct olstat_args { 2118 char *path; 2119 struct ostat *ub; 2120 }; 2121 #endif 2122 int 2123 olstat(td, uap) 2124 struct thread *td; 2125 register struct olstat_args /* { 2126 char *path; 2127 struct ostat *ub; 2128 } */ *uap; 2129 { 2130 struct stat sb; 2131 struct ostat osb; 2132 int error; 2133 2134 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2135 UIO_USERSPACE, &sb, NULL); 2136 if (error != 0) 2137 return (error); 2138 cvtstat(&sb, &osb); 2139 return (copyout(&osb, uap->ub, sizeof (osb))); 2140 } 2141 2142 /* 2143 * Convert from an old to a new stat structure. 2144 */ 2145 void 2146 cvtstat(st, ost) 2147 struct stat *st; 2148 struct ostat *ost; 2149 { 2150 2151 ost->st_dev = st->st_dev; 2152 ost->st_ino = st->st_ino; 2153 ost->st_mode = st->st_mode; 2154 ost->st_nlink = st->st_nlink; 2155 ost->st_uid = st->st_uid; 2156 ost->st_gid = st->st_gid; 2157 ost->st_rdev = st->st_rdev; 2158 if (st->st_size < (quad_t)1 << 32) 2159 ost->st_size = st->st_size; 2160 else 2161 ost->st_size = -2; 2162 ost->st_atim = st->st_atim; 2163 ost->st_mtim = st->st_mtim; 2164 ost->st_ctim = st->st_ctim; 2165 ost->st_blksize = st->st_blksize; 2166 ost->st_blocks = st->st_blocks; 2167 ost->st_flags = st->st_flags; 2168 ost->st_gen = st->st_gen; 2169 } 2170 #endif /* COMPAT_43 */ 2171 2172 /* 2173 * Get file status; this version follows links. 2174 */ 2175 #ifndef _SYS_SYSPROTO_H_ 2176 struct stat_args { 2177 char *path; 2178 struct stat *ub; 2179 }; 2180 #endif 2181 int 2182 sys_stat(td, uap) 2183 struct thread *td; 2184 register struct stat_args /* { 2185 char *path; 2186 struct stat *ub; 2187 } */ *uap; 2188 { 2189 struct stat sb; 2190 int error; 2191 2192 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2193 &sb, NULL); 2194 if (error == 0) 2195 error = copyout(&sb, uap->ub, sizeof (sb)); 2196 return (error); 2197 } 2198 2199 #ifndef _SYS_SYSPROTO_H_ 2200 struct fstatat_args { 2201 int fd; 2202 char *path; 2203 struct stat *buf; 2204 int flag; 2205 } 2206 #endif 2207 int 2208 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2209 { 2210 struct stat sb; 2211 int error; 2212 2213 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2214 UIO_USERSPACE, &sb, NULL); 2215 if (error == 0) 2216 error = copyout(&sb, uap->buf, sizeof (sb)); 2217 return (error); 2218 } 2219 2220 int 2221 kern_statat(struct thread *td, int flag, int fd, char *path, 2222 enum uio_seg pathseg, struct stat *sbp, 2223 void (*hook)(struct vnode *vp, struct stat *sbp)) 2224 { 2225 struct nameidata nd; 2226 struct stat sb; 2227 cap_rights_t rights; 2228 int error; 2229 2230 if (flag & ~AT_SYMLINK_NOFOLLOW) 2231 return (EINVAL); 2232 2233 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2234 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2235 cap_rights_init(&rights, CAP_FSTAT), td); 2236 2237 if ((error = namei(&nd)) != 0) 2238 return (error); 2239 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2240 if (error == 0) { 2241 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0); 2242 if (S_ISREG(sb.st_mode)) 2243 SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0); 2244 if (__predict_false(hook != NULL)) 2245 hook(nd.ni_vp, &sb); 2246 } 2247 NDFREE(&nd, NDF_ONLY_PNBUF); 2248 vput(nd.ni_vp); 2249 if (error != 0) 2250 return (error); 2251 *sbp = sb; 2252 #ifdef KTRACE 2253 if (KTRPOINT(td, KTR_STRUCT)) 2254 ktrstat(&sb); 2255 #endif 2256 return (0); 2257 } 2258 2259 /* 2260 * Get file status; this version does not follow links. 2261 */ 2262 #ifndef _SYS_SYSPROTO_H_ 2263 struct lstat_args { 2264 char *path; 2265 struct stat *ub; 2266 }; 2267 #endif 2268 int 2269 sys_lstat(td, uap) 2270 struct thread *td; 2271 register struct lstat_args /* { 2272 char *path; 2273 struct stat *ub; 2274 } */ *uap; 2275 { 2276 struct stat sb; 2277 int error; 2278 2279 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2280 UIO_USERSPACE, &sb, NULL); 2281 if (error == 0) 2282 error = copyout(&sb, uap->ub, sizeof (sb)); 2283 return (error); 2284 } 2285 2286 /* 2287 * Implementation of the NetBSD [l]stat() functions. 2288 */ 2289 void 2290 cvtnstat(sb, nsb) 2291 struct stat *sb; 2292 struct nstat *nsb; 2293 { 2294 2295 bzero(nsb, sizeof *nsb); 2296 nsb->st_dev = sb->st_dev; 2297 nsb->st_ino = sb->st_ino; 2298 nsb->st_mode = sb->st_mode; 2299 nsb->st_nlink = sb->st_nlink; 2300 nsb->st_uid = sb->st_uid; 2301 nsb->st_gid = sb->st_gid; 2302 nsb->st_rdev = sb->st_rdev; 2303 nsb->st_atim = sb->st_atim; 2304 nsb->st_mtim = sb->st_mtim; 2305 nsb->st_ctim = sb->st_ctim; 2306 nsb->st_size = sb->st_size; 2307 nsb->st_blocks = sb->st_blocks; 2308 nsb->st_blksize = sb->st_blksize; 2309 nsb->st_flags = sb->st_flags; 2310 nsb->st_gen = sb->st_gen; 2311 nsb->st_birthtim = sb->st_birthtim; 2312 } 2313 2314 #ifndef _SYS_SYSPROTO_H_ 2315 struct nstat_args { 2316 char *path; 2317 struct nstat *ub; 2318 }; 2319 #endif 2320 int 2321 sys_nstat(td, uap) 2322 struct thread *td; 2323 register struct nstat_args /* { 2324 char *path; 2325 struct nstat *ub; 2326 } */ *uap; 2327 { 2328 struct stat sb; 2329 struct nstat nsb; 2330 int error; 2331 2332 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2333 &sb, NULL); 2334 if (error != 0) 2335 return (error); 2336 cvtnstat(&sb, &nsb); 2337 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2338 } 2339 2340 /* 2341 * NetBSD lstat. Get file status; this version does not follow links. 2342 */ 2343 #ifndef _SYS_SYSPROTO_H_ 2344 struct lstat_args { 2345 char *path; 2346 struct stat *ub; 2347 }; 2348 #endif 2349 int 2350 sys_nlstat(td, uap) 2351 struct thread *td; 2352 register struct nlstat_args /* { 2353 char *path; 2354 struct nstat *ub; 2355 } */ *uap; 2356 { 2357 struct stat sb; 2358 struct nstat nsb; 2359 int error; 2360 2361 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2362 UIO_USERSPACE, &sb, NULL); 2363 if (error != 0) 2364 return (error); 2365 cvtnstat(&sb, &nsb); 2366 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2367 } 2368 2369 /* 2370 * Get configurable pathname variables. 2371 */ 2372 #ifndef _SYS_SYSPROTO_H_ 2373 struct pathconf_args { 2374 char *path; 2375 int name; 2376 }; 2377 #endif 2378 int 2379 sys_pathconf(td, uap) 2380 struct thread *td; 2381 register struct pathconf_args /* { 2382 char *path; 2383 int name; 2384 } */ *uap; 2385 { 2386 2387 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2388 } 2389 2390 #ifndef _SYS_SYSPROTO_H_ 2391 struct lpathconf_args { 2392 char *path; 2393 int name; 2394 }; 2395 #endif 2396 int 2397 sys_lpathconf(td, uap) 2398 struct thread *td; 2399 register struct lpathconf_args /* { 2400 char *path; 2401 int name; 2402 } */ *uap; 2403 { 2404 2405 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2406 NOFOLLOW)); 2407 } 2408 2409 int 2410 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2411 u_long flags) 2412 { 2413 struct nameidata nd; 2414 int error; 2415 2416 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2417 pathseg, path, td); 2418 if ((error = namei(&nd)) != 0) 2419 return (error); 2420 NDFREE(&nd, NDF_ONLY_PNBUF); 2421 2422 /* If asynchronous I/O is available, it works for all files. */ 2423 if (name == _PC_ASYNC_IO) 2424 td->td_retval[0] = async_io_version; 2425 else 2426 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2427 vput(nd.ni_vp); 2428 return (error); 2429 } 2430 2431 /* 2432 * Return target name of a symbolic link. 2433 */ 2434 #ifndef _SYS_SYSPROTO_H_ 2435 struct readlink_args { 2436 char *path; 2437 char *buf; 2438 size_t count; 2439 }; 2440 #endif 2441 int 2442 sys_readlink(td, uap) 2443 struct thread *td; 2444 register struct readlink_args /* { 2445 char *path; 2446 char *buf; 2447 size_t count; 2448 } */ *uap; 2449 { 2450 2451 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2452 uap->buf, UIO_USERSPACE, uap->count)); 2453 } 2454 #ifndef _SYS_SYSPROTO_H_ 2455 struct readlinkat_args { 2456 int fd; 2457 char *path; 2458 char *buf; 2459 size_t bufsize; 2460 }; 2461 #endif 2462 int 2463 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2464 { 2465 2466 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2467 uap->buf, UIO_USERSPACE, uap->bufsize)); 2468 } 2469 2470 int 2471 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2472 char *buf, enum uio_seg bufseg, size_t count) 2473 { 2474 struct vnode *vp; 2475 struct iovec aiov; 2476 struct uio auio; 2477 struct nameidata nd; 2478 int error; 2479 2480 if (count > IOSIZE_MAX) 2481 return (EINVAL); 2482 2483 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2484 pathseg, path, fd, td); 2485 2486 if ((error = namei(&nd)) != 0) 2487 return (error); 2488 NDFREE(&nd, NDF_ONLY_PNBUF); 2489 vp = nd.ni_vp; 2490 #ifdef MAC 2491 error = mac_vnode_check_readlink(td->td_ucred, vp); 2492 if (error != 0) { 2493 vput(vp); 2494 return (error); 2495 } 2496 #endif 2497 if (vp->v_type != VLNK) 2498 error = EINVAL; 2499 else { 2500 aiov.iov_base = buf; 2501 aiov.iov_len = count; 2502 auio.uio_iov = &aiov; 2503 auio.uio_iovcnt = 1; 2504 auio.uio_offset = 0; 2505 auio.uio_rw = UIO_READ; 2506 auio.uio_segflg = bufseg; 2507 auio.uio_td = td; 2508 auio.uio_resid = count; 2509 error = VOP_READLINK(vp, &auio, td->td_ucred); 2510 td->td_retval[0] = count - auio.uio_resid; 2511 } 2512 vput(vp); 2513 return (error); 2514 } 2515 2516 /* 2517 * Common implementation code for chflags() and fchflags(). 2518 */ 2519 static int 2520 setfflags(td, vp, flags) 2521 struct thread *td; 2522 struct vnode *vp; 2523 u_long flags; 2524 { 2525 struct mount *mp; 2526 struct vattr vattr; 2527 int error; 2528 2529 /* We can't support the value matching VNOVAL. */ 2530 if (flags == VNOVAL) 2531 return (EOPNOTSUPP); 2532 2533 /* 2534 * Prevent non-root users from setting flags on devices. When 2535 * a device is reused, users can retain ownership of the device 2536 * if they are allowed to set flags and programs assume that 2537 * chown can't fail when done as root. 2538 */ 2539 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2540 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2541 if (error != 0) 2542 return (error); 2543 } 2544 2545 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2546 return (error); 2547 VATTR_NULL(&vattr); 2548 vattr.va_flags = flags; 2549 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2550 #ifdef MAC 2551 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2552 if (error == 0) 2553 #endif 2554 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2555 VOP_UNLOCK(vp, 0); 2556 vn_finished_write(mp); 2557 return (error); 2558 } 2559 2560 /* 2561 * Change flags of a file given a path name. 2562 */ 2563 #ifndef _SYS_SYSPROTO_H_ 2564 struct chflags_args { 2565 const char *path; 2566 u_long flags; 2567 }; 2568 #endif 2569 int 2570 sys_chflags(td, uap) 2571 struct thread *td; 2572 register struct chflags_args /* { 2573 const char *path; 2574 u_long flags; 2575 } */ *uap; 2576 { 2577 2578 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2579 uap->flags, 0)); 2580 } 2581 2582 #ifndef _SYS_SYSPROTO_H_ 2583 struct chflagsat_args { 2584 int fd; 2585 const char *path; 2586 u_long flags; 2587 int atflag; 2588 } 2589 #endif 2590 int 2591 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2592 { 2593 int fd = uap->fd; 2594 const char *path = uap->path; 2595 u_long flags = uap->flags; 2596 int atflag = uap->atflag; 2597 2598 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2599 return (EINVAL); 2600 2601 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2602 } 2603 2604 /* 2605 * Same as chflags() but doesn't follow symlinks. 2606 */ 2607 int 2608 sys_lchflags(td, uap) 2609 struct thread *td; 2610 register struct lchflags_args /* { 2611 const char *path; 2612 u_long flags; 2613 } */ *uap; 2614 { 2615 2616 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2617 uap->flags, AT_SYMLINK_NOFOLLOW)); 2618 } 2619 2620 static int 2621 kern_chflagsat(struct thread *td, int fd, const char *path, 2622 enum uio_seg pathseg, u_long flags, int atflag) 2623 { 2624 struct nameidata nd; 2625 cap_rights_t rights; 2626 int error, follow; 2627 2628 AUDIT_ARG_FFLAGS(flags); 2629 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2630 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2631 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2632 if ((error = namei(&nd)) != 0) 2633 return (error); 2634 NDFREE(&nd, NDF_ONLY_PNBUF); 2635 error = setfflags(td, nd.ni_vp, flags); 2636 vrele(nd.ni_vp); 2637 return (error); 2638 } 2639 2640 /* 2641 * Change flags of a file given a file descriptor. 2642 */ 2643 #ifndef _SYS_SYSPROTO_H_ 2644 struct fchflags_args { 2645 int fd; 2646 u_long flags; 2647 }; 2648 #endif 2649 int 2650 sys_fchflags(td, uap) 2651 struct thread *td; 2652 register struct fchflags_args /* { 2653 int fd; 2654 u_long flags; 2655 } */ *uap; 2656 { 2657 struct file *fp; 2658 cap_rights_t rights; 2659 int error; 2660 2661 AUDIT_ARG_FD(uap->fd); 2662 AUDIT_ARG_FFLAGS(uap->flags); 2663 error = getvnode(td->td_proc->p_fd, uap->fd, 2664 cap_rights_init(&rights, CAP_FCHFLAGS), &fp); 2665 if (error != 0) 2666 return (error); 2667 #ifdef AUDIT 2668 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2669 AUDIT_ARG_VNODE1(fp->f_vnode); 2670 VOP_UNLOCK(fp->f_vnode, 0); 2671 #endif 2672 error = setfflags(td, fp->f_vnode, uap->flags); 2673 fdrop(fp, td); 2674 return (error); 2675 } 2676 2677 /* 2678 * Common implementation code for chmod(), lchmod() and fchmod(). 2679 */ 2680 int 2681 setfmode(td, cred, vp, mode) 2682 struct thread *td; 2683 struct ucred *cred; 2684 struct vnode *vp; 2685 int mode; 2686 { 2687 struct mount *mp; 2688 struct vattr vattr; 2689 int error; 2690 2691 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2692 return (error); 2693 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2694 VATTR_NULL(&vattr); 2695 vattr.va_mode = mode & ALLPERMS; 2696 #ifdef MAC 2697 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2698 if (error == 0) 2699 #endif 2700 error = VOP_SETATTR(vp, &vattr, cred); 2701 VOP_UNLOCK(vp, 0); 2702 vn_finished_write(mp); 2703 return (error); 2704 } 2705 2706 /* 2707 * Change mode of a file given path name. 2708 */ 2709 #ifndef _SYS_SYSPROTO_H_ 2710 struct chmod_args { 2711 char *path; 2712 int mode; 2713 }; 2714 #endif 2715 int 2716 sys_chmod(td, uap) 2717 struct thread *td; 2718 register struct chmod_args /* { 2719 char *path; 2720 int mode; 2721 } */ *uap; 2722 { 2723 2724 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2725 uap->mode, 0)); 2726 } 2727 2728 #ifndef _SYS_SYSPROTO_H_ 2729 struct fchmodat_args { 2730 int dirfd; 2731 char *path; 2732 mode_t mode; 2733 int flag; 2734 } 2735 #endif 2736 int 2737 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2738 { 2739 int flag = uap->flag; 2740 int fd = uap->fd; 2741 char *path = uap->path; 2742 mode_t mode = uap->mode; 2743 2744 if (flag & ~AT_SYMLINK_NOFOLLOW) 2745 return (EINVAL); 2746 2747 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2748 } 2749 2750 /* 2751 * Change mode of a file given path name (don't follow links.) 2752 */ 2753 #ifndef _SYS_SYSPROTO_H_ 2754 struct lchmod_args { 2755 char *path; 2756 int mode; 2757 }; 2758 #endif 2759 int 2760 sys_lchmod(td, uap) 2761 struct thread *td; 2762 register struct lchmod_args /* { 2763 char *path; 2764 int mode; 2765 } */ *uap; 2766 { 2767 2768 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2769 uap->mode, AT_SYMLINK_NOFOLLOW)); 2770 } 2771 2772 int 2773 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2774 mode_t mode, int flag) 2775 { 2776 struct nameidata nd; 2777 cap_rights_t rights; 2778 int error, follow; 2779 2780 AUDIT_ARG_MODE(mode); 2781 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2782 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2783 cap_rights_init(&rights, CAP_FCHMOD), td); 2784 if ((error = namei(&nd)) != 0) 2785 return (error); 2786 NDFREE(&nd, NDF_ONLY_PNBUF); 2787 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2788 vrele(nd.ni_vp); 2789 return (error); 2790 } 2791 2792 /* 2793 * Change mode of a file given a file descriptor. 2794 */ 2795 #ifndef _SYS_SYSPROTO_H_ 2796 struct fchmod_args { 2797 int fd; 2798 int mode; 2799 }; 2800 #endif 2801 int 2802 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2803 { 2804 struct file *fp; 2805 cap_rights_t rights; 2806 int error; 2807 2808 AUDIT_ARG_FD(uap->fd); 2809 AUDIT_ARG_MODE(uap->mode); 2810 2811 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2812 if (error != 0) 2813 return (error); 2814 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2815 fdrop(fp, td); 2816 return (error); 2817 } 2818 2819 /* 2820 * Common implementation for chown(), lchown(), and fchown() 2821 */ 2822 int 2823 setfown(td, cred, vp, uid, gid) 2824 struct thread *td; 2825 struct ucred *cred; 2826 struct vnode *vp; 2827 uid_t uid; 2828 gid_t gid; 2829 { 2830 struct mount *mp; 2831 struct vattr vattr; 2832 int error; 2833 2834 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2835 return (error); 2836 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2837 VATTR_NULL(&vattr); 2838 vattr.va_uid = uid; 2839 vattr.va_gid = gid; 2840 #ifdef MAC 2841 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2842 vattr.va_gid); 2843 if (error == 0) 2844 #endif 2845 error = VOP_SETATTR(vp, &vattr, cred); 2846 VOP_UNLOCK(vp, 0); 2847 vn_finished_write(mp); 2848 return (error); 2849 } 2850 2851 /* 2852 * Set ownership given a path name. 2853 */ 2854 #ifndef _SYS_SYSPROTO_H_ 2855 struct chown_args { 2856 char *path; 2857 int uid; 2858 int gid; 2859 }; 2860 #endif 2861 int 2862 sys_chown(td, uap) 2863 struct thread *td; 2864 register struct chown_args /* { 2865 char *path; 2866 int uid; 2867 int gid; 2868 } */ *uap; 2869 { 2870 2871 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2872 uap->gid, 0)); 2873 } 2874 2875 #ifndef _SYS_SYSPROTO_H_ 2876 struct fchownat_args { 2877 int fd; 2878 const char * path; 2879 uid_t uid; 2880 gid_t gid; 2881 int flag; 2882 }; 2883 #endif 2884 int 2885 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2886 { 2887 int flag; 2888 2889 flag = uap->flag; 2890 if (flag & ~AT_SYMLINK_NOFOLLOW) 2891 return (EINVAL); 2892 2893 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2894 uap->gid, uap->flag)); 2895 } 2896 2897 int 2898 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2899 int uid, int gid, int flag) 2900 { 2901 struct nameidata nd; 2902 cap_rights_t rights; 2903 int error, follow; 2904 2905 AUDIT_ARG_OWNER(uid, gid); 2906 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2907 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2908 cap_rights_init(&rights, CAP_FCHOWN), td); 2909 2910 if ((error = namei(&nd)) != 0) 2911 return (error); 2912 NDFREE(&nd, NDF_ONLY_PNBUF); 2913 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2914 vrele(nd.ni_vp); 2915 return (error); 2916 } 2917 2918 /* 2919 * Set ownership given a path name, do not cross symlinks. 2920 */ 2921 #ifndef _SYS_SYSPROTO_H_ 2922 struct lchown_args { 2923 char *path; 2924 int uid; 2925 int gid; 2926 }; 2927 #endif 2928 int 2929 sys_lchown(td, uap) 2930 struct thread *td; 2931 register struct lchown_args /* { 2932 char *path; 2933 int uid; 2934 int gid; 2935 } */ *uap; 2936 { 2937 2938 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2939 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2940 } 2941 2942 /* 2943 * Set ownership given a file descriptor. 2944 */ 2945 #ifndef _SYS_SYSPROTO_H_ 2946 struct fchown_args { 2947 int fd; 2948 int uid; 2949 int gid; 2950 }; 2951 #endif 2952 int 2953 sys_fchown(td, uap) 2954 struct thread *td; 2955 register struct fchown_args /* { 2956 int fd; 2957 int uid; 2958 int gid; 2959 } */ *uap; 2960 { 2961 struct file *fp; 2962 cap_rights_t rights; 2963 int error; 2964 2965 AUDIT_ARG_FD(uap->fd); 2966 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2967 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 2968 if (error != 0) 2969 return (error); 2970 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2971 fdrop(fp, td); 2972 return (error); 2973 } 2974 2975 /* 2976 * Common implementation code for utimes(), lutimes(), and futimes(). 2977 */ 2978 static int 2979 getutimes(usrtvp, tvpseg, tsp) 2980 const struct timeval *usrtvp; 2981 enum uio_seg tvpseg; 2982 struct timespec *tsp; 2983 { 2984 struct timeval tv[2]; 2985 const struct timeval *tvp; 2986 int error; 2987 2988 if (usrtvp == NULL) { 2989 vfs_timestamp(&tsp[0]); 2990 tsp[1] = tsp[0]; 2991 } else { 2992 if (tvpseg == UIO_SYSSPACE) { 2993 tvp = usrtvp; 2994 } else { 2995 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 2996 return (error); 2997 tvp = tv; 2998 } 2999 3000 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3001 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3002 return (EINVAL); 3003 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3004 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3005 } 3006 return (0); 3007 } 3008 3009 /* 3010 * Common implementation code for utimes(), lutimes(), and futimes(). 3011 */ 3012 static int 3013 setutimes(td, vp, ts, numtimes, nullflag) 3014 struct thread *td; 3015 struct vnode *vp; 3016 const struct timespec *ts; 3017 int numtimes; 3018 int nullflag; 3019 { 3020 struct mount *mp; 3021 struct vattr vattr; 3022 int error, setbirthtime; 3023 3024 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3025 return (error); 3026 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3027 setbirthtime = 0; 3028 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3029 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3030 setbirthtime = 1; 3031 VATTR_NULL(&vattr); 3032 vattr.va_atime = ts[0]; 3033 vattr.va_mtime = ts[1]; 3034 if (setbirthtime) 3035 vattr.va_birthtime = ts[1]; 3036 if (numtimes > 2) 3037 vattr.va_birthtime = ts[2]; 3038 if (nullflag) 3039 vattr.va_vaflags |= VA_UTIMES_NULL; 3040 #ifdef MAC 3041 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3042 vattr.va_mtime); 3043 #endif 3044 if (error == 0) 3045 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3046 VOP_UNLOCK(vp, 0); 3047 vn_finished_write(mp); 3048 return (error); 3049 } 3050 3051 /* 3052 * Set the access and modification times of a file. 3053 */ 3054 #ifndef _SYS_SYSPROTO_H_ 3055 struct utimes_args { 3056 char *path; 3057 struct timeval *tptr; 3058 }; 3059 #endif 3060 int 3061 sys_utimes(td, uap) 3062 struct thread *td; 3063 register struct utimes_args /* { 3064 char *path; 3065 struct timeval *tptr; 3066 } */ *uap; 3067 { 3068 3069 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3070 uap->tptr, UIO_USERSPACE)); 3071 } 3072 3073 #ifndef _SYS_SYSPROTO_H_ 3074 struct futimesat_args { 3075 int fd; 3076 const char * path; 3077 const struct timeval * times; 3078 }; 3079 #endif 3080 int 3081 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3082 { 3083 3084 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3085 uap->times, UIO_USERSPACE)); 3086 } 3087 3088 int 3089 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3090 struct timeval *tptr, enum uio_seg tptrseg) 3091 { 3092 struct nameidata nd; 3093 struct timespec ts[2]; 3094 cap_rights_t rights; 3095 int error; 3096 3097 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3098 return (error); 3099 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3100 cap_rights_init(&rights, CAP_FUTIMES), td); 3101 3102 if ((error = namei(&nd)) != 0) 3103 return (error); 3104 NDFREE(&nd, NDF_ONLY_PNBUF); 3105 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3106 vrele(nd.ni_vp); 3107 return (error); 3108 } 3109 3110 /* 3111 * Set the access and modification times of a file. 3112 */ 3113 #ifndef _SYS_SYSPROTO_H_ 3114 struct lutimes_args { 3115 char *path; 3116 struct timeval *tptr; 3117 }; 3118 #endif 3119 int 3120 sys_lutimes(td, uap) 3121 struct thread *td; 3122 register struct lutimes_args /* { 3123 char *path; 3124 struct timeval *tptr; 3125 } */ *uap; 3126 { 3127 3128 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3129 UIO_USERSPACE)); 3130 } 3131 3132 int 3133 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3134 struct timeval *tptr, enum uio_seg tptrseg) 3135 { 3136 struct timespec ts[2]; 3137 struct nameidata nd; 3138 int error; 3139 3140 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3141 return (error); 3142 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3143 if ((error = namei(&nd)) != 0) 3144 return (error); 3145 NDFREE(&nd, NDF_ONLY_PNBUF); 3146 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3147 vrele(nd.ni_vp); 3148 return (error); 3149 } 3150 3151 /* 3152 * Set the access and modification times of a file. 3153 */ 3154 #ifndef _SYS_SYSPROTO_H_ 3155 struct futimes_args { 3156 int fd; 3157 struct timeval *tptr; 3158 }; 3159 #endif 3160 int 3161 sys_futimes(td, uap) 3162 struct thread *td; 3163 register struct futimes_args /* { 3164 int fd; 3165 struct timeval *tptr; 3166 } */ *uap; 3167 { 3168 3169 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3170 } 3171 3172 int 3173 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3174 enum uio_seg tptrseg) 3175 { 3176 struct timespec ts[2]; 3177 struct file *fp; 3178 cap_rights_t rights; 3179 int error; 3180 3181 AUDIT_ARG_FD(fd); 3182 error = getutimes(tptr, tptrseg, ts); 3183 if (error != 0) 3184 return (error); 3185 error = getvnode(td->td_proc->p_fd, fd, 3186 cap_rights_init(&rights, CAP_FUTIMES), &fp); 3187 if (error != 0) 3188 return (error); 3189 #ifdef AUDIT 3190 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3191 AUDIT_ARG_VNODE1(fp->f_vnode); 3192 VOP_UNLOCK(fp->f_vnode, 0); 3193 #endif 3194 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3195 fdrop(fp, td); 3196 return (error); 3197 } 3198 3199 /* 3200 * Truncate a file given its path name. 3201 */ 3202 #ifndef _SYS_SYSPROTO_H_ 3203 struct truncate_args { 3204 char *path; 3205 int pad; 3206 off_t length; 3207 }; 3208 #endif 3209 int 3210 sys_truncate(td, uap) 3211 struct thread *td; 3212 register struct truncate_args /* { 3213 char *path; 3214 int pad; 3215 off_t length; 3216 } */ *uap; 3217 { 3218 3219 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3220 } 3221 3222 int 3223 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3224 { 3225 struct mount *mp; 3226 struct vnode *vp; 3227 void *rl_cookie; 3228 struct vattr vattr; 3229 struct nameidata nd; 3230 int error; 3231 3232 if (length < 0) 3233 return(EINVAL); 3234 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3235 if ((error = namei(&nd)) != 0) 3236 return (error); 3237 vp = nd.ni_vp; 3238 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3239 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3240 vn_rangelock_unlock(vp, rl_cookie); 3241 vrele(vp); 3242 return (error); 3243 } 3244 NDFREE(&nd, NDF_ONLY_PNBUF); 3245 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3246 if (vp->v_type == VDIR) 3247 error = EISDIR; 3248 #ifdef MAC 3249 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3250 } 3251 #endif 3252 else if ((error = vn_writechk(vp)) == 0 && 3253 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3254 VATTR_NULL(&vattr); 3255 vattr.va_size = length; 3256 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3257 } 3258 VOP_UNLOCK(vp, 0); 3259 vn_finished_write(mp); 3260 vn_rangelock_unlock(vp, rl_cookie); 3261 vrele(vp); 3262 return (error); 3263 } 3264 3265 #if defined(COMPAT_43) 3266 /* 3267 * Truncate a file given its path name. 3268 */ 3269 #ifndef _SYS_SYSPROTO_H_ 3270 struct otruncate_args { 3271 char *path; 3272 long length; 3273 }; 3274 #endif 3275 int 3276 otruncate(td, uap) 3277 struct thread *td; 3278 register struct otruncate_args /* { 3279 char *path; 3280 long length; 3281 } */ *uap; 3282 { 3283 struct truncate_args /* { 3284 char *path; 3285 int pad; 3286 off_t length; 3287 } */ nuap; 3288 3289 nuap.path = uap->path; 3290 nuap.length = uap->length; 3291 return (sys_truncate(td, &nuap)); 3292 } 3293 #endif /* COMPAT_43 */ 3294 3295 /* Versions with the pad argument */ 3296 int 3297 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3298 { 3299 struct truncate_args ouap; 3300 3301 ouap.path = uap->path; 3302 ouap.length = uap->length; 3303 return (sys_truncate(td, &ouap)); 3304 } 3305 3306 int 3307 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3308 { 3309 struct ftruncate_args ouap; 3310 3311 ouap.fd = uap->fd; 3312 ouap.length = uap->length; 3313 return (sys_ftruncate(td, &ouap)); 3314 } 3315 3316 /* 3317 * Sync an open file. 3318 */ 3319 #ifndef _SYS_SYSPROTO_H_ 3320 struct fsync_args { 3321 int fd; 3322 }; 3323 #endif 3324 int 3325 sys_fsync(td, uap) 3326 struct thread *td; 3327 struct fsync_args /* { 3328 int fd; 3329 } */ *uap; 3330 { 3331 struct vnode *vp; 3332 struct mount *mp; 3333 struct file *fp; 3334 cap_rights_t rights; 3335 int error, lock_flags; 3336 3337 AUDIT_ARG_FD(uap->fd); 3338 error = getvnode(td->td_proc->p_fd, uap->fd, 3339 cap_rights_init(&rights, CAP_FSYNC), &fp); 3340 if (error != 0) 3341 return (error); 3342 vp = fp->f_vnode; 3343 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3344 if (error != 0) 3345 goto drop; 3346 if (MNT_SHARED_WRITES(mp) || 3347 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3348 lock_flags = LK_SHARED; 3349 } else { 3350 lock_flags = LK_EXCLUSIVE; 3351 } 3352 vn_lock(vp, lock_flags | LK_RETRY); 3353 AUDIT_ARG_VNODE1(vp); 3354 if (vp->v_object != NULL) { 3355 VM_OBJECT_WLOCK(vp->v_object); 3356 vm_object_page_clean(vp->v_object, 0, 0, 0); 3357 VM_OBJECT_WUNLOCK(vp->v_object); 3358 } 3359 error = VOP_FSYNC(vp, MNT_WAIT, td); 3360 3361 VOP_UNLOCK(vp, 0); 3362 vn_finished_write(mp); 3363 drop: 3364 fdrop(fp, td); 3365 return (error); 3366 } 3367 3368 /* 3369 * Rename files. Source and destination must either both be directories, or 3370 * both not be directories. If target is a directory, it must be empty. 3371 */ 3372 #ifndef _SYS_SYSPROTO_H_ 3373 struct rename_args { 3374 char *from; 3375 char *to; 3376 }; 3377 #endif 3378 int 3379 sys_rename(td, uap) 3380 struct thread *td; 3381 register struct rename_args /* { 3382 char *from; 3383 char *to; 3384 } */ *uap; 3385 { 3386 3387 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3388 uap->to, UIO_USERSPACE)); 3389 } 3390 3391 #ifndef _SYS_SYSPROTO_H_ 3392 struct renameat_args { 3393 int oldfd; 3394 char *old; 3395 int newfd; 3396 char *new; 3397 }; 3398 #endif 3399 int 3400 sys_renameat(struct thread *td, struct renameat_args *uap) 3401 { 3402 3403 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3404 UIO_USERSPACE)); 3405 } 3406 3407 int 3408 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3409 enum uio_seg pathseg) 3410 { 3411 struct mount *mp = NULL; 3412 struct vnode *tvp, *fvp, *tdvp; 3413 struct nameidata fromnd, tond; 3414 cap_rights_t rights; 3415 int error; 3416 3417 again: 3418 bwillwrite(); 3419 #ifdef MAC 3420 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3421 AUDITVNODE1, pathseg, old, oldfd, 3422 cap_rights_init(&rights, CAP_RENAMEAT), td); 3423 #else 3424 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3425 pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT), td); 3426 #endif 3427 3428 if ((error = namei(&fromnd)) != 0) 3429 return (error); 3430 #ifdef MAC 3431 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3432 fromnd.ni_vp, &fromnd.ni_cnd); 3433 VOP_UNLOCK(fromnd.ni_dvp, 0); 3434 if (fromnd.ni_dvp != fromnd.ni_vp) 3435 VOP_UNLOCK(fromnd.ni_vp, 0); 3436 #endif 3437 fvp = fromnd.ni_vp; 3438 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3439 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3440 cap_rights_init(&rights, CAP_LINKAT), td); 3441 if (fromnd.ni_vp->v_type == VDIR) 3442 tond.ni_cnd.cn_flags |= WILLBEDIR; 3443 if ((error = namei(&tond)) != 0) { 3444 /* Translate error code for rename("dir1", "dir2/."). */ 3445 if (error == EISDIR && fvp->v_type == VDIR) 3446 error = EINVAL; 3447 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3448 vrele(fromnd.ni_dvp); 3449 vrele(fvp); 3450 goto out1; 3451 } 3452 tdvp = tond.ni_dvp; 3453 tvp = tond.ni_vp; 3454 error = vn_start_write(fvp, &mp, V_NOWAIT); 3455 if (error != 0) { 3456 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3457 NDFREE(&tond, NDF_ONLY_PNBUF); 3458 if (tvp != NULL) 3459 vput(tvp); 3460 if (tdvp == tvp) 3461 vrele(tdvp); 3462 else 3463 vput(tdvp); 3464 vrele(fromnd.ni_dvp); 3465 vrele(fvp); 3466 vrele(tond.ni_startdir); 3467 if (fromnd.ni_startdir != NULL) 3468 vrele(fromnd.ni_startdir); 3469 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3470 if (error != 0) 3471 return (error); 3472 goto again; 3473 } 3474 if (tvp != NULL) { 3475 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3476 error = ENOTDIR; 3477 goto out; 3478 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3479 error = EISDIR; 3480 goto out; 3481 } 3482 #ifdef CAPABILITIES 3483 if (newfd != AT_FDCWD) { 3484 /* 3485 * If the target already exists we require CAP_UNLINKAT 3486 * from 'newfd'. 3487 */ 3488 error = cap_check(&tond.ni_filecaps.fc_rights, 3489 cap_rights_init(&rights, CAP_UNLINKAT)); 3490 if (error != 0) 3491 goto out; 3492 } 3493 #endif 3494 } 3495 if (fvp == tdvp) { 3496 error = EINVAL; 3497 goto out; 3498 } 3499 /* 3500 * If the source is the same as the destination (that is, if they 3501 * are links to the same vnode), then there is nothing to do. 3502 */ 3503 if (fvp == tvp) 3504 error = -1; 3505 #ifdef MAC 3506 else 3507 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3508 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3509 #endif 3510 out: 3511 if (error == 0) { 3512 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3513 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3514 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3515 NDFREE(&tond, NDF_ONLY_PNBUF); 3516 } else { 3517 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3518 NDFREE(&tond, NDF_ONLY_PNBUF); 3519 if (tvp != NULL) 3520 vput(tvp); 3521 if (tdvp == tvp) 3522 vrele(tdvp); 3523 else 3524 vput(tdvp); 3525 vrele(fromnd.ni_dvp); 3526 vrele(fvp); 3527 } 3528 vrele(tond.ni_startdir); 3529 vn_finished_write(mp); 3530 out1: 3531 if (fromnd.ni_startdir) 3532 vrele(fromnd.ni_startdir); 3533 if (error == -1) 3534 return (0); 3535 return (error); 3536 } 3537 3538 /* 3539 * Make a directory file. 3540 */ 3541 #ifndef _SYS_SYSPROTO_H_ 3542 struct mkdir_args { 3543 char *path; 3544 int mode; 3545 }; 3546 #endif 3547 int 3548 sys_mkdir(td, uap) 3549 struct thread *td; 3550 register struct mkdir_args /* { 3551 char *path; 3552 int mode; 3553 } */ *uap; 3554 { 3555 3556 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3557 uap->mode)); 3558 } 3559 3560 #ifndef _SYS_SYSPROTO_H_ 3561 struct mkdirat_args { 3562 int fd; 3563 char *path; 3564 mode_t mode; 3565 }; 3566 #endif 3567 int 3568 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3569 { 3570 3571 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3572 } 3573 3574 int 3575 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3576 int mode) 3577 { 3578 struct mount *mp; 3579 struct vnode *vp; 3580 struct vattr vattr; 3581 struct nameidata nd; 3582 cap_rights_t rights; 3583 int error; 3584 3585 AUDIT_ARG_MODE(mode); 3586 restart: 3587 bwillwrite(); 3588 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3589 NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), 3590 td); 3591 nd.ni_cnd.cn_flags |= WILLBEDIR; 3592 if ((error = namei(&nd)) != 0) 3593 return (error); 3594 vp = nd.ni_vp; 3595 if (vp != NULL) { 3596 NDFREE(&nd, NDF_ONLY_PNBUF); 3597 /* 3598 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3599 * the strange behaviour of leaving the vnode unlocked 3600 * if the target is the same vnode as the parent. 3601 */ 3602 if (vp == nd.ni_dvp) 3603 vrele(nd.ni_dvp); 3604 else 3605 vput(nd.ni_dvp); 3606 vrele(vp); 3607 return (EEXIST); 3608 } 3609 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3610 NDFREE(&nd, NDF_ONLY_PNBUF); 3611 vput(nd.ni_dvp); 3612 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3613 return (error); 3614 goto restart; 3615 } 3616 VATTR_NULL(&vattr); 3617 vattr.va_type = VDIR; 3618 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3619 #ifdef MAC 3620 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3621 &vattr); 3622 if (error != 0) 3623 goto out; 3624 #endif 3625 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3626 #ifdef MAC 3627 out: 3628 #endif 3629 NDFREE(&nd, NDF_ONLY_PNBUF); 3630 vput(nd.ni_dvp); 3631 if (error == 0) 3632 vput(nd.ni_vp); 3633 vn_finished_write(mp); 3634 return (error); 3635 } 3636 3637 /* 3638 * Remove a directory file. 3639 */ 3640 #ifndef _SYS_SYSPROTO_H_ 3641 struct rmdir_args { 3642 char *path; 3643 }; 3644 #endif 3645 int 3646 sys_rmdir(td, uap) 3647 struct thread *td; 3648 struct rmdir_args /* { 3649 char *path; 3650 } */ *uap; 3651 { 3652 3653 return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); 3654 } 3655 3656 int 3657 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3658 { 3659 struct mount *mp; 3660 struct vnode *vp; 3661 struct nameidata nd; 3662 cap_rights_t rights; 3663 int error; 3664 3665 restart: 3666 bwillwrite(); 3667 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3668 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3669 if ((error = namei(&nd)) != 0) 3670 return (error); 3671 vp = nd.ni_vp; 3672 if (vp->v_type != VDIR) { 3673 error = ENOTDIR; 3674 goto out; 3675 } 3676 /* 3677 * No rmdir "." please. 3678 */ 3679 if (nd.ni_dvp == vp) { 3680 error = EINVAL; 3681 goto out; 3682 } 3683 /* 3684 * The root of a mounted filesystem cannot be deleted. 3685 */ 3686 if (vp->v_vflag & VV_ROOT) { 3687 error = EBUSY; 3688 goto out; 3689 } 3690 #ifdef MAC 3691 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3692 &nd.ni_cnd); 3693 if (error != 0) 3694 goto out; 3695 #endif 3696 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3697 NDFREE(&nd, NDF_ONLY_PNBUF); 3698 vput(vp); 3699 if (nd.ni_dvp == vp) 3700 vrele(nd.ni_dvp); 3701 else 3702 vput(nd.ni_dvp); 3703 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3704 return (error); 3705 goto restart; 3706 } 3707 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3708 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3709 vn_finished_write(mp); 3710 out: 3711 NDFREE(&nd, NDF_ONLY_PNBUF); 3712 vput(vp); 3713 if (nd.ni_dvp == vp) 3714 vrele(nd.ni_dvp); 3715 else 3716 vput(nd.ni_dvp); 3717 return (error); 3718 } 3719 3720 #ifdef COMPAT_43 3721 /* 3722 * Read a block of directory entries in a filesystem independent format. 3723 */ 3724 #ifndef _SYS_SYSPROTO_H_ 3725 struct ogetdirentries_args { 3726 int fd; 3727 char *buf; 3728 u_int count; 3729 long *basep; 3730 }; 3731 #endif 3732 int 3733 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3734 { 3735 long loff; 3736 int error; 3737 3738 error = kern_ogetdirentries(td, uap, &loff); 3739 if (error == 0) 3740 error = copyout(&loff, uap->basep, sizeof(long)); 3741 return (error); 3742 } 3743 3744 int 3745 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3746 long *ploff) 3747 { 3748 struct vnode *vp; 3749 struct file *fp; 3750 struct uio auio, kuio; 3751 struct iovec aiov, kiov; 3752 struct dirent *dp, *edp; 3753 cap_rights_t rights; 3754 caddr_t dirbuf; 3755 int error, eofflag, readcnt; 3756 long loff; 3757 off_t foffset; 3758 3759 /* XXX arbitrary sanity limit on `count'. */ 3760 if (uap->count > 64 * 1024) 3761 return (EINVAL); 3762 error = getvnode(td->td_proc->p_fd, uap->fd, 3763 cap_rights_init(&rights, CAP_READ), &fp); 3764 if (error != 0) 3765 return (error); 3766 if ((fp->f_flag & FREAD) == 0) { 3767 fdrop(fp, td); 3768 return (EBADF); 3769 } 3770 vp = fp->f_vnode; 3771 foffset = foffset_lock(fp, 0); 3772 unionread: 3773 if (vp->v_type != VDIR) { 3774 foffset_unlock(fp, foffset, 0); 3775 fdrop(fp, td); 3776 return (EINVAL); 3777 } 3778 aiov.iov_base = uap->buf; 3779 aiov.iov_len = uap->count; 3780 auio.uio_iov = &aiov; 3781 auio.uio_iovcnt = 1; 3782 auio.uio_rw = UIO_READ; 3783 auio.uio_segflg = UIO_USERSPACE; 3784 auio.uio_td = td; 3785 auio.uio_resid = uap->count; 3786 vn_lock(vp, LK_SHARED | LK_RETRY); 3787 loff = auio.uio_offset = foffset; 3788 #ifdef MAC 3789 error = mac_vnode_check_readdir(td->td_ucred, vp); 3790 if (error != 0) { 3791 VOP_UNLOCK(vp, 0); 3792 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3793 fdrop(fp, td); 3794 return (error); 3795 } 3796 #endif 3797 # if (BYTE_ORDER != LITTLE_ENDIAN) 3798 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3799 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3800 NULL, NULL); 3801 foffset = auio.uio_offset; 3802 } else 3803 # endif 3804 { 3805 kuio = auio; 3806 kuio.uio_iov = &kiov; 3807 kuio.uio_segflg = UIO_SYSSPACE; 3808 kiov.iov_len = uap->count; 3809 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3810 kiov.iov_base = dirbuf; 3811 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3812 NULL, NULL); 3813 foffset = kuio.uio_offset; 3814 if (error == 0) { 3815 readcnt = uap->count - kuio.uio_resid; 3816 edp = (struct dirent *)&dirbuf[readcnt]; 3817 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3818 # if (BYTE_ORDER == LITTLE_ENDIAN) 3819 /* 3820 * The expected low byte of 3821 * dp->d_namlen is our dp->d_type. 3822 * The high MBZ byte of dp->d_namlen 3823 * is our dp->d_namlen. 3824 */ 3825 dp->d_type = dp->d_namlen; 3826 dp->d_namlen = 0; 3827 # else 3828 /* 3829 * The dp->d_type is the high byte 3830 * of the expected dp->d_namlen, 3831 * so must be zero'ed. 3832 */ 3833 dp->d_type = 0; 3834 # endif 3835 if (dp->d_reclen > 0) { 3836 dp = (struct dirent *) 3837 ((char *)dp + dp->d_reclen); 3838 } else { 3839 error = EIO; 3840 break; 3841 } 3842 } 3843 if (dp >= edp) 3844 error = uiomove(dirbuf, readcnt, &auio); 3845 } 3846 free(dirbuf, M_TEMP); 3847 } 3848 if (error != 0) { 3849 VOP_UNLOCK(vp, 0); 3850 foffset_unlock(fp, foffset, 0); 3851 fdrop(fp, td); 3852 return (error); 3853 } 3854 if (uap->count == auio.uio_resid && 3855 (vp->v_vflag & VV_ROOT) && 3856 (vp->v_mount->mnt_flag & MNT_UNION)) { 3857 struct vnode *tvp = vp; 3858 vp = vp->v_mount->mnt_vnodecovered; 3859 VREF(vp); 3860 fp->f_vnode = vp; 3861 fp->f_data = vp; 3862 foffset = 0; 3863 vput(tvp); 3864 goto unionread; 3865 } 3866 VOP_UNLOCK(vp, 0); 3867 foffset_unlock(fp, foffset, 0); 3868 fdrop(fp, td); 3869 td->td_retval[0] = uap->count - auio.uio_resid; 3870 if (error == 0) 3871 *ploff = loff; 3872 return (error); 3873 } 3874 #endif /* COMPAT_43 */ 3875 3876 /* 3877 * Read a block of directory entries in a filesystem independent format. 3878 */ 3879 #ifndef _SYS_SYSPROTO_H_ 3880 struct getdirentries_args { 3881 int fd; 3882 char *buf; 3883 u_int count; 3884 long *basep; 3885 }; 3886 #endif 3887 int 3888 sys_getdirentries(td, uap) 3889 struct thread *td; 3890 register struct getdirentries_args /* { 3891 int fd; 3892 char *buf; 3893 u_int count; 3894 long *basep; 3895 } */ *uap; 3896 { 3897 long base; 3898 int error; 3899 3900 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3901 NULL, UIO_USERSPACE); 3902 if (error != 0) 3903 return (error); 3904 if (uap->basep != NULL) 3905 error = copyout(&base, uap->basep, sizeof(long)); 3906 return (error); 3907 } 3908 3909 int 3910 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 3911 long *basep, ssize_t *residp, enum uio_seg bufseg) 3912 { 3913 struct vnode *vp; 3914 struct file *fp; 3915 struct uio auio; 3916 struct iovec aiov; 3917 cap_rights_t rights; 3918 long loff; 3919 int error, eofflag; 3920 off_t foffset; 3921 3922 AUDIT_ARG_FD(fd); 3923 if (count > IOSIZE_MAX) 3924 return (EINVAL); 3925 auio.uio_resid = count; 3926 error = getvnode(td->td_proc->p_fd, fd, 3927 cap_rights_init(&rights, CAP_READ), &fp); 3928 if (error != 0) 3929 return (error); 3930 if ((fp->f_flag & FREAD) == 0) { 3931 fdrop(fp, td); 3932 return (EBADF); 3933 } 3934 vp = fp->f_vnode; 3935 foffset = foffset_lock(fp, 0); 3936 unionread: 3937 if (vp->v_type != VDIR) { 3938 error = EINVAL; 3939 goto fail; 3940 } 3941 aiov.iov_base = buf; 3942 aiov.iov_len = count; 3943 auio.uio_iov = &aiov; 3944 auio.uio_iovcnt = 1; 3945 auio.uio_rw = UIO_READ; 3946 auio.uio_segflg = bufseg; 3947 auio.uio_td = td; 3948 vn_lock(vp, LK_SHARED | LK_RETRY); 3949 AUDIT_ARG_VNODE1(vp); 3950 loff = auio.uio_offset = foffset; 3951 #ifdef MAC 3952 error = mac_vnode_check_readdir(td->td_ucred, vp); 3953 if (error == 0) 3954 #endif 3955 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 3956 NULL); 3957 foffset = auio.uio_offset; 3958 if (error != 0) { 3959 VOP_UNLOCK(vp, 0); 3960 goto fail; 3961 } 3962 if (count == auio.uio_resid && 3963 (vp->v_vflag & VV_ROOT) && 3964 (vp->v_mount->mnt_flag & MNT_UNION)) { 3965 struct vnode *tvp = vp; 3966 3967 vp = vp->v_mount->mnt_vnodecovered; 3968 VREF(vp); 3969 fp->f_vnode = vp; 3970 fp->f_data = vp; 3971 foffset = 0; 3972 vput(tvp); 3973 goto unionread; 3974 } 3975 VOP_UNLOCK(vp, 0); 3976 *basep = loff; 3977 if (residp != NULL) 3978 *residp = auio.uio_resid; 3979 td->td_retval[0] = count - auio.uio_resid; 3980 fail: 3981 foffset_unlock(fp, foffset, 0); 3982 fdrop(fp, td); 3983 return (error); 3984 } 3985 3986 #ifndef _SYS_SYSPROTO_H_ 3987 struct getdents_args { 3988 int fd; 3989 char *buf; 3990 size_t count; 3991 }; 3992 #endif 3993 int 3994 sys_getdents(td, uap) 3995 struct thread *td; 3996 register struct getdents_args /* { 3997 int fd; 3998 char *buf; 3999 u_int count; 4000 } */ *uap; 4001 { 4002 struct getdirentries_args ap; 4003 4004 ap.fd = uap->fd; 4005 ap.buf = uap->buf; 4006 ap.count = uap->count; 4007 ap.basep = NULL; 4008 return (sys_getdirentries(td, &ap)); 4009 } 4010 4011 /* 4012 * Set the mode mask for creation of filesystem nodes. 4013 */ 4014 #ifndef _SYS_SYSPROTO_H_ 4015 struct umask_args { 4016 int newmask; 4017 }; 4018 #endif 4019 int 4020 sys_umask(td, uap) 4021 struct thread *td; 4022 struct umask_args /* { 4023 int newmask; 4024 } */ *uap; 4025 { 4026 register struct filedesc *fdp; 4027 4028 FILEDESC_XLOCK(td->td_proc->p_fd); 4029 fdp = td->td_proc->p_fd; 4030 td->td_retval[0] = fdp->fd_cmask; 4031 fdp->fd_cmask = uap->newmask & ALLPERMS; 4032 FILEDESC_XUNLOCK(td->td_proc->p_fd); 4033 return (0); 4034 } 4035 4036 /* 4037 * Void all references to file by ripping underlying filesystem away from 4038 * vnode. 4039 */ 4040 #ifndef _SYS_SYSPROTO_H_ 4041 struct revoke_args { 4042 char *path; 4043 }; 4044 #endif 4045 int 4046 sys_revoke(td, uap) 4047 struct thread *td; 4048 register struct revoke_args /* { 4049 char *path; 4050 } */ *uap; 4051 { 4052 struct vnode *vp; 4053 struct vattr vattr; 4054 struct nameidata nd; 4055 int error; 4056 4057 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4058 uap->path, td); 4059 if ((error = namei(&nd)) != 0) 4060 return (error); 4061 vp = nd.ni_vp; 4062 NDFREE(&nd, NDF_ONLY_PNBUF); 4063 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4064 error = EINVAL; 4065 goto out; 4066 } 4067 #ifdef MAC 4068 error = mac_vnode_check_revoke(td->td_ucred, vp); 4069 if (error != 0) 4070 goto out; 4071 #endif 4072 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4073 if (error != 0) 4074 goto out; 4075 if (td->td_ucred->cr_uid != vattr.va_uid) { 4076 error = priv_check(td, PRIV_VFS_ADMIN); 4077 if (error != 0) 4078 goto out; 4079 } 4080 if (vcount(vp) > 1) 4081 VOP_REVOKE(vp, REVOKEALL); 4082 out: 4083 vput(vp); 4084 return (error); 4085 } 4086 4087 /* 4088 * Convert a user file descriptor to a kernel file entry and check that, if it 4089 * is a capability, the correct rights are present. A reference on the file 4090 * entry is held upon returning. 4091 */ 4092 int 4093 getvnode(struct filedesc *fdp, int fd, cap_rights_t *rightsp, struct file **fpp) 4094 { 4095 struct file *fp; 4096 int error; 4097 4098 error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL); 4099 if (error != 0) 4100 return (error); 4101 4102 /* 4103 * The file could be not of the vnode type, or it may be not 4104 * yet fully initialized, in which case the f_vnode pointer 4105 * may be set, but f_ops is still badfileops. E.g., 4106 * devfs_open() transiently create such situation to 4107 * facilitate csw d_fdopen(). 4108 * 4109 * Dupfdopen() handling in kern_openat() installs the 4110 * half-baked file into the process descriptor table, allowing 4111 * other thread to dereference it. Guard against the race by 4112 * checking f_ops. 4113 */ 4114 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4115 fdrop(fp, curthread); 4116 return (EINVAL); 4117 } 4118 *fpp = fp; 4119 return (0); 4120 } 4121 4122 4123 /* 4124 * Get an (NFS) file handle. 4125 */ 4126 #ifndef _SYS_SYSPROTO_H_ 4127 struct lgetfh_args { 4128 char *fname; 4129 fhandle_t *fhp; 4130 }; 4131 #endif 4132 int 4133 sys_lgetfh(td, uap) 4134 struct thread *td; 4135 register struct lgetfh_args *uap; 4136 { 4137 struct nameidata nd; 4138 fhandle_t fh; 4139 register struct vnode *vp; 4140 int error; 4141 4142 error = priv_check(td, PRIV_VFS_GETFH); 4143 if (error != 0) 4144 return (error); 4145 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4146 uap->fname, td); 4147 error = namei(&nd); 4148 if (error != 0) 4149 return (error); 4150 NDFREE(&nd, NDF_ONLY_PNBUF); 4151 vp = nd.ni_vp; 4152 bzero(&fh, sizeof(fh)); 4153 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4154 error = VOP_VPTOFH(vp, &fh.fh_fid); 4155 vput(vp); 4156 if (error == 0) 4157 error = copyout(&fh, uap->fhp, sizeof (fh)); 4158 return (error); 4159 } 4160 4161 #ifndef _SYS_SYSPROTO_H_ 4162 struct getfh_args { 4163 char *fname; 4164 fhandle_t *fhp; 4165 }; 4166 #endif 4167 int 4168 sys_getfh(td, uap) 4169 struct thread *td; 4170 register struct getfh_args *uap; 4171 { 4172 struct nameidata nd; 4173 fhandle_t fh; 4174 register struct vnode *vp; 4175 int error; 4176 4177 error = priv_check(td, PRIV_VFS_GETFH); 4178 if (error != 0) 4179 return (error); 4180 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4181 uap->fname, td); 4182 error = namei(&nd); 4183 if (error != 0) 4184 return (error); 4185 NDFREE(&nd, NDF_ONLY_PNBUF); 4186 vp = nd.ni_vp; 4187 bzero(&fh, sizeof(fh)); 4188 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4189 error = VOP_VPTOFH(vp, &fh.fh_fid); 4190 vput(vp); 4191 if (error == 0) 4192 error = copyout(&fh, uap->fhp, sizeof (fh)); 4193 return (error); 4194 } 4195 4196 /* 4197 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4198 * open descriptor. 4199 * 4200 * warning: do not remove the priv_check() call or this becomes one giant 4201 * security hole. 4202 */ 4203 #ifndef _SYS_SYSPROTO_H_ 4204 struct fhopen_args { 4205 const struct fhandle *u_fhp; 4206 int flags; 4207 }; 4208 #endif 4209 int 4210 sys_fhopen(td, uap) 4211 struct thread *td; 4212 struct fhopen_args /* { 4213 const struct fhandle *u_fhp; 4214 int flags; 4215 } */ *uap; 4216 { 4217 struct mount *mp; 4218 struct vnode *vp; 4219 struct fhandle fhp; 4220 struct file *fp; 4221 int fmode, error; 4222 int indx; 4223 4224 error = priv_check(td, PRIV_VFS_FHOPEN); 4225 if (error != 0) 4226 return (error); 4227 indx = -1; 4228 fmode = FFLAGS(uap->flags); 4229 /* why not allow a non-read/write open for our lockd? */ 4230 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4231 return (EINVAL); 4232 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4233 if (error != 0) 4234 return(error); 4235 /* find the mount point */ 4236 mp = vfs_busyfs(&fhp.fh_fsid); 4237 if (mp == NULL) 4238 return (ESTALE); 4239 /* now give me my vnode, it gets returned to me locked */ 4240 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4241 vfs_unbusy(mp); 4242 if (error != 0) 4243 return (error); 4244 4245 error = falloc_noinstall(td, &fp); 4246 if (error != 0) { 4247 vput(vp); 4248 return (error); 4249 } 4250 /* 4251 * An extra reference on `fp' has been held for us by 4252 * falloc_noinstall(). 4253 */ 4254 4255 #ifdef INVARIANTS 4256 td->td_dupfd = -1; 4257 #endif 4258 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4259 if (error != 0) { 4260 KASSERT(fp->f_ops == &badfileops, 4261 ("VOP_OPEN in fhopen() set f_ops")); 4262 KASSERT(td->td_dupfd < 0, 4263 ("fhopen() encountered fdopen()")); 4264 4265 vput(vp); 4266 goto bad; 4267 } 4268 #ifdef INVARIANTS 4269 td->td_dupfd = 0; 4270 #endif 4271 fp->f_vnode = vp; 4272 fp->f_seqcount = 1; 4273 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4274 &vnops); 4275 VOP_UNLOCK(vp, 0); 4276 if ((fmode & O_TRUNC) != 0) { 4277 error = fo_truncate(fp, 0, td->td_ucred, td); 4278 if (error != 0) 4279 goto bad; 4280 } 4281 4282 error = finstall(td, fp, &indx, fmode, NULL); 4283 bad: 4284 fdrop(fp, td); 4285 td->td_retval[0] = indx; 4286 return (error); 4287 } 4288 4289 /* 4290 * Stat an (NFS) file handle. 4291 */ 4292 #ifndef _SYS_SYSPROTO_H_ 4293 struct fhstat_args { 4294 struct fhandle *u_fhp; 4295 struct stat *sb; 4296 }; 4297 #endif 4298 int 4299 sys_fhstat(td, uap) 4300 struct thread *td; 4301 register struct fhstat_args /* { 4302 struct fhandle *u_fhp; 4303 struct stat *sb; 4304 } */ *uap; 4305 { 4306 struct stat sb; 4307 struct fhandle fh; 4308 int error; 4309 4310 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4311 if (error != 0) 4312 return (error); 4313 error = kern_fhstat(td, fh, &sb); 4314 if (error == 0) 4315 error = copyout(&sb, uap->sb, sizeof(sb)); 4316 return (error); 4317 } 4318 4319 int 4320 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4321 { 4322 struct mount *mp; 4323 struct vnode *vp; 4324 int error; 4325 4326 error = priv_check(td, PRIV_VFS_FHSTAT); 4327 if (error != 0) 4328 return (error); 4329 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4330 return (ESTALE); 4331 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4332 vfs_unbusy(mp); 4333 if (error != 0) 4334 return (error); 4335 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4336 vput(vp); 4337 return (error); 4338 } 4339 4340 /* 4341 * Implement fstatfs() for (NFS) file handles. 4342 */ 4343 #ifndef _SYS_SYSPROTO_H_ 4344 struct fhstatfs_args { 4345 struct fhandle *u_fhp; 4346 struct statfs *buf; 4347 }; 4348 #endif 4349 int 4350 sys_fhstatfs(td, uap) 4351 struct thread *td; 4352 struct fhstatfs_args /* { 4353 struct fhandle *u_fhp; 4354 struct statfs *buf; 4355 } */ *uap; 4356 { 4357 struct statfs sf; 4358 fhandle_t fh; 4359 int error; 4360 4361 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4362 if (error != 0) 4363 return (error); 4364 error = kern_fhstatfs(td, fh, &sf); 4365 if (error != 0) 4366 return (error); 4367 return (copyout(&sf, uap->buf, sizeof(sf))); 4368 } 4369 4370 int 4371 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4372 { 4373 struct statfs *sp; 4374 struct mount *mp; 4375 struct vnode *vp; 4376 int error; 4377 4378 error = priv_check(td, PRIV_VFS_FHSTATFS); 4379 if (error != 0) 4380 return (error); 4381 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4382 return (ESTALE); 4383 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4384 if (error != 0) { 4385 vfs_unbusy(mp); 4386 return (error); 4387 } 4388 vput(vp); 4389 error = prison_canseemount(td->td_ucred, mp); 4390 if (error != 0) 4391 goto out; 4392 #ifdef MAC 4393 error = mac_mount_check_stat(td->td_ucred, mp); 4394 if (error != 0) 4395 goto out; 4396 #endif 4397 /* 4398 * Set these in case the underlying filesystem fails to do so. 4399 */ 4400 sp = &mp->mnt_stat; 4401 sp->f_version = STATFS_VERSION; 4402 sp->f_namemax = NAME_MAX; 4403 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4404 error = VFS_STATFS(mp, sp); 4405 if (error == 0) 4406 *buf = *sp; 4407 out: 4408 vfs_unbusy(mp); 4409 return (error); 4410 } 4411 4412 int 4413 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4414 { 4415 struct file *fp; 4416 struct mount *mp; 4417 struct vnode *vp; 4418 cap_rights_t rights; 4419 off_t olen, ooffset; 4420 int error; 4421 4422 if (offset < 0 || len <= 0) 4423 return (EINVAL); 4424 /* Check for wrap. */ 4425 if (offset > OFF_MAX - len) 4426 return (EFBIG); 4427 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4428 if (error != 0) 4429 return (error); 4430 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4431 error = ESPIPE; 4432 goto out; 4433 } 4434 if ((fp->f_flag & FWRITE) == 0) { 4435 error = EBADF; 4436 goto out; 4437 } 4438 if (fp->f_type != DTYPE_VNODE) { 4439 error = ENODEV; 4440 goto out; 4441 } 4442 vp = fp->f_vnode; 4443 if (vp->v_type != VREG) { 4444 error = ENODEV; 4445 goto out; 4446 } 4447 4448 /* Allocating blocks may take a long time, so iterate. */ 4449 for (;;) { 4450 olen = len; 4451 ooffset = offset; 4452 4453 bwillwrite(); 4454 mp = NULL; 4455 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4456 if (error != 0) 4457 break; 4458 error = vn_lock(vp, LK_EXCLUSIVE); 4459 if (error != 0) { 4460 vn_finished_write(mp); 4461 break; 4462 } 4463 #ifdef MAC 4464 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4465 if (error == 0) 4466 #endif 4467 error = VOP_ALLOCATE(vp, &offset, &len); 4468 VOP_UNLOCK(vp, 0); 4469 vn_finished_write(mp); 4470 4471 if (olen + ooffset != offset + len) { 4472 panic("offset + len changed from %jx/%jx to %jx/%jx", 4473 ooffset, olen, offset, len); 4474 } 4475 if (error != 0 || len == 0) 4476 break; 4477 KASSERT(olen > len, ("Iteration did not make progress?")); 4478 maybe_yield(); 4479 } 4480 out: 4481 fdrop(fp, td); 4482 return (error); 4483 } 4484 4485 int 4486 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4487 { 4488 4489 td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset, 4490 uap->len); 4491 return (0); 4492 } 4493 4494 /* 4495 * Unlike madvise(2), we do not make a best effort to remember every 4496 * possible caching hint. Instead, we remember the last setting with 4497 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4498 * region of any current setting. 4499 */ 4500 int 4501 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4502 int advice) 4503 { 4504 struct fadvise_info *fa, *new; 4505 struct file *fp; 4506 struct vnode *vp; 4507 cap_rights_t rights; 4508 off_t end; 4509 int error; 4510 4511 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4512 return (EINVAL); 4513 switch (advice) { 4514 case POSIX_FADV_SEQUENTIAL: 4515 case POSIX_FADV_RANDOM: 4516 case POSIX_FADV_NOREUSE: 4517 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4518 break; 4519 case POSIX_FADV_NORMAL: 4520 case POSIX_FADV_WILLNEED: 4521 case POSIX_FADV_DONTNEED: 4522 new = NULL; 4523 break; 4524 default: 4525 return (EINVAL); 4526 } 4527 /* XXX: CAP_POSIX_FADVISE? */ 4528 error = fget(td, fd, cap_rights_init(&rights), &fp); 4529 if (error != 0) 4530 goto out; 4531 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4532 error = ESPIPE; 4533 goto out; 4534 } 4535 if (fp->f_type != DTYPE_VNODE) { 4536 error = ENODEV; 4537 goto out; 4538 } 4539 vp = fp->f_vnode; 4540 if (vp->v_type != VREG) { 4541 error = ENODEV; 4542 goto out; 4543 } 4544 if (len == 0) 4545 end = OFF_MAX; 4546 else 4547 end = offset + len - 1; 4548 switch (advice) { 4549 case POSIX_FADV_SEQUENTIAL: 4550 case POSIX_FADV_RANDOM: 4551 case POSIX_FADV_NOREUSE: 4552 /* 4553 * Try to merge any existing non-standard region with 4554 * this new region if possible, otherwise create a new 4555 * non-standard region for this request. 4556 */ 4557 mtx_pool_lock(mtxpool_sleep, fp); 4558 fa = fp->f_advice; 4559 if (fa != NULL && fa->fa_advice == advice && 4560 ((fa->fa_start <= end && fa->fa_end >= offset) || 4561 (end != OFF_MAX && fa->fa_start == end + 1) || 4562 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4563 if (offset < fa->fa_start) 4564 fa->fa_start = offset; 4565 if (end > fa->fa_end) 4566 fa->fa_end = end; 4567 } else { 4568 new->fa_advice = advice; 4569 new->fa_start = offset; 4570 new->fa_end = end; 4571 new->fa_prevstart = 0; 4572 new->fa_prevend = 0; 4573 fp->f_advice = new; 4574 new = fa; 4575 } 4576 mtx_pool_unlock(mtxpool_sleep, fp); 4577 break; 4578 case POSIX_FADV_NORMAL: 4579 /* 4580 * If a the "normal" region overlaps with an existing 4581 * non-standard region, trim or remove the 4582 * non-standard region. 4583 */ 4584 mtx_pool_lock(mtxpool_sleep, fp); 4585 fa = fp->f_advice; 4586 if (fa != NULL) { 4587 if (offset <= fa->fa_start && end >= fa->fa_end) { 4588 new = fa; 4589 fp->f_advice = NULL; 4590 } else if (offset <= fa->fa_start && 4591 end >= fa->fa_start) 4592 fa->fa_start = end + 1; 4593 else if (offset <= fa->fa_end && end >= fa->fa_end) 4594 fa->fa_end = offset - 1; 4595 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4596 /* 4597 * If the "normal" region is a middle 4598 * portion of the existing 4599 * non-standard region, just remove 4600 * the whole thing rather than picking 4601 * one side or the other to 4602 * preserve. 4603 */ 4604 new = fa; 4605 fp->f_advice = NULL; 4606 } 4607 } 4608 mtx_pool_unlock(mtxpool_sleep, fp); 4609 break; 4610 case POSIX_FADV_WILLNEED: 4611 case POSIX_FADV_DONTNEED: 4612 error = VOP_ADVISE(vp, offset, end, advice); 4613 break; 4614 } 4615 out: 4616 if (fp != NULL) 4617 fdrop(fp, td); 4618 free(new, M_FADVISE); 4619 return (error); 4620 } 4621 4622 int 4623 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4624 { 4625 4626 td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset, 4627 uap->len, uap->advice); 4628 return (0); 4629 } 4630