1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/sysent.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/sysproto.h> 55 #include <sys/namei.h> 56 #include <sys/filedesc.h> 57 #include <sys/kernel.h> 58 #include <sys/fcntl.h> 59 #include <sys/file.h> 60 #include <sys/filio.h> 61 #include <sys/limits.h> 62 #include <sys/linker.h> 63 #include <sys/rwlock.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97 static int chroot_refuse_vdir_fds(struct filedesc *fdp); 98 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 99 static int kern_chflagsat(struct thread *td, int fd, const char *path, 100 enum uio_seg pathseg, u_long flags, int atflag); 101 static int setfflags(struct thread *td, struct vnode *, u_long); 102 static int setutimes(struct thread *td, struct vnode *, 103 const struct timespec *, int, int); 104 static 
int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 105 struct thread *td); 106 107 /* 108 * The module initialization routine for POSIX asynchronous I/O will 109 * set this to the version of AIO that it implements. (Zero means 110 * that it is not implemented.) This value is used here by pathconf() 111 * and in kern_descrip.c by fpathconf(). 112 */ 113 int async_io_version; 114 115 /* 116 * Sync each mounted filesystem. 117 */ 118 #ifndef _SYS_SYSPROTO_H_ 119 struct sync_args { 120 int dummy; 121 }; 122 #endif 123 /* ARGSUSED */ 124 int 125 sys_sync(td, uap) 126 struct thread *td; 127 struct sync_args *uap; 128 { 129 struct mount *mp, *nmp; 130 int save; 131 132 mtx_lock(&mountlist_mtx); 133 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 134 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 135 nmp = TAILQ_NEXT(mp, mnt_list); 136 continue; 137 } 138 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 139 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 140 save = curthread_pflags_set(TDP_SYNCIO); 141 vfs_msync(mp, MNT_NOWAIT); 142 VFS_SYNC(mp, MNT_NOWAIT); 143 curthread_pflags_restore(save); 144 vn_finished_write(mp); 145 } 146 mtx_lock(&mountlist_mtx); 147 nmp = TAILQ_NEXT(mp, mnt_list); 148 vfs_unbusy(mp); 149 } 150 mtx_unlock(&mountlist_mtx); 151 return (0); 152 } 153 154 /* 155 * Change filesystem quotas. 
156 */ 157 #ifndef _SYS_SYSPROTO_H_ 158 struct quotactl_args { 159 char *path; 160 int cmd; 161 int uid; 162 caddr_t arg; 163 }; 164 #endif 165 int 166 sys_quotactl(td, uap) 167 struct thread *td; 168 register struct quotactl_args /* { 169 char *path; 170 int cmd; 171 int uid; 172 caddr_t arg; 173 } */ *uap; 174 { 175 struct mount *mp; 176 struct nameidata nd; 177 int error; 178 179 AUDIT_ARG_CMD(uap->cmd); 180 AUDIT_ARG_UID(uap->uid); 181 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 182 return (EPERM); 183 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 184 uap->path, td); 185 if ((error = namei(&nd)) != 0) 186 return (error); 187 NDFREE(&nd, NDF_ONLY_PNBUF); 188 mp = nd.ni_vp->v_mount; 189 vfs_ref(mp); 190 vput(nd.ni_vp); 191 error = vfs_busy(mp, 0); 192 vfs_rel(mp); 193 if (error != 0) 194 return (error); 195 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 196 197 /* 198 * Since quota on operation typically needs to open quota 199 * file, the Q_QUOTAON handler needs to unbusy the mount point 200 * before calling into namei. Otherwise, unmount might be 201 * started between two vfs_busy() invocations (first is our, 202 * second is from mount point cross-walk code in lookup()), 203 * causing deadlock. 204 * 205 * Require that Q_QUOTAON handles the vfs_busy() reference on 206 * its own, always returning with ubusied mount point. 207 */ 208 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 209 vfs_unbusy(mp); 210 return (error); 211 } 212 213 /* 214 * Used by statfs conversion routines to scale the block size up if 215 * necessary so that all of the block counts are <= 'max_size'. Note 216 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 217 * value of 'n'. 
218 */ 219 void 220 statfs_scale_blocks(struct statfs *sf, long max_size) 221 { 222 uint64_t count; 223 int shift; 224 225 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 226 227 /* 228 * Attempt to scale the block counts to give a more accurate 229 * overview to userland of the ratio of free space to used 230 * space. To do this, find the largest block count and compute 231 * a divisor that lets it fit into a signed integer <= max_size. 232 */ 233 if (sf->f_bavail < 0) 234 count = -sf->f_bavail; 235 else 236 count = sf->f_bavail; 237 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 238 if (count <= max_size) 239 return; 240 241 count >>= flsl(max_size); 242 shift = 0; 243 while (count > 0) { 244 shift++; 245 count >>=1; 246 } 247 248 sf->f_bsize <<= shift; 249 sf->f_blocks >>= shift; 250 sf->f_bfree >>= shift; 251 sf->f_bavail >>= shift; 252 } 253 254 /* 255 * Get filesystem statistics. 256 */ 257 #ifndef _SYS_SYSPROTO_H_ 258 struct statfs_args { 259 char *path; 260 struct statfs *buf; 261 }; 262 #endif 263 int 264 sys_statfs(td, uap) 265 struct thread *td; 266 register struct statfs_args /* { 267 char *path; 268 struct statfs *buf; 269 } */ *uap; 270 { 271 struct statfs sf; 272 int error; 273 274 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 275 if (error == 0) 276 error = copyout(&sf, uap->buf, sizeof(sf)); 277 return (error); 278 } 279 280 int 281 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 282 struct statfs *buf) 283 { 284 struct mount *mp; 285 struct statfs *sp, sb; 286 struct nameidata nd; 287 int error; 288 289 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 290 pathseg, path, td); 291 error = namei(&nd); 292 if (error != 0) 293 return (error); 294 mp = nd.ni_vp->v_mount; 295 vfs_ref(mp); 296 NDFREE(&nd, NDF_ONLY_PNBUF); 297 vput(nd.ni_vp); 298 error = vfs_busy(mp, 0); 299 vfs_rel(mp); 300 if (error != 0) 301 return (error); 302 #ifdef MAC 303 error = mac_mount_check_stat(td->td_ucred, 
mp); 304 if (error != 0) 305 goto out; 306 #endif 307 /* 308 * Set these in case the underlying filesystem fails to do so. 309 */ 310 sp = &mp->mnt_stat; 311 sp->f_version = STATFS_VERSION; 312 sp->f_namemax = NAME_MAX; 313 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 314 error = VFS_STATFS(mp, sp); 315 if (error != 0) 316 goto out; 317 if (priv_check(td, PRIV_VFS_GENERATION)) { 318 bcopy(sp, &sb, sizeof(sb)); 319 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 320 prison_enforce_statfs(td->td_ucred, mp, &sb); 321 sp = &sb; 322 } 323 *buf = *sp; 324 out: 325 vfs_unbusy(mp); 326 return (error); 327 } 328 329 /* 330 * Get filesystem statistics. 331 */ 332 #ifndef _SYS_SYSPROTO_H_ 333 struct fstatfs_args { 334 int fd; 335 struct statfs *buf; 336 }; 337 #endif 338 int 339 sys_fstatfs(td, uap) 340 struct thread *td; 341 register struct fstatfs_args /* { 342 int fd; 343 struct statfs *buf; 344 } */ *uap; 345 { 346 struct statfs sf; 347 int error; 348 349 error = kern_fstatfs(td, uap->fd, &sf); 350 if (error == 0) 351 error = copyout(&sf, uap->buf, sizeof(sf)); 352 return (error); 353 } 354 355 int 356 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 357 { 358 struct file *fp; 359 struct mount *mp; 360 struct statfs *sp, sb; 361 struct vnode *vp; 362 cap_rights_t rights; 363 int error; 364 365 AUDIT_ARG_FD(fd); 366 error = getvnode(td->td_proc->p_fd, fd, 367 cap_rights_init(&rights, CAP_FSTATFS), &fp); 368 if (error != 0) 369 return (error); 370 vp = fp->f_vnode; 371 vn_lock(vp, LK_SHARED | LK_RETRY); 372 #ifdef AUDIT 373 AUDIT_ARG_VNODE1(vp); 374 #endif 375 mp = vp->v_mount; 376 if (mp) 377 vfs_ref(mp); 378 VOP_UNLOCK(vp, 0); 379 fdrop(fp, td); 380 if (mp == NULL) { 381 error = EBADF; 382 goto out; 383 } 384 error = vfs_busy(mp, 0); 385 vfs_rel(mp); 386 if (error != 0) 387 return (error); 388 #ifdef MAC 389 error = mac_mount_check_stat(td->td_ucred, mp); 390 if (error != 0) 391 goto out; 392 #endif 393 /* 394 * Set these in case the underlying filesystem fails to do 
so. 395 */ 396 sp = &mp->mnt_stat; 397 sp->f_version = STATFS_VERSION; 398 sp->f_namemax = NAME_MAX; 399 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 400 error = VFS_STATFS(mp, sp); 401 if (error != 0) 402 goto out; 403 if (priv_check(td, PRIV_VFS_GENERATION)) { 404 bcopy(sp, &sb, sizeof(sb)); 405 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 406 prison_enforce_statfs(td->td_ucred, mp, &sb); 407 sp = &sb; 408 } 409 *buf = *sp; 410 out: 411 if (mp) 412 vfs_unbusy(mp); 413 return (error); 414 } 415 416 /* 417 * Get statistics on all filesystems. 418 */ 419 #ifndef _SYS_SYSPROTO_H_ 420 struct getfsstat_args { 421 struct statfs *buf; 422 long bufsize; 423 int flags; 424 }; 425 #endif 426 int 427 sys_getfsstat(td, uap) 428 struct thread *td; 429 register struct getfsstat_args /* { 430 struct statfs *buf; 431 long bufsize; 432 int flags; 433 } */ *uap; 434 { 435 436 return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE, 437 uap->flags)); 438 } 439 440 /* 441 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 442 * The caller is responsible for freeing memory which will be allocated 443 * in '*buf'. 
444 */ 445 int 446 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 447 enum uio_seg bufseg, int flags) 448 { 449 struct mount *mp, *nmp; 450 struct statfs *sfsp, *sp, sb; 451 size_t count, maxcount; 452 int error; 453 454 maxcount = bufsize / sizeof(struct statfs); 455 if (bufsize == 0) 456 sfsp = NULL; 457 else if (bufseg == UIO_USERSPACE) 458 sfsp = *buf; 459 else /* if (bufseg == UIO_SYSSPACE) */ { 460 count = 0; 461 mtx_lock(&mountlist_mtx); 462 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 463 count++; 464 } 465 mtx_unlock(&mountlist_mtx); 466 if (maxcount > count) 467 maxcount = count; 468 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 469 M_WAITOK); 470 } 471 count = 0; 472 mtx_lock(&mountlist_mtx); 473 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 474 if (prison_canseemount(td->td_ucred, mp) != 0) { 475 nmp = TAILQ_NEXT(mp, mnt_list); 476 continue; 477 } 478 #ifdef MAC 479 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 480 nmp = TAILQ_NEXT(mp, mnt_list); 481 continue; 482 } 483 #endif 484 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 485 nmp = TAILQ_NEXT(mp, mnt_list); 486 continue; 487 } 488 if (sfsp && count < maxcount) { 489 sp = &mp->mnt_stat; 490 /* 491 * Set these in case the underlying filesystem 492 * fails to do so. 493 */ 494 sp->f_version = STATFS_VERSION; 495 sp->f_namemax = NAME_MAX; 496 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 497 /* 498 * If MNT_NOWAIT or MNT_LAZY is specified, do not 499 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 500 * overrides MNT_WAIT. 
501 */ 502 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 503 (flags & MNT_WAIT)) && 504 (error = VFS_STATFS(mp, sp))) { 505 mtx_lock(&mountlist_mtx); 506 nmp = TAILQ_NEXT(mp, mnt_list); 507 vfs_unbusy(mp); 508 continue; 509 } 510 if (priv_check(td, PRIV_VFS_GENERATION)) { 511 bcopy(sp, &sb, sizeof(sb)); 512 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 513 prison_enforce_statfs(td->td_ucred, mp, &sb); 514 sp = &sb; 515 } 516 if (bufseg == UIO_SYSSPACE) 517 bcopy(sp, sfsp, sizeof(*sp)); 518 else /* if (bufseg == UIO_USERSPACE) */ { 519 error = copyout(sp, sfsp, sizeof(*sp)); 520 if (error != 0) { 521 vfs_unbusy(mp); 522 return (error); 523 } 524 } 525 sfsp++; 526 } 527 count++; 528 mtx_lock(&mountlist_mtx); 529 nmp = TAILQ_NEXT(mp, mnt_list); 530 vfs_unbusy(mp); 531 } 532 mtx_unlock(&mountlist_mtx); 533 if (sfsp && count > maxcount) 534 td->td_retval[0] = maxcount; 535 else 536 td->td_retval[0] = count; 537 return (0); 538 } 539 540 #ifdef COMPAT_FREEBSD4 541 /* 542 * Get old format filesystem statistics. 543 */ 544 static void cvtstatfs(struct statfs *, struct ostatfs *); 545 546 #ifndef _SYS_SYSPROTO_H_ 547 struct freebsd4_statfs_args { 548 char *path; 549 struct ostatfs *buf; 550 }; 551 #endif 552 int 553 freebsd4_statfs(td, uap) 554 struct thread *td; 555 struct freebsd4_statfs_args /* { 556 char *path; 557 struct ostatfs *buf; 558 } */ *uap; 559 { 560 struct ostatfs osb; 561 struct statfs sf; 562 int error; 563 564 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 565 if (error != 0) 566 return (error); 567 cvtstatfs(&sf, &osb); 568 return (copyout(&osb, uap->buf, sizeof(osb))); 569 } 570 571 /* 572 * Get filesystem statistics. 
573 */ 574 #ifndef _SYS_SYSPROTO_H_ 575 struct freebsd4_fstatfs_args { 576 int fd; 577 struct ostatfs *buf; 578 }; 579 #endif 580 int 581 freebsd4_fstatfs(td, uap) 582 struct thread *td; 583 struct freebsd4_fstatfs_args /* { 584 int fd; 585 struct ostatfs *buf; 586 } */ *uap; 587 { 588 struct ostatfs osb; 589 struct statfs sf; 590 int error; 591 592 error = kern_fstatfs(td, uap->fd, &sf); 593 if (error != 0) 594 return (error); 595 cvtstatfs(&sf, &osb); 596 return (copyout(&osb, uap->buf, sizeof(osb))); 597 } 598 599 /* 600 * Get statistics on all filesystems. 601 */ 602 #ifndef _SYS_SYSPROTO_H_ 603 struct freebsd4_getfsstat_args { 604 struct ostatfs *buf; 605 long bufsize; 606 int flags; 607 }; 608 #endif 609 int 610 freebsd4_getfsstat(td, uap) 611 struct thread *td; 612 register struct freebsd4_getfsstat_args /* { 613 struct ostatfs *buf; 614 long bufsize; 615 int flags; 616 } */ *uap; 617 { 618 struct statfs *buf, *sp; 619 struct ostatfs osb; 620 size_t count, size; 621 int error; 622 623 count = uap->bufsize / sizeof(struct ostatfs); 624 size = count * sizeof(struct statfs); 625 error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags); 626 if (size > 0) { 627 count = td->td_retval[0]; 628 sp = buf; 629 while (count > 0 && error == 0) { 630 cvtstatfs(sp, &osb); 631 error = copyout(&osb, uap->buf, sizeof(osb)); 632 sp++; 633 uap->buf++; 634 count--; 635 } 636 free(buf, M_TEMP); 637 } 638 return (error); 639 } 640 641 /* 642 * Implement fstatfs() for (NFS) file handles. 
643 */ 644 #ifndef _SYS_SYSPROTO_H_ 645 struct freebsd4_fhstatfs_args { 646 struct fhandle *u_fhp; 647 struct ostatfs *buf; 648 }; 649 #endif 650 int 651 freebsd4_fhstatfs(td, uap) 652 struct thread *td; 653 struct freebsd4_fhstatfs_args /* { 654 struct fhandle *u_fhp; 655 struct ostatfs *buf; 656 } */ *uap; 657 { 658 struct ostatfs osb; 659 struct statfs sf; 660 fhandle_t fh; 661 int error; 662 663 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 664 if (error != 0) 665 return (error); 666 error = kern_fhstatfs(td, fh, &sf); 667 if (error != 0) 668 return (error); 669 cvtstatfs(&sf, &osb); 670 return (copyout(&osb, uap->buf, sizeof(osb))); 671 } 672 673 /* 674 * Convert a new format statfs structure to an old format statfs structure. 675 */ 676 static void 677 cvtstatfs(nsp, osp) 678 struct statfs *nsp; 679 struct ostatfs *osp; 680 { 681 682 statfs_scale_blocks(nsp, LONG_MAX); 683 bzero(osp, sizeof(*osp)); 684 osp->f_bsize = nsp->f_bsize; 685 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 686 osp->f_blocks = nsp->f_blocks; 687 osp->f_bfree = nsp->f_bfree; 688 osp->f_bavail = nsp->f_bavail; 689 osp->f_files = MIN(nsp->f_files, LONG_MAX); 690 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 691 osp->f_owner = nsp->f_owner; 692 osp->f_type = nsp->f_type; 693 osp->f_flags = nsp->f_flags; 694 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 695 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 696 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 697 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 698 strlcpy(osp->f_fstypename, nsp->f_fstypename, 699 MIN(MFSNAMELEN, OMFSNAMELEN)); 700 strlcpy(osp->f_mntonname, nsp->f_mntonname, 701 MIN(MNAMELEN, OMNAMELEN)); 702 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 703 MIN(MNAMELEN, OMNAMELEN)); 704 osp->f_fsid = nsp->f_fsid; 705 } 706 #endif /* COMPAT_FREEBSD4 */ 707 708 /* 709 * Change current working directory to a given file descriptor. 
710 */ 711 #ifndef _SYS_SYSPROTO_H_ 712 struct fchdir_args { 713 int fd; 714 }; 715 #endif 716 int 717 sys_fchdir(td, uap) 718 struct thread *td; 719 struct fchdir_args /* { 720 int fd; 721 } */ *uap; 722 { 723 register struct filedesc *fdp = td->td_proc->p_fd; 724 struct vnode *vp, *tdp, *vpold; 725 struct mount *mp; 726 struct file *fp; 727 cap_rights_t rights; 728 int error; 729 730 AUDIT_ARG_FD(uap->fd); 731 error = getvnode(fdp, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 732 &fp); 733 if (error != 0) 734 return (error); 735 vp = fp->f_vnode; 736 VREF(vp); 737 fdrop(fp, td); 738 vn_lock(vp, LK_SHARED | LK_RETRY); 739 AUDIT_ARG_VNODE1(vp); 740 error = change_dir(vp, td); 741 while (!error && (mp = vp->v_mountedhere) != NULL) { 742 if (vfs_busy(mp, 0)) 743 continue; 744 error = VFS_ROOT(mp, LK_SHARED, &tdp); 745 vfs_unbusy(mp); 746 if (error != 0) 747 break; 748 vput(vp); 749 vp = tdp; 750 } 751 if (error != 0) { 752 vput(vp); 753 return (error); 754 } 755 VOP_UNLOCK(vp, 0); 756 FILEDESC_XLOCK(fdp); 757 vpold = fdp->fd_cdir; 758 fdp->fd_cdir = vp; 759 FILEDESC_XUNLOCK(fdp); 760 vrele(vpold); 761 return (0); 762 } 763 764 /* 765 * Change current working directory (``.''). 
766 */ 767 #ifndef _SYS_SYSPROTO_H_ 768 struct chdir_args { 769 char *path; 770 }; 771 #endif 772 int 773 sys_chdir(td, uap) 774 struct thread *td; 775 struct chdir_args /* { 776 char *path; 777 } */ *uap; 778 { 779 780 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 781 } 782 783 int 784 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 785 { 786 register struct filedesc *fdp = td->td_proc->p_fd; 787 struct nameidata nd; 788 struct vnode *vp; 789 int error; 790 791 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 792 pathseg, path, td); 793 if ((error = namei(&nd)) != 0) 794 return (error); 795 if ((error = change_dir(nd.ni_vp, td)) != 0) { 796 vput(nd.ni_vp); 797 NDFREE(&nd, NDF_ONLY_PNBUF); 798 return (error); 799 } 800 VOP_UNLOCK(nd.ni_vp, 0); 801 NDFREE(&nd, NDF_ONLY_PNBUF); 802 FILEDESC_XLOCK(fdp); 803 vp = fdp->fd_cdir; 804 fdp->fd_cdir = nd.ni_vp; 805 FILEDESC_XUNLOCK(fdp); 806 vrele(vp); 807 return (0); 808 } 809 810 /* 811 * Helper function for raised chroot(2) security function: Refuse if 812 * any filedescriptors are open directories. 813 */ 814 static int 815 chroot_refuse_vdir_fds(fdp) 816 struct filedesc *fdp; 817 { 818 struct vnode *vp; 819 struct file *fp; 820 int fd; 821 822 FILEDESC_LOCK_ASSERT(fdp); 823 824 for (fd = 0; fd <= fdp->fd_lastfile; fd++) { 825 fp = fget_locked(fdp, fd); 826 if (fp == NULL) 827 continue; 828 if (fp->f_type == DTYPE_VNODE) { 829 vp = fp->f_vnode; 830 if (vp->v_type == VDIR) 831 return (EPERM); 832 } 833 } 834 return (0); 835 } 836 837 /* 838 * This sysctl determines if we will allow a process to chroot(2) if it 839 * has a directory open: 840 * 0: disallowed for all processes. 841 * 1: allowed for processes that were not already chroot(2)'ed. 842 * 2: allowed for all processes. 
843 */ 844 845 static int chroot_allow_open_directories = 1; 846 847 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 848 &chroot_allow_open_directories, 0, 849 "Allow a process to chroot(2) if it has a directory open"); 850 851 /* 852 * Change notion of root (``/'') directory. 853 */ 854 #ifndef _SYS_SYSPROTO_H_ 855 struct chroot_args { 856 char *path; 857 }; 858 #endif 859 int 860 sys_chroot(td, uap) 861 struct thread *td; 862 struct chroot_args /* { 863 char *path; 864 } */ *uap; 865 { 866 struct nameidata nd; 867 int error; 868 869 error = priv_check(td, PRIV_VFS_CHROOT); 870 if (error != 0) 871 return (error); 872 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 873 UIO_USERSPACE, uap->path, td); 874 error = namei(&nd); 875 if (error != 0) 876 goto error; 877 error = change_dir(nd.ni_vp, td); 878 if (error != 0) 879 goto e_vunlock; 880 #ifdef MAC 881 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 882 if (error != 0) 883 goto e_vunlock; 884 #endif 885 VOP_UNLOCK(nd.ni_vp, 0); 886 error = change_root(nd.ni_vp, td); 887 vrele(nd.ni_vp); 888 NDFREE(&nd, NDF_ONLY_PNBUF); 889 return (error); 890 e_vunlock: 891 vput(nd.ni_vp); 892 error: 893 NDFREE(&nd, NDF_ONLY_PNBUF); 894 return (error); 895 } 896 897 /* 898 * Common routine for chroot and chdir. Callers must provide a locked vnode 899 * instance. 900 */ 901 int 902 change_dir(vp, td) 903 struct vnode *vp; 904 struct thread *td; 905 { 906 #ifdef MAC 907 int error; 908 #endif 909 910 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 911 if (vp->v_type != VDIR) 912 return (ENOTDIR); 913 #ifdef MAC 914 error = mac_vnode_check_chdir(td->td_ucred, vp); 915 if (error != 0) 916 return (error); 917 #endif 918 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 919 } 920 921 /* 922 * Common routine for kern_chroot() and jail_attach(). The caller is 923 * responsible for invoking priv_check() and mac_vnode_check_chroot() to 924 * authorize this operation. 
925 */ 926 int 927 change_root(vp, td) 928 struct vnode *vp; 929 struct thread *td; 930 { 931 struct filedesc *fdp; 932 struct vnode *oldvp; 933 int error; 934 935 fdp = td->td_proc->p_fd; 936 FILEDESC_XLOCK(fdp); 937 if (chroot_allow_open_directories == 0 || 938 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 939 error = chroot_refuse_vdir_fds(fdp); 940 if (error != 0) { 941 FILEDESC_XUNLOCK(fdp); 942 return (error); 943 } 944 } 945 oldvp = fdp->fd_rdir; 946 fdp->fd_rdir = vp; 947 VREF(fdp->fd_rdir); 948 if (!fdp->fd_jdir) { 949 fdp->fd_jdir = vp; 950 VREF(fdp->fd_jdir); 951 } 952 FILEDESC_XUNLOCK(fdp); 953 vrele(oldvp); 954 return (0); 955 } 956 957 static __inline void 958 flags_to_rights(int flags, cap_rights_t *rightsp) 959 { 960 961 if (flags & O_EXEC) { 962 cap_rights_set(rightsp, CAP_FEXECVE); 963 } else { 964 switch ((flags & O_ACCMODE)) { 965 case O_RDONLY: 966 cap_rights_set(rightsp, CAP_READ); 967 break; 968 case O_RDWR: 969 cap_rights_set(rightsp, CAP_READ); 970 /* FALLTHROUGH */ 971 case O_WRONLY: 972 cap_rights_set(rightsp, CAP_WRITE); 973 if (!(flags & (O_APPEND | O_TRUNC))) 974 cap_rights_set(rightsp, CAP_SEEK); 975 break; 976 } 977 } 978 979 if (flags & O_CREAT) 980 cap_rights_set(rightsp, CAP_CREATE); 981 982 if (flags & O_TRUNC) 983 cap_rights_set(rightsp, CAP_FTRUNCATE); 984 985 if (flags & (O_SYNC | O_FSYNC)) 986 cap_rights_set(rightsp, CAP_FSYNC); 987 988 if (flags & (O_EXLOCK | O_SHLOCK)) 989 cap_rights_set(rightsp, CAP_FLOCK); 990 } 991 992 /* 993 * Check permissions, allocate an open file structure, and call the device 994 * open routine if any. 
995 */ 996 #ifndef _SYS_SYSPROTO_H_ 997 struct open_args { 998 char *path; 999 int flags; 1000 int mode; 1001 }; 1002 #endif 1003 int 1004 sys_open(td, uap) 1005 struct thread *td; 1006 register struct open_args /* { 1007 char *path; 1008 int flags; 1009 int mode; 1010 } */ *uap; 1011 { 1012 1013 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1014 uap->flags, uap->mode)); 1015 } 1016 1017 #ifndef _SYS_SYSPROTO_H_ 1018 struct openat_args { 1019 int fd; 1020 char *path; 1021 int flag; 1022 int mode; 1023 }; 1024 #endif 1025 int 1026 sys_openat(struct thread *td, struct openat_args *uap) 1027 { 1028 1029 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1030 uap->mode)); 1031 } 1032 1033 int 1034 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1035 int flags, int mode) 1036 { 1037 struct proc *p = td->td_proc; 1038 struct filedesc *fdp = p->p_fd; 1039 struct file *fp; 1040 struct vnode *vp; 1041 struct nameidata nd; 1042 cap_rights_t rights; 1043 int cmode, error, indx; 1044 1045 indx = -1; 1046 1047 AUDIT_ARG_FFLAGS(flags); 1048 AUDIT_ARG_MODE(mode); 1049 /* XXX: audit dirfd */ 1050 cap_rights_init(&rights, CAP_LOOKUP); 1051 flags_to_rights(flags, &rights); 1052 /* 1053 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1054 * may be specified. 1055 */ 1056 if (flags & O_EXEC) { 1057 if (flags & O_ACCMODE) 1058 return (EINVAL); 1059 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1060 return (EINVAL); 1061 } else { 1062 flags = FFLAGS(flags); 1063 } 1064 1065 /* 1066 * Allocate the file descriptor, but don't install a descriptor yet. 1067 */ 1068 error = falloc_noinstall(td, &fp); 1069 if (error != 0) 1070 return (error); 1071 /* 1072 * An extra reference on `fp' has been held for us by 1073 * falloc_noinstall(). 1074 */ 1075 /* Set the flags early so the finit in devfs can pick them up. 
*/ 1076 fp->f_flag = flags & FMASK; 1077 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1078 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1079 &rights, td); 1080 td->td_dupfd = -1; /* XXX check for fdopen */ 1081 error = vn_open(&nd, &flags, cmode, fp); 1082 if (error != 0) { 1083 /* 1084 * If the vn_open replaced the method vector, something 1085 * wonderous happened deep below and we just pass it up 1086 * pretending we know what we do. 1087 */ 1088 if (error == ENXIO && fp->f_ops != &badfileops) 1089 goto success; 1090 1091 /* 1092 * Handle special fdopen() case. bleh. 1093 * 1094 * Don't do this for relative (capability) lookups; we don't 1095 * understand exactly what would happen, and we don't think 1096 * that it ever should. 1097 */ 1098 if (nd.ni_strictrelative == 0 && 1099 (error == ENODEV || error == ENXIO) && 1100 td->td_dupfd >= 0) { 1101 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1102 &indx); 1103 if (error == 0) 1104 goto success; 1105 } 1106 1107 goto bad; 1108 } 1109 td->td_dupfd = 0; 1110 NDFREE(&nd, NDF_ONLY_PNBUF); 1111 vp = nd.ni_vp; 1112 1113 /* 1114 * Store the vnode, for any f_type. Typically, the vnode use 1115 * count is decremented by direct call to vn_closefile() for 1116 * files that switched type in the cdevsw fdopen() method. 1117 */ 1118 fp->f_vnode = vp; 1119 /* 1120 * If the file wasn't claimed by devfs bind it to the normal 1121 * vnode operations here. 1122 */ 1123 if (fp->f_ops == &badfileops) { 1124 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1125 fp->f_seqcount = 1; 1126 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1127 DTYPE_VNODE, vp, &vnops); 1128 } 1129 1130 VOP_UNLOCK(vp, 0); 1131 if (flags & O_TRUNC) { 1132 error = fo_truncate(fp, 0, td->td_ucred, td); 1133 if (error != 0) 1134 goto bad; 1135 } 1136 success: 1137 /* 1138 * If we haven't already installed the FD (for dupfdopen), do so now. 
1139 */ 1140 if (indx == -1) { 1141 struct filecaps *fcaps; 1142 1143 #ifdef CAPABILITIES 1144 if (nd.ni_strictrelative == 1) 1145 fcaps = &nd.ni_filecaps; 1146 else 1147 #endif 1148 fcaps = NULL; 1149 error = finstall(td, fp, &indx, flags, fcaps); 1150 /* On success finstall() consumes fcaps. */ 1151 if (error != 0) { 1152 filecaps_free(&nd.ni_filecaps); 1153 goto bad; 1154 } 1155 } else { 1156 filecaps_free(&nd.ni_filecaps); 1157 } 1158 1159 /* 1160 * Release our private reference, leaving the one associated with 1161 * the descriptor table intact. 1162 */ 1163 fdrop(fp, td); 1164 td->td_retval[0] = indx; 1165 return (0); 1166 bad: 1167 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1168 fdrop(fp, td); 1169 return (error); 1170 } 1171 1172 #ifdef COMPAT_43 1173 /* 1174 * Create a file. 1175 */ 1176 #ifndef _SYS_SYSPROTO_H_ 1177 struct ocreat_args { 1178 char *path; 1179 int mode; 1180 }; 1181 #endif 1182 int 1183 ocreat(td, uap) 1184 struct thread *td; 1185 register struct ocreat_args /* { 1186 char *path; 1187 int mode; 1188 } */ *uap; 1189 { 1190 1191 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1192 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1193 } 1194 #endif /* COMPAT_43 */ 1195 1196 /* 1197 * Create a special file. 
1198 */ 1199 #ifndef _SYS_SYSPROTO_H_ 1200 struct mknod_args { 1201 char *path; 1202 int mode; 1203 int dev; 1204 }; 1205 #endif 1206 int 1207 sys_mknod(td, uap) 1208 struct thread *td; 1209 register struct mknod_args /* { 1210 char *path; 1211 int mode; 1212 int dev; 1213 } */ *uap; 1214 { 1215 1216 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1217 uap->mode, uap->dev)); 1218 } 1219 1220 #ifndef _SYS_SYSPROTO_H_ 1221 struct mknodat_args { 1222 int fd; 1223 char *path; 1224 mode_t mode; 1225 dev_t dev; 1226 }; 1227 #endif 1228 int 1229 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1230 { 1231 1232 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1233 uap->dev)); 1234 } 1235 1236 int 1237 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1238 int mode, int dev) 1239 { 1240 struct vnode *vp; 1241 struct mount *mp; 1242 struct vattr vattr; 1243 struct nameidata nd; 1244 cap_rights_t rights; 1245 int error, whiteout = 0; 1246 1247 AUDIT_ARG_MODE(mode); 1248 AUDIT_ARG_DEV(dev); 1249 switch (mode & S_IFMT) { 1250 case S_IFCHR: 1251 case S_IFBLK: 1252 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1253 break; 1254 case S_IFMT: 1255 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1256 break; 1257 case S_IFWHT: 1258 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1259 break; 1260 case S_IFIFO: 1261 if (dev == 0) 1262 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1263 /* FALLTHROUGH */ 1264 default: 1265 error = EINVAL; 1266 break; 1267 } 1268 if (error != 0) 1269 return (error); 1270 restart: 1271 bwillwrite(); 1272 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1273 pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), td); 1274 if ((error = namei(&nd)) != 0) 1275 return (error); 1276 vp = nd.ni_vp; 1277 if (vp != NULL) { 1278 NDFREE(&nd, NDF_ONLY_PNBUF); 1279 if (vp == nd.ni_dvp) 1280 vrele(nd.ni_dvp); 1281 else 1282 vput(nd.ni_dvp); 1283 vrele(vp); 1284 return (EEXIST); 1285 } 
else { 1286 VATTR_NULL(&vattr); 1287 vattr.va_mode = (mode & ALLPERMS) & 1288 ~td->td_proc->p_fd->fd_cmask; 1289 vattr.va_rdev = dev; 1290 whiteout = 0; 1291 1292 switch (mode & S_IFMT) { 1293 case S_IFMT: /* used by badsect to flag bad sectors */ 1294 vattr.va_type = VBAD; 1295 break; 1296 case S_IFCHR: 1297 vattr.va_type = VCHR; 1298 break; 1299 case S_IFBLK: 1300 vattr.va_type = VBLK; 1301 break; 1302 case S_IFWHT: 1303 whiteout = 1; 1304 break; 1305 default: 1306 panic("kern_mknod: invalid mode"); 1307 } 1308 } 1309 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1310 NDFREE(&nd, NDF_ONLY_PNBUF); 1311 vput(nd.ni_dvp); 1312 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1313 return (error); 1314 goto restart; 1315 } 1316 #ifdef MAC 1317 if (error == 0 && !whiteout) 1318 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1319 &nd.ni_cnd, &vattr); 1320 #endif 1321 if (error == 0) { 1322 if (whiteout) 1323 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1324 else { 1325 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1326 &nd.ni_cnd, &vattr); 1327 if (error == 0) 1328 vput(nd.ni_vp); 1329 } 1330 } 1331 NDFREE(&nd, NDF_ONLY_PNBUF); 1332 vput(nd.ni_dvp); 1333 vn_finished_write(mp); 1334 return (error); 1335 } 1336 1337 /* 1338 * Create a named pipe. 
1339 */ 1340 #ifndef _SYS_SYSPROTO_H_ 1341 struct mkfifo_args { 1342 char *path; 1343 int mode; 1344 }; 1345 #endif 1346 int 1347 sys_mkfifo(td, uap) 1348 struct thread *td; 1349 register struct mkfifo_args /* { 1350 char *path; 1351 int mode; 1352 } */ *uap; 1353 { 1354 1355 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1356 uap->mode)); 1357 } 1358 1359 #ifndef _SYS_SYSPROTO_H_ 1360 struct mkfifoat_args { 1361 int fd; 1362 char *path; 1363 mode_t mode; 1364 }; 1365 #endif 1366 int 1367 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1368 { 1369 1370 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1371 uap->mode)); 1372 } 1373 1374 int 1375 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1376 int mode) 1377 { 1378 struct mount *mp; 1379 struct vattr vattr; 1380 struct nameidata nd; 1381 cap_rights_t rights; 1382 int error; 1383 1384 AUDIT_ARG_MODE(mode); 1385 restart: 1386 bwillwrite(); 1387 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1388 pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), td); 1389 if ((error = namei(&nd)) != 0) 1390 return (error); 1391 if (nd.ni_vp != NULL) { 1392 NDFREE(&nd, NDF_ONLY_PNBUF); 1393 if (nd.ni_vp == nd.ni_dvp) 1394 vrele(nd.ni_dvp); 1395 else 1396 vput(nd.ni_dvp); 1397 vrele(nd.ni_vp); 1398 return (EEXIST); 1399 } 1400 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1401 NDFREE(&nd, NDF_ONLY_PNBUF); 1402 vput(nd.ni_dvp); 1403 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1404 return (error); 1405 goto restart; 1406 } 1407 VATTR_NULL(&vattr); 1408 vattr.va_type = VFIFO; 1409 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1410 #ifdef MAC 1411 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1412 &vattr); 1413 if (error != 0) 1414 goto out; 1415 #endif 1416 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1417 if (error == 0) 1418 vput(nd.ni_vp); 1419 #ifdef MAC 
1420 out: 1421 #endif 1422 vput(nd.ni_dvp); 1423 vn_finished_write(mp); 1424 NDFREE(&nd, NDF_ONLY_PNBUF); 1425 return (error); 1426 } 1427 1428 /* 1429 * Make a hard file link. 1430 */ 1431 #ifndef _SYS_SYSPROTO_H_ 1432 struct link_args { 1433 char *path; 1434 char *link; 1435 }; 1436 #endif 1437 int 1438 sys_link(td, uap) 1439 struct thread *td; 1440 register struct link_args /* { 1441 char *path; 1442 char *link; 1443 } */ *uap; 1444 { 1445 1446 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1447 UIO_USERSPACE, FOLLOW)); 1448 } 1449 1450 #ifndef _SYS_SYSPROTO_H_ 1451 struct linkat_args { 1452 int fd1; 1453 char *path1; 1454 int fd2; 1455 char *path2; 1456 int flag; 1457 }; 1458 #endif 1459 int 1460 sys_linkat(struct thread *td, struct linkat_args *uap) 1461 { 1462 int flag; 1463 1464 flag = uap->flag; 1465 if (flag & ~AT_SYMLINK_FOLLOW) 1466 return (EINVAL); 1467 1468 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1469 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? 
FOLLOW : NOFOLLOW)); 1470 } 1471 1472 int hardlink_check_uid = 0; 1473 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1474 &hardlink_check_uid, 0, 1475 "Unprivileged processes cannot create hard links to files owned by other " 1476 "users"); 1477 static int hardlink_check_gid = 0; 1478 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1479 &hardlink_check_gid, 0, 1480 "Unprivileged processes cannot create hard links to files owned by other " 1481 "groups"); 1482 1483 static int 1484 can_hardlink(struct vnode *vp, struct ucred *cred) 1485 { 1486 struct vattr va; 1487 int error; 1488 1489 if (!hardlink_check_uid && !hardlink_check_gid) 1490 return (0); 1491 1492 error = VOP_GETATTR(vp, &va, cred); 1493 if (error != 0) 1494 return (error); 1495 1496 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1497 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1498 if (error != 0) 1499 return (error); 1500 } 1501 1502 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1503 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1504 if (error != 0) 1505 return (error); 1506 } 1507 1508 return (0); 1509 } 1510 1511 int 1512 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1513 enum uio_seg segflg, int follow) 1514 { 1515 struct vnode *vp; 1516 struct mount *mp; 1517 struct nameidata nd; 1518 cap_rights_t rights; 1519 int error; 1520 1521 again: 1522 bwillwrite(); 1523 NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td); 1524 1525 if ((error = namei(&nd)) != 0) 1526 return (error); 1527 NDFREE(&nd, NDF_ONLY_PNBUF); 1528 vp = nd.ni_vp; 1529 if (vp->v_type == VDIR) { 1530 vrele(vp); 1531 return (EPERM); /* POSIX */ 1532 } 1533 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2, 1534 segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT), td); 1535 if ((error = namei(&nd)) == 0) { 1536 if (nd.ni_vp != NULL) { 1537 NDFREE(&nd, NDF_ONLY_PNBUF); 1538 if (nd.ni_dvp == nd.ni_vp) 1539 
vrele(nd.ni_dvp); 1540 else 1541 vput(nd.ni_dvp); 1542 vrele(nd.ni_vp); 1543 vrele(vp); 1544 return (EEXIST); 1545 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1546 /* 1547 * Cross-device link. No need to recheck 1548 * vp->v_type, since it cannot change, except 1549 * to VBAD. 1550 */ 1551 NDFREE(&nd, NDF_ONLY_PNBUF); 1552 vput(nd.ni_dvp); 1553 vrele(vp); 1554 return (EXDEV); 1555 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1556 error = can_hardlink(vp, td->td_ucred); 1557 #ifdef MAC 1558 if (error == 0) 1559 error = mac_vnode_check_link(td->td_ucred, 1560 nd.ni_dvp, vp, &nd.ni_cnd); 1561 #endif 1562 if (error != 0) { 1563 vput(vp); 1564 vput(nd.ni_dvp); 1565 NDFREE(&nd, NDF_ONLY_PNBUF); 1566 return (error); 1567 } 1568 error = vn_start_write(vp, &mp, V_NOWAIT); 1569 if (error != 0) { 1570 vput(vp); 1571 vput(nd.ni_dvp); 1572 NDFREE(&nd, NDF_ONLY_PNBUF); 1573 error = vn_start_write(NULL, &mp, 1574 V_XSLEEP | PCATCH); 1575 if (error != 0) 1576 return (error); 1577 goto again; 1578 } 1579 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1580 VOP_UNLOCK(vp, 0); 1581 vput(nd.ni_dvp); 1582 vn_finished_write(mp); 1583 NDFREE(&nd, NDF_ONLY_PNBUF); 1584 } else { 1585 vput(nd.ni_dvp); 1586 NDFREE(&nd, NDF_ONLY_PNBUF); 1587 vrele(vp); 1588 goto again; 1589 } 1590 } 1591 vrele(vp); 1592 return (error); 1593 } 1594 1595 /* 1596 * Make a symbolic link. 
1597 */ 1598 #ifndef _SYS_SYSPROTO_H_ 1599 struct symlink_args { 1600 char *path; 1601 char *link; 1602 }; 1603 #endif 1604 int 1605 sys_symlink(td, uap) 1606 struct thread *td; 1607 register struct symlink_args /* { 1608 char *path; 1609 char *link; 1610 } */ *uap; 1611 { 1612 1613 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1614 UIO_USERSPACE)); 1615 } 1616 1617 #ifndef _SYS_SYSPROTO_H_ 1618 struct symlinkat_args { 1619 char *path; 1620 int fd; 1621 char *path2; 1622 }; 1623 #endif 1624 int 1625 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1626 { 1627 1628 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1629 UIO_USERSPACE)); 1630 } 1631 1632 int 1633 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1634 enum uio_seg segflg) 1635 { 1636 struct mount *mp; 1637 struct vattr vattr; 1638 char *syspath; 1639 struct nameidata nd; 1640 int error; 1641 cap_rights_t rights; 1642 1643 if (segflg == UIO_SYSSPACE) { 1644 syspath = path1; 1645 } else { 1646 syspath = uma_zalloc(namei_zone, M_WAITOK); 1647 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1648 goto out; 1649 } 1650 AUDIT_ARG_TEXT(syspath); 1651 restart: 1652 bwillwrite(); 1653 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1654 segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), td); 1655 if ((error = namei(&nd)) != 0) 1656 goto out; 1657 if (nd.ni_vp) { 1658 NDFREE(&nd, NDF_ONLY_PNBUF); 1659 if (nd.ni_vp == nd.ni_dvp) 1660 vrele(nd.ni_dvp); 1661 else 1662 vput(nd.ni_dvp); 1663 vrele(nd.ni_vp); 1664 error = EEXIST; 1665 goto out; 1666 } 1667 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1668 NDFREE(&nd, NDF_ONLY_PNBUF); 1669 vput(nd.ni_dvp); 1670 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1671 goto out; 1672 goto restart; 1673 } 1674 VATTR_NULL(&vattr); 1675 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1676 #ifdef MAC 1677 vattr.va_type = VLNK; 1678 error = 
mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1679 &vattr); 1680 if (error != 0) 1681 goto out2; 1682 #endif 1683 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1684 if (error == 0) 1685 vput(nd.ni_vp); 1686 #ifdef MAC 1687 out2: 1688 #endif 1689 NDFREE(&nd, NDF_ONLY_PNBUF); 1690 vput(nd.ni_dvp); 1691 vn_finished_write(mp); 1692 out: 1693 if (segflg != UIO_SYSSPACE) 1694 uma_zfree(namei_zone, syspath); 1695 return (error); 1696 } 1697 1698 /* 1699 * Delete a whiteout from the filesystem. 1700 */ 1701 int 1702 sys_undelete(td, uap) 1703 struct thread *td; 1704 register struct undelete_args /* { 1705 char *path; 1706 } */ *uap; 1707 { 1708 struct mount *mp; 1709 struct nameidata nd; 1710 int error; 1711 1712 restart: 1713 bwillwrite(); 1714 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1715 UIO_USERSPACE, uap->path, td); 1716 error = namei(&nd); 1717 if (error != 0) 1718 return (error); 1719 1720 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1721 NDFREE(&nd, NDF_ONLY_PNBUF); 1722 if (nd.ni_vp == nd.ni_dvp) 1723 vrele(nd.ni_dvp); 1724 else 1725 vput(nd.ni_dvp); 1726 if (nd.ni_vp) 1727 vrele(nd.ni_vp); 1728 return (EEXIST); 1729 } 1730 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1731 NDFREE(&nd, NDF_ONLY_PNBUF); 1732 vput(nd.ni_dvp); 1733 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1734 return (error); 1735 goto restart; 1736 } 1737 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1738 NDFREE(&nd, NDF_ONLY_PNBUF); 1739 vput(nd.ni_dvp); 1740 vn_finished_write(mp); 1741 return (error); 1742 } 1743 1744 /* 1745 * Delete a name from the filesystem. 
1746 */ 1747 #ifndef _SYS_SYSPROTO_H_ 1748 struct unlink_args { 1749 char *path; 1750 }; 1751 #endif 1752 int 1753 sys_unlink(td, uap) 1754 struct thread *td; 1755 struct unlink_args /* { 1756 char *path; 1757 } */ *uap; 1758 { 1759 1760 return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); 1761 } 1762 1763 #ifndef _SYS_SYSPROTO_H_ 1764 struct unlinkat_args { 1765 int fd; 1766 char *path; 1767 int flag; 1768 }; 1769 #endif 1770 int 1771 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1772 { 1773 int flag = uap->flag; 1774 int fd = uap->fd; 1775 char *path = uap->path; 1776 1777 if (flag & ~AT_REMOVEDIR) 1778 return (EINVAL); 1779 1780 if (flag & AT_REMOVEDIR) 1781 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1782 else 1783 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1784 } 1785 1786 int 1787 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1788 ino_t oldinum) 1789 { 1790 struct mount *mp; 1791 struct vnode *vp; 1792 struct nameidata nd; 1793 struct stat sb; 1794 cap_rights_t rights; 1795 int error; 1796 1797 restart: 1798 bwillwrite(); 1799 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1800 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1801 if ((error = namei(&nd)) != 0) 1802 return (error == EINVAL ? EPERM : error); 1803 vp = nd.ni_vp; 1804 if (vp->v_type == VDIR && oldinum == 0) { 1805 error = EPERM; /* POSIX */ 1806 } else if (oldinum != 0 && 1807 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1808 sb.st_ino != oldinum) { 1809 error = EIDRM; /* Identifier removed */ 1810 } else { 1811 /* 1812 * The root of a mounted filesystem cannot be deleted. 1813 * 1814 * XXX: can this only be a VDIR case? 
1815 */ 1816 if (vp->v_vflag & VV_ROOT) 1817 error = EBUSY; 1818 } 1819 if (error == 0) { 1820 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1821 NDFREE(&nd, NDF_ONLY_PNBUF); 1822 vput(nd.ni_dvp); 1823 if (vp == nd.ni_dvp) 1824 vrele(vp); 1825 else 1826 vput(vp); 1827 if ((error = vn_start_write(NULL, &mp, 1828 V_XSLEEP | PCATCH)) != 0) 1829 return (error); 1830 goto restart; 1831 } 1832 #ifdef MAC 1833 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1834 &nd.ni_cnd); 1835 if (error != 0) 1836 goto out; 1837 #endif 1838 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1839 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1840 #ifdef MAC 1841 out: 1842 #endif 1843 vn_finished_write(mp); 1844 } 1845 NDFREE(&nd, NDF_ONLY_PNBUF); 1846 vput(nd.ni_dvp); 1847 if (vp == nd.ni_dvp) 1848 vrele(vp); 1849 else 1850 vput(vp); 1851 return (error); 1852 } 1853 1854 /* 1855 * Reposition read/write file offset. 1856 */ 1857 #ifndef _SYS_SYSPROTO_H_ 1858 struct lseek_args { 1859 int fd; 1860 int pad; 1861 off_t offset; 1862 int whence; 1863 }; 1864 #endif 1865 int 1866 sys_lseek(td, uap) 1867 struct thread *td; 1868 register struct lseek_args /* { 1869 int fd; 1870 int pad; 1871 off_t offset; 1872 int whence; 1873 } */ *uap; 1874 { 1875 struct file *fp; 1876 cap_rights_t rights; 1877 int error; 1878 1879 AUDIT_ARG_FD(uap->fd); 1880 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1881 if (error != 0) 1882 return (error); 1883 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1884 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1885 fdrop(fp, td); 1886 return (error); 1887 } 1888 1889 #if defined(COMPAT_43) 1890 /* 1891 * Reposition read/write file offset. 
1892 */ 1893 #ifndef _SYS_SYSPROTO_H_ 1894 struct olseek_args { 1895 int fd; 1896 long offset; 1897 int whence; 1898 }; 1899 #endif 1900 int 1901 olseek(td, uap) 1902 struct thread *td; 1903 register struct olseek_args /* { 1904 int fd; 1905 long offset; 1906 int whence; 1907 } */ *uap; 1908 { 1909 struct lseek_args /* { 1910 int fd; 1911 int pad; 1912 off_t offset; 1913 int whence; 1914 } */ nuap; 1915 1916 nuap.fd = uap->fd; 1917 nuap.offset = uap->offset; 1918 nuap.whence = uap->whence; 1919 return (sys_lseek(td, &nuap)); 1920 } 1921 #endif /* COMPAT_43 */ 1922 1923 /* Version with the 'pad' argument */ 1924 int 1925 freebsd6_lseek(td, uap) 1926 struct thread *td; 1927 register struct freebsd6_lseek_args *uap; 1928 { 1929 struct lseek_args ouap; 1930 1931 ouap.fd = uap->fd; 1932 ouap.offset = uap->offset; 1933 ouap.whence = uap->whence; 1934 return (sys_lseek(td, &ouap)); 1935 } 1936 1937 /* 1938 * Check access permissions using passed credentials. 1939 */ 1940 static int 1941 vn_access(vp, user_flags, cred, td) 1942 struct vnode *vp; 1943 int user_flags; 1944 struct ucred *cred; 1945 struct thread *td; 1946 { 1947 accmode_t accmode; 1948 int error; 1949 1950 /* Flags == 0 means only check for existence. */ 1951 if (user_flags == 0) 1952 return (0); 1953 1954 accmode = 0; 1955 if (user_flags & R_OK) 1956 accmode |= VREAD; 1957 if (user_flags & W_OK) 1958 accmode |= VWRITE; 1959 if (user_flags & X_OK) 1960 accmode |= VEXEC; 1961 #ifdef MAC 1962 error = mac_vnode_check_access(cred, vp, accmode); 1963 if (error != 0) 1964 return (error); 1965 #endif 1966 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1967 error = VOP_ACCESS(vp, accmode, cred, td); 1968 return (error); 1969 } 1970 1971 /* 1972 * Check access permissions using "real" credentials. 
1973 */ 1974 #ifndef _SYS_SYSPROTO_H_ 1975 struct access_args { 1976 char *path; 1977 int amode; 1978 }; 1979 #endif 1980 int 1981 sys_access(td, uap) 1982 struct thread *td; 1983 register struct access_args /* { 1984 char *path; 1985 int amode; 1986 } */ *uap; 1987 { 1988 1989 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1990 0, uap->amode)); 1991 } 1992 1993 #ifndef _SYS_SYSPROTO_H_ 1994 struct faccessat_args { 1995 int dirfd; 1996 char *path; 1997 int amode; 1998 int flag; 1999 } 2000 #endif 2001 int 2002 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2003 { 2004 2005 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2006 uap->amode)); 2007 } 2008 2009 int 2010 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2011 int flag, int amode) 2012 { 2013 struct ucred *cred, *usecred; 2014 struct vnode *vp; 2015 struct nameidata nd; 2016 cap_rights_t rights; 2017 int error; 2018 2019 if (flag & ~AT_EACCESS) 2020 return (EINVAL); 2021 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2022 return (EINVAL); 2023 2024 /* 2025 * Create and modify a temporary credential instead of one that 2026 * is potentially shared (if we need one). 
2027 */ 2028 cred = td->td_ucred; 2029 if ((flag & AT_EACCESS) == 0 && 2030 ((cred->cr_uid != cred->cr_ruid || 2031 cred->cr_rgid != cred->cr_groups[0]))) { 2032 usecred = crdup(cred); 2033 usecred->cr_uid = cred->cr_ruid; 2034 usecred->cr_groups[0] = cred->cr_rgid; 2035 td->td_ucred = usecred; 2036 } else 2037 usecred = cred; 2038 AUDIT_ARG_VALUE(amode); 2039 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2040 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 2041 td); 2042 if ((error = namei(&nd)) != 0) 2043 goto out; 2044 vp = nd.ni_vp; 2045 2046 error = vn_access(vp, amode, usecred, td); 2047 NDFREE(&nd, NDF_ONLY_PNBUF); 2048 vput(vp); 2049 out: 2050 if (usecred != cred) { 2051 td->td_ucred = cred; 2052 crfree(usecred); 2053 } 2054 return (error); 2055 } 2056 2057 /* 2058 * Check access permissions using "effective" credentials. 2059 */ 2060 #ifndef _SYS_SYSPROTO_H_ 2061 struct eaccess_args { 2062 char *path; 2063 int amode; 2064 }; 2065 #endif 2066 int 2067 sys_eaccess(td, uap) 2068 struct thread *td; 2069 register struct eaccess_args /* { 2070 char *path; 2071 int amode; 2072 } */ *uap; 2073 { 2074 2075 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2076 AT_EACCESS, uap->amode)); 2077 } 2078 2079 #if defined(COMPAT_43) 2080 /* 2081 * Get file status; this version follows links. 2082 */ 2083 #ifndef _SYS_SYSPROTO_H_ 2084 struct ostat_args { 2085 char *path; 2086 struct ostat *ub; 2087 }; 2088 #endif 2089 int 2090 ostat(td, uap) 2091 struct thread *td; 2092 register struct ostat_args /* { 2093 char *path; 2094 struct ostat *ub; 2095 } */ *uap; 2096 { 2097 struct stat sb; 2098 struct ostat osb; 2099 int error; 2100 2101 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2102 &sb, NULL); 2103 if (error != 0) 2104 return (error); 2105 cvtstat(&sb, &osb); 2106 return (copyout(&osb, uap->ub, sizeof (osb))); 2107 } 2108 2109 /* 2110 * Get file status; this version does not follow links. 
2111 */ 2112 #ifndef _SYS_SYSPROTO_H_ 2113 struct olstat_args { 2114 char *path; 2115 struct ostat *ub; 2116 }; 2117 #endif 2118 int 2119 olstat(td, uap) 2120 struct thread *td; 2121 register struct olstat_args /* { 2122 char *path; 2123 struct ostat *ub; 2124 } */ *uap; 2125 { 2126 struct stat sb; 2127 struct ostat osb; 2128 int error; 2129 2130 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2131 UIO_USERSPACE, &sb, NULL); 2132 if (error != 0) 2133 return (error); 2134 cvtstat(&sb, &osb); 2135 return (copyout(&osb, uap->ub, sizeof (osb))); 2136 } 2137 2138 /* 2139 * Convert from an old to a new stat structure. 2140 */ 2141 void 2142 cvtstat(st, ost) 2143 struct stat *st; 2144 struct ostat *ost; 2145 { 2146 2147 ost->st_dev = st->st_dev; 2148 ost->st_ino = st->st_ino; 2149 ost->st_mode = st->st_mode; 2150 ost->st_nlink = st->st_nlink; 2151 ost->st_uid = st->st_uid; 2152 ost->st_gid = st->st_gid; 2153 ost->st_rdev = st->st_rdev; 2154 if (st->st_size < (quad_t)1 << 32) 2155 ost->st_size = st->st_size; 2156 else 2157 ost->st_size = -2; 2158 ost->st_atim = st->st_atim; 2159 ost->st_mtim = st->st_mtim; 2160 ost->st_ctim = st->st_ctim; 2161 ost->st_blksize = st->st_blksize; 2162 ost->st_blocks = st->st_blocks; 2163 ost->st_flags = st->st_flags; 2164 ost->st_gen = st->st_gen; 2165 } 2166 #endif /* COMPAT_43 */ 2167 2168 /* 2169 * Get file status; this version follows links. 
2170 */ 2171 #ifndef _SYS_SYSPROTO_H_ 2172 struct stat_args { 2173 char *path; 2174 struct stat *ub; 2175 }; 2176 #endif 2177 int 2178 sys_stat(td, uap) 2179 struct thread *td; 2180 register struct stat_args /* { 2181 char *path; 2182 struct stat *ub; 2183 } */ *uap; 2184 { 2185 struct stat sb; 2186 int error; 2187 2188 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2189 &sb, NULL); 2190 if (error == 0) 2191 error = copyout(&sb, uap->ub, sizeof (sb)); 2192 return (error); 2193 } 2194 2195 #ifndef _SYS_SYSPROTO_H_ 2196 struct fstatat_args { 2197 int fd; 2198 char *path; 2199 struct stat *buf; 2200 int flag; 2201 } 2202 #endif 2203 int 2204 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2205 { 2206 struct stat sb; 2207 int error; 2208 2209 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2210 UIO_USERSPACE, &sb, NULL); 2211 if (error == 0) 2212 error = copyout(&sb, uap->buf, sizeof (sb)); 2213 return (error); 2214 } 2215 2216 int 2217 kern_statat(struct thread *td, int flag, int fd, char *path, 2218 enum uio_seg pathseg, struct stat *sbp, 2219 void (*hook)(struct vnode *vp, struct stat *sbp)) 2220 { 2221 struct nameidata nd; 2222 struct stat sb; 2223 cap_rights_t rights; 2224 int error; 2225 2226 if (flag & ~AT_SYMLINK_NOFOLLOW) 2227 return (EINVAL); 2228 2229 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : 2230 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2231 cap_rights_init(&rights, CAP_FSTAT), td); 2232 2233 if ((error = namei(&nd)) != 0) 2234 return (error); 2235 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2236 if (error == 0) { 2237 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0); 2238 if (S_ISREG(sb.st_mode)) 2239 SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0); 2240 if (__predict_false(hook != NULL)) 2241 hook(nd.ni_vp, &sb); 2242 } 2243 NDFREE(&nd, NDF_ONLY_PNBUF); 2244 vput(nd.ni_vp); 2245 if (error != 0) 2246 return (error); 2247 *sbp = sb; 2248 #ifdef KTRACE 2249 if (KTRPOINT(td, KTR_STRUCT)) 2250 ktrstat(&sb); 2251 #endif 2252 return (0); 2253 } 2254 2255 /* 2256 * Get file status; this version does not follow links. 2257 */ 2258 #ifndef _SYS_SYSPROTO_H_ 2259 struct lstat_args { 2260 char *path; 2261 struct stat *ub; 2262 }; 2263 #endif 2264 int 2265 sys_lstat(td, uap) 2266 struct thread *td; 2267 register struct lstat_args /* { 2268 char *path; 2269 struct stat *ub; 2270 } */ *uap; 2271 { 2272 struct stat sb; 2273 int error; 2274 2275 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2276 UIO_USERSPACE, &sb, NULL); 2277 if (error == 0) 2278 error = copyout(&sb, uap->ub, sizeof (sb)); 2279 return (error); 2280 } 2281 2282 /* 2283 * Implementation of the NetBSD [l]stat() functions. 
2284 */ 2285 void 2286 cvtnstat(sb, nsb) 2287 struct stat *sb; 2288 struct nstat *nsb; 2289 { 2290 2291 bzero(nsb, sizeof *nsb); 2292 nsb->st_dev = sb->st_dev; 2293 nsb->st_ino = sb->st_ino; 2294 nsb->st_mode = sb->st_mode; 2295 nsb->st_nlink = sb->st_nlink; 2296 nsb->st_uid = sb->st_uid; 2297 nsb->st_gid = sb->st_gid; 2298 nsb->st_rdev = sb->st_rdev; 2299 nsb->st_atim = sb->st_atim; 2300 nsb->st_mtim = sb->st_mtim; 2301 nsb->st_ctim = sb->st_ctim; 2302 nsb->st_size = sb->st_size; 2303 nsb->st_blocks = sb->st_blocks; 2304 nsb->st_blksize = sb->st_blksize; 2305 nsb->st_flags = sb->st_flags; 2306 nsb->st_gen = sb->st_gen; 2307 nsb->st_birthtim = sb->st_birthtim; 2308 } 2309 2310 #ifndef _SYS_SYSPROTO_H_ 2311 struct nstat_args { 2312 char *path; 2313 struct nstat *ub; 2314 }; 2315 #endif 2316 int 2317 sys_nstat(td, uap) 2318 struct thread *td; 2319 register struct nstat_args /* { 2320 char *path; 2321 struct nstat *ub; 2322 } */ *uap; 2323 { 2324 struct stat sb; 2325 struct nstat nsb; 2326 int error; 2327 2328 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2329 &sb, NULL); 2330 if (error != 0) 2331 return (error); 2332 cvtnstat(&sb, &nsb); 2333 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2334 } 2335 2336 /* 2337 * NetBSD lstat. Get file status; this version does not follow links. 2338 */ 2339 #ifndef _SYS_SYSPROTO_H_ 2340 struct lstat_args { 2341 char *path; 2342 struct stat *ub; 2343 }; 2344 #endif 2345 int 2346 sys_nlstat(td, uap) 2347 struct thread *td; 2348 register struct nlstat_args /* { 2349 char *path; 2350 struct nstat *ub; 2351 } */ *uap; 2352 { 2353 struct stat sb; 2354 struct nstat nsb; 2355 int error; 2356 2357 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2358 UIO_USERSPACE, &sb, NULL); 2359 if (error != 0) 2360 return (error); 2361 cvtnstat(&sb, &nsb); 2362 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2363 } 2364 2365 /* 2366 * Get configurable pathname variables. 
2367 */ 2368 #ifndef _SYS_SYSPROTO_H_ 2369 struct pathconf_args { 2370 char *path; 2371 int name; 2372 }; 2373 #endif 2374 int 2375 sys_pathconf(td, uap) 2376 struct thread *td; 2377 register struct pathconf_args /* { 2378 char *path; 2379 int name; 2380 } */ *uap; 2381 { 2382 2383 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2384 } 2385 2386 #ifndef _SYS_SYSPROTO_H_ 2387 struct lpathconf_args { 2388 char *path; 2389 int name; 2390 }; 2391 #endif 2392 int 2393 sys_lpathconf(td, uap) 2394 struct thread *td; 2395 register struct lpathconf_args /* { 2396 char *path; 2397 int name; 2398 } */ *uap; 2399 { 2400 2401 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2402 NOFOLLOW)); 2403 } 2404 2405 int 2406 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2407 u_long flags) 2408 { 2409 struct nameidata nd; 2410 int error; 2411 2412 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2413 pathseg, path, td); 2414 if ((error = namei(&nd)) != 0) 2415 return (error); 2416 NDFREE(&nd, NDF_ONLY_PNBUF); 2417 2418 /* If asynchronous I/O is available, it works for all files. */ 2419 if (name == _PC_ASYNC_IO) 2420 td->td_retval[0] = async_io_version; 2421 else 2422 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2423 vput(nd.ni_vp); 2424 return (error); 2425 } 2426 2427 /* 2428 * Return target name of a symbolic link. 
2429 */ 2430 #ifndef _SYS_SYSPROTO_H_ 2431 struct readlink_args { 2432 char *path; 2433 char *buf; 2434 size_t count; 2435 }; 2436 #endif 2437 int 2438 sys_readlink(td, uap) 2439 struct thread *td; 2440 register struct readlink_args /* { 2441 char *path; 2442 char *buf; 2443 size_t count; 2444 } */ *uap; 2445 { 2446 2447 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2448 uap->buf, UIO_USERSPACE, uap->count)); 2449 } 2450 #ifndef _SYS_SYSPROTO_H_ 2451 struct readlinkat_args { 2452 int fd; 2453 char *path; 2454 char *buf; 2455 size_t bufsize; 2456 }; 2457 #endif 2458 int 2459 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2460 { 2461 2462 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2463 uap->buf, UIO_USERSPACE, uap->bufsize)); 2464 } 2465 2466 int 2467 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2468 char *buf, enum uio_seg bufseg, size_t count) 2469 { 2470 struct vnode *vp; 2471 struct iovec aiov; 2472 struct uio auio; 2473 struct nameidata nd; 2474 int error; 2475 2476 if (count > IOSIZE_MAX) 2477 return (EINVAL); 2478 2479 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2480 pathseg, path, fd, td); 2481 2482 if ((error = namei(&nd)) != 0) 2483 return (error); 2484 NDFREE(&nd, NDF_ONLY_PNBUF); 2485 vp = nd.ni_vp; 2486 #ifdef MAC 2487 error = mac_vnode_check_readlink(td->td_ucred, vp); 2488 if (error != 0) { 2489 vput(vp); 2490 return (error); 2491 } 2492 #endif 2493 if (vp->v_type != VLNK) 2494 error = EINVAL; 2495 else { 2496 aiov.iov_base = buf; 2497 aiov.iov_len = count; 2498 auio.uio_iov = &aiov; 2499 auio.uio_iovcnt = 1; 2500 auio.uio_offset = 0; 2501 auio.uio_rw = UIO_READ; 2502 auio.uio_segflg = bufseg; 2503 auio.uio_td = td; 2504 auio.uio_resid = count; 2505 error = VOP_READLINK(vp, &auio, td->td_ucred); 2506 td->td_retval[0] = count - auio.uio_resid; 2507 } 2508 vput(vp); 2509 return (error); 2510 } 2511 2512 /* 2513 * Common implementation code for 
chflags() and fchflags(). 2514 */ 2515 static int 2516 setfflags(td, vp, flags) 2517 struct thread *td; 2518 struct vnode *vp; 2519 u_long flags; 2520 { 2521 struct mount *mp; 2522 struct vattr vattr; 2523 int error; 2524 2525 /* We can't support the value matching VNOVAL. */ 2526 if (flags == VNOVAL) 2527 return (EOPNOTSUPP); 2528 2529 /* 2530 * Prevent non-root users from setting flags on devices. When 2531 * a device is reused, users can retain ownership of the device 2532 * if they are allowed to set flags and programs assume that 2533 * chown can't fail when done as root. 2534 */ 2535 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2536 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2537 if (error != 0) 2538 return (error); 2539 } 2540 2541 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2542 return (error); 2543 VATTR_NULL(&vattr); 2544 vattr.va_flags = flags; 2545 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2546 #ifdef MAC 2547 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2548 if (error == 0) 2549 #endif 2550 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2551 VOP_UNLOCK(vp, 0); 2552 vn_finished_write(mp); 2553 return (error); 2554 } 2555 2556 /* 2557 * Change flags of a file given a path name. 
2558 */ 2559 #ifndef _SYS_SYSPROTO_H_ 2560 struct chflags_args { 2561 const char *path; 2562 u_long flags; 2563 }; 2564 #endif 2565 int 2566 sys_chflags(td, uap) 2567 struct thread *td; 2568 register struct chflags_args /* { 2569 const char *path; 2570 u_long flags; 2571 } */ *uap; 2572 { 2573 2574 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2575 uap->flags, 0)); 2576 } 2577 2578 #ifndef _SYS_SYSPROTO_H_ 2579 struct chflagsat_args { 2580 int fd; 2581 const char *path; 2582 u_long flags; 2583 int atflag; 2584 } 2585 #endif 2586 int 2587 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2588 { 2589 int fd = uap->fd; 2590 const char *path = uap->path; 2591 u_long flags = uap->flags; 2592 int atflag = uap->atflag; 2593 2594 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2595 return (EINVAL); 2596 2597 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2598 } 2599 2600 /* 2601 * Same as chflags() but doesn't follow symlinks. 2602 */ 2603 int 2604 sys_lchflags(td, uap) 2605 struct thread *td; 2606 register struct lchflags_args /* { 2607 const char *path; 2608 u_long flags; 2609 } */ *uap; 2610 { 2611 2612 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2613 uap->flags, AT_SYMLINK_NOFOLLOW)); 2614 } 2615 2616 static int 2617 kern_chflagsat(struct thread *td, int fd, const char *path, 2618 enum uio_seg pathseg, u_long flags, int atflag) 2619 { 2620 struct nameidata nd; 2621 cap_rights_t rights; 2622 int error, follow; 2623 2624 AUDIT_ARG_FFLAGS(flags); 2625 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2626 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2627 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2628 if ((error = namei(&nd)) != 0) 2629 return (error); 2630 NDFREE(&nd, NDF_ONLY_PNBUF); 2631 error = setfflags(td, nd.ni_vp, flags); 2632 vrele(nd.ni_vp); 2633 return (error); 2634 } 2635 2636 /* 2637 * Change flags of a file given a file descriptor. 
2638 */ 2639 #ifndef _SYS_SYSPROTO_H_ 2640 struct fchflags_args { 2641 int fd; 2642 u_long flags; 2643 }; 2644 #endif 2645 int 2646 sys_fchflags(td, uap) 2647 struct thread *td; 2648 register struct fchflags_args /* { 2649 int fd; 2650 u_long flags; 2651 } */ *uap; 2652 { 2653 struct file *fp; 2654 cap_rights_t rights; 2655 int error; 2656 2657 AUDIT_ARG_FD(uap->fd); 2658 AUDIT_ARG_FFLAGS(uap->flags); 2659 error = getvnode(td->td_proc->p_fd, uap->fd, 2660 cap_rights_init(&rights, CAP_FCHFLAGS), &fp); 2661 if (error != 0) 2662 return (error); 2663 #ifdef AUDIT 2664 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2665 AUDIT_ARG_VNODE1(fp->f_vnode); 2666 VOP_UNLOCK(fp->f_vnode, 0); 2667 #endif 2668 error = setfflags(td, fp->f_vnode, uap->flags); 2669 fdrop(fp, td); 2670 return (error); 2671 } 2672 2673 /* 2674 * Common implementation code for chmod(), lchmod() and fchmod(). 2675 */ 2676 int 2677 setfmode(td, cred, vp, mode) 2678 struct thread *td; 2679 struct ucred *cred; 2680 struct vnode *vp; 2681 int mode; 2682 { 2683 struct mount *mp; 2684 struct vattr vattr; 2685 int error; 2686 2687 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2688 return (error); 2689 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2690 VATTR_NULL(&vattr); 2691 vattr.va_mode = mode & ALLPERMS; 2692 #ifdef MAC 2693 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2694 if (error == 0) 2695 #endif 2696 error = VOP_SETATTR(vp, &vattr, cred); 2697 VOP_UNLOCK(vp, 0); 2698 vn_finished_write(mp); 2699 return (error); 2700 } 2701 2702 /* 2703 * Change mode of a file given path name. 
2704 */ 2705 #ifndef _SYS_SYSPROTO_H_ 2706 struct chmod_args { 2707 char *path; 2708 int mode; 2709 }; 2710 #endif 2711 int 2712 sys_chmod(td, uap) 2713 struct thread *td; 2714 register struct chmod_args /* { 2715 char *path; 2716 int mode; 2717 } */ *uap; 2718 { 2719 2720 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2721 uap->mode, 0)); 2722 } 2723 2724 #ifndef _SYS_SYSPROTO_H_ 2725 struct fchmodat_args { 2726 int dirfd; 2727 char *path; 2728 mode_t mode; 2729 int flag; 2730 } 2731 #endif 2732 int 2733 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2734 { 2735 int flag = uap->flag; 2736 int fd = uap->fd; 2737 char *path = uap->path; 2738 mode_t mode = uap->mode; 2739 2740 if (flag & ~AT_SYMLINK_NOFOLLOW) 2741 return (EINVAL); 2742 2743 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2744 } 2745 2746 /* 2747 * Change mode of a file given path name (don't follow links.) 2748 */ 2749 #ifndef _SYS_SYSPROTO_H_ 2750 struct lchmod_args { 2751 char *path; 2752 int mode; 2753 }; 2754 #endif 2755 int 2756 sys_lchmod(td, uap) 2757 struct thread *td; 2758 register struct lchmod_args /* { 2759 char *path; 2760 int mode; 2761 } */ *uap; 2762 { 2763 2764 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2765 uap->mode, AT_SYMLINK_NOFOLLOW)); 2766 } 2767 2768 int 2769 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2770 mode_t mode, int flag) 2771 { 2772 struct nameidata nd; 2773 cap_rights_t rights; 2774 int error, follow; 2775 2776 AUDIT_ARG_MODE(mode); 2777 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; 2778 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2779 cap_rights_init(&rights, CAP_FCHMOD), td); 2780 if ((error = namei(&nd)) != 0) 2781 return (error); 2782 NDFREE(&nd, NDF_ONLY_PNBUF); 2783 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2784 vrele(nd.ni_vp); 2785 return (error); 2786 } 2787 2788 /* 2789 * Change mode of a file given a file descriptor. 2790 */ 2791 #ifndef _SYS_SYSPROTO_H_ 2792 struct fchmod_args { 2793 int fd; 2794 int mode; 2795 }; 2796 #endif 2797 int 2798 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2799 { 2800 struct file *fp; 2801 cap_rights_t rights; 2802 int error; 2803 2804 AUDIT_ARG_FD(uap->fd); 2805 AUDIT_ARG_MODE(uap->mode); 2806 2807 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2808 if (error != 0) 2809 return (error); 2810 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2811 fdrop(fp, td); 2812 return (error); 2813 } 2814 2815 /* 2816 * Common implementation for chown(), lchown(), and fchown() 2817 */ 2818 int 2819 setfown(td, cred, vp, uid, gid) 2820 struct thread *td; 2821 struct ucred *cred; 2822 struct vnode *vp; 2823 uid_t uid; 2824 gid_t gid; 2825 { 2826 struct mount *mp; 2827 struct vattr vattr; 2828 int error; 2829 2830 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2831 return (error); 2832 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2833 VATTR_NULL(&vattr); 2834 vattr.va_uid = uid; 2835 vattr.va_gid = gid; 2836 #ifdef MAC 2837 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2838 vattr.va_gid); 2839 if (error == 0) 2840 #endif 2841 error = VOP_SETATTR(vp, &vattr, cred); 2842 VOP_UNLOCK(vp, 0); 2843 vn_finished_write(mp); 2844 return (error); 2845 } 2846 2847 /* 2848 * Set ownership given a path name. 
2849 */ 2850 #ifndef _SYS_SYSPROTO_H_ 2851 struct chown_args { 2852 char *path; 2853 int uid; 2854 int gid; 2855 }; 2856 #endif 2857 int 2858 sys_chown(td, uap) 2859 struct thread *td; 2860 register struct chown_args /* { 2861 char *path; 2862 int uid; 2863 int gid; 2864 } */ *uap; 2865 { 2866 2867 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2868 uap->gid, 0)); 2869 } 2870 2871 #ifndef _SYS_SYSPROTO_H_ 2872 struct fchownat_args { 2873 int fd; 2874 const char * path; 2875 uid_t uid; 2876 gid_t gid; 2877 int flag; 2878 }; 2879 #endif 2880 int 2881 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2882 { 2883 int flag; 2884 2885 flag = uap->flag; 2886 if (flag & ~AT_SYMLINK_NOFOLLOW) 2887 return (EINVAL); 2888 2889 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2890 uap->gid, uap->flag)); 2891 } 2892 2893 int 2894 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2895 int uid, int gid, int flag) 2896 { 2897 struct nameidata nd; 2898 cap_rights_t rights; 2899 int error, follow; 2900 2901 AUDIT_ARG_OWNER(uid, gid); 2902 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2903 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2904 cap_rights_init(&rights, CAP_FCHOWN), td); 2905 2906 if ((error = namei(&nd)) != 0) 2907 return (error); 2908 NDFREE(&nd, NDF_ONLY_PNBUF); 2909 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2910 vrele(nd.ni_vp); 2911 return (error); 2912 } 2913 2914 /* 2915 * Set ownership given a path name, do not cross symlinks. 
2916 */ 2917 #ifndef _SYS_SYSPROTO_H_ 2918 struct lchown_args { 2919 char *path; 2920 int uid; 2921 int gid; 2922 }; 2923 #endif 2924 int 2925 sys_lchown(td, uap) 2926 struct thread *td; 2927 register struct lchown_args /* { 2928 char *path; 2929 int uid; 2930 int gid; 2931 } */ *uap; 2932 { 2933 2934 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2935 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2936 } 2937 2938 /* 2939 * Set ownership given a file descriptor. 2940 */ 2941 #ifndef _SYS_SYSPROTO_H_ 2942 struct fchown_args { 2943 int fd; 2944 int uid; 2945 int gid; 2946 }; 2947 #endif 2948 int 2949 sys_fchown(td, uap) 2950 struct thread *td; 2951 register struct fchown_args /* { 2952 int fd; 2953 int uid; 2954 int gid; 2955 } */ *uap; 2956 { 2957 struct file *fp; 2958 cap_rights_t rights; 2959 int error; 2960 2961 AUDIT_ARG_FD(uap->fd); 2962 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2963 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 2964 if (error != 0) 2965 return (error); 2966 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2967 fdrop(fp, td); 2968 return (error); 2969 } 2970 2971 /* 2972 * Common implementation code for utimes(), lutimes(), and futimes(). 
2973 */ 2974 static int 2975 getutimes(usrtvp, tvpseg, tsp) 2976 const struct timeval *usrtvp; 2977 enum uio_seg tvpseg; 2978 struct timespec *tsp; 2979 { 2980 struct timeval tv[2]; 2981 const struct timeval *tvp; 2982 int error; 2983 2984 if (usrtvp == NULL) { 2985 vfs_timestamp(&tsp[0]); 2986 tsp[1] = tsp[0]; 2987 } else { 2988 if (tvpseg == UIO_SYSSPACE) { 2989 tvp = usrtvp; 2990 } else { 2991 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 2992 return (error); 2993 tvp = tv; 2994 } 2995 2996 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 2997 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 2998 return (EINVAL); 2999 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3000 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3001 } 3002 return (0); 3003 } 3004 3005 /* 3006 * Common implementation code for utimes(), lutimes(), and futimes(). 3007 */ 3008 static int 3009 setutimes(td, vp, ts, numtimes, nullflag) 3010 struct thread *td; 3011 struct vnode *vp; 3012 const struct timespec *ts; 3013 int numtimes; 3014 int nullflag; 3015 { 3016 struct mount *mp; 3017 struct vattr vattr; 3018 int error, setbirthtime; 3019 3020 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3021 return (error); 3022 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3023 setbirthtime = 0; 3024 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3025 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3026 setbirthtime = 1; 3027 VATTR_NULL(&vattr); 3028 vattr.va_atime = ts[0]; 3029 vattr.va_mtime = ts[1]; 3030 if (setbirthtime) 3031 vattr.va_birthtime = ts[1]; 3032 if (numtimes > 2) 3033 vattr.va_birthtime = ts[2]; 3034 if (nullflag) 3035 vattr.va_vaflags |= VA_UTIMES_NULL; 3036 #ifdef MAC 3037 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3038 vattr.va_mtime); 3039 #endif 3040 if (error == 0) 3041 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3042 VOP_UNLOCK(vp, 0); 3043 vn_finished_write(mp); 3044 return (error); 3045 } 3046 3047 /* 3048 * Set the access and modification 
times of a file. 3049 */ 3050 #ifndef _SYS_SYSPROTO_H_ 3051 struct utimes_args { 3052 char *path; 3053 struct timeval *tptr; 3054 }; 3055 #endif 3056 int 3057 sys_utimes(td, uap) 3058 struct thread *td; 3059 register struct utimes_args /* { 3060 char *path; 3061 struct timeval *tptr; 3062 } */ *uap; 3063 { 3064 3065 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3066 uap->tptr, UIO_USERSPACE)); 3067 } 3068 3069 #ifndef _SYS_SYSPROTO_H_ 3070 struct futimesat_args { 3071 int fd; 3072 const char * path; 3073 const struct timeval * times; 3074 }; 3075 #endif 3076 int 3077 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3078 { 3079 3080 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3081 uap->times, UIO_USERSPACE)); 3082 } 3083 3084 int 3085 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3086 struct timeval *tptr, enum uio_seg tptrseg) 3087 { 3088 struct nameidata nd; 3089 struct timespec ts[2]; 3090 cap_rights_t rights; 3091 int error; 3092 3093 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3094 return (error); 3095 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3096 cap_rights_init(&rights, CAP_FUTIMES), td); 3097 3098 if ((error = namei(&nd)) != 0) 3099 return (error); 3100 NDFREE(&nd, NDF_ONLY_PNBUF); 3101 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3102 vrele(nd.ni_vp); 3103 return (error); 3104 } 3105 3106 /* 3107 * Set the access and modification times of a file. 
3108 */ 3109 #ifndef _SYS_SYSPROTO_H_ 3110 struct lutimes_args { 3111 char *path; 3112 struct timeval *tptr; 3113 }; 3114 #endif 3115 int 3116 sys_lutimes(td, uap) 3117 struct thread *td; 3118 register struct lutimes_args /* { 3119 char *path; 3120 struct timeval *tptr; 3121 } */ *uap; 3122 { 3123 3124 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3125 UIO_USERSPACE)); 3126 } 3127 3128 int 3129 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3130 struct timeval *tptr, enum uio_seg tptrseg) 3131 { 3132 struct timespec ts[2]; 3133 struct nameidata nd; 3134 int error; 3135 3136 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3137 return (error); 3138 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3139 if ((error = namei(&nd)) != 0) 3140 return (error); 3141 NDFREE(&nd, NDF_ONLY_PNBUF); 3142 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3143 vrele(nd.ni_vp); 3144 return (error); 3145 } 3146 3147 /* 3148 * Set the access and modification times of a file. 
3149 */ 3150 #ifndef _SYS_SYSPROTO_H_ 3151 struct futimes_args { 3152 int fd; 3153 struct timeval *tptr; 3154 }; 3155 #endif 3156 int 3157 sys_futimes(td, uap) 3158 struct thread *td; 3159 register struct futimes_args /* { 3160 int fd; 3161 struct timeval *tptr; 3162 } */ *uap; 3163 { 3164 3165 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3166 } 3167 3168 int 3169 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3170 enum uio_seg tptrseg) 3171 { 3172 struct timespec ts[2]; 3173 struct file *fp; 3174 cap_rights_t rights; 3175 int error; 3176 3177 AUDIT_ARG_FD(fd); 3178 error = getutimes(tptr, tptrseg, ts); 3179 if (error != 0) 3180 return (error); 3181 error = getvnode(td->td_proc->p_fd, fd, 3182 cap_rights_init(&rights, CAP_FUTIMES), &fp); 3183 if (error != 0) 3184 return (error); 3185 #ifdef AUDIT 3186 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3187 AUDIT_ARG_VNODE1(fp->f_vnode); 3188 VOP_UNLOCK(fp->f_vnode, 0); 3189 #endif 3190 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3191 fdrop(fp, td); 3192 return (error); 3193 } 3194 3195 /* 3196 * Truncate a file given its path name. 
3197 */ 3198 #ifndef _SYS_SYSPROTO_H_ 3199 struct truncate_args { 3200 char *path; 3201 int pad; 3202 off_t length; 3203 }; 3204 #endif 3205 int 3206 sys_truncate(td, uap) 3207 struct thread *td; 3208 register struct truncate_args /* { 3209 char *path; 3210 int pad; 3211 off_t length; 3212 } */ *uap; 3213 { 3214 3215 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3216 } 3217 3218 int 3219 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3220 { 3221 struct mount *mp; 3222 struct vnode *vp; 3223 void *rl_cookie; 3224 struct vattr vattr; 3225 struct nameidata nd; 3226 int error; 3227 3228 if (length < 0) 3229 return(EINVAL); 3230 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3231 if ((error = namei(&nd)) != 0) 3232 return (error); 3233 vp = nd.ni_vp; 3234 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3235 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3236 vn_rangelock_unlock(vp, rl_cookie); 3237 vrele(vp); 3238 return (error); 3239 } 3240 NDFREE(&nd, NDF_ONLY_PNBUF); 3241 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3242 if (vp->v_type == VDIR) 3243 error = EISDIR; 3244 #ifdef MAC 3245 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3246 } 3247 #endif 3248 else if ((error = vn_writechk(vp)) == 0 && 3249 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3250 VATTR_NULL(&vattr); 3251 vattr.va_size = length; 3252 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3253 } 3254 VOP_UNLOCK(vp, 0); 3255 vn_finished_write(mp); 3256 vn_rangelock_unlock(vp, rl_cookie); 3257 vrele(vp); 3258 return (error); 3259 } 3260 3261 #if defined(COMPAT_43) 3262 /* 3263 * Truncate a file given its path name. 
3264 */ 3265 #ifndef _SYS_SYSPROTO_H_ 3266 struct otruncate_args { 3267 char *path; 3268 long length; 3269 }; 3270 #endif 3271 int 3272 otruncate(td, uap) 3273 struct thread *td; 3274 register struct otruncate_args /* { 3275 char *path; 3276 long length; 3277 } */ *uap; 3278 { 3279 struct truncate_args /* { 3280 char *path; 3281 int pad; 3282 off_t length; 3283 } */ nuap; 3284 3285 nuap.path = uap->path; 3286 nuap.length = uap->length; 3287 return (sys_truncate(td, &nuap)); 3288 } 3289 #endif /* COMPAT_43 */ 3290 3291 /* Versions with the pad argument */ 3292 int 3293 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3294 { 3295 struct truncate_args ouap; 3296 3297 ouap.path = uap->path; 3298 ouap.length = uap->length; 3299 return (sys_truncate(td, &ouap)); 3300 } 3301 3302 int 3303 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3304 { 3305 struct ftruncate_args ouap; 3306 3307 ouap.fd = uap->fd; 3308 ouap.length = uap->length; 3309 return (sys_ftruncate(td, &ouap)); 3310 } 3311 3312 /* 3313 * Sync an open file. 
3314 */ 3315 #ifndef _SYS_SYSPROTO_H_ 3316 struct fsync_args { 3317 int fd; 3318 }; 3319 #endif 3320 int 3321 sys_fsync(td, uap) 3322 struct thread *td; 3323 struct fsync_args /* { 3324 int fd; 3325 } */ *uap; 3326 { 3327 struct vnode *vp; 3328 struct mount *mp; 3329 struct file *fp; 3330 cap_rights_t rights; 3331 int error, lock_flags; 3332 3333 AUDIT_ARG_FD(uap->fd); 3334 error = getvnode(td->td_proc->p_fd, uap->fd, 3335 cap_rights_init(&rights, CAP_FSYNC), &fp); 3336 if (error != 0) 3337 return (error); 3338 vp = fp->f_vnode; 3339 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3340 if (error != 0) 3341 goto drop; 3342 if (MNT_SHARED_WRITES(mp) || 3343 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3344 lock_flags = LK_SHARED; 3345 } else { 3346 lock_flags = LK_EXCLUSIVE; 3347 } 3348 vn_lock(vp, lock_flags | LK_RETRY); 3349 AUDIT_ARG_VNODE1(vp); 3350 if (vp->v_object != NULL) { 3351 VM_OBJECT_WLOCK(vp->v_object); 3352 vm_object_page_clean(vp->v_object, 0, 0, 0); 3353 VM_OBJECT_WUNLOCK(vp->v_object); 3354 } 3355 error = VOP_FSYNC(vp, MNT_WAIT, td); 3356 3357 VOP_UNLOCK(vp, 0); 3358 vn_finished_write(mp); 3359 drop: 3360 fdrop(fp, td); 3361 return (error); 3362 } 3363 3364 /* 3365 * Rename files. Source and destination must either both be directories, or 3366 * both not be directories. If target is a directory, it must be empty. 
3367 */ 3368 #ifndef _SYS_SYSPROTO_H_ 3369 struct rename_args { 3370 char *from; 3371 char *to; 3372 }; 3373 #endif 3374 int 3375 sys_rename(td, uap) 3376 struct thread *td; 3377 register struct rename_args /* { 3378 char *from; 3379 char *to; 3380 } */ *uap; 3381 { 3382 3383 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3384 uap->to, UIO_USERSPACE)); 3385 } 3386 3387 #ifndef _SYS_SYSPROTO_H_ 3388 struct renameat_args { 3389 int oldfd; 3390 char *old; 3391 int newfd; 3392 char *new; 3393 }; 3394 #endif 3395 int 3396 sys_renameat(struct thread *td, struct renameat_args *uap) 3397 { 3398 3399 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3400 UIO_USERSPACE)); 3401 } 3402 3403 int 3404 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3405 enum uio_seg pathseg) 3406 { 3407 struct mount *mp = NULL; 3408 struct vnode *tvp, *fvp, *tdvp; 3409 struct nameidata fromnd, tond; 3410 cap_rights_t rights; 3411 int error; 3412 3413 again: 3414 bwillwrite(); 3415 #ifdef MAC 3416 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3417 AUDITVNODE1, pathseg, old, oldfd, 3418 cap_rights_init(&rights, CAP_RENAMEAT), td); 3419 #else 3420 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3421 pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT), td); 3422 #endif 3423 3424 if ((error = namei(&fromnd)) != 0) 3425 return (error); 3426 #ifdef MAC 3427 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3428 fromnd.ni_vp, &fromnd.ni_cnd); 3429 VOP_UNLOCK(fromnd.ni_dvp, 0); 3430 if (fromnd.ni_dvp != fromnd.ni_vp) 3431 VOP_UNLOCK(fromnd.ni_vp, 0); 3432 #endif 3433 fvp = fromnd.ni_vp; 3434 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3435 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3436 cap_rights_init(&rights, CAP_LINKAT), td); 3437 if (fromnd.ni_vp->v_type == VDIR) 3438 tond.ni_cnd.cn_flags |= WILLBEDIR; 3439 if ((error = namei(&tond)) != 0) { 3440 /* 
Translate error code for rename("dir1", "dir2/."). */ 3441 if (error == EISDIR && fvp->v_type == VDIR) 3442 error = EINVAL; 3443 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3444 vrele(fromnd.ni_dvp); 3445 vrele(fvp); 3446 goto out1; 3447 } 3448 tdvp = tond.ni_dvp; 3449 tvp = tond.ni_vp; 3450 error = vn_start_write(fvp, &mp, V_NOWAIT); 3451 if (error != 0) { 3452 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3453 NDFREE(&tond, NDF_ONLY_PNBUF); 3454 if (tvp != NULL) 3455 vput(tvp); 3456 if (tdvp == tvp) 3457 vrele(tdvp); 3458 else 3459 vput(tdvp); 3460 vrele(fromnd.ni_dvp); 3461 vrele(fvp); 3462 vrele(tond.ni_startdir); 3463 if (fromnd.ni_startdir != NULL) 3464 vrele(fromnd.ni_startdir); 3465 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3466 if (error != 0) 3467 return (error); 3468 goto again; 3469 } 3470 if (tvp != NULL) { 3471 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3472 error = ENOTDIR; 3473 goto out; 3474 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3475 error = EISDIR; 3476 goto out; 3477 } 3478 #ifdef CAPABILITIES 3479 if (newfd != AT_FDCWD) { 3480 /* 3481 * If the target already exists we require CAP_UNLINKAT 3482 * from 'newfd'. 3483 */ 3484 error = cap_check(&tond.ni_filecaps.fc_rights, 3485 cap_rights_init(&rights, CAP_UNLINKAT)); 3486 if (error != 0) 3487 goto out; 3488 } 3489 #endif 3490 } 3491 if (fvp == tdvp) { 3492 error = EINVAL; 3493 goto out; 3494 } 3495 /* 3496 * If the source is the same as the destination (that is, if they 3497 * are links to the same vnode), then there is nothing to do. 
3498 */ 3499 if (fvp == tvp) 3500 error = -1; 3501 #ifdef MAC 3502 else 3503 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3504 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3505 #endif 3506 out: 3507 if (error == 0) { 3508 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3509 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3510 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3511 NDFREE(&tond, NDF_ONLY_PNBUF); 3512 } else { 3513 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3514 NDFREE(&tond, NDF_ONLY_PNBUF); 3515 if (tvp != NULL) 3516 vput(tvp); 3517 if (tdvp == tvp) 3518 vrele(tdvp); 3519 else 3520 vput(tdvp); 3521 vrele(fromnd.ni_dvp); 3522 vrele(fvp); 3523 } 3524 vrele(tond.ni_startdir); 3525 vn_finished_write(mp); 3526 out1: 3527 if (fromnd.ni_startdir) 3528 vrele(fromnd.ni_startdir); 3529 if (error == -1) 3530 return (0); 3531 return (error); 3532 } 3533 3534 /* 3535 * Make a directory file. 3536 */ 3537 #ifndef _SYS_SYSPROTO_H_ 3538 struct mkdir_args { 3539 char *path; 3540 int mode; 3541 }; 3542 #endif 3543 int 3544 sys_mkdir(td, uap) 3545 struct thread *td; 3546 register struct mkdir_args /* { 3547 char *path; 3548 int mode; 3549 } */ *uap; 3550 { 3551 3552 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3553 uap->mode)); 3554 } 3555 3556 #ifndef _SYS_SYSPROTO_H_ 3557 struct mkdirat_args { 3558 int fd; 3559 char *path; 3560 mode_t mode; 3561 }; 3562 #endif 3563 int 3564 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3565 { 3566 3567 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3568 } 3569 3570 int 3571 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3572 int mode) 3573 { 3574 struct mount *mp; 3575 struct vnode *vp; 3576 struct vattr vattr; 3577 struct nameidata nd; 3578 cap_rights_t rights; 3579 int error; 3580 3581 AUDIT_ARG_MODE(mode); 3582 restart: 3583 bwillwrite(); 3584 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 3585 segflg, path, fd, cap_rights_init(&rights, 
CAP_MKDIRAT), td); 3586 nd.ni_cnd.cn_flags |= WILLBEDIR; 3587 if ((error = namei(&nd)) != 0) 3588 return (error); 3589 vp = nd.ni_vp; 3590 if (vp != NULL) { 3591 NDFREE(&nd, NDF_ONLY_PNBUF); 3592 /* 3593 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3594 * the strange behaviour of leaving the vnode unlocked 3595 * if the target is the same vnode as the parent. 3596 */ 3597 if (vp == nd.ni_dvp) 3598 vrele(nd.ni_dvp); 3599 else 3600 vput(nd.ni_dvp); 3601 vrele(vp); 3602 return (EEXIST); 3603 } 3604 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3605 NDFREE(&nd, NDF_ONLY_PNBUF); 3606 vput(nd.ni_dvp); 3607 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3608 return (error); 3609 goto restart; 3610 } 3611 VATTR_NULL(&vattr); 3612 vattr.va_type = VDIR; 3613 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3614 #ifdef MAC 3615 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3616 &vattr); 3617 if (error != 0) 3618 goto out; 3619 #endif 3620 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3621 #ifdef MAC 3622 out: 3623 #endif 3624 NDFREE(&nd, NDF_ONLY_PNBUF); 3625 vput(nd.ni_dvp); 3626 if (error == 0) 3627 vput(nd.ni_vp); 3628 vn_finished_write(mp); 3629 return (error); 3630 } 3631 3632 /* 3633 * Remove a directory file. 
3634 */ 3635 #ifndef _SYS_SYSPROTO_H_ 3636 struct rmdir_args { 3637 char *path; 3638 }; 3639 #endif 3640 int 3641 sys_rmdir(td, uap) 3642 struct thread *td; 3643 struct rmdir_args /* { 3644 char *path; 3645 } */ *uap; 3646 { 3647 3648 return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); 3649 } 3650 3651 int 3652 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3653 { 3654 struct mount *mp; 3655 struct vnode *vp; 3656 struct nameidata nd; 3657 cap_rights_t rights; 3658 int error; 3659 3660 restart: 3661 bwillwrite(); 3662 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3663 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3664 if ((error = namei(&nd)) != 0) 3665 return (error); 3666 vp = nd.ni_vp; 3667 if (vp->v_type != VDIR) { 3668 error = ENOTDIR; 3669 goto out; 3670 } 3671 /* 3672 * No rmdir "." please. 3673 */ 3674 if (nd.ni_dvp == vp) { 3675 error = EINVAL; 3676 goto out; 3677 } 3678 /* 3679 * The root of a mounted filesystem cannot be deleted. 3680 */ 3681 if (vp->v_vflag & VV_ROOT) { 3682 error = EBUSY; 3683 goto out; 3684 } 3685 #ifdef MAC 3686 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3687 &nd.ni_cnd); 3688 if (error != 0) 3689 goto out; 3690 #endif 3691 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3692 NDFREE(&nd, NDF_ONLY_PNBUF); 3693 vput(vp); 3694 if (nd.ni_dvp == vp) 3695 vrele(nd.ni_dvp); 3696 else 3697 vput(nd.ni_dvp); 3698 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3699 return (error); 3700 goto restart; 3701 } 3702 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3703 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3704 vn_finished_write(mp); 3705 out: 3706 NDFREE(&nd, NDF_ONLY_PNBUF); 3707 vput(vp); 3708 if (nd.ni_dvp == vp) 3709 vrele(nd.ni_dvp); 3710 else 3711 vput(nd.ni_dvp); 3712 return (error); 3713 } 3714 3715 #ifdef COMPAT_43 3716 /* 3717 * Read a block of directory entries in a filesystem independent format. 
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
/*
 * COMPAT_43 system call wrapper: runs kern_ogetdirentries() and, on
 * success, copies the starting directory offset out to uap->basep.
 */
int
ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
{
	long loff;
	int error;

	error = kern_ogetdirentries(td, uap, &loff);
	if (error == 0)
		error = copyout(&loff, uap->basep, sizeof(long));
	return (error);
}

/*
 * Read up to uap->count bytes of directory entries from the directory
 * open on uap->fd into the user buffer uap->buf, rewriting the
 * d_type/d_namlen fields of each entry on the way (see the comments in
 * the conversion loop).  On success the number of bytes produced is stored
 * in td->td_retval[0] and the file offset at which the read started is
 * stored in *ploff.  Returns 0 or an errno value: EINVAL when the count
 * exceeds 64KB or the vnode is not a directory, EBADF when the file is
 * not open for reading, EIO when an entry with a zero record length is
 * found.
 */
int
kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
    long *ploff)
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	cap_rights_t rights;
	caddr_t dirbuf;
	int error, eofflag, readcnt;
	long loff;
	off_t foffset;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	error = getvnode(td->td_proc->p_fd, uap->fd,
	    cap_rights_init(&rights, CAP_READ), &fp);
	if (error != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->f_vnode;
	foffset = foffset_lock(fp, 0);
	/* Re-entered with the covered vnode for an empty union mount root. */
unionread:
	if (vp->v_type != VDIR) {
		foffset_unlock(fp, foffset, 0);
		fdrop(fp, td);
		return (EINVAL);
	}
	/* 'auio' describes the caller's user-space buffer. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	/* Remember where this read starts; reported through *ploff. */
	loff = auio.uio_offset = foffset;
#ifdef MAC
	error = mac_vnode_check_readdir(td->td_ucred, vp);
	if (error != 0) {
		VOP_UNLOCK(vp, 0);
		foffset_unlock(fp, foffset, FOF_NOUPDATE);
		fdrop(fp, td);
		return (error);
	}
#endif
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		/* Entries are read straight into the user buffer, unconverted. */
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			foffset = auio.uio_offset;
		} else
#	endif
	{
		/*
		 * Read into a temporary kernel buffer ('kuio' is a copy of
		 * 'auio' redirected to 'dirbuf'), convert every entry in
		 * place, then move the result to the user buffer.
		 */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = uap->count;
		dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		foffset = kuio.uio_offset;
		if (error == 0) {
			readcnt = uap->count - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				/* A zero record length would never advance. */
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					error = EIO;
					break;
				}
			}
			/* Copy out only if the loop ran to the end. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		free(dirbuf, M_TEMP);
	}
	if (error != 0) {
		VOP_UNLOCK(vp, 0);
		foffset_unlock(fp, foffset, 0);
		fdrop(fp, td);
		return (error);
	}
	/*
	 * Nothing was produced and this is the root vnode of a MNT_UNION
	 * mount: switch the file over to the covered vnode and read again
	 * from offset 0.
	 */
	if (uap->count == auio.uio_resid &&
	    (vp->v_vflag & VV_ROOT) &&
	    (vp->v_mount->mnt_flag & MNT_UNION)) {
		struct vnode *tvp = vp;
		vp = vp->v_mount->mnt_vnodecovered;
		VREF(vp);
		fp->f_vnode = vp;
		fp->f_data = vp;
		foffset = 0;
		vput(tvp);
		goto unionread;
	}
	VOP_UNLOCK(vp, 0);
	foffset_unlock(fp, foffset, 0);
	fdrop(fp, td);
	td->td_retval[0] = uap->count - auio.uio_resid;
	if (error == 0)
		*ploff = loff;
	return (error);
}
#endif /* COMPAT_43 */

/*
 * Read a block of directory entries in a filesystem independent
format. 3873 */ 3874 #ifndef _SYS_SYSPROTO_H_ 3875 struct getdirentries_args { 3876 int fd; 3877 char *buf; 3878 u_int count; 3879 long *basep; 3880 }; 3881 #endif 3882 int 3883 sys_getdirentries(td, uap) 3884 struct thread *td; 3885 register struct getdirentries_args /* { 3886 int fd; 3887 char *buf; 3888 u_int count; 3889 long *basep; 3890 } */ *uap; 3891 { 3892 long base; 3893 int error; 3894 3895 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3896 NULL, UIO_USERSPACE); 3897 if (error != 0) 3898 return (error); 3899 if (uap->basep != NULL) 3900 error = copyout(&base, uap->basep, sizeof(long)); 3901 return (error); 3902 } 3903 3904 int 3905 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 3906 long *basep, ssize_t *residp, enum uio_seg bufseg) 3907 { 3908 struct vnode *vp; 3909 struct file *fp; 3910 struct uio auio; 3911 struct iovec aiov; 3912 cap_rights_t rights; 3913 long loff; 3914 int error, eofflag; 3915 off_t foffset; 3916 3917 AUDIT_ARG_FD(fd); 3918 if (count > IOSIZE_MAX) 3919 return (EINVAL); 3920 auio.uio_resid = count; 3921 error = getvnode(td->td_proc->p_fd, fd, 3922 cap_rights_init(&rights, CAP_READ), &fp); 3923 if (error != 0) 3924 return (error); 3925 if ((fp->f_flag & FREAD) == 0) { 3926 fdrop(fp, td); 3927 return (EBADF); 3928 } 3929 vp = fp->f_vnode; 3930 foffset = foffset_lock(fp, 0); 3931 unionread: 3932 if (vp->v_type != VDIR) { 3933 error = EINVAL; 3934 goto fail; 3935 } 3936 aiov.iov_base = buf; 3937 aiov.iov_len = count; 3938 auio.uio_iov = &aiov; 3939 auio.uio_iovcnt = 1; 3940 auio.uio_rw = UIO_READ; 3941 auio.uio_segflg = bufseg; 3942 auio.uio_td = td; 3943 vn_lock(vp, LK_SHARED | LK_RETRY); 3944 AUDIT_ARG_VNODE1(vp); 3945 loff = auio.uio_offset = foffset; 3946 #ifdef MAC 3947 error = mac_vnode_check_readdir(td->td_ucred, vp); 3948 if (error == 0) 3949 #endif 3950 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 3951 NULL); 3952 foffset = auio.uio_offset; 3953 if (error != 0) { 3954 
VOP_UNLOCK(vp, 0); 3955 goto fail; 3956 } 3957 if (count == auio.uio_resid && 3958 (vp->v_vflag & VV_ROOT) && 3959 (vp->v_mount->mnt_flag & MNT_UNION)) { 3960 struct vnode *tvp = vp; 3961 3962 vp = vp->v_mount->mnt_vnodecovered; 3963 VREF(vp); 3964 fp->f_vnode = vp; 3965 fp->f_data = vp; 3966 foffset = 0; 3967 vput(tvp); 3968 goto unionread; 3969 } 3970 VOP_UNLOCK(vp, 0); 3971 *basep = loff; 3972 if (residp != NULL) 3973 *residp = auio.uio_resid; 3974 td->td_retval[0] = count - auio.uio_resid; 3975 fail: 3976 foffset_unlock(fp, foffset, 0); 3977 fdrop(fp, td); 3978 return (error); 3979 } 3980 3981 #ifndef _SYS_SYSPROTO_H_ 3982 struct getdents_args { 3983 int fd; 3984 char *buf; 3985 size_t count; 3986 }; 3987 #endif 3988 int 3989 sys_getdents(td, uap) 3990 struct thread *td; 3991 register struct getdents_args /* { 3992 int fd; 3993 char *buf; 3994 u_int count; 3995 } */ *uap; 3996 { 3997 struct getdirentries_args ap; 3998 3999 ap.fd = uap->fd; 4000 ap.buf = uap->buf; 4001 ap.count = uap->count; 4002 ap.basep = NULL; 4003 return (sys_getdirentries(td, &ap)); 4004 } 4005 4006 /* 4007 * Set the mode mask for creation of filesystem nodes. 4008 */ 4009 #ifndef _SYS_SYSPROTO_H_ 4010 struct umask_args { 4011 int newmask; 4012 }; 4013 #endif 4014 int 4015 sys_umask(td, uap) 4016 struct thread *td; 4017 struct umask_args /* { 4018 int newmask; 4019 } */ *uap; 4020 { 4021 register struct filedesc *fdp; 4022 4023 FILEDESC_XLOCK(td->td_proc->p_fd); 4024 fdp = td->td_proc->p_fd; 4025 td->td_retval[0] = fdp->fd_cmask; 4026 fdp->fd_cmask = uap->newmask & ALLPERMS; 4027 FILEDESC_XUNLOCK(td->td_proc->p_fd); 4028 return (0); 4029 } 4030 4031 /* 4032 * Void all references to file by ripping underlying filesystem away from 4033 * vnode. 
4034 */ 4035 #ifndef _SYS_SYSPROTO_H_ 4036 struct revoke_args { 4037 char *path; 4038 }; 4039 #endif 4040 int 4041 sys_revoke(td, uap) 4042 struct thread *td; 4043 register struct revoke_args /* { 4044 char *path; 4045 } */ *uap; 4046 { 4047 struct vnode *vp; 4048 struct vattr vattr; 4049 struct nameidata nd; 4050 int error; 4051 4052 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4053 uap->path, td); 4054 if ((error = namei(&nd)) != 0) 4055 return (error); 4056 vp = nd.ni_vp; 4057 NDFREE(&nd, NDF_ONLY_PNBUF); 4058 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4059 error = EINVAL; 4060 goto out; 4061 } 4062 #ifdef MAC 4063 error = mac_vnode_check_revoke(td->td_ucred, vp); 4064 if (error != 0) 4065 goto out; 4066 #endif 4067 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4068 if (error != 0) 4069 goto out; 4070 if (td->td_ucred->cr_uid != vattr.va_uid) { 4071 error = priv_check(td, PRIV_VFS_ADMIN); 4072 if (error != 0) 4073 goto out; 4074 } 4075 if (vcount(vp) > 1) 4076 VOP_REVOKE(vp, REVOKEALL); 4077 out: 4078 vput(vp); 4079 return (error); 4080 } 4081 4082 /* 4083 * Convert a user file descriptor to a kernel file entry and check that, if it 4084 * is a capability, the correct rights are present. A reference on the file 4085 * entry is held upon returning. 4086 */ 4087 int 4088 getvnode(struct filedesc *fdp, int fd, cap_rights_t *rightsp, struct file **fpp) 4089 { 4090 struct file *fp; 4091 int error; 4092 4093 error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL); 4094 if (error != 0) 4095 return (error); 4096 4097 /* 4098 * The file could be not of the vnode type, or it may be not 4099 * yet fully initialized, in which case the f_vnode pointer 4100 * may be set, but f_ops is still badfileops. E.g., 4101 * devfs_open() transiently create such situation to 4102 * facilitate csw d_fdopen(). 
4103 * 4104 * Dupfdopen() handling in kern_openat() installs the 4105 * half-baked file into the process descriptor table, allowing 4106 * other thread to dereference it. Guard against the race by 4107 * checking f_ops. 4108 */ 4109 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4110 fdrop(fp, curthread); 4111 return (EINVAL); 4112 } 4113 *fpp = fp; 4114 return (0); 4115 } 4116 4117 4118 /* 4119 * Get an (NFS) file handle. 4120 */ 4121 #ifndef _SYS_SYSPROTO_H_ 4122 struct lgetfh_args { 4123 char *fname; 4124 fhandle_t *fhp; 4125 }; 4126 #endif 4127 int 4128 sys_lgetfh(td, uap) 4129 struct thread *td; 4130 register struct lgetfh_args *uap; 4131 { 4132 struct nameidata nd; 4133 fhandle_t fh; 4134 register struct vnode *vp; 4135 int error; 4136 4137 error = priv_check(td, PRIV_VFS_GETFH); 4138 if (error != 0) 4139 return (error); 4140 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4141 uap->fname, td); 4142 error = namei(&nd); 4143 if (error != 0) 4144 return (error); 4145 NDFREE(&nd, NDF_ONLY_PNBUF); 4146 vp = nd.ni_vp; 4147 bzero(&fh, sizeof(fh)); 4148 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4149 error = VOP_VPTOFH(vp, &fh.fh_fid); 4150 vput(vp); 4151 if (error == 0) 4152 error = copyout(&fh, uap->fhp, sizeof (fh)); 4153 return (error); 4154 } 4155 4156 #ifndef _SYS_SYSPROTO_H_ 4157 struct getfh_args { 4158 char *fname; 4159 fhandle_t *fhp; 4160 }; 4161 #endif 4162 int 4163 sys_getfh(td, uap) 4164 struct thread *td; 4165 register struct getfh_args *uap; 4166 { 4167 struct nameidata nd; 4168 fhandle_t fh; 4169 register struct vnode *vp; 4170 int error; 4171 4172 error = priv_check(td, PRIV_VFS_GETFH); 4173 if (error != 0) 4174 return (error); 4175 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4176 uap->fname, td); 4177 error = namei(&nd); 4178 if (error != 0) 4179 return (error); 4180 NDFREE(&nd, NDF_ONLY_PNBUF); 4181 vp = nd.ni_vp; 4182 bzero(&fh, sizeof(fh)); 4183 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 
4184 error = VOP_VPTOFH(vp, &fh.fh_fid); 4185 vput(vp); 4186 if (error == 0) 4187 error = copyout(&fh, uap->fhp, sizeof (fh)); 4188 return (error); 4189 } 4190 4191 /* 4192 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4193 * open descriptor. 4194 * 4195 * warning: do not remove the priv_check() call or this becomes one giant 4196 * security hole. 4197 */ 4198 #ifndef _SYS_SYSPROTO_H_ 4199 struct fhopen_args { 4200 const struct fhandle *u_fhp; 4201 int flags; 4202 }; 4203 #endif 4204 int 4205 sys_fhopen(td, uap) 4206 struct thread *td; 4207 struct fhopen_args /* { 4208 const struct fhandle *u_fhp; 4209 int flags; 4210 } */ *uap; 4211 { 4212 struct mount *mp; 4213 struct vnode *vp; 4214 struct fhandle fhp; 4215 struct file *fp; 4216 int fmode, error; 4217 int indx; 4218 4219 error = priv_check(td, PRIV_VFS_FHOPEN); 4220 if (error != 0) 4221 return (error); 4222 indx = -1; 4223 fmode = FFLAGS(uap->flags); 4224 /* why not allow a non-read/write open for our lockd? */ 4225 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4226 return (EINVAL); 4227 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4228 if (error != 0) 4229 return(error); 4230 /* find the mount point */ 4231 mp = vfs_busyfs(&fhp.fh_fsid); 4232 if (mp == NULL) 4233 return (ESTALE); 4234 /* now give me my vnode, it gets returned to me locked */ 4235 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4236 vfs_unbusy(mp); 4237 if (error != 0) 4238 return (error); 4239 4240 error = falloc_noinstall(td, &fp); 4241 if (error != 0) { 4242 vput(vp); 4243 return (error); 4244 } 4245 /* 4246 * An extra reference on `fp' has been held for us by 4247 * falloc_noinstall(). 
4248 */ 4249 4250 #ifdef INVARIANTS 4251 td->td_dupfd = -1; 4252 #endif 4253 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4254 if (error != 0) { 4255 KASSERT(fp->f_ops == &badfileops, 4256 ("VOP_OPEN in fhopen() set f_ops")); 4257 KASSERT(td->td_dupfd < 0, 4258 ("fhopen() encountered fdopen()")); 4259 4260 vput(vp); 4261 goto bad; 4262 } 4263 #ifdef INVARIANTS 4264 td->td_dupfd = 0; 4265 #endif 4266 fp->f_vnode = vp; 4267 fp->f_seqcount = 1; 4268 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4269 &vnops); 4270 VOP_UNLOCK(vp, 0); 4271 if ((fmode & O_TRUNC) != 0) { 4272 error = fo_truncate(fp, 0, td->td_ucred, td); 4273 if (error != 0) 4274 goto bad; 4275 } 4276 4277 error = finstall(td, fp, &indx, fmode, NULL); 4278 bad: 4279 fdrop(fp, td); 4280 td->td_retval[0] = indx; 4281 return (error); 4282 } 4283 4284 /* 4285 * Stat an (NFS) file handle. 4286 */ 4287 #ifndef _SYS_SYSPROTO_H_ 4288 struct fhstat_args { 4289 struct fhandle *u_fhp; 4290 struct stat *sb; 4291 }; 4292 #endif 4293 int 4294 sys_fhstat(td, uap) 4295 struct thread *td; 4296 register struct fhstat_args /* { 4297 struct fhandle *u_fhp; 4298 struct stat *sb; 4299 } */ *uap; 4300 { 4301 struct stat sb; 4302 struct fhandle fh; 4303 int error; 4304 4305 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4306 if (error != 0) 4307 return (error); 4308 error = kern_fhstat(td, fh, &sb); 4309 if (error == 0) 4310 error = copyout(&sb, uap->sb, sizeof(sb)); 4311 return (error); 4312 } 4313 4314 int 4315 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4316 { 4317 struct mount *mp; 4318 struct vnode *vp; 4319 int error; 4320 4321 error = priv_check(td, PRIV_VFS_FHSTAT); 4322 if (error != 0) 4323 return (error); 4324 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4325 return (ESTALE); 4326 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4327 vfs_unbusy(mp); 4328 if (error != 0) 4329 return (error); 4330 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4331 vput(vp); 
4332 return (error); 4333 } 4334 4335 /* 4336 * Implement fstatfs() for (NFS) file handles. 4337 */ 4338 #ifndef _SYS_SYSPROTO_H_ 4339 struct fhstatfs_args { 4340 struct fhandle *u_fhp; 4341 struct statfs *buf; 4342 }; 4343 #endif 4344 int 4345 sys_fhstatfs(td, uap) 4346 struct thread *td; 4347 struct fhstatfs_args /* { 4348 struct fhandle *u_fhp; 4349 struct statfs *buf; 4350 } */ *uap; 4351 { 4352 struct statfs sf; 4353 fhandle_t fh; 4354 int error; 4355 4356 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4357 if (error != 0) 4358 return (error); 4359 error = kern_fhstatfs(td, fh, &sf); 4360 if (error != 0) 4361 return (error); 4362 return (copyout(&sf, uap->buf, sizeof(sf))); 4363 } 4364 4365 int 4366 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4367 { 4368 struct statfs *sp; 4369 struct mount *mp; 4370 struct vnode *vp; 4371 int error; 4372 4373 error = priv_check(td, PRIV_VFS_FHSTATFS); 4374 if (error != 0) 4375 return (error); 4376 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4377 return (ESTALE); 4378 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4379 if (error != 0) { 4380 vfs_unbusy(mp); 4381 return (error); 4382 } 4383 vput(vp); 4384 error = prison_canseemount(td->td_ucred, mp); 4385 if (error != 0) 4386 goto out; 4387 #ifdef MAC 4388 error = mac_mount_check_stat(td->td_ucred, mp); 4389 if (error != 0) 4390 goto out; 4391 #endif 4392 /* 4393 * Set these in case the underlying filesystem fails to do so. 
4394 */ 4395 sp = &mp->mnt_stat; 4396 sp->f_version = STATFS_VERSION; 4397 sp->f_namemax = NAME_MAX; 4398 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4399 error = VFS_STATFS(mp, sp); 4400 if (error == 0) 4401 *buf = *sp; 4402 out: 4403 vfs_unbusy(mp); 4404 return (error); 4405 } 4406 4407 int 4408 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4409 { 4410 struct file *fp; 4411 struct mount *mp; 4412 struct vnode *vp; 4413 cap_rights_t rights; 4414 off_t olen, ooffset; 4415 int error; 4416 4417 if (offset < 0 || len <= 0) 4418 return (EINVAL); 4419 /* Check for wrap. */ 4420 if (offset > OFF_MAX - len) 4421 return (EFBIG); 4422 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4423 if (error != 0) 4424 return (error); 4425 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4426 error = ESPIPE; 4427 goto out; 4428 } 4429 if ((fp->f_flag & FWRITE) == 0) { 4430 error = EBADF; 4431 goto out; 4432 } 4433 if (fp->f_type != DTYPE_VNODE) { 4434 error = ENODEV; 4435 goto out; 4436 } 4437 vp = fp->f_vnode; 4438 if (vp->v_type != VREG) { 4439 error = ENODEV; 4440 goto out; 4441 } 4442 4443 /* Allocating blocks may take a long time, so iterate. 
*/ 4444 for (;;) { 4445 olen = len; 4446 ooffset = offset; 4447 4448 bwillwrite(); 4449 mp = NULL; 4450 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4451 if (error != 0) 4452 break; 4453 error = vn_lock(vp, LK_EXCLUSIVE); 4454 if (error != 0) { 4455 vn_finished_write(mp); 4456 break; 4457 } 4458 #ifdef MAC 4459 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4460 if (error == 0) 4461 #endif 4462 error = VOP_ALLOCATE(vp, &offset, &len); 4463 VOP_UNLOCK(vp, 0); 4464 vn_finished_write(mp); 4465 4466 if (olen + ooffset != offset + len) { 4467 panic("offset + len changed from %jx/%jx to %jx/%jx", 4468 ooffset, olen, offset, len); 4469 } 4470 if (error != 0 || len == 0) 4471 break; 4472 KASSERT(olen > len, ("Iteration did not make progress?")); 4473 maybe_yield(); 4474 } 4475 out: 4476 fdrop(fp, td); 4477 return (error); 4478 } 4479 4480 int 4481 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4482 { 4483 4484 td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset, 4485 uap->len); 4486 return (0); 4487 } 4488 4489 /* 4490 * Unlike madvise(2), we do not make a best effort to remember every 4491 * possible caching hint. Instead, we remember the last setting with 4492 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4493 * region of any current setting. 
4494 */ 4495 int 4496 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4497 int advice) 4498 { 4499 struct fadvise_info *fa, *new; 4500 struct file *fp; 4501 struct vnode *vp; 4502 cap_rights_t rights; 4503 off_t end; 4504 int error; 4505 4506 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4507 return (EINVAL); 4508 switch (advice) { 4509 case POSIX_FADV_SEQUENTIAL: 4510 case POSIX_FADV_RANDOM: 4511 case POSIX_FADV_NOREUSE: 4512 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4513 break; 4514 case POSIX_FADV_NORMAL: 4515 case POSIX_FADV_WILLNEED: 4516 case POSIX_FADV_DONTNEED: 4517 new = NULL; 4518 break; 4519 default: 4520 return (EINVAL); 4521 } 4522 /* XXX: CAP_POSIX_FADVISE? */ 4523 error = fget(td, fd, cap_rights_init(&rights), &fp); 4524 if (error != 0) 4525 goto out; 4526 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4527 error = ESPIPE; 4528 goto out; 4529 } 4530 if (fp->f_type != DTYPE_VNODE) { 4531 error = ENODEV; 4532 goto out; 4533 } 4534 vp = fp->f_vnode; 4535 if (vp->v_type != VREG) { 4536 error = ENODEV; 4537 goto out; 4538 } 4539 if (len == 0) 4540 end = OFF_MAX; 4541 else 4542 end = offset + len - 1; 4543 switch (advice) { 4544 case POSIX_FADV_SEQUENTIAL: 4545 case POSIX_FADV_RANDOM: 4546 case POSIX_FADV_NOREUSE: 4547 /* 4548 * Try to merge any existing non-standard region with 4549 * this new region if possible, otherwise create a new 4550 * non-standard region for this request. 
4551 */ 4552 mtx_pool_lock(mtxpool_sleep, fp); 4553 fa = fp->f_advice; 4554 if (fa != NULL && fa->fa_advice == advice && 4555 ((fa->fa_start <= end && fa->fa_end >= offset) || 4556 (end != OFF_MAX && fa->fa_start == end + 1) || 4557 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4558 if (offset < fa->fa_start) 4559 fa->fa_start = offset; 4560 if (end > fa->fa_end) 4561 fa->fa_end = end; 4562 } else { 4563 new->fa_advice = advice; 4564 new->fa_start = offset; 4565 new->fa_end = end; 4566 new->fa_prevstart = 0; 4567 new->fa_prevend = 0; 4568 fp->f_advice = new; 4569 new = fa; 4570 } 4571 mtx_pool_unlock(mtxpool_sleep, fp); 4572 break; 4573 case POSIX_FADV_NORMAL: 4574 /* 4575 * If a the "normal" region overlaps with an existing 4576 * non-standard region, trim or remove the 4577 * non-standard region. 4578 */ 4579 mtx_pool_lock(mtxpool_sleep, fp); 4580 fa = fp->f_advice; 4581 if (fa != NULL) { 4582 if (offset <= fa->fa_start && end >= fa->fa_end) { 4583 new = fa; 4584 fp->f_advice = NULL; 4585 } else if (offset <= fa->fa_start && 4586 end >= fa->fa_start) 4587 fa->fa_start = end + 1; 4588 else if (offset <= fa->fa_end && end >= fa->fa_end) 4589 fa->fa_end = offset - 1; 4590 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4591 /* 4592 * If the "normal" region is a middle 4593 * portion of the existing 4594 * non-standard region, just remove 4595 * the whole thing rather than picking 4596 * one side or the other to 4597 * preserve. 
4598 */ 4599 new = fa; 4600 fp->f_advice = NULL; 4601 } 4602 } 4603 mtx_pool_unlock(mtxpool_sleep, fp); 4604 break; 4605 case POSIX_FADV_WILLNEED: 4606 case POSIX_FADV_DONTNEED: 4607 error = VOP_ADVISE(vp, offset, end, advice); 4608 break; 4609 } 4610 out: 4611 if (fp != NULL) 4612 fdrop(fp, td); 4613 free(new, M_FADVISE); 4614 return (error); 4615 } 4616 4617 int 4618 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4619 { 4620 4621 td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset, 4622 uap->len, uap->advice); 4623 return (0); 4624 } 4625