1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
 *
 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_capsicum.h"
#include "opt_compat.h"
#include "opt_kdtrace.h"
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/capability.h>
#include <sys/disk.h>
#include <sys/sysent.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/namei.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filio.h>
#include <sys/limits.h>
#include <sys/linker.h>
#include <sys/rwlock.h>
#include <sys/sdt.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/dirent.h>
#include <sys/jail.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <machine/stdarg.h>

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/uma.h>

#include <ufs/ufs/quota.h>

MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");

SDT_PROVIDER_DEFINE(vfs);
SDT_PROBE_DEFINE2(vfs, , stat, mode, mode, "char *", "int");
SDT_PROBE_DEFINE2(vfs, , stat, reg, reg, "char *", "int");

/* Forward declarations of helpers private to this file. */
static int chroot_refuse_vdir_fds(struct filedesc *fdp);
static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
static int kern_chflags(struct thread *td, const char *path,
    enum uio_seg pathseg, u_long flags);
static int kern_chflagsat(struct thread *td, int fd, const char *path,
    enum uio_seg pathseg, u_long flags, int atflag);
static int setfflags(struct thread *td, struct vnode *, u_long);
static int setutimes(struct thread *td, struct vnode *,
    const struct timespec *, int, int);
static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
    struct thread *td);

/*
 * The module initialization routine for POSIX asynchronous I/O will
 * set this to the version of AIO that it implements.  (Zero means
 * that it is not implemented.)  This value is used here by pathconf()
 * and in kern_descrip.c by fpathconf().
 */
int async_io_version;

#ifdef DEBUG
/* Exported read/write as the debug.syncprt sysctl; not read in this part. */
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif

/*
 * Sync each mounted filesystem.
 *
 * Walks the global mount list and, for every mount that can be busied
 * without waiting and is not flagged MNT_RDONLY, issues vfs_msync() and
 * VFS_SYNC() with MNT_NOWAIT.  Mounts for which vfs_busy() or
 * vn_start_write(V_NOWAIT) fails are skipped.  Neither argument is
 * examined beyond what the macros above require, the VFS_SYNC() result is
 * not checked, and the function always returns 0.
 */
#ifndef _SYS_SYSPROTO_H_
struct sync_args {
	int     dummy;
};
#endif
/* ARGSUSED */
int
sys_sync(td, uap)
	struct thread *td;
	struct sync_args *uap;
{
	struct mount *mp, *nmp;
	int save;

	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		/*
		 * Could not busy the mount right now: pick up the successor
		 * and move on.
		 * NOTE(review): mountlist_mtx is re-taken at the bottom of
		 * the loop body, so vfs_busy() with MBF_MNTLSTLOCK presumably
		 * returns with the mutex dropped on success and still held
		 * on failure -- confirm against vfs_busy().
		 */
		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
		/* Only writable mounts that admit a write right now are flushed. */
		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
			/*
			 * Run the flush with TDP_SYNCIO set on the current
			 * thread; the saved value is handed back to
			 * curthread_pflags_restore() afterwards.
			 */
			save = curthread_pflags_set(TDP_SYNCIO);
			vfs_msync(mp, MNT_NOWAIT);
			VFS_SYNC(mp, MNT_NOWAIT);
			curthread_pflags_restore(save);
			vn_finished_write(mp);
		}
		/*
		 * Re-take the list lock before reading the successor, and
		 * only then release the busy reference on this mount.
		 */
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	mtx_unlock(&mountlist_mtx);
	return (0);
}

/*
 * Change filesystem quotas.
164 */ 165 #ifndef _SYS_SYSPROTO_H_ 166 struct quotactl_args { 167 char *path; 168 int cmd; 169 int uid; 170 caddr_t arg; 171 }; 172 #endif 173 int 174 sys_quotactl(td, uap) 175 struct thread *td; 176 register struct quotactl_args /* { 177 char *path; 178 int cmd; 179 int uid; 180 caddr_t arg; 181 } */ *uap; 182 { 183 struct mount *mp; 184 struct nameidata nd; 185 int error; 186 187 AUDIT_ARG_CMD(uap->cmd); 188 AUDIT_ARG_UID(uap->uid); 189 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 190 return (EPERM); 191 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 192 uap->path, td); 193 if ((error = namei(&nd)) != 0) 194 return (error); 195 NDFREE(&nd, NDF_ONLY_PNBUF); 196 mp = nd.ni_vp->v_mount; 197 vfs_ref(mp); 198 vput(nd.ni_vp); 199 error = vfs_busy(mp, 0); 200 vfs_rel(mp); 201 if (error != 0) 202 return (error); 203 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 204 205 /* 206 * Since quota on operation typically needs to open quota 207 * file, the Q_QUOTAON handler needs to unbusy the mount point 208 * before calling into namei. Otherwise, unmount might be 209 * started between two vfs_busy() invocations (first is our, 210 * second is from mount point cross-walk code in lookup()), 211 * causing deadlock. 212 * 213 * Require that Q_QUOTAON handles the vfs_busy() reference on 214 * its own, always returning with ubusied mount point. 215 */ 216 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 217 vfs_unbusy(mp); 218 return (error); 219 } 220 221 /* 222 * Used by statfs conversion routines to scale the block size up if 223 * necessary so that all of the block counts are <= 'max_size'. Note 224 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 225 * value of 'n'. 
226 */ 227 void 228 statfs_scale_blocks(struct statfs *sf, long max_size) 229 { 230 uint64_t count; 231 int shift; 232 233 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 234 235 /* 236 * Attempt to scale the block counts to give a more accurate 237 * overview to userland of the ratio of free space to used 238 * space. To do this, find the largest block count and compute 239 * a divisor that lets it fit into a signed integer <= max_size. 240 */ 241 if (sf->f_bavail < 0) 242 count = -sf->f_bavail; 243 else 244 count = sf->f_bavail; 245 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 246 if (count <= max_size) 247 return; 248 249 count >>= flsl(max_size); 250 shift = 0; 251 while (count > 0) { 252 shift++; 253 count >>=1; 254 } 255 256 sf->f_bsize <<= shift; 257 sf->f_blocks >>= shift; 258 sf->f_bfree >>= shift; 259 sf->f_bavail >>= shift; 260 } 261 262 /* 263 * Get filesystem statistics. 264 */ 265 #ifndef _SYS_SYSPROTO_H_ 266 struct statfs_args { 267 char *path; 268 struct statfs *buf; 269 }; 270 #endif 271 int 272 sys_statfs(td, uap) 273 struct thread *td; 274 register struct statfs_args /* { 275 char *path; 276 struct statfs *buf; 277 } */ *uap; 278 { 279 struct statfs sf; 280 int error; 281 282 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 283 if (error == 0) 284 error = copyout(&sf, uap->buf, sizeof(sf)); 285 return (error); 286 } 287 288 int 289 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 290 struct statfs *buf) 291 { 292 struct mount *mp; 293 struct statfs *sp, sb; 294 struct nameidata nd; 295 int error; 296 297 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 298 pathseg, path, td); 299 error = namei(&nd); 300 if (error != 0) 301 return (error); 302 mp = nd.ni_vp->v_mount; 303 vfs_ref(mp); 304 NDFREE(&nd, NDF_ONLY_PNBUF); 305 vput(nd.ni_vp); 306 error = vfs_busy(mp, 0); 307 vfs_rel(mp); 308 if (error != 0) 309 return (error); 310 #ifdef MAC 311 error = mac_mount_check_stat(td->td_ucred, 
mp); 312 if (error != 0) 313 goto out; 314 #endif 315 /* 316 * Set these in case the underlying filesystem fails to do so. 317 */ 318 sp = &mp->mnt_stat; 319 sp->f_version = STATFS_VERSION; 320 sp->f_namemax = NAME_MAX; 321 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 322 error = VFS_STATFS(mp, sp); 323 if (error != 0) 324 goto out; 325 if (priv_check(td, PRIV_VFS_GENERATION)) { 326 bcopy(sp, &sb, sizeof(sb)); 327 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 328 prison_enforce_statfs(td->td_ucred, mp, &sb); 329 sp = &sb; 330 } 331 *buf = *sp; 332 out: 333 vfs_unbusy(mp); 334 return (error); 335 } 336 337 /* 338 * Get filesystem statistics. 339 */ 340 #ifndef _SYS_SYSPROTO_H_ 341 struct fstatfs_args { 342 int fd; 343 struct statfs *buf; 344 }; 345 #endif 346 int 347 sys_fstatfs(td, uap) 348 struct thread *td; 349 register struct fstatfs_args /* { 350 int fd; 351 struct statfs *buf; 352 } */ *uap; 353 { 354 struct statfs sf; 355 int error; 356 357 error = kern_fstatfs(td, uap->fd, &sf); 358 if (error == 0) 359 error = copyout(&sf, uap->buf, sizeof(sf)); 360 return (error); 361 } 362 363 int 364 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 365 { 366 struct file *fp; 367 struct mount *mp; 368 struct statfs *sp, sb; 369 struct vnode *vp; 370 cap_rights_t rights; 371 int error; 372 373 AUDIT_ARG_FD(fd); 374 error = getvnode(td->td_proc->p_fd, fd, 375 cap_rights_init(&rights, CAP_FSTATFS), &fp); 376 if (error != 0) 377 return (error); 378 vp = fp->f_vnode; 379 vn_lock(vp, LK_SHARED | LK_RETRY); 380 #ifdef AUDIT 381 AUDIT_ARG_VNODE1(vp); 382 #endif 383 mp = vp->v_mount; 384 if (mp) 385 vfs_ref(mp); 386 VOP_UNLOCK(vp, 0); 387 fdrop(fp, td); 388 if (mp == NULL) { 389 error = EBADF; 390 goto out; 391 } 392 error = vfs_busy(mp, 0); 393 vfs_rel(mp); 394 if (error != 0) 395 return (error); 396 #ifdef MAC 397 error = mac_mount_check_stat(td->td_ucred, mp); 398 if (error != 0) 399 goto out; 400 #endif 401 /* 402 * Set these in case the underlying filesystem fails to do 
so. 403 */ 404 sp = &mp->mnt_stat; 405 sp->f_version = STATFS_VERSION; 406 sp->f_namemax = NAME_MAX; 407 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 408 error = VFS_STATFS(mp, sp); 409 if (error != 0) 410 goto out; 411 if (priv_check(td, PRIV_VFS_GENERATION)) { 412 bcopy(sp, &sb, sizeof(sb)); 413 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 414 prison_enforce_statfs(td->td_ucred, mp, &sb); 415 sp = &sb; 416 } 417 *buf = *sp; 418 out: 419 if (mp) 420 vfs_unbusy(mp); 421 return (error); 422 } 423 424 /* 425 * Get statistics on all filesystems. 426 */ 427 #ifndef _SYS_SYSPROTO_H_ 428 struct getfsstat_args { 429 struct statfs *buf; 430 long bufsize; 431 int flags; 432 }; 433 #endif 434 int 435 sys_getfsstat(td, uap) 436 struct thread *td; 437 register struct getfsstat_args /* { 438 struct statfs *buf; 439 long bufsize; 440 int flags; 441 } */ *uap; 442 { 443 444 return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE, 445 uap->flags)); 446 } 447 448 /* 449 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 450 * The caller is responsible for freeing memory which will be allocated 451 * in '*buf'. 
 */
/*
 * Collect statistics for the mounts on the global mount list.
 *
 * td      - calling thread; its credential is used for the visibility,
 *           MAC and privilege checks, and td->td_retval[0] receives the
 *           resulting count.
 * buf     - for UIO_USERSPACE, '*buf' is the destination address; for
 *           UIO_SYSSPACE with a non-zero bufsize, '*buf' is set to a
 *           buffer allocated here with malloc(M_TEMP, M_WAITOK).
 * bufsize - size of the destination in bytes; 0 means "count only".
 * bufseg  - address space of the destination.
 * flags   - MNT_WAIT / MNT_NOWAIT / MNT_LAZY, see the refresh test below.
 *
 * Returns 0, or the copyout() error for a UIO_USERSPACE destination.
 * On success td->td_retval[0] holds the number of mounts that passed the
 * checks, capped at the number of entries that fit when a buffer was given.
 */
int
kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
    enum uio_seg bufseg, int flags)
{
	struct mount *mp, *nmp;
	struct statfs *sfsp, *sp, sb;
	size_t count, maxcount;
	int error;

	maxcount = bufsize / sizeof(struct statfs);
	if (bufsize == 0)
		sfsp = NULL;
	else if (bufseg == UIO_USERSPACE)
		sfsp = *buf;
	else /* if (bufseg == UIO_SYSSPACE) */ {
		/*
		 * Size the kernel buffer by the number of mounts currently
		 * on the list, but never larger than the caller asked for.
		 */
		count = 0;
		mtx_lock(&mountlist_mtx);
		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
			count++;
		}
		mtx_unlock(&mountlist_mtx);
		if (maxcount > count)
			maxcount = count;
		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
		    M_WAITOK);
	}
	count = 0;
	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		/* Mounts the credential may not see are silently skipped. */
		if (prison_canseemount(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#ifdef MAC
		if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#endif
		/*
		 * NOTE(review): the paths below that follow a successful
		 * vfs_busy() re-take mountlist_mtx before touching the list,
		 * so vfs_busy() with MBF_MNTLSTLOCK presumably returns with
		 * the mutex dropped on success -- confirm against vfs_busy().
		 */
		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
		if (sfsp && count < maxcount) {
			sp = &mp->mnt_stat;
			/*
			 * Set these in case the underlying filesystem
			 * fails to do so.
			 */
			sp->f_version = STATFS_VERSION;
			sp->f_namemax = NAME_MAX;
			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
			/*
			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
			 * overrides MNT_WAIT.
			 *
			 * NOTE(review): as written, VFS_STATFS() is also
			 * called whenever (flags & MNT_WAIT) is non-zero,
			 * regardless of the other two flags, which does not
			 * match the sentence above -- confirm which one is
			 * intended.
			 */
			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
			    (flags & MNT_WAIT)) &&
			    (error = VFS_STATFS(mp, sp))) {
				/* Refresh failed: drop this mount, keep going. */
				mtx_lock(&mountlist_mtx);
				nmp = TAILQ_NEXT(mp, mnt_list);
				vfs_unbusy(mp);
				continue;
			}
			if (priv_check(td, PRIV_VFS_GENERATION)) {
				/*
				 * priv_check() returned non-zero: report a
				 * private copy with the fsid zeroed and
				 * prison_enforce_statfs() applied.
				 */
				bcopy(sp, &sb, sizeof(sb));
				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
				prison_enforce_statfs(td->td_ucred, mp, &sb);
				sp = &sb;
			}
			if (bufseg == UIO_SYSSPACE)
				bcopy(sp, sfsp, sizeof(*sp));
			else /* if (bufseg == UIO_USERSPACE) */ {
				error = copyout(sp, sfsp, sizeof(*sp));
				if (error != 0) {
					/*
					 * mountlist_mtx is not re-taken on
					 * this path; only the busy reference
					 * is released before returning.
					 */
					vfs_unbusy(mp);
					return (error);
				}
			}
			sfsp++;
		}
		/* Counted even when there was no room to store the entry. */
		count++;
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	mtx_unlock(&mountlist_mtx);
	if (sfsp && count > maxcount)
		td->td_retval[0] = maxcount;
	else
		td->td_retval[0] = count;
	return (0);
}

#ifdef COMPAT_FREEBSD4
/*
 * Get old format filesystem statistics.
 */
static void cvtstatfs(struct statfs *, struct ostatfs *);

#ifndef _SYS_SYSPROTO_H_
struct freebsd4_statfs_args {
	char *path;
	struct ostatfs *buf;
};
#endif
/*
 * Old-format statfs: obtain a struct statfs through kern_statfs(), convert
 * it with cvtstatfs() and copy the struct ostatfs out to uap->buf.
 * Returns the kern_statfs() error, or the copyout() result.
 */
int
freebsd4_statfs(td, uap)
	struct thread *td;
	struct freebsd4_statfs_args /* {
		char *path;
		struct ostatfs *buf;
	} */ *uap;
{
	struct ostatfs osb;
	struct statfs sf;
	int error;

	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
	if (error != 0)
		return (error);
	cvtstatfs(&sf, &osb);
	return (copyout(&osb, uap->buf, sizeof(osb)));
}

/*
 * Get filesystem statistics.
581 */ 582 #ifndef _SYS_SYSPROTO_H_ 583 struct freebsd4_fstatfs_args { 584 int fd; 585 struct ostatfs *buf; 586 }; 587 #endif 588 int 589 freebsd4_fstatfs(td, uap) 590 struct thread *td; 591 struct freebsd4_fstatfs_args /* { 592 int fd; 593 struct ostatfs *buf; 594 } */ *uap; 595 { 596 struct ostatfs osb; 597 struct statfs sf; 598 int error; 599 600 error = kern_fstatfs(td, uap->fd, &sf); 601 if (error != 0) 602 return (error); 603 cvtstatfs(&sf, &osb); 604 return (copyout(&osb, uap->buf, sizeof(osb))); 605 } 606 607 /* 608 * Get statistics on all filesystems. 609 */ 610 #ifndef _SYS_SYSPROTO_H_ 611 struct freebsd4_getfsstat_args { 612 struct ostatfs *buf; 613 long bufsize; 614 int flags; 615 }; 616 #endif 617 int 618 freebsd4_getfsstat(td, uap) 619 struct thread *td; 620 register struct freebsd4_getfsstat_args /* { 621 struct ostatfs *buf; 622 long bufsize; 623 int flags; 624 } */ *uap; 625 { 626 struct statfs *buf, *sp; 627 struct ostatfs osb; 628 size_t count, size; 629 int error; 630 631 count = uap->bufsize / sizeof(struct ostatfs); 632 size = count * sizeof(struct statfs); 633 error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags); 634 if (size > 0) { 635 count = td->td_retval[0]; 636 sp = buf; 637 while (count > 0 && error == 0) { 638 cvtstatfs(sp, &osb); 639 error = copyout(&osb, uap->buf, sizeof(osb)); 640 sp++; 641 uap->buf++; 642 count--; 643 } 644 free(buf, M_TEMP); 645 } 646 return (error); 647 } 648 649 /* 650 * Implement fstatfs() for (NFS) file handles. 
651 */ 652 #ifndef _SYS_SYSPROTO_H_ 653 struct freebsd4_fhstatfs_args { 654 struct fhandle *u_fhp; 655 struct ostatfs *buf; 656 }; 657 #endif 658 int 659 freebsd4_fhstatfs(td, uap) 660 struct thread *td; 661 struct freebsd4_fhstatfs_args /* { 662 struct fhandle *u_fhp; 663 struct ostatfs *buf; 664 } */ *uap; 665 { 666 struct ostatfs osb; 667 struct statfs sf; 668 fhandle_t fh; 669 int error; 670 671 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 672 if (error != 0) 673 return (error); 674 error = kern_fhstatfs(td, fh, &sf); 675 if (error != 0) 676 return (error); 677 cvtstatfs(&sf, &osb); 678 return (copyout(&osb, uap->buf, sizeof(osb))); 679 } 680 681 /* 682 * Convert a new format statfs structure to an old format statfs structure. 683 */ 684 static void 685 cvtstatfs(nsp, osp) 686 struct statfs *nsp; 687 struct ostatfs *osp; 688 { 689 690 statfs_scale_blocks(nsp, LONG_MAX); 691 bzero(osp, sizeof(*osp)); 692 osp->f_bsize = nsp->f_bsize; 693 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 694 osp->f_blocks = nsp->f_blocks; 695 osp->f_bfree = nsp->f_bfree; 696 osp->f_bavail = nsp->f_bavail; 697 osp->f_files = MIN(nsp->f_files, LONG_MAX); 698 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 699 osp->f_owner = nsp->f_owner; 700 osp->f_type = nsp->f_type; 701 osp->f_flags = nsp->f_flags; 702 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 703 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 704 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 705 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 706 strlcpy(osp->f_fstypename, nsp->f_fstypename, 707 MIN(MFSNAMELEN, OMFSNAMELEN)); 708 strlcpy(osp->f_mntonname, nsp->f_mntonname, 709 MIN(MNAMELEN, OMNAMELEN)); 710 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 711 MIN(MNAMELEN, OMNAMELEN)); 712 osp->f_fsid = nsp->f_fsid; 713 } 714 #endif /* COMPAT_FREEBSD4 */ 715 716 /* 717 * Change current working directory to a given file descriptor. 
718 */ 719 #ifndef _SYS_SYSPROTO_H_ 720 struct fchdir_args { 721 int fd; 722 }; 723 #endif 724 int 725 sys_fchdir(td, uap) 726 struct thread *td; 727 struct fchdir_args /* { 728 int fd; 729 } */ *uap; 730 { 731 register struct filedesc *fdp = td->td_proc->p_fd; 732 struct vnode *vp, *tdp, *vpold; 733 struct mount *mp; 734 struct file *fp; 735 cap_rights_t rights; 736 int error; 737 738 AUDIT_ARG_FD(uap->fd); 739 error = getvnode(fdp, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 740 &fp); 741 if (error != 0) 742 return (error); 743 vp = fp->f_vnode; 744 VREF(vp); 745 fdrop(fp, td); 746 vn_lock(vp, LK_SHARED | LK_RETRY); 747 AUDIT_ARG_VNODE1(vp); 748 error = change_dir(vp, td); 749 while (!error && (mp = vp->v_mountedhere) != NULL) { 750 if (vfs_busy(mp, 0)) 751 continue; 752 error = VFS_ROOT(mp, LK_SHARED, &tdp); 753 vfs_unbusy(mp); 754 if (error != 0) 755 break; 756 vput(vp); 757 vp = tdp; 758 } 759 if (error != 0) { 760 vput(vp); 761 return (error); 762 } 763 VOP_UNLOCK(vp, 0); 764 FILEDESC_XLOCK(fdp); 765 vpold = fdp->fd_cdir; 766 fdp->fd_cdir = vp; 767 FILEDESC_XUNLOCK(fdp); 768 vrele(vpold); 769 return (0); 770 } 771 772 /* 773 * Change current working directory (``.''). 
774 */ 775 #ifndef _SYS_SYSPROTO_H_ 776 struct chdir_args { 777 char *path; 778 }; 779 #endif 780 int 781 sys_chdir(td, uap) 782 struct thread *td; 783 struct chdir_args /* { 784 char *path; 785 } */ *uap; 786 { 787 788 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 789 } 790 791 int 792 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 793 { 794 register struct filedesc *fdp = td->td_proc->p_fd; 795 struct nameidata nd; 796 struct vnode *vp; 797 int error; 798 799 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 800 pathseg, path, td); 801 if ((error = namei(&nd)) != 0) 802 return (error); 803 if ((error = change_dir(nd.ni_vp, td)) != 0) { 804 vput(nd.ni_vp); 805 NDFREE(&nd, NDF_ONLY_PNBUF); 806 return (error); 807 } 808 VOP_UNLOCK(nd.ni_vp, 0); 809 NDFREE(&nd, NDF_ONLY_PNBUF); 810 FILEDESC_XLOCK(fdp); 811 vp = fdp->fd_cdir; 812 fdp->fd_cdir = nd.ni_vp; 813 FILEDESC_XUNLOCK(fdp); 814 vrele(vp); 815 return (0); 816 } 817 818 /* 819 * Helper function for raised chroot(2) security function: Refuse if 820 * any filedescriptors are open directories. 821 */ 822 static int 823 chroot_refuse_vdir_fds(fdp) 824 struct filedesc *fdp; 825 { 826 struct vnode *vp; 827 struct file *fp; 828 int fd; 829 830 FILEDESC_LOCK_ASSERT(fdp); 831 832 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 833 fp = fget_locked(fdp, fd); 834 if (fp == NULL) 835 continue; 836 if (fp->f_type == DTYPE_VNODE) { 837 vp = fp->f_vnode; 838 if (vp->v_type == VDIR) 839 return (EPERM); 840 } 841 } 842 return (0); 843 } 844 845 /* 846 * This sysctl determines if we will allow a process to chroot(2) if it 847 * has a directory open: 848 * 0: disallowed for all processes. 849 * 1: allowed for processes that were not already chroot(2)'ed. 850 * 2: allowed for all processes. 
851 */ 852 853 static int chroot_allow_open_directories = 1; 854 855 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 856 &chroot_allow_open_directories, 0, 857 "Allow a process to chroot(2) if it has a directory open"); 858 859 /* 860 * Change notion of root (``/'') directory. 861 */ 862 #ifndef _SYS_SYSPROTO_H_ 863 struct chroot_args { 864 char *path; 865 }; 866 #endif 867 int 868 sys_chroot(td, uap) 869 struct thread *td; 870 struct chroot_args /* { 871 char *path; 872 } */ *uap; 873 { 874 struct nameidata nd; 875 int error; 876 877 error = priv_check(td, PRIV_VFS_CHROOT); 878 if (error != 0) 879 return (error); 880 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 881 UIO_USERSPACE, uap->path, td); 882 error = namei(&nd); 883 if (error != 0) 884 goto error; 885 error = change_dir(nd.ni_vp, td); 886 if (error != 0) 887 goto e_vunlock; 888 #ifdef MAC 889 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 890 if (error != 0) 891 goto e_vunlock; 892 #endif 893 VOP_UNLOCK(nd.ni_vp, 0); 894 error = change_root(nd.ni_vp, td); 895 vrele(nd.ni_vp); 896 NDFREE(&nd, NDF_ONLY_PNBUF); 897 return (error); 898 e_vunlock: 899 vput(nd.ni_vp); 900 error: 901 NDFREE(&nd, NDF_ONLY_PNBUF); 902 return (error); 903 } 904 905 /* 906 * Common routine for chroot and chdir. Callers must provide a locked vnode 907 * instance. 908 */ 909 int 910 change_dir(vp, td) 911 struct vnode *vp; 912 struct thread *td; 913 { 914 #ifdef MAC 915 int error; 916 #endif 917 918 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 919 if (vp->v_type != VDIR) 920 return (ENOTDIR); 921 #ifdef MAC 922 error = mac_vnode_check_chdir(td->td_ucred, vp); 923 if (error != 0) 924 return (error); 925 #endif 926 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 927 } 928 929 /* 930 * Common routine for kern_chroot() and jail_attach(). The caller is 931 * responsible for invoking priv_check() and mac_vnode_check_chroot() to 932 * authorize this operation. 
933 */ 934 int 935 change_root(vp, td) 936 struct vnode *vp; 937 struct thread *td; 938 { 939 struct filedesc *fdp; 940 struct vnode *oldvp; 941 int error; 942 943 fdp = td->td_proc->p_fd; 944 FILEDESC_XLOCK(fdp); 945 if (chroot_allow_open_directories == 0 || 946 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 947 error = chroot_refuse_vdir_fds(fdp); 948 if (error != 0) { 949 FILEDESC_XUNLOCK(fdp); 950 return (error); 951 } 952 } 953 oldvp = fdp->fd_rdir; 954 fdp->fd_rdir = vp; 955 VREF(fdp->fd_rdir); 956 if (!fdp->fd_jdir) { 957 fdp->fd_jdir = vp; 958 VREF(fdp->fd_jdir); 959 } 960 FILEDESC_XUNLOCK(fdp); 961 vrele(oldvp); 962 return (0); 963 } 964 965 static __inline void 966 flags_to_rights(int flags, cap_rights_t *rightsp) 967 { 968 969 if (flags & O_EXEC) { 970 cap_rights_set(rightsp, CAP_FEXECVE); 971 } else { 972 switch ((flags & O_ACCMODE)) { 973 case O_RDONLY: 974 cap_rights_set(rightsp, CAP_READ); 975 break; 976 case O_RDWR: 977 cap_rights_set(rightsp, CAP_READ); 978 /* FALLTHROUGH */ 979 case O_WRONLY: 980 cap_rights_set(rightsp, CAP_WRITE); 981 if (!(flags & (O_APPEND | O_TRUNC))) 982 cap_rights_set(rightsp, CAP_SEEK); 983 break; 984 } 985 } 986 987 if (flags & O_CREAT) 988 cap_rights_set(rightsp, CAP_CREATE); 989 990 if (flags & O_TRUNC) 991 cap_rights_set(rightsp, CAP_FTRUNCATE); 992 993 if (flags & (O_SYNC | O_FSYNC)) 994 cap_rights_set(rightsp, CAP_FSYNC); 995 996 if (flags & (O_EXLOCK | O_SHLOCK)) 997 cap_rights_set(rightsp, CAP_FLOCK); 998 } 999 1000 /* 1001 * Check permissions, allocate an open file structure, and call the device 1002 * open routine if any. 
 */
#ifndef _SYS_SYSPROTO_H_
struct open_args {
	char	*path;
	int	flags;
	int	mode;
};
#endif
/* open(2): forwards the user-space path, flags and mode to kern_open(). */
int
sys_open(td, uap)
	struct thread *td;
	register struct open_args /* {
		char *path;
		int flags;
		int mode;
	} */ *uap;
{

	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct openat_args {
	int	fd;
	char	*path;
	int	flag;
	int	mode;
};
#endif
/* openat(2): forwards fd, user-space path, flag and mode to kern_openat(). */
int
sys_openat(struct thread *td, struct openat_args *uap)
{

	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
	    uap->mode));
}

/* kern_openat() with AT_FDCWD as the starting descriptor. */
int
kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
    int mode)
{

	return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode));
}

/*
 * Open 'path' and install a descriptor for it.
 *
 * td      - calling thread; on success td->td_retval[0] receives the new
 *           descriptor index.
 * fd      - starting descriptor handed to NDINIT_ATRIGHTS() for the lookup.
 * path    - pathname, located in address space 'pathseg'.
 * flags   - open(2) flags; rejected with EINVAL when O_EXEC is combined
 *           with any access-mode bit, or when both access-mode bits are set.
 * mode    - creation mode; masked with the process' fd_cmask and ALLPERMS,
 *           with S_ISTXT cleared, before being passed to vn_open().
 *
 * Returns 0 on success or an errno value.  On every failure after
 * falloc_noinstall() the file reference obtained there is dropped at 'bad'.
 */
int
kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    int flags, int mode)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	struct nameidata nd;
	cap_rights_t rights;
	int cmode, error, indx;

	/* -1 means "no descriptor installed yet"; see success: and bad:. */
	indx = -1;

	AUDIT_ARG_FFLAGS(flags);
	AUDIT_ARG_MODE(mode);
	/* XXX: audit dirfd */
	cap_rights_init(&rights, CAP_LOOKUP);
	flags_to_rights(flags, &rights);
	/*
	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
	 * may be specified.
	 */
	if (flags & O_EXEC) {
		if (flags & O_ACCMODE)
			return (EINVAL);
	} else if ((flags & O_ACCMODE) == O_ACCMODE) {
		return (EINVAL);
	} else {
		flags = FFLAGS(flags);
	}

	/*
	 * Allocate the file descriptor, but don't install a descriptor yet.
	 */
	error = falloc_noinstall(td, &fp);
	if (error != 0)
		return (error);
	/*
	 * An extra reference on `fp' has been held for us by
	 * falloc_noinstall().
	 */
	/* Set the flags early so the finit in devfs can pick them up. */
	fp->f_flag = flags & FMASK;
	cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
	    &rights, td);
	td->td_dupfd = -1;		/* XXX check for fdopen */
	error = vn_open(&nd, &flags, cmode, fp);
	if (error != 0) {
		/*
		 * If the vn_open replaced the method vector, something
		 * wondrous happened deep below and we just pass it up
		 * pretending we know what we do.
		 */
		if (error == ENXIO && fp->f_ops != &badfileops)
			goto success;

		/*
		 * Handle special fdopen() case. bleh.
		 *
		 * Don't do this for relative (capability) lookups; we don't
		 * understand exactly what would happen, and we don't think
		 * that it ever should.
		 */
		if (nd.ni_strictrelative == 0 &&
		    (error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0) {
			/* dupfdopen() is handed &indx and may set it. */
			error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
			    &indx);
			if (error == 0)
				goto success;
		}

		goto bad;
	}
	td->td_dupfd = 0;
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;

	/*
	 * Store the vnode, for any f_type. Typically, the vnode use
	 * count is decremented by direct call to vn_closefile() for
	 * files that switched type in the cdevsw fdopen() method.
	 */
	fp->f_vnode = vp;
	/*
	 * If the file wasn't claimed by devfs bind it to the normal
	 * vnode operations here.
	 */
	if (fp->f_ops == &badfileops) {
		KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
		fp->f_seqcount = 1;
		finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK),
		    DTYPE_VNODE, vp, &vnops);
	}

	VOP_UNLOCK(vp, 0);
	/* Truncation is done through the file ops after the vnode unlock. */
	if (flags & O_TRUNC) {
		error = fo_truncate(fp, 0, td->td_ucred, td);
		if (error != 0)
			goto bad;
	}
success:
	/*
	 * If we haven't already installed the FD (for dupfdopen), do so now.
	 */
	if (indx == -1) {
		struct filecaps *fcaps;

#ifdef CAPABILITIES
		if (nd.ni_strictrelative == 1)
			fcaps = &nd.ni_filecaps;
		else
#endif
			fcaps = NULL;
		error = finstall(td, fp, &indx, flags, fcaps);
		/* On success finstall() consumes fcaps. */
		if (error != 0) {
			filecaps_free(&nd.ni_filecaps);
			goto bad;
		}
	} else {
		filecaps_free(&nd.ni_filecaps);
	}

	/*
	 * Release our private reference, leaving the one associated with
	 * the descriptor table intact.
	 */
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (0);
bad:
	KASSERT(indx == -1, ("indx=%d, should be -1", indx));
	fdrop(fp, td);
	return (error);
}

#ifdef COMPAT_43
/*
 * Create a file.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
/* Old creat(): kern_open() with O_WRONLY | O_CREAT | O_TRUNC. */
int
ocreat(td, uap)
	struct thread *td;
	register struct ocreat_args /* {
		char *path;
		int mode;
	} */ *uap;
{

	return (kern_open(td, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
}
#endif /* COMPAT_43 */

/*
 * Create a special file.
 */
#ifndef _SYS_SYSPROTO_H_
struct mknod_args {
	char	*path;
	int	mode;
	int	dev;
};
#endif
/* mknod(2): forwards the user-space path, mode and dev to kern_mknod(). */
int
sys_mknod(td, uap)
	struct thread *td;
	register struct mknod_args /* {
		char *path;
		int mode;
		int dev;
	} */ *uap;
{

	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
}

#ifndef _SYS_SYSPROTO_H_
struct mknodat_args {
	int	fd;
	char	*path;
	mode_t	mode;
	dev_t	dev;
};
#endif
/* mknodat(2): forwards fd, user-space path, mode and dev to kern_mknodat(). */
int
sys_mknodat(struct thread *td, struct mknodat_args *uap)
{

	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
	    uap->dev));
}

/* kern_mknodat() with AT_FDCWD as the starting descriptor. */
int
kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
    int dev)
{

	return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev));
}

/*
 * Create a special file named 'path', looked up with 'fd' as the starting
 * descriptor.
 *
 * The file type bits of 'mode' select the operation and the privilege that
 * is checked first:
 *	S_IFCHR, S_IFBLK - PRIV_VFS_MKNOD_DEV, creates a VCHR / VBLK node
 *	S_IFMT           - PRIV_VFS_MKNOD_BAD, creates a VBAD node
 *	S_IFWHT          - PRIV_VFS_MKNOD_WHT, creates a whiteout entry
 *	S_IFIFO, dev 0   - handed off to kern_mkfifoat()
 *	anything else    - EINVAL
 *
 * Returns 0 on success, EEXIST when the name already exists, or the error
 * from the privilege check, namei(), vn_start_write(), the MAC check (when
 * MAC is compiled in), VOP_WHITEOUT() or VOP_MKNOD().
 */
int
kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    int mode, int dev)
{
	struct vnode *vp;
	struct mount *mp;
	struct vattr vattr;
	struct nameidata nd;
	cap_rights_t rights;
	int error, whiteout = 0;

	AUDIT_ARG_MODE(mode);
	AUDIT_ARG_DEV(dev);
	switch (mode & S_IFMT) {
	case S_IFCHR:
	case S_IFBLK:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		break;
	case S_IFMT:
		error = priv_check(td, PRIV_VFS_MKNOD_BAD);
		break;
	case S_IFWHT:
		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
		break;
	case S_IFIFO:
		if (dev == 0)
			return (kern_mkfifoat(td, fd, path, pathseg, mode));
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error != 0)
		return (error);
restart:
	/* Everything from here on is redone if we have to 'goto restart'. */
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1,
	    pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp != NULL) {
		/*
		 * The name already exists.  When the target is the parent
		 * directory itself only a reference is dropped for it;
		 * otherwise the parent is released with vput().
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		return (EEXIST);
	} else {
		VATTR_NULL(&vattr);
		vattr.va_mode = (mode & ALLPERMS) &
		    ~td->td_proc->p_fd->fd_cmask;
		vattr.va_rdev = dev;
		whiteout = 0;

		switch (mode & S_IFMT) {
		case S_IFMT:	/* used by badsect to flag bad sectors */
			vattr.va_type = VBAD;
			break;
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFWHT:
			whiteout = 1;
			break;
		default:
			/* Other types were rejected by the switch above. */
			panic("kern_mknod: invalid mode");
		}
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/*
		 * Could not start the write without waiting: release what
		 * the lookup gave us, call vn_start_write() again with
		 * V_XSLEEP | PCATCH, and redo the lookup from scratch.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
#ifdef MAC
	/* The MAC create check is not applied to whiteouts. */
	if (error == 0 && !whiteout)
		error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
		    &nd.ni_cnd, &vattr);
#endif
	if (error == 0) {
		if (whiteout)
			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
		else {
			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
			    &nd.ni_cnd, &vattr);
			/* The new vnode is not needed by this function. */
			if (error == 0)
				vput(nd.ni_vp);
		}
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	return (error);
}

/*
 * Create a named pipe.
1361 */ 1362 #ifndef _SYS_SYSPROTO_H_ 1363 struct mkfifo_args { 1364 char *path; 1365 int mode; 1366 }; 1367 #endif 1368 int 1369 sys_mkfifo(td, uap) 1370 struct thread *td; 1371 register struct mkfifo_args /* { 1372 char *path; 1373 int mode; 1374 } */ *uap; 1375 { 1376 1377 return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode)); 1378 } 1379 1380 #ifndef _SYS_SYSPROTO_H_ 1381 struct mkfifoat_args { 1382 int fd; 1383 char *path; 1384 mode_t mode; 1385 }; 1386 #endif 1387 int 1388 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1389 { 1390 1391 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1392 uap->mode)); 1393 } 1394 1395 int 1396 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode) 1397 { 1398 1399 return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode)); 1400 } 1401 1402 int 1403 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1404 int mode) 1405 { 1406 struct mount *mp; 1407 struct vattr vattr; 1408 struct nameidata nd; 1409 cap_rights_t rights; 1410 int error; 1411 1412 AUDIT_ARG_MODE(mode); 1413 restart: 1414 bwillwrite(); 1415 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1416 pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), td); 1417 if ((error = namei(&nd)) != 0) 1418 return (error); 1419 if (nd.ni_vp != NULL) { 1420 NDFREE(&nd, NDF_ONLY_PNBUF); 1421 if (nd.ni_vp == nd.ni_dvp) 1422 vrele(nd.ni_dvp); 1423 else 1424 vput(nd.ni_dvp); 1425 vrele(nd.ni_vp); 1426 return (EEXIST); 1427 } 1428 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1429 NDFREE(&nd, NDF_ONLY_PNBUF); 1430 vput(nd.ni_dvp); 1431 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1432 return (error); 1433 goto restart; 1434 } 1435 VATTR_NULL(&vattr); 1436 vattr.va_type = VFIFO; 1437 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1438 #ifdef MAC 1439 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1440 &vattr); 1441 if (error 
!= 0) 1442 goto out; 1443 #endif 1444 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1445 if (error == 0) 1446 vput(nd.ni_vp); 1447 #ifdef MAC 1448 out: 1449 #endif 1450 vput(nd.ni_dvp); 1451 vn_finished_write(mp); 1452 NDFREE(&nd, NDF_ONLY_PNBUF); 1453 return (error); 1454 } 1455 1456 /* 1457 * Make a hard file link. 1458 */ 1459 #ifndef _SYS_SYSPROTO_H_ 1460 struct link_args { 1461 char *path; 1462 char *link; 1463 }; 1464 #endif 1465 int 1466 sys_link(td, uap) 1467 struct thread *td; 1468 register struct link_args /* { 1469 char *path; 1470 char *link; 1471 } */ *uap; 1472 { 1473 1474 return (kern_link(td, uap->path, uap->link, UIO_USERSPACE)); 1475 } 1476 1477 #ifndef _SYS_SYSPROTO_H_ 1478 struct linkat_args { 1479 int fd1; 1480 char *path1; 1481 int fd2; 1482 char *path2; 1483 int flag; 1484 }; 1485 #endif 1486 int 1487 sys_linkat(struct thread *td, struct linkat_args *uap) 1488 { 1489 int flag; 1490 1491 flag = uap->flag; 1492 if (flag & ~AT_SYMLINK_FOLLOW) 1493 return (EINVAL); 1494 1495 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1496 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? 
FOLLOW : NOFOLLOW)); 1497 } 1498 1499 int hardlink_check_uid = 0; 1500 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1501 &hardlink_check_uid, 0, 1502 "Unprivileged processes cannot create hard links to files owned by other " 1503 "users"); 1504 static int hardlink_check_gid = 0; 1505 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1506 &hardlink_check_gid, 0, 1507 "Unprivileged processes cannot create hard links to files owned by other " 1508 "groups"); 1509 1510 static int 1511 can_hardlink(struct vnode *vp, struct ucred *cred) 1512 { 1513 struct vattr va; 1514 int error; 1515 1516 if (!hardlink_check_uid && !hardlink_check_gid) 1517 return (0); 1518 1519 error = VOP_GETATTR(vp, &va, cred); 1520 if (error != 0) 1521 return (error); 1522 1523 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1524 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1525 if (error != 0) 1526 return (error); 1527 } 1528 1529 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1530 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1531 if (error != 0) 1532 return (error); 1533 } 1534 1535 return (0); 1536 } 1537 1538 int 1539 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg) 1540 { 1541 1542 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW)); 1543 } 1544 1545 int 1546 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1547 enum uio_seg segflg, int follow) 1548 { 1549 struct vnode *vp; 1550 struct mount *mp; 1551 struct nameidata nd; 1552 cap_rights_t rights; 1553 int error; 1554 1555 bwillwrite(); 1556 NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td); 1557 1558 if ((error = namei(&nd)) != 0) 1559 return (error); 1560 NDFREE(&nd, NDF_ONLY_PNBUF); 1561 vp = nd.ni_vp; 1562 if (vp->v_type == VDIR) { 1563 vrele(vp); 1564 return (EPERM); /* POSIX */ 1565 } 1566 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 1567 vrele(vp); 1568 return 
(error); 1569 } 1570 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2, 1571 segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT), td); 1572 if ((error = namei(&nd)) == 0) { 1573 if (nd.ni_vp != NULL) { 1574 if (nd.ni_dvp == nd.ni_vp) 1575 vrele(nd.ni_dvp); 1576 else 1577 vput(nd.ni_dvp); 1578 vrele(nd.ni_vp); 1579 error = EEXIST; 1580 } else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) 1581 == 0) { 1582 error = can_hardlink(vp, td->td_ucred); 1583 if (error == 0) 1584 #ifdef MAC 1585 error = mac_vnode_check_link(td->td_ucred, 1586 nd.ni_dvp, vp, &nd.ni_cnd); 1587 if (error == 0) 1588 #endif 1589 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1590 VOP_UNLOCK(vp, 0); 1591 vput(nd.ni_dvp); 1592 } 1593 NDFREE(&nd, NDF_ONLY_PNBUF); 1594 } 1595 vrele(vp); 1596 vn_finished_write(mp); 1597 return (error); 1598 } 1599 1600 /* 1601 * Make a symbolic link. 1602 */ 1603 #ifndef _SYS_SYSPROTO_H_ 1604 struct symlink_args { 1605 char *path; 1606 char *link; 1607 }; 1608 #endif 1609 int 1610 sys_symlink(td, uap) 1611 struct thread *td; 1612 register struct symlink_args /* { 1613 char *path; 1614 char *link; 1615 } */ *uap; 1616 { 1617 1618 return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE)); 1619 } 1620 1621 #ifndef _SYS_SYSPROTO_H_ 1622 struct symlinkat_args { 1623 char *path; 1624 int fd; 1625 char *path2; 1626 }; 1627 #endif 1628 int 1629 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1630 { 1631 1632 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1633 UIO_USERSPACE)); 1634 } 1635 1636 int 1637 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg) 1638 { 1639 1640 return (kern_symlinkat(td, path, AT_FDCWD, link, segflg)); 1641 } 1642 1643 int 1644 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1645 enum uio_seg segflg) 1646 { 1647 struct mount *mp; 1648 struct vattr vattr; 1649 char *syspath; 1650 struct nameidata nd; 1651 int error; 1652 cap_rights_t rights; 1653 1654 if 
(segflg == UIO_SYSSPACE) { 1655 syspath = path1; 1656 } else { 1657 syspath = uma_zalloc(namei_zone, M_WAITOK); 1658 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1659 goto out; 1660 } 1661 AUDIT_ARG_TEXT(syspath); 1662 restart: 1663 bwillwrite(); 1664 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1665 segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), td); 1666 if ((error = namei(&nd)) != 0) 1667 goto out; 1668 if (nd.ni_vp) { 1669 NDFREE(&nd, NDF_ONLY_PNBUF); 1670 if (nd.ni_vp == nd.ni_dvp) 1671 vrele(nd.ni_dvp); 1672 else 1673 vput(nd.ni_dvp); 1674 vrele(nd.ni_vp); 1675 error = EEXIST; 1676 goto out; 1677 } 1678 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1679 NDFREE(&nd, NDF_ONLY_PNBUF); 1680 vput(nd.ni_dvp); 1681 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1682 goto out; 1683 goto restart; 1684 } 1685 VATTR_NULL(&vattr); 1686 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1687 #ifdef MAC 1688 vattr.va_type = VLNK; 1689 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1690 &vattr); 1691 if (error != 0) 1692 goto out2; 1693 #endif 1694 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1695 if (error == 0) 1696 vput(nd.ni_vp); 1697 #ifdef MAC 1698 out2: 1699 #endif 1700 NDFREE(&nd, NDF_ONLY_PNBUF); 1701 vput(nd.ni_dvp); 1702 vn_finished_write(mp); 1703 out: 1704 if (segflg != UIO_SYSSPACE) 1705 uma_zfree(namei_zone, syspath); 1706 return (error); 1707 } 1708 1709 /* 1710 * Delete a whiteout from the filesystem. 
1711 */ 1712 int 1713 sys_undelete(td, uap) 1714 struct thread *td; 1715 register struct undelete_args /* { 1716 char *path; 1717 } */ *uap; 1718 { 1719 struct mount *mp; 1720 struct nameidata nd; 1721 int error; 1722 1723 restart: 1724 bwillwrite(); 1725 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1726 UIO_USERSPACE, uap->path, td); 1727 error = namei(&nd); 1728 if (error != 0) 1729 return (error); 1730 1731 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1732 NDFREE(&nd, NDF_ONLY_PNBUF); 1733 if (nd.ni_vp == nd.ni_dvp) 1734 vrele(nd.ni_dvp); 1735 else 1736 vput(nd.ni_dvp); 1737 if (nd.ni_vp) 1738 vrele(nd.ni_vp); 1739 return (EEXIST); 1740 } 1741 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1742 NDFREE(&nd, NDF_ONLY_PNBUF); 1743 vput(nd.ni_dvp); 1744 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1745 return (error); 1746 goto restart; 1747 } 1748 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1749 NDFREE(&nd, NDF_ONLY_PNBUF); 1750 vput(nd.ni_dvp); 1751 vn_finished_write(mp); 1752 return (error); 1753 } 1754 1755 /* 1756 * Delete a name from the filesystem. 
1757 */ 1758 #ifndef _SYS_SYSPROTO_H_ 1759 struct unlink_args { 1760 char *path; 1761 }; 1762 #endif 1763 int 1764 sys_unlink(td, uap) 1765 struct thread *td; 1766 struct unlink_args /* { 1767 char *path; 1768 } */ *uap; 1769 { 1770 1771 return (kern_unlink(td, uap->path, UIO_USERSPACE)); 1772 } 1773 1774 #ifndef _SYS_SYSPROTO_H_ 1775 struct unlinkat_args { 1776 int fd; 1777 char *path; 1778 int flag; 1779 }; 1780 #endif 1781 int 1782 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1783 { 1784 int flag = uap->flag; 1785 int fd = uap->fd; 1786 char *path = uap->path; 1787 1788 if (flag & ~AT_REMOVEDIR) 1789 return (EINVAL); 1790 1791 if (flag & AT_REMOVEDIR) 1792 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1793 else 1794 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1795 } 1796 1797 int 1798 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg) 1799 { 1800 1801 return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0)); 1802 } 1803 1804 int 1805 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1806 ino_t oldinum) 1807 { 1808 struct mount *mp; 1809 struct vnode *vp; 1810 struct nameidata nd; 1811 struct stat sb; 1812 cap_rights_t rights; 1813 int error; 1814 1815 restart: 1816 bwillwrite(); 1817 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1818 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1819 if ((error = namei(&nd)) != 0) 1820 return (error == EINVAL ? EPERM : error); 1821 vp = nd.ni_vp; 1822 if (vp->v_type == VDIR && oldinum == 0) { 1823 error = EPERM; /* POSIX */ 1824 } else if (oldinum != 0 && 1825 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1826 sb.st_ino != oldinum) { 1827 error = EIDRM; /* Identifier removed */ 1828 } else { 1829 /* 1830 * The root of a mounted filesystem cannot be deleted. 1831 * 1832 * XXX: can this only be a VDIR case? 
1833 */ 1834 if (vp->v_vflag & VV_ROOT) 1835 error = EBUSY; 1836 } 1837 if (error == 0) { 1838 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1839 NDFREE(&nd, NDF_ONLY_PNBUF); 1840 vput(nd.ni_dvp); 1841 if (vp == nd.ni_dvp) 1842 vrele(vp); 1843 else 1844 vput(vp); 1845 if ((error = vn_start_write(NULL, &mp, 1846 V_XSLEEP | PCATCH)) != 0) 1847 return (error); 1848 goto restart; 1849 } 1850 #ifdef MAC 1851 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1852 &nd.ni_cnd); 1853 if (error != 0) 1854 goto out; 1855 #endif 1856 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1857 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1858 #ifdef MAC 1859 out: 1860 #endif 1861 vn_finished_write(mp); 1862 } 1863 NDFREE(&nd, NDF_ONLY_PNBUF); 1864 vput(nd.ni_dvp); 1865 if (vp == nd.ni_dvp) 1866 vrele(vp); 1867 else 1868 vput(vp); 1869 return (error); 1870 } 1871 1872 /* 1873 * Reposition read/write file offset. 1874 */ 1875 #ifndef _SYS_SYSPROTO_H_ 1876 struct lseek_args { 1877 int fd; 1878 int pad; 1879 off_t offset; 1880 int whence; 1881 }; 1882 #endif 1883 int 1884 sys_lseek(td, uap) 1885 struct thread *td; 1886 register struct lseek_args /* { 1887 int fd; 1888 int pad; 1889 off_t offset; 1890 int whence; 1891 } */ *uap; 1892 { 1893 struct file *fp; 1894 cap_rights_t rights; 1895 int error; 1896 1897 AUDIT_ARG_FD(uap->fd); 1898 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1899 if (error != 0) 1900 return (error); 1901 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1902 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1903 fdrop(fp, td); 1904 return (error); 1905 } 1906 1907 #if defined(COMPAT_43) 1908 /* 1909 * Reposition read/write file offset. 
1910 */ 1911 #ifndef _SYS_SYSPROTO_H_ 1912 struct olseek_args { 1913 int fd; 1914 long offset; 1915 int whence; 1916 }; 1917 #endif 1918 int 1919 olseek(td, uap) 1920 struct thread *td; 1921 register struct olseek_args /* { 1922 int fd; 1923 long offset; 1924 int whence; 1925 } */ *uap; 1926 { 1927 struct lseek_args /* { 1928 int fd; 1929 int pad; 1930 off_t offset; 1931 int whence; 1932 } */ nuap; 1933 1934 nuap.fd = uap->fd; 1935 nuap.offset = uap->offset; 1936 nuap.whence = uap->whence; 1937 return (sys_lseek(td, &nuap)); 1938 } 1939 #endif /* COMPAT_43 */ 1940 1941 /* Version with the 'pad' argument */ 1942 int 1943 freebsd6_lseek(td, uap) 1944 struct thread *td; 1945 register struct freebsd6_lseek_args *uap; 1946 { 1947 struct lseek_args ouap; 1948 1949 ouap.fd = uap->fd; 1950 ouap.offset = uap->offset; 1951 ouap.whence = uap->whence; 1952 return (sys_lseek(td, &ouap)); 1953 } 1954 1955 /* 1956 * Check access permissions using passed credentials. 1957 */ 1958 static int 1959 vn_access(vp, user_flags, cred, td) 1960 struct vnode *vp; 1961 int user_flags; 1962 struct ucred *cred; 1963 struct thread *td; 1964 { 1965 accmode_t accmode; 1966 int error; 1967 1968 /* Flags == 0 means only check for existence. */ 1969 error = 0; 1970 if (user_flags) { 1971 accmode = 0; 1972 if (user_flags & R_OK) 1973 accmode |= VREAD; 1974 if (user_flags & W_OK) 1975 accmode |= VWRITE; 1976 if (user_flags & X_OK) 1977 accmode |= VEXEC; 1978 #ifdef MAC 1979 error = mac_vnode_check_access(cred, vp, accmode); 1980 if (error != 0) 1981 return (error); 1982 #endif 1983 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1984 error = VOP_ACCESS(vp, accmode, cred, td); 1985 } 1986 return (error); 1987 } 1988 1989 /* 1990 * Check access permissions using "real" credentials. 
1991 */ 1992 #ifndef _SYS_SYSPROTO_H_ 1993 struct access_args { 1994 char *path; 1995 int amode; 1996 }; 1997 #endif 1998 int 1999 sys_access(td, uap) 2000 struct thread *td; 2001 register struct access_args /* { 2002 char *path; 2003 int amode; 2004 } */ *uap; 2005 { 2006 2007 return (kern_access(td, uap->path, UIO_USERSPACE, uap->amode)); 2008 } 2009 2010 #ifndef _SYS_SYSPROTO_H_ 2011 struct faccessat_args { 2012 int dirfd; 2013 char *path; 2014 int amode; 2015 int flag; 2016 } 2017 #endif 2018 int 2019 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2020 { 2021 2022 if (uap->flag & ~AT_EACCESS) 2023 return (EINVAL); 2024 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2025 uap->amode)); 2026 } 2027 2028 int 2029 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2030 { 2031 2032 return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, amode)); 2033 } 2034 2035 int 2036 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2037 int flag, int amode) 2038 { 2039 struct ucred *cred, *tmpcred; 2040 struct vnode *vp; 2041 struct nameidata nd; 2042 cap_rights_t rights; 2043 int error; 2044 2045 /* 2046 * Create and modify a temporary credential instead of one that 2047 * is potentially shared. 
2048 */ 2049 if (!(flag & AT_EACCESS)) { 2050 cred = td->td_ucred; 2051 tmpcred = crdup(cred); 2052 tmpcred->cr_uid = cred->cr_ruid; 2053 tmpcred->cr_groups[0] = cred->cr_rgid; 2054 td->td_ucred = tmpcred; 2055 } else 2056 cred = tmpcred = td->td_ucred; 2057 AUDIT_ARG_VALUE(amode); 2058 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2059 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 2060 td); 2061 if ((error = namei(&nd)) != 0) 2062 goto out1; 2063 vp = nd.ni_vp; 2064 2065 error = vn_access(vp, amode, tmpcred, td); 2066 NDFREE(&nd, NDF_ONLY_PNBUF); 2067 vput(vp); 2068 out1: 2069 if (!(flag & AT_EACCESS)) { 2070 td->td_ucred = cred; 2071 crfree(tmpcred); 2072 } 2073 return (error); 2074 } 2075 2076 /* 2077 * Check access permissions using "effective" credentials. 2078 */ 2079 #ifndef _SYS_SYSPROTO_H_ 2080 struct eaccess_args { 2081 char *path; 2082 int amode; 2083 }; 2084 #endif 2085 int 2086 sys_eaccess(td, uap) 2087 struct thread *td; 2088 register struct eaccess_args /* { 2089 char *path; 2090 int amode; 2091 } */ *uap; 2092 { 2093 2094 return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->amode)); 2095 } 2096 2097 int 2098 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2099 { 2100 2101 return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, amode)); 2102 } 2103 2104 #if defined(COMPAT_43) 2105 /* 2106 * Get file status; this version follows links. 
2107 */ 2108 #ifndef _SYS_SYSPROTO_H_ 2109 struct ostat_args { 2110 char *path; 2111 struct ostat *ub; 2112 }; 2113 #endif 2114 int 2115 ostat(td, uap) 2116 struct thread *td; 2117 register struct ostat_args /* { 2118 char *path; 2119 struct ostat *ub; 2120 } */ *uap; 2121 { 2122 struct stat sb; 2123 struct ostat osb; 2124 int error; 2125 2126 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2127 if (error != 0) 2128 return (error); 2129 cvtstat(&sb, &osb); 2130 return (copyout(&osb, uap->ub, sizeof (osb))); 2131 } 2132 2133 /* 2134 * Get file status; this version does not follow links. 2135 */ 2136 #ifndef _SYS_SYSPROTO_H_ 2137 struct olstat_args { 2138 char *path; 2139 struct ostat *ub; 2140 }; 2141 #endif 2142 int 2143 olstat(td, uap) 2144 struct thread *td; 2145 register struct olstat_args /* { 2146 char *path; 2147 struct ostat *ub; 2148 } */ *uap; 2149 { 2150 struct stat sb; 2151 struct ostat osb; 2152 int error; 2153 2154 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2155 if (error != 0) 2156 return (error); 2157 cvtstat(&sb, &osb); 2158 return (copyout(&osb, uap->ub, sizeof (osb))); 2159 } 2160 2161 /* 2162 * Convert from an old to a new stat structure. 2163 */ 2164 void 2165 cvtstat(st, ost) 2166 struct stat *st; 2167 struct ostat *ost; 2168 { 2169 2170 ost->st_dev = st->st_dev; 2171 ost->st_ino = st->st_ino; 2172 ost->st_mode = st->st_mode; 2173 ost->st_nlink = st->st_nlink; 2174 ost->st_uid = st->st_uid; 2175 ost->st_gid = st->st_gid; 2176 ost->st_rdev = st->st_rdev; 2177 if (st->st_size < (quad_t)1 << 32) 2178 ost->st_size = st->st_size; 2179 else 2180 ost->st_size = -2; 2181 ost->st_atim = st->st_atim; 2182 ost->st_mtim = st->st_mtim; 2183 ost->st_ctim = st->st_ctim; 2184 ost->st_blksize = st->st_blksize; 2185 ost->st_blocks = st->st_blocks; 2186 ost->st_flags = st->st_flags; 2187 ost->st_gen = st->st_gen; 2188 } 2189 #endif /* COMPAT_43 */ 2190 2191 /* 2192 * Get file status; this version follows links. 
2193 */ 2194 #ifndef _SYS_SYSPROTO_H_ 2195 struct stat_args { 2196 char *path; 2197 struct stat *ub; 2198 }; 2199 #endif 2200 int 2201 sys_stat(td, uap) 2202 struct thread *td; 2203 register struct stat_args /* { 2204 char *path; 2205 struct stat *ub; 2206 } */ *uap; 2207 { 2208 struct stat sb; 2209 int error; 2210 2211 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2212 if (error == 0) 2213 error = copyout(&sb, uap->ub, sizeof (sb)); 2214 return (error); 2215 } 2216 2217 #ifndef _SYS_SYSPROTO_H_ 2218 struct fstatat_args { 2219 int fd; 2220 char *path; 2221 struct stat *buf; 2222 int flag; 2223 } 2224 #endif 2225 int 2226 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2227 { 2228 struct stat sb; 2229 int error; 2230 2231 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2232 UIO_USERSPACE, &sb); 2233 if (error == 0) 2234 error = copyout(&sb, uap->buf, sizeof (sb)); 2235 return (error); 2236 } 2237 2238 int 2239 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2240 { 2241 2242 return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp)); 2243 } 2244 2245 int 2246 kern_statat(struct thread *td, int flag, int fd, char *path, 2247 enum uio_seg pathseg, struct stat *sbp) 2248 { 2249 2250 return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL)); 2251 } 2252 2253 int 2254 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path, 2255 enum uio_seg pathseg, struct stat *sbp, 2256 void (*hook)(struct vnode *vp, struct stat *sbp)) 2257 { 2258 struct nameidata nd; 2259 struct stat sb; 2260 cap_rights_t rights; 2261 int error; 2262 2263 if (flag & ~AT_SYMLINK_NOFOLLOW) 2264 return (EINVAL); 2265 2266 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : 2267 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2268 cap_rights_init(&rights, CAP_FSTAT), td); 2269 2270 if ((error = namei(&nd)) != 0) 2271 return (error); 2272 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2273 if (error == 0) { 2274 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0); 2275 if (S_ISREG(sb.st_mode)) 2276 SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0); 2277 if (__predict_false(hook != NULL)) 2278 hook(nd.ni_vp, &sb); 2279 } 2280 NDFREE(&nd, NDF_ONLY_PNBUF); 2281 vput(nd.ni_vp); 2282 if (error != 0) 2283 return (error); 2284 *sbp = sb; 2285 #ifdef KTRACE 2286 if (KTRPOINT(td, KTR_STRUCT)) 2287 ktrstat(&sb); 2288 #endif 2289 return (0); 2290 } 2291 2292 /* 2293 * Get file status; this version does not follow links. 2294 */ 2295 #ifndef _SYS_SYSPROTO_H_ 2296 struct lstat_args { 2297 char *path; 2298 struct stat *ub; 2299 }; 2300 #endif 2301 int 2302 sys_lstat(td, uap) 2303 struct thread *td; 2304 register struct lstat_args /* { 2305 char *path; 2306 struct stat *ub; 2307 } */ *uap; 2308 { 2309 struct stat sb; 2310 int error; 2311 2312 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2313 if (error == 0) 2314 error = copyout(&sb, uap->ub, sizeof (sb)); 2315 return (error); 2316 } 2317 2318 int 2319 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2320 { 2321 2322 return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg, 2323 sbp)); 2324 } 2325 2326 /* 2327 * Implementation of the NetBSD [l]stat() functions. 
2328 */ 2329 void 2330 cvtnstat(sb, nsb) 2331 struct stat *sb; 2332 struct nstat *nsb; 2333 { 2334 2335 bzero(nsb, sizeof *nsb); 2336 nsb->st_dev = sb->st_dev; 2337 nsb->st_ino = sb->st_ino; 2338 nsb->st_mode = sb->st_mode; 2339 nsb->st_nlink = sb->st_nlink; 2340 nsb->st_uid = sb->st_uid; 2341 nsb->st_gid = sb->st_gid; 2342 nsb->st_rdev = sb->st_rdev; 2343 nsb->st_atim = sb->st_atim; 2344 nsb->st_mtim = sb->st_mtim; 2345 nsb->st_ctim = sb->st_ctim; 2346 nsb->st_size = sb->st_size; 2347 nsb->st_blocks = sb->st_blocks; 2348 nsb->st_blksize = sb->st_blksize; 2349 nsb->st_flags = sb->st_flags; 2350 nsb->st_gen = sb->st_gen; 2351 nsb->st_birthtim = sb->st_birthtim; 2352 } 2353 2354 #ifndef _SYS_SYSPROTO_H_ 2355 struct nstat_args { 2356 char *path; 2357 struct nstat *ub; 2358 }; 2359 #endif 2360 int 2361 sys_nstat(td, uap) 2362 struct thread *td; 2363 register struct nstat_args /* { 2364 char *path; 2365 struct nstat *ub; 2366 } */ *uap; 2367 { 2368 struct stat sb; 2369 struct nstat nsb; 2370 int error; 2371 2372 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2373 if (error != 0) 2374 return (error); 2375 cvtnstat(&sb, &nsb); 2376 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2377 } 2378 2379 /* 2380 * NetBSD lstat. Get file status; this version does not follow links. 2381 */ 2382 #ifndef _SYS_SYSPROTO_H_ 2383 struct lstat_args { 2384 char *path; 2385 struct stat *ub; 2386 }; 2387 #endif 2388 int 2389 sys_nlstat(td, uap) 2390 struct thread *td; 2391 register struct nlstat_args /* { 2392 char *path; 2393 struct nstat *ub; 2394 } */ *uap; 2395 { 2396 struct stat sb; 2397 struct nstat nsb; 2398 int error; 2399 2400 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2401 if (error != 0) 2402 return (error); 2403 cvtnstat(&sb, &nsb); 2404 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2405 } 2406 2407 /* 2408 * Get configurable pathname variables. 
2409 */ 2410 #ifndef _SYS_SYSPROTO_H_ 2411 struct pathconf_args { 2412 char *path; 2413 int name; 2414 }; 2415 #endif 2416 int 2417 sys_pathconf(td, uap) 2418 struct thread *td; 2419 register struct pathconf_args /* { 2420 char *path; 2421 int name; 2422 } */ *uap; 2423 { 2424 2425 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2426 } 2427 2428 #ifndef _SYS_SYSPROTO_H_ 2429 struct lpathconf_args { 2430 char *path; 2431 int name; 2432 }; 2433 #endif 2434 int 2435 sys_lpathconf(td, uap) 2436 struct thread *td; 2437 register struct lpathconf_args /* { 2438 char *path; 2439 int name; 2440 } */ *uap; 2441 { 2442 2443 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2444 NOFOLLOW)); 2445 } 2446 2447 int 2448 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2449 u_long flags) 2450 { 2451 struct nameidata nd; 2452 int error; 2453 2454 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2455 pathseg, path, td); 2456 if ((error = namei(&nd)) != 0) 2457 return (error); 2458 NDFREE(&nd, NDF_ONLY_PNBUF); 2459 2460 /* If asynchronous I/O is available, it works for all files. */ 2461 if (name == _PC_ASYNC_IO) 2462 td->td_retval[0] = async_io_version; 2463 else 2464 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2465 vput(nd.ni_vp); 2466 return (error); 2467 } 2468 2469 /* 2470 * Return target name of a symbolic link. 
2471 */ 2472 #ifndef _SYS_SYSPROTO_H_ 2473 struct readlink_args { 2474 char *path; 2475 char *buf; 2476 size_t count; 2477 }; 2478 #endif 2479 int 2480 sys_readlink(td, uap) 2481 struct thread *td; 2482 register struct readlink_args /* { 2483 char *path; 2484 char *buf; 2485 size_t count; 2486 } */ *uap; 2487 { 2488 2489 return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf, 2490 UIO_USERSPACE, uap->count)); 2491 } 2492 #ifndef _SYS_SYSPROTO_H_ 2493 struct readlinkat_args { 2494 int fd; 2495 char *path; 2496 char *buf; 2497 size_t bufsize; 2498 }; 2499 #endif 2500 int 2501 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2502 { 2503 2504 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2505 uap->buf, UIO_USERSPACE, uap->bufsize)); 2506 } 2507 2508 int 2509 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf, 2510 enum uio_seg bufseg, size_t count) 2511 { 2512 2513 return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg, 2514 count)); 2515 } 2516 2517 int 2518 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2519 char *buf, enum uio_seg bufseg, size_t count) 2520 { 2521 struct vnode *vp; 2522 struct iovec aiov; 2523 struct uio auio; 2524 struct nameidata nd; 2525 int error; 2526 2527 if (count > IOSIZE_MAX) 2528 return (EINVAL); 2529 2530 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2531 pathseg, path, fd, td); 2532 2533 if ((error = namei(&nd)) != 0) 2534 return (error); 2535 NDFREE(&nd, NDF_ONLY_PNBUF); 2536 vp = nd.ni_vp; 2537 #ifdef MAC 2538 error = mac_vnode_check_readlink(td->td_ucred, vp); 2539 if (error != 0) { 2540 vput(vp); 2541 return (error); 2542 } 2543 #endif 2544 if (vp->v_type != VLNK) 2545 error = EINVAL; 2546 else { 2547 aiov.iov_base = buf; 2548 aiov.iov_len = count; 2549 auio.uio_iov = &aiov; 2550 auio.uio_iovcnt = 1; 2551 auio.uio_offset = 0; 2552 auio.uio_rw = UIO_READ; 2553 auio.uio_segflg = bufseg; 2554 auio.uio_td = td; 
2555 auio.uio_resid = count; 2556 error = VOP_READLINK(vp, &auio, td->td_ucred); 2557 } 2558 vput(vp); 2559 td->td_retval[0] = count - auio.uio_resid; 2560 return (error); 2561 } 2562 2563 /* 2564 * Common implementation code for chflags() and fchflags(). 2565 */ 2566 static int 2567 setfflags(td, vp, flags) 2568 struct thread *td; 2569 struct vnode *vp; 2570 u_long flags; 2571 { 2572 struct mount *mp; 2573 struct vattr vattr; 2574 int error; 2575 2576 /* We can't support the value matching VNOVAL. */ 2577 if (flags == VNOVAL) 2578 return (EOPNOTSUPP); 2579 2580 /* 2581 * Prevent non-root users from setting flags on devices. When 2582 * a device is reused, users can retain ownership of the device 2583 * if they are allowed to set flags and programs assume that 2584 * chown can't fail when done as root. 2585 */ 2586 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2587 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2588 if (error != 0) 2589 return (error); 2590 } 2591 2592 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2593 return (error); 2594 VATTR_NULL(&vattr); 2595 vattr.va_flags = flags; 2596 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2597 #ifdef MAC 2598 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2599 if (error == 0) 2600 #endif 2601 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2602 VOP_UNLOCK(vp, 0); 2603 vn_finished_write(mp); 2604 return (error); 2605 } 2606 2607 /* 2608 * Change flags of a file given a path name. 
2609 */ 2610 #ifndef _SYS_SYSPROTO_H_ 2611 struct chflags_args { 2612 const char *path; 2613 u_long flags; 2614 }; 2615 #endif 2616 int 2617 sys_chflags(td, uap) 2618 struct thread *td; 2619 register struct chflags_args /* { 2620 const char *path; 2621 u_long flags; 2622 } */ *uap; 2623 { 2624 2625 return (kern_chflags(td, uap->path, UIO_USERSPACE, uap->flags)); 2626 } 2627 2628 #ifndef _SYS_SYSPROTO_H_ 2629 struct chflagsat_args { 2630 int fd; 2631 const char *path; 2632 u_long flags; 2633 int atflag; 2634 } 2635 #endif 2636 int 2637 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2638 { 2639 int fd = uap->fd; 2640 const char *path = uap->path; 2641 u_long flags = uap->flags; 2642 int atflag = uap->atflag; 2643 2644 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2645 return (EINVAL); 2646 2647 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2648 } 2649 2650 static int 2651 kern_chflags(struct thread *td, const char *path, enum uio_seg pathseg, 2652 u_long flags) 2653 { 2654 2655 return (kern_chflagsat(td, AT_FDCWD, path, pathseg, flags, 0)); 2656 } 2657 2658 /* 2659 * Same as chflags() but doesn't follow symlinks. 2660 */ 2661 int 2662 sys_lchflags(td, uap) 2663 struct thread *td; 2664 register struct lchflags_args /* { 2665 const char *path; 2666 u_long flags; 2667 } */ *uap; 2668 { 2669 2670 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2671 uap->flags, AT_SYMLINK_NOFOLLOW)); 2672 } 2673 2674 static int 2675 kern_chflagsat(struct thread *td, int fd, const char *path, 2676 enum uio_seg pathseg, u_long flags, int atflag) 2677 { 2678 struct nameidata nd; 2679 cap_rights_t rights; 2680 int error, follow; 2681 2682 AUDIT_ARG_FFLAGS(flags); 2683 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; 2684 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2685 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2686 if ((error = namei(&nd)) != 0) 2687 return (error); 2688 NDFREE(&nd, NDF_ONLY_PNBUF); 2689 error = setfflags(td, nd.ni_vp, flags); 2690 vrele(nd.ni_vp); 2691 return (error); 2692 } 2693 2694 /* 2695 * Change flags of a file given a file descriptor. 2696 */ 2697 #ifndef _SYS_SYSPROTO_H_ 2698 struct fchflags_args { 2699 int fd; 2700 u_long flags; 2701 }; 2702 #endif 2703 int 2704 sys_fchflags(td, uap) 2705 struct thread *td; 2706 register struct fchflags_args /* { 2707 int fd; 2708 u_long flags; 2709 } */ *uap; 2710 { 2711 struct file *fp; 2712 cap_rights_t rights; 2713 int error; 2714 2715 AUDIT_ARG_FD(uap->fd); 2716 AUDIT_ARG_FFLAGS(uap->flags); 2717 error = getvnode(td->td_proc->p_fd, uap->fd, 2718 cap_rights_init(&rights, CAP_FCHFLAGS), &fp); 2719 if (error != 0) 2720 return (error); 2721 #ifdef AUDIT 2722 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2723 AUDIT_ARG_VNODE1(fp->f_vnode); 2724 VOP_UNLOCK(fp->f_vnode, 0); 2725 #endif 2726 error = setfflags(td, fp->f_vnode, uap->flags); 2727 fdrop(fp, td); 2728 return (error); 2729 } 2730 2731 /* 2732 * Common implementation code for chmod(), lchmod() and fchmod(). 2733 */ 2734 int 2735 setfmode(td, cred, vp, mode) 2736 struct thread *td; 2737 struct ucred *cred; 2738 struct vnode *vp; 2739 int mode; 2740 { 2741 struct mount *mp; 2742 struct vattr vattr; 2743 int error; 2744 2745 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2746 return (error); 2747 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2748 VATTR_NULL(&vattr); 2749 vattr.va_mode = mode & ALLPERMS; 2750 #ifdef MAC 2751 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2752 if (error == 0) 2753 #endif 2754 error = VOP_SETATTR(vp, &vattr, cred); 2755 VOP_UNLOCK(vp, 0); 2756 vn_finished_write(mp); 2757 return (error); 2758 } 2759 2760 /* 2761 * Change mode of a file given path name. 
2762 */ 2763 #ifndef _SYS_SYSPROTO_H_ 2764 struct chmod_args { 2765 char *path; 2766 int mode; 2767 }; 2768 #endif 2769 int 2770 sys_chmod(td, uap) 2771 struct thread *td; 2772 register struct chmod_args /* { 2773 char *path; 2774 int mode; 2775 } */ *uap; 2776 { 2777 2778 return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode)); 2779 } 2780 2781 #ifndef _SYS_SYSPROTO_H_ 2782 struct fchmodat_args { 2783 int dirfd; 2784 char *path; 2785 mode_t mode; 2786 int flag; 2787 } 2788 #endif 2789 int 2790 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2791 { 2792 int flag = uap->flag; 2793 int fd = uap->fd; 2794 char *path = uap->path; 2795 mode_t mode = uap->mode; 2796 2797 if (flag & ~AT_SYMLINK_NOFOLLOW) 2798 return (EINVAL); 2799 2800 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2801 } 2802 2803 int 2804 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode) 2805 { 2806 2807 return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0)); 2808 } 2809 2810 /* 2811 * Change mode of a file given path name (don't follow links.) 2812 */ 2813 #ifndef _SYS_SYSPROTO_H_ 2814 struct lchmod_args { 2815 char *path; 2816 int mode; 2817 }; 2818 #endif 2819 int 2820 sys_lchmod(td, uap) 2821 struct thread *td; 2822 register struct lchmod_args /* { 2823 char *path; 2824 int mode; 2825 } */ *uap; 2826 { 2827 2828 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2829 uap->mode, AT_SYMLINK_NOFOLLOW)); 2830 } 2831 2832 int 2833 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2834 mode_t mode, int flag) 2835 { 2836 struct nameidata nd; 2837 cap_rights_t rights; 2838 int error, follow; 2839 2840 AUDIT_ARG_MODE(mode); 2841 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; 2842 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2843 cap_rights_init(&rights, CAP_FCHMOD), td); 2844 if ((error = namei(&nd)) != 0) 2845 return (error); 2846 NDFREE(&nd, NDF_ONLY_PNBUF); 2847 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2848 vrele(nd.ni_vp); 2849 return (error); 2850 } 2851 2852 /* 2853 * Change mode of a file given a file descriptor. 2854 */ 2855 #ifndef _SYS_SYSPROTO_H_ 2856 struct fchmod_args { 2857 int fd; 2858 int mode; 2859 }; 2860 #endif 2861 int 2862 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2863 { 2864 struct file *fp; 2865 cap_rights_t rights; 2866 int error; 2867 2868 AUDIT_ARG_FD(uap->fd); 2869 AUDIT_ARG_MODE(uap->mode); 2870 2871 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2872 if (error != 0) 2873 return (error); 2874 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2875 fdrop(fp, td); 2876 return (error); 2877 } 2878 2879 /* 2880 * Common implementation for chown(), lchown(), and fchown() 2881 */ 2882 int 2883 setfown(td, cred, vp, uid, gid) 2884 struct thread *td; 2885 struct ucred *cred; 2886 struct vnode *vp; 2887 uid_t uid; 2888 gid_t gid; 2889 { 2890 struct mount *mp; 2891 struct vattr vattr; 2892 int error; 2893 2894 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2895 return (error); 2896 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2897 VATTR_NULL(&vattr); 2898 vattr.va_uid = uid; 2899 vattr.va_gid = gid; 2900 #ifdef MAC 2901 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2902 vattr.va_gid); 2903 if (error == 0) 2904 #endif 2905 error = VOP_SETATTR(vp, &vattr, cred); 2906 VOP_UNLOCK(vp, 0); 2907 vn_finished_write(mp); 2908 return (error); 2909 } 2910 2911 /* 2912 * Set ownership given a path name. 
2913 */ 2914 #ifndef _SYS_SYSPROTO_H_ 2915 struct chown_args { 2916 char *path; 2917 int uid; 2918 int gid; 2919 }; 2920 #endif 2921 int 2922 sys_chown(td, uap) 2923 struct thread *td; 2924 register struct chown_args /* { 2925 char *path; 2926 int uid; 2927 int gid; 2928 } */ *uap; 2929 { 2930 2931 return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 2932 } 2933 2934 #ifndef _SYS_SYSPROTO_H_ 2935 struct fchownat_args { 2936 int fd; 2937 const char * path; 2938 uid_t uid; 2939 gid_t gid; 2940 int flag; 2941 }; 2942 #endif 2943 int 2944 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2945 { 2946 int flag; 2947 2948 flag = uap->flag; 2949 if (flag & ~AT_SYMLINK_NOFOLLOW) 2950 return (EINVAL); 2951 2952 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2953 uap->gid, uap->flag)); 2954 } 2955 2956 int 2957 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 2958 int gid) 2959 { 2960 2961 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0)); 2962 } 2963 2964 int 2965 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2966 int uid, int gid, int flag) 2967 { 2968 struct nameidata nd; 2969 cap_rights_t rights; 2970 int error, follow; 2971 2972 AUDIT_ARG_OWNER(uid, gid); 2973 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2974 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2975 cap_rights_init(&rights, CAP_FCHOWN), td); 2976 2977 if ((error = namei(&nd)) != 0) 2978 return (error); 2979 NDFREE(&nd, NDF_ONLY_PNBUF); 2980 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2981 vrele(nd.ni_vp); 2982 return (error); 2983 } 2984 2985 /* 2986 * Set ownership given a path name, do not cross symlinks. 
2987 */ 2988 #ifndef _SYS_SYSPROTO_H_ 2989 struct lchown_args { 2990 char *path; 2991 int uid; 2992 int gid; 2993 }; 2994 #endif 2995 int 2996 sys_lchown(td, uap) 2997 struct thread *td; 2998 register struct lchown_args /* { 2999 char *path; 3000 int uid; 3001 int gid; 3002 } */ *uap; 3003 { 3004 3005 return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 3006 } 3007 3008 int 3009 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 3010 int gid) 3011 { 3012 3013 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 3014 AT_SYMLINK_NOFOLLOW)); 3015 } 3016 3017 /* 3018 * Set ownership given a file descriptor. 3019 */ 3020 #ifndef _SYS_SYSPROTO_H_ 3021 struct fchown_args { 3022 int fd; 3023 int uid; 3024 int gid; 3025 }; 3026 #endif 3027 int 3028 sys_fchown(td, uap) 3029 struct thread *td; 3030 register struct fchown_args /* { 3031 int fd; 3032 int uid; 3033 int gid; 3034 } */ *uap; 3035 { 3036 struct file *fp; 3037 cap_rights_t rights; 3038 int error; 3039 3040 AUDIT_ARG_FD(uap->fd); 3041 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3042 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 3043 if (error != 0) 3044 return (error); 3045 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3046 fdrop(fp, td); 3047 return (error); 3048 } 3049 3050 /* 3051 * Common implementation code for utimes(), lutimes(), and futimes(). 
3052 */ 3053 static int 3054 getutimes(usrtvp, tvpseg, tsp) 3055 const struct timeval *usrtvp; 3056 enum uio_seg tvpseg; 3057 struct timespec *tsp; 3058 { 3059 struct timeval tv[2]; 3060 const struct timeval *tvp; 3061 int error; 3062 3063 if (usrtvp == NULL) { 3064 vfs_timestamp(&tsp[0]); 3065 tsp[1] = tsp[0]; 3066 } else { 3067 if (tvpseg == UIO_SYSSPACE) { 3068 tvp = usrtvp; 3069 } else { 3070 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3071 return (error); 3072 tvp = tv; 3073 } 3074 3075 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3076 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3077 return (EINVAL); 3078 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3079 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3080 } 3081 return (0); 3082 } 3083 3084 /* 3085 * Common implementation code for utimes(), lutimes(), and futimes(). 3086 */ 3087 static int 3088 setutimes(td, vp, ts, numtimes, nullflag) 3089 struct thread *td; 3090 struct vnode *vp; 3091 const struct timespec *ts; 3092 int numtimes; 3093 int nullflag; 3094 { 3095 struct mount *mp; 3096 struct vattr vattr; 3097 int error, setbirthtime; 3098 3099 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3100 return (error); 3101 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3102 setbirthtime = 0; 3103 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3104 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3105 setbirthtime = 1; 3106 VATTR_NULL(&vattr); 3107 vattr.va_atime = ts[0]; 3108 vattr.va_mtime = ts[1]; 3109 if (setbirthtime) 3110 vattr.va_birthtime = ts[1]; 3111 if (numtimes > 2) 3112 vattr.va_birthtime = ts[2]; 3113 if (nullflag) 3114 vattr.va_vaflags |= VA_UTIMES_NULL; 3115 #ifdef MAC 3116 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3117 vattr.va_mtime); 3118 #endif 3119 if (error == 0) 3120 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3121 VOP_UNLOCK(vp, 0); 3122 vn_finished_write(mp); 3123 return (error); 3124 } 3125 3126 /* 3127 * Set the access and modification 
times of a file. 3128 */ 3129 #ifndef _SYS_SYSPROTO_H_ 3130 struct utimes_args { 3131 char *path; 3132 struct timeval *tptr; 3133 }; 3134 #endif 3135 int 3136 sys_utimes(td, uap) 3137 struct thread *td; 3138 register struct utimes_args /* { 3139 char *path; 3140 struct timeval *tptr; 3141 } */ *uap; 3142 { 3143 3144 return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3145 UIO_USERSPACE)); 3146 } 3147 3148 #ifndef _SYS_SYSPROTO_H_ 3149 struct futimesat_args { 3150 int fd; 3151 const char * path; 3152 const struct timeval * times; 3153 }; 3154 #endif 3155 int 3156 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3157 { 3158 3159 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3160 uap->times, UIO_USERSPACE)); 3161 } 3162 3163 int 3164 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg, 3165 struct timeval *tptr, enum uio_seg tptrseg) 3166 { 3167 3168 return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg)); 3169 } 3170 3171 int 3172 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3173 struct timeval *tptr, enum uio_seg tptrseg) 3174 { 3175 struct nameidata nd; 3176 struct timespec ts[2]; 3177 cap_rights_t rights; 3178 int error; 3179 3180 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3181 return (error); 3182 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3183 cap_rights_init(&rights, CAP_FUTIMES), td); 3184 3185 if ((error = namei(&nd)) != 0) 3186 return (error); 3187 NDFREE(&nd, NDF_ONLY_PNBUF); 3188 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3189 vrele(nd.ni_vp); 3190 return (error); 3191 } 3192 3193 /* 3194 * Set the access and modification times of a file. 
3195 */ 3196 #ifndef _SYS_SYSPROTO_H_ 3197 struct lutimes_args { 3198 char *path; 3199 struct timeval *tptr; 3200 }; 3201 #endif 3202 int 3203 sys_lutimes(td, uap) 3204 struct thread *td; 3205 register struct lutimes_args /* { 3206 char *path; 3207 struct timeval *tptr; 3208 } */ *uap; 3209 { 3210 3211 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3212 UIO_USERSPACE)); 3213 } 3214 3215 int 3216 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3217 struct timeval *tptr, enum uio_seg tptrseg) 3218 { 3219 struct timespec ts[2]; 3220 struct nameidata nd; 3221 int error; 3222 3223 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3224 return (error); 3225 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3226 if ((error = namei(&nd)) != 0) 3227 return (error); 3228 NDFREE(&nd, NDF_ONLY_PNBUF); 3229 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3230 vrele(nd.ni_vp); 3231 return (error); 3232 } 3233 3234 /* 3235 * Set the access and modification times of a file. 
3236 */ 3237 #ifndef _SYS_SYSPROTO_H_ 3238 struct futimes_args { 3239 int fd; 3240 struct timeval *tptr; 3241 }; 3242 #endif 3243 int 3244 sys_futimes(td, uap) 3245 struct thread *td; 3246 register struct futimes_args /* { 3247 int fd; 3248 struct timeval *tptr; 3249 } */ *uap; 3250 { 3251 3252 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3253 } 3254 3255 int 3256 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3257 enum uio_seg tptrseg) 3258 { 3259 struct timespec ts[2]; 3260 struct file *fp; 3261 cap_rights_t rights; 3262 int error; 3263 3264 AUDIT_ARG_FD(fd); 3265 error = getutimes(tptr, tptrseg, ts); 3266 if (error != 0) 3267 return (error); 3268 error = getvnode(td->td_proc->p_fd, fd, 3269 cap_rights_init(&rights, CAP_FUTIMES), &fp); 3270 if (error != 0) 3271 return (error); 3272 #ifdef AUDIT 3273 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3274 AUDIT_ARG_VNODE1(fp->f_vnode); 3275 VOP_UNLOCK(fp->f_vnode, 0); 3276 #endif 3277 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3278 fdrop(fp, td); 3279 return (error); 3280 } 3281 3282 /* 3283 * Truncate a file given its path name. 
3284 */ 3285 #ifndef _SYS_SYSPROTO_H_ 3286 struct truncate_args { 3287 char *path; 3288 int pad; 3289 off_t length; 3290 }; 3291 #endif 3292 int 3293 sys_truncate(td, uap) 3294 struct thread *td; 3295 register struct truncate_args /* { 3296 char *path; 3297 int pad; 3298 off_t length; 3299 } */ *uap; 3300 { 3301 3302 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3303 } 3304 3305 int 3306 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3307 { 3308 struct mount *mp; 3309 struct vnode *vp; 3310 void *rl_cookie; 3311 struct vattr vattr; 3312 struct nameidata nd; 3313 int error; 3314 3315 if (length < 0) 3316 return(EINVAL); 3317 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3318 if ((error = namei(&nd)) != 0) 3319 return (error); 3320 vp = nd.ni_vp; 3321 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3322 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3323 vn_rangelock_unlock(vp, rl_cookie); 3324 vrele(vp); 3325 return (error); 3326 } 3327 NDFREE(&nd, NDF_ONLY_PNBUF); 3328 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3329 if (vp->v_type == VDIR) 3330 error = EISDIR; 3331 #ifdef MAC 3332 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3333 } 3334 #endif 3335 else if ((error = vn_writechk(vp)) == 0 && 3336 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3337 VATTR_NULL(&vattr); 3338 vattr.va_size = length; 3339 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3340 } 3341 VOP_UNLOCK(vp, 0); 3342 vn_finished_write(mp); 3343 vn_rangelock_unlock(vp, rl_cookie); 3344 vrele(vp); 3345 return (error); 3346 } 3347 3348 #if defined(COMPAT_43) 3349 /* 3350 * Truncate a file given its path name. 
3351 */ 3352 #ifndef _SYS_SYSPROTO_H_ 3353 struct otruncate_args { 3354 char *path; 3355 long length; 3356 }; 3357 #endif 3358 int 3359 otruncate(td, uap) 3360 struct thread *td; 3361 register struct otruncate_args /* { 3362 char *path; 3363 long length; 3364 } */ *uap; 3365 { 3366 struct truncate_args /* { 3367 char *path; 3368 int pad; 3369 off_t length; 3370 } */ nuap; 3371 3372 nuap.path = uap->path; 3373 nuap.length = uap->length; 3374 return (sys_truncate(td, &nuap)); 3375 } 3376 #endif /* COMPAT_43 */ 3377 3378 /* Versions with the pad argument */ 3379 int 3380 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3381 { 3382 struct truncate_args ouap; 3383 3384 ouap.path = uap->path; 3385 ouap.length = uap->length; 3386 return (sys_truncate(td, &ouap)); 3387 } 3388 3389 int 3390 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3391 { 3392 struct ftruncate_args ouap; 3393 3394 ouap.fd = uap->fd; 3395 ouap.length = uap->length; 3396 return (sys_ftruncate(td, &ouap)); 3397 } 3398 3399 /* 3400 * Sync an open file. 
3401 */ 3402 #ifndef _SYS_SYSPROTO_H_ 3403 struct fsync_args { 3404 int fd; 3405 }; 3406 #endif 3407 int 3408 sys_fsync(td, uap) 3409 struct thread *td; 3410 struct fsync_args /* { 3411 int fd; 3412 } */ *uap; 3413 { 3414 struct vnode *vp; 3415 struct mount *mp; 3416 struct file *fp; 3417 cap_rights_t rights; 3418 int error, lock_flags; 3419 3420 AUDIT_ARG_FD(uap->fd); 3421 error = getvnode(td->td_proc->p_fd, uap->fd, 3422 cap_rights_init(&rights, CAP_FSYNC), &fp); 3423 if (error != 0) 3424 return (error); 3425 vp = fp->f_vnode; 3426 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3427 if (error != 0) 3428 goto drop; 3429 if (MNT_SHARED_WRITES(mp) || 3430 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3431 lock_flags = LK_SHARED; 3432 } else { 3433 lock_flags = LK_EXCLUSIVE; 3434 } 3435 vn_lock(vp, lock_flags | LK_RETRY); 3436 AUDIT_ARG_VNODE1(vp); 3437 if (vp->v_object != NULL) { 3438 VM_OBJECT_WLOCK(vp->v_object); 3439 vm_object_page_clean(vp->v_object, 0, 0, 0); 3440 VM_OBJECT_WUNLOCK(vp->v_object); 3441 } 3442 error = VOP_FSYNC(vp, MNT_WAIT, td); 3443 3444 VOP_UNLOCK(vp, 0); 3445 vn_finished_write(mp); 3446 drop: 3447 fdrop(fp, td); 3448 return (error); 3449 } 3450 3451 /* 3452 * Rename files. Source and destination must either both be directories, or 3453 * both not be directories. If target is a directory, it must be empty. 
3454 */ 3455 #ifndef _SYS_SYSPROTO_H_ 3456 struct rename_args { 3457 char *from; 3458 char *to; 3459 }; 3460 #endif 3461 int 3462 sys_rename(td, uap) 3463 struct thread *td; 3464 register struct rename_args /* { 3465 char *from; 3466 char *to; 3467 } */ *uap; 3468 { 3469 3470 return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE)); 3471 } 3472 3473 #ifndef _SYS_SYSPROTO_H_ 3474 struct renameat_args { 3475 int oldfd; 3476 char *old; 3477 int newfd; 3478 char *new; 3479 }; 3480 #endif 3481 int 3482 sys_renameat(struct thread *td, struct renameat_args *uap) 3483 { 3484 3485 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3486 UIO_USERSPACE)); 3487 } 3488 3489 int 3490 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg) 3491 { 3492 3493 return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg)); 3494 } 3495 3496 int 3497 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3498 enum uio_seg pathseg) 3499 { 3500 struct mount *mp = NULL; 3501 struct vnode *tvp, *fvp, *tdvp; 3502 struct nameidata fromnd, tond; 3503 cap_rights_t rights; 3504 int error; 3505 3506 bwillwrite(); 3507 #ifdef MAC 3508 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3509 AUDITVNODE1, pathseg, old, oldfd, 3510 cap_rights_init(&rights, CAP_RENAMEAT), td); 3511 #else 3512 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3513 pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT), td); 3514 #endif 3515 3516 if ((error = namei(&fromnd)) != 0) 3517 return (error); 3518 #ifdef MAC 3519 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3520 fromnd.ni_vp, &fromnd.ni_cnd); 3521 VOP_UNLOCK(fromnd.ni_dvp, 0); 3522 if (fromnd.ni_dvp != fromnd.ni_vp) 3523 VOP_UNLOCK(fromnd.ni_vp, 0); 3524 #endif 3525 fvp = fromnd.ni_vp; 3526 if (error == 0) 3527 error = vn_start_write(fvp, &mp, V_WAIT | PCATCH); 3528 if (error != 0) { 3529 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3530 
vrele(fromnd.ni_dvp); 3531 vrele(fvp); 3532 goto out1; 3533 } 3534 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3535 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3536 cap_rights_init(&rights, CAP_LINKAT), td); 3537 if (fromnd.ni_vp->v_type == VDIR) 3538 tond.ni_cnd.cn_flags |= WILLBEDIR; 3539 if ((error = namei(&tond)) != 0) { 3540 /* Translate error code for rename("dir1", "dir2/."). */ 3541 if (error == EISDIR && fvp->v_type == VDIR) 3542 error = EINVAL; 3543 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3544 vrele(fromnd.ni_dvp); 3545 vrele(fvp); 3546 vn_finished_write(mp); 3547 goto out1; 3548 } 3549 tdvp = tond.ni_dvp; 3550 tvp = tond.ni_vp; 3551 if (tvp != NULL) { 3552 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3553 error = ENOTDIR; 3554 goto out; 3555 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3556 error = EISDIR; 3557 goto out; 3558 } 3559 #ifdef CAPABILITIES 3560 if (newfd != AT_FDCWD) { 3561 /* 3562 * If the target already exists we require CAP_UNLINKAT 3563 * from 'newfd'. 3564 */ 3565 error = cap_check(&tond.ni_filecaps.fc_rights, 3566 cap_rights_init(&rights, CAP_UNLINKAT)); 3567 if (error != 0) 3568 goto out; 3569 } 3570 #endif 3571 } 3572 if (fvp == tdvp) { 3573 error = EINVAL; 3574 goto out; 3575 } 3576 /* 3577 * If the source is the same as the destination (that is, if they 3578 * are links to the same vnode), then there is nothing to do. 
3579 */ 3580 if (fvp == tvp) 3581 error = -1; 3582 #ifdef MAC 3583 else 3584 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3585 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3586 #endif 3587 out: 3588 if (error == 0) { 3589 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3590 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3591 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3592 NDFREE(&tond, NDF_ONLY_PNBUF); 3593 } else { 3594 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3595 NDFREE(&tond, NDF_ONLY_PNBUF); 3596 if (tvp != NULL) 3597 vput(tvp); 3598 if (tdvp == tvp) 3599 vrele(tdvp); 3600 else 3601 vput(tdvp); 3602 vrele(fromnd.ni_dvp); 3603 vrele(fvp); 3604 } 3605 vrele(tond.ni_startdir); 3606 vn_finished_write(mp); 3607 out1: 3608 if (fromnd.ni_startdir) 3609 vrele(fromnd.ni_startdir); 3610 if (error == -1) 3611 return (0); 3612 return (error); 3613 } 3614 3615 /* 3616 * Make a directory file. 3617 */ 3618 #ifndef _SYS_SYSPROTO_H_ 3619 struct mkdir_args { 3620 char *path; 3621 int mode; 3622 }; 3623 #endif 3624 int 3625 sys_mkdir(td, uap) 3626 struct thread *td; 3627 register struct mkdir_args /* { 3628 char *path; 3629 int mode; 3630 } */ *uap; 3631 { 3632 3633 return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode)); 3634 } 3635 3636 #ifndef _SYS_SYSPROTO_H_ 3637 struct mkdirat_args { 3638 int fd; 3639 char *path; 3640 mode_t mode; 3641 }; 3642 #endif 3643 int 3644 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3645 { 3646 3647 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3648 } 3649 3650 int 3651 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode) 3652 { 3653 3654 return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode)); 3655 } 3656 3657 int 3658 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3659 int mode) 3660 { 3661 struct mount *mp; 3662 struct vnode *vp; 3663 struct vattr vattr; 3664 struct nameidata nd; 3665 cap_rights_t rights; 3666 int error; 3667 3668 
AUDIT_ARG_MODE(mode); 3669 restart: 3670 bwillwrite(); 3671 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 3672 segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), td); 3673 nd.ni_cnd.cn_flags |= WILLBEDIR; 3674 if ((error = namei(&nd)) != 0) 3675 return (error); 3676 vp = nd.ni_vp; 3677 if (vp != NULL) { 3678 NDFREE(&nd, NDF_ONLY_PNBUF); 3679 /* 3680 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3681 * the strange behaviour of leaving the vnode unlocked 3682 * if the target is the same vnode as the parent. 3683 */ 3684 if (vp == nd.ni_dvp) 3685 vrele(nd.ni_dvp); 3686 else 3687 vput(nd.ni_dvp); 3688 vrele(vp); 3689 return (EEXIST); 3690 } 3691 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3692 NDFREE(&nd, NDF_ONLY_PNBUF); 3693 vput(nd.ni_dvp); 3694 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3695 return (error); 3696 goto restart; 3697 } 3698 VATTR_NULL(&vattr); 3699 vattr.va_type = VDIR; 3700 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3701 #ifdef MAC 3702 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3703 &vattr); 3704 if (error != 0) 3705 goto out; 3706 #endif 3707 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3708 #ifdef MAC 3709 out: 3710 #endif 3711 NDFREE(&nd, NDF_ONLY_PNBUF); 3712 vput(nd.ni_dvp); 3713 if (error == 0) 3714 vput(nd.ni_vp); 3715 vn_finished_write(mp); 3716 return (error); 3717 } 3718 3719 /* 3720 * Remove a directory file. 
3721 */ 3722 #ifndef _SYS_SYSPROTO_H_ 3723 struct rmdir_args { 3724 char *path; 3725 }; 3726 #endif 3727 int 3728 sys_rmdir(td, uap) 3729 struct thread *td; 3730 struct rmdir_args /* { 3731 char *path; 3732 } */ *uap; 3733 { 3734 3735 return (kern_rmdir(td, uap->path, UIO_USERSPACE)); 3736 } 3737 3738 int 3739 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg) 3740 { 3741 3742 return (kern_rmdirat(td, AT_FDCWD, path, pathseg)); 3743 } 3744 3745 int 3746 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3747 { 3748 struct mount *mp; 3749 struct vnode *vp; 3750 struct nameidata nd; 3751 cap_rights_t rights; 3752 int error; 3753 3754 restart: 3755 bwillwrite(); 3756 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3757 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3758 if ((error = namei(&nd)) != 0) 3759 return (error); 3760 vp = nd.ni_vp; 3761 if (vp->v_type != VDIR) { 3762 error = ENOTDIR; 3763 goto out; 3764 } 3765 /* 3766 * No rmdir "." please. 3767 */ 3768 if (nd.ni_dvp == vp) { 3769 error = EINVAL; 3770 goto out; 3771 } 3772 /* 3773 * The root of a mounted filesystem cannot be deleted. 
3774 */ 3775 if (vp->v_vflag & VV_ROOT) { 3776 error = EBUSY; 3777 goto out; 3778 } 3779 #ifdef MAC 3780 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3781 &nd.ni_cnd); 3782 if (error != 0) 3783 goto out; 3784 #endif 3785 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3786 NDFREE(&nd, NDF_ONLY_PNBUF); 3787 vput(vp); 3788 if (nd.ni_dvp == vp) 3789 vrele(nd.ni_dvp); 3790 else 3791 vput(nd.ni_dvp); 3792 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3793 return (error); 3794 goto restart; 3795 } 3796 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3797 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3798 vn_finished_write(mp); 3799 out: 3800 NDFREE(&nd, NDF_ONLY_PNBUF); 3801 vput(vp); 3802 if (nd.ni_dvp == vp) 3803 vrele(nd.ni_dvp); 3804 else 3805 vput(nd.ni_dvp); 3806 return (error); 3807 } 3808 3809 #ifdef COMPAT_43 3810 /* 3811 * Read a block of directory entries in a filesystem independent format. 3812 */ 3813 #ifndef _SYS_SYSPROTO_H_ 3814 struct ogetdirentries_args { 3815 int fd; 3816 char *buf; 3817 u_int count; 3818 long *basep; 3819 }; 3820 #endif 3821 int 3822 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3823 { 3824 long loff; 3825 int error; 3826 3827 error = kern_ogetdirentries(td, uap, &loff); 3828 if (error == 0) 3829 error = copyout(&loff, uap->basep, sizeof(long)); 3830 return (error); 3831 } 3832 3833 int 3834 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3835 long *ploff) 3836 { 3837 struct vnode *vp; 3838 struct file *fp; 3839 struct uio auio, kuio; 3840 struct iovec aiov, kiov; 3841 struct dirent *dp, *edp; 3842 cap_rights_t rights; 3843 caddr_t dirbuf; 3844 int error, eofflag, readcnt; 3845 long loff; 3846 off_t foffset; 3847 3848 /* XXX arbitrary sanity limit on `count'. 
*/ 3849 if (uap->count > 64 * 1024) 3850 return (EINVAL); 3851 error = getvnode(td->td_proc->p_fd, uap->fd, 3852 cap_rights_init(&rights, CAP_READ), &fp); 3853 if (error != 0) 3854 return (error); 3855 if ((fp->f_flag & FREAD) == 0) { 3856 fdrop(fp, td); 3857 return (EBADF); 3858 } 3859 vp = fp->f_vnode; 3860 foffset = foffset_lock(fp, 0); 3861 unionread: 3862 if (vp->v_type != VDIR) { 3863 foffset_unlock(fp, foffset, 0); 3864 fdrop(fp, td); 3865 return (EINVAL); 3866 } 3867 aiov.iov_base = uap->buf; 3868 aiov.iov_len = uap->count; 3869 auio.uio_iov = &aiov; 3870 auio.uio_iovcnt = 1; 3871 auio.uio_rw = UIO_READ; 3872 auio.uio_segflg = UIO_USERSPACE; 3873 auio.uio_td = td; 3874 auio.uio_resid = uap->count; 3875 vn_lock(vp, LK_SHARED | LK_RETRY); 3876 loff = auio.uio_offset = foffset; 3877 #ifdef MAC 3878 error = mac_vnode_check_readdir(td->td_ucred, vp); 3879 if (error != 0) { 3880 VOP_UNLOCK(vp, 0); 3881 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3882 fdrop(fp, td); 3883 return (error); 3884 } 3885 #endif 3886 # if (BYTE_ORDER != LITTLE_ENDIAN) 3887 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3888 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3889 NULL, NULL); 3890 foffset = auio.uio_offset; 3891 } else 3892 # endif 3893 { 3894 kuio = auio; 3895 kuio.uio_iov = &kiov; 3896 kuio.uio_segflg = UIO_SYSSPACE; 3897 kiov.iov_len = uap->count; 3898 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3899 kiov.iov_base = dirbuf; 3900 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3901 NULL, NULL); 3902 foffset = kuio.uio_offset; 3903 if (error == 0) { 3904 readcnt = uap->count - kuio.uio_resid; 3905 edp = (struct dirent *)&dirbuf[readcnt]; 3906 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3907 # if (BYTE_ORDER == LITTLE_ENDIAN) 3908 /* 3909 * The expected low byte of 3910 * dp->d_namlen is our dp->d_type. 3911 * The high MBZ byte of dp->d_namlen 3912 * is our dp->d_namlen. 
3913 */ 3914 dp->d_type = dp->d_namlen; 3915 dp->d_namlen = 0; 3916 # else 3917 /* 3918 * The dp->d_type is the high byte 3919 * of the expected dp->d_namlen, 3920 * so must be zero'ed. 3921 */ 3922 dp->d_type = 0; 3923 # endif 3924 if (dp->d_reclen > 0) { 3925 dp = (struct dirent *) 3926 ((char *)dp + dp->d_reclen); 3927 } else { 3928 error = EIO; 3929 break; 3930 } 3931 } 3932 if (dp >= edp) 3933 error = uiomove(dirbuf, readcnt, &auio); 3934 } 3935 free(dirbuf, M_TEMP); 3936 } 3937 if (error != 0) { 3938 VOP_UNLOCK(vp, 0); 3939 foffset_unlock(fp, foffset, 0); 3940 fdrop(fp, td); 3941 return (error); 3942 } 3943 if (uap->count == auio.uio_resid && 3944 (vp->v_vflag & VV_ROOT) && 3945 (vp->v_mount->mnt_flag & MNT_UNION)) { 3946 struct vnode *tvp = vp; 3947 vp = vp->v_mount->mnt_vnodecovered; 3948 VREF(vp); 3949 fp->f_vnode = vp; 3950 fp->f_data = vp; 3951 foffset = 0; 3952 vput(tvp); 3953 goto unionread; 3954 } 3955 VOP_UNLOCK(vp, 0); 3956 foffset_unlock(fp, foffset, 0); 3957 fdrop(fp, td); 3958 td->td_retval[0] = uap->count - auio.uio_resid; 3959 if (error == 0) 3960 *ploff = loff; 3961 return (error); 3962 } 3963 #endif /* COMPAT_43 */ 3964 3965 /* 3966 * Read a block of directory entries in a filesystem independent format. 
3967 */ 3968 #ifndef _SYS_SYSPROTO_H_ 3969 struct getdirentries_args { 3970 int fd; 3971 char *buf; 3972 u_int count; 3973 long *basep; 3974 }; 3975 #endif 3976 int 3977 sys_getdirentries(td, uap) 3978 struct thread *td; 3979 register struct getdirentries_args /* { 3980 int fd; 3981 char *buf; 3982 u_int count; 3983 long *basep; 3984 } */ *uap; 3985 { 3986 long base; 3987 int error; 3988 3989 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3990 NULL, UIO_USERSPACE); 3991 if (error != 0) 3992 return (error); 3993 if (uap->basep != NULL) 3994 error = copyout(&base, uap->basep, sizeof(long)); 3995 return (error); 3996 } 3997 3998 int 3999 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 4000 long *basep, ssize_t *residp, enum uio_seg bufseg) 4001 { 4002 struct vnode *vp; 4003 struct file *fp; 4004 struct uio auio; 4005 struct iovec aiov; 4006 cap_rights_t rights; 4007 long loff; 4008 int error, eofflag; 4009 off_t foffset; 4010 4011 AUDIT_ARG_FD(fd); 4012 if (count > IOSIZE_MAX) 4013 return (EINVAL); 4014 auio.uio_resid = count; 4015 error = getvnode(td->td_proc->p_fd, fd, 4016 cap_rights_init(&rights, CAP_READ), &fp); 4017 if (error != 0) 4018 return (error); 4019 if ((fp->f_flag & FREAD) == 0) { 4020 fdrop(fp, td); 4021 return (EBADF); 4022 } 4023 vp = fp->f_vnode; 4024 foffset = foffset_lock(fp, 0); 4025 unionread: 4026 if (vp->v_type != VDIR) { 4027 error = EINVAL; 4028 goto fail; 4029 } 4030 aiov.iov_base = buf; 4031 aiov.iov_len = count; 4032 auio.uio_iov = &aiov; 4033 auio.uio_iovcnt = 1; 4034 auio.uio_rw = UIO_READ; 4035 auio.uio_segflg = bufseg; 4036 auio.uio_td = td; 4037 vn_lock(vp, LK_SHARED | LK_RETRY); 4038 AUDIT_ARG_VNODE1(vp); 4039 loff = auio.uio_offset = foffset; 4040 #ifdef MAC 4041 error = mac_vnode_check_readdir(td->td_ucred, vp); 4042 if (error == 0) 4043 #endif 4044 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4045 NULL); 4046 foffset = auio.uio_offset; 4047 if (error != 0) { 4048 
VOP_UNLOCK(vp, 0); 4049 goto fail; 4050 } 4051 if (count == auio.uio_resid && 4052 (vp->v_vflag & VV_ROOT) && 4053 (vp->v_mount->mnt_flag & MNT_UNION)) { 4054 struct vnode *tvp = vp; 4055 4056 vp = vp->v_mount->mnt_vnodecovered; 4057 VREF(vp); 4058 fp->f_vnode = vp; 4059 fp->f_data = vp; 4060 foffset = 0; 4061 vput(tvp); 4062 goto unionread; 4063 } 4064 VOP_UNLOCK(vp, 0); 4065 *basep = loff; 4066 if (residp != NULL) 4067 *residp = auio.uio_resid; 4068 td->td_retval[0] = count - auio.uio_resid; 4069 fail: 4070 foffset_unlock(fp, foffset, 0); 4071 fdrop(fp, td); 4072 return (error); 4073 } 4074 4075 #ifndef _SYS_SYSPROTO_H_ 4076 struct getdents_args { 4077 int fd; 4078 char *buf; 4079 size_t count; 4080 }; 4081 #endif 4082 int 4083 sys_getdents(td, uap) 4084 struct thread *td; 4085 register struct getdents_args /* { 4086 int fd; 4087 char *buf; 4088 u_int count; 4089 } */ *uap; 4090 { 4091 struct getdirentries_args ap; 4092 4093 ap.fd = uap->fd; 4094 ap.buf = uap->buf; 4095 ap.count = uap->count; 4096 ap.basep = NULL; 4097 return (sys_getdirentries(td, &ap)); 4098 } 4099 4100 /* 4101 * Set the mode mask for creation of filesystem nodes. 4102 */ 4103 #ifndef _SYS_SYSPROTO_H_ 4104 struct umask_args { 4105 int newmask; 4106 }; 4107 #endif 4108 int 4109 sys_umask(td, uap) 4110 struct thread *td; 4111 struct umask_args /* { 4112 int newmask; 4113 } */ *uap; 4114 { 4115 register struct filedesc *fdp; 4116 4117 FILEDESC_XLOCK(td->td_proc->p_fd); 4118 fdp = td->td_proc->p_fd; 4119 td->td_retval[0] = fdp->fd_cmask; 4120 fdp->fd_cmask = uap->newmask & ALLPERMS; 4121 FILEDESC_XUNLOCK(td->td_proc->p_fd); 4122 return (0); 4123 } 4124 4125 /* 4126 * Void all references to file by ripping underlying filesystem away from 4127 * vnode. 
4128 */ 4129 #ifndef _SYS_SYSPROTO_H_ 4130 struct revoke_args { 4131 char *path; 4132 }; 4133 #endif 4134 int 4135 sys_revoke(td, uap) 4136 struct thread *td; 4137 register struct revoke_args /* { 4138 char *path; 4139 } */ *uap; 4140 { 4141 struct vnode *vp; 4142 struct vattr vattr; 4143 struct nameidata nd; 4144 int error; 4145 4146 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4147 uap->path, td); 4148 if ((error = namei(&nd)) != 0) 4149 return (error); 4150 vp = nd.ni_vp; 4151 NDFREE(&nd, NDF_ONLY_PNBUF); 4152 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4153 error = EINVAL; 4154 goto out; 4155 } 4156 #ifdef MAC 4157 error = mac_vnode_check_revoke(td->td_ucred, vp); 4158 if (error != 0) 4159 goto out; 4160 #endif 4161 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4162 if (error != 0) 4163 goto out; 4164 if (td->td_ucred->cr_uid != vattr.va_uid) { 4165 error = priv_check(td, PRIV_VFS_ADMIN); 4166 if (error != 0) 4167 goto out; 4168 } 4169 if (vcount(vp) > 1) 4170 VOP_REVOKE(vp, REVOKEALL); 4171 out: 4172 vput(vp); 4173 return (error); 4174 } 4175 4176 /* 4177 * Convert a user file descriptor to a kernel file entry and check that, if it 4178 * is a capability, the correct rights are present. A reference on the file 4179 * entry is held upon returning. 4180 */ 4181 int 4182 getvnode(struct filedesc *fdp, int fd, cap_rights_t *rightsp, struct file **fpp) 4183 { 4184 struct file *fp; 4185 int error; 4186 4187 error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL); 4188 if (error != 0) 4189 return (error); 4190 4191 /* 4192 * The file could be not of the vnode type, or it may be not 4193 * yet fully initialized, in which case the f_vnode pointer 4194 * may be set, but f_ops is still badfileops. E.g., 4195 * devfs_open() transiently create such situation to 4196 * facilitate csw d_fdopen(). 
4197 * 4198 * Dupfdopen() handling in kern_openat() installs the 4199 * half-baked file into the process descriptor table, allowing 4200 * other thread to dereference it. Guard against the race by 4201 * checking f_ops. 4202 */ 4203 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4204 fdrop(fp, curthread); 4205 return (EINVAL); 4206 } 4207 *fpp = fp; 4208 return (0); 4209 } 4210 4211 4212 /* 4213 * Get an (NFS) file handle. 4214 */ 4215 #ifndef _SYS_SYSPROTO_H_ 4216 struct lgetfh_args { 4217 char *fname; 4218 fhandle_t *fhp; 4219 }; 4220 #endif 4221 int 4222 sys_lgetfh(td, uap) 4223 struct thread *td; 4224 register struct lgetfh_args *uap; 4225 { 4226 struct nameidata nd; 4227 fhandle_t fh; 4228 register struct vnode *vp; 4229 int error; 4230 4231 error = priv_check(td, PRIV_VFS_GETFH); 4232 if (error != 0) 4233 return (error); 4234 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4235 uap->fname, td); 4236 error = namei(&nd); 4237 if (error != 0) 4238 return (error); 4239 NDFREE(&nd, NDF_ONLY_PNBUF); 4240 vp = nd.ni_vp; 4241 bzero(&fh, sizeof(fh)); 4242 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4243 error = VOP_VPTOFH(vp, &fh.fh_fid); 4244 vput(vp); 4245 if (error == 0) 4246 error = copyout(&fh, uap->fhp, sizeof (fh)); 4247 return (error); 4248 } 4249 4250 #ifndef _SYS_SYSPROTO_H_ 4251 struct getfh_args { 4252 char *fname; 4253 fhandle_t *fhp; 4254 }; 4255 #endif 4256 int 4257 sys_getfh(td, uap) 4258 struct thread *td; 4259 register struct getfh_args *uap; 4260 { 4261 struct nameidata nd; 4262 fhandle_t fh; 4263 register struct vnode *vp; 4264 int error; 4265 4266 error = priv_check(td, PRIV_VFS_GETFH); 4267 if (error != 0) 4268 return (error); 4269 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4270 uap->fname, td); 4271 error = namei(&nd); 4272 if (error != 0) 4273 return (error); 4274 NDFREE(&nd, NDF_ONLY_PNBUF); 4275 vp = nd.ni_vp; 4276 bzero(&fh, sizeof(fh)); 4277 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 
4278 error = VOP_VPTOFH(vp, &fh.fh_fid); 4279 vput(vp); 4280 if (error == 0) 4281 error = copyout(&fh, uap->fhp, sizeof (fh)); 4282 return (error); 4283 } 4284 4285 /* 4286 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4287 * open descriptor. 4288 * 4289 * warning: do not remove the priv_check() call or this becomes one giant 4290 * security hole. 4291 */ 4292 #ifndef _SYS_SYSPROTO_H_ 4293 struct fhopen_args { 4294 const struct fhandle *u_fhp; 4295 int flags; 4296 }; 4297 #endif 4298 int 4299 sys_fhopen(td, uap) 4300 struct thread *td; 4301 struct fhopen_args /* { 4302 const struct fhandle *u_fhp; 4303 int flags; 4304 } */ *uap; 4305 { 4306 struct mount *mp; 4307 struct vnode *vp; 4308 struct fhandle fhp; 4309 struct file *fp; 4310 int fmode, error; 4311 int indx; 4312 4313 error = priv_check(td, PRIV_VFS_FHOPEN); 4314 if (error != 0) 4315 return (error); 4316 indx = -1; 4317 fmode = FFLAGS(uap->flags); 4318 /* why not allow a non-read/write open for our lockd? */ 4319 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4320 return (EINVAL); 4321 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4322 if (error != 0) 4323 return(error); 4324 /* find the mount point */ 4325 mp = vfs_busyfs(&fhp.fh_fsid); 4326 if (mp == NULL) 4327 return (ESTALE); 4328 /* now give me my vnode, it gets returned to me locked */ 4329 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4330 vfs_unbusy(mp); 4331 if (error != 0) 4332 return (error); 4333 4334 error = falloc_noinstall(td, &fp); 4335 if (error != 0) { 4336 vput(vp); 4337 return (error); 4338 } 4339 /* 4340 * An extra reference on `fp' has been held for us by 4341 * falloc_noinstall(). 
4342 */ 4343 4344 #ifdef INVARIANTS 4345 td->td_dupfd = -1; 4346 #endif 4347 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4348 if (error != 0) { 4349 KASSERT(fp->f_ops == &badfileops, 4350 ("VOP_OPEN in fhopen() set f_ops")); 4351 KASSERT(td->td_dupfd < 0, 4352 ("fhopen() encountered fdopen()")); 4353 4354 vput(vp); 4355 goto bad; 4356 } 4357 #ifdef INVARIANTS 4358 td->td_dupfd = 0; 4359 #endif 4360 fp->f_vnode = vp; 4361 fp->f_seqcount = 1; 4362 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4363 &vnops); 4364 VOP_UNLOCK(vp, 0); 4365 if ((fmode & O_TRUNC) != 0) { 4366 error = fo_truncate(fp, 0, td->td_ucred, td); 4367 if (error != 0) 4368 goto bad; 4369 } 4370 4371 error = finstall(td, fp, &indx, fmode, NULL); 4372 bad: 4373 fdrop(fp, td); 4374 td->td_retval[0] = indx; 4375 return (error); 4376 } 4377 4378 /* 4379 * Stat an (NFS) file handle. 4380 */ 4381 #ifndef _SYS_SYSPROTO_H_ 4382 struct fhstat_args { 4383 struct fhandle *u_fhp; 4384 struct stat *sb; 4385 }; 4386 #endif 4387 int 4388 sys_fhstat(td, uap) 4389 struct thread *td; 4390 register struct fhstat_args /* { 4391 struct fhandle *u_fhp; 4392 struct stat *sb; 4393 } */ *uap; 4394 { 4395 struct stat sb; 4396 struct fhandle fh; 4397 int error; 4398 4399 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4400 if (error != 0) 4401 return (error); 4402 error = kern_fhstat(td, fh, &sb); 4403 if (error == 0) 4404 error = copyout(&sb, uap->sb, sizeof(sb)); 4405 return (error); 4406 } 4407 4408 int 4409 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4410 { 4411 struct mount *mp; 4412 struct vnode *vp; 4413 int error; 4414 4415 error = priv_check(td, PRIV_VFS_FHSTAT); 4416 if (error != 0) 4417 return (error); 4418 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4419 return (ESTALE); 4420 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4421 vfs_unbusy(mp); 4422 if (error != 0) 4423 return (error); 4424 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4425 vput(vp); 
4426 return (error); 4427 } 4428 4429 /* 4430 * Implement fstatfs() for (NFS) file handles. 4431 */ 4432 #ifndef _SYS_SYSPROTO_H_ 4433 struct fhstatfs_args { 4434 struct fhandle *u_fhp; 4435 struct statfs *buf; 4436 }; 4437 #endif 4438 int 4439 sys_fhstatfs(td, uap) 4440 struct thread *td; 4441 struct fhstatfs_args /* { 4442 struct fhandle *u_fhp; 4443 struct statfs *buf; 4444 } */ *uap; 4445 { 4446 struct statfs sf; 4447 fhandle_t fh; 4448 int error; 4449 4450 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4451 if (error != 0) 4452 return (error); 4453 error = kern_fhstatfs(td, fh, &sf); 4454 if (error != 0) 4455 return (error); 4456 return (copyout(&sf, uap->buf, sizeof(sf))); 4457 } 4458 4459 int 4460 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4461 { 4462 struct statfs *sp; 4463 struct mount *mp; 4464 struct vnode *vp; 4465 int error; 4466 4467 error = priv_check(td, PRIV_VFS_FHSTATFS); 4468 if (error != 0) 4469 return (error); 4470 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4471 return (ESTALE); 4472 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4473 if (error != 0) { 4474 vfs_unbusy(mp); 4475 return (error); 4476 } 4477 vput(vp); 4478 error = prison_canseemount(td->td_ucred, mp); 4479 if (error != 0) 4480 goto out; 4481 #ifdef MAC 4482 error = mac_mount_check_stat(td->td_ucred, mp); 4483 if (error != 0) 4484 goto out; 4485 #endif 4486 /* 4487 * Set these in case the underlying filesystem fails to do so. 
4488 */ 4489 sp = &mp->mnt_stat; 4490 sp->f_version = STATFS_VERSION; 4491 sp->f_namemax = NAME_MAX; 4492 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4493 error = VFS_STATFS(mp, sp); 4494 if (error == 0) 4495 *buf = *sp; 4496 out: 4497 vfs_unbusy(mp); 4498 return (error); 4499 } 4500 4501 int 4502 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4503 { 4504 struct file *fp; 4505 struct mount *mp; 4506 struct vnode *vp; 4507 cap_rights_t rights; 4508 off_t olen, ooffset; 4509 int error; 4510 4511 fp = NULL; 4512 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4513 if (error != 0) 4514 goto out; 4515 4516 switch (fp->f_type) { 4517 case DTYPE_VNODE: 4518 break; 4519 case DTYPE_PIPE: 4520 case DTYPE_FIFO: 4521 error = ESPIPE; 4522 goto out; 4523 default: 4524 error = ENODEV; 4525 goto out; 4526 } 4527 if ((fp->f_flag & FWRITE) == 0) { 4528 error = EBADF; 4529 goto out; 4530 } 4531 vp = fp->f_vnode; 4532 if (vp->v_type != VREG) { 4533 error = ENODEV; 4534 goto out; 4535 } 4536 if (offset < 0 || len <= 0) { 4537 error = EINVAL; 4538 goto out; 4539 } 4540 /* Check for wrap. */ 4541 if (offset > OFF_MAX - len) { 4542 error = EFBIG; 4543 goto out; 4544 } 4545 4546 /* Allocating blocks may take a long time, so iterate. 
*/ 4547 for (;;) { 4548 olen = len; 4549 ooffset = offset; 4550 4551 bwillwrite(); 4552 mp = NULL; 4553 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4554 if (error != 0) 4555 break; 4556 error = vn_lock(vp, LK_EXCLUSIVE); 4557 if (error != 0) { 4558 vn_finished_write(mp); 4559 break; 4560 } 4561 #ifdef MAC 4562 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4563 if (error == 0) 4564 #endif 4565 error = VOP_ALLOCATE(vp, &offset, &len); 4566 VOP_UNLOCK(vp, 0); 4567 vn_finished_write(mp); 4568 4569 if (olen + ooffset != offset + len) { 4570 panic("offset + len changed from %jx/%jx to %jx/%jx", 4571 ooffset, olen, offset, len); 4572 } 4573 if (error != 0 || len == 0) 4574 break; 4575 KASSERT(olen > len, ("Iteration did not make progress?")); 4576 maybe_yield(); 4577 } 4578 out: 4579 if (fp != NULL) 4580 fdrop(fp, td); 4581 return (error); 4582 } 4583 4584 int 4585 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4586 { 4587 4588 return (kern_posix_fallocate(td, uap->fd, uap->offset, uap->len)); 4589 } 4590 4591 /* 4592 * Unlike madvise(2), we do not make a best effort to remember every 4593 * possible caching hint. Instead, we remember the last setting with 4594 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4595 * region of any current setting. 
4596 */ 4597 int 4598 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4599 int advice) 4600 { 4601 struct fadvise_info *fa, *new; 4602 struct file *fp; 4603 struct vnode *vp; 4604 cap_rights_t rights; 4605 off_t end; 4606 int error; 4607 4608 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4609 return (EINVAL); 4610 switch (advice) { 4611 case POSIX_FADV_SEQUENTIAL: 4612 case POSIX_FADV_RANDOM: 4613 case POSIX_FADV_NOREUSE: 4614 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4615 break; 4616 case POSIX_FADV_NORMAL: 4617 case POSIX_FADV_WILLNEED: 4618 case POSIX_FADV_DONTNEED: 4619 new = NULL; 4620 break; 4621 default: 4622 return (EINVAL); 4623 } 4624 /* XXX: CAP_POSIX_FADVISE? */ 4625 error = fget(td, fd, cap_rights_init(&rights), &fp); 4626 if (error != 0) 4627 goto out; 4628 4629 switch (fp->f_type) { 4630 case DTYPE_VNODE: 4631 break; 4632 case DTYPE_PIPE: 4633 case DTYPE_FIFO: 4634 error = ESPIPE; 4635 goto out; 4636 default: 4637 error = ENODEV; 4638 goto out; 4639 } 4640 vp = fp->f_vnode; 4641 if (vp->v_type != VREG) { 4642 error = ENODEV; 4643 goto out; 4644 } 4645 if (len == 0) 4646 end = OFF_MAX; 4647 else 4648 end = offset + len - 1; 4649 switch (advice) { 4650 case POSIX_FADV_SEQUENTIAL: 4651 case POSIX_FADV_RANDOM: 4652 case POSIX_FADV_NOREUSE: 4653 /* 4654 * Try to merge any existing non-standard region with 4655 * this new region if possible, otherwise create a new 4656 * non-standard region for this request. 
4657 */ 4658 mtx_pool_lock(mtxpool_sleep, fp); 4659 fa = fp->f_advice; 4660 if (fa != NULL && fa->fa_advice == advice && 4661 ((fa->fa_start <= end && fa->fa_end >= offset) || 4662 (end != OFF_MAX && fa->fa_start == end + 1) || 4663 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4664 if (offset < fa->fa_start) 4665 fa->fa_start = offset; 4666 if (end > fa->fa_end) 4667 fa->fa_end = end; 4668 } else { 4669 new->fa_advice = advice; 4670 new->fa_start = offset; 4671 new->fa_end = end; 4672 new->fa_prevstart = 0; 4673 new->fa_prevend = 0; 4674 fp->f_advice = new; 4675 new = fa; 4676 } 4677 mtx_pool_unlock(mtxpool_sleep, fp); 4678 break; 4679 case POSIX_FADV_NORMAL: 4680 /* 4681 * If a the "normal" region overlaps with an existing 4682 * non-standard region, trim or remove the 4683 * non-standard region. 4684 */ 4685 mtx_pool_lock(mtxpool_sleep, fp); 4686 fa = fp->f_advice; 4687 if (fa != NULL) { 4688 if (offset <= fa->fa_start && end >= fa->fa_end) { 4689 new = fa; 4690 fp->f_advice = NULL; 4691 } else if (offset <= fa->fa_start && 4692 end >= fa->fa_start) 4693 fa->fa_start = end + 1; 4694 else if (offset <= fa->fa_end && end >= fa->fa_end) 4695 fa->fa_end = offset - 1; 4696 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4697 /* 4698 * If the "normal" region is a middle 4699 * portion of the existing 4700 * non-standard region, just remove 4701 * the whole thing rather than picking 4702 * one side or the other to 4703 * preserve. 
4704 */ 4705 new = fa; 4706 fp->f_advice = NULL; 4707 } 4708 } 4709 mtx_pool_unlock(mtxpool_sleep, fp); 4710 break; 4711 case POSIX_FADV_WILLNEED: 4712 case POSIX_FADV_DONTNEED: 4713 error = VOP_ADVISE(vp, offset, end, advice); 4714 break; 4715 } 4716 out: 4717 if (fp != NULL) 4718 fdrop(fp, td); 4719 free(new, M_FADVISE); 4720 return (error); 4721 } 4722 4723 int 4724 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4725 { 4726 4727 return (kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4728 uap->advice)); 4729 } 4730