1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_kdtrace.h" 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/bio.h> 48 #include <sys/buf.h> 49 #include <sys/capability.h> 50 #include <sys/disk.h> 51 #include <sys/sysent.h> 52 #include <sys/malloc.h> 53 #include <sys/mount.h> 54 #include <sys/mutex.h> 55 #include <sys/sysproto.h> 56 #include <sys/namei.h> 57 #include <sys/filedesc.h> 58 #include <sys/kernel.h> 59 #include <sys/fcntl.h> 60 #include <sys/file.h> 61 #include <sys/filio.h> 62 #include <sys/limits.h> 63 #include <sys/linker.h> 64 #include <sys/rwlock.h> 65 #include <sys/sdt.h> 66 #include <sys/stat.h> 67 #include <sys/sx.h> 68 #include <sys/unistd.h> 69 #include <sys/vnode.h> 70 #include <sys/priv.h> 71 #include <sys/proc.h> 72 #include <sys/dirent.h> 73 #include <sys/jail.h> 74 #include <sys/syscallsubr.h> 75 #include <sys/sysctl.h> 76 #ifdef KTRACE 77 #include <sys/ktrace.h> 78 #endif 79 80 #include <machine/stdarg.h> 81 82 #include <security/audit/audit.h> 83 #include <security/mac/mac_framework.h> 84 85 #include <vm/vm.h> 86 #include <vm/vm_object.h> 87 #include <vm/vm_page.h> 88 #include <vm/uma.h> 89 90 #include <ufs/ufs/quota.h> 91 92 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 93 94 SDT_PROVIDER_DEFINE(vfs); 95 SDT_PROBE_DEFINE(vfs, , stat, mode, mode); 96 SDT_PROBE_ARGTYPE(vfs, , stat, mode, 0, "char *"); 97 SDT_PROBE_ARGTYPE(vfs, , stat, mode, 1, "int"); 98 SDT_PROBE_DEFINE(vfs, , stat, reg, reg); 99 SDT_PROBE_ARGTYPE(vfs, , stat, reg, 0, "char *"); 100 SDT_PROBE_ARGTYPE(vfs, , stat, reg, 1, "int"); 101 102 static int chroot_refuse_vdir_fds(struct filedesc *fdp); 103 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 104 static int kern_chflags(struct thread *td, const char *path, 105 enum uio_seg pathseg, u_long flags); 106 static int kern_chflagsat(struct thread *td, int fd, const char *path, 107 enum uio_seg pathseg, u_long flags, int atflag); 108 static int setfflags(struct thread *td, struct vnode *, u_long); 109 static int setutimes(struct thread *td, struct vnode *, 110 const struct timespec *, int, int); 111 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 112 struct thread *td); 113 114 /* 115 * The module initialization routine for POSIX asynchronous I/O will 116 * set this to the version of AIO that it implements. (Zero means 117 * that it is not implemented.) This value is used here by pathconf() 118 * and in kern_descrip.c by fpathconf(). 119 */ 120 int async_io_version; 121 122 #ifdef DEBUG 123 static int syncprt = 0; 124 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 125 #endif 126 127 /* 128 * Sync each mounted filesystem. 129 */ 130 #ifndef _SYS_SYSPROTO_H_ 131 struct sync_args { 132 int dummy; 133 }; 134 #endif 135 /* ARGSUSED */ 136 int 137 sys_sync(td, uap) 138 struct thread *td; 139 struct sync_args *uap; 140 { 141 struct mount *mp, *nmp; 142 int save; 143 144 mtx_lock(&mountlist_mtx); 145 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 146 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 147 nmp = TAILQ_NEXT(mp, mnt_list); 148 continue; 149 } 150 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 151 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 152 save = curthread_pflags_set(TDP_SYNCIO); 153 vfs_msync(mp, MNT_NOWAIT); 154 VFS_SYNC(mp, MNT_NOWAIT); 155 curthread_pflags_restore(save); 156 vn_finished_write(mp); 157 } 158 mtx_lock(&mountlist_mtx); 159 nmp = TAILQ_NEXT(mp, mnt_list); 160 vfs_unbusy(mp); 161 } 162 mtx_unlock(&mountlist_mtx); 163 return (0); 164 } 165 166 /* 167 * Change filesystem quotas. 168 */ 169 #ifndef _SYS_SYSPROTO_H_ 170 struct quotactl_args { 171 char *path; 172 int cmd; 173 int uid; 174 caddr_t arg; 175 }; 176 #endif 177 int 178 sys_quotactl(td, uap) 179 struct thread *td; 180 register struct quotactl_args /* { 181 char *path; 182 int cmd; 183 int uid; 184 caddr_t arg; 185 } */ *uap; 186 { 187 struct mount *mp; 188 int error; 189 struct nameidata nd; 190 191 AUDIT_ARG_CMD(uap->cmd); 192 AUDIT_ARG_UID(uap->uid); 193 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 194 return (EPERM); 195 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 196 uap->path, td); 197 if ((error = namei(&nd)) != 0) 198 return (error); 199 NDFREE(&nd, NDF_ONLY_PNBUF); 200 mp = nd.ni_vp->v_mount; 201 vfs_ref(mp); 202 vput(nd.ni_vp); 203 error = vfs_busy(mp, 0); 204 vfs_rel(mp); 205 if (error) 206 return (error); 207 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 208 209 /* 210 * Since quota on operation typically needs to open quota 211 * file, the Q_QUOTAON handler needs to unbusy the mount point 212 * before calling into namei. Otherwise, unmount might be 213 * started between two vfs_busy() invocations (first is our, 214 * second is from mount point cross-walk code in lookup()), 215 * causing deadlock. 216 * 217 * Require that Q_QUOTAON handles the vfs_busy() reference on 218 * its own, always returning with ubusied mount point. 219 */ 220 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 221 vfs_unbusy(mp); 222 return (error); 223 } 224 225 /* 226 * Used by statfs conversion routines to scale the block size up if 227 * necessary so that all of the block counts are <= 'max_size'. Note 228 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 229 * value of 'n'. 230 */ 231 void 232 statfs_scale_blocks(struct statfs *sf, long max_size) 233 { 234 uint64_t count; 235 int shift; 236 237 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 238 239 /* 240 * Attempt to scale the block counts to give a more accurate 241 * overview to userland of the ratio of free space to used 242 * space. To do this, find the largest block count and compute 243 * a divisor that lets it fit into a signed integer <= max_size. 244 */ 245 if (sf->f_bavail < 0) 246 count = -sf->f_bavail; 247 else 248 count = sf->f_bavail; 249 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 250 if (count <= max_size) 251 return; 252 253 count >>= flsl(max_size); 254 shift = 0; 255 while (count > 0) { 256 shift++; 257 count >>=1; 258 } 259 260 sf->f_bsize <<= shift; 261 sf->f_blocks >>= shift; 262 sf->f_bfree >>= shift; 263 sf->f_bavail >>= shift; 264 } 265 266 /* 267 * Get filesystem statistics. 268 */ 269 #ifndef _SYS_SYSPROTO_H_ 270 struct statfs_args { 271 char *path; 272 struct statfs *buf; 273 }; 274 #endif 275 int 276 sys_statfs(td, uap) 277 struct thread *td; 278 register struct statfs_args /* { 279 char *path; 280 struct statfs *buf; 281 } */ *uap; 282 { 283 struct statfs sf; 284 int error; 285 286 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 287 if (error == 0) 288 error = copyout(&sf, uap->buf, sizeof(sf)); 289 return (error); 290 } 291 292 int 293 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 294 struct statfs *buf) 295 { 296 struct mount *mp; 297 struct statfs *sp, sb; 298 int error; 299 struct nameidata nd; 300 301 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 302 pathseg, path, td); 303 error = namei(&nd); 304 if (error) 305 return (error); 306 mp = nd.ni_vp->v_mount; 307 vfs_ref(mp); 308 NDFREE(&nd, NDF_ONLY_PNBUF); 309 vput(nd.ni_vp); 310 error = vfs_busy(mp, 0); 311 vfs_rel(mp); 312 if (error) 313 return (error); 314 #ifdef MAC 315 error = mac_mount_check_stat(td->td_ucred, mp); 316 if (error) 317 goto out; 318 #endif 319 /* 320 * Set these in case the underlying filesystem fails to do so. 321 */ 322 sp = &mp->mnt_stat; 323 sp->f_version = STATFS_VERSION; 324 sp->f_namemax = NAME_MAX; 325 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 326 error = VFS_STATFS(mp, sp); 327 if (error) 328 goto out; 329 if (priv_check(td, PRIV_VFS_GENERATION)) { 330 bcopy(sp, &sb, sizeof(sb)); 331 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 332 prison_enforce_statfs(td->td_ucred, mp, &sb); 333 sp = &sb; 334 } 335 *buf = *sp; 336 out: 337 vfs_unbusy(mp); 338 return (error); 339 } 340 341 /* 342 * Get filesystem statistics. 343 */ 344 #ifndef _SYS_SYSPROTO_H_ 345 struct fstatfs_args { 346 int fd; 347 struct statfs *buf; 348 }; 349 #endif 350 int 351 sys_fstatfs(td, uap) 352 struct thread *td; 353 register struct fstatfs_args /* { 354 int fd; 355 struct statfs *buf; 356 } */ *uap; 357 { 358 struct statfs sf; 359 int error; 360 361 error = kern_fstatfs(td, uap->fd, &sf); 362 if (error == 0) 363 error = copyout(&sf, uap->buf, sizeof(sf)); 364 return (error); 365 } 366 367 int 368 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 369 { 370 struct file *fp; 371 struct mount *mp; 372 struct statfs *sp, sb; 373 struct vnode *vp; 374 int error; 375 376 AUDIT_ARG_FD(fd); 377 error = getvnode(td->td_proc->p_fd, fd, CAP_FSTATFS, &fp); 378 if (error) 379 return (error); 380 vp = fp->f_vnode; 381 vn_lock(vp, LK_SHARED | LK_RETRY); 382 #ifdef AUDIT 383 AUDIT_ARG_VNODE1(vp); 384 #endif 385 mp = vp->v_mount; 386 if (mp) 387 vfs_ref(mp); 388 VOP_UNLOCK(vp, 0); 389 fdrop(fp, td); 390 if (mp == NULL) { 391 error = EBADF; 392 goto out; 393 } 394 error = vfs_busy(mp, 0); 395 vfs_rel(mp); 396 if (error) 397 return (error); 398 #ifdef MAC 399 error = mac_mount_check_stat(td->td_ucred, mp); 400 if (error) 401 goto out; 402 #endif 403 /* 404 * Set these in case the underlying filesystem fails to do so. 405 */ 406 sp = &mp->mnt_stat; 407 sp->f_version = STATFS_VERSION; 408 sp->f_namemax = NAME_MAX; 409 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 410 error = VFS_STATFS(mp, sp); 411 if (error) 412 goto out; 413 if (priv_check(td, PRIV_VFS_GENERATION)) { 414 bcopy(sp, &sb, sizeof(sb)); 415 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 416 prison_enforce_statfs(td->td_ucred, mp, &sb); 417 sp = &sb; 418 } 419 *buf = *sp; 420 out: 421 if (mp) 422 vfs_unbusy(mp); 423 return (error); 424 } 425 426 /* 427 * Get statistics on all filesystems. 428 */ 429 #ifndef _SYS_SYSPROTO_H_ 430 struct getfsstat_args { 431 struct statfs *buf; 432 long bufsize; 433 int flags; 434 }; 435 #endif 436 int 437 sys_getfsstat(td, uap) 438 struct thread *td; 439 register struct getfsstat_args /* { 440 struct statfs *buf; 441 long bufsize; 442 int flags; 443 } */ *uap; 444 { 445 446 return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE, 447 uap->flags)); 448 } 449 450 /* 451 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 452 * The caller is responsible for freeing memory which will be allocated 453 * in '*buf'. 454 */ 455 int 456 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 457 enum uio_seg bufseg, int flags) 458 { 459 struct mount *mp, *nmp; 460 struct statfs *sfsp, *sp, sb; 461 size_t count, maxcount; 462 int error; 463 464 maxcount = bufsize / sizeof(struct statfs); 465 if (bufsize == 0) 466 sfsp = NULL; 467 else if (bufseg == UIO_USERSPACE) 468 sfsp = *buf; 469 else /* if (bufseg == UIO_SYSSPACE) */ { 470 count = 0; 471 mtx_lock(&mountlist_mtx); 472 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 473 count++; 474 } 475 mtx_unlock(&mountlist_mtx); 476 if (maxcount > count) 477 maxcount = count; 478 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 479 M_WAITOK); 480 } 481 count = 0; 482 mtx_lock(&mountlist_mtx); 483 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 484 if (prison_canseemount(td->td_ucred, mp) != 0) { 485 nmp = TAILQ_NEXT(mp, mnt_list); 486 continue; 487 } 488 #ifdef MAC 489 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 490 nmp = TAILQ_NEXT(mp, mnt_list); 491 continue; 492 } 493 #endif 494 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 495 nmp = TAILQ_NEXT(mp, mnt_list); 496 continue; 497 } 498 if (sfsp && count < maxcount) { 499 sp = &mp->mnt_stat; 500 /* 501 * Set these in case the underlying filesystem 502 * fails to do so. 503 */ 504 sp->f_version = STATFS_VERSION; 505 sp->f_namemax = NAME_MAX; 506 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 507 /* 508 * If MNT_NOWAIT or MNT_LAZY is specified, do not 509 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 510 * overrides MNT_WAIT. 511 */ 512 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 513 (flags & MNT_WAIT)) && 514 (error = VFS_STATFS(mp, sp))) { 515 mtx_lock(&mountlist_mtx); 516 nmp = TAILQ_NEXT(mp, mnt_list); 517 vfs_unbusy(mp); 518 continue; 519 } 520 if (priv_check(td, PRIV_VFS_GENERATION)) { 521 bcopy(sp, &sb, sizeof(sb)); 522 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 523 prison_enforce_statfs(td->td_ucred, mp, &sb); 524 sp = &sb; 525 } 526 if (bufseg == UIO_SYSSPACE) 527 bcopy(sp, sfsp, sizeof(*sp)); 528 else /* if (bufseg == UIO_USERSPACE) */ { 529 error = copyout(sp, sfsp, sizeof(*sp)); 530 if (error) { 531 vfs_unbusy(mp); 532 return (error); 533 } 534 } 535 sfsp++; 536 } 537 count++; 538 mtx_lock(&mountlist_mtx); 539 nmp = TAILQ_NEXT(mp, mnt_list); 540 vfs_unbusy(mp); 541 } 542 mtx_unlock(&mountlist_mtx); 543 if (sfsp && count > maxcount) 544 td->td_retval[0] = maxcount; 545 else 546 td->td_retval[0] = count; 547 return (0); 548 } 549 550 #ifdef COMPAT_FREEBSD4 551 /* 552 * Get old format filesystem statistics. 553 */ 554 static void cvtstatfs(struct statfs *, struct ostatfs *); 555 556 #ifndef _SYS_SYSPROTO_H_ 557 struct freebsd4_statfs_args { 558 char *path; 559 struct ostatfs *buf; 560 }; 561 #endif 562 int 563 freebsd4_statfs(td, uap) 564 struct thread *td; 565 struct freebsd4_statfs_args /* { 566 char *path; 567 struct ostatfs *buf; 568 } */ *uap; 569 { 570 struct ostatfs osb; 571 struct statfs sf; 572 int error; 573 574 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 575 if (error) 576 return (error); 577 cvtstatfs(&sf, &osb); 578 return (copyout(&osb, uap->buf, sizeof(osb))); 579 } 580 581 /* 582 * Get filesystem statistics. 583 */ 584 #ifndef _SYS_SYSPROTO_H_ 585 struct freebsd4_fstatfs_args { 586 int fd; 587 struct ostatfs *buf; 588 }; 589 #endif 590 int 591 freebsd4_fstatfs(td, uap) 592 struct thread *td; 593 struct freebsd4_fstatfs_args /* { 594 int fd; 595 struct ostatfs *buf; 596 } */ *uap; 597 { 598 struct ostatfs osb; 599 struct statfs sf; 600 int error; 601 602 error = kern_fstatfs(td, uap->fd, &sf); 603 if (error) 604 return (error); 605 cvtstatfs(&sf, &osb); 606 return (copyout(&osb, uap->buf, sizeof(osb))); 607 } 608 609 /* 610 * Get statistics on all filesystems. 611 */ 612 #ifndef _SYS_SYSPROTO_H_ 613 struct freebsd4_getfsstat_args { 614 struct ostatfs *buf; 615 long bufsize; 616 int flags; 617 }; 618 #endif 619 int 620 freebsd4_getfsstat(td, uap) 621 struct thread *td; 622 register struct freebsd4_getfsstat_args /* { 623 struct ostatfs *buf; 624 long bufsize; 625 int flags; 626 } */ *uap; 627 { 628 struct statfs *buf, *sp; 629 struct ostatfs osb; 630 size_t count, size; 631 int error; 632 633 count = uap->bufsize / sizeof(struct ostatfs); 634 size = count * sizeof(struct statfs); 635 error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags); 636 if (size > 0) { 637 count = td->td_retval[0]; 638 sp = buf; 639 while (count > 0 && error == 0) { 640 cvtstatfs(sp, &osb); 641 error = copyout(&osb, uap->buf, sizeof(osb)); 642 sp++; 643 uap->buf++; 644 count--; 645 } 646 free(buf, M_TEMP); 647 } 648 return (error); 649 } 650 651 /* 652 * Implement fstatfs() for (NFS) file handles. 653 */ 654 #ifndef _SYS_SYSPROTO_H_ 655 struct freebsd4_fhstatfs_args { 656 struct fhandle *u_fhp; 657 struct ostatfs *buf; 658 }; 659 #endif 660 int 661 freebsd4_fhstatfs(td, uap) 662 struct thread *td; 663 struct freebsd4_fhstatfs_args /* { 664 struct fhandle *u_fhp; 665 struct ostatfs *buf; 666 } */ *uap; 667 { 668 struct ostatfs osb; 669 struct statfs sf; 670 fhandle_t fh; 671 int error; 672 673 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 674 if (error) 675 return (error); 676 error = kern_fhstatfs(td, fh, &sf); 677 if (error) 678 return (error); 679 cvtstatfs(&sf, &osb); 680 return (copyout(&osb, uap->buf, sizeof(osb))); 681 } 682 683 /* 684 * Convert a new format statfs structure to an old format statfs structure. 685 */ 686 static void 687 cvtstatfs(nsp, osp) 688 struct statfs *nsp; 689 struct ostatfs *osp; 690 { 691 692 statfs_scale_blocks(nsp, LONG_MAX); 693 bzero(osp, sizeof(*osp)); 694 osp->f_bsize = nsp->f_bsize; 695 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 696 osp->f_blocks = nsp->f_blocks; 697 osp->f_bfree = nsp->f_bfree; 698 osp->f_bavail = nsp->f_bavail; 699 osp->f_files = MIN(nsp->f_files, LONG_MAX); 700 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 701 osp->f_owner = nsp->f_owner; 702 osp->f_type = nsp->f_type; 703 osp->f_flags = nsp->f_flags; 704 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 705 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 706 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 707 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 708 strlcpy(osp->f_fstypename, nsp->f_fstypename, 709 MIN(MFSNAMELEN, OMFSNAMELEN)); 710 strlcpy(osp->f_mntonname, nsp->f_mntonname, 711 MIN(MNAMELEN, OMNAMELEN)); 712 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 713 MIN(MNAMELEN, OMNAMELEN)); 714 osp->f_fsid = nsp->f_fsid; 715 } 716 #endif /* COMPAT_FREEBSD4 */ 717 718 /* 719 * Change current working directory to a given file descriptor. 720 */ 721 #ifndef _SYS_SYSPROTO_H_ 722 struct fchdir_args { 723 int fd; 724 }; 725 #endif 726 int 727 sys_fchdir(td, uap) 728 struct thread *td; 729 struct fchdir_args /* { 730 int fd; 731 } */ *uap; 732 { 733 register struct filedesc *fdp = td->td_proc->p_fd; 734 struct vnode *vp, *tdp, *vpold; 735 struct mount *mp; 736 struct file *fp; 737 int error; 738 739 AUDIT_ARG_FD(uap->fd); 740 if ((error = getvnode(fdp, uap->fd, CAP_FCHDIR, &fp)) != 0) 741 return (error); 742 vp = fp->f_vnode; 743 VREF(vp); 744 fdrop(fp, td); 745 vn_lock(vp, LK_SHARED | LK_RETRY); 746 AUDIT_ARG_VNODE1(vp); 747 error = change_dir(vp, td); 748 while (!error && (mp = vp->v_mountedhere) != NULL) { 749 if (vfs_busy(mp, 0)) 750 continue; 751 error = VFS_ROOT(mp, LK_SHARED, &tdp); 752 vfs_unbusy(mp); 753 if (error) 754 break; 755 vput(vp); 756 vp = tdp; 757 } 758 if (error) { 759 vput(vp); 760 return (error); 761 } 762 VOP_UNLOCK(vp, 0); 763 FILEDESC_XLOCK(fdp); 764 vpold = fdp->fd_cdir; 765 fdp->fd_cdir = vp; 766 FILEDESC_XUNLOCK(fdp); 767 vrele(vpold); 768 return (0); 769 } 770 771 /* 772 * Change current working directory (``.''). 773 */ 774 #ifndef _SYS_SYSPROTO_H_ 775 struct chdir_args { 776 char *path; 777 }; 778 #endif 779 int 780 sys_chdir(td, uap) 781 struct thread *td; 782 struct chdir_args /* { 783 char *path; 784 } */ *uap; 785 { 786 787 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 788 } 789 790 int 791 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 792 { 793 register struct filedesc *fdp = td->td_proc->p_fd; 794 int error; 795 struct nameidata nd; 796 struct vnode *vp; 797 798 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 799 pathseg, path, td); 800 if ((error = namei(&nd)) != 0) 801 return (error); 802 if ((error = change_dir(nd.ni_vp, td)) != 0) { 803 vput(nd.ni_vp); 804 NDFREE(&nd, NDF_ONLY_PNBUF); 805 return (error); 806 } 807 VOP_UNLOCK(nd.ni_vp, 0); 808 NDFREE(&nd, NDF_ONLY_PNBUF); 809 FILEDESC_XLOCK(fdp); 810 vp = fdp->fd_cdir; 811 fdp->fd_cdir = nd.ni_vp; 812 FILEDESC_XUNLOCK(fdp); 813 vrele(vp); 814 return (0); 815 } 816 817 /* 818 * Helper function for raised chroot(2) security function: Refuse if 819 * any filedescriptors are open directories. 820 */ 821 static int 822 chroot_refuse_vdir_fds(fdp) 823 struct filedesc *fdp; 824 { 825 struct vnode *vp; 826 struct file *fp; 827 int fd; 828 829 FILEDESC_LOCK_ASSERT(fdp); 830 831 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 832 fp = fget_locked(fdp, fd); 833 if (fp == NULL) 834 continue; 835 if (fp->f_type == DTYPE_VNODE) { 836 vp = fp->f_vnode; 837 if (vp->v_type == VDIR) 838 return (EPERM); 839 } 840 } 841 return (0); 842 } 843 844 /* 845 * This sysctl determines if we will allow a process to chroot(2) if it 846 * has a directory open: 847 * 0: disallowed for all processes. 848 * 1: allowed for processes that were not already chroot(2)'ed. 849 * 2: allowed for all processes. 850 */ 851 852 static int chroot_allow_open_directories = 1; 853 854 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 855 &chroot_allow_open_directories, 0, 856 "Allow a process to chroot(2) if it has a directory open"); 857 858 /* 859 * Change notion of root (``/'') directory. 860 */ 861 #ifndef _SYS_SYSPROTO_H_ 862 struct chroot_args { 863 char *path; 864 }; 865 #endif 866 int 867 sys_chroot(td, uap) 868 struct thread *td; 869 struct chroot_args /* { 870 char *path; 871 } */ *uap; 872 { 873 int error; 874 struct nameidata nd; 875 876 error = priv_check(td, PRIV_VFS_CHROOT); 877 if (error) 878 return (error); 879 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 880 UIO_USERSPACE, uap->path, td); 881 error = namei(&nd); 882 if (error) 883 goto error; 884 if ((error = change_dir(nd.ni_vp, td)) != 0) 885 goto e_vunlock; 886 #ifdef MAC 887 if ((error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp))) 888 goto e_vunlock; 889 #endif 890 VOP_UNLOCK(nd.ni_vp, 0); 891 error = change_root(nd.ni_vp, td); 892 vrele(nd.ni_vp); 893 NDFREE(&nd, NDF_ONLY_PNBUF); 894 return (error); 895 e_vunlock: 896 vput(nd.ni_vp); 897 error: 898 NDFREE(&nd, NDF_ONLY_PNBUF); 899 return (error); 900 } 901 902 /* 903 * Common routine for chroot and chdir. Callers must provide a locked vnode 904 * instance. 905 */ 906 int 907 change_dir(vp, td) 908 struct vnode *vp; 909 struct thread *td; 910 { 911 int error; 912 913 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 914 if (vp->v_type != VDIR) 915 return (ENOTDIR); 916 #ifdef MAC 917 error = mac_vnode_check_chdir(td->td_ucred, vp); 918 if (error) 919 return (error); 920 #endif 921 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 922 return (error); 923 } 924 925 /* 926 * Common routine for kern_chroot() and jail_attach(). The caller is 927 * responsible for invoking priv_check() and mac_vnode_check_chroot() to 928 * authorize this operation. 929 */ 930 int 931 change_root(vp, td) 932 struct vnode *vp; 933 struct thread *td; 934 { 935 struct filedesc *fdp; 936 struct vnode *oldvp; 937 int error; 938 939 fdp = td->td_proc->p_fd; 940 FILEDESC_XLOCK(fdp); 941 if (chroot_allow_open_directories == 0 || 942 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 943 error = chroot_refuse_vdir_fds(fdp); 944 if (error) { 945 FILEDESC_XUNLOCK(fdp); 946 return (error); 947 } 948 } 949 oldvp = fdp->fd_rdir; 950 fdp->fd_rdir = vp; 951 VREF(fdp->fd_rdir); 952 if (!fdp->fd_jdir) { 953 fdp->fd_jdir = vp; 954 VREF(fdp->fd_jdir); 955 } 956 FILEDESC_XUNLOCK(fdp); 957 vrele(oldvp); 958 return (0); 959 } 960 961 static __inline cap_rights_t 962 flags_to_rights(int flags) 963 { 964 cap_rights_t rights = 0; 965 966 if (flags & O_EXEC) { 967 rights |= CAP_FEXECVE; 968 } else { 969 switch ((flags & O_ACCMODE)) { 970 case O_RDONLY: 971 rights |= CAP_READ; 972 break; 973 case O_RDWR: 974 rights |= CAP_READ; 975 /* FALLTHROUGH */ 976 case O_WRONLY: 977 rights |= CAP_WRITE; 978 if (!(flags & (O_APPEND | O_TRUNC))) 979 rights |= CAP_SEEK; 980 break; 981 } 982 } 983 984 if (flags & O_CREAT) 985 rights |= CAP_CREATE; 986 987 if (flags & O_TRUNC) 988 rights |= CAP_FTRUNCATE; 989 990 if (flags & (O_SYNC | O_FSYNC)) 991 rights |= CAP_FSYNC; 992 993 if (flags & (O_EXLOCK | O_SHLOCK)) 994 rights |= CAP_FLOCK; 995 996 return (rights); 997 } 998 999 /* 1000 * Check permissions, allocate an open file structure, and call the device 1001 * open routine if any. 1002 */ 1003 #ifndef _SYS_SYSPROTO_H_ 1004 struct open_args { 1005 char *path; 1006 int flags; 1007 int mode; 1008 }; 1009 #endif 1010 int 1011 sys_open(td, uap) 1012 struct thread *td; 1013 register struct open_args /* { 1014 char *path; 1015 int flags; 1016 int mode; 1017 } */ *uap; 1018 { 1019 1020 return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode)); 1021 } 1022 1023 #ifndef _SYS_SYSPROTO_H_ 1024 struct openat_args { 1025 int fd; 1026 char *path; 1027 int flag; 1028 int mode; 1029 }; 1030 #endif 1031 int 1032 sys_openat(struct thread *td, struct openat_args *uap) 1033 { 1034 1035 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1036 uap->mode)); 1037 } 1038 1039 int 1040 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags, 1041 int mode) 1042 { 1043 1044 return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode)); 1045 } 1046 1047 int 1048 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1049 int flags, int mode) 1050 { 1051 struct proc *p = td->td_proc; 1052 struct filedesc *fdp = p->p_fd; 1053 struct file *fp; 1054 struct vnode *vp; 1055 int cmode; 1056 int indx = -1, error; 1057 struct nameidata nd; 1058 cap_rights_t rights_needed = CAP_LOOKUP; 1059 1060 AUDIT_ARG_FFLAGS(flags); 1061 AUDIT_ARG_MODE(mode); 1062 /* XXX: audit dirfd */ 1063 rights_needed |= flags_to_rights(flags); 1064 /* 1065 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1066 * may be specified. 1067 */ 1068 if (flags & O_EXEC) { 1069 if (flags & O_ACCMODE) 1070 return (EINVAL); 1071 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1072 return (EINVAL); 1073 } else { 1074 flags = FFLAGS(flags); 1075 } 1076 1077 /* 1078 * Allocate the file descriptor, but don't install a descriptor yet. 1079 */ 1080 error = falloc_noinstall(td, &fp); 1081 if (error) 1082 return (error); 1083 /* 1084 * An extra reference on `fp' has been held for us by 1085 * falloc_noinstall(). 1086 */ 1087 /* Set the flags early so the finit in devfs can pick them up. */ 1088 fp->f_flag = flags & FMASK; 1089 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; 1090 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1091 rights_needed, td); 1092 td->td_dupfd = -1; /* XXX check for fdopen */ 1093 error = vn_open(&nd, &flags, cmode, fp); 1094 if (error) { 1095 /* 1096 * If the vn_open replaced the method vector, something 1097 * wonderous happened deep below and we just pass it up 1098 * pretending we know what we do. 1099 */ 1100 if (error == ENXIO && fp->f_ops != &badfileops) 1101 goto success; 1102 1103 /* 1104 * Handle special fdopen() case. bleh. 1105 * 1106 * Don't do this for relative (capability) lookups; we don't 1107 * understand exactly what would happen, and we don't think 1108 * that it ever should. 1109 */ 1110 if (nd.ni_strictrelative == 0 && 1111 (error == ENODEV || error == ENXIO) && 1112 td->td_dupfd >= 0) { 1113 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1114 &indx); 1115 if (error == 0) 1116 goto success; 1117 } 1118 1119 goto bad; 1120 } 1121 td->td_dupfd = 0; 1122 NDFREE(&nd, NDF_ONLY_PNBUF); 1123 vp = nd.ni_vp; 1124 1125 /* 1126 * Store the vnode, for any f_type. Typically, the vnode use 1127 * count is decremented by direct call to vn_closefile() for 1128 * files that switched type in the cdevsw fdopen() method. 1129 */ 1130 fp->f_vnode = vp; 1131 /* 1132 * If the file wasn't claimed by devfs bind it to the normal 1133 * vnode operations here. 1134 */ 1135 if (fp->f_ops == &badfileops) { 1136 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1137 fp->f_seqcount = 1; 1138 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, 1139 vp, &vnops); 1140 } 1141 1142 VOP_UNLOCK(vp, 0); 1143 if (flags & O_TRUNC) { 1144 error = fo_truncate(fp, 0, td->td_ucred, td); 1145 if (error) 1146 goto bad; 1147 } 1148 success: 1149 /* 1150 * If we haven't already installed the FD (for dupfdopen), do so now. 1151 */ 1152 if (indx == -1) { 1153 struct filecaps *fcaps; 1154 1155 #ifdef CAPABILITIES 1156 if (nd.ni_strictrelative == 1) 1157 fcaps = &nd.ni_filecaps; 1158 else 1159 #endif 1160 fcaps = NULL; 1161 error = finstall(td, fp, &indx, flags, fcaps); 1162 /* On success finstall() consumes fcaps. */ 1163 if (error != 0) { 1164 filecaps_free(&nd.ni_filecaps); 1165 goto bad; 1166 } 1167 } else { 1168 filecaps_free(&nd.ni_filecaps); 1169 } 1170 1171 /* 1172 * Release our private reference, leaving the one associated with 1173 * the descriptor table intact. 1174 */ 1175 fdrop(fp, td); 1176 td->td_retval[0] = indx; 1177 return (0); 1178 bad: 1179 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1180 fdrop(fp, td); 1181 return (error); 1182 } 1183 1184 #ifdef COMPAT_43 1185 /* 1186 * Create a file. 1187 */ 1188 #ifndef _SYS_SYSPROTO_H_ 1189 struct ocreat_args { 1190 char *path; 1191 int mode; 1192 }; 1193 #endif 1194 int 1195 ocreat(td, uap) 1196 struct thread *td; 1197 register struct ocreat_args /* { 1198 char *path; 1199 int mode; 1200 } */ *uap; 1201 { 1202 1203 return (kern_open(td, uap->path, UIO_USERSPACE, 1204 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1205 } 1206 #endif /* COMPAT_43 */ 1207 1208 /* 1209 * Create a special file. 1210 */ 1211 #ifndef _SYS_SYSPROTO_H_ 1212 struct mknod_args { 1213 char *path; 1214 int mode; 1215 int dev; 1216 }; 1217 #endif 1218 int 1219 sys_mknod(td, uap) 1220 struct thread *td; 1221 register struct mknod_args /* { 1222 char *path; 1223 int mode; 1224 int dev; 1225 } */ *uap; 1226 { 1227 1228 return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); 1229 } 1230 1231 #ifndef _SYS_SYSPROTO_H_ 1232 struct mknodat_args { 1233 int fd; 1234 char *path; 1235 mode_t mode; 1236 dev_t dev; 1237 }; 1238 #endif 1239 int 1240 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1241 { 1242 1243 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1244 uap->dev)); 1245 } 1246 1247 int 1248 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode, 1249 int dev) 1250 { 1251 1252 return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev)); 1253 } 1254 1255 int 1256 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1257 int mode, int dev) 1258 { 1259 struct vnode *vp; 1260 struct mount *mp; 1261 struct vattr vattr; 1262 int error; 1263 int whiteout = 0; 1264 struct nameidata nd; 1265 1266 AUDIT_ARG_MODE(mode); 1267 AUDIT_ARG_DEV(dev); 1268 switch (mode & S_IFMT) { 1269 case S_IFCHR: 1270 case S_IFBLK: 1271 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1272 break; 1273 case S_IFMT: 1274 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1275 break; 1276 case S_IFWHT: 1277 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1278 break; 1279 case S_IFIFO: 1280 if (dev == 0) 1281 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1282 /* FALLTHROUGH */ 1283 default: 1284 error = EINVAL; 1285 break; 1286 } 1287 if (error) 1288 return (error); 1289 restart: 1290 bwillwrite(); 1291 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1292 pathseg, path, fd, CAP_MKNODAT, td); 1293 if ((error = namei(&nd)) != 0) 1294 return (error); 1295 vp = nd.ni_vp; 1296 if (vp != NULL) { 1297 NDFREE(&nd, NDF_ONLY_PNBUF); 1298 if (vp == nd.ni_dvp) 1299 vrele(nd.ni_dvp); 1300 else 1301 vput(nd.ni_dvp); 1302 vrele(vp); 1303 return (EEXIST); 1304 } else { 1305 VATTR_NULL(&vattr); 1306 vattr.va_mode = (mode & ALLPERMS) & 1307 ~td->td_proc->p_fd->fd_cmask; 1308 vattr.va_rdev = dev; 1309 whiteout = 0; 1310 1311 switch (mode & S_IFMT) { 1312 case S_IFMT: /* used by badsect to flag bad sectors */ 1313 vattr.va_type = VBAD; 1314 break; 1315 case S_IFCHR: 1316 vattr.va_type = VCHR; 1317 break; 1318 case S_IFBLK: 1319 vattr.va_type = VBLK; 1320 break; 1321 case S_IFWHT: 1322 whiteout = 1; 1323 break; 1324 default: 1325 panic("kern_mknod: invalid mode"); 1326 } 1327 } 1328 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1329 NDFREE(&nd, NDF_ONLY_PNBUF); 1330 vput(nd.ni_dvp); 1331 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1332 return (error); 1333 goto restart; 1334 } 1335 #ifdef MAC 1336 if (error == 0 && !whiteout) 1337 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1338 &nd.ni_cnd, &vattr); 1339 #endif 1340 if (!error) { 1341 if (whiteout) 1342 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1343 else { 1344 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1345 &nd.ni_cnd, &vattr); 1346 if (error == 0) 1347 vput(nd.ni_vp); 1348 } 1349 } 1350 NDFREE(&nd, NDF_ONLY_PNBUF); 1351 vput(nd.ni_dvp); 1352 vn_finished_write(mp); 1353 return (error); 1354 } 1355 1356 /* 1357 * Create a named pipe. 1358 */ 1359 #ifndef _SYS_SYSPROTO_H_ 1360 struct mkfifo_args { 1361 char *path; 1362 int mode; 1363 }; 1364 #endif 1365 int 1366 sys_mkfifo(td, uap) 1367 struct thread *td; 1368 register struct mkfifo_args /* { 1369 char *path; 1370 int mode; 1371 } */ *uap; 1372 { 1373 1374 return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode)); 1375 } 1376 1377 #ifndef _SYS_SYSPROTO_H_ 1378 struct mkfifoat_args { 1379 int fd; 1380 char *path; 1381 mode_t mode; 1382 }; 1383 #endif 1384 int 1385 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1386 { 1387 1388 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1389 uap->mode)); 1390 } 1391 1392 int 1393 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode) 1394 { 1395 1396 return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode)); 1397 } 1398 1399 int 1400 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1401 int mode) 1402 { 1403 struct mount *mp; 1404 struct vattr vattr; 1405 int error; 1406 struct nameidata nd; 1407 1408 AUDIT_ARG_MODE(mode); 1409 restart: 1410 bwillwrite(); 1411 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1412 pathseg, path, fd, CAP_MKFIFOAT, td); 1413 if ((error = namei(&nd)) != 0) 1414 return (error); 1415 if (nd.ni_vp != NULL) { 1416 NDFREE(&nd, NDF_ONLY_PNBUF); 1417 if (nd.ni_vp == nd.ni_dvp) 1418 vrele(nd.ni_dvp); 1419 else 1420 vput(nd.ni_dvp); 1421 vrele(nd.ni_vp); 1422 return (EEXIST); 1423 } 1424 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1425 NDFREE(&nd, NDF_ONLY_PNBUF); 1426 vput(nd.ni_dvp); 1427 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1428 return (error); 1429 goto restart; 1430 } 1431 VATTR_NULL(&vattr); 1432 vattr.va_type = VFIFO; 1433 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1434 #ifdef MAC 1435 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1436 &vattr); 1437 if (error) 1438 goto out; 1439 #endif 1440 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1441 if (error == 0) 1442 vput(nd.ni_vp); 1443 #ifdef MAC 1444 out: 1445 #endif 1446 vput(nd.ni_dvp); 1447 vn_finished_write(mp); 1448 NDFREE(&nd, NDF_ONLY_PNBUF); 1449 return (error); 1450 } 1451 1452 /* 1453 * Make a hard file link. 1454 */ 1455 #ifndef _SYS_SYSPROTO_H_ 1456 struct link_args { 1457 char *path; 1458 char *link; 1459 }; 1460 #endif 1461 int 1462 sys_link(td, uap) 1463 struct thread *td; 1464 register struct link_args /* { 1465 char *path; 1466 char *link; 1467 } */ *uap; 1468 { 1469 1470 return (kern_link(td, uap->path, uap->link, UIO_USERSPACE)); 1471 } 1472 1473 #ifndef _SYS_SYSPROTO_H_ 1474 struct linkat_args { 1475 int fd1; 1476 char *path1; 1477 int fd2; 1478 char *path2; 1479 int flag; 1480 }; 1481 #endif 1482 int 1483 sys_linkat(struct thread *td, struct linkat_args *uap) 1484 { 1485 int flag; 1486 1487 flag = uap->flag; 1488 if (flag & ~AT_SYMLINK_FOLLOW) 1489 return (EINVAL); 1490 1491 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1492 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); 1493 } 1494 1495 int hardlink_check_uid = 0; 1496 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1497 &hardlink_check_uid, 0, 1498 "Unprivileged processes cannot create hard links to files owned by other " 1499 "users"); 1500 static int hardlink_check_gid = 0; 1501 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1502 &hardlink_check_gid, 0, 1503 "Unprivileged processes cannot create hard links to files owned by other " 1504 "groups"); 1505 1506 static int 1507 can_hardlink(struct vnode *vp, struct ucred *cred) 1508 { 1509 struct vattr va; 1510 int error; 1511 1512 if (!hardlink_check_uid && !hardlink_check_gid) 1513 return (0); 1514 1515 error = VOP_GETATTR(vp, &va, cred); 1516 if (error != 0) 1517 return (error); 1518 1519 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1520 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1521 if (error) 1522 return (error); 1523 } 1524 1525 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1526 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1527 if (error) 1528 return (error); 1529 } 1530 1531 return (0); 1532 } 1533 1534 int 1535 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg) 1536 { 1537 1538 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW)); 1539 } 1540 1541 int 1542 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1543 enum uio_seg segflg, int follow) 1544 { 1545 struct vnode *vp; 1546 struct mount *mp; 1547 struct nameidata nd; 1548 int error; 1549 1550 bwillwrite(); 1551 NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td); 1552 1553 if ((error = namei(&nd)) != 0) 1554 return (error); 1555 NDFREE(&nd, NDF_ONLY_PNBUF); 1556 vp = nd.ni_vp; 1557 if (vp->v_type == VDIR) { 1558 vrele(vp); 1559 return (EPERM); /* POSIX */ 1560 } 1561 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 1562 vrele(vp); 1563 return (error); 1564 } 1565 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2, 1566 segflg, path2, fd2, CAP_LINKAT, td); 1567 if ((error = namei(&nd)) == 0) { 1568 if (nd.ni_vp != NULL) { 1569 if (nd.ni_dvp == nd.ni_vp) 1570 vrele(nd.ni_dvp); 1571 else 1572 vput(nd.ni_dvp); 1573 vrele(nd.ni_vp); 1574 error = EEXIST; 1575 } else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) 1576 == 0) { 1577 error = can_hardlink(vp, td->td_ucred); 1578 if (error == 0) 1579 #ifdef MAC 1580 error = mac_vnode_check_link(td->td_ucred, 1581 nd.ni_dvp, vp, &nd.ni_cnd); 1582 if (error == 0) 1583 #endif 1584 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1585 VOP_UNLOCK(vp, 0); 1586 vput(nd.ni_dvp); 1587 } 1588 NDFREE(&nd, NDF_ONLY_PNBUF); 1589 } 1590 vrele(vp); 1591 vn_finished_write(mp); 1592 return (error); 1593 } 1594 1595 /* 1596 * Make a symbolic link. 1597 */ 1598 #ifndef _SYS_SYSPROTO_H_ 1599 struct symlink_args { 1600 char *path; 1601 char *link; 1602 }; 1603 #endif 1604 int 1605 sys_symlink(td, uap) 1606 struct thread *td; 1607 register struct symlink_args /* { 1608 char *path; 1609 char *link; 1610 } */ *uap; 1611 { 1612 1613 return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE)); 1614 } 1615 1616 #ifndef _SYS_SYSPROTO_H_ 1617 struct symlinkat_args { 1618 char *path; 1619 int fd; 1620 char *path2; 1621 }; 1622 #endif 1623 int 1624 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1625 { 1626 1627 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1628 UIO_USERSPACE)); 1629 } 1630 1631 int 1632 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg) 1633 { 1634 1635 return (kern_symlinkat(td, path, AT_FDCWD, link, segflg)); 1636 } 1637 1638 int 1639 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1640 enum uio_seg segflg) 1641 { 1642 struct mount *mp; 1643 struct vattr vattr; 1644 char *syspath; 1645 int error; 1646 struct nameidata nd; 1647 1648 if (segflg == UIO_SYSSPACE) { 1649 syspath = path1; 1650 } else { 1651 syspath = uma_zalloc(namei_zone, M_WAITOK); 1652 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1653 goto out; 1654 } 1655 AUDIT_ARG_TEXT(syspath); 1656 restart: 1657 bwillwrite(); 1658 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1659 segflg, path2, fd, CAP_SYMLINKAT, td); 1660 if ((error = namei(&nd)) != 0) 1661 goto out; 1662 if (nd.ni_vp) { 1663 NDFREE(&nd, NDF_ONLY_PNBUF); 1664 if (nd.ni_vp == nd.ni_dvp) 1665 vrele(nd.ni_dvp); 1666 else 1667 vput(nd.ni_dvp); 1668 vrele(nd.ni_vp); 1669 error = EEXIST; 1670 goto out; 1671 } 1672 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1673 NDFREE(&nd, NDF_ONLY_PNBUF); 1674 vput(nd.ni_dvp); 1675 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1676 goto out; 1677 goto restart; 1678 } 1679 VATTR_NULL(&vattr); 1680 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1681 #ifdef MAC 1682 vattr.va_type = VLNK; 1683 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1684 &vattr); 1685 if (error) 1686 goto out2; 1687 #endif 1688 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1689 if (error == 0) 1690 vput(nd.ni_vp); 1691 #ifdef MAC 1692 out2: 1693 #endif 1694 NDFREE(&nd, NDF_ONLY_PNBUF); 1695 vput(nd.ni_dvp); 1696 vn_finished_write(mp); 1697 out: 1698 if (segflg != UIO_SYSSPACE) 1699 uma_zfree(namei_zone, syspath); 1700 return (error); 1701 } 1702 1703 /* 1704 * Delete a whiteout from the filesystem. 1705 */ 1706 int 1707 sys_undelete(td, uap) 1708 struct thread *td; 1709 register struct undelete_args /* { 1710 char *path; 1711 } */ *uap; 1712 { 1713 int error; 1714 struct mount *mp; 1715 struct nameidata nd; 1716 1717 restart: 1718 bwillwrite(); 1719 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1720 UIO_USERSPACE, uap->path, td); 1721 error = namei(&nd); 1722 if (error) 1723 return (error); 1724 1725 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1726 NDFREE(&nd, NDF_ONLY_PNBUF); 1727 if (nd.ni_vp == nd.ni_dvp) 1728 vrele(nd.ni_dvp); 1729 else 1730 vput(nd.ni_dvp); 1731 if (nd.ni_vp) 1732 vrele(nd.ni_vp); 1733 return (EEXIST); 1734 } 1735 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1736 NDFREE(&nd, NDF_ONLY_PNBUF); 1737 vput(nd.ni_dvp); 1738 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1739 return (error); 1740 goto restart; 1741 } 1742 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1743 NDFREE(&nd, NDF_ONLY_PNBUF); 1744 vput(nd.ni_dvp); 1745 vn_finished_write(mp); 1746 return (error); 1747 } 1748 1749 /* 1750 * Delete a name from the filesystem. 1751 */ 1752 #ifndef _SYS_SYSPROTO_H_ 1753 struct unlink_args { 1754 char *path; 1755 }; 1756 #endif 1757 int 1758 sys_unlink(td, uap) 1759 struct thread *td; 1760 struct unlink_args /* { 1761 char *path; 1762 } */ *uap; 1763 { 1764 1765 return (kern_unlink(td, uap->path, UIO_USERSPACE)); 1766 } 1767 1768 #ifndef _SYS_SYSPROTO_H_ 1769 struct unlinkat_args { 1770 int fd; 1771 char *path; 1772 int flag; 1773 }; 1774 #endif 1775 int 1776 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1777 { 1778 int flag = uap->flag; 1779 int fd = uap->fd; 1780 char *path = uap->path; 1781 1782 if (flag & ~AT_REMOVEDIR) 1783 return (EINVAL); 1784 1785 if (flag & AT_REMOVEDIR) 1786 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1787 else 1788 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1789 } 1790 1791 int 1792 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg) 1793 { 1794 1795 return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0)); 1796 } 1797 1798 int 1799 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1800 ino_t oldinum) 1801 { 1802 struct mount *mp; 1803 struct vnode *vp; 1804 int error; 1805 struct nameidata nd; 1806 struct stat sb; 1807 1808 restart: 1809 bwillwrite(); 1810 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1811 pathseg, path, fd, CAP_UNLINKAT, td); 1812 if ((error = namei(&nd)) != 0) 1813 return (error == EINVAL ? EPERM : error); 1814 vp = nd.ni_vp; 1815 if (vp->v_type == VDIR && oldinum == 0) { 1816 error = EPERM; /* POSIX */ 1817 } else if (oldinum != 0 && 1818 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1819 sb.st_ino != oldinum) { 1820 error = EIDRM; /* Identifier removed */ 1821 } else { 1822 /* 1823 * The root of a mounted filesystem cannot be deleted. 1824 * 1825 * XXX: can this only be a VDIR case? 1826 */ 1827 if (vp->v_vflag & VV_ROOT) 1828 error = EBUSY; 1829 } 1830 if (error == 0) { 1831 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1832 NDFREE(&nd, NDF_ONLY_PNBUF); 1833 vput(nd.ni_dvp); 1834 if (vp == nd.ni_dvp) 1835 vrele(vp); 1836 else 1837 vput(vp); 1838 if ((error = vn_start_write(NULL, &mp, 1839 V_XSLEEP | PCATCH)) != 0) 1840 return (error); 1841 goto restart; 1842 } 1843 #ifdef MAC 1844 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1845 &nd.ni_cnd); 1846 if (error) 1847 goto out; 1848 #endif 1849 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1850 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1851 #ifdef MAC 1852 out: 1853 #endif 1854 vn_finished_write(mp); 1855 } 1856 NDFREE(&nd, NDF_ONLY_PNBUF); 1857 vput(nd.ni_dvp); 1858 if (vp == nd.ni_dvp) 1859 vrele(vp); 1860 else 1861 vput(vp); 1862 return (error); 1863 } 1864 1865 /* 1866 * Reposition read/write file offset. 1867 */ 1868 #ifndef _SYS_SYSPROTO_H_ 1869 struct lseek_args { 1870 int fd; 1871 int pad; 1872 off_t offset; 1873 int whence; 1874 }; 1875 #endif 1876 int 1877 sys_lseek(td, uap) 1878 struct thread *td; 1879 register struct lseek_args /* { 1880 int fd; 1881 int pad; 1882 off_t offset; 1883 int whence; 1884 } */ *uap; 1885 { 1886 struct ucred *cred = td->td_ucred; 1887 struct file *fp; 1888 struct vnode *vp; 1889 struct vattr vattr; 1890 off_t foffset, offset, size; 1891 int error, noneg; 1892 1893 AUDIT_ARG_FD(uap->fd); 1894 if ((error = fget(td, uap->fd, CAP_SEEK, &fp)) != 0) 1895 return (error); 1896 if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) { 1897 fdrop(fp, td); 1898 return (ESPIPE); 1899 } 1900 vp = fp->f_vnode; 1901 foffset = foffset_lock(fp, 0); 1902 noneg = (vp->v_type != VCHR); 1903 offset = uap->offset; 1904 switch (uap->whence) { 1905 case L_INCR: 1906 if (noneg && 1907 (foffset < 0 || 1908 (offset > 0 && foffset > OFF_MAX - offset))) { 1909 error = EOVERFLOW; 1910 break; 1911 } 1912 offset += foffset; 1913 break; 1914 case L_XTND: 1915 vn_lock(vp, LK_SHARED | LK_RETRY); 1916 error = VOP_GETATTR(vp, &vattr, cred); 1917 VOP_UNLOCK(vp, 0); 1918 if (error) 1919 break; 1920 1921 /* 1922 * If the file references a disk device, then fetch 1923 * the media size and use that to determine the ending 1924 * offset. 1925 */ 1926 if (vattr.va_size == 0 && vp->v_type == VCHR && 1927 fo_ioctl(fp, DIOCGMEDIASIZE, &size, cred, td) == 0) 1928 vattr.va_size = size; 1929 if (noneg && 1930 (vattr.va_size > OFF_MAX || 1931 (offset > 0 && vattr.va_size > OFF_MAX - offset))) { 1932 error = EOVERFLOW; 1933 break; 1934 } 1935 offset += vattr.va_size; 1936 break; 1937 case L_SET: 1938 break; 1939 case SEEK_DATA: 1940 error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td); 1941 break; 1942 case SEEK_HOLE: 1943 error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td); 1944 break; 1945 default: 1946 error = EINVAL; 1947 } 1948 if (error == 0 && noneg && offset < 0) 1949 error = EINVAL; 1950 if (error != 0) 1951 goto drop; 1952 VFS_KNOTE_UNLOCKED(vp, 0); 1953 *(off_t *)(td->td_retval) = offset; 1954 drop: 1955 fdrop(fp, td); 1956 foffset_unlock(fp, offset, error != 0 ? FOF_NOUPDATE : 0); 1957 return (error); 1958 } 1959 1960 #if defined(COMPAT_43) 1961 /* 1962 * Reposition read/write file offset. 1963 */ 1964 #ifndef _SYS_SYSPROTO_H_ 1965 struct olseek_args { 1966 int fd; 1967 long offset; 1968 int whence; 1969 }; 1970 #endif 1971 int 1972 olseek(td, uap) 1973 struct thread *td; 1974 register struct olseek_args /* { 1975 int fd; 1976 long offset; 1977 int whence; 1978 } */ *uap; 1979 { 1980 struct lseek_args /* { 1981 int fd; 1982 int pad; 1983 off_t offset; 1984 int whence; 1985 } */ nuap; 1986 1987 nuap.fd = uap->fd; 1988 nuap.offset = uap->offset; 1989 nuap.whence = uap->whence; 1990 return (sys_lseek(td, &nuap)); 1991 } 1992 #endif /* COMPAT_43 */ 1993 1994 /* Version with the 'pad' argument */ 1995 int 1996 freebsd6_lseek(td, uap) 1997 struct thread *td; 1998 register struct freebsd6_lseek_args *uap; 1999 { 2000 struct lseek_args ouap; 2001 2002 ouap.fd = uap->fd; 2003 ouap.offset = uap->offset; 2004 ouap.whence = uap->whence; 2005 return (sys_lseek(td, &ouap)); 2006 } 2007 2008 /* 2009 * Check access permissions using passed credentials. 2010 */ 2011 static int 2012 vn_access(vp, user_flags, cred, td) 2013 struct vnode *vp; 2014 int user_flags; 2015 struct ucred *cred; 2016 struct thread *td; 2017 { 2018 int error; 2019 accmode_t accmode; 2020 2021 /* Flags == 0 means only check for existence. */ 2022 error = 0; 2023 if (user_flags) { 2024 accmode = 0; 2025 if (user_flags & R_OK) 2026 accmode |= VREAD; 2027 if (user_flags & W_OK) 2028 accmode |= VWRITE; 2029 if (user_flags & X_OK) 2030 accmode |= VEXEC; 2031 #ifdef MAC 2032 error = mac_vnode_check_access(cred, vp, accmode); 2033 if (error) 2034 return (error); 2035 #endif 2036 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2037 error = VOP_ACCESS(vp, accmode, cred, td); 2038 } 2039 return (error); 2040 } 2041 2042 /* 2043 * Check access permissions using "real" credentials. 2044 */ 2045 #ifndef _SYS_SYSPROTO_H_ 2046 struct access_args { 2047 char *path; 2048 int amode; 2049 }; 2050 #endif 2051 int 2052 sys_access(td, uap) 2053 struct thread *td; 2054 register struct access_args /* { 2055 char *path; 2056 int amode; 2057 } */ *uap; 2058 { 2059 2060 return (kern_access(td, uap->path, UIO_USERSPACE, uap->amode)); 2061 } 2062 2063 #ifndef _SYS_SYSPROTO_H_ 2064 struct faccessat_args { 2065 int dirfd; 2066 char *path; 2067 int amode; 2068 int flag; 2069 } 2070 #endif 2071 int 2072 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2073 { 2074 2075 if (uap->flag & ~AT_EACCESS) 2076 return (EINVAL); 2077 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2078 uap->amode)); 2079 } 2080 2081 int 2082 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2083 { 2084 2085 return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, amode)); 2086 } 2087 2088 int 2089 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2090 int flag, int amode) 2091 { 2092 struct ucred *cred, *tmpcred; 2093 struct vnode *vp; 2094 struct nameidata nd; 2095 int error; 2096 2097 /* 2098 * Create and modify a temporary credential instead of one that 2099 * is potentially shared. 2100 */ 2101 if (!(flag & AT_EACCESS)) { 2102 cred = td->td_ucred; 2103 tmpcred = crdup(cred); 2104 tmpcred->cr_uid = cred->cr_ruid; 2105 tmpcred->cr_groups[0] = cred->cr_rgid; 2106 td->td_ucred = tmpcred; 2107 } else 2108 cred = tmpcred = td->td_ucred; 2109 AUDIT_ARG_VALUE(amode); 2110 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2111 AUDITVNODE1, pathseg, path, fd, CAP_FSTAT, td); 2112 if ((error = namei(&nd)) != 0) 2113 goto out1; 2114 vp = nd.ni_vp; 2115 2116 error = vn_access(vp, amode, tmpcred, td); 2117 NDFREE(&nd, NDF_ONLY_PNBUF); 2118 vput(vp); 2119 out1: 2120 if (!(flag & AT_EACCESS)) { 2121 td->td_ucred = cred; 2122 crfree(tmpcred); 2123 } 2124 return (error); 2125 } 2126 2127 /* 2128 * Check access permissions using "effective" credentials. 2129 */ 2130 #ifndef _SYS_SYSPROTO_H_ 2131 struct eaccess_args { 2132 char *path; 2133 int amode; 2134 }; 2135 #endif 2136 int 2137 sys_eaccess(td, uap) 2138 struct thread *td; 2139 register struct eaccess_args /* { 2140 char *path; 2141 int amode; 2142 } */ *uap; 2143 { 2144 2145 return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->amode)); 2146 } 2147 2148 int 2149 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2150 { 2151 2152 return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, amode)); 2153 } 2154 2155 #if defined(COMPAT_43) 2156 /* 2157 * Get file status; this version follows links. 2158 */ 2159 #ifndef _SYS_SYSPROTO_H_ 2160 struct ostat_args { 2161 char *path; 2162 struct ostat *ub; 2163 }; 2164 #endif 2165 int 2166 ostat(td, uap) 2167 struct thread *td; 2168 register struct ostat_args /* { 2169 char *path; 2170 struct ostat *ub; 2171 } */ *uap; 2172 { 2173 struct stat sb; 2174 struct ostat osb; 2175 int error; 2176 2177 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2178 if (error) 2179 return (error); 2180 cvtstat(&sb, &osb); 2181 error = copyout(&osb, uap->ub, sizeof (osb)); 2182 return (error); 2183 } 2184 2185 /* 2186 * Get file status; this version does not follow links. 2187 */ 2188 #ifndef _SYS_SYSPROTO_H_ 2189 struct olstat_args { 2190 char *path; 2191 struct ostat *ub; 2192 }; 2193 #endif 2194 int 2195 olstat(td, uap) 2196 struct thread *td; 2197 register struct olstat_args /* { 2198 char *path; 2199 struct ostat *ub; 2200 } */ *uap; 2201 { 2202 struct stat sb; 2203 struct ostat osb; 2204 int error; 2205 2206 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2207 if (error) 2208 return (error); 2209 cvtstat(&sb, &osb); 2210 error = copyout(&osb, uap->ub, sizeof (osb)); 2211 return (error); 2212 } 2213 2214 /* 2215 * Convert from an old to a new stat structure. 2216 */ 2217 void 2218 cvtstat(st, ost) 2219 struct stat *st; 2220 struct ostat *ost; 2221 { 2222 2223 ost->st_dev = st->st_dev; 2224 ost->st_ino = st->st_ino; 2225 ost->st_mode = st->st_mode; 2226 ost->st_nlink = st->st_nlink; 2227 ost->st_uid = st->st_uid; 2228 ost->st_gid = st->st_gid; 2229 ost->st_rdev = st->st_rdev; 2230 if (st->st_size < (quad_t)1 << 32) 2231 ost->st_size = st->st_size; 2232 else 2233 ost->st_size = -2; 2234 ost->st_atim = st->st_atim; 2235 ost->st_mtim = st->st_mtim; 2236 ost->st_ctim = st->st_ctim; 2237 ost->st_blksize = st->st_blksize; 2238 ost->st_blocks = st->st_blocks; 2239 ost->st_flags = st->st_flags; 2240 ost->st_gen = st->st_gen; 2241 } 2242 #endif /* COMPAT_43 */ 2243 2244 /* 2245 * Get file status; this version follows links. 2246 */ 2247 #ifndef _SYS_SYSPROTO_H_ 2248 struct stat_args { 2249 char *path; 2250 struct stat *ub; 2251 }; 2252 #endif 2253 int 2254 sys_stat(td, uap) 2255 struct thread *td; 2256 register struct stat_args /* { 2257 char *path; 2258 struct stat *ub; 2259 } */ *uap; 2260 { 2261 struct stat sb; 2262 int error; 2263 2264 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2265 if (error == 0) 2266 error = copyout(&sb, uap->ub, sizeof (sb)); 2267 return (error); 2268 } 2269 2270 #ifndef _SYS_SYSPROTO_H_ 2271 struct fstatat_args { 2272 int fd; 2273 char *path; 2274 struct stat *buf; 2275 int flag; 2276 } 2277 #endif 2278 int 2279 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2280 { 2281 struct stat sb; 2282 int error; 2283 2284 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2285 UIO_USERSPACE, &sb); 2286 if (error == 0) 2287 error = copyout(&sb, uap->buf, sizeof (sb)); 2288 return (error); 2289 } 2290 2291 int 2292 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2293 { 2294 2295 return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp)); 2296 } 2297 2298 int 2299 kern_statat(struct thread *td, int flag, int fd, char *path, 2300 enum uio_seg pathseg, struct stat *sbp) 2301 { 2302 2303 return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL)); 2304 } 2305 2306 int 2307 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path, 2308 enum uio_seg pathseg, struct stat *sbp, 2309 void (*hook)(struct vnode *vp, struct stat *sbp)) 2310 { 2311 struct nameidata nd; 2312 struct stat sb; 2313 int error; 2314 2315 if (flag & ~AT_SYMLINK_NOFOLLOW) 2316 return (EINVAL); 2317 2318 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2319 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2320 CAP_FSTAT, td); 2321 2322 if ((error = namei(&nd)) != 0) 2323 return (error); 2324 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2325 if (!error) { 2326 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0); 2327 if (S_ISREG(sb.st_mode)) 2328 SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0); 2329 if (__predict_false(hook != NULL)) 2330 hook(nd.ni_vp, &sb); 2331 } 2332 NDFREE(&nd, NDF_ONLY_PNBUF); 2333 vput(nd.ni_vp); 2334 if (error) 2335 return (error); 2336 *sbp = sb; 2337 #ifdef KTRACE 2338 if (KTRPOINT(td, KTR_STRUCT)) 2339 ktrstat(&sb); 2340 #endif 2341 return (0); 2342 } 2343 2344 /* 2345 * Get file status; this version does not follow links. 2346 */ 2347 #ifndef _SYS_SYSPROTO_H_ 2348 struct lstat_args { 2349 char *path; 2350 struct stat *ub; 2351 }; 2352 #endif 2353 int 2354 sys_lstat(td, uap) 2355 struct thread *td; 2356 register struct lstat_args /* { 2357 char *path; 2358 struct stat *ub; 2359 } */ *uap; 2360 { 2361 struct stat sb; 2362 int error; 2363 2364 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2365 if (error == 0) 2366 error = copyout(&sb, uap->ub, sizeof (sb)); 2367 return (error); 2368 } 2369 2370 int 2371 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2372 { 2373 2374 return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg, 2375 sbp)); 2376 } 2377 2378 /* 2379 * Implementation of the NetBSD [l]stat() functions. 2380 */ 2381 void 2382 cvtnstat(sb, nsb) 2383 struct stat *sb; 2384 struct nstat *nsb; 2385 { 2386 bzero(nsb, sizeof *nsb); 2387 nsb->st_dev = sb->st_dev; 2388 nsb->st_ino = sb->st_ino; 2389 nsb->st_mode = sb->st_mode; 2390 nsb->st_nlink = sb->st_nlink; 2391 nsb->st_uid = sb->st_uid; 2392 nsb->st_gid = sb->st_gid; 2393 nsb->st_rdev = sb->st_rdev; 2394 nsb->st_atim = sb->st_atim; 2395 nsb->st_mtim = sb->st_mtim; 2396 nsb->st_ctim = sb->st_ctim; 2397 nsb->st_size = sb->st_size; 2398 nsb->st_blocks = sb->st_blocks; 2399 nsb->st_blksize = sb->st_blksize; 2400 nsb->st_flags = sb->st_flags; 2401 nsb->st_gen = sb->st_gen; 2402 nsb->st_birthtim = sb->st_birthtim; 2403 } 2404 2405 #ifndef _SYS_SYSPROTO_H_ 2406 struct nstat_args { 2407 char *path; 2408 struct nstat *ub; 2409 }; 2410 #endif 2411 int 2412 sys_nstat(td, uap) 2413 struct thread *td; 2414 register struct nstat_args /* { 2415 char *path; 2416 struct nstat *ub; 2417 } */ *uap; 2418 { 2419 struct stat sb; 2420 struct nstat nsb; 2421 int error; 2422 2423 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2424 if (error) 2425 return (error); 2426 cvtnstat(&sb, &nsb); 2427 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2428 return (error); 2429 } 2430 2431 /* 2432 * NetBSD lstat. Get file status; this version does not follow links. 2433 */ 2434 #ifndef _SYS_SYSPROTO_H_ 2435 struct lstat_args { 2436 char *path; 2437 struct stat *ub; 2438 }; 2439 #endif 2440 int 2441 sys_nlstat(td, uap) 2442 struct thread *td; 2443 register struct nlstat_args /* { 2444 char *path; 2445 struct nstat *ub; 2446 } */ *uap; 2447 { 2448 struct stat sb; 2449 struct nstat nsb; 2450 int error; 2451 2452 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2453 if (error) 2454 return (error); 2455 cvtnstat(&sb, &nsb); 2456 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2457 return (error); 2458 } 2459 2460 /* 2461 * Get configurable pathname variables. 2462 */ 2463 #ifndef _SYS_SYSPROTO_H_ 2464 struct pathconf_args { 2465 char *path; 2466 int name; 2467 }; 2468 #endif 2469 int 2470 sys_pathconf(td, uap) 2471 struct thread *td; 2472 register struct pathconf_args /* { 2473 char *path; 2474 int name; 2475 } */ *uap; 2476 { 2477 2478 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2479 } 2480 2481 #ifndef _SYS_SYSPROTO_H_ 2482 struct lpathconf_args { 2483 char *path; 2484 int name; 2485 }; 2486 #endif 2487 int 2488 sys_lpathconf(td, uap) 2489 struct thread *td; 2490 register struct lpathconf_args /* { 2491 char *path; 2492 int name; 2493 } */ *uap; 2494 { 2495 2496 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2497 NOFOLLOW)); 2498 } 2499 2500 int 2501 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2502 u_long flags) 2503 { 2504 struct nameidata nd; 2505 int error; 2506 2507 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2508 pathseg, path, td); 2509 if ((error = namei(&nd)) != 0) 2510 return (error); 2511 NDFREE(&nd, NDF_ONLY_PNBUF); 2512 2513 /* If asynchronous I/O is available, it works for all files. */ 2514 if (name == _PC_ASYNC_IO) 2515 td->td_retval[0] = async_io_version; 2516 else 2517 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2518 vput(nd.ni_vp); 2519 return (error); 2520 } 2521 2522 /* 2523 * Return target name of a symbolic link. 2524 */ 2525 #ifndef _SYS_SYSPROTO_H_ 2526 struct readlink_args { 2527 char *path; 2528 char *buf; 2529 size_t count; 2530 }; 2531 #endif 2532 int 2533 sys_readlink(td, uap) 2534 struct thread *td; 2535 register struct readlink_args /* { 2536 char *path; 2537 char *buf; 2538 size_t count; 2539 } */ *uap; 2540 { 2541 2542 return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf, 2543 UIO_USERSPACE, uap->count)); 2544 } 2545 #ifndef _SYS_SYSPROTO_H_ 2546 struct readlinkat_args { 2547 int fd; 2548 char *path; 2549 char *buf; 2550 size_t bufsize; 2551 }; 2552 #endif 2553 int 2554 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2555 { 2556 2557 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2558 uap->buf, UIO_USERSPACE, uap->bufsize)); 2559 } 2560 2561 int 2562 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf, 2563 enum uio_seg bufseg, size_t count) 2564 { 2565 2566 return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg, 2567 count)); 2568 } 2569 2570 int 2571 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2572 char *buf, enum uio_seg bufseg, size_t count) 2573 { 2574 struct vnode *vp; 2575 struct iovec aiov; 2576 struct uio auio; 2577 int error; 2578 struct nameidata nd; 2579 2580 if (count > IOSIZE_MAX) 2581 return (EINVAL); 2582 2583 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2584 pathseg, path, fd, td); 2585 2586 if ((error = namei(&nd)) != 0) 2587 return (error); 2588 NDFREE(&nd, NDF_ONLY_PNBUF); 2589 vp = nd.ni_vp; 2590 #ifdef MAC 2591 error = mac_vnode_check_readlink(td->td_ucred, vp); 2592 if (error) { 2593 vput(vp); 2594 return (error); 2595 } 2596 #endif 2597 if (vp->v_type != VLNK) 2598 error = EINVAL; 2599 else { 2600 aiov.iov_base = buf; 2601 aiov.iov_len = count; 2602 auio.uio_iov = &aiov; 2603 auio.uio_iovcnt = 1; 2604 auio.uio_offset = 0; 2605 auio.uio_rw = UIO_READ; 2606 auio.uio_segflg = bufseg; 2607 auio.uio_td = td; 2608 auio.uio_resid = count; 2609 error = VOP_READLINK(vp, &auio, td->td_ucred); 2610 } 2611 vput(vp); 2612 td->td_retval[0] = count - auio.uio_resid; 2613 return (error); 2614 } 2615 2616 /* 2617 * Common implementation code for chflags() and fchflags(). 2618 */ 2619 static int 2620 setfflags(td, vp, flags) 2621 struct thread *td; 2622 struct vnode *vp; 2623 u_long flags; 2624 { 2625 int error; 2626 struct mount *mp; 2627 struct vattr vattr; 2628 2629 /* We can't support the value matching VNOVAL. */ 2630 if (flags == VNOVAL) 2631 return (EOPNOTSUPP); 2632 2633 /* 2634 * Prevent non-root users from setting flags on devices. When 2635 * a device is reused, users can retain ownership of the device 2636 * if they are allowed to set flags and programs assume that 2637 * chown can't fail when done as root. 2638 */ 2639 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2640 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2641 if (error) 2642 return (error); 2643 } 2644 2645 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2646 return (error); 2647 VATTR_NULL(&vattr); 2648 vattr.va_flags = flags; 2649 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2650 #ifdef MAC 2651 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2652 if (error == 0) 2653 #endif 2654 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2655 VOP_UNLOCK(vp, 0); 2656 vn_finished_write(mp); 2657 return (error); 2658 } 2659 2660 /* 2661 * Change flags of a file given a path name. 2662 */ 2663 #ifndef _SYS_SYSPROTO_H_ 2664 struct chflags_args { 2665 const char *path; 2666 u_long flags; 2667 }; 2668 #endif 2669 int 2670 sys_chflags(td, uap) 2671 struct thread *td; 2672 register struct chflags_args /* { 2673 const char *path; 2674 u_long flags; 2675 } */ *uap; 2676 { 2677 2678 return (kern_chflags(td, uap->path, UIO_USERSPACE, uap->flags)); 2679 } 2680 2681 #ifndef _SYS_SYSPROTO_H_ 2682 struct chflagsat_args { 2683 int fd; 2684 const char *path; 2685 u_long flags; 2686 int atflag; 2687 } 2688 #endif 2689 int 2690 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2691 { 2692 int fd = uap->fd; 2693 const char *path = uap->path; 2694 u_long flags = uap->flags; 2695 int atflag = uap->atflag; 2696 2697 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2698 return (EINVAL); 2699 2700 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2701 } 2702 2703 static int 2704 kern_chflags(struct thread *td, const char *path, enum uio_seg pathseg, 2705 u_long flags) 2706 { 2707 2708 return (kern_chflagsat(td, AT_FDCWD, path, pathseg, flags, 0)); 2709 } 2710 2711 /* 2712 * Same as chflags() but doesn't follow symlinks. 2713 */ 2714 int 2715 sys_lchflags(td, uap) 2716 struct thread *td; 2717 register struct lchflags_args /* { 2718 const char *path; 2719 u_long flags; 2720 } */ *uap; 2721 { 2722 2723 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2724 uap->flags, AT_SYMLINK_NOFOLLOW)); 2725 } 2726 2727 static int 2728 kern_chflagsat(struct thread *td, int fd, const char *path, 2729 enum uio_seg pathseg, u_long flags, int atflag) 2730 { 2731 struct nameidata nd; 2732 int error, follow; 2733 2734 AUDIT_ARG_FFLAGS(flags); 2735 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2736 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2737 CAP_FCHFLAGS, td); 2738 if ((error = namei(&nd)) != 0) 2739 return (error); 2740 NDFREE(&nd, NDF_ONLY_PNBUF); 2741 error = setfflags(td, nd.ni_vp, flags); 2742 vrele(nd.ni_vp); 2743 return (error); 2744 } 2745 2746 /* 2747 * Change flags of a file given a file descriptor. 2748 */ 2749 #ifndef _SYS_SYSPROTO_H_ 2750 struct fchflags_args { 2751 int fd; 2752 u_long flags; 2753 }; 2754 #endif 2755 int 2756 sys_fchflags(td, uap) 2757 struct thread *td; 2758 register struct fchflags_args /* { 2759 int fd; 2760 u_long flags; 2761 } */ *uap; 2762 { 2763 struct file *fp; 2764 int error; 2765 2766 AUDIT_ARG_FD(uap->fd); 2767 AUDIT_ARG_FFLAGS(uap->flags); 2768 if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FCHFLAGS, 2769 &fp)) != 0) 2770 return (error); 2771 #ifdef AUDIT 2772 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2773 AUDIT_ARG_VNODE1(fp->f_vnode); 2774 VOP_UNLOCK(fp->f_vnode, 0); 2775 #endif 2776 error = setfflags(td, fp->f_vnode, uap->flags); 2777 fdrop(fp, td); 2778 return (error); 2779 } 2780 2781 /* 2782 * Common implementation code for chmod(), lchmod() and fchmod(). 2783 */ 2784 int 2785 setfmode(td, cred, vp, mode) 2786 struct thread *td; 2787 struct ucred *cred; 2788 struct vnode *vp; 2789 int mode; 2790 { 2791 int error; 2792 struct mount *mp; 2793 struct vattr vattr; 2794 2795 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2796 return (error); 2797 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2798 VATTR_NULL(&vattr); 2799 vattr.va_mode = mode & ALLPERMS; 2800 #ifdef MAC 2801 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2802 if (error == 0) 2803 #endif 2804 error = VOP_SETATTR(vp, &vattr, cred); 2805 VOP_UNLOCK(vp, 0); 2806 vn_finished_write(mp); 2807 return (error); 2808 } 2809 2810 /* 2811 * Change mode of a file given path name. 2812 */ 2813 #ifndef _SYS_SYSPROTO_H_ 2814 struct chmod_args { 2815 char *path; 2816 int mode; 2817 }; 2818 #endif 2819 int 2820 sys_chmod(td, uap) 2821 struct thread *td; 2822 register struct chmod_args /* { 2823 char *path; 2824 int mode; 2825 } */ *uap; 2826 { 2827 2828 return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode)); 2829 } 2830 2831 #ifndef _SYS_SYSPROTO_H_ 2832 struct fchmodat_args { 2833 int dirfd; 2834 char *path; 2835 mode_t mode; 2836 int flag; 2837 } 2838 #endif 2839 int 2840 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2841 { 2842 int flag = uap->flag; 2843 int fd = uap->fd; 2844 char *path = uap->path; 2845 mode_t mode = uap->mode; 2846 2847 if (flag & ~AT_SYMLINK_NOFOLLOW) 2848 return (EINVAL); 2849 2850 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2851 } 2852 2853 int 2854 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode) 2855 { 2856 2857 return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0)); 2858 } 2859 2860 /* 2861 * Change mode of a file given path name (don't follow links.) 2862 */ 2863 #ifndef _SYS_SYSPROTO_H_ 2864 struct lchmod_args { 2865 char *path; 2866 int mode; 2867 }; 2868 #endif 2869 int 2870 sys_lchmod(td, uap) 2871 struct thread *td; 2872 register struct lchmod_args /* { 2873 char *path; 2874 int mode; 2875 } */ *uap; 2876 { 2877 2878 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2879 uap->mode, AT_SYMLINK_NOFOLLOW)); 2880 } 2881 2882 int 2883 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2884 mode_t mode, int flag) 2885 { 2886 int error; 2887 struct nameidata nd; 2888 int follow; 2889 2890 AUDIT_ARG_MODE(mode); 2891 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2892 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2893 CAP_FCHMOD, td); 2894 if ((error = namei(&nd)) != 0) 2895 return (error); 2896 NDFREE(&nd, NDF_ONLY_PNBUF); 2897 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2898 vrele(nd.ni_vp); 2899 return (error); 2900 } 2901 2902 /* 2903 * Change mode of a file given a file descriptor. 2904 */ 2905 #ifndef _SYS_SYSPROTO_H_ 2906 struct fchmod_args { 2907 int fd; 2908 int mode; 2909 }; 2910 #endif 2911 int 2912 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2913 { 2914 struct file *fp; 2915 int error; 2916 2917 AUDIT_ARG_FD(uap->fd); 2918 AUDIT_ARG_MODE(uap->mode); 2919 2920 error = fget(td, uap->fd, CAP_FCHMOD, &fp); 2921 if (error != 0) 2922 return (error); 2923 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2924 fdrop(fp, td); 2925 return (error); 2926 } 2927 2928 /* 2929 * Common implementation for chown(), lchown(), and fchown() 2930 */ 2931 int 2932 setfown(td, cred, vp, uid, gid) 2933 struct thread *td; 2934 struct ucred *cred; 2935 struct vnode *vp; 2936 uid_t uid; 2937 gid_t gid; 2938 { 2939 int error; 2940 struct mount *mp; 2941 struct vattr vattr; 2942 2943 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2944 return (error); 2945 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2946 VATTR_NULL(&vattr); 2947 vattr.va_uid = uid; 2948 vattr.va_gid = gid; 2949 #ifdef MAC 2950 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2951 vattr.va_gid); 2952 if (error == 0) 2953 #endif 2954 error = VOP_SETATTR(vp, &vattr, cred); 2955 VOP_UNLOCK(vp, 0); 2956 vn_finished_write(mp); 2957 return (error); 2958 } 2959 2960 /* 2961 * Set ownership given a path name. 2962 */ 2963 #ifndef _SYS_SYSPROTO_H_ 2964 struct chown_args { 2965 char *path; 2966 int uid; 2967 int gid; 2968 }; 2969 #endif 2970 int 2971 sys_chown(td, uap) 2972 struct thread *td; 2973 register struct chown_args /* { 2974 char *path; 2975 int uid; 2976 int gid; 2977 } */ *uap; 2978 { 2979 2980 return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 2981 } 2982 2983 #ifndef _SYS_SYSPROTO_H_ 2984 struct fchownat_args { 2985 int fd; 2986 const char * path; 2987 uid_t uid; 2988 gid_t gid; 2989 int flag; 2990 }; 2991 #endif 2992 int 2993 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2994 { 2995 int flag; 2996 2997 flag = uap->flag; 2998 if (flag & ~AT_SYMLINK_NOFOLLOW) 2999 return (EINVAL); 3000 3001 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 3002 uap->gid, uap->flag)); 3003 } 3004 3005 int 3006 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 3007 int gid) 3008 { 3009 3010 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0)); 3011 } 3012 3013 int 3014 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3015 int uid, int gid, int flag) 3016 { 3017 struct nameidata nd; 3018 int error, follow; 3019 3020 AUDIT_ARG_OWNER(uid, gid); 3021 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3022 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 3023 CAP_FCHOWN, td); 3024 3025 if ((error = namei(&nd)) != 0) 3026 return (error); 3027 NDFREE(&nd, NDF_ONLY_PNBUF); 3028 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3029 vrele(nd.ni_vp); 3030 return (error); 3031 } 3032 3033 /* 3034 * Set ownership given a path name, do not cross symlinks. 3035 */ 3036 #ifndef _SYS_SYSPROTO_H_ 3037 struct lchown_args { 3038 char *path; 3039 int uid; 3040 int gid; 3041 }; 3042 #endif 3043 int 3044 sys_lchown(td, uap) 3045 struct thread *td; 3046 register struct lchown_args /* { 3047 char *path; 3048 int uid; 3049 int gid; 3050 } */ *uap; 3051 { 3052 3053 return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 3054 } 3055 3056 int 3057 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 3058 int gid) 3059 { 3060 3061 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 3062 AT_SYMLINK_NOFOLLOW)); 3063 } 3064 3065 /* 3066 * Set ownership given a file descriptor. 3067 */ 3068 #ifndef _SYS_SYSPROTO_H_ 3069 struct fchown_args { 3070 int fd; 3071 int uid; 3072 int gid; 3073 }; 3074 #endif 3075 int 3076 sys_fchown(td, uap) 3077 struct thread *td; 3078 register struct fchown_args /* { 3079 int fd; 3080 int uid; 3081 int gid; 3082 } */ *uap; 3083 { 3084 struct file *fp; 3085 int error; 3086 3087 AUDIT_ARG_FD(uap->fd); 3088 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3089 error = fget(td, uap->fd, CAP_FCHOWN, &fp); 3090 if (error != 0) 3091 return (error); 3092 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3093 fdrop(fp, td); 3094 return (error); 3095 } 3096 3097 /* 3098 * Common implementation code for utimes(), lutimes(), and futimes(). 3099 */ 3100 static int 3101 getutimes(usrtvp, tvpseg, tsp) 3102 const struct timeval *usrtvp; 3103 enum uio_seg tvpseg; 3104 struct timespec *tsp; 3105 { 3106 struct timeval tv[2]; 3107 const struct timeval *tvp; 3108 int error; 3109 3110 if (usrtvp == NULL) { 3111 vfs_timestamp(&tsp[0]); 3112 tsp[1] = tsp[0]; 3113 } else { 3114 if (tvpseg == UIO_SYSSPACE) { 3115 tvp = usrtvp; 3116 } else { 3117 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3118 return (error); 3119 tvp = tv; 3120 } 3121 3122 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3123 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3124 return (EINVAL); 3125 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3126 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3127 } 3128 return (0); 3129 } 3130 3131 /* 3132 * Common implementation code for utimes(), lutimes(), and futimes(). 3133 */ 3134 static int 3135 setutimes(td, vp, ts, numtimes, nullflag) 3136 struct thread *td; 3137 struct vnode *vp; 3138 const struct timespec *ts; 3139 int numtimes; 3140 int nullflag; 3141 { 3142 int error, setbirthtime; 3143 struct mount *mp; 3144 struct vattr vattr; 3145 3146 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3147 return (error); 3148 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3149 setbirthtime = 0; 3150 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3151 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3152 setbirthtime = 1; 3153 VATTR_NULL(&vattr); 3154 vattr.va_atime = ts[0]; 3155 vattr.va_mtime = ts[1]; 3156 if (setbirthtime) 3157 vattr.va_birthtime = ts[1]; 3158 if (numtimes > 2) 3159 vattr.va_birthtime = ts[2]; 3160 if (nullflag) 3161 vattr.va_vaflags |= VA_UTIMES_NULL; 3162 #ifdef MAC 3163 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3164 vattr.va_mtime); 3165 #endif 3166 if (error == 0) 3167 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3168 VOP_UNLOCK(vp, 0); 3169 vn_finished_write(mp); 3170 return (error); 3171 } 3172 3173 /* 3174 * Set the access and modification times of a file. 3175 */ 3176 #ifndef _SYS_SYSPROTO_H_ 3177 struct utimes_args { 3178 char *path; 3179 struct timeval *tptr; 3180 }; 3181 #endif 3182 int 3183 sys_utimes(td, uap) 3184 struct thread *td; 3185 register struct utimes_args /* { 3186 char *path; 3187 struct timeval *tptr; 3188 } */ *uap; 3189 { 3190 3191 return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3192 UIO_USERSPACE)); 3193 } 3194 3195 #ifndef _SYS_SYSPROTO_H_ 3196 struct futimesat_args { 3197 int fd; 3198 const char * path; 3199 const struct timeval * times; 3200 }; 3201 #endif 3202 int 3203 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3204 { 3205 3206 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3207 uap->times, UIO_USERSPACE)); 3208 } 3209 3210 int 3211 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg, 3212 struct timeval *tptr, enum uio_seg tptrseg) 3213 { 3214 3215 return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg)); 3216 } 3217 3218 int 3219 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3220 struct timeval *tptr, enum uio_seg tptrseg) 3221 { 3222 struct nameidata nd; 3223 struct timespec ts[2]; 3224 int error; 3225 3226 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3227 return (error); 3228 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3229 CAP_FUTIMES, td); 3230 3231 if ((error = namei(&nd)) != 0) 3232 return (error); 3233 NDFREE(&nd, NDF_ONLY_PNBUF); 3234 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3235 vrele(nd.ni_vp); 3236 return (error); 3237 } 3238 3239 /* 3240 * Set the access and modification times of a file. 3241 */ 3242 #ifndef _SYS_SYSPROTO_H_ 3243 struct lutimes_args { 3244 char *path; 3245 struct timeval *tptr; 3246 }; 3247 #endif 3248 int 3249 sys_lutimes(td, uap) 3250 struct thread *td; 3251 register struct lutimes_args /* { 3252 char *path; 3253 struct timeval *tptr; 3254 } */ *uap; 3255 { 3256 3257 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3258 UIO_USERSPACE)); 3259 } 3260 3261 int 3262 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3263 struct timeval *tptr, enum uio_seg tptrseg) 3264 { 3265 struct timespec ts[2]; 3266 int error; 3267 struct nameidata nd; 3268 3269 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3270 return (error); 3271 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3272 if ((error = namei(&nd)) != 0) 3273 return (error); 3274 NDFREE(&nd, NDF_ONLY_PNBUF); 3275 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3276 vrele(nd.ni_vp); 3277 return (error); 3278 } 3279 3280 /* 3281 * Set the access and modification times of a file. 3282 */ 3283 #ifndef _SYS_SYSPROTO_H_ 3284 struct futimes_args { 3285 int fd; 3286 struct timeval *tptr; 3287 }; 3288 #endif 3289 int 3290 sys_futimes(td, uap) 3291 struct thread *td; 3292 register struct futimes_args /* { 3293 int fd; 3294 struct timeval *tptr; 3295 } */ *uap; 3296 { 3297 3298 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3299 } 3300 3301 int 3302 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3303 enum uio_seg tptrseg) 3304 { 3305 struct timespec ts[2]; 3306 struct file *fp; 3307 int error; 3308 3309 AUDIT_ARG_FD(fd); 3310 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3311 return (error); 3312 if ((error = getvnode(td->td_proc->p_fd, fd, CAP_FUTIMES, &fp)) != 0) 3313 return (error); 3314 #ifdef AUDIT 3315 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3316 AUDIT_ARG_VNODE1(fp->f_vnode); 3317 VOP_UNLOCK(fp->f_vnode, 0); 3318 #endif 3319 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3320 fdrop(fp, td); 3321 return (error); 3322 } 3323 3324 /* 3325 * Truncate a file given its path name. 3326 */ 3327 #ifndef _SYS_SYSPROTO_H_ 3328 struct truncate_args { 3329 char *path; 3330 int pad; 3331 off_t length; 3332 }; 3333 #endif 3334 int 3335 sys_truncate(td, uap) 3336 struct thread *td; 3337 register struct truncate_args /* { 3338 char *path; 3339 int pad; 3340 off_t length; 3341 } */ *uap; 3342 { 3343 3344 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3345 } 3346 3347 int 3348 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3349 { 3350 struct mount *mp; 3351 struct vnode *vp; 3352 void *rl_cookie; 3353 struct vattr vattr; 3354 struct nameidata nd; 3355 int error; 3356 3357 if (length < 0) 3358 return(EINVAL); 3359 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3360 if ((error = namei(&nd)) != 0) 3361 return (error); 3362 vp = nd.ni_vp; 3363 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3364 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3365 vn_rangelock_unlock(vp, rl_cookie); 3366 vrele(vp); 3367 return (error); 3368 } 3369 NDFREE(&nd, NDF_ONLY_PNBUF); 3370 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3371 if (vp->v_type == VDIR) 3372 error = EISDIR; 3373 #ifdef MAC 3374 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3375 } 3376 #endif 3377 else if ((error = vn_writechk(vp)) == 0 && 3378 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3379 VATTR_NULL(&vattr); 3380 vattr.va_size = length; 3381 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3382 } 3383 VOP_UNLOCK(vp, 0); 3384 vn_finished_write(mp); 3385 vn_rangelock_unlock(vp, rl_cookie); 3386 vrele(vp); 3387 return (error); 3388 } 3389 3390 #if defined(COMPAT_43) 3391 /* 3392 * Truncate a file given its path name. 3393 */ 3394 #ifndef _SYS_SYSPROTO_H_ 3395 struct otruncate_args { 3396 char *path; 3397 long length; 3398 }; 3399 #endif 3400 int 3401 otruncate(td, uap) 3402 struct thread *td; 3403 register struct otruncate_args /* { 3404 char *path; 3405 long length; 3406 } */ *uap; 3407 { 3408 struct truncate_args /* { 3409 char *path; 3410 int pad; 3411 off_t length; 3412 } */ nuap; 3413 3414 nuap.path = uap->path; 3415 nuap.length = uap->length; 3416 return (sys_truncate(td, &nuap)); 3417 } 3418 #endif /* COMPAT_43 */ 3419 3420 /* Versions with the pad argument */ 3421 int 3422 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3423 { 3424 struct truncate_args ouap; 3425 3426 ouap.path = uap->path; 3427 ouap.length = uap->length; 3428 return (sys_truncate(td, &ouap)); 3429 } 3430 3431 int 3432 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3433 { 3434 struct ftruncate_args ouap; 3435 3436 ouap.fd = uap->fd; 3437 ouap.length = uap->length; 3438 return (sys_ftruncate(td, &ouap)); 3439 } 3440 3441 /* 3442 * Sync an open file. 3443 */ 3444 #ifndef _SYS_SYSPROTO_H_ 3445 struct fsync_args { 3446 int fd; 3447 }; 3448 #endif 3449 int 3450 sys_fsync(td, uap) 3451 struct thread *td; 3452 struct fsync_args /* { 3453 int fd; 3454 } */ *uap; 3455 { 3456 struct vnode *vp; 3457 struct mount *mp; 3458 struct file *fp; 3459 int error, lock_flags; 3460 3461 AUDIT_ARG_FD(uap->fd); 3462 if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FSYNC, &fp)) != 0) 3463 return (error); 3464 vp = fp->f_vnode; 3465 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3466 goto drop; 3467 if (MNT_SHARED_WRITES(mp) || 3468 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3469 lock_flags = LK_SHARED; 3470 } else { 3471 lock_flags = LK_EXCLUSIVE; 3472 } 3473 vn_lock(vp, lock_flags | LK_RETRY); 3474 AUDIT_ARG_VNODE1(vp); 3475 if (vp->v_object != NULL) { 3476 VM_OBJECT_WLOCK(vp->v_object); 3477 vm_object_page_clean(vp->v_object, 0, 0, 0); 3478 VM_OBJECT_WUNLOCK(vp->v_object); 3479 } 3480 error = VOP_FSYNC(vp, MNT_WAIT, td); 3481 3482 VOP_UNLOCK(vp, 0); 3483 vn_finished_write(mp); 3484 drop: 3485 fdrop(fp, td); 3486 return (error); 3487 } 3488 3489 /* 3490 * Rename files. Source and destination must either both be directories, or 3491 * both not be directories. If target is a directory, it must be empty. 3492 */ 3493 #ifndef _SYS_SYSPROTO_H_ 3494 struct rename_args { 3495 char *from; 3496 char *to; 3497 }; 3498 #endif 3499 int 3500 sys_rename(td, uap) 3501 struct thread *td; 3502 register struct rename_args /* { 3503 char *from; 3504 char *to; 3505 } */ *uap; 3506 { 3507 3508 return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE)); 3509 } 3510 3511 #ifndef _SYS_SYSPROTO_H_ 3512 struct renameat_args { 3513 int oldfd; 3514 char *old; 3515 int newfd; 3516 char *new; 3517 }; 3518 #endif 3519 int 3520 sys_renameat(struct thread *td, struct renameat_args *uap) 3521 { 3522 3523 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3524 UIO_USERSPACE)); 3525 } 3526 3527 int 3528 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg) 3529 { 3530 3531 return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg)); 3532 } 3533 3534 int 3535 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3536 enum uio_seg pathseg) 3537 { 3538 struct mount *mp = NULL; 3539 struct vnode *tvp, *fvp, *tdvp; 3540 struct nameidata fromnd, tond; 3541 int error; 3542 3543 bwillwrite(); 3544 #ifdef MAC 3545 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3546 AUDITVNODE1, pathseg, old, oldfd, CAP_RENAMEAT, td); 3547 #else 3548 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3549 pathseg, old, oldfd, CAP_RENAMEAT, td); 3550 #endif 3551 3552 if ((error = namei(&fromnd)) != 0) 3553 return (error); 3554 #ifdef MAC 3555 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3556 fromnd.ni_vp, &fromnd.ni_cnd); 3557 VOP_UNLOCK(fromnd.ni_dvp, 0); 3558 if (fromnd.ni_dvp != fromnd.ni_vp) 3559 VOP_UNLOCK(fromnd.ni_vp, 0); 3560 #endif 3561 fvp = fromnd.ni_vp; 3562 if (error == 0) 3563 error = vn_start_write(fvp, &mp, V_WAIT | PCATCH); 3564 if (error != 0) { 3565 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3566 vrele(fromnd.ni_dvp); 3567 vrele(fvp); 3568 goto out1; 3569 } 3570 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3571 SAVESTART | AUDITVNODE2, pathseg, new, newfd, CAP_LINKAT, td); 3572 if (fromnd.ni_vp->v_type == VDIR) 3573 tond.ni_cnd.cn_flags |= WILLBEDIR; 3574 if ((error = namei(&tond)) != 0) { 3575 /* Translate error code for rename("dir1", "dir2/."). */ 3576 if (error == EISDIR && fvp->v_type == VDIR) 3577 error = EINVAL; 3578 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3579 vrele(fromnd.ni_dvp); 3580 vrele(fvp); 3581 vn_finished_write(mp); 3582 goto out1; 3583 } 3584 tdvp = tond.ni_dvp; 3585 tvp = tond.ni_vp; 3586 if (tvp != NULL) { 3587 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3588 error = ENOTDIR; 3589 goto out; 3590 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3591 error = EISDIR; 3592 goto out; 3593 } 3594 #ifdef CAPABILITIES 3595 if (newfd != AT_FDCWD) { 3596 /* 3597 * If the target already exists we require CAP_UNLINKAT 3598 * from 'newfd'. 3599 */ 3600 error = cap_check(tond.ni_filecaps.fc_rights, 3601 CAP_UNLINKAT); 3602 if (error != 0) 3603 goto out; 3604 } 3605 #endif 3606 } 3607 if (fvp == tdvp) { 3608 error = EINVAL; 3609 goto out; 3610 } 3611 /* 3612 * If the source is the same as the destination (that is, if they 3613 * are links to the same vnode), then there is nothing to do. 3614 */ 3615 if (fvp == tvp) 3616 error = -1; 3617 #ifdef MAC 3618 else 3619 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3620 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3621 #endif 3622 out: 3623 if (!error) { 3624 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3625 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3626 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3627 NDFREE(&tond, NDF_ONLY_PNBUF); 3628 } else { 3629 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3630 NDFREE(&tond, NDF_ONLY_PNBUF); 3631 if (tvp) 3632 vput(tvp); 3633 if (tdvp == tvp) 3634 vrele(tdvp); 3635 else 3636 vput(tdvp); 3637 vrele(fromnd.ni_dvp); 3638 vrele(fvp); 3639 } 3640 vrele(tond.ni_startdir); 3641 vn_finished_write(mp); 3642 out1: 3643 if (fromnd.ni_startdir) 3644 vrele(fromnd.ni_startdir); 3645 if (error == -1) 3646 return (0); 3647 return (error); 3648 } 3649 3650 /* 3651 * Make a directory file. 3652 */ 3653 #ifndef _SYS_SYSPROTO_H_ 3654 struct mkdir_args { 3655 char *path; 3656 int mode; 3657 }; 3658 #endif 3659 int 3660 sys_mkdir(td, uap) 3661 struct thread *td; 3662 register struct mkdir_args /* { 3663 char *path; 3664 int mode; 3665 } */ *uap; 3666 { 3667 3668 return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode)); 3669 } 3670 3671 #ifndef _SYS_SYSPROTO_H_ 3672 struct mkdirat_args { 3673 int fd; 3674 char *path; 3675 mode_t mode; 3676 }; 3677 #endif 3678 int 3679 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3680 { 3681 3682 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3683 } 3684 3685 int 3686 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode) 3687 { 3688 3689 return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode)); 3690 } 3691 3692 int 3693 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3694 int mode) 3695 { 3696 struct mount *mp; 3697 struct vnode *vp; 3698 struct vattr vattr; 3699 int error; 3700 struct nameidata nd; 3701 3702 AUDIT_ARG_MODE(mode); 3703 restart: 3704 bwillwrite(); 3705 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 3706 segflg, path, fd, CAP_MKDIRAT, td); 3707 nd.ni_cnd.cn_flags |= WILLBEDIR; 3708 if ((error = namei(&nd)) != 0) 3709 return (error); 3710 vp = nd.ni_vp; 3711 if (vp != NULL) { 3712 NDFREE(&nd, NDF_ONLY_PNBUF); 3713 /* 3714 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3715 * the strange behaviour of leaving the vnode unlocked 3716 * if the target is the same vnode as the parent. 3717 */ 3718 if (vp == nd.ni_dvp) 3719 vrele(nd.ni_dvp); 3720 else 3721 vput(nd.ni_dvp); 3722 vrele(vp); 3723 return (EEXIST); 3724 } 3725 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3726 NDFREE(&nd, NDF_ONLY_PNBUF); 3727 vput(nd.ni_dvp); 3728 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3729 return (error); 3730 goto restart; 3731 } 3732 VATTR_NULL(&vattr); 3733 vattr.va_type = VDIR; 3734 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3735 #ifdef MAC 3736 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3737 &vattr); 3738 if (error) 3739 goto out; 3740 #endif 3741 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3742 #ifdef MAC 3743 out: 3744 #endif 3745 NDFREE(&nd, NDF_ONLY_PNBUF); 3746 vput(nd.ni_dvp); 3747 if (!error) 3748 vput(nd.ni_vp); 3749 vn_finished_write(mp); 3750 return (error); 3751 } 3752 3753 /* 3754 * Remove a directory file. 3755 */ 3756 #ifndef _SYS_SYSPROTO_H_ 3757 struct rmdir_args { 3758 char *path; 3759 }; 3760 #endif 3761 int 3762 sys_rmdir(td, uap) 3763 struct thread *td; 3764 struct rmdir_args /* { 3765 char *path; 3766 } */ *uap; 3767 { 3768 3769 return (kern_rmdir(td, uap->path, UIO_USERSPACE)); 3770 } 3771 3772 int 3773 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg) 3774 { 3775 3776 return (kern_rmdirat(td, AT_FDCWD, path, pathseg)); 3777 } 3778 3779 int 3780 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3781 { 3782 struct mount *mp; 3783 struct vnode *vp; 3784 int error; 3785 struct nameidata nd; 3786 3787 restart: 3788 bwillwrite(); 3789 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3790 pathseg, path, fd, CAP_UNLINKAT, td); 3791 if ((error = namei(&nd)) != 0) 3792 return (error); 3793 vp = nd.ni_vp; 3794 if (vp->v_type != VDIR) { 3795 error = ENOTDIR; 3796 goto out; 3797 } 3798 /* 3799 * No rmdir "." please. 3800 */ 3801 if (nd.ni_dvp == vp) { 3802 error = EINVAL; 3803 goto out; 3804 } 3805 /* 3806 * The root of a mounted filesystem cannot be deleted. 3807 */ 3808 if (vp->v_vflag & VV_ROOT) { 3809 error = EBUSY; 3810 goto out; 3811 } 3812 #ifdef MAC 3813 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3814 &nd.ni_cnd); 3815 if (error) 3816 goto out; 3817 #endif 3818 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3819 NDFREE(&nd, NDF_ONLY_PNBUF); 3820 vput(vp); 3821 if (nd.ni_dvp == vp) 3822 vrele(nd.ni_dvp); 3823 else 3824 vput(nd.ni_dvp); 3825 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3826 return (error); 3827 goto restart; 3828 } 3829 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3830 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3831 vn_finished_write(mp); 3832 out: 3833 NDFREE(&nd, NDF_ONLY_PNBUF); 3834 vput(vp); 3835 if (nd.ni_dvp == vp) 3836 vrele(nd.ni_dvp); 3837 else 3838 vput(nd.ni_dvp); 3839 return (error); 3840 } 3841 3842 #ifdef COMPAT_43 3843 /* 3844 * Read a block of directory entries in a filesystem independent format. 3845 */ 3846 #ifndef _SYS_SYSPROTO_H_ 3847 struct ogetdirentries_args { 3848 int fd; 3849 char *buf; 3850 u_int count; 3851 long *basep; 3852 }; 3853 #endif 3854 int 3855 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3856 { 3857 long loff; 3858 int error; 3859 3860 error = kern_ogetdirentries(td, uap, &loff); 3861 if (error == 0) 3862 error = copyout(&loff, uap->basep, sizeof(long)); 3863 return (error); 3864 } 3865 3866 int 3867 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3868 long *ploff) 3869 { 3870 struct vnode *vp; 3871 struct file *fp; 3872 struct uio auio, kuio; 3873 struct iovec aiov, kiov; 3874 struct dirent *dp, *edp; 3875 caddr_t dirbuf; 3876 int error, eofflag, readcnt; 3877 long loff; 3878 off_t foffset; 3879 3880 /* XXX arbitrary sanity limit on `count'. */ 3881 if (uap->count > 64 * 1024) 3882 return (EINVAL); 3883 if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ, &fp)) != 0) 3884 return (error); 3885 if ((fp->f_flag & FREAD) == 0) { 3886 fdrop(fp, td); 3887 return (EBADF); 3888 } 3889 vp = fp->f_vnode; 3890 foffset = foffset_lock(fp, 0); 3891 unionread: 3892 if (vp->v_type != VDIR) { 3893 foffset_unlock(fp, foffset, 0); 3894 fdrop(fp, td); 3895 return (EINVAL); 3896 } 3897 aiov.iov_base = uap->buf; 3898 aiov.iov_len = uap->count; 3899 auio.uio_iov = &aiov; 3900 auio.uio_iovcnt = 1; 3901 auio.uio_rw = UIO_READ; 3902 auio.uio_segflg = UIO_USERSPACE; 3903 auio.uio_td = td; 3904 auio.uio_resid = uap->count; 3905 vn_lock(vp, LK_SHARED | LK_RETRY); 3906 loff = auio.uio_offset = foffset; 3907 #ifdef MAC 3908 error = mac_vnode_check_readdir(td->td_ucred, vp); 3909 if (error) { 3910 VOP_UNLOCK(vp, 0); 3911 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3912 fdrop(fp, td); 3913 return (error); 3914 } 3915 #endif 3916 # if (BYTE_ORDER != LITTLE_ENDIAN) 3917 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3918 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3919 NULL, NULL); 3920 foffset = auio.uio_offset; 3921 } else 3922 # endif 3923 { 3924 kuio = auio; 3925 kuio.uio_iov = &kiov; 3926 kuio.uio_segflg = UIO_SYSSPACE; 3927 kiov.iov_len = uap->count; 3928 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3929 kiov.iov_base = dirbuf; 3930 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3931 NULL, NULL); 3932 foffset = kuio.uio_offset; 3933 if (error == 0) { 3934 readcnt = uap->count - kuio.uio_resid; 3935 edp = (struct dirent *)&dirbuf[readcnt]; 3936 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3937 # if (BYTE_ORDER == LITTLE_ENDIAN) 3938 /* 3939 * The expected low byte of 3940 * dp->d_namlen is our dp->d_type. 3941 * The high MBZ byte of dp->d_namlen 3942 * is our dp->d_namlen. 3943 */ 3944 dp->d_type = dp->d_namlen; 3945 dp->d_namlen = 0; 3946 # else 3947 /* 3948 * The dp->d_type is the high byte 3949 * of the expected dp->d_namlen, 3950 * so must be zero'ed. 3951 */ 3952 dp->d_type = 0; 3953 # endif 3954 if (dp->d_reclen > 0) { 3955 dp = (struct dirent *) 3956 ((char *)dp + dp->d_reclen); 3957 } else { 3958 error = EIO; 3959 break; 3960 } 3961 } 3962 if (dp >= edp) 3963 error = uiomove(dirbuf, readcnt, &auio); 3964 } 3965 free(dirbuf, M_TEMP); 3966 } 3967 if (error) { 3968 VOP_UNLOCK(vp, 0); 3969 foffset_unlock(fp, foffset, 0); 3970 fdrop(fp, td); 3971 return (error); 3972 } 3973 if (uap->count == auio.uio_resid && 3974 (vp->v_vflag & VV_ROOT) && 3975 (vp->v_mount->mnt_flag & MNT_UNION)) { 3976 struct vnode *tvp = vp; 3977 vp = vp->v_mount->mnt_vnodecovered; 3978 VREF(vp); 3979 fp->f_vnode = vp; 3980 fp->f_data = vp; 3981 foffset = 0; 3982 vput(tvp); 3983 goto unionread; 3984 } 3985 VOP_UNLOCK(vp, 0); 3986 foffset_unlock(fp, foffset, 0); 3987 fdrop(fp, td); 3988 td->td_retval[0] = uap->count - auio.uio_resid; 3989 if (error == 0) 3990 *ploff = loff; 3991 return (error); 3992 } 3993 #endif /* COMPAT_43 */ 3994 3995 /* 3996 * Read a block of directory entries in a filesystem independent format. 3997 */ 3998 #ifndef _SYS_SYSPROTO_H_ 3999 struct getdirentries_args { 4000 int fd; 4001 char *buf; 4002 u_int count; 4003 long *basep; 4004 }; 4005 #endif 4006 int 4007 sys_getdirentries(td, uap) 4008 struct thread *td; 4009 register struct getdirentries_args /* { 4010 int fd; 4011 char *buf; 4012 u_int count; 4013 long *basep; 4014 } */ *uap; 4015 { 4016 long base; 4017 int error; 4018 4019 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4020 NULL, UIO_USERSPACE); 4021 if (error) 4022 return (error); 4023 if (uap->basep != NULL) 4024 error = copyout(&base, uap->basep, sizeof(long)); 4025 return (error); 4026 } 4027 4028 int 4029 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 4030 long *basep, ssize_t *residp, enum uio_seg bufseg) 4031 { 4032 struct vnode *vp; 4033 struct file *fp; 4034 struct uio auio; 4035 struct iovec aiov; 4036 long loff; 4037 int error, eofflag; 4038 off_t foffset; 4039 4040 AUDIT_ARG_FD(fd); 4041 if (count > IOSIZE_MAX) 4042 return (EINVAL); 4043 auio.uio_resid = count; 4044 if ((error = getvnode(td->td_proc->p_fd, fd, CAP_READ, &fp)) != 0) 4045 return (error); 4046 if ((fp->f_flag & FREAD) == 0) { 4047 fdrop(fp, td); 4048 return (EBADF); 4049 } 4050 vp = fp->f_vnode; 4051 foffset = foffset_lock(fp, 0); 4052 unionread: 4053 if (vp->v_type != VDIR) { 4054 error = EINVAL; 4055 goto fail; 4056 } 4057 aiov.iov_base = buf; 4058 aiov.iov_len = count; 4059 auio.uio_iov = &aiov; 4060 auio.uio_iovcnt = 1; 4061 auio.uio_rw = UIO_READ; 4062 auio.uio_segflg = bufseg; 4063 auio.uio_td = td; 4064 vn_lock(vp, LK_SHARED | LK_RETRY); 4065 AUDIT_ARG_VNODE1(vp); 4066 loff = auio.uio_offset = foffset; 4067 #ifdef MAC 4068 error = mac_vnode_check_readdir(td->td_ucred, vp); 4069 if (error == 0) 4070 #endif 4071 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4072 NULL); 4073 foffset = auio.uio_offset; 4074 if (error) { 4075 VOP_UNLOCK(vp, 0); 4076 goto fail; 4077 } 4078 if (count == auio.uio_resid && 4079 (vp->v_vflag & VV_ROOT) && 4080 (vp->v_mount->mnt_flag & MNT_UNION)) { 4081 struct vnode *tvp = vp; 4082 vp = vp->v_mount->mnt_vnodecovered; 4083 VREF(vp); 4084 fp->f_vnode = vp; 4085 fp->f_data = vp; 4086 foffset = 0; 4087 vput(tvp); 4088 goto unionread; 4089 } 4090 VOP_UNLOCK(vp, 0); 4091 *basep = loff; 4092 if (residp != NULL) 4093 *residp = auio.uio_resid; 4094 td->td_retval[0] = count - auio.uio_resid; 4095 fail: 4096 foffset_unlock(fp, foffset, 0); 4097 fdrop(fp, td); 4098 return (error); 4099 } 4100 4101 #ifndef _SYS_SYSPROTO_H_ 4102 struct getdents_args { 4103 int fd; 4104 char *buf; 4105 size_t count; 4106 }; 4107 #endif 4108 int 4109 sys_getdents(td, uap) 4110 struct thread *td; 4111 register struct getdents_args /* { 4112 int fd; 4113 char *buf; 4114 u_int count; 4115 } */ *uap; 4116 { 4117 struct getdirentries_args ap; 4118 ap.fd = uap->fd; 4119 ap.buf = uap->buf; 4120 ap.count = uap->count; 4121 ap.basep = NULL; 4122 return (sys_getdirentries(td, &ap)); 4123 } 4124 4125 /* 4126 * Set the mode mask for creation of filesystem nodes. 4127 */ 4128 #ifndef _SYS_SYSPROTO_H_ 4129 struct umask_args { 4130 int newmask; 4131 }; 4132 #endif 4133 int 4134 sys_umask(td, uap) 4135 struct thread *td; 4136 struct umask_args /* { 4137 int newmask; 4138 } */ *uap; 4139 { 4140 register struct filedesc *fdp; 4141 4142 FILEDESC_XLOCK(td->td_proc->p_fd); 4143 fdp = td->td_proc->p_fd; 4144 td->td_retval[0] = fdp->fd_cmask; 4145 fdp->fd_cmask = uap->newmask & ALLPERMS; 4146 FILEDESC_XUNLOCK(td->td_proc->p_fd); 4147 return (0); 4148 } 4149 4150 /* 4151 * Void all references to file by ripping underlying filesystem away from 4152 * vnode. 4153 */ 4154 #ifndef _SYS_SYSPROTO_H_ 4155 struct revoke_args { 4156 char *path; 4157 }; 4158 #endif 4159 int 4160 sys_revoke(td, uap) 4161 struct thread *td; 4162 register struct revoke_args /* { 4163 char *path; 4164 } */ *uap; 4165 { 4166 struct vnode *vp; 4167 struct vattr vattr; 4168 int error; 4169 struct nameidata nd; 4170 4171 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4172 uap->path, td); 4173 if ((error = namei(&nd)) != 0) 4174 return (error); 4175 vp = nd.ni_vp; 4176 NDFREE(&nd, NDF_ONLY_PNBUF); 4177 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4178 error = EINVAL; 4179 goto out; 4180 } 4181 #ifdef MAC 4182 error = mac_vnode_check_revoke(td->td_ucred, vp); 4183 if (error) 4184 goto out; 4185 #endif 4186 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4187 if (error) 4188 goto out; 4189 if (td->td_ucred->cr_uid != vattr.va_uid) { 4190 error = priv_check(td, PRIV_VFS_ADMIN); 4191 if (error) 4192 goto out; 4193 } 4194 if (vcount(vp) > 1) 4195 VOP_REVOKE(vp, REVOKEALL); 4196 out: 4197 vput(vp); 4198 return (error); 4199 } 4200 4201 /* 4202 * Convert a user file descriptor to a kernel file entry and check that, if it 4203 * is a capability, the correct rights are present. A reference on the file 4204 * entry is held upon returning. 4205 */ 4206 int 4207 getvnode(struct filedesc *fdp, int fd, cap_rights_t rights, struct file **fpp) 4208 { 4209 struct file *fp; 4210 int error; 4211 4212 error = fget_unlocked(fdp, fd, rights, 0, &fp, NULL); 4213 if (error != 0) 4214 return (error); 4215 4216 /* 4217 * The file could be not of the vnode type, or it may be not 4218 * yet fully initialized, in which case the f_vnode pointer 4219 * may be set, but f_ops is still badfileops. E.g., 4220 * devfs_open() transiently create such situation to 4221 * facilitate csw d_fdopen(). 4222 * 4223 * Dupfdopen() handling in kern_openat() installs the 4224 * half-baked file into the process descriptor table, allowing 4225 * other thread to dereference it. Guard against the race by 4226 * checking f_ops. 4227 */ 4228 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4229 fdrop(fp, curthread); 4230 return (EINVAL); 4231 } 4232 *fpp = fp; 4233 return (0); 4234 } 4235 4236 4237 /* 4238 * Get an (NFS) file handle. 4239 */ 4240 #ifndef _SYS_SYSPROTO_H_ 4241 struct lgetfh_args { 4242 char *fname; 4243 fhandle_t *fhp; 4244 }; 4245 #endif 4246 int 4247 sys_lgetfh(td, uap) 4248 struct thread *td; 4249 register struct lgetfh_args *uap; 4250 { 4251 struct nameidata nd; 4252 fhandle_t fh; 4253 register struct vnode *vp; 4254 int error; 4255 4256 error = priv_check(td, PRIV_VFS_GETFH); 4257 if (error) 4258 return (error); 4259 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4260 uap->fname, td); 4261 error = namei(&nd); 4262 if (error) 4263 return (error); 4264 NDFREE(&nd, NDF_ONLY_PNBUF); 4265 vp = nd.ni_vp; 4266 bzero(&fh, sizeof(fh)); 4267 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4268 error = VOP_VPTOFH(vp, &fh.fh_fid); 4269 vput(vp); 4270 if (error) 4271 return (error); 4272 error = copyout(&fh, uap->fhp, sizeof (fh)); 4273 return (error); 4274 } 4275 4276 #ifndef _SYS_SYSPROTO_H_ 4277 struct getfh_args { 4278 char *fname; 4279 fhandle_t *fhp; 4280 }; 4281 #endif 4282 int 4283 sys_getfh(td, uap) 4284 struct thread *td; 4285 register struct getfh_args *uap; 4286 { 4287 struct nameidata nd; 4288 fhandle_t fh; 4289 register struct vnode *vp; 4290 int error; 4291 4292 error = priv_check(td, PRIV_VFS_GETFH); 4293 if (error) 4294 return (error); 4295 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4296 uap->fname, td); 4297 error = namei(&nd); 4298 if (error) 4299 return (error); 4300 NDFREE(&nd, NDF_ONLY_PNBUF); 4301 vp = nd.ni_vp; 4302 bzero(&fh, sizeof(fh)); 4303 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4304 error = VOP_VPTOFH(vp, &fh.fh_fid); 4305 vput(vp); 4306 if (error) 4307 return (error); 4308 error = copyout(&fh, uap->fhp, sizeof (fh)); 4309 return (error); 4310 } 4311 4312 /* 4313 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4314 * open descriptor. 4315 * 4316 * warning: do not remove the priv_check() call or this becomes one giant 4317 * security hole. 4318 */ 4319 #ifndef _SYS_SYSPROTO_H_ 4320 struct fhopen_args { 4321 const struct fhandle *u_fhp; 4322 int flags; 4323 }; 4324 #endif 4325 int 4326 sys_fhopen(td, uap) 4327 struct thread *td; 4328 struct fhopen_args /* { 4329 const struct fhandle *u_fhp; 4330 int flags; 4331 } */ *uap; 4332 { 4333 struct mount *mp; 4334 struct vnode *vp; 4335 struct fhandle fhp; 4336 struct file *fp; 4337 int fmode, error; 4338 int indx; 4339 4340 error = priv_check(td, PRIV_VFS_FHOPEN); 4341 if (error) 4342 return (error); 4343 indx = -1; 4344 fmode = FFLAGS(uap->flags); 4345 /* why not allow a non-read/write open for our lockd? */ 4346 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4347 return (EINVAL); 4348 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4349 if (error) 4350 return(error); 4351 /* find the mount point */ 4352 mp = vfs_busyfs(&fhp.fh_fsid); 4353 if (mp == NULL) 4354 return (ESTALE); 4355 /* now give me my vnode, it gets returned to me locked */ 4356 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4357 vfs_unbusy(mp); 4358 if (error) 4359 return (error); 4360 4361 error = falloc_noinstall(td, &fp); 4362 if (error) { 4363 vput(vp); 4364 return (error); 4365 } 4366 /* 4367 * An extra reference on `fp' has been held for us by 4368 * falloc_noinstall(). 4369 */ 4370 4371 #ifdef INVARIANTS 4372 td->td_dupfd = -1; 4373 #endif 4374 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4375 if (error) { 4376 KASSERT(fp->f_ops == &badfileops, 4377 ("VOP_OPEN in fhopen() set f_ops")); 4378 KASSERT(td->td_dupfd < 0, 4379 ("fhopen() encountered fdopen()")); 4380 4381 vput(vp); 4382 goto bad; 4383 } 4384 #ifdef INVARIANTS 4385 td->td_dupfd = 0; 4386 #endif 4387 fp->f_vnode = vp; 4388 fp->f_seqcount = 1; 4389 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4390 &vnops); 4391 VOP_UNLOCK(vp, 0); 4392 if (fmode & O_TRUNC) { 4393 error = fo_truncate(fp, 0, td->td_ucred, td); 4394 if (error) 4395 goto bad; 4396 } 4397 4398 error = finstall(td, fp, &indx, fmode, NULL); 4399 bad: 4400 fdrop(fp, td); 4401 td->td_retval[0] = indx; 4402 return (error); 4403 } 4404 4405 /* 4406 * Stat an (NFS) file handle. 4407 */ 4408 #ifndef _SYS_SYSPROTO_H_ 4409 struct fhstat_args { 4410 struct fhandle *u_fhp; 4411 struct stat *sb; 4412 }; 4413 #endif 4414 int 4415 sys_fhstat(td, uap) 4416 struct thread *td; 4417 register struct fhstat_args /* { 4418 struct fhandle *u_fhp; 4419 struct stat *sb; 4420 } */ *uap; 4421 { 4422 struct stat sb; 4423 struct fhandle fh; 4424 int error; 4425 4426 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4427 if (error != 0) 4428 return (error); 4429 error = kern_fhstat(td, fh, &sb); 4430 if (error != 0) 4431 return (error); 4432 error = copyout(&sb, uap->sb, sizeof(sb)); 4433 return (error); 4434 } 4435 4436 int 4437 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4438 { 4439 struct mount *mp; 4440 struct vnode *vp; 4441 int error; 4442 4443 error = priv_check(td, PRIV_VFS_FHSTAT); 4444 if (error) 4445 return (error); 4446 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4447 return (ESTALE); 4448 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4449 vfs_unbusy(mp); 4450 if (error) 4451 return (error); 4452 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4453 vput(vp); 4454 return (error); 4455 } 4456 4457 /* 4458 * Implement fstatfs() for (NFS) file handles. 4459 */ 4460 #ifndef _SYS_SYSPROTO_H_ 4461 struct fhstatfs_args { 4462 struct fhandle *u_fhp; 4463 struct statfs *buf; 4464 }; 4465 #endif 4466 int 4467 sys_fhstatfs(td, uap) 4468 struct thread *td; 4469 struct fhstatfs_args /* { 4470 struct fhandle *u_fhp; 4471 struct statfs *buf; 4472 } */ *uap; 4473 { 4474 struct statfs sf; 4475 fhandle_t fh; 4476 int error; 4477 4478 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4479 if (error) 4480 return (error); 4481 error = kern_fhstatfs(td, fh, &sf); 4482 if (error) 4483 return (error); 4484 return (copyout(&sf, uap->buf, sizeof(sf))); 4485 } 4486 4487 int 4488 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4489 { 4490 struct statfs *sp; 4491 struct mount *mp; 4492 struct vnode *vp; 4493 int error; 4494 4495 error = priv_check(td, PRIV_VFS_FHSTATFS); 4496 if (error) 4497 return (error); 4498 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4499 return (ESTALE); 4500 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4501 if (error) { 4502 vfs_unbusy(mp); 4503 return (error); 4504 } 4505 vput(vp); 4506 error = prison_canseemount(td->td_ucred, mp); 4507 if (error) 4508 goto out; 4509 #ifdef MAC 4510 error = mac_mount_check_stat(td->td_ucred, mp); 4511 if (error) 4512 goto out; 4513 #endif 4514 /* 4515 * Set these in case the underlying filesystem fails to do so. 4516 */ 4517 sp = &mp->mnt_stat; 4518 sp->f_version = STATFS_VERSION; 4519 sp->f_namemax = NAME_MAX; 4520 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4521 error = VFS_STATFS(mp, sp); 4522 if (error == 0) 4523 *buf = *sp; 4524 out: 4525 vfs_unbusy(mp); 4526 return (error); 4527 } 4528 4529 int 4530 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4531 { 4532 struct file *fp; 4533 struct mount *mp; 4534 struct vnode *vp; 4535 off_t olen, ooffset; 4536 int error; 4537 4538 fp = NULL; 4539 error = fget(td, fd, CAP_WRITE, &fp); 4540 if (error != 0) 4541 goto out; 4542 4543 switch (fp->f_type) { 4544 case DTYPE_VNODE: 4545 break; 4546 case DTYPE_PIPE: 4547 case DTYPE_FIFO: 4548 error = ESPIPE; 4549 goto out; 4550 default: 4551 error = ENODEV; 4552 goto out; 4553 } 4554 if ((fp->f_flag & FWRITE) == 0) { 4555 error = EBADF; 4556 goto out; 4557 } 4558 vp = fp->f_vnode; 4559 if (vp->v_type != VREG) { 4560 error = ENODEV; 4561 goto out; 4562 } 4563 if (offset < 0 || len <= 0) { 4564 error = EINVAL; 4565 goto out; 4566 } 4567 /* Check for wrap. */ 4568 if (offset > OFF_MAX - len) { 4569 error = EFBIG; 4570 goto out; 4571 } 4572 4573 /* Allocating blocks may take a long time, so iterate. */ 4574 for (;;) { 4575 olen = len; 4576 ooffset = offset; 4577 4578 bwillwrite(); 4579 mp = NULL; 4580 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4581 if (error != 0) 4582 break; 4583 error = vn_lock(vp, LK_EXCLUSIVE); 4584 if (error != 0) { 4585 vn_finished_write(mp); 4586 break; 4587 } 4588 #ifdef MAC 4589 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4590 if (error == 0) 4591 #endif 4592 error = VOP_ALLOCATE(vp, &offset, &len); 4593 VOP_UNLOCK(vp, 0); 4594 vn_finished_write(mp); 4595 4596 if (olen + ooffset != offset + len) { 4597 panic("offset + len changed from %jx/%jx to %jx/%jx", 4598 ooffset, olen, offset, len); 4599 } 4600 if (error != 0 || len == 0) 4601 break; 4602 KASSERT(olen > len, ("Iteration did not make progress?")); 4603 maybe_yield(); 4604 } 4605 out: 4606 if (fp != NULL) 4607 fdrop(fp, td); 4608 return (error); 4609 } 4610 4611 int 4612 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4613 { 4614 4615 return (kern_posix_fallocate(td, uap->fd, uap->offset, uap->len)); 4616 } 4617 4618 /* 4619 * Unlike madvise(2), we do not make a best effort to remember every 4620 * possible caching hint. Instead, we remember the last setting with 4621 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4622 * region of any current setting. 4623 */ 4624 int 4625 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4626 int advice) 4627 { 4628 struct fadvise_info *fa, *new; 4629 struct file *fp; 4630 struct vnode *vp; 4631 off_t end; 4632 int error; 4633 4634 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4635 return (EINVAL); 4636 switch (advice) { 4637 case POSIX_FADV_SEQUENTIAL: 4638 case POSIX_FADV_RANDOM: 4639 case POSIX_FADV_NOREUSE: 4640 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4641 break; 4642 case POSIX_FADV_NORMAL: 4643 case POSIX_FADV_WILLNEED: 4644 case POSIX_FADV_DONTNEED: 4645 new = NULL; 4646 break; 4647 default: 4648 return (EINVAL); 4649 } 4650 /* XXX: CAP_POSIX_FADVISE? */ 4651 error = fget(td, fd, CAP_NONE, &fp); 4652 if (error != 0) 4653 goto out; 4654 4655 switch (fp->f_type) { 4656 case DTYPE_VNODE: 4657 break; 4658 case DTYPE_PIPE: 4659 case DTYPE_FIFO: 4660 error = ESPIPE; 4661 goto out; 4662 default: 4663 error = ENODEV; 4664 goto out; 4665 } 4666 vp = fp->f_vnode; 4667 if (vp->v_type != VREG) { 4668 error = ENODEV; 4669 goto out; 4670 } 4671 if (len == 0) 4672 end = OFF_MAX; 4673 else 4674 end = offset + len - 1; 4675 switch (advice) { 4676 case POSIX_FADV_SEQUENTIAL: 4677 case POSIX_FADV_RANDOM: 4678 case POSIX_FADV_NOREUSE: 4679 /* 4680 * Try to merge any existing non-standard region with 4681 * this new region if possible, otherwise create a new 4682 * non-standard region for this request. 4683 */ 4684 mtx_pool_lock(mtxpool_sleep, fp); 4685 fa = fp->f_advice; 4686 if (fa != NULL && fa->fa_advice == advice && 4687 ((fa->fa_start <= end && fa->fa_end >= offset) || 4688 (end != OFF_MAX && fa->fa_start == end + 1) || 4689 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4690 if (offset < fa->fa_start) 4691 fa->fa_start = offset; 4692 if (end > fa->fa_end) 4693 fa->fa_end = end; 4694 } else { 4695 new->fa_advice = advice; 4696 new->fa_start = offset; 4697 new->fa_end = end; 4698 new->fa_prevstart = 0; 4699 new->fa_prevend = 0; 4700 fp->f_advice = new; 4701 new = fa; 4702 } 4703 mtx_pool_unlock(mtxpool_sleep, fp); 4704 break; 4705 case POSIX_FADV_NORMAL: 4706 /* 4707 * If a the "normal" region overlaps with an existing 4708 * non-standard region, trim or remove the 4709 * non-standard region. 4710 */ 4711 mtx_pool_lock(mtxpool_sleep, fp); 4712 fa = fp->f_advice; 4713 if (fa != NULL) { 4714 if (offset <= fa->fa_start && end >= fa->fa_end) { 4715 new = fa; 4716 fp->f_advice = NULL; 4717 } else if (offset <= fa->fa_start && 4718 end >= fa->fa_start) 4719 fa->fa_start = end + 1; 4720 else if (offset <= fa->fa_end && end >= fa->fa_end) 4721 fa->fa_end = offset - 1; 4722 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4723 /* 4724 * If the "normal" region is a middle 4725 * portion of the existing 4726 * non-standard region, just remove 4727 * the whole thing rather than picking 4728 * one side or the other to 4729 * preserve. 4730 */ 4731 new = fa; 4732 fp->f_advice = NULL; 4733 } 4734 } 4735 mtx_pool_unlock(mtxpool_sleep, fp); 4736 break; 4737 case POSIX_FADV_WILLNEED: 4738 case POSIX_FADV_DONTNEED: 4739 error = VOP_ADVISE(vp, offset, end, advice); 4740 break; 4741 } 4742 out: 4743 if (fp != NULL) 4744 fdrop(fp, td); 4745 free(new, M_FADVISE); 4746 return (error); 4747 } 4748 4749 int 4750 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4751 { 4752 4753 return (kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4754 uap->advice)); 4755 } 4756