/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_capsicum.h"
#include "opt_compat.h"
#include "opt_kdtrace.h"
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/capability.h>
#include <sys/disk.h>
#include <sys/sysent.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/namei.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filio.h>
#include <sys/limits.h>
#include <sys/linker.h>
#include <sys/sdt.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/dirent.h>
#include <sys/jail.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <machine/stdarg.h>

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/uma.h>

#include <ufs/ufs/quota.h>

MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");

SDT_PROVIDER_DEFINE(vfs);
SDT_PROBE_DEFINE(vfs, , stat, mode, mode);
SDT_PROBE_ARGTYPE(vfs, , stat, mode, 0, "char *");
SDT_PROBE_ARGTYPE(vfs, , stat, mode, 1, "int");
SDT_PROBE_DEFINE(vfs, , stat, reg, reg);
SDT_PROBE_ARGTYPE(vfs, , stat, reg, 0, "char *");
SDT_PROBE_ARGTYPE(vfs, , stat, reg, 1, "int");

static int chroot_refuse_vdir_fds(struct filedesc *fdp);
static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
static int setfflags(struct thread *td, struct vnode *, int);
static int setutimes(struct thread *td, struct vnode *,
    const struct timespec *, int, int);
static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
    struct thread *td);

/*
 * The module initialization routine for POSIX asynchronous I/O will
 * set this to the version of AIO that it implements.  (Zero means
 * that it is not implemented.)  This value is used here by pathconf()
 * and in kern_descrip.c by fpathconf().
 */
int async_io_version;

#ifdef DEBUG
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif

/*
 * Sync each mounted filesystem.
 *
 * Walks the global mount list and, for every mount that can be busied
 * and is not read-only, issues vfs_msync() and VFS_SYNC() with
 * MNT_NOWAIT.  Mounts that cannot be busied, are read-only, or refuse
 * vn_start_write(V_NOWAIT) are skipped.  Neither 'td' nor 'uap' is
 * consulted.  Always returns 0; the results of the individual sync
 * operations are not propagated.
 */
#ifndef _SYS_SYSPROTO_H_
struct sync_args {
	int     dummy;
};
#endif
/* ARGSUSED */
int
sys_sync(td, uap)
	struct thread *td;
	struct sync_args *uap;
{
	struct mount *mp, *nmp;
	int save;

	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		/*
		 * A non-zero return means the mount was not busied; we
		 * still hold mountlist_mtx and simply step to the next
		 * entry.
		 */
		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
			/*
			 * Mark the thread with TDP_SYNCIO for the duration
			 * of the sync and restore the previous flags after.
			 */
			save = curthread_pflags_set(TDP_SYNCIO);
			vfs_msync(mp, MNT_NOWAIT);
			VFS_SYNC(mp, MNT_NOWAIT);
			curthread_pflags_restore(save);
			vn_finished_write(mp);
		}
		/*
		 * NOTE(review): mountlist_mtx is re-acquired here, so a
		 * successful vfs_busy(MBF_MNTLSTLOCK) presumably dropped
		 * it -- confirm against vfs_busy().  The next entry is
		 * fetched under the mutex before the mount is unbusied.
		 */
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	mtx_unlock(&mountlist_mtx);
	return (0);
}

/*
 * Change filesystem quotas.
163 */ 164 #ifndef _SYS_SYSPROTO_H_ 165 struct quotactl_args { 166 char *path; 167 int cmd; 168 int uid; 169 caddr_t arg; 170 }; 171 #endif 172 int 173 sys_quotactl(td, uap) 174 struct thread *td; 175 register struct quotactl_args /* { 176 char *path; 177 int cmd; 178 int uid; 179 caddr_t arg; 180 } */ *uap; 181 { 182 struct mount *mp; 183 int error; 184 struct nameidata nd; 185 186 AUDIT_ARG_CMD(uap->cmd); 187 AUDIT_ARG_UID(uap->uid); 188 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 189 return (EPERM); 190 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, 191 UIO_USERSPACE, uap->path, td); 192 if ((error = namei(&nd)) != 0) 193 return (error); 194 NDFREE(&nd, NDF_ONLY_PNBUF); 195 mp = nd.ni_vp->v_mount; 196 vfs_ref(mp); 197 vput(nd.ni_vp); 198 error = vfs_busy(mp, 0); 199 vfs_rel(mp); 200 if (error) 201 return (error); 202 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 203 204 /* 205 * Since quota on operation typically needs to open quota 206 * file, the Q_QUOTAON handler needs to unbusy the mount point 207 * before calling into namei. Otherwise, unmount might be 208 * started between two vfs_busy() invocations (first is our, 209 * second is from mount point cross-walk code in lookup()), 210 * causing deadlock. 211 * 212 * Require that Q_QUOTAON handles the vfs_busy() reference on 213 * its own, always returning with ubusied mount point. 214 */ 215 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 216 vfs_unbusy(mp); 217 return (error); 218 } 219 220 /* 221 * Used by statfs conversion routines to scale the block size up if 222 * necessary so that all of the block counts are <= 'max_size'. Note 223 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 224 * value of 'n'. 
225 */ 226 void 227 statfs_scale_blocks(struct statfs *sf, long max_size) 228 { 229 uint64_t count; 230 int shift; 231 232 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 233 234 /* 235 * Attempt to scale the block counts to give a more accurate 236 * overview to userland of the ratio of free space to used 237 * space. To do this, find the largest block count and compute 238 * a divisor that lets it fit into a signed integer <= max_size. 239 */ 240 if (sf->f_bavail < 0) 241 count = -sf->f_bavail; 242 else 243 count = sf->f_bavail; 244 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 245 if (count <= max_size) 246 return; 247 248 count >>= flsl(max_size); 249 shift = 0; 250 while (count > 0) { 251 shift++; 252 count >>=1; 253 } 254 255 sf->f_bsize <<= shift; 256 sf->f_blocks >>= shift; 257 sf->f_bfree >>= shift; 258 sf->f_bavail >>= shift; 259 } 260 261 /* 262 * Get filesystem statistics. 263 */ 264 #ifndef _SYS_SYSPROTO_H_ 265 struct statfs_args { 266 char *path; 267 struct statfs *buf; 268 }; 269 #endif 270 int 271 sys_statfs(td, uap) 272 struct thread *td; 273 register struct statfs_args /* { 274 char *path; 275 struct statfs *buf; 276 } */ *uap; 277 { 278 struct statfs sf; 279 int error; 280 281 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 282 if (error == 0) 283 error = copyout(&sf, uap->buf, sizeof(sf)); 284 return (error); 285 } 286 287 int 288 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 289 struct statfs *buf) 290 { 291 struct mount *mp; 292 struct statfs *sp, sb; 293 int error; 294 struct nameidata nd; 295 296 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 297 AUDITVNODE1, pathseg, path, td); 298 error = namei(&nd); 299 if (error) 300 return (error); 301 mp = nd.ni_vp->v_mount; 302 vfs_ref(mp); 303 NDFREE(&nd, NDF_ONLY_PNBUF); 304 vput(nd.ni_vp); 305 error = vfs_busy(mp, 0); 306 vfs_rel(mp); 307 if (error) 308 return (error); 309 #ifdef MAC 310 error = mac_mount_check_stat(td->td_ucred, mp); 311 
if (error) 312 goto out; 313 #endif 314 /* 315 * Set these in case the underlying filesystem fails to do so. 316 */ 317 sp = &mp->mnt_stat; 318 sp->f_version = STATFS_VERSION; 319 sp->f_namemax = NAME_MAX; 320 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 321 error = VFS_STATFS(mp, sp); 322 if (error) 323 goto out; 324 if (priv_check(td, PRIV_VFS_GENERATION)) { 325 bcopy(sp, &sb, sizeof(sb)); 326 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 327 prison_enforce_statfs(td->td_ucred, mp, &sb); 328 sp = &sb; 329 } 330 *buf = *sp; 331 out: 332 vfs_unbusy(mp); 333 return (error); 334 } 335 336 /* 337 * Get filesystem statistics. 338 */ 339 #ifndef _SYS_SYSPROTO_H_ 340 struct fstatfs_args { 341 int fd; 342 struct statfs *buf; 343 }; 344 #endif 345 int 346 sys_fstatfs(td, uap) 347 struct thread *td; 348 register struct fstatfs_args /* { 349 int fd; 350 struct statfs *buf; 351 } */ *uap; 352 { 353 struct statfs sf; 354 int error; 355 356 error = kern_fstatfs(td, uap->fd, &sf); 357 if (error == 0) 358 error = copyout(&sf, uap->buf, sizeof(sf)); 359 return (error); 360 } 361 362 int 363 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 364 { 365 struct file *fp; 366 struct mount *mp; 367 struct statfs *sp, sb; 368 struct vnode *vp; 369 int error; 370 371 AUDIT_ARG_FD(fd); 372 error = getvnode(td->td_proc->p_fd, fd, CAP_FSTATFS, &fp); 373 if (error) 374 return (error); 375 vp = fp->f_vnode; 376 vn_lock(vp, LK_SHARED | LK_RETRY); 377 #ifdef AUDIT 378 AUDIT_ARG_VNODE1(vp); 379 #endif 380 mp = vp->v_mount; 381 if (mp) 382 vfs_ref(mp); 383 VOP_UNLOCK(vp, 0); 384 fdrop(fp, td); 385 if (mp == NULL) { 386 error = EBADF; 387 goto out; 388 } 389 error = vfs_busy(mp, 0); 390 vfs_rel(mp); 391 if (error) 392 return (error); 393 #ifdef MAC 394 error = mac_mount_check_stat(td->td_ucred, mp); 395 if (error) 396 goto out; 397 #endif 398 /* 399 * Set these in case the underlying filesystem fails to do so. 
400 */ 401 sp = &mp->mnt_stat; 402 sp->f_version = STATFS_VERSION; 403 sp->f_namemax = NAME_MAX; 404 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 405 error = VFS_STATFS(mp, sp); 406 if (error) 407 goto out; 408 if (priv_check(td, PRIV_VFS_GENERATION)) { 409 bcopy(sp, &sb, sizeof(sb)); 410 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 411 prison_enforce_statfs(td->td_ucred, mp, &sb); 412 sp = &sb; 413 } 414 *buf = *sp; 415 out: 416 if (mp) 417 vfs_unbusy(mp); 418 return (error); 419 } 420 421 /* 422 * Get statistics on all filesystems. 423 */ 424 #ifndef _SYS_SYSPROTO_H_ 425 struct getfsstat_args { 426 struct statfs *buf; 427 long bufsize; 428 int flags; 429 }; 430 #endif 431 int 432 sys_getfsstat(td, uap) 433 struct thread *td; 434 register struct getfsstat_args /* { 435 struct statfs *buf; 436 long bufsize; 437 int flags; 438 } */ *uap; 439 { 440 441 return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE, 442 uap->flags)); 443 } 444 445 /* 446 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 447 * The caller is responsible for freeing memory which will be allocated 448 * in '*buf'. 
 */
/*
 * Collect statfs records for the mounted filesystems the caller may see.
 *
 *	buf	With UIO_USERSPACE, '*buf' is the user destination.  With
 *		UIO_SYSSPACE and a non-zero 'bufsize', '*buf' is set to a
 *		buffer allocated here from M_TEMP.
 *	bufsize	Size in bytes of the destination; 0 means "count only".
 *	flags	MNT_WAIT / MNT_NOWAIT / MNT_LAZY refresh control.
 *
 * On success returns 0 and stores in td->td_retval[0] the number of
 * mounts visited, clamped to the number of records that fit when a
 * buffer was supplied.  The only error returned is a copyout() failure;
 * a VFS_STATFS() failure merely causes that mount to be skipped.
 */
int
kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
    enum uio_seg bufseg, int flags)
{
	struct mount *mp, *nmp;
	struct statfs *sfsp, *sp, sb;
	size_t count, maxcount;
	int error;

	maxcount = bufsize / sizeof(struct statfs);
	if (bufsize == 0)
		sfsp = NULL;
	else if (bufseg == UIO_USERSPACE)
		sfsp = *buf;
	else /* if (bufseg == UIO_SYSSPACE) */ {
		/*
		 * Size the kernel buffer from the current length of the
		 * mount list, never exceeding what the caller asked for.
		 * The list is re-walked below after the mutex has been
		 * dropped; 'maxcount' bounds how many records are written.
		 */
		count = 0;
		mtx_lock(&mountlist_mtx);
		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
			count++;
		}
		mtx_unlock(&mountlist_mtx);
		if (maxcount > count)
			maxcount = count;
		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
		    M_WAITOK);
	}
	count = 0;
	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		/* Mounts hidden from this credential are not counted. */
		if (prison_canseemount(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#ifdef MAC
		if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#endif
		/* Not busied: mountlist_mtx is still held, just move on. */
		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
		if (sfsp && count < maxcount) {
			sp = &mp->mnt_stat;
			/*
			 * Set these in case the underlying filesystem
			 * fails to do so.
			 */
			sp->f_version = STATFS_VERSION;
			sp->f_namemax = NAME_MAX;
			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
			/*
			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
			 * overrides MNT_WAIT.
			 *
			 * NOTE(review): as written, the refresh is performed
			 * whenever (flags & MNT_WAIT) is non-zero, even if
			 * MNT_NOWAIT/MNT_LAZY bits are also set, which reads
			 * as the opposite precedence to the sentence above.
			 * Confirm the intended behaviour against the values
			 * of the MNT_* constants.
			 */
			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
			    (flags & MNT_WAIT)) &&
			    (error = VFS_STATFS(mp, sp))) {
				/* Refresh failed: skip without counting. */
				mtx_lock(&mountlist_mtx);
				nmp = TAILQ_NEXT(mp, mnt_list);
				vfs_unbusy(mp);
				continue;
			}
			if (priv_check(td, PRIV_VFS_GENERATION)) {
				/* Hand out a copy with the fsid zeroed. */
				bcopy(sp, &sb, sizeof(sb));
				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
				prison_enforce_statfs(td->td_ucred, mp, &sb);
				sp = &sb;
			}
			if (bufseg == UIO_SYSSPACE)
				bcopy(sp, sfsp, sizeof(*sp));
			else /* if (bufseg == UIO_USERSPACE) */ {
				error = copyout(sp, sfsp, sizeof(*sp));
				if (error) {
					/*
					 * mountlist_mtx is not re-taken on
					 * this path before returning.
					 */
					vfs_unbusy(mp);
					return (error);
				}
			}
			sfsp++;
		}
		/* Counted even when the record did not fit in the buffer. */
		count++;
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	mtx_unlock(&mountlist_mtx);
	if (sfsp && count > maxcount)
		td->td_retval[0] = maxcount;
	else
		td->td_retval[0] = count;
	return (0);
}

#ifdef COMPAT_FREEBSD4
/*
 * Get old format filesystem statistics.
 */
static void cvtstatfs(struct statfs *, struct ostatfs *);

#ifndef _SYS_SYSPROTO_H_
struct freebsd4_statfs_args {
	char *path;
	struct ostatfs *buf;
};
#endif
/*
 * Old-format statfs: fetch the statistics with kern_statfs(), convert
 * them with cvtstatfs() and copy the 'struct ostatfs' out to uap->buf.
 */
int
freebsd4_statfs(td, uap)
	struct thread *td;
	struct freebsd4_statfs_args /* {
		char *path;
		struct ostatfs *buf;
	} */ *uap;
{
	struct ostatfs osb;
	struct statfs sf;
	int error;

	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
	if (error)
		return (error);
	cvtstatfs(&sf, &osb);
	return (copyout(&osb, uap->buf, sizeof(osb)));
}

/*
 * Get filesystem statistics.
578 */ 579 #ifndef _SYS_SYSPROTO_H_ 580 struct freebsd4_fstatfs_args { 581 int fd; 582 struct ostatfs *buf; 583 }; 584 #endif 585 int 586 freebsd4_fstatfs(td, uap) 587 struct thread *td; 588 struct freebsd4_fstatfs_args /* { 589 int fd; 590 struct ostatfs *buf; 591 } */ *uap; 592 { 593 struct ostatfs osb; 594 struct statfs sf; 595 int error; 596 597 error = kern_fstatfs(td, uap->fd, &sf); 598 if (error) 599 return (error); 600 cvtstatfs(&sf, &osb); 601 return (copyout(&osb, uap->buf, sizeof(osb))); 602 } 603 604 /* 605 * Get statistics on all filesystems. 606 */ 607 #ifndef _SYS_SYSPROTO_H_ 608 struct freebsd4_getfsstat_args { 609 struct ostatfs *buf; 610 long bufsize; 611 int flags; 612 }; 613 #endif 614 int 615 freebsd4_getfsstat(td, uap) 616 struct thread *td; 617 register struct freebsd4_getfsstat_args /* { 618 struct ostatfs *buf; 619 long bufsize; 620 int flags; 621 } */ *uap; 622 { 623 struct statfs *buf, *sp; 624 struct ostatfs osb; 625 size_t count, size; 626 int error; 627 628 count = uap->bufsize / sizeof(struct ostatfs); 629 size = count * sizeof(struct statfs); 630 error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags); 631 if (size > 0) { 632 count = td->td_retval[0]; 633 sp = buf; 634 while (count > 0 && error == 0) { 635 cvtstatfs(sp, &osb); 636 error = copyout(&osb, uap->buf, sizeof(osb)); 637 sp++; 638 uap->buf++; 639 count--; 640 } 641 free(buf, M_TEMP); 642 } 643 return (error); 644 } 645 646 /* 647 * Implement fstatfs() for (NFS) file handles. 
648 */ 649 #ifndef _SYS_SYSPROTO_H_ 650 struct freebsd4_fhstatfs_args { 651 struct fhandle *u_fhp; 652 struct ostatfs *buf; 653 }; 654 #endif 655 int 656 freebsd4_fhstatfs(td, uap) 657 struct thread *td; 658 struct freebsd4_fhstatfs_args /* { 659 struct fhandle *u_fhp; 660 struct ostatfs *buf; 661 } */ *uap; 662 { 663 struct ostatfs osb; 664 struct statfs sf; 665 fhandle_t fh; 666 int error; 667 668 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 669 if (error) 670 return (error); 671 error = kern_fhstatfs(td, fh, &sf); 672 if (error) 673 return (error); 674 cvtstatfs(&sf, &osb); 675 return (copyout(&osb, uap->buf, sizeof(osb))); 676 } 677 678 /* 679 * Convert a new format statfs structure to an old format statfs structure. 680 */ 681 static void 682 cvtstatfs(nsp, osp) 683 struct statfs *nsp; 684 struct ostatfs *osp; 685 { 686 687 statfs_scale_blocks(nsp, LONG_MAX); 688 bzero(osp, sizeof(*osp)); 689 osp->f_bsize = nsp->f_bsize; 690 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 691 osp->f_blocks = nsp->f_blocks; 692 osp->f_bfree = nsp->f_bfree; 693 osp->f_bavail = nsp->f_bavail; 694 osp->f_files = MIN(nsp->f_files, LONG_MAX); 695 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 696 osp->f_owner = nsp->f_owner; 697 osp->f_type = nsp->f_type; 698 osp->f_flags = nsp->f_flags; 699 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 700 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 701 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 702 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 703 strlcpy(osp->f_fstypename, nsp->f_fstypename, 704 MIN(MFSNAMELEN, OMFSNAMELEN)); 705 strlcpy(osp->f_mntonname, nsp->f_mntonname, 706 MIN(MNAMELEN, OMNAMELEN)); 707 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 708 MIN(MNAMELEN, OMNAMELEN)); 709 osp->f_fsid = nsp->f_fsid; 710 } 711 #endif /* COMPAT_FREEBSD4 */ 712 713 /* 714 * Change current working directory to a given file descriptor. 
 */
#ifndef _SYS_SYSPROTO_H_
struct fchdir_args {
	int	fd;
};
#endif
/*
 * fchdir(2) entry point.
 *
 * Takes the vnode behind uap->fd, validates it with change_dir(), and
 * then, while a filesystem is mounted on the current vnode, replaces
 * it with the root vnode of that filesystem.  On success the resulting
 * vnode is installed as the process's fd_cdir and the reference held
 * on the previous one is released.  Returns 0 or an errno value from
 * getvnode(), change_dir() or VFS_ROOT().
 */
int
sys_fchdir(td, uap)
	struct thread *td;
	struct fchdir_args /* {
		int fd;
	} */ *uap;
{
	register struct filedesc *fdp = td->td_proc->p_fd;
	struct vnode *vp, *tdp, *vpold;
	struct mount *mp;
	struct file *fp;
	int error;

	AUDIT_ARG_FD(uap->fd);
	if ((error = getvnode(fdp, uap->fd, CAP_FCHDIR, &fp)) != 0)
		return (error);
	/* Take our own vnode reference so the file can be dropped now. */
	vp = fp->f_vnode;
	VREF(vp);
	fdrop(fp, td);
	vn_lock(vp, LK_SHARED | LK_RETRY);
	AUDIT_ARG_VNODE1(vp);
	error = change_dir(vp, td);
	while (!error && (mp = vp->v_mountedhere) != NULL) {
		/* Busy failed: re-read v_mountedhere and try again. */
		if (vfs_busy(mp, 0))
			continue;
		error = VFS_ROOT(mp, LK_SHARED, &tdp);
		vfs_unbusy(mp);
		if (error)
			break;
		/* Swap the covered vnode for the mounted root. */
		vput(vp);
		vp = tdp;
	}
	if (error) {
		vput(vp);
		return (error);
	}
	/* Keep the reference, drop only the lock, then publish. */
	VOP_UNLOCK(vp, 0);
	FILEDESC_XLOCK(fdp);
	vpold = fdp->fd_cdir;
	fdp->fd_cdir = vp;
	FILEDESC_XUNLOCK(fdp);
	vrele(vpold);
	return (0);
}

/*
 * Change current working directory (``.'').
768 */ 769 #ifndef _SYS_SYSPROTO_H_ 770 struct chdir_args { 771 char *path; 772 }; 773 #endif 774 int 775 sys_chdir(td, uap) 776 struct thread *td; 777 struct chdir_args /* { 778 char *path; 779 } */ *uap; 780 { 781 782 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 783 } 784 785 int 786 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 787 { 788 register struct filedesc *fdp = td->td_proc->p_fd; 789 int error; 790 struct nameidata nd; 791 struct vnode *vp; 792 793 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 794 pathseg, path, td); 795 if ((error = namei(&nd)) != 0) 796 return (error); 797 if ((error = change_dir(nd.ni_vp, td)) != 0) { 798 vput(nd.ni_vp); 799 NDFREE(&nd, NDF_ONLY_PNBUF); 800 return (error); 801 } 802 VOP_UNLOCK(nd.ni_vp, 0); 803 NDFREE(&nd, NDF_ONLY_PNBUF); 804 FILEDESC_XLOCK(fdp); 805 vp = fdp->fd_cdir; 806 fdp->fd_cdir = nd.ni_vp; 807 FILEDESC_XUNLOCK(fdp); 808 vrele(vp); 809 return (0); 810 } 811 812 /* 813 * Helper function for raised chroot(2) security function: Refuse if 814 * any filedescriptors are open directories. 815 */ 816 static int 817 chroot_refuse_vdir_fds(fdp) 818 struct filedesc *fdp; 819 { 820 struct vnode *vp; 821 struct file *fp; 822 int fd; 823 824 FILEDESC_LOCK_ASSERT(fdp); 825 826 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 827 fp = fget_locked(fdp, fd); 828 if (fp == NULL) 829 continue; 830 if (fp->f_type == DTYPE_VNODE) { 831 vp = fp->f_vnode; 832 if (vp->v_type == VDIR) 833 return (EPERM); 834 } 835 } 836 return (0); 837 } 838 839 /* 840 * This sysctl determines if we will allow a process to chroot(2) if it 841 * has a directory open: 842 * 0: disallowed for all processes. 843 * 1: allowed for processes that were not already chroot(2)'ed. 844 * 2: allowed for all processes. 
845 */ 846 847 static int chroot_allow_open_directories = 1; 848 849 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 850 &chroot_allow_open_directories, 0, 851 "Allow a process to chroot(2) if it has a directory open"); 852 853 /* 854 * Change notion of root (``/'') directory. 855 */ 856 #ifndef _SYS_SYSPROTO_H_ 857 struct chroot_args { 858 char *path; 859 }; 860 #endif 861 int 862 sys_chroot(td, uap) 863 struct thread *td; 864 struct chroot_args /* { 865 char *path; 866 } */ *uap; 867 { 868 int error; 869 struct nameidata nd; 870 871 error = priv_check(td, PRIV_VFS_CHROOT); 872 if (error) 873 return (error); 874 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 875 AUDITVNODE1, UIO_USERSPACE, uap->path, td); 876 error = namei(&nd); 877 if (error) 878 goto error; 879 if ((error = change_dir(nd.ni_vp, td)) != 0) 880 goto e_vunlock; 881 #ifdef MAC 882 if ((error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp))) 883 goto e_vunlock; 884 #endif 885 VOP_UNLOCK(nd.ni_vp, 0); 886 error = change_root(nd.ni_vp, td); 887 vrele(nd.ni_vp); 888 NDFREE(&nd, NDF_ONLY_PNBUF); 889 return (error); 890 e_vunlock: 891 vput(nd.ni_vp); 892 error: 893 NDFREE(&nd, NDF_ONLY_PNBUF); 894 return (error); 895 } 896 897 /* 898 * Common routine for chroot and chdir. Callers must provide a locked vnode 899 * instance. 900 */ 901 int 902 change_dir(vp, td) 903 struct vnode *vp; 904 struct thread *td; 905 { 906 int error; 907 908 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 909 if (vp->v_type != VDIR) 910 return (ENOTDIR); 911 #ifdef MAC 912 error = mac_vnode_check_chdir(td->td_ucred, vp); 913 if (error) 914 return (error); 915 #endif 916 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 917 return (error); 918 } 919 920 /* 921 * Common routine for kern_chroot() and jail_attach(). The caller is 922 * responsible for invoking priv_check() and mac_vnode_check_chroot() to 923 * authorize this operation. 
924 */ 925 int 926 change_root(vp, td) 927 struct vnode *vp; 928 struct thread *td; 929 { 930 struct filedesc *fdp; 931 struct vnode *oldvp; 932 int error; 933 934 fdp = td->td_proc->p_fd; 935 FILEDESC_XLOCK(fdp); 936 if (chroot_allow_open_directories == 0 || 937 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 938 error = chroot_refuse_vdir_fds(fdp); 939 if (error) { 940 FILEDESC_XUNLOCK(fdp); 941 return (error); 942 } 943 } 944 oldvp = fdp->fd_rdir; 945 fdp->fd_rdir = vp; 946 VREF(fdp->fd_rdir); 947 if (!fdp->fd_jdir) { 948 fdp->fd_jdir = vp; 949 VREF(fdp->fd_jdir); 950 } 951 FILEDESC_XUNLOCK(fdp); 952 vrele(oldvp); 953 return (0); 954 } 955 956 static __inline cap_rights_t 957 flags_to_rights(int flags) 958 { 959 cap_rights_t rights = 0; 960 961 if (flags & O_EXEC) { 962 rights |= CAP_FEXECVE; 963 } else { 964 switch ((flags & O_ACCMODE)) { 965 case O_RDONLY: 966 rights |= CAP_READ; 967 break; 968 case O_RDWR: 969 rights |= CAP_READ; 970 /* FALLTHROUGH */ 971 case O_WRONLY: 972 rights |= CAP_WRITE; 973 break; 974 } 975 } 976 977 if (flags & O_CREAT) 978 rights |= CAP_CREATE; 979 980 if (flags & O_TRUNC) 981 rights |= CAP_FTRUNCATE; 982 983 if ((flags & O_EXLOCK) || (flags & O_SHLOCK)) 984 rights |= CAP_FLOCK; 985 986 return (rights); 987 } 988 989 /* 990 * Check permissions, allocate an open file structure, and call the device 991 * open routine if any. 
 */
#ifndef _SYS_SYSPROTO_H_
struct open_args {
	char	*path;
	int	flags;
	int	mode;
};
#endif
/* open(2): forwards to kern_open() with a user-space path. */
int
sys_open(td, uap)
	struct thread *td;
	register struct open_args /* {
		char *path;
		int flags;
		int mode;
	} */ *uap;
{

	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct openat_args {
	int	fd;
	char	*path;
	int	flag;
	int	mode;
};
#endif
/* openat(2): forwards to kern_openat() with a user-space path. */
int
sys_openat(struct thread *td, struct openat_args *uap)
{

	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
	    uap->mode));
}

/* kern_openat() relative to the current working directory (AT_FDCWD). */
int
kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
    int mode)
{

	return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode));
}

/*
 * Open 'path' (in address space 'pathseg'), looked up relative to the
 * directory descriptor 'fd', and install a descriptor for it.
 *
 * On success returns 0 with the new descriptor number in
 * td->td_retval[0].  Returns EINVAL for an invalid access-mode
 * combination, EINTR in place of ERESTART from vn_open(), or another
 * errno value from falloc_noinstall(), vn_open(), dupfdopen(),
 * fo_truncate(), kern_capwrap() or finstall().
 */
int
kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    int flags, int mode)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	int cmode;
	int indx = -1, error;
	struct nameidata nd;
	cap_rights_t rights_needed = CAP_LOOKUP;

	AUDIT_ARG_FFLAGS(flags);
	AUDIT_ARG_MODE(mode);
	/* XXX: audit dirfd */
	rights_needed |= flags_to_rights(flags);
	/*
	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
	 * may be specified.
	 */
	if (flags & O_EXEC) {
		if (flags & O_ACCMODE)
			return (EINVAL);
	} else if ((flags & O_ACCMODE) == O_ACCMODE) {
		return (EINVAL);
	} else {
		/* Note: with O_EXEC the flags are left unconverted. */
		flags = FFLAGS(flags);
	}

	/*
	 * Allocate the file descriptor, but don't install a descriptor yet.
	 */
	error = falloc_noinstall(td, &fp);
	if (error)
		return (error);
	/*
	 * An extra reference on `fp' has been held for us by
	 * falloc_noinstall().
	 */
	/* Set the flags early so the finit in devfs can pick them up. */
	fp->f_flag = flags & FMASK;
	/* Apply the umask, keep permission bits only, strip the sticky bit. */
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg,
	    path, fd, rights_needed, td);
	td->td_dupfd = -1;		/* XXX check for fdopen */
	error = vn_open(&nd, &flags, cmode, fp);
	if (error) {
		/*
		 * If the vn_open replaced the method vector, something
		 * wondrous happened deep below and we just pass it up
		 * pretending we know what we do.
		 */
		if (error == ENXIO && fp->f_ops != &badfileops)
			goto success;

		/*
		 * Handle special fdopen() case. bleh.
		 *
		 * Don't do this for relative (capability) lookups; we don't
		 * understand exactly what would happen, and we don't think
		 * that it ever should.
		 */
		if (nd.ni_strictrelative == 0 &&
		    (error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0) {
			/* On success 'indx' is set, so no install below. */
			error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
			    &indx);
			if (error == 0)
				goto success;
		}

		if (error == ERESTART)
			error = EINTR;
		goto bad_unlocked;
	}
	td->td_dupfd = 0;
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;

	/*
	 * Store the vnode, for any f_type. Typically, the vnode use
	 * count is decremented by direct call to vn_closefile() for
	 * files that switched type in the cdevsw fdopen() method.
	 */
	fp->f_vnode = vp;
	/*
	 * If the file wasn't claimed by devfs bind it to the normal
	 * vnode operations here.
	 */
	if (fp->f_ops == &badfileops) {
		KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
		fp->f_seqcount = 1;
		/* Preserve FHASLOCK if it was set on the file during open. */
		finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE,
		    vp, &vnops);
	}

	VOP_UNLOCK(vp, 0);
	if (flags & O_TRUNC) {
		error = fo_truncate(fp, 0, td->td_ucred, td);
		if (error)
			goto bad;
	}
success:
	/*
	 * If we haven't already installed the FD (for dupfdopen), do so now.
	 */
	if (indx == -1) {
#ifdef CAPABILITIES
		if (nd.ni_strictrelative == 1) {
			/*
			 * We are doing a strict relative lookup; wrap the
			 * result in a capability.
			 */
			if ((error = kern_capwrap(td, fp, nd.ni_baserights,
			    &indx)) != 0)
				goto bad_unlocked;
		} else
#endif
			if ((error = finstall(td, fp, &indx, flags)) != 0)
				goto bad_unlocked;

	}

	/*
	 * Release our private reference, leaving the one associated with
	 * the descriptor table intact.
	 */
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (0);
	/* Both labels share the same cleanup: drop our reference on fp. */
bad:
bad_unlocked:
	KASSERT(indx == -1, ("indx=%d, should be -1", indx));
	fdrop(fp, td);
	return (error);
}

#ifdef COMPAT_43
/*
 * Create a file.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
/* Old creat(): kern_open() with O_WRONLY | O_CREAT | O_TRUNC. */
int
ocreat(td, uap)
	struct thread *td;
	register struct ocreat_args /* {
		char *path;
		int mode;
	} */ *uap;
{

	return (kern_open(td, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
}
#endif /* COMPAT_43 */

/*
 * Create a special file.
 */
#ifndef _SYS_SYSPROTO_H_
struct mknod_args {
	char	*path;
	int	mode;
	int	dev;
};
#endif
/* mknod(2): forwards to kern_mknod() with a user-space path. */
int
sys_mknod(td, uap)
	struct thread *td;
	register struct mknod_args /* {
		char *path;
		int mode;
		int dev;
	} */ *uap;
{

	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
}

#ifndef _SYS_SYSPROTO_H_
struct mknodat_args {
	int	fd;
	char	*path;
	mode_t	mode;
	dev_t	dev;
};
#endif
/* mknodat(2): forwards to kern_mknodat() with a user-space path. */
int
sys_mknodat(struct thread *td, struct mknodat_args *uap)
{

	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
	    uap->dev));
}

/* kern_mknodat() relative to the current working directory (AT_FDCWD). */
int
kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
    int dev)
{

	return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev));
}

/*
 * Create a device node, bad-sector node or whiteout named 'path',
 * looked up relative to the directory descriptor 'fd'.
 *
 * The file type in 'mode' selects the privilege that is checked.
 * S_IFIFO with a zero 'dev' is handed to kern_mkfifoat(); any other
 * type not listed in the first switch yields EINVAL.  Returns EEXIST
 * if the name already exists, otherwise 0 or an errno value from the
 * privilege check, namei(), vn_start_write(), the MAC check, or the
 * VOP_WHITEOUT()/VOP_MKNOD() call.
 */
int
kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    int mode, int dev)
{
	struct vnode *vp;
	struct mount *mp;
	struct vattr vattr;
	int error;
	int whiteout = 0;
	struct nameidata nd;

	AUDIT_ARG_MODE(mode);
	AUDIT_ARG_DEV(dev);
	switch (mode & S_IFMT) {
	case S_IFCHR:
	case S_IFBLK:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		break;
	case S_IFMT:
		error = priv_check(td, PRIV_VFS_MKNOD_BAD);
		break;
	case S_IFWHT:
		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
		break;
	case S_IFIFO:
		if (dev == 0)
			return (kern_mkfifoat(td, fd, path, pathseg, mode));
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error)
		return (error);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE,
	    LOCKPARENT | SAVENAME | AUDITVNODE1, pathseg, path, fd,
	    CAP_MKNOD, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp != NULL) {
		/*
		 * The name already exists.  Release the parent (vrele
		 * when it is the same vnode as the target, vput
		 * otherwise) and the target, then fail.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		return (EEXIST);
	} else {
		VATTR_NULL(&vattr);
		/* Permission bits only, filtered through the umask. */
		vattr.va_mode = (mode & ALLPERMS) &
		    ~td->td_proc->p_fd->fd_cmask;
		vattr.va_rdev = dev;
		whiteout = 0;

		/*
		 * Only the types accepted by the first switch can
		 * reach here, hence the panic in the default case.
		 */
		switch (mode & S_IFMT) {
		case S_IFMT:	/* used by badsect to flag bad sectors */
			vattr.va_type = VBAD;
			break;
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFWHT:
			whiteout = 1;
			break;
		default:
			panic("kern_mknod: invalid mode");
		}
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/*
		 * Could not start the write without waiting: release
		 * the lookup state, call vn_start_write() again with
		 * V_XSLEEP | PCATCH, and redo the lookup from scratch.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
#ifdef MAC
	/* Whiteouts are not subject to the MAC create check. */
	if (error == 0 && !whiteout)
		error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
		    &nd.ni_cnd, &vattr);
#endif
	if (!error) {
		if (whiteout)
			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
		else {
			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
			    &nd.ni_cnd, &vattr);
			/* The new vnode is not needed by this caller. */
			if (error == 0)
				vput(nd.ni_vp);
		}
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	return (error);
}

/*
 * Create a named pipe.
1350 */ 1351 #ifndef _SYS_SYSPROTO_H_ 1352 struct mkfifo_args { 1353 char *path; 1354 int mode; 1355 }; 1356 #endif 1357 int 1358 sys_mkfifo(td, uap) 1359 struct thread *td; 1360 register struct mkfifo_args /* { 1361 char *path; 1362 int mode; 1363 } */ *uap; 1364 { 1365 1366 return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode)); 1367 } 1368 1369 #ifndef _SYS_SYSPROTO_H_ 1370 struct mkfifoat_args { 1371 int fd; 1372 char *path; 1373 mode_t mode; 1374 }; 1375 #endif 1376 int 1377 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1378 { 1379 1380 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1381 uap->mode)); 1382 } 1383 1384 int 1385 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode) 1386 { 1387 1388 return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode)); 1389 } 1390 1391 int 1392 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1393 int mode) 1394 { 1395 struct mount *mp; 1396 struct vattr vattr; 1397 int error; 1398 struct nameidata nd; 1399 1400 AUDIT_ARG_MODE(mode); 1401 restart: 1402 bwillwrite(); 1403 NDINIT_ATRIGHTS(&nd, CREATE, 1404 LOCKPARENT | SAVENAME | AUDITVNODE1, pathseg, path, fd, 1405 CAP_MKFIFO, td); 1406 if ((error = namei(&nd)) != 0) 1407 return (error); 1408 if (nd.ni_vp != NULL) { 1409 NDFREE(&nd, NDF_ONLY_PNBUF); 1410 if (nd.ni_vp == nd.ni_dvp) 1411 vrele(nd.ni_dvp); 1412 else 1413 vput(nd.ni_dvp); 1414 vrele(nd.ni_vp); 1415 return (EEXIST); 1416 } 1417 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1418 NDFREE(&nd, NDF_ONLY_PNBUF); 1419 vput(nd.ni_dvp); 1420 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1421 return (error); 1422 goto restart; 1423 } 1424 VATTR_NULL(&vattr); 1425 vattr.va_type = VFIFO; 1426 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1427 #ifdef MAC 1428 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1429 &vattr); 1430 if (error) 1431 goto out; 1432 #endif 1433 error = 
VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1434 if (error == 0) 1435 vput(nd.ni_vp); 1436 #ifdef MAC 1437 out: 1438 #endif 1439 vput(nd.ni_dvp); 1440 vn_finished_write(mp); 1441 NDFREE(&nd, NDF_ONLY_PNBUF); 1442 return (error); 1443 } 1444 1445 /* 1446 * Make a hard file link. 1447 */ 1448 #ifndef _SYS_SYSPROTO_H_ 1449 struct link_args { 1450 char *path; 1451 char *link; 1452 }; 1453 #endif 1454 int 1455 sys_link(td, uap) 1456 struct thread *td; 1457 register struct link_args /* { 1458 char *path; 1459 char *link; 1460 } */ *uap; 1461 { 1462 1463 return (kern_link(td, uap->path, uap->link, UIO_USERSPACE)); 1464 } 1465 1466 #ifndef _SYS_SYSPROTO_H_ 1467 struct linkat_args { 1468 int fd1; 1469 char *path1; 1470 int fd2; 1471 char *path2; 1472 int flag; 1473 }; 1474 #endif 1475 int 1476 sys_linkat(struct thread *td, struct linkat_args *uap) 1477 { 1478 int flag; 1479 1480 flag = uap->flag; 1481 if (flag & ~AT_SYMLINK_FOLLOW) 1482 return (EINVAL); 1483 1484 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1485 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? 
FOLLOW : NOFOLLOW)); 1486 } 1487 1488 int hardlink_check_uid = 0; 1489 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1490 &hardlink_check_uid, 0, 1491 "Unprivileged processes cannot create hard links to files owned by other " 1492 "users"); 1493 static int hardlink_check_gid = 0; 1494 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1495 &hardlink_check_gid, 0, 1496 "Unprivileged processes cannot create hard links to files owned by other " 1497 "groups"); 1498 1499 static int 1500 can_hardlink(struct vnode *vp, struct ucred *cred) 1501 { 1502 struct vattr va; 1503 int error; 1504 1505 if (!hardlink_check_uid && !hardlink_check_gid) 1506 return (0); 1507 1508 error = VOP_GETATTR(vp, &va, cred); 1509 if (error != 0) 1510 return (error); 1511 1512 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1513 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1514 if (error) 1515 return (error); 1516 } 1517 1518 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1519 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1520 if (error) 1521 return (error); 1522 } 1523 1524 return (0); 1525 } 1526 1527 int 1528 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg) 1529 { 1530 1531 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW)); 1532 } 1533 1534 int 1535 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1536 enum uio_seg segflg, int follow) 1537 { 1538 struct vnode *vp; 1539 struct mount *mp; 1540 struct nameidata nd; 1541 int error; 1542 1543 bwillwrite(); 1544 NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, 1545 fd1, td); 1546 1547 if ((error = namei(&nd)) != 0) 1548 return (error); 1549 NDFREE(&nd, NDF_ONLY_PNBUF); 1550 vp = nd.ni_vp; 1551 if (vp->v_type == VDIR) { 1552 vrele(vp); 1553 return (EPERM); /* POSIX */ 1554 } 1555 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 1556 vrele(vp); 1557 return (error); 1558 } 1559 
NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | 1560 AUDITVNODE2, segflg, path2, fd2, CAP_CREATE, td); 1561 if ((error = namei(&nd)) == 0) { 1562 if (nd.ni_vp != NULL) { 1563 if (nd.ni_dvp == nd.ni_vp) 1564 vrele(nd.ni_dvp); 1565 else 1566 vput(nd.ni_dvp); 1567 vrele(nd.ni_vp); 1568 error = EEXIST; 1569 } else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) 1570 == 0) { 1571 error = can_hardlink(vp, td->td_ucred); 1572 if (error == 0) 1573 #ifdef MAC 1574 error = mac_vnode_check_link(td->td_ucred, 1575 nd.ni_dvp, vp, &nd.ni_cnd); 1576 if (error == 0) 1577 #endif 1578 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1579 VOP_UNLOCK(vp, 0); 1580 vput(nd.ni_dvp); 1581 } 1582 NDFREE(&nd, NDF_ONLY_PNBUF); 1583 } 1584 vrele(vp); 1585 vn_finished_write(mp); 1586 return (error); 1587 } 1588 1589 /* 1590 * Make a symbolic link. 1591 */ 1592 #ifndef _SYS_SYSPROTO_H_ 1593 struct symlink_args { 1594 char *path; 1595 char *link; 1596 }; 1597 #endif 1598 int 1599 sys_symlink(td, uap) 1600 struct thread *td; 1601 register struct symlink_args /* { 1602 char *path; 1603 char *link; 1604 } */ *uap; 1605 { 1606 1607 return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE)); 1608 } 1609 1610 #ifndef _SYS_SYSPROTO_H_ 1611 struct symlinkat_args { 1612 char *path; 1613 int fd; 1614 char *path2; 1615 }; 1616 #endif 1617 int 1618 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1619 { 1620 1621 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1622 UIO_USERSPACE)); 1623 } 1624 1625 int 1626 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg) 1627 { 1628 1629 return (kern_symlinkat(td, path, AT_FDCWD, link, segflg)); 1630 } 1631 1632 int 1633 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1634 enum uio_seg segflg) 1635 { 1636 struct mount *mp; 1637 struct vattr vattr; 1638 char *syspath; 1639 int error; 1640 struct nameidata nd; 1641 1642 if (segflg == UIO_SYSSPACE) { 1643 syspath = path1; 1644 } else { 1645 
syspath = uma_zalloc(namei_zone, M_WAITOK); 1646 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1647 goto out; 1648 } 1649 AUDIT_ARG_TEXT(syspath); 1650 restart: 1651 bwillwrite(); 1652 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | 1653 AUDITVNODE1, segflg, path2, fd, CAP_CREATE, td); 1654 if ((error = namei(&nd)) != 0) 1655 goto out; 1656 if (nd.ni_vp) { 1657 NDFREE(&nd, NDF_ONLY_PNBUF); 1658 if (nd.ni_vp == nd.ni_dvp) 1659 vrele(nd.ni_dvp); 1660 else 1661 vput(nd.ni_dvp); 1662 vrele(nd.ni_vp); 1663 error = EEXIST; 1664 goto out; 1665 } 1666 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1667 NDFREE(&nd, NDF_ONLY_PNBUF); 1668 vput(nd.ni_dvp); 1669 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1670 goto out; 1671 goto restart; 1672 } 1673 VATTR_NULL(&vattr); 1674 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1675 #ifdef MAC 1676 vattr.va_type = VLNK; 1677 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1678 &vattr); 1679 if (error) 1680 goto out2; 1681 #endif 1682 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1683 if (error == 0) 1684 vput(nd.ni_vp); 1685 #ifdef MAC 1686 out2: 1687 #endif 1688 NDFREE(&nd, NDF_ONLY_PNBUF); 1689 vput(nd.ni_dvp); 1690 vn_finished_write(mp); 1691 out: 1692 if (segflg != UIO_SYSSPACE) 1693 uma_zfree(namei_zone, syspath); 1694 return (error); 1695 } 1696 1697 /* 1698 * Delete a whiteout from the filesystem. 
1699 */ 1700 int 1701 sys_undelete(td, uap) 1702 struct thread *td; 1703 register struct undelete_args /* { 1704 char *path; 1705 } */ *uap; 1706 { 1707 int error; 1708 struct mount *mp; 1709 struct nameidata nd; 1710 1711 restart: 1712 bwillwrite(); 1713 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1714 UIO_USERSPACE, uap->path, td); 1715 error = namei(&nd); 1716 if (error) 1717 return (error); 1718 1719 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1720 NDFREE(&nd, NDF_ONLY_PNBUF); 1721 if (nd.ni_vp == nd.ni_dvp) 1722 vrele(nd.ni_dvp); 1723 else 1724 vput(nd.ni_dvp); 1725 if (nd.ni_vp) 1726 vrele(nd.ni_vp); 1727 return (EEXIST); 1728 } 1729 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1730 NDFREE(&nd, NDF_ONLY_PNBUF); 1731 vput(nd.ni_dvp); 1732 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1733 return (error); 1734 goto restart; 1735 } 1736 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1737 NDFREE(&nd, NDF_ONLY_PNBUF); 1738 vput(nd.ni_dvp); 1739 vn_finished_write(mp); 1740 return (error); 1741 } 1742 1743 /* 1744 * Delete a name from the filesystem. 
1745 */ 1746 #ifndef _SYS_SYSPROTO_H_ 1747 struct unlink_args { 1748 char *path; 1749 }; 1750 #endif 1751 int 1752 sys_unlink(td, uap) 1753 struct thread *td; 1754 struct unlink_args /* { 1755 char *path; 1756 } */ *uap; 1757 { 1758 1759 return (kern_unlink(td, uap->path, UIO_USERSPACE)); 1760 } 1761 1762 #ifndef _SYS_SYSPROTO_H_ 1763 struct unlinkat_args { 1764 int fd; 1765 char *path; 1766 int flag; 1767 }; 1768 #endif 1769 int 1770 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1771 { 1772 int flag = uap->flag; 1773 int fd = uap->fd; 1774 char *path = uap->path; 1775 1776 if (flag & ~AT_REMOVEDIR) 1777 return (EINVAL); 1778 1779 if (flag & AT_REMOVEDIR) 1780 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1781 else 1782 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1783 } 1784 1785 int 1786 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg) 1787 { 1788 1789 return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0)); 1790 } 1791 1792 int 1793 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1794 ino_t oldinum) 1795 { 1796 struct mount *mp; 1797 struct vnode *vp; 1798 int error; 1799 struct nameidata nd; 1800 struct stat sb; 1801 1802 restart: 1803 bwillwrite(); 1804 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | 1805 AUDITVNODE1, pathseg, path, fd, CAP_DELETE, td); 1806 if ((error = namei(&nd)) != 0) 1807 return (error == EINVAL ? EPERM : error); 1808 vp = nd.ni_vp; 1809 if (vp->v_type == VDIR && oldinum == 0) { 1810 error = EPERM; /* POSIX */ 1811 } else if (oldinum != 0 && 1812 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1813 sb.st_ino != oldinum) { 1814 error = EIDRM; /* Identifier removed */ 1815 } else { 1816 /* 1817 * The root of a mounted filesystem cannot be deleted. 1818 * 1819 * XXX: can this only be a VDIR case? 
1820 */ 1821 if (vp->v_vflag & VV_ROOT) 1822 error = EBUSY; 1823 } 1824 if (error == 0) { 1825 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1826 NDFREE(&nd, NDF_ONLY_PNBUF); 1827 vput(nd.ni_dvp); 1828 if (vp == nd.ni_dvp) 1829 vrele(vp); 1830 else 1831 vput(vp); 1832 if ((error = vn_start_write(NULL, &mp, 1833 V_XSLEEP | PCATCH)) != 0) 1834 return (error); 1835 goto restart; 1836 } 1837 #ifdef MAC 1838 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1839 &nd.ni_cnd); 1840 if (error) 1841 goto out; 1842 #endif 1843 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1844 #ifdef MAC 1845 out: 1846 #endif 1847 vn_finished_write(mp); 1848 } 1849 NDFREE(&nd, NDF_ONLY_PNBUF); 1850 vput(nd.ni_dvp); 1851 if (vp == nd.ni_dvp) 1852 vrele(vp); 1853 else 1854 vput(vp); 1855 return (error); 1856 } 1857 1858 /* 1859 * Reposition read/write file offset. 1860 */ 1861 #ifndef _SYS_SYSPROTO_H_ 1862 struct lseek_args { 1863 int fd; 1864 int pad; 1865 off_t offset; 1866 int whence; 1867 }; 1868 #endif 1869 int 1870 sys_lseek(td, uap) 1871 struct thread *td; 1872 register struct lseek_args /* { 1873 int fd; 1874 int pad; 1875 off_t offset; 1876 int whence; 1877 } */ *uap; 1878 { 1879 struct ucred *cred = td->td_ucred; 1880 struct file *fp; 1881 struct vnode *vp; 1882 struct vattr vattr; 1883 off_t foffset, offset, size; 1884 int error, noneg; 1885 1886 AUDIT_ARG_FD(uap->fd); 1887 if ((error = fget(td, uap->fd, CAP_SEEK, &fp)) != 0) 1888 return (error); 1889 if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) { 1890 fdrop(fp, td); 1891 return (ESPIPE); 1892 } 1893 vp = fp->f_vnode; 1894 foffset = foffset_lock(fp, 0); 1895 noneg = (vp->v_type != VCHR); 1896 offset = uap->offset; 1897 switch (uap->whence) { 1898 case L_INCR: 1899 if (noneg && 1900 (foffset < 0 || 1901 (offset > 0 && foffset > OFF_MAX - offset))) { 1902 error = EOVERFLOW; 1903 break; 1904 } 1905 offset += foffset; 1906 break; 1907 case L_XTND: 1908 vn_lock(vp, LK_SHARED | LK_RETRY); 1909 error = VOP_GETATTR(vp, 
&vattr, cred); 1910 VOP_UNLOCK(vp, 0); 1911 if (error) 1912 break; 1913 1914 /* 1915 * If the file references a disk device, then fetch 1916 * the media size and use that to determine the ending 1917 * offset. 1918 */ 1919 if (vattr.va_size == 0 && vp->v_type == VCHR && 1920 fo_ioctl(fp, DIOCGMEDIASIZE, &size, cred, td) == 0) 1921 vattr.va_size = size; 1922 if (noneg && 1923 (vattr.va_size > OFF_MAX || 1924 (offset > 0 && vattr.va_size > OFF_MAX - offset))) { 1925 error = EOVERFLOW; 1926 break; 1927 } 1928 offset += vattr.va_size; 1929 break; 1930 case L_SET: 1931 break; 1932 case SEEK_DATA: 1933 error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td); 1934 break; 1935 case SEEK_HOLE: 1936 error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td); 1937 break; 1938 default: 1939 error = EINVAL; 1940 } 1941 if (error == 0 && noneg && offset < 0) 1942 error = EINVAL; 1943 if (error != 0) 1944 goto drop; 1945 VFS_KNOTE_UNLOCKED(vp, 0); 1946 *(off_t *)(td->td_retval) = offset; 1947 drop: 1948 fdrop(fp, td); 1949 foffset_unlock(fp, offset, error != 0 ? FOF_NOUPDATE : 0); 1950 return (error); 1951 } 1952 1953 #if defined(COMPAT_43) 1954 /* 1955 * Reposition read/write file offset. 
1956 */ 1957 #ifndef _SYS_SYSPROTO_H_ 1958 struct olseek_args { 1959 int fd; 1960 long offset; 1961 int whence; 1962 }; 1963 #endif 1964 int 1965 olseek(td, uap) 1966 struct thread *td; 1967 register struct olseek_args /* { 1968 int fd; 1969 long offset; 1970 int whence; 1971 } */ *uap; 1972 { 1973 struct lseek_args /* { 1974 int fd; 1975 int pad; 1976 off_t offset; 1977 int whence; 1978 } */ nuap; 1979 1980 nuap.fd = uap->fd; 1981 nuap.offset = uap->offset; 1982 nuap.whence = uap->whence; 1983 return (sys_lseek(td, &nuap)); 1984 } 1985 #endif /* COMPAT_43 */ 1986 1987 /* Version with the 'pad' argument */ 1988 int 1989 freebsd6_lseek(td, uap) 1990 struct thread *td; 1991 register struct freebsd6_lseek_args *uap; 1992 { 1993 struct lseek_args ouap; 1994 1995 ouap.fd = uap->fd; 1996 ouap.offset = uap->offset; 1997 ouap.whence = uap->whence; 1998 return (sys_lseek(td, &ouap)); 1999 } 2000 2001 /* 2002 * Check access permissions using passed credentials. 2003 */ 2004 static int 2005 vn_access(vp, user_flags, cred, td) 2006 struct vnode *vp; 2007 int user_flags; 2008 struct ucred *cred; 2009 struct thread *td; 2010 { 2011 int error; 2012 accmode_t accmode; 2013 2014 /* Flags == 0 means only check for existence. */ 2015 error = 0; 2016 if (user_flags) { 2017 accmode = 0; 2018 if (user_flags & R_OK) 2019 accmode |= VREAD; 2020 if (user_flags & W_OK) 2021 accmode |= VWRITE; 2022 if (user_flags & X_OK) 2023 accmode |= VEXEC; 2024 #ifdef MAC 2025 error = mac_vnode_check_access(cred, vp, accmode); 2026 if (error) 2027 return (error); 2028 #endif 2029 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2030 error = VOP_ACCESS(vp, accmode, cred, td); 2031 } 2032 return (error); 2033 } 2034 2035 /* 2036 * Check access permissions using "real" credentials. 
2037 */ 2038 #ifndef _SYS_SYSPROTO_H_ 2039 struct access_args { 2040 char *path; 2041 int amode; 2042 }; 2043 #endif 2044 int 2045 sys_access(td, uap) 2046 struct thread *td; 2047 register struct access_args /* { 2048 char *path; 2049 int amode; 2050 } */ *uap; 2051 { 2052 2053 return (kern_access(td, uap->path, UIO_USERSPACE, uap->amode)); 2054 } 2055 2056 #ifndef _SYS_SYSPROTO_H_ 2057 struct faccessat_args { 2058 int dirfd; 2059 char *path; 2060 int amode; 2061 int flag; 2062 } 2063 #endif 2064 int 2065 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2066 { 2067 2068 if (uap->flag & ~AT_EACCESS) 2069 return (EINVAL); 2070 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2071 uap->amode)); 2072 } 2073 2074 int 2075 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2076 { 2077 2078 return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, amode)); 2079 } 2080 2081 int 2082 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2083 int flag, int amode) 2084 { 2085 struct ucred *cred, *tmpcred; 2086 struct vnode *vp; 2087 struct nameidata nd; 2088 int error; 2089 2090 /* 2091 * Create and modify a temporary credential instead of one that 2092 * is potentially shared. 
2093 */ 2094 if (!(flag & AT_EACCESS)) { 2095 cred = td->td_ucred; 2096 tmpcred = crdup(cred); 2097 tmpcred->cr_uid = cred->cr_ruid; 2098 tmpcred->cr_groups[0] = cred->cr_rgid; 2099 td->td_ucred = tmpcred; 2100 } else 2101 cred = tmpcred = td->td_ucred; 2102 AUDIT_ARG_VALUE(amode); 2103 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2104 AUDITVNODE1, pathseg, path, fd, CAP_FSTAT, td); 2105 if ((error = namei(&nd)) != 0) 2106 goto out1; 2107 vp = nd.ni_vp; 2108 2109 error = vn_access(vp, amode, tmpcred, td); 2110 NDFREE(&nd, NDF_ONLY_PNBUF); 2111 vput(vp); 2112 out1: 2113 if (!(flag & AT_EACCESS)) { 2114 td->td_ucred = cred; 2115 crfree(tmpcred); 2116 } 2117 return (error); 2118 } 2119 2120 /* 2121 * Check access permissions using "effective" credentials. 2122 */ 2123 #ifndef _SYS_SYSPROTO_H_ 2124 struct eaccess_args { 2125 char *path; 2126 int amode; 2127 }; 2128 #endif 2129 int 2130 sys_eaccess(td, uap) 2131 struct thread *td; 2132 register struct eaccess_args /* { 2133 char *path; 2134 int amode; 2135 } */ *uap; 2136 { 2137 2138 return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->amode)); 2139 } 2140 2141 int 2142 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2143 { 2144 2145 return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, amode)); 2146 } 2147 2148 #if defined(COMPAT_43) 2149 /* 2150 * Get file status; this version follows links. 
2151 */ 2152 #ifndef _SYS_SYSPROTO_H_ 2153 struct ostat_args { 2154 char *path; 2155 struct ostat *ub; 2156 }; 2157 #endif 2158 int 2159 ostat(td, uap) 2160 struct thread *td; 2161 register struct ostat_args /* { 2162 char *path; 2163 struct ostat *ub; 2164 } */ *uap; 2165 { 2166 struct stat sb; 2167 struct ostat osb; 2168 int error; 2169 2170 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2171 if (error) 2172 return (error); 2173 cvtstat(&sb, &osb); 2174 error = copyout(&osb, uap->ub, sizeof (osb)); 2175 return (error); 2176 } 2177 2178 /* 2179 * Get file status; this version does not follow links. 2180 */ 2181 #ifndef _SYS_SYSPROTO_H_ 2182 struct olstat_args { 2183 char *path; 2184 struct ostat *ub; 2185 }; 2186 #endif 2187 int 2188 olstat(td, uap) 2189 struct thread *td; 2190 register struct olstat_args /* { 2191 char *path; 2192 struct ostat *ub; 2193 } */ *uap; 2194 { 2195 struct stat sb; 2196 struct ostat osb; 2197 int error; 2198 2199 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2200 if (error) 2201 return (error); 2202 cvtstat(&sb, &osb); 2203 error = copyout(&osb, uap->ub, sizeof (osb)); 2204 return (error); 2205 } 2206 2207 /* 2208 * Convert from an old to a new stat structure. 2209 */ 2210 void 2211 cvtstat(st, ost) 2212 struct stat *st; 2213 struct ostat *ost; 2214 { 2215 2216 ost->st_dev = st->st_dev; 2217 ost->st_ino = st->st_ino; 2218 ost->st_mode = st->st_mode; 2219 ost->st_nlink = st->st_nlink; 2220 ost->st_uid = st->st_uid; 2221 ost->st_gid = st->st_gid; 2222 ost->st_rdev = st->st_rdev; 2223 if (st->st_size < (quad_t)1 << 32) 2224 ost->st_size = st->st_size; 2225 else 2226 ost->st_size = -2; 2227 ost->st_atim = st->st_atim; 2228 ost->st_mtim = st->st_mtim; 2229 ost->st_ctim = st->st_ctim; 2230 ost->st_blksize = st->st_blksize; 2231 ost->st_blocks = st->st_blocks; 2232 ost->st_flags = st->st_flags; 2233 ost->st_gen = st->st_gen; 2234 } 2235 #endif /* COMPAT_43 */ 2236 2237 /* 2238 * Get file status; this version follows links. 
2239 */ 2240 #ifndef _SYS_SYSPROTO_H_ 2241 struct stat_args { 2242 char *path; 2243 struct stat *ub; 2244 }; 2245 #endif 2246 int 2247 sys_stat(td, uap) 2248 struct thread *td; 2249 register struct stat_args /* { 2250 char *path; 2251 struct stat *ub; 2252 } */ *uap; 2253 { 2254 struct stat sb; 2255 int error; 2256 2257 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2258 if (error == 0) 2259 error = copyout(&sb, uap->ub, sizeof (sb)); 2260 return (error); 2261 } 2262 2263 #ifndef _SYS_SYSPROTO_H_ 2264 struct fstatat_args { 2265 int fd; 2266 char *path; 2267 struct stat *buf; 2268 int flag; 2269 } 2270 #endif 2271 int 2272 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2273 { 2274 struct stat sb; 2275 int error; 2276 2277 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2278 UIO_USERSPACE, &sb); 2279 if (error == 0) 2280 error = copyout(&sb, uap->buf, sizeof (sb)); 2281 return (error); 2282 } 2283 2284 int 2285 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2286 { 2287 2288 return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp)); 2289 } 2290 2291 int 2292 kern_statat(struct thread *td, int flag, int fd, char *path, 2293 enum uio_seg pathseg, struct stat *sbp) 2294 { 2295 2296 return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL)); 2297 } 2298 2299 int 2300 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path, 2301 enum uio_seg pathseg, struct stat *sbp, 2302 void (*hook)(struct vnode *vp, struct stat *sbp)) 2303 { 2304 struct nameidata nd; 2305 struct stat sb; 2306 int error; 2307 2308 if (flag & ~AT_SYMLINK_NOFOLLOW) 2309 return (EINVAL); 2310 2311 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : 2312 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, 2313 path, fd, CAP_FSTAT, td); 2314 2315 if ((error = namei(&nd)) != 0) 2316 return (error); 2317 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2318 if (!error) { 2319 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0); 2320 if (S_ISREG(sb.st_mode)) 2321 SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0); 2322 if (__predict_false(hook != NULL)) 2323 hook(nd.ni_vp, &sb); 2324 } 2325 NDFREE(&nd, NDF_ONLY_PNBUF); 2326 vput(nd.ni_vp); 2327 if (error) 2328 return (error); 2329 *sbp = sb; 2330 #ifdef KTRACE 2331 if (KTRPOINT(td, KTR_STRUCT)) 2332 ktrstat(&sb); 2333 #endif 2334 return (0); 2335 } 2336 2337 /* 2338 * Get file status; this version does not follow links. 2339 */ 2340 #ifndef _SYS_SYSPROTO_H_ 2341 struct lstat_args { 2342 char *path; 2343 struct stat *ub; 2344 }; 2345 #endif 2346 int 2347 sys_lstat(td, uap) 2348 struct thread *td; 2349 register struct lstat_args /* { 2350 char *path; 2351 struct stat *ub; 2352 } */ *uap; 2353 { 2354 struct stat sb; 2355 int error; 2356 2357 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2358 if (error == 0) 2359 error = copyout(&sb, uap->ub, sizeof (sb)); 2360 return (error); 2361 } 2362 2363 int 2364 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2365 { 2366 2367 return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg, 2368 sbp)); 2369 } 2370 2371 /* 2372 * Implementation of the NetBSD [l]stat() functions. 
2373 */ 2374 void 2375 cvtnstat(sb, nsb) 2376 struct stat *sb; 2377 struct nstat *nsb; 2378 { 2379 bzero(nsb, sizeof *nsb); 2380 nsb->st_dev = sb->st_dev; 2381 nsb->st_ino = sb->st_ino; 2382 nsb->st_mode = sb->st_mode; 2383 nsb->st_nlink = sb->st_nlink; 2384 nsb->st_uid = sb->st_uid; 2385 nsb->st_gid = sb->st_gid; 2386 nsb->st_rdev = sb->st_rdev; 2387 nsb->st_atim = sb->st_atim; 2388 nsb->st_mtim = sb->st_mtim; 2389 nsb->st_ctim = sb->st_ctim; 2390 nsb->st_size = sb->st_size; 2391 nsb->st_blocks = sb->st_blocks; 2392 nsb->st_blksize = sb->st_blksize; 2393 nsb->st_flags = sb->st_flags; 2394 nsb->st_gen = sb->st_gen; 2395 nsb->st_birthtim = sb->st_birthtim; 2396 } 2397 2398 #ifndef _SYS_SYSPROTO_H_ 2399 struct nstat_args { 2400 char *path; 2401 struct nstat *ub; 2402 }; 2403 #endif 2404 int 2405 sys_nstat(td, uap) 2406 struct thread *td; 2407 register struct nstat_args /* { 2408 char *path; 2409 struct nstat *ub; 2410 } */ *uap; 2411 { 2412 struct stat sb; 2413 struct nstat nsb; 2414 int error; 2415 2416 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2417 if (error) 2418 return (error); 2419 cvtnstat(&sb, &nsb); 2420 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2421 return (error); 2422 } 2423 2424 /* 2425 * NetBSD lstat. Get file status; this version does not follow links. 2426 */ 2427 #ifndef _SYS_SYSPROTO_H_ 2428 struct lstat_args { 2429 char *path; 2430 struct stat *ub; 2431 }; 2432 #endif 2433 int 2434 sys_nlstat(td, uap) 2435 struct thread *td; 2436 register struct nlstat_args /* { 2437 char *path; 2438 struct nstat *ub; 2439 } */ *uap; 2440 { 2441 struct stat sb; 2442 struct nstat nsb; 2443 int error; 2444 2445 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2446 if (error) 2447 return (error); 2448 cvtnstat(&sb, &nsb); 2449 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2450 return (error); 2451 } 2452 2453 /* 2454 * Get configurable pathname variables. 
2455 */ 2456 #ifndef _SYS_SYSPROTO_H_ 2457 struct pathconf_args { 2458 char *path; 2459 int name; 2460 }; 2461 #endif 2462 int 2463 sys_pathconf(td, uap) 2464 struct thread *td; 2465 register struct pathconf_args /* { 2466 char *path; 2467 int name; 2468 } */ *uap; 2469 { 2470 2471 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2472 } 2473 2474 #ifndef _SYS_SYSPROTO_H_ 2475 struct lpathconf_args { 2476 char *path; 2477 int name; 2478 }; 2479 #endif 2480 int 2481 sys_lpathconf(td, uap) 2482 struct thread *td; 2483 register struct lpathconf_args /* { 2484 char *path; 2485 int name; 2486 } */ *uap; 2487 { 2488 2489 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, NOFOLLOW)); 2490 } 2491 2492 int 2493 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2494 u_long flags) 2495 { 2496 struct nameidata nd; 2497 int error; 2498 2499 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2500 pathseg, path, td); 2501 if ((error = namei(&nd)) != 0) 2502 return (error); 2503 NDFREE(&nd, NDF_ONLY_PNBUF); 2504 2505 /* If asynchronous I/O is available, it works for all files. */ 2506 if (name == _PC_ASYNC_IO) 2507 td->td_retval[0] = async_io_version; 2508 else 2509 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2510 vput(nd.ni_vp); 2511 return (error); 2512 } 2513 2514 /* 2515 * Return target name of a symbolic link. 
2516 */ 2517 #ifndef _SYS_SYSPROTO_H_ 2518 struct readlink_args { 2519 char *path; 2520 char *buf; 2521 size_t count; 2522 }; 2523 #endif 2524 int 2525 sys_readlink(td, uap) 2526 struct thread *td; 2527 register struct readlink_args /* { 2528 char *path; 2529 char *buf; 2530 size_t count; 2531 } */ *uap; 2532 { 2533 2534 return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf, 2535 UIO_USERSPACE, uap->count)); 2536 } 2537 #ifndef _SYS_SYSPROTO_H_ 2538 struct readlinkat_args { 2539 int fd; 2540 char *path; 2541 char *buf; 2542 size_t bufsize; 2543 }; 2544 #endif 2545 int 2546 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2547 { 2548 2549 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2550 uap->buf, UIO_USERSPACE, uap->bufsize)); 2551 } 2552 2553 int 2554 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf, 2555 enum uio_seg bufseg, size_t count) 2556 { 2557 2558 return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg, 2559 count)); 2560 } 2561 2562 int 2563 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2564 char *buf, enum uio_seg bufseg, size_t count) 2565 { 2566 struct vnode *vp; 2567 struct iovec aiov; 2568 struct uio auio; 2569 int error; 2570 struct nameidata nd; 2571 2572 if (count > IOSIZE_MAX) 2573 return (EINVAL); 2574 2575 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | 2576 AUDITVNODE1, pathseg, path, fd, td); 2577 2578 if ((error = namei(&nd)) != 0) 2579 return (error); 2580 NDFREE(&nd, NDF_ONLY_PNBUF); 2581 vp = nd.ni_vp; 2582 #ifdef MAC 2583 error = mac_vnode_check_readlink(td->td_ucred, vp); 2584 if (error) { 2585 vput(vp); 2586 return (error); 2587 } 2588 #endif 2589 if (vp->v_type != VLNK) 2590 error = EINVAL; 2591 else { 2592 aiov.iov_base = buf; 2593 aiov.iov_len = count; 2594 auio.uio_iov = &aiov; 2595 auio.uio_iovcnt = 1; 2596 auio.uio_offset = 0; 2597 auio.uio_rw = UIO_READ; 2598 auio.uio_segflg = bufseg; 2599 auio.uio_td = td; 2600 
auio.uio_resid = count; 2601 error = VOP_READLINK(vp, &auio, td->td_ucred); 2602 } 2603 vput(vp); 2604 td->td_retval[0] = count - auio.uio_resid; 2605 return (error); 2606 } 2607 2608 /* 2609 * Common implementation code for chflags() and fchflags(). 2610 */ 2611 static int 2612 setfflags(td, vp, flags) 2613 struct thread *td; 2614 struct vnode *vp; 2615 int flags; 2616 { 2617 int error; 2618 struct mount *mp; 2619 struct vattr vattr; 2620 2621 /* We can't support the value matching VNOVAL. */ 2622 if (flags == VNOVAL) 2623 return (EOPNOTSUPP); 2624 2625 /* 2626 * Prevent non-root users from setting flags on devices. When 2627 * a device is reused, users can retain ownership of the device 2628 * if they are allowed to set flags and programs assume that 2629 * chown can't fail when done as root. 2630 */ 2631 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2632 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2633 if (error) 2634 return (error); 2635 } 2636 2637 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2638 return (error); 2639 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2640 VATTR_NULL(&vattr); 2641 vattr.va_flags = flags; 2642 #ifdef MAC 2643 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2644 if (error == 0) 2645 #endif 2646 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2647 VOP_UNLOCK(vp, 0); 2648 vn_finished_write(mp); 2649 return (error); 2650 } 2651 2652 /* 2653 * Change flags of a file given a path name. 
2654 */ 2655 #ifndef _SYS_SYSPROTO_H_ 2656 struct chflags_args { 2657 char *path; 2658 int flags; 2659 }; 2660 #endif 2661 int 2662 sys_chflags(td, uap) 2663 struct thread *td; 2664 register struct chflags_args /* { 2665 char *path; 2666 int flags; 2667 } */ *uap; 2668 { 2669 int error; 2670 struct nameidata nd; 2671 2672 AUDIT_ARG_FFLAGS(uap->flags); 2673 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, UIO_USERSPACE, 2674 uap->path, td); 2675 if ((error = namei(&nd)) != 0) 2676 return (error); 2677 NDFREE(&nd, NDF_ONLY_PNBUF); 2678 error = setfflags(td, nd.ni_vp, uap->flags); 2679 vrele(nd.ni_vp); 2680 return (error); 2681 } 2682 2683 /* 2684 * Same as chflags() but doesn't follow symlinks. 2685 */ 2686 int 2687 sys_lchflags(td, uap) 2688 struct thread *td; 2689 register struct lchflags_args /* { 2690 char *path; 2691 int flags; 2692 } */ *uap; 2693 { 2694 int error; 2695 struct nameidata nd; 2696 2697 AUDIT_ARG_FFLAGS(uap->flags); 2698 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, UIO_USERSPACE, 2699 uap->path, td); 2700 if ((error = namei(&nd)) != 0) 2701 return (error); 2702 NDFREE(&nd, NDF_ONLY_PNBUF); 2703 error = setfflags(td, nd.ni_vp, uap->flags); 2704 vrele(nd.ni_vp); 2705 return (error); 2706 } 2707 2708 /* 2709 * Change flags of a file given a file descriptor. 
2710 */ 2711 #ifndef _SYS_SYSPROTO_H_ 2712 struct fchflags_args { 2713 int fd; 2714 int flags; 2715 }; 2716 #endif 2717 int 2718 sys_fchflags(td, uap) 2719 struct thread *td; 2720 register struct fchflags_args /* { 2721 int fd; 2722 int flags; 2723 } */ *uap; 2724 { 2725 struct file *fp; 2726 int error; 2727 2728 AUDIT_ARG_FD(uap->fd); 2729 AUDIT_ARG_FFLAGS(uap->flags); 2730 if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FCHFLAGS, 2731 &fp)) != 0) 2732 return (error); 2733 #ifdef AUDIT 2734 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2735 AUDIT_ARG_VNODE1(fp->f_vnode); 2736 VOP_UNLOCK(fp->f_vnode, 0); 2737 #endif 2738 error = setfflags(td, fp->f_vnode, uap->flags); 2739 fdrop(fp, td); 2740 return (error); 2741 } 2742 2743 /* 2744 * Common implementation code for chmod(), lchmod() and fchmod(). 2745 */ 2746 int 2747 setfmode(td, cred, vp, mode) 2748 struct thread *td; 2749 struct ucred *cred; 2750 struct vnode *vp; 2751 int mode; 2752 { 2753 int error; 2754 struct mount *mp; 2755 struct vattr vattr; 2756 2757 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2758 return (error); 2759 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2760 VATTR_NULL(&vattr); 2761 vattr.va_mode = mode & ALLPERMS; 2762 #ifdef MAC 2763 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2764 if (error == 0) 2765 #endif 2766 error = VOP_SETATTR(vp, &vattr, cred); 2767 VOP_UNLOCK(vp, 0); 2768 vn_finished_write(mp); 2769 return (error); 2770 } 2771 2772 /* 2773 * Change mode of a file given path name. 
2774 */ 2775 #ifndef _SYS_SYSPROTO_H_ 2776 struct chmod_args { 2777 char *path; 2778 int mode; 2779 }; 2780 #endif 2781 int 2782 sys_chmod(td, uap) 2783 struct thread *td; 2784 register struct chmod_args /* { 2785 char *path; 2786 int mode; 2787 } */ *uap; 2788 { 2789 2790 return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode)); 2791 } 2792 2793 #ifndef _SYS_SYSPROTO_H_ 2794 struct fchmodat_args { 2795 int dirfd; 2796 char *path; 2797 mode_t mode; 2798 int flag; 2799 } 2800 #endif 2801 int 2802 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2803 { 2804 int flag = uap->flag; 2805 int fd = uap->fd; 2806 char *path = uap->path; 2807 mode_t mode = uap->mode; 2808 2809 if (flag & ~AT_SYMLINK_NOFOLLOW) 2810 return (EINVAL); 2811 2812 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2813 } 2814 2815 int 2816 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode) 2817 { 2818 2819 return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0)); 2820 } 2821 2822 /* 2823 * Change mode of a file given path name (don't follow links.) 2824 */ 2825 #ifndef _SYS_SYSPROTO_H_ 2826 struct lchmod_args { 2827 char *path; 2828 int mode; 2829 }; 2830 #endif 2831 int 2832 sys_lchmod(td, uap) 2833 struct thread *td; 2834 register struct lchmod_args /* { 2835 char *path; 2836 int mode; 2837 } */ *uap; 2838 { 2839 2840 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2841 uap->mode, AT_SYMLINK_NOFOLLOW)); 2842 } 2843 2844 2845 int 2846 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2847 mode_t mode, int flag) 2848 { 2849 int error; 2850 struct nameidata nd; 2851 int follow; 2852 2853 AUDIT_ARG_MODE(mode); 2854 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; 2855 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, 2856 path, fd, CAP_FCHMOD, td); 2857 if ((error = namei(&nd)) != 0) 2858 return (error); 2859 NDFREE(&nd, NDF_ONLY_PNBUF); 2860 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2861 vrele(nd.ni_vp); 2862 return (error); 2863 } 2864 2865 /* 2866 * Change mode of a file given a file descriptor. 2867 */ 2868 #ifndef _SYS_SYSPROTO_H_ 2869 struct fchmod_args { 2870 int fd; 2871 int mode; 2872 }; 2873 #endif 2874 int 2875 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2876 { 2877 struct file *fp; 2878 int error; 2879 2880 AUDIT_ARG_FD(uap->fd); 2881 AUDIT_ARG_MODE(uap->mode); 2882 2883 error = fget(td, uap->fd, CAP_FCHMOD, &fp); 2884 if (error != 0) 2885 return (error); 2886 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2887 fdrop(fp, td); 2888 return (error); 2889 } 2890 2891 /* 2892 * Common implementation for chown(), lchown(), and fchown() 2893 */ 2894 int 2895 setfown(td, cred, vp, uid, gid) 2896 struct thread *td; 2897 struct ucred *cred; 2898 struct vnode *vp; 2899 uid_t uid; 2900 gid_t gid; 2901 { 2902 int error; 2903 struct mount *mp; 2904 struct vattr vattr; 2905 2906 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2907 return (error); 2908 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2909 VATTR_NULL(&vattr); 2910 vattr.va_uid = uid; 2911 vattr.va_gid = gid; 2912 #ifdef MAC 2913 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2914 vattr.va_gid); 2915 if (error == 0) 2916 #endif 2917 error = VOP_SETATTR(vp, &vattr, cred); 2918 VOP_UNLOCK(vp, 0); 2919 vn_finished_write(mp); 2920 return (error); 2921 } 2922 2923 /* 2924 * Set ownership given a path name. 
2925 */ 2926 #ifndef _SYS_SYSPROTO_H_ 2927 struct chown_args { 2928 char *path; 2929 int uid; 2930 int gid; 2931 }; 2932 #endif 2933 int 2934 sys_chown(td, uap) 2935 struct thread *td; 2936 register struct chown_args /* { 2937 char *path; 2938 int uid; 2939 int gid; 2940 } */ *uap; 2941 { 2942 2943 return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 2944 } 2945 2946 #ifndef _SYS_SYSPROTO_H_ 2947 struct fchownat_args { 2948 int fd; 2949 const char * path; 2950 uid_t uid; 2951 gid_t gid; 2952 int flag; 2953 }; 2954 #endif 2955 int 2956 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2957 { 2958 int flag; 2959 2960 flag = uap->flag; 2961 if (flag & ~AT_SYMLINK_NOFOLLOW) 2962 return (EINVAL); 2963 2964 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2965 uap->gid, uap->flag)); 2966 } 2967 2968 int 2969 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 2970 int gid) 2971 { 2972 2973 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0)); 2974 } 2975 2976 int 2977 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2978 int uid, int gid, int flag) 2979 { 2980 struct nameidata nd; 2981 int error, follow; 2982 2983 AUDIT_ARG_OWNER(uid, gid); 2984 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2985 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, 2986 path, fd, CAP_FCHOWN, td); 2987 2988 if ((error = namei(&nd)) != 0) 2989 return (error); 2990 NDFREE(&nd, NDF_ONLY_PNBUF); 2991 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2992 vrele(nd.ni_vp); 2993 return (error); 2994 } 2995 2996 /* 2997 * Set ownership given a path name, do not cross symlinks. 
2998 */ 2999 #ifndef _SYS_SYSPROTO_H_ 3000 struct lchown_args { 3001 char *path; 3002 int uid; 3003 int gid; 3004 }; 3005 #endif 3006 int 3007 sys_lchown(td, uap) 3008 struct thread *td; 3009 register struct lchown_args /* { 3010 char *path; 3011 int uid; 3012 int gid; 3013 } */ *uap; 3014 { 3015 3016 return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 3017 } 3018 3019 int 3020 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 3021 int gid) 3022 { 3023 3024 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 3025 AT_SYMLINK_NOFOLLOW)); 3026 } 3027 3028 /* 3029 * Set ownership given a file descriptor. 3030 */ 3031 #ifndef _SYS_SYSPROTO_H_ 3032 struct fchown_args { 3033 int fd; 3034 int uid; 3035 int gid; 3036 }; 3037 #endif 3038 int 3039 sys_fchown(td, uap) 3040 struct thread *td; 3041 register struct fchown_args /* { 3042 int fd; 3043 int uid; 3044 int gid; 3045 } */ *uap; 3046 { 3047 struct file *fp; 3048 int error; 3049 3050 AUDIT_ARG_FD(uap->fd); 3051 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3052 error = fget(td, uap->fd, CAP_FCHOWN, &fp); 3053 if (error != 0) 3054 return (error); 3055 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3056 fdrop(fp, td); 3057 return (error); 3058 } 3059 3060 /* 3061 * Common implementation code for utimes(), lutimes(), and futimes(). 
3062 */ 3063 static int 3064 getutimes(usrtvp, tvpseg, tsp) 3065 const struct timeval *usrtvp; 3066 enum uio_seg tvpseg; 3067 struct timespec *tsp; 3068 { 3069 struct timeval tv[2]; 3070 const struct timeval *tvp; 3071 int error; 3072 3073 if (usrtvp == NULL) { 3074 vfs_timestamp(&tsp[0]); 3075 tsp[1] = tsp[0]; 3076 } else { 3077 if (tvpseg == UIO_SYSSPACE) { 3078 tvp = usrtvp; 3079 } else { 3080 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3081 return (error); 3082 tvp = tv; 3083 } 3084 3085 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3086 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3087 return (EINVAL); 3088 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3089 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3090 } 3091 return (0); 3092 } 3093 3094 /* 3095 * Common implementation code for utimes(), lutimes(), and futimes(). 3096 */ 3097 static int 3098 setutimes(td, vp, ts, numtimes, nullflag) 3099 struct thread *td; 3100 struct vnode *vp; 3101 const struct timespec *ts; 3102 int numtimes; 3103 int nullflag; 3104 { 3105 int error, setbirthtime; 3106 struct mount *mp; 3107 struct vattr vattr; 3108 3109 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3110 return (error); 3111 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3112 setbirthtime = 0; 3113 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3114 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3115 setbirthtime = 1; 3116 VATTR_NULL(&vattr); 3117 vattr.va_atime = ts[0]; 3118 vattr.va_mtime = ts[1]; 3119 if (setbirthtime) 3120 vattr.va_birthtime = ts[1]; 3121 if (numtimes > 2) 3122 vattr.va_birthtime = ts[2]; 3123 if (nullflag) 3124 vattr.va_vaflags |= VA_UTIMES_NULL; 3125 #ifdef MAC 3126 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3127 vattr.va_mtime); 3128 #endif 3129 if (error == 0) 3130 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3131 VOP_UNLOCK(vp, 0); 3132 vn_finished_write(mp); 3133 return (error); 3134 } 3135 3136 /* 3137 * Set the access and modification 
times of a file. 3138 */ 3139 #ifndef _SYS_SYSPROTO_H_ 3140 struct utimes_args { 3141 char *path; 3142 struct timeval *tptr; 3143 }; 3144 #endif 3145 int 3146 sys_utimes(td, uap) 3147 struct thread *td; 3148 register struct utimes_args /* { 3149 char *path; 3150 struct timeval *tptr; 3151 } */ *uap; 3152 { 3153 3154 return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3155 UIO_USERSPACE)); 3156 } 3157 3158 #ifndef _SYS_SYSPROTO_H_ 3159 struct futimesat_args { 3160 int fd; 3161 const char * path; 3162 const struct timeval * times; 3163 }; 3164 #endif 3165 int 3166 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3167 { 3168 3169 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3170 uap->times, UIO_USERSPACE)); 3171 } 3172 3173 int 3174 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg, 3175 struct timeval *tptr, enum uio_seg tptrseg) 3176 { 3177 3178 return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg)); 3179 } 3180 3181 int 3182 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3183 struct timeval *tptr, enum uio_seg tptrseg) 3184 { 3185 struct nameidata nd; 3186 struct timespec ts[2]; 3187 int error; 3188 3189 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3190 return (error); 3191 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, 3192 path, fd, CAP_FUTIMES, td); 3193 3194 if ((error = namei(&nd)) != 0) 3195 return (error); 3196 NDFREE(&nd, NDF_ONLY_PNBUF); 3197 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3198 vrele(nd.ni_vp); 3199 return (error); 3200 } 3201 3202 /* 3203 * Set the access and modification times of a file. 
3204 */ 3205 #ifndef _SYS_SYSPROTO_H_ 3206 struct lutimes_args { 3207 char *path; 3208 struct timeval *tptr; 3209 }; 3210 #endif 3211 int 3212 sys_lutimes(td, uap) 3213 struct thread *td; 3214 register struct lutimes_args /* { 3215 char *path; 3216 struct timeval *tptr; 3217 } */ *uap; 3218 { 3219 3220 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3221 UIO_USERSPACE)); 3222 } 3223 3224 int 3225 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3226 struct timeval *tptr, enum uio_seg tptrseg) 3227 { 3228 struct timespec ts[2]; 3229 int error; 3230 struct nameidata nd; 3231 3232 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3233 return (error); 3234 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3235 if ((error = namei(&nd)) != 0) 3236 return (error); 3237 NDFREE(&nd, NDF_ONLY_PNBUF); 3238 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3239 vrele(nd.ni_vp); 3240 return (error); 3241 } 3242 3243 /* 3244 * Set the access and modification times of a file. 
3245 */ 3246 #ifndef _SYS_SYSPROTO_H_ 3247 struct futimes_args { 3248 int fd; 3249 struct timeval *tptr; 3250 }; 3251 #endif 3252 int 3253 sys_futimes(td, uap) 3254 struct thread *td; 3255 register struct futimes_args /* { 3256 int fd; 3257 struct timeval *tptr; 3258 } */ *uap; 3259 { 3260 3261 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3262 } 3263 3264 int 3265 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3266 enum uio_seg tptrseg) 3267 { 3268 struct timespec ts[2]; 3269 struct file *fp; 3270 int error; 3271 3272 AUDIT_ARG_FD(fd); 3273 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3274 return (error); 3275 if ((error = getvnode(td->td_proc->p_fd, fd, CAP_FUTIMES, &fp)) 3276 != 0) 3277 return (error); 3278 #ifdef AUDIT 3279 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3280 AUDIT_ARG_VNODE1(fp->f_vnode); 3281 VOP_UNLOCK(fp->f_vnode, 0); 3282 #endif 3283 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3284 fdrop(fp, td); 3285 return (error); 3286 } 3287 3288 /* 3289 * Truncate a file given its path name. 
3290 */ 3291 #ifndef _SYS_SYSPROTO_H_ 3292 struct truncate_args { 3293 char *path; 3294 int pad; 3295 off_t length; 3296 }; 3297 #endif 3298 int 3299 sys_truncate(td, uap) 3300 struct thread *td; 3301 register struct truncate_args /* { 3302 char *path; 3303 int pad; 3304 off_t length; 3305 } */ *uap; 3306 { 3307 3308 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3309 } 3310 3311 int 3312 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3313 { 3314 struct mount *mp; 3315 struct vnode *vp; 3316 void *rl_cookie; 3317 struct vattr vattr; 3318 struct nameidata nd; 3319 int error; 3320 3321 if (length < 0) 3322 return(EINVAL); 3323 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3324 if ((error = namei(&nd)) != 0) 3325 return (error); 3326 vp = nd.ni_vp; 3327 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3328 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3329 vn_rangelock_unlock(vp, rl_cookie); 3330 vrele(vp); 3331 return (error); 3332 } 3333 NDFREE(&nd, NDF_ONLY_PNBUF); 3334 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3335 if (vp->v_type == VDIR) 3336 error = EISDIR; 3337 #ifdef MAC 3338 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3339 } 3340 #endif 3341 else if ((error = vn_writechk(vp)) == 0 && 3342 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3343 VATTR_NULL(&vattr); 3344 vattr.va_size = length; 3345 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3346 } 3347 VOP_UNLOCK(vp, 0); 3348 vn_finished_write(mp); 3349 vn_rangelock_unlock(vp, rl_cookie); 3350 vrele(vp); 3351 return (error); 3352 } 3353 3354 #if defined(COMPAT_43) 3355 /* 3356 * Truncate a file given its path name. 
3357 */ 3358 #ifndef _SYS_SYSPROTO_H_ 3359 struct otruncate_args { 3360 char *path; 3361 long length; 3362 }; 3363 #endif 3364 int 3365 otruncate(td, uap) 3366 struct thread *td; 3367 register struct otruncate_args /* { 3368 char *path; 3369 long length; 3370 } */ *uap; 3371 { 3372 struct truncate_args /* { 3373 char *path; 3374 int pad; 3375 off_t length; 3376 } */ nuap; 3377 3378 nuap.path = uap->path; 3379 nuap.length = uap->length; 3380 return (sys_truncate(td, &nuap)); 3381 } 3382 #endif /* COMPAT_43 */ 3383 3384 /* Versions with the pad argument */ 3385 int 3386 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3387 { 3388 struct truncate_args ouap; 3389 3390 ouap.path = uap->path; 3391 ouap.length = uap->length; 3392 return (sys_truncate(td, &ouap)); 3393 } 3394 3395 int 3396 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3397 { 3398 struct ftruncate_args ouap; 3399 3400 ouap.fd = uap->fd; 3401 ouap.length = uap->length; 3402 return (sys_ftruncate(td, &ouap)); 3403 } 3404 3405 /* 3406 * Sync an open file. 
3407 */ 3408 #ifndef _SYS_SYSPROTO_H_ 3409 struct fsync_args { 3410 int fd; 3411 }; 3412 #endif 3413 int 3414 sys_fsync(td, uap) 3415 struct thread *td; 3416 struct fsync_args /* { 3417 int fd; 3418 } */ *uap; 3419 { 3420 struct vnode *vp; 3421 struct mount *mp; 3422 struct file *fp; 3423 int error, lock_flags; 3424 3425 AUDIT_ARG_FD(uap->fd); 3426 if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FSYNC, 3427 &fp)) != 0) 3428 return (error); 3429 vp = fp->f_vnode; 3430 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3431 goto drop; 3432 if (MNT_SHARED_WRITES(mp) || 3433 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3434 lock_flags = LK_SHARED; 3435 } else { 3436 lock_flags = LK_EXCLUSIVE; 3437 } 3438 vn_lock(vp, lock_flags | LK_RETRY); 3439 AUDIT_ARG_VNODE1(vp); 3440 if (vp->v_object != NULL) { 3441 VM_OBJECT_LOCK(vp->v_object); 3442 vm_object_page_clean(vp->v_object, 0, 0, 0); 3443 VM_OBJECT_UNLOCK(vp->v_object); 3444 } 3445 error = VOP_FSYNC(vp, MNT_WAIT, td); 3446 3447 VOP_UNLOCK(vp, 0); 3448 vn_finished_write(mp); 3449 drop: 3450 fdrop(fp, td); 3451 return (error); 3452 } 3453 3454 /* 3455 * Rename files. Source and destination must either both be directories, or 3456 * both not be directories. If target is a directory, it must be empty. 
3457 */ 3458 #ifndef _SYS_SYSPROTO_H_ 3459 struct rename_args { 3460 char *from; 3461 char *to; 3462 }; 3463 #endif 3464 int 3465 sys_rename(td, uap) 3466 struct thread *td; 3467 register struct rename_args /* { 3468 char *from; 3469 char *to; 3470 } */ *uap; 3471 { 3472 3473 return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE)); 3474 } 3475 3476 #ifndef _SYS_SYSPROTO_H_ 3477 struct renameat_args { 3478 int oldfd; 3479 char *old; 3480 int newfd; 3481 char *new; 3482 }; 3483 #endif 3484 int 3485 sys_renameat(struct thread *td, struct renameat_args *uap) 3486 { 3487 3488 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3489 UIO_USERSPACE)); 3490 } 3491 3492 int 3493 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg) 3494 { 3495 3496 return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg)); 3497 } 3498 3499 int 3500 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3501 enum uio_seg pathseg) 3502 { 3503 struct mount *mp = NULL; 3504 struct vnode *tvp, *fvp, *tdvp; 3505 struct nameidata fromnd, tond; 3506 int error; 3507 3508 bwillwrite(); 3509 #ifdef MAC 3510 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3511 AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td); 3512 #else 3513 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | 3514 AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td); 3515 #endif 3516 3517 if ((error = namei(&fromnd)) != 0) 3518 return (error); 3519 #ifdef MAC 3520 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3521 fromnd.ni_vp, &fromnd.ni_cnd); 3522 VOP_UNLOCK(fromnd.ni_dvp, 0); 3523 if (fromnd.ni_dvp != fromnd.ni_vp) 3524 VOP_UNLOCK(fromnd.ni_vp, 0); 3525 #endif 3526 fvp = fromnd.ni_vp; 3527 if (error == 0) 3528 error = vn_start_write(fvp, &mp, V_WAIT | PCATCH); 3529 if (error != 0) { 3530 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3531 vrele(fromnd.ni_dvp); 3532 vrele(fvp); 3533 goto out1; 3534 } 3535 NDINIT_ATRIGHTS(&tond, 
RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3536 SAVESTART | AUDITVNODE2, pathseg, new, newfd, CAP_CREATE, 3537 td); 3538 if (fromnd.ni_vp->v_type == VDIR) 3539 tond.ni_cnd.cn_flags |= WILLBEDIR; 3540 if ((error = namei(&tond)) != 0) { 3541 /* Translate error code for rename("dir1", "dir2/."). */ 3542 if (error == EISDIR && fvp->v_type == VDIR) 3543 error = EINVAL; 3544 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3545 vrele(fromnd.ni_dvp); 3546 vrele(fvp); 3547 vn_finished_write(mp); 3548 goto out1; 3549 } 3550 tdvp = tond.ni_dvp; 3551 tvp = tond.ni_vp; 3552 if (tvp != NULL) { 3553 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3554 error = ENOTDIR; 3555 goto out; 3556 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3557 error = EISDIR; 3558 goto out; 3559 } 3560 } 3561 if (fvp == tdvp) { 3562 error = EINVAL; 3563 goto out; 3564 } 3565 /* 3566 * If the source is the same as the destination (that is, if they 3567 * are links to the same vnode), then there is nothing to do. 3568 */ 3569 if (fvp == tvp) 3570 error = -1; 3571 #ifdef MAC 3572 else 3573 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3574 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3575 #endif 3576 out: 3577 if (!error) { 3578 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3579 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3580 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3581 NDFREE(&tond, NDF_ONLY_PNBUF); 3582 } else { 3583 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3584 NDFREE(&tond, NDF_ONLY_PNBUF); 3585 if (tvp) 3586 vput(tvp); 3587 if (tdvp == tvp) 3588 vrele(tdvp); 3589 else 3590 vput(tdvp); 3591 vrele(fromnd.ni_dvp); 3592 vrele(fvp); 3593 } 3594 vrele(tond.ni_startdir); 3595 vn_finished_write(mp); 3596 out1: 3597 if (fromnd.ni_startdir) 3598 vrele(fromnd.ni_startdir); 3599 if (error == -1) 3600 return (0); 3601 return (error); 3602 } 3603 3604 /* 3605 * Make a directory file. 
3606 */ 3607 #ifndef _SYS_SYSPROTO_H_ 3608 struct mkdir_args { 3609 char *path; 3610 int mode; 3611 }; 3612 #endif 3613 int 3614 sys_mkdir(td, uap) 3615 struct thread *td; 3616 register struct mkdir_args /* { 3617 char *path; 3618 int mode; 3619 } */ *uap; 3620 { 3621 3622 return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode)); 3623 } 3624 3625 #ifndef _SYS_SYSPROTO_H_ 3626 struct mkdirat_args { 3627 int fd; 3628 char *path; 3629 mode_t mode; 3630 }; 3631 #endif 3632 int 3633 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3634 { 3635 3636 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3637 } 3638 3639 int 3640 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode) 3641 { 3642 3643 return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode)); 3644 } 3645 3646 int 3647 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3648 int mode) 3649 { 3650 struct mount *mp; 3651 struct vnode *vp; 3652 struct vattr vattr; 3653 int error; 3654 struct nameidata nd; 3655 3656 AUDIT_ARG_MODE(mode); 3657 restart: 3658 bwillwrite(); 3659 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | 3660 AUDITVNODE1, segflg, path, fd, CAP_MKDIR, td); 3661 nd.ni_cnd.cn_flags |= WILLBEDIR; 3662 if ((error = namei(&nd)) != 0) 3663 return (error); 3664 vp = nd.ni_vp; 3665 if (vp != NULL) { 3666 NDFREE(&nd, NDF_ONLY_PNBUF); 3667 /* 3668 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3669 * the strange behaviour of leaving the vnode unlocked 3670 * if the target is the same vnode as the parent. 
3671 */ 3672 if (vp == nd.ni_dvp) 3673 vrele(nd.ni_dvp); 3674 else 3675 vput(nd.ni_dvp); 3676 vrele(vp); 3677 return (EEXIST); 3678 } 3679 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3680 NDFREE(&nd, NDF_ONLY_PNBUF); 3681 vput(nd.ni_dvp); 3682 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3683 return (error); 3684 goto restart; 3685 } 3686 VATTR_NULL(&vattr); 3687 vattr.va_type = VDIR; 3688 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3689 #ifdef MAC 3690 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3691 &vattr); 3692 if (error) 3693 goto out; 3694 #endif 3695 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3696 #ifdef MAC 3697 out: 3698 #endif 3699 NDFREE(&nd, NDF_ONLY_PNBUF); 3700 vput(nd.ni_dvp); 3701 if (!error) 3702 vput(nd.ni_vp); 3703 vn_finished_write(mp); 3704 return (error); 3705 } 3706 3707 /* 3708 * Remove a directory file. 3709 */ 3710 #ifndef _SYS_SYSPROTO_H_ 3711 struct rmdir_args { 3712 char *path; 3713 }; 3714 #endif 3715 int 3716 sys_rmdir(td, uap) 3717 struct thread *td; 3718 struct rmdir_args /* { 3719 char *path; 3720 } */ *uap; 3721 { 3722 3723 return (kern_rmdir(td, uap->path, UIO_USERSPACE)); 3724 } 3725 3726 int 3727 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg) 3728 { 3729 3730 return (kern_rmdirat(td, AT_FDCWD, path, pathseg)); 3731 } 3732 3733 int 3734 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3735 { 3736 struct mount *mp; 3737 struct vnode *vp; 3738 int error; 3739 struct nameidata nd; 3740 3741 restart: 3742 bwillwrite(); 3743 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | 3744 AUDITVNODE1, pathseg, path, fd, CAP_RMDIR, td); 3745 if ((error = namei(&nd)) != 0) 3746 return (error); 3747 vp = nd.ni_vp; 3748 if (vp->v_type != VDIR) { 3749 error = ENOTDIR; 3750 goto out; 3751 } 3752 /* 3753 * No rmdir "." please. 
3754 */ 3755 if (nd.ni_dvp == vp) { 3756 error = EINVAL; 3757 goto out; 3758 } 3759 /* 3760 * The root of a mounted filesystem cannot be deleted. 3761 */ 3762 if (vp->v_vflag & VV_ROOT) { 3763 error = EBUSY; 3764 goto out; 3765 } 3766 #ifdef MAC 3767 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3768 &nd.ni_cnd); 3769 if (error) 3770 goto out; 3771 #endif 3772 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3773 NDFREE(&nd, NDF_ONLY_PNBUF); 3774 vput(vp); 3775 if (nd.ni_dvp == vp) 3776 vrele(nd.ni_dvp); 3777 else 3778 vput(nd.ni_dvp); 3779 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3780 return (error); 3781 goto restart; 3782 } 3783 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3784 vn_finished_write(mp); 3785 out: 3786 NDFREE(&nd, NDF_ONLY_PNBUF); 3787 vput(vp); 3788 if (nd.ni_dvp == vp) 3789 vrele(nd.ni_dvp); 3790 else 3791 vput(nd.ni_dvp); 3792 return (error); 3793 } 3794 3795 #ifdef COMPAT_43 3796 /* 3797 * Read a block of directory entries in a filesystem independent format. 3798 */ 3799 #ifndef _SYS_SYSPROTO_H_ 3800 struct ogetdirentries_args { 3801 int fd; 3802 char *buf; 3803 u_int count; 3804 long *basep; 3805 }; 3806 #endif 3807 int 3808 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3809 { 3810 long loff; 3811 int error; 3812 3813 error = kern_ogetdirentries(td, uap, &loff); 3814 if (error == 0) 3815 error = copyout(&loff, uap->basep, sizeof(long)); 3816 return (error); 3817 } 3818 3819 int 3820 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3821 long *ploff) 3822 { 3823 struct vnode *vp; 3824 struct file *fp; 3825 struct uio auio, kuio; 3826 struct iovec aiov, kiov; 3827 struct dirent *dp, *edp; 3828 caddr_t dirbuf; 3829 int error, eofflag, readcnt; 3830 long loff; 3831 off_t foffset; 3832 3833 /* XXX arbitrary sanity limit on `count'. 
*/ 3834 if (uap->count > 64 * 1024) 3835 return (EINVAL); 3836 if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ, 3837 &fp)) != 0) 3838 return (error); 3839 if ((fp->f_flag & FREAD) == 0) { 3840 fdrop(fp, td); 3841 return (EBADF); 3842 } 3843 vp = fp->f_vnode; 3844 foffset = foffset_lock(fp, 0); 3845 unionread: 3846 if (vp->v_type != VDIR) { 3847 foffset_unlock(fp, foffset, 0); 3848 fdrop(fp, td); 3849 return (EINVAL); 3850 } 3851 aiov.iov_base = uap->buf; 3852 aiov.iov_len = uap->count; 3853 auio.uio_iov = &aiov; 3854 auio.uio_iovcnt = 1; 3855 auio.uio_rw = UIO_READ; 3856 auio.uio_segflg = UIO_USERSPACE; 3857 auio.uio_td = td; 3858 auio.uio_resid = uap->count; 3859 vn_lock(vp, LK_SHARED | LK_RETRY); 3860 loff = auio.uio_offset = foffset; 3861 #ifdef MAC 3862 error = mac_vnode_check_readdir(td->td_ucred, vp); 3863 if (error) { 3864 VOP_UNLOCK(vp, 0); 3865 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3866 fdrop(fp, td); 3867 return (error); 3868 } 3869 #endif 3870 # if (BYTE_ORDER != LITTLE_ENDIAN) 3871 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3872 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3873 NULL, NULL); 3874 foffset = auio.uio_offset; 3875 } else 3876 # endif 3877 { 3878 kuio = auio; 3879 kuio.uio_iov = &kiov; 3880 kuio.uio_segflg = UIO_SYSSPACE; 3881 kiov.iov_len = uap->count; 3882 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3883 kiov.iov_base = dirbuf; 3884 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3885 NULL, NULL); 3886 foffset = kuio.uio_offset; 3887 if (error == 0) { 3888 readcnt = uap->count - kuio.uio_resid; 3889 edp = (struct dirent *)&dirbuf[readcnt]; 3890 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3891 # if (BYTE_ORDER == LITTLE_ENDIAN) 3892 /* 3893 * The expected low byte of 3894 * dp->d_namlen is our dp->d_type. 3895 * The high MBZ byte of dp->d_namlen 3896 * is our dp->d_namlen. 
3897 */ 3898 dp->d_type = dp->d_namlen; 3899 dp->d_namlen = 0; 3900 # else 3901 /* 3902 * The dp->d_type is the high byte 3903 * of the expected dp->d_namlen, 3904 * so must be zero'ed. 3905 */ 3906 dp->d_type = 0; 3907 # endif 3908 if (dp->d_reclen > 0) { 3909 dp = (struct dirent *) 3910 ((char *)dp + dp->d_reclen); 3911 } else { 3912 error = EIO; 3913 break; 3914 } 3915 } 3916 if (dp >= edp) 3917 error = uiomove(dirbuf, readcnt, &auio); 3918 } 3919 free(dirbuf, M_TEMP); 3920 } 3921 if (error) { 3922 VOP_UNLOCK(vp, 0); 3923 foffset_unlock(fp, foffset, 0); 3924 fdrop(fp, td); 3925 return (error); 3926 } 3927 if (uap->count == auio.uio_resid && 3928 (vp->v_vflag & VV_ROOT) && 3929 (vp->v_mount->mnt_flag & MNT_UNION)) { 3930 struct vnode *tvp = vp; 3931 vp = vp->v_mount->mnt_vnodecovered; 3932 VREF(vp); 3933 fp->f_vnode = vp; 3934 fp->f_data = vp; 3935 foffset = 0; 3936 vput(tvp); 3937 goto unionread; 3938 } 3939 VOP_UNLOCK(vp, 0); 3940 foffset_unlock(fp, foffset, 0); 3941 fdrop(fp, td); 3942 td->td_retval[0] = uap->count - auio.uio_resid; 3943 if (error == 0) 3944 *ploff = loff; 3945 return (error); 3946 } 3947 #endif /* COMPAT_43 */ 3948 3949 /* 3950 * Read a block of directory entries in a filesystem independent format. 
3951 */ 3952 #ifndef _SYS_SYSPROTO_H_ 3953 struct getdirentries_args { 3954 int fd; 3955 char *buf; 3956 u_int count; 3957 long *basep; 3958 }; 3959 #endif 3960 int 3961 sys_getdirentries(td, uap) 3962 struct thread *td; 3963 register struct getdirentries_args /* { 3964 int fd; 3965 char *buf; 3966 u_int count; 3967 long *basep; 3968 } */ *uap; 3969 { 3970 long base; 3971 int error; 3972 3973 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3974 NULL, UIO_USERSPACE); 3975 if (error) 3976 return (error); 3977 if (uap->basep != NULL) 3978 error = copyout(&base, uap->basep, sizeof(long)); 3979 return (error); 3980 } 3981 3982 int 3983 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 3984 long *basep, ssize_t *residp, enum uio_seg bufseg) 3985 { 3986 struct vnode *vp; 3987 struct file *fp; 3988 struct uio auio; 3989 struct iovec aiov; 3990 long loff; 3991 int error, eofflag; 3992 off_t foffset; 3993 3994 AUDIT_ARG_FD(fd); 3995 if (count > IOSIZE_MAX) 3996 return (EINVAL); 3997 auio.uio_resid = count; 3998 if ((error = getvnode(td->td_proc->p_fd, fd, CAP_READ | CAP_SEEK, 3999 &fp)) != 0) 4000 return (error); 4001 if ((fp->f_flag & FREAD) == 0) { 4002 fdrop(fp, td); 4003 return (EBADF); 4004 } 4005 vp = fp->f_vnode; 4006 foffset = foffset_lock(fp, 0); 4007 unionread: 4008 if (vp->v_type != VDIR) { 4009 error = EINVAL; 4010 goto fail; 4011 } 4012 aiov.iov_base = buf; 4013 aiov.iov_len = count; 4014 auio.uio_iov = &aiov; 4015 auio.uio_iovcnt = 1; 4016 auio.uio_rw = UIO_READ; 4017 auio.uio_segflg = bufseg; 4018 auio.uio_td = td; 4019 vn_lock(vp, LK_SHARED | LK_RETRY); 4020 AUDIT_ARG_VNODE1(vp); 4021 loff = auio.uio_offset = foffset; 4022 #ifdef MAC 4023 error = mac_vnode_check_readdir(td->td_ucred, vp); 4024 if (error == 0) 4025 #endif 4026 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4027 NULL); 4028 foffset = auio.uio_offset; 4029 if (error) { 4030 VOP_UNLOCK(vp, 0); 4031 goto fail; 4032 } 4033 if (count == 
auio.uio_resid && 4034 (vp->v_vflag & VV_ROOT) && 4035 (vp->v_mount->mnt_flag & MNT_UNION)) { 4036 struct vnode *tvp = vp; 4037 vp = vp->v_mount->mnt_vnodecovered; 4038 VREF(vp); 4039 fp->f_vnode = vp; 4040 fp->f_data = vp; 4041 foffset = 0; 4042 vput(tvp); 4043 goto unionread; 4044 } 4045 VOP_UNLOCK(vp, 0); 4046 *basep = loff; 4047 if (residp != NULL) 4048 *residp = auio.uio_resid; 4049 td->td_retval[0] = count - auio.uio_resid; 4050 fail: 4051 foffset_unlock(fp, foffset, 0); 4052 fdrop(fp, td); 4053 return (error); 4054 } 4055 4056 #ifndef _SYS_SYSPROTO_H_ 4057 struct getdents_args { 4058 int fd; 4059 char *buf; 4060 size_t count; 4061 }; 4062 #endif 4063 int 4064 sys_getdents(td, uap) 4065 struct thread *td; 4066 register struct getdents_args /* { 4067 int fd; 4068 char *buf; 4069 u_int count; 4070 } */ *uap; 4071 { 4072 struct getdirentries_args ap; 4073 ap.fd = uap->fd; 4074 ap.buf = uap->buf; 4075 ap.count = uap->count; 4076 ap.basep = NULL; 4077 return (sys_getdirentries(td, &ap)); 4078 } 4079 4080 /* 4081 * Set the mode mask for creation of filesystem nodes. 4082 */ 4083 #ifndef _SYS_SYSPROTO_H_ 4084 struct umask_args { 4085 int newmask; 4086 }; 4087 #endif 4088 int 4089 sys_umask(td, uap) 4090 struct thread *td; 4091 struct umask_args /* { 4092 int newmask; 4093 } */ *uap; 4094 { 4095 register struct filedesc *fdp; 4096 4097 FILEDESC_XLOCK(td->td_proc->p_fd); 4098 fdp = td->td_proc->p_fd; 4099 td->td_retval[0] = fdp->fd_cmask; 4100 fdp->fd_cmask = uap->newmask & ALLPERMS; 4101 FILEDESC_XUNLOCK(td->td_proc->p_fd); 4102 return (0); 4103 } 4104 4105 /* 4106 * Void all references to file by ripping underlying filesystem away from 4107 * vnode. 
4108 */ 4109 #ifndef _SYS_SYSPROTO_H_ 4110 struct revoke_args { 4111 char *path; 4112 }; 4113 #endif 4114 int 4115 sys_revoke(td, uap) 4116 struct thread *td; 4117 register struct revoke_args /* { 4118 char *path; 4119 } */ *uap; 4120 { 4121 struct vnode *vp; 4122 struct vattr vattr; 4123 int error; 4124 struct nameidata nd; 4125 4126 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, 4127 UIO_USERSPACE, uap->path, td); 4128 if ((error = namei(&nd)) != 0) 4129 return (error); 4130 vp = nd.ni_vp; 4131 NDFREE(&nd, NDF_ONLY_PNBUF); 4132 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4133 error = EINVAL; 4134 goto out; 4135 } 4136 #ifdef MAC 4137 error = mac_vnode_check_revoke(td->td_ucred, vp); 4138 if (error) 4139 goto out; 4140 #endif 4141 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4142 if (error) 4143 goto out; 4144 if (td->td_ucred->cr_uid != vattr.va_uid) { 4145 error = priv_check(td, PRIV_VFS_ADMIN); 4146 if (error) 4147 goto out; 4148 } 4149 if (vcount(vp) > 1) 4150 VOP_REVOKE(vp, REVOKEALL); 4151 out: 4152 vput(vp); 4153 return (error); 4154 } 4155 4156 /* 4157 * Convert a user file descriptor to a kernel file entry and check that, if it 4158 * is a capability, the correct rights are present. A reference on the file 4159 * entry is held upon returning. 4160 */ 4161 int 4162 getvnode(struct filedesc *fdp, int fd, cap_rights_t rights, 4163 struct file **fpp) 4164 { 4165 struct file *fp; 4166 #ifdef CAPABILITIES 4167 struct file *fp_fromcap; 4168 int error; 4169 #endif 4170 4171 if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) 4172 return (EBADF); 4173 #ifdef CAPABILITIES 4174 /* 4175 * If the file descriptor is for a capability, test rights and use the 4176 * file descriptor referenced by the capability. 
4177 */ 4178 error = cap_funwrap(fp, rights, &fp_fromcap); 4179 if (error) { 4180 fdrop(fp, curthread); 4181 return (error); 4182 } 4183 if (fp != fp_fromcap) { 4184 fhold(fp_fromcap); 4185 fdrop(fp, curthread); 4186 fp = fp_fromcap; 4187 } 4188 #endif /* CAPABILITIES */ 4189 4190 /* 4191 * The file could be not of the vnode type, or it may be not 4192 * yet fully initialized, in which case the f_vnode pointer 4193 * may be set, but f_ops is still badfileops. E.g., 4194 * devfs_open() transiently create such situation to 4195 * facilitate csw d_fdopen(). 4196 * 4197 * Dupfdopen() handling in kern_openat() installs the 4198 * half-baked file into the process descriptor table, allowing 4199 * other thread to dereference it. Guard against the race by 4200 * checking f_ops. 4201 */ 4202 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4203 fdrop(fp, curthread); 4204 return (EINVAL); 4205 } 4206 *fpp = fp; 4207 return (0); 4208 } 4209 4210 4211 /* 4212 * Get an (NFS) file handle. 4213 */ 4214 #ifndef _SYS_SYSPROTO_H_ 4215 struct lgetfh_args { 4216 char *fname; 4217 fhandle_t *fhp; 4218 }; 4219 #endif 4220 int 4221 sys_lgetfh(td, uap) 4222 struct thread *td; 4223 register struct lgetfh_args *uap; 4224 { 4225 struct nameidata nd; 4226 fhandle_t fh; 4227 register struct vnode *vp; 4228 int error; 4229 4230 error = priv_check(td, PRIV_VFS_GETFH); 4231 if (error) 4232 return (error); 4233 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, 4234 UIO_USERSPACE, uap->fname, td); 4235 error = namei(&nd); 4236 if (error) 4237 return (error); 4238 NDFREE(&nd, NDF_ONLY_PNBUF); 4239 vp = nd.ni_vp; 4240 bzero(&fh, sizeof(fh)); 4241 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4242 error = VOP_VPTOFH(vp, &fh.fh_fid); 4243 vput(vp); 4244 if (error) 4245 return (error); 4246 error = copyout(&fh, uap->fhp, sizeof (fh)); 4247 return (error); 4248 } 4249 4250 #ifndef _SYS_SYSPROTO_H_ 4251 struct getfh_args { 4252 char *fname; 4253 fhandle_t *fhp; 4254 }; 4255 #endif 4256 int 4257 
sys_getfh(td, uap) 4258 struct thread *td; 4259 register struct getfh_args *uap; 4260 { 4261 struct nameidata nd; 4262 fhandle_t fh; 4263 register struct vnode *vp; 4264 int error; 4265 4266 error = priv_check(td, PRIV_VFS_GETFH); 4267 if (error) 4268 return (error); 4269 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, 4270 UIO_USERSPACE, uap->fname, td); 4271 error = namei(&nd); 4272 if (error) 4273 return (error); 4274 NDFREE(&nd, NDF_ONLY_PNBUF); 4275 vp = nd.ni_vp; 4276 bzero(&fh, sizeof(fh)); 4277 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4278 error = VOP_VPTOFH(vp, &fh.fh_fid); 4279 vput(vp); 4280 if (error) 4281 return (error); 4282 error = copyout(&fh, uap->fhp, sizeof (fh)); 4283 return (error); 4284 } 4285 4286 /* 4287 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4288 * open descriptor. 4289 * 4290 * warning: do not remove the priv_check() call or this becomes one giant 4291 * security hole. 4292 */ 4293 #ifndef _SYS_SYSPROTO_H_ 4294 struct fhopen_args { 4295 const struct fhandle *u_fhp; 4296 int flags; 4297 }; 4298 #endif 4299 int 4300 sys_fhopen(td, uap) 4301 struct thread *td; 4302 struct fhopen_args /* { 4303 const struct fhandle *u_fhp; 4304 int flags; 4305 } */ *uap; 4306 { 4307 struct mount *mp; 4308 struct vnode *vp; 4309 struct fhandle fhp; 4310 struct file *fp; 4311 int fmode, error; 4312 int indx; 4313 4314 error = priv_check(td, PRIV_VFS_FHOPEN); 4315 if (error) 4316 return (error); 4317 indx = -1; 4318 fmode = FFLAGS(uap->flags); 4319 /* why not allow a non-read/write open for our lockd? 
*/ 4320 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4321 return (EINVAL); 4322 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4323 if (error) 4324 return(error); 4325 /* find the mount point */ 4326 mp = vfs_busyfs(&fhp.fh_fsid); 4327 if (mp == NULL) 4328 return (ESTALE); 4329 /* now give me my vnode, it gets returned to me locked */ 4330 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4331 vfs_unbusy(mp); 4332 if (error) 4333 return (error); 4334 4335 error = falloc_noinstall(td, &fp); 4336 if (error) { 4337 vput(vp); 4338 return (error); 4339 } 4340 /* 4341 * An extra reference on `fp' has been held for us by 4342 * falloc_noinstall(). 4343 */ 4344 4345 #ifdef INVARIANTS 4346 td->td_dupfd = -1; 4347 #endif 4348 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4349 if (error) { 4350 KASSERT(fp->f_ops == &badfileops, 4351 ("VOP_OPEN in fhopen() set f_ops")); 4352 KASSERT(td->td_dupfd < 0, 4353 ("fhopen() encountered fdopen()")); 4354 4355 vput(vp); 4356 goto bad; 4357 } 4358 #ifdef INVARIANTS 4359 td->td_dupfd = 0; 4360 #endif 4361 fp->f_vnode = vp; 4362 fp->f_seqcount = 1; 4363 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4364 &vnops); 4365 VOP_UNLOCK(vp, 0); 4366 if (fmode & O_TRUNC) { 4367 error = fo_truncate(fp, 0, td->td_ucred, td); 4368 if (error) 4369 goto bad; 4370 } 4371 4372 error = finstall(td, fp, &indx, fmode); 4373 bad: 4374 fdrop(fp, td); 4375 td->td_retval[0] = indx; 4376 return (error); 4377 } 4378 4379 /* 4380 * Stat an (NFS) file handle. 
4381 */ 4382 #ifndef _SYS_SYSPROTO_H_ 4383 struct fhstat_args { 4384 struct fhandle *u_fhp; 4385 struct stat *sb; 4386 }; 4387 #endif 4388 int 4389 sys_fhstat(td, uap) 4390 struct thread *td; 4391 register struct fhstat_args /* { 4392 struct fhandle *u_fhp; 4393 struct stat *sb; 4394 } */ *uap; 4395 { 4396 struct stat sb; 4397 struct fhandle fh; 4398 int error; 4399 4400 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4401 if (error != 0) 4402 return (error); 4403 error = kern_fhstat(td, fh, &sb); 4404 if (error != 0) 4405 return (error); 4406 error = copyout(&sb, uap->sb, sizeof(sb)); 4407 return (error); 4408 } 4409 4410 int 4411 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4412 { 4413 struct mount *mp; 4414 struct vnode *vp; 4415 int error; 4416 4417 error = priv_check(td, PRIV_VFS_FHSTAT); 4418 if (error) 4419 return (error); 4420 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4421 return (ESTALE); 4422 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4423 vfs_unbusy(mp); 4424 if (error) 4425 return (error); 4426 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4427 vput(vp); 4428 return (error); 4429 } 4430 4431 /* 4432 * Implement fstatfs() for (NFS) file handles. 
4433 */ 4434 #ifndef _SYS_SYSPROTO_H_ 4435 struct fhstatfs_args { 4436 struct fhandle *u_fhp; 4437 struct statfs *buf; 4438 }; 4439 #endif 4440 int 4441 sys_fhstatfs(td, uap) 4442 struct thread *td; 4443 struct fhstatfs_args /* { 4444 struct fhandle *u_fhp; 4445 struct statfs *buf; 4446 } */ *uap; 4447 { 4448 struct statfs sf; 4449 fhandle_t fh; 4450 int error; 4451 4452 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4453 if (error) 4454 return (error); 4455 error = kern_fhstatfs(td, fh, &sf); 4456 if (error) 4457 return (error); 4458 return (copyout(&sf, uap->buf, sizeof(sf))); 4459 } 4460 4461 int 4462 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4463 { 4464 struct statfs *sp; 4465 struct mount *mp; 4466 struct vnode *vp; 4467 int error; 4468 4469 error = priv_check(td, PRIV_VFS_FHSTATFS); 4470 if (error) 4471 return (error); 4472 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4473 return (ESTALE); 4474 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4475 if (error) { 4476 vfs_unbusy(mp); 4477 return (error); 4478 } 4479 vput(vp); 4480 error = prison_canseemount(td->td_ucred, mp); 4481 if (error) 4482 goto out; 4483 #ifdef MAC 4484 error = mac_mount_check_stat(td->td_ucred, mp); 4485 if (error) 4486 goto out; 4487 #endif 4488 /* 4489 * Set these in case the underlying filesystem fails to do so. 
4490 */ 4491 sp = &mp->mnt_stat; 4492 sp->f_version = STATFS_VERSION; 4493 sp->f_namemax = NAME_MAX; 4494 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4495 error = VFS_STATFS(mp, sp); 4496 if (error == 0) 4497 *buf = *sp; 4498 out: 4499 vfs_unbusy(mp); 4500 return (error); 4501 } 4502 4503 int 4504 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4505 { 4506 struct file *fp; 4507 struct mount *mp; 4508 struct vnode *vp; 4509 off_t olen, ooffset; 4510 int error; 4511 4512 fp = NULL; 4513 error = fget(td, fd, CAP_WRITE, &fp); 4514 if (error != 0) 4515 goto out; 4516 4517 switch (fp->f_type) { 4518 case DTYPE_VNODE: 4519 break; 4520 case DTYPE_PIPE: 4521 case DTYPE_FIFO: 4522 error = ESPIPE; 4523 goto out; 4524 default: 4525 error = ENODEV; 4526 goto out; 4527 } 4528 if ((fp->f_flag & FWRITE) == 0) { 4529 error = EBADF; 4530 goto out; 4531 } 4532 vp = fp->f_vnode; 4533 if (vp->v_type != VREG) { 4534 error = ENODEV; 4535 goto out; 4536 } 4537 if (offset < 0 || len <= 0) { 4538 error = EINVAL; 4539 goto out; 4540 } 4541 /* Check for wrap. */ 4542 if (offset > OFF_MAX - len) { 4543 error = EFBIG; 4544 goto out; 4545 } 4546 4547 /* Allocating blocks may take a long time, so iterate. 
*/ 4548 for (;;) { 4549 olen = len; 4550 ooffset = offset; 4551 4552 bwillwrite(); 4553 mp = NULL; 4554 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4555 if (error != 0) 4556 break; 4557 error = vn_lock(vp, LK_EXCLUSIVE); 4558 if (error != 0) { 4559 vn_finished_write(mp); 4560 break; 4561 } 4562 #ifdef MAC 4563 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4564 if (error == 0) 4565 #endif 4566 error = VOP_ALLOCATE(vp, &offset, &len); 4567 VOP_UNLOCK(vp, 0); 4568 vn_finished_write(mp); 4569 4570 if (olen + ooffset != offset + len) { 4571 panic("offset + len changed from %jx/%jx to %jx/%jx", 4572 ooffset, olen, offset, len); 4573 } 4574 if (error != 0 || len == 0) 4575 break; 4576 KASSERT(olen > len, ("Iteration did not make progress?")); 4577 maybe_yield(); 4578 } 4579 out: 4580 if (fp != NULL) 4581 fdrop(fp, td); 4582 return (error); 4583 } 4584 4585 int 4586 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4587 { 4588 4589 return (kern_posix_fallocate(td, uap->fd, uap->offset, uap->len)); 4590 } 4591 4592 /* 4593 * Unlike madvise(2), we do not make a best effort to remember every 4594 * possible caching hint. Instead, we remember the last setting with 4595 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4596 * region of any current setting. 
4597 */ 4598 int 4599 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4600 int advice) 4601 { 4602 struct fadvise_info *fa, *new; 4603 struct file *fp; 4604 struct vnode *vp; 4605 off_t end; 4606 int error; 4607 4608 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4609 return (EINVAL); 4610 switch (advice) { 4611 case POSIX_FADV_SEQUENTIAL: 4612 case POSIX_FADV_RANDOM: 4613 case POSIX_FADV_NOREUSE: 4614 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4615 break; 4616 case POSIX_FADV_NORMAL: 4617 case POSIX_FADV_WILLNEED: 4618 case POSIX_FADV_DONTNEED: 4619 new = NULL; 4620 break; 4621 default: 4622 return (EINVAL); 4623 } 4624 /* XXX: CAP_POSIX_FADVISE? */ 4625 error = fget(td, fd, 0, &fp); 4626 if (error != 0) 4627 goto out; 4628 4629 switch (fp->f_type) { 4630 case DTYPE_VNODE: 4631 break; 4632 case DTYPE_PIPE: 4633 case DTYPE_FIFO: 4634 error = ESPIPE; 4635 goto out; 4636 default: 4637 error = ENODEV; 4638 goto out; 4639 } 4640 vp = fp->f_vnode; 4641 if (vp->v_type != VREG) { 4642 error = ENODEV; 4643 goto out; 4644 } 4645 if (len == 0) 4646 end = OFF_MAX; 4647 else 4648 end = offset + len - 1; 4649 switch (advice) { 4650 case POSIX_FADV_SEQUENTIAL: 4651 case POSIX_FADV_RANDOM: 4652 case POSIX_FADV_NOREUSE: 4653 /* 4654 * Try to merge any existing non-standard region with 4655 * this new region if possible, otherwise create a new 4656 * non-standard region for this request. 
4657 */ 4658 mtx_pool_lock(mtxpool_sleep, fp); 4659 fa = fp->f_advice; 4660 if (fa != NULL && fa->fa_advice == advice && 4661 ((fa->fa_start <= end && fa->fa_end >= offset) || 4662 (end != OFF_MAX && fa->fa_start == end + 1) || 4663 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4664 if (offset < fa->fa_start) 4665 fa->fa_start = offset; 4666 if (end > fa->fa_end) 4667 fa->fa_end = end; 4668 } else { 4669 new->fa_advice = advice; 4670 new->fa_start = offset; 4671 new->fa_end = end; 4672 new->fa_prevstart = 0; 4673 new->fa_prevend = 0; 4674 fp->f_advice = new; 4675 new = fa; 4676 } 4677 mtx_pool_unlock(mtxpool_sleep, fp); 4678 break; 4679 case POSIX_FADV_NORMAL: 4680 /* 4681 * If a the "normal" region overlaps with an existing 4682 * non-standard region, trim or remove the 4683 * non-standard region. 4684 */ 4685 mtx_pool_lock(mtxpool_sleep, fp); 4686 fa = fp->f_advice; 4687 if (fa != NULL) { 4688 if (offset <= fa->fa_start && end >= fa->fa_end) { 4689 new = fa; 4690 fp->f_advice = NULL; 4691 } else if (offset <= fa->fa_start && 4692 end >= fa->fa_start) 4693 fa->fa_start = end + 1; 4694 else if (offset <= fa->fa_end && end >= fa->fa_end) 4695 fa->fa_end = offset - 1; 4696 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4697 /* 4698 * If the "normal" region is a middle 4699 * portion of the existing 4700 * non-standard region, just remove 4701 * the whole thing rather than picking 4702 * one side or the other to 4703 * preserve. 
4704 */ 4705 new = fa; 4706 fp->f_advice = NULL; 4707 } 4708 } 4709 mtx_pool_unlock(mtxpool_sleep, fp); 4710 break; 4711 case POSIX_FADV_WILLNEED: 4712 case POSIX_FADV_DONTNEED: 4713 error = VOP_ADVISE(vp, offset, end, advice); 4714 break; 4715 } 4716 out: 4717 if (fp != NULL) 4718 fdrop(fp, td); 4719 free(new, M_FADVISE); 4720 return (error); 4721 } 4722 4723 int 4724 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4725 { 4726 4727 return (kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4728 uap->advice)); 4729 } 4730