1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_kdtrace.h" 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/bio.h> 48 #include <sys/buf.h> 49 #include <sys/capability.h> 50 #include <sys/disk.h> 51 #include <sys/sysent.h> 52 #include <sys/malloc.h> 53 #include <sys/mount.h> 54 #include <sys/mutex.h> 55 #include <sys/sysproto.h> 56 #include <sys/namei.h> 57 #include <sys/filedesc.h> 58 #include <sys/kernel.h> 59 #include <sys/fcntl.h> 60 #include <sys/file.h> 61 #include <sys/filio.h> 62 #include <sys/limits.h> 63 #include <sys/linker.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 static MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE(vfs, , stat, mode, mode); 95 SDT_PROBE_ARGTYPE(vfs, , stat, mode, 0, "char *"); 96 SDT_PROBE_ARGTYPE(vfs, , stat, mode, 1, "int"); 97 SDT_PROBE_DEFINE(vfs, , stat, reg, reg); 98 SDT_PROBE_ARGTYPE(vfs, , stat, reg, 0, "char *"); 99 SDT_PROBE_ARGTYPE(vfs, , stat, reg, 1, "int"); 100 101 static int chroot_refuse_vdir_fds(struct filedesc *fdp); 102 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 103 static int setfflags(struct thread *td, struct vnode *, int); 104 static int setutimes(struct thread *td, 
struct vnode *, 105 const struct timespec *, int, int); 106 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 107 struct thread *td); 108 109 /* 110 * The module initialization routine for POSIX asynchronous I/O will 111 * set this to the version of AIO that it implements. (Zero means 112 * that it is not implemented.) This value is used here by pathconf() 113 * and in kern_descrip.c by fpathconf(). 114 */ 115 int async_io_version; 116 117 #ifdef DEBUG 118 static int syncprt = 0; 119 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 120 #endif 121 122 /* 123 * Sync each mounted filesystem. 124 */ 125 #ifndef _SYS_SYSPROTO_H_ 126 struct sync_args { 127 int dummy; 128 }; 129 #endif 130 /* ARGSUSED */ 131 int 132 sys_sync(td, uap) 133 struct thread *td; 134 struct sync_args *uap; 135 { 136 struct mount *mp, *nmp; 137 int vfslocked; 138 139 mtx_lock(&mountlist_mtx); 140 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 141 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 142 nmp = TAILQ_NEXT(mp, mnt_list); 143 continue; 144 } 145 vfslocked = VFS_LOCK_GIANT(mp); 146 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 147 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 148 MNT_ILOCK(mp); 149 mp->mnt_noasync++; 150 mp->mnt_kern_flag &= ~MNTK_ASYNC; 151 MNT_IUNLOCK(mp); 152 vfs_msync(mp, MNT_NOWAIT); 153 VFS_SYNC(mp, MNT_NOWAIT); 154 MNT_ILOCK(mp); 155 mp->mnt_noasync--; 156 if ((mp->mnt_flag & MNT_ASYNC) != 0 && 157 mp->mnt_noasync == 0) 158 mp->mnt_kern_flag |= MNTK_ASYNC; 159 MNT_IUNLOCK(mp); 160 vn_finished_write(mp); 161 } 162 VFS_UNLOCK_GIANT(vfslocked); 163 mtx_lock(&mountlist_mtx); 164 nmp = TAILQ_NEXT(mp, mnt_list); 165 vfs_unbusy(mp); 166 } 167 mtx_unlock(&mountlist_mtx); 168 return (0); 169 } 170 171 /* 172 * Change filesystem quotas. 
173 */ 174 #ifndef _SYS_SYSPROTO_H_ 175 struct quotactl_args { 176 char *path; 177 int cmd; 178 int uid; 179 caddr_t arg; 180 }; 181 #endif 182 int 183 sys_quotactl(td, uap) 184 struct thread *td; 185 register struct quotactl_args /* { 186 char *path; 187 int cmd; 188 int uid; 189 caddr_t arg; 190 } */ *uap; 191 { 192 struct mount *mp; 193 int vfslocked; 194 int error; 195 struct nameidata nd; 196 197 AUDIT_ARG_CMD(uap->cmd); 198 AUDIT_ARG_UID(uap->uid); 199 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 200 return (EPERM); 201 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1, 202 UIO_USERSPACE, uap->path, td); 203 if ((error = namei(&nd)) != 0) 204 return (error); 205 vfslocked = NDHASGIANT(&nd); 206 NDFREE(&nd, NDF_ONLY_PNBUF); 207 mp = nd.ni_vp->v_mount; 208 vfs_ref(mp); 209 vput(nd.ni_vp); 210 error = vfs_busy(mp, 0); 211 vfs_rel(mp); 212 if (error) { 213 VFS_UNLOCK_GIANT(vfslocked); 214 return (error); 215 } 216 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 217 218 /* 219 * Since quota on operation typically needs to open quota 220 * file, the Q_QUOTAON handler needs to unbusy the mount point 221 * before calling into namei. Otherwise, unmount might be 222 * started between two vfs_busy() invocations (first is our, 223 * second is from mount point cross-walk code in lookup()), 224 * causing deadlock. 225 * 226 * Require that Q_QUOTAON handles the vfs_busy() reference on 227 * its own, always returning with ubusied mount point. 228 */ 229 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 230 vfs_unbusy(mp); 231 VFS_UNLOCK_GIANT(vfslocked); 232 return (error); 233 } 234 235 /* 236 * Used by statfs conversion routines to scale the block size up if 237 * necessary so that all of the block counts are <= 'max_size'. Note 238 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 239 * value of 'n'. 
240 */ 241 void 242 statfs_scale_blocks(struct statfs *sf, long max_size) 243 { 244 uint64_t count; 245 int shift; 246 247 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 248 249 /* 250 * Attempt to scale the block counts to give a more accurate 251 * overview to userland of the ratio of free space to used 252 * space. To do this, find the largest block count and compute 253 * a divisor that lets it fit into a signed integer <= max_size. 254 */ 255 if (sf->f_bavail < 0) 256 count = -sf->f_bavail; 257 else 258 count = sf->f_bavail; 259 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 260 if (count <= max_size) 261 return; 262 263 count >>= flsl(max_size); 264 shift = 0; 265 while (count > 0) { 266 shift++; 267 count >>=1; 268 } 269 270 sf->f_bsize <<= shift; 271 sf->f_blocks >>= shift; 272 sf->f_bfree >>= shift; 273 sf->f_bavail >>= shift; 274 } 275 276 /* 277 * Get filesystem statistics. 278 */ 279 #ifndef _SYS_SYSPROTO_H_ 280 struct statfs_args { 281 char *path; 282 struct statfs *buf; 283 }; 284 #endif 285 int 286 sys_statfs(td, uap) 287 struct thread *td; 288 register struct statfs_args /* { 289 char *path; 290 struct statfs *buf; 291 } */ *uap; 292 { 293 struct statfs sf; 294 int error; 295 296 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 297 if (error == 0) 298 error = copyout(&sf, uap->buf, sizeof(sf)); 299 return (error); 300 } 301 302 int 303 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 304 struct statfs *buf) 305 { 306 struct mount *mp; 307 struct statfs *sp, sb; 308 int vfslocked; 309 int error; 310 struct nameidata nd; 311 312 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | 313 AUDITVNODE1, pathseg, path, td); 314 error = namei(&nd); 315 if (error) 316 return (error); 317 vfslocked = NDHASGIANT(&nd); 318 mp = nd.ni_vp->v_mount; 319 vfs_ref(mp); 320 NDFREE(&nd, NDF_ONLY_PNBUF); 321 vput(nd.ni_vp); 322 error = vfs_busy(mp, 0); 323 vfs_rel(mp); 324 if (error) { 325 
VFS_UNLOCK_GIANT(vfslocked); 326 return (error); 327 } 328 #ifdef MAC 329 error = mac_mount_check_stat(td->td_ucred, mp); 330 if (error) 331 goto out; 332 #endif 333 /* 334 * Set these in case the underlying filesystem fails to do so. 335 */ 336 sp = &mp->mnt_stat; 337 sp->f_version = STATFS_VERSION; 338 sp->f_namemax = NAME_MAX; 339 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 340 error = VFS_STATFS(mp, sp); 341 if (error) 342 goto out; 343 if (priv_check(td, PRIV_VFS_GENERATION)) { 344 bcopy(sp, &sb, sizeof(sb)); 345 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 346 prison_enforce_statfs(td->td_ucred, mp, &sb); 347 sp = &sb; 348 } 349 *buf = *sp; 350 out: 351 vfs_unbusy(mp); 352 VFS_UNLOCK_GIANT(vfslocked); 353 return (error); 354 } 355 356 /* 357 * Get filesystem statistics. 358 */ 359 #ifndef _SYS_SYSPROTO_H_ 360 struct fstatfs_args { 361 int fd; 362 struct statfs *buf; 363 }; 364 #endif 365 int 366 sys_fstatfs(td, uap) 367 struct thread *td; 368 register struct fstatfs_args /* { 369 int fd; 370 struct statfs *buf; 371 } */ *uap; 372 { 373 struct statfs sf; 374 int error; 375 376 error = kern_fstatfs(td, uap->fd, &sf); 377 if (error == 0) 378 error = copyout(&sf, uap->buf, sizeof(sf)); 379 return (error); 380 } 381 382 int 383 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 384 { 385 struct file *fp; 386 struct mount *mp; 387 struct statfs *sp, sb; 388 int vfslocked; 389 struct vnode *vp; 390 int error; 391 392 AUDIT_ARG_FD(fd); 393 error = getvnode(td->td_proc->p_fd, fd, CAP_FSTATFS, &fp); 394 if (error) 395 return (error); 396 vp = fp->f_vnode; 397 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 398 vn_lock(vp, LK_SHARED | LK_RETRY); 399 #ifdef AUDIT 400 AUDIT_ARG_VNODE1(vp); 401 #endif 402 mp = vp->v_mount; 403 if (mp) 404 vfs_ref(mp); 405 VOP_UNLOCK(vp, 0); 406 fdrop(fp, td); 407 if (mp == NULL) { 408 error = EBADF; 409 goto out; 410 } 411 error = vfs_busy(mp, 0); 412 vfs_rel(mp); 413 if (error) { 414 VFS_UNLOCK_GIANT(vfslocked); 415 return (error); 416 } 
417 #ifdef MAC 418 error = mac_mount_check_stat(td->td_ucred, mp); 419 if (error) 420 goto out; 421 #endif 422 /* 423 * Set these in case the underlying filesystem fails to do so. 424 */ 425 sp = &mp->mnt_stat; 426 sp->f_version = STATFS_VERSION; 427 sp->f_namemax = NAME_MAX; 428 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 429 error = VFS_STATFS(mp, sp); 430 if (error) 431 goto out; 432 if (priv_check(td, PRIV_VFS_GENERATION)) { 433 bcopy(sp, &sb, sizeof(sb)); 434 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 435 prison_enforce_statfs(td->td_ucred, mp, &sb); 436 sp = &sb; 437 } 438 *buf = *sp; 439 out: 440 if (mp) 441 vfs_unbusy(mp); 442 VFS_UNLOCK_GIANT(vfslocked); 443 return (error); 444 } 445 446 /* 447 * Get statistics on all filesystems. 448 */ 449 #ifndef _SYS_SYSPROTO_H_ 450 struct getfsstat_args { 451 struct statfs *buf; 452 long bufsize; 453 int flags; 454 }; 455 #endif 456 int 457 sys_getfsstat(td, uap) 458 struct thread *td; 459 register struct getfsstat_args /* { 460 struct statfs *buf; 461 long bufsize; 462 int flags; 463 } */ *uap; 464 { 465 466 return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE, 467 uap->flags)); 468 } 469 470 /* 471 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 472 * The caller is responsible for freeing memory which will be allocated 473 * in '*buf'. 
474 */ 475 int 476 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 477 enum uio_seg bufseg, int flags) 478 { 479 struct mount *mp, *nmp; 480 struct statfs *sfsp, *sp, sb; 481 size_t count, maxcount; 482 int vfslocked; 483 int error; 484 485 maxcount = bufsize / sizeof(struct statfs); 486 if (bufsize == 0) 487 sfsp = NULL; 488 else if (bufseg == UIO_USERSPACE) 489 sfsp = *buf; 490 else /* if (bufseg == UIO_SYSSPACE) */ { 491 count = 0; 492 mtx_lock(&mountlist_mtx); 493 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 494 count++; 495 } 496 mtx_unlock(&mountlist_mtx); 497 if (maxcount > count) 498 maxcount = count; 499 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 500 M_WAITOK); 501 } 502 count = 0; 503 mtx_lock(&mountlist_mtx); 504 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 505 if (prison_canseemount(td->td_ucred, mp) != 0) { 506 nmp = TAILQ_NEXT(mp, mnt_list); 507 continue; 508 } 509 #ifdef MAC 510 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 511 nmp = TAILQ_NEXT(mp, mnt_list); 512 continue; 513 } 514 #endif 515 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 516 nmp = TAILQ_NEXT(mp, mnt_list); 517 continue; 518 } 519 vfslocked = VFS_LOCK_GIANT(mp); 520 if (sfsp && count < maxcount) { 521 sp = &mp->mnt_stat; 522 /* 523 * Set these in case the underlying filesystem 524 * fails to do so. 525 */ 526 sp->f_version = STATFS_VERSION; 527 sp->f_namemax = NAME_MAX; 528 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 529 /* 530 * If MNT_NOWAIT or MNT_LAZY is specified, do not 531 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 532 * overrides MNT_WAIT. 
533 */ 534 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 535 (flags & MNT_WAIT)) && 536 (error = VFS_STATFS(mp, sp))) { 537 VFS_UNLOCK_GIANT(vfslocked); 538 mtx_lock(&mountlist_mtx); 539 nmp = TAILQ_NEXT(mp, mnt_list); 540 vfs_unbusy(mp); 541 continue; 542 } 543 if (priv_check(td, PRIV_VFS_GENERATION)) { 544 bcopy(sp, &sb, sizeof(sb)); 545 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 546 prison_enforce_statfs(td->td_ucred, mp, &sb); 547 sp = &sb; 548 } 549 if (bufseg == UIO_SYSSPACE) 550 bcopy(sp, sfsp, sizeof(*sp)); 551 else /* if (bufseg == UIO_USERSPACE) */ { 552 error = copyout(sp, sfsp, sizeof(*sp)); 553 if (error) { 554 vfs_unbusy(mp); 555 VFS_UNLOCK_GIANT(vfslocked); 556 return (error); 557 } 558 } 559 sfsp++; 560 } 561 VFS_UNLOCK_GIANT(vfslocked); 562 count++; 563 mtx_lock(&mountlist_mtx); 564 nmp = TAILQ_NEXT(mp, mnt_list); 565 vfs_unbusy(mp); 566 } 567 mtx_unlock(&mountlist_mtx); 568 if (sfsp && count > maxcount) 569 td->td_retval[0] = maxcount; 570 else 571 td->td_retval[0] = count; 572 return (0); 573 } 574 575 #ifdef COMPAT_FREEBSD4 576 /* 577 * Get old format filesystem statistics. 578 */ 579 static void cvtstatfs(struct statfs *, struct ostatfs *); 580 581 #ifndef _SYS_SYSPROTO_H_ 582 struct freebsd4_statfs_args { 583 char *path; 584 struct ostatfs *buf; 585 }; 586 #endif 587 int 588 freebsd4_statfs(td, uap) 589 struct thread *td; 590 struct freebsd4_statfs_args /* { 591 char *path; 592 struct ostatfs *buf; 593 } */ *uap; 594 { 595 struct ostatfs osb; 596 struct statfs sf; 597 int error; 598 599 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 600 if (error) 601 return (error); 602 cvtstatfs(&sf, &osb); 603 return (copyout(&osb, uap->buf, sizeof(osb))); 604 } 605 606 /* 607 * Get filesystem statistics. 
608 */ 609 #ifndef _SYS_SYSPROTO_H_ 610 struct freebsd4_fstatfs_args { 611 int fd; 612 struct ostatfs *buf; 613 }; 614 #endif 615 int 616 freebsd4_fstatfs(td, uap) 617 struct thread *td; 618 struct freebsd4_fstatfs_args /* { 619 int fd; 620 struct ostatfs *buf; 621 } */ *uap; 622 { 623 struct ostatfs osb; 624 struct statfs sf; 625 int error; 626 627 error = kern_fstatfs(td, uap->fd, &sf); 628 if (error) 629 return (error); 630 cvtstatfs(&sf, &osb); 631 return (copyout(&osb, uap->buf, sizeof(osb))); 632 } 633 634 /* 635 * Get statistics on all filesystems. 636 */ 637 #ifndef _SYS_SYSPROTO_H_ 638 struct freebsd4_getfsstat_args { 639 struct ostatfs *buf; 640 long bufsize; 641 int flags; 642 }; 643 #endif 644 int 645 freebsd4_getfsstat(td, uap) 646 struct thread *td; 647 register struct freebsd4_getfsstat_args /* { 648 struct ostatfs *buf; 649 long bufsize; 650 int flags; 651 } */ *uap; 652 { 653 struct statfs *buf, *sp; 654 struct ostatfs osb; 655 size_t count, size; 656 int error; 657 658 count = uap->bufsize / sizeof(struct ostatfs); 659 size = count * sizeof(struct statfs); 660 error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags); 661 if (size > 0) { 662 count = td->td_retval[0]; 663 sp = buf; 664 while (count > 0 && error == 0) { 665 cvtstatfs(sp, &osb); 666 error = copyout(&osb, uap->buf, sizeof(osb)); 667 sp++; 668 uap->buf++; 669 count--; 670 } 671 free(buf, M_TEMP); 672 } 673 return (error); 674 } 675 676 /* 677 * Implement fstatfs() for (NFS) file handles. 
678 */ 679 #ifndef _SYS_SYSPROTO_H_ 680 struct freebsd4_fhstatfs_args { 681 struct fhandle *u_fhp; 682 struct ostatfs *buf; 683 }; 684 #endif 685 int 686 freebsd4_fhstatfs(td, uap) 687 struct thread *td; 688 struct freebsd4_fhstatfs_args /* { 689 struct fhandle *u_fhp; 690 struct ostatfs *buf; 691 } */ *uap; 692 { 693 struct ostatfs osb; 694 struct statfs sf; 695 fhandle_t fh; 696 int error; 697 698 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 699 if (error) 700 return (error); 701 error = kern_fhstatfs(td, fh, &sf); 702 if (error) 703 return (error); 704 cvtstatfs(&sf, &osb); 705 return (copyout(&osb, uap->buf, sizeof(osb))); 706 } 707 708 /* 709 * Convert a new format statfs structure to an old format statfs structure. 710 */ 711 static void 712 cvtstatfs(nsp, osp) 713 struct statfs *nsp; 714 struct ostatfs *osp; 715 { 716 717 statfs_scale_blocks(nsp, LONG_MAX); 718 bzero(osp, sizeof(*osp)); 719 osp->f_bsize = nsp->f_bsize; 720 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 721 osp->f_blocks = nsp->f_blocks; 722 osp->f_bfree = nsp->f_bfree; 723 osp->f_bavail = nsp->f_bavail; 724 osp->f_files = MIN(nsp->f_files, LONG_MAX); 725 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 726 osp->f_owner = nsp->f_owner; 727 osp->f_type = nsp->f_type; 728 osp->f_flags = nsp->f_flags; 729 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 730 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 731 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 732 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 733 strlcpy(osp->f_fstypename, nsp->f_fstypename, 734 MIN(MFSNAMELEN, OMFSNAMELEN)); 735 strlcpy(osp->f_mntonname, nsp->f_mntonname, 736 MIN(MNAMELEN, OMNAMELEN)); 737 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 738 MIN(MNAMELEN, OMNAMELEN)); 739 osp->f_fsid = nsp->f_fsid; 740 } 741 #endif /* COMPAT_FREEBSD4 */ 742 743 /* 744 * Change current working directory to a given file descriptor. 
745 */ 746 #ifndef _SYS_SYSPROTO_H_ 747 struct fchdir_args { 748 int fd; 749 }; 750 #endif 751 int 752 sys_fchdir(td, uap) 753 struct thread *td; 754 struct fchdir_args /* { 755 int fd; 756 } */ *uap; 757 { 758 register struct filedesc *fdp = td->td_proc->p_fd; 759 struct vnode *vp, *tdp, *vpold; 760 struct mount *mp; 761 struct file *fp; 762 int vfslocked; 763 int error; 764 765 AUDIT_ARG_FD(uap->fd); 766 if ((error = getvnode(fdp, uap->fd, CAP_FCHDIR, &fp)) != 0) 767 return (error); 768 vp = fp->f_vnode; 769 VREF(vp); 770 fdrop(fp, td); 771 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 772 vn_lock(vp, LK_SHARED | LK_RETRY); 773 AUDIT_ARG_VNODE1(vp); 774 error = change_dir(vp, td); 775 while (!error && (mp = vp->v_mountedhere) != NULL) { 776 int tvfslocked; 777 if (vfs_busy(mp, 0)) 778 continue; 779 tvfslocked = VFS_LOCK_GIANT(mp); 780 error = VFS_ROOT(mp, LK_SHARED, &tdp); 781 vfs_unbusy(mp); 782 if (error) { 783 VFS_UNLOCK_GIANT(tvfslocked); 784 break; 785 } 786 vput(vp); 787 VFS_UNLOCK_GIANT(vfslocked); 788 vp = tdp; 789 vfslocked = tvfslocked; 790 } 791 if (error) { 792 vput(vp); 793 VFS_UNLOCK_GIANT(vfslocked); 794 return (error); 795 } 796 VOP_UNLOCK(vp, 0); 797 VFS_UNLOCK_GIANT(vfslocked); 798 FILEDESC_XLOCK(fdp); 799 vpold = fdp->fd_cdir; 800 fdp->fd_cdir = vp; 801 FILEDESC_XUNLOCK(fdp); 802 vfslocked = VFS_LOCK_GIANT(vpold->v_mount); 803 vrele(vpold); 804 VFS_UNLOCK_GIANT(vfslocked); 805 return (0); 806 } 807 808 /* 809 * Change current working directory (``.''). 
810 */ 811 #ifndef _SYS_SYSPROTO_H_ 812 struct chdir_args { 813 char *path; 814 }; 815 #endif 816 int 817 sys_chdir(td, uap) 818 struct thread *td; 819 struct chdir_args /* { 820 char *path; 821 } */ *uap; 822 { 823 824 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 825 } 826 827 int 828 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 829 { 830 register struct filedesc *fdp = td->td_proc->p_fd; 831 int error; 832 struct nameidata nd; 833 struct vnode *vp; 834 int vfslocked; 835 836 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | 837 MPSAFE, pathseg, path, td); 838 if ((error = namei(&nd)) != 0) 839 return (error); 840 vfslocked = NDHASGIANT(&nd); 841 if ((error = change_dir(nd.ni_vp, td)) != 0) { 842 vput(nd.ni_vp); 843 VFS_UNLOCK_GIANT(vfslocked); 844 NDFREE(&nd, NDF_ONLY_PNBUF); 845 return (error); 846 } 847 VOP_UNLOCK(nd.ni_vp, 0); 848 VFS_UNLOCK_GIANT(vfslocked); 849 NDFREE(&nd, NDF_ONLY_PNBUF); 850 FILEDESC_XLOCK(fdp); 851 vp = fdp->fd_cdir; 852 fdp->fd_cdir = nd.ni_vp; 853 FILEDESC_XUNLOCK(fdp); 854 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 855 vrele(vp); 856 VFS_UNLOCK_GIANT(vfslocked); 857 return (0); 858 } 859 860 /* 861 * Helper function for raised chroot(2) security function: Refuse if 862 * any filedescriptors are open directories. 863 */ 864 static int 865 chroot_refuse_vdir_fds(fdp) 866 struct filedesc *fdp; 867 { 868 struct vnode *vp; 869 struct file *fp; 870 int fd; 871 872 FILEDESC_LOCK_ASSERT(fdp); 873 874 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 875 fp = fget_locked(fdp, fd); 876 if (fp == NULL) 877 continue; 878 if (fp->f_type == DTYPE_VNODE) { 879 vp = fp->f_vnode; 880 if (vp->v_type == VDIR) 881 return (EPERM); 882 } 883 } 884 return (0); 885 } 886 887 /* 888 * This sysctl determines if we will allow a process to chroot(2) if it 889 * has a directory open: 890 * 0: disallowed for all processes. 891 * 1: allowed for processes that were not already chroot(2)'ed. 892 * 2: allowed for all processes. 
893 */ 894 895 static int chroot_allow_open_directories = 1; 896 897 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 898 &chroot_allow_open_directories, 0, 899 "Allow a process to chroot(2) if it has a directory open"); 900 901 /* 902 * Change notion of root (``/'') directory. 903 */ 904 #ifndef _SYS_SYSPROTO_H_ 905 struct chroot_args { 906 char *path; 907 }; 908 #endif 909 int 910 sys_chroot(td, uap) 911 struct thread *td; 912 struct chroot_args /* { 913 char *path; 914 } */ *uap; 915 { 916 int error; 917 struct nameidata nd; 918 int vfslocked; 919 920 error = priv_check(td, PRIV_VFS_CHROOT); 921 if (error) 922 return (error); 923 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | 924 AUDITVNODE1, UIO_USERSPACE, uap->path, td); 925 error = namei(&nd); 926 if (error) 927 goto error; 928 vfslocked = NDHASGIANT(&nd); 929 if ((error = change_dir(nd.ni_vp, td)) != 0) 930 goto e_vunlock; 931 #ifdef MAC 932 if ((error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp))) 933 goto e_vunlock; 934 #endif 935 VOP_UNLOCK(nd.ni_vp, 0); 936 error = change_root(nd.ni_vp, td); 937 vrele(nd.ni_vp); 938 VFS_UNLOCK_GIANT(vfslocked); 939 NDFREE(&nd, NDF_ONLY_PNBUF); 940 return (error); 941 e_vunlock: 942 vput(nd.ni_vp); 943 VFS_UNLOCK_GIANT(vfslocked); 944 error: 945 NDFREE(&nd, NDF_ONLY_PNBUF); 946 return (error); 947 } 948 949 /* 950 * Common routine for chroot and chdir. Callers must provide a locked vnode 951 * instance. 952 */ 953 int 954 change_dir(vp, td) 955 struct vnode *vp; 956 struct thread *td; 957 { 958 int error; 959 960 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 961 if (vp->v_type != VDIR) 962 return (ENOTDIR); 963 #ifdef MAC 964 error = mac_vnode_check_chdir(td->td_ucred, vp); 965 if (error) 966 return (error); 967 #endif 968 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 969 return (error); 970 } 971 972 /* 973 * Common routine for kern_chroot() and jail_attach(). 
The caller is 974 * responsible for invoking priv_check() and mac_vnode_check_chroot() to 975 * authorize this operation. 976 */ 977 int 978 change_root(vp, td) 979 struct vnode *vp; 980 struct thread *td; 981 { 982 struct filedesc *fdp; 983 struct vnode *oldvp; 984 int vfslocked; 985 int error; 986 987 VFS_ASSERT_GIANT(vp->v_mount); 988 fdp = td->td_proc->p_fd; 989 FILEDESC_XLOCK(fdp); 990 if (chroot_allow_open_directories == 0 || 991 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 992 error = chroot_refuse_vdir_fds(fdp); 993 if (error) { 994 FILEDESC_XUNLOCK(fdp); 995 return (error); 996 } 997 } 998 oldvp = fdp->fd_rdir; 999 fdp->fd_rdir = vp; 1000 VREF(fdp->fd_rdir); 1001 if (!fdp->fd_jdir) { 1002 fdp->fd_jdir = vp; 1003 VREF(fdp->fd_jdir); 1004 } 1005 FILEDESC_XUNLOCK(fdp); 1006 vfslocked = VFS_LOCK_GIANT(oldvp->v_mount); 1007 vrele(oldvp); 1008 VFS_UNLOCK_GIANT(vfslocked); 1009 return (0); 1010 } 1011 1012 static __inline cap_rights_t 1013 flags_to_rights(int flags) 1014 { 1015 cap_rights_t rights = 0; 1016 1017 switch ((flags & O_ACCMODE)) { 1018 case O_RDONLY: 1019 rights |= CAP_READ; 1020 break; 1021 1022 case O_RDWR: 1023 rights |= CAP_READ; 1024 /* fall through */ 1025 1026 case O_WRONLY: 1027 rights |= CAP_WRITE; 1028 break; 1029 1030 case O_EXEC: 1031 rights |= CAP_FEXECVE; 1032 break; 1033 } 1034 1035 if (flags & O_CREAT) 1036 rights |= CAP_CREATE; 1037 1038 if (flags & O_TRUNC) 1039 rights |= CAP_FTRUNCATE; 1040 1041 if ((flags & O_EXLOCK) || (flags & O_SHLOCK)) 1042 rights |= CAP_FLOCK; 1043 1044 return (rights); 1045 } 1046 1047 /* 1048 * Check permissions, allocate an open file structure, and call the device 1049 * open routine if any. 
1050 */ 1051 #ifndef _SYS_SYSPROTO_H_ 1052 struct open_args { 1053 char *path; 1054 int flags; 1055 int mode; 1056 }; 1057 #endif 1058 int 1059 sys_open(td, uap) 1060 struct thread *td; 1061 register struct open_args /* { 1062 char *path; 1063 int flags; 1064 int mode; 1065 } */ *uap; 1066 { 1067 1068 return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode)); 1069 } 1070 1071 #ifndef _SYS_SYSPROTO_H_ 1072 struct openat_args { 1073 int fd; 1074 char *path; 1075 int flag; 1076 int mode; 1077 }; 1078 #endif 1079 int 1080 sys_openat(struct thread *td, struct openat_args *uap) 1081 { 1082 1083 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1084 uap->mode)); 1085 } 1086 1087 int 1088 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags, 1089 int mode) 1090 { 1091 1092 return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode)); 1093 } 1094 1095 int 1096 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1097 int flags, int mode) 1098 { 1099 struct proc *p = td->td_proc; 1100 struct filedesc *fdp = p->p_fd; 1101 struct file *fp; 1102 struct vnode *vp; 1103 int cmode; 1104 struct file *nfp; 1105 int type, indx = -1, error, error_open; 1106 struct flock lf; 1107 struct nameidata nd; 1108 int vfslocked; 1109 cap_rights_t rights_needed = CAP_LOOKUP; 1110 1111 AUDIT_ARG_FFLAGS(flags); 1112 AUDIT_ARG_MODE(mode); 1113 /* XXX: audit dirfd */ 1114 rights_needed |= flags_to_rights(flags); 1115 /* 1116 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1117 * may be specified. 
1118 */ 1119 if (flags & O_EXEC) { 1120 if (flags & O_ACCMODE) 1121 return (EINVAL); 1122 } else if ((flags & O_ACCMODE) == O_ACCMODE) 1123 return (EINVAL); 1124 else 1125 flags = FFLAGS(flags); 1126 1127 /* 1128 * allocate the file descriptor, but don't install a descriptor yet 1129 */ 1130 error = falloc_noinstall(td, &nfp); 1131 if (error) 1132 return (error); 1133 /* An extra reference on `nfp' has been held for us by falloc_noinstall(). */ 1134 fp = nfp; 1135 /* Set the flags early so the finit in devfs can pick them up. */ 1136 fp->f_flag = flags & FMASK; 1137 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; 1138 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, 1139 path, fd, rights_needed, td); 1140 td->td_dupfd = -1; /* XXX check for fdopen */ 1141 error = vn_open(&nd, &flags, cmode, fp); 1142 if (error) { 1143 /* 1144 * If the vn_open replaced the method vector, something 1145 * wonderous happened deep below and we just pass it up 1146 * pretending we know what we do. 1147 */ 1148 if (error == ENXIO && fp->f_ops != &badfileops) 1149 goto success; 1150 1151 /* 1152 * handle special fdopen() case. bleh. dupfdopen() is 1153 * responsible for dropping the old contents of ofiles[indx] 1154 * if it succeeds. 1155 * 1156 * Don't do this for relative (capability) lookups; we don't 1157 * understand exactly what would happen, and we don't think 1158 * that it ever should. 1159 */ 1160 if ((nd.ni_strictrelative == 0) && 1161 (error == ENODEV || error == ENXIO) && 1162 (td->td_dupfd >= 0)) { 1163 /* XXX from fdopen */ 1164 error_open = error; 1165 if ((error = finstall(td, fp, &indx, flags)) != 0) 1166 goto bad_unlocked; 1167 if ((error = dupfdopen(td, fdp, indx, td->td_dupfd, 1168 flags, error_open)) == 0) 1169 goto success; 1170 } 1171 /* 1172 * Clean up the descriptor, but only if another thread hadn't 1173 * replaced or closed it. 
1174 */ 1175 if (indx != -1) 1176 fdclose(fdp, fp, indx, td); 1177 fdrop(fp, td); 1178 1179 if (error == ERESTART) 1180 error = EINTR; 1181 return (error); 1182 } 1183 td->td_dupfd = 0; 1184 vfslocked = NDHASGIANT(&nd); 1185 NDFREE(&nd, NDF_ONLY_PNBUF); 1186 vp = nd.ni_vp; 1187 1188 /* 1189 * Store the vnode, for any f_type. Typically, the vnode use 1190 * count is decremented by direct call to vn_closefile() for 1191 * files that switched type in the cdevsw fdopen() method. 1192 */ 1193 fp->f_vnode = vp; 1194 /* 1195 * If the file wasn't claimed by devfs bind it to the normal 1196 * vnode operations here. 1197 */ 1198 if (fp->f_ops == &badfileops) { 1199 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1200 fp->f_seqcount = 1; 1201 finit(fp, flags & FMASK, DTYPE_VNODE, vp, &vnops); 1202 } 1203 1204 VOP_UNLOCK(vp, 0); 1205 if (fp->f_type == DTYPE_VNODE && (flags & (O_EXLOCK | O_SHLOCK)) != 0) { 1206 lf.l_whence = SEEK_SET; 1207 lf.l_start = 0; 1208 lf.l_len = 0; 1209 if (flags & O_EXLOCK) 1210 lf.l_type = F_WRLCK; 1211 else 1212 lf.l_type = F_RDLCK; 1213 type = F_FLOCK; 1214 if ((flags & FNONBLOCK) == 0) 1215 type |= F_WAIT; 1216 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, 1217 type)) != 0) 1218 goto bad; 1219 atomic_set_int(&fp->f_flag, FHASLOCK); 1220 } 1221 if (flags & O_TRUNC) { 1222 error = fo_truncate(fp, 0, td->td_ucred, td); 1223 if (error) 1224 goto bad; 1225 } 1226 VFS_UNLOCK_GIANT(vfslocked); 1227 success: 1228 /* 1229 * If we haven't already installed the FD (for dupfdopen), do so now. 1230 */ 1231 if (indx == -1) { 1232 #ifdef CAPABILITIES 1233 if (nd.ni_strictrelative == 1) { 1234 /* 1235 * We are doing a strict relative lookup; wrap the 1236 * result in a capability. 
1237 */ 1238 if ((error = kern_capwrap(td, fp, nd.ni_baserights, 1239 &indx)) != 0) 1240 goto bad_unlocked; 1241 } else 1242 #endif 1243 if ((error = finstall(td, fp, &indx, flags)) != 0) 1244 goto bad_unlocked; 1245 1246 } 1247 1248 /* 1249 * Release our private reference, leaving the one associated with 1250 * the descriptor table intact. 1251 */ 1252 fdrop(fp, td); 1253 td->td_retval[0] = indx; 1254 return (0); 1255 bad: 1256 VFS_UNLOCK_GIANT(vfslocked); 1257 bad_unlocked: 1258 if (indx != -1) 1259 fdclose(fdp, fp, indx, td); 1260 fdrop(fp, td); 1261 td->td_retval[0] = -1; 1262 return (error); 1263 } 1264 1265 #ifdef COMPAT_43 1266 /* 1267 * Create a file. 1268 */ 1269 #ifndef _SYS_SYSPROTO_H_ 1270 struct ocreat_args { 1271 char *path; 1272 int mode; 1273 }; 1274 #endif 1275 int 1276 ocreat(td, uap) 1277 struct thread *td; 1278 register struct ocreat_args /* { 1279 char *path; 1280 int mode; 1281 } */ *uap; 1282 { 1283 1284 return (kern_open(td, uap->path, UIO_USERSPACE, 1285 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1286 } 1287 #endif /* COMPAT_43 */ 1288 1289 /* 1290 * Create a special file. 
1291 */ 1292 #ifndef _SYS_SYSPROTO_H_ 1293 struct mknod_args { 1294 char *path; 1295 int mode; 1296 int dev; 1297 }; 1298 #endif 1299 int 1300 sys_mknod(td, uap) 1301 struct thread *td; 1302 register struct mknod_args /* { 1303 char *path; 1304 int mode; 1305 int dev; 1306 } */ *uap; 1307 { 1308 1309 return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); 1310 } 1311 1312 #ifndef _SYS_SYSPROTO_H_ 1313 struct mknodat_args { 1314 int fd; 1315 char *path; 1316 mode_t mode; 1317 dev_t dev; 1318 }; 1319 #endif 1320 int 1321 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1322 { 1323 1324 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1325 uap->dev)); 1326 } 1327 1328 int 1329 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode, 1330 int dev) 1331 { 1332 1333 return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev)); 1334 } 1335 1336 int 1337 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1338 int mode, int dev) 1339 { 1340 struct vnode *vp; 1341 struct mount *mp; 1342 struct vattr vattr; 1343 int error; 1344 int whiteout = 0; 1345 struct nameidata nd; 1346 int vfslocked; 1347 1348 AUDIT_ARG_MODE(mode); 1349 AUDIT_ARG_DEV(dev); 1350 switch (mode & S_IFMT) { 1351 case S_IFCHR: 1352 case S_IFBLK: 1353 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1354 break; 1355 case S_IFMT: 1356 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1357 break; 1358 case S_IFWHT: 1359 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1360 break; 1361 case S_IFIFO: 1362 if (dev == 0) 1363 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1364 /* FALLTHROUGH */ 1365 default: 1366 error = EINVAL; 1367 break; 1368 } 1369 if (error) 1370 return (error); 1371 restart: 1372 bwillwrite(); 1373 NDINIT_ATRIGHTS(&nd, CREATE, 1374 LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1, pathseg, path, fd, 1375 CAP_MKFIFO, td); 1376 if ((error = namei(&nd)) != 0) 1377 return (error); 1378 vfslocked = NDHASGIANT(&nd); 1379 vp = 
nd.ni_vp; 1380 if (vp != NULL) { 1381 NDFREE(&nd, NDF_ONLY_PNBUF); 1382 if (vp == nd.ni_dvp) 1383 vrele(nd.ni_dvp); 1384 else 1385 vput(nd.ni_dvp); 1386 vrele(vp); 1387 VFS_UNLOCK_GIANT(vfslocked); 1388 return (EEXIST); 1389 } else { 1390 VATTR_NULL(&vattr); 1391 vattr.va_mode = (mode & ALLPERMS) & 1392 ~td->td_proc->p_fd->fd_cmask; 1393 vattr.va_rdev = dev; 1394 whiteout = 0; 1395 1396 switch (mode & S_IFMT) { 1397 case S_IFMT: /* used by badsect to flag bad sectors */ 1398 vattr.va_type = VBAD; 1399 break; 1400 case S_IFCHR: 1401 vattr.va_type = VCHR; 1402 break; 1403 case S_IFBLK: 1404 vattr.va_type = VBLK; 1405 break; 1406 case S_IFWHT: 1407 whiteout = 1; 1408 break; 1409 default: 1410 panic("kern_mknod: invalid mode"); 1411 } 1412 } 1413 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1414 NDFREE(&nd, NDF_ONLY_PNBUF); 1415 vput(nd.ni_dvp); 1416 VFS_UNLOCK_GIANT(vfslocked); 1417 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1418 return (error); 1419 goto restart; 1420 } 1421 #ifdef MAC 1422 if (error == 0 && !whiteout) 1423 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1424 &nd.ni_cnd, &vattr); 1425 #endif 1426 if (!error) { 1427 if (whiteout) 1428 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1429 else { 1430 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1431 &nd.ni_cnd, &vattr); 1432 if (error == 0) 1433 vput(nd.ni_vp); 1434 } 1435 } 1436 NDFREE(&nd, NDF_ONLY_PNBUF); 1437 vput(nd.ni_dvp); 1438 vn_finished_write(mp); 1439 VFS_UNLOCK_GIANT(vfslocked); 1440 return (error); 1441 } 1442 1443 /* 1444 * Create a named pipe. 
1445 */ 1446 #ifndef _SYS_SYSPROTO_H_ 1447 struct mkfifo_args { 1448 char *path; 1449 int mode; 1450 }; 1451 #endif 1452 int 1453 sys_mkfifo(td, uap) 1454 struct thread *td; 1455 register struct mkfifo_args /* { 1456 char *path; 1457 int mode; 1458 } */ *uap; 1459 { 1460 1461 return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode)); 1462 } 1463 1464 #ifndef _SYS_SYSPROTO_H_ 1465 struct mkfifoat_args { 1466 int fd; 1467 char *path; 1468 mode_t mode; 1469 }; 1470 #endif 1471 int 1472 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1473 { 1474 1475 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1476 uap->mode)); 1477 } 1478 1479 int 1480 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode) 1481 { 1482 1483 return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode)); 1484 } 1485 1486 int 1487 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1488 int mode) 1489 { 1490 struct mount *mp; 1491 struct vattr vattr; 1492 int error; 1493 struct nameidata nd; 1494 int vfslocked; 1495 1496 AUDIT_ARG_MODE(mode); 1497 restart: 1498 bwillwrite(); 1499 NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1, 1500 pathseg, path, fd, td); 1501 if ((error = namei(&nd)) != 0) 1502 return (error); 1503 vfslocked = NDHASGIANT(&nd); 1504 if (nd.ni_vp != NULL) { 1505 NDFREE(&nd, NDF_ONLY_PNBUF); 1506 if (nd.ni_vp == nd.ni_dvp) 1507 vrele(nd.ni_dvp); 1508 else 1509 vput(nd.ni_dvp); 1510 vrele(nd.ni_vp); 1511 VFS_UNLOCK_GIANT(vfslocked); 1512 return (EEXIST); 1513 } 1514 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1515 NDFREE(&nd, NDF_ONLY_PNBUF); 1516 vput(nd.ni_dvp); 1517 VFS_UNLOCK_GIANT(vfslocked); 1518 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1519 return (error); 1520 goto restart; 1521 } 1522 VATTR_NULL(&vattr); 1523 vattr.va_type = VFIFO; 1524 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1525 #ifdef MAC 1526 error = 
mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1527 &vattr); 1528 if (error) 1529 goto out; 1530 #endif 1531 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1532 if (error == 0) 1533 vput(nd.ni_vp); 1534 #ifdef MAC 1535 out: 1536 #endif 1537 vput(nd.ni_dvp); 1538 vn_finished_write(mp); 1539 VFS_UNLOCK_GIANT(vfslocked); 1540 NDFREE(&nd, NDF_ONLY_PNBUF); 1541 return (error); 1542 } 1543 1544 /* 1545 * Make a hard file link. 1546 */ 1547 #ifndef _SYS_SYSPROTO_H_ 1548 struct link_args { 1549 char *path; 1550 char *link; 1551 }; 1552 #endif 1553 int 1554 sys_link(td, uap) 1555 struct thread *td; 1556 register struct link_args /* { 1557 char *path; 1558 char *link; 1559 } */ *uap; 1560 { 1561 1562 return (kern_link(td, uap->path, uap->link, UIO_USERSPACE)); 1563 } 1564 1565 #ifndef _SYS_SYSPROTO_H_ 1566 struct linkat_args { 1567 int fd1; 1568 char *path1; 1569 int fd2; 1570 char *path2; 1571 int flag; 1572 }; 1573 #endif 1574 int 1575 sys_linkat(struct thread *td, struct linkat_args *uap) 1576 { 1577 int flag; 1578 1579 flag = uap->flag; 1580 if (flag & ~AT_SYMLINK_FOLLOW) 1581 return (EINVAL); 1582 1583 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1584 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? 
FOLLOW : NOFOLLOW)); 1585 } 1586 1587 int hardlink_check_uid = 0; 1588 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1589 &hardlink_check_uid, 0, 1590 "Unprivileged processes cannot create hard links to files owned by other " 1591 "users"); 1592 static int hardlink_check_gid = 0; 1593 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1594 &hardlink_check_gid, 0, 1595 "Unprivileged processes cannot create hard links to files owned by other " 1596 "groups"); 1597 1598 static int 1599 can_hardlink(struct vnode *vp, struct ucred *cred) 1600 { 1601 struct vattr va; 1602 int error; 1603 1604 if (!hardlink_check_uid && !hardlink_check_gid) 1605 return (0); 1606 1607 error = VOP_GETATTR(vp, &va, cred); 1608 if (error != 0) 1609 return (error); 1610 1611 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1612 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1613 if (error) 1614 return (error); 1615 } 1616 1617 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1618 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1619 if (error) 1620 return (error); 1621 } 1622 1623 return (0); 1624 } 1625 1626 int 1627 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg) 1628 { 1629 1630 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW)); 1631 } 1632 1633 int 1634 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1635 enum uio_seg segflg, int follow) 1636 { 1637 struct vnode *vp; 1638 struct mount *mp; 1639 struct nameidata nd; 1640 int vfslocked; 1641 int lvfslocked; 1642 int error; 1643 1644 bwillwrite(); 1645 NDINIT_AT(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, segflg, path1, 1646 fd1, td); 1647 1648 if ((error = namei(&nd)) != 0) 1649 return (error); 1650 vfslocked = NDHASGIANT(&nd); 1651 NDFREE(&nd, NDF_ONLY_PNBUF); 1652 vp = nd.ni_vp; 1653 if (vp->v_type == VDIR) { 1654 vrele(vp); 1655 VFS_UNLOCK_GIANT(vfslocked); 1656 return (EPERM); /* POSIX */ 1657 } 1658 if 
((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 1659 vrele(vp); 1660 VFS_UNLOCK_GIANT(vfslocked); 1661 return (error); 1662 } 1663 NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2, 1664 segflg, path2, fd2, td); 1665 if ((error = namei(&nd)) == 0) { 1666 lvfslocked = NDHASGIANT(&nd); 1667 if (nd.ni_vp != NULL) { 1668 if (nd.ni_dvp == nd.ni_vp) 1669 vrele(nd.ni_dvp); 1670 else 1671 vput(nd.ni_dvp); 1672 vrele(nd.ni_vp); 1673 error = EEXIST; 1674 } else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) 1675 == 0) { 1676 error = can_hardlink(vp, td->td_ucred); 1677 if (error == 0) 1678 #ifdef MAC 1679 error = mac_vnode_check_link(td->td_ucred, 1680 nd.ni_dvp, vp, &nd.ni_cnd); 1681 if (error == 0) 1682 #endif 1683 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1684 VOP_UNLOCK(vp, 0); 1685 vput(nd.ni_dvp); 1686 } 1687 NDFREE(&nd, NDF_ONLY_PNBUF); 1688 VFS_UNLOCK_GIANT(lvfslocked); 1689 } 1690 vrele(vp); 1691 vn_finished_write(mp); 1692 VFS_UNLOCK_GIANT(vfslocked); 1693 return (error); 1694 } 1695 1696 /* 1697 * Make a symbolic link. 
1698 */ 1699 #ifndef _SYS_SYSPROTO_H_ 1700 struct symlink_args { 1701 char *path; 1702 char *link; 1703 }; 1704 #endif 1705 int 1706 sys_symlink(td, uap) 1707 struct thread *td; 1708 register struct symlink_args /* { 1709 char *path; 1710 char *link; 1711 } */ *uap; 1712 { 1713 1714 return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE)); 1715 } 1716 1717 #ifndef _SYS_SYSPROTO_H_ 1718 struct symlinkat_args { 1719 char *path; 1720 int fd; 1721 char *path2; 1722 }; 1723 #endif 1724 int 1725 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1726 { 1727 1728 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1729 UIO_USERSPACE)); 1730 } 1731 1732 int 1733 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg) 1734 { 1735 1736 return (kern_symlinkat(td, path, AT_FDCWD, link, segflg)); 1737 } 1738 1739 int 1740 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1741 enum uio_seg segflg) 1742 { 1743 struct mount *mp; 1744 struct vattr vattr; 1745 char *syspath; 1746 int error; 1747 struct nameidata nd; 1748 int vfslocked; 1749 1750 if (segflg == UIO_SYSSPACE) { 1751 syspath = path1; 1752 } else { 1753 syspath = uma_zalloc(namei_zone, M_WAITOK); 1754 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1755 goto out; 1756 } 1757 AUDIT_ARG_TEXT(syspath); 1758 restart: 1759 bwillwrite(); 1760 NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1, 1761 segflg, path2, fd, td); 1762 if ((error = namei(&nd)) != 0) 1763 goto out; 1764 vfslocked = NDHASGIANT(&nd); 1765 if (nd.ni_vp) { 1766 NDFREE(&nd, NDF_ONLY_PNBUF); 1767 if (nd.ni_vp == nd.ni_dvp) 1768 vrele(nd.ni_dvp); 1769 else 1770 vput(nd.ni_dvp); 1771 vrele(nd.ni_vp); 1772 VFS_UNLOCK_GIANT(vfslocked); 1773 error = EEXIST; 1774 goto out; 1775 } 1776 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1777 NDFREE(&nd, NDF_ONLY_PNBUF); 1778 vput(nd.ni_dvp); 1779 VFS_UNLOCK_GIANT(vfslocked); 1780 if ((error = vn_start_write(NULL, &mp, 
V_XSLEEP | PCATCH)) != 0) 1781 goto out; 1782 goto restart; 1783 } 1784 VATTR_NULL(&vattr); 1785 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1786 #ifdef MAC 1787 vattr.va_type = VLNK; 1788 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1789 &vattr); 1790 if (error) 1791 goto out2; 1792 #endif 1793 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1794 if (error == 0) 1795 vput(nd.ni_vp); 1796 #ifdef MAC 1797 out2: 1798 #endif 1799 NDFREE(&nd, NDF_ONLY_PNBUF); 1800 vput(nd.ni_dvp); 1801 vn_finished_write(mp); 1802 VFS_UNLOCK_GIANT(vfslocked); 1803 out: 1804 if (segflg != UIO_SYSSPACE) 1805 uma_zfree(namei_zone, syspath); 1806 return (error); 1807 } 1808 1809 /* 1810 * Delete a whiteout from the filesystem. 1811 */ 1812 int 1813 sys_undelete(td, uap) 1814 struct thread *td; 1815 register struct undelete_args /* { 1816 char *path; 1817 } */ *uap; 1818 { 1819 int error; 1820 struct mount *mp; 1821 struct nameidata nd; 1822 int vfslocked; 1823 1824 restart: 1825 bwillwrite(); 1826 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1, 1827 UIO_USERSPACE, uap->path, td); 1828 error = namei(&nd); 1829 if (error) 1830 return (error); 1831 vfslocked = NDHASGIANT(&nd); 1832 1833 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1834 NDFREE(&nd, NDF_ONLY_PNBUF); 1835 if (nd.ni_vp == nd.ni_dvp) 1836 vrele(nd.ni_dvp); 1837 else 1838 vput(nd.ni_dvp); 1839 if (nd.ni_vp) 1840 vrele(nd.ni_vp); 1841 VFS_UNLOCK_GIANT(vfslocked); 1842 return (EEXIST); 1843 } 1844 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1845 NDFREE(&nd, NDF_ONLY_PNBUF); 1846 vput(nd.ni_dvp); 1847 VFS_UNLOCK_GIANT(vfslocked); 1848 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1849 return (error); 1850 goto restart; 1851 } 1852 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1853 NDFREE(&nd, NDF_ONLY_PNBUF); 1854 vput(nd.ni_dvp); 1855 vn_finished_write(mp); 1856 VFS_UNLOCK_GIANT(vfslocked); 1857 
return (error); 1858 } 1859 1860 /* 1861 * Delete a name from the filesystem. 1862 */ 1863 #ifndef _SYS_SYSPROTO_H_ 1864 struct unlink_args { 1865 char *path; 1866 }; 1867 #endif 1868 int 1869 sys_unlink(td, uap) 1870 struct thread *td; 1871 struct unlink_args /* { 1872 char *path; 1873 } */ *uap; 1874 { 1875 1876 return (kern_unlink(td, uap->path, UIO_USERSPACE)); 1877 } 1878 1879 #ifndef _SYS_SYSPROTO_H_ 1880 struct unlinkat_args { 1881 int fd; 1882 char *path; 1883 int flag; 1884 }; 1885 #endif 1886 int 1887 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1888 { 1889 int flag = uap->flag; 1890 int fd = uap->fd; 1891 char *path = uap->path; 1892 1893 if (flag & ~AT_REMOVEDIR) 1894 return (EINVAL); 1895 1896 if (flag & AT_REMOVEDIR) 1897 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1898 else 1899 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1900 } 1901 1902 int 1903 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg) 1904 { 1905 1906 return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0)); 1907 } 1908 1909 int 1910 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1911 ino_t oldinum) 1912 { 1913 struct mount *mp; 1914 struct vnode *vp; 1915 int error; 1916 struct nameidata nd; 1917 struct stat sb; 1918 int vfslocked; 1919 1920 restart: 1921 bwillwrite(); 1922 NDINIT_AT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1, 1923 pathseg, path, fd, td); 1924 if ((error = namei(&nd)) != 0) 1925 return (error == EINVAL ? EPERM : error); 1926 vfslocked = NDHASGIANT(&nd); 1927 vp = nd.ni_vp; 1928 if (vp->v_type == VDIR && oldinum == 0) { 1929 error = EPERM; /* POSIX */ 1930 } else if (oldinum != 0 && 1931 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1932 sb.st_ino != oldinum) { 1933 error = EIDRM; /* Identifier removed */ 1934 } else { 1935 /* 1936 * The root of a mounted filesystem cannot be deleted. 1937 * 1938 * XXX: can this only be a VDIR case? 
1939 */ 1940 if (vp->v_vflag & VV_ROOT) 1941 error = EBUSY; 1942 } 1943 if (error == 0) { 1944 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1945 NDFREE(&nd, NDF_ONLY_PNBUF); 1946 vput(nd.ni_dvp); 1947 if (vp == nd.ni_dvp) 1948 vrele(vp); 1949 else 1950 vput(vp); 1951 VFS_UNLOCK_GIANT(vfslocked); 1952 if ((error = vn_start_write(NULL, &mp, 1953 V_XSLEEP | PCATCH)) != 0) 1954 return (error); 1955 goto restart; 1956 } 1957 #ifdef MAC 1958 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1959 &nd.ni_cnd); 1960 if (error) 1961 goto out; 1962 #endif 1963 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1964 #ifdef MAC 1965 out: 1966 #endif 1967 vn_finished_write(mp); 1968 } 1969 NDFREE(&nd, NDF_ONLY_PNBUF); 1970 vput(nd.ni_dvp); 1971 if (vp == nd.ni_dvp) 1972 vrele(vp); 1973 else 1974 vput(vp); 1975 VFS_UNLOCK_GIANT(vfslocked); 1976 return (error); 1977 } 1978 1979 /* 1980 * Reposition read/write file offset. 1981 */ 1982 #ifndef _SYS_SYSPROTO_H_ 1983 struct lseek_args { 1984 int fd; 1985 int pad; 1986 off_t offset; 1987 int whence; 1988 }; 1989 #endif 1990 int 1991 sys_lseek(td, uap) 1992 struct thread *td; 1993 register struct lseek_args /* { 1994 int fd; 1995 int pad; 1996 off_t offset; 1997 int whence; 1998 } */ *uap; 1999 { 2000 struct ucred *cred = td->td_ucred; 2001 struct file *fp; 2002 struct vnode *vp; 2003 struct vattr vattr; 2004 off_t offset, size; 2005 int error, noneg; 2006 int vfslocked; 2007 2008 AUDIT_ARG_FD(uap->fd); 2009 if ((error = fget(td, uap->fd, CAP_SEEK, &fp)) != 0) 2010 return (error); 2011 if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) { 2012 fdrop(fp, td); 2013 return (ESPIPE); 2014 } 2015 vp = fp->f_vnode; 2016 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2017 noneg = (vp->v_type != VCHR); 2018 offset = uap->offset; 2019 switch (uap->whence) { 2020 case L_INCR: 2021 if (noneg && 2022 (fp->f_offset < 0 || 2023 (offset > 0 && fp->f_offset > OFF_MAX - offset))) { 2024 error = EOVERFLOW; 2025 break; 2026 } 2027 offset += 
fp->f_offset; 2028 break; 2029 case L_XTND: 2030 vn_lock(vp, LK_SHARED | LK_RETRY); 2031 error = VOP_GETATTR(vp, &vattr, cred); 2032 VOP_UNLOCK(vp, 0); 2033 if (error) 2034 break; 2035 2036 /* 2037 * If the file references a disk device, then fetch 2038 * the media size and use that to determine the ending 2039 * offset. 2040 */ 2041 if (vattr.va_size == 0 && vp->v_type == VCHR && 2042 fo_ioctl(fp, DIOCGMEDIASIZE, &size, cred, td) == 0) 2043 vattr.va_size = size; 2044 if (noneg && 2045 (vattr.va_size > OFF_MAX || 2046 (offset > 0 && vattr.va_size > OFF_MAX - offset))) { 2047 error = EOVERFLOW; 2048 break; 2049 } 2050 offset += vattr.va_size; 2051 break; 2052 case L_SET: 2053 break; 2054 case SEEK_DATA: 2055 error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td); 2056 break; 2057 case SEEK_HOLE: 2058 error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td); 2059 break; 2060 default: 2061 error = EINVAL; 2062 } 2063 if (error == 0 && noneg && offset < 0) 2064 error = EINVAL; 2065 if (error != 0) 2066 goto drop; 2067 fp->f_offset = offset; 2068 VFS_KNOTE_UNLOCKED(vp, 0); 2069 *(off_t *)(td->td_retval) = fp->f_offset; 2070 drop: 2071 fdrop(fp, td); 2072 VFS_UNLOCK_GIANT(vfslocked); 2073 return (error); 2074 } 2075 2076 #if defined(COMPAT_43) 2077 /* 2078 * Reposition read/write file offset. 
2079 */ 2080 #ifndef _SYS_SYSPROTO_H_ 2081 struct olseek_args { 2082 int fd; 2083 long offset; 2084 int whence; 2085 }; 2086 #endif 2087 int 2088 olseek(td, uap) 2089 struct thread *td; 2090 register struct olseek_args /* { 2091 int fd; 2092 long offset; 2093 int whence; 2094 } */ *uap; 2095 { 2096 struct lseek_args /* { 2097 int fd; 2098 int pad; 2099 off_t offset; 2100 int whence; 2101 } */ nuap; 2102 2103 nuap.fd = uap->fd; 2104 nuap.offset = uap->offset; 2105 nuap.whence = uap->whence; 2106 return (sys_lseek(td, &nuap)); 2107 } 2108 #endif /* COMPAT_43 */ 2109 2110 /* Version with the 'pad' argument */ 2111 int 2112 freebsd6_lseek(td, uap) 2113 struct thread *td; 2114 register struct freebsd6_lseek_args *uap; 2115 { 2116 struct lseek_args ouap; 2117 2118 ouap.fd = uap->fd; 2119 ouap.offset = uap->offset; 2120 ouap.whence = uap->whence; 2121 return (sys_lseek(td, &ouap)); 2122 } 2123 2124 /* 2125 * Check access permissions using passed credentials. 2126 */ 2127 static int 2128 vn_access(vp, user_flags, cred, td) 2129 struct vnode *vp; 2130 int user_flags; 2131 struct ucred *cred; 2132 struct thread *td; 2133 { 2134 int error; 2135 accmode_t accmode; 2136 2137 /* Flags == 0 means only check for existence. */ 2138 error = 0; 2139 if (user_flags) { 2140 accmode = 0; 2141 if (user_flags & R_OK) 2142 accmode |= VREAD; 2143 if (user_flags & W_OK) 2144 accmode |= VWRITE; 2145 if (user_flags & X_OK) 2146 accmode |= VEXEC; 2147 #ifdef MAC 2148 error = mac_vnode_check_access(cred, vp, accmode); 2149 if (error) 2150 return (error); 2151 #endif 2152 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2153 error = VOP_ACCESS(vp, accmode, cred, td); 2154 } 2155 return (error); 2156 } 2157 2158 /* 2159 * Check access permissions using "real" credentials. 
2160 */ 2161 #ifndef _SYS_SYSPROTO_H_ 2162 struct access_args { 2163 char *path; 2164 int amode; 2165 }; 2166 #endif 2167 int 2168 sys_access(td, uap) 2169 struct thread *td; 2170 register struct access_args /* { 2171 char *path; 2172 int amode; 2173 } */ *uap; 2174 { 2175 2176 return (kern_access(td, uap->path, UIO_USERSPACE, uap->amode)); 2177 } 2178 2179 #ifndef _SYS_SYSPROTO_H_ 2180 struct faccessat_args { 2181 int dirfd; 2182 char *path; 2183 int amode; 2184 int flag; 2185 } 2186 #endif 2187 int 2188 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2189 { 2190 2191 if (uap->flag & ~AT_EACCESS) 2192 return (EINVAL); 2193 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2194 uap->amode)); 2195 } 2196 2197 int 2198 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2199 { 2200 2201 return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, amode)); 2202 } 2203 2204 int 2205 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2206 int flag, int amode) 2207 { 2208 struct ucred *cred, *tmpcred; 2209 struct vnode *vp; 2210 struct nameidata nd; 2211 int vfslocked; 2212 int error; 2213 2214 /* 2215 * Create and modify a temporary credential instead of one that 2216 * is potentially shared. 
2217 */ 2218 if (!(flag & AT_EACCESS)) { 2219 cred = td->td_ucred; 2220 tmpcred = crdup(cred); 2221 tmpcred->cr_uid = cred->cr_ruid; 2222 tmpcred->cr_groups[0] = cred->cr_rgid; 2223 td->td_ucred = tmpcred; 2224 } else 2225 cred = tmpcred = td->td_ucred; 2226 AUDIT_ARG_VALUE(amode); 2227 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | 2228 AUDITVNODE1, pathseg, path, fd, CAP_FSTAT, td); 2229 if ((error = namei(&nd)) != 0) 2230 goto out1; 2231 vfslocked = NDHASGIANT(&nd); 2232 vp = nd.ni_vp; 2233 2234 error = vn_access(vp, amode, tmpcred, td); 2235 NDFREE(&nd, NDF_ONLY_PNBUF); 2236 vput(vp); 2237 VFS_UNLOCK_GIANT(vfslocked); 2238 out1: 2239 if (!(flag & AT_EACCESS)) { 2240 td->td_ucred = cred; 2241 crfree(tmpcred); 2242 } 2243 return (error); 2244 } 2245 2246 /* 2247 * Check access permissions using "effective" credentials. 2248 */ 2249 #ifndef _SYS_SYSPROTO_H_ 2250 struct eaccess_args { 2251 char *path; 2252 int amode; 2253 }; 2254 #endif 2255 int 2256 sys_eaccess(td, uap) 2257 struct thread *td; 2258 register struct eaccess_args /* { 2259 char *path; 2260 int amode; 2261 } */ *uap; 2262 { 2263 2264 return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->amode)); 2265 } 2266 2267 int 2268 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2269 { 2270 2271 return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, amode)); 2272 } 2273 2274 #if defined(COMPAT_43) 2275 /* 2276 * Get file status; this version follows links. 
2277 */ 2278 #ifndef _SYS_SYSPROTO_H_ 2279 struct ostat_args { 2280 char *path; 2281 struct ostat *ub; 2282 }; 2283 #endif 2284 int 2285 ostat(td, uap) 2286 struct thread *td; 2287 register struct ostat_args /* { 2288 char *path; 2289 struct ostat *ub; 2290 } */ *uap; 2291 { 2292 struct stat sb; 2293 struct ostat osb; 2294 int error; 2295 2296 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2297 if (error) 2298 return (error); 2299 cvtstat(&sb, &osb); 2300 error = copyout(&osb, uap->ub, sizeof (osb)); 2301 return (error); 2302 } 2303 2304 /* 2305 * Get file status; this version does not follow links. 2306 */ 2307 #ifndef _SYS_SYSPROTO_H_ 2308 struct olstat_args { 2309 char *path; 2310 struct ostat *ub; 2311 }; 2312 #endif 2313 int 2314 olstat(td, uap) 2315 struct thread *td; 2316 register struct olstat_args /* { 2317 char *path; 2318 struct ostat *ub; 2319 } */ *uap; 2320 { 2321 struct stat sb; 2322 struct ostat osb; 2323 int error; 2324 2325 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2326 if (error) 2327 return (error); 2328 cvtstat(&sb, &osb); 2329 error = copyout(&osb, uap->ub, sizeof (osb)); 2330 return (error); 2331 } 2332 2333 /* 2334 * Convert from an old to a new stat structure. 2335 */ 2336 void 2337 cvtstat(st, ost) 2338 struct stat *st; 2339 struct ostat *ost; 2340 { 2341 2342 ost->st_dev = st->st_dev; 2343 ost->st_ino = st->st_ino; 2344 ost->st_mode = st->st_mode; 2345 ost->st_nlink = st->st_nlink; 2346 ost->st_uid = st->st_uid; 2347 ost->st_gid = st->st_gid; 2348 ost->st_rdev = st->st_rdev; 2349 if (st->st_size < (quad_t)1 << 32) 2350 ost->st_size = st->st_size; 2351 else 2352 ost->st_size = -2; 2353 ost->st_atim = st->st_atim; 2354 ost->st_mtim = st->st_mtim; 2355 ost->st_ctim = st->st_ctim; 2356 ost->st_blksize = st->st_blksize; 2357 ost->st_blocks = st->st_blocks; 2358 ost->st_flags = st->st_flags; 2359 ost->st_gen = st->st_gen; 2360 } 2361 #endif /* COMPAT_43 */ 2362 2363 /* 2364 * Get file status; this version follows links. 
2365 */ 2366 #ifndef _SYS_SYSPROTO_H_ 2367 struct stat_args { 2368 char *path; 2369 struct stat *ub; 2370 }; 2371 #endif 2372 int 2373 sys_stat(td, uap) 2374 struct thread *td; 2375 register struct stat_args /* { 2376 char *path; 2377 struct stat *ub; 2378 } */ *uap; 2379 { 2380 struct stat sb; 2381 int error; 2382 2383 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2384 if (error == 0) 2385 error = copyout(&sb, uap->ub, sizeof (sb)); 2386 return (error); 2387 } 2388 2389 #ifndef _SYS_SYSPROTO_H_ 2390 struct fstatat_args { 2391 int fd; 2392 char *path; 2393 struct stat *buf; 2394 int flag; 2395 } 2396 #endif 2397 int 2398 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2399 { 2400 struct stat sb; 2401 int error; 2402 2403 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2404 UIO_USERSPACE, &sb); 2405 if (error == 0) 2406 error = copyout(&sb, uap->buf, sizeof (sb)); 2407 return (error); 2408 } 2409 2410 int 2411 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2412 { 2413 2414 return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp)); 2415 } 2416 2417 int 2418 kern_statat(struct thread *td, int flag, int fd, char *path, 2419 enum uio_seg pathseg, struct stat *sbp) 2420 { 2421 2422 return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL)); 2423 } 2424 2425 int 2426 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path, 2427 enum uio_seg pathseg, struct stat *sbp, 2428 void (*hook)(struct vnode *vp, struct stat *sbp)) 2429 { 2430 struct nameidata nd; 2431 struct stat sb; 2432 int error, vfslocked; 2433 2434 if (flag & ~AT_SYMLINK_NOFOLLOW) 2435 return (EINVAL); 2436 2437 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : 2438 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | MPSAFE, pathseg, 2439 path, fd, CAP_FSTAT, td); 2440 2441 if ((error = namei(&nd)) != 0) 2442 return (error); 2443 vfslocked = NDHASGIANT(&nd); 2444 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2445 if (!error) { 2446 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0); 2447 if (S_ISREG(sb.st_mode)) 2448 SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0); 2449 if (__predict_false(hook != NULL)) 2450 hook(nd.ni_vp, &sb); 2451 } 2452 NDFREE(&nd, NDF_ONLY_PNBUF); 2453 vput(nd.ni_vp); 2454 VFS_UNLOCK_GIANT(vfslocked); 2455 if (error) 2456 return (error); 2457 *sbp = sb; 2458 #ifdef KTRACE 2459 if (KTRPOINT(td, KTR_STRUCT)) 2460 ktrstat(&sb); 2461 #endif 2462 return (0); 2463 } 2464 2465 /* 2466 * Get file status; this version does not follow links. 2467 */ 2468 #ifndef _SYS_SYSPROTO_H_ 2469 struct lstat_args { 2470 char *path; 2471 struct stat *ub; 2472 }; 2473 #endif 2474 int 2475 sys_lstat(td, uap) 2476 struct thread *td; 2477 register struct lstat_args /* { 2478 char *path; 2479 struct stat *ub; 2480 } */ *uap; 2481 { 2482 struct stat sb; 2483 int error; 2484 2485 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2486 if (error == 0) 2487 error = copyout(&sb, uap->ub, sizeof (sb)); 2488 return (error); 2489 } 2490 2491 int 2492 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2493 { 2494 2495 return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg, 2496 sbp)); 2497 } 2498 2499 /* 2500 * Implementation of the NetBSD [l]stat() functions. 
2501 */ 2502 void 2503 cvtnstat(sb, nsb) 2504 struct stat *sb; 2505 struct nstat *nsb; 2506 { 2507 bzero(nsb, sizeof *nsb); 2508 nsb->st_dev = sb->st_dev; 2509 nsb->st_ino = sb->st_ino; 2510 nsb->st_mode = sb->st_mode; 2511 nsb->st_nlink = sb->st_nlink; 2512 nsb->st_uid = sb->st_uid; 2513 nsb->st_gid = sb->st_gid; 2514 nsb->st_rdev = sb->st_rdev; 2515 nsb->st_atim = sb->st_atim; 2516 nsb->st_mtim = sb->st_mtim; 2517 nsb->st_ctim = sb->st_ctim; 2518 nsb->st_size = sb->st_size; 2519 nsb->st_blocks = sb->st_blocks; 2520 nsb->st_blksize = sb->st_blksize; 2521 nsb->st_flags = sb->st_flags; 2522 nsb->st_gen = sb->st_gen; 2523 nsb->st_birthtim = sb->st_birthtim; 2524 } 2525 2526 #ifndef _SYS_SYSPROTO_H_ 2527 struct nstat_args { 2528 char *path; 2529 struct nstat *ub; 2530 }; 2531 #endif 2532 int 2533 sys_nstat(td, uap) 2534 struct thread *td; 2535 register struct nstat_args /* { 2536 char *path; 2537 struct nstat *ub; 2538 } */ *uap; 2539 { 2540 struct stat sb; 2541 struct nstat nsb; 2542 int error; 2543 2544 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2545 if (error) 2546 return (error); 2547 cvtnstat(&sb, &nsb); 2548 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2549 return (error); 2550 } 2551 2552 /* 2553 * NetBSD lstat. Get file status; this version does not follow links. 2554 */ 2555 #ifndef _SYS_SYSPROTO_H_ 2556 struct lstat_args { 2557 char *path; 2558 struct stat *ub; 2559 }; 2560 #endif 2561 int 2562 sys_nlstat(td, uap) 2563 struct thread *td; 2564 register struct nlstat_args /* { 2565 char *path; 2566 struct nstat *ub; 2567 } */ *uap; 2568 { 2569 struct stat sb; 2570 struct nstat nsb; 2571 int error; 2572 2573 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2574 if (error) 2575 return (error); 2576 cvtnstat(&sb, &nsb); 2577 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2578 return (error); 2579 } 2580 2581 /* 2582 * Get configurable pathname variables. 
2583 */ 2584 #ifndef _SYS_SYSPROTO_H_ 2585 struct pathconf_args { 2586 char *path; 2587 int name; 2588 }; 2589 #endif 2590 int 2591 sys_pathconf(td, uap) 2592 struct thread *td; 2593 register struct pathconf_args /* { 2594 char *path; 2595 int name; 2596 } */ *uap; 2597 { 2598 2599 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2600 } 2601 2602 #ifndef _SYS_SYSPROTO_H_ 2603 struct lpathconf_args { 2604 char *path; 2605 int name; 2606 }; 2607 #endif 2608 int 2609 sys_lpathconf(td, uap) 2610 struct thread *td; 2611 register struct lpathconf_args /* { 2612 char *path; 2613 int name; 2614 } */ *uap; 2615 { 2616 2617 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, NOFOLLOW)); 2618 } 2619 2620 int 2621 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2622 u_long flags) 2623 { 2624 struct nameidata nd; 2625 int error, vfslocked; 2626 2627 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1 | 2628 flags, pathseg, path, td); 2629 if ((error = namei(&nd)) != 0) 2630 return (error); 2631 vfslocked = NDHASGIANT(&nd); 2632 NDFREE(&nd, NDF_ONLY_PNBUF); 2633 2634 /* If asynchronous I/O is available, it works for all files. */ 2635 if (name == _PC_ASYNC_IO) 2636 td->td_retval[0] = async_io_version; 2637 else 2638 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2639 vput(nd.ni_vp); 2640 VFS_UNLOCK_GIANT(vfslocked); 2641 return (error); 2642 } 2643 2644 /* 2645 * Return target name of a symbolic link. 
2646 */ 2647 #ifndef _SYS_SYSPROTO_H_ 2648 struct readlink_args { 2649 char *path; 2650 char *buf; 2651 size_t count; 2652 }; 2653 #endif 2654 int 2655 sys_readlink(td, uap) 2656 struct thread *td; 2657 register struct readlink_args /* { 2658 char *path; 2659 char *buf; 2660 size_t count; 2661 } */ *uap; 2662 { 2663 2664 return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf, 2665 UIO_USERSPACE, uap->count)); 2666 } 2667 #ifndef _SYS_SYSPROTO_H_ 2668 struct readlinkat_args { 2669 int fd; 2670 char *path; 2671 char *buf; 2672 size_t bufsize; 2673 }; 2674 #endif 2675 int 2676 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2677 { 2678 2679 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2680 uap->buf, UIO_USERSPACE, uap->bufsize)); 2681 } 2682 2683 int 2684 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf, 2685 enum uio_seg bufseg, size_t count) 2686 { 2687 2688 return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg, 2689 count)); 2690 } 2691 2692 int 2693 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2694 char *buf, enum uio_seg bufseg, size_t count) 2695 { 2696 struct vnode *vp; 2697 struct iovec aiov; 2698 struct uio auio; 2699 int error; 2700 struct nameidata nd; 2701 int vfslocked; 2702 2703 if (count > INT_MAX) 2704 return (EINVAL); 2705 2706 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | 2707 AUDITVNODE1, pathseg, path, fd, td); 2708 2709 if ((error = namei(&nd)) != 0) 2710 return (error); 2711 NDFREE(&nd, NDF_ONLY_PNBUF); 2712 vfslocked = NDHASGIANT(&nd); 2713 vp = nd.ni_vp; 2714 #ifdef MAC 2715 error = mac_vnode_check_readlink(td->td_ucred, vp); 2716 if (error) { 2717 vput(vp); 2718 VFS_UNLOCK_GIANT(vfslocked); 2719 return (error); 2720 } 2721 #endif 2722 if (vp->v_type != VLNK) 2723 error = EINVAL; 2724 else { 2725 aiov.iov_base = buf; 2726 aiov.iov_len = count; 2727 auio.uio_iov = &aiov; 2728 auio.uio_iovcnt = 1; 2729 auio.uio_offset 
= 0; 2730 auio.uio_rw = UIO_READ; 2731 auio.uio_segflg = bufseg; 2732 auio.uio_td = td; 2733 auio.uio_resid = count; 2734 error = VOP_READLINK(vp, &auio, td->td_ucred); 2735 } 2736 vput(vp); 2737 VFS_UNLOCK_GIANT(vfslocked); 2738 td->td_retval[0] = count - auio.uio_resid; 2739 return (error); 2740 } 2741 2742 /* 2743 * Common implementation code for chflags() and fchflags(). 2744 */ 2745 static int 2746 setfflags(td, vp, flags) 2747 struct thread *td; 2748 struct vnode *vp; 2749 int flags; 2750 { 2751 int error; 2752 struct mount *mp; 2753 struct vattr vattr; 2754 2755 /* 2756 * Prevent non-root users from setting flags on devices. When 2757 * a device is reused, users can retain ownership of the device 2758 * if they are allowed to set flags and programs assume that 2759 * chown can't fail when done as root. 2760 */ 2761 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2762 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2763 if (error) 2764 return (error); 2765 } 2766 2767 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2768 return (error); 2769 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2770 VATTR_NULL(&vattr); 2771 vattr.va_flags = flags; 2772 #ifdef MAC 2773 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2774 if (error == 0) 2775 #endif 2776 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2777 VOP_UNLOCK(vp, 0); 2778 vn_finished_write(mp); 2779 return (error); 2780 } 2781 2782 /* 2783 * Change flags of a file given a path name. 
2784 */ 2785 #ifndef _SYS_SYSPROTO_H_ 2786 struct chflags_args { 2787 char *path; 2788 int flags; 2789 }; 2790 #endif 2791 int 2792 sys_chflags(td, uap) 2793 struct thread *td; 2794 register struct chflags_args /* { 2795 char *path; 2796 int flags; 2797 } */ *uap; 2798 { 2799 int error; 2800 struct nameidata nd; 2801 int vfslocked; 2802 2803 AUDIT_ARG_FFLAGS(uap->flags); 2804 NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE, 2805 uap->path, td); 2806 if ((error = namei(&nd)) != 0) 2807 return (error); 2808 NDFREE(&nd, NDF_ONLY_PNBUF); 2809 vfslocked = NDHASGIANT(&nd); 2810 error = setfflags(td, nd.ni_vp, uap->flags); 2811 vrele(nd.ni_vp); 2812 VFS_UNLOCK_GIANT(vfslocked); 2813 return (error); 2814 } 2815 2816 /* 2817 * Same as chflags() but doesn't follow symlinks. 2818 */ 2819 int 2820 sys_lchflags(td, uap) 2821 struct thread *td; 2822 register struct lchflags_args /* { 2823 char *path; 2824 int flags; 2825 } */ *uap; 2826 { 2827 int error; 2828 struct nameidata nd; 2829 int vfslocked; 2830 2831 AUDIT_ARG_FFLAGS(uap->flags); 2832 NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE, 2833 uap->path, td); 2834 if ((error = namei(&nd)) != 0) 2835 return (error); 2836 vfslocked = NDHASGIANT(&nd); 2837 NDFREE(&nd, NDF_ONLY_PNBUF); 2838 error = setfflags(td, nd.ni_vp, uap->flags); 2839 vrele(nd.ni_vp); 2840 VFS_UNLOCK_GIANT(vfslocked); 2841 return (error); 2842 } 2843 2844 /* 2845 * Change flags of a file given a file descriptor. 
2846 */ 2847 #ifndef _SYS_SYSPROTO_H_ 2848 struct fchflags_args { 2849 int fd; 2850 int flags; 2851 }; 2852 #endif 2853 int 2854 sys_fchflags(td, uap) 2855 struct thread *td; 2856 register struct fchflags_args /* { 2857 int fd; 2858 int flags; 2859 } */ *uap; 2860 { 2861 struct file *fp; 2862 int vfslocked; 2863 int error; 2864 2865 AUDIT_ARG_FD(uap->fd); 2866 AUDIT_ARG_FFLAGS(uap->flags); 2867 if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FCHFLAGS, 2868 &fp)) != 0) 2869 return (error); 2870 vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount); 2871 #ifdef AUDIT 2872 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2873 AUDIT_ARG_VNODE1(fp->f_vnode); 2874 VOP_UNLOCK(fp->f_vnode, 0); 2875 #endif 2876 error = setfflags(td, fp->f_vnode, uap->flags); 2877 VFS_UNLOCK_GIANT(vfslocked); 2878 fdrop(fp, td); 2879 return (error); 2880 } 2881 2882 /* 2883 * Common implementation code for chmod(), lchmod() and fchmod(). 2884 */ 2885 int 2886 setfmode(td, cred, vp, mode) 2887 struct thread *td; 2888 struct ucred *cred; 2889 struct vnode *vp; 2890 int mode; 2891 { 2892 int error; 2893 struct mount *mp; 2894 struct vattr vattr; 2895 2896 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2897 return (error); 2898 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2899 VATTR_NULL(&vattr); 2900 vattr.va_mode = mode & ALLPERMS; 2901 #ifdef MAC 2902 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2903 if (error == 0) 2904 #endif 2905 error = VOP_SETATTR(vp, &vattr, cred); 2906 VOP_UNLOCK(vp, 0); 2907 vn_finished_write(mp); 2908 return (error); 2909 } 2910 2911 /* 2912 * Change mode of a file given path name. 
2913 */ 2914 #ifndef _SYS_SYSPROTO_H_ 2915 struct chmod_args { 2916 char *path; 2917 int mode; 2918 }; 2919 #endif 2920 int 2921 sys_chmod(td, uap) 2922 struct thread *td; 2923 register struct chmod_args /* { 2924 char *path; 2925 int mode; 2926 } */ *uap; 2927 { 2928 2929 return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode)); 2930 } 2931 2932 #ifndef _SYS_SYSPROTO_H_ 2933 struct fchmodat_args { 2934 int dirfd; 2935 char *path; 2936 mode_t mode; 2937 int flag; 2938 } 2939 #endif 2940 int 2941 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2942 { 2943 int flag = uap->flag; 2944 int fd = uap->fd; 2945 char *path = uap->path; 2946 mode_t mode = uap->mode; 2947 2948 if (flag & ~AT_SYMLINK_NOFOLLOW) 2949 return (EINVAL); 2950 2951 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2952 } 2953 2954 int 2955 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode) 2956 { 2957 2958 return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0)); 2959 } 2960 2961 /* 2962 * Change mode of a file given path name (don't follow links.) 2963 */ 2964 #ifndef _SYS_SYSPROTO_H_ 2965 struct lchmod_args { 2966 char *path; 2967 int mode; 2968 }; 2969 #endif 2970 int 2971 sys_lchmod(td, uap) 2972 struct thread *td; 2973 register struct lchmod_args /* { 2974 char *path; 2975 int mode; 2976 } */ *uap; 2977 { 2978 2979 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2980 uap->mode, AT_SYMLINK_NOFOLLOW)); 2981 } 2982 2983 2984 int 2985 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2986 mode_t mode, int flag) 2987 { 2988 int error; 2989 struct nameidata nd; 2990 int vfslocked; 2991 int follow; 2992 2993 AUDIT_ARG_MODE(mode); 2994 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; 2995 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg, 2996 path, fd, CAP_FCHMOD, td); 2997 if ((error = namei(&nd)) != 0) 2998 return (error); 2999 vfslocked = NDHASGIANT(&nd); 3000 NDFREE(&nd, NDF_ONLY_PNBUF); 3001 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 3002 vrele(nd.ni_vp); 3003 VFS_UNLOCK_GIANT(vfslocked); 3004 return (error); 3005 } 3006 3007 /* 3008 * Change mode of a file given a file descriptor. 3009 */ 3010 #ifndef _SYS_SYSPROTO_H_ 3011 struct fchmod_args { 3012 int fd; 3013 int mode; 3014 }; 3015 #endif 3016 int 3017 sys_fchmod(struct thread *td, struct fchmod_args *uap) 3018 { 3019 struct file *fp; 3020 int error; 3021 3022 AUDIT_ARG_FD(uap->fd); 3023 AUDIT_ARG_MODE(uap->mode); 3024 3025 error = fget(td, uap->fd, CAP_FCHMOD, &fp); 3026 if (error != 0) 3027 return (error); 3028 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 3029 fdrop(fp, td); 3030 return (error); 3031 } 3032 3033 /* 3034 * Common implementation for chown(), lchown(), and fchown() 3035 */ 3036 int 3037 setfown(td, cred, vp, uid, gid) 3038 struct thread *td; 3039 struct ucred *cred; 3040 struct vnode *vp; 3041 uid_t uid; 3042 gid_t gid; 3043 { 3044 int error; 3045 struct mount *mp; 3046 struct vattr vattr; 3047 3048 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3049 return (error); 3050 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3051 VATTR_NULL(&vattr); 3052 vattr.va_uid = uid; 3053 vattr.va_gid = gid; 3054 #ifdef MAC 3055 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 3056 vattr.va_gid); 3057 if (error == 0) 3058 #endif 3059 error = VOP_SETATTR(vp, &vattr, cred); 3060 VOP_UNLOCK(vp, 0); 3061 vn_finished_write(mp); 3062 return (error); 3063 } 3064 3065 /* 3066 * Set ownership given a path name. 
3067 */ 3068 #ifndef _SYS_SYSPROTO_H_ 3069 struct chown_args { 3070 char *path; 3071 int uid; 3072 int gid; 3073 }; 3074 #endif 3075 int 3076 sys_chown(td, uap) 3077 struct thread *td; 3078 register struct chown_args /* { 3079 char *path; 3080 int uid; 3081 int gid; 3082 } */ *uap; 3083 { 3084 3085 return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 3086 } 3087 3088 #ifndef _SYS_SYSPROTO_H_ 3089 struct fchownat_args { 3090 int fd; 3091 const char * path; 3092 uid_t uid; 3093 gid_t gid; 3094 int flag; 3095 }; 3096 #endif 3097 int 3098 sys_fchownat(struct thread *td, struct fchownat_args *uap) 3099 { 3100 int flag; 3101 3102 flag = uap->flag; 3103 if (flag & ~AT_SYMLINK_NOFOLLOW) 3104 return (EINVAL); 3105 3106 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 3107 uap->gid, uap->flag)); 3108 } 3109 3110 int 3111 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 3112 int gid) 3113 { 3114 3115 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0)); 3116 } 3117 3118 int 3119 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3120 int uid, int gid, int flag) 3121 { 3122 struct nameidata nd; 3123 int error, vfslocked, follow; 3124 3125 AUDIT_ARG_OWNER(uid, gid); 3126 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3127 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg, 3128 path, fd, CAP_FCHOWN, td); 3129 3130 if ((error = namei(&nd)) != 0) 3131 return (error); 3132 vfslocked = NDHASGIANT(&nd); 3133 NDFREE(&nd, NDF_ONLY_PNBUF); 3134 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3135 vrele(nd.ni_vp); 3136 VFS_UNLOCK_GIANT(vfslocked); 3137 return (error); 3138 } 3139 3140 /* 3141 * Set ownership given a path name, do not cross symlinks. 
3142 */ 3143 #ifndef _SYS_SYSPROTO_H_ 3144 struct lchown_args { 3145 char *path; 3146 int uid; 3147 int gid; 3148 }; 3149 #endif 3150 int 3151 sys_lchown(td, uap) 3152 struct thread *td; 3153 register struct lchown_args /* { 3154 char *path; 3155 int uid; 3156 int gid; 3157 } */ *uap; 3158 { 3159 3160 return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 3161 } 3162 3163 int 3164 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 3165 int gid) 3166 { 3167 3168 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 3169 AT_SYMLINK_NOFOLLOW)); 3170 } 3171 3172 /* 3173 * Set ownership given a file descriptor. 3174 */ 3175 #ifndef _SYS_SYSPROTO_H_ 3176 struct fchown_args { 3177 int fd; 3178 int uid; 3179 int gid; 3180 }; 3181 #endif 3182 int 3183 sys_fchown(td, uap) 3184 struct thread *td; 3185 register struct fchown_args /* { 3186 int fd; 3187 int uid; 3188 int gid; 3189 } */ *uap; 3190 { 3191 struct file *fp; 3192 int error; 3193 3194 AUDIT_ARG_FD(uap->fd); 3195 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3196 error = fget(td, uap->fd, CAP_FCHOWN, &fp); 3197 if (error != 0) 3198 return (error); 3199 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3200 fdrop(fp, td); 3201 return (error); 3202 } 3203 3204 /* 3205 * Common implementation code for utimes(), lutimes(), and futimes(). 
3206 */ 3207 static int 3208 getutimes(usrtvp, tvpseg, tsp) 3209 const struct timeval *usrtvp; 3210 enum uio_seg tvpseg; 3211 struct timespec *tsp; 3212 { 3213 struct timeval tv[2]; 3214 const struct timeval *tvp; 3215 int error; 3216 3217 if (usrtvp == NULL) { 3218 vfs_timestamp(&tsp[0]); 3219 tsp[1] = tsp[0]; 3220 } else { 3221 if (tvpseg == UIO_SYSSPACE) { 3222 tvp = usrtvp; 3223 } else { 3224 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3225 return (error); 3226 tvp = tv; 3227 } 3228 3229 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3230 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3231 return (EINVAL); 3232 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3233 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3234 } 3235 return (0); 3236 } 3237 3238 /* 3239 * Common implementation code for utimes(), lutimes(), and futimes(). 3240 */ 3241 static int 3242 setutimes(td, vp, ts, numtimes, nullflag) 3243 struct thread *td; 3244 struct vnode *vp; 3245 const struct timespec *ts; 3246 int numtimes; 3247 int nullflag; 3248 { 3249 int error, setbirthtime; 3250 struct mount *mp; 3251 struct vattr vattr; 3252 3253 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3254 return (error); 3255 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3256 setbirthtime = 0; 3257 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3258 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3259 setbirthtime = 1; 3260 VATTR_NULL(&vattr); 3261 vattr.va_atime = ts[0]; 3262 vattr.va_mtime = ts[1]; 3263 if (setbirthtime) 3264 vattr.va_birthtime = ts[1]; 3265 if (numtimes > 2) 3266 vattr.va_birthtime = ts[2]; 3267 if (nullflag) 3268 vattr.va_vaflags |= VA_UTIMES_NULL; 3269 #ifdef MAC 3270 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3271 vattr.va_mtime); 3272 #endif 3273 if (error == 0) 3274 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3275 VOP_UNLOCK(vp, 0); 3276 vn_finished_write(mp); 3277 return (error); 3278 } 3279 3280 /* 3281 * Set the access and modification 
times of a file. 3282 */ 3283 #ifndef _SYS_SYSPROTO_H_ 3284 struct utimes_args { 3285 char *path; 3286 struct timeval *tptr; 3287 }; 3288 #endif 3289 int 3290 sys_utimes(td, uap) 3291 struct thread *td; 3292 register struct utimes_args /* { 3293 char *path; 3294 struct timeval *tptr; 3295 } */ *uap; 3296 { 3297 3298 return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3299 UIO_USERSPACE)); 3300 } 3301 3302 #ifndef _SYS_SYSPROTO_H_ 3303 struct futimesat_args { 3304 int fd; 3305 const char * path; 3306 const struct timeval * times; 3307 }; 3308 #endif 3309 int 3310 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3311 { 3312 3313 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3314 uap->times, UIO_USERSPACE)); 3315 } 3316 3317 int 3318 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg, 3319 struct timeval *tptr, enum uio_seg tptrseg) 3320 { 3321 3322 return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg)); 3323 } 3324 3325 int 3326 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3327 struct timeval *tptr, enum uio_seg tptrseg) 3328 { 3329 struct nameidata nd; 3330 struct timespec ts[2]; 3331 int error, vfslocked; 3332 3333 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3334 return (error); 3335 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, 3336 path, fd, CAP_FUTIMES, td); 3337 3338 if ((error = namei(&nd)) != 0) 3339 return (error); 3340 vfslocked = NDHASGIANT(&nd); 3341 NDFREE(&nd, NDF_ONLY_PNBUF); 3342 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3343 vrele(nd.ni_vp); 3344 VFS_UNLOCK_GIANT(vfslocked); 3345 return (error); 3346 } 3347 3348 /* 3349 * Set the access and modification times of a file. 
3350 */ 3351 #ifndef _SYS_SYSPROTO_H_ 3352 struct lutimes_args { 3353 char *path; 3354 struct timeval *tptr; 3355 }; 3356 #endif 3357 int 3358 sys_lutimes(td, uap) 3359 struct thread *td; 3360 register struct lutimes_args /* { 3361 char *path; 3362 struct timeval *tptr; 3363 } */ *uap; 3364 { 3365 3366 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3367 UIO_USERSPACE)); 3368 } 3369 3370 int 3371 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3372 struct timeval *tptr, enum uio_seg tptrseg) 3373 { 3374 struct timespec ts[2]; 3375 int error; 3376 struct nameidata nd; 3377 int vfslocked; 3378 3379 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3380 return (error); 3381 NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td); 3382 if ((error = namei(&nd)) != 0) 3383 return (error); 3384 vfslocked = NDHASGIANT(&nd); 3385 NDFREE(&nd, NDF_ONLY_PNBUF); 3386 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3387 vrele(nd.ni_vp); 3388 VFS_UNLOCK_GIANT(vfslocked); 3389 return (error); 3390 } 3391 3392 /* 3393 * Set the access and modification times of a file. 
3394 */ 3395 #ifndef _SYS_SYSPROTO_H_ 3396 struct futimes_args { 3397 int fd; 3398 struct timeval *tptr; 3399 }; 3400 #endif 3401 int 3402 sys_futimes(td, uap) 3403 struct thread *td; 3404 register struct futimes_args /* { 3405 int fd; 3406 struct timeval *tptr; 3407 } */ *uap; 3408 { 3409 3410 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3411 } 3412 3413 int 3414 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3415 enum uio_seg tptrseg) 3416 { 3417 struct timespec ts[2]; 3418 struct file *fp; 3419 int vfslocked; 3420 int error; 3421 3422 AUDIT_ARG_FD(fd); 3423 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3424 return (error); 3425 if ((error = getvnode(td->td_proc->p_fd, fd, CAP_FUTIMES, &fp)) 3426 != 0) 3427 return (error); 3428 vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount); 3429 #ifdef AUDIT 3430 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3431 AUDIT_ARG_VNODE1(fp->f_vnode); 3432 VOP_UNLOCK(fp->f_vnode, 0); 3433 #endif 3434 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3435 VFS_UNLOCK_GIANT(vfslocked); 3436 fdrop(fp, td); 3437 return (error); 3438 } 3439 3440 /* 3441 * Truncate a file given its path name. 
3442 */ 3443 #ifndef _SYS_SYSPROTO_H_ 3444 struct truncate_args { 3445 char *path; 3446 int pad; 3447 off_t length; 3448 }; 3449 #endif 3450 int 3451 sys_truncate(td, uap) 3452 struct thread *td; 3453 register struct truncate_args /* { 3454 char *path; 3455 int pad; 3456 off_t length; 3457 } */ *uap; 3458 { 3459 3460 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3461 } 3462 3463 int 3464 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3465 { 3466 struct mount *mp; 3467 struct vnode *vp; 3468 struct vattr vattr; 3469 int error; 3470 struct nameidata nd; 3471 int vfslocked; 3472 3473 if (length < 0) 3474 return(EINVAL); 3475 NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td); 3476 if ((error = namei(&nd)) != 0) 3477 return (error); 3478 vfslocked = NDHASGIANT(&nd); 3479 vp = nd.ni_vp; 3480 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3481 vrele(vp); 3482 VFS_UNLOCK_GIANT(vfslocked); 3483 return (error); 3484 } 3485 NDFREE(&nd, NDF_ONLY_PNBUF); 3486 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3487 if (vp->v_type == VDIR) 3488 error = EISDIR; 3489 #ifdef MAC 3490 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3491 } 3492 #endif 3493 else if ((error = vn_writechk(vp)) == 0 && 3494 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3495 VATTR_NULL(&vattr); 3496 vattr.va_size = length; 3497 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3498 } 3499 vput(vp); 3500 vn_finished_write(mp); 3501 VFS_UNLOCK_GIANT(vfslocked); 3502 return (error); 3503 } 3504 3505 #if defined(COMPAT_43) 3506 /* 3507 * Truncate a file given its path name. 
3508 */ 3509 #ifndef _SYS_SYSPROTO_H_ 3510 struct otruncate_args { 3511 char *path; 3512 long length; 3513 }; 3514 #endif 3515 int 3516 otruncate(td, uap) 3517 struct thread *td; 3518 register struct otruncate_args /* { 3519 char *path; 3520 long length; 3521 } */ *uap; 3522 { 3523 struct truncate_args /* { 3524 char *path; 3525 int pad; 3526 off_t length; 3527 } */ nuap; 3528 3529 nuap.path = uap->path; 3530 nuap.length = uap->length; 3531 return (sys_truncate(td, &nuap)); 3532 } 3533 #endif /* COMPAT_43 */ 3534 3535 /* Versions with the pad argument */ 3536 int 3537 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3538 { 3539 struct truncate_args ouap; 3540 3541 ouap.path = uap->path; 3542 ouap.length = uap->length; 3543 return (sys_truncate(td, &ouap)); 3544 } 3545 3546 int 3547 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3548 { 3549 struct ftruncate_args ouap; 3550 3551 ouap.fd = uap->fd; 3552 ouap.length = uap->length; 3553 return (sys_ftruncate(td, &ouap)); 3554 } 3555 3556 /* 3557 * Sync an open file. 
3558 */ 3559 #ifndef _SYS_SYSPROTO_H_ 3560 struct fsync_args { 3561 int fd; 3562 }; 3563 #endif 3564 int 3565 sys_fsync(td, uap) 3566 struct thread *td; 3567 struct fsync_args /* { 3568 int fd; 3569 } */ *uap; 3570 { 3571 struct vnode *vp; 3572 struct mount *mp; 3573 struct file *fp; 3574 int vfslocked; 3575 int error, lock_flags; 3576 3577 AUDIT_ARG_FD(uap->fd); 3578 if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FSYNC, 3579 &fp)) != 0) 3580 return (error); 3581 vp = fp->f_vnode; 3582 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 3583 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3584 goto drop; 3585 if (MNT_SHARED_WRITES(mp) || 3586 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3587 lock_flags = LK_SHARED; 3588 } else { 3589 lock_flags = LK_EXCLUSIVE; 3590 } 3591 vn_lock(vp, lock_flags | LK_RETRY); 3592 AUDIT_ARG_VNODE1(vp); 3593 if (vp->v_object != NULL) { 3594 VM_OBJECT_LOCK(vp->v_object); 3595 vm_object_page_clean(vp->v_object, 0, 0, 0); 3596 VM_OBJECT_UNLOCK(vp->v_object); 3597 } 3598 error = VOP_FSYNC(vp, MNT_WAIT, td); 3599 3600 VOP_UNLOCK(vp, 0); 3601 vn_finished_write(mp); 3602 drop: 3603 VFS_UNLOCK_GIANT(vfslocked); 3604 fdrop(fp, td); 3605 return (error); 3606 } 3607 3608 /* 3609 * Rename files. Source and destination must either both be directories, or 3610 * both not be directories. If target is a directory, it must be empty. 
3611 */ 3612 #ifndef _SYS_SYSPROTO_H_ 3613 struct rename_args { 3614 char *from; 3615 char *to; 3616 }; 3617 #endif 3618 int 3619 sys_rename(td, uap) 3620 struct thread *td; 3621 register struct rename_args /* { 3622 char *from; 3623 char *to; 3624 } */ *uap; 3625 { 3626 3627 return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE)); 3628 } 3629 3630 #ifndef _SYS_SYSPROTO_H_ 3631 struct renameat_args { 3632 int oldfd; 3633 char *old; 3634 int newfd; 3635 char *new; 3636 }; 3637 #endif 3638 int 3639 sys_renameat(struct thread *td, struct renameat_args *uap) 3640 { 3641 3642 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3643 UIO_USERSPACE)); 3644 } 3645 3646 int 3647 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg) 3648 { 3649 3650 return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg)); 3651 } 3652 3653 int 3654 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3655 enum uio_seg pathseg) 3656 { 3657 struct mount *mp = NULL; 3658 struct vnode *tvp, *fvp, *tdvp; 3659 struct nameidata fromnd, tond; 3660 int tvfslocked; 3661 int fvfslocked; 3662 int error; 3663 3664 bwillwrite(); 3665 #ifdef MAC 3666 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3667 MPSAFE | AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td); 3668 #else 3669 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE | 3670 AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td); 3671 #endif 3672 3673 if ((error = namei(&fromnd)) != 0) 3674 return (error); 3675 fvfslocked = NDHASGIANT(&fromnd); 3676 tvfslocked = 0; 3677 #ifdef MAC 3678 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3679 fromnd.ni_vp, &fromnd.ni_cnd); 3680 VOP_UNLOCK(fromnd.ni_dvp, 0); 3681 if (fromnd.ni_dvp != fromnd.ni_vp) 3682 VOP_UNLOCK(fromnd.ni_vp, 0); 3683 #endif 3684 fvp = fromnd.ni_vp; 3685 if (error == 0) 3686 error = vn_start_write(fvp, &mp, V_WAIT | PCATCH); 3687 if (error != 0) { 3688 
NDFREE(&fromnd, NDF_ONLY_PNBUF); 3689 vrele(fromnd.ni_dvp); 3690 vrele(fvp); 3691 goto out1; 3692 } 3693 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3694 SAVESTART | MPSAFE | AUDITVNODE2, pathseg, new, newfd, CAP_CREATE, 3695 td); 3696 if (fromnd.ni_vp->v_type == VDIR) 3697 tond.ni_cnd.cn_flags |= WILLBEDIR; 3698 if ((error = namei(&tond)) != 0) { 3699 /* Translate error code for rename("dir1", "dir2/."). */ 3700 if (error == EISDIR && fvp->v_type == VDIR) 3701 error = EINVAL; 3702 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3703 vrele(fromnd.ni_dvp); 3704 vrele(fvp); 3705 vn_finished_write(mp); 3706 goto out1; 3707 } 3708 tvfslocked = NDHASGIANT(&tond); 3709 tdvp = tond.ni_dvp; 3710 tvp = tond.ni_vp; 3711 if (tvp != NULL) { 3712 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3713 error = ENOTDIR; 3714 goto out; 3715 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3716 error = EISDIR; 3717 goto out; 3718 } 3719 } 3720 if (fvp == tdvp) { 3721 error = EINVAL; 3722 goto out; 3723 } 3724 /* 3725 * If the source is the same as the destination (that is, if they 3726 * are links to the same vnode), then there is nothing to do. 
3727 */ 3728 if (fvp == tvp) 3729 error = -1; 3730 #ifdef MAC 3731 else 3732 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3733 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3734 #endif 3735 out: 3736 if (!error) { 3737 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3738 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3739 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3740 NDFREE(&tond, NDF_ONLY_PNBUF); 3741 } else { 3742 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3743 NDFREE(&tond, NDF_ONLY_PNBUF); 3744 if (tvp) 3745 vput(tvp); 3746 if (tdvp == tvp) 3747 vrele(tdvp); 3748 else 3749 vput(tdvp); 3750 vrele(fromnd.ni_dvp); 3751 vrele(fvp); 3752 } 3753 vrele(tond.ni_startdir); 3754 vn_finished_write(mp); 3755 out1: 3756 if (fromnd.ni_startdir) 3757 vrele(fromnd.ni_startdir); 3758 VFS_UNLOCK_GIANT(fvfslocked); 3759 VFS_UNLOCK_GIANT(tvfslocked); 3760 if (error == -1) 3761 return (0); 3762 return (error); 3763 } 3764 3765 /* 3766 * Make a directory file. 3767 */ 3768 #ifndef _SYS_SYSPROTO_H_ 3769 struct mkdir_args { 3770 char *path; 3771 int mode; 3772 }; 3773 #endif 3774 int 3775 sys_mkdir(td, uap) 3776 struct thread *td; 3777 register struct mkdir_args /* { 3778 char *path; 3779 int mode; 3780 } */ *uap; 3781 { 3782 3783 return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode)); 3784 } 3785 3786 #ifndef _SYS_SYSPROTO_H_ 3787 struct mkdirat_args { 3788 int fd; 3789 char *path; 3790 mode_t mode; 3791 }; 3792 #endif 3793 int 3794 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3795 { 3796 3797 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3798 } 3799 3800 int 3801 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode) 3802 { 3803 3804 return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode)); 3805 } 3806 3807 int 3808 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3809 int mode) 3810 { 3811 struct mount *mp; 3812 struct vnode *vp; 3813 struct vattr vattr; 3814 int error; 3815 struct nameidata 
nd; 3816 int vfslocked; 3817 3818 AUDIT_ARG_MODE(mode); 3819 restart: 3820 bwillwrite(); 3821 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | 3822 AUDITVNODE1, segflg, path, fd, CAP_MKDIR, td); 3823 nd.ni_cnd.cn_flags |= WILLBEDIR; 3824 if ((error = namei(&nd)) != 0) 3825 return (error); 3826 vfslocked = NDHASGIANT(&nd); 3827 vp = nd.ni_vp; 3828 if (vp != NULL) { 3829 NDFREE(&nd, NDF_ONLY_PNBUF); 3830 /* 3831 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3832 * the strange behaviour of leaving the vnode unlocked 3833 * if the target is the same vnode as the parent. 3834 */ 3835 if (vp == nd.ni_dvp) 3836 vrele(nd.ni_dvp); 3837 else 3838 vput(nd.ni_dvp); 3839 vrele(vp); 3840 VFS_UNLOCK_GIANT(vfslocked); 3841 return (EEXIST); 3842 } 3843 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3844 NDFREE(&nd, NDF_ONLY_PNBUF); 3845 vput(nd.ni_dvp); 3846 VFS_UNLOCK_GIANT(vfslocked); 3847 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3848 return (error); 3849 goto restart; 3850 } 3851 VATTR_NULL(&vattr); 3852 vattr.va_type = VDIR; 3853 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3854 #ifdef MAC 3855 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3856 &vattr); 3857 if (error) 3858 goto out; 3859 #endif 3860 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3861 #ifdef MAC 3862 out: 3863 #endif 3864 NDFREE(&nd, NDF_ONLY_PNBUF); 3865 vput(nd.ni_dvp); 3866 if (!error) 3867 vput(nd.ni_vp); 3868 vn_finished_write(mp); 3869 VFS_UNLOCK_GIANT(vfslocked); 3870 return (error); 3871 } 3872 3873 /* 3874 * Remove a directory file. 
 */
#ifndef _SYS_SYSPROTO_H_
struct rmdir_args {
	char	*path;
};
#endif
/*
 * rmdir system call entry point: passes the user-space path on to
 * kern_rmdir().
 */
int
sys_rmdir(td, uap)
	struct thread *td;
	struct rmdir_args /* {
		char *path;
	} */ *uap;
{

	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
}

/*
 * Remove the directory named by path; simply calls kern_rmdirat() with
 * AT_FDCWD as the descriptor argument.
 */
int
kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
{

	return (kern_rmdirat(td, AT_FDCWD, path, pathseg));
}

/*
 * Remove the directory named by path.  fd and CAP_RMDIR are handed to
 * NDINIT_ATRIGHTS() for the lookup; pathseg says where the path string
 * lives.
 *
 * Returns 0 on success or an errno value: the namei() error, ENOTDIR if
 * the target is not a directory, EINVAL if the target is its own parent
 * ("."), EBUSY if the target is a filesystem root, a MAC denial, an
 * error from the sleeping vn_start_write(), or the VOP_RMDIR() result.
 */
int
kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
{
	struct mount *mp;
	struct vnode *vp;
	int error;
	struct nameidata nd;
	int vfslocked;

restart:
	bwillwrite();
	/* Look up the name with both the parent and the leaf locked. */
	NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE |
	    AUDITVNODE1, pathseg, path, fd, CAP_RMDIR, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	vp = nd.ni_vp;
	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}
	/*
	 * No rmdir "." please.
	 */
	if (nd.ni_dvp == vp) {
		error = EINVAL;
		goto out;
	}
	/*
	 * The root of a mounted filesystem cannot be deleted.
	 */
	if (vp->v_vflag & VV_ROOT) {
		error = EBUSY;
		goto out;
	}
#ifdef MAC
	error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
	    &nd.ni_cnd);
	if (error)
		goto out;
#endif
	/*
	 * The V_NOWAIT attempt failed: release everything obtained by the
	 * lookup, wait with V_XSLEEP | PCATCH, and redo the lookup from
	 * scratch.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(vp);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
	vn_finished_write(mp);
out:
	/* Common exit: free the path buffer and drop both vnodes. */
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(vp);
	if (nd.ni_dvp == vp)
		vrele(nd.ni_dvp);
	else
		vput(nd.ni_dvp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}

#ifdef COMPAT_43
/*
 * Read a block of directory entries in a filesystem independent format.
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
/*
 * Old getdirentries entry point: runs kern_ogetdirentries() and, on
 * success, copies the starting offset out to uap->basep.
 */
int
ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
{
	long loff;
	int error;

	error = kern_ogetdirentries(td, uap, &loff);
	if (error == 0)
		error = copyout(&loff, uap->basep, sizeof(long));
	return (error);
}

/*
 * Read directory entries from the directory open on uap->fd into the
 * user buffer uap->buf (at most uap->count bytes), rewriting the
 * d_type/d_namlen fields of each entry as described in the comments
 * below.
 *
 * On success the file offset at which the read started is stored in
 * *ploff and the number of bytes transferred in td->td_retval[0].
 * Returns 0 or an errno value (EINVAL for a count above 64KB or a
 * non-directory, EBADF if the file is not open for reading, EIO for a
 * zero-length record, or an error from the lower layers).
 */
int
kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
    long *ploff)
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	caddr_t dirbuf;
	int error, eofflag, readcnt, vfslocked;
	long loff;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ,
	    &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->f_vnode;
unionread:
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	if (vp->v_type != VDIR) {
		VFS_UNLOCK_GIANT(vfslocked);
		fdrop(fp, td);
		return (EINVAL);
	}
	/* Describe the user buffer as a single-segment read. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	loff = auio.uio_offset = fp->f_offset;
#ifdef MAC
	error = mac_vnode_check_readdir(td->td_ucred, vp);
	if (error) {
		VOP_UNLOCK(vp, 0);
		VFS_UNLOCK_GIANT(vfslocked);
		fdrop(fp, td);
		return (error);
	}
#endif
# if (BYTE_ORDER != LITTLE_ENDIAN)
	if (vp->v_mount->mnt_maxsymlinklen <= 0) {
		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
		    NULL, NULL);
		fp->f_offset = auio.uio_offset;
	} else
# endif
	{
		/*
		 * Read into a temporary kernel buffer so the entries can
		 * be edited before being copied out to the user buffer.
		 */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = uap->count;
		dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
		    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			readcnt = uap->count - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
# if (BYTE_ORDER == LITTLE_ENDIAN)
				/*
				 * The expected low byte of
				 * dp->d_namlen is our dp->d_type.
				 * The high MBZ byte of dp->d_namlen
				 * is our dp->d_namlen.
				 */
				dp->d_type = dp->d_namlen;
				dp->d_namlen = 0;
# else
				/*
				 * The dp->d_type is the high byte
				 * of the expected dp->d_namlen,
				 * so must be zero'ed.
				 */
				dp->d_type = 0;
# endif
				/*
				 * A zero record length would never advance
				 * dp; report it as EIO instead.
				 */
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					error = EIO;
					break;
				}
			}
			/* Copy out only if every record was walked. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		free(dirbuf, M_TEMP);
	}
	if (error) {
		VOP_UNLOCK(vp, 0);
		VFS_UNLOCK_GIANT(vfslocked);
		fdrop(fp, td);
		return (error);
	}
	/*
	 * Nothing was transferred and vp is the root of a MNT_UNION mount:
	 * point the file at the covered vnode, reset the offset and retry.
	 */
	if (uap->count == auio.uio_resid &&
	    (vp->v_vflag & VV_ROOT) &&
	    (vp->v_mount->mnt_flag & MNT_UNION)) {
		struct vnode *tvp = vp;
		vp = vp->v_mount->mnt_vnodecovered;
		VREF(vp);
		fp->f_vnode = vp;
		fp->f_data = vp;
		fp->f_offset = 0;
		vput(tvp);
		VFS_UNLOCK_GIANT(vfslocked);
		goto unionread;
	}
	VOP_UNLOCK(vp, 0);
	VFS_UNLOCK_GIANT(vfslocked);
	fdrop(fp, td);
	td->td_retval[0] = uap->count - auio.uio_resid;
	if (error == 0)
		*ploff = loff;
	return (error);
}
#endif /* COMPAT_43 */

/*
 * Read a block of directory entries in a filesystem independent format.
4121 */ 4122 #ifndef _SYS_SYSPROTO_H_ 4123 struct getdirentries_args { 4124 int fd; 4125 char *buf; 4126 u_int count; 4127 long *basep; 4128 }; 4129 #endif 4130 int 4131 sys_getdirentries(td, uap) 4132 struct thread *td; 4133 register struct getdirentries_args /* { 4134 int fd; 4135 char *buf; 4136 u_int count; 4137 long *basep; 4138 } */ *uap; 4139 { 4140 long base; 4141 int error; 4142 4143 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base); 4144 if (error) 4145 return (error); 4146 if (uap->basep != NULL) 4147 error = copyout(&base, uap->basep, sizeof(long)); 4148 return (error); 4149 } 4150 4151 int 4152 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 4153 long *basep) 4154 { 4155 struct vnode *vp; 4156 struct file *fp; 4157 struct uio auio; 4158 struct iovec aiov; 4159 int vfslocked; 4160 long loff; 4161 int error, eofflag; 4162 4163 AUDIT_ARG_FD(fd); 4164 if (count > INT_MAX) 4165 return (EINVAL); 4166 if ((error = getvnode(td->td_proc->p_fd, fd, CAP_READ | CAP_SEEK, 4167 &fp)) != 0) 4168 return (error); 4169 if ((fp->f_flag & FREAD) == 0) { 4170 fdrop(fp, td); 4171 return (EBADF); 4172 } 4173 vp = fp->f_vnode; 4174 unionread: 4175 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 4176 if (vp->v_type != VDIR) { 4177 VFS_UNLOCK_GIANT(vfslocked); 4178 error = EINVAL; 4179 goto fail; 4180 } 4181 aiov.iov_base = buf; 4182 aiov.iov_len = count; 4183 auio.uio_iov = &aiov; 4184 auio.uio_iovcnt = 1; 4185 auio.uio_rw = UIO_READ; 4186 auio.uio_segflg = UIO_USERSPACE; 4187 auio.uio_td = td; 4188 auio.uio_resid = count; 4189 vn_lock(vp, LK_SHARED | LK_RETRY); 4190 AUDIT_ARG_VNODE1(vp); 4191 loff = auio.uio_offset = fp->f_offset; 4192 #ifdef MAC 4193 error = mac_vnode_check_readdir(td->td_ucred, vp); 4194 if (error == 0) 4195 #endif 4196 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4197 NULL); 4198 fp->f_offset = auio.uio_offset; 4199 if (error) { 4200 VOP_UNLOCK(vp, 0); 4201 VFS_UNLOCK_GIANT(vfslocked); 4202 goto fail; 4203 } 
4204 if (count == auio.uio_resid && 4205 (vp->v_vflag & VV_ROOT) && 4206 (vp->v_mount->mnt_flag & MNT_UNION)) { 4207 struct vnode *tvp = vp; 4208 vp = vp->v_mount->mnt_vnodecovered; 4209 VREF(vp); 4210 fp->f_vnode = vp; 4211 fp->f_data = vp; 4212 fp->f_offset = 0; 4213 vput(tvp); 4214 VFS_UNLOCK_GIANT(vfslocked); 4215 goto unionread; 4216 } 4217 VOP_UNLOCK(vp, 0); 4218 VFS_UNLOCK_GIANT(vfslocked); 4219 *basep = loff; 4220 td->td_retval[0] = count - auio.uio_resid; 4221 fail: 4222 fdrop(fp, td); 4223 return (error); 4224 } 4225 4226 #ifndef _SYS_SYSPROTO_H_ 4227 struct getdents_args { 4228 int fd; 4229 char *buf; 4230 size_t count; 4231 }; 4232 #endif 4233 int 4234 sys_getdents(td, uap) 4235 struct thread *td; 4236 register struct getdents_args /* { 4237 int fd; 4238 char *buf; 4239 u_int count; 4240 } */ *uap; 4241 { 4242 struct getdirentries_args ap; 4243 ap.fd = uap->fd; 4244 ap.buf = uap->buf; 4245 ap.count = uap->count; 4246 ap.basep = NULL; 4247 return (sys_getdirentries(td, &ap)); 4248 } 4249 4250 /* 4251 * Set the mode mask for creation of filesystem nodes. 4252 */ 4253 #ifndef _SYS_SYSPROTO_H_ 4254 struct umask_args { 4255 int newmask; 4256 }; 4257 #endif 4258 int 4259 sys_umask(td, uap) 4260 struct thread *td; 4261 struct umask_args /* { 4262 int newmask; 4263 } */ *uap; 4264 { 4265 register struct filedesc *fdp; 4266 4267 FILEDESC_XLOCK(td->td_proc->p_fd); 4268 fdp = td->td_proc->p_fd; 4269 td->td_retval[0] = fdp->fd_cmask; 4270 fdp->fd_cmask = uap->newmask & ALLPERMS; 4271 FILEDESC_XUNLOCK(td->td_proc->p_fd); 4272 return (0); 4273 } 4274 4275 /* 4276 * Void all references to file by ripping underlying filesystem away from 4277 * vnode. 
 */
#ifndef _SYS_SYSPROTO_H_
struct revoke_args {
	char	*path;
};
#endif
/*
 * revoke system call entry point.  Looks up uap->path (following
 * symlinks, leaf locked) and calls VOP_REVOKE() on it.
 *
 * Only character-device vnodes with a non-NULL v_rdev are accepted
 * (EINVAL otherwise).  A caller whose cr_uid differs from the vnode's
 * va_uid must pass priv_check(PRIV_VFS_ADMIN).  Returns 0 or an errno
 * value.
 */
int
sys_revoke(td, uap)
	struct thread *td;
	register struct revoke_args /* {
		char *path;
	} */ *uap;
{
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	int vfslocked;

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
	    UIO_USERSPACE, uap->path, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	vp = nd.ni_vp;
	NDFREE(&nd, NDF_ONLY_PNBUF);
	if (vp->v_type != VCHR || vp->v_rdev == NULL) {
		error = EINVAL;
		goto out;
	}
#ifdef MAC
	error = mac_vnode_check_revoke(td->td_ucred, vp);
	if (error)
		goto out;
#endif
	error = VOP_GETATTR(vp, &vattr, td->td_ucred);
	if (error)
		goto out;
	/* Non-owners need PRIV_VFS_ADMIN. */
	if (td->td_ucred->cr_uid != vattr.va_uid) {
		error = priv_check(td, PRIV_VFS_ADMIN);
		if (error)
			goto out;
	}
	/* The revoke is skipped when vcount() reports a single user. */
	if (vcount(vp) > 1)
		VOP_REVOKE(vp, REVOKEALL);
out:
	vput(vp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}

/*
 * Convert a user file descriptor to a kernel file entry and check that, if it
 * is a capability, the correct rights are present.  A reference on the file
 * entry is held upon returning.
 *
 * Returns 0 with *fpp set on success; EBADF if fdp is NULL or the
 * descriptor cannot be obtained; the cap_funwrap() error if the rights
 * check fails; EINVAL if the file has no vnode or is not yet fully
 * initialized.  On failure no reference is left held and *fpp is not
 * written.
 */
int
getvnode(struct filedesc *fdp, int fd, cap_rights_t rights,
    struct file **fpp)
{
	struct file *fp;
#ifdef CAPABILITIES
	struct file *fp_fromcap;
#endif
	int error;

	error = 0;
	fp = NULL;
	if ((fdp == NULL) || (fp = fget_unlocked(fdp, fd)) == NULL)
		return (EBADF);
#ifdef CAPABILITIES
	/*
	 * If the file descriptor is for a capability, test rights and use the
	 * file descriptor referenced by the capability.
	 */
	error = cap_funwrap(fp, rights, &fp_fromcap);
	if (error) {
		fdrop(fp, curthread);
		return (error);
	}
	if (fp != fp_fromcap) {
		/* Swap our reference over to the unwrapped file. */
		fhold(fp_fromcap);
		fdrop(fp, curthread);
		fp = fp_fromcap;
	}
#endif /* CAPABILITIES */

	/*
	 * The file could be not of the vnode type, or it may be not
	 * yet fully initialized, in which case the f_vnode pointer
	 * may be set, but f_ops is still badfileops.  E.g.,
	 * devfs_open() transiently create such situation to
	 * facilitate csw d_fdopen().
	 *
	 * Dupfdopen() handling in kern_openat() installs the
	 * half-baked file into the process descriptor table, allowing
	 * other thread to dereference it. Guard against the race by
	 * checking f_ops.
	 */
	if (fp->f_vnode == NULL || fp->f_ops == &badfileops) {
		fdrop(fp, curthread);
		return (EINVAL);
	}
	*fpp = fp;
	return (0);
}


/*
 * Get an (NFS) file handle.
 *
 * lgetfh looks the name up with NOFOLLOW, so a trailing symbolic link is
 * not followed.  Requires PRIV_VFS_GETFH.  The handle is built from the
 * mount's f_fsid and VOP_VPTOFH() and copied out to uap->fhp.
 */
#ifndef _SYS_SYSPROTO_H_
struct lgetfh_args {
	char	*fname;
	fhandle_t *fhp;
};
#endif
int
sys_lgetfh(td, uap)
	struct thread *td;
	register struct lgetfh_args *uap;
{
	struct nameidata nd;
	fhandle_t fh;
	register struct vnode *vp;
	int vfslocked;
	int error;

	error = priv_check(td, PRIV_VFS_GETFH);
	if (error)
		return (error);
	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
	    UIO_USERSPACE, uap->fname, td);
	error = namei(&nd);
	if (error)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;
	/* Zero the whole handle before filling it in and copying it out. */
	bzero(&fh, sizeof(fh));
	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
	error = VOP_VPTOFH(vp, &fh.fh_fid);
	vput(vp);
	VFS_UNLOCK_GIANT(vfslocked);
	if (error)
		return (error);
	error = copyout(&fh, uap->fhp, sizeof (fh));
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct getfh_args {
	char	*fname;
	fhandle_t *fhp;
};
#endif
/*
 * getfh system call entry point.  Requires PRIV_VFS_GETFH.  Looks up
 * uap->fname with FOLLOW, builds a file handle from the mount's f_fsid
 * and VOP_VPTOFH(), and copies it out to uap->fhp.  Returns 0 or an
 * errno value.
 */
int
sys_getfh(td, uap)
	struct thread *td;
	register struct getfh_args *uap;
{
	struct nameidata nd;
	fhandle_t fh;
	register struct vnode *vp;
	int vfslocked;
	int error;

	error = priv_check(td, PRIV_VFS_GETFH);
	if (error)
		return (error);
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
	    UIO_USERSPACE, uap->fname, td);
	error = namei(&nd);
	if (error)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;
	/* Zero the whole handle before filling it in and copying it out. */
	bzero(&fh, sizeof(fh));
	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
	error = VOP_VPTOFH(vp, &fh.fh_fid);
	vput(vp);
	VFS_UNLOCK_GIANT(vfslocked);
	if (error)
		return (error);
	error = copyout(&fh, uap->fhp, sizeof (fh));
	return (error);
}

/*
 * syscall for the rpc.lockd to use to translate a NFS file handle into an
 * open descriptor.
 *
 * warning: do not remove the priv_check() call or this becomes one giant
 * security hole.
 *
 * On success the new descriptor index is returned in td->td_retval[0].
 * The open must request FREAD and/or FWRITE and must not include O_CREAT
 * (EINVAL otherwise); an unknown fsid yields ESTALE.
 */
#ifndef _SYS_SYSPROTO_H_
struct fhopen_args {
	const struct fhandle *u_fhp;
	int flags;
};
#endif
int
sys_fhopen(td, uap)
	struct thread *td;
	struct fhopen_args /* {
		const struct fhandle *u_fhp;
		int flags;
	} */ *uap;
{
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct vnode *vp;
	struct fhandle fhp;
	struct vattr vat;
	struct vattr *vap = &vat;
	struct flock lf;
	struct file *fp;
	register struct filedesc *fdp = p->p_fd;
	int fmode, error, type;
	accmode_t accmode;
	struct file *nfp;
	int vfslocked;
	int indx;

	error = priv_check(td, PRIV_VFS_FHOPEN);
	if (error)
		return (error);
	fmode = FFLAGS(uap->flags);
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
	if (error)
		return(error);
	/* find the mount point */
	mp = vfs_busyfs(&fhp.fh_fsid);
	if (mp == NULL)
		return (ESTALE);
	vfslocked = VFS_LOCK_GIANT(mp);
	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
	vfs_unbusy(mp);
	if (error)
		goto out;
	/*
	 * from now on we have to make sure not
	 * to forget about the vnode
	 * any error that causes an abort must vput(vp)
	 * just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	if (vp->v_type != VDIR && fmode & O_DIRECTORY) {
		error = ENOTDIR;
		goto bad;
	}
	/* Translate the open flags into the access mode to be checked. */
	accmode = 0;
	if (fmode & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp);
		if (error)
			goto bad;
		accmode |= VWRITE;
	}
	if (fmode & FREAD)
		accmode |= VREAD;
	if ((fmode & O_APPEND) && (fmode & FWRITE))
		accmode |= VAPPEND;
#ifdef MAC
	error = mac_vnode_check_open(td->td_ucred, vp, accmode);
	if (error)
		goto bad;
#endif
	if (accmode) {
		error = VOP_ACCESS(vp, accmode, td->td_ucred, td);
		if (error)
			goto bad;
	}
	if (fmode & O_TRUNC) {
		/*
		 * The vnode is unlocked around vn_start_write(); a mount
		 * reference is taken first and released afterwards.  On
		 * failure vp is unlocked here, hence vrele() and 'out'
		 * rather than 'bad'.
		 */
		vfs_ref(mp);
		VOP_UNLOCK(vp, 0);				/* XXX */
		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
			vrele(vp);
			vfs_rel(mp);
			goto out;
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);	/* XXX */
		vfs_rel(mp);
#ifdef MAC
		/*
		 * We don't yet have fp->f_cred, so use td->td_ucred, which
		 * should be right.
		 */
		error = mac_vnode_check_write(td->td_ucred, td->td_ucred, vp);
		if (error == 0) {
#endif
			/* Truncate by setting the size attribute to zero. */
			VATTR_NULL(vap);
			vap->va_size = 0;
			error = VOP_SETATTR(vp, vap, td->td_ucred);
#ifdef MAC
		}
#endif
		vn_finished_write(mp);
		if (error)
			goto bad;
	}
	error = VOP_OPEN(vp, fmode, td->td_ucred, td, NULL);
	if (error)
		goto bad;

	if (fmode & FWRITE)
		vp->v_writecount++;

	/*
	 * end of vn_open code
	 */

	if ((error = falloc(td, &nfp, &indx, fmode)) != 0) {
		/* Undo the writer count taken above. */
		if (fmode & FWRITE)
			vp->v_writecount--;
		goto bad;
	}
	/* An extra reference on `nfp' has been held for us by falloc(). */
	fp = nfp;
	nfp->f_vnode = vp;
	finit(nfp, fmode & FMASK, DTYPE_VNODE, vp, &vnops);
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file flock-style lock requested at open time. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* VOP_ADVLOCK() is called with the vnode unlocked. */
		VOP_UNLOCK(vp, 0);
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
		    type)) != 0) {
			/*
			 * The lock request failed.  Normally close the
			 * descriptor but handle the case where someone might
			 * have dup()d or close()d it when we weren't looking.
			 */
			fdclose(fdp, fp, indx, td);

			/*
			 * release our private reference
			 */
			fdrop(fp, td);
			goto out;
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		atomic_set_int(&fp->f_flag, FHASLOCK);
	}

	VOP_UNLOCK(vp, 0);
	fdrop(fp, td);
	VFS_UNLOCK_GIANT(vfslocked);
	td->td_retval[0] = indx;
	return (0);

bad:
	/* Error with the vnode still locked and referenced. */
	vput(vp);
out:
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}

/*
 * Stat an (NFS) file handle.
4664 */ 4665 #ifndef _SYS_SYSPROTO_H_ 4666 struct fhstat_args { 4667 struct fhandle *u_fhp; 4668 struct stat *sb; 4669 }; 4670 #endif 4671 int 4672 sys_fhstat(td, uap) 4673 struct thread *td; 4674 register struct fhstat_args /* { 4675 struct fhandle *u_fhp; 4676 struct stat *sb; 4677 } */ *uap; 4678 { 4679 struct stat sb; 4680 fhandle_t fh; 4681 struct mount *mp; 4682 struct vnode *vp; 4683 int vfslocked; 4684 int error; 4685 4686 error = priv_check(td, PRIV_VFS_FHSTAT); 4687 if (error) 4688 return (error); 4689 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4690 if (error) 4691 return (error); 4692 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4693 return (ESTALE); 4694 vfslocked = VFS_LOCK_GIANT(mp); 4695 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4696 vfs_unbusy(mp); 4697 if (error) { 4698 VFS_UNLOCK_GIANT(vfslocked); 4699 return (error); 4700 } 4701 error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td); 4702 vput(vp); 4703 VFS_UNLOCK_GIANT(vfslocked); 4704 if (error) 4705 return (error); 4706 error = copyout(&sb, uap->sb, sizeof(sb)); 4707 return (error); 4708 } 4709 4710 /* 4711 * Implement fstatfs() for (NFS) file handles. 
 */
#ifndef _SYS_SYSPROTO_H_
struct fhstatfs_args {
	struct fhandle *u_fhp;
	struct statfs *buf;
};
#endif
/*
 * fhstatfs system call entry point: copies in the file handle, lets
 * kern_fhstatfs() fill a local struct statfs and copies that out to
 * uap->buf.  Returns 0 or an errno value.
 */
int
sys_fhstatfs(td, uap)
	struct thread *td;
	struct fhstatfs_args /* {
		struct fhandle *u_fhp;
		struct statfs *buf;
	} */ *uap;
{
	struct statfs sf;
	fhandle_t fh;
	int error;

	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
	if (error)
		return (error);
	error = kern_fhstatfs(td, fh, &sf);
	if (error)
		return (error);
	return (copyout(&sf, uap->buf, sizeof(sf)));
}

/*
 * Fill *buf with the statistics of the filesystem identified by the file
 * handle fh.  Requires PRIV_VFS_FHSTATFS.
 *
 * Returns 0 or an errno value: ESTALE when the fsid matches no mounted
 * filesystem, or an error from VFS_FHTOVP(), prison_canseemount(), the
 * MAC check or VFS_STATFS().  *buf is written only on success.
 */
int
kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
{
	struct statfs *sp;
	struct mount *mp;
	struct vnode *vp;
	int vfslocked;
	int error;

	error = priv_check(td, PRIV_VFS_FHSTATFS);
	if (error)
		return (error);
	if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
		return (ESTALE);
	vfslocked = VFS_LOCK_GIANT(mp);
	error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
	if (error) {
		vfs_unbusy(mp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (error);
	}
	/*
	 * The vnode itself is not used further; it is released as soon
	 * as the handle has been translated successfully.
	 */
	vput(vp);
	error = prison_canseemount(td->td_ucred, mp);
	if (error)
		goto out;
#ifdef MAC
	error = mac_mount_check_stat(td->td_ucred, mp);
	if (error)
		goto out;
#endif
	/*
	 * Set these in case the underlying filesystem fails to do so.
	 */
	sp = &mp->mnt_stat;
	sp->f_version = STATFS_VERSION;
	sp->f_namemax = NAME_MAX;
	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	error = VFS_STATFS(mp, sp);
	if (error == 0)
		*buf = *sp;
out:
	vfs_unbusy(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}

/*
 * Allocate backing store for the byte range [offset, offset + len) of
 * the file open on fd.
 *
 * Returns 0 or an errno value: the fget() error, ESPIPE for pipes and
 * FIFOs, ENODEV for other non-vnode files and non-regular vnodes, EBADF
 * if the file is not open for writing, EINVAL for a negative offset or
 * a non-positive length, EFBIG if offset + len would exceed OFF_MAX, or
 * an error from vn_start_write(), vn_lock(), the MAC check or
 * VOP_ALLOCATE().
 */
int
kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
{
	struct file *fp;
	struct mount *mp;
	struct vnode *vp;
	off_t olen, ooffset;
	int error, vfslocked;

	/* fp is tested on the common exit path, so start it out NULL. */
	fp = NULL;
	vfslocked = 0;
	error = fget(td, fd, CAP_WRITE, &fp);
	if (error != 0)
		goto out;

	switch (fp->f_type) {
	case DTYPE_VNODE:
		break;
	case DTYPE_PIPE:
	case DTYPE_FIFO:
		error = ESPIPE;
		goto out;
	default:
		error = ENODEV;
		goto out;
	}
	if ((fp->f_flag & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;
	if (vp->v_type != VREG) {
		error = ENODEV;
		goto out;
	}
	if (offset < 0 || len <= 0) {
		error = EINVAL;
		goto out;
	}
	/* Check for wrap. */
	if (offset > OFF_MAX - len) {
		error = EFBIG;
		goto out;
	}

	/* Allocating blocks may take a long time, so iterate. */
	for (;;) {
		/* Remember the request to sanity-check VOP_ALLOCATE(). */
		olen = len;
		ooffset = offset;

		bwillwrite();
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		mp = NULL;
		error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
		if (error != 0) {
			VFS_UNLOCK_GIANT(vfslocked);
			break;
		}
		error = vn_lock(vp, LK_EXCLUSIVE);
		if (error != 0) {
			vn_finished_write(mp);
			VFS_UNLOCK_GIANT(vfslocked);
			break;
		}
#ifdef MAC
		error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
		if (error == 0)
#endif
			error = VOP_ALLOCATE(vp, &offset, &len);
		VOP_UNLOCK(vp, 0);
		vn_finished_write(mp);
		VFS_UNLOCK_GIANT(vfslocked);

		/*
		 * VOP_ALLOCATE() may advance offset and shrink len, but the
		 * end of the range must stay where it was.
		 */
		if (olen + ooffset != offset + len) {
			panic("offset + len changed from %jx/%jx to %jx/%jx",
			    ooffset, olen, offset, len);
		}
		/* Stop on error or once nothing is left to allocate. */
		if (error != 0 || len == 0)
			break;
		KASSERT(olen > len, ("Iteration did not make progress?"));
		maybe_yield();
	}
 out:
	if (fp != NULL)
		fdrop(fp, td);
	return (error);
}

/*
 * posix_fallocate system call entry point: forwards its arguments to
 * kern_posix_fallocate().
 */
int
sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
{

	return (kern_posix_fallocate(td, uap->fd, uap->offset, uap->len));
}

/*
 * Unlike madvise(2), we do not make a best effort to remember every
 * possible caching hint.  Instead, we remember the last setting with
 * the exception that we will allow POSIX_FADV_NORMAL to adjust the
 * region of any current setting.
4886 */ 4887 int 4888 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4889 int advice) 4890 { 4891 struct fadvise_info *fa, *new; 4892 struct file *fp; 4893 struct vnode *vp; 4894 off_t end; 4895 int error; 4896 4897 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4898 return (EINVAL); 4899 switch (advice) { 4900 case POSIX_FADV_SEQUENTIAL: 4901 case POSIX_FADV_RANDOM: 4902 case POSIX_FADV_NOREUSE: 4903 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4904 break; 4905 case POSIX_FADV_NORMAL: 4906 case POSIX_FADV_WILLNEED: 4907 case POSIX_FADV_DONTNEED: 4908 new = NULL; 4909 break; 4910 default: 4911 return (EINVAL); 4912 } 4913 /* XXX: CAP_POSIX_FADVISE? */ 4914 error = fget(td, fd, 0, &fp); 4915 if (error != 0) 4916 goto out; 4917 4918 switch (fp->f_type) { 4919 case DTYPE_VNODE: 4920 break; 4921 case DTYPE_PIPE: 4922 case DTYPE_FIFO: 4923 error = ESPIPE; 4924 goto out; 4925 default: 4926 error = ENODEV; 4927 goto out; 4928 } 4929 vp = fp->f_vnode; 4930 if (vp->v_type != VREG) { 4931 error = ENODEV; 4932 goto out; 4933 } 4934 if (len == 0) 4935 end = OFF_MAX; 4936 else 4937 end = offset + len - 1; 4938 switch (advice) { 4939 case POSIX_FADV_SEQUENTIAL: 4940 case POSIX_FADV_RANDOM: 4941 case POSIX_FADV_NOREUSE: 4942 /* 4943 * Try to merge any existing non-standard region with 4944 * this new region if possible, otherwise create a new 4945 * non-standard region for this request. 
4946 */ 4947 mtx_pool_lock(mtxpool_sleep, fp); 4948 fa = fp->f_advice; 4949 if (fa != NULL && fa->fa_advice == advice && 4950 ((fa->fa_start <= end && fa->fa_end >= offset) || 4951 (end != OFF_MAX && fa->fa_start == end + 1) || 4952 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4953 if (offset < fa->fa_start) 4954 fa->fa_start = offset; 4955 if (end > fa->fa_end) 4956 fa->fa_end = end; 4957 } else { 4958 new->fa_advice = advice; 4959 new->fa_start = offset; 4960 new->fa_end = end; 4961 fp->f_advice = new; 4962 new = fa; 4963 } 4964 mtx_pool_unlock(mtxpool_sleep, fp); 4965 break; 4966 case POSIX_FADV_NORMAL: 4967 /* 4968 * If a the "normal" region overlaps with an existing 4969 * non-standard region, trim or remove the 4970 * non-standard region. 4971 */ 4972 mtx_pool_lock(mtxpool_sleep, fp); 4973 fa = fp->f_advice; 4974 if (fa != NULL) { 4975 if (offset <= fa->fa_start && end >= fa->fa_end) { 4976 new = fa; 4977 fp->f_advice = NULL; 4978 } else if (offset <= fa->fa_start && 4979 end >= fa->fa_start) 4980 fa->fa_start = end + 1; 4981 else if (offset <= fa->fa_end && end >= fa->fa_end) 4982 fa->fa_end = offset - 1; 4983 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4984 /* 4985 * If the "normal" region is a middle 4986 * portion of the existing 4987 * non-standard region, just remove 4988 * the whole thing rather than picking 4989 * one side or the other to 4990 * preserve. 4991 */ 4992 new = fa; 4993 fp->f_advice = NULL; 4994 } 4995 } 4996 mtx_pool_unlock(mtxpool_sleep, fp); 4997 break; 4998 case POSIX_FADV_WILLNEED: 4999 case POSIX_FADV_DONTNEED: 5000 error = VOP_ADVISE(vp, offset, end, advice); 5001 break; 5002 } 5003 out: 5004 if (fp != NULL) 5005 fdrop(fp, td); 5006 free(new, M_FADVISE); 5007 return (error); 5008 } 5009 5010 int 5011 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 5012 { 5013 5014 return (kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 5015 uap->advice)); 5016 } 5017