1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
 *
 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_capsicum.h"
#include "opt_compat.h"
#include "opt_kdtrace.h"
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/capability.h>
#include <sys/disk.h>
#include <sys/sysent.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/namei.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filio.h>
#include <sys/limits.h>
#include <sys/linker.h>
#include <sys/sdt.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/dirent.h>
#include <sys/jail.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <machine/stdarg.h>

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/uma.h>

#include <ufs/ufs/quota.h>

MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");

SDT_PROVIDER_DEFINE(vfs);
SDT_PROBE_DEFINE(vfs, , stat, mode, mode);
SDT_PROBE_ARGTYPE(vfs, , stat, mode, 0, "char *");
SDT_PROBE_ARGTYPE(vfs, , stat, mode, 1, "int");
SDT_PROBE_DEFINE(vfs, , stat, reg, reg);
SDT_PROBE_ARGTYPE(vfs, , stat, reg, 0, "char *");
SDT_PROBE_ARGTYPE(vfs, , stat, reg, 1, "int");

/* Forward declarations for file-local helpers. */
static int chroot_refuse_vdir_fds(struct filedesc *fdp);
static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
static int setfflags(struct thread *td, struct vnode *, int);
static int setutimes(struct thread *td, struct vnode *,
    const struct timespec *, int, int);
static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
    struct thread *td);

/*
 * The module initialization routine for POSIX asynchronous I/O will
 * set this to the version of AIO that it implements.  (Zero means
 * that it is not implemented.)  This value is used here by pathconf()
 * and in kern_descrip.c by fpathconf().
 */
int async_io_version;

#ifdef DEBUG
/* Debug knob exported as debug.syncprt; not referenced in this part of the file. */
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif

/*
 * Sync each mounted filesystem.
 *
 * Walks the global mount list and, for every mount that can be busied
 * without sleeping and is not read-only, starts a non-blocking
 * (MNT_NOWAIT) msync and VFS_SYNC.  Mounts that cannot be busied, are
 * read-only, or refuse vn_start_write(V_NOWAIT) are silently skipped.
 * Errors from the individual filesystems are not reported; the call
 * always returns 0.
 */
#ifndef _SYS_SYSPROTO_H_
struct sync_args {
	int	dummy;
};
#endif
/* ARGSUSED */
int
sys_sync(td, uap)
	struct thread *td;
	struct sync_args *uap;
{
	struct mount *mp, *nmp;
	int save, vfslocked;

	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		/*
		 * On failure we still hold mountlist_mtx and simply move
		 * on.  NOTE(review): on success the loop re-takes
		 * mountlist_mtx below, so vfs_busy() with MBF_MNTLSTLOCK
		 * presumably returns with the list lock dropped -- confirm
		 * against vfs_busy().
		 */
		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
		vfslocked = VFS_LOCK_GIANT(mp);
		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
			/* Flag the thread as doing sync I/O for the duration. */
			save = curthread_pflags_set(TDP_SYNCIO);
			vfs_msync(mp, MNT_NOWAIT);
			VFS_SYNC(mp, MNT_NOWAIT);
			curthread_pflags_restore(save);
			vn_finished_write(mp);
		}
		VFS_UNLOCK_GIANT(vfslocked);
		/*
		 * Re-take the list lock before reading the successor, and
		 * only then release our busy reference on mp.
		 */
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	mtx_unlock(&mountlist_mtx);
	return (0);
}

/*
 * Change filesystem quotas.
165 */ 166 #ifndef _SYS_SYSPROTO_H_ 167 struct quotactl_args { 168 char *path; 169 int cmd; 170 int uid; 171 caddr_t arg; 172 }; 173 #endif 174 int 175 sys_quotactl(td, uap) 176 struct thread *td; 177 register struct quotactl_args /* { 178 char *path; 179 int cmd; 180 int uid; 181 caddr_t arg; 182 } */ *uap; 183 { 184 struct mount *mp; 185 int vfslocked; 186 int error; 187 struct nameidata nd; 188 189 AUDIT_ARG_CMD(uap->cmd); 190 AUDIT_ARG_UID(uap->uid); 191 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 192 return (EPERM); 193 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1, 194 UIO_USERSPACE, uap->path, td); 195 if ((error = namei(&nd)) != 0) 196 return (error); 197 vfslocked = NDHASGIANT(&nd); 198 NDFREE(&nd, NDF_ONLY_PNBUF); 199 mp = nd.ni_vp->v_mount; 200 vfs_ref(mp); 201 vput(nd.ni_vp); 202 error = vfs_busy(mp, 0); 203 vfs_rel(mp); 204 if (error) { 205 VFS_UNLOCK_GIANT(vfslocked); 206 return (error); 207 } 208 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 209 210 /* 211 * Since quota on operation typically needs to open quota 212 * file, the Q_QUOTAON handler needs to unbusy the mount point 213 * before calling into namei. Otherwise, unmount might be 214 * started between two vfs_busy() invocations (first is our, 215 * second is from mount point cross-walk code in lookup()), 216 * causing deadlock. 217 * 218 * Require that Q_QUOTAON handles the vfs_busy() reference on 219 * its own, always returning with ubusied mount point. 220 */ 221 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 222 vfs_unbusy(mp); 223 VFS_UNLOCK_GIANT(vfslocked); 224 return (error); 225 } 226 227 /* 228 * Used by statfs conversion routines to scale the block size up if 229 * necessary so that all of the block counts are <= 'max_size'. Note 230 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 231 * value of 'n'. 
232 */ 233 void 234 statfs_scale_blocks(struct statfs *sf, long max_size) 235 { 236 uint64_t count; 237 int shift; 238 239 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 240 241 /* 242 * Attempt to scale the block counts to give a more accurate 243 * overview to userland of the ratio of free space to used 244 * space. To do this, find the largest block count and compute 245 * a divisor that lets it fit into a signed integer <= max_size. 246 */ 247 if (sf->f_bavail < 0) 248 count = -sf->f_bavail; 249 else 250 count = sf->f_bavail; 251 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 252 if (count <= max_size) 253 return; 254 255 count >>= flsl(max_size); 256 shift = 0; 257 while (count > 0) { 258 shift++; 259 count >>=1; 260 } 261 262 sf->f_bsize <<= shift; 263 sf->f_blocks >>= shift; 264 sf->f_bfree >>= shift; 265 sf->f_bavail >>= shift; 266 } 267 268 /* 269 * Get filesystem statistics. 270 */ 271 #ifndef _SYS_SYSPROTO_H_ 272 struct statfs_args { 273 char *path; 274 struct statfs *buf; 275 }; 276 #endif 277 int 278 sys_statfs(td, uap) 279 struct thread *td; 280 register struct statfs_args /* { 281 char *path; 282 struct statfs *buf; 283 } */ *uap; 284 { 285 struct statfs sf; 286 int error; 287 288 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 289 if (error == 0) 290 error = copyout(&sf, uap->buf, sizeof(sf)); 291 return (error); 292 } 293 294 int 295 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 296 struct statfs *buf) 297 { 298 struct mount *mp; 299 struct statfs *sp, sb; 300 int vfslocked; 301 int error; 302 struct nameidata nd; 303 304 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | 305 AUDITVNODE1, pathseg, path, td); 306 error = namei(&nd); 307 if (error) 308 return (error); 309 vfslocked = NDHASGIANT(&nd); 310 mp = nd.ni_vp->v_mount; 311 vfs_ref(mp); 312 NDFREE(&nd, NDF_ONLY_PNBUF); 313 vput(nd.ni_vp); 314 error = vfs_busy(mp, 0); 315 vfs_rel(mp); 316 if (error) { 317 
VFS_UNLOCK_GIANT(vfslocked); 318 return (error); 319 } 320 #ifdef MAC 321 error = mac_mount_check_stat(td->td_ucred, mp); 322 if (error) 323 goto out; 324 #endif 325 /* 326 * Set these in case the underlying filesystem fails to do so. 327 */ 328 sp = &mp->mnt_stat; 329 sp->f_version = STATFS_VERSION; 330 sp->f_namemax = NAME_MAX; 331 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 332 error = VFS_STATFS(mp, sp); 333 if (error) 334 goto out; 335 if (priv_check(td, PRIV_VFS_GENERATION)) { 336 bcopy(sp, &sb, sizeof(sb)); 337 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 338 prison_enforce_statfs(td->td_ucred, mp, &sb); 339 sp = &sb; 340 } 341 *buf = *sp; 342 out: 343 vfs_unbusy(mp); 344 VFS_UNLOCK_GIANT(vfslocked); 345 return (error); 346 } 347 348 /* 349 * Get filesystem statistics. 350 */ 351 #ifndef _SYS_SYSPROTO_H_ 352 struct fstatfs_args { 353 int fd; 354 struct statfs *buf; 355 }; 356 #endif 357 int 358 sys_fstatfs(td, uap) 359 struct thread *td; 360 register struct fstatfs_args /* { 361 int fd; 362 struct statfs *buf; 363 } */ *uap; 364 { 365 struct statfs sf; 366 int error; 367 368 error = kern_fstatfs(td, uap->fd, &sf); 369 if (error == 0) 370 error = copyout(&sf, uap->buf, sizeof(sf)); 371 return (error); 372 } 373 374 int 375 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 376 { 377 struct file *fp; 378 struct mount *mp; 379 struct statfs *sp, sb; 380 int vfslocked; 381 struct vnode *vp; 382 int error; 383 384 AUDIT_ARG_FD(fd); 385 error = getvnode(td->td_proc->p_fd, fd, CAP_FSTATFS, &fp); 386 if (error) 387 return (error); 388 vp = fp->f_vnode; 389 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 390 vn_lock(vp, LK_SHARED | LK_RETRY); 391 #ifdef AUDIT 392 AUDIT_ARG_VNODE1(vp); 393 #endif 394 mp = vp->v_mount; 395 if (mp) 396 vfs_ref(mp); 397 VOP_UNLOCK(vp, 0); 398 fdrop(fp, td); 399 if (mp == NULL) { 400 error = EBADF; 401 goto out; 402 } 403 error = vfs_busy(mp, 0); 404 vfs_rel(mp); 405 if (error) { 406 VFS_UNLOCK_GIANT(vfslocked); 407 return (error); 408 } 
409 #ifdef MAC 410 error = mac_mount_check_stat(td->td_ucred, mp); 411 if (error) 412 goto out; 413 #endif 414 /* 415 * Set these in case the underlying filesystem fails to do so. 416 */ 417 sp = &mp->mnt_stat; 418 sp->f_version = STATFS_VERSION; 419 sp->f_namemax = NAME_MAX; 420 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 421 error = VFS_STATFS(mp, sp); 422 if (error) 423 goto out; 424 if (priv_check(td, PRIV_VFS_GENERATION)) { 425 bcopy(sp, &sb, sizeof(sb)); 426 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 427 prison_enforce_statfs(td->td_ucred, mp, &sb); 428 sp = &sb; 429 } 430 *buf = *sp; 431 out: 432 if (mp) 433 vfs_unbusy(mp); 434 VFS_UNLOCK_GIANT(vfslocked); 435 return (error); 436 } 437 438 /* 439 * Get statistics on all filesystems. 440 */ 441 #ifndef _SYS_SYSPROTO_H_ 442 struct getfsstat_args { 443 struct statfs *buf; 444 long bufsize; 445 int flags; 446 }; 447 #endif 448 int 449 sys_getfsstat(td, uap) 450 struct thread *td; 451 register struct getfsstat_args /* { 452 struct statfs *buf; 453 long bufsize; 454 int flags; 455 } */ *uap; 456 { 457 458 return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE, 459 uap->flags)); 460 } 461 462 /* 463 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 464 * The caller is responsible for freeing memory which will be allocated 465 * in '*buf'. 
 *
 * Fills at most bufsize / sizeof(struct statfs) entries and stores the
 * number of entries in td->td_retval[0].  When bufsize is 0 nothing is
 * copied and the return value is the number of mounts that passed the
 * visibility checks and could be busied.  Mounts the caller may not
 * see (jail or MAC policy), mounts that cannot be busied without
 * sleeping, and mounts whose VFS_STATFS() fails are skipped.
 *
 * Returns 0, or the copyout() error for a UIO_USERSPACE buffer.
 */
int
kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
    enum uio_seg bufseg, int flags)
{
	struct mount *mp, *nmp;
	struct statfs *sfsp, *sp, sb;
	size_t count, maxcount;
	int vfslocked;
	int error;

	maxcount = bufsize / sizeof(struct statfs);
	if (bufsize == 0)
		sfsp = NULL;
	else if (bufseg == UIO_USERSPACE)
		sfsp = *buf;
	else /* if (bufseg == UIO_SYSSPACE) */ {
		/*
		 * Size the kernel buffer from the current number of mounts,
		 * capped by what the caller asked for.  The list lock is
		 * dropped before the sleeping allocation, so the count is
		 * only a snapshot; the copy loop below never writes more
		 * than maxcount entries.
		 */
		count = 0;
		mtx_lock(&mountlist_mtx);
		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
			count++;
		}
		mtx_unlock(&mountlist_mtx);
		if (maxcount > count)
			maxcount = count;
		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
		    M_WAITOK);
	}
	count = 0;
	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		if (prison_canseemount(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#ifdef MAC
		if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#endif
		/*
		 * NOTE(review): every path after a successful vfs_busy()
		 * re-takes mountlist_mtx before advancing, so with
		 * MBF_MNTLSTLOCK the list lock is presumably dropped on
		 * success -- confirm against vfs_busy().
		 */
		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
		vfslocked = VFS_LOCK_GIANT(mp);
		if (sfsp && count < maxcount) {
			sp = &mp->mnt_stat;
			/*
			 * Set these in case the underlying filesystem
			 * fails to do so.
			 */
			sp->f_version = STATFS_VERSION;
			sp->f_namemax = NAME_MAX;
			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
			/*
			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
			 * overrides MNT_WAIT.
			 */
			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
			    (flags & MNT_WAIT)) &&
			    (error = VFS_STATFS(mp, sp))) {
				/* Refresh failed: skip this mount, uncounted. */
				VFS_UNLOCK_GIANT(vfslocked);
				mtx_lock(&mountlist_mtx);
				nmp = TAILQ_NEXT(mp, mnt_list);
				vfs_unbusy(mp);
				continue;
			}
			/*
			 * Callers without PRIV_VFS_GENERATION get a copy
			 * with the filesystem ID zeroed and the jail statfs
			 * policy applied.
			 */
			if (priv_check(td, PRIV_VFS_GENERATION)) {
				bcopy(sp, &sb, sizeof(sb));
				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
				prison_enforce_statfs(td->td_ucred, mp, &sb);
				sp = &sb;
			}
			if (bufseg == UIO_SYSSPACE)
				bcopy(sp, sfsp, sizeof(*sp));
			else /* if (bufseg == UIO_USERSPACE) */ {
				error = copyout(sp, sfsp, sizeof(*sp));
				if (error) {
					/* mountlist_mtx is not held here. */
					vfs_unbusy(mp);
					VFS_UNLOCK_GIANT(vfslocked);
					return (error);
				}
			}
			sfsp++;
		}
		VFS_UNLOCK_GIANT(vfslocked);
		/* Counted even when the buffer is already full or absent. */
		count++;
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	mtx_unlock(&mountlist_mtx);
	/* Report how many entries were stored, or the total if no buffer. */
	if (sfsp && count > maxcount)
		td->td_retval[0] = maxcount;
	else
		td->td_retval[0] = count;
	return (0);
}

#ifdef COMPAT_FREEBSD4
/*
 * Get old format filesystem statistics.
 */
static void cvtstatfs(struct statfs *, struct ostatfs *);

/*
 * FreeBSD 4 compatible statfs(2): obtain the current-format statistics
 * via kern_statfs(), convert them with cvtstatfs() and copy the
 * old-format structure out to uap->buf.
 */
#ifndef _SYS_SYSPROTO_H_
struct freebsd4_statfs_args {
	char *path;
	struct ostatfs *buf;
};
#endif
int
freebsd4_statfs(td, uap)
	struct thread *td;
	struct freebsd4_statfs_args /* {
		char *path;
		struct ostatfs *buf;
	} */ *uap;
{
	struct ostatfs osb;
	struct statfs sf;
	int error;

	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
	if (error)
		return (error);
	cvtstatfs(&sf, &osb);
	return (copyout(&osb, uap->buf, sizeof(osb)));
}

/*
 * Get filesystem statistics.
600 */ 601 #ifndef _SYS_SYSPROTO_H_ 602 struct freebsd4_fstatfs_args { 603 int fd; 604 struct ostatfs *buf; 605 }; 606 #endif 607 int 608 freebsd4_fstatfs(td, uap) 609 struct thread *td; 610 struct freebsd4_fstatfs_args /* { 611 int fd; 612 struct ostatfs *buf; 613 } */ *uap; 614 { 615 struct ostatfs osb; 616 struct statfs sf; 617 int error; 618 619 error = kern_fstatfs(td, uap->fd, &sf); 620 if (error) 621 return (error); 622 cvtstatfs(&sf, &osb); 623 return (copyout(&osb, uap->buf, sizeof(osb))); 624 } 625 626 /* 627 * Get statistics on all filesystems. 628 */ 629 #ifndef _SYS_SYSPROTO_H_ 630 struct freebsd4_getfsstat_args { 631 struct ostatfs *buf; 632 long bufsize; 633 int flags; 634 }; 635 #endif 636 int 637 freebsd4_getfsstat(td, uap) 638 struct thread *td; 639 register struct freebsd4_getfsstat_args /* { 640 struct ostatfs *buf; 641 long bufsize; 642 int flags; 643 } */ *uap; 644 { 645 struct statfs *buf, *sp; 646 struct ostatfs osb; 647 size_t count, size; 648 int error; 649 650 count = uap->bufsize / sizeof(struct ostatfs); 651 size = count * sizeof(struct statfs); 652 error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags); 653 if (size > 0) { 654 count = td->td_retval[0]; 655 sp = buf; 656 while (count > 0 && error == 0) { 657 cvtstatfs(sp, &osb); 658 error = copyout(&osb, uap->buf, sizeof(osb)); 659 sp++; 660 uap->buf++; 661 count--; 662 } 663 free(buf, M_TEMP); 664 } 665 return (error); 666 } 667 668 /* 669 * Implement fstatfs() for (NFS) file handles. 
670 */ 671 #ifndef _SYS_SYSPROTO_H_ 672 struct freebsd4_fhstatfs_args { 673 struct fhandle *u_fhp; 674 struct ostatfs *buf; 675 }; 676 #endif 677 int 678 freebsd4_fhstatfs(td, uap) 679 struct thread *td; 680 struct freebsd4_fhstatfs_args /* { 681 struct fhandle *u_fhp; 682 struct ostatfs *buf; 683 } */ *uap; 684 { 685 struct ostatfs osb; 686 struct statfs sf; 687 fhandle_t fh; 688 int error; 689 690 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 691 if (error) 692 return (error); 693 error = kern_fhstatfs(td, fh, &sf); 694 if (error) 695 return (error); 696 cvtstatfs(&sf, &osb); 697 return (copyout(&osb, uap->buf, sizeof(osb))); 698 } 699 700 /* 701 * Convert a new format statfs structure to an old format statfs structure. 702 */ 703 static void 704 cvtstatfs(nsp, osp) 705 struct statfs *nsp; 706 struct ostatfs *osp; 707 { 708 709 statfs_scale_blocks(nsp, LONG_MAX); 710 bzero(osp, sizeof(*osp)); 711 osp->f_bsize = nsp->f_bsize; 712 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 713 osp->f_blocks = nsp->f_blocks; 714 osp->f_bfree = nsp->f_bfree; 715 osp->f_bavail = nsp->f_bavail; 716 osp->f_files = MIN(nsp->f_files, LONG_MAX); 717 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 718 osp->f_owner = nsp->f_owner; 719 osp->f_type = nsp->f_type; 720 osp->f_flags = nsp->f_flags; 721 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 722 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 723 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 724 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 725 strlcpy(osp->f_fstypename, nsp->f_fstypename, 726 MIN(MFSNAMELEN, OMFSNAMELEN)); 727 strlcpy(osp->f_mntonname, nsp->f_mntonname, 728 MIN(MNAMELEN, OMNAMELEN)); 729 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 730 MIN(MNAMELEN, OMNAMELEN)); 731 osp->f_fsid = nsp->f_fsid; 732 } 733 #endif /* COMPAT_FREEBSD4 */ 734 735 /* 736 * Change current working directory to a given file descriptor. 
 *
 * The vnode behind the descriptor must pass change_dir().  If a
 * filesystem is mounted on it, the root of the topmost mounted
 * filesystem becomes the new working directory instead.  On success the
 * process's fd_cdir is replaced and the previous directory vnode is
 * released.
 */
#ifndef _SYS_SYSPROTO_H_
struct fchdir_args {
	int	fd;
};
#endif
int
sys_fchdir(td, uap)
	struct thread *td;
	struct fchdir_args /* {
		int fd;
	} */ *uap;
{
	register struct filedesc *fdp = td->td_proc->p_fd;
	struct vnode *vp, *tdp, *vpold;
	struct mount *mp;
	struct file *fp;
	int vfslocked;
	int error;

	AUDIT_ARG_FD(uap->fd);
	if ((error = getvnode(fdp, uap->fd, CAP_FCHDIR, &fp)) != 0)
		return (error);
	/* Take our own vnode reference so the file can be dropped now. */
	vp = fp->f_vnode;
	VREF(vp);
	fdrop(fp, td);
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vn_lock(vp, LK_SHARED | LK_RETRY);
	AUDIT_ARG_VNODE1(vp);
	error = change_dir(vp, td);
	/*
	 * Descend through any filesystems mounted on top of vp.  Each
	 * iteration swaps vp (and the matching Giant state) for the root
	 * vnode of the covering mount, which VFS_ROOT() is asked to return
	 * with LK_SHARED.
	 */
	while (!error && (mp = vp->v_mountedhere) != NULL) {
		int tvfslocked;
		/* Busy failed: re-read v_mountedhere and try again. */
		if (vfs_busy(mp, 0))
			continue;
		tvfslocked = VFS_LOCK_GIANT(mp);
		error = VFS_ROOT(mp, LK_SHARED, &tdp);
		vfs_unbusy(mp);
		if (error) {
			VFS_UNLOCK_GIANT(tvfslocked);
			break;
		}
		vput(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		vp = tdp;
		vfslocked = tvfslocked;
	}
	if (error) {
		vput(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (error);
	}
	/* Keep the reference but drop the lock before publishing vp. */
	VOP_UNLOCK(vp, 0);
	VFS_UNLOCK_GIANT(vfslocked);
	FILEDESC_XLOCK(fdp);
	vpold = fdp->fd_cdir;
	fdp->fd_cdir = vp;
	FILEDESC_XUNLOCK(fdp);
	/* Release the old directory outside the filedesc lock. */
	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
	vrele(vpold);
	VFS_UNLOCK_GIANT(vfslocked);
	return (0);
}

/*
 * Change current working directory (``.'').
802 */ 803 #ifndef _SYS_SYSPROTO_H_ 804 struct chdir_args { 805 char *path; 806 }; 807 #endif 808 int 809 sys_chdir(td, uap) 810 struct thread *td; 811 struct chdir_args /* { 812 char *path; 813 } */ *uap; 814 { 815 816 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 817 } 818 819 int 820 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 821 { 822 register struct filedesc *fdp = td->td_proc->p_fd; 823 int error; 824 struct nameidata nd; 825 struct vnode *vp; 826 int vfslocked; 827 828 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | 829 MPSAFE, pathseg, path, td); 830 if ((error = namei(&nd)) != 0) 831 return (error); 832 vfslocked = NDHASGIANT(&nd); 833 if ((error = change_dir(nd.ni_vp, td)) != 0) { 834 vput(nd.ni_vp); 835 VFS_UNLOCK_GIANT(vfslocked); 836 NDFREE(&nd, NDF_ONLY_PNBUF); 837 return (error); 838 } 839 VOP_UNLOCK(nd.ni_vp, 0); 840 VFS_UNLOCK_GIANT(vfslocked); 841 NDFREE(&nd, NDF_ONLY_PNBUF); 842 FILEDESC_XLOCK(fdp); 843 vp = fdp->fd_cdir; 844 fdp->fd_cdir = nd.ni_vp; 845 FILEDESC_XUNLOCK(fdp); 846 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 847 vrele(vp); 848 VFS_UNLOCK_GIANT(vfslocked); 849 return (0); 850 } 851 852 /* 853 * Helper function for raised chroot(2) security function: Refuse if 854 * any filedescriptors are open directories. 855 */ 856 static int 857 chroot_refuse_vdir_fds(fdp) 858 struct filedesc *fdp; 859 { 860 struct vnode *vp; 861 struct file *fp; 862 int fd; 863 864 FILEDESC_LOCK_ASSERT(fdp); 865 866 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 867 fp = fget_locked(fdp, fd); 868 if (fp == NULL) 869 continue; 870 if (fp->f_type == DTYPE_VNODE) { 871 vp = fp->f_vnode; 872 if (vp->v_type == VDIR) 873 return (EPERM); 874 } 875 } 876 return (0); 877 } 878 879 /* 880 * This sysctl determines if we will allow a process to chroot(2) if it 881 * has a directory open: 882 * 0: disallowed for all processes. 883 * 1: allowed for processes that were not already chroot(2)'ed. 884 * 2: allowed for all processes. 
885 */ 886 887 static int chroot_allow_open_directories = 1; 888 889 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 890 &chroot_allow_open_directories, 0, 891 "Allow a process to chroot(2) if it has a directory open"); 892 893 /* 894 * Change notion of root (``/'') directory. 895 */ 896 #ifndef _SYS_SYSPROTO_H_ 897 struct chroot_args { 898 char *path; 899 }; 900 #endif 901 int 902 sys_chroot(td, uap) 903 struct thread *td; 904 struct chroot_args /* { 905 char *path; 906 } */ *uap; 907 { 908 int error; 909 struct nameidata nd; 910 int vfslocked; 911 912 error = priv_check(td, PRIV_VFS_CHROOT); 913 if (error) 914 return (error); 915 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | 916 AUDITVNODE1, UIO_USERSPACE, uap->path, td); 917 error = namei(&nd); 918 if (error) 919 goto error; 920 vfslocked = NDHASGIANT(&nd); 921 if ((error = change_dir(nd.ni_vp, td)) != 0) 922 goto e_vunlock; 923 #ifdef MAC 924 if ((error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp))) 925 goto e_vunlock; 926 #endif 927 VOP_UNLOCK(nd.ni_vp, 0); 928 error = change_root(nd.ni_vp, td); 929 vrele(nd.ni_vp); 930 VFS_UNLOCK_GIANT(vfslocked); 931 NDFREE(&nd, NDF_ONLY_PNBUF); 932 return (error); 933 e_vunlock: 934 vput(nd.ni_vp); 935 VFS_UNLOCK_GIANT(vfslocked); 936 error: 937 NDFREE(&nd, NDF_ONLY_PNBUF); 938 return (error); 939 } 940 941 /* 942 * Common routine for chroot and chdir. Callers must provide a locked vnode 943 * instance. 944 */ 945 int 946 change_dir(vp, td) 947 struct vnode *vp; 948 struct thread *td; 949 { 950 int error; 951 952 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 953 if (vp->v_type != VDIR) 954 return (ENOTDIR); 955 #ifdef MAC 956 error = mac_vnode_check_chdir(td->td_ucred, vp); 957 if (error) 958 return (error); 959 #endif 960 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 961 return (error); 962 } 963 964 /* 965 * Common routine for kern_chroot() and jail_attach(). 
The caller is 966 * responsible for invoking priv_check() and mac_vnode_check_chroot() to 967 * authorize this operation. 968 */ 969 int 970 change_root(vp, td) 971 struct vnode *vp; 972 struct thread *td; 973 { 974 struct filedesc *fdp; 975 struct vnode *oldvp; 976 int vfslocked; 977 int error; 978 979 VFS_ASSERT_GIANT(vp->v_mount); 980 fdp = td->td_proc->p_fd; 981 FILEDESC_XLOCK(fdp); 982 if (chroot_allow_open_directories == 0 || 983 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 984 error = chroot_refuse_vdir_fds(fdp); 985 if (error) { 986 FILEDESC_XUNLOCK(fdp); 987 return (error); 988 } 989 } 990 oldvp = fdp->fd_rdir; 991 fdp->fd_rdir = vp; 992 VREF(fdp->fd_rdir); 993 if (!fdp->fd_jdir) { 994 fdp->fd_jdir = vp; 995 VREF(fdp->fd_jdir); 996 } 997 FILEDESC_XUNLOCK(fdp); 998 vfslocked = VFS_LOCK_GIANT(oldvp->v_mount); 999 vrele(oldvp); 1000 VFS_UNLOCK_GIANT(vfslocked); 1001 return (0); 1002 } 1003 1004 static __inline cap_rights_t 1005 flags_to_rights(int flags) 1006 { 1007 cap_rights_t rights = 0; 1008 1009 switch ((flags & O_ACCMODE)) { 1010 case O_RDONLY: 1011 rights |= CAP_READ; 1012 break; 1013 1014 case O_RDWR: 1015 rights |= CAP_READ; 1016 /* fall through */ 1017 1018 case O_WRONLY: 1019 rights |= CAP_WRITE; 1020 break; 1021 1022 case O_EXEC: 1023 rights |= CAP_FEXECVE; 1024 break; 1025 } 1026 1027 if (flags & O_CREAT) 1028 rights |= CAP_CREATE; 1029 1030 if (flags & O_TRUNC) 1031 rights |= CAP_FTRUNCATE; 1032 1033 if ((flags & O_EXLOCK) || (flags & O_SHLOCK)) 1034 rights |= CAP_FLOCK; 1035 1036 return (rights); 1037 } 1038 1039 /* 1040 * Check permissions, allocate an open file structure, and call the device 1041 * open routine if any. 
 */
#ifndef _SYS_SYSPROTO_H_
struct open_args {
	char	*path;
	int	flags;
	int	mode;
};
#endif
int
sys_open(td, uap)
	struct thread *td;
	register struct open_args /* {
		char *path;
		int flags;
		int mode;
	} */ *uap;
{

	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
}

/*
 * openat(2) syscall wrapper: the path is a userspace pointer and is
 * resolved relative to uap->fd.
 */
#ifndef _SYS_SYSPROTO_H_
struct openat_args {
	int	fd;
	char	*path;
	int	flag;
	int	mode;
};
#endif
int
sys_openat(struct thread *td, struct openat_args *uap)
{

	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
	    uap->mode));
}

/*
 * open(2) in terms of openat(2): resolve the path relative to the
 * current working directory (AT_FDCWD).
 */
int
kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
    int mode)
{

	return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode));
}

/*
 * Common implementation of open(2) and openat(2).
 *
 * Allocates a file structure, opens the vnode named by 'path' relative
 * to 'fd' via vn_open(), optionally applies an flock-style lock
 * (O_EXLOCK/O_SHLOCK) and truncation (O_TRUNC), and finally installs
 * the file in the descriptor table.  On success the new descriptor
 * number is stored in td->td_retval[0] and 0 is returned; on failure an
 * errno value is returned and the file structure is released.
 */
int
kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    int flags, int mode)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	int cmode;
	struct file *nfp;
	int type, indx = -1, error, error_open;
	struct flock lf;
	struct nameidata nd;
	int vfslocked;
	cap_rights_t rights_needed = CAP_LOOKUP;

	AUDIT_ARG_FFLAGS(flags);
	AUDIT_ARG_MODE(mode);
	/* XXX: audit dirfd */
	rights_needed |= flags_to_rights(flags);
	/*
	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
	 * may be specified.
	 */
	if (flags & O_EXEC) {
		if (flags & O_ACCMODE)
			return (EINVAL);
	} else if ((flags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	else
		flags = FFLAGS(flags);

	/*
	 * allocate the file descriptor, but don't install a descriptor yet
	 */
	error = falloc_noinstall(td, &nfp);
	if (error)
		return (error);
	/* An extra reference on `nfp' has been held for us by falloc_noinstall(). */
	fp = nfp;
	/* Set the flags early so the finit in devfs can pick them up. */
	fp->f_flag = flags & FMASK;
	/* Apply the umask and strip the sticky bit from the creation mode. */
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg,
	    path, fd, rights_needed, td);
	td->td_dupfd = -1;		/* XXX check for fdopen */
	error = vn_open(&nd, &flags, cmode, fp);
	if (error) {
		/*
		 * If the vn_open replaced the method vector, something
		 * wondrous happened deep below and we just pass it up
		 * pretending we know what we do.
		 */
		if (error == ENXIO && fp->f_ops != &badfileops)
			goto success;

		/*
		 * handle special fdopen() case.  bleh.  dupfdopen() is
		 * responsible for dropping the old contents of ofiles[indx]
		 * if it succeeds.
		 *
		 * Don't do this for relative (capability) lookups; we don't
		 * understand exactly what would happen, and we don't think
		 * that it ever should.
		 */
		if ((nd.ni_strictrelative == 0) &&
		    (error == ENODEV || error == ENXIO) &&
		    (td->td_dupfd >= 0)) {
			/* XXX from fdopen */
			error_open = error;
			if ((error = finstall(td, fp, &indx, flags)) != 0)
				goto bad_unlocked;
			if ((error = dupfdopen(td, fdp, indx, td->td_dupfd,
			    flags, error_open)) == 0)
				goto success;
		}
		/*
		 * Clean up the descriptor, but only if another thread hadn't
		 * replaced or closed it.
		 */
		if (indx != -1)
			fdclose(fdp, fp, indx, td);
		fdrop(fp, td);

		/* A restart request is reported to the caller as EINTR. */
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}
	td->td_dupfd = 0;
	vfslocked = NDHASGIANT(&nd);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;

	/*
	 * Store the vnode, for any f_type. Typically, the vnode use
	 * count is decremented by direct call to vn_closefile() for
	 * files that switched type in the cdevsw fdopen() method.
	 */
	fp->f_vnode = vp;
	/*
	 * If the file wasn't claimed by devfs bind it to the normal
	 * vnode operations here.
	 */
	if (fp->f_ops == &badfileops) {
		KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
		fp->f_seqcount = 1;
		finit(fp, flags & FMASK, DTYPE_VNODE, vp, &vnops);
	}

	VOP_UNLOCK(vp, 0);
	/*
	 * O_EXLOCK/O_SHLOCK: take a whole-file flock-style lock (write lock
	 * for O_EXLOCK, read lock otherwise), waiting for it unless the
	 * open is non-blocking.
	 */
	if (fp->f_type == DTYPE_VNODE && (flags & (O_EXLOCK | O_SHLOCK)) != 0) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
			    type)) != 0)
			goto bad;
		atomic_set_int(&fp->f_flag, FHASLOCK);
	}
	if (flags & O_TRUNC) {
		error = fo_truncate(fp, 0, td->td_ucred, td);
		if (error)
			goto bad;
	}
	VFS_UNLOCK_GIANT(vfslocked);
success:
	/*
	 * If we haven't already installed the FD (for dupfdopen), do so now.
	 */
	if (indx == -1) {
#ifdef CAPABILITIES
		if (nd.ni_strictrelative == 1) {
			/*
			 * We are doing a strict relative lookup; wrap the
			 * result in a capability.
			 */
			if ((error = kern_capwrap(td, fp, nd.ni_baserights,
			    &indx)) != 0)
				goto bad_unlocked;
		} else
#endif
			if ((error = finstall(td, fp, &indx, flags)) != 0)
				goto bad_unlocked;

	}

	/*
	 * Release our private reference, leaving the one associated with
	 * the descriptor table intact.
	 */
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (0);
bad:
	/* Reached with the Giant state from the lookup still held. */
	VFS_UNLOCK_GIANT(vfslocked);
bad_unlocked:
	/* Undo the descriptor install, if any, then drop our reference. */
	if (indx != -1)
		fdclose(fdp, fp, indx, td);
	fdrop(fp, td);
	td->td_retval[0] = -1;
	return (error);
}

#ifdef COMPAT_43
/*
 * Create a file.
 *
 * Old creat(2): equivalent to an open for writing with O_CREAT and
 * O_TRUNC.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(td, uap)
	struct thread *td;
	register struct ocreat_args /* {
		char *path;
		int mode;
	} */ *uap;
{

	return (kern_open(td, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
}
#endif /* COMPAT_43 */

/*
 * Create a special file.
1283 */ 1284 #ifndef _SYS_SYSPROTO_H_ 1285 struct mknod_args { 1286 char *path; 1287 int mode; 1288 int dev; 1289 }; 1290 #endif 1291 int 1292 sys_mknod(td, uap) 1293 struct thread *td; 1294 register struct mknod_args /* { 1295 char *path; 1296 int mode; 1297 int dev; 1298 } */ *uap; 1299 { 1300 1301 return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); 1302 } 1303 1304 #ifndef _SYS_SYSPROTO_H_ 1305 struct mknodat_args { 1306 int fd; 1307 char *path; 1308 mode_t mode; 1309 dev_t dev; 1310 }; 1311 #endif 1312 int 1313 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1314 { 1315 1316 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1317 uap->dev)); 1318 } 1319 1320 int 1321 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode, 1322 int dev) 1323 { 1324 1325 return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev)); 1326 } 1327 1328 int 1329 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1330 int mode, int dev) 1331 { 1332 struct vnode *vp; 1333 struct mount *mp; 1334 struct vattr vattr; 1335 int error; 1336 int whiteout = 0; 1337 struct nameidata nd; 1338 int vfslocked; 1339 1340 AUDIT_ARG_MODE(mode); 1341 AUDIT_ARG_DEV(dev); 1342 switch (mode & S_IFMT) { 1343 case S_IFCHR: 1344 case S_IFBLK: 1345 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1346 break; 1347 case S_IFMT: 1348 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1349 break; 1350 case S_IFWHT: 1351 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1352 break; 1353 case S_IFIFO: 1354 if (dev == 0) 1355 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1356 /* FALLTHROUGH */ 1357 default: 1358 error = EINVAL; 1359 break; 1360 } 1361 if (error) 1362 return (error); 1363 restart: 1364 bwillwrite(); 1365 NDINIT_ATRIGHTS(&nd, CREATE, 1366 LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1, pathseg, path, fd, 1367 CAP_MKFIFO, td); 1368 if ((error = namei(&nd)) != 0) 1369 return (error); 1370 vfslocked = NDHASGIANT(&nd); 1371 vp = 
nd.ni_vp; 1372 if (vp != NULL) { 1373 NDFREE(&nd, NDF_ONLY_PNBUF); 1374 if (vp == nd.ni_dvp) 1375 vrele(nd.ni_dvp); 1376 else 1377 vput(nd.ni_dvp); 1378 vrele(vp); 1379 VFS_UNLOCK_GIANT(vfslocked); 1380 return (EEXIST); 1381 } else { 1382 VATTR_NULL(&vattr); 1383 vattr.va_mode = (mode & ALLPERMS) & 1384 ~td->td_proc->p_fd->fd_cmask; 1385 vattr.va_rdev = dev; 1386 whiteout = 0; 1387 1388 switch (mode & S_IFMT) { 1389 case S_IFMT: /* used by badsect to flag bad sectors */ 1390 vattr.va_type = VBAD; 1391 break; 1392 case S_IFCHR: 1393 vattr.va_type = VCHR; 1394 break; 1395 case S_IFBLK: 1396 vattr.va_type = VBLK; 1397 break; 1398 case S_IFWHT: 1399 whiteout = 1; 1400 break; 1401 default: 1402 panic("kern_mknod: invalid mode"); 1403 } 1404 } 1405 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1406 NDFREE(&nd, NDF_ONLY_PNBUF); 1407 vput(nd.ni_dvp); 1408 VFS_UNLOCK_GIANT(vfslocked); 1409 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1410 return (error); 1411 goto restart; 1412 } 1413 #ifdef MAC 1414 if (error == 0 && !whiteout) 1415 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1416 &nd.ni_cnd, &vattr); 1417 #endif 1418 if (!error) { 1419 if (whiteout) 1420 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1421 else { 1422 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1423 &nd.ni_cnd, &vattr); 1424 if (error == 0) 1425 vput(nd.ni_vp); 1426 } 1427 } 1428 NDFREE(&nd, NDF_ONLY_PNBUF); 1429 vput(nd.ni_dvp); 1430 vn_finished_write(mp); 1431 VFS_UNLOCK_GIANT(vfslocked); 1432 return (error); 1433 } 1434 1435 /* 1436 * Create a named pipe. 
1437 */ 1438 #ifndef _SYS_SYSPROTO_H_ 1439 struct mkfifo_args { 1440 char *path; 1441 int mode; 1442 }; 1443 #endif 1444 int 1445 sys_mkfifo(td, uap) 1446 struct thread *td; 1447 register struct mkfifo_args /* { 1448 char *path; 1449 int mode; 1450 } */ *uap; 1451 { 1452 1453 return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode)); 1454 } 1455 1456 #ifndef _SYS_SYSPROTO_H_ 1457 struct mkfifoat_args { 1458 int fd; 1459 char *path; 1460 mode_t mode; 1461 }; 1462 #endif 1463 int 1464 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1465 { 1466 1467 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1468 uap->mode)); 1469 } 1470 1471 int 1472 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode) 1473 { 1474 1475 return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode)); 1476 } 1477 1478 int 1479 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1480 int mode) 1481 { 1482 struct mount *mp; 1483 struct vattr vattr; 1484 int error; 1485 struct nameidata nd; 1486 int vfslocked; 1487 1488 AUDIT_ARG_MODE(mode); 1489 restart: 1490 bwillwrite(); 1491 NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1, 1492 pathseg, path, fd, td); 1493 if ((error = namei(&nd)) != 0) 1494 return (error); 1495 vfslocked = NDHASGIANT(&nd); 1496 if (nd.ni_vp != NULL) { 1497 NDFREE(&nd, NDF_ONLY_PNBUF); 1498 if (nd.ni_vp == nd.ni_dvp) 1499 vrele(nd.ni_dvp); 1500 else 1501 vput(nd.ni_dvp); 1502 vrele(nd.ni_vp); 1503 VFS_UNLOCK_GIANT(vfslocked); 1504 return (EEXIST); 1505 } 1506 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1507 NDFREE(&nd, NDF_ONLY_PNBUF); 1508 vput(nd.ni_dvp); 1509 VFS_UNLOCK_GIANT(vfslocked); 1510 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1511 return (error); 1512 goto restart; 1513 } 1514 VATTR_NULL(&vattr); 1515 vattr.va_type = VFIFO; 1516 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1517 #ifdef MAC 1518 error = 
mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1519 &vattr); 1520 if (error) 1521 goto out; 1522 #endif 1523 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1524 if (error == 0) 1525 vput(nd.ni_vp); 1526 #ifdef MAC 1527 out: 1528 #endif 1529 vput(nd.ni_dvp); 1530 vn_finished_write(mp); 1531 VFS_UNLOCK_GIANT(vfslocked); 1532 NDFREE(&nd, NDF_ONLY_PNBUF); 1533 return (error); 1534 } 1535 1536 /* 1537 * Make a hard file link. 1538 */ 1539 #ifndef _SYS_SYSPROTO_H_ 1540 struct link_args { 1541 char *path; 1542 char *link; 1543 }; 1544 #endif 1545 int 1546 sys_link(td, uap) 1547 struct thread *td; 1548 register struct link_args /* { 1549 char *path; 1550 char *link; 1551 } */ *uap; 1552 { 1553 1554 return (kern_link(td, uap->path, uap->link, UIO_USERSPACE)); 1555 } 1556 1557 #ifndef _SYS_SYSPROTO_H_ 1558 struct linkat_args { 1559 int fd1; 1560 char *path1; 1561 int fd2; 1562 char *path2; 1563 int flag; 1564 }; 1565 #endif 1566 int 1567 sys_linkat(struct thread *td, struct linkat_args *uap) 1568 { 1569 int flag; 1570 1571 flag = uap->flag; 1572 if (flag & ~AT_SYMLINK_FOLLOW) 1573 return (EINVAL); 1574 1575 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1576 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? 
FOLLOW : NOFOLLOW)); 1577 } 1578 1579 int hardlink_check_uid = 0; 1580 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1581 &hardlink_check_uid, 0, 1582 "Unprivileged processes cannot create hard links to files owned by other " 1583 "users"); 1584 static int hardlink_check_gid = 0; 1585 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1586 &hardlink_check_gid, 0, 1587 "Unprivileged processes cannot create hard links to files owned by other " 1588 "groups"); 1589 1590 static int 1591 can_hardlink(struct vnode *vp, struct ucred *cred) 1592 { 1593 struct vattr va; 1594 int error; 1595 1596 if (!hardlink_check_uid && !hardlink_check_gid) 1597 return (0); 1598 1599 error = VOP_GETATTR(vp, &va, cred); 1600 if (error != 0) 1601 return (error); 1602 1603 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1604 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1605 if (error) 1606 return (error); 1607 } 1608 1609 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1610 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1611 if (error) 1612 return (error); 1613 } 1614 1615 return (0); 1616 } 1617 1618 int 1619 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg) 1620 { 1621 1622 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW)); 1623 } 1624 1625 int 1626 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1627 enum uio_seg segflg, int follow) 1628 { 1629 struct vnode *vp; 1630 struct mount *mp; 1631 struct nameidata nd; 1632 int vfslocked; 1633 int lvfslocked; 1634 int error; 1635 1636 bwillwrite(); 1637 NDINIT_AT(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, segflg, path1, 1638 fd1, td); 1639 1640 if ((error = namei(&nd)) != 0) 1641 return (error); 1642 vfslocked = NDHASGIANT(&nd); 1643 NDFREE(&nd, NDF_ONLY_PNBUF); 1644 vp = nd.ni_vp; 1645 if (vp->v_type == VDIR) { 1646 vrele(vp); 1647 VFS_UNLOCK_GIANT(vfslocked); 1648 return (EPERM); /* POSIX */ 1649 } 1650 if 
((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 1651 vrele(vp); 1652 VFS_UNLOCK_GIANT(vfslocked); 1653 return (error); 1654 } 1655 NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2, 1656 segflg, path2, fd2, td); 1657 if ((error = namei(&nd)) == 0) { 1658 lvfslocked = NDHASGIANT(&nd); 1659 if (nd.ni_vp != NULL) { 1660 if (nd.ni_dvp == nd.ni_vp) 1661 vrele(nd.ni_dvp); 1662 else 1663 vput(nd.ni_dvp); 1664 vrele(nd.ni_vp); 1665 error = EEXIST; 1666 } else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) 1667 == 0) { 1668 error = can_hardlink(vp, td->td_ucred); 1669 if (error == 0) 1670 #ifdef MAC 1671 error = mac_vnode_check_link(td->td_ucred, 1672 nd.ni_dvp, vp, &nd.ni_cnd); 1673 if (error == 0) 1674 #endif 1675 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1676 VOP_UNLOCK(vp, 0); 1677 vput(nd.ni_dvp); 1678 } 1679 NDFREE(&nd, NDF_ONLY_PNBUF); 1680 VFS_UNLOCK_GIANT(lvfslocked); 1681 } 1682 vrele(vp); 1683 vn_finished_write(mp); 1684 VFS_UNLOCK_GIANT(vfslocked); 1685 return (error); 1686 } 1687 1688 /* 1689 * Make a symbolic link. 
1690 */ 1691 #ifndef _SYS_SYSPROTO_H_ 1692 struct symlink_args { 1693 char *path; 1694 char *link; 1695 }; 1696 #endif 1697 int 1698 sys_symlink(td, uap) 1699 struct thread *td; 1700 register struct symlink_args /* { 1701 char *path; 1702 char *link; 1703 } */ *uap; 1704 { 1705 1706 return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE)); 1707 } 1708 1709 #ifndef _SYS_SYSPROTO_H_ 1710 struct symlinkat_args { 1711 char *path; 1712 int fd; 1713 char *path2; 1714 }; 1715 #endif 1716 int 1717 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1718 { 1719 1720 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1721 UIO_USERSPACE)); 1722 } 1723 1724 int 1725 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg) 1726 { 1727 1728 return (kern_symlinkat(td, path, AT_FDCWD, link, segflg)); 1729 } 1730 1731 int 1732 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1733 enum uio_seg segflg) 1734 { 1735 struct mount *mp; 1736 struct vattr vattr; 1737 char *syspath; 1738 int error; 1739 struct nameidata nd; 1740 int vfslocked; 1741 1742 if (segflg == UIO_SYSSPACE) { 1743 syspath = path1; 1744 } else { 1745 syspath = uma_zalloc(namei_zone, M_WAITOK); 1746 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1747 goto out; 1748 } 1749 AUDIT_ARG_TEXT(syspath); 1750 restart: 1751 bwillwrite(); 1752 NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1, 1753 segflg, path2, fd, td); 1754 if ((error = namei(&nd)) != 0) 1755 goto out; 1756 vfslocked = NDHASGIANT(&nd); 1757 if (nd.ni_vp) { 1758 NDFREE(&nd, NDF_ONLY_PNBUF); 1759 if (nd.ni_vp == nd.ni_dvp) 1760 vrele(nd.ni_dvp); 1761 else 1762 vput(nd.ni_dvp); 1763 vrele(nd.ni_vp); 1764 VFS_UNLOCK_GIANT(vfslocked); 1765 error = EEXIST; 1766 goto out; 1767 } 1768 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1769 NDFREE(&nd, NDF_ONLY_PNBUF); 1770 vput(nd.ni_dvp); 1771 VFS_UNLOCK_GIANT(vfslocked); 1772 if ((error = vn_start_write(NULL, &mp, 
V_XSLEEP | PCATCH)) != 0) 1773 goto out; 1774 goto restart; 1775 } 1776 VATTR_NULL(&vattr); 1777 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1778 #ifdef MAC 1779 vattr.va_type = VLNK; 1780 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1781 &vattr); 1782 if (error) 1783 goto out2; 1784 #endif 1785 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1786 if (error == 0) 1787 vput(nd.ni_vp); 1788 #ifdef MAC 1789 out2: 1790 #endif 1791 NDFREE(&nd, NDF_ONLY_PNBUF); 1792 vput(nd.ni_dvp); 1793 vn_finished_write(mp); 1794 VFS_UNLOCK_GIANT(vfslocked); 1795 out: 1796 if (segflg != UIO_SYSSPACE) 1797 uma_zfree(namei_zone, syspath); 1798 return (error); 1799 } 1800 1801 /* 1802 * Delete a whiteout from the filesystem. 1803 */ 1804 int 1805 sys_undelete(td, uap) 1806 struct thread *td; 1807 register struct undelete_args /* { 1808 char *path; 1809 } */ *uap; 1810 { 1811 int error; 1812 struct mount *mp; 1813 struct nameidata nd; 1814 int vfslocked; 1815 1816 restart: 1817 bwillwrite(); 1818 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1, 1819 UIO_USERSPACE, uap->path, td); 1820 error = namei(&nd); 1821 if (error) 1822 return (error); 1823 vfslocked = NDHASGIANT(&nd); 1824 1825 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1826 NDFREE(&nd, NDF_ONLY_PNBUF); 1827 if (nd.ni_vp == nd.ni_dvp) 1828 vrele(nd.ni_dvp); 1829 else 1830 vput(nd.ni_dvp); 1831 if (nd.ni_vp) 1832 vrele(nd.ni_vp); 1833 VFS_UNLOCK_GIANT(vfslocked); 1834 return (EEXIST); 1835 } 1836 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1837 NDFREE(&nd, NDF_ONLY_PNBUF); 1838 vput(nd.ni_dvp); 1839 VFS_UNLOCK_GIANT(vfslocked); 1840 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1841 return (error); 1842 goto restart; 1843 } 1844 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1845 NDFREE(&nd, NDF_ONLY_PNBUF); 1846 vput(nd.ni_dvp); 1847 vn_finished_write(mp); 1848 VFS_UNLOCK_GIANT(vfslocked); 1849 
return (error); 1850 } 1851 1852 /* 1853 * Delete a name from the filesystem. 1854 */ 1855 #ifndef _SYS_SYSPROTO_H_ 1856 struct unlink_args { 1857 char *path; 1858 }; 1859 #endif 1860 int 1861 sys_unlink(td, uap) 1862 struct thread *td; 1863 struct unlink_args /* { 1864 char *path; 1865 } */ *uap; 1866 { 1867 1868 return (kern_unlink(td, uap->path, UIO_USERSPACE)); 1869 } 1870 1871 #ifndef _SYS_SYSPROTO_H_ 1872 struct unlinkat_args { 1873 int fd; 1874 char *path; 1875 int flag; 1876 }; 1877 #endif 1878 int 1879 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1880 { 1881 int flag = uap->flag; 1882 int fd = uap->fd; 1883 char *path = uap->path; 1884 1885 if (flag & ~AT_REMOVEDIR) 1886 return (EINVAL); 1887 1888 if (flag & AT_REMOVEDIR) 1889 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1890 else 1891 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1892 } 1893 1894 int 1895 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg) 1896 { 1897 1898 return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0)); 1899 } 1900 1901 int 1902 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1903 ino_t oldinum) 1904 { 1905 struct mount *mp; 1906 struct vnode *vp; 1907 int error; 1908 struct nameidata nd; 1909 struct stat sb; 1910 int vfslocked; 1911 1912 restart: 1913 bwillwrite(); 1914 NDINIT_AT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1, 1915 pathseg, path, fd, td); 1916 if ((error = namei(&nd)) != 0) 1917 return (error == EINVAL ? EPERM : error); 1918 vfslocked = NDHASGIANT(&nd); 1919 vp = nd.ni_vp; 1920 if (vp->v_type == VDIR && oldinum == 0) { 1921 error = EPERM; /* POSIX */ 1922 } else if (oldinum != 0 && 1923 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1924 sb.st_ino != oldinum) { 1925 error = EIDRM; /* Identifier removed */ 1926 } else { 1927 /* 1928 * The root of a mounted filesystem cannot be deleted. 1929 * 1930 * XXX: can this only be a VDIR case? 
1931 */ 1932 if (vp->v_vflag & VV_ROOT) 1933 error = EBUSY; 1934 } 1935 if (error == 0) { 1936 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1937 NDFREE(&nd, NDF_ONLY_PNBUF); 1938 vput(nd.ni_dvp); 1939 if (vp == nd.ni_dvp) 1940 vrele(vp); 1941 else 1942 vput(vp); 1943 VFS_UNLOCK_GIANT(vfslocked); 1944 if ((error = vn_start_write(NULL, &mp, 1945 V_XSLEEP | PCATCH)) != 0) 1946 return (error); 1947 goto restart; 1948 } 1949 #ifdef MAC 1950 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1951 &nd.ni_cnd); 1952 if (error) 1953 goto out; 1954 #endif 1955 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1956 #ifdef MAC 1957 out: 1958 #endif 1959 vn_finished_write(mp); 1960 } 1961 NDFREE(&nd, NDF_ONLY_PNBUF); 1962 vput(nd.ni_dvp); 1963 if (vp == nd.ni_dvp) 1964 vrele(vp); 1965 else 1966 vput(vp); 1967 VFS_UNLOCK_GIANT(vfslocked); 1968 return (error); 1969 } 1970 1971 /* 1972 * Reposition read/write file offset. 1973 */ 1974 #ifndef _SYS_SYSPROTO_H_ 1975 struct lseek_args { 1976 int fd; 1977 int pad; 1978 off_t offset; 1979 int whence; 1980 }; 1981 #endif 1982 int 1983 sys_lseek(td, uap) 1984 struct thread *td; 1985 register struct lseek_args /* { 1986 int fd; 1987 int pad; 1988 off_t offset; 1989 int whence; 1990 } */ *uap; 1991 { 1992 struct ucred *cred = td->td_ucred; 1993 struct file *fp; 1994 struct vnode *vp; 1995 struct vattr vattr; 1996 off_t offset, size; 1997 int error, noneg; 1998 int vfslocked; 1999 2000 AUDIT_ARG_FD(uap->fd); 2001 if ((error = fget(td, uap->fd, CAP_SEEK, &fp)) != 0) 2002 return (error); 2003 if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) { 2004 fdrop(fp, td); 2005 return (ESPIPE); 2006 } 2007 vp = fp->f_vnode; 2008 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2009 noneg = (vp->v_type != VCHR); 2010 offset = uap->offset; 2011 switch (uap->whence) { 2012 case L_INCR: 2013 if (noneg && 2014 (fp->f_offset < 0 || 2015 (offset > 0 && fp->f_offset > OFF_MAX - offset))) { 2016 error = EOVERFLOW; 2017 break; 2018 } 2019 offset += 
fp->f_offset; 2020 break; 2021 case L_XTND: 2022 vn_lock(vp, LK_SHARED | LK_RETRY); 2023 error = VOP_GETATTR(vp, &vattr, cred); 2024 VOP_UNLOCK(vp, 0); 2025 if (error) 2026 break; 2027 2028 /* 2029 * If the file references a disk device, then fetch 2030 * the media size and use that to determine the ending 2031 * offset. 2032 */ 2033 if (vattr.va_size == 0 && vp->v_type == VCHR && 2034 fo_ioctl(fp, DIOCGMEDIASIZE, &size, cred, td) == 0) 2035 vattr.va_size = size; 2036 if (noneg && 2037 (vattr.va_size > OFF_MAX || 2038 (offset > 0 && vattr.va_size > OFF_MAX - offset))) { 2039 error = EOVERFLOW; 2040 break; 2041 } 2042 offset += vattr.va_size; 2043 break; 2044 case L_SET: 2045 break; 2046 case SEEK_DATA: 2047 error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td); 2048 break; 2049 case SEEK_HOLE: 2050 error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td); 2051 break; 2052 default: 2053 error = EINVAL; 2054 } 2055 if (error == 0 && noneg && offset < 0) 2056 error = EINVAL; 2057 if (error != 0) 2058 goto drop; 2059 fp->f_offset = offset; 2060 VFS_KNOTE_UNLOCKED(vp, 0); 2061 *(off_t *)(td->td_retval) = fp->f_offset; 2062 drop: 2063 fdrop(fp, td); 2064 VFS_UNLOCK_GIANT(vfslocked); 2065 return (error); 2066 } 2067 2068 #if defined(COMPAT_43) 2069 /* 2070 * Reposition read/write file offset. 
2071 */ 2072 #ifndef _SYS_SYSPROTO_H_ 2073 struct olseek_args { 2074 int fd; 2075 long offset; 2076 int whence; 2077 }; 2078 #endif 2079 int 2080 olseek(td, uap) 2081 struct thread *td; 2082 register struct olseek_args /* { 2083 int fd; 2084 long offset; 2085 int whence; 2086 } */ *uap; 2087 { 2088 struct lseek_args /* { 2089 int fd; 2090 int pad; 2091 off_t offset; 2092 int whence; 2093 } */ nuap; 2094 2095 nuap.fd = uap->fd; 2096 nuap.offset = uap->offset; 2097 nuap.whence = uap->whence; 2098 return (sys_lseek(td, &nuap)); 2099 } 2100 #endif /* COMPAT_43 */ 2101 2102 /* Version with the 'pad' argument */ 2103 int 2104 freebsd6_lseek(td, uap) 2105 struct thread *td; 2106 register struct freebsd6_lseek_args *uap; 2107 { 2108 struct lseek_args ouap; 2109 2110 ouap.fd = uap->fd; 2111 ouap.offset = uap->offset; 2112 ouap.whence = uap->whence; 2113 return (sys_lseek(td, &ouap)); 2114 } 2115 2116 /* 2117 * Check access permissions using passed credentials. 2118 */ 2119 static int 2120 vn_access(vp, user_flags, cred, td) 2121 struct vnode *vp; 2122 int user_flags; 2123 struct ucred *cred; 2124 struct thread *td; 2125 { 2126 int error; 2127 accmode_t accmode; 2128 2129 /* Flags == 0 means only check for existence. */ 2130 error = 0; 2131 if (user_flags) { 2132 accmode = 0; 2133 if (user_flags & R_OK) 2134 accmode |= VREAD; 2135 if (user_flags & W_OK) 2136 accmode |= VWRITE; 2137 if (user_flags & X_OK) 2138 accmode |= VEXEC; 2139 #ifdef MAC 2140 error = mac_vnode_check_access(cred, vp, accmode); 2141 if (error) 2142 return (error); 2143 #endif 2144 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2145 error = VOP_ACCESS(vp, accmode, cred, td); 2146 } 2147 return (error); 2148 } 2149 2150 /* 2151 * Check access permissions using "real" credentials. 
2152 */ 2153 #ifndef _SYS_SYSPROTO_H_ 2154 struct access_args { 2155 char *path; 2156 int amode; 2157 }; 2158 #endif 2159 int 2160 sys_access(td, uap) 2161 struct thread *td; 2162 register struct access_args /* { 2163 char *path; 2164 int amode; 2165 } */ *uap; 2166 { 2167 2168 return (kern_access(td, uap->path, UIO_USERSPACE, uap->amode)); 2169 } 2170 2171 #ifndef _SYS_SYSPROTO_H_ 2172 struct faccessat_args { 2173 int dirfd; 2174 char *path; 2175 int amode; 2176 int flag; 2177 } 2178 #endif 2179 int 2180 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2181 { 2182 2183 if (uap->flag & ~AT_EACCESS) 2184 return (EINVAL); 2185 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2186 uap->amode)); 2187 } 2188 2189 int 2190 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2191 { 2192 2193 return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, amode)); 2194 } 2195 2196 int 2197 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2198 int flag, int amode) 2199 { 2200 struct ucred *cred, *tmpcred; 2201 struct vnode *vp; 2202 struct nameidata nd; 2203 int vfslocked; 2204 int error; 2205 2206 /* 2207 * Create and modify a temporary credential instead of one that 2208 * is potentially shared. 
2209 */ 2210 if (!(flag & AT_EACCESS)) { 2211 cred = td->td_ucred; 2212 tmpcred = crdup(cred); 2213 tmpcred->cr_uid = cred->cr_ruid; 2214 tmpcred->cr_groups[0] = cred->cr_rgid; 2215 td->td_ucred = tmpcred; 2216 } else 2217 cred = tmpcred = td->td_ucred; 2218 AUDIT_ARG_VALUE(amode); 2219 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | 2220 AUDITVNODE1, pathseg, path, fd, CAP_FSTAT, td); 2221 if ((error = namei(&nd)) != 0) 2222 goto out1; 2223 vfslocked = NDHASGIANT(&nd); 2224 vp = nd.ni_vp; 2225 2226 error = vn_access(vp, amode, tmpcred, td); 2227 NDFREE(&nd, NDF_ONLY_PNBUF); 2228 vput(vp); 2229 VFS_UNLOCK_GIANT(vfslocked); 2230 out1: 2231 if (!(flag & AT_EACCESS)) { 2232 td->td_ucred = cred; 2233 crfree(tmpcred); 2234 } 2235 return (error); 2236 } 2237 2238 /* 2239 * Check access permissions using "effective" credentials. 2240 */ 2241 #ifndef _SYS_SYSPROTO_H_ 2242 struct eaccess_args { 2243 char *path; 2244 int amode; 2245 }; 2246 #endif 2247 int 2248 sys_eaccess(td, uap) 2249 struct thread *td; 2250 register struct eaccess_args /* { 2251 char *path; 2252 int amode; 2253 } */ *uap; 2254 { 2255 2256 return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->amode)); 2257 } 2258 2259 int 2260 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2261 { 2262 2263 return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, amode)); 2264 } 2265 2266 #if defined(COMPAT_43) 2267 /* 2268 * Get file status; this version follows links. 
2269 */ 2270 #ifndef _SYS_SYSPROTO_H_ 2271 struct ostat_args { 2272 char *path; 2273 struct ostat *ub; 2274 }; 2275 #endif 2276 int 2277 ostat(td, uap) 2278 struct thread *td; 2279 register struct ostat_args /* { 2280 char *path; 2281 struct ostat *ub; 2282 } */ *uap; 2283 { 2284 struct stat sb; 2285 struct ostat osb; 2286 int error; 2287 2288 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2289 if (error) 2290 return (error); 2291 cvtstat(&sb, &osb); 2292 error = copyout(&osb, uap->ub, sizeof (osb)); 2293 return (error); 2294 } 2295 2296 /* 2297 * Get file status; this version does not follow links. 2298 */ 2299 #ifndef _SYS_SYSPROTO_H_ 2300 struct olstat_args { 2301 char *path; 2302 struct ostat *ub; 2303 }; 2304 #endif 2305 int 2306 olstat(td, uap) 2307 struct thread *td; 2308 register struct olstat_args /* { 2309 char *path; 2310 struct ostat *ub; 2311 } */ *uap; 2312 { 2313 struct stat sb; 2314 struct ostat osb; 2315 int error; 2316 2317 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2318 if (error) 2319 return (error); 2320 cvtstat(&sb, &osb); 2321 error = copyout(&osb, uap->ub, sizeof (osb)); 2322 return (error); 2323 } 2324 2325 /* 2326 * Convert from an old to a new stat structure. 2327 */ 2328 void 2329 cvtstat(st, ost) 2330 struct stat *st; 2331 struct ostat *ost; 2332 { 2333 2334 ost->st_dev = st->st_dev; 2335 ost->st_ino = st->st_ino; 2336 ost->st_mode = st->st_mode; 2337 ost->st_nlink = st->st_nlink; 2338 ost->st_uid = st->st_uid; 2339 ost->st_gid = st->st_gid; 2340 ost->st_rdev = st->st_rdev; 2341 if (st->st_size < (quad_t)1 << 32) 2342 ost->st_size = st->st_size; 2343 else 2344 ost->st_size = -2; 2345 ost->st_atim = st->st_atim; 2346 ost->st_mtim = st->st_mtim; 2347 ost->st_ctim = st->st_ctim; 2348 ost->st_blksize = st->st_blksize; 2349 ost->st_blocks = st->st_blocks; 2350 ost->st_flags = st->st_flags; 2351 ost->st_gen = st->st_gen; 2352 } 2353 #endif /* COMPAT_43 */ 2354 2355 /* 2356 * Get file status; this version follows links. 
2357 */ 2358 #ifndef _SYS_SYSPROTO_H_ 2359 struct stat_args { 2360 char *path; 2361 struct stat *ub; 2362 }; 2363 #endif 2364 int 2365 sys_stat(td, uap) 2366 struct thread *td; 2367 register struct stat_args /* { 2368 char *path; 2369 struct stat *ub; 2370 } */ *uap; 2371 { 2372 struct stat sb; 2373 int error; 2374 2375 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2376 if (error == 0) 2377 error = copyout(&sb, uap->ub, sizeof (sb)); 2378 return (error); 2379 } 2380 2381 #ifndef _SYS_SYSPROTO_H_ 2382 struct fstatat_args { 2383 int fd; 2384 char *path; 2385 struct stat *buf; 2386 int flag; 2387 } 2388 #endif 2389 int 2390 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2391 { 2392 struct stat sb; 2393 int error; 2394 2395 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2396 UIO_USERSPACE, &sb); 2397 if (error == 0) 2398 error = copyout(&sb, uap->buf, sizeof (sb)); 2399 return (error); 2400 } 2401 2402 int 2403 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2404 { 2405 2406 return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp)); 2407 } 2408 2409 int 2410 kern_statat(struct thread *td, int flag, int fd, char *path, 2411 enum uio_seg pathseg, struct stat *sbp) 2412 { 2413 2414 return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL)); 2415 } 2416 2417 int 2418 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path, 2419 enum uio_seg pathseg, struct stat *sbp, 2420 void (*hook)(struct vnode *vp, struct stat *sbp)) 2421 { 2422 struct nameidata nd; 2423 struct stat sb; 2424 int error, vfslocked; 2425 2426 if (flag & ~AT_SYMLINK_NOFOLLOW) 2427 return (EINVAL); 2428 2429 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : 2430 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | MPSAFE, pathseg, 2431 path, fd, CAP_FSTAT, td); 2432 2433 if ((error = namei(&nd)) != 0) 2434 return (error); 2435 vfslocked = NDHASGIANT(&nd); 2436 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2437 if (!error) { 2438 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0); 2439 if (S_ISREG(sb.st_mode)) 2440 SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0); 2441 if (__predict_false(hook != NULL)) 2442 hook(nd.ni_vp, &sb); 2443 } 2444 NDFREE(&nd, NDF_ONLY_PNBUF); 2445 vput(nd.ni_vp); 2446 VFS_UNLOCK_GIANT(vfslocked); 2447 if (error) 2448 return (error); 2449 *sbp = sb; 2450 #ifdef KTRACE 2451 if (KTRPOINT(td, KTR_STRUCT)) 2452 ktrstat(&sb); 2453 #endif 2454 return (0); 2455 } 2456 2457 /* 2458 * Get file status; this version does not follow links. 2459 */ 2460 #ifndef _SYS_SYSPROTO_H_ 2461 struct lstat_args { 2462 char *path; 2463 struct stat *ub; 2464 }; 2465 #endif 2466 int 2467 sys_lstat(td, uap) 2468 struct thread *td; 2469 register struct lstat_args /* { 2470 char *path; 2471 struct stat *ub; 2472 } */ *uap; 2473 { 2474 struct stat sb; 2475 int error; 2476 2477 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2478 if (error == 0) 2479 error = copyout(&sb, uap->ub, sizeof (sb)); 2480 return (error); 2481 } 2482 2483 int 2484 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2485 { 2486 2487 return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg, 2488 sbp)); 2489 } 2490 2491 /* 2492 * Implementation of the NetBSD [l]stat() functions. 
2493 */ 2494 void 2495 cvtnstat(sb, nsb) 2496 struct stat *sb; 2497 struct nstat *nsb; 2498 { 2499 bzero(nsb, sizeof *nsb); 2500 nsb->st_dev = sb->st_dev; 2501 nsb->st_ino = sb->st_ino; 2502 nsb->st_mode = sb->st_mode; 2503 nsb->st_nlink = sb->st_nlink; 2504 nsb->st_uid = sb->st_uid; 2505 nsb->st_gid = sb->st_gid; 2506 nsb->st_rdev = sb->st_rdev; 2507 nsb->st_atim = sb->st_atim; 2508 nsb->st_mtim = sb->st_mtim; 2509 nsb->st_ctim = sb->st_ctim; 2510 nsb->st_size = sb->st_size; 2511 nsb->st_blocks = sb->st_blocks; 2512 nsb->st_blksize = sb->st_blksize; 2513 nsb->st_flags = sb->st_flags; 2514 nsb->st_gen = sb->st_gen; 2515 nsb->st_birthtim = sb->st_birthtim; 2516 } 2517 2518 #ifndef _SYS_SYSPROTO_H_ 2519 struct nstat_args { 2520 char *path; 2521 struct nstat *ub; 2522 }; 2523 #endif 2524 int 2525 sys_nstat(td, uap) 2526 struct thread *td; 2527 register struct nstat_args /* { 2528 char *path; 2529 struct nstat *ub; 2530 } */ *uap; 2531 { 2532 struct stat sb; 2533 struct nstat nsb; 2534 int error; 2535 2536 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2537 if (error) 2538 return (error); 2539 cvtnstat(&sb, &nsb); 2540 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2541 return (error); 2542 } 2543 2544 /* 2545 * NetBSD lstat. Get file status; this version does not follow links. 2546 */ 2547 #ifndef _SYS_SYSPROTO_H_ 2548 struct lstat_args { 2549 char *path; 2550 struct stat *ub; 2551 }; 2552 #endif 2553 int 2554 sys_nlstat(td, uap) 2555 struct thread *td; 2556 register struct nlstat_args /* { 2557 char *path; 2558 struct nstat *ub; 2559 } */ *uap; 2560 { 2561 struct stat sb; 2562 struct nstat nsb; 2563 int error; 2564 2565 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2566 if (error) 2567 return (error); 2568 cvtnstat(&sb, &nsb); 2569 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2570 return (error); 2571 } 2572 2573 /* 2574 * Get configurable pathname variables. 
2575 */ 2576 #ifndef _SYS_SYSPROTO_H_ 2577 struct pathconf_args { 2578 char *path; 2579 int name; 2580 }; 2581 #endif 2582 int 2583 sys_pathconf(td, uap) 2584 struct thread *td; 2585 register struct pathconf_args /* { 2586 char *path; 2587 int name; 2588 } */ *uap; 2589 { 2590 2591 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2592 } 2593 2594 #ifndef _SYS_SYSPROTO_H_ 2595 struct lpathconf_args { 2596 char *path; 2597 int name; 2598 }; 2599 #endif 2600 int 2601 sys_lpathconf(td, uap) 2602 struct thread *td; 2603 register struct lpathconf_args /* { 2604 char *path; 2605 int name; 2606 } */ *uap; 2607 { 2608 2609 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, NOFOLLOW)); 2610 } 2611 2612 int 2613 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2614 u_long flags) 2615 { 2616 struct nameidata nd; 2617 int error, vfslocked; 2618 2619 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1 | 2620 flags, pathseg, path, td); 2621 if ((error = namei(&nd)) != 0) 2622 return (error); 2623 vfslocked = NDHASGIANT(&nd); 2624 NDFREE(&nd, NDF_ONLY_PNBUF); 2625 2626 /* If asynchronous I/O is available, it works for all files. */ 2627 if (name == _PC_ASYNC_IO) 2628 td->td_retval[0] = async_io_version; 2629 else 2630 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2631 vput(nd.ni_vp); 2632 VFS_UNLOCK_GIANT(vfslocked); 2633 return (error); 2634 } 2635 2636 /* 2637 * Return target name of a symbolic link. 
2638 */ 2639 #ifndef _SYS_SYSPROTO_H_ 2640 struct readlink_args { 2641 char *path; 2642 char *buf; 2643 size_t count; 2644 }; 2645 #endif 2646 int 2647 sys_readlink(td, uap) 2648 struct thread *td; 2649 register struct readlink_args /* { 2650 char *path; 2651 char *buf; 2652 size_t count; 2653 } */ *uap; 2654 { 2655 2656 return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf, 2657 UIO_USERSPACE, uap->count)); 2658 } 2659 #ifndef _SYS_SYSPROTO_H_ 2660 struct readlinkat_args { 2661 int fd; 2662 char *path; 2663 char *buf; 2664 size_t bufsize; 2665 }; 2666 #endif 2667 int 2668 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2669 { 2670 2671 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2672 uap->buf, UIO_USERSPACE, uap->bufsize)); 2673 } 2674 2675 int 2676 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf, 2677 enum uio_seg bufseg, size_t count) 2678 { 2679 2680 return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg, 2681 count)); 2682 } 2683 2684 int 2685 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2686 char *buf, enum uio_seg bufseg, size_t count) 2687 { 2688 struct vnode *vp; 2689 struct iovec aiov; 2690 struct uio auio; 2691 int error; 2692 struct nameidata nd; 2693 int vfslocked; 2694 2695 if (count > IOSIZE_MAX) 2696 return (EINVAL); 2697 2698 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | 2699 AUDITVNODE1, pathseg, path, fd, td); 2700 2701 if ((error = namei(&nd)) != 0) 2702 return (error); 2703 NDFREE(&nd, NDF_ONLY_PNBUF); 2704 vfslocked = NDHASGIANT(&nd); 2705 vp = nd.ni_vp; 2706 #ifdef MAC 2707 error = mac_vnode_check_readlink(td->td_ucred, vp); 2708 if (error) { 2709 vput(vp); 2710 VFS_UNLOCK_GIANT(vfslocked); 2711 return (error); 2712 } 2713 #endif 2714 if (vp->v_type != VLNK) 2715 error = EINVAL; 2716 else { 2717 aiov.iov_base = buf; 2718 aiov.iov_len = count; 2719 auio.uio_iov = &aiov; 2720 auio.uio_iovcnt = 1; 2721 
auio.uio_offset = 0; 2722 auio.uio_rw = UIO_READ; 2723 auio.uio_segflg = bufseg; 2724 auio.uio_td = td; 2725 auio.uio_resid = count; 2726 error = VOP_READLINK(vp, &auio, td->td_ucred); 2727 } 2728 vput(vp); 2729 VFS_UNLOCK_GIANT(vfslocked); 2730 td->td_retval[0] = count - auio.uio_resid; 2731 return (error); 2732 } 2733 2734 /* 2735 * Common implementation code for chflags() and fchflags(). 2736 */ 2737 static int 2738 setfflags(td, vp, flags) 2739 struct thread *td; 2740 struct vnode *vp; 2741 int flags; 2742 { 2743 int error; 2744 struct mount *mp; 2745 struct vattr vattr; 2746 2747 /* We can't support the value matching VNOVAL. */ 2748 if (flags == VNOVAL) 2749 return (EOPNOTSUPP); 2750 2751 /* 2752 * Prevent non-root users from setting flags on devices. When 2753 * a device is reused, users can retain ownership of the device 2754 * if they are allowed to set flags and programs assume that 2755 * chown can't fail when done as root. 2756 */ 2757 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2758 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2759 if (error) 2760 return (error); 2761 } 2762 2763 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2764 return (error); 2765 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2766 VATTR_NULL(&vattr); 2767 vattr.va_flags = flags; 2768 #ifdef MAC 2769 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2770 if (error == 0) 2771 #endif 2772 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2773 VOP_UNLOCK(vp, 0); 2774 vn_finished_write(mp); 2775 return (error); 2776 } 2777 2778 /* 2779 * Change flags of a file given a path name. 
2780 */ 2781 #ifndef _SYS_SYSPROTO_H_ 2782 struct chflags_args { 2783 char *path; 2784 int flags; 2785 }; 2786 #endif 2787 int 2788 sys_chflags(td, uap) 2789 struct thread *td; 2790 register struct chflags_args /* { 2791 char *path; 2792 int flags; 2793 } */ *uap; 2794 { 2795 int error; 2796 struct nameidata nd; 2797 int vfslocked; 2798 2799 AUDIT_ARG_FFLAGS(uap->flags); 2800 NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE, 2801 uap->path, td); 2802 if ((error = namei(&nd)) != 0) 2803 return (error); 2804 NDFREE(&nd, NDF_ONLY_PNBUF); 2805 vfslocked = NDHASGIANT(&nd); 2806 error = setfflags(td, nd.ni_vp, uap->flags); 2807 vrele(nd.ni_vp); 2808 VFS_UNLOCK_GIANT(vfslocked); 2809 return (error); 2810 } 2811 2812 /* 2813 * Same as chflags() but doesn't follow symlinks. 2814 */ 2815 int 2816 sys_lchflags(td, uap) 2817 struct thread *td; 2818 register struct lchflags_args /* { 2819 char *path; 2820 int flags; 2821 } */ *uap; 2822 { 2823 int error; 2824 struct nameidata nd; 2825 int vfslocked; 2826 2827 AUDIT_ARG_FFLAGS(uap->flags); 2828 NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE, 2829 uap->path, td); 2830 if ((error = namei(&nd)) != 0) 2831 return (error); 2832 vfslocked = NDHASGIANT(&nd); 2833 NDFREE(&nd, NDF_ONLY_PNBUF); 2834 error = setfflags(td, nd.ni_vp, uap->flags); 2835 vrele(nd.ni_vp); 2836 VFS_UNLOCK_GIANT(vfslocked); 2837 return (error); 2838 } 2839 2840 /* 2841 * Change flags of a file given a file descriptor. 
2842 */ 2843 #ifndef _SYS_SYSPROTO_H_ 2844 struct fchflags_args { 2845 int fd; 2846 int flags; 2847 }; 2848 #endif 2849 int 2850 sys_fchflags(td, uap) 2851 struct thread *td; 2852 register struct fchflags_args /* { 2853 int fd; 2854 int flags; 2855 } */ *uap; 2856 { 2857 struct file *fp; 2858 int vfslocked; 2859 int error; 2860 2861 AUDIT_ARG_FD(uap->fd); 2862 AUDIT_ARG_FFLAGS(uap->flags); 2863 if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FCHFLAGS, 2864 &fp)) != 0) 2865 return (error); 2866 vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount); 2867 #ifdef AUDIT 2868 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2869 AUDIT_ARG_VNODE1(fp->f_vnode); 2870 VOP_UNLOCK(fp->f_vnode, 0); 2871 #endif 2872 error = setfflags(td, fp->f_vnode, uap->flags); 2873 VFS_UNLOCK_GIANT(vfslocked); 2874 fdrop(fp, td); 2875 return (error); 2876 } 2877 2878 /* 2879 * Common implementation code for chmod(), lchmod() and fchmod(). 2880 */ 2881 int 2882 setfmode(td, cred, vp, mode) 2883 struct thread *td; 2884 struct ucred *cred; 2885 struct vnode *vp; 2886 int mode; 2887 { 2888 int error; 2889 struct mount *mp; 2890 struct vattr vattr; 2891 2892 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2893 return (error); 2894 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2895 VATTR_NULL(&vattr); 2896 vattr.va_mode = mode & ALLPERMS; 2897 #ifdef MAC 2898 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2899 if (error == 0) 2900 #endif 2901 error = VOP_SETATTR(vp, &vattr, cred); 2902 VOP_UNLOCK(vp, 0); 2903 vn_finished_write(mp); 2904 return (error); 2905 } 2906 2907 /* 2908 * Change mode of a file given path name. 
2909 */ 2910 #ifndef _SYS_SYSPROTO_H_ 2911 struct chmod_args { 2912 char *path; 2913 int mode; 2914 }; 2915 #endif 2916 int 2917 sys_chmod(td, uap) 2918 struct thread *td; 2919 register struct chmod_args /* { 2920 char *path; 2921 int mode; 2922 } */ *uap; 2923 { 2924 2925 return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode)); 2926 } 2927 2928 #ifndef _SYS_SYSPROTO_H_ 2929 struct fchmodat_args { 2930 int dirfd; 2931 char *path; 2932 mode_t mode; 2933 int flag; 2934 } 2935 #endif 2936 int 2937 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2938 { 2939 int flag = uap->flag; 2940 int fd = uap->fd; 2941 char *path = uap->path; 2942 mode_t mode = uap->mode; 2943 2944 if (flag & ~AT_SYMLINK_NOFOLLOW) 2945 return (EINVAL); 2946 2947 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2948 } 2949 2950 int 2951 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode) 2952 { 2953 2954 return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0)); 2955 } 2956 2957 /* 2958 * Change mode of a file given path name (don't follow links.) 2959 */ 2960 #ifndef _SYS_SYSPROTO_H_ 2961 struct lchmod_args { 2962 char *path; 2963 int mode; 2964 }; 2965 #endif 2966 int 2967 sys_lchmod(td, uap) 2968 struct thread *td; 2969 register struct lchmod_args /* { 2970 char *path; 2971 int mode; 2972 } */ *uap; 2973 { 2974 2975 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2976 uap->mode, AT_SYMLINK_NOFOLLOW)); 2977 } 2978 2979 2980 int 2981 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2982 mode_t mode, int flag) 2983 { 2984 int error; 2985 struct nameidata nd; 2986 int vfslocked; 2987 int follow; 2988 2989 AUDIT_ARG_MODE(mode); 2990 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; 2991 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg, 2992 path, fd, CAP_FCHMOD, td); 2993 if ((error = namei(&nd)) != 0) 2994 return (error); 2995 vfslocked = NDHASGIANT(&nd); 2996 NDFREE(&nd, NDF_ONLY_PNBUF); 2997 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2998 vrele(nd.ni_vp); 2999 VFS_UNLOCK_GIANT(vfslocked); 3000 return (error); 3001 } 3002 3003 /* 3004 * Change mode of a file given a file descriptor. 3005 */ 3006 #ifndef _SYS_SYSPROTO_H_ 3007 struct fchmod_args { 3008 int fd; 3009 int mode; 3010 }; 3011 #endif 3012 int 3013 sys_fchmod(struct thread *td, struct fchmod_args *uap) 3014 { 3015 struct file *fp; 3016 int error; 3017 3018 AUDIT_ARG_FD(uap->fd); 3019 AUDIT_ARG_MODE(uap->mode); 3020 3021 error = fget(td, uap->fd, CAP_FCHMOD, &fp); 3022 if (error != 0) 3023 return (error); 3024 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 3025 fdrop(fp, td); 3026 return (error); 3027 } 3028 3029 /* 3030 * Common implementation for chown(), lchown(), and fchown() 3031 */ 3032 int 3033 setfown(td, cred, vp, uid, gid) 3034 struct thread *td; 3035 struct ucred *cred; 3036 struct vnode *vp; 3037 uid_t uid; 3038 gid_t gid; 3039 { 3040 int error; 3041 struct mount *mp; 3042 struct vattr vattr; 3043 3044 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3045 return (error); 3046 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3047 VATTR_NULL(&vattr); 3048 vattr.va_uid = uid; 3049 vattr.va_gid = gid; 3050 #ifdef MAC 3051 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 3052 vattr.va_gid); 3053 if (error == 0) 3054 #endif 3055 error = VOP_SETATTR(vp, &vattr, cred); 3056 VOP_UNLOCK(vp, 0); 3057 vn_finished_write(mp); 3058 return (error); 3059 } 3060 3061 /* 3062 * Set ownership given a path name. 
3063 */ 3064 #ifndef _SYS_SYSPROTO_H_ 3065 struct chown_args { 3066 char *path; 3067 int uid; 3068 int gid; 3069 }; 3070 #endif 3071 int 3072 sys_chown(td, uap) 3073 struct thread *td; 3074 register struct chown_args /* { 3075 char *path; 3076 int uid; 3077 int gid; 3078 } */ *uap; 3079 { 3080 3081 return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 3082 } 3083 3084 #ifndef _SYS_SYSPROTO_H_ 3085 struct fchownat_args { 3086 int fd; 3087 const char * path; 3088 uid_t uid; 3089 gid_t gid; 3090 int flag; 3091 }; 3092 #endif 3093 int 3094 sys_fchownat(struct thread *td, struct fchownat_args *uap) 3095 { 3096 int flag; 3097 3098 flag = uap->flag; 3099 if (flag & ~AT_SYMLINK_NOFOLLOW) 3100 return (EINVAL); 3101 3102 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 3103 uap->gid, uap->flag)); 3104 } 3105 3106 int 3107 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 3108 int gid) 3109 { 3110 3111 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0)); 3112 } 3113 3114 int 3115 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3116 int uid, int gid, int flag) 3117 { 3118 struct nameidata nd; 3119 int error, vfslocked, follow; 3120 3121 AUDIT_ARG_OWNER(uid, gid); 3122 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3123 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg, 3124 path, fd, CAP_FCHOWN, td); 3125 3126 if ((error = namei(&nd)) != 0) 3127 return (error); 3128 vfslocked = NDHASGIANT(&nd); 3129 NDFREE(&nd, NDF_ONLY_PNBUF); 3130 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3131 vrele(nd.ni_vp); 3132 VFS_UNLOCK_GIANT(vfslocked); 3133 return (error); 3134 } 3135 3136 /* 3137 * Set ownership given a path name, do not cross symlinks. 
3138 */ 3139 #ifndef _SYS_SYSPROTO_H_ 3140 struct lchown_args { 3141 char *path; 3142 int uid; 3143 int gid; 3144 }; 3145 #endif 3146 int 3147 sys_lchown(td, uap) 3148 struct thread *td; 3149 register struct lchown_args /* { 3150 char *path; 3151 int uid; 3152 int gid; 3153 } */ *uap; 3154 { 3155 3156 return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 3157 } 3158 3159 int 3160 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 3161 int gid) 3162 { 3163 3164 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 3165 AT_SYMLINK_NOFOLLOW)); 3166 } 3167 3168 /* 3169 * Set ownership given a file descriptor. 3170 */ 3171 #ifndef _SYS_SYSPROTO_H_ 3172 struct fchown_args { 3173 int fd; 3174 int uid; 3175 int gid; 3176 }; 3177 #endif 3178 int 3179 sys_fchown(td, uap) 3180 struct thread *td; 3181 register struct fchown_args /* { 3182 int fd; 3183 int uid; 3184 int gid; 3185 } */ *uap; 3186 { 3187 struct file *fp; 3188 int error; 3189 3190 AUDIT_ARG_FD(uap->fd); 3191 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3192 error = fget(td, uap->fd, CAP_FCHOWN, &fp); 3193 if (error != 0) 3194 return (error); 3195 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3196 fdrop(fp, td); 3197 return (error); 3198 } 3199 3200 /* 3201 * Common implementation code for utimes(), lutimes(), and futimes(). 
3202 */ 3203 static int 3204 getutimes(usrtvp, tvpseg, tsp) 3205 const struct timeval *usrtvp; 3206 enum uio_seg tvpseg; 3207 struct timespec *tsp; 3208 { 3209 struct timeval tv[2]; 3210 const struct timeval *tvp; 3211 int error; 3212 3213 if (usrtvp == NULL) { 3214 vfs_timestamp(&tsp[0]); 3215 tsp[1] = tsp[0]; 3216 } else { 3217 if (tvpseg == UIO_SYSSPACE) { 3218 tvp = usrtvp; 3219 } else { 3220 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3221 return (error); 3222 tvp = tv; 3223 } 3224 3225 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3226 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3227 return (EINVAL); 3228 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3229 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3230 } 3231 return (0); 3232 } 3233 3234 /* 3235 * Common implementation code for utimes(), lutimes(), and futimes(). 3236 */ 3237 static int 3238 setutimes(td, vp, ts, numtimes, nullflag) 3239 struct thread *td; 3240 struct vnode *vp; 3241 const struct timespec *ts; 3242 int numtimes; 3243 int nullflag; 3244 { 3245 int error, setbirthtime; 3246 struct mount *mp; 3247 struct vattr vattr; 3248 3249 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3250 return (error); 3251 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3252 setbirthtime = 0; 3253 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3254 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3255 setbirthtime = 1; 3256 VATTR_NULL(&vattr); 3257 vattr.va_atime = ts[0]; 3258 vattr.va_mtime = ts[1]; 3259 if (setbirthtime) 3260 vattr.va_birthtime = ts[1]; 3261 if (numtimes > 2) 3262 vattr.va_birthtime = ts[2]; 3263 if (nullflag) 3264 vattr.va_vaflags |= VA_UTIMES_NULL; 3265 #ifdef MAC 3266 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3267 vattr.va_mtime); 3268 #endif 3269 if (error == 0) 3270 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3271 VOP_UNLOCK(vp, 0); 3272 vn_finished_write(mp); 3273 return (error); 3274 } 3275 3276 /* 3277 * Set the access and modification 
times of a file. 3278 */ 3279 #ifndef _SYS_SYSPROTO_H_ 3280 struct utimes_args { 3281 char *path; 3282 struct timeval *tptr; 3283 }; 3284 #endif 3285 int 3286 sys_utimes(td, uap) 3287 struct thread *td; 3288 register struct utimes_args /* { 3289 char *path; 3290 struct timeval *tptr; 3291 } */ *uap; 3292 { 3293 3294 return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3295 UIO_USERSPACE)); 3296 } 3297 3298 #ifndef _SYS_SYSPROTO_H_ 3299 struct futimesat_args { 3300 int fd; 3301 const char * path; 3302 const struct timeval * times; 3303 }; 3304 #endif 3305 int 3306 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3307 { 3308 3309 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3310 uap->times, UIO_USERSPACE)); 3311 } 3312 3313 int 3314 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg, 3315 struct timeval *tptr, enum uio_seg tptrseg) 3316 { 3317 3318 return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg)); 3319 } 3320 3321 int 3322 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3323 struct timeval *tptr, enum uio_seg tptrseg) 3324 { 3325 struct nameidata nd; 3326 struct timespec ts[2]; 3327 int error, vfslocked; 3328 3329 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3330 return (error); 3331 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, 3332 path, fd, CAP_FUTIMES, td); 3333 3334 if ((error = namei(&nd)) != 0) 3335 return (error); 3336 vfslocked = NDHASGIANT(&nd); 3337 NDFREE(&nd, NDF_ONLY_PNBUF); 3338 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3339 vrele(nd.ni_vp); 3340 VFS_UNLOCK_GIANT(vfslocked); 3341 return (error); 3342 } 3343 3344 /* 3345 * Set the access and modification times of a file. 
3346 */ 3347 #ifndef _SYS_SYSPROTO_H_ 3348 struct lutimes_args { 3349 char *path; 3350 struct timeval *tptr; 3351 }; 3352 #endif 3353 int 3354 sys_lutimes(td, uap) 3355 struct thread *td; 3356 register struct lutimes_args /* { 3357 char *path; 3358 struct timeval *tptr; 3359 } */ *uap; 3360 { 3361 3362 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3363 UIO_USERSPACE)); 3364 } 3365 3366 int 3367 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3368 struct timeval *tptr, enum uio_seg tptrseg) 3369 { 3370 struct timespec ts[2]; 3371 int error; 3372 struct nameidata nd; 3373 int vfslocked; 3374 3375 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3376 return (error); 3377 NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td); 3378 if ((error = namei(&nd)) != 0) 3379 return (error); 3380 vfslocked = NDHASGIANT(&nd); 3381 NDFREE(&nd, NDF_ONLY_PNBUF); 3382 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3383 vrele(nd.ni_vp); 3384 VFS_UNLOCK_GIANT(vfslocked); 3385 return (error); 3386 } 3387 3388 /* 3389 * Set the access and modification times of a file. 
3390 */ 3391 #ifndef _SYS_SYSPROTO_H_ 3392 struct futimes_args { 3393 int fd; 3394 struct timeval *tptr; 3395 }; 3396 #endif 3397 int 3398 sys_futimes(td, uap) 3399 struct thread *td; 3400 register struct futimes_args /* { 3401 int fd; 3402 struct timeval *tptr; 3403 } */ *uap; 3404 { 3405 3406 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3407 } 3408 3409 int 3410 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3411 enum uio_seg tptrseg) 3412 { 3413 struct timespec ts[2]; 3414 struct file *fp; 3415 int vfslocked; 3416 int error; 3417 3418 AUDIT_ARG_FD(fd); 3419 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3420 return (error); 3421 if ((error = getvnode(td->td_proc->p_fd, fd, CAP_FUTIMES, &fp)) 3422 != 0) 3423 return (error); 3424 vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount); 3425 #ifdef AUDIT 3426 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3427 AUDIT_ARG_VNODE1(fp->f_vnode); 3428 VOP_UNLOCK(fp->f_vnode, 0); 3429 #endif 3430 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3431 VFS_UNLOCK_GIANT(vfslocked); 3432 fdrop(fp, td); 3433 return (error); 3434 } 3435 3436 /* 3437 * Truncate a file given its path name. 
3438 */ 3439 #ifndef _SYS_SYSPROTO_H_ 3440 struct truncate_args { 3441 char *path; 3442 int pad; 3443 off_t length; 3444 }; 3445 #endif 3446 int 3447 sys_truncate(td, uap) 3448 struct thread *td; 3449 register struct truncate_args /* { 3450 char *path; 3451 int pad; 3452 off_t length; 3453 } */ *uap; 3454 { 3455 3456 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3457 } 3458 3459 int 3460 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3461 { 3462 struct mount *mp; 3463 struct vnode *vp; 3464 struct vattr vattr; 3465 int error; 3466 struct nameidata nd; 3467 int vfslocked; 3468 3469 if (length < 0) 3470 return(EINVAL); 3471 NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td); 3472 if ((error = namei(&nd)) != 0) 3473 return (error); 3474 vfslocked = NDHASGIANT(&nd); 3475 vp = nd.ni_vp; 3476 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3477 vrele(vp); 3478 VFS_UNLOCK_GIANT(vfslocked); 3479 return (error); 3480 } 3481 NDFREE(&nd, NDF_ONLY_PNBUF); 3482 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3483 if (vp->v_type == VDIR) 3484 error = EISDIR; 3485 #ifdef MAC 3486 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3487 } 3488 #endif 3489 else if ((error = vn_writechk(vp)) == 0 && 3490 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3491 VATTR_NULL(&vattr); 3492 vattr.va_size = length; 3493 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3494 } 3495 vput(vp); 3496 vn_finished_write(mp); 3497 VFS_UNLOCK_GIANT(vfslocked); 3498 return (error); 3499 } 3500 3501 #if defined(COMPAT_43) 3502 /* 3503 * Truncate a file given its path name. 
3504 */ 3505 #ifndef _SYS_SYSPROTO_H_ 3506 struct otruncate_args { 3507 char *path; 3508 long length; 3509 }; 3510 #endif 3511 int 3512 otruncate(td, uap) 3513 struct thread *td; 3514 register struct otruncate_args /* { 3515 char *path; 3516 long length; 3517 } */ *uap; 3518 { 3519 struct truncate_args /* { 3520 char *path; 3521 int pad; 3522 off_t length; 3523 } */ nuap; 3524 3525 nuap.path = uap->path; 3526 nuap.length = uap->length; 3527 return (sys_truncate(td, &nuap)); 3528 } 3529 #endif /* COMPAT_43 */ 3530 3531 /* Versions with the pad argument */ 3532 int 3533 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3534 { 3535 struct truncate_args ouap; 3536 3537 ouap.path = uap->path; 3538 ouap.length = uap->length; 3539 return (sys_truncate(td, &ouap)); 3540 } 3541 3542 int 3543 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3544 { 3545 struct ftruncate_args ouap; 3546 3547 ouap.fd = uap->fd; 3548 ouap.length = uap->length; 3549 return (sys_ftruncate(td, &ouap)); 3550 } 3551 3552 /* 3553 * Sync an open file. 
3554 */ 3555 #ifndef _SYS_SYSPROTO_H_ 3556 struct fsync_args { 3557 int fd; 3558 }; 3559 #endif 3560 int 3561 sys_fsync(td, uap) 3562 struct thread *td; 3563 struct fsync_args /* { 3564 int fd; 3565 } */ *uap; 3566 { 3567 struct vnode *vp; 3568 struct mount *mp; 3569 struct file *fp; 3570 int vfslocked; 3571 int error, lock_flags; 3572 3573 AUDIT_ARG_FD(uap->fd); 3574 if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FSYNC, 3575 &fp)) != 0) 3576 return (error); 3577 vp = fp->f_vnode; 3578 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 3579 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3580 goto drop; 3581 if (MNT_SHARED_WRITES(mp) || 3582 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3583 lock_flags = LK_SHARED; 3584 } else { 3585 lock_flags = LK_EXCLUSIVE; 3586 } 3587 vn_lock(vp, lock_flags | LK_RETRY); 3588 AUDIT_ARG_VNODE1(vp); 3589 if (vp->v_object != NULL) { 3590 VM_OBJECT_LOCK(vp->v_object); 3591 vm_object_page_clean(vp->v_object, 0, 0, 0); 3592 VM_OBJECT_UNLOCK(vp->v_object); 3593 } 3594 error = VOP_FSYNC(vp, MNT_WAIT, td); 3595 3596 VOP_UNLOCK(vp, 0); 3597 vn_finished_write(mp); 3598 drop: 3599 VFS_UNLOCK_GIANT(vfslocked); 3600 fdrop(fp, td); 3601 return (error); 3602 } 3603 3604 /* 3605 * Rename files. Source and destination must either both be directories, or 3606 * both not be directories. If target is a directory, it must be empty. 
*/
#ifndef _SYS_SYSPROTO_H_
struct rename_args {
	char	*from;
	char	*to;
};
#endif
/* rename system call: both paths are user-space addresses. */
int
sys_rename(td, uap)
	struct thread *td;
	register struct rename_args /* {
		char *from;
		char *to;
	} */ *uap;
{

	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
}

#ifndef _SYS_SYSPROTO_H_
struct renameat_args {
	int	oldfd;
	char	*old;
	int	newfd;
	char	*new;
};
#endif
/* renameat system call: both paths are user-space addresses. */
int
sys_renameat(struct thread *td, struct renameat_args *uap)
{

	return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
	    UIO_USERSPACE));
}

/* rename() in terms of renameat(): AT_FDCWD for both descriptors. */
int
kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
{

	return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg));
}

/*
 * Rename "old" (looked up with oldfd as directory descriptor) to "new"
 * (looked up with newfd).  Returns 0 or an errno.
 *
 * Internally error == -1 marks the "source and target are the same vnode"
 * case: VOP_RENAME() is skipped, everything is released, and the -1 is
 * turned into a 0 return at the very end.
 */
int
kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
    enum uio_seg pathseg)
{
	struct mount *mp = NULL;
	struct vnode *tvp, *fvp, *tdvp;
	struct nameidata fromnd, tond;
	int tvfslocked;
	int fvfslocked;
	int error;

	bwillwrite();
	/*
	 * With MAC the source lookup returns parent and leaf locked so the
	 * rename_from check can run on them; they are unlocked right after.
	 */
#ifdef MAC
	NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
	    MPSAFE | AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td);
#else
	NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
	    AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td);
#endif

	if ((error = namei(&fromnd)) != 0)
		return (error);
	fvfslocked = NDHASGIANT(&fromnd);
	/* Initialised here because out1 unlocks it even if tond is unused. */
	tvfslocked = 0;
#ifdef MAC
	error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
	    fromnd.ni_vp, &fromnd.ni_cnd);
	VOP_UNLOCK(fromnd.ni_dvp, 0);
	if (fromnd.ni_dvp != fromnd.ni_vp)
		VOP_UNLOCK(fromnd.ni_vp, 0);
#endif
	fvp = fromnd.ni_vp;
	if (error == 0)
		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
	if (error != 0) {
		/* Source-side cleanup only; the target was never looked up. */
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		goto out1;
	}
	NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
	    SAVESTART | MPSAFE | AUDITVNODE2, pathseg, new, newfd, CAP_CREATE,
	    td);
	if (fromnd.ni_vp->v_type == VDIR)
		tond.ni_cnd.cn_flags |= WILLBEDIR;
	if ((error = namei(&tond)) != 0) {
		/* Translate error code for rename("dir1", "dir2/."). */
		if (error == EISDIR && fvp->v_type == VDIR)
			error = EINVAL;
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		vn_finished_write(mp);
		goto out1;
	}
	tvfslocked = NDHASGIANT(&tond);
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;
	/* An existing target must be of the same kind as the source. */
	if (tvp != NULL) {
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		}
	}
	/* The source may not be the directory that will contain the target. */
	if (fvp == tdvp) {
		error = EINVAL;
		goto out;
	}
	/*
	 * If the source is the same as the destination (that is, if they
	 * are links to the same vnode), then there is nothing to do.
	 */
	if (fvp == tvp)
		error = -1;
#ifdef MAC
	else
		error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
#endif
out:
	if (!error) {
		/*
		 * NOTE(review): no vnode is released here on this path,
		 * presumably because VOP_RENAME() consumes the references
		 * and locks -- confirm against VOP_RENAME(9).
		 */
		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
		    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		NDFREE(&tond, NDF_ONLY_PNBUF);
	} else {
		/* Error (or same-vnode) path: drop everything ourselves. */
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		NDFREE(&tond, NDF_ONLY_PNBUF);
		if (tvp)
			vput(tvp);
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
	}
	vrele(tond.ni_startdir);
	vn_finished_write(mp);
out1:
	if (fromnd.ni_startdir)
		vrele(fromnd.ni_startdir);
	VFS_UNLOCK_GIANT(fvfslocked);
	VFS_UNLOCK_GIANT(tvfslocked);
	/* Same-vnode rename is reported to the caller as success. */
	if (error == -1)
		return (0);
	return (error);
}

/*
 * Make a directory file.
 */
#ifndef _SYS_SYSPROTO_H_
struct mkdir_args {
	char	*path;
	int	mode;
};
#endif
int
sys_mkdir(td, uap)
	struct thread *td;
	register struct mkdir_args /* {
		char *path;
		int mode;
	} */ *uap;
{

	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct mkdirat_args {
	int	fd;
	char	*path;
	mode_t	mode;
};
#endif
int
sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
{

	return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
}

/* mkdir() in terms of mkdirat(): AT_FDCWD as the directory descriptor. */
int
kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
{

	return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode));
}

/*
 * Create the directory "path" (looked up with fd as directory descriptor).
 * The new directory's mode is (mode & ACCESSPERMS) with the process's
 * fd_cmask bits cleared.  Returns EEXIST if the name already resolves to a
 * vnode, otherwise 0 or an errno from the lookup, MAC check or VOP_MKDIR().
 */
int
kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
    int mode)
{
	struct mount *mp;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	int vfslocked;

	AUDIT_ARG_MODE(mode);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE |
	    AUDITVNODE1, segflg, path, fd, CAP_MKDIR, td);
	nd.ni_cnd.cn_flags |= WILLBEDIR;
	if ((error = namei(&nd)) != 0)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	vp = nd.ni_vp;
	if (vp != NULL) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		/*
		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
		 * the strange behaviour of leaving the vnode unlocked
		 * if the target is the same vnode as the parent.
		 */
		if (vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (EEXIST);
	}
	/*
	 * If the write cannot start immediately, drop everything obtained
	 * from the lookup, wait with V_XSLEEP, and redo the lookup.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VDIR;
	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
#ifdef MAC
	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error)
		goto out;
#endif
	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
#ifdef MAC
out:
#endif
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	/* nd.ni_vp is only released when the create succeeded. */
	if (!error)
		vput(nd.ni_vp);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}

/*
 * Remove a directory file.
*/
#ifndef _SYS_SYSPROTO_H_
struct rmdir_args {
	char	*path;
};
#endif
int
sys_rmdir(td, uap)
	struct thread *td;
	struct rmdir_args /* {
		char *path;
	} */ *uap;
{

	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
}

/* rmdir() in terms of rmdirat(): AT_FDCWD as the directory descriptor. */
int
kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
{

	return (kern_rmdirat(td, AT_FDCWD, path, pathseg));
}

/*
 * Remove the directory "path" (looked up with fd as directory descriptor).
 * Returns ENOTDIR if the target is not a directory, EINVAL if it is the
 * same vnode as its parent, EBUSY if it has VV_ROOT set, otherwise 0 or an
 * errno from the lookup, MAC check or VOP_RMDIR().
 */
int
kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
{
	struct mount *mp;
	struct vnode *vp;
	int error;
	struct nameidata nd;
	int vfslocked;

restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE |
	    AUDITVNODE1, pathseg, path, fd, CAP_RMDIR, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	vp = nd.ni_vp;
	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}
	/*
	 * No rmdir "." please.
	 */
	if (nd.ni_dvp == vp) {
		error = EINVAL;
		goto out;
	}
	/*
	 * The root of a mounted filesystem cannot be deleted.
	 */
	if (vp->v_vflag & VV_ROOT) {
		error = EBUSY;
		goto out;
	}
#ifdef MAC
	error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
	    &nd.ni_cnd);
	if (error)
		goto out;
#endif
	/*
	 * If the write cannot start immediately, drop everything obtained
	 * from the lookup, wait with V_XSLEEP, and redo the lookup.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(vp);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
	vn_finished_write(mp);
out:
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(vp);
	if (nd.ni_dvp == vp)
		vrele(nd.ni_dvp);
	else
		vput(nd.ni_dvp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}

#ifdef COMPAT_43
/*
 * Read a block of directory entries in a filesystem independent format.
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
/*
 * Old getdirentries system call: does the read via kern_ogetdirentries()
 * and, on success, copies the starting offset out to uap->basep.
 */
int
ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
{
	long loff;
	int error;

	error = kern_ogetdirentries(td, uap, &loff);
	if (error == 0)
		error = copyout(&loff, uap->basep, sizeof(long));
	return (error);
}

/*
 * Read up to uap->count bytes of directory entries from descriptor uap->fd
 * into the user buffer uap->buf, rewriting each entry's d_type/d_namlen
 * for the old-format consumer.  On success td->td_retval[0] holds the
 * number of bytes transferred and *ploff the file offset the read started
 * at.  Returns EINVAL for count > 64K or a non-directory, EBADF if the
 * file is not open for reading, EIO on a zero d_reclen, or the error from
 * the MAC check, VOP_READDIR() or uiomove().
 */
int
kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
    long *ploff)
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	caddr_t dirbuf;
	int error, eofflag, readcnt, vfslocked;
	long loff;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ,
	    &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->f_vnode;
	/* Re-entered with vp switched to the covered vnode (see below). */
unionread:
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	if (vp->v_type != VDIR) {
		VFS_UNLOCK_GIANT(vfslocked);
		fdrop(fp, td);
		return (EINVAL);
	}
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	/* Remember where this read started; reported through *ploff. */
	loff = auio.uio_offset = fp->f_offset;
#ifdef MAC
	error = mac_vnode_check_readdir(td->td_ucred, vp);
	if (error) {
		VOP_UNLOCK(vp, 0);
		VFS_UNLOCK_GIANT(vfslocked);
		fdrop(fp, td);
		return (error);
	}
#endif
	/*
	 * On non-little-endian builds, mounts with mnt_maxsymlinklen <= 0
	 * are read straight into the user buffer with no conversion.
	 */
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			fp->f_offset = auio.uio_offset;
		} else
#	endif
	{
		/*
		 * Read into a temporary kernel buffer, fix up each entry
		 * in place, then move the result to the user buffer.
		 */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = uap->count;
		dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			readcnt = uap->count - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				/* A zero record length would never advance. */
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					error = EIO;
					break;
				}
			}
			/* Only copy out if the walk reached the end. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		free(dirbuf, M_TEMP);
	}
	if (error) {
		VOP_UNLOCK(vp, 0);
		VFS_UNLOCK_GIANT(vfslocked);
		fdrop(fp, td);
		return (error);
	}
	/*
	 * Nothing was read from the root of a MNT_UNION mount: repoint the
	 * file at the covered vnode, reset the offset and read again.
	 */
	if (uap->count == auio.uio_resid &&
	    (vp->v_vflag & VV_ROOT) &&
	    (vp->v_mount->mnt_flag & MNT_UNION)) {
		struct vnode *tvp = vp;
		vp = vp->v_mount->mnt_vnodecovered;
		VREF(vp);
		fp->f_vnode = vp;
		fp->f_data = vp;
		fp->f_offset = 0;
		vput(tvp);
		VFS_UNLOCK_GIANT(vfslocked);
		goto unionread;
	}
	VOP_UNLOCK(vp, 0);
	VFS_UNLOCK_GIANT(vfslocked);
	fdrop(fp, td);
	td->td_retval[0] = uap->count - auio.uio_resid;
	if (error == 0)
		*ploff = loff;
	return (error);
}
#endif /* COMPAT_43 */

/*
 * Read a block of directory entries in a filesystem independent format.
4117 */ 4118 #ifndef _SYS_SYSPROTO_H_ 4119 struct getdirentries_args { 4120 int fd; 4121 char *buf; 4122 u_int count; 4123 long *basep; 4124 }; 4125 #endif 4126 int 4127 sys_getdirentries(td, uap) 4128 struct thread *td; 4129 register struct getdirentries_args /* { 4130 int fd; 4131 char *buf; 4132 u_int count; 4133 long *basep; 4134 } */ *uap; 4135 { 4136 long base; 4137 int error; 4138 4139 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4140 NULL, UIO_USERSPACE); 4141 if (error) 4142 return (error); 4143 if (uap->basep != NULL) 4144 error = copyout(&base, uap->basep, sizeof(long)); 4145 return (error); 4146 } 4147 4148 int 4149 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 4150 long *basep, ssize_t *residp, enum uio_seg bufseg) 4151 { 4152 struct vnode *vp; 4153 struct file *fp; 4154 struct uio auio; 4155 struct iovec aiov; 4156 int vfslocked; 4157 long loff; 4158 int error, eofflag; 4159 4160 AUDIT_ARG_FD(fd); 4161 if (count > IOSIZE_MAX) 4162 return (EINVAL); 4163 auio.uio_resid = count; 4164 if ((error = getvnode(td->td_proc->p_fd, fd, CAP_READ | CAP_SEEK, 4165 &fp)) != 0) 4166 return (error); 4167 if ((fp->f_flag & FREAD) == 0) { 4168 fdrop(fp, td); 4169 return (EBADF); 4170 } 4171 vp = fp->f_vnode; 4172 unionread: 4173 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 4174 if (vp->v_type != VDIR) { 4175 VFS_UNLOCK_GIANT(vfslocked); 4176 error = EINVAL; 4177 goto fail; 4178 } 4179 aiov.iov_base = buf; 4180 aiov.iov_len = count; 4181 auio.uio_iov = &aiov; 4182 auio.uio_iovcnt = 1; 4183 auio.uio_rw = UIO_READ; 4184 auio.uio_segflg = bufseg; 4185 auio.uio_td = td; 4186 vn_lock(vp, LK_SHARED | LK_RETRY); 4187 AUDIT_ARG_VNODE1(vp); 4188 loff = auio.uio_offset = fp->f_offset; 4189 #ifdef MAC 4190 error = mac_vnode_check_readdir(td->td_ucred, vp); 4191 if (error == 0) 4192 #endif 4193 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4194 NULL); 4195 fp->f_offset = auio.uio_offset; 4196 if (error) { 4197 VOP_UNLOCK(vp, 
0); 4198 VFS_UNLOCK_GIANT(vfslocked); 4199 goto fail; 4200 } 4201 if (count == auio.uio_resid && 4202 (vp->v_vflag & VV_ROOT) && 4203 (vp->v_mount->mnt_flag & MNT_UNION)) { 4204 struct vnode *tvp = vp; 4205 vp = vp->v_mount->mnt_vnodecovered; 4206 VREF(vp); 4207 fp->f_vnode = vp; 4208 fp->f_data = vp; 4209 fp->f_offset = 0; 4210 vput(tvp); 4211 VFS_UNLOCK_GIANT(vfslocked); 4212 goto unionread; 4213 } 4214 VOP_UNLOCK(vp, 0); 4215 VFS_UNLOCK_GIANT(vfslocked); 4216 *basep = loff; 4217 if (residp != NULL) 4218 *residp = auio.uio_resid; 4219 td->td_retval[0] = count - auio.uio_resid; 4220 fail: 4221 fdrop(fp, td); 4222 return (error); 4223 } 4224 4225 #ifndef _SYS_SYSPROTO_H_ 4226 struct getdents_args { 4227 int fd; 4228 char *buf; 4229 size_t count; 4230 }; 4231 #endif 4232 int 4233 sys_getdents(td, uap) 4234 struct thread *td; 4235 register struct getdents_args /* { 4236 int fd; 4237 char *buf; 4238 u_int count; 4239 } */ *uap; 4240 { 4241 struct getdirentries_args ap; 4242 ap.fd = uap->fd; 4243 ap.buf = uap->buf; 4244 ap.count = uap->count; 4245 ap.basep = NULL; 4246 return (sys_getdirentries(td, &ap)); 4247 } 4248 4249 /* 4250 * Set the mode mask for creation of filesystem nodes. 4251 */ 4252 #ifndef _SYS_SYSPROTO_H_ 4253 struct umask_args { 4254 int newmask; 4255 }; 4256 #endif 4257 int 4258 sys_umask(td, uap) 4259 struct thread *td; 4260 struct umask_args /* { 4261 int newmask; 4262 } */ *uap; 4263 { 4264 register struct filedesc *fdp; 4265 4266 FILEDESC_XLOCK(td->td_proc->p_fd); 4267 fdp = td->td_proc->p_fd; 4268 td->td_retval[0] = fdp->fd_cmask; 4269 fdp->fd_cmask = uap->newmask & ALLPERMS; 4270 FILEDESC_XUNLOCK(td->td_proc->p_fd); 4271 return (0); 4272 } 4273 4274 /* 4275 * Void all references to file by ripping underlying filesystem away from 4276 * vnode. 
4277 */ 4278 #ifndef _SYS_SYSPROTO_H_ 4279 struct revoke_args { 4280 char *path; 4281 }; 4282 #endif 4283 int 4284 sys_revoke(td, uap) 4285 struct thread *td; 4286 register struct revoke_args /* { 4287 char *path; 4288 } */ *uap; 4289 { 4290 struct vnode *vp; 4291 struct vattr vattr; 4292 int error; 4293 struct nameidata nd; 4294 int vfslocked; 4295 4296 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1, 4297 UIO_USERSPACE, uap->path, td); 4298 if ((error = namei(&nd)) != 0) 4299 return (error); 4300 vfslocked = NDHASGIANT(&nd); 4301 vp = nd.ni_vp; 4302 NDFREE(&nd, NDF_ONLY_PNBUF); 4303 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4304 error = EINVAL; 4305 goto out; 4306 } 4307 #ifdef MAC 4308 error = mac_vnode_check_revoke(td->td_ucred, vp); 4309 if (error) 4310 goto out; 4311 #endif 4312 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4313 if (error) 4314 goto out; 4315 if (td->td_ucred->cr_uid != vattr.va_uid) { 4316 error = priv_check(td, PRIV_VFS_ADMIN); 4317 if (error) 4318 goto out; 4319 } 4320 if (vcount(vp) > 1) 4321 VOP_REVOKE(vp, REVOKEALL); 4322 out: 4323 vput(vp); 4324 VFS_UNLOCK_GIANT(vfslocked); 4325 return (error); 4326 } 4327 4328 /* 4329 * Convert a user file descriptor to a kernel file entry and check that, if it 4330 * is a capability, the correct rights are present. A reference on the file 4331 * entry is held upon returning. 4332 */ 4333 int 4334 getvnode(struct filedesc *fdp, int fd, cap_rights_t rights, 4335 struct file **fpp) 4336 { 4337 struct file *fp; 4338 #ifdef CAPABILITIES 4339 struct file *fp_fromcap; 4340 #endif 4341 int error; 4342 4343 error = 0; 4344 fp = NULL; 4345 if ((fdp == NULL) || (fp = fget_unlocked(fdp, fd)) == NULL) 4346 return (EBADF); 4347 #ifdef CAPABILITIES 4348 /* 4349 * If the file descriptor is for a capability, test rights and use the 4350 * file descriptor referenced by the capability. 
4351 */ 4352 error = cap_funwrap(fp, rights, &fp_fromcap); 4353 if (error) { 4354 fdrop(fp, curthread); 4355 return (error); 4356 } 4357 if (fp != fp_fromcap) { 4358 fhold(fp_fromcap); 4359 fdrop(fp, curthread); 4360 fp = fp_fromcap; 4361 } 4362 #endif /* CAPABILITIES */ 4363 4364 /* 4365 * The file could be not of the vnode type, or it may be not 4366 * yet fully initialized, in which case the f_vnode pointer 4367 * may be set, but f_ops is still badfileops. E.g., 4368 * devfs_open() transiently create such situation to 4369 * facilitate csw d_fdopen(). 4370 * 4371 * Dupfdopen() handling in kern_openat() installs the 4372 * half-baked file into the process descriptor table, allowing 4373 * other thread to dereference it. Guard against the race by 4374 * checking f_ops. 4375 */ 4376 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4377 fdrop(fp, curthread); 4378 return (EINVAL); 4379 } 4380 *fpp = fp; 4381 return (0); 4382 } 4383 4384 4385 /* 4386 * Get an (NFS) file handle. 4387 */ 4388 #ifndef _SYS_SYSPROTO_H_ 4389 struct lgetfh_args { 4390 char *fname; 4391 fhandle_t *fhp; 4392 }; 4393 #endif 4394 int 4395 sys_lgetfh(td, uap) 4396 struct thread *td; 4397 register struct lgetfh_args *uap; 4398 { 4399 struct nameidata nd; 4400 fhandle_t fh; 4401 register struct vnode *vp; 4402 int vfslocked; 4403 int error; 4404 4405 error = priv_check(td, PRIV_VFS_GETFH); 4406 if (error) 4407 return (error); 4408 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1, 4409 UIO_USERSPACE, uap->fname, td); 4410 error = namei(&nd); 4411 if (error) 4412 return (error); 4413 vfslocked = NDHASGIANT(&nd); 4414 NDFREE(&nd, NDF_ONLY_PNBUF); 4415 vp = nd.ni_vp; 4416 bzero(&fh, sizeof(fh)); 4417 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4418 error = VOP_VPTOFH(vp, &fh.fh_fid); 4419 vput(vp); 4420 VFS_UNLOCK_GIANT(vfslocked); 4421 if (error) 4422 return (error); 4423 error = copyout(&fh, uap->fhp, sizeof (fh)); 4424 return (error); 4425 } 4426 4427 #ifndef _SYS_SYSPROTO_H_ 
4428 struct getfh_args { 4429 char *fname; 4430 fhandle_t *fhp; 4431 }; 4432 #endif 4433 int 4434 sys_getfh(td, uap) 4435 struct thread *td; 4436 register struct getfh_args *uap; 4437 { 4438 struct nameidata nd; 4439 fhandle_t fh; 4440 register struct vnode *vp; 4441 int vfslocked; 4442 int error; 4443 4444 error = priv_check(td, PRIV_VFS_GETFH); 4445 if (error) 4446 return (error); 4447 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1, 4448 UIO_USERSPACE, uap->fname, td); 4449 error = namei(&nd); 4450 if (error) 4451 return (error); 4452 vfslocked = NDHASGIANT(&nd); 4453 NDFREE(&nd, NDF_ONLY_PNBUF); 4454 vp = nd.ni_vp; 4455 bzero(&fh, sizeof(fh)); 4456 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4457 error = VOP_VPTOFH(vp, &fh.fh_fid); 4458 vput(vp); 4459 VFS_UNLOCK_GIANT(vfslocked); 4460 if (error) 4461 return (error); 4462 error = copyout(&fh, uap->fhp, sizeof (fh)); 4463 return (error); 4464 } 4465 4466 /* 4467 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4468 * open descriptor. 4469 * 4470 * warning: do not remove the priv_check() call or this becomes one giant 4471 * security hole. 4472 */ 4473 #ifndef _SYS_SYSPROTO_H_ 4474 struct fhopen_args { 4475 const struct fhandle *u_fhp; 4476 int flags; 4477 }; 4478 #endif 4479 int 4480 sys_fhopen(td, uap) 4481 struct thread *td; 4482 struct fhopen_args /* { 4483 const struct fhandle *u_fhp; 4484 int flags; 4485 } */ *uap; 4486 { 4487 struct proc *p = td->td_proc; 4488 struct mount *mp; 4489 struct vnode *vp; 4490 struct fhandle fhp; 4491 struct vattr vat; 4492 struct vattr *vap = &vat; 4493 struct flock lf; 4494 struct file *fp; 4495 register struct filedesc *fdp = p->p_fd; 4496 int fmode, error, type; 4497 accmode_t accmode; 4498 struct file *nfp; 4499 int vfslocked; 4500 int indx; 4501 4502 error = priv_check(td, PRIV_VFS_FHOPEN); 4503 if (error) 4504 return (error); 4505 fmode = FFLAGS(uap->flags); 4506 /* why not allow a non-read/write open for our lockd? 
*/ 4507 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4508 return (EINVAL); 4509 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4510 if (error) 4511 return(error); 4512 /* find the mount point */ 4513 mp = vfs_busyfs(&fhp.fh_fsid); 4514 if (mp == NULL) 4515 return (ESTALE); 4516 vfslocked = VFS_LOCK_GIANT(mp); 4517 /* now give me my vnode, it gets returned to me locked */ 4518 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4519 vfs_unbusy(mp); 4520 if (error) 4521 goto out; 4522 /* 4523 * from now on we have to make sure not 4524 * to forget about the vnode 4525 * any error that causes an abort must vput(vp) 4526 * just set error = err and 'goto bad;'. 4527 */ 4528 4529 /* 4530 * from vn_open 4531 */ 4532 if (vp->v_type == VLNK) { 4533 error = EMLINK; 4534 goto bad; 4535 } 4536 if (vp->v_type == VSOCK) { 4537 error = EOPNOTSUPP; 4538 goto bad; 4539 } 4540 if (vp->v_type != VDIR && fmode & O_DIRECTORY) { 4541 error = ENOTDIR; 4542 goto bad; 4543 } 4544 accmode = 0; 4545 if (fmode & (FWRITE | O_TRUNC)) { 4546 if (vp->v_type == VDIR) { 4547 error = EISDIR; 4548 goto bad; 4549 } 4550 error = vn_writechk(vp); 4551 if (error) 4552 goto bad; 4553 accmode |= VWRITE; 4554 } 4555 if (fmode & FREAD) 4556 accmode |= VREAD; 4557 if ((fmode & O_APPEND) && (fmode & FWRITE)) 4558 accmode |= VAPPEND; 4559 #ifdef MAC 4560 error = mac_vnode_check_open(td->td_ucred, vp, accmode); 4561 if (error) 4562 goto bad; 4563 #endif 4564 if (accmode) { 4565 error = VOP_ACCESS(vp, accmode, td->td_ucred, td); 4566 if (error) 4567 goto bad; 4568 } 4569 if (fmode & O_TRUNC) { 4570 vfs_ref(mp); 4571 VOP_UNLOCK(vp, 0); /* XXX */ 4572 if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) { 4573 vrele(vp); 4574 vfs_rel(mp); 4575 goto out; 4576 } 4577 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4578 vfs_rel(mp); 4579 #ifdef MAC 4580 /* 4581 * We don't yet have fp->f_cred, so use td->td_ucred, which 4582 * should be right. 
4583 */ 4584 error = mac_vnode_check_write(td->td_ucred, td->td_ucred, vp); 4585 if (error == 0) { 4586 #endif 4587 VATTR_NULL(vap); 4588 vap->va_size = 0; 4589 error = VOP_SETATTR(vp, vap, td->td_ucred); 4590 #ifdef MAC 4591 } 4592 #endif 4593 vn_finished_write(mp); 4594 if (error) 4595 goto bad; 4596 } 4597 error = VOP_OPEN(vp, fmode, td->td_ucred, td, NULL); 4598 if (error) 4599 goto bad; 4600 4601 if (fmode & FWRITE) { 4602 vp->v_writecount++; 4603 CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d", 4604 __func__, vp, vp->v_writecount); 4605 } 4606 4607 /* 4608 * end of vn_open code 4609 */ 4610 4611 if ((error = falloc(td, &nfp, &indx, fmode)) != 0) { 4612 if (fmode & FWRITE) { 4613 vp->v_writecount--; 4614 CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d", 4615 __func__, vp, vp->v_writecount); 4616 } 4617 goto bad; 4618 } 4619 /* An extra reference on `nfp' has been held for us by falloc(). */ 4620 fp = nfp; 4621 nfp->f_vnode = vp; 4622 finit(nfp, fmode & FMASK, DTYPE_VNODE, vp, &vnops); 4623 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4624 lf.l_whence = SEEK_SET; 4625 lf.l_start = 0; 4626 lf.l_len = 0; 4627 if (fmode & O_EXLOCK) 4628 lf.l_type = F_WRLCK; 4629 else 4630 lf.l_type = F_RDLCK; 4631 type = F_FLOCK; 4632 if ((fmode & FNONBLOCK) == 0) 4633 type |= F_WAIT; 4634 VOP_UNLOCK(vp, 0); 4635 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, 4636 type)) != 0) { 4637 /* 4638 * The lock request failed. Normally close the 4639 * descriptor but handle the case where someone might 4640 * have dup()d or close()d it when we weren't looking. 
4641 */ 4642 fdclose(fdp, fp, indx, td); 4643 4644 /* 4645 * release our private reference 4646 */ 4647 fdrop(fp, td); 4648 goto out; 4649 } 4650 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4651 atomic_set_int(&fp->f_flag, FHASLOCK); 4652 } 4653 4654 VOP_UNLOCK(vp, 0); 4655 fdrop(fp, td); 4656 VFS_UNLOCK_GIANT(vfslocked); 4657 td->td_retval[0] = indx; 4658 return (0); 4659 4660 bad: 4661 vput(vp); 4662 out: 4663 VFS_UNLOCK_GIANT(vfslocked); 4664 return (error); 4665 } 4666 4667 /* 4668 * Stat an (NFS) file handle. 4669 */ 4670 #ifndef _SYS_SYSPROTO_H_ 4671 struct fhstat_args { 4672 struct fhandle *u_fhp; 4673 struct stat *sb; 4674 }; 4675 #endif 4676 int 4677 sys_fhstat(td, uap) 4678 struct thread *td; 4679 register struct fhstat_args /* { 4680 struct fhandle *u_fhp; 4681 struct stat *sb; 4682 } */ *uap; 4683 { 4684 struct stat sb; 4685 struct fhandle fh; 4686 int error; 4687 4688 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4689 if (error != 0) 4690 return (error); 4691 error = kern_fhstat(td, fh, &sb); 4692 if (error != 0) 4693 return (error); 4694 error = copyout(&sb, uap->sb, sizeof(sb)); 4695 return (error); 4696 } 4697 4698 int 4699 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4700 { 4701 struct mount *mp; 4702 struct vnode *vp; 4703 int vfslocked; 4704 int error; 4705 4706 error = priv_check(td, PRIV_VFS_FHSTAT); 4707 if (error) 4708 return (error); 4709 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4710 return (ESTALE); 4711 vfslocked = VFS_LOCK_GIANT(mp); 4712 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4713 vfs_unbusy(mp); 4714 if (error) { 4715 VFS_UNLOCK_GIANT(vfslocked); 4716 return (error); 4717 } 4718 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4719 vput(vp); 4720 VFS_UNLOCK_GIANT(vfslocked); 4721 return (error); 4722 } 4723 4724 /* 4725 * Implement fstatfs() for (NFS) file handles. 
4726 */ 4727 #ifndef _SYS_SYSPROTO_H_ 4728 struct fhstatfs_args { 4729 struct fhandle *u_fhp; 4730 struct statfs *buf; 4731 }; 4732 #endif 4733 int 4734 sys_fhstatfs(td, uap) 4735 struct thread *td; 4736 struct fhstatfs_args /* { 4737 struct fhandle *u_fhp; 4738 struct statfs *buf; 4739 } */ *uap; 4740 { 4741 struct statfs sf; 4742 fhandle_t fh; 4743 int error; 4744 4745 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4746 if (error) 4747 return (error); 4748 error = kern_fhstatfs(td, fh, &sf); 4749 if (error) 4750 return (error); 4751 return (copyout(&sf, uap->buf, sizeof(sf))); 4752 } 4753 4754 int 4755 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4756 { 4757 struct statfs *sp; 4758 struct mount *mp; 4759 struct vnode *vp; 4760 int vfslocked; 4761 int error; 4762 4763 error = priv_check(td, PRIV_VFS_FHSTATFS); 4764 if (error) 4765 return (error); 4766 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4767 return (ESTALE); 4768 vfslocked = VFS_LOCK_GIANT(mp); 4769 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4770 if (error) { 4771 vfs_unbusy(mp); 4772 VFS_UNLOCK_GIANT(vfslocked); 4773 return (error); 4774 } 4775 vput(vp); 4776 error = prison_canseemount(td->td_ucred, mp); 4777 if (error) 4778 goto out; 4779 #ifdef MAC 4780 error = mac_mount_check_stat(td->td_ucred, mp); 4781 if (error) 4782 goto out; 4783 #endif 4784 /* 4785 * Set these in case the underlying filesystem fails to do so. 
4786 */ 4787 sp = &mp->mnt_stat; 4788 sp->f_version = STATFS_VERSION; 4789 sp->f_namemax = NAME_MAX; 4790 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4791 error = VFS_STATFS(mp, sp); 4792 if (error == 0) 4793 *buf = *sp; 4794 out: 4795 vfs_unbusy(mp); 4796 VFS_UNLOCK_GIANT(vfslocked); 4797 return (error); 4798 } 4799 4800 int 4801 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4802 { 4803 struct file *fp; 4804 struct mount *mp; 4805 struct vnode *vp; 4806 off_t olen, ooffset; 4807 int error, vfslocked; 4808 4809 fp = NULL; 4810 vfslocked = 0; 4811 error = fget(td, fd, CAP_WRITE, &fp); 4812 if (error != 0) 4813 goto out; 4814 4815 switch (fp->f_type) { 4816 case DTYPE_VNODE: 4817 break; 4818 case DTYPE_PIPE: 4819 case DTYPE_FIFO: 4820 error = ESPIPE; 4821 goto out; 4822 default: 4823 error = ENODEV; 4824 goto out; 4825 } 4826 if ((fp->f_flag & FWRITE) == 0) { 4827 error = EBADF; 4828 goto out; 4829 } 4830 vp = fp->f_vnode; 4831 if (vp->v_type != VREG) { 4832 error = ENODEV; 4833 goto out; 4834 } 4835 if (offset < 0 || len <= 0) { 4836 error = EINVAL; 4837 goto out; 4838 } 4839 /* Check for wrap. */ 4840 if (offset > OFF_MAX - len) { 4841 error = EFBIG; 4842 goto out; 4843 } 4844 4845 /* Allocating blocks may take a long time, so iterate. 
*/ 4846 for (;;) { 4847 olen = len; 4848 ooffset = offset; 4849 4850 bwillwrite(); 4851 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 4852 mp = NULL; 4853 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4854 if (error != 0) { 4855 VFS_UNLOCK_GIANT(vfslocked); 4856 break; 4857 } 4858 error = vn_lock(vp, LK_EXCLUSIVE); 4859 if (error != 0) { 4860 vn_finished_write(mp); 4861 VFS_UNLOCK_GIANT(vfslocked); 4862 break; 4863 } 4864 #ifdef MAC 4865 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4866 if (error == 0) 4867 #endif 4868 error = VOP_ALLOCATE(vp, &offset, &len); 4869 VOP_UNLOCK(vp, 0); 4870 vn_finished_write(mp); 4871 VFS_UNLOCK_GIANT(vfslocked); 4872 4873 if (olen + ooffset != offset + len) { 4874 panic("offset + len changed from %jx/%jx to %jx/%jx", 4875 ooffset, olen, offset, len); 4876 } 4877 if (error != 0 || len == 0) 4878 break; 4879 KASSERT(olen > len, ("Iteration did not make progress?")); 4880 maybe_yield(); 4881 } 4882 out: 4883 if (fp != NULL) 4884 fdrop(fp, td); 4885 return (error); 4886 } 4887 4888 int 4889 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4890 { 4891 4892 return (kern_posix_fallocate(td, uap->fd, uap->offset, uap->len)); 4893 } 4894 4895 /* 4896 * Unlike madvise(2), we do not make a best effort to remember every 4897 * possible caching hint. Instead, we remember the last setting with 4898 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4899 * region of any current setting. 
4900 */ 4901 int 4902 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4903 int advice) 4904 { 4905 struct fadvise_info *fa, *new; 4906 struct file *fp; 4907 struct vnode *vp; 4908 off_t end; 4909 int error; 4910 4911 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4912 return (EINVAL); 4913 switch (advice) { 4914 case POSIX_FADV_SEQUENTIAL: 4915 case POSIX_FADV_RANDOM: 4916 case POSIX_FADV_NOREUSE: 4917 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4918 break; 4919 case POSIX_FADV_NORMAL: 4920 case POSIX_FADV_WILLNEED: 4921 case POSIX_FADV_DONTNEED: 4922 new = NULL; 4923 break; 4924 default: 4925 return (EINVAL); 4926 } 4927 /* XXX: CAP_POSIX_FADVISE? */ 4928 error = fget(td, fd, 0, &fp); 4929 if (error != 0) 4930 goto out; 4931 4932 switch (fp->f_type) { 4933 case DTYPE_VNODE: 4934 break; 4935 case DTYPE_PIPE: 4936 case DTYPE_FIFO: 4937 error = ESPIPE; 4938 goto out; 4939 default: 4940 error = ENODEV; 4941 goto out; 4942 } 4943 vp = fp->f_vnode; 4944 if (vp->v_type != VREG) { 4945 error = ENODEV; 4946 goto out; 4947 } 4948 if (len == 0) 4949 end = OFF_MAX; 4950 else 4951 end = offset + len - 1; 4952 switch (advice) { 4953 case POSIX_FADV_SEQUENTIAL: 4954 case POSIX_FADV_RANDOM: 4955 case POSIX_FADV_NOREUSE: 4956 /* 4957 * Try to merge any existing non-standard region with 4958 * this new region if possible, otherwise create a new 4959 * non-standard region for this request. 
4960 */ 4961 mtx_pool_lock(mtxpool_sleep, fp); 4962 fa = fp->f_advice; 4963 if (fa != NULL && fa->fa_advice == advice && 4964 ((fa->fa_start <= end && fa->fa_end >= offset) || 4965 (end != OFF_MAX && fa->fa_start == end + 1) || 4966 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4967 if (offset < fa->fa_start) 4968 fa->fa_start = offset; 4969 if (end > fa->fa_end) 4970 fa->fa_end = end; 4971 } else { 4972 new->fa_advice = advice; 4973 new->fa_start = offset; 4974 new->fa_end = end; 4975 fp->f_advice = new; 4976 new = fa; 4977 } 4978 mtx_pool_unlock(mtxpool_sleep, fp); 4979 break; 4980 case POSIX_FADV_NORMAL: 4981 /* 4982 * If a the "normal" region overlaps with an existing 4983 * non-standard region, trim or remove the 4984 * non-standard region. 4985 */ 4986 mtx_pool_lock(mtxpool_sleep, fp); 4987 fa = fp->f_advice; 4988 if (fa != NULL) { 4989 if (offset <= fa->fa_start && end >= fa->fa_end) { 4990 new = fa; 4991 fp->f_advice = NULL; 4992 } else if (offset <= fa->fa_start && 4993 end >= fa->fa_start) 4994 fa->fa_start = end + 1; 4995 else if (offset <= fa->fa_end && end >= fa->fa_end) 4996 fa->fa_end = offset - 1; 4997 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4998 /* 4999 * If the "normal" region is a middle 5000 * portion of the existing 5001 * non-standard region, just remove 5002 * the whole thing rather than picking 5003 * one side or the other to 5004 * preserve. 5005 */ 5006 new = fa; 5007 fp->f_advice = NULL; 5008 } 5009 } 5010 mtx_pool_unlock(mtxpool_sleep, fp); 5011 break; 5012 case POSIX_FADV_WILLNEED: 5013 case POSIX_FADV_DONTNEED: 5014 error = VOP_ADVISE(vp, offset, end, advice); 5015 break; 5016 } 5017 out: 5018 if (fp != NULL) 5019 fdrop(fp, td); 5020 free(new, M_FADVISE); 5021 return (error); 5022 } 5023 5024 int 5025 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 5026 { 5027 5028 return (kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 5029 uap->advice)); 5030 } 5031