1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_kdtrace.h" 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/bio.h> 48 #include <sys/buf.h> 49 #include <sys/capability.h> 50 #include <sys/disk.h> 51 #include <sys/sysent.h> 52 #include <sys/malloc.h> 53 #include <sys/mount.h> 54 #include <sys/mutex.h> 55 #include <sys/sysproto.h> 56 #include <sys/namei.h> 57 #include <sys/filedesc.h> 58 #include <sys/kernel.h> 59 #include <sys/fcntl.h> 60 #include <sys/file.h> 61 #include <sys/filio.h> 62 #include <sys/limits.h> 63 #include <sys/linker.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE(vfs, , stat, mode, mode); 95 SDT_PROBE_ARGTYPE(vfs, , stat, mode, 0, "char *"); 96 SDT_PROBE_ARGTYPE(vfs, , stat, mode, 1, "int"); 97 SDT_PROBE_DEFINE(vfs, , stat, reg, reg); 98 SDT_PROBE_ARGTYPE(vfs, , stat, reg, 0, "char *"); 99 SDT_PROBE_ARGTYPE(vfs, , stat, reg, 1, "int"); 100 101 static int chroot_refuse_vdir_fds(struct filedesc *fdp); 102 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 103 static int setfflags(struct thread *td, struct vnode *, int); 104 static int setutimes(struct thread *td, struct vnode *, 105 const struct timespec *, int, int); 106 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 107 struct thread *td); 108 109 /* 110 * The module initialization routine for POSIX asynchronous I/O will 111 * set this to the version of AIO that it implements. (Zero means 112 * that it is not implemented.) This value is used here by pathconf() 113 * and in kern_descrip.c by fpathconf(). 114 */ 115 int async_io_version; 116 117 #ifdef DEBUG 118 static int syncprt = 0; 119 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 120 #endif 121 122 /* 123 * Sync each mounted filesystem. 124 */ 125 #ifndef _SYS_SYSPROTO_H_ 126 struct sync_args { 127 int dummy; 128 }; 129 #endif 130 /* ARGSUSED */ 131 int 132 sys_sync(td, uap) 133 struct thread *td; 134 struct sync_args *uap; 135 { 136 struct mount *mp, *nmp; 137 int save; 138 139 mtx_lock(&mountlist_mtx); 140 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 141 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 142 nmp = TAILQ_NEXT(mp, mnt_list); 143 continue; 144 } 145 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 146 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 147 save = curthread_pflags_set(TDP_SYNCIO); 148 vfs_msync(mp, MNT_NOWAIT); 149 VFS_SYNC(mp, MNT_NOWAIT); 150 curthread_pflags_restore(save); 151 vn_finished_write(mp); 152 } 153 mtx_lock(&mountlist_mtx); 154 nmp = TAILQ_NEXT(mp, mnt_list); 155 vfs_unbusy(mp); 156 } 157 mtx_unlock(&mountlist_mtx); 158 return (0); 159 } 160 161 /* 162 * Change filesystem quotas. 163 */ 164 #ifndef _SYS_SYSPROTO_H_ 165 struct quotactl_args { 166 char *path; 167 int cmd; 168 int uid; 169 caddr_t arg; 170 }; 171 #endif 172 int 173 sys_quotactl(td, uap) 174 struct thread *td; 175 register struct quotactl_args /* { 176 char *path; 177 int cmd; 178 int uid; 179 caddr_t arg; 180 } */ *uap; 181 { 182 struct mount *mp; 183 int error; 184 struct nameidata nd; 185 186 AUDIT_ARG_CMD(uap->cmd); 187 AUDIT_ARG_UID(uap->uid); 188 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 189 return (EPERM); 190 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 191 uap->path, td); 192 if ((error = namei(&nd)) != 0) 193 return (error); 194 NDFREE(&nd, NDF_ONLY_PNBUF); 195 mp = nd.ni_vp->v_mount; 196 vfs_ref(mp); 197 vput(nd.ni_vp); 198 error = vfs_busy(mp, 0); 199 vfs_rel(mp); 200 if (error) 201 return (error); 202 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 203 204 /* 205 * Since quota on operation typically needs to open quota 206 * file, the Q_QUOTAON handler needs to unbusy the mount point 207 * before calling into namei. Otherwise, unmount might be 208 * started between two vfs_busy() invocations (first is our, 209 * second is from mount point cross-walk code in lookup()), 210 * causing deadlock. 211 * 212 * Require that Q_QUOTAON handles the vfs_busy() reference on 213 * its own, always returning with ubusied mount point. 214 */ 215 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 216 vfs_unbusy(mp); 217 return (error); 218 } 219 220 /* 221 * Used by statfs conversion routines to scale the block size up if 222 * necessary so that all of the block counts are <= 'max_size'. Note 223 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 224 * value of 'n'. 225 */ 226 void 227 statfs_scale_blocks(struct statfs *sf, long max_size) 228 { 229 uint64_t count; 230 int shift; 231 232 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 233 234 /* 235 * Attempt to scale the block counts to give a more accurate 236 * overview to userland of the ratio of free space to used 237 * space. To do this, find the largest block count and compute 238 * a divisor that lets it fit into a signed integer <= max_size. 239 */ 240 if (sf->f_bavail < 0) 241 count = -sf->f_bavail; 242 else 243 count = sf->f_bavail; 244 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 245 if (count <= max_size) 246 return; 247 248 count >>= flsl(max_size); 249 shift = 0; 250 while (count > 0) { 251 shift++; 252 count >>=1; 253 } 254 255 sf->f_bsize <<= shift; 256 sf->f_blocks >>= shift; 257 sf->f_bfree >>= shift; 258 sf->f_bavail >>= shift; 259 } 260 261 /* 262 * Get filesystem statistics. 263 */ 264 #ifndef _SYS_SYSPROTO_H_ 265 struct statfs_args { 266 char *path; 267 struct statfs *buf; 268 }; 269 #endif 270 int 271 sys_statfs(td, uap) 272 struct thread *td; 273 register struct statfs_args /* { 274 char *path; 275 struct statfs *buf; 276 } */ *uap; 277 { 278 struct statfs sf; 279 int error; 280 281 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 282 if (error == 0) 283 error = copyout(&sf, uap->buf, sizeof(sf)); 284 return (error); 285 } 286 287 int 288 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 289 struct statfs *buf) 290 { 291 struct mount *mp; 292 struct statfs *sp, sb; 293 int error; 294 struct nameidata nd; 295 296 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 297 pathseg, path, td); 298 error = namei(&nd); 299 if (error) 300 return (error); 301 mp = nd.ni_vp->v_mount; 302 vfs_ref(mp); 303 NDFREE(&nd, NDF_ONLY_PNBUF); 304 vput(nd.ni_vp); 305 error = vfs_busy(mp, 0); 306 vfs_rel(mp); 307 if (error) 308 return (error); 309 #ifdef MAC 310 error = mac_mount_check_stat(td->td_ucred, mp); 311 if (error) 312 goto out; 313 #endif 314 /* 315 * Set these in case the underlying filesystem fails to do so. 316 */ 317 sp = &mp->mnt_stat; 318 sp->f_version = STATFS_VERSION; 319 sp->f_namemax = NAME_MAX; 320 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 321 error = VFS_STATFS(mp, sp); 322 if (error) 323 goto out; 324 if (priv_check(td, PRIV_VFS_GENERATION)) { 325 bcopy(sp, &sb, sizeof(sb)); 326 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 327 prison_enforce_statfs(td->td_ucred, mp, &sb); 328 sp = &sb; 329 } 330 *buf = *sp; 331 out: 332 vfs_unbusy(mp); 333 return (error); 334 } 335 336 /* 337 * Get filesystem statistics. 338 */ 339 #ifndef _SYS_SYSPROTO_H_ 340 struct fstatfs_args { 341 int fd; 342 struct statfs *buf; 343 }; 344 #endif 345 int 346 sys_fstatfs(td, uap) 347 struct thread *td; 348 register struct fstatfs_args /* { 349 int fd; 350 struct statfs *buf; 351 } */ *uap; 352 { 353 struct statfs sf; 354 int error; 355 356 error = kern_fstatfs(td, uap->fd, &sf); 357 if (error == 0) 358 error = copyout(&sf, uap->buf, sizeof(sf)); 359 return (error); 360 } 361 362 int 363 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 364 { 365 struct file *fp; 366 struct mount *mp; 367 struct statfs *sp, sb; 368 struct vnode *vp; 369 int error; 370 371 AUDIT_ARG_FD(fd); 372 error = getvnode(td->td_proc->p_fd, fd, CAP_FSTATFS, &fp); 373 if (error) 374 return (error); 375 vp = fp->f_vnode; 376 vn_lock(vp, LK_SHARED | LK_RETRY); 377 #ifdef AUDIT 378 AUDIT_ARG_VNODE1(vp); 379 #endif 380 mp = vp->v_mount; 381 if (mp) 382 vfs_ref(mp); 383 VOP_UNLOCK(vp, 0); 384 fdrop(fp, td); 385 if (mp == NULL) { 386 error = EBADF; 387 goto out; 388 } 389 error = vfs_busy(mp, 0); 390 vfs_rel(mp); 391 if (error) 392 return (error); 393 #ifdef MAC 394 error = mac_mount_check_stat(td->td_ucred, mp); 395 if (error) 396 goto out; 397 #endif 398 /* 399 * Set these in case the underlying filesystem fails to do so. 400 */ 401 sp = &mp->mnt_stat; 402 sp->f_version = STATFS_VERSION; 403 sp->f_namemax = NAME_MAX; 404 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 405 error = VFS_STATFS(mp, sp); 406 if (error) 407 goto out; 408 if (priv_check(td, PRIV_VFS_GENERATION)) { 409 bcopy(sp, &sb, sizeof(sb)); 410 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 411 prison_enforce_statfs(td->td_ucred, mp, &sb); 412 sp = &sb; 413 } 414 *buf = *sp; 415 out: 416 if (mp) 417 vfs_unbusy(mp); 418 return (error); 419 } 420 421 /* 422 * Get statistics on all filesystems. 423 */ 424 #ifndef _SYS_SYSPROTO_H_ 425 struct getfsstat_args { 426 struct statfs *buf; 427 long bufsize; 428 int flags; 429 }; 430 #endif 431 int 432 sys_getfsstat(td, uap) 433 struct thread *td; 434 register struct getfsstat_args /* { 435 struct statfs *buf; 436 long bufsize; 437 int flags; 438 } */ *uap; 439 { 440 441 return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE, 442 uap->flags)); 443 } 444 445 /* 446 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 447 * The caller is responsible for freeing memory which will be allocated 448 * in '*buf'. 449 */ 450 int 451 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 452 enum uio_seg bufseg, int flags) 453 { 454 struct mount *mp, *nmp; 455 struct statfs *sfsp, *sp, sb; 456 size_t count, maxcount; 457 int error; 458 459 maxcount = bufsize / sizeof(struct statfs); 460 if (bufsize == 0) 461 sfsp = NULL; 462 else if (bufseg == UIO_USERSPACE) 463 sfsp = *buf; 464 else /* if (bufseg == UIO_SYSSPACE) */ { 465 count = 0; 466 mtx_lock(&mountlist_mtx); 467 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 468 count++; 469 } 470 mtx_unlock(&mountlist_mtx); 471 if (maxcount > count) 472 maxcount = count; 473 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 474 M_WAITOK); 475 } 476 count = 0; 477 mtx_lock(&mountlist_mtx); 478 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 479 if (prison_canseemount(td->td_ucred, mp) != 0) { 480 nmp = TAILQ_NEXT(mp, mnt_list); 481 continue; 482 } 483 #ifdef MAC 484 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 485 nmp = TAILQ_NEXT(mp, mnt_list); 486 continue; 487 } 488 #endif 489 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 490 nmp = TAILQ_NEXT(mp, mnt_list); 491 continue; 492 } 493 if (sfsp && count < maxcount) { 494 sp = &mp->mnt_stat; 495 /* 496 * Set these in case the underlying filesystem 497 * fails to do so. 498 */ 499 sp->f_version = STATFS_VERSION; 500 sp->f_namemax = NAME_MAX; 501 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 502 /* 503 * If MNT_NOWAIT or MNT_LAZY is specified, do not 504 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 505 * overrides MNT_WAIT. 506 */ 507 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 508 (flags & MNT_WAIT)) && 509 (error = VFS_STATFS(mp, sp))) { 510 mtx_lock(&mountlist_mtx); 511 nmp = TAILQ_NEXT(mp, mnt_list); 512 vfs_unbusy(mp); 513 continue; 514 } 515 if (priv_check(td, PRIV_VFS_GENERATION)) { 516 bcopy(sp, &sb, sizeof(sb)); 517 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 518 prison_enforce_statfs(td->td_ucred, mp, &sb); 519 sp = &sb; 520 } 521 if (bufseg == UIO_SYSSPACE) 522 bcopy(sp, sfsp, sizeof(*sp)); 523 else /* if (bufseg == UIO_USERSPACE) */ { 524 error = copyout(sp, sfsp, sizeof(*sp)); 525 if (error) { 526 vfs_unbusy(mp); 527 return (error); 528 } 529 } 530 sfsp++; 531 } 532 count++; 533 mtx_lock(&mountlist_mtx); 534 nmp = TAILQ_NEXT(mp, mnt_list); 535 vfs_unbusy(mp); 536 } 537 mtx_unlock(&mountlist_mtx); 538 if (sfsp && count > maxcount) 539 td->td_retval[0] = maxcount; 540 else 541 td->td_retval[0] = count; 542 return (0); 543 } 544 545 #ifdef COMPAT_FREEBSD4 546 /* 547 * Get old format filesystem statistics. 548 */ 549 static void cvtstatfs(struct statfs *, struct ostatfs *); 550 551 #ifndef _SYS_SYSPROTO_H_ 552 struct freebsd4_statfs_args { 553 char *path; 554 struct ostatfs *buf; 555 }; 556 #endif 557 int 558 freebsd4_statfs(td, uap) 559 struct thread *td; 560 struct freebsd4_statfs_args /* { 561 char *path; 562 struct ostatfs *buf; 563 } */ *uap; 564 { 565 struct ostatfs osb; 566 struct statfs sf; 567 int error; 568 569 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 570 if (error) 571 return (error); 572 cvtstatfs(&sf, &osb); 573 return (copyout(&osb, uap->buf, sizeof(osb))); 574 } 575 576 /* 577 * Get filesystem statistics. 578 */ 579 #ifndef _SYS_SYSPROTO_H_ 580 struct freebsd4_fstatfs_args { 581 int fd; 582 struct ostatfs *buf; 583 }; 584 #endif 585 int 586 freebsd4_fstatfs(td, uap) 587 struct thread *td; 588 struct freebsd4_fstatfs_args /* { 589 int fd; 590 struct ostatfs *buf; 591 } */ *uap; 592 { 593 struct ostatfs osb; 594 struct statfs sf; 595 int error; 596 597 error = kern_fstatfs(td, uap->fd, &sf); 598 if (error) 599 return (error); 600 cvtstatfs(&sf, &osb); 601 return (copyout(&osb, uap->buf, sizeof(osb))); 602 } 603 604 /* 605 * Get statistics on all filesystems. 606 */ 607 #ifndef _SYS_SYSPROTO_H_ 608 struct freebsd4_getfsstat_args { 609 struct ostatfs *buf; 610 long bufsize; 611 int flags; 612 }; 613 #endif 614 int 615 freebsd4_getfsstat(td, uap) 616 struct thread *td; 617 register struct freebsd4_getfsstat_args /* { 618 struct ostatfs *buf; 619 long bufsize; 620 int flags; 621 } */ *uap; 622 { 623 struct statfs *buf, *sp; 624 struct ostatfs osb; 625 size_t count, size; 626 int error; 627 628 count = uap->bufsize / sizeof(struct ostatfs); 629 size = count * sizeof(struct statfs); 630 error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags); 631 if (size > 0) { 632 count = td->td_retval[0]; 633 sp = buf; 634 while (count > 0 && error == 0) { 635 cvtstatfs(sp, &osb); 636 error = copyout(&osb, uap->buf, sizeof(osb)); 637 sp++; 638 uap->buf++; 639 count--; 640 } 641 free(buf, M_TEMP); 642 } 643 return (error); 644 } 645 646 /* 647 * Implement fstatfs() for (NFS) file handles. 648 */ 649 #ifndef _SYS_SYSPROTO_H_ 650 struct freebsd4_fhstatfs_args { 651 struct fhandle *u_fhp; 652 struct ostatfs *buf; 653 }; 654 #endif 655 int 656 freebsd4_fhstatfs(td, uap) 657 struct thread *td; 658 struct freebsd4_fhstatfs_args /* { 659 struct fhandle *u_fhp; 660 struct ostatfs *buf; 661 } */ *uap; 662 { 663 struct ostatfs osb; 664 struct statfs sf; 665 fhandle_t fh; 666 int error; 667 668 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 669 if (error) 670 return (error); 671 error = kern_fhstatfs(td, fh, &sf); 672 if (error) 673 return (error); 674 cvtstatfs(&sf, &osb); 675 return (copyout(&osb, uap->buf, sizeof(osb))); 676 } 677 678 /* 679 * Convert a new format statfs structure to an old format statfs structure. 680 */ 681 static void 682 cvtstatfs(nsp, osp) 683 struct statfs *nsp; 684 struct ostatfs *osp; 685 { 686 687 statfs_scale_blocks(nsp, LONG_MAX); 688 bzero(osp, sizeof(*osp)); 689 osp->f_bsize = nsp->f_bsize; 690 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 691 osp->f_blocks = nsp->f_blocks; 692 osp->f_bfree = nsp->f_bfree; 693 osp->f_bavail = nsp->f_bavail; 694 osp->f_files = MIN(nsp->f_files, LONG_MAX); 695 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 696 osp->f_owner = nsp->f_owner; 697 osp->f_type = nsp->f_type; 698 osp->f_flags = nsp->f_flags; 699 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 700 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 701 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 702 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 703 strlcpy(osp->f_fstypename, nsp->f_fstypename, 704 MIN(MFSNAMELEN, OMFSNAMELEN)); 705 strlcpy(osp->f_mntonname, nsp->f_mntonname, 706 MIN(MNAMELEN, OMNAMELEN)); 707 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 708 MIN(MNAMELEN, OMNAMELEN)); 709 osp->f_fsid = nsp->f_fsid; 710 } 711 #endif /* COMPAT_FREEBSD4 */ 712 713 /* 714 * Change current working directory to a given file descriptor. 715 */ 716 #ifndef _SYS_SYSPROTO_H_ 717 struct fchdir_args { 718 int fd; 719 }; 720 #endif 721 int 722 sys_fchdir(td, uap) 723 struct thread *td; 724 struct fchdir_args /* { 725 int fd; 726 } */ *uap; 727 { 728 register struct filedesc *fdp = td->td_proc->p_fd; 729 struct vnode *vp, *tdp, *vpold; 730 struct mount *mp; 731 struct file *fp; 732 int error; 733 734 AUDIT_ARG_FD(uap->fd); 735 if ((error = getvnode(fdp, uap->fd, CAP_FCHDIR, &fp)) != 0) 736 return (error); 737 vp = fp->f_vnode; 738 VREF(vp); 739 fdrop(fp, td); 740 vn_lock(vp, LK_SHARED | LK_RETRY); 741 AUDIT_ARG_VNODE1(vp); 742 error = change_dir(vp, td); 743 while (!error && (mp = vp->v_mountedhere) != NULL) { 744 if (vfs_busy(mp, 0)) 745 continue; 746 error = VFS_ROOT(mp, LK_SHARED, &tdp); 747 vfs_unbusy(mp); 748 if (error) 749 break; 750 vput(vp); 751 vp = tdp; 752 } 753 if (error) { 754 vput(vp); 755 return (error); 756 } 757 VOP_UNLOCK(vp, 0); 758 FILEDESC_XLOCK(fdp); 759 vpold = fdp->fd_cdir; 760 fdp->fd_cdir = vp; 761 FILEDESC_XUNLOCK(fdp); 762 vrele(vpold); 763 return (0); 764 } 765 766 /* 767 * Change current working directory (``.''). 768 */ 769 #ifndef _SYS_SYSPROTO_H_ 770 struct chdir_args { 771 char *path; 772 }; 773 #endif 774 int 775 sys_chdir(td, uap) 776 struct thread *td; 777 struct chdir_args /* { 778 char *path; 779 } */ *uap; 780 { 781 782 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 783 } 784 785 int 786 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 787 { 788 register struct filedesc *fdp = td->td_proc->p_fd; 789 int error; 790 struct nameidata nd; 791 struct vnode *vp; 792 793 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 794 pathseg, path, td); 795 if ((error = namei(&nd)) != 0) 796 return (error); 797 if ((error = change_dir(nd.ni_vp, td)) != 0) { 798 vput(nd.ni_vp); 799 NDFREE(&nd, NDF_ONLY_PNBUF); 800 return (error); 801 } 802 VOP_UNLOCK(nd.ni_vp, 0); 803 NDFREE(&nd, NDF_ONLY_PNBUF); 804 FILEDESC_XLOCK(fdp); 805 vp = fdp->fd_cdir; 806 fdp->fd_cdir = nd.ni_vp; 807 FILEDESC_XUNLOCK(fdp); 808 vrele(vp); 809 return (0); 810 } 811 812 /* 813 * Helper function for raised chroot(2) security function: Refuse if 814 * any filedescriptors are open directories. 815 */ 816 static int 817 chroot_refuse_vdir_fds(fdp) 818 struct filedesc *fdp; 819 { 820 struct vnode *vp; 821 struct file *fp; 822 int fd; 823 824 FILEDESC_LOCK_ASSERT(fdp); 825 826 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 827 fp = fget_locked(fdp, fd); 828 if (fp == NULL) 829 continue; 830 if (fp->f_type == DTYPE_VNODE) { 831 vp = fp->f_vnode; 832 if (vp->v_type == VDIR) 833 return (EPERM); 834 } 835 } 836 return (0); 837 } 838 839 /* 840 * This sysctl determines if we will allow a process to chroot(2) if it 841 * has a directory open: 842 * 0: disallowed for all processes. 843 * 1: allowed for processes that were not already chroot(2)'ed. 844 * 2: allowed for all processes. 845 */ 846 847 static int chroot_allow_open_directories = 1; 848 849 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 850 &chroot_allow_open_directories, 0, 851 "Allow a process to chroot(2) if it has a directory open"); 852 853 /* 854 * Change notion of root (``/'') directory. 855 */ 856 #ifndef _SYS_SYSPROTO_H_ 857 struct chroot_args { 858 char *path; 859 }; 860 #endif 861 int 862 sys_chroot(td, uap) 863 struct thread *td; 864 struct chroot_args /* { 865 char *path; 866 } */ *uap; 867 { 868 int error; 869 struct nameidata nd; 870 871 error = priv_check(td, PRIV_VFS_CHROOT); 872 if (error) 873 return (error); 874 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 875 UIO_USERSPACE, uap->path, td); 876 error = namei(&nd); 877 if (error) 878 goto error; 879 if ((error = change_dir(nd.ni_vp, td)) != 0) 880 goto e_vunlock; 881 #ifdef MAC 882 if ((error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp))) 883 goto e_vunlock; 884 #endif 885 VOP_UNLOCK(nd.ni_vp, 0); 886 error = change_root(nd.ni_vp, td); 887 vrele(nd.ni_vp); 888 NDFREE(&nd, NDF_ONLY_PNBUF); 889 return (error); 890 e_vunlock: 891 vput(nd.ni_vp); 892 error: 893 NDFREE(&nd, NDF_ONLY_PNBUF); 894 return (error); 895 } 896 897 /* 898 * Common routine for chroot and chdir. Callers must provide a locked vnode 899 * instance. 900 */ 901 int 902 change_dir(vp, td) 903 struct vnode *vp; 904 struct thread *td; 905 { 906 int error; 907 908 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 909 if (vp->v_type != VDIR) 910 return (ENOTDIR); 911 #ifdef MAC 912 error = mac_vnode_check_chdir(td->td_ucred, vp); 913 if (error) 914 return (error); 915 #endif 916 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 917 return (error); 918 } 919 920 /* 921 * Common routine for kern_chroot() and jail_attach(). The caller is 922 * responsible for invoking priv_check() and mac_vnode_check_chroot() to 923 * authorize this operation. 924 */ 925 int 926 change_root(vp, td) 927 struct vnode *vp; 928 struct thread *td; 929 { 930 struct filedesc *fdp; 931 struct vnode *oldvp; 932 int error; 933 934 fdp = td->td_proc->p_fd; 935 FILEDESC_XLOCK(fdp); 936 if (chroot_allow_open_directories == 0 || 937 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 938 error = chroot_refuse_vdir_fds(fdp); 939 if (error) { 940 FILEDESC_XUNLOCK(fdp); 941 return (error); 942 } 943 } 944 oldvp = fdp->fd_rdir; 945 fdp->fd_rdir = vp; 946 VREF(fdp->fd_rdir); 947 if (!fdp->fd_jdir) { 948 fdp->fd_jdir = vp; 949 VREF(fdp->fd_jdir); 950 } 951 FILEDESC_XUNLOCK(fdp); 952 vrele(oldvp); 953 return (0); 954 } 955 956 static __inline cap_rights_t 957 flags_to_rights(int flags) 958 { 959 cap_rights_t rights = 0; 960 961 if (flags & O_EXEC) { 962 rights |= CAP_FEXECVE; 963 } else { 964 switch ((flags & O_ACCMODE)) { 965 case O_RDONLY: 966 rights |= CAP_READ; 967 break; 968 case O_RDWR: 969 rights |= CAP_READ; 970 /* FALLTHROUGH */ 971 case O_WRONLY: 972 rights |= CAP_WRITE; 973 break; 974 } 975 } 976 977 if (flags & O_CREAT) 978 rights |= CAP_CREATE; 979 980 if (flags & O_TRUNC) 981 rights |= CAP_FTRUNCATE; 982 983 if ((flags & O_EXLOCK) || (flags & O_SHLOCK)) 984 rights |= CAP_FLOCK; 985 986 return (rights); 987 } 988 989 /* 990 * Check permissions, allocate an open file structure, and call the device 991 * open routine if any. 992 */ 993 #ifndef _SYS_SYSPROTO_H_ 994 struct open_args { 995 char *path; 996 int flags; 997 int mode; 998 }; 999 #endif 1000 int 1001 sys_open(td, uap) 1002 struct thread *td; 1003 register struct open_args /* { 1004 char *path; 1005 int flags; 1006 int mode; 1007 } */ *uap; 1008 { 1009 1010 return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode)); 1011 } 1012 1013 #ifndef _SYS_SYSPROTO_H_ 1014 struct openat_args { 1015 int fd; 1016 char *path; 1017 int flag; 1018 int mode; 1019 }; 1020 #endif 1021 int 1022 sys_openat(struct thread *td, struct openat_args *uap) 1023 { 1024 1025 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1026 uap->mode)); 1027 } 1028 1029 int 1030 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags, 1031 int mode) 1032 { 1033 1034 return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode)); 1035 } 1036 1037 int 1038 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1039 int flags, int mode) 1040 { 1041 struct proc *p = td->td_proc; 1042 struct filedesc *fdp = p->p_fd; 1043 struct file *fp; 1044 struct vnode *vp; 1045 int cmode; 1046 int indx = -1, error; 1047 struct nameidata nd; 1048 cap_rights_t rights_needed = CAP_LOOKUP; 1049 1050 AUDIT_ARG_FFLAGS(flags); 1051 AUDIT_ARG_MODE(mode); 1052 /* XXX: audit dirfd */ 1053 rights_needed |= flags_to_rights(flags); 1054 /* 1055 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1056 * may be specified. 1057 */ 1058 if (flags & O_EXEC) { 1059 if (flags & O_ACCMODE) 1060 return (EINVAL); 1061 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1062 return (EINVAL); 1063 } else { 1064 flags = FFLAGS(flags); 1065 } 1066 1067 /* 1068 * Allocate the file descriptor, but don't install a descriptor yet. 1069 */ 1070 error = falloc_noinstall(td, &fp); 1071 if (error) 1072 return (error); 1073 /* 1074 * An extra reference on `fp' has been held for us by 1075 * falloc_noinstall(). 1076 */ 1077 /* Set the flags early so the finit in devfs can pick them up. */ 1078 fp->f_flag = flags & FMASK; 1079 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; 1080 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1081 rights_needed, td); 1082 td->td_dupfd = -1; /* XXX check for fdopen */ 1083 error = vn_open(&nd, &flags, cmode, fp); 1084 if (error) { 1085 /* 1086 * If the vn_open replaced the method vector, something 1087 * wonderous happened deep below and we just pass it up 1088 * pretending we know what we do. 1089 */ 1090 if (error == ENXIO && fp->f_ops != &badfileops) 1091 goto success; 1092 1093 /* 1094 * Handle special fdopen() case. bleh. 1095 * 1096 * Don't do this for relative (capability) lookups; we don't 1097 * understand exactly what would happen, and we don't think 1098 * that it ever should. 1099 */ 1100 if (nd.ni_strictrelative == 0 && 1101 (error == ENODEV || error == ENXIO) && 1102 td->td_dupfd >= 0) { 1103 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1104 &indx); 1105 if (error == 0) 1106 goto success; 1107 } 1108 1109 goto bad; 1110 } 1111 td->td_dupfd = 0; 1112 NDFREE(&nd, NDF_ONLY_PNBUF); 1113 vp = nd.ni_vp; 1114 1115 /* 1116 * Store the vnode, for any f_type. Typically, the vnode use 1117 * count is decremented by direct call to vn_closefile() for 1118 * files that switched type in the cdevsw fdopen() method. 1119 */ 1120 fp->f_vnode = vp; 1121 /* 1122 * If the file wasn't claimed by devfs bind it to the normal 1123 * vnode operations here. 1124 */ 1125 if (fp->f_ops == &badfileops) { 1126 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1127 fp->f_seqcount = 1; 1128 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, 1129 vp, &vnops); 1130 } 1131 1132 VOP_UNLOCK(vp, 0); 1133 if (flags & O_TRUNC) { 1134 error = fo_truncate(fp, 0, td->td_ucred, td); 1135 if (error) 1136 goto bad; 1137 } 1138 success: 1139 /* 1140 * If we haven't already installed the FD (for dupfdopen), do so now. 1141 */ 1142 if (indx == -1) { 1143 #ifdef CAPABILITIES 1144 if (nd.ni_strictrelative == 1) { 1145 /* 1146 * We are doing a strict relative lookup; wrap the 1147 * result in a capability. 1148 */ 1149 if ((error = kern_capwrap(td, fp, nd.ni_baserights, 1150 &indx)) != 0) 1151 goto bad; 1152 } else 1153 #endif 1154 if ((error = finstall(td, fp, &indx, flags)) != 0) 1155 goto bad; 1156 } 1157 1158 /* 1159 * Release our private reference, leaving the one associated with 1160 * the descriptor table intact. 1161 */ 1162 fdrop(fp, td); 1163 td->td_retval[0] = indx; 1164 return (0); 1165 bad: 1166 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1167 fdrop(fp, td); 1168 return (error); 1169 } 1170 1171 #ifdef COMPAT_43 1172 /* 1173 * Create a file. 1174 */ 1175 #ifndef _SYS_SYSPROTO_H_ 1176 struct ocreat_args { 1177 char *path; 1178 int mode; 1179 }; 1180 #endif 1181 int 1182 ocreat(td, uap) 1183 struct thread *td; 1184 register struct ocreat_args /* { 1185 char *path; 1186 int mode; 1187 } */ *uap; 1188 { 1189 1190 return (kern_open(td, uap->path, UIO_USERSPACE, 1191 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1192 } 1193 #endif /* COMPAT_43 */ 1194 1195 /* 1196 * Create a special file. 1197 */ 1198 #ifndef _SYS_SYSPROTO_H_ 1199 struct mknod_args { 1200 char *path; 1201 int mode; 1202 int dev; 1203 }; 1204 #endif 1205 int 1206 sys_mknod(td, uap) 1207 struct thread *td; 1208 register struct mknod_args /* { 1209 char *path; 1210 int mode; 1211 int dev; 1212 } */ *uap; 1213 { 1214 1215 return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); 1216 } 1217 1218 #ifndef _SYS_SYSPROTO_H_ 1219 struct mknodat_args { 1220 int fd; 1221 char *path; 1222 mode_t mode; 1223 dev_t dev; 1224 }; 1225 #endif 1226 int 1227 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1228 { 1229 1230 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1231 uap->dev)); 1232 } 1233 1234 int 1235 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode, 1236 int dev) 1237 { 1238 1239 return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev)); 1240 } 1241 1242 int 1243 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1244 int mode, int dev) 1245 { 1246 struct vnode *vp; 1247 struct mount *mp; 1248 struct vattr vattr; 1249 int error; 1250 int whiteout = 0; 1251 struct nameidata nd; 1252 1253 AUDIT_ARG_MODE(mode); 1254 AUDIT_ARG_DEV(dev); 1255 switch (mode & S_IFMT) { 1256 case S_IFCHR: 1257 case S_IFBLK: 1258 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1259 break; 1260 case S_IFMT: 1261 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1262 break; 1263 case S_IFWHT: 1264 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1265 break; 1266 case S_IFIFO: 1267 if (dev == 0) 1268 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1269 /* FALLTHROUGH */ 1270 default: 1271 error = EINVAL; 1272 break; 1273 } 1274 if (error) 1275 return (error); 1276 restart: 1277 bwillwrite(); 1278 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1279 pathseg, path, fd, CAP_MKNOD, td); 1280 if ((error = namei(&nd)) != 0) 1281 return (error); 1282 vp = nd.ni_vp; 1283 if (vp != NULL) { 1284 NDFREE(&nd, NDF_ONLY_PNBUF); 1285 if (vp == nd.ni_dvp) 1286 vrele(nd.ni_dvp); 1287 else 1288 vput(nd.ni_dvp); 1289 vrele(vp); 1290 return (EEXIST); 1291 } else { 1292 VATTR_NULL(&vattr); 1293 vattr.va_mode = (mode & ALLPERMS) & 1294 ~td->td_proc->p_fd->fd_cmask; 1295 vattr.va_rdev = dev; 1296 whiteout = 0; 1297 1298 switch (mode & S_IFMT) { 1299 case S_IFMT: /* used by badsect to flag bad sectors */ 1300 vattr.va_type = VBAD; 1301 break; 1302 case S_IFCHR: 1303 vattr.va_type = VCHR; 1304 break; 1305 case S_IFBLK: 1306 vattr.va_type = VBLK; 1307 break; 1308 case S_IFWHT: 1309 whiteout = 1; 1310 break; 1311 default: 1312 panic("kern_mknod: invalid mode"); 1313 } 1314 } 1315 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1316 NDFREE(&nd, NDF_ONLY_PNBUF); 1317 vput(nd.ni_dvp); 1318 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1319 return (error); 1320 goto restart; 1321 } 1322 #ifdef MAC 1323 if (error == 0 && !whiteout) 1324 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1325 &nd.ni_cnd, &vattr); 1326 #endif 1327 if (!error) { 1328 if (whiteout) 1329 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1330 else { 1331 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1332 &nd.ni_cnd, &vattr); 1333 if (error == 0) 1334 vput(nd.ni_vp); 1335 } 1336 } 1337 NDFREE(&nd, NDF_ONLY_PNBUF); 1338 vput(nd.ni_dvp); 1339 vn_finished_write(mp); 1340 return (error); 1341 } 1342 1343 /* 1344 * Create a named pipe. 1345 */ 1346 #ifndef _SYS_SYSPROTO_H_ 1347 struct mkfifo_args { 1348 char *path; 1349 int mode; 1350 }; 1351 #endif 1352 int 1353 sys_mkfifo(td, uap) 1354 struct thread *td; 1355 register struct mkfifo_args /* { 1356 char *path; 1357 int mode; 1358 } */ *uap; 1359 { 1360 1361 return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode)); 1362 } 1363 1364 #ifndef _SYS_SYSPROTO_H_ 1365 struct mkfifoat_args { 1366 int fd; 1367 char *path; 1368 mode_t mode; 1369 }; 1370 #endif 1371 int 1372 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1373 { 1374 1375 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1376 uap->mode)); 1377 } 1378 1379 int 1380 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode) 1381 { 1382 1383 return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode)); 1384 } 1385 1386 int 1387 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1388 int mode) 1389 { 1390 struct mount *mp; 1391 struct vattr vattr; 1392 int error; 1393 struct nameidata nd; 1394 1395 AUDIT_ARG_MODE(mode); 1396 restart: 1397 bwillwrite(); 1398 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1399 pathseg, path, fd, CAP_MKFIFO, td); 1400 if ((error = namei(&nd)) != 0) 1401 return (error); 1402 if (nd.ni_vp != NULL) { 1403 NDFREE(&nd, NDF_ONLY_PNBUF); 1404 if (nd.ni_vp == nd.ni_dvp) 1405 vrele(nd.ni_dvp); 1406 else 1407 vput(nd.ni_dvp); 1408 vrele(nd.ni_vp); 1409 return (EEXIST); 1410 } 1411 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1412 NDFREE(&nd, NDF_ONLY_PNBUF); 1413 vput(nd.ni_dvp); 1414 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1415 return (error); 1416 goto restart; 1417 } 1418 VATTR_NULL(&vattr); 1419 vattr.va_type = VFIFO; 1420 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1421 #ifdef MAC 1422 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1423 &vattr); 1424 if (error) 1425 goto out; 1426 #endif 1427 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1428 if (error == 0) 1429 vput(nd.ni_vp); 1430 #ifdef MAC 1431 out: 1432 #endif 1433 vput(nd.ni_dvp); 1434 vn_finished_write(mp); 1435 NDFREE(&nd, NDF_ONLY_PNBUF); 1436 return (error); 1437 } 1438 1439 /* 1440 * Make a hard file link. 1441 */ 1442 #ifndef _SYS_SYSPROTO_H_ 1443 struct link_args { 1444 char *path; 1445 char *link; 1446 }; 1447 #endif 1448 int 1449 sys_link(td, uap) 1450 struct thread *td; 1451 register struct link_args /* { 1452 char *path; 1453 char *link; 1454 } */ *uap; 1455 { 1456 1457 return (kern_link(td, uap->path, uap->link, UIO_USERSPACE)); 1458 } 1459 1460 #ifndef _SYS_SYSPROTO_H_ 1461 struct linkat_args { 1462 int fd1; 1463 char *path1; 1464 int fd2; 1465 char *path2; 1466 int flag; 1467 }; 1468 #endif 1469 int 1470 sys_linkat(struct thread *td, struct linkat_args *uap) 1471 { 1472 int flag; 1473 1474 flag = uap->flag; 1475 if (flag & ~AT_SYMLINK_FOLLOW) 1476 return (EINVAL); 1477 1478 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1479 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); 1480 } 1481 1482 int hardlink_check_uid = 0; 1483 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1484 &hardlink_check_uid, 0, 1485 "Unprivileged processes cannot create hard links to files owned by other " 1486 "users"); 1487 static int hardlink_check_gid = 0; 1488 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1489 &hardlink_check_gid, 0, 1490 "Unprivileged processes cannot create hard links to files owned by other " 1491 "groups"); 1492 1493 static int 1494 can_hardlink(struct vnode *vp, struct ucred *cred) 1495 { 1496 struct vattr va; 1497 int error; 1498 1499 if (!hardlink_check_uid && !hardlink_check_gid) 1500 return (0); 1501 1502 error = VOP_GETATTR(vp, &va, cred); 1503 if (error != 0) 1504 return (error); 1505 1506 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1507 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1508 if (error) 1509 return (error); 1510 } 1511 1512 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1513 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1514 if (error) 1515 return (error); 1516 } 1517 1518 return (0); 1519 } 1520 1521 int 1522 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg) 1523 { 1524 1525 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW)); 1526 } 1527 1528 int 1529 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1530 enum uio_seg segflg, int follow) 1531 { 1532 struct vnode *vp; 1533 struct mount *mp; 1534 struct nameidata nd; 1535 int error; 1536 1537 bwillwrite(); 1538 NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td); 1539 1540 if ((error = namei(&nd)) != 0) 1541 return (error); 1542 NDFREE(&nd, NDF_ONLY_PNBUF); 1543 vp = nd.ni_vp; 1544 if (vp->v_type == VDIR) { 1545 vrele(vp); 1546 return (EPERM); /* POSIX */ 1547 } 1548 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 1549 vrele(vp); 1550 return (error); 1551 } 1552 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2, 1553 segflg, path2, fd2, CAP_CREATE, td); 1554 if ((error = namei(&nd)) == 0) { 1555 if (nd.ni_vp != NULL) { 1556 if (nd.ni_dvp == nd.ni_vp) 1557 vrele(nd.ni_dvp); 1558 else 1559 vput(nd.ni_dvp); 1560 vrele(nd.ni_vp); 1561 error = EEXIST; 1562 } else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) 1563 == 0) { 1564 error = can_hardlink(vp, td->td_ucred); 1565 if (error == 0) 1566 #ifdef MAC 1567 error = mac_vnode_check_link(td->td_ucred, 1568 nd.ni_dvp, vp, &nd.ni_cnd); 1569 if (error == 0) 1570 #endif 1571 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1572 VOP_UNLOCK(vp, 0); 1573 vput(nd.ni_dvp); 1574 } 1575 NDFREE(&nd, NDF_ONLY_PNBUF); 1576 } 1577 vrele(vp); 1578 vn_finished_write(mp); 1579 return (error); 1580 } 1581 1582 /* 1583 * Make a symbolic link. 1584 */ 1585 #ifndef _SYS_SYSPROTO_H_ 1586 struct symlink_args { 1587 char *path; 1588 char *link; 1589 }; 1590 #endif 1591 int 1592 sys_symlink(td, uap) 1593 struct thread *td; 1594 register struct symlink_args /* { 1595 char *path; 1596 char *link; 1597 } */ *uap; 1598 { 1599 1600 return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE)); 1601 } 1602 1603 #ifndef _SYS_SYSPROTO_H_ 1604 struct symlinkat_args { 1605 char *path; 1606 int fd; 1607 char *path2; 1608 }; 1609 #endif 1610 int 1611 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1612 { 1613 1614 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1615 UIO_USERSPACE)); 1616 } 1617 1618 int 1619 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg) 1620 { 1621 1622 return (kern_symlinkat(td, path, AT_FDCWD, link, segflg)); 1623 } 1624 1625 int 1626 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1627 enum uio_seg segflg) 1628 { 1629 struct mount *mp; 1630 struct vattr vattr; 1631 char *syspath; 1632 int error; 1633 struct nameidata nd; 1634 1635 if (segflg == UIO_SYSSPACE) { 1636 syspath = path1; 1637 } else { 1638 syspath = uma_zalloc(namei_zone, M_WAITOK); 1639 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1640 goto out; 1641 } 1642 AUDIT_ARG_TEXT(syspath); 1643 restart: 1644 bwillwrite(); 1645 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1646 segflg, path2, fd, CAP_CREATE, td); 1647 if ((error = namei(&nd)) != 0) 1648 goto out; 1649 if (nd.ni_vp) { 1650 NDFREE(&nd, NDF_ONLY_PNBUF); 1651 if (nd.ni_vp == nd.ni_dvp) 1652 vrele(nd.ni_dvp); 1653 else 1654 vput(nd.ni_dvp); 1655 vrele(nd.ni_vp); 1656 error = EEXIST; 1657 goto out; 1658 } 1659 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1660 NDFREE(&nd, NDF_ONLY_PNBUF); 1661 vput(nd.ni_dvp); 1662 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1663 goto out; 1664 goto restart; 1665 } 1666 VATTR_NULL(&vattr); 1667 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1668 #ifdef MAC 1669 vattr.va_type = VLNK; 1670 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1671 &vattr); 1672 if (error) 1673 goto out2; 1674 #endif 1675 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1676 if (error == 0) 1677 vput(nd.ni_vp); 1678 #ifdef MAC 1679 out2: 1680 #endif 1681 NDFREE(&nd, NDF_ONLY_PNBUF); 1682 vput(nd.ni_dvp); 1683 vn_finished_write(mp); 1684 out: 1685 if (segflg != UIO_SYSSPACE) 1686 uma_zfree(namei_zone, syspath); 1687 return (error); 1688 } 1689 1690 /* 1691 * Delete a whiteout from the filesystem. 1692 */ 1693 int 1694 sys_undelete(td, uap) 1695 struct thread *td; 1696 register struct undelete_args /* { 1697 char *path; 1698 } */ *uap; 1699 { 1700 int error; 1701 struct mount *mp; 1702 struct nameidata nd; 1703 1704 restart: 1705 bwillwrite(); 1706 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1707 UIO_USERSPACE, uap->path, td); 1708 error = namei(&nd); 1709 if (error) 1710 return (error); 1711 1712 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1713 NDFREE(&nd, NDF_ONLY_PNBUF); 1714 if (nd.ni_vp == nd.ni_dvp) 1715 vrele(nd.ni_dvp); 1716 else 1717 vput(nd.ni_dvp); 1718 if (nd.ni_vp) 1719 vrele(nd.ni_vp); 1720 return (EEXIST); 1721 } 1722 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1723 NDFREE(&nd, NDF_ONLY_PNBUF); 1724 vput(nd.ni_dvp); 1725 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1726 return (error); 1727 goto restart; 1728 } 1729 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1730 NDFREE(&nd, NDF_ONLY_PNBUF); 1731 vput(nd.ni_dvp); 1732 vn_finished_write(mp); 1733 return (error); 1734 } 1735 1736 /* 1737 * Delete a name from the filesystem. 1738 */ 1739 #ifndef _SYS_SYSPROTO_H_ 1740 struct unlink_args { 1741 char *path; 1742 }; 1743 #endif 1744 int 1745 sys_unlink(td, uap) 1746 struct thread *td; 1747 struct unlink_args /* { 1748 char *path; 1749 } */ *uap; 1750 { 1751 1752 return (kern_unlink(td, uap->path, UIO_USERSPACE)); 1753 } 1754 1755 #ifndef _SYS_SYSPROTO_H_ 1756 struct unlinkat_args { 1757 int fd; 1758 char *path; 1759 int flag; 1760 }; 1761 #endif 1762 int 1763 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1764 { 1765 int flag = uap->flag; 1766 int fd = uap->fd; 1767 char *path = uap->path; 1768 1769 if (flag & ~AT_REMOVEDIR) 1770 return (EINVAL); 1771 1772 if (flag & AT_REMOVEDIR) 1773 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1774 else 1775 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1776 } 1777 1778 int 1779 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg) 1780 { 1781 1782 return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0)); 1783 } 1784 1785 int 1786 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1787 ino_t oldinum) 1788 { 1789 struct mount *mp; 1790 struct vnode *vp; 1791 int error; 1792 struct nameidata nd; 1793 struct stat sb; 1794 1795 restart: 1796 bwillwrite(); 1797 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1798 pathseg, path, fd, CAP_DELETE, td); 1799 if ((error = namei(&nd)) != 0) 1800 return (error == EINVAL ? EPERM : error); 1801 vp = nd.ni_vp; 1802 if (vp->v_type == VDIR && oldinum == 0) { 1803 error = EPERM; /* POSIX */ 1804 } else if (oldinum != 0 && 1805 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1806 sb.st_ino != oldinum) { 1807 error = EIDRM; /* Identifier removed */ 1808 } else { 1809 /* 1810 * The root of a mounted filesystem cannot be deleted. 1811 * 1812 * XXX: can this only be a VDIR case? 1813 */ 1814 if (vp->v_vflag & VV_ROOT) 1815 error = EBUSY; 1816 } 1817 if (error == 0) { 1818 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1819 NDFREE(&nd, NDF_ONLY_PNBUF); 1820 vput(nd.ni_dvp); 1821 if (vp == nd.ni_dvp) 1822 vrele(vp); 1823 else 1824 vput(vp); 1825 if ((error = vn_start_write(NULL, &mp, 1826 V_XSLEEP | PCATCH)) != 0) 1827 return (error); 1828 goto restart; 1829 } 1830 #ifdef MAC 1831 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1832 &nd.ni_cnd); 1833 if (error) 1834 goto out; 1835 #endif 1836 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1837 #ifdef MAC 1838 out: 1839 #endif 1840 vn_finished_write(mp); 1841 } 1842 NDFREE(&nd, NDF_ONLY_PNBUF); 1843 vput(nd.ni_dvp); 1844 if (vp == nd.ni_dvp) 1845 vrele(vp); 1846 else 1847 vput(vp); 1848 return (error); 1849 } 1850 1851 /* 1852 * Reposition read/write file offset. 1853 */ 1854 #ifndef _SYS_SYSPROTO_H_ 1855 struct lseek_args { 1856 int fd; 1857 int pad; 1858 off_t offset; 1859 int whence; 1860 }; 1861 #endif 1862 int 1863 sys_lseek(td, uap) 1864 struct thread *td; 1865 register struct lseek_args /* { 1866 int fd; 1867 int pad; 1868 off_t offset; 1869 int whence; 1870 } */ *uap; 1871 { 1872 struct ucred *cred = td->td_ucred; 1873 struct file *fp; 1874 struct vnode *vp; 1875 struct vattr vattr; 1876 off_t foffset, offset, size; 1877 int error, noneg; 1878 1879 AUDIT_ARG_FD(uap->fd); 1880 if ((error = fget(td, uap->fd, CAP_SEEK, &fp)) != 0) 1881 return (error); 1882 if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) { 1883 fdrop(fp, td); 1884 return (ESPIPE); 1885 } 1886 vp = fp->f_vnode; 1887 foffset = foffset_lock(fp, 0); 1888 noneg = (vp->v_type != VCHR); 1889 offset = uap->offset; 1890 switch (uap->whence) { 1891 case L_INCR: 1892 if (noneg && 1893 (foffset < 0 || 1894 (offset > 0 && foffset > OFF_MAX - offset))) { 1895 error = EOVERFLOW; 1896 break; 1897 } 1898 offset += foffset; 1899 break; 1900 case L_XTND: 1901 vn_lock(vp, LK_SHARED | LK_RETRY); 1902 error = VOP_GETATTR(vp, &vattr, cred); 1903 VOP_UNLOCK(vp, 0); 1904 if (error) 1905 break; 1906 1907 /* 1908 * If the file references a disk device, then fetch 1909 * the media size and use that to determine the ending 1910 * offset. 1911 */ 1912 if (vattr.va_size == 0 && vp->v_type == VCHR && 1913 fo_ioctl(fp, DIOCGMEDIASIZE, &size, cred, td) == 0) 1914 vattr.va_size = size; 1915 if (noneg && 1916 (vattr.va_size > OFF_MAX || 1917 (offset > 0 && vattr.va_size > OFF_MAX - offset))) { 1918 error = EOVERFLOW; 1919 break; 1920 } 1921 offset += vattr.va_size; 1922 break; 1923 case L_SET: 1924 break; 1925 case SEEK_DATA: 1926 error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td); 1927 break; 1928 case SEEK_HOLE: 1929 error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td); 1930 break; 1931 default: 1932 error = EINVAL; 1933 } 1934 if (error == 0 && noneg && offset < 0) 1935 error = EINVAL; 1936 if (error != 0) 1937 goto drop; 1938 VFS_KNOTE_UNLOCKED(vp, 0); 1939 *(off_t *)(td->td_retval) = offset; 1940 drop: 1941 fdrop(fp, td); 1942 foffset_unlock(fp, offset, error != 0 ? FOF_NOUPDATE : 0); 1943 return (error); 1944 } 1945 1946 #if defined(COMPAT_43) 1947 /* 1948 * Reposition read/write file offset. 1949 */ 1950 #ifndef _SYS_SYSPROTO_H_ 1951 struct olseek_args { 1952 int fd; 1953 long offset; 1954 int whence; 1955 }; 1956 #endif 1957 int 1958 olseek(td, uap) 1959 struct thread *td; 1960 register struct olseek_args /* { 1961 int fd; 1962 long offset; 1963 int whence; 1964 } */ *uap; 1965 { 1966 struct lseek_args /* { 1967 int fd; 1968 int pad; 1969 off_t offset; 1970 int whence; 1971 } */ nuap; 1972 1973 nuap.fd = uap->fd; 1974 nuap.offset = uap->offset; 1975 nuap.whence = uap->whence; 1976 return (sys_lseek(td, &nuap)); 1977 } 1978 #endif /* COMPAT_43 */ 1979 1980 /* Version with the 'pad' argument */ 1981 int 1982 freebsd6_lseek(td, uap) 1983 struct thread *td; 1984 register struct freebsd6_lseek_args *uap; 1985 { 1986 struct lseek_args ouap; 1987 1988 ouap.fd = uap->fd; 1989 ouap.offset = uap->offset; 1990 ouap.whence = uap->whence; 1991 return (sys_lseek(td, &ouap)); 1992 } 1993 1994 /* 1995 * Check access permissions using passed credentials. 1996 */ 1997 static int 1998 vn_access(vp, user_flags, cred, td) 1999 struct vnode *vp; 2000 int user_flags; 2001 struct ucred *cred; 2002 struct thread *td; 2003 { 2004 int error; 2005 accmode_t accmode; 2006 2007 /* Flags == 0 means only check for existence. */ 2008 error = 0; 2009 if (user_flags) { 2010 accmode = 0; 2011 if (user_flags & R_OK) 2012 accmode |= VREAD; 2013 if (user_flags & W_OK) 2014 accmode |= VWRITE; 2015 if (user_flags & X_OK) 2016 accmode |= VEXEC; 2017 #ifdef MAC 2018 error = mac_vnode_check_access(cred, vp, accmode); 2019 if (error) 2020 return (error); 2021 #endif 2022 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2023 error = VOP_ACCESS(vp, accmode, cred, td); 2024 } 2025 return (error); 2026 } 2027 2028 /* 2029 * Check access permissions using "real" credentials. 2030 */ 2031 #ifndef _SYS_SYSPROTO_H_ 2032 struct access_args { 2033 char *path; 2034 int amode; 2035 }; 2036 #endif 2037 int 2038 sys_access(td, uap) 2039 struct thread *td; 2040 register struct access_args /* { 2041 char *path; 2042 int amode; 2043 } */ *uap; 2044 { 2045 2046 return (kern_access(td, uap->path, UIO_USERSPACE, uap->amode)); 2047 } 2048 2049 #ifndef _SYS_SYSPROTO_H_ 2050 struct faccessat_args { 2051 int dirfd; 2052 char *path; 2053 int amode; 2054 int flag; 2055 } 2056 #endif 2057 int 2058 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2059 { 2060 2061 if (uap->flag & ~AT_EACCESS) 2062 return (EINVAL); 2063 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2064 uap->amode)); 2065 } 2066 2067 int 2068 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2069 { 2070 2071 return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, amode)); 2072 } 2073 2074 int 2075 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2076 int flag, int amode) 2077 { 2078 struct ucred *cred, *tmpcred; 2079 struct vnode *vp; 2080 struct nameidata nd; 2081 int error; 2082 2083 /* 2084 * Create and modify a temporary credential instead of one that 2085 * is potentially shared. 2086 */ 2087 if (!(flag & AT_EACCESS)) { 2088 cred = td->td_ucred; 2089 tmpcred = crdup(cred); 2090 tmpcred->cr_uid = cred->cr_ruid; 2091 tmpcred->cr_groups[0] = cred->cr_rgid; 2092 td->td_ucred = tmpcred; 2093 } else 2094 cred = tmpcred = td->td_ucred; 2095 AUDIT_ARG_VALUE(amode); 2096 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2097 AUDITVNODE1, pathseg, path, fd, CAP_FSTAT, td); 2098 if ((error = namei(&nd)) != 0) 2099 goto out1; 2100 vp = nd.ni_vp; 2101 2102 error = vn_access(vp, amode, tmpcred, td); 2103 NDFREE(&nd, NDF_ONLY_PNBUF); 2104 vput(vp); 2105 out1: 2106 if (!(flag & AT_EACCESS)) { 2107 td->td_ucred = cred; 2108 crfree(tmpcred); 2109 } 2110 return (error); 2111 } 2112 2113 /* 2114 * Check access permissions using "effective" credentials. 2115 */ 2116 #ifndef _SYS_SYSPROTO_H_ 2117 struct eaccess_args { 2118 char *path; 2119 int amode; 2120 }; 2121 #endif 2122 int 2123 sys_eaccess(td, uap) 2124 struct thread *td; 2125 register struct eaccess_args /* { 2126 char *path; 2127 int amode; 2128 } */ *uap; 2129 { 2130 2131 return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->amode)); 2132 } 2133 2134 int 2135 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2136 { 2137 2138 return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, amode)); 2139 } 2140 2141 #if defined(COMPAT_43) 2142 /* 2143 * Get file status; this version follows links. 2144 */ 2145 #ifndef _SYS_SYSPROTO_H_ 2146 struct ostat_args { 2147 char *path; 2148 struct ostat *ub; 2149 }; 2150 #endif 2151 int 2152 ostat(td, uap) 2153 struct thread *td; 2154 register struct ostat_args /* { 2155 char *path; 2156 struct ostat *ub; 2157 } */ *uap; 2158 { 2159 struct stat sb; 2160 struct ostat osb; 2161 int error; 2162 2163 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2164 if (error) 2165 return (error); 2166 cvtstat(&sb, &osb); 2167 error = copyout(&osb, uap->ub, sizeof (osb)); 2168 return (error); 2169 } 2170 2171 /* 2172 * Get file status; this version does not follow links. 2173 */ 2174 #ifndef _SYS_SYSPROTO_H_ 2175 struct olstat_args { 2176 char *path; 2177 struct ostat *ub; 2178 }; 2179 #endif 2180 int 2181 olstat(td, uap) 2182 struct thread *td; 2183 register struct olstat_args /* { 2184 char *path; 2185 struct ostat *ub; 2186 } */ *uap; 2187 { 2188 struct stat sb; 2189 struct ostat osb; 2190 int error; 2191 2192 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2193 if (error) 2194 return (error); 2195 cvtstat(&sb, &osb); 2196 error = copyout(&osb, uap->ub, sizeof (osb)); 2197 return (error); 2198 } 2199 2200 /* 2201 * Convert from an old to a new stat structure. 2202 */ 2203 void 2204 cvtstat(st, ost) 2205 struct stat *st; 2206 struct ostat *ost; 2207 { 2208 2209 ost->st_dev = st->st_dev; 2210 ost->st_ino = st->st_ino; 2211 ost->st_mode = st->st_mode; 2212 ost->st_nlink = st->st_nlink; 2213 ost->st_uid = st->st_uid; 2214 ost->st_gid = st->st_gid; 2215 ost->st_rdev = st->st_rdev; 2216 if (st->st_size < (quad_t)1 << 32) 2217 ost->st_size = st->st_size; 2218 else 2219 ost->st_size = -2; 2220 ost->st_atim = st->st_atim; 2221 ost->st_mtim = st->st_mtim; 2222 ost->st_ctim = st->st_ctim; 2223 ost->st_blksize = st->st_blksize; 2224 ost->st_blocks = st->st_blocks; 2225 ost->st_flags = st->st_flags; 2226 ost->st_gen = st->st_gen; 2227 } 2228 #endif /* COMPAT_43 */ 2229 2230 /* 2231 * Get file status; this version follows links. 2232 */ 2233 #ifndef _SYS_SYSPROTO_H_ 2234 struct stat_args { 2235 char *path; 2236 struct stat *ub; 2237 }; 2238 #endif 2239 int 2240 sys_stat(td, uap) 2241 struct thread *td; 2242 register struct stat_args /* { 2243 char *path; 2244 struct stat *ub; 2245 } */ *uap; 2246 { 2247 struct stat sb; 2248 int error; 2249 2250 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2251 if (error == 0) 2252 error = copyout(&sb, uap->ub, sizeof (sb)); 2253 return (error); 2254 } 2255 2256 #ifndef _SYS_SYSPROTO_H_ 2257 struct fstatat_args { 2258 int fd; 2259 char *path; 2260 struct stat *buf; 2261 int flag; 2262 } 2263 #endif 2264 int 2265 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2266 { 2267 struct stat sb; 2268 int error; 2269 2270 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2271 UIO_USERSPACE, &sb); 2272 if (error == 0) 2273 error = copyout(&sb, uap->buf, sizeof (sb)); 2274 return (error); 2275 } 2276 2277 int 2278 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2279 { 2280 2281 return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp)); 2282 } 2283 2284 int 2285 kern_statat(struct thread *td, int flag, int fd, char *path, 2286 enum uio_seg pathseg, struct stat *sbp) 2287 { 2288 2289 return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL)); 2290 } 2291 2292 int 2293 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path, 2294 enum uio_seg pathseg, struct stat *sbp, 2295 void (*hook)(struct vnode *vp, struct stat *sbp)) 2296 { 2297 struct nameidata nd; 2298 struct stat sb; 2299 int error; 2300 2301 if (flag & ~AT_SYMLINK_NOFOLLOW) 2302 return (EINVAL); 2303 2304 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2305 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2306 CAP_FSTAT, td); 2307 2308 if ((error = namei(&nd)) != 0) 2309 return (error); 2310 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2311 if (!error) { 2312 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0); 2313 if (S_ISREG(sb.st_mode)) 2314 SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0); 2315 if (__predict_false(hook != NULL)) 2316 hook(nd.ni_vp, &sb); 2317 } 2318 NDFREE(&nd, NDF_ONLY_PNBUF); 2319 vput(nd.ni_vp); 2320 if (error) 2321 return (error); 2322 *sbp = sb; 2323 #ifdef KTRACE 2324 if (KTRPOINT(td, KTR_STRUCT)) 2325 ktrstat(&sb); 2326 #endif 2327 return (0); 2328 } 2329 2330 /* 2331 * Get file status; this version does not follow links. 2332 */ 2333 #ifndef _SYS_SYSPROTO_H_ 2334 struct lstat_args { 2335 char *path; 2336 struct stat *ub; 2337 }; 2338 #endif 2339 int 2340 sys_lstat(td, uap) 2341 struct thread *td; 2342 register struct lstat_args /* { 2343 char *path; 2344 struct stat *ub; 2345 } */ *uap; 2346 { 2347 struct stat sb; 2348 int error; 2349 2350 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2351 if (error == 0) 2352 error = copyout(&sb, uap->ub, sizeof (sb)); 2353 return (error); 2354 } 2355 2356 int 2357 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2358 { 2359 2360 return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg, 2361 sbp)); 2362 } 2363 2364 /* 2365 * Implementation of the NetBSD [l]stat() functions. 2366 */ 2367 void 2368 cvtnstat(sb, nsb) 2369 struct stat *sb; 2370 struct nstat *nsb; 2371 { 2372 bzero(nsb, sizeof *nsb); 2373 nsb->st_dev = sb->st_dev; 2374 nsb->st_ino = sb->st_ino; 2375 nsb->st_mode = sb->st_mode; 2376 nsb->st_nlink = sb->st_nlink; 2377 nsb->st_uid = sb->st_uid; 2378 nsb->st_gid = sb->st_gid; 2379 nsb->st_rdev = sb->st_rdev; 2380 nsb->st_atim = sb->st_atim; 2381 nsb->st_mtim = sb->st_mtim; 2382 nsb->st_ctim = sb->st_ctim; 2383 nsb->st_size = sb->st_size; 2384 nsb->st_blocks = sb->st_blocks; 2385 nsb->st_blksize = sb->st_blksize; 2386 nsb->st_flags = sb->st_flags; 2387 nsb->st_gen = sb->st_gen; 2388 nsb->st_birthtim = sb->st_birthtim; 2389 } 2390 2391 #ifndef _SYS_SYSPROTO_H_ 2392 struct nstat_args { 2393 char *path; 2394 struct nstat *ub; 2395 }; 2396 #endif 2397 int 2398 sys_nstat(td, uap) 2399 struct thread *td; 2400 register struct nstat_args /* { 2401 char *path; 2402 struct nstat *ub; 2403 } */ *uap; 2404 { 2405 struct stat sb; 2406 struct nstat nsb; 2407 int error; 2408 2409 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2410 if (error) 2411 return (error); 2412 cvtnstat(&sb, &nsb); 2413 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2414 return (error); 2415 } 2416 2417 /* 2418 * NetBSD lstat. Get file status; this version does not follow links. 2419 */ 2420 #ifndef _SYS_SYSPROTO_H_ 2421 struct lstat_args { 2422 char *path; 2423 struct stat *ub; 2424 }; 2425 #endif 2426 int 2427 sys_nlstat(td, uap) 2428 struct thread *td; 2429 register struct nlstat_args /* { 2430 char *path; 2431 struct nstat *ub; 2432 } */ *uap; 2433 { 2434 struct stat sb; 2435 struct nstat nsb; 2436 int error; 2437 2438 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2439 if (error) 2440 return (error); 2441 cvtnstat(&sb, &nsb); 2442 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2443 return (error); 2444 } 2445 2446 /* 2447 * Get configurable pathname variables. 2448 */ 2449 #ifndef _SYS_SYSPROTO_H_ 2450 struct pathconf_args { 2451 char *path; 2452 int name; 2453 }; 2454 #endif 2455 int 2456 sys_pathconf(td, uap) 2457 struct thread *td; 2458 register struct pathconf_args /* { 2459 char *path; 2460 int name; 2461 } */ *uap; 2462 { 2463 2464 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2465 } 2466 2467 #ifndef _SYS_SYSPROTO_H_ 2468 struct lpathconf_args { 2469 char *path; 2470 int name; 2471 }; 2472 #endif 2473 int 2474 sys_lpathconf(td, uap) 2475 struct thread *td; 2476 register struct lpathconf_args /* { 2477 char *path; 2478 int name; 2479 } */ *uap; 2480 { 2481 2482 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2483 NOFOLLOW)); 2484 } 2485 2486 int 2487 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2488 u_long flags) 2489 { 2490 struct nameidata nd; 2491 int error; 2492 2493 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2494 pathseg, path, td); 2495 if ((error = namei(&nd)) != 0) 2496 return (error); 2497 NDFREE(&nd, NDF_ONLY_PNBUF); 2498 2499 /* If asynchronous I/O is available, it works for all files. */ 2500 if (name == _PC_ASYNC_IO) 2501 td->td_retval[0] = async_io_version; 2502 else 2503 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2504 vput(nd.ni_vp); 2505 return (error); 2506 } 2507 2508 /* 2509 * Return target name of a symbolic link. 2510 */ 2511 #ifndef _SYS_SYSPROTO_H_ 2512 struct readlink_args { 2513 char *path; 2514 char *buf; 2515 size_t count; 2516 }; 2517 #endif 2518 int 2519 sys_readlink(td, uap) 2520 struct thread *td; 2521 register struct readlink_args /* { 2522 char *path; 2523 char *buf; 2524 size_t count; 2525 } */ *uap; 2526 { 2527 2528 return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf, 2529 UIO_USERSPACE, uap->count)); 2530 } 2531 #ifndef _SYS_SYSPROTO_H_ 2532 struct readlinkat_args { 2533 int fd; 2534 char *path; 2535 char *buf; 2536 size_t bufsize; 2537 }; 2538 #endif 2539 int 2540 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2541 { 2542 2543 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2544 uap->buf, UIO_USERSPACE, uap->bufsize)); 2545 } 2546 2547 int 2548 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf, 2549 enum uio_seg bufseg, size_t count) 2550 { 2551 2552 return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg, 2553 count)); 2554 } 2555 2556 int 2557 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2558 char *buf, enum uio_seg bufseg, size_t count) 2559 { 2560 struct vnode *vp; 2561 struct iovec aiov; 2562 struct uio auio; 2563 int error; 2564 struct nameidata nd; 2565 2566 if (count > IOSIZE_MAX) 2567 return (EINVAL); 2568 2569 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2570 pathseg, path, fd, td); 2571 2572 if ((error = namei(&nd)) != 0) 2573 return (error); 2574 NDFREE(&nd, NDF_ONLY_PNBUF); 2575 vp = nd.ni_vp; 2576 #ifdef MAC 2577 error = mac_vnode_check_readlink(td->td_ucred, vp); 2578 if (error) { 2579 vput(vp); 2580 return (error); 2581 } 2582 #endif 2583 if (vp->v_type != VLNK) 2584 error = EINVAL; 2585 else { 2586 aiov.iov_base = buf; 2587 aiov.iov_len = count; 2588 auio.uio_iov = &aiov; 2589 auio.uio_iovcnt = 1; 2590 auio.uio_offset = 0; 2591 auio.uio_rw = UIO_READ; 2592 auio.uio_segflg = bufseg; 2593 auio.uio_td = td; 2594 auio.uio_resid = count; 2595 error = VOP_READLINK(vp, &auio, td->td_ucred); 2596 } 2597 vput(vp); 2598 td->td_retval[0] = count - auio.uio_resid; 2599 return (error); 2600 } 2601 2602 /* 2603 * Common implementation code for chflags() and fchflags(). 2604 */ 2605 static int 2606 setfflags(td, vp, flags) 2607 struct thread *td; 2608 struct vnode *vp; 2609 int flags; 2610 { 2611 int error; 2612 struct mount *mp; 2613 struct vattr vattr; 2614 2615 /* We can't support the value matching VNOVAL. */ 2616 if (flags == VNOVAL) 2617 return (EOPNOTSUPP); 2618 2619 /* 2620 * Prevent non-root users from setting flags on devices. When 2621 * a device is reused, users can retain ownership of the device 2622 * if they are allowed to set flags and programs assume that 2623 * chown can't fail when done as root. 2624 */ 2625 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2626 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2627 if (error) 2628 return (error); 2629 } 2630 2631 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2632 return (error); 2633 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2634 VATTR_NULL(&vattr); 2635 vattr.va_flags = flags; 2636 #ifdef MAC 2637 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2638 if (error == 0) 2639 #endif 2640 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2641 VOP_UNLOCK(vp, 0); 2642 vn_finished_write(mp); 2643 return (error); 2644 } 2645 2646 /* 2647 * Change flags of a file given a path name. 2648 */ 2649 #ifndef _SYS_SYSPROTO_H_ 2650 struct chflags_args { 2651 char *path; 2652 int flags; 2653 }; 2654 #endif 2655 int 2656 sys_chflags(td, uap) 2657 struct thread *td; 2658 register struct chflags_args /* { 2659 char *path; 2660 int flags; 2661 } */ *uap; 2662 { 2663 int error; 2664 struct nameidata nd; 2665 2666 AUDIT_ARG_FFLAGS(uap->flags); 2667 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, UIO_USERSPACE, uap->path, td); 2668 if ((error = namei(&nd)) != 0) 2669 return (error); 2670 NDFREE(&nd, NDF_ONLY_PNBUF); 2671 error = setfflags(td, nd.ni_vp, uap->flags); 2672 vrele(nd.ni_vp); 2673 return (error); 2674 } 2675 2676 /* 2677 * Same as chflags() but doesn't follow symlinks. 2678 */ 2679 int 2680 sys_lchflags(td, uap) 2681 struct thread *td; 2682 register struct lchflags_args /* { 2683 char *path; 2684 int flags; 2685 } */ *uap; 2686 { 2687 int error; 2688 struct nameidata nd; 2689 2690 AUDIT_ARG_FFLAGS(uap->flags); 2691 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, UIO_USERSPACE, uap->path, 2692 td); 2693 if ((error = namei(&nd)) != 0) 2694 return (error); 2695 NDFREE(&nd, NDF_ONLY_PNBUF); 2696 error = setfflags(td, nd.ni_vp, uap->flags); 2697 vrele(nd.ni_vp); 2698 return (error); 2699 } 2700 2701 /* 2702 * Change flags of a file given a file descriptor. 2703 */ 2704 #ifndef _SYS_SYSPROTO_H_ 2705 struct fchflags_args { 2706 int fd; 2707 int flags; 2708 }; 2709 #endif 2710 int 2711 sys_fchflags(td, uap) 2712 struct thread *td; 2713 register struct fchflags_args /* { 2714 int fd; 2715 int flags; 2716 } */ *uap; 2717 { 2718 struct file *fp; 2719 int error; 2720 2721 AUDIT_ARG_FD(uap->fd); 2722 AUDIT_ARG_FFLAGS(uap->flags); 2723 if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FCHFLAGS, 2724 &fp)) != 0) 2725 return (error); 2726 #ifdef AUDIT 2727 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2728 AUDIT_ARG_VNODE1(fp->f_vnode); 2729 VOP_UNLOCK(fp->f_vnode, 0); 2730 #endif 2731 error = setfflags(td, fp->f_vnode, uap->flags); 2732 fdrop(fp, td); 2733 return (error); 2734 } 2735 2736 /* 2737 * Common implementation code for chmod(), lchmod() and fchmod(). 2738 */ 2739 int 2740 setfmode(td, cred, vp, mode) 2741 struct thread *td; 2742 struct ucred *cred; 2743 struct vnode *vp; 2744 int mode; 2745 { 2746 int error; 2747 struct mount *mp; 2748 struct vattr vattr; 2749 2750 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2751 return (error); 2752 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2753 VATTR_NULL(&vattr); 2754 vattr.va_mode = mode & ALLPERMS; 2755 #ifdef MAC 2756 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2757 if (error == 0) 2758 #endif 2759 error = VOP_SETATTR(vp, &vattr, cred); 2760 VOP_UNLOCK(vp, 0); 2761 vn_finished_write(mp); 2762 return (error); 2763 } 2764 2765 /* 2766 * Change mode of a file given path name. 2767 */ 2768 #ifndef _SYS_SYSPROTO_H_ 2769 struct chmod_args { 2770 char *path; 2771 int mode; 2772 }; 2773 #endif 2774 int 2775 sys_chmod(td, uap) 2776 struct thread *td; 2777 register struct chmod_args /* { 2778 char *path; 2779 int mode; 2780 } */ *uap; 2781 { 2782 2783 return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode)); 2784 } 2785 2786 #ifndef _SYS_SYSPROTO_H_ 2787 struct fchmodat_args { 2788 int dirfd; 2789 char *path; 2790 mode_t mode; 2791 int flag; 2792 } 2793 #endif 2794 int 2795 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2796 { 2797 int flag = uap->flag; 2798 int fd = uap->fd; 2799 char *path = uap->path; 2800 mode_t mode = uap->mode; 2801 2802 if (flag & ~AT_SYMLINK_NOFOLLOW) 2803 return (EINVAL); 2804 2805 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2806 } 2807 2808 int 2809 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode) 2810 { 2811 2812 return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0)); 2813 } 2814 2815 /* 2816 * Change mode of a file given path name (don't follow links.) 2817 */ 2818 #ifndef _SYS_SYSPROTO_H_ 2819 struct lchmod_args { 2820 char *path; 2821 int mode; 2822 }; 2823 #endif 2824 int 2825 sys_lchmod(td, uap) 2826 struct thread *td; 2827 register struct lchmod_args /* { 2828 char *path; 2829 int mode; 2830 } */ *uap; 2831 { 2832 2833 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2834 uap->mode, AT_SYMLINK_NOFOLLOW)); 2835 } 2836 2837 2838 int 2839 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2840 mode_t mode, int flag) 2841 { 2842 int error; 2843 struct nameidata nd; 2844 int follow; 2845 2846 AUDIT_ARG_MODE(mode); 2847 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2848 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2849 CAP_FCHMOD, td); 2850 if ((error = namei(&nd)) != 0) 2851 return (error); 2852 NDFREE(&nd, NDF_ONLY_PNBUF); 2853 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2854 vrele(nd.ni_vp); 2855 return (error); 2856 } 2857 2858 /* 2859 * Change mode of a file given a file descriptor. 2860 */ 2861 #ifndef _SYS_SYSPROTO_H_ 2862 struct fchmod_args { 2863 int fd; 2864 int mode; 2865 }; 2866 #endif 2867 int 2868 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2869 { 2870 struct file *fp; 2871 int error; 2872 2873 AUDIT_ARG_FD(uap->fd); 2874 AUDIT_ARG_MODE(uap->mode); 2875 2876 error = fget(td, uap->fd, CAP_FCHMOD, &fp); 2877 if (error != 0) 2878 return (error); 2879 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2880 fdrop(fp, td); 2881 return (error); 2882 } 2883 2884 /* 2885 * Common implementation for chown(), lchown(), and fchown() 2886 */ 2887 int 2888 setfown(td, cred, vp, uid, gid) 2889 struct thread *td; 2890 struct ucred *cred; 2891 struct vnode *vp; 2892 uid_t uid; 2893 gid_t gid; 2894 { 2895 int error; 2896 struct mount *mp; 2897 struct vattr vattr; 2898 2899 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2900 return (error); 2901 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2902 VATTR_NULL(&vattr); 2903 vattr.va_uid = uid; 2904 vattr.va_gid = gid; 2905 #ifdef MAC 2906 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2907 vattr.va_gid); 2908 if (error == 0) 2909 #endif 2910 error = VOP_SETATTR(vp, &vattr, cred); 2911 VOP_UNLOCK(vp, 0); 2912 vn_finished_write(mp); 2913 return (error); 2914 } 2915 2916 /* 2917 * Set ownership given a path name. 2918 */ 2919 #ifndef _SYS_SYSPROTO_H_ 2920 struct chown_args { 2921 char *path; 2922 int uid; 2923 int gid; 2924 }; 2925 #endif 2926 int 2927 sys_chown(td, uap) 2928 struct thread *td; 2929 register struct chown_args /* { 2930 char *path; 2931 int uid; 2932 int gid; 2933 } */ *uap; 2934 { 2935 2936 return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 2937 } 2938 2939 #ifndef _SYS_SYSPROTO_H_ 2940 struct fchownat_args { 2941 int fd; 2942 const char * path; 2943 uid_t uid; 2944 gid_t gid; 2945 int flag; 2946 }; 2947 #endif 2948 int 2949 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2950 { 2951 int flag; 2952 2953 flag = uap->flag; 2954 if (flag & ~AT_SYMLINK_NOFOLLOW) 2955 return (EINVAL); 2956 2957 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2958 uap->gid, uap->flag)); 2959 } 2960 2961 int 2962 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 2963 int gid) 2964 { 2965 2966 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0)); 2967 } 2968 2969 int 2970 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2971 int uid, int gid, int flag) 2972 { 2973 struct nameidata nd; 2974 int error, follow; 2975 2976 AUDIT_ARG_OWNER(uid, gid); 2977 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2978 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2979 CAP_FCHOWN, td); 2980 2981 if ((error = namei(&nd)) != 0) 2982 return (error); 2983 NDFREE(&nd, NDF_ONLY_PNBUF); 2984 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2985 vrele(nd.ni_vp); 2986 return (error); 2987 } 2988 2989 /* 2990 * Set ownership given a path name, do not cross symlinks. 2991 */ 2992 #ifndef _SYS_SYSPROTO_H_ 2993 struct lchown_args { 2994 char *path; 2995 int uid; 2996 int gid; 2997 }; 2998 #endif 2999 int 3000 sys_lchown(td, uap) 3001 struct thread *td; 3002 register struct lchown_args /* { 3003 char *path; 3004 int uid; 3005 int gid; 3006 } */ *uap; 3007 { 3008 3009 return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 3010 } 3011 3012 int 3013 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 3014 int gid) 3015 { 3016 3017 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 3018 AT_SYMLINK_NOFOLLOW)); 3019 } 3020 3021 /* 3022 * Set ownership given a file descriptor. 3023 */ 3024 #ifndef _SYS_SYSPROTO_H_ 3025 struct fchown_args { 3026 int fd; 3027 int uid; 3028 int gid; 3029 }; 3030 #endif 3031 int 3032 sys_fchown(td, uap) 3033 struct thread *td; 3034 register struct fchown_args /* { 3035 int fd; 3036 int uid; 3037 int gid; 3038 } */ *uap; 3039 { 3040 struct file *fp; 3041 int error; 3042 3043 AUDIT_ARG_FD(uap->fd); 3044 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3045 error = fget(td, uap->fd, CAP_FCHOWN, &fp); 3046 if (error != 0) 3047 return (error); 3048 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3049 fdrop(fp, td); 3050 return (error); 3051 } 3052 3053 /* 3054 * Common implementation code for utimes(), lutimes(), and futimes(). 3055 */ 3056 static int 3057 getutimes(usrtvp, tvpseg, tsp) 3058 const struct timeval *usrtvp; 3059 enum uio_seg tvpseg; 3060 struct timespec *tsp; 3061 { 3062 struct timeval tv[2]; 3063 const struct timeval *tvp; 3064 int error; 3065 3066 if (usrtvp == NULL) { 3067 vfs_timestamp(&tsp[0]); 3068 tsp[1] = tsp[0]; 3069 } else { 3070 if (tvpseg == UIO_SYSSPACE) { 3071 tvp = usrtvp; 3072 } else { 3073 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3074 return (error); 3075 tvp = tv; 3076 } 3077 3078 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3079 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3080 return (EINVAL); 3081 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3082 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3083 } 3084 return (0); 3085 } 3086 3087 /* 3088 * Common implementation code for utimes(), lutimes(), and futimes(). 3089 */ 3090 static int 3091 setutimes(td, vp, ts, numtimes, nullflag) 3092 struct thread *td; 3093 struct vnode *vp; 3094 const struct timespec *ts; 3095 int numtimes; 3096 int nullflag; 3097 { 3098 int error, setbirthtime; 3099 struct mount *mp; 3100 struct vattr vattr; 3101 3102 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3103 return (error); 3104 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3105 setbirthtime = 0; 3106 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3107 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3108 setbirthtime = 1; 3109 VATTR_NULL(&vattr); 3110 vattr.va_atime = ts[0]; 3111 vattr.va_mtime = ts[1]; 3112 if (setbirthtime) 3113 vattr.va_birthtime = ts[1]; 3114 if (numtimes > 2) 3115 vattr.va_birthtime = ts[2]; 3116 if (nullflag) 3117 vattr.va_vaflags |= VA_UTIMES_NULL; 3118 #ifdef MAC 3119 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3120 vattr.va_mtime); 3121 #endif 3122 if (error == 0) 3123 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3124 VOP_UNLOCK(vp, 0); 3125 vn_finished_write(mp); 3126 return (error); 3127 } 3128 3129 /* 3130 * Set the access and modification times of a file. 3131 */ 3132 #ifndef _SYS_SYSPROTO_H_ 3133 struct utimes_args { 3134 char *path; 3135 struct timeval *tptr; 3136 }; 3137 #endif 3138 int 3139 sys_utimes(td, uap) 3140 struct thread *td; 3141 register struct utimes_args /* { 3142 char *path; 3143 struct timeval *tptr; 3144 } */ *uap; 3145 { 3146 3147 return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3148 UIO_USERSPACE)); 3149 } 3150 3151 #ifndef _SYS_SYSPROTO_H_ 3152 struct futimesat_args { 3153 int fd; 3154 const char * path; 3155 const struct timeval * times; 3156 }; 3157 #endif 3158 int 3159 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3160 { 3161 3162 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3163 uap->times, UIO_USERSPACE)); 3164 } 3165 3166 int 3167 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg, 3168 struct timeval *tptr, enum uio_seg tptrseg) 3169 { 3170 3171 return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg)); 3172 } 3173 3174 int 3175 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3176 struct timeval *tptr, enum uio_seg tptrseg) 3177 { 3178 struct nameidata nd; 3179 struct timespec ts[2]; 3180 int error; 3181 3182 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3183 return (error); 3184 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3185 CAP_FUTIMES, td); 3186 3187 if ((error = namei(&nd)) != 0) 3188 return (error); 3189 NDFREE(&nd, NDF_ONLY_PNBUF); 3190 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3191 vrele(nd.ni_vp); 3192 return (error); 3193 } 3194 3195 /* 3196 * Set the access and modification times of a file. 3197 */ 3198 #ifndef _SYS_SYSPROTO_H_ 3199 struct lutimes_args { 3200 char *path; 3201 struct timeval *tptr; 3202 }; 3203 #endif 3204 int 3205 sys_lutimes(td, uap) 3206 struct thread *td; 3207 register struct lutimes_args /* { 3208 char *path; 3209 struct timeval *tptr; 3210 } */ *uap; 3211 { 3212 3213 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3214 UIO_USERSPACE)); 3215 } 3216 3217 int 3218 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3219 struct timeval *tptr, enum uio_seg tptrseg) 3220 { 3221 struct timespec ts[2]; 3222 int error; 3223 struct nameidata nd; 3224 3225 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3226 return (error); 3227 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3228 if ((error = namei(&nd)) != 0) 3229 return (error); 3230 NDFREE(&nd, NDF_ONLY_PNBUF); 3231 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3232 vrele(nd.ni_vp); 3233 return (error); 3234 } 3235 3236 /* 3237 * Set the access and modification times of a file. 3238 */ 3239 #ifndef _SYS_SYSPROTO_H_ 3240 struct futimes_args { 3241 int fd; 3242 struct timeval *tptr; 3243 }; 3244 #endif 3245 int 3246 sys_futimes(td, uap) 3247 struct thread *td; 3248 register struct futimes_args /* { 3249 int fd; 3250 struct timeval *tptr; 3251 } */ *uap; 3252 { 3253 3254 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3255 } 3256 3257 int 3258 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3259 enum uio_seg tptrseg) 3260 { 3261 struct timespec ts[2]; 3262 struct file *fp; 3263 int error; 3264 3265 AUDIT_ARG_FD(fd); 3266 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3267 return (error); 3268 if ((error = getvnode(td->td_proc->p_fd, fd, CAP_FUTIMES, &fp)) 3269 != 0) 3270 return (error); 3271 #ifdef AUDIT 3272 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3273 AUDIT_ARG_VNODE1(fp->f_vnode); 3274 VOP_UNLOCK(fp->f_vnode, 0); 3275 #endif 3276 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3277 fdrop(fp, td); 3278 return (error); 3279 } 3280 3281 /* 3282 * Truncate a file given its path name. 3283 */ 3284 #ifndef _SYS_SYSPROTO_H_ 3285 struct truncate_args { 3286 char *path; 3287 int pad; 3288 off_t length; 3289 }; 3290 #endif 3291 int 3292 sys_truncate(td, uap) 3293 struct thread *td; 3294 register struct truncate_args /* { 3295 char *path; 3296 int pad; 3297 off_t length; 3298 } */ *uap; 3299 { 3300 3301 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3302 } 3303 3304 int 3305 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3306 { 3307 struct mount *mp; 3308 struct vnode *vp; 3309 void *rl_cookie; 3310 struct vattr vattr; 3311 struct nameidata nd; 3312 int error; 3313 3314 if (length < 0) 3315 return(EINVAL); 3316 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3317 if ((error = namei(&nd)) != 0) 3318 return (error); 3319 vp = nd.ni_vp; 3320 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3321 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3322 vn_rangelock_unlock(vp, rl_cookie); 3323 vrele(vp); 3324 return (error); 3325 } 3326 NDFREE(&nd, NDF_ONLY_PNBUF); 3327 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3328 if (vp->v_type == VDIR) 3329 error = EISDIR; 3330 #ifdef MAC 3331 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3332 } 3333 #endif 3334 else if ((error = vn_writechk(vp)) == 0 && 3335 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3336 VATTR_NULL(&vattr); 3337 vattr.va_size = length; 3338 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3339 } 3340 VOP_UNLOCK(vp, 0); 3341 vn_finished_write(mp); 3342 vn_rangelock_unlock(vp, rl_cookie); 3343 vrele(vp); 3344 return (error); 3345 } 3346 3347 #if defined(COMPAT_43) 3348 /* 3349 * Truncate a file given its path name. 3350 */ 3351 #ifndef _SYS_SYSPROTO_H_ 3352 struct otruncate_args { 3353 char *path; 3354 long length; 3355 }; 3356 #endif 3357 int 3358 otruncate(td, uap) 3359 struct thread *td; 3360 register struct otruncate_args /* { 3361 char *path; 3362 long length; 3363 } */ *uap; 3364 { 3365 struct truncate_args /* { 3366 char *path; 3367 int pad; 3368 off_t length; 3369 } */ nuap; 3370 3371 nuap.path = uap->path; 3372 nuap.length = uap->length; 3373 return (sys_truncate(td, &nuap)); 3374 } 3375 #endif /* COMPAT_43 */ 3376 3377 /* Versions with the pad argument */ 3378 int 3379 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3380 { 3381 struct truncate_args ouap; 3382 3383 ouap.path = uap->path; 3384 ouap.length = uap->length; 3385 return (sys_truncate(td, &ouap)); 3386 } 3387 3388 int 3389 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3390 { 3391 struct ftruncate_args ouap; 3392 3393 ouap.fd = uap->fd; 3394 ouap.length = uap->length; 3395 return (sys_ftruncate(td, &ouap)); 3396 } 3397 3398 /* 3399 * Sync an open file. 3400 */ 3401 #ifndef _SYS_SYSPROTO_H_ 3402 struct fsync_args { 3403 int fd; 3404 }; 3405 #endif 3406 int 3407 sys_fsync(td, uap) 3408 struct thread *td; 3409 struct fsync_args /* { 3410 int fd; 3411 } */ *uap; 3412 { 3413 struct vnode *vp; 3414 struct mount *mp; 3415 struct file *fp; 3416 int error, lock_flags; 3417 3418 AUDIT_ARG_FD(uap->fd); 3419 if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FSYNC, 3420 &fp)) != 0) 3421 return (error); 3422 vp = fp->f_vnode; 3423 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3424 goto drop; 3425 if (MNT_SHARED_WRITES(mp) || 3426 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3427 lock_flags = LK_SHARED; 3428 } else { 3429 lock_flags = LK_EXCLUSIVE; 3430 } 3431 vn_lock(vp, lock_flags | LK_RETRY); 3432 AUDIT_ARG_VNODE1(vp); 3433 if (vp->v_object != NULL) { 3434 VM_OBJECT_LOCK(vp->v_object); 3435 vm_object_page_clean(vp->v_object, 0, 0, 0); 3436 VM_OBJECT_UNLOCK(vp->v_object); 3437 } 3438 error = VOP_FSYNC(vp, MNT_WAIT, td); 3439 3440 VOP_UNLOCK(vp, 0); 3441 vn_finished_write(mp); 3442 drop: 3443 fdrop(fp, td); 3444 return (error); 3445 } 3446 3447 /* 3448 * Rename files. Source and destination must either both be directories, or 3449 * both not be directories. If target is a directory, it must be empty. 3450 */ 3451 #ifndef _SYS_SYSPROTO_H_ 3452 struct rename_args { 3453 char *from; 3454 char *to; 3455 }; 3456 #endif 3457 int 3458 sys_rename(td, uap) 3459 struct thread *td; 3460 register struct rename_args /* { 3461 char *from; 3462 char *to; 3463 } */ *uap; 3464 { 3465 3466 return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE)); 3467 } 3468 3469 #ifndef _SYS_SYSPROTO_H_ 3470 struct renameat_args { 3471 int oldfd; 3472 char *old; 3473 int newfd; 3474 char *new; 3475 }; 3476 #endif 3477 int 3478 sys_renameat(struct thread *td, struct renameat_args *uap) 3479 { 3480 3481 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3482 UIO_USERSPACE)); 3483 } 3484 3485 int 3486 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg) 3487 { 3488 3489 return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg)); 3490 } 3491 3492 int 3493 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3494 enum uio_seg pathseg) 3495 { 3496 struct mount *mp = NULL; 3497 struct vnode *tvp, *fvp, *tdvp; 3498 struct nameidata fromnd, tond; 3499 int error; 3500 3501 bwillwrite(); 3502 #ifdef MAC 3503 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3504 AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td); 3505 #else 3506 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3507 pathseg, old, oldfd, CAP_DELETE, td); 3508 #endif 3509 3510 if ((error = namei(&fromnd)) != 0) 3511 return (error); 3512 #ifdef MAC 3513 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3514 fromnd.ni_vp, &fromnd.ni_cnd); 3515 VOP_UNLOCK(fromnd.ni_dvp, 0); 3516 if (fromnd.ni_dvp != fromnd.ni_vp) 3517 VOP_UNLOCK(fromnd.ni_vp, 0); 3518 #endif 3519 fvp = fromnd.ni_vp; 3520 if (error == 0) 3521 error = vn_start_write(fvp, &mp, V_WAIT | PCATCH); 3522 if (error != 0) { 3523 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3524 vrele(fromnd.ni_dvp); 3525 vrele(fvp); 3526 goto out1; 3527 } 3528 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3529 SAVESTART | AUDITVNODE2, pathseg, new, newfd, CAP_CREATE, 3530 td); 3531 if (fromnd.ni_vp->v_type == VDIR) 3532 tond.ni_cnd.cn_flags |= WILLBEDIR; 3533 if ((error = namei(&tond)) != 0) { 3534 /* Translate error code for rename("dir1", "dir2/."). */ 3535 if (error == EISDIR && fvp->v_type == VDIR) 3536 error = EINVAL; 3537 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3538 vrele(fromnd.ni_dvp); 3539 vrele(fvp); 3540 vn_finished_write(mp); 3541 goto out1; 3542 } 3543 tdvp = tond.ni_dvp; 3544 tvp = tond.ni_vp; 3545 if (tvp != NULL) { 3546 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3547 error = ENOTDIR; 3548 goto out; 3549 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3550 error = EISDIR; 3551 goto out; 3552 } 3553 } 3554 if (fvp == tdvp) { 3555 error = EINVAL; 3556 goto out; 3557 } 3558 /* 3559 * If the source is the same as the destination (that is, if they 3560 * are links to the same vnode), then there is nothing to do. 3561 */ 3562 if (fvp == tvp) 3563 error = -1; 3564 #ifdef MAC 3565 else 3566 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3567 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3568 #endif 3569 out: 3570 if (!error) { 3571 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3572 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3573 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3574 NDFREE(&tond, NDF_ONLY_PNBUF); 3575 } else { 3576 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3577 NDFREE(&tond, NDF_ONLY_PNBUF); 3578 if (tvp) 3579 vput(tvp); 3580 if (tdvp == tvp) 3581 vrele(tdvp); 3582 else 3583 vput(tdvp); 3584 vrele(fromnd.ni_dvp); 3585 vrele(fvp); 3586 } 3587 vrele(tond.ni_startdir); 3588 vn_finished_write(mp); 3589 out1: 3590 if (fromnd.ni_startdir) 3591 vrele(fromnd.ni_startdir); 3592 if (error == -1) 3593 return (0); 3594 return (error); 3595 } 3596 3597 /* 3598 * Make a directory file. 3599 */ 3600 #ifndef _SYS_SYSPROTO_H_ 3601 struct mkdir_args { 3602 char *path; 3603 int mode; 3604 }; 3605 #endif 3606 int 3607 sys_mkdir(td, uap) 3608 struct thread *td; 3609 register struct mkdir_args /* { 3610 char *path; 3611 int mode; 3612 } */ *uap; 3613 { 3614 3615 return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode)); 3616 } 3617 3618 #ifndef _SYS_SYSPROTO_H_ 3619 struct mkdirat_args { 3620 int fd; 3621 char *path; 3622 mode_t mode; 3623 }; 3624 #endif 3625 int 3626 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3627 { 3628 3629 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3630 } 3631 3632 int 3633 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode) 3634 { 3635 3636 return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode)); 3637 } 3638 3639 int 3640 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3641 int mode) 3642 { 3643 struct mount *mp; 3644 struct vnode *vp; 3645 struct vattr vattr; 3646 int error; 3647 struct nameidata nd; 3648 3649 AUDIT_ARG_MODE(mode); 3650 restart: 3651 bwillwrite(); 3652 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 3653 segflg, path, fd, CAP_MKDIR, td); 3654 nd.ni_cnd.cn_flags |= WILLBEDIR; 3655 if ((error = namei(&nd)) != 0) 3656 return (error); 3657 vp = nd.ni_vp; 3658 if (vp != NULL) { 3659 NDFREE(&nd, NDF_ONLY_PNBUF); 3660 /* 3661 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3662 * the strange behaviour of leaving the vnode unlocked 3663 * if the target is the same vnode as the parent. 3664 */ 3665 if (vp == nd.ni_dvp) 3666 vrele(nd.ni_dvp); 3667 else 3668 vput(nd.ni_dvp); 3669 vrele(vp); 3670 return (EEXIST); 3671 } 3672 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3673 NDFREE(&nd, NDF_ONLY_PNBUF); 3674 vput(nd.ni_dvp); 3675 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3676 return (error); 3677 goto restart; 3678 } 3679 VATTR_NULL(&vattr); 3680 vattr.va_type = VDIR; 3681 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3682 #ifdef MAC 3683 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3684 &vattr); 3685 if (error) 3686 goto out; 3687 #endif 3688 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3689 #ifdef MAC 3690 out: 3691 #endif 3692 NDFREE(&nd, NDF_ONLY_PNBUF); 3693 vput(nd.ni_dvp); 3694 if (!error) 3695 vput(nd.ni_vp); 3696 vn_finished_write(mp); 3697 return (error); 3698 } 3699 3700 /* 3701 * Remove a directory file. 3702 */ 3703 #ifndef _SYS_SYSPROTO_H_ 3704 struct rmdir_args { 3705 char *path; 3706 }; 3707 #endif 3708 int 3709 sys_rmdir(td, uap) 3710 struct thread *td; 3711 struct rmdir_args /* { 3712 char *path; 3713 } */ *uap; 3714 { 3715 3716 return (kern_rmdir(td, uap->path, UIO_USERSPACE)); 3717 } 3718 3719 int 3720 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg) 3721 { 3722 3723 return (kern_rmdirat(td, AT_FDCWD, path, pathseg)); 3724 } 3725 3726 int 3727 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3728 { 3729 struct mount *mp; 3730 struct vnode *vp; 3731 int error; 3732 struct nameidata nd; 3733 3734 restart: 3735 bwillwrite(); 3736 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3737 pathseg, path, fd, CAP_RMDIR, td); 3738 if ((error = namei(&nd)) != 0) 3739 return (error); 3740 vp = nd.ni_vp; 3741 if (vp->v_type != VDIR) { 3742 error = ENOTDIR; 3743 goto out; 3744 } 3745 /* 3746 * No rmdir "." please. 3747 */ 3748 if (nd.ni_dvp == vp) { 3749 error = EINVAL; 3750 goto out; 3751 } 3752 /* 3753 * The root of a mounted filesystem cannot be deleted. 3754 */ 3755 if (vp->v_vflag & VV_ROOT) { 3756 error = EBUSY; 3757 goto out; 3758 } 3759 #ifdef MAC 3760 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3761 &nd.ni_cnd); 3762 if (error) 3763 goto out; 3764 #endif 3765 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3766 NDFREE(&nd, NDF_ONLY_PNBUF); 3767 vput(vp); 3768 if (nd.ni_dvp == vp) 3769 vrele(nd.ni_dvp); 3770 else 3771 vput(nd.ni_dvp); 3772 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3773 return (error); 3774 goto restart; 3775 } 3776 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3777 vn_finished_write(mp); 3778 out: 3779 NDFREE(&nd, NDF_ONLY_PNBUF); 3780 vput(vp); 3781 if (nd.ni_dvp == vp) 3782 vrele(nd.ni_dvp); 3783 else 3784 vput(nd.ni_dvp); 3785 return (error); 3786 } 3787 3788 #ifdef COMPAT_43 3789 /* 3790 * Read a block of directory entries in a filesystem independent format. 3791 */ 3792 #ifndef _SYS_SYSPROTO_H_ 3793 struct ogetdirentries_args { 3794 int fd; 3795 char *buf; 3796 u_int count; 3797 long *basep; 3798 }; 3799 #endif 3800 int 3801 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3802 { 3803 long loff; 3804 int error; 3805 3806 error = kern_ogetdirentries(td, uap, &loff); 3807 if (error == 0) 3808 error = copyout(&loff, uap->basep, sizeof(long)); 3809 return (error); 3810 } 3811 3812 int 3813 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3814 long *ploff) 3815 { 3816 struct vnode *vp; 3817 struct file *fp; 3818 struct uio auio, kuio; 3819 struct iovec aiov, kiov; 3820 struct dirent *dp, *edp; 3821 caddr_t dirbuf; 3822 int error, eofflag, readcnt; 3823 long loff; 3824 off_t foffset; 3825 3826 /* XXX arbitrary sanity limit on `count'. */ 3827 if (uap->count > 64 * 1024) 3828 return (EINVAL); 3829 if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ, 3830 &fp)) != 0) 3831 return (error); 3832 if ((fp->f_flag & FREAD) == 0) { 3833 fdrop(fp, td); 3834 return (EBADF); 3835 } 3836 vp = fp->f_vnode; 3837 foffset = foffset_lock(fp, 0); 3838 unionread: 3839 if (vp->v_type != VDIR) { 3840 foffset_unlock(fp, foffset, 0); 3841 fdrop(fp, td); 3842 return (EINVAL); 3843 } 3844 aiov.iov_base = uap->buf; 3845 aiov.iov_len = uap->count; 3846 auio.uio_iov = &aiov; 3847 auio.uio_iovcnt = 1; 3848 auio.uio_rw = UIO_READ; 3849 auio.uio_segflg = UIO_USERSPACE; 3850 auio.uio_td = td; 3851 auio.uio_resid = uap->count; 3852 vn_lock(vp, LK_SHARED | LK_RETRY); 3853 loff = auio.uio_offset = foffset; 3854 #ifdef MAC 3855 error = mac_vnode_check_readdir(td->td_ucred, vp); 3856 if (error) { 3857 VOP_UNLOCK(vp, 0); 3858 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3859 fdrop(fp, td); 3860 return (error); 3861 } 3862 #endif 3863 # if (BYTE_ORDER != LITTLE_ENDIAN) 3864 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3865 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3866 NULL, NULL); 3867 foffset = auio.uio_offset; 3868 } else 3869 # endif 3870 { 3871 kuio = auio; 3872 kuio.uio_iov = &kiov; 3873 kuio.uio_segflg = UIO_SYSSPACE; 3874 kiov.iov_len = uap->count; 3875 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3876 kiov.iov_base = dirbuf; 3877 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3878 NULL, NULL); 3879 foffset = kuio.uio_offset; 3880 if (error == 0) { 3881 readcnt = uap->count - kuio.uio_resid; 3882 edp = (struct dirent *)&dirbuf[readcnt]; 3883 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3884 # if (BYTE_ORDER == LITTLE_ENDIAN) 3885 /* 3886 * The expected low byte of 3887 * dp->d_namlen is our dp->d_type. 3888 * The high MBZ byte of dp->d_namlen 3889 * is our dp->d_namlen. 3890 */ 3891 dp->d_type = dp->d_namlen; 3892 dp->d_namlen = 0; 3893 # else 3894 /* 3895 * The dp->d_type is the high byte 3896 * of the expected dp->d_namlen, 3897 * so must be zero'ed. 3898 */ 3899 dp->d_type = 0; 3900 # endif 3901 if (dp->d_reclen > 0) { 3902 dp = (struct dirent *) 3903 ((char *)dp + dp->d_reclen); 3904 } else { 3905 error = EIO; 3906 break; 3907 } 3908 } 3909 if (dp >= edp) 3910 error = uiomove(dirbuf, readcnt, &auio); 3911 } 3912 free(dirbuf, M_TEMP); 3913 } 3914 if (error) { 3915 VOP_UNLOCK(vp, 0); 3916 foffset_unlock(fp, foffset, 0); 3917 fdrop(fp, td); 3918 return (error); 3919 } 3920 if (uap->count == auio.uio_resid && 3921 (vp->v_vflag & VV_ROOT) && 3922 (vp->v_mount->mnt_flag & MNT_UNION)) { 3923 struct vnode *tvp = vp; 3924 vp = vp->v_mount->mnt_vnodecovered; 3925 VREF(vp); 3926 fp->f_vnode = vp; 3927 fp->f_data = vp; 3928 foffset = 0; 3929 vput(tvp); 3930 goto unionread; 3931 } 3932 VOP_UNLOCK(vp, 0); 3933 foffset_unlock(fp, foffset, 0); 3934 fdrop(fp, td); 3935 td->td_retval[0] = uap->count - auio.uio_resid; 3936 if (error == 0) 3937 *ploff = loff; 3938 return (error); 3939 } 3940 #endif /* COMPAT_43 */ 3941 3942 /* 3943 * Read a block of directory entries in a filesystem independent format. 3944 */ 3945 #ifndef _SYS_SYSPROTO_H_ 3946 struct getdirentries_args { 3947 int fd; 3948 char *buf; 3949 u_int count; 3950 long *basep; 3951 }; 3952 #endif 3953 int 3954 sys_getdirentries(td, uap) 3955 struct thread *td; 3956 register struct getdirentries_args /* { 3957 int fd; 3958 char *buf; 3959 u_int count; 3960 long *basep; 3961 } */ *uap; 3962 { 3963 long base; 3964 int error; 3965 3966 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3967 NULL, UIO_USERSPACE); 3968 if (error) 3969 return (error); 3970 if (uap->basep != NULL) 3971 error = copyout(&base, uap->basep, sizeof(long)); 3972 return (error); 3973 } 3974 3975 int 3976 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 3977 long *basep, ssize_t *residp, enum uio_seg bufseg) 3978 { 3979 struct vnode *vp; 3980 struct file *fp; 3981 struct uio auio; 3982 struct iovec aiov; 3983 long loff; 3984 int error, eofflag; 3985 off_t foffset; 3986 3987 AUDIT_ARG_FD(fd); 3988 if (count > IOSIZE_MAX) 3989 return (EINVAL); 3990 auio.uio_resid = count; 3991 if ((error = getvnode(td->td_proc->p_fd, fd, CAP_READ | CAP_SEEK, 3992 &fp)) != 0) 3993 return (error); 3994 if ((fp->f_flag & FREAD) == 0) { 3995 fdrop(fp, td); 3996 return (EBADF); 3997 } 3998 vp = fp->f_vnode; 3999 foffset = foffset_lock(fp, 0); 4000 unionread: 4001 if (vp->v_type != VDIR) { 4002 error = EINVAL; 4003 goto fail; 4004 } 4005 aiov.iov_base = buf; 4006 aiov.iov_len = count; 4007 auio.uio_iov = &aiov; 4008 auio.uio_iovcnt = 1; 4009 auio.uio_rw = UIO_READ; 4010 auio.uio_segflg = bufseg; 4011 auio.uio_td = td; 4012 vn_lock(vp, LK_SHARED | LK_RETRY); 4013 AUDIT_ARG_VNODE1(vp); 4014 loff = auio.uio_offset = foffset; 4015 #ifdef MAC 4016 error = mac_vnode_check_readdir(td->td_ucred, vp); 4017 if (error == 0) 4018 #endif 4019 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4020 NULL); 4021 foffset = auio.uio_offset; 4022 if (error) { 4023 VOP_UNLOCK(vp, 0); 4024 goto fail; 4025 } 4026 if (count == auio.uio_resid && 4027 (vp->v_vflag & VV_ROOT) && 4028 (vp->v_mount->mnt_flag & MNT_UNION)) { 4029 struct vnode *tvp = vp; 4030 vp = vp->v_mount->mnt_vnodecovered; 4031 VREF(vp); 4032 fp->f_vnode = vp; 4033 fp->f_data = vp; 4034 foffset = 0; 4035 vput(tvp); 4036 goto unionread; 4037 } 4038 VOP_UNLOCK(vp, 0); 4039 *basep = loff; 4040 if (residp != NULL) 4041 *residp = auio.uio_resid; 4042 td->td_retval[0] = count - auio.uio_resid; 4043 fail: 4044 foffset_unlock(fp, foffset, 0); 4045 fdrop(fp, td); 4046 return (error); 4047 } 4048 4049 #ifndef _SYS_SYSPROTO_H_ 4050 struct getdents_args { 4051 int fd; 4052 char *buf; 4053 size_t count; 4054 }; 4055 #endif 4056 int 4057 sys_getdents(td, uap) 4058 struct thread *td; 4059 register struct getdents_args /* { 4060 int fd; 4061 char *buf; 4062 u_int count; 4063 } */ *uap; 4064 { 4065 struct getdirentries_args ap; 4066 ap.fd = uap->fd; 4067 ap.buf = uap->buf; 4068 ap.count = uap->count; 4069 ap.basep = NULL; 4070 return (sys_getdirentries(td, &ap)); 4071 } 4072 4073 /* 4074 * Set the mode mask for creation of filesystem nodes. 4075 */ 4076 #ifndef _SYS_SYSPROTO_H_ 4077 struct umask_args { 4078 int newmask; 4079 }; 4080 #endif 4081 int 4082 sys_umask(td, uap) 4083 struct thread *td; 4084 struct umask_args /* { 4085 int newmask; 4086 } */ *uap; 4087 { 4088 register struct filedesc *fdp; 4089 4090 FILEDESC_XLOCK(td->td_proc->p_fd); 4091 fdp = td->td_proc->p_fd; 4092 td->td_retval[0] = fdp->fd_cmask; 4093 fdp->fd_cmask = uap->newmask & ALLPERMS; 4094 FILEDESC_XUNLOCK(td->td_proc->p_fd); 4095 return (0); 4096 } 4097 4098 /* 4099 * Void all references to file by ripping underlying filesystem away from 4100 * vnode. 4101 */ 4102 #ifndef _SYS_SYSPROTO_H_ 4103 struct revoke_args { 4104 char *path; 4105 }; 4106 #endif 4107 int 4108 sys_revoke(td, uap) 4109 struct thread *td; 4110 register struct revoke_args /* { 4111 char *path; 4112 } */ *uap; 4113 { 4114 struct vnode *vp; 4115 struct vattr vattr; 4116 int error; 4117 struct nameidata nd; 4118 4119 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4120 uap->path, td); 4121 if ((error = namei(&nd)) != 0) 4122 return (error); 4123 vp = nd.ni_vp; 4124 NDFREE(&nd, NDF_ONLY_PNBUF); 4125 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4126 error = EINVAL; 4127 goto out; 4128 } 4129 #ifdef MAC 4130 error = mac_vnode_check_revoke(td->td_ucred, vp); 4131 if (error) 4132 goto out; 4133 #endif 4134 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4135 if (error) 4136 goto out; 4137 if (td->td_ucred->cr_uid != vattr.va_uid) { 4138 error = priv_check(td, PRIV_VFS_ADMIN); 4139 if (error) 4140 goto out; 4141 } 4142 if (vcount(vp) > 1) 4143 VOP_REVOKE(vp, REVOKEALL); 4144 out: 4145 vput(vp); 4146 return (error); 4147 } 4148 4149 /* 4150 * Convert a user file descriptor to a kernel file entry and check that, if it 4151 * is a capability, the correct rights are present. A reference on the file 4152 * entry is held upon returning. 4153 */ 4154 int 4155 getvnode(struct filedesc *fdp, int fd, cap_rights_t rights, 4156 struct file **fpp) 4157 { 4158 struct file *fp; 4159 #ifdef CAPABILITIES 4160 struct file *fp_fromcap; 4161 int error; 4162 #endif 4163 4164 if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) 4165 return (EBADF); 4166 #ifdef CAPABILITIES 4167 /* 4168 * If the file descriptor is for a capability, test rights and use the 4169 * file descriptor referenced by the capability. 4170 */ 4171 error = cap_funwrap(fp, rights, &fp_fromcap); 4172 if (error) { 4173 fdrop(fp, curthread); 4174 return (error); 4175 } 4176 if (fp != fp_fromcap) { 4177 fhold(fp_fromcap); 4178 fdrop(fp, curthread); 4179 fp = fp_fromcap; 4180 } 4181 #endif /* CAPABILITIES */ 4182 4183 /* 4184 * The file could be not of the vnode type, or it may be not 4185 * yet fully initialized, in which case the f_vnode pointer 4186 * may be set, but f_ops is still badfileops. E.g., 4187 * devfs_open() transiently create such situation to 4188 * facilitate csw d_fdopen(). 4189 * 4190 * Dupfdopen() handling in kern_openat() installs the 4191 * half-baked file into the process descriptor table, allowing 4192 * other thread to dereference it. Guard against the race by 4193 * checking f_ops. 4194 */ 4195 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4196 fdrop(fp, curthread); 4197 return (EINVAL); 4198 } 4199 *fpp = fp; 4200 return (0); 4201 } 4202 4203 4204 /* 4205 * Get an (NFS) file handle. 4206 */ 4207 #ifndef _SYS_SYSPROTO_H_ 4208 struct lgetfh_args { 4209 char *fname; 4210 fhandle_t *fhp; 4211 }; 4212 #endif 4213 int 4214 sys_lgetfh(td, uap) 4215 struct thread *td; 4216 register struct lgetfh_args *uap; 4217 { 4218 struct nameidata nd; 4219 fhandle_t fh; 4220 register struct vnode *vp; 4221 int error; 4222 4223 error = priv_check(td, PRIV_VFS_GETFH); 4224 if (error) 4225 return (error); 4226 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4227 uap->fname, td); 4228 error = namei(&nd); 4229 if (error) 4230 return (error); 4231 NDFREE(&nd, NDF_ONLY_PNBUF); 4232 vp = nd.ni_vp; 4233 bzero(&fh, sizeof(fh)); 4234 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4235 error = VOP_VPTOFH(vp, &fh.fh_fid); 4236 vput(vp); 4237 if (error) 4238 return (error); 4239 error = copyout(&fh, uap->fhp, sizeof (fh)); 4240 return (error); 4241 } 4242 4243 #ifndef _SYS_SYSPROTO_H_ 4244 struct getfh_args { 4245 char *fname; 4246 fhandle_t *fhp; 4247 }; 4248 #endif 4249 int 4250 sys_getfh(td, uap) 4251 struct thread *td; 4252 register struct getfh_args *uap; 4253 { 4254 struct nameidata nd; 4255 fhandle_t fh; 4256 register struct vnode *vp; 4257 int error; 4258 4259 error = priv_check(td, PRIV_VFS_GETFH); 4260 if (error) 4261 return (error); 4262 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4263 uap->fname, td); 4264 error = namei(&nd); 4265 if (error) 4266 return (error); 4267 NDFREE(&nd, NDF_ONLY_PNBUF); 4268 vp = nd.ni_vp; 4269 bzero(&fh, sizeof(fh)); 4270 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4271 error = VOP_VPTOFH(vp, &fh.fh_fid); 4272 vput(vp); 4273 if (error) 4274 return (error); 4275 error = copyout(&fh, uap->fhp, sizeof (fh)); 4276 return (error); 4277 } 4278 4279 /* 4280 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4281 * open descriptor. 4282 * 4283 * warning: do not remove the priv_check() call or this becomes one giant 4284 * security hole. 4285 */ 4286 #ifndef _SYS_SYSPROTO_H_ 4287 struct fhopen_args { 4288 const struct fhandle *u_fhp; 4289 int flags; 4290 }; 4291 #endif 4292 int 4293 sys_fhopen(td, uap) 4294 struct thread *td; 4295 struct fhopen_args /* { 4296 const struct fhandle *u_fhp; 4297 int flags; 4298 } */ *uap; 4299 { 4300 struct mount *mp; 4301 struct vnode *vp; 4302 struct fhandle fhp; 4303 struct file *fp; 4304 int fmode, error; 4305 int indx; 4306 4307 error = priv_check(td, PRIV_VFS_FHOPEN); 4308 if (error) 4309 return (error); 4310 indx = -1; 4311 fmode = FFLAGS(uap->flags); 4312 /* why not allow a non-read/write open for our lockd? */ 4313 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4314 return (EINVAL); 4315 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4316 if (error) 4317 return(error); 4318 /* find the mount point */ 4319 mp = vfs_busyfs(&fhp.fh_fsid); 4320 if (mp == NULL) 4321 return (ESTALE); 4322 /* now give me my vnode, it gets returned to me locked */ 4323 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4324 vfs_unbusy(mp); 4325 if (error) 4326 return (error); 4327 4328 error = falloc_noinstall(td, &fp); 4329 if (error) { 4330 vput(vp); 4331 return (error); 4332 } 4333 /* 4334 * An extra reference on `fp' has been held for us by 4335 * falloc_noinstall(). 4336 */ 4337 4338 #ifdef INVARIANTS 4339 td->td_dupfd = -1; 4340 #endif 4341 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4342 if (error) { 4343 KASSERT(fp->f_ops == &badfileops, 4344 ("VOP_OPEN in fhopen() set f_ops")); 4345 KASSERT(td->td_dupfd < 0, 4346 ("fhopen() encountered fdopen()")); 4347 4348 vput(vp); 4349 goto bad; 4350 } 4351 #ifdef INVARIANTS 4352 td->td_dupfd = 0; 4353 #endif 4354 fp->f_vnode = vp; 4355 fp->f_seqcount = 1; 4356 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4357 &vnops); 4358 VOP_UNLOCK(vp, 0); 4359 if (fmode & O_TRUNC) { 4360 error = fo_truncate(fp, 0, td->td_ucred, td); 4361 if (error) 4362 goto bad; 4363 } 4364 4365 error = finstall(td, fp, &indx, fmode); 4366 bad: 4367 fdrop(fp, td); 4368 td->td_retval[0] = indx; 4369 return (error); 4370 } 4371 4372 /* 4373 * Stat an (NFS) file handle. 4374 */ 4375 #ifndef _SYS_SYSPROTO_H_ 4376 struct fhstat_args { 4377 struct fhandle *u_fhp; 4378 struct stat *sb; 4379 }; 4380 #endif 4381 int 4382 sys_fhstat(td, uap) 4383 struct thread *td; 4384 register struct fhstat_args /* { 4385 struct fhandle *u_fhp; 4386 struct stat *sb; 4387 } */ *uap; 4388 { 4389 struct stat sb; 4390 struct fhandle fh; 4391 int error; 4392 4393 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4394 if (error != 0) 4395 return (error); 4396 error = kern_fhstat(td, fh, &sb); 4397 if (error != 0) 4398 return (error); 4399 error = copyout(&sb, uap->sb, sizeof(sb)); 4400 return (error); 4401 } 4402 4403 int 4404 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4405 { 4406 struct mount *mp; 4407 struct vnode *vp; 4408 int error; 4409 4410 error = priv_check(td, PRIV_VFS_FHSTAT); 4411 if (error) 4412 return (error); 4413 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4414 return (ESTALE); 4415 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4416 vfs_unbusy(mp); 4417 if (error) 4418 return (error); 4419 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4420 vput(vp); 4421 return (error); 4422 } 4423 4424 /* 4425 * Implement fstatfs() for (NFS) file handles. 4426 */ 4427 #ifndef _SYS_SYSPROTO_H_ 4428 struct fhstatfs_args { 4429 struct fhandle *u_fhp; 4430 struct statfs *buf; 4431 }; 4432 #endif 4433 int 4434 sys_fhstatfs(td, uap) 4435 struct thread *td; 4436 struct fhstatfs_args /* { 4437 struct fhandle *u_fhp; 4438 struct statfs *buf; 4439 } */ *uap; 4440 { 4441 struct statfs sf; 4442 fhandle_t fh; 4443 int error; 4444 4445 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4446 if (error) 4447 return (error); 4448 error = kern_fhstatfs(td, fh, &sf); 4449 if (error) 4450 return (error); 4451 return (copyout(&sf, uap->buf, sizeof(sf))); 4452 } 4453 4454 int 4455 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4456 { 4457 struct statfs *sp; 4458 struct mount *mp; 4459 struct vnode *vp; 4460 int error; 4461 4462 error = priv_check(td, PRIV_VFS_FHSTATFS); 4463 if (error) 4464 return (error); 4465 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4466 return (ESTALE); 4467 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4468 if (error) { 4469 vfs_unbusy(mp); 4470 return (error); 4471 } 4472 vput(vp); 4473 error = prison_canseemount(td->td_ucred, mp); 4474 if (error) 4475 goto out; 4476 #ifdef MAC 4477 error = mac_mount_check_stat(td->td_ucred, mp); 4478 if (error) 4479 goto out; 4480 #endif 4481 /* 4482 * Set these in case the underlying filesystem fails to do so. 4483 */ 4484 sp = &mp->mnt_stat; 4485 sp->f_version = STATFS_VERSION; 4486 sp->f_namemax = NAME_MAX; 4487 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4488 error = VFS_STATFS(mp, sp); 4489 if (error == 0) 4490 *buf = *sp; 4491 out: 4492 vfs_unbusy(mp); 4493 return (error); 4494 } 4495 4496 int 4497 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4498 { 4499 struct file *fp; 4500 struct mount *mp; 4501 struct vnode *vp; 4502 off_t olen, ooffset; 4503 int error; 4504 4505 fp = NULL; 4506 error = fget(td, fd, CAP_WRITE, &fp); 4507 if (error != 0) 4508 goto out; 4509 4510 switch (fp->f_type) { 4511 case DTYPE_VNODE: 4512 break; 4513 case DTYPE_PIPE: 4514 case DTYPE_FIFO: 4515 error = ESPIPE; 4516 goto out; 4517 default: 4518 error = ENODEV; 4519 goto out; 4520 } 4521 if ((fp->f_flag & FWRITE) == 0) { 4522 error = EBADF; 4523 goto out; 4524 } 4525 vp = fp->f_vnode; 4526 if (vp->v_type != VREG) { 4527 error = ENODEV; 4528 goto out; 4529 } 4530 if (offset < 0 || len <= 0) { 4531 error = EINVAL; 4532 goto out; 4533 } 4534 /* Check for wrap. */ 4535 if (offset > OFF_MAX - len) { 4536 error = EFBIG; 4537 goto out; 4538 } 4539 4540 /* Allocating blocks may take a long time, so iterate. */ 4541 for (;;) { 4542 olen = len; 4543 ooffset = offset; 4544 4545 bwillwrite(); 4546 mp = NULL; 4547 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4548 if (error != 0) 4549 break; 4550 error = vn_lock(vp, LK_EXCLUSIVE); 4551 if (error != 0) { 4552 vn_finished_write(mp); 4553 break; 4554 } 4555 #ifdef MAC 4556 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4557 if (error == 0) 4558 #endif 4559 error = VOP_ALLOCATE(vp, &offset, &len); 4560 VOP_UNLOCK(vp, 0); 4561 vn_finished_write(mp); 4562 4563 if (olen + ooffset != offset + len) { 4564 panic("offset + len changed from %jx/%jx to %jx/%jx", 4565 ooffset, olen, offset, len); 4566 } 4567 if (error != 0 || len == 0) 4568 break; 4569 KASSERT(olen > len, ("Iteration did not make progress?")); 4570 maybe_yield(); 4571 } 4572 out: 4573 if (fp != NULL) 4574 fdrop(fp, td); 4575 return (error); 4576 } 4577 4578 int 4579 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4580 { 4581 4582 return (kern_posix_fallocate(td, uap->fd, uap->offset, uap->len)); 4583 } 4584 4585 /* 4586 * Unlike madvise(2), we do not make a best effort to remember every 4587 * possible caching hint. Instead, we remember the last setting with 4588 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4589 * region of any current setting. 4590 */ 4591 int 4592 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4593 int advice) 4594 { 4595 struct fadvise_info *fa, *new; 4596 struct file *fp; 4597 struct vnode *vp; 4598 off_t end; 4599 int error; 4600 4601 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4602 return (EINVAL); 4603 switch (advice) { 4604 case POSIX_FADV_SEQUENTIAL: 4605 case POSIX_FADV_RANDOM: 4606 case POSIX_FADV_NOREUSE: 4607 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4608 break; 4609 case POSIX_FADV_NORMAL: 4610 case POSIX_FADV_WILLNEED: 4611 case POSIX_FADV_DONTNEED: 4612 new = NULL; 4613 break; 4614 default: 4615 return (EINVAL); 4616 } 4617 /* XXX: CAP_POSIX_FADVISE? */ 4618 error = fget(td, fd, 0, &fp); 4619 if (error != 0) 4620 goto out; 4621 4622 switch (fp->f_type) { 4623 case DTYPE_VNODE: 4624 break; 4625 case DTYPE_PIPE: 4626 case DTYPE_FIFO: 4627 error = ESPIPE; 4628 goto out; 4629 default: 4630 error = ENODEV; 4631 goto out; 4632 } 4633 vp = fp->f_vnode; 4634 if (vp->v_type != VREG) { 4635 error = ENODEV; 4636 goto out; 4637 } 4638 if (len == 0) 4639 end = OFF_MAX; 4640 else 4641 end = offset + len - 1; 4642 switch (advice) { 4643 case POSIX_FADV_SEQUENTIAL: 4644 case POSIX_FADV_RANDOM: 4645 case POSIX_FADV_NOREUSE: 4646 /* 4647 * Try to merge any existing non-standard region with 4648 * this new region if possible, otherwise create a new 4649 * non-standard region for this request. 4650 */ 4651 mtx_pool_lock(mtxpool_sleep, fp); 4652 fa = fp->f_advice; 4653 if (fa != NULL && fa->fa_advice == advice && 4654 ((fa->fa_start <= end && fa->fa_end >= offset) || 4655 (end != OFF_MAX && fa->fa_start == end + 1) || 4656 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4657 if (offset < fa->fa_start) 4658 fa->fa_start = offset; 4659 if (end > fa->fa_end) 4660 fa->fa_end = end; 4661 } else { 4662 new->fa_advice = advice; 4663 new->fa_start = offset; 4664 new->fa_end = end; 4665 new->fa_prevstart = 0; 4666 new->fa_prevend = 0; 4667 fp->f_advice = new; 4668 new = fa; 4669 } 4670 mtx_pool_unlock(mtxpool_sleep, fp); 4671 break; 4672 case POSIX_FADV_NORMAL: 4673 /* 4674 * If a the "normal" region overlaps with an existing 4675 * non-standard region, trim or remove the 4676 * non-standard region. 4677 */ 4678 mtx_pool_lock(mtxpool_sleep, fp); 4679 fa = fp->f_advice; 4680 if (fa != NULL) { 4681 if (offset <= fa->fa_start && end >= fa->fa_end) { 4682 new = fa; 4683 fp->f_advice = NULL; 4684 } else if (offset <= fa->fa_start && 4685 end >= fa->fa_start) 4686 fa->fa_start = end + 1; 4687 else if (offset <= fa->fa_end && end >= fa->fa_end) 4688 fa->fa_end = offset - 1; 4689 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4690 /* 4691 * If the "normal" region is a middle 4692 * portion of the existing 4693 * non-standard region, just remove 4694 * the whole thing rather than picking 4695 * one side or the other to 4696 * preserve. 4697 */ 4698 new = fa; 4699 fp->f_advice = NULL; 4700 } 4701 } 4702 mtx_pool_unlock(mtxpool_sleep, fp); 4703 break; 4704 case POSIX_FADV_WILLNEED: 4705 case POSIX_FADV_DONTNEED: 4706 error = VOP_ADVISE(vp, offset, end, advice); 4707 break; 4708 } 4709 out: 4710 if (fp != NULL) 4711 fdrop(fp, td); 4712 free(new, M_FADVISE); 4713 return (error); 4714 } 4715 4716 int 4717 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4718 { 4719 4720 return (kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4721 uap->advice)); 4722 } 4723