1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/sysent.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/sysproto.h> 55 #include <sys/namei.h> 56 #include <sys/filedesc.h> 57 #include <sys/kernel.h> 58 #include <sys/fcntl.h> 59 #include <sys/file.h> 60 #include <sys/filio.h> 61 #include <sys/limits.h> 62 #include <sys/linker.h> 63 #include <sys/rwlock.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97 static int chroot_refuse_vdir_fds(struct filedesc *fdp); 98 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 99 static int kern_chflags(struct thread *td, const char *path, 100 enum uio_seg pathseg, u_long flags); 101 static int kern_chflagsat(struct thread *td, int fd, const char *path, 102 enum uio_seg pathseg, u_long flags, int atflag); 103 static int setfflags(struct thread *td, struct vnode *, u_long); 104 static int setutimes(struct thread *td, struct vnode *, 105 const struct timespec *, int, int); 106 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 107 struct thread *td); 108 109 /* 110 * The module initialization routine for POSIX asynchronous I/O will 111 * set this to the version of AIO that it implements. (Zero means 112 * that it is not implemented.) This value is used here by pathconf() 113 * and in kern_descrip.c by fpathconf(). 114 */ 115 int async_io_version; 116 117 #ifdef DEBUG 118 static int syncprt = 0; 119 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 120 #endif 121 122 /* 123 * Sync each mounted filesystem. 124 */ 125 #ifndef _SYS_SYSPROTO_H_ 126 struct sync_args { 127 int dummy; 128 }; 129 #endif 130 /* ARGSUSED */ 131 int 132 sys_sync(td, uap) 133 struct thread *td; 134 struct sync_args *uap; 135 { 136 struct mount *mp, *nmp; 137 int save; 138 139 mtx_lock(&mountlist_mtx); 140 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 141 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 142 nmp = TAILQ_NEXT(mp, mnt_list); 143 continue; 144 } 145 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 146 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 147 save = curthread_pflags_set(TDP_SYNCIO); 148 vfs_msync(mp, MNT_NOWAIT); 149 VFS_SYNC(mp, MNT_NOWAIT); 150 curthread_pflags_restore(save); 151 vn_finished_write(mp); 152 } 153 mtx_lock(&mountlist_mtx); 154 nmp = TAILQ_NEXT(mp, mnt_list); 155 vfs_unbusy(mp); 156 } 157 mtx_unlock(&mountlist_mtx); 158 return (0); 159 } 160 161 /* 162 * Change filesystem quotas. 163 */ 164 #ifndef _SYS_SYSPROTO_H_ 165 struct quotactl_args { 166 char *path; 167 int cmd; 168 int uid; 169 caddr_t arg; 170 }; 171 #endif 172 int 173 sys_quotactl(td, uap) 174 struct thread *td; 175 register struct quotactl_args /* { 176 char *path; 177 int cmd; 178 int uid; 179 caddr_t arg; 180 } */ *uap; 181 { 182 struct mount *mp; 183 struct nameidata nd; 184 int error; 185 186 AUDIT_ARG_CMD(uap->cmd); 187 AUDIT_ARG_UID(uap->uid); 188 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 189 return (EPERM); 190 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 191 uap->path, td); 192 if ((error = namei(&nd)) != 0) 193 return (error); 194 NDFREE(&nd, NDF_ONLY_PNBUF); 195 mp = nd.ni_vp->v_mount; 196 vfs_ref(mp); 197 vput(nd.ni_vp); 198 error = vfs_busy(mp, 0); 199 vfs_rel(mp); 200 if (error != 0) 201 return (error); 202 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 203 204 /* 205 * Since quota on operation typically needs to open quota 206 * file, the Q_QUOTAON handler needs to unbusy the mount point 207 * before calling into namei. Otherwise, unmount might be 208 * started between two vfs_busy() invocations (first is our, 209 * second is from mount point cross-walk code in lookup()), 210 * causing deadlock. 211 * 212 * Require that Q_QUOTAON handles the vfs_busy() reference on 213 * its own, always returning with ubusied mount point. 214 */ 215 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 216 vfs_unbusy(mp); 217 return (error); 218 } 219 220 /* 221 * Used by statfs conversion routines to scale the block size up if 222 * necessary so that all of the block counts are <= 'max_size'. Note 223 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 224 * value of 'n'. 225 */ 226 void 227 statfs_scale_blocks(struct statfs *sf, long max_size) 228 { 229 uint64_t count; 230 int shift; 231 232 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 233 234 /* 235 * Attempt to scale the block counts to give a more accurate 236 * overview to userland of the ratio of free space to used 237 * space. To do this, find the largest block count and compute 238 * a divisor that lets it fit into a signed integer <= max_size. 239 */ 240 if (sf->f_bavail < 0) 241 count = -sf->f_bavail; 242 else 243 count = sf->f_bavail; 244 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 245 if (count <= max_size) 246 return; 247 248 count >>= flsl(max_size); 249 shift = 0; 250 while (count > 0) { 251 shift++; 252 count >>=1; 253 } 254 255 sf->f_bsize <<= shift; 256 sf->f_blocks >>= shift; 257 sf->f_bfree >>= shift; 258 sf->f_bavail >>= shift; 259 } 260 261 /* 262 * Get filesystem statistics. 263 */ 264 #ifndef _SYS_SYSPROTO_H_ 265 struct statfs_args { 266 char *path; 267 struct statfs *buf; 268 }; 269 #endif 270 int 271 sys_statfs(td, uap) 272 struct thread *td; 273 register struct statfs_args /* { 274 char *path; 275 struct statfs *buf; 276 } */ *uap; 277 { 278 struct statfs sf; 279 int error; 280 281 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 282 if (error == 0) 283 error = copyout(&sf, uap->buf, sizeof(sf)); 284 return (error); 285 } 286 287 int 288 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 289 struct statfs *buf) 290 { 291 struct mount *mp; 292 struct statfs *sp, sb; 293 struct nameidata nd; 294 int error; 295 296 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 297 pathseg, path, td); 298 error = namei(&nd); 299 if (error != 0) 300 return (error); 301 mp = nd.ni_vp->v_mount; 302 vfs_ref(mp); 303 NDFREE(&nd, NDF_ONLY_PNBUF); 304 vput(nd.ni_vp); 305 error = vfs_busy(mp, 0); 306 vfs_rel(mp); 307 if (error != 0) 308 return (error); 309 #ifdef MAC 310 error = mac_mount_check_stat(td->td_ucred, mp); 311 if (error != 0) 312 goto out; 313 #endif 314 /* 315 * Set these in case the underlying filesystem fails to do so. 316 */ 317 sp = &mp->mnt_stat; 318 sp->f_version = STATFS_VERSION; 319 sp->f_namemax = NAME_MAX; 320 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 321 error = VFS_STATFS(mp, sp); 322 if (error != 0) 323 goto out; 324 if (priv_check(td, PRIV_VFS_GENERATION)) { 325 bcopy(sp, &sb, sizeof(sb)); 326 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 327 prison_enforce_statfs(td->td_ucred, mp, &sb); 328 sp = &sb; 329 } 330 *buf = *sp; 331 out: 332 vfs_unbusy(mp); 333 return (error); 334 } 335 336 /* 337 * Get filesystem statistics. 338 */ 339 #ifndef _SYS_SYSPROTO_H_ 340 struct fstatfs_args { 341 int fd; 342 struct statfs *buf; 343 }; 344 #endif 345 int 346 sys_fstatfs(td, uap) 347 struct thread *td; 348 register struct fstatfs_args /* { 349 int fd; 350 struct statfs *buf; 351 } */ *uap; 352 { 353 struct statfs sf; 354 int error; 355 356 error = kern_fstatfs(td, uap->fd, &sf); 357 if (error == 0) 358 error = copyout(&sf, uap->buf, sizeof(sf)); 359 return (error); 360 } 361 362 int 363 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 364 { 365 struct file *fp; 366 struct mount *mp; 367 struct statfs *sp, sb; 368 struct vnode *vp; 369 cap_rights_t rights; 370 int error; 371 372 AUDIT_ARG_FD(fd); 373 error = getvnode(td->td_proc->p_fd, fd, 374 cap_rights_init(&rights, CAP_FSTATFS), &fp); 375 if (error != 0) 376 return (error); 377 vp = fp->f_vnode; 378 vn_lock(vp, LK_SHARED | LK_RETRY); 379 #ifdef AUDIT 380 AUDIT_ARG_VNODE1(vp); 381 #endif 382 mp = vp->v_mount; 383 if (mp) 384 vfs_ref(mp); 385 VOP_UNLOCK(vp, 0); 386 fdrop(fp, td); 387 if (mp == NULL) { 388 error = EBADF; 389 goto out; 390 } 391 error = vfs_busy(mp, 0); 392 vfs_rel(mp); 393 if (error != 0) 394 return (error); 395 #ifdef MAC 396 error = mac_mount_check_stat(td->td_ucred, mp); 397 if (error != 0) 398 goto out; 399 #endif 400 /* 401 * Set these in case the underlying filesystem fails to do so. 402 */ 403 sp = &mp->mnt_stat; 404 sp->f_version = STATFS_VERSION; 405 sp->f_namemax = NAME_MAX; 406 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 407 error = VFS_STATFS(mp, sp); 408 if (error != 0) 409 goto out; 410 if (priv_check(td, PRIV_VFS_GENERATION)) { 411 bcopy(sp, &sb, sizeof(sb)); 412 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 413 prison_enforce_statfs(td->td_ucred, mp, &sb); 414 sp = &sb; 415 } 416 *buf = *sp; 417 out: 418 if (mp) 419 vfs_unbusy(mp); 420 return (error); 421 } 422 423 /* 424 * Get statistics on all filesystems. 425 */ 426 #ifndef _SYS_SYSPROTO_H_ 427 struct getfsstat_args { 428 struct statfs *buf; 429 long bufsize; 430 int flags; 431 }; 432 #endif 433 int 434 sys_getfsstat(td, uap) 435 struct thread *td; 436 register struct getfsstat_args /* { 437 struct statfs *buf; 438 long bufsize; 439 int flags; 440 } */ *uap; 441 { 442 443 return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE, 444 uap->flags)); 445 } 446 447 /* 448 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 449 * The caller is responsible for freeing memory which will be allocated 450 * in '*buf'. 451 */ 452 int 453 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 454 enum uio_seg bufseg, int flags) 455 { 456 struct mount *mp, *nmp; 457 struct statfs *sfsp, *sp, sb; 458 size_t count, maxcount; 459 int error; 460 461 maxcount = bufsize / sizeof(struct statfs); 462 if (bufsize == 0) 463 sfsp = NULL; 464 else if (bufseg == UIO_USERSPACE) 465 sfsp = *buf; 466 else /* if (bufseg == UIO_SYSSPACE) */ { 467 count = 0; 468 mtx_lock(&mountlist_mtx); 469 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 470 count++; 471 } 472 mtx_unlock(&mountlist_mtx); 473 if (maxcount > count) 474 maxcount = count; 475 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 476 M_WAITOK); 477 } 478 count = 0; 479 mtx_lock(&mountlist_mtx); 480 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 481 if (prison_canseemount(td->td_ucred, mp) != 0) { 482 nmp = TAILQ_NEXT(mp, mnt_list); 483 continue; 484 } 485 #ifdef MAC 486 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 487 nmp = TAILQ_NEXT(mp, mnt_list); 488 continue; 489 } 490 #endif 491 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 492 nmp = TAILQ_NEXT(mp, mnt_list); 493 continue; 494 } 495 if (sfsp && count < maxcount) { 496 sp = &mp->mnt_stat; 497 /* 498 * Set these in case the underlying filesystem 499 * fails to do so. 500 */ 501 sp->f_version = STATFS_VERSION; 502 sp->f_namemax = NAME_MAX; 503 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 504 /* 505 * If MNT_NOWAIT or MNT_LAZY is specified, do not 506 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 507 * overrides MNT_WAIT. 508 */ 509 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 510 (flags & MNT_WAIT)) && 511 (error = VFS_STATFS(mp, sp))) { 512 mtx_lock(&mountlist_mtx); 513 nmp = TAILQ_NEXT(mp, mnt_list); 514 vfs_unbusy(mp); 515 continue; 516 } 517 if (priv_check(td, PRIV_VFS_GENERATION)) { 518 bcopy(sp, &sb, sizeof(sb)); 519 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 520 prison_enforce_statfs(td->td_ucred, mp, &sb); 521 sp = &sb; 522 } 523 if (bufseg == UIO_SYSSPACE) 524 bcopy(sp, sfsp, sizeof(*sp)); 525 else /* if (bufseg == UIO_USERSPACE) */ { 526 error = copyout(sp, sfsp, sizeof(*sp)); 527 if (error != 0) { 528 vfs_unbusy(mp); 529 return (error); 530 } 531 } 532 sfsp++; 533 } 534 count++; 535 mtx_lock(&mountlist_mtx); 536 nmp = TAILQ_NEXT(mp, mnt_list); 537 vfs_unbusy(mp); 538 } 539 mtx_unlock(&mountlist_mtx); 540 if (sfsp && count > maxcount) 541 td->td_retval[0] = maxcount; 542 else 543 td->td_retval[0] = count; 544 return (0); 545 } 546 547 #ifdef COMPAT_FREEBSD4 548 /* 549 * Get old format filesystem statistics. 550 */ 551 static void cvtstatfs(struct statfs *, struct ostatfs *); 552 553 #ifndef _SYS_SYSPROTO_H_ 554 struct freebsd4_statfs_args { 555 char *path; 556 struct ostatfs *buf; 557 }; 558 #endif 559 int 560 freebsd4_statfs(td, uap) 561 struct thread *td; 562 struct freebsd4_statfs_args /* { 563 char *path; 564 struct ostatfs *buf; 565 } */ *uap; 566 { 567 struct ostatfs osb; 568 struct statfs sf; 569 int error; 570 571 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 572 if (error != 0) 573 return (error); 574 cvtstatfs(&sf, &osb); 575 return (copyout(&osb, uap->buf, sizeof(osb))); 576 } 577 578 /* 579 * Get filesystem statistics. 580 */ 581 #ifndef _SYS_SYSPROTO_H_ 582 struct freebsd4_fstatfs_args { 583 int fd; 584 struct ostatfs *buf; 585 }; 586 #endif 587 int 588 freebsd4_fstatfs(td, uap) 589 struct thread *td; 590 struct freebsd4_fstatfs_args /* { 591 int fd; 592 struct ostatfs *buf; 593 } */ *uap; 594 { 595 struct ostatfs osb; 596 struct statfs sf; 597 int error; 598 599 error = kern_fstatfs(td, uap->fd, &sf); 600 if (error != 0) 601 return (error); 602 cvtstatfs(&sf, &osb); 603 return (copyout(&osb, uap->buf, sizeof(osb))); 604 } 605 606 /* 607 * Get statistics on all filesystems. 608 */ 609 #ifndef _SYS_SYSPROTO_H_ 610 struct freebsd4_getfsstat_args { 611 struct ostatfs *buf; 612 long bufsize; 613 int flags; 614 }; 615 #endif 616 int 617 freebsd4_getfsstat(td, uap) 618 struct thread *td; 619 register struct freebsd4_getfsstat_args /* { 620 struct ostatfs *buf; 621 long bufsize; 622 int flags; 623 } */ *uap; 624 { 625 struct statfs *buf, *sp; 626 struct ostatfs osb; 627 size_t count, size; 628 int error; 629 630 count = uap->bufsize / sizeof(struct ostatfs); 631 size = count * sizeof(struct statfs); 632 error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags); 633 if (size > 0) { 634 count = td->td_retval[0]; 635 sp = buf; 636 while (count > 0 && error == 0) { 637 cvtstatfs(sp, &osb); 638 error = copyout(&osb, uap->buf, sizeof(osb)); 639 sp++; 640 uap->buf++; 641 count--; 642 } 643 free(buf, M_TEMP); 644 } 645 return (error); 646 } 647 648 /* 649 * Implement fstatfs() for (NFS) file handles. 650 */ 651 #ifndef _SYS_SYSPROTO_H_ 652 struct freebsd4_fhstatfs_args { 653 struct fhandle *u_fhp; 654 struct ostatfs *buf; 655 }; 656 #endif 657 int 658 freebsd4_fhstatfs(td, uap) 659 struct thread *td; 660 struct freebsd4_fhstatfs_args /* { 661 struct fhandle *u_fhp; 662 struct ostatfs *buf; 663 } */ *uap; 664 { 665 struct ostatfs osb; 666 struct statfs sf; 667 fhandle_t fh; 668 int error; 669 670 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 671 if (error != 0) 672 return (error); 673 error = kern_fhstatfs(td, fh, &sf); 674 if (error != 0) 675 return (error); 676 cvtstatfs(&sf, &osb); 677 return (copyout(&osb, uap->buf, sizeof(osb))); 678 } 679 680 /* 681 * Convert a new format statfs structure to an old format statfs structure. 682 */ 683 static void 684 cvtstatfs(nsp, osp) 685 struct statfs *nsp; 686 struct ostatfs *osp; 687 { 688 689 statfs_scale_blocks(nsp, LONG_MAX); 690 bzero(osp, sizeof(*osp)); 691 osp->f_bsize = nsp->f_bsize; 692 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 693 osp->f_blocks = nsp->f_blocks; 694 osp->f_bfree = nsp->f_bfree; 695 osp->f_bavail = nsp->f_bavail; 696 osp->f_files = MIN(nsp->f_files, LONG_MAX); 697 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 698 osp->f_owner = nsp->f_owner; 699 osp->f_type = nsp->f_type; 700 osp->f_flags = nsp->f_flags; 701 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 702 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 703 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 704 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 705 strlcpy(osp->f_fstypename, nsp->f_fstypename, 706 MIN(MFSNAMELEN, OMFSNAMELEN)); 707 strlcpy(osp->f_mntonname, nsp->f_mntonname, 708 MIN(MNAMELEN, OMNAMELEN)); 709 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 710 MIN(MNAMELEN, OMNAMELEN)); 711 osp->f_fsid = nsp->f_fsid; 712 } 713 #endif /* COMPAT_FREEBSD4 */ 714 715 /* 716 * Change current working directory to a given file descriptor. 717 */ 718 #ifndef _SYS_SYSPROTO_H_ 719 struct fchdir_args { 720 int fd; 721 }; 722 #endif 723 int 724 sys_fchdir(td, uap) 725 struct thread *td; 726 struct fchdir_args /* { 727 int fd; 728 } */ *uap; 729 { 730 register struct filedesc *fdp = td->td_proc->p_fd; 731 struct vnode *vp, *tdp, *vpold; 732 struct mount *mp; 733 struct file *fp; 734 cap_rights_t rights; 735 int error; 736 737 AUDIT_ARG_FD(uap->fd); 738 error = getvnode(fdp, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 739 &fp); 740 if (error != 0) 741 return (error); 742 vp = fp->f_vnode; 743 VREF(vp); 744 fdrop(fp, td); 745 vn_lock(vp, LK_SHARED | LK_RETRY); 746 AUDIT_ARG_VNODE1(vp); 747 error = change_dir(vp, td); 748 while (!error && (mp = vp->v_mountedhere) != NULL) { 749 if (vfs_busy(mp, 0)) 750 continue; 751 error = VFS_ROOT(mp, LK_SHARED, &tdp); 752 vfs_unbusy(mp); 753 if (error != 0) 754 break; 755 vput(vp); 756 vp = tdp; 757 } 758 if (error != 0) { 759 vput(vp); 760 return (error); 761 } 762 VOP_UNLOCK(vp, 0); 763 FILEDESC_XLOCK(fdp); 764 vpold = fdp->fd_cdir; 765 fdp->fd_cdir = vp; 766 FILEDESC_XUNLOCK(fdp); 767 vrele(vpold); 768 return (0); 769 } 770 771 /* 772 * Change current working directory (``.''). 773 */ 774 #ifndef _SYS_SYSPROTO_H_ 775 struct chdir_args { 776 char *path; 777 }; 778 #endif 779 int 780 sys_chdir(td, uap) 781 struct thread *td; 782 struct chdir_args /* { 783 char *path; 784 } */ *uap; 785 { 786 787 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 788 } 789 790 int 791 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 792 { 793 register struct filedesc *fdp = td->td_proc->p_fd; 794 struct nameidata nd; 795 struct vnode *vp; 796 int error; 797 798 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 799 pathseg, path, td); 800 if ((error = namei(&nd)) != 0) 801 return (error); 802 if ((error = change_dir(nd.ni_vp, td)) != 0) { 803 vput(nd.ni_vp); 804 NDFREE(&nd, NDF_ONLY_PNBUF); 805 return (error); 806 } 807 VOP_UNLOCK(nd.ni_vp, 0); 808 NDFREE(&nd, NDF_ONLY_PNBUF); 809 FILEDESC_XLOCK(fdp); 810 vp = fdp->fd_cdir; 811 fdp->fd_cdir = nd.ni_vp; 812 FILEDESC_XUNLOCK(fdp); 813 vrele(vp); 814 return (0); 815 } 816 817 /* 818 * Helper function for raised chroot(2) security function: Refuse if 819 * any filedescriptors are open directories. 820 */ 821 static int 822 chroot_refuse_vdir_fds(fdp) 823 struct filedesc *fdp; 824 { 825 struct vnode *vp; 826 struct file *fp; 827 int fd; 828 829 FILEDESC_LOCK_ASSERT(fdp); 830 831 for (fd = 0; fd <= fdp->fd_lastfile; fd++) { 832 fp = fget_locked(fdp, fd); 833 if (fp == NULL) 834 continue; 835 if (fp->f_type == DTYPE_VNODE) { 836 vp = fp->f_vnode; 837 if (vp->v_type == VDIR) 838 return (EPERM); 839 } 840 } 841 return (0); 842 } 843 844 /* 845 * This sysctl determines if we will allow a process to chroot(2) if it 846 * has a directory open: 847 * 0: disallowed for all processes. 848 * 1: allowed for processes that were not already chroot(2)'ed. 849 * 2: allowed for all processes. 850 */ 851 852 static int chroot_allow_open_directories = 1; 853 854 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 855 &chroot_allow_open_directories, 0, 856 "Allow a process to chroot(2) if it has a directory open"); 857 858 /* 859 * Change notion of root (``/'') directory. 860 */ 861 #ifndef _SYS_SYSPROTO_H_ 862 struct chroot_args { 863 char *path; 864 }; 865 #endif 866 int 867 sys_chroot(td, uap) 868 struct thread *td; 869 struct chroot_args /* { 870 char *path; 871 } */ *uap; 872 { 873 struct nameidata nd; 874 int error; 875 876 error = priv_check(td, PRIV_VFS_CHROOT); 877 if (error != 0) 878 return (error); 879 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 880 UIO_USERSPACE, uap->path, td); 881 error = namei(&nd); 882 if (error != 0) 883 goto error; 884 error = change_dir(nd.ni_vp, td); 885 if (error != 0) 886 goto e_vunlock; 887 #ifdef MAC 888 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 889 if (error != 0) 890 goto e_vunlock; 891 #endif 892 VOP_UNLOCK(nd.ni_vp, 0); 893 error = change_root(nd.ni_vp, td); 894 vrele(nd.ni_vp); 895 NDFREE(&nd, NDF_ONLY_PNBUF); 896 return (error); 897 e_vunlock: 898 vput(nd.ni_vp); 899 error: 900 NDFREE(&nd, NDF_ONLY_PNBUF); 901 return (error); 902 } 903 904 /* 905 * Common routine for chroot and chdir. Callers must provide a locked vnode 906 * instance. 907 */ 908 int 909 change_dir(vp, td) 910 struct vnode *vp; 911 struct thread *td; 912 { 913 #ifdef MAC 914 int error; 915 #endif 916 917 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 918 if (vp->v_type != VDIR) 919 return (ENOTDIR); 920 #ifdef MAC 921 error = mac_vnode_check_chdir(td->td_ucred, vp); 922 if (error != 0) 923 return (error); 924 #endif 925 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 926 } 927 928 /* 929 * Common routine for kern_chroot() and jail_attach(). The caller is 930 * responsible for invoking priv_check() and mac_vnode_check_chroot() to 931 * authorize this operation. 932 */ 933 int 934 change_root(vp, td) 935 struct vnode *vp; 936 struct thread *td; 937 { 938 struct filedesc *fdp; 939 struct vnode *oldvp; 940 int error; 941 942 fdp = td->td_proc->p_fd; 943 FILEDESC_XLOCK(fdp); 944 if (chroot_allow_open_directories == 0 || 945 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 946 error = chroot_refuse_vdir_fds(fdp); 947 if (error != 0) { 948 FILEDESC_XUNLOCK(fdp); 949 return (error); 950 } 951 } 952 oldvp = fdp->fd_rdir; 953 fdp->fd_rdir = vp; 954 VREF(fdp->fd_rdir); 955 if (!fdp->fd_jdir) { 956 fdp->fd_jdir = vp; 957 VREF(fdp->fd_jdir); 958 } 959 FILEDESC_XUNLOCK(fdp); 960 vrele(oldvp); 961 return (0); 962 } 963 964 static __inline void 965 flags_to_rights(int flags, cap_rights_t *rightsp) 966 { 967 968 if (flags & O_EXEC) { 969 cap_rights_set(rightsp, CAP_FEXECVE); 970 } else { 971 switch ((flags & O_ACCMODE)) { 972 case O_RDONLY: 973 cap_rights_set(rightsp, CAP_READ); 974 break; 975 case O_RDWR: 976 cap_rights_set(rightsp, CAP_READ); 977 /* FALLTHROUGH */ 978 case O_WRONLY: 979 cap_rights_set(rightsp, CAP_WRITE); 980 if (!(flags & (O_APPEND | O_TRUNC))) 981 cap_rights_set(rightsp, CAP_SEEK); 982 break; 983 } 984 } 985 986 if (flags & O_CREAT) 987 cap_rights_set(rightsp, CAP_CREATE); 988 989 if (flags & O_TRUNC) 990 cap_rights_set(rightsp, CAP_FTRUNCATE); 991 992 if (flags & (O_SYNC | O_FSYNC)) 993 cap_rights_set(rightsp, CAP_FSYNC); 994 995 if (flags & (O_EXLOCK | O_SHLOCK)) 996 cap_rights_set(rightsp, CAP_FLOCK); 997 } 998 999 /* 1000 * Check permissions, allocate an open file structure, and call the device 1001 * open routine if any. 1002 */ 1003 #ifndef _SYS_SYSPROTO_H_ 1004 struct open_args { 1005 char *path; 1006 int flags; 1007 int mode; 1008 }; 1009 #endif 1010 int 1011 sys_open(td, uap) 1012 struct thread *td; 1013 register struct open_args /* { 1014 char *path; 1015 int flags; 1016 int mode; 1017 } */ *uap; 1018 { 1019 1020 return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode)); 1021 } 1022 1023 #ifndef _SYS_SYSPROTO_H_ 1024 struct openat_args { 1025 int fd; 1026 char *path; 1027 int flag; 1028 int mode; 1029 }; 1030 #endif 1031 int 1032 sys_openat(struct thread *td, struct openat_args *uap) 1033 { 1034 1035 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1036 uap->mode)); 1037 } 1038 1039 int 1040 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags, 1041 int mode) 1042 { 1043 1044 return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode)); 1045 } 1046 1047 int 1048 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1049 int flags, int mode) 1050 { 1051 struct proc *p = td->td_proc; 1052 struct filedesc *fdp = p->p_fd; 1053 struct file *fp; 1054 struct vnode *vp; 1055 struct nameidata nd; 1056 cap_rights_t rights; 1057 int cmode, error, indx; 1058 1059 indx = -1; 1060 1061 AUDIT_ARG_FFLAGS(flags); 1062 AUDIT_ARG_MODE(mode); 1063 /* XXX: audit dirfd */ 1064 cap_rights_init(&rights, CAP_LOOKUP); 1065 flags_to_rights(flags, &rights); 1066 /* 1067 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1068 * may be specified. 1069 */ 1070 if (flags & O_EXEC) { 1071 if (flags & O_ACCMODE) 1072 return (EINVAL); 1073 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1074 return (EINVAL); 1075 } else { 1076 flags = FFLAGS(flags); 1077 } 1078 1079 /* 1080 * Allocate the file descriptor, but don't install a descriptor yet. 1081 */ 1082 error = falloc_noinstall(td, &fp); 1083 if (error != 0) 1084 return (error); 1085 /* 1086 * An extra reference on `fp' has been held for us by 1087 * falloc_noinstall(). 1088 */ 1089 /* Set the flags early so the finit in devfs can pick them up. */ 1090 fp->f_flag = flags & FMASK; 1091 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1092 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1093 &rights, td); 1094 td->td_dupfd = -1; /* XXX check for fdopen */ 1095 error = vn_open(&nd, &flags, cmode, fp); 1096 if (error != 0) { 1097 /* 1098 * If the vn_open replaced the method vector, something 1099 * wonderous happened deep below and we just pass it up 1100 * pretending we know what we do. 1101 */ 1102 if (error == ENXIO && fp->f_ops != &badfileops) 1103 goto success; 1104 1105 /* 1106 * Handle special fdopen() case. bleh. 1107 * 1108 * Don't do this for relative (capability) lookups; we don't 1109 * understand exactly what would happen, and we don't think 1110 * that it ever should. 1111 */ 1112 if (nd.ni_strictrelative == 0 && 1113 (error == ENODEV || error == ENXIO) && 1114 td->td_dupfd >= 0) { 1115 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1116 &indx); 1117 if (error == 0) 1118 goto success; 1119 } 1120 1121 goto bad; 1122 } 1123 td->td_dupfd = 0; 1124 NDFREE(&nd, NDF_ONLY_PNBUF); 1125 vp = nd.ni_vp; 1126 1127 /* 1128 * Store the vnode, for any f_type. Typically, the vnode use 1129 * count is decremented by direct call to vn_closefile() for 1130 * files that switched type in the cdevsw fdopen() method. 1131 */ 1132 fp->f_vnode = vp; 1133 /* 1134 * If the file wasn't claimed by devfs bind it to the normal 1135 * vnode operations here. 1136 */ 1137 if (fp->f_ops == &badfileops) { 1138 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1139 fp->f_seqcount = 1; 1140 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1141 DTYPE_VNODE, vp, &vnops); 1142 } 1143 1144 VOP_UNLOCK(vp, 0); 1145 if (flags & O_TRUNC) { 1146 error = fo_truncate(fp, 0, td->td_ucred, td); 1147 if (error != 0) 1148 goto bad; 1149 } 1150 success: 1151 /* 1152 * If we haven't already installed the FD (for dupfdopen), do so now. 1153 */ 1154 if (indx == -1) { 1155 struct filecaps *fcaps; 1156 1157 #ifdef CAPABILITIES 1158 if (nd.ni_strictrelative == 1) 1159 fcaps = &nd.ni_filecaps; 1160 else 1161 #endif 1162 fcaps = NULL; 1163 error = finstall(td, fp, &indx, flags, fcaps); 1164 /* On success finstall() consumes fcaps. */ 1165 if (error != 0) { 1166 filecaps_free(&nd.ni_filecaps); 1167 goto bad; 1168 } 1169 } else { 1170 filecaps_free(&nd.ni_filecaps); 1171 } 1172 1173 /* 1174 * Release our private reference, leaving the one associated with 1175 * the descriptor table intact. 1176 */ 1177 fdrop(fp, td); 1178 td->td_retval[0] = indx; 1179 return (0); 1180 bad: 1181 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1182 fdrop(fp, td); 1183 return (error); 1184 } 1185 1186 #ifdef COMPAT_43 1187 /* 1188 * Create a file. 1189 */ 1190 #ifndef _SYS_SYSPROTO_H_ 1191 struct ocreat_args { 1192 char *path; 1193 int mode; 1194 }; 1195 #endif 1196 int 1197 ocreat(td, uap) 1198 struct thread *td; 1199 register struct ocreat_args /* { 1200 char *path; 1201 int mode; 1202 } */ *uap; 1203 { 1204 1205 return (kern_open(td, uap->path, UIO_USERSPACE, 1206 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1207 } 1208 #endif /* COMPAT_43 */ 1209 1210 /* 1211 * Create a special file. 1212 */ 1213 #ifndef _SYS_SYSPROTO_H_ 1214 struct mknod_args { 1215 char *path; 1216 int mode; 1217 int dev; 1218 }; 1219 #endif 1220 int 1221 sys_mknod(td, uap) 1222 struct thread *td; 1223 register struct mknod_args /* { 1224 char *path; 1225 int mode; 1226 int dev; 1227 } */ *uap; 1228 { 1229 1230 return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); 1231 } 1232 1233 #ifndef _SYS_SYSPROTO_H_ 1234 struct mknodat_args { 1235 int fd; 1236 char *path; 1237 mode_t mode; 1238 dev_t dev; 1239 }; 1240 #endif 1241 int 1242 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1243 { 1244 1245 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1246 uap->dev)); 1247 } 1248 1249 int 1250 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode, 1251 int dev) 1252 { 1253 1254 return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev)); 1255 } 1256 1257 int 1258 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1259 int mode, int dev) 1260 { 1261 struct vnode *vp; 1262 struct mount *mp; 1263 struct vattr vattr; 1264 struct nameidata nd; 1265 cap_rights_t rights; 1266 int error, whiteout = 0; 1267 1268 AUDIT_ARG_MODE(mode); 1269 AUDIT_ARG_DEV(dev); 1270 switch (mode & S_IFMT) { 1271 case S_IFCHR: 1272 case S_IFBLK: 1273 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1274 break; 1275 case S_IFMT: 1276 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1277 break; 1278 case S_IFWHT: 1279 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1280 break; 1281 case S_IFIFO: 1282 if (dev == 0) 1283 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1284 /* FALLTHROUGH */ 1285 default: 1286 error = EINVAL; 1287 break; 1288 } 1289 if (error != 0) 1290 return (error); 1291 restart: 1292 bwillwrite(); 1293 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1294 pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), td); 1295 if ((error = namei(&nd)) != 0) 1296 return (error); 1297 vp = nd.ni_vp; 1298 if (vp != NULL) { 1299 NDFREE(&nd, NDF_ONLY_PNBUF); 1300 if (vp == nd.ni_dvp) 1301 vrele(nd.ni_dvp); 1302 else 1303 vput(nd.ni_dvp); 1304 vrele(vp); 1305 return (EEXIST); 1306 } else { 1307 VATTR_NULL(&vattr); 1308 vattr.va_mode = (mode & ALLPERMS) & 1309 ~td->td_proc->p_fd->fd_cmask; 1310 vattr.va_rdev = dev; 1311 whiteout = 0; 1312 1313 switch (mode & S_IFMT) { 1314 case S_IFMT: /* used by badsect to flag bad sectors */ 1315 vattr.va_type = VBAD; 1316 break; 1317 case S_IFCHR: 1318 vattr.va_type = VCHR; 1319 break; 1320 case S_IFBLK: 1321 vattr.va_type = VBLK; 1322 break; 1323 case S_IFWHT: 1324 whiteout = 1; 1325 break; 1326 default: 1327 panic("kern_mknod: invalid mode"); 1328 } 1329 } 1330 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1331 NDFREE(&nd, NDF_ONLY_PNBUF); 1332 vput(nd.ni_dvp); 1333 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1334 return (error); 1335 goto restart; 1336 } 1337 #ifdef MAC 1338 if (error == 0 && !whiteout) 1339 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1340 &nd.ni_cnd, &vattr); 1341 #endif 1342 if (error == 0) { 1343 if (whiteout) 1344 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1345 else { 1346 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1347 &nd.ni_cnd, &vattr); 1348 if (error == 0) 1349 vput(nd.ni_vp); 1350 } 1351 } 1352 NDFREE(&nd, NDF_ONLY_PNBUF); 1353 vput(nd.ni_dvp); 1354 vn_finished_write(mp); 1355 return (error); 1356 } 1357 1358 /* 1359 * Create a named pipe. 1360 */ 1361 #ifndef _SYS_SYSPROTO_H_ 1362 struct mkfifo_args { 1363 char *path; 1364 int mode; 1365 }; 1366 #endif 1367 int 1368 sys_mkfifo(td, uap) 1369 struct thread *td; 1370 register struct mkfifo_args /* { 1371 char *path; 1372 int mode; 1373 } */ *uap; 1374 { 1375 1376 return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode)); 1377 } 1378 1379 #ifndef _SYS_SYSPROTO_H_ 1380 struct mkfifoat_args { 1381 int fd; 1382 char *path; 1383 mode_t mode; 1384 }; 1385 #endif 1386 int 1387 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1388 { 1389 1390 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1391 uap->mode)); 1392 } 1393 1394 int 1395 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode) 1396 { 1397 1398 return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode)); 1399 } 1400 1401 int 1402 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1403 int mode) 1404 { 1405 struct mount *mp; 1406 struct vattr vattr; 1407 struct nameidata nd; 1408 cap_rights_t rights; 1409 int error; 1410 1411 AUDIT_ARG_MODE(mode); 1412 restart: 1413 bwillwrite(); 1414 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1415 pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), td); 1416 if ((error = namei(&nd)) != 0) 1417 return (error); 1418 if (nd.ni_vp != NULL) { 1419 NDFREE(&nd, NDF_ONLY_PNBUF); 1420 if (nd.ni_vp == nd.ni_dvp) 1421 vrele(nd.ni_dvp); 1422 else 1423 vput(nd.ni_dvp); 1424 vrele(nd.ni_vp); 1425 return (EEXIST); 1426 } 1427 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1428 NDFREE(&nd, NDF_ONLY_PNBUF); 1429 vput(nd.ni_dvp); 1430 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1431 return (error); 1432 goto restart; 1433 } 1434 VATTR_NULL(&vattr); 1435 vattr.va_type = VFIFO; 1436 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1437 #ifdef MAC 1438 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1439 &vattr); 1440 if (error != 0) 1441 goto out; 1442 #endif 1443 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1444 if (error == 0) 1445 vput(nd.ni_vp); 1446 #ifdef MAC 1447 out: 1448 #endif 1449 vput(nd.ni_dvp); 1450 vn_finished_write(mp); 1451 NDFREE(&nd, NDF_ONLY_PNBUF); 1452 return (error); 1453 } 1454 1455 /* 1456 * Make a hard file link. 1457 */ 1458 #ifndef _SYS_SYSPROTO_H_ 1459 struct link_args { 1460 char *path; 1461 char *link; 1462 }; 1463 #endif 1464 int 1465 sys_link(td, uap) 1466 struct thread *td; 1467 register struct link_args /* { 1468 char *path; 1469 char *link; 1470 } */ *uap; 1471 { 1472 1473 return (kern_link(td, uap->path, uap->link, UIO_USERSPACE)); 1474 } 1475 1476 #ifndef _SYS_SYSPROTO_H_ 1477 struct linkat_args { 1478 int fd1; 1479 char *path1; 1480 int fd2; 1481 char *path2; 1482 int flag; 1483 }; 1484 #endif 1485 int 1486 sys_linkat(struct thread *td, struct linkat_args *uap) 1487 { 1488 int flag; 1489 1490 flag = uap->flag; 1491 if (flag & ~AT_SYMLINK_FOLLOW) 1492 return (EINVAL); 1493 1494 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1495 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); 1496 } 1497 1498 int hardlink_check_uid = 0; 1499 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1500 &hardlink_check_uid, 0, 1501 "Unprivileged processes cannot create hard links to files owned by other " 1502 "users"); 1503 static int hardlink_check_gid = 0; 1504 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1505 &hardlink_check_gid, 0, 1506 "Unprivileged processes cannot create hard links to files owned by other " 1507 "groups"); 1508 1509 static int 1510 can_hardlink(struct vnode *vp, struct ucred *cred) 1511 { 1512 struct vattr va; 1513 int error; 1514 1515 if (!hardlink_check_uid && !hardlink_check_gid) 1516 return (0); 1517 1518 error = VOP_GETATTR(vp, &va, cred); 1519 if (error != 0) 1520 return (error); 1521 1522 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1523 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1524 if (error != 0) 1525 return (error); 1526 } 1527 1528 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1529 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1530 if (error != 0) 1531 return (error); 1532 } 1533 1534 return (0); 1535 } 1536 1537 int 1538 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg) 1539 { 1540 1541 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW)); 1542 } 1543 1544 int 1545 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1546 enum uio_seg segflg, int follow) 1547 { 1548 struct vnode *vp; 1549 struct mount *mp; 1550 struct nameidata nd; 1551 cap_rights_t rights; 1552 int error; 1553 1554 again: 1555 bwillwrite(); 1556 NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td); 1557 1558 if ((error = namei(&nd)) != 0) 1559 return (error); 1560 NDFREE(&nd, NDF_ONLY_PNBUF); 1561 vp = nd.ni_vp; 1562 if (vp->v_type == VDIR) { 1563 vrele(vp); 1564 return (EPERM); /* POSIX */ 1565 } 1566 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2, 1567 segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT), td); 1568 if ((error = namei(&nd)) == 0) { 1569 if (nd.ni_vp != NULL) { 1570 NDFREE(&nd, NDF_ONLY_PNBUF); 1571 if (nd.ni_dvp == nd.ni_vp) 1572 vrele(nd.ni_dvp); 1573 else 1574 vput(nd.ni_dvp); 1575 vrele(nd.ni_vp); 1576 vrele(vp); 1577 return (EEXIST); 1578 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1579 /* 1580 * Cross-device link. No need to recheck 1581 * vp->v_type, since it cannot change, except 1582 * to VBAD. 1583 */ 1584 NDFREE(&nd, NDF_ONLY_PNBUF); 1585 vput(nd.ni_dvp); 1586 vrele(vp); 1587 return (EXDEV); 1588 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1589 error = can_hardlink(vp, td->td_ucred); 1590 #ifdef MAC 1591 if (error == 0) 1592 error = mac_vnode_check_link(td->td_ucred, 1593 nd.ni_dvp, vp, &nd.ni_cnd); 1594 #endif 1595 if (error != 0) { 1596 vput(vp); 1597 vput(nd.ni_dvp); 1598 NDFREE(&nd, NDF_ONLY_PNBUF); 1599 return (error); 1600 } 1601 error = vn_start_write(vp, &mp, V_NOWAIT); 1602 if (error != 0) { 1603 vput(vp); 1604 vput(nd.ni_dvp); 1605 NDFREE(&nd, NDF_ONLY_PNBUF); 1606 error = vn_start_write(NULL, &mp, 1607 V_XSLEEP | PCATCH); 1608 if (error != 0) 1609 return (error); 1610 goto again; 1611 } 1612 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1613 VOP_UNLOCK(vp, 0); 1614 vput(nd.ni_dvp); 1615 vn_finished_write(mp); 1616 NDFREE(&nd, NDF_ONLY_PNBUF); 1617 } else { 1618 vput(nd.ni_dvp); 1619 NDFREE(&nd, NDF_ONLY_PNBUF); 1620 vrele(vp); 1621 goto again; 1622 } 1623 } 1624 vrele(vp); 1625 return (error); 1626 } 1627 1628 /* 1629 * Make a symbolic link. 1630 */ 1631 #ifndef _SYS_SYSPROTO_H_ 1632 struct symlink_args { 1633 char *path; 1634 char *link; 1635 }; 1636 #endif 1637 int 1638 sys_symlink(td, uap) 1639 struct thread *td; 1640 register struct symlink_args /* { 1641 char *path; 1642 char *link; 1643 } */ *uap; 1644 { 1645 1646 return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE)); 1647 } 1648 1649 #ifndef _SYS_SYSPROTO_H_ 1650 struct symlinkat_args { 1651 char *path; 1652 int fd; 1653 char *path2; 1654 }; 1655 #endif 1656 int 1657 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1658 { 1659 1660 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1661 UIO_USERSPACE)); 1662 } 1663 1664 int 1665 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg) 1666 { 1667 1668 return (kern_symlinkat(td, path, AT_FDCWD, link, segflg)); 1669 } 1670 1671 int 1672 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1673 enum uio_seg segflg) 1674 { 1675 struct mount *mp; 1676 struct vattr vattr; 1677 char *syspath; 1678 struct nameidata nd; 1679 int error; 1680 cap_rights_t rights; 1681 1682 if (segflg == UIO_SYSSPACE) { 1683 syspath = path1; 1684 } else { 1685 syspath = uma_zalloc(namei_zone, M_WAITOK); 1686 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1687 goto out; 1688 } 1689 AUDIT_ARG_TEXT(syspath); 1690 restart: 1691 bwillwrite(); 1692 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1693 segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), td); 1694 if ((error = namei(&nd)) != 0) 1695 goto out; 1696 if (nd.ni_vp) { 1697 NDFREE(&nd, NDF_ONLY_PNBUF); 1698 if (nd.ni_vp == nd.ni_dvp) 1699 vrele(nd.ni_dvp); 1700 else 1701 vput(nd.ni_dvp); 1702 vrele(nd.ni_vp); 1703 error = EEXIST; 1704 goto out; 1705 } 1706 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1707 NDFREE(&nd, NDF_ONLY_PNBUF); 1708 vput(nd.ni_dvp); 1709 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1710 goto out; 1711 goto restart; 1712 } 1713 VATTR_NULL(&vattr); 1714 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1715 #ifdef MAC 1716 vattr.va_type = VLNK; 1717 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1718 &vattr); 1719 if (error != 0) 1720 goto out2; 1721 #endif 1722 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1723 if (error == 0) 1724 vput(nd.ni_vp); 1725 #ifdef MAC 1726 out2: 1727 #endif 1728 NDFREE(&nd, NDF_ONLY_PNBUF); 1729 vput(nd.ni_dvp); 1730 vn_finished_write(mp); 1731 out: 1732 if (segflg != UIO_SYSSPACE) 1733 uma_zfree(namei_zone, syspath); 1734 return (error); 1735 } 1736 1737 /* 1738 * Delete a whiteout from the filesystem. 1739 */ 1740 int 1741 sys_undelete(td, uap) 1742 struct thread *td; 1743 register struct undelete_args /* { 1744 char *path; 1745 } */ *uap; 1746 { 1747 struct mount *mp; 1748 struct nameidata nd; 1749 int error; 1750 1751 restart: 1752 bwillwrite(); 1753 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1754 UIO_USERSPACE, uap->path, td); 1755 error = namei(&nd); 1756 if (error != 0) 1757 return (error); 1758 1759 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1760 NDFREE(&nd, NDF_ONLY_PNBUF); 1761 if (nd.ni_vp == nd.ni_dvp) 1762 vrele(nd.ni_dvp); 1763 else 1764 vput(nd.ni_dvp); 1765 if (nd.ni_vp) 1766 vrele(nd.ni_vp); 1767 return (EEXIST); 1768 } 1769 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1770 NDFREE(&nd, NDF_ONLY_PNBUF); 1771 vput(nd.ni_dvp); 1772 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1773 return (error); 1774 goto restart; 1775 } 1776 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1777 NDFREE(&nd, NDF_ONLY_PNBUF); 1778 vput(nd.ni_dvp); 1779 vn_finished_write(mp); 1780 return (error); 1781 } 1782 1783 /* 1784 * Delete a name from the filesystem. 1785 */ 1786 #ifndef _SYS_SYSPROTO_H_ 1787 struct unlink_args { 1788 char *path; 1789 }; 1790 #endif 1791 int 1792 sys_unlink(td, uap) 1793 struct thread *td; 1794 struct unlink_args /* { 1795 char *path; 1796 } */ *uap; 1797 { 1798 1799 return (kern_unlink(td, uap->path, UIO_USERSPACE)); 1800 } 1801 1802 #ifndef _SYS_SYSPROTO_H_ 1803 struct unlinkat_args { 1804 int fd; 1805 char *path; 1806 int flag; 1807 }; 1808 #endif 1809 int 1810 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1811 { 1812 int flag = uap->flag; 1813 int fd = uap->fd; 1814 char *path = uap->path; 1815 1816 if (flag & ~AT_REMOVEDIR) 1817 return (EINVAL); 1818 1819 if (flag & AT_REMOVEDIR) 1820 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1821 else 1822 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1823 } 1824 1825 int 1826 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg) 1827 { 1828 1829 return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0)); 1830 } 1831 1832 int 1833 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1834 ino_t oldinum) 1835 { 1836 struct mount *mp; 1837 struct vnode *vp; 1838 struct nameidata nd; 1839 struct stat sb; 1840 cap_rights_t rights; 1841 int error; 1842 1843 restart: 1844 bwillwrite(); 1845 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1846 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1847 if ((error = namei(&nd)) != 0) 1848 return (error == EINVAL ? EPERM : error); 1849 vp = nd.ni_vp; 1850 if (vp->v_type == VDIR && oldinum == 0) { 1851 error = EPERM; /* POSIX */ 1852 } else if (oldinum != 0 && 1853 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1854 sb.st_ino != oldinum) { 1855 error = EIDRM; /* Identifier removed */ 1856 } else { 1857 /* 1858 * The root of a mounted filesystem cannot be deleted. 1859 * 1860 * XXX: can this only be a VDIR case? 1861 */ 1862 if (vp->v_vflag & VV_ROOT) 1863 error = EBUSY; 1864 } 1865 if (error == 0) { 1866 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1867 NDFREE(&nd, NDF_ONLY_PNBUF); 1868 vput(nd.ni_dvp); 1869 if (vp == nd.ni_dvp) 1870 vrele(vp); 1871 else 1872 vput(vp); 1873 if ((error = vn_start_write(NULL, &mp, 1874 V_XSLEEP | PCATCH)) != 0) 1875 return (error); 1876 goto restart; 1877 } 1878 #ifdef MAC 1879 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1880 &nd.ni_cnd); 1881 if (error != 0) 1882 goto out; 1883 #endif 1884 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1885 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1886 #ifdef MAC 1887 out: 1888 #endif 1889 vn_finished_write(mp); 1890 } 1891 NDFREE(&nd, NDF_ONLY_PNBUF); 1892 vput(nd.ni_dvp); 1893 if (vp == nd.ni_dvp) 1894 vrele(vp); 1895 else 1896 vput(vp); 1897 return (error); 1898 } 1899 1900 /* 1901 * Reposition read/write file offset. 1902 */ 1903 #ifndef _SYS_SYSPROTO_H_ 1904 struct lseek_args { 1905 int fd; 1906 int pad; 1907 off_t offset; 1908 int whence; 1909 }; 1910 #endif 1911 int 1912 sys_lseek(td, uap) 1913 struct thread *td; 1914 register struct lseek_args /* { 1915 int fd; 1916 int pad; 1917 off_t offset; 1918 int whence; 1919 } */ *uap; 1920 { 1921 struct file *fp; 1922 cap_rights_t rights; 1923 int error; 1924 1925 AUDIT_ARG_FD(uap->fd); 1926 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1927 if (error != 0) 1928 return (error); 1929 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1930 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1931 fdrop(fp, td); 1932 return (error); 1933 } 1934 1935 #if defined(COMPAT_43) 1936 /* 1937 * Reposition read/write file offset. 1938 */ 1939 #ifndef _SYS_SYSPROTO_H_ 1940 struct olseek_args { 1941 int fd; 1942 long offset; 1943 int whence; 1944 }; 1945 #endif 1946 int 1947 olseek(td, uap) 1948 struct thread *td; 1949 register struct olseek_args /* { 1950 int fd; 1951 long offset; 1952 int whence; 1953 } */ *uap; 1954 { 1955 struct lseek_args /* { 1956 int fd; 1957 int pad; 1958 off_t offset; 1959 int whence; 1960 } */ nuap; 1961 1962 nuap.fd = uap->fd; 1963 nuap.offset = uap->offset; 1964 nuap.whence = uap->whence; 1965 return (sys_lseek(td, &nuap)); 1966 } 1967 #endif /* COMPAT_43 */ 1968 1969 /* Version with the 'pad' argument */ 1970 int 1971 freebsd6_lseek(td, uap) 1972 struct thread *td; 1973 register struct freebsd6_lseek_args *uap; 1974 { 1975 struct lseek_args ouap; 1976 1977 ouap.fd = uap->fd; 1978 ouap.offset = uap->offset; 1979 ouap.whence = uap->whence; 1980 return (sys_lseek(td, &ouap)); 1981 } 1982 1983 /* 1984 * Check access permissions using passed credentials. 1985 */ 1986 static int 1987 vn_access(vp, user_flags, cred, td) 1988 struct vnode *vp; 1989 int user_flags; 1990 struct ucred *cred; 1991 struct thread *td; 1992 { 1993 accmode_t accmode; 1994 int error; 1995 1996 /* Flags == 0 means only check for existence. */ 1997 if (user_flags == 0) 1998 return (0); 1999 2000 accmode = 0; 2001 if (user_flags & R_OK) 2002 accmode |= VREAD; 2003 if (user_flags & W_OK) 2004 accmode |= VWRITE; 2005 if (user_flags & X_OK) 2006 accmode |= VEXEC; 2007 #ifdef MAC 2008 error = mac_vnode_check_access(cred, vp, accmode); 2009 if (error != 0) 2010 return (error); 2011 #endif 2012 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2013 error = VOP_ACCESS(vp, accmode, cred, td); 2014 return (error); 2015 } 2016 2017 /* 2018 * Check access permissions using "real" credentials. 2019 */ 2020 #ifndef _SYS_SYSPROTO_H_ 2021 struct access_args { 2022 char *path; 2023 int amode; 2024 }; 2025 #endif 2026 int 2027 sys_access(td, uap) 2028 struct thread *td; 2029 register struct access_args /* { 2030 char *path; 2031 int amode; 2032 } */ *uap; 2033 { 2034 2035 return (kern_access(td, uap->path, UIO_USERSPACE, uap->amode)); 2036 } 2037 2038 #ifndef _SYS_SYSPROTO_H_ 2039 struct faccessat_args { 2040 int dirfd; 2041 char *path; 2042 int amode; 2043 int flag; 2044 } 2045 #endif 2046 int 2047 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2048 { 2049 2050 if (uap->flag & ~AT_EACCESS) 2051 return (EINVAL); 2052 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2053 uap->amode)); 2054 } 2055 2056 int 2057 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2058 { 2059 2060 return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, amode)); 2061 } 2062 2063 int 2064 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2065 int flag, int amode) 2066 { 2067 struct ucred *cred, *usecred; 2068 struct vnode *vp; 2069 struct nameidata nd; 2070 cap_rights_t rights; 2071 int error; 2072 2073 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2074 return (EINVAL); 2075 2076 /* 2077 * Create and modify a temporary credential instead of one that 2078 * is potentially shared (if we need one). 2079 */ 2080 cred = td->td_ucred; 2081 if ((flag & AT_EACCESS) == 0 && 2082 ((cred->cr_uid != cred->cr_ruid || 2083 cred->cr_rgid != cred->cr_groups[0]))) { 2084 usecred = crdup(cred); 2085 usecred->cr_uid = cred->cr_ruid; 2086 usecred->cr_groups[0] = cred->cr_rgid; 2087 td->td_ucred = usecred; 2088 } else 2089 usecred = cred; 2090 AUDIT_ARG_VALUE(amode); 2091 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2092 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 2093 td); 2094 if ((error = namei(&nd)) != 0) 2095 goto out; 2096 vp = nd.ni_vp; 2097 2098 error = vn_access(vp, amode, usecred, td); 2099 NDFREE(&nd, NDF_ONLY_PNBUF); 2100 vput(vp); 2101 out: 2102 if (usecred != cred) { 2103 td->td_ucred = cred; 2104 crfree(usecred); 2105 } 2106 return (error); 2107 } 2108 2109 /* 2110 * Check access permissions using "effective" credentials. 2111 */ 2112 #ifndef _SYS_SYSPROTO_H_ 2113 struct eaccess_args { 2114 char *path; 2115 int amode; 2116 }; 2117 #endif 2118 int 2119 sys_eaccess(td, uap) 2120 struct thread *td; 2121 register struct eaccess_args /* { 2122 char *path; 2123 int amode; 2124 } */ *uap; 2125 { 2126 2127 return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->amode)); 2128 } 2129 2130 int 2131 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2132 { 2133 2134 return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, amode)); 2135 } 2136 2137 #if defined(COMPAT_43) 2138 /* 2139 * Get file status; this version follows links. 2140 */ 2141 #ifndef _SYS_SYSPROTO_H_ 2142 struct ostat_args { 2143 char *path; 2144 struct ostat *ub; 2145 }; 2146 #endif 2147 int 2148 ostat(td, uap) 2149 struct thread *td; 2150 register struct ostat_args /* { 2151 char *path; 2152 struct ostat *ub; 2153 } */ *uap; 2154 { 2155 struct stat sb; 2156 struct ostat osb; 2157 int error; 2158 2159 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2160 if (error != 0) 2161 return (error); 2162 cvtstat(&sb, &osb); 2163 return (copyout(&osb, uap->ub, sizeof (osb))); 2164 } 2165 2166 /* 2167 * Get file status; this version does not follow links. 2168 */ 2169 #ifndef _SYS_SYSPROTO_H_ 2170 struct olstat_args { 2171 char *path; 2172 struct ostat *ub; 2173 }; 2174 #endif 2175 int 2176 olstat(td, uap) 2177 struct thread *td; 2178 register struct olstat_args /* { 2179 char *path; 2180 struct ostat *ub; 2181 } */ *uap; 2182 { 2183 struct stat sb; 2184 struct ostat osb; 2185 int error; 2186 2187 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2188 if (error != 0) 2189 return (error); 2190 cvtstat(&sb, &osb); 2191 return (copyout(&osb, uap->ub, sizeof (osb))); 2192 } 2193 2194 /* 2195 * Convert from an old to a new stat structure. 2196 */ 2197 void 2198 cvtstat(st, ost) 2199 struct stat *st; 2200 struct ostat *ost; 2201 { 2202 2203 ost->st_dev = st->st_dev; 2204 ost->st_ino = st->st_ino; 2205 ost->st_mode = st->st_mode; 2206 ost->st_nlink = st->st_nlink; 2207 ost->st_uid = st->st_uid; 2208 ost->st_gid = st->st_gid; 2209 ost->st_rdev = st->st_rdev; 2210 if (st->st_size < (quad_t)1 << 32) 2211 ost->st_size = st->st_size; 2212 else 2213 ost->st_size = -2; 2214 ost->st_atim = st->st_atim; 2215 ost->st_mtim = st->st_mtim; 2216 ost->st_ctim = st->st_ctim; 2217 ost->st_blksize = st->st_blksize; 2218 ost->st_blocks = st->st_blocks; 2219 ost->st_flags = st->st_flags; 2220 ost->st_gen = st->st_gen; 2221 } 2222 #endif /* COMPAT_43 */ 2223 2224 /* 2225 * Get file status; this version follows links. 2226 */ 2227 #ifndef _SYS_SYSPROTO_H_ 2228 struct stat_args { 2229 char *path; 2230 struct stat *ub; 2231 }; 2232 #endif 2233 int 2234 sys_stat(td, uap) 2235 struct thread *td; 2236 register struct stat_args /* { 2237 char *path; 2238 struct stat *ub; 2239 } */ *uap; 2240 { 2241 struct stat sb; 2242 int error; 2243 2244 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2245 if (error == 0) 2246 error = copyout(&sb, uap->ub, sizeof (sb)); 2247 return (error); 2248 } 2249 2250 #ifndef _SYS_SYSPROTO_H_ 2251 struct fstatat_args { 2252 int fd; 2253 char *path; 2254 struct stat *buf; 2255 int flag; 2256 } 2257 #endif 2258 int 2259 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2260 { 2261 struct stat sb; 2262 int error; 2263 2264 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2265 UIO_USERSPACE, &sb); 2266 if (error == 0) 2267 error = copyout(&sb, uap->buf, sizeof (sb)); 2268 return (error); 2269 } 2270 2271 int 2272 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2273 { 2274 2275 return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp)); 2276 } 2277 2278 int 2279 kern_statat(struct thread *td, int flag, int fd, char *path, 2280 enum uio_seg pathseg, struct stat *sbp) 2281 { 2282 2283 return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL)); 2284 } 2285 2286 int 2287 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path, 2288 enum uio_seg pathseg, struct stat *sbp, 2289 void (*hook)(struct vnode *vp, struct stat *sbp)) 2290 { 2291 struct nameidata nd; 2292 struct stat sb; 2293 cap_rights_t rights; 2294 int error; 2295 2296 if (flag & ~AT_SYMLINK_NOFOLLOW) 2297 return (EINVAL); 2298 2299 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2300 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2301 cap_rights_init(&rights, CAP_FSTAT), td); 2302 2303 if ((error = namei(&nd)) != 0) 2304 return (error); 2305 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2306 if (error == 0) { 2307 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0); 2308 if (S_ISREG(sb.st_mode)) 2309 SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0); 2310 if (__predict_false(hook != NULL)) 2311 hook(nd.ni_vp, &sb); 2312 } 2313 NDFREE(&nd, NDF_ONLY_PNBUF); 2314 vput(nd.ni_vp); 2315 if (error != 0) 2316 return (error); 2317 *sbp = sb; 2318 #ifdef KTRACE 2319 if (KTRPOINT(td, KTR_STRUCT)) 2320 ktrstat(&sb); 2321 #endif 2322 return (0); 2323 } 2324 2325 /* 2326 * Get file status; this version does not follow links. 2327 */ 2328 #ifndef _SYS_SYSPROTO_H_ 2329 struct lstat_args { 2330 char *path; 2331 struct stat *ub; 2332 }; 2333 #endif 2334 int 2335 sys_lstat(td, uap) 2336 struct thread *td; 2337 register struct lstat_args /* { 2338 char *path; 2339 struct stat *ub; 2340 } */ *uap; 2341 { 2342 struct stat sb; 2343 int error; 2344 2345 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2346 if (error == 0) 2347 error = copyout(&sb, uap->ub, sizeof (sb)); 2348 return (error); 2349 } 2350 2351 int 2352 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2353 { 2354 2355 return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg, 2356 sbp)); 2357 } 2358 2359 /* 2360 * Implementation of the NetBSD [l]stat() functions. 2361 */ 2362 void 2363 cvtnstat(sb, nsb) 2364 struct stat *sb; 2365 struct nstat *nsb; 2366 { 2367 2368 bzero(nsb, sizeof *nsb); 2369 nsb->st_dev = sb->st_dev; 2370 nsb->st_ino = sb->st_ino; 2371 nsb->st_mode = sb->st_mode; 2372 nsb->st_nlink = sb->st_nlink; 2373 nsb->st_uid = sb->st_uid; 2374 nsb->st_gid = sb->st_gid; 2375 nsb->st_rdev = sb->st_rdev; 2376 nsb->st_atim = sb->st_atim; 2377 nsb->st_mtim = sb->st_mtim; 2378 nsb->st_ctim = sb->st_ctim; 2379 nsb->st_size = sb->st_size; 2380 nsb->st_blocks = sb->st_blocks; 2381 nsb->st_blksize = sb->st_blksize; 2382 nsb->st_flags = sb->st_flags; 2383 nsb->st_gen = sb->st_gen; 2384 nsb->st_birthtim = sb->st_birthtim; 2385 } 2386 2387 #ifndef _SYS_SYSPROTO_H_ 2388 struct nstat_args { 2389 char *path; 2390 struct nstat *ub; 2391 }; 2392 #endif 2393 int 2394 sys_nstat(td, uap) 2395 struct thread *td; 2396 register struct nstat_args /* { 2397 char *path; 2398 struct nstat *ub; 2399 } */ *uap; 2400 { 2401 struct stat sb; 2402 struct nstat nsb; 2403 int error; 2404 2405 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2406 if (error != 0) 2407 return (error); 2408 cvtnstat(&sb, &nsb); 2409 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2410 } 2411 2412 /* 2413 * NetBSD lstat. Get file status; this version does not follow links. 2414 */ 2415 #ifndef _SYS_SYSPROTO_H_ 2416 struct lstat_args { 2417 char *path; 2418 struct stat *ub; 2419 }; 2420 #endif 2421 int 2422 sys_nlstat(td, uap) 2423 struct thread *td; 2424 register struct nlstat_args /* { 2425 char *path; 2426 struct nstat *ub; 2427 } */ *uap; 2428 { 2429 struct stat sb; 2430 struct nstat nsb; 2431 int error; 2432 2433 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2434 if (error != 0) 2435 return (error); 2436 cvtnstat(&sb, &nsb); 2437 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2438 } 2439 2440 /* 2441 * Get configurable pathname variables. 2442 */ 2443 #ifndef _SYS_SYSPROTO_H_ 2444 struct pathconf_args { 2445 char *path; 2446 int name; 2447 }; 2448 #endif 2449 int 2450 sys_pathconf(td, uap) 2451 struct thread *td; 2452 register struct pathconf_args /* { 2453 char *path; 2454 int name; 2455 } */ *uap; 2456 { 2457 2458 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2459 } 2460 2461 #ifndef _SYS_SYSPROTO_H_ 2462 struct lpathconf_args { 2463 char *path; 2464 int name; 2465 }; 2466 #endif 2467 int 2468 sys_lpathconf(td, uap) 2469 struct thread *td; 2470 register struct lpathconf_args /* { 2471 char *path; 2472 int name; 2473 } */ *uap; 2474 { 2475 2476 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2477 NOFOLLOW)); 2478 } 2479 2480 int 2481 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2482 u_long flags) 2483 { 2484 struct nameidata nd; 2485 int error; 2486 2487 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2488 pathseg, path, td); 2489 if ((error = namei(&nd)) != 0) 2490 return (error); 2491 NDFREE(&nd, NDF_ONLY_PNBUF); 2492 2493 /* If asynchronous I/O is available, it works for all files. */ 2494 if (name == _PC_ASYNC_IO) 2495 td->td_retval[0] = async_io_version; 2496 else 2497 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2498 vput(nd.ni_vp); 2499 return (error); 2500 } 2501 2502 /* 2503 * Return target name of a symbolic link. 2504 */ 2505 #ifndef _SYS_SYSPROTO_H_ 2506 struct readlink_args { 2507 char *path; 2508 char *buf; 2509 size_t count; 2510 }; 2511 #endif 2512 int 2513 sys_readlink(td, uap) 2514 struct thread *td; 2515 register struct readlink_args /* { 2516 char *path; 2517 char *buf; 2518 size_t count; 2519 } */ *uap; 2520 { 2521 2522 return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf, 2523 UIO_USERSPACE, uap->count)); 2524 } 2525 #ifndef _SYS_SYSPROTO_H_ 2526 struct readlinkat_args { 2527 int fd; 2528 char *path; 2529 char *buf; 2530 size_t bufsize; 2531 }; 2532 #endif 2533 int 2534 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2535 { 2536 2537 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2538 uap->buf, UIO_USERSPACE, uap->bufsize)); 2539 } 2540 2541 int 2542 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf, 2543 enum uio_seg bufseg, size_t count) 2544 { 2545 2546 return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg, 2547 count)); 2548 } 2549 2550 int 2551 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2552 char *buf, enum uio_seg bufseg, size_t count) 2553 { 2554 struct vnode *vp; 2555 struct iovec aiov; 2556 struct uio auio; 2557 struct nameidata nd; 2558 int error; 2559 2560 if (count > IOSIZE_MAX) 2561 return (EINVAL); 2562 2563 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2564 pathseg, path, fd, td); 2565 2566 if ((error = namei(&nd)) != 0) 2567 return (error); 2568 NDFREE(&nd, NDF_ONLY_PNBUF); 2569 vp = nd.ni_vp; 2570 #ifdef MAC 2571 error = mac_vnode_check_readlink(td->td_ucred, vp); 2572 if (error != 0) { 2573 vput(vp); 2574 return (error); 2575 } 2576 #endif 2577 if (vp->v_type != VLNK) 2578 error = EINVAL; 2579 else { 2580 aiov.iov_base = buf; 2581 aiov.iov_len = count; 2582 auio.uio_iov = &aiov; 2583 auio.uio_iovcnt = 1; 2584 auio.uio_offset = 0; 2585 auio.uio_rw = UIO_READ; 2586 auio.uio_segflg = bufseg; 2587 auio.uio_td = td; 2588 auio.uio_resid = count; 2589 error = VOP_READLINK(vp, &auio, td->td_ucred); 2590 td->td_retval[0] = count - auio.uio_resid; 2591 } 2592 vput(vp); 2593 return (error); 2594 } 2595 2596 /* 2597 * Common implementation code for chflags() and fchflags(). 2598 */ 2599 static int 2600 setfflags(td, vp, flags) 2601 struct thread *td; 2602 struct vnode *vp; 2603 u_long flags; 2604 { 2605 struct mount *mp; 2606 struct vattr vattr; 2607 int error; 2608 2609 /* We can't support the value matching VNOVAL. */ 2610 if (flags == VNOVAL) 2611 return (EOPNOTSUPP); 2612 2613 /* 2614 * Prevent non-root users from setting flags on devices. When 2615 * a device is reused, users can retain ownership of the device 2616 * if they are allowed to set flags and programs assume that 2617 * chown can't fail when done as root. 2618 */ 2619 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2620 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2621 if (error != 0) 2622 return (error); 2623 } 2624 2625 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2626 return (error); 2627 VATTR_NULL(&vattr); 2628 vattr.va_flags = flags; 2629 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2630 #ifdef MAC 2631 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2632 if (error == 0) 2633 #endif 2634 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2635 VOP_UNLOCK(vp, 0); 2636 vn_finished_write(mp); 2637 return (error); 2638 } 2639 2640 /* 2641 * Change flags of a file given a path name. 2642 */ 2643 #ifndef _SYS_SYSPROTO_H_ 2644 struct chflags_args { 2645 const char *path; 2646 u_long flags; 2647 }; 2648 #endif 2649 int 2650 sys_chflags(td, uap) 2651 struct thread *td; 2652 register struct chflags_args /* { 2653 const char *path; 2654 u_long flags; 2655 } */ *uap; 2656 { 2657 2658 return (kern_chflags(td, uap->path, UIO_USERSPACE, uap->flags)); 2659 } 2660 2661 #ifndef _SYS_SYSPROTO_H_ 2662 struct chflagsat_args { 2663 int fd; 2664 const char *path; 2665 u_long flags; 2666 int atflag; 2667 } 2668 #endif 2669 int 2670 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2671 { 2672 int fd = uap->fd; 2673 const char *path = uap->path; 2674 u_long flags = uap->flags; 2675 int atflag = uap->atflag; 2676 2677 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2678 return (EINVAL); 2679 2680 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2681 } 2682 2683 static int 2684 kern_chflags(struct thread *td, const char *path, enum uio_seg pathseg, 2685 u_long flags) 2686 { 2687 2688 return (kern_chflagsat(td, AT_FDCWD, path, pathseg, flags, 0)); 2689 } 2690 2691 /* 2692 * Same as chflags() but doesn't follow symlinks. 2693 */ 2694 int 2695 sys_lchflags(td, uap) 2696 struct thread *td; 2697 register struct lchflags_args /* { 2698 const char *path; 2699 u_long flags; 2700 } */ *uap; 2701 { 2702 2703 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2704 uap->flags, AT_SYMLINK_NOFOLLOW)); 2705 } 2706 2707 static int 2708 kern_chflagsat(struct thread *td, int fd, const char *path, 2709 enum uio_seg pathseg, u_long flags, int atflag) 2710 { 2711 struct nameidata nd; 2712 cap_rights_t rights; 2713 int error, follow; 2714 2715 AUDIT_ARG_FFLAGS(flags); 2716 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2717 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2718 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2719 if ((error = namei(&nd)) != 0) 2720 return (error); 2721 NDFREE(&nd, NDF_ONLY_PNBUF); 2722 error = setfflags(td, nd.ni_vp, flags); 2723 vrele(nd.ni_vp); 2724 return (error); 2725 } 2726 2727 /* 2728 * Change flags of a file given a file descriptor. 2729 */ 2730 #ifndef _SYS_SYSPROTO_H_ 2731 struct fchflags_args { 2732 int fd; 2733 u_long flags; 2734 }; 2735 #endif 2736 int 2737 sys_fchflags(td, uap) 2738 struct thread *td; 2739 register struct fchflags_args /* { 2740 int fd; 2741 u_long flags; 2742 } */ *uap; 2743 { 2744 struct file *fp; 2745 cap_rights_t rights; 2746 int error; 2747 2748 AUDIT_ARG_FD(uap->fd); 2749 AUDIT_ARG_FFLAGS(uap->flags); 2750 error = getvnode(td->td_proc->p_fd, uap->fd, 2751 cap_rights_init(&rights, CAP_FCHFLAGS), &fp); 2752 if (error != 0) 2753 return (error); 2754 #ifdef AUDIT 2755 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2756 AUDIT_ARG_VNODE1(fp->f_vnode); 2757 VOP_UNLOCK(fp->f_vnode, 0); 2758 #endif 2759 error = setfflags(td, fp->f_vnode, uap->flags); 2760 fdrop(fp, td); 2761 return (error); 2762 } 2763 2764 /* 2765 * Common implementation code for chmod(), lchmod() and fchmod(). 2766 */ 2767 int 2768 setfmode(td, cred, vp, mode) 2769 struct thread *td; 2770 struct ucred *cred; 2771 struct vnode *vp; 2772 int mode; 2773 { 2774 struct mount *mp; 2775 struct vattr vattr; 2776 int error; 2777 2778 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2779 return (error); 2780 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2781 VATTR_NULL(&vattr); 2782 vattr.va_mode = mode & ALLPERMS; 2783 #ifdef MAC 2784 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2785 if (error == 0) 2786 #endif 2787 error = VOP_SETATTR(vp, &vattr, cred); 2788 VOP_UNLOCK(vp, 0); 2789 vn_finished_write(mp); 2790 return (error); 2791 } 2792 2793 /* 2794 * Change mode of a file given path name. 2795 */ 2796 #ifndef _SYS_SYSPROTO_H_ 2797 struct chmod_args { 2798 char *path; 2799 int mode; 2800 }; 2801 #endif 2802 int 2803 sys_chmod(td, uap) 2804 struct thread *td; 2805 register struct chmod_args /* { 2806 char *path; 2807 int mode; 2808 } */ *uap; 2809 { 2810 2811 return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode)); 2812 } 2813 2814 #ifndef _SYS_SYSPROTO_H_ 2815 struct fchmodat_args { 2816 int dirfd; 2817 char *path; 2818 mode_t mode; 2819 int flag; 2820 } 2821 #endif 2822 int 2823 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2824 { 2825 int flag = uap->flag; 2826 int fd = uap->fd; 2827 char *path = uap->path; 2828 mode_t mode = uap->mode; 2829 2830 if (flag & ~AT_SYMLINK_NOFOLLOW) 2831 return (EINVAL); 2832 2833 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2834 } 2835 2836 int 2837 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode) 2838 { 2839 2840 return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0)); 2841 } 2842 2843 /* 2844 * Change mode of a file given path name (don't follow links.) 2845 */ 2846 #ifndef _SYS_SYSPROTO_H_ 2847 struct lchmod_args { 2848 char *path; 2849 int mode; 2850 }; 2851 #endif 2852 int 2853 sys_lchmod(td, uap) 2854 struct thread *td; 2855 register struct lchmod_args /* { 2856 char *path; 2857 int mode; 2858 } */ *uap; 2859 { 2860 2861 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2862 uap->mode, AT_SYMLINK_NOFOLLOW)); 2863 } 2864 2865 int 2866 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2867 mode_t mode, int flag) 2868 { 2869 struct nameidata nd; 2870 cap_rights_t rights; 2871 int error, follow; 2872 2873 AUDIT_ARG_MODE(mode); 2874 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2875 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2876 cap_rights_init(&rights, CAP_FCHMOD), td); 2877 if ((error = namei(&nd)) != 0) 2878 return (error); 2879 NDFREE(&nd, NDF_ONLY_PNBUF); 2880 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2881 vrele(nd.ni_vp); 2882 return (error); 2883 } 2884 2885 /* 2886 * Change mode of a file given a file descriptor. 2887 */ 2888 #ifndef _SYS_SYSPROTO_H_ 2889 struct fchmod_args { 2890 int fd; 2891 int mode; 2892 }; 2893 #endif 2894 int 2895 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2896 { 2897 struct file *fp; 2898 cap_rights_t rights; 2899 int error; 2900 2901 AUDIT_ARG_FD(uap->fd); 2902 AUDIT_ARG_MODE(uap->mode); 2903 2904 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2905 if (error != 0) 2906 return (error); 2907 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2908 fdrop(fp, td); 2909 return (error); 2910 } 2911 2912 /* 2913 * Common implementation for chown(), lchown(), and fchown() 2914 */ 2915 int 2916 setfown(td, cred, vp, uid, gid) 2917 struct thread *td; 2918 struct ucred *cred; 2919 struct vnode *vp; 2920 uid_t uid; 2921 gid_t gid; 2922 { 2923 struct mount *mp; 2924 struct vattr vattr; 2925 int error; 2926 2927 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2928 return (error); 2929 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2930 VATTR_NULL(&vattr); 2931 vattr.va_uid = uid; 2932 vattr.va_gid = gid; 2933 #ifdef MAC 2934 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2935 vattr.va_gid); 2936 if (error == 0) 2937 #endif 2938 error = VOP_SETATTR(vp, &vattr, cred); 2939 VOP_UNLOCK(vp, 0); 2940 vn_finished_write(mp); 2941 return (error); 2942 } 2943 2944 /* 2945 * Set ownership given a path name. 2946 */ 2947 #ifndef _SYS_SYSPROTO_H_ 2948 struct chown_args { 2949 char *path; 2950 int uid; 2951 int gid; 2952 }; 2953 #endif 2954 int 2955 sys_chown(td, uap) 2956 struct thread *td; 2957 register struct chown_args /* { 2958 char *path; 2959 int uid; 2960 int gid; 2961 } */ *uap; 2962 { 2963 2964 return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 2965 } 2966 2967 #ifndef _SYS_SYSPROTO_H_ 2968 struct fchownat_args { 2969 int fd; 2970 const char * path; 2971 uid_t uid; 2972 gid_t gid; 2973 int flag; 2974 }; 2975 #endif 2976 int 2977 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2978 { 2979 int flag; 2980 2981 flag = uap->flag; 2982 if (flag & ~AT_SYMLINK_NOFOLLOW) 2983 return (EINVAL); 2984 2985 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2986 uap->gid, uap->flag)); 2987 } 2988 2989 int 2990 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 2991 int gid) 2992 { 2993 2994 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0)); 2995 } 2996 2997 int 2998 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2999 int uid, int gid, int flag) 3000 { 3001 struct nameidata nd; 3002 cap_rights_t rights; 3003 int error, follow; 3004 3005 AUDIT_ARG_OWNER(uid, gid); 3006 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3007 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 3008 cap_rights_init(&rights, CAP_FCHOWN), td); 3009 3010 if ((error = namei(&nd)) != 0) 3011 return (error); 3012 NDFREE(&nd, NDF_ONLY_PNBUF); 3013 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3014 vrele(nd.ni_vp); 3015 return (error); 3016 } 3017 3018 /* 3019 * Set ownership given a path name, do not cross symlinks. 3020 */ 3021 #ifndef _SYS_SYSPROTO_H_ 3022 struct lchown_args { 3023 char *path; 3024 int uid; 3025 int gid; 3026 }; 3027 #endif 3028 int 3029 sys_lchown(td, uap) 3030 struct thread *td; 3031 register struct lchown_args /* { 3032 char *path; 3033 int uid; 3034 int gid; 3035 } */ *uap; 3036 { 3037 3038 return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 3039 } 3040 3041 int 3042 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 3043 int gid) 3044 { 3045 3046 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 3047 AT_SYMLINK_NOFOLLOW)); 3048 } 3049 3050 /* 3051 * Set ownership given a file descriptor. 3052 */ 3053 #ifndef _SYS_SYSPROTO_H_ 3054 struct fchown_args { 3055 int fd; 3056 int uid; 3057 int gid; 3058 }; 3059 #endif 3060 int 3061 sys_fchown(td, uap) 3062 struct thread *td; 3063 register struct fchown_args /* { 3064 int fd; 3065 int uid; 3066 int gid; 3067 } */ *uap; 3068 { 3069 struct file *fp; 3070 cap_rights_t rights; 3071 int error; 3072 3073 AUDIT_ARG_FD(uap->fd); 3074 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3075 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 3076 if (error != 0) 3077 return (error); 3078 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3079 fdrop(fp, td); 3080 return (error); 3081 } 3082 3083 /* 3084 * Common implementation code for utimes(), lutimes(), and futimes(). 3085 */ 3086 static int 3087 getutimes(usrtvp, tvpseg, tsp) 3088 const struct timeval *usrtvp; 3089 enum uio_seg tvpseg; 3090 struct timespec *tsp; 3091 { 3092 struct timeval tv[2]; 3093 const struct timeval *tvp; 3094 int error; 3095 3096 if (usrtvp == NULL) { 3097 vfs_timestamp(&tsp[0]); 3098 tsp[1] = tsp[0]; 3099 } else { 3100 if (tvpseg == UIO_SYSSPACE) { 3101 tvp = usrtvp; 3102 } else { 3103 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3104 return (error); 3105 tvp = tv; 3106 } 3107 3108 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3109 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3110 return (EINVAL); 3111 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3112 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3113 } 3114 return (0); 3115 } 3116 3117 /* 3118 * Common implementation code for utimes(), lutimes(), and futimes(). 3119 */ 3120 static int 3121 setutimes(td, vp, ts, numtimes, nullflag) 3122 struct thread *td; 3123 struct vnode *vp; 3124 const struct timespec *ts; 3125 int numtimes; 3126 int nullflag; 3127 { 3128 struct mount *mp; 3129 struct vattr vattr; 3130 int error, setbirthtime; 3131 3132 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3133 return (error); 3134 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3135 setbirthtime = 0; 3136 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3137 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3138 setbirthtime = 1; 3139 VATTR_NULL(&vattr); 3140 vattr.va_atime = ts[0]; 3141 vattr.va_mtime = ts[1]; 3142 if (setbirthtime) 3143 vattr.va_birthtime = ts[1]; 3144 if (numtimes > 2) 3145 vattr.va_birthtime = ts[2]; 3146 if (nullflag) 3147 vattr.va_vaflags |= VA_UTIMES_NULL; 3148 #ifdef MAC 3149 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3150 vattr.va_mtime); 3151 #endif 3152 if (error == 0) 3153 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3154 VOP_UNLOCK(vp, 0); 3155 vn_finished_write(mp); 3156 return (error); 3157 } 3158 3159 /* 3160 * Set the access and modification times of a file. 3161 */ 3162 #ifndef _SYS_SYSPROTO_H_ 3163 struct utimes_args { 3164 char *path; 3165 struct timeval *tptr; 3166 }; 3167 #endif 3168 int 3169 sys_utimes(td, uap) 3170 struct thread *td; 3171 register struct utimes_args /* { 3172 char *path; 3173 struct timeval *tptr; 3174 } */ *uap; 3175 { 3176 3177 return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3178 UIO_USERSPACE)); 3179 } 3180 3181 #ifndef _SYS_SYSPROTO_H_ 3182 struct futimesat_args { 3183 int fd; 3184 const char * path; 3185 const struct timeval * times; 3186 }; 3187 #endif 3188 int 3189 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3190 { 3191 3192 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3193 uap->times, UIO_USERSPACE)); 3194 } 3195 3196 int 3197 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg, 3198 struct timeval *tptr, enum uio_seg tptrseg) 3199 { 3200 3201 return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg)); 3202 } 3203 3204 int 3205 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3206 struct timeval *tptr, enum uio_seg tptrseg) 3207 { 3208 struct nameidata nd; 3209 struct timespec ts[2]; 3210 cap_rights_t rights; 3211 int error; 3212 3213 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3214 return (error); 3215 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3216 cap_rights_init(&rights, CAP_FUTIMES), td); 3217 3218 if ((error = namei(&nd)) != 0) 3219 return (error); 3220 NDFREE(&nd, NDF_ONLY_PNBUF); 3221 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3222 vrele(nd.ni_vp); 3223 return (error); 3224 } 3225 3226 /* 3227 * Set the access and modification times of a file. 3228 */ 3229 #ifndef _SYS_SYSPROTO_H_ 3230 struct lutimes_args { 3231 char *path; 3232 struct timeval *tptr; 3233 }; 3234 #endif 3235 int 3236 sys_lutimes(td, uap) 3237 struct thread *td; 3238 register struct lutimes_args /* { 3239 char *path; 3240 struct timeval *tptr; 3241 } */ *uap; 3242 { 3243 3244 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3245 UIO_USERSPACE)); 3246 } 3247 3248 int 3249 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3250 struct timeval *tptr, enum uio_seg tptrseg) 3251 { 3252 struct timespec ts[2]; 3253 struct nameidata nd; 3254 int error; 3255 3256 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3257 return (error); 3258 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3259 if ((error = namei(&nd)) != 0) 3260 return (error); 3261 NDFREE(&nd, NDF_ONLY_PNBUF); 3262 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3263 vrele(nd.ni_vp); 3264 return (error); 3265 } 3266 3267 /* 3268 * Set the access and modification times of a file. 3269 */ 3270 #ifndef _SYS_SYSPROTO_H_ 3271 struct futimes_args { 3272 int fd; 3273 struct timeval *tptr; 3274 }; 3275 #endif 3276 int 3277 sys_futimes(td, uap) 3278 struct thread *td; 3279 register struct futimes_args /* { 3280 int fd; 3281 struct timeval *tptr; 3282 } */ *uap; 3283 { 3284 3285 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3286 } 3287 3288 int 3289 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3290 enum uio_seg tptrseg) 3291 { 3292 struct timespec ts[2]; 3293 struct file *fp; 3294 cap_rights_t rights; 3295 int error; 3296 3297 AUDIT_ARG_FD(fd); 3298 error = getutimes(tptr, tptrseg, ts); 3299 if (error != 0) 3300 return (error); 3301 error = getvnode(td->td_proc->p_fd, fd, 3302 cap_rights_init(&rights, CAP_FUTIMES), &fp); 3303 if (error != 0) 3304 return (error); 3305 #ifdef AUDIT 3306 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3307 AUDIT_ARG_VNODE1(fp->f_vnode); 3308 VOP_UNLOCK(fp->f_vnode, 0); 3309 #endif 3310 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3311 fdrop(fp, td); 3312 return (error); 3313 } 3314 3315 /* 3316 * Truncate a file given its path name. 3317 */ 3318 #ifndef _SYS_SYSPROTO_H_ 3319 struct truncate_args { 3320 char *path; 3321 int pad; 3322 off_t length; 3323 }; 3324 #endif 3325 int 3326 sys_truncate(td, uap) 3327 struct thread *td; 3328 register struct truncate_args /* { 3329 char *path; 3330 int pad; 3331 off_t length; 3332 } */ *uap; 3333 { 3334 3335 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3336 } 3337 3338 int 3339 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3340 { 3341 struct mount *mp; 3342 struct vnode *vp; 3343 void *rl_cookie; 3344 struct vattr vattr; 3345 struct nameidata nd; 3346 int error; 3347 3348 if (length < 0) 3349 return(EINVAL); 3350 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3351 if ((error = namei(&nd)) != 0) 3352 return (error); 3353 vp = nd.ni_vp; 3354 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3355 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3356 vn_rangelock_unlock(vp, rl_cookie); 3357 vrele(vp); 3358 return (error); 3359 } 3360 NDFREE(&nd, NDF_ONLY_PNBUF); 3361 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3362 if (vp->v_type == VDIR) 3363 error = EISDIR; 3364 #ifdef MAC 3365 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3366 } 3367 #endif 3368 else if ((error = vn_writechk(vp)) == 0 && 3369 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3370 VATTR_NULL(&vattr); 3371 vattr.va_size = length; 3372 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3373 } 3374 VOP_UNLOCK(vp, 0); 3375 vn_finished_write(mp); 3376 vn_rangelock_unlock(vp, rl_cookie); 3377 vrele(vp); 3378 return (error); 3379 } 3380 3381 #if defined(COMPAT_43) 3382 /* 3383 * Truncate a file given its path name. 3384 */ 3385 #ifndef _SYS_SYSPROTO_H_ 3386 struct otruncate_args { 3387 char *path; 3388 long length; 3389 }; 3390 #endif 3391 int 3392 otruncate(td, uap) 3393 struct thread *td; 3394 register struct otruncate_args /* { 3395 char *path; 3396 long length; 3397 } */ *uap; 3398 { 3399 struct truncate_args /* { 3400 char *path; 3401 int pad; 3402 off_t length; 3403 } */ nuap; 3404 3405 nuap.path = uap->path; 3406 nuap.length = uap->length; 3407 return (sys_truncate(td, &nuap)); 3408 } 3409 #endif /* COMPAT_43 */ 3410 3411 /* Versions with the pad argument */ 3412 int 3413 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3414 { 3415 struct truncate_args ouap; 3416 3417 ouap.path = uap->path; 3418 ouap.length = uap->length; 3419 return (sys_truncate(td, &ouap)); 3420 } 3421 3422 int 3423 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3424 { 3425 struct ftruncate_args ouap; 3426 3427 ouap.fd = uap->fd; 3428 ouap.length = uap->length; 3429 return (sys_ftruncate(td, &ouap)); 3430 } 3431 3432 /* 3433 * Sync an open file. 3434 */ 3435 #ifndef _SYS_SYSPROTO_H_ 3436 struct fsync_args { 3437 int fd; 3438 }; 3439 #endif 3440 int 3441 sys_fsync(td, uap) 3442 struct thread *td; 3443 struct fsync_args /* { 3444 int fd; 3445 } */ *uap; 3446 { 3447 struct vnode *vp; 3448 struct mount *mp; 3449 struct file *fp; 3450 cap_rights_t rights; 3451 int error, lock_flags; 3452 3453 AUDIT_ARG_FD(uap->fd); 3454 error = getvnode(td->td_proc->p_fd, uap->fd, 3455 cap_rights_init(&rights, CAP_FSYNC), &fp); 3456 if (error != 0) 3457 return (error); 3458 vp = fp->f_vnode; 3459 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3460 if (error != 0) 3461 goto drop; 3462 if (MNT_SHARED_WRITES(mp) || 3463 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3464 lock_flags = LK_SHARED; 3465 } else { 3466 lock_flags = LK_EXCLUSIVE; 3467 } 3468 vn_lock(vp, lock_flags | LK_RETRY); 3469 AUDIT_ARG_VNODE1(vp); 3470 if (vp->v_object != NULL) { 3471 VM_OBJECT_WLOCK(vp->v_object); 3472 vm_object_page_clean(vp->v_object, 0, 0, 0); 3473 VM_OBJECT_WUNLOCK(vp->v_object); 3474 } 3475 error = VOP_FSYNC(vp, MNT_WAIT, td); 3476 3477 VOP_UNLOCK(vp, 0); 3478 vn_finished_write(mp); 3479 drop: 3480 fdrop(fp, td); 3481 return (error); 3482 } 3483 3484 /* 3485 * Rename files. Source and destination must either both be directories, or 3486 * both not be directories. If target is a directory, it must be empty. 3487 */ 3488 #ifndef _SYS_SYSPROTO_H_ 3489 struct rename_args { 3490 char *from; 3491 char *to; 3492 }; 3493 #endif 3494 int 3495 sys_rename(td, uap) 3496 struct thread *td; 3497 register struct rename_args /* { 3498 char *from; 3499 char *to; 3500 } */ *uap; 3501 { 3502 3503 return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE)); 3504 } 3505 3506 #ifndef _SYS_SYSPROTO_H_ 3507 struct renameat_args { 3508 int oldfd; 3509 char *old; 3510 int newfd; 3511 char *new; 3512 }; 3513 #endif 3514 int 3515 sys_renameat(struct thread *td, struct renameat_args *uap) 3516 { 3517 3518 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3519 UIO_USERSPACE)); 3520 } 3521 3522 int 3523 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg) 3524 { 3525 3526 return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg)); 3527 } 3528 3529 int 3530 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3531 enum uio_seg pathseg) 3532 { 3533 struct mount *mp = NULL; 3534 struct vnode *tvp, *fvp, *tdvp; 3535 struct nameidata fromnd, tond; 3536 cap_rights_t rights; 3537 int error; 3538 3539 again: 3540 bwillwrite(); 3541 #ifdef MAC 3542 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3543 AUDITVNODE1, pathseg, old, oldfd, 3544 cap_rights_init(&rights, CAP_RENAMEAT), td); 3545 #else 3546 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3547 pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT), td); 3548 #endif 3549 3550 if ((error = namei(&fromnd)) != 0) 3551 return (error); 3552 #ifdef MAC 3553 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3554 fromnd.ni_vp, &fromnd.ni_cnd); 3555 VOP_UNLOCK(fromnd.ni_dvp, 0); 3556 if (fromnd.ni_dvp != fromnd.ni_vp) 3557 VOP_UNLOCK(fromnd.ni_vp, 0); 3558 #endif 3559 fvp = fromnd.ni_vp; 3560 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3561 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3562 cap_rights_init(&rights, CAP_LINKAT), td); 3563 if (fromnd.ni_vp->v_type == VDIR) 3564 tond.ni_cnd.cn_flags |= WILLBEDIR; 3565 if ((error = namei(&tond)) != 0) { 3566 /* Translate error code for rename("dir1", "dir2/."). */ 3567 if (error == EISDIR && fvp->v_type == VDIR) 3568 error = EINVAL; 3569 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3570 vrele(fromnd.ni_dvp); 3571 vrele(fvp); 3572 goto out1; 3573 } 3574 tdvp = tond.ni_dvp; 3575 tvp = tond.ni_vp; 3576 error = vn_start_write(fvp, &mp, V_NOWAIT); 3577 if (error != 0) { 3578 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3579 NDFREE(&tond, NDF_ONLY_PNBUF); 3580 if (tvp != NULL) 3581 vput(tvp); 3582 if (tdvp == tvp) 3583 vrele(tdvp); 3584 else 3585 vput(tdvp); 3586 vrele(fromnd.ni_dvp); 3587 vrele(fvp); 3588 vrele(tond.ni_startdir); 3589 if (fromnd.ni_startdir != NULL) 3590 vrele(fromnd.ni_startdir); 3591 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3592 if (error != 0) 3593 return (error); 3594 goto again; 3595 } 3596 if (tvp != NULL) { 3597 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3598 error = ENOTDIR; 3599 goto out; 3600 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3601 error = EISDIR; 3602 goto out; 3603 } 3604 #ifdef CAPABILITIES 3605 if (newfd != AT_FDCWD) { 3606 /* 3607 * If the target already exists we require CAP_UNLINKAT 3608 * from 'newfd'. 3609 */ 3610 error = cap_check(&tond.ni_filecaps.fc_rights, 3611 cap_rights_init(&rights, CAP_UNLINKAT)); 3612 if (error != 0) 3613 goto out; 3614 } 3615 #endif 3616 } 3617 if (fvp == tdvp) { 3618 error = EINVAL; 3619 goto out; 3620 } 3621 /* 3622 * If the source is the same as the destination (that is, if they 3623 * are links to the same vnode), then there is nothing to do. 3624 */ 3625 if (fvp == tvp) 3626 error = -1; 3627 #ifdef MAC 3628 else 3629 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3630 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3631 #endif 3632 out: 3633 if (error == 0) { 3634 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3635 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3636 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3637 NDFREE(&tond, NDF_ONLY_PNBUF); 3638 } else { 3639 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3640 NDFREE(&tond, NDF_ONLY_PNBUF); 3641 if (tvp != NULL) 3642 vput(tvp); 3643 if (tdvp == tvp) 3644 vrele(tdvp); 3645 else 3646 vput(tdvp); 3647 vrele(fromnd.ni_dvp); 3648 vrele(fvp); 3649 } 3650 vrele(tond.ni_startdir); 3651 vn_finished_write(mp); 3652 out1: 3653 if (fromnd.ni_startdir) 3654 vrele(fromnd.ni_startdir); 3655 if (error == -1) 3656 return (0); 3657 return (error); 3658 } 3659 3660 /* 3661 * Make a directory file. 3662 */ 3663 #ifndef _SYS_SYSPROTO_H_ 3664 struct mkdir_args { 3665 char *path; 3666 int mode; 3667 }; 3668 #endif 3669 int 3670 sys_mkdir(td, uap) 3671 struct thread *td; 3672 register struct mkdir_args /* { 3673 char *path; 3674 int mode; 3675 } */ *uap; 3676 { 3677 3678 return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode)); 3679 } 3680 3681 #ifndef _SYS_SYSPROTO_H_ 3682 struct mkdirat_args { 3683 int fd; 3684 char *path; 3685 mode_t mode; 3686 }; 3687 #endif 3688 int 3689 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3690 { 3691 3692 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3693 } 3694 3695 int 3696 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode) 3697 { 3698 3699 return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode)); 3700 } 3701 3702 int 3703 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3704 int mode) 3705 { 3706 struct mount *mp; 3707 struct vnode *vp; 3708 struct vattr vattr; 3709 struct nameidata nd; 3710 cap_rights_t rights; 3711 int error; 3712 3713 AUDIT_ARG_MODE(mode); 3714 restart: 3715 bwillwrite(); 3716 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 3717 segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), td); 3718 nd.ni_cnd.cn_flags |= WILLBEDIR; 3719 if ((error = namei(&nd)) != 0) 3720 return (error); 3721 vp = nd.ni_vp; 3722 if (vp != NULL) { 3723 NDFREE(&nd, NDF_ONLY_PNBUF); 3724 /* 3725 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3726 * the strange behaviour of leaving the vnode unlocked 3727 * if the target is the same vnode as the parent. 3728 */ 3729 if (vp == nd.ni_dvp) 3730 vrele(nd.ni_dvp); 3731 else 3732 vput(nd.ni_dvp); 3733 vrele(vp); 3734 return (EEXIST); 3735 } 3736 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3737 NDFREE(&nd, NDF_ONLY_PNBUF); 3738 vput(nd.ni_dvp); 3739 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3740 return (error); 3741 goto restart; 3742 } 3743 VATTR_NULL(&vattr); 3744 vattr.va_type = VDIR; 3745 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3746 #ifdef MAC 3747 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3748 &vattr); 3749 if (error != 0) 3750 goto out; 3751 #endif 3752 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3753 #ifdef MAC 3754 out: 3755 #endif 3756 NDFREE(&nd, NDF_ONLY_PNBUF); 3757 vput(nd.ni_dvp); 3758 if (error == 0) 3759 vput(nd.ni_vp); 3760 vn_finished_write(mp); 3761 return (error); 3762 } 3763 3764 /* 3765 * Remove a directory file. 3766 */ 3767 #ifndef _SYS_SYSPROTO_H_ 3768 struct rmdir_args { 3769 char *path; 3770 }; 3771 #endif 3772 int 3773 sys_rmdir(td, uap) 3774 struct thread *td; 3775 struct rmdir_args /* { 3776 char *path; 3777 } */ *uap; 3778 { 3779 3780 return (kern_rmdir(td, uap->path, UIO_USERSPACE)); 3781 } 3782 3783 int 3784 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg) 3785 { 3786 3787 return (kern_rmdirat(td, AT_FDCWD, path, pathseg)); 3788 } 3789 3790 int 3791 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3792 { 3793 struct mount *mp; 3794 struct vnode *vp; 3795 struct nameidata nd; 3796 cap_rights_t rights; 3797 int error; 3798 3799 restart: 3800 bwillwrite(); 3801 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3802 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3803 if ((error = namei(&nd)) != 0) 3804 return (error); 3805 vp = nd.ni_vp; 3806 if (vp->v_type != VDIR) { 3807 error = ENOTDIR; 3808 goto out; 3809 } 3810 /* 3811 * No rmdir "." please. 3812 */ 3813 if (nd.ni_dvp == vp) { 3814 error = EINVAL; 3815 goto out; 3816 } 3817 /* 3818 * The root of a mounted filesystem cannot be deleted. 3819 */ 3820 if (vp->v_vflag & VV_ROOT) { 3821 error = EBUSY; 3822 goto out; 3823 } 3824 #ifdef MAC 3825 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3826 &nd.ni_cnd); 3827 if (error != 0) 3828 goto out; 3829 #endif 3830 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3831 NDFREE(&nd, NDF_ONLY_PNBUF); 3832 vput(vp); 3833 if (nd.ni_dvp == vp) 3834 vrele(nd.ni_dvp); 3835 else 3836 vput(nd.ni_dvp); 3837 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3838 return (error); 3839 goto restart; 3840 } 3841 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3842 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3843 vn_finished_write(mp); 3844 out: 3845 NDFREE(&nd, NDF_ONLY_PNBUF); 3846 vput(vp); 3847 if (nd.ni_dvp == vp) 3848 vrele(nd.ni_dvp); 3849 else 3850 vput(nd.ni_dvp); 3851 return (error); 3852 } 3853 3854 #ifdef COMPAT_43 3855 /* 3856 * Read a block of directory entries in a filesystem independent format. 3857 */ 3858 #ifndef _SYS_SYSPROTO_H_ 3859 struct ogetdirentries_args { 3860 int fd; 3861 char *buf; 3862 u_int count; 3863 long *basep; 3864 }; 3865 #endif 3866 int 3867 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3868 { 3869 long loff; 3870 int error; 3871 3872 error = kern_ogetdirentries(td, uap, &loff); 3873 if (error == 0) 3874 error = copyout(&loff, uap->basep, sizeof(long)); 3875 return (error); 3876 } 3877 3878 int 3879 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3880 long *ploff) 3881 { 3882 struct vnode *vp; 3883 struct file *fp; 3884 struct uio auio, kuio; 3885 struct iovec aiov, kiov; 3886 struct dirent *dp, *edp; 3887 cap_rights_t rights; 3888 caddr_t dirbuf; 3889 int error, eofflag, readcnt; 3890 long loff; 3891 off_t foffset; 3892 3893 /* XXX arbitrary sanity limit on `count'. */ 3894 if (uap->count > 64 * 1024) 3895 return (EINVAL); 3896 error = getvnode(td->td_proc->p_fd, uap->fd, 3897 cap_rights_init(&rights, CAP_READ), &fp); 3898 if (error != 0) 3899 return (error); 3900 if ((fp->f_flag & FREAD) == 0) { 3901 fdrop(fp, td); 3902 return (EBADF); 3903 } 3904 vp = fp->f_vnode; 3905 foffset = foffset_lock(fp, 0); 3906 unionread: 3907 if (vp->v_type != VDIR) { 3908 foffset_unlock(fp, foffset, 0); 3909 fdrop(fp, td); 3910 return (EINVAL); 3911 } 3912 aiov.iov_base = uap->buf; 3913 aiov.iov_len = uap->count; 3914 auio.uio_iov = &aiov; 3915 auio.uio_iovcnt = 1; 3916 auio.uio_rw = UIO_READ; 3917 auio.uio_segflg = UIO_USERSPACE; 3918 auio.uio_td = td; 3919 auio.uio_resid = uap->count; 3920 vn_lock(vp, LK_SHARED | LK_RETRY); 3921 loff = auio.uio_offset = foffset; 3922 #ifdef MAC 3923 error = mac_vnode_check_readdir(td->td_ucred, vp); 3924 if (error != 0) { 3925 VOP_UNLOCK(vp, 0); 3926 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3927 fdrop(fp, td); 3928 return (error); 3929 } 3930 #endif 3931 # if (BYTE_ORDER != LITTLE_ENDIAN) 3932 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3933 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3934 NULL, NULL); 3935 foffset = auio.uio_offset; 3936 } else 3937 # endif 3938 { 3939 kuio = auio; 3940 kuio.uio_iov = &kiov; 3941 kuio.uio_segflg = UIO_SYSSPACE; 3942 kiov.iov_len = uap->count; 3943 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3944 kiov.iov_base = dirbuf; 3945 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3946 NULL, NULL); 3947 foffset = kuio.uio_offset; 3948 if (error == 0) { 3949 readcnt = uap->count - kuio.uio_resid; 3950 edp = (struct dirent *)&dirbuf[readcnt]; 3951 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3952 # if (BYTE_ORDER == LITTLE_ENDIAN) 3953 /* 3954 * The expected low byte of 3955 * dp->d_namlen is our dp->d_type. 3956 * The high MBZ byte of dp->d_namlen 3957 * is our dp->d_namlen. 3958 */ 3959 dp->d_type = dp->d_namlen; 3960 dp->d_namlen = 0; 3961 # else 3962 /* 3963 * The dp->d_type is the high byte 3964 * of the expected dp->d_namlen, 3965 * so must be zero'ed. 3966 */ 3967 dp->d_type = 0; 3968 # endif 3969 if (dp->d_reclen > 0) { 3970 dp = (struct dirent *) 3971 ((char *)dp + dp->d_reclen); 3972 } else { 3973 error = EIO; 3974 break; 3975 } 3976 } 3977 if (dp >= edp) 3978 error = uiomove(dirbuf, readcnt, &auio); 3979 } 3980 free(dirbuf, M_TEMP); 3981 } 3982 if (error != 0) { 3983 VOP_UNLOCK(vp, 0); 3984 foffset_unlock(fp, foffset, 0); 3985 fdrop(fp, td); 3986 return (error); 3987 } 3988 if (uap->count == auio.uio_resid && 3989 (vp->v_vflag & VV_ROOT) && 3990 (vp->v_mount->mnt_flag & MNT_UNION)) { 3991 struct vnode *tvp = vp; 3992 vp = vp->v_mount->mnt_vnodecovered; 3993 VREF(vp); 3994 fp->f_vnode = vp; 3995 fp->f_data = vp; 3996 foffset = 0; 3997 vput(tvp); 3998 goto unionread; 3999 } 4000 VOP_UNLOCK(vp, 0); 4001 foffset_unlock(fp, foffset, 0); 4002 fdrop(fp, td); 4003 td->td_retval[0] = uap->count - auio.uio_resid; 4004 if (error == 0) 4005 *ploff = loff; 4006 return (error); 4007 } 4008 #endif /* COMPAT_43 */ 4009 4010 /* 4011 * Read a block of directory entries in a filesystem independent format. 4012 */ 4013 #ifndef _SYS_SYSPROTO_H_ 4014 struct getdirentries_args { 4015 int fd; 4016 char *buf; 4017 u_int count; 4018 long *basep; 4019 }; 4020 #endif 4021 int 4022 sys_getdirentries(td, uap) 4023 struct thread *td; 4024 register struct getdirentries_args /* { 4025 int fd; 4026 char *buf; 4027 u_int count; 4028 long *basep; 4029 } */ *uap; 4030 { 4031 long base; 4032 int error; 4033 4034 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4035 NULL, UIO_USERSPACE); 4036 if (error != 0) 4037 return (error); 4038 if (uap->basep != NULL) 4039 error = copyout(&base, uap->basep, sizeof(long)); 4040 return (error); 4041 } 4042 4043 int 4044 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 4045 long *basep, ssize_t *residp, enum uio_seg bufseg) 4046 { 4047 struct vnode *vp; 4048 struct file *fp; 4049 struct uio auio; 4050 struct iovec aiov; 4051 cap_rights_t rights; 4052 long loff; 4053 int error, eofflag; 4054 off_t foffset; 4055 4056 AUDIT_ARG_FD(fd); 4057 if (count > IOSIZE_MAX) 4058 return (EINVAL); 4059 auio.uio_resid = count; 4060 error = getvnode(td->td_proc->p_fd, fd, 4061 cap_rights_init(&rights, CAP_READ), &fp); 4062 if (error != 0) 4063 return (error); 4064 if ((fp->f_flag & FREAD) == 0) { 4065 fdrop(fp, td); 4066 return (EBADF); 4067 } 4068 vp = fp->f_vnode; 4069 foffset = foffset_lock(fp, 0); 4070 unionread: 4071 if (vp->v_type != VDIR) { 4072 error = EINVAL; 4073 goto fail; 4074 } 4075 aiov.iov_base = buf; 4076 aiov.iov_len = count; 4077 auio.uio_iov = &aiov; 4078 auio.uio_iovcnt = 1; 4079 auio.uio_rw = UIO_READ; 4080 auio.uio_segflg = bufseg; 4081 auio.uio_td = td; 4082 vn_lock(vp, LK_SHARED | LK_RETRY); 4083 AUDIT_ARG_VNODE1(vp); 4084 loff = auio.uio_offset = foffset; 4085 #ifdef MAC 4086 error = mac_vnode_check_readdir(td->td_ucred, vp); 4087 if (error == 0) 4088 #endif 4089 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4090 NULL); 4091 foffset = auio.uio_offset; 4092 if (error != 0) { 4093 VOP_UNLOCK(vp, 0); 4094 goto fail; 4095 } 4096 if (count == auio.uio_resid && 4097 (vp->v_vflag & VV_ROOT) && 4098 (vp->v_mount->mnt_flag & MNT_UNION)) { 4099 struct vnode *tvp = vp; 4100 4101 vp = vp->v_mount->mnt_vnodecovered; 4102 VREF(vp); 4103 fp->f_vnode = vp; 4104 fp->f_data = vp; 4105 foffset = 0; 4106 vput(tvp); 4107 goto unionread; 4108 } 4109 VOP_UNLOCK(vp, 0); 4110 *basep = loff; 4111 if (residp != NULL) 4112 *residp = auio.uio_resid; 4113 td->td_retval[0] = count - auio.uio_resid; 4114 fail: 4115 foffset_unlock(fp, foffset, 0); 4116 fdrop(fp, td); 4117 return (error); 4118 } 4119 4120 #ifndef _SYS_SYSPROTO_H_ 4121 struct getdents_args { 4122 int fd; 4123 char *buf; 4124 size_t count; 4125 }; 4126 #endif 4127 int 4128 sys_getdents(td, uap) 4129 struct thread *td; 4130 register struct getdents_args /* { 4131 int fd; 4132 char *buf; 4133 u_int count; 4134 } */ *uap; 4135 { 4136 struct getdirentries_args ap; 4137 4138 ap.fd = uap->fd; 4139 ap.buf = uap->buf; 4140 ap.count = uap->count; 4141 ap.basep = NULL; 4142 return (sys_getdirentries(td, &ap)); 4143 } 4144 4145 /* 4146 * Set the mode mask for creation of filesystem nodes. 4147 */ 4148 #ifndef _SYS_SYSPROTO_H_ 4149 struct umask_args { 4150 int newmask; 4151 }; 4152 #endif 4153 int 4154 sys_umask(td, uap) 4155 struct thread *td; 4156 struct umask_args /* { 4157 int newmask; 4158 } */ *uap; 4159 { 4160 register struct filedesc *fdp; 4161 4162 FILEDESC_XLOCK(td->td_proc->p_fd); 4163 fdp = td->td_proc->p_fd; 4164 td->td_retval[0] = fdp->fd_cmask; 4165 fdp->fd_cmask = uap->newmask & ALLPERMS; 4166 FILEDESC_XUNLOCK(td->td_proc->p_fd); 4167 return (0); 4168 } 4169 4170 /* 4171 * Void all references to file by ripping underlying filesystem away from 4172 * vnode. 4173 */ 4174 #ifndef _SYS_SYSPROTO_H_ 4175 struct revoke_args { 4176 char *path; 4177 }; 4178 #endif 4179 int 4180 sys_revoke(td, uap) 4181 struct thread *td; 4182 register struct revoke_args /* { 4183 char *path; 4184 } */ *uap; 4185 { 4186 struct vnode *vp; 4187 struct vattr vattr; 4188 struct nameidata nd; 4189 int error; 4190 4191 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4192 uap->path, td); 4193 if ((error = namei(&nd)) != 0) 4194 return (error); 4195 vp = nd.ni_vp; 4196 NDFREE(&nd, NDF_ONLY_PNBUF); 4197 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4198 error = EINVAL; 4199 goto out; 4200 } 4201 #ifdef MAC 4202 error = mac_vnode_check_revoke(td->td_ucred, vp); 4203 if (error != 0) 4204 goto out; 4205 #endif 4206 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4207 if (error != 0) 4208 goto out; 4209 if (td->td_ucred->cr_uid != vattr.va_uid) { 4210 error = priv_check(td, PRIV_VFS_ADMIN); 4211 if (error != 0) 4212 goto out; 4213 } 4214 if (vcount(vp) > 1) 4215 VOP_REVOKE(vp, REVOKEALL); 4216 out: 4217 vput(vp); 4218 return (error); 4219 } 4220 4221 /* 4222 * Convert a user file descriptor to a kernel file entry and check that, if it 4223 * is a capability, the correct rights are present. A reference on the file 4224 * entry is held upon returning. 4225 */ 4226 int 4227 getvnode(struct filedesc *fdp, int fd, cap_rights_t *rightsp, struct file **fpp) 4228 { 4229 struct file *fp; 4230 int error; 4231 4232 error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL); 4233 if (error != 0) 4234 return (error); 4235 4236 /* 4237 * The file could be not of the vnode type, or it may be not 4238 * yet fully initialized, in which case the f_vnode pointer 4239 * may be set, but f_ops is still badfileops. E.g., 4240 * devfs_open() transiently create such situation to 4241 * facilitate csw d_fdopen(). 4242 * 4243 * Dupfdopen() handling in kern_openat() installs the 4244 * half-baked file into the process descriptor table, allowing 4245 * other thread to dereference it. Guard against the race by 4246 * checking f_ops. 4247 */ 4248 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4249 fdrop(fp, curthread); 4250 return (EINVAL); 4251 } 4252 *fpp = fp; 4253 return (0); 4254 } 4255 4256 4257 /* 4258 * Get an (NFS) file handle. 4259 */ 4260 #ifndef _SYS_SYSPROTO_H_ 4261 struct lgetfh_args { 4262 char *fname; 4263 fhandle_t *fhp; 4264 }; 4265 #endif 4266 int 4267 sys_lgetfh(td, uap) 4268 struct thread *td; 4269 register struct lgetfh_args *uap; 4270 { 4271 struct nameidata nd; 4272 fhandle_t fh; 4273 register struct vnode *vp; 4274 int error; 4275 4276 error = priv_check(td, PRIV_VFS_GETFH); 4277 if (error != 0) 4278 return (error); 4279 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4280 uap->fname, td); 4281 error = namei(&nd); 4282 if (error != 0) 4283 return (error); 4284 NDFREE(&nd, NDF_ONLY_PNBUF); 4285 vp = nd.ni_vp; 4286 bzero(&fh, sizeof(fh)); 4287 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4288 error = VOP_VPTOFH(vp, &fh.fh_fid); 4289 vput(vp); 4290 if (error == 0) 4291 error = copyout(&fh, uap->fhp, sizeof (fh)); 4292 return (error); 4293 } 4294 4295 #ifndef _SYS_SYSPROTO_H_ 4296 struct getfh_args { 4297 char *fname; 4298 fhandle_t *fhp; 4299 }; 4300 #endif 4301 int 4302 sys_getfh(td, uap) 4303 struct thread *td; 4304 register struct getfh_args *uap; 4305 { 4306 struct nameidata nd; 4307 fhandle_t fh; 4308 register struct vnode *vp; 4309 int error; 4310 4311 error = priv_check(td, PRIV_VFS_GETFH); 4312 if (error != 0) 4313 return (error); 4314 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4315 uap->fname, td); 4316 error = namei(&nd); 4317 if (error != 0) 4318 return (error); 4319 NDFREE(&nd, NDF_ONLY_PNBUF); 4320 vp = nd.ni_vp; 4321 bzero(&fh, sizeof(fh)); 4322 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4323 error = VOP_VPTOFH(vp, &fh.fh_fid); 4324 vput(vp); 4325 if (error == 0) 4326 error = copyout(&fh, uap->fhp, sizeof (fh)); 4327 return (error); 4328 } 4329 4330 /* 4331 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4332 * open descriptor. 4333 * 4334 * warning: do not remove the priv_check() call or this becomes one giant 4335 * security hole. 4336 */ 4337 #ifndef _SYS_SYSPROTO_H_ 4338 struct fhopen_args { 4339 const struct fhandle *u_fhp; 4340 int flags; 4341 }; 4342 #endif 4343 int 4344 sys_fhopen(td, uap) 4345 struct thread *td; 4346 struct fhopen_args /* { 4347 const struct fhandle *u_fhp; 4348 int flags; 4349 } */ *uap; 4350 { 4351 struct mount *mp; 4352 struct vnode *vp; 4353 struct fhandle fhp; 4354 struct file *fp; 4355 int fmode, error; 4356 int indx; 4357 4358 error = priv_check(td, PRIV_VFS_FHOPEN); 4359 if (error != 0) 4360 return (error); 4361 indx = -1; 4362 fmode = FFLAGS(uap->flags); 4363 /* why not allow a non-read/write open for our lockd? */ 4364 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4365 return (EINVAL); 4366 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4367 if (error != 0) 4368 return(error); 4369 /* find the mount point */ 4370 mp = vfs_busyfs(&fhp.fh_fsid); 4371 if (mp == NULL) 4372 return (ESTALE); 4373 /* now give me my vnode, it gets returned to me locked */ 4374 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4375 vfs_unbusy(mp); 4376 if (error != 0) 4377 return (error); 4378 4379 error = falloc_noinstall(td, &fp); 4380 if (error != 0) { 4381 vput(vp); 4382 return (error); 4383 } 4384 /* 4385 * An extra reference on `fp' has been held for us by 4386 * falloc_noinstall(). 4387 */ 4388 4389 #ifdef INVARIANTS 4390 td->td_dupfd = -1; 4391 #endif 4392 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4393 if (error != 0) { 4394 KASSERT(fp->f_ops == &badfileops, 4395 ("VOP_OPEN in fhopen() set f_ops")); 4396 KASSERT(td->td_dupfd < 0, 4397 ("fhopen() encountered fdopen()")); 4398 4399 vput(vp); 4400 goto bad; 4401 } 4402 #ifdef INVARIANTS 4403 td->td_dupfd = 0; 4404 #endif 4405 fp->f_vnode = vp; 4406 fp->f_seqcount = 1; 4407 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4408 &vnops); 4409 VOP_UNLOCK(vp, 0); 4410 if ((fmode & O_TRUNC) != 0) { 4411 error = fo_truncate(fp, 0, td->td_ucred, td); 4412 if (error != 0) 4413 goto bad; 4414 } 4415 4416 error = finstall(td, fp, &indx, fmode, NULL); 4417 bad: 4418 fdrop(fp, td); 4419 td->td_retval[0] = indx; 4420 return (error); 4421 } 4422 4423 /* 4424 * Stat an (NFS) file handle. 4425 */ 4426 #ifndef _SYS_SYSPROTO_H_ 4427 struct fhstat_args { 4428 struct fhandle *u_fhp; 4429 struct stat *sb; 4430 }; 4431 #endif 4432 int 4433 sys_fhstat(td, uap) 4434 struct thread *td; 4435 register struct fhstat_args /* { 4436 struct fhandle *u_fhp; 4437 struct stat *sb; 4438 } */ *uap; 4439 { 4440 struct stat sb; 4441 struct fhandle fh; 4442 int error; 4443 4444 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4445 if (error != 0) 4446 return (error); 4447 error = kern_fhstat(td, fh, &sb); 4448 if (error == 0) 4449 error = copyout(&sb, uap->sb, sizeof(sb)); 4450 return (error); 4451 } 4452 4453 int 4454 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4455 { 4456 struct mount *mp; 4457 struct vnode *vp; 4458 int error; 4459 4460 error = priv_check(td, PRIV_VFS_FHSTAT); 4461 if (error != 0) 4462 return (error); 4463 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4464 return (ESTALE); 4465 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4466 vfs_unbusy(mp); 4467 if (error != 0) 4468 return (error); 4469 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4470 vput(vp); 4471 return (error); 4472 } 4473 4474 /* 4475 * Implement fstatfs() for (NFS) file handles. 4476 */ 4477 #ifndef _SYS_SYSPROTO_H_ 4478 struct fhstatfs_args { 4479 struct fhandle *u_fhp; 4480 struct statfs *buf; 4481 }; 4482 #endif 4483 int 4484 sys_fhstatfs(td, uap) 4485 struct thread *td; 4486 struct fhstatfs_args /* { 4487 struct fhandle *u_fhp; 4488 struct statfs *buf; 4489 } */ *uap; 4490 { 4491 struct statfs sf; 4492 fhandle_t fh; 4493 int error; 4494 4495 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4496 if (error != 0) 4497 return (error); 4498 error = kern_fhstatfs(td, fh, &sf); 4499 if (error != 0) 4500 return (error); 4501 return (copyout(&sf, uap->buf, sizeof(sf))); 4502 } 4503 4504 int 4505 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4506 { 4507 struct statfs *sp; 4508 struct mount *mp; 4509 struct vnode *vp; 4510 int error; 4511 4512 error = priv_check(td, PRIV_VFS_FHSTATFS); 4513 if (error != 0) 4514 return (error); 4515 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4516 return (ESTALE); 4517 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4518 if (error != 0) { 4519 vfs_unbusy(mp); 4520 return (error); 4521 } 4522 vput(vp); 4523 error = prison_canseemount(td->td_ucred, mp); 4524 if (error != 0) 4525 goto out; 4526 #ifdef MAC 4527 error = mac_mount_check_stat(td->td_ucred, mp); 4528 if (error != 0) 4529 goto out; 4530 #endif 4531 /* 4532 * Set these in case the underlying filesystem fails to do so. 4533 */ 4534 sp = &mp->mnt_stat; 4535 sp->f_version = STATFS_VERSION; 4536 sp->f_namemax = NAME_MAX; 4537 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4538 error = VFS_STATFS(mp, sp); 4539 if (error == 0) 4540 *buf = *sp; 4541 out: 4542 vfs_unbusy(mp); 4543 return (error); 4544 } 4545 4546 int 4547 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4548 { 4549 struct file *fp; 4550 struct mount *mp; 4551 struct vnode *vp; 4552 cap_rights_t rights; 4553 off_t olen, ooffset; 4554 int error; 4555 4556 fp = NULL; 4557 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4558 if (error != 0) 4559 goto out; 4560 4561 switch (fp->f_type) { 4562 case DTYPE_VNODE: 4563 break; 4564 case DTYPE_PIPE: 4565 case DTYPE_FIFO: 4566 error = ESPIPE; 4567 goto out; 4568 default: 4569 error = ENODEV; 4570 goto out; 4571 } 4572 if ((fp->f_flag & FWRITE) == 0) { 4573 error = EBADF; 4574 goto out; 4575 } 4576 vp = fp->f_vnode; 4577 if (vp->v_type != VREG) { 4578 error = ENODEV; 4579 goto out; 4580 } 4581 if (offset < 0 || len <= 0) { 4582 error = EINVAL; 4583 goto out; 4584 } 4585 /* Check for wrap. */ 4586 if (offset > OFF_MAX - len) { 4587 error = EFBIG; 4588 goto out; 4589 } 4590 4591 /* Allocating blocks may take a long time, so iterate. */ 4592 for (;;) { 4593 olen = len; 4594 ooffset = offset; 4595 4596 bwillwrite(); 4597 mp = NULL; 4598 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4599 if (error != 0) 4600 break; 4601 error = vn_lock(vp, LK_EXCLUSIVE); 4602 if (error != 0) { 4603 vn_finished_write(mp); 4604 break; 4605 } 4606 #ifdef MAC 4607 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4608 if (error == 0) 4609 #endif 4610 error = VOP_ALLOCATE(vp, &offset, &len); 4611 VOP_UNLOCK(vp, 0); 4612 vn_finished_write(mp); 4613 4614 if (olen + ooffset != offset + len) { 4615 panic("offset + len changed from %jx/%jx to %jx/%jx", 4616 ooffset, olen, offset, len); 4617 } 4618 if (error != 0 || len == 0) 4619 break; 4620 KASSERT(olen > len, ("Iteration did not make progress?")); 4621 maybe_yield(); 4622 } 4623 out: 4624 if (fp != NULL) 4625 fdrop(fp, td); 4626 return (error); 4627 } 4628 4629 int 4630 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4631 { 4632 4633 td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset, 4634 uap->len); 4635 return (0); 4636 } 4637 4638 /* 4639 * Unlike madvise(2), we do not make a best effort to remember every 4640 * possible caching hint. Instead, we remember the last setting with 4641 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4642 * region of any current setting. 4643 */ 4644 int 4645 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4646 int advice) 4647 { 4648 struct fadvise_info *fa, *new; 4649 struct file *fp; 4650 struct vnode *vp; 4651 cap_rights_t rights; 4652 off_t end; 4653 int error; 4654 4655 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4656 return (EINVAL); 4657 switch (advice) { 4658 case POSIX_FADV_SEQUENTIAL: 4659 case POSIX_FADV_RANDOM: 4660 case POSIX_FADV_NOREUSE: 4661 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4662 break; 4663 case POSIX_FADV_NORMAL: 4664 case POSIX_FADV_WILLNEED: 4665 case POSIX_FADV_DONTNEED: 4666 new = NULL; 4667 break; 4668 default: 4669 return (EINVAL); 4670 } 4671 /* XXX: CAP_POSIX_FADVISE? */ 4672 error = fget(td, fd, cap_rights_init(&rights), &fp); 4673 if (error != 0) 4674 goto out; 4675 4676 switch (fp->f_type) { 4677 case DTYPE_VNODE: 4678 break; 4679 case DTYPE_PIPE: 4680 case DTYPE_FIFO: 4681 error = ESPIPE; 4682 goto out; 4683 default: 4684 error = ENODEV; 4685 goto out; 4686 } 4687 vp = fp->f_vnode; 4688 if (vp->v_type != VREG) { 4689 error = ENODEV; 4690 goto out; 4691 } 4692 if (len == 0) 4693 end = OFF_MAX; 4694 else 4695 end = offset + len - 1; 4696 switch (advice) { 4697 case POSIX_FADV_SEQUENTIAL: 4698 case POSIX_FADV_RANDOM: 4699 case POSIX_FADV_NOREUSE: 4700 /* 4701 * Try to merge any existing non-standard region with 4702 * this new region if possible, otherwise create a new 4703 * non-standard region for this request. 4704 */ 4705 mtx_pool_lock(mtxpool_sleep, fp); 4706 fa = fp->f_advice; 4707 if (fa != NULL && fa->fa_advice == advice && 4708 ((fa->fa_start <= end && fa->fa_end >= offset) || 4709 (end != OFF_MAX && fa->fa_start == end + 1) || 4710 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4711 if (offset < fa->fa_start) 4712 fa->fa_start = offset; 4713 if (end > fa->fa_end) 4714 fa->fa_end = end; 4715 } else { 4716 new->fa_advice = advice; 4717 new->fa_start = offset; 4718 new->fa_end = end; 4719 new->fa_prevstart = 0; 4720 new->fa_prevend = 0; 4721 fp->f_advice = new; 4722 new = fa; 4723 } 4724 mtx_pool_unlock(mtxpool_sleep, fp); 4725 break; 4726 case POSIX_FADV_NORMAL: 4727 /* 4728 * If a the "normal" region overlaps with an existing 4729 * non-standard region, trim or remove the 4730 * non-standard region. 4731 */ 4732 mtx_pool_lock(mtxpool_sleep, fp); 4733 fa = fp->f_advice; 4734 if (fa != NULL) { 4735 if (offset <= fa->fa_start && end >= fa->fa_end) { 4736 new = fa; 4737 fp->f_advice = NULL; 4738 } else if (offset <= fa->fa_start && 4739 end >= fa->fa_start) 4740 fa->fa_start = end + 1; 4741 else if (offset <= fa->fa_end && end >= fa->fa_end) 4742 fa->fa_end = offset - 1; 4743 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4744 /* 4745 * If the "normal" region is a middle 4746 * portion of the existing 4747 * non-standard region, just remove 4748 * the whole thing rather than picking 4749 * one side or the other to 4750 * preserve. 4751 */ 4752 new = fa; 4753 fp->f_advice = NULL; 4754 } 4755 } 4756 mtx_pool_unlock(mtxpool_sleep, fp); 4757 break; 4758 case POSIX_FADV_WILLNEED: 4759 case POSIX_FADV_DONTNEED: 4760 error = VOP_ADVISE(vp, offset, end, advice); 4761 break; 4762 } 4763 out: 4764 if (fp != NULL) 4765 fdrop(fp, td); 4766 free(new, M_FADVISE); 4767 return (error); 4768 } 4769 4770 int 4771 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4772 { 4773 4774 td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset, 4775 uap->len, uap->advice); 4776 return (0); 4777 } 4778