1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/sysent.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/sysproto.h> 55 #include <sys/namei.h> 56 #include <sys/filedesc.h> 57 #include <sys/kernel.h> 58 #include <sys/fcntl.h> 59 #include <sys/file.h> 60 #include <sys/filio.h> 61 #include <sys/limits.h> 62 #include <sys/linker.h> 63 #include <sys/rwlock.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97 static int chroot_refuse_vdir_fds(struct filedesc *fdp); 98 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 99 static int kern_chflags(struct thread *td, const char *path, 100 enum uio_seg pathseg, u_long flags); 101 static int kern_chflagsat(struct thread *td, int fd, const char *path, 102 enum uio_seg pathseg, u_long flags, int atflag); 103 static int setfflags(struct thread *td, struct vnode *, u_long); 104 static int setutimes(struct thread *td, struct vnode *, 105 const struct timespec *, int, int); 106 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 107 struct thread *td); 108 109 /* 110 * The module initialization routine for POSIX asynchronous I/O will 111 * set this to the version of AIO that it implements. (Zero means 112 * that it is not implemented.) This value is used here by pathconf() 113 * and in kern_descrip.c by fpathconf(). 114 */ 115 int async_io_version; 116 117 #ifdef DEBUG 118 static int syncprt = 0; 119 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 120 #endif 121 122 /* 123 * Sync each mounted filesystem. 124 */ 125 #ifndef _SYS_SYSPROTO_H_ 126 struct sync_args { 127 int dummy; 128 }; 129 #endif 130 /* ARGSUSED */ 131 int 132 sys_sync(td, uap) 133 struct thread *td; 134 struct sync_args *uap; 135 { 136 struct mount *mp, *nmp; 137 int save; 138 139 mtx_lock(&mountlist_mtx); 140 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 141 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 142 nmp = TAILQ_NEXT(mp, mnt_list); 143 continue; 144 } 145 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 146 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 147 save = curthread_pflags_set(TDP_SYNCIO); 148 vfs_msync(mp, MNT_NOWAIT); 149 VFS_SYNC(mp, MNT_NOWAIT); 150 curthread_pflags_restore(save); 151 vn_finished_write(mp); 152 } 153 mtx_lock(&mountlist_mtx); 154 nmp = TAILQ_NEXT(mp, mnt_list); 155 vfs_unbusy(mp); 156 } 157 mtx_unlock(&mountlist_mtx); 158 return (0); 159 } 160 161 /* 162 * Change filesystem quotas. 163 */ 164 #ifndef _SYS_SYSPROTO_H_ 165 struct quotactl_args { 166 char *path; 167 int cmd; 168 int uid; 169 caddr_t arg; 170 }; 171 #endif 172 int 173 sys_quotactl(td, uap) 174 struct thread *td; 175 register struct quotactl_args /* { 176 char *path; 177 int cmd; 178 int uid; 179 caddr_t arg; 180 } */ *uap; 181 { 182 struct mount *mp; 183 struct nameidata nd; 184 int error; 185 186 AUDIT_ARG_CMD(uap->cmd); 187 AUDIT_ARG_UID(uap->uid); 188 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 189 return (EPERM); 190 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 191 uap->path, td); 192 if ((error = namei(&nd)) != 0) 193 return (error); 194 NDFREE(&nd, NDF_ONLY_PNBUF); 195 mp = nd.ni_vp->v_mount; 196 vfs_ref(mp); 197 vput(nd.ni_vp); 198 error = vfs_busy(mp, 0); 199 vfs_rel(mp); 200 if (error != 0) 201 return (error); 202 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 203 204 /* 205 * Since quota on operation typically needs to open quota 206 * file, the Q_QUOTAON handler needs to unbusy the mount point 207 * before calling into namei. Otherwise, unmount might be 208 * started between two vfs_busy() invocations (first is our, 209 * second is from mount point cross-walk code in lookup()), 210 * causing deadlock. 211 * 212 * Require that Q_QUOTAON handles the vfs_busy() reference on 213 * its own, always returning with ubusied mount point. 214 */ 215 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 216 vfs_unbusy(mp); 217 return (error); 218 } 219 220 /* 221 * Used by statfs conversion routines to scale the block size up if 222 * necessary so that all of the block counts are <= 'max_size'. Note 223 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 224 * value of 'n'. 225 */ 226 void 227 statfs_scale_blocks(struct statfs *sf, long max_size) 228 { 229 uint64_t count; 230 int shift; 231 232 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 233 234 /* 235 * Attempt to scale the block counts to give a more accurate 236 * overview to userland of the ratio of free space to used 237 * space. To do this, find the largest block count and compute 238 * a divisor that lets it fit into a signed integer <= max_size. 239 */ 240 if (sf->f_bavail < 0) 241 count = -sf->f_bavail; 242 else 243 count = sf->f_bavail; 244 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 245 if (count <= max_size) 246 return; 247 248 count >>= flsl(max_size); 249 shift = 0; 250 while (count > 0) { 251 shift++; 252 count >>=1; 253 } 254 255 sf->f_bsize <<= shift; 256 sf->f_blocks >>= shift; 257 sf->f_bfree >>= shift; 258 sf->f_bavail >>= shift; 259 } 260 261 /* 262 * Get filesystem statistics. 263 */ 264 #ifndef _SYS_SYSPROTO_H_ 265 struct statfs_args { 266 char *path; 267 struct statfs *buf; 268 }; 269 #endif 270 int 271 sys_statfs(td, uap) 272 struct thread *td; 273 register struct statfs_args /* { 274 char *path; 275 struct statfs *buf; 276 } */ *uap; 277 { 278 struct statfs sf; 279 int error; 280 281 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 282 if (error == 0) 283 error = copyout(&sf, uap->buf, sizeof(sf)); 284 return (error); 285 } 286 287 int 288 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 289 struct statfs *buf) 290 { 291 struct mount *mp; 292 struct statfs *sp, sb; 293 struct nameidata nd; 294 int error; 295 296 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 297 pathseg, path, td); 298 error = namei(&nd); 299 if (error != 0) 300 return (error); 301 mp = nd.ni_vp->v_mount; 302 vfs_ref(mp); 303 NDFREE(&nd, NDF_ONLY_PNBUF); 304 vput(nd.ni_vp); 305 error = vfs_busy(mp, 0); 306 vfs_rel(mp); 307 if (error != 0) 308 return (error); 309 #ifdef MAC 310 error = mac_mount_check_stat(td->td_ucred, mp); 311 if (error != 0) 312 goto out; 313 #endif 314 /* 315 * Set these in case the underlying filesystem fails to do so. 316 */ 317 sp = &mp->mnt_stat; 318 sp->f_version = STATFS_VERSION; 319 sp->f_namemax = NAME_MAX; 320 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 321 error = VFS_STATFS(mp, sp); 322 if (error != 0) 323 goto out; 324 if (priv_check(td, PRIV_VFS_GENERATION)) { 325 bcopy(sp, &sb, sizeof(sb)); 326 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 327 prison_enforce_statfs(td->td_ucred, mp, &sb); 328 sp = &sb; 329 } 330 *buf = *sp; 331 out: 332 vfs_unbusy(mp); 333 return (error); 334 } 335 336 /* 337 * Get filesystem statistics. 338 */ 339 #ifndef _SYS_SYSPROTO_H_ 340 struct fstatfs_args { 341 int fd; 342 struct statfs *buf; 343 }; 344 #endif 345 int 346 sys_fstatfs(td, uap) 347 struct thread *td; 348 register struct fstatfs_args /* { 349 int fd; 350 struct statfs *buf; 351 } */ *uap; 352 { 353 struct statfs sf; 354 int error; 355 356 error = kern_fstatfs(td, uap->fd, &sf); 357 if (error == 0) 358 error = copyout(&sf, uap->buf, sizeof(sf)); 359 return (error); 360 } 361 362 int 363 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 364 { 365 struct file *fp; 366 struct mount *mp; 367 struct statfs *sp, sb; 368 struct vnode *vp; 369 cap_rights_t rights; 370 int error; 371 372 AUDIT_ARG_FD(fd); 373 error = getvnode(td->td_proc->p_fd, fd, 374 cap_rights_init(&rights, CAP_FSTATFS), &fp); 375 if (error != 0) 376 return (error); 377 vp = fp->f_vnode; 378 vn_lock(vp, LK_SHARED | LK_RETRY); 379 #ifdef AUDIT 380 AUDIT_ARG_VNODE1(vp); 381 #endif 382 mp = vp->v_mount; 383 if (mp) 384 vfs_ref(mp); 385 VOP_UNLOCK(vp, 0); 386 fdrop(fp, td); 387 if (mp == NULL) { 388 error = EBADF; 389 goto out; 390 } 391 error = vfs_busy(mp, 0); 392 vfs_rel(mp); 393 if (error != 0) 394 return (error); 395 #ifdef MAC 396 error = mac_mount_check_stat(td->td_ucred, mp); 397 if (error != 0) 398 goto out; 399 #endif 400 /* 401 * Set these in case the underlying filesystem fails to do so. 402 */ 403 sp = &mp->mnt_stat; 404 sp->f_version = STATFS_VERSION; 405 sp->f_namemax = NAME_MAX; 406 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 407 error = VFS_STATFS(mp, sp); 408 if (error != 0) 409 goto out; 410 if (priv_check(td, PRIV_VFS_GENERATION)) { 411 bcopy(sp, &sb, sizeof(sb)); 412 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 413 prison_enforce_statfs(td->td_ucred, mp, &sb); 414 sp = &sb; 415 } 416 *buf = *sp; 417 out: 418 if (mp) 419 vfs_unbusy(mp); 420 return (error); 421 } 422 423 /* 424 * Get statistics on all filesystems. 425 */ 426 #ifndef _SYS_SYSPROTO_H_ 427 struct getfsstat_args { 428 struct statfs *buf; 429 long bufsize; 430 int flags; 431 }; 432 #endif 433 int 434 sys_getfsstat(td, uap) 435 struct thread *td; 436 register struct getfsstat_args /* { 437 struct statfs *buf; 438 long bufsize; 439 int flags; 440 } */ *uap; 441 { 442 443 return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE, 444 uap->flags)); 445 } 446 447 /* 448 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 449 * The caller is responsible for freeing memory which will be allocated 450 * in '*buf'. 451 */ 452 int 453 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 454 enum uio_seg bufseg, int flags) 455 { 456 struct mount *mp, *nmp; 457 struct statfs *sfsp, *sp, sb; 458 size_t count, maxcount; 459 int error; 460 461 maxcount = bufsize / sizeof(struct statfs); 462 if (bufsize == 0) 463 sfsp = NULL; 464 else if (bufseg == UIO_USERSPACE) 465 sfsp = *buf; 466 else /* if (bufseg == UIO_SYSSPACE) */ { 467 count = 0; 468 mtx_lock(&mountlist_mtx); 469 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 470 count++; 471 } 472 mtx_unlock(&mountlist_mtx); 473 if (maxcount > count) 474 maxcount = count; 475 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 476 M_WAITOK); 477 } 478 count = 0; 479 mtx_lock(&mountlist_mtx); 480 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 481 if (prison_canseemount(td->td_ucred, mp) != 0) { 482 nmp = TAILQ_NEXT(mp, mnt_list); 483 continue; 484 } 485 #ifdef MAC 486 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 487 nmp = TAILQ_NEXT(mp, mnt_list); 488 continue; 489 } 490 #endif 491 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 492 nmp = TAILQ_NEXT(mp, mnt_list); 493 continue; 494 } 495 if (sfsp && count < maxcount) { 496 sp = &mp->mnt_stat; 497 /* 498 * Set these in case the underlying filesystem 499 * fails to do so. 500 */ 501 sp->f_version = STATFS_VERSION; 502 sp->f_namemax = NAME_MAX; 503 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 504 /* 505 * If MNT_NOWAIT or MNT_LAZY is specified, do not 506 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 507 * overrides MNT_WAIT. 508 */ 509 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 510 (flags & MNT_WAIT)) && 511 (error = VFS_STATFS(mp, sp))) { 512 mtx_lock(&mountlist_mtx); 513 nmp = TAILQ_NEXT(mp, mnt_list); 514 vfs_unbusy(mp); 515 continue; 516 } 517 if (priv_check(td, PRIV_VFS_GENERATION)) { 518 bcopy(sp, &sb, sizeof(sb)); 519 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 520 prison_enforce_statfs(td->td_ucred, mp, &sb); 521 sp = &sb; 522 } 523 if (bufseg == UIO_SYSSPACE) 524 bcopy(sp, sfsp, sizeof(*sp)); 525 else /* if (bufseg == UIO_USERSPACE) */ { 526 error = copyout(sp, sfsp, sizeof(*sp)); 527 if (error != 0) { 528 vfs_unbusy(mp); 529 return (error); 530 } 531 } 532 sfsp++; 533 } 534 count++; 535 mtx_lock(&mountlist_mtx); 536 nmp = TAILQ_NEXT(mp, mnt_list); 537 vfs_unbusy(mp); 538 } 539 mtx_unlock(&mountlist_mtx); 540 if (sfsp && count > maxcount) 541 td->td_retval[0] = maxcount; 542 else 543 td->td_retval[0] = count; 544 return (0); 545 } 546 547 #ifdef COMPAT_FREEBSD4 548 /* 549 * Get old format filesystem statistics. 550 */ 551 static void cvtstatfs(struct statfs *, struct ostatfs *); 552 553 #ifndef _SYS_SYSPROTO_H_ 554 struct freebsd4_statfs_args { 555 char *path; 556 struct ostatfs *buf; 557 }; 558 #endif 559 int 560 freebsd4_statfs(td, uap) 561 struct thread *td; 562 struct freebsd4_statfs_args /* { 563 char *path; 564 struct ostatfs *buf; 565 } */ *uap; 566 { 567 struct ostatfs osb; 568 struct statfs sf; 569 int error; 570 571 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 572 if (error != 0) 573 return (error); 574 cvtstatfs(&sf, &osb); 575 return (copyout(&osb, uap->buf, sizeof(osb))); 576 } 577 578 /* 579 * Get filesystem statistics. 580 */ 581 #ifndef _SYS_SYSPROTO_H_ 582 struct freebsd4_fstatfs_args { 583 int fd; 584 struct ostatfs *buf; 585 }; 586 #endif 587 int 588 freebsd4_fstatfs(td, uap) 589 struct thread *td; 590 struct freebsd4_fstatfs_args /* { 591 int fd; 592 struct ostatfs *buf; 593 } */ *uap; 594 { 595 struct ostatfs osb; 596 struct statfs sf; 597 int error; 598 599 error = kern_fstatfs(td, uap->fd, &sf); 600 if (error != 0) 601 return (error); 602 cvtstatfs(&sf, &osb); 603 return (copyout(&osb, uap->buf, sizeof(osb))); 604 } 605 606 /* 607 * Get statistics on all filesystems. 608 */ 609 #ifndef _SYS_SYSPROTO_H_ 610 struct freebsd4_getfsstat_args { 611 struct ostatfs *buf; 612 long bufsize; 613 int flags; 614 }; 615 #endif 616 int 617 freebsd4_getfsstat(td, uap) 618 struct thread *td; 619 register struct freebsd4_getfsstat_args /* { 620 struct ostatfs *buf; 621 long bufsize; 622 int flags; 623 } */ *uap; 624 { 625 struct statfs *buf, *sp; 626 struct ostatfs osb; 627 size_t count, size; 628 int error; 629 630 count = uap->bufsize / sizeof(struct ostatfs); 631 size = count * sizeof(struct statfs); 632 error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags); 633 if (size > 0) { 634 count = td->td_retval[0]; 635 sp = buf; 636 while (count > 0 && error == 0) { 637 cvtstatfs(sp, &osb); 638 error = copyout(&osb, uap->buf, sizeof(osb)); 639 sp++; 640 uap->buf++; 641 count--; 642 } 643 free(buf, M_TEMP); 644 } 645 return (error); 646 } 647 648 /* 649 * Implement fstatfs() for (NFS) file handles. 650 */ 651 #ifndef _SYS_SYSPROTO_H_ 652 struct freebsd4_fhstatfs_args { 653 struct fhandle *u_fhp; 654 struct ostatfs *buf; 655 }; 656 #endif 657 int 658 freebsd4_fhstatfs(td, uap) 659 struct thread *td; 660 struct freebsd4_fhstatfs_args /* { 661 struct fhandle *u_fhp; 662 struct ostatfs *buf; 663 } */ *uap; 664 { 665 struct ostatfs osb; 666 struct statfs sf; 667 fhandle_t fh; 668 int error; 669 670 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 671 if (error != 0) 672 return (error); 673 error = kern_fhstatfs(td, fh, &sf); 674 if (error != 0) 675 return (error); 676 cvtstatfs(&sf, &osb); 677 return (copyout(&osb, uap->buf, sizeof(osb))); 678 } 679 680 /* 681 * Convert a new format statfs structure to an old format statfs structure. 682 */ 683 static void 684 cvtstatfs(nsp, osp) 685 struct statfs *nsp; 686 struct ostatfs *osp; 687 { 688 689 statfs_scale_blocks(nsp, LONG_MAX); 690 bzero(osp, sizeof(*osp)); 691 osp->f_bsize = nsp->f_bsize; 692 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 693 osp->f_blocks = nsp->f_blocks; 694 osp->f_bfree = nsp->f_bfree; 695 osp->f_bavail = nsp->f_bavail; 696 osp->f_files = MIN(nsp->f_files, LONG_MAX); 697 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 698 osp->f_owner = nsp->f_owner; 699 osp->f_type = nsp->f_type; 700 osp->f_flags = nsp->f_flags; 701 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 702 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 703 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 704 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 705 strlcpy(osp->f_fstypename, nsp->f_fstypename, 706 MIN(MFSNAMELEN, OMFSNAMELEN)); 707 strlcpy(osp->f_mntonname, nsp->f_mntonname, 708 MIN(MNAMELEN, OMNAMELEN)); 709 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 710 MIN(MNAMELEN, OMNAMELEN)); 711 osp->f_fsid = nsp->f_fsid; 712 } 713 #endif /* COMPAT_FREEBSD4 */ 714 715 /* 716 * Change current working directory to a given file descriptor. 717 */ 718 #ifndef _SYS_SYSPROTO_H_ 719 struct fchdir_args { 720 int fd; 721 }; 722 #endif 723 int 724 sys_fchdir(td, uap) 725 struct thread *td; 726 struct fchdir_args /* { 727 int fd; 728 } */ *uap; 729 { 730 register struct filedesc *fdp = td->td_proc->p_fd; 731 struct vnode *vp, *tdp, *vpold; 732 struct mount *mp; 733 struct file *fp; 734 cap_rights_t rights; 735 int error; 736 737 AUDIT_ARG_FD(uap->fd); 738 error = getvnode(fdp, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 739 &fp); 740 if (error != 0) 741 return (error); 742 vp = fp->f_vnode; 743 VREF(vp); 744 fdrop(fp, td); 745 vn_lock(vp, LK_SHARED | LK_RETRY); 746 AUDIT_ARG_VNODE1(vp); 747 error = change_dir(vp, td); 748 while (!error && (mp = vp->v_mountedhere) != NULL) { 749 if (vfs_busy(mp, 0)) 750 continue; 751 error = VFS_ROOT(mp, LK_SHARED, &tdp); 752 vfs_unbusy(mp); 753 if (error != 0) 754 break; 755 vput(vp); 756 vp = tdp; 757 } 758 if (error != 0) { 759 vput(vp); 760 return (error); 761 } 762 VOP_UNLOCK(vp, 0); 763 FILEDESC_XLOCK(fdp); 764 vpold = fdp->fd_cdir; 765 fdp->fd_cdir = vp; 766 FILEDESC_XUNLOCK(fdp); 767 vrele(vpold); 768 return (0); 769 } 770 771 /* 772 * Change current working directory (``.''). 773 */ 774 #ifndef _SYS_SYSPROTO_H_ 775 struct chdir_args { 776 char *path; 777 }; 778 #endif 779 int 780 sys_chdir(td, uap) 781 struct thread *td; 782 struct chdir_args /* { 783 char *path; 784 } */ *uap; 785 { 786 787 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 788 } 789 790 int 791 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 792 { 793 register struct filedesc *fdp = td->td_proc->p_fd; 794 struct nameidata nd; 795 struct vnode *vp; 796 int error; 797 798 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 799 pathseg, path, td); 800 if ((error = namei(&nd)) != 0) 801 return (error); 802 if ((error = change_dir(nd.ni_vp, td)) != 0) { 803 vput(nd.ni_vp); 804 NDFREE(&nd, NDF_ONLY_PNBUF); 805 return (error); 806 } 807 VOP_UNLOCK(nd.ni_vp, 0); 808 NDFREE(&nd, NDF_ONLY_PNBUF); 809 FILEDESC_XLOCK(fdp); 810 vp = fdp->fd_cdir; 811 fdp->fd_cdir = nd.ni_vp; 812 FILEDESC_XUNLOCK(fdp); 813 vrele(vp); 814 return (0); 815 } 816 817 /* 818 * Helper function for raised chroot(2) security function: Refuse if 819 * any filedescriptors are open directories. 820 */ 821 static int 822 chroot_refuse_vdir_fds(fdp) 823 struct filedesc *fdp; 824 { 825 struct vnode *vp; 826 struct file *fp; 827 int fd; 828 829 FILEDESC_LOCK_ASSERT(fdp); 830 831 for (fd = 0; fd <= fdp->fd_lastfile; fd++) { 832 fp = fget_locked(fdp, fd); 833 if (fp == NULL) 834 continue; 835 if (fp->f_type == DTYPE_VNODE) { 836 vp = fp->f_vnode; 837 if (vp->v_type == VDIR) 838 return (EPERM); 839 } 840 } 841 return (0); 842 } 843 844 /* 845 * This sysctl determines if we will allow a process to chroot(2) if it 846 * has a directory open: 847 * 0: disallowed for all processes. 848 * 1: allowed for processes that were not already chroot(2)'ed. 849 * 2: allowed for all processes. 850 */ 851 852 static int chroot_allow_open_directories = 1; 853 854 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 855 &chroot_allow_open_directories, 0, 856 "Allow a process to chroot(2) if it has a directory open"); 857 858 /* 859 * Change notion of root (``/'') directory. 860 */ 861 #ifndef _SYS_SYSPROTO_H_ 862 struct chroot_args { 863 char *path; 864 }; 865 #endif 866 int 867 sys_chroot(td, uap) 868 struct thread *td; 869 struct chroot_args /* { 870 char *path; 871 } */ *uap; 872 { 873 struct nameidata nd; 874 int error; 875 876 error = priv_check(td, PRIV_VFS_CHROOT); 877 if (error != 0) 878 return (error); 879 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 880 UIO_USERSPACE, uap->path, td); 881 error = namei(&nd); 882 if (error != 0) 883 goto error; 884 error = change_dir(nd.ni_vp, td); 885 if (error != 0) 886 goto e_vunlock; 887 #ifdef MAC 888 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 889 if (error != 0) 890 goto e_vunlock; 891 #endif 892 VOP_UNLOCK(nd.ni_vp, 0); 893 error = change_root(nd.ni_vp, td); 894 vrele(nd.ni_vp); 895 NDFREE(&nd, NDF_ONLY_PNBUF); 896 return (error); 897 e_vunlock: 898 vput(nd.ni_vp); 899 error: 900 NDFREE(&nd, NDF_ONLY_PNBUF); 901 return (error); 902 } 903 904 /* 905 * Common routine for chroot and chdir. Callers must provide a locked vnode 906 * instance. 907 */ 908 int 909 change_dir(vp, td) 910 struct vnode *vp; 911 struct thread *td; 912 { 913 #ifdef MAC 914 int error; 915 #endif 916 917 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 918 if (vp->v_type != VDIR) 919 return (ENOTDIR); 920 #ifdef MAC 921 error = mac_vnode_check_chdir(td->td_ucred, vp); 922 if (error != 0) 923 return (error); 924 #endif 925 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 926 } 927 928 /* 929 * Common routine for kern_chroot() and jail_attach(). The caller is 930 * responsible for invoking priv_check() and mac_vnode_check_chroot() to 931 * authorize this operation. 932 */ 933 int 934 change_root(vp, td) 935 struct vnode *vp; 936 struct thread *td; 937 { 938 struct filedesc *fdp; 939 struct vnode *oldvp; 940 int error; 941 942 fdp = td->td_proc->p_fd; 943 FILEDESC_XLOCK(fdp); 944 if (chroot_allow_open_directories == 0 || 945 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 946 error = chroot_refuse_vdir_fds(fdp); 947 if (error != 0) { 948 FILEDESC_XUNLOCK(fdp); 949 return (error); 950 } 951 } 952 oldvp = fdp->fd_rdir; 953 fdp->fd_rdir = vp; 954 VREF(fdp->fd_rdir); 955 if (!fdp->fd_jdir) { 956 fdp->fd_jdir = vp; 957 VREF(fdp->fd_jdir); 958 } 959 FILEDESC_XUNLOCK(fdp); 960 vrele(oldvp); 961 return (0); 962 } 963 964 static __inline void 965 flags_to_rights(int flags, cap_rights_t *rightsp) 966 { 967 968 if (flags & O_EXEC) { 969 cap_rights_set(rightsp, CAP_FEXECVE); 970 } else { 971 switch ((flags & O_ACCMODE)) { 972 case O_RDONLY: 973 cap_rights_set(rightsp, CAP_READ); 974 break; 975 case O_RDWR: 976 cap_rights_set(rightsp, CAP_READ); 977 /* FALLTHROUGH */ 978 case O_WRONLY: 979 cap_rights_set(rightsp, CAP_WRITE); 980 if (!(flags & (O_APPEND | O_TRUNC))) 981 cap_rights_set(rightsp, CAP_SEEK); 982 break; 983 } 984 } 985 986 if (flags & O_CREAT) 987 cap_rights_set(rightsp, CAP_CREATE); 988 989 if (flags & O_TRUNC) 990 cap_rights_set(rightsp, CAP_FTRUNCATE); 991 992 if (flags & (O_SYNC | O_FSYNC)) 993 cap_rights_set(rightsp, CAP_FSYNC); 994 995 if (flags & (O_EXLOCK | O_SHLOCK)) 996 cap_rights_set(rightsp, CAP_FLOCK); 997 } 998 999 /* 1000 * Check permissions, allocate an open file structure, and call the device 1001 * open routine if any. 1002 */ 1003 #ifndef _SYS_SYSPROTO_H_ 1004 struct open_args { 1005 char *path; 1006 int flags; 1007 int mode; 1008 }; 1009 #endif 1010 int 1011 sys_open(td, uap) 1012 struct thread *td; 1013 register struct open_args /* { 1014 char *path; 1015 int flags; 1016 int mode; 1017 } */ *uap; 1018 { 1019 1020 return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode)); 1021 } 1022 1023 #ifndef _SYS_SYSPROTO_H_ 1024 struct openat_args { 1025 int fd; 1026 char *path; 1027 int flag; 1028 int mode; 1029 }; 1030 #endif 1031 int 1032 sys_openat(struct thread *td, struct openat_args *uap) 1033 { 1034 1035 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1036 uap->mode)); 1037 } 1038 1039 int 1040 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags, 1041 int mode) 1042 { 1043 1044 return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode)); 1045 } 1046 1047 int 1048 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1049 int flags, int mode) 1050 { 1051 struct proc *p = td->td_proc; 1052 struct filedesc *fdp = p->p_fd; 1053 struct file *fp; 1054 struct vnode *vp; 1055 struct nameidata nd; 1056 cap_rights_t rights; 1057 int cmode, error, indx; 1058 1059 indx = -1; 1060 1061 AUDIT_ARG_FFLAGS(flags); 1062 AUDIT_ARG_MODE(mode); 1063 /* XXX: audit dirfd */ 1064 cap_rights_init(&rights, CAP_LOOKUP); 1065 flags_to_rights(flags, &rights); 1066 /* 1067 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1068 * may be specified. 1069 */ 1070 if (flags & O_EXEC) { 1071 if (flags & O_ACCMODE) 1072 return (EINVAL); 1073 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1074 return (EINVAL); 1075 } else { 1076 flags = FFLAGS(flags); 1077 } 1078 1079 /* 1080 * Allocate the file descriptor, but don't install a descriptor yet. 1081 */ 1082 error = falloc_noinstall(td, &fp); 1083 if (error != 0) 1084 return (error); 1085 /* 1086 * An extra reference on `fp' has been held for us by 1087 * falloc_noinstall(). 1088 */ 1089 /* Set the flags early so the finit in devfs can pick them up. */ 1090 fp->f_flag = flags & FMASK; 1091 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1092 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1093 &rights, td); 1094 td->td_dupfd = -1; /* XXX check for fdopen */ 1095 error = vn_open(&nd, &flags, cmode, fp); 1096 if (error != 0) { 1097 /* 1098 * If the vn_open replaced the method vector, something 1099 * wonderous happened deep below and we just pass it up 1100 * pretending we know what we do. 1101 */ 1102 if (error == ENXIO && fp->f_ops != &badfileops) 1103 goto success; 1104 1105 /* 1106 * Handle special fdopen() case. bleh. 1107 * 1108 * Don't do this for relative (capability) lookups; we don't 1109 * understand exactly what would happen, and we don't think 1110 * that it ever should. 1111 */ 1112 if (nd.ni_strictrelative == 0 && 1113 (error == ENODEV || error == ENXIO) && 1114 td->td_dupfd >= 0) { 1115 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1116 &indx); 1117 if (error == 0) 1118 goto success; 1119 } 1120 1121 goto bad; 1122 } 1123 td->td_dupfd = 0; 1124 NDFREE(&nd, NDF_ONLY_PNBUF); 1125 vp = nd.ni_vp; 1126 1127 /* 1128 * Store the vnode, for any f_type. Typically, the vnode use 1129 * count is decremented by direct call to vn_closefile() for 1130 * files that switched type in the cdevsw fdopen() method. 1131 */ 1132 fp->f_vnode = vp; 1133 /* 1134 * If the file wasn't claimed by devfs bind it to the normal 1135 * vnode operations here. 1136 */ 1137 if (fp->f_ops == &badfileops) { 1138 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1139 fp->f_seqcount = 1; 1140 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1141 DTYPE_VNODE, vp, &vnops); 1142 } 1143 1144 VOP_UNLOCK(vp, 0); 1145 if (flags & O_TRUNC) { 1146 error = fo_truncate(fp, 0, td->td_ucred, td); 1147 if (error != 0) 1148 goto bad; 1149 } 1150 success: 1151 /* 1152 * If we haven't already installed the FD (for dupfdopen), do so now. 1153 */ 1154 if (indx == -1) { 1155 struct filecaps *fcaps; 1156 1157 #ifdef CAPABILITIES 1158 if (nd.ni_strictrelative == 1) 1159 fcaps = &nd.ni_filecaps; 1160 else 1161 #endif 1162 fcaps = NULL; 1163 error = finstall(td, fp, &indx, flags, fcaps); 1164 /* On success finstall() consumes fcaps. */ 1165 if (error != 0) { 1166 filecaps_free(&nd.ni_filecaps); 1167 goto bad; 1168 } 1169 } else { 1170 filecaps_free(&nd.ni_filecaps); 1171 } 1172 1173 /* 1174 * Release our private reference, leaving the one associated with 1175 * the descriptor table intact. 1176 */ 1177 fdrop(fp, td); 1178 td->td_retval[0] = indx; 1179 return (0); 1180 bad: 1181 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1182 fdrop(fp, td); 1183 return (error); 1184 } 1185 1186 #ifdef COMPAT_43 1187 /* 1188 * Create a file. 1189 */ 1190 #ifndef _SYS_SYSPROTO_H_ 1191 struct ocreat_args { 1192 char *path; 1193 int mode; 1194 }; 1195 #endif 1196 int 1197 ocreat(td, uap) 1198 struct thread *td; 1199 register struct ocreat_args /* { 1200 char *path; 1201 int mode; 1202 } */ *uap; 1203 { 1204 1205 return (kern_open(td, uap->path, UIO_USERSPACE, 1206 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1207 } 1208 #endif /* COMPAT_43 */ 1209 1210 /* 1211 * Create a special file. 1212 */ 1213 #ifndef _SYS_SYSPROTO_H_ 1214 struct mknod_args { 1215 char *path; 1216 int mode; 1217 int dev; 1218 }; 1219 #endif 1220 int 1221 sys_mknod(td, uap) 1222 struct thread *td; 1223 register struct mknod_args /* { 1224 char *path; 1225 int mode; 1226 int dev; 1227 } */ *uap; 1228 { 1229 1230 return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); 1231 } 1232 1233 #ifndef _SYS_SYSPROTO_H_ 1234 struct mknodat_args { 1235 int fd; 1236 char *path; 1237 mode_t mode; 1238 dev_t dev; 1239 }; 1240 #endif 1241 int 1242 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1243 { 1244 1245 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1246 uap->dev)); 1247 } 1248 1249 int 1250 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode, 1251 int dev) 1252 { 1253 1254 return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev)); 1255 } 1256 1257 int 1258 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1259 int mode, int dev) 1260 { 1261 struct vnode *vp; 1262 struct mount *mp; 1263 struct vattr vattr; 1264 struct nameidata nd; 1265 cap_rights_t rights; 1266 int error, whiteout = 0; 1267 1268 AUDIT_ARG_MODE(mode); 1269 AUDIT_ARG_DEV(dev); 1270 switch (mode & S_IFMT) { 1271 case S_IFCHR: 1272 case S_IFBLK: 1273 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1274 break; 1275 case S_IFMT: 1276 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1277 break; 1278 case S_IFWHT: 1279 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1280 break; 1281 case S_IFIFO: 1282 if (dev == 0) 1283 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1284 /* FALLTHROUGH */ 1285 default: 1286 error = EINVAL; 1287 break; 1288 } 1289 if (error != 0) 1290 return (error); 1291 restart: 1292 bwillwrite(); 1293 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1294 pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), td); 1295 if ((error = namei(&nd)) != 0) 1296 return (error); 1297 vp = nd.ni_vp; 1298 if (vp != NULL) { 1299 NDFREE(&nd, NDF_ONLY_PNBUF); 1300 if (vp == nd.ni_dvp) 1301 vrele(nd.ni_dvp); 1302 else 1303 vput(nd.ni_dvp); 1304 vrele(vp); 1305 return (EEXIST); 1306 } else { 1307 VATTR_NULL(&vattr); 1308 vattr.va_mode = (mode & ALLPERMS) & 1309 ~td->td_proc->p_fd->fd_cmask; 1310 vattr.va_rdev = dev; 1311 whiteout = 0; 1312 1313 switch (mode & S_IFMT) { 1314 case S_IFMT: /* used by badsect to flag bad sectors */ 1315 vattr.va_type = VBAD; 1316 break; 1317 case S_IFCHR: 1318 vattr.va_type = VCHR; 1319 break; 1320 case S_IFBLK: 1321 vattr.va_type = VBLK; 1322 break; 1323 case S_IFWHT: 1324 whiteout = 1; 1325 break; 1326 default: 1327 panic("kern_mknod: invalid mode"); 1328 } 1329 } 1330 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1331 NDFREE(&nd, NDF_ONLY_PNBUF); 1332 vput(nd.ni_dvp); 1333 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1334 return (error); 1335 goto restart; 1336 } 1337 #ifdef MAC 1338 if (error == 0 && !whiteout) 1339 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1340 &nd.ni_cnd, &vattr); 1341 #endif 1342 if (error == 0) { 1343 if (whiteout) 1344 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1345 else { 1346 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1347 &nd.ni_cnd, &vattr); 1348 if (error == 0) 1349 vput(nd.ni_vp); 1350 } 1351 } 1352 NDFREE(&nd, NDF_ONLY_PNBUF); 1353 vput(nd.ni_dvp); 1354 vn_finished_write(mp); 1355 return (error); 1356 } 1357 1358 /* 1359 * Create a named pipe. 1360 */ 1361 #ifndef _SYS_SYSPROTO_H_ 1362 struct mkfifo_args { 1363 char *path; 1364 int mode; 1365 }; 1366 #endif 1367 int 1368 sys_mkfifo(td, uap) 1369 struct thread *td; 1370 register struct mkfifo_args /* { 1371 char *path; 1372 int mode; 1373 } */ *uap; 1374 { 1375 1376 return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode)); 1377 } 1378 1379 #ifndef _SYS_SYSPROTO_H_ 1380 struct mkfifoat_args { 1381 int fd; 1382 char *path; 1383 mode_t mode; 1384 }; 1385 #endif 1386 int 1387 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1388 { 1389 1390 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1391 uap->mode)); 1392 } 1393 1394 int 1395 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode) 1396 { 1397 1398 return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode)); 1399 } 1400 1401 int 1402 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1403 int mode) 1404 { 1405 struct mount *mp; 1406 struct vattr vattr; 1407 struct nameidata nd; 1408 cap_rights_t rights; 1409 int error; 1410 1411 AUDIT_ARG_MODE(mode); 1412 restart: 1413 bwillwrite(); 1414 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1415 pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), td); 1416 if ((error = namei(&nd)) != 0) 1417 return (error); 1418 if (nd.ni_vp != NULL) { 1419 NDFREE(&nd, NDF_ONLY_PNBUF); 1420 if (nd.ni_vp == nd.ni_dvp) 1421 vrele(nd.ni_dvp); 1422 else 1423 vput(nd.ni_dvp); 1424 vrele(nd.ni_vp); 1425 return (EEXIST); 1426 } 1427 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1428 NDFREE(&nd, NDF_ONLY_PNBUF); 1429 vput(nd.ni_dvp); 1430 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1431 return (error); 1432 goto restart; 1433 } 1434 VATTR_NULL(&vattr); 1435 vattr.va_type = VFIFO; 1436 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1437 #ifdef MAC 1438 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1439 &vattr); 1440 if (error != 0) 1441 goto out; 1442 #endif 1443 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1444 if (error == 0) 1445 vput(nd.ni_vp); 1446 #ifdef MAC 1447 out: 1448 #endif 1449 vput(nd.ni_dvp); 1450 vn_finished_write(mp); 1451 NDFREE(&nd, NDF_ONLY_PNBUF); 1452 return (error); 1453 } 1454 1455 /* 1456 * Make a hard file link. 1457 */ 1458 #ifndef _SYS_SYSPROTO_H_ 1459 struct link_args { 1460 char *path; 1461 char *link; 1462 }; 1463 #endif 1464 int 1465 sys_link(td, uap) 1466 struct thread *td; 1467 register struct link_args /* { 1468 char *path; 1469 char *link; 1470 } */ *uap; 1471 { 1472 1473 return (kern_link(td, uap->path, uap->link, UIO_USERSPACE)); 1474 } 1475 1476 #ifndef _SYS_SYSPROTO_H_ 1477 struct linkat_args { 1478 int fd1; 1479 char *path1; 1480 int fd2; 1481 char *path2; 1482 int flag; 1483 }; 1484 #endif 1485 int 1486 sys_linkat(struct thread *td, struct linkat_args *uap) 1487 { 1488 int flag; 1489 1490 flag = uap->flag; 1491 if (flag & ~AT_SYMLINK_FOLLOW) 1492 return (EINVAL); 1493 1494 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1495 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); 1496 } 1497 1498 int hardlink_check_uid = 0; 1499 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1500 &hardlink_check_uid, 0, 1501 "Unprivileged processes cannot create hard links to files owned by other " 1502 "users"); 1503 static int hardlink_check_gid = 0; 1504 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1505 &hardlink_check_gid, 0, 1506 "Unprivileged processes cannot create hard links to files owned by other " 1507 "groups"); 1508 1509 static int 1510 can_hardlink(struct vnode *vp, struct ucred *cred) 1511 { 1512 struct vattr va; 1513 int error; 1514 1515 if (!hardlink_check_uid && !hardlink_check_gid) 1516 return (0); 1517 1518 error = VOP_GETATTR(vp, &va, cred); 1519 if (error != 0) 1520 return (error); 1521 1522 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1523 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1524 if (error != 0) 1525 return (error); 1526 } 1527 1528 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1529 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1530 if (error != 0) 1531 return (error); 1532 } 1533 1534 return (0); 1535 } 1536 1537 int 1538 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg) 1539 { 1540 1541 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW)); 1542 } 1543 1544 int 1545 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1546 enum uio_seg segflg, int follow) 1547 { 1548 struct vnode *vp; 1549 struct mount *mp; 1550 struct nameidata nd; 1551 cap_rights_t rights; 1552 int error; 1553 1554 bwillwrite(); 1555 NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td); 1556 1557 again: 1558 if ((error = namei(&nd)) != 0) 1559 return (error); 1560 NDFREE(&nd, NDF_ONLY_PNBUF); 1561 vp = nd.ni_vp; 1562 if (vp->v_type == VDIR) { 1563 vrele(vp); 1564 return (EPERM); /* POSIX */ 1565 } 1566 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 1567 vrele(vp); 1568 return (error); 1569 } 1570 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2, 1571 segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT), td); 1572 if ((error = namei(&nd)) == 0) { 1573 if (nd.ni_vp != NULL) { 1574 if (nd.ni_dvp == nd.ni_vp) 1575 vrele(nd.ni_dvp); 1576 else 1577 vput(nd.ni_dvp); 1578 vrele(nd.ni_vp); 1579 error = EEXIST; 1580 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1581 /* 1582 * Check for cross-device links. No need to 1583 * recheck vp->v_type, since it cannot change 1584 * for non-doomed vnode. 1585 */ 1586 if (nd.ni_dvp->v_mount != vp->v_mount) 1587 error = EXDEV; 1588 else 1589 error = can_hardlink(vp, td->td_ucred); 1590 if (error == 0) 1591 #ifdef MAC 1592 error = mac_vnode_check_link(td->td_ucred, 1593 nd.ni_dvp, vp, &nd.ni_cnd); 1594 if (error == 0) 1595 #endif 1596 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1597 VOP_UNLOCK(vp, 0); 1598 vput(nd.ni_dvp); 1599 } else { 1600 vput(nd.ni_dvp); 1601 NDFREE(&nd, NDF_ONLY_PNBUF); 1602 vrele(vp); 1603 vn_finished_write(mp); 1604 goto again; 1605 } 1606 NDFREE(&nd, NDF_ONLY_PNBUF); 1607 } 1608 vrele(vp); 1609 vn_finished_write(mp); 1610 return (error); 1611 } 1612 1613 /* 1614 * Make a symbolic link. 1615 */ 1616 #ifndef _SYS_SYSPROTO_H_ 1617 struct symlink_args { 1618 char *path; 1619 char *link; 1620 }; 1621 #endif 1622 int 1623 sys_symlink(td, uap) 1624 struct thread *td; 1625 register struct symlink_args /* { 1626 char *path; 1627 char *link; 1628 } */ *uap; 1629 { 1630 1631 return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE)); 1632 } 1633 1634 #ifndef _SYS_SYSPROTO_H_ 1635 struct symlinkat_args { 1636 char *path; 1637 int fd; 1638 char *path2; 1639 }; 1640 #endif 1641 int 1642 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1643 { 1644 1645 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1646 UIO_USERSPACE)); 1647 } 1648 1649 int 1650 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg) 1651 { 1652 1653 return (kern_symlinkat(td, path, AT_FDCWD, link, segflg)); 1654 } 1655 1656 int 1657 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1658 enum uio_seg segflg) 1659 { 1660 struct mount *mp; 1661 struct vattr vattr; 1662 char *syspath; 1663 struct nameidata nd; 1664 int error; 1665 cap_rights_t rights; 1666 1667 if (segflg == UIO_SYSSPACE) { 1668 syspath = path1; 1669 } else { 1670 syspath = uma_zalloc(namei_zone, M_WAITOK); 1671 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1672 goto out; 1673 } 1674 AUDIT_ARG_TEXT(syspath); 1675 restart: 1676 bwillwrite(); 1677 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1678 segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), td); 1679 if ((error = namei(&nd)) != 0) 1680 goto out; 1681 if (nd.ni_vp) { 1682 NDFREE(&nd, NDF_ONLY_PNBUF); 1683 if (nd.ni_vp == nd.ni_dvp) 1684 vrele(nd.ni_dvp); 1685 else 1686 vput(nd.ni_dvp); 1687 vrele(nd.ni_vp); 1688 error = EEXIST; 1689 goto out; 1690 } 1691 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1692 NDFREE(&nd, NDF_ONLY_PNBUF); 1693 vput(nd.ni_dvp); 1694 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1695 goto out; 1696 goto restart; 1697 } 1698 VATTR_NULL(&vattr); 1699 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1700 #ifdef MAC 1701 vattr.va_type = VLNK; 1702 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1703 &vattr); 1704 if (error != 0) 1705 goto out2; 1706 #endif 1707 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1708 if (error == 0) 1709 vput(nd.ni_vp); 1710 #ifdef MAC 1711 out2: 1712 #endif 1713 NDFREE(&nd, NDF_ONLY_PNBUF); 1714 vput(nd.ni_dvp); 1715 vn_finished_write(mp); 1716 out: 1717 if (segflg != UIO_SYSSPACE) 1718 uma_zfree(namei_zone, syspath); 1719 return (error); 1720 } 1721 1722 /* 1723 * Delete a whiteout from the filesystem. 1724 */ 1725 int 1726 sys_undelete(td, uap) 1727 struct thread *td; 1728 register struct undelete_args /* { 1729 char *path; 1730 } */ *uap; 1731 { 1732 struct mount *mp; 1733 struct nameidata nd; 1734 int error; 1735 1736 restart: 1737 bwillwrite(); 1738 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1739 UIO_USERSPACE, uap->path, td); 1740 error = namei(&nd); 1741 if (error != 0) 1742 return (error); 1743 1744 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1745 NDFREE(&nd, NDF_ONLY_PNBUF); 1746 if (nd.ni_vp == nd.ni_dvp) 1747 vrele(nd.ni_dvp); 1748 else 1749 vput(nd.ni_dvp); 1750 if (nd.ni_vp) 1751 vrele(nd.ni_vp); 1752 return (EEXIST); 1753 } 1754 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1755 NDFREE(&nd, NDF_ONLY_PNBUF); 1756 vput(nd.ni_dvp); 1757 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1758 return (error); 1759 goto restart; 1760 } 1761 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1762 NDFREE(&nd, NDF_ONLY_PNBUF); 1763 vput(nd.ni_dvp); 1764 vn_finished_write(mp); 1765 return (error); 1766 } 1767 1768 /* 1769 * Delete a name from the filesystem. 1770 */ 1771 #ifndef _SYS_SYSPROTO_H_ 1772 struct unlink_args { 1773 char *path; 1774 }; 1775 #endif 1776 int 1777 sys_unlink(td, uap) 1778 struct thread *td; 1779 struct unlink_args /* { 1780 char *path; 1781 } */ *uap; 1782 { 1783 1784 return (kern_unlink(td, uap->path, UIO_USERSPACE)); 1785 } 1786 1787 #ifndef _SYS_SYSPROTO_H_ 1788 struct unlinkat_args { 1789 int fd; 1790 char *path; 1791 int flag; 1792 }; 1793 #endif 1794 int 1795 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1796 { 1797 int flag = uap->flag; 1798 int fd = uap->fd; 1799 char *path = uap->path; 1800 1801 if (flag & ~AT_REMOVEDIR) 1802 return (EINVAL); 1803 1804 if (flag & AT_REMOVEDIR) 1805 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1806 else 1807 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1808 } 1809 1810 int 1811 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg) 1812 { 1813 1814 return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0)); 1815 } 1816 1817 int 1818 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1819 ino_t oldinum) 1820 { 1821 struct mount *mp; 1822 struct vnode *vp; 1823 struct nameidata nd; 1824 struct stat sb; 1825 cap_rights_t rights; 1826 int error; 1827 1828 restart: 1829 bwillwrite(); 1830 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1831 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1832 if ((error = namei(&nd)) != 0) 1833 return (error == EINVAL ? EPERM : error); 1834 vp = nd.ni_vp; 1835 if (vp->v_type == VDIR && oldinum == 0) { 1836 error = EPERM; /* POSIX */ 1837 } else if (oldinum != 0 && 1838 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1839 sb.st_ino != oldinum) { 1840 error = EIDRM; /* Identifier removed */ 1841 } else { 1842 /* 1843 * The root of a mounted filesystem cannot be deleted. 1844 * 1845 * XXX: can this only be a VDIR case? 1846 */ 1847 if (vp->v_vflag & VV_ROOT) 1848 error = EBUSY; 1849 } 1850 if (error == 0) { 1851 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1852 NDFREE(&nd, NDF_ONLY_PNBUF); 1853 vput(nd.ni_dvp); 1854 if (vp == nd.ni_dvp) 1855 vrele(vp); 1856 else 1857 vput(vp); 1858 if ((error = vn_start_write(NULL, &mp, 1859 V_XSLEEP | PCATCH)) != 0) 1860 return (error); 1861 goto restart; 1862 } 1863 #ifdef MAC 1864 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1865 &nd.ni_cnd); 1866 if (error != 0) 1867 goto out; 1868 #endif 1869 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1870 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1871 #ifdef MAC 1872 out: 1873 #endif 1874 vn_finished_write(mp); 1875 } 1876 NDFREE(&nd, NDF_ONLY_PNBUF); 1877 vput(nd.ni_dvp); 1878 if (vp == nd.ni_dvp) 1879 vrele(vp); 1880 else 1881 vput(vp); 1882 return (error); 1883 } 1884 1885 /* 1886 * Reposition read/write file offset. 1887 */ 1888 #ifndef _SYS_SYSPROTO_H_ 1889 struct lseek_args { 1890 int fd; 1891 int pad; 1892 off_t offset; 1893 int whence; 1894 }; 1895 #endif 1896 int 1897 sys_lseek(td, uap) 1898 struct thread *td; 1899 register struct lseek_args /* { 1900 int fd; 1901 int pad; 1902 off_t offset; 1903 int whence; 1904 } */ *uap; 1905 { 1906 struct file *fp; 1907 cap_rights_t rights; 1908 int error; 1909 1910 AUDIT_ARG_FD(uap->fd); 1911 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1912 if (error != 0) 1913 return (error); 1914 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1915 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1916 fdrop(fp, td); 1917 return (error); 1918 } 1919 1920 #if defined(COMPAT_43) 1921 /* 1922 * Reposition read/write file offset. 1923 */ 1924 #ifndef _SYS_SYSPROTO_H_ 1925 struct olseek_args { 1926 int fd; 1927 long offset; 1928 int whence; 1929 }; 1930 #endif 1931 int 1932 olseek(td, uap) 1933 struct thread *td; 1934 register struct olseek_args /* { 1935 int fd; 1936 long offset; 1937 int whence; 1938 } */ *uap; 1939 { 1940 struct lseek_args /* { 1941 int fd; 1942 int pad; 1943 off_t offset; 1944 int whence; 1945 } */ nuap; 1946 1947 nuap.fd = uap->fd; 1948 nuap.offset = uap->offset; 1949 nuap.whence = uap->whence; 1950 return (sys_lseek(td, &nuap)); 1951 } 1952 #endif /* COMPAT_43 */ 1953 1954 /* Version with the 'pad' argument */ 1955 int 1956 freebsd6_lseek(td, uap) 1957 struct thread *td; 1958 register struct freebsd6_lseek_args *uap; 1959 { 1960 struct lseek_args ouap; 1961 1962 ouap.fd = uap->fd; 1963 ouap.offset = uap->offset; 1964 ouap.whence = uap->whence; 1965 return (sys_lseek(td, &ouap)); 1966 } 1967 1968 /* 1969 * Check access permissions using passed credentials. 1970 */ 1971 static int 1972 vn_access(vp, user_flags, cred, td) 1973 struct vnode *vp; 1974 int user_flags; 1975 struct ucred *cred; 1976 struct thread *td; 1977 { 1978 accmode_t accmode; 1979 int error; 1980 1981 /* Flags == 0 means only check for existence. */ 1982 error = 0; 1983 if (user_flags) { 1984 accmode = 0; 1985 if (user_flags & R_OK) 1986 accmode |= VREAD; 1987 if (user_flags & W_OK) 1988 accmode |= VWRITE; 1989 if (user_flags & X_OK) 1990 accmode |= VEXEC; 1991 #ifdef MAC 1992 error = mac_vnode_check_access(cred, vp, accmode); 1993 if (error != 0) 1994 return (error); 1995 #endif 1996 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1997 error = VOP_ACCESS(vp, accmode, cred, td); 1998 } 1999 return (error); 2000 } 2001 2002 /* 2003 * Check access permissions using "real" credentials. 2004 */ 2005 #ifndef _SYS_SYSPROTO_H_ 2006 struct access_args { 2007 char *path; 2008 int amode; 2009 }; 2010 #endif 2011 int 2012 sys_access(td, uap) 2013 struct thread *td; 2014 register struct access_args /* { 2015 char *path; 2016 int amode; 2017 } */ *uap; 2018 { 2019 2020 return (kern_access(td, uap->path, UIO_USERSPACE, uap->amode)); 2021 } 2022 2023 #ifndef _SYS_SYSPROTO_H_ 2024 struct faccessat_args { 2025 int dirfd; 2026 char *path; 2027 int amode; 2028 int flag; 2029 } 2030 #endif 2031 int 2032 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2033 { 2034 2035 if (uap->flag & ~AT_EACCESS) 2036 return (EINVAL); 2037 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2038 uap->amode)); 2039 } 2040 2041 int 2042 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2043 { 2044 2045 return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, amode)); 2046 } 2047 2048 int 2049 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2050 int flag, int amode) 2051 { 2052 struct ucred *cred, *tmpcred; 2053 struct vnode *vp; 2054 struct nameidata nd; 2055 cap_rights_t rights; 2056 int error; 2057 2058 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2059 return (EINVAL); 2060 2061 /* 2062 * Create and modify a temporary credential instead of one that 2063 * is potentially shared. 2064 */ 2065 if (!(flag & AT_EACCESS)) { 2066 cred = td->td_ucred; 2067 tmpcred = crdup(cred); 2068 tmpcred->cr_uid = cred->cr_ruid; 2069 tmpcred->cr_groups[0] = cred->cr_rgid; 2070 td->td_ucred = tmpcred; 2071 } else 2072 cred = tmpcred = td->td_ucred; 2073 AUDIT_ARG_VALUE(amode); 2074 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2075 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 2076 td); 2077 if ((error = namei(&nd)) != 0) 2078 goto out1; 2079 vp = nd.ni_vp; 2080 2081 error = vn_access(vp, amode, tmpcred, td); 2082 NDFREE(&nd, NDF_ONLY_PNBUF); 2083 vput(vp); 2084 out1: 2085 if (!(flag & AT_EACCESS)) { 2086 td->td_ucred = cred; 2087 crfree(tmpcred); 2088 } 2089 return (error); 2090 } 2091 2092 /* 2093 * Check access permissions using "effective" credentials. 2094 */ 2095 #ifndef _SYS_SYSPROTO_H_ 2096 struct eaccess_args { 2097 char *path; 2098 int amode; 2099 }; 2100 #endif 2101 int 2102 sys_eaccess(td, uap) 2103 struct thread *td; 2104 register struct eaccess_args /* { 2105 char *path; 2106 int amode; 2107 } */ *uap; 2108 { 2109 2110 return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->amode)); 2111 } 2112 2113 int 2114 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2115 { 2116 2117 return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, amode)); 2118 } 2119 2120 #if defined(COMPAT_43) 2121 /* 2122 * Get file status; this version follows links. 2123 */ 2124 #ifndef _SYS_SYSPROTO_H_ 2125 struct ostat_args { 2126 char *path; 2127 struct ostat *ub; 2128 }; 2129 #endif 2130 int 2131 ostat(td, uap) 2132 struct thread *td; 2133 register struct ostat_args /* { 2134 char *path; 2135 struct ostat *ub; 2136 } */ *uap; 2137 { 2138 struct stat sb; 2139 struct ostat osb; 2140 int error; 2141 2142 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2143 if (error != 0) 2144 return (error); 2145 cvtstat(&sb, &osb); 2146 return (copyout(&osb, uap->ub, sizeof (osb))); 2147 } 2148 2149 /* 2150 * Get file status; this version does not follow links. 2151 */ 2152 #ifndef _SYS_SYSPROTO_H_ 2153 struct olstat_args { 2154 char *path; 2155 struct ostat *ub; 2156 }; 2157 #endif 2158 int 2159 olstat(td, uap) 2160 struct thread *td; 2161 register struct olstat_args /* { 2162 char *path; 2163 struct ostat *ub; 2164 } */ *uap; 2165 { 2166 struct stat sb; 2167 struct ostat osb; 2168 int error; 2169 2170 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2171 if (error != 0) 2172 return (error); 2173 cvtstat(&sb, &osb); 2174 return (copyout(&osb, uap->ub, sizeof (osb))); 2175 } 2176 2177 /* 2178 * Convert from an old to a new stat structure. 2179 */ 2180 void 2181 cvtstat(st, ost) 2182 struct stat *st; 2183 struct ostat *ost; 2184 { 2185 2186 ost->st_dev = st->st_dev; 2187 ost->st_ino = st->st_ino; 2188 ost->st_mode = st->st_mode; 2189 ost->st_nlink = st->st_nlink; 2190 ost->st_uid = st->st_uid; 2191 ost->st_gid = st->st_gid; 2192 ost->st_rdev = st->st_rdev; 2193 if (st->st_size < (quad_t)1 << 32) 2194 ost->st_size = st->st_size; 2195 else 2196 ost->st_size = -2; 2197 ost->st_atim = st->st_atim; 2198 ost->st_mtim = st->st_mtim; 2199 ost->st_ctim = st->st_ctim; 2200 ost->st_blksize = st->st_blksize; 2201 ost->st_blocks = st->st_blocks; 2202 ost->st_flags = st->st_flags; 2203 ost->st_gen = st->st_gen; 2204 } 2205 #endif /* COMPAT_43 */ 2206 2207 /* 2208 * Get file status; this version follows links. 2209 */ 2210 #ifndef _SYS_SYSPROTO_H_ 2211 struct stat_args { 2212 char *path; 2213 struct stat *ub; 2214 }; 2215 #endif 2216 int 2217 sys_stat(td, uap) 2218 struct thread *td; 2219 register struct stat_args /* { 2220 char *path; 2221 struct stat *ub; 2222 } */ *uap; 2223 { 2224 struct stat sb; 2225 int error; 2226 2227 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2228 if (error == 0) 2229 error = copyout(&sb, uap->ub, sizeof (sb)); 2230 return (error); 2231 } 2232 2233 #ifndef _SYS_SYSPROTO_H_ 2234 struct fstatat_args { 2235 int fd; 2236 char *path; 2237 struct stat *buf; 2238 int flag; 2239 } 2240 #endif 2241 int 2242 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2243 { 2244 struct stat sb; 2245 int error; 2246 2247 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2248 UIO_USERSPACE, &sb); 2249 if (error == 0) 2250 error = copyout(&sb, uap->buf, sizeof (sb)); 2251 return (error); 2252 } 2253 2254 int 2255 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2256 { 2257 2258 return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp)); 2259 } 2260 2261 int 2262 kern_statat(struct thread *td, int flag, int fd, char *path, 2263 enum uio_seg pathseg, struct stat *sbp) 2264 { 2265 2266 return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL)); 2267 } 2268 2269 int 2270 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path, 2271 enum uio_seg pathseg, struct stat *sbp, 2272 void (*hook)(struct vnode *vp, struct stat *sbp)) 2273 { 2274 struct nameidata nd; 2275 struct stat sb; 2276 cap_rights_t rights; 2277 int error; 2278 2279 if (flag & ~AT_SYMLINK_NOFOLLOW) 2280 return (EINVAL); 2281 2282 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2283 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2284 cap_rights_init(&rights, CAP_FSTAT), td); 2285 2286 if ((error = namei(&nd)) != 0) 2287 return (error); 2288 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2289 if (error == 0) { 2290 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0); 2291 if (S_ISREG(sb.st_mode)) 2292 SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0); 2293 if (__predict_false(hook != NULL)) 2294 hook(nd.ni_vp, &sb); 2295 } 2296 NDFREE(&nd, NDF_ONLY_PNBUF); 2297 vput(nd.ni_vp); 2298 if (error != 0) 2299 return (error); 2300 *sbp = sb; 2301 #ifdef KTRACE 2302 if (KTRPOINT(td, KTR_STRUCT)) 2303 ktrstat(&sb); 2304 #endif 2305 return (0); 2306 } 2307 2308 /* 2309 * Get file status; this version does not follow links. 2310 */ 2311 #ifndef _SYS_SYSPROTO_H_ 2312 struct lstat_args { 2313 char *path; 2314 struct stat *ub; 2315 }; 2316 #endif 2317 int 2318 sys_lstat(td, uap) 2319 struct thread *td; 2320 register struct lstat_args /* { 2321 char *path; 2322 struct stat *ub; 2323 } */ *uap; 2324 { 2325 struct stat sb; 2326 int error; 2327 2328 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2329 if (error == 0) 2330 error = copyout(&sb, uap->ub, sizeof (sb)); 2331 return (error); 2332 } 2333 2334 int 2335 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2336 { 2337 2338 return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg, 2339 sbp)); 2340 } 2341 2342 /* 2343 * Implementation of the NetBSD [l]stat() functions. 2344 */ 2345 void 2346 cvtnstat(sb, nsb) 2347 struct stat *sb; 2348 struct nstat *nsb; 2349 { 2350 2351 bzero(nsb, sizeof *nsb); 2352 nsb->st_dev = sb->st_dev; 2353 nsb->st_ino = sb->st_ino; 2354 nsb->st_mode = sb->st_mode; 2355 nsb->st_nlink = sb->st_nlink; 2356 nsb->st_uid = sb->st_uid; 2357 nsb->st_gid = sb->st_gid; 2358 nsb->st_rdev = sb->st_rdev; 2359 nsb->st_atim = sb->st_atim; 2360 nsb->st_mtim = sb->st_mtim; 2361 nsb->st_ctim = sb->st_ctim; 2362 nsb->st_size = sb->st_size; 2363 nsb->st_blocks = sb->st_blocks; 2364 nsb->st_blksize = sb->st_blksize; 2365 nsb->st_flags = sb->st_flags; 2366 nsb->st_gen = sb->st_gen; 2367 nsb->st_birthtim = sb->st_birthtim; 2368 } 2369 2370 #ifndef _SYS_SYSPROTO_H_ 2371 struct nstat_args { 2372 char *path; 2373 struct nstat *ub; 2374 }; 2375 #endif 2376 int 2377 sys_nstat(td, uap) 2378 struct thread *td; 2379 register struct nstat_args /* { 2380 char *path; 2381 struct nstat *ub; 2382 } */ *uap; 2383 { 2384 struct stat sb; 2385 struct nstat nsb; 2386 int error; 2387 2388 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2389 if (error != 0) 2390 return (error); 2391 cvtnstat(&sb, &nsb); 2392 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2393 } 2394 2395 /* 2396 * NetBSD lstat. Get file status; this version does not follow links. 2397 */ 2398 #ifndef _SYS_SYSPROTO_H_ 2399 struct lstat_args { 2400 char *path; 2401 struct stat *ub; 2402 }; 2403 #endif 2404 int 2405 sys_nlstat(td, uap) 2406 struct thread *td; 2407 register struct nlstat_args /* { 2408 char *path; 2409 struct nstat *ub; 2410 } */ *uap; 2411 { 2412 struct stat sb; 2413 struct nstat nsb; 2414 int error; 2415 2416 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2417 if (error != 0) 2418 return (error); 2419 cvtnstat(&sb, &nsb); 2420 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2421 } 2422 2423 /* 2424 * Get configurable pathname variables. 2425 */ 2426 #ifndef _SYS_SYSPROTO_H_ 2427 struct pathconf_args { 2428 char *path; 2429 int name; 2430 }; 2431 #endif 2432 int 2433 sys_pathconf(td, uap) 2434 struct thread *td; 2435 register struct pathconf_args /* { 2436 char *path; 2437 int name; 2438 } */ *uap; 2439 { 2440 2441 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2442 } 2443 2444 #ifndef _SYS_SYSPROTO_H_ 2445 struct lpathconf_args { 2446 char *path; 2447 int name; 2448 }; 2449 #endif 2450 int 2451 sys_lpathconf(td, uap) 2452 struct thread *td; 2453 register struct lpathconf_args /* { 2454 char *path; 2455 int name; 2456 } */ *uap; 2457 { 2458 2459 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2460 NOFOLLOW)); 2461 } 2462 2463 int 2464 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2465 u_long flags) 2466 { 2467 struct nameidata nd; 2468 int error; 2469 2470 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2471 pathseg, path, td); 2472 if ((error = namei(&nd)) != 0) 2473 return (error); 2474 NDFREE(&nd, NDF_ONLY_PNBUF); 2475 2476 /* If asynchronous I/O is available, it works for all files. */ 2477 if (name == _PC_ASYNC_IO) 2478 td->td_retval[0] = async_io_version; 2479 else 2480 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2481 vput(nd.ni_vp); 2482 return (error); 2483 } 2484 2485 /* 2486 * Return target name of a symbolic link. 2487 */ 2488 #ifndef _SYS_SYSPROTO_H_ 2489 struct readlink_args { 2490 char *path; 2491 char *buf; 2492 size_t count; 2493 }; 2494 #endif 2495 int 2496 sys_readlink(td, uap) 2497 struct thread *td; 2498 register struct readlink_args /* { 2499 char *path; 2500 char *buf; 2501 size_t count; 2502 } */ *uap; 2503 { 2504 2505 return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf, 2506 UIO_USERSPACE, uap->count)); 2507 } 2508 #ifndef _SYS_SYSPROTO_H_ 2509 struct readlinkat_args { 2510 int fd; 2511 char *path; 2512 char *buf; 2513 size_t bufsize; 2514 }; 2515 #endif 2516 int 2517 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2518 { 2519 2520 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2521 uap->buf, UIO_USERSPACE, uap->bufsize)); 2522 } 2523 2524 int 2525 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf, 2526 enum uio_seg bufseg, size_t count) 2527 { 2528 2529 return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg, 2530 count)); 2531 } 2532 2533 int 2534 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2535 char *buf, enum uio_seg bufseg, size_t count) 2536 { 2537 struct vnode *vp; 2538 struct iovec aiov; 2539 struct uio auio; 2540 struct nameidata nd; 2541 int error; 2542 2543 if (count > IOSIZE_MAX) 2544 return (EINVAL); 2545 2546 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2547 pathseg, path, fd, td); 2548 2549 if ((error = namei(&nd)) != 0) 2550 return (error); 2551 NDFREE(&nd, NDF_ONLY_PNBUF); 2552 vp = nd.ni_vp; 2553 #ifdef MAC 2554 error = mac_vnode_check_readlink(td->td_ucred, vp); 2555 if (error != 0) { 2556 vput(vp); 2557 return (error); 2558 } 2559 #endif 2560 if (vp->v_type != VLNK) 2561 error = EINVAL; 2562 else { 2563 aiov.iov_base = buf; 2564 aiov.iov_len = count; 2565 auio.uio_iov = &aiov; 2566 auio.uio_iovcnt = 1; 2567 auio.uio_offset = 0; 2568 auio.uio_rw = UIO_READ; 2569 auio.uio_segflg = bufseg; 2570 auio.uio_td = td; 2571 auio.uio_resid = count; 2572 error = VOP_READLINK(vp, &auio, td->td_ucred); 2573 td->td_retval[0] = count - auio.uio_resid; 2574 } 2575 vput(vp); 2576 return (error); 2577 } 2578 2579 /* 2580 * Common implementation code for chflags() and fchflags(). 2581 */ 2582 static int 2583 setfflags(td, vp, flags) 2584 struct thread *td; 2585 struct vnode *vp; 2586 u_long flags; 2587 { 2588 struct mount *mp; 2589 struct vattr vattr; 2590 int error; 2591 2592 /* We can't support the value matching VNOVAL. */ 2593 if (flags == VNOVAL) 2594 return (EOPNOTSUPP); 2595 2596 /* 2597 * Prevent non-root users from setting flags on devices. When 2598 * a device is reused, users can retain ownership of the device 2599 * if they are allowed to set flags and programs assume that 2600 * chown can't fail when done as root. 2601 */ 2602 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2603 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2604 if (error != 0) 2605 return (error); 2606 } 2607 2608 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2609 return (error); 2610 VATTR_NULL(&vattr); 2611 vattr.va_flags = flags; 2612 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2613 #ifdef MAC 2614 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2615 if (error == 0) 2616 #endif 2617 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2618 VOP_UNLOCK(vp, 0); 2619 vn_finished_write(mp); 2620 return (error); 2621 } 2622 2623 /* 2624 * Change flags of a file given a path name. 2625 */ 2626 #ifndef _SYS_SYSPROTO_H_ 2627 struct chflags_args { 2628 const char *path; 2629 u_long flags; 2630 }; 2631 #endif 2632 int 2633 sys_chflags(td, uap) 2634 struct thread *td; 2635 register struct chflags_args /* { 2636 const char *path; 2637 u_long flags; 2638 } */ *uap; 2639 { 2640 2641 return (kern_chflags(td, uap->path, UIO_USERSPACE, uap->flags)); 2642 } 2643 2644 #ifndef _SYS_SYSPROTO_H_ 2645 struct chflagsat_args { 2646 int fd; 2647 const char *path; 2648 u_long flags; 2649 int atflag; 2650 } 2651 #endif 2652 int 2653 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2654 { 2655 int fd = uap->fd; 2656 const char *path = uap->path; 2657 u_long flags = uap->flags; 2658 int atflag = uap->atflag; 2659 2660 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2661 return (EINVAL); 2662 2663 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2664 } 2665 2666 static int 2667 kern_chflags(struct thread *td, const char *path, enum uio_seg pathseg, 2668 u_long flags) 2669 { 2670 2671 return (kern_chflagsat(td, AT_FDCWD, path, pathseg, flags, 0)); 2672 } 2673 2674 /* 2675 * Same as chflags() but doesn't follow symlinks. 2676 */ 2677 int 2678 sys_lchflags(td, uap) 2679 struct thread *td; 2680 register struct lchflags_args /* { 2681 const char *path; 2682 u_long flags; 2683 } */ *uap; 2684 { 2685 2686 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2687 uap->flags, AT_SYMLINK_NOFOLLOW)); 2688 } 2689 2690 static int 2691 kern_chflagsat(struct thread *td, int fd, const char *path, 2692 enum uio_seg pathseg, u_long flags, int atflag) 2693 { 2694 struct nameidata nd; 2695 cap_rights_t rights; 2696 int error, follow; 2697 2698 AUDIT_ARG_FFLAGS(flags); 2699 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2700 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2701 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2702 if ((error = namei(&nd)) != 0) 2703 return (error); 2704 NDFREE(&nd, NDF_ONLY_PNBUF); 2705 error = setfflags(td, nd.ni_vp, flags); 2706 vrele(nd.ni_vp); 2707 return (error); 2708 } 2709 2710 /* 2711 * Change flags of a file given a file descriptor. 2712 */ 2713 #ifndef _SYS_SYSPROTO_H_ 2714 struct fchflags_args { 2715 int fd; 2716 u_long flags; 2717 }; 2718 #endif 2719 int 2720 sys_fchflags(td, uap) 2721 struct thread *td; 2722 register struct fchflags_args /* { 2723 int fd; 2724 u_long flags; 2725 } */ *uap; 2726 { 2727 struct file *fp; 2728 cap_rights_t rights; 2729 int error; 2730 2731 AUDIT_ARG_FD(uap->fd); 2732 AUDIT_ARG_FFLAGS(uap->flags); 2733 error = getvnode(td->td_proc->p_fd, uap->fd, 2734 cap_rights_init(&rights, CAP_FCHFLAGS), &fp); 2735 if (error != 0) 2736 return (error); 2737 #ifdef AUDIT 2738 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2739 AUDIT_ARG_VNODE1(fp->f_vnode); 2740 VOP_UNLOCK(fp->f_vnode, 0); 2741 #endif 2742 error = setfflags(td, fp->f_vnode, uap->flags); 2743 fdrop(fp, td); 2744 return (error); 2745 } 2746 2747 /* 2748 * Common implementation code for chmod(), lchmod() and fchmod(). 2749 */ 2750 int 2751 setfmode(td, cred, vp, mode) 2752 struct thread *td; 2753 struct ucred *cred; 2754 struct vnode *vp; 2755 int mode; 2756 { 2757 struct mount *mp; 2758 struct vattr vattr; 2759 int error; 2760 2761 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2762 return (error); 2763 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2764 VATTR_NULL(&vattr); 2765 vattr.va_mode = mode & ALLPERMS; 2766 #ifdef MAC 2767 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2768 if (error == 0) 2769 #endif 2770 error = VOP_SETATTR(vp, &vattr, cred); 2771 VOP_UNLOCK(vp, 0); 2772 vn_finished_write(mp); 2773 return (error); 2774 } 2775 2776 /* 2777 * Change mode of a file given path name. 2778 */ 2779 #ifndef _SYS_SYSPROTO_H_ 2780 struct chmod_args { 2781 char *path; 2782 int mode; 2783 }; 2784 #endif 2785 int 2786 sys_chmod(td, uap) 2787 struct thread *td; 2788 register struct chmod_args /* { 2789 char *path; 2790 int mode; 2791 } */ *uap; 2792 { 2793 2794 return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode)); 2795 } 2796 2797 #ifndef _SYS_SYSPROTO_H_ 2798 struct fchmodat_args { 2799 int dirfd; 2800 char *path; 2801 mode_t mode; 2802 int flag; 2803 } 2804 #endif 2805 int 2806 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2807 { 2808 int flag = uap->flag; 2809 int fd = uap->fd; 2810 char *path = uap->path; 2811 mode_t mode = uap->mode; 2812 2813 if (flag & ~AT_SYMLINK_NOFOLLOW) 2814 return (EINVAL); 2815 2816 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2817 } 2818 2819 int 2820 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode) 2821 { 2822 2823 return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0)); 2824 } 2825 2826 /* 2827 * Change mode of a file given path name (don't follow links.) 2828 */ 2829 #ifndef _SYS_SYSPROTO_H_ 2830 struct lchmod_args { 2831 char *path; 2832 int mode; 2833 }; 2834 #endif 2835 int 2836 sys_lchmod(td, uap) 2837 struct thread *td; 2838 register struct lchmod_args /* { 2839 char *path; 2840 int mode; 2841 } */ *uap; 2842 { 2843 2844 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2845 uap->mode, AT_SYMLINK_NOFOLLOW)); 2846 } 2847 2848 int 2849 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2850 mode_t mode, int flag) 2851 { 2852 struct nameidata nd; 2853 cap_rights_t rights; 2854 int error, follow; 2855 2856 AUDIT_ARG_MODE(mode); 2857 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2858 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2859 cap_rights_init(&rights, CAP_FCHMOD), td); 2860 if ((error = namei(&nd)) != 0) 2861 return (error); 2862 NDFREE(&nd, NDF_ONLY_PNBUF); 2863 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2864 vrele(nd.ni_vp); 2865 return (error); 2866 } 2867 2868 /* 2869 * Change mode of a file given a file descriptor. 2870 */ 2871 #ifndef _SYS_SYSPROTO_H_ 2872 struct fchmod_args { 2873 int fd; 2874 int mode; 2875 }; 2876 #endif 2877 int 2878 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2879 { 2880 struct file *fp; 2881 cap_rights_t rights; 2882 int error; 2883 2884 AUDIT_ARG_FD(uap->fd); 2885 AUDIT_ARG_MODE(uap->mode); 2886 2887 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2888 if (error != 0) 2889 return (error); 2890 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2891 fdrop(fp, td); 2892 return (error); 2893 } 2894 2895 /* 2896 * Common implementation for chown(), lchown(), and fchown() 2897 */ 2898 int 2899 setfown(td, cred, vp, uid, gid) 2900 struct thread *td; 2901 struct ucred *cred; 2902 struct vnode *vp; 2903 uid_t uid; 2904 gid_t gid; 2905 { 2906 struct mount *mp; 2907 struct vattr vattr; 2908 int error; 2909 2910 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2911 return (error); 2912 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2913 VATTR_NULL(&vattr); 2914 vattr.va_uid = uid; 2915 vattr.va_gid = gid; 2916 #ifdef MAC 2917 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2918 vattr.va_gid); 2919 if (error == 0) 2920 #endif 2921 error = VOP_SETATTR(vp, &vattr, cred); 2922 VOP_UNLOCK(vp, 0); 2923 vn_finished_write(mp); 2924 return (error); 2925 } 2926 2927 /* 2928 * Set ownership given a path name. 2929 */ 2930 #ifndef _SYS_SYSPROTO_H_ 2931 struct chown_args { 2932 char *path; 2933 int uid; 2934 int gid; 2935 }; 2936 #endif 2937 int 2938 sys_chown(td, uap) 2939 struct thread *td; 2940 register struct chown_args /* { 2941 char *path; 2942 int uid; 2943 int gid; 2944 } */ *uap; 2945 { 2946 2947 return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 2948 } 2949 2950 #ifndef _SYS_SYSPROTO_H_ 2951 struct fchownat_args { 2952 int fd; 2953 const char * path; 2954 uid_t uid; 2955 gid_t gid; 2956 int flag; 2957 }; 2958 #endif 2959 int 2960 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2961 { 2962 int flag; 2963 2964 flag = uap->flag; 2965 if (flag & ~AT_SYMLINK_NOFOLLOW) 2966 return (EINVAL); 2967 2968 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2969 uap->gid, uap->flag)); 2970 } 2971 2972 int 2973 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 2974 int gid) 2975 { 2976 2977 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0)); 2978 } 2979 2980 int 2981 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2982 int uid, int gid, int flag) 2983 { 2984 struct nameidata nd; 2985 cap_rights_t rights; 2986 int error, follow; 2987 2988 AUDIT_ARG_OWNER(uid, gid); 2989 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2990 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2991 cap_rights_init(&rights, CAP_FCHOWN), td); 2992 2993 if ((error = namei(&nd)) != 0) 2994 return (error); 2995 NDFREE(&nd, NDF_ONLY_PNBUF); 2996 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2997 vrele(nd.ni_vp); 2998 return (error); 2999 } 3000 3001 /* 3002 * Set ownership given a path name, do not cross symlinks. 3003 */ 3004 #ifndef _SYS_SYSPROTO_H_ 3005 struct lchown_args { 3006 char *path; 3007 int uid; 3008 int gid; 3009 }; 3010 #endif 3011 int 3012 sys_lchown(td, uap) 3013 struct thread *td; 3014 register struct lchown_args /* { 3015 char *path; 3016 int uid; 3017 int gid; 3018 } */ *uap; 3019 { 3020 3021 return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 3022 } 3023 3024 int 3025 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 3026 int gid) 3027 { 3028 3029 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 3030 AT_SYMLINK_NOFOLLOW)); 3031 } 3032 3033 /* 3034 * Set ownership given a file descriptor. 3035 */ 3036 #ifndef _SYS_SYSPROTO_H_ 3037 struct fchown_args { 3038 int fd; 3039 int uid; 3040 int gid; 3041 }; 3042 #endif 3043 int 3044 sys_fchown(td, uap) 3045 struct thread *td; 3046 register struct fchown_args /* { 3047 int fd; 3048 int uid; 3049 int gid; 3050 } */ *uap; 3051 { 3052 struct file *fp; 3053 cap_rights_t rights; 3054 int error; 3055 3056 AUDIT_ARG_FD(uap->fd); 3057 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3058 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 3059 if (error != 0) 3060 return (error); 3061 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3062 fdrop(fp, td); 3063 return (error); 3064 } 3065 3066 /* 3067 * Common implementation code for utimes(), lutimes(), and futimes(). 3068 */ 3069 static int 3070 getutimes(usrtvp, tvpseg, tsp) 3071 const struct timeval *usrtvp; 3072 enum uio_seg tvpseg; 3073 struct timespec *tsp; 3074 { 3075 struct timeval tv[2]; 3076 const struct timeval *tvp; 3077 int error; 3078 3079 if (usrtvp == NULL) { 3080 vfs_timestamp(&tsp[0]); 3081 tsp[1] = tsp[0]; 3082 } else { 3083 if (tvpseg == UIO_SYSSPACE) { 3084 tvp = usrtvp; 3085 } else { 3086 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3087 return (error); 3088 tvp = tv; 3089 } 3090 3091 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3092 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3093 return (EINVAL); 3094 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3095 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3096 } 3097 return (0); 3098 } 3099 3100 /* 3101 * Common implementation code for utimes(), lutimes(), and futimes(). 3102 */ 3103 static int 3104 setutimes(td, vp, ts, numtimes, nullflag) 3105 struct thread *td; 3106 struct vnode *vp; 3107 const struct timespec *ts; 3108 int numtimes; 3109 int nullflag; 3110 { 3111 struct mount *mp; 3112 struct vattr vattr; 3113 int error, setbirthtime; 3114 3115 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3116 return (error); 3117 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3118 setbirthtime = 0; 3119 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3120 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3121 setbirthtime = 1; 3122 VATTR_NULL(&vattr); 3123 vattr.va_atime = ts[0]; 3124 vattr.va_mtime = ts[1]; 3125 if (setbirthtime) 3126 vattr.va_birthtime = ts[1]; 3127 if (numtimes > 2) 3128 vattr.va_birthtime = ts[2]; 3129 if (nullflag) 3130 vattr.va_vaflags |= VA_UTIMES_NULL; 3131 #ifdef MAC 3132 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3133 vattr.va_mtime); 3134 #endif 3135 if (error == 0) 3136 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3137 VOP_UNLOCK(vp, 0); 3138 vn_finished_write(mp); 3139 return (error); 3140 } 3141 3142 /* 3143 * Set the access and modification times of a file. 3144 */ 3145 #ifndef _SYS_SYSPROTO_H_ 3146 struct utimes_args { 3147 char *path; 3148 struct timeval *tptr; 3149 }; 3150 #endif 3151 int 3152 sys_utimes(td, uap) 3153 struct thread *td; 3154 register struct utimes_args /* { 3155 char *path; 3156 struct timeval *tptr; 3157 } */ *uap; 3158 { 3159 3160 return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3161 UIO_USERSPACE)); 3162 } 3163 3164 #ifndef _SYS_SYSPROTO_H_ 3165 struct futimesat_args { 3166 int fd; 3167 const char * path; 3168 const struct timeval * times; 3169 }; 3170 #endif 3171 int 3172 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3173 { 3174 3175 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3176 uap->times, UIO_USERSPACE)); 3177 } 3178 3179 int 3180 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg, 3181 struct timeval *tptr, enum uio_seg tptrseg) 3182 { 3183 3184 return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg)); 3185 } 3186 3187 int 3188 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3189 struct timeval *tptr, enum uio_seg tptrseg) 3190 { 3191 struct nameidata nd; 3192 struct timespec ts[2]; 3193 cap_rights_t rights; 3194 int error; 3195 3196 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3197 return (error); 3198 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3199 cap_rights_init(&rights, CAP_FUTIMES), td); 3200 3201 if ((error = namei(&nd)) != 0) 3202 return (error); 3203 NDFREE(&nd, NDF_ONLY_PNBUF); 3204 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3205 vrele(nd.ni_vp); 3206 return (error); 3207 } 3208 3209 /* 3210 * Set the access and modification times of a file. 3211 */ 3212 #ifndef _SYS_SYSPROTO_H_ 3213 struct lutimes_args { 3214 char *path; 3215 struct timeval *tptr; 3216 }; 3217 #endif 3218 int 3219 sys_lutimes(td, uap) 3220 struct thread *td; 3221 register struct lutimes_args /* { 3222 char *path; 3223 struct timeval *tptr; 3224 } */ *uap; 3225 { 3226 3227 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3228 UIO_USERSPACE)); 3229 } 3230 3231 int 3232 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3233 struct timeval *tptr, enum uio_seg tptrseg) 3234 { 3235 struct timespec ts[2]; 3236 struct nameidata nd; 3237 int error; 3238 3239 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3240 return (error); 3241 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3242 if ((error = namei(&nd)) != 0) 3243 return (error); 3244 NDFREE(&nd, NDF_ONLY_PNBUF); 3245 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3246 vrele(nd.ni_vp); 3247 return (error); 3248 } 3249 3250 /* 3251 * Set the access and modification times of a file. 3252 */ 3253 #ifndef _SYS_SYSPROTO_H_ 3254 struct futimes_args { 3255 int fd; 3256 struct timeval *tptr; 3257 }; 3258 #endif 3259 int 3260 sys_futimes(td, uap) 3261 struct thread *td; 3262 register struct futimes_args /* { 3263 int fd; 3264 struct timeval *tptr; 3265 } */ *uap; 3266 { 3267 3268 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3269 } 3270 3271 int 3272 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3273 enum uio_seg tptrseg) 3274 { 3275 struct timespec ts[2]; 3276 struct file *fp; 3277 cap_rights_t rights; 3278 int error; 3279 3280 AUDIT_ARG_FD(fd); 3281 error = getutimes(tptr, tptrseg, ts); 3282 if (error != 0) 3283 return (error); 3284 error = getvnode(td->td_proc->p_fd, fd, 3285 cap_rights_init(&rights, CAP_FUTIMES), &fp); 3286 if (error != 0) 3287 return (error); 3288 #ifdef AUDIT 3289 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3290 AUDIT_ARG_VNODE1(fp->f_vnode); 3291 VOP_UNLOCK(fp->f_vnode, 0); 3292 #endif 3293 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3294 fdrop(fp, td); 3295 return (error); 3296 } 3297 3298 /* 3299 * Truncate a file given its path name. 3300 */ 3301 #ifndef _SYS_SYSPROTO_H_ 3302 struct truncate_args { 3303 char *path; 3304 int pad; 3305 off_t length; 3306 }; 3307 #endif 3308 int 3309 sys_truncate(td, uap) 3310 struct thread *td; 3311 register struct truncate_args /* { 3312 char *path; 3313 int pad; 3314 off_t length; 3315 } */ *uap; 3316 { 3317 3318 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3319 } 3320 3321 int 3322 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3323 { 3324 struct mount *mp; 3325 struct vnode *vp; 3326 void *rl_cookie; 3327 struct vattr vattr; 3328 struct nameidata nd; 3329 int error; 3330 3331 if (length < 0) 3332 return(EINVAL); 3333 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3334 if ((error = namei(&nd)) != 0) 3335 return (error); 3336 vp = nd.ni_vp; 3337 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3338 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3339 vn_rangelock_unlock(vp, rl_cookie); 3340 vrele(vp); 3341 return (error); 3342 } 3343 NDFREE(&nd, NDF_ONLY_PNBUF); 3344 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3345 if (vp->v_type == VDIR) 3346 error = EISDIR; 3347 #ifdef MAC 3348 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3349 } 3350 #endif 3351 else if ((error = vn_writechk(vp)) == 0 && 3352 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3353 VATTR_NULL(&vattr); 3354 vattr.va_size = length; 3355 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3356 } 3357 VOP_UNLOCK(vp, 0); 3358 vn_finished_write(mp); 3359 vn_rangelock_unlock(vp, rl_cookie); 3360 vrele(vp); 3361 return (error); 3362 } 3363 3364 #if defined(COMPAT_43) 3365 /* 3366 * Truncate a file given its path name. 3367 */ 3368 #ifndef _SYS_SYSPROTO_H_ 3369 struct otruncate_args { 3370 char *path; 3371 long length; 3372 }; 3373 #endif 3374 int 3375 otruncate(td, uap) 3376 struct thread *td; 3377 register struct otruncate_args /* { 3378 char *path; 3379 long length; 3380 } */ *uap; 3381 { 3382 struct truncate_args /* { 3383 char *path; 3384 int pad; 3385 off_t length; 3386 } */ nuap; 3387 3388 nuap.path = uap->path; 3389 nuap.length = uap->length; 3390 return (sys_truncate(td, &nuap)); 3391 } 3392 #endif /* COMPAT_43 */ 3393 3394 /* Versions with the pad argument */ 3395 int 3396 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3397 { 3398 struct truncate_args ouap; 3399 3400 ouap.path = uap->path; 3401 ouap.length = uap->length; 3402 return (sys_truncate(td, &ouap)); 3403 } 3404 3405 int 3406 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3407 { 3408 struct ftruncate_args ouap; 3409 3410 ouap.fd = uap->fd; 3411 ouap.length = uap->length; 3412 return (sys_ftruncate(td, &ouap)); 3413 } 3414 3415 /* 3416 * Sync an open file. 3417 */ 3418 #ifndef _SYS_SYSPROTO_H_ 3419 struct fsync_args { 3420 int fd; 3421 }; 3422 #endif 3423 int 3424 sys_fsync(td, uap) 3425 struct thread *td; 3426 struct fsync_args /* { 3427 int fd; 3428 } */ *uap; 3429 { 3430 struct vnode *vp; 3431 struct mount *mp; 3432 struct file *fp; 3433 cap_rights_t rights; 3434 int error, lock_flags; 3435 3436 AUDIT_ARG_FD(uap->fd); 3437 error = getvnode(td->td_proc->p_fd, uap->fd, 3438 cap_rights_init(&rights, CAP_FSYNC), &fp); 3439 if (error != 0) 3440 return (error); 3441 vp = fp->f_vnode; 3442 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3443 if (error != 0) 3444 goto drop; 3445 if (MNT_SHARED_WRITES(mp) || 3446 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3447 lock_flags = LK_SHARED; 3448 } else { 3449 lock_flags = LK_EXCLUSIVE; 3450 } 3451 vn_lock(vp, lock_flags | LK_RETRY); 3452 AUDIT_ARG_VNODE1(vp); 3453 if (vp->v_object != NULL) { 3454 VM_OBJECT_WLOCK(vp->v_object); 3455 vm_object_page_clean(vp->v_object, 0, 0, 0); 3456 VM_OBJECT_WUNLOCK(vp->v_object); 3457 } 3458 error = VOP_FSYNC(vp, MNT_WAIT, td); 3459 3460 VOP_UNLOCK(vp, 0); 3461 vn_finished_write(mp); 3462 drop: 3463 fdrop(fp, td); 3464 return (error); 3465 } 3466 3467 /* 3468 * Rename files. Source and destination must either both be directories, or 3469 * both not be directories. If target is a directory, it must be empty. 3470 */ 3471 #ifndef _SYS_SYSPROTO_H_ 3472 struct rename_args { 3473 char *from; 3474 char *to; 3475 }; 3476 #endif 3477 int 3478 sys_rename(td, uap) 3479 struct thread *td; 3480 register struct rename_args /* { 3481 char *from; 3482 char *to; 3483 } */ *uap; 3484 { 3485 3486 return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE)); 3487 } 3488 3489 #ifndef _SYS_SYSPROTO_H_ 3490 struct renameat_args { 3491 int oldfd; 3492 char *old; 3493 int newfd; 3494 char *new; 3495 }; 3496 #endif 3497 int 3498 sys_renameat(struct thread *td, struct renameat_args *uap) 3499 { 3500 3501 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3502 UIO_USERSPACE)); 3503 } 3504 3505 int 3506 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg) 3507 { 3508 3509 return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg)); 3510 } 3511 3512 int 3513 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3514 enum uio_seg pathseg) 3515 { 3516 struct mount *mp = NULL; 3517 struct vnode *tvp, *fvp, *tdvp; 3518 struct nameidata fromnd, tond; 3519 cap_rights_t rights; 3520 int error; 3521 3522 bwillwrite(); 3523 #ifdef MAC 3524 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3525 AUDITVNODE1, pathseg, old, oldfd, 3526 cap_rights_init(&rights, CAP_RENAMEAT), td); 3527 #else 3528 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3529 pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT), td); 3530 #endif 3531 3532 if ((error = namei(&fromnd)) != 0) 3533 return (error); 3534 #ifdef MAC 3535 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3536 fromnd.ni_vp, &fromnd.ni_cnd); 3537 VOP_UNLOCK(fromnd.ni_dvp, 0); 3538 if (fromnd.ni_dvp != fromnd.ni_vp) 3539 VOP_UNLOCK(fromnd.ni_vp, 0); 3540 #endif 3541 fvp = fromnd.ni_vp; 3542 if (error == 0) 3543 error = vn_start_write(fvp, &mp, V_WAIT | PCATCH); 3544 if (error != 0) { 3545 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3546 vrele(fromnd.ni_dvp); 3547 vrele(fvp); 3548 goto out1; 3549 } 3550 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3551 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3552 cap_rights_init(&rights, CAP_LINKAT), td); 3553 if (fromnd.ni_vp->v_type == VDIR) 3554 tond.ni_cnd.cn_flags |= WILLBEDIR; 3555 if ((error = namei(&tond)) != 0) { 3556 /* Translate error code for rename("dir1", "dir2/."). */ 3557 if (error == EISDIR && fvp->v_type == VDIR) 3558 error = EINVAL; 3559 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3560 vrele(fromnd.ni_dvp); 3561 vrele(fvp); 3562 vn_finished_write(mp); 3563 goto out1; 3564 } 3565 tdvp = tond.ni_dvp; 3566 tvp = tond.ni_vp; 3567 if (tvp != NULL) { 3568 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3569 error = ENOTDIR; 3570 goto out; 3571 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3572 error = EISDIR; 3573 goto out; 3574 } 3575 #ifdef CAPABILITIES 3576 if (newfd != AT_FDCWD) { 3577 /* 3578 * If the target already exists we require CAP_UNLINKAT 3579 * from 'newfd'. 3580 */ 3581 error = cap_check(&tond.ni_filecaps.fc_rights, 3582 cap_rights_init(&rights, CAP_UNLINKAT)); 3583 if (error != 0) 3584 goto out; 3585 } 3586 #endif 3587 } 3588 if (fvp == tdvp) { 3589 error = EINVAL; 3590 goto out; 3591 } 3592 /* 3593 * If the source is the same as the destination (that is, if they 3594 * are links to the same vnode), then there is nothing to do. 3595 */ 3596 if (fvp == tvp) 3597 error = -1; 3598 #ifdef MAC 3599 else 3600 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3601 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3602 #endif 3603 out: 3604 if (error == 0) { 3605 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3606 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3607 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3608 NDFREE(&tond, NDF_ONLY_PNBUF); 3609 } else { 3610 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3611 NDFREE(&tond, NDF_ONLY_PNBUF); 3612 if (tvp != NULL) 3613 vput(tvp); 3614 if (tdvp == tvp) 3615 vrele(tdvp); 3616 else 3617 vput(tdvp); 3618 vrele(fromnd.ni_dvp); 3619 vrele(fvp); 3620 } 3621 vrele(tond.ni_startdir); 3622 vn_finished_write(mp); 3623 out1: 3624 if (fromnd.ni_startdir) 3625 vrele(fromnd.ni_startdir); 3626 if (error == -1) 3627 return (0); 3628 return (error); 3629 } 3630 3631 /* 3632 * Make a directory file. 3633 */ 3634 #ifndef _SYS_SYSPROTO_H_ 3635 struct mkdir_args { 3636 char *path; 3637 int mode; 3638 }; 3639 #endif 3640 int 3641 sys_mkdir(td, uap) 3642 struct thread *td; 3643 register struct mkdir_args /* { 3644 char *path; 3645 int mode; 3646 } */ *uap; 3647 { 3648 3649 return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode)); 3650 } 3651 3652 #ifndef _SYS_SYSPROTO_H_ 3653 struct mkdirat_args { 3654 int fd; 3655 char *path; 3656 mode_t mode; 3657 }; 3658 #endif 3659 int 3660 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3661 { 3662 3663 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3664 } 3665 3666 int 3667 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode) 3668 { 3669 3670 return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode)); 3671 } 3672 3673 int 3674 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3675 int mode) 3676 { 3677 struct mount *mp; 3678 struct vnode *vp; 3679 struct vattr vattr; 3680 struct nameidata nd; 3681 cap_rights_t rights; 3682 int error; 3683 3684 AUDIT_ARG_MODE(mode); 3685 restart: 3686 bwillwrite(); 3687 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 3688 segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), td); 3689 nd.ni_cnd.cn_flags |= WILLBEDIR; 3690 if ((error = namei(&nd)) != 0) 3691 return (error); 3692 vp = nd.ni_vp; 3693 if (vp != NULL) { 3694 NDFREE(&nd, NDF_ONLY_PNBUF); 3695 /* 3696 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3697 * the strange behaviour of leaving the vnode unlocked 3698 * if the target is the same vnode as the parent. 3699 */ 3700 if (vp == nd.ni_dvp) 3701 vrele(nd.ni_dvp); 3702 else 3703 vput(nd.ni_dvp); 3704 vrele(vp); 3705 return (EEXIST); 3706 } 3707 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3708 NDFREE(&nd, NDF_ONLY_PNBUF); 3709 vput(nd.ni_dvp); 3710 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3711 return (error); 3712 goto restart; 3713 } 3714 VATTR_NULL(&vattr); 3715 vattr.va_type = VDIR; 3716 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3717 #ifdef MAC 3718 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3719 &vattr); 3720 if (error != 0) 3721 goto out; 3722 #endif 3723 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3724 #ifdef MAC 3725 out: 3726 #endif 3727 NDFREE(&nd, NDF_ONLY_PNBUF); 3728 vput(nd.ni_dvp); 3729 if (error == 0) 3730 vput(nd.ni_vp); 3731 vn_finished_write(mp); 3732 return (error); 3733 } 3734 3735 /* 3736 * Remove a directory file. 3737 */ 3738 #ifndef _SYS_SYSPROTO_H_ 3739 struct rmdir_args { 3740 char *path; 3741 }; 3742 #endif 3743 int 3744 sys_rmdir(td, uap) 3745 struct thread *td; 3746 struct rmdir_args /* { 3747 char *path; 3748 } */ *uap; 3749 { 3750 3751 return (kern_rmdir(td, uap->path, UIO_USERSPACE)); 3752 } 3753 3754 int 3755 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg) 3756 { 3757 3758 return (kern_rmdirat(td, AT_FDCWD, path, pathseg)); 3759 } 3760 3761 int 3762 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3763 { 3764 struct mount *mp; 3765 struct vnode *vp; 3766 struct nameidata nd; 3767 cap_rights_t rights; 3768 int error; 3769 3770 restart: 3771 bwillwrite(); 3772 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3773 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3774 if ((error = namei(&nd)) != 0) 3775 return (error); 3776 vp = nd.ni_vp; 3777 if (vp->v_type != VDIR) { 3778 error = ENOTDIR; 3779 goto out; 3780 } 3781 /* 3782 * No rmdir "." please. 3783 */ 3784 if (nd.ni_dvp == vp) { 3785 error = EINVAL; 3786 goto out; 3787 } 3788 /* 3789 * The root of a mounted filesystem cannot be deleted. 3790 */ 3791 if (vp->v_vflag & VV_ROOT) { 3792 error = EBUSY; 3793 goto out; 3794 } 3795 #ifdef MAC 3796 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3797 &nd.ni_cnd); 3798 if (error != 0) 3799 goto out; 3800 #endif 3801 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3802 NDFREE(&nd, NDF_ONLY_PNBUF); 3803 vput(vp); 3804 if (nd.ni_dvp == vp) 3805 vrele(nd.ni_dvp); 3806 else 3807 vput(nd.ni_dvp); 3808 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3809 return (error); 3810 goto restart; 3811 } 3812 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3813 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3814 vn_finished_write(mp); 3815 out: 3816 NDFREE(&nd, NDF_ONLY_PNBUF); 3817 vput(vp); 3818 if (nd.ni_dvp == vp) 3819 vrele(nd.ni_dvp); 3820 else 3821 vput(nd.ni_dvp); 3822 return (error); 3823 } 3824 3825 #ifdef COMPAT_43 3826 /* 3827 * Read a block of directory entries in a filesystem independent format. 3828 */ 3829 #ifndef _SYS_SYSPROTO_H_ 3830 struct ogetdirentries_args { 3831 int fd; 3832 char *buf; 3833 u_int count; 3834 long *basep; 3835 }; 3836 #endif 3837 int 3838 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3839 { 3840 long loff; 3841 int error; 3842 3843 error = kern_ogetdirentries(td, uap, &loff); 3844 if (error == 0) 3845 error = copyout(&loff, uap->basep, sizeof(long)); 3846 return (error); 3847 } 3848 3849 int 3850 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3851 long *ploff) 3852 { 3853 struct vnode *vp; 3854 struct file *fp; 3855 struct uio auio, kuio; 3856 struct iovec aiov, kiov; 3857 struct dirent *dp, *edp; 3858 cap_rights_t rights; 3859 caddr_t dirbuf; 3860 int error, eofflag, readcnt; 3861 long loff; 3862 off_t foffset; 3863 3864 /* XXX arbitrary sanity limit on `count'. */ 3865 if (uap->count > 64 * 1024) 3866 return (EINVAL); 3867 error = getvnode(td->td_proc->p_fd, uap->fd, 3868 cap_rights_init(&rights, CAP_READ), &fp); 3869 if (error != 0) 3870 return (error); 3871 if ((fp->f_flag & FREAD) == 0) { 3872 fdrop(fp, td); 3873 return (EBADF); 3874 } 3875 vp = fp->f_vnode; 3876 foffset = foffset_lock(fp, 0); 3877 unionread: 3878 if (vp->v_type != VDIR) { 3879 foffset_unlock(fp, foffset, 0); 3880 fdrop(fp, td); 3881 return (EINVAL); 3882 } 3883 aiov.iov_base = uap->buf; 3884 aiov.iov_len = uap->count; 3885 auio.uio_iov = &aiov; 3886 auio.uio_iovcnt = 1; 3887 auio.uio_rw = UIO_READ; 3888 auio.uio_segflg = UIO_USERSPACE; 3889 auio.uio_td = td; 3890 auio.uio_resid = uap->count; 3891 vn_lock(vp, LK_SHARED | LK_RETRY); 3892 loff = auio.uio_offset = foffset; 3893 #ifdef MAC 3894 error = mac_vnode_check_readdir(td->td_ucred, vp); 3895 if (error != 0) { 3896 VOP_UNLOCK(vp, 0); 3897 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3898 fdrop(fp, td); 3899 return (error); 3900 } 3901 #endif 3902 # if (BYTE_ORDER != LITTLE_ENDIAN) 3903 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3904 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3905 NULL, NULL); 3906 foffset = auio.uio_offset; 3907 } else 3908 # endif 3909 { 3910 kuio = auio; 3911 kuio.uio_iov = &kiov; 3912 kuio.uio_segflg = UIO_SYSSPACE; 3913 kiov.iov_len = uap->count; 3914 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3915 kiov.iov_base = dirbuf; 3916 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3917 NULL, NULL); 3918 foffset = kuio.uio_offset; 3919 if (error == 0) { 3920 readcnt = uap->count - kuio.uio_resid; 3921 edp = (struct dirent *)&dirbuf[readcnt]; 3922 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3923 # if (BYTE_ORDER == LITTLE_ENDIAN) 3924 /* 3925 * The expected low byte of 3926 * dp->d_namlen is our dp->d_type. 3927 * The high MBZ byte of dp->d_namlen 3928 * is our dp->d_namlen. 3929 */ 3930 dp->d_type = dp->d_namlen; 3931 dp->d_namlen = 0; 3932 # else 3933 /* 3934 * The dp->d_type is the high byte 3935 * of the expected dp->d_namlen, 3936 * so must be zero'ed. 3937 */ 3938 dp->d_type = 0; 3939 # endif 3940 if (dp->d_reclen > 0) { 3941 dp = (struct dirent *) 3942 ((char *)dp + dp->d_reclen); 3943 } else { 3944 error = EIO; 3945 break; 3946 } 3947 } 3948 if (dp >= edp) 3949 error = uiomove(dirbuf, readcnt, &auio); 3950 } 3951 free(dirbuf, M_TEMP); 3952 } 3953 if (error != 0) { 3954 VOP_UNLOCK(vp, 0); 3955 foffset_unlock(fp, foffset, 0); 3956 fdrop(fp, td); 3957 return (error); 3958 } 3959 if (uap->count == auio.uio_resid && 3960 (vp->v_vflag & VV_ROOT) && 3961 (vp->v_mount->mnt_flag & MNT_UNION)) { 3962 struct vnode *tvp = vp; 3963 vp = vp->v_mount->mnt_vnodecovered; 3964 VREF(vp); 3965 fp->f_vnode = vp; 3966 fp->f_data = vp; 3967 foffset = 0; 3968 vput(tvp); 3969 goto unionread; 3970 } 3971 VOP_UNLOCK(vp, 0); 3972 foffset_unlock(fp, foffset, 0); 3973 fdrop(fp, td); 3974 td->td_retval[0] = uap->count - auio.uio_resid; 3975 if (error == 0) 3976 *ploff = loff; 3977 return (error); 3978 } 3979 #endif /* COMPAT_43 */ 3980 3981 /* 3982 * Read a block of directory entries in a filesystem independent format. 3983 */ 3984 #ifndef _SYS_SYSPROTO_H_ 3985 struct getdirentries_args { 3986 int fd; 3987 char *buf; 3988 u_int count; 3989 long *basep; 3990 }; 3991 #endif 3992 int 3993 sys_getdirentries(td, uap) 3994 struct thread *td; 3995 register struct getdirentries_args /* { 3996 int fd; 3997 char *buf; 3998 u_int count; 3999 long *basep; 4000 } */ *uap; 4001 { 4002 long base; 4003 int error; 4004 4005 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4006 NULL, UIO_USERSPACE); 4007 if (error != 0) 4008 return (error); 4009 if (uap->basep != NULL) 4010 error = copyout(&base, uap->basep, sizeof(long)); 4011 return (error); 4012 } 4013 4014 int 4015 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 4016 long *basep, ssize_t *residp, enum uio_seg bufseg) 4017 { 4018 struct vnode *vp; 4019 struct file *fp; 4020 struct uio auio; 4021 struct iovec aiov; 4022 cap_rights_t rights; 4023 long loff; 4024 int error, eofflag; 4025 off_t foffset; 4026 4027 AUDIT_ARG_FD(fd); 4028 if (count > IOSIZE_MAX) 4029 return (EINVAL); 4030 auio.uio_resid = count; 4031 error = getvnode(td->td_proc->p_fd, fd, 4032 cap_rights_init(&rights, CAP_READ), &fp); 4033 if (error != 0) 4034 return (error); 4035 if ((fp->f_flag & FREAD) == 0) { 4036 fdrop(fp, td); 4037 return (EBADF); 4038 } 4039 vp = fp->f_vnode; 4040 foffset = foffset_lock(fp, 0); 4041 unionread: 4042 if (vp->v_type != VDIR) { 4043 error = EINVAL; 4044 goto fail; 4045 } 4046 aiov.iov_base = buf; 4047 aiov.iov_len = count; 4048 auio.uio_iov = &aiov; 4049 auio.uio_iovcnt = 1; 4050 auio.uio_rw = UIO_READ; 4051 auio.uio_segflg = bufseg; 4052 auio.uio_td = td; 4053 vn_lock(vp, LK_SHARED | LK_RETRY); 4054 AUDIT_ARG_VNODE1(vp); 4055 loff = auio.uio_offset = foffset; 4056 #ifdef MAC 4057 error = mac_vnode_check_readdir(td->td_ucred, vp); 4058 if (error == 0) 4059 #endif 4060 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4061 NULL); 4062 foffset = auio.uio_offset; 4063 if (error != 0) { 4064 VOP_UNLOCK(vp, 0); 4065 goto fail; 4066 } 4067 if (count == auio.uio_resid && 4068 (vp->v_vflag & VV_ROOT) && 4069 (vp->v_mount->mnt_flag & MNT_UNION)) { 4070 struct vnode *tvp = vp; 4071 4072 vp = vp->v_mount->mnt_vnodecovered; 4073 VREF(vp); 4074 fp->f_vnode = vp; 4075 fp->f_data = vp; 4076 foffset = 0; 4077 vput(tvp); 4078 goto unionread; 4079 } 4080 VOP_UNLOCK(vp, 0); 4081 *basep = loff; 4082 if (residp != NULL) 4083 *residp = auio.uio_resid; 4084 td->td_retval[0] = count - auio.uio_resid; 4085 fail: 4086 foffset_unlock(fp, foffset, 0); 4087 fdrop(fp, td); 4088 return (error); 4089 } 4090 4091 #ifndef _SYS_SYSPROTO_H_ 4092 struct getdents_args { 4093 int fd; 4094 char *buf; 4095 size_t count; 4096 }; 4097 #endif 4098 int 4099 sys_getdents(td, uap) 4100 struct thread *td; 4101 register struct getdents_args /* { 4102 int fd; 4103 char *buf; 4104 u_int count; 4105 } */ *uap; 4106 { 4107 struct getdirentries_args ap; 4108 4109 ap.fd = uap->fd; 4110 ap.buf = uap->buf; 4111 ap.count = uap->count; 4112 ap.basep = NULL; 4113 return (sys_getdirentries(td, &ap)); 4114 } 4115 4116 /* 4117 * Set the mode mask for creation of filesystem nodes. 4118 */ 4119 #ifndef _SYS_SYSPROTO_H_ 4120 struct umask_args { 4121 int newmask; 4122 }; 4123 #endif 4124 int 4125 sys_umask(td, uap) 4126 struct thread *td; 4127 struct umask_args /* { 4128 int newmask; 4129 } */ *uap; 4130 { 4131 register struct filedesc *fdp; 4132 4133 FILEDESC_XLOCK(td->td_proc->p_fd); 4134 fdp = td->td_proc->p_fd; 4135 td->td_retval[0] = fdp->fd_cmask; 4136 fdp->fd_cmask = uap->newmask & ALLPERMS; 4137 FILEDESC_XUNLOCK(td->td_proc->p_fd); 4138 return (0); 4139 } 4140 4141 /* 4142 * Void all references to file by ripping underlying filesystem away from 4143 * vnode. 4144 */ 4145 #ifndef _SYS_SYSPROTO_H_ 4146 struct revoke_args { 4147 char *path; 4148 }; 4149 #endif 4150 int 4151 sys_revoke(td, uap) 4152 struct thread *td; 4153 register struct revoke_args /* { 4154 char *path; 4155 } */ *uap; 4156 { 4157 struct vnode *vp; 4158 struct vattr vattr; 4159 struct nameidata nd; 4160 int error; 4161 4162 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4163 uap->path, td); 4164 if ((error = namei(&nd)) != 0) 4165 return (error); 4166 vp = nd.ni_vp; 4167 NDFREE(&nd, NDF_ONLY_PNBUF); 4168 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4169 error = EINVAL; 4170 goto out; 4171 } 4172 #ifdef MAC 4173 error = mac_vnode_check_revoke(td->td_ucred, vp); 4174 if (error != 0) 4175 goto out; 4176 #endif 4177 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4178 if (error != 0) 4179 goto out; 4180 if (td->td_ucred->cr_uid != vattr.va_uid) { 4181 error = priv_check(td, PRIV_VFS_ADMIN); 4182 if (error != 0) 4183 goto out; 4184 } 4185 if (vcount(vp) > 1) 4186 VOP_REVOKE(vp, REVOKEALL); 4187 out: 4188 vput(vp); 4189 return (error); 4190 } 4191 4192 /* 4193 * Convert a user file descriptor to a kernel file entry and check that, if it 4194 * is a capability, the correct rights are present. A reference on the file 4195 * entry is held upon returning. 4196 */ 4197 int 4198 getvnode(struct filedesc *fdp, int fd, cap_rights_t *rightsp, struct file **fpp) 4199 { 4200 struct file *fp; 4201 int error; 4202 4203 error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL); 4204 if (error != 0) 4205 return (error); 4206 4207 /* 4208 * The file could be not of the vnode type, or it may be not 4209 * yet fully initialized, in which case the f_vnode pointer 4210 * may be set, but f_ops is still badfileops. E.g., 4211 * devfs_open() transiently create such situation to 4212 * facilitate csw d_fdopen(). 4213 * 4214 * Dupfdopen() handling in kern_openat() installs the 4215 * half-baked file into the process descriptor table, allowing 4216 * other thread to dereference it. Guard against the race by 4217 * checking f_ops. 4218 */ 4219 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4220 fdrop(fp, curthread); 4221 return (EINVAL); 4222 } 4223 *fpp = fp; 4224 return (0); 4225 } 4226 4227 4228 /* 4229 * Get an (NFS) file handle. 4230 */ 4231 #ifndef _SYS_SYSPROTO_H_ 4232 struct lgetfh_args { 4233 char *fname; 4234 fhandle_t *fhp; 4235 }; 4236 #endif 4237 int 4238 sys_lgetfh(td, uap) 4239 struct thread *td; 4240 register struct lgetfh_args *uap; 4241 { 4242 struct nameidata nd; 4243 fhandle_t fh; 4244 register struct vnode *vp; 4245 int error; 4246 4247 error = priv_check(td, PRIV_VFS_GETFH); 4248 if (error != 0) 4249 return (error); 4250 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4251 uap->fname, td); 4252 error = namei(&nd); 4253 if (error != 0) 4254 return (error); 4255 NDFREE(&nd, NDF_ONLY_PNBUF); 4256 vp = nd.ni_vp; 4257 bzero(&fh, sizeof(fh)); 4258 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4259 error = VOP_VPTOFH(vp, &fh.fh_fid); 4260 vput(vp); 4261 if (error == 0) 4262 error = copyout(&fh, uap->fhp, sizeof (fh)); 4263 return (error); 4264 } 4265 4266 #ifndef _SYS_SYSPROTO_H_ 4267 struct getfh_args { 4268 char *fname; 4269 fhandle_t *fhp; 4270 }; 4271 #endif 4272 int 4273 sys_getfh(td, uap) 4274 struct thread *td; 4275 register struct getfh_args *uap; 4276 { 4277 struct nameidata nd; 4278 fhandle_t fh; 4279 register struct vnode *vp; 4280 int error; 4281 4282 error = priv_check(td, PRIV_VFS_GETFH); 4283 if (error != 0) 4284 return (error); 4285 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4286 uap->fname, td); 4287 error = namei(&nd); 4288 if (error != 0) 4289 return (error); 4290 NDFREE(&nd, NDF_ONLY_PNBUF); 4291 vp = nd.ni_vp; 4292 bzero(&fh, sizeof(fh)); 4293 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4294 error = VOP_VPTOFH(vp, &fh.fh_fid); 4295 vput(vp); 4296 if (error == 0) 4297 error = copyout(&fh, uap->fhp, sizeof (fh)); 4298 return (error); 4299 } 4300 4301 /* 4302 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4303 * open descriptor. 4304 * 4305 * warning: do not remove the priv_check() call or this becomes one giant 4306 * security hole. 4307 */ 4308 #ifndef _SYS_SYSPROTO_H_ 4309 struct fhopen_args { 4310 const struct fhandle *u_fhp; 4311 int flags; 4312 }; 4313 #endif 4314 int 4315 sys_fhopen(td, uap) 4316 struct thread *td; 4317 struct fhopen_args /* { 4318 const struct fhandle *u_fhp; 4319 int flags; 4320 } */ *uap; 4321 { 4322 struct mount *mp; 4323 struct vnode *vp; 4324 struct fhandle fhp; 4325 struct file *fp; 4326 int fmode, error; 4327 int indx; 4328 4329 error = priv_check(td, PRIV_VFS_FHOPEN); 4330 if (error != 0) 4331 return (error); 4332 indx = -1; 4333 fmode = FFLAGS(uap->flags); 4334 /* why not allow a non-read/write open for our lockd? */ 4335 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4336 return (EINVAL); 4337 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4338 if (error != 0) 4339 return(error); 4340 /* find the mount point */ 4341 mp = vfs_busyfs(&fhp.fh_fsid); 4342 if (mp == NULL) 4343 return (ESTALE); 4344 /* now give me my vnode, it gets returned to me locked */ 4345 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4346 vfs_unbusy(mp); 4347 if (error != 0) 4348 return (error); 4349 4350 error = falloc_noinstall(td, &fp); 4351 if (error != 0) { 4352 vput(vp); 4353 return (error); 4354 } 4355 /* 4356 * An extra reference on `fp' has been held for us by 4357 * falloc_noinstall(). 4358 */ 4359 4360 #ifdef INVARIANTS 4361 td->td_dupfd = -1; 4362 #endif 4363 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4364 if (error != 0) { 4365 KASSERT(fp->f_ops == &badfileops, 4366 ("VOP_OPEN in fhopen() set f_ops")); 4367 KASSERT(td->td_dupfd < 0, 4368 ("fhopen() encountered fdopen()")); 4369 4370 vput(vp); 4371 goto bad; 4372 } 4373 #ifdef INVARIANTS 4374 td->td_dupfd = 0; 4375 #endif 4376 fp->f_vnode = vp; 4377 fp->f_seqcount = 1; 4378 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4379 &vnops); 4380 VOP_UNLOCK(vp, 0); 4381 if ((fmode & O_TRUNC) != 0) { 4382 error = fo_truncate(fp, 0, td->td_ucred, td); 4383 if (error != 0) 4384 goto bad; 4385 } 4386 4387 error = finstall(td, fp, &indx, fmode, NULL); 4388 bad: 4389 fdrop(fp, td); 4390 td->td_retval[0] = indx; 4391 return (error); 4392 } 4393 4394 /* 4395 * Stat an (NFS) file handle. 4396 */ 4397 #ifndef _SYS_SYSPROTO_H_ 4398 struct fhstat_args { 4399 struct fhandle *u_fhp; 4400 struct stat *sb; 4401 }; 4402 #endif 4403 int 4404 sys_fhstat(td, uap) 4405 struct thread *td; 4406 register struct fhstat_args /* { 4407 struct fhandle *u_fhp; 4408 struct stat *sb; 4409 } */ *uap; 4410 { 4411 struct stat sb; 4412 struct fhandle fh; 4413 int error; 4414 4415 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4416 if (error != 0) 4417 return (error); 4418 error = kern_fhstat(td, fh, &sb); 4419 if (error == 0) 4420 error = copyout(&sb, uap->sb, sizeof(sb)); 4421 return (error); 4422 } 4423 4424 int 4425 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4426 { 4427 struct mount *mp; 4428 struct vnode *vp; 4429 int error; 4430 4431 error = priv_check(td, PRIV_VFS_FHSTAT); 4432 if (error != 0) 4433 return (error); 4434 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4435 return (ESTALE); 4436 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4437 vfs_unbusy(mp); 4438 if (error != 0) 4439 return (error); 4440 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4441 vput(vp); 4442 return (error); 4443 } 4444 4445 /* 4446 * Implement fstatfs() for (NFS) file handles. 4447 */ 4448 #ifndef _SYS_SYSPROTO_H_ 4449 struct fhstatfs_args { 4450 struct fhandle *u_fhp; 4451 struct statfs *buf; 4452 }; 4453 #endif 4454 int 4455 sys_fhstatfs(td, uap) 4456 struct thread *td; 4457 struct fhstatfs_args /* { 4458 struct fhandle *u_fhp; 4459 struct statfs *buf; 4460 } */ *uap; 4461 { 4462 struct statfs sf; 4463 fhandle_t fh; 4464 int error; 4465 4466 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4467 if (error != 0) 4468 return (error); 4469 error = kern_fhstatfs(td, fh, &sf); 4470 if (error != 0) 4471 return (error); 4472 return (copyout(&sf, uap->buf, sizeof(sf))); 4473 } 4474 4475 int 4476 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4477 { 4478 struct statfs *sp; 4479 struct mount *mp; 4480 struct vnode *vp; 4481 int error; 4482 4483 error = priv_check(td, PRIV_VFS_FHSTATFS); 4484 if (error != 0) 4485 return (error); 4486 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4487 return (ESTALE); 4488 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4489 if (error != 0) { 4490 vfs_unbusy(mp); 4491 return (error); 4492 } 4493 vput(vp); 4494 error = prison_canseemount(td->td_ucred, mp); 4495 if (error != 0) 4496 goto out; 4497 #ifdef MAC 4498 error = mac_mount_check_stat(td->td_ucred, mp); 4499 if (error != 0) 4500 goto out; 4501 #endif 4502 /* 4503 * Set these in case the underlying filesystem fails to do so. 4504 */ 4505 sp = &mp->mnt_stat; 4506 sp->f_version = STATFS_VERSION; 4507 sp->f_namemax = NAME_MAX; 4508 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4509 error = VFS_STATFS(mp, sp); 4510 if (error == 0) 4511 *buf = *sp; 4512 out: 4513 vfs_unbusy(mp); 4514 return (error); 4515 } 4516 4517 int 4518 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4519 { 4520 struct file *fp; 4521 struct mount *mp; 4522 struct vnode *vp; 4523 cap_rights_t rights; 4524 off_t olen, ooffset; 4525 int error; 4526 4527 fp = NULL; 4528 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4529 if (error != 0) 4530 goto out; 4531 4532 switch (fp->f_type) { 4533 case DTYPE_VNODE: 4534 break; 4535 case DTYPE_PIPE: 4536 case DTYPE_FIFO: 4537 error = ESPIPE; 4538 goto out; 4539 default: 4540 error = ENODEV; 4541 goto out; 4542 } 4543 if ((fp->f_flag & FWRITE) == 0) { 4544 error = EBADF; 4545 goto out; 4546 } 4547 vp = fp->f_vnode; 4548 if (vp->v_type != VREG) { 4549 error = ENODEV; 4550 goto out; 4551 } 4552 if (offset < 0 || len <= 0) { 4553 error = EINVAL; 4554 goto out; 4555 } 4556 /* Check for wrap. */ 4557 if (offset > OFF_MAX - len) { 4558 error = EFBIG; 4559 goto out; 4560 } 4561 4562 /* Allocating blocks may take a long time, so iterate. */ 4563 for (;;) { 4564 olen = len; 4565 ooffset = offset; 4566 4567 bwillwrite(); 4568 mp = NULL; 4569 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4570 if (error != 0) 4571 break; 4572 error = vn_lock(vp, LK_EXCLUSIVE); 4573 if (error != 0) { 4574 vn_finished_write(mp); 4575 break; 4576 } 4577 #ifdef MAC 4578 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4579 if (error == 0) 4580 #endif 4581 error = VOP_ALLOCATE(vp, &offset, &len); 4582 VOP_UNLOCK(vp, 0); 4583 vn_finished_write(mp); 4584 4585 if (olen + ooffset != offset + len) { 4586 panic("offset + len changed from %jx/%jx to %jx/%jx", 4587 ooffset, olen, offset, len); 4588 } 4589 if (error != 0 || len == 0) 4590 break; 4591 KASSERT(olen > len, ("Iteration did not make progress?")); 4592 maybe_yield(); 4593 } 4594 out: 4595 if (fp != NULL) 4596 fdrop(fp, td); 4597 return (error); 4598 } 4599 4600 int 4601 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4602 { 4603 4604 td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset, 4605 uap->len); 4606 return (0); 4607 } 4608 4609 /* 4610 * Unlike madvise(2), we do not make a best effort to remember every 4611 * possible caching hint. Instead, we remember the last setting with 4612 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4613 * region of any current setting. 4614 */ 4615 int 4616 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4617 int advice) 4618 { 4619 struct fadvise_info *fa, *new; 4620 struct file *fp; 4621 struct vnode *vp; 4622 cap_rights_t rights; 4623 off_t end; 4624 int error; 4625 4626 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4627 return (EINVAL); 4628 switch (advice) { 4629 case POSIX_FADV_SEQUENTIAL: 4630 case POSIX_FADV_RANDOM: 4631 case POSIX_FADV_NOREUSE: 4632 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4633 break; 4634 case POSIX_FADV_NORMAL: 4635 case POSIX_FADV_WILLNEED: 4636 case POSIX_FADV_DONTNEED: 4637 new = NULL; 4638 break; 4639 default: 4640 return (EINVAL); 4641 } 4642 /* XXX: CAP_POSIX_FADVISE? */ 4643 error = fget(td, fd, cap_rights_init(&rights), &fp); 4644 if (error != 0) 4645 goto out; 4646 4647 switch (fp->f_type) { 4648 case DTYPE_VNODE: 4649 break; 4650 case DTYPE_PIPE: 4651 case DTYPE_FIFO: 4652 error = ESPIPE; 4653 goto out; 4654 default: 4655 error = ENODEV; 4656 goto out; 4657 } 4658 vp = fp->f_vnode; 4659 if (vp->v_type != VREG) { 4660 error = ENODEV; 4661 goto out; 4662 } 4663 if (len == 0) 4664 end = OFF_MAX; 4665 else 4666 end = offset + len - 1; 4667 switch (advice) { 4668 case POSIX_FADV_SEQUENTIAL: 4669 case POSIX_FADV_RANDOM: 4670 case POSIX_FADV_NOREUSE: 4671 /* 4672 * Try to merge any existing non-standard region with 4673 * this new region if possible, otherwise create a new 4674 * non-standard region for this request. 4675 */ 4676 mtx_pool_lock(mtxpool_sleep, fp); 4677 fa = fp->f_advice; 4678 if (fa != NULL && fa->fa_advice == advice && 4679 ((fa->fa_start <= end && fa->fa_end >= offset) || 4680 (end != OFF_MAX && fa->fa_start == end + 1) || 4681 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4682 if (offset < fa->fa_start) 4683 fa->fa_start = offset; 4684 if (end > fa->fa_end) 4685 fa->fa_end = end; 4686 } else { 4687 new->fa_advice = advice; 4688 new->fa_start = offset; 4689 new->fa_end = end; 4690 new->fa_prevstart = 0; 4691 new->fa_prevend = 0; 4692 fp->f_advice = new; 4693 new = fa; 4694 } 4695 mtx_pool_unlock(mtxpool_sleep, fp); 4696 break; 4697 case POSIX_FADV_NORMAL: 4698 /* 4699 * If a the "normal" region overlaps with an existing 4700 * non-standard region, trim or remove the 4701 * non-standard region. 4702 */ 4703 mtx_pool_lock(mtxpool_sleep, fp); 4704 fa = fp->f_advice; 4705 if (fa != NULL) { 4706 if (offset <= fa->fa_start && end >= fa->fa_end) { 4707 new = fa; 4708 fp->f_advice = NULL; 4709 } else if (offset <= fa->fa_start && 4710 end >= fa->fa_start) 4711 fa->fa_start = end + 1; 4712 else if (offset <= fa->fa_end && end >= fa->fa_end) 4713 fa->fa_end = offset - 1; 4714 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4715 /* 4716 * If the "normal" region is a middle 4717 * portion of the existing 4718 * non-standard region, just remove 4719 * the whole thing rather than picking 4720 * one side or the other to 4721 * preserve. 4722 */ 4723 new = fa; 4724 fp->f_advice = NULL; 4725 } 4726 } 4727 mtx_pool_unlock(mtxpool_sleep, fp); 4728 break; 4729 case POSIX_FADV_WILLNEED: 4730 case POSIX_FADV_DONTNEED: 4731 error = VOP_ADVISE(vp, offset, end, advice); 4732 break; 4733 } 4734 out: 4735 if (fp != NULL) 4736 fdrop(fp, td); 4737 free(new, M_FADVISE); 4738 return (error); 4739 } 4740 4741 int 4742 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4743 { 4744 4745 td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset, 4746 uap->len, uap->advice); 4747 return (0); 4748 } 4749