1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/sysent.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/sysproto.h> 55 #include <sys/namei.h> 56 #include <sys/filedesc.h> 57 #include <sys/kernel.h> 58 #include <sys/fcntl.h> 59 #include <sys/file.h> 60 #include <sys/filio.h> 61 #include <sys/limits.h> 62 #include <sys/linker.h> 63 #include <sys/rwlock.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97 static int chroot_refuse_vdir_fds(struct filedesc *fdp); 98 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 99 static int kern_chflags(struct thread *td, const char *path, 100 enum uio_seg pathseg, u_long flags); 101 static int kern_chflagsat(struct thread *td, int fd, const char *path, 102 enum uio_seg pathseg, u_long flags, int atflag); 103 static int setfflags(struct thread *td, struct vnode *, u_long); 104 static int setutimes(struct thread *td, struct vnode *, 105 const struct timespec *, int, int); 106 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 107 struct thread *td); 108 109 /* 110 * The module initialization routine for POSIX asynchronous I/O will 111 * set this to the version of AIO that it implements. (Zero means 112 * that it is not implemented.) This value is used here by pathconf() 113 * and in kern_descrip.c by fpathconf(). 114 */ 115 int async_io_version; 116 117 #ifdef DEBUG 118 static int syncprt = 0; 119 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 120 #endif 121 122 /* 123 * Sync each mounted filesystem. 124 */ 125 #ifndef _SYS_SYSPROTO_H_ 126 struct sync_args { 127 int dummy; 128 }; 129 #endif 130 /* ARGSUSED */ 131 int 132 sys_sync(td, uap) 133 struct thread *td; 134 struct sync_args *uap; 135 { 136 struct mount *mp, *nmp; 137 int save; 138 139 mtx_lock(&mountlist_mtx); 140 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 141 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 142 nmp = TAILQ_NEXT(mp, mnt_list); 143 continue; 144 } 145 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 146 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 147 save = curthread_pflags_set(TDP_SYNCIO); 148 vfs_msync(mp, MNT_NOWAIT); 149 VFS_SYNC(mp, MNT_NOWAIT); 150 curthread_pflags_restore(save); 151 vn_finished_write(mp); 152 } 153 mtx_lock(&mountlist_mtx); 154 nmp = TAILQ_NEXT(mp, mnt_list); 155 vfs_unbusy(mp); 156 } 157 mtx_unlock(&mountlist_mtx); 158 return (0); 159 } 160 161 /* 162 * Change filesystem quotas. 163 */ 164 #ifndef _SYS_SYSPROTO_H_ 165 struct quotactl_args { 166 char *path; 167 int cmd; 168 int uid; 169 caddr_t arg; 170 }; 171 #endif 172 int 173 sys_quotactl(td, uap) 174 struct thread *td; 175 register struct quotactl_args /* { 176 char *path; 177 int cmd; 178 int uid; 179 caddr_t arg; 180 } */ *uap; 181 { 182 struct mount *mp; 183 struct nameidata nd; 184 int error; 185 186 AUDIT_ARG_CMD(uap->cmd); 187 AUDIT_ARG_UID(uap->uid); 188 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 189 return (EPERM); 190 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 191 uap->path, td); 192 if ((error = namei(&nd)) != 0) 193 return (error); 194 NDFREE(&nd, NDF_ONLY_PNBUF); 195 mp = nd.ni_vp->v_mount; 196 vfs_ref(mp); 197 vput(nd.ni_vp); 198 error = vfs_busy(mp, 0); 199 vfs_rel(mp); 200 if (error != 0) 201 return (error); 202 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 203 204 /* 205 * Since quota on operation typically needs to open quota 206 * file, the Q_QUOTAON handler needs to unbusy the mount point 207 * before calling into namei. Otherwise, unmount might be 208 * started between two vfs_busy() invocations (first is our, 209 * second is from mount point cross-walk code in lookup()), 210 * causing deadlock. 211 * 212 * Require that Q_QUOTAON handles the vfs_busy() reference on 213 * its own, always returning with ubusied mount point. 214 */ 215 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 216 vfs_unbusy(mp); 217 return (error); 218 } 219 220 /* 221 * Used by statfs conversion routines to scale the block size up if 222 * necessary so that all of the block counts are <= 'max_size'. Note 223 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 224 * value of 'n'. 225 */ 226 void 227 statfs_scale_blocks(struct statfs *sf, long max_size) 228 { 229 uint64_t count; 230 int shift; 231 232 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 233 234 /* 235 * Attempt to scale the block counts to give a more accurate 236 * overview to userland of the ratio of free space to used 237 * space. To do this, find the largest block count and compute 238 * a divisor that lets it fit into a signed integer <= max_size. 239 */ 240 if (sf->f_bavail < 0) 241 count = -sf->f_bavail; 242 else 243 count = sf->f_bavail; 244 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 245 if (count <= max_size) 246 return; 247 248 count >>= flsl(max_size); 249 shift = 0; 250 while (count > 0) { 251 shift++; 252 count >>=1; 253 } 254 255 sf->f_bsize <<= shift; 256 sf->f_blocks >>= shift; 257 sf->f_bfree >>= shift; 258 sf->f_bavail >>= shift; 259 } 260 261 /* 262 * Get filesystem statistics. 263 */ 264 #ifndef _SYS_SYSPROTO_H_ 265 struct statfs_args { 266 char *path; 267 struct statfs *buf; 268 }; 269 #endif 270 int 271 sys_statfs(td, uap) 272 struct thread *td; 273 register struct statfs_args /* { 274 char *path; 275 struct statfs *buf; 276 } */ *uap; 277 { 278 struct statfs sf; 279 int error; 280 281 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 282 if (error == 0) 283 error = copyout(&sf, uap->buf, sizeof(sf)); 284 return (error); 285 } 286 287 int 288 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 289 struct statfs *buf) 290 { 291 struct mount *mp; 292 struct statfs *sp, sb; 293 struct nameidata nd; 294 int error; 295 296 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 297 pathseg, path, td); 298 error = namei(&nd); 299 if (error != 0) 300 return (error); 301 mp = nd.ni_vp->v_mount; 302 vfs_ref(mp); 303 NDFREE(&nd, NDF_ONLY_PNBUF); 304 vput(nd.ni_vp); 305 error = vfs_busy(mp, 0); 306 vfs_rel(mp); 307 if (error != 0) 308 return (error); 309 #ifdef MAC 310 error = mac_mount_check_stat(td->td_ucred, mp); 311 if (error != 0) 312 goto out; 313 #endif 314 /* 315 * Set these in case the underlying filesystem fails to do so. 316 */ 317 sp = &mp->mnt_stat; 318 sp->f_version = STATFS_VERSION; 319 sp->f_namemax = NAME_MAX; 320 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 321 error = VFS_STATFS(mp, sp); 322 if (error != 0) 323 goto out; 324 if (priv_check(td, PRIV_VFS_GENERATION)) { 325 bcopy(sp, &sb, sizeof(sb)); 326 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 327 prison_enforce_statfs(td->td_ucred, mp, &sb); 328 sp = &sb; 329 } 330 *buf = *sp; 331 out: 332 vfs_unbusy(mp); 333 return (error); 334 } 335 336 /* 337 * Get filesystem statistics. 338 */ 339 #ifndef _SYS_SYSPROTO_H_ 340 struct fstatfs_args { 341 int fd; 342 struct statfs *buf; 343 }; 344 #endif 345 int 346 sys_fstatfs(td, uap) 347 struct thread *td; 348 register struct fstatfs_args /* { 349 int fd; 350 struct statfs *buf; 351 } */ *uap; 352 { 353 struct statfs sf; 354 int error; 355 356 error = kern_fstatfs(td, uap->fd, &sf); 357 if (error == 0) 358 error = copyout(&sf, uap->buf, sizeof(sf)); 359 return (error); 360 } 361 362 int 363 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 364 { 365 struct file *fp; 366 struct mount *mp; 367 struct statfs *sp, sb; 368 struct vnode *vp; 369 cap_rights_t rights; 370 int error; 371 372 AUDIT_ARG_FD(fd); 373 error = getvnode(td->td_proc->p_fd, fd, 374 cap_rights_init(&rights, CAP_FSTATFS), &fp); 375 if (error != 0) 376 return (error); 377 vp = fp->f_vnode; 378 vn_lock(vp, LK_SHARED | LK_RETRY); 379 #ifdef AUDIT 380 AUDIT_ARG_VNODE1(vp); 381 #endif 382 mp = vp->v_mount; 383 if (mp) 384 vfs_ref(mp); 385 VOP_UNLOCK(vp, 0); 386 fdrop(fp, td); 387 if (mp == NULL) { 388 error = EBADF; 389 goto out; 390 } 391 error = vfs_busy(mp, 0); 392 vfs_rel(mp); 393 if (error != 0) 394 return (error); 395 #ifdef MAC 396 error = mac_mount_check_stat(td->td_ucred, mp); 397 if (error != 0) 398 goto out; 399 #endif 400 /* 401 * Set these in case the underlying filesystem fails to do so. 402 */ 403 sp = &mp->mnt_stat; 404 sp->f_version = STATFS_VERSION; 405 sp->f_namemax = NAME_MAX; 406 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 407 error = VFS_STATFS(mp, sp); 408 if (error != 0) 409 goto out; 410 if (priv_check(td, PRIV_VFS_GENERATION)) { 411 bcopy(sp, &sb, sizeof(sb)); 412 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 413 prison_enforce_statfs(td->td_ucred, mp, &sb); 414 sp = &sb; 415 } 416 *buf = *sp; 417 out: 418 if (mp) 419 vfs_unbusy(mp); 420 return (error); 421 } 422 423 /* 424 * Get statistics on all filesystems. 425 */ 426 #ifndef _SYS_SYSPROTO_H_ 427 struct getfsstat_args { 428 struct statfs *buf; 429 long bufsize; 430 int flags; 431 }; 432 #endif 433 int 434 sys_getfsstat(td, uap) 435 struct thread *td; 436 register struct getfsstat_args /* { 437 struct statfs *buf; 438 long bufsize; 439 int flags; 440 } */ *uap; 441 { 442 443 return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE, 444 uap->flags)); 445 } 446 447 /* 448 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 449 * The caller is responsible for freeing memory which will be allocated 450 * in '*buf'. 451 */ 452 int 453 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 454 enum uio_seg bufseg, int flags) 455 { 456 struct mount *mp, *nmp; 457 struct statfs *sfsp, *sp, sb; 458 size_t count, maxcount; 459 int error; 460 461 maxcount = bufsize / sizeof(struct statfs); 462 if (bufsize == 0) 463 sfsp = NULL; 464 else if (bufseg == UIO_USERSPACE) 465 sfsp = *buf; 466 else /* if (bufseg == UIO_SYSSPACE) */ { 467 count = 0; 468 mtx_lock(&mountlist_mtx); 469 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 470 count++; 471 } 472 mtx_unlock(&mountlist_mtx); 473 if (maxcount > count) 474 maxcount = count; 475 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 476 M_WAITOK); 477 } 478 count = 0; 479 mtx_lock(&mountlist_mtx); 480 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 481 if (prison_canseemount(td->td_ucred, mp) != 0) { 482 nmp = TAILQ_NEXT(mp, mnt_list); 483 continue; 484 } 485 #ifdef MAC 486 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 487 nmp = TAILQ_NEXT(mp, mnt_list); 488 continue; 489 } 490 #endif 491 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 492 nmp = TAILQ_NEXT(mp, mnt_list); 493 continue; 494 } 495 if (sfsp && count < maxcount) { 496 sp = &mp->mnt_stat; 497 /* 498 * Set these in case the underlying filesystem 499 * fails to do so. 500 */ 501 sp->f_version = STATFS_VERSION; 502 sp->f_namemax = NAME_MAX; 503 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 504 /* 505 * If MNT_NOWAIT or MNT_LAZY is specified, do not 506 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 507 * overrides MNT_WAIT. 508 */ 509 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 510 (flags & MNT_WAIT)) && 511 (error = VFS_STATFS(mp, sp))) { 512 mtx_lock(&mountlist_mtx); 513 nmp = TAILQ_NEXT(mp, mnt_list); 514 vfs_unbusy(mp); 515 continue; 516 } 517 if (priv_check(td, PRIV_VFS_GENERATION)) { 518 bcopy(sp, &sb, sizeof(sb)); 519 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 520 prison_enforce_statfs(td->td_ucred, mp, &sb); 521 sp = &sb; 522 } 523 if (bufseg == UIO_SYSSPACE) 524 bcopy(sp, sfsp, sizeof(*sp)); 525 else /* if (bufseg == UIO_USERSPACE) */ { 526 error = copyout(sp, sfsp, sizeof(*sp)); 527 if (error != 0) { 528 vfs_unbusy(mp); 529 return (error); 530 } 531 } 532 sfsp++; 533 } 534 count++; 535 mtx_lock(&mountlist_mtx); 536 nmp = TAILQ_NEXT(mp, mnt_list); 537 vfs_unbusy(mp); 538 } 539 mtx_unlock(&mountlist_mtx); 540 if (sfsp && count > maxcount) 541 td->td_retval[0] = maxcount; 542 else 543 td->td_retval[0] = count; 544 return (0); 545 } 546 547 #ifdef COMPAT_FREEBSD4 548 /* 549 * Get old format filesystem statistics. 550 */ 551 static void cvtstatfs(struct statfs *, struct ostatfs *); 552 553 #ifndef _SYS_SYSPROTO_H_ 554 struct freebsd4_statfs_args { 555 char *path; 556 struct ostatfs *buf; 557 }; 558 #endif 559 int 560 freebsd4_statfs(td, uap) 561 struct thread *td; 562 struct freebsd4_statfs_args /* { 563 char *path; 564 struct ostatfs *buf; 565 } */ *uap; 566 { 567 struct ostatfs osb; 568 struct statfs sf; 569 int error; 570 571 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 572 if (error != 0) 573 return (error); 574 cvtstatfs(&sf, &osb); 575 return (copyout(&osb, uap->buf, sizeof(osb))); 576 } 577 578 /* 579 * Get filesystem statistics. 580 */ 581 #ifndef _SYS_SYSPROTO_H_ 582 struct freebsd4_fstatfs_args { 583 int fd; 584 struct ostatfs *buf; 585 }; 586 #endif 587 int 588 freebsd4_fstatfs(td, uap) 589 struct thread *td; 590 struct freebsd4_fstatfs_args /* { 591 int fd; 592 struct ostatfs *buf; 593 } */ *uap; 594 { 595 struct ostatfs osb; 596 struct statfs sf; 597 int error; 598 599 error = kern_fstatfs(td, uap->fd, &sf); 600 if (error != 0) 601 return (error); 602 cvtstatfs(&sf, &osb); 603 return (copyout(&osb, uap->buf, sizeof(osb))); 604 } 605 606 /* 607 * Get statistics on all filesystems. 608 */ 609 #ifndef _SYS_SYSPROTO_H_ 610 struct freebsd4_getfsstat_args { 611 struct ostatfs *buf; 612 long bufsize; 613 int flags; 614 }; 615 #endif 616 int 617 freebsd4_getfsstat(td, uap) 618 struct thread *td; 619 register struct freebsd4_getfsstat_args /* { 620 struct ostatfs *buf; 621 long bufsize; 622 int flags; 623 } */ *uap; 624 { 625 struct statfs *buf, *sp; 626 struct ostatfs osb; 627 size_t count, size; 628 int error; 629 630 count = uap->bufsize / sizeof(struct ostatfs); 631 size = count * sizeof(struct statfs); 632 error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags); 633 if (size > 0) { 634 count = td->td_retval[0]; 635 sp = buf; 636 while (count > 0 && error == 0) { 637 cvtstatfs(sp, &osb); 638 error = copyout(&osb, uap->buf, sizeof(osb)); 639 sp++; 640 uap->buf++; 641 count--; 642 } 643 free(buf, M_TEMP); 644 } 645 return (error); 646 } 647 648 /* 649 * Implement fstatfs() for (NFS) file handles. 650 */ 651 #ifndef _SYS_SYSPROTO_H_ 652 struct freebsd4_fhstatfs_args { 653 struct fhandle *u_fhp; 654 struct ostatfs *buf; 655 }; 656 #endif 657 int 658 freebsd4_fhstatfs(td, uap) 659 struct thread *td; 660 struct freebsd4_fhstatfs_args /* { 661 struct fhandle *u_fhp; 662 struct ostatfs *buf; 663 } */ *uap; 664 { 665 struct ostatfs osb; 666 struct statfs sf; 667 fhandle_t fh; 668 int error; 669 670 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 671 if (error != 0) 672 return (error); 673 error = kern_fhstatfs(td, fh, &sf); 674 if (error != 0) 675 return (error); 676 cvtstatfs(&sf, &osb); 677 return (copyout(&osb, uap->buf, sizeof(osb))); 678 } 679 680 /* 681 * Convert a new format statfs structure to an old format statfs structure. 682 */ 683 static void 684 cvtstatfs(nsp, osp) 685 struct statfs *nsp; 686 struct ostatfs *osp; 687 { 688 689 statfs_scale_blocks(nsp, LONG_MAX); 690 bzero(osp, sizeof(*osp)); 691 osp->f_bsize = nsp->f_bsize; 692 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 693 osp->f_blocks = nsp->f_blocks; 694 osp->f_bfree = nsp->f_bfree; 695 osp->f_bavail = nsp->f_bavail; 696 osp->f_files = MIN(nsp->f_files, LONG_MAX); 697 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 698 osp->f_owner = nsp->f_owner; 699 osp->f_type = nsp->f_type; 700 osp->f_flags = nsp->f_flags; 701 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 702 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 703 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 704 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 705 strlcpy(osp->f_fstypename, nsp->f_fstypename, 706 MIN(MFSNAMELEN, OMFSNAMELEN)); 707 strlcpy(osp->f_mntonname, nsp->f_mntonname, 708 MIN(MNAMELEN, OMNAMELEN)); 709 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 710 MIN(MNAMELEN, OMNAMELEN)); 711 osp->f_fsid = nsp->f_fsid; 712 } 713 #endif /* COMPAT_FREEBSD4 */ 714 715 /* 716 * Change current working directory to a given file descriptor. 717 */ 718 #ifndef _SYS_SYSPROTO_H_ 719 struct fchdir_args { 720 int fd; 721 }; 722 #endif 723 int 724 sys_fchdir(td, uap) 725 struct thread *td; 726 struct fchdir_args /* { 727 int fd; 728 } */ *uap; 729 { 730 register struct filedesc *fdp = td->td_proc->p_fd; 731 struct vnode *vp, *tdp, *vpold; 732 struct mount *mp; 733 struct file *fp; 734 cap_rights_t rights; 735 int error; 736 737 AUDIT_ARG_FD(uap->fd); 738 error = getvnode(fdp, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 739 &fp); 740 if (error != 0) 741 return (error); 742 vp = fp->f_vnode; 743 VREF(vp); 744 fdrop(fp, td); 745 vn_lock(vp, LK_SHARED | LK_RETRY); 746 AUDIT_ARG_VNODE1(vp); 747 error = change_dir(vp, td); 748 while (!error && (mp = vp->v_mountedhere) != NULL) { 749 if (vfs_busy(mp, 0)) 750 continue; 751 error = VFS_ROOT(mp, LK_SHARED, &tdp); 752 vfs_unbusy(mp); 753 if (error != 0) 754 break; 755 vput(vp); 756 vp = tdp; 757 } 758 if (error != 0) { 759 vput(vp); 760 return (error); 761 } 762 VOP_UNLOCK(vp, 0); 763 FILEDESC_XLOCK(fdp); 764 vpold = fdp->fd_cdir; 765 fdp->fd_cdir = vp; 766 FILEDESC_XUNLOCK(fdp); 767 vrele(vpold); 768 return (0); 769 } 770 771 /* 772 * Change current working directory (``.''). 773 */ 774 #ifndef _SYS_SYSPROTO_H_ 775 struct chdir_args { 776 char *path; 777 }; 778 #endif 779 int 780 sys_chdir(td, uap) 781 struct thread *td; 782 struct chdir_args /* { 783 char *path; 784 } */ *uap; 785 { 786 787 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 788 } 789 790 int 791 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 792 { 793 register struct filedesc *fdp = td->td_proc->p_fd; 794 struct nameidata nd; 795 struct vnode *vp; 796 int error; 797 798 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 799 pathseg, path, td); 800 if ((error = namei(&nd)) != 0) 801 return (error); 802 if ((error = change_dir(nd.ni_vp, td)) != 0) { 803 vput(nd.ni_vp); 804 NDFREE(&nd, NDF_ONLY_PNBUF); 805 return (error); 806 } 807 VOP_UNLOCK(nd.ni_vp, 0); 808 NDFREE(&nd, NDF_ONLY_PNBUF); 809 FILEDESC_XLOCK(fdp); 810 vp = fdp->fd_cdir; 811 fdp->fd_cdir = nd.ni_vp; 812 FILEDESC_XUNLOCK(fdp); 813 vrele(vp); 814 return (0); 815 } 816 817 /* 818 * Helper function for raised chroot(2) security function: Refuse if 819 * any filedescriptors are open directories. 820 */ 821 static int 822 chroot_refuse_vdir_fds(fdp) 823 struct filedesc *fdp; 824 { 825 struct vnode *vp; 826 struct file *fp; 827 int fd; 828 829 FILEDESC_LOCK_ASSERT(fdp); 830 831 for (fd = 0; fd <= fdp->fd_lastfile; fd++) { 832 fp = fget_locked(fdp, fd); 833 if (fp == NULL) 834 continue; 835 if (fp->f_type == DTYPE_VNODE) { 836 vp = fp->f_vnode; 837 if (vp->v_type == VDIR) 838 return (EPERM); 839 } 840 } 841 return (0); 842 } 843 844 /* 845 * This sysctl determines if we will allow a process to chroot(2) if it 846 * has a directory open: 847 * 0: disallowed for all processes. 848 * 1: allowed for processes that were not already chroot(2)'ed. 849 * 2: allowed for all processes. 850 */ 851 852 static int chroot_allow_open_directories = 1; 853 854 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 855 &chroot_allow_open_directories, 0, 856 "Allow a process to chroot(2) if it has a directory open"); 857 858 /* 859 * Change notion of root (``/'') directory. 860 */ 861 #ifndef _SYS_SYSPROTO_H_ 862 struct chroot_args { 863 char *path; 864 }; 865 #endif 866 int 867 sys_chroot(td, uap) 868 struct thread *td; 869 struct chroot_args /* { 870 char *path; 871 } */ *uap; 872 { 873 struct nameidata nd; 874 int error; 875 876 error = priv_check(td, PRIV_VFS_CHROOT); 877 if (error != 0) 878 return (error); 879 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 880 UIO_USERSPACE, uap->path, td); 881 error = namei(&nd); 882 if (error != 0) 883 goto error; 884 error = change_dir(nd.ni_vp, td); 885 if (error != 0) 886 goto e_vunlock; 887 #ifdef MAC 888 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 889 if (error != 0) 890 goto e_vunlock; 891 #endif 892 VOP_UNLOCK(nd.ni_vp, 0); 893 error = change_root(nd.ni_vp, td); 894 vrele(nd.ni_vp); 895 NDFREE(&nd, NDF_ONLY_PNBUF); 896 return (error); 897 e_vunlock: 898 vput(nd.ni_vp); 899 error: 900 NDFREE(&nd, NDF_ONLY_PNBUF); 901 return (error); 902 } 903 904 /* 905 * Common routine for chroot and chdir. Callers must provide a locked vnode 906 * instance. 907 */ 908 int 909 change_dir(vp, td) 910 struct vnode *vp; 911 struct thread *td; 912 { 913 #ifdef MAC 914 int error; 915 #endif 916 917 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 918 if (vp->v_type != VDIR) 919 return (ENOTDIR); 920 #ifdef MAC 921 error = mac_vnode_check_chdir(td->td_ucred, vp); 922 if (error != 0) 923 return (error); 924 #endif 925 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 926 } 927 928 /* 929 * Common routine for kern_chroot() and jail_attach(). The caller is 930 * responsible for invoking priv_check() and mac_vnode_check_chroot() to 931 * authorize this operation. 932 */ 933 int 934 change_root(vp, td) 935 struct vnode *vp; 936 struct thread *td; 937 { 938 struct filedesc *fdp; 939 struct vnode *oldvp; 940 int error; 941 942 fdp = td->td_proc->p_fd; 943 FILEDESC_XLOCK(fdp); 944 if (chroot_allow_open_directories == 0 || 945 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 946 error = chroot_refuse_vdir_fds(fdp); 947 if (error != 0) { 948 FILEDESC_XUNLOCK(fdp); 949 return (error); 950 } 951 } 952 oldvp = fdp->fd_rdir; 953 fdp->fd_rdir = vp; 954 VREF(fdp->fd_rdir); 955 if (!fdp->fd_jdir) { 956 fdp->fd_jdir = vp; 957 VREF(fdp->fd_jdir); 958 } 959 FILEDESC_XUNLOCK(fdp); 960 vrele(oldvp); 961 return (0); 962 } 963 964 static __inline void 965 flags_to_rights(int flags, cap_rights_t *rightsp) 966 { 967 968 if (flags & O_EXEC) { 969 cap_rights_set(rightsp, CAP_FEXECVE); 970 } else { 971 switch ((flags & O_ACCMODE)) { 972 case O_RDONLY: 973 cap_rights_set(rightsp, CAP_READ); 974 break; 975 case O_RDWR: 976 cap_rights_set(rightsp, CAP_READ); 977 /* FALLTHROUGH */ 978 case O_WRONLY: 979 cap_rights_set(rightsp, CAP_WRITE); 980 if (!(flags & (O_APPEND | O_TRUNC))) 981 cap_rights_set(rightsp, CAP_SEEK); 982 break; 983 } 984 } 985 986 if (flags & O_CREAT) 987 cap_rights_set(rightsp, CAP_CREATE); 988 989 if (flags & O_TRUNC) 990 cap_rights_set(rightsp, CAP_FTRUNCATE); 991 992 if (flags & (O_SYNC | O_FSYNC)) 993 cap_rights_set(rightsp, CAP_FSYNC); 994 995 if (flags & (O_EXLOCK | O_SHLOCK)) 996 cap_rights_set(rightsp, CAP_FLOCK); 997 } 998 999 /* 1000 * Check permissions, allocate an open file structure, and call the device 1001 * open routine if any. 1002 */ 1003 #ifndef _SYS_SYSPROTO_H_ 1004 struct open_args { 1005 char *path; 1006 int flags; 1007 int mode; 1008 }; 1009 #endif 1010 int 1011 sys_open(td, uap) 1012 struct thread *td; 1013 register struct open_args /* { 1014 char *path; 1015 int flags; 1016 int mode; 1017 } */ *uap; 1018 { 1019 1020 return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode)); 1021 } 1022 1023 #ifndef _SYS_SYSPROTO_H_ 1024 struct openat_args { 1025 int fd; 1026 char *path; 1027 int flag; 1028 int mode; 1029 }; 1030 #endif 1031 int 1032 sys_openat(struct thread *td, struct openat_args *uap) 1033 { 1034 1035 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1036 uap->mode)); 1037 } 1038 1039 int 1040 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags, 1041 int mode) 1042 { 1043 1044 return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode)); 1045 } 1046 1047 int 1048 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1049 int flags, int mode) 1050 { 1051 struct proc *p = td->td_proc; 1052 struct filedesc *fdp = p->p_fd; 1053 struct file *fp; 1054 struct vnode *vp; 1055 struct nameidata nd; 1056 cap_rights_t rights; 1057 int cmode, error, indx; 1058 1059 indx = -1; 1060 1061 AUDIT_ARG_FFLAGS(flags); 1062 AUDIT_ARG_MODE(mode); 1063 /* XXX: audit dirfd */ 1064 cap_rights_init(&rights, CAP_LOOKUP); 1065 flags_to_rights(flags, &rights); 1066 /* 1067 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1068 * may be specified. 1069 */ 1070 if (flags & O_EXEC) { 1071 if (flags & O_ACCMODE) 1072 return (EINVAL); 1073 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1074 return (EINVAL); 1075 } else { 1076 flags = FFLAGS(flags); 1077 } 1078 1079 /* 1080 * Allocate the file descriptor, but don't install a descriptor yet. 1081 */ 1082 error = falloc_noinstall(td, &fp); 1083 if (error != 0) 1084 return (error); 1085 /* 1086 * An extra reference on `fp' has been held for us by 1087 * falloc_noinstall(). 1088 */ 1089 /* Set the flags early so the finit in devfs can pick them up. */ 1090 fp->f_flag = flags & FMASK; 1091 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1092 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1093 &rights, td); 1094 td->td_dupfd = -1; /* XXX check for fdopen */ 1095 error = vn_open(&nd, &flags, cmode, fp); 1096 if (error != 0) { 1097 /* 1098 * If the vn_open replaced the method vector, something 1099 * wonderous happened deep below and we just pass it up 1100 * pretending we know what we do. 1101 */ 1102 if (error == ENXIO && fp->f_ops != &badfileops) 1103 goto success; 1104 1105 /* 1106 * Handle special fdopen() case. bleh. 1107 * 1108 * Don't do this for relative (capability) lookups; we don't 1109 * understand exactly what would happen, and we don't think 1110 * that it ever should. 1111 */ 1112 if (nd.ni_strictrelative == 0 && 1113 (error == ENODEV || error == ENXIO) && 1114 td->td_dupfd >= 0) { 1115 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1116 &indx); 1117 if (error == 0) 1118 goto success; 1119 } 1120 1121 goto bad; 1122 } 1123 td->td_dupfd = 0; 1124 NDFREE(&nd, NDF_ONLY_PNBUF); 1125 vp = nd.ni_vp; 1126 1127 /* 1128 * Store the vnode, for any f_type. Typically, the vnode use 1129 * count is decremented by direct call to vn_closefile() for 1130 * files that switched type in the cdevsw fdopen() method. 1131 */ 1132 fp->f_vnode = vp; 1133 /* 1134 * If the file wasn't claimed by devfs bind it to the normal 1135 * vnode operations here. 1136 */ 1137 if (fp->f_ops == &badfileops) { 1138 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1139 fp->f_seqcount = 1; 1140 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1141 DTYPE_VNODE, vp, &vnops); 1142 } 1143 1144 VOP_UNLOCK(vp, 0); 1145 if (flags & O_TRUNC) { 1146 error = fo_truncate(fp, 0, td->td_ucred, td); 1147 if (error != 0) 1148 goto bad; 1149 } 1150 success: 1151 /* 1152 * If we haven't already installed the FD (for dupfdopen), do so now. 1153 */ 1154 if (indx == -1) { 1155 struct filecaps *fcaps; 1156 1157 #ifdef CAPABILITIES 1158 if (nd.ni_strictrelative == 1) 1159 fcaps = &nd.ni_filecaps; 1160 else 1161 #endif 1162 fcaps = NULL; 1163 error = finstall(td, fp, &indx, flags, fcaps); 1164 /* On success finstall() consumes fcaps. */ 1165 if (error != 0) { 1166 filecaps_free(&nd.ni_filecaps); 1167 goto bad; 1168 } 1169 } else { 1170 filecaps_free(&nd.ni_filecaps); 1171 } 1172 1173 /* 1174 * Release our private reference, leaving the one associated with 1175 * the descriptor table intact. 1176 */ 1177 fdrop(fp, td); 1178 td->td_retval[0] = indx; 1179 return (0); 1180 bad: 1181 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1182 fdrop(fp, td); 1183 return (error); 1184 } 1185 1186 #ifdef COMPAT_43 1187 /* 1188 * Create a file. 1189 */ 1190 #ifndef _SYS_SYSPROTO_H_ 1191 struct ocreat_args { 1192 char *path; 1193 int mode; 1194 }; 1195 #endif 1196 int 1197 ocreat(td, uap) 1198 struct thread *td; 1199 register struct ocreat_args /* { 1200 char *path; 1201 int mode; 1202 } */ *uap; 1203 { 1204 1205 return (kern_open(td, uap->path, UIO_USERSPACE, 1206 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1207 } 1208 #endif /* COMPAT_43 */ 1209 1210 /* 1211 * Create a special file. 1212 */ 1213 #ifndef _SYS_SYSPROTO_H_ 1214 struct mknod_args { 1215 char *path; 1216 int mode; 1217 int dev; 1218 }; 1219 #endif 1220 int 1221 sys_mknod(td, uap) 1222 struct thread *td; 1223 register struct mknod_args /* { 1224 char *path; 1225 int mode; 1226 int dev; 1227 } */ *uap; 1228 { 1229 1230 return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); 1231 } 1232 1233 #ifndef _SYS_SYSPROTO_H_ 1234 struct mknodat_args { 1235 int fd; 1236 char *path; 1237 mode_t mode; 1238 dev_t dev; 1239 }; 1240 #endif 1241 int 1242 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1243 { 1244 1245 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1246 uap->dev)); 1247 } 1248 1249 int 1250 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode, 1251 int dev) 1252 { 1253 1254 return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev)); 1255 } 1256 1257 int 1258 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1259 int mode, int dev) 1260 { 1261 struct vnode *vp; 1262 struct mount *mp; 1263 struct vattr vattr; 1264 struct nameidata nd; 1265 cap_rights_t rights; 1266 int error, whiteout = 0; 1267 1268 AUDIT_ARG_MODE(mode); 1269 AUDIT_ARG_DEV(dev); 1270 switch (mode & S_IFMT) { 1271 case S_IFCHR: 1272 case S_IFBLK: 1273 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1274 break; 1275 case S_IFMT: 1276 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1277 break; 1278 case S_IFWHT: 1279 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1280 break; 1281 case S_IFIFO: 1282 if (dev == 0) 1283 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1284 /* FALLTHROUGH */ 1285 default: 1286 error = EINVAL; 1287 break; 1288 } 1289 if (error != 0) 1290 return (error); 1291 restart: 1292 bwillwrite(); 1293 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1294 pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), td); 1295 if ((error = namei(&nd)) != 0) 1296 return (error); 1297 vp = nd.ni_vp; 1298 if (vp != NULL) { 1299 NDFREE(&nd, NDF_ONLY_PNBUF); 1300 if (vp == nd.ni_dvp) 1301 vrele(nd.ni_dvp); 1302 else 1303 vput(nd.ni_dvp); 1304 vrele(vp); 1305 return (EEXIST); 1306 } else { 1307 VATTR_NULL(&vattr); 1308 vattr.va_mode = (mode & ALLPERMS) & 1309 ~td->td_proc->p_fd->fd_cmask; 1310 vattr.va_rdev = dev; 1311 whiteout = 0; 1312 1313 switch (mode & S_IFMT) { 1314 case S_IFMT: /* used by badsect to flag bad sectors */ 1315 vattr.va_type = VBAD; 1316 break; 1317 case S_IFCHR: 1318 vattr.va_type = VCHR; 1319 break; 1320 case S_IFBLK: 1321 vattr.va_type = VBLK; 1322 break; 1323 case S_IFWHT: 1324 whiteout = 1; 1325 break; 1326 default: 1327 panic("kern_mknod: invalid mode"); 1328 } 1329 } 1330 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1331 NDFREE(&nd, NDF_ONLY_PNBUF); 1332 vput(nd.ni_dvp); 1333 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1334 return (error); 1335 goto restart; 1336 } 1337 #ifdef MAC 1338 if (error == 0 && !whiteout) 1339 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1340 &nd.ni_cnd, &vattr); 1341 #endif 1342 if (error == 0) { 1343 if (whiteout) 1344 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1345 else { 1346 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1347 &nd.ni_cnd, &vattr); 1348 if (error == 0) 1349 vput(nd.ni_vp); 1350 } 1351 } 1352 NDFREE(&nd, NDF_ONLY_PNBUF); 1353 vput(nd.ni_dvp); 1354 vn_finished_write(mp); 1355 return (error); 1356 } 1357 1358 /* 1359 * Create a named pipe. 1360 */ 1361 #ifndef _SYS_SYSPROTO_H_ 1362 struct mkfifo_args { 1363 char *path; 1364 int mode; 1365 }; 1366 #endif 1367 int 1368 sys_mkfifo(td, uap) 1369 struct thread *td; 1370 register struct mkfifo_args /* { 1371 char *path; 1372 int mode; 1373 } */ *uap; 1374 { 1375 1376 return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode)); 1377 } 1378 1379 #ifndef _SYS_SYSPROTO_H_ 1380 struct mkfifoat_args { 1381 int fd; 1382 char *path; 1383 mode_t mode; 1384 }; 1385 #endif 1386 int 1387 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1388 { 1389 1390 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1391 uap->mode)); 1392 } 1393 1394 int 1395 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode) 1396 { 1397 1398 return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode)); 1399 } 1400 1401 int 1402 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1403 int mode) 1404 { 1405 struct mount *mp; 1406 struct vattr vattr; 1407 struct nameidata nd; 1408 cap_rights_t rights; 1409 int error; 1410 1411 AUDIT_ARG_MODE(mode); 1412 restart: 1413 bwillwrite(); 1414 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1415 pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), td); 1416 if ((error = namei(&nd)) != 0) 1417 return (error); 1418 if (nd.ni_vp != NULL) { 1419 NDFREE(&nd, NDF_ONLY_PNBUF); 1420 if (nd.ni_vp == nd.ni_dvp) 1421 vrele(nd.ni_dvp); 1422 else 1423 vput(nd.ni_dvp); 1424 vrele(nd.ni_vp); 1425 return (EEXIST); 1426 } 1427 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1428 NDFREE(&nd, NDF_ONLY_PNBUF); 1429 vput(nd.ni_dvp); 1430 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1431 return (error); 1432 goto restart; 1433 } 1434 VATTR_NULL(&vattr); 1435 vattr.va_type = VFIFO; 1436 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1437 #ifdef MAC 1438 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1439 &vattr); 1440 if (error != 0) 1441 goto out; 1442 #endif 1443 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1444 if (error == 0) 1445 vput(nd.ni_vp); 1446 #ifdef MAC 1447 out: 1448 #endif 1449 vput(nd.ni_dvp); 1450 vn_finished_write(mp); 1451 NDFREE(&nd, NDF_ONLY_PNBUF); 1452 return (error); 1453 } 1454 1455 /* 1456 * Make a hard file link. 1457 */ 1458 #ifndef _SYS_SYSPROTO_H_ 1459 struct link_args { 1460 char *path; 1461 char *link; 1462 }; 1463 #endif 1464 int 1465 sys_link(td, uap) 1466 struct thread *td; 1467 register struct link_args /* { 1468 char *path; 1469 char *link; 1470 } */ *uap; 1471 { 1472 1473 return (kern_link(td, uap->path, uap->link, UIO_USERSPACE)); 1474 } 1475 1476 #ifndef _SYS_SYSPROTO_H_ 1477 struct linkat_args { 1478 int fd1; 1479 char *path1; 1480 int fd2; 1481 char *path2; 1482 int flag; 1483 }; 1484 #endif 1485 int 1486 sys_linkat(struct thread *td, struct linkat_args *uap) 1487 { 1488 int flag; 1489 1490 flag = uap->flag; 1491 if (flag & ~AT_SYMLINK_FOLLOW) 1492 return (EINVAL); 1493 1494 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1495 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); 1496 } 1497 1498 int hardlink_check_uid = 0; 1499 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1500 &hardlink_check_uid, 0, 1501 "Unprivileged processes cannot create hard links to files owned by other " 1502 "users"); 1503 static int hardlink_check_gid = 0; 1504 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1505 &hardlink_check_gid, 0, 1506 "Unprivileged processes cannot create hard links to files owned by other " 1507 "groups"); 1508 1509 static int 1510 can_hardlink(struct vnode *vp, struct ucred *cred) 1511 { 1512 struct vattr va; 1513 int error; 1514 1515 if (!hardlink_check_uid && !hardlink_check_gid) 1516 return (0); 1517 1518 error = VOP_GETATTR(vp, &va, cred); 1519 if (error != 0) 1520 return (error); 1521 1522 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1523 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1524 if (error != 0) 1525 return (error); 1526 } 1527 1528 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1529 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1530 if (error != 0) 1531 return (error); 1532 } 1533 1534 return (0); 1535 } 1536 1537 int 1538 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg) 1539 { 1540 1541 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW)); 1542 } 1543 1544 int 1545 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1546 enum uio_seg segflg, int follow) 1547 { 1548 struct vnode *vp; 1549 struct mount *mp; 1550 struct nameidata nd; 1551 cap_rights_t rights; 1552 int error; 1553 1554 bwillwrite(); 1555 NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td); 1556 1557 again: 1558 if ((error = namei(&nd)) != 0) 1559 return (error); 1560 NDFREE(&nd, NDF_ONLY_PNBUF); 1561 vp = nd.ni_vp; 1562 if (vp->v_type == VDIR) { 1563 vrele(vp); 1564 return (EPERM); /* POSIX */ 1565 } 1566 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 1567 vrele(vp); 1568 return (error); 1569 } 1570 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2, 1571 segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT), td); 1572 if ((error = namei(&nd)) == 0) { 1573 if (nd.ni_vp != NULL) { 1574 if (nd.ni_dvp == nd.ni_vp) 1575 vrele(nd.ni_dvp); 1576 else 1577 vput(nd.ni_dvp); 1578 vrele(nd.ni_vp); 1579 error = EEXIST; 1580 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1581 /* 1582 * Check for cross-device links. No need to 1583 * recheck vp->v_type, since it cannot change 1584 * for non-doomed vnode. 1585 */ 1586 if (nd.ni_dvp->v_mount != vp->v_mount) 1587 error = EXDEV; 1588 else 1589 error = can_hardlink(vp, td->td_ucred); 1590 if (error == 0) 1591 #ifdef MAC 1592 error = mac_vnode_check_link(td->td_ucred, 1593 nd.ni_dvp, vp, &nd.ni_cnd); 1594 if (error == 0) 1595 #endif 1596 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1597 VOP_UNLOCK(vp, 0); 1598 vput(nd.ni_dvp); 1599 } else { 1600 vput(nd.ni_dvp); 1601 NDFREE(&nd, NDF_ONLY_PNBUF); 1602 vrele(vp); 1603 vn_finished_write(mp); 1604 goto again; 1605 } 1606 NDFREE(&nd, NDF_ONLY_PNBUF); 1607 } 1608 vrele(vp); 1609 vn_finished_write(mp); 1610 return (error); 1611 } 1612 1613 /* 1614 * Make a symbolic link. 1615 */ 1616 #ifndef _SYS_SYSPROTO_H_ 1617 struct symlink_args { 1618 char *path; 1619 char *link; 1620 }; 1621 #endif 1622 int 1623 sys_symlink(td, uap) 1624 struct thread *td; 1625 register struct symlink_args /* { 1626 char *path; 1627 char *link; 1628 } */ *uap; 1629 { 1630 1631 return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE)); 1632 } 1633 1634 #ifndef _SYS_SYSPROTO_H_ 1635 struct symlinkat_args { 1636 char *path; 1637 int fd; 1638 char *path2; 1639 }; 1640 #endif 1641 int 1642 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1643 { 1644 1645 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1646 UIO_USERSPACE)); 1647 } 1648 1649 int 1650 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg) 1651 { 1652 1653 return (kern_symlinkat(td, path, AT_FDCWD, link, segflg)); 1654 } 1655 1656 int 1657 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1658 enum uio_seg segflg) 1659 { 1660 struct mount *mp; 1661 struct vattr vattr; 1662 char *syspath; 1663 struct nameidata nd; 1664 int error; 1665 cap_rights_t rights; 1666 1667 if (segflg == UIO_SYSSPACE) { 1668 syspath = path1; 1669 } else { 1670 syspath = uma_zalloc(namei_zone, M_WAITOK); 1671 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1672 goto out; 1673 } 1674 AUDIT_ARG_TEXT(syspath); 1675 restart: 1676 bwillwrite(); 1677 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1678 segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), td); 1679 if ((error = namei(&nd)) != 0) 1680 goto out; 1681 if (nd.ni_vp) { 1682 NDFREE(&nd, NDF_ONLY_PNBUF); 1683 if (nd.ni_vp == nd.ni_dvp) 1684 vrele(nd.ni_dvp); 1685 else 1686 vput(nd.ni_dvp); 1687 vrele(nd.ni_vp); 1688 error = EEXIST; 1689 goto out; 1690 } 1691 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1692 NDFREE(&nd, NDF_ONLY_PNBUF); 1693 vput(nd.ni_dvp); 1694 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1695 goto out; 1696 goto restart; 1697 } 1698 VATTR_NULL(&vattr); 1699 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1700 #ifdef MAC 1701 vattr.va_type = VLNK; 1702 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1703 &vattr); 1704 if (error != 0) 1705 goto out2; 1706 #endif 1707 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1708 if (error == 0) 1709 vput(nd.ni_vp); 1710 #ifdef MAC 1711 out2: 1712 #endif 1713 NDFREE(&nd, NDF_ONLY_PNBUF); 1714 vput(nd.ni_dvp); 1715 vn_finished_write(mp); 1716 out: 1717 if (segflg != UIO_SYSSPACE) 1718 uma_zfree(namei_zone, syspath); 1719 return (error); 1720 } 1721 1722 /* 1723 * Delete a whiteout from the filesystem. 1724 */ 1725 int 1726 sys_undelete(td, uap) 1727 struct thread *td; 1728 register struct undelete_args /* { 1729 char *path; 1730 } */ *uap; 1731 { 1732 struct mount *mp; 1733 struct nameidata nd; 1734 int error; 1735 1736 restart: 1737 bwillwrite(); 1738 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1739 UIO_USERSPACE, uap->path, td); 1740 error = namei(&nd); 1741 if (error != 0) 1742 return (error); 1743 1744 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1745 NDFREE(&nd, NDF_ONLY_PNBUF); 1746 if (nd.ni_vp == nd.ni_dvp) 1747 vrele(nd.ni_dvp); 1748 else 1749 vput(nd.ni_dvp); 1750 if (nd.ni_vp) 1751 vrele(nd.ni_vp); 1752 return (EEXIST); 1753 } 1754 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1755 NDFREE(&nd, NDF_ONLY_PNBUF); 1756 vput(nd.ni_dvp); 1757 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1758 return (error); 1759 goto restart; 1760 } 1761 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1762 NDFREE(&nd, NDF_ONLY_PNBUF); 1763 vput(nd.ni_dvp); 1764 vn_finished_write(mp); 1765 return (error); 1766 } 1767 1768 /* 1769 * Delete a name from the filesystem. 1770 */ 1771 #ifndef _SYS_SYSPROTO_H_ 1772 struct unlink_args { 1773 char *path; 1774 }; 1775 #endif 1776 int 1777 sys_unlink(td, uap) 1778 struct thread *td; 1779 struct unlink_args /* { 1780 char *path; 1781 } */ *uap; 1782 { 1783 1784 return (kern_unlink(td, uap->path, UIO_USERSPACE)); 1785 } 1786 1787 #ifndef _SYS_SYSPROTO_H_ 1788 struct unlinkat_args { 1789 int fd; 1790 char *path; 1791 int flag; 1792 }; 1793 #endif 1794 int 1795 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1796 { 1797 int flag = uap->flag; 1798 int fd = uap->fd; 1799 char *path = uap->path; 1800 1801 if (flag & ~AT_REMOVEDIR) 1802 return (EINVAL); 1803 1804 if (flag & AT_REMOVEDIR) 1805 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1806 else 1807 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1808 } 1809 1810 int 1811 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg) 1812 { 1813 1814 return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0)); 1815 } 1816 1817 int 1818 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1819 ino_t oldinum) 1820 { 1821 struct mount *mp; 1822 struct vnode *vp; 1823 struct nameidata nd; 1824 struct stat sb; 1825 cap_rights_t rights; 1826 int error; 1827 1828 restart: 1829 bwillwrite(); 1830 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1831 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1832 if ((error = namei(&nd)) != 0) 1833 return (error == EINVAL ? EPERM : error); 1834 vp = nd.ni_vp; 1835 if (vp->v_type == VDIR && oldinum == 0) { 1836 error = EPERM; /* POSIX */ 1837 } else if (oldinum != 0 && 1838 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1839 sb.st_ino != oldinum) { 1840 error = EIDRM; /* Identifier removed */ 1841 } else { 1842 /* 1843 * The root of a mounted filesystem cannot be deleted. 1844 * 1845 * XXX: can this only be a VDIR case? 1846 */ 1847 if (vp->v_vflag & VV_ROOT) 1848 error = EBUSY; 1849 } 1850 if (error == 0) { 1851 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1852 NDFREE(&nd, NDF_ONLY_PNBUF); 1853 vput(nd.ni_dvp); 1854 if (vp == nd.ni_dvp) 1855 vrele(vp); 1856 else 1857 vput(vp); 1858 if ((error = vn_start_write(NULL, &mp, 1859 V_XSLEEP | PCATCH)) != 0) 1860 return (error); 1861 goto restart; 1862 } 1863 #ifdef MAC 1864 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1865 &nd.ni_cnd); 1866 if (error != 0) 1867 goto out; 1868 #endif 1869 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1870 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1871 #ifdef MAC 1872 out: 1873 #endif 1874 vn_finished_write(mp); 1875 } 1876 NDFREE(&nd, NDF_ONLY_PNBUF); 1877 vput(nd.ni_dvp); 1878 if (vp == nd.ni_dvp) 1879 vrele(vp); 1880 else 1881 vput(vp); 1882 return (error); 1883 } 1884 1885 /* 1886 * Reposition read/write file offset. 1887 */ 1888 #ifndef _SYS_SYSPROTO_H_ 1889 struct lseek_args { 1890 int fd; 1891 int pad; 1892 off_t offset; 1893 int whence; 1894 }; 1895 #endif 1896 int 1897 sys_lseek(td, uap) 1898 struct thread *td; 1899 register struct lseek_args /* { 1900 int fd; 1901 int pad; 1902 off_t offset; 1903 int whence; 1904 } */ *uap; 1905 { 1906 struct file *fp; 1907 cap_rights_t rights; 1908 int error; 1909 1910 AUDIT_ARG_FD(uap->fd); 1911 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1912 if (error != 0) 1913 return (error); 1914 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1915 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1916 fdrop(fp, td); 1917 return (error); 1918 } 1919 1920 #if defined(COMPAT_43) 1921 /* 1922 * Reposition read/write file offset. 1923 */ 1924 #ifndef _SYS_SYSPROTO_H_ 1925 struct olseek_args { 1926 int fd; 1927 long offset; 1928 int whence; 1929 }; 1930 #endif 1931 int 1932 olseek(td, uap) 1933 struct thread *td; 1934 register struct olseek_args /* { 1935 int fd; 1936 long offset; 1937 int whence; 1938 } */ *uap; 1939 { 1940 struct lseek_args /* { 1941 int fd; 1942 int pad; 1943 off_t offset; 1944 int whence; 1945 } */ nuap; 1946 1947 nuap.fd = uap->fd; 1948 nuap.offset = uap->offset; 1949 nuap.whence = uap->whence; 1950 return (sys_lseek(td, &nuap)); 1951 } 1952 #endif /* COMPAT_43 */ 1953 1954 /* Version with the 'pad' argument */ 1955 int 1956 freebsd6_lseek(td, uap) 1957 struct thread *td; 1958 register struct freebsd6_lseek_args *uap; 1959 { 1960 struct lseek_args ouap; 1961 1962 ouap.fd = uap->fd; 1963 ouap.offset = uap->offset; 1964 ouap.whence = uap->whence; 1965 return (sys_lseek(td, &ouap)); 1966 } 1967 1968 /* 1969 * Check access permissions using passed credentials. 1970 */ 1971 static int 1972 vn_access(vp, user_flags, cred, td) 1973 struct vnode *vp; 1974 int user_flags; 1975 struct ucred *cred; 1976 struct thread *td; 1977 { 1978 accmode_t accmode; 1979 int error; 1980 1981 /* Flags == 0 means only check for existence. */ 1982 error = 0; 1983 if (user_flags) { 1984 accmode = 0; 1985 if (user_flags & R_OK) 1986 accmode |= VREAD; 1987 if (user_flags & W_OK) 1988 accmode |= VWRITE; 1989 if (user_flags & X_OK) 1990 accmode |= VEXEC; 1991 #ifdef MAC 1992 error = mac_vnode_check_access(cred, vp, accmode); 1993 if (error != 0) 1994 return (error); 1995 #endif 1996 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1997 error = VOP_ACCESS(vp, accmode, cred, td); 1998 } 1999 return (error); 2000 } 2001 2002 /* 2003 * Check access permissions using "real" credentials. 2004 */ 2005 #ifndef _SYS_SYSPROTO_H_ 2006 struct access_args { 2007 char *path; 2008 int amode; 2009 }; 2010 #endif 2011 int 2012 sys_access(td, uap) 2013 struct thread *td; 2014 register struct access_args /* { 2015 char *path; 2016 int amode; 2017 } */ *uap; 2018 { 2019 2020 return (kern_access(td, uap->path, UIO_USERSPACE, uap->amode)); 2021 } 2022 2023 #ifndef _SYS_SYSPROTO_H_ 2024 struct faccessat_args { 2025 int dirfd; 2026 char *path; 2027 int amode; 2028 int flag; 2029 } 2030 #endif 2031 int 2032 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2033 { 2034 2035 if (uap->flag & ~AT_EACCESS) 2036 return (EINVAL); 2037 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2038 uap->amode)); 2039 } 2040 2041 int 2042 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2043 { 2044 2045 return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, amode)); 2046 } 2047 2048 int 2049 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2050 int flag, int amode) 2051 { 2052 struct ucred *cred, *tmpcred; 2053 struct vnode *vp; 2054 struct nameidata nd; 2055 cap_rights_t rights; 2056 int error; 2057 2058 /* 2059 * Create and modify a temporary credential instead of one that 2060 * is potentially shared. 2061 */ 2062 if (!(flag & AT_EACCESS)) { 2063 cred = td->td_ucred; 2064 tmpcred = crdup(cred); 2065 tmpcred->cr_uid = cred->cr_ruid; 2066 tmpcred->cr_groups[0] = cred->cr_rgid; 2067 td->td_ucred = tmpcred; 2068 } else 2069 cred = tmpcred = td->td_ucred; 2070 AUDIT_ARG_VALUE(amode); 2071 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2072 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 2073 td); 2074 if ((error = namei(&nd)) != 0) 2075 goto out1; 2076 vp = nd.ni_vp; 2077 2078 error = vn_access(vp, amode, tmpcred, td); 2079 NDFREE(&nd, NDF_ONLY_PNBUF); 2080 vput(vp); 2081 out1: 2082 if (!(flag & AT_EACCESS)) { 2083 td->td_ucred = cred; 2084 crfree(tmpcred); 2085 } 2086 return (error); 2087 } 2088 2089 /* 2090 * Check access permissions using "effective" credentials. 2091 */ 2092 #ifndef _SYS_SYSPROTO_H_ 2093 struct eaccess_args { 2094 char *path; 2095 int amode; 2096 }; 2097 #endif 2098 int 2099 sys_eaccess(td, uap) 2100 struct thread *td; 2101 register struct eaccess_args /* { 2102 char *path; 2103 int amode; 2104 } */ *uap; 2105 { 2106 2107 return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->amode)); 2108 } 2109 2110 int 2111 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2112 { 2113 2114 return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, amode)); 2115 } 2116 2117 #if defined(COMPAT_43) 2118 /* 2119 * Get file status; this version follows links. 2120 */ 2121 #ifndef _SYS_SYSPROTO_H_ 2122 struct ostat_args { 2123 char *path; 2124 struct ostat *ub; 2125 }; 2126 #endif 2127 int 2128 ostat(td, uap) 2129 struct thread *td; 2130 register struct ostat_args /* { 2131 char *path; 2132 struct ostat *ub; 2133 } */ *uap; 2134 { 2135 struct stat sb; 2136 struct ostat osb; 2137 int error; 2138 2139 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2140 if (error != 0) 2141 return (error); 2142 cvtstat(&sb, &osb); 2143 return (copyout(&osb, uap->ub, sizeof (osb))); 2144 } 2145 2146 /* 2147 * Get file status; this version does not follow links. 2148 */ 2149 #ifndef _SYS_SYSPROTO_H_ 2150 struct olstat_args { 2151 char *path; 2152 struct ostat *ub; 2153 }; 2154 #endif 2155 int 2156 olstat(td, uap) 2157 struct thread *td; 2158 register struct olstat_args /* { 2159 char *path; 2160 struct ostat *ub; 2161 } */ *uap; 2162 { 2163 struct stat sb; 2164 struct ostat osb; 2165 int error; 2166 2167 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2168 if (error != 0) 2169 return (error); 2170 cvtstat(&sb, &osb); 2171 return (copyout(&osb, uap->ub, sizeof (osb))); 2172 } 2173 2174 /* 2175 * Convert from an old to a new stat structure. 2176 */ 2177 void 2178 cvtstat(st, ost) 2179 struct stat *st; 2180 struct ostat *ost; 2181 { 2182 2183 ost->st_dev = st->st_dev; 2184 ost->st_ino = st->st_ino; 2185 ost->st_mode = st->st_mode; 2186 ost->st_nlink = st->st_nlink; 2187 ost->st_uid = st->st_uid; 2188 ost->st_gid = st->st_gid; 2189 ost->st_rdev = st->st_rdev; 2190 if (st->st_size < (quad_t)1 << 32) 2191 ost->st_size = st->st_size; 2192 else 2193 ost->st_size = -2; 2194 ost->st_atim = st->st_atim; 2195 ost->st_mtim = st->st_mtim; 2196 ost->st_ctim = st->st_ctim; 2197 ost->st_blksize = st->st_blksize; 2198 ost->st_blocks = st->st_blocks; 2199 ost->st_flags = st->st_flags; 2200 ost->st_gen = st->st_gen; 2201 } 2202 #endif /* COMPAT_43 */ 2203 2204 /* 2205 * Get file status; this version follows links. 2206 */ 2207 #ifndef _SYS_SYSPROTO_H_ 2208 struct stat_args { 2209 char *path; 2210 struct stat *ub; 2211 }; 2212 #endif 2213 int 2214 sys_stat(td, uap) 2215 struct thread *td; 2216 register struct stat_args /* { 2217 char *path; 2218 struct stat *ub; 2219 } */ *uap; 2220 { 2221 struct stat sb; 2222 int error; 2223 2224 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2225 if (error == 0) 2226 error = copyout(&sb, uap->ub, sizeof (sb)); 2227 return (error); 2228 } 2229 2230 #ifndef _SYS_SYSPROTO_H_ 2231 struct fstatat_args { 2232 int fd; 2233 char *path; 2234 struct stat *buf; 2235 int flag; 2236 } 2237 #endif 2238 int 2239 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2240 { 2241 struct stat sb; 2242 int error; 2243 2244 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2245 UIO_USERSPACE, &sb); 2246 if (error == 0) 2247 error = copyout(&sb, uap->buf, sizeof (sb)); 2248 return (error); 2249 } 2250 2251 int 2252 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2253 { 2254 2255 return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp)); 2256 } 2257 2258 int 2259 kern_statat(struct thread *td, int flag, int fd, char *path, 2260 enum uio_seg pathseg, struct stat *sbp) 2261 { 2262 2263 return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL)); 2264 } 2265 2266 int 2267 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path, 2268 enum uio_seg pathseg, struct stat *sbp, 2269 void (*hook)(struct vnode *vp, struct stat *sbp)) 2270 { 2271 struct nameidata nd; 2272 struct stat sb; 2273 cap_rights_t rights; 2274 int error; 2275 2276 if (flag & ~AT_SYMLINK_NOFOLLOW) 2277 return (EINVAL); 2278 2279 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2280 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2281 cap_rights_init(&rights, CAP_FSTAT), td); 2282 2283 if ((error = namei(&nd)) != 0) 2284 return (error); 2285 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2286 if (error == 0) { 2287 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0); 2288 if (S_ISREG(sb.st_mode)) 2289 SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0); 2290 if (__predict_false(hook != NULL)) 2291 hook(nd.ni_vp, &sb); 2292 } 2293 NDFREE(&nd, NDF_ONLY_PNBUF); 2294 vput(nd.ni_vp); 2295 if (error != 0) 2296 return (error); 2297 *sbp = sb; 2298 #ifdef KTRACE 2299 if (KTRPOINT(td, KTR_STRUCT)) 2300 ktrstat(&sb); 2301 #endif 2302 return (0); 2303 } 2304 2305 /* 2306 * Get file status; this version does not follow links. 2307 */ 2308 #ifndef _SYS_SYSPROTO_H_ 2309 struct lstat_args { 2310 char *path; 2311 struct stat *ub; 2312 }; 2313 #endif 2314 int 2315 sys_lstat(td, uap) 2316 struct thread *td; 2317 register struct lstat_args /* { 2318 char *path; 2319 struct stat *ub; 2320 } */ *uap; 2321 { 2322 struct stat sb; 2323 int error; 2324 2325 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2326 if (error == 0) 2327 error = copyout(&sb, uap->ub, sizeof (sb)); 2328 return (error); 2329 } 2330 2331 int 2332 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2333 { 2334 2335 return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg, 2336 sbp)); 2337 } 2338 2339 /* 2340 * Implementation of the NetBSD [l]stat() functions. 2341 */ 2342 void 2343 cvtnstat(sb, nsb) 2344 struct stat *sb; 2345 struct nstat *nsb; 2346 { 2347 2348 bzero(nsb, sizeof *nsb); 2349 nsb->st_dev = sb->st_dev; 2350 nsb->st_ino = sb->st_ino; 2351 nsb->st_mode = sb->st_mode; 2352 nsb->st_nlink = sb->st_nlink; 2353 nsb->st_uid = sb->st_uid; 2354 nsb->st_gid = sb->st_gid; 2355 nsb->st_rdev = sb->st_rdev; 2356 nsb->st_atim = sb->st_atim; 2357 nsb->st_mtim = sb->st_mtim; 2358 nsb->st_ctim = sb->st_ctim; 2359 nsb->st_size = sb->st_size; 2360 nsb->st_blocks = sb->st_blocks; 2361 nsb->st_blksize = sb->st_blksize; 2362 nsb->st_flags = sb->st_flags; 2363 nsb->st_gen = sb->st_gen; 2364 nsb->st_birthtim = sb->st_birthtim; 2365 } 2366 2367 #ifndef _SYS_SYSPROTO_H_ 2368 struct nstat_args { 2369 char *path; 2370 struct nstat *ub; 2371 }; 2372 #endif 2373 int 2374 sys_nstat(td, uap) 2375 struct thread *td; 2376 register struct nstat_args /* { 2377 char *path; 2378 struct nstat *ub; 2379 } */ *uap; 2380 { 2381 struct stat sb; 2382 struct nstat nsb; 2383 int error; 2384 2385 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2386 if (error != 0) 2387 return (error); 2388 cvtnstat(&sb, &nsb); 2389 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2390 } 2391 2392 /* 2393 * NetBSD lstat. Get file status; this version does not follow links. 2394 */ 2395 #ifndef _SYS_SYSPROTO_H_ 2396 struct lstat_args { 2397 char *path; 2398 struct stat *ub; 2399 }; 2400 #endif 2401 int 2402 sys_nlstat(td, uap) 2403 struct thread *td; 2404 register struct nlstat_args /* { 2405 char *path; 2406 struct nstat *ub; 2407 } */ *uap; 2408 { 2409 struct stat sb; 2410 struct nstat nsb; 2411 int error; 2412 2413 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2414 if (error != 0) 2415 return (error); 2416 cvtnstat(&sb, &nsb); 2417 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2418 } 2419 2420 /* 2421 * Get configurable pathname variables. 2422 */ 2423 #ifndef _SYS_SYSPROTO_H_ 2424 struct pathconf_args { 2425 char *path; 2426 int name; 2427 }; 2428 #endif 2429 int 2430 sys_pathconf(td, uap) 2431 struct thread *td; 2432 register struct pathconf_args /* { 2433 char *path; 2434 int name; 2435 } */ *uap; 2436 { 2437 2438 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2439 } 2440 2441 #ifndef _SYS_SYSPROTO_H_ 2442 struct lpathconf_args { 2443 char *path; 2444 int name; 2445 }; 2446 #endif 2447 int 2448 sys_lpathconf(td, uap) 2449 struct thread *td; 2450 register struct lpathconf_args /* { 2451 char *path; 2452 int name; 2453 } */ *uap; 2454 { 2455 2456 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2457 NOFOLLOW)); 2458 } 2459 2460 int 2461 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2462 u_long flags) 2463 { 2464 struct nameidata nd; 2465 int error; 2466 2467 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2468 pathseg, path, td); 2469 if ((error = namei(&nd)) != 0) 2470 return (error); 2471 NDFREE(&nd, NDF_ONLY_PNBUF); 2472 2473 /* If asynchronous I/O is available, it works for all files. */ 2474 if (name == _PC_ASYNC_IO) 2475 td->td_retval[0] = async_io_version; 2476 else 2477 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2478 vput(nd.ni_vp); 2479 return (error); 2480 } 2481 2482 /* 2483 * Return target name of a symbolic link. 2484 */ 2485 #ifndef _SYS_SYSPROTO_H_ 2486 struct readlink_args { 2487 char *path; 2488 char *buf; 2489 size_t count; 2490 }; 2491 #endif 2492 int 2493 sys_readlink(td, uap) 2494 struct thread *td; 2495 register struct readlink_args /* { 2496 char *path; 2497 char *buf; 2498 size_t count; 2499 } */ *uap; 2500 { 2501 2502 return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf, 2503 UIO_USERSPACE, uap->count)); 2504 } 2505 #ifndef _SYS_SYSPROTO_H_ 2506 struct readlinkat_args { 2507 int fd; 2508 char *path; 2509 char *buf; 2510 size_t bufsize; 2511 }; 2512 #endif 2513 int 2514 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2515 { 2516 2517 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2518 uap->buf, UIO_USERSPACE, uap->bufsize)); 2519 } 2520 2521 int 2522 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf, 2523 enum uio_seg bufseg, size_t count) 2524 { 2525 2526 return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg, 2527 count)); 2528 } 2529 2530 int 2531 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2532 char *buf, enum uio_seg bufseg, size_t count) 2533 { 2534 struct vnode *vp; 2535 struct iovec aiov; 2536 struct uio auio; 2537 struct nameidata nd; 2538 int error; 2539 2540 if (count > IOSIZE_MAX) 2541 return (EINVAL); 2542 2543 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2544 pathseg, path, fd, td); 2545 2546 if ((error = namei(&nd)) != 0) 2547 return (error); 2548 NDFREE(&nd, NDF_ONLY_PNBUF); 2549 vp = nd.ni_vp; 2550 #ifdef MAC 2551 error = mac_vnode_check_readlink(td->td_ucred, vp); 2552 if (error != 0) { 2553 vput(vp); 2554 return (error); 2555 } 2556 #endif 2557 if (vp->v_type != VLNK) 2558 error = EINVAL; 2559 else { 2560 aiov.iov_base = buf; 2561 aiov.iov_len = count; 2562 auio.uio_iov = &aiov; 2563 auio.uio_iovcnt = 1; 2564 auio.uio_offset = 0; 2565 auio.uio_rw = UIO_READ; 2566 auio.uio_segflg = bufseg; 2567 auio.uio_td = td; 2568 auio.uio_resid = count; 2569 error = VOP_READLINK(vp, &auio, td->td_ucred); 2570 td->td_retval[0] = count - auio.uio_resid; 2571 } 2572 vput(vp); 2573 return (error); 2574 } 2575 2576 /* 2577 * Common implementation code for chflags() and fchflags(). 2578 */ 2579 static int 2580 setfflags(td, vp, flags) 2581 struct thread *td; 2582 struct vnode *vp; 2583 u_long flags; 2584 { 2585 struct mount *mp; 2586 struct vattr vattr; 2587 int error; 2588 2589 /* We can't support the value matching VNOVAL. */ 2590 if (flags == VNOVAL) 2591 return (EOPNOTSUPP); 2592 2593 /* 2594 * Prevent non-root users from setting flags on devices. When 2595 * a device is reused, users can retain ownership of the device 2596 * if they are allowed to set flags and programs assume that 2597 * chown can't fail when done as root. 2598 */ 2599 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2600 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2601 if (error != 0) 2602 return (error); 2603 } 2604 2605 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2606 return (error); 2607 VATTR_NULL(&vattr); 2608 vattr.va_flags = flags; 2609 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2610 #ifdef MAC 2611 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2612 if (error == 0) 2613 #endif 2614 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2615 VOP_UNLOCK(vp, 0); 2616 vn_finished_write(mp); 2617 return (error); 2618 } 2619 2620 /* 2621 * Change flags of a file given a path name. 2622 */ 2623 #ifndef _SYS_SYSPROTO_H_ 2624 struct chflags_args { 2625 const char *path; 2626 u_long flags; 2627 }; 2628 #endif 2629 int 2630 sys_chflags(td, uap) 2631 struct thread *td; 2632 register struct chflags_args /* { 2633 const char *path; 2634 u_long flags; 2635 } */ *uap; 2636 { 2637 2638 return (kern_chflags(td, uap->path, UIO_USERSPACE, uap->flags)); 2639 } 2640 2641 #ifndef _SYS_SYSPROTO_H_ 2642 struct chflagsat_args { 2643 int fd; 2644 const char *path; 2645 u_long flags; 2646 int atflag; 2647 } 2648 #endif 2649 int 2650 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2651 { 2652 int fd = uap->fd; 2653 const char *path = uap->path; 2654 u_long flags = uap->flags; 2655 int atflag = uap->atflag; 2656 2657 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2658 return (EINVAL); 2659 2660 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2661 } 2662 2663 static int 2664 kern_chflags(struct thread *td, const char *path, enum uio_seg pathseg, 2665 u_long flags) 2666 { 2667 2668 return (kern_chflagsat(td, AT_FDCWD, path, pathseg, flags, 0)); 2669 } 2670 2671 /* 2672 * Same as chflags() but doesn't follow symlinks. 2673 */ 2674 int 2675 sys_lchflags(td, uap) 2676 struct thread *td; 2677 register struct lchflags_args /* { 2678 const char *path; 2679 u_long flags; 2680 } */ *uap; 2681 { 2682 2683 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2684 uap->flags, AT_SYMLINK_NOFOLLOW)); 2685 } 2686 2687 static int 2688 kern_chflagsat(struct thread *td, int fd, const char *path, 2689 enum uio_seg pathseg, u_long flags, int atflag) 2690 { 2691 struct nameidata nd; 2692 cap_rights_t rights; 2693 int error, follow; 2694 2695 AUDIT_ARG_FFLAGS(flags); 2696 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2697 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2698 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2699 if ((error = namei(&nd)) != 0) 2700 return (error); 2701 NDFREE(&nd, NDF_ONLY_PNBUF); 2702 error = setfflags(td, nd.ni_vp, flags); 2703 vrele(nd.ni_vp); 2704 return (error); 2705 } 2706 2707 /* 2708 * Change flags of a file given a file descriptor. 2709 */ 2710 #ifndef _SYS_SYSPROTO_H_ 2711 struct fchflags_args { 2712 int fd; 2713 u_long flags; 2714 }; 2715 #endif 2716 int 2717 sys_fchflags(td, uap) 2718 struct thread *td; 2719 register struct fchflags_args /* { 2720 int fd; 2721 u_long flags; 2722 } */ *uap; 2723 { 2724 struct file *fp; 2725 cap_rights_t rights; 2726 int error; 2727 2728 AUDIT_ARG_FD(uap->fd); 2729 AUDIT_ARG_FFLAGS(uap->flags); 2730 error = getvnode(td->td_proc->p_fd, uap->fd, 2731 cap_rights_init(&rights, CAP_FCHFLAGS), &fp); 2732 if (error != 0) 2733 return (error); 2734 #ifdef AUDIT 2735 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2736 AUDIT_ARG_VNODE1(fp->f_vnode); 2737 VOP_UNLOCK(fp->f_vnode, 0); 2738 #endif 2739 error = setfflags(td, fp->f_vnode, uap->flags); 2740 fdrop(fp, td); 2741 return (error); 2742 } 2743 2744 /* 2745 * Common implementation code for chmod(), lchmod() and fchmod(). 2746 */ 2747 int 2748 setfmode(td, cred, vp, mode) 2749 struct thread *td; 2750 struct ucred *cred; 2751 struct vnode *vp; 2752 int mode; 2753 { 2754 struct mount *mp; 2755 struct vattr vattr; 2756 int error; 2757 2758 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2759 return (error); 2760 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2761 VATTR_NULL(&vattr); 2762 vattr.va_mode = mode & ALLPERMS; 2763 #ifdef MAC 2764 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2765 if (error == 0) 2766 #endif 2767 error = VOP_SETATTR(vp, &vattr, cred); 2768 VOP_UNLOCK(vp, 0); 2769 vn_finished_write(mp); 2770 return (error); 2771 } 2772 2773 /* 2774 * Change mode of a file given path name. 2775 */ 2776 #ifndef _SYS_SYSPROTO_H_ 2777 struct chmod_args { 2778 char *path; 2779 int mode; 2780 }; 2781 #endif 2782 int 2783 sys_chmod(td, uap) 2784 struct thread *td; 2785 register struct chmod_args /* { 2786 char *path; 2787 int mode; 2788 } */ *uap; 2789 { 2790 2791 return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode)); 2792 } 2793 2794 #ifndef _SYS_SYSPROTO_H_ 2795 struct fchmodat_args { 2796 int dirfd; 2797 char *path; 2798 mode_t mode; 2799 int flag; 2800 } 2801 #endif 2802 int 2803 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2804 { 2805 int flag = uap->flag; 2806 int fd = uap->fd; 2807 char *path = uap->path; 2808 mode_t mode = uap->mode; 2809 2810 if (flag & ~AT_SYMLINK_NOFOLLOW) 2811 return (EINVAL); 2812 2813 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2814 } 2815 2816 int 2817 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode) 2818 { 2819 2820 return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0)); 2821 } 2822 2823 /* 2824 * Change mode of a file given path name (don't follow links.) 2825 */ 2826 #ifndef _SYS_SYSPROTO_H_ 2827 struct lchmod_args { 2828 char *path; 2829 int mode; 2830 }; 2831 #endif 2832 int 2833 sys_lchmod(td, uap) 2834 struct thread *td; 2835 register struct lchmod_args /* { 2836 char *path; 2837 int mode; 2838 } */ *uap; 2839 { 2840 2841 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2842 uap->mode, AT_SYMLINK_NOFOLLOW)); 2843 } 2844 2845 int 2846 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2847 mode_t mode, int flag) 2848 { 2849 struct nameidata nd; 2850 cap_rights_t rights; 2851 int error, follow; 2852 2853 AUDIT_ARG_MODE(mode); 2854 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2855 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2856 cap_rights_init(&rights, CAP_FCHMOD), td); 2857 if ((error = namei(&nd)) != 0) 2858 return (error); 2859 NDFREE(&nd, NDF_ONLY_PNBUF); 2860 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2861 vrele(nd.ni_vp); 2862 return (error); 2863 } 2864 2865 /* 2866 * Change mode of a file given a file descriptor. 2867 */ 2868 #ifndef _SYS_SYSPROTO_H_ 2869 struct fchmod_args { 2870 int fd; 2871 int mode; 2872 }; 2873 #endif 2874 int 2875 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2876 { 2877 struct file *fp; 2878 cap_rights_t rights; 2879 int error; 2880 2881 AUDIT_ARG_FD(uap->fd); 2882 AUDIT_ARG_MODE(uap->mode); 2883 2884 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2885 if (error != 0) 2886 return (error); 2887 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2888 fdrop(fp, td); 2889 return (error); 2890 } 2891 2892 /* 2893 * Common implementation for chown(), lchown(), and fchown() 2894 */ 2895 int 2896 setfown(td, cred, vp, uid, gid) 2897 struct thread *td; 2898 struct ucred *cred; 2899 struct vnode *vp; 2900 uid_t uid; 2901 gid_t gid; 2902 { 2903 struct mount *mp; 2904 struct vattr vattr; 2905 int error; 2906 2907 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2908 return (error); 2909 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2910 VATTR_NULL(&vattr); 2911 vattr.va_uid = uid; 2912 vattr.va_gid = gid; 2913 #ifdef MAC 2914 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2915 vattr.va_gid); 2916 if (error == 0) 2917 #endif 2918 error = VOP_SETATTR(vp, &vattr, cred); 2919 VOP_UNLOCK(vp, 0); 2920 vn_finished_write(mp); 2921 return (error); 2922 } 2923 2924 /* 2925 * Set ownership given a path name. 2926 */ 2927 #ifndef _SYS_SYSPROTO_H_ 2928 struct chown_args { 2929 char *path; 2930 int uid; 2931 int gid; 2932 }; 2933 #endif 2934 int 2935 sys_chown(td, uap) 2936 struct thread *td; 2937 register struct chown_args /* { 2938 char *path; 2939 int uid; 2940 int gid; 2941 } */ *uap; 2942 { 2943 2944 return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 2945 } 2946 2947 #ifndef _SYS_SYSPROTO_H_ 2948 struct fchownat_args { 2949 int fd; 2950 const char * path; 2951 uid_t uid; 2952 gid_t gid; 2953 int flag; 2954 }; 2955 #endif 2956 int 2957 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2958 { 2959 int flag; 2960 2961 flag = uap->flag; 2962 if (flag & ~AT_SYMLINK_NOFOLLOW) 2963 return (EINVAL); 2964 2965 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2966 uap->gid, uap->flag)); 2967 } 2968 2969 int 2970 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 2971 int gid) 2972 { 2973 2974 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0)); 2975 } 2976 2977 int 2978 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2979 int uid, int gid, int flag) 2980 { 2981 struct nameidata nd; 2982 cap_rights_t rights; 2983 int error, follow; 2984 2985 AUDIT_ARG_OWNER(uid, gid); 2986 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2987 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2988 cap_rights_init(&rights, CAP_FCHOWN), td); 2989 2990 if ((error = namei(&nd)) != 0) 2991 return (error); 2992 NDFREE(&nd, NDF_ONLY_PNBUF); 2993 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2994 vrele(nd.ni_vp); 2995 return (error); 2996 } 2997 2998 /* 2999 * Set ownership given a path name, do not cross symlinks. 3000 */ 3001 #ifndef _SYS_SYSPROTO_H_ 3002 struct lchown_args { 3003 char *path; 3004 int uid; 3005 int gid; 3006 }; 3007 #endif 3008 int 3009 sys_lchown(td, uap) 3010 struct thread *td; 3011 register struct lchown_args /* { 3012 char *path; 3013 int uid; 3014 int gid; 3015 } */ *uap; 3016 { 3017 3018 return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 3019 } 3020 3021 int 3022 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 3023 int gid) 3024 { 3025 3026 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 3027 AT_SYMLINK_NOFOLLOW)); 3028 } 3029 3030 /* 3031 * Set ownership given a file descriptor. 3032 */ 3033 #ifndef _SYS_SYSPROTO_H_ 3034 struct fchown_args { 3035 int fd; 3036 int uid; 3037 int gid; 3038 }; 3039 #endif 3040 int 3041 sys_fchown(td, uap) 3042 struct thread *td; 3043 register struct fchown_args /* { 3044 int fd; 3045 int uid; 3046 int gid; 3047 } */ *uap; 3048 { 3049 struct file *fp; 3050 cap_rights_t rights; 3051 int error; 3052 3053 AUDIT_ARG_FD(uap->fd); 3054 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3055 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 3056 if (error != 0) 3057 return (error); 3058 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3059 fdrop(fp, td); 3060 return (error); 3061 } 3062 3063 /* 3064 * Common implementation code for utimes(), lutimes(), and futimes(). 3065 */ 3066 static int 3067 getutimes(usrtvp, tvpseg, tsp) 3068 const struct timeval *usrtvp; 3069 enum uio_seg tvpseg; 3070 struct timespec *tsp; 3071 { 3072 struct timeval tv[2]; 3073 const struct timeval *tvp; 3074 int error; 3075 3076 if (usrtvp == NULL) { 3077 vfs_timestamp(&tsp[0]); 3078 tsp[1] = tsp[0]; 3079 } else { 3080 if (tvpseg == UIO_SYSSPACE) { 3081 tvp = usrtvp; 3082 } else { 3083 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3084 return (error); 3085 tvp = tv; 3086 } 3087 3088 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3089 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3090 return (EINVAL); 3091 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3092 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3093 } 3094 return (0); 3095 } 3096 3097 /* 3098 * Common implementation code for utimes(), lutimes(), and futimes(). 3099 */ 3100 static int 3101 setutimes(td, vp, ts, numtimes, nullflag) 3102 struct thread *td; 3103 struct vnode *vp; 3104 const struct timespec *ts; 3105 int numtimes; 3106 int nullflag; 3107 { 3108 struct mount *mp; 3109 struct vattr vattr; 3110 int error, setbirthtime; 3111 3112 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3113 return (error); 3114 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3115 setbirthtime = 0; 3116 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3117 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3118 setbirthtime = 1; 3119 VATTR_NULL(&vattr); 3120 vattr.va_atime = ts[0]; 3121 vattr.va_mtime = ts[1]; 3122 if (setbirthtime) 3123 vattr.va_birthtime = ts[1]; 3124 if (numtimes > 2) 3125 vattr.va_birthtime = ts[2]; 3126 if (nullflag) 3127 vattr.va_vaflags |= VA_UTIMES_NULL; 3128 #ifdef MAC 3129 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3130 vattr.va_mtime); 3131 #endif 3132 if (error == 0) 3133 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3134 VOP_UNLOCK(vp, 0); 3135 vn_finished_write(mp); 3136 return (error); 3137 } 3138 3139 /* 3140 * Set the access and modification times of a file. 3141 */ 3142 #ifndef _SYS_SYSPROTO_H_ 3143 struct utimes_args { 3144 char *path; 3145 struct timeval *tptr; 3146 }; 3147 #endif 3148 int 3149 sys_utimes(td, uap) 3150 struct thread *td; 3151 register struct utimes_args /* { 3152 char *path; 3153 struct timeval *tptr; 3154 } */ *uap; 3155 { 3156 3157 return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3158 UIO_USERSPACE)); 3159 } 3160 3161 #ifndef _SYS_SYSPROTO_H_ 3162 struct futimesat_args { 3163 int fd; 3164 const char * path; 3165 const struct timeval * times; 3166 }; 3167 #endif 3168 int 3169 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3170 { 3171 3172 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3173 uap->times, UIO_USERSPACE)); 3174 } 3175 3176 int 3177 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg, 3178 struct timeval *tptr, enum uio_seg tptrseg) 3179 { 3180 3181 return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg)); 3182 } 3183 3184 int 3185 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3186 struct timeval *tptr, enum uio_seg tptrseg) 3187 { 3188 struct nameidata nd; 3189 struct timespec ts[2]; 3190 cap_rights_t rights; 3191 int error; 3192 3193 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3194 return (error); 3195 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3196 cap_rights_init(&rights, CAP_FUTIMES), td); 3197 3198 if ((error = namei(&nd)) != 0) 3199 return (error); 3200 NDFREE(&nd, NDF_ONLY_PNBUF); 3201 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3202 vrele(nd.ni_vp); 3203 return (error); 3204 } 3205 3206 /* 3207 * Set the access and modification times of a file. 3208 */ 3209 #ifndef _SYS_SYSPROTO_H_ 3210 struct lutimes_args { 3211 char *path; 3212 struct timeval *tptr; 3213 }; 3214 #endif 3215 int 3216 sys_lutimes(td, uap) 3217 struct thread *td; 3218 register struct lutimes_args /* { 3219 char *path; 3220 struct timeval *tptr; 3221 } */ *uap; 3222 { 3223 3224 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3225 UIO_USERSPACE)); 3226 } 3227 3228 int 3229 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3230 struct timeval *tptr, enum uio_seg tptrseg) 3231 { 3232 struct timespec ts[2]; 3233 struct nameidata nd; 3234 int error; 3235 3236 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3237 return (error); 3238 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3239 if ((error = namei(&nd)) != 0) 3240 return (error); 3241 NDFREE(&nd, NDF_ONLY_PNBUF); 3242 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3243 vrele(nd.ni_vp); 3244 return (error); 3245 } 3246 3247 /* 3248 * Set the access and modification times of a file. 3249 */ 3250 #ifndef _SYS_SYSPROTO_H_ 3251 struct futimes_args { 3252 int fd; 3253 struct timeval *tptr; 3254 }; 3255 #endif 3256 int 3257 sys_futimes(td, uap) 3258 struct thread *td; 3259 register struct futimes_args /* { 3260 int fd; 3261 struct timeval *tptr; 3262 } */ *uap; 3263 { 3264 3265 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3266 } 3267 3268 int 3269 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3270 enum uio_seg tptrseg) 3271 { 3272 struct timespec ts[2]; 3273 struct file *fp; 3274 cap_rights_t rights; 3275 int error; 3276 3277 AUDIT_ARG_FD(fd); 3278 error = getutimes(tptr, tptrseg, ts); 3279 if (error != 0) 3280 return (error); 3281 error = getvnode(td->td_proc->p_fd, fd, 3282 cap_rights_init(&rights, CAP_FUTIMES), &fp); 3283 if (error != 0) 3284 return (error); 3285 #ifdef AUDIT 3286 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3287 AUDIT_ARG_VNODE1(fp->f_vnode); 3288 VOP_UNLOCK(fp->f_vnode, 0); 3289 #endif 3290 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3291 fdrop(fp, td); 3292 return (error); 3293 } 3294 3295 /* 3296 * Truncate a file given its path name. 3297 */ 3298 #ifndef _SYS_SYSPROTO_H_ 3299 struct truncate_args { 3300 char *path; 3301 int pad; 3302 off_t length; 3303 }; 3304 #endif 3305 int 3306 sys_truncate(td, uap) 3307 struct thread *td; 3308 register struct truncate_args /* { 3309 char *path; 3310 int pad; 3311 off_t length; 3312 } */ *uap; 3313 { 3314 3315 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3316 } 3317 3318 int 3319 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3320 { 3321 struct mount *mp; 3322 struct vnode *vp; 3323 void *rl_cookie; 3324 struct vattr vattr; 3325 struct nameidata nd; 3326 int error; 3327 3328 if (length < 0) 3329 return(EINVAL); 3330 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3331 if ((error = namei(&nd)) != 0) 3332 return (error); 3333 vp = nd.ni_vp; 3334 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3335 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3336 vn_rangelock_unlock(vp, rl_cookie); 3337 vrele(vp); 3338 return (error); 3339 } 3340 NDFREE(&nd, NDF_ONLY_PNBUF); 3341 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3342 if (vp->v_type == VDIR) 3343 error = EISDIR; 3344 #ifdef MAC 3345 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3346 } 3347 #endif 3348 else if ((error = vn_writechk(vp)) == 0 && 3349 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3350 VATTR_NULL(&vattr); 3351 vattr.va_size = length; 3352 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3353 } 3354 VOP_UNLOCK(vp, 0); 3355 vn_finished_write(mp); 3356 vn_rangelock_unlock(vp, rl_cookie); 3357 vrele(vp); 3358 return (error); 3359 } 3360 3361 #if defined(COMPAT_43) 3362 /* 3363 * Truncate a file given its path name. 3364 */ 3365 #ifndef _SYS_SYSPROTO_H_ 3366 struct otruncate_args { 3367 char *path; 3368 long length; 3369 }; 3370 #endif 3371 int 3372 otruncate(td, uap) 3373 struct thread *td; 3374 register struct otruncate_args /* { 3375 char *path; 3376 long length; 3377 } */ *uap; 3378 { 3379 struct truncate_args /* { 3380 char *path; 3381 int pad; 3382 off_t length; 3383 } */ nuap; 3384 3385 nuap.path = uap->path; 3386 nuap.length = uap->length; 3387 return (sys_truncate(td, &nuap)); 3388 } 3389 #endif /* COMPAT_43 */ 3390 3391 /* Versions with the pad argument */ 3392 int 3393 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3394 { 3395 struct truncate_args ouap; 3396 3397 ouap.path = uap->path; 3398 ouap.length = uap->length; 3399 return (sys_truncate(td, &ouap)); 3400 } 3401 3402 int 3403 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3404 { 3405 struct ftruncate_args ouap; 3406 3407 ouap.fd = uap->fd; 3408 ouap.length = uap->length; 3409 return (sys_ftruncate(td, &ouap)); 3410 } 3411 3412 /* 3413 * Sync an open file. 3414 */ 3415 #ifndef _SYS_SYSPROTO_H_ 3416 struct fsync_args { 3417 int fd; 3418 }; 3419 #endif 3420 int 3421 sys_fsync(td, uap) 3422 struct thread *td; 3423 struct fsync_args /* { 3424 int fd; 3425 } */ *uap; 3426 { 3427 struct vnode *vp; 3428 struct mount *mp; 3429 struct file *fp; 3430 cap_rights_t rights; 3431 int error, lock_flags; 3432 3433 AUDIT_ARG_FD(uap->fd); 3434 error = getvnode(td->td_proc->p_fd, uap->fd, 3435 cap_rights_init(&rights, CAP_FSYNC), &fp); 3436 if (error != 0) 3437 return (error); 3438 vp = fp->f_vnode; 3439 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3440 if (error != 0) 3441 goto drop; 3442 if (MNT_SHARED_WRITES(mp) || 3443 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3444 lock_flags = LK_SHARED; 3445 } else { 3446 lock_flags = LK_EXCLUSIVE; 3447 } 3448 vn_lock(vp, lock_flags | LK_RETRY); 3449 AUDIT_ARG_VNODE1(vp); 3450 if (vp->v_object != NULL) { 3451 VM_OBJECT_WLOCK(vp->v_object); 3452 vm_object_page_clean(vp->v_object, 0, 0, 0); 3453 VM_OBJECT_WUNLOCK(vp->v_object); 3454 } 3455 error = VOP_FSYNC(vp, MNT_WAIT, td); 3456 3457 VOP_UNLOCK(vp, 0); 3458 vn_finished_write(mp); 3459 drop: 3460 fdrop(fp, td); 3461 return (error); 3462 } 3463 3464 /* 3465 * Rename files. Source and destination must either both be directories, or 3466 * both not be directories. If target is a directory, it must be empty. 3467 */ 3468 #ifndef _SYS_SYSPROTO_H_ 3469 struct rename_args { 3470 char *from; 3471 char *to; 3472 }; 3473 #endif 3474 int 3475 sys_rename(td, uap) 3476 struct thread *td; 3477 register struct rename_args /* { 3478 char *from; 3479 char *to; 3480 } */ *uap; 3481 { 3482 3483 return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE)); 3484 } 3485 3486 #ifndef _SYS_SYSPROTO_H_ 3487 struct renameat_args { 3488 int oldfd; 3489 char *old; 3490 int newfd; 3491 char *new; 3492 }; 3493 #endif 3494 int 3495 sys_renameat(struct thread *td, struct renameat_args *uap) 3496 { 3497 3498 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3499 UIO_USERSPACE)); 3500 } 3501 3502 int 3503 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg) 3504 { 3505 3506 return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg)); 3507 } 3508 3509 int 3510 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3511 enum uio_seg pathseg) 3512 { 3513 struct mount *mp = NULL; 3514 struct vnode *tvp, *fvp, *tdvp; 3515 struct nameidata fromnd, tond; 3516 cap_rights_t rights; 3517 int error; 3518 3519 bwillwrite(); 3520 #ifdef MAC 3521 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3522 AUDITVNODE1, pathseg, old, oldfd, 3523 cap_rights_init(&rights, CAP_RENAMEAT), td); 3524 #else 3525 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3526 pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT), td); 3527 #endif 3528 3529 if ((error = namei(&fromnd)) != 0) 3530 return (error); 3531 #ifdef MAC 3532 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3533 fromnd.ni_vp, &fromnd.ni_cnd); 3534 VOP_UNLOCK(fromnd.ni_dvp, 0); 3535 if (fromnd.ni_dvp != fromnd.ni_vp) 3536 VOP_UNLOCK(fromnd.ni_vp, 0); 3537 #endif 3538 fvp = fromnd.ni_vp; 3539 if (error == 0) 3540 error = vn_start_write(fvp, &mp, V_WAIT | PCATCH); 3541 if (error != 0) { 3542 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3543 vrele(fromnd.ni_dvp); 3544 vrele(fvp); 3545 goto out1; 3546 } 3547 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3548 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3549 cap_rights_init(&rights, CAP_LINKAT), td); 3550 if (fromnd.ni_vp->v_type == VDIR) 3551 tond.ni_cnd.cn_flags |= WILLBEDIR; 3552 if ((error = namei(&tond)) != 0) { 3553 /* Translate error code for rename("dir1", "dir2/."). */ 3554 if (error == EISDIR && fvp->v_type == VDIR) 3555 error = EINVAL; 3556 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3557 vrele(fromnd.ni_dvp); 3558 vrele(fvp); 3559 vn_finished_write(mp); 3560 goto out1; 3561 } 3562 tdvp = tond.ni_dvp; 3563 tvp = tond.ni_vp; 3564 if (tvp != NULL) { 3565 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3566 error = ENOTDIR; 3567 goto out; 3568 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3569 error = EISDIR; 3570 goto out; 3571 } 3572 #ifdef CAPABILITIES 3573 if (newfd != AT_FDCWD) { 3574 /* 3575 * If the target already exists we require CAP_UNLINKAT 3576 * from 'newfd'. 3577 */ 3578 error = cap_check(&tond.ni_filecaps.fc_rights, 3579 cap_rights_init(&rights, CAP_UNLINKAT)); 3580 if (error != 0) 3581 goto out; 3582 } 3583 #endif 3584 } 3585 if (fvp == tdvp) { 3586 error = EINVAL; 3587 goto out; 3588 } 3589 /* 3590 * If the source is the same as the destination (that is, if they 3591 * are links to the same vnode), then there is nothing to do. 3592 */ 3593 if (fvp == tvp) 3594 error = -1; 3595 #ifdef MAC 3596 else 3597 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3598 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3599 #endif 3600 out: 3601 if (error == 0) { 3602 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3603 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3604 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3605 NDFREE(&tond, NDF_ONLY_PNBUF); 3606 } else { 3607 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3608 NDFREE(&tond, NDF_ONLY_PNBUF); 3609 if (tvp != NULL) 3610 vput(tvp); 3611 if (tdvp == tvp) 3612 vrele(tdvp); 3613 else 3614 vput(tdvp); 3615 vrele(fromnd.ni_dvp); 3616 vrele(fvp); 3617 } 3618 vrele(tond.ni_startdir); 3619 vn_finished_write(mp); 3620 out1: 3621 if (fromnd.ni_startdir) 3622 vrele(fromnd.ni_startdir); 3623 if (error == -1) 3624 return (0); 3625 return (error); 3626 } 3627 3628 /* 3629 * Make a directory file. 3630 */ 3631 #ifndef _SYS_SYSPROTO_H_ 3632 struct mkdir_args { 3633 char *path; 3634 int mode; 3635 }; 3636 #endif 3637 int 3638 sys_mkdir(td, uap) 3639 struct thread *td; 3640 register struct mkdir_args /* { 3641 char *path; 3642 int mode; 3643 } */ *uap; 3644 { 3645 3646 return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode)); 3647 } 3648 3649 #ifndef _SYS_SYSPROTO_H_ 3650 struct mkdirat_args { 3651 int fd; 3652 char *path; 3653 mode_t mode; 3654 }; 3655 #endif 3656 int 3657 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3658 { 3659 3660 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3661 } 3662 3663 int 3664 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode) 3665 { 3666 3667 return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode)); 3668 } 3669 3670 int 3671 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3672 int mode) 3673 { 3674 struct mount *mp; 3675 struct vnode *vp; 3676 struct vattr vattr; 3677 struct nameidata nd; 3678 cap_rights_t rights; 3679 int error; 3680 3681 AUDIT_ARG_MODE(mode); 3682 restart: 3683 bwillwrite(); 3684 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 3685 segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), td); 3686 nd.ni_cnd.cn_flags |= WILLBEDIR; 3687 if ((error = namei(&nd)) != 0) 3688 return (error); 3689 vp = nd.ni_vp; 3690 if (vp != NULL) { 3691 NDFREE(&nd, NDF_ONLY_PNBUF); 3692 /* 3693 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3694 * the strange behaviour of leaving the vnode unlocked 3695 * if the target is the same vnode as the parent. 3696 */ 3697 if (vp == nd.ni_dvp) 3698 vrele(nd.ni_dvp); 3699 else 3700 vput(nd.ni_dvp); 3701 vrele(vp); 3702 return (EEXIST); 3703 } 3704 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3705 NDFREE(&nd, NDF_ONLY_PNBUF); 3706 vput(nd.ni_dvp); 3707 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3708 return (error); 3709 goto restart; 3710 } 3711 VATTR_NULL(&vattr); 3712 vattr.va_type = VDIR; 3713 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3714 #ifdef MAC 3715 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3716 &vattr); 3717 if (error != 0) 3718 goto out; 3719 #endif 3720 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3721 #ifdef MAC 3722 out: 3723 #endif 3724 NDFREE(&nd, NDF_ONLY_PNBUF); 3725 vput(nd.ni_dvp); 3726 if (error == 0) 3727 vput(nd.ni_vp); 3728 vn_finished_write(mp); 3729 return (error); 3730 } 3731 3732 /* 3733 * Remove a directory file. 3734 */ 3735 #ifndef _SYS_SYSPROTO_H_ 3736 struct rmdir_args { 3737 char *path; 3738 }; 3739 #endif 3740 int 3741 sys_rmdir(td, uap) 3742 struct thread *td; 3743 struct rmdir_args /* { 3744 char *path; 3745 } */ *uap; 3746 { 3747 3748 return (kern_rmdir(td, uap->path, UIO_USERSPACE)); 3749 } 3750 3751 int 3752 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg) 3753 { 3754 3755 return (kern_rmdirat(td, AT_FDCWD, path, pathseg)); 3756 } 3757 3758 int 3759 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3760 { 3761 struct mount *mp; 3762 struct vnode *vp; 3763 struct nameidata nd; 3764 cap_rights_t rights; 3765 int error; 3766 3767 restart: 3768 bwillwrite(); 3769 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3770 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3771 if ((error = namei(&nd)) != 0) 3772 return (error); 3773 vp = nd.ni_vp; 3774 if (vp->v_type != VDIR) { 3775 error = ENOTDIR; 3776 goto out; 3777 } 3778 /* 3779 * No rmdir "." please. 3780 */ 3781 if (nd.ni_dvp == vp) { 3782 error = EINVAL; 3783 goto out; 3784 } 3785 /* 3786 * The root of a mounted filesystem cannot be deleted. 3787 */ 3788 if (vp->v_vflag & VV_ROOT) { 3789 error = EBUSY; 3790 goto out; 3791 } 3792 #ifdef MAC 3793 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3794 &nd.ni_cnd); 3795 if (error != 0) 3796 goto out; 3797 #endif 3798 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3799 NDFREE(&nd, NDF_ONLY_PNBUF); 3800 vput(vp); 3801 if (nd.ni_dvp == vp) 3802 vrele(nd.ni_dvp); 3803 else 3804 vput(nd.ni_dvp); 3805 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3806 return (error); 3807 goto restart; 3808 } 3809 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3810 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3811 vn_finished_write(mp); 3812 out: 3813 NDFREE(&nd, NDF_ONLY_PNBUF); 3814 vput(vp); 3815 if (nd.ni_dvp == vp) 3816 vrele(nd.ni_dvp); 3817 else 3818 vput(nd.ni_dvp); 3819 return (error); 3820 } 3821 3822 #ifdef COMPAT_43 3823 /* 3824 * Read a block of directory entries in a filesystem independent format. 3825 */ 3826 #ifndef _SYS_SYSPROTO_H_ 3827 struct ogetdirentries_args { 3828 int fd; 3829 char *buf; 3830 u_int count; 3831 long *basep; 3832 }; 3833 #endif 3834 int 3835 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3836 { 3837 long loff; 3838 int error; 3839 3840 error = kern_ogetdirentries(td, uap, &loff); 3841 if (error == 0) 3842 error = copyout(&loff, uap->basep, sizeof(long)); 3843 return (error); 3844 } 3845 3846 int 3847 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3848 long *ploff) 3849 { 3850 struct vnode *vp; 3851 struct file *fp; 3852 struct uio auio, kuio; 3853 struct iovec aiov, kiov; 3854 struct dirent *dp, *edp; 3855 cap_rights_t rights; 3856 caddr_t dirbuf; 3857 int error, eofflag, readcnt; 3858 long loff; 3859 off_t foffset; 3860 3861 /* XXX arbitrary sanity limit on `count'. */ 3862 if (uap->count > 64 * 1024) 3863 return (EINVAL); 3864 error = getvnode(td->td_proc->p_fd, uap->fd, 3865 cap_rights_init(&rights, CAP_READ), &fp); 3866 if (error != 0) 3867 return (error); 3868 if ((fp->f_flag & FREAD) == 0) { 3869 fdrop(fp, td); 3870 return (EBADF); 3871 } 3872 vp = fp->f_vnode; 3873 foffset = foffset_lock(fp, 0); 3874 unionread: 3875 if (vp->v_type != VDIR) { 3876 foffset_unlock(fp, foffset, 0); 3877 fdrop(fp, td); 3878 return (EINVAL); 3879 } 3880 aiov.iov_base = uap->buf; 3881 aiov.iov_len = uap->count; 3882 auio.uio_iov = &aiov; 3883 auio.uio_iovcnt = 1; 3884 auio.uio_rw = UIO_READ; 3885 auio.uio_segflg = UIO_USERSPACE; 3886 auio.uio_td = td; 3887 auio.uio_resid = uap->count; 3888 vn_lock(vp, LK_SHARED | LK_RETRY); 3889 loff = auio.uio_offset = foffset; 3890 #ifdef MAC 3891 error = mac_vnode_check_readdir(td->td_ucred, vp); 3892 if (error != 0) { 3893 VOP_UNLOCK(vp, 0); 3894 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3895 fdrop(fp, td); 3896 return (error); 3897 } 3898 #endif 3899 # if (BYTE_ORDER != LITTLE_ENDIAN) 3900 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3901 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3902 NULL, NULL); 3903 foffset = auio.uio_offset; 3904 } else 3905 # endif 3906 { 3907 kuio = auio; 3908 kuio.uio_iov = &kiov; 3909 kuio.uio_segflg = UIO_SYSSPACE; 3910 kiov.iov_len = uap->count; 3911 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3912 kiov.iov_base = dirbuf; 3913 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3914 NULL, NULL); 3915 foffset = kuio.uio_offset; 3916 if (error == 0) { 3917 readcnt = uap->count - kuio.uio_resid; 3918 edp = (struct dirent *)&dirbuf[readcnt]; 3919 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3920 # if (BYTE_ORDER == LITTLE_ENDIAN) 3921 /* 3922 * The expected low byte of 3923 * dp->d_namlen is our dp->d_type. 3924 * The high MBZ byte of dp->d_namlen 3925 * is our dp->d_namlen. 3926 */ 3927 dp->d_type = dp->d_namlen; 3928 dp->d_namlen = 0; 3929 # else 3930 /* 3931 * The dp->d_type is the high byte 3932 * of the expected dp->d_namlen, 3933 * so must be zero'ed. 3934 */ 3935 dp->d_type = 0; 3936 # endif 3937 if (dp->d_reclen > 0) { 3938 dp = (struct dirent *) 3939 ((char *)dp + dp->d_reclen); 3940 } else { 3941 error = EIO; 3942 break; 3943 } 3944 } 3945 if (dp >= edp) 3946 error = uiomove(dirbuf, readcnt, &auio); 3947 } 3948 free(dirbuf, M_TEMP); 3949 } 3950 if (error != 0) { 3951 VOP_UNLOCK(vp, 0); 3952 foffset_unlock(fp, foffset, 0); 3953 fdrop(fp, td); 3954 return (error); 3955 } 3956 if (uap->count == auio.uio_resid && 3957 (vp->v_vflag & VV_ROOT) && 3958 (vp->v_mount->mnt_flag & MNT_UNION)) { 3959 struct vnode *tvp = vp; 3960 vp = vp->v_mount->mnt_vnodecovered; 3961 VREF(vp); 3962 fp->f_vnode = vp; 3963 fp->f_data = vp; 3964 foffset = 0; 3965 vput(tvp); 3966 goto unionread; 3967 } 3968 VOP_UNLOCK(vp, 0); 3969 foffset_unlock(fp, foffset, 0); 3970 fdrop(fp, td); 3971 td->td_retval[0] = uap->count - auio.uio_resid; 3972 if (error == 0) 3973 *ploff = loff; 3974 return (error); 3975 } 3976 #endif /* COMPAT_43 */ 3977 3978 /* 3979 * Read a block of directory entries in a filesystem independent format. 3980 */ 3981 #ifndef _SYS_SYSPROTO_H_ 3982 struct getdirentries_args { 3983 int fd; 3984 char *buf; 3985 u_int count; 3986 long *basep; 3987 }; 3988 #endif 3989 int 3990 sys_getdirentries(td, uap) 3991 struct thread *td; 3992 register struct getdirentries_args /* { 3993 int fd; 3994 char *buf; 3995 u_int count; 3996 long *basep; 3997 } */ *uap; 3998 { 3999 long base; 4000 int error; 4001 4002 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4003 NULL, UIO_USERSPACE); 4004 if (error != 0) 4005 return (error); 4006 if (uap->basep != NULL) 4007 error = copyout(&base, uap->basep, sizeof(long)); 4008 return (error); 4009 } 4010 4011 int 4012 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 4013 long *basep, ssize_t *residp, enum uio_seg bufseg) 4014 { 4015 struct vnode *vp; 4016 struct file *fp; 4017 struct uio auio; 4018 struct iovec aiov; 4019 cap_rights_t rights; 4020 long loff; 4021 int error, eofflag; 4022 off_t foffset; 4023 4024 AUDIT_ARG_FD(fd); 4025 if (count > IOSIZE_MAX) 4026 return (EINVAL); 4027 auio.uio_resid = count; 4028 error = getvnode(td->td_proc->p_fd, fd, 4029 cap_rights_init(&rights, CAP_READ), &fp); 4030 if (error != 0) 4031 return (error); 4032 if ((fp->f_flag & FREAD) == 0) { 4033 fdrop(fp, td); 4034 return (EBADF); 4035 } 4036 vp = fp->f_vnode; 4037 foffset = foffset_lock(fp, 0); 4038 unionread: 4039 if (vp->v_type != VDIR) { 4040 error = EINVAL; 4041 goto fail; 4042 } 4043 aiov.iov_base = buf; 4044 aiov.iov_len = count; 4045 auio.uio_iov = &aiov; 4046 auio.uio_iovcnt = 1; 4047 auio.uio_rw = UIO_READ; 4048 auio.uio_segflg = bufseg; 4049 auio.uio_td = td; 4050 vn_lock(vp, LK_SHARED | LK_RETRY); 4051 AUDIT_ARG_VNODE1(vp); 4052 loff = auio.uio_offset = foffset; 4053 #ifdef MAC 4054 error = mac_vnode_check_readdir(td->td_ucred, vp); 4055 if (error == 0) 4056 #endif 4057 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4058 NULL); 4059 foffset = auio.uio_offset; 4060 if (error != 0) { 4061 VOP_UNLOCK(vp, 0); 4062 goto fail; 4063 } 4064 if (count == auio.uio_resid && 4065 (vp->v_vflag & VV_ROOT) && 4066 (vp->v_mount->mnt_flag & MNT_UNION)) { 4067 struct vnode *tvp = vp; 4068 4069 vp = vp->v_mount->mnt_vnodecovered; 4070 VREF(vp); 4071 fp->f_vnode = vp; 4072 fp->f_data = vp; 4073 foffset = 0; 4074 vput(tvp); 4075 goto unionread; 4076 } 4077 VOP_UNLOCK(vp, 0); 4078 *basep = loff; 4079 if (residp != NULL) 4080 *residp = auio.uio_resid; 4081 td->td_retval[0] = count - auio.uio_resid; 4082 fail: 4083 foffset_unlock(fp, foffset, 0); 4084 fdrop(fp, td); 4085 return (error); 4086 } 4087 4088 #ifndef _SYS_SYSPROTO_H_ 4089 struct getdents_args { 4090 int fd; 4091 char *buf; 4092 size_t count; 4093 }; 4094 #endif 4095 int 4096 sys_getdents(td, uap) 4097 struct thread *td; 4098 register struct getdents_args /* { 4099 int fd; 4100 char *buf; 4101 u_int count; 4102 } */ *uap; 4103 { 4104 struct getdirentries_args ap; 4105 4106 ap.fd = uap->fd; 4107 ap.buf = uap->buf; 4108 ap.count = uap->count; 4109 ap.basep = NULL; 4110 return (sys_getdirentries(td, &ap)); 4111 } 4112 4113 /* 4114 * Set the mode mask for creation of filesystem nodes. 4115 */ 4116 #ifndef _SYS_SYSPROTO_H_ 4117 struct umask_args { 4118 int newmask; 4119 }; 4120 #endif 4121 int 4122 sys_umask(td, uap) 4123 struct thread *td; 4124 struct umask_args /* { 4125 int newmask; 4126 } */ *uap; 4127 { 4128 register struct filedesc *fdp; 4129 4130 FILEDESC_XLOCK(td->td_proc->p_fd); 4131 fdp = td->td_proc->p_fd; 4132 td->td_retval[0] = fdp->fd_cmask; 4133 fdp->fd_cmask = uap->newmask & ALLPERMS; 4134 FILEDESC_XUNLOCK(td->td_proc->p_fd); 4135 return (0); 4136 } 4137 4138 /* 4139 * Void all references to file by ripping underlying filesystem away from 4140 * vnode. 4141 */ 4142 #ifndef _SYS_SYSPROTO_H_ 4143 struct revoke_args { 4144 char *path; 4145 }; 4146 #endif 4147 int 4148 sys_revoke(td, uap) 4149 struct thread *td; 4150 register struct revoke_args /* { 4151 char *path; 4152 } */ *uap; 4153 { 4154 struct vnode *vp; 4155 struct vattr vattr; 4156 struct nameidata nd; 4157 int error; 4158 4159 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4160 uap->path, td); 4161 if ((error = namei(&nd)) != 0) 4162 return (error); 4163 vp = nd.ni_vp; 4164 NDFREE(&nd, NDF_ONLY_PNBUF); 4165 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4166 error = EINVAL; 4167 goto out; 4168 } 4169 #ifdef MAC 4170 error = mac_vnode_check_revoke(td->td_ucred, vp); 4171 if (error != 0) 4172 goto out; 4173 #endif 4174 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4175 if (error != 0) 4176 goto out; 4177 if (td->td_ucred->cr_uid != vattr.va_uid) { 4178 error = priv_check(td, PRIV_VFS_ADMIN); 4179 if (error != 0) 4180 goto out; 4181 } 4182 if (vcount(vp) > 1) 4183 VOP_REVOKE(vp, REVOKEALL); 4184 out: 4185 vput(vp); 4186 return (error); 4187 } 4188 4189 /* 4190 * Convert a user file descriptor to a kernel file entry and check that, if it 4191 * is a capability, the correct rights are present. A reference on the file 4192 * entry is held upon returning. 4193 */ 4194 int 4195 getvnode(struct filedesc *fdp, int fd, cap_rights_t *rightsp, struct file **fpp) 4196 { 4197 struct file *fp; 4198 int error; 4199 4200 error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL); 4201 if (error != 0) 4202 return (error); 4203 4204 /* 4205 * The file could be not of the vnode type, or it may be not 4206 * yet fully initialized, in which case the f_vnode pointer 4207 * may be set, but f_ops is still badfileops. E.g., 4208 * devfs_open() transiently create such situation to 4209 * facilitate csw d_fdopen(). 4210 * 4211 * Dupfdopen() handling in kern_openat() installs the 4212 * half-baked file into the process descriptor table, allowing 4213 * other thread to dereference it. Guard against the race by 4214 * checking f_ops. 4215 */ 4216 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4217 fdrop(fp, curthread); 4218 return (EINVAL); 4219 } 4220 *fpp = fp; 4221 return (0); 4222 } 4223 4224 4225 /* 4226 * Get an (NFS) file handle. 4227 */ 4228 #ifndef _SYS_SYSPROTO_H_ 4229 struct lgetfh_args { 4230 char *fname; 4231 fhandle_t *fhp; 4232 }; 4233 #endif 4234 int 4235 sys_lgetfh(td, uap) 4236 struct thread *td; 4237 register struct lgetfh_args *uap; 4238 { 4239 struct nameidata nd; 4240 fhandle_t fh; 4241 register struct vnode *vp; 4242 int error; 4243 4244 error = priv_check(td, PRIV_VFS_GETFH); 4245 if (error != 0) 4246 return (error); 4247 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4248 uap->fname, td); 4249 error = namei(&nd); 4250 if (error != 0) 4251 return (error); 4252 NDFREE(&nd, NDF_ONLY_PNBUF); 4253 vp = nd.ni_vp; 4254 bzero(&fh, sizeof(fh)); 4255 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4256 error = VOP_VPTOFH(vp, &fh.fh_fid); 4257 vput(vp); 4258 if (error == 0) 4259 error = copyout(&fh, uap->fhp, sizeof (fh)); 4260 return (error); 4261 } 4262 4263 #ifndef _SYS_SYSPROTO_H_ 4264 struct getfh_args { 4265 char *fname; 4266 fhandle_t *fhp; 4267 }; 4268 #endif 4269 int 4270 sys_getfh(td, uap) 4271 struct thread *td; 4272 register struct getfh_args *uap; 4273 { 4274 struct nameidata nd; 4275 fhandle_t fh; 4276 register struct vnode *vp; 4277 int error; 4278 4279 error = priv_check(td, PRIV_VFS_GETFH); 4280 if (error != 0) 4281 return (error); 4282 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4283 uap->fname, td); 4284 error = namei(&nd); 4285 if (error != 0) 4286 return (error); 4287 NDFREE(&nd, NDF_ONLY_PNBUF); 4288 vp = nd.ni_vp; 4289 bzero(&fh, sizeof(fh)); 4290 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4291 error = VOP_VPTOFH(vp, &fh.fh_fid); 4292 vput(vp); 4293 if (error == 0) 4294 error = copyout(&fh, uap->fhp, sizeof (fh)); 4295 return (error); 4296 } 4297 4298 /* 4299 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4300 * open descriptor. 4301 * 4302 * warning: do not remove the priv_check() call or this becomes one giant 4303 * security hole. 4304 */ 4305 #ifndef _SYS_SYSPROTO_H_ 4306 struct fhopen_args { 4307 const struct fhandle *u_fhp; 4308 int flags; 4309 }; 4310 #endif 4311 int 4312 sys_fhopen(td, uap) 4313 struct thread *td; 4314 struct fhopen_args /* { 4315 const struct fhandle *u_fhp; 4316 int flags; 4317 } */ *uap; 4318 { 4319 struct mount *mp; 4320 struct vnode *vp; 4321 struct fhandle fhp; 4322 struct file *fp; 4323 int fmode, error; 4324 int indx; 4325 4326 error = priv_check(td, PRIV_VFS_FHOPEN); 4327 if (error != 0) 4328 return (error); 4329 indx = -1; 4330 fmode = FFLAGS(uap->flags); 4331 /* why not allow a non-read/write open for our lockd? */ 4332 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4333 return (EINVAL); 4334 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4335 if (error != 0) 4336 return(error); 4337 /* find the mount point */ 4338 mp = vfs_busyfs(&fhp.fh_fsid); 4339 if (mp == NULL) 4340 return (ESTALE); 4341 /* now give me my vnode, it gets returned to me locked */ 4342 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4343 vfs_unbusy(mp); 4344 if (error != 0) 4345 return (error); 4346 4347 error = falloc_noinstall(td, &fp); 4348 if (error != 0) { 4349 vput(vp); 4350 return (error); 4351 } 4352 /* 4353 * An extra reference on `fp' has been held for us by 4354 * falloc_noinstall(). 4355 */ 4356 4357 #ifdef INVARIANTS 4358 td->td_dupfd = -1; 4359 #endif 4360 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4361 if (error != 0) { 4362 KASSERT(fp->f_ops == &badfileops, 4363 ("VOP_OPEN in fhopen() set f_ops")); 4364 KASSERT(td->td_dupfd < 0, 4365 ("fhopen() encountered fdopen()")); 4366 4367 vput(vp); 4368 goto bad; 4369 } 4370 #ifdef INVARIANTS 4371 td->td_dupfd = 0; 4372 #endif 4373 fp->f_vnode = vp; 4374 fp->f_seqcount = 1; 4375 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4376 &vnops); 4377 VOP_UNLOCK(vp, 0); 4378 if ((fmode & O_TRUNC) != 0) { 4379 error = fo_truncate(fp, 0, td->td_ucred, td); 4380 if (error != 0) 4381 goto bad; 4382 } 4383 4384 error = finstall(td, fp, &indx, fmode, NULL); 4385 bad: 4386 fdrop(fp, td); 4387 td->td_retval[0] = indx; 4388 return (error); 4389 } 4390 4391 /* 4392 * Stat an (NFS) file handle. 4393 */ 4394 #ifndef _SYS_SYSPROTO_H_ 4395 struct fhstat_args { 4396 struct fhandle *u_fhp; 4397 struct stat *sb; 4398 }; 4399 #endif 4400 int 4401 sys_fhstat(td, uap) 4402 struct thread *td; 4403 register struct fhstat_args /* { 4404 struct fhandle *u_fhp; 4405 struct stat *sb; 4406 } */ *uap; 4407 { 4408 struct stat sb; 4409 struct fhandle fh; 4410 int error; 4411 4412 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4413 if (error != 0) 4414 return (error); 4415 error = kern_fhstat(td, fh, &sb); 4416 if (error == 0) 4417 error = copyout(&sb, uap->sb, sizeof(sb)); 4418 return (error); 4419 } 4420 4421 int 4422 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4423 { 4424 struct mount *mp; 4425 struct vnode *vp; 4426 int error; 4427 4428 error = priv_check(td, PRIV_VFS_FHSTAT); 4429 if (error != 0) 4430 return (error); 4431 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4432 return (ESTALE); 4433 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4434 vfs_unbusy(mp); 4435 if (error != 0) 4436 return (error); 4437 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4438 vput(vp); 4439 return (error); 4440 } 4441 4442 /* 4443 * Implement fstatfs() for (NFS) file handles. 4444 */ 4445 #ifndef _SYS_SYSPROTO_H_ 4446 struct fhstatfs_args { 4447 struct fhandle *u_fhp; 4448 struct statfs *buf; 4449 }; 4450 #endif 4451 int 4452 sys_fhstatfs(td, uap) 4453 struct thread *td; 4454 struct fhstatfs_args /* { 4455 struct fhandle *u_fhp; 4456 struct statfs *buf; 4457 } */ *uap; 4458 { 4459 struct statfs sf; 4460 fhandle_t fh; 4461 int error; 4462 4463 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4464 if (error != 0) 4465 return (error); 4466 error = kern_fhstatfs(td, fh, &sf); 4467 if (error != 0) 4468 return (error); 4469 return (copyout(&sf, uap->buf, sizeof(sf))); 4470 } 4471 4472 int 4473 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4474 { 4475 struct statfs *sp; 4476 struct mount *mp; 4477 struct vnode *vp; 4478 int error; 4479 4480 error = priv_check(td, PRIV_VFS_FHSTATFS); 4481 if (error != 0) 4482 return (error); 4483 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4484 return (ESTALE); 4485 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4486 if (error != 0) { 4487 vfs_unbusy(mp); 4488 return (error); 4489 } 4490 vput(vp); 4491 error = prison_canseemount(td->td_ucred, mp); 4492 if (error != 0) 4493 goto out; 4494 #ifdef MAC 4495 error = mac_mount_check_stat(td->td_ucred, mp); 4496 if (error != 0) 4497 goto out; 4498 #endif 4499 /* 4500 * Set these in case the underlying filesystem fails to do so. 4501 */ 4502 sp = &mp->mnt_stat; 4503 sp->f_version = STATFS_VERSION; 4504 sp->f_namemax = NAME_MAX; 4505 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4506 error = VFS_STATFS(mp, sp); 4507 if (error == 0) 4508 *buf = *sp; 4509 out: 4510 vfs_unbusy(mp); 4511 return (error); 4512 } 4513 4514 int 4515 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4516 { 4517 struct file *fp; 4518 struct mount *mp; 4519 struct vnode *vp; 4520 cap_rights_t rights; 4521 off_t olen, ooffset; 4522 int error; 4523 4524 fp = NULL; 4525 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4526 if (error != 0) 4527 goto out; 4528 4529 switch (fp->f_type) { 4530 case DTYPE_VNODE: 4531 break; 4532 case DTYPE_PIPE: 4533 case DTYPE_FIFO: 4534 error = ESPIPE; 4535 goto out; 4536 default: 4537 error = ENODEV; 4538 goto out; 4539 } 4540 if ((fp->f_flag & FWRITE) == 0) { 4541 error = EBADF; 4542 goto out; 4543 } 4544 vp = fp->f_vnode; 4545 if (vp->v_type != VREG) { 4546 error = ENODEV; 4547 goto out; 4548 } 4549 if (offset < 0 || len <= 0) { 4550 error = EINVAL; 4551 goto out; 4552 } 4553 /* Check for wrap. */ 4554 if (offset > OFF_MAX - len) { 4555 error = EFBIG; 4556 goto out; 4557 } 4558 4559 /* Allocating blocks may take a long time, so iterate. */ 4560 for (;;) { 4561 olen = len; 4562 ooffset = offset; 4563 4564 bwillwrite(); 4565 mp = NULL; 4566 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4567 if (error != 0) 4568 break; 4569 error = vn_lock(vp, LK_EXCLUSIVE); 4570 if (error != 0) { 4571 vn_finished_write(mp); 4572 break; 4573 } 4574 #ifdef MAC 4575 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4576 if (error == 0) 4577 #endif 4578 error = VOP_ALLOCATE(vp, &offset, &len); 4579 VOP_UNLOCK(vp, 0); 4580 vn_finished_write(mp); 4581 4582 if (olen + ooffset != offset + len) { 4583 panic("offset + len changed from %jx/%jx to %jx/%jx", 4584 ooffset, olen, offset, len); 4585 } 4586 if (error != 0 || len == 0) 4587 break; 4588 KASSERT(olen > len, ("Iteration did not make progress?")); 4589 maybe_yield(); 4590 } 4591 out: 4592 if (fp != NULL) 4593 fdrop(fp, td); 4594 return (error); 4595 } 4596 4597 int 4598 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4599 { 4600 4601 td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset, 4602 uap->len); 4603 return (0); 4604 } 4605 4606 /* 4607 * Unlike madvise(2), we do not make a best effort to remember every 4608 * possible caching hint. Instead, we remember the last setting with 4609 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4610 * region of any current setting. 4611 */ 4612 int 4613 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4614 int advice) 4615 { 4616 struct fadvise_info *fa, *new; 4617 struct file *fp; 4618 struct vnode *vp; 4619 cap_rights_t rights; 4620 off_t end; 4621 int error; 4622 4623 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4624 return (EINVAL); 4625 switch (advice) { 4626 case POSIX_FADV_SEQUENTIAL: 4627 case POSIX_FADV_RANDOM: 4628 case POSIX_FADV_NOREUSE: 4629 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4630 break; 4631 case POSIX_FADV_NORMAL: 4632 case POSIX_FADV_WILLNEED: 4633 case POSIX_FADV_DONTNEED: 4634 new = NULL; 4635 break; 4636 default: 4637 return (EINVAL); 4638 } 4639 /* XXX: CAP_POSIX_FADVISE? */ 4640 error = fget(td, fd, cap_rights_init(&rights), &fp); 4641 if (error != 0) 4642 goto out; 4643 4644 switch (fp->f_type) { 4645 case DTYPE_VNODE: 4646 break; 4647 case DTYPE_PIPE: 4648 case DTYPE_FIFO: 4649 error = ESPIPE; 4650 goto out; 4651 default: 4652 error = ENODEV; 4653 goto out; 4654 } 4655 vp = fp->f_vnode; 4656 if (vp->v_type != VREG) { 4657 error = ENODEV; 4658 goto out; 4659 } 4660 if (len == 0) 4661 end = OFF_MAX; 4662 else 4663 end = offset + len - 1; 4664 switch (advice) { 4665 case POSIX_FADV_SEQUENTIAL: 4666 case POSIX_FADV_RANDOM: 4667 case POSIX_FADV_NOREUSE: 4668 /* 4669 * Try to merge any existing non-standard region with 4670 * this new region if possible, otherwise create a new 4671 * non-standard region for this request. 4672 */ 4673 mtx_pool_lock(mtxpool_sleep, fp); 4674 fa = fp->f_advice; 4675 if (fa != NULL && fa->fa_advice == advice && 4676 ((fa->fa_start <= end && fa->fa_end >= offset) || 4677 (end != OFF_MAX && fa->fa_start == end + 1) || 4678 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4679 if (offset < fa->fa_start) 4680 fa->fa_start = offset; 4681 if (end > fa->fa_end) 4682 fa->fa_end = end; 4683 } else { 4684 new->fa_advice = advice; 4685 new->fa_start = offset; 4686 new->fa_end = end; 4687 new->fa_prevstart = 0; 4688 new->fa_prevend = 0; 4689 fp->f_advice = new; 4690 new = fa; 4691 } 4692 mtx_pool_unlock(mtxpool_sleep, fp); 4693 break; 4694 case POSIX_FADV_NORMAL: 4695 /* 4696 * If a the "normal" region overlaps with an existing 4697 * non-standard region, trim or remove the 4698 * non-standard region. 4699 */ 4700 mtx_pool_lock(mtxpool_sleep, fp); 4701 fa = fp->f_advice; 4702 if (fa != NULL) { 4703 if (offset <= fa->fa_start && end >= fa->fa_end) { 4704 new = fa; 4705 fp->f_advice = NULL; 4706 } else if (offset <= fa->fa_start && 4707 end >= fa->fa_start) 4708 fa->fa_start = end + 1; 4709 else if (offset <= fa->fa_end && end >= fa->fa_end) 4710 fa->fa_end = offset - 1; 4711 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4712 /* 4713 * If the "normal" region is a middle 4714 * portion of the existing 4715 * non-standard region, just remove 4716 * the whole thing rather than picking 4717 * one side or the other to 4718 * preserve. 4719 */ 4720 new = fa; 4721 fp->f_advice = NULL; 4722 } 4723 } 4724 mtx_pool_unlock(mtxpool_sleep, fp); 4725 break; 4726 case POSIX_FADV_WILLNEED: 4727 case POSIX_FADV_DONTNEED: 4728 error = VOP_ADVISE(vp, offset, end, advice); 4729 break; 4730 } 4731 out: 4732 if (fp != NULL) 4733 fdrop(fp, td); 4734 free(new, M_FADVISE); 4735 return (error); 4736 } 4737 4738 int 4739 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4740 { 4741 4742 td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset, 4743 uap->len, uap->advice); 4744 return (0); 4745 } 4746