/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/user.h>
#include <sys/time.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/kmem.h>
#include <sys/uio.h>
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/cred.h>
#include <sys/dirent.h>
#include <sys/pathname.h>
#include <sys/vmsystm.h>
#include <sys/fs/tmp.h>
#include <sys/fs/tmpnode.h>
#include <sys/mman.h>
#include <vm/hat.h>
#include <vm/seg_vn.h>
#include <vm/seg_map.h>
#include <vm/seg.h>
#include <vm/anon.h>
#include <vm/as.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/swap.h>
#include <sys/buf.h>
#include <sys/vm.h>
#include <sys/vtrace.h>
#include <sys/policy.h>
#include <fs/fs_subr.h>

static int	tmp_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
	page_t **, size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
static int	tmp_putapage(struct vnode *, page_t *, u_offset_t *, size_t *,
	int, struct cred *);

/* ARGSUSED1 */
static int
tmp_open(struct vnode **vpp, int flag, struct cred *cred, caller_context_t *ct)
{
	/*
	 * swapon to a tmpfs file is not supported so access
	 * is denied on open if VISSWAP is set.
	 */
	if ((*vpp)->v_flag & VISSWAP)
		return (EINVAL);
	return (0);
}

/* ARGSUSED1 */
static int
tmp_close(
	struct vnode *vp,
	int flag,
	int count,
	offset_t offset,
	struct cred *cred,
	caller_context_t *ct)
{
	cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
	cleanshares(vp, ttoproc(curthread)->p_pid);
	return (0);
}

/*
 * wrtmp does the real work of write requests for tmpfs.
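 * The caller (tmp_write) must already hold both tn_rwlock and tn_contents
 * as writers; at most PAGESIZE bytes are copied per pass of the main loop.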
 */
static int
wrtmp(
	struct tmount *tm,
	struct tmpnode *tp,
	struct uio *uio,
	struct cred *cr,
	struct caller_context *ct)
{
	pgcnt_t pageoffset;	/* offset in pages */
	ulong_t segmap_offset;	/* pagesize byte offset into segmap */
	caddr_t base;		/* base of segmap */
	ssize_t bytes;		/* bytes to uiomove */
	pfn_t pagenumber;	/* offset in pages into tmp file */
	struct vnode *vp;
	int error = 0;
	int pagecreate;		/* == 1 if we allocated a page */
	int newpage;
	rlim64_t limit = uio->uio_llimit;
	long oresid = uio->uio_resid;
	timestruc_t now;

	long tn_size_changed = 0;
	long old_tn_size;
	long new_tn_size;

	vp = TNTOV(tp);
	ASSERT(vp->v_type == VREG);

	TRACE_1(TR_FAC_TMPFS, TR_TMPFS_RWTMP_START,
	    "tmp_wrtmp_start:vp %p", vp);

	ASSERT(RW_WRITE_HELD(&tp->tn_contents));
	ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));

	if (MANDLOCK(vp, tp->tn_mode)) {
		rw_exit(&tp->tn_contents);
		/*
		 * tmp_getattr ends up being called by chklock
		 */
		error = chklock(vp, FWRITE, uio->uio_loffset, uio->uio_resid,
		    uio->uio_fmode, ct);
		rw_enter(&tp->tn_contents, RW_WRITER);
		if (error != 0) {
			TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
			    "tmp_wrtmp_end:vp %p error %d", vp, error);
			return (error);
		}
	}

	if (uio->uio_loffset < 0)
		return (EINVAL);

	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
		limit = MAXOFFSET_T;

	if (uio->uio_loffset >= limit) {
		proc_t *p = ttoproc(curthread);

		mutex_enter(&p->p_lock);
		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
		    p, RCA_UNSAFE_SIGINFO);
		mutex_exit(&p->p_lock);
		return (EFBIG);
	}

	if (uio->uio_loffset >= MAXOFF_T) {
		TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
		    "tmp_wrtmp_end:vp %p error %d", vp, EINVAL);
		return (EFBIG);
	}

	if (uio->uio_resid == 0) {
		TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
		    "tmp_wrtmp_end:vp %p error %d", vp, 0);
		return (0);
	}

	if (limit > MAXOFF_T)
		limit = MAXOFF_T;

	do {
		long offset;
		long delta;

		offset = (long)uio->uio_offset;
		pageoffset = offset & PAGEOFFSET;
		/*
		 * A maximum of PAGESIZE bytes of data is transferred
		 * each pass through this loop
		 */
		bytes = MIN(PAGESIZE - pageoffset, uio->uio_resid);

		if (offset + bytes >= limit) {
			if (offset >= limit) {
				error = EFBIG;
				goto out;
			}
			bytes = limit - offset;
		}
		pagenumber = btop(offset);

		/*
		 * delta is the amount of anonymous memory
		 * to reserve for the file.
		 * We always reserve in pagesize increments so
		 * unless we're extending the file into a new page,
		 * we don't need to call tmp_resv.
		 */
		delta = offset + bytes -
		    P2ROUNDUP_TYPED(tp->tn_size, PAGESIZE, u_offset_t);
		if (delta > 0) {
			pagecreate = 1;
			if (tmp_resv(tm, tp, delta, pagecreate)) {
				/*
				 * Log file system full in the zone that owns
				 * the tmpfs mount, as well as in the global
				 * zone if necessary.
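				 * The global-zone message identifies the
				 * mount by the covered vnode's path.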
				 */
				zcmn_err(tm->tm_vfsp->vfs_zone->zone_id,
				    CE_WARN, "%s: File system full, "
				    "swap space limit exceeded",
				    tm->tm_mntpath);

				if (tm->tm_vfsp->vfs_zone->zone_id !=
				    GLOBAL_ZONEID) {

					vfs_t *vfs = tm->tm_vfsp;

					zcmn_err(GLOBAL_ZONEID,
					    CE_WARN, "%s: File system full, "
					    "swap space limit exceeded",
					    vfs->vfs_vnodecovered->v_path);
				}
				error = ENOSPC;
				break;
			}
			tmpnode_growmap(tp, (ulong_t)offset + bytes);
		}
		/* grow the file to the new length */
		if (offset + bytes > tp->tn_size) {
			tn_size_changed = 1;
			old_tn_size = tp->tn_size;
			/*
			 * Postpone updating tp->tn_size until uiomove() is
			 * done.
			 */
			new_tn_size = offset + bytes;
		}
		if (bytes == PAGESIZE) {
			/*
			 * Writing whole page so reading from disk
			 * is a waste
			 */
			pagecreate = 1;
		} else {
			pagecreate = 0;
		}
		/*
		 * If writing past EOF or filling in a hole
		 * we need to allocate an anon slot.
		 */
		if (anon_get_ptr(tp->tn_anon, pagenumber) == NULL) {
			(void) anon_set_ptr(tp->tn_anon, pagenumber,
			    anon_alloc(vp, ptob(pagenumber)), ANON_SLEEP);
			pagecreate = 1;
			tp->tn_nblocks++;
		}

		/*
		 * We have to drop the contents lock to allow the VM
		 * system to reacquire it in tmp_getpage()
		 */
		rw_exit(&tp->tn_contents);

		/*
		 * Touch the page and fault it in if it is not in core
		 * before segmap_getmapflt or vpm_data_copy can lock it.
		 * This is to avoid the deadlock if the buffer is mapped
		 * to the same file through mmap which we want to write.
		 */
		uio_prefaultpages((long)bytes, uio);

		newpage = 0;
		if (vpm_enable) {
			/*
			 * Copy data. If new pages are created, part of
			 * the page that is not written will be initialized
			 * with zeros.
			 */
			error = vpm_data_copy(vp, offset, bytes, uio,
			    !pagecreate, &newpage, 1, S_WRITE);
		} else {
			/* Get offset within the segmap mapping */
			segmap_offset = (offset & PAGEMASK) & MAXBOFFSET;
			base = segmap_getmapflt(segkmap, vp,
			    (offset & MAXBMASK), PAGESIZE, !pagecreate,
			    S_WRITE);
		}


		if (!vpm_enable && pagecreate) {
			/*
			 * segmap_pagecreate() returns 1 if it calls
			 * page_create_va() to allocate any pages.
			 */
			newpage = segmap_pagecreate(segkmap,
			    base + segmap_offset, (size_t)PAGESIZE, 0);
			/*
			 * Clear from the beginning of the page to the starting
			 * offset of the data.
			 */
			if (pageoffset != 0)
				(void) kzero(base + segmap_offset,
				    (size_t)pageoffset);
		}

		if (!vpm_enable) {
			error = uiomove(base + segmap_offset + pageoffset,
			    (long)bytes, UIO_WRITE, uio);
		}

		if (!vpm_enable && pagecreate &&
		    uio->uio_offset < P2ROUNDUP(offset + bytes, PAGESIZE)) {
			long zoffset;	/* zero from offset into page */
			/*
			 * We created pages w/o initializing them completely,
			 * thus we need to zero the part that wasn't set up.
			 * This happens on most EOF write cases and if
			 * we had some sort of error during the uiomove.
			 */
			long nmoved;

			nmoved = uio->uio_offset - offset;
			ASSERT((nmoved + pageoffset) <= PAGESIZE);

			/*
			 * Zero from the end of data in the page to the
			 * end of the page.
			 */
			if ((zoffset = pageoffset + nmoved) < PAGESIZE)
				(void) kzero(base + segmap_offset + zoffset,
				    (size_t)PAGESIZE - zoffset);
		}

		/*
		 * Unlock the pages which have been allocated by
		 * page_create_va() in segmap_pagecreate()
		 */
		if (!vpm_enable && newpage) {
			segmap_pageunlock(segkmap, base + segmap_offset,
			    (size_t)PAGESIZE, S_WRITE);
		}

		if (error) {
			/*
			 * If we failed on a write, we must
			 * be sure to invalidate any pages that may have
			 * been allocated.
			 */
			if (vpm_enable) {
				(void) vpm_sync_pages(vp, offset, PAGESIZE,
				    SM_INVAL);
			} else {
				(void) segmap_release(segkmap, base, SM_INVAL);
			}
		} else {
			if (vpm_enable) {
				error = vpm_sync_pages(vp, offset, PAGESIZE,
				    0);
			} else {
				error = segmap_release(segkmap, base, 0);
			}
		}

		/*
		 * Re-acquire contents lock.
		 */
		rw_enter(&tp->tn_contents, RW_WRITER);

		/*
		 * Update tn_size.
		 */
		if (tn_size_changed)
			tp->tn_size = new_tn_size;

		/*
		 * If the uiomove failed, fix up tn_size.
		 */
		if (error) {
			if (tn_size_changed) {
				/*
				 * The uiomove failed, and we
				 * allocated blocks, so get rid
				 * of them.
				 */
				(void) tmpnode_trunc(tm, tp,
				    (ulong_t)old_tn_size);
			}
		} else {
			/*
			 * XXX - Can this be out of the loop?
			 */
			if ((tp->tn_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) &&
			    (tp->tn_mode & (S_ISUID | S_ISGID)) &&
			    secpolicy_vnode_setid_retain(cr,
			    (tp->tn_mode & S_ISUID) != 0 && tp->tn_uid == 0)) {
				/*
				 * Clear Set-UID & Set-GID bits on
				 * successful write if not privileged
				 * and at least one of the execute bits
				 * is set. If we always clear Set-GID,
				 * mandatory file and record locking is
				 * unusable.
				 */
				tp->tn_mode &= ~(S_ISUID | S_ISGID);
			}
			gethrestime(&now);
			tp->tn_mtime = now;
			tp->tn_ctime = now;
		}
	} while (error == 0 && uio->uio_resid > 0 && bytes != 0);

out:
	/*
	 * If we've already done a partial-write, terminate
	 * the write but return no error.
	 */
	if (oresid != uio->uio_resid)
		error = 0;
	TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
	    "tmp_wrtmp_end:vp %p error %d", vp, error);
	return (error);
}

/*
 * rdtmp does the real work of read requests for tmpfs.
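 * The caller (tmp_read) must hold tn_contents at least as a reader;
 * data is copied a page at a time through segmap or the VPM interface.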
 */
static int
rdtmp(
	struct tmount *tm,
	struct tmpnode *tp,
	struct uio *uio,
	struct caller_context *ct)
{
	ulong_t pageoffset;	/* offset in tmpfs file (uio_offset) */
	ulong_t segmap_offset;	/* pagesize byte offset into segmap */
	caddr_t base;		/* base of segmap */
	ssize_t bytes;		/* bytes to uiomove */
	struct vnode *vp;
	int error;
	long oresid = uio->uio_resid;

#if defined(lint)
	tm = tm;
#endif
	vp = TNTOV(tp);

	TRACE_1(TR_FAC_TMPFS, TR_TMPFS_RWTMP_START, "tmp_rdtmp_start:vp %p",
	    vp);

	ASSERT(RW_LOCK_HELD(&tp->tn_contents));

	if (MANDLOCK(vp, tp->tn_mode)) {
		rw_exit(&tp->tn_contents);
		/*
		 * tmp_getattr ends up being called by chklock
		 */
		error = chklock(vp, FREAD, uio->uio_loffset, uio->uio_resid,
		    uio->uio_fmode, ct);
		rw_enter(&tp->tn_contents, RW_READER);
		if (error != 0) {
			TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
			    "tmp_rdtmp_end:vp %p error %d", vp, error);
			return (error);
		}
	}
	ASSERT(tp->tn_type == VREG);

	if (uio->uio_loffset >= MAXOFF_T) {
		TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
		    "tmp_rdtmp_end:vp %p error %d", vp, EINVAL);
		return (0);
	}
	if (uio->uio_loffset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0) {
		TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
		    "tmp_rdtmp_end:vp %p error %d", vp, 0);
		return (0);
	}

	vp = TNTOV(tp);

	do {
		long diff;
		long offset;

		offset = uio->uio_offset;
		pageoffset = offset & PAGEOFFSET;
		bytes = MIN(PAGESIZE - pageoffset, uio->uio_resid);

		diff = tp->tn_size - offset;

		if (diff <= 0) {
			error = 0;
			goto out;
		}
		if (diff < bytes)
			bytes = diff;

		/*
		 * We have to drop the contents lock to allow the VM system
		 * to reacquire it in tmp_getpage() should the uiomove cause a
		 * pagefault.
		 */
		rw_exit(&tp->tn_contents);

		if (vpm_enable) {
			/*
			 * Copy data.
			 */
			error = vpm_data_copy(vp, offset, bytes, uio, 1, NULL,
			    0, S_READ);
		} else {
			segmap_offset = (offset & PAGEMASK) & MAXBOFFSET;
			base = segmap_getmapflt(segkmap, vp, offset & MAXBMASK,
			    bytes, 1, S_READ);

			error = uiomove(base + segmap_offset + pageoffset,
			    (long)bytes, UIO_READ, uio);
		}

		if (error) {
			if (vpm_enable) {
				(void) vpm_sync_pages(vp, offset, PAGESIZE, 0);
			} else {
				(void) segmap_release(segkmap, base, 0);
			}
		} else {
			if (vpm_enable) {
				error = vpm_sync_pages(vp, offset, PAGESIZE,
				    0);
			} else {
				error = segmap_release(segkmap, base, 0);
			}
		}

		/*
		 * Re-acquire contents lock.
		 */
		rw_enter(&tp->tn_contents, RW_READER);

	} while (error == 0 && uio->uio_resid > 0);

out:
	gethrestime(&tp->tn_atime);

	/*
	 * If we've already done a partial read, terminate
	 * the read but return no error.
	 */
	if (oresid != uio->uio_resid)
		error = 0;

	TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
	    "tmp_rdtmp_end:vp %x error %d", vp, error);
	return (error);
}

/* ARGSUSED2 */
static int
tmp_read(struct vnode *vp, struct uio *uiop, int ioflag, cred_t *cred,
    struct caller_context *ct)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	struct tmount *tm = (struct tmount *)VTOTM(vp);
	int error;

	/*
	 * We don't currently support reading non-regular files
	 */
	if (vp->v_type == VDIR)
		return (EISDIR);
	if (vp->v_type != VREG)
		return (EINVAL);
	/*
	 * tmp_rwlock should have already been called from layers above
	 */
	ASSERT(RW_READ_HELD(&tp->tn_rwlock));

	rw_enter(&tp->tn_contents, RW_READER);

	error = rdtmp(tm, tp, uiop, ct);

	rw_exit(&tp->tn_contents);

	return (error);
}

static int
tmp_write(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred,
    struct caller_context *ct)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	struct tmount *tm = (struct tmount *)VTOTM(vp);
	int error;

	/*
	 * We don't currently support writing to non-regular files
	 */
	if (vp->v_type != VREG)
		return (EINVAL);	/* XXX EISDIR? */

	/*
	 * tmp_rwlock should have already been called from layers above
	 */
	ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));

	rw_enter(&tp->tn_contents, RW_WRITER);

	if (ioflag & FAPPEND) {
		/*
		 * In append mode start at end of file.
		 */
		uiop->uio_loffset = tp->tn_size;
	}

	error = wrtmp(tm, tp, uiop, cred, ct);

	rw_exit(&tp->tn_contents);

	return (error);
}

/* ARGSUSED */
static int
tmp_ioctl(
	struct vnode *vp,
	int com,
	intptr_t data,
	int flag,
	struct cred *cred,
	int *rvalp,
	caller_context_t *ct)
{
	return (ENOTTY);
}

/* ARGSUSED2 */
static int
tmp_getattr(
	struct vnode *vp,
	struct vattr *vap,
	int flags,
	struct cred *cred,
	caller_context_t *ct)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	struct vnode *mvp;
	struct vattr va;
	int attrs = 1;

	/*
	 * A special case to handle the root tnode on a diskless nfs
	 * client that may have had its uid and gid inherited
	 * from an nfs vnode with nobody ownership. Likely the
	 * root filesystem. After nfs is fully functional the uid/gid
	 * may be mappable so ask again.
	 * vfsp can't get unmounted because we hold vp.
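	 * (attrs == 0 below means the VOP_GETATTR on the covered vnode
	 * succeeded and its uid/gid are adopted.)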
	 */
	if (vp->v_flag & VROOT &&
	    (mvp = vp->v_vfsp->vfs_vnodecovered) != NULL) {
		mutex_enter(&tp->tn_tlock);
		if (tp->tn_uid == UID_NOBODY || tp->tn_gid == GID_NOBODY) {
			mutex_exit(&tp->tn_tlock);
			bzero(&va, sizeof (struct vattr));
			va.va_mask = AT_UID|AT_GID;
			attrs = VOP_GETATTR(mvp, &va, 0, cred, ct);
		} else {
			mutex_exit(&tp->tn_tlock);
		}
	}
	mutex_enter(&tp->tn_tlock);
	if (attrs == 0) {
		tp->tn_uid = va.va_uid;
		tp->tn_gid = va.va_gid;
	}
	vap->va_type = vp->v_type;
	vap->va_mode = tp->tn_mode & MODEMASK;
	vap->va_uid = tp->tn_uid;
	vap->va_gid = tp->tn_gid;
	vap->va_fsid = tp->tn_fsid;
	vap->va_nodeid = (ino64_t)tp->tn_nodeid;
	vap->va_nlink = tp->tn_nlink;
	vap->va_size = (u_offset_t)tp->tn_size;
	vap->va_atime = tp->tn_atime;
	vap->va_mtime = tp->tn_mtime;
	vap->va_ctime = tp->tn_ctime;
	vap->va_blksize = PAGESIZE;
	vap->va_rdev = tp->tn_rdev;
	vap->va_seq = tp->tn_seq;

	/*
	 * XXX Holes are not taken into account. We could take the time to
	 * run through the anon array looking for allocated slots...
	 */
	vap->va_nblocks = (fsblkcnt64_t)btodb(ptob(btopr(vap->va_size)));
	mutex_exit(&tp->tn_tlock);
	return (0);
}

/*ARGSUSED4*/
static int
tmp_setattr(
	struct vnode *vp,
	struct vattr *vap,
	int flags,
	struct cred *cred,
	caller_context_t *ct)
{
	struct tmount *tm = (struct tmount *)VTOTM(vp);
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	int error = 0;
	struct vattr *get;
	long mask;

	/*
	 * Cannot set these attributes
	 */
	if ((vap->va_mask & AT_NOSET) || (vap->va_mask & AT_XVATTR))
		return (EINVAL);

	mutex_enter(&tp->tn_tlock);

	get = &tp->tn_attr;
	/*
	 * Change file access modes. Must be owner or have sufficient
	 * privileges.
	 */
	error = secpolicy_vnode_setattr(cred, vp, vap, get, flags, tmp_taccess,
	    tp);

	if (error)
		goto out;

	mask = vap->va_mask;

	if (mask & AT_MODE) {
		get->va_mode &= S_IFMT;
		get->va_mode |= vap->va_mode & ~S_IFMT;
	}

	if (mask & AT_UID)
		get->va_uid = vap->va_uid;
	if (mask & AT_GID)
		get->va_gid = vap->va_gid;
	if (mask & AT_ATIME)
		get->va_atime = vap->va_atime;
	if (mask & AT_MTIME)
		get->va_mtime = vap->va_mtime;

	if (mask & (AT_UID | AT_GID | AT_MODE | AT_MTIME))
		gethrestime(&tp->tn_ctime);

	if (mask & AT_SIZE) {
		ASSERT(vp->v_type != VDIR);

		/* Don't support large files. */
		if (vap->va_size > MAXOFF_T) {
			error = EFBIG;
			goto out;
		}
		mutex_exit(&tp->tn_tlock);

		rw_enter(&tp->tn_rwlock, RW_WRITER);
		rw_enter(&tp->tn_contents, RW_WRITER);
		error = tmpnode_trunc(tm, tp, (ulong_t)vap->va_size);
		rw_exit(&tp->tn_contents);
		rw_exit(&tp->tn_rwlock);
		goto out1;
	}
out:
	mutex_exit(&tp->tn_tlock);
out1:
	return (error);
}

/* ARGSUSED2 */
static int
tmp_access(
	struct vnode *vp,
	int mode,
	int flags,
	struct cred *cred,
	caller_context_t *ct)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	int error;

	mutex_enter(&tp->tn_tlock);
	error = tmp_taccess(tp, mode, cred);
	mutex_exit(&tp->tn_tlock);
	return (error);
}

/* ARGSUSED3 */
static int
tmp_lookup(
	struct vnode *dvp,
	char *nm,
	struct vnode **vpp,
	struct pathname *pnp,
	int flags,
	struct vnode *rdir,
	struct cred *cred,
	caller_context_t *ct,
	int *direntflags,
	pathname_t *realpnp)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(dvp);
	struct tmpnode *ntp = NULL;
	int error;


	/* allow cd into @ dir */
	if (flags & LOOKUP_XATTR) {
		struct tmpnode *xdp;
		struct tmount *tm;

		/*
		 * don't allow attributes if not mounted with XATTR support
		 */
		if (!(dvp->v_vfsp->vfs_flag & VFS_XATTR))
			return (EINVAL);

		if (tp->tn_flags & ISXATTR)
			/* No attributes on attributes */
			return (EINVAL);

		rw_enter(&tp->tn_rwlock, RW_WRITER);
		if (tp->tn_xattrdp == NULL) {
			if (!(flags & CREATE_XATTR_DIR)) {
				rw_exit(&tp->tn_rwlock);
				return (ENOENT);
			}

			/*
			 * No attribute directory exists for this
			 * node - create the attr dir as a side effect
			 * of this lookup.
			 */

			/*
			 * Make sure we have adequate permission...
			 */

			if ((error = tmp_taccess(tp, VWRITE, cred)) != 0) {
				rw_exit(&tp->tn_rwlock);
				return (error);
			}

			xdp = tmp_memalloc(sizeof (struct tmpnode),
			    TMP_MUSTHAVE);
			tm = VTOTM(dvp);
			tmpnode_init(tm, xdp, &tp->tn_attr, NULL);
			/*
			 * Fix-up fields unique to attribute directories.
			 */
			xdp->tn_flags = ISXATTR;
			xdp->tn_type = VDIR;
			if (tp->tn_type == VDIR) {
				xdp->tn_mode = tp->tn_attr.va_mode;
			} else {
				xdp->tn_mode = 0700;
				if (tp->tn_attr.va_mode & 0040)
					xdp->tn_mode |= 0750;
				if (tp->tn_attr.va_mode & 0004)
					xdp->tn_mode |= 0705;
			}
			xdp->tn_vnode->v_type = VDIR;
			xdp->tn_vnode->v_flag |= V_XATTRDIR;
			tdirinit(tp, xdp);
			tp->tn_xattrdp = xdp;
		} else {
			VN_HOLD(tp->tn_xattrdp->tn_vnode);
		}
		*vpp = TNTOV(tp->tn_xattrdp);
		rw_exit(&tp->tn_rwlock);
		return (0);
	}

	/*
	 * Null component name is a synonym for directory being searched.
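	 * In that case simply return the directory itself with an extra hold.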
	 */
	if (*nm == '\0') {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}
	ASSERT(tp);

	error = tdirlookup(tp, nm, &ntp, cred);

	if (error == 0) {
		ASSERT(ntp);
		*vpp = TNTOV(ntp);
		/*
		 * If vnode is a device return special vnode instead
		 */
		if (IS_DEVVP(*vpp)) {
			struct vnode *newvp;

			newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type,
			    cred);
			VN_RELE(*vpp);
			*vpp = newvp;
		}
	}
	TRACE_4(TR_FAC_TMPFS, TR_TMPFS_LOOKUP,
	    "tmpfs lookup:vp %p name %s vpp %p error %d",
	    dvp, nm, vpp, error);
	return (error);
}

/*ARGSUSED7*/
static int
tmp_create(
	struct vnode *dvp,
	char *nm,
	struct vattr *vap,
	enum vcexcl exclusive,
	int mode,
	struct vnode **vpp,
	struct cred *cred,
	int flag,
	caller_context_t *ct,
	vsecattr_t *vsecp)
{
	struct tmpnode *parent;
	struct tmount *tm;
	struct tmpnode *self;
	int error;
	struct tmpnode *oldtp;

again:
	parent = (struct tmpnode *)VTOTN(dvp);
	tm = (struct tmount *)VTOTM(dvp);
	self = NULL;
	error = 0;
	oldtp = NULL;

	/* device files not allowed in ext. attr dirs */
	if ((parent->tn_flags & ISXATTR) &&
	    (vap->va_type == VBLK || vap->va_type == VCHR ||
	    vap->va_type == VFIFO || vap->va_type == VDOOR ||
	    vap->va_type == VSOCK || vap->va_type == VPORT))
		return (EINVAL);

	if (vap->va_type == VREG && (vap->va_mode & VSVTX)) {
		/* Must be privileged to set sticky bit */
		if (secpolicy_vnode_stky_modify(cred))
			vap->va_mode &= ~VSVTX;
	} else if (vap->va_type == VNON) {
		return (EINVAL);
	}

	/*
	 * Null component name is a synonym for directory being searched.
	 */
	if (*nm == '\0') {
		VN_HOLD(dvp);
		oldtp = parent;
	} else {
		error = tdirlookup(parent, nm, &oldtp, cred);
	}

	if (error == 0) {	/* name found */
		ASSERT(oldtp);

		rw_enter(&oldtp->tn_rwlock, RW_WRITER);

		/*
		 * if create/read-only an existing
		 * directory, allow it
		 */
		if (exclusive == EXCL)
			error = EEXIST;
		else if ((oldtp->tn_type == VDIR) && (mode & VWRITE))
			error = EISDIR;
		else {
			error = tmp_taccess(oldtp, mode, cred);
		}

		if (error) {
			rw_exit(&oldtp->tn_rwlock);
			tmpnode_rele(oldtp);
			return (error);
		}
		*vpp = TNTOV(oldtp);
		if ((*vpp)->v_type == VREG && (vap->va_mask & AT_SIZE) &&
		    vap->va_size == 0) {
			rw_enter(&oldtp->tn_contents, RW_WRITER);
			(void) tmpnode_trunc(tm, oldtp, 0);
			rw_exit(&oldtp->tn_contents);
		}
		rw_exit(&oldtp->tn_rwlock);
		if (IS_DEVVP(*vpp)) {
			struct vnode *newvp;

			newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type,
			    cred);
			VN_RELE(*vpp);
			if (newvp == NULL) {
				return (ENOSYS);
			}
			*vpp = newvp;
		}

		if (error == 0) {
			vnevent_create(*vpp, ct);
		}
		return (0);
	}

	if (error != ENOENT)
		return (error);

	rw_enter(&parent->tn_rwlock, RW_WRITER);
	error = tdirenter(tm, parent, nm, DE_CREATE,
	    (struct tmpnode *)NULL, (struct tmpnode *)NULL,
	    vap, &self, cred, ct);
	rw_exit(&parent->tn_rwlock);

	if (error) {
		if (self)
			tmpnode_rele(self);

		if (error == EEXIST) {
			/*
			 * This means that the file was created sometime
			 * after we checked and did not find it and when
			 * we went to create it.
			 * Since creat() is supposed to truncate a file
			 * that already exists go back to the beginning
			 * of the function. This time we will find it
			 * and go down the tmp_trunc() path
			 */
			goto again;
		}
		return (error);
	}

	*vpp = TNTOV(self);

	if (!error && IS_DEVVP(*vpp)) {
		struct vnode *newvp;

		newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cred);
		VN_RELE(*vpp);
		if (newvp == NULL)
			return (ENOSYS);
		*vpp = newvp;
	}
	TRACE_3(TR_FAC_TMPFS, TR_TMPFS_CREATE,
	    "tmpfs create:dvp %p nm %s vpp %p", dvp, nm, vpp);
	return (0);
}

/* ARGSUSED3 */
static int
tmp_remove(
	struct vnode *dvp,
	char *nm,
	struct cred *cred,
	caller_context_t *ct,
	int flags)
{
	struct tmpnode *parent = (struct tmpnode *)VTOTN(dvp);
	int error;
	struct tmpnode *tp = NULL;

	error = tdirlookup(parent, nm, &tp, cred);
	if (error)
		return (error);

	ASSERT(tp);
	rw_enter(&parent->tn_rwlock, RW_WRITER);
	rw_enter(&tp->tn_rwlock, RW_WRITER);

	if (tp->tn_type != VDIR ||
	    (error = secpolicy_fs_linkdir(cred, dvp->v_vfsp)) == 0)
		error = tdirdelete(parent, tp, nm, DR_REMOVE, cred);

	rw_exit(&tp->tn_rwlock);
	rw_exit(&parent->tn_rwlock);
	vnevent_remove(TNTOV(tp), dvp, nm, ct);
	tmpnode_rele(tp);

	TRACE_3(TR_FAC_TMPFS, TR_TMPFS_REMOVE,
	    "tmpfs remove:dvp %p nm %s error %d", dvp, nm, error);
	return (error);
}

/* ARGSUSED4 */
static int
tmp_link(
	struct vnode *dvp,
	struct vnode *srcvp,
	char *tnm,
	struct cred *cred,
	caller_context_t *ct,
	int flags)
{
	struct tmpnode *parent;
	struct tmpnode *from;
	struct tmount *tm = (struct tmount *)VTOTM(dvp);
	int error;
	struct tmpnode *found = NULL;
	struct vnode *realvp;

	if (VOP_REALVP(srcvp, &realvp, ct) == 0)
		srcvp = realvp;

	parent = (struct tmpnode *)VTOTN(dvp);
	from = (struct tmpnode *)VTOTN(srcvp);

	if ((srcvp->v_type == VDIR &&
	    secpolicy_fs_linkdir(cred, dvp->v_vfsp)) ||
	    (from->tn_uid != crgetuid(cred) && secpolicy_basic_link(cred)))
		return (EPERM);

	/*
	 * Make sure link for extended attributes is valid
	 * We only support hard linking of xattr's in xattrdir to an xattrdir
	 */
	if ((from->tn_flags & ISXATTR) != (parent->tn_flags & ISXATTR))
		return (EINVAL);

	error = tdirlookup(parent, tnm, &found, cred);
	if (error == 0) {
		ASSERT(found);
		tmpnode_rele(found);
		return (EEXIST);
	}

	if (error != ENOENT)
		return (error);

	rw_enter(&parent->tn_rwlock, RW_WRITER);
	error = tdirenter(tm, parent, tnm, DE_LINK, (struct tmpnode *)NULL,
	    from, NULL, (struct tmpnode **)NULL, cred, ct);
	rw_exit(&parent->tn_rwlock);
	if (error == 0) {
		vnevent_link(srcvp, ct);
	}
	return (error);
}

/* ARGSUSED5 */
static int
tmp_rename(
	struct vnode *odvp,	/* source parent vnode */
	char *onm,		/* source name */
	struct vnode *ndvp,	/* destination parent vnode */
	char *nnm,		/* destination name */
	struct cred *cred,
	caller_context_t *ct,
	int flags)
{
	struct tmpnode *fromparent;
	struct tmpnode *toparent;
	struct tmpnode *fromtp = NULL;	/* source tmpnode */
	struct tmount *tm = (struct tmount *)VTOTM(odvp);
	int error;
	int samedir = 0;	/* set if odvp == ndvp */
	struct vnode *realvp;

	if (VOP_REALVP(ndvp, &realvp, ct) == 0)
		ndvp = realvp;

	fromparent = (struct tmpnode *)VTOTN(odvp);
	toparent = (struct tmpnode *)VTOTN(ndvp);

	if ((fromparent->tn_flags & ISXATTR) != (toparent->tn_flags & ISXATTR))
		return (EINVAL);

	mutex_enter(&tm->tm_renamelck);

	/*
	 * Look up tmpnode of file we're supposed to rename.
	 */
	error = tdirlookup(fromparent, onm, &fromtp, cred);
	if (error) {
		mutex_exit(&tm->tm_renamelck);
		return (error);
	}

	/*
	 * Make sure we can delete the old (source) entry. This
	 * requires write permission on the containing directory. If
	 * that directory is "sticky" it requires further checks.
	 */
	if (((error = tmp_taccess(fromparent, VWRITE, cred)) != 0) ||
	    (error = tmp_sticky_remove_access(fromparent, fromtp, cred)) != 0)
		goto done;

	/*
	 * Check for renaming to or from '.' or '..' or that
	 * fromtp == fromparent
	 */
	if ((onm[0] == '.' &&
	    (onm[1] == '\0' || (onm[1] == '.' && onm[2] == '\0'))) ||
	    (nnm[0] == '.' &&
	    (nnm[1] == '\0' || (nnm[1] == '.' && nnm[2] == '\0'))) ||
	    (fromparent == fromtp)) {
		error = EINVAL;
		goto done;
	}

	samedir = (fromparent == toparent);
	/*
	 * Make sure we can search and rename into the new
	 * (destination) directory.
	 */
	if (!samedir) {
		error = tmp_taccess(toparent, VEXEC|VWRITE, cred);
		if (error)
			goto done;
	}

	/*
	 * Link source to new target
	 */
	rw_enter(&toparent->tn_rwlock, RW_WRITER);
	error = tdirenter(tm, toparent, nnm, DE_RENAME,
	    fromparent, fromtp, (struct vattr *)NULL,
	    (struct tmpnode **)NULL, cred, ct);
	rw_exit(&toparent->tn_rwlock);

	if (error) {
		/*
		 * ESAME isn't really an error; it indicates that the
		 * operation should not be done because the source and target
		 * are the same file, but that no error should be reported.
		 */
		if (error == ESAME)
			error = 0;
		goto done;
	}
	vnevent_rename_src(TNTOV(fromtp), odvp, onm, ct);

	/*
	 * Notify the target directory if not same as
	 * source directory.
	 */
	if (ndvp != odvp) {
		vnevent_rename_dest_dir(ndvp, ct);
	}

	/*
	 * Unlink from source.
	 */
	rw_enter(&fromparent->tn_rwlock, RW_WRITER);
	rw_enter(&fromtp->tn_rwlock, RW_WRITER);

	error = tdirdelete(fromparent, fromtp, onm, DR_RENAME, cred);

	/*
	 * The following handles the case where our source tmpnode was
	 * removed before we got to it.
	 *
	 * XXX We should also cleanup properly in the case where tdirdelete
	 * fails for some other reason. Currently this case shouldn't happen.
	 * (see 1184991).
	 */
	if (error == ENOENT)
		error = 0;

	rw_exit(&fromtp->tn_rwlock);
	rw_exit(&fromparent->tn_rwlock);
done:
	tmpnode_rele(fromtp);
	mutex_exit(&tm->tm_renamelck);

	TRACE_5(TR_FAC_TMPFS, TR_TMPFS_RENAME,
	    "tmpfs rename:ovp %p onm %s nvp %p nnm %s error %d", odvp, onm,
	    ndvp, nnm, error);
	return (error);
}

/* ARGSUSED5 */
static int
tmp_mkdir(
	struct vnode *dvp,
	char *nm,
	struct vattr *va,
	struct vnode **vpp,
	struct cred *cred,
	caller_context_t *ct,
	int flags,
	vsecattr_t *vsecp)
{
	struct tmpnode *parent = (struct tmpnode *)VTOTN(dvp);
	struct tmpnode *self = NULL;
	struct tmount *tm = (struct tmount *)VTOTM(dvp);
	int error;

	/* no new dirs allowed in xattr dirs */
	if (parent->tn_flags & ISXATTR)
		return (EINVAL);

	/*
	 * Might be dangling directory. Catch it here,
	 * because a ENOENT return from tdirlookup() is
	 * an "o.k. return".
	 */
	if (parent->tn_nlink == 0)
		return (ENOENT);

	error = tdirlookup(parent, nm, &self, cred);
	if (error == 0) {
		ASSERT(self);
		tmpnode_rele(self);
		return (EEXIST);
	}
	if (error != ENOENT)
		return (error);

	rw_enter(&parent->tn_rwlock, RW_WRITER);
	error = tdirenter(tm, parent, nm, DE_MKDIR, (struct tmpnode *)NULL,
	    (struct tmpnode *)NULL, va, &self, cred, ct);
	if (error) {
		rw_exit(&parent->tn_rwlock);
		if (self)
			tmpnode_rele(self);
		return (error);
	}
	rw_exit(&parent->tn_rwlock);
	*vpp = TNTOV(self);
	return (0);
}

/* ARGSUSED4 */
static int
tmp_rmdir(
	struct vnode *dvp,
	char *nm,
	struct vnode *cdir,
	struct cred *cred,
	caller_context_t *ct,
	int flags)
{
	struct tmpnode *parent = (struct tmpnode *)VTOTN(dvp);
	struct tmpnode *self = NULL;
	struct vnode *vp;
	int error = 0;

	/*
	 * Return error when removing . and ..
	 */
	if (strcmp(nm, ".") == 0)
		return (EINVAL);
	if (strcmp(nm, "..") == 0)
		return (EEXIST); /* Should be ENOTEMPTY */
	error = tdirlookup(parent, nm, &self, cred);
	if (error)
		return (error);

	rw_enter(&parent->tn_rwlock, RW_WRITER);
	rw_enter(&self->tn_rwlock, RW_WRITER);

	vp = TNTOV(self);
	if (vp == dvp || vp == cdir) {
		error = EINVAL;
		goto done1;
	}
	if (self->tn_type != VDIR) {
		error = ENOTDIR;
		goto done1;
	}

	mutex_enter(&self->tn_tlock);
	if (self->tn_nlink > 2) {
		mutex_exit(&self->tn_tlock);
		error = EEXIST;
		goto done1;
	}
	mutex_exit(&self->tn_tlock);

	if (vn_vfswlock(vp)) {
		error = EBUSY;
		goto done1;
	}
	if (vn_mountedvfs(vp) != NULL) {
		error = EBUSY;
		goto done;
	}

	/*
	 * Check for an empty directory
	 * i.e. only includes entries for "." and ".."
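	 * (tn_dirents counts those two entries as well, hence the > 2 test)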
	 */
	if (self->tn_dirents > 2) {
		error = EEXIST;		/* SIGH should be ENOTEMPTY */
		/*
		 * Update atime because checking tn_dirents is logically
		 * equivalent to reading the directory
		 */
		gethrestime(&self->tn_atime);
		goto done;
	}

	error = tdirdelete(parent, self, nm, DR_RMDIR, cred);
done:
	vn_vfsunlock(vp);
done1:
	rw_exit(&self->tn_rwlock);
	rw_exit(&parent->tn_rwlock);
	vnevent_rmdir(TNTOV(self), dvp, nm, ct);
	tmpnode_rele(self);

	return (error);
}

/* ARGSUSED2 */
static int
tmp_readdir(
	struct vnode *vp,
	struct uio *uiop,
	struct cred *cred,
	int *eofp,
	caller_context_t *ct,
	int flags)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	struct tdirent *tdp;
	int error = 0;
	size_t namelen;
	struct dirent64 *dp;
	ulong_t offset;
	ulong_t total_bytes_wanted;
	long outcount = 0;
	long bufsize;
	int reclen;
	caddr_t outbuf;

	if (uiop->uio_loffset >= MAXOFF_T) {
		if (eofp)
			*eofp = 1;
		return (0);
	}
	/*
	 * assuming system call has already called tmp_rwlock
	 */
	ASSERT(RW_READ_HELD(&tp->tn_rwlock));

	if (uiop->uio_iovcnt != 1)
		return (EINVAL);

	if (vp->v_type != VDIR)
		return (ENOTDIR);

	/*
	 * There's a window here where someone could have removed
	 * all the entries in the directory after we put a hold on the
	 * vnode but before we grabbed the rwlock. Just return.
	 */
	if (tp->tn_dir == NULL) {
		if (tp->tn_nlink) {
			panic("empty directory 0x%p", (void *)tp);
			/*NOTREACHED*/
		}
		return (0);
	}

	/*
	 * Get space for multiple directory entries
	 */
	total_bytes_wanted = uiop->uio_iov->iov_len;
	bufsize = total_bytes_wanted + sizeof (struct dirent64);
	outbuf = kmem_alloc(bufsize, KM_SLEEP);

	dp = (struct dirent64 *)outbuf;


	offset = 0;
	tdp = tp->tn_dir;
	while (tdp) {
		namelen = strlen(tdp->td_name);	/* no +1 needed */
		offset = tdp->td_offset;
		if (offset >= uiop->uio_offset) {
			reclen = (int)DIRENT64_RECLEN(namelen);
			if (outcount + reclen > total_bytes_wanted) {
				if (!outcount)
					/*
					 * Buffer too small for any entries.
					 */
					error = EINVAL;
				break;
			}
			ASSERT(tdp->td_tmpnode != NULL);

			/* use strncpy(9f) to zero out uninitialized bytes */

			(void) strncpy(dp->d_name, tdp->td_name,
			    DIRENT64_NAMELEN(reclen));
			dp->d_reclen = (ushort_t)reclen;
			dp->d_ino = (ino64_t)tdp->td_tmpnode->tn_nodeid;
			dp->d_off = (offset_t)tdp->td_offset + 1;
			dp = (struct dirent64 *)
			    ((uintptr_t)dp + dp->d_reclen);
			outcount += reclen;
			ASSERT(outcount <= bufsize);
		}
		tdp = tdp->td_next;
	}

	if (!error)
		error = uiomove(outbuf, outcount, UIO_READ, uiop);

	if (!error) {
		/* If we reached the end of the list our offset */
		/* should now be just past the end. */
		if (!tdp) {
			offset += 1;
			if (eofp)
				*eofp = 1;
		} else if (eofp)
			*eofp = 0;
		uiop->uio_offset = offset;
	}
	gethrestime(&tp->tn_atime);
	kmem_free(outbuf, bufsize);
	return (error);
}

/* ARGSUSED5 */
static int
tmp_symlink(
	struct vnode *dvp,
	char *lnm,
	struct vattr *tva,
	char *tnm,
	struct cred *cred,
	caller_context_t *ct,
	int flags)
{
	struct tmpnode *parent = (struct tmpnode *)VTOTN(dvp);
	struct tmpnode *self = (struct tmpnode *)NULL;
	struct tmount *tm = (struct tmount *)VTOTM(dvp);
	char *cp = NULL;
	int error;
	size_t len;

	/* no symlinks allowed to files in xattr dirs */
	if (parent->tn_flags & ISXATTR)
		return (EINVAL);

	error = tdirlookup(parent, lnm, &self, cred);
	if (error == 0) {
		/*
		 * The entry already exists
		 */
		tmpnode_rele(self);
		return (EEXIST);	/* was 0 */
	}

	if (error != ENOENT) {
		if (self != NULL)
			tmpnode_rele(self);
		return (error);
	}

	rw_enter(&parent->tn_rwlock, RW_WRITER);
	error = tdirenter(tm, parent, lnm, DE_CREATE, (struct tmpnode *)NULL,
	    (struct tmpnode *)NULL, tva, &self, cred, ct);
	rw_exit(&parent->tn_rwlock);

	if (error) {
		if (self)
			tmpnode_rele(self);
		return (error);
	}
	len = strlen(tnm) + 1;
	cp = tmp_memalloc(len, 0);
	if (cp == NULL) {
		tmpnode_rele(self);
		return (ENOSPC);
	}
	(void) strcpy(cp, tnm);

	self->tn_symlink = cp;
	self->tn_size = len - 1;
	tmpnode_rele(self);
	return (error);
}

/* ARGSUSED2 */
static int
tmp_readlink(
	struct vnode *vp,
	struct uio *uiop,
	struct cred *cred,
	caller_context_t *ct)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	int error = 0;

	if (vp->v_type != VLNK)
		return (EINVAL);

	rw_enter(&tp->tn_rwlock, RW_READER);
	rw_enter(&tp->tn_contents, RW_READER);
	error = uiomove(tp->tn_symlink, tp->tn_size, UIO_READ, uiop);
	gethrestime(&tp->tn_atime);
	rw_exit(&tp->tn_contents);
	rw_exit(&tp->tn_rwlock);
	return (error);
}

/* ARGSUSED */
static int
tmp_fsync(
	struct vnode *vp,
	int syncflag,
	struct cred *cred,
	caller_context_t *ct)
{
	return (0);
}

/* ARGSUSED */
static void
tmp_inactive(struct vnode *vp, struct cred *cred, caller_context_t *ct)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	struct tmount *tm = (struct tmount *)VFSTOTM(vp->v_vfsp);

	rw_enter(&tp->tn_rwlock, RW_WRITER);
top:
	mutex_enter(&tp->tn_tlock);
	mutex_enter(&vp->v_lock);
	ASSERT(vp->v_count >= 1);

	/*
	 * If we don't have the last hold or the link count is non-zero,
	 * there's little to do -- just drop our hold.
	 */
	if (vp->v_count > 1 || tp->tn_nlink != 0) {
		vp->v_count--;
		mutex_exit(&vp->v_lock);
		mutex_exit(&tp->tn_tlock);
		rw_exit(&tp->tn_rwlock);
		return;
	}

	/*
	 * We have the last hold *and* the link count is zero, so this
	 * tmpnode is dead from the filesystem's viewpoint. However,
	 * if the tmpnode has any pages associated with it (i.e. if it's
	 * a normal file with non-zero size), the tmpnode can still be
	 * discovered by pageout or fsflush via the page vnode pointers.
	 * In this case we must drop all our locks, truncate the tmpnode,
	 * and try the whole dance again.
	 */
	if (tp->tn_size != 0) {
		if (tp->tn_type == VREG) {
			mutex_exit(&vp->v_lock);
			mutex_exit(&tp->tn_tlock);
			rw_enter(&tp->tn_contents, RW_WRITER);
			(void) tmpnode_trunc(tm, tp, 0);
			rw_exit(&tp->tn_contents);
			ASSERT(tp->tn_size == 0);
			ASSERT(tp->tn_nblocks == 0);
			goto top;
		}
		if (tp->tn_type == VLNK)
			tmp_memfree(tp->tn_symlink, tp->tn_size + 1);
	}

	/*
	 * Remove normal file/dir's xattr dir and xattrs.
	 */
	if (tp->tn_xattrdp) {
		struct tmpnode *xtp = tp->tn_xattrdp;

		ASSERT(xtp->tn_flags & ISXATTR);
		tmpnode_hold(xtp);
		rw_enter(&xtp->tn_rwlock, RW_WRITER);
		tdirtrunc(xtp);
		DECR_COUNT(&xtp->tn_nlink, &xtp->tn_tlock);
		tp->tn_xattrdp = NULL;
		rw_exit(&xtp->tn_rwlock);
		tmpnode_rele(xtp);
	}

	mutex_exit(&vp->v_lock);
	mutex_exit(&tp->tn_tlock);
	/* Here's our chance to send invalid event while we're between locks */
	vn_invalid(TNTOV(tp));
	mutex_enter(&tm->tm_contents);
	if (tp->tn_forw == NULL)
		tm->tm_rootnode->tn_back = tp->tn_back;
	else
		tp->tn_forw->tn_back = tp->tn_back;
	tp->tn_back->tn_forw = tp->tn_forw;
	mutex_exit(&tm->tm_contents);
	rw_exit(&tp->tn_rwlock);
	rw_destroy(&tp->tn_rwlock);
	mutex_destroy(&tp->tn_tlock);
	vn_free(TNTOV(tp));
	tmp_memfree(tp, sizeof (struct tmpnode));
}

/* ARGSUSED2 */
static int
tmp_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	struct tfid *tfid;

	if (fidp->fid_len < (sizeof (struct tfid) - sizeof (ushort_t))) {
		fidp->fid_len = sizeof (struct tfid) - sizeof (ushort_t);
		return (ENOSPC);
	}

	tfid = (struct tfid *)fidp;
	bzero(tfid, sizeof (struct tfid));
	tfid->tfid_len = (int)sizeof (struct tfid) - sizeof (ushort_t);

	tfid->tfid_ino = tp->tn_nodeid;
	tfid->tfid_gen = tp->tn_gen;

	return (0);
}


/*
 * Return all the pages from [off..off+len] in given file
 */
/* ARGSUSED */
static int
tmp_getpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr,
	caller_context_t *ct)
{
	int err = 0;
	struct tmpnode *tp = VTOTN(vp);
	anoff_t toff = (anoff_t)off;
	size_t tlen = len;
	u_offset_t tmpoff;
	timestruc_t now;

	rw_enter(&tp->tn_contents, RW_READER);

	if (off + len > tp->tn_size + PAGEOFFSET) {
		err = EFAULT;
		goto out;
	}
	/*
	 * Look for holes (no anon slot) in faulting range. If there are
	 * holes we have to switch to a write lock and fill them in. Swap
	 * space for holes was already reserved when the file was grown.
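	 * (wrtmp() reserves that swap with tmp_resv() in page-size
	 * increments.)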
	 */
	tmpoff = toff;
	if (non_anon(tp->tn_anon, btop(off), &tmpoff, &tlen)) {
		if (!rw_tryupgrade(&tp->tn_contents)) {
			rw_exit(&tp->tn_contents);
			rw_enter(&tp->tn_contents, RW_WRITER);
			/* Size may have changed when lock was dropped */
			if (off + len > tp->tn_size + PAGEOFFSET) {
				err = EFAULT;
				goto out;
			}
		}
		for (toff = (anoff_t)off; toff < (anoff_t)off + len;
		    toff += PAGESIZE) {
			if (anon_get_ptr(tp->tn_anon, btop(toff)) == NULL) {
				/* XXX - may allocate mem w. write lock held */
				(void) anon_set_ptr(tp->tn_anon, btop(toff),
				    anon_alloc(vp, toff), ANON_SLEEP);
				tp->tn_nblocks++;
			}
		}
		rw_downgrade(&tp->tn_contents);
	}


	if (len <= PAGESIZE)
		err = tmp_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
		    seg, addr, rw, cr);
	else
		err = pvn_getpages(tmp_getapage, vp, (u_offset_t)off, len,
		    protp, pl, plsz, seg, addr, rw, cr);

	gethrestime(&now);
	tp->tn_atime = now;
	if (rw == S_WRITE)
		tp->tn_mtime = now;

out:
	rw_exit(&tp->tn_contents);
	return (err);
}

/*
 * Called from pvn_getpages or swap_getpage to get a particular page.
 */
/*ARGSUSED*/
static int
tmp_getapage(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr)
{
	struct page *pp;
	int flags;
	int err = 0;
	struct vnode *pvp;
	u_offset_t poff;

	if (protp != NULL)
		*protp = PROT_ALL;
again:
	if (pp = page_lookup(vp, off, rw == S_CREATE ? SE_EXCL : SE_SHARED)) {
		if (pl) {
			pl[0] = pp;
			pl[1] = NULL;
		} else {
			page_unlock(pp);
		}
	} else {
		pp = page_create_va(vp, off, PAGESIZE,
		    PG_WAIT | PG_EXCL, seg, addr);
		/*
		 * Someone raced in and created the page after we did the
		 * lookup but before we did the create, so go back and
		 * try to look it up again.
		 */
		if (pp == NULL)
			goto again;
		/*
		 * Fill page from backing store, if any. If none, then
		 * either this is a newly filled hole or page must have
		 * been unmodified and freed so just zero it out.
		 */
		err = swap_getphysname(vp, off, &pvp, &poff);
		if (err) {
			panic("tmp_getapage: no anon slot vp %p "
			    "off %llx pp %p\n", (void *)vp, off, (void *)pp);
		}
		if (pvp) {
			flags = (pl == NULL ? B_ASYNC|B_READ : B_READ);
			err = VOP_PAGEIO(pvp, pp, (u_offset_t)poff, PAGESIZE,
			    flags, cr, NULL);
			if (flags & B_ASYNC)
				pp = NULL;
		} else if (rw != S_CREATE) {
			pagezero(pp, 0, PAGESIZE);
		}
		if (err && pp)
			pvn_read_done(pp, B_ERROR);
		if (err == 0) {
			if (pl)
				pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
			else
				pvn_io_done(pp);
		}
	}
	return (err);
}


/*
 * Flags are composed of {B_INVAL, B_DIRTY, B_FREE, B_DONTNEED}.
 * If len == 0, do from off to EOF.
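 * Dirty pages are pushed to swap in klusters by tmp_putapage() below.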
 */
static int tmp_nopage = 0;	/* Don't do tmp_putpage's if set */

/* ARGSUSED */
int
tmp_putpage(
	register struct vnode *vp,
	offset_t off,
	size_t len,
	int flags,
	struct cred *cr,
	caller_context_t *ct)
{
	register page_t *pp;
	u_offset_t io_off;
	size_t io_len = 0;
	int err = 0;
	struct tmpnode *tp = VTOTN(vp);
	int dolock;

	if (tmp_nopage)
		return (0);

	ASSERT(vp->v_count != 0);

	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	/*
	 * This being tmpfs, we don't ever do i/o unless we really
	 * have to (when we're low on memory and pageout calls us
	 * with B_ASYNC | B_FREE or the user explicitly asks for it with
	 * B_DONTNEED).
	 * XXX to approximately track the mod time like ufs we should
	 * update the times here. The problem is, once someone does a
	 * store we never clear the mod bit and do i/o, thus fsflush
	 * will keep calling us every 30 seconds to do the i/o and we'll
	 * continually update the mod time. At least we update the mod
	 * time on the first store because this results in a call to getpage.
	 */
	if (flags != (B_ASYNC | B_FREE) && (flags & B_INVAL) == 0 &&
	    (flags & B_DONTNEED) == 0)
		return (0);
	/*
	 * If this thread owns the lock, i.e., this thread grabbed it
	 * as writer somewhere above, then we don't need to grab the
	 * lock as reader in this routine.
	 */
	dolock = (rw_owner(&tp->tn_contents) != curthread);

	/*
	 * If this is pageout don't block on the lock as you could deadlock
	 * when freemem == 0 (another thread has the read lock and is blocked
	 * creating a page, and a third thread is waiting to get the writers
	 * lock - waiting writers priority blocks us from getting the read
	 * lock). Of course, if the only freeable pages are on this tmpnode
	 * we're hosed anyways. A better solution might be a new lock type.
	 * Note: ufs has the same problem.
	 */
	if (curproc == proc_pageout) {
		if (!rw_tryenter(&tp->tn_contents, RW_READER))
			return (ENOMEM);
	} else if (dolock)
		rw_enter(&tp->tn_contents, RW_READER);

	if (!vn_has_cached_data(vp))
		goto out;

	if (len == 0) {
		if (curproc == proc_pageout) {
			panic("tmp: pageout can't block");
			/*NOTREACHED*/
		}

		/* Search the entire vp list for pages >= off. */
		err = pvn_vplist_dirty(vp, (u_offset_t)off, tmp_putapage,
		    flags, cr);
	} else {
		u_offset_t eoff;

		/*
		 * Loop over all offsets in the range [off...off + len]
		 * looking for pages to deal with.
		 */
		eoff = MIN(off + len, tp->tn_size);
		for (io_off = off; io_off < eoff; io_off += io_len) {
			/*
			 * If we are not invalidating, synchronously
			 * freeing or writing pages use the routine
			 * page_lookup_nowait() to prevent reclaiming
			 * them from the free list.
			 */
			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
				pp = page_lookup(vp, io_off,
				    (flags & (B_INVAL | B_FREE)) ?
				    SE_EXCL : SE_SHARED);
			} else {
				pp = page_lookup_nowait(vp, io_off,
				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
			}

			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
				io_len = PAGESIZE;
			else {
				err = tmp_putapage(vp, pp, &io_off, &io_len,
				    flags, cr);
				if (err != 0)
					break;
			}
		}
	}
	/* If invalidating, verify all pages on vnode list are gone. */
	if (err == 0 && off == 0 && len == 0 &&
	    (flags & B_INVAL) && vn_has_cached_data(vp)) {
		panic("tmp_putpage: B_INVAL, pages not gone");
		/*NOTREACHED*/
	}
out:
	if ((curproc == proc_pageout) || dolock)
		rw_exit(&tp->tn_contents);
	/*
	 * Only reason putapage is going to give us SE_NOSWAP as error
	 * is when we ask a page to be written to physical backing store
	 * and there is none. Ignore this because we might be dealing
	 * with a swap page which does not have any backing store
	 * on disk. In any other case we won't get this error over here.
	 */
	if (err == SE_NOSWAP)
		err = 0;
	return (err);
}

long tmp_putpagecnt, tmp_pagespushed;

/*
 * Write out a single page.
 * For tmpfs this means choose a physical swap slot and write the page
 * out using VOP_PAGEIO. For performance, we attempt to kluster; i.e.,
 * we try to find a bunch of other dirty pages adjacent in the file
 * and a bunch of contiguous swap slots, and then write all the pages
 * out in a single i/o.
 */
/*ARGSUSED*/
static int
tmp_putapage(
	struct vnode *vp,
	page_t *pp,
	u_offset_t *offp,
	size_t *lenp,
	int flags,
	struct cred *cr)
{
	int err;
	ulong_t klstart, kllen;
	page_t *pplist, *npplist;
	extern int klustsize;
	long tmp_klustsize;
	struct tmpnode *tp;
	size_t pp_off, pp_len;
	u_offset_t io_off;
	size_t io_len;
	struct vnode *pvp;
	u_offset_t pstart;
	u_offset_t offset;
	u_offset_t tmpoff;

	ASSERT(PAGE_LOCKED(pp));

	/* Kluster in tmp_klustsize chunks */
	tp = VTOTN(vp);
	tmp_klustsize = klustsize;
	offset = pp->p_offset;
	klstart = (offset / tmp_klustsize) * tmp_klustsize;
	kllen = MIN(tmp_klustsize, tp->tn_size - klstart);

	/* Get a kluster of pages */
	pplist =
	    pvn_write_kluster(vp, pp, &tmpoff, &pp_len, klstart, kllen, flags);

	pp_off = (size_t)tmpoff;

	/*
	 * Get a cluster of physical offsets for the pages; the amount we
	 * get may be some subrange of what we ask for (io_off, io_len).
	 */
	io_off = pp_off;
	io_len = pp_len;
	err = swap_newphysname(vp, offset, &io_off, &io_len, &pvp, &pstart);
	ASSERT(err != SE_NOANON); /* anon slot must have been filled */
	if (err) {
		pvn_write_done(pplist, B_ERROR | B_WRITE | flags);
		/*
		 * If this routine is called as a result of segvn_sync
		 * operation and we have no physical swap then we can get an
		 * error here. In such case we would return SE_NOSWAP as error.
		 * At this point, we expect only SE_NOSWAP.
		 */
		ASSERT(err == SE_NOSWAP);
		if (flags & B_INVAL)
			err = ENOMEM;
		goto out;
	}
	ASSERT(pp_off <= io_off && io_off + io_len <= pp_off + pp_len);
	ASSERT(io_off <= offset && offset < io_off + io_len);

	/* Toss pages at front/rear that we couldn't get physical backing for */
	if (io_off != pp_off) {
		npplist = NULL;
		page_list_break(&pplist, &npplist, btop(io_off - pp_off));
		ASSERT(pplist->p_offset == pp_off);
		ASSERT(pplist->p_prev->p_offset == io_off - PAGESIZE);
		pvn_write_done(pplist, B_ERROR | B_WRITE | flags);
		pplist = npplist;
	}
	if (io_off + io_len < pp_off + pp_len) {
		npplist = NULL;
		page_list_break(&pplist, &npplist, btop(io_len));
		ASSERT(npplist->p_offset == io_off + io_len);
		ASSERT(npplist->p_prev->p_offset == pp_off + pp_len - PAGESIZE);
		pvn_write_done(npplist, B_ERROR | B_WRITE | flags);
	}

	ASSERT(pplist->p_offset == io_off);
	ASSERT(pplist->p_prev->p_offset == io_off + io_len - PAGESIZE);
	ASSERT(btopr(io_len) <= btopr(kllen));

	/* Do i/o on the remaining kluster */
	err = VOP_PAGEIO(pvp, pplist, (u_offset_t)pstart, io_len,
	    B_WRITE | flags, cr, NULL);

	if ((flags & B_ASYNC) == 0) {
		pvn_write_done(pplist, ((err) ? B_ERROR : 0) | B_WRITE | flags);
	}
out:
	if (!err) {
		if (offp)
			*offp = io_off;
		if (lenp)
			*lenp = io_len;
		tmp_putpagecnt++;
		tmp_pagespushed += btop(io_len);
	}
	if (err && err != ENOMEM && err != SE_NOSWAP)
		cmn_err(CE_WARN, "tmp_putapage: err %d\n", err);
	return (err);
}

/* ARGSUSED */
static int
tmp_map(
	struct vnode *vp,
	offset_t off,
	struct as *as,
	caddr_t *addrp,
	size_t len,
	uchar_t prot,
	uchar_t maxprot,
	uint_t flags,
	struct cred *cred,
	caller_context_t *ct)
{
	struct segvn_crargs vn_a;
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	int error;

#ifdef _ILP32
	if (len > MAXOFF_T)
		return (ENOMEM);
#endif

	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	if (off < 0 || (offset_t)(off + len) < 0 ||
	    off > MAXOFF_T || (off + len) > MAXOFF_T)
		return (ENXIO);

	if (vp->v_type != VREG)
		return (ENODEV);

	/*
	 * Don't allow mapping to locked file
	 */
	if (vn_has_mandatory_locks(vp, tp->tn_mode)) {
		return (EAGAIN);
	}

	as_rangelock(as);
	error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
	if (error != 0) {
		as_rangeunlock(as);
		return (error);
	}

	vn_a.vp = vp;
	vn_a.offset = (u_offset_t)off;
	vn_a.type = flags & MAP_TYPE;
	vn_a.prot = prot;
	vn_a.maxprot = maxprot;
	vn_a.flags = flags & ~MAP_TYPE;
	vn_a.cred = cred;
	vn_a.amp = NULL;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	error = as_map(as, *addrp, len, segvn_create, &vn_a);
	as_rangeunlock(as);
	return (error);
}

/*
 * tmp_addmap and tmp_delmap can't be called since the vp
 * maintained in the segvn mapping is NULL.
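 * They are provided only as no-op entries for the vnodeops template below.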
 */
/* ARGSUSED */
static int
tmp_addmap(
	struct vnode *vp,
	offset_t off,
	struct as *as,
	caddr_t addr,
	size_t len,
	uchar_t prot,
	uchar_t maxprot,
	uint_t flags,
	struct cred *cred,
	caller_context_t *ct)
{
	return (0);
}

/* ARGSUSED */
static int
tmp_delmap(
	struct vnode *vp,
	offset_t off,
	struct as *as,
	caddr_t addr,
	size_t len,
	uint_t prot,
	uint_t maxprot,
	uint_t flags,
	struct cred *cred,
	caller_context_t *ct)
{
	return (0);
}

static int
tmp_freesp(struct vnode *vp, struct flock64 *lp, int flag)
{
	register int i;
	register struct tmpnode *tp = VTOTN(vp);
	int error;

	ASSERT(vp->v_type == VREG);
	ASSERT(lp->l_start >= 0);

	if (lp->l_len != 0)
		return (EINVAL);

	rw_enter(&tp->tn_rwlock, RW_WRITER);
	if (tp->tn_size == lp->l_start) {
		rw_exit(&tp->tn_rwlock);
		return (0);
	}

	/*
	 * Check for any mandatory locks on the range
	 */
	if (MANDLOCK(vp, tp->tn_mode)) {
		long save_start;

		save_start = lp->l_start;

		if (tp->tn_size < lp->l_start) {
			/*
			 * "Truncate up" case: need to make sure there
			 * is no lock beyond current end-of-file. To
			 * do so, we need to set l_start to the size
			 * of the file temporarily.
			 */
			lp->l_start = tp->tn_size;
		}
		lp->l_type = F_WRLCK;
		lp->l_sysid = 0;
		lp->l_pid = ttoproc(curthread)->p_pid;
		i = (flag & (FNDELAY|FNONBLOCK)) ? 0 : SLPFLCK;
		if ((i = reclock(vp, lp, i, 0, lp->l_start, NULL)) != 0 ||
		    lp->l_type != F_UNLCK) {
			rw_exit(&tp->tn_rwlock);
			return (i ? i : EAGAIN);
		}

		lp->l_start = save_start;
	}
	VFSTOTM(vp->v_vfsp);

	rw_enter(&tp->tn_contents, RW_WRITER);
	error = tmpnode_trunc((struct tmount *)VFSTOTM(vp->v_vfsp),
	    tp, (ulong_t)lp->l_start);
	rw_exit(&tp->tn_contents);
	rw_exit(&tp->tn_rwlock);
	return (error);
}

/* ARGSUSED */
static int
tmp_space(
	struct vnode *vp,
	int cmd,
	struct flock64 *bfp,
	int flag,
	offset_t offset,
	cred_t *cred,
	caller_context_t *ct)
{
	int error;

	if (cmd != F_FREESP)
		return (EINVAL);
	if ((error = convoff(vp, bfp, 0, (offset_t)offset)) == 0) {
		if ((bfp->l_start > MAXOFF_T) || (bfp->l_len > MAXOFF_T))
			return (EFBIG);
		error = tmp_freesp(vp, bfp, flag);
	}
	return (error);
}

/* ARGSUSED */
static int
tmp_seek(
	struct vnode *vp,
	offset_t ooff,
	offset_t *noffp,
	caller_context_t *ct)
{
	return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
}

/* ARGSUSED2 */
static int
tmp_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
{
	struct tmpnode *tp = VTOTN(vp);

	if (write_lock) {
		rw_enter(&tp->tn_rwlock, RW_WRITER);
	} else {
		rw_enter(&tp->tn_rwlock, RW_READER);
	}
	return (write_lock);
}

/* ARGSUSED1 */
static void
tmp_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
{
	struct tmpnode *tp = VTOTN(vp);

	rw_exit(&tp->tn_rwlock);
}

static int
tmp_pathconf(
	struct vnode *vp,
	int cmd,
	ulong_t *valp,
	cred_t *cr,
	caller_context_t *ct)
{
	struct tmpnode *tp = NULL;
	int error;

	switch (cmd) {
	case _PC_XATTR_EXISTS:
		if (vp->v_vfsp->vfs_flag & VFS_XATTR) {
			*valp = 0;	/* assume no attributes */
			error = 0;	/* okay to ask */
			tp = VTOTN(vp);
			rw_enter(&tp->tn_rwlock, RW_READER);
			if (tp->tn_xattrdp) {
				rw_enter(&tp->tn_xattrdp->tn_rwlock, RW_READER);
				/* do not count "." and ".." */
				if (tp->tn_xattrdp->tn_dirents > 2)
					*valp = 1;
				rw_exit(&tp->tn_xattrdp->tn_rwlock);
			}
			rw_exit(&tp->tn_rwlock);
		} else {
			error = EINVAL;
		}
		break;
	case _PC_SATTR_ENABLED:
	case _PC_SATTR_EXISTS:
		*valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) &&
		    (vp->v_type == VREG || vp->v_type == VDIR);
		error = 0;
		break;
	default:
		error = fs_pathconf(vp, cmd, valp, cr, ct);
	}
	return (error);
}


struct vnodeops *tmp_vnodeops;

const fs_operation_def_t tmp_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = tmp_open },
	VOPNAME_CLOSE,		{ .vop_close = tmp_close },
	VOPNAME_READ,		{ .vop_read = tmp_read },
	VOPNAME_WRITE,		{ .vop_write = tmp_write },
	VOPNAME_IOCTL,		{ .vop_ioctl = tmp_ioctl },
	VOPNAME_GETATTR,	{ .vop_getattr = tmp_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = tmp_setattr },
	VOPNAME_ACCESS,		{ .vop_access = tmp_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = tmp_lookup },
	VOPNAME_CREATE,		{ .vop_create = tmp_create },
	VOPNAME_REMOVE,		{ .vop_remove = tmp_remove },
	VOPNAME_LINK,		{ .vop_link = tmp_link },
	VOPNAME_RENAME,		{ .vop_rename = tmp_rename },
	VOPNAME_MKDIR,		{ .vop_mkdir = tmp_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = tmp_rmdir },
	VOPNAME_READDIR,	{ .vop_readdir = tmp_readdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = tmp_symlink },
	VOPNAME_READLINK,	{ .vop_readlink = tmp_readlink },
	VOPNAME_FSYNC,		{ .vop_fsync = tmp_fsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = tmp_inactive },
	VOPNAME_FID,		{ .vop_fid = tmp_fid },
	VOPNAME_RWLOCK,		{ .vop_rwlock = tmp_rwlock },
	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = tmp_rwunlock },
	VOPNAME_SEEK,		{ .vop_seek = tmp_seek },
	VOPNAME_SPACE,		{ .vop_space = tmp_space },
	VOPNAME_GETPAGE,	{ .vop_getpage = tmp_getpage },
	VOPNAME_PUTPAGE,	{ .vop_putpage = tmp_putpage },
	VOPNAME_MAP,		{ .vop_map = tmp_map },
	VOPNAME_ADDMAP,		{ .vop_addmap = tmp_addmap },
	VOPNAME_DELMAP,		{ .vop_delmap = tmp_delmap },
	VOPNAME_PATHCONF,	{ .vop_pathconf = tmp_pathconf },
	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
	NULL,			NULL
};