/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/user.h>
#include <sys/time.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/kmem.h>
#include <sys/uio.h>
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/cred.h>
#include <sys/dirent.h>
#include <sys/pathname.h>
#include <sys/vmsystm.h>
#include <sys/fs/tmp.h>
#include <sys/fs/tmpnode.h>
#include <sys/mman.h>
#include <vm/hat.h>
#include <vm/seg_vn.h>
#include <vm/seg_map.h>
#include <vm/seg.h>
#include <vm/anon.h>
#include <vm/as.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/swap.h>
#include <sys/buf.h>
#include <sys/vm.h>
#include <sys/vtrace.h>
#include <sys/policy.h>
#include <fs/fs_subr.h>

static int tmp_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
    page_t **, size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
static int tmp_putapage(struct vnode *, page_t *, u_offset_t *, size_t *,
    int, struct cred *);

/* ARGSUSED1 */
static int
tmp_open(struct vnode **vpp, int flag, struct cred *cred, caller_context_t *ct)
{
    /*
     * swapon to a tmpfs file is not supported, so access
     * is denied on open if VISSWAP is set.
     */
    if ((*vpp)->v_flag & VISSWAP)
        return (EINVAL);
    return (0);
}

/* ARGSUSED1 */
static int
tmp_close(
    struct vnode *vp,
    int flag,
    int count,
    offset_t offset,
    struct cred *cred,
    caller_context_t *ct)
{
    cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
    cleanshares(vp, ttoproc(curthread)->p_pid);
    return (0);
}
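
/*
 * A note on locking in this file (an editorial summary, inferred from the
 * ASSERTs below rather than from any single authoritative comment): each
 * tmpnode carries two rwlocks.  tn_rwlock is the VOP_RWLOCK-level lock that
 * serializes whole read/write/directory operations, and tn_contents protects
 * the node's size, times, and anon array.  Write paths hold both as writer;
 * read paths hold tn_rwlock as reader and take tn_contents as needed.
 */
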
/*
 * wrtmp does the real work of write requests for tmpfs.
 */
static int
wrtmp(
    struct tmount *tm,
    struct tmpnode *tp,
    struct uio *uio,
    struct cred *cr,
    struct caller_context *ct)
{
    pgcnt_t pageoffset;     /* offset in pages */
    ulong_t segmap_offset;  /* pagesize byte offset into segmap */
    caddr_t base;           /* base of segmap */
    ssize_t bytes;          /* bytes to uiomove */
    pfn_t pagenumber;       /* offset in pages into tmp file */
    struct vnode *vp;
    int error = 0;
    int pagecreate;         /* == 1 if we allocated a page */
    int newpage;
    rlim64_t limit = uio->uio_llimit;
    long oresid = uio->uio_resid;
    timestruc_t now;

    long tn_size_changed = 0;
    long old_tn_size;
    long new_tn_size;

    vp = TNTOV(tp);
    ASSERT(vp->v_type == VREG);

    TRACE_1(TR_FAC_TMPFS, TR_TMPFS_RWTMP_START,
        "tmp_wrtmp_start:vp %p", vp);

    ASSERT(RW_WRITE_HELD(&tp->tn_contents));
    ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));

    if (MANDLOCK(vp, tp->tn_mode)) {
        rw_exit(&tp->tn_contents);
        /*
         * tmp_getattr ends up being called by chklock
         */
        error = chklock(vp, FWRITE, uio->uio_loffset, uio->uio_resid,
            uio->uio_fmode, ct);
        rw_enter(&tp->tn_contents, RW_WRITER);
        if (error != 0) {
            TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
                "tmp_wrtmp_end:vp %p error %d", vp, error);
            return (error);
        }
    }

    if (uio->uio_loffset < 0)
        return (EINVAL);

    if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
        limit = MAXOFFSET_T;

    if (uio->uio_loffset >= limit) {
        proc_t *p = ttoproc(curthread);

        mutex_enter(&p->p_lock);
        (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
            p, RCA_UNSAFE_SIGINFO);
        mutex_exit(&p->p_lock);
        return (EFBIG);
    }

    if (uio->uio_loffset >= MAXOFF_T) {
        TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
            "tmp_wrtmp_end:vp %p error %d", vp, EINVAL);
        return (EFBIG);
    }

    if (uio->uio_resid == 0) {
        TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
            "tmp_wrtmp_end:vp %p error %d", vp, 0);
        return (0);
    }

    if (limit > MAXOFF_T)
        limit = MAXOFF_T;

    do {
        long offset;
        long delta;

        offset = (long)uio->uio_offset;
        pageoffset = offset & PAGEOFFSET;
        /*
         * A maximum of PAGESIZE bytes of data is transferred
         * each pass through this loop
         */
        bytes = MIN(PAGESIZE - pageoffset, uio->uio_resid);

        if (offset + bytes >= limit) {
            if (offset >= limit) {
                error = EFBIG;
                goto out;
            }
            bytes = limit - offset;
        }
        pagenumber = btop(offset);

        /*
         * delta is the amount of anonymous memory
         * to reserve for the file.
         * We always reserve in pagesize increments so
         * unless we're extending the file into a new page,
         * we don't need to call tmp_resv.
         */
        delta = offset + bytes -
            P2ROUNDUP_TYPED(tp->tn_size, PAGESIZE, u_offset_t);
        if (delta > 0) {
            pagecreate = 1;
            if (tmp_resv(tm, tp, delta, pagecreate)) {
                /*
                 * Log file system full in the zone that owns
                 * the tmpfs mount, as well as in the global
                 * zone if necessary.
                 */
                zcmn_err(tm->tm_vfsp->vfs_zone->zone_id,
                    CE_WARN, "%s: File system full, "
                    "swap space limit exceeded",
                    tm->tm_mntpath);

                if (tm->tm_vfsp->vfs_zone->zone_id !=
                    GLOBAL_ZONEID) {

                    vfs_t *vfs = tm->tm_vfsp;

                    zcmn_err(GLOBAL_ZONEID,
                        CE_WARN, "%s: File system full, "
                        "swap space limit exceeded",
                        vfs->vfs_vnodecovered->v_path);
                }
                error = ENOSPC;
                break;
            }
            tmpnode_growmap(tp, (ulong_t)offset + bytes);
        }
        /* grow the file to the new length */
        if (offset + bytes > tp->tn_size) {
            tn_size_changed = 1;
            old_tn_size = tp->tn_size;
            /*
             * Postpone updating tp->tn_size until uiomove() is
             * done.
             */
            new_tn_size = offset + bytes;
        }
        if (bytes == PAGESIZE) {
            /*
             * Writing whole page so reading from disk
             * is a waste
             */
            pagecreate = 1;
        } else {
            pagecreate = 0;
        }
        /*
         * If writing past EOF or filling in a hole
         * we need to allocate an anon slot.
         */
        if (anon_get_ptr(tp->tn_anon, pagenumber) == NULL) {
            (void) anon_set_ptr(tp->tn_anon, pagenumber,
                anon_alloc(vp, ptob(pagenumber)), ANON_SLEEP);
            pagecreate = 1;
            tp->tn_nblocks++;
        }

        /*
         * We have to drop the contents lock to allow the VM
         * system to reacquire it in tmp_getpage()
         */
        rw_exit(&tp->tn_contents);

        newpage = 0;
        if (vpm_enable) {
            /*
             * Copy data.  If new pages are created, the part of
             * the page that is not written will be initialized
             * with zeros.
             */
            error = vpm_data_copy(vp, offset, bytes, uio,
                !pagecreate, &newpage, 1, S_WRITE);
        } else {
            /* Get offset within the segmap mapping */
            segmap_offset = (offset & PAGEMASK) & MAXBOFFSET;
            base = segmap_getmapflt(segkmap, vp,
                (offset & MAXBMASK), PAGESIZE, !pagecreate,
                S_WRITE);
        }

        if (!vpm_enable && pagecreate) {
            /*
             * segmap_pagecreate() returns 1 if it calls
             * page_create_va() to allocate any pages.
             */
            newpage = segmap_pagecreate(segkmap,
                base + segmap_offset, (size_t)PAGESIZE, 0);
            /*
             * Clear from the beginning of the page to the starting
             * offset of the data.
             */
            if (pageoffset != 0)
                (void) kzero(base + segmap_offset,
                    (size_t)pageoffset);
        }

        if (!vpm_enable) {
            error = uiomove(base + segmap_offset + pageoffset,
                (long)bytes, UIO_WRITE, uio);
        }

        if (!vpm_enable && pagecreate &&
            uio->uio_offset < P2ROUNDUP(offset + bytes, PAGESIZE)) {
            long zoffset; /* zero from offset into page */
            /*
             * We created pages w/o initializing them completely,
             * thus we need to zero the part that wasn't set up.
             * This happens on most EOF write cases and if
             * we had some sort of error during the uiomove.
             */
            long nmoved;

            nmoved = uio->uio_offset - offset;
            ASSERT((nmoved + pageoffset) <= PAGESIZE);

            /*
             * Zero from the end of data in the page to the
             * end of the page.
             */
            if ((zoffset = pageoffset + nmoved) < PAGESIZE)
                (void) kzero(base + segmap_offset + zoffset,
                    (size_t)PAGESIZE - zoffset);
        }

        /*
         * Unlock the pages which have been allocated by
         * page_create_va() in segmap_pagecreate()
         */
        if (!vpm_enable && newpage) {
            segmap_pageunlock(segkmap, base + segmap_offset,
                (size_t)PAGESIZE, S_WRITE);
        }

        if (error) {
            /*
             * If we failed on a write, we must
             * be sure to invalidate any pages that may have
             * been allocated.
             */
            if (vpm_enable) {
                (void) vpm_sync_pages(vp, offset, PAGESIZE,
                    SM_INVAL);
            } else {
                (void) segmap_release(segkmap, base, SM_INVAL);
            }
        } else {
            if (vpm_enable) {
                error = vpm_sync_pages(vp, offset, PAGESIZE,
                    0);
            } else {
                error = segmap_release(segkmap, base, 0);
            }
        }

        /*
         * Re-acquire contents lock.
         */
        rw_enter(&tp->tn_contents, RW_WRITER);

        /*
         * Update tn_size.
         */
        if (tn_size_changed)
            tp->tn_size = new_tn_size;

        /*
         * If the uiomove failed, fix up tn_size.
         */
        if (error) {
            if (tn_size_changed) {
                /*
                 * The uiomove failed, and we
                 * allocated blocks, so get rid
                 * of them.
                 */
                (void) tmpnode_trunc(tm, tp,
                    (ulong_t)old_tn_size);
            }
        } else {
            /*
             * XXX - Can this be out of the loop?
             */
            if ((tp->tn_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) &&
                (tp->tn_mode & (S_ISUID | S_ISGID)) &&
                secpolicy_vnode_setid_retain(cr,
                (tp->tn_mode & S_ISUID) != 0 && tp->tn_uid == 0)) {
                /*
                 * Clear Set-UID & Set-GID bits on
                 * successful write if not privileged
                 * and at least one of the execute bits
                 * is set.  If we always clear Set-GID,
                 * mandatory file and record locking is
                 * unusable.
                 */
                tp->tn_mode &= ~(S_ISUID | S_ISGID);
            }
            gethrestime(&now);
            tp->tn_mtime = now;
            tp->tn_ctime = now;
        }
    } while (error == 0 && uio->uio_resid > 0 && bytes != 0);

out:
    /*
     * If we've already done a partial-write, terminate
     * the write but return no error.
     */
    if (oresid != uio->uio_resid)
        error = 0;
    TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
        "tmp_wrtmp_end:vp %p error %d", vp, error);
    return (error);
}
/*
 * rdtmp does the real work of read requests for tmpfs.
 */
static int
rdtmp(
    struct tmount *tm,
    struct tmpnode *tp,
    struct uio *uio,
    struct caller_context *ct)
{
    ulong_t pageoffset;     /* offset in tmpfs file (uio_offset) */
    ulong_t segmap_offset;  /* pagesize byte offset into segmap */
    caddr_t base;           /* base of segmap */
    ssize_t bytes;          /* bytes to uiomove */
    struct vnode *vp;
    int error;
    long oresid = uio->uio_resid;

#if defined(lint)
    tm = tm;
#endif
    vp = TNTOV(tp);

    TRACE_1(TR_FAC_TMPFS, TR_TMPFS_RWTMP_START, "tmp_rdtmp_start:vp %p",
        vp);

    ASSERT(RW_LOCK_HELD(&tp->tn_contents));

    if (MANDLOCK(vp, tp->tn_mode)) {
        rw_exit(&tp->tn_contents);
        /*
         * tmp_getattr ends up being called by chklock
         */
        error = chklock(vp, FREAD, uio->uio_loffset, uio->uio_resid,
            uio->uio_fmode, ct);
        rw_enter(&tp->tn_contents, RW_READER);
        if (error != 0) {
            TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
                "tmp_rdtmp_end:vp %p error %d", vp, error);
            return (error);
        }
    }
    ASSERT(tp->tn_type == VREG);

    if (uio->uio_loffset >= MAXOFF_T) {
        TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
            "tmp_rdtmp_end:vp %p error %d", vp, EINVAL);
        return (0);
    }
    if (uio->uio_loffset < 0)
        return (EINVAL);
    if (uio->uio_resid == 0) {
        TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
            "tmp_rdtmp_end:vp %p error %d", vp, 0);
        return (0);
    }

    vp = TNTOV(tp);

    do {
        long diff;
        long offset;

        offset = uio->uio_offset;
        pageoffset = offset & PAGEOFFSET;
        bytes = MIN(PAGESIZE - pageoffset, uio->uio_resid);

        diff = tp->tn_size - offset;

        if (diff <= 0) {
            error = 0;
            goto out;
        }
        if (diff < bytes)
            bytes = diff;

        /*
         * We have to drop the contents lock to allow the VM system
         * to reacquire it in tmp_getpage() should the uiomove cause a
         * pagefault.
         */
        rw_exit(&tp->tn_contents);

        if (vpm_enable) {
            /*
             * Copy data.
             */
            error = vpm_data_copy(vp, offset, bytes, uio, 1, NULL,
                0, S_READ);
        } else {
            segmap_offset = (offset & PAGEMASK) & MAXBOFFSET;
            base = segmap_getmapflt(segkmap, vp, offset & MAXBMASK,
                bytes, 1, S_READ);

            error = uiomove(base + segmap_offset + pageoffset,
                (long)bytes, UIO_READ, uio);
        }

        if (error) {
            if (vpm_enable) {
                (void) vpm_sync_pages(vp, offset, PAGESIZE, 0);
            } else {
                (void) segmap_release(segkmap, base, 0);
            }
        } else {
            if (vpm_enable) {
                error = vpm_sync_pages(vp, offset, PAGESIZE,
                    0);
            } else {
                error = segmap_release(segkmap, base, 0);
            }
        }

        /*
         * Re-acquire contents lock.
         */
        rw_enter(&tp->tn_contents, RW_READER);

    } while (error == 0 && uio->uio_resid > 0);

out:
    gethrestime(&tp->tn_atime);

    /*
     * If we've already done a partial read, terminate
     * the read but return no error.
     */
    if (oresid != uio->uio_resid)
        error = 0;

    TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
        "tmp_rdtmp_end:vp %x error %d", vp, error);
    return (error);
}

/* ARGSUSED2 */
static int
tmp_read(struct vnode *vp, struct uio *uiop, int ioflag, cred_t *cred,
    struct caller_context *ct)
{
    struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
    struct tmount *tm = (struct tmount *)VTOTM(vp);
    int error;

    /*
     * We don't currently support reading non-regular files
     */
    if (vp->v_type == VDIR)
        return (EISDIR);
    if (vp->v_type != VREG)
        return (EINVAL);
    /*
     * tmp_rwlock should have already been called from layers above
     */
    ASSERT(RW_READ_HELD(&tp->tn_rwlock));

    rw_enter(&tp->tn_contents, RW_READER);

    error = rdtmp(tm, tp, uiop, ct);

    rw_exit(&tp->tn_contents);

    return (error);
}

static int
tmp_write(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred,
    struct caller_context *ct)
{
    struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
    struct tmount *tm = (struct tmount *)VTOTM(vp);
    int error;

    /*
     * We don't currently support writing to non-regular files
     */
    if (vp->v_type != VREG)
        return (EINVAL);    /* XXX EISDIR? */

    /*
     * tmp_rwlock should have already been called from layers above
     */
    ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));

    rw_enter(&tp->tn_contents, RW_WRITER);

    if (ioflag & FAPPEND) {
        /*
         * In append mode start at end of file.
         */
        uiop->uio_loffset = tp->tn_size;
    }

    error = wrtmp(tm, tp, uiop, cred, ct);

    rw_exit(&tp->tn_contents);

    return (error);
}
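
/*
 * Editorial note: VOP_RWLOCK (tmp_rwlock below) is held as writer across the
 * whole of tmp_write, so O_APPEND writers serialize with each other; reading
 * tn_size for FAPPEND under tn_contents then guarantees each append starts
 * at the current end of file, even though wrtmp drops and retakes
 * tn_contents around the copy itself.
 */
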
/* ARGSUSED */
static int
tmp_ioctl(
    struct vnode *vp,
    int com,
    intptr_t data,
    int flag,
    struct cred *cred,
    int *rvalp,
    caller_context_t *ct)
{
    return (ENOTTY);
}

/* ARGSUSED2 */
static int
tmp_getattr(
    struct vnode *vp,
    struct vattr *vap,
    int flags,
    struct cred *cred,
    caller_context_t *ct)
{
    struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
    struct vnode *mvp;
    struct vattr va;
    int attrs = 1;

    /*
     * A special case to handle the root tnode on a diskless nfs
     * client that may have had its uid and gid inherited
     * from an nfs vnode with nobody ownership.  Likely the
     * root filesystem.  After nfs is fully functional the uid/gid
     * may be mappable so ask again.
     * vfsp can't get unmounted because we hold vp.
     */
    if (vp->v_flag & VROOT &&
        (mvp = vp->v_vfsp->vfs_vnodecovered) != NULL) {
        mutex_enter(&tp->tn_tlock);
        if (tp->tn_uid == UID_NOBODY || tp->tn_gid == GID_NOBODY) {
            mutex_exit(&tp->tn_tlock);
            bzero(&va, sizeof (struct vattr));
            va.va_mask = AT_UID|AT_GID;
            attrs = VOP_GETATTR(mvp, &va, 0, cred, ct);
        } else {
            mutex_exit(&tp->tn_tlock);
        }
    }
    mutex_enter(&tp->tn_tlock);
    if (attrs == 0) {
        tp->tn_uid = va.va_uid;
        tp->tn_gid = va.va_gid;
    }
    vap->va_type = vp->v_type;
    vap->va_mode = tp->tn_mode & MODEMASK;
    vap->va_uid = tp->tn_uid;
    vap->va_gid = tp->tn_gid;
    vap->va_fsid = tp->tn_fsid;
    vap->va_nodeid = (ino64_t)tp->tn_nodeid;
    vap->va_nlink = tp->tn_nlink;
    vap->va_size = (u_offset_t)tp->tn_size;
    vap->va_atime = tp->tn_atime;
    vap->va_mtime = tp->tn_mtime;
    vap->va_ctime = tp->tn_ctime;
    vap->va_blksize = PAGESIZE;
    vap->va_rdev = tp->tn_rdev;
    vap->va_seq = tp->tn_seq;

    /*
     * XXX Holes are not taken into account.  We could take the time to
     * run through the anon array looking for allocated slots...
     */
    vap->va_nblocks = (fsblkcnt64_t)btodb(ptob(btopr(vap->va_size)));
    mutex_exit(&tp->tn_tlock);
    return (0);
}

/*ARGSUSED4*/
static int
tmp_setattr(
    struct vnode *vp,
    struct vattr *vap,
    int flags,
    struct cred *cred,
    caller_context_t *ct)
{
    struct tmount *tm = (struct tmount *)VTOTM(vp);
    struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
    int error = 0;
    struct vattr *get;
    long mask;

    /*
     * Cannot set these attributes
     */
    if ((vap->va_mask & AT_NOSET) || (vap->va_mask & AT_XVATTR))
        return (EINVAL);

    mutex_enter(&tp->tn_tlock);

    get = &tp->tn_attr;
    /*
     * Change file access modes.  Must be owner or have sufficient
     * privileges.
     */
    error = secpolicy_vnode_setattr(cred, vp, vap, get, flags, tmp_taccess,
        tp);

    if (error)
        goto out;

    mask = vap->va_mask;

    if (mask & AT_MODE) {
        get->va_mode &= S_IFMT;
        get->va_mode |= vap->va_mode & ~S_IFMT;
    }

    if (mask & AT_UID)
        get->va_uid = vap->va_uid;
    if (mask & AT_GID)
        get->va_gid = vap->va_gid;
    if (mask & AT_ATIME)
        get->va_atime = vap->va_atime;
    if (mask & AT_MTIME)
        get->va_mtime = vap->va_mtime;

    if (mask & (AT_UID | AT_GID | AT_MODE | AT_MTIME))
        gethrestime(&tp->tn_ctime);

    if (mask & AT_SIZE) {
        ASSERT(vp->v_type != VDIR);

        /* Don't support large files. */
        if (vap->va_size > MAXOFF_T) {
            error = EFBIG;
            goto out;
        }
        mutex_exit(&tp->tn_tlock);

        rw_enter(&tp->tn_rwlock, RW_WRITER);
        rw_enter(&tp->tn_contents, RW_WRITER);
        error = tmpnode_trunc(tm, tp, (ulong_t)vap->va_size);
        rw_exit(&tp->tn_contents);
        rw_exit(&tp->tn_rwlock);
        goto out1;
    }
out:
    mutex_exit(&tp->tn_tlock);
out1:
    return (error);
}
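
/*
 * Editorial note on tmp_setattr: the AT_SIZE case must drop tn_tlock (a
 * mutex) before calling tmpnode_trunc, which needs tn_rwlock and tn_contents
 * as writer; hence the separate out/out1 exit paths above, with out1
 * skipping the mutex_exit that has already been done.
 */
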
/* ARGSUSED2 */
static int
tmp_access(
    struct vnode *vp,
    int mode,
    int flags,
    struct cred *cred,
    caller_context_t *ct)
{
    struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
    int error;

    mutex_enter(&tp->tn_tlock);
    error = tmp_taccess(tp, mode, cred);
    mutex_exit(&tp->tn_tlock);
    return (error);
}

/* ARGSUSED3 */
static int
tmp_lookup(
    struct vnode *dvp,
    char *nm,
    struct vnode **vpp,
    struct pathname *pnp,
    int flags,
    struct vnode *rdir,
    struct cred *cred,
    caller_context_t *ct,
    int *direntflags,
    pathname_t *realpnp)
{
    struct tmpnode *tp = (struct tmpnode *)VTOTN(dvp);
    struct tmpnode *ntp = NULL;
    int error;

    /* allow cd into @ dir */
    if (flags & LOOKUP_XATTR) {
        struct tmpnode *xdp;
        struct tmount *tm;

        /*
         * don't allow attributes if the filesystem wasn't
         * mounted with XATTR support
         */
        if (!(dvp->v_vfsp->vfs_flag & VFS_XATTR))
            return (EINVAL);

        if (tp->tn_flags & ISXATTR)
            /* No attributes on attributes */
            return (EINVAL);

        rw_enter(&tp->tn_rwlock, RW_WRITER);
        if (tp->tn_xattrdp == NULL) {
            if (!(flags & CREATE_XATTR_DIR)) {
                rw_exit(&tp->tn_rwlock);
                return (ENOENT);
            }

            /*
             * No attribute directory exists for this
             * node - create the attr dir as a side effect
             * of this lookup.
             */

            /*
             * Make sure we have adequate permission...
             */

            if ((error = tmp_taccess(tp, VWRITE, cred)) != 0) {
                rw_exit(&tp->tn_rwlock);
                return (error);
            }

            xdp = tmp_memalloc(sizeof (struct tmpnode),
                TMP_MUSTHAVE);
            tm = VTOTM(dvp);
            tmpnode_init(tm, xdp, &tp->tn_attr, NULL);
            /*
             * Fix-up fields unique to attribute directories.
             */
            xdp->tn_flags = ISXATTR;
            xdp->tn_type = VDIR;
            if (tp->tn_type == VDIR) {
                xdp->tn_mode = tp->tn_attr.va_mode;
            } else {
                xdp->tn_mode = 0700;
                if (tp->tn_attr.va_mode & 0040)
                    xdp->tn_mode |= 0750;
                if (tp->tn_attr.va_mode & 0004)
                    xdp->tn_mode |= 0705;
            }
            xdp->tn_vnode->v_type = VDIR;
            xdp->tn_vnode->v_flag |= V_XATTRDIR;
            tdirinit(tp, xdp);
            tp->tn_xattrdp = xdp;
        } else {
            VN_HOLD(tp->tn_xattrdp->tn_vnode);
        }
        *vpp = TNTOV(tp->tn_xattrdp);
        rw_exit(&tp->tn_rwlock);
        return (0);
    }

    /*
     * Null component name is a synonym for directory being searched.
     */
    if (*nm == '\0') {
        VN_HOLD(dvp);
        *vpp = dvp;
        return (0);
    }
    ASSERT(tp);

    error = tdirlookup(tp, nm, &ntp, cred);

    if (error == 0) {
        ASSERT(ntp);
        *vpp = TNTOV(ntp);
        /*
         * If vnode is a device return special vnode instead
         */
        if (IS_DEVVP(*vpp)) {
            struct vnode *newvp;

            newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type,
                cred);
            VN_RELE(*vpp);
            *vpp = newvp;
        }
    }
    TRACE_4(TR_FAC_TMPFS, TR_TMPFS_LOOKUP,
        "tmpfs lookup:vp %p name %s vpp %p error %d",
        dvp, nm, vpp, error);
    return (error);
}
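
/*
 * Editorial note on the attribute-directory mode fix-up above: for a
 * non-directory, the attr dir starts as 0700 and then mirrors the file's
 * group/other read permission as read+execute on the attr dir (0040 grants
 * group r-x via 0750, 0004 grants other r-x via 0705), since search (x)
 * permission is needed to look up attributes inside it.
 */
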
/*ARGSUSED7*/
static int
tmp_create(
    struct vnode *dvp,
    char *nm,
    struct vattr *vap,
    enum vcexcl exclusive,
    int mode,
    struct vnode **vpp,
    struct cred *cred,
    int flag,
    caller_context_t *ct,
    vsecattr_t *vsecp)
{
    struct tmpnode *parent;
    struct tmount *tm;
    struct tmpnode *self;
    int error;
    struct tmpnode *oldtp;

again:
    parent = (struct tmpnode *)VTOTN(dvp);
    tm = (struct tmount *)VTOTM(dvp);
    self = NULL;
    error = 0;
    oldtp = NULL;

    /* device files not allowed in ext. attr dirs */
    if ((parent->tn_flags & ISXATTR) &&
        (vap->va_type == VBLK || vap->va_type == VCHR ||
        vap->va_type == VFIFO || vap->va_type == VDOOR ||
        vap->va_type == VSOCK || vap->va_type == VPORT))
        return (EINVAL);

    if (vap->va_type == VREG && (vap->va_mode & VSVTX)) {
        /* Must be privileged to set sticky bit */
        if (secpolicy_vnode_stky_modify(cred))
            vap->va_mode &= ~VSVTX;
    } else if (vap->va_type == VNON) {
        return (EINVAL);
    }

    /*
     * Null component name is a synonym for directory being searched.
     */
    if (*nm == '\0') {
        VN_HOLD(dvp);
        oldtp = parent;
    } else {
        error = tdirlookup(parent, nm, &oldtp, cred);
    }

    if (error == 0) {	/* name found */
        ASSERT(oldtp);

        rw_enter(&oldtp->tn_rwlock, RW_WRITER);

        /*
         * if create/read-only of an existing
         * directory, allow it
         */
        if (exclusive == EXCL)
            error = EEXIST;
        else if ((oldtp->tn_type == VDIR) && (mode & VWRITE))
            error = EISDIR;
        else {
            error = tmp_taccess(oldtp, mode, cred);
        }

        if (error) {
            rw_exit(&oldtp->tn_rwlock);
            tmpnode_rele(oldtp);
            return (error);
        }
        *vpp = TNTOV(oldtp);
        if ((*vpp)->v_type == VREG && (vap->va_mask & AT_SIZE) &&
            vap->va_size == 0) {
            rw_enter(&oldtp->tn_contents, RW_WRITER);
            (void) tmpnode_trunc(tm, oldtp, 0);
            rw_exit(&oldtp->tn_contents);
        }
        rw_exit(&oldtp->tn_rwlock);
        if (IS_DEVVP(*vpp)) {
            struct vnode *newvp;

            newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type,
                cred);
            VN_RELE(*vpp);
            if (newvp == NULL) {
                return (ENOSYS);
            }
            *vpp = newvp;
        }

        if (error == 0) {
            vnevent_create(*vpp, ct);
        }
        return (0);
    }

    if (error != ENOENT)
        return (error);

    rw_enter(&parent->tn_rwlock, RW_WRITER);
    error = tdirenter(tm, parent, nm, DE_CREATE,
        (struct tmpnode *)NULL, (struct tmpnode *)NULL,
        vap, &self, cred, ct);
    rw_exit(&parent->tn_rwlock);

    if (error) {
        if (self)
            tmpnode_rele(self);

        if (error == EEXIST) {
            /*
             * This means that the file was created sometime
             * between when we checked and did not find it and
             * when we went to create it.
             * Since creat() is supposed to truncate a file
             * that already exists, go back to the beginning
             * of the function.  This time we will find it
             * and go down the tmp_trunc() path.
             */
            goto again;
        }
        return (error);
    }

    *vpp = TNTOV(self);

    if (!error && IS_DEVVP(*vpp)) {
        struct vnode *newvp;

        newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cred);
        VN_RELE(*vpp);
        if (newvp == NULL)
            return (ENOSYS);
        *vpp = newvp;
    }
    TRACE_3(TR_FAC_TMPFS, TR_TMPFS_CREATE,
        "tmpfs create:dvp %p nm %s vpp %p", dvp, nm, vpp);
    return (0);
}

/* ARGSUSED3 */
static int
tmp_remove(
    struct vnode *dvp,
    char *nm,
    struct cred *cred,
    caller_context_t *ct,
    int flags)
{
    struct tmpnode *parent = (struct tmpnode *)VTOTN(dvp);
    int error;
    struct tmpnode *tp = NULL;

    error = tdirlookup(parent, nm, &tp, cred);
    if (error)
        return (error);

    ASSERT(tp);
    rw_enter(&parent->tn_rwlock, RW_WRITER);
    rw_enter(&tp->tn_rwlock, RW_WRITER);

    if (tp->tn_type != VDIR ||
        (error = secpolicy_fs_linkdir(cred, dvp->v_vfsp)) == 0)
        error = tdirdelete(parent, tp, nm, DR_REMOVE, cred);

    rw_exit(&tp->tn_rwlock);
    rw_exit(&parent->tn_rwlock);
    vnevent_remove(TNTOV(tp), dvp, nm, ct);
    tmpnode_rele(tp);

    TRACE_3(TR_FAC_TMPFS, TR_TMPFS_REMOVE,
        "tmpfs remove:dvp %p nm %s error %d", dvp, nm, error);
    return (error);
}

/* ARGSUSED4 */
static int
tmp_link(
    struct vnode *dvp,
    struct vnode *srcvp,
    char *tnm,
    struct cred *cred,
    caller_context_t *ct,
    int flags)
{
    struct tmpnode *parent;
    struct tmpnode *from;
    struct tmount *tm = (struct tmount *)VTOTM(dvp);
    int error;
    struct tmpnode *found = NULL;
    struct vnode *realvp;

    if (VOP_REALVP(srcvp, &realvp, ct) == 0)
        srcvp = realvp;

    parent = (struct tmpnode *)VTOTN(dvp);
    from = (struct tmpnode *)VTOTN(srcvp);

    if ((srcvp->v_type == VDIR &&
        secpolicy_fs_linkdir(cred, dvp->v_vfsp)) ||
        (from->tn_uid != crgetuid(cred) && secpolicy_basic_link(cred)))
        return (EPERM);

    /*
     * Make sure a link involving extended attributes is valid:
     * an attribute file may only be hard linked into an xattr dir,
     * and a non-attribute file only into a normal dir.
     */
    if ((from->tn_flags & ISXATTR) != (parent->tn_flags & ISXATTR))
        return (EINVAL);

    error = tdirlookup(parent, tnm, &found, cred);
    if (error == 0) {
        ASSERT(found);
        tmpnode_rele(found);
        return (EEXIST);
    }

    if (error != ENOENT)
        return (error);

    rw_enter(&parent->tn_rwlock, RW_WRITER);
    error = tdirenter(tm, parent, tnm, DE_LINK, (struct tmpnode *)NULL,
        from, NULL, (struct tmpnode **)NULL, cred, ct);
    rw_exit(&parent->tn_rwlock);
    if (error == 0) {
        vnevent_link(srcvp, ct);
    }
    return (error);
}

/* ARGSUSED5 */
static int
tmp_rename(
    struct vnode *odvp,	/* source parent vnode */
    char *onm,		/* source name */
    struct vnode *ndvp,	/* destination parent vnode */
    char *nnm,		/* destination name */
    struct cred *cred,
    caller_context_t *ct,
    int flags)
{
    struct tmpnode *fromparent;
    struct tmpnode *toparent;
    struct tmpnode *fromtp = NULL;	/* source tmpnode */
    struct tmount *tm = (struct tmount *)VTOTM(odvp);
    int error;
    int samedir = 0;	/* set if odvp == ndvp */
    struct vnode *realvp;

    if (VOP_REALVP(ndvp, &realvp, ct) == 0)
        ndvp = realvp;

    fromparent = (struct tmpnode *)VTOTN(odvp);
    toparent = (struct tmpnode *)VTOTN(ndvp);

    if ((fromparent->tn_flags & ISXATTR) != (toparent->tn_flags & ISXATTR))
        return (EINVAL);

    mutex_enter(&tm->tm_renamelck);

    /*
     * Look up tmpnode of file we're supposed to rename.
     */
    error = tdirlookup(fromparent, onm, &fromtp, cred);
    if (error) {
        mutex_exit(&tm->tm_renamelck);
        return (error);
    }

    /*
     * Make sure we can delete the old (source) entry.  This
     * requires write permission on the containing directory.  If
     * that directory is "sticky" it requires further checks.
     */
    if (((error = tmp_taccess(fromparent, VWRITE, cred)) != 0) ||
        (error = tmp_sticky_remove_access(fromparent, fromtp, cred)) != 0)
        goto done;

    /*
     * Check for renaming to or from '.' or '..' or that
     * fromtp == fromparent
     */
    if ((onm[0] == '.' &&
        (onm[1] == '\0' || (onm[1] == '.' && onm[2] == '\0'))) ||
        (nnm[0] == '.' &&
        (nnm[1] == '\0' || (nnm[1] == '.' && nnm[2] == '\0'))) ||
        (fromparent == fromtp)) {
        error = EINVAL;
        goto done;
    }

    samedir = (fromparent == toparent);
    /*
     * Make sure we can search and rename into the new
     * (destination) directory.
     */
    if (!samedir) {
        error = tmp_taccess(toparent, VEXEC|VWRITE, cred);
        if (error)
            goto done;
    }

    /*
     * Link source to new target
     */
    rw_enter(&toparent->tn_rwlock, RW_WRITER);
    error = tdirenter(tm, toparent, nnm, DE_RENAME,
        fromparent, fromtp, (struct vattr *)NULL,
        (struct tmpnode **)NULL, cred, ct);
    rw_exit(&toparent->tn_rwlock);

    if (error) {
        /*
         * ESAME isn't really an error; it indicates that the
         * operation should not be done because the source and target
         * are the same file, but that no error should be reported.
         */
        if (error == ESAME)
            error = 0;
        goto done;
    }
    vnevent_rename_src(TNTOV(fromtp), odvp, onm, ct);

    /*
     * Notify the target directory if it is not the same as
     * the source directory.
     */
    if (ndvp != odvp) {
        vnevent_rename_dest_dir(ndvp, ct);
    }

    /*
     * Unlink from source.
     */
    rw_enter(&fromparent->tn_rwlock, RW_WRITER);
    rw_enter(&fromtp->tn_rwlock, RW_WRITER);

    error = tdirdelete(fromparent, fromtp, onm, DR_RENAME, cred);

    /*
     * The following handles the case where our source tmpnode was
     * removed before we got to it.
     *
     * XXX We should also cleanup properly in the case where tdirdelete
     * fails for some other reason.  Currently this case shouldn't happen.
     * (see 1184991).
     */
    if (error == ENOENT)
        error = 0;

    rw_exit(&fromtp->tn_rwlock);
    rw_exit(&fromparent->tn_rwlock);
done:
    tmpnode_rele(fromtp);
    mutex_exit(&tm->tm_renamelck);

    TRACE_5(TR_FAC_TMPFS, TR_TMPFS_RENAME,
        "tmpfs rename:ovp %p onm %s nvp %p nnm %s error %d", odvp, onm,
        ndvp, nnm, error);
    return (error);
}

/* ARGSUSED5 */
static int
tmp_mkdir(
    struct vnode *dvp,
    char *nm,
    struct vattr *va,
    struct vnode **vpp,
    struct cred *cred,
    caller_context_t *ct,
    int flags,
    vsecattr_t *vsecp)
{
    struct tmpnode *parent = (struct tmpnode *)VTOTN(dvp);
    struct tmpnode *self = NULL;
    struct tmount *tm = (struct tmount *)VTOTM(dvp);
    int error;

    /* no new dirs allowed in xattr dirs */
    if (parent->tn_flags & ISXATTR)
        return (EINVAL);

    /*
     * Might be dangling directory.  Catch it here,
     * because an ENOENT return from tdirlookup() is
     * an "o.k. return".
     */
    if (parent->tn_nlink == 0)
        return (ENOENT);

    error = tdirlookup(parent, nm, &self, cred);
    if (error == 0) {
        ASSERT(self);
        tmpnode_rele(self);
        return (EEXIST);
    }
    if (error != ENOENT)
        return (error);

    rw_enter(&parent->tn_rwlock, RW_WRITER);
    error = tdirenter(tm, parent, nm, DE_MKDIR, (struct tmpnode *)NULL,
        (struct tmpnode *)NULL, va, &self, cred, ct);
    if (error) {
        rw_exit(&parent->tn_rwlock);
        if (self)
            tmpnode_rele(self);
        return (error);
    }
    rw_exit(&parent->tn_rwlock);
    *vpp = TNTOV(self);
    return (0);
}

/* ARGSUSED4 */
static int
tmp_rmdir(
    struct vnode *dvp,
    char *nm,
    struct vnode *cdir,
    struct cred *cred,
    caller_context_t *ct,
    int flags)
{
    struct tmpnode *parent = (struct tmpnode *)VTOTN(dvp);
    struct tmpnode *self = NULL;
    struct vnode *vp;
    int error = 0;

    /*
     * Return error when removing . and ..
     */
    if (strcmp(nm, ".") == 0)
        return (EINVAL);
    if (strcmp(nm, "..") == 0)
        return (EEXIST); /* Should be ENOTEMPTY */
    error = tdirlookup(parent, nm, &self, cred);
    if (error)
        return (error);

    rw_enter(&parent->tn_rwlock, RW_WRITER);
    rw_enter(&self->tn_rwlock, RW_WRITER);

    vp = TNTOV(self);
    if (vp == dvp || vp == cdir) {
        error = EINVAL;
        goto done1;
    }
    if (self->tn_type != VDIR) {
        error = ENOTDIR;
        goto done1;
    }

    mutex_enter(&self->tn_tlock);
    if (self->tn_nlink > 2) {
        mutex_exit(&self->tn_tlock);
        error = EEXIST;
        goto done1;
    }
    mutex_exit(&self->tn_tlock);

    if (vn_vfswlock(vp)) {
        error = EBUSY;
        goto done1;
    }
    if (vn_mountedvfs(vp) != NULL) {
        error = EBUSY;
        goto done;
    }

    /*
     * Check for an empty directory,
     * i.e. one that only includes entries for "." and ".."
     */
    if (self->tn_dirents > 2) {
        error = EEXIST;		/* SIGH should be ENOTEMPTY */
        /*
         * Update atime because checking tn_dirents is logically
         * equivalent to reading the directory
         */
        gethrestime(&self->tn_atime);
        goto done;
    }

    error = tdirdelete(parent, self, nm, DR_RMDIR, cred);
done:
    vn_vfsunlock(vp);
done1:
    rw_exit(&self->tn_rwlock);
    rw_exit(&parent->tn_rwlock);
    vnevent_rmdir(TNTOV(self), dvp, nm, ct);
    tmpnode_rele(self);

    return (error);
}

/* ARGSUSED2 */
static int
tmp_readdir(
    struct vnode *vp,
    struct uio *uiop,
    struct cred *cred,
    int *eofp,
    caller_context_t *ct,
    int flags)
{
    struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
    struct tdirent *tdp;
    int error = 0;
    size_t namelen;
    struct dirent64 *dp;
    ulong_t offset;
    ulong_t total_bytes_wanted;
    long outcount = 0;
    long bufsize;
    int reclen;
    caddr_t outbuf;

    if (uiop->uio_loffset >= MAXOFF_T) {
        if (eofp)
            *eofp = 1;
        return (0);
    }
    /*
     * assuming the system call has already called tmp_rwlock
     */
    ASSERT(RW_READ_HELD(&tp->tn_rwlock));

    if (uiop->uio_iovcnt != 1)
        return (EINVAL);

    if (vp->v_type != VDIR)
        return (ENOTDIR);

    /*
     * There's a window here where someone could have removed
     * all the entries in the directory after we put a hold on the
     * vnode but before we grabbed the rwlock.  Just return.
     */
    if (tp->tn_dir == NULL) {
        if (tp->tn_nlink) {
            panic("empty directory 0x%p", (void *)tp);
            /*NOTREACHED*/
        }
        return (0);
    }

    /*
     * Get space for multiple directory entries
     */
    total_bytes_wanted = uiop->uio_iov->iov_len;
    bufsize = total_bytes_wanted + sizeof (struct dirent64);
    outbuf = kmem_alloc(bufsize, KM_SLEEP);

    dp = (struct dirent64 *)outbuf;

    offset = 0;
    tdp = tp->tn_dir;
    while (tdp) {
        namelen = strlen(tdp->td_name);	/* no +1 needed */
        offset = tdp->td_offset;
        if (offset >= uiop->uio_offset) {
            reclen = (int)DIRENT64_RECLEN(namelen);
            if (outcount + reclen > total_bytes_wanted) {
                if (!outcount)
                    /*
                     * Buffer too small for any entries.
                     */
                    error = EINVAL;
                break;
            }
            ASSERT(tdp->td_tmpnode != NULL);

            /* use strncpy(9f) to zero out uninitialized bytes */

            (void) strncpy(dp->d_name, tdp->td_name,
                DIRENT64_NAMELEN(reclen));
            dp->d_reclen = (ushort_t)reclen;
            dp->d_ino = (ino64_t)tdp->td_tmpnode->tn_nodeid;
            dp->d_off = (offset_t)tdp->td_offset + 1;
            dp = (struct dirent64 *)
                ((uintptr_t)dp + dp->d_reclen);
            outcount += reclen;
            ASSERT(outcount <= bufsize);
        }
        tdp = tdp->td_next;
    }

    if (!error)
        error = uiomove(outbuf, outcount, UIO_READ, uiop);

    if (!error) {
        /*
         * If we reached the end of the list, our offset
         * should now be just past the end.
         */
        if (!tdp) {
            offset += 1;
            if (eofp)
                *eofp = 1;
        } else if (eofp)
            *eofp = 0;
        uiop->uio_offset = offset;
    }
    gethrestime(&tp->tn_atime);
    kmem_free(outbuf, bufsize);
    return (error);
}
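
/*
 * Editorial note on the readdir buffer math: DIRENT64_RECLEN(namelen) is the
 * size of a dirent64 whose d_name holds namelen characters plus the
 * terminating NUL, rounded up for alignment, so summing reclen values in
 * outcount walks dp forward through outbuf in exactly the layout that the
 * final uiomove hands back to the caller.  The extra sizeof (struct
 * dirent64) added to bufsize appears to be defensive slack only; the loop
 * breaks before writing a record that would overflow total_bytes_wanted.
 */
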
/* ARGSUSED5 */
static int
tmp_symlink(
    struct vnode *dvp,
    char *lnm,
    struct vattr *tva,
    char *tnm,
    struct cred *cred,
    caller_context_t *ct,
    int flags)
{
    struct tmpnode *parent = (struct tmpnode *)VTOTN(dvp);
    struct tmpnode *self = (struct tmpnode *)NULL;
    struct tmount *tm = (struct tmount *)VTOTM(dvp);
    char *cp = NULL;
    int error;
    size_t len;

    /* no symlinks allowed to files in xattr dirs */
    if (parent->tn_flags & ISXATTR)
        return (EINVAL);

    error = tdirlookup(parent, lnm, &self, cred);
    if (error == 0) {
        /*
         * The entry already exists
         */
        tmpnode_rele(self);
        return (EEXIST);	/* was 0 */
    }

    if (error != ENOENT) {
        if (self != NULL)
            tmpnode_rele(self);
        return (error);
    }

    rw_enter(&parent->tn_rwlock, RW_WRITER);
    error = tdirenter(tm, parent, lnm, DE_CREATE, (struct tmpnode *)NULL,
        (struct tmpnode *)NULL, tva, &self, cred, ct);
    rw_exit(&parent->tn_rwlock);

    if (error) {
        if (self)
            tmpnode_rele(self);
        return (error);
    }
    len = strlen(tnm) + 1;
    cp = tmp_memalloc(len, 0);
    if (cp == NULL) {
        tmpnode_rele(self);
        return (ENOSPC);
    }
    (void) strcpy(cp, tnm);

    self->tn_symlink = cp;
    self->tn_size = len - 1;
    tmpnode_rele(self);
    return (error);
}
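
/*
 * Editorial cross-reference: tn_size for a symlink is the target length
 * excluding the NUL (len - 1 above), which is what tmp_readlink below
 * uiomoves out, while the allocation itself is tn_size + 1 bytes, matching
 * the tmp_memfree(tp->tn_symlink, tp->tn_size + 1) in tmp_inactive.
 */
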
/* ARGSUSED2 */
static int
tmp_readlink(
    struct vnode *vp,
    struct uio *uiop,
    struct cred *cred,
    caller_context_t *ct)
{
    struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
    int error = 0;

    if (vp->v_type != VLNK)
        return (EINVAL);

    rw_enter(&tp->tn_rwlock, RW_READER);
    rw_enter(&tp->tn_contents, RW_READER);
    error = uiomove(tp->tn_symlink, tp->tn_size, UIO_READ, uiop);
    gethrestime(&tp->tn_atime);
    rw_exit(&tp->tn_contents);
    rw_exit(&tp->tn_rwlock);
    return (error);
}

/* ARGSUSED */
static int
tmp_fsync(
    struct vnode *vp,
    int syncflag,
    struct cred *cred,
    caller_context_t *ct)
{
    return (0);
}

/* ARGSUSED */
static void
tmp_inactive(struct vnode *vp, struct cred *cred, caller_context_t *ct)
{
    struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
    struct tmount *tm = (struct tmount *)VFSTOTM(vp->v_vfsp);

    rw_enter(&tp->tn_rwlock, RW_WRITER);
top:
    mutex_enter(&tp->tn_tlock);
    mutex_enter(&vp->v_lock);
    ASSERT(vp->v_count >= 1);

    /*
     * If we don't have the last hold or the link count is non-zero,
     * there's little to do -- just drop our hold.
     */
    if (vp->v_count > 1 || tp->tn_nlink != 0) {
        vp->v_count--;
        mutex_exit(&vp->v_lock);
        mutex_exit(&tp->tn_tlock);
        rw_exit(&tp->tn_rwlock);
        return;
    }

    /*
     * We have the last hold *and* the link count is zero, so this
     * tmpnode is dead from the filesystem's viewpoint.  However,
     * if the tmpnode has any pages associated with it (i.e. if it's
     * a normal file with non-zero size), the tmpnode can still be
     * discovered by pageout or fsflush via the page vnode pointers.
     * In this case we must drop all our locks, truncate the tmpnode,
     * and try the whole dance again.
     */
    if (tp->tn_size != 0) {
        if (tp->tn_type == VREG) {
            mutex_exit(&vp->v_lock);
            mutex_exit(&tp->tn_tlock);
            rw_enter(&tp->tn_contents, RW_WRITER);
            (void) tmpnode_trunc(tm, tp, 0);
            rw_exit(&tp->tn_contents);
            ASSERT(tp->tn_size == 0);
            ASSERT(tp->tn_nblocks == 0);
            goto top;
        }
        if (tp->tn_type == VLNK)
            tmp_memfree(tp->tn_symlink, tp->tn_size + 1);
    }

    /*
     * Remove normal file/dir's xattr dir and xattrs.
     */
    if (tp->tn_xattrdp) {
        struct tmpnode *xtp = tp->tn_xattrdp;

        ASSERT(xtp->tn_flags & ISXATTR);
        tmpnode_hold(xtp);
        rw_enter(&xtp->tn_rwlock, RW_WRITER);
        tdirtrunc(xtp);
        DECR_COUNT(&xtp->tn_nlink, &xtp->tn_tlock);
        tp->tn_xattrdp = NULL;
        rw_exit(&xtp->tn_rwlock);
        tmpnode_rele(xtp);
    }

    mutex_exit(&vp->v_lock);
    mutex_exit(&tp->tn_tlock);
    /* Here's our chance to send invalid event while we're between locks */
    vn_invalid(TNTOV(tp));
    mutex_enter(&tm->tm_contents);
    if (tp->tn_forw == NULL)
        tm->tm_rootnode->tn_back = tp->tn_back;
    else
        tp->tn_forw->tn_back = tp->tn_back;
    tp->tn_back->tn_forw = tp->tn_forw;
    mutex_exit(&tm->tm_contents);
    rw_exit(&tp->tn_rwlock);
    rw_destroy(&tp->tn_rwlock);
    mutex_destroy(&tp->tn_tlock);
    vn_free(TNTOV(tp));
    tmp_memfree(tp, sizeof (struct tmpnode));
}

/* ARGSUSED2 */
static int
tmp_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
{
    struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
    struct tfid *tfid;

    if (fidp->fid_len < (sizeof (struct tfid) - sizeof (ushort_t))) {
        fidp->fid_len = sizeof (struct tfid) - sizeof (ushort_t);
        return (ENOSPC);
    }

    tfid = (struct tfid *)fidp;
    bzero(tfid, sizeof (struct tfid));
    tfid->tfid_len = (int)sizeof (struct tfid) - sizeof (ushort_t);

    tfid->tfid_ino = tp->tn_nodeid;
    tfid->tfid_gen = tp->tn_gen;

    return (0);
}
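
/*
 * Editorial note on tmp_fid: the node id plus generation number pair lets a
 * stale file handle (e.g. one held by an NFS client) be detected after the
 * tmpnode is recycled; if the caller's fid buffer is too small, fid_len is
 * set to the required size and ENOSPC returned, the usual VOP_FID
 * convention.
 */
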
/*
 * Return all the pages from [off..off+len] in the given file
 */
/* ARGSUSED */
static int
tmp_getpage(
    struct vnode *vp,
    offset_t off,
    size_t len,
    uint_t *protp,
    page_t *pl[],
    size_t plsz,
    struct seg *seg,
    caddr_t addr,
    enum seg_rw rw,
    struct cred *cr,
    caller_context_t *ct)
{
    int err = 0;
    struct tmpnode *tp = VTOTN(vp);
    anoff_t toff = (anoff_t)off;
    size_t tlen = len;
    u_offset_t tmpoff;
    timestruc_t now;

    rw_enter(&tp->tn_contents, RW_READER);

    if (off + len > tp->tn_size + PAGEOFFSET) {
        err = EFAULT;
        goto out;
    }
    /*
     * Look for holes (no anon slot) in faulting range.  If there are
     * holes we have to switch to a write lock and fill them in.  Swap
     * space for holes was already reserved when the file was grown.
     */
    tmpoff = toff;
    if (non_anon(tp->tn_anon, btop(off), &tmpoff, &tlen)) {
        if (!rw_tryupgrade(&tp->tn_contents)) {
            rw_exit(&tp->tn_contents);
            rw_enter(&tp->tn_contents, RW_WRITER);
            /* Size may have changed when lock was dropped */
            if (off + len > tp->tn_size + PAGEOFFSET) {
                err = EFAULT;
                goto out;
            }
        }
        for (toff = (anoff_t)off; toff < (anoff_t)off + len;
            toff += PAGESIZE) {
            if (anon_get_ptr(tp->tn_anon, btop(toff)) == NULL) {
                /* XXX - may allocate mem w. write lock held */
                (void) anon_set_ptr(tp->tn_anon, btop(toff),
                    anon_alloc(vp, toff), ANON_SLEEP);
                tp->tn_nblocks++;
            }
        }
        rw_downgrade(&tp->tn_contents);
    }

    if (len <= PAGESIZE)
        err = tmp_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
            seg, addr, rw, cr);
    else
        err = pvn_getpages(tmp_getapage, vp, (u_offset_t)off, len,
            protp, pl, plsz, seg, addr, rw, cr);

    gethrestime(&now);
    tp->tn_atime = now;
    if (rw == S_WRITE)
        tp->tn_mtime = now;

out:
    rw_exit(&tp->tn_contents);
    return (err);
}

/*
 * Called from pvn_getpages or swap_getpage to get a particular page.
 */
/*ARGSUSED*/
static int
tmp_getapage(
    struct vnode *vp,
    u_offset_t off,
    size_t len,
    uint_t *protp,
    page_t *pl[],
    size_t plsz,
    struct seg *seg,
    caddr_t addr,
    enum seg_rw rw,
    struct cred *cr)
{
    struct page *pp;
    int flags;
    int err = 0;
    struct vnode *pvp;
    u_offset_t poff;

    if (protp != NULL)
        *protp = PROT_ALL;
again:
    if (pp = page_lookup(vp, off, rw == S_CREATE ? SE_EXCL : SE_SHARED)) {
        if (pl) {
            pl[0] = pp;
            pl[1] = NULL;
        } else {
            page_unlock(pp);
        }
    } else {
        pp = page_create_va(vp, off, PAGESIZE,
            PG_WAIT | PG_EXCL, seg, addr);
        /*
         * Someone raced in and created the page after we did the
         * lookup but before we did the create, so go back and
         * try to look it up again.
         */
        if (pp == NULL)
            goto again;
        /*
         * Fill page from backing store, if any.  If none, then
         * either this is a newly filled hole or page must have
         * been unmodified and freed so just zero it out.
         */
        err = swap_getphysname(vp, off, &pvp, &poff);
        if (err) {
            panic("tmp_getapage: no anon slot vp %p "
                "off %llx pp %p\n", (void *)vp, off, (void *)pp);
        }
        if (pvp) {
            flags = (pl == NULL ? B_ASYNC|B_READ : B_READ);
            err = VOP_PAGEIO(pvp, pp, (u_offset_t)poff, PAGESIZE,
                flags, cr, NULL);
            if (flags & B_ASYNC)
                pp = NULL;
        } else if (rw != S_CREATE) {
            pagezero(pp, 0, PAGESIZE);
        }
        if (err && pp)
            pvn_read_done(pp, B_ERROR);
        if (err == 0) {
            if (pl)
                pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
            else
                pvn_io_done(pp);
        }
    }
    return (err);
}
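
/*
 * Editorial note on the async read path above: when no page list is wanted
 * (pl == NULL) the page-in is issued B_ASYNC and pp is set to NULL so that
 * completion handling (pvn_read_done and friends) is left entirely to the
 * async i/o path rather than being done a second time here.
 */
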
/*
 * Flags are composed of {B_INVAL, B_DIRTY, B_FREE, B_DONTNEED}.
 * If len == 0, do from off to EOF.
 */
static int tmp_nopage = 0;	/* Don't do tmp_putpage's if set */

/* ARGSUSED */
int
tmp_putpage(
    register struct vnode *vp,
    offset_t off,
    size_t len,
    int flags,
    struct cred *cr,
    caller_context_t *ct)
{
    register page_t *pp;
    u_offset_t io_off;
    size_t io_len = 0;
    int err = 0;
    struct tmpnode *tp = VTOTN(vp);
    int dolock;

    if (tmp_nopage)
        return (0);

    ASSERT(vp->v_count != 0);

    if (vp->v_flag & VNOMAP)
        return (ENOSYS);

    /*
     * This being tmpfs, we don't ever do i/o unless we really
     * have to (when we're low on memory and pageout calls us
     * with B_ASYNC | B_FREE or the user explicitly asks for it with
     * B_DONTNEED).
     * XXX to approximately track the mod time like ufs we should
     * update the times here.  The problem is, once someone does a
     * store we never clear the mod bit and do i/o, thus fsflush
     * will keep calling us every 30 seconds to do the i/o and we'll
     * continually update the mod time.  At least we update the mod
     * time on the first store because this results in a call to getpage.
     */
    if (flags != (B_ASYNC | B_FREE) && (flags & B_INVAL) == 0 &&
        (flags & B_DONTNEED) == 0)
        return (0);
    /*
     * If this thread owns the lock, i.e., this thread grabbed it
     * as writer somewhere above, then we don't need to grab the
     * lock as reader in this routine.
     */
    dolock = (rw_owner(&tp->tn_contents) != curthread);

    /*
     * If this is pageout don't block on the lock as you could deadlock
     * when freemem == 0 (another thread has the read lock and is blocked
     * creating a page, and a third thread is waiting to get the writers
     * lock - waiting writers priority blocks us from getting the read
     * lock).  Of course, if the only freeable pages are on this tmpnode
     * we're hosed anyways.  A better solution might be a new lock type.
     * Note: ufs has the same problem.
     */
    if (curproc == proc_pageout) {
        if (!rw_tryenter(&tp->tn_contents, RW_READER))
            return (ENOMEM);
    } else if (dolock)
        rw_enter(&tp->tn_contents, RW_READER);

    if (!vn_has_cached_data(vp))
        goto out;

    if (len == 0) {
        if (curproc == proc_pageout) {
            panic("tmp: pageout can't block");
            /*NOTREACHED*/
        }

        /* Search the entire vp list for pages >= off. */
        err = pvn_vplist_dirty(vp, (u_offset_t)off, tmp_putapage,
            flags, cr);
    } else {
        u_offset_t eoff;

        /*
         * Loop over all offsets in the range [off...off + len]
         * looking for pages to deal with.
         */
        eoff = MIN(off + len, tp->tn_size);
        for (io_off = off; io_off < eoff; io_off += io_len) {
            /*
             * If we are not invalidating, synchronously
             * freeing or writing pages, use the routine
             * page_lookup_nowait() to prevent reclaiming
             * them from the free list.
             */
            if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
                pp = page_lookup(vp, io_off,
                    (flags & (B_INVAL | B_FREE)) ?
                    SE_EXCL : SE_SHARED);
            } else {
                pp = page_lookup_nowait(vp, io_off,
                    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
            }

            if (pp == NULL || pvn_getdirty(pp, flags) == 0)
                io_len = PAGESIZE;
            else {
                err = tmp_putapage(vp, pp, &io_off, &io_len,
                    flags, cr);
                if (err != 0)
                    break;
            }
        }
    }
    /* If invalidating, verify all pages on vnode list are gone. */
    if (err == 0 && off == 0 && len == 0 &&
        (flags & B_INVAL) && vn_has_cached_data(vp)) {
        panic("tmp_putpage: B_INVAL, pages not gone");
        /*NOTREACHED*/
    }
out:
    if ((curproc == proc_pageout) || dolock)
        rw_exit(&tp->tn_contents);
    /*
     * The only reason putapage is going to give us SE_NOSWAP as an error
     * is when we ask that a page be written to physical backing store
     * and there is none.  Ignore this because we might be dealing
     * with a swap page which does not have any backing store
     * on disk.  In any other case we won't get this error over here.
     */
    if (err == SE_NOSWAP)
        err = 0;
    return (err);
}

long tmp_putpagecnt, tmp_pagespushed;

/*
 * Write out a single page.
 * For tmpfs this means choose a physical swap slot and write the page
 * out using VOP_PAGEIO.  For performance, we attempt to kluster; i.e.,
 * we try to find a bunch of other dirty pages adjacent in the file
 * and a bunch of contiguous swap slots, and then write all the pages
 * out in a single i/o.
 */
/*ARGSUSED*/
static int
tmp_putapage(
    struct vnode *vp,
    page_t *pp,
    u_offset_t *offp,
    size_t *lenp,
    int flags,
    struct cred *cr)
{
    int err;
    ulong_t klstart, kllen;
    page_t *pplist, *npplist;
    extern int klustsize;
    long tmp_klustsize;
    struct tmpnode *tp;
    size_t pp_off, pp_len;
    u_offset_t io_off;
    size_t io_len;
    struct vnode *pvp;
    u_offset_t pstart;
    u_offset_t offset;
    u_offset_t tmpoff;

    ASSERT(PAGE_LOCKED(pp));

    /* Kluster in tmp_klustsize chunks */
    tp = VTOTN(vp);
    tmp_klustsize = klustsize;
    offset = pp->p_offset;
    klstart = (offset / tmp_klustsize) * tmp_klustsize;
    kllen = MIN(tmp_klustsize, tp->tn_size - klstart);

    /* Get a kluster of pages */
    pplist =
        pvn_write_kluster(vp, pp, &tmpoff, &pp_len, klstart, kllen, flags);

    pp_off = (size_t)tmpoff;

    /*
     * Get a cluster of physical offsets for the pages; the amount we
     * get may be some subrange of what we ask for (io_off, io_len).
     */
    io_off = pp_off;
    io_len = pp_len;
    err = swap_newphysname(vp, offset, &io_off, &io_len, &pvp, &pstart);
    ASSERT(err != SE_NOANON); /* anon slot must have been filled */
    if (err) {
        pvn_write_done(pplist, B_ERROR | B_WRITE | flags);
        /*
         * If this routine is called as a result of a segvn_sync
         * operation and we have no physical swap, then we can get an
         * error here.  In such a case we would return SE_NOSWAP as the
         * error.  At this point, we expect only SE_NOSWAP.
         */
        ASSERT(err == SE_NOSWAP);
        if (flags & B_INVAL)
            err = ENOMEM;
        goto out;
    }
    ASSERT(pp_off <= io_off && io_off + io_len <= pp_off + pp_len);
    ASSERT(io_off <= offset && offset < io_off + io_len);

    /* Toss pages at front/rear that we couldn't get physical backing for */
    if (io_off != pp_off) {
        npplist = NULL;
        page_list_break(&pplist, &npplist, btop(io_off - pp_off));
        ASSERT(pplist->p_offset == pp_off);
        ASSERT(pplist->p_prev->p_offset == io_off - PAGESIZE);
        pvn_write_done(pplist, B_ERROR | B_WRITE | flags);
        pplist = npplist;
    }
    if (io_off + io_len < pp_off + pp_len) {
        npplist = NULL;
        page_list_break(&pplist, &npplist, btop(io_len));
        ASSERT(npplist->p_offset == io_off + io_len);
        ASSERT(npplist->p_prev->p_offset == pp_off + pp_len - PAGESIZE);
        pvn_write_done(npplist, B_ERROR | B_WRITE | flags);
    }

    ASSERT(pplist->p_offset == io_off);
    ASSERT(pplist->p_prev->p_offset == io_off + io_len - PAGESIZE);
    ASSERT(btopr(io_len) <= btopr(kllen));

    /* Do i/o on the remaining kluster */
    err = VOP_PAGEIO(pvp, pplist, (u_offset_t)pstart, io_len,
        B_WRITE | flags, cr, NULL);

    if ((flags & B_ASYNC) == 0) {
        pvn_write_done(pplist, ((err) ? B_ERROR : 0) | B_WRITE | flags);
    }
out:
    if (!err) {
        if (offp)
            *offp = io_off;
        if (lenp)
            *lenp = io_len;
        tmp_putpagecnt++;
        tmp_pagespushed += btop(io_len);
    }
    if (err && err != ENOMEM && err != SE_NOSWAP)
        cmn_err(CE_WARN, "tmp_putapage: err %d\n", err);
    return (err);
}
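
/*
 * Editorial example of the klustering above, worked under the assumption of
 * 4K pages and a klustsize of 56K (0xe000); both are tunable and
 * platform-dependent.  A dirty page at file offset 0x1d000 yields klstart
 * 0x1c000 and kllen up to 0xe000 (capped by tn_size), so pvn_write_kluster
 * may hand back as many as 14 adjacent dirty pages; swap_newphysname then
 * narrows that to whatever contiguous swap extent is available, and pages
 * outside the returned [io_off, io_off + io_len) range are completed with
 * B_ERROR so they are not falsely marked clean and can be retried later.
 */
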
/* ARGSUSED */
static int
tmp_map(
    struct vnode *vp,
    offset_t off,
    struct as *as,
    caddr_t *addrp,
    size_t len,
    uchar_t prot,
    uchar_t maxprot,
    uint_t flags,
    struct cred *cred,
    caller_context_t *ct)
{
    struct segvn_crargs vn_a;
    struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
    int error;

#ifdef _ILP32
    if (len > MAXOFF_T)
        return (ENOMEM);
#endif

    if (vp->v_flag & VNOMAP)
        return (ENOSYS);

    if (off < 0 || (offset_t)(off + len) < 0 ||
        off > MAXOFF_T || (off + len) > MAXOFF_T)
        return (ENXIO);

    if (vp->v_type != VREG)
        return (ENODEV);

    /*
     * Don't allow mapping to a locked file
     */
    if (vn_has_mandatory_locks(vp, tp->tn_mode)) {
        return (EAGAIN);
    }

    as_rangelock(as);
    if ((flags & MAP_FIXED) == 0) {
        map_addr(addrp, len, (offset_t)off, 1, flags);
        if (*addrp == NULL) {
            as_rangeunlock(as);
            return (ENOMEM);
        }
    } else {
        /*
         * User specified address - blow away any previous mappings
         */
        (void) as_unmap(as, *addrp, len);
    }

    vn_a.vp = vp;
    vn_a.offset = (u_offset_t)off;
    vn_a.type = flags & MAP_TYPE;
    vn_a.prot = prot;
    vn_a.maxprot = maxprot;
    vn_a.flags = flags & ~MAP_TYPE;
    vn_a.cred = cred;
    vn_a.amp = NULL;
    vn_a.szc = 0;
    vn_a.lgrp_mem_policy_flags = 0;

    error = as_map(as, *addrp, len, segvn_create, &vn_a);
    as_rangeunlock(as);
    return (error);
}

/*
 * tmp_addmap and tmp_delmap can't be called since the vp
 * maintained in the segvn mapping is NULL.
 */
/* ARGSUSED */
static int
tmp_addmap(
    struct vnode *vp,
    offset_t off,
    struct as *as,
    caddr_t addr,
    size_t len,
    uchar_t prot,
    uchar_t maxprot,
    uint_t flags,
    struct cred *cred,
    caller_context_t *ct)
{
    return (0);
}

/* ARGSUSED */
static int
tmp_delmap(
    struct vnode *vp,
    offset_t off,
    struct as *as,
    caddr_t addr,
    size_t len,
    uint_t prot,
    uint_t maxprot,
    uint_t flags,
    struct cred *cred,
    caller_context_t *ct)
{
    return (0);
}

static int
tmp_freesp(struct vnode *vp, struct flock64 *lp, int flag)
{
    register int i;
    register struct tmpnode *tp = VTOTN(vp);
    int error;

    ASSERT(vp->v_type == VREG);
    ASSERT(lp->l_start >= 0);

    if (lp->l_len != 0)
        return (EINVAL);

    rw_enter(&tp->tn_rwlock, RW_WRITER);
    if (tp->tn_size == lp->l_start) {
        rw_exit(&tp->tn_rwlock);
        return (0);
    }

    /*
     * Check for any mandatory locks on the range
     */
    if (MANDLOCK(vp, tp->tn_mode)) {
        long save_start;

        save_start = lp->l_start;

        if (tp->tn_size < lp->l_start) {
            /*
             * "Truncate up" case: need to make sure there
             * is no lock beyond current end-of-file.  To
             * do so, we need to set l_start to the size
             * of the file temporarily.
             */
            lp->l_start = tp->tn_size;
        }
        lp->l_type = F_WRLCK;
        lp->l_sysid = 0;
        lp->l_pid = ttoproc(curthread)->p_pid;
        i = (flag & (FNDELAY|FNONBLOCK)) ? 0 : SLPFLCK;
        if ((i = reclock(vp, lp, i, 0, lp->l_start, NULL)) != 0 ||
            lp->l_type != F_UNLCK) {
            rw_exit(&tp->tn_rwlock);
            return (i ? i : EAGAIN);
        }

        lp->l_start = save_start;
    }
    VFSTOTM(vp->v_vfsp);

    rw_enter(&tp->tn_contents, RW_WRITER);
    error = tmpnode_trunc((struct tmount *)VFSTOTM(vp->v_vfsp),
        tp, (ulong_t)lp->l_start);
    rw_exit(&tp->tn_contents);
    rw_exit(&tp->tn_rwlock);
    return (error);
}
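
/*
 * Editorial note on tmp_freesp: with l_len == 0 the F_FREESP request means
 * "free from l_start to end of file", which for tmpfs reduces to a truncate
 * (or extend) to l_start via tmpnode_trunc.  The reclock() probe above asks
 * whether a hypothetical write lock over the affected range would be
 * granted; anything other than F_UNLCK coming back means a conflicting
 * mandatory lock exists and the operation fails with EAGAIN.
 */
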
/* ARGSUSED */
static int
tmp_space(
    struct vnode *vp,
    int cmd,
    struct flock64 *bfp,
    int flag,
    offset_t offset,
    cred_t *cred,
    caller_context_t *ct)
{
    int error;

    if (cmd != F_FREESP)
        return (EINVAL);
    if ((error = convoff(vp, bfp, 0, (offset_t)offset)) == 0) {
        if ((bfp->l_start > MAXOFF_T) || (bfp->l_len > MAXOFF_T))
            return (EFBIG);
        error = tmp_freesp(vp, bfp, flag);
    }
    return (error);
}

/* ARGSUSED */
static int
tmp_seek(
    struct vnode *vp,
    offset_t ooff,
    offset_t *noffp,
    caller_context_t *ct)
{
    return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
}

/* ARGSUSED2 */
static int
tmp_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
{
    struct tmpnode *tp = VTOTN(vp);

    if (write_lock) {
        rw_enter(&tp->tn_rwlock, RW_WRITER);
    } else {
        rw_enter(&tp->tn_rwlock, RW_READER);
    }
    return (write_lock);
}

/* ARGSUSED1 */
static void
tmp_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
{
    struct tmpnode *tp = VTOTN(vp);

    rw_exit(&tp->tn_rwlock);
}

static int
tmp_pathconf(
    struct vnode *vp,
    int cmd,
    ulong_t *valp,
    cred_t *cr,
    caller_context_t *ct)
{
    struct tmpnode *tp = NULL;
    int error;

    switch (cmd) {
    case _PC_XATTR_EXISTS:
        if (vp->v_vfsp->vfs_flag & VFS_XATTR) {
            *valp = 0;	/* assume no attributes */
            error = 0;	/* okay to ask */
            tp = VTOTN(vp);
            rw_enter(&tp->tn_rwlock, RW_READER);
            if (tp->tn_xattrdp) {
                rw_enter(&tp->tn_xattrdp->tn_rwlock, RW_READER);
                /* do not count "." and ".." */
                if (tp->tn_xattrdp->tn_dirents > 2)
                    *valp = 1;
                rw_exit(&tp->tn_xattrdp->tn_rwlock);
            }
            rw_exit(&tp->tn_rwlock);
        } else {
            error = EINVAL;
        }
        break;
    case _PC_SATTR_ENABLED:
    case _PC_SATTR_EXISTS:
        *valp = vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) &&
            (vp->v_type == VREG || vp->v_type == VDIR);
        error = 0;
        break;
    default:
        error = fs_pathconf(vp, cmd, valp, cr, ct);
    }
    return (error);
}


struct vnodeops *tmp_vnodeops;

const fs_operation_def_t tmp_vnodeops_template[] = {
    VOPNAME_OPEN,		{ .vop_open = tmp_open },
    VOPNAME_CLOSE,		{ .vop_close = tmp_close },
    VOPNAME_READ,		{ .vop_read = tmp_read },
    VOPNAME_WRITE,		{ .vop_write = tmp_write },
    VOPNAME_IOCTL,		{ .vop_ioctl = tmp_ioctl },
    VOPNAME_GETATTR,		{ .vop_getattr = tmp_getattr },
    VOPNAME_SETATTR,		{ .vop_setattr = tmp_setattr },
    VOPNAME_ACCESS,		{ .vop_access = tmp_access },
    VOPNAME_LOOKUP,		{ .vop_lookup = tmp_lookup },
    VOPNAME_CREATE,		{ .vop_create = tmp_create },
    VOPNAME_REMOVE,		{ .vop_remove = tmp_remove },
    VOPNAME_LINK,		{ .vop_link = tmp_link },
    VOPNAME_RENAME,		{ .vop_rename = tmp_rename },
    VOPNAME_MKDIR,		{ .vop_mkdir = tmp_mkdir },
    VOPNAME_RMDIR,		{ .vop_rmdir = tmp_rmdir },
    VOPNAME_READDIR,		{ .vop_readdir = tmp_readdir },
    VOPNAME_SYMLINK,		{ .vop_symlink = tmp_symlink },
    VOPNAME_READLINK,		{ .vop_readlink = tmp_readlink },
    VOPNAME_FSYNC,		{ .vop_fsync = tmp_fsync },
    VOPNAME_INACTIVE,		{ .vop_inactive = tmp_inactive },
    VOPNAME_FID,		{ .vop_fid = tmp_fid },
    VOPNAME_RWLOCK,		{ .vop_rwlock = tmp_rwlock },
    VOPNAME_RWUNLOCK,		{ .vop_rwunlock = tmp_rwunlock },
    VOPNAME_SEEK,		{ .vop_seek = tmp_seek },
    VOPNAME_SPACE,		{ .vop_space = tmp_space },
    VOPNAME_GETPAGE,		{ .vop_getpage = tmp_getpage },
    VOPNAME_PUTPAGE,		{ .vop_putpage = tmp_putpage },
    VOPNAME_MAP,		{ .vop_map = tmp_map },
    VOPNAME_ADDMAP,		{ .vop_addmap = tmp_addmap },
    VOPNAME_DELMAP,		{ .vop_delmap = tmp_delmap },
    VOPNAME_PATHCONF,		{ .vop_pathconf = tmp_pathconf },
    VOPNAME_VNEVENT,		{ .vop_vnevent = fs_vnevent_support },
    NULL,			NULL
};
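
/*
 * Editorial note: a template like the one above is typically handed to
 * vn_make_ops() during filesystem initialization (for tmpfs, in its VFS
 * init path) to fill in tmp_vnodeops; operations not listed here receive
 * the generic defaults supplied by fs_subr.
 */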