/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/user.h>
#include <sys/time.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/kmem.h>
#include <sys/uio.h>
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/cred.h>
#include <sys/dirent.h>
#include <sys/pathname.h>
#include <sys/vmsystm.h>
#include <sys/fs/tmp.h>
#include <sys/fs/tmpnode.h>
#include <sys/mman.h>
#include <vm/hat.h>
#include <vm/seg_vn.h>
#include <vm/seg_map.h>
#include <vm/seg.h>
#include <vm/anon.h>
#include <vm/as.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/swap.h>
#include <sys/buf.h>
#include <sys/vm.h>
#include <sys/vtrace.h>
#include <sys/policy.h>
#include <fs/fs_subr.h>

static int tmp_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
	page_t **, size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
static int tmp_putapage(struct vnode *, page_t *, u_offset_t *, size_t *,
	int, struct cred *);

/* ARGSUSED1 */
static int
tmp_open(struct vnode **vpp, int flag, struct cred *cred)
{
	/*
	 * swapon to a tmpfs file is not supported so access
	 * is denied on open if VISSWAP is set.
	 */
	if ((*vpp)->v_flag & VISSWAP)
		return (EINVAL);
	return (0);
}

/* ARGSUSED1 */
static int
tmp_close(struct vnode *vp, int flag, int count,
	offset_t offset, struct cred *cred)
{
	cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
	cleanshares(vp, ttoproc(curthread)->p_pid);
	return (0);
}

/*
 * wrtmp does the real work of write requests for tmpfs.
 */
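/*
 * Each pass of the loop in wrtmp moves at most one page.  For example
 * (illustrative, 4K pages): a 10000-byte write starting at offset 1000
 * is done as three uiomoves of 3096, 4096 and 2808 bytes, each staged
 * through its own segmap window.
 */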
static int
wrtmp(
	struct tmount *tm,
	struct tmpnode *tp,
	struct uio *uio,
	struct cred *cr,
	struct caller_context *ct)
{
	pgcnt_t pageoffset;	/* offset in pages */
	ulong_t segmap_offset;	/* pagesize byte offset into segmap */
	caddr_t base;		/* base of segmap */
	ssize_t bytes;		/* bytes to uiomove */
	pfn_t pagenumber;	/* offset in pages into tmp file */
	struct vnode *vp;
	int error = 0;
	int pagecreate;		/* == 1 if we allocated a page */
	int newpage;
	rlim64_t limit = uio->uio_llimit;
	long oresid = uio->uio_resid;
	timestruc_t now;

	/*
	 * tp->tn_size is incremented before the uiomove
	 * is done on a write.  If the move fails (bad user
	 * address) reset tp->tn_size.
	 * The better way would be to increment tp->tn_size
	 * only if the uiomove succeeds.
	 */
	long tn_size_changed = 0;
	long old_tn_size;

	vp = TNTOV(tp);
	ASSERT(vp->v_type == VREG);

	TRACE_1(TR_FAC_TMPFS, TR_TMPFS_RWTMP_START,
	    "tmp_wrtmp_start:vp %p", vp);

	ASSERT(RW_WRITE_HELD(&tp->tn_contents));
	ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));

	if (MANDLOCK(vp, tp->tn_mode)) {
		rw_exit(&tp->tn_contents);
		/*
		 * tmp_getattr ends up being called by chklock
		 */
		error = chklock(vp, FWRITE,
		    uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct);
		rw_enter(&tp->tn_contents, RW_WRITER);
		if (error != 0) {
			TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
			    "tmp_wrtmp_end:vp %p error %d", vp, error);
			return (error);
		}
	}

	if (uio->uio_loffset < 0)
		return (EINVAL);

	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
		limit = MAXOFFSET_T;

	if (uio->uio_loffset >= limit) {
		proc_t *p = ttoproc(curthread);

		mutex_enter(&p->p_lock);
		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
		    p, RCA_UNSAFE_SIGINFO);
		mutex_exit(&p->p_lock);
		return (EFBIG);
	}

	if (uio->uio_loffset >= MAXOFF_T) {
		TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
		    "tmp_wrtmp_end:vp %p error %d", vp, EFBIG);
		return (EFBIG);
	}

	if (uio->uio_resid == 0) {
		TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
		    "tmp_wrtmp_end:vp %p error %d", vp, 0);
		return (0);
	}

	if (limit > MAXOFF_T)
		limit = MAXOFF_T;

	do {
		long offset;
		long delta;

		offset = (long)uio->uio_offset;
		pageoffset = offset & PAGEOFFSET;
		/*
		 * A maximum of PAGESIZE bytes of data is transferred
		 * each pass through this loop
		 */
		bytes = MIN(PAGESIZE - pageoffset, uio->uio_resid);

		if (offset + bytes >= limit) {
			if (offset >= limit) {
				error = EFBIG;
				goto out;
			}
			bytes = limit - offset;
		}
		pagenumber = btop(offset);

		/*
		 * delta is the amount of anonymous memory
		 * to reserve for the file.
		 * We always reserve in pagesize increments so
		 * unless we're extending the file into a new page,
		 * we don't need to call tmp_resv.
		 */
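		/*
		 * For example (illustrative, 4K pages): with tn_size 10000,
		 * P2ROUNDUP below yields 12288, so a write covering
		 * [12000, 12500) has delta == 212 and must reserve one more
		 * page, while any write ending at or below 12288 has
		 * delta <= 0 and reserves nothing.
		 */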
		delta = offset + bytes -
		    P2ROUNDUP_TYPED(tp->tn_size, PAGESIZE, u_offset_t);
		if (delta > 0) {
			pagecreate = 1;
			if (tmp_resv(tm, tp, delta, pagecreate)) {
				cmn_err(CE_WARN,
				    "%s: File system full, swap space limit exceeded",
				    tm->tm_mntpath);
				error = ENOSPC;
				break;
			}
			tmpnode_growmap(tp, (ulong_t)offset + bytes);
		}
		/* grow the file to the new length */
		if (offset + bytes > tp->tn_size) {
			tn_size_changed = 1;
			old_tn_size = tp->tn_size;
			tp->tn_size = offset + bytes;
		}
		if (bytes == PAGESIZE) {
			/*
			 * Writing whole page so reading from disk
			 * is a waste
			 */
			pagecreate = 1;
		} else {
			pagecreate = 0;
		}
		/*
		 * If writing past EOF or filling in a hole
		 * we need to allocate an anon slot.
		 */
		if (anon_get_ptr(tp->tn_anon, pagenumber) == NULL) {
			(void) anon_set_ptr(tp->tn_anon, pagenumber,
			    anon_alloc(vp, ptob(pagenumber)), ANON_SLEEP);
			pagecreate = 1;
			tp->tn_nblocks++;
		}

		/*
		 * We have to drop the contents lock to prevent the VM
		 * system from trying to reacquire it in tmp_getpage()
		 * should the uiomove cause a pagefault.  If we're doing
		 * a pagecreate, segmap creates the page without calling
		 * the filesystem, so we need to hold onto the lock until
		 * the page is created.
		 */
		if (!pagecreate)
			rw_exit(&tp->tn_contents);

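		/*
		 * segmap maps files in MAXBSIZE windows.  The call below
		 * maps the MAXBSIZE-aligned chunk containing offset;
		 * segmap_offset is the page's offset within that window and
		 * pageoffset the byte within the page.  E.g. (illustrative,
		 * 4K pages, 8K MAXBSIZE): offset 0x3a10 uses the window at
		 * 0x2000 with segmap_offset 0x1000 and pageoffset 0xa10.
		 */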
		/* Get offset within the segmap mapping */
		segmap_offset = (offset & PAGEMASK) & MAXBOFFSET;
		base = segmap_getmapflt(segkmap, vp, (offset & MAXBMASK),
		    PAGESIZE, !pagecreate, S_WRITE);

		newpage = 0;

		if (pagecreate) {
			rw_downgrade(&tp->tn_contents);

			/*
			 * segmap_pagecreate() returns 1 if it calls
			 * page_create_va() to allocate any pages.
			 */
			newpage = segmap_pagecreate(segkmap,
			    base + segmap_offset, (size_t)PAGESIZE, 0);
			rw_exit(&tp->tn_contents);
			/*
			 * Clear from the beginning of the page to the starting
			 * offset of the data.
			 */
			if (pageoffset != 0)
				(void) kzero(base + segmap_offset,
				    (size_t)pageoffset);
		}

		error = uiomove(base + segmap_offset + pageoffset,
		    (long)bytes, UIO_WRITE, uio);

		if (pagecreate &&
		    uio->uio_offset < P2ROUNDUP(offset + bytes, PAGESIZE)) {
			long zoffset;	/* zero from offset into page */
			/*
			 * We created pages w/o initializing them completely,
			 * thus we need to zero the part that wasn't set up.
			 * This happens on most EOF write cases and if
			 * we had some sort of error during the uiomove.
			 */
			long nmoved;

			nmoved = uio->uio_offset - offset;
			ASSERT((nmoved + pageoffset) <= PAGESIZE);

			/*
			 * Zero from the end of data in the page to the
			 * end of the page.
			 */
			if ((zoffset = pageoffset + nmoved) < PAGESIZE)
				(void) kzero(base + segmap_offset + zoffset,
				    (size_t)PAGESIZE - zoffset);
		}

		/*
		 * Unlock the pages which have been allocated by
		 * page_create_va() in segmap_pagecreate()
		 */
		if (newpage)
			segmap_pageunlock(segkmap, base + segmap_offset,
			    (size_t)PAGESIZE, S_WRITE);

		if (error) {
			/*
			 * If we failed on a write, we must
			 * be sure to invalidate any pages that may have
			 * been allocated.
			 */
			(void) segmap_release(segkmap, base, SM_INVAL);
		} else {
			error = segmap_release(segkmap, base, 0);
		}

		/*
		 * Re-acquire contents lock.
		 */
		rw_enter(&tp->tn_contents, RW_WRITER);
		/*
		 * If the uiomove failed, fix up tn_size.
		 */
		if (error) {
			if (tn_size_changed) {
				/*
				 * The uiomove failed, and we
				 * allocated blocks, so get rid
				 * of them.
				 */
				(void) tmpnode_trunc(tm, tp,
				    (ulong_t)old_tn_size);
			}
		} else {
			/*
			 * XXX - Can this be out of the loop?
			 */
			if ((tp->tn_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) &&
			    (tp->tn_mode & (S_ISUID | S_ISGID)) &&
			    secpolicy_vnode_setid_retain(cr,
			    (tp->tn_mode & S_ISUID) != 0 && tp->tn_uid == 0)) {
				/*
				 * Clear Set-UID & Set-GID bits on
				 * successful write if not privileged
				 * and at least one of the execute bits
				 * is set.  If we always clear Set-GID,
				 * mandatory file and record locking is
				 * unusable.
				 */
				tp->tn_mode &= ~(S_ISUID | S_ISGID);
			}
			gethrestime(&now);
			tp->tn_mtime = now;
			tp->tn_ctime = now;
		}
	} while (error == 0 && uio->uio_resid > 0 && bytes != 0);

out:
	/*
	 * If we've already done a partial-write, terminate
	 * the write but return no error.
	 */
	if (oresid != uio->uio_resid)
		error = 0;
	TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
	    "tmp_wrtmp_end:vp %p error %d", vp, error);
	return (error);
}

/*
 * rdtmp does the real work of read requests for tmpfs.
 */
static int
rdtmp(
	struct tmount *tm,
	struct tmpnode *tp,
	struct uio *uio,
	struct caller_context *ct)
{
	ulong_t pageoffset;	/* offset in tmpfs file (uio_offset) */
	ulong_t segmap_offset;	/* pagesize byte offset into segmap */
	caddr_t base;		/* base of segmap */
	ssize_t bytes;		/* bytes to uiomove */
	struct vnode *vp;
	int error;
	long oresid = uio->uio_resid;

#if defined(lint)
	tm = tm;
#endif
	vp = TNTOV(tp);

	TRACE_1(TR_FAC_TMPFS, TR_TMPFS_RWTMP_START,
	    "tmp_rdtmp_start:vp %p", vp);

	ASSERT(RW_LOCK_HELD(&tp->tn_contents));

	if (MANDLOCK(vp, tp->tn_mode)) {
		rw_exit(&tp->tn_contents);
		/*
		 * tmp_getattr ends up being called by chklock
		 */
		error = chklock(vp, FREAD,
		    uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct);
		rw_enter(&tp->tn_contents, RW_READER);
		if (error != 0) {
			TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
			    "tmp_rdtmp_end:vp %p error %d", vp, error);
			return (error);
		}
	}
	ASSERT(tp->tn_type == VREG);

	if (uio->uio_loffset >= MAXOFF_T) {
		TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
		    "tmp_rdtmp_end:vp %p error %d", vp, EINVAL);
		return (0);
	}
	if (uio->uio_loffset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0) {
		TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
		    "tmp_rdtmp_end:vp %p error %d", vp, 0);
		return (0);
	}

	do {
		long diff;
		long offset;

		offset = uio->uio_offset;
		pageoffset = offset & PAGEOFFSET;
		bytes = MIN(PAGESIZE - pageoffset, uio->uio_resid);

		diff = tp->tn_size - offset;

		if (diff <= 0) {
			error = 0;
			goto out;
		}
		if (diff < bytes)
			bytes = diff;

		/*
		 * We have to drop the contents lock to prevent the VM
		 * system from trying to reacquire it in tmp_getpage()
		 * should the uiomove cause a pagefault.
		 */
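		/*
		 * On this read side the window is also faulted in up front
		 * by segmap_getmapflt (forcefault argument == 1 below), so
		 * tmp_getpage normally runs here, with tn_contents dropped,
		 * rather than under a pagefault in uiomove.
		 */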
		rw_exit(&tp->tn_contents);

		segmap_offset = (offset & PAGEMASK) & MAXBOFFSET;
		base = segmap_getmapflt(segkmap, vp, offset & MAXBMASK,
		    bytes, 1, S_READ);

		error = uiomove(base + segmap_offset + pageoffset,
		    (long)bytes, UIO_READ, uio);

		if (error)
			(void) segmap_release(segkmap, base, 0);
		else
			error = segmap_release(segkmap, base, 0);

		/*
		 * Re-acquire contents lock.
		 */
		rw_enter(&tp->tn_contents, RW_READER);

	} while (error == 0 && uio->uio_resid > 0);

out:
	gethrestime(&tp->tn_atime);

	/*
	 * If we've already done a partial read, terminate
	 * the read but return no error.
	 */
	if (oresid != uio->uio_resid)
		error = 0;

	TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
	    "tmp_rdtmp_end:vp %p error %d", vp, error);
	return (error);
}

/* ARGSUSED2 */
static int
tmp_read(struct vnode *vp, struct uio *uiop, int ioflag, cred_t *cred,
	struct caller_context *ct)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	struct tmount *tm = (struct tmount *)VTOTM(vp);
	int error;

	/*
	 * We don't currently support reading non-regular files
	 */
	if (vp->v_type == VDIR)
		return (EISDIR);
	if (vp->v_type != VREG)
		return (EINVAL);
	/*
	 * tmp_rwlock should have already been called from layers above
	 */
	ASSERT(RW_READ_HELD(&tp->tn_rwlock));

	rw_enter(&tp->tn_contents, RW_READER);

	error = rdtmp(tm, tp, uiop, ct);

	rw_exit(&tp->tn_contents);

	return (error);
}

static int
tmp_write(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred,
	struct caller_context *ct)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	struct tmount *tm = (struct tmount *)VTOTM(vp);
	int error;

	/*
	 * We don't currently support writing to non-regular files
	 */
	if (vp->v_type != VREG)
		return (EINVAL);	/* XXX EISDIR? */

	/*
	 * tmp_rwlock should have already been called from layers above
	 */
	ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));

	rw_enter(&tp->tn_contents, RW_WRITER);

	if (ioflag & FAPPEND) {
		/*
		 * In append mode start at end of file.
		 */
		uiop->uio_loffset = tp->tn_size;
	}

	error = wrtmp(tm, tp, uiop, cred, ct);

	rw_exit(&tp->tn_contents);

	return (error);
}

/* ARGSUSED */
static int
tmp_ioctl(struct vnode *vp, int com, intptr_t data, int flag,
	struct cred *cred, int *rvalp)
{
	return (ENOTTY);
}

/* ARGSUSED2 */
static int
tmp_getattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cred)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	struct vnode *mvp;
	struct vattr va;
	int attrs = 1;

	/*
	 * A special case to handle the root tnode on a diskless nfs
	 * client who may have had its uid and gid inherited
	 * from an nfs vnode with nobody ownership.  Likely the
	 * root filesystem.  After nfs is fully functional the uid/gid
	 * may be mappable so ask again.
	 * vfsp can't get unmounted because we hold vp.
	 */
	if (vp->v_flag & VROOT &&
	    (mvp = vp->v_vfsp->vfs_vnodecovered) != NULL) {
		mutex_enter(&tp->tn_tlock);
		if (tp->tn_uid == UID_NOBODY || tp->tn_gid == GID_NOBODY) {
			mutex_exit(&tp->tn_tlock);
			bzero(&va, sizeof (struct vattr));
			va.va_mask = AT_UID|AT_GID;
			attrs = VOP_GETATTR(mvp, &va, 0, cred);
		} else {
			mutex_exit(&tp->tn_tlock);
		}
	}
	mutex_enter(&tp->tn_tlock);
	if (attrs == 0) {
		tp->tn_uid = va.va_uid;
		tp->tn_gid = va.va_gid;
	}
	vap->va_type = vp->v_type;
	vap->va_mode = tp->tn_mode & MODEMASK;
	vap->va_uid = tp->tn_uid;
	vap->va_gid = tp->tn_gid;
	vap->va_fsid = tp->tn_fsid;
	vap->va_nodeid = (ino64_t)tp->tn_nodeid;
	vap->va_nlink = tp->tn_nlink;
	vap->va_size = (u_offset_t)tp->tn_size;
	vap->va_atime = tp->tn_atime;
	vap->va_mtime = tp->tn_mtime;
	vap->va_ctime = tp->tn_ctime;
	vap->va_blksize = PAGESIZE;
	vap->va_rdev = tp->tn_rdev;
	vap->va_seq = tp->tn_seq;

	/*
	 * XXX Holes are not taken into account.  We could take the time to
	 * run through the anon array looking for allocated slots...
	 */
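	/*
	 * E.g. (illustrative, 4K pages): va_size 5000 -> btopr() = 2 pages
	 * -> ptob() = 8192 bytes -> btodb() = 16 512-byte blocks, whether
	 * or not the pages are actually allocated.
	 */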
	vap->va_nblocks = (fsblkcnt64_t)btodb(ptob(btopr(vap->va_size)));
	mutex_exit(&tp->tn_tlock);
	return (0);
}

/*ARGSUSED4*/
static int
tmp_setattr(
	struct vnode *vp,
	struct vattr *vap,
	int flags,
	struct cred *cred,
	caller_context_t *ct)
{
	struct tmount *tm = (struct tmount *)VTOTM(vp);
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	int error = 0;
	struct vattr *get;
	long mask;

	/*
	 * Cannot set these attributes
	 */
	if (vap->va_mask & AT_NOSET)
		return (EINVAL);

	mutex_enter(&tp->tn_tlock);

	get = &tp->tn_attr;
	/*
	 * Change file access modes.  Must be owner or have sufficient
	 * privileges.
	 */
	error = secpolicy_vnode_setattr(cred, vp, vap, get, flags,
	    tmp_taccess, tp);

	if (error)
		goto out;

	mask = vap->va_mask;

	if (mask & AT_MODE) {
		get->va_mode &= S_IFMT;
		get->va_mode |= vap->va_mode & ~S_IFMT;
	}

	if (mask & AT_UID)
		get->va_uid = vap->va_uid;
	if (mask & AT_GID)
		get->va_gid = vap->va_gid;
	if (mask & AT_ATIME)
		get->va_atime = vap->va_atime;
	if (mask & AT_MTIME)
		get->va_mtime = vap->va_mtime;

	if (mask & (AT_UID | AT_GID | AT_MODE | AT_MTIME))
		gethrestime(&tp->tn_ctime);

	if (mask & AT_SIZE) {
		ASSERT(vp->v_type != VDIR);

		/* Don't support large files. */
		if (vap->va_size > MAXOFF_T) {
			error = EFBIG;
			goto out;
		}
		mutex_exit(&tp->tn_tlock);

		rw_enter(&tp->tn_rwlock, RW_WRITER);
		rw_enter(&tp->tn_contents, RW_WRITER);
		error = tmpnode_trunc(tm, tp, (ulong_t)vap->va_size);
		rw_exit(&tp->tn_contents);
		rw_exit(&tp->tn_rwlock);
		goto out1;
	}
out:
	mutex_exit(&tp->tn_tlock);
out1:
	return (error);
}

/* ARGSUSED2 */
static int
tmp_access(struct vnode *vp, int mode, int flags, struct cred *cred)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	int error;

	mutex_enter(&tp->tn_tlock);
	error = tmp_taccess(tp, mode, cred);
	mutex_exit(&tp->tn_tlock);
	return (error);
}

/* ARGSUSED3 */
static int
tmp_lookup(
	struct vnode *dvp,
	char *nm,
	struct vnode **vpp,
	struct pathname *pnp,
	int flags,
	struct vnode *rdir,
	struct cred *cred)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(dvp);
	struct tmpnode *ntp = NULL;
	int error;

	/* allow cd into @ dir */
	if (flags & LOOKUP_XATTR) {
		struct tmpnode *xdp;
		struct tmount *tm;

		if (tp->tn_flags & ISXATTR)
			/* No attributes on attributes */
			return (EINVAL);

		rw_enter(&tp->tn_rwlock, RW_WRITER);
		if (tp->tn_xattrdp == NULL) {
			if (!(flags & CREATE_XATTR_DIR)) {
				rw_exit(&tp->tn_rwlock);
				return (ENOENT);
			}

			/*
			 * No attribute directory exists for this
			 * node - create the attr dir as a side effect
			 * of this lookup.
			 */

			/*
			 * Make sure we have adequate permission...
			 */

			if ((error = tmp_taccess(tp, VWRITE, cred)) != 0) {
				rw_exit(&tp->tn_rwlock);
				return (error);
			}

			xdp = tmp_memalloc(sizeof (struct tmpnode),
			    TMP_MUSTHAVE);
			tm = VTOTM(dvp);
			tmpnode_init(tm, xdp, &tp->tn_attr, NULL);
			/*
			 * Fix-up fields unique to attribute directories.
			 */
			xdp->tn_flags = ISXATTR;
			xdp->tn_type = VDIR;
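			/*
			 * Non-directories get a mode derived from their
			 * own: owner rwx, plus group/other r-x when the
			 * file is group/other readable.  E.g. a 0644 file
			 * gets a 0755 attribute directory; a 0600 file
			 * gets 0700.
			 */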
			if (tp->tn_type == VDIR) {
				xdp->tn_mode = tp->tn_attr.va_mode;
			} else {
				xdp->tn_mode = 0700;
				if (tp->tn_attr.va_mode & 0040)
					xdp->tn_mode |= 0750;
				if (tp->tn_attr.va_mode & 0004)
					xdp->tn_mode |= 0705;
			}
			xdp->tn_vnode->v_type = VDIR;
			xdp->tn_vnode->v_flag |= V_XATTRDIR;
			tdirinit(tp, xdp);
			tp->tn_xattrdp = xdp;
		} else {
			VN_HOLD(tp->tn_xattrdp->tn_vnode);
		}
		*vpp = TNTOV(tp->tn_xattrdp);
		rw_exit(&tp->tn_rwlock);
		return (0);
	}

	/*
	 * Null component name is a synonym for directory being searched.
	 */
	if (*nm == '\0') {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}
	ASSERT(tp);

	error = tdirlookup(tp, nm, &ntp, cred);

	if (error == 0) {
		ASSERT(ntp);
		*vpp = TNTOV(ntp);
		/*
		 * If vnode is a device return special vnode instead
		 */
		if (IS_DEVVP(*vpp)) {
			struct vnode *newvp;

			newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type,
			    cred);
			VN_RELE(*vpp);
			*vpp = newvp;
		}
	}
	TRACE_4(TR_FAC_TMPFS, TR_TMPFS_LOOKUP,
	    "tmpfs lookup:vp %p name %s vpp %p error %d",
	    dvp, nm, vpp, error);
	return (error);
}

/*ARGSUSED7*/
static int
tmp_create(
	struct vnode *dvp,
	char *nm,
	struct vattr *vap,
	enum vcexcl exclusive,
	int mode,
	struct vnode **vpp,
	struct cred *cred,
	int flag)
{
	struct tmpnode *parent;
	struct tmount *tm;
	struct tmpnode *self;
	int error;
	struct tmpnode *oldtp;

again:
	parent = (struct tmpnode *)VTOTN(dvp);
	tm = (struct tmount *)VTOTM(dvp);
	self = NULL;
	error = 0;
	oldtp = NULL;

	/* device files not allowed in ext. attr dirs */
	if ((parent->tn_flags & ISXATTR) &&
	    (vap->va_type == VBLK || vap->va_type == VCHR ||
	    vap->va_type == VFIFO || vap->va_type == VDOOR ||
	    vap->va_type == VSOCK || vap->va_type == VPORT))
		return (EINVAL);

	if (vap->va_type == VREG && (vap->va_mode & VSVTX)) {
		/* Must be privileged to set sticky bit */
		if (secpolicy_vnode_stky_modify(cred))
			vap->va_mode &= ~VSVTX;
	} else if (vap->va_type == VNON) {
		return (EINVAL);
	}

	/*
	 * Null component name is a synonym for directory being searched.
	 */
	if (*nm == '\0') {
		VN_HOLD(dvp);
		oldtp = parent;
	} else {
		error = tdirlookup(parent, nm, &oldtp, cred);
	}

	if (error == 0) {	/* name found */
		ASSERT(oldtp);

		rw_enter(&oldtp->tn_rwlock, RW_WRITER);

		/*
		 * if create/read-only an existing
		 * directory, allow it
		 */
		if (exclusive == EXCL)
			error = EEXIST;
		else if ((oldtp->tn_type == VDIR) && (mode & VWRITE))
			error = EISDIR;
		else {
			error = tmp_taccess(oldtp, mode, cred);
		}

		if (error) {
			rw_exit(&oldtp->tn_rwlock);
			tmpnode_rele(oldtp);
			return (error);
		}
		*vpp = TNTOV(oldtp);
		if ((*vpp)->v_type == VREG && (vap->va_mask & AT_SIZE) &&
		    vap->va_size == 0) {
			rw_enter(&oldtp->tn_contents, RW_WRITER);
			(void) tmpnode_trunc(tm, oldtp, 0);
			rw_exit(&oldtp->tn_contents);
		}
		rw_exit(&oldtp->tn_rwlock);
		if (IS_DEVVP(*vpp)) {
			struct vnode *newvp;

			newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type,
			    cred);
			VN_RELE(*vpp);
			if (newvp == NULL) {
				return (ENOSYS);
			}
			*vpp = newvp;
		}
		return (0);
	}

	if (error != ENOENT)
		return (error);

	rw_enter(&parent->tn_rwlock, RW_WRITER);
	error = tdirenter(tm, parent, nm, DE_CREATE,
	    (struct tmpnode *)NULL, (struct tmpnode *)NULL,
	    vap, &self, cred);
	rw_exit(&parent->tn_rwlock);

	if (error) {
		if (self)
			tmpnode_rele(self);

		if (error == EEXIST) {
			/*
			 * This means that the file was created sometime
			 * after we checked and did not find it and when
			 * we went to create it.
			 * Since creat() is supposed to truncate a file
			 * that already exists, go back to the beginning
			 * of the function.  This time we will find it
			 * and go down the tmp_trunc() path.
			 */
			goto again;
		}
		return (error);
	}

	*vpp = TNTOV(self);

	if (!error && IS_DEVVP(*vpp)) {
		struct vnode *newvp;

		newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cred);
		VN_RELE(*vpp);
		if (newvp == NULL)
			return (ENOSYS);
		*vpp = newvp;
	}
	TRACE_3(TR_FAC_TMPFS, TR_TMPFS_CREATE,
	    "tmpfs create:dvp %p nm %s vpp %p", dvp, nm, vpp);
	return (0);
}

static int
tmp_remove(struct vnode *dvp, char *nm, struct cred *cred)
{
	struct tmpnode *parent = (struct tmpnode *)VTOTN(dvp);
	int error;
	struct tmpnode *tp = NULL;

	error = tdirlookup(parent, nm, &tp, cred);
	if (error)
		return (error);

	ASSERT(tp);
	rw_enter(&parent->tn_rwlock, RW_WRITER);
	rw_enter(&tp->tn_rwlock, RW_WRITER);

	if (tp->tn_type != VDIR ||
	    (error = secpolicy_fs_linkdir(cred, dvp->v_vfsp)) == 0)
		error = tdirdelete(parent, tp, nm, DR_REMOVE, cred);

	rw_exit(&tp->tn_rwlock);
	rw_exit(&parent->tn_rwlock);
	vnevent_remove(TNTOV(tp));
	tmpnode_rele(tp);

	TRACE_3(TR_FAC_TMPFS, TR_TMPFS_REMOVE,
	    "tmpfs remove:dvp %p nm %s error %d", dvp, nm, error);
	return (error);
}

static int
tmp_link(struct vnode *dvp, struct vnode *srcvp, char *tnm, struct cred *cred)
{
	struct tmpnode *parent;
	struct tmpnode *from;
	struct tmount *tm = (struct tmount *)VTOTM(dvp);
	int error;
	struct tmpnode *found = NULL;
	struct vnode *realvp;

	if (VOP_REALVP(srcvp, &realvp) == 0)
		srcvp = realvp;

	parent = (struct tmpnode *)VTOTN(dvp);
	from = (struct tmpnode *)VTOTN(srcvp);

	if ((srcvp->v_type == VDIR &&
	    secpolicy_fs_linkdir(cred, dvp->v_vfsp)) ||
	    (from->tn_uid != crgetuid(cred) && secpolicy_basic_link(cred)))
		return (EPERM);

	/*
	 * Make sure link for extended attributes is valid.
	 * We only support hard linking of xattrs in an xattrdir to an
	 * xattrdir.
	 */
	if ((from->tn_flags & ISXATTR) != (parent->tn_flags & ISXATTR))
		return (EINVAL);

	error = tdirlookup(parent, tnm, &found, cred);
	if (error == 0) {
		ASSERT(found);
		tmpnode_rele(found);
		return (EEXIST);
	}

	if (error != ENOENT)
		return (error);

	rw_enter(&parent->tn_rwlock, RW_WRITER);
	error = tdirenter(tm, parent, tnm, DE_LINK, (struct tmpnode *)NULL,
	    from, NULL, (struct tmpnode **)NULL, cred);
	rw_exit(&parent->tn_rwlock);
	return (error);
}

static int
tmp_rename(
	struct vnode *odvp,	/* source parent vnode */
	char *onm,		/* source name */
	struct vnode *ndvp,	/* destination parent vnode */
	char *nnm,		/* destination name */
	struct cred *cred)
{
	struct tmpnode *fromparent;
	struct tmpnode *toparent;
	struct tmpnode *fromtp = NULL;	/* source tmpnode */
	struct tmount *tm = (struct tmount *)VTOTM(odvp);
	int error;
	int samedir = 0;	/* set if odvp == ndvp */
	struct vnode *realvp;

	if (VOP_REALVP(ndvp, &realvp) == 0)
		ndvp = realvp;

	fromparent = (struct tmpnode *)VTOTN(odvp);
	toparent = (struct tmpnode *)VTOTN(ndvp);

	if ((fromparent->tn_flags & ISXATTR) != (toparent->tn_flags & ISXATTR))
		return (EINVAL);

	mutex_enter(&tm->tm_renamelck);

	/*
	 * Look up tmpnode of file we're supposed to rename.
	 */
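	/*
	 * tm_renamelck is held until "done:" below; it serializes renames
	 * on this mount so that the directory rwlocks taken further down
	 * cannot deadlock against a concurrent rename acquiring them in
	 * the opposite order.
	 */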
	error = tdirlookup(fromparent, onm, &fromtp, cred);
	if (error) {
		mutex_exit(&tm->tm_renamelck);
		return (error);
	}

	/*
	 * Make sure we can delete the old (source) entry.  This
	 * requires write permission on the containing directory.  If
	 * that directory is "sticky" it requires further checks.
	 */
	if (((error = tmp_taccess(fromparent, VWRITE, cred)) != 0) ||
	    (error = tmp_sticky_remove_access(fromparent, fromtp, cred)) != 0)
		goto done;

	/*
	 * Check for renaming to or from '.' or '..' or that
	 * fromtp == fromparent
	 */
	if ((onm[0] == '.' &&
	    (onm[1] == '\0' || (onm[1] == '.' && onm[2] == '\0'))) ||
	    (nnm[0] == '.' &&
	    (nnm[1] == '\0' || (nnm[1] == '.' && nnm[2] == '\0'))) ||
	    (fromparent == fromtp)) {
		error = EINVAL;
		goto done;
	}

	samedir = (fromparent == toparent);
	/*
	 * Make sure we can search and rename into the new
	 * (destination) directory.
	 */
	if (!samedir) {
		error = tmp_taccess(toparent, VEXEC|VWRITE, cred);
		if (error)
			goto done;
	}

	/*
	 * Link source to new target
	 */
	rw_enter(&toparent->tn_rwlock, RW_WRITER);
	error = tdirenter(tm, toparent, nnm, DE_RENAME,
	    fromparent, fromtp, (struct vattr *)NULL,
	    (struct tmpnode **)NULL, cred);
	rw_exit(&toparent->tn_rwlock);

	if (error) {
		/*
		 * ESAME isn't really an error; it indicates that the
		 * operation should not be done because the source and target
		 * are the same file, but that no error should be reported.
		 */
		if (error == ESAME)
			error = 0;
		goto done;
	}

	/*
	 * Unlink from source.
	 */
	rw_enter(&fromparent->tn_rwlock, RW_WRITER);
	rw_enter(&fromtp->tn_rwlock, RW_WRITER);

	error = tdirdelete(fromparent, fromtp, onm, DR_RENAME, cred);

	/*
	 * The following handles the case where our source tmpnode was
	 * removed before we got to it.
	 *
	 * XXX We should also cleanup properly in the case where tdirdelete
	 * fails for some other reason.  Currently this case shouldn't happen.
	 * (see 1184991).
	 */
	if (error == ENOENT)
		error = 0;

	rw_exit(&fromtp->tn_rwlock);
	rw_exit(&fromparent->tn_rwlock);
done:
	tmpnode_rele(fromtp);
	mutex_exit(&tm->tm_renamelck);

	TRACE_5(TR_FAC_TMPFS, TR_TMPFS_RENAME,
	    "tmpfs rename:ovp %p onm %s nvp %p nnm %s error %d",
	    odvp, onm, ndvp, nnm, error);
	return (error);
}

static int
tmp_mkdir(
	struct vnode *dvp,
	char *nm,
	struct vattr *va,
	struct vnode **vpp,
	struct cred *cred)
{
	struct tmpnode *parent = (struct tmpnode *)VTOTN(dvp);
	struct tmpnode *self = NULL;
	struct tmount *tm = (struct tmount *)VTOTM(dvp);
	int error;

	/* no new dirs allowed in xattr dirs */
	if (parent->tn_flags & ISXATTR)
		return (EINVAL);

	/*
	 * Might be dangling directory.  Catch it here,
	 * because an ENOENT return from tdirlookup() is
	 * an "o.k. return".
	 */
	if (parent->tn_nlink == 0)
		return (ENOENT);

	error = tdirlookup(parent, nm, &self, cred);
	if (error == 0) {
		ASSERT(self);
		tmpnode_rele(self);
		return (EEXIST);
	}
	if (error != ENOENT)
		return (error);

	rw_enter(&parent->tn_rwlock, RW_WRITER);
	error = tdirenter(tm, parent, nm, DE_MKDIR,
	    (struct tmpnode *)NULL, (struct tmpnode *)NULL, va,
	    &self, cred);
	if (error) {
		rw_exit(&parent->tn_rwlock);
		if (self)
			tmpnode_rele(self);
		return (error);
	}
	rw_exit(&parent->tn_rwlock);
	*vpp = TNTOV(self);
	return (0);
}

static int
tmp_rmdir(
	struct vnode *dvp,
	char *nm,
	struct vnode *cdir,
	struct cred *cred)
{
	struct tmpnode *parent = (struct tmpnode *)VTOTN(dvp);
	struct tmpnode *self = NULL;
	struct vnode *vp;
	int error = 0;

	/*
	 * Return error when removing . and ..
	 */
	if (strcmp(nm, ".") == 0)
		return (EINVAL);
	if (strcmp(nm, "..") == 0)
		return (EEXIST); /* Should be ENOTEMPTY */
	error = tdirlookup(parent, nm, &self, cred);
	if (error)
		return (error);

	rw_enter(&parent->tn_rwlock, RW_WRITER);
	rw_enter(&self->tn_rwlock, RW_WRITER);

	vp = TNTOV(self);
	if (vp == dvp || vp == cdir) {
		error = EINVAL;
		goto done1;
	}
	if (self->tn_type != VDIR) {
		error = ENOTDIR;
		goto done1;
	}

	mutex_enter(&self->tn_tlock);
	if (self->tn_nlink > 2) {
		mutex_exit(&self->tn_tlock);
		error = EEXIST;
		goto done1;
	}
	mutex_exit(&self->tn_tlock);

	if (vn_vfslock(vp)) {
		error = EBUSY;
		goto done1;
	}
	if (vn_mountedvfs(vp) != NULL) {
		error = EBUSY;
		goto done;
	}

	/*
	 * Check for an empty directory
	 * i.e. only includes entries for "." and ".."
	 */
	if (self->tn_dirents > 2) {
		error = EEXIST;		/* SIGH should be ENOTEMPTY */
		/*
		 * Update atime because checking tn_dirents is logically
		 * equivalent to reading the directory
		 */
		gethrestime(&self->tn_atime);
		goto done;
	}

	error = tdirdelete(parent, self, nm, DR_RMDIR, cred);
done:
	vn_vfsunlock(vp);
done1:
	rw_exit(&self->tn_rwlock);
	rw_exit(&parent->tn_rwlock);
	vnevent_rmdir(TNTOV(self));
	tmpnode_rele(self);

	return (error);
}

/* ARGSUSED2 */
static int
tmp_readdir(struct vnode *vp, struct uio *uiop, struct cred *cred, int *eofp)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	struct tdirent *tdp;
	int error = 0;
	size_t namelen;
	struct dirent64 *dp;
	ulong_t offset;
	ulong_t total_bytes_wanted;
	long outcount = 0;
	long bufsize;
	int reclen;
	caddr_t outbuf;

	if (uiop->uio_loffset >= MAXOFF_T) {
		if (eofp)
			*eofp = 1;
		return (0);
	}
	/*
	 * assuming system call has already called tmp_rwlock
	 */
	ASSERT(RW_READ_HELD(&tp->tn_rwlock));

	if (uiop->uio_iovcnt != 1)
		return (EINVAL);

	if (vp->v_type != VDIR)
		return (ENOTDIR);

	/*
	 * There's a window here where someone could have removed
	 * all the entries in the directory after we put a hold on the
	 * vnode but before we grabbed the rwlock.  Just return.
	 */
	if (tp->tn_dir == NULL) {
		if (tp->tn_nlink) {
			panic("empty directory 0x%p", (void *)tp);
			/*NOTREACHED*/
		}
		return (0);
	}

	/*
	 * Get space for multiple directory entries
	 */
	total_bytes_wanted = uiop->uio_iov->iov_len;
	bufsize = total_bytes_wanted + sizeof (struct dirent64);
	outbuf = kmem_alloc(bufsize, KM_SLEEP);

	dp = (struct dirent64 *)outbuf;
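
	/*
	 * Each entry copied out below occupies DIRENT64_RECLEN(namelen)
	 * bytes: the fixed dirent64 header plus the NUL-terminated name,
	 * rounded up for alignment -- e.g. a 3-character name takes 24
	 * bytes on an 8-byte-aligned ABI (illustrative figure).
	 */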
	offset = 0;
	tdp = tp->tn_dir;
	while (tdp) {
		namelen = strlen(tdp->td_name);	/* no +1 needed */
		offset = tdp->td_offset;
		if (offset >= uiop->uio_offset) {
			reclen = (int)DIRENT64_RECLEN(namelen);
			if (outcount + reclen > total_bytes_wanted) {
				if (!outcount)
					/*
					 * Buffer too small for any entries.
					 */
					error = EINVAL;
				break;
			}
			ASSERT(tdp->td_tmpnode != NULL);

			/* use strncpy(9f) to zero out uninitialized bytes */

			(void) strncpy(dp->d_name, tdp->td_name,
			    DIRENT64_NAMELEN(reclen));
			dp->d_reclen = (ushort_t)reclen;
			dp->d_ino = (ino64_t)tdp->td_tmpnode->tn_nodeid;
			dp->d_off = (offset_t)tdp->td_offset + 1;
			dp = (struct dirent64 *)
			    ((uintptr_t)dp + dp->d_reclen);
			outcount += reclen;
			ASSERT(outcount <= bufsize);
		}
		tdp = tdp->td_next;
	}

	if (!error)
		error = uiomove(outbuf, outcount, UIO_READ, uiop);

	if (!error) {
		/*
		 * If we reached the end of the list our offset
		 * should now be just past the end.
		 */
		if (!tdp) {
			offset += 1;
			if (eofp)
				*eofp = 1;
		} else if (eofp)
			*eofp = 0;
		uiop->uio_offset = offset;
	}
	gethrestime(&tp->tn_atime);
	kmem_free(outbuf, bufsize);
	return (error);
}

static int
tmp_symlink(
	struct vnode *dvp,
	char *lnm,
	struct vattr *tva,
	char *tnm,
	struct cred *cred)
{
	struct tmpnode *parent = (struct tmpnode *)VTOTN(dvp);
	struct tmpnode *self = (struct tmpnode *)NULL;
	struct tmount *tm = (struct tmount *)VTOTM(dvp);
	char *cp = NULL;
	int error;
	size_t len;

	/* no symlinks allowed to files in xattr dirs */
	if (parent->tn_flags & ISXATTR)
		return (EINVAL);

	error = tdirlookup(parent, lnm, &self, cred);
	if (error == 0) {
		/*
		 * The entry already exists
		 */
		tmpnode_rele(self);
		return (EEXIST);	/* was 0 */
	}

	if (error != ENOENT) {
		if (self != NULL)
			tmpnode_rele(self);
		return (error);
	}

	rw_enter(&parent->tn_rwlock, RW_WRITER);
	error = tdirenter(tm, parent, lnm, DE_CREATE, (struct tmpnode *)NULL,
	    (struct tmpnode *)NULL, tva, &self, cred);
	rw_exit(&parent->tn_rwlock);

	if (error) {
		if (self)
			tmpnode_rele(self);
		return (error);
	}
	len = strlen(tnm) + 1;
	cp = tmp_memalloc(len, 0);
	if (cp == NULL) {
		tmpnode_rele(self);
		return (ENOSPC);
	}
	(void) strcpy(cp, tnm);

	self->tn_symlink = cp;
	self->tn_size = len - 1;
	tmpnode_rele(self);
	return (error);
}
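
/*
 * Note that tn_symlink holds the NUL-terminated link text while tn_size
 * is its strlen (the NUL is not counted), so tmp_readlink below copies
 * out exactly the link text: e.g. "/tmp/x" is stored in 7 bytes with
 * tn_size == 6.
 */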
/* ARGSUSED2 */
static int
tmp_readlink(struct vnode *vp, struct uio *uiop, struct cred *cred)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	int error = 0;

	if (vp->v_type != VLNK)
		return (EINVAL);

	rw_enter(&tp->tn_rwlock, RW_READER);
	rw_enter(&tp->tn_contents, RW_READER);
	error = uiomove(tp->tn_symlink, tp->tn_size, UIO_READ, uiop);
	gethrestime(&tp->tn_atime);
	rw_exit(&tp->tn_contents);
	rw_exit(&tp->tn_rwlock);
	return (error);
}

/* ARGSUSED */
static int
tmp_fsync(struct vnode *vp, int syncflag, struct cred *cred)
{
	return (0);
}

/* ARGSUSED */
static void
tmp_inactive(struct vnode *vp, struct cred *cred)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	struct tmount *tm = (struct tmount *)VFSTOTM(vp->v_vfsp);

	rw_enter(&tp->tn_rwlock, RW_WRITER);
top:
	mutex_enter(&tp->tn_tlock);
	mutex_enter(&vp->v_lock);
	ASSERT(vp->v_count >= 1);

	/*
	 * If we don't have the last hold or the link count is non-zero,
	 * there's little to do -- just drop our hold.
	 */
	if (vp->v_count > 1 || tp->tn_nlink != 0) {
		vp->v_count--;
		mutex_exit(&vp->v_lock);
		mutex_exit(&tp->tn_tlock);
		rw_exit(&tp->tn_rwlock);
		return;
	}

	/*
	 * We have the last hold *and* the link count is zero, so this
	 * tmpnode is dead from the filesystem's viewpoint.  However,
	 * if the tmpnode has any pages associated with it (i.e. if it's
	 * a normal file with non-zero size), the tmpnode can still be
	 * discovered by pageout or fsflush via the page vnode pointers.
	 * In this case we must drop all our locks, truncate the tmpnode,
	 * and try the whole dance again.
	 */
	if (tp->tn_size != 0) {
		if (tp->tn_type == VREG) {
			mutex_exit(&vp->v_lock);
			mutex_exit(&tp->tn_tlock);
			rw_enter(&tp->tn_contents, RW_WRITER);
			(void) tmpnode_trunc(tm, tp, 0);
			rw_exit(&tp->tn_contents);
			ASSERT(tp->tn_size == 0);
			ASSERT(tp->tn_nblocks == 0);
			goto top;
		}
		if (tp->tn_type == VLNK)
			tmp_memfree(tp->tn_symlink, tp->tn_size + 1);
	}

	/*
	 * Remove normal file/dir's xattr dir and xattrs.
	 */
	if (tp->tn_xattrdp) {
		struct tmpnode *xtp = tp->tn_xattrdp;

		ASSERT(xtp->tn_flags & ISXATTR);
		tmpnode_hold(xtp);
		rw_enter(&xtp->tn_rwlock, RW_WRITER);
		tdirtrunc(xtp);
		DECR_COUNT(&xtp->tn_nlink, &xtp->tn_tlock);
		tp->tn_xattrdp = NULL;
		rw_exit(&xtp->tn_rwlock);
		tmpnode_rele(xtp);
	}

	mutex_exit(&vp->v_lock);
	mutex_exit(&tp->tn_tlock);
	/* Here's our chance to send invalid event while we're between locks */
	vn_invalid(TNTOV(tp));
	mutex_enter(&tm->tm_contents);
	if (tp->tn_forw == NULL)
		tm->tm_rootnode->tn_back = tp->tn_back;
	else
		tp->tn_forw->tn_back = tp->tn_back;
	tp->tn_back->tn_forw = tp->tn_forw;
	mutex_exit(&tm->tm_contents);
	rw_exit(&tp->tn_rwlock);
	rw_destroy(&tp->tn_rwlock);
	mutex_destroy(&tp->tn_tlock);
	vn_free(TNTOV(tp));
	tmp_memfree(tp, sizeof (struct tmpnode));
}

static int
tmp_fid(struct vnode *vp, struct fid *fidp)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	struct tfid *tfid;

	if (fidp->fid_len < (sizeof (struct tfid) - sizeof (ushort_t))) {
		fidp->fid_len = sizeof (struct tfid) - sizeof (ushort_t);
		return (ENOSPC);
	}

	tfid = (struct tfid *)fidp;
	bzero(tfid, sizeof (struct tfid));
	tfid->tfid_len = (int)sizeof (struct tfid) - sizeof (ushort_t);

	tfid->tfid_ino = tp->tn_nodeid;
	tfid->tfid_gen = tp->tn_gen;

	return (0);
}

/*
 * Return all the pages from [off..off+len] in given file
 */
static int
tmp_getpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr)
{
	int err = 0;
	struct tmpnode *tp = VTOTN(vp);
	anoff_t toff = (anoff_t)off;
	size_t tlen = len;
	u_offset_t tmpoff;
	timestruc_t now;

	rw_enter(&tp->tn_contents, RW_READER);

	if (off + len > tp->tn_size + PAGEOFFSET) {
		err = EFAULT;
		goto out;
	}
	/*
	 * Look for holes (no anon slot) in faulting range.  If there are
	 * holes we have to switch to a write lock and fill them in.  Swap
	 * space for holes was already reserved when the file was grown.
	 */
	tmpoff = toff;
	if (non_anon(tp->tn_anon, btop(off), &tmpoff, &tlen)) {
		if (!rw_tryupgrade(&tp->tn_contents)) {
			rw_exit(&tp->tn_contents);
			rw_enter(&tp->tn_contents, RW_WRITER);
			/* Size may have changed when lock was dropped */
			if (off + len > tp->tn_size + PAGEOFFSET) {
				err = EFAULT;
				goto out;
			}
		}
		for (toff = (anoff_t)off; toff < (anoff_t)off + len;
		    toff += PAGESIZE) {
			if (anon_get_ptr(tp->tn_anon, btop(toff)) == NULL) {
				/* XXX - may allocate mem w. write lock held */
				(void) anon_set_ptr(tp->tn_anon, btop(toff),
				    anon_alloc(vp, toff),
				    ANON_SLEEP);
				tp->tn_nblocks++;
			}
		}
		rw_downgrade(&tp->tn_contents);
	}

	if (len <= PAGESIZE)
		err = tmp_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
		    seg, addr, rw, cr);
	else
		err = pvn_getpages(tmp_getapage, vp, (u_offset_t)off, len,
		    protp, pl, plsz, seg, addr, rw, cr);

	gethrestime(&now);
	tp->tn_atime = now;
	if (rw == S_WRITE)
		tp->tn_mtime = now;

out:
	rw_exit(&tp->tn_contents);
	return (err);
}

/*
 * Called from pvn_getpages or swap_getpage to get a particular page.
 */
/*ARGSUSED*/
static int
tmp_getapage(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr)
{
	struct page *pp;
	int flags;
	int err = 0;
	struct vnode *pvp;
	u_offset_t poff;

	if (protp != NULL)
		*protp = PROT_ALL;
again:
	if (pp = page_lookup(vp, off, rw == S_CREATE ? SE_EXCL : SE_SHARED)) {
		if (pl) {
			pl[0] = pp;
			pl[1] = NULL;
		} else {
			page_unlock(pp);
		}
	} else {
		pp = page_create_va(vp, off, PAGESIZE,
		    PG_WAIT | PG_EXCL, seg, addr);
		/*
		 * Someone raced in and created the page after we did the
		 * lookup but before we did the create, so go back and
		 * try to look it up again.
		 */
		if (pp == NULL)
			goto again;
		/*
		 * Fill page from backing store, if any.  If none, then
		 * either this is a newly filled hole or page must have
		 * been unmodified and freed so just zero it out.
		 */
		err = swap_getphysname(vp, off, &pvp, &poff);
		if (err) {
			panic("tmp_getapage: no anon slot vp %p "
			    "off %llx pp %p\n", (void *)vp, off, (void *)pp);
		}
		if (pvp) {
			flags = (pl == NULL ? B_ASYNC|B_READ : B_READ);
			err = VOP_PAGEIO(pvp, pp, (u_offset_t)poff, PAGESIZE,
			    flags, cr);
			if (flags & B_ASYNC)
				pp = NULL;
		} else if (rw != S_CREATE) {
			pagezero(pp, 0, PAGESIZE);
		}
		if (err && pp)
			pvn_read_done(pp, B_ERROR);
		if (err == 0) {
			if (pl)
				pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
			else
				pvn_io_done(pp);
		}
	}
	return (err);
}

/*
 * Flags are composed of {B_INVAL, B_DIRTY, B_FREE, B_DONTNEED}.
 * If len == 0, do from off to EOF.
 */
static int tmp_nopage = 0;	/* Don't do tmp_putpage's if set */

/* ARGSUSED */
int
tmp_putpage(
	register struct vnode *vp,
	offset_t off,
	size_t len,
	int flags,
	struct cred *cr)
{
	register page_t *pp;
	u_offset_t io_off;
	size_t io_len = 0;
	int err = 0;
	struct tmpnode *tp = VTOTN(vp);
	int dolock;

	if (tmp_nopage)
		return (0);

	ASSERT(vp->v_count != 0);

	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	/*
	 * This being tmpfs, we don't ever do i/o unless we really
	 * have to (when we're low on memory and pageout calls us
	 * with B_ASYNC | B_FREE or the user explicitly asks for it with
	 * B_DONTNEED).
	 * XXX to approximately track the mod time like ufs we should
	 * update the times here.  The problem is, once someone does a
	 * store we never clear the mod bit and do i/o, thus fsflush
	 * will keep calling us every 30 seconds to do the i/o and we'll
	 * continually update the mod time.  At least we update the mod
	 * time on the first store because this results in a call to getpage.
	 */
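	/*
	 * Hence the filter below: only pageout's B_ASYNC | B_FREE requests,
	 * B_INVAL invalidations and explicit B_DONTNEED pushes do any work;
	 * routine fsflush/sync traffic returns immediately.
	 */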
The problem is, once someone does a 1779 * store we never clear the mod bit and do i/o, thus fsflush 1780 * will keep calling us every 30 seconds to do the i/o and we'll 1781 * continually update the mod time. At least we update the mod 1782 * time on the first store because this results in a call to getpage. 1783 */ 1784 if (flags != (B_ASYNC | B_FREE) && (flags & B_INVAL) == 0 && 1785 (flags & B_DONTNEED) == 0) 1786 return (0); 1787 /* 1788 * If this thread owns the lock, i.e., this thread grabbed it 1789 * as writer somewhere above, then we don't need to grab the 1790 * lock as reader in this routine. 1791 */ 1792 dolock = (rw_owner(&tp->tn_contents) != curthread); 1793 1794 /* 1795 * If this is pageout don't block on the lock as you could deadlock 1796 * when freemem == 0 (another thread has the read lock and is blocked 1797 * creating a page, and a third thread is waiting to get the writers 1798 * lock - waiting writers priority blocks us from getting the read 1799 * lock). Of course, if the only freeable pages are on this tmpnode 1800 * we're hosed anyways. A better solution might be a new lock type. 1801 * Note: ufs has the same problem. 1802 */ 1803 if (curproc == proc_pageout) { 1804 if (!rw_tryenter(&tp->tn_contents, RW_READER)) 1805 return (ENOMEM); 1806 } else if (dolock) 1807 rw_enter(&tp->tn_contents, RW_READER); 1808 1809 if (!vn_has_cached_data(vp)) 1810 goto out; 1811 1812 if (len == 0) { 1813 if (curproc == proc_pageout) { 1814 panic("tmp: pageout can't block"); 1815 /*NOTREACHED*/ 1816 } 1817 1818 /* Search the entire vp list for pages >= off. */ 1819 err = pvn_vplist_dirty(vp, (u_offset_t)off, tmp_putapage, 1820 flags, cr); 1821 } else { 1822 u_offset_t eoff; 1823 1824 /* 1825 * Loop over all offsets in the range [off...off + len] 1826 * looking for pages to deal with. 1827 */ 1828 eoff = MIN(off + len, tp->tn_size); 1829 for (io_off = off; io_off < eoff; io_off += io_len) { 1830 /* 1831 * If we are not invalidating, synchronously 1832 * freeing or writing pages use the routine 1833 * page_lookup_nowait() to prevent reclaiming 1834 * them from the free list. 1835 */ 1836 if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 1837 pp = page_lookup(vp, io_off, 1838 (flags & (B_INVAL | B_FREE)) ? 1839 SE_EXCL : SE_SHARED); 1840 } else { 1841 pp = page_lookup_nowait(vp, io_off, 1842 (flags & B_FREE) ? SE_EXCL : SE_SHARED); 1843 } 1844 1845 if (pp == NULL || pvn_getdirty(pp, flags) == 0) 1846 io_len = PAGESIZE; 1847 else { 1848 err = tmp_putapage(vp, pp, &io_off, &io_len, 1849 flags, cr); 1850 if (err != 0) 1851 break; 1852 } 1853 } 1854 } 1855 /* If invalidating, verify all pages on vnode list are gone. */ 1856 if (err == 0 && off == 0 && len == 0 && 1857 (flags & B_INVAL) && vn_has_cached_data(vp)) { 1858 panic("tmp_putpage: B_INVAL, pages not gone"); 1859 /*NOTREACHED*/ 1860 } 1861 out: 1862 if ((curproc == proc_pageout) || dolock) 1863 rw_exit(&tp->tn_contents); 1864 /* 1865 * Only reason putapage is going to give us SE_NOSWAP as error 1866 * is when we ask a page to be written to physical backing store 1867 * and there is none. Ignore this because we might be dealing 1868 * with a swap page which does not have any backing store 1869 * on disk. In any other case we won't get this error over here. 1870 */ 1871 if (err == SE_NOSWAP) 1872 err = 0; 1873 return (err); 1874 } 1875 1876 long tmp_putpagecnt, tmp_pagespushed; 1877 1878 /* 1879 * Write out a single page. 1880 * For tmpfs this means choose a physical swap slot and write the page 1881 * out using VOP_PAGEIO. 
/*ARGSUSED*/
static int
tmp_putapage(
	struct vnode *vp,
	page_t *pp,
	u_offset_t *offp,
	size_t *lenp,
	int flags,
	struct cred *cr)
{
	int err;
	ulong_t klstart, kllen;
	page_t *pplist, *npplist;
	extern int klustsize;
	long tmp_klustsize;
	struct tmpnode *tp;
	size_t pp_off, pp_len;
	u_offset_t io_off;
	size_t io_len;
	struct vnode *pvp;
	u_offset_t pstart;
	u_offset_t offset;
	u_offset_t tmpoff;

	ASSERT(PAGE_LOCKED(pp));

	/* Kluster in tmp_klustsize chunks */
	tp = VTOTN(vp);
	tmp_klustsize = klustsize;
	offset = pp->p_offset;
	klstart = (offset / tmp_klustsize) * tmp_klustsize;
	kllen = MIN(tmp_klustsize, tp->tn_size - klstart);

	/* Get a kluster of pages */
	pplist =
	    pvn_write_kluster(vp, pp, &tmpoff, &pp_len, klstart, kllen, flags);

	pp_off = (size_t)tmpoff;

	/*
	 * Get a cluster of physical offsets for the pages; the amount we
	 * get may be some subrange of what we ask for (io_off, io_len).
	 */
	io_off = pp_off;
	io_len = pp_len;
	err = swap_newphysname(vp, offset, &io_off, &io_len, &pvp, &pstart);
	ASSERT(err != SE_NOANON); /* anon slot must have been filled */
	if (err) {
		pvn_write_done(pplist, B_ERROR | B_WRITE | flags);
		/*
		 * If this routine is called as a result of segvn_sync
		 * operation and we have no physical swap then we can get an
		 * error here.  In such case we would return SE_NOSWAP as
		 * error.  At this point, we expect only SE_NOSWAP.
		 */
		ASSERT(err == SE_NOSWAP);
		if (flags & B_INVAL)
			err = ENOMEM;
		goto out;
	}
	ASSERT(pp_off <= io_off && io_off + io_len <= pp_off + pp_len);
	ASSERT(io_off <= offset && offset < io_off + io_len);

	/* Toss pages at front/rear that we couldn't get physical backing for */
	if (io_off != pp_off) {
		npplist = NULL;
		page_list_break(&pplist, &npplist, btop(io_off - pp_off));
		ASSERT(pplist->p_offset == pp_off);
		ASSERT(pplist->p_prev->p_offset == io_off - PAGESIZE);
		pvn_write_done(pplist, B_ERROR | B_WRITE | flags);
		pplist = npplist;
	}
	if (io_off + io_len < pp_off + pp_len) {
		npplist = NULL;
		page_list_break(&pplist, &npplist, btop(io_len));
		ASSERT(npplist->p_offset == io_off + io_len);
		ASSERT(npplist->p_prev->p_offset == pp_off + pp_len - PAGESIZE);
		pvn_write_done(npplist, B_ERROR | B_WRITE | flags);
	}

	ASSERT(pplist->p_offset == io_off);
	ASSERT(pplist->p_prev->p_offset == io_off + io_len - PAGESIZE);
	ASSERT(btopr(io_len) <= btopr(kllen));

	/* Do i/o on the remaining kluster */
	err = VOP_PAGEIO(pvp, pplist, (u_offset_t)pstart, io_len,
	    B_WRITE | flags, cr);

	if ((flags & B_ASYNC) == 0) {
		pvn_write_done(pplist, ((err) ? B_ERROR : 0) | B_WRITE | flags);
	}
out:
	if (!err) {
		if (offp)
			*offp = io_off;
		if (lenp)
			*lenp = io_len;
		tmp_putpagecnt++;
		tmp_pagespushed += btop(io_len);
	}
	if (err && err != ENOMEM && err != SE_NOSWAP)
		cmn_err(CE_WARN, "tmp_putapage: err %d\n", err);
	return (err);
}

static int
tmp_map(
	struct vnode *vp,
	offset_t off,
	struct as *as,
	caddr_t *addrp,
	size_t len,
	uchar_t prot,
	uchar_t maxprot,
	uint_t flags,
	struct cred *cred)
{
	struct segvn_crargs vn_a;
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	int error;

#ifdef _ILP32
	if (len > MAXOFF_T)
		return (ENOMEM);
#endif

	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	if (off < 0 || (off + len) < 0 ||
	    off > MAXOFF_T || (off + len) > MAXOFF_T)
		return (ENXIO);

	if (vp->v_type != VREG)
		return (ENODEV);

	/*
	 * Don't allow mapping to locked file
	 */
	if (vn_has_mandatory_locks(vp, tp->tn_mode)) {
		return (EAGAIN);
	}

	as_rangelock(as);
	if ((flags & MAP_FIXED) == 0) {
		map_addr(addrp, len, (offset_t)off, 1, flags);
		if (*addrp == NULL) {
			as_rangeunlock(as);
			return (ENOMEM);
		}
	} else {
		/*
		 * User specified address - blow away any previous mappings
		 */
		(void) as_unmap(as, *addrp, len);
	}

	vn_a.vp = vp;
	vn_a.offset = (u_offset_t)off;
	vn_a.type = flags & MAP_TYPE;
	vn_a.prot = prot;
	vn_a.maxprot = maxprot;
	vn_a.flags = flags & ~MAP_TYPE;
	vn_a.cred = cred;
	vn_a.amp = NULL;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	error = as_map(as, *addrp, len, segvn_create, &vn_a);
	as_rangeunlock(as);
	return (error);
}

/*
 * tmp_addmap and tmp_delmap can't be called since the vp
 * maintained in the segvn mapping is NULL.
 */
/* ARGSUSED */
static int
tmp_addmap(
	struct vnode *vp,
	offset_t off,
	struct as *as,
	caddr_t addr,
	size_t len,
	uchar_t prot,
	uchar_t maxprot,
	uint_t flags,
	struct cred *cred)
{
	return (0);
}

/* ARGSUSED */
static int
tmp_delmap(
	struct vnode *vp,
	offset_t off,
	struct as *as,
	caddr_t addr,
	size_t len,
	uint_t prot,
	uint_t maxprot,
	uint_t flags,
	struct cred *cred)
{
	return (0);
}
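
/*
 * tmp_freesp is the F_FREESP (ftruncate) back end: l_len must be zero,
 * and after the mandatory-locking check the file is cut -- or extended --
 * to l_start.
 */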
static int
tmp_freesp(struct vnode *vp, struct flock64 *lp, int flag)
{
	register int i;
	register struct tmpnode *tp = VTOTN(vp);
	int error;

	ASSERT(vp->v_type == VREG);
	ASSERT(lp->l_start >= 0);

	if (lp->l_len != 0)
		return (EINVAL);

	rw_enter(&tp->tn_rwlock, RW_WRITER);
	if (tp->tn_size == lp->l_start) {
		rw_exit(&tp->tn_rwlock);
		return (0);
	}

	/*
	 * Check for any mandatory locks on the range
	 */
	if (MANDLOCK(vp, tp->tn_mode)) {
		long save_start;

		save_start = lp->l_start;

		if (tp->tn_size < lp->l_start) {
			/*
			 * "Truncate up" case: need to make sure there
			 * is no lock beyond current end-of-file.  To
			 * do so, we need to set l_start to the size
			 * of the file temporarily.
			 */
			lp->l_start = tp->tn_size;
		}
		lp->l_type = F_WRLCK;
		lp->l_sysid = 0;
		lp->l_pid = ttoproc(curthread)->p_pid;
		i = (flag & (FNDELAY|FNONBLOCK)) ? 0 : SLPFLCK;
		if ((i = reclock(vp, lp, i, 0, lp->l_start, NULL)) != 0 ||
		    lp->l_type != F_UNLCK) {
			rw_exit(&tp->tn_rwlock);
			return (i ? i : EAGAIN);
		}

		lp->l_start = save_start;
	}

	rw_enter(&tp->tn_contents, RW_WRITER);
	error = tmpnode_trunc((struct tmount *)VFSTOTM(vp->v_vfsp),
	    tp, (ulong_t)lp->l_start);
	rw_exit(&tp->tn_contents);
	rw_exit(&tp->tn_rwlock);
	return (error);
}

/* ARGSUSED */
static int
tmp_space(
	struct vnode *vp,
	int cmd,
	struct flock64 *bfp,
	int flag,
	offset_t offset,
	cred_t *cred,
	caller_context_t *ct)
{
	int error;

	if (cmd != F_FREESP)
		return (EINVAL);
	if ((error = convoff(vp, bfp, 0, (offset_t)offset)) == 0) {
		if ((bfp->l_start > MAXOFF_T) || (bfp->l_len > MAXOFF_T))
			return (EFBIG);
		error = tmp_freesp(vp, bfp, flag);
	}
	return (error);
}

/* ARGSUSED */
static int
tmp_seek(struct vnode *vp, offset_t ooff, offset_t *noffp)
{
	return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
}

/* ARGSUSED2 */
static int
tmp_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
{
	struct tmpnode *tp = VTOTN(vp);

	if (write_lock) {
		rw_enter(&tp->tn_rwlock, RW_WRITER);
	} else {
		rw_enter(&tp->tn_rwlock, RW_READER);
	}
	return (write_lock);
}

/* ARGSUSED1 */
static void
tmp_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
{
	struct tmpnode *tp = VTOTN(vp);

	rw_exit(&tp->tn_rwlock);
}

static int
tmp_pathconf(struct vnode *vp, int cmd, ulong_t *valp, cred_t *cr)
{
	struct tmpnode *tp = NULL;
	int error;

	switch (cmd) {
	case _PC_XATTR_EXISTS:
		if (vp->v_vfsp->vfs_flag & VFS_XATTR) {
			*valp = 0;	/* assume no attributes */
			error = 0;	/* okay to ask */
			tp = VTOTN(vp);
			rw_enter(&tp->tn_rwlock, RW_READER);
			if (tp->tn_xattrdp) {
				rw_enter(&tp->tn_xattrdp->tn_rwlock,
				    RW_READER);
				/* do not count "." and ".." */
				if (tp->tn_xattrdp->tn_dirents > 2)
					*valp = 1;
				rw_exit(&tp->tn_xattrdp->tn_rwlock);
			}
			rw_exit(&tp->tn_rwlock);
		} else {
			error = EINVAL;
		}
		break;
	default:
		error = fs_pathconf(vp, cmd, valp, cr);
	}
	return (error);
}


struct vnodeops *tmp_vnodeops;

const fs_operation_def_t tmp_vnodeops_template[] = {
	VOPNAME_OPEN, tmp_open,
	VOPNAME_CLOSE, tmp_close,
	VOPNAME_READ, tmp_read,
	VOPNAME_WRITE, tmp_write,
	VOPNAME_IOCTL, tmp_ioctl,
	VOPNAME_GETATTR, tmp_getattr,
	VOPNAME_SETATTR, tmp_setattr,
	VOPNAME_ACCESS, tmp_access,
	VOPNAME_LOOKUP, tmp_lookup,
	VOPNAME_CREATE, tmp_create,
	VOPNAME_REMOVE, tmp_remove,
	VOPNAME_LINK, tmp_link,
	VOPNAME_RENAME, tmp_rename,
	VOPNAME_MKDIR, tmp_mkdir,
	VOPNAME_RMDIR, tmp_rmdir,
	VOPNAME_READDIR, tmp_readdir,
	VOPNAME_SYMLINK, tmp_symlink,
	VOPNAME_READLINK, tmp_readlink,
	VOPNAME_FSYNC, tmp_fsync,
	VOPNAME_INACTIVE, (fs_generic_func_p) tmp_inactive,
	VOPNAME_FID, tmp_fid,
	VOPNAME_RWLOCK, tmp_rwlock,
	VOPNAME_RWUNLOCK, (fs_generic_func_p) tmp_rwunlock,
	VOPNAME_SEEK, tmp_seek,
	VOPNAME_SPACE, tmp_space,
	VOPNAME_GETPAGE, tmp_getpage,
	VOPNAME_PUTPAGE, tmp_putpage,
	VOPNAME_MAP, (fs_generic_func_p) tmp_map,
	VOPNAME_ADDMAP, (fs_generic_func_p) tmp_addmap,
	VOPNAME_DELMAP, tmp_delmap,
	VOPNAME_PATHCONF, tmp_pathconf,
	VOPNAME_VNEVENT, fs_vnevent_support,
	NULL, NULL
};