1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/param.h> 28 #include <sys/sysmacros.h> 29 #include <sys/systm.h> 30 #include <sys/time.h> 31 #include <sys/vfs.h> 32 #include <sys/vnode.h> 33 #include <sys/errno.h> 34 #include <sys/cmn_err.h> 35 #include <sys/cred.h> 36 #include <sys/stat.h> 37 #include <sys/debug.h> 38 #include <sys/policy.h> 39 #include <sys/fs/tmpnode.h> 40 #include <sys/fs/tmp.h> 41 #include <sys/vtrace.h> 42 43 static int tdircheckpath(struct tmpnode *, struct tmpnode *, struct cred *); 44 static int tdirrename(struct tmpnode *, struct tmpnode *, struct tmpnode *, 45 char *, struct tmpnode *, struct tdirent *, struct cred *); 46 static void tdirfixdotdot(struct tmpnode *, struct tmpnode *, struct tmpnode *); 47 static int tdirmaketnode(struct tmpnode *, struct tmount *, struct vattr *, 48 enum de_op, struct tmpnode **, struct cred *); 49 static int tdiraddentry(struct tmpnode *, struct tmpnode *, char *, 50 enum de_op, struct tmpnode *); 51 52 53 #define T_HASH_SIZE 8192 /* must be power of 2 */ 54 #define T_MUTEX_SIZE 64 55 56 /* Non-static so compilers won't constant-fold these away. */ 57 clock_t tmpfs_rename_backoff_delay = 1; 58 unsigned int tmpfs_rename_backoff_tries = 0; 59 unsigned long tmpfs_rename_loops = 0; 60 61 static struct tdirent *t_hashtable[T_HASH_SIZE]; 62 static kmutex_t t_hashmutex[T_MUTEX_SIZE]; 63 64 #define T_HASH_INDEX(a) ((a) & (T_HASH_SIZE-1)) 65 #define T_MUTEX_INDEX(a) ((a) & (T_MUTEX_SIZE-1)) 66 67 #define TMPFS_HASH(tp, name, hash) \ 68 { \ 69 char Xc, *Xcp; \ 70 hash = (uint_t)(uintptr_t)(tp) >> 8; \ 71 for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++) \ 72 hash = (hash << 4) + hash + (uint_t)Xc; \ 73 } 74 75 void 76 tmpfs_hash_init(void) 77 { 78 int ix; 79 80 for (ix = 0; ix < T_MUTEX_SIZE; ix++) 81 mutex_init(&t_hashmutex[ix], NULL, MUTEX_DEFAULT, NULL); 82 } 83 84 /* 85 * This routine is where the rubber meets the road for identities. 86 */ 87 static void 88 tmpfs_hash_in(struct tdirent *t) 89 { 90 uint_t hash; 91 struct tdirent **prevpp; 92 kmutex_t *t_hmtx; 93 94 TMPFS_HASH(t->td_parent, t->td_name, hash); 95 t->td_hash = hash; 96 prevpp = &t_hashtable[T_HASH_INDEX(hash)]; 97 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)]; 98 mutex_enter(t_hmtx); 99 t->td_link = *prevpp; 100 *prevpp = t; 101 mutex_exit(t_hmtx); 102 } 103 104 /* 105 * Remove tdirent *t from the hash list. 106 */ 107 static void 108 tmpfs_hash_out(struct tdirent *t) 109 { 110 uint_t hash; 111 struct tdirent **prevpp; 112 kmutex_t *t_hmtx; 113 114 hash = t->td_hash; 115 prevpp = &t_hashtable[T_HASH_INDEX(hash)]; 116 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)]; 117 mutex_enter(t_hmtx); 118 while (*prevpp != t) 119 prevpp = &(*prevpp)->td_link; 120 *prevpp = t->td_link; 121 mutex_exit(t_hmtx); 122 } 123 124 /* 125 * Currently called by tdirrename() only. 126 * rename operation needs to be done with lock held, to ensure that 127 * no other operations can access the tmpnode at the same instance. 128 */ 129 static void 130 tmpfs_hash_change(struct tdirent *tdp, struct tmpnode *fromtp) 131 { 132 uint_t hash; 133 kmutex_t *t_hmtx; 134 135 hash = tdp->td_hash; 136 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)]; 137 mutex_enter(t_hmtx); 138 tdp->td_tmpnode = fromtp; 139 mutex_exit(t_hmtx); 140 } 141 142 static struct tdirent * 143 tmpfs_hash_lookup(char *name, struct tmpnode *parent, uint_t hold, 144 struct tmpnode **found) 145 { 146 struct tdirent *l; 147 uint_t hash; 148 kmutex_t *t_hmtx; 149 struct tmpnode *tnp; 150 151 TMPFS_HASH(parent, name, hash); 152 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)]; 153 mutex_enter(t_hmtx); 154 l = t_hashtable[T_HASH_INDEX(hash)]; 155 while (l) { 156 if ((l->td_hash == hash) && 157 (l->td_parent == parent) && 158 (strcmp(l->td_name, name) == 0)) { 159 /* 160 * We need to make sure that the tmpnode that 161 * we put a hold on is the same one that we pass back. 162 * Hence, temporary variable tnp is necessary. 163 */ 164 tnp = l->td_tmpnode; 165 if (hold) { 166 ASSERT(tnp); 167 tmpnode_hold(tnp); 168 } 169 if (found) 170 *found = tnp; 171 mutex_exit(t_hmtx); 172 return (l); 173 } else { 174 l = l->td_link; 175 } 176 } 177 mutex_exit(t_hmtx); 178 return (NULL); 179 } 180 181 /* 182 * Search directory 'parent' for entry 'name'. 183 * 184 * The calling thread can't hold the write version 185 * of the rwlock for the directory being searched 186 * 187 * 0 is returned on success and *foundtp points 188 * to the found tmpnode with its vnode held. 189 */ 190 int 191 tdirlookup( 192 struct tmpnode *parent, 193 char *name, 194 struct tmpnode **foundtp, 195 struct cred *cred) 196 { 197 int error; 198 199 *foundtp = NULL; 200 if (parent->tn_type != VDIR) 201 return (ENOTDIR); 202 203 if ((error = tmp_taccess(parent, VEXEC, cred))) 204 return (error); 205 206 if (*name == '\0') { 207 tmpnode_hold(parent); 208 *foundtp = parent; 209 return (0); 210 } 211 212 /* 213 * Search the directory for the matching name 214 * We need the lock protecting the tn_dir list 215 * so that it doesn't change out from underneath us. 216 * tmpfs_hash_lookup() will pass back the tmpnode 217 * with a hold on it. 218 */ 219 220 if (tmpfs_hash_lookup(name, parent, 1, foundtp) != NULL) { 221 ASSERT(*foundtp); 222 return (0); 223 } 224 225 return (ENOENT); 226 } 227 228 /* 229 * Enter a directory entry for 'name' and 'tp' into directory 'dir' 230 * 231 * Returns 0 on success. 232 */ 233 int 234 tdirenter( 235 struct tmount *tm, 236 struct tmpnode *dir, /* target directory to make entry in */ 237 char *name, /* name of entry */ 238 enum de_op op, /* entry operation */ 239 struct tmpnode *fromparent, /* source directory if rename */ 240 struct tmpnode *tp, /* source tmpnode, if link/rename */ 241 struct vattr *va, 242 struct tmpnode **tpp, /* return tmpnode, if create/mkdir */ 243 struct cred *cred, 244 caller_context_t *ctp) 245 { 246 struct tdirent *tdp; 247 struct tmpnode *found = NULL; 248 int error = 0; 249 char *s; 250 251 /* 252 * tn_rwlock is held to serialize direnter and dirdeletes 253 */ 254 ASSERT(RW_WRITE_HELD(&dir->tn_rwlock)); 255 ASSERT(dir->tn_type == VDIR); 256 257 /* 258 * Don't allow '/' characters in pathname component 259 * (thus in ufs_direnter()). 260 */ 261 for (s = name; *s; s++) 262 if (*s == '/') 263 return (EACCES); 264 265 if (name[0] == '\0') 266 panic("tdirenter: NULL name"); 267 268 /* 269 * For link and rename lock the source entry and check the link count 270 * to see if it has been removed while it was unlocked. 271 */ 272 if (op == DE_LINK || op == DE_RENAME) { 273 if (tp != dir) { 274 unsigned int tries = 0; 275 276 /* 277 * If we are acquiring tp->tn_rwlock (for SOURCE) 278 * inside here, we must consider the following: 279 * 280 * - dir->tn_rwlock (TARGET) is already HELD (see 281 * above ASSERT()). 282 * 283 * - It is possible our SOURCE is a parent of our 284 * TARGET. Yes it's unusual, but it will return an 285 * error below via tdircheckpath(). 286 * 287 * - It is also possible that another thread, 288 * concurrent to this one, is performing 289 * rmdir(TARGET), which means it will first acquire 290 * SOURCE's lock, THEN acquire TARGET's lock, which 291 * could result in this thread holding TARGET and 292 * trying for SOURCE, but the other thread holding 293 * SOURCE and trying for TARGET. This is deadlock, 294 * and it's inducible. 295 * 296 * To prevent this, we borrow some techniques from UFS 297 * and rw_tryenter(), delaying if we fail, and 298 * if someone tweaks the number of backoff tries to be 299 * nonzero, return EBUSY after that number of tries. 300 */ 301 while (!rw_tryenter(&tp->tn_rwlock, RW_WRITER)) { 302 /* 303 * Sloppy, but this is a diagnostic so atomic 304 * increment would be overkill. 305 */ 306 tmpfs_rename_loops++; 307 308 if (tmpfs_rename_backoff_tries != 0) { 309 if (tries > tmpfs_rename_backoff_tries) 310 return (EBUSY); 311 tries++; 312 } 313 /* 314 * NOTE: We're still holding dir->tn_rwlock, 315 * so drop it over the delay, so any other 316 * thread can get its business done. 317 * 318 * No state change or state inspection happens 319 * prior to here, so it is not wholly dangerous 320 * to release-and-reacquire dir->tn_rwlock. 321 * 322 * Hold the vnode of dir in case it gets 323 * released by another thread, though. 324 */ 325 VN_HOLD(TNTOV(dir)); 326 rw_exit(&dir->tn_rwlock); 327 delay(tmpfs_rename_backoff_delay); 328 rw_enter(&dir->tn_rwlock, RW_WRITER); 329 VN_RELE(TNTOV(dir)); 330 } 331 } 332 mutex_enter(&tp->tn_tlock); 333 if (tp->tn_nlink == 0) { 334 mutex_exit(&tp->tn_tlock); 335 if (tp != dir) 336 rw_exit(&tp->tn_rwlock); 337 return (ENOENT); 338 } 339 340 if (tp->tn_nlink == MAXLINK) { 341 mutex_exit(&tp->tn_tlock); 342 if (tp != dir) 343 rw_exit(&tp->tn_rwlock); 344 return (EMLINK); 345 } 346 tp->tn_nlink++; 347 gethrestime(&tp->tn_ctime); 348 mutex_exit(&tp->tn_tlock); 349 if (tp != dir) 350 rw_exit(&tp->tn_rwlock); 351 } 352 353 /* 354 * This might be a "dangling detached directory". 355 * it could have been removed, but a reference 356 * to it kept in u_cwd. don't bother searching 357 * it, and with any luck the user will get tired 358 * of dealing with us and cd to some absolute 359 * pathway. *sigh*, thus in ufs, too. 360 */ 361 if (dir->tn_nlink == 0) { 362 error = ENOENT; 363 goto out; 364 } 365 366 /* 367 * If this is a rename of a directory and the parent is 368 * different (".." must be changed), then the source 369 * directory must not be in the directory hierarchy 370 * above the target, as this would orphan everything 371 * below the source directory. 372 */ 373 if (op == DE_RENAME) { 374 if (tp == dir) { 375 error = EINVAL; 376 goto out; 377 } 378 if (tp->tn_type == VDIR) { 379 if ((fromparent != dir) && 380 (error = tdircheckpath(tp, dir, cred))) { 381 goto out; 382 } 383 } 384 } 385 386 /* 387 * Search for the entry. Return "found" if it exists. 388 */ 389 tdp = tmpfs_hash_lookup(name, dir, 1, &found); 390 391 if (tdp) { 392 ASSERT(found); 393 switch (op) { 394 case DE_CREATE: 395 case DE_MKDIR: 396 if (tpp) { 397 *tpp = found; 398 error = EEXIST; 399 } else { 400 tmpnode_rele(found); 401 } 402 break; 403 404 case DE_RENAME: 405 error = tdirrename(fromparent, tp, 406 dir, name, found, tdp, cred); 407 if (error == 0) { 408 if (found != NULL) { 409 vnevent_rename_dest(TNTOV(found), 410 TNTOV(dir), name, ctp); 411 } 412 } 413 414 tmpnode_rele(found); 415 break; 416 417 case DE_LINK: 418 /* 419 * Can't link to an existing file. 420 */ 421 error = EEXIST; 422 tmpnode_rele(found); 423 break; 424 } 425 } else { 426 427 /* 428 * The entry does not exist. Check write permission in 429 * directory to see if entry can be created. 430 */ 431 if (error = tmp_taccess(dir, VWRITE, cred)) 432 goto out; 433 if (op == DE_CREATE || op == DE_MKDIR) { 434 /* 435 * Make new tmpnode and directory entry as required. 436 */ 437 error = tdirmaketnode(dir, tm, va, op, &tp, cred); 438 if (error) 439 goto out; 440 } 441 if (error = tdiraddentry(dir, tp, name, op, fromparent)) { 442 if (op == DE_CREATE || op == DE_MKDIR) { 443 /* 444 * Unmake the inode we just made. 445 */ 446 rw_enter(&tp->tn_rwlock, RW_WRITER); 447 if ((tp->tn_type) == VDIR) { 448 ASSERT(tdp == NULL); 449 /* 450 * cleanup allocs made by tdirinit() 451 */ 452 tdirtrunc(tp); 453 } 454 mutex_enter(&tp->tn_tlock); 455 tp->tn_nlink = 0; 456 mutex_exit(&tp->tn_tlock); 457 gethrestime(&tp->tn_ctime); 458 rw_exit(&tp->tn_rwlock); 459 tmpnode_rele(tp); 460 tp = NULL; 461 } 462 } else if (tpp) { 463 *tpp = tp; 464 } else if (op == DE_CREATE || op == DE_MKDIR) { 465 tmpnode_rele(tp); 466 } 467 } 468 469 out: 470 if (error && (op == DE_LINK || op == DE_RENAME)) { 471 /* 472 * Undo bumped link count. 473 */ 474 DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock); 475 gethrestime(&tp->tn_ctime); 476 } 477 return (error); 478 } 479 480 /* 481 * Delete entry tp of name "nm" from dir. 482 * Free dir entry space and decrement link count on tmpnode(s). 483 * 484 * Return 0 on success. 485 */ 486 int 487 tdirdelete( 488 struct tmpnode *dir, 489 struct tmpnode *tp, 490 char *nm, 491 enum dr_op op, 492 struct cred *cred) 493 { 494 struct tdirent *tpdp; 495 int error; 496 size_t namelen; 497 struct tmpnode *tnp; 498 timestruc_t now; 499 500 ASSERT(RW_WRITE_HELD(&dir->tn_rwlock)); 501 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock)); 502 ASSERT(dir->tn_type == VDIR); 503 504 if (nm[0] == '\0') 505 panic("tdirdelete: NULL name for %p", (void *)tp); 506 507 /* 508 * return error when removing . and .. 509 */ 510 if (nm[0] == '.') { 511 if (nm[1] == '\0') 512 return (EINVAL); 513 if (nm[1] == '.' && nm[2] == '\0') 514 return (EEXIST); /* thus in ufs */ 515 } 516 517 if (error = tmp_taccess(dir, VEXEC|VWRITE, cred)) 518 return (error); 519 520 /* 521 * If the parent directory is "sticky", then the user must 522 * own the parent directory or the file in it, or else must 523 * have permission to write the file. Otherwise it may not 524 * be deleted (except by privileged users). 525 * Same as ufs_dirremove. 526 */ 527 if ((error = tmp_sticky_remove_access(dir, tp, cred)) != 0) 528 return (error); 529 530 if (dir->tn_dir == NULL) 531 return (ENOENT); 532 533 tpdp = tmpfs_hash_lookup(nm, dir, 0, &tnp); 534 if (tpdp == NULL) { 535 /* 536 * If it is gone, some other thread got here first! 537 * Return error ENOENT. 538 */ 539 return (ENOENT); 540 } 541 542 /* 543 * If the tmpnode in the tdirent changed, we were probably 544 * the victim of a concurrent rename operation. The original 545 * is gone, so return that status (same as UFS). 546 */ 547 if (tp != tnp) 548 return (ENOENT); 549 550 tmpfs_hash_out(tpdp); 551 552 /* 553 * Take tpdp out of the directory list. 554 */ 555 ASSERT(tpdp->td_next != tpdp); 556 ASSERT(tpdp->td_prev != tpdp); 557 if (tpdp->td_prev) { 558 tpdp->td_prev->td_next = tpdp->td_next; 559 } 560 if (tpdp->td_next) { 561 tpdp->td_next->td_prev = tpdp->td_prev; 562 } 563 564 /* 565 * If the roving slot pointer happens to match tpdp, 566 * point it at the previous dirent. 567 */ 568 if (dir->tn_dir->td_prev == tpdp) { 569 dir->tn_dir->td_prev = tpdp->td_prev; 570 } 571 ASSERT(tpdp->td_next != tpdp); 572 ASSERT(tpdp->td_prev != tpdp); 573 574 /* 575 * tpdp points to the correct directory entry 576 */ 577 namelen = strlen(tpdp->td_name) + 1; 578 579 tmp_memfree(tpdp, sizeof (struct tdirent) + namelen); 580 dir->tn_size -= (sizeof (struct tdirent) + namelen); 581 dir->tn_dirents--; 582 583 gethrestime(&now); 584 dir->tn_mtime = now; 585 dir->tn_ctime = now; 586 tp->tn_ctime = now; 587 588 ASSERT(tp->tn_nlink > 0); 589 DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock); 590 if (op == DR_RMDIR && tp->tn_type == VDIR) { 591 tdirtrunc(tp); 592 ASSERT(tp->tn_nlink == 0); 593 } 594 return (0); 595 } 596 597 /* 598 * tdirinit is used internally to initialize a directory (dir) 599 * with '.' and '..' entries without checking permissions and locking 600 */ 601 void 602 tdirinit( 603 struct tmpnode *parent, /* parent of directory to initialize */ 604 struct tmpnode *dir) /* the new directory */ 605 { 606 struct tdirent *dot, *dotdot; 607 timestruc_t now; 608 609 ASSERT(RW_WRITE_HELD(&parent->tn_rwlock)); 610 ASSERT(dir->tn_type == VDIR); 611 612 dot = tmp_memalloc(sizeof (struct tdirent) + 2, TMP_MUSTHAVE); 613 dotdot = tmp_memalloc(sizeof (struct tdirent) + 3, TMP_MUSTHAVE); 614 615 /* 616 * Initialize the entries 617 */ 618 dot->td_tmpnode = dir; 619 dot->td_offset = 0; 620 dot->td_name = (char *)dot + sizeof (struct tdirent); 621 dot->td_name[0] = '.'; 622 dot->td_parent = dir; 623 tmpfs_hash_in(dot); 624 625 dotdot->td_tmpnode = parent; 626 dotdot->td_offset = 1; 627 dotdot->td_name = (char *)dotdot + sizeof (struct tdirent); 628 dotdot->td_name[0] = '.'; 629 dotdot->td_name[1] = '.'; 630 dotdot->td_parent = dir; 631 tmpfs_hash_in(dotdot); 632 633 /* 634 * Initialize directory entry list. 635 */ 636 dot->td_next = dotdot; 637 dot->td_prev = dotdot; /* dot's td_prev holds roving slot pointer */ 638 dotdot->td_next = NULL; 639 dotdot->td_prev = dot; 640 641 gethrestime(&now); 642 dir->tn_mtime = now; 643 dir->tn_ctime = now; 644 645 /* 646 * Link counts are special for the hidden attribute directory. 647 * The only explicit reference in the name space is "." and 648 * the reference through ".." is not counted on the parent 649 * file. The attrdir is created as a side effect to lookup, 650 * so don't change the ctime of the parent. 651 * Since tdirinit is called with both dir and parent being the 652 * same for the root vnode, we need to increment this before we set 653 * tn_nlink = 2 below. 654 */ 655 if (!(dir->tn_vnode->v_flag & V_XATTRDIR)) { 656 INCR_COUNT(&parent->tn_nlink, &parent->tn_tlock); 657 parent->tn_ctime = now; 658 } 659 660 dir->tn_dir = dot; 661 dir->tn_size = 2 * sizeof (struct tdirent) + 5; /* dot and dotdot */ 662 dir->tn_dirents = 2; 663 dir->tn_nlink = 2; 664 } 665 666 667 /* 668 * tdirtrunc is called to remove all directory entries under this directory. 669 */ 670 void 671 tdirtrunc(struct tmpnode *dir) 672 { 673 struct tdirent *tdp; 674 struct tmpnode *tp; 675 size_t namelen; 676 timestruc_t now; 677 int isvattrdir, isdotdot, skip_decr; 678 679 ASSERT(RW_WRITE_HELD(&dir->tn_rwlock)); 680 ASSERT(dir->tn_type == VDIR); 681 682 isvattrdir = (dir->tn_vnode->v_flag & V_XATTRDIR) ? 1 : 0; 683 for (tdp = dir->tn_dir; tdp; tdp = dir->tn_dir) { 684 ASSERT(tdp->td_next != tdp); 685 ASSERT(tdp->td_prev != tdp); 686 ASSERT(tdp->td_tmpnode); 687 688 dir->tn_dir = tdp->td_next; 689 namelen = strlen(tdp->td_name) + 1; 690 691 /* 692 * Adjust the link counts to account for this directory 693 * entry removal. Hidden attribute directories may 694 * not be empty as they may be truncated as a side- 695 * effect of removing the parent. We do hold/rele 696 * operations to free up these tmpnodes. 697 * 698 * Skip the link count adjustment for parents of 699 * attribute directories as those link counts 700 * do not include the ".." reference in the hidden 701 * directories. 702 */ 703 tp = tdp->td_tmpnode; 704 isdotdot = (strcmp("..", tdp->td_name) == 0); 705 skip_decr = (isvattrdir && isdotdot); 706 if (!skip_decr) { 707 ASSERT(tp->tn_nlink > 0); 708 DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock); 709 } 710 711 tmpfs_hash_out(tdp); 712 713 tmp_memfree(tdp, sizeof (struct tdirent) + namelen); 714 dir->tn_size -= (sizeof (struct tdirent) + namelen); 715 dir->tn_dirents--; 716 } 717 718 gethrestime(&now); 719 dir->tn_mtime = now; 720 dir->tn_ctime = now; 721 722 ASSERT(dir->tn_dir == NULL); 723 ASSERT(dir->tn_size == 0); 724 ASSERT(dir->tn_dirents == 0); 725 } 726 727 /* 728 * Check if the source directory is in the path of the target directory. 729 * The target directory is locked by the caller. 730 * 731 * XXX - The source and target's should be different upon entry. 732 */ 733 static int 734 tdircheckpath( 735 struct tmpnode *fromtp, 736 struct tmpnode *toparent, 737 struct cred *cred) 738 { 739 int error = 0; 740 struct tmpnode *dir, *dotdot; 741 struct tdirent *tdp; 742 743 ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock)); 744 745 tdp = tmpfs_hash_lookup("..", toparent, 1, &dotdot); 746 if (tdp == NULL) 747 return (ENOENT); 748 749 ASSERT(dotdot); 750 751 if (dotdot == toparent) { 752 /* root of fs. search trivially satisfied. */ 753 tmpnode_rele(dotdot); 754 return (0); 755 } 756 for (;;) { 757 /* 758 * Return error for cases like "mv c c/d", 759 * "mv c c/d/e" and so on. 760 */ 761 if (dotdot == fromtp) { 762 tmpnode_rele(dotdot); 763 error = EINVAL; 764 break; 765 } 766 dir = dotdot; 767 error = tdirlookup(dir, "..", &dotdot, cred); 768 if (error) { 769 tmpnode_rele(dir); 770 break; 771 } 772 /* 773 * We're okay if we traverse the directory tree up to 774 * the root directory and don't run into the 775 * parent directory. 776 */ 777 if (dir == dotdot) { 778 tmpnode_rele(dir); 779 tmpnode_rele(dotdot); 780 break; 781 } 782 tmpnode_rele(dir); 783 } 784 return (error); 785 } 786 787 static int 788 tdirrename( 789 struct tmpnode *fromparent, /* parent directory of source */ 790 struct tmpnode *fromtp, /* source tmpnode */ 791 struct tmpnode *toparent, /* parent directory of target */ 792 char *nm, /* entry we are trying to change */ 793 struct tmpnode *to, /* target tmpnode */ 794 struct tdirent *where, /* target tmpnode directory entry */ 795 struct cred *cred) /* credentials */ 796 { 797 int error = 0; 798 int doingdirectory; 799 timestruc_t now; 800 801 #if defined(lint) 802 nm = nm; 803 #endif 804 ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock)); 805 806 /* 807 * Short circuit rename of something to itself. 808 */ 809 if (fromtp == to) 810 return (ESAME); /* special KLUDGE error code */ 811 812 rw_enter(&fromtp->tn_rwlock, RW_READER); 813 rw_enter(&to->tn_rwlock, RW_READER); 814 815 /* 816 * Check that everything is on the same filesystem. 817 */ 818 if (to->tn_vnode->v_vfsp != toparent->tn_vnode->v_vfsp || 819 to->tn_vnode->v_vfsp != fromtp->tn_vnode->v_vfsp) { 820 error = EXDEV; 821 goto out; 822 } 823 824 /* 825 * Must have write permission to rewrite target entry. 826 * Check for stickyness. 827 */ 828 if ((error = tmp_taccess(toparent, VWRITE, cred)) != 0 || 829 (error = tmp_sticky_remove_access(toparent, to, cred)) != 0) 830 goto out; 831 832 /* 833 * Ensure source and target are compatible (both directories 834 * or both not directories). If target is a directory it must 835 * be empty and have no links to it; in addition it must not 836 * be a mount point, and both the source and target must be 837 * writable. 838 */ 839 doingdirectory = (fromtp->tn_type == VDIR); 840 if (to->tn_type == VDIR) { 841 if (!doingdirectory) { 842 error = EISDIR; 843 goto out; 844 } 845 /* 846 * vn_vfswlock will prevent mounts from using the directory 847 * until we are done. 848 */ 849 if (vn_vfswlock(TNTOV(to))) { 850 error = EBUSY; 851 goto out; 852 } 853 if (vn_mountedvfs(TNTOV(to)) != NULL) { 854 vn_vfsunlock(TNTOV(to)); 855 error = EBUSY; 856 goto out; 857 } 858 859 mutex_enter(&to->tn_tlock); 860 if (to->tn_dirents > 2 || to->tn_nlink > 2) { 861 mutex_exit(&to->tn_tlock); 862 vn_vfsunlock(TNTOV(to)); 863 error = EEXIST; /* SIGH should be ENOTEMPTY */ 864 /* 865 * Update atime because checking tn_dirents is 866 * logically equivalent to reading the directory 867 */ 868 gethrestime(&to->tn_atime); 869 goto out; 870 } 871 mutex_exit(&to->tn_tlock); 872 } else if (doingdirectory) { 873 error = ENOTDIR; 874 goto out; 875 } 876 877 tmpfs_hash_change(where, fromtp); 878 gethrestime(&now); 879 toparent->tn_mtime = now; 880 toparent->tn_ctime = now; 881 882 /* 883 * Upgrade to write lock on "to" (i.e., the target tmpnode). 884 */ 885 rw_exit(&to->tn_rwlock); 886 rw_enter(&to->tn_rwlock, RW_WRITER); 887 888 /* 889 * Decrement the link count of the target tmpnode. 890 */ 891 DECR_COUNT(&to->tn_nlink, &to->tn_tlock); 892 to->tn_ctime = now; 893 894 if (doingdirectory) { 895 /* 896 * The entry for "to" no longer exists so release the vfslock. 897 */ 898 vn_vfsunlock(TNTOV(to)); 899 900 /* 901 * Decrement the target link count and delete all entires. 902 */ 903 tdirtrunc(to); 904 ASSERT(to->tn_nlink == 0); 905 906 /* 907 * Renaming a directory with the parent different 908 * requires that ".." be rewritten. The window is 909 * still there for ".." to be inconsistent, but this 910 * is unavoidable, and a lot shorter than when it was 911 * done in a user process. 912 */ 913 if (fromparent != toparent) 914 tdirfixdotdot(fromtp, fromparent, toparent); 915 } 916 out: 917 rw_exit(&to->tn_rwlock); 918 rw_exit(&fromtp->tn_rwlock); 919 return (error); 920 } 921 922 static void 923 tdirfixdotdot( 924 struct tmpnode *fromtp, /* child directory */ 925 struct tmpnode *fromparent, /* old parent directory */ 926 struct tmpnode *toparent) /* new parent directory */ 927 { 928 struct tdirent *dotdot; 929 930 ASSERT(RW_LOCK_HELD(&toparent->tn_rwlock)); 931 932 /* 933 * Increment the link count in the new parent tmpnode 934 */ 935 INCR_COUNT(&toparent->tn_nlink, &toparent->tn_tlock); 936 gethrestime(&toparent->tn_ctime); 937 938 dotdot = tmpfs_hash_lookup("..", fromtp, 0, NULL); 939 940 ASSERT(dotdot->td_tmpnode == fromparent); 941 dotdot->td_tmpnode = toparent; 942 943 /* 944 * Decrement the link count of the old parent tmpnode. 945 * If fromparent is NULL, then this is a new directory link; 946 * it has no parent, so we need not do anything. 947 */ 948 if (fromparent != NULL) { 949 mutex_enter(&fromparent->tn_tlock); 950 if (fromparent->tn_nlink != 0) { 951 fromparent->tn_nlink--; 952 gethrestime(&fromparent->tn_ctime); 953 } 954 mutex_exit(&fromparent->tn_tlock); 955 } 956 } 957 958 static int 959 tdiraddentry( 960 struct tmpnode *dir, /* target directory to make entry in */ 961 struct tmpnode *tp, /* new tmpnode */ 962 char *name, 963 enum de_op op, 964 struct tmpnode *fromtp) 965 { 966 struct tdirent *tdp, *tpdp; 967 size_t namelen, alloc_size; 968 timestruc_t now; 969 970 /* 971 * Make sure the parent directory wasn't removed from 972 * underneath the caller. 973 */ 974 if (dir->tn_dir == NULL) 975 return (ENOENT); 976 977 /* 978 * Check that everything is on the same filesystem. 979 */ 980 if (tp->tn_vnode->v_vfsp != dir->tn_vnode->v_vfsp) 981 return (EXDEV); 982 983 /* 984 * Allocate and initialize directory entry 985 */ 986 namelen = strlen(name) + 1; 987 alloc_size = namelen + sizeof (struct tdirent); 988 tdp = tmp_memalloc(alloc_size, 0); 989 if (tdp == NULL) 990 return (ENOSPC); 991 992 if ((op == DE_RENAME) && (tp->tn_type == VDIR)) 993 tdirfixdotdot(tp, fromtp, dir); 994 995 dir->tn_size += alloc_size; 996 dir->tn_dirents++; 997 tdp->td_tmpnode = tp; 998 tdp->td_parent = dir; 999 1000 /* 1001 * The directory entry and its name were allocated sequentially. 1002 */ 1003 tdp->td_name = (char *)tdp + sizeof (struct tdirent); 1004 (void) strcpy(tdp->td_name, name); 1005 1006 tmpfs_hash_in(tdp); 1007 1008 /* 1009 * Some utilities expect the size of a directory to remain 1010 * somewhat static. For example, a routine which unlinks 1011 * files between calls to readdir(); the size of the 1012 * directory changes from underneath it and so the real 1013 * directory offset in bytes is invalid. To circumvent 1014 * this problem, we initialize a directory entry with an 1015 * phony offset, and use this offset to determine end of 1016 * file in tmp_readdir. 1017 */ 1018 tpdp = dir->tn_dir->td_prev; 1019 /* 1020 * Install at first empty "slot" in directory list. 1021 */ 1022 while (tpdp->td_next != NULL && (tpdp->td_next->td_offset - 1023 tpdp->td_offset) <= 1) { 1024 ASSERT(tpdp->td_next != tpdp); 1025 ASSERT(tpdp->td_prev != tpdp); 1026 ASSERT(tpdp->td_next->td_offset > tpdp->td_offset); 1027 tpdp = tpdp->td_next; 1028 } 1029 tdp->td_offset = tpdp->td_offset + 1; 1030 1031 /* 1032 * If we're at the end of the dirent list and the offset (which 1033 * is necessarily the largest offset in this directory) is more 1034 * than twice the number of dirents, that means the directory is 1035 * 50% holes. At this point we reset the slot pointer back to 1036 * the beginning of the directory so we start using the holes. 1037 * The idea is that if there are N dirents, there must also be 1038 * N holes, so we can satisfy the next N creates by walking at 1039 * most 2N entries; thus the average cost of a create is constant. 1040 * Note that we use the first dirent's td_prev as the roving 1041 * slot pointer; it's ugly, but it saves a word in every dirent. 1042 */ 1043 if (tpdp->td_next == NULL && tpdp->td_offset > 2 * dir->tn_dirents) 1044 dir->tn_dir->td_prev = dir->tn_dir->td_next; 1045 else 1046 dir->tn_dir->td_prev = tdp; 1047 1048 ASSERT(tpdp->td_next != tpdp); 1049 ASSERT(tpdp->td_prev != tpdp); 1050 1051 tdp->td_next = tpdp->td_next; 1052 if (tdp->td_next) { 1053 tdp->td_next->td_prev = tdp; 1054 } 1055 tdp->td_prev = tpdp; 1056 tpdp->td_next = tdp; 1057 1058 ASSERT(tdp->td_next != tdp); 1059 ASSERT(tdp->td_prev != tdp); 1060 ASSERT(tpdp->td_next != tpdp); 1061 ASSERT(tpdp->td_prev != tpdp); 1062 1063 gethrestime(&now); 1064 dir->tn_mtime = now; 1065 dir->tn_ctime = now; 1066 1067 return (0); 1068 } 1069 1070 static int 1071 tdirmaketnode( 1072 struct tmpnode *dir, 1073 struct tmount *tm, 1074 struct vattr *va, 1075 enum de_op op, 1076 struct tmpnode **newnode, 1077 struct cred *cred) 1078 { 1079 struct tmpnode *tp; 1080 enum vtype type; 1081 1082 ASSERT(va != NULL); 1083 ASSERT(op == DE_CREATE || op == DE_MKDIR); 1084 if (((va->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&va->va_atime)) || 1085 ((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime))) 1086 return (EOVERFLOW); 1087 type = va->va_type; 1088 tp = tmp_memalloc(sizeof (struct tmpnode), TMP_MUSTHAVE); 1089 tmpnode_init(tm, tp, va, cred); 1090 1091 /* setup normal file/dir's extended attribute directory */ 1092 if (dir->tn_flags & ISXATTR) { 1093 /* parent dir is , mark file as xattr */ 1094 tp->tn_flags |= ISXATTR; 1095 } 1096 1097 1098 if (type == VBLK || type == VCHR) { 1099 tp->tn_vnode->v_rdev = tp->tn_rdev = va->va_rdev; 1100 } else { 1101 tp->tn_vnode->v_rdev = tp->tn_rdev = NODEV; 1102 } 1103 tp->tn_vnode->v_type = type; 1104 tp->tn_uid = crgetuid(cred); 1105 1106 /* 1107 * To determine the group-id of the created file: 1108 * 1) If the gid is set in the attribute list (non-Sun & pre-4.0 1109 * clients are not likely to set the gid), then use it if 1110 * the process is privileged, belongs to the target group, 1111 * or the group is the same as the parent directory. 1112 * 2) If the filesystem was not mounted with the Old-BSD-compatible 1113 * GRPID option, and the directory's set-gid bit is clear, 1114 * then use the process's gid. 1115 * 3) Otherwise, set the group-id to the gid of the parent directory. 1116 */ 1117 if ((va->va_mask & AT_GID) && 1118 ((va->va_gid == dir->tn_gid) || groupmember(va->va_gid, cred) || 1119 secpolicy_vnode_create_gid(cred) == 0)) { 1120 /* 1121 * XXX - is this only the case when a 4.0 NFS client, or a 1122 * client derived from that code, makes a call over the wire? 1123 */ 1124 tp->tn_gid = va->va_gid; 1125 } else { 1126 if (dir->tn_mode & VSGID) 1127 tp->tn_gid = dir->tn_gid; 1128 else 1129 tp->tn_gid = crgetgid(cred); 1130 } 1131 /* 1132 * If we're creating a directory, and the parent directory has the 1133 * set-GID bit set, set it on the new directory. 1134 * Otherwise, if the user is neither privileged nor a member of the 1135 * file's new group, clear the file's set-GID bit. 1136 */ 1137 if (dir->tn_mode & VSGID && type == VDIR) 1138 tp->tn_mode |= VSGID; 1139 else { 1140 if ((tp->tn_mode & VSGID) && 1141 secpolicy_vnode_setids_setgids(cred, tp->tn_gid) != 0) 1142 tp->tn_mode &= ~VSGID; 1143 } 1144 1145 if (va->va_mask & AT_ATIME) 1146 tp->tn_atime = va->va_atime; 1147 if (va->va_mask & AT_MTIME) 1148 tp->tn_mtime = va->va_mtime; 1149 1150 if (op == DE_MKDIR) 1151 tdirinit(dir, tp); 1152 1153 *newnode = tp; 1154 return (0); 1155 } 1156