1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/param.h> 30 #include <sys/sysmacros.h> 31 #include <sys/systm.h> 32 #include <sys/time.h> 33 #include <sys/vfs.h> 34 #include <sys/vnode.h> 35 #include <sys/errno.h> 36 #include <sys/cmn_err.h> 37 #include <sys/cred.h> 38 #include <sys/stat.h> 39 #include <sys/debug.h> 40 #include <sys/policy.h> 41 #include <sys/fs/tmpnode.h> 42 #include <sys/fs/tmp.h> 43 #include <sys/vtrace.h> 44 45 static int tdircheckpath(struct tmpnode *, struct tmpnode *, struct cred *); 46 static int tdirrename(struct tmpnode *, struct tmpnode *, struct tmpnode *, 47 char *, struct tmpnode *, struct tdirent *, struct cred *); 48 static void tdirfixdotdot(struct tmpnode *, struct tmpnode *, struct tmpnode *); 49 static int tdirmaketnode(struct tmpnode *, struct tmount *, struct vattr *, 50 enum de_op, struct tmpnode **, struct cred *); 51 static int tdiraddentry(struct tmpnode *, struct tmpnode *, char *, 52 enum de_op, struct tmpnode *); 53 54 55 #define T_HASH_SIZE 8192 /* must be power of 2 */ 56 #define T_MUTEX_SIZE 64 57 58 /* Non-static so compilers won't constant-fold these away. */ 59 clock_t tmpfs_rename_backoff_delay = 1; 60 unsigned int tmpfs_rename_backoff_tries = 0; 61 unsigned long tmpfs_rename_loops = 0; 62 63 static struct tdirent *t_hashtable[T_HASH_SIZE]; 64 static kmutex_t t_hashmutex[T_MUTEX_SIZE]; 65 66 #define T_HASH_INDEX(a) ((a) & (T_HASH_SIZE-1)) 67 #define T_MUTEX_INDEX(a) ((a) & (T_MUTEX_SIZE-1)) 68 69 #define TMPFS_HASH(tp, name, hash) \ 70 { \ 71 char Xc, *Xcp; \ 72 hash = (uint_t)(uintptr_t)(tp) >> 8; \ 73 for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++) \ 74 hash = (hash << 4) + hash + (uint_t)Xc; \ 75 } 76 77 void 78 tmpfs_hash_init(void) 79 { 80 int ix; 81 82 for (ix = 0; ix < T_MUTEX_SIZE; ix++) 83 mutex_init(&t_hashmutex[ix], NULL, MUTEX_DEFAULT, NULL); 84 } 85 86 /* 87 * This routine is where the rubber meets the road for identities. 88 */ 89 static void 90 tmpfs_hash_in(struct tdirent *t) 91 { 92 uint_t hash; 93 struct tdirent **prevpp; 94 kmutex_t *t_hmtx; 95 96 TMPFS_HASH(t->td_parent, t->td_name, hash); 97 t->td_hash = hash; 98 prevpp = &t_hashtable[T_HASH_INDEX(hash)]; 99 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)]; 100 mutex_enter(t_hmtx); 101 t->td_link = *prevpp; 102 *prevpp = t; 103 mutex_exit(t_hmtx); 104 } 105 106 /* 107 * Remove tdirent *t from the hash list. 108 */ 109 static void 110 tmpfs_hash_out(struct tdirent *t) 111 { 112 uint_t hash; 113 struct tdirent **prevpp; 114 kmutex_t *t_hmtx; 115 116 hash = t->td_hash; 117 prevpp = &t_hashtable[T_HASH_INDEX(hash)]; 118 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)]; 119 mutex_enter(t_hmtx); 120 while (*prevpp != t) 121 prevpp = &(*prevpp)->td_link; 122 *prevpp = t->td_link; 123 mutex_exit(t_hmtx); 124 } 125 126 /* 127 * Currently called by tdirrename() only. 128 * rename operation needs to be done with lock held, to ensure that 129 * no other operations can access the tmpnode at the same instance. 130 */ 131 static void 132 tmpfs_hash_change(struct tdirent *tdp, struct tmpnode *fromtp) 133 { 134 uint_t hash; 135 kmutex_t *t_hmtx; 136 137 hash = tdp->td_hash; 138 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)]; 139 mutex_enter(t_hmtx); 140 tdp->td_tmpnode = fromtp; 141 mutex_exit(t_hmtx); 142 } 143 144 static struct tdirent * 145 tmpfs_hash_lookup(char *name, struct tmpnode *parent, uint_t hold, 146 struct tmpnode **found) 147 { 148 struct tdirent *l; 149 uint_t hash; 150 kmutex_t *t_hmtx; 151 struct tmpnode *tnp; 152 153 TMPFS_HASH(parent, name, hash); 154 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)]; 155 mutex_enter(t_hmtx); 156 l = t_hashtable[T_HASH_INDEX(hash)]; 157 while (l) { 158 if ((l->td_hash == hash) && 159 (l->td_parent == parent) && 160 (strcmp(l->td_name, name) == 0)) { 161 /* 162 * We need to make sure that the tmpnode that 163 * we put a hold on is the same one that we pass back. 164 * Hence, temporary variable tnp is necessary. 165 */ 166 tnp = l->td_tmpnode; 167 if (hold) { 168 ASSERT(tnp); 169 tmpnode_hold(tnp); 170 } 171 if (found) 172 *found = tnp; 173 mutex_exit(t_hmtx); 174 return (l); 175 } else { 176 l = l->td_link; 177 } 178 } 179 mutex_exit(t_hmtx); 180 return (NULL); 181 } 182 183 /* 184 * Search directory 'parent' for entry 'name'. 185 * 186 * The calling thread can't hold the write version 187 * of the rwlock for the directory being searched 188 * 189 * 0 is returned on success and *foundtp points 190 * to the found tmpnode with its vnode held. 191 */ 192 int 193 tdirlookup( 194 struct tmpnode *parent, 195 char *name, 196 struct tmpnode **foundtp, 197 struct cred *cred) 198 { 199 int error; 200 201 *foundtp = NULL; 202 if (parent->tn_type != VDIR) 203 return (ENOTDIR); 204 205 if ((error = tmp_taccess(parent, VEXEC, cred))) 206 return (error); 207 208 if (*name == '\0') { 209 tmpnode_hold(parent); 210 *foundtp = parent; 211 return (0); 212 } 213 214 /* 215 * Search the directory for the matching name 216 * We need the lock protecting the tn_dir list 217 * so that it doesn't change out from underneath us. 218 * tmpfs_hash_lookup() will pass back the tmpnode 219 * with a hold on it. 220 */ 221 222 if (tmpfs_hash_lookup(name, parent, 1, foundtp) != NULL) { 223 ASSERT(*foundtp); 224 return (0); 225 } 226 227 return (ENOENT); 228 } 229 230 /* 231 * Enter a directory entry for 'name' and 'tp' into directory 'dir' 232 * 233 * Returns 0 on success. 234 */ 235 int 236 tdirenter( 237 struct tmount *tm, 238 struct tmpnode *dir, /* target directory to make entry in */ 239 char *name, /* name of entry */ 240 enum de_op op, /* entry operation */ 241 struct tmpnode *fromparent, /* source directory if rename */ 242 struct tmpnode *tp, /* source tmpnode, if link/rename */ 243 struct vattr *va, 244 struct tmpnode **tpp, /* return tmpnode, if create/mkdir */ 245 struct cred *cred, 246 caller_context_t *ctp) 247 { 248 struct tdirent *tdp; 249 struct tmpnode *found = NULL; 250 int error = 0; 251 char *s; 252 253 /* 254 * tn_rwlock is held to serialize direnter and dirdeletes 255 */ 256 ASSERT(RW_WRITE_HELD(&dir->tn_rwlock)); 257 ASSERT(dir->tn_type == VDIR); 258 259 /* 260 * Don't allow '/' characters in pathname component 261 * (thus in ufs_direnter()). 262 */ 263 for (s = name; *s; s++) 264 if (*s == '/') 265 return (EACCES); 266 267 if (name[0] == '\0') 268 panic("tdirenter: NULL name"); 269 270 /* 271 * For link and rename lock the source entry and check the link count 272 * to see if it has been removed while it was unlocked. 273 */ 274 if (op == DE_LINK || op == DE_RENAME) { 275 if (tp != dir) { 276 unsigned int tries = 0; 277 278 /* 279 * If we are acquiring tp->tn_rwlock (for SOURCE) 280 * inside here, we must consider the following: 281 * 282 * - dir->tn_rwlock (TARGET) is already HELD (see 283 * above ASSERT()). 284 * 285 * - It is possible our SOURCE is a parent of our 286 * TARGET. Yes it's unusual, but it will return an 287 * error below via tdircheckpath(). 288 * 289 * - It is also possible that another thread, 290 * concurrent to this one, is performing 291 * rmdir(TARGET), which means it will first acquire 292 * SOURCE's lock, THEN acquire TARGET's lock, which 293 * could result in this thread holding TARGET and 294 * trying for SOURCE, but the other thread holding 295 * SOURCE and trying for TARGET. This is deadlock, 296 * and it's inducible. 297 * 298 * To prevent this, we borrow some techniques from UFS 299 * and rw_tryenter(), delaying if we fail, and 300 * if someone tweaks the number of backoff tries to be 301 * nonzero, return EBUSY after that number of tries. 302 */ 303 while (!rw_tryenter(&tp->tn_rwlock, RW_WRITER)) { 304 /* 305 * Sloppy, but this is a diagnostic so atomic 306 * increment would be overkill. 307 */ 308 tmpfs_rename_loops++; 309 310 if (tmpfs_rename_backoff_tries != 0) { 311 if (tries > tmpfs_rename_backoff_tries) 312 return (EBUSY); 313 tries++; 314 } 315 /* 316 * NOTE: We're still holding dir->tn_rwlock, 317 * so drop it over the delay, so any other 318 * thread can get its business done. 319 * 320 * No state change or state inspection happens 321 * prior to here, so it is not wholly dangerous 322 * to release-and-reacquire dir->tn_rwlock. 323 * 324 * Hold the vnode of dir in case it gets 325 * released by another thread, though. 326 */ 327 VN_HOLD(TNTOV(dir)); 328 rw_exit(&dir->tn_rwlock); 329 delay(tmpfs_rename_backoff_delay); 330 rw_enter(&dir->tn_rwlock, RW_WRITER); 331 VN_RELE(TNTOV(dir)); 332 } 333 } 334 mutex_enter(&tp->tn_tlock); 335 if (tp->tn_nlink == 0) { 336 mutex_exit(&tp->tn_tlock); 337 if (tp != dir) 338 rw_exit(&tp->tn_rwlock); 339 return (ENOENT); 340 } 341 342 if (tp->tn_nlink == MAXLINK) { 343 mutex_exit(&tp->tn_tlock); 344 if (tp != dir) 345 rw_exit(&tp->tn_rwlock); 346 return (EMLINK); 347 } 348 tp->tn_nlink++; 349 gethrestime(&tp->tn_ctime); 350 mutex_exit(&tp->tn_tlock); 351 if (tp != dir) 352 rw_exit(&tp->tn_rwlock); 353 } 354 355 /* 356 * This might be a "dangling detached directory". 357 * it could have been removed, but a reference 358 * to it kept in u_cwd. don't bother searching 359 * it, and with any luck the user will get tired 360 * of dealing with us and cd to some absolute 361 * pathway. *sigh*, thus in ufs, too. 362 */ 363 if (dir->tn_nlink == 0) { 364 error = ENOENT; 365 goto out; 366 } 367 368 /* 369 * If this is a rename of a directory and the parent is 370 * different (".." must be changed), then the source 371 * directory must not be in the directory hierarchy 372 * above the target, as this would orphan everything 373 * below the source directory. 374 */ 375 if (op == DE_RENAME) { 376 if (tp == dir) { 377 error = EINVAL; 378 goto out; 379 } 380 if (tp->tn_type == VDIR) { 381 if ((fromparent != dir) && 382 (error = tdircheckpath(tp, dir, cred))) { 383 goto out; 384 } 385 } 386 } 387 388 /* 389 * Search for the entry. Return "found" if it exists. 390 */ 391 tdp = tmpfs_hash_lookup(name, dir, 1, &found); 392 393 if (tdp) { 394 ASSERT(found); 395 switch (op) { 396 case DE_CREATE: 397 case DE_MKDIR: 398 if (tpp) { 399 *tpp = found; 400 error = EEXIST; 401 } else { 402 tmpnode_rele(found); 403 } 404 break; 405 406 case DE_RENAME: 407 error = tdirrename(fromparent, tp, 408 dir, name, found, tdp, cred); 409 if (error == 0) { 410 if (found != NULL) { 411 vnevent_rename_dest(TNTOV(found), 412 TNTOV(dir), name, ctp); 413 } 414 } 415 416 tmpnode_rele(found); 417 break; 418 419 case DE_LINK: 420 /* 421 * Can't link to an existing file. 422 */ 423 error = EEXIST; 424 tmpnode_rele(found); 425 break; 426 } 427 } else { 428 429 /* 430 * The entry does not exist. Check write permission in 431 * directory to see if entry can be created. 432 */ 433 if (error = tmp_taccess(dir, VWRITE, cred)) 434 goto out; 435 if (op == DE_CREATE || op == DE_MKDIR) { 436 /* 437 * Make new tmpnode and directory entry as required. 438 */ 439 error = tdirmaketnode(dir, tm, va, op, &tp, cred); 440 if (error) 441 goto out; 442 } 443 if (error = tdiraddentry(dir, tp, name, op, fromparent)) { 444 if (op == DE_CREATE || op == DE_MKDIR) { 445 /* 446 * Unmake the inode we just made. 447 */ 448 rw_enter(&tp->tn_rwlock, RW_WRITER); 449 if ((tp->tn_type) == VDIR) { 450 ASSERT(tdp == NULL); 451 /* 452 * cleanup allocs made by tdirinit() 453 */ 454 tdirtrunc(tp); 455 } 456 mutex_enter(&tp->tn_tlock); 457 tp->tn_nlink = 0; 458 mutex_exit(&tp->tn_tlock); 459 gethrestime(&tp->tn_ctime); 460 rw_exit(&tp->tn_rwlock); 461 tmpnode_rele(tp); 462 tp = NULL; 463 } 464 } else if (tpp) { 465 *tpp = tp; 466 } else if (op == DE_CREATE || op == DE_MKDIR) { 467 tmpnode_rele(tp); 468 } 469 } 470 471 out: 472 if (error && (op == DE_LINK || op == DE_RENAME)) { 473 /* 474 * Undo bumped link count. 475 */ 476 DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock); 477 gethrestime(&tp->tn_ctime); 478 } 479 return (error); 480 } 481 482 /* 483 * Delete entry tp of name "nm" from dir. 484 * Free dir entry space and decrement link count on tmpnode(s). 485 * 486 * Return 0 on success. 487 */ 488 int 489 tdirdelete( 490 struct tmpnode *dir, 491 struct tmpnode *tp, 492 char *nm, 493 enum dr_op op, 494 struct cred *cred) 495 { 496 struct tdirent *tpdp; 497 int error; 498 size_t namelen; 499 struct tmpnode *tnp; 500 timestruc_t now; 501 502 ASSERT(RW_WRITE_HELD(&dir->tn_rwlock)); 503 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock)); 504 ASSERT(dir->tn_type == VDIR); 505 506 if (nm[0] == '\0') 507 panic("tdirdelete: NULL name for %p", (void *)tp); 508 509 /* 510 * return error when removing . and .. 511 */ 512 if (nm[0] == '.') { 513 if (nm[1] == '\0') 514 return (EINVAL); 515 if (nm[1] == '.' && nm[2] == '\0') 516 return (EEXIST); /* thus in ufs */ 517 } 518 519 if (error = tmp_taccess(dir, VEXEC|VWRITE, cred)) 520 return (error); 521 522 /* 523 * If the parent directory is "sticky", then the user must 524 * own the parent directory or the file in it, or else must 525 * have permission to write the file. Otherwise it may not 526 * be deleted (except by privileged users). 527 * Same as ufs_dirremove. 528 */ 529 if ((error = tmp_sticky_remove_access(dir, tp, cred)) != 0) 530 return (error); 531 532 if (dir->tn_dir == NULL) 533 return (ENOENT); 534 535 tpdp = tmpfs_hash_lookup(nm, dir, 0, &tnp); 536 if (tpdp == NULL) { 537 /* 538 * If it is gone, some other thread got here first! 539 * Return error ENOENT. 540 */ 541 return (ENOENT); 542 } 543 544 /* 545 * If the tmpnode in the tdirent changed, we were probably 546 * the victim of a concurrent rename operation. The original 547 * is gone, so return that status (same as UFS). 548 */ 549 if (tp != tnp) 550 return (ENOENT); 551 552 tmpfs_hash_out(tpdp); 553 554 /* 555 * Take tpdp out of the directory list. 556 */ 557 ASSERT(tpdp->td_next != tpdp); 558 ASSERT(tpdp->td_prev != tpdp); 559 if (tpdp->td_prev) { 560 tpdp->td_prev->td_next = tpdp->td_next; 561 } 562 if (tpdp->td_next) { 563 tpdp->td_next->td_prev = tpdp->td_prev; 564 } 565 566 /* 567 * If the roving slot pointer happens to match tpdp, 568 * point it at the previous dirent. 569 */ 570 if (dir->tn_dir->td_prev == tpdp) { 571 dir->tn_dir->td_prev = tpdp->td_prev; 572 } 573 ASSERT(tpdp->td_next != tpdp); 574 ASSERT(tpdp->td_prev != tpdp); 575 576 /* 577 * tpdp points to the correct directory entry 578 */ 579 namelen = strlen(tpdp->td_name) + 1; 580 581 tmp_memfree(tpdp, sizeof (struct tdirent) + namelen); 582 dir->tn_size -= (sizeof (struct tdirent) + namelen); 583 dir->tn_dirents--; 584 585 gethrestime(&now); 586 dir->tn_mtime = now; 587 dir->tn_ctime = now; 588 tp->tn_ctime = now; 589 590 ASSERT(tp->tn_nlink > 0); 591 DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock); 592 if (op == DR_RMDIR && tp->tn_type == VDIR) { 593 tdirtrunc(tp); 594 ASSERT(tp->tn_nlink == 0); 595 } 596 return (0); 597 } 598 599 /* 600 * tdirinit is used internally to initialize a directory (dir) 601 * with '.' and '..' entries without checking permissions and locking 602 */ 603 void 604 tdirinit( 605 struct tmpnode *parent, /* parent of directory to initialize */ 606 struct tmpnode *dir) /* the new directory */ 607 { 608 struct tdirent *dot, *dotdot; 609 timestruc_t now; 610 611 ASSERT(RW_WRITE_HELD(&parent->tn_rwlock)); 612 ASSERT(dir->tn_type == VDIR); 613 614 dot = tmp_memalloc(sizeof (struct tdirent) + 2, TMP_MUSTHAVE); 615 dotdot = tmp_memalloc(sizeof (struct tdirent) + 3, TMP_MUSTHAVE); 616 617 /* 618 * Initialize the entries 619 */ 620 dot->td_tmpnode = dir; 621 dot->td_offset = 0; 622 dot->td_name = (char *)dot + sizeof (struct tdirent); 623 dot->td_name[0] = '.'; 624 dot->td_parent = dir; 625 tmpfs_hash_in(dot); 626 627 dotdot->td_tmpnode = parent; 628 dotdot->td_offset = 1; 629 dotdot->td_name = (char *)dotdot + sizeof (struct tdirent); 630 dotdot->td_name[0] = '.'; 631 dotdot->td_name[1] = '.'; 632 dotdot->td_parent = dir; 633 tmpfs_hash_in(dotdot); 634 635 /* 636 * Initialize directory entry list. 637 */ 638 dot->td_next = dotdot; 639 dot->td_prev = dotdot; /* dot's td_prev holds roving slot pointer */ 640 dotdot->td_next = NULL; 641 dotdot->td_prev = dot; 642 643 gethrestime(&now); 644 dir->tn_mtime = now; 645 dir->tn_ctime = now; 646 647 /* 648 * Link counts are special for the hidden attribute directory. 649 * The only explicit reference in the name space is "." and 650 * the reference through ".." is not counted on the parent 651 * file. The attrdir is created as a side effect to lookup, 652 * so don't change the ctime of the parent. 653 * Since tdirinit is called with both dir and parent being the 654 * same for the root vnode, we need to increment this before we set 655 * tn_nlink = 2 below. 656 */ 657 if (!(dir->tn_vnode->v_flag & V_XATTRDIR)) { 658 INCR_COUNT(&parent->tn_nlink, &parent->tn_tlock); 659 parent->tn_ctime = now; 660 } 661 662 dir->tn_dir = dot; 663 dir->tn_size = 2 * sizeof (struct tdirent) + 5; /* dot and dotdot */ 664 dir->tn_dirents = 2; 665 dir->tn_nlink = 2; 666 } 667 668 669 /* 670 * tdirtrunc is called to remove all directory entries under this directory. 671 */ 672 void 673 tdirtrunc(struct tmpnode *dir) 674 { 675 struct tdirent *tdp; 676 struct tmpnode *tp; 677 size_t namelen; 678 timestruc_t now; 679 int isvattrdir, isdotdot, skip_decr; 680 681 ASSERT(RW_WRITE_HELD(&dir->tn_rwlock)); 682 ASSERT(dir->tn_type == VDIR); 683 684 isvattrdir = (dir->tn_vnode->v_flag & V_XATTRDIR) ? 1 : 0; 685 for (tdp = dir->tn_dir; tdp; tdp = dir->tn_dir) { 686 ASSERT(tdp->td_next != tdp); 687 ASSERT(tdp->td_prev != tdp); 688 ASSERT(tdp->td_tmpnode); 689 690 dir->tn_dir = tdp->td_next; 691 namelen = strlen(tdp->td_name) + 1; 692 693 /* 694 * Adjust the link counts to account for this directory 695 * entry removal. Hidden attribute directories may 696 * not be empty as they may be truncated as a side- 697 * effect of removing the parent. We do hold/rele 698 * operations to free up these tmpnodes. 699 * 700 * Skip the link count adjustment for parents of 701 * attribute directories as those link counts 702 * do not include the ".." reference in the hidden 703 * directories. 704 */ 705 tp = tdp->td_tmpnode; 706 isdotdot = (strcmp("..", tdp->td_name) == 0); 707 skip_decr = (isvattrdir && isdotdot); 708 if (!skip_decr) { 709 ASSERT(tp->tn_nlink > 0); 710 DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock); 711 } 712 713 tmpfs_hash_out(tdp); 714 715 tmp_memfree(tdp, sizeof (struct tdirent) + namelen); 716 dir->tn_size -= (sizeof (struct tdirent) + namelen); 717 dir->tn_dirents--; 718 } 719 720 gethrestime(&now); 721 dir->tn_mtime = now; 722 dir->tn_ctime = now; 723 724 ASSERT(dir->tn_dir == NULL); 725 ASSERT(dir->tn_size == 0); 726 ASSERT(dir->tn_dirents == 0); 727 } 728 729 /* 730 * Check if the source directory is in the path of the target directory. 731 * The target directory is locked by the caller. 732 * 733 * XXX - The source and target's should be different upon entry. 734 */ 735 static int 736 tdircheckpath( 737 struct tmpnode *fromtp, 738 struct tmpnode *toparent, 739 struct cred *cred) 740 { 741 int error = 0; 742 struct tmpnode *dir, *dotdot; 743 struct tdirent *tdp; 744 745 ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock)); 746 747 tdp = tmpfs_hash_lookup("..", toparent, 1, &dotdot); 748 if (tdp == NULL) 749 return (ENOENT); 750 751 ASSERT(dotdot); 752 753 if (dotdot == toparent) { 754 /* root of fs. search trivially satisfied. */ 755 tmpnode_rele(dotdot); 756 return (0); 757 } 758 for (;;) { 759 /* 760 * Return error for cases like "mv c c/d", 761 * "mv c c/d/e" and so on. 762 */ 763 if (dotdot == fromtp) { 764 tmpnode_rele(dotdot); 765 error = EINVAL; 766 break; 767 } 768 dir = dotdot; 769 error = tdirlookup(dir, "..", &dotdot, cred); 770 if (error) { 771 tmpnode_rele(dir); 772 break; 773 } 774 /* 775 * We're okay if we traverse the directory tree up to 776 * the root directory and don't run into the 777 * parent directory. 778 */ 779 if (dir == dotdot) { 780 tmpnode_rele(dir); 781 tmpnode_rele(dotdot); 782 break; 783 } 784 tmpnode_rele(dir); 785 } 786 return (error); 787 } 788 789 static int 790 tdirrename( 791 struct tmpnode *fromparent, /* parent directory of source */ 792 struct tmpnode *fromtp, /* source tmpnode */ 793 struct tmpnode *toparent, /* parent directory of target */ 794 char *nm, /* entry we are trying to change */ 795 struct tmpnode *to, /* target tmpnode */ 796 struct tdirent *where, /* target tmpnode directory entry */ 797 struct cred *cred) /* credentials */ 798 { 799 int error = 0; 800 int doingdirectory; 801 timestruc_t now; 802 803 #if defined(lint) 804 nm = nm; 805 #endif 806 ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock)); 807 808 /* 809 * Short circuit rename of something to itself. 810 */ 811 if (fromtp == to) 812 return (ESAME); /* special KLUDGE error code */ 813 814 rw_enter(&fromtp->tn_rwlock, RW_READER); 815 rw_enter(&to->tn_rwlock, RW_READER); 816 817 /* 818 * Check that everything is on the same filesystem. 819 */ 820 if (to->tn_vnode->v_vfsp != toparent->tn_vnode->v_vfsp || 821 to->tn_vnode->v_vfsp != fromtp->tn_vnode->v_vfsp) { 822 error = EXDEV; 823 goto out; 824 } 825 826 /* 827 * Must have write permission to rewrite target entry. 828 * Check for stickyness. 829 */ 830 if ((error = tmp_taccess(toparent, VWRITE, cred)) != 0 || 831 (error = tmp_sticky_remove_access(toparent, to, cred)) != 0) 832 goto out; 833 834 /* 835 * Ensure source and target are compatible (both directories 836 * or both not directories). If target is a directory it must 837 * be empty and have no links to it; in addition it must not 838 * be a mount point, and both the source and target must be 839 * writable. 840 */ 841 doingdirectory = (fromtp->tn_type == VDIR); 842 if (to->tn_type == VDIR) { 843 if (!doingdirectory) { 844 error = EISDIR; 845 goto out; 846 } 847 /* 848 * vn_vfswlock will prevent mounts from using the directory 849 * until we are done. 850 */ 851 if (vn_vfswlock(TNTOV(to))) { 852 error = EBUSY; 853 goto out; 854 } 855 if (vn_mountedvfs(TNTOV(to)) != NULL) { 856 vn_vfsunlock(TNTOV(to)); 857 error = EBUSY; 858 goto out; 859 } 860 861 mutex_enter(&to->tn_tlock); 862 if (to->tn_dirents > 2 || to->tn_nlink > 2) { 863 mutex_exit(&to->tn_tlock); 864 vn_vfsunlock(TNTOV(to)); 865 error = EEXIST; /* SIGH should be ENOTEMPTY */ 866 /* 867 * Update atime because checking tn_dirents is 868 * logically equivalent to reading the directory 869 */ 870 gethrestime(&to->tn_atime); 871 goto out; 872 } 873 mutex_exit(&to->tn_tlock); 874 } else if (doingdirectory) { 875 error = ENOTDIR; 876 goto out; 877 } 878 879 tmpfs_hash_change(where, fromtp); 880 gethrestime(&now); 881 toparent->tn_mtime = now; 882 toparent->tn_ctime = now; 883 884 /* 885 * Upgrade to write lock on "to" (i.e., the target tmpnode). 886 */ 887 rw_exit(&to->tn_rwlock); 888 rw_enter(&to->tn_rwlock, RW_WRITER); 889 890 /* 891 * Decrement the link count of the target tmpnode. 892 */ 893 DECR_COUNT(&to->tn_nlink, &to->tn_tlock); 894 to->tn_ctime = now; 895 896 if (doingdirectory) { 897 /* 898 * The entry for "to" no longer exists so release the vfslock. 899 */ 900 vn_vfsunlock(TNTOV(to)); 901 902 /* 903 * Decrement the target link count and delete all entires. 904 */ 905 tdirtrunc(to); 906 ASSERT(to->tn_nlink == 0); 907 908 /* 909 * Renaming a directory with the parent different 910 * requires that ".." be rewritten. The window is 911 * still there for ".." to be inconsistent, but this 912 * is unavoidable, and a lot shorter than when it was 913 * done in a user process. 914 */ 915 if (fromparent != toparent) 916 tdirfixdotdot(fromtp, fromparent, toparent); 917 } 918 out: 919 rw_exit(&to->tn_rwlock); 920 rw_exit(&fromtp->tn_rwlock); 921 return (error); 922 } 923 924 static void 925 tdirfixdotdot( 926 struct tmpnode *fromtp, /* child directory */ 927 struct tmpnode *fromparent, /* old parent directory */ 928 struct tmpnode *toparent) /* new parent directory */ 929 { 930 struct tdirent *dotdot; 931 932 ASSERT(RW_LOCK_HELD(&toparent->tn_rwlock)); 933 934 /* 935 * Increment the link count in the new parent tmpnode 936 */ 937 INCR_COUNT(&toparent->tn_nlink, &toparent->tn_tlock); 938 gethrestime(&toparent->tn_ctime); 939 940 dotdot = tmpfs_hash_lookup("..", fromtp, 0, NULL); 941 942 ASSERT(dotdot->td_tmpnode == fromparent); 943 dotdot->td_tmpnode = toparent; 944 945 /* 946 * Decrement the link count of the old parent tmpnode. 947 * If fromparent is NULL, then this is a new directory link; 948 * it has no parent, so we need not do anything. 949 */ 950 if (fromparent != NULL) { 951 mutex_enter(&fromparent->tn_tlock); 952 if (fromparent->tn_nlink != 0) { 953 fromparent->tn_nlink--; 954 gethrestime(&fromparent->tn_ctime); 955 } 956 mutex_exit(&fromparent->tn_tlock); 957 } 958 } 959 960 static int 961 tdiraddentry( 962 struct tmpnode *dir, /* target directory to make entry in */ 963 struct tmpnode *tp, /* new tmpnode */ 964 char *name, 965 enum de_op op, 966 struct tmpnode *fromtp) 967 { 968 struct tdirent *tdp, *tpdp; 969 size_t namelen, alloc_size; 970 timestruc_t now; 971 972 /* 973 * Make sure the parent directory wasn't removed from 974 * underneath the caller. 975 */ 976 if (dir->tn_dir == NULL) 977 return (ENOENT); 978 979 /* 980 * Check that everything is on the same filesystem. 981 */ 982 if (tp->tn_vnode->v_vfsp != dir->tn_vnode->v_vfsp) 983 return (EXDEV); 984 985 /* 986 * Allocate and initialize directory entry 987 */ 988 namelen = strlen(name) + 1; 989 alloc_size = namelen + sizeof (struct tdirent); 990 tdp = tmp_memalloc(alloc_size, 0); 991 if (tdp == NULL) 992 return (ENOSPC); 993 994 if ((op == DE_RENAME) && (tp->tn_type == VDIR)) 995 tdirfixdotdot(tp, fromtp, dir); 996 997 dir->tn_size += alloc_size; 998 dir->tn_dirents++; 999 tdp->td_tmpnode = tp; 1000 tdp->td_parent = dir; 1001 1002 /* 1003 * The directory entry and its name were allocated sequentially. 1004 */ 1005 tdp->td_name = (char *)tdp + sizeof (struct tdirent); 1006 (void) strcpy(tdp->td_name, name); 1007 1008 tmpfs_hash_in(tdp); 1009 1010 /* 1011 * Some utilities expect the size of a directory to remain 1012 * somewhat static. For example, a routine which unlinks 1013 * files between calls to readdir(); the size of the 1014 * directory changes from underneath it and so the real 1015 * directory offset in bytes is invalid. To circumvent 1016 * this problem, we initialize a directory entry with an 1017 * phony offset, and use this offset to determine end of 1018 * file in tmp_readdir. 1019 */ 1020 tpdp = dir->tn_dir->td_prev; 1021 /* 1022 * Install at first empty "slot" in directory list. 1023 */ 1024 while (tpdp->td_next != NULL && (tpdp->td_next->td_offset - 1025 tpdp->td_offset) <= 1) { 1026 ASSERT(tpdp->td_next != tpdp); 1027 ASSERT(tpdp->td_prev != tpdp); 1028 ASSERT(tpdp->td_next->td_offset > tpdp->td_offset); 1029 tpdp = tpdp->td_next; 1030 } 1031 tdp->td_offset = tpdp->td_offset + 1; 1032 1033 /* 1034 * If we're at the end of the dirent list and the offset (which 1035 * is necessarily the largest offset in this directory) is more 1036 * than twice the number of dirents, that means the directory is 1037 * 50% holes. At this point we reset the slot pointer back to 1038 * the beginning of the directory so we start using the holes. 1039 * The idea is that if there are N dirents, there must also be 1040 * N holes, so we can satisfy the next N creates by walking at 1041 * most 2N entries; thus the average cost of a create is constant. 1042 * Note that we use the first dirent's td_prev as the roving 1043 * slot pointer; it's ugly, but it saves a word in every dirent. 1044 */ 1045 if (tpdp->td_next == NULL && tpdp->td_offset > 2 * dir->tn_dirents) 1046 dir->tn_dir->td_prev = dir->tn_dir->td_next; 1047 else 1048 dir->tn_dir->td_prev = tdp; 1049 1050 ASSERT(tpdp->td_next != tpdp); 1051 ASSERT(tpdp->td_prev != tpdp); 1052 1053 tdp->td_next = tpdp->td_next; 1054 if (tdp->td_next) { 1055 tdp->td_next->td_prev = tdp; 1056 } 1057 tdp->td_prev = tpdp; 1058 tpdp->td_next = tdp; 1059 1060 ASSERT(tdp->td_next != tdp); 1061 ASSERT(tdp->td_prev != tdp); 1062 ASSERT(tpdp->td_next != tpdp); 1063 ASSERT(tpdp->td_prev != tpdp); 1064 1065 gethrestime(&now); 1066 dir->tn_mtime = now; 1067 dir->tn_ctime = now; 1068 1069 return (0); 1070 } 1071 1072 static int 1073 tdirmaketnode( 1074 struct tmpnode *dir, 1075 struct tmount *tm, 1076 struct vattr *va, 1077 enum de_op op, 1078 struct tmpnode **newnode, 1079 struct cred *cred) 1080 { 1081 struct tmpnode *tp; 1082 enum vtype type; 1083 1084 ASSERT(va != NULL); 1085 ASSERT(op == DE_CREATE || op == DE_MKDIR); 1086 if (((va->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&va->va_atime)) || 1087 ((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime))) 1088 return (EOVERFLOW); 1089 type = va->va_type; 1090 tp = tmp_memalloc(sizeof (struct tmpnode), TMP_MUSTHAVE); 1091 tmpnode_init(tm, tp, va, cred); 1092 1093 /* setup normal file/dir's extended attribute directory */ 1094 if (dir->tn_flags & ISXATTR) { 1095 /* parent dir is , mark file as xattr */ 1096 tp->tn_flags |= ISXATTR; 1097 } 1098 1099 1100 if (type == VBLK || type == VCHR) { 1101 tp->tn_vnode->v_rdev = tp->tn_rdev = va->va_rdev; 1102 } else { 1103 tp->tn_vnode->v_rdev = tp->tn_rdev = NODEV; 1104 } 1105 tp->tn_vnode->v_type = type; 1106 tp->tn_uid = crgetuid(cred); 1107 1108 /* 1109 * To determine the group-id of the created file: 1110 * 1) If the gid is set in the attribute list (non-Sun & pre-4.0 1111 * clients are not likely to set the gid), then use it if 1112 * the process is privileged, belongs to the target group, 1113 * or the group is the same as the parent directory. 1114 * 2) If the filesystem was not mounted with the Old-BSD-compatible 1115 * GRPID option, and the directory's set-gid bit is clear, 1116 * then use the process's gid. 1117 * 3) Otherwise, set the group-id to the gid of the parent directory. 1118 */ 1119 if ((va->va_mask & AT_GID) && 1120 ((va->va_gid == dir->tn_gid) || groupmember(va->va_gid, cred) || 1121 secpolicy_vnode_create_gid(cred) == 0)) { 1122 /* 1123 * XXX - is this only the case when a 4.0 NFS client, or a 1124 * client derived from that code, makes a call over the wire? 1125 */ 1126 tp->tn_gid = va->va_gid; 1127 } else { 1128 if (dir->tn_mode & VSGID) 1129 tp->tn_gid = dir->tn_gid; 1130 else 1131 tp->tn_gid = crgetgid(cred); 1132 } 1133 /* 1134 * If we're creating a directory, and the parent directory has the 1135 * set-GID bit set, set it on the new directory. 1136 * Otherwise, if the user is neither privileged nor a member of the 1137 * file's new group, clear the file's set-GID bit. 1138 */ 1139 if (dir->tn_mode & VSGID && type == VDIR) 1140 tp->tn_mode |= VSGID; 1141 else { 1142 if ((tp->tn_mode & VSGID) && 1143 secpolicy_vnode_setids_setgids(cred, tp->tn_gid) != 0) 1144 tp->tn_mode &= ~VSGID; 1145 } 1146 1147 if (va->va_mask & AT_ATIME) 1148 tp->tn_atime = va->va_atime; 1149 if (va->va_mask & AT_MTIME) 1150 tp->tn_mtime = va->va_mtime; 1151 1152 if (op == DE_MKDIR) 1153 tdirinit(dir, tp); 1154 1155 *newnode = tp; 1156 return (0); 1157 } 1158