/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int tdircheckpath(struct tmpnode *, struct tmpnode *, struct cred *); static int tdirrename(struct tmpnode *, struct tmpnode *, struct tmpnode *, char *, struct tmpnode *, struct tdirent *, struct cred *); static void tdirfixdotdot(struct tmpnode *, struct tmpnode *, struct tmpnode *); static int tdirmaketnode(struct tmpnode *, struct tmount *, struct vattr *, enum de_op, struct tmpnode **, struct cred *); static int tdiraddentry(struct tmpnode *, struct tmpnode *, char *, enum de_op, struct tmpnode *); #define T_HASH_SIZE 8192 /* must be power of 2 */ #define T_MUTEX_SIZE 64 /* Non-static so compilers won't constant-fold these away. 
*/ clock_t tmpfs_rename_backoff_delay = 1; unsigned int tmpfs_rename_backoff_tries = 0; unsigned long tmpfs_rename_loops = 0; static struct tdirent *t_hashtable[T_HASH_SIZE]; static kmutex_t t_hashmutex[T_MUTEX_SIZE]; #define T_HASH_INDEX(a) ((a) & (T_HASH_SIZE-1)) #define T_MUTEX_INDEX(a) ((a) & (T_MUTEX_SIZE-1)) #define TMPFS_HASH(tp, name, hash) \ { \ char Xc, *Xcp; \ hash = (uint_t)(uintptr_t)(tp) >> 8; \ for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++) \ hash = (hash << 4) + hash + (uint_t)Xc; \ } void tmpfs_hash_init(void) { int ix; for (ix = 0; ix < T_MUTEX_SIZE; ix++) mutex_init(&t_hashmutex[ix], NULL, MUTEX_DEFAULT, NULL); } /* * This routine is where the rubber meets the road for identities. */ static void tmpfs_hash_in(struct tdirent *t) { uint_t hash; struct tdirent **prevpp; kmutex_t *t_hmtx; TMPFS_HASH(t->td_parent, t->td_name, hash); t->td_hash = hash; prevpp = &t_hashtable[T_HASH_INDEX(hash)]; t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)]; mutex_enter(t_hmtx); t->td_link = *prevpp; *prevpp = t; mutex_exit(t_hmtx); } /* * Remove tdirent *t from the hash list. */ static void tmpfs_hash_out(struct tdirent *t) { uint_t hash; struct tdirent **prevpp; kmutex_t *t_hmtx; hash = t->td_hash; prevpp = &t_hashtable[T_HASH_INDEX(hash)]; t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)]; mutex_enter(t_hmtx); while (*prevpp != t) prevpp = &(*prevpp)->td_link; *prevpp = t->td_link; mutex_exit(t_hmtx); } /* * Currently called by tdirrename() only. * rename operation needs to be done with lock held, to ensure that * no other operations can access the tmpnode at the same instance. 
*/ static void tmpfs_hash_change(struct tdirent *tdp, struct tmpnode *fromtp) { uint_t hash; kmutex_t *t_hmtx; hash = tdp->td_hash; t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)]; mutex_enter(t_hmtx); tdp->td_tmpnode = fromtp; mutex_exit(t_hmtx); } static struct tdirent * tmpfs_hash_lookup(char *name, struct tmpnode *parent, uint_t hold, struct tmpnode **found) { struct tdirent *l; uint_t hash; kmutex_t *t_hmtx; struct tmpnode *tnp; TMPFS_HASH(parent, name, hash); t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)]; mutex_enter(t_hmtx); l = t_hashtable[T_HASH_INDEX(hash)]; while (l) { if ((l->td_hash == hash) && (l->td_parent == parent) && (strcmp(l->td_name, name) == 0)) { /* * We need to make sure that the tmpnode that * we put a hold on is the same one that we pass back. * Hence, temporary variable tnp is necessary. */ tnp = l->td_tmpnode; if (hold) { ASSERT(tnp); tmpnode_hold(tnp); } if (found) *found = tnp; mutex_exit(t_hmtx); return (l); } else { l = l->td_link; } } mutex_exit(t_hmtx); return (NULL); } /* * Search directory 'parent' for entry 'name'. * * The calling thread can't hold the write version * of the rwlock for the directory being searched * * 0 is returned on success and *foundtp points * to the found tmpnode with its vnode held. */ int tdirlookup( struct tmpnode *parent, char *name, struct tmpnode **foundtp, struct cred *cred) { int error; *foundtp = NULL; if (parent->tn_type != VDIR) return (ENOTDIR); if ((error = tmp_taccess(parent, VEXEC, cred))) return (error); if (*name == '\0') { tmpnode_hold(parent); *foundtp = parent; return (0); } /* * Search the directory for the matching name * We need the lock protecting the tn_dir list * so that it doesn't change out from underneath us. * tmpfs_hash_lookup() will pass back the tmpnode * with a hold on it. */ if (tmpfs_hash_lookup(name, parent, 1, foundtp) != NULL) { ASSERT(*foundtp); return (0); } return (ENOENT); } /* * Enter a directory entry for 'name' and 'tp' into directory 'dir' * * Returns 0 on success. 
*/
/*
 * The caller must hold dir->tn_rwlock as writer.  'op' selects the
 * behaviour:
 *
 *   DE_CREATE, DE_MKDIR
 *	A new tmpnode is built with tdirmaketnode() from 'va' and entered
 *	under 'name'.  If tpp is non-NULL the new node is returned through
 *	it; otherwise the node is released.  If 'name' already exists and
 *	tpp is non-NULL, the existing node is returned (held) through *tpp
 *	with EEXIST.
 *	NOTE(review): if 'name' already exists and tpp is NULL, the existing
 *	node is released and 0 is returned; confirm no caller relies on
 *	getting EEXIST with a NULL tpp.
 *
 *   DE_LINK
 *	'tp' gains a link named 'name'.  Fails with EEXIST if the name is
 *	already present.
 *
 *   DE_RENAME
 *	'tp' (currently in 'fromparent') is entered as 'name'.  If 'name'
 *	already exists the existing entry is retargeted via tdirrename().
 *
 * For DE_LINK and DE_RENAME the link count of 'tp' is bumped up front and
 * backed out at "out" on failure.  Every return taken before that bump is
 * a direct return so that the undo code is not reached.
 *
 * Errors produced directly here: EACCES ('/' in name), EBUSY (backoff limit
 * reached), ENOENT (source or target directory has no links), EMLINK,
 * EINVAL (renaming a directory into itself), EEXIST.  Errors from
 * tdircheckpath(), tdirrename(), tmp_taccess(), tdirmaketnode() and
 * tdiraddentry() are passed through.
 */
int
tdirenter(
	struct tmount	*tm,
	struct tmpnode	*dir,		/* target directory to make entry in */
	char		*name,		/* name of entry */
	enum de_op	op,		/* entry operation */
	struct tmpnode	*fromparent,	/* source directory if rename */
	struct tmpnode	*tp,		/* source tmpnode, if link/rename */
	struct vattr	*va,
	struct tmpnode	**tpp,		/* return tmpnode, if create/mkdir */
	struct cred	*cred,
	caller_context_t *ctp)
{
	struct tdirent *tdp;
	struct tmpnode *found = NULL;
	int error = 0;
	char *s;

	/*
	 * tn_rwlock is held to serialize direnter and dirdeletes
	 */
	ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
	ASSERT(dir->tn_type == VDIR);

	/*
	 * Don't allow '/' characters in pathname component
	 * (thus in ufs_direnter()).
	 */
	for (s = name; *s; s++)
		if (*s == '/')
			return (EACCES);

	if (name[0] == '\0')
		panic("tdirenter: NULL name");

	/*
	 * For link and rename lock the source entry and check the link count
	 * to see if it has been removed while it was unlocked.
	 */
	if (op == DE_LINK || op == DE_RENAME) {
		if (tp != dir) {
			unsigned int tries = 0;

			/*
			 * If we are acquiring tp->tn_rwlock (for SOURCE)
			 * inside here, we must consider the following:
			 *
			 * - dir->tn_rwlock (TARGET) is already HELD (see
			 * above ASSERT()).
			 *
			 * - It is possible our SOURCE is a parent of our
			 * TARGET. Yes it's unusual, but it will return an
			 * error below via tdircheckpath().
			 *
			 * - It is also possible that another thread,
			 * concurrent to this one, is performing
			 * rmdir(TARGET), which means it will first acquire
			 * SOURCE's lock, THEN acquire TARGET's lock, which
			 * could result in this thread holding TARGET and
			 * trying for SOURCE, but the other thread holding
			 * SOURCE and trying for TARGET.  This is deadlock,
			 * and it's inducible.
			 *
			 * To prevent this, we borrow some techniques from UFS
			 * and rw_tryenter(), delaying if we fail, and
			 * if someone tweaks the number of backoff tries to be
			 * nonzero, return EBUSY after that number of tries.
			 */
			while (!rw_tryenter(&tp->tn_rwlock, RW_WRITER)) {
				/*
				 * Sloppy, but this is a diagnostic so atomic
				 * increment would be overkill.
				 */
				tmpfs_rename_loops++;

				/*
				 * With the default of zero tries this loop
				 * never gives up.  The link count has not
				 * been bumped yet, so a direct return is
				 * correct here.
				 */
				if (tmpfs_rename_backoff_tries != 0) {
					if (tries > tmpfs_rename_backoff_tries)
						return (EBUSY);
					tries++;
				}
				/*
				 * NOTE: We're still holding dir->tn_rwlock,
				 * so drop it over the delay, so any other
				 * thread can get its business done.
				 *
				 * No state change or state inspection happens
				 * prior to here, so it is not wholly dangerous
				 * to release-and-reacquire dir->tn_rwlock.
				 *
				 * Hold the vnode of dir in case it gets
				 * released by another thread, though.
				 */
				VN_HOLD(TNTOV(dir));
				rw_exit(&dir->tn_rwlock);
				delay(tmpfs_rename_backoff_delay);
				rw_enter(&dir->tn_rwlock, RW_WRITER);
				VN_RELE(TNTOV(dir));
			}
		}
		/*
		 * tn_tlock protects tn_nlink.  On both failure paths below
		 * the source rwlock (if taken) is dropped before returning.
		 */
		mutex_enter(&tp->tn_tlock);
		if (tp->tn_nlink == 0) {
			mutex_exit(&tp->tn_tlock);
			if (tp != dir)
				rw_exit(&tp->tn_rwlock);
			return (ENOENT);
		}

		if (tp->tn_nlink == MAXLINK) {
			mutex_exit(&tp->tn_tlock);
			if (tp != dir)
				rw_exit(&tp->tn_rwlock);
			return (EMLINK);
		}
		/*
		 * Bump the link count now; from here on every failure must
		 * leave through "out" so the bump is undone.
		 */
		tp->tn_nlink++;
		gethrestime(&tp->tn_ctime);
		mutex_exit(&tp->tn_tlock);
		if (tp != dir)
			rw_exit(&tp->tn_rwlock);
	}

	/*
	 * This might be a "dangling detached directory".
	 * it could have been removed, but a reference
	 * to it kept in u_cwd.  don't bother searching
	 * it, and with any luck the user will get tired
	 * of dealing with us and cd to some absolute
	 * pathway.  *sigh*, thus in ufs, too.
	 */
	if (dir->tn_nlink == 0) {
		error = ENOENT;
		goto out;
	}

	/*
	 * If this is a rename of a directory and the parent is
	 * different (".." must be changed), then the source
	 * directory must not be in the directory hierarchy
	 * above the target, as this would orphan everything
	 * below the source directory.
	 */
	if (op == DE_RENAME) {
		if (tp == dir) {
			error = EINVAL;
			goto out;
		}
		if (tp->tn_type == VDIR) {
			if ((fromparent != dir) &&
			    (error = tdircheckpath(tp, dir, cred))) {
				goto out;
			}
		}
	}

	/*
	 * Search for the entry.  Return "found" if it exists.
	 * The lookup is done with hold set, so "found" carries a hold
	 * that each case below must either release or pass to the caller.
	 */
	tdp = tmpfs_hash_lookup(name, dir, 1, &found);

	if (tdp) {
		ASSERT(found);
		switch (op) {
		case DE_CREATE:
		case DE_MKDIR:
			if (tpp) {
				/* The hold on "found" passes to the caller. */
				*tpp = found;
				error = EEXIST;
			} else {
				tmpnode_rele(found);
			}
			break;

		case DE_RENAME:
			/*
			 * Retarget the existing entry at tp.  tdirrename()
			 * may return ESAME when source and target are the
			 * same tmpnode.
			 */
			error = tdirrename(fromparent, tp,
			    dir, name, found, tdp, cred);
			if (error == 0) {
				if (found != NULL) {
					vnevent_rename_dest(TNTOV(found),
					    TNTOV(dir), name, ctp);
				}
			}

			tmpnode_rele(found);
			break;

		case DE_LINK:
			/*
			 * Can't link to an existing file.
			 */
			error = EEXIST;
			tmpnode_rele(found);
			break;
		}
	} else {

		/*
		 * The entry does not exist. Check write permission in
		 * directory to see if entry can be created.
		 */
		if (error = tmp_taccess(dir, VWRITE, cred))
			goto out;
		if (op == DE_CREATE || op == DE_MKDIR) {
			/*
			 * Make new tmpnode and directory entry as required.
			 * From here on tp refers to the new node.
			 */
			error = tdirmaketnode(dir, tm, va, op, &tp, cred);
			if (error)
				goto out;
		}
		if (error = tdiraddentry(dir, tp, name, op, fromparent)) {
			if (op == DE_CREATE || op == DE_MKDIR) {
				/*
				 * Unmake the inode we just made.
				 */
				rw_enter(&tp->tn_rwlock, RW_WRITER);
				if ((tp->tn_type) == VDIR) {
					ASSERT(tdp == NULL);
					/*
					 * cleanup allocs made by tdirinit()
					 */
					tdirtrunc(tp);
				}
				mutex_enter(&tp->tn_tlock);
				tp->tn_nlink = 0;
				mutex_exit(&tp->tn_tlock);
				gethrestime(&tp->tn_ctime);
				rw_exit(&tp->tn_rwlock);
				tmpnode_rele(tp);
				tp = NULL;
			}
		} else if (tpp) {
			*tpp = tp;
		} else if (op == DE_CREATE || op == DE_MKDIR) {
			/* Nobody wants the new node back; drop our hold. */
			tmpnode_rele(tp);
		}
	}

out:
	if (error && (op == DE_LINK || op == DE_RENAME)) {
		/*
		 * Undo bumped link count.
		 */
		DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
		gethrestime(&tp->tn_ctime);
	}
	return (error);
}

/*
 * Delete entry tp of name "nm" from dir.
 * Free dir entry space and decrement link count on tmpnode(s).
 *
 * Return 0 on success.
*/ int tdirdelete( struct tmpnode *dir, struct tmpnode *tp, char *nm, enum dr_op op, struct cred *cred) { struct tdirent *tpdp; int error; size_t namelen; struct tmpnode *tnp; timestruc_t now; ASSERT(RW_WRITE_HELD(&dir->tn_rwlock)); ASSERT(RW_WRITE_HELD(&tp->tn_rwlock)); ASSERT(dir->tn_type == VDIR); if (nm[0] == '\0') panic("tdirdelete: NULL name for %p", (void *)tp); /* * return error when removing . and .. */ if (nm[0] == '.') { if (nm[1] == '\0') return (EINVAL); if (nm[1] == '.' && nm[2] == '\0') return (EEXIST); /* thus in ufs */ } if (error = tmp_taccess(dir, VEXEC|VWRITE, cred)) return (error); /* * If the parent directory is "sticky", then the user must * own the parent directory or the file in it, or else must * have permission to write the file. Otherwise it may not * be deleted (except by privileged users). * Same as ufs_dirremove. */ if ((error = tmp_sticky_remove_access(dir, tp, cred)) != 0) return (error); if (dir->tn_dir == NULL) return (ENOENT); tpdp = tmpfs_hash_lookup(nm, dir, 0, &tnp); if (tpdp == NULL) { /* * If it is gone, some other thread got here first! * Return error ENOENT. */ return (ENOENT); } /* * If the tmpnode in the tdirent changed, we were probably * the victim of a concurrent rename operation. The original * is gone, so return that status (same as UFS). */ if (tp != tnp) return (ENOENT); tmpfs_hash_out(tpdp); /* * Take tpdp out of the directory list. */ ASSERT(tpdp->td_next != tpdp); ASSERT(tpdp->td_prev != tpdp); if (tpdp->td_prev) { tpdp->td_prev->td_next = tpdp->td_next; } if (tpdp->td_next) { tpdp->td_next->td_prev = tpdp->td_prev; } /* * If the roving slot pointer happens to match tpdp, * point it at the previous dirent. 
*/ if (dir->tn_dir->td_prev == tpdp) { dir->tn_dir->td_prev = tpdp->td_prev; } ASSERT(tpdp->td_next != tpdp); ASSERT(tpdp->td_prev != tpdp); /* * tpdp points to the correct directory entry */ namelen = strlen(tpdp->td_name) + 1; tmp_memfree(tpdp, sizeof (struct tdirent) + namelen); dir->tn_size -= (sizeof (struct tdirent) + namelen); dir->tn_dirents--; gethrestime(&now); dir->tn_mtime = now; dir->tn_ctime = now; tp->tn_ctime = now; ASSERT(tp->tn_nlink > 0); DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock); if (op == DR_RMDIR && tp->tn_type == VDIR) { tdirtrunc(tp); ASSERT(tp->tn_nlink == 0); } return (0); } /* * tdirinit is used internally to initialize a directory (dir) * with '.' and '..' entries without checking permissions and locking */ void tdirinit( struct tmpnode *parent, /* parent of directory to initialize */ struct tmpnode *dir) /* the new directory */ { struct tdirent *dot, *dotdot; timestruc_t now; ASSERT(RW_WRITE_HELD(&parent->tn_rwlock)); ASSERT(dir->tn_type == VDIR); dot = tmp_memalloc(sizeof (struct tdirent) + 2, TMP_MUSTHAVE); dotdot = tmp_memalloc(sizeof (struct tdirent) + 3, TMP_MUSTHAVE); /* * Initialize the entries */ dot->td_tmpnode = dir; dot->td_offset = 0; dot->td_name = (char *)dot + sizeof (struct tdirent); dot->td_name[0] = '.'; dot->td_parent = dir; tmpfs_hash_in(dot); dotdot->td_tmpnode = parent; dotdot->td_offset = 1; dotdot->td_name = (char *)dotdot + sizeof (struct tdirent); dotdot->td_name[0] = '.'; dotdot->td_name[1] = '.'; dotdot->td_parent = dir; tmpfs_hash_in(dotdot); /* * Initialize directory entry list. */ dot->td_next = dotdot; dot->td_prev = dotdot; /* dot's td_prev holds roving slot pointer */ dotdot->td_next = NULL; dotdot->td_prev = dot; gethrestime(&now); dir->tn_mtime = now; dir->tn_ctime = now; /* * Link counts are special for the hidden attribute directory. * The only explicit reference in the name space is "." and * the reference through ".." is not counted on the parent * file. 
The attrdir is created as a side effect to lookup, * so don't change the ctime of the parent. * Since tdirinit is called with both dir and parent being the * same for the root vnode, we need to increment this before we set * tn_nlink = 2 below. */ if (!(dir->tn_vnode->v_flag & V_XATTRDIR)) { INCR_COUNT(&parent->tn_nlink, &parent->tn_tlock); parent->tn_ctime = now; } dir->tn_dir = dot; dir->tn_size = 2 * sizeof (struct tdirent) + 5; /* dot and dotdot */ dir->tn_dirents = 2; dir->tn_nlink = 2; } /* * tdirtrunc is called to remove all directory entries under this directory. */ void tdirtrunc(struct tmpnode *dir) { struct tdirent *tdp; struct tmpnode *tp; size_t namelen; timestruc_t now; int isvattrdir, isdotdot, skip_decr; ASSERT(RW_WRITE_HELD(&dir->tn_rwlock)); ASSERT(dir->tn_type == VDIR); isvattrdir = (dir->tn_vnode->v_flag & V_XATTRDIR) ? 1 : 0; for (tdp = dir->tn_dir; tdp; tdp = dir->tn_dir) { ASSERT(tdp->td_next != tdp); ASSERT(tdp->td_prev != tdp); ASSERT(tdp->td_tmpnode); dir->tn_dir = tdp->td_next; namelen = strlen(tdp->td_name) + 1; /* * Adjust the link counts to account for this directory * entry removal. Hidden attribute directories may * not be empty as they may be truncated as a side- * effect of removing the parent. We do hold/rele * operations to free up these tmpnodes. * * Skip the link count adjustment for parents of * attribute directories as those link counts * do not include the ".." reference in the hidden * directories. 
*/ tp = tdp->td_tmpnode; isdotdot = (strcmp("..", tdp->td_name) == 0); skip_decr = (isvattrdir && isdotdot); if (!skip_decr) { ASSERT(tp->tn_nlink > 0); DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock); } tmpfs_hash_out(tdp); tmp_memfree(tdp, sizeof (struct tdirent) + namelen); dir->tn_size -= (sizeof (struct tdirent) + namelen); dir->tn_dirents--; } gethrestime(&now); dir->tn_mtime = now; dir->tn_ctime = now; ASSERT(dir->tn_dir == NULL); ASSERT(dir->tn_size == 0); ASSERT(dir->tn_dirents == 0); } /* * Check if the source directory is in the path of the target directory. * The target directory is locked by the caller. * * XXX - The source and target's should be different upon entry. */ static int tdircheckpath( struct tmpnode *fromtp, struct tmpnode *toparent, struct cred *cred) { int error = 0; struct tmpnode *dir, *dotdot; struct tdirent *tdp; ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock)); tdp = tmpfs_hash_lookup("..", toparent, 1, &dotdot); if (tdp == NULL) return (ENOENT); ASSERT(dotdot); if (dotdot == toparent) { /* root of fs. search trivially satisfied. */ tmpnode_rele(dotdot); return (0); } for (;;) { /* * Return error for cases like "mv c c/d", * "mv c c/d/e" and so on. */ if (dotdot == fromtp) { tmpnode_rele(dotdot); error = EINVAL; break; } dir = dotdot; error = tdirlookup(dir, "..", &dotdot, cred); if (error) { tmpnode_rele(dir); break; } /* * We're okay if we traverse the directory tree up to * the root directory and don't run into the * parent directory. 
*/ if (dir == dotdot) { tmpnode_rele(dir); tmpnode_rele(dotdot); break; } tmpnode_rele(dir); } return (error); } static int tdirrename( struct tmpnode *fromparent, /* parent directory of source */ struct tmpnode *fromtp, /* source tmpnode */ struct tmpnode *toparent, /* parent directory of target */ char *nm, /* entry we are trying to change */ struct tmpnode *to, /* target tmpnode */ struct tdirent *where, /* target tmpnode directory entry */ struct cred *cred) /* credentials */ { int error = 0; int doingdirectory; timestruc_t now; #if defined(lint) nm = nm; #endif ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock)); /* * Short circuit rename of something to itself. */ if (fromtp == to) return (ESAME); /* special KLUDGE error code */ rw_enter(&fromtp->tn_rwlock, RW_READER); rw_enter(&to->tn_rwlock, RW_READER); /* * Check that everything is on the same filesystem. */ if (to->tn_vnode->v_vfsp != toparent->tn_vnode->v_vfsp || to->tn_vnode->v_vfsp != fromtp->tn_vnode->v_vfsp) { error = EXDEV; goto out; } /* * Must have write permission to rewrite target entry. * Check for stickyness. */ if ((error = tmp_taccess(toparent, VWRITE, cred)) != 0 || (error = tmp_sticky_remove_access(toparent, to, cred)) != 0) goto out; /* * Ensure source and target are compatible (both directories * or both not directories). If target is a directory it must * be empty and have no links to it; in addition it must not * be a mount point, and both the source and target must be * writable. */ doingdirectory = (fromtp->tn_type == VDIR); if (to->tn_type == VDIR) { if (!doingdirectory) { error = EISDIR; goto out; } /* * vn_vfswlock will prevent mounts from using the directory * until we are done. 
*/ if (vn_vfswlock(TNTOV(to))) { error = EBUSY; goto out; } if (vn_mountedvfs(TNTOV(to)) != NULL) { vn_vfsunlock(TNTOV(to)); error = EBUSY; goto out; } mutex_enter(&to->tn_tlock); if (to->tn_dirents > 2 || to->tn_nlink > 2) { mutex_exit(&to->tn_tlock); vn_vfsunlock(TNTOV(to)); error = EEXIST; /* SIGH should be ENOTEMPTY */ /* * Update atime because checking tn_dirents is * logically equivalent to reading the directory */ gethrestime(&to->tn_atime); goto out; } mutex_exit(&to->tn_tlock); } else if (doingdirectory) { error = ENOTDIR; goto out; } tmpfs_hash_change(where, fromtp); gethrestime(&now); toparent->tn_mtime = now; toparent->tn_ctime = now; /* * Upgrade to write lock on "to" (i.e., the target tmpnode). */ rw_exit(&to->tn_rwlock); rw_enter(&to->tn_rwlock, RW_WRITER); /* * Decrement the link count of the target tmpnode. */ DECR_COUNT(&to->tn_nlink, &to->tn_tlock); to->tn_ctime = now; if (doingdirectory) { /* * The entry for "to" no longer exists so release the vfslock. */ vn_vfsunlock(TNTOV(to)); /* * Decrement the target link count and delete all entires. */ tdirtrunc(to); ASSERT(to->tn_nlink == 0); /* * Renaming a directory with the parent different * requires that ".." be rewritten. The window is * still there for ".." to be inconsistent, but this * is unavoidable, and a lot shorter than when it was * done in a user process. 
*/ if (fromparent != toparent) tdirfixdotdot(fromtp, fromparent, toparent); } out: rw_exit(&to->tn_rwlock); rw_exit(&fromtp->tn_rwlock); return (error); } static void tdirfixdotdot( struct tmpnode *fromtp, /* child directory */ struct tmpnode *fromparent, /* old parent directory */ struct tmpnode *toparent) /* new parent directory */ { struct tdirent *dotdot; ASSERT(RW_LOCK_HELD(&toparent->tn_rwlock)); /* * Increment the link count in the new parent tmpnode */ INCR_COUNT(&toparent->tn_nlink, &toparent->tn_tlock); gethrestime(&toparent->tn_ctime); dotdot = tmpfs_hash_lookup("..", fromtp, 0, NULL); ASSERT(dotdot->td_tmpnode == fromparent); dotdot->td_tmpnode = toparent; /* * Decrement the link count of the old parent tmpnode. * If fromparent is NULL, then this is a new directory link; * it has no parent, so we need not do anything. */ if (fromparent != NULL) { mutex_enter(&fromparent->tn_tlock); if (fromparent->tn_nlink != 0) { fromparent->tn_nlink--; gethrestime(&fromparent->tn_ctime); } mutex_exit(&fromparent->tn_tlock); } } static int tdiraddentry( struct tmpnode *dir, /* target directory to make entry in */ struct tmpnode *tp, /* new tmpnode */ char *name, enum de_op op, struct tmpnode *fromtp) { struct tdirent *tdp, *tpdp; size_t namelen, alloc_size; timestruc_t now; /* * Make sure the parent directory wasn't removed from * underneath the caller. */ if (dir->tn_dir == NULL) return (ENOENT); /* * Check that everything is on the same filesystem. */ if (tp->tn_vnode->v_vfsp != dir->tn_vnode->v_vfsp) return (EXDEV); /* * Allocate and initialize directory entry */ namelen = strlen(name) + 1; alloc_size = namelen + sizeof (struct tdirent); tdp = tmp_memalloc(alloc_size, 0); if (tdp == NULL) return (ENOSPC); if ((op == DE_RENAME) && (tp->tn_type == VDIR)) tdirfixdotdot(tp, fromtp, dir); dir->tn_size += alloc_size; dir->tn_dirents++; tdp->td_tmpnode = tp; tdp->td_parent = dir; /* * The directory entry and its name were allocated sequentially. 
*/ tdp->td_name = (char *)tdp + sizeof (struct tdirent); (void) strcpy(tdp->td_name, name); tmpfs_hash_in(tdp); /* * Some utilities expect the size of a directory to remain * somewhat static. For example, a routine which unlinks * files between calls to readdir(); the size of the * directory changes from underneath it and so the real * directory offset in bytes is invalid. To circumvent * this problem, we initialize a directory entry with an * phony offset, and use this offset to determine end of * file in tmp_readdir. */ tpdp = dir->tn_dir->td_prev; /* * Install at first empty "slot" in directory list. */ while (tpdp->td_next != NULL && (tpdp->td_next->td_offset - tpdp->td_offset) <= 1) { ASSERT(tpdp->td_next != tpdp); ASSERT(tpdp->td_prev != tpdp); ASSERT(tpdp->td_next->td_offset > tpdp->td_offset); tpdp = tpdp->td_next; } tdp->td_offset = tpdp->td_offset + 1; /* * If we're at the end of the dirent list and the offset (which * is necessarily the largest offset in this directory) is more * than twice the number of dirents, that means the directory is * 50% holes. At this point we reset the slot pointer back to * the beginning of the directory so we start using the holes. * The idea is that if there are N dirents, there must also be * N holes, so we can satisfy the next N creates by walking at * most 2N entries; thus the average cost of a create is constant. * Note that we use the first dirent's td_prev as the roving * slot pointer; it's ugly, but it saves a word in every dirent. 
*/ if (tpdp->td_next == NULL && tpdp->td_offset > 2 * dir->tn_dirents) dir->tn_dir->td_prev = dir->tn_dir->td_next; else dir->tn_dir->td_prev = tdp; ASSERT(tpdp->td_next != tpdp); ASSERT(tpdp->td_prev != tpdp); tdp->td_next = tpdp->td_next; if (tdp->td_next) { tdp->td_next->td_prev = tdp; } tdp->td_prev = tpdp; tpdp->td_next = tdp; ASSERT(tdp->td_next != tdp); ASSERT(tdp->td_prev != tdp); ASSERT(tpdp->td_next != tpdp); ASSERT(tpdp->td_prev != tpdp); gethrestime(&now); dir->tn_mtime = now; dir->tn_ctime = now; return (0); } static int tdirmaketnode( struct tmpnode *dir, struct tmount *tm, struct vattr *va, enum de_op op, struct tmpnode **newnode, struct cred *cred) { struct tmpnode *tp; enum vtype type; ASSERT(va != NULL); ASSERT(op == DE_CREATE || op == DE_MKDIR); if (((va->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&va->va_atime)) || ((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime))) return (EOVERFLOW); type = va->va_type; tp = tmp_memalloc(sizeof (struct tmpnode), TMP_MUSTHAVE); tmpnode_init(tm, tp, va, cred); /* setup normal file/dir's extended attribute directory */ if (dir->tn_flags & ISXATTR) { /* parent dir is , mark file as xattr */ tp->tn_flags |= ISXATTR; } if (type == VBLK || type == VCHR) { tp->tn_vnode->v_rdev = tp->tn_rdev = va->va_rdev; } else { tp->tn_vnode->v_rdev = tp->tn_rdev = NODEV; } tp->tn_vnode->v_type = type; tp->tn_uid = crgetuid(cred); /* * To determine the group-id of the created file: * 1) If the gid is set in the attribute list (non-Sun & pre-4.0 * clients are not likely to set the gid), then use it if * the process is privileged, belongs to the target group, * or the group is the same as the parent directory. * 2) If the filesystem was not mounted with the Old-BSD-compatible * GRPID option, and the directory's set-gid bit is clear, * then use the process's gid. * 3) Otherwise, set the group-id to the gid of the parent directory. 
*/ if ((va->va_mask & AT_GID) && ((va->va_gid == dir->tn_gid) || groupmember(va->va_gid, cred) || secpolicy_vnode_create_gid(cred) == 0)) { /* * XXX - is this only the case when a 4.0 NFS client, or a * client derived from that code, makes a call over the wire? */ tp->tn_gid = va->va_gid; } else { if (dir->tn_mode & VSGID) tp->tn_gid = dir->tn_gid; else tp->tn_gid = crgetgid(cred); } /* * If we're creating a directory, and the parent directory has the * set-GID bit set, set it on the new directory. * Otherwise, if the user is neither privileged nor a member of the * file's new group, clear the file's set-GID bit. */ if (dir->tn_mode & VSGID && type == VDIR) tp->tn_mode |= VSGID; else { if ((tp->tn_mode & VSGID) && secpolicy_vnode_setids_setgids(cred, tp->tn_gid) != 0) tp->tn_mode &= ~VSGID; } if (va->va_mask & AT_ATIME) tp->tn_atime = va->va_atime; if (va->va_mask & AT_MTIME) tp->tn_mtime = va->va_mtime; if (op == DE_MKDIR) tdirinit(dir, tp); *newnode = tp; return (0); }