1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/sysmacros.h>
29 #include <sys/systm.h>
30 #include <sys/time.h>
31 #include <sys/vfs.h>
32 #include <sys/vnode.h>
33 #include <sys/errno.h>
34 #include <sys/cmn_err.h>
35 #include <sys/cred.h>
36 #include <sys/stat.h>
37 #include <sys/debug.h>
38 #include <sys/policy.h>
39 #include <sys/fs/tmpnode.h>
40 #include <sys/fs/tmp.h>
41 #include <sys/vtrace.h>
42
/*
 * Forward declarations for the file-local helpers defined further down.
 */
static int tdircheckpath(struct tmpnode *fromtp, struct tmpnode *toparent,
    struct cred *cred);
static int tdirrename(struct tmpnode *fromparent, struct tmpnode *fromtp,
    struct tmpnode *toparent, char *nm, struct tmpnode *to,
    struct tdirent *where, struct cred *cred);
static void tdirfixdotdot(struct tmpnode *fromtp, struct tmpnode *fromparent,
    struct tmpnode *toparent);
static int tdirmaketnode(struct tmpnode *dir, struct tmount *tm,
    struct vattr *va, enum de_op op, struct tmpnode **newnode,
    struct cred *cred);
static int tdiraddentry(struct tmpnode *dir, struct tmpnode *tp, char *name,
    enum de_op op, struct tmpnode *fromtp);
51
52
53 #define T_HASH_SIZE 8192 /* must be power of 2 */
54 #define T_MUTEX_SIZE 64
55
56 /* Non-static so compilers won't constant-fold these away. */
57 clock_t tmpfs_rename_backoff_delay = 1;
58 unsigned int tmpfs_rename_backoff_tries = 0;
59 unsigned long tmpfs_rename_loops = 0;
60
61 static struct tdirent *t_hashtable[T_HASH_SIZE];
62 static kmutex_t t_hashmutex[T_MUTEX_SIZE];
63
64 #define T_HASH_INDEX(a) ((a) & (T_HASH_SIZE-1))
65 #define T_MUTEX_INDEX(a) ((a) & (T_MUTEX_SIZE-1))
66
/*
 * Compute the hash of directory entry 'name' within parent tmpnode 'tp' and
 * store it in the uint_t lvalue 'hash'.
 *
 * The seed is the parent's address truncated to uint_t and shifted right by
 * 8 bits; every byte c of the name is then folded in as
 * hash = (hash << 4) + hash + c, i.e. hash * 17 + c.
 *
 * The body is wrapped in do { } while (0) so that "TMPFS_HASH(...);" is a
 * single statement and stays correct as the body of an unbraced if/else.
 */
#define	TMPFS_HASH(tp, name, hash)					\
	do {								\
		char Xc, *Xcp;						\
		(hash) = (uint_t)(uintptr_t)(tp) >> 8;			\
		for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++)		\
			(hash) = ((hash) << 4) + (hash) + (uint_t)Xc;	\
	} while (0)
74
75 void
tmpfs_hash_init(void)76 tmpfs_hash_init(void)
77 {
78 int ix;
79
80 for (ix = 0; ix < T_MUTEX_SIZE; ix++)
81 mutex_init(&t_hashmutex[ix], NULL, MUTEX_DEFAULT, NULL);
82 }
83
84 /*
85 * This routine is where the rubber meets the road for identities.
86 */
87 static void
tmpfs_hash_in(struct tdirent * t)88 tmpfs_hash_in(struct tdirent *t)
89 {
90 uint_t hash;
91 struct tdirent **prevpp;
92 kmutex_t *t_hmtx;
93
94 TMPFS_HASH(t->td_parent, t->td_name, hash);
95 t->td_hash = hash;
96 prevpp = &t_hashtable[T_HASH_INDEX(hash)];
97 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
98 mutex_enter(t_hmtx);
99 t->td_link = *prevpp;
100 *prevpp = t;
101 mutex_exit(t_hmtx);
102 }
103
104 /*
105 * Remove tdirent *t from the hash list.
106 */
107 static void
tmpfs_hash_out(struct tdirent * t)108 tmpfs_hash_out(struct tdirent *t)
109 {
110 uint_t hash;
111 struct tdirent **prevpp;
112 kmutex_t *t_hmtx;
113
114 hash = t->td_hash;
115 prevpp = &t_hashtable[T_HASH_INDEX(hash)];
116 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
117 mutex_enter(t_hmtx);
118 while (*prevpp != t)
119 prevpp = &(*prevpp)->td_link;
120 *prevpp = t->td_link;
121 mutex_exit(t_hmtx);
122 }
123
124 /*
125 * Currently called by tdirrename() only.
126 * rename operation needs to be done with lock held, to ensure that
127 * no other operations can access the tmpnode at the same instance.
128 */
129 static void
tmpfs_hash_change(struct tdirent * tdp,struct tmpnode * fromtp)130 tmpfs_hash_change(struct tdirent *tdp, struct tmpnode *fromtp)
131 {
132 uint_t hash;
133 kmutex_t *t_hmtx;
134
135 hash = tdp->td_hash;
136 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
137 mutex_enter(t_hmtx);
138 tdp->td_tmpnode = fromtp;
139 mutex_exit(t_hmtx);
140 }
141
142 static struct tdirent *
tmpfs_hash_lookup(char * name,struct tmpnode * parent,uint_t hold,struct tmpnode ** found)143 tmpfs_hash_lookup(char *name, struct tmpnode *parent, uint_t hold,
144 struct tmpnode **found)
145 {
146 struct tdirent *l;
147 uint_t hash;
148 kmutex_t *t_hmtx;
149 struct tmpnode *tnp;
150
151 TMPFS_HASH(parent, name, hash);
152 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
153 mutex_enter(t_hmtx);
154 l = t_hashtable[T_HASH_INDEX(hash)];
155 while (l) {
156 if ((l->td_hash == hash) &&
157 (l->td_parent == parent) &&
158 (strcmp(l->td_name, name) == 0)) {
159 /*
160 * We need to make sure that the tmpnode that
161 * we put a hold on is the same one that we pass back.
162 * Hence, temporary variable tnp is necessary.
163 */
164 tnp = l->td_tmpnode;
165 if (hold) {
166 ASSERT(tnp);
167 tmpnode_hold(tnp);
168 }
169 if (found)
170 *found = tnp;
171 mutex_exit(t_hmtx);
172 return (l);
173 } else {
174 l = l->td_link;
175 }
176 }
177 mutex_exit(t_hmtx);
178 return (NULL);
179 }
180
181 /*
182 * Search directory 'parent' for entry 'name'.
183 *
184 * The calling thread can't hold the write version
185 * of the rwlock for the directory being searched
186 *
187 * 0 is returned on success and *foundtp points
188 * to the found tmpnode with its vnode held.
189 */
190 int
tdirlookup(struct tmpnode * parent,char * name,struct tmpnode ** foundtp,struct cred * cred)191 tdirlookup(
192 struct tmpnode *parent,
193 char *name,
194 struct tmpnode **foundtp,
195 struct cred *cred)
196 {
197 int error;
198
199 *foundtp = NULL;
200 if (parent->tn_type != VDIR)
201 return (ENOTDIR);
202
203 if ((error = tmp_taccess(parent, VEXEC, cred)))
204 return (error);
205
206 if (*name == '\0') {
207 tmpnode_hold(parent);
208 *foundtp = parent;
209 return (0);
210 }
211
212 /*
213 * Search the directory for the matching name
214 * We need the lock protecting the tn_dir list
215 * so that it doesn't change out from underneath us.
216 * tmpfs_hash_lookup() will pass back the tmpnode
217 * with a hold on it.
218 */
219
220 if (tmpfs_hash_lookup(name, parent, 1, foundtp) != NULL) {
221 ASSERT(*foundtp);
222 return (0);
223 }
224
225 return (ENOENT);
226 }
227
228 /*
229 * Enter a directory entry for 'name' and 'tp' into directory 'dir'
230 *
231 * Returns 0 on success.
232 */
233 int
tdirenter(struct tmount * tm,struct tmpnode * dir,char * name,enum de_op op,struct tmpnode * fromparent,struct tmpnode * tp,struct vattr * va,struct tmpnode ** tpp,struct cred * cred,caller_context_t * ctp)234 tdirenter(
235 struct tmount *tm,
236 struct tmpnode *dir, /* target directory to make entry in */
237 char *name, /* name of entry */
238 enum de_op op, /* entry operation */
239 struct tmpnode *fromparent, /* source directory if rename */
240 struct tmpnode *tp, /* source tmpnode, if link/rename */
241 struct vattr *va,
242 struct tmpnode **tpp, /* return tmpnode, if create/mkdir */
243 struct cred *cred,
244 caller_context_t *ctp)
245 {
246 struct tdirent *tdp;
247 struct tmpnode *found = NULL;
248 int error = 0;
249 char *s;
250
251 /*
252 * tn_rwlock is held to serialize direnter and dirdeletes
253 */
254 ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
255 ASSERT(dir->tn_type == VDIR);
256
257 /*
258 * Don't allow '/' characters in pathname component
259 * (thus in ufs_direnter()).
260 */
261 for (s = name; *s; s++)
262 if (*s == '/')
263 return (EACCES);
264
265 if (name[0] == '\0')
266 panic("tdirenter: NULL name");
267
268 /*
269 * For link and rename lock the source entry and check the link count
270 * to see if it has been removed while it was unlocked.
271 */
272 if (op == DE_LINK || op == DE_RENAME) {
273 if (tp != dir) {
274 unsigned int tries = 0;
275
276 /*
277 * If we are acquiring tp->tn_rwlock (for SOURCE)
278 * inside here, we must consider the following:
279 *
280 * - dir->tn_rwlock (TARGET) is already HELD (see
281 * above ASSERT()).
282 *
283 * - It is possible our SOURCE is a parent of our
284 * TARGET. Yes it's unusual, but it will return an
285 * error below via tdircheckpath().
286 *
287 * - It is also possible that another thread,
288 * concurrent to this one, is performing
289 * rmdir(TARGET), which means it will first acquire
290 * SOURCE's lock, THEN acquire TARGET's lock, which
291 * could result in this thread holding TARGET and
292 * trying for SOURCE, but the other thread holding
293 * SOURCE and trying for TARGET. This is deadlock,
294 * and it's inducible.
295 *
296 * To prevent this, we borrow some techniques from UFS
297 * and rw_tryenter(), delaying if we fail, and
298 * if someone tweaks the number of backoff tries to be
299 * nonzero, return EBUSY after that number of tries.
300 */
301 while (!rw_tryenter(&tp->tn_rwlock, RW_WRITER)) {
302 /*
303 * Sloppy, but this is a diagnostic so atomic
304 * increment would be overkill.
305 */
306 tmpfs_rename_loops++;
307
308 if (tmpfs_rename_backoff_tries != 0) {
309 if (tries > tmpfs_rename_backoff_tries)
310 return (EBUSY);
311 tries++;
312 }
313 /*
314 * NOTE: We're still holding dir->tn_rwlock,
315 * so drop it over the delay, so any other
316 * thread can get its business done.
317 *
318 * No state change or state inspection happens
319 * prior to here, so it is not wholly dangerous
320 * to release-and-reacquire dir->tn_rwlock.
321 *
322 * Hold the vnode of dir in case it gets
323 * released by another thread, though.
324 */
325 VN_HOLD(TNTOV(dir));
326 rw_exit(&dir->tn_rwlock);
327 delay(tmpfs_rename_backoff_delay);
328 rw_enter(&dir->tn_rwlock, RW_WRITER);
329 VN_RELE(TNTOV(dir));
330 }
331 }
332 mutex_enter(&tp->tn_tlock);
333 if (tp->tn_nlink == 0) {
334 mutex_exit(&tp->tn_tlock);
335 if (tp != dir)
336 rw_exit(&tp->tn_rwlock);
337 return (ENOENT);
338 }
339
340 if (tp->tn_nlink == MAXLINK) {
341 mutex_exit(&tp->tn_tlock);
342 if (tp != dir)
343 rw_exit(&tp->tn_rwlock);
344 return (EMLINK);
345 }
346 tp->tn_nlink++;
347 gethrestime(&tp->tn_ctime);
348 mutex_exit(&tp->tn_tlock);
349 if (tp != dir)
350 rw_exit(&tp->tn_rwlock);
351 }
352
353 /*
354 * This might be a "dangling detached directory".
355 * it could have been removed, but a reference
356 * to it kept in u_cwd. don't bother searching
357 * it, and with any luck the user will get tired
358 * of dealing with us and cd to some absolute
359 * pathway. *sigh*, thus in ufs, too.
360 */
361 if (dir->tn_nlink == 0) {
362 error = ENOENT;
363 goto out;
364 }
365
366 /*
367 * If this is a rename of a directory and the parent is
368 * different (".." must be changed), then the source
369 * directory must not be in the directory hierarchy
370 * above the target, as this would orphan everything
371 * below the source directory.
372 */
373 if (op == DE_RENAME) {
374 if (tp == dir) {
375 error = EINVAL;
376 goto out;
377 }
378 if (tp->tn_type == VDIR) {
379 if ((fromparent != dir) &&
380 (error = tdircheckpath(tp, dir, cred))) {
381 goto out;
382 }
383 }
384 }
385
386 /*
387 * Search for the entry. Return "found" if it exists.
388 */
389 tdp = tmpfs_hash_lookup(name, dir, 1, &found);
390
391 if (tdp) {
392 ASSERT(found);
393 switch (op) {
394 case DE_CREATE:
395 case DE_MKDIR:
396 if (tpp) {
397 *tpp = found;
398 error = EEXIST;
399 } else {
400 tmpnode_rele(found);
401 }
402 break;
403
404 case DE_RENAME:
405 error = tdirrename(fromparent, tp,
406 dir, name, found, tdp, cred);
407 if (error == 0) {
408 if (found != NULL) {
409 vnevent_rename_dest(TNTOV(found),
410 TNTOV(dir), name, ctp);
411 }
412 }
413
414 tmpnode_rele(found);
415 break;
416
417 case DE_LINK:
418 /*
419 * Can't link to an existing file.
420 */
421 error = EEXIST;
422 tmpnode_rele(found);
423 break;
424 }
425 } else {
426
427 /*
428 * The entry does not exist. Check write permission in
429 * directory to see if entry can be created.
430 */
431 if (error = tmp_taccess(dir, VWRITE, cred))
432 goto out;
433 if (op == DE_CREATE || op == DE_MKDIR) {
434 /*
435 * Make new tmpnode and directory entry as required.
436 */
437 error = tdirmaketnode(dir, tm, va, op, &tp, cred);
438 if (error)
439 goto out;
440 }
441 if (error = tdiraddentry(dir, tp, name, op, fromparent)) {
442 if (op == DE_CREATE || op == DE_MKDIR) {
443 /*
444 * Unmake the inode we just made.
445 */
446 rw_enter(&tp->tn_rwlock, RW_WRITER);
447 if ((tp->tn_type) == VDIR) {
448 ASSERT(tdp == NULL);
449 /*
450 * cleanup allocs made by tdirinit()
451 */
452 tdirtrunc(tp);
453 }
454 mutex_enter(&tp->tn_tlock);
455 tp->tn_nlink = 0;
456 mutex_exit(&tp->tn_tlock);
457 gethrestime(&tp->tn_ctime);
458 rw_exit(&tp->tn_rwlock);
459 tmpnode_rele(tp);
460 tp = NULL;
461 }
462 } else if (tpp) {
463 *tpp = tp;
464 } else if (op == DE_CREATE || op == DE_MKDIR) {
465 tmpnode_rele(tp);
466 }
467 }
468
469 out:
470 if (error && (op == DE_LINK || op == DE_RENAME)) {
471 /*
472 * Undo bumped link count.
473 */
474 DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
475 gethrestime(&tp->tn_ctime);
476 }
477 return (error);
478 }
479
480 /*
481 * Delete entry tp of name "nm" from dir.
482 * Free dir entry space and decrement link count on tmpnode(s).
483 *
484 * Return 0 on success.
485 */
486 int
tdirdelete(struct tmpnode * dir,struct tmpnode * tp,char * nm,enum dr_op op,struct cred * cred)487 tdirdelete(
488 struct tmpnode *dir,
489 struct tmpnode *tp,
490 char *nm,
491 enum dr_op op,
492 struct cred *cred)
493 {
494 struct tdirent *tpdp;
495 int error;
496 size_t namelen;
497 struct tmpnode *tnp;
498 timestruc_t now;
499
500 ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
501 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
502 ASSERT(dir->tn_type == VDIR);
503
504 if (nm[0] == '\0')
505 panic("tdirdelete: NULL name for %p", (void *)tp);
506
507 /*
508 * return error when removing . and ..
509 */
510 if (nm[0] == '.') {
511 if (nm[1] == '\0')
512 return (EINVAL);
513 if (nm[1] == '.' && nm[2] == '\0')
514 return (EEXIST); /* thus in ufs */
515 }
516
517 if (error = tmp_taccess(dir, VEXEC|VWRITE, cred))
518 return (error);
519
520 /*
521 * If the parent directory is "sticky", then the user must
522 * own the parent directory or the file in it, or else must
523 * have permission to write the file. Otherwise it may not
524 * be deleted (except by privileged users).
525 * Same as ufs_dirremove.
526 */
527 if ((error = tmp_sticky_remove_access(dir, tp, cred)) != 0)
528 return (error);
529
530 if (dir->tn_dir == NULL)
531 return (ENOENT);
532
533 tpdp = tmpfs_hash_lookup(nm, dir, 0, &tnp);
534 if (tpdp == NULL) {
535 /*
536 * If it is gone, some other thread got here first!
537 * Return error ENOENT.
538 */
539 return (ENOENT);
540 }
541
542 /*
543 * If the tmpnode in the tdirent changed, we were probably
544 * the victim of a concurrent rename operation. The original
545 * is gone, so return that status (same as UFS).
546 */
547 if (tp != tnp)
548 return (ENOENT);
549
550 tmpfs_hash_out(tpdp);
551
552 /*
553 * Take tpdp out of the directory list.
554 */
555 ASSERT(tpdp->td_next != tpdp);
556 ASSERT(tpdp->td_prev != tpdp);
557 if (tpdp->td_prev) {
558 tpdp->td_prev->td_next = tpdp->td_next;
559 }
560 if (tpdp->td_next) {
561 tpdp->td_next->td_prev = tpdp->td_prev;
562 }
563
564 /*
565 * If the roving slot pointer happens to match tpdp,
566 * point it at the previous dirent.
567 */
568 if (dir->tn_dir->td_prev == tpdp) {
569 dir->tn_dir->td_prev = tpdp->td_prev;
570 }
571 ASSERT(tpdp->td_next != tpdp);
572 ASSERT(tpdp->td_prev != tpdp);
573
574 /*
575 * tpdp points to the correct directory entry
576 */
577 namelen = strlen(tpdp->td_name) + 1;
578
579 tmp_memfree(tpdp, sizeof (struct tdirent) + namelen);
580 dir->tn_size -= (sizeof (struct tdirent) + namelen);
581 dir->tn_dirents--;
582
583 gethrestime(&now);
584 dir->tn_mtime = now;
585 dir->tn_ctime = now;
586 tp->tn_ctime = now;
587
588 ASSERT(tp->tn_nlink > 0);
589 DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
590 if (op == DR_RMDIR && tp->tn_type == VDIR) {
591 tdirtrunc(tp);
592 ASSERT(tp->tn_nlink == 0);
593 }
594 return (0);
595 }
596
597 /*
598 * tdirinit is used internally to initialize a directory (dir)
599 * with '.' and '..' entries without checking permissions and locking
600 */
601 void
tdirinit(struct tmpnode * parent,struct tmpnode * dir)602 tdirinit(
603 struct tmpnode *parent, /* parent of directory to initialize */
604 struct tmpnode *dir) /* the new directory */
605 {
606 struct tdirent *dot, *dotdot;
607 timestruc_t now;
608
609 ASSERT(RW_WRITE_HELD(&parent->tn_rwlock));
610 ASSERT(dir->tn_type == VDIR);
611
612 dot = tmp_memalloc(sizeof (struct tdirent) + 2, TMP_MUSTHAVE);
613 dotdot = tmp_memalloc(sizeof (struct tdirent) + 3, TMP_MUSTHAVE);
614
615 /*
616 * Initialize the entries
617 */
618 dot->td_tmpnode = dir;
619 dot->td_offset = 0;
620 dot->td_name = (char *)dot + sizeof (struct tdirent);
621 dot->td_name[0] = '.';
622 dot->td_parent = dir;
623 tmpfs_hash_in(dot);
624
625 dotdot->td_tmpnode = parent;
626 dotdot->td_offset = 1;
627 dotdot->td_name = (char *)dotdot + sizeof (struct tdirent);
628 dotdot->td_name[0] = '.';
629 dotdot->td_name[1] = '.';
630 dotdot->td_parent = dir;
631 tmpfs_hash_in(dotdot);
632
633 /*
634 * Initialize directory entry list.
635 */
636 dot->td_next = dotdot;
637 dot->td_prev = dotdot; /* dot's td_prev holds roving slot pointer */
638 dotdot->td_next = NULL;
639 dotdot->td_prev = dot;
640
641 gethrestime(&now);
642 dir->tn_mtime = now;
643 dir->tn_ctime = now;
644
645 /*
646 * Link counts are special for the hidden attribute directory.
647 * The only explicit reference in the name space is "." and
648 * the reference through ".." is not counted on the parent
649 * file. The attrdir is created as a side effect to lookup,
650 * so don't change the ctime of the parent.
651 * Since tdirinit is called with both dir and parent being the
652 * same for the root vnode, we need to increment this before we set
653 * tn_nlink = 2 below.
654 */
655 if (!(dir->tn_vnode->v_flag & V_XATTRDIR)) {
656 INCR_COUNT(&parent->tn_nlink, &parent->tn_tlock);
657 parent->tn_ctime = now;
658 }
659
660 dir->tn_dir = dot;
661 dir->tn_size = 2 * sizeof (struct tdirent) + 5; /* dot and dotdot */
662 dir->tn_dirents = 2;
663 dir->tn_nlink = 2;
664 }
665
666
667 /*
668 * tdirtrunc is called to remove all directory entries under this directory.
669 */
670 void
tdirtrunc(struct tmpnode * dir)671 tdirtrunc(struct tmpnode *dir)
672 {
673 struct tdirent *tdp;
674 struct tmpnode *tp;
675 size_t namelen;
676 timestruc_t now;
677 int isvattrdir, isdotdot, skip_decr;
678
679 ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
680 ASSERT(dir->tn_type == VDIR);
681
682 isvattrdir = (dir->tn_vnode->v_flag & V_XATTRDIR) ? 1 : 0;
683 for (tdp = dir->tn_dir; tdp; tdp = dir->tn_dir) {
684 ASSERT(tdp->td_next != tdp);
685 ASSERT(tdp->td_prev != tdp);
686 ASSERT(tdp->td_tmpnode);
687
688 dir->tn_dir = tdp->td_next;
689 namelen = strlen(tdp->td_name) + 1;
690
691 /*
692 * Adjust the link counts to account for this directory
693 * entry removal. Hidden attribute directories may
694 * not be empty as they may be truncated as a side-
695 * effect of removing the parent. We do hold/rele
696 * operations to free up these tmpnodes.
697 *
698 * Skip the link count adjustment for parents of
699 * attribute directories as those link counts
700 * do not include the ".." reference in the hidden
701 * directories.
702 */
703 tp = tdp->td_tmpnode;
704 isdotdot = (strcmp("..", tdp->td_name) == 0);
705 skip_decr = (isvattrdir && isdotdot);
706 if (!skip_decr) {
707 ASSERT(tp->tn_nlink > 0);
708 DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
709 }
710
711 tmpfs_hash_out(tdp);
712
713 tmp_memfree(tdp, sizeof (struct tdirent) + namelen);
714 dir->tn_size -= (sizeof (struct tdirent) + namelen);
715 dir->tn_dirents--;
716 }
717
718 gethrestime(&now);
719 dir->tn_mtime = now;
720 dir->tn_ctime = now;
721
722 ASSERT(dir->tn_dir == NULL);
723 ASSERT(dir->tn_size == 0);
724 ASSERT(dir->tn_dirents == 0);
725 }
726
727 /*
728 * Check if the source directory is in the path of the target directory.
729 * The target directory is locked by the caller.
730 *
731 * XXX - The source and target's should be different upon entry.
732 */
733 static int
tdircheckpath(struct tmpnode * fromtp,struct tmpnode * toparent,struct cred * cred)734 tdircheckpath(
735 struct tmpnode *fromtp,
736 struct tmpnode *toparent,
737 struct cred *cred)
738 {
739 int error = 0;
740 struct tmpnode *dir, *dotdot;
741 struct tdirent *tdp;
742
743 ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock));
744
745 tdp = tmpfs_hash_lookup("..", toparent, 1, &dotdot);
746 if (tdp == NULL)
747 return (ENOENT);
748
749 ASSERT(dotdot);
750
751 if (dotdot == toparent) {
752 /* root of fs. search trivially satisfied. */
753 tmpnode_rele(dotdot);
754 return (0);
755 }
756 for (;;) {
757 /*
758 * Return error for cases like "mv c c/d",
759 * "mv c c/d/e" and so on.
760 */
761 if (dotdot == fromtp) {
762 tmpnode_rele(dotdot);
763 error = EINVAL;
764 break;
765 }
766 dir = dotdot;
767 error = tdirlookup(dir, "..", &dotdot, cred);
768 if (error) {
769 tmpnode_rele(dir);
770 break;
771 }
772 /*
773 * We're okay if we traverse the directory tree up to
774 * the root directory and don't run into the
775 * parent directory.
776 */
777 if (dir == dotdot) {
778 tmpnode_rele(dir);
779 tmpnode_rele(dotdot);
780 break;
781 }
782 tmpnode_rele(dir);
783 }
784 return (error);
785 }
786
787 static int
tdirrename(struct tmpnode * fromparent,struct tmpnode * fromtp,struct tmpnode * toparent,char * nm,struct tmpnode * to,struct tdirent * where,struct cred * cred)788 tdirrename(
789 struct tmpnode *fromparent, /* parent directory of source */
790 struct tmpnode *fromtp, /* source tmpnode */
791 struct tmpnode *toparent, /* parent directory of target */
792 char *nm, /* entry we are trying to change */
793 struct tmpnode *to, /* target tmpnode */
794 struct tdirent *where, /* target tmpnode directory entry */
795 struct cred *cred) /* credentials */
796 {
797 int error = 0;
798 int doingdirectory;
799 timestruc_t now;
800
801 #if defined(lint)
802 nm = nm;
803 #endif
804 ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock));
805
806 /*
807 * Short circuit rename of something to itself.
808 */
809 if (fromtp == to)
810 return (ESAME); /* special KLUDGE error code */
811
812 rw_enter(&fromtp->tn_rwlock, RW_READER);
813 rw_enter(&to->tn_rwlock, RW_READER);
814
815 /*
816 * Check that everything is on the same filesystem.
817 */
818 if (to->tn_vnode->v_vfsp != toparent->tn_vnode->v_vfsp ||
819 to->tn_vnode->v_vfsp != fromtp->tn_vnode->v_vfsp) {
820 error = EXDEV;
821 goto out;
822 }
823
824 /*
825 * Must have write permission to rewrite target entry.
826 * Check for stickyness.
827 */
828 if ((error = tmp_taccess(toparent, VWRITE, cred)) != 0 ||
829 (error = tmp_sticky_remove_access(toparent, to, cred)) != 0)
830 goto out;
831
832 /*
833 * Ensure source and target are compatible (both directories
834 * or both not directories). If target is a directory it must
835 * be empty and have no links to it; in addition it must not
836 * be a mount point, and both the source and target must be
837 * writable.
838 */
839 doingdirectory = (fromtp->tn_type == VDIR);
840 if (to->tn_type == VDIR) {
841 if (!doingdirectory) {
842 error = EISDIR;
843 goto out;
844 }
845 /*
846 * vn_vfswlock will prevent mounts from using the directory
847 * until we are done.
848 */
849 if (vn_vfswlock(TNTOV(to))) {
850 error = EBUSY;
851 goto out;
852 }
853 if (vn_mountedvfs(TNTOV(to)) != NULL) {
854 vn_vfsunlock(TNTOV(to));
855 error = EBUSY;
856 goto out;
857 }
858
859 mutex_enter(&to->tn_tlock);
860 if (to->tn_dirents > 2 || to->tn_nlink > 2) {
861 mutex_exit(&to->tn_tlock);
862 vn_vfsunlock(TNTOV(to));
863 error = EEXIST; /* SIGH should be ENOTEMPTY */
864 /*
865 * Update atime because checking tn_dirents is
866 * logically equivalent to reading the directory
867 */
868 gethrestime(&to->tn_atime);
869 goto out;
870 }
871 mutex_exit(&to->tn_tlock);
872 } else if (doingdirectory) {
873 error = ENOTDIR;
874 goto out;
875 }
876
877 tmpfs_hash_change(where, fromtp);
878 gethrestime(&now);
879 toparent->tn_mtime = now;
880 toparent->tn_ctime = now;
881
882 /*
883 * Upgrade to write lock on "to" (i.e., the target tmpnode).
884 */
885 rw_exit(&to->tn_rwlock);
886 rw_enter(&to->tn_rwlock, RW_WRITER);
887
888 /*
889 * Decrement the link count of the target tmpnode.
890 */
891 DECR_COUNT(&to->tn_nlink, &to->tn_tlock);
892 to->tn_ctime = now;
893
894 if (doingdirectory) {
895 /*
896 * The entry for "to" no longer exists so release the vfslock.
897 */
898 vn_vfsunlock(TNTOV(to));
899
900 /*
901 * Decrement the target link count and delete all entires.
902 */
903 tdirtrunc(to);
904 ASSERT(to->tn_nlink == 0);
905
906 /*
907 * Renaming a directory with the parent different
908 * requires that ".." be rewritten. The window is
909 * still there for ".." to be inconsistent, but this
910 * is unavoidable, and a lot shorter than when it was
911 * done in a user process.
912 */
913 if (fromparent != toparent)
914 tdirfixdotdot(fromtp, fromparent, toparent);
915 }
916 out:
917 rw_exit(&to->tn_rwlock);
918 rw_exit(&fromtp->tn_rwlock);
919 return (error);
920 }
921
922 static void
tdirfixdotdot(struct tmpnode * fromtp,struct tmpnode * fromparent,struct tmpnode * toparent)923 tdirfixdotdot(
924 struct tmpnode *fromtp, /* child directory */
925 struct tmpnode *fromparent, /* old parent directory */
926 struct tmpnode *toparent) /* new parent directory */
927 {
928 struct tdirent *dotdot;
929
930 ASSERT(RW_LOCK_HELD(&toparent->tn_rwlock));
931
932 /*
933 * Increment the link count in the new parent tmpnode
934 */
935 INCR_COUNT(&toparent->tn_nlink, &toparent->tn_tlock);
936 gethrestime(&toparent->tn_ctime);
937
938 dotdot = tmpfs_hash_lookup("..", fromtp, 0, NULL);
939
940 ASSERT(dotdot->td_tmpnode == fromparent);
941 dotdot->td_tmpnode = toparent;
942
943 /*
944 * Decrement the link count of the old parent tmpnode.
945 * If fromparent is NULL, then this is a new directory link;
946 * it has no parent, so we need not do anything.
947 */
948 if (fromparent != NULL) {
949 mutex_enter(&fromparent->tn_tlock);
950 if (fromparent->tn_nlink != 0) {
951 fromparent->tn_nlink--;
952 gethrestime(&fromparent->tn_ctime);
953 }
954 mutex_exit(&fromparent->tn_tlock);
955 }
956 }
957
958 static int
tdiraddentry(struct tmpnode * dir,struct tmpnode * tp,char * name,enum de_op op,struct tmpnode * fromtp)959 tdiraddentry(
960 struct tmpnode *dir, /* target directory to make entry in */
961 struct tmpnode *tp, /* new tmpnode */
962 char *name,
963 enum de_op op,
964 struct tmpnode *fromtp)
965 {
966 struct tdirent *tdp, *tpdp;
967 size_t namelen, alloc_size;
968 timestruc_t now;
969
970 /*
971 * Make sure the parent directory wasn't removed from
972 * underneath the caller.
973 */
974 if (dir->tn_dir == NULL)
975 return (ENOENT);
976
977 /*
978 * Check that everything is on the same filesystem.
979 */
980 if (tp->tn_vnode->v_vfsp != dir->tn_vnode->v_vfsp)
981 return (EXDEV);
982
983 /*
984 * Allocate and initialize directory entry
985 */
986 namelen = strlen(name) + 1;
987 alloc_size = namelen + sizeof (struct tdirent);
988 tdp = tmp_memalloc(alloc_size, 0);
989 if (tdp == NULL)
990 return (ENOSPC);
991
992 if ((op == DE_RENAME) && (tp->tn_type == VDIR))
993 tdirfixdotdot(tp, fromtp, dir);
994
995 dir->tn_size += alloc_size;
996 dir->tn_dirents++;
997 tdp->td_tmpnode = tp;
998 tdp->td_parent = dir;
999
1000 /*
1001 * The directory entry and its name were allocated sequentially.
1002 */
1003 tdp->td_name = (char *)tdp + sizeof (struct tdirent);
1004 (void) strcpy(tdp->td_name, name);
1005
1006 tmpfs_hash_in(tdp);
1007
1008 /*
1009 * Some utilities expect the size of a directory to remain
1010 * somewhat static. For example, a routine which unlinks
1011 * files between calls to readdir(); the size of the
1012 * directory changes from underneath it and so the real
1013 * directory offset in bytes is invalid. To circumvent
1014 * this problem, we initialize a directory entry with an
1015 * phony offset, and use this offset to determine end of
1016 * file in tmp_readdir.
1017 */
1018 tpdp = dir->tn_dir->td_prev;
1019 /*
1020 * Install at first empty "slot" in directory list.
1021 */
1022 while (tpdp->td_next != NULL && (tpdp->td_next->td_offset -
1023 tpdp->td_offset) <= 1) {
1024 ASSERT(tpdp->td_next != tpdp);
1025 ASSERT(tpdp->td_prev != tpdp);
1026 ASSERT(tpdp->td_next->td_offset > tpdp->td_offset);
1027 tpdp = tpdp->td_next;
1028 }
1029 tdp->td_offset = tpdp->td_offset + 1;
1030
1031 /*
1032 * If we're at the end of the dirent list and the offset (which
1033 * is necessarily the largest offset in this directory) is more
1034 * than twice the number of dirents, that means the directory is
1035 * 50% holes. At this point we reset the slot pointer back to
1036 * the beginning of the directory so we start using the holes.
1037 * The idea is that if there are N dirents, there must also be
1038 * N holes, so we can satisfy the next N creates by walking at
1039 * most 2N entries; thus the average cost of a create is constant.
1040 * Note that we use the first dirent's td_prev as the roving
1041 * slot pointer; it's ugly, but it saves a word in every dirent.
1042 */
1043 if (tpdp->td_next == NULL && tpdp->td_offset > 2 * dir->tn_dirents)
1044 dir->tn_dir->td_prev = dir->tn_dir->td_next;
1045 else
1046 dir->tn_dir->td_prev = tdp;
1047
1048 ASSERT(tpdp->td_next != tpdp);
1049 ASSERT(tpdp->td_prev != tpdp);
1050
1051 tdp->td_next = tpdp->td_next;
1052 if (tdp->td_next) {
1053 tdp->td_next->td_prev = tdp;
1054 }
1055 tdp->td_prev = tpdp;
1056 tpdp->td_next = tdp;
1057
1058 ASSERT(tdp->td_next != tdp);
1059 ASSERT(tdp->td_prev != tdp);
1060 ASSERT(tpdp->td_next != tpdp);
1061 ASSERT(tpdp->td_prev != tpdp);
1062
1063 gethrestime(&now);
1064 dir->tn_mtime = now;
1065 dir->tn_ctime = now;
1066
1067 return (0);
1068 }
1069
1070 static int
tdirmaketnode(struct tmpnode * dir,struct tmount * tm,struct vattr * va,enum de_op op,struct tmpnode ** newnode,struct cred * cred)1071 tdirmaketnode(
1072 struct tmpnode *dir,
1073 struct tmount *tm,
1074 struct vattr *va,
1075 enum de_op op,
1076 struct tmpnode **newnode,
1077 struct cred *cred)
1078 {
1079 struct tmpnode *tp;
1080 enum vtype type;
1081
1082 ASSERT(va != NULL);
1083 ASSERT(op == DE_CREATE || op == DE_MKDIR);
1084 if (((va->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&va->va_atime)) ||
1085 ((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime)))
1086 return (EOVERFLOW);
1087 type = va->va_type;
1088 tp = tmp_memalloc(sizeof (struct tmpnode), TMP_MUSTHAVE);
1089 tmpnode_init(tm, tp, va, cred);
1090
1091 /* setup normal file/dir's extended attribute directory */
1092 if (dir->tn_flags & ISXATTR) {
1093 /* parent dir is , mark file as xattr */
1094 tp->tn_flags |= ISXATTR;
1095 }
1096
1097
1098 if (type == VBLK || type == VCHR) {
1099 tp->tn_vnode->v_rdev = tp->tn_rdev = va->va_rdev;
1100 } else {
1101 tp->tn_vnode->v_rdev = tp->tn_rdev = NODEV;
1102 }
1103 tp->tn_vnode->v_type = type;
1104 tp->tn_uid = crgetuid(cred);
1105
1106 /*
1107 * To determine the group-id of the created file:
1108 * 1) If the gid is set in the attribute list (non-Sun & pre-4.0
1109 * clients are not likely to set the gid), then use it if
1110 * the process is privileged, belongs to the target group,
1111 * or the group is the same as the parent directory.
1112 * 2) If the filesystem was not mounted with the Old-BSD-compatible
1113 * GRPID option, and the directory's set-gid bit is clear,
1114 * then use the process's gid.
1115 * 3) Otherwise, set the group-id to the gid of the parent directory.
1116 */
1117 if ((va->va_mask & AT_GID) &&
1118 ((va->va_gid == dir->tn_gid) || groupmember(va->va_gid, cred) ||
1119 secpolicy_vnode_create_gid(cred) == 0)) {
1120 /*
1121 * XXX - is this only the case when a 4.0 NFS client, or a
1122 * client derived from that code, makes a call over the wire?
1123 */
1124 tp->tn_gid = va->va_gid;
1125 } else {
1126 if (dir->tn_mode & VSGID)
1127 tp->tn_gid = dir->tn_gid;
1128 else
1129 tp->tn_gid = crgetgid(cred);
1130 }
1131 /*
1132 * If we're creating a directory, and the parent directory has the
1133 * set-GID bit set, set it on the new directory.
1134 * Otherwise, if the user is neither privileged nor a member of the
1135 * file's new group, clear the file's set-GID bit.
1136 */
1137 if (dir->tn_mode & VSGID && type == VDIR)
1138 tp->tn_mode |= VSGID;
1139 else {
1140 if ((tp->tn_mode & VSGID) &&
1141 secpolicy_vnode_setids_setgids(cred, tp->tn_gid) != 0)
1142 tp->tn_mode &= ~VSGID;
1143 }
1144
1145 if (va->va_mask & AT_ATIME)
1146 tp->tn_atime = va->va_atime;
1147 if (va->va_mask & AT_MTIME)
1148 tp->tn_mtime = va->va_mtime;
1149
1150 if (op == DE_MKDIR)
1151 tdirinit(dir, tp);
1152
1153 *newnode = tp;
1154 return (0);
1155 }
1156