xref: /titanic_44/usr/src/uts/common/fs/ufs/ufs_thread.c (revision 5494fa53f9c55de7fb31e3bfb7e0da50d08159a5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 /*
30  * Portions of this source code were derived from Berkeley 4.3 BSD
31  * under license from the Regents of the University of California.
32  */
33 
34 #pragma ident	"%Z%%M%	%I%	%E% SMI"
35 
36 #include <sys/types.h>
37 #include <sys/systm.h>
38 #include <sys/errno.h>
39 #include <sys/kmem.h>
40 #include <sys/buf.h>
41 #include <sys/vnode.h>
42 #include <sys/vfs.h>
43 #include <sys/user.h>
44 #include <sys/callb.h>
45 #include <sys/cpuvar.h>
46 #include <sys/fs/ufs_inode.h>
47 #include <sys/fs/ufs_log.h>
48 #include <sys/fs/ufs_trans.h>
49 #include <sys/fs/ufs_acl.h>
50 #include <sys/fs/ufs_bio.h>
51 #include <sys/fs/ufs_fsdir.h>
52 #include <sys/debug.h>
53 #include <sys/cmn_err.h>
54 #include <sys/sysmacros.h>
55 
/*
 * Symbols defined elsewhere in the kernel that this file relies on.
 */
/* scheduling priority handed to thread_create() by ufs_thread_start() */
extern pri_t 			minclsyspri;
/* NOTE(review): not referenced in the portion of the file reviewed here */
extern int			hash2ints();
extern struct kmem_cache	*inode_cache;	/* cache of free inodes */
/* NOTE(review): not referenced in the portion of the file reviewed here */
extern int			ufs_idle_waiters;
/* inode statistics; ufs_thread_idle() bumps ins.in_tidles */
extern struct instats		ins;

/*
 * Called by ufs_delete() for inodes of type IFATTRDIR; the definition
 * lives further down in this file.
 */
static void ufs_attr_purge(struct inode *);
63 
64 /*
65  * initialize a thread's queue struct
66  */
67 void
68 ufs_thread_init(struct ufs_q *uq, int lowat)
69 {
70 	bzero((caddr_t)uq, sizeof (*uq));
71 	cv_init(&uq->uq_cv, NULL, CV_DEFAULT, NULL);
72 	mutex_init(&uq->uq_mutex, NULL, MUTEX_DEFAULT, NULL);
73 	uq->uq_lowat = lowat;
74 	uq->uq_hiwat = 2 * lowat;
75 	uq->uq_threadp = NULL;
76 }
77 
78 /*
79  * start a thread for a queue (assumes success)
80  */
81 void
82 ufs_thread_start(struct ufs_q *uq, void (*func)(), struct vfs *vfsp)
83 {
84 	mutex_enter(&uq->uq_mutex);
85 	if (uq->uq_threadp == NULL) {
86 		uq->uq_threadp = thread_create(NULL, 0, func, vfsp, 0, &p0,
87 		    TS_RUN, minclsyspri);
88 		uq->uq_flags = 0;
89 	}
90 	mutex_exit(&uq->uq_mutex);
91 }
92 
93 /*
94  * wait for the thread to exit
95  */
96 void
97 ufs_thread_exit(struct ufs_q *uq)
98 {
99 	kt_did_t ufs_thread_did = 0;
100 
101 	mutex_enter(&uq->uq_mutex);
102 	uq->uq_flags &= ~(UQ_SUSPEND | UQ_SUSPENDED);
103 	if (uq->uq_threadp != NULL) {
104 		ufs_thread_did = uq->uq_threadp->t_did;
105 		uq->uq_flags |= (UQ_EXIT|UQ_WAIT);
106 		cv_broadcast(&uq->uq_cv);
107 	}
108 	mutex_exit(&uq->uq_mutex);
109 
110 	/*
111 	 * It's safe to call thread_join() with an already-gone
112 	 * t_did, but we have to obtain it before the kernel
113 	 * thread structure is freed. We do so above under the
114 	 * protection of the uq_mutex when we're sure the thread
115 	 * still exists and it's save to de-reference it.
116 	 * We also have to check if ufs_thread_did is != 0
117 	 * before calling thread_join() since thread 0 in the system
118 	 * gets a t_did of 0.
119 	 */
120 	if (ufs_thread_did)
121 		thread_join(ufs_thread_did);
122 }
123 
124 /*
125  * wait for a thread to suspend itself on the caller's behalf
126  *	the caller is responsible for continuing the thread
127  */
128 void
129 ufs_thread_suspend(struct ufs_q *uq)
130 {
131 	mutex_enter(&uq->uq_mutex);
132 	if (uq->uq_threadp != NULL) {
133 		/*
134 		 * wait while another thread is suspending this thread.
135 		 * no need to do a cv_broadcast(), as whoever suspended
136 		 * the thread must continue it at some point.
137 		 */
138 		while ((uq->uq_flags & UQ_SUSPEND) &&
139 		    (uq->uq_threadp != NULL)) {
140 			/*
141 			 * We can't use cv_signal() because if our
142 			 * signal doesn't happen to hit the desired
143 			 * thread but instead some other waiter like
144 			 * ourselves, we'll wait forever for a
145 			 * response.  Well, at least an indeterminate
146 			 * amount of time until we just happen to get
147 			 * lucky from whomever did get signalled doing
148 			 * a cv_signal() of their own.  This is an
149 			 * unfortunate performance lossage.
150 			 */
151 			uq->uq_flags |= UQ_WAIT;
152 			cv_wait(&uq->uq_cv, &uq->uq_mutex);
153 		}
154 
155 		uq->uq_flags |= (UQ_SUSPEND | UQ_WAIT);
156 
157 		/*
158 		 * wait for the thread to suspend itself
159 		 */
160 		if ((uq->uq_flags & UQ_SUSPENDED) == 0 &&
161 		    (uq->uq_threadp != NULL)) {
162 			cv_broadcast(&uq->uq_cv);
163 		}
164 
165 		while (((uq->uq_flags & UQ_SUSPENDED) == 0) &&
166 		    (uq->uq_threadp != NULL)) {
167 			cv_wait(&uq->uq_cv, &uq->uq_mutex);
168 		}
169 	}
170 	mutex_exit(&uq->uq_mutex);
171 }
172 
173 /*
174  * allow a thread to continue from a ufs_thread_suspend()
175  *	This thread must be the same as the thread that called
176  *	ufs_thread_suspend.
177  */
178 void
179 ufs_thread_continue(struct ufs_q *uq)
180 {
181 	mutex_enter(&uq->uq_mutex);
182 	uq->uq_flags &= ~(UQ_SUSPEND | UQ_SUSPENDED);
183 	cv_broadcast(&uq->uq_cv);
184 	mutex_exit(&uq->uq_mutex);
185 }
186 
187 /*
188  * some common code for managing a threads execution
189  *	uq is locked at entry and return
190  *	may sleep
191  *	may exit
192  */
193 /*
194  * Kind of a hack passing in the callb_cpr_t * here.
195  * It should really be part of the ufs_q structure.
196  * I did not put it in there because we are already in beta
197  * and I was concerned that changing ufs_inode.h to include
198  * callb.h might break something.
199  */
200 int
201 ufs_thread_run(struct ufs_q *uq, callb_cpr_t *cprinfop)
202 {
203 again:
204 	ASSERT(uq->uq_ne >= 0);
205 
206 	if (uq->uq_flags & UQ_SUSPEND) {
207 		uq->uq_flags |= UQ_SUSPENDED;
208 	} else if (uq->uq_flags & UQ_EXIT) {
209 		/*
210 		 * exiting; empty the queue (may infinite loop)
211 		 */
212 		if (uq->uq_ne)
213 			return (uq->uq_ne);
214 		uq->uq_threadp = NULL;
215 		if (uq->uq_flags & UQ_WAIT) {
216 			cv_broadcast(&uq->uq_cv);
217 		}
218 		uq->uq_flags &= ~(UQ_EXIT | UQ_WAIT);
219 		CALLB_CPR_EXIT(cprinfop);
220 		thread_exit();
221 	} else if (uq->uq_ne >= uq->uq_lowat) {
222 		/*
223 		 * process a block of entries until below high water mark
224 		 */
225 		return (uq->uq_ne - (uq->uq_lowat >> 1));
226 	} else if (uq->uq_flags & UQ_FASTCLIENTS) {
227 		/*
228 		 * Let the fast acting clients through
229 		 */
230 		return (0);
231 	}
232 	if (uq->uq_flags & UQ_WAIT) {
233 		uq->uq_flags &= ~UQ_WAIT;
234 		cv_broadcast(&uq->uq_cv);
235 	}
236 	CALLB_CPR_SAFE_BEGIN(cprinfop);
237 	cv_wait(&uq->uq_cv, &uq->uq_mutex);
238 	CALLB_CPR_SAFE_END(cprinfop, &uq->uq_mutex);
239 	goto again;
240 }
241 
/*
 * DELETE INODE
 * The following routines implement the protocol for freeing the resources
 * held by an idle and deleted inode.
 */
/*
 * Free everything held by a deleted inode and then the inode itself.
 *
 *	ufsvfsp		file system the inode belongs to
 *	ip		inode to delete; its vnode hold is dropped with
 *			VN_RELE() on every path except the
 *			ufs_lockfs_begin() failure return
 *	dolockfs	non-zero: bracket the work with ufs_lockfs_begin()
 *			and ufs_lockfs_end(); zero: only set T_DONTBLOCK
 *			on the current thread if it is not already set
 *
 * The per-file-system delete queue counters (vfs_delete_info) are
 * adjusted under the delete queue's uq_mutex.
 */
void
ufs_delete(struct ufsvfs *ufsvfsp, struct inode *ip, int dolockfs)
{
	ushort_t	mode;
	struct vnode	*vp	= ITOV(ip);
	struct ulockfs	*ulp;
	int		trans_size;
	/* i_rwlock is only taken for regular files */
	int		dorwlock = ((ip->i_mode & IFMT) == IFREG);
	int		issync;
	int		err;
	struct inode	*dp;
	struct ufs_q    *delq = &ufsvfsp->vfs_delete;
	struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info;

	/*
	 * not on a trans device or not part of a transaction
	 */
	ASSERT(!TRANS_ISTRANS(ufsvfsp) ||
		((curthread->t_flag & T_DONTBLOCK) == 0));

	/*
	 * Ignore if deletes are not allowed (wlock/hlock)
	 */
	if (ULOCKFS_IS_NOIDEL(ITOUL(ip))) {
		/* take this inode back out of the unreclaimed totals */
		mutex_enter(&delq->uq_mutex);
		delq_info->delq_unreclaimed_blocks -= ip->i_blocks;
		delq_info->delq_unreclaimed_files--;
		mutex_exit(&delq->uq_mutex);
		VN_RELE(vp);
		return;
	}

	/*
	 * Someone else holds the vnode, or the inode has no mode (already
	 * torn down): undo the accounting and just drop our hold.
	 */
	if ((vp->v_count > 1) || (ip->i_mode == 0)) {
		mutex_enter(&delq->uq_mutex);
		delq_info->delq_unreclaimed_blocks -= ip->i_blocks;
		delq_info->delq_unreclaimed_files--;
		mutex_exit(&delq->uq_mutex);
		VN_RELE(vp);
		return;
	}
	/*
	 * If we are called as part of setting a fs lock, then only
	 * do part of the lockfs protocol.  In other words, don't hang.
	 */
	if (dolockfs) {
		/*
		 * NOTE(review): this failure return neither drops the
		 * vnode hold nor adjusts the delq counters, unlike the
		 * two early returns above -- confirm this is intended.
		 */
		if (ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_DELETE_MASK))
			return;
	} else {
		/*
		 * check for recursive VOP call
		 */
		if (curthread->t_flag & T_DONTBLOCK) {
			/* a NULL ulp skips the TOP_IFREE transaction below */
			ulp = NULL;
		} else {
			ulp = &ufsvfsp->vfs_ulockfs;
			curthread->t_flag |= T_DONTBLOCK;
		}
	}

	/*
	 * Hold rwlock to synchronize with (nfs) writes
	 */
	if (dorwlock)
		rw_enter(&ip->i_rwlock, RW_WRITER);

	/*
	 * Delete the attribute directory.
	 */
	if (ip->i_oeftflag != 0) {
		TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_REMOVE,
		    trans_size = (int)TOP_REMOVE_SIZE(ip));
		rw_enter(&ip->i_contents, RW_WRITER);
		/* i_oeftflag is used as the inode number to look up */
		err = ufs_iget(ip->i_vfs, ip->i_oeftflag,
		    &dp, CRED());
		if (err == 0) {
			rw_enter(&dp->i_rwlock, RW_WRITER);
			rw_enter(&dp->i_contents, RW_WRITER);
			dp->i_flag |= IUPD|ICHG;
			dp->i_seq++;
			TRANS_INODE(dp->i_ufsvfs, dp);
			dp->i_nlink -= 2;
			ufs_setreclaim(dp);
			/*
			 * Should get rid of any negative cache entries that
			 * might be lingering, as well as ``.'' and
			 * ``..''.  If we don't, the VN_RELE() below
			 * won't actually put dp on the delete queue
			 * and it'll hang out until someone forces it
			 * (lockfs -f, umount, ...).  The only reliable
			 * way of doing this at the moment is to call
			 * dnlc_purge_vp(ITOV(dp)), which is unacceptably
			 * slow, so we'll just note the problem in this
			 * comment for now.
			 */
			dnlc_remove(ITOV(dp), ".");
			dnlc_remove(ITOV(dp), "..");
			ITIMES_NOLOCK(dp);
			if (!TRANS_ISTRANS(ufsvfsp)) {
				ufs_iupdat(dp, I_SYNC);
			}
			rw_exit(&dp->i_contents);
			rw_exit(&dp->i_rwlock);
			VN_RELE(ITOV(dp));
		}
		/*
		 * Clear out attribute pointer
		 */
		ip->i_oeftflag = 0;
		rw_exit(&ip->i_contents);
		TRANS_END_CSYNC(ufsvfsp, err, issync,
		    TOP_REMOVE, trans_size);
		dnlc_remove(ITOV(ip), XATTR_DIR_NAME);
	}

	if ((ip->i_mode & IFMT) == IFATTRDIR) {
		ufs_attr_purge(ip);
	}

	/* truncate to length zero; the result is deliberately ignored */
	(void) TRANS_ITRUNC(ip, (u_offset_t)0, I_FREE | I_ACCT, CRED());

	/*
	 * the inode's space has been freed; now free the inode
	 */
	if (ulp) {
		trans_size = TOP_IFREE_SIZE(ip);
		TRANS_BEGIN_ASYNC(ufsvfsp, TOP_IFREE, trans_size);
	}
	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
	rw_enter(&ip->i_contents, RW_WRITER);
	TRANS_INODE(ufsvfsp, ip);
	/* remember the mode for ufs_ifree() before it is cleared */
	mode = ip->i_mode;
	ip->i_mode = 0;
	ip->i_rdev = 0;
	ip->i_ordev = 0;
	ip->i_flag |= IMOD;
	if (ip->i_ufs_acl) {
		(void) ufs_si_free(ip->i_ufs_acl, vp->v_vfsp, CRED());
		ip->i_ufs_acl = NULL;
		ip->i_shadow = 0;
	}

	/*
	 * This inode is torn down but still retains its identity
	 * (inode number).  It could get recycled soon so it's best
	 * to clean up the vnode just in case.
	 */
	mutex_enter(&vp->v_lock);
	vn_recycle(vp);
	mutex_exit(&vp->v_lock);

	/*
	 * free the inode
	 */
	ufs_ifree(ip, ip->i_number, mode);
	/*
	 * release quota resources; can't fail
	 */
	(void) chkiq((struct ufsvfs *)vp->v_vfsp->vfs_data,
		/* change */ -1, ip, (uid_t)ip->i_uid, 0, CRED(),
		(char **)NULL, (size_t *)NULL);
	dqrele(ip->i_dquot);
	ip->i_dquot = NULL;
	ip->i_flag &= ~(IDEL | IDIRECTIO);
	ip->i_cflags = 0;
	if (!TRANS_ISTRANS(ufsvfsp)) {
		ufs_iupdat(ip, I_SYNC);
	} else {
		/* on a trans device the file count is decremented here */
		mutex_enter(&delq->uq_mutex);
		delq_info->delq_unreclaimed_files--;
		mutex_exit(&delq->uq_mutex);
	}
	rw_exit(&ip->i_contents);
	rw_exit(&ufsvfsp->vfs_dqrwlock);
	if (dorwlock)
		rw_exit(&ip->i_rwlock);
	VN_RELE(vp);

	/*
	 * End of transaction
	 */
	if (ulp) {
		TRANS_END_ASYNC(ufsvfsp, TOP_IFREE, trans_size);
		if (dolockfs)
			ufs_lockfs_end(ulp);
		else
			curthread->t_flag &= ~T_DONTBLOCK;
	}
}
435 
436 /*
437  * Create the delete thread and init the delq_info for this fs
438  */
439 void
440 ufs_delete_init(struct ufsvfs *ufsvfsp, int lowat)
441 {
442 	struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info;
443 
444 	ufs_thread_init(&ufsvfsp->vfs_delete, lowat);
445 	(void) memset((void *)delq_info, 0, sizeof (*delq_info));
446 	cv_init(&delq_info->delq_fast_cv, NULL, CV_DEFAULT, NULL);
447 }
448 
449 /*
450  * thread that frees up deleted inodes
451  */
452 void
453 ufs_thread_delete(struct vfs *vfsp)
454 {
455 	struct ufsvfs	*ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
456 	struct ufs_q	*uq = &ufsvfsp->vfs_delete;
457 	struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info;
458 	struct inode	*ip;
459 	long		ne;
460 	callb_cpr_t	cprinfo;
461 
462 	CALLB_CPR_INIT(&cprinfo, &uq->uq_mutex, callb_generic_cpr,
463 	    "ufsdelete");
464 
465 	mutex_enter(&uq->uq_mutex);
466 again:
467 	/*
468 	 * Sleep until there is work to do.  Only do one entry at
469 	 * a time, to reduce the wait time for checking for a suspend
470 	 * or fast-client request.  The ?: is for pedantic portability.
471 	 */
472 	ne = ufs_thread_run(uq, &cprinfo) ? 1 : 0;
473 
474 	/*
475 	 * process an entry, if there are any
476 	 */
477 	if (ne && (ip = uq->uq_ihead)) {
478 		/*
479 		 * process first entry on queue.  Assumed conditions are:
480 		 *	ip is held (v_count >= 1)
481 		 *	ip is referenced (i_flag & IREF)
482 		 *	ip is free (i_nlink <= 0)
483 		 */
484 		if ((uq->uq_ihead = ip->i_freef) == ip)
485 			uq->uq_ihead = NULL;
486 		ip->i_freef->i_freeb = ip->i_freeb;
487 		ip->i_freeb->i_freef = ip->i_freef;
488 		ip->i_freef = ip;
489 		ip->i_freeb = ip;
490 		uq->uq_ne--;
491 		mutex_exit(&uq->uq_mutex);
492 		ufs_delete(ufsvfsp, ip, 1);
493 		mutex_enter(&uq->uq_mutex);
494 	}
495 
496 	/*
497 	 * If there are any fast clients, let all of them through.
498 	 * Mainly intended for statvfs(), which doesn't need to do
499 	 * anything except look at the number of bytes/inodes that
500 	 * are in the queue.
501 	 */
502 	if (uq->uq_flags & UQ_FASTCLIENTS) {
503 		uq->uq_flags &= ~UQ_FASTCLIENTS;
504 		/*
505 		 * Give clients a chance.  The lock exit/entry
506 		 * allows waiting statvfs threads through.
507 		 */
508 		cv_broadcast(&delq_info->delq_fast_cv);
509 		mutex_exit(&uq->uq_mutex);
510 		mutex_enter(&uq->uq_mutex);
511 	}
512 	goto again;
513 }
514 
515 /*
516  * drain ne entries off the delete queue.  As new queue entries may
517  * be added while we're working, ne is interpreted as follows:
518  *
519  * ne > 0   => remove up to ne entries
520  * ne == 0  => remove all entries currently on the queue
521  * ne == -1 => remove entries until the queue is empty
522  */
523 void
524 ufs_delete_drain(struct vfs *vfsp, int ne, int dolockfs)
525 {
526 	struct ufsvfs	*ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
527 	struct ufs_q	*uq;
528 	struct inode	*ip;
529 	int		drain_cnt = 0;
530 	int		done;
531 
532 	/*
533 	 * if forcibly unmounted; ignore
534 	 */
535 	if (ufsvfsp == NULL)
536 		return;
537 
538 	uq = &ufsvfsp->vfs_delete;
539 	mutex_enter(&uq->uq_mutex);
540 	if (ne == 0)
541 		drain_cnt = uq->uq_ne;
542 	else if (ne > 0)
543 		drain_cnt = ne;
544 
545 	/*
546 	 * process up to ne entries
547 	 */
548 
549 	done = 0;
550 	while (!done && (ip = uq->uq_ihead)) {
551 		if (ne != -1)
552 			drain_cnt--;
553 		if (ne != -1 && drain_cnt == 0)
554 			done = 1;
555 		if ((uq->uq_ihead = ip->i_freef) == ip)
556 			uq->uq_ihead = NULL;
557 		ip->i_freef->i_freeb = ip->i_freeb;
558 		ip->i_freeb->i_freef = ip->i_freef;
559 		ip->i_freef = ip;
560 		ip->i_freeb = ip;
561 		uq->uq_ne--;
562 		mutex_exit(&uq->uq_mutex);
563 		ufs_delete(ufsvfsp, ip, dolockfs);
564 		mutex_enter(&uq->uq_mutex);
565 	}
566 	mutex_exit(&uq->uq_mutex);
567 }
568 
569 void
570 ufs_sync_with_thread(struct ufs_q *uq)
571 {
572 	mutex_enter(&uq->uq_mutex);
573 
574 	/*
575 	 * Wake up delete thread to free up space.
576 	 */
577 	if ((uq->uq_flags & UQ_WAIT) == 0) {
578 		uq->uq_flags |= UQ_WAIT;
579 		cv_broadcast(&uq->uq_cv);
580 	}
581 
582 	while ((uq->uq_threadp != NULL) && (uq->uq_flags & UQ_WAIT)) {
583 		cv_wait(&uq->uq_cv, &uq->uq_mutex);
584 	}
585 
586 	mutex_exit(&uq->uq_mutex);
587 }
588 
589 /*
590  * Get rid of everything that's currently in the delete queue,
591  * plus whatever the delete thread is working on at the moment.
592  *
593  * This ability is required for providing true POSIX semantics
594  * regarding close(2), unlink(2), etc, even when logging is enabled.
595  * The standard requires that the released space be immediately
596  * observable (statvfs(2)) and allocatable (e.g., write(2)).
597  */
598 void
599 ufs_delete_drain_wait(struct ufsvfs *ufsvfsp, int dolockfs)
600 {
601 	struct ufs_q *uq = &ufsvfsp->vfs_delete;
602 	int	error;
603 	struct ufs_q    *delq = &ufsvfsp->vfs_delete;
604 	struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info;
605 
606 	/*
607 	 * If there is something on delq or delete thread
608 	 * working on delq.
609 	 */
610 	mutex_enter(&delq->uq_mutex);
611 	if (delq_info->delq_unreclaimed_files > 0) {
612 		mutex_exit(&delq->uq_mutex);
613 		(void) ufs_delete_drain(ufsvfsp->vfs_vfs, 0, dolockfs);
614 		ufs_sync_with_thread(uq);
615 	} else {
616 		ASSERT(delq_info->delq_unreclaimed_files == 0);
617 		mutex_exit(&delq->uq_mutex);
618 		return;
619 	}
620 
621 	/*
622 	 * Commit any outstanding transactions to make sure
623 	 * any canceled freed blocks are available for allocation.
624 	 */
625 	curthread->t_flag |= T_DONTBLOCK;
626 	TRANS_BEGIN_SYNC(ufsvfsp, TOP_COMMIT_UPDATE, TOP_COMMIT_SIZE, error);
627 	if (!error) {
628 		TRANS_END_SYNC(ufsvfsp, error, TOP_COMMIT_UPDATE,
629 			TOP_COMMIT_SIZE);
630 	}
631 	curthread->t_flag &= ~T_DONTBLOCK;
632 }
633 
634 /*
635  * Adjust the resource usage in a struct statvfs based on
636  * what's in the delete queue.
637  *
638  * We do not consider the impact of ACLs or extended attributes
639  * that may be deleted as a side-effect of deleting a file.
640  * Those are metadata, and their sizes aren't reflected in the
641  * sizes returned by stat(), so this is not a problem.
642  */
643 void
644 ufs_delete_adjust_stats(struct ufsvfs *ufsvfsp, struct statvfs64 *sp)
645 {
646 	struct ufs_q *uq = &ufsvfsp->vfs_delete;
647 	struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info;
648 
649 	/*
650 	 * We'll get signalled when it's our turn.  However, if there's
651 	 * nothing going on, there's no point in waking up the delete
652 	 * thread and waiting for it to tell us to continue.
653 	 */
654 	mutex_enter(&uq->uq_mutex);
655 
656 	if ((uq->uq_flags & UQ_FASTCLIENTS) || (uq->uq_ne != 0)) {
657 		uq->uq_flags |= UQ_FASTCLIENTS;
658 		cv_broadcast(&uq->uq_cv);
659 		cv_wait(&delq_info->delq_fast_cv, &uq->uq_mutex);
660 	}
661 
662 	/*
663 	 * The blocks accounted for in the delete queue info are
664 	 * counted in DEV_BSIZE chunks, but ufs_statvfs counts in
665 	 * filesystem fragments, so a conversion is required here.
666 	 */
667 	sp->f_bfree += dbtofsb(ufsvfsp->vfs_fs,
668 	    delq_info->delq_unreclaimed_blocks);
669 	sp->f_ffree += delq_info->delq_unreclaimed_files;
670 	mutex_exit(&uq->uq_mutex);
671 }
672 
673 /*
674  * IDLE INODE
675  * The following routines implement the protocol for maintaining an
676  * LRU list of idle inodes and for moving the idle inodes to the
677  * reuse list when the number of allocated inodes exceeds the user
678  * tunable high-water mark (ufs_ninode).
679  */
680 
681 /*
682  * clean an idle inode and move it to the reuse list
683  */
684 static void
685 ufs_idle_free(struct inode *ip)
686 {
687 	int			pages;
688 	int			hno;
689 	kmutex_t		*ihm;
690 	struct ufsvfs		*ufsvfsp	= ip->i_ufsvfs;
691 	struct vnode		*vp		= ITOV(ip);
692 
693 	/*
694 	 * inode is held
695 	 */
696 
697 	/*
698 	 * remember `pages' for stats below
699 	 */
700 	pages = (ip->i_mode && vn_has_cached_data(vp) && vp->v_type != VCHR);
701 
702 	/*
703 	 * start the dirty pages to disk and then invalidate them
704 	 * unless the inode is invalid (ISTALE)
705 	 */
706 	if ((ip->i_flag & ISTALE) == 0) {
707 		(void) TRANS_SYNCIP(ip, B_ASYNC, I_ASYNC, TOP_SYNCIP_FREE);
708 		(void) TRANS_SYNCIP(ip,
709 				    (TRANS_ISERROR(ufsvfsp)) ?
710 				    B_INVAL | B_FORCE : B_INVAL,
711 				    I_ASYNC, TOP_SYNCIP_FREE);
712 	}
713 
714 	/*
715 	 * wait for any current ufs_iget to finish and block future ufs_igets
716 	 */
717 	ASSERT(ip->i_number != 0);
718 	hno = INOHASH(ip->i_number);
719 	ihm = &ih_lock[hno];
720 	mutex_enter(ihm);
721 
722 	/*
723 	 * It must be guaranteed that v_count >= 2, otherwise
724 	 * something must be wrong with this vnode already.
725 	 * That is why we use v_count-- instead of VN_RELE().
726 	 * Acquire the vnode lock in case another thread is in
727 	 * VN_RELE().
728 	 */
729 	mutex_enter(&vp->v_lock);
730 
731 	if (vp->v_count < 2)
732 		cmn_err(CE_PANIC,
733 			"ufs_idle_free: vnode ref count is less than 2");
734 
735 	vp->v_count--;
736 	if ((vp->v_type != VCHR && vn_has_cached_data(vp)) ||
737 		vp->v_count != 1 ||
738 		ip->i_flag & (IMOD|IMODACC|IACC|ICHG|IUPD|IATTCHG)) {
739 			/*
740 			 * Another thread has referenced this inode while
741 			 * we are trying to free it. Call VN_RELE() to
742 			 * release our reference.
743 			 */
744 			mutex_exit(&vp->v_lock);
745 			mutex_exit(ihm);
746 			VN_RELE(vp);
747 	} else {
748 		/*
749 		 * The inode is currently unreferenced and can not
750 		 * acquire further references because it has no pages
751 		 * and the hash is locked.  Inodes acquire references
752 		 * via the hash list or via their pages.
753 		 */
754 
755 		mutex_exit(&vp->v_lock);
756 
757 		/*
758 		 * remove it from the cache
759 		 */
760 		remque(ip);
761 		mutex_exit(ihm);
762 		/*
763 		 * Stale inodes have no valid ufsvfs
764 		 */
765 		if ((ip->i_flag & ISTALE) == 0 && ip->i_dquot) {
766 			TRANS_DQRELE(ufsvfsp, ip->i_dquot);
767 			ip->i_dquot = NULL;
768 		}
769 		ufs_si_del(ip);
770 		if (pages) {
771 			CPU_STATS_ADDQ(CPU, sys, ufsipage, 1);
772 		} else {
773 			CPU_STATS_ADDQ(CPU, sys, ufsinopage, 1);
774 		}
775 		ASSERT((vp->v_type == VCHR) || !vn_has_cached_data(vp));
776 
777 		/*
778 		 * We had better not have a vnode reference count > 1
779 		 * at this point, if we do then something is broken as
780 		 * this inode/vnode acquired a reference underneath of us.
781 		 */
782 		ASSERT(vp->v_count == 1);
783 
784 		ufs_free_inode(ip);
785 	}
786 }
787 
788 /*
789  * this thread processes the global idle queue
790  */
791 iqhead_t *ufs_junk_iq;
792 iqhead_t *ufs_useful_iq;
793 int ufs_njunk_iq = 0;
794 int ufs_nuseful_iq = 0;
795 int ufs_niqhash;
796 int ufs_iqhashmask;
797 struct ufs_q	ufs_idle_q;
798 
799 void
800 ufs_thread_idle(void)
801 {
802 	callb_cpr_t cprinfo;
803 	int i;
804 	int ne;
805 
806 	ufs_niqhash = (ufs_idle_q.uq_lowat >> 1) / IQHASHQLEN;
807 	ufs_niqhash = 1 << highbit(ufs_niqhash); /* round up to power of 2 */
808 	ufs_iqhashmask = ufs_niqhash - 1;
809 	ufs_junk_iq = kmem_alloc(ufs_niqhash * sizeof (*ufs_junk_iq),
810 	    KM_SLEEP);
811 	ufs_useful_iq = kmem_alloc(ufs_niqhash * sizeof (*ufs_useful_iq),
812 	    KM_SLEEP);
813 
814 	/* Initialize hash queue headers */
815 	for (i = 0; i < ufs_niqhash; i++) {
816 		ufs_junk_iq[i].i_freef = (inode_t *)&ufs_junk_iq[i];
817 		ufs_junk_iq[i].i_freeb = (inode_t *)&ufs_junk_iq[i];
818 		ufs_useful_iq[i].i_freef = (inode_t *)&ufs_useful_iq[i];
819 		ufs_useful_iq[i].i_freeb = (inode_t *)&ufs_useful_iq[i];
820 	}
821 
822 	CALLB_CPR_INIT(&cprinfo, &ufs_idle_q.uq_mutex, callb_generic_cpr,
823 	    "ufsidle");
824 again:
825 	/*
826 	 * Whenever the idle thread is awakened, it repeatedly gives
827 	 * back half of the idle queue until the idle queue falls
828 	 * below lowat.
829 	 */
830 	mutex_enter(&ufs_idle_q.uq_mutex);
831 	if (ufs_idle_q.uq_ne < ufs_idle_q.uq_lowat) {
832 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
833 		cv_wait(&ufs_idle_q.uq_cv, &ufs_idle_q.uq_mutex);
834 		CALLB_CPR_SAFE_END(&cprinfo, &ufs_idle_q.uq_mutex);
835 	}
836 	mutex_exit(&ufs_idle_q.uq_mutex);
837 
838 	/*
839 	 * Give back 1/2 of the idle queue
840 	 */
841 	ne = ufs_idle_q.uq_ne >> 1;
842 	ins.in_tidles.value.ul += ne;
843 	ufs_idle_some(ne);
844 	goto again;
845 }
846 
847 /*
848  * Reclaim callback for ufs inode cache.
849  * Invoked by the kernel memory allocator when memory gets tight.
850  */
851 /*ARGSUSED*/
852 void
853 ufs_inode_cache_reclaim(void *cdrarg)
854 {
855 	/*
856 	 * If we are low on memory and the idle queue is over its
857 	 * halfway mark, then free 50% of the idle q
858 	 *
859 	 * We don't free all of the idle inodes because the inodes
860 	 * for popular NFS files may have been kicked from the dnlc.
861 	 * The inodes for these files will end up on the idle queue
862 	 * after every NFS access.
863 	 *
864 	 * If we repeatedly push them from the idle queue then
865 	 * NFS users may be unhappy as an extra buf cache operation
866 	 * is incurred for every NFS operation to these files.
867 	 *
868 	 * It's not common, but I have seen it happen.
869 	 *
870 	 */
871 	if (ufs_idle_q.uq_ne < (ufs_idle_q.uq_lowat >> 1))
872 		return;
873 	mutex_enter(&ufs_idle_q.uq_mutex);
874 	cv_broadcast(&ufs_idle_q.uq_cv);
875 	mutex_exit(&ufs_idle_q.uq_mutex);
876 }
877 
878 /*
879  * Free up some idle inodes
880  */
881 void
882 ufs_idle_some(int ne)
883 {
884 	int i;
885 	struct inode *ip;
886 	struct vnode *vp;
887 	static int junk_rotor = 0;
888 	static int useful_rotor = 0;
889 
890 	for (i = 0; i < ne; ++i) {
891 		mutex_enter(&ufs_idle_q.uq_mutex);
892 
893 		if (ufs_njunk_iq) {
894 			while (ufs_junk_iq[junk_rotor].i_freef ==
895 			    (inode_t *)&ufs_junk_iq[junk_rotor]) {
896 				junk_rotor = IQNEXT(junk_rotor);
897 			}
898 			ip = ufs_junk_iq[junk_rotor].i_freef;
899 			ASSERT(ip->i_flag & IJUNKIQ);
900 		} else if (ufs_nuseful_iq) {
901 			while (ufs_useful_iq[useful_rotor].i_freef ==
902 			    (inode_t *)&ufs_useful_iq[useful_rotor]) {
903 				useful_rotor = IQNEXT(useful_rotor);
904 			}
905 			ip = ufs_useful_iq[useful_rotor].i_freef;
906 			ASSERT(!(ip->i_flag & IJUNKIQ));
907 		} else {
908 			mutex_exit(&ufs_idle_q.uq_mutex);
909 			return;
910 		}
911 
912 		/*
913 		 * emulate ufs_iget
914 		 */
915 		vp = ITOV(ip);
916 		VN_HOLD(vp);
917 		mutex_exit(&ufs_idle_q.uq_mutex);
918 		rw_enter(&ip->i_contents, RW_WRITER);
919 		/*
920 		 * VN_RELE should not be called if
921 		 * ufs_rmidle returns true, as it will
922 		 * effectively be done in ufs_idle_free.
923 		 */
924 		if (ufs_rmidle(ip)) {
925 			rw_exit(&ip->i_contents);
926 			ufs_idle_free(ip);
927 		} else {
928 			rw_exit(&ip->i_contents);
929 			VN_RELE(vp);
930 		}
931 	}
932 }
933 
934 /*
935  * drain entries for vfsp from the idle queue
936  * vfsp == NULL means drain the entire thing
937  */
938 void
939 ufs_idle_drain(struct vfs *vfsp)
940 {
941 	struct inode	*ip, *nip;
942 	struct inode	*ianchor = NULL;
943 	int		i;
944 
945 	mutex_enter(&ufs_idle_q.uq_mutex);
946 	if (ufs_njunk_iq) {
947 		/* for each hash q */
948 		for (i = 0; i < ufs_niqhash; i++) {
949 			/* search down the hash q */
950 			for (ip = ufs_junk_iq[i].i_freef;
951 			    ip != (inode_t *)&ufs_junk_iq[i];
952 			    ip = ip->i_freef) {
953 				if (ip->i_vfs == vfsp || vfsp == NULL) {
954 					/* found a matching entry */
955 					VN_HOLD(ITOV(ip));
956 					mutex_exit(&ufs_idle_q.uq_mutex);
957 					rw_enter(&ip->i_contents, RW_WRITER);
958 					/*
959 					 * See comments in ufs_idle_some()
960 					 * as we will call ufs_idle_free()
961 					 * after scanning both queues.
962 					 */
963 					if (ufs_rmidle(ip)) {
964 						rw_exit(&ip->i_contents);
965 						ip->i_freef = ianchor;
966 						ianchor = ip;
967 					} else {
968 						rw_exit(&ip->i_contents);
969 						VN_RELE(ITOV(ip));
970 					}
971 					/* restart this hash q */
972 					ip = (inode_t *)&ufs_junk_iq[i];
973 					mutex_enter(&ufs_idle_q.uq_mutex);
974 				}
975 			}
976 		}
977 	}
978 	if (ufs_nuseful_iq) {
979 		/* for each hash q */
980 		for (i = 0; i < ufs_niqhash; i++) {
981 			/* search down the hash q */
982 			for (ip = ufs_useful_iq[i].i_freef;
983 			    ip != (inode_t *)&ufs_useful_iq[i];
984 			    ip = ip->i_freef) {
985 				if (ip->i_vfs == vfsp || vfsp == NULL) {
986 					/* found a matching entry */
987 					VN_HOLD(ITOV(ip));
988 					mutex_exit(&ufs_idle_q.uq_mutex);
989 					rw_enter(&ip->i_contents, RW_WRITER);
990 					/*
991 					 * See comments in ufs_idle_some()
992 					 * as we will call ufs_idle_free()
993 					 * after scanning both queues.
994 					 */
995 					if (ufs_rmidle(ip)) {
996 						rw_exit(&ip->i_contents);
997 						ip->i_freef = ianchor;
998 						ianchor = ip;
999 					} else {
1000 						rw_exit(&ip->i_contents);
1001 						VN_RELE(ITOV(ip));
1002 					}
1003 					/* restart this hash q */
1004 					ip = (inode_t *)&ufs_useful_iq[i];
1005 					mutex_enter(&ufs_idle_q.uq_mutex);
1006 				}
1007 			}
1008 		}
1009 	}
1010 
1011 	mutex_exit(&ufs_idle_q.uq_mutex);
1012 	/* no more matching entries, release those we have found (if any) */
1013 	for (ip = ianchor; ip; ip = nip) {
1014 		nip = ip->i_freef;
1015 		ip->i_freef = ip;
1016 		ufs_idle_free(ip);
1017 	}
1018 }
1019 
1020 /*
1021  * RECLAIM DELETED INODES
1022  * The following thread scans the file system once looking for deleted files
1023  */
1024 void
1025 ufs_thread_reclaim(struct vfs *vfsp)
1026 {
1027 	struct ufsvfs		*ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
1028 	struct ufs_q		*uq	= &ufsvfsp->vfs_reclaim;
1029 	struct fs		*fs	= ufsvfsp->vfs_fs;
1030 	struct buf		*bp	= 0;
1031 	int			err	= 0;
1032 	daddr_t			bno;
1033 	ino_t			ino;
1034 	struct dinode		*dp;
1035 	struct inode		*ip;
1036 	callb_cpr_t		cprinfo;
1037 
1038 	CALLB_CPR_INIT(&cprinfo, &uq->uq_mutex, callb_generic_cpr,
1039 	    "ufsreclaim");
1040 
1041 	/*
1042 	 * mount decided that we don't need a reclaim thread
1043 	 */
1044 	if ((fs->fs_reclaim & FS_RECLAIMING) == 0)
1045 		err++;
1046 
1047 	/*
1048 	 * don't reclaim if readonly
1049 	 */
1050 	if (fs->fs_ronly)
1051 		err++;
1052 
1053 	for (ino = 0; ino < (fs->fs_ncg * fs->fs_ipg) && !err; ++ino) {
1054 
1055 		/*
1056 		 * Check whether we are the target of another
1057 		 * thread having called ufs_thread_exit() or
1058 		 * ufs_thread_suspend().
1059 		 */
1060 		mutex_enter(&uq->uq_mutex);
1061 again:
1062 		if (uq->uq_flags & UQ_EXIT) {
1063 			err++;
1064 			mutex_exit(&uq->uq_mutex);
1065 			break;
1066 		} else if (uq->uq_flags & UQ_SUSPEND) {
1067 			uq->uq_flags |= UQ_SUSPENDED;
1068 			/*
1069 			 * Release the buf before we cv_wait()
1070 			 * otherwise we may deadlock with the
1071 			 * thread that called ufs_thread_suspend().
1072 			 */
1073 			if (bp) {
1074 				brelse(bp);
1075 				bp = 0;
1076 			}
1077 			if (uq->uq_flags & UQ_WAIT) {
1078 				uq->uq_flags &= ~UQ_WAIT;
1079 				cv_broadcast(&uq->uq_cv);
1080 			}
1081 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
1082 			cv_wait(&uq->uq_cv, &uq->uq_mutex);
1083 			CALLB_CPR_SAFE_END(&cprinfo, &uq->uq_mutex);
1084 			goto again;
1085 		}
1086 		mutex_exit(&uq->uq_mutex);
1087 
1088 		/*
1089 		 * if we don't already have the buf; get it
1090 		 */
1091 		bno = fsbtodb(fs, itod(fs, ino));
1092 		if ((bp == 0) || (bp->b_blkno != bno)) {
1093 			if (bp)
1094 				brelse(bp);
1095 			bp = UFS_BREAD(ufsvfsp,
1096 					ufsvfsp->vfs_dev, bno, fs->fs_bsize);
1097 			bp->b_flags |= B_AGE;
1098 		}
1099 		if (bp->b_flags & B_ERROR) {
1100 			err++;
1101 			continue;
1102 		}
1103 		/*
1104 		 * nlink <= 0 and mode != 0 means deleted
1105 		 */
1106 		dp = (struct dinode *)bp->b_un.b_addr + itoo(fs, ino);
1107 		if ((dp->di_nlink <= 0) && (dp->di_mode != 0)) {
1108 			/*
1109 			 * can't hold the buf (deadlock)
1110 			 */
1111 			brelse(bp);
1112 			bp = 0;
1113 			rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
1114 			/*
1115 			 * iget/iput sequence will put inode on ifree
1116 			 * thread queue if it is idle.  This is a nop
1117 			 * for busy (open, deleted) inodes
1118 			 */
1119 			if (ufs_iget(vfsp, ino, &ip, CRED()))
1120 				err++;
1121 			else
1122 				VN_RELE(ITOV(ip));
1123 			rw_exit(&ufsvfsp->vfs_dqrwlock);
1124 		}
1125 	}
1126 
1127 	if (bp)
1128 		brelse(bp);
1129 	if (!err) {
1130 		/*
1131 		 * reset the reclaiming-bit
1132 		 */
1133 		mutex_enter(&ufsvfsp->vfs_lock);
1134 		fs->fs_reclaim &= ~FS_RECLAIMING;
1135 		mutex_exit(&ufsvfsp->vfs_lock);
1136 		TRANS_SBWRITE(ufsvfsp, TOP_SBWRITE_RECLAIM);
1137 	}
1138 
1139 	/*
1140 	 * exit the reclaim thread
1141 	 */
1142 	mutex_enter(&uq->uq_mutex);
1143 	uq->uq_threadp = NULL;
1144 	uq->uq_flags &= ~UQ_WAIT;
1145 	cv_broadcast(&uq->uq_cv);
1146 	CALLB_CPR_EXIT(&cprinfo);
1147 	thread_exit();
1148 }
1149 /*
1150  * HLOCK FILE SYSTEM
1151  *	hlock the file system's whose logs have device errors
1152  */
1153 struct ufs_q	ufs_hlock;
1154 /*ARGSUSED*/
1155 void
1156 ufs_thread_hlock(void *ignore)
1157 {
1158 	int		retry;
1159 	callb_cpr_t	cprinfo;
1160 
1161 	CALLB_CPR_INIT(&cprinfo, &ufs_hlock.uq_mutex, callb_generic_cpr,
1162 	    "ufshlock");
1163 
1164 	for (;;) {
1165 		/*
1166 		 * sleep until there is work to do
1167 		 */
1168 		mutex_enter(&ufs_hlock.uq_mutex);
1169 		(void) ufs_thread_run(&ufs_hlock, &cprinfo);
1170 		ufs_hlock.uq_ne = 0;
1171 		mutex_exit(&ufs_hlock.uq_mutex);
1172 		/*
1173 		 * hlock the error'ed fs's
1174 		 *	retry after a bit if another app is doing lockfs stuff
1175 		 */
1176 		do {
1177 			retry = ufs_trans_hlock();
1178 			if (retry) {
1179 				mutex_enter(&ufs_hlock.uq_mutex);
1180 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
1181 				(void) cv_timedwait(&ufs_hlock.uq_cv,
1182 							&ufs_hlock.uq_mutex,
1183 							lbolt + hz);
1184 				CALLB_CPR_SAFE_END(&cprinfo,
1185 				    &ufs_hlock.uq_mutex);
1186 				mutex_exit(&ufs_hlock.uq_mutex);
1187 			}
1188 		} while (retry);
1189 	}
1190 }
1191 
1192 static void
1193 ufs_attr_purge(struct inode *dp)
1194 {
1195 	int	err;
1196 	int	error;
1197 	off_t 	dirsize;			/* size of the directory */
1198 	off_t 	offset;	/* offset in the directory */
1199 	int entryoffsetinblk;		/* offset of ep in fbp's buffer */
1200 	struct inode *tp;
1201 	struct fbuf *fbp;	/* pointer to directory block */
1202 	struct direct *ep;	/* directory entry */
1203 	int trans_size;
1204 	int issync;
1205 	struct ufsvfs	*ufsvfsp = dp->i_ufsvfs;
1206 
1207 	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
1208 
1209 	fbp = NULL;
1210 	dirsize = roundup(dp->i_size, DIRBLKSIZ);
1211 	offset = 0;
1212 	entryoffsetinblk = 0;
1213 
1214 	/*
1215 	 * Purge directory cache
1216 	 */
1217 
1218 	dnlc_dir_purge(&dp->i_danchor);
1219 
1220 	while (offset < dirsize) {
1221 		/*
1222 		 * If offset is on a block boundary,
1223 		 * read the next directory block.
1224 		 * Release previous if it exists.
1225 		 */
1226 		if (blkoff(dp->i_fs, offset) == 0) {
1227 			if (fbp != NULL) {
1228 				fbrelse(fbp, S_OTHER);
1229 			}
1230 
1231 			err = blkatoff(dp, offset, (char **)0, &fbp);
1232 			if (err) {
1233 				goto out;
1234 			}
1235 			entryoffsetinblk = 0;
1236 		}
1237 		ep = (struct direct *)(fbp->fb_addr + entryoffsetinblk);
1238 		if (ep->d_ino == 0 || (ep->d_name[0] == '.' &&
1239 		    ep->d_name[1] == '\0') ||
1240 		    (ep->d_name[0] == '.' && ep->d_name[1] == '.' &&
1241 		    ep->d_name[2] == '\0')) {
1242 
1243 			entryoffsetinblk += ep->d_reclen;
1244 
1245 		} else {
1246 
1247 			if ((err = ufs_iget(dp->i_vfs, ep->d_ino,
1248 			    &tp, CRED())) != 0) {
1249 				goto out;
1250 			}
1251 
1252 			TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_REMOVE,
1253 			    trans_size = (int)TOP_REMOVE_SIZE(tp));
1254 
1255 			/*
1256 			 * Delete inode.
1257 			 */
1258 
1259 			dnlc_remove(ITOV(dp), ep->d_name);
1260 
1261 			rw_enter(&tp->i_contents, RW_WRITER);
1262 			tp->i_flag |= ICHG;
1263 			tp->i_seq++;
1264 			TRANS_INODE(tp->i_ufsvfs, tp);
1265 			tp->i_nlink--;
1266 			ufs_setreclaim(tp);
1267 			ITIMES_NOLOCK(tp);
1268 			rw_exit(&tp->i_contents);
1269 
1270 			VN_RELE(ITOV(tp));
1271 			entryoffsetinblk += ep->d_reclen;
1272 			TRANS_END_CSYNC(ufsvfsp, error,
1273 			    issync, TOP_REMOVE, trans_size);
1274 
1275 		}
1276 		offset += ep->d_reclen;
1277 	}
1278 
1279 	if (fbp) {
1280 		fbrelse(fbp, S_OTHER);
1281 	}
1282 
1283 out:
1284 	rw_exit(&ufsvfsp->vfs_dqrwlock);
1285 }
1286