xref: /titanic_41/usr/src/uts/common/fs/ufs/ufs_thread.c (revision d3d50737e566cade9a08d73d2af95105ac7cd960)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 /*
30  * Portions of this source code were derived from Berkeley 4.3 BSD
31  * under license from the Regents of the University of California.
32  */
33 
34 #include <sys/types.h>
35 #include <sys/systm.h>
36 #include <sys/errno.h>
37 #include <sys/kmem.h>
38 #include <sys/buf.h>
39 #include <sys/vnode.h>
40 #include <sys/vfs.h>
41 #include <sys/user.h>
42 #include <sys/callb.h>
43 #include <sys/cpuvar.h>
44 #include <sys/fs/ufs_inode.h>
45 #include <sys/fs/ufs_log.h>
46 #include <sys/fs/ufs_trans.h>
47 #include <sys/fs/ufs_acl.h>
48 #include <sys/fs/ufs_bio.h>
49 #include <sys/fs/ufs_fsdir.h>
50 #include <sys/debug.h>
51 #include <sys/cmn_err.h>
52 #include <sys/sysmacros.h>
53 #include <vm/pvn.h>
54 
55 extern pri_t 			minclsyspri;
56 extern int			hash2ints();
57 extern struct kmem_cache	*inode_cache;	/* cache of free inodes */
58 extern int			ufs_idle_waiters;
59 extern struct instats		ins;
60 
61 static void ufs_attr_purge(struct inode *);
62 
63 /*
64  * initialize a thread's queue struct
65  */
66 void
ufs_thread_init(struct ufs_q * uq,int lowat)67 ufs_thread_init(struct ufs_q *uq, int lowat)
68 {
69 	bzero((caddr_t)uq, sizeof (*uq));
70 	cv_init(&uq->uq_cv, NULL, CV_DEFAULT, NULL);
71 	mutex_init(&uq->uq_mutex, NULL, MUTEX_DEFAULT, NULL);
72 	uq->uq_lowat = lowat;
73 	uq->uq_hiwat = 2 * lowat;
74 	uq->uq_threadp = NULL;
75 }
76 
77 /*
78  * start a thread for a queue (assumes success)
79  */
80 void
ufs_thread_start(struct ufs_q * uq,void (* func)(),struct vfs * vfsp)81 ufs_thread_start(struct ufs_q *uq, void (*func)(), struct vfs *vfsp)
82 {
83 	mutex_enter(&uq->uq_mutex);
84 	if (uq->uq_threadp == NULL) {
85 		uq->uq_threadp = thread_create(NULL, 0, func, vfsp, 0, &p0,
86 		    TS_RUN, minclsyspri);
87 		uq->uq_flags = 0;
88 	}
89 	mutex_exit(&uq->uq_mutex);
90 }
91 
92 /*
93  * wait for the thread to exit
94  */
95 void
ufs_thread_exit(struct ufs_q * uq)96 ufs_thread_exit(struct ufs_q *uq)
97 {
98 	kt_did_t ufs_thread_did = 0;
99 
100 	mutex_enter(&uq->uq_mutex);
101 	uq->uq_flags &= ~(UQ_SUSPEND | UQ_SUSPENDED);
102 	if (uq->uq_threadp != NULL) {
103 		ufs_thread_did = uq->uq_threadp->t_did;
104 		uq->uq_flags |= (UQ_EXIT|UQ_WAIT);
105 		cv_broadcast(&uq->uq_cv);
106 	}
107 	mutex_exit(&uq->uq_mutex);
108 
109 	/*
110 	 * It's safe to call thread_join() with an already-gone
111 	 * t_did, but we have to obtain it before the kernel
112 	 * thread structure is freed. We do so above under the
113 	 * protection of the uq_mutex when we're sure the thread
114 	 * still exists and it's save to de-reference it.
115 	 * We also have to check if ufs_thread_did is != 0
116 	 * before calling thread_join() since thread 0 in the system
117 	 * gets a t_did of 0.
118 	 */
119 	if (ufs_thread_did)
120 		thread_join(ufs_thread_did);
121 }
122 
123 /*
124  * wait for a thread to suspend itself on the caller's behalf
125  *	the caller is responsible for continuing the thread
126  */
127 void
ufs_thread_suspend(struct ufs_q * uq)128 ufs_thread_suspend(struct ufs_q *uq)
129 {
130 	mutex_enter(&uq->uq_mutex);
131 	if (uq->uq_threadp != NULL) {
132 		/*
133 		 * wait while another thread is suspending this thread.
134 		 * no need to do a cv_broadcast(), as whoever suspended
135 		 * the thread must continue it at some point.
136 		 */
137 		while ((uq->uq_flags & UQ_SUSPEND) &&
138 		    (uq->uq_threadp != NULL)) {
139 			/*
140 			 * We can't use cv_signal() because if our
141 			 * signal doesn't happen to hit the desired
142 			 * thread but instead some other waiter like
143 			 * ourselves, we'll wait forever for a
144 			 * response.  Well, at least an indeterminate
145 			 * amount of time until we just happen to get
146 			 * lucky from whomever did get signalled doing
147 			 * a cv_signal() of their own.  This is an
148 			 * unfortunate performance lossage.
149 			 */
150 			uq->uq_flags |= UQ_WAIT;
151 			cv_wait(&uq->uq_cv, &uq->uq_mutex);
152 		}
153 
154 		uq->uq_flags |= (UQ_SUSPEND | UQ_WAIT);
155 
156 		/*
157 		 * wait for the thread to suspend itself
158 		 */
159 		if ((uq->uq_flags & UQ_SUSPENDED) == 0 &&
160 		    (uq->uq_threadp != NULL)) {
161 			cv_broadcast(&uq->uq_cv);
162 		}
163 
164 		while (((uq->uq_flags & UQ_SUSPENDED) == 0) &&
165 		    (uq->uq_threadp != NULL)) {
166 			cv_wait(&uq->uq_cv, &uq->uq_mutex);
167 		}
168 	}
169 	mutex_exit(&uq->uq_mutex);
170 }
171 
172 /*
173  * allow a thread to continue from a ufs_thread_suspend()
174  *	This thread must be the same as the thread that called
175  *	ufs_thread_suspend.
176  */
177 void
ufs_thread_continue(struct ufs_q * uq)178 ufs_thread_continue(struct ufs_q *uq)
179 {
180 	mutex_enter(&uq->uq_mutex);
181 	uq->uq_flags &= ~(UQ_SUSPEND | UQ_SUSPENDED);
182 	cv_broadcast(&uq->uq_cv);
183 	mutex_exit(&uq->uq_mutex);
184 }
185 
186 /*
187  * some common code for managing a threads execution
188  *	uq is locked at entry and return
189  *	may sleep
190  *	may exit
191  */
192 /*
193  * Kind of a hack passing in the callb_cpr_t * here.
194  * It should really be part of the ufs_q structure.
195  * I did not put it in there because we are already in beta
196  * and I was concerned that changing ufs_inode.h to include
197  * callb.h might break something.
198  */
199 int
ufs_thread_run(struct ufs_q * uq,callb_cpr_t * cprinfop)200 ufs_thread_run(struct ufs_q *uq, callb_cpr_t *cprinfop)
201 {
202 again:
203 	ASSERT(uq->uq_ne >= 0);
204 
205 	if (uq->uq_flags & UQ_SUSPEND) {
206 		uq->uq_flags |= UQ_SUSPENDED;
207 	} else if (uq->uq_flags & UQ_EXIT) {
208 		/*
209 		 * exiting; empty the queue (may infinite loop)
210 		 */
211 		if (uq->uq_ne)
212 			return (uq->uq_ne);
213 		uq->uq_threadp = NULL;
214 		if (uq->uq_flags & UQ_WAIT) {
215 			cv_broadcast(&uq->uq_cv);
216 		}
217 		uq->uq_flags &= ~(UQ_EXIT | UQ_WAIT);
218 		CALLB_CPR_EXIT(cprinfop);
219 		thread_exit();
220 	} else if (uq->uq_ne >= uq->uq_lowat) {
221 		/*
222 		 * process a block of entries until below high water mark
223 		 */
224 		return (uq->uq_ne - (uq->uq_lowat >> 1));
225 	}
226 	if (uq->uq_flags & UQ_WAIT) {
227 		uq->uq_flags &= ~UQ_WAIT;
228 		cv_broadcast(&uq->uq_cv);
229 	}
230 	CALLB_CPR_SAFE_BEGIN(cprinfop);
231 	cv_wait(&uq->uq_cv, &uq->uq_mutex);
232 	CALLB_CPR_SAFE_END(cprinfop, &uq->uq_mutex);
233 	goto again;
234 }
235 
236 /*
237  * DELETE INODE
238  * The following routines implement the protocol for freeing the resources
239  * held by an idle and deleted inode.
240  */
241 void
ufs_delete(struct ufsvfs * ufsvfsp,struct inode * ip,int dolockfs)242 ufs_delete(struct ufsvfs *ufsvfsp, struct inode *ip, int dolockfs)
243 {
244 	ushort_t	mode;
245 	struct vnode	*vp	= ITOV(ip);
246 	struct ulockfs	*ulp;
247 	int		trans_size;
248 	int		dorwlock = ((ip->i_mode & IFMT) == IFREG);
249 	int		issync;
250 	int		err;
251 	struct inode	*dp;
252 	struct ufs_q    *delq = &ufsvfsp->vfs_delete;
253 	struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info;
254 
255 	/*
256 	 * Ignore if deletes are not allowed (wlock/hlock)
257 	 */
258 	if (ULOCKFS_IS_NOIDEL(ITOUL(ip))) {
259 		mutex_enter(&delq->uq_mutex);
260 		delq_info->delq_unreclaimed_blocks -= ip->i_blocks;
261 		delq_info->delq_unreclaimed_files--;
262 		mutex_exit(&delq->uq_mutex);
263 		VN_RELE(vp);
264 		return;
265 	}
266 
267 	if ((vp->v_count > 1) || (ip->i_mode == 0)) {
268 		mutex_enter(&delq->uq_mutex);
269 		delq_info->delq_unreclaimed_blocks -= ip->i_blocks;
270 		delq_info->delq_unreclaimed_files--;
271 		mutex_exit(&delq->uq_mutex);
272 		VN_RELE(vp);
273 		return;
274 	}
275 	/*
276 	 * If we are called as part of setting a fs lock, then only
277 	 * do part of the lockfs protocol.  In other words, don't hang.
278 	 */
279 	if (dolockfs) {
280 		if (ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_DELETE_MASK))
281 			return;
282 	} else {
283 		/*
284 		 * check for recursive VOP call
285 		 */
286 		if (curthread->t_flag & T_DONTBLOCK) {
287 			ulp = NULL;
288 		} else {
289 			ulp = &ufsvfsp->vfs_ulockfs;
290 			curthread->t_flag |= T_DONTBLOCK;
291 		}
292 	}
293 
294 	/*
295 	 * Hold rwlock to synchronize with (nfs) writes
296 	 */
297 	if (dorwlock)
298 		rw_enter(&ip->i_rwlock, RW_WRITER);
299 
300 	/*
301 	 * Delete the attribute directory.
302 	 */
303 	if (ip->i_oeftflag != 0) {
304 		TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_REMOVE,
305 		    trans_size = (int)TOP_REMOVE_SIZE(ip));
306 		rw_enter(&ip->i_contents, RW_WRITER);
307 		err = ufs_iget(ip->i_vfs, ip->i_oeftflag,
308 		    &dp, CRED());
309 		if (err == 0) {
310 			rw_enter(&dp->i_rwlock, RW_WRITER);
311 			rw_enter(&dp->i_contents, RW_WRITER);
312 			dp->i_flag |= IUPD|ICHG;
313 			dp->i_seq++;
314 			TRANS_INODE(dp->i_ufsvfs, dp);
315 			dp->i_nlink -= 2;
316 			ufs_setreclaim(dp);
317 			/*
318 			 * Should get rid of any negative cache entries that
319 			 * might be lingering, as well as ``.'' and
320 			 * ``..''.  If we don't, the VN_RELE() below
321 			 * won't actually put dp on the delete queue
322 			 * and it'll hang out until someone forces it
323 			 * (lockfs -f, umount, ...).  The only reliable
324 			 * way of doing this at the moment is to call
325 			 * dnlc_purge_vp(ITOV(dp)), which is unacceptably
326 			 * slow, so we'll just note the problem in this
327 			 * comment for now.
328 			 */
329 			dnlc_remove(ITOV(dp), ".");
330 			dnlc_remove(ITOV(dp), "..");
331 			ITIMES_NOLOCK(dp);
332 			if (!TRANS_ISTRANS(ufsvfsp)) {
333 				ufs_iupdat(dp, I_SYNC);
334 			}
335 			rw_exit(&dp->i_contents);
336 			rw_exit(&dp->i_rwlock);
337 			VN_RELE(ITOV(dp));
338 		}
339 		/*
340 		 * Clear out attribute pointer
341 		 */
342 		ip->i_oeftflag = 0;
343 		rw_exit(&ip->i_contents);
344 		TRANS_END_CSYNC(ufsvfsp, err, issync,
345 		    TOP_REMOVE, trans_size);
346 		dnlc_remove(ITOV(ip), XATTR_DIR_NAME);
347 	}
348 
349 	if ((ip->i_mode & IFMT) == IFATTRDIR) {
350 		ufs_attr_purge(ip);
351 	}
352 
353 	(void) TRANS_ITRUNC(ip, (u_offset_t)0, I_FREE | I_ACCT, CRED());
354 
355 	/*
356 	 * the inode's space has been freed; now free the inode
357 	 */
358 	if (ulp) {
359 		trans_size = TOP_IFREE_SIZE(ip);
360 		TRANS_BEGIN_ASYNC(ufsvfsp, TOP_IFREE, trans_size);
361 	}
362 	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
363 	rw_enter(&ip->i_contents, RW_WRITER);
364 	TRANS_INODE(ufsvfsp, ip);
365 	mode = ip->i_mode;
366 	ip->i_mode = 0;
367 	ip->i_rdev = 0;
368 	ip->i_ordev = 0;
369 	ip->i_flag |= IMOD;
370 	if (ip->i_ufs_acl) {
371 		(void) ufs_si_free(ip->i_ufs_acl, vp->v_vfsp, CRED());
372 		ip->i_ufs_acl = NULL;
373 		ip->i_shadow = 0;
374 	}
375 
376 	/*
377 	 * This inode is torn down but still retains it's identity
378 	 * (inode number).  It could get recycled soon so it's best
379 	 * to clean up the vnode just in case.
380 	 */
381 	mutex_enter(&vp->v_lock);
382 	vn_recycle(vp);
383 	mutex_exit(&vp->v_lock);
384 
385 	/*
386 	 * free the inode
387 	 */
388 	ufs_ifree(ip, ip->i_number, mode);
389 	/*
390 	 * release quota resources; can't fail
391 	 */
392 	(void) chkiq((struct ufsvfs *)vp->v_vfsp->vfs_data,
393 	    /* change */ -1, ip, (uid_t)ip->i_uid, 0, CRED(),
394 	    (char **)NULL, (size_t *)NULL);
395 	dqrele(ip->i_dquot);
396 	ip->i_dquot = NULL;
397 	ip->i_flag &= ~(IDEL | IDIRECTIO);
398 	ip->i_cflags = 0;
399 	if (!TRANS_ISTRANS(ufsvfsp)) {
400 		ufs_iupdat(ip, I_SYNC);
401 	} else {
402 		mutex_enter(&delq->uq_mutex);
403 		delq_info->delq_unreclaimed_files--;
404 		mutex_exit(&delq->uq_mutex);
405 	}
406 	rw_exit(&ip->i_contents);
407 	rw_exit(&ufsvfsp->vfs_dqrwlock);
408 	if (dorwlock)
409 		rw_exit(&ip->i_rwlock);
410 	VN_RELE(vp);
411 
412 	/*
413 	 * End of transaction
414 	 */
415 	if (ulp) {
416 		TRANS_END_ASYNC(ufsvfsp, TOP_IFREE, trans_size);
417 		if (dolockfs)
418 			ufs_lockfs_end(ulp);
419 		else
420 			curthread->t_flag &= ~T_DONTBLOCK;
421 	}
422 }
423 
424 /*
425  * Create the delete thread and init the delq_info for this fs
426  */
427 void
ufs_delete_init(struct ufsvfs * ufsvfsp,int lowat)428 ufs_delete_init(struct ufsvfs *ufsvfsp, int lowat)
429 {
430 	struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info;
431 
432 	ufs_thread_init(&ufsvfsp->vfs_delete, lowat);
433 	(void) memset((void *)delq_info, 0, sizeof (*delq_info));
434 }
435 
436 /*
437  * thread that frees up deleted inodes
438  */
439 void
ufs_thread_delete(struct vfs * vfsp)440 ufs_thread_delete(struct vfs *vfsp)
441 {
442 	struct ufsvfs	*ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
443 	struct ufs_q	*uq = &ufsvfsp->vfs_delete;
444 	struct inode	*ip;
445 	long		ne;
446 	callb_cpr_t	cprinfo;
447 
448 	CALLB_CPR_INIT(&cprinfo, &uq->uq_mutex, callb_generic_cpr,
449 	    "ufsdelete");
450 
451 	mutex_enter(&uq->uq_mutex);
452 again:
453 	/*
454 	 * Sleep until there is work to do.  Only do one entry at
455 	 * a time, to reduce the wait time for checking for a suspend
456 	 * request.  The ?: is for pedantic portability.
457 	 */
458 	ne = ufs_thread_run(uq, &cprinfo) ? 1 : 0;
459 
460 	/*
461 	 * process an entry, if there are any
462 	 */
463 	if (ne && (ip = uq->uq_ihead)) {
464 		/*
465 		 * process first entry on queue.  Assumed conditions are:
466 		 *	ip is held (v_count >= 1)
467 		 *	ip is referenced (i_flag & IREF)
468 		 *	ip is free (i_nlink <= 0)
469 		 */
470 		if ((uq->uq_ihead = ip->i_freef) == ip)
471 			uq->uq_ihead = NULL;
472 		ip->i_freef->i_freeb = ip->i_freeb;
473 		ip->i_freeb->i_freef = ip->i_freef;
474 		ip->i_freef = ip;
475 		ip->i_freeb = ip;
476 		uq->uq_ne--;
477 		mutex_exit(&uq->uq_mutex);
478 		ufs_delete(ufsvfsp, ip, 1);
479 		mutex_enter(&uq->uq_mutex);
480 	}
481 	goto again;
482 }
483 
484 /*
485  * drain ne entries off the delete queue.  As new queue entries may
486  * be added while we're working, ne is interpreted as follows:
487  *
488  * ne > 0   => remove up to ne entries
489  * ne == 0  => remove all entries currently on the queue
490  * ne == -1 => remove entries until the queue is empty
491  */
492 void
ufs_delete_drain(struct vfs * vfsp,int ne,int dolockfs)493 ufs_delete_drain(struct vfs *vfsp, int ne, int dolockfs)
494 {
495 	struct ufsvfs	*ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
496 	struct ufs_q	*uq;
497 	struct inode	*ip;
498 	int		drain_cnt = 0;
499 	int		done;
500 
501 	/*
502 	 * if forcibly unmounted; ignore
503 	 */
504 	if (ufsvfsp == NULL)
505 		return;
506 
507 	uq = &ufsvfsp->vfs_delete;
508 	mutex_enter(&uq->uq_mutex);
509 	if (ne == 0)
510 		drain_cnt = uq->uq_ne;
511 	else if (ne > 0)
512 		drain_cnt = ne;
513 
514 	/*
515 	 * process up to ne entries
516 	 */
517 
518 	done = 0;
519 	while (!done && (ip = uq->uq_ihead)) {
520 		if (ne != -1)
521 			drain_cnt--;
522 		if (ne != -1 && drain_cnt == 0)
523 			done = 1;
524 		if ((uq->uq_ihead = ip->i_freef) == ip)
525 			uq->uq_ihead = NULL;
526 		ip->i_freef->i_freeb = ip->i_freeb;
527 		ip->i_freeb->i_freef = ip->i_freef;
528 		ip->i_freef = ip;
529 		ip->i_freeb = ip;
530 		uq->uq_ne--;
531 		mutex_exit(&uq->uq_mutex);
532 		ufs_delete(ufsvfsp, ip, dolockfs);
533 		mutex_enter(&uq->uq_mutex);
534 	}
535 	mutex_exit(&uq->uq_mutex);
536 }
537 
538 void
ufs_sync_with_thread(struct ufs_q * uq)539 ufs_sync_with_thread(struct ufs_q *uq)
540 {
541 	mutex_enter(&uq->uq_mutex);
542 
543 	/*
544 	 * Wake up delete thread to free up space.
545 	 */
546 	if ((uq->uq_flags & UQ_WAIT) == 0) {
547 		uq->uq_flags |= UQ_WAIT;
548 		cv_broadcast(&uq->uq_cv);
549 	}
550 
551 	while ((uq->uq_threadp != NULL) && (uq->uq_flags & UQ_WAIT)) {
552 		cv_wait(&uq->uq_cv, &uq->uq_mutex);
553 	}
554 
555 	mutex_exit(&uq->uq_mutex);
556 }
557 
558 /*
559  * Get rid of everything that's currently in the delete queue,
560  * plus whatever the delete thread is working on at the moment.
561  *
562  * This ability is required for providing true POSIX semantics
563  * regarding close(2), unlink(2), etc, even when logging is enabled.
564  * The standard requires that the released space be immediately
565  * observable (statvfs(2)) and allocatable (e.g., write(2)).
566  */
567 void
ufs_delete_drain_wait(struct ufsvfs * ufsvfsp,int dolockfs)568 ufs_delete_drain_wait(struct ufsvfs *ufsvfsp, int dolockfs)
569 {
570 	struct ufs_q *uq = &ufsvfsp->vfs_delete;
571 	int	error;
572 	struct ufs_q    *delq = &ufsvfsp->vfs_delete;
573 	struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info;
574 
575 	/*
576 	 * If there is something on delq or delete thread
577 	 * working on delq.
578 	 */
579 	mutex_enter(&delq->uq_mutex);
580 	if (delq_info->delq_unreclaimed_files > 0) {
581 		mutex_exit(&delq->uq_mutex);
582 		(void) ufs_delete_drain(ufsvfsp->vfs_vfs, 0, dolockfs);
583 		ufs_sync_with_thread(uq);
584 	} else {
585 		ASSERT(delq_info->delq_unreclaimed_files == 0);
586 		mutex_exit(&delq->uq_mutex);
587 		return;
588 	}
589 
590 	/*
591 	 * Commit any outstanding transactions to make sure
592 	 * any canceled freed blocks are available for allocation.
593 	 */
594 	curthread->t_flag |= T_DONTBLOCK;
595 	TRANS_BEGIN_SYNC(ufsvfsp, TOP_COMMIT_UPDATE, TOP_COMMIT_SIZE, error);
596 	if (!error) {
597 		TRANS_END_SYNC(ufsvfsp, error, TOP_COMMIT_UPDATE,
598 		    TOP_COMMIT_SIZE);
599 	}
600 	curthread->t_flag &= ~T_DONTBLOCK;
601 }
602 
603 /*
604  * Adjust the resource usage in a struct statvfs based on
605  * what's in the delete queue.
606  *
607  * We do not consider the impact of ACLs or extended attributes
608  * that may be deleted as a side-effect of deleting a file.
609  * Those are metadata, and their sizes aren't reflected in the
610  * sizes returned by stat(), so this is not a problem.
611  */
612 void
ufs_delete_adjust_stats(struct ufsvfs * ufsvfsp,struct statvfs64 * sp)613 ufs_delete_adjust_stats(struct ufsvfs *ufsvfsp, struct statvfs64 *sp)
614 {
615 	struct ufs_q *uq = &ufsvfsp->vfs_delete;
616 	struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info;
617 
618 	mutex_enter(&uq->uq_mutex);
619 	/*
620 	 * The blocks accounted for in the delete queue info are
621 	 * counted in DEV_BSIZE chunks, but ufs_statvfs counts in
622 	 * filesystem fragments, so a conversion is required here.
623 	 */
624 	sp->f_bfree += dbtofsb(ufsvfsp->vfs_fs,
625 	    delq_info->delq_unreclaimed_blocks);
626 	sp->f_ffree += delq_info->delq_unreclaimed_files;
627 	mutex_exit(&uq->uq_mutex);
628 }
629 
630 /*
631  * IDLE INODE
632  * The following routines implement the protocol for maintaining an
633  * LRU list of idle inodes and for moving the idle inodes to the
634  * reuse list when the number of allocated inodes exceeds the user
635  * tunable high-water mark (ufs_ninode).
636  */
637 
638 /*
639  * clean an idle inode and move it to the reuse list
640  */
641 static void
ufs_idle_free(struct inode * ip)642 ufs_idle_free(struct inode *ip)
643 {
644 	int			pages;
645 	int			hno;
646 	kmutex_t		*ihm;
647 	struct ufsvfs		*ufsvfsp	= ip->i_ufsvfs;
648 	struct vnode		*vp		= ITOV(ip);
649 	int			vn_has_data, vn_modified;
650 
651 	/*
652 	 * inode is held
653 	 */
654 
655 	/*
656 	 * remember `pages' for stats below
657 	 */
658 	pages = (ip->i_mode && vn_has_cached_data(vp) && vp->v_type != VCHR);
659 
660 	/*
661 	 * start the dirty pages to disk and then invalidate them
662 	 * unless the inode is invalid (ISTALE)
663 	 */
664 	if ((ip->i_flag & ISTALE) == 0) {
665 		(void) TRANS_SYNCIP(ip, B_ASYNC, I_ASYNC, TOP_SYNCIP_FREE);
666 		(void) TRANS_SYNCIP(ip,
667 		    (TRANS_ISERROR(ufsvfsp)) ? B_INVAL | B_FORCE : B_INVAL,
668 		    I_ASYNC, TOP_SYNCIP_FREE);
669 	}
670 
671 	/*
672 	 * wait for any current ufs_iget to finish and block future ufs_igets
673 	 */
674 	ASSERT(ip->i_number != 0);
675 	hno = INOHASH(ip->i_number);
676 	ihm = &ih_lock[hno];
677 	mutex_enter(ihm);
678 
679 	/*
680 	 * It must be guaranteed that v_count >= 2, otherwise
681 	 * something must be wrong with this vnode already.
682 	 * That is why we use v_count-- instead of VN_RELE().
683 	 * Acquire the vnode lock in case another thread is in
684 	 * VN_RELE().
685 	 */
686 	mutex_enter(&vp->v_lock);
687 
688 	if (vp->v_count < 2)
689 		cmn_err(CE_PANIC,
690 		    "ufs_idle_free: vnode ref count is less than 2");
691 
692 	vp->v_count--;
693 
694 	vn_has_data = (vp->v_type != VCHR && vn_has_cached_data(vp));
695 	vn_modified = (ip->i_flag & (IMOD|IMODACC|IACC|ICHG|IUPD|IATTCHG));
696 
697 	if (vp->v_count != 1 ||
698 	    ((vn_has_data || vn_modified) &&
699 	    ((ip->i_flag & ISTALE) == 0))) {
700 		/*
701 		 * Another thread has referenced this inode while
702 		 * we are trying  to free  it.  Call VN_RELE() to
703 		 * release our reference, if v_count > 1  data is
704 		 * present  or one of the modified etc. flags was
705 		 * set, whereby ISTALE wasn't set.
706 		 * If we'd proceed with ISTALE set here, we might
707 		 * get ourselves into a deadlock situation.
708 		 */
709 		mutex_exit(&vp->v_lock);
710 		mutex_exit(ihm);
711 		VN_RELE(vp);
712 	} else {
713 		/*
714 		 * The inode is currently unreferenced and can not
715 		 * acquire further references because it has no pages
716 		 * and the hash is locked.  Inodes acquire references
717 		 * via the hash list or via their pages.
718 		 */
719 
720 		mutex_exit(&vp->v_lock);
721 
722 		/*
723 		 * remove it from the cache
724 		 */
725 		remque(ip);
726 		mutex_exit(ihm);
727 		/*
728 		 * Stale inodes have no valid ufsvfs
729 		 */
730 		if ((ip->i_flag & ISTALE) == 0 && ip->i_dquot) {
731 			TRANS_DQRELE(ufsvfsp, ip->i_dquot);
732 			ip->i_dquot = NULL;
733 		}
734 		if ((ip->i_flag & ISTALE) &&
735 		    vn_has_data) {
736 			/*
737 			 * ISTALE inodes may have data
738 			 * and  this data needs  to be
739 			 * cleaned up.
740 			 */
741 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
742 			    ufs_putapage, B_INVAL | B_TRUNC,
743 			    (struct cred *)NULL);
744 		}
745 		ufs_si_del(ip);
746 		if (pages) {
747 			CPU_STATS_ADDQ(CPU, sys, ufsipage, 1);
748 		} else {
749 			CPU_STATS_ADDQ(CPU, sys, ufsinopage, 1);
750 		}
751 		ASSERT((vp->v_type == VCHR) || !vn_has_cached_data(vp));
752 
753 		/*
754 		 * We had better not have a vnode reference count > 1
755 		 * at this point, if we do then something is broken as
756 		 * this inode/vnode acquired a reference underneath of us.
757 		 */
758 		ASSERT(vp->v_count == 1);
759 
760 		ufs_free_inode(ip);
761 	}
762 }
763 
764 /*
765  * this thread processes the global idle queue
766  */
767 iqhead_t *ufs_junk_iq;
768 iqhead_t *ufs_useful_iq;
769 int ufs_njunk_iq = 0;
770 int ufs_nuseful_iq = 0;
771 int ufs_niqhash;
772 int ufs_iqhashmask;
773 struct ufs_q	ufs_idle_q;
774 
775 void
ufs_thread_idle(void)776 ufs_thread_idle(void)
777 {
778 	callb_cpr_t cprinfo;
779 	int i;
780 	int ne;
781 
782 	ufs_niqhash = (ufs_idle_q.uq_lowat >> 1) / IQHASHQLEN;
783 	ufs_niqhash = 1 << highbit(ufs_niqhash); /* round up to power of 2 */
784 	ufs_iqhashmask = ufs_niqhash - 1;
785 	ufs_junk_iq = kmem_alloc(ufs_niqhash * sizeof (*ufs_junk_iq),
786 	    KM_SLEEP);
787 	ufs_useful_iq = kmem_alloc(ufs_niqhash * sizeof (*ufs_useful_iq),
788 	    KM_SLEEP);
789 
790 	/* Initialize hash queue headers */
791 	for (i = 0; i < ufs_niqhash; i++) {
792 		ufs_junk_iq[i].i_freef = (inode_t *)&ufs_junk_iq[i];
793 		ufs_junk_iq[i].i_freeb = (inode_t *)&ufs_junk_iq[i];
794 		ufs_useful_iq[i].i_freef = (inode_t *)&ufs_useful_iq[i];
795 		ufs_useful_iq[i].i_freeb = (inode_t *)&ufs_useful_iq[i];
796 	}
797 
798 	CALLB_CPR_INIT(&cprinfo, &ufs_idle_q.uq_mutex, callb_generic_cpr,
799 	    "ufsidle");
800 again:
801 	/*
802 	 * Whenever the idle thread is awakened, it repeatedly gives
803 	 * back half of the idle queue until the idle queue falls
804 	 * below lowat.
805 	 */
806 	mutex_enter(&ufs_idle_q.uq_mutex);
807 	if (ufs_idle_q.uq_ne < ufs_idle_q.uq_lowat) {
808 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
809 		cv_wait(&ufs_idle_q.uq_cv, &ufs_idle_q.uq_mutex);
810 		CALLB_CPR_SAFE_END(&cprinfo, &ufs_idle_q.uq_mutex);
811 	}
812 	mutex_exit(&ufs_idle_q.uq_mutex);
813 
814 	/*
815 	 * Give back 1/2 of the idle queue
816 	 */
817 	ne = ufs_idle_q.uq_ne >> 1;
818 	ins.in_tidles.value.ul += ne;
819 	ufs_idle_some(ne);
820 	goto again;
821 }
822 
823 /*
824  * Reclaim callback for ufs inode cache.
825  * Invoked by the kernel memory allocator when memory gets tight.
826  */
827 /*ARGSUSED*/
828 void
ufs_inode_cache_reclaim(void * cdrarg)829 ufs_inode_cache_reclaim(void *cdrarg)
830 {
831 	/*
832 	 * If we are low on memory and the idle queue is over its
833 	 * halfway mark, then free 50% of the idle q
834 	 *
835 	 * We don't free all of the idle inodes because the inodes
836 	 * for popular NFS files may have been kicked from the dnlc.
837 	 * The inodes for these files will end up on the idle queue
838 	 * after every NFS access.
839 	 *
840 	 * If we repeatedly push them from the idle queue then
841 	 * NFS users may be unhappy as an extra buf cache operation
842 	 * is incurred for every NFS operation to these files.
843 	 *
844 	 * It's not common, but I have seen it happen.
845 	 *
846 	 */
847 	if (ufs_idle_q.uq_ne < (ufs_idle_q.uq_lowat >> 1))
848 		return;
849 	mutex_enter(&ufs_idle_q.uq_mutex);
850 	cv_broadcast(&ufs_idle_q.uq_cv);
851 	mutex_exit(&ufs_idle_q.uq_mutex);
852 }
853 
854 /*
855  * Free up some idle inodes
856  */
857 void
ufs_idle_some(int ne)858 ufs_idle_some(int ne)
859 {
860 	int i;
861 	struct inode *ip;
862 	struct vnode *vp;
863 	static int junk_rotor = 0;
864 	static int useful_rotor = 0;
865 
866 	for (i = 0; i < ne; ++i) {
867 		mutex_enter(&ufs_idle_q.uq_mutex);
868 
869 		if (ufs_njunk_iq) {
870 			while (ufs_junk_iq[junk_rotor].i_freef ==
871 			    (inode_t *)&ufs_junk_iq[junk_rotor]) {
872 				junk_rotor = IQNEXT(junk_rotor);
873 			}
874 			ip = ufs_junk_iq[junk_rotor].i_freef;
875 			ASSERT(ip->i_flag & IJUNKIQ);
876 		} else if (ufs_nuseful_iq) {
877 			while (ufs_useful_iq[useful_rotor].i_freef ==
878 			    (inode_t *)&ufs_useful_iq[useful_rotor]) {
879 				useful_rotor = IQNEXT(useful_rotor);
880 			}
881 			ip = ufs_useful_iq[useful_rotor].i_freef;
882 			ASSERT(!(ip->i_flag & IJUNKIQ));
883 		} else {
884 			mutex_exit(&ufs_idle_q.uq_mutex);
885 			return;
886 		}
887 
888 		/*
889 		 * emulate ufs_iget
890 		 */
891 		vp = ITOV(ip);
892 		VN_HOLD(vp);
893 		mutex_exit(&ufs_idle_q.uq_mutex);
894 		rw_enter(&ip->i_contents, RW_WRITER);
895 		/*
896 		 * VN_RELE should not be called if
897 		 * ufs_rmidle returns true, as it will
898 		 * effectively be done in ufs_idle_free.
899 		 */
900 		if (ufs_rmidle(ip)) {
901 			rw_exit(&ip->i_contents);
902 			ufs_idle_free(ip);
903 		} else {
904 			rw_exit(&ip->i_contents);
905 			VN_RELE(vp);
906 		}
907 	}
908 }
909 
910 /*
911  * drain entries for vfsp from the idle queue
912  * vfsp == NULL means drain the entire thing
913  */
914 void
ufs_idle_drain(struct vfs * vfsp)915 ufs_idle_drain(struct vfs *vfsp)
916 {
917 	struct inode	*ip, *nip;
918 	struct inode	*ianchor = NULL;
919 	int		i;
920 
921 	mutex_enter(&ufs_idle_q.uq_mutex);
922 	if (ufs_njunk_iq) {
923 		/* for each hash q */
924 		for (i = 0; i < ufs_niqhash; i++) {
925 			/* search down the hash q */
926 			for (ip = ufs_junk_iq[i].i_freef;
927 			    ip != (inode_t *)&ufs_junk_iq[i];
928 			    ip = ip->i_freef) {
929 				if (ip->i_vfs == vfsp || vfsp == NULL) {
930 					/* found a matching entry */
931 					VN_HOLD(ITOV(ip));
932 					mutex_exit(&ufs_idle_q.uq_mutex);
933 					rw_enter(&ip->i_contents, RW_WRITER);
934 					/*
935 					 * See comments in ufs_idle_some()
936 					 * as we will call ufs_idle_free()
937 					 * after scanning both queues.
938 					 */
939 					if (ufs_rmidle(ip)) {
940 						rw_exit(&ip->i_contents);
941 						ip->i_freef = ianchor;
942 						ianchor = ip;
943 					} else {
944 						rw_exit(&ip->i_contents);
945 						VN_RELE(ITOV(ip));
946 					}
947 					/* restart this hash q */
948 					ip = (inode_t *)&ufs_junk_iq[i];
949 					mutex_enter(&ufs_idle_q.uq_mutex);
950 				}
951 			}
952 		}
953 	}
954 	if (ufs_nuseful_iq) {
955 		/* for each hash q */
956 		for (i = 0; i < ufs_niqhash; i++) {
957 			/* search down the hash q */
958 			for (ip = ufs_useful_iq[i].i_freef;
959 			    ip != (inode_t *)&ufs_useful_iq[i];
960 			    ip = ip->i_freef) {
961 				if (ip->i_vfs == vfsp || vfsp == NULL) {
962 					/* found a matching entry */
963 					VN_HOLD(ITOV(ip));
964 					mutex_exit(&ufs_idle_q.uq_mutex);
965 					rw_enter(&ip->i_contents, RW_WRITER);
966 					/*
967 					 * See comments in ufs_idle_some()
968 					 * as we will call ufs_idle_free()
969 					 * after scanning both queues.
970 					 */
971 					if (ufs_rmidle(ip)) {
972 						rw_exit(&ip->i_contents);
973 						ip->i_freef = ianchor;
974 						ianchor = ip;
975 					} else {
976 						rw_exit(&ip->i_contents);
977 						VN_RELE(ITOV(ip));
978 					}
979 					/* restart this hash q */
980 					ip = (inode_t *)&ufs_useful_iq[i];
981 					mutex_enter(&ufs_idle_q.uq_mutex);
982 				}
983 			}
984 		}
985 	}
986 
987 	mutex_exit(&ufs_idle_q.uq_mutex);
988 	/* no more matching entries, release those we have found (if any) */
989 	for (ip = ianchor; ip; ip = nip) {
990 		nip = ip->i_freef;
991 		ip->i_freef = ip;
992 		ufs_idle_free(ip);
993 	}
994 }
995 
996 /*
997  * RECLAIM DELETED INODES
998  * The following thread scans the file system once looking for deleted files
999  */
1000 void
ufs_thread_reclaim(struct vfs * vfsp)1001 ufs_thread_reclaim(struct vfs *vfsp)
1002 {
1003 	struct ufsvfs		*ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
1004 	struct ufs_q		*uq	= &ufsvfsp->vfs_reclaim;
1005 	struct fs		*fs	= ufsvfsp->vfs_fs;
1006 	struct buf		*bp	= 0;
1007 	int			err	= 0;
1008 	daddr_t			bno;
1009 	ino_t			ino;
1010 	struct dinode		*dp;
1011 	struct inode		*ip;
1012 	callb_cpr_t		cprinfo;
1013 
1014 	CALLB_CPR_INIT(&cprinfo, &uq->uq_mutex, callb_generic_cpr,
1015 	    "ufsreclaim");
1016 
1017 	/*
1018 	 * mount decided that we don't need a reclaim thread
1019 	 */
1020 	if ((fs->fs_reclaim & FS_RECLAIMING) == 0)
1021 		err++;
1022 
1023 	/*
1024 	 * don't reclaim if readonly
1025 	 */
1026 	if (fs->fs_ronly)
1027 		err++;
1028 
1029 	for (ino = 0; ino < (fs->fs_ncg * fs->fs_ipg) && !err; ++ino) {
1030 
1031 		/*
1032 		 * Check whether we are the target of another
1033 		 * thread having called ufs_thread_exit() or
1034 		 * ufs_thread_suspend().
1035 		 */
1036 		mutex_enter(&uq->uq_mutex);
1037 again:
1038 		if (uq->uq_flags & UQ_EXIT) {
1039 			err++;
1040 			mutex_exit(&uq->uq_mutex);
1041 			break;
1042 		} else if (uq->uq_flags & UQ_SUSPEND) {
1043 			uq->uq_flags |= UQ_SUSPENDED;
1044 			/*
1045 			 * Release the buf before we cv_wait()
1046 			 * otherwise we may deadlock with the
1047 			 * thread that called ufs_thread_suspend().
1048 			 */
1049 			if (bp) {
1050 				brelse(bp);
1051 				bp = 0;
1052 			}
1053 			if (uq->uq_flags & UQ_WAIT) {
1054 				uq->uq_flags &= ~UQ_WAIT;
1055 				cv_broadcast(&uq->uq_cv);
1056 			}
1057 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
1058 			cv_wait(&uq->uq_cv, &uq->uq_mutex);
1059 			CALLB_CPR_SAFE_END(&cprinfo, &uq->uq_mutex);
1060 			goto again;
1061 		}
1062 		mutex_exit(&uq->uq_mutex);
1063 
1064 		/*
1065 		 * if we don't already have the buf; get it
1066 		 */
1067 		bno = fsbtodb(fs, itod(fs, ino));
1068 		if ((bp == 0) || (bp->b_blkno != bno)) {
1069 			if (bp)
1070 				brelse(bp);
1071 			bp = UFS_BREAD(ufsvfsp,
1072 			    ufsvfsp->vfs_dev, bno, fs->fs_bsize);
1073 			bp->b_flags |= B_AGE;
1074 		}
1075 		if (bp->b_flags & B_ERROR) {
1076 			err++;
1077 			continue;
1078 		}
1079 		/*
1080 		 * nlink <= 0 and mode != 0 means deleted
1081 		 */
1082 		dp = (struct dinode *)bp->b_un.b_addr + itoo(fs, ino);
1083 		if ((dp->di_nlink <= 0) && (dp->di_mode != 0)) {
1084 			/*
1085 			 * can't hold the buf (deadlock)
1086 			 */
1087 			brelse(bp);
1088 			bp = 0;
1089 			rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
1090 			/*
1091 			 * iget/iput sequence will put inode on ifree
1092 			 * thread queue if it is idle.  This is a nop
1093 			 * for busy (open, deleted) inodes
1094 			 */
1095 			if (ufs_iget(vfsp, ino, &ip, CRED()))
1096 				err++;
1097 			else
1098 				VN_RELE(ITOV(ip));
1099 			rw_exit(&ufsvfsp->vfs_dqrwlock);
1100 		}
1101 	}
1102 
1103 	if (bp)
1104 		brelse(bp);
1105 	if (!err) {
1106 		/*
1107 		 * reset the reclaiming-bit
1108 		 */
1109 		mutex_enter(&ufsvfsp->vfs_lock);
1110 		fs->fs_reclaim &= ~FS_RECLAIMING;
1111 		mutex_exit(&ufsvfsp->vfs_lock);
1112 		TRANS_SBWRITE(ufsvfsp, TOP_SBWRITE_RECLAIM);
1113 	}
1114 
1115 	/*
1116 	 * exit the reclaim thread
1117 	 */
1118 	mutex_enter(&uq->uq_mutex);
1119 	uq->uq_threadp = NULL;
1120 	uq->uq_flags &= ~UQ_WAIT;
1121 	cv_broadcast(&uq->uq_cv);
1122 	CALLB_CPR_EXIT(&cprinfo);
1123 	thread_exit();
1124 }
1125 /*
1126  * HLOCK FILE SYSTEM
1127  *	hlock the file system's whose logs have device errors
1128  */
1129 struct ufs_q	ufs_hlock;
1130 /*ARGSUSED*/
1131 void
ufs_thread_hlock(void * ignore)1132 ufs_thread_hlock(void *ignore)
1133 {
1134 	int		retry;
1135 	callb_cpr_t	cprinfo;
1136 
1137 	CALLB_CPR_INIT(&cprinfo, &ufs_hlock.uq_mutex, callb_generic_cpr,
1138 	    "ufshlock");
1139 
1140 	for (;;) {
1141 		/*
1142 		 * sleep until there is work to do
1143 		 */
1144 		mutex_enter(&ufs_hlock.uq_mutex);
1145 		(void) ufs_thread_run(&ufs_hlock, &cprinfo);
1146 		ufs_hlock.uq_ne = 0;
1147 		mutex_exit(&ufs_hlock.uq_mutex);
1148 		/*
1149 		 * hlock the error'ed fs's
1150 		 *	retry after a bit if another app is doing lockfs stuff
1151 		 */
1152 		do {
1153 			retry = ufs_trans_hlock();
1154 			if (retry) {
1155 				mutex_enter(&ufs_hlock.uq_mutex);
1156 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
1157 				(void) cv_reltimedwait(&ufs_hlock.uq_cv,
1158 				    &ufs_hlock.uq_mutex, hz, TR_CLOCK_TICK);
1159 				CALLB_CPR_SAFE_END(&cprinfo,
1160 				    &ufs_hlock.uq_mutex);
1161 				mutex_exit(&ufs_hlock.uq_mutex);
1162 			}
1163 		} while (retry);
1164 	}
1165 }
1166 
1167 static void
ufs_attr_purge(struct inode * dp)1168 ufs_attr_purge(struct inode *dp)
1169 {
1170 	int	err;
1171 	int	error;
1172 	off_t 	dirsize;			/* size of the directory */
1173 	off_t 	offset;	/* offset in the directory */
1174 	int entryoffsetinblk;		/* offset of ep in fbp's buffer */
1175 	struct inode *tp;
1176 	struct fbuf *fbp;	/* pointer to directory block */
1177 	struct direct *ep;	/* directory entry */
1178 	int trans_size;
1179 	int issync;
1180 	struct ufsvfs	*ufsvfsp = dp->i_ufsvfs;
1181 
1182 	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
1183 
1184 	fbp = NULL;
1185 	dirsize = roundup(dp->i_size, DIRBLKSIZ);
1186 	offset = 0;
1187 	entryoffsetinblk = 0;
1188 
1189 	/*
1190 	 * Purge directory cache
1191 	 */
1192 
1193 	dnlc_dir_purge(&dp->i_danchor);
1194 
1195 	while (offset < dirsize) {
1196 		/*
1197 		 * If offset is on a block boundary,
1198 		 * read the next directory block.
1199 		 * Release previous if it exists.
1200 		 */
1201 		if (blkoff(dp->i_fs, offset) == 0) {
1202 			if (fbp != NULL) {
1203 				fbrelse(fbp, S_OTHER);
1204 			}
1205 
1206 			err = blkatoff(dp, offset, (char **)0, &fbp);
1207 			if (err) {
1208 				goto out;
1209 			}
1210 			entryoffsetinblk = 0;
1211 		}
1212 		ep = (struct direct *)(fbp->fb_addr + entryoffsetinblk);
1213 		if (ep->d_ino == 0 || (ep->d_name[0] == '.' &&
1214 		    ep->d_name[1] == '\0') ||
1215 		    (ep->d_name[0] == '.' && ep->d_name[1] == '.' &&
1216 		    ep->d_name[2] == '\0')) {
1217 
1218 			entryoffsetinblk += ep->d_reclen;
1219 
1220 		} else {
1221 
1222 			if ((err = ufs_iget(dp->i_vfs, ep->d_ino,
1223 			    &tp, CRED())) != 0) {
1224 				goto out;
1225 			}
1226 
1227 			TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_REMOVE,
1228 			    trans_size = (int)TOP_REMOVE_SIZE(tp));
1229 
1230 			/*
1231 			 * Delete inode.
1232 			 */
1233 
1234 			dnlc_remove(ITOV(dp), ep->d_name);
1235 
1236 			rw_enter(&tp->i_contents, RW_WRITER);
1237 			tp->i_flag |= ICHG;
1238 			tp->i_seq++;
1239 			TRANS_INODE(tp->i_ufsvfs, tp);
1240 			tp->i_nlink--;
1241 			ufs_setreclaim(tp);
1242 			ITIMES_NOLOCK(tp);
1243 			rw_exit(&tp->i_contents);
1244 
1245 			VN_RELE(ITOV(tp));
1246 			entryoffsetinblk += ep->d_reclen;
1247 			TRANS_END_CSYNC(ufsvfsp, error,
1248 			    issync, TOP_REMOVE, trans_size);
1249 
1250 		}
1251 		offset += ep->d_reclen;
1252 	}
1253 
1254 	if (fbp) {
1255 		fbrelse(fbp, S_OTHER);
1256 	}
1257 
1258 out:
1259 	rw_exit(&ufsvfsp->vfs_dqrwlock);
1260 }
1261