xref: /illumos-gate/usr/src/uts/common/fs/ufs/ufs_thread.c (revision 7da74b7620d65183bc0949a426dc2cad658f19c3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 /*
30  * Copyright (c) 2017 by Delphix. All rights reserved.
31  */
32 
33 /*
34  * Portions of this source code were derived from Berkeley 4.3 BSD
35  * under license from the Regents of the University of California.
36  */
37 
38 #include <sys/types.h>
39 #include <sys/systm.h>
40 #include <sys/errno.h>
41 #include <sys/kmem.h>
42 #include <sys/buf.h>
43 #include <sys/vnode.h>
44 #include <sys/vfs.h>
45 #include <sys/user.h>
46 #include <sys/callb.h>
47 #include <sys/cpuvar.h>
48 #include <sys/fs/ufs_inode.h>
49 #include <sys/fs/ufs_log.h>
50 #include <sys/fs/ufs_trans.h>
51 #include <sys/fs/ufs_acl.h>
52 #include <sys/fs/ufs_bio.h>
53 #include <sys/fs/ufs_fsdir.h>
54 #include <sys/debug.h>
55 #include <sys/cmn_err.h>
56 #include <sys/sysmacros.h>
57 #include <vm/pvn.h>
58 
59 extern pri_t			minclsyspri;
60 extern int			hash2ints();
61 extern struct kmem_cache	*inode_cache;	/* cache of free inodes */
62 extern int			ufs_idle_waiters;
63 extern struct instats		ins;
64 
65 static void ufs_attr_purge(struct inode *);
66 
67 /*
68  * initialize a thread's queue struct
69  */
70 void
ufs_thread_init(struct ufs_q * uq,int lowat)71 ufs_thread_init(struct ufs_q *uq, int lowat)
72 {
73 	bzero((caddr_t)uq, sizeof (*uq));
74 	cv_init(&uq->uq_cv, NULL, CV_DEFAULT, NULL);
75 	mutex_init(&uq->uq_mutex, NULL, MUTEX_DEFAULT, NULL);
76 	uq->uq_lowat = lowat;
77 	uq->uq_hiwat = 2 * lowat;
78 	uq->uq_threadp = NULL;
79 }
80 
81 /*
82  * start a thread for a queue (assumes success)
83  */
84 void
ufs_thread_start(struct ufs_q * uq,void (* func)(),struct vfs * vfsp)85 ufs_thread_start(struct ufs_q *uq, void (*func)(), struct vfs *vfsp)
86 {
87 	mutex_enter(&uq->uq_mutex);
88 	if (uq->uq_threadp == NULL) {
89 		uq->uq_threadp = thread_create(NULL, 0, func, vfsp, 0, &p0,
90 		    TS_RUN, minclsyspri);
91 		uq->uq_flags = 0;
92 	}
93 	mutex_exit(&uq->uq_mutex);
94 }
95 
96 /*
97  * wait for the thread to exit
98  */
99 void
ufs_thread_exit(struct ufs_q * uq)100 ufs_thread_exit(struct ufs_q *uq)
101 {
102 	kt_did_t ufs_thread_did = 0;
103 
104 	mutex_enter(&uq->uq_mutex);
105 	uq->uq_flags &= ~(UQ_SUSPEND | UQ_SUSPENDED);
106 	if (uq->uq_threadp != NULL) {
107 		ufs_thread_did = uq->uq_threadp->t_did;
108 		uq->uq_flags |= (UQ_EXIT|UQ_WAIT);
109 		cv_broadcast(&uq->uq_cv);
110 	}
111 	mutex_exit(&uq->uq_mutex);
112 
113 	/*
114 	 * It's safe to call thread_join() with an already-gone
115 	 * t_did, but we have to obtain it before the kernel
116 	 * thread structure is freed. We do so above under the
117 	 * protection of the uq_mutex when we're sure the thread
118 	 * still exists and it's save to de-reference it.
119 	 * We also have to check if ufs_thread_did is != 0
120 	 * before calling thread_join() since thread 0 in the system
121 	 * gets a t_did of 0.
122 	 */
123 	if (ufs_thread_did)
124 		thread_join(ufs_thread_did);
125 }
126 
127 /*
128  * wait for a thread to suspend itself on the caller's behalf
129  *	the caller is responsible for continuing the thread
130  */
131 void
ufs_thread_suspend(struct ufs_q * uq)132 ufs_thread_suspend(struct ufs_q *uq)
133 {
134 	mutex_enter(&uq->uq_mutex);
135 	if (uq->uq_threadp != NULL) {
136 		/*
137 		 * wait while another thread is suspending this thread.
138 		 * no need to do a cv_broadcast(), as whoever suspended
139 		 * the thread must continue it at some point.
140 		 */
141 		while ((uq->uq_flags & UQ_SUSPEND) &&
142 		    (uq->uq_threadp != NULL)) {
143 			/*
144 			 * We can't use cv_signal() because if our
145 			 * signal doesn't happen to hit the desired
146 			 * thread but instead some other waiter like
147 			 * ourselves, we'll wait forever for a
148 			 * response.  Well, at least an indeterminate
149 			 * amount of time until we just happen to get
150 			 * lucky from whomever did get signalled doing
151 			 * a cv_signal() of their own.  This is an
152 			 * unfortunate performance lossage.
153 			 */
154 			uq->uq_flags |= UQ_WAIT;
155 			cv_wait(&uq->uq_cv, &uq->uq_mutex);
156 		}
157 
158 		uq->uq_flags |= (UQ_SUSPEND | UQ_WAIT);
159 
160 		/*
161 		 * wait for the thread to suspend itself
162 		 */
163 		if ((uq->uq_flags & UQ_SUSPENDED) == 0 &&
164 		    (uq->uq_threadp != NULL)) {
165 			cv_broadcast(&uq->uq_cv);
166 		}
167 
168 		while (((uq->uq_flags & UQ_SUSPENDED) == 0) &&
169 		    (uq->uq_threadp != NULL)) {
170 			cv_wait(&uq->uq_cv, &uq->uq_mutex);
171 		}
172 	}
173 	mutex_exit(&uq->uq_mutex);
174 }
175 
176 /*
177  * allow a thread to continue from a ufs_thread_suspend()
178  *	This thread must be the same as the thread that called
179  *	ufs_thread_suspend.
180  */
181 void
ufs_thread_continue(struct ufs_q * uq)182 ufs_thread_continue(struct ufs_q *uq)
183 {
184 	mutex_enter(&uq->uq_mutex);
185 	uq->uq_flags &= ~(UQ_SUSPEND | UQ_SUSPENDED);
186 	cv_broadcast(&uq->uq_cv);
187 	mutex_exit(&uq->uq_mutex);
188 }
189 
190 /*
191  * some common code for managing a threads execution
192  *	uq is locked at entry and return
193  *	may sleep
194  *	may exit
195  */
196 /*
197  * Kind of a hack passing in the callb_cpr_t * here.
198  * It should really be part of the ufs_q structure.
199  * I did not put it in there because we are already in beta
200  * and I was concerned that changing ufs_inode.h to include
201  * callb.h might break something.
202  */
203 int
ufs_thread_run(struct ufs_q * uq,callb_cpr_t * cprinfop)204 ufs_thread_run(struct ufs_q *uq, callb_cpr_t *cprinfop)
205 {
206 again:
207 	ASSERT(uq->uq_ne >= 0);
208 
209 	if (uq->uq_flags & UQ_SUSPEND) {
210 		uq->uq_flags |= UQ_SUSPENDED;
211 	} else if (uq->uq_flags & UQ_EXIT) {
212 		/*
213 		 * exiting; empty the queue (may infinite loop)
214 		 */
215 		if (uq->uq_ne)
216 			return (uq->uq_ne);
217 		uq->uq_threadp = NULL;
218 		if (uq->uq_flags & UQ_WAIT) {
219 			cv_broadcast(&uq->uq_cv);
220 		}
221 		uq->uq_flags &= ~(UQ_EXIT | UQ_WAIT);
222 		CALLB_CPR_EXIT(cprinfop);
223 		thread_exit();
224 	} else if (uq->uq_ne >= uq->uq_lowat) {
225 		/*
226 		 * process a block of entries until below high water mark
227 		 */
228 		return (uq->uq_ne - (uq->uq_lowat >> 1));
229 	}
230 	if (uq->uq_flags & UQ_WAIT) {
231 		uq->uq_flags &= ~UQ_WAIT;
232 		cv_broadcast(&uq->uq_cv);
233 	}
234 	CALLB_CPR_SAFE_BEGIN(cprinfop);
235 	cv_wait(&uq->uq_cv, &uq->uq_mutex);
236 	CALLB_CPR_SAFE_END(cprinfop, &uq->uq_mutex);
237 	goto again;
238 }
239 
240 /*
241  * DELETE INODE
242  * The following routines implement the protocol for freeing the resources
243  * held by an idle and deleted inode.
244  */
245 void
ufs_delete(struct ufsvfs * ufsvfsp,struct inode * ip,int dolockfs)246 ufs_delete(struct ufsvfs *ufsvfsp, struct inode *ip, int dolockfs)
247 {
248 	ushort_t	mode;
249 	struct vnode	*vp	= ITOV(ip);
250 	struct ulockfs	*ulp;
251 	int		trans_size;
252 	int		dorwlock = ((ip->i_mode & IFMT) == IFREG);
253 	int		issync;
254 	int		err;
255 	struct inode	*dp;
256 	struct ufs_q    *delq = &ufsvfsp->vfs_delete;
257 	struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info;
258 
259 	/*
260 	 * Ignore if deletes are not allowed (wlock/hlock)
261 	 */
262 	if (ULOCKFS_IS_NOIDEL(ITOUL(ip))) {
263 		mutex_enter(&delq->uq_mutex);
264 		delq_info->delq_unreclaimed_blocks -= ip->i_blocks;
265 		delq_info->delq_unreclaimed_files--;
266 		mutex_exit(&delq->uq_mutex);
267 		VN_RELE(vp);
268 		return;
269 	}
270 
271 	if ((vp->v_count > 1) || (ip->i_mode == 0)) {
272 		mutex_enter(&delq->uq_mutex);
273 		delq_info->delq_unreclaimed_blocks -= ip->i_blocks;
274 		delq_info->delq_unreclaimed_files--;
275 		mutex_exit(&delq->uq_mutex);
276 		VN_RELE(vp);
277 		return;
278 	}
279 	/*
280 	 * If we are called as part of setting a fs lock, then only
281 	 * do part of the lockfs protocol.  In other words, don't hang.
282 	 */
283 	if (dolockfs) {
284 		if (ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_DELETE_MASK))
285 			return;
286 	} else {
287 		/*
288 		 * check for recursive VOP call
289 		 */
290 		if (curthread->t_flag & T_DONTBLOCK) {
291 			ulp = NULL;
292 		} else {
293 			ulp = &ufsvfsp->vfs_ulockfs;
294 			curthread->t_flag |= T_DONTBLOCK;
295 		}
296 	}
297 
298 	/*
299 	 * Hold rwlock to synchronize with (nfs) writes
300 	 */
301 	if (dorwlock)
302 		rw_enter(&ip->i_rwlock, RW_WRITER);
303 
304 	/*
305 	 * Delete the attribute directory.
306 	 */
307 	if (ip->i_oeftflag != 0) {
308 		TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_REMOVE,
309 		    trans_size = (int)TOP_REMOVE_SIZE(ip));
310 		rw_enter(&ip->i_contents, RW_WRITER);
311 		err = ufs_iget(ip->i_vfs, ip->i_oeftflag,
312 		    &dp, CRED());
313 		if (err == 0) {
314 			rw_enter(&dp->i_rwlock, RW_WRITER);
315 			rw_enter(&dp->i_contents, RW_WRITER);
316 			dp->i_flag |= IUPD|ICHG;
317 			dp->i_seq++;
318 			TRANS_INODE(dp->i_ufsvfs, dp);
319 			dp->i_nlink -= 2;
320 			ufs_setreclaim(dp);
321 			/*
322 			 * Should get rid of any negative cache entries that
323 			 * might be lingering, as well as ``.'' and
324 			 * ``..''.  If we don't, the VN_RELE() below
325 			 * won't actually put dp on the delete queue
326 			 * and it'll hang out until someone forces it
327 			 * (lockfs -f, umount, ...).  The only reliable
328 			 * way of doing this at the moment is to call
329 			 * dnlc_purge_vp(ITOV(dp)), which is unacceptably
330 			 * slow, so we'll just note the problem in this
331 			 * comment for now.
332 			 */
333 			dnlc_remove(ITOV(dp), ".");
334 			dnlc_remove(ITOV(dp), "..");
335 			ITIMES_NOLOCK(dp);
336 			if (!TRANS_ISTRANS(ufsvfsp)) {
337 				ufs_iupdat(dp, I_SYNC);
338 			}
339 			rw_exit(&dp->i_contents);
340 			rw_exit(&dp->i_rwlock);
341 			VN_RELE(ITOV(dp));
342 		}
343 		/*
344 		 * Clear out attribute pointer
345 		 */
346 		ip->i_oeftflag = 0;
347 		rw_exit(&ip->i_contents);
348 		TRANS_END_CSYNC(ufsvfsp, err, issync,
349 		    TOP_REMOVE, trans_size);
350 		dnlc_remove(ITOV(ip), XATTR_DIR_NAME);
351 	}
352 
353 	if ((ip->i_mode & IFMT) == IFATTRDIR) {
354 		ufs_attr_purge(ip);
355 	}
356 
357 	(void) TRANS_ITRUNC(ip, (u_offset_t)0, I_FREE | I_ACCT, CRED());
358 
359 	/*
360 	 * the inode's space has been freed; now free the inode
361 	 */
362 	if (ulp) {
363 		trans_size = TOP_IFREE_SIZE(ip);
364 		TRANS_BEGIN_ASYNC(ufsvfsp, TOP_IFREE, trans_size);
365 	}
366 	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
367 	rw_enter(&ip->i_contents, RW_WRITER);
368 	TRANS_INODE(ufsvfsp, ip);
369 	mode = ip->i_mode;
370 	ip->i_mode = 0;
371 	ip->i_rdev = 0;
372 	ip->i_ordev = 0;
373 	ip->i_flag |= IMOD;
374 	if (ip->i_ufs_acl) {
375 		(void) ufs_si_free(ip->i_ufs_acl, vp->v_vfsp, CRED());
376 		ip->i_ufs_acl = NULL;
377 		ip->i_shadow = 0;
378 	}
379 
380 	/*
381 	 * This inode is torn down but still retains it's identity
382 	 * (inode number).  It could get recycled soon so it's best
383 	 * to clean up the vnode just in case.
384 	 */
385 	mutex_enter(&vp->v_lock);
386 	vn_recycle(vp);
387 	mutex_exit(&vp->v_lock);
388 
389 	/*
390 	 * free the inode
391 	 */
392 	ufs_ifree(ip, ip->i_number, mode);
393 	/*
394 	 * release quota resources; can't fail
395 	 */
396 	(void) chkiq((struct ufsvfs *)vp->v_vfsp->vfs_data,
397 	    /* change */ -1, ip, (uid_t)ip->i_uid, 0, CRED(),
398 	    (char **)NULL, (size_t *)NULL);
399 	dqrele(ip->i_dquot);
400 	ip->i_dquot = NULL;
401 	ip->i_flag &= ~(IDEL | IDIRECTIO);
402 	ip->i_cflags = 0;
403 	if (!TRANS_ISTRANS(ufsvfsp)) {
404 		ufs_iupdat(ip, I_SYNC);
405 	} else {
406 		mutex_enter(&delq->uq_mutex);
407 		delq_info->delq_unreclaimed_files--;
408 		mutex_exit(&delq->uq_mutex);
409 	}
410 	rw_exit(&ip->i_contents);
411 	rw_exit(&ufsvfsp->vfs_dqrwlock);
412 	if (dorwlock)
413 		rw_exit(&ip->i_rwlock);
414 	VN_RELE(vp);
415 
416 	/*
417 	 * End of transaction
418 	 */
419 	if (ulp) {
420 		TRANS_END_ASYNC(ufsvfsp, TOP_IFREE, trans_size);
421 		if (dolockfs)
422 			ufs_lockfs_end(ulp);
423 		else
424 			curthread->t_flag &= ~T_DONTBLOCK;
425 	}
426 }
427 
428 /*
429  * Create the delete thread and init the delq_info for this fs
430  */
431 void
ufs_delete_init(struct ufsvfs * ufsvfsp,int lowat)432 ufs_delete_init(struct ufsvfs *ufsvfsp, int lowat)
433 {
434 	struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info;
435 
436 	ufs_thread_init(&ufsvfsp->vfs_delete, lowat);
437 	(void) memset((void *)delq_info, 0, sizeof (*delq_info));
438 }
439 
440 /*
441  * thread that frees up deleted inodes
442  */
443 void
ufs_thread_delete(struct vfs * vfsp)444 ufs_thread_delete(struct vfs *vfsp)
445 {
446 	struct ufsvfs	*ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
447 	struct ufs_q	*uq = &ufsvfsp->vfs_delete;
448 	struct inode	*ip;
449 	long		ne;
450 	callb_cpr_t	cprinfo;
451 
452 	CALLB_CPR_INIT(&cprinfo, &uq->uq_mutex, callb_generic_cpr,
453 	    "ufsdelete");
454 
455 	mutex_enter(&uq->uq_mutex);
456 again:
457 	/*
458 	 * Sleep until there is work to do.  Only do one entry at
459 	 * a time, to reduce the wait time for checking for a suspend
460 	 * request.  The ?: is for pedantic portability.
461 	 */
462 	ne = ufs_thread_run(uq, &cprinfo) ? 1 : 0;
463 
464 	/*
465 	 * process an entry, if there are any
466 	 */
467 	if (ne && (ip = uq->uq_ihead)) {
468 		/*
469 		 * process first entry on queue.  Assumed conditions are:
470 		 *	ip is held (v_count >= 1)
471 		 *	ip is referenced (i_flag & IREF)
472 		 *	ip is free (i_nlink <= 0)
473 		 */
474 		if ((uq->uq_ihead = ip->i_freef) == ip)
475 			uq->uq_ihead = NULL;
476 		ip->i_freef->i_freeb = ip->i_freeb;
477 		ip->i_freeb->i_freef = ip->i_freef;
478 		ip->i_freef = ip;
479 		ip->i_freeb = ip;
480 		uq->uq_ne--;
481 		mutex_exit(&uq->uq_mutex);
482 		ufs_delete(ufsvfsp, ip, 1);
483 		mutex_enter(&uq->uq_mutex);
484 	}
485 	goto again;
486 }
487 
488 /*
489  * drain ne entries off the delete queue.  As new queue entries may
490  * be added while we're working, ne is interpreted as follows:
491  *
492  * ne > 0   => remove up to ne entries
493  * ne == 0  => remove all entries currently on the queue
494  * ne == -1 => remove entries until the queue is empty
495  */
496 void
ufs_delete_drain(struct vfs * vfsp,int ne,int dolockfs)497 ufs_delete_drain(struct vfs *vfsp, int ne, int dolockfs)
498 {
499 	struct ufsvfs	*ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
500 	struct ufs_q	*uq;
501 	struct inode	*ip;
502 	int		drain_cnt = 0;
503 	int		done;
504 
505 	/*
506 	 * if forcibly unmounted; ignore
507 	 */
508 	if (ufsvfsp == NULL)
509 		return;
510 
511 	uq = &ufsvfsp->vfs_delete;
512 	mutex_enter(&uq->uq_mutex);
513 	if (ne == 0)
514 		drain_cnt = uq->uq_ne;
515 	else if (ne > 0)
516 		drain_cnt = ne;
517 
518 	/*
519 	 * process up to ne entries
520 	 */
521 
522 	done = 0;
523 	while (!done && (ip = uq->uq_ihead)) {
524 		if (ne != -1)
525 			drain_cnt--;
526 		if (ne != -1 && drain_cnt == 0)
527 			done = 1;
528 		if ((uq->uq_ihead = ip->i_freef) == ip)
529 			uq->uq_ihead = NULL;
530 		ip->i_freef->i_freeb = ip->i_freeb;
531 		ip->i_freeb->i_freef = ip->i_freef;
532 		ip->i_freef = ip;
533 		ip->i_freeb = ip;
534 		uq->uq_ne--;
535 		mutex_exit(&uq->uq_mutex);
536 		ufs_delete(ufsvfsp, ip, dolockfs);
537 		mutex_enter(&uq->uq_mutex);
538 	}
539 	mutex_exit(&uq->uq_mutex);
540 }
541 
542 void
ufs_sync_with_thread(struct ufs_q * uq)543 ufs_sync_with_thread(struct ufs_q *uq)
544 {
545 	mutex_enter(&uq->uq_mutex);
546 
547 	/*
548 	 * Wake up delete thread to free up space.
549 	 */
550 	if ((uq->uq_flags & UQ_WAIT) == 0) {
551 		uq->uq_flags |= UQ_WAIT;
552 		cv_broadcast(&uq->uq_cv);
553 	}
554 
555 	while ((uq->uq_threadp != NULL) && (uq->uq_flags & UQ_WAIT)) {
556 		cv_wait(&uq->uq_cv, &uq->uq_mutex);
557 	}
558 
559 	mutex_exit(&uq->uq_mutex);
560 }
561 
562 /*
563  * Get rid of everything that's currently in the delete queue,
564  * plus whatever the delete thread is working on at the moment.
565  *
566  * This ability is required for providing true POSIX semantics
567  * regarding close(2), unlink(2), etc, even when logging is enabled.
568  * The standard requires that the released space be immediately
569  * observable (statvfs(2)) and allocatable (e.g., write(2)).
570  */
571 void
ufs_delete_drain_wait(struct ufsvfs * ufsvfsp,int dolockfs)572 ufs_delete_drain_wait(struct ufsvfs *ufsvfsp, int dolockfs)
573 {
574 	struct ufs_q *uq = &ufsvfsp->vfs_delete;
575 	int	error;
576 	struct ufs_q    *delq = &ufsvfsp->vfs_delete;
577 	struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info;
578 
579 	/*
580 	 * If there is something on delq or delete thread
581 	 * working on delq.
582 	 */
583 	mutex_enter(&delq->uq_mutex);
584 	if (delq_info->delq_unreclaimed_files > 0) {
585 		mutex_exit(&delq->uq_mutex);
586 		(void) ufs_delete_drain(ufsvfsp->vfs_vfs, 0, dolockfs);
587 		ufs_sync_with_thread(uq);
588 	} else {
589 		ASSERT(delq_info->delq_unreclaimed_files == 0);
590 		mutex_exit(&delq->uq_mutex);
591 		return;
592 	}
593 
594 	/*
595 	 * Commit any outstanding transactions to make sure
596 	 * any canceled freed blocks are available for allocation.
597 	 */
598 	curthread->t_flag |= T_DONTBLOCK;
599 	TRANS_BEGIN_SYNC(ufsvfsp, TOP_COMMIT_UPDATE, TOP_COMMIT_SIZE, error);
600 	if (!error) {
601 		TRANS_END_SYNC(ufsvfsp, error, TOP_COMMIT_UPDATE,
602 		    TOP_COMMIT_SIZE);
603 	}
604 	curthread->t_flag &= ~T_DONTBLOCK;
605 }
606 
607 /*
608  * Adjust the resource usage in a struct statvfs based on
609  * what's in the delete queue.
610  *
611  * We do not consider the impact of ACLs or extended attributes
612  * that may be deleted as a side-effect of deleting a file.
613  * Those are metadata, and their sizes aren't reflected in the
614  * sizes returned by stat(), so this is not a problem.
615  */
616 void
ufs_delete_adjust_stats(struct ufsvfs * ufsvfsp,struct statvfs64 * sp)617 ufs_delete_adjust_stats(struct ufsvfs *ufsvfsp, struct statvfs64 *sp)
618 {
619 	struct ufs_q *uq = &ufsvfsp->vfs_delete;
620 	struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info;
621 
622 	mutex_enter(&uq->uq_mutex);
623 	/*
624 	 * The blocks accounted for in the delete queue info are
625 	 * counted in DEV_BSIZE chunks, but ufs_statvfs counts in
626 	 * filesystem fragments, so a conversion is required here.
627 	 */
628 	sp->f_bfree += dbtofsb(ufsvfsp->vfs_fs,
629 	    delq_info->delq_unreclaimed_blocks);
630 	sp->f_ffree += delq_info->delq_unreclaimed_files;
631 	mutex_exit(&uq->uq_mutex);
632 }
633 
634 /*
635  * IDLE INODE
636  * The following routines implement the protocol for maintaining an
637  * LRU list of idle inodes and for moving the idle inodes to the
638  * reuse list when the number of allocated inodes exceeds the user
639  * tunable high-water mark (ufs_ninode).
640  */
641 
642 /*
643  * clean an idle inode and move it to the reuse list
644  */
645 static void
ufs_idle_free(struct inode * ip)646 ufs_idle_free(struct inode *ip)
647 {
648 	int			pages;
649 	int			hno;
650 	kmutex_t		*ihm;
651 	struct ufsvfs		*ufsvfsp	= ip->i_ufsvfs;
652 	struct vnode		*vp		= ITOV(ip);
653 	int			vn_has_data, vn_modified;
654 
655 	/*
656 	 * inode is held
657 	 */
658 
659 	/*
660 	 * remember `pages' for stats below
661 	 */
662 	pages = (ip->i_mode && vn_has_cached_data(vp) && vp->v_type != VCHR);
663 
664 	/*
665 	 * start the dirty pages to disk and then invalidate them
666 	 * unless the inode is invalid (ISTALE)
667 	 */
668 	if ((ip->i_flag & ISTALE) == 0) {
669 		(void) TRANS_SYNCIP(ip, B_ASYNC, I_ASYNC, TOP_SYNCIP_FREE);
670 		(void) TRANS_SYNCIP(ip,
671 		    (TRANS_ISERROR(ufsvfsp)) ? B_INVAL | B_FORCE : B_INVAL,
672 		    I_ASYNC, TOP_SYNCIP_FREE);
673 	}
674 
675 	/*
676 	 * wait for any current ufs_iget to finish and block future ufs_igets
677 	 */
678 	ASSERT(ip->i_number != 0);
679 	hno = INOHASH(ip->i_number);
680 	ihm = &ih_lock[hno];
681 	mutex_enter(ihm);
682 
683 	/*
684 	 * It must be guaranteed that v_count >= 2, otherwise
685 	 * something must be wrong with this vnode already.
686 	 * That is why we use VN_RELE_LOCKED() instead of VN_RELE().
687 	 * Acquire the vnode lock in case another thread is in
688 	 * VN_RELE().
689 	 */
690 	mutex_enter(&vp->v_lock);
691 
692 	VERIFY3U(vp->v_count, >=, 2);
693 
694 	VN_RELE_LOCKED(vp);
695 
696 	vn_has_data = (vp->v_type != VCHR && vn_has_cached_data(vp));
697 	vn_modified = (ip->i_flag & (IMOD|IMODACC|IACC|ICHG|IUPD|IATTCHG));
698 
699 	if (vp->v_count != 1 ||
700 	    ((vn_has_data || vn_modified) &&
701 	    ((ip->i_flag & ISTALE) == 0))) {
702 		/*
703 		 * Another thread has referenced this inode while
704 		 * we are trying  to free  it.  Call VN_RELE() to
705 		 * release our reference, if v_count > 1  data is
706 		 * present  or one of the modified etc. flags was
707 		 * set, whereby ISTALE wasn't set.
708 		 * If we'd proceed with ISTALE set here, we might
709 		 * get ourselves into a deadlock situation.
710 		 */
711 		mutex_exit(&vp->v_lock);
712 		mutex_exit(ihm);
713 		VN_RELE(vp);
714 	} else {
715 		/*
716 		 * The inode is currently unreferenced and can not
717 		 * acquire further references because it has no pages
718 		 * and the hash is locked.  Inodes acquire references
719 		 * via the hash list or via their pages.
720 		 */
721 
722 		mutex_exit(&vp->v_lock);
723 
724 		/*
725 		 * remove it from the cache
726 		 */
727 		remque(ip);
728 		mutex_exit(ihm);
729 		/*
730 		 * Stale inodes have no valid ufsvfs
731 		 */
732 		if ((ip->i_flag & ISTALE) == 0 && ip->i_dquot) {
733 			TRANS_DQRELE(ufsvfsp, ip->i_dquot);
734 			ip->i_dquot = NULL;
735 		}
736 		if ((ip->i_flag & ISTALE) &&
737 		    vn_has_data) {
738 			/*
739 			 * ISTALE inodes may have data
740 			 * and  this data needs  to be
741 			 * cleaned up.
742 			 */
743 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
744 			    ufs_putapage, B_INVAL | B_TRUNC,
745 			    (struct cred *)NULL);
746 		}
747 		ufs_si_del(ip);
748 		if (pages) {
749 			CPU_STATS_ADDQ(CPU, sys, ufsipage, 1);
750 		} else {
751 			CPU_STATS_ADDQ(CPU, sys, ufsinopage, 1);
752 		}
753 		ASSERT((vp->v_type == VCHR) || !vn_has_cached_data(vp));
754 
755 		/*
756 		 * We had better not have a vnode reference count > 1
757 		 * at this point, if we do then something is broken as
758 		 * this inode/vnode acquired a reference underneath of us.
759 		 */
760 		ASSERT(vp->v_count == 1);
761 
762 		ufs_free_inode(ip);
763 	}
764 }
765 
766 /*
767  * this thread processes the global idle queue
768  */
769 iqhead_t *ufs_junk_iq;
770 iqhead_t *ufs_useful_iq;
771 int ufs_njunk_iq = 0;
772 int ufs_nuseful_iq = 0;
773 int ufs_niqhash;
774 int ufs_iqhashmask;
775 struct ufs_q	ufs_idle_q;
776 
777 void
ufs_thread_idle(void)778 ufs_thread_idle(void)
779 {
780 	callb_cpr_t cprinfo;
781 	int i;
782 	int ne;
783 
784 	ufs_niqhash = (ufs_idle_q.uq_lowat >> 1) / IQHASHQLEN;
785 	ufs_niqhash = 1 << highbit(ufs_niqhash); /* round up to power of 2 */
786 	ufs_iqhashmask = ufs_niqhash - 1;
787 	ufs_junk_iq = kmem_alloc(ufs_niqhash * sizeof (*ufs_junk_iq),
788 	    KM_SLEEP);
789 	ufs_useful_iq = kmem_alloc(ufs_niqhash * sizeof (*ufs_useful_iq),
790 	    KM_SLEEP);
791 
792 	/* Initialize hash queue headers */
793 	for (i = 0; i < ufs_niqhash; i++) {
794 		ufs_junk_iq[i].i_freef = (inode_t *)&ufs_junk_iq[i];
795 		ufs_junk_iq[i].i_freeb = (inode_t *)&ufs_junk_iq[i];
796 		ufs_useful_iq[i].i_freef = (inode_t *)&ufs_useful_iq[i];
797 		ufs_useful_iq[i].i_freeb = (inode_t *)&ufs_useful_iq[i];
798 	}
799 
800 	CALLB_CPR_INIT(&cprinfo, &ufs_idle_q.uq_mutex, callb_generic_cpr,
801 	    "ufsidle");
802 again:
803 	/*
804 	 * Whenever the idle thread is awakened, it repeatedly gives
805 	 * back half of the idle queue until the idle queue falls
806 	 * below lowat.
807 	 */
808 	mutex_enter(&ufs_idle_q.uq_mutex);
809 	if (ufs_idle_q.uq_ne < ufs_idle_q.uq_lowat) {
810 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
811 		cv_wait(&ufs_idle_q.uq_cv, &ufs_idle_q.uq_mutex);
812 		CALLB_CPR_SAFE_END(&cprinfo, &ufs_idle_q.uq_mutex);
813 	}
814 	mutex_exit(&ufs_idle_q.uq_mutex);
815 
816 	/*
817 	 * Give back 1/2 of the idle queue
818 	 */
819 	ne = ufs_idle_q.uq_ne >> 1;
820 	ins.in_tidles.value.ul += ne;
821 	ufs_idle_some(ne);
822 	goto again;
823 }
824 
825 /*
826  * Reclaim callback for ufs inode cache.
827  * Invoked by the kernel memory allocator when memory gets tight.
828  */
829 /*ARGSUSED*/
830 void
ufs_inode_cache_reclaim(void * cdrarg)831 ufs_inode_cache_reclaim(void *cdrarg)
832 {
833 	/*
834 	 * If we are low on memory and the idle queue is over its
835 	 * halfway mark, then free 50% of the idle q
836 	 *
837 	 * We don't free all of the idle inodes because the inodes
838 	 * for popular NFS files may have been kicked from the dnlc.
839 	 * The inodes for these files will end up on the idle queue
840 	 * after every NFS access.
841 	 *
842 	 * If we repeatedly push them from the idle queue then
843 	 * NFS users may be unhappy as an extra buf cache operation
844 	 * is incurred for every NFS operation to these files.
845 	 *
846 	 * It's not common, but I have seen it happen.
847 	 *
848 	 */
849 	if (ufs_idle_q.uq_ne < (ufs_idle_q.uq_lowat >> 1))
850 		return;
851 	mutex_enter(&ufs_idle_q.uq_mutex);
852 	cv_broadcast(&ufs_idle_q.uq_cv);
853 	mutex_exit(&ufs_idle_q.uq_mutex);
854 }
855 
856 /*
857  * Free up some idle inodes
858  */
859 void
ufs_idle_some(int ne)860 ufs_idle_some(int ne)
861 {
862 	int i;
863 	struct inode *ip;
864 	struct vnode *vp;
865 	static int junk_rotor = 0;
866 	static int useful_rotor = 0;
867 
868 	for (i = 0; i < ne; ++i) {
869 		mutex_enter(&ufs_idle_q.uq_mutex);
870 
871 		if (ufs_njunk_iq) {
872 			while (ufs_junk_iq[junk_rotor].i_freef ==
873 			    (inode_t *)&ufs_junk_iq[junk_rotor]) {
874 				junk_rotor = IQNEXT(junk_rotor);
875 			}
876 			ip = ufs_junk_iq[junk_rotor].i_freef;
877 			ASSERT(ip->i_flag & IJUNKIQ);
878 		} else if (ufs_nuseful_iq) {
879 			while (ufs_useful_iq[useful_rotor].i_freef ==
880 			    (inode_t *)&ufs_useful_iq[useful_rotor]) {
881 				useful_rotor = IQNEXT(useful_rotor);
882 			}
883 			ip = ufs_useful_iq[useful_rotor].i_freef;
884 			ASSERT(!(ip->i_flag & IJUNKIQ));
885 		} else {
886 			mutex_exit(&ufs_idle_q.uq_mutex);
887 			return;
888 		}
889 
890 		/*
891 		 * emulate ufs_iget
892 		 */
893 		vp = ITOV(ip);
894 		VN_HOLD(vp);
895 		mutex_exit(&ufs_idle_q.uq_mutex);
896 		rw_enter(&ip->i_contents, RW_WRITER);
897 		/*
898 		 * VN_RELE should not be called if
899 		 * ufs_rmidle returns true, as it will
900 		 * effectively be done in ufs_idle_free.
901 		 */
902 		if (ufs_rmidle(ip)) {
903 			rw_exit(&ip->i_contents);
904 			ufs_idle_free(ip);
905 		} else {
906 			rw_exit(&ip->i_contents);
907 			VN_RELE(vp);
908 		}
909 	}
910 }
911 
912 /*
913  * drain entries for vfsp from the idle queue
914  * vfsp == NULL means drain the entire thing
915  */
916 void
ufs_idle_drain(struct vfs * vfsp)917 ufs_idle_drain(struct vfs *vfsp)
918 {
919 	struct inode	*ip, *nip;
920 	struct inode	*ianchor = NULL;
921 	int		i;
922 
923 	mutex_enter(&ufs_idle_q.uq_mutex);
924 	if (ufs_njunk_iq) {
925 		/* for each hash q */
926 		for (i = 0; i < ufs_niqhash; i++) {
927 			/* search down the hash q */
928 			for (ip = ufs_junk_iq[i].i_freef;
929 			    ip != (inode_t *)&ufs_junk_iq[i];
930 			    ip = ip->i_freef) {
931 				if (ip->i_vfs == vfsp || vfsp == NULL) {
932 					/* found a matching entry */
933 					VN_HOLD(ITOV(ip));
934 					mutex_exit(&ufs_idle_q.uq_mutex);
935 					rw_enter(&ip->i_contents, RW_WRITER);
936 					/*
937 					 * See comments in ufs_idle_some()
938 					 * as we will call ufs_idle_free()
939 					 * after scanning both queues.
940 					 */
941 					if (ufs_rmidle(ip)) {
942 						rw_exit(&ip->i_contents);
943 						ip->i_freef = ianchor;
944 						ianchor = ip;
945 					} else {
946 						rw_exit(&ip->i_contents);
947 						VN_RELE(ITOV(ip));
948 					}
949 					/* restart this hash q */
950 					ip = (inode_t *)&ufs_junk_iq[i];
951 					mutex_enter(&ufs_idle_q.uq_mutex);
952 				}
953 			}
954 		}
955 	}
956 	if (ufs_nuseful_iq) {
957 		/* for each hash q */
958 		for (i = 0; i < ufs_niqhash; i++) {
959 			/* search down the hash q */
960 			for (ip = ufs_useful_iq[i].i_freef;
961 			    ip != (inode_t *)&ufs_useful_iq[i];
962 			    ip = ip->i_freef) {
963 				if (ip->i_vfs == vfsp || vfsp == NULL) {
964 					/* found a matching entry */
965 					VN_HOLD(ITOV(ip));
966 					mutex_exit(&ufs_idle_q.uq_mutex);
967 					rw_enter(&ip->i_contents, RW_WRITER);
968 					/*
969 					 * See comments in ufs_idle_some()
970 					 * as we will call ufs_idle_free()
971 					 * after scanning both queues.
972 					 */
973 					if (ufs_rmidle(ip)) {
974 						rw_exit(&ip->i_contents);
975 						ip->i_freef = ianchor;
976 						ianchor = ip;
977 					} else {
978 						rw_exit(&ip->i_contents);
979 						VN_RELE(ITOV(ip));
980 					}
981 					/* restart this hash q */
982 					ip = (inode_t *)&ufs_useful_iq[i];
983 					mutex_enter(&ufs_idle_q.uq_mutex);
984 				}
985 			}
986 		}
987 	}
988 
989 	mutex_exit(&ufs_idle_q.uq_mutex);
990 	/* no more matching entries, release those we have found (if any) */
991 	for (ip = ianchor; ip; ip = nip) {
992 		nip = ip->i_freef;
993 		ip->i_freef = ip;
994 		ufs_idle_free(ip);
995 	}
996 }
997 
998 /*
999  * RECLAIM DELETED INODES
1000  * The following thread scans the file system once looking for deleted files
1001  */
1002 void
ufs_thread_reclaim(struct vfs * vfsp)1003 ufs_thread_reclaim(struct vfs *vfsp)
1004 {
1005 	struct ufsvfs		*ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
1006 	struct ufs_q		*uq	= &ufsvfsp->vfs_reclaim;
1007 	struct fs		*fs	= ufsvfsp->vfs_fs;
1008 	struct buf		*bp	= 0;
1009 	int			err	= 0;
1010 	daddr_t			bno;
1011 	ino_t			ino;
1012 	struct dinode		*dp;
1013 	struct inode		*ip;
1014 	callb_cpr_t		cprinfo;
1015 
1016 	CALLB_CPR_INIT(&cprinfo, &uq->uq_mutex, callb_generic_cpr,
1017 	    "ufsreclaim");
1018 
1019 	/*
1020 	 * mount decided that we don't need a reclaim thread
1021 	 */
1022 	if ((fs->fs_reclaim & FS_RECLAIMING) == 0)
1023 		err++;
1024 
1025 	/*
1026 	 * don't reclaim if readonly
1027 	 */
1028 	if (fs->fs_ronly)
1029 		err++;
1030 
1031 	for (ino = 0; ino < (fs->fs_ncg * fs->fs_ipg) && !err; ++ino) {
1032 
1033 		/*
1034 		 * Check whether we are the target of another
1035 		 * thread having called ufs_thread_exit() or
1036 		 * ufs_thread_suspend().
1037 		 */
1038 		mutex_enter(&uq->uq_mutex);
1039 again:
1040 		if (uq->uq_flags & UQ_EXIT) {
1041 			err++;
1042 			mutex_exit(&uq->uq_mutex);
1043 			break;
1044 		} else if (uq->uq_flags & UQ_SUSPEND) {
1045 			uq->uq_flags |= UQ_SUSPENDED;
1046 			/*
1047 			 * Release the buf before we cv_wait()
1048 			 * otherwise we may deadlock with the
1049 			 * thread that called ufs_thread_suspend().
1050 			 */
1051 			if (bp) {
1052 				brelse(bp);
1053 				bp = 0;
1054 			}
1055 			if (uq->uq_flags & UQ_WAIT) {
1056 				uq->uq_flags &= ~UQ_WAIT;
1057 				cv_broadcast(&uq->uq_cv);
1058 			}
1059 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
1060 			cv_wait(&uq->uq_cv, &uq->uq_mutex);
1061 			CALLB_CPR_SAFE_END(&cprinfo, &uq->uq_mutex);
1062 			goto again;
1063 		}
1064 		mutex_exit(&uq->uq_mutex);
1065 
1066 		/*
1067 		 * if we don't already have the buf; get it
1068 		 */
1069 		bno = fsbtodb(fs, itod(fs, ino));
1070 		if ((bp == 0) || (bp->b_blkno != bno)) {
1071 			if (bp)
1072 				brelse(bp);
1073 			bp = UFS_BREAD(ufsvfsp,
1074 			    ufsvfsp->vfs_dev, bno, fs->fs_bsize);
1075 			bp->b_flags |= B_AGE;
1076 		}
1077 		if (bp->b_flags & B_ERROR) {
1078 			err++;
1079 			continue;
1080 		}
1081 		/*
1082 		 * nlink <= 0 and mode != 0 means deleted
1083 		 */
1084 		dp = (struct dinode *)bp->b_un.b_addr + itoo(fs, ino);
1085 		if ((dp->di_nlink <= 0) && (dp->di_mode != 0)) {
1086 			/*
1087 			 * can't hold the buf (deadlock)
1088 			 */
1089 			brelse(bp);
1090 			bp = 0;
1091 			rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
1092 			/*
1093 			 * iget/iput sequence will put inode on ifree
1094 			 * thread queue if it is idle.  This is a nop
1095 			 * for busy (open, deleted) inodes
1096 			 */
1097 			if (ufs_iget(vfsp, ino, &ip, CRED()))
1098 				err++;
1099 			else
1100 				VN_RELE(ITOV(ip));
1101 			rw_exit(&ufsvfsp->vfs_dqrwlock);
1102 		}
1103 	}
1104 
1105 	if (bp)
1106 		brelse(bp);
1107 	if (!err) {
1108 		/*
1109 		 * reset the reclaiming-bit
1110 		 */
1111 		mutex_enter(&ufsvfsp->vfs_lock);
1112 		fs->fs_reclaim &= ~FS_RECLAIMING;
1113 		mutex_exit(&ufsvfsp->vfs_lock);
1114 		TRANS_SBWRITE(ufsvfsp, TOP_SBWRITE_RECLAIM);
1115 	}
1116 
1117 	/*
1118 	 * exit the reclaim thread
1119 	 */
1120 	mutex_enter(&uq->uq_mutex);
1121 	uq->uq_threadp = NULL;
1122 	uq->uq_flags &= ~UQ_WAIT;
1123 	cv_broadcast(&uq->uq_cv);
1124 	CALLB_CPR_EXIT(&cprinfo);
1125 	thread_exit();
1126 }
1127 /*
1128  * HLOCK FILE SYSTEM
1129  *	hlock the file system's whose logs have device errors
1130  */
1131 struct ufs_q	ufs_hlock;
1132 /*ARGSUSED*/
1133 void
ufs_thread_hlock(void * ignore)1134 ufs_thread_hlock(void *ignore)
1135 {
1136 	int		retry;
1137 	callb_cpr_t	cprinfo;
1138 
1139 	CALLB_CPR_INIT(&cprinfo, &ufs_hlock.uq_mutex, callb_generic_cpr,
1140 	    "ufshlock");
1141 
1142 	for (;;) {
1143 		/*
1144 		 * sleep until there is work to do
1145 		 */
1146 		mutex_enter(&ufs_hlock.uq_mutex);
1147 		(void) ufs_thread_run(&ufs_hlock, &cprinfo);
1148 		ufs_hlock.uq_ne = 0;
1149 		mutex_exit(&ufs_hlock.uq_mutex);
1150 		/*
1151 		 * hlock the error'ed fs's
1152 		 *	retry after a bit if another app is doing lockfs stuff
1153 		 */
1154 		do {
1155 			retry = ufs_trans_hlock();
1156 			if (retry) {
1157 				mutex_enter(&ufs_hlock.uq_mutex);
1158 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
1159 				(void) cv_reltimedwait(&ufs_hlock.uq_cv,
1160 				    &ufs_hlock.uq_mutex, hz, TR_CLOCK_TICK);
1161 				CALLB_CPR_SAFE_END(&cprinfo,
1162 				    &ufs_hlock.uq_mutex);
1163 				mutex_exit(&ufs_hlock.uq_mutex);
1164 			}
1165 		} while (retry);
1166 	}
1167 }
1168 
1169 static void
ufs_attr_purge(struct inode * dp)1170 ufs_attr_purge(struct inode *dp)
1171 {
1172 	int	err;
1173 	int	error;
1174 	off_t 	dirsize;			/* size of the directory */
1175 	off_t 	offset;	/* offset in the directory */
1176 	int entryoffsetinblk;		/* offset of ep in fbp's buffer */
1177 	struct inode *tp;
1178 	struct fbuf *fbp;	/* pointer to directory block */
1179 	struct direct *ep;	/* directory entry */
1180 	int trans_size;
1181 	int issync;
1182 	struct ufsvfs	*ufsvfsp = dp->i_ufsvfs;
1183 
1184 	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
1185 
1186 	fbp = NULL;
1187 	dirsize = roundup(dp->i_size, DIRBLKSIZ);
1188 	offset = 0;
1189 	entryoffsetinblk = 0;
1190 
1191 	/*
1192 	 * Purge directory cache
1193 	 */
1194 
1195 	dnlc_dir_purge(&dp->i_danchor);
1196 
1197 	while (offset < dirsize) {
1198 		/*
1199 		 * If offset is on a block boundary,
1200 		 * read the next directory block.
1201 		 * Release previous if it exists.
1202 		 */
1203 		if (blkoff(dp->i_fs, offset) == 0) {
1204 			if (fbp != NULL) {
1205 				fbrelse(fbp, S_OTHER);
1206 			}
1207 
1208 			err = blkatoff(dp, offset, (char **)0, &fbp);
1209 			if (err) {
1210 				goto out;
1211 			}
1212 			entryoffsetinblk = 0;
1213 		}
1214 		ep = (struct direct *)(fbp->fb_addr + entryoffsetinblk);
1215 		if (ep->d_ino == 0 || (ep->d_name[0] == '.' &&
1216 		    ep->d_name[1] == '\0') ||
1217 		    (ep->d_name[0] == '.' && ep->d_name[1] == '.' &&
1218 		    ep->d_name[2] == '\0')) {
1219 
1220 			entryoffsetinblk += ep->d_reclen;
1221 
1222 		} else {
1223 
1224 			if ((err = ufs_iget(dp->i_vfs, ep->d_ino,
1225 			    &tp, CRED())) != 0) {
1226 				goto out;
1227 			}
1228 
1229 			TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_REMOVE,
1230 			    trans_size = (int)TOP_REMOVE_SIZE(tp));
1231 
1232 			/*
1233 			 * Delete inode.
1234 			 */
1235 
1236 			dnlc_remove(ITOV(dp), ep->d_name);
1237 
1238 			rw_enter(&tp->i_contents, RW_WRITER);
1239 			tp->i_flag |= ICHG;
1240 			tp->i_seq++;
1241 			TRANS_INODE(tp->i_ufsvfs, tp);
1242 			tp->i_nlink--;
1243 			ufs_setreclaim(tp);
1244 			ITIMES_NOLOCK(tp);
1245 			rw_exit(&tp->i_contents);
1246 
1247 			VN_RELE(ITOV(tp));
1248 			entryoffsetinblk += ep->d_reclen;
1249 			TRANS_END_CSYNC(ufsvfsp, error,
1250 			    issync, TOP_REMOVE, trans_size);
1251 
1252 		}
1253 		offset += ep->d_reclen;
1254 	}
1255 
1256 	if (fbp) {
1257 		fbrelse(fbp, S_OTHER);
1258 	}
1259 
1260 out:
1261 	rw_exit(&ufsvfsp->vfs_dqrwlock);
1262 }
1263