xref: /titanic_51/usr/src/uts/common/fs/ufs/ufs_panic.c (revision 83fcdc8cfa9b16b358b13c5dd920d71bbaf4a8b5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/errno.h>
33 #include <sys/mode.h>
34 #include <sys/sysmacros.h>
35 #include <sys/cmn_err.h>
36 #include <sys/varargs.h>
37 #include <sys/time.h>
38 #include <sys/buf.h>
39 #include <sys/kmem.h>
40 #include <sys/t_lock.h>
41 #include <sys/poll.h>
42 #include <sys/debug.h>
43 #include <sys/cred.h>
44 #include <sys/lockfs.h>
45 #include <sys/fs/ufs_fs.h>
46 #include <sys/fs/ufs_inode.h>
47 #include <sys/fs/ufs_panic.h>
48 #include <sys/fs/ufs_lockfs.h>
49 #include <sys/fs/ufs_trans.h>
50 #include <sys/fs/ufs_mount.h>
51 #include <sys/fs/ufs_prot.h>
52 #include <sys/fs/ufs_bio.h>
53 #include <sys/pathname.h>
54 #include <sys/utsname.h>
55 #include <sys/conf.h>
56 
/*
 * Absolute value.  This is a macro, so the argument is evaluated more
 * than once and must be free of side effects.
 */
#define	abs(x)		(((x) < 0) ? -(x) : (x))
59 
#if defined(DEBUG)

/*
 * Debug output control.
 *
 * ufs_fix_failure_dbg packs two fields into one word: behaviour flags in
 * the low four bits (DBGFLG_*) and the verbosity level in the remaining
 * bits (DBGLVL_*).
 */

/* verbosity levels */
#define	DBGLVL_NONE	0x00000000
#define	DBGLVL_MAJOR	0x00000100
#define	DBGLVL_MINOR	0x00000200
#define	DBGLVL_MINUTE	0x00000400
#define	DBGLVL_TRIVIA	0x00000800
#define	DBGLVL_HIDEOUS	0x00001000

/* behaviour flags */
#define	DBGFLG_NONE		0x00000000
#define	DBGFLG_NOPANIC		0x00000001
#define	DBGFLG_LVLONLY		0x00000002
#define	DBGFLG_FIXWOULDPANIC	0x00000004

/* field masks; parenthesized so they survive use in any expression */
#define	DBGFLG_FLAGMASK		0x0000000F
#define	DBGFLG_LEVELMASK	(~DBGFLG_FLAGMASK)

#define	DEBUG_FLAGS	(ufs_fix_failure_dbg & DBGFLG_FLAGMASK)
#define	DEBUG_LEVEL	(ufs_fix_failure_dbg & DBGFLG_LEVELMASK)

unsigned int ufs_fix_failure_dbg =	DBGLVL_NONE | DBGFLG_NONE;

/*
 * DCALL(dbg_level, call)
 *
 * Execute `call' when debugging is enabled for dbg_level:
 *   - with DBGFLG_LVLONLY set, only when dbg_level's bit is set in the
 *     current level;
 *   - otherwise, whenever dbg_level is at or below the current level.
 *
 * This expands to a bare brace block rather than do { } while (0) so
 * that an invocation written without a trailing semicolon still
 * compiles.
 */
#define	DCALL(dbg_level, call)						\
	{								\
		if (DEBUG_LEVEL != DBGLVL_NONE) {			\
			if (DEBUG_FLAGS & DBGFLG_LVLONLY) {		\
				if (DEBUG_LEVEL & (dbg_level)) {	\
					call;				\
				}					\
			} else {					\
				if ((dbg_level) <= DEBUG_LEVEL) {	\
					call;				\
				}					\
			}						\
		}							\
	}

/* msg is a complete, parenthesized printf argument list */
#define	DPRINTF(dbg_level, msg)		DCALL(dbg_level, printf msg)

#define	MAJOR(msg)			DPRINTF(DBGLVL_MAJOR, msg)
#define	MINOR(msg)			DPRINTF(DBGLVL_MINOR, msg)
#define	MINUTE(msg)			DPRINTF(DBGLVL_MINUTE, msg)
#define	TRIVIA(msg)			DPRINTF(DBGLVL_TRIVIA, msg)
#define	HIDEOUS(msg)			DPRINTF(DBGLVL_HIDEOUS, msg)

#else	/* !DEBUG */

/* non-DEBUG kernels: every debug hook expands to nothing */
#define	DCALL(ignored_dbg_level, ignored_routine)
#define	MAJOR(ignored)
#define	MINOR(ignored)
#define	MINUTE(ignored)
#define	TRIVIA(ignored)
#define	HIDEOUS(ignored)

#endif /* DEBUG */
115 
/*
 * NULLSTR yields "<null>" for a NULL or empty string and the string
 * itself otherwise; str is evaluated more than once.
 */
#define	NULLSTR(str)	(((str) && *(str) != '\0') ? (str) : "<null>")
/* the empty string */
#define	NULSTRING	""
118 
/*
 * Default timing values, all in seconds.  The limits are somewhat
 * arbitrary and all probably ought to be different, but these values
 * are convenient for debugging.
 */

/* max. wait for fsck start */
const time_t	UF_TOO_LONG		= 128;

/* retry periods used while ... */
const time_t	UF_FIXSTART_PERIOD	= 16;	/* awaiting fsck start */
const time_t	UF_FIXPOLL_PERIOD	= 256;	/* awaiting fsck finish */
const time_t	UF_SHORT_ERROR_PERIOD	= 4;	/* after (lockfs) error */
const time_t	UF_LONG_ERROR_PERIOD	= 512;	/* after (lockfs) error */
128 
/* "no error" value for errno-style results */
#define	NO_ERROR		0
/*
 * One past the highest lockfs lock type.  Parenthesized so the value
 * stays intact when the macro is used inside a larger expression.
 */
#define	LOCKFS_OLOCK		(LOCKFS_MAXLOCK + 1)
131 
132 const ulong_t	GB			= 1024 * 1024 * 1024;
133 const ulong_t	SecondsPerGig		= 1024;	/* ~17 minutes (overestimate) */
134 
135 /*
136  * per filesystem flags
137  */
138 const int	UFSFX_PANIC		= (UFSMNT_ONERROR_PANIC >> 4);
139 const int	UFSFX_LCKONLY		= (UFSMNT_ONERROR_LOCK >> 4);
140 const int	UFSFX_LCKUMOUNT		= (UFSMNT_ONERROR_UMOUNT >> 4);
141 const int	UFSFX_DEFAULT		= (UFSMNT_ONERROR_DEFAULT >> 4);
142 const int	UFSFX_REPAIR_START	= 0x10000000;
143 
/* return protocols */

/* verdict of triage() on a newly reported failure */
typedef enum triage_return_code {
	TRIAGE_DEAD		= -1,	/* no fix attempt is possible */
	TRIAGE_NO_SPIRIT	= 0,	/* fs is marked "panic" */
	TRIAGE_ATTEND_TO	= 1	/* queue the failure for fixing */
} triage_t;

/* result of a state function or state change; zero means failure */
typedef enum statefunc_return_code {
	SFRC_FAIL	= 0,
	SFRC_SUCCESS	= 1
} sfrc_t;
156 
157 /* external references */
158 /* in ufs_thread.c */
159 extern int	ufs_thread_run(struct ufs_q *, callb_cpr_t *cprinfop);
160 extern int	ufs_checkaccton(vnode_t *);		/* in ufs_lockfs.c */
161 extern int	ufs_checkswapon(vnode_t *);		/* in ufs_lockfs.c */
162 
163 extern struct pollhead		ufs_pollhd;		/* in ufs_vnops.c */
164 
165 /* globals */
166 struct	ufs_q	 ufs_fix;
167 
/*
 * patchable constants:
 *   ufsfx_init() [called at modload] loads these from the UF_* defaults
 */
struct ufs_failure_tunable {
	/* limit repair startup time */
	long	 uft_too_long;
	/* pre-repair start period */
	long	 uft_fixstart_period;
	/* post-fsck start period */
	long	 uft_fixpoll_period;
	/* post-error short period */
	long	 uft_short_err_period;
	/* post-error long period */
	long	 uft_long_err_period;
} ufsfx_tune;
179 
180 /* internal statistics of events */
181 struct uf_statistics {
182 	ulong_t		ufst_lock_violations;
183 	ulong_t		ufst_current_races;
184 	ulong_t		ufst_unmount_failures;
185 	ulong_t		ufst_num_fixed;
186 	ulong_t		ufst_num_failed;
187 	ulong_t		ufst_cpu_waste;
188 	time_t		ufst_last_start_tm;
189 	kmutex_t	ufst_mutex;
190 } uf_stats;
191 
/* why a state function is being called */
typedef enum state_action {
	UFA_ERROR	= -1,	/* internal error */
	UFA_FOUND	= 0,	/* found uf in state */
	UFA_SET		= 1	/* change uf to state */
} ufsa_t;
197 
198 /* state definition */
199 typedef struct uf_state_desc {
200 	int	  ud_v;					/* value */
201 	char	 *ud_name;				/* name */
202 	sfrc_t	(*ud_sfp)(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
203 							/* per-state actions */
204 	ufs_failure_states_t	  ud_prev;		/* valid prev. states */
205 
206 	struct uf_state_desc_attr {
207 		unsigned	terminal:1;	/* no action req. if found */
208 		unsigned	at_fail:1;	/* state set by thread */
209 						/* encountering the error */
210 		unsigned	unused;
211 	} ud_attr;
212 } ufsd_t;
213 
214 /*
215  * forward references
216  */
217 
218 /* thread to watch for failures */
219 static void	ufsfx_thread_fix_failures(void *);
220 static int 	ufsfx_do_failure_q(void);
221 static void	ufsfx_kill_fix_failure_thread(void *);
222 
223 /* routines called when failure occurs */
224 static int		 ufs_fault_v(vnode_t *, char *, va_list)
225 	__KVPRINTFLIKE(2);
226 static ufs_failure_t	*init_failure(vnode_t *, char *, va_list)
227 	__KVPRINTFLIKE(2);
228 static void		 queue_failure(ufs_failure_t *);
229 /*PRINTFLIKE2*/
230 static void		 real_panic(ufs_failure_t *, const char *, ...)
231 	__KPRINTFLIKE(2);
232 static void		 real_panic_v(ufs_failure_t *, const char *, va_list)
233 	__KVPRINTFLIKE(2);
234 static triage_t		 triage(vnode_t *);
235 
236 /* routines called when failure record is acted upon */
237 static sfrc_t	set_state(ufs_failure_t *, ufs_failure_states_t);
238 static int	state_trans_valid(ufs_failure_states_t, ufs_failure_states_t);
239 static int	terminal_state(ufs_failure_states_t);
240 
241 /* routines called when states entered/found */
242 static sfrc_t	sf_minimum(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
243 static sfrc_t	sf_undef(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
244 static sfrc_t	sf_init(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
245 static sfrc_t	sf_queue(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
246 static sfrc_t	sf_found_queue(ufs_failure_t *);
247 static sfrc_t	sf_nonterm_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
248 static sfrc_t	sf_term_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
249 static sfrc_t	sf_panic(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
250 static sfrc_t	sf_set_trylck(ufs_failure_t *);
251 static sfrc_t	sf_set_locked(ufs_failure_t *);
252 static sfrc_t	sf_found_trylck(ufs_failure_t *);
253 static sfrc_t	sf_found_lock_fix_cmn(ufs_failure_t *, ufs_failure_states_t);
254 static sfrc_t	sf_found_umount(ufs_failure_t *);
255 
256 /* support routines, called by sf_nonterm_cmn and sf_term_cmn */
257 static time_t 	trylock_time_exceeded(ufs_failure_t *);
258 static void 	pester_msg(ufs_failure_t *, int);
259 static int 	get_lockfs_status(ufs_failure_t *, struct lockfs *);
260 static void 	alloc_lockfs_comment(ufs_failure_t *, struct lockfs *);
261 static int 	set_lockfs(ufs_failure_t *, struct lockfs *);
262 static int 	lockfs_failure(ufs_failure_t *);
263 static int 	lockfs_success(ufs_failure_t *);
264 static int	fsck_active(ufs_failure_t *);
265 
266 /* low-level support routines */
267 static ufsd_t	*get_state_desc(ufs_failure_states_t);
268 static char	*fs_name(ufs_failure_t *);
269 
270 #if defined(DEBUG)
271 static char	*state_name(ufs_failure_states_t);
272 static char	*lock_name(struct lockfs *);
273 static char	*err_name(int);
274 static char	*act_name(ufsa_t);
275 static void	 dump_uf_list(char *msg);
276 static void	 dump_uf(ufs_failure_t *, int i);
277 #endif /* DEBUG */
278 /*
279  *
280  * State Transitions:
281  *
282  * normally:
283  * if flagged to be locked but not unmounted:	(UFSMNT_ONERROR_LOCK)
284  *	UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> FIXING -> FIXED
285  *
286  * The only difference between these two is that the fsck must be started
287  * manually.
288  *
289  * if flagged to be unmounted:			(UFSMNT_ONERROR_UMOUNT)
290  *	UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> UMOUNT -> NOTFIX
291  *
292  * if flagged to panic:				(UFSMNT_ONERROR_PANIC)
293  *	UNDEF -> INIT -> PANIC
294  *
295  * if a secondary panic on a file system which has an active failure
296  * record:
297  *	UNDEF -> INIT -> QUEUE -> REPLICA
298  *
 * UNDEF, INIT and QUEUE are all set in the context of the failing thread.
 * All other states (except possibly PANIC) are set by the monitor
 * (lock) thread.
302  *
303  */
304 
305 ufsd_t	state_desc[] =
306 {
307 	{ UF_ILLEGAL,	"in an unknown state",	sf_minimum,	UF_ILLEGAL,
308 								{ 0, 1, 0 } },
309 	{ UF_UNDEF,	"undefined",		sf_undef,	UF_UNDEF,
310 								{ 0, 1, 0 } },
311 	{ UF_INIT,	"being initialized",	sf_init,	UF_UNDEF,
312 								{ 0, 1, 0 } },
313 	{ UF_QUEUE,	"queued",		sf_queue,	UF_INIT,
314 								{ 0, 1, 0 } },
315 	{ UF_TRYLCK,	"trying to be locked",	sf_nonterm_cmn,
316 						UF_QUEUE,	{ 0, 0, 0 } },
317 	{ UF_LOCKED,	"locked",		sf_nonterm_cmn,
318 					UF_TRYLCK | UF_FIXING,	{ 0, 0, 0 } },
319 	{ UF_UMOUNT,	"being unmounted",	sf_nonterm_cmn,
320 
321 #if defined(DEBUG)
322 					UF_PANIC |
323 #endif /* DEBUG */
324 					UF_TRYLCK | UF_LOCKED, 	{ 0, 0, 0 } },
325 	{ UF_FIXING,	"being fixed",		sf_nonterm_cmn,
326 						UF_LOCKED,	{ 0, 0, 0 } },
327 	{ UF_FIXED,	"fixed",		sf_term_cmn,
328 						UF_FIXING,	{ 1, 0, 0 } },
329 	{ UF_NOTFIX,	"not fixed",		sf_term_cmn,
330 
331 #if defined(DEBUG)
332 							UF_PANIC |
333 #endif /* DEBUG */
334 
335 	    UF_QUEUE | UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING,
336 								{ 1, 0, 0 } },
337 	{ UF_REPLICA,	"a replica",		sf_term_cmn,
338 						UF_QUEUE,	{ 1, 0, 0 } },
339 	{ UF_PANIC,	"panicking",		sf_panic,
340 		/* XXX make this narrower */	UF_ALLSTATES,	{ 0, 0, 0 } },
341 	{ UF_UNDEF,	NULL,			((sfrc_t (*)()) NULL),
342 						UF_UNDEF, 	{ 0, 0, 0 } }
343 };
344 
345 /* unified collection */
346 struct ufsfx_info {
347 	struct uf_statistics		*ufi_statp;
348 	struct ufs_failure_tunable	*ufi_tunep;
349 	ufsd_t				*ufi_statetab;
350 } uffsinfo;
351 
#if defined(DEBUG)

/*
 * Value-to-name tables used for debug output.  In each table the first
 * row carries the "unexpected" text for that table's out-of-band value
 * and the last row, whose name is NULL, ends the table.
 */

/* action (ufsa_t) names */
struct action_description {
	ufsa_t	 ad_v;
	char	*ad_name;
};

/* errno names */
#define	EUNK		(-1)

struct error_description {
	int	 ed_errno;
	char	*ed_name;
} err_desc[] =
{
	{ EUNK,		"<unexpected errno?>" },
	{ EINVAL,	"EINVAL" },
	{ EACCES,	"EACCES" },
	{ EPERM,	"EPERM" },
	{ EIO,		"EIO" },
	{ EDEADLK,	"EDEADLK" },
	{ EBUSY,	"EBUSY" },
	{ EAGAIN,	"EAGAIN" },
	{ ERESTART,	"ERESTART" },
	{ ETIMEDOUT,	"ETIMEDOUT" },
	{ NO_ERROR,	"Ok" },
	{ EUNK,		NULL }
};

struct action_description act_desc[] =
{
	{ UFA_ERROR,	"<unexpected action?>" },
	{ UFA_FOUND,	"\"found\"" },
	{ UFA_SET,	"\"set\"" },
	{ UFA_ERROR,	NULL },
};

/* lockfs lock type names */
#define	LOCKFS_BADLOCK	(-1)

struct lock_description {
	int	 ld_type;
	char	*ld_name;
} lock_desc[] =
{
	{ LOCKFS_BADLOCK,	"<unexpected lock?>" },
	{ LOCKFS_ULOCK,		"Unlock" },
	{ LOCKFS_ELOCK,		"Error Lock" },
	{ LOCKFS_HLOCK,		"Hard Lock" },
	{ LOCKFS_OLOCK,		"Old Lock" },
	{ LOCKFS_BADLOCK,	NULL }
};

#endif /* DEBUG */
403 
404 /*
405  * ufs_fault, ufs_fault_v
406  *
407  *  called instead of cmn_err(CE_PANIC, ...) by ufs routines
408  *  when a failure is detected to put the file system into an
409  *  error state (if possible) or to devolve to a panic otherwise
410  *
411  * vnode is some vnode in this file system, used to find the way
412  * to ufsvfs, vfsp etc.  Since a panic can be called from many
413  * levels, the vnode is the most convenient hook to pass through.
414  *
415  */
416 
417 /*PRINTFLIKE2*/
418 int
419 ufs_fault(vnode_t *vp, char *fmt, ...)
420 {
421 	va_list	adx;
422 	int	error;
423 
424 	MINOR(("[ufs_fault"));
425 
426 	va_start(adx, fmt);
427 	error = ufs_fault_v(vp, fmt, adx);
428 	va_end(adx);
429 
430 	MINOR((": %s (%d)]\n", err_name(error), error));
431 	return (error);
432 }
433 
434 const char *nullfmt = "<null format?>";
435 
436 static int
437 ufs_fault_v(vnode_t *vp, char *fmt, va_list adx)
438 {
439 	ufs_failure_t		*new = NULL;
440 	ufsvfs_t		*ufsvfsp;
441 	triage_t		 fix;
442 	int			 err = ERESTART;
443 	int			need_vfslock;
444 
445 	MINOR(("[ufs_fault_v"));
446 
447 	if (fmt == NULL)
448 		fmt = (char *)nullfmt;
449 
450 	fix = triage(vp);
451 
452 	if (vp) {
453 		ufsvfsp = (struct ufsvfs *)vp->v_vfsp->vfs_data;
454 
455 		/*
456 		 * Something bad has happened. That is why we are here.
457 		 *
458 		 * In order for the bad thing to be recorded in the superblock
459 		 * we need to write to the superblock directly.
460 		 * In the case that logging is enabled the logging code
461 		 * would normally intercept our write as a delta to the log,
462 		 * thus we mark the filesystem FSBAD in any case.
463 		 */
464 		need_vfslock = !MUTEX_HELD(&ufsvfsp->vfs_lock);
465 
466 		if (need_vfslock) {
467 			mutex_enter(&ufsvfsp->vfs_lock);
468 		}
469 
470 		ufsvfsp->vfs_fs->fs_clean = FSBAD;
471 		ASSERT(SEMA_HELD(&ufsvfsp->vfs_bufp->b_sem));
472 		ufsvfsp->vfs_bufp->b_flags &= ~(B_ASYNC | B_READ |
473 				B_DONE | B_ERROR | B_DELWRI);
474 
475 		(void) bdev_strategy(ufsvfsp->vfs_bufp);
476 		(void) biowait(ufsvfsp->vfs_bufp);
477 
478 		if (need_vfslock) {
479 			mutex_exit(&ufsvfsp->vfs_lock);
480 		}
481 	}
482 
483 	switch (fix) {
484 
485 	default:
486 	case TRIAGE_DEAD:
487 	case TRIAGE_NO_SPIRIT:
488 
489 		real_panic_v(new, fmt, adx);
490 		/* LINTED: warning: logical expression always true: op "||" */
491 		ASSERT(DEBUG);
492 		err = EAGAIN;
493 
494 #if defined(DEBUG)
495 		if (!(DEBUG_FLAGS & DBGFLG_FIXWOULDPANIC)) {
496 			break;
497 		}
498 		/* FALLTHROUGH */
499 
500 #else
501 		break;
502 
503 #endif /* DEBUG */
504 
505 	case TRIAGE_ATTEND_TO:
506 
507 		/* q thread not running yet? */
508 		mutex_enter(&ufs_fix.uq_mutex);
509 		if (!ufs_fix.uq_threadp) {
510 			mutex_exit(&ufs_fix.uq_mutex);
511 			ufs_thread_start(&ufs_fix, ufsfx_thread_fix_failures,
512 								    NULL);
513 			ufs_fix.uq_threadp->t_flag |= T_DONTBLOCK;
514 			mutex_enter(&ufs_fix.uq_mutex);
515 		} else {
516 			MINOR((": fix failure thread already running "));
517 		}
518 
519 		if (ufs_fix.uq_threadp && ufs_fix.uq_threadp == curthread) {
520 			mutex_exit(&ufs_fix.uq_mutex);
521 			cmn_err(CE_WARN, "ufs_fault_v: recursive ufs_fault");
522 		} else {
523 			mutex_exit(&ufs_fix.uq_mutex);
524 		}
525 
526 		new = init_failure(vp, fmt, adx);
527 		if (new != NULL) {
528 			queue_failure(new);
529 			break;
530 		}
531 		real_panic_v(new, fmt, adx);
532 		break;
533 
534 	}
535 	MINOR(("] "));
536 	return (err);
537 }
538 
539 /*
540  * triage()
541  *
542  *  Attempt to fix iff:
543  *    - the system is not already panicking
544  *    - this file system isn't explicitly marked not to be fixed
545  *    - we can connect to the user-level daemon
546  * These conditions are detectable later, but if we can determine
547  * them in the failing threads context the core dump may be more
548  * useful.
549  *
550  */
551 
552 static triage_t
553 triage(vnode_t *vp)
554 {
555 	struct inode	 *ip;
556 	int		  need_unlock_vfs;
557 	int		  fs_flags;
558 
559 	MINUTE(("[triage"));
560 
561 	if (panicstr) {
562 		MINUTE((
563 		": already panicking: \"%s\" => TRIAGE_DEAD]\n", panicstr));
564 		return (TRIAGE_DEAD);
565 	}
566 
567 	if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs) {
568 		MINUTE((
569 	": vp, ip or ufsvfs is NULL; can't determine fs => TRIAGE_DEAD]\n"));
570 		return (TRIAGE_DEAD);
571 	}
572 
573 	/* use tryenter and continue no matter what since we're panicky */
574 	need_unlock_vfs = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock);
575 	if (need_unlock_vfs)
576 		need_unlock_vfs = mutex_tryenter(&ip->i_ufsvfs->vfs_lock);
577 
578 	fs_flags = ip->i_ufsvfs->vfs_fsfx.fx_flags;
579 	if (need_unlock_vfs)
580 		mutex_exit(&ip->i_ufsvfs->vfs_lock);
581 
582 	if (fs_flags & UFSFX_PANIC) {
583 		MINUTE((
584 		": filesystem marked \"panic\" => TRIAGE_NO_SPIRIT]\n"));
585 		return (TRIAGE_NO_SPIRIT);
586 	}
587 
588 	if (ufs_checkaccton(vp) != 0) {
589 		MINUTE((
590 		": filesystem would deadlock (accounting) => TRIAGE_DEAD]\n"));
591 		return (TRIAGE_DEAD);
592 	}
593 
594 	if (ufs_checkswapon(vp) != 0) {
595 		MINUTE((
596 		": filesystem would deadlock (swapping) => TRIAGE_DEAD]\n"));
597 		return (TRIAGE_DEAD);
598 	}
599 
600 	MINUTE((": return TRIAGE_ATTEND_TO] "));
601 	return (TRIAGE_ATTEND_TO);
602 }
603 
604 /*
605  * init failure
606  *
607  * This routine allocates a failure struct and initializes
608  * it's member elements.
609  * Space is allocated for copies of dynamic identifying fs structures
610  * passed in.  Without a much more segmented kernel architecture
611  * this is as protected as we can make it (for now.)
612  */
613 static ufs_failure_t *
614 init_failure(vnode_t *vp, char *fmt, va_list adx)
615 {
616 	ufs_failure_t	*new;
617 	struct inode	*ip;
618 	int		 initialization_worked = 0;
619 	int		 need_vfs_unlock;
620 
621 	MINOR(("[init_failure"));
622 
623 	new = kmem_zalloc(sizeof (ufs_failure_t), KM_NOSLEEP);
624 	if (!new) {
625 		MINOR((": kmem_zalloc failed]\n"));
626 		return (NULL);
627 	}
628 
629 	/*
630 	 * enough information to make a fix attempt possible?
631 	 */
632 	if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs || !vp->v_vfsp ||
633 	    !ip->i_ufsvfs->vfs_bufp || !ITOF(ip) || !fmt)
634 		goto errout;
635 
636 	if (vp->v_type != VREG && vp->v_type != VDIR &&
637 	    vp->v_type != VBLK && vp->v_type != VCHR &&
638 	    vp->v_type != VLNK && vp->v_type != VFIFO &&
639 	    vp->v_type != VSOCK)
640 		goto errout;
641 
642 	if (ip->i_ufsvfs->vfs_root->v_type != VREG &&
643 	    ip->i_ufsvfs->vfs_root->v_type != VDIR &&
644 	    ip->i_ufsvfs->vfs_root->v_type != VBLK &&
645 	    ip->i_ufsvfs->vfs_root->v_type != VCHR &&
646 	    ip->i_ufsvfs->vfs_root->v_type != VLNK &&
647 	    ip->i_ufsvfs->vfs_root->v_type != VFIFO &&
648 	    ip->i_ufsvfs->vfs_root->v_type != VSOCK)
649 		goto errout;
650 
651 	if ((ITOF(ip)->fs_magic != FS_MAGIC) &&
652 	    (ITOF(ip)->fs_magic != MTB_UFS_MAGIC))
653 		goto errout;
654 
655 	/* intialize values */
656 
657 	(void) vsnprintf(new->uf_panic_str, LOCKFS_MAXCOMMENTLEN - 1, fmt, adx);
658 
659 	new->uf_ufsvfsp = ip->i_ufsvfs;
660 	new->uf_vfsp    = ip->i_vfs;
661 
662 	mutex_init(&new->uf_mutex, NULL, MUTEX_DEFAULT, NULL);
663 	need_vfs_unlock = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock);
664 
665 	if (need_vfs_unlock) {
666 		if (!mutex_tryenter(&ip->i_ufsvfs->vfs_lock)) {
667 			/*
668 			 * not much alternative here, but we're panicking
669 			 * already, it couldn't be worse - so just
670 			 * proceed optimistically and take note.
671 			 */
672 			mutex_enter(&uf_stats.ufst_mutex);
673 			uf_stats.ufst_lock_violations++;
674 			mutex_exit(&uf_stats.ufst_mutex);
675 			MINOR((": couldn't get vfs lock"))
676 			need_vfs_unlock = 0;
677 		}
678 	}
679 
680 	if (mutex_tryenter(&new->uf_mutex)) {
681 		initialization_worked = set_state(new, UF_INIT);
682 		mutex_exit(&new->uf_mutex);
683 	}
684 
685 	if (need_vfs_unlock)
686 		mutex_exit(&ip->i_ufsvfs->vfs_lock);
687 
688 	if (initialization_worked) {
689 		MINOR(("] "));
690 		return (new);
691 	}
692 	/* FALLTHROUGH */
693 
694 errout:
695 	if (new)
696 		kmem_free(new, sizeof (ufs_failure_t));
697 	MINOR((": failed]\n"));
698 	return (NULL);
699 }
700 
701 static void
702 queue_failure(ufs_failure_t *new)
703 {
704 	MINOR(("[queue_failure"));
705 
706 	mutex_enter(&ufs_fix.uq_mutex);
707 
708 	if (ufs_fix.uq_ufhead)
709 		insque(new, &ufs_fix.uq_ufhead);
710 	else
711 		ufs_fix.uq_ufhead = new;
712 
713 	if (mutex_tryenter(&new->uf_mutex)) {
714 		(void) set_state(new, UF_QUEUE);
715 		mutex_exit(&new->uf_mutex);
716 	}
717 
718 	mutex_enter(&uf_stats.ufst_mutex);		/* force wakeup */
719 	ufs_fix.uq_ne = ufs_fix.uq_lowat = uf_stats.ufst_num_failed;
720 	mutex_exit(&uf_stats.ufst_mutex);
721 
722 	cv_broadcast(&ufs_fix.uq_cv);
723 
724 	DCALL(DBGLVL_MAJOR, cmn_err(CE_WARN, new->uf_panic_str?
725 					new->uf_panic_str:
726 					"queue_failure: NULL panic str?"));
727 	mutex_exit(&ufs_fix.uq_mutex);
728 
729 	MINOR(("] "));
730 }
731 
732 /*PRINTFLIKE2*/
733 static void
734 real_panic(ufs_failure_t *f, const char *fmt, ...)
735 {
736 	va_list	adx;
737 
738 	MINUTE(("[real_panic "));
739 
740 	va_start(adx, fmt);
741 	real_panic_v(f, fmt, adx);
742 	va_end(adx);
743 
744 	MINUTE((": return?!]\n"));
745 }
746 
747 static void
748 real_panic_v(ufs_failure_t *f, const char *fmt, va_list adx)
749 {
750 	int seriousness = CE_PANIC;
751 	int need_unlock;
752 
753 	MINUTE(("[real_panic_v "));
754 
755 	if (f && f->uf_ufsvfsp)
756 		TRANS_SETERROR(f->uf_ufsvfsp);
757 
758 #if defined(DEBUG)
759 	if (DEBUG_FLAGS & DBGFLG_NOPANIC) {
760 		seriousness = CE_WARN;
761 		cmn_err(CE_WARN, "real_panic: EWOULDPANIC\n");
762 	}
763 #endif /* DEBUG */
764 
765 	delay(hz >> 1);			/* allow previous warnings to get out */
766 
767 	if (!f && fmt)
768 		vcmn_err(seriousness, fmt, adx);
769 	else
770 		cmn_err(seriousness, f && f->uf_panic_str? f->uf_panic_str:
771 		    "real_panic: <unknown panic?>");
772 
773 	if (f) {
774 		need_unlock = !MUTEX_HELD(&f->uf_mutex);
775 		if (need_unlock) {
776 			mutex_enter(&f->uf_mutex);
777 		}
778 
779 		f->uf_retry = -1;
780 		(void) set_state(f, UF_PANIC);
781 
782 		if (need_unlock) {
783 			mutex_exit(&f->uf_mutex);
784 		}
785 	}
786 	MINUTE((": return?!]\n"));
787 }
788 
789 /*
790  * initializes ufs panic structs, locks, etc
791  */
792 void
793 ufsfx_init(void)
794 {
795 
796 	MINUTE(("[ufsfx_init"));
797 
798 	/* patchable; unchanged while running, so no lock is needed */
799 	ufsfx_tune.uft_too_long		= UF_TOO_LONG;
800 	ufsfx_tune.uft_fixstart_period	= UF_FIXSTART_PERIOD;
801 	ufsfx_tune.uft_fixpoll_period	= UF_FIXPOLL_PERIOD;
802 	ufsfx_tune.uft_short_err_period	= UF_SHORT_ERROR_PERIOD;
803 	ufsfx_tune.uft_long_err_period	= UF_LONG_ERROR_PERIOD;
804 
805 	uffsinfo.ufi_statp	= &uf_stats;
806 	uffsinfo.ufi_tunep	= &ufsfx_tune;
807 	uffsinfo.ufi_statetab	= &state_desc[0];
808 
809 	mutex_init(&uf_stats.ufst_mutex, NULL, MUTEX_DEFAULT, NULL);
810 	ufs_thread_init(&ufs_fix, /* maxne */ 1);
811 
812 	MINUTE(("] "));
813 }
814 
815 /*
816  * initializes per-ufs values
817  * returns 0 (ok) or errno
818  */
819 int
820 ufsfx_mount(struct ufsvfs *ufsvfsp, int flags)
821 {
822 	MINUTE(("[ufsfx_mount (%d)", flags));
823 	/* don't check/need vfs_lock because it's still being initialized */
824 
825 	ufsvfsp->vfs_fsfx.fx_flags = (flags & UFSMNT_ONERROR_FLGMASK) >> 4;
826 
827 	MINUTE((": %s: fx_flags:%ld,",
828 		ufsvfsp->vfs_fs->fs_fsmnt, ufsvfsp->vfs_fsfx.fx_flags));
829 	/*
830 	 *	onerror={panic ^ lock only ^ unmount}
831 	 */
832 
833 	if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_PANIC) {
834 		MINUTE((" PANIC"));
835 
836 	} else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKONLY) {
837 		MINUTE((" LCKONLY"));
838 
839 	} else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKUMOUNT) {
840 		MINUTE((" LCKUMOUNT"));
841 
842 	} else {
843 		ufsvfsp->vfs_fsfx.fx_flags = UFSFX_DEFAULT;
844 		ASSERT(ufsvfsp->vfs_fsfx.fx_flags &
845 						(UFSMNT_ONERROR_FLGMASK >> 4));
846 		MINUTE((" DEFAULT"));
847 	}
848 
849 	pollwakeup(&ufs_pollhd, POLLPRI);
850 	MINUTE(("]\n"));
851 	return (0);
852 }
853 
854 /*
855  * ufsfx_unmount
856  *
857  * called during unmount
858  */
859 void
860 ufsfx_unmount(struct ufsvfs *ufsvfsp)
861 {
862 	ufs_failure_t	*f;
863 	int		 must_unlock_list;
864 
865 	MINUTE(("[ufsfx_unmount"));
866 
867 	if (!ufsvfsp) {
868 		MINUTE((": no ufsvfsp]"));
869 		return;
870 	}
871 
872 	if ((must_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex)) != 0)
873 		mutex_enter(&ufs_fix.uq_mutex);
874 
875 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
876 		int must_unlock_failure;
877 
878 		must_unlock_failure = !MUTEX_HELD(&f->uf_mutex);
879 		if (must_unlock_failure) {
880 			mutex_enter(&f->uf_mutex);
881 		}
882 
883 		if (f->uf_ufsvfsp == ufsvfsp) {
884 
885 			/*
886 			 * if we owned the failure record lock, then this
887 			 * is probably a fix failure-triggered unmount, so
888 			 * the warning is not appropriate or needed
889 			 */
890 
891 			/* XXX if rebooting don't print this? */
892 			if (!terminal_state(f->uf_s) && must_unlock_failure) {
893 				cmn_err(CE_WARN,
894 					"Unmounting %s while error-locked",
895 					fs_name(f));
896 			}
897 
898 			f->uf_ufsvfsp		= NULL;
899 			f->uf_vfs_ufsfxp	= NULL;
900 			f->uf_vfs_lockp		= NULL;
901 			f->uf_bp		= NULL;
902 			f->uf_vfsp		= NULL;
903 			f->uf_retry		= -1;
904 		}
905 
906 		if (must_unlock_failure)
907 			mutex_exit(&f->uf_mutex);
908 	}
909 	if (must_unlock_list)
910 		mutex_exit(&ufs_fix.uq_mutex);
911 
912 	pollwakeup(&ufs_pollhd, POLLPRI | POLLHUP);
913 	MINUTE(("] "));
914 }
915 
916 /*
917  * ufsfx_(un)lockfs
918  *
919  * provides hook from lockfs code so we can recognize unlock/relock
920  *  This is called after it is certain that the (un)lock will succeed.
921  */
922 void
923 ufsfx_unlockfs(struct ufsvfs *ufsvfsp)
924 {
925 	ufs_failure_t	*f;
926 	int		 need_unlock;
927 	int		 need_unlock_list;
928 	int		 informed = 0;
929 
930 	MINUTE(("[ufsfx_unlockfs"));
931 
932 	if (!ufsvfsp)
933 		return;
934 
935 	need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex);
936 
937 	if (need_unlock_list)
938 		mutex_enter(&ufs_fix.uq_mutex);
939 
940 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
941 
942 		need_unlock = !MUTEX_HELD(&f->uf_mutex);
943 		if (need_unlock)
944 			mutex_enter(&f->uf_mutex);
945 
946 		if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s)) {
947 			if (!(f->uf_s & UF_FIXING)) {
948 				/*
949 				 * This might happen if we don't notice that
950 				 * the fs gets marked FSFIX before it is
951 				 * marked FSCLEAN, as might occur if the
952 				 * the superblock was hammered directly.
953 				 */
954 				if (!informed) {
955 					informed = 1;
956 					cmn_err(CE_NOTE,
957 		    "Unlock of %s succeeded before fs_clean marked FSFIX?",
958 							    fs_name(f));
959 				}
960 
961 				/*
962 				 * pass through fixing state so
963 				 * transition protocol is satisfied
964 				 */
965 				if (!set_state(f, UF_FIXING)) {
966 					MINUTE((": failed] "));
967 				}
968 			}
969 
970 			if (!set_state(f, UF_FIXED)) {
971 				/* it's already fixed, so don't panic now */
972 				MINUTE((": failed] "));
973 			}
974 		}
975 
976 		if (need_unlock)
977 			mutex_exit(&f->uf_mutex);
978 	}
979 	if (need_unlock_list)
980 		mutex_exit(&ufs_fix.uq_mutex);
981 	MINUTE(("] "));
982 }
983 
984 void
985 ufsfx_lockfs(struct ufsvfs *ufsvfsp)
986 {
987 	ufs_failure_t	*f;
988 	int		 need_unlock;
989 	int		 need_unlock_list;
990 
991 	MINUTE(("[ufsfx_lockfs"));
992 
993 	if (!ufsvfsp)
994 		return;
995 
996 	need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex);
997 
998 	if (need_unlock_list)
999 		mutex_enter(&ufs_fix.uq_mutex);
1000 
1001 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
1002 
1003 		need_unlock = !MUTEX_HELD(&f->uf_mutex);
1004 		if (need_unlock)
1005 			mutex_enter(&f->uf_mutex);
1006 
1007 		if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s) &&
1008 		    f->uf_s != UF_PANIC) {
1009 			switch (f->uf_s) {
1010 
1011 			default:
1012 				cmn_err(CE_WARN,
1013 			"fs %s not in state UF_TRYLCK, UF_LOCKED or UF_FIXING",
1014 								fs_name(f));
1015 				break;
1016 
1017 			case UF_TRYLCK:
1018 				if (!set_state(f, UF_LOCKED)) {
1019 					MINUTE((": failed] "));
1020 				}
1021 				break;
1022 
1023 			case UF_LOCKED:
1024 				if (!set_state(f, UF_FIXING)) {
1025 					MINUTE((": failed] "));
1026 				}
1027 				break;
1028 
1029 			case UF_FIXING:
1030 				break;
1031 
1032 			}
1033 		}
1034 
1035 		if (need_unlock)
1036 			mutex_exit(&f->uf_mutex);
1037 	}
1038 	if (need_unlock_list)
1039 		mutex_exit(&ufs_fix.uq_mutex);
1040 
1041 	MINUTE(("] "));
1042 }
1043 
1044 /*
1045  * error lock, trigger fsck and unlock those fs with failures
1046  * blatantly copied from the hlock routine, although this routine
1047  * triggers differently in order to use uq_ne as meaningful data.
1048  */
1049 /* ARGSUSED */
1050 void
1051 ufsfx_thread_fix_failures(void *ignored)
1052 {
1053 	int		retry;
1054 	callb_cpr_t	cprinfo;
1055 
1056 	CALLB_CPR_INIT(&cprinfo, &ufs_fix.uq_mutex, callb_generic_cpr,
1057 	    "ufsfixfail");
1058 
1059 	MINUTE(("[ufsfx_thread_fix_failures] "));
1060 
1061 	for (;;) {
1062 		/* sleep until there is work to do */
1063 
1064 		mutex_enter(&ufs_fix.uq_mutex);
1065 		(void) ufs_thread_run(&ufs_fix, &cprinfo);
1066 		ufs_fix.uq_ne = 0;
1067 		mutex_exit(&ufs_fix.uq_mutex);
1068 
1069 		/* process failures on our q */
1070 		do {
1071 			retry = ufsfx_do_failure_q();
1072 			if (retry) {
1073 				mutex_enter(&ufs_fix.uq_mutex);
1074 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
1075 				(void) cv_timedwait(&ufs_fix.uq_cv,
1076 							&ufs_fix.uq_mutex,
1077 							lbolt + (hz * retry));
1078 				CALLB_CPR_SAFE_END(&cprinfo,
1079 				    &ufs_fix.uq_mutex);
1080 				mutex_exit(&ufs_fix.uq_mutex);
1081 			}
1082 		} while (retry);
1083 	}
1084 	/* NOTREACHED */
1085 }
1086 
1087 
/*
 * watch for fix-on-panic work
 *
 * Walks the failure list hanging off ufs_fix.uq_ufhead and, for every
 * failure that is not in a terminal state, calls that state's function
 * with UFA_FOUND so the failure can advance.  While walking it collects
 * the retry interval requested by the failures (see the retry protocol
 * below).
 *
 * Nothing here blocks on a lock: if the list mutex cannot be taken the
 * routine returns 1 at once, and a failure whose own mutex is busy is
 * skipped with retry forced to 1.
 *
 * returns # of seconds to sleep before trying again
 * and zero if no retry is needed
 */

int
ufsfx_do_failure_q(void)
{
	ufs_failure_t	*f;
	long		 retry = 1;	/* returned as int */
	ufsd_t		*s;

	MAJOR(("[ufsfx_do_failure_q"));
	DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL));

	/* list is busy; ask the caller to come back in a second */
	if (!mutex_tryenter(&ufs_fix.uq_mutex))
		return (retry);

	retry = 0;
rescan_q:

	/*
	 * walk down failure list
	 *  depending on state of each failure, do whatever
	 *  is appropriate to move it to the next state
	 *  taking note of whether retry gets set
	 *
	 * retry protocol:
	 * wakeup in shortest required time for any failure
	 *   retry == 0; nothing more to do (terminal state)
	 *   retry < 0; reprocess queue immediately, retry will
	 *		be abs(retry) for the next cycle
	 *   retry > 0; schedule wakeup for retry seconds
	 */

	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {

		/* somebody else owns this failure right now; try later */
		if (!mutex_tryenter(&f->uf_mutex)) {
			retry = 1;
			continue;
		}
		s = get_state_desc(f->uf_s);

		MINOR((": found%s: %s, \"%s: %s\"\n",
			    s->ud_attr.terminal? " old": "",
			    fs_name(f), state_name(f->uf_s), f->uf_panic_str));

		/* terminal failures need no further processing */
		if (s->ud_attr.terminal) {
			mutex_exit(&f->uf_mutex);
			continue;
		}

		/* let the state function (if any) react to being found */
		if (s->ud_sfp)
			(*s->ud_sfp)(f, UFA_FOUND, f->uf_s);

		ASSERT(terminal_state(f->uf_s) || f->uf_retry != 0);

		if (f->uf_retry != 0) {
			/* keep the smallest request (negative ones win) */
			if (retry > f->uf_retry || retry == 0)
				retry = f->uf_retry;
			/* a negative request is only "immediate" once */
			if (f->uf_retry < 0)
				f->uf_retry = abs(f->uf_retry);
		}
		mutex_exit(&f->uf_mutex);
	}


	/* somebody asked for immediate reprocessing: go around again */
	if (retry < 0) {
		retry = abs(retry);
		goto rescan_q;
	}

	mutex_exit(&ufs_fix.uq_mutex);

	DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL));
	MAJOR((": retry=%ld, good night]\n\n", retry));

	return (retry);
}
1169 
1170 static void
1171 pester_msg(ufs_failure_t *f, int seriousness)
1172 {
1173 	MINUTE(("[pester_msg"));
1174 	ASSERT(f->uf_s & (UF_LOCKED | UF_FIXING));
1175 
1176 	/*
1177 	 * XXX if seems too long for this fs, poke administrator
1178 	 * XXX to run fsck manually (and change retry time?)
1179 	 */
1180 	cmn_err(seriousness,
1181 		"Waiting for repair of %s to %s",
1182 			    fs_name(f),
1183 			    f->uf_s & UF_LOCKED? "start": "finish");
1184 	MINUTE(("]"));
1185 }
1186 
1187 static time_t
1188 trylock_time_exceeded(ufs_failure_t *f)
1189 {
1190 	time_t		toolong;
1191 	extern time_t	time;
1192 
1193 	MINUTE(("[trylock_time_exceeded"));
1194 	ASSERT(MUTEX_HELD(&f->uf_mutex));
1195 
1196 	toolong = (time_t)ufsfx_tune.uft_too_long + f->uf_entered_tm;
1197 	if (time > toolong)
1198 		cmn_err(CE_WARN, "error-lock timeout exceeded: %s", fs_name(f));
1199 
1200 	MINUTE(("] "));
1201 	return (time <= toolong? 0: time - toolong);
1202 }
1203 
1204 static int
1205 get_lockfs_status(ufs_failure_t *f, struct lockfs *lfp)
1206 {
1207 	MINUTE(("[get_lockfs_status"));
1208 
1209 	if (!f->uf_ufsvfsp) {
1210 		MINUTE((": ufsvfsp is NULL]\n"));
1211 		return (0);
1212 	}
1213 
1214 	ASSERT(MUTEX_HELD(&f->uf_mutex));
1215 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
1216 	ASSERT(!vfs_lock_held(f->uf_vfsp));
1217 	ASSERT(f->uf_ufsvfsp->vfs_root != NULL);
1218 
1219 	f->uf_lf_err = ufs_fiolfss(f->uf_ufsvfsp->vfs_root, lfp);
1220 
1221 	if (f->uf_lf_err) {
1222 		f->uf_retry = ufsfx_tune.uft_short_err_period;
1223 	}
1224 
1225 	MINUTE(("] "));
1226 	return (1);
1227 }
1228 
1229 static sfrc_t
1230 set_state(ufs_failure_t *f, ufs_failure_states_t new_state)
1231 {
1232 	ufsd_t		*s;
1233 	sfrc_t		 sfrc = SFRC_FAIL;
1234 	int		 need_unlock;
1235 	extern time_t	 time;
1236 
1237 	HIDEOUS(("[set_state: new state:%s", state_name(new_state)));
1238 	ASSERT(f);
1239 	ASSERT(MUTEX_HELD(&f->uf_mutex));
1240 
1241 	/*
1242 	 * if someone else is panicking, just let panic sync proceed
1243 	 */
1244 	if (panicstr) {
1245 		(void) set_state(f, UF_NOTFIX);
1246 		HIDEOUS((": state reset: not fixed] "));
1247 		return (sfrc);
1248 	}
1249 
1250 	/*
1251 	 * bad state transition, an internal error
1252 	 */
1253 	if (!state_trans_valid(f->uf_s, new_state)) {
1254 		/* recursion */
1255 		if (!(f->uf_s & UF_PANIC) && !(new_state & UF_PANIC))
1256 			(void) set_state(f, UF_PANIC);
1257 		MINOR((": state reset: transition failure (\"%s\"->\"%s\")] ",
1258 				state_name(f->uf_s), state_name(new_state)));
1259 		return (sfrc);
1260 	}
1261 
1262 	s = get_state_desc(new_state);
1263 
1264 	need_unlock = !MUTEX_HELD(&ufs_fix.uq_mutex);
1265 	if (need_unlock)
1266 		mutex_enter(&ufs_fix.uq_mutex);
1267 
1268 	if (s->ud_attr.at_fail && ufs_fix.uq_threadp &&
1269 	    curthread == ufs_fix.uq_threadp) {
1270 		cmn_err(CE_WARN, "set_state: probable recursive panic of %s",
1271 			fs_name(f));
1272 	}
1273 	if (need_unlock)
1274 		mutex_exit(&ufs_fix.uq_mutex);
1275 
1276 	/* NULL state functions always succeed */
1277 	sfrc = !s->ud_sfp? SFRC_SUCCESS: (*s->ud_sfp)(f, UFA_SET, new_state);
1278 
1279 	if (sfrc == SFRC_SUCCESS && f->uf_s != new_state) {
1280 		f->uf_s = new_state;
1281 		f->uf_entered_tm = time;
1282 		f->uf_counter = 0;
1283 	}
1284 
1285 	HIDEOUS(("]\n"));
1286 	return (sfrc);
1287 }
1288 
1289 static ufsd_t *
1290 get_state_desc(ufs_failure_states_t state)
1291 {
1292 	ufsd_t *s;
1293 
1294 	HIDEOUS(("[get_state_desc"));
1295 
1296 	for (s = &state_desc[1]; s->ud_name != NULL; s++) {
1297 		if (s->ud_v == state) {
1298 			HIDEOUS(("] "));
1299 			return (s);
1300 		}
1301 	}
1302 
1303 	HIDEOUS(("] "));
1304 	return (&state_desc[0]);	/* default */
1305 }
1306 
1307 static sfrc_t
1308 sf_undef(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s)
1309 {
1310 	sfrc_t rc;
1311 
1312 	TRIVIA(("[sf_undef, action is %s, state is %s\n",
1313 		act_name(a), state_name(s)));
1314 	ASSERT(s == UF_UNDEF);
1315 
1316 	/* shouldn't find null failure records or ever set one */
1317 	rc = set_state(f, UF_NOTFIX);
1318 
1319 	TRIVIA(("] "));
1320 	return (rc);
1321 }
1322 
1323 
1324 static sfrc_t
1325 sf_init(
1326 	ufs_failure_t	*f,
1327 	ufsa_t	 a,
1328 	ufs_failure_states_t	 s)
1329 {
1330 	sfrc_t		rc = SFRC_FAIL;
1331 	extern time_t	time;
1332 
1333 	TRIVIA(("[sf_init, action is %s", act_name(a)));
1334 	ASSERT(s & UF_INIT);
1335 
1336 	switch (a) {
1337 	case UFA_SET:
1338 		f->uf_begin_tm = time;
1339 		f->uf_retry = 1;
1340 		if (!f->uf_ufsvfsp) {
1341 			(void) set_state(f, UF_PANIC);
1342 			TRIVIA((": NULL ufsvfsp]\n"));
1343 			return (rc);
1344 		}
1345 		/*
1346 		 * because we can call panic from many different levels,
1347 		 * we can't be sure that we've got the vfs_lock at this
1348 		 * point.  However, there's not much alternative and if
1349 		 * we don't (have the lock) the worst case is we'll just
1350 		 * panic again
1351 		 */
1352 		f->uf_vfs_lockp		= &f->uf_ufsvfsp->vfs_lock;
1353 		f->uf_vfs_ufsfxp	= &f->uf_ufsvfsp->vfs_fsfx;
1354 
1355 		if (!f->uf_ufsvfsp->vfs_bufp) {
1356 			(void) set_state(f, UF_PANIC);
1357 			TRIVIA((": NULL vfs_bufp]\n"));
1358 			return (rc);
1359 		}
1360 		f->uf_bp = f->uf_ufsvfsp->vfs_bufp;
1361 
1362 		if (!f->uf_ufsvfsp->vfs_bufp->b_un.b_fs) {
1363 			(void) set_state(f, UF_PANIC);
1364 			TRIVIA((": NULL vfs_fs]\n"));
1365 			return (rc);
1366 		}
1367 
1368 		/* vfs_fs = vfs_bufp->b_un.b_fs */
1369 		bcopy(f->uf_ufsvfsp->vfs_fs->fs_fsmnt, f->uf_fsname, MAXMNTLEN);
1370 
1371 		f->uf_lf.lf_lock  = LOCKFS_ELOCK;	/* primer */
1372 
1373 		if (!f->uf_vfsp || f->uf_vfsp->vfs_dev == NODEV) {
1374 			(void) set_state(f, UF_PANIC);
1375 			TRIVIA((": NULL vfsp or vfs_dev == NODEV"));
1376 			return (rc);
1377 		}
1378 		f->uf_dev = f->uf_vfsp->vfs_dev;
1379 
1380 		rc = SFRC_SUCCESS;
1381 		break;
1382 
1383 	case UFA_FOUND:
1384 	default:
1385 		/* failures marked init shouldn't even be on the queue yet */
1386 		rc = set_state(f, UF_QUEUE);
1387 		TRIVIA((": found failure with state init]\n"));
1388 	}
1389 
1390 	TRIVIA(("] "));
1391 	return (rc);
1392 }
1393 
1394 static sfrc_t
1395 sf_queue(
1396 	ufs_failure_t	*f,
1397 	ufsa_t	 a,
1398 	ufs_failure_states_t	 s)
1399 {
1400 	sfrc_t		rc = SFRC_FAIL;
1401 
1402 	TRIVIA(("[sf_queue, action is %s", act_name(a)));
1403 	ASSERT(s & UF_QUEUE);
1404 
1405 	if (!f->uf_ufsvfsp) {
1406 		TRIVIA((": NULL ufsvfsp]\n"));
1407 		return (rc);
1408 	}
1409 
1410 	switch (a) {
1411 	case UFA_FOUND:
1412 		rc = sf_found_queue(f);
1413 		break;
1414 
1415 	case UFA_SET:
1416 
1417 		ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex));
1418 
1419 		mutex_enter(&uf_stats.ufst_mutex);
1420 		uf_stats.ufst_num_failed++;
1421 		mutex_exit(&uf_stats.ufst_mutex);
1422 
1423 		/*
1424 		 * if can't get the vfs lock, just wait until
1425 		 * UF_TRYLCK to set fx_current
1426 		 */
1427 		if (mutex_tryenter(f->uf_vfs_lockp)) {
1428 			f->uf_vfs_ufsfxp->fx_current = f;
1429 			mutex_exit(f->uf_vfs_lockp);
1430 		} else {
1431 			mutex_enter(&uf_stats.ufst_mutex);
1432 			uf_stats.ufst_current_races++;
1433 			mutex_exit(&uf_stats.ufst_mutex);
1434 		}
1435 
1436 		f->uf_retry = 1;
1437 		rc = SFRC_SUCCESS;
1438 		TRIVIA(("] "));
1439 		break;
1440 
1441 	default:
1442 		(void) set_state(f, UF_PANIC);
1443 		TRIVIA((": failed] "));
1444 	}
1445 
1446 	return (rc);
1447 }
1448 
1449 static sfrc_t
1450 sf_found_queue(ufs_failure_t *f)
1451 {
1452 	int		replica;
1453 	sfrc_t		rc = SFRC_FAIL;
1454 
1455 	TRIVIA(("[sf_found_queue"));
1456 
1457 	/*
1458 	 * don't need to check for null ufsvfsp because
1459 	 * unmount must own list's ufs_fix.uq_mutex
1460 	 * to mark it null and we own that lock since
1461 	 * we got here.
1462 	 */
1463 
1464 	ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex));
1465 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
1466 
1467 	if (!mutex_tryenter(f->uf_vfs_lockp)) {
1468 		TRIVIA((": tryenter(vfslockp) failed; retry]\n"));
1469 		f->uf_retry = 1;
1470 		return (rc);
1471 	}
1472 
1473 	replica = f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current != NULL &&
1474 		    f->uf_vfs_ufsfxp->fx_current != f &&
1475 		    !terminal_state(f->uf_vfs_ufsfxp->fx_current->uf_s);
1476 
1477 	/*
1478 	 * copy general flags to this ufs_failure so we don't
1479 	 * need to refer back to the ufsvfs, or, more importantly,
1480 	 * don't need to keep acquiring (trying to acquire) vfs_lockp
1481 	 *
1482 	 * The most restrictive option wins:
1483 	 *  panic > errlock only > errlock+unmount > repair
1484 	 * XXX panic > elock > elock > elock+umount
1485 	 */
1486 	if (f->uf_vfs_ufsfxp->fx_flags & UFSFX_PANIC) {
1487 		if (!set_state(f, UF_PANIC)) {
1488 			TRIVIA((": marked panic but was queued?"));
1489 			real_panic(f, " ");
1490 			/*NOTREACHED*/
1491 		}
1492 		mutex_exit(f->uf_vfs_lockp);
1493 		return (rc);
1494 	}
1495 	f->uf_flags = f->uf_vfs_ufsfxp->fx_flags;
1496 
1497 	if (replica) {
1498 		if (!set_state(f, UF_REPLICA)) {
1499 			f->uf_retry = 1;
1500 			TRIVIA((": set to replica failed] "));
1501 		} else {
1502 			TRIVIA(("] "));
1503 		}
1504 		mutex_exit(f->uf_vfs_lockp);
1505 		return (rc);
1506 	}
1507 	mutex_exit(f->uf_vfs_lockp);
1508 
1509 	if (!set_state(f, UF_TRYLCK)) {
1510 		TRIVIA((": failed] "));
1511 	} else {
1512 		rc = SFRC_SUCCESS;
1513 	}
1514 	return (rc);
1515 }
1516 
1517 static sfrc_t
1518 sf_nonterm_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s)
1519 {
1520 	sfrc_t	rc = SFRC_FAIL;
1521 
1522 	TRIVIA(("[sf_nonterm_cmn, action: %s, %s", act_name(a), state_name(s)));
1523 	ASSERT(s & (UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING));
1524 	ASSERT(!terminal_state(s));
1525 
1526 	if (!f->uf_ufsvfsp && !(f->uf_s & UF_UMOUNT)) {
1527 		TRIVIA((": NULL ufsvfsp (state != UMOUNT)]\n"));
1528 		(void) set_state(f, UF_NOTFIX);
1529 		return (rc);
1530 	}
1531 
1532 	switch (a) {
1533 	case UFA_SET:
1534 		switch (s) {
1535 		case UF_TRYLCK:
1536 			ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
1537 			rc = sf_set_trylck(f);
1538 			break;
1539 
1540 		case UF_LOCKED:
1541 			rc = sf_set_locked(f);
1542 			break;
1543 
1544 		case UF_FIXING:
1545 			f->uf_flags |= UFSFX_REPAIR_START;
1546 			f->uf_retry  = ufsfx_tune.uft_fixpoll_period;
1547 			rc = SFRC_SUCCESS;
1548 			break;
1549 
1550 		case UF_UMOUNT:
1551 			f->uf_retry = -ufsfx_tune.uft_short_err_period;
1552 			rc = SFRC_SUCCESS;
1553 			break;
1554 
1555 		default:
1556 			(void) set_state(f, UF_PANIC);
1557 			TRIVIA((": failed] "));
1558 		}
1559 		break;
1560 
1561 	case UFA_FOUND:
1562 
1563 		switch (s) {
1564 		case UF_TRYLCK:
1565 			rc = sf_found_trylck(f);
1566 			break;
1567 
1568 		case UF_LOCKED:
1569 		case UF_FIXING:
1570 			rc = sf_found_lock_fix_cmn(f, s);
1571 			break;
1572 
1573 		case UF_UMOUNT:
1574 			rc = sf_found_umount(f);
1575 			break;
1576 
1577 		default:
1578 			(void) set_state(f, UF_PANIC);
1579 			TRIVIA((": failed] "));
1580 			break;
1581 		}
1582 		break;
1583 	default:
1584 		(void) set_state(f, UF_PANIC);
1585 		TRIVIA((": failed] "));
1586 		break;
1587 	}
1588 
1589 	TRIVIA(("] "));
1590 	return (rc);
1591 }
1592 
1593 static sfrc_t
1594 sf_set_trylck(ufs_failure_t *f)
1595 {
1596 	TRIVIA(("[sf_set_trylck"));
1597 
1598 	if (!mutex_tryenter(f->uf_vfs_lockp)) {
1599 		TRIVIA((": tryenter(vfslockp) failed; retry]\n"));
1600 		f->uf_retry = 1;
1601 		return (SFRC_FAIL);
1602 	}
1603 
1604 	if (!f->uf_vfs_ufsfxp->fx_current)
1605 		f->uf_vfs_ufsfxp->fx_current = f;
1606 
1607 	mutex_exit(f->uf_vfs_lockp);
1608 
1609 	f->uf_lf.lf_flags = 0;
1610 	f->uf_lf.lf_lock  = LOCKFS_ELOCK;
1611 	f->uf_retry = -ufsfx_tune.uft_fixstart_period;
1612 	TRIVIA(("] "));
1613 	return (SFRC_SUCCESS);
1614 }
1615 
1616 static sfrc_t
1617 sf_found_trylck(ufs_failure_t *f)
1618 {
1619 	struct lockfs lockfs_status;
1620 
1621 	TRIVIA(("[sf_found_trylck"));
1622 
1623 	if (trylock_time_exceeded(f) > 0) {
1624 		(void) set_state(f, UF_PANIC);
1625 		TRIVIA((": failed] "));
1626 		return (SFRC_FAIL);
1627 	}
1628 
1629 	if (!get_lockfs_status(f, &lockfs_status)) {
1630 		(void) set_state(f, UF_PANIC);
1631 		TRIVIA((": failed] "));
1632 		return (SFRC_FAIL);
1633 	}
1634 
1635 	if (f->uf_lf_err == NO_ERROR)
1636 		f->uf_lf.lf_key = lockfs_status.lf_key;
1637 
1638 	if (!set_lockfs(f, &lockfs_status)) {
1639 		(void) set_state(f, UF_PANIC);
1640 		TRIVIA((": failed] "));
1641 		return (SFRC_FAIL);
1642 	}
1643 	TRIVIA(("] "));
1644 	return (SFRC_SUCCESS);
1645 }
1646 
1647 static sfrc_t
1648 sf_set_locked(ufs_failure_t *f)
1649 {
1650 	TRIVIA(("[sf_set_locked"));
1651 
1652 	f->uf_retry = -ufsfx_tune.uft_fixstart_period;
1653 
1654 #if defined(DEBUG)
1655 	if (f->uf_flags & UFSFX_REPAIR_START)
1656 		TRIVIA(("clearing UFSFX_REPAIR_START "));
1657 #endif /* DEBUG */
1658 
1659 	f->uf_flags &= ~UFSFX_REPAIR_START;
1660 
1661 	if (f->uf_s & UF_TRYLCK) {
1662 		cmn_err(CE_WARN, "Error-locked %s: \"%s\"",
1663 				    fs_name(f), f->uf_panic_str);
1664 
1665 		if (f->uf_flags & UFSFX_LCKONLY)
1666 			cmn_err(CE_WARN, "Manual repair of %s required",
1667 								fs_name(f));
1668 	}
1669 
1670 	/*
1671 	 * just reset to current state
1672 	 */
1673 #if defined(DEBUG)
1674 	TRIVIA(("locked->locked "));
1675 #endif /* DEBUG */
1676 
1677 	TRIVIA(("] "));
1678 	return (SFRC_SUCCESS);
1679 }
1680 
1681 static sfrc_t
1682 sf_found_lock_fix_cmn(ufs_failure_t *f, ufs_failure_states_t s)
1683 {
1684 	time_t		toolong;
1685 	extern time_t	time;
1686 	struct buf	*bp			= NULL;
1687 	struct fs	*dfs;
1688 	time_t		 concerned, anxious;
1689 	sfrc_t		 rc			= SFRC_FAIL;
1690 	ulong_t		 gb_size;
1691 
1692 	TRIVIA(("[sf_found_lock_fix_cmn (\"%s\")", state_name(s)));
1693 
1694 	if (s & UF_LOCKED) {
1695 		ASSERT(MUTEX_HELD(&f->uf_mutex));
1696 
1697 		toolong = time > (ufsfx_tune.uft_too_long +
1698 							f->uf_entered_tm);
1699 		TRIVIA(("%stoolong", !toolong? "not": ""));
1700 		HIDEOUS((": time:%ld, too long:%ld, entered_tm:%ld ",
1701 		    time, ufsfx_tune.uft_too_long, f->uf_entered_tm));
1702 
1703 		if (f->uf_flags & UFSFX_LCKUMOUNT) {
1704 			if (set_state(f, UF_UMOUNT)) {
1705 				TRIVIA(("] "));
1706 				rc = SFRC_SUCCESS;
1707 			} else {
1708 				TRIVIA((": failed] "));
1709 				f->uf_retry = 1;
1710 			}
1711 			return (rc);
1712 		}
1713 		if (!toolong) {
1714 			rc = SFRC_SUCCESS;
1715 		} else {
1716 			if (!(f->uf_flags & UFSFX_REPAIR_START)) {
1717 				cmn_err(CE_WARN, "%s repair of %s not started.",
1718 						(f->uf_flags & UFSFX_LCKONLY)?
1719 						"Manual": "Automatic",
1720 						fs_name(f));
1721 
1722 				f->uf_retry = ufsfx_tune.uft_long_err_period;
1723 			} else {
1724 				f->uf_retry = ufsfx_tune.uft_long_err_period;
1725 				cmn_err(CE_WARN,
1726 		"Repair of %s is not timely; operator attention is required.",
1727 								    fs_name(f));
1728 			}
1729 			TRIVIA(("] "));
1730 			return (rc);
1731 		}
1732 	}
1733 
1734 #if defined(DEBUG)
1735 	else {
1736 		ASSERT(s & UF_FIXING);
1737 	}
1738 #endif /* DEBUG */
1739 
1740 	/*
1741 	 * get on disk superblock; force it to really
1742 	 * come from the disk
1743 	 */
1744 	(void) bfinval(f->uf_dev, 0);
1745 	bp = UFS_BREAD(f->uf_ufsvfsp, f->uf_dev, SBLOCK, SBSIZE);
1746 	if (bp) {
1747 		bp->b_flags |= (B_STALE | B_AGE);
1748 		dfs = bp->b_un.b_fs;
1749 	}
1750 
1751 	if (!bp || (bp->b_flags & B_ERROR) || ((dfs->fs_magic != FS_MAGIC) &&
1752 	    (dfs->fs_magic != MTB_UFS_MAGIC))) {
1753 		TRIVIA((": UFS_BREAD(SBLOCK) failed]\n"));
1754 		f->uf_retry = 1;
1755 		goto out;
1756 	}
1757 
1758 	/* fsck started but we haven't noticed yet? */
1759 	if (!(s & UF_FIXING) && dfs->fs_clean == FSFIX) {
1760 		if (!set_state(f, UF_FIXING)) {
1761 			TRIVIA((": failed]\n"));
1762 			f->uf_retry = 1;
1763 			goto out;
1764 		}
1765 	}
1766 
1767 	/* fsck started but didn't succeed? */
1768 	if ((s & UF_FIXING) && ((dfs->fs_clean == FSBAD) || !fsck_active(f))) {
1769 		TRIVIA((": fs_clean: %d", (int)dfs->fs_clean));
1770 		(void) set_state(f, UF_LOCKED);
1771 		cmn_err(CE_WARN, "%s: Manual repair is necessary.", fs_name(f));
1772 		f->uf_retry = ufsfx_tune.uft_long_err_period;
1773 		goto out;
1774 	}
1775 
1776 	gb_size = (dfs->fs_size * dfs->fs_bshift) / GB;
1777 	toolong = (time_t)((gb_size == 0? 1: gb_size) * SecondsPerGig);
1778 
1779 	/* fsck started but doesn't seem to be proceeding? */
1780 	if ((s & UF_FIXING) && dfs->fs_clean == FSFIX) {
1781 		if (time > f->uf_entered_tm + toolong) {
1782 
1783 			cmn_err(CE_WARN,
1784 "Repair completion timeout exceeded on %s; manual fsck may be required",
1785 								    fs_name(f));
1786 			f->uf_retry = ufsfx_tune.uft_long_err_period;
1787 		}
1788 	}
1789 
1790 	concerned = f->uf_entered_tm + (toolong / 3);
1791 	anxious = f->uf_entered_tm + ((2 * toolong) / 3);
1792 
1793 	if (time > concerned)
1794 		pester_msg(f, time > anxious? CE_WARN: CE_NOTE);
1795 
1796 	TRIVIA(("] "));
1797 
1798 out:
1799 	if (bp)
1800 		brelse(bp);
1801 
1802 	return (rc);
1803 }
1804 
1805 static sfrc_t
1806 sf_found_umount(ufs_failure_t *f)
1807 {
1808 	extern time_t	 time;
1809 	sfrc_t		 rc			= SFRC_FAIL;
1810 	struct vfs	*vfsp			= f->uf_vfsp;
1811 	struct ufsvfs	*ufsvfsp		= f->uf_ufsvfsp;
1812 	int		 toolong		= 0;
1813 	int		 err			= 0;
1814 
1815 	TRIVIA(("[sf_found_umount"));
1816 
1817 	toolong = time > ufsfx_tune.uft_too_long + f->uf_entered_tm;
1818 	if (toolong) {
1819 		TRIVIA((": unmount time limit exceeded] "));
1820 		goto out;
1821 	}
1822 
1823 	if (!vfsp || !ufsvfsp) {	/* trivial case */
1824 		TRIVIA((": NULL vfsp and/or ufsvfsp, already unmounted?] "));
1825 		goto out;
1826 	}
1827 
1828 	if (!ULOCKFS_IS_ELOCK(&ufsvfsp->vfs_ulockfs)) {
1829 		TRIVIA((": !not error locked?"));
1830 		err = EINVAL;
1831 		goto out;
1832 	}
1833 
1834 	/* The vn_vfsunlock will be done in dounmount() [.../common/fs/vfs.c] */
1835 	if (vn_vfswlock(vfsp->vfs_vnodecovered)) {
1836 		TRIVIA((": couldn't lock coveredvp"));
1837 		err = EBUSY;
1838 		goto out;
1839 	}
1840 
1841 	if ((err = dounmount(vfsp, 0, kcred)) != 0) {
1842 
1843 		/* take note, but not many alternatives here */
1844 		mutex_enter(&uf_stats.ufst_mutex);
1845 		uf_stats.ufst_unmount_failures++;
1846 		mutex_exit(&uf_stats.ufst_mutex);
1847 
1848 		TRIVIA((": unmount failed] "));
1849 	} else {
1850 		cmn_err(CE_NOTE, "unmounted error-locked %s", fs_name(f));
1851 	}
1852 
1853 out:
1854 	if (toolong || (err != EBUSY && err != EAGAIN))
1855 		rc = set_state(f, UF_NOTFIX);
1856 
1857 	TRIVIA(("] "));
1858 	return (rc);
1859 }
1860 
/*
 * Common state function for the terminal states UF_FIXED, UF_NOTFIX
 * and UF_REPLICA.
 *
 * UFA_SET:
 *   UF_NOTFIX / UF_FIXED - stamp the end time, switch the lockfs request
 *	to LOCKFS_OLOCK, clear the file system's fx_current, report the
 *	outcome on the console, count fixes in uf_stats and schedule
 *	ufsfx_kill_fix_failure_thread via timeout().  The vfs lock is
 *	taken (tryenter only) unless this thread already holds it; on
 *	contention the record asks for a retry and SFRC_FAIL is returned.
 *   UF_REPLICA - caller holds the vfs lock; link f to the file system's
 *	current, non-terminal failure through uf_orig, or ask for a
 *	retry if there is no such failure.
 *   anything else - request UF_PANIC and return that result.
 *
 * UFA_FOUND: terminal records need no work; a stale non-zero uf_retry
 * is counted as wasted cpu and cleared.
 *
 * Any other action requests UF_PANIC and returns SFRC_FAIL.
 */
static sfrc_t
sf_term_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s)
{
	extern time_t	time;
	sfrc_t		rc = SFRC_FAIL;

	TRIVIA(("[sf_term_cmn, action is %s, state is %s",
						act_name(a), state_name(s)));
	ASSERT(s & (UF_FIXED | UF_NOTFIX | UF_REPLICA));
	ASSERT(terminal_state(s));

	/* a missing ufsvfs is only tolerated in UMOUNT or NOTFIX */
	if (!f->uf_ufsvfsp && !(f->uf_s & (UF_UMOUNT | UF_NOTFIX))) {
		TRIVIA((": NULL ufsvfsp (state != UMOUNT | NOTFIX)]\n"));
		return (rc);
	}

	switch (a) {
	case UFA_SET:
		switch (s) {
		case UF_NOTFIX:
		case UF_FIXED:
		{	int need_lock_vfs;

			/* only lock if there is a lock and we don't own it */
			if (f->uf_ufsvfsp && f->uf_vfs_lockp)
				need_lock_vfs = !MUTEX_HELD(f->uf_vfs_lockp);
			else
				need_lock_vfs = 0;

			if (need_lock_vfs && !mutex_tryenter(f->uf_vfs_lockp)) {
				TRIVIA((": tryenter(vfslockp) fail; retry]\n"));
				f->uf_retry = 1;
				break;
			}

			f->uf_end_tm = time;
			f->uf_lf.lf_lock = LOCKFS_OLOCK;
			f->uf_retry = 0;

			/* this failure is no longer the fs's current one */
			if (f->uf_vfs_ufsfxp)
				f->uf_vfs_ufsfxp->fx_current = NULL;

			if (need_lock_vfs)
				mutex_exit(f->uf_vfs_lockp);

			cmn_err(CE_NOTE, (s & UF_NOTFIX)? "Could not fix %s":
				    "%s is now accessible", fs_name(f));

			if (s & UF_FIXED) {
				mutex_enter(&uf_stats.ufst_mutex);
				uf_stats.ufst_num_fixed++;
				mutex_exit(&uf_stats.ufst_mutex);
			}
			/* the delay in ticks is also passed as the argument */
			(void) timeout(ufsfx_kill_fix_failure_thread,
			    (void *)(ufsfx_tune.uft_short_err_period * hz),
			    ufsfx_tune.uft_short_err_period * hz);
			rc = SFRC_SUCCESS;
			break;
		}
		case UF_REPLICA:

			ASSERT(MUTEX_HELD(f->uf_vfs_lockp));

			/* not actually a replica? */
			if (f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current &&
			    f->uf_vfs_ufsfxp->fx_current != f &&
			!terminal_state(f->uf_vfs_ufsfxp->fx_current->uf_s)) {

				/* remember which failure we duplicate */
				f->uf_orig = f->uf_vfs_ufsfxp->fx_current;
				f->uf_retry = 0;
				rc = SFRC_SUCCESS;
			} else {
				TRIVIA((": NULL fx_current]\n"));
				f->uf_retry = 1;
			}

			break;

		default:
			rc = set_state(f, UF_PANIC);
			TRIVIA((": failed] "));
			break;
		}
		break;

	case UFA_FOUND:
		/*
		 * XXX de-allocate these after some period?
		 * XXX or move to an historical list?
		 * XXX or have an ioctl which reaps them?
		 */
		/*
		 * For now, since we don't expect lots of failures
		 * to occur (to the point of memory shortages),
		 * just punt
		 */

		/* be sure we're not wasting cpu on old failures */
		if (f->uf_retry != 0) {
			mutex_enter(&uf_stats.ufst_mutex);
			uf_stats.ufst_cpu_waste++;
			mutex_exit(&uf_stats.ufst_mutex);
			f->uf_retry = 0;
		}
		rc = SFRC_SUCCESS;
		break;

	default:
		(void) set_state(f, UF_PANIC);
		TRIVIA((": failed] "));
		break;
	}

	TRIVIA(("] "));
	return (rc);
}
1976 
1977 static sfrc_t
1978 sf_panic(
1979 	ufs_failure_t	*f,
1980 	ufsa_t	 a,
1981 	ufs_failure_states_t	 s)
1982 {
1983 	sfrc_t	rc = SFRC_FAIL;
1984 
1985 	TRIVIA(("[sf_panic, action is %s, prev. state is %s",
1986 		act_name(a), state_name(f->uf_s)));
1987 	ASSERT(s & UF_PANIC);
1988 
1989 	switch (a) {
1990 	case UFA_SET:
1991 		f->uf_retry = -ufsfx_tune.uft_short_err_period;
1992 		rc = SFRC_SUCCESS;
1993 		break;
1994 
1995 	case UFA_FOUND:
1996 	default:
1997 		real_panic(f, " ");
1998 
1999 		/* LINTED: warning: logical expression always true: op "||" */
2000 		ASSERT(DEBUG);
2001 
2002 		(void) set_state(f, UF_UMOUNT);	/* XXX UF_NOTFIX? */
2003 
2004 		break;
2005 	}
2006 
2007 	TRIVIA(("] "));
2008 	return (rc);
2009 }
2010 
2011 /*
2012  * minimum state function
2013  */
2014 static sfrc_t
2015 sf_minimum(
2016 	ufs_failure_t	*f,
2017 	ufsa_t	 a, /* LINTED argument unused in function: ignored */
2018 	ufs_failure_states_t	 ignored)
2019 {
2020 	sfrc_t rc = SFRC_FAIL;
2021 
2022 	TRIVIA(("[sf_minimum, action is %s", act_name(a)));
2023 
2024 	switch (a) {
2025 	case UFA_SET:
2026 		f->uf_retry = 0;
2027 		/* FALLTHROUGH */
2028 
2029 	case UFA_FOUND:
2030 		rc = SFRC_SUCCESS;
2031 		break;
2032 
2033 	default:
2034 		(void) set_state(f, UF_PANIC);
2035 		TRIVIA((": failed] "));
2036 		break;
2037 	}
2038 
2039 	TRIVIA(("] "));
2040 	return (rc);
2041 }
2042 
2043 static int
2044 state_trans_valid(ufs_failure_states_t from, ufs_failure_states_t to)
2045 {
2046 	ufsd_t	*s;
2047 	int	 valid;
2048 
2049 	HIDEOUS(("[state_trans_valid"));
2050 
2051 	if (from & to)
2052 		return (1);
2053 
2054 	s = get_state_desc(to);
2055 
2056 	/*
2057 	 * extra test is necessary since we want UF_UNDEF = 0,
2058 	 * (to detect freshly allocated memory)
2059 	 * but can't check for that value with a bit test
2060 	 */
2061 	valid = (to & UF_INIT)? from == s->ud_prev: from & s->ud_prev;
2062 
2063 	HIDEOUS((": %svalid] ", valid? "": "in"));
2064 	return (valid);
2065 }
2066 
2067 static int
2068 terminal_state(ufs_failure_states_t state)
2069 {
2070 	ufsd_t	*s;
2071 
2072 	HIDEOUS(("[terminal_state"));
2073 
2074 	s = get_state_desc(state);
2075 
2076 	HIDEOUS((": %sterminal] ", s->ud_attr.terminal? "": "not "));
2077 	return ((int)s->ud_attr.terminal);
2078 }
2079 
2080 static void
2081 alloc_lockfs_comment(ufs_failure_t *f, struct lockfs *lfp)
2082 {
2083 	MINUTE(("[alloc_lockfs_comment"));
2084 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2085 
2086 	/*
2087 	 * ufs_fiolfs expects a kmem_alloc'ed comment;
2088 	 * it frees the comment if the lock fails
2089 	 * or else when the lock is unlocked.
2090 	 */
2091 
2092 	f->uf_lf.lf_comment = kmem_zalloc(LOCKFS_MAXCOMMENTLEN, KM_NOSLEEP);
2093 	if (f->uf_lf.lf_comment) {
2094 		char	*from;
2095 		size_t	 len;
2096 
2097 		/*
2098 		 * use panic string if there's no previous comment
2099 		 * or if we're setting the error lock
2100 		 */
2101 		if ((LOCKFS_IS_ELOCK(&f->uf_lf) || !lfp->lf_comment ||
2102 		    lfp->lf_comlen <= 0)) {
2103 			from = f->uf_panic_str;
2104 			len = LOCKFS_MAXCOMMENTLEN;
2105 		} else {
2106 			from = lfp->lf_comment;
2107 			len = lfp->lf_comlen;
2108 		}
2109 
2110 		bcopy(from, f->uf_lf.lf_comment, len);
2111 		f->uf_lf.lf_comlen = len;
2112 
2113 	} else {
2114 		f->uf_lf.lf_comlen = 0;
2115 	}
2116 	MINUTE(("] "));
2117 }
2118 
2119 static int
2120 set_lockfs(ufs_failure_t *f, struct lockfs *lfp)
2121 {
2122 	int	(*handle_lockfs_rc)(ufs_failure_t *);
2123 	int	  rc;
2124 
2125 	MINUTE(("[set_lockfs"));
2126 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2127 	ASSERT(!vfs_lock_held(f->uf_vfsp));
2128 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
2129 
2130 	if (!f->uf_ufsvfsp) {
2131 		MINUTE((": ufsvfsp is NULL]\n"));
2132 		return (0);
2133 	}
2134 
2135 	ASSERT(MUTEX_NOT_HELD(&f->uf_ufsvfsp->vfs_ulockfs.ul_lock));
2136 
2137 	if (!f->uf_ufsvfsp->vfs_root) {
2138 		MINUTE((": vfs_root is NULL]\n"));
2139 		return (0);
2140 	}
2141 
2142 	alloc_lockfs_comment(f, lfp);
2143 	f->uf_lf_err = 0;
2144 
2145 	if (!LOCKFS_IS_ELOCK(lfp)) {
2146 		lfp->lf_lock = f->uf_lf.lf_lock = LOCKFS_ELOCK;
2147 		VN_HOLD(f->uf_ufsvfsp->vfs_root);
2148 		f->uf_lf_err = ufs__fiolfs(f->uf_ufsvfsp->vfs_root,
2149 						&f->uf_lf,
2150 						/* from_user */ 0,
2151 						/* from_log  */ 0);
2152 		VN_RELE(f->uf_ufsvfsp->vfs_root);
2153 	}
2154 
2155 	handle_lockfs_rc = f->uf_lf_err != 0? lockfs_failure: lockfs_success;
2156 	rc = handle_lockfs_rc(f);
2157 
2158 	MINUTE(("] "));
2159 	return (rc);
2160 }
2161 
/*
 * React to a failed lockfs request; the error is in f->uf_lf_err.
 *
 *   EACCES, EPERM, EIO, EROFS, EDEADLK - non-transient: move to UF_UMOUNT
 *	if unmount was requested (and the error is not EDEADLK), else to
 *	UF_PANIC; a real panic if even that cannot be set.
 *   EBUSY, EAGAIN - transient: retry after uft_short_err_period.  If
 *	T_DONTPEND is set on the current thread it is cleared; otherwise,
 *	unless already LOCKED or FIXING, note UF_LOCKED (EBUSY) or
 *	UF_FIXING (EAGAIN).
 *   EINVAL - move to UF_NOTFIX.
 *   anything else - retry after uft_short_err_period.
 *
 * Returns 0 if f has no ufsvfs or the LOCKED/FIXING transition could not
 * be made, otherwise 1.  Caller must hold f->uf_mutex.
 */
static int
lockfs_failure(ufs_failure_t *f)
{
	int	error;
	ufs_failure_states_t	s;

	TRIVIA(("[lockfs_failure"));
	ASSERT(MUTEX_HELD(&f->uf_mutex));

	if (!f->uf_ufsvfsp) {
		TRIVIA((": ufsvfsp is NULL]\n"));
		return (0);
	}

	error = f->uf_lf_err;
	switch (error) {
			/* non-transient errors: */
	case EACCES:	/* disk/in-core metadata reconciliation failed  */
	case EPERM:	/* inode reconciliation failed; incore inode changed? */
	case EIO:	/* device is hard-locked or not responding */
	case EROFS:	/* device is write-locked */
	case EDEADLK:	/* can't lockfs; deadlock would result; */
			/* Swapping or saving accounting records */
			/* onto this fs can cause this errno. */

		MINOR(("ufs_fiolfs(\"%s\") of %s failed: %s (%d)",
				fs_name(f),
				lock_name(&f->uf_lf),
				err_name(error),
				error));

		/*
		 * if can't get lock, then fall back to panic, unless
		 * unmount was requested (although unmount will
		 * probably fail if the lock failed, so we'll panic
		 * anyway)
		 */

		s = ((f->uf_flags & UFSFX_LCKUMOUNT) && error != EDEADLK)?
							UF_UMOUNT: UF_PANIC;

		if (!set_state(f, s)) {
			real_panic(f, " ");
			/*NOTREACHED*/
			break;
		}
		break;


	case EBUSY:
	case EAGAIN:

		f->uf_retry = ufsfx_tune.uft_short_err_period;
		if (curthread->t_flag & T_DONTPEND) {
			/* consume the flag; no state change this time */
			curthread->t_flag &= ~T_DONTPEND;

		} else if (!(f->uf_s & (UF_LOCKED | UF_FIXING))) {
			ufs_failure_states_t state;
			/*
			 * if we didn't know that the fix had started,
			 * take note
			 */
			state = error == EBUSY? UF_LOCKED: UF_FIXING;
			if (!set_state(f, state)) {
				TRIVIA((": failed] "));
				return (0);
			}
		}
		break;

	default:	/* some other non-fatal error */
		MINOR(("lockfs(\"%s\") of %s returned %s (%d)",
					lock_name(&f->uf_lf),
					fs_name(f),
					err_name(f->uf_lf_err),
					f->uf_lf_err));

		f->uf_retry = ufsfx_tune.uft_short_err_period;
		break;

	case EINVAL:	/* unmounted? */
		(void) set_state(f, UF_NOTFIX);
		break;
	}
	TRIVIA(("] "));
	return (1);
}
2249 
2250 static int
2251 lockfs_success(ufs_failure_t *f)
2252 {
2253 	TRIVIA(("[lockfs_success"));
2254 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2255 
2256 	if (!f->uf_ufsvfsp) {
2257 		TRIVIA((": ufsvfsp is NULL]\n"));
2258 		return (0);
2259 	}
2260 
2261 	switch (f->uf_lf.lf_lock) {
2262 	case LOCKFS_ELOCK:	/* error lock worked */
2263 
2264 		if (!set_state(f, UF_LOCKED)) {
2265 			TRIVIA((": failed] "));
2266 			return (0);
2267 		}
2268 		break;
2269 
2270 	case LOCKFS_ULOCK: 			/* unlock worked */
2271 		/*
2272 		 * how'd we get here?
2273 		 * This should be done from fsck's unlock,
2274 		 * not from this thread's context.
2275 		 */
2276 		cmn_err(CE_WARN, "Unlocked error-lock of %s", fs_name(f));
2277 		ufsfx_unlockfs(f->uf_ufsvfsp);
2278 		break;
2279 
2280 	default:
2281 		if (!set_state(f, UF_NOTFIX)) {
2282 			TRIVIA((": failed] "));
2283 			return (0);
2284 		}
2285 		break;
2286 	}
2287 	TRIVIA(("] "));
2288 	return (1);
2289 }
2290 
2291 /*
2292  * when fsck is running it puts its pid into the lockfs
2293  * comment structure, prefaced by PIDSTR
2294  */
2295 const char *PIDSTR = "[pid:";
2296 static int
2297 fsck_active(ufs_failure_t *f)
2298 {
2299 	char		*cp;
2300 	int		 i, found, errlocked;
2301 	size_t		 comlen;
2302 	const int	 PIDSTRLEN = (int)strlen(PIDSTR);
2303 	struct ulockfs	*ulp = &f->uf_ufsvfsp->vfs_ulockfs;
2304 
2305 	TRIVIA(("[fsck_active"));
2306 
2307 	ASSERT(f);
2308 	ASSERT(f->uf_s & UF_FIXING);
2309 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2310 	ASSERT(f->uf_ufsvfsp);
2311 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
2312 	ASSERT(MUTEX_NOT_HELD(&ulp->ul_lock));
2313 
2314 	mutex_enter(&ulp->ul_lock);
2315 	cp = ulp->ul_lockfs.lf_comment;
2316 	comlen = ulp->ul_lockfs.lf_comlen;
2317 	errlocked = (int)ULOCKFS_IS_ELOCK(ulp);
2318 	mutex_exit(&ulp->ul_lock);
2319 
2320 	if (!cp || comlen == 0) {
2321 		TRIVIA((": null comment or comlen <= 0, found:0]"));
2322 		return (0);
2323 	}
2324 
2325 	for (found = i = 0; !found && i < (comlen - PIDSTRLEN); i++, cp++)
2326 		found = strncmp(cp, PIDSTR, PIDSTRLEN) == 0;
2327 
2328 	TRIVIA(("found:%d, is_elock:%d]", found, errlocked));
2329 	return (errlocked & found);
2330 }
2331 
2332 static const char unknown_fs[]		= "<unknown fs>";
2333 static const char null_failure[] = "<NULL ufs failure record; unknown fs>";
2334 static const char mutated_vfs_bufp[]	= "<mutated vfs_bufp, unknown fs>";
2335 static const char mutated_vfs_fs[]	= "<mutated vfs_fs, unknown fs>";
2336 
2337 static char *
2338 fs_name(ufs_failure_t *f)
2339 {
2340 	HIDEOUS(("[fs_name"));
2341 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2342 
2343 	if (!f) {
2344 		HIDEOUS((": failure ptr is NULL]\n"));
2345 		return ((char *)null_failure);
2346 	}
2347 
2348 	if (f->uf_fsname[0] != '\0') {
2349 		HIDEOUS((": return (uf_fsname)]\n"));
2350 		return (f->uf_fsname);
2351 	}
2352 
2353 	if (MUTEX_HELD(f->uf_vfs_lockp)) {
2354 		if (f->uf_bp != f->uf_ufsvfsp->vfs_bufp) {
2355 			HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n",
2356 			    (void *)f->uf_bp, (void *)f->uf_ufsvfsp->vfs_bufp));
2357 			return ((char *)mutated_vfs_bufp);
2358 		}
2359 		if (f->uf_fs != f->uf_ufsvfsp->vfs_fs) {
2360 			HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n",
2361 			    (void *)f->uf_fs, (void *)f->uf_ufsvfsp->vfs_fs));
2362 			return ((char *)mutated_vfs_fs);
2363 		}
2364 		if (f->uf_ufsvfsp && f->uf_bp && f->uf_fs &&
2365 		    *f->uf_fs->fs_fsmnt != '\0') {
2366 			HIDEOUS((": return (fs_fsmnt)]\n"));
2367 			return (f->uf_fs->fs_fsmnt);
2368 		}
2369 	}
2370 
2371 	HIDEOUS((": unknown file system]\n"));
2372 	return ((char *)unknown_fs);
2373 }
2374 
2375 #if defined(DEBUG)
2376 static char *
2377 lock_name(struct lockfs *lfp)
2378 {
2379 	struct lock_description	*l;
2380 	char			*lname;
2381 
2382 	HIDEOUS(("[lock_name"));
2383 
2384 	lname = lock_desc[0].ld_name;
2385 	for (l = &lock_desc[1]; l->ld_name != NULL; l++) {
2386 		if (lfp && lfp->lf_lock == l->ld_type) {
2387 			lname = l->ld_name;
2388 			break;
2389 		}
2390 	}
2391 	HIDEOUS(("]"));
2392 	return (lname);
2393 }
2394 
2395 static char *
2396 state_name(ufs_failure_states_t state)
2397 {
2398 	ufsd_t	*s;
2399 
2400 	HIDEOUS(("[state_name"));
2401 
2402 	s = get_state_desc(state);
2403 
2404 	HIDEOUS(("]"));
2405 	return (s->ud_name);
2406 }
2407 
2408 static char *
2409 err_name(int error)
2410 {
2411 	struct error_description *e;
2412 
2413 	HIDEOUS(("[err_name"));
2414 
2415 	for (e = &err_desc[1]; e->ed_name != NULL; e++) {
2416 		if (error == e->ed_errno) {
2417 			HIDEOUS(("]"));
2418 			return (e->ed_name);
2419 		}
2420 	}
2421 	HIDEOUS(("]"));
2422 	return (err_desc[0].ed_name);
2423 }
2424 
2425 static char *
2426 act_name(ufsa_t action)
2427 {
2428 	struct action_description *a;
2429 
2430 	HIDEOUS(("[act_name"));
2431 
2432 	for (a = &act_desc[1]; a->ad_name != NULL; a++) {
2433 		if (action == a->ad_v) {
2434 			HIDEOUS(("]"));
2435 			return (a->ad_name);
2436 		}
2437 	}
2438 	HIDEOUS(("]"));
2439 	return (act_desc[0].ad_name);
2440 }
2441 
2442 /*
2443  * dump failure list
2444  */
2445 static void
2446 dump_uf_list(char *msg)
2447 {
2448 	ufs_failure_t	*f;
2449 	int		 i;
2450 	int		 list_was_locked = MUTEX_HELD(&ufs_fix.uq_mutex);
2451 
2452 	if (!list_was_locked && !mutex_tryenter(&ufs_fix.uq_mutex)) {
2453 		printf("dump_uf_list: couldn't get list lock\n");
2454 		return;
2455 	}
2456 
2457 	if (msg) {
2458 		printf("\n%s", msg);
2459 	}
2460 	printf("\ndump_uf_list:\n\tuq_lowat: %d, uq_ne: %d\n",
2461 		ufs_fix.uq_lowat, ufs_fix.uq_ne);
2462 
2463 	mutex_enter(&uf_stats.ufst_mutex);
2464 	printf("\tuf_stats.current_races: %ld\n", uf_stats.ufst_current_races);
2465 	printf("\tuf_stats.num_failed: %ld\n", uf_stats.ufst_num_failed);
2466 	printf("\tuf_stats.num_fixed: %ld\n", uf_stats.ufst_num_fixed);
2467 	printf("\tuf_stats.cpu_waste: %ld\n", uf_stats.ufst_cpu_waste);
2468 	printf("\tuf_stats.lock_violations: %ld, unmount_failures: %ld\n",
2469 		uf_stats.ufst_lock_violations, uf_stats.ufst_unmount_failures);
2470 	mutex_exit(&uf_stats.ufst_mutex);
2471 
2472 	for (f = ufs_fix.uq_ufhead, i = 1; f; f = f->uf_next, i++) {
2473 
2474 		if (!mutex_tryenter(&f->uf_mutex)) {
2475 			printf("%d.\t\"skipped - try enter failed\"\n", i);
2476 			continue;
2477 		}
2478 
2479 		dump_uf(f, i);
2480 
2481 		mutex_exit(&f->uf_mutex);
2482 	}
2483 
2484 	printf("\n");
2485 
2486 	if (!list_was_locked)
2487 		mutex_exit(&ufs_fix.uq_mutex);
2488 }
2489 
2490 static void
2491 dump_uf(ufs_failure_t *f, int i)
2492 {
2493 	if (!f) {
2494 		printf("dump_uf: NULL failure record\n");
2495 		return;
2496 	}
2497 
2498 	printf("%d.\t\"%s\" is %s.\n",
2499 		    i, fs_name(f), state_name(f->uf_s));
2500 	printf("\t\"%s\"\tAddr: 0x%p\n", f->uf_panic_str, (void *)f);
2501 	printf("\tNext: 0x%p\t\tPrev: 0x%p\n",
2502 					(void *)f->uf_next, (void *)f->uf_prev);
2503 
2504 	if (f->uf_orig)
2505 		printf("\tOriginal failure: 0x%p \"%s\"\n",
2506 		    (void *)f->uf_orig, f->uf_orig->uf_panic_str);
2507 
2508 	printf("\tUfsvfs: 0x%p\t\tVfs_lockp: 0x%p\n",
2509 		    (void *)f->uf_ufsvfsp, (void *)f->uf_vfs_lockp);
2510 	printf("\tVfs_fsfxp: 0x%p\n", (void *)f->uf_vfs_ufsfxp);
2511 	printf("\tVfs_bufp: 0x%p", (void *)f->uf_bp);
2512 
2513 	if (f->uf_bp)
2514 		printf("\t\tVfs_fs: 0x%p\n", (void *)f->uf_fs);
2515 	else
2516 		printf("\n");
2517 
2518 	printf("\tBegin: 0x%lx\tEntered: 0x%lx\tEnd: 0x%lx\n",
2519 	    f->uf_begin_tm, f->uf_entered_tm, f->uf_end_tm);
2520 
2521 	printf("\tFlags: (%d) %s%s%s%s", f->uf_flags,
2522 		f->uf_flags & UFSFX_LCKONLY?	 "\"lock only\" "	: "",
2523 		f->uf_flags & UFSFX_LCKUMOUNT?	 "\"lock+unmount\" "	: "",
2524 		f->uf_flags & UFSFX_REPAIR_START? "\"started repair\" "	: "",
2525 		f->uf_flags == 0?                "<none>"               : "");
2526 
2527 	printf("\tRetry: %ld seconds\n", f->uf_retry);
2528 
2529 	printf("\tLockfs:\ttype: %s\terror: %s (%d)\n",
2530 		lock_name(&f->uf_lf),
2531 		err_name(f->uf_lf_err), f->uf_lf_err);
2532 
2533 }
2534 #endif /* DEBUG */
2535 
2536 /*
2537  * returns # of ufs_failures in a non-terminal state on queue
2538  * used to coordinate with hlock thread (see ufs_thread.c)
2539  * and to determine when the error lock thread may exit
2540  */
2541 
2542 int
2543 ufsfx_get_failure_qlen(void)
2544 {
2545 	ufs_failure_t	*f;
2546 	ufsd_t		*s;
2547 	int		 qlen = 0;
2548 
2549 	MINUTE(("[ufsfx_get_failure_qlen"));
2550 
2551 	if (!mutex_tryenter(&ufs_fix.uq_mutex))
2552 		return (-1);
2553 
2554 	/*
2555 	 * walk down failure list
2556 	 */
2557 
2558 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
2559 
2560 		if (!mutex_tryenter(&f->uf_mutex))
2561 			continue;
2562 
2563 		s = get_state_desc(f->uf_s);
2564 
2565 		if (s->ud_attr.terminal) {
2566 			mutex_exit(&f->uf_mutex);
2567 			continue;
2568 		}
2569 
2570 		MINUTE((": found: %s, \"%s: %s\"\n",
2571 			    fs_name(f), state_name(f->uf_s), f->uf_panic_str));
2572 
2573 		qlen++;
2574 		mutex_exit(&f->uf_mutex);
2575 	}
2576 
2577 	mutex_exit(&ufs_fix.uq_mutex);
2578 
2579 	MINUTE((": qlen=%d]\n", qlen));
2580 
2581 	return (qlen);
2582 }
2583 
2584 /*
2585  * timeout routine
2586  *  called to shutdown fix failure thread and server daemon
2587  */
2588 static void
2589 ufsfx_kill_fix_failure_thread(void *arg)
2590 {
2591 	clock_t odelta = (clock_t)arg;
2592 	int	qlen;
2593 
2594 	MAJOR(("[ufsfx_kill_fix_failure_thread"));
2595 
2596 	qlen = ufsfx_get_failure_qlen();
2597 
2598 	if (qlen < 0) {
2599 		clock_t delta;
2600 
2601 		delta = odelta << 1;
2602 		if (delta <= 0)
2603 			delta = INT_MAX;
2604 
2605 		(void) timeout(ufsfx_kill_fix_failure_thread,
2606 		    (void *)delta, delta);
2607 		MAJOR((": rescheduled"));
2608 
2609 	} else if (qlen == 0) {
2610 		ufs_thread_exit(&ufs_fix);
2611 		MAJOR((": killed"));
2612 	}
2613 	/*
2614 	 * else
2615 	 *  let timeout expire
2616 	 */
2617 	MAJOR(("]\n"));
2618 }
2619