xref: /titanic_44/usr/src/uts/common/fs/ufs/ufs_panic.c (revision 2b4a78020b9c38d1b95e2f3fefa6d6e4be382d1f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/errno.h>
32 #include <sys/mode.h>
33 #include <sys/sysmacros.h>
34 #include <sys/cmn_err.h>
35 #include <sys/varargs.h>
36 #include <sys/time.h>
37 #include <sys/buf.h>
38 #include <sys/kmem.h>
39 #include <sys/t_lock.h>
40 #include <sys/poll.h>
41 #include <sys/debug.h>
42 #include <sys/cred.h>
43 #include <sys/lockfs.h>
44 #include <sys/fs/ufs_fs.h>
45 #include <sys/fs/ufs_inode.h>
46 #include <sys/fs/ufs_panic.h>
47 #include <sys/fs/ufs_lockfs.h>
48 #include <sys/fs/ufs_trans.h>
49 #include <sys/fs/ufs_mount.h>
50 #include <sys/fs/ufs_prot.h>
51 #include <sys/fs/ufs_bio.h>
52 #include <sys/pathname.h>
53 #include <sys/utsname.h>
54 #include <sys/conf.h>
55 
56 /* handy */
57 #define	abs(x)		((x) < 0? -(x): (x))
58 
59 #if defined(DEBUG)
60 
61 #define	DBGLVL_NONE	0x00000000
62 #define	DBGLVL_MAJOR	0x00000100
63 #define	DBGLVL_MINOR	0x00000200
64 #define	DBGLVL_MINUTE	0x00000400
65 #define	DBGLVL_TRIVIA	0x00000800
66 #define	DBGLVL_HIDEOUS	0x00001000
67 
68 #define	DBGFLG_NONE		0x00000000
69 #define	DBGFLG_NOPANIC		0x00000001
70 #define	DBGFLG_LVLONLY		0x00000002
71 #define	DBGFLG_FIXWOULDPANIC	0x00000004
72 
73 #define	DBGFLG_FLAGMASK		0x0000000F
74 #define	DBGFLG_LEVELMASK	~DBGFLG_FLAGMASK
75 
76 #define	DEBUG_FLAGS	(ufs_fix_failure_dbg & DBGFLG_FLAGMASK)
77 #define	DEBUG_LEVEL	(ufs_fix_failure_dbg & DBGFLG_LEVELMASK)
78 
79 unsigned int ufs_fix_failure_dbg =	DBGLVL_NONE | DBGFLG_NONE;
80 
81 #define	DCALL(dbg_level, call)						\
82 	{								\
83 		if (DEBUG_LEVEL != DBGLVL_NONE) {			\
84 			if (DEBUG_FLAGS & DBGFLG_LVLONLY) {		\
85 				if (DEBUG_LEVEL & dbg_level) {		\
86 					call;				\
87 				}					\
88 			} else {					\
89 				if (dbg_level <= DEBUG_LEVEL) {		\
90 					call;				\
91 				}					\
92 			}						\
93 		}							\
94 	}
95 
96 #define	DPRINTF(dbg_level, msg)		DCALL(dbg_level, printf msg)
97 
98 #define	MAJOR(msg)			DPRINTF(DBGLVL_MAJOR, msg)
99 #define	MINOR(msg)			DPRINTF(DBGLVL_MINOR, msg)
100 #define	MINUTE(msg)			DPRINTF(DBGLVL_MINUTE, msg)
101 #define	TRIVIA(msg)			DPRINTF(DBGLVL_TRIVIA, msg)
102 #define	HIDEOUS(msg)			DPRINTF(DBGLVL_HIDEOUS, msg)
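/*
 * For example, patching ufs_fix_failure_dbg to (DBGLVL_MINOR | DBGFLG_NOPANIC)
 * enables the MAJOR and MINOR messages (levels at or below DBGLVL_MINOR) and
 * makes real_panic_v() issue a warning instead of actually panicking.
 */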
103 
104 #else	/* !DEBUG */
105 
106 #define	DCALL(ignored_dbg_level, ignored_routine)
107 #define	MAJOR(ignored)
108 #define	MINOR(ignored)
109 #define	MINUTE(ignored)
110 #define	TRIVIA(ignored)
111 #define	HIDEOUS(ignored)
112 
113 #endif /* DEBUG */
114 
115 #define	NULLSTR(str)	(!(str) || *(str) == '\0'? "<null>" : (str))
116 #define	NULSTRING	""
117 
118 /* somewhat arbitrary limits, in seconds */
119 /* all probably ought to be different, but these are convenient for debugging */
120 const time_t	UF_TOO_LONG		= 128;	/* max. wait for fsck start */
121 
122 /* all of these are in units of seconds used for retry period while ... */
123 const time_t	UF_FIXSTART_PERIOD	= 16;	/* awaiting fsck start */
124 const time_t	UF_FIXPOLL_PERIOD	= 256;	/* awaiting fsck finish */
125 const time_t	UF_SHORT_ERROR_PERIOD	= 4;	/* after (lockfs) error */
126 const time_t	UF_LONG_ERROR_PERIOD	= 512;	/* after (lockfs) error */
127 
128 #define	NO_ERROR		0
129 #define	LOCKFS_OLOCK		LOCKFS_MAXLOCK+1
130 
131 const ulong_t	GB			= 1024 * 1024 * 1024;
132 const ulong_t	SecondsPerGig		= 1024;	/* ~17 minutes (overestimate) */
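/*
 * sf_found_lock_fix_cmn() multiplies SecondsPerGig by the file system size
 * in gigabytes to bound how long a repair may run before the operator is
 * nagged and, eventually, warned that a manual fsck may be required.
 */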
133 
134 /*
135  * per filesystem flags
136  */
137 const int	UFSFX_PANIC		= (UFSMNT_ONERROR_PANIC >> 4);
138 const int	UFSFX_LCKONLY		= (UFSMNT_ONERROR_LOCK >> 4);
139 const int	UFSFX_LCKUMOUNT		= (UFSMNT_ONERROR_UMOUNT >> 4);
140 const int	UFSFX_DEFAULT		= (UFSMNT_ONERROR_DEFAULT >> 4);
141 const int	UFSFX_REPAIR_START	= 0x10000000;
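/*
 * The first four values are simply the UFSMNT_ONERROR_* mount flags shifted
 * right by four bits; ufsfx_mount() stores the shifted value in
 * vfs_fsfx.fx_flags.  UFSFX_REPAIR_START is or'd into a failure record's
 * uf_flags by sf_nonterm_cmn() once the repair has been started.
 */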
142 
143 /* return protocols */
144 
145 typedef enum triage_return_code {
146 	TRIAGE_DEAD = -1,
147 	TRIAGE_NO_SPIRIT,
148 	TRIAGE_ATTEND_TO
149 } triage_t;
150 
151 typedef enum statefunc_return_code {
152 	SFRC_SUCCESS = 1,
153 	SFRC_FAIL = 0
154 } sfrc_t;
155 
156 /* external references */
157 /* in ufs_thread.c */
158 extern int	ufs_thread_run(struct ufs_q *, callb_cpr_t *cprinfop);
159 extern int	ufs_checkaccton(vnode_t *);		/* in ufs_lockfs.c */
160 extern int	ufs_checkswapon(vnode_t *);		/* in ufs_lockfs.c */
161 
162 extern struct pollhead		ufs_pollhd;		/* in ufs_vnops.c */
163 
164 /* globals */
165 struct	ufs_q	 ufs_fix;
166 
167 /*
168  * patchable constants:
169  *   These are set in ufsfx_init() [called at modload]
170  */
171 struct ufs_failure_tunable {
172 	long	 uft_too_long;		/* limit repair startup time */
173 	long	 uft_fixstart_period;	/* pre-repair start period */
174 	long	 uft_fixpoll_period;	/* post-fsck start period */
175 	long	 uft_short_err_period;	/* post-error short period */
176 	long	 uft_long_err_period;	/* post-error long period */
177 } ufsfx_tune;
178 
179 /* internal statistics of events */
180 struct uf_statistics {
181 	ulong_t		ufst_lock_violations;
182 	ulong_t		ufst_current_races;
183 	ulong_t		ufst_unmount_failures;
184 	ulong_t		ufst_num_fixed;
185 	ulong_t		ufst_num_failed;
186 	ulong_t		ufst_cpu_waste;
187 	time_t		ufst_last_start_tm;
188 	kmutex_t	ufst_mutex;
189 } uf_stats;
190 
191 typedef enum state_action {
192 	UFA_ERROR = -1,		/* internal error */
193 	UFA_FOUND,		/* found uf in state */
194 	UFA_SET			/* change uf to state */
195 } ufsa_t;
196 
197 /* state definition */
198 typedef struct uf_state_desc {
199 	int	  ud_v;					/* value */
200 	char	 *ud_name;				/* name */
201 	sfrc_t	(*ud_sfp)(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
202 							/* per-state actions */
203 	ufs_failure_states_t	  ud_prev;		/* valid prev. states */
204 
205 	struct uf_state_desc_attr {
206 		unsigned	terminal:1;	/* no action req. if found */
207 		unsigned	at_fail:1;	/* state set by thread */
208 						/* encountering the error */
209 		unsigned	unused;
210 	} ud_attr;
211 } ufsd_t;
212 
213 /*
214  * forward references
215  */
216 
217 /* thread to watch for failures */
218 static void	ufsfx_thread_fix_failures(void *);
219 static int 	ufsfx_do_failure_q(void);
220 static void	ufsfx_kill_fix_failure_thread(void *);
221 
222 /* routines called when failure occurs */
223 static int		 ufs_fault_v(vnode_t *, char *, va_list)
224 	__KVPRINTFLIKE(2);
225 static ufs_failure_t	*init_failure(vnode_t *, char *, va_list)
226 	__KVPRINTFLIKE(2);
227 static void		 queue_failure(ufs_failure_t *);
228 /*PRINTFLIKE2*/
229 static void		 real_panic(ufs_failure_t *, const char *, ...)
230 	__KPRINTFLIKE(2);
231 static void		 real_panic_v(ufs_failure_t *, const char *, va_list)
232 	__KVPRINTFLIKE(2);
233 static triage_t		 triage(vnode_t *);
234 
235 /* routines called when failure record is acted upon */
236 static sfrc_t	set_state(ufs_failure_t *, ufs_failure_states_t);
237 static int	state_trans_valid(ufs_failure_states_t, ufs_failure_states_t);
238 static int	terminal_state(ufs_failure_states_t);
239 
240 /* routines called when states entered/found */
241 static sfrc_t	sf_minimum(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
242 static sfrc_t	sf_undef(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
243 static sfrc_t	sf_init(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
244 static sfrc_t	sf_queue(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
245 static sfrc_t	sf_found_queue(ufs_failure_t *);
246 static sfrc_t	sf_nonterm_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
247 static sfrc_t	sf_term_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
248 static sfrc_t	sf_panic(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
249 static sfrc_t	sf_set_trylck(ufs_failure_t *);
250 static sfrc_t	sf_set_locked(ufs_failure_t *);
251 static sfrc_t	sf_found_trylck(ufs_failure_t *);
252 static sfrc_t	sf_found_lock_fix_cmn(ufs_failure_t *, ufs_failure_states_t);
253 static sfrc_t	sf_found_umount(ufs_failure_t *);
254 
255 /* support routines, called by sf_nonterm_cmn and sf_term_cmn */
256 static time_t 	trylock_time_exceeded(ufs_failure_t *);
257 static void 	pester_msg(ufs_failure_t *, int);
258 static int 	get_lockfs_status(ufs_failure_t *, struct lockfs *);
259 static void 	alloc_lockfs_comment(ufs_failure_t *, struct lockfs *);
260 static int 	set_lockfs(ufs_failure_t *, struct lockfs *);
261 static int 	lockfs_failure(ufs_failure_t *);
262 static int 	lockfs_success(ufs_failure_t *);
263 static int	fsck_active(ufs_failure_t *);
264 
265 /* low-level support routines */
266 static ufsd_t	*get_state_desc(ufs_failure_states_t);
267 static char	*fs_name(ufs_failure_t *);
268 
269 #if defined(DEBUG)
270 static char	*state_name(ufs_failure_states_t);
271 static char	*lock_name(struct lockfs *);
272 static char	*err_name(int);
273 static char	*act_name(ufsa_t);
274 static void	 dump_uf_list(char *msg);
275 static void	 dump_uf(ufs_failure_t *, int i);
276 #endif /* DEBUG */
277 /*
278  *
279  * State Transitions:
280  *
281  * normally, and if flagged to be locked but not unmounted
282  * (UFSMNT_ONERROR_LOCK):
283  *	UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> FIXING -> FIXED
284  *
285  * The only difference between these two cases is that with the lock-only
286  * option the fsck must be started manually.
287  *
288  * if flagged to be unmounted:			(UFSMNT_ONERROR_UMOUNT)
289  *	UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> UMOUNT -> NOTFIX
290  *
291  * if flagged to panic:				(UFSMNT_ONERROR_PANIC)
292  *	UNDEF -> INIT -> PANIC
293  *
294  * if a secondary panic occurs on a file system which has an active
295  * failure record:
296  *	UNDEF -> INIT -> QUEUE -> REPLICA
297  *
298  * UNDEF, INIT, QUEUE all are set in the context of the failing thread.
299  * All other states (except possibly PANIC) are set by the monitor
300  * (lock) thread.
301  *
302  */
303 
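/*
 * Each entry's ud_prev bitmask below encodes the valid predecessor states
 * from the diagram above; state_trans_valid() checks a proposed transition
 * against it (UF_INIT is compared by equality since UF_UNDEF is zero and
 * cannot be tested as a bit).
 */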
304 ufsd_t	state_desc[] =
305 {
306 	{ UF_ILLEGAL,	"in an unknown state",	sf_minimum,	UF_ILLEGAL,
307 								{ 0, 1, 0 } },
308 	{ UF_UNDEF,	"undefined",		sf_undef,	UF_UNDEF,
309 								{ 0, 1, 0 } },
310 	{ UF_INIT,	"being initialized",	sf_init,	UF_UNDEF,
311 								{ 0, 1, 0 } },
312 	{ UF_QUEUE,	"queued",		sf_queue,	UF_INIT,
313 								{ 0, 1, 0 } },
314 	{ UF_TRYLCK,	"trying to be locked",	sf_nonterm_cmn,
315 						UF_QUEUE,	{ 0, 0, 0 } },
316 	{ UF_LOCKED,	"locked",		sf_nonterm_cmn,
317 					UF_TRYLCK | UF_FIXING,	{ 0, 0, 0 } },
318 	{ UF_UMOUNT,	"being unmounted",	sf_nonterm_cmn,
319 
320 #if defined(DEBUG)
321 					UF_PANIC |
322 #endif /* DEBUG */
323 					UF_TRYLCK | UF_LOCKED, 	{ 0, 0, 0 } },
324 	{ UF_FIXING,	"being fixed",		sf_nonterm_cmn,
325 						UF_LOCKED,	{ 0, 0, 0 } },
326 	{ UF_FIXED,	"fixed",		sf_term_cmn,
327 						UF_FIXING,	{ 1, 0, 0 } },
328 	{ UF_NOTFIX,	"not fixed",		sf_term_cmn,
329 
330 #if defined(DEBUG)
331 							UF_PANIC |
332 #endif /* DEBUG */
333 
334 	    UF_QUEUE | UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING,
335 								{ 1, 0, 0 } },
336 	{ UF_REPLICA,	"a replica",		sf_term_cmn,
337 						UF_QUEUE,	{ 1, 0, 0 } },
338 	{ UF_PANIC,	"panicking",		sf_panic,
339 		/* XXX make this narrower */	UF_ALLSTATES,	{ 0, 0, 0 } },
340 	{ UF_UNDEF,	NULL,			((sfrc_t (*)()) NULL),
341 						UF_UNDEF, 	{ 0, 0, 0 } }
342 };
343 
344 /* unified collection */
345 struct ufsfx_info {
346 	struct uf_statistics		*ufi_statp;
347 	struct ufs_failure_tunable	*ufi_tunep;
348 	ufsd_t				*ufi_statetab;
349 } uffsinfo;
350 
351 #if defined(DEBUG)
352 struct action_description {
353 	ufsa_t	 ad_v;
354 	char	*ad_name;
355 };
356 
357 #define	EUNK		(-1)
358 
359 struct error_description {
360 	int	 ed_errno;
361 	char	*ed_name;
362 } err_desc[] =
363 {
364 	{ EUNK,		"<unexpected errno?>"	},
365 	{ EINVAL,	"EINVAL"		},
366 	{ EACCES,	"EACCES"		},
367 	{ EPERM,	"EPERM"			},
368 	{ EIO,		"EIO"			},
369 	{ EDEADLK,	"EDEADLK"		},
370 	{ EBUSY,	"EBUSY"			},
371 	{ EAGAIN,	"EAGAIN"		},
372 	{ ERESTART,	"ERESTART"		},
373 	{ ETIMEDOUT,	"ETIMEDOUT"		},
374 	{ NO_ERROR,	"Ok"			},
375 	{ EUNK,		NULL 			}
376 };
377 
378 struct action_description act_desc[] =
379 {
380 	{ UFA_ERROR,	"<unexpected action?>"	},
381 	{ UFA_FOUND,	"\"found\""	},
382 	{ UFA_SET,	"\"set\""	},
383 	{ UFA_ERROR,	NULL			},
384 };
385 
386 #define	LOCKFS_BADLOCK	(-1)
387 
388 struct lock_description {
389 	int	 ld_type;
390 	char	*ld_name;
391 } lock_desc[] =
392 {
393 	{ LOCKFS_BADLOCK,	"<unexpected lock?>"	},
394 	{ LOCKFS_ULOCK,		"Unlock"		},
395 	{ LOCKFS_ELOCK,		"Error Lock"		},
396 	{ LOCKFS_HLOCK,		"Hard Lock"		},
397 	{ LOCKFS_OLOCK,		"Old Lock"		},
398 	{ LOCKFS_BADLOCK,	NULL			}
399 };
400 
401 #endif /* DEBUG */
402 
403 /*
404  * ufs_fault, ufs_fault_v
405  *
406  *  called instead of cmn_err(CE_PANIC, ...) by ufs routines
407  *  when a failure is detected to put the file system into an
408  *  error state (if possible) or to devolve to a panic otherwise
409  *
410  * vnode is some vnode in this file system, used to find the way
411  * to ufsvfs, vfsp etc.  Since a panic can be called from many
412  * levels, the vnode is the most convenient hook to pass through.
413  *
414  */
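/*
 * An illustrative (hypothetical) call site:
 *
 *	if (bad_block)
 *		return (ufs_fault(ITOV(ip), "ufs_foo: bad block on %s",
 *		    fs->fs_fsmnt));
 */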
415 
416 /*PRINTFLIKE2*/
417 int
418 ufs_fault(vnode_t *vp, char *fmt, ...)
419 {
420 	va_list	adx;
421 	int	error;
422 
423 	MINOR(("[ufs_fault"));
424 
425 	va_start(adx, fmt);
426 	error = ufs_fault_v(vp, fmt, adx);
427 	va_end(adx);
428 
429 	MINOR((": %s (%d)]\n", err_name(error), error));
430 	return (error);
431 }
432 
433 const char *nullfmt = "<null format?>";
434 
435 static int
436 ufs_fault_v(vnode_t *vp, char *fmt, va_list adx)
437 {
438 	ufs_failure_t		*new = NULL;
439 	ufsvfs_t		*ufsvfsp;
440 	triage_t		 fix;
441 	int			 err = ERESTART;
442 	int			need_vfslock;
443 
444 	MINOR(("[ufs_fault_v"));
445 
446 	if (fmt == NULL)
447 		fmt = (char *)nullfmt;
448 
449 	fix = triage(vp);
450 
451 	if (vp) {
452 		ufsvfsp = (struct ufsvfs *)vp->v_vfsp->vfs_data;
453 
454 		/*
455 		 * Something bad has happened. That is why we are here.
456 		 *
457 		 * In order for the bad thing to be recorded in the superblock
458 		 * we need to write to the superblock directly.
459 		 * In the case that logging is enabled the logging code
460 		 * would normally intercept our write as a delta to the log,
461 		 * thus we mark the filesystem FSBAD in any case.
462 		 */
463 		need_vfslock = !MUTEX_HELD(&ufsvfsp->vfs_lock);
464 
465 		if (need_vfslock) {
466 			mutex_enter(&ufsvfsp->vfs_lock);
467 		}
468 
469 		ufsvfsp->vfs_fs->fs_clean = FSBAD;
470 		ASSERT(SEMA_HELD(&ufsvfsp->vfs_bufp->b_sem));
471 		ufsvfsp->vfs_bufp->b_flags &=
472 		    ~(B_ASYNC | B_READ | B_DONE | B_ERROR | B_DELWRI);
473 
474 		(void) bdev_strategy(ufsvfsp->vfs_bufp);
475 		(void) biowait(ufsvfsp->vfs_bufp);
476 
477 		if (need_vfslock) {
478 			mutex_exit(&ufsvfsp->vfs_lock);
479 		}
480 	}
481 
482 	switch (fix) {
483 
484 	default:
485 	case TRIAGE_DEAD:
486 	case TRIAGE_NO_SPIRIT:
487 
488 		real_panic_v(new, fmt, adx);
489 		/* LINTED: warning: logical expression always true: op "||" */
490 		ASSERT(DEBUG);
491 		err = EAGAIN;
492 
493 #if defined(DEBUG)
494 		if (!(DEBUG_FLAGS & DBGFLG_FIXWOULDPANIC)) {
495 			break;
496 		}
497 		/* FALLTHROUGH */
498 
499 #else
500 		break;
501 
502 #endif /* DEBUG */
503 
504 	case TRIAGE_ATTEND_TO:
505 
506 		/* q thread not running yet? */
507 		if (mutex_tryenter(&ufs_fix.uq_mutex)) {
508 			if (!ufs_fix.uq_threadp) {
509 				mutex_exit(&ufs_fix.uq_mutex);
510 				ufs_thread_start(&ufs_fix,
511 				    ufsfx_thread_fix_failures, NULL);
512 				ufs_fix.uq_threadp->t_flag |= T_DONTBLOCK;
513 				mutex_enter(&ufs_fix.uq_mutex);
514 			} else {
515 				/*
516 				 * We got the lock but we are not the current
517 				 * threadp so we have to release the lock.
518 				 */
519 				mutex_exit(&ufs_fix.uq_mutex);
520 			}
521 		} else {
522 			MINOR((": fix failure thread already running "));
523 			/*
524 			 * No need to log another failure as one is already
525 			 * being logged.
526 			 */
527 			break;
528 		}
529 
530 		if (ufs_fix.uq_threadp && ufs_fix.uq_threadp == curthread) {
531 			mutex_exit(&ufs_fix.uq_mutex);
532 			cmn_err(CE_WARN, "ufs_fault_v: recursive ufs_fault");
533 		} else {
534 			/*
535 			 * Must check if we actually still own the lock and
536 			 * if so then release the lock and move on with life.
537 			 */
538 			if (mutex_owner(&ufs_fix.uq_mutex) == curthread)
539 				mutex_exit(&ufs_fix.uq_mutex);
540 		}
541 
542 		new = init_failure(vp, fmt, adx);
543 		if (new != NULL) {
544 			queue_failure(new);
545 			break;
546 		}
547 		real_panic_v(new, fmt, adx);
548 		break;
549 
550 	}
551 	MINOR(("] "));
552 	return (err);
553 }
554 
555 /*
556  * triage()
557  *
558  *  Attempt to fix iff:
559  *    - the system is not already panicking
560  *    - this file system isn't explicitly marked not to be fixed
561  *    - we can connect to the user-level daemon
562  * These conditions are detectable later, but if we can determine
563  * them in the failing thread's context the core dump may be more
564  * useful.
565  *
566  */
567 
568 static triage_t
569 triage(vnode_t *vp)
570 {
571 	struct inode	 *ip;
572 	int		  need_unlock_vfs;
573 	int		  fs_flags;
574 
575 	MINUTE(("[triage"));
576 
577 	if (panicstr) {
578 		MINUTE((
579 		": already panicking: \"%s\" => TRIAGE_DEAD]\n", panicstr));
580 		return (TRIAGE_DEAD);
581 	}
582 
583 	if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs) {
584 		MINUTE((
585 	": vp, ip or ufsvfs is NULL; can't determine fs => TRIAGE_DEAD]\n"));
586 		return (TRIAGE_DEAD);
587 	}
588 
589 	/* use tryenter and continue no matter what since we're panicky */
590 	need_unlock_vfs = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock);
591 	if (need_unlock_vfs)
592 		need_unlock_vfs = mutex_tryenter(&ip->i_ufsvfs->vfs_lock);
593 
594 	fs_flags = ip->i_ufsvfs->vfs_fsfx.fx_flags;
595 	if (need_unlock_vfs)
596 		mutex_exit(&ip->i_ufsvfs->vfs_lock);
597 
598 	if (fs_flags & UFSFX_PANIC) {
599 		MINUTE((
600 		": filesystem marked \"panic\" => TRIAGE_NO_SPIRIT]\n"));
601 		return (TRIAGE_NO_SPIRIT);
602 	}
603 
604 	if (ufs_checkaccton(vp) != 0) {
605 		MINUTE((
606 		": filesystem would deadlock (accounting) => TRIAGE_DEAD]\n"));
607 		return (TRIAGE_DEAD);
608 	}
609 
610 	if (ufs_checkswapon(vp) != 0) {
611 		MINUTE((
612 		": filesystem would deadlock (swapping) => TRIAGE_DEAD]\n"));
613 		return (TRIAGE_DEAD);
614 	}
615 
616 	MINUTE((": return TRIAGE_ATTEND_TO] "));
617 	return (TRIAGE_ATTEND_TO);
618 }
619 
620 /*
621  * init failure
622  *
623  * This routine allocates a failure struct and initializes
624  * its member elements.
625  * Space is allocated for copies of dynamic identifying fs structures
626  * passed in.  Without a much more segmented kernel architecture
627  * this is as protected as we can make it (for now.)
628  */
629 static ufs_failure_t *
630 init_failure(vnode_t *vp, char *fmt, va_list adx)
631 {
632 	ufs_failure_t	*new;
633 	struct inode	*ip;
634 	int		 initialization_worked = 0;
635 	int		 need_vfs_unlock;
636 
637 	MINOR(("[init_failure"));
638 
639 	new = kmem_zalloc(sizeof (ufs_failure_t), KM_NOSLEEP);
640 	if (!new) {
641 		MINOR((": kmem_zalloc failed]\n"));
642 		return (NULL);
643 	}
644 
645 	/*
646 	 * enough information to make a fix attempt possible?
647 	 */
648 	if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs || !vp->v_vfsp ||
649 	    !ip->i_ufsvfs->vfs_bufp || !ITOF(ip) || !fmt)
650 		goto errout;
651 
652 	if (vp->v_type != VREG && vp->v_type != VDIR &&
653 	    vp->v_type != VBLK && vp->v_type != VCHR &&
654 	    vp->v_type != VLNK && vp->v_type != VFIFO &&
655 	    vp->v_type != VSOCK)
656 		goto errout;
657 
658 	if (ip->i_ufsvfs->vfs_root->v_type != VREG &&
659 	    ip->i_ufsvfs->vfs_root->v_type != VDIR &&
660 	    ip->i_ufsvfs->vfs_root->v_type != VBLK &&
661 	    ip->i_ufsvfs->vfs_root->v_type != VCHR &&
662 	    ip->i_ufsvfs->vfs_root->v_type != VLNK &&
663 	    ip->i_ufsvfs->vfs_root->v_type != VFIFO &&
664 	    ip->i_ufsvfs->vfs_root->v_type != VSOCK)
665 		goto errout;
666 
667 	if ((ITOF(ip)->fs_magic != FS_MAGIC) &&
668 	    (ITOF(ip)->fs_magic != MTB_UFS_MAGIC))
669 		goto errout;
670 
671 	/* initialize values */
672 
673 	(void) vsnprintf(new->uf_panic_str, LOCKFS_MAXCOMMENTLEN - 1, fmt, adx);
674 
675 	new->uf_ufsvfsp = ip->i_ufsvfs;
676 	new->uf_vfsp    = ip->i_vfs;
677 
678 	mutex_init(&new->uf_mutex, NULL, MUTEX_DEFAULT, NULL);
679 	need_vfs_unlock = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock);
680 
681 	if (need_vfs_unlock) {
682 		if (!mutex_tryenter(&ip->i_ufsvfs->vfs_lock)) {
683 			/*
684 			 * not much alternative here, but we're panicking
685 			 * already, it couldn't be worse - so just
686 			 * proceed optimistically and take note.
687 			 */
688 			mutex_enter(&uf_stats.ufst_mutex);
689 			uf_stats.ufst_lock_violations++;
690 			mutex_exit(&uf_stats.ufst_mutex);
691 			MINOR((": couldn't get vfs lock"))
692 			need_vfs_unlock = 0;
693 		}
694 	}
695 
696 	if (mutex_tryenter(&new->uf_mutex)) {
697 		initialization_worked = set_state(new, UF_INIT);
698 		mutex_exit(&new->uf_mutex);
699 	}
700 
701 	if (need_vfs_unlock)
702 		mutex_exit(&ip->i_ufsvfs->vfs_lock);
703 
704 	if (initialization_worked) {
705 		MINOR(("] "));
706 		return (new);
707 	}
708 	/* FALLTHROUGH */
709 
710 errout:
711 	if (new)
712 		kmem_free(new, sizeof (ufs_failure_t));
713 	MINOR((": failed]\n"));
714 	return (NULL);
715 }
716 
717 static void
718 queue_failure(ufs_failure_t *new)
719 {
720 	MINOR(("[queue_failure"));
721 
722 	mutex_enter(&ufs_fix.uq_mutex);
723 
724 	if (ufs_fix.uq_ufhead)
725 		insque(new, &ufs_fix.uq_ufhead);
726 	else
727 		ufs_fix.uq_ufhead = new;
728 
729 	if (mutex_tryenter(&new->uf_mutex)) {
730 		(void) set_state(new, UF_QUEUE);
731 		mutex_exit(&new->uf_mutex);
732 	}
733 
734 	mutex_enter(&uf_stats.ufst_mutex);		/* force wakeup */
735 	ufs_fix.uq_ne = ufs_fix.uq_lowat = uf_stats.ufst_num_failed;
736 	mutex_exit(&uf_stats.ufst_mutex);
737 
738 	cv_broadcast(&ufs_fix.uq_cv);
739 
740 	DCALL(DBGLVL_MAJOR, cmn_err(CE_WARN, new->uf_panic_str ?
741 	    new->uf_panic_str : "queue_failure: NULL panic str?"));
742 	mutex_exit(&ufs_fix.uq_mutex);
743 
744 	MINOR(("] "));
745 }
746 
747 /*PRINTFLIKE2*/
748 static void
749 real_panic(ufs_failure_t *f, const char *fmt, ...)
750 {
751 	va_list	adx;
752 
753 	MINUTE(("[real_panic "));
754 
755 	va_start(adx, fmt);
756 	real_panic_v(f, fmt, adx);
757 	va_end(adx);
758 
759 	MINUTE((": return?!]\n"));
760 }
761 
762 static void
763 real_panic_v(ufs_failure_t *f, const char *fmt, va_list adx)
764 {
765 	int seriousness = CE_PANIC;
766 	int need_unlock;
767 
768 	MINUTE(("[real_panic_v "));
769 
770 	if (f && f->uf_ufsvfsp)
771 		TRANS_SETERROR(f->uf_ufsvfsp);
772 
773 #if defined(DEBUG)
774 	if (DEBUG_FLAGS & DBGFLG_NOPANIC) {
775 		seriousness = CE_WARN;
776 		cmn_err(CE_WARN, "real_panic: EWOULDPANIC\n");
777 	}
778 #endif /* DEBUG */
779 
780 	delay(hz >> 1);			/* allow previous warnings to get out */
781 
782 	if (!f && fmt)
783 		vcmn_err(seriousness, fmt, adx);
784 	else
785 		cmn_err(seriousness, f && f->uf_panic_str? f->uf_panic_str:
786 		    "real_panic: <unknown panic?>");
787 
788 	if (f) {
789 		need_unlock = !MUTEX_HELD(&f->uf_mutex);
790 		if (need_unlock) {
791 			mutex_enter(&f->uf_mutex);
792 		}
793 
794 		f->uf_retry = -1;
795 		(void) set_state(f, UF_PANIC);
796 
797 		if (need_unlock) {
798 			mutex_exit(&f->uf_mutex);
799 		}
800 	}
801 	MINUTE((": return?!]\n"));
802 }
803 
804 /*
805  * initializes ufs panic structs, locks, etc
806  */
807 void
808 ufsfx_init(void)
809 {
810 
811 	MINUTE(("[ufsfx_init"));
812 
813 	/* patchable; unchanged while running, so no lock is needed */
814 	ufsfx_tune.uft_too_long		= UF_TOO_LONG;
815 	ufsfx_tune.uft_fixstart_period	= UF_FIXSTART_PERIOD;
816 	ufsfx_tune.uft_fixpoll_period	= UF_FIXPOLL_PERIOD;
817 	ufsfx_tune.uft_short_err_period	= UF_SHORT_ERROR_PERIOD;
818 	ufsfx_tune.uft_long_err_period	= UF_LONG_ERROR_PERIOD;
819 
820 	uffsinfo.ufi_statp	= &uf_stats;
821 	uffsinfo.ufi_tunep	= &ufsfx_tune;
822 	uffsinfo.ufi_statetab	= &state_desc[0];
823 
824 	mutex_init(&uf_stats.ufst_mutex, NULL, MUTEX_DEFAULT, NULL);
825 	ufs_thread_init(&ufs_fix, /* maxne */ 1);
826 
827 	MINUTE(("] "));
828 }
829 
830 /*
831  * initializes per-ufs values
832  * returns 0 (ok) or errno
833  */
834 int
835 ufsfx_mount(struct ufsvfs *ufsvfsp, int flags)
836 {
837 	MINUTE(("[ufsfx_mount (%d)", flags));
838 	/* don't check/need vfs_lock because it's still being initialized */
839 
840 	ufsvfsp->vfs_fsfx.fx_flags = (flags & UFSMNT_ONERROR_FLGMASK) >> 4;
841 
842 	MINUTE((": %s: fx_flags:%ld,",
843 	    ufsvfsp->vfs_fs->fs_fsmnt, ufsvfsp->vfs_fsfx.fx_flags));
844 	/*
845 	 *	onerror={panic ^ lock only ^ unmount}
846 	 */
847 
848 	if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_PANIC) {
849 		MINUTE((" PANIC"));
850 
851 	} else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKONLY) {
852 		MINUTE((" LCKONLY"));
853 
854 	} else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKUMOUNT) {
855 		MINUTE((" LCKUMOUNT"));
856 
857 	} else {
858 		ufsvfsp->vfs_fsfx.fx_flags = UFSFX_DEFAULT;
859 		ASSERT(ufsvfsp->vfs_fsfx.fx_flags &
860 		    (UFSMNT_ONERROR_FLGMASK >> 4));
861 		MINUTE((" DEFAULT"));
862 	}
863 
864 	pollwakeup(&ufs_pollhd, POLLPRI);
865 	MINUTE(("]\n"));
866 	return (0);
867 }
868 
869 /*
870  * ufsfx_unmount
871  *
872  * called during unmount
873  */
874 void
875 ufsfx_unmount(struct ufsvfs *ufsvfsp)
876 {
877 	ufs_failure_t	*f;
878 	int		 must_unlock_list;
879 
880 	MINUTE(("[ufsfx_unmount"));
881 
882 	if (!ufsvfsp) {
883 		MINUTE((": no ufsvfsp]"));
884 		return;
885 	}
886 
887 	if ((must_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex)) != 0)
888 		mutex_enter(&ufs_fix.uq_mutex);
889 
890 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
891 		int must_unlock_failure;
892 
893 		must_unlock_failure = !MUTEX_HELD(&f->uf_mutex);
894 		if (must_unlock_failure) {
895 			mutex_enter(&f->uf_mutex);
896 		}
897 
898 		if (f->uf_ufsvfsp == ufsvfsp) {
899 
900 			/*
901 			 * if we owned the failure record lock, then this
902 			 * is probably a fix failure-triggered unmount, so
903 			 * the warning is not appropriate or needed
904 			 */
905 
906 			/* XXX if rebooting don't print this? */
907 			if (!terminal_state(f->uf_s) && must_unlock_failure) {
908 				cmn_err(CE_WARN,
909 				    "Unmounting %s while error-locked",
910 				    fs_name(f));
911 			}
912 
913 			f->uf_ufsvfsp		= NULL;
914 			f->uf_vfs_ufsfxp	= NULL;
915 			f->uf_vfs_lockp		= NULL;
916 			f->uf_bp		= NULL;
917 			f->uf_vfsp		= NULL;
918 			f->uf_retry		= -1;
919 		}
920 
921 		if (must_unlock_failure)
922 			mutex_exit(&f->uf_mutex);
923 	}
924 	if (must_unlock_list)
925 		mutex_exit(&ufs_fix.uq_mutex);
926 
927 	pollwakeup(&ufs_pollhd, POLLPRI | POLLHUP);
928 	MINUTE(("] "));
929 }
930 
931 /*
932  * ufsfx_(un)lockfs
933  *
934  * provides hook from lockfs code so we can recognize unlock/relock
935  *  This is called after it is certain that the (un)lock will succeed.
936  */
937 void
938 ufsfx_unlockfs(struct ufsvfs *ufsvfsp)
939 {
940 	ufs_failure_t	*f;
941 	int		 need_unlock;
942 	int		 need_unlock_list;
943 	int		 informed = 0;
944 
945 	MINUTE(("[ufsfx_unlockfs"));
946 
947 	if (!ufsvfsp)
948 		return;
949 
950 	need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex);
951 
952 	if (need_unlock_list)
953 		mutex_enter(&ufs_fix.uq_mutex);
954 
955 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
956 
957 		need_unlock = !MUTEX_HELD(&f->uf_mutex);
958 		if (need_unlock)
959 			mutex_enter(&f->uf_mutex);
960 
961 		if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s)) {
962 			if (!(f->uf_s & UF_FIXING)) {
963 				/*
964 				 * This might happen if we don't notice that
965 				 * the fs gets marked FSFIX before it is
966 				 * marked FSCLEAN, as might occur if the
967 				 * superblock was hammered directly.
968 				 */
969 				if (!informed) {
970 					informed = 1;
971 					cmn_err(CE_NOTE,
972 					    "Unlock of %s succeeded before "
973 					    "fs_clean marked FSFIX?",
974 					    fs_name(f));
975 				}
976 
977 				/*
978 				 * pass through fixing state so
979 				 * transition protocol is satisfied
980 				 */
981 				if (!set_state(f, UF_FIXING)) {
982 					MINUTE((": failed] "));
983 				}
984 			}
985 
986 			if (!set_state(f, UF_FIXED)) {
987 				/* it's already fixed, so don't panic now */
988 				MINUTE((": failed] "));
989 			}
990 		}
991 
992 		if (need_unlock)
993 			mutex_exit(&f->uf_mutex);
994 	}
995 	if (need_unlock_list)
996 		mutex_exit(&ufs_fix.uq_mutex);
997 	MINUTE(("] "));
998 }
999 
1000 void
1001 ufsfx_lockfs(struct ufsvfs *ufsvfsp)
1002 {
1003 	ufs_failure_t	*f;
1004 	int		 need_unlock;
1005 	int		 need_unlock_list;
1006 
1007 	MINUTE(("[ufsfx_lockfs"));
1008 
1009 	if (!ufsvfsp)
1010 		return;
1011 
1012 	need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex);
1013 
1014 	if (need_unlock_list)
1015 		mutex_enter(&ufs_fix.uq_mutex);
1016 
1017 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
1018 
1019 		need_unlock = !MUTEX_HELD(&f->uf_mutex);
1020 		if (need_unlock)
1021 			mutex_enter(&f->uf_mutex);
1022 
1023 		if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s) &&
1024 		    f->uf_s != UF_PANIC) {
1025 			switch (f->uf_s) {
1026 
1027 			default:
1028 				cmn_err(CE_WARN,
1029 				    "fs %s not in state "
1030 				    "UF_TRYLCK, UF_LOCKED or UF_FIXING",
1031 				    fs_name(f));
1032 				break;
1033 
1034 			case UF_TRYLCK:
1035 				if (!set_state(f, UF_LOCKED)) {
1036 					MINUTE((": failed] "));
1037 				}
1038 				break;
1039 
1040 			case UF_LOCKED:
1041 				if (!set_state(f, UF_FIXING)) {
1042 					MINUTE((": failed] "));
1043 				}
1044 				break;
1045 
1046 			case UF_FIXING:
1047 				break;
1048 
1049 			}
1050 		}
1051 
1052 		if (need_unlock)
1053 			mutex_exit(&f->uf_mutex);
1054 	}
1055 	if (need_unlock_list)
1056 		mutex_exit(&ufs_fix.uq_mutex);
1057 
1058 	MINUTE(("] "));
1059 }
1060 
1061 /*
1062  * error lock, trigger fsck and unlock those fs with failures
1063  * blatantly copied from the hlock routine, although this routine
1064  * triggers differently in order to use uq_ne as meaningful data.
1065  */
1066 /* ARGSUSED */
1067 void
1068 ufsfx_thread_fix_failures(void *ignored)
1069 {
1070 	int		retry;
1071 	callb_cpr_t	cprinfo;
1072 
1073 	CALLB_CPR_INIT(&cprinfo, &ufs_fix.uq_mutex, callb_generic_cpr,
1074 	    "ufsfixfail");
1075 
1076 	MINUTE(("[ufsfx_thread_fix_failures] "));
1077 
1078 	for (;;) {
1079 		/* sleep until there is work to do */
1080 
1081 		mutex_enter(&ufs_fix.uq_mutex);
1082 		(void) ufs_thread_run(&ufs_fix, &cprinfo);
1083 		ufs_fix.uq_ne = 0;
1084 		mutex_exit(&ufs_fix.uq_mutex);
1085 
1086 		/* process failures on our q */
1087 		do {
1088 			retry = ufsfx_do_failure_q();
1089 			if (retry) {
1090 				mutex_enter(&ufs_fix.uq_mutex);
1091 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
1092 				(void) cv_timedwait(&ufs_fix.uq_cv,
1093 				    &ufs_fix.uq_mutex, lbolt + (hz * retry));
1094 				CALLB_CPR_SAFE_END(&cprinfo,
1095 				    &ufs_fix.uq_mutex);
1096 				mutex_exit(&ufs_fix.uq_mutex);
1097 			}
1098 		} while (retry);
1099 	}
1100 	/* NOTREACHED */
1101 }
1102 
1103 
1104 /*
1105  * watch for fix-on-panic work
1106  *
1107  * returns # of seconds to sleep before trying again
1108  * and zero if no retry is needed
1109  */
1110 
1111 int
1112 ufsfx_do_failure_q(void)
1113 {
1114 	ufs_failure_t	*f;
1115 	long		 retry = 1;
1116 	ufsd_t		*s;
1117 
1118 	MAJOR(("[ufsfx_do_failure_q"));
1119 	DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL));
1120 
1121 	if (!mutex_tryenter(&ufs_fix.uq_mutex))
1122 		return (retry);
1123 
1124 	retry = 0;
1125 rescan_q:
1126 
1127 	/*
1128 	 * walk down failure list
1129 	 *  depending on state of each failure, do whatever
1130 	 *  is appropriate to move it to the next state
1131 	 *  taking note of whether retry gets set
1132 	 *
1133 	 * retry protocol:
1134 	 * wakeup in shortest required time for any failure
1135 	 *   retry == 0; nothing more to do (terminal state)
1136 	 *   retry < 0; reprocess queue immediately, retry will
1137 	 *		be abs(retry) for the next cycle
1138 	 *   retry > 0; schedule wakeup for retry seconds
1139 	 */
1140 
1141 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
1142 
1143 		if (!mutex_tryenter(&f->uf_mutex)) {
1144 			retry = 1;
1145 			continue;
1146 		}
1147 		s = get_state_desc(f->uf_s);
1148 
1149 		MINOR((": found%s: %s, \"%s: %s\"\n",
1150 		    s->ud_attr.terminal ? " old" : "",
1151 		    fs_name(f), state_name(f->uf_s), f->uf_panic_str));
1152 
1153 		if (s->ud_attr.terminal) {
1154 			mutex_exit(&f->uf_mutex);
1155 			continue;
1156 		}
1157 
1158 		if (s->ud_sfp)
1159 			(*s->ud_sfp)(f, UFA_FOUND, f->uf_s);
1160 
1161 		ASSERT(terminal_state(f->uf_s) || f->uf_retry != 0);
1162 
1163 		if (f->uf_retry != 0) {
1164 			if (retry > f->uf_retry || retry == 0)
1165 				retry = f->uf_retry;
1166 			if (f->uf_retry < 0)
1167 				f->uf_retry = abs(f->uf_retry);
1168 		}
1169 		mutex_exit(&f->uf_mutex);
1170 	}
1171 
1172 
1173 	if (retry < 0) {
1174 		retry = abs(retry);
1175 		goto rescan_q;
1176 	}
1177 
1178 	mutex_exit(&ufs_fix.uq_mutex);
1179 
1180 	DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL));
1181 	MAJOR((": retry=%ld, good night]\n\n", retry));
1182 
1183 	return (retry);
1184 }
1185 
1186 static void
1187 pester_msg(ufs_failure_t *f, int seriousness)
1188 {
1189 	MINUTE(("[pester_msg"));
1190 	ASSERT(f->uf_s & (UF_LOCKED | UF_FIXING));
1191 
1192 	/*
1193 	 * XXX if it seems too long for this fs, poke administrator
1194 	 * XXX to run fsck manually (and change retry time?)
1195 	 */
1196 	cmn_err(seriousness, "Waiting for repair of %s to %s",
1197 	    fs_name(f), f->uf_s & UF_LOCKED ? "start" : "finish");
1198 	MINUTE(("]"));
1199 }
1200 
1201 static time_t
1202 trylock_time_exceeded(ufs_failure_t *f)
1203 {
1204 	time_t		toolong;
1205 	extern time_t	time;
1206 
1207 	MINUTE(("[trylock_time_exceeded"));
1208 	ASSERT(MUTEX_HELD(&f->uf_mutex));
1209 
1210 	toolong = (time_t)ufsfx_tune.uft_too_long + f->uf_entered_tm;
1211 	if (time > toolong)
1212 		cmn_err(CE_WARN, "error-lock timeout exceeded: %s", fs_name(f));
1213 
1214 	MINUTE(("] "));
1215 	return (time <= toolong? 0: time - toolong);
1216 }
1217 
1218 static int
1219 get_lockfs_status(ufs_failure_t *f, struct lockfs *lfp)
1220 {
1221 	MINUTE(("[get_lockfs_status"));
1222 
1223 	if (!f->uf_ufsvfsp) {
1224 		MINUTE((": ufsvfsp is NULL]\n"));
1225 		return (0);
1226 	}
1227 
1228 	ASSERT(MUTEX_HELD(&f->uf_mutex));
1229 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
1230 	ASSERT(!vfs_lock_held(f->uf_vfsp));
1231 	ASSERT(f->uf_ufsvfsp->vfs_root != NULL);
1232 
1233 	f->uf_lf_err = ufs_fiolfss(f->uf_ufsvfsp->vfs_root, lfp);
1234 
1235 	if (f->uf_lf_err) {
1236 		f->uf_retry = ufsfx_tune.uft_short_err_period;
1237 	}
1238 
1239 	MINUTE(("] "));
1240 	return (1);
1241 }
1242 
1243 static sfrc_t
1244 set_state(ufs_failure_t *f, ufs_failure_states_t new_state)
1245 {
1246 	ufsd_t		*s;
1247 	sfrc_t		 sfrc = SFRC_FAIL;
1248 	int		 need_unlock;
1249 	extern time_t	 time;
1250 
1251 	HIDEOUS(("[set_state: new state:%s", state_name(new_state)));
1252 	ASSERT(f);
1253 	ASSERT(MUTEX_HELD(&f->uf_mutex));
1254 
1255 	/*
1256 	 * if someone else is panicking, just let panic sync proceed
1257 	 */
1258 	if (panicstr) {
1259 		(void) set_state(f, UF_NOTFIX);
1260 		HIDEOUS((": state reset: not fixed] "));
1261 		return (sfrc);
1262 	}
1263 
1264 	/*
1265 	 * bad state transition, an internal error
1266 	 */
1267 	if (!state_trans_valid(f->uf_s, new_state)) {
1268 		/* recursion */
1269 		if (!(f->uf_s & UF_PANIC) && !(new_state & UF_PANIC))
1270 			(void) set_state(f, UF_PANIC);
1271 		MINOR((": state reset: transition failure (\"%s\"->\"%s\")] ",
1272 		    state_name(f->uf_s), state_name(new_state)));
1273 		return (sfrc);
1274 	}
1275 
1276 	s = get_state_desc(new_state);
1277 
1278 	need_unlock = !MUTEX_HELD(&ufs_fix.uq_mutex);
1279 	if (need_unlock)
1280 		mutex_enter(&ufs_fix.uq_mutex);
1281 
1282 	if (s->ud_attr.at_fail && ufs_fix.uq_threadp &&
1283 	    curthread == ufs_fix.uq_threadp) {
1284 		cmn_err(CE_WARN, "set_state: probable recursive panic of %s",
1285 		    fs_name(f));
1286 	}
1287 	if (need_unlock)
1288 		mutex_exit(&ufs_fix.uq_mutex);
1289 
1290 	/* NULL state functions always succeed */
1291 	sfrc = !s->ud_sfp? SFRC_SUCCESS: (*s->ud_sfp)(f, UFA_SET, new_state);
1292 
1293 	if (sfrc == SFRC_SUCCESS && f->uf_s != new_state) {
1294 		f->uf_s = new_state;
1295 		f->uf_entered_tm = time;
1296 		f->uf_counter = 0;
1297 	}
1298 
1299 	HIDEOUS(("]\n"));
1300 	return (sfrc);
1301 }
1302 
1303 static ufsd_t *
1304 get_state_desc(ufs_failure_states_t state)
1305 {
1306 	ufsd_t *s;
1307 
1308 	HIDEOUS(("[get_state_desc"));
1309 
1310 	for (s = &state_desc[1]; s->ud_name != NULL; s++) {
1311 		if (s->ud_v == state) {
1312 			HIDEOUS(("] "));
1313 			return (s);
1314 		}
1315 	}
1316 
1317 	HIDEOUS(("] "));
1318 	return (&state_desc[0]);	/* default */
1319 }
1320 
1321 static sfrc_t
1322 sf_undef(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s)
1323 {
1324 	sfrc_t rc;
1325 
1326 	TRIVIA(("[sf_undef, action is %s, state is %s\n",
1327 	    act_name(a), state_name(s)));
1328 	ASSERT(s == UF_UNDEF);
1329 
1330 	/* shouldn't find null failure records or ever set one */
1331 	rc = set_state(f, UF_NOTFIX);
1332 
1333 	TRIVIA(("] "));
1334 	return (rc);
1335 }
1336 
1337 
1338 static sfrc_t
1339 sf_init(
1340 	ufs_failure_t	*f,
1341 	ufsa_t	 a,
1342 	ufs_failure_states_t	 s)
1343 {
1344 	sfrc_t		rc = SFRC_FAIL;
1345 	extern time_t	time;
1346 
1347 	TRIVIA(("[sf_init, action is %s", act_name(a)));
1348 	ASSERT(s & UF_INIT);
1349 
1350 	switch (a) {
1351 	case UFA_SET:
1352 		f->uf_begin_tm = time;
1353 		f->uf_retry = 1;
1354 		if (!f->uf_ufsvfsp) {
1355 			(void) set_state(f, UF_PANIC);
1356 			TRIVIA((": NULL ufsvfsp]\n"));
1357 			return (rc);
1358 		}
1359 		/*
1360 		 * because we can call panic from many different levels,
1361 		 * we can't be sure that we've got the vfs_lock at this
1362 		 * point.  However, there's not much alternative and if
1363 		 * we don't (have the lock) the worst case is we'll just
1364 		 * panic again
1365 		 */
1366 		f->uf_vfs_lockp		= &f->uf_ufsvfsp->vfs_lock;
1367 		f->uf_vfs_ufsfxp	= &f->uf_ufsvfsp->vfs_fsfx;
1368 
1369 		if (!f->uf_ufsvfsp->vfs_bufp) {
1370 			(void) set_state(f, UF_PANIC);
1371 			TRIVIA((": NULL vfs_bufp]\n"));
1372 			return (rc);
1373 		}
1374 		f->uf_bp = f->uf_ufsvfsp->vfs_bufp;
1375 
1376 		if (!f->uf_ufsvfsp->vfs_bufp->b_un.b_fs) {
1377 			(void) set_state(f, UF_PANIC);
1378 			TRIVIA((": NULL vfs_fs]\n"));
1379 			return (rc);
1380 		}
1381 
1382 		/* vfs_fs = vfs_bufp->b_un.b_fs */
1383 		bcopy(f->uf_ufsvfsp->vfs_fs->fs_fsmnt, f->uf_fsname, MAXMNTLEN);
1384 
1385 		f->uf_lf.lf_lock  = LOCKFS_ELOCK;	/* primer */
1386 
1387 		if (!f->uf_vfsp || f->uf_vfsp->vfs_dev == NODEV) {
1388 			(void) set_state(f, UF_PANIC);
1389 			TRIVIA((": NULL vfsp or vfs_dev == NODEV"));
1390 			return (rc);
1391 		}
1392 		f->uf_dev = f->uf_vfsp->vfs_dev;
1393 
1394 		rc = SFRC_SUCCESS;
1395 		break;
1396 
1397 	case UFA_FOUND:
1398 	default:
1399 		/* failures marked init shouldn't even be on the queue yet */
1400 		rc = set_state(f, UF_QUEUE);
1401 		TRIVIA((": found failure with state init]\n"));
1402 	}
1403 
1404 	TRIVIA(("] "));
1405 	return (rc);
1406 }
1407 
1408 static sfrc_t
1409 sf_queue(
1410 	ufs_failure_t	*f,
1411 	ufsa_t	 a,
1412 	ufs_failure_states_t	 s)
1413 {
1414 	sfrc_t		rc = SFRC_FAIL;
1415 
1416 	TRIVIA(("[sf_queue, action is %s", act_name(a)));
1417 	ASSERT(s & UF_QUEUE);
1418 
1419 	if (!f->uf_ufsvfsp) {
1420 		TRIVIA((": NULL ufsvfsp]\n"));
1421 		return (rc);
1422 	}
1423 
1424 	switch (a) {
1425 	case UFA_FOUND:
1426 		rc = sf_found_queue(f);
1427 		break;
1428 
1429 	case UFA_SET:
1430 
1431 		ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex));
1432 
1433 		mutex_enter(&uf_stats.ufst_mutex);
1434 		uf_stats.ufst_num_failed++;
1435 		mutex_exit(&uf_stats.ufst_mutex);
1436 
1437 		/*
1438 		 * if can't get the vfs lock, just wait until
1439 		 * UF_TRYLCK to set fx_current
1440 		 */
1441 		if (mutex_tryenter(f->uf_vfs_lockp)) {
1442 			f->uf_vfs_ufsfxp->fx_current = f;
1443 			mutex_exit(f->uf_vfs_lockp);
1444 		} else {
1445 			mutex_enter(&uf_stats.ufst_mutex);
1446 			uf_stats.ufst_current_races++;
1447 			mutex_exit(&uf_stats.ufst_mutex);
1448 		}
1449 
1450 		f->uf_retry = 1;
1451 		rc = SFRC_SUCCESS;
1452 		TRIVIA(("] "));
1453 		break;
1454 
1455 	default:
1456 		(void) set_state(f, UF_PANIC);
1457 		TRIVIA((": failed] "));
1458 	}
1459 
1460 	return (rc);
1461 }
1462 
1463 static sfrc_t
1464 sf_found_queue(ufs_failure_t *f)
1465 {
1466 	int		replica;
1467 	sfrc_t		rc = SFRC_FAIL;
1468 
1469 	TRIVIA(("[sf_found_queue"));
1470 
1471 	/*
1472 	 * don't need to check for null ufsvfsp because
1473 	 * unmount must own list's ufs_fix.uq_mutex
1474 	 * to mark it null and we own that lock since
1475 	 * we got here.
1476 	 */
1477 
1478 	ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex));
1479 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
1480 
1481 	if (!mutex_tryenter(f->uf_vfs_lockp)) {
1482 		TRIVIA((": tryenter(vfslockp) failed; retry]\n"));
1483 		f->uf_retry = 1;
1484 		return (rc);
1485 	}
1486 
1487 	replica = f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current != NULL &&
1488 	    f->uf_vfs_ufsfxp->fx_current != f &&
1489 	    !terminal_state(f->uf_vfs_ufsfxp->fx_current->uf_s);
1490 
1491 	/*
1492 	 * copy general flags to this ufs_failure so we don't
1493 	 * need to refer back to the ufsvfs, or, more importantly,
1494 	 * don't need to keep acquiring (trying to acquire) vfs_lockp
1495 	 *
1496 	 * The most restrictive option wins:
1497 	 *  panic > errlock only > errlock+unmount > repair
1498 	 * XXX panic > elock > elock > elock+umount
1499 	 */
1500 	if (f->uf_vfs_ufsfxp->fx_flags & UFSFX_PANIC) {
1501 		if (!set_state(f, UF_PANIC)) {
1502 			TRIVIA((": marked panic but was queued?"));
1503 			real_panic(f, " ");
1504 			/*NOTREACHED*/
1505 		}
1506 		mutex_exit(f->uf_vfs_lockp);
1507 		return (rc);
1508 	}
1509 	f->uf_flags = f->uf_vfs_ufsfxp->fx_flags;
1510 
1511 	if (replica) {
1512 		if (!set_state(f, UF_REPLICA)) {
1513 			f->uf_retry = 1;
1514 			TRIVIA((": set to replica failed] "));
1515 		} else {
1516 			TRIVIA(("] "));
1517 		}
1518 		mutex_exit(f->uf_vfs_lockp);
1519 		return (rc);
1520 	}
1521 	mutex_exit(f->uf_vfs_lockp);
1522 
1523 	if (!set_state(f, UF_TRYLCK)) {
1524 		TRIVIA((": failed] "));
1525 	} else {
1526 		rc = SFRC_SUCCESS;
1527 	}
1528 	return (rc);
1529 }
1530 
1531 static sfrc_t
1532 sf_nonterm_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s)
1533 {
1534 	sfrc_t	rc = SFRC_FAIL;
1535 
1536 	TRIVIA(("[sf_nonterm_cmn, action: %s, %s", act_name(a), state_name(s)));
1537 	ASSERT(s & (UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING));
1538 	ASSERT(!terminal_state(s));
1539 
1540 	if (!f->uf_ufsvfsp && !(f->uf_s & UF_UMOUNT)) {
1541 		TRIVIA((": NULL ufsvfsp (state != UMOUNT)]\n"));
1542 		(void) set_state(f, UF_NOTFIX);
1543 		return (rc);
1544 	}
1545 
1546 	switch (a) {
1547 	case UFA_SET:
1548 		switch (s) {
1549 		case UF_TRYLCK:
1550 			ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
1551 			rc = sf_set_trylck(f);
1552 			break;
1553 
1554 		case UF_LOCKED:
1555 			rc = sf_set_locked(f);
1556 			break;
1557 
1558 		case UF_FIXING:
1559 			f->uf_flags |= UFSFX_REPAIR_START;
1560 			f->uf_retry  = ufsfx_tune.uft_fixpoll_period;
1561 			rc = SFRC_SUCCESS;
1562 			break;
1563 
1564 		case UF_UMOUNT:
1565 			f->uf_retry = -ufsfx_tune.uft_short_err_period;
1566 			rc = SFRC_SUCCESS;
1567 			break;
1568 
1569 		default:
1570 			(void) set_state(f, UF_PANIC);
1571 			TRIVIA((": failed] "));
1572 		}
1573 		break;
1574 
1575 	case UFA_FOUND:
1576 
1577 		switch (s) {
1578 		case UF_TRYLCK:
1579 			rc = sf_found_trylck(f);
1580 			break;
1581 
1582 		case UF_LOCKED:
1583 		case UF_FIXING:
1584 			rc = sf_found_lock_fix_cmn(f, s);
1585 			break;
1586 
1587 		case UF_UMOUNT:
1588 			rc = sf_found_umount(f);
1589 			break;
1590 
1591 		default:
1592 			(void) set_state(f, UF_PANIC);
1593 			TRIVIA((": failed] "));
1594 			break;
1595 		}
1596 		break;
1597 	default:
1598 		(void) set_state(f, UF_PANIC);
1599 		TRIVIA((": failed] "));
1600 		break;
1601 	}
1602 
1603 	TRIVIA(("] "));
1604 	return (rc);
1605 }
1606 
1607 static sfrc_t
1608 sf_set_trylck(ufs_failure_t *f)
1609 {
1610 	TRIVIA(("[sf_set_trylck"));
1611 
1612 	if (!mutex_tryenter(f->uf_vfs_lockp)) {
1613 		TRIVIA((": tryenter(vfslockp) failed; retry]\n"));
1614 		f->uf_retry = 1;
1615 		return (SFRC_FAIL);
1616 	}
1617 
1618 	if (!f->uf_vfs_ufsfxp->fx_current)
1619 		f->uf_vfs_ufsfxp->fx_current = f;
1620 
1621 	mutex_exit(f->uf_vfs_lockp);
1622 
1623 	f->uf_lf.lf_flags = 0;
1624 	f->uf_lf.lf_lock  = LOCKFS_ELOCK;
1625 	f->uf_retry = -ufsfx_tune.uft_fixstart_period;
1626 	TRIVIA(("] "));
1627 	return (SFRC_SUCCESS);
1628 }
1629 
1630 static sfrc_t
1631 sf_found_trylck(ufs_failure_t *f)
1632 {
1633 	struct lockfs lockfs_status;
1634 
1635 	TRIVIA(("[sf_found_trylck"));
1636 
1637 	if (trylock_time_exceeded(f) > 0) {
1638 		(void) set_state(f, UF_PANIC);
1639 		TRIVIA((": failed] "));
1640 		return (SFRC_FAIL);
1641 	}
1642 
1643 	if (!get_lockfs_status(f, &lockfs_status)) {
1644 		(void) set_state(f, UF_PANIC);
1645 		TRIVIA((": failed] "));
1646 		return (SFRC_FAIL);
1647 	}
1648 
1649 	if (f->uf_lf_err == NO_ERROR)
1650 		f->uf_lf.lf_key = lockfs_status.lf_key;
1651 
1652 	if (!set_lockfs(f, &lockfs_status)) {
1653 		(void) set_state(f, UF_PANIC);
1654 		TRIVIA((": failed] "));
1655 		return (SFRC_FAIL);
1656 	}
1657 	TRIVIA(("] "));
1658 	return (SFRC_SUCCESS);
1659 }
1660 
1661 static sfrc_t
1662 sf_set_locked(ufs_failure_t *f)
1663 {
1664 	TRIVIA(("[sf_set_locked"));
1665 
1666 	f->uf_retry = -ufsfx_tune.uft_fixstart_period;
1667 
1668 #if defined(DEBUG)
1669 	if (f->uf_flags & UFSFX_REPAIR_START)
1670 		TRIVIA(("clearing UFSFX_REPAIR_START "));
1671 #endif /* DEBUG */
1672 
1673 	f->uf_flags &= ~UFSFX_REPAIR_START;
1674 
1675 	if (f->uf_s & UF_TRYLCK) {
1676 		cmn_err(CE_WARN, "Error-locked %s: \"%s\"",
1677 		    fs_name(f), f->uf_panic_str);
1678 
1679 		if (f->uf_flags & UFSFX_LCKONLY)
1680 			cmn_err(CE_WARN, "Manual repair of %s required",
1681 			    fs_name(f));
1682 	}
1683 
1684 	/*
1685 	 * just reset to current state
1686 	 */
1687 #if defined(DEBUG)
1688 	TRIVIA(("locked->locked "));
1689 #endif /* DEBUG */
1690 
1691 	TRIVIA(("] "));
1692 	return (SFRC_SUCCESS);
1693 }
1694 
1695 static sfrc_t
1696 sf_found_lock_fix_cmn(ufs_failure_t *f, ufs_failure_states_t s)
1697 {
1698 	time_t		toolong;
1699 	extern time_t	time;
1700 	struct buf	*bp			= NULL;
1701 	struct fs	*dfs;
1702 	time_t		 concerned, anxious;
1703 	sfrc_t		 rc			= SFRC_FAIL;
1704 	ulong_t		 gb_size;
1705 
1706 	TRIVIA(("[sf_found_lock_fix_cmn (\"%s\")", state_name(s)));
1707 
1708 	if (s & UF_LOCKED) {
1709 		ASSERT(MUTEX_HELD(&f->uf_mutex));
1710 
1711 		toolong =
1712 		    time > (ufsfx_tune.uft_too_long + f->uf_entered_tm);
1713 		TRIVIA(("%stoolong", !toolong? "not": ""));
1714 		HIDEOUS((": time:%ld, too long:%ld, entered_tm:%ld ",
1715 		    time, ufsfx_tune.uft_too_long, f->uf_entered_tm));
1716 
1717 		if (f->uf_flags & UFSFX_LCKUMOUNT) {
1718 			if (set_state(f, UF_UMOUNT)) {
1719 				TRIVIA(("] "));
1720 				rc = SFRC_SUCCESS;
1721 			} else {
1722 				TRIVIA((": failed] "));
1723 				f->uf_retry = 1;
1724 			}
1725 			return (rc);
1726 		}
1727 		if (!toolong) {
1728 			rc = SFRC_SUCCESS;
1729 		} else {
1730 			if (!(f->uf_flags & UFSFX_REPAIR_START)) {
1731 				cmn_err(CE_WARN, "%s repair of %s not started.",
1732 				    (f->uf_flags & UFSFX_LCKONLY) ?
1733 				    "Manual" : "Automatic", fs_name(f));
1734 
1735 				f->uf_retry = ufsfx_tune.uft_long_err_period;
1736 			} else {
1737 				f->uf_retry = ufsfx_tune.uft_long_err_period;
1738 				cmn_err(CE_WARN, "Repair of %s is not timely; "
1739 				    "operator attention is required.",
1740 				    fs_name(f));
1741 			}
1742 			TRIVIA(("] "));
1743 			return (rc);
1744 		}
1745 	}
1746 
1747 #if defined(DEBUG)
1748 	else {
1749 		ASSERT(s & UF_FIXING);
1750 	}
1751 #endif /* DEBUG */
1752 
1753 	/*
1754 	 * get on disk superblock; force it to really
1755 	 * come from the disk
1756 	 */
1757 	(void) bfinval(f->uf_dev, 0);
1758 	bp = UFS_BREAD(f->uf_ufsvfsp, f->uf_dev, SBLOCK, SBSIZE);
1759 	if (bp) {
1760 		bp->b_flags |= (B_STALE | B_AGE);
1761 		dfs = bp->b_un.b_fs;
1762 	}
1763 
1764 	if (!bp || (bp->b_flags & B_ERROR) || ((dfs->fs_magic != FS_MAGIC) &&
1765 	    (dfs->fs_magic != MTB_UFS_MAGIC))) {
1766 		TRIVIA((": UFS_BREAD(SBLOCK) failed]\n"));
1767 		f->uf_retry = 1;
1768 		goto out;
1769 	}
1770 
1771 	/* fsck started but we haven't noticed yet? */
1772 	if (!(s & UF_FIXING) && dfs->fs_clean == FSFIX) {
1773 		if (!set_state(f, UF_FIXING)) {
1774 			TRIVIA((": failed]\n"));
1775 			f->uf_retry = 1;
1776 			goto out;
1777 		}
1778 	}
1779 
1780 	/* fsck started but didn't succeed? */
1781 	if ((s & UF_FIXING) && ((dfs->fs_clean == FSBAD) || !fsck_active(f))) {
1782 		TRIVIA((": fs_clean: %d", (int)dfs->fs_clean));
1783 		(void) set_state(f, UF_LOCKED);
1784 		cmn_err(CE_WARN, "%s: Manual repair is necessary.", fs_name(f));
1785 		f->uf_retry = ufsfx_tune.uft_long_err_period;
1786 		goto out;
1787 	}
1788 
1789 	gb_size = (dfs->fs_size * dfs->fs_bshift) / GB;
1790 	toolong = (time_t)((gb_size == 0? 1: gb_size) * SecondsPerGig);
1791 
1792 	/* fsck started but doesn't seem to be proceeding? */
1793 	if ((s & UF_FIXING) && dfs->fs_clean == FSFIX) {
1794 		if (time > f->uf_entered_tm + toolong) {
1795 
1796 			cmn_err(CE_WARN,
1797 			    "Repair completion timeout exceeded on %s; "
1798 			    "manual fsck may be required", fs_name(f));
1799 			f->uf_retry = ufsfx_tune.uft_long_err_period;
1800 		}
1801 	}
1802 
1803 	concerned = f->uf_entered_tm + (toolong / 3);
1804 	anxious = f->uf_entered_tm + ((2 * toolong) / 3);
1805 
1806 	if (time > concerned)
1807 		pester_msg(f, time > anxious? CE_WARN: CE_NOTE);
1808 
1809 	TRIVIA(("] "));
1810 
1811 out:
1812 	if (bp)
1813 		brelse(bp);
1814 
1815 	return (rc);
1816 }
1817 
1818 static sfrc_t
1819 sf_found_umount(ufs_failure_t *f)
1820 {
1821 	extern time_t	 time;
1822 	sfrc_t		 rc			= SFRC_FAIL;
1823 	struct vfs	*vfsp			= f->uf_vfsp;
1824 	struct ufsvfs	*ufsvfsp		= f->uf_ufsvfsp;
1825 	int		 toolong		= 0;
1826 	int		 err			= 0;
1827 
1828 	TRIVIA(("[sf_found_umount"));
1829 
1830 	toolong = time > ufsfx_tune.uft_too_long + f->uf_entered_tm;
1831 	if (toolong) {
1832 		TRIVIA((": unmount time limit exceeded] "));
1833 		goto out;
1834 	}
1835 
1836 	if (!vfsp || !ufsvfsp) {	/* trivial case */
1837 		TRIVIA((": NULL vfsp and/or ufsvfsp, already unmounted?] "));
1838 		goto out;
1839 	}
1840 
1841 	if (!ULOCKFS_IS_ELOCK(&ufsvfsp->vfs_ulockfs)) {
1842 		TRIVIA((": !not error locked?"));
1843 		err = EINVAL;
1844 		goto out;
1845 	}
1846 
1847 	/* The vn_vfsunlock will be done in dounmount() [.../common/fs/vfs.c] */
1848 	if (vn_vfswlock(vfsp->vfs_vnodecovered)) {
1849 		TRIVIA((": couldn't lock coveredvp"));
1850 		err = EBUSY;
1851 		goto out;
1852 	}
1853 
1854 	if ((err = dounmount(vfsp, 0, kcred)) != 0) {
1855 
1856 		/* take note, but not many alternatives here */
1857 		mutex_enter(&uf_stats.ufst_mutex);
1858 		uf_stats.ufst_unmount_failures++;
1859 		mutex_exit(&uf_stats.ufst_mutex);
1860 
1861 		TRIVIA((": unmount failed] "));
1862 	} else {
1863 		cmn_err(CE_NOTE, "unmounted error-locked %s", fs_name(f));
1864 	}
1865 
1866 out:
1867 	if (toolong || (err != EBUSY && err != EAGAIN))
1868 		rc = set_state(f, UF_NOTFIX);
1869 
1870 	TRIVIA(("] "));
1871 	return (rc);
1872 }
1873 
1874 static sfrc_t
1875 sf_term_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s)
1876 {
1877 	extern time_t	time;
1878 	sfrc_t		rc = SFRC_FAIL;
1879 
1880 	TRIVIA(("[sf_term_cmn, action is %s, state is %s",
1881 	    act_name(a), state_name(s)));
1882 	ASSERT(s & (UF_FIXED | UF_NOTFIX | UF_REPLICA));
1883 	ASSERT(terminal_state(s));
1884 
1885 	if (!f->uf_ufsvfsp && !(f->uf_s & (UF_UMOUNT | UF_NOTFIX))) {
1886 		TRIVIA((": NULL ufsvfsp (state != UMOUNT | NOTFIX)]\n"));
1887 		return (rc);
1888 	}
1889 
1890 	switch (a) {
1891 	case UFA_SET:
1892 		switch (s) {
1893 		case UF_NOTFIX:
1894 		case UF_FIXED:
1895 		{
1896 			int need_lock_vfs;
1897 
1898 			if (f->uf_ufsvfsp && f->uf_vfs_lockp)
1899 				need_lock_vfs = !MUTEX_HELD(f->uf_vfs_lockp);
1900 			else
1901 				need_lock_vfs = 0;
1902 
1903 			if (need_lock_vfs && !mutex_tryenter(f->uf_vfs_lockp)) {
1904 				TRIVIA((": tryenter(vfslockp) fail; retry]\n"));
1905 				f->uf_retry = 1;
1906 				break;
1907 			}
1908 
1909 			f->uf_end_tm = time;
1910 			f->uf_lf.lf_lock = LOCKFS_OLOCK;
1911 			f->uf_retry = 0;
1912 
1913 			if (f->uf_vfs_ufsfxp)
1914 				f->uf_vfs_ufsfxp->fx_current = NULL;
1915 
1916 			if (need_lock_vfs)
1917 				mutex_exit(f->uf_vfs_lockp);
1918 
1919 			cmn_err(CE_NOTE, (s & UF_NOTFIX)? "Could not fix %s":
1920 			    "%s is now accessible", fs_name(f));
1921 
1922 			if (s & UF_FIXED) {
1923 				mutex_enter(&uf_stats.ufst_mutex);
1924 				uf_stats.ufst_num_fixed++;
1925 				mutex_exit(&uf_stats.ufst_mutex);
1926 			}
1927 			(void) timeout(ufsfx_kill_fix_failure_thread,
1928 			    (void *)(ufsfx_tune.uft_short_err_period * hz),
1929 			    ufsfx_tune.uft_short_err_period * hz);
1930 			rc = SFRC_SUCCESS;
1931 			break;
1932 		}
1933 		case UF_REPLICA:
1934 
1935 			ASSERT(MUTEX_HELD(f->uf_vfs_lockp));
1936 
1937 			/* not actually a replica? */
1938 			if (f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current &&
1939 			    f->uf_vfs_ufsfxp->fx_current != f &&
1940 			    !terminal_state(
1941 			    f->uf_vfs_ufsfxp->fx_current->uf_s)) {
1942 
1943 				f->uf_orig = f->uf_vfs_ufsfxp->fx_current;
1944 				f->uf_retry = 0;
1945 				rc = SFRC_SUCCESS;
1946 			} else {
1947 				TRIVIA((": NULL fx_current]\n"));
1948 				f->uf_retry = 1;
1949 			}
1950 
1951 			break;
1952 
1953 		default:
1954 			rc = set_state(f, UF_PANIC);
1955 			TRIVIA((": failed] "));
1956 			break;
1957 		}
1958 		break;
1959 
1960 	case UFA_FOUND:
1961 		/*
1962 		 * XXX de-allocate these after some period?
1963 		 * XXX or move to an historical list?
1964 		 * XXX or have an ioctl which reaps them?
1965 		 */
1966 		/*
1967 		 * For now, since we don't expect lots of failures
1968 		 * to occur (to the point of memory shortages),
1969 		 * just punt
1970 		 */
1971 
1972 		/* be sure we're not wasting cpu on old failures */
1973 		if (f->uf_retry != 0) {
1974 			mutex_enter(&uf_stats.ufst_mutex);
1975 			uf_stats.ufst_cpu_waste++;
1976 			mutex_exit(&uf_stats.ufst_mutex);
1977 			f->uf_retry = 0;
1978 		}
1979 		rc = SFRC_SUCCESS;
1980 		break;
1981 
1982 	default:
1983 		(void) set_state(f, UF_PANIC);
1984 		TRIVIA((": failed] "));
1985 		break;
1986 	}
1987 
1988 	TRIVIA(("] "));
1989 	return (rc);
1990 }
1991 
1992 static sfrc_t
1993 sf_panic(
1994 	ufs_failure_t	*f,
1995 	ufsa_t	 a,
1996 	ufs_failure_states_t	 s)
1997 {
1998 	sfrc_t	rc = SFRC_FAIL;
1999 
2000 	TRIVIA(("[sf_panic, action is %s, prev. state is %s",
2001 	    act_name(a), state_name(f->uf_s)));
2002 	ASSERT(s & UF_PANIC);
2003 
2004 	switch (a) {
2005 	case UFA_SET:
2006 		f->uf_retry = -ufsfx_tune.uft_short_err_period;
2007 		rc = SFRC_SUCCESS;
2008 		break;
2009 
2010 	case UFA_FOUND:
2011 	default:
2012 		real_panic(f, " ");
2013 
2014 		/* LINTED: warning: logical expression always true: op "||" */
2015 		ASSERT(DEBUG);
2016 
2017 		(void) set_state(f, UF_UMOUNT);	/* XXX UF_NOTFIX? */
2018 
2019 		break;
2020 	}
2021 
2022 	TRIVIA(("] "));
2023 	return (rc);
2024 }
2025 
2026 /*
2027  * minimum state function
2028  */
2029 static sfrc_t
2030 sf_minimum(
2031 	ufs_failure_t	*f,
2032 	ufsa_t	 a, /* LINTED argument unused in function: ignored */
2033 	ufs_failure_states_t	 ignored)
2034 {
2035 	sfrc_t rc = SFRC_FAIL;
2036 
2037 	TRIVIA(("[sf_minimum, action is %s", act_name(a)));
2038 
2039 	switch (a) {
2040 	case UFA_SET:
2041 		f->uf_retry = 0;
2042 		/* FALLTHROUGH */
2043 
2044 	case UFA_FOUND:
2045 		rc = SFRC_SUCCESS;
2046 		break;
2047 
2048 	default:
2049 		(void) set_state(f, UF_PANIC);
2050 		TRIVIA((": failed] "));
2051 		break;
2052 	}
2053 
2054 	TRIVIA(("] "));
2055 	return (rc);
2056 }
2057 
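/*
 * state_trans_valid: returns non-zero when moving from 'from' to 'to' is
 * allowed, either because the two states overlap or because 'from' is one
 * of the predecessors listed in the state descriptor for 'to'.
 */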
2058 static int
2059 state_trans_valid(ufs_failure_states_t from, ufs_failure_states_t to)
2060 {
2061 	ufsd_t	*s;
2062 	int	 valid;
2063 
2064 	HIDEOUS(("[state_trans_valid"));
2065 
2066 	if (from & to)
2067 		return (1);
2068 
2069 	s = get_state_desc(to);
2070 
2071 	/*
2072 	 * extra test is necessary since we want UF_UNDEF = 0,
2073 	 * (to detect freshly allocated memory)
2074 	 * but can't check for that value with a bit test
2075 	 */
2076 	valid = (to & UF_INIT)? from == s->ud_prev: from & s->ud_prev;
2077 
2078 	HIDEOUS((": %svalid] ", valid? "": "in"));
2079 	return (valid);
2080 }
2081 
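/*
 * terminal_state: returns non-zero when the state's descriptor flags it
 * as terminal, i.e. no further processing of the failure is expected.
 */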
2082 static int
2083 terminal_state(ufs_failure_states_t state)
2084 {
2085 	ufsd_t	*s;
2086 
2087 	HIDEOUS(("[terminal_state"));
2088 
2089 	s = get_state_desc(state);
2090 
2091 	HIDEOUS((": %sterminal] ", s->ud_attr.terminal? "": "not "));
2092 	return ((int)s->ud_attr.terminal);
2093 }
2094 
2095 static void
2096 alloc_lockfs_comment(ufs_failure_t *f, struct lockfs *lfp)
2097 {
2098 	MINUTE(("[alloc_lockfs_comment"));
2099 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2100 
2101 	/*
2102 	 * ufs_fiolfs expects a kmem_alloc'ed comment;
2103 	 * it frees the comment if the lock fails
2104 	 * or else when the lock is unlocked.
2105 	 */
2106 
2107 	f->uf_lf.lf_comment = kmem_zalloc(LOCKFS_MAXCOMMENTLEN, KM_NOSLEEP);
2108 	if (f->uf_lf.lf_comment) {
2109 		char	*from;
2110 		size_t	 len;
2111 
2112 		/*
2113 		 * use panic string if there's no previous comment
2114 		 * or if we're setting the error lock
2115 		 */
2116 		if (LOCKFS_IS_ELOCK(&f->uf_lf) || !lfp->lf_comment ||
2117 		    lfp->lf_comlen <= 0) {
2118 			from = f->uf_panic_str;
2119 			len = LOCKFS_MAXCOMMENTLEN;
2120 		} else {
2121 			from = lfp->lf_comment;
2122 			len = lfp->lf_comlen;
2123 		}
2124 
2125 		bcopy(from, f->uf_lf.lf_comment, len);
2126 		f->uf_lf.lf_comlen = len;
2127 
2128 	} else {
2129 		f->uf_lf.lf_comlen = 0;
2130 	}
2131 	MINUTE(("] "));
2132 }
2133 
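/*
 * set_lockfs: error-lock the failed file system.  Returns 0 if the
 * ufsvfs or its root vnode has gone away; otherwise attaches a comment,
 * issues the error lock through ufs__fiolfs() unless one is already in
 * place, and hands the result to lockfs_failure() or lockfs_success().
 */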
2134 static int
2135 set_lockfs(ufs_failure_t *f, struct lockfs *lfp)
2136 {
2137 	int	(*handle_lockfs_rc)(ufs_failure_t *);
2138 	int	  rc;
2139 
2140 	MINUTE(("[set_lockfs"));
2141 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2142 	ASSERT(!vfs_lock_held(f->uf_vfsp));
2143 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
2144 
2145 	if (!f->uf_ufsvfsp) {
2146 		MINUTE((": ufsvfsp is NULL]\n"));
2147 		return (0);
2148 	}
2149 
2150 	ASSERT(MUTEX_NOT_HELD(&f->uf_ufsvfsp->vfs_ulockfs.ul_lock));
2151 
2152 	if (!f->uf_ufsvfsp->vfs_root) {
2153 		MINUTE((": vfs_root is NULL]\n"));
2154 		return (0);
2155 	}
2156 
2157 	alloc_lockfs_comment(f, lfp);
2158 	f->uf_lf_err = 0;
2159 
2160 	if (!LOCKFS_IS_ELOCK(lfp)) {
2161 		lfp->lf_lock = f->uf_lf.lf_lock = LOCKFS_ELOCK;
2162 		VN_HOLD(f->uf_ufsvfsp->vfs_root);
2163 		f->uf_lf_err =
2164 		    ufs__fiolfs(f->uf_ufsvfsp->vfs_root,
2165 		    &f->uf_lf, /* from_user */ 0, /* from_log */ 0);
2166 		VN_RELE(f->uf_ufsvfsp->vfs_root);
2167 	}
2168 
2169 	handle_lockfs_rc = f->uf_lf_err != 0? lockfs_failure: lockfs_success;
2170 	rc = handle_lockfs_rc(f);
2171 
2172 	MINUTE(("] "));
2173 	return (rc);
2174 }
2175 
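/*
 * lockfs_failure: decide how to proceed when the error-lock attempt
 * fails.  Hard errors escalate to unmount or panic; EBUSY/EAGAIN mean a
 * lock or repair is already in progress, so note that and retry later;
 * EINVAL marks the failure as not fixable; anything else just schedules
 * a retry.
 */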
2176 static int
2177 lockfs_failure(ufs_failure_t *f)
2178 {
2179 	int	error;
2180 	ufs_failure_states_t	s;
2181 
2182 	TRIVIA(("[lockfs_failure"));
2183 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2184 
2185 	if (!f->uf_ufsvfsp) {
2186 		TRIVIA((": ufsvfsp is NULL]\n"));
2187 		return (0);
2188 	}
2189 
2190 	error = f->uf_lf_err;
2191 	switch (error) {
2192 			/* non-transient errors: */
2193 	case EACCES:	/* disk/in-core metadata reconciliation failed  */
2194 	case EPERM:	/* inode reconciliation failed; incore inode changed? */
2195 	case EIO:	/* device is hard-locked or not responding */
2196 	case EROFS:	/* device is write-locked */
2197 	case EDEADLK:	/* can't lockfs; deadlock would result; */
2198 			/* Swapping or saving accounting records */
2199 			/* onto this fs can cause this errno. */
2200 
2201 		MINOR(("ufs_fiolfs(\"%s\") of %s failed: %s (%d)",
2202 		    fs_name(f), lock_name(&f->uf_lf),
2203 		    err_name(error), error));
2204 
2205 		/*
2206 		 * if we can't get the lock, then fall back to panic,
2207 		 * unless unmount was requested (although unmount will
2208 		 * probably fail if the lock failed, so we'll panic
2209 		 * anyway)
2210 		 */
2211 
2212 		s = ((f->uf_flags & UFSFX_LCKUMOUNT) && error != EDEADLK) ?
2213 		    UF_UMOUNT: UF_PANIC;
2214 
2215 		if (!set_state(f, s)) {
2216 			real_panic(f, " ");
2217 			/*NOTREACHED*/
2218 			break;
2219 		}
2220 		break;
2221 
2222 
2223 	case EBUSY:
2224 	case EAGAIN:
2225 
2226 		f->uf_retry = ufsfx_tune.uft_short_err_period;
2227 		if (curthread->t_flag & T_DONTPEND) {
2228 			curthread->t_flag &= ~T_DONTPEND;
2229 
2230 		} else if (!(f->uf_s & (UF_LOCKED | UF_FIXING))) {
2231 			ufs_failure_states_t state;
2232 			/*
2233 			 * if we didn't know that the fix had started,
2234 			 * take note
2235 			 */
2236 			state = error == EBUSY? UF_LOCKED: UF_FIXING;
2237 			if (!set_state(f, state)) {
2238 				TRIVIA((": failed] "));
2239 				return (0);
2240 			}
2241 		}
2242 		break;
2243 
2244 	default:	/* some other non-fatal error */
2245 		MINOR(("lockfs(\"%s\") of %s returned %s (%d)",
2246 		    lock_name(&f->uf_lf), fs_name(f),
2247 		    err_name(f->uf_lf_err), f->uf_lf_err));
2248 
2249 		f->uf_retry = ufsfx_tune.uft_short_err_period;
2250 		break;
2251 
2252 	case EINVAL:	/* unmounted? */
2253 		(void) set_state(f, UF_NOTFIX);
2254 		break;
2255 	}
2256 	TRIVIA(("] "));
2257 	return (1);
2258 }
2259 
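/*
 * lockfs_success: the lockfs call succeeded.  If it established the
 * error lock, move to UF_LOCKED; if it somehow unlocked the file system,
 * warn and let ufsfx_unlockfs() clean up; any other lock type marks the
 * failure as not fixable.
 */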
2260 static int
2261 lockfs_success(ufs_failure_t *f)
2262 {
2263 	TRIVIA(("[lockfs_success"));
2264 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2265 
2266 	if (!f->uf_ufsvfsp) {
2267 		TRIVIA((": ufsvfsp is NULL]\n"));
2268 		return (0);
2269 	}
2270 
2271 	switch (f->uf_lf.lf_lock) {
2272 	case LOCKFS_ELOCK:	/* error lock worked */
2273 
2274 		if (!set_state(f, UF_LOCKED)) {
2275 			TRIVIA((": failed] "));
2276 			return (0);
2277 		}
2278 		break;
2279 
2280 	case LOCKFS_ULOCK: 			/* unlock worked */
2281 		/*
2282 		 * how'd we get here?
2283 		 * This should be done from fsck's unlock,
2284 		 * not from this thread's context.
2285 		 */
2286 		cmn_err(CE_WARN, "Unlocked error-lock of %s", fs_name(f));
2287 		ufsfx_unlockfs(f->uf_ufsvfsp);
2288 		break;
2289 
2290 	default:
2291 		if (!set_state(f, UF_NOTFIX)) {
2292 			TRIVIA((": failed] "));
2293 			return (0);
2294 		}
2295 		break;
2296 	}
2297 	TRIVIA(("] "));
2298 	return (1);
2299 }
2300 
2301 /*
2302  * when fsck is running it puts its pid into the lockfs
2303  * comment structure, prefaced by PIDSTR
2304  */
2305 const char *PIDSTR = "[pid:";
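/*
 * Illustrative example only (the actual comment text is composed by
 * fsck, not here): a lockfs comment beginning with "[pid:1234]" would be
 * recognized, since fsck_active() merely scans for the "[pid:" prefix
 * anywhere in the comment.
 */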
2306 static int
2307 fsck_active(ufs_failure_t *f)
2308 {
2309 	char		*cp;
2310 	int		 i, found, errlocked;
2311 	size_t		 comlen;
2312 	const int	 PIDSTRLEN = (int)strlen(PIDSTR);
2313 	struct ulockfs	*ulp = &f->uf_ufsvfsp->vfs_ulockfs;
2314 
2315 	TRIVIA(("[fsck_active"));
2316 
2317 	ASSERT(f);
2318 	ASSERT(f->uf_s & UF_FIXING);
2319 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2320 	ASSERT(f->uf_ufsvfsp);
2321 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
2322 	ASSERT(MUTEX_NOT_HELD(&ulp->ul_lock));
2323 
2324 	mutex_enter(&ulp->ul_lock);
2325 	cp = ulp->ul_lockfs.lf_comment;
2326 	comlen = ulp->ul_lockfs.lf_comlen;
2327 	errlocked = (int)ULOCKFS_IS_ELOCK(ulp);
2328 	mutex_exit(&ulp->ul_lock);
2329 
2330 	if (!cp || comlen == 0) {
2331 		TRIVIA((": null comment or comlen == 0, found:0]"));
2332 		return (0);
2333 	}
2334 
2335 	for (found = i = 0; !found && i < (comlen - PIDSTRLEN); i++, cp++)
2336 		found = strncmp(cp, PIDSTR, PIDSTRLEN) == 0;
2337 
2338 	TRIVIA(("found:%d, is_elock:%d]", found, errlocked));
2339 	return (errlocked & found);
2340 }
2341 
2342 static const char unknown_fs[]		= "<unknown fs>";
2343 static const char null_failure[] = "<NULL ufs failure record; unknown fs>";
2344 static const char mutated_vfs_bufp[]	= "<mutated vfs_bufp, unknown fs>";
2345 static const char mutated_vfs_fs[]	= "<mutated vfs_fs, unknown fs>";
2346 
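/*
 * fs_name: best-effort, human-readable name for the failed file system.
 * Prefer the name cached in the failure record; fall back to the mount
 * point in the superblock only when the vfs lock is held and the cached
 * buf and superblock pointers still match the live ufsvfs; otherwise
 * return one of the placeholder strings above.
 */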
2347 static char *
2348 fs_name(ufs_failure_t *f)
2349 {
2350 	HIDEOUS(("[fs_name"));
2351 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2352 
2353 	if (!f) {
2354 		HIDEOUS((": failure ptr is NULL]\n"));
2355 		return ((char *)null_failure);
2356 	}
2357 
2358 	if (f->uf_fsname[0] != '\0') {
2359 		HIDEOUS((": return (uf_fsname)]\n"));
2360 		return (f->uf_fsname);
2361 	}
2362 
2363 	if (MUTEX_HELD(f->uf_vfs_lockp)) {
2364 		if (f->uf_bp != f->uf_ufsvfsp->vfs_bufp) {
2365 			HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p]\n",
2366 			    (void *)f->uf_bp, (void *)f->uf_ufsvfsp->vfs_bufp));
2367 			return ((char *)mutated_vfs_bufp);
2368 		}
2369 		if (f->uf_fs != f->uf_ufsvfsp->vfs_fs) {
2370 			HIDEOUS((": vfs_fs mutated from 0x%p to 0x%p]\n",
2371 			    (void *)f->uf_fs, (void *)f->uf_ufsvfsp->vfs_fs));
2372 			return ((char *)mutated_vfs_fs);
2373 		}
2374 		if (f->uf_ufsvfsp && f->uf_bp && f->uf_fs &&
2375 		    *f->uf_fs->fs_fsmnt != '\0') {
2376 			HIDEOUS((": return (fs_fsmnt)]\n"));
2377 			return (f->uf_fs->fs_fsmnt);
2378 		}
2379 	}
2380 
2381 	HIDEOUS((": unknown file system]\n"));
2382 	return ((char *)unknown_fs);
2383 }
2384 
2385 #if defined(DEBUG)
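/*
 * DEBUG-only name helpers: lock_name(), state_name(), err_name() and
 * act_name() map values to printable strings for the trace macros;
 * lock_name(), err_name() and act_name() fall back to the first entry
 * of their description tables when nothing matches.
 */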
2386 static char *
2387 lock_name(struct lockfs *lfp)
2388 {
2389 	struct lock_description	*l;
2390 	char			*lname;
2391 
2392 	HIDEOUS(("[lock_name"));
2393 
2394 	lname = lock_desc[0].ld_name;
2395 	for (l = &lock_desc[1]; l->ld_name != NULL; l++) {
2396 		if (lfp && lfp->lf_lock == l->ld_type) {
2397 			lname = l->ld_name;
2398 			break;
2399 		}
2400 	}
2401 	HIDEOUS(("]"));
2402 	return (lname);
2403 }
2404 
2405 static char *
2406 state_name(ufs_failure_states_t state)
2407 {
2408 	ufsd_t	*s;
2409 
2410 	HIDEOUS(("[state_name"));
2411 
2412 	s = get_state_desc(state);
2413 
2414 	HIDEOUS(("]"));
2415 	return (s->ud_name);
2416 }
2417 
2418 static char *
2419 err_name(int error)
2420 {
2421 	struct error_description *e;
2422 
2423 	HIDEOUS(("[err_name"));
2424 
2425 	for (e = &err_desc[1]; e->ed_name != NULL; e++) {
2426 		if (error == e->ed_errno) {
2427 			HIDEOUS(("]"));
2428 			return (e->ed_name);
2429 		}
2430 	}
2431 	HIDEOUS(("]"));
2432 	return (err_desc[0].ed_name);
2433 }
2434 
2435 static char *
2436 act_name(ufsa_t action)
2437 {
2438 	struct action_description *a;
2439 
2440 	HIDEOUS(("[act_name"));
2441 
2442 	for (a = &act_desc[1]; a->ad_name != NULL; a++) {
2443 		if (action == a->ad_v) {
2444 			HIDEOUS(("]"));
2445 			return (a->ad_name);
2446 		}
2447 	}
2448 	HIDEOUS(("]"));
2449 	return (act_desc[0].ad_name);
2450 }
2451 
2452 /*
2453  * dump failure list (DEBUG only): print queue statistics and each record
2454  */
2455 static void
2456 dump_uf_list(char *msg)
2457 {
2458 	ufs_failure_t	*f;
2459 	int		 i;
2460 	int		 list_was_locked = MUTEX_HELD(&ufs_fix.uq_mutex);
2461 
2462 	if (!list_was_locked && !mutex_tryenter(&ufs_fix.uq_mutex)) {
2463 		printf("dump_uf_list: couldn't get list lock\n");
2464 		return;
2465 	}
2466 
2467 	if (msg) {
2468 		printf("\n%s", msg);
2469 	}
2470 	printf("\ndump_uf_list:\n\tuq_lowat: %d, uq_ne: %d\n",
2471 	    ufs_fix.uq_lowat, ufs_fix.uq_ne);
2472 
2473 	mutex_enter(&uf_stats.ufst_mutex);
2474 	printf("\tuf_stats.current_races: %ld\n", uf_stats.ufst_current_races);
2475 	printf("\tuf_stats.num_failed: %ld\n", uf_stats.ufst_num_failed);
2476 	printf("\tuf_stats.num_fixed: %ld\n", uf_stats.ufst_num_fixed);
2477 	printf("\tuf_stats.cpu_waste: %ld\n", uf_stats.ufst_cpu_waste);
2478 	printf("\tuf_stats.lock_violations: %ld, unmount_failures: %ld\n",
2479 	    uf_stats.ufst_lock_violations, uf_stats.ufst_unmount_failures);
2480 	mutex_exit(&uf_stats.ufst_mutex);
2481 
2482 	for (f = ufs_fix.uq_ufhead, i = 1; f; f = f->uf_next, i++) {
2483 
2484 		if (!mutex_tryenter(&f->uf_mutex)) {
2485 			printf("%d.\t\"skipped - try enter failed\"\n", i);
2486 			continue;
2487 		}
2488 
2489 		dump_uf(f, i);
2490 
2491 		mutex_exit(&f->uf_mutex);
2492 	}
2493 
2494 	printf("\n");
2495 
2496 	if (!list_was_locked)
2497 		mutex_exit(&ufs_fix.uq_mutex);
2498 }
2499 
2500 static void
2501 dump_uf(ufs_failure_t *f, int i)
2502 {
2503 	if (!f) {
2504 		printf("dump_uf: NULL failure record\n");
2505 		return;
2506 	}
2507 
2508 	printf("%d.\t\"%s\" is %s.\n",
2509 	    i, fs_name(f), state_name(f->uf_s));
2510 	printf("\t\"%s\"\tAddr: 0x%p\n", f->uf_panic_str, (void *)f);
2511 	printf("\tNext: 0x%p\t\tPrev: 0x%p\n",
2512 	    (void *)f->uf_next, (void *)f->uf_prev);
2513 
2514 	if (f->uf_orig)
2515 		printf("\tOriginal failure: 0x%p \"%s\"\n",
2516 		    (void *)f->uf_orig, f->uf_orig->uf_panic_str);
2517 
2518 	printf("\tUfsvfs: 0x%p\t\tVfs_lockp: 0x%p\n",
2519 	    (void *)f->uf_ufsvfsp, (void *)f->uf_vfs_lockp);
2520 	printf("\tVfs_fsfxp: 0x%p\n", (void *)f->uf_vfs_ufsfxp);
2521 	printf("\tVfs_bufp: 0x%p", (void *)f->uf_bp);
2522 
2523 	if (f->uf_bp)
2524 		printf("\t\tVfs_fs: 0x%p\n", (void *)f->uf_fs);
2525 	else
2526 		printf("\n");
2527 
2528 	printf("\tBegin: 0x%lx\tEntered: 0x%lx\tEnd: 0x%lx\n",
2529 	    f->uf_begin_tm, f->uf_entered_tm, f->uf_end_tm);
2530 
2531 	printf("\tFlags: (%d) %s%s%s%s", f->uf_flags,
2532 	    f->uf_flags & UFSFX_LCKONLY?	 "\"lock only\" "	: "",
2533 	    f->uf_flags & UFSFX_LCKUMOUNT?	 "\"lock+unmount\" "	: "",
2534 	    f->uf_flags & UFSFX_REPAIR_START? "\"started repair\" "	: "",
2535 	    f->uf_flags == 0?                "<none>"               : "");
2536 
2537 	printf("\tRetry: %ld seconds\n", f->uf_retry);
2538 
2539 	printf("\tLockfs:\ttype: %s\terror: %s (%d)\n",
2540 	    lock_name(&f->uf_lf), err_name(f->uf_lf_err), f->uf_lf_err);
2541 
2542 }
2543 #endif /* DEBUG */
2544 
2545 /*
2546  * returns # of ufs_failures in a non-terminal state on queue, or -1 if the
2547  * queue lock could not be taken; used to coordinate with the hlock thread
2548  * (see ufs_thread.c) and to determine when the error lock thread may exit
2549  */
2550 
2551 int
2552 ufsfx_get_failure_qlen(void)
2553 {
2554 	ufs_failure_t	*f;
2555 	ufsd_t		*s;
2556 	int		 qlen = 0;
2557 
2558 	MINUTE(("[ufsfx_get_failure_qlen"));
2559 
2560 	if (!mutex_tryenter(&ufs_fix.uq_mutex))
2561 		return (-1);
2562 
2563 	/*
2564 	 * walk down failure list
2565 	 */
2566 
2567 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
2568 
2569 		if (!mutex_tryenter(&f->uf_mutex))
2570 			continue;
2571 
2572 		s = get_state_desc(f->uf_s);
2573 
2574 		if (s->ud_attr.terminal) {
2575 			mutex_exit(&f->uf_mutex);
2576 			continue;
2577 		}
2578 
2579 		MINUTE((": found: %s, \"%s: %s\"\n",
2580 		    fs_name(f), state_name(f->uf_s), f->uf_panic_str));
2581 
2582 		qlen++;
2583 		mutex_exit(&f->uf_mutex);
2584 	}
2585 
2586 	mutex_exit(&ufs_fix.uq_mutex);
2587 
2588 	MINUTE((": qlen=%d]\n", qlen));
2589 
2590 	return (qlen);
2591 }
2592 
2593 /*
2594  * timeout routine
2595  *  called to shutdown fix failure thread and server daemon
2596  */
2597 static void
2598 ufsfx_kill_fix_failure_thread(void *arg)
2599 {
2600 	clock_t odelta = (clock_t)arg;
2601 	int	qlen;
2602 
2603 	MAJOR(("[ufsfx_kill_fix_failure_thread"));
2604 
2605 	qlen = ufsfx_get_failure_qlen();
2606 
2607 	if (qlen < 0) {
2608 		clock_t delta;
2609 
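		/*
		 * The queue lock was unavailable, so the queue length is
		 * unknown; try again later, doubling the previous delay
		 * and clamping it at INT_MAX ticks on overflow.
		 */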
2610 		delta = odelta << 1;
2611 		if (delta <= 0)
2612 			delta = INT_MAX;
2613 
2614 		(void) timeout(ufsfx_kill_fix_failure_thread,
2615 		    (void *)delta, delta);
2616 		MAJOR((": rescheduled"));
2617 
2618 	} else if (qlen == 0) {
2619 		ufs_thread_exit(&ufs_fix);
2620 		MAJOR((": killed"));
2621 	}
2622 	/*
2623 	 * else
2624 	 *  let timeout expire
2625 	 */
2626 	MAJOR(("]\n"));
2627 }
2628