xref: /illumos-gate/usr/src/uts/common/fs/ufs/ufs_panic.c (revision 355b4669e025ff377602b6fc7caaf30dbc218371)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/errno.h>
32 #include <sys/mode.h>
33 #include <sys/sysmacros.h>
34 #include <sys/cmn_err.h>
35 #include <sys/varargs.h>
36 #include <sys/time.h>
37 #include <sys/buf.h>
38 #include <sys/kmem.h>
39 #include <sys/t_lock.h>
40 #include <sys/poll.h>
41 #include <sys/debug.h>
42 #include <sys/cred.h>
43 #include <sys/lockfs.h>
44 #include <sys/fs/ufs_fs.h>
45 #include <sys/fs/ufs_inode.h>
46 #include <sys/fs/ufs_panic.h>
47 #include <sys/fs/ufs_lockfs.h>
48 #include <sys/fs/ufs_trans.h>
49 #include <sys/fs/ufs_mount.h>
50 #include <sys/fs/ufs_prot.h>
51 #include <sys/fs/ufs_bio.h>
52 #include <sys/pathname.h>
53 #include <sys/utsname.h>
54 #include <sys/conf.h>
55 
/*
 * handy: absolute value of x.
 * This is a macro, so x is evaluated more than once; do not pass an
 * expression with side effects.
 */
#define	abs(x)		(((x) < 0) ? -(x) : (x))
58 
#if defined(DEBUG)

/*
 * Debug tracing support (DEBUG builds only).
 *
 * ufs_fix_failure_dbg holds both a set of flag bits (low nibble, see
 * DBGFLG_FLAGMASK) and a verbosity level (all remaining bits).
 *
 * Levels: each level is a single bit; higher values are more verbose.
 */
#define	DBGLVL_NONE	0x00000000
#define	DBGLVL_MAJOR	0x00000100
#define	DBGLVL_MINOR	0x00000200
#define	DBGLVL_MINUTE	0x00000400
#define	DBGLVL_TRIVIA	0x00000800
#define	DBGLVL_HIDEOUS	0x00001000

/*
 * Flags:
 *   DBGFLG_NOPANIC		real_panic_v() warns instead of panicking
 *   DBGFLG_LVLONLY		trace only levels whose bit is set, rather
 *				than every level up to the configured one
 *   DBGFLG_FIXWOULDPANIC	ufs_fault_v() goes on to attempt a fix even
 *				when triage said to panic
 */
#define	DBGFLG_NONE		0x00000000
#define	DBGFLG_NOPANIC		0x00000001
#define	DBGFLG_LVLONLY		0x00000002
#define	DBGFLG_FIXWOULDPANIC	0x00000004

#define	DBGFLG_FLAGMASK		0x0000000F
#define	DBGFLG_LEVELMASK	(~DBGFLG_FLAGMASK)

#define	DEBUG_FLAGS	(ufs_fix_failure_dbg & DBGFLG_FLAGMASK)
#define	DEBUG_LEVEL	(ufs_fix_failure_dbg & DBGFLG_LEVELMASK)

/* patchable: current debug level and flags; tracing is off by default */
unsigned int ufs_fix_failure_dbg =	DBGLVL_NONE | DBGFLG_NONE;

/*
 * DCALL(dbg_level, call): execute `call' if dbg_level is enabled.
 *
 * With DBGFLG_LVLONLY set, dbg_level must match a bit in DEBUG_LEVEL;
 * otherwise any dbg_level numerically <= DEBUG_LEVEL is enabled.
 *
 * NOTE: this deliberately expands to a bare brace block rather than
 * do { } while (0).  At least one caller in this file omits the trailing
 * semicolon, which only compiles with the brace form.  As a consequence,
 * never use these macros as the unbraced body of an if that has an else.
 */
#define	DCALL(dbg_level, call)						\
	{								\
		if (DEBUG_LEVEL != DBGLVL_NONE) {			\
			if (DEBUG_FLAGS & DBGFLG_LVLONLY) {		\
				if (DEBUG_LEVEL & (dbg_level)) {	\
					call;				\
				}					\
			} else {					\
				if ((dbg_level) <= DEBUG_LEVEL) {	\
					call;				\
				}					\
			}						\
		}							\
	}

/*
 * DPRINTF(level, msg): msg is a fully parenthesized printf argument list,
 * e.g. MINOR(("value %d\n", v)).
 */
#define	DPRINTF(dbg_level, msg)		DCALL(dbg_level, printf msg)

#define	MAJOR(msg)			DPRINTF(DBGLVL_MAJOR, msg)
#define	MINOR(msg)			DPRINTF(DBGLVL_MINOR, msg)
#define	MINUTE(msg)			DPRINTF(DBGLVL_MINUTE, msg)
#define	TRIVIA(msg)			DPRINTF(DBGLVL_TRIVIA, msg)
#define	HIDEOUS(msg)			DPRINTF(DBGLVL_HIDEOUS, msg)

#else	/* !DEBUG */

/* non-DEBUG builds: every tracing macro expands to nothing */
#define	DCALL(ignored_dbg_level, ignored_routine)
#define	MAJOR(ignored)
#define	MINOR(ignored)
#define	MINUTE(ignored)
#define	TRIVIA(ignored)
#define	HIDEOUS(ignored)

#endif /* DEBUG */
114 
/*
 * NULLSTR(str): yields str itself when it is a non-NULL, non-empty
 * string, and the printable placeholder "<null>" otherwise.
 * str is evaluated more than once.
 */
#define	NULLSTR(str)	((str) && *(str) != '\0' ? (str) : "<null>")
/* the empty string */
#define	NULSTRING	""
117 
118 /* somewhat arbitrary limits, in seconds */
119 /* all probably ought to be different, but these are convenient for debugging */
/*
 * These are the compiled-in defaults; ufsfx_init() copies each of them
 * into the corresponding field of the patchable ufsfx_tune structure.
 */
120 const time_t	UF_TOO_LONG		= 128;	/* max. wait for fsck start */
121 
122 /* all of these are retry periods, in units of seconds, used while ... */
123 const time_t	UF_FIXSTART_PERIOD	= 16;	/* awaiting fsck start */
124 const time_t	UF_FIXPOLL_PERIOD	= 256;	/* awaiting fsck finish */
125 const time_t	UF_SHORT_ERROR_PERIOD	= 4;	/* after (lockfs) error */
126 const time_t	UF_LONG_ERROR_PERIOD	= 512;	/* after (lockfs) error */
127 
/* errno-style "no error" value; named "Ok" in the DEBUG err_desc table */
#define	NO_ERROR		0
/*
 * One past LOCKFS_MAXLOCK; named "Old Lock" in the DEBUG lock_desc table.
 * Parenthesized so that it expands correctly inside larger expressions.
 */
#define	LOCKFS_OLOCK		(LOCKFS_MAXLOCK + 1)
130 
/* 2^30 bytes */
131 const ulong_t	GB			= 1024 * 1024 * 1024;
132 const ulong_t	SecondsPerGig		= 1024;	/* ~17 minutes (overestimate) */
133 
134 /*
135  * per filesystem flags
 *
 * These are the UFSMNT_ONERROR_* mount flags shifted right by 4, which is
 * the same encoding ufsfx_mount() uses when it stores the mount flags into
 * vfs_fsfx.fx_flags.
136  */
137 const int	UFSFX_PANIC		= (UFSMNT_ONERROR_PANIC >> 4);
138 const int	UFSFX_LCKONLY		= (UFSMNT_ONERROR_LOCK >> 4);
139 const int	UFSFX_LCKUMOUNT		= (UFSMNT_ONERROR_UMOUNT >> 4);
140 const int	UFSFX_DEFAULT		= (UFSMNT_ONERROR_DEFAULT >> 4);
/* NOTE(review): no user of UFSFX_REPAIR_START is visible in this part of the file */
141 const int	UFSFX_REPAIR_START	= 0x10000000;
142 
/* return protocols */

/*
 * Verdict of triage() on a failing file system:
 *   TRIAGE_DEAD	cannot attempt a fix (already panicking, file system
 *			not identifiable, or fixing would deadlock)
 *   TRIAGE_NO_SPIRIT	file system is marked to panic on error
 *   TRIAGE_ATTEND_TO	go ahead and attempt the fix
 */
typedef enum triage_return_code {
	TRIAGE_DEAD = -1,
	TRIAGE_NO_SPIRIT = 0,
	TRIAGE_ATTEND_TO = 1
} triage_t;

/*
 * Result of a per-state function or of set_state(); callers in this file
 * test it as a boolean (non-zero means success).
 */
typedef enum statefunc_return_code {
	SFRC_FAIL = 0,
	SFRC_SUCCESS = 1
} sfrc_t;
155 
156 /* external references */
157 /* in ufs_thread.c */
158 extern int	ufs_thread_run(struct ufs_q *, callb_cpr_t *cprinfop);
159 extern int	ufs_checkaccton(vnode_t *);		/* in ufs_lockfs.c */
160 extern int	ufs_checkswapon(vnode_t *);		/* in ufs_lockfs.c */
161 
/* woken via pollwakeup() from ufsfx_mount() and ufsfx_unmount() */
162 extern struct pollhead		ufs_pollhd;		/* in ufs_vnops.c */
163 
164 /* globals */
/*
 * Queue of failure records and the thread that services it.  In this file
 * uq_ufhead is the head of the failure list, uq_mutex is taken around
 * walks of and insertions into that list, uq_cv is used to wake the fix
 * thread, and uq_threadp is the fix thread once it has been started.
 */
165 struct	ufs_q	 ufs_fix;
166 
167 /*
168  * patchable constants:
169  *   These are set in ufsfx_init() [called at modload]
 *   from the UF_* default constants; all values are in seconds.
170  */
171 struct ufs_failure_tunable {
172 	long	 uft_too_long;		/* limit repair startup time */
173 	long	 uft_fixstart_period;	/* pre-repair start period */
174 	long	 uft_fixpoll_period;	/* post-fsck start period */
175 	long	 uft_short_err_period;	/* post-error short period */
176 	long	 uft_long_err_period;	/* post-error long period */
177 } ufsfx_tune;
178 
179 /* internal statistics of events */
/*
 * ufst_mutex is initialized in ufsfx_init() and is held around the
 * accesses to these counters that are visible in this part of the file
 * (the ufst_lock_violations increment in init_failure() and the read of
 * ufst_num_failed in queue_failure()).
 */
180 struct uf_statistics {
181 	ulong_t		ufst_lock_violations;	/* vfs_lock could not be taken */
182 	ulong_t		ufst_current_races;
183 	ulong_t		ufst_unmount_failures;
184 	ulong_t		ufst_num_fixed;
185 	ulong_t		ufst_num_failed;
186 	ulong_t		ufst_cpu_waste;
187 	time_t		ufst_last_start_tm;
188 	kmutex_t	ufst_mutex;
189 } uf_stats;
190 
/*
 * Action passed to a per-state function: either the failure record was
 * found in that state (UFA_FOUND) or it is being moved into it (UFA_SET).
 */
typedef enum state_action {
	UFA_ERROR = -1,		/* internal error */
	UFA_FOUND = 0,		/* found uf in state */
	UFA_SET = 1		/* change uf to state */
} ufsa_t;
196 
197 /* state definition */
/*
 * One entry of the state_desc[] table.  The initializers there supply
 * ud_attr as { terminal, at_fail, unused }.
 */
198 typedef struct uf_state_desc {
199 	int	  ud_v;					/* value */
200 	char	 *ud_name;				/* name */
201 	sfrc_t	(*ud_sfp)(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
202 							/* per-state actions */
	/* bitwise OR of the states this state may be entered from */
203 	ufs_failure_states_t	  ud_prev;		/* valid prev. states */
204 
205 	struct uf_state_desc_attr {
206 		unsigned	terminal:1;	/* no action req. if found */
207 		unsigned	at_fail:1;	/* state set by thread */
208 						/* encountering the error */
		/* note: a full-width unsigned, not a bit-field */
209 		unsigned	unused;
210 	} ud_attr;
211 } ufsd_t;
212 
213 /*
214  * forward references
215  */
216 
217 /* thread to watch for failures */
/*
 * Note: ufsfx_thread_fix_failures() and ufsfx_do_failure_q() are declared
 * static here; their definitions later in the file omit the keyword and
 * take their linkage from these declarations.
 */
218 static void	ufsfx_thread_fix_failures(void *);
219 static int 	ufsfx_do_failure_q(void);
220 static void	ufsfx_kill_fix_failure_thread(void *);
221 
222 /* routines called when failure occurs */
223 static int		 ufs_fault_v(vnode_t *, char *, va_list)
224 	__KVPRINTFLIKE(2);
225 static ufs_failure_t	*init_failure(vnode_t *, char *, va_list)
226 	__KVPRINTFLIKE(2);
227 static void		 queue_failure(ufs_failure_t *);
228 /*PRINTFLIKE2*/
229 static void		 real_panic(ufs_failure_t *, const char *, ...)
230 	__KPRINTFLIKE(2);
231 static void		 real_panic_v(ufs_failure_t *, const char *, va_list)
232 	__KVPRINTFLIKE(2);
233 static triage_t		 triage(vnode_t *);
234 
235 /* routines called when failure record is acted upon */
236 static sfrc_t	set_state(ufs_failure_t *, ufs_failure_states_t);
237 static int	state_trans_valid(ufs_failure_states_t, ufs_failure_states_t);
238 static int	terminal_state(ufs_failure_states_t);
239 
240 /* routines called when states entered/found */
/* the three-argument routines match the ud_sfp slot of ufsd_t */
241 static sfrc_t	sf_minimum(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
242 static sfrc_t	sf_undef(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
243 static sfrc_t	sf_init(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
244 static sfrc_t	sf_queue(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
245 static sfrc_t	sf_found_queue(ufs_failure_t *);
246 static sfrc_t	sf_nonterm_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
247 static sfrc_t	sf_term_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
248 static sfrc_t	sf_panic(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
249 static sfrc_t	sf_set_trylck(ufs_failure_t *);
250 static sfrc_t	sf_set_locked(ufs_failure_t *);
251 static sfrc_t	sf_found_trylck(ufs_failure_t *);
252 static sfrc_t	sf_found_lock_fix_cmn(ufs_failure_t *, ufs_failure_states_t);
253 static sfrc_t	sf_found_umount(ufs_failure_t *);
254 
255 /* support routines, called by sf_nonterm_cmn and sf_term_cmn */
256 static time_t 	trylock_time_exceeded(ufs_failure_t *);
257 static void 	pester_msg(ufs_failure_t *, int);
258 static int 	get_lockfs_status(ufs_failure_t *, struct lockfs *);
259 static void 	alloc_lockfs_comment(ufs_failure_t *, struct lockfs *);
260 static int 	set_lockfs(ufs_failure_t *, struct lockfs *);
261 static int 	lockfs_failure(ufs_failure_t *);
262 static int 	lockfs_success(ufs_failure_t *);
263 static int	fsck_active(ufs_failure_t *);
264 
265 /* low-level support routines */
266 static ufsd_t	*get_state_desc(ufs_failure_states_t);
267 static char	*fs_name(ufs_failure_t *);
268 
/* name-lookup and dump helpers used only by the DEBUG trace macros */
269 #if defined(DEBUG)
270 static char	*state_name(ufs_failure_states_t);
271 static char	*lock_name(struct lockfs *);
272 static char	*err_name(int);
273 static char	*act_name(ufsa_t);
274 static void	 dump_uf_list(char *msg);
275 static void	 dump_uf(ufs_failure_t *, int i);
276 #endif /* DEBUG */
277 /*
278  *
279  * State Transitions:
280  *
281  * normally:
282  * if flagged to be locked but not unmounted:	(UFSMNT_ONERROR_LOCK)
283  *	UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> FIXING -> FIXED
284  *
285  * The only difference between these two is that the fsck must be started
286  * manually.
 * NOTE(review): only one sequence is listed above; the second sequence
 * that "these two" refers to is not shown in this comment.
287  *
288  * if flagged to be unmounted:			(UFSMNT_ONERROR_UMOUNT)
289  *	UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> UMOUNT -> NOTFIX
290  *
291  * if flagged to panic:				(UFSMNT_ONERROR_PANIC)
292  *	UNDEF -> INIT -> PANIC
293  *
294  * if a secondary panic on a file system which has an active failure
295  * record:
296  *	UNDEF -> INIT -> QUEUE -> REPLICA
297  *
298  * UNDEF, INIT, QUEUE all are set in the context of the failing thread.
299  * All other states (except possibly PANIC) are set by the monitor
300  * (lock) thread.
301  *
302  */
303 
/*
 * State table.  Each entry is
 *	{ state value, printable name, per-state function,
 *	  mask of valid previous states, { terminal, at_fail, unused } }
 * On DEBUG kernels UF_UMOUNT and UF_NOTFIX additionally accept UF_PANIC
 * as a previous state.  The table ends with an entry whose name is NULL.
 */
304 ufsd_t	state_desc[] =
305 {
306 	{ UF_ILLEGAL,	"in an unknown state",	sf_minimum,	UF_ILLEGAL,
307 								{ 0, 1, 0 } },
308 	{ UF_UNDEF,	"undefined",		sf_undef,	UF_UNDEF,
309 								{ 0, 1, 0 } },
310 	{ UF_INIT,	"being initialized",	sf_init,	UF_UNDEF,
311 								{ 0, 1, 0 } },
312 	{ UF_QUEUE,	"queued",		sf_queue,	UF_INIT,
313 								{ 0, 1, 0 } },
314 	{ UF_TRYLCK,	"trying to be locked",	sf_nonterm_cmn,
315 						UF_QUEUE,	{ 0, 0, 0 } },
316 	{ UF_LOCKED,	"locked",		sf_nonterm_cmn,
317 					UF_TRYLCK | UF_FIXING,	{ 0, 0, 0 } },
318 	{ UF_UMOUNT,	"being unmounted",	sf_nonterm_cmn,
319 
320 #if defined(DEBUG)
321 					UF_PANIC |
322 #endif /* DEBUG */
323 					UF_TRYLCK | UF_LOCKED, 	{ 0, 0, 0 } },
324 	{ UF_FIXING,	"being fixed",		sf_nonterm_cmn,
325 						UF_LOCKED,	{ 0, 0, 0 } },
326 	{ UF_FIXED,	"fixed",		sf_term_cmn,
327 						UF_FIXING,	{ 1, 0, 0 } },
328 	{ UF_NOTFIX,	"not fixed",		sf_term_cmn,
329 
330 #if defined(DEBUG)
331 							UF_PANIC |
332 #endif /* DEBUG */
333 
334 	    UF_QUEUE | UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING,
335 								{ 1, 0, 0 } },
336 	{ UF_REPLICA,	"a replica",		sf_term_cmn,
337 						UF_QUEUE,	{ 1, 0, 0 } },
338 	{ UF_PANIC,	"panicking",		sf_panic,
339 		/* XXX make this narrower */	UF_ALLSTATES,	{ 0, 0, 0 } },
	/* end-of-table marker: NULL name and NULL state function */
340 	{ UF_UNDEF,	NULL,			((sfrc_t (*)()) NULL),
341 						UF_UNDEF, 	{ 0, 0, 0 } }
342 };
343 
344 /* unified collection */
/*
 * Pointers to this file's statistics, tunables and state table, gathered
 * in one place; filled in by ufsfx_init().
 * NOTE(review): presumably a convenience for debuggers; no reader of
 * uffsinfo is visible in this part of the file.
 */
345 struct ufsfx_info {
346 	struct uf_statistics		*ufi_statp;	/* &uf_stats */
347 	struct ufs_failure_tunable	*ufi_tunep;	/* &ufsfx_tune */
348 	ufsd_t				*ufi_statetab;	/* &state_desc[0] */
349 } uffsinfo;
350 
/*
 * DEBUG-only value-to-name tables.  In each table the first entry carries
 * the name for an unrecognized value and the last entry has a NULL name.
 * NOTE(review): the lookup routines (err_name(), act_name(), lock_name())
 * are defined outside this part of the file; confirm they rely on that
 * layout before reordering entries.
 */
351 #if defined(DEBUG)
352 struct action_description {
353 	ufsa_t	 ad_v;
354 	char	*ad_name;
355 };
356 
/* placeholder errno for "not one of the values listed below" */
357 #define	EUNK		(-1)
358 
359 struct error_description {
360 	int	 ed_errno;
361 	char	*ed_name;
362 } err_desc[] =
363 {
364 	{ EUNK,		"<unexpected errno?>"	},
365 	{ EINVAL,	"EINVAL"		},
366 	{ EACCES,	"EACCES"		},
367 	{ EPERM,	"EPERM"			},
368 	{ EIO,		"EIO"			},
369 	{ EDEADLK,	"EDEADLK"		},
370 	{ EBUSY,	"EBUSY"			},
371 	{ EAGAIN,	"EAGAIN"		},
372 	{ ERESTART,	"ERESTART"		},
373 	{ ETIMEDOUT,	"ETIMEDOUT"		},
374 	{ NO_ERROR,	"Ok"			},
375 	{ EUNK,		NULL 			}
376 };
377 
378 struct action_description act_desc[] =
379 {
380 	{ UFA_ERROR,	"<unexpected action?>"	},
381 	{ UFA_FOUND,	"\"found\""	},
382 	{ UFA_SET,	"\"set\""	},
383 	{ UFA_ERROR,	NULL			},
384 };
385 
/* placeholder lock type for "not one of the values listed below" */
386 #define	LOCKFS_BADLOCK	(-1)
387 
388 struct lock_description {
389 	int	 ld_type;
390 	char	*ld_name;
391 } lock_desc[] =
392 {
393 	{ LOCKFS_BADLOCK,	"<unexpected lock?>"	},
394 	{ LOCKFS_ULOCK,		"Unlock"		},
395 	{ LOCKFS_ELOCK,		"Error Lock"		},
396 	{ LOCKFS_HLOCK,		"Hard Lock"		},
397 	{ LOCKFS_OLOCK,		"Old Lock"		},
398 	{ LOCKFS_BADLOCK,	NULL			}
399 };
400 
401 #endif /* DEBUG */
401 #endif /* DEBUG */
402 
403 /*
404  * ufs_fault, ufs_fault_v
405  *
406  *  called instead of cmn_err(CE_PANIC, ...) by ufs routines
407  *  when a failure is detected to put the file system into an
408  *  error state (if possible) or to devolve to a panic otherwise
409  *
410  * vnode is some vnode in this file system, used to find the way
411  * to ufsvfs, vfsp etc.  Since a panic can be called from many
412  * levels, the vnode is the most convenient hook to pass through.
413  *
414  */
415 
416 /*PRINTFLIKE2*/
417 int
418 ufs_fault(vnode_t *vp, char *fmt, ...)
419 {
420 	va_list	adx;
421 	int	error;
422 
423 	MINOR(("[ufs_fault"));
424 
425 	va_start(adx, fmt);
426 	error = ufs_fault_v(vp, fmt, adx);
427 	va_end(adx);
428 
429 	MINOR((": %s (%d)]\n", err_name(error), error));
430 	return (error);
431 }
432 
433 const char *nullfmt = "<null format?>";
434 
435 static int
436 ufs_fault_v(vnode_t *vp, char *fmt, va_list adx)
437 {
438 	ufs_failure_t		*new = NULL;
439 	ufsvfs_t		*ufsvfsp;
440 	triage_t		 fix;
441 	int			 err = ERESTART;
442 	int			need_vfslock;
443 
444 	MINOR(("[ufs_fault_v"));
445 
446 	if (fmt == NULL)
447 		fmt = (char *)nullfmt;
448 
449 	fix = triage(vp);
450 
451 	if (vp) {
452 		ufsvfsp = (struct ufsvfs *)vp->v_vfsp->vfs_data;
453 
454 		/*
455 		 * Something bad has happened. That is why we are here.
456 		 *
457 		 * In order for the bad thing to be recorded in the superblock
458 		 * we need to write to the superblock directly.
459 		 * In the case that logging is enabled the logging code
460 		 * would normally intercept our write as a delta to the log,
461 		 * thus we mark the filesystem FSBAD in any case.
462 		 */
463 		need_vfslock = !MUTEX_HELD(&ufsvfsp->vfs_lock);
464 
465 		if (need_vfslock) {
466 			mutex_enter(&ufsvfsp->vfs_lock);
467 		}
468 
469 		ufsvfsp->vfs_fs->fs_clean = FSBAD;
470 		ASSERT(SEMA_HELD(&ufsvfsp->vfs_bufp->b_sem));
471 		ufsvfsp->vfs_bufp->b_flags &= ~(B_ASYNC | B_READ |
472 				B_DONE | B_ERROR | B_DELWRI);
473 
474 		(void) bdev_strategy(ufsvfsp->vfs_bufp);
475 		(void) biowait(ufsvfsp->vfs_bufp);
476 
477 		if (need_vfslock) {
478 			mutex_exit(&ufsvfsp->vfs_lock);
479 		}
480 	}
481 
482 	switch (fix) {
483 
484 	default:
485 	case TRIAGE_DEAD:
486 	case TRIAGE_NO_SPIRIT:
487 
488 		real_panic_v(new, fmt, adx);
489 		/* LINTED: warning: logical expression always true: op "||" */
490 		ASSERT(DEBUG);
491 		err = EAGAIN;
492 
493 #if defined(DEBUG)
494 		if (!(DEBUG_FLAGS & DBGFLG_FIXWOULDPANIC)) {
495 			break;
496 		}
497 		/* FALLTHROUGH */
498 
499 #else
500 		break;
501 
502 #endif /* DEBUG */
503 
504 	case TRIAGE_ATTEND_TO:
505 
506 		/* q thread not running yet? */
507 		if (mutex_tryenter(&ufs_fix.uq_mutex)) {
508 			if (!ufs_fix.uq_threadp) {
509 				mutex_exit(&ufs_fix.uq_mutex);
510 				ufs_thread_start(&ufs_fix,
511 				    ufsfx_thread_fix_failures, NULL);
512 				ufs_fix.uq_threadp->t_flag |= T_DONTBLOCK;
513 				mutex_enter(&ufs_fix.uq_mutex);
514 			} else {
515 				/*
516 				 * We got the lock but we are not the current
517 				 * threadp so we have to release the lock.
518 				 */
519 				mutex_exit(&ufs_fix.uq_mutex);
520 			}
521 		} else {
522 			MINOR((": fix failure thread already running "));
523 			/*
524 			 * No need to log another failure as one is already
525 			 * being logged.
526 			 */
527 			break;
528 		}
529 
530 		if (ufs_fix.uq_threadp && ufs_fix.uq_threadp == curthread) {
531 			mutex_exit(&ufs_fix.uq_mutex);
532 			cmn_err(CE_WARN, "ufs_fault_v: recursive ufs_fault");
533 		} else {
534 			/*
535 			 * Must check if we actually still own the lock and
536 			 * if so then release the lock and move on with life.
537 			 */
538 			if (mutex_owner(&ufs_fix.uq_mutex) == curthread)
539 				mutex_exit(&ufs_fix.uq_mutex);
540 		}
541 
542 		new = init_failure(vp, fmt, adx);
543 		if (new != NULL) {
544 			queue_failure(new);
545 			break;
546 		}
547 		real_panic_v(new, fmt, adx);
548 		break;
549 
550 	}
551 	MINOR(("] "));
552 	return (err);
553 }
554 
555 /*
556  * triage()
557  *
558  *  Attempt to fix iff:
559  *    - the system is not already panicking
560  *    - this file system isn't explicitly marked not to be fixed
561  *    - we can connect to the user-level daemon
562  * These conditions are detectable later, but if we can determine
563  * them in the failing threads context the core dump may be more
564  * useful.
565  *
566  */
567 
568 static triage_t
569 triage(vnode_t *vp)
570 {
571 	struct inode	 *ip;
572 	int		  need_unlock_vfs;
573 	int		  fs_flags;
574 
575 	MINUTE(("[triage"));
576 
577 	if (panicstr) {
578 		MINUTE((
579 		": already panicking: \"%s\" => TRIAGE_DEAD]\n", panicstr));
580 		return (TRIAGE_DEAD);
581 	}
582 
583 	if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs) {
584 		MINUTE((
585 	": vp, ip or ufsvfs is NULL; can't determine fs => TRIAGE_DEAD]\n"));
586 		return (TRIAGE_DEAD);
587 	}
588 
589 	/* use tryenter and continue no matter what since we're panicky */
590 	need_unlock_vfs = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock);
591 	if (need_unlock_vfs)
592 		need_unlock_vfs = mutex_tryenter(&ip->i_ufsvfs->vfs_lock);
593 
594 	fs_flags = ip->i_ufsvfs->vfs_fsfx.fx_flags;
595 	if (need_unlock_vfs)
596 		mutex_exit(&ip->i_ufsvfs->vfs_lock);
597 
598 	if (fs_flags & UFSFX_PANIC) {
599 		MINUTE((
600 		": filesystem marked \"panic\" => TRIAGE_NO_SPIRIT]\n"));
601 		return (TRIAGE_NO_SPIRIT);
602 	}
603 
604 	if (ufs_checkaccton(vp) != 0) {
605 		MINUTE((
606 		": filesystem would deadlock (accounting) => TRIAGE_DEAD]\n"));
607 		return (TRIAGE_DEAD);
608 	}
609 
610 	if (ufs_checkswapon(vp) != 0) {
611 		MINUTE((
612 		": filesystem would deadlock (swapping) => TRIAGE_DEAD]\n"));
613 		return (TRIAGE_DEAD);
614 	}
615 
616 	MINUTE((": return TRIAGE_ATTEND_TO] "));
617 	return (TRIAGE_ATTEND_TO);
618 }
619 
620 /*
621  * init failure
622  *
623  * This routine allocates a failure struct and initializes
624  * it's member elements.
625  * Space is allocated for copies of dynamic identifying fs structures
626  * passed in.  Without a much more segmented kernel architecture
627  * this is as protected as we can make it (for now.)
628  */
629 static ufs_failure_t *
630 init_failure(vnode_t *vp, char *fmt, va_list adx)
631 {
632 	ufs_failure_t	*new;
633 	struct inode	*ip;
634 	int		 initialization_worked = 0;
635 	int		 need_vfs_unlock;
636 
637 	MINOR(("[init_failure"));
638 
639 	new = kmem_zalloc(sizeof (ufs_failure_t), KM_NOSLEEP);
640 	if (!new) {
641 		MINOR((": kmem_zalloc failed]\n"));
642 		return (NULL);
643 	}
644 
645 	/*
646 	 * enough information to make a fix attempt possible?
647 	 */
648 	if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs || !vp->v_vfsp ||
649 	    !ip->i_ufsvfs->vfs_bufp || !ITOF(ip) || !fmt)
650 		goto errout;
651 
652 	if (vp->v_type != VREG && vp->v_type != VDIR &&
653 	    vp->v_type != VBLK && vp->v_type != VCHR &&
654 	    vp->v_type != VLNK && vp->v_type != VFIFO &&
655 	    vp->v_type != VSOCK)
656 		goto errout;
657 
658 	if (ip->i_ufsvfs->vfs_root->v_type != VREG &&
659 	    ip->i_ufsvfs->vfs_root->v_type != VDIR &&
660 	    ip->i_ufsvfs->vfs_root->v_type != VBLK &&
661 	    ip->i_ufsvfs->vfs_root->v_type != VCHR &&
662 	    ip->i_ufsvfs->vfs_root->v_type != VLNK &&
663 	    ip->i_ufsvfs->vfs_root->v_type != VFIFO &&
664 	    ip->i_ufsvfs->vfs_root->v_type != VSOCK)
665 		goto errout;
666 
667 	if ((ITOF(ip)->fs_magic != FS_MAGIC) &&
668 	    (ITOF(ip)->fs_magic != MTB_UFS_MAGIC))
669 		goto errout;
670 
671 	/* intialize values */
672 
673 	(void) vsnprintf(new->uf_panic_str, LOCKFS_MAXCOMMENTLEN - 1, fmt, adx);
674 
675 	new->uf_ufsvfsp = ip->i_ufsvfs;
676 	new->uf_vfsp    = ip->i_vfs;
677 
678 	mutex_init(&new->uf_mutex, NULL, MUTEX_DEFAULT, NULL);
679 	need_vfs_unlock = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock);
680 
681 	if (need_vfs_unlock) {
682 		if (!mutex_tryenter(&ip->i_ufsvfs->vfs_lock)) {
683 			/*
684 			 * not much alternative here, but we're panicking
685 			 * already, it couldn't be worse - so just
686 			 * proceed optimistically and take note.
687 			 */
688 			mutex_enter(&uf_stats.ufst_mutex);
689 			uf_stats.ufst_lock_violations++;
690 			mutex_exit(&uf_stats.ufst_mutex);
691 			MINOR((": couldn't get vfs lock"))
692 			need_vfs_unlock = 0;
693 		}
694 	}
695 
696 	if (mutex_tryenter(&new->uf_mutex)) {
697 		initialization_worked = set_state(new, UF_INIT);
698 		mutex_exit(&new->uf_mutex);
699 	}
700 
701 	if (need_vfs_unlock)
702 		mutex_exit(&ip->i_ufsvfs->vfs_lock);
703 
704 	if (initialization_worked) {
705 		MINOR(("] "));
706 		return (new);
707 	}
708 	/* FALLTHROUGH */
709 
710 errout:
711 	if (new)
712 		kmem_free(new, sizeof (ufs_failure_t));
713 	MINOR((": failed]\n"));
714 	return (NULL);
715 }
716 
717 static void
718 queue_failure(ufs_failure_t *new)
719 {
720 	MINOR(("[queue_failure"));
721 
722 	mutex_enter(&ufs_fix.uq_mutex);
723 
724 	if (ufs_fix.uq_ufhead)
725 		insque(new, &ufs_fix.uq_ufhead);
726 	else
727 		ufs_fix.uq_ufhead = new;
728 
729 	if (mutex_tryenter(&new->uf_mutex)) {
730 		(void) set_state(new, UF_QUEUE);
731 		mutex_exit(&new->uf_mutex);
732 	}
733 
734 	mutex_enter(&uf_stats.ufst_mutex);		/* force wakeup */
735 	ufs_fix.uq_ne = ufs_fix.uq_lowat = uf_stats.ufst_num_failed;
736 	mutex_exit(&uf_stats.ufst_mutex);
737 
738 	cv_broadcast(&ufs_fix.uq_cv);
739 
740 	DCALL(DBGLVL_MAJOR, cmn_err(CE_WARN, new->uf_panic_str?
741 					new->uf_panic_str:
742 					"queue_failure: NULL panic str?"));
743 	mutex_exit(&ufs_fix.uq_mutex);
744 
745 	MINOR(("] "));
746 }
747 
748 /*PRINTFLIKE2*/
749 static void
750 real_panic(ufs_failure_t *f, const char *fmt, ...)
751 {
752 	va_list	adx;
753 
754 	MINUTE(("[real_panic "));
755 
756 	va_start(adx, fmt);
757 	real_panic_v(f, fmt, adx);
758 	va_end(adx);
759 
760 	MINUTE((": return?!]\n"));
761 }
762 
763 static void
764 real_panic_v(ufs_failure_t *f, const char *fmt, va_list adx)
765 {
766 	int seriousness = CE_PANIC;
767 	int need_unlock;
768 
769 	MINUTE(("[real_panic_v "));
770 
771 	if (f && f->uf_ufsvfsp)
772 		TRANS_SETERROR(f->uf_ufsvfsp);
773 
774 #if defined(DEBUG)
775 	if (DEBUG_FLAGS & DBGFLG_NOPANIC) {
776 		seriousness = CE_WARN;
777 		cmn_err(CE_WARN, "real_panic: EWOULDPANIC\n");
778 	}
779 #endif /* DEBUG */
780 
781 	delay(hz >> 1);			/* allow previous warnings to get out */
782 
783 	if (!f && fmt)
784 		vcmn_err(seriousness, fmt, adx);
785 	else
786 		cmn_err(seriousness, f && f->uf_panic_str? f->uf_panic_str:
787 		    "real_panic: <unknown panic?>");
788 
789 	if (f) {
790 		need_unlock = !MUTEX_HELD(&f->uf_mutex);
791 		if (need_unlock) {
792 			mutex_enter(&f->uf_mutex);
793 		}
794 
795 		f->uf_retry = -1;
796 		(void) set_state(f, UF_PANIC);
797 
798 		if (need_unlock) {
799 			mutex_exit(&f->uf_mutex);
800 		}
801 	}
802 	MINUTE((": return?!]\n"));
803 }
804 
805 /*
806  * initializes ufs panic structs, locks, etc
807  */
808 void
809 ufsfx_init(void)
810 {
811 
812 	MINUTE(("[ufsfx_init"));
813 
814 	/* patchable; unchanged while running, so no lock is needed */
815 	ufsfx_tune.uft_too_long		= UF_TOO_LONG;
816 	ufsfx_tune.uft_fixstart_period	= UF_FIXSTART_PERIOD;
817 	ufsfx_tune.uft_fixpoll_period	= UF_FIXPOLL_PERIOD;
818 	ufsfx_tune.uft_short_err_period	= UF_SHORT_ERROR_PERIOD;
819 	ufsfx_tune.uft_long_err_period	= UF_LONG_ERROR_PERIOD;
820 
821 	uffsinfo.ufi_statp	= &uf_stats;
822 	uffsinfo.ufi_tunep	= &ufsfx_tune;
823 	uffsinfo.ufi_statetab	= &state_desc[0];
824 
825 	mutex_init(&uf_stats.ufst_mutex, NULL, MUTEX_DEFAULT, NULL);
826 	ufs_thread_init(&ufs_fix, /* maxne */ 1);
827 
828 	MINUTE(("] "));
829 }
830 
831 /*
832  * initializes per-ufs values
833  * returns 0 (ok) or errno
834  */
835 int
836 ufsfx_mount(struct ufsvfs *ufsvfsp, int flags)
837 {
838 	MINUTE(("[ufsfx_mount (%d)", flags));
839 	/* don't check/need vfs_lock because it's still being initialized */
840 
841 	ufsvfsp->vfs_fsfx.fx_flags = (flags & UFSMNT_ONERROR_FLGMASK) >> 4;
842 
843 	MINUTE((": %s: fx_flags:%ld,",
844 		ufsvfsp->vfs_fs->fs_fsmnt, ufsvfsp->vfs_fsfx.fx_flags));
845 	/*
846 	 *	onerror={panic ^ lock only ^ unmount}
847 	 */
848 
849 	if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_PANIC) {
850 		MINUTE((" PANIC"));
851 
852 	} else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKONLY) {
853 		MINUTE((" LCKONLY"));
854 
855 	} else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKUMOUNT) {
856 		MINUTE((" LCKUMOUNT"));
857 
858 	} else {
859 		ufsvfsp->vfs_fsfx.fx_flags = UFSFX_DEFAULT;
860 		ASSERT(ufsvfsp->vfs_fsfx.fx_flags &
861 						(UFSMNT_ONERROR_FLGMASK >> 4));
862 		MINUTE((" DEFAULT"));
863 	}
864 
865 	pollwakeup(&ufs_pollhd, POLLPRI);
866 	MINUTE(("]\n"));
867 	return (0);
868 }
869 
870 /*
871  * ufsfx_unmount
872  *
873  * called during unmount
874  */
875 void
876 ufsfx_unmount(struct ufsvfs *ufsvfsp)
877 {
878 	ufs_failure_t	*f;
879 	int		 must_unlock_list;
880 
881 	MINUTE(("[ufsfx_unmount"));
882 
883 	if (!ufsvfsp) {
884 		MINUTE((": no ufsvfsp]"));
885 		return;
886 	}
887 
888 	if ((must_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex)) != 0)
889 		mutex_enter(&ufs_fix.uq_mutex);
890 
891 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
892 		int must_unlock_failure;
893 
894 		must_unlock_failure = !MUTEX_HELD(&f->uf_mutex);
895 		if (must_unlock_failure) {
896 			mutex_enter(&f->uf_mutex);
897 		}
898 
899 		if (f->uf_ufsvfsp == ufsvfsp) {
900 
901 			/*
902 			 * if we owned the failure record lock, then this
903 			 * is probably a fix failure-triggered unmount, so
904 			 * the warning is not appropriate or needed
905 			 */
906 
907 			/* XXX if rebooting don't print this? */
908 			if (!terminal_state(f->uf_s) && must_unlock_failure) {
909 				cmn_err(CE_WARN,
910 					"Unmounting %s while error-locked",
911 					fs_name(f));
912 			}
913 
914 			f->uf_ufsvfsp		= NULL;
915 			f->uf_vfs_ufsfxp	= NULL;
916 			f->uf_vfs_lockp		= NULL;
917 			f->uf_bp		= NULL;
918 			f->uf_vfsp		= NULL;
919 			f->uf_retry		= -1;
920 		}
921 
922 		if (must_unlock_failure)
923 			mutex_exit(&f->uf_mutex);
924 	}
925 	if (must_unlock_list)
926 		mutex_exit(&ufs_fix.uq_mutex);
927 
928 	pollwakeup(&ufs_pollhd, POLLPRI | POLLHUP);
929 	MINUTE(("] "));
930 }
931 
932 /*
933  * ufsfx_(un)lockfs
934  *
935  * provides hook from lockfs code so we can recognize unlock/relock
936  *  This is called after it is certain that the (un)lock will succeed.
937  */
938 void
939 ufsfx_unlockfs(struct ufsvfs *ufsvfsp)
940 {
941 	ufs_failure_t	*f;
942 	int		 need_unlock;
943 	int		 need_unlock_list;
944 	int		 informed = 0;
945 
946 	MINUTE(("[ufsfx_unlockfs"));
947 
948 	if (!ufsvfsp)
949 		return;
950 
951 	need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex);
952 
953 	if (need_unlock_list)
954 		mutex_enter(&ufs_fix.uq_mutex);
955 
956 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
957 
958 		need_unlock = !MUTEX_HELD(&f->uf_mutex);
959 		if (need_unlock)
960 			mutex_enter(&f->uf_mutex);
961 
962 		if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s)) {
963 			if (!(f->uf_s & UF_FIXING)) {
964 				/*
965 				 * This might happen if we don't notice that
966 				 * the fs gets marked FSFIX before it is
967 				 * marked FSCLEAN, as might occur if the
968 				 * the superblock was hammered directly.
969 				 */
970 				if (!informed) {
971 					informed = 1;
972 					cmn_err(CE_NOTE,
973 		    "Unlock of %s succeeded before fs_clean marked FSFIX?",
974 							    fs_name(f));
975 				}
976 
977 				/*
978 				 * pass through fixing state so
979 				 * transition protocol is satisfied
980 				 */
981 				if (!set_state(f, UF_FIXING)) {
982 					MINUTE((": failed] "));
983 				}
984 			}
985 
986 			if (!set_state(f, UF_FIXED)) {
987 				/* it's already fixed, so don't panic now */
988 				MINUTE((": failed] "));
989 			}
990 		}
991 
992 		if (need_unlock)
993 			mutex_exit(&f->uf_mutex);
994 	}
995 	if (need_unlock_list)
996 		mutex_exit(&ufs_fix.uq_mutex);
997 	MINUTE(("] "));
998 }
999 
1000 void
1001 ufsfx_lockfs(struct ufsvfs *ufsvfsp)
1002 {
1003 	ufs_failure_t	*f;
1004 	int		 need_unlock;
1005 	int		 need_unlock_list;
1006 
1007 	MINUTE(("[ufsfx_lockfs"));
1008 
1009 	if (!ufsvfsp)
1010 		return;
1011 
1012 	need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex);
1013 
1014 	if (need_unlock_list)
1015 		mutex_enter(&ufs_fix.uq_mutex);
1016 
1017 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
1018 
1019 		need_unlock = !MUTEX_HELD(&f->uf_mutex);
1020 		if (need_unlock)
1021 			mutex_enter(&f->uf_mutex);
1022 
1023 		if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s) &&
1024 		    f->uf_s != UF_PANIC) {
1025 			switch (f->uf_s) {
1026 
1027 			default:
1028 				cmn_err(CE_WARN,
1029 			"fs %s not in state UF_TRYLCK, UF_LOCKED or UF_FIXING",
1030 								fs_name(f));
1031 				break;
1032 
1033 			case UF_TRYLCK:
1034 				if (!set_state(f, UF_LOCKED)) {
1035 					MINUTE((": failed] "));
1036 				}
1037 				break;
1038 
1039 			case UF_LOCKED:
1040 				if (!set_state(f, UF_FIXING)) {
1041 					MINUTE((": failed] "));
1042 				}
1043 				break;
1044 
1045 			case UF_FIXING:
1046 				break;
1047 
1048 			}
1049 		}
1050 
1051 		if (need_unlock)
1052 			mutex_exit(&f->uf_mutex);
1053 	}
1054 	if (need_unlock_list)
1055 		mutex_exit(&ufs_fix.uq_mutex);
1056 
1057 	MINUTE(("] "));
1058 }
1059 
1060 /*
1061  * error lock, trigger fsck and unlock those fs with failures
1062  * blatantly copied from the hlock routine, although this routine
1063  * triggers differently in order to use uq_ne as meaningful data.
1064  */
1065 /* ARGSUSED */
1066 void
1067 ufsfx_thread_fix_failures(void *ignored)
1068 {
1069 	int		retry;
1070 	callb_cpr_t	cprinfo;
1071 
1072 	CALLB_CPR_INIT(&cprinfo, &ufs_fix.uq_mutex, callb_generic_cpr,
1073 	    "ufsfixfail");
1074 
1075 	MINUTE(("[ufsfx_thread_fix_failures] "));
1076 
1077 	for (;;) {
1078 		/* sleep until there is work to do */
1079 
1080 		mutex_enter(&ufs_fix.uq_mutex);
1081 		(void) ufs_thread_run(&ufs_fix, &cprinfo);
1082 		ufs_fix.uq_ne = 0;
1083 		mutex_exit(&ufs_fix.uq_mutex);
1084 
1085 		/* process failures on our q */
1086 		do {
1087 			retry = ufsfx_do_failure_q();
1088 			if (retry) {
1089 				mutex_enter(&ufs_fix.uq_mutex);
1090 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
1091 				(void) cv_timedwait(&ufs_fix.uq_cv,
1092 							&ufs_fix.uq_mutex,
1093 							lbolt + (hz * retry));
1094 				CALLB_CPR_SAFE_END(&cprinfo,
1095 				    &ufs_fix.uq_mutex);
1096 				mutex_exit(&ufs_fix.uq_mutex);
1097 			}
1098 		} while (retry);
1099 	}
1100 	/* NOTREACHED */
1101 }
1102 
1103 
1104 /*
1105  * watch for fix-on-panic work
1106  *
1107  * returns # of seconds to sleep before trying again
1108  * and zero if no retry is needed
1109  */
1110 
1111 int
1112 ufsfx_do_failure_q(void)
1113 {
1114 	ufs_failure_t	*f;
1115 	long		 retry = 1;
1116 	ufsd_t		*s;
1117 
1118 	MAJOR(("[ufsfx_do_failure_q"));
1119 	DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL));
1120 
1121 	if (!mutex_tryenter(&ufs_fix.uq_mutex))
1122 		return (retry);
1123 
1124 	retry = 0;
1125 rescan_q:
1126 
1127 	/*
1128 	 * walk down failure list
1129 	 *  depending on state of each failure, do whatever
1130 	 *  is appropriate to move it to the next state
1131 	 *  taking note of whether retry gets set
1132 	 *
1133 	 * retry protocol:
1134 	 * wakeup in shortest required time for any failure
1135 	 *   retry == 0; nothing more to do (terminal state)
1136 	 *   retry < 0; reprocess queue immediately, retry will
1137 	 *		be abs(retry) for the next cycle
1138 	 *   retry > 0; schedule wakeup for retry seconds
1139 	 */
1140 
1141 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
1142 
1143 		if (!mutex_tryenter(&f->uf_mutex)) {
1144 			retry = 1;
1145 			continue;
1146 		}
1147 		s = get_state_desc(f->uf_s);
1148 
1149 		MINOR((": found%s: %s, \"%s: %s\"\n",
1150 			    s->ud_attr.terminal? " old": "",
1151 			    fs_name(f), state_name(f->uf_s), f->uf_panic_str));
1152 
1153 		if (s->ud_attr.terminal) {
1154 			mutex_exit(&f->uf_mutex);
1155 			continue;
1156 		}
1157 
1158 		if (s->ud_sfp)
1159 			(*s->ud_sfp)(f, UFA_FOUND, f->uf_s);
1160 
1161 		ASSERT(terminal_state(f->uf_s) || f->uf_retry != 0);
1162 
1163 		if (f->uf_retry != 0) {
1164 			if (retry > f->uf_retry || retry == 0)
1165 				retry = f->uf_retry;
1166 			if (f->uf_retry < 0)
1167 				f->uf_retry = abs(f->uf_retry);
1168 		}
1169 		mutex_exit(&f->uf_mutex);
1170 	}
1171 
1172 
1173 	if (retry < 0) {
1174 		retry = abs(retry);
1175 		goto rescan_q;
1176 	}
1177 
1178 	mutex_exit(&ufs_fix.uq_mutex);
1179 
1180 	DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL));
1181 	MAJOR((": retry=%ld, good night]\n\n", retry));
1182 
1183 	return (retry);
1184 }
1185 
1186 static void
1187 pester_msg(ufs_failure_t *f, int seriousness)
1188 {
1189 	MINUTE(("[pester_msg"));
1190 	ASSERT(f->uf_s & (UF_LOCKED | UF_FIXING));
1191 
1192 	/*
1193 	 * XXX if seems too long for this fs, poke administrator
1194 	 * XXX to run fsck manually (and change retry time?)
1195 	 */
1196 	cmn_err(seriousness,
1197 		"Waiting for repair of %s to %s",
1198 			    fs_name(f),
1199 			    f->uf_s & UF_LOCKED? "start": "finish");
1200 	MINUTE(("]"));
1201 }
1202 
1203 static time_t
1204 trylock_time_exceeded(ufs_failure_t *f)
1205 {
1206 	time_t		toolong;
1207 	extern time_t	time;
1208 
1209 	MINUTE(("[trylock_time_exceeded"));
1210 	ASSERT(MUTEX_HELD(&f->uf_mutex));
1211 
1212 	toolong = (time_t)ufsfx_tune.uft_too_long + f->uf_entered_tm;
1213 	if (time > toolong)
1214 		cmn_err(CE_WARN, "error-lock timeout exceeded: %s", fs_name(f));
1215 
1216 	MINUTE(("] "));
1217 	return (time <= toolong? 0: time - toolong);
1218 }
1219 
1220 static int
1221 get_lockfs_status(ufs_failure_t *f, struct lockfs *lfp)
1222 {
1223 	MINUTE(("[get_lockfs_status"));
1224 
1225 	if (!f->uf_ufsvfsp) {
1226 		MINUTE((": ufsvfsp is NULL]\n"));
1227 		return (0);
1228 	}
1229 
1230 	ASSERT(MUTEX_HELD(&f->uf_mutex));
1231 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
1232 	ASSERT(!vfs_lock_held(f->uf_vfsp));
1233 	ASSERT(f->uf_ufsvfsp->vfs_root != NULL);
1234 
1235 	f->uf_lf_err = ufs_fiolfss(f->uf_ufsvfsp->vfs_root, lfp);
1236 
1237 	if (f->uf_lf_err) {
1238 		f->uf_retry = ufsfx_tune.uft_short_err_period;
1239 	}
1240 
1241 	MINUTE(("] "));
1242 	return (1);
1243 }
1244 
1245 static sfrc_t
1246 set_state(ufs_failure_t *f, ufs_failure_states_t new_state)
1247 {
1248 	ufsd_t		*s;
1249 	sfrc_t		 sfrc = SFRC_FAIL;
1250 	int		 need_unlock;
1251 	extern time_t	 time;
1252 
1253 	HIDEOUS(("[set_state: new state:%s", state_name(new_state)));
1254 	ASSERT(f);
1255 	ASSERT(MUTEX_HELD(&f->uf_mutex));
1256 
1257 	/*
1258 	 * if someone else is panicking, just let panic sync proceed
1259 	 */
1260 	if (panicstr) {
1261 		(void) set_state(f, UF_NOTFIX);
1262 		HIDEOUS((": state reset: not fixed] "));
1263 		return (sfrc);
1264 	}
1265 
1266 	/*
1267 	 * bad state transition, an internal error
1268 	 */
1269 	if (!state_trans_valid(f->uf_s, new_state)) {
1270 		/* recursion */
1271 		if (!(f->uf_s & UF_PANIC) && !(new_state & UF_PANIC))
1272 			(void) set_state(f, UF_PANIC);
1273 		MINOR((": state reset: transition failure (\"%s\"->\"%s\")] ",
1274 				state_name(f->uf_s), state_name(new_state)));
1275 		return (sfrc);
1276 	}
1277 
1278 	s = get_state_desc(new_state);
1279 
1280 	need_unlock = !MUTEX_HELD(&ufs_fix.uq_mutex);
1281 	if (need_unlock)
1282 		mutex_enter(&ufs_fix.uq_mutex);
1283 
1284 	if (s->ud_attr.at_fail && ufs_fix.uq_threadp &&
1285 	    curthread == ufs_fix.uq_threadp) {
1286 		cmn_err(CE_WARN, "set_state: probable recursive panic of %s",
1287 			fs_name(f));
1288 	}
1289 	if (need_unlock)
1290 		mutex_exit(&ufs_fix.uq_mutex);
1291 
1292 	/* NULL state functions always succeed */
1293 	sfrc = !s->ud_sfp? SFRC_SUCCESS: (*s->ud_sfp)(f, UFA_SET, new_state);
1294 
1295 	if (sfrc == SFRC_SUCCESS && f->uf_s != new_state) {
1296 		f->uf_s = new_state;
1297 		f->uf_entered_tm = time;
1298 		f->uf_counter = 0;
1299 	}
1300 
1301 	HIDEOUS(("]\n"));
1302 	return (sfrc);
1303 }
1304 
1305 static ufsd_t *
1306 get_state_desc(ufs_failure_states_t state)
1307 {
1308 	ufsd_t *s;
1309 
1310 	HIDEOUS(("[get_state_desc"));
1311 
1312 	for (s = &state_desc[1]; s->ud_name != NULL; s++) {
1313 		if (s->ud_v == state) {
1314 			HIDEOUS(("] "));
1315 			return (s);
1316 		}
1317 	}
1318 
1319 	HIDEOUS(("] "));
1320 	return (&state_desc[0]);	/* default */
1321 }
1322 
1323 static sfrc_t
1324 sf_undef(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s)
1325 {
1326 	sfrc_t rc;
1327 
1328 	TRIVIA(("[sf_undef, action is %s, state is %s\n",
1329 		act_name(a), state_name(s)));
1330 	ASSERT(s == UF_UNDEF);
1331 
1332 	/* shouldn't find null failure records or ever set one */
1333 	rc = set_state(f, UF_NOTFIX);
1334 
1335 	TRIVIA(("] "));
1336 	return (rc);
1337 }
1338 
1339 
1340 static sfrc_t
1341 sf_init(
1342 	ufs_failure_t	*f,
1343 	ufsa_t	 a,
1344 	ufs_failure_states_t	 s)
1345 {
1346 	sfrc_t		rc = SFRC_FAIL;
1347 	extern time_t	time;
1348 
1349 	TRIVIA(("[sf_init, action is %s", act_name(a)));
1350 	ASSERT(s & UF_INIT);
1351 
1352 	switch (a) {
1353 	case UFA_SET:
1354 		f->uf_begin_tm = time;
1355 		f->uf_retry = 1;
1356 		if (!f->uf_ufsvfsp) {
1357 			(void) set_state(f, UF_PANIC);
1358 			TRIVIA((": NULL ufsvfsp]\n"));
1359 			return (rc);
1360 		}
1361 		/*
1362 		 * because we can call panic from many different levels,
1363 		 * we can't be sure that we've got the vfs_lock at this
1364 		 * point.  However, there's not much alternative and if
1365 		 * we don't (have the lock) the worst case is we'll just
1366 		 * panic again
1367 		 */
1368 		f->uf_vfs_lockp		= &f->uf_ufsvfsp->vfs_lock;
1369 		f->uf_vfs_ufsfxp	= &f->uf_ufsvfsp->vfs_fsfx;
1370 
1371 		if (!f->uf_ufsvfsp->vfs_bufp) {
1372 			(void) set_state(f, UF_PANIC);
1373 			TRIVIA((": NULL vfs_bufp]\n"));
1374 			return (rc);
1375 		}
1376 		f->uf_bp = f->uf_ufsvfsp->vfs_bufp;
1377 
1378 		if (!f->uf_ufsvfsp->vfs_bufp->b_un.b_fs) {
1379 			(void) set_state(f, UF_PANIC);
1380 			TRIVIA((": NULL vfs_fs]\n"));
1381 			return (rc);
1382 		}
1383 
1384 		/* vfs_fs = vfs_bufp->b_un.b_fs */
1385 		bcopy(f->uf_ufsvfsp->vfs_fs->fs_fsmnt, f->uf_fsname, MAXMNTLEN);
1386 
1387 		f->uf_lf.lf_lock  = LOCKFS_ELOCK;	/* primer */
1388 
1389 		if (!f->uf_vfsp || f->uf_vfsp->vfs_dev == NODEV) {
1390 			(void) set_state(f, UF_PANIC);
1391 			TRIVIA((": NULL vfsp or vfs_dev == NODEV"));
1392 			return (rc);
1393 		}
1394 		f->uf_dev = f->uf_vfsp->vfs_dev;
1395 
1396 		rc = SFRC_SUCCESS;
1397 		break;
1398 
1399 	case UFA_FOUND:
1400 	default:
1401 		/* failures marked init shouldn't even be on the queue yet */
1402 		rc = set_state(f, UF_QUEUE);
1403 		TRIVIA((": found failure with state init]\n"));
1404 	}
1405 
1406 	TRIVIA(("] "));
1407 	return (rc);
1408 }
1409 
1410 static sfrc_t
1411 sf_queue(
1412 	ufs_failure_t	*f,
1413 	ufsa_t	 a,
1414 	ufs_failure_states_t	 s)
1415 {
1416 	sfrc_t		rc = SFRC_FAIL;
1417 
1418 	TRIVIA(("[sf_queue, action is %s", act_name(a)));
1419 	ASSERT(s & UF_QUEUE);
1420 
1421 	if (!f->uf_ufsvfsp) {
1422 		TRIVIA((": NULL ufsvfsp]\n"));
1423 		return (rc);
1424 	}
1425 
1426 	switch (a) {
1427 	case UFA_FOUND:
1428 		rc = sf_found_queue(f);
1429 		break;
1430 
1431 	case UFA_SET:
1432 
1433 		ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex));
1434 
1435 		mutex_enter(&uf_stats.ufst_mutex);
1436 		uf_stats.ufst_num_failed++;
1437 		mutex_exit(&uf_stats.ufst_mutex);
1438 
1439 		/*
1440 		 * if can't get the vfs lock, just wait until
1441 		 * UF_TRYLCK to set fx_current
1442 		 */
1443 		if (mutex_tryenter(f->uf_vfs_lockp)) {
1444 			f->uf_vfs_ufsfxp->fx_current = f;
1445 			mutex_exit(f->uf_vfs_lockp);
1446 		} else {
1447 			mutex_enter(&uf_stats.ufst_mutex);
1448 			uf_stats.ufst_current_races++;
1449 			mutex_exit(&uf_stats.ufst_mutex);
1450 		}
1451 
1452 		f->uf_retry = 1;
1453 		rc = SFRC_SUCCESS;
1454 		TRIVIA(("] "));
1455 		break;
1456 
1457 	default:
1458 		(void) set_state(f, UF_PANIC);
1459 		TRIVIA((": failed] "));
1460 	}
1461 
1462 	return (rc);
1463 }
1464 
1465 static sfrc_t
1466 sf_found_queue(ufs_failure_t *f)
1467 {
1468 	int		replica;
1469 	sfrc_t		rc = SFRC_FAIL;
1470 
1471 	TRIVIA(("[sf_found_queue"));
1472 
1473 	/*
1474 	 * don't need to check for null ufsvfsp because
1475 	 * unmount must own list's ufs_fix.uq_mutex
1476 	 * to mark it null and we own that lock since
1477 	 * we got here.
1478 	 */
1479 
1480 	ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex));
1481 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
1482 
1483 	if (!mutex_tryenter(f->uf_vfs_lockp)) {
1484 		TRIVIA((": tryenter(vfslockp) failed; retry]\n"));
1485 		f->uf_retry = 1;
1486 		return (rc);
1487 	}
1488 
1489 	replica = f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current != NULL &&
1490 		    f->uf_vfs_ufsfxp->fx_current != f &&
1491 		    !terminal_state(f->uf_vfs_ufsfxp->fx_current->uf_s);
1492 
1493 	/*
1494 	 * copy general flags to this ufs_failure so we don't
1495 	 * need to refer back to the ufsvfs, or, more importantly,
1496 	 * don't need to keep acquiring (trying to acquire) vfs_lockp
1497 	 *
1498 	 * The most restrictive option wins:
1499 	 *  panic > errlock only > errlock+unmount > repair
1500 	 * XXX panic > elock > elock > elock+umount
1501 	 */
1502 	if (f->uf_vfs_ufsfxp->fx_flags & UFSFX_PANIC) {
1503 		if (!set_state(f, UF_PANIC)) {
1504 			TRIVIA((": marked panic but was queued?"));
1505 			real_panic(f, " ");
1506 			/*NOTREACHED*/
1507 		}
1508 		mutex_exit(f->uf_vfs_lockp);
1509 		return (rc);
1510 	}
1511 	f->uf_flags = f->uf_vfs_ufsfxp->fx_flags;
1512 
1513 	if (replica) {
1514 		if (!set_state(f, UF_REPLICA)) {
1515 			f->uf_retry = 1;
1516 			TRIVIA((": set to replica failed] "));
1517 		} else {
1518 			TRIVIA(("] "));
1519 		}
1520 		mutex_exit(f->uf_vfs_lockp);
1521 		return (rc);
1522 	}
1523 	mutex_exit(f->uf_vfs_lockp);
1524 
1525 	if (!set_state(f, UF_TRYLCK)) {
1526 		TRIVIA((": failed] "));
1527 	} else {
1528 		rc = SFRC_SUCCESS;
1529 	}
1530 	return (rc);
1531 }
1532 
1533 static sfrc_t
1534 sf_nonterm_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s)
1535 {
1536 	sfrc_t	rc = SFRC_FAIL;
1537 
1538 	TRIVIA(("[sf_nonterm_cmn, action: %s, %s", act_name(a), state_name(s)));
1539 	ASSERT(s & (UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING));
1540 	ASSERT(!terminal_state(s));
1541 
1542 	if (!f->uf_ufsvfsp && !(f->uf_s & UF_UMOUNT)) {
1543 		TRIVIA((": NULL ufsvfsp (state != UMOUNT)]\n"));
1544 		(void) set_state(f, UF_NOTFIX);
1545 		return (rc);
1546 	}
1547 
1548 	switch (a) {
1549 	case UFA_SET:
1550 		switch (s) {
1551 		case UF_TRYLCK:
1552 			ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
1553 			rc = sf_set_trylck(f);
1554 			break;
1555 
1556 		case UF_LOCKED:
1557 			rc = sf_set_locked(f);
1558 			break;
1559 
1560 		case UF_FIXING:
1561 			f->uf_flags |= UFSFX_REPAIR_START;
1562 			f->uf_retry  = ufsfx_tune.uft_fixpoll_period;
1563 			rc = SFRC_SUCCESS;
1564 			break;
1565 
1566 		case UF_UMOUNT:
1567 			f->uf_retry = -ufsfx_tune.uft_short_err_period;
1568 			rc = SFRC_SUCCESS;
1569 			break;
1570 
1571 		default:
1572 			(void) set_state(f, UF_PANIC);
1573 			TRIVIA((": failed] "));
1574 		}
1575 		break;
1576 
1577 	case UFA_FOUND:
1578 
1579 		switch (s) {
1580 		case UF_TRYLCK:
1581 			rc = sf_found_trylck(f);
1582 			break;
1583 
1584 		case UF_LOCKED:
1585 		case UF_FIXING:
1586 			rc = sf_found_lock_fix_cmn(f, s);
1587 			break;
1588 
1589 		case UF_UMOUNT:
1590 			rc = sf_found_umount(f);
1591 			break;
1592 
1593 		default:
1594 			(void) set_state(f, UF_PANIC);
1595 			TRIVIA((": failed] "));
1596 			break;
1597 		}
1598 		break;
1599 	default:
1600 		(void) set_state(f, UF_PANIC);
1601 		TRIVIA((": failed] "));
1602 		break;
1603 	}
1604 
1605 	TRIVIA(("] "));
1606 	return (rc);
1607 }
1608 
1609 static sfrc_t
1610 sf_set_trylck(ufs_failure_t *f)
1611 {
1612 	TRIVIA(("[sf_set_trylck"));
1613 
1614 	if (!mutex_tryenter(f->uf_vfs_lockp)) {
1615 		TRIVIA((": tryenter(vfslockp) failed; retry]\n"));
1616 		f->uf_retry = 1;
1617 		return (SFRC_FAIL);
1618 	}
1619 
1620 	if (!f->uf_vfs_ufsfxp->fx_current)
1621 		f->uf_vfs_ufsfxp->fx_current = f;
1622 
1623 	mutex_exit(f->uf_vfs_lockp);
1624 
1625 	f->uf_lf.lf_flags = 0;
1626 	f->uf_lf.lf_lock  = LOCKFS_ELOCK;
1627 	f->uf_retry = -ufsfx_tune.uft_fixstart_period;
1628 	TRIVIA(("] "));
1629 	return (SFRC_SUCCESS);
1630 }
1631 
1632 static sfrc_t
1633 sf_found_trylck(ufs_failure_t *f)
1634 {
1635 	struct lockfs lockfs_status;
1636 
1637 	TRIVIA(("[sf_found_trylck"));
1638 
1639 	if (trylock_time_exceeded(f) > 0) {
1640 		(void) set_state(f, UF_PANIC);
1641 		TRIVIA((": failed] "));
1642 		return (SFRC_FAIL);
1643 	}
1644 
1645 	if (!get_lockfs_status(f, &lockfs_status)) {
1646 		(void) set_state(f, UF_PANIC);
1647 		TRIVIA((": failed] "));
1648 		return (SFRC_FAIL);
1649 	}
1650 
1651 	if (f->uf_lf_err == NO_ERROR)
1652 		f->uf_lf.lf_key = lockfs_status.lf_key;
1653 
1654 	if (!set_lockfs(f, &lockfs_status)) {
1655 		(void) set_state(f, UF_PANIC);
1656 		TRIVIA((": failed] "));
1657 		return (SFRC_FAIL);
1658 	}
1659 	TRIVIA(("] "));
1660 	return (SFRC_SUCCESS);
1661 }
1662 
1663 static sfrc_t
1664 sf_set_locked(ufs_failure_t *f)
1665 {
1666 	TRIVIA(("[sf_set_locked"));
1667 
1668 	f->uf_retry = -ufsfx_tune.uft_fixstart_period;
1669 
1670 #if defined(DEBUG)
1671 	if (f->uf_flags & UFSFX_REPAIR_START)
1672 		TRIVIA(("clearing UFSFX_REPAIR_START "));
1673 #endif /* DEBUG */
1674 
1675 	f->uf_flags &= ~UFSFX_REPAIR_START;
1676 
1677 	if (f->uf_s & UF_TRYLCK) {
1678 		cmn_err(CE_WARN, "Error-locked %s: \"%s\"",
1679 				    fs_name(f), f->uf_panic_str);
1680 
1681 		if (f->uf_flags & UFSFX_LCKONLY)
1682 			cmn_err(CE_WARN, "Manual repair of %s required",
1683 								fs_name(f));
1684 	}
1685 
1686 	/*
1687 	 * just reset to current state
1688 	 */
1689 #if defined(DEBUG)
1690 	TRIVIA(("locked->locked "));
1691 #endif /* DEBUG */
1692 
1693 	TRIVIA(("] "));
1694 	return (SFRC_SUCCESS);
1695 }
1696 
1697 static sfrc_t
1698 sf_found_lock_fix_cmn(ufs_failure_t *f, ufs_failure_states_t s)
1699 {
1700 	time_t		toolong;
1701 	extern time_t	time;
1702 	struct buf	*bp			= NULL;
1703 	struct fs	*dfs;
1704 	time_t		 concerned, anxious;
1705 	sfrc_t		 rc			= SFRC_FAIL;
1706 	ulong_t		 gb_size;
1707 
1708 	TRIVIA(("[sf_found_lock_fix_cmn (\"%s\")", state_name(s)));
1709 
1710 	if (s & UF_LOCKED) {
1711 		ASSERT(MUTEX_HELD(&f->uf_mutex));
1712 
1713 		toolong = time > (ufsfx_tune.uft_too_long +
1714 							f->uf_entered_tm);
1715 		TRIVIA(("%stoolong", !toolong? "not": ""));
1716 		HIDEOUS((": time:%ld, too long:%ld, entered_tm:%ld ",
1717 		    time, ufsfx_tune.uft_too_long, f->uf_entered_tm));
1718 
1719 		if (f->uf_flags & UFSFX_LCKUMOUNT) {
1720 			if (set_state(f, UF_UMOUNT)) {
1721 				TRIVIA(("] "));
1722 				rc = SFRC_SUCCESS;
1723 			} else {
1724 				TRIVIA((": failed] "));
1725 				f->uf_retry = 1;
1726 			}
1727 			return (rc);
1728 		}
1729 		if (!toolong) {
1730 			rc = SFRC_SUCCESS;
1731 		} else {
1732 			if (!(f->uf_flags & UFSFX_REPAIR_START)) {
1733 				cmn_err(CE_WARN, "%s repair of %s not started.",
1734 						(f->uf_flags & UFSFX_LCKONLY)?
1735 						"Manual": "Automatic",
1736 						fs_name(f));
1737 
1738 				f->uf_retry = ufsfx_tune.uft_long_err_period;
1739 			} else {
1740 				f->uf_retry = ufsfx_tune.uft_long_err_period;
1741 				cmn_err(CE_WARN,
1742 		"Repair of %s is not timely; operator attention is required.",
1743 								    fs_name(f));
1744 			}
1745 			TRIVIA(("] "));
1746 			return (rc);
1747 		}
1748 	}
1749 
1750 #if defined(DEBUG)
1751 	else {
1752 		ASSERT(s & UF_FIXING);
1753 	}
1754 #endif /* DEBUG */
1755 
1756 	/*
1757 	 * get on disk superblock; force it to really
1758 	 * come from the disk
1759 	 */
1760 	(void) bfinval(f->uf_dev, 0);
1761 	bp = UFS_BREAD(f->uf_ufsvfsp, f->uf_dev, SBLOCK, SBSIZE);
1762 	if (bp) {
1763 		bp->b_flags |= (B_STALE | B_AGE);
1764 		dfs = bp->b_un.b_fs;
1765 	}
1766 
1767 	if (!bp || (bp->b_flags & B_ERROR) || ((dfs->fs_magic != FS_MAGIC) &&
1768 	    (dfs->fs_magic != MTB_UFS_MAGIC))) {
1769 		TRIVIA((": UFS_BREAD(SBLOCK) failed]\n"));
1770 		f->uf_retry = 1;
1771 		goto out;
1772 	}
1773 
1774 	/* fsck started but we haven't noticed yet? */
1775 	if (!(s & UF_FIXING) && dfs->fs_clean == FSFIX) {
1776 		if (!set_state(f, UF_FIXING)) {
1777 			TRIVIA((": failed]\n"));
1778 			f->uf_retry = 1;
1779 			goto out;
1780 		}
1781 	}
1782 
1783 	/* fsck started but didn't succeed? */
1784 	if ((s & UF_FIXING) && ((dfs->fs_clean == FSBAD) || !fsck_active(f))) {
1785 		TRIVIA((": fs_clean: %d", (int)dfs->fs_clean));
1786 		(void) set_state(f, UF_LOCKED);
1787 		cmn_err(CE_WARN, "%s: Manual repair is necessary.", fs_name(f));
1788 		f->uf_retry = ufsfx_tune.uft_long_err_period;
1789 		goto out;
1790 	}
1791 
1792 	gb_size = (dfs->fs_size * dfs->fs_bshift) / GB;
1793 	toolong = (time_t)((gb_size == 0? 1: gb_size) * SecondsPerGig);
1794 
1795 	/* fsck started but doesn't seem to be proceeding? */
1796 	if ((s & UF_FIXING) && dfs->fs_clean == FSFIX) {
1797 		if (time > f->uf_entered_tm + toolong) {
1798 
1799 			cmn_err(CE_WARN,
1800 "Repair completion timeout exceeded on %s; manual fsck may be required",
1801 								    fs_name(f));
1802 			f->uf_retry = ufsfx_tune.uft_long_err_period;
1803 		}
1804 	}
1805 
1806 	concerned = f->uf_entered_tm + (toolong / 3);
1807 	anxious = f->uf_entered_tm + ((2 * toolong) / 3);
1808 
1809 	if (time > concerned)
1810 		pester_msg(f, time > anxious? CE_WARN: CE_NOTE);
1811 
1812 	TRIVIA(("] "));
1813 
1814 out:
1815 	if (bp)
1816 		brelse(bp);
1817 
1818 	return (rc);
1819 }
1820 
1821 static sfrc_t
1822 sf_found_umount(ufs_failure_t *f)
1823 {
1824 	extern time_t	 time;
1825 	sfrc_t		 rc			= SFRC_FAIL;
1826 	struct vfs	*vfsp			= f->uf_vfsp;
1827 	struct ufsvfs	*ufsvfsp		= f->uf_ufsvfsp;
1828 	int		 toolong		= 0;
1829 	int		 err			= 0;
1830 
1831 	TRIVIA(("[sf_found_umount"));
1832 
1833 	toolong = time > ufsfx_tune.uft_too_long + f->uf_entered_tm;
1834 	if (toolong) {
1835 		TRIVIA((": unmount time limit exceeded] "));
1836 		goto out;
1837 	}
1838 
1839 	if (!vfsp || !ufsvfsp) {	/* trivial case */
1840 		TRIVIA((": NULL vfsp and/or ufsvfsp, already unmounted?] "));
1841 		goto out;
1842 	}
1843 
1844 	if (!ULOCKFS_IS_ELOCK(&ufsvfsp->vfs_ulockfs)) {
1845 		TRIVIA((": !not error locked?"));
1846 		err = EINVAL;
1847 		goto out;
1848 	}
1849 
1850 	/* The vn_vfsunlock will be done in dounmount() [.../common/fs/vfs.c] */
1851 	if (vn_vfswlock(vfsp->vfs_vnodecovered)) {
1852 		TRIVIA((": couldn't lock coveredvp"));
1853 		err = EBUSY;
1854 		goto out;
1855 	}
1856 
1857 	if ((err = dounmount(vfsp, 0, kcred)) != 0) {
1858 
1859 		/* take note, but not many alternatives here */
1860 		mutex_enter(&uf_stats.ufst_mutex);
1861 		uf_stats.ufst_unmount_failures++;
1862 		mutex_exit(&uf_stats.ufst_mutex);
1863 
1864 		TRIVIA((": unmount failed] "));
1865 	} else {
1866 		cmn_err(CE_NOTE, "unmounted error-locked %s", fs_name(f));
1867 	}
1868 
1869 out:
1870 	if (toolong || (err != EBUSY && err != EAGAIN))
1871 		rc = set_state(f, UF_NOTFIX);
1872 
1873 	TRIVIA(("] "));
1874 	return (rc);
1875 }
1876 
1877 static sfrc_t
1878 sf_term_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s)
1879 {
1880 	extern time_t	time;
1881 	sfrc_t		rc = SFRC_FAIL;
1882 
1883 	TRIVIA(("[sf_term_cmn, action is %s, state is %s",
1884 						act_name(a), state_name(s)));
1885 	ASSERT(s & (UF_FIXED | UF_NOTFIX | UF_REPLICA));
1886 	ASSERT(terminal_state(s));
1887 
1888 	if (!f->uf_ufsvfsp && !(f->uf_s & (UF_UMOUNT | UF_NOTFIX))) {
1889 		TRIVIA((": NULL ufsvfsp (state != UMOUNT | NOTFIX)]\n"));
1890 		return (rc);
1891 	}
1892 
1893 	switch (a) {
1894 	case UFA_SET:
1895 		switch (s) {
1896 		case UF_NOTFIX:
1897 		case UF_FIXED:
1898 		{	int need_lock_vfs;
1899 
1900 			if (f->uf_ufsvfsp && f->uf_vfs_lockp)
1901 				need_lock_vfs = !MUTEX_HELD(f->uf_vfs_lockp);
1902 			else
1903 				need_lock_vfs = 0;
1904 
1905 			if (need_lock_vfs && !mutex_tryenter(f->uf_vfs_lockp)) {
1906 				TRIVIA((": tryenter(vfslockp) fail; retry]\n"));
1907 				f->uf_retry = 1;
1908 				break;
1909 			}
1910 
1911 			f->uf_end_tm = time;
1912 			f->uf_lf.lf_lock = LOCKFS_OLOCK;
1913 			f->uf_retry = 0;
1914 
1915 			if (f->uf_vfs_ufsfxp)
1916 				f->uf_vfs_ufsfxp->fx_current = NULL;
1917 
1918 			if (need_lock_vfs)
1919 				mutex_exit(f->uf_vfs_lockp);
1920 
1921 			cmn_err(CE_NOTE, (s & UF_NOTFIX)? "Could not fix %s":
1922 				    "%s is now accessible", fs_name(f));
1923 
1924 			if (s & UF_FIXED) {
1925 				mutex_enter(&uf_stats.ufst_mutex);
1926 				uf_stats.ufst_num_fixed++;
1927 				mutex_exit(&uf_stats.ufst_mutex);
1928 			}
1929 			(void) timeout(ufsfx_kill_fix_failure_thread,
1930 			    (void *)(ufsfx_tune.uft_short_err_period * hz),
1931 			    ufsfx_tune.uft_short_err_period * hz);
1932 			rc = SFRC_SUCCESS;
1933 			break;
1934 		}
1935 		case UF_REPLICA:
1936 
1937 			ASSERT(MUTEX_HELD(f->uf_vfs_lockp));
1938 
1939 			/* not actually a replica? */
1940 			if (f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current &&
1941 			    f->uf_vfs_ufsfxp->fx_current != f &&
1942 			!terminal_state(f->uf_vfs_ufsfxp->fx_current->uf_s)) {
1943 
1944 				f->uf_orig = f->uf_vfs_ufsfxp->fx_current;
1945 				f->uf_retry = 0;
1946 				rc = SFRC_SUCCESS;
1947 			} else {
1948 				TRIVIA((": NULL fx_current]\n"));
1949 				f->uf_retry = 1;
1950 			}
1951 
1952 			break;
1953 
1954 		default:
1955 			rc = set_state(f, UF_PANIC);
1956 			TRIVIA((": failed] "));
1957 			break;
1958 		}
1959 		break;
1960 
1961 	case UFA_FOUND:
1962 		/*
1963 		 * XXX de-allocate these after some period?
1964 		 * XXX or move to an historical list?
1965 		 * XXX or have an ioctl which reaps them?
1966 		 */
1967 		/*
1968 		 * For now, since we don't expect lots of failures
1969 		 * to occur (to the point of memory shortages),
1970 		 * just punt
1971 		 */
1972 
1973 		/* be sure we're not wasting cpu on old failures */
1974 		if (f->uf_retry != 0) {
1975 			mutex_enter(&uf_stats.ufst_mutex);
1976 			uf_stats.ufst_cpu_waste++;
1977 			mutex_exit(&uf_stats.ufst_mutex);
1978 			f->uf_retry = 0;
1979 		}
1980 		rc = SFRC_SUCCESS;
1981 		break;
1982 
1983 	default:
1984 		(void) set_state(f, UF_PANIC);
1985 		TRIVIA((": failed] "));
1986 		break;
1987 	}
1988 
1989 	TRIVIA(("] "));
1990 	return (rc);
1991 }
1992 
1993 static sfrc_t
1994 sf_panic(
1995 	ufs_failure_t	*f,
1996 	ufsa_t	 a,
1997 	ufs_failure_states_t	 s)
1998 {
1999 	sfrc_t	rc = SFRC_FAIL;
2000 
2001 	TRIVIA(("[sf_panic, action is %s, prev. state is %s",
2002 		act_name(a), state_name(f->uf_s)));
2003 	ASSERT(s & UF_PANIC);
2004 
2005 	switch (a) {
2006 	case UFA_SET:
2007 		f->uf_retry = -ufsfx_tune.uft_short_err_period;
2008 		rc = SFRC_SUCCESS;
2009 		break;
2010 
2011 	case UFA_FOUND:
2012 	default:
2013 		real_panic(f, " ");
2014 
2015 		/* LINTED: warning: logical expression always true: op "||" */
2016 		ASSERT(DEBUG);
2017 
2018 		(void) set_state(f, UF_UMOUNT);	/* XXX UF_NOTFIX? */
2019 
2020 		break;
2021 	}
2022 
2023 	TRIVIA(("] "));
2024 	return (rc);
2025 }
2026 
2027 /*
2028  * minimum state function
2029  */
2030 static sfrc_t
2031 sf_minimum(
2032 	ufs_failure_t	*f,
2033 	ufsa_t	 a, /* LINTED argument unused in function: ignored */
2034 	ufs_failure_states_t	 ignored)
2035 {
2036 	sfrc_t rc = SFRC_FAIL;
2037 
2038 	TRIVIA(("[sf_minimum, action is %s", act_name(a)));
2039 
2040 	switch (a) {
2041 	case UFA_SET:
2042 		f->uf_retry = 0;
2043 		/* FALLTHROUGH */
2044 
2045 	case UFA_FOUND:
2046 		rc = SFRC_SUCCESS;
2047 		break;
2048 
2049 	default:
2050 		(void) set_state(f, UF_PANIC);
2051 		TRIVIA((": failed] "));
2052 		break;
2053 	}
2054 
2055 	TRIVIA(("] "));
2056 	return (rc);
2057 }
2058 
2059 static int
2060 state_trans_valid(ufs_failure_states_t from, ufs_failure_states_t to)
2061 {
2062 	ufsd_t	*s;
2063 	int	 valid;
2064 
2065 	HIDEOUS(("[state_trans_valid"));
2066 
2067 	if (from & to)
2068 		return (1);
2069 
2070 	s = get_state_desc(to);
2071 
2072 	/*
2073 	 * extra test is necessary since we want UF_UNDEF = 0,
2074 	 * (to detect freshly allocated memory)
2075 	 * but can't check for that value with a bit test
2076 	 */
2077 	valid = (to & UF_INIT)? from == s->ud_prev: from & s->ud_prev;
2078 
2079 	HIDEOUS((": %svalid] ", valid? "": "in"));
2080 	return (valid);
2081 }
2082 
2083 static int
2084 terminal_state(ufs_failure_states_t state)
2085 {
2086 	ufsd_t	*s;
2087 
2088 	HIDEOUS(("[terminal_state"));
2089 
2090 	s = get_state_desc(state);
2091 
2092 	HIDEOUS((": %sterminal] ", s->ud_attr.terminal? "": "not "));
2093 	return ((int)s->ud_attr.terminal);
2094 }
2095 
2096 static void
2097 alloc_lockfs_comment(ufs_failure_t *f, struct lockfs *lfp)
2098 {
2099 	MINUTE(("[alloc_lockfs_comment"));
2100 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2101 
2102 	/*
2103 	 * ufs_fiolfs expects a kmem_alloc'ed comment;
2104 	 * it frees the comment if the lock fails
2105 	 * or else when the lock is unlocked.
2106 	 */
2107 
2108 	f->uf_lf.lf_comment = kmem_zalloc(LOCKFS_MAXCOMMENTLEN, KM_NOSLEEP);
2109 	if (f->uf_lf.lf_comment) {
2110 		char	*from;
2111 		size_t	 len;
2112 
2113 		/*
2114 		 * use panic string if there's no previous comment
2115 		 * or if we're setting the error lock
2116 		 */
2117 		if ((LOCKFS_IS_ELOCK(&f->uf_lf) || !lfp->lf_comment ||
2118 		    lfp->lf_comlen <= 0)) {
2119 			from = f->uf_panic_str;
2120 			len = LOCKFS_MAXCOMMENTLEN;
2121 		} else {
2122 			from = lfp->lf_comment;
2123 			len = lfp->lf_comlen;
2124 		}
2125 
2126 		bcopy(from, f->uf_lf.lf_comment, len);
2127 		f->uf_lf.lf_comlen = len;
2128 
2129 	} else {
2130 		f->uf_lf.lf_comlen = 0;
2131 	}
2132 	MINUTE(("] "));
2133 }
2134 
2135 static int
2136 set_lockfs(ufs_failure_t *f, struct lockfs *lfp)
2137 {
2138 	int	(*handle_lockfs_rc)(ufs_failure_t *);
2139 	int	  rc;
2140 
2141 	MINUTE(("[set_lockfs"));
2142 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2143 	ASSERT(!vfs_lock_held(f->uf_vfsp));
2144 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
2145 
2146 	if (!f->uf_ufsvfsp) {
2147 		MINUTE((": ufsvfsp is NULL]\n"));
2148 		return (0);
2149 	}
2150 
2151 	ASSERT(MUTEX_NOT_HELD(&f->uf_ufsvfsp->vfs_ulockfs.ul_lock));
2152 
2153 	if (!f->uf_ufsvfsp->vfs_root) {
2154 		MINUTE((": vfs_root is NULL]\n"));
2155 		return (0);
2156 	}
2157 
2158 	alloc_lockfs_comment(f, lfp);
2159 	f->uf_lf_err = 0;
2160 
2161 	if (!LOCKFS_IS_ELOCK(lfp)) {
2162 		lfp->lf_lock = f->uf_lf.lf_lock = LOCKFS_ELOCK;
2163 		VN_HOLD(f->uf_ufsvfsp->vfs_root);
2164 		f->uf_lf_err = ufs__fiolfs(f->uf_ufsvfsp->vfs_root,
2165 						&f->uf_lf,
2166 						/* from_user */ 0,
2167 						/* from_log  */ 0);
2168 		VN_RELE(f->uf_ufsvfsp->vfs_root);
2169 	}
2170 
2171 	handle_lockfs_rc = f->uf_lf_err != 0? lockfs_failure: lockfs_success;
2172 	rc = handle_lockfs_rc(f);
2173 
2174 	MINUTE(("] "));
2175 	return (rc);
2176 }
2177 
2178 static int
2179 lockfs_failure(ufs_failure_t *f)
2180 {
2181 	int	error;
2182 	ufs_failure_states_t	s;
2183 
2184 	TRIVIA(("[lockfs_failure"));
2185 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2186 
2187 	if (!f->uf_ufsvfsp) {
2188 		TRIVIA((": ufsvfsp is NULL]\n"));
2189 		return (0);
2190 	}
2191 
2192 	error = f->uf_lf_err;
2193 	switch (error) {
2194 			/* non-transient errors: */
2195 	case EACCES:	/* disk/in-core metadata reconciliation failed  */
2196 	case EPERM:	/* inode reconciliation failed; incore inode changed? */
2197 	case EIO:	/* device is hard-locked or not responding */
2198 	case EROFS:	/* device is write-locked */
2199 	case EDEADLK:	/* can't lockfs; deadlock would result; */
2200 			/* Swapping or saving accounting records */
2201 			/* onto this fs can cause this errno. */
2202 
2203 		MINOR(("ufs_fiolfs(\"%s\") of %s failed: %s (%d)",
2204 				fs_name(f),
2205 				lock_name(&f->uf_lf),
2206 				err_name(error),
2207 				error));
2208 
2209 		/*
2210 		 * if can't get lock, then fallback to panic, unless
2211 		 * unless unmount was requested (although unmount will
2212 		 * probably fail if the lock failed, so we'll panic
2213 		 * anyway
2214 		 */
2215 
2216 		s = ((f->uf_flags & UFSFX_LCKUMOUNT) && error != EDEADLK)?
2217 							UF_UMOUNT: UF_PANIC;
2218 
2219 		if (!set_state(f, s)) {
2220 			real_panic(f, " ");
2221 			/*NOTREACHED*/
2222 			break;
2223 		}
2224 		break;
2225 
2226 
2227 	case EBUSY:
2228 	case EAGAIN:
2229 
2230 		f->uf_retry = ufsfx_tune.uft_short_err_period;
2231 		if (curthread->t_flag & T_DONTPEND) {
2232 			curthread->t_flag &= ~T_DONTPEND;
2233 
2234 		} else if (!(f->uf_s & (UF_LOCKED | UF_FIXING))) {
2235 			ufs_failure_states_t state;
2236 			/*
2237 			 * if we didn't know that the fix had started,
2238 			 * take note
2239 			 */
2240 			state = error == EBUSY? UF_LOCKED: UF_FIXING;
2241 			if (!set_state(f, state)) {
2242 				TRIVIA((": failed] "));
2243 				return (0);
2244 			}
2245 		}
2246 		break;
2247 
2248 	default:	/* some other non-fatal error */
2249 		MINOR(("lockfs(\"%s\") of %s returned %s (%d)",
2250 					lock_name(&f->uf_lf),
2251 					fs_name(f),
2252 					err_name(f->uf_lf_err),
2253 					f->uf_lf_err));
2254 
2255 		f->uf_retry = ufsfx_tune.uft_short_err_period;
2256 		break;
2257 
2258 	case EINVAL:	/* unmounted? */
2259 		(void) set_state(f, UF_NOTFIX);
2260 		break;
2261 	}
2262 	TRIVIA(("] "));
2263 	return (1);
2264 }
2265 
2266 static int
2267 lockfs_success(ufs_failure_t *f)
2268 {
2269 	TRIVIA(("[lockfs_success"));
2270 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2271 
2272 	if (!f->uf_ufsvfsp) {
2273 		TRIVIA((": ufsvfsp is NULL]\n"));
2274 		return (0);
2275 	}
2276 
2277 	switch (f->uf_lf.lf_lock) {
2278 	case LOCKFS_ELOCK:	/* error lock worked */
2279 
2280 		if (!set_state(f, UF_LOCKED)) {
2281 			TRIVIA((": failed] "));
2282 			return (0);
2283 		}
2284 		break;
2285 
2286 	case LOCKFS_ULOCK: 			/* unlock worked */
2287 		/*
2288 		 * how'd we get here?
2289 		 * This should be done from fsck's unlock,
2290 		 * not from this thread's context.
2291 		 */
2292 		cmn_err(CE_WARN, "Unlocked error-lock of %s", fs_name(f));
2293 		ufsfx_unlockfs(f->uf_ufsvfsp);
2294 		break;
2295 
2296 	default:
2297 		if (!set_state(f, UF_NOTFIX)) {
2298 			TRIVIA((": failed] "));
2299 			return (0);
2300 		}
2301 		break;
2302 	}
2303 	TRIVIA(("] "));
2304 	return (1);
2305 }
2306 
2307 /*
2308  * when fsck is running it puts its pid into the lockfs
2309  * comment structure, prefaced by PIDSTR
2310  */
2311 const char *PIDSTR = "[pid:";
2312 static int
2313 fsck_active(ufs_failure_t *f)
2314 {
2315 	char		*cp;
2316 	int		 i, found, errlocked;
2317 	size_t		 comlen;
2318 	const int	 PIDSTRLEN = (int)strlen(PIDSTR);
2319 	struct ulockfs	*ulp = &f->uf_ufsvfsp->vfs_ulockfs;
2320 
2321 	TRIVIA(("[fsck_active"));
2322 
2323 	ASSERT(f);
2324 	ASSERT(f->uf_s & UF_FIXING);
2325 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2326 	ASSERT(f->uf_ufsvfsp);
2327 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
2328 	ASSERT(MUTEX_NOT_HELD(&ulp->ul_lock));
2329 
2330 	mutex_enter(&ulp->ul_lock);
2331 	cp = ulp->ul_lockfs.lf_comment;
2332 	comlen = ulp->ul_lockfs.lf_comlen;
2333 	errlocked = (int)ULOCKFS_IS_ELOCK(ulp);
2334 	mutex_exit(&ulp->ul_lock);
2335 
2336 	if (!cp || comlen == 0) {
2337 		TRIVIA((": null comment or comlen <= 0, found:0]"));
2338 		return (0);
2339 	}
2340 
2341 	for (found = i = 0; !found && i < (comlen - PIDSTRLEN); i++, cp++)
2342 		found = strncmp(cp, PIDSTR, PIDSTRLEN) == 0;
2343 
2344 	TRIVIA(("found:%d, is_elock:%d]", found, errlocked));
2345 	return (errlocked & found);
2346 }
2347 
2348 static const char unknown_fs[]		= "<unknown fs>";
2349 static const char null_failure[] = "<NULL ufs failure record; unknown fs>";
2350 static const char mutated_vfs_bufp[]	= "<mutated vfs_bufp, unknown fs>";
2351 static const char mutated_vfs_fs[]	= "<mutated vfs_fs, unknown fs>";
2352 
2353 static char *
2354 fs_name(ufs_failure_t *f)
2355 {
2356 	HIDEOUS(("[fs_name"));
2357 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2358 
2359 	if (!f) {
2360 		HIDEOUS((": failure ptr is NULL]\n"));
2361 		return ((char *)null_failure);
2362 	}
2363 
2364 	if (f->uf_fsname[0] != '\0') {
2365 		HIDEOUS((": return (uf_fsname)]\n"));
2366 		return (f->uf_fsname);
2367 	}
2368 
2369 	if (MUTEX_HELD(f->uf_vfs_lockp)) {
2370 		if (f->uf_bp != f->uf_ufsvfsp->vfs_bufp) {
2371 			HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n",
2372 			    (void *)f->uf_bp, (void *)f->uf_ufsvfsp->vfs_bufp));
2373 			return ((char *)mutated_vfs_bufp);
2374 		}
2375 		if (f->uf_fs != f->uf_ufsvfsp->vfs_fs) {
2376 			HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n",
2377 			    (void *)f->uf_fs, (void *)f->uf_ufsvfsp->vfs_fs));
2378 			return ((char *)mutated_vfs_fs);
2379 		}
2380 		if (f->uf_ufsvfsp && f->uf_bp && f->uf_fs &&
2381 		    *f->uf_fs->fs_fsmnt != '\0') {
2382 			HIDEOUS((": return (fs_fsmnt)]\n"));
2383 			return (f->uf_fs->fs_fsmnt);
2384 		}
2385 	}
2386 
2387 	HIDEOUS((": unknown file system]\n"));
2388 	return ((char *)unknown_fs);
2389 }
2390 
2391 #if defined(DEBUG)
2392 static char *
2393 lock_name(struct lockfs *lfp)
2394 {
2395 	struct lock_description	*l;
2396 	char			*lname;
2397 
2398 	HIDEOUS(("[lock_name"));
2399 
2400 	lname = lock_desc[0].ld_name;
2401 	for (l = &lock_desc[1]; l->ld_name != NULL; l++) {
2402 		if (lfp && lfp->lf_lock == l->ld_type) {
2403 			lname = l->ld_name;
2404 			break;
2405 		}
2406 	}
2407 	HIDEOUS(("]"));
2408 	return (lname);
2409 }
2410 
2411 static char *
2412 state_name(ufs_failure_states_t state)
2413 {
2414 	ufsd_t	*s;
2415 
2416 	HIDEOUS(("[state_name"));
2417 
2418 	s = get_state_desc(state);
2419 
2420 	HIDEOUS(("]"));
2421 	return (s->ud_name);
2422 }
2423 
2424 static char *
2425 err_name(int error)
2426 {
2427 	struct error_description *e;
2428 
2429 	HIDEOUS(("[err_name"));
2430 
2431 	for (e = &err_desc[1]; e->ed_name != NULL; e++) {
2432 		if (error == e->ed_errno) {
2433 			HIDEOUS(("]"));
2434 			return (e->ed_name);
2435 		}
2436 	}
2437 	HIDEOUS(("]"));
2438 	return (err_desc[0].ed_name);
2439 }
2440 
2441 static char *
2442 act_name(ufsa_t action)
2443 {
2444 	struct action_description *a;
2445 
2446 	HIDEOUS(("[act_name"));
2447 
2448 	for (a = &act_desc[1]; a->ad_name != NULL; a++) {
2449 		if (action == a->ad_v) {
2450 			HIDEOUS(("]"));
2451 			return (a->ad_name);
2452 		}
2453 	}
2454 	HIDEOUS(("]"));
2455 	return (act_desc[0].ad_name);
2456 }
2457 
2458 /*
2459  * dump failure list
2460  */
2461 static void
2462 dump_uf_list(char *msg)
2463 {
2464 	ufs_failure_t	*f;
2465 	int		 i;
2466 	int		 list_was_locked = MUTEX_HELD(&ufs_fix.uq_mutex);
2467 
2468 	if (!list_was_locked && !mutex_tryenter(&ufs_fix.uq_mutex)) {
2469 		printf("dump_uf_list: couldn't get list lock\n");
2470 		return;
2471 	}
2472 
2473 	if (msg) {
2474 		printf("\n%s", msg);
2475 	}
2476 	printf("\ndump_uf_list:\n\tuq_lowat: %d, uq_ne: %d\n",
2477 		ufs_fix.uq_lowat, ufs_fix.uq_ne);
2478 
2479 	mutex_enter(&uf_stats.ufst_mutex);
2480 	printf("\tuf_stats.current_races: %ld\n", uf_stats.ufst_current_races);
2481 	printf("\tuf_stats.num_failed: %ld\n", uf_stats.ufst_num_failed);
2482 	printf("\tuf_stats.num_fixed: %ld\n", uf_stats.ufst_num_fixed);
2483 	printf("\tuf_stats.cpu_waste: %ld\n", uf_stats.ufst_cpu_waste);
2484 	printf("\tuf_stats.lock_violations: %ld, unmount_failures: %ld\n",
2485 		uf_stats.ufst_lock_violations, uf_stats.ufst_unmount_failures);
2486 	mutex_exit(&uf_stats.ufst_mutex);
2487 
2488 	for (f = ufs_fix.uq_ufhead, i = 1; f; f = f->uf_next, i++) {
2489 
2490 		if (!mutex_tryenter(&f->uf_mutex)) {
2491 			printf("%d.\t\"skipped - try enter failed\"\n", i);
2492 			continue;
2493 		}
2494 
2495 		dump_uf(f, i);
2496 
2497 		mutex_exit(&f->uf_mutex);
2498 	}
2499 
2500 	printf("\n");
2501 
2502 	if (!list_was_locked)
2503 		mutex_exit(&ufs_fix.uq_mutex);
2504 }
2505 
2506 static void
2507 dump_uf(ufs_failure_t *f, int i)
2508 {
2509 	if (!f) {
2510 		printf("dump_uf: NULL failure record\n");
2511 		return;
2512 	}
2513 
2514 	printf("%d.\t\"%s\" is %s.\n",
2515 		    i, fs_name(f), state_name(f->uf_s));
2516 	printf("\t\"%s\"\tAddr: 0x%p\n", f->uf_panic_str, (void *)f);
2517 	printf("\tNext: 0x%p\t\tPrev: 0x%p\n",
2518 					(void *)f->uf_next, (void *)f->uf_prev);
2519 
2520 	if (f->uf_orig)
2521 		printf("\tOriginal failure: 0x%p \"%s\"\n",
2522 		    (void *)f->uf_orig, f->uf_orig->uf_panic_str);
2523 
2524 	printf("\tUfsvfs: 0x%p\t\tVfs_lockp: 0x%p\n",
2525 		    (void *)f->uf_ufsvfsp, (void *)f->uf_vfs_lockp);
2526 	printf("\tVfs_fsfxp: 0x%p\n", (void *)f->uf_vfs_ufsfxp);
2527 	printf("\tVfs_bufp: 0x%p", (void *)f->uf_bp);
2528 
2529 	if (f->uf_bp)
2530 		printf("\t\tVfs_fs: 0x%p\n", (void *)f->uf_fs);
2531 	else
2532 		printf("\n");
2533 
2534 	printf("\tBegin: 0x%lx\tEntered: 0x%lx\tEnd: 0x%lx\n",
2535 	    f->uf_begin_tm, f->uf_entered_tm, f->uf_end_tm);
2536 
2537 	printf("\tFlags: (%d) %s%s%s%s", f->uf_flags,
2538 		f->uf_flags & UFSFX_LCKONLY?	 "\"lock only\" "	: "",
2539 		f->uf_flags & UFSFX_LCKUMOUNT?	 "\"lock+unmount\" "	: "",
2540 		f->uf_flags & UFSFX_REPAIR_START? "\"started repair\" "	: "",
2541 		f->uf_flags == 0?                "<none>"               : "");
2542 
2543 	printf("\tRetry: %ld seconds\n", f->uf_retry);
2544 
2545 	printf("\tLockfs:\ttype: %s\terror: %s (%d)\n",
2546 		lock_name(&f->uf_lf),
2547 		err_name(f->uf_lf_err), f->uf_lf_err);
2548 
2549 }
2550 #endif /* DEBUG */
2551 
2552 /*
2553  * returns # of ufs_failures in a non-terminal state on queue
2554  * used to coordinate with hlock thread (see ufs_thread.c)
2555  * and to determine when the error lock thread may exit
2556  */
2557 
2558 int
2559 ufsfx_get_failure_qlen(void)
2560 {
2561 	ufs_failure_t	*f;
2562 	ufsd_t		*s;
2563 	int		 qlen = 0;
2564 
2565 	MINUTE(("[ufsfx_get_failure_qlen"));
2566 
2567 	if (!mutex_tryenter(&ufs_fix.uq_mutex))
2568 		return (-1);
2569 
2570 	/*
2571 	 * walk down failure list
2572 	 */
2573 
2574 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
2575 
2576 		if (!mutex_tryenter(&f->uf_mutex))
2577 			continue;
2578 
2579 		s = get_state_desc(f->uf_s);
2580 
2581 		if (s->ud_attr.terminal) {
2582 			mutex_exit(&f->uf_mutex);
2583 			continue;
2584 		}
2585 
2586 		MINUTE((": found: %s, \"%s: %s\"\n",
2587 			    fs_name(f), state_name(f->uf_s), f->uf_panic_str));
2588 
2589 		qlen++;
2590 		mutex_exit(&f->uf_mutex);
2591 	}
2592 
2593 	mutex_exit(&ufs_fix.uq_mutex);
2594 
2595 	MINUTE((": qlen=%d]\n", qlen));
2596 
2597 	return (qlen);
2598 }
2599 
2600 /*
2601  * timeout routine
2602  *  called to shutdown fix failure thread and server daemon
2603  */
2604 static void
2605 ufsfx_kill_fix_failure_thread(void *arg)
2606 {
2607 	clock_t odelta = (clock_t)arg;
2608 	int	qlen;
2609 
2610 	MAJOR(("[ufsfx_kill_fix_failure_thread"));
2611 
2612 	qlen = ufsfx_get_failure_qlen();
2613 
2614 	if (qlen < 0) {
2615 		clock_t delta;
2616 
2617 		delta = odelta << 1;
2618 		if (delta <= 0)
2619 			delta = INT_MAX;
2620 
2621 		(void) timeout(ufsfx_kill_fix_failure_thread,
2622 		    (void *)delta, delta);
2623 		MAJOR((": rescheduled"));
2624 
2625 	} else if (qlen == 0) {
2626 		ufs_thread_exit(&ufs_fix);
2627 		MAJOR((": killed"));
2628 	}
2629 	/*
2630 	 * else
2631 	 *  let timeout expire
2632 	 */
2633 	MAJOR(("]\n"));
2634 }
2635