xref: /titanic_50/usr/src/uts/common/fs/ufs/ufs_panic.c (revision d3d50737e566cade9a08d73d2af95105ac7cd960)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/errno.h>
30 #include <sys/mode.h>
31 #include <sys/sysmacros.h>
32 #include <sys/cmn_err.h>
33 #include <sys/varargs.h>
34 #include <sys/time.h>
35 #include <sys/buf.h>
36 #include <sys/kmem.h>
37 #include <sys/t_lock.h>
38 #include <sys/poll.h>
39 #include <sys/debug.h>
40 #include <sys/cred.h>
41 #include <sys/lockfs.h>
42 #include <sys/fs/ufs_fs.h>
43 #include <sys/fs/ufs_inode.h>
44 #include <sys/fs/ufs_panic.h>
45 #include <sys/fs/ufs_lockfs.h>
46 #include <sys/fs/ufs_trans.h>
47 #include <sys/fs/ufs_mount.h>
48 #include <sys/fs/ufs_prot.h>
49 #include <sys/fs/ufs_bio.h>
50 #include <sys/pathname.h>
51 #include <sys/utsname.h>
52 #include <sys/conf.h>
53 
54 /* handy */
55 #define	abs(x)		((x) < 0? -(x): (x))
56 
57 #if defined(DEBUG)
58 
59 #define	DBGLVL_NONE	0x00000000
60 #define	DBGLVL_MAJOR	0x00000100
61 #define	DBGLVL_MINOR	0x00000200
62 #define	DBGLVL_MINUTE	0x00000400
63 #define	DBGLVL_TRIVIA	0x00000800
64 #define	DBGLVL_HIDEOUS	0x00001000
65 
66 #define	DBGFLG_NONE		0x00000000
67 #define	DBGFLG_NOPANIC		0x00000001
68 #define	DBGFLG_LVLONLY		0x00000002
69 #define	DBGFLG_FIXWOULDPANIC	0x00000004
70 
71 #define	DBGFLG_FLAGMASK		0x0000000F
72 #define	DBGFLG_LEVELMASK	~DBGFLG_FLAGMASK
73 
74 #define	DEBUG_FLAGS	(ufs_fix_failure_dbg & DBGFLG_FLAGMASK)
75 #define	DEBUG_LEVEL	(ufs_fix_failure_dbg & DBGFLG_LEVELMASK)
76 
77 unsigned int ufs_fix_failure_dbg =	DBGLVL_NONE | DBGFLG_NONE;
78 
79 #define	DCALL(dbg_level, call)						\
80 	{								\
81 		if (DEBUG_LEVEL != DBGLVL_NONE) {			\
82 			if (DEBUG_FLAGS & DBGFLG_LVLONLY) {		\
83 				if (DEBUG_LEVEL & dbg_level) {		\
84 					call;				\
85 				}					\
86 			} else {					\
87 				if (dbg_level <= DEBUG_LEVEL) {		\
88 					call;				\
89 				}					\
90 			}						\
91 		}							\
92 	}
93 
94 #define	DPRINTF(dbg_level, msg)		DCALL(dbg_level, printf msg)
95 
96 #define	MAJOR(msg)			DPRINTF(DBGLVL_MAJOR, msg)
97 #define	MINOR(msg)			DPRINTF(DBGLVL_MINOR, msg)
98 #define	MINUTE(msg)			DPRINTF(DBGLVL_MINUTE, msg)
99 #define	TRIVIA(msg)			DPRINTF(DBGLVL_TRIVIA, msg)
100 #define	HIDEOUS(msg)			DPRINTF(DBGLVL_HIDEOUS, msg)
101 
102 #else	/* !DEBUG */
103 
104 #define	DCALL(ignored_dbg_level, ignored_routine)
105 #define	MAJOR(ignored)
106 #define	MINOR(ignored)
107 #define	MINUTE(ignored)
108 #define	TRIVIA(ignored)
109 #define	HIDEOUS(ignored)
110 
111 #endif /* DEBUG */
112 
113 #define	NULLSTR(str)	(!(str) || *(str) == '\0'? "<null>" : (str))
114 #define	NULSTRING	""
115 
116 /* somewhat arbitrary limits, in seconds */
117 /* all probably ought to be different, but these are convenient for debugging */
118 const time_t	UF_TOO_LONG		= 128;	/* max. wait for fsck start */
119 
120 /* all of these are in units of seconds used for retry period while ... */
121 const time_t	UF_FIXSTART_PERIOD	= 16;	/* awaiting fsck start */
122 const time_t	UF_FIXPOLL_PERIOD	= 256;	/* awaiting fsck finish */
123 const time_t	UF_SHORT_ERROR_PERIOD	= 4;	/* after (lockfs) error */
124 const time_t	UF_LONG_ERROR_PERIOD	= 512;	/* after (lockfs) error */
125 
126 #define	NO_ERROR		0
/* one past LOCKFS_MAXLOCK; parenthesized so it is safe inside expressions */
#define	LOCKFS_OLOCK		(LOCKFS_MAXLOCK + 1)
128 
129 const ulong_t	GB			= 1024 * 1024 * 1024;
130 const ulong_t	SecondsPerGig		= 1024;	/* ~17 minutes (overestimate) */
131 
132 /*
133  * per filesystem flags
134  */
135 const int	UFSFX_PANIC		= (UFSMNT_ONERROR_PANIC >> 4);
136 const int	UFSFX_LCKONLY		= (UFSMNT_ONERROR_LOCK >> 4);
137 const int	UFSFX_LCKUMOUNT		= (UFSMNT_ONERROR_UMOUNT >> 4);
138 const int	UFSFX_DEFAULT		= (UFSMNT_ONERROR_DEFAULT >> 4);
139 const int	UFSFX_REPAIR_START	= 0x10000000;
140 
141 /* return protocols */
142 
143 typedef enum triage_return_code {
144 	TRIAGE_DEAD = -1,
145 	TRIAGE_NO_SPIRIT,
146 	TRIAGE_ATTEND_TO
147 } triage_t;
148 
149 typedef enum statefunc_return_code {
150 	SFRC_SUCCESS = 1,
151 	SFRC_FAIL = 0
152 } sfrc_t;
153 
154 /* external references */
155 /* in ufs_thread.c */
156 extern int	ufs_thread_run(struct ufs_q *, callb_cpr_t *cprinfop);
157 extern int	ufs_checkaccton(vnode_t *);		/* in ufs_lockfs.c */
158 extern int	ufs_checkswapon(vnode_t *);		/* in ufs_lockfs.c */
159 
160 extern struct pollhead		ufs_pollhd;		/* in ufs_vnops.c */
161 
162 /* globals */
163 struct	ufs_q	 ufs_fix;
164 
165 /*
166  * patchable constants:
167  *   These are set in ufsfx_init() [called at modload]
168  */
169 struct ufs_failure_tunable {
170 	long	 uft_too_long;		/* limit repair startup time */
171 	long	 uft_fixstart_period;	/* pre-repair start period */
172 	long	 uft_fixpoll_period;	/* post-fsck start period */
173 	long	 uft_short_err_period;	/* post-error short period */
174 	long	 uft_long_err_period;	/* post-error long period */
175 } ufsfx_tune;
176 
177 /* internal statistics of events */
178 struct uf_statistics {
179 	ulong_t		ufst_lock_violations;
180 	ulong_t		ufst_current_races;
181 	ulong_t		ufst_unmount_failures;
182 	ulong_t		ufst_num_fixed;
183 	ulong_t		ufst_num_failed;
184 	ulong_t		ufst_cpu_waste;
185 	time_t		ufst_last_start_tm;
186 	kmutex_t	ufst_mutex;
187 } uf_stats;
188 
189 typedef enum state_action {
190 	UFA_ERROR = -1,		/* internal error */
191 	UFA_FOUND,		/* found uf in state */
192 	UFA_SET			/* change uf to state */
193 } ufsa_t;
194 
195 /* state definition */
196 typedef struct uf_state_desc {
197 	int	  ud_v;					/* value */
198 	char	 *ud_name;				/* name */
199 	sfrc_t	(*ud_sfp)(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
200 							/* per-state actions */
201 	ufs_failure_states_t	  ud_prev;		/* valid prev. states */
202 
203 	struct uf_state_desc_attr {
204 		unsigned	terminal:1;	/* no action req. if found */
205 		unsigned	at_fail:1;	/* state set by thread */
206 						/* encountering the error */
207 		unsigned	unused;
208 	} ud_attr;
209 } ufsd_t;
210 
211 /*
212  * forward references
213  */
214 
215 /* thread to watch for failures */
216 static void	ufsfx_thread_fix_failures(void *);
217 static int 	ufsfx_do_failure_q(void);
218 static void	ufsfx_kill_fix_failure_thread(void *);
219 
220 /* routines called when failure occurs */
221 static int		 ufs_fault_v(vnode_t *, char *, va_list)
222 	__KVPRINTFLIKE(2);
223 static ufs_failure_t	*init_failure(vnode_t *, char *, va_list)
224 	__KVPRINTFLIKE(2);
225 static void		 queue_failure(ufs_failure_t *);
226 /*PRINTFLIKE2*/
227 static void		 real_panic(ufs_failure_t *, const char *, ...)
228 	__KPRINTFLIKE(2);
229 static void		 real_panic_v(ufs_failure_t *, const char *, va_list)
230 	__KVPRINTFLIKE(2);
231 static triage_t		 triage(vnode_t *);
232 
233 /* routines called when failure record is acted upon */
234 static sfrc_t	set_state(ufs_failure_t *, ufs_failure_states_t);
235 static int	state_trans_valid(ufs_failure_states_t, ufs_failure_states_t);
236 static int	terminal_state(ufs_failure_states_t);
237 
238 /* routines called when states entered/found */
239 static sfrc_t	sf_minimum(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
240 static sfrc_t	sf_undef(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
241 static sfrc_t	sf_init(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
242 static sfrc_t	sf_queue(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
243 static sfrc_t	sf_found_queue(ufs_failure_t *);
244 static sfrc_t	sf_nonterm_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
245 static sfrc_t	sf_term_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
246 static sfrc_t	sf_panic(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
247 static sfrc_t	sf_set_trylck(ufs_failure_t *);
248 static sfrc_t	sf_set_locked(ufs_failure_t *);
249 static sfrc_t	sf_found_trylck(ufs_failure_t *);
250 static sfrc_t	sf_found_lock_fix_cmn(ufs_failure_t *, ufs_failure_states_t);
251 static sfrc_t	sf_found_umount(ufs_failure_t *);
252 
253 /* support routines, called by sf_nonterm_cmn and sf_term_cmn */
254 static time_t 	trylock_time_exceeded(ufs_failure_t *);
255 static void 	pester_msg(ufs_failure_t *, int);
256 static int 	get_lockfs_status(ufs_failure_t *, struct lockfs *);
257 static void 	alloc_lockfs_comment(ufs_failure_t *, struct lockfs *);
258 static int 	set_lockfs(ufs_failure_t *, struct lockfs *);
259 static int 	lockfs_failure(ufs_failure_t *);
260 static int 	lockfs_success(ufs_failure_t *);
261 static int	fsck_active(ufs_failure_t *);
262 
263 /* low-level support routines */
264 static ufsd_t	*get_state_desc(ufs_failure_states_t);
265 static char	*fs_name(ufs_failure_t *);
266 
267 #if defined(DEBUG)
268 static char	*state_name(ufs_failure_states_t);
269 static char	*lock_name(struct lockfs *);
270 static char	*err_name(int);
271 static char	*act_name(ufsa_t);
272 static void	 dump_uf_list(char *msg);
273 static void	 dump_uf(ufs_failure_t *, int i);
274 #endif /* DEBUG */
275 /*
276  *
277  * State Transitions:
278  *
279  * normally:
280  * if flagged to be locked but not unmounted:	(UFSMNT_ONERROR_LOCK)
281  *	UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> FIXING -> FIXED
282  *
283  * The only difference between these two is that the fsck must be started
284  * manually.
285  *
286  * if flagged to be unmounted:			(UFSMNT_ONERROR_UMOUNT)
287  *	UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> UMOUNT -> NOTFIX
288  *
289  * if flagged to panic:				(UFSMNT_ONERROR_PANIC)
290  *	UNDEF -> INIT -> PANIC
291  *
292  * if a secondary panic on a file system which has an active failure
293  * record:
294  *	UNDEF -> INIT -> QUEUE -> REPLICA
295  *
296  * UNDEF, INIT, QUEUE all are set in the context of the failing thread.
 * All other states (except possibly PANIC) are set by the monitor
298  * (lock) thread.
299  *
300  */
301 
302 ufsd_t	state_desc[] =
303 {
304 	{ UF_ILLEGAL,	"in an unknown state",	sf_minimum,	UF_ILLEGAL,
305 								{ 0, 1, 0 } },
306 	{ UF_UNDEF,	"undefined",		sf_undef,	UF_UNDEF,
307 								{ 0, 1, 0 } },
308 	{ UF_INIT,	"being initialized",	sf_init,	UF_UNDEF,
309 								{ 0, 1, 0 } },
310 	{ UF_QUEUE,	"queued",		sf_queue,	UF_INIT,
311 								{ 0, 1, 0 } },
312 	{ UF_TRYLCK,	"trying to be locked",	sf_nonterm_cmn,
313 						UF_QUEUE,	{ 0, 0, 0 } },
314 	{ UF_LOCKED,	"locked",		sf_nonterm_cmn,
315 					UF_TRYLCK | UF_FIXING,	{ 0, 0, 0 } },
316 	{ UF_UMOUNT,	"being unmounted",	sf_nonterm_cmn,
317 
318 #if defined(DEBUG)
319 					UF_PANIC |
320 #endif /* DEBUG */
321 					UF_TRYLCK | UF_LOCKED, 	{ 0, 0, 0 } },
322 	{ UF_FIXING,	"being fixed",		sf_nonterm_cmn,
323 						UF_LOCKED,	{ 0, 0, 0 } },
324 	{ UF_FIXED,	"fixed",		sf_term_cmn,
325 						UF_FIXING,	{ 1, 0, 0 } },
326 	{ UF_NOTFIX,	"not fixed",		sf_term_cmn,
327 
328 #if defined(DEBUG)
329 							UF_PANIC |
330 #endif /* DEBUG */
331 
332 	    UF_QUEUE | UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING,
333 								{ 1, 0, 0 } },
334 	{ UF_REPLICA,	"a replica",		sf_term_cmn,
335 						UF_QUEUE,	{ 1, 0, 0 } },
336 	{ UF_PANIC,	"panicking",		sf_panic,
337 		/* XXX make this narrower */	UF_ALLSTATES,	{ 0, 0, 0 } },
338 	{ UF_UNDEF,	NULL,			((sfrc_t (*)()) NULL),
339 						UF_UNDEF, 	{ 0, 0, 0 } }
340 };
341 
342 /* unified collection */
343 struct ufsfx_info {
344 	struct uf_statistics		*ufi_statp;
345 	struct ufs_failure_tunable	*ufi_tunep;
346 	ufsd_t				*ufi_statetab;
347 } uffsinfo;
348 
349 #if defined(DEBUG)
350 struct action_description {
351 	ufsa_t	 ad_v;
352 	char	*ad_name;
353 };
354 
355 #define	EUNK		(-1)
356 
357 struct error_description {
358 	int	 ed_errno;
359 	char	*ed_name;
360 } err_desc[] =
361 {
362 	{ EUNK,		"<unexpected errno?>"	},
363 	{ EINVAL,	"EINVAL"		},
364 	{ EACCES,	"EACCES"		},
365 	{ EPERM,	"EPERM"			},
366 	{ EIO,		"EIO"			},
367 	{ EDEADLK,	"EDEADLK"		},
368 	{ EBUSY,	"EBUSY"			},
369 	{ EAGAIN,	"EAGAIN"		},
370 	{ ERESTART,	"ERESTART"		},
371 	{ ETIMEDOUT,	"ETIMEDOUT"		},
372 	{ NO_ERROR,	"Ok"			},
373 	{ EUNK,		NULL 			}
374 };
375 
376 struct action_description act_desc[] =
377 {
378 	{ UFA_ERROR,	"<unexpected action?>"	},
379 	{ UFA_FOUND,	"\"found\""	},
380 	{ UFA_SET,	"\"set\""	},
381 	{ UFA_ERROR,	NULL			},
382 };
383 
384 #define	LOCKFS_BADLOCK	(-1)
385 
386 struct lock_description {
387 	int	 ld_type;
388 	char	*ld_name;
389 } lock_desc[] =
390 {
391 	{ LOCKFS_BADLOCK,	"<unexpected lock?>"	},
392 	{ LOCKFS_ULOCK,		"Unlock"		},
393 	{ LOCKFS_ELOCK,		"Error Lock"		},
394 	{ LOCKFS_HLOCK,		"Hard Lock"		},
395 	{ LOCKFS_OLOCK,		"Old Lock"		},
396 	{ LOCKFS_BADLOCK,	NULL			}
397 };
398 
399 #endif /* DEBUG */
400 
401 /*
402  * ufs_fault, ufs_fault_v
403  *
404  *  called instead of cmn_err(CE_PANIC, ...) by ufs routines
405  *  when a failure is detected to put the file system into an
406  *  error state (if possible) or to devolve to a panic otherwise
407  *
408  * vnode is some vnode in this file system, used to find the way
409  * to ufsvfs, vfsp etc.  Since a panic can be called from many
410  * levels, the vnode is the most convenient hook to pass through.
411  *
412  */
413 
414 /*PRINTFLIKE2*/
415 int
ufs_fault(vnode_t * vp,char * fmt,...)416 ufs_fault(vnode_t *vp, char *fmt, ...)
417 {
418 	va_list	adx;
419 	int	error;
420 
421 	MINOR(("[ufs_fault"));
422 
423 	va_start(adx, fmt);
424 	error = ufs_fault_v(vp, fmt, adx);
425 	va_end(adx);
426 
427 	MINOR((": %s (%d)]\n", err_name(error), error));
428 	return (error);
429 }
430 
431 const char *nullfmt = "<null format?>";
432 
433 static int
ufs_fault_v(vnode_t * vp,char * fmt,va_list adx)434 ufs_fault_v(vnode_t *vp, char *fmt, va_list adx)
435 {
436 	ufs_failure_t		*new = NULL;
437 	ufsvfs_t		*ufsvfsp;
438 	triage_t		 fix;
439 	int			 err = ERESTART;
440 	int			need_vfslock;
441 
442 	MINOR(("[ufs_fault_v"));
443 
444 	if (fmt == NULL)
445 		fmt = (char *)nullfmt;
446 
447 	fix = triage(vp);
448 
449 	if (vp) {
450 		ufsvfsp = (struct ufsvfs *)vp->v_vfsp->vfs_data;
451 
452 		/*
453 		 * Something bad has happened. That is why we are here.
454 		 *
455 		 * In order for the bad thing to be recorded in the superblock
456 		 * we need to write to the superblock directly.
457 		 * In the case that logging is enabled the logging code
458 		 * would normally intercept our write as a delta to the log,
459 		 * thus we mark the filesystem FSBAD in any case.
460 		 */
461 		need_vfslock = !MUTEX_HELD(&ufsvfsp->vfs_lock);
462 
463 		if (need_vfslock) {
464 			mutex_enter(&ufsvfsp->vfs_lock);
465 		}
466 
467 		ufsvfsp->vfs_fs->fs_clean = FSBAD;
468 		ASSERT(SEMA_HELD(&ufsvfsp->vfs_bufp->b_sem));
469 		ufsvfsp->vfs_bufp->b_flags &=
470 		    ~(B_ASYNC | B_READ | B_DONE | B_ERROR | B_DELWRI);
471 
472 		(void) bdev_strategy(ufsvfsp->vfs_bufp);
473 		(void) biowait(ufsvfsp->vfs_bufp);
474 
475 		if (need_vfslock) {
476 			mutex_exit(&ufsvfsp->vfs_lock);
477 		}
478 	}
479 
480 	switch (fix) {
481 
482 	default:
483 	case TRIAGE_DEAD:
484 	case TRIAGE_NO_SPIRIT:
485 
486 		real_panic_v(new, fmt, adx);
487 		/* LINTED: warning: logical expression always true: op "||" */
488 		ASSERT(DEBUG);
489 		err = EAGAIN;
490 
491 #if defined(DEBUG)
492 		if (!(DEBUG_FLAGS & DBGFLG_FIXWOULDPANIC)) {
493 			break;
494 		}
495 		/* FALLTHROUGH */
496 
497 #else
498 		break;
499 
500 #endif /* DEBUG */
501 
502 	case TRIAGE_ATTEND_TO:
503 
504 		/* q thread not running yet? */
505 		if (mutex_tryenter(&ufs_fix.uq_mutex)) {
506 			if (!ufs_fix.uq_threadp) {
507 				mutex_exit(&ufs_fix.uq_mutex);
508 				ufs_thread_start(&ufs_fix,
509 				    ufsfx_thread_fix_failures, NULL);
510 				ufs_fix.uq_threadp->t_flag |= T_DONTBLOCK;
511 				mutex_enter(&ufs_fix.uq_mutex);
512 			} else {
513 				/*
514 				 * We got the lock but we are not the current
515 				 * threadp so we have to release the lock.
516 				 */
517 				mutex_exit(&ufs_fix.uq_mutex);
518 			}
519 		} else {
520 			MINOR((": fix failure thread already running "));
521 			/*
522 			 * No need to log another failure as one is already
523 			 * being logged.
524 			 */
525 			break;
526 		}
527 
528 		if (ufs_fix.uq_threadp && ufs_fix.uq_threadp == curthread) {
529 			mutex_exit(&ufs_fix.uq_mutex);
530 			cmn_err(CE_WARN, "ufs_fault_v: recursive ufs_fault");
531 		} else {
532 			/*
533 			 * Must check if we actually still own the lock and
534 			 * if so then release the lock and move on with life.
535 			 */
536 			if (mutex_owner(&ufs_fix.uq_mutex) == curthread)
537 				mutex_exit(&ufs_fix.uq_mutex);
538 		}
539 
540 		new = init_failure(vp, fmt, adx);
541 		if (new != NULL) {
542 			queue_failure(new);
543 			break;
544 		}
545 		real_panic_v(new, fmt, adx);
546 		break;
547 
548 	}
549 	MINOR(("] "));
550 	return (err);
551 }
552 
553 /*
554  * triage()
555  *
556  *  Attempt to fix iff:
557  *    - the system is not already panicking
558  *    - this file system isn't explicitly marked not to be fixed
559  *    - we can connect to the user-level daemon
560  * These conditions are detectable later, but if we can determine
561  * them in the failing threads context the core dump may be more
562  * useful.
563  *
564  */
565 
566 static triage_t
triage(vnode_t * vp)567 triage(vnode_t *vp)
568 {
569 	struct inode	 *ip;
570 	int		  need_unlock_vfs;
571 	int		  fs_flags;
572 
573 	MINUTE(("[triage"));
574 
575 	if (panicstr) {
576 		MINUTE((
577 		": already panicking: \"%s\" => TRIAGE_DEAD]\n", panicstr));
578 		return (TRIAGE_DEAD);
579 	}
580 
581 	if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs) {
582 		MINUTE((
583 	": vp, ip or ufsvfs is NULL; can't determine fs => TRIAGE_DEAD]\n"));
584 		return (TRIAGE_DEAD);
585 	}
586 
587 	/* use tryenter and continue no matter what since we're panicky */
588 	need_unlock_vfs = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock);
589 	if (need_unlock_vfs)
590 		need_unlock_vfs = mutex_tryenter(&ip->i_ufsvfs->vfs_lock);
591 
592 	fs_flags = ip->i_ufsvfs->vfs_fsfx.fx_flags;
593 	if (need_unlock_vfs)
594 		mutex_exit(&ip->i_ufsvfs->vfs_lock);
595 
596 	if (fs_flags & UFSFX_PANIC) {
597 		MINUTE((
598 		": filesystem marked \"panic\" => TRIAGE_NO_SPIRIT]\n"));
599 		return (TRIAGE_NO_SPIRIT);
600 	}
601 
602 	if (ufs_checkaccton(vp) != 0) {
603 		MINUTE((
604 		": filesystem would deadlock (accounting) => TRIAGE_DEAD]\n"));
605 		return (TRIAGE_DEAD);
606 	}
607 
608 	if (ufs_checkswapon(vp) != 0) {
609 		MINUTE((
610 		": filesystem would deadlock (swapping) => TRIAGE_DEAD]\n"));
611 		return (TRIAGE_DEAD);
612 	}
613 
614 	MINUTE((": return TRIAGE_ATTEND_TO] "));
615 	return (TRIAGE_ATTEND_TO);
616 }
617 
618 /*
619  * init failure
620  *
621  * This routine allocates a failure struct and initializes
622  * it's member elements.
623  * Space is allocated for copies of dynamic identifying fs structures
624  * passed in.  Without a much more segmented kernel architecture
625  * this is as protected as we can make it (for now.)
626  */
627 static ufs_failure_t *
init_failure(vnode_t * vp,char * fmt,va_list adx)628 init_failure(vnode_t *vp, char *fmt, va_list adx)
629 {
630 	ufs_failure_t	*new;
631 	struct inode	*ip;
632 	int		 initialization_worked = 0;
633 	int		 need_vfs_unlock;
634 
635 	MINOR(("[init_failure"));
636 
637 	new = kmem_zalloc(sizeof (ufs_failure_t), KM_NOSLEEP);
638 	if (!new) {
639 		MINOR((": kmem_zalloc failed]\n"));
640 		return (NULL);
641 	}
642 
643 	/*
644 	 * enough information to make a fix attempt possible?
645 	 */
646 	if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs || !vp->v_vfsp ||
647 	    !ip->i_ufsvfs->vfs_bufp || !ITOF(ip) || !fmt)
648 		goto errout;
649 
650 	if (vp->v_type != VREG && vp->v_type != VDIR &&
651 	    vp->v_type != VBLK && vp->v_type != VCHR &&
652 	    vp->v_type != VLNK && vp->v_type != VFIFO &&
653 	    vp->v_type != VSOCK)
654 		goto errout;
655 
656 	if (ip->i_ufsvfs->vfs_root->v_type != VREG &&
657 	    ip->i_ufsvfs->vfs_root->v_type != VDIR &&
658 	    ip->i_ufsvfs->vfs_root->v_type != VBLK &&
659 	    ip->i_ufsvfs->vfs_root->v_type != VCHR &&
660 	    ip->i_ufsvfs->vfs_root->v_type != VLNK &&
661 	    ip->i_ufsvfs->vfs_root->v_type != VFIFO &&
662 	    ip->i_ufsvfs->vfs_root->v_type != VSOCK)
663 		goto errout;
664 
665 	if ((ITOF(ip)->fs_magic != FS_MAGIC) &&
666 	    (ITOF(ip)->fs_magic != MTB_UFS_MAGIC))
667 		goto errout;
668 
669 	/* intialize values */
670 
671 	(void) vsnprintf(new->uf_panic_str, LOCKFS_MAXCOMMENTLEN - 1, fmt, adx);
672 
673 	new->uf_ufsvfsp = ip->i_ufsvfs;
674 	new->uf_vfsp    = ip->i_vfs;
675 
676 	mutex_init(&new->uf_mutex, NULL, MUTEX_DEFAULT, NULL);
677 	need_vfs_unlock = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock);
678 
679 	if (need_vfs_unlock) {
680 		if (!mutex_tryenter(&ip->i_ufsvfs->vfs_lock)) {
681 			/*
682 			 * not much alternative here, but we're panicking
683 			 * already, it couldn't be worse - so just
684 			 * proceed optimistically and take note.
685 			 */
686 			mutex_enter(&uf_stats.ufst_mutex);
687 			uf_stats.ufst_lock_violations++;
688 			mutex_exit(&uf_stats.ufst_mutex);
689 			MINOR((": couldn't get vfs lock"))
690 			need_vfs_unlock = 0;
691 		}
692 	}
693 
694 	if (mutex_tryenter(&new->uf_mutex)) {
695 		initialization_worked = set_state(new, UF_INIT);
696 		mutex_exit(&new->uf_mutex);
697 	}
698 
699 	if (need_vfs_unlock)
700 		mutex_exit(&ip->i_ufsvfs->vfs_lock);
701 
702 	if (initialization_worked) {
703 		MINOR(("] "));
704 		return (new);
705 	}
706 	/* FALLTHROUGH */
707 
708 errout:
709 	if (new)
710 		kmem_free(new, sizeof (ufs_failure_t));
711 	MINOR((": failed]\n"));
712 	return (NULL);
713 }
714 
715 static void
queue_failure(ufs_failure_t * new)716 queue_failure(ufs_failure_t *new)
717 {
718 	MINOR(("[queue_failure"));
719 
720 	mutex_enter(&ufs_fix.uq_mutex);
721 
722 	if (ufs_fix.uq_ufhead)
723 		insque(new, &ufs_fix.uq_ufhead);
724 	else
725 		ufs_fix.uq_ufhead = new;
726 
727 	if (mutex_tryenter(&new->uf_mutex)) {
728 		(void) set_state(new, UF_QUEUE);
729 		mutex_exit(&new->uf_mutex);
730 	}
731 
732 	mutex_enter(&uf_stats.ufst_mutex);		/* force wakeup */
733 	ufs_fix.uq_ne = ufs_fix.uq_lowat = uf_stats.ufst_num_failed;
734 	mutex_exit(&uf_stats.ufst_mutex);
735 
736 	cv_broadcast(&ufs_fix.uq_cv);
737 
738 	DCALL(DBGLVL_MAJOR, cmn_err(CE_WARN, new->uf_panic_str ?
739 	    new->uf_panic_str : "queue_failure: NULL panic str?"));
740 	mutex_exit(&ufs_fix.uq_mutex);
741 
742 	MINOR(("] "));
743 }
744 
745 /*PRINTFLIKE2*/
746 static void
real_panic(ufs_failure_t * f,const char * fmt,...)747 real_panic(ufs_failure_t *f, const char *fmt, ...)
748 {
749 	va_list	adx;
750 
751 	MINUTE(("[real_panic "));
752 
753 	va_start(adx, fmt);
754 	real_panic_v(f, fmt, adx);
755 	va_end(adx);
756 
757 	MINUTE((": return?!]\n"));
758 }
759 
760 static void
real_panic_v(ufs_failure_t * f,const char * fmt,va_list adx)761 real_panic_v(ufs_failure_t *f, const char *fmt, va_list adx)
762 {
763 	int seriousness = CE_PANIC;
764 	int need_unlock;
765 
766 	MINUTE(("[real_panic_v "));
767 
768 	if (f && f->uf_ufsvfsp)
769 		TRANS_SETERROR(f->uf_ufsvfsp);
770 
771 #if defined(DEBUG)
772 	if (DEBUG_FLAGS & DBGFLG_NOPANIC) {
773 		seriousness = CE_WARN;
774 		cmn_err(CE_WARN, "real_panic: EWOULDPANIC\n");
775 	}
776 #endif /* DEBUG */
777 
778 	delay(hz >> 1);			/* allow previous warnings to get out */
779 
780 	if (!f && fmt)
781 		vcmn_err(seriousness, fmt, adx);
782 	else
783 		cmn_err(seriousness, f && f->uf_panic_str? f->uf_panic_str:
784 		    "real_panic: <unknown panic?>");
785 
786 	if (f) {
787 		need_unlock = !MUTEX_HELD(&f->uf_mutex);
788 		if (need_unlock) {
789 			mutex_enter(&f->uf_mutex);
790 		}
791 
792 		f->uf_retry = -1;
793 		(void) set_state(f, UF_PANIC);
794 
795 		if (need_unlock) {
796 			mutex_exit(&f->uf_mutex);
797 		}
798 	}
799 	MINUTE((": return?!]\n"));
800 }
801 
802 /*
803  * initializes ufs panic structs, locks, etc
804  */
805 void
ufsfx_init(void)806 ufsfx_init(void)
807 {
808 
809 	MINUTE(("[ufsfx_init"));
810 
811 	/* patchable; unchanged while running, so no lock is needed */
812 	ufsfx_tune.uft_too_long		= UF_TOO_LONG;
813 	ufsfx_tune.uft_fixstart_period	= UF_FIXSTART_PERIOD;
814 	ufsfx_tune.uft_fixpoll_period	= UF_FIXPOLL_PERIOD;
815 	ufsfx_tune.uft_short_err_period	= UF_SHORT_ERROR_PERIOD;
816 	ufsfx_tune.uft_long_err_period	= UF_LONG_ERROR_PERIOD;
817 
818 	uffsinfo.ufi_statp	= &uf_stats;
819 	uffsinfo.ufi_tunep	= &ufsfx_tune;
820 	uffsinfo.ufi_statetab	= &state_desc[0];
821 
822 	mutex_init(&uf_stats.ufst_mutex, NULL, MUTEX_DEFAULT, NULL);
823 	ufs_thread_init(&ufs_fix, /* maxne */ 1);
824 
825 	MINUTE(("] "));
826 }
827 
828 /*
829  * initializes per-ufs values
830  * returns 0 (ok) or errno
831  */
832 int
ufsfx_mount(struct ufsvfs * ufsvfsp,int flags)833 ufsfx_mount(struct ufsvfs *ufsvfsp, int flags)
834 {
835 	MINUTE(("[ufsfx_mount (%d)", flags));
836 	/* don't check/need vfs_lock because it's still being initialized */
837 
838 	ufsvfsp->vfs_fsfx.fx_flags = (flags & UFSMNT_ONERROR_FLGMASK) >> 4;
839 
840 	MINUTE((": %s: fx_flags:%ld,",
841 	    ufsvfsp->vfs_fs->fs_fsmnt, ufsvfsp->vfs_fsfx.fx_flags));
842 	/*
843 	 *	onerror={panic ^ lock only ^ unmount}
844 	 */
845 
846 	if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_PANIC) {
847 		MINUTE((" PANIC"));
848 
849 	} else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKONLY) {
850 		MINUTE((" LCKONLY"));
851 
852 	} else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKUMOUNT) {
853 		MINUTE((" LCKUMOUNT"));
854 
855 	} else {
856 		ufsvfsp->vfs_fsfx.fx_flags = UFSFX_DEFAULT;
857 		ASSERT(ufsvfsp->vfs_fsfx.fx_flags &
858 		    (UFSMNT_ONERROR_FLGMASK >> 4));
859 		MINUTE((" DEFAULT"));
860 	}
861 
862 	pollwakeup(&ufs_pollhd, POLLPRI);
863 	MINUTE(("]\n"));
864 	return (0);
865 }
866 
867 /*
868  * ufsfx_unmount
869  *
870  * called during unmount
871  */
872 void
ufsfx_unmount(struct ufsvfs * ufsvfsp)873 ufsfx_unmount(struct ufsvfs *ufsvfsp)
874 {
875 	ufs_failure_t	*f;
876 	int		 must_unlock_list;
877 
878 	MINUTE(("[ufsfx_unmount"));
879 
880 	if (!ufsvfsp) {
881 		MINUTE((": no ufsvfsp]"));
882 		return;
883 	}
884 
885 	if ((must_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex)) != 0)
886 		mutex_enter(&ufs_fix.uq_mutex);
887 
888 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
889 		int must_unlock_failure;
890 
891 		must_unlock_failure = !MUTEX_HELD(&f->uf_mutex);
892 		if (must_unlock_failure) {
893 			mutex_enter(&f->uf_mutex);
894 		}
895 
896 		if (f->uf_ufsvfsp == ufsvfsp) {
897 
898 			/*
899 			 * if we owned the failure record lock, then this
900 			 * is probably a fix failure-triggered unmount, so
901 			 * the warning is not appropriate or needed
902 			 */
903 
904 			/* XXX if rebooting don't print this? */
905 			if (!terminal_state(f->uf_s) && must_unlock_failure) {
906 				cmn_err(CE_WARN,
907 				    "Unmounting %s while error-locked",
908 				    fs_name(f));
909 			}
910 
911 			f->uf_ufsvfsp		= NULL;
912 			f->uf_vfs_ufsfxp	= NULL;
913 			f->uf_vfs_lockp		= NULL;
914 			f->uf_bp		= NULL;
915 			f->uf_vfsp		= NULL;
916 			f->uf_retry		= -1;
917 		}
918 
919 		if (must_unlock_failure)
920 			mutex_exit(&f->uf_mutex);
921 	}
922 	if (must_unlock_list)
923 		mutex_exit(&ufs_fix.uq_mutex);
924 
925 	pollwakeup(&ufs_pollhd, POLLPRI | POLLHUP);
926 	MINUTE(("] "));
927 }
928 
929 /*
930  * ufsfx_(un)lockfs
931  *
932  * provides hook from lockfs code so we can recognize unlock/relock
933  *  This is called after it is certain that the (un)lock will succeed.
934  */
935 void
ufsfx_unlockfs(struct ufsvfs * ufsvfsp)936 ufsfx_unlockfs(struct ufsvfs *ufsvfsp)
937 {
938 	ufs_failure_t	*f;
939 	int		 need_unlock;
940 	int		 need_unlock_list;
941 	int		 informed = 0;
942 
943 	MINUTE(("[ufsfx_unlockfs"));
944 
945 	if (!ufsvfsp)
946 		return;
947 
948 	need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex);
949 
950 	if (need_unlock_list)
951 		mutex_enter(&ufs_fix.uq_mutex);
952 
953 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
954 
955 		need_unlock = !MUTEX_HELD(&f->uf_mutex);
956 		if (need_unlock)
957 			mutex_enter(&f->uf_mutex);
958 
959 		if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s)) {
960 			if (!(f->uf_s & UF_FIXING)) {
961 				/*
962 				 * This might happen if we don't notice that
963 				 * the fs gets marked FSFIX before it is
964 				 * marked FSCLEAN, as might occur if the
965 				 * the superblock was hammered directly.
966 				 */
967 				if (!informed) {
968 					informed = 1;
969 					cmn_err(CE_NOTE,
970 					    "Unlock of %s succeeded before "
971 					    "fs_clean marked FSFIX?",
972 					    fs_name(f));
973 				}
974 
975 				/*
976 				 * pass through fixing state so
977 				 * transition protocol is satisfied
978 				 */
979 				if (!set_state(f, UF_FIXING)) {
980 					MINUTE((": failed] "));
981 				}
982 			}
983 
984 			if (!set_state(f, UF_FIXED)) {
985 				/* it's already fixed, so don't panic now */
986 				MINUTE((": failed] "));
987 			}
988 		}
989 
990 		if (need_unlock)
991 			mutex_exit(&f->uf_mutex);
992 	}
993 	if (need_unlock_list)
994 		mutex_exit(&ufs_fix.uq_mutex);
995 	MINUTE(("] "));
996 }
997 
998 void
ufsfx_lockfs(struct ufsvfs * ufsvfsp)999 ufsfx_lockfs(struct ufsvfs *ufsvfsp)
1000 {
1001 	ufs_failure_t	*f;
1002 	int		 need_unlock;
1003 	int		 need_unlock_list;
1004 
1005 	MINUTE(("[ufsfx_lockfs"));
1006 
1007 	if (!ufsvfsp)
1008 		return;
1009 
1010 	need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex);
1011 
1012 	if (need_unlock_list)
1013 		mutex_enter(&ufs_fix.uq_mutex);
1014 
1015 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
1016 
1017 		need_unlock = !MUTEX_HELD(&f->uf_mutex);
1018 		if (need_unlock)
1019 			mutex_enter(&f->uf_mutex);
1020 
1021 		if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s) &&
1022 		    f->uf_s != UF_PANIC) {
1023 			switch (f->uf_s) {
1024 
1025 			default:
1026 				cmn_err(CE_WARN,
1027 				    "fs %s not in state "
1028 				    "UF_TRYLCK, UF_LOCKED or UF_FIXING",
1029 				    fs_name(f));
1030 				break;
1031 
1032 			case UF_TRYLCK:
1033 				if (!set_state(f, UF_LOCKED)) {
1034 					MINUTE((": failed] "));
1035 				}
1036 				break;
1037 
1038 			case UF_LOCKED:
1039 				if (!set_state(f, UF_FIXING)) {
1040 					MINUTE((": failed] "));
1041 				}
1042 				break;
1043 
1044 			case UF_FIXING:
1045 				break;
1046 
1047 			}
1048 		}
1049 
1050 		if (need_unlock)
1051 			mutex_exit(&f->uf_mutex);
1052 	}
1053 	if (need_unlock_list)
1054 		mutex_exit(&ufs_fix.uq_mutex);
1055 
1056 	MINUTE(("] "));
1057 }
1058 
1059 /*
1060  * error lock, trigger fsck and unlock those fs with failures
1061  * blatantly copied from the hlock routine, although this routine
1062  * triggers differently in order to use uq_ne as meaningful data.
1063  */
1064 /* ARGSUSED */
1065 void
ufsfx_thread_fix_failures(void * ignored)1066 ufsfx_thread_fix_failures(void *ignored)
1067 {
1068 	int		retry;
1069 	callb_cpr_t	cprinfo;
1070 
1071 	CALLB_CPR_INIT(&cprinfo, &ufs_fix.uq_mutex, callb_generic_cpr,
1072 	    "ufsfixfail");
1073 
1074 	MINUTE(("[ufsfx_thread_fix_failures] "));
1075 
1076 	for (;;) {
1077 		/* sleep until there is work to do */
1078 
1079 		mutex_enter(&ufs_fix.uq_mutex);
1080 		(void) ufs_thread_run(&ufs_fix, &cprinfo);
1081 		ufs_fix.uq_ne = 0;
1082 		mutex_exit(&ufs_fix.uq_mutex);
1083 
1084 		/* process failures on our q */
1085 		do {
1086 			retry = ufsfx_do_failure_q();
1087 			if (retry) {
1088 				mutex_enter(&ufs_fix.uq_mutex);
1089 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
1090 				(void) cv_reltimedwait(&ufs_fix.uq_cv,
1091 				    &ufs_fix.uq_mutex, (hz * retry),
1092 				    TR_CLOCK_TICK);
1093 				CALLB_CPR_SAFE_END(&cprinfo,
1094 				    &ufs_fix.uq_mutex);
1095 				mutex_exit(&ufs_fix.uq_mutex);
1096 			}
1097 		} while (retry);
1098 	}
1099 	/* NOTREACHED */
1100 }
1101 
1102 
1103 /*
1104  * watch for fix-on-panic work
1105  *
1106  * returns # of seconds to sleep before trying again
1107  * and zero if no retry is needed
1108  */
1109 
1110 int
ufsfx_do_failure_q(void)1111 ufsfx_do_failure_q(void)
1112 {
1113 	ufs_failure_t	*f;
1114 	long		 retry = 1;
1115 	ufsd_t		*s;
1116 
1117 	MAJOR(("[ufsfx_do_failure_q"));
1118 	DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL));
1119 
1120 	if (!mutex_tryenter(&ufs_fix.uq_mutex))
1121 		return (retry);
1122 
1123 	retry = 0;
1124 rescan_q:
1125 
1126 	/*
1127 	 * walk down failure list
1128 	 *  depending on state of each failure, do whatever
1129 	 *  is appropriate to move it to the next state
1130 	 *  taking note of whether retry gets set
1131 	 *
1132 	 * retry protocol:
1133 	 * wakeup in shortest required time for any failure
1134 	 *   retry == 0; nothing more to do (terminal state)
1135 	 *   retry < 0; reprocess queue immediately, retry will
1136 	 *		be abs(retry) for the next cycle
1137 	 *   retry > 0; schedule wakeup for retry seconds
1138 	 */
1139 
1140 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
1141 
1142 		if (!mutex_tryenter(&f->uf_mutex)) {
1143 			retry = 1;
1144 			continue;
1145 		}
1146 		s = get_state_desc(f->uf_s);
1147 
1148 		MINOR((": found%s: %s, \"%s: %s\"\n",
1149 		    s->ud_attr.terminal ? " old" : "",
1150 		    fs_name(f), state_name(f->uf_s), f->uf_panic_str));
1151 
1152 		if (s->ud_attr.terminal) {
1153 			mutex_exit(&f->uf_mutex);
1154 			continue;
1155 		}
1156 
1157 		if (s->ud_sfp)
1158 			(*s->ud_sfp)(f, UFA_FOUND, f->uf_s);
1159 
1160 		ASSERT(terminal_state(f->uf_s) || f->uf_retry != 0);
1161 
1162 		if (f->uf_retry != 0) {
1163 			if (retry > f->uf_retry || retry == 0)
1164 				retry = f->uf_retry;
1165 			if (f->uf_retry < 0)
1166 				f->uf_retry = abs(f->uf_retry);
1167 		}
1168 		mutex_exit(&f->uf_mutex);
1169 	}
1170 
1171 
1172 	if (retry < 0) {
1173 		retry = abs(retry);
1174 		goto rescan_q;
1175 	}
1176 
1177 	mutex_exit(&ufs_fix.uq_mutex);
1178 
1179 	DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL));
1180 	MAJOR((": retry=%ld, good night]\n\n", retry));
1181 
1182 	return (retry);
1183 }
1184 
1185 static void
pester_msg(ufs_failure_t * f,int seriousness)1186 pester_msg(ufs_failure_t *f, int seriousness)
1187 {
1188 	MINUTE(("[pester_msg"));
1189 	ASSERT(f->uf_s & (UF_LOCKED | UF_FIXING));
1190 
1191 	/*
1192 	 * XXX if seems too long for this fs, poke administrator
1193 	 * XXX to run fsck manually (and change retry time?)
1194 	 */
1195 	cmn_err(seriousness, "Waiting for repair of %s to %s",
1196 	    fs_name(f), f->uf_s & UF_LOCKED ? "start" : "finish");
1197 	MINUTE(("]"));
1198 }
1199 
1200 static time_t
trylock_time_exceeded(ufs_failure_t * f)1201 trylock_time_exceeded(ufs_failure_t *f)
1202 {
1203 	time_t		toolong;
1204 	extern time_t	time;
1205 
1206 	MINUTE(("[trylock_time_exceeded"));
1207 	ASSERT(MUTEX_HELD(&f->uf_mutex));
1208 
1209 	toolong = (time_t)ufsfx_tune.uft_too_long + f->uf_entered_tm;
1210 	if (time > toolong)
1211 		cmn_err(CE_WARN, "error-lock timeout exceeded: %s", fs_name(f));
1212 
1213 	MINUTE(("] "));
1214 	return (time <= toolong? 0: time - toolong);
1215 }
1216 
1217 static int
get_lockfs_status(ufs_failure_t * f,struct lockfs * lfp)1218 get_lockfs_status(ufs_failure_t *f, struct lockfs *lfp)
1219 {
1220 	MINUTE(("[get_lockfs_status"));
1221 
1222 	if (!f->uf_ufsvfsp) {
1223 		MINUTE((": ufsvfsp is NULL]\n"));
1224 		return (0);
1225 	}
1226 
1227 	ASSERT(MUTEX_HELD(&f->uf_mutex));
1228 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
1229 	ASSERT(!vfs_lock_held(f->uf_vfsp));
1230 	ASSERT(f->uf_ufsvfsp->vfs_root != NULL);
1231 
1232 	f->uf_lf_err = ufs_fiolfss(f->uf_ufsvfsp->vfs_root, lfp);
1233 
1234 	if (f->uf_lf_err) {
1235 		f->uf_retry = ufsfx_tune.uft_short_err_period;
1236 	}
1237 
1238 	MINUTE(("] "));
1239 	return (1);
1240 }
1241 
1242 static sfrc_t
set_state(ufs_failure_t * f,ufs_failure_states_t new_state)1243 set_state(ufs_failure_t *f, ufs_failure_states_t new_state)
1244 {
1245 	ufsd_t		*s;
1246 	sfrc_t		 sfrc = SFRC_FAIL;
1247 	int		 need_unlock;
1248 	extern time_t	 time;
1249 
1250 	HIDEOUS(("[set_state: new state:%s", state_name(new_state)));
1251 	ASSERT(f);
1252 	ASSERT(MUTEX_HELD(&f->uf_mutex));
1253 
1254 	/*
1255 	 * if someone else is panicking, just let panic sync proceed
1256 	 */
1257 	if (panicstr) {
1258 		(void) set_state(f, UF_NOTFIX);
1259 		HIDEOUS((": state reset: not fixed] "));
1260 		return (sfrc);
1261 	}
1262 
1263 	/*
1264 	 * bad state transition, an internal error
1265 	 */
1266 	if (!state_trans_valid(f->uf_s, new_state)) {
1267 		/* recursion */
1268 		if (!(f->uf_s & UF_PANIC) && !(new_state & UF_PANIC))
1269 			(void) set_state(f, UF_PANIC);
1270 		MINOR((": state reset: transition failure (\"%s\"->\"%s\")] ",
1271 		    state_name(f->uf_s), state_name(new_state)));
1272 		return (sfrc);
1273 	}
1274 
1275 	s = get_state_desc(new_state);
1276 
1277 	need_unlock = !MUTEX_HELD(&ufs_fix.uq_mutex);
1278 	if (need_unlock)
1279 		mutex_enter(&ufs_fix.uq_mutex);
1280 
1281 	if (s->ud_attr.at_fail && ufs_fix.uq_threadp &&
1282 	    curthread == ufs_fix.uq_threadp) {
1283 		cmn_err(CE_WARN, "set_state: probable recursive panic of %s",
1284 		    fs_name(f));
1285 	}
1286 	if (need_unlock)
1287 		mutex_exit(&ufs_fix.uq_mutex);
1288 
1289 	/* NULL state functions always succeed */
1290 	sfrc = !s->ud_sfp? SFRC_SUCCESS: (*s->ud_sfp)(f, UFA_SET, new_state);
1291 
1292 	if (sfrc == SFRC_SUCCESS && f->uf_s != new_state) {
1293 		f->uf_s = new_state;
1294 		f->uf_entered_tm = time;
1295 		f->uf_counter = 0;
1296 	}
1297 
1298 	HIDEOUS(("]\n"));
1299 	return (sfrc);
1300 }
1301 
1302 static ufsd_t *
get_state_desc(ufs_failure_states_t state)1303 get_state_desc(ufs_failure_states_t state)
1304 {
1305 	ufsd_t *s;
1306 
1307 	HIDEOUS(("[get_state_desc"));
1308 
1309 	for (s = &state_desc[1]; s->ud_name != NULL; s++) {
1310 		if (s->ud_v == state) {
1311 			HIDEOUS(("] "));
1312 			return (s);
1313 		}
1314 	}
1315 
1316 	HIDEOUS(("] "));
1317 	return (&state_desc[0]);	/* default */
1318 }
1319 
1320 static sfrc_t
sf_undef(ufs_failure_t * f,ufsa_t a,ufs_failure_states_t s)1321 sf_undef(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s)
1322 {
1323 	sfrc_t rc;
1324 
1325 	TRIVIA(("[sf_undef, action is %s, state is %s\n",
1326 	    act_name(a), state_name(s)));
1327 	ASSERT(s == UF_UNDEF);
1328 
1329 	/* shouldn't find null failure records or ever set one */
1330 	rc = set_state(f, UF_NOTFIX);
1331 
1332 	TRIVIA(("] "));
1333 	return (rc);
1334 }
1335 
1336 
1337 static sfrc_t
sf_init(ufs_failure_t * f,ufsa_t a,ufs_failure_states_t s)1338 sf_init(
1339 	ufs_failure_t	*f,
1340 	ufsa_t	 a,
1341 	ufs_failure_states_t	 s)
1342 {
1343 	sfrc_t		rc = SFRC_FAIL;
1344 	extern time_t	time;
1345 
1346 	TRIVIA(("[sf_init, action is %s", act_name(a)));
1347 	ASSERT(s & UF_INIT);
1348 
1349 	switch (a) {
1350 	case UFA_SET:
1351 		f->uf_begin_tm = time;
1352 		f->uf_retry = 1;
1353 		if (!f->uf_ufsvfsp) {
1354 			(void) set_state(f, UF_PANIC);
1355 			TRIVIA((": NULL ufsvfsp]\n"));
1356 			return (rc);
1357 		}
1358 		/*
1359 		 * because we can call panic from many different levels,
1360 		 * we can't be sure that we've got the vfs_lock at this
1361 		 * point.  However, there's not much alternative and if
1362 		 * we don't (have the lock) the worst case is we'll just
1363 		 * panic again
1364 		 */
1365 		f->uf_vfs_lockp		= &f->uf_ufsvfsp->vfs_lock;
1366 		f->uf_vfs_ufsfxp	= &f->uf_ufsvfsp->vfs_fsfx;
1367 
1368 		if (!f->uf_ufsvfsp->vfs_bufp) {
1369 			(void) set_state(f, UF_PANIC);
1370 			TRIVIA((": NULL vfs_bufp]\n"));
1371 			return (rc);
1372 		}
1373 		f->uf_bp = f->uf_ufsvfsp->vfs_bufp;
1374 
1375 		if (!f->uf_ufsvfsp->vfs_bufp->b_un.b_fs) {
1376 			(void) set_state(f, UF_PANIC);
1377 			TRIVIA((": NULL vfs_fs]\n"));
1378 			return (rc);
1379 		}
1380 
1381 		/* vfs_fs = vfs_bufp->b_un.b_fs */
1382 		bcopy(f->uf_ufsvfsp->vfs_fs->fs_fsmnt, f->uf_fsname, MAXMNTLEN);
1383 
1384 		f->uf_lf.lf_lock  = LOCKFS_ELOCK;	/* primer */
1385 
1386 		if (!f->uf_vfsp || f->uf_vfsp->vfs_dev == NODEV) {
1387 			(void) set_state(f, UF_PANIC);
1388 			TRIVIA((": NULL vfsp or vfs_dev == NODEV"));
1389 			return (rc);
1390 		}
1391 		f->uf_dev = f->uf_vfsp->vfs_dev;
1392 
1393 		rc = SFRC_SUCCESS;
1394 		break;
1395 
1396 	case UFA_FOUND:
1397 	default:
1398 		/* failures marked init shouldn't even be on the queue yet */
1399 		rc = set_state(f, UF_QUEUE);
1400 		TRIVIA((": found failure with state init]\n"));
1401 	}
1402 
1403 	TRIVIA(("] "));
1404 	return (rc);
1405 }
1406 
1407 static sfrc_t
sf_queue(ufs_failure_t * f,ufsa_t a,ufs_failure_states_t s)1408 sf_queue(
1409 	ufs_failure_t	*f,
1410 	ufsa_t	 a,
1411 	ufs_failure_states_t	 s)
1412 {
1413 	sfrc_t		rc = SFRC_FAIL;
1414 
1415 	TRIVIA(("[sf_queue, action is %s", act_name(a)));
1416 	ASSERT(s & UF_QUEUE);
1417 
1418 	if (!f->uf_ufsvfsp) {
1419 		TRIVIA((": NULL ufsvfsp]\n"));
1420 		return (rc);
1421 	}
1422 
1423 	switch (a) {
1424 	case UFA_FOUND:
1425 		rc = sf_found_queue(f);
1426 		break;
1427 
1428 	case UFA_SET:
1429 
1430 		ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex));
1431 
1432 		mutex_enter(&uf_stats.ufst_mutex);
1433 		uf_stats.ufst_num_failed++;
1434 		mutex_exit(&uf_stats.ufst_mutex);
1435 
1436 		/*
1437 		 * if can't get the vfs lock, just wait until
1438 		 * UF_TRYLCK to set fx_current
1439 		 */
1440 		if (mutex_tryenter(f->uf_vfs_lockp)) {
1441 			f->uf_vfs_ufsfxp->fx_current = f;
1442 			mutex_exit(f->uf_vfs_lockp);
1443 		} else {
1444 			mutex_enter(&uf_stats.ufst_mutex);
1445 			uf_stats.ufst_current_races++;
1446 			mutex_exit(&uf_stats.ufst_mutex);
1447 		}
1448 
1449 		f->uf_retry = 1;
1450 		rc = SFRC_SUCCESS;
1451 		TRIVIA(("] "));
1452 		break;
1453 
1454 	default:
1455 		(void) set_state(f, UF_PANIC);
1456 		TRIVIA((": failed] "));
1457 	}
1458 
1459 	return (rc);
1460 }
1461 
1462 static sfrc_t
sf_found_queue(ufs_failure_t * f)1463 sf_found_queue(ufs_failure_t *f)
1464 {
1465 	int		replica;
1466 	sfrc_t		rc = SFRC_FAIL;
1467 
1468 	TRIVIA(("[sf_found_queue"));
1469 
1470 	/*
1471 	 * don't need to check for null ufsvfsp because
1472 	 * unmount must own list's ufs_fix.uq_mutex
1473 	 * to mark it null and we own that lock since
1474 	 * we got here.
1475 	 */
1476 
1477 	ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex));
1478 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
1479 
1480 	if (!mutex_tryenter(f->uf_vfs_lockp)) {
1481 		TRIVIA((": tryenter(vfslockp) failed; retry]\n"));
1482 		f->uf_retry = 1;
1483 		return (rc);
1484 	}
1485 
1486 	replica = f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current != NULL &&
1487 	    f->uf_vfs_ufsfxp->fx_current != f &&
1488 	    !terminal_state(f->uf_vfs_ufsfxp->fx_current->uf_s);
1489 
1490 	/*
1491 	 * copy general flags to this ufs_failure so we don't
1492 	 * need to refer back to the ufsvfs, or, more importantly,
1493 	 * don't need to keep acquiring (trying to acquire) vfs_lockp
1494 	 *
1495 	 * The most restrictive option wins:
1496 	 *  panic > errlock only > errlock+unmount > repair
1497 	 * XXX panic > elock > elock > elock+umount
1498 	 */
1499 	if (f->uf_vfs_ufsfxp->fx_flags & UFSFX_PANIC) {
1500 		if (!set_state(f, UF_PANIC)) {
1501 			TRIVIA((": marked panic but was queued?"));
1502 			real_panic(f, " ");
1503 			/*NOTREACHED*/
1504 		}
1505 		mutex_exit(f->uf_vfs_lockp);
1506 		return (rc);
1507 	}
1508 	f->uf_flags = f->uf_vfs_ufsfxp->fx_flags;
1509 
1510 	if (replica) {
1511 		if (!set_state(f, UF_REPLICA)) {
1512 			f->uf_retry = 1;
1513 			TRIVIA((": set to replica failed] "));
1514 		} else {
1515 			TRIVIA(("] "));
1516 		}
1517 		mutex_exit(f->uf_vfs_lockp);
1518 		return (rc);
1519 	}
1520 	mutex_exit(f->uf_vfs_lockp);
1521 
1522 	if (!set_state(f, UF_TRYLCK)) {
1523 		TRIVIA((": failed] "));
1524 	} else {
1525 		rc = SFRC_SUCCESS;
1526 	}
1527 	return (rc);
1528 }
1529 
1530 static sfrc_t
sf_nonterm_cmn(ufs_failure_t * f,ufsa_t a,ufs_failure_states_t s)1531 sf_nonterm_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s)
1532 {
1533 	sfrc_t	rc = SFRC_FAIL;
1534 
1535 	TRIVIA(("[sf_nonterm_cmn, action: %s, %s", act_name(a), state_name(s)));
1536 	ASSERT(s & (UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING));
1537 	ASSERT(!terminal_state(s));
1538 
1539 	if (!f->uf_ufsvfsp && !(f->uf_s & UF_UMOUNT)) {
1540 		TRIVIA((": NULL ufsvfsp (state != UMOUNT)]\n"));
1541 		(void) set_state(f, UF_NOTFIX);
1542 		return (rc);
1543 	}
1544 
1545 	switch (a) {
1546 	case UFA_SET:
1547 		switch (s) {
1548 		case UF_TRYLCK:
1549 			ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
1550 			rc = sf_set_trylck(f);
1551 			break;
1552 
1553 		case UF_LOCKED:
1554 			rc = sf_set_locked(f);
1555 			break;
1556 
1557 		case UF_FIXING:
1558 			f->uf_flags |= UFSFX_REPAIR_START;
1559 			f->uf_retry  = ufsfx_tune.uft_fixpoll_period;
1560 			rc = SFRC_SUCCESS;
1561 			break;
1562 
1563 		case UF_UMOUNT:
1564 			f->uf_retry = -ufsfx_tune.uft_short_err_period;
1565 			rc = SFRC_SUCCESS;
1566 			break;
1567 
1568 		default:
1569 			(void) set_state(f, UF_PANIC);
1570 			TRIVIA((": failed] "));
1571 		}
1572 		break;
1573 
1574 	case UFA_FOUND:
1575 
1576 		switch (s) {
1577 		case UF_TRYLCK:
1578 			rc = sf_found_trylck(f);
1579 			break;
1580 
1581 		case UF_LOCKED:
1582 		case UF_FIXING:
1583 			rc = sf_found_lock_fix_cmn(f, s);
1584 			break;
1585 
1586 		case UF_UMOUNT:
1587 			rc = sf_found_umount(f);
1588 			break;
1589 
1590 		default:
1591 			(void) set_state(f, UF_PANIC);
1592 			TRIVIA((": failed] "));
1593 			break;
1594 		}
1595 		break;
1596 	default:
1597 		(void) set_state(f, UF_PANIC);
1598 		TRIVIA((": failed] "));
1599 		break;
1600 	}
1601 
1602 	TRIVIA(("] "));
1603 	return (rc);
1604 }
1605 
1606 static sfrc_t
sf_set_trylck(ufs_failure_t * f)1607 sf_set_trylck(ufs_failure_t *f)
1608 {
1609 	TRIVIA(("[sf_set_trylck"));
1610 
1611 	if (!mutex_tryenter(f->uf_vfs_lockp)) {
1612 		TRIVIA((": tryenter(vfslockp) failed; retry]\n"));
1613 		f->uf_retry = 1;
1614 		return (SFRC_FAIL);
1615 	}
1616 
1617 	if (!f->uf_vfs_ufsfxp->fx_current)
1618 		f->uf_vfs_ufsfxp->fx_current = f;
1619 
1620 	mutex_exit(f->uf_vfs_lockp);
1621 
1622 	f->uf_lf.lf_flags = 0;
1623 	f->uf_lf.lf_lock  = LOCKFS_ELOCK;
1624 	f->uf_retry = -ufsfx_tune.uft_fixstart_period;
1625 	TRIVIA(("] "));
1626 	return (SFRC_SUCCESS);
1627 }
1628 
1629 static sfrc_t
sf_found_trylck(ufs_failure_t * f)1630 sf_found_trylck(ufs_failure_t *f)
1631 {
1632 	struct lockfs lockfs_status;
1633 
1634 	TRIVIA(("[sf_found_trylck"));
1635 
1636 	if (trylock_time_exceeded(f) > 0) {
1637 		(void) set_state(f, UF_PANIC);
1638 		TRIVIA((": failed] "));
1639 		return (SFRC_FAIL);
1640 	}
1641 
1642 	if (!get_lockfs_status(f, &lockfs_status)) {
1643 		(void) set_state(f, UF_PANIC);
1644 		TRIVIA((": failed] "));
1645 		return (SFRC_FAIL);
1646 	}
1647 
1648 	if (f->uf_lf_err == NO_ERROR)
1649 		f->uf_lf.lf_key = lockfs_status.lf_key;
1650 
1651 	if (!set_lockfs(f, &lockfs_status)) {
1652 		(void) set_state(f, UF_PANIC);
1653 		TRIVIA((": failed] "));
1654 		return (SFRC_FAIL);
1655 	}
1656 	TRIVIA(("] "));
1657 	return (SFRC_SUCCESS);
1658 }
1659 
1660 static sfrc_t
sf_set_locked(ufs_failure_t * f)1661 sf_set_locked(ufs_failure_t *f)
1662 {
1663 	TRIVIA(("[sf_set_locked"));
1664 
1665 	f->uf_retry = -ufsfx_tune.uft_fixstart_period;
1666 
1667 #if defined(DEBUG)
1668 	if (f->uf_flags & UFSFX_REPAIR_START)
1669 		TRIVIA(("clearing UFSFX_REPAIR_START "));
1670 #endif /* DEBUG */
1671 
1672 	f->uf_flags &= ~UFSFX_REPAIR_START;
1673 
1674 	if (f->uf_s & UF_TRYLCK) {
1675 		cmn_err(CE_WARN, "Error-locked %s: \"%s\"",
1676 		    fs_name(f), f->uf_panic_str);
1677 
1678 		if (f->uf_flags & UFSFX_LCKONLY)
1679 			cmn_err(CE_WARN, "Manual repair of %s required",
1680 			    fs_name(f));
1681 	}
1682 
1683 	/*
1684 	 * just reset to current state
1685 	 */
1686 #if defined(DEBUG)
1687 	TRIVIA(("locked->locked "));
1688 #endif /* DEBUG */
1689 
1690 	TRIVIA(("] "));
1691 	return (SFRC_SUCCESS);
1692 }
1693 
1694 static sfrc_t
sf_found_lock_fix_cmn(ufs_failure_t * f,ufs_failure_states_t s)1695 sf_found_lock_fix_cmn(ufs_failure_t *f, ufs_failure_states_t s)
1696 {
1697 	time_t		toolong;
1698 	extern time_t	time;
1699 	struct buf	*bp			= NULL;
1700 	struct fs	*dfs;
1701 	time_t		 concerned, anxious;
1702 	sfrc_t		 rc			= SFRC_FAIL;
1703 	ulong_t		 gb_size;
1704 
1705 	TRIVIA(("[sf_found_lock_fix_cmn (\"%s\")", state_name(s)));
1706 
1707 	if (s & UF_LOCKED) {
1708 		ASSERT(MUTEX_HELD(&f->uf_mutex));
1709 
1710 		toolong =
1711 		    time > (ufsfx_tune.uft_too_long + f->uf_entered_tm);
1712 		TRIVIA(("%stoolong", !toolong? "not": ""));
1713 		HIDEOUS((": time:%ld, too long:%ld, entered_tm:%ld ",
1714 		    time, ufsfx_tune.uft_too_long, f->uf_entered_tm));
1715 
1716 		if (f->uf_flags & UFSFX_LCKUMOUNT) {
1717 			if (set_state(f, UF_UMOUNT)) {
1718 				TRIVIA(("] "));
1719 				rc = SFRC_SUCCESS;
1720 			} else {
1721 				TRIVIA((": failed] "));
1722 				f->uf_retry = 1;
1723 			}
1724 			return (rc);
1725 		}
1726 		if (!toolong) {
1727 			rc = SFRC_SUCCESS;
1728 		} else {
1729 			if (!(f->uf_flags & UFSFX_REPAIR_START)) {
1730 				cmn_err(CE_WARN, "%s repair of %s not started.",
1731 				    (f->uf_flags & UFSFX_LCKONLY) ?
1732 				    "Manual" : "Automatic", fs_name(f));
1733 
1734 				f->uf_retry = ufsfx_tune.uft_long_err_period;
1735 			} else {
1736 				f->uf_retry = ufsfx_tune.uft_long_err_period;
1737 				cmn_err(CE_WARN, "Repair of %s is not timely; "
1738 				    "operator attention is required.",
1739 				    fs_name(f));
1740 			}
1741 			TRIVIA(("] "));
1742 			return (rc);
1743 		}
1744 	}
1745 
1746 #if defined(DEBUG)
1747 	else {
1748 		ASSERT(s & UF_FIXING);
1749 	}
1750 #endif /* DEBUG */
1751 
1752 	/*
1753 	 * get on disk superblock; force it to really
1754 	 * come from the disk
1755 	 */
1756 	(void) bfinval(f->uf_dev, 0);
1757 	bp = UFS_BREAD(f->uf_ufsvfsp, f->uf_dev, SBLOCK, SBSIZE);
1758 	if (bp) {
1759 		bp->b_flags |= (B_STALE | B_AGE);
1760 		dfs = bp->b_un.b_fs;
1761 	}
1762 
1763 	if (!bp || (bp->b_flags & B_ERROR) || ((dfs->fs_magic != FS_MAGIC) &&
1764 	    (dfs->fs_magic != MTB_UFS_MAGIC))) {
1765 		TRIVIA((": UFS_BREAD(SBLOCK) failed]\n"));
1766 		f->uf_retry = 1;
1767 		goto out;
1768 	}
1769 
1770 	/* fsck started but we haven't noticed yet? */
1771 	if (!(s & UF_FIXING) && dfs->fs_clean == FSFIX) {
1772 		if (!set_state(f, UF_FIXING)) {
1773 			TRIVIA((": failed]\n"));
1774 			f->uf_retry = 1;
1775 			goto out;
1776 		}
1777 	}
1778 
1779 	/* fsck started but didn't succeed? */
1780 	if ((s & UF_FIXING) && ((dfs->fs_clean == FSBAD) || !fsck_active(f))) {
1781 		TRIVIA((": fs_clean: %d", (int)dfs->fs_clean));
1782 		(void) set_state(f, UF_LOCKED);
1783 		cmn_err(CE_WARN, "%s: Manual repair is necessary.", fs_name(f));
1784 		f->uf_retry = ufsfx_tune.uft_long_err_period;
1785 		goto out;
1786 	}
1787 
1788 	gb_size = (dfs->fs_size * dfs->fs_bshift) / GB;
1789 	toolong = (time_t)((gb_size == 0? 1: gb_size) * SecondsPerGig);
1790 
1791 	/* fsck started but doesn't seem to be proceeding? */
1792 	if ((s & UF_FIXING) && dfs->fs_clean == FSFIX) {
1793 		if (time > f->uf_entered_tm + toolong) {
1794 
1795 			cmn_err(CE_WARN,
1796 			    "Repair completion timeout exceeded on %s; "
1797 			    "manual fsck may be required", fs_name(f));
1798 			f->uf_retry = ufsfx_tune.uft_long_err_period;
1799 		}
1800 	}
1801 
1802 	concerned = f->uf_entered_tm + (toolong / 3);
1803 	anxious = f->uf_entered_tm + ((2 * toolong) / 3);
1804 
1805 	if (time > concerned)
1806 		pester_msg(f, time > anxious? CE_WARN: CE_NOTE);
1807 
1808 	TRIVIA(("] "));
1809 
1810 out:
1811 	if (bp)
1812 		brelse(bp);
1813 
1814 	return (rc);
1815 }
1816 
1817 static sfrc_t
sf_found_umount(ufs_failure_t * f)1818 sf_found_umount(ufs_failure_t *f)
1819 {
1820 	extern time_t	 time;
1821 	sfrc_t		 rc			= SFRC_FAIL;
1822 	struct vfs	*vfsp			= f->uf_vfsp;
1823 	struct ufsvfs	*ufsvfsp		= f->uf_ufsvfsp;
1824 	int		 toolong		= 0;
1825 	int		 err			= 0;
1826 
1827 	TRIVIA(("[sf_found_umount"));
1828 
1829 	toolong = time > ufsfx_tune.uft_too_long + f->uf_entered_tm;
1830 	if (toolong) {
1831 		TRIVIA((": unmount time limit exceeded] "));
1832 		goto out;
1833 	}
1834 
1835 	if (!vfsp || !ufsvfsp) {	/* trivial case */
1836 		TRIVIA((": NULL vfsp and/or ufsvfsp, already unmounted?] "));
1837 		goto out;
1838 	}
1839 
1840 	if (!ULOCKFS_IS_ELOCK(&ufsvfsp->vfs_ulockfs)) {
1841 		TRIVIA((": !not error locked?"));
1842 		err = EINVAL;
1843 		goto out;
1844 	}
1845 
1846 	/* The vn_vfsunlock will be done in dounmount() [.../common/fs/vfs.c] */
1847 	if (vn_vfswlock(vfsp->vfs_vnodecovered)) {
1848 		TRIVIA((": couldn't lock coveredvp"));
1849 		err = EBUSY;
1850 		goto out;
1851 	}
1852 
1853 	if ((err = dounmount(vfsp, 0, kcred)) != 0) {
1854 
1855 		/* take note, but not many alternatives here */
1856 		mutex_enter(&uf_stats.ufst_mutex);
1857 		uf_stats.ufst_unmount_failures++;
1858 		mutex_exit(&uf_stats.ufst_mutex);
1859 
1860 		TRIVIA((": unmount failed] "));
1861 	} else {
1862 		cmn_err(CE_NOTE, "unmounted error-locked %s", fs_name(f));
1863 	}
1864 
1865 out:
1866 	if (toolong || (err != EBUSY && err != EAGAIN))
1867 		rc = set_state(f, UF_NOTFIX);
1868 
1869 	TRIVIA(("] "));
1870 	return (rc);
1871 }
1872 
1873 static sfrc_t
sf_term_cmn(ufs_failure_t * f,ufsa_t a,ufs_failure_states_t s)1874 sf_term_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s)
1875 {
1876 	extern time_t	time;
1877 	sfrc_t		rc = SFRC_FAIL;
1878 
1879 	TRIVIA(("[sf_term_cmn, action is %s, state is %s",
1880 	    act_name(a), state_name(s)));
1881 	ASSERT(s & (UF_FIXED | UF_NOTFIX | UF_REPLICA));
1882 	ASSERT(terminal_state(s));
1883 
1884 	if (!f->uf_ufsvfsp && !(f->uf_s & (UF_UMOUNT | UF_NOTFIX))) {
1885 		TRIVIA((": NULL ufsvfsp (state != UMOUNT | NOTFIX)]\n"));
1886 		return (rc);
1887 	}
1888 
1889 	switch (a) {
1890 	case UFA_SET:
1891 		switch (s) {
1892 		case UF_NOTFIX:
1893 		case UF_FIXED:
1894 		{
1895 			int need_lock_vfs;
1896 
1897 			if (f->uf_ufsvfsp && f->uf_vfs_lockp)
1898 				need_lock_vfs = !MUTEX_HELD(f->uf_vfs_lockp);
1899 			else
1900 				need_lock_vfs = 0;
1901 
1902 			if (need_lock_vfs && !mutex_tryenter(f->uf_vfs_lockp)) {
1903 				TRIVIA((": tryenter(vfslockp) fail; retry]\n"));
1904 				f->uf_retry = 1;
1905 				break;
1906 			}
1907 
1908 			f->uf_end_tm = time;
1909 			f->uf_lf.lf_lock = LOCKFS_OLOCK;
1910 			f->uf_retry = 0;
1911 
1912 			if (f->uf_vfs_ufsfxp)
1913 				f->uf_vfs_ufsfxp->fx_current = NULL;
1914 
1915 			if (need_lock_vfs)
1916 				mutex_exit(f->uf_vfs_lockp);
1917 
1918 			cmn_err(CE_NOTE, (s & UF_NOTFIX)? "Could not fix %s":
1919 			    "%s is now accessible", fs_name(f));
1920 
1921 			if (s & UF_FIXED) {
1922 				mutex_enter(&uf_stats.ufst_mutex);
1923 				uf_stats.ufst_num_fixed++;
1924 				mutex_exit(&uf_stats.ufst_mutex);
1925 			}
1926 			(void) timeout(ufsfx_kill_fix_failure_thread,
1927 			    (void *)(ufsfx_tune.uft_short_err_period * hz),
1928 			    ufsfx_tune.uft_short_err_period * hz);
1929 			rc = SFRC_SUCCESS;
1930 			break;
1931 		}
1932 		case UF_REPLICA:
1933 
1934 			ASSERT(MUTEX_HELD(f->uf_vfs_lockp));
1935 
1936 			/* not actually a replica? */
1937 			if (f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current &&
1938 			    f->uf_vfs_ufsfxp->fx_current != f &&
1939 			    !terminal_state(
1940 			    f->uf_vfs_ufsfxp->fx_current->uf_s)) {
1941 
1942 				f->uf_orig = f->uf_vfs_ufsfxp->fx_current;
1943 				f->uf_retry = 0;
1944 				rc = SFRC_SUCCESS;
1945 			} else {
1946 				TRIVIA((": NULL fx_current]\n"));
1947 				f->uf_retry = 1;
1948 			}
1949 
1950 			break;
1951 
1952 		default:
1953 			rc = set_state(f, UF_PANIC);
1954 			TRIVIA((": failed] "));
1955 			break;
1956 		}
1957 		break;
1958 
1959 	case UFA_FOUND:
1960 		/*
1961 		 * XXX de-allocate these after some period?
1962 		 * XXX or move to an historical list?
1963 		 * XXX or have an ioctl which reaps them?
1964 		 */
1965 		/*
1966 		 * For now, since we don't expect lots of failures
1967 		 * to occur (to the point of memory shortages),
1968 		 * just punt
1969 		 */
1970 
1971 		/* be sure we're not wasting cpu on old failures */
1972 		if (f->uf_retry != 0) {
1973 			mutex_enter(&uf_stats.ufst_mutex);
1974 			uf_stats.ufst_cpu_waste++;
1975 			mutex_exit(&uf_stats.ufst_mutex);
1976 			f->uf_retry = 0;
1977 		}
1978 		rc = SFRC_SUCCESS;
1979 		break;
1980 
1981 	default:
1982 		(void) set_state(f, UF_PANIC);
1983 		TRIVIA((": failed] "));
1984 		break;
1985 	}
1986 
1987 	TRIVIA(("] "));
1988 	return (rc);
1989 }
1990 
1991 static sfrc_t
sf_panic(ufs_failure_t * f,ufsa_t a,ufs_failure_states_t s)1992 sf_panic(
1993 	ufs_failure_t	*f,
1994 	ufsa_t	 a,
1995 	ufs_failure_states_t	 s)
1996 {
1997 	sfrc_t	rc = SFRC_FAIL;
1998 
1999 	TRIVIA(("[sf_panic, action is %s, prev. state is %s",
2000 	    act_name(a), state_name(f->uf_s)));
2001 	ASSERT(s & UF_PANIC);
2002 
2003 	switch (a) {
2004 	case UFA_SET:
2005 		f->uf_retry = -ufsfx_tune.uft_short_err_period;
2006 		rc = SFRC_SUCCESS;
2007 		break;
2008 
2009 	case UFA_FOUND:
2010 	default:
2011 		real_panic(f, " ");
2012 
2013 		/* LINTED: warning: logical expression always true: op "||" */
2014 		ASSERT(DEBUG);
2015 
2016 		(void) set_state(f, UF_UMOUNT);	/* XXX UF_NOTFIX? */
2017 
2018 		break;
2019 	}
2020 
2021 	TRIVIA(("] "));
2022 	return (rc);
2023 }
2024 
2025 /*
2026  * minimum state function
2027  */
2028 static sfrc_t
sf_minimum(ufs_failure_t * f,ufsa_t a,ufs_failure_states_t ignored)2029 sf_minimum(
2030 	ufs_failure_t	*f,
2031 	ufsa_t	 a, /* LINTED argument unused in function: ignored */
2032 	ufs_failure_states_t	 ignored)
2033 {
2034 	sfrc_t rc = SFRC_FAIL;
2035 
2036 	TRIVIA(("[sf_minimum, action is %s", act_name(a)));
2037 
2038 	switch (a) {
2039 	case UFA_SET:
2040 		f->uf_retry = 0;
2041 		/* FALLTHROUGH */
2042 
2043 	case UFA_FOUND:
2044 		rc = SFRC_SUCCESS;
2045 		break;
2046 
2047 	default:
2048 		(void) set_state(f, UF_PANIC);
2049 		TRIVIA((": failed] "));
2050 		break;
2051 	}
2052 
2053 	TRIVIA(("] "));
2054 	return (rc);
2055 }
2056 
2057 static int
state_trans_valid(ufs_failure_states_t from,ufs_failure_states_t to)2058 state_trans_valid(ufs_failure_states_t from, ufs_failure_states_t to)
2059 {
2060 	ufsd_t	*s;
2061 	int	 valid;
2062 
2063 	HIDEOUS(("[state_trans_valid"));
2064 
2065 	if (from & to)
2066 		return (1);
2067 
2068 	s = get_state_desc(to);
2069 
2070 	/*
2071 	 * extra test is necessary since we want UF_UNDEF = 0,
2072 	 * (to detect freshly allocated memory)
2073 	 * but can't check for that value with a bit test
2074 	 */
2075 	valid = (to & UF_INIT)? from == s->ud_prev: from & s->ud_prev;
2076 
2077 	HIDEOUS((": %svalid] ", valid? "": "in"));
2078 	return (valid);
2079 }
2080 
2081 static int
terminal_state(ufs_failure_states_t state)2082 terminal_state(ufs_failure_states_t state)
2083 {
2084 	ufsd_t	*s;
2085 
2086 	HIDEOUS(("[terminal_state"));
2087 
2088 	s = get_state_desc(state);
2089 
2090 	HIDEOUS((": %sterminal] ", s->ud_attr.terminal? "": "not "));
2091 	return ((int)s->ud_attr.terminal);
2092 }
2093 
2094 static void
alloc_lockfs_comment(ufs_failure_t * f,struct lockfs * lfp)2095 alloc_lockfs_comment(ufs_failure_t *f, struct lockfs *lfp)
2096 {
2097 	MINUTE(("[alloc_lockfs_comment"));
2098 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2099 
2100 	/*
2101 	 * ufs_fiolfs expects a kmem_alloc'ed comment;
2102 	 * it frees the comment if the lock fails
2103 	 * or else when the lock is unlocked.
2104 	 */
2105 
2106 	f->uf_lf.lf_comment = kmem_zalloc(LOCKFS_MAXCOMMENTLEN, KM_NOSLEEP);
2107 	if (f->uf_lf.lf_comment) {
2108 		char	*from;
2109 		size_t	 len;
2110 
2111 		/*
2112 		 * use panic string if there's no previous comment
2113 		 * or if we're setting the error lock
2114 		 */
2115 		if ((LOCKFS_IS_ELOCK(&f->uf_lf) || !lfp->lf_comment ||
2116 		    lfp->lf_comlen <= 0)) {
2117 			from = f->uf_panic_str;
2118 			len = LOCKFS_MAXCOMMENTLEN;
2119 		} else {
2120 			from = lfp->lf_comment;
2121 			len = lfp->lf_comlen;
2122 		}
2123 
2124 		bcopy(from, f->uf_lf.lf_comment, len);
2125 		f->uf_lf.lf_comlen = len;
2126 
2127 	} else {
2128 		f->uf_lf.lf_comlen = 0;
2129 	}
2130 	MINUTE(("] "));
2131 }
2132 
2133 static int
set_lockfs(ufs_failure_t * f,struct lockfs * lfp)2134 set_lockfs(ufs_failure_t *f, struct lockfs *lfp)
2135 {
2136 	int	(*handle_lockfs_rc)(ufs_failure_t *);
2137 	int	  rc;
2138 
2139 	MINUTE(("[set_lockfs"));
2140 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2141 	ASSERT(!vfs_lock_held(f->uf_vfsp));
2142 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
2143 
2144 	if (!f->uf_ufsvfsp) {
2145 		MINUTE((": ufsvfsp is NULL]\n"));
2146 		return (0);
2147 	}
2148 
2149 	ASSERT(MUTEX_NOT_HELD(&f->uf_ufsvfsp->vfs_ulockfs.ul_lock));
2150 
2151 	if (!f->uf_ufsvfsp->vfs_root) {
2152 		MINUTE((": vfs_root is NULL]\n"));
2153 		return (0);
2154 	}
2155 
2156 	alloc_lockfs_comment(f, lfp);
2157 	f->uf_lf_err = 0;
2158 
2159 	if (!LOCKFS_IS_ELOCK(lfp)) {
2160 		lfp->lf_lock = f->uf_lf.lf_lock = LOCKFS_ELOCK;
2161 		VN_HOLD(f->uf_ufsvfsp->vfs_root);
2162 		f->uf_lf_err =
2163 		    ufs__fiolfs(f->uf_ufsvfsp->vfs_root,
2164 		    &f->uf_lf, /* from_user */ 0, /* from_log */ 0);
2165 		VN_RELE(f->uf_ufsvfsp->vfs_root);
2166 	}
2167 
2168 	handle_lockfs_rc = f->uf_lf_err != 0? lockfs_failure: lockfs_success;
2169 	rc = handle_lockfs_rc(f);
2170 
2171 	MINUTE(("] "));
2172 	return (rc);
2173 }
2174 
2175 static int
lockfs_failure(ufs_failure_t * f)2176 lockfs_failure(ufs_failure_t *f)
2177 {
2178 	int	error;
2179 	ufs_failure_states_t	s;
2180 
2181 	TRIVIA(("[lockfs_failure"));
2182 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2183 
2184 	if (!f->uf_ufsvfsp) {
2185 		TRIVIA((": ufsvfsp is NULL]\n"));
2186 		return (0);
2187 	}
2188 
2189 	error = f->uf_lf_err;
2190 	switch (error) {
2191 			/* non-transient errors: */
2192 	case EACCES:	/* disk/in-core metadata reconciliation failed  */
2193 	case EPERM:	/* inode reconciliation failed; incore inode changed? */
2194 	case EIO:	/* device is hard-locked or not responding */
2195 	case EROFS:	/* device is write-locked */
2196 	case EDEADLK:	/* can't lockfs; deadlock would result; */
2197 			/* Swapping or saving accounting records */
2198 			/* onto this fs can cause this errno. */
2199 
2200 		MINOR(("ufs_fiolfs(\"%s\") of %s failed: %s (%d)",
2201 		    fs_name(f), lock_name(&f->uf_lf),
2202 		    err_name(error), error));
2203 
2204 		/*
2205 		 * if can't get lock, then fallback to panic, unless
2206 		 * unless unmount was requested (although unmount will
2207 		 * probably fail if the lock failed, so we'll panic
2208 		 * anyway
2209 		 */
2210 
2211 		s = ((f->uf_flags & UFSFX_LCKUMOUNT) && error != EDEADLK) ?
2212 		    UF_UMOUNT: UF_PANIC;
2213 
2214 		if (!set_state(f, s)) {
2215 			real_panic(f, " ");
2216 			/*NOTREACHED*/
2217 			break;
2218 		}
2219 		break;
2220 
2221 
2222 	case EBUSY:
2223 	case EAGAIN:
2224 
2225 		f->uf_retry = ufsfx_tune.uft_short_err_period;
2226 		if (curthread->t_flag & T_DONTPEND) {
2227 			curthread->t_flag &= ~T_DONTPEND;
2228 
2229 		} else if (!(f->uf_s & (UF_LOCKED | UF_FIXING))) {
2230 			ufs_failure_states_t state;
2231 			/*
2232 			 * if we didn't know that the fix had started,
2233 			 * take note
2234 			 */
2235 			state = error == EBUSY? UF_LOCKED: UF_FIXING;
2236 			if (!set_state(f, state)) {
2237 				TRIVIA((": failed] "));
2238 				return (0);
2239 			}
2240 		}
2241 		break;
2242 
2243 	default:	/* some other non-fatal error */
2244 		MINOR(("lockfs(\"%s\") of %s returned %s (%d)",
2245 		    lock_name(&f->uf_lf), fs_name(f),
2246 		    err_name(f->uf_lf_err), f->uf_lf_err));
2247 
2248 		f->uf_retry = ufsfx_tune.uft_short_err_period;
2249 		break;
2250 
2251 	case EINVAL:	/* unmounted? */
2252 		(void) set_state(f, UF_NOTFIX);
2253 		break;
2254 	}
2255 	TRIVIA(("] "));
2256 	return (1);
2257 }
2258 
2259 static int
lockfs_success(ufs_failure_t * f)2260 lockfs_success(ufs_failure_t *f)
2261 {
2262 	TRIVIA(("[lockfs_success"));
2263 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2264 
2265 	if (!f->uf_ufsvfsp) {
2266 		TRIVIA((": ufsvfsp is NULL]\n"));
2267 		return (0);
2268 	}
2269 
2270 	switch (f->uf_lf.lf_lock) {
2271 	case LOCKFS_ELOCK:	/* error lock worked */
2272 
2273 		if (!set_state(f, UF_LOCKED)) {
2274 			TRIVIA((": failed] "));
2275 			return (0);
2276 		}
2277 		break;
2278 
2279 	case LOCKFS_ULOCK: 			/* unlock worked */
2280 		/*
2281 		 * how'd we get here?
2282 		 * This should be done from fsck's unlock,
2283 		 * not from this thread's context.
2284 		 */
2285 		cmn_err(CE_WARN, "Unlocked error-lock of %s", fs_name(f));
2286 		ufsfx_unlockfs(f->uf_ufsvfsp);
2287 		break;
2288 
2289 	default:
2290 		if (!set_state(f, UF_NOTFIX)) {
2291 			TRIVIA((": failed] "));
2292 			return (0);
2293 		}
2294 		break;
2295 	}
2296 	TRIVIA(("] "));
2297 	return (1);
2298 }
2299 
2300 /*
2301  * when fsck is running it puts its pid into the lockfs
2302  * comment structure, prefaced by PIDSTR
2303  */
2304 const char *PIDSTR = "[pid:";
2305 static int
fsck_active(ufs_failure_t * f)2306 fsck_active(ufs_failure_t *f)
2307 {
2308 	char		*cp;
2309 	int		 i, found, errlocked;
2310 	size_t		 comlen;
2311 	const int	 PIDSTRLEN = (int)strlen(PIDSTR);
2312 	struct ulockfs	*ulp = &f->uf_ufsvfsp->vfs_ulockfs;
2313 
2314 	TRIVIA(("[fsck_active"));
2315 
2316 	ASSERT(f);
2317 	ASSERT(f->uf_s & UF_FIXING);
2318 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2319 	ASSERT(f->uf_ufsvfsp);
2320 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
2321 	ASSERT(MUTEX_NOT_HELD(&ulp->ul_lock));
2322 
2323 	mutex_enter(&ulp->ul_lock);
2324 	cp = ulp->ul_lockfs.lf_comment;
2325 	comlen = ulp->ul_lockfs.lf_comlen;
2326 	errlocked = (int)ULOCKFS_IS_ELOCK(ulp);
2327 	mutex_exit(&ulp->ul_lock);
2328 
2329 	if (!cp || comlen == 0) {
2330 		TRIVIA((": null comment or comlen <= 0, found:0]"));
2331 		return (0);
2332 	}
2333 
2334 	for (found = i = 0; !found && i < (comlen - PIDSTRLEN); i++, cp++)
2335 		found = strncmp(cp, PIDSTR, PIDSTRLEN) == 0;
2336 
2337 	TRIVIA(("found:%d, is_elock:%d]", found, errlocked));
2338 	return (errlocked & found);
2339 }
2340 
2341 static const char unknown_fs[]		= "<unknown fs>";
2342 static const char null_failure[] = "<NULL ufs failure record; unknown fs>";
2343 static const char mutated_vfs_bufp[]	= "<mutated vfs_bufp, unknown fs>";
2344 static const char mutated_vfs_fs[]	= "<mutated vfs_fs, unknown fs>";
2345 
2346 static char *
fs_name(ufs_failure_t * f)2347 fs_name(ufs_failure_t *f)
2348 {
2349 	HIDEOUS(("[fs_name"));
2350 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2351 
2352 	if (!f) {
2353 		HIDEOUS((": failure ptr is NULL]\n"));
2354 		return ((char *)null_failure);
2355 	}
2356 
2357 	if (f->uf_fsname[0] != '\0') {
2358 		HIDEOUS((": return (uf_fsname)]\n"));
2359 		return (f->uf_fsname);
2360 	}
2361 
2362 	if (MUTEX_HELD(f->uf_vfs_lockp)) {
2363 		if (f->uf_bp != f->uf_ufsvfsp->vfs_bufp) {
2364 			HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n",
2365 			    (void *)f->uf_bp, (void *)f->uf_ufsvfsp->vfs_bufp));
2366 			return ((char *)mutated_vfs_bufp);
2367 		}
2368 		if (f->uf_fs != f->uf_ufsvfsp->vfs_fs) {
2369 			HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n",
2370 			    (void *)f->uf_fs, (void *)f->uf_ufsvfsp->vfs_fs));
2371 			return ((char *)mutated_vfs_fs);
2372 		}
2373 		if (f->uf_ufsvfsp && f->uf_bp && f->uf_fs &&
2374 		    *f->uf_fs->fs_fsmnt != '\0') {
2375 			HIDEOUS((": return (fs_fsmnt)]\n"));
2376 			return (f->uf_fs->fs_fsmnt);
2377 		}
2378 	}
2379 
2380 	HIDEOUS((": unknown file system]\n"));
2381 	return ((char *)unknown_fs);
2382 }
2383 
2384 #if defined(DEBUG)
2385 static char *
lock_name(struct lockfs * lfp)2386 lock_name(struct lockfs *lfp)
2387 {
2388 	struct lock_description	*l;
2389 	char			*lname;
2390 
2391 	HIDEOUS(("[lock_name"));
2392 
2393 	lname = lock_desc[0].ld_name;
2394 	for (l = &lock_desc[1]; l->ld_name != NULL; l++) {
2395 		if (lfp && lfp->lf_lock == l->ld_type) {
2396 			lname = l->ld_name;
2397 			break;
2398 		}
2399 	}
2400 	HIDEOUS(("]"));
2401 	return (lname);
2402 }
2403 
2404 static char *
state_name(ufs_failure_states_t state)2405 state_name(ufs_failure_states_t state)
2406 {
2407 	ufsd_t	*s;
2408 
2409 	HIDEOUS(("[state_name"));
2410 
2411 	s = get_state_desc(state);
2412 
2413 	HIDEOUS(("]"));
2414 	return (s->ud_name);
2415 }
2416 
2417 static char *
err_name(int error)2418 err_name(int error)
2419 {
2420 	struct error_description *e;
2421 
2422 	HIDEOUS(("[err_name"));
2423 
2424 	for (e = &err_desc[1]; e->ed_name != NULL; e++) {
2425 		if (error == e->ed_errno) {
2426 			HIDEOUS(("]"));
2427 			return (e->ed_name);
2428 		}
2429 	}
2430 	HIDEOUS(("]"));
2431 	return (err_desc[0].ed_name);
2432 }
2433 
2434 static char *
act_name(ufsa_t action)2435 act_name(ufsa_t action)
2436 {
2437 	struct action_description *a;
2438 
2439 	HIDEOUS(("[act_name"));
2440 
2441 	for (a = &act_desc[1]; a->ad_name != NULL; a++) {
2442 		if (action == a->ad_v) {
2443 			HIDEOUS(("]"));
2444 			return (a->ad_name);
2445 		}
2446 	}
2447 	HIDEOUS(("]"));
2448 	return (act_desc[0].ad_name);
2449 }
2450 
2451 /*
2452  * dump failure list
2453  */
2454 static void
dump_uf_list(char * msg)2455 dump_uf_list(char *msg)
2456 {
2457 	ufs_failure_t	*f;
2458 	int		 i;
2459 	int		 list_was_locked = MUTEX_HELD(&ufs_fix.uq_mutex);
2460 
2461 	if (!list_was_locked && !mutex_tryenter(&ufs_fix.uq_mutex)) {
2462 		printf("dump_uf_list: couldn't get list lock\n");
2463 		return;
2464 	}
2465 
2466 	if (msg) {
2467 		printf("\n%s", msg);
2468 	}
2469 	printf("\ndump_uf_list:\n\tuq_lowat: %d, uq_ne: %d\n",
2470 	    ufs_fix.uq_lowat, ufs_fix.uq_ne);
2471 
2472 	mutex_enter(&uf_stats.ufst_mutex);
2473 	printf("\tuf_stats.current_races: %ld\n", uf_stats.ufst_current_races);
2474 	printf("\tuf_stats.num_failed: %ld\n", uf_stats.ufst_num_failed);
2475 	printf("\tuf_stats.num_fixed: %ld\n", uf_stats.ufst_num_fixed);
2476 	printf("\tuf_stats.cpu_waste: %ld\n", uf_stats.ufst_cpu_waste);
2477 	printf("\tuf_stats.lock_violations: %ld, unmount_failures: %ld\n",
2478 	    uf_stats.ufst_lock_violations, uf_stats.ufst_unmount_failures);
2479 	mutex_exit(&uf_stats.ufst_mutex);
2480 
2481 	for (f = ufs_fix.uq_ufhead, i = 1; f; f = f->uf_next, i++) {
2482 
2483 		if (!mutex_tryenter(&f->uf_mutex)) {
2484 			printf("%d.\t\"skipped - try enter failed\"\n", i);
2485 			continue;
2486 		}
2487 
2488 		dump_uf(f, i);
2489 
2490 		mutex_exit(&f->uf_mutex);
2491 	}
2492 
2493 	printf("\n");
2494 
2495 	if (!list_was_locked)
2496 		mutex_exit(&ufs_fix.uq_mutex);
2497 }
2498 
2499 static void
dump_uf(ufs_failure_t * f,int i)2500 dump_uf(ufs_failure_t *f, int i)
2501 {
2502 	if (!f) {
2503 		printf("dump_uf: NULL failure record\n");
2504 		return;
2505 	}
2506 
2507 	printf("%d.\t\"%s\" is %s.\n",
2508 	    i, fs_name(f), state_name(f->uf_s));
2509 	printf("\t\"%s\"\tAddr: 0x%p\n", f->uf_panic_str, (void *)f);
2510 	printf("\tNext: 0x%p\t\tPrev: 0x%p\n",
2511 	    (void *)f->uf_next, (void *)f->uf_prev);
2512 
2513 	if (f->uf_orig)
2514 		printf("\tOriginal failure: 0x%p \"%s\"\n",
2515 		    (void *)f->uf_orig, f->uf_orig->uf_panic_str);
2516 
2517 	printf("\tUfsvfs: 0x%p\t\tVfs_lockp: 0x%p\n",
2518 	    (void *)f->uf_ufsvfsp, (void *)f->uf_vfs_lockp);
2519 	printf("\tVfs_fsfxp: 0x%p\n", (void *)f->uf_vfs_ufsfxp);
2520 	printf("\tVfs_bufp: 0x%p", (void *)f->uf_bp);
2521 
2522 	if (f->uf_bp)
2523 		printf("\t\tVfs_fs: 0x%p\n", (void *)f->uf_fs);
2524 	else
2525 		printf("\n");
2526 
2527 	printf("\tBegin: 0x%lx\tEntered: 0x%lx\tEnd: 0x%lx\n",
2528 	    f->uf_begin_tm, f->uf_entered_tm, f->uf_end_tm);
2529 
2530 	printf("\tFlags: (%d) %s%s%s%s", f->uf_flags,
2531 	    f->uf_flags & UFSFX_LCKONLY?	 "\"lock only\" "	: "",
2532 	    f->uf_flags & UFSFX_LCKUMOUNT?	 "\"lock+unmount\" "	: "",
2533 	    f->uf_flags & UFSFX_REPAIR_START? "\"started repair\" "	: "",
2534 	    f->uf_flags == 0?                "<none>"               : "");
2535 
2536 	printf("\tRetry: %ld seconds\n", f->uf_retry);
2537 
2538 	printf("\tLockfs:\ttype: %s\terror: %s (%d)\n",
2539 	    lock_name(&f->uf_lf), err_name(f->uf_lf_err), f->uf_lf_err);
2540 
2541 }
2542 #endif /* DEBUG */
2543 
2544 /*
2545  * returns # of ufs_failures in a non-terminal state on queue
2546  * used to coordinate with hlock thread (see ufs_thread.c)
2547  * and to determine when the error lock thread may exit
2548  */
2549 
2550 int
ufsfx_get_failure_qlen(void)2551 ufsfx_get_failure_qlen(void)
2552 {
2553 	ufs_failure_t	*f;
2554 	ufsd_t		*s;
2555 	int		 qlen = 0;
2556 
2557 	MINUTE(("[ufsfx_get_failure_qlen"));
2558 
2559 	if (!mutex_tryenter(&ufs_fix.uq_mutex))
2560 		return (-1);
2561 
2562 	/*
2563 	 * walk down failure list
2564 	 */
2565 
2566 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
2567 
2568 		if (!mutex_tryenter(&f->uf_mutex))
2569 			continue;
2570 
2571 		s = get_state_desc(f->uf_s);
2572 
2573 		if (s->ud_attr.terminal) {
2574 			mutex_exit(&f->uf_mutex);
2575 			continue;
2576 		}
2577 
2578 		MINUTE((": found: %s, \"%s: %s\"\n",
2579 		    fs_name(f), state_name(f->uf_s), f->uf_panic_str));
2580 
2581 		qlen++;
2582 		mutex_exit(&f->uf_mutex);
2583 	}
2584 
2585 	mutex_exit(&ufs_fix.uq_mutex);
2586 
2587 	MINUTE((": qlen=%d]\n", qlen));
2588 
2589 	return (qlen);
2590 }
2591 
2592 /*
2593  * timeout routine
2594  *  called to shutdown fix failure thread and server daemon
2595  */
2596 static void
ufsfx_kill_fix_failure_thread(void * arg)2597 ufsfx_kill_fix_failure_thread(void *arg)
2598 {
2599 	clock_t odelta = (clock_t)arg;
2600 	int	qlen;
2601 
2602 	MAJOR(("[ufsfx_kill_fix_failure_thread"));
2603 
2604 	qlen = ufsfx_get_failure_qlen();
2605 
2606 	if (qlen < 0) {
2607 		clock_t delta;
2608 
2609 		delta = odelta << 1;
2610 		if (delta <= 0)
2611 			delta = INT_MAX;
2612 
2613 		(void) timeout(ufsfx_kill_fix_failure_thread,
2614 		    (void *)delta, delta);
2615 		MAJOR((": rescheduled"));
2616 
2617 	} else if (qlen == 0) {
2618 		ufs_thread_exit(&ufs_fix);
2619 		MAJOR((": killed"));
2620 	}
2621 	/*
2622 	 * else
2623 	 *  let timeout expire
2624 	 */
2625 	MAJOR(("]\n"));
2626 }
2627