1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/errno.h> 33 #include <sys/mode.h> 34 #include <sys/sysmacros.h> 35 #include <sys/cmn_err.h> 36 #include <sys/varargs.h> 37 #include <sys/time.h> 38 #include <sys/buf.h> 39 #include <sys/kmem.h> 40 #include <sys/t_lock.h> 41 #include <sys/poll.h> 42 #include <sys/debug.h> 43 #include <sys/cred.h> 44 #include <sys/lockfs.h> 45 #include <sys/fs/ufs_fs.h> 46 #include <sys/fs/ufs_inode.h> 47 #include <sys/fs/ufs_panic.h> 48 #include <sys/fs/ufs_lockfs.h> 49 #include <sys/fs/ufs_trans.h> 50 #include <sys/fs/ufs_mount.h> 51 #include <sys/fs/ufs_prot.h> 52 #include <sys/fs/ufs_bio.h> 53 #include <sys/pathname.h> 54 #include <sys/utsname.h> 55 #include <sys/conf.h> 56 57 /* handy */ 58 #define abs(x) ((x) < 0? 
-(x): (x)) 59 60 #if defined(DEBUG) 61 62 #define DBGLVL_NONE 0x00000000 63 #define DBGLVL_MAJOR 0x00000100 64 #define DBGLVL_MINOR 0x00000200 65 #define DBGLVL_MINUTE 0x00000400 66 #define DBGLVL_TRIVIA 0x00000800 67 #define DBGLVL_HIDEOUS 0x00001000 68 69 #define DBGFLG_NONE 0x00000000 70 #define DBGFLG_NOPANIC 0x00000001 71 #define DBGFLG_LVLONLY 0x00000002 72 #define DBGFLG_FIXWOULDPANIC 0x00000004 73 74 #define DBGFLG_FLAGMASK 0x0000000F 75 #define DBGFLG_LEVELMASK ~DBGFLG_FLAGMASK 76 77 #define DEBUG_FLAGS (ufs_fix_failure_dbg & DBGFLG_FLAGMASK) 78 #define DEBUG_LEVEL (ufs_fix_failure_dbg & DBGFLG_LEVELMASK) 79 80 unsigned int ufs_fix_failure_dbg = DBGLVL_NONE | DBGFLG_NONE; 81 82 #define DCALL(dbg_level, call) \ 83 { \ 84 if (DEBUG_LEVEL != DBGLVL_NONE) { \ 85 if (DEBUG_FLAGS & DBGFLG_LVLONLY) { \ 86 if (DEBUG_LEVEL & dbg_level) { \ 87 call; \ 88 } \ 89 } else { \ 90 if (dbg_level <= DEBUG_LEVEL) { \ 91 call; \ 92 } \ 93 } \ 94 } \ 95 } 96 97 #define DPRINTF(dbg_level, msg) DCALL(dbg_level, printf msg) 98 99 #define MAJOR(msg) DPRINTF(DBGLVL_MAJOR, msg) 100 #define MINOR(msg) DPRINTF(DBGLVL_MINOR, msg) 101 #define MINUTE(msg) DPRINTF(DBGLVL_MINUTE, msg) 102 #define TRIVIA(msg) DPRINTF(DBGLVL_TRIVIA, msg) 103 #define HIDEOUS(msg) DPRINTF(DBGLVL_HIDEOUS, msg) 104 105 #else /* !DEBUG */ 106 107 #define DCALL(ignored_dbg_level, ignored_routine) 108 #define MAJOR(ignored) 109 #define MINOR(ignored) 110 #define MINUTE(ignored) 111 #define TRIVIA(ignored) 112 #define HIDEOUS(ignored) 113 114 #endif /* DEBUG */ 115 116 #define NULLSTR(str) (!(str) || *(str) == '\0'? "<null>" : (str)) 117 #define NULSTRING "" 118 119 /* somewhat arbitrary limits, in seconds */ 120 /* all probably ought to be different, but these are convenient for debugging */ 121 const time_t UF_TOO_LONG = 128; /* max. wait for fsck start */ 122 123 /* all of these are in units of seconds used for retry period while ... 
*/ 124 const time_t UF_FIXSTART_PERIOD = 16; /* awaiting fsck start */ 125 const time_t UF_FIXPOLL_PERIOD = 256; /* awaiting fsck finish */ 126 const time_t UF_SHORT_ERROR_PERIOD = 4; /* after (lockfs) error */ 127 const time_t UF_LONG_ERROR_PERIOD = 512; /* after (lockfs) error */ 128 129 #define NO_ERROR 0 130 #define LOCKFS_OLOCK LOCKFS_MAXLOCK+1 131 132 const ulong_t GB = 1024 * 1024 * 1024; 133 const ulong_t SecondsPerGig = 1024; /* ~17 minutes (overestimate) */ 134 135 /* 136 * per filesystem flags 137 */ 138 const int UFSFX_PANIC = (UFSMNT_ONERROR_PANIC >> 4); 139 const int UFSFX_LCKONLY = (UFSMNT_ONERROR_LOCK >> 4); 140 const int UFSFX_LCKUMOUNT = (UFSMNT_ONERROR_UMOUNT >> 4); 141 const int UFSFX_DEFAULT = (UFSMNT_ONERROR_DEFAULT >> 4); 142 const int UFSFX_REPAIR_START = 0x10000000; 143 144 /* return protocols */ 145 146 typedef enum triage_return_code { 147 TRIAGE_DEAD = -1, 148 TRIAGE_NO_SPIRIT, 149 TRIAGE_ATTEND_TO 150 } triage_t; 151 152 typedef enum statefunc_return_code { 153 SFRC_SUCCESS = 1, 154 SFRC_FAIL = 0 155 } sfrc_t; 156 157 /* external references */ 158 /* in ufs_thread.c */ 159 extern int ufs_thread_run(struct ufs_q *, callb_cpr_t *cprinfop); 160 extern int ufs_checkaccton(vnode_t *); /* in ufs_lockfs.c */ 161 extern int ufs_checkswapon(vnode_t *); /* in ufs_lockfs.c */ 162 163 extern struct pollhead ufs_pollhd; /* in ufs_vnops.c */ 164 165 /* globals */ 166 struct ufs_q ufs_fix; 167 168 /* 169 * patchable constants: 170 * These are set in ufsfx_init() [called at modload] 171 */ 172 struct ufs_failure_tunable { 173 long uft_too_long; /* limit repair startup time */ 174 long uft_fixstart_period; /* pre-repair start period */ 175 long uft_fixpoll_period; /* post-fsck start period */ 176 long uft_short_err_period; /* post-error short period */ 177 long uft_long_err_period; /* post-error long period */ 178 } ufsfx_tune; 179 180 /* internal statistics of events */ 181 struct uf_statistics { 182 ulong_t ufst_lock_violations; 183 ulong_t 
ufst_current_races; 184 ulong_t ufst_unmount_failures; 185 ulong_t ufst_num_fixed; 186 ulong_t ufst_num_failed; 187 ulong_t ufst_cpu_waste; 188 time_t ufst_last_start_tm; 189 kmutex_t ufst_mutex; 190 } uf_stats; 191 192 typedef enum state_action { 193 UFA_ERROR = -1, /* internal error */ 194 UFA_FOUND, /* found uf in state */ 195 UFA_SET /* change uf to state */ 196 } ufsa_t; 197 198 /* state definition */ 199 typedef struct uf_state_desc { 200 int ud_v; /* value */ 201 char *ud_name; /* name */ 202 sfrc_t (*ud_sfp)(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 203 /* per-state actions */ 204 ufs_failure_states_t ud_prev; /* valid prev. states */ 205 206 struct uf_state_desc_attr { 207 unsigned terminal:1; /* no action req. if found */ 208 unsigned at_fail:1; /* state set by thread */ 209 /* encountering the error */ 210 unsigned unused; 211 } ud_attr; 212 } ufsd_t; 213 214 /* 215 * forward references 216 */ 217 218 /* thread to watch for failures */ 219 static void ufsfx_thread_fix_failures(void *); 220 static int ufsfx_do_failure_q(void); 221 static void ufsfx_kill_fix_failure_thread(void *); 222 223 /* routines called when failure occurs */ 224 static int ufs_fault_v(vnode_t *, char *, va_list) 225 __KVPRINTFLIKE(2); 226 static ufs_failure_t *init_failure(vnode_t *, char *, va_list) 227 __KVPRINTFLIKE(2); 228 static void queue_failure(ufs_failure_t *); 229 /*PRINTFLIKE2*/ 230 static void real_panic(ufs_failure_t *, const char *, ...) 
231 __KPRINTFLIKE(2); 232 static void real_panic_v(ufs_failure_t *, const char *, va_list) 233 __KVPRINTFLIKE(2); 234 static triage_t triage(vnode_t *); 235 236 /* routines called when failure record is acted upon */ 237 static sfrc_t set_state(ufs_failure_t *, ufs_failure_states_t); 238 static int state_trans_valid(ufs_failure_states_t, ufs_failure_states_t); 239 static int terminal_state(ufs_failure_states_t); 240 241 /* routines called when states entered/found */ 242 static sfrc_t sf_minimum(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 243 static sfrc_t sf_undef(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 244 static sfrc_t sf_init(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 245 static sfrc_t sf_queue(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 246 static sfrc_t sf_found_queue(ufs_failure_t *); 247 static sfrc_t sf_nonterm_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 248 static sfrc_t sf_term_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 249 static sfrc_t sf_panic(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 250 static sfrc_t sf_set_trylck(ufs_failure_t *); 251 static sfrc_t sf_set_locked(ufs_failure_t *); 252 static sfrc_t sf_found_trylck(ufs_failure_t *); 253 static sfrc_t sf_found_lock_fix_cmn(ufs_failure_t *, ufs_failure_states_t); 254 static sfrc_t sf_found_umount(ufs_failure_t *); 255 256 /* support routines, called by sf_nonterm_cmn and sf_term_cmn */ 257 static time_t trylock_time_exceeded(ufs_failure_t *); 258 static void pester_msg(ufs_failure_t *, int); 259 static int get_lockfs_status(ufs_failure_t *, struct lockfs *); 260 static void alloc_lockfs_comment(ufs_failure_t *, struct lockfs *); 261 static int set_lockfs(ufs_failure_t *, struct lockfs *); 262 static int lockfs_failure(ufs_failure_t *); 263 static int lockfs_success(ufs_failure_t *); 264 static int fsck_active(ufs_failure_t *); 265 266 /* low-level support routines */ 267 static ufsd_t *get_state_desc(ufs_failure_states_t); 268 static char 
*fs_name(ufs_failure_t *); 269 270 #if defined(DEBUG) 271 static char *state_name(ufs_failure_states_t); 272 static char *lock_name(struct lockfs *); 273 static char *err_name(int); 274 static char *act_name(ufsa_t); 275 static void dump_uf_list(char *msg); 276 static void dump_uf(ufs_failure_t *, int i); 277 #endif /* DEBUG */ 278 /* 279 * 280 * State Transitions: 281 * 282 * normally: 283 * if flagged to be locked but not unmounted: (UFSMNT_ONERROR_LOCK) 284 * UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> FIXING -> FIXED 285 * 286 * The only difference between these two is that the fsck must be started 287 * manually. 288 * 289 * if flagged to be unmounted: (UFSMNT_ONERROR_UMOUNT) 290 * UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> UMOUNT -> NOTFIX 291 * 292 * if flagged to panic: (UFSMNT_ONERROR_PANIC) 293 * UNDEF -> INIT -> PANIC 294 * 295 * if a secondary panic on a file system which has an active failure 296 * record: 297 * UNDEF -> INIT -> QUEUE -> REPLICA 298 * 299 * UNDEF, INIT, QUEUE all are set in the context of the failing thread. 300 * All other states (except possibly PANIC) are set in by the monitor 301 * (lock) thread. 
 *
 */

/*
 * Table of failure states, one row per state.  Columns follow ufsd_t:
 *	ud_v	 state value
 *	ud_name	 text describing a file system in that state
 *	ud_sfp	 per-state action function
 *	ud_prev	 mask of states from which this state may be entered
 *	ud_attr	 { terminal, at_fail, unused }
 * The table is terminated by a row whose ud_name is NULL.
 */
ufsd_t	state_desc[] =
{
	{ UF_ILLEGAL,	"in an unknown state",	sf_minimum,	UF_ILLEGAL,
								{ 0, 1, 0 } },
	{ UF_UNDEF,	"undefined",		sf_undef,	UF_UNDEF,
								{ 0, 1, 0 } },
	{ UF_INIT,	"being initialized",	sf_init,	UF_UNDEF,
								{ 0, 1, 0 } },
	{ UF_QUEUE,	"queued",		sf_queue,	UF_INIT,
								{ 0, 1, 0 } },
	{ UF_TRYLCK,	"trying to be locked",	sf_nonterm_cmn,
						UF_QUEUE,	{ 0, 0, 0 } },
	{ UF_LOCKED,	"locked",		sf_nonterm_cmn,
					UF_TRYLCK | UF_FIXING,	{ 0, 0, 0 } },
	{ UF_UMOUNT,	"being unmounted",	sf_nonterm_cmn,

#if defined(DEBUG)
					UF_PANIC |
#endif /* DEBUG */
					UF_TRYLCK | UF_LOCKED,	{ 0, 0, 0 } },
	{ UF_FIXING,	"being fixed",		sf_nonterm_cmn,
						UF_LOCKED,	{ 0, 0, 0 } },
	{ UF_FIXED,	"fixed",		sf_term_cmn,
						UF_FIXING,	{ 1, 0, 0 } },
	{ UF_NOTFIX,	"not fixed",		sf_term_cmn,

#if defined(DEBUG)
							UF_PANIC |
#endif /* DEBUG */

	    UF_QUEUE | UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING,
								{ 1, 0, 0 } },
	{ UF_REPLICA,	"a replica",		sf_term_cmn,
						UF_QUEUE,	{ 1, 0, 0 } },
	{ UF_PANIC,	"panicking",		sf_panic,
		/* XXX make this narrower */	UF_ALLSTATES,	{ 0, 0, 0 } },
	{ UF_UNDEF,	NULL,			((sfrc_t (*)()) NULL),
						UF_UNDEF,	{ 0, 0, 0 } }
};

/*
 * unified collection
 *
 * Pointers to the statistics, tunables and state table above;
 * filled in by ufsfx_init().
 */
struct ufsfx_info {
	struct uf_statistics		*ufi_statp;
	struct ufs_failure_tunable	*ufi_tunep;
	ufsd_t				*ufi_statetab;
} uffsinfo;

#if defined(DEBUG)
/* value -> printable name pair for a ufsa_t action */
struct action_description {
	ufsa_t	ad_v;
	char	*ad_name;
};

#define	EUNK		(-1)

/*
 * errno -> printable name.  The first row carries the text for an
 * unrecognized errno; the table ends at the row whose ed_name is NULL.
 */
struct error_description {
	int	ed_errno;
	char	*ed_name;
} err_desc[] =
{
	{ EUNK,		"<unexpected errno?>"	},
	{ EINVAL,	"EINVAL"		},
	{ EACCES,	"EACCES"		},
	{ EPERM,	"EPERM"			},
	{ EIO,		"EIO"			},
	{ EDEADLK,	"EDEADLK"		},
	{ EBUSY,	"EBUSY"			},
	{ EAGAIN,	"EAGAIN"		},
	{ ERESTART,	"ERESTART"		},
	{ ETIMEDOUT,	"ETIMEDOUT"		},
	{ NO_ERROR,	"Ok"			},
	{ EUNK,		NULL			}
};

struct action_description act_desc[] = 380 { 381 { UFA_ERROR, "<unexpected action?>" }, 382 { UFA_FOUND, "\"found\"" }, 383 { UFA_SET, "\"set\"" }, 384 { UFA_ERROR, NULL }, 385 }; 386 387 #define LOCKFS_BADLOCK (-1) 388 389 struct lock_description { 390 int ld_type; 391 char *ld_name; 392 } lock_desc[] = 393 { 394 { LOCKFS_BADLOCK, "<unexpected lock?>" }, 395 { LOCKFS_ULOCK, "Unlock" }, 396 { LOCKFS_ELOCK, "Error Lock" }, 397 { LOCKFS_HLOCK, "Hard Lock" }, 398 { LOCKFS_OLOCK, "Old Lock" }, 399 { LOCKFS_BADLOCK, NULL } 400 }; 401 402 #endif /* DEBUG */ 403 404 /* 405 * ufs_fault, ufs_fault_v 406 * 407 * called instead of cmn_err(CE_PANIC, ...) by ufs routines 408 * when a failure is detected to put the file system into an 409 * error state (if possible) or to devolve to a panic otherwise 410 * 411 * vnode is some vnode in this file system, used to find the way 412 * to ufsvfs, vfsp etc. Since a panic can be called from many 413 * levels, the vnode is the most convenient hook to pass through. 414 * 415 */ 416 417 /*PRINTFLIKE2*/ 418 int 419 ufs_fault(vnode_t *vp, char *fmt, ...) 420 { 421 va_list adx; 422 int error; 423 424 MINOR(("[ufs_fault")); 425 426 va_start(adx, fmt); 427 error = ufs_fault_v(vp, fmt, adx); 428 va_end(adx); 429 430 MINOR((": %s (%d)]\n", err_name(error), error)); 431 return (error); 432 } 433 434 const char *nullfmt = "<null format?>"; 435 436 static int 437 ufs_fault_v(vnode_t *vp, char *fmt, va_list adx) 438 { 439 ufs_failure_t *new = NULL; 440 ufsvfs_t *ufsvfsp; 441 triage_t fix; 442 int err = ERESTART; 443 int need_vfslock; 444 445 MINOR(("[ufs_fault_v")); 446 447 if (fmt == NULL) 448 fmt = (char *)nullfmt; 449 450 fix = triage(vp); 451 452 if (vp) { 453 ufsvfsp = (struct ufsvfs *)vp->v_vfsp->vfs_data; 454 455 /* 456 * Something bad has happened. That is why we are here. 457 * 458 * In order for the bad thing to be recorded in the superblock 459 * we need to write to the superblock directly. 
460 * In the case that logging is enabled the logging code 461 * would normally intercept our write as a delta to the log, 462 * thus we mark the filesystem FSBAD in any case. 463 */ 464 need_vfslock = !MUTEX_HELD(&ufsvfsp->vfs_lock); 465 466 if (need_vfslock) { 467 mutex_enter(&ufsvfsp->vfs_lock); 468 } 469 470 ufsvfsp->vfs_fs->fs_clean = FSBAD; 471 ASSERT(SEMA_HELD(&ufsvfsp->vfs_bufp->b_sem)); 472 ufsvfsp->vfs_bufp->b_flags &= ~(B_ASYNC | B_READ | 473 B_DONE | B_ERROR | B_DELWRI); 474 475 (void) bdev_strategy(ufsvfsp->vfs_bufp); 476 (void) biowait(ufsvfsp->vfs_bufp); 477 478 if (need_vfslock) { 479 mutex_exit(&ufsvfsp->vfs_lock); 480 } 481 } 482 483 switch (fix) { 484 485 default: 486 case TRIAGE_DEAD: 487 case TRIAGE_NO_SPIRIT: 488 489 real_panic_v(new, fmt, adx); 490 /* LINTED: warning: logical expression always true: op "||" */ 491 ASSERT(DEBUG); 492 err = EAGAIN; 493 494 #if defined(DEBUG) 495 if (!(DEBUG_FLAGS & DBGFLG_FIXWOULDPANIC)) { 496 break; 497 } 498 /* FALLTHROUGH */ 499 500 #else 501 break; 502 503 #endif /* DEBUG */ 504 505 case TRIAGE_ATTEND_TO: 506 507 /* q thread not running yet? 
*/ 508 mutex_enter(&ufs_fix.uq_mutex); 509 if (!ufs_fix.uq_threadp) { 510 mutex_exit(&ufs_fix.uq_mutex); 511 ufs_thread_start(&ufs_fix, ufsfx_thread_fix_failures, 512 NULL); 513 ufs_fix.uq_threadp->t_flag |= T_DONTBLOCK; 514 mutex_enter(&ufs_fix.uq_mutex); 515 } else { 516 MINOR((": fix failure thread already running ")); 517 } 518 519 if (ufs_fix.uq_threadp && ufs_fix.uq_threadp == curthread) { 520 mutex_exit(&ufs_fix.uq_mutex); 521 cmn_err(CE_WARN, "ufs_fault_v: recursive ufs_fault"); 522 } else { 523 mutex_exit(&ufs_fix.uq_mutex); 524 } 525 526 new = init_failure(vp, fmt, adx); 527 if (new != NULL) { 528 queue_failure(new); 529 break; 530 } 531 real_panic_v(new, fmt, adx); 532 break; 533 534 } 535 MINOR(("] ")); 536 return (err); 537 } 538 539 /* 540 * triage() 541 * 542 * Attempt to fix iff: 543 * - the system is not already panicking 544 * - this file system isn't explicitly marked not to be fixed 545 * - we can connect to the user-level daemon 546 * These conditions are detectable later, but if we can determine 547 * them in the failing threads context the core dump may be more 548 * useful. 
549 * 550 */ 551 552 static triage_t 553 triage(vnode_t *vp) 554 { 555 struct inode *ip; 556 int need_unlock_vfs; 557 int fs_flags; 558 559 MINUTE(("[triage")); 560 561 if (panicstr) { 562 MINUTE(( 563 ": already panicking: \"%s\" => TRIAGE_DEAD]\n", panicstr)); 564 return (TRIAGE_DEAD); 565 } 566 567 if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs) { 568 MINUTE(( 569 ": vp, ip or ufsvfs is NULL; can't determine fs => TRIAGE_DEAD]\n")); 570 return (TRIAGE_DEAD); 571 } 572 573 /* use tryenter and continue no matter what since we're panicky */ 574 need_unlock_vfs = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock); 575 if (need_unlock_vfs) 576 need_unlock_vfs = mutex_tryenter(&ip->i_ufsvfs->vfs_lock); 577 578 fs_flags = ip->i_ufsvfs->vfs_fsfx.fx_flags; 579 if (need_unlock_vfs) 580 mutex_exit(&ip->i_ufsvfs->vfs_lock); 581 582 if (fs_flags & UFSFX_PANIC) { 583 MINUTE(( 584 ": filesystem marked \"panic\" => TRIAGE_NO_SPIRIT]\n")); 585 return (TRIAGE_NO_SPIRIT); 586 } 587 588 if (ufs_checkaccton(vp) != 0) { 589 MINUTE(( 590 ": filesystem would deadlock (accounting) => TRIAGE_DEAD]\n")); 591 return (TRIAGE_DEAD); 592 } 593 594 if (ufs_checkswapon(vp) != 0) { 595 MINUTE(( 596 ": filesystem would deadlock (swapping) => TRIAGE_DEAD]\n")); 597 return (TRIAGE_DEAD); 598 } 599 600 MINUTE((": return TRIAGE_ATTEND_TO] ")); 601 return (TRIAGE_ATTEND_TO); 602 } 603 604 /* 605 * init failure 606 * 607 * This routine allocates a failure struct and initializes 608 * it's member elements. 609 * Space is allocated for copies of dynamic identifying fs structures 610 * passed in. Without a much more segmented kernel architecture 611 * this is as protected as we can make it (for now.) 
612 */ 613 static ufs_failure_t * 614 init_failure(vnode_t *vp, char *fmt, va_list adx) 615 { 616 ufs_failure_t *new; 617 struct inode *ip; 618 int initialization_worked = 0; 619 int need_vfs_unlock; 620 621 MINOR(("[init_failure")); 622 623 new = kmem_zalloc(sizeof (ufs_failure_t), KM_NOSLEEP); 624 if (!new) { 625 MINOR((": kmem_zalloc failed]\n")); 626 return (NULL); 627 } 628 629 /* 630 * enough information to make a fix attempt possible? 631 */ 632 if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs || !vp->v_vfsp || 633 !ip->i_ufsvfs->vfs_bufp || !ITOF(ip) || !fmt) 634 goto errout; 635 636 if (vp->v_type != VREG && vp->v_type != VDIR && 637 vp->v_type != VBLK && vp->v_type != VCHR && 638 vp->v_type != VLNK && vp->v_type != VFIFO && 639 vp->v_type != VSOCK) 640 goto errout; 641 642 if (ip->i_ufsvfs->vfs_root->v_type != VREG && 643 ip->i_ufsvfs->vfs_root->v_type != VDIR && 644 ip->i_ufsvfs->vfs_root->v_type != VBLK && 645 ip->i_ufsvfs->vfs_root->v_type != VCHR && 646 ip->i_ufsvfs->vfs_root->v_type != VLNK && 647 ip->i_ufsvfs->vfs_root->v_type != VFIFO && 648 ip->i_ufsvfs->vfs_root->v_type != VSOCK) 649 goto errout; 650 651 if ((ITOF(ip)->fs_magic != FS_MAGIC) && 652 (ITOF(ip)->fs_magic != MTB_UFS_MAGIC)) 653 goto errout; 654 655 /* intialize values */ 656 657 (void) vsnprintf(new->uf_panic_str, LOCKFS_MAXCOMMENTLEN - 1, fmt, adx); 658 659 new->uf_ufsvfsp = ip->i_ufsvfs; 660 new->uf_vfsp = ip->i_vfs; 661 662 mutex_init(&new->uf_mutex, NULL, MUTEX_DEFAULT, NULL); 663 need_vfs_unlock = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock); 664 665 if (need_vfs_unlock) { 666 if (!mutex_tryenter(&ip->i_ufsvfs->vfs_lock)) { 667 /* 668 * not much alternative here, but we're panicking 669 * already, it couldn't be worse - so just 670 * proceed optimistically and take note. 
671 */ 672 mutex_enter(&uf_stats.ufst_mutex); 673 uf_stats.ufst_lock_violations++; 674 mutex_exit(&uf_stats.ufst_mutex); 675 MINOR((": couldn't get vfs lock")) 676 need_vfs_unlock = 0; 677 } 678 } 679 680 if (mutex_tryenter(&new->uf_mutex)) { 681 initialization_worked = set_state(new, UF_INIT); 682 mutex_exit(&new->uf_mutex); 683 } 684 685 if (need_vfs_unlock) 686 mutex_exit(&ip->i_ufsvfs->vfs_lock); 687 688 if (initialization_worked) { 689 MINOR(("] ")); 690 return (new); 691 } 692 /* FALLTHROUGH */ 693 694 errout: 695 if (new) 696 kmem_free(new, sizeof (ufs_failure_t)); 697 MINOR((": failed]\n")); 698 return (NULL); 699 } 700 701 static void 702 queue_failure(ufs_failure_t *new) 703 { 704 MINOR(("[queue_failure")); 705 706 mutex_enter(&ufs_fix.uq_mutex); 707 708 if (ufs_fix.uq_ufhead) 709 insque(new, &ufs_fix.uq_ufhead); 710 else 711 ufs_fix.uq_ufhead = new; 712 713 if (mutex_tryenter(&new->uf_mutex)) { 714 (void) set_state(new, UF_QUEUE); 715 mutex_exit(&new->uf_mutex); 716 } 717 718 mutex_enter(&uf_stats.ufst_mutex); /* force wakeup */ 719 ufs_fix.uq_ne = ufs_fix.uq_lowat = uf_stats.ufst_num_failed; 720 mutex_exit(&uf_stats.ufst_mutex); 721 722 cv_broadcast(&ufs_fix.uq_cv); 723 724 DCALL(DBGLVL_MAJOR, cmn_err(CE_WARN, new->uf_panic_str? 725 new->uf_panic_str: 726 "queue_failure: NULL panic str?")); 727 mutex_exit(&ufs_fix.uq_mutex); 728 729 MINOR(("] ")); 730 } 731 732 /*PRINTFLIKE2*/ 733 static void 734 real_panic(ufs_failure_t *f, const char *fmt, ...) 
735 { 736 va_list adx; 737 738 MINUTE(("[real_panic ")); 739 740 va_start(adx, fmt); 741 real_panic_v(f, fmt, adx); 742 va_end(adx); 743 744 MINUTE((": return?!]\n")); 745 } 746 747 static void 748 real_panic_v(ufs_failure_t *f, const char *fmt, va_list adx) 749 { 750 int seriousness = CE_PANIC; 751 int need_unlock; 752 753 MINUTE(("[real_panic_v ")); 754 755 if (f && f->uf_ufsvfsp) 756 TRANS_SETERROR(f->uf_ufsvfsp); 757 758 #if defined(DEBUG) 759 if (DEBUG_FLAGS & DBGFLG_NOPANIC) { 760 seriousness = CE_WARN; 761 cmn_err(CE_WARN, "real_panic: EWOULDPANIC\n"); 762 } 763 #endif /* DEBUG */ 764 765 delay(hz >> 1); /* allow previous warnings to get out */ 766 767 if (!f && fmt) 768 vcmn_err(seriousness, fmt, adx); 769 else 770 cmn_err(seriousness, f && f->uf_panic_str? f->uf_panic_str: 771 "real_panic: <unknown panic?>"); 772 773 if (f) { 774 need_unlock = !MUTEX_HELD(&f->uf_mutex); 775 if (need_unlock) { 776 mutex_enter(&f->uf_mutex); 777 } 778 779 f->uf_retry = -1; 780 (void) set_state(f, UF_PANIC); 781 782 if (need_unlock) { 783 mutex_exit(&f->uf_mutex); 784 } 785 } 786 MINUTE((": return?!]\n")); 787 } 788 789 /* 790 * initializes ufs panic structs, locks, etc 791 */ 792 void 793 ufsfx_init(void) 794 { 795 796 MINUTE(("[ufsfx_init")); 797 798 /* patchable; unchanged while running, so no lock is needed */ 799 ufsfx_tune.uft_too_long = UF_TOO_LONG; 800 ufsfx_tune.uft_fixstart_period = UF_FIXSTART_PERIOD; 801 ufsfx_tune.uft_fixpoll_period = UF_FIXPOLL_PERIOD; 802 ufsfx_tune.uft_short_err_period = UF_SHORT_ERROR_PERIOD; 803 ufsfx_tune.uft_long_err_period = UF_LONG_ERROR_PERIOD; 804 805 uffsinfo.ufi_statp = &uf_stats; 806 uffsinfo.ufi_tunep = &ufsfx_tune; 807 uffsinfo.ufi_statetab = &state_desc[0]; 808 809 mutex_init(&uf_stats.ufst_mutex, NULL, MUTEX_DEFAULT, NULL); 810 ufs_thread_init(&ufs_fix, /* maxne */ 1); 811 812 MINUTE(("] ")); 813 } 814 815 /* 816 * initializes per-ufs values 817 * returns 0 (ok) or errno 818 */ 819 int 820 ufsfx_mount(struct ufsvfs *ufsvfsp, int 
flags) 821 { 822 MINUTE(("[ufsfx_mount (%d)", flags)); 823 /* don't check/need vfs_lock because it's still being initialized */ 824 825 ufsvfsp->vfs_fsfx.fx_flags = (flags & UFSMNT_ONERROR_FLGMASK) >> 4; 826 827 MINUTE((": %s: fx_flags:%ld,", 828 ufsvfsp->vfs_fs->fs_fsmnt, ufsvfsp->vfs_fsfx.fx_flags)); 829 /* 830 * onerror={panic ^ lock only ^ unmount} 831 */ 832 833 if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_PANIC) { 834 MINUTE((" PANIC")); 835 836 } else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKONLY) { 837 MINUTE((" LCKONLY")); 838 839 } else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKUMOUNT) { 840 MINUTE((" LCKUMOUNT")); 841 842 } else { 843 ufsvfsp->vfs_fsfx.fx_flags = UFSFX_DEFAULT; 844 ASSERT(ufsvfsp->vfs_fsfx.fx_flags & 845 (UFSMNT_ONERROR_FLGMASK >> 4)); 846 MINUTE((" DEFAULT")); 847 } 848 849 pollwakeup(&ufs_pollhd, POLLPRI); 850 MINUTE(("]\n")); 851 return (0); 852 } 853 854 /* 855 * ufsfx_unmount 856 * 857 * called during unmount 858 */ 859 void 860 ufsfx_unmount(struct ufsvfs *ufsvfsp) 861 { 862 ufs_failure_t *f; 863 int must_unlock_list; 864 865 MINUTE(("[ufsfx_unmount")); 866 867 if (!ufsvfsp) { 868 MINUTE((": no ufsvfsp]")); 869 return; 870 } 871 872 if ((must_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex)) != 0) 873 mutex_enter(&ufs_fix.uq_mutex); 874 875 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 876 int must_unlock_failure; 877 878 must_unlock_failure = !MUTEX_HELD(&f->uf_mutex); 879 if (must_unlock_failure) { 880 mutex_enter(&f->uf_mutex); 881 } 882 883 if (f->uf_ufsvfsp == ufsvfsp) { 884 885 /* 886 * if we owned the failure record lock, then this 887 * is probably a fix failure-triggered unmount, so 888 * the warning is not appropriate or needed 889 */ 890 891 /* XXX if rebooting don't print this? 
*/ 892 if (!terminal_state(f->uf_s) && must_unlock_failure) { 893 cmn_err(CE_WARN, 894 "Unmounting %s while error-locked", 895 fs_name(f)); 896 } 897 898 f->uf_ufsvfsp = NULL; 899 f->uf_vfs_ufsfxp = NULL; 900 f->uf_vfs_lockp = NULL; 901 f->uf_bp = NULL; 902 f->uf_vfsp = NULL; 903 f->uf_retry = -1; 904 } 905 906 if (must_unlock_failure) 907 mutex_exit(&f->uf_mutex); 908 } 909 if (must_unlock_list) 910 mutex_exit(&ufs_fix.uq_mutex); 911 912 pollwakeup(&ufs_pollhd, POLLPRI | POLLHUP); 913 MINUTE(("] ")); 914 } 915 916 /* 917 * ufsfx_(un)lockfs 918 * 919 * provides hook from lockfs code so we can recognize unlock/relock 920 * This is called after it is certain that the (un)lock will succeed. 921 */ 922 void 923 ufsfx_unlockfs(struct ufsvfs *ufsvfsp) 924 { 925 ufs_failure_t *f; 926 int need_unlock; 927 int need_unlock_list; 928 int informed = 0; 929 930 MINUTE(("[ufsfx_unlockfs")); 931 932 if (!ufsvfsp) 933 return; 934 935 need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex); 936 937 if (need_unlock_list) 938 mutex_enter(&ufs_fix.uq_mutex); 939 940 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 941 942 need_unlock = !MUTEX_HELD(&f->uf_mutex); 943 if (need_unlock) 944 mutex_enter(&f->uf_mutex); 945 946 if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s)) { 947 if (!(f->uf_s & UF_FIXING)) { 948 /* 949 * This might happen if we don't notice that 950 * the fs gets marked FSFIX before it is 951 * marked FSCLEAN, as might occur if the 952 * the superblock was hammered directly. 
953 */ 954 if (!informed) { 955 informed = 1; 956 cmn_err(CE_NOTE, 957 "Unlock of %s succeeded before fs_clean marked FSFIX?", 958 fs_name(f)); 959 } 960 961 /* 962 * pass through fixing state so 963 * transition protocol is satisfied 964 */ 965 if (!set_state(f, UF_FIXING)) { 966 MINUTE((": failed] ")); 967 } 968 } 969 970 if (!set_state(f, UF_FIXED)) { 971 /* it's already fixed, so don't panic now */ 972 MINUTE((": failed] ")); 973 } 974 } 975 976 if (need_unlock) 977 mutex_exit(&f->uf_mutex); 978 } 979 if (need_unlock_list) 980 mutex_exit(&ufs_fix.uq_mutex); 981 MINUTE(("] ")); 982 } 983 984 void 985 ufsfx_lockfs(struct ufsvfs *ufsvfsp) 986 { 987 ufs_failure_t *f; 988 int need_unlock; 989 int need_unlock_list; 990 991 MINUTE(("[ufsfx_lockfs")); 992 993 if (!ufsvfsp) 994 return; 995 996 need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex); 997 998 if (need_unlock_list) 999 mutex_enter(&ufs_fix.uq_mutex); 1000 1001 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 1002 1003 need_unlock = !MUTEX_HELD(&f->uf_mutex); 1004 if (need_unlock) 1005 mutex_enter(&f->uf_mutex); 1006 1007 if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s) && 1008 f->uf_s != UF_PANIC) { 1009 switch (f->uf_s) { 1010 1011 default: 1012 cmn_err(CE_WARN, 1013 "fs %s not in state UF_TRYLCK, UF_LOCKED or UF_FIXING", 1014 fs_name(f)); 1015 break; 1016 1017 case UF_TRYLCK: 1018 if (!set_state(f, UF_LOCKED)) { 1019 MINUTE((": failed] ")); 1020 } 1021 break; 1022 1023 case UF_LOCKED: 1024 if (!set_state(f, UF_FIXING)) { 1025 MINUTE((": failed] ")); 1026 } 1027 break; 1028 1029 case UF_FIXING: 1030 break; 1031 1032 } 1033 } 1034 1035 if (need_unlock) 1036 mutex_exit(&f->uf_mutex); 1037 } 1038 if (need_unlock_list) 1039 mutex_exit(&ufs_fix.uq_mutex); 1040 1041 MINUTE(("] ")); 1042 } 1043 1044 /* 1045 * error lock, trigger fsck and unlock those fs with failures 1046 * blatantly copied from the hlock routine, although this routine 1047 * triggers differently in order to use uq_ne as meaningful data. 
1048 */ 1049 /* ARGSUSED */ 1050 void 1051 ufsfx_thread_fix_failures(void *ignored) 1052 { 1053 int retry; 1054 callb_cpr_t cprinfo; 1055 1056 CALLB_CPR_INIT(&cprinfo, &ufs_fix.uq_mutex, callb_generic_cpr, 1057 "ufsfixfail"); 1058 1059 MINUTE(("[ufsfx_thread_fix_failures] ")); 1060 1061 for (;;) { 1062 /* sleep until there is work to do */ 1063 1064 mutex_enter(&ufs_fix.uq_mutex); 1065 (void) ufs_thread_run(&ufs_fix, &cprinfo); 1066 ufs_fix.uq_ne = 0; 1067 mutex_exit(&ufs_fix.uq_mutex); 1068 1069 /* process failures on our q */ 1070 do { 1071 retry = ufsfx_do_failure_q(); 1072 if (retry) { 1073 mutex_enter(&ufs_fix.uq_mutex); 1074 CALLB_CPR_SAFE_BEGIN(&cprinfo); 1075 (void) cv_timedwait(&ufs_fix.uq_cv, 1076 &ufs_fix.uq_mutex, 1077 lbolt + (hz * retry)); 1078 CALLB_CPR_SAFE_END(&cprinfo, 1079 &ufs_fix.uq_mutex); 1080 mutex_exit(&ufs_fix.uq_mutex); 1081 } 1082 } while (retry); 1083 } 1084 /* NOTREACHED */ 1085 } 1086 1087 1088 /* 1089 * watch for fix-on-panic work 1090 * 1091 * returns # of seconds to sleep before trying again 1092 * and zero if no retry is needed 1093 */ 1094 1095 int 1096 ufsfx_do_failure_q(void) 1097 { 1098 ufs_failure_t *f; 1099 long retry = 1; 1100 ufsd_t *s; 1101 1102 MAJOR(("[ufsfx_do_failure_q")); 1103 DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL)); 1104 1105 if (!mutex_tryenter(&ufs_fix.uq_mutex)) 1106 return (retry); 1107 1108 retry = 0; 1109 rescan_q: 1110 1111 /* 1112 * walk down failure list 1113 * depending on state of each failure, do whatever 1114 * is appropriate to move it to the next state 1115 * taking note of whether retry gets set 1116 * 1117 * retry protocol: 1118 * wakeup in shortest required time for any failure 1119 * retry == 0; nothing more to do (terminal state) 1120 * retry < 0; reprocess queue immediately, retry will 1121 * be abs(retry) for the next cycle 1122 * retry > 0; schedule wakeup for retry seconds 1123 */ 1124 1125 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 1126 1127 if (!mutex_tryenter(&f->uf_mutex)) { 1128 
retry = 1; 1129 continue; 1130 } 1131 s = get_state_desc(f->uf_s); 1132 1133 MINOR((": found%s: %s, \"%s: %s\"\n", 1134 s->ud_attr.terminal? " old": "", 1135 fs_name(f), state_name(f->uf_s), f->uf_panic_str)); 1136 1137 if (s->ud_attr.terminal) { 1138 mutex_exit(&f->uf_mutex); 1139 continue; 1140 } 1141 1142 if (s->ud_sfp) 1143 (*s->ud_sfp)(f, UFA_FOUND, f->uf_s); 1144 1145 ASSERT(terminal_state(f->uf_s) || f->uf_retry != 0); 1146 1147 if (f->uf_retry != 0) { 1148 if (retry > f->uf_retry || retry == 0) 1149 retry = f->uf_retry; 1150 if (f->uf_retry < 0) 1151 f->uf_retry = abs(f->uf_retry); 1152 } 1153 mutex_exit(&f->uf_mutex); 1154 } 1155 1156 1157 if (retry < 0) { 1158 retry = abs(retry); 1159 goto rescan_q; 1160 } 1161 1162 mutex_exit(&ufs_fix.uq_mutex); 1163 1164 DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL)); 1165 MAJOR((": retry=%ld, good night]\n\n", retry)); 1166 1167 return (retry); 1168 } 1169 1170 static void 1171 pester_msg(ufs_failure_t *f, int seriousness) 1172 { 1173 MINUTE(("[pester_msg")); 1174 ASSERT(f->uf_s & (UF_LOCKED | UF_FIXING)); 1175 1176 /* 1177 * XXX if seems too long for this fs, poke administrator 1178 * XXX to run fsck manually (and change retry time?) 1179 */ 1180 cmn_err(seriousness, 1181 "Waiting for repair of %s to %s", 1182 fs_name(f), 1183 f->uf_s & UF_LOCKED? "start": "finish"); 1184 MINUTE(("]")); 1185 } 1186 1187 static time_t 1188 trylock_time_exceeded(ufs_failure_t *f) 1189 { 1190 time_t toolong; 1191 extern time_t time; 1192 1193 MINUTE(("[trylock_time_exceeded")); 1194 ASSERT(MUTEX_HELD(&f->uf_mutex)); 1195 1196 toolong = (time_t)ufsfx_tune.uft_too_long + f->uf_entered_tm; 1197 if (time > toolong) 1198 cmn_err(CE_WARN, "error-lock timeout exceeded: %s", fs_name(f)); 1199 1200 MINUTE(("] ")); 1201 return (time <= toolong? 
0: time - toolong); 1202 } 1203 1204 static int 1205 get_lockfs_status(ufs_failure_t *f, struct lockfs *lfp) 1206 { 1207 MINUTE(("[get_lockfs_status")); 1208 1209 if (!f->uf_ufsvfsp) { 1210 MINUTE((": ufsvfsp is NULL]\n")); 1211 return (0); 1212 } 1213 1214 ASSERT(MUTEX_HELD(&f->uf_mutex)); 1215 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp)); 1216 ASSERT(!vfs_lock_held(f->uf_vfsp)); 1217 ASSERT(f->uf_ufsvfsp->vfs_root != NULL); 1218 1219 f->uf_lf_err = ufs_fiolfss(f->uf_ufsvfsp->vfs_root, lfp); 1220 1221 if (f->uf_lf_err) { 1222 f->uf_retry = ufsfx_tune.uft_short_err_period; 1223 } 1224 1225 MINUTE(("] ")); 1226 return (1); 1227 } 1228 1229 static sfrc_t 1230 set_state(ufs_failure_t *f, ufs_failure_states_t new_state) 1231 { 1232 ufsd_t *s; 1233 sfrc_t sfrc = SFRC_FAIL; 1234 int need_unlock; 1235 extern time_t time; 1236 1237 HIDEOUS(("[set_state: new state:%s", state_name(new_state))); 1238 ASSERT(f); 1239 ASSERT(MUTEX_HELD(&f->uf_mutex)); 1240 1241 /* 1242 * if someone else is panicking, just let panic sync proceed 1243 */ 1244 if (panicstr) { 1245 (void) set_state(f, UF_NOTFIX); 1246 HIDEOUS((": state reset: not fixed] ")); 1247 return (sfrc); 1248 } 1249 1250 /* 1251 * bad state transition, an internal error 1252 */ 1253 if (!state_trans_valid(f->uf_s, new_state)) { 1254 /* recursion */ 1255 if (!(f->uf_s & UF_PANIC) && !(new_state & UF_PANIC)) 1256 (void) set_state(f, UF_PANIC); 1257 MINOR((": state reset: transition failure (\"%s\"->\"%s\")] ", 1258 state_name(f->uf_s), state_name(new_state))); 1259 return (sfrc); 1260 } 1261 1262 s = get_state_desc(new_state); 1263 1264 need_unlock = !MUTEX_HELD(&ufs_fix.uq_mutex); 1265 if (need_unlock) 1266 mutex_enter(&ufs_fix.uq_mutex); 1267 1268 if (s->ud_attr.at_fail && ufs_fix.uq_threadp && 1269 curthread == ufs_fix.uq_threadp) { 1270 cmn_err(CE_WARN, "set_state: probable recursive panic of %s", 1271 fs_name(f)); 1272 } 1273 if (need_unlock) 1274 mutex_exit(&ufs_fix.uq_mutex); 1275 1276 /* NULL state functions always succeed 
*/ 1277 sfrc = !s->ud_sfp? SFRC_SUCCESS: (*s->ud_sfp)(f, UFA_SET, new_state); 1278 1279 if (sfrc == SFRC_SUCCESS && f->uf_s != new_state) { 1280 f->uf_s = new_state; 1281 f->uf_entered_tm = time; 1282 f->uf_counter = 0; 1283 } 1284 1285 HIDEOUS(("]\n")); 1286 return (sfrc); 1287 } 1288 1289 static ufsd_t * 1290 get_state_desc(ufs_failure_states_t state) 1291 { 1292 ufsd_t *s; 1293 1294 HIDEOUS(("[get_state_desc")); 1295 1296 for (s = &state_desc[1]; s->ud_name != NULL; s++) { 1297 if (s->ud_v == state) { 1298 HIDEOUS(("] ")); 1299 return (s); 1300 } 1301 } 1302 1303 HIDEOUS(("] ")); 1304 return (&state_desc[0]); /* default */ 1305 } 1306 1307 static sfrc_t 1308 sf_undef(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s) 1309 { 1310 sfrc_t rc; 1311 1312 TRIVIA(("[sf_undef, action is %s, state is %s\n", 1313 act_name(a), state_name(s))); 1314 ASSERT(s == UF_UNDEF); 1315 1316 /* shouldn't find null failure records or ever set one */ 1317 rc = set_state(f, UF_NOTFIX); 1318 1319 TRIVIA(("] ")); 1320 return (rc); 1321 } 1322 1323 1324 static sfrc_t 1325 sf_init( 1326 ufs_failure_t *f, 1327 ufsa_t a, 1328 ufs_failure_states_t s) 1329 { 1330 sfrc_t rc = SFRC_FAIL; 1331 extern time_t time; 1332 1333 TRIVIA(("[sf_init, action is %s", act_name(a))); 1334 ASSERT(s & UF_INIT); 1335 1336 switch (a) { 1337 case UFA_SET: 1338 f->uf_begin_tm = time; 1339 f->uf_retry = 1; 1340 if (!f->uf_ufsvfsp) { 1341 (void) set_state(f, UF_PANIC); 1342 TRIVIA((": NULL ufsvfsp]\n")); 1343 return (rc); 1344 } 1345 /* 1346 * because we can call panic from many different levels, 1347 * we can't be sure that we've got the vfs_lock at this 1348 * point. 
However, there's not much alternative and if 1349 * we don't (have the lock) the worst case is we'll just 1350 * panic again 1351 */ 1352 f->uf_vfs_lockp = &f->uf_ufsvfsp->vfs_lock; 1353 f->uf_vfs_ufsfxp = &f->uf_ufsvfsp->vfs_fsfx; 1354 1355 if (!f->uf_ufsvfsp->vfs_bufp) { 1356 (void) set_state(f, UF_PANIC); 1357 TRIVIA((": NULL vfs_bufp]\n")); 1358 return (rc); 1359 } 1360 f->uf_bp = f->uf_ufsvfsp->vfs_bufp; 1361 1362 if (!f->uf_ufsvfsp->vfs_bufp->b_un.b_fs) { 1363 (void) set_state(f, UF_PANIC); 1364 TRIVIA((": NULL vfs_fs]\n")); 1365 return (rc); 1366 } 1367 1368 /* vfs_fs = vfs_bufp->b_un.b_fs */ 1369 bcopy(f->uf_ufsvfsp->vfs_fs->fs_fsmnt, f->uf_fsname, MAXMNTLEN); 1370 1371 f->uf_lf.lf_lock = LOCKFS_ELOCK; /* primer */ 1372 1373 if (!f->uf_vfsp || f->uf_vfsp->vfs_dev == NODEV) { 1374 (void) set_state(f, UF_PANIC); 1375 TRIVIA((": NULL vfsp or vfs_dev == NODEV")); 1376 return (rc); 1377 } 1378 f->uf_dev = f->uf_vfsp->vfs_dev; 1379 1380 rc = SFRC_SUCCESS; 1381 break; 1382 1383 case UFA_FOUND: 1384 default: 1385 /* failures marked init shouldn't even be on the queue yet */ 1386 rc = set_state(f, UF_QUEUE); 1387 TRIVIA((": found failure with state init]\n")); 1388 } 1389 1390 TRIVIA(("] ")); 1391 return (rc); 1392 } 1393 1394 static sfrc_t 1395 sf_queue( 1396 ufs_failure_t *f, 1397 ufsa_t a, 1398 ufs_failure_states_t s) 1399 { 1400 sfrc_t rc = SFRC_FAIL; 1401 1402 TRIVIA(("[sf_queue, action is %s", act_name(a))); 1403 ASSERT(s & UF_QUEUE); 1404 1405 if (!f->uf_ufsvfsp) { 1406 TRIVIA((": NULL ufsvfsp]\n")); 1407 return (rc); 1408 } 1409 1410 switch (a) { 1411 case UFA_FOUND: 1412 rc = sf_found_queue(f); 1413 break; 1414 1415 case UFA_SET: 1416 1417 ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex)); 1418 1419 mutex_enter(&uf_stats.ufst_mutex); 1420 uf_stats.ufst_num_failed++; 1421 mutex_exit(&uf_stats.ufst_mutex); 1422 1423 /* 1424 * if can't get the vfs lock, just wait until 1425 * UF_TRYLCK to set fx_current 1426 */ 1427 if (mutex_tryenter(f->uf_vfs_lockp)) { 1428 
f->uf_vfs_ufsfxp->fx_current = f; 1429 mutex_exit(f->uf_vfs_lockp); 1430 } else { 1431 mutex_enter(&uf_stats.ufst_mutex); 1432 uf_stats.ufst_current_races++; 1433 mutex_exit(&uf_stats.ufst_mutex); 1434 } 1435 1436 f->uf_retry = 1; 1437 rc = SFRC_SUCCESS; 1438 TRIVIA(("] ")); 1439 break; 1440 1441 default: 1442 (void) set_state(f, UF_PANIC); 1443 TRIVIA((": failed] ")); 1444 } 1445 1446 return (rc); 1447 } 1448 1449 static sfrc_t 1450 sf_found_queue(ufs_failure_t *f) 1451 { 1452 int replica; 1453 sfrc_t rc = SFRC_FAIL; 1454 1455 TRIVIA(("[sf_found_queue")); 1456 1457 /* 1458 * don't need to check for null ufsvfsp because 1459 * unmount must own list's ufs_fix.uq_mutex 1460 * to mark it null and we own that lock since 1461 * we got here. 1462 */ 1463 1464 ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex)); 1465 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp)); 1466 1467 if (!mutex_tryenter(f->uf_vfs_lockp)) { 1468 TRIVIA((": tryenter(vfslockp) failed; retry]\n")); 1469 f->uf_retry = 1; 1470 return (rc); 1471 } 1472 1473 replica = f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current != NULL && 1474 f->uf_vfs_ufsfxp->fx_current != f && 1475 !terminal_state(f->uf_vfs_ufsfxp->fx_current->uf_s); 1476 1477 /* 1478 * copy general flags to this ufs_failure so we don't 1479 * need to refer back to the ufsvfs, or, more importantly, 1480 * don't need to keep acquiring (trying to acquire) vfs_lockp 1481 * 1482 * The most restrictive option wins: 1483 * panic > errlock only > errlock+unmount > repair 1484 * XXX panic > elock > elock > elock+umount 1485 */ 1486 if (f->uf_vfs_ufsfxp->fx_flags & UFSFX_PANIC) { 1487 if (!set_state(f, UF_PANIC)) { 1488 TRIVIA((": marked panic but was queued?")); 1489 real_panic(f, " "); 1490 /*NOTREACHED*/ 1491 } 1492 mutex_exit(f->uf_vfs_lockp); 1493 return (rc); 1494 } 1495 f->uf_flags = f->uf_vfs_ufsfxp->fx_flags; 1496 1497 if (replica) { 1498 if (!set_state(f, UF_REPLICA)) { 1499 f->uf_retry = 1; 1500 TRIVIA((": set to replica failed] ")); 1501 } else { 1502 TRIVIA(("] 
")); 1503 } 1504 mutex_exit(f->uf_vfs_lockp); 1505 return (rc); 1506 } 1507 mutex_exit(f->uf_vfs_lockp); 1508 1509 if (!set_state(f, UF_TRYLCK)) { 1510 TRIVIA((": failed] ")); 1511 } else { 1512 rc = SFRC_SUCCESS; 1513 } 1514 return (rc); 1515 } 1516 1517 static sfrc_t 1518 sf_nonterm_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s) 1519 { 1520 sfrc_t rc = SFRC_FAIL; 1521 1522 TRIVIA(("[sf_nonterm_cmn, action: %s, %s", act_name(a), state_name(s))); 1523 ASSERT(s & (UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING)); 1524 ASSERT(!terminal_state(s)); 1525 1526 if (!f->uf_ufsvfsp && !(f->uf_s & UF_UMOUNT)) { 1527 TRIVIA((": NULL ufsvfsp (state != UMOUNT)]\n")); 1528 (void) set_state(f, UF_NOTFIX); 1529 return (rc); 1530 } 1531 1532 switch (a) { 1533 case UFA_SET: 1534 switch (s) { 1535 case UF_TRYLCK: 1536 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp)); 1537 rc = sf_set_trylck(f); 1538 break; 1539 1540 case UF_LOCKED: 1541 rc = sf_set_locked(f); 1542 break; 1543 1544 case UF_FIXING: 1545 f->uf_flags |= UFSFX_REPAIR_START; 1546 f->uf_retry = ufsfx_tune.uft_fixpoll_period; 1547 rc = SFRC_SUCCESS; 1548 break; 1549 1550 case UF_UMOUNT: 1551 f->uf_retry = -ufsfx_tune.uft_short_err_period; 1552 rc = SFRC_SUCCESS; 1553 break; 1554 1555 default: 1556 (void) set_state(f, UF_PANIC); 1557 TRIVIA((": failed] ")); 1558 } 1559 break; 1560 1561 case UFA_FOUND: 1562 1563 switch (s) { 1564 case UF_TRYLCK: 1565 rc = sf_found_trylck(f); 1566 break; 1567 1568 case UF_LOCKED: 1569 case UF_FIXING: 1570 rc = sf_found_lock_fix_cmn(f, s); 1571 break; 1572 1573 case UF_UMOUNT: 1574 rc = sf_found_umount(f); 1575 break; 1576 1577 default: 1578 (void) set_state(f, UF_PANIC); 1579 TRIVIA((": failed] ")); 1580 break; 1581 } 1582 break; 1583 default: 1584 (void) set_state(f, UF_PANIC); 1585 TRIVIA((": failed] ")); 1586 break; 1587 } 1588 1589 TRIVIA(("] ")); 1590 return (rc); 1591 } 1592 1593 static sfrc_t 1594 sf_set_trylck(ufs_failure_t *f) 1595 { 1596 TRIVIA(("[sf_set_trylck")); 1597 1598 if 
(!mutex_tryenter(f->uf_vfs_lockp)) { 1599 TRIVIA((": tryenter(vfslockp) failed; retry]\n")); 1600 f->uf_retry = 1; 1601 return (SFRC_FAIL); 1602 } 1603 1604 if (!f->uf_vfs_ufsfxp->fx_current) 1605 f->uf_vfs_ufsfxp->fx_current = f; 1606 1607 mutex_exit(f->uf_vfs_lockp); 1608 1609 f->uf_lf.lf_flags = 0; 1610 f->uf_lf.lf_lock = LOCKFS_ELOCK; 1611 f->uf_retry = -ufsfx_tune.uft_fixstart_period; 1612 TRIVIA(("] ")); 1613 return (SFRC_SUCCESS); 1614 } 1615 1616 static sfrc_t 1617 sf_found_trylck(ufs_failure_t *f) 1618 { 1619 struct lockfs lockfs_status; 1620 1621 TRIVIA(("[sf_found_trylck")); 1622 1623 if (trylock_time_exceeded(f) > 0) { 1624 (void) set_state(f, UF_PANIC); 1625 TRIVIA((": failed] ")); 1626 return (SFRC_FAIL); 1627 } 1628 1629 if (!get_lockfs_status(f, &lockfs_status)) { 1630 (void) set_state(f, UF_PANIC); 1631 TRIVIA((": failed] ")); 1632 return (SFRC_FAIL); 1633 } 1634 1635 if (f->uf_lf_err == NO_ERROR) 1636 f->uf_lf.lf_key = lockfs_status.lf_key; 1637 1638 if (!set_lockfs(f, &lockfs_status)) { 1639 (void) set_state(f, UF_PANIC); 1640 TRIVIA((": failed] ")); 1641 return (SFRC_FAIL); 1642 } 1643 TRIVIA(("] ")); 1644 return (SFRC_SUCCESS); 1645 } 1646 1647 static sfrc_t 1648 sf_set_locked(ufs_failure_t *f) 1649 { 1650 TRIVIA(("[sf_set_locked")); 1651 1652 f->uf_retry = -ufsfx_tune.uft_fixstart_period; 1653 1654 #if defined(DEBUG) 1655 if (f->uf_flags & UFSFX_REPAIR_START) 1656 TRIVIA(("clearing UFSFX_REPAIR_START ")); 1657 #endif /* DEBUG */ 1658 1659 f->uf_flags &= ~UFSFX_REPAIR_START; 1660 1661 if (f->uf_s & UF_TRYLCK) { 1662 cmn_err(CE_WARN, "Error-locked %s: \"%s\"", 1663 fs_name(f), f->uf_panic_str); 1664 1665 if (f->uf_flags & UFSFX_LCKONLY) 1666 cmn_err(CE_WARN, "Manual repair of %s required", 1667 fs_name(f)); 1668 } 1669 1670 /* 1671 * just reset to current state 1672 */ 1673 #if defined(DEBUG) 1674 TRIVIA(("locked->locked ")); 1675 #endif /* DEBUG */ 1676 1677 TRIVIA(("] ")); 1678 return (SFRC_SUCCESS); 1679 } 1680 1681 static sfrc_t 1682 
sf_found_lock_fix_cmn(ufs_failure_t *f, ufs_failure_states_t s) 1683 { 1684 time_t toolong; 1685 extern time_t time; 1686 struct buf *bp = NULL; 1687 struct fs *dfs; 1688 time_t concerned, anxious; 1689 sfrc_t rc = SFRC_FAIL; 1690 ulong_t gb_size; 1691 1692 TRIVIA(("[sf_found_lock_fix_cmn (\"%s\")", state_name(s))); 1693 1694 if (s & UF_LOCKED) { 1695 ASSERT(MUTEX_HELD(&f->uf_mutex)); 1696 1697 toolong = time > (ufsfx_tune.uft_too_long + 1698 f->uf_entered_tm); 1699 TRIVIA(("%stoolong", !toolong? "not": "")); 1700 HIDEOUS((": time:%ld, too long:%ld, entered_tm:%ld ", 1701 time, ufsfx_tune.uft_too_long, f->uf_entered_tm)); 1702 1703 if (f->uf_flags & UFSFX_LCKUMOUNT) { 1704 if (set_state(f, UF_UMOUNT)) { 1705 TRIVIA(("] ")); 1706 rc = SFRC_SUCCESS; 1707 } else { 1708 TRIVIA((": failed] ")); 1709 f->uf_retry = 1; 1710 } 1711 return (rc); 1712 } 1713 if (!toolong) { 1714 rc = SFRC_SUCCESS; 1715 } else { 1716 if (!(f->uf_flags & UFSFX_REPAIR_START)) { 1717 cmn_err(CE_WARN, "%s repair of %s not started.", 1718 (f->uf_flags & UFSFX_LCKONLY)? 
1719 "Manual": "Automatic", 1720 fs_name(f)); 1721 1722 f->uf_retry = ufsfx_tune.uft_long_err_period; 1723 } else { 1724 f->uf_retry = ufsfx_tune.uft_long_err_period; 1725 cmn_err(CE_WARN, 1726 "Repair of %s is not timely; operator attention is required.", 1727 fs_name(f)); 1728 } 1729 TRIVIA(("] ")); 1730 return (rc); 1731 } 1732 } 1733 1734 #if defined(DEBUG) 1735 else { 1736 ASSERT(s & UF_FIXING); 1737 } 1738 #endif /* DEBUG */ 1739 1740 /* 1741 * get on disk superblock; force it to really 1742 * come from the disk 1743 */ 1744 (void) bfinval(f->uf_dev, 0); 1745 bp = UFS_BREAD(f->uf_ufsvfsp, f->uf_dev, SBLOCK, SBSIZE); 1746 if (bp) { 1747 bp->b_flags |= (B_STALE | B_AGE); 1748 dfs = bp->b_un.b_fs; 1749 } 1750 1751 if (!bp || (bp->b_flags & B_ERROR) || ((dfs->fs_magic != FS_MAGIC) && 1752 (dfs->fs_magic != MTB_UFS_MAGIC))) { 1753 TRIVIA((": UFS_BREAD(SBLOCK) failed]\n")); 1754 f->uf_retry = 1; 1755 goto out; 1756 } 1757 1758 /* fsck started but we haven't noticed yet? */ 1759 if (!(s & UF_FIXING) && dfs->fs_clean == FSFIX) { 1760 if (!set_state(f, UF_FIXING)) { 1761 TRIVIA((": failed]\n")); 1762 f->uf_retry = 1; 1763 goto out; 1764 } 1765 } 1766 1767 /* fsck started but didn't succeed? */ 1768 if ((s & UF_FIXING) && ((dfs->fs_clean == FSBAD) || !fsck_active(f))) { 1769 TRIVIA((": fs_clean: %d", (int)dfs->fs_clean)); 1770 (void) set_state(f, UF_LOCKED); 1771 cmn_err(CE_WARN, "%s: Manual repair is necessary.", fs_name(f)); 1772 f->uf_retry = ufsfx_tune.uft_long_err_period; 1773 goto out; 1774 } 1775 1776 gb_size = (dfs->fs_size * dfs->fs_bshift) / GB; 1777 toolong = (time_t)((gb_size == 0? 1: gb_size) * SecondsPerGig); 1778 1779 /* fsck started but doesn't seem to be proceeding? 
*/ 1780 if ((s & UF_FIXING) && dfs->fs_clean == FSFIX) { 1781 if (time > f->uf_entered_tm + toolong) { 1782 1783 cmn_err(CE_WARN, 1784 "Repair completion timeout exceeded on %s; manual fsck may be required", 1785 fs_name(f)); 1786 f->uf_retry = ufsfx_tune.uft_long_err_period; 1787 } 1788 } 1789 1790 concerned = f->uf_entered_tm + (toolong / 3); 1791 anxious = f->uf_entered_tm + ((2 * toolong) / 3); 1792 1793 if (time > concerned) 1794 pester_msg(f, time > anxious? CE_WARN: CE_NOTE); 1795 1796 TRIVIA(("] ")); 1797 1798 out: 1799 if (bp) 1800 brelse(bp); 1801 1802 return (rc); 1803 } 1804 1805 static sfrc_t 1806 sf_found_umount(ufs_failure_t *f) 1807 { 1808 extern time_t time; 1809 sfrc_t rc = SFRC_FAIL; 1810 struct vfs *vfsp = f->uf_vfsp; 1811 struct ufsvfs *ufsvfsp = f->uf_ufsvfsp; 1812 int toolong = 0; 1813 int err = 0; 1814 1815 TRIVIA(("[sf_found_umount")); 1816 1817 toolong = time > ufsfx_tune.uft_too_long + f->uf_entered_tm; 1818 if (toolong) { 1819 TRIVIA((": unmount time limit exceeded] ")); 1820 goto out; 1821 } 1822 1823 if (!vfsp || !ufsvfsp) { /* trivial case */ 1824 TRIVIA((": NULL vfsp and/or ufsvfsp, already unmounted?] 
/*
 * Common state function for the terminal states UF_FIXED, UF_NOTFIX and
 * UF_REPLICA.
 *
 * UFA_SET, UF_NOTFIX/UF_FIXED: records the end time, clears the file
 * system's fx_current, logs the outcome, counts a fix for UF_FIXED and
 * schedules ufsfx_kill_fix_failure_thread via timeout().  If the vfs lock
 * is needed but cannot be taken, a retry is requested and SFRC_FAIL is
 * returned.
 *
 * UFA_SET, UF_REPLICA: succeeds only while another, non-terminal failure
 * is fx_current; that failure is recorded in uf_orig.
 *
 * UFA_FOUND: nothing left to do; a leftover non-zero uf_retry is counted
 * as wasted cpu and cleared.
 *
 * Any other action, or an unexpected state on UFA_SET, forces UF_PANIC.
 */
static sfrc_t
sf_term_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s)
{
	extern time_t	time;
	sfrc_t		rc = SFRC_FAIL;

	TRIVIA(("[sf_term_cmn, action is %s, state is %s",
	    act_name(a), state_name(s)));
	ASSERT(s & (UF_FIXED | UF_NOTFIX | UF_REPLICA));
	ASSERT(terminal_state(s));

	/* a missing ufsvfs is tolerated only when coming from UMOUNT/NOTFIX */
	if (!f->uf_ufsvfsp && !(f->uf_s & (UF_UMOUNT | UF_NOTFIX))) {
		TRIVIA((": NULL ufsvfsp (state != UMOUNT | NOTFIX)]\n"));
		return (rc);
	}

	switch (a) {
	case UFA_SET:
		switch (s) {
		case UF_NOTFIX:
		case UF_FIXED:
		{	int need_lock_vfs;

			/*
			 * take the vfs lock only if there is one and this
			 * thread does not already own it
			 */
			if (f->uf_ufsvfsp && f->uf_vfs_lockp)
				need_lock_vfs = !MUTEX_HELD(f->uf_vfs_lockp);
			else
				need_lock_vfs = 0;

			if (need_lock_vfs && !mutex_tryenter(f->uf_vfs_lockp)) {
				TRIVIA((": tryenter(vfslockp) fail; retry]\n"));
				f->uf_retry = 1;
				break;
			}

			f->uf_end_tm = time;
			f->uf_lf.lf_lock = LOCKFS_OLOCK;
			f->uf_retry = 0;

			/* this failure is no longer the one being worked on */
			if (f->uf_vfs_ufsfxp)
				f->uf_vfs_ufsfxp->fx_current = NULL;

			if (need_lock_vfs)
				mutex_exit(f->uf_vfs_lockp);

			cmn_err(CE_NOTE, (s & UF_NOTFIX)? "Could not fix %s":
			    "%s is now accessible", fs_name(f));

			if (s & UF_FIXED) {
				mutex_enter(&uf_stats.ufst_mutex);
				uf_stats.ufst_num_fixed++;
				mutex_exit(&uf_stats.ufst_mutex);
			}
			/*
			 * the delay in ticks is passed both as the callback
			 * argument and as the timeout interval
			 */
			(void) timeout(ufsfx_kill_fix_failure_thread,
			    (void *)(ufsfx_tune.uft_short_err_period * hz),
			    ufsfx_tune.uft_short_err_period * hz);
			rc = SFRC_SUCCESS;
			break;
		}
		case UF_REPLICA:

			ASSERT(MUTEX_HELD(f->uf_vfs_lockp));

			/* not actually a replica? */
			if (f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current &&
			    f->uf_vfs_ufsfxp->fx_current != f &&
			    !terminal_state(f->uf_vfs_ufsfxp->fx_current->uf_s)) {

				/* remember which failure we duplicate */
				f->uf_orig = f->uf_vfs_ufsfxp->fx_current;
				f->uf_retry = 0;
				rc = SFRC_SUCCESS;
			} else {
				TRIVIA((": NULL fx_current]\n"));
				f->uf_retry = 1;
			}

			break;

		default:
			rc = set_state(f, UF_PANIC);
			TRIVIA((": failed] "));
			break;
		}
		break;

	case UFA_FOUND:
		/*
		 * XXX de-allocate these after some period?
		 * XXX or move to an historical list?
		 * XXX or have an ioctl which reaps them?
		 */
		/*
		 * For now, since we don't expect lots of failures
		 * to occur (to the point of memory shortages),
		 * just punt
		 */

		/* be sure we're not wasting cpu on old failures */
		if (f->uf_retry != 0) {
			mutex_enter(&uf_stats.ufst_mutex);
			uf_stats.ufst_cpu_waste++;
			mutex_exit(&uf_stats.ufst_mutex);
			f->uf_retry = 0;
		}
		rc = SFRC_SUCCESS;
		break;

	default:
		(void) set_state(f, UF_PANIC);
		TRIVIA((": failed] "));
		break;
	}

	TRIVIA(("] "));
	return (rc);
}
*/ 2003 2004 break; 2005 } 2006 2007 TRIVIA(("] ")); 2008 return (rc); 2009 } 2010 2011 /* 2012 * minimum state function 2013 */ 2014 static sfrc_t 2015 sf_minimum( 2016 ufs_failure_t *f, 2017 ufsa_t a, /* LINTED argument unused in function: ignored */ 2018 ufs_failure_states_t ignored) 2019 { 2020 sfrc_t rc = SFRC_FAIL; 2021 2022 TRIVIA(("[sf_minimum, action is %s", act_name(a))); 2023 2024 switch (a) { 2025 case UFA_SET: 2026 f->uf_retry = 0; 2027 /* FALLTHROUGH */ 2028 2029 case UFA_FOUND: 2030 rc = SFRC_SUCCESS; 2031 break; 2032 2033 default: 2034 (void) set_state(f, UF_PANIC); 2035 TRIVIA((": failed] ")); 2036 break; 2037 } 2038 2039 TRIVIA(("] ")); 2040 return (rc); 2041 } 2042 2043 static int 2044 state_trans_valid(ufs_failure_states_t from, ufs_failure_states_t to) 2045 { 2046 ufsd_t *s; 2047 int valid; 2048 2049 HIDEOUS(("[state_trans_valid")); 2050 2051 if (from & to) 2052 return (1); 2053 2054 s = get_state_desc(to); 2055 2056 /* 2057 * extra test is necessary since we want UF_UNDEF = 0, 2058 * (to detect freshly allocated memory) 2059 * but can't check for that value with a bit test 2060 */ 2061 valid = (to & UF_INIT)? from == s->ud_prev: from & s->ud_prev; 2062 2063 HIDEOUS((": %svalid] ", valid? "": "in")); 2064 return (valid); 2065 } 2066 2067 static int 2068 terminal_state(ufs_failure_states_t state) 2069 { 2070 ufsd_t *s; 2071 2072 HIDEOUS(("[terminal_state")); 2073 2074 s = get_state_desc(state); 2075 2076 HIDEOUS((": %sterminal] ", s->ud_attr.terminal? "": "not ")); 2077 return ((int)s->ud_attr.terminal); 2078 } 2079 2080 static void 2081 alloc_lockfs_comment(ufs_failure_t *f, struct lockfs *lfp) 2082 { 2083 MINUTE(("[alloc_lockfs_comment")); 2084 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2085 2086 /* 2087 * ufs_fiolfs expects a kmem_alloc'ed comment; 2088 * it frees the comment if the lock fails 2089 * or else when the lock is unlocked. 
2090 */ 2091 2092 f->uf_lf.lf_comment = kmem_zalloc(LOCKFS_MAXCOMMENTLEN, KM_NOSLEEP); 2093 if (f->uf_lf.lf_comment) { 2094 char *from; 2095 size_t len; 2096 2097 /* 2098 * use panic string if there's no previous comment 2099 * or if we're setting the error lock 2100 */ 2101 if ((LOCKFS_IS_ELOCK(&f->uf_lf) || !lfp->lf_comment || 2102 lfp->lf_comlen <= 0)) { 2103 from = f->uf_panic_str; 2104 len = LOCKFS_MAXCOMMENTLEN; 2105 } else { 2106 from = lfp->lf_comment; 2107 len = lfp->lf_comlen; 2108 } 2109 2110 bcopy(from, f->uf_lf.lf_comment, len); 2111 f->uf_lf.lf_comlen = len; 2112 2113 } else { 2114 f->uf_lf.lf_comlen = 0; 2115 } 2116 MINUTE(("] ")); 2117 } 2118 2119 static int 2120 set_lockfs(ufs_failure_t *f, struct lockfs *lfp) 2121 { 2122 int (*handle_lockfs_rc)(ufs_failure_t *); 2123 int rc; 2124 2125 MINUTE(("[set_lockfs")); 2126 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2127 ASSERT(!vfs_lock_held(f->uf_vfsp)); 2128 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp)); 2129 2130 if (!f->uf_ufsvfsp) { 2131 MINUTE((": ufsvfsp is NULL]\n")); 2132 return (0); 2133 } 2134 2135 ASSERT(MUTEX_NOT_HELD(&f->uf_ufsvfsp->vfs_ulockfs.ul_lock)); 2136 2137 if (!f->uf_ufsvfsp->vfs_root) { 2138 MINUTE((": vfs_root is NULL]\n")); 2139 return (0); 2140 } 2141 2142 alloc_lockfs_comment(f, lfp); 2143 f->uf_lf_err = 0; 2144 2145 if (!LOCKFS_IS_ELOCK(lfp)) { 2146 lfp->lf_lock = f->uf_lf.lf_lock = LOCKFS_ELOCK; 2147 VN_HOLD(f->uf_ufsvfsp->vfs_root); 2148 f->uf_lf_err = ufs__fiolfs(f->uf_ufsvfsp->vfs_root, 2149 &f->uf_lf, 2150 /* from_user */ 0, 2151 /* from_log */ 0); 2152 VN_RELE(f->uf_ufsvfsp->vfs_root); 2153 } 2154 2155 handle_lockfs_rc = f->uf_lf_err != 0? 
lockfs_failure: lockfs_success; 2156 rc = handle_lockfs_rc(f); 2157 2158 MINUTE(("] ")); 2159 return (rc); 2160 } 2161 2162 static int 2163 lockfs_failure(ufs_failure_t *f) 2164 { 2165 int error; 2166 ufs_failure_states_t s; 2167 2168 TRIVIA(("[lockfs_failure")); 2169 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2170 2171 if (!f->uf_ufsvfsp) { 2172 TRIVIA((": ufsvfsp is NULL]\n")); 2173 return (0); 2174 } 2175 2176 error = f->uf_lf_err; 2177 switch (error) { 2178 /* non-transient errors: */ 2179 case EACCES: /* disk/in-core metadata reconciliation failed */ 2180 case EPERM: /* inode reconciliation failed; incore inode changed? */ 2181 case EIO: /* device is hard-locked or not responding */ 2182 case EROFS: /* device is write-locked */ 2183 case EDEADLK: /* can't lockfs; deadlock would result; */ 2184 /* Swapping or saving accounting records */ 2185 /* onto this fs can cause this errno. */ 2186 2187 MINOR(("ufs_fiolfs(\"%s\") of %s failed: %s (%d)", 2188 fs_name(f), 2189 lock_name(&f->uf_lf), 2190 err_name(error), 2191 error)); 2192 2193 /* 2194 * if can't get lock, then fallback to panic, unless 2195 * unless unmount was requested (although unmount will 2196 * probably fail if the lock failed, so we'll panic 2197 * anyway 2198 */ 2199 2200 s = ((f->uf_flags & UFSFX_LCKUMOUNT) && error != EDEADLK)? 2201 UF_UMOUNT: UF_PANIC; 2202 2203 if (!set_state(f, s)) { 2204 real_panic(f, " "); 2205 /*NOTREACHED*/ 2206 break; 2207 } 2208 break; 2209 2210 2211 case EBUSY: 2212 case EAGAIN: 2213 2214 f->uf_retry = ufsfx_tune.uft_short_err_period; 2215 if (curthread->t_flag & T_DONTPEND) { 2216 curthread->t_flag &= ~T_DONTPEND; 2217 2218 } else if (!(f->uf_s & (UF_LOCKED | UF_FIXING))) { 2219 ufs_failure_states_t state; 2220 /* 2221 * if we didn't know that the fix had started, 2222 * take note 2223 */ 2224 state = error == EBUSY? 
UF_LOCKED: UF_FIXING; 2225 if (!set_state(f, state)) { 2226 TRIVIA((": failed] ")); 2227 return (0); 2228 } 2229 } 2230 break; 2231 2232 default: /* some other non-fatal error */ 2233 MINOR(("lockfs(\"%s\") of %s returned %s (%d)", 2234 lock_name(&f->uf_lf), 2235 fs_name(f), 2236 err_name(f->uf_lf_err), 2237 f->uf_lf_err)); 2238 2239 f->uf_retry = ufsfx_tune.uft_short_err_period; 2240 break; 2241 2242 case EINVAL: /* unmounted? */ 2243 (void) set_state(f, UF_NOTFIX); 2244 break; 2245 } 2246 TRIVIA(("] ")); 2247 return (1); 2248 } 2249 2250 static int 2251 lockfs_success(ufs_failure_t *f) 2252 { 2253 TRIVIA(("[lockfs_success")); 2254 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2255 2256 if (!f->uf_ufsvfsp) { 2257 TRIVIA((": ufsvfsp is NULL]\n")); 2258 return (0); 2259 } 2260 2261 switch (f->uf_lf.lf_lock) { 2262 case LOCKFS_ELOCK: /* error lock worked */ 2263 2264 if (!set_state(f, UF_LOCKED)) { 2265 TRIVIA((": failed] ")); 2266 return (0); 2267 } 2268 break; 2269 2270 case LOCKFS_ULOCK: /* unlock worked */ 2271 /* 2272 * how'd we get here? 2273 * This should be done from fsck's unlock, 2274 * not from this thread's context. 
2275 */ 2276 cmn_err(CE_WARN, "Unlocked error-lock of %s", fs_name(f)); 2277 ufsfx_unlockfs(f->uf_ufsvfsp); 2278 break; 2279 2280 default: 2281 if (!set_state(f, UF_NOTFIX)) { 2282 TRIVIA((": failed] ")); 2283 return (0); 2284 } 2285 break; 2286 } 2287 TRIVIA(("] ")); 2288 return (1); 2289 } 2290 2291 /* 2292 * when fsck is running it puts its pid into the lockfs 2293 * comment structure, prefaced by PIDSTR 2294 */ 2295 const char *PIDSTR = "[pid:"; 2296 static int 2297 fsck_active(ufs_failure_t *f) 2298 { 2299 char *cp; 2300 int i, found, errlocked; 2301 size_t comlen; 2302 const int PIDSTRLEN = (int)strlen(PIDSTR); 2303 struct ulockfs *ulp = &f->uf_ufsvfsp->vfs_ulockfs; 2304 2305 TRIVIA(("[fsck_active")); 2306 2307 ASSERT(f); 2308 ASSERT(f->uf_s & UF_FIXING); 2309 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2310 ASSERT(f->uf_ufsvfsp); 2311 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp)); 2312 ASSERT(MUTEX_NOT_HELD(&ulp->ul_lock)); 2313 2314 mutex_enter(&ulp->ul_lock); 2315 cp = ulp->ul_lockfs.lf_comment; 2316 comlen = ulp->ul_lockfs.lf_comlen; 2317 errlocked = (int)ULOCKFS_IS_ELOCK(ulp); 2318 mutex_exit(&ulp->ul_lock); 2319 2320 if (!cp || comlen == 0) { 2321 TRIVIA((": null comment or comlen <= 0, found:0]")); 2322 return (0); 2323 } 2324 2325 for (found = i = 0; !found && i < (comlen - PIDSTRLEN); i++, cp++) 2326 found = strncmp(cp, PIDSTR, PIDSTRLEN) == 0; 2327 2328 TRIVIA(("found:%d, is_elock:%d]", found, errlocked)); 2329 return (errlocked & found); 2330 } 2331 2332 static const char unknown_fs[] = "<unknown fs>"; 2333 static const char null_failure[] = "<NULL ufs failure record; unknown fs>"; 2334 static const char mutated_vfs_bufp[] = "<mutated vfs_bufp, unknown fs>"; 2335 static const char mutated_vfs_fs[] = "<mutated vfs_fs, unknown fs>"; 2336 2337 static char * 2338 fs_name(ufs_failure_t *f) 2339 { 2340 HIDEOUS(("[fs_name")); 2341 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2342 2343 if (!f) { 2344 HIDEOUS((": failure ptr is NULL]\n")); 2345 return ((char *)null_failure); 2346 } 
2347 2348 if (f->uf_fsname[0] != '\0') { 2349 HIDEOUS((": return (uf_fsname)]\n")); 2350 return (f->uf_fsname); 2351 } 2352 2353 if (MUTEX_HELD(f->uf_vfs_lockp)) { 2354 if (f->uf_bp != f->uf_ufsvfsp->vfs_bufp) { 2355 HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n", 2356 (void *)f->uf_bp, (void *)f->uf_ufsvfsp->vfs_bufp)); 2357 return ((char *)mutated_vfs_bufp); 2358 } 2359 if (f->uf_fs != f->uf_ufsvfsp->vfs_fs) { 2360 HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n", 2361 (void *)f->uf_fs, (void *)f->uf_ufsvfsp->vfs_fs)); 2362 return ((char *)mutated_vfs_fs); 2363 } 2364 if (f->uf_ufsvfsp && f->uf_bp && f->uf_fs && 2365 *f->uf_fs->fs_fsmnt != '\0') { 2366 HIDEOUS((": return (fs_fsmnt)]\n")); 2367 return (f->uf_fs->fs_fsmnt); 2368 } 2369 } 2370 2371 HIDEOUS((": unknown file system]\n")); 2372 return ((char *)unknown_fs); 2373 } 2374 2375 #if defined(DEBUG) 2376 static char * 2377 lock_name(struct lockfs *lfp) 2378 { 2379 struct lock_description *l; 2380 char *lname; 2381 2382 HIDEOUS(("[lock_name")); 2383 2384 lname = lock_desc[0].ld_name; 2385 for (l = &lock_desc[1]; l->ld_name != NULL; l++) { 2386 if (lfp && lfp->lf_lock == l->ld_type) { 2387 lname = l->ld_name; 2388 break; 2389 } 2390 } 2391 HIDEOUS(("]")); 2392 return (lname); 2393 } 2394 2395 static char * 2396 state_name(ufs_failure_states_t state) 2397 { 2398 ufsd_t *s; 2399 2400 HIDEOUS(("[state_name")); 2401 2402 s = get_state_desc(state); 2403 2404 HIDEOUS(("]")); 2405 return (s->ud_name); 2406 } 2407 2408 static char * 2409 err_name(int error) 2410 { 2411 struct error_description *e; 2412 2413 HIDEOUS(("[err_name")); 2414 2415 for (e = &err_desc[1]; e->ed_name != NULL; e++) { 2416 if (error == e->ed_errno) { 2417 HIDEOUS(("]")); 2418 return (e->ed_name); 2419 } 2420 } 2421 HIDEOUS(("]")); 2422 return (err_desc[0].ed_name); 2423 } 2424 2425 static char * 2426 act_name(ufsa_t action) 2427 { 2428 struct action_description *a; 2429 2430 HIDEOUS(("[act_name")); 2431 2432 for (a = &act_desc[1]; a->ad_name != 
NULL; a++) { 2433 if (action == a->ad_v) { 2434 HIDEOUS(("]")); 2435 return (a->ad_name); 2436 } 2437 } 2438 HIDEOUS(("]")); 2439 return (act_desc[0].ad_name); 2440 } 2441 2442 /* 2443 * dump failure list 2444 */ 2445 static void 2446 dump_uf_list(char *msg) 2447 { 2448 ufs_failure_t *f; 2449 int i; 2450 int list_was_locked = MUTEX_HELD(&ufs_fix.uq_mutex); 2451 2452 if (!list_was_locked && !mutex_tryenter(&ufs_fix.uq_mutex)) { 2453 printf("dump_uf_list: couldn't get list lock\n"); 2454 return; 2455 } 2456 2457 if (msg) { 2458 printf("\n%s", msg); 2459 } 2460 printf("\ndump_uf_list:\n\tuq_lowat: %d, uq_ne: %d\n", 2461 ufs_fix.uq_lowat, ufs_fix.uq_ne); 2462 2463 mutex_enter(&uf_stats.ufst_mutex); 2464 printf("\tuf_stats.current_races: %ld\n", uf_stats.ufst_current_races); 2465 printf("\tuf_stats.num_failed: %ld\n", uf_stats.ufst_num_failed); 2466 printf("\tuf_stats.num_fixed: %ld\n", uf_stats.ufst_num_fixed); 2467 printf("\tuf_stats.cpu_waste: %ld\n", uf_stats.ufst_cpu_waste); 2468 printf("\tuf_stats.lock_violations: %ld, unmount_failures: %ld\n", 2469 uf_stats.ufst_lock_violations, uf_stats.ufst_unmount_failures); 2470 mutex_exit(&uf_stats.ufst_mutex); 2471 2472 for (f = ufs_fix.uq_ufhead, i = 1; f; f = f->uf_next, i++) { 2473 2474 if (!mutex_tryenter(&f->uf_mutex)) { 2475 printf("%d.\t\"skipped - try enter failed\"\n", i); 2476 continue; 2477 } 2478 2479 dump_uf(f, i); 2480 2481 mutex_exit(&f->uf_mutex); 2482 } 2483 2484 printf("\n"); 2485 2486 if (!list_was_locked) 2487 mutex_exit(&ufs_fix.uq_mutex); 2488 } 2489 2490 static void 2491 dump_uf(ufs_failure_t *f, int i) 2492 { 2493 if (!f) { 2494 printf("dump_uf: NULL failure record\n"); 2495 return; 2496 } 2497 2498 printf("%d.\t\"%s\" is %s.\n", 2499 i, fs_name(f), state_name(f->uf_s)); 2500 printf("\t\"%s\"\tAddr: 0x%p\n", f->uf_panic_str, (void *)f); 2501 printf("\tNext: 0x%p\t\tPrev: 0x%p\n", 2502 (void *)f->uf_next, (void *)f->uf_prev); 2503 2504 if (f->uf_orig) 2505 printf("\tOriginal failure: 0x%p \"%s\"\n", 
2506 (void *)f->uf_orig, f->uf_orig->uf_panic_str); 2507 2508 printf("\tUfsvfs: 0x%p\t\tVfs_lockp: 0x%p\n", 2509 (void *)f->uf_ufsvfsp, (void *)f->uf_vfs_lockp); 2510 printf("\tVfs_fsfxp: 0x%p\n", (void *)f->uf_vfs_ufsfxp); 2511 printf("\tVfs_bufp: 0x%p", (void *)f->uf_bp); 2512 2513 if (f->uf_bp) 2514 printf("\t\tVfs_fs: 0x%p\n", (void *)f->uf_fs); 2515 else 2516 printf("\n"); 2517 2518 printf("\tBegin: 0x%lx\tEntered: 0x%lx\tEnd: 0x%lx\n", 2519 f->uf_begin_tm, f->uf_entered_tm, f->uf_end_tm); 2520 2521 printf("\tFlags: (%d) %s%s%s%s", f->uf_flags, 2522 f->uf_flags & UFSFX_LCKONLY? "\"lock only\" " : "", 2523 f->uf_flags & UFSFX_LCKUMOUNT? "\"lock+unmount\" " : "", 2524 f->uf_flags & UFSFX_REPAIR_START? "\"started repair\" " : "", 2525 f->uf_flags == 0? "<none>" : ""); 2526 2527 printf("\tRetry: %ld seconds\n", f->uf_retry); 2528 2529 printf("\tLockfs:\ttype: %s\terror: %s (%d)\n", 2530 lock_name(&f->uf_lf), 2531 err_name(f->uf_lf_err), f->uf_lf_err); 2532 2533 } 2534 #endif /* DEBUG */ 2535 2536 /* 2537 * returns # of ufs_failures in a non-terminal state on queue 2538 * used to coordinate with hlock thread (see ufs_thread.c) 2539 * and to determine when the error lock thread may exit 2540 */ 2541 2542 int 2543 ufsfx_get_failure_qlen(void) 2544 { 2545 ufs_failure_t *f; 2546 ufsd_t *s; 2547 int qlen = 0; 2548 2549 MINUTE(("[ufsfx_get_failure_qlen")); 2550 2551 if (!mutex_tryenter(&ufs_fix.uq_mutex)) 2552 return (-1); 2553 2554 /* 2555 * walk down failure list 2556 */ 2557 2558 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 2559 2560 if (!mutex_tryenter(&f->uf_mutex)) 2561 continue; 2562 2563 s = get_state_desc(f->uf_s); 2564 2565 if (s->ud_attr.terminal) { 2566 mutex_exit(&f->uf_mutex); 2567 continue; 2568 } 2569 2570 MINUTE((": found: %s, \"%s: %s\"\n", 2571 fs_name(f), state_name(f->uf_s), f->uf_panic_str)); 2572 2573 qlen++; 2574 mutex_exit(&f->uf_mutex); 2575 } 2576 2577 mutex_exit(&ufs_fix.uq_mutex); 2578 2579 MINUTE((": qlen=%d]\n", qlen)); 2580 2581 return 
(qlen); 2582 } 2583 2584 /* 2585 * timeout routine 2586 * called to shutdown fix failure thread and server daemon 2587 */ 2588 static void 2589 ufsfx_kill_fix_failure_thread(void *arg) 2590 { 2591 clock_t odelta = (clock_t)arg; 2592 int qlen; 2593 2594 MAJOR(("[ufsfx_kill_fix_failure_thread")); 2595 2596 qlen = ufsfx_get_failure_qlen(); 2597 2598 if (qlen < 0) { 2599 clock_t delta; 2600 2601 delta = odelta << 1; 2602 if (delta <= 0) 2603 delta = INT_MAX; 2604 2605 (void) timeout(ufsfx_kill_fix_failure_thread, 2606 (void *)delta, delta); 2607 MAJOR((": rescheduled")); 2608 2609 } else if (qlen == 0) { 2610 ufs_thread_exit(&ufs_fix); 2611 MAJOR((": killed")); 2612 } 2613 /* 2614 * else 2615 * let timeout expire 2616 */ 2617 MAJOR(("]\n")); 2618 } 2619