1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/param.h> 28 #include <sys/systm.h> 29 #include <sys/errno.h> 30 #include <sys/mode.h> 31 #include <sys/sysmacros.h> 32 #include <sys/cmn_err.h> 33 #include <sys/varargs.h> 34 #include <sys/time.h> 35 #include <sys/buf.h> 36 #include <sys/kmem.h> 37 #include <sys/t_lock.h> 38 #include <sys/poll.h> 39 #include <sys/debug.h> 40 #include <sys/cred.h> 41 #include <sys/lockfs.h> 42 #include <sys/fs/ufs_fs.h> 43 #include <sys/fs/ufs_inode.h> 44 #include <sys/fs/ufs_panic.h> 45 #include <sys/fs/ufs_lockfs.h> 46 #include <sys/fs/ufs_trans.h> 47 #include <sys/fs/ufs_mount.h> 48 #include <sys/fs/ufs_prot.h> 49 #include <sys/fs/ufs_bio.h> 50 #include <sys/pathname.h> 51 #include <sys/utsname.h> 52 #include <sys/conf.h> 53 54 /* handy */ 55 #define abs(x) ((x) < 0? -(x): (x)) 56 57 #if defined(DEBUG) 58 59 #define DBGLVL_NONE 0x00000000 60 #define DBGLVL_MAJOR 0x00000100 61 #define DBGLVL_MINOR 0x00000200 62 #define DBGLVL_MINUTE 0x00000400 63 #define DBGLVL_TRIVIA 0x00000800 64 #define DBGLVL_HIDEOUS 0x00001000 65 66 #define DBGFLG_NONE 0x00000000 67 #define DBGFLG_NOPANIC 0x00000001 68 #define DBGFLG_LVLONLY 0x00000002 69 #define DBGFLG_FIXWOULDPANIC 0x00000004 70 71 #define DBGFLG_FLAGMASK 0x0000000F 72 #define DBGFLG_LEVELMASK ~DBGFLG_FLAGMASK 73 74 #define DEBUG_FLAGS (ufs_fix_failure_dbg & DBGFLG_FLAGMASK) 75 #define DEBUG_LEVEL (ufs_fix_failure_dbg & DBGFLG_LEVELMASK) 76 77 unsigned int ufs_fix_failure_dbg = DBGLVL_NONE | DBGFLG_NONE; 78 79 #define DCALL(dbg_level, call) \ 80 { \ 81 if (DEBUG_LEVEL != DBGLVL_NONE) { \ 82 if (DEBUG_FLAGS & DBGFLG_LVLONLY) { \ 83 if (DEBUG_LEVEL & dbg_level) { \ 84 call; \ 85 } \ 86 } else { \ 87 if (dbg_level <= DEBUG_LEVEL) { \ 88 call; \ 89 } \ 90 } \ 91 } \ 92 } 93 94 #define DPRINTF(dbg_level, msg) DCALL(dbg_level, printf msg) 95 96 #define MAJOR(msg) DPRINTF(DBGLVL_MAJOR, msg) 97 #define MINOR(msg) DPRINTF(DBGLVL_MINOR, msg) 98 #define MINUTE(msg) DPRINTF(DBGLVL_MINUTE, msg) 99 #define TRIVIA(msg) DPRINTF(DBGLVL_TRIVIA, msg) 100 #define HIDEOUS(msg) DPRINTF(DBGLVL_HIDEOUS, msg) 101 102 #else /* !DEBUG */ 103 104 #define DCALL(ignored_dbg_level, ignored_routine) 105 #define MAJOR(ignored) 106 #define MINOR(ignored) 107 #define MINUTE(ignored) 108 #define TRIVIA(ignored) 109 #define HIDEOUS(ignored) 110 111 #endif /* DEBUG */ 112 113 #define NULLSTR(str) (!(str) || *(str) == '\0'? "<null>" : (str)) 114 #define NULSTRING "" 115 116 /* somewhat arbitrary limits, in seconds */ 117 /* all probably ought to be different, but these are convenient for debugging */ 118 const time_t UF_TOO_LONG = 128; /* max. wait for fsck start */ 119 120 /* all of these are in units of seconds used for retry period while ... */ 121 const time_t UF_FIXSTART_PERIOD = 16; /* awaiting fsck start */ 122 const time_t UF_FIXPOLL_PERIOD = 256; /* awaiting fsck finish */ 123 const time_t UF_SHORT_ERROR_PERIOD = 4; /* after (lockfs) error */ 124 const time_t UF_LONG_ERROR_PERIOD = 512; /* after (lockfs) error */ 125 126 #define NO_ERROR 0 127 #define LOCKFS_OLOCK LOCKFS_MAXLOCK+1 128 129 const ulong_t GB = 1024 * 1024 * 1024; 130 const ulong_t SecondsPerGig = 1024; /* ~17 minutes (overestimate) */ 131 132 /* 133 * per filesystem flags 134 */ 135 const int UFSFX_PANIC = (UFSMNT_ONERROR_PANIC >> 4); 136 const int UFSFX_LCKONLY = (UFSMNT_ONERROR_LOCK >> 4); 137 const int UFSFX_LCKUMOUNT = (UFSMNT_ONERROR_UMOUNT >> 4); 138 const int UFSFX_DEFAULT = (UFSMNT_ONERROR_DEFAULT >> 4); 139 const int UFSFX_REPAIR_START = 0x10000000; 140 141 /* return protocols */ 142 143 typedef enum triage_return_code { 144 TRIAGE_DEAD = -1, 145 TRIAGE_NO_SPIRIT, 146 TRIAGE_ATTEND_TO 147 } triage_t; 148 149 typedef enum statefunc_return_code { 150 SFRC_SUCCESS = 1, 151 SFRC_FAIL = 0 152 } sfrc_t; 153 154 /* external references */ 155 /* in ufs_thread.c */ 156 extern int ufs_thread_run(struct ufs_q *, callb_cpr_t *cprinfop); 157 extern int ufs_checkaccton(vnode_t *); /* in ufs_lockfs.c */ 158 extern int ufs_checkswapon(vnode_t *); /* in ufs_lockfs.c */ 159 160 extern struct pollhead ufs_pollhd; /* in ufs_vnops.c */ 161 162 /* globals */ 163 struct ufs_q ufs_fix; 164 165 /* 166 * patchable constants: 167 * These are set in ufsfx_init() [called at modload] 168 */ 169 struct ufs_failure_tunable { 170 long uft_too_long; /* limit repair startup time */ 171 long uft_fixstart_period; /* pre-repair start period */ 172 long uft_fixpoll_period; /* post-fsck start period */ 173 long uft_short_err_period; /* post-error short period */ 174 long uft_long_err_period; /* post-error long period */ 175 } ufsfx_tune; 176 177 /* internal statistics of events */ 178 struct uf_statistics { 179 ulong_t ufst_lock_violations; 180 ulong_t ufst_current_races; 181 ulong_t ufst_unmount_failures; 182 ulong_t ufst_num_fixed; 183 ulong_t ufst_num_failed; 184 ulong_t ufst_cpu_waste; 185 time_t ufst_last_start_tm; 186 kmutex_t ufst_mutex; 187 } uf_stats; 188 189 typedef enum state_action { 190 UFA_ERROR = -1, /* internal error */ 191 UFA_FOUND, /* found uf in state */ 192 UFA_SET /* change uf to state */ 193 } ufsa_t; 194 195 /* state definition */ 196 typedef struct uf_state_desc { 197 int ud_v; /* value */ 198 char *ud_name; /* name */ 199 sfrc_t (*ud_sfp)(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 200 /* per-state actions */ 201 ufs_failure_states_t ud_prev; /* valid prev. states */ 202 203 struct uf_state_desc_attr { 204 unsigned terminal:1; /* no action req. if found */ 205 unsigned at_fail:1; /* state set by thread */ 206 /* encountering the error */ 207 unsigned unused; 208 } ud_attr; 209 } ufsd_t; 210 211 /* 212 * forward references 213 */ 214 215 /* thread to watch for failures */ 216 static void ufsfx_thread_fix_failures(void *); 217 static int ufsfx_do_failure_q(void); 218 static void ufsfx_kill_fix_failure_thread(void *); 219 220 /* routines called when failure occurs */ 221 static int ufs_fault_v(vnode_t *, char *, va_list) 222 __KVPRINTFLIKE(2); 223 static ufs_failure_t *init_failure(vnode_t *, char *, va_list) 224 __KVPRINTFLIKE(2); 225 static void queue_failure(ufs_failure_t *); 226 /*PRINTFLIKE2*/ 227 static void real_panic(ufs_failure_t *, const char *, ...) 228 __KPRINTFLIKE(2); 229 static void real_panic_v(ufs_failure_t *, const char *, va_list) 230 __KVPRINTFLIKE(2); 231 static triage_t triage(vnode_t *); 232 233 /* routines called when failure record is acted upon */ 234 static sfrc_t set_state(ufs_failure_t *, ufs_failure_states_t); 235 static int state_trans_valid(ufs_failure_states_t, ufs_failure_states_t); 236 static int terminal_state(ufs_failure_states_t); 237 238 /* routines called when states entered/found */ 239 static sfrc_t sf_minimum(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 240 static sfrc_t sf_undef(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 241 static sfrc_t sf_init(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 242 static sfrc_t sf_queue(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 243 static sfrc_t sf_found_queue(ufs_failure_t *); 244 static sfrc_t sf_nonterm_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 245 static sfrc_t sf_term_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 246 static sfrc_t sf_panic(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 247 static sfrc_t sf_set_trylck(ufs_failure_t *); 248 static sfrc_t sf_set_locked(ufs_failure_t *); 249 static sfrc_t sf_found_trylck(ufs_failure_t *); 250 static sfrc_t sf_found_lock_fix_cmn(ufs_failure_t *, ufs_failure_states_t); 251 static sfrc_t sf_found_umount(ufs_failure_t *); 252 253 /* support routines, called by sf_nonterm_cmn and sf_term_cmn */ 254 static time_t trylock_time_exceeded(ufs_failure_t *); 255 static void pester_msg(ufs_failure_t *, int); 256 static int get_lockfs_status(ufs_failure_t *, struct lockfs *); 257 static void alloc_lockfs_comment(ufs_failure_t *, struct lockfs *); 258 static int set_lockfs(ufs_failure_t *, struct lockfs *); 259 static int lockfs_failure(ufs_failure_t *); 260 static int lockfs_success(ufs_failure_t *); 261 static int fsck_active(ufs_failure_t *); 262 263 /* low-level support routines */ 264 static ufsd_t *get_state_desc(ufs_failure_states_t); 265 static char *fs_name(ufs_failure_t *); 266 267 #if defined(DEBUG) 268 static char *state_name(ufs_failure_states_t); 269 static char *lock_name(struct lockfs *); 270 static char *err_name(int); 271 static char *act_name(ufsa_t); 272 static void dump_uf_list(char *msg); 273 static void dump_uf(ufs_failure_t *, int i); 274 #endif /* DEBUG */ 275 /* 276 * 277 * State Transitions: 278 * 279 * normally: 280 * if flagged to be locked but not unmounted: (UFSMNT_ONERROR_LOCK) 281 * UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> FIXING -> FIXED 282 * 283 * The only difference between these two is that the fsck must be started 284 * manually. 285 * 286 * if flagged to be unmounted: (UFSMNT_ONERROR_UMOUNT) 287 * UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> UMOUNT -> NOTFIX 288 * 289 * if flagged to panic: (UFSMNT_ONERROR_PANIC) 290 * UNDEF -> INIT -> PANIC 291 * 292 * if a secondary panic on a file system which has an active failure 293 * record: 294 * UNDEF -> INIT -> QUEUE -> REPLICA 295 * 296 * UNDEF, INIT, QUEUE all are set in the context of the failing thread. 297 * All other states (except possibly PANIC) are set in by the monitor 298 * (lock) thread. 299 * 300 */ 301 302 ufsd_t state_desc[] = 303 { 304 { UF_ILLEGAL, "in an unknown state", sf_minimum, UF_ILLEGAL, 305 { 0, 1, 0 } }, 306 { UF_UNDEF, "undefined", sf_undef, UF_UNDEF, 307 { 0, 1, 0 } }, 308 { UF_INIT, "being initialized", sf_init, UF_UNDEF, 309 { 0, 1, 0 } }, 310 { UF_QUEUE, "queued", sf_queue, UF_INIT, 311 { 0, 1, 0 } }, 312 { UF_TRYLCK, "trying to be locked", sf_nonterm_cmn, 313 UF_QUEUE, { 0, 0, 0 } }, 314 { UF_LOCKED, "locked", sf_nonterm_cmn, 315 UF_TRYLCK | UF_FIXING, { 0, 0, 0 } }, 316 { UF_UMOUNT, "being unmounted", sf_nonterm_cmn, 317 318 #if defined(DEBUG) 319 UF_PANIC | 320 #endif /* DEBUG */ 321 UF_TRYLCK | UF_LOCKED, { 0, 0, 0 } }, 322 { UF_FIXING, "being fixed", sf_nonterm_cmn, 323 UF_LOCKED, { 0, 0, 0 } }, 324 { UF_FIXED, "fixed", sf_term_cmn, 325 UF_FIXING, { 1, 0, 0 } }, 326 { UF_NOTFIX, "not fixed", sf_term_cmn, 327 328 #if defined(DEBUG) 329 UF_PANIC | 330 #endif /* DEBUG */ 331 332 UF_QUEUE | UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING, 333 { 1, 0, 0 } }, 334 { UF_REPLICA, "a replica", sf_term_cmn, 335 UF_QUEUE, { 1, 0, 0 } }, 336 { UF_PANIC, "panicking", sf_panic, 337 /* XXX make this narrower */ UF_ALLSTATES, { 0, 0, 0 } }, 338 { UF_UNDEF, NULL, ((sfrc_t (*)()) NULL), 339 UF_UNDEF, { 0, 0, 0 } } 340 }; 341 342 /* unified collection */ 343 struct ufsfx_info { 344 struct uf_statistics *ufi_statp; 345 struct ufs_failure_tunable *ufi_tunep; 346 ufsd_t *ufi_statetab; 347 } uffsinfo; 348 349 #if defined(DEBUG) 350 struct action_description { 351 ufsa_t ad_v; 352 char *ad_name; 353 }; 354 355 #define EUNK (-1) 356 357 struct error_description { 358 int ed_errno; 359 char *ed_name; 360 } err_desc[] = 361 { 362 { EUNK, "<unexpected errno?>" }, 363 { EINVAL, "EINVAL" }, 364 { EACCES, "EACCES" }, 365 { EPERM, "EPERM" }, 366 { EIO, "EIO" }, 367 { EDEADLK, "EDEADLK" }, 368 { EBUSY, "EBUSY" }, 369 { EAGAIN, "EAGAIN" }, 370 { ERESTART, "ERESTART" }, 371 { ETIMEDOUT, "ETIMEDOUT" }, 372 { NO_ERROR, "Ok" }, 373 { EUNK, NULL } 374 }; 375 376 struct action_description act_desc[] = 377 { 378 { UFA_ERROR, "<unexpected action?>" }, 379 { UFA_FOUND, "\"found\"" }, 380 { UFA_SET, "\"set\"" }, 381 { UFA_ERROR, NULL }, 382 }; 383 384 #define LOCKFS_BADLOCK (-1) 385 386 struct lock_description { 387 int ld_type; 388 char *ld_name; 389 } lock_desc[] = 390 { 391 { LOCKFS_BADLOCK, "<unexpected lock?>" }, 392 { LOCKFS_ULOCK, "Unlock" }, 393 { LOCKFS_ELOCK, "Error Lock" }, 394 { LOCKFS_HLOCK, "Hard Lock" }, 395 { LOCKFS_OLOCK, "Old Lock" }, 396 { LOCKFS_BADLOCK, NULL } 397 }; 398 399 #endif /* DEBUG */ 400 401 /* 402 * ufs_fault, ufs_fault_v 403 * 404 * called instead of cmn_err(CE_PANIC, ...) by ufs routines 405 * when a failure is detected to put the file system into an 406 * error state (if possible) or to devolve to a panic otherwise 407 * 408 * vnode is some vnode in this file system, used to find the way 409 * to ufsvfs, vfsp etc. Since a panic can be called from many 410 * levels, the vnode is the most convenient hook to pass through. 411 * 412 */ 413 414 /*PRINTFLIKE2*/ 415 int 416 ufs_fault(vnode_t *vp, char *fmt, ...) 417 { 418 va_list adx; 419 int error; 420 421 MINOR(("[ufs_fault")); 422 423 va_start(adx, fmt); 424 error = ufs_fault_v(vp, fmt, adx); 425 va_end(adx); 426 427 MINOR((": %s (%d)]\n", err_name(error), error)); 428 return (error); 429 } 430 431 const char *nullfmt = "<null format?>"; 432 433 static int 434 ufs_fault_v(vnode_t *vp, char *fmt, va_list adx) 435 { 436 ufs_failure_t *new = NULL; 437 ufsvfs_t *ufsvfsp; 438 triage_t fix; 439 int err = ERESTART; 440 int need_vfslock; 441 442 MINOR(("[ufs_fault_v")); 443 444 if (fmt == NULL) 445 fmt = (char *)nullfmt; 446 447 fix = triage(vp); 448 449 if (vp) { 450 ufsvfsp = (struct ufsvfs *)vp->v_vfsp->vfs_data; 451 452 /* 453 * Something bad has happened. That is why we are here. 454 * 455 * In order for the bad thing to be recorded in the superblock 456 * we need to write to the superblock directly. 457 * In the case that logging is enabled the logging code 458 * would normally intercept our write as a delta to the log, 459 * thus we mark the filesystem FSBAD in any case. 460 */ 461 need_vfslock = !MUTEX_HELD(&ufsvfsp->vfs_lock); 462 463 if (need_vfslock) { 464 mutex_enter(&ufsvfsp->vfs_lock); 465 } 466 467 ufsvfsp->vfs_fs->fs_clean = FSBAD; 468 ASSERT(SEMA_HELD(&ufsvfsp->vfs_bufp->b_sem)); 469 ufsvfsp->vfs_bufp->b_flags &= 470 ~(B_ASYNC | B_READ | B_DONE | B_ERROR | B_DELWRI); 471 472 (void) bdev_strategy(ufsvfsp->vfs_bufp); 473 (void) biowait(ufsvfsp->vfs_bufp); 474 475 if (need_vfslock) { 476 mutex_exit(&ufsvfsp->vfs_lock); 477 } 478 } 479 480 switch (fix) { 481 482 default: 483 case TRIAGE_DEAD: 484 case TRIAGE_NO_SPIRIT: 485 486 real_panic_v(new, fmt, adx); 487 /* LINTED: warning: logical expression always true: op "||" */ 488 ASSERT(DEBUG); 489 err = EAGAIN; 490 491 #if defined(DEBUG) 492 if (!(DEBUG_FLAGS & DBGFLG_FIXWOULDPANIC)) { 493 break; 494 } 495 /* FALLTHROUGH */ 496 497 #else 498 break; 499 500 #endif /* DEBUG */ 501 502 case TRIAGE_ATTEND_TO: 503 504 /* q thread not running yet? */ 505 if (mutex_tryenter(&ufs_fix.uq_mutex)) { 506 if (!ufs_fix.uq_threadp) { 507 mutex_exit(&ufs_fix.uq_mutex); 508 ufs_thread_start(&ufs_fix, 509 ufsfx_thread_fix_failures, NULL); 510 ufs_fix.uq_threadp->t_flag |= T_DONTBLOCK; 511 mutex_enter(&ufs_fix.uq_mutex); 512 } else { 513 /* 514 * We got the lock but we are not the current 515 * threadp so we have to release the lock. 516 */ 517 mutex_exit(&ufs_fix.uq_mutex); 518 } 519 } else { 520 MINOR((": fix failure thread already running ")); 521 /* 522 * No need to log another failure as one is already 523 * being logged. 524 */ 525 break; 526 } 527 528 if (ufs_fix.uq_threadp && ufs_fix.uq_threadp == curthread) { 529 mutex_exit(&ufs_fix.uq_mutex); 530 cmn_err(CE_WARN, "ufs_fault_v: recursive ufs_fault"); 531 } else { 532 /* 533 * Must check if we actually still own the lock and 534 * if so then release the lock and move on with life. 535 */ 536 if (mutex_owner(&ufs_fix.uq_mutex) == curthread) 537 mutex_exit(&ufs_fix.uq_mutex); 538 } 539 540 new = init_failure(vp, fmt, adx); 541 if (new != NULL) { 542 queue_failure(new); 543 break; 544 } 545 real_panic_v(new, fmt, adx); 546 break; 547 548 } 549 MINOR(("] ")); 550 return (err); 551 } 552 553 /* 554 * triage() 555 * 556 * Attempt to fix iff: 557 * - the system is not already panicking 558 * - this file system isn't explicitly marked not to be fixed 559 * - we can connect to the user-level daemon 560 * These conditions are detectable later, but if we can determine 561 * them in the failing threads context the core dump may be more 562 * useful. 563 * 564 */ 565 566 static triage_t 567 triage(vnode_t *vp) 568 { 569 struct inode *ip; 570 int need_unlock_vfs; 571 int fs_flags; 572 573 MINUTE(("[triage")); 574 575 if (panicstr) { 576 MINUTE(( 577 ": already panicking: \"%s\" => TRIAGE_DEAD]\n", panicstr)); 578 return (TRIAGE_DEAD); 579 } 580 581 if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs) { 582 MINUTE(( 583 ": vp, ip or ufsvfs is NULL; can't determine fs => TRIAGE_DEAD]\n")); 584 return (TRIAGE_DEAD); 585 } 586 587 /* use tryenter and continue no matter what since we're panicky */ 588 need_unlock_vfs = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock); 589 if (need_unlock_vfs) 590 need_unlock_vfs = mutex_tryenter(&ip->i_ufsvfs->vfs_lock); 591 592 fs_flags = ip->i_ufsvfs->vfs_fsfx.fx_flags; 593 if (need_unlock_vfs) 594 mutex_exit(&ip->i_ufsvfs->vfs_lock); 595 596 if (fs_flags & UFSFX_PANIC) { 597 MINUTE(( 598 ": filesystem marked \"panic\" => TRIAGE_NO_SPIRIT]\n")); 599 return (TRIAGE_NO_SPIRIT); 600 } 601 602 if (ufs_checkaccton(vp) != 0) { 603 MINUTE(( 604 ": filesystem would deadlock (accounting) => TRIAGE_DEAD]\n")); 605 return (TRIAGE_DEAD); 606 } 607 608 if (ufs_checkswapon(vp) != 0) { 609 MINUTE(( 610 ": filesystem would deadlock (swapping) => TRIAGE_DEAD]\n")); 611 return (TRIAGE_DEAD); 612 } 613 614 MINUTE((": return TRIAGE_ATTEND_TO] ")); 615 return (TRIAGE_ATTEND_TO); 616 } 617 618 /* 619 * init failure 620 * 621 * This routine allocates a failure struct and initializes 622 * it's member elements. 623 * Space is allocated for copies of dynamic identifying fs structures 624 * passed in. Without a much more segmented kernel architecture 625 * this is as protected as we can make it (for now.) 626 */ 627 static ufs_failure_t * 628 init_failure(vnode_t *vp, char *fmt, va_list adx) 629 { 630 ufs_failure_t *new; 631 struct inode *ip; 632 int initialization_worked = 0; 633 int need_vfs_unlock; 634 635 MINOR(("[init_failure")); 636 637 new = kmem_zalloc(sizeof (ufs_failure_t), KM_NOSLEEP); 638 if (!new) { 639 MINOR((": kmem_zalloc failed]\n")); 640 return (NULL); 641 } 642 643 /* 644 * enough information to make a fix attempt possible? 645 */ 646 if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs || !vp->v_vfsp || 647 !ip->i_ufsvfs->vfs_bufp || !ITOF(ip) || !fmt) 648 goto errout; 649 650 if (vp->v_type != VREG && vp->v_type != VDIR && 651 vp->v_type != VBLK && vp->v_type != VCHR && 652 vp->v_type != VLNK && vp->v_type != VFIFO && 653 vp->v_type != VSOCK) 654 goto errout; 655 656 if (ip->i_ufsvfs->vfs_root->v_type != VREG && 657 ip->i_ufsvfs->vfs_root->v_type != VDIR && 658 ip->i_ufsvfs->vfs_root->v_type != VBLK && 659 ip->i_ufsvfs->vfs_root->v_type != VCHR && 660 ip->i_ufsvfs->vfs_root->v_type != VLNK && 661 ip->i_ufsvfs->vfs_root->v_type != VFIFO && 662 ip->i_ufsvfs->vfs_root->v_type != VSOCK) 663 goto errout; 664 665 if ((ITOF(ip)->fs_magic != FS_MAGIC) && 666 (ITOF(ip)->fs_magic != MTB_UFS_MAGIC)) 667 goto errout; 668 669 /* intialize values */ 670 671 (void) vsnprintf(new->uf_panic_str, LOCKFS_MAXCOMMENTLEN - 1, fmt, adx); 672 673 new->uf_ufsvfsp = ip->i_ufsvfs; 674 new->uf_vfsp = ip->i_vfs; 675 676 mutex_init(&new->uf_mutex, NULL, MUTEX_DEFAULT, NULL); 677 need_vfs_unlock = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock); 678 679 if (need_vfs_unlock) { 680 if (!mutex_tryenter(&ip->i_ufsvfs->vfs_lock)) { 681 /* 682 * not much alternative here, but we're panicking 683 * already, it couldn't be worse - so just 684 * proceed optimistically and take note. 685 */ 686 mutex_enter(&uf_stats.ufst_mutex); 687 uf_stats.ufst_lock_violations++; 688 mutex_exit(&uf_stats.ufst_mutex); 689 MINOR((": couldn't get vfs lock")) 690 need_vfs_unlock = 0; 691 } 692 } 693 694 if (mutex_tryenter(&new->uf_mutex)) { 695 initialization_worked = set_state(new, UF_INIT); 696 mutex_exit(&new->uf_mutex); 697 } 698 699 if (need_vfs_unlock) 700 mutex_exit(&ip->i_ufsvfs->vfs_lock); 701 702 if (initialization_worked) { 703 MINOR(("] ")); 704 return (new); 705 } 706 /* FALLTHROUGH */ 707 708 errout: 709 if (new) 710 kmem_free(new, sizeof (ufs_failure_t)); 711 MINOR((": failed]\n")); 712 return (NULL); 713 } 714 715 static void 716 queue_failure(ufs_failure_t *new) 717 { 718 MINOR(("[queue_failure")); 719 720 mutex_enter(&ufs_fix.uq_mutex); 721 722 if (ufs_fix.uq_ufhead) 723 insque(new, &ufs_fix.uq_ufhead); 724 else 725 ufs_fix.uq_ufhead = new; 726 727 if (mutex_tryenter(&new->uf_mutex)) { 728 (void) set_state(new, UF_QUEUE); 729 mutex_exit(&new->uf_mutex); 730 } 731 732 mutex_enter(&uf_stats.ufst_mutex); /* force wakeup */ 733 ufs_fix.uq_ne = ufs_fix.uq_lowat = uf_stats.ufst_num_failed; 734 mutex_exit(&uf_stats.ufst_mutex); 735 736 cv_broadcast(&ufs_fix.uq_cv); 737 738 DCALL(DBGLVL_MAJOR, cmn_err(CE_WARN, new->uf_panic_str ? 739 new->uf_panic_str : "queue_failure: NULL panic str?")); 740 mutex_exit(&ufs_fix.uq_mutex); 741 742 MINOR(("] ")); 743 } 744 745 /*PRINTFLIKE2*/ 746 static void 747 real_panic(ufs_failure_t *f, const char *fmt, ...) 748 { 749 va_list adx; 750 751 MINUTE(("[real_panic ")); 752 753 va_start(adx, fmt); 754 real_panic_v(f, fmt, adx); 755 va_end(adx); 756 757 MINUTE((": return?!]\n")); 758 } 759 760 static void 761 real_panic_v(ufs_failure_t *f, const char *fmt, va_list adx) 762 { 763 int seriousness = CE_PANIC; 764 int need_unlock; 765 766 MINUTE(("[real_panic_v ")); 767 768 if (f && f->uf_ufsvfsp) 769 TRANS_SETERROR(f->uf_ufsvfsp); 770 771 #if defined(DEBUG) 772 if (DEBUG_FLAGS & DBGFLG_NOPANIC) { 773 seriousness = CE_WARN; 774 cmn_err(CE_WARN, "real_panic: EWOULDPANIC\n"); 775 } 776 #endif /* DEBUG */ 777 778 delay(hz >> 1); /* allow previous warnings to get out */ 779 780 if (!f && fmt) 781 vcmn_err(seriousness, fmt, adx); 782 else 783 cmn_err(seriousness, f && f->uf_panic_str? f->uf_panic_str: 784 "real_panic: <unknown panic?>"); 785 786 if (f) { 787 need_unlock = !MUTEX_HELD(&f->uf_mutex); 788 if (need_unlock) { 789 mutex_enter(&f->uf_mutex); 790 } 791 792 f->uf_retry = -1; 793 (void) set_state(f, UF_PANIC); 794 795 if (need_unlock) { 796 mutex_exit(&f->uf_mutex); 797 } 798 } 799 MINUTE((": return?!]\n")); 800 } 801 802 /* 803 * initializes ufs panic structs, locks, etc 804 */ 805 void 806 ufsfx_init(void) 807 { 808 809 MINUTE(("[ufsfx_init")); 810 811 /* patchable; unchanged while running, so no lock is needed */ 812 ufsfx_tune.uft_too_long = UF_TOO_LONG; 813 ufsfx_tune.uft_fixstart_period = UF_FIXSTART_PERIOD; 814 ufsfx_tune.uft_fixpoll_period = UF_FIXPOLL_PERIOD; 815 ufsfx_tune.uft_short_err_period = UF_SHORT_ERROR_PERIOD; 816 ufsfx_tune.uft_long_err_period = UF_LONG_ERROR_PERIOD; 817 818 uffsinfo.ufi_statp = &uf_stats; 819 uffsinfo.ufi_tunep = &ufsfx_tune; 820 uffsinfo.ufi_statetab = &state_desc[0]; 821 822 mutex_init(&uf_stats.ufst_mutex, NULL, MUTEX_DEFAULT, NULL); 823 ufs_thread_init(&ufs_fix, /* maxne */ 1); 824 825 MINUTE(("] ")); 826 } 827 828 /* 829 * initializes per-ufs values 830 * returns 0 (ok) or errno 831 */ 832 int 833 ufsfx_mount(struct ufsvfs *ufsvfsp, int flags) 834 { 835 MINUTE(("[ufsfx_mount (%d)", flags)); 836 /* don't check/need vfs_lock because it's still being initialized */ 837 838 ufsvfsp->vfs_fsfx.fx_flags = (flags & UFSMNT_ONERROR_FLGMASK) >> 4; 839 840 MINUTE((": %s: fx_flags:%ld,", 841 ufsvfsp->vfs_fs->fs_fsmnt, ufsvfsp->vfs_fsfx.fx_flags)); 842 /* 843 * onerror={panic ^ lock only ^ unmount} 844 */ 845 846 if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_PANIC) { 847 MINUTE((" PANIC")); 848 849 } else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKONLY) { 850 MINUTE((" LCKONLY")); 851 852 } else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKUMOUNT) { 853 MINUTE((" LCKUMOUNT")); 854 855 } else { 856 ufsvfsp->vfs_fsfx.fx_flags = UFSFX_DEFAULT; 857 ASSERT(ufsvfsp->vfs_fsfx.fx_flags & 858 (UFSMNT_ONERROR_FLGMASK >> 4)); 859 MINUTE((" DEFAULT")); 860 } 861 862 pollwakeup(&ufs_pollhd, POLLPRI); 863 MINUTE(("]\n")); 864 return (0); 865 } 866 867 /* 868 * ufsfx_unmount 869 * 870 * called during unmount 871 */ 872 void 873 ufsfx_unmount(struct ufsvfs *ufsvfsp) 874 { 875 ufs_failure_t *f; 876 int must_unlock_list; 877 878 MINUTE(("[ufsfx_unmount")); 879 880 if (!ufsvfsp) { 881 MINUTE((": no ufsvfsp]")); 882 return; 883 } 884 885 if ((must_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex)) != 0) 886 mutex_enter(&ufs_fix.uq_mutex); 887 888 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 889 int must_unlock_failure; 890 891 must_unlock_failure = !MUTEX_HELD(&f->uf_mutex); 892 if (must_unlock_failure) { 893 mutex_enter(&f->uf_mutex); 894 } 895 896 if (f->uf_ufsvfsp == ufsvfsp) { 897 898 /* 899 * if we owned the failure record lock, then this 900 * is probably a fix failure-triggered unmount, so 901 * the warning is not appropriate or needed 902 */ 903 904 /* XXX if rebooting don't print this? */ 905 if (!terminal_state(f->uf_s) && must_unlock_failure) { 906 cmn_err(CE_WARN, 907 "Unmounting %s while error-locked", 908 fs_name(f)); 909 } 910 911 f->uf_ufsvfsp = NULL; 912 f->uf_vfs_ufsfxp = NULL; 913 f->uf_vfs_lockp = NULL; 914 f->uf_bp = NULL; 915 f->uf_vfsp = NULL; 916 f->uf_retry = -1; 917 } 918 919 if (must_unlock_failure) 920 mutex_exit(&f->uf_mutex); 921 } 922 if (must_unlock_list) 923 mutex_exit(&ufs_fix.uq_mutex); 924 925 pollwakeup(&ufs_pollhd, POLLPRI | POLLHUP); 926 MINUTE(("] ")); 927 } 928 929 /* 930 * ufsfx_(un)lockfs 931 * 932 * provides hook from lockfs code so we can recognize unlock/relock 933 * This is called after it is certain that the (un)lock will succeed. 934 */ 935 void 936 ufsfx_unlockfs(struct ufsvfs *ufsvfsp) 937 { 938 ufs_failure_t *f; 939 int need_unlock; 940 int need_unlock_list; 941 int informed = 0; 942 943 MINUTE(("[ufsfx_unlockfs")); 944 945 if (!ufsvfsp) 946 return; 947 948 need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex); 949 950 if (need_unlock_list) 951 mutex_enter(&ufs_fix.uq_mutex); 952 953 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 954 955 need_unlock = !MUTEX_HELD(&f->uf_mutex); 956 if (need_unlock) 957 mutex_enter(&f->uf_mutex); 958 959 if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s)) { 960 if (!(f->uf_s & UF_FIXING)) { 961 /* 962 * This might happen if we don't notice that 963 * the fs gets marked FSFIX before it is 964 * marked FSCLEAN, as might occur if the 965 * the superblock was hammered directly. 966 */ 967 if (!informed) { 968 informed = 1; 969 cmn_err(CE_NOTE, 970 "Unlock of %s succeeded before " 971 "fs_clean marked FSFIX?", 972 fs_name(f)); 973 } 974 975 /* 976 * pass through fixing state so 977 * transition protocol is satisfied 978 */ 979 if (!set_state(f, UF_FIXING)) { 980 MINUTE((": failed] ")); 981 } 982 } 983 984 if (!set_state(f, UF_FIXED)) { 985 /* it's already fixed, so don't panic now */ 986 MINUTE((": failed] ")); 987 } 988 } 989 990 if (need_unlock) 991 mutex_exit(&f->uf_mutex); 992 } 993 if (need_unlock_list) 994 mutex_exit(&ufs_fix.uq_mutex); 995 MINUTE(("] ")); 996 } 997 998 void 999 ufsfx_lockfs(struct ufsvfs *ufsvfsp) 1000 { 1001 ufs_failure_t *f; 1002 int need_unlock; 1003 int need_unlock_list; 1004 1005 MINUTE(("[ufsfx_lockfs")); 1006 1007 if (!ufsvfsp) 1008 return; 1009 1010 need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex); 1011 1012 if (need_unlock_list) 1013 mutex_enter(&ufs_fix.uq_mutex); 1014 1015 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 1016 1017 need_unlock = !MUTEX_HELD(&f->uf_mutex); 1018 if (need_unlock) 1019 mutex_enter(&f->uf_mutex); 1020 1021 if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s) && 1022 f->uf_s != UF_PANIC) { 1023 switch (f->uf_s) { 1024 1025 default: 1026 cmn_err(CE_WARN, 1027 "fs %s not in state " 1028 "UF_TRYLCK, UF_LOCKED or UF_FIXING", 1029 fs_name(f)); 1030 break; 1031 1032 case UF_TRYLCK: 1033 if (!set_state(f, UF_LOCKED)) { 1034 MINUTE((": failed] ")); 1035 } 1036 break; 1037 1038 case UF_LOCKED: 1039 if (!set_state(f, UF_FIXING)) { 1040 MINUTE((": failed] ")); 1041 } 1042 break; 1043 1044 case UF_FIXING: 1045 break; 1046 1047 } 1048 } 1049 1050 if (need_unlock) 1051 mutex_exit(&f->uf_mutex); 1052 } 1053 if (need_unlock_list) 1054 mutex_exit(&ufs_fix.uq_mutex); 1055 1056 MINUTE(("] ")); 1057 } 1058 1059 /* 1060 * error lock, trigger fsck and unlock those fs with failures 1061 * blatantly copied from the hlock routine, although this routine 1062 * triggers differently in order to use uq_ne as meaningful data. 1063 */ 1064 /* ARGSUSED */ 1065 void 1066 ufsfx_thread_fix_failures(void *ignored) 1067 { 1068 int retry; 1069 callb_cpr_t cprinfo; 1070 1071 CALLB_CPR_INIT(&cprinfo, &ufs_fix.uq_mutex, callb_generic_cpr, 1072 "ufsfixfail"); 1073 1074 MINUTE(("[ufsfx_thread_fix_failures] ")); 1075 1076 for (;;) { 1077 /* sleep until there is work to do */ 1078 1079 mutex_enter(&ufs_fix.uq_mutex); 1080 (void) ufs_thread_run(&ufs_fix, &cprinfo); 1081 ufs_fix.uq_ne = 0; 1082 mutex_exit(&ufs_fix.uq_mutex); 1083 1084 /* process failures on our q */ 1085 do { 1086 retry = ufsfx_do_failure_q(); 1087 if (retry) { 1088 mutex_enter(&ufs_fix.uq_mutex); 1089 CALLB_CPR_SAFE_BEGIN(&cprinfo); 1090 (void) cv_reltimedwait(&ufs_fix.uq_cv, 1091 &ufs_fix.uq_mutex, (hz * retry), 1092 TR_CLOCK_TICK); 1093 CALLB_CPR_SAFE_END(&cprinfo, 1094 &ufs_fix.uq_mutex); 1095 mutex_exit(&ufs_fix.uq_mutex); 1096 } 1097 } while (retry); 1098 } 1099 /* NOTREACHED */ 1100 } 1101 1102 1103 /* 1104 * watch for fix-on-panic work 1105 * 1106 * returns # of seconds to sleep before trying again 1107 * and zero if no retry is needed 1108 */ 1109 1110 int 1111 ufsfx_do_failure_q(void) 1112 { 1113 ufs_failure_t *f; 1114 long retry = 1; 1115 ufsd_t *s; 1116 1117 MAJOR(("[ufsfx_do_failure_q")); 1118 DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL)); 1119 1120 if (!mutex_tryenter(&ufs_fix.uq_mutex)) 1121 return (retry); 1122 1123 retry = 0; 1124 rescan_q: 1125 1126 /* 1127 * walk down failure list 1128 * depending on state of each failure, do whatever 1129 * is appropriate to move it to the next state 1130 * taking note of whether retry gets set 1131 * 1132 * retry protocol: 1133 * wakeup in shortest required time for any failure 1134 * retry == 0; nothing more to do (terminal state) 1135 * retry < 0; reprocess queue immediately, retry will 1136 * be abs(retry) for the next cycle 1137 * retry > 0; schedule wakeup for retry seconds 1138 */ 1139 1140 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 1141 1142 if (!mutex_tryenter(&f->uf_mutex)) { 1143 retry = 1; 1144 continue; 1145 } 1146 s = get_state_desc(f->uf_s); 1147 1148 MINOR((": found%s: %s, \"%s: %s\"\n", 1149 s->ud_attr.terminal ? " old" : "", 1150 fs_name(f), state_name(f->uf_s), f->uf_panic_str)); 1151 1152 if (s->ud_attr.terminal) { 1153 mutex_exit(&f->uf_mutex); 1154 continue; 1155 } 1156 1157 if (s->ud_sfp) 1158 (*s->ud_sfp)(f, UFA_FOUND, f->uf_s); 1159 1160 ASSERT(terminal_state(f->uf_s) || f->uf_retry != 0); 1161 1162 if (f->uf_retry != 0) { 1163 if (retry > f->uf_retry || retry == 0) 1164 retry = f->uf_retry; 1165 if (f->uf_retry < 0) 1166 f->uf_retry = abs(f->uf_retry); 1167 } 1168 mutex_exit(&f->uf_mutex); 1169 } 1170 1171 1172 if (retry < 0) { 1173 retry = abs(retry); 1174 goto rescan_q; 1175 } 1176 1177 mutex_exit(&ufs_fix.uq_mutex); 1178 1179 DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL)); 1180 MAJOR((": retry=%ld, good night]\n\n", retry)); 1181 1182 return (retry); 1183 } 1184 1185 static void 1186 pester_msg(ufs_failure_t *f, int seriousness) 1187 { 1188 MINUTE(("[pester_msg")); 1189 ASSERT(f->uf_s & (UF_LOCKED | UF_FIXING)); 1190 1191 /* 1192 * XXX if seems too long for this fs, poke administrator 1193 * XXX to run fsck manually (and change retry time?) 1194 */ 1195 cmn_err(seriousness, "Waiting for repair of %s to %s", 1196 fs_name(f), f->uf_s & UF_LOCKED ? "start" : "finish"); 1197 MINUTE(("]")); 1198 } 1199 1200 static time_t 1201 trylock_time_exceeded(ufs_failure_t *f) 1202 { 1203 time_t toolong; 1204 extern time_t time; 1205 1206 MINUTE(("[trylock_time_exceeded")); 1207 ASSERT(MUTEX_HELD(&f->uf_mutex)); 1208 1209 toolong = (time_t)ufsfx_tune.uft_too_long + f->uf_entered_tm; 1210 if (time > toolong) 1211 cmn_err(CE_WARN, "error-lock timeout exceeded: %s", fs_name(f)); 1212 1213 MINUTE(("] ")); 1214 return (time <= toolong? 0: time - toolong); 1215 } 1216 1217 static int 1218 get_lockfs_status(ufs_failure_t *f, struct lockfs *lfp) 1219 { 1220 MINUTE(("[get_lockfs_status")); 1221 1222 if (!f->uf_ufsvfsp) { 1223 MINUTE((": ufsvfsp is NULL]\n")); 1224 return (0); 1225 } 1226 1227 ASSERT(MUTEX_HELD(&f->uf_mutex)); 1228 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp)); 1229 ASSERT(!vfs_lock_held(f->uf_vfsp)); 1230 ASSERT(f->uf_ufsvfsp->vfs_root != NULL); 1231 1232 f->uf_lf_err = ufs_fiolfss(f->uf_ufsvfsp->vfs_root, lfp); 1233 1234 if (f->uf_lf_err) { 1235 f->uf_retry = ufsfx_tune.uft_short_err_period; 1236 } 1237 1238 MINUTE(("] ")); 1239 return (1); 1240 } 1241 1242 static sfrc_t 1243 set_state(ufs_failure_t *f, ufs_failure_states_t new_state) 1244 { 1245 ufsd_t *s; 1246 sfrc_t sfrc = SFRC_FAIL; 1247 int need_unlock; 1248 extern time_t time; 1249 1250 HIDEOUS(("[set_state: new state:%s", state_name(new_state))); 1251 ASSERT(f); 1252 ASSERT(MUTEX_HELD(&f->uf_mutex)); 1253 1254 /* 1255 * if someone else is panicking, just let panic sync proceed 1256 */ 1257 if (panicstr) { 1258 (void) set_state(f, UF_NOTFIX); 1259 HIDEOUS((": state reset: not fixed] ")); 1260 return (sfrc); 1261 } 1262 1263 /* 1264 * bad state transition, an internal error 1265 */ 1266 if (!state_trans_valid(f->uf_s, new_state)) { 1267 /* recursion */ 1268 if (!(f->uf_s & UF_PANIC) && !(new_state & UF_PANIC)) 1269 (void) set_state(f, UF_PANIC); 1270 MINOR((": state reset: transition failure (\"%s\"->\"%s\")] ", 1271 state_name(f->uf_s), state_name(new_state))); 1272 return (sfrc); 1273 } 1274 1275 s = get_state_desc(new_state); 1276 1277 need_unlock = !MUTEX_HELD(&ufs_fix.uq_mutex); 1278 if (need_unlock) 1279 mutex_enter(&ufs_fix.uq_mutex); 1280 1281 if (s->ud_attr.at_fail && ufs_fix.uq_threadp && 1282 curthread == ufs_fix.uq_threadp) { 1283 cmn_err(CE_WARN, "set_state: probable recursive panic of %s", 1284 fs_name(f)); 1285 } 1286 if (need_unlock) 1287 mutex_exit(&ufs_fix.uq_mutex); 1288 1289 /* NULL state functions always succeed */ 1290 sfrc = !s->ud_sfp? SFRC_SUCCESS: (*s->ud_sfp)(f, UFA_SET, new_state); 1291 1292 if (sfrc == SFRC_SUCCESS && f->uf_s != new_state) { 1293 f->uf_s = new_state; 1294 f->uf_entered_tm = time; 1295 f->uf_counter = 0; 1296 } 1297 1298 HIDEOUS(("]\n")); 1299 return (sfrc); 1300 } 1301 1302 static ufsd_t * 1303 get_state_desc(ufs_failure_states_t state) 1304 { 1305 ufsd_t *s; 1306 1307 HIDEOUS(("[get_state_desc")); 1308 1309 for (s = &state_desc[1]; s->ud_name != NULL; s++) { 1310 if (s->ud_v == state) { 1311 HIDEOUS(("] ")); 1312 return (s); 1313 } 1314 } 1315 1316 HIDEOUS(("] ")); 1317 return (&state_desc[0]); /* default */ 1318 } 1319 1320 static sfrc_t 1321 sf_undef(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s) 1322 { 1323 sfrc_t rc; 1324 1325 TRIVIA(("[sf_undef, action is %s, state is %s\n", 1326 act_name(a), state_name(s))); 1327 ASSERT(s == UF_UNDEF); 1328 1329 /* shouldn't find null failure records or ever set one */ 1330 rc = set_state(f, UF_NOTFIX); 1331 1332 TRIVIA(("] ")); 1333 return (rc); 1334 } 1335 1336 1337 static sfrc_t 1338 sf_init( 1339 ufs_failure_t *f, 1340 ufsa_t a, 1341 ufs_failure_states_t s) 1342 { 1343 sfrc_t rc = SFRC_FAIL; 1344 extern time_t time; 1345 1346 TRIVIA(("[sf_init, action is %s", act_name(a))); 1347 ASSERT(s & UF_INIT); 1348 1349 switch (a) { 1350 case UFA_SET: 1351 f->uf_begin_tm = time; 1352 f->uf_retry = 1; 1353 if (!f->uf_ufsvfsp) { 1354 (void) set_state(f, UF_PANIC); 1355 TRIVIA((": NULL ufsvfsp]\n")); 1356 return (rc); 1357 } 1358 /* 1359 * because we can call panic from many different levels, 1360 * we can't be sure that we've got the vfs_lock at this 1361 * point. However, there's not much alternative and if 1362 * we don't (have the lock) the worst case is we'll just 1363 * panic again 1364 */ 1365 f->uf_vfs_lockp = &f->uf_ufsvfsp->vfs_lock; 1366 f->uf_vfs_ufsfxp = &f->uf_ufsvfsp->vfs_fsfx; 1367 1368 if (!f->uf_ufsvfsp->vfs_bufp) { 1369 (void) set_state(f, UF_PANIC); 1370 TRIVIA((": NULL vfs_bufp]\n")); 1371 return (rc); 1372 } 1373 f->uf_bp = f->uf_ufsvfsp->vfs_bufp; 1374 1375 if (!f->uf_ufsvfsp->vfs_bufp->b_un.b_fs) { 1376 (void) set_state(f, UF_PANIC); 1377 TRIVIA((": NULL vfs_fs]\n")); 1378 return (rc); 1379 } 1380 1381 /* vfs_fs = vfs_bufp->b_un.b_fs */ 1382 bcopy(f->uf_ufsvfsp->vfs_fs->fs_fsmnt, f->uf_fsname, MAXMNTLEN); 1383 1384 f->uf_lf.lf_lock = LOCKFS_ELOCK; /* primer */ 1385 1386 if (!f->uf_vfsp || f->uf_vfsp->vfs_dev == NODEV) { 1387 (void) set_state(f, UF_PANIC); 1388 TRIVIA((": NULL vfsp or vfs_dev == NODEV")); 1389 return (rc); 1390 } 1391 f->uf_dev = f->uf_vfsp->vfs_dev; 1392 1393 rc = SFRC_SUCCESS; 1394 break; 1395 1396 case UFA_FOUND: 1397 default: 1398 /* failures marked init shouldn't even be on the queue yet */ 1399 rc = set_state(f, UF_QUEUE); 1400 TRIVIA((": found failure with state init]\n")); 1401 } 1402 1403 TRIVIA(("] ")); 1404 return (rc); 1405 } 1406 1407 static sfrc_t 1408 sf_queue( 1409 ufs_failure_t *f, 1410 ufsa_t a, 1411 ufs_failure_states_t s) 1412 { 1413 sfrc_t rc = SFRC_FAIL; 1414 1415 TRIVIA(("[sf_queue, action is %s", act_name(a))); 1416 ASSERT(s & UF_QUEUE); 1417 1418 if (!f->uf_ufsvfsp) { 1419 TRIVIA((": NULL ufsvfsp]\n")); 1420 return (rc); 1421 } 1422 1423 switch (a) { 1424 case UFA_FOUND: 1425 rc = sf_found_queue(f); 1426 break; 1427 1428 case UFA_SET: 1429 1430 ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex)); 1431 1432 mutex_enter(&uf_stats.ufst_mutex); 1433 uf_stats.ufst_num_failed++; 1434 mutex_exit(&uf_stats.ufst_mutex); 1435 1436 /* 1437 * if can't get the vfs lock, just wait until 1438 * UF_TRYLCK to set fx_current 1439 */ 1440 if (mutex_tryenter(f->uf_vfs_lockp)) { 1441 f->uf_vfs_ufsfxp->fx_current = f; 1442 mutex_exit(f->uf_vfs_lockp); 1443 } else { 1444 mutex_enter(&uf_stats.ufst_mutex); 1445 uf_stats.ufst_current_races++; 1446 mutex_exit(&uf_stats.ufst_mutex); 1447 } 1448 1449 f->uf_retry = 1; 1450 rc = SFRC_SUCCESS; 1451 TRIVIA(("] ")); 1452 break; 1453 1454 default: 1455 (void) set_state(f, UF_PANIC); 1456 TRIVIA((": failed] ")); 1457 } 1458 1459 return (rc); 1460 } 1461 1462 static sfrc_t 1463 sf_found_queue(ufs_failure_t *f) 1464 { 1465 int replica; 1466 sfrc_t rc = SFRC_FAIL; 1467 1468 TRIVIA(("[sf_found_queue")); 1469 1470 /* 1471 * don't need to check for null ufsvfsp because 1472 * unmount must own list's ufs_fix.uq_mutex 1473 * to mark it null and we own that lock since 1474 * we got here. 1475 */ 1476 1477 ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex)); 1478 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp)); 1479 1480 if (!mutex_tryenter(f->uf_vfs_lockp)) { 1481 TRIVIA((": tryenter(vfslockp) failed; retry]\n")); 1482 f->uf_retry = 1; 1483 return (rc); 1484 } 1485 1486 replica = f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current != NULL && 1487 f->uf_vfs_ufsfxp->fx_current != f && 1488 !terminal_state(f->uf_vfs_ufsfxp->fx_current->uf_s); 1489 1490 /* 1491 * copy general flags to this ufs_failure so we don't 1492 * need to refer back to the ufsvfs, or, more importantly, 1493 * don't need to keep acquiring (trying to acquire) vfs_lockp 1494 * 1495 * The most restrictive option wins: 1496 * panic > errlock only > errlock+unmount > repair 1497 * XXX panic > elock > elock > elock+umount 1498 */ 1499 if (f->uf_vfs_ufsfxp->fx_flags & UFSFX_PANIC) { 1500 if (!set_state(f, UF_PANIC)) { 1501 TRIVIA((": marked panic but was queued?")); 1502 real_panic(f, " "); 1503 /*NOTREACHED*/ 1504 } 1505 mutex_exit(f->uf_vfs_lockp); 1506 return (rc); 1507 } 1508 f->uf_flags = f->uf_vfs_ufsfxp->fx_flags; 1509 1510 if (replica) { 1511 if (!set_state(f, UF_REPLICA)) { 1512 f->uf_retry = 1; 1513 TRIVIA((": set to replica failed] ")); 1514 } else { 1515 TRIVIA(("] ")); 1516 } 1517 mutex_exit(f->uf_vfs_lockp); 1518 return (rc); 1519 } 1520 mutex_exit(f->uf_vfs_lockp); 1521 1522 if (!set_state(f, UF_TRYLCK)) { 1523 TRIVIA((": failed] ")); 1524 } else { 1525 rc = SFRC_SUCCESS; 1526 } 1527 return (rc); 1528 } 1529 1530 static sfrc_t 1531 sf_nonterm_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s) 1532 { 1533 sfrc_t rc = SFRC_FAIL; 1534 1535 TRIVIA(("[sf_nonterm_cmn, action: %s, %s", act_name(a), state_name(s))); 1536 ASSERT(s & (UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING)); 1537 ASSERT(!terminal_state(s)); 1538 1539 if (!f->uf_ufsvfsp && !(f->uf_s & UF_UMOUNT)) { 1540 TRIVIA((": NULL ufsvfsp (state != UMOUNT)]\n")); 1541 (void) set_state(f, UF_NOTFIX); 1542 return (rc); 1543 } 1544 1545 switch (a) { 1546 case UFA_SET: 1547 switch (s) { 1548 case UF_TRYLCK: 1549 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp)); 1550 rc = sf_set_trylck(f); 1551 break; 1552 1553 case UF_LOCKED: 1554 rc = sf_set_locked(f); 1555 break; 1556 1557 case UF_FIXING: 1558 f->uf_flags |= UFSFX_REPAIR_START; 1559 f->uf_retry = ufsfx_tune.uft_fixpoll_period; 1560 rc = SFRC_SUCCESS; 1561 break; 1562 1563 case UF_UMOUNT: 1564 f->uf_retry = -ufsfx_tune.uft_short_err_period; 1565 rc = SFRC_SUCCESS; 1566 break; 1567 1568 default: 1569 (void) set_state(f, UF_PANIC); 1570 TRIVIA((": failed] ")); 1571 } 1572 break; 1573 1574 case UFA_FOUND: 1575 1576 switch (s) { 1577 case UF_TRYLCK: 1578 rc = sf_found_trylck(f); 1579 break; 1580 1581 case UF_LOCKED: 1582 case UF_FIXING: 1583 rc = sf_found_lock_fix_cmn(f, s); 1584 break; 1585 1586 case UF_UMOUNT: 1587 rc = sf_found_umount(f); 1588 break; 1589 1590 default: 1591 (void) set_state(f, UF_PANIC); 1592 TRIVIA((": failed] ")); 1593 break; 1594 } 1595 break; 1596 default: 1597 (void) set_state(f, UF_PANIC); 1598 TRIVIA((": failed] ")); 1599 break; 1600 } 1601 1602 TRIVIA(("] ")); 1603 return (rc); 1604 } 1605 1606 static sfrc_t 1607 sf_set_trylck(ufs_failure_t *f) 1608 { 1609 TRIVIA(("[sf_set_trylck")); 1610 1611 if (!mutex_tryenter(f->uf_vfs_lockp)) { 1612 TRIVIA((": tryenter(vfslockp) failed; retry]\n")); 1613 f->uf_retry = 1; 1614 return (SFRC_FAIL); 1615 } 1616 1617 if (!f->uf_vfs_ufsfxp->fx_current) 1618 f->uf_vfs_ufsfxp->fx_current = f; 1619 1620 mutex_exit(f->uf_vfs_lockp); 1621 1622 f->uf_lf.lf_flags = 0; 1623 f->uf_lf.lf_lock = LOCKFS_ELOCK; 1624 f->uf_retry = -ufsfx_tune.uft_fixstart_period; 1625 TRIVIA(("] ")); 1626 return (SFRC_SUCCESS); 1627 } 1628 1629 static sfrc_t 1630 sf_found_trylck(ufs_failure_t *f) 1631 { 1632 struct lockfs lockfs_status; 1633 1634 TRIVIA(("[sf_found_trylck")); 1635 1636 if (trylock_time_exceeded(f) > 0) { 1637 (void) set_state(f, UF_PANIC); 1638 TRIVIA((": failed] ")); 1639 return (SFRC_FAIL); 1640 } 1641 1642 if (!get_lockfs_status(f, &lockfs_status)) { 1643 (void) set_state(f, UF_PANIC); 1644 TRIVIA((": failed] ")); 1645 return (SFRC_FAIL); 1646 } 1647 1648 if (f->uf_lf_err == NO_ERROR) 1649 f->uf_lf.lf_key = lockfs_status.lf_key; 1650 1651 if (!set_lockfs(f, &lockfs_status)) { 1652 (void) set_state(f, UF_PANIC); 1653 TRIVIA((": failed] ")); 1654 return (SFRC_FAIL); 1655 } 1656 TRIVIA(("] ")); 1657 return (SFRC_SUCCESS); 1658 } 1659 1660 static sfrc_t 1661 sf_set_locked(ufs_failure_t *f) 1662 { 1663 TRIVIA(("[sf_set_locked")); 1664 1665 f->uf_retry = -ufsfx_tune.uft_fixstart_period; 1666 1667 #if defined(DEBUG) 1668 if (f->uf_flags & UFSFX_REPAIR_START) 1669 TRIVIA(("clearing UFSFX_REPAIR_START ")); 1670 #endif /* DEBUG */ 1671 1672 f->uf_flags &= ~UFSFX_REPAIR_START; 1673 1674 if (f->uf_s & UF_TRYLCK) { 1675 cmn_err(CE_WARN, "Error-locked %s: \"%s\"", 1676 fs_name(f), f->uf_panic_str); 1677 1678 if (f->uf_flags & UFSFX_LCKONLY) 1679 cmn_err(CE_WARN, "Manual repair of %s required", 1680 fs_name(f)); 1681 } 1682 1683 /* 1684 * just reset to current state 1685 */ 1686 #if defined(DEBUG) 1687 TRIVIA(("locked->locked ")); 1688 #endif /* DEBUG */ 1689 1690 TRIVIA(("] ")); 1691 return (SFRC_SUCCESS); 1692 } 1693 1694 static sfrc_t 1695 sf_found_lock_fix_cmn(ufs_failure_t *f, ufs_failure_states_t s) 1696 { 1697 time_t toolong; 1698 extern time_t time; 1699 struct buf *bp = NULL; 1700 struct fs *dfs; 1701 time_t concerned, anxious; 1702 sfrc_t rc = SFRC_FAIL; 1703 ulong_t gb_size; 1704 1705 TRIVIA(("[sf_found_lock_fix_cmn (\"%s\")", state_name(s))); 1706 1707 if (s & UF_LOCKED) { 1708 ASSERT(MUTEX_HELD(&f->uf_mutex)); 1709 1710 toolong = 1711 time > (ufsfx_tune.uft_too_long + f->uf_entered_tm); 1712 TRIVIA(("%stoolong", !toolong? "not": "")); 1713 HIDEOUS((": time:%ld, too long:%ld, entered_tm:%ld ", 1714 time, ufsfx_tune.uft_too_long, f->uf_entered_tm)); 1715 1716 if (f->uf_flags & UFSFX_LCKUMOUNT) { 1717 if (set_state(f, UF_UMOUNT)) { 1718 TRIVIA(("] ")); 1719 rc = SFRC_SUCCESS; 1720 } else { 1721 TRIVIA((": failed] ")); 1722 f->uf_retry = 1; 1723 } 1724 return (rc); 1725 } 1726 if (!toolong) { 1727 rc = SFRC_SUCCESS; 1728 } else { 1729 if (!(f->uf_flags & UFSFX_REPAIR_START)) { 1730 cmn_err(CE_WARN, "%s repair of %s not started.", 1731 (f->uf_flags & UFSFX_LCKONLY) ? 1732 "Manual" : "Automatic", fs_name(f)); 1733 1734 f->uf_retry = ufsfx_tune.uft_long_err_period; 1735 } else { 1736 f->uf_retry = ufsfx_tune.uft_long_err_period; 1737 cmn_err(CE_WARN, "Repair of %s is not timely; " 1738 "operator attention is required.", 1739 fs_name(f)); 1740 } 1741 TRIVIA(("] ")); 1742 return (rc); 1743 } 1744 } 1745 1746 #if defined(DEBUG) 1747 else { 1748 ASSERT(s & UF_FIXING); 1749 } 1750 #endif /* DEBUG */ 1751 1752 /* 1753 * get on disk superblock; force it to really 1754 * come from the disk 1755 */ 1756 (void) bfinval(f->uf_dev, 0); 1757 bp = UFS_BREAD(f->uf_ufsvfsp, f->uf_dev, SBLOCK, SBSIZE); 1758 if (bp) { 1759 bp->b_flags |= (B_STALE | B_AGE); 1760 dfs = bp->b_un.b_fs; 1761 } 1762 1763 if (!bp || (bp->b_flags & B_ERROR) || ((dfs->fs_magic != FS_MAGIC) && 1764 (dfs->fs_magic != MTB_UFS_MAGIC))) { 1765 TRIVIA((": UFS_BREAD(SBLOCK) failed]\n")); 1766 f->uf_retry = 1; 1767 goto out; 1768 } 1769 1770 /* fsck started but we haven't noticed yet? */ 1771 if (!(s & UF_FIXING) && dfs->fs_clean == FSFIX) { 1772 if (!set_state(f, UF_FIXING)) { 1773 TRIVIA((": failed]\n")); 1774 f->uf_retry = 1; 1775 goto out; 1776 } 1777 } 1778 1779 /* fsck started but didn't succeed? */ 1780 if ((s & UF_FIXING) && ((dfs->fs_clean == FSBAD) || !fsck_active(f))) { 1781 TRIVIA((": fs_clean: %d", (int)dfs->fs_clean)); 1782 (void) set_state(f, UF_LOCKED); 1783 cmn_err(CE_WARN, "%s: Manual repair is necessary.", fs_name(f)); 1784 f->uf_retry = ufsfx_tune.uft_long_err_period; 1785 goto out; 1786 } 1787 1788 gb_size = (dfs->fs_size * dfs->fs_bshift) / GB; 1789 toolong = (time_t)((gb_size == 0? 1: gb_size) * SecondsPerGig); 1790 1791 /* fsck started but doesn't seem to be proceeding? */ 1792 if ((s & UF_FIXING) && dfs->fs_clean == FSFIX) { 1793 if (time > f->uf_entered_tm + toolong) { 1794 1795 cmn_err(CE_WARN, 1796 "Repair completion timeout exceeded on %s; " 1797 "manual fsck may be required", fs_name(f)); 1798 f->uf_retry = ufsfx_tune.uft_long_err_period; 1799 } 1800 } 1801 1802 concerned = f->uf_entered_tm + (toolong / 3); 1803 anxious = f->uf_entered_tm + ((2 * toolong) / 3); 1804 1805 if (time > concerned) 1806 pester_msg(f, time > anxious? CE_WARN: CE_NOTE); 1807 1808 TRIVIA(("] ")); 1809 1810 out: 1811 if (bp) 1812 brelse(bp); 1813 1814 return (rc); 1815 } 1816 1817 static sfrc_t 1818 sf_found_umount(ufs_failure_t *f) 1819 { 1820 extern time_t time; 1821 sfrc_t rc = SFRC_FAIL; 1822 struct vfs *vfsp = f->uf_vfsp; 1823 struct ufsvfs *ufsvfsp = f->uf_ufsvfsp; 1824 int toolong = 0; 1825 int err = 0; 1826 1827 TRIVIA(("[sf_found_umount")); 1828 1829 toolong = time > ufsfx_tune.uft_too_long + f->uf_entered_tm; 1830 if (toolong) { 1831 TRIVIA((": unmount time limit exceeded] ")); 1832 goto out; 1833 } 1834 1835 if (!vfsp || !ufsvfsp) { /* trivial case */ 1836 TRIVIA((": NULL vfsp and/or ufsvfsp, already unmounted?] ")); 1837 goto out; 1838 } 1839 1840 if (!ULOCKFS_IS_ELOCK(&ufsvfsp->vfs_ulockfs)) { 1841 TRIVIA((": !not error locked?")); 1842 err = EINVAL; 1843 goto out; 1844 } 1845 1846 /* The vn_vfsunlock will be done in dounmount() [.../common/fs/vfs.c] */ 1847 if (vn_vfswlock(vfsp->vfs_vnodecovered)) { 1848 TRIVIA((": couldn't lock coveredvp")); 1849 err = EBUSY; 1850 goto out; 1851 } 1852 1853 if ((err = dounmount(vfsp, 0, kcred)) != 0) { 1854 1855 /* take note, but not many alternatives here */ 1856 mutex_enter(&uf_stats.ufst_mutex); 1857 uf_stats.ufst_unmount_failures++; 1858 mutex_exit(&uf_stats.ufst_mutex); 1859 1860 TRIVIA((": unmount failed] ")); 1861 } else { 1862 cmn_err(CE_NOTE, "unmounted error-locked %s", fs_name(f)); 1863 } 1864 1865 out: 1866 if (toolong || (err != EBUSY && err != EAGAIN)) 1867 rc = set_state(f, UF_NOTFIX); 1868 1869 TRIVIA(("] ")); 1870 return (rc); 1871 } 1872 1873 static sfrc_t 1874 sf_term_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s) 1875 { 1876 extern time_t time; 1877 sfrc_t rc = SFRC_FAIL; 1878 1879 TRIVIA(("[sf_term_cmn, action is %s, state is %s", 1880 act_name(a), state_name(s))); 1881 ASSERT(s & (UF_FIXED | UF_NOTFIX | UF_REPLICA)); 1882 ASSERT(terminal_state(s)); 1883 1884 if (!f->uf_ufsvfsp && !(f->uf_s & (UF_UMOUNT | UF_NOTFIX))) { 1885 TRIVIA((": NULL ufsvfsp (state != UMOUNT | NOTFIX)]\n")); 1886 return (rc); 1887 } 1888 1889 switch (a) { 1890 case UFA_SET: 1891 switch (s) { 1892 case UF_NOTFIX: 1893 case UF_FIXED: 1894 { 1895 int need_lock_vfs; 1896 1897 if (f->uf_ufsvfsp && f->uf_vfs_lockp) 1898 need_lock_vfs = !MUTEX_HELD(f->uf_vfs_lockp); 1899 else 1900 need_lock_vfs = 0; 1901 1902 if (need_lock_vfs && !mutex_tryenter(f->uf_vfs_lockp)) { 1903 TRIVIA((": tryenter(vfslockp) fail; retry]\n")); 1904 f->uf_retry = 1; 1905 break; 1906 } 1907 1908 f->uf_end_tm = time; 1909 f->uf_lf.lf_lock = LOCKFS_OLOCK; 1910 f->uf_retry = 0; 1911 1912 if (f->uf_vfs_ufsfxp) 1913 f->uf_vfs_ufsfxp->fx_current = NULL; 1914 1915 if (need_lock_vfs) 1916 mutex_exit(f->uf_vfs_lockp); 1917 1918 cmn_err(CE_NOTE, (s & UF_NOTFIX)? "Could not fix %s": 1919 "%s is now accessible", fs_name(f)); 1920 1921 if (s & UF_FIXED) { 1922 mutex_enter(&uf_stats.ufst_mutex); 1923 uf_stats.ufst_num_fixed++; 1924 mutex_exit(&uf_stats.ufst_mutex); 1925 } 1926 (void) timeout(ufsfx_kill_fix_failure_thread, 1927 (void *)(ufsfx_tune.uft_short_err_period * hz), 1928 ufsfx_tune.uft_short_err_period * hz); 1929 rc = SFRC_SUCCESS; 1930 break; 1931 } 1932 case UF_REPLICA: 1933 1934 ASSERT(MUTEX_HELD(f->uf_vfs_lockp)); 1935 1936 /* not actually a replica? */ 1937 if (f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current && 1938 f->uf_vfs_ufsfxp->fx_current != f && 1939 !terminal_state( 1940 f->uf_vfs_ufsfxp->fx_current->uf_s)) { 1941 1942 f->uf_orig = f->uf_vfs_ufsfxp->fx_current; 1943 f->uf_retry = 0; 1944 rc = SFRC_SUCCESS; 1945 } else { 1946 TRIVIA((": NULL fx_current]\n")); 1947 f->uf_retry = 1; 1948 } 1949 1950 break; 1951 1952 default: 1953 rc = set_state(f, UF_PANIC); 1954 TRIVIA((": failed] ")); 1955 break; 1956 } 1957 break; 1958 1959 case UFA_FOUND: 1960 /* 1961 * XXX de-allocate these after some period? 1962 * XXX or move to an historical list? 1963 * XXX or have an ioctl which reaps them? 1964 */ 1965 /* 1966 * For now, since we don't expect lots of failures 1967 * to occur (to the point of memory shortages), 1968 * just punt 1969 */ 1970 1971 /* be sure we're not wasting cpu on old failures */ 1972 if (f->uf_retry != 0) { 1973 mutex_enter(&uf_stats.ufst_mutex); 1974 uf_stats.ufst_cpu_waste++; 1975 mutex_exit(&uf_stats.ufst_mutex); 1976 f->uf_retry = 0; 1977 } 1978 rc = SFRC_SUCCESS; 1979 break; 1980 1981 default: 1982 (void) set_state(f, UF_PANIC); 1983 TRIVIA((": failed] ")); 1984 break; 1985 } 1986 1987 TRIVIA(("] ")); 1988 return (rc); 1989 } 1990 1991 static sfrc_t 1992 sf_panic( 1993 ufs_failure_t *f, 1994 ufsa_t a, 1995 ufs_failure_states_t s) 1996 { 1997 sfrc_t rc = SFRC_FAIL; 1998 1999 TRIVIA(("[sf_panic, action is %s, prev. state is %s", 2000 act_name(a), state_name(f->uf_s))); 2001 ASSERT(s & UF_PANIC); 2002 2003 switch (a) { 2004 case UFA_SET: 2005 f->uf_retry = -ufsfx_tune.uft_short_err_period; 2006 rc = SFRC_SUCCESS; 2007 break; 2008 2009 case UFA_FOUND: 2010 default: 2011 real_panic(f, " "); 2012 2013 /* LINTED: warning: logical expression always true: op "||" */ 2014 ASSERT(DEBUG); 2015 2016 (void) set_state(f, UF_UMOUNT); /* XXX UF_NOTFIX? */ 2017 2018 break; 2019 } 2020 2021 TRIVIA(("] ")); 2022 return (rc); 2023 } 2024 2025 /* 2026 * minimum state function 2027 */ 2028 static sfrc_t 2029 sf_minimum( 2030 ufs_failure_t *f, 2031 ufsa_t a, /* LINTED argument unused in function: ignored */ 2032 ufs_failure_states_t ignored) 2033 { 2034 sfrc_t rc = SFRC_FAIL; 2035 2036 TRIVIA(("[sf_minimum, action is %s", act_name(a))); 2037 2038 switch (a) { 2039 case UFA_SET: 2040 f->uf_retry = 0; 2041 /* FALLTHROUGH */ 2042 2043 case UFA_FOUND: 2044 rc = SFRC_SUCCESS; 2045 break; 2046 2047 default: 2048 (void) set_state(f, UF_PANIC); 2049 TRIVIA((": failed] ")); 2050 break; 2051 } 2052 2053 TRIVIA(("] ")); 2054 return (rc); 2055 } 2056 2057 static int 2058 state_trans_valid(ufs_failure_states_t from, ufs_failure_states_t to) 2059 { 2060 ufsd_t *s; 2061 int valid; 2062 2063 HIDEOUS(("[state_trans_valid")); 2064 2065 if (from & to) 2066 return (1); 2067 2068 s = get_state_desc(to); 2069 2070 /* 2071 * extra test is necessary since we want UF_UNDEF = 0, 2072 * (to detect freshly allocated memory) 2073 * but can't check for that value with a bit test 2074 */ 2075 valid = (to & UF_INIT)? from == s->ud_prev: from & s->ud_prev; 2076 2077 HIDEOUS((": %svalid] ", valid? "": "in")); 2078 return (valid); 2079 } 2080 2081 static int 2082 terminal_state(ufs_failure_states_t state) 2083 { 2084 ufsd_t *s; 2085 2086 HIDEOUS(("[terminal_state")); 2087 2088 s = get_state_desc(state); 2089 2090 HIDEOUS((": %sterminal] ", s->ud_attr.terminal? "": "not ")); 2091 return ((int)s->ud_attr.terminal); 2092 } 2093 2094 static void 2095 alloc_lockfs_comment(ufs_failure_t *f, struct lockfs *lfp) 2096 { 2097 MINUTE(("[alloc_lockfs_comment")); 2098 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2099 2100 /* 2101 * ufs_fiolfs expects a kmem_alloc'ed comment; 2102 * it frees the comment if the lock fails 2103 * or else when the lock is unlocked. 2104 */ 2105 2106 f->uf_lf.lf_comment = kmem_zalloc(LOCKFS_MAXCOMMENTLEN, KM_NOSLEEP); 2107 if (f->uf_lf.lf_comment) { 2108 char *from; 2109 size_t len; 2110 2111 /* 2112 * use panic string if there's no previous comment 2113 * or if we're setting the error lock 2114 */ 2115 if ((LOCKFS_IS_ELOCK(&f->uf_lf) || !lfp->lf_comment || 2116 lfp->lf_comlen <= 0)) { 2117 from = f->uf_panic_str; 2118 len = LOCKFS_MAXCOMMENTLEN; 2119 } else { 2120 from = lfp->lf_comment; 2121 len = lfp->lf_comlen; 2122 } 2123 2124 bcopy(from, f->uf_lf.lf_comment, len); 2125 f->uf_lf.lf_comlen = len; 2126 2127 } else { 2128 f->uf_lf.lf_comlen = 0; 2129 } 2130 MINUTE(("] ")); 2131 } 2132 2133 static int 2134 set_lockfs(ufs_failure_t *f, struct lockfs *lfp) 2135 { 2136 int (*handle_lockfs_rc)(ufs_failure_t *); 2137 int rc; 2138 2139 MINUTE(("[set_lockfs")); 2140 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2141 ASSERT(!vfs_lock_held(f->uf_vfsp)); 2142 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp)); 2143 2144 if (!f->uf_ufsvfsp) { 2145 MINUTE((": ufsvfsp is NULL]\n")); 2146 return (0); 2147 } 2148 2149 ASSERT(MUTEX_NOT_HELD(&f->uf_ufsvfsp->vfs_ulockfs.ul_lock)); 2150 2151 if (!f->uf_ufsvfsp->vfs_root) { 2152 MINUTE((": vfs_root is NULL]\n")); 2153 return (0); 2154 } 2155 2156 alloc_lockfs_comment(f, lfp); 2157 f->uf_lf_err = 0; 2158 2159 if (!LOCKFS_IS_ELOCK(lfp)) { 2160 lfp->lf_lock = f->uf_lf.lf_lock = LOCKFS_ELOCK; 2161 VN_HOLD(f->uf_ufsvfsp->vfs_root); 2162 f->uf_lf_err = 2163 ufs__fiolfs(f->uf_ufsvfsp->vfs_root, 2164 &f->uf_lf, /* from_user */ 0, /* from_log */ 0); 2165 VN_RELE(f->uf_ufsvfsp->vfs_root); 2166 } 2167 2168 handle_lockfs_rc = f->uf_lf_err != 0? lockfs_failure: lockfs_success; 2169 rc = handle_lockfs_rc(f); 2170 2171 MINUTE(("] ")); 2172 return (rc); 2173 } 2174 2175 static int 2176 lockfs_failure(ufs_failure_t *f) 2177 { 2178 int error; 2179 ufs_failure_states_t s; 2180 2181 TRIVIA(("[lockfs_failure")); 2182 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2183 2184 if (!f->uf_ufsvfsp) { 2185 TRIVIA((": ufsvfsp is NULL]\n")); 2186 return (0); 2187 } 2188 2189 error = f->uf_lf_err; 2190 switch (error) { 2191 /* non-transient errors: */ 2192 case EACCES: /* disk/in-core metadata reconciliation failed */ 2193 case EPERM: /* inode reconciliation failed; incore inode changed? */ 2194 case EIO: /* device is hard-locked or not responding */ 2195 case EROFS: /* device is write-locked */ 2196 case EDEADLK: /* can't lockfs; deadlock would result; */ 2197 /* Swapping or saving accounting records */ 2198 /* onto this fs can cause this errno. */ 2199 2200 MINOR(("ufs_fiolfs(\"%s\") of %s failed: %s (%d)", 2201 fs_name(f), lock_name(&f->uf_lf), 2202 err_name(error), error)); 2203 2204 /* 2205 * if can't get lock, then fallback to panic, unless 2206 * unless unmount was requested (although unmount will 2207 * probably fail if the lock failed, so we'll panic 2208 * anyway 2209 */ 2210 2211 s = ((f->uf_flags & UFSFX_LCKUMOUNT) && error != EDEADLK) ? 2212 UF_UMOUNT: UF_PANIC; 2213 2214 if (!set_state(f, s)) { 2215 real_panic(f, " "); 2216 /*NOTREACHED*/ 2217 break; 2218 } 2219 break; 2220 2221 2222 case EBUSY: 2223 case EAGAIN: 2224 2225 f->uf_retry = ufsfx_tune.uft_short_err_period; 2226 if (curthread->t_flag & T_DONTPEND) { 2227 curthread->t_flag &= ~T_DONTPEND; 2228 2229 } else if (!(f->uf_s & (UF_LOCKED | UF_FIXING))) { 2230 ufs_failure_states_t state; 2231 /* 2232 * if we didn't know that the fix had started, 2233 * take note 2234 */ 2235 state = error == EBUSY? UF_LOCKED: UF_FIXING; 2236 if (!set_state(f, state)) { 2237 TRIVIA((": failed] ")); 2238 return (0); 2239 } 2240 } 2241 break; 2242 2243 default: /* some other non-fatal error */ 2244 MINOR(("lockfs(\"%s\") of %s returned %s (%d)", 2245 lock_name(&f->uf_lf), fs_name(f), 2246 err_name(f->uf_lf_err), f->uf_lf_err)); 2247 2248 f->uf_retry = ufsfx_tune.uft_short_err_period; 2249 break; 2250 2251 case EINVAL: /* unmounted? */ 2252 (void) set_state(f, UF_NOTFIX); 2253 break; 2254 } 2255 TRIVIA(("] ")); 2256 return (1); 2257 } 2258 2259 static int 2260 lockfs_success(ufs_failure_t *f) 2261 { 2262 TRIVIA(("[lockfs_success")); 2263 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2264 2265 if (!f->uf_ufsvfsp) { 2266 TRIVIA((": ufsvfsp is NULL]\n")); 2267 return (0); 2268 } 2269 2270 switch (f->uf_lf.lf_lock) { 2271 case LOCKFS_ELOCK: /* error lock worked */ 2272 2273 if (!set_state(f, UF_LOCKED)) { 2274 TRIVIA((": failed] ")); 2275 return (0); 2276 } 2277 break; 2278 2279 case LOCKFS_ULOCK: /* unlock worked */ 2280 /* 2281 * how'd we get here? 2282 * This should be done from fsck's unlock, 2283 * not from this thread's context. 2284 */ 2285 cmn_err(CE_WARN, "Unlocked error-lock of %s", fs_name(f)); 2286 ufsfx_unlockfs(f->uf_ufsvfsp); 2287 break; 2288 2289 default: 2290 if (!set_state(f, UF_NOTFIX)) { 2291 TRIVIA((": failed] ")); 2292 return (0); 2293 } 2294 break; 2295 } 2296 TRIVIA(("] ")); 2297 return (1); 2298 } 2299 2300 /* 2301 * when fsck is running it puts its pid into the lockfs 2302 * comment structure, prefaced by PIDSTR 2303 */ 2304 const char *PIDSTR = "[pid:"; 2305 static int 2306 fsck_active(ufs_failure_t *f) 2307 { 2308 char *cp; 2309 int i, found, errlocked; 2310 size_t comlen; 2311 const int PIDSTRLEN = (int)strlen(PIDSTR); 2312 struct ulockfs *ulp = &f->uf_ufsvfsp->vfs_ulockfs; 2313 2314 TRIVIA(("[fsck_active")); 2315 2316 ASSERT(f); 2317 ASSERT(f->uf_s & UF_FIXING); 2318 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2319 ASSERT(f->uf_ufsvfsp); 2320 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp)); 2321 ASSERT(MUTEX_NOT_HELD(&ulp->ul_lock)); 2322 2323 mutex_enter(&ulp->ul_lock); 2324 cp = ulp->ul_lockfs.lf_comment; 2325 comlen = ulp->ul_lockfs.lf_comlen; 2326 errlocked = (int)ULOCKFS_IS_ELOCK(ulp); 2327 mutex_exit(&ulp->ul_lock); 2328 2329 if (!cp || comlen == 0) { 2330 TRIVIA((": null comment or comlen <= 0, found:0]")); 2331 return (0); 2332 } 2333 2334 for (found = i = 0; !found && i < (comlen - PIDSTRLEN); i++, cp++) 2335 found = strncmp(cp, PIDSTR, PIDSTRLEN) == 0; 2336 2337 TRIVIA(("found:%d, is_elock:%d]", found, errlocked)); 2338 return (errlocked & found); 2339 } 2340 2341 static const char unknown_fs[] = "<unknown fs>"; 2342 static const char null_failure[] = "<NULL ufs failure record; unknown fs>"; 2343 static const char mutated_vfs_bufp[] = "<mutated vfs_bufp, unknown fs>"; 2344 static const char mutated_vfs_fs[] = "<mutated vfs_fs, unknown fs>"; 2345 2346 static char * 2347 fs_name(ufs_failure_t *f) 2348 { 2349 HIDEOUS(("[fs_name")); 2350 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2351 2352 if (!f) { 2353 HIDEOUS((": failure ptr is NULL]\n")); 2354 return ((char *)null_failure); 2355 } 2356 2357 if (f->uf_fsname[0] != '\0') { 2358 HIDEOUS((": return (uf_fsname)]\n")); 2359 return (f->uf_fsname); 2360 } 2361 2362 if (MUTEX_HELD(f->uf_vfs_lockp)) { 2363 if (f->uf_bp != f->uf_ufsvfsp->vfs_bufp) { 2364 HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n", 2365 (void *)f->uf_bp, (void *)f->uf_ufsvfsp->vfs_bufp)); 2366 return ((char *)mutated_vfs_bufp); 2367 } 2368 if (f->uf_fs != f->uf_ufsvfsp->vfs_fs) { 2369 HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n", 2370 (void *)f->uf_fs, (void *)f->uf_ufsvfsp->vfs_fs)); 2371 return ((char *)mutated_vfs_fs); 2372 } 2373 if (f->uf_ufsvfsp && f->uf_bp && f->uf_fs && 2374 *f->uf_fs->fs_fsmnt != '\0') { 2375 HIDEOUS((": return (fs_fsmnt)]\n")); 2376 return (f->uf_fs->fs_fsmnt); 2377 } 2378 } 2379 2380 HIDEOUS((": unknown file system]\n")); 2381 return ((char *)unknown_fs); 2382 } 2383 2384 #if defined(DEBUG) 2385 static char * 2386 lock_name(struct lockfs *lfp) 2387 { 2388 struct lock_description *l; 2389 char *lname; 2390 2391 HIDEOUS(("[lock_name")); 2392 2393 lname = lock_desc[0].ld_name; 2394 for (l = &lock_desc[1]; l->ld_name != NULL; l++) { 2395 if (lfp && lfp->lf_lock == l->ld_type) { 2396 lname = l->ld_name; 2397 break; 2398 } 2399 } 2400 HIDEOUS(("]")); 2401 return (lname); 2402 } 2403 2404 static char * 2405 state_name(ufs_failure_states_t state) 2406 { 2407 ufsd_t *s; 2408 2409 HIDEOUS(("[state_name")); 2410 2411 s = get_state_desc(state); 2412 2413 HIDEOUS(("]")); 2414 return (s->ud_name); 2415 } 2416 2417 static char * 2418 err_name(int error) 2419 { 2420 struct error_description *e; 2421 2422 HIDEOUS(("[err_name")); 2423 2424 for (e = &err_desc[1]; e->ed_name != NULL; e++) { 2425 if (error == e->ed_errno) { 2426 HIDEOUS(("]")); 2427 return (e->ed_name); 2428 } 2429 } 2430 HIDEOUS(("]")); 2431 return (err_desc[0].ed_name); 2432 } 2433 2434 static char * 2435 act_name(ufsa_t action) 2436 { 2437 struct action_description *a; 2438 2439 HIDEOUS(("[act_name")); 2440 2441 for (a = &act_desc[1]; a->ad_name != NULL; a++) { 2442 if (action == a->ad_v) { 2443 HIDEOUS(("]")); 2444 return (a->ad_name); 2445 } 2446 } 2447 HIDEOUS(("]")); 2448 return (act_desc[0].ad_name); 2449 } 2450 2451 /* 2452 * dump failure list 2453 */ 2454 static void 2455 dump_uf_list(char *msg) 2456 { 2457 ufs_failure_t *f; 2458 int i; 2459 int list_was_locked = MUTEX_HELD(&ufs_fix.uq_mutex); 2460 2461 if (!list_was_locked && !mutex_tryenter(&ufs_fix.uq_mutex)) { 2462 printf("dump_uf_list: couldn't get list lock\n"); 2463 return; 2464 } 2465 2466 if (msg) { 2467 printf("\n%s", msg); 2468 } 2469 printf("\ndump_uf_list:\n\tuq_lowat: %d, uq_ne: %d\n", 2470 ufs_fix.uq_lowat, ufs_fix.uq_ne); 2471 2472 mutex_enter(&uf_stats.ufst_mutex); 2473 printf("\tuf_stats.current_races: %ld\n", uf_stats.ufst_current_races); 2474 printf("\tuf_stats.num_failed: %ld\n", uf_stats.ufst_num_failed); 2475 printf("\tuf_stats.num_fixed: %ld\n", uf_stats.ufst_num_fixed); 2476 printf("\tuf_stats.cpu_waste: %ld\n", uf_stats.ufst_cpu_waste); 2477 printf("\tuf_stats.lock_violations: %ld, unmount_failures: %ld\n", 2478 uf_stats.ufst_lock_violations, uf_stats.ufst_unmount_failures); 2479 mutex_exit(&uf_stats.ufst_mutex); 2480 2481 for (f = ufs_fix.uq_ufhead, i = 1; f; f = f->uf_next, i++) { 2482 2483 if (!mutex_tryenter(&f->uf_mutex)) { 2484 printf("%d.\t\"skipped - try enter failed\"\n", i); 2485 continue; 2486 } 2487 2488 dump_uf(f, i); 2489 2490 mutex_exit(&f->uf_mutex); 2491 } 2492 2493 printf("\n"); 2494 2495 if (!list_was_locked) 2496 mutex_exit(&ufs_fix.uq_mutex); 2497 } 2498 2499 static void 2500 dump_uf(ufs_failure_t *f, int i) 2501 { 2502 if (!f) { 2503 printf("dump_uf: NULL failure record\n"); 2504 return; 2505 } 2506 2507 printf("%d.\t\"%s\" is %s.\n", 2508 i, fs_name(f), state_name(f->uf_s)); 2509 printf("\t\"%s\"\tAddr: 0x%p\n", f->uf_panic_str, (void *)f); 2510 printf("\tNext: 0x%p\t\tPrev: 0x%p\n", 2511 (void *)f->uf_next, (void *)f->uf_prev); 2512 2513 if (f->uf_orig) 2514 printf("\tOriginal failure: 0x%p \"%s\"\n", 2515 (void *)f->uf_orig, f->uf_orig->uf_panic_str); 2516 2517 printf("\tUfsvfs: 0x%p\t\tVfs_lockp: 0x%p\n", 2518 (void *)f->uf_ufsvfsp, (void *)f->uf_vfs_lockp); 2519 printf("\tVfs_fsfxp: 0x%p\n", (void *)f->uf_vfs_ufsfxp); 2520 printf("\tVfs_bufp: 0x%p", (void *)f->uf_bp); 2521 2522 if (f->uf_bp) 2523 printf("\t\tVfs_fs: 0x%p\n", (void *)f->uf_fs); 2524 else 2525 printf("\n"); 2526 2527 printf("\tBegin: 0x%lx\tEntered: 0x%lx\tEnd: 0x%lx\n", 2528 f->uf_begin_tm, f->uf_entered_tm, f->uf_end_tm); 2529 2530 printf("\tFlags: (%d) %s%s%s%s", f->uf_flags, 2531 f->uf_flags & UFSFX_LCKONLY? "\"lock only\" " : "", 2532 f->uf_flags & UFSFX_LCKUMOUNT? "\"lock+unmount\" " : "", 2533 f->uf_flags & UFSFX_REPAIR_START? "\"started repair\" " : "", 2534 f->uf_flags == 0? "<none>" : ""); 2535 2536 printf("\tRetry: %ld seconds\n", f->uf_retry); 2537 2538 printf("\tLockfs:\ttype: %s\terror: %s (%d)\n", 2539 lock_name(&f->uf_lf), err_name(f->uf_lf_err), f->uf_lf_err); 2540 2541 } 2542 #endif /* DEBUG */ 2543 2544 /* 2545 * returns # of ufs_failures in a non-terminal state on queue 2546 * used to coordinate with hlock thread (see ufs_thread.c) 2547 * and to determine when the error lock thread may exit 2548 */ 2549 2550 int 2551 ufsfx_get_failure_qlen(void) 2552 { 2553 ufs_failure_t *f; 2554 ufsd_t *s; 2555 int qlen = 0; 2556 2557 MINUTE(("[ufsfx_get_failure_qlen")); 2558 2559 if (!mutex_tryenter(&ufs_fix.uq_mutex)) 2560 return (-1); 2561 2562 /* 2563 * walk down failure list 2564 */ 2565 2566 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 2567 2568 if (!mutex_tryenter(&f->uf_mutex)) 2569 continue; 2570 2571 s = get_state_desc(f->uf_s); 2572 2573 if (s->ud_attr.terminal) { 2574 mutex_exit(&f->uf_mutex); 2575 continue; 2576 } 2577 2578 MINUTE((": found: %s, \"%s: %s\"\n", 2579 fs_name(f), state_name(f->uf_s), f->uf_panic_str)); 2580 2581 qlen++; 2582 mutex_exit(&f->uf_mutex); 2583 } 2584 2585 mutex_exit(&ufs_fix.uq_mutex); 2586 2587 MINUTE((": qlen=%d]\n", qlen)); 2588 2589 return (qlen); 2590 } 2591 2592 /* 2593 * timeout routine 2594 * called to shutdown fix failure thread and server daemon 2595 */ 2596 static void 2597 ufsfx_kill_fix_failure_thread(void *arg) 2598 { 2599 clock_t odelta = (clock_t)arg; 2600 int qlen; 2601 2602 MAJOR(("[ufsfx_kill_fix_failure_thread")); 2603 2604 qlen = ufsfx_get_failure_qlen(); 2605 2606 if (qlen < 0) { 2607 clock_t delta; 2608 2609 delta = odelta << 1; 2610 if (delta <= 0) 2611 delta = INT_MAX; 2612 2613 (void) timeout(ufsfx_kill_fix_failure_thread, 2614 (void *)delta, delta); 2615 MAJOR((": rescheduled")); 2616 2617 } else if (qlen == 0) { 2618 ufs_thread_exit(&ufs_fix); 2619 MAJOR((": killed")); 2620 } 2621 /* 2622 * else 2623 * let timeout expire 2624 */ 2625 MAJOR(("]\n")); 2626 } 2627