1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/errno.h> 32 #include <sys/mode.h> 33 #include <sys/sysmacros.h> 34 #include <sys/cmn_err.h> 35 #include <sys/varargs.h> 36 #include <sys/time.h> 37 #include <sys/buf.h> 38 #include <sys/kmem.h> 39 #include <sys/t_lock.h> 40 #include <sys/poll.h> 41 #include <sys/debug.h> 42 #include <sys/cred.h> 43 #include <sys/lockfs.h> 44 #include <sys/fs/ufs_fs.h> 45 #include <sys/fs/ufs_inode.h> 46 #include <sys/fs/ufs_panic.h> 47 #include <sys/fs/ufs_lockfs.h> 48 #include <sys/fs/ufs_trans.h> 49 #include <sys/fs/ufs_mount.h> 50 #include <sys/fs/ufs_prot.h> 51 #include <sys/fs/ufs_bio.h> 52 #include <sys/pathname.h> 53 #include <sys/utsname.h> 54 #include <sys/conf.h> 55 56 /* handy */ 57 #define abs(x) ((x) < 0? 
-(x): (x)) 58 59 #if defined(DEBUG) 60 61 #define DBGLVL_NONE 0x00000000 62 #define DBGLVL_MAJOR 0x00000100 63 #define DBGLVL_MINOR 0x00000200 64 #define DBGLVL_MINUTE 0x00000400 65 #define DBGLVL_TRIVIA 0x00000800 66 #define DBGLVL_HIDEOUS 0x00001000 67 68 #define DBGFLG_NONE 0x00000000 69 #define DBGFLG_NOPANIC 0x00000001 70 #define DBGFLG_LVLONLY 0x00000002 71 #define DBGFLG_FIXWOULDPANIC 0x00000004 72 73 #define DBGFLG_FLAGMASK 0x0000000F 74 #define DBGFLG_LEVELMASK ~DBGFLG_FLAGMASK 75 76 #define DEBUG_FLAGS (ufs_fix_failure_dbg & DBGFLG_FLAGMASK) 77 #define DEBUG_LEVEL (ufs_fix_failure_dbg & DBGFLG_LEVELMASK) 78 79 unsigned int ufs_fix_failure_dbg = DBGLVL_NONE | DBGFLG_NONE; 80 81 #define DCALL(dbg_level, call) \ 82 { \ 83 if (DEBUG_LEVEL != DBGLVL_NONE) { \ 84 if (DEBUG_FLAGS & DBGFLG_LVLONLY) { \ 85 if (DEBUG_LEVEL & dbg_level) { \ 86 call; \ 87 } \ 88 } else { \ 89 if (dbg_level <= DEBUG_LEVEL) { \ 90 call; \ 91 } \ 92 } \ 93 } \ 94 } 95 96 #define DPRINTF(dbg_level, msg) DCALL(dbg_level, printf msg) 97 98 #define MAJOR(msg) DPRINTF(DBGLVL_MAJOR, msg) 99 #define MINOR(msg) DPRINTF(DBGLVL_MINOR, msg) 100 #define MINUTE(msg) DPRINTF(DBGLVL_MINUTE, msg) 101 #define TRIVIA(msg) DPRINTF(DBGLVL_TRIVIA, msg) 102 #define HIDEOUS(msg) DPRINTF(DBGLVL_HIDEOUS, msg) 103 104 #else /* !DEBUG */ 105 106 #define DCALL(ignored_dbg_level, ignored_routine) 107 #define MAJOR(ignored) 108 #define MINOR(ignored) 109 #define MINUTE(ignored) 110 #define TRIVIA(ignored) 111 #define HIDEOUS(ignored) 112 113 #endif /* DEBUG */ 114 115 #define NULLSTR(str) (!(str) || *(str) == '\0'? "<null>" : (str)) 116 #define NULSTRING "" 117 118 /* somewhat arbitrary limits, in seconds */ 119 /* all probably ought to be different, but these are convenient for debugging */ 120 const time_t UF_TOO_LONG = 128; /* max. wait for fsck start */ 121 122 /* all of these are in units of seconds used for retry period while ... 
*/ 123 const time_t UF_FIXSTART_PERIOD = 16; /* awaiting fsck start */ 124 const time_t UF_FIXPOLL_PERIOD = 256; /* awaiting fsck finish */ 125 const time_t UF_SHORT_ERROR_PERIOD = 4; /* after (lockfs) error */ 126 const time_t UF_LONG_ERROR_PERIOD = 512; /* after (lockfs) error */ 127 128 #define NO_ERROR 0 129 #define LOCKFS_OLOCK LOCKFS_MAXLOCK+1 130 131 const ulong_t GB = 1024 * 1024 * 1024; 132 const ulong_t SecondsPerGig = 1024; /* ~17 minutes (overestimate) */ 133 134 /* 135 * per filesystem flags 136 */ 137 const int UFSFX_PANIC = (UFSMNT_ONERROR_PANIC >> 4); 138 const int UFSFX_LCKONLY = (UFSMNT_ONERROR_LOCK >> 4); 139 const int UFSFX_LCKUMOUNT = (UFSMNT_ONERROR_UMOUNT >> 4); 140 const int UFSFX_DEFAULT = (UFSMNT_ONERROR_DEFAULT >> 4); 141 const int UFSFX_REPAIR_START = 0x10000000; 142 143 /* return protocols */ 144 145 typedef enum triage_return_code { 146 TRIAGE_DEAD = -1, 147 TRIAGE_NO_SPIRIT, 148 TRIAGE_ATTEND_TO 149 } triage_t; 150 151 typedef enum statefunc_return_code { 152 SFRC_SUCCESS = 1, 153 SFRC_FAIL = 0 154 } sfrc_t; 155 156 /* external references */ 157 /* in ufs_thread.c */ 158 extern int ufs_thread_run(struct ufs_q *, callb_cpr_t *cprinfop); 159 extern int ufs_checkaccton(vnode_t *); /* in ufs_lockfs.c */ 160 extern int ufs_checkswapon(vnode_t *); /* in ufs_lockfs.c */ 161 162 extern struct pollhead ufs_pollhd; /* in ufs_vnops.c */ 163 164 /* globals */ 165 struct ufs_q ufs_fix; 166 167 /* 168 * patchable constants: 169 * These are set in ufsfx_init() [called at modload] 170 */ 171 struct ufs_failure_tunable { 172 long uft_too_long; /* limit repair startup time */ 173 long uft_fixstart_period; /* pre-repair start period */ 174 long uft_fixpoll_period; /* post-fsck start period */ 175 long uft_short_err_period; /* post-error short period */ 176 long uft_long_err_period; /* post-error long period */ 177 } ufsfx_tune; 178 179 /* internal statistics of events */ 180 struct uf_statistics { 181 ulong_t ufst_lock_violations; 182 ulong_t 
ufst_current_races; 183 ulong_t ufst_unmount_failures; 184 ulong_t ufst_num_fixed; 185 ulong_t ufst_num_failed; 186 ulong_t ufst_cpu_waste; 187 time_t ufst_last_start_tm; 188 kmutex_t ufst_mutex; 189 } uf_stats; 190 191 typedef enum state_action { 192 UFA_ERROR = -1, /* internal error */ 193 UFA_FOUND, /* found uf in state */ 194 UFA_SET /* change uf to state */ 195 } ufsa_t; 196 197 /* state definition */ 198 typedef struct uf_state_desc { 199 int ud_v; /* value */ 200 char *ud_name; /* name */ 201 sfrc_t (*ud_sfp)(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 202 /* per-state actions */ 203 ufs_failure_states_t ud_prev; /* valid prev. states */ 204 205 struct uf_state_desc_attr { 206 unsigned terminal:1; /* no action req. if found */ 207 unsigned at_fail:1; /* state set by thread */ 208 /* encountering the error */ 209 unsigned unused; 210 } ud_attr; 211 } ufsd_t; 212 213 /* 214 * forward references 215 */ 216 217 /* thread to watch for failures */ 218 static void ufsfx_thread_fix_failures(void *); 219 static int ufsfx_do_failure_q(void); 220 static void ufsfx_kill_fix_failure_thread(void *); 221 222 /* routines called when failure occurs */ 223 static int ufs_fault_v(vnode_t *, char *, va_list) 224 __KVPRINTFLIKE(2); 225 static ufs_failure_t *init_failure(vnode_t *, char *, va_list) 226 __KVPRINTFLIKE(2); 227 static void queue_failure(ufs_failure_t *); 228 /*PRINTFLIKE2*/ 229 static void real_panic(ufs_failure_t *, const char *, ...) 
230 __KPRINTFLIKE(2); 231 static void real_panic_v(ufs_failure_t *, const char *, va_list) 232 __KVPRINTFLIKE(2); 233 static triage_t triage(vnode_t *); 234 235 /* routines called when failure record is acted upon */ 236 static sfrc_t set_state(ufs_failure_t *, ufs_failure_states_t); 237 static int state_trans_valid(ufs_failure_states_t, ufs_failure_states_t); 238 static int terminal_state(ufs_failure_states_t); 239 240 /* routines called when states entered/found */ 241 static sfrc_t sf_minimum(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 242 static sfrc_t sf_undef(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 243 static sfrc_t sf_init(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 244 static sfrc_t sf_queue(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 245 static sfrc_t sf_found_queue(ufs_failure_t *); 246 static sfrc_t sf_nonterm_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 247 static sfrc_t sf_term_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 248 static sfrc_t sf_panic(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 249 static sfrc_t sf_set_trylck(ufs_failure_t *); 250 static sfrc_t sf_set_locked(ufs_failure_t *); 251 static sfrc_t sf_found_trylck(ufs_failure_t *); 252 static sfrc_t sf_found_lock_fix_cmn(ufs_failure_t *, ufs_failure_states_t); 253 static sfrc_t sf_found_umount(ufs_failure_t *); 254 255 /* support routines, called by sf_nonterm_cmn and sf_term_cmn */ 256 static time_t trylock_time_exceeded(ufs_failure_t *); 257 static void pester_msg(ufs_failure_t *, int); 258 static int get_lockfs_status(ufs_failure_t *, struct lockfs *); 259 static void alloc_lockfs_comment(ufs_failure_t *, struct lockfs *); 260 static int set_lockfs(ufs_failure_t *, struct lockfs *); 261 static int lockfs_failure(ufs_failure_t *); 262 static int lockfs_success(ufs_failure_t *); 263 static int fsck_active(ufs_failure_t *); 264 265 /* low-level support routines */ 266 static ufsd_t *get_state_desc(ufs_failure_states_t); 267 static char 
*fs_name(ufs_failure_t *); 268 269 #if defined(DEBUG) 270 static char *state_name(ufs_failure_states_t); 271 static char *lock_name(struct lockfs *); 272 static char *err_name(int); 273 static char *act_name(ufsa_t); 274 static void dump_uf_list(char *msg); 275 static void dump_uf(ufs_failure_t *, int i); 276 #endif /* DEBUG */ 277 /* 278 * 279 * State Transitions: 280 * 281 * normally: 282 * if flagged to be locked but not unmounted: (UFSMNT_ONERROR_LOCK) 283 * UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> FIXING -> FIXED 284 * 285 * The only difference between these two is that the fsck must be started 286 * manually. 287 * 288 * if flagged to be unmounted: (UFSMNT_ONERROR_UMOUNT) 289 * UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> UMOUNT -> NOTFIX 290 * 291 * if flagged to panic: (UFSMNT_ONERROR_PANIC) 292 * UNDEF -> INIT -> PANIC 293 * 294 * if a secondary panic on a file system which has an active failure 295 * record: 296 * UNDEF -> INIT -> QUEUE -> REPLICA 297 * 298 * UNDEF, INIT, QUEUE all are set in the context of the failing thread. 299 * All other states (except possibly PANIC) are set in by the monitor 300 * (lock) thread. 
301 * 302 */ 303 304 ufsd_t state_desc[] = 305 { 306 { UF_ILLEGAL, "in an unknown state", sf_minimum, UF_ILLEGAL, 307 { 0, 1, 0 } }, 308 { UF_UNDEF, "undefined", sf_undef, UF_UNDEF, 309 { 0, 1, 0 } }, 310 { UF_INIT, "being initialized", sf_init, UF_UNDEF, 311 { 0, 1, 0 } }, 312 { UF_QUEUE, "queued", sf_queue, UF_INIT, 313 { 0, 1, 0 } }, 314 { UF_TRYLCK, "trying to be locked", sf_nonterm_cmn, 315 UF_QUEUE, { 0, 0, 0 } }, 316 { UF_LOCKED, "locked", sf_nonterm_cmn, 317 UF_TRYLCK | UF_FIXING, { 0, 0, 0 } }, 318 { UF_UMOUNT, "being unmounted", sf_nonterm_cmn, 319 320 #if defined(DEBUG) 321 UF_PANIC | 322 #endif /* DEBUG */ 323 UF_TRYLCK | UF_LOCKED, { 0, 0, 0 } }, 324 { UF_FIXING, "being fixed", sf_nonterm_cmn, 325 UF_LOCKED, { 0, 0, 0 } }, 326 { UF_FIXED, "fixed", sf_term_cmn, 327 UF_FIXING, { 1, 0, 0 } }, 328 { UF_NOTFIX, "not fixed", sf_term_cmn, 329 330 #if defined(DEBUG) 331 UF_PANIC | 332 #endif /* DEBUG */ 333 334 UF_QUEUE | UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING, 335 { 1, 0, 0 } }, 336 { UF_REPLICA, "a replica", sf_term_cmn, 337 UF_QUEUE, { 1, 0, 0 } }, 338 { UF_PANIC, "panicking", sf_panic, 339 /* XXX make this narrower */ UF_ALLSTATES, { 0, 0, 0 } }, 340 { UF_UNDEF, NULL, ((sfrc_t (*)()) NULL), 341 UF_UNDEF, { 0, 0, 0 } } 342 }; 343 344 /* unified collection */ 345 struct ufsfx_info { 346 struct uf_statistics *ufi_statp; 347 struct ufs_failure_tunable *ufi_tunep; 348 ufsd_t *ufi_statetab; 349 } uffsinfo; 350 351 #if defined(DEBUG) 352 struct action_description { 353 ufsa_t ad_v; 354 char *ad_name; 355 }; 356 357 #define EUNK (-1) 358 359 struct error_description { 360 int ed_errno; 361 char *ed_name; 362 } err_desc[] = 363 { 364 { EUNK, "<unexpected errno?>" }, 365 { EINVAL, "EINVAL" }, 366 { EACCES, "EACCES" }, 367 { EPERM, "EPERM" }, 368 { EIO, "EIO" }, 369 { EDEADLK, "EDEADLK" }, 370 { EBUSY, "EBUSY" }, 371 { EAGAIN, "EAGAIN" }, 372 { ERESTART, "ERESTART" }, 373 { ETIMEDOUT, "ETIMEDOUT" }, 374 { NO_ERROR, "Ok" }, 375 { EUNK, NULL } 376 }; 377 378 
struct action_description act_desc[] = 379 { 380 { UFA_ERROR, "<unexpected action?>" }, 381 { UFA_FOUND, "\"found\"" }, 382 { UFA_SET, "\"set\"" }, 383 { UFA_ERROR, NULL }, 384 }; 385 386 #define LOCKFS_BADLOCK (-1) 387 388 struct lock_description { 389 int ld_type; 390 char *ld_name; 391 } lock_desc[] = 392 { 393 { LOCKFS_BADLOCK, "<unexpected lock?>" }, 394 { LOCKFS_ULOCK, "Unlock" }, 395 { LOCKFS_ELOCK, "Error Lock" }, 396 { LOCKFS_HLOCK, "Hard Lock" }, 397 { LOCKFS_OLOCK, "Old Lock" }, 398 { LOCKFS_BADLOCK, NULL } 399 }; 400 401 #endif /* DEBUG */ 402 403 /* 404 * ufs_fault, ufs_fault_v 405 * 406 * called instead of cmn_err(CE_PANIC, ...) by ufs routines 407 * when a failure is detected to put the file system into an 408 * error state (if possible) or to devolve to a panic otherwise 409 * 410 * vnode is some vnode in this file system, used to find the way 411 * to ufsvfs, vfsp etc. Since a panic can be called from many 412 * levels, the vnode is the most convenient hook to pass through. 413 * 414 */ 415 416 /*PRINTFLIKE2*/ 417 int 418 ufs_fault(vnode_t *vp, char *fmt, ...) 419 { 420 va_list adx; 421 int error; 422 423 MINOR(("[ufs_fault")); 424 425 va_start(adx, fmt); 426 error = ufs_fault_v(vp, fmt, adx); 427 va_end(adx); 428 429 MINOR((": %s (%d)]\n", err_name(error), error)); 430 return (error); 431 } 432 433 const char *nullfmt = "<null format?>"; 434 435 static int 436 ufs_fault_v(vnode_t *vp, char *fmt, va_list adx) 437 { 438 ufs_failure_t *new = NULL; 439 ufsvfs_t *ufsvfsp; 440 triage_t fix; 441 int err = ERESTART; 442 int need_vfslock; 443 444 MINOR(("[ufs_fault_v")); 445 446 if (fmt == NULL) 447 fmt = (char *)nullfmt; 448 449 fix = triage(vp); 450 451 if (vp) { 452 ufsvfsp = (struct ufsvfs *)vp->v_vfsp->vfs_data; 453 454 /* 455 * Something bad has happened. That is why we are here. 456 * 457 * In order for the bad thing to be recorded in the superblock 458 * we need to write to the superblock directly. 
459 * In the case that logging is enabled the logging code 460 * would normally intercept our write as a delta to the log, 461 * thus we mark the filesystem FSBAD in any case. 462 */ 463 need_vfslock = !MUTEX_HELD(&ufsvfsp->vfs_lock); 464 465 if (need_vfslock) { 466 mutex_enter(&ufsvfsp->vfs_lock); 467 } 468 469 ufsvfsp->vfs_fs->fs_clean = FSBAD; 470 ASSERT(SEMA_HELD(&ufsvfsp->vfs_bufp->b_sem)); 471 ufsvfsp->vfs_bufp->b_flags &= ~(B_ASYNC | B_READ | 472 B_DONE | B_ERROR | B_DELWRI); 473 474 (void) bdev_strategy(ufsvfsp->vfs_bufp); 475 (void) biowait(ufsvfsp->vfs_bufp); 476 477 if (need_vfslock) { 478 mutex_exit(&ufsvfsp->vfs_lock); 479 } 480 } 481 482 switch (fix) { 483 484 default: 485 case TRIAGE_DEAD: 486 case TRIAGE_NO_SPIRIT: 487 488 real_panic_v(new, fmt, adx); 489 /* LINTED: warning: logical expression always true: op "||" */ 490 ASSERT(DEBUG); 491 err = EAGAIN; 492 493 #if defined(DEBUG) 494 if (!(DEBUG_FLAGS & DBGFLG_FIXWOULDPANIC)) { 495 break; 496 } 497 /* FALLTHROUGH */ 498 499 #else 500 break; 501 502 #endif /* DEBUG */ 503 504 case TRIAGE_ATTEND_TO: 505 506 /* q thread not running yet? */ 507 if (mutex_tryenter(&ufs_fix.uq_mutex)) { 508 if (!ufs_fix.uq_threadp) { 509 mutex_exit(&ufs_fix.uq_mutex); 510 ufs_thread_start(&ufs_fix, 511 ufsfx_thread_fix_failures, NULL); 512 ufs_fix.uq_threadp->t_flag |= T_DONTBLOCK; 513 mutex_enter(&ufs_fix.uq_mutex); 514 } else { 515 /* 516 * We got the lock but we are not the current 517 * threadp so we have to release the lock. 518 */ 519 mutex_exit(&ufs_fix.uq_mutex); 520 } 521 } else { 522 MINOR((": fix failure thread already running ")); 523 /* 524 * No need to log another failure as one is already 525 * being logged. 
526 */ 527 break; 528 } 529 530 if (ufs_fix.uq_threadp && ufs_fix.uq_threadp == curthread) { 531 mutex_exit(&ufs_fix.uq_mutex); 532 cmn_err(CE_WARN, "ufs_fault_v: recursive ufs_fault"); 533 } else { 534 /* 535 * Must check if we actually still own the lock and 536 * if so then release the lock and move on with life. 537 */ 538 if (mutex_owner(&ufs_fix.uq_mutex) == curthread) 539 mutex_exit(&ufs_fix.uq_mutex); 540 } 541 542 new = init_failure(vp, fmt, adx); 543 if (new != NULL) { 544 queue_failure(new); 545 break; 546 } 547 real_panic_v(new, fmt, adx); 548 break; 549 550 } 551 MINOR(("] ")); 552 return (err); 553 } 554 555 /* 556 * triage() 557 * 558 * Attempt to fix iff: 559 * - the system is not already panicking 560 * - this file system isn't explicitly marked not to be fixed 561 * - we can connect to the user-level daemon 562 * These conditions are detectable later, but if we can determine 563 * them in the failing threads context the core dump may be more 564 * useful. 565 * 566 */ 567 568 static triage_t 569 triage(vnode_t *vp) 570 { 571 struct inode *ip; 572 int need_unlock_vfs; 573 int fs_flags; 574 575 MINUTE(("[triage")); 576 577 if (panicstr) { 578 MINUTE(( 579 ": already panicking: \"%s\" => TRIAGE_DEAD]\n", panicstr)); 580 return (TRIAGE_DEAD); 581 } 582 583 if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs) { 584 MINUTE(( 585 ": vp, ip or ufsvfs is NULL; can't determine fs => TRIAGE_DEAD]\n")); 586 return (TRIAGE_DEAD); 587 } 588 589 /* use tryenter and continue no matter what since we're panicky */ 590 need_unlock_vfs = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock); 591 if (need_unlock_vfs) 592 need_unlock_vfs = mutex_tryenter(&ip->i_ufsvfs->vfs_lock); 593 594 fs_flags = ip->i_ufsvfs->vfs_fsfx.fx_flags; 595 if (need_unlock_vfs) 596 mutex_exit(&ip->i_ufsvfs->vfs_lock); 597 598 if (fs_flags & UFSFX_PANIC) { 599 MINUTE(( 600 ": filesystem marked \"panic\" => TRIAGE_NO_SPIRIT]\n")); 601 return (TRIAGE_NO_SPIRIT); 602 } 603 604 if (ufs_checkaccton(vp) != 0) { 605 
MINUTE(( 606 ": filesystem would deadlock (accounting) => TRIAGE_DEAD]\n")); 607 return (TRIAGE_DEAD); 608 } 609 610 if (ufs_checkswapon(vp) != 0) { 611 MINUTE(( 612 ": filesystem would deadlock (swapping) => TRIAGE_DEAD]\n")); 613 return (TRIAGE_DEAD); 614 } 615 616 MINUTE((": return TRIAGE_ATTEND_TO] ")); 617 return (TRIAGE_ATTEND_TO); 618 } 619 620 /* 621 * init failure 622 * 623 * This routine allocates a failure struct and initializes 624 * it's member elements. 625 * Space is allocated for copies of dynamic identifying fs structures 626 * passed in. Without a much more segmented kernel architecture 627 * this is as protected as we can make it (for now.) 628 */ 629 static ufs_failure_t * 630 init_failure(vnode_t *vp, char *fmt, va_list adx) 631 { 632 ufs_failure_t *new; 633 struct inode *ip; 634 int initialization_worked = 0; 635 int need_vfs_unlock; 636 637 MINOR(("[init_failure")); 638 639 new = kmem_zalloc(sizeof (ufs_failure_t), KM_NOSLEEP); 640 if (!new) { 641 MINOR((": kmem_zalloc failed]\n")); 642 return (NULL); 643 } 644 645 /* 646 * enough information to make a fix attempt possible? 
647 */ 648 if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs || !vp->v_vfsp || 649 !ip->i_ufsvfs->vfs_bufp || !ITOF(ip) || !fmt) 650 goto errout; 651 652 if (vp->v_type != VREG && vp->v_type != VDIR && 653 vp->v_type != VBLK && vp->v_type != VCHR && 654 vp->v_type != VLNK && vp->v_type != VFIFO && 655 vp->v_type != VSOCK) 656 goto errout; 657 658 if (ip->i_ufsvfs->vfs_root->v_type != VREG && 659 ip->i_ufsvfs->vfs_root->v_type != VDIR && 660 ip->i_ufsvfs->vfs_root->v_type != VBLK && 661 ip->i_ufsvfs->vfs_root->v_type != VCHR && 662 ip->i_ufsvfs->vfs_root->v_type != VLNK && 663 ip->i_ufsvfs->vfs_root->v_type != VFIFO && 664 ip->i_ufsvfs->vfs_root->v_type != VSOCK) 665 goto errout; 666 667 if ((ITOF(ip)->fs_magic != FS_MAGIC) && 668 (ITOF(ip)->fs_magic != MTB_UFS_MAGIC)) 669 goto errout; 670 671 /* intialize values */ 672 673 (void) vsnprintf(new->uf_panic_str, LOCKFS_MAXCOMMENTLEN - 1, fmt, adx); 674 675 new->uf_ufsvfsp = ip->i_ufsvfs; 676 new->uf_vfsp = ip->i_vfs; 677 678 mutex_init(&new->uf_mutex, NULL, MUTEX_DEFAULT, NULL); 679 need_vfs_unlock = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock); 680 681 if (need_vfs_unlock) { 682 if (!mutex_tryenter(&ip->i_ufsvfs->vfs_lock)) { 683 /* 684 * not much alternative here, but we're panicking 685 * already, it couldn't be worse - so just 686 * proceed optimistically and take note. 
687 */ 688 mutex_enter(&uf_stats.ufst_mutex); 689 uf_stats.ufst_lock_violations++; 690 mutex_exit(&uf_stats.ufst_mutex); 691 MINOR((": couldn't get vfs lock")) 692 need_vfs_unlock = 0; 693 } 694 } 695 696 if (mutex_tryenter(&new->uf_mutex)) { 697 initialization_worked = set_state(new, UF_INIT); 698 mutex_exit(&new->uf_mutex); 699 } 700 701 if (need_vfs_unlock) 702 mutex_exit(&ip->i_ufsvfs->vfs_lock); 703 704 if (initialization_worked) { 705 MINOR(("] ")); 706 return (new); 707 } 708 /* FALLTHROUGH */ 709 710 errout: 711 if (new) 712 kmem_free(new, sizeof (ufs_failure_t)); 713 MINOR((": failed]\n")); 714 return (NULL); 715 } 716 717 static void 718 queue_failure(ufs_failure_t *new) 719 { 720 MINOR(("[queue_failure")); 721 722 mutex_enter(&ufs_fix.uq_mutex); 723 724 if (ufs_fix.uq_ufhead) 725 insque(new, &ufs_fix.uq_ufhead); 726 else 727 ufs_fix.uq_ufhead = new; 728 729 if (mutex_tryenter(&new->uf_mutex)) { 730 (void) set_state(new, UF_QUEUE); 731 mutex_exit(&new->uf_mutex); 732 } 733 734 mutex_enter(&uf_stats.ufst_mutex); /* force wakeup */ 735 ufs_fix.uq_ne = ufs_fix.uq_lowat = uf_stats.ufst_num_failed; 736 mutex_exit(&uf_stats.ufst_mutex); 737 738 cv_broadcast(&ufs_fix.uq_cv); 739 740 DCALL(DBGLVL_MAJOR, cmn_err(CE_WARN, new->uf_panic_str? 741 new->uf_panic_str: 742 "queue_failure: NULL panic str?")); 743 mutex_exit(&ufs_fix.uq_mutex); 744 745 MINOR(("] ")); 746 } 747 748 /*PRINTFLIKE2*/ 749 static void 750 real_panic(ufs_failure_t *f, const char *fmt, ...) 
751 { 752 va_list adx; 753 754 MINUTE(("[real_panic ")); 755 756 va_start(adx, fmt); 757 real_panic_v(f, fmt, adx); 758 va_end(adx); 759 760 MINUTE((": return?!]\n")); 761 } 762 763 static void 764 real_panic_v(ufs_failure_t *f, const char *fmt, va_list adx) 765 { 766 int seriousness = CE_PANIC; 767 int need_unlock; 768 769 MINUTE(("[real_panic_v ")); 770 771 if (f && f->uf_ufsvfsp) 772 TRANS_SETERROR(f->uf_ufsvfsp); 773 774 #if defined(DEBUG) 775 if (DEBUG_FLAGS & DBGFLG_NOPANIC) { 776 seriousness = CE_WARN; 777 cmn_err(CE_WARN, "real_panic: EWOULDPANIC\n"); 778 } 779 #endif /* DEBUG */ 780 781 delay(hz >> 1); /* allow previous warnings to get out */ 782 783 if (!f && fmt) 784 vcmn_err(seriousness, fmt, adx); 785 else 786 cmn_err(seriousness, f && f->uf_panic_str? f->uf_panic_str: 787 "real_panic: <unknown panic?>"); 788 789 if (f) { 790 need_unlock = !MUTEX_HELD(&f->uf_mutex); 791 if (need_unlock) { 792 mutex_enter(&f->uf_mutex); 793 } 794 795 f->uf_retry = -1; 796 (void) set_state(f, UF_PANIC); 797 798 if (need_unlock) { 799 mutex_exit(&f->uf_mutex); 800 } 801 } 802 MINUTE((": return?!]\n")); 803 } 804 805 /* 806 * initializes ufs panic structs, locks, etc 807 */ 808 void 809 ufsfx_init(void) 810 { 811 812 MINUTE(("[ufsfx_init")); 813 814 /* patchable; unchanged while running, so no lock is needed */ 815 ufsfx_tune.uft_too_long = UF_TOO_LONG; 816 ufsfx_tune.uft_fixstart_period = UF_FIXSTART_PERIOD; 817 ufsfx_tune.uft_fixpoll_period = UF_FIXPOLL_PERIOD; 818 ufsfx_tune.uft_short_err_period = UF_SHORT_ERROR_PERIOD; 819 ufsfx_tune.uft_long_err_period = UF_LONG_ERROR_PERIOD; 820 821 uffsinfo.ufi_statp = &uf_stats; 822 uffsinfo.ufi_tunep = &ufsfx_tune; 823 uffsinfo.ufi_statetab = &state_desc[0]; 824 825 mutex_init(&uf_stats.ufst_mutex, NULL, MUTEX_DEFAULT, NULL); 826 ufs_thread_init(&ufs_fix, /* maxne */ 1); 827 828 MINUTE(("] ")); 829 } 830 831 /* 832 * initializes per-ufs values 833 * returns 0 (ok) or errno 834 */ 835 int 836 ufsfx_mount(struct ufsvfs *ufsvfsp, int 
flags) 837 { 838 MINUTE(("[ufsfx_mount (%d)", flags)); 839 /* don't check/need vfs_lock because it's still being initialized */ 840 841 ufsvfsp->vfs_fsfx.fx_flags = (flags & UFSMNT_ONERROR_FLGMASK) >> 4; 842 843 MINUTE((": %s: fx_flags:%ld,", 844 ufsvfsp->vfs_fs->fs_fsmnt, ufsvfsp->vfs_fsfx.fx_flags)); 845 /* 846 * onerror={panic ^ lock only ^ unmount} 847 */ 848 849 if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_PANIC) { 850 MINUTE((" PANIC")); 851 852 } else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKONLY) { 853 MINUTE((" LCKONLY")); 854 855 } else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKUMOUNT) { 856 MINUTE((" LCKUMOUNT")); 857 858 } else { 859 ufsvfsp->vfs_fsfx.fx_flags = UFSFX_DEFAULT; 860 ASSERT(ufsvfsp->vfs_fsfx.fx_flags & 861 (UFSMNT_ONERROR_FLGMASK >> 4)); 862 MINUTE((" DEFAULT")); 863 } 864 865 pollwakeup(&ufs_pollhd, POLLPRI); 866 MINUTE(("]\n")); 867 return (0); 868 } 869 870 /* 871 * ufsfx_unmount 872 * 873 * called during unmount 874 */ 875 void 876 ufsfx_unmount(struct ufsvfs *ufsvfsp) 877 { 878 ufs_failure_t *f; 879 int must_unlock_list; 880 881 MINUTE(("[ufsfx_unmount")); 882 883 if (!ufsvfsp) { 884 MINUTE((": no ufsvfsp]")); 885 return; 886 } 887 888 if ((must_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex)) != 0) 889 mutex_enter(&ufs_fix.uq_mutex); 890 891 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 892 int must_unlock_failure; 893 894 must_unlock_failure = !MUTEX_HELD(&f->uf_mutex); 895 if (must_unlock_failure) { 896 mutex_enter(&f->uf_mutex); 897 } 898 899 if (f->uf_ufsvfsp == ufsvfsp) { 900 901 /* 902 * if we owned the failure record lock, then this 903 * is probably a fix failure-triggered unmount, so 904 * the warning is not appropriate or needed 905 */ 906 907 /* XXX if rebooting don't print this? 
*/ 908 if (!terminal_state(f->uf_s) && must_unlock_failure) { 909 cmn_err(CE_WARN, 910 "Unmounting %s while error-locked", 911 fs_name(f)); 912 } 913 914 f->uf_ufsvfsp = NULL; 915 f->uf_vfs_ufsfxp = NULL; 916 f->uf_vfs_lockp = NULL; 917 f->uf_bp = NULL; 918 f->uf_vfsp = NULL; 919 f->uf_retry = -1; 920 } 921 922 if (must_unlock_failure) 923 mutex_exit(&f->uf_mutex); 924 } 925 if (must_unlock_list) 926 mutex_exit(&ufs_fix.uq_mutex); 927 928 pollwakeup(&ufs_pollhd, POLLPRI | POLLHUP); 929 MINUTE(("] ")); 930 } 931 932 /* 933 * ufsfx_(un)lockfs 934 * 935 * provides hook from lockfs code so we can recognize unlock/relock 936 * This is called after it is certain that the (un)lock will succeed. 937 */ 938 void 939 ufsfx_unlockfs(struct ufsvfs *ufsvfsp) 940 { 941 ufs_failure_t *f; 942 int need_unlock; 943 int need_unlock_list; 944 int informed = 0; 945 946 MINUTE(("[ufsfx_unlockfs")); 947 948 if (!ufsvfsp) 949 return; 950 951 need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex); 952 953 if (need_unlock_list) 954 mutex_enter(&ufs_fix.uq_mutex); 955 956 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 957 958 need_unlock = !MUTEX_HELD(&f->uf_mutex); 959 if (need_unlock) 960 mutex_enter(&f->uf_mutex); 961 962 if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s)) { 963 if (!(f->uf_s & UF_FIXING)) { 964 /* 965 * This might happen if we don't notice that 966 * the fs gets marked FSFIX before it is 967 * marked FSCLEAN, as might occur if the 968 * the superblock was hammered directly. 
969 */ 970 if (!informed) { 971 informed = 1; 972 cmn_err(CE_NOTE, 973 "Unlock of %s succeeded before fs_clean marked FSFIX?", 974 fs_name(f)); 975 } 976 977 /* 978 * pass through fixing state so 979 * transition protocol is satisfied 980 */ 981 if (!set_state(f, UF_FIXING)) { 982 MINUTE((": failed] ")); 983 } 984 } 985 986 if (!set_state(f, UF_FIXED)) { 987 /* it's already fixed, so don't panic now */ 988 MINUTE((": failed] ")); 989 } 990 } 991 992 if (need_unlock) 993 mutex_exit(&f->uf_mutex); 994 } 995 if (need_unlock_list) 996 mutex_exit(&ufs_fix.uq_mutex); 997 MINUTE(("] ")); 998 } 999 1000 void 1001 ufsfx_lockfs(struct ufsvfs *ufsvfsp) 1002 { 1003 ufs_failure_t *f; 1004 int need_unlock; 1005 int need_unlock_list; 1006 1007 MINUTE(("[ufsfx_lockfs")); 1008 1009 if (!ufsvfsp) 1010 return; 1011 1012 need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex); 1013 1014 if (need_unlock_list) 1015 mutex_enter(&ufs_fix.uq_mutex); 1016 1017 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 1018 1019 need_unlock = !MUTEX_HELD(&f->uf_mutex); 1020 if (need_unlock) 1021 mutex_enter(&f->uf_mutex); 1022 1023 if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s) && 1024 f->uf_s != UF_PANIC) { 1025 switch (f->uf_s) { 1026 1027 default: 1028 cmn_err(CE_WARN, 1029 "fs %s not in state UF_TRYLCK, UF_LOCKED or UF_FIXING", 1030 fs_name(f)); 1031 break; 1032 1033 case UF_TRYLCK: 1034 if (!set_state(f, UF_LOCKED)) { 1035 MINUTE((": failed] ")); 1036 } 1037 break; 1038 1039 case UF_LOCKED: 1040 if (!set_state(f, UF_FIXING)) { 1041 MINUTE((": failed] ")); 1042 } 1043 break; 1044 1045 case UF_FIXING: 1046 break; 1047 1048 } 1049 } 1050 1051 if (need_unlock) 1052 mutex_exit(&f->uf_mutex); 1053 } 1054 if (need_unlock_list) 1055 mutex_exit(&ufs_fix.uq_mutex); 1056 1057 MINUTE(("] ")); 1058 } 1059 1060 /* 1061 * error lock, trigger fsck and unlock those fs with failures 1062 * blatantly copied from the hlock routine, although this routine 1063 * triggers differently in order to use uq_ne as 
meaningful data. 1064 */ 1065 /* ARGSUSED */ 1066 void 1067 ufsfx_thread_fix_failures(void *ignored) 1068 { 1069 int retry; 1070 callb_cpr_t cprinfo; 1071 1072 CALLB_CPR_INIT(&cprinfo, &ufs_fix.uq_mutex, callb_generic_cpr, 1073 "ufsfixfail"); 1074 1075 MINUTE(("[ufsfx_thread_fix_failures] ")); 1076 1077 for (;;) { 1078 /* sleep until there is work to do */ 1079 1080 mutex_enter(&ufs_fix.uq_mutex); 1081 (void) ufs_thread_run(&ufs_fix, &cprinfo); 1082 ufs_fix.uq_ne = 0; 1083 mutex_exit(&ufs_fix.uq_mutex); 1084 1085 /* process failures on our q */ 1086 do { 1087 retry = ufsfx_do_failure_q(); 1088 if (retry) { 1089 mutex_enter(&ufs_fix.uq_mutex); 1090 CALLB_CPR_SAFE_BEGIN(&cprinfo); 1091 (void) cv_timedwait(&ufs_fix.uq_cv, 1092 &ufs_fix.uq_mutex, 1093 lbolt + (hz * retry)); 1094 CALLB_CPR_SAFE_END(&cprinfo, 1095 &ufs_fix.uq_mutex); 1096 mutex_exit(&ufs_fix.uq_mutex); 1097 } 1098 } while (retry); 1099 } 1100 /* NOTREACHED */ 1101 } 1102 1103 1104 /* 1105 * watch for fix-on-panic work 1106 * 1107 * returns # of seconds to sleep before trying again 1108 * and zero if no retry is needed 1109 */ 1110 1111 int 1112 ufsfx_do_failure_q(void) 1113 { 1114 ufs_failure_t *f; 1115 long retry = 1; 1116 ufsd_t *s; 1117 1118 MAJOR(("[ufsfx_do_failure_q")); 1119 DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL)); 1120 1121 if (!mutex_tryenter(&ufs_fix.uq_mutex)) 1122 return (retry); 1123 1124 retry = 0; 1125 rescan_q: 1126 1127 /* 1128 * walk down failure list 1129 * depending on state of each failure, do whatever 1130 * is appropriate to move it to the next state 1131 * taking note of whether retry gets set 1132 * 1133 * retry protocol: 1134 * wakeup in shortest required time for any failure 1135 * retry == 0; nothing more to do (terminal state) 1136 * retry < 0; reprocess queue immediately, retry will 1137 * be abs(retry) for the next cycle 1138 * retry > 0; schedule wakeup for retry seconds 1139 */ 1140 1141 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 1142 1143 if 
(!mutex_tryenter(&f->uf_mutex)) { 1144 retry = 1; 1145 continue; 1146 } 1147 s = get_state_desc(f->uf_s); 1148 1149 MINOR((": found%s: %s, \"%s: %s\"\n", 1150 s->ud_attr.terminal? " old": "", 1151 fs_name(f), state_name(f->uf_s), f->uf_panic_str)); 1152 1153 if (s->ud_attr.terminal) { 1154 mutex_exit(&f->uf_mutex); 1155 continue; 1156 } 1157 1158 if (s->ud_sfp) 1159 (*s->ud_sfp)(f, UFA_FOUND, f->uf_s); 1160 1161 ASSERT(terminal_state(f->uf_s) || f->uf_retry != 0); 1162 1163 if (f->uf_retry != 0) { 1164 if (retry > f->uf_retry || retry == 0) 1165 retry = f->uf_retry; 1166 if (f->uf_retry < 0) 1167 f->uf_retry = abs(f->uf_retry); 1168 } 1169 mutex_exit(&f->uf_mutex); 1170 } 1171 1172 1173 if (retry < 0) { 1174 retry = abs(retry); 1175 goto rescan_q; 1176 } 1177 1178 mutex_exit(&ufs_fix.uq_mutex); 1179 1180 DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL)); 1181 MAJOR((": retry=%ld, good night]\n\n", retry)); 1182 1183 return (retry); 1184 } 1185 1186 static void 1187 pester_msg(ufs_failure_t *f, int seriousness) 1188 { 1189 MINUTE(("[pester_msg")); 1190 ASSERT(f->uf_s & (UF_LOCKED | UF_FIXING)); 1191 1192 /* 1193 * XXX if seems too long for this fs, poke administrator 1194 * XXX to run fsck manually (and change retry time?) 1195 */ 1196 cmn_err(seriousness, 1197 "Waiting for repair of %s to %s", 1198 fs_name(f), 1199 f->uf_s & UF_LOCKED? "start": "finish"); 1200 MINUTE(("]")); 1201 } 1202 1203 static time_t 1204 trylock_time_exceeded(ufs_failure_t *f) 1205 { 1206 time_t toolong; 1207 extern time_t time; 1208 1209 MINUTE(("[trylock_time_exceeded")); 1210 ASSERT(MUTEX_HELD(&f->uf_mutex)); 1211 1212 toolong = (time_t)ufsfx_tune.uft_too_long + f->uf_entered_tm; 1213 if (time > toolong) 1214 cmn_err(CE_WARN, "error-lock timeout exceeded: %s", fs_name(f)); 1215 1216 MINUTE(("] ")); 1217 return (time <= toolong? 
0: time - toolong); 1218 } 1219 1220 static int 1221 get_lockfs_status(ufs_failure_t *f, struct lockfs *lfp) 1222 { 1223 MINUTE(("[get_lockfs_status")); 1224 1225 if (!f->uf_ufsvfsp) { 1226 MINUTE((": ufsvfsp is NULL]\n")); 1227 return (0); 1228 } 1229 1230 ASSERT(MUTEX_HELD(&f->uf_mutex)); 1231 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp)); 1232 ASSERT(!vfs_lock_held(f->uf_vfsp)); 1233 ASSERT(f->uf_ufsvfsp->vfs_root != NULL); 1234 1235 f->uf_lf_err = ufs_fiolfss(f->uf_ufsvfsp->vfs_root, lfp); 1236 1237 if (f->uf_lf_err) { 1238 f->uf_retry = ufsfx_tune.uft_short_err_period; 1239 } 1240 1241 MINUTE(("] ")); 1242 return (1); 1243 } 1244 1245 static sfrc_t 1246 set_state(ufs_failure_t *f, ufs_failure_states_t new_state) 1247 { 1248 ufsd_t *s; 1249 sfrc_t sfrc = SFRC_FAIL; 1250 int need_unlock; 1251 extern time_t time; 1252 1253 HIDEOUS(("[set_state: new state:%s", state_name(new_state))); 1254 ASSERT(f); 1255 ASSERT(MUTEX_HELD(&f->uf_mutex)); 1256 1257 /* 1258 * if someone else is panicking, just let panic sync proceed 1259 */ 1260 if (panicstr) { 1261 (void) set_state(f, UF_NOTFIX); 1262 HIDEOUS((": state reset: not fixed] ")); 1263 return (sfrc); 1264 } 1265 1266 /* 1267 * bad state transition, an internal error 1268 */ 1269 if (!state_trans_valid(f->uf_s, new_state)) { 1270 /* recursion */ 1271 if (!(f->uf_s & UF_PANIC) && !(new_state & UF_PANIC)) 1272 (void) set_state(f, UF_PANIC); 1273 MINOR((": state reset: transition failure (\"%s\"->\"%s\")] ", 1274 state_name(f->uf_s), state_name(new_state))); 1275 return (sfrc); 1276 } 1277 1278 s = get_state_desc(new_state); 1279 1280 need_unlock = !MUTEX_HELD(&ufs_fix.uq_mutex); 1281 if (need_unlock) 1282 mutex_enter(&ufs_fix.uq_mutex); 1283 1284 if (s->ud_attr.at_fail && ufs_fix.uq_threadp && 1285 curthread == ufs_fix.uq_threadp) { 1286 cmn_err(CE_WARN, "set_state: probable recursive panic of %s", 1287 fs_name(f)); 1288 } 1289 if (need_unlock) 1290 mutex_exit(&ufs_fix.uq_mutex); 1291 1292 /* NULL state functions always succeed 
*/ 1293 sfrc = !s->ud_sfp? SFRC_SUCCESS: (*s->ud_sfp)(f, UFA_SET, new_state); 1294 1295 if (sfrc == SFRC_SUCCESS && f->uf_s != new_state) { 1296 f->uf_s = new_state; 1297 f->uf_entered_tm = time; 1298 f->uf_counter = 0; 1299 } 1300 1301 HIDEOUS(("]\n")); 1302 return (sfrc); 1303 } 1304 1305 static ufsd_t * 1306 get_state_desc(ufs_failure_states_t state) 1307 { 1308 ufsd_t *s; 1309 1310 HIDEOUS(("[get_state_desc")); 1311 1312 for (s = &state_desc[1]; s->ud_name != NULL; s++) { 1313 if (s->ud_v == state) { 1314 HIDEOUS(("] ")); 1315 return (s); 1316 } 1317 } 1318 1319 HIDEOUS(("] ")); 1320 return (&state_desc[0]); /* default */ 1321 } 1322 1323 static sfrc_t 1324 sf_undef(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s) 1325 { 1326 sfrc_t rc; 1327 1328 TRIVIA(("[sf_undef, action is %s, state is %s\n", 1329 act_name(a), state_name(s))); 1330 ASSERT(s == UF_UNDEF); 1331 1332 /* shouldn't find null failure records or ever set one */ 1333 rc = set_state(f, UF_NOTFIX); 1334 1335 TRIVIA(("] ")); 1336 return (rc); 1337 } 1338 1339 1340 static sfrc_t 1341 sf_init( 1342 ufs_failure_t *f, 1343 ufsa_t a, 1344 ufs_failure_states_t s) 1345 { 1346 sfrc_t rc = SFRC_FAIL; 1347 extern time_t time; 1348 1349 TRIVIA(("[sf_init, action is %s", act_name(a))); 1350 ASSERT(s & UF_INIT); 1351 1352 switch (a) { 1353 case UFA_SET: 1354 f->uf_begin_tm = time; 1355 f->uf_retry = 1; 1356 if (!f->uf_ufsvfsp) { 1357 (void) set_state(f, UF_PANIC); 1358 TRIVIA((": NULL ufsvfsp]\n")); 1359 return (rc); 1360 } 1361 /* 1362 * because we can call panic from many different levels, 1363 * we can't be sure that we've got the vfs_lock at this 1364 * point. 
However, there's not much alternative and if 1365 * we don't (have the lock) the worst case is we'll just 1366 * panic again 1367 */ 1368 f->uf_vfs_lockp = &f->uf_ufsvfsp->vfs_lock; 1369 f->uf_vfs_ufsfxp = &f->uf_ufsvfsp->vfs_fsfx; 1370 1371 if (!f->uf_ufsvfsp->vfs_bufp) { 1372 (void) set_state(f, UF_PANIC); 1373 TRIVIA((": NULL vfs_bufp]\n")); 1374 return (rc); 1375 } 1376 f->uf_bp = f->uf_ufsvfsp->vfs_bufp; 1377 1378 if (!f->uf_ufsvfsp->vfs_bufp->b_un.b_fs) { 1379 (void) set_state(f, UF_PANIC); 1380 TRIVIA((": NULL vfs_fs]\n")); 1381 return (rc); 1382 } 1383 1384 /* vfs_fs = vfs_bufp->b_un.b_fs */ 1385 bcopy(f->uf_ufsvfsp->vfs_fs->fs_fsmnt, f->uf_fsname, MAXMNTLEN); 1386 1387 f->uf_lf.lf_lock = LOCKFS_ELOCK; /* primer */ 1388 1389 if (!f->uf_vfsp || f->uf_vfsp->vfs_dev == NODEV) { 1390 (void) set_state(f, UF_PANIC); 1391 TRIVIA((": NULL vfsp or vfs_dev == NODEV")); 1392 return (rc); 1393 } 1394 f->uf_dev = f->uf_vfsp->vfs_dev; 1395 1396 rc = SFRC_SUCCESS; 1397 break; 1398 1399 case UFA_FOUND: 1400 default: 1401 /* failures marked init shouldn't even be on the queue yet */ 1402 rc = set_state(f, UF_QUEUE); 1403 TRIVIA((": found failure with state init]\n")); 1404 } 1405 1406 TRIVIA(("] ")); 1407 return (rc); 1408 } 1409 1410 static sfrc_t 1411 sf_queue( 1412 ufs_failure_t *f, 1413 ufsa_t a, 1414 ufs_failure_states_t s) 1415 { 1416 sfrc_t rc = SFRC_FAIL; 1417 1418 TRIVIA(("[sf_queue, action is %s", act_name(a))); 1419 ASSERT(s & UF_QUEUE); 1420 1421 if (!f->uf_ufsvfsp) { 1422 TRIVIA((": NULL ufsvfsp]\n")); 1423 return (rc); 1424 } 1425 1426 switch (a) { 1427 case UFA_FOUND: 1428 rc = sf_found_queue(f); 1429 break; 1430 1431 case UFA_SET: 1432 1433 ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex)); 1434 1435 mutex_enter(&uf_stats.ufst_mutex); 1436 uf_stats.ufst_num_failed++; 1437 mutex_exit(&uf_stats.ufst_mutex); 1438 1439 /* 1440 * if can't get the vfs lock, just wait until 1441 * UF_TRYLCK to set fx_current 1442 */ 1443 if (mutex_tryenter(f->uf_vfs_lockp)) { 1444 
f->uf_vfs_ufsfxp->fx_current = f;
			mutex_exit(f->uf_vfs_lockp);
		} else {
			/* lost the race for the vfs lock; just count it */
			mutex_enter(&uf_stats.ufst_mutex);
			uf_stats.ufst_current_races++;
			mutex_exit(&uf_stats.ufst_mutex);
		}

		f->uf_retry = 1;
		rc = SFRC_SUCCESS;
		TRIVIA(("] "));
		break;

	default:
		(void) set_state(f, UF_PANIC);
		TRIVIA((": failed] "));
	}

	return (rc);
}

/*
 * UFA_FOUND handler for UF_QUEUE.
 *
 * Under the vfs lock (try-lock only; on contention a 1 second retry is
 * scheduled) decides where a queued failure goes next:
 *   - UF_PANIC   if the file system is flagged UFSFX_PANIC,
 *   - UF_REPLICA if a different, non-terminal failure is already
 *                current for this file system,
 *   - UF_TRYLCK  otherwise.
 * Only the UF_TRYLCK transition yields SFRC_SUCCESS.
 */
static sfrc_t
sf_found_queue(ufs_failure_t *f)
{
	int		replica;
	sfrc_t		rc = SFRC_FAIL;

	TRIVIA(("[sf_found_queue"));

	/*
	 * don't need to check for null ufsvfsp because
	 * unmount must own list's ufs_fix.uq_mutex
	 * to mark it null and we own that lock since
	 * we got here.
	 */

	ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex));
	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));

	if (!mutex_tryenter(f->uf_vfs_lockp)) {
		TRIVIA((": tryenter(vfslockp) failed; retry]\n"));
		f->uf_retry = 1;
		return (rc);
	}

	replica = f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current != NULL &&
	    f->uf_vfs_ufsfxp->fx_current != f &&
	    !terminal_state(f->uf_vfs_ufsfxp->fx_current->uf_s);

	/*
	 * copy general flags to this ufs_failure so we don't
	 * need to refer back to the ufsvfs, or, more importantly,
	 * don't need to keep acquiring (trying to acquire) vfs_lockp
	 *
	 * The most restrictive option wins:
	 *  panic > errlock only > errlock+unmount > repair
	 * XXX panic > elock > elock > elock+umount
	 */
	/*
	 * NOTE(review): uf_vfs_ufsfxp is NULL-checked when computing
	 * "replica" above but dereferenced unconditionally below --
	 * confirm it cannot be NULL here.
	 */
	if (f->uf_vfs_ufsfxp->fx_flags & UFSFX_PANIC) {
		if (!set_state(f, UF_PANIC)) {
			TRIVIA((": marked panic but was queued?"));
			real_panic(f, " ");
			/*NOTREACHED*/
		}
		mutex_exit(f->uf_vfs_lockp);
		return (rc);
	}
	f->uf_flags = f->uf_vfs_ufsfxp->fx_flags;

	if (replica) {
		/* the UF_REPLICA state function expects the vfs lock held */
		if (!set_state(f, UF_REPLICA)) {
			f->uf_retry = 1;
			TRIVIA((": set to replica failed] "));
		} else {
			TRIVIA(("] "));
		}
		mutex_exit(f->uf_vfs_lockp);
		return (rc);
	}
	mutex_exit(f->uf_vfs_lockp);

	if (!set_state(f, UF_TRYLCK)) {
		TRIVIA((": failed] "));
	} else {
		rc = SFRC_SUCCESS;
	}
	return (rc);
}

/*
 * Shared state function for the non-terminal states UF_TRYLCK,
 * UF_LOCKED, UF_UMOUNT and UF_FIXING; dispatches on (action, state).
 *
 * A record with no ufsvfs is sent to UF_NOTFIX unless it is currently
 * in UF_UMOUNT.  An unexpected action or state sends it to UF_PANIC.
 */
static sfrc_t
sf_nonterm_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s)
{
	sfrc_t	rc = SFRC_FAIL;

	TRIVIA(("[sf_nonterm_cmn, action: %s, %s", act_name(a), state_name(s)));
	ASSERT(s & (UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING));
	ASSERT(!terminal_state(s));

	if (!f->uf_ufsvfsp && !(f->uf_s & UF_UMOUNT)) {
		TRIVIA((": NULL ufsvfsp (state != UMOUNT)]\n"));
		(void) set_state(f, UF_NOTFIX);
		return (rc);
	}

	switch (a) {
	case UFA_SET:
		switch (s) {
		case UF_TRYLCK:
			ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
			rc = sf_set_trylck(f);
			break;

		case UF_LOCKED:
			rc = sf_set_locked(f);
			break;

		case UF_FIXING:
			f->uf_flags |= UFSFX_REPAIR_START;
			f->uf_retry = ufsfx_tune.uft_fixpoll_period;
			rc = SFRC_SUCCESS;
			break;

		case UF_UMOUNT:
			/* negative: ask for an immediate queue rescan */
			f->uf_retry = -ufsfx_tune.uft_short_err_period;
			rc = SFRC_SUCCESS;
			break;

		default:
			(void) set_state(f, UF_PANIC);
			TRIVIA((": failed] "));
		}
		break;

	case UFA_FOUND:

		switch (s) {
		case UF_TRYLCK:
			rc = sf_found_trylck(f);
			break;

		case UF_LOCKED:
		case UF_FIXING:
			rc = sf_found_lock_fix_cmn(f, s);
			break;

		case UF_UMOUNT:
			rc = sf_found_umount(f);
			break;

		default:
			(void) set_state(f, UF_PANIC);
			TRIVIA((": failed] "));
			break;
		}
		break;
	default:
		(void) set_state(f, UF_PANIC);
		TRIVIA((": failed] "));
		break;
	}

	TRIVIA(("] "));
	return (rc);
}

/*
 * UFA_SET handler for UF_TRYLCK: claims fx_current for this failure if
 * no failure is current yet, primes uf_lf for an error lock and asks
 * for an immediate rescan (negative retry).  Fails with a 1 second
 * retry if the vfs lock cannot be taken without blocking.
 */
static sfrc_t
sf_set_trylck(ufs_failure_t *f)
{
	TRIVIA(("[sf_set_trylck"));

	if (!mutex_tryenter(f->uf_vfs_lockp)) {
		TRIVIA((": tryenter(vfslockp) failed; retry]\n"));
		f->uf_retry = 1;
		return (SFRC_FAIL);
	}

	if (!f->uf_vfs_ufsfxp->fx_current)
		f->uf_vfs_ufsfxp->fx_current = f;

	mutex_exit(f->uf_vfs_lockp);

	f->uf_lf.lf_flags = 0;
	f->uf_lf.lf_lock = LOCKFS_ELOCK;
	f->uf_retry = -ufsfx_tune.uft_fixstart_period;
	TRIVIA(("] "));
	return (SFRC_SUCCESS);
}

/*
 * UFA_FOUND handler for UF_TRYLCK: gives up (UF_PANIC) once the state
 * has lasted too long, otherwise reads the current lockfs status and
 * attempts the error lock via set_lockfs().  Any step reporting
 * failure sends the record to UF_PANIC.
 */
static sfrc_t
sf_found_trylck(ufs_failure_t *f)
{
	struct lockfs lockfs_status;

	TRIVIA(("[sf_found_trylck"));

	if (trylock_time_exceeded(f) > 0) {
		(void) set_state(f, UF_PANIC);
		TRIVIA((": failed] "));
		return (SFRC_FAIL);
	}

	if (!get_lockfs_status(f, &lockfs_status)) {
		(void) set_state(f, UF_PANIC);
		TRIVIA((": failed] "));
		return (SFRC_FAIL);
	}

	/* carry the current lock key into our request */
	if (f->uf_lf_err == NO_ERROR)
		f->uf_lf.lf_key = lockfs_status.lf_key;

	if (!set_lockfs(f, &lockfs_status)) {
		(void) set_state(f, UF_PANIC);
		TRIVIA((": failed] "));
		return (SFRC_FAIL);
	}
	TRIVIA(("] "));
	return (SFRC_SUCCESS);
}

/*
 * UFA_SET handler for UF_LOCKED: clears UFSFX_REPAIR_START, asks for
 * an immediate rescan and, when arriving from UF_TRYLCK, announces the
 * error lock (plus a manual-repair notice for UFSFX_LCKONLY).
 * Always succeeds.
 */
static sfrc_t
sf_set_locked(ufs_failure_t *f)
{
	TRIVIA(("[sf_set_locked"));

	f->uf_retry = -ufsfx_tune.uft_fixstart_period;

#if defined(DEBUG)
	if (f->uf_flags & UFSFX_REPAIR_START)
		TRIVIA(("clearing UFSFX_REPAIR_START "));
#endif /* DEBUG */

	f->uf_flags &= ~UFSFX_REPAIR_START;

	/* f->uf_s is still the previous state at this point */
	if (f->uf_s & UF_TRYLCK) {
		cmn_err(CE_WARN, "Error-locked %s: \"%s\"",
		    fs_name(f), f->uf_panic_str);

		if (f->uf_flags & UFSFX_LCKONLY)
			cmn_err(CE_WARN, "Manual repair of %s required",
			    fs_name(f));
	}

	/*
	 * just reset to current state
	 */
#if defined(DEBUG)
	TRIVIA(("locked->locked "));
#endif /* DEBUG */

	TRIVIA(("] "));
	return (SFRC_SUCCESS);
}

static sfrc_t
sf_found_lock_fix_cmn(ufs_failure_t *f, ufs_failure_states_t s) 1699 { 1700 time_t toolong; 1701 extern time_t time; 1702 struct buf *bp = NULL; 1703 struct fs *dfs; 1704 time_t concerned, anxious; 1705 sfrc_t rc = SFRC_FAIL; 1706 ulong_t gb_size; 1707 1708 TRIVIA(("[sf_found_lock_fix_cmn (\"%s\")", state_name(s))); 1709 1710 if (s & UF_LOCKED) { 1711 ASSERT(MUTEX_HELD(&f->uf_mutex)); 1712 1713 toolong = time > (ufsfx_tune.uft_too_long + 1714 f->uf_entered_tm); 1715 TRIVIA(("%stoolong", !toolong? "not": "")); 1716 HIDEOUS((": time:%ld, too long:%ld, entered_tm:%ld ", 1717 time, ufsfx_tune.uft_too_long, f->uf_entered_tm)); 1718 1719 if (f->uf_flags & UFSFX_LCKUMOUNT) { 1720 if (set_state(f, UF_UMOUNT)) { 1721 TRIVIA(("] ")); 1722 rc = SFRC_SUCCESS; 1723 } else { 1724 TRIVIA((": failed] ")); 1725 f->uf_retry = 1; 1726 } 1727 return (rc); 1728 } 1729 if (!toolong) { 1730 rc = SFRC_SUCCESS; 1731 } else { 1732 if (!(f->uf_flags & UFSFX_REPAIR_START)) { 1733 cmn_err(CE_WARN, "%s repair of %s not started.", 1734 (f->uf_flags & UFSFX_LCKONLY)? 
1735 "Manual": "Automatic", 1736 fs_name(f)); 1737 1738 f->uf_retry = ufsfx_tune.uft_long_err_period; 1739 } else { 1740 f->uf_retry = ufsfx_tune.uft_long_err_period; 1741 cmn_err(CE_WARN, 1742 "Repair of %s is not timely; operator attention is required.", 1743 fs_name(f)); 1744 } 1745 TRIVIA(("] ")); 1746 return (rc); 1747 } 1748 } 1749 1750 #if defined(DEBUG) 1751 else { 1752 ASSERT(s & UF_FIXING); 1753 } 1754 #endif /* DEBUG */ 1755 1756 /* 1757 * get on disk superblock; force it to really 1758 * come from the disk 1759 */ 1760 (void) bfinval(f->uf_dev, 0); 1761 bp = UFS_BREAD(f->uf_ufsvfsp, f->uf_dev, SBLOCK, SBSIZE); 1762 if (bp) { 1763 bp->b_flags |= (B_STALE | B_AGE); 1764 dfs = bp->b_un.b_fs; 1765 } 1766 1767 if (!bp || (bp->b_flags & B_ERROR) || ((dfs->fs_magic != FS_MAGIC) && 1768 (dfs->fs_magic != MTB_UFS_MAGIC))) { 1769 TRIVIA((": UFS_BREAD(SBLOCK) failed]\n")); 1770 f->uf_retry = 1; 1771 goto out; 1772 } 1773 1774 /* fsck started but we haven't noticed yet? */ 1775 if (!(s & UF_FIXING) && dfs->fs_clean == FSFIX) { 1776 if (!set_state(f, UF_FIXING)) { 1777 TRIVIA((": failed]\n")); 1778 f->uf_retry = 1; 1779 goto out; 1780 } 1781 } 1782 1783 /* fsck started but didn't succeed? */ 1784 if ((s & UF_FIXING) && ((dfs->fs_clean == FSBAD) || !fsck_active(f))) { 1785 TRIVIA((": fs_clean: %d", (int)dfs->fs_clean)); 1786 (void) set_state(f, UF_LOCKED); 1787 cmn_err(CE_WARN, "%s: Manual repair is necessary.", fs_name(f)); 1788 f->uf_retry = ufsfx_tune.uft_long_err_period; 1789 goto out; 1790 } 1791 1792 gb_size = (dfs->fs_size * dfs->fs_bshift) / GB; 1793 toolong = (time_t)((gb_size == 0? 1: gb_size) * SecondsPerGig); 1794 1795 /* fsck started but doesn't seem to be proceeding? 
*/ 1796 if ((s & UF_FIXING) && dfs->fs_clean == FSFIX) { 1797 if (time > f->uf_entered_tm + toolong) { 1798 1799 cmn_err(CE_WARN, 1800 "Repair completion timeout exceeded on %s; manual fsck may be required", 1801 fs_name(f)); 1802 f->uf_retry = ufsfx_tune.uft_long_err_period; 1803 } 1804 } 1805 1806 concerned = f->uf_entered_tm + (toolong / 3); 1807 anxious = f->uf_entered_tm + ((2 * toolong) / 3); 1808 1809 if (time > concerned) 1810 pester_msg(f, time > anxious? CE_WARN: CE_NOTE); 1811 1812 TRIVIA(("] ")); 1813 1814 out: 1815 if (bp) 1816 brelse(bp); 1817 1818 return (rc); 1819 } 1820 1821 static sfrc_t 1822 sf_found_umount(ufs_failure_t *f) 1823 { 1824 extern time_t time; 1825 sfrc_t rc = SFRC_FAIL; 1826 struct vfs *vfsp = f->uf_vfsp; 1827 struct ufsvfs *ufsvfsp = f->uf_ufsvfsp; 1828 int toolong = 0; 1829 int err = 0; 1830 1831 TRIVIA(("[sf_found_umount")); 1832 1833 toolong = time > ufsfx_tune.uft_too_long + f->uf_entered_tm; 1834 if (toolong) { 1835 TRIVIA((": unmount time limit exceeded] ")); 1836 goto out; 1837 } 1838 1839 if (!vfsp || !ufsvfsp) { /* trivial case */ 1840 TRIVIA((": NULL vfsp and/or ufsvfsp, already unmounted?] 
")); 1841 goto out; 1842 } 1843 1844 if (!ULOCKFS_IS_ELOCK(&ufsvfsp->vfs_ulockfs)) { 1845 TRIVIA((": !not error locked?")); 1846 err = EINVAL; 1847 goto out; 1848 } 1849 1850 /* The vn_vfsunlock will be done in dounmount() [.../common/fs/vfs.c] */ 1851 if (vn_vfswlock(vfsp->vfs_vnodecovered)) { 1852 TRIVIA((": couldn't lock coveredvp")); 1853 err = EBUSY; 1854 goto out; 1855 } 1856 1857 if ((err = dounmount(vfsp, 0, kcred)) != 0) { 1858 1859 /* take note, but not many alternatives here */ 1860 mutex_enter(&uf_stats.ufst_mutex); 1861 uf_stats.ufst_unmount_failures++; 1862 mutex_exit(&uf_stats.ufst_mutex); 1863 1864 TRIVIA((": unmount failed] ")); 1865 } else { 1866 cmn_err(CE_NOTE, "unmounted error-locked %s", fs_name(f)); 1867 } 1868 1869 out: 1870 if (toolong || (err != EBUSY && err != EAGAIN)) 1871 rc = set_state(f, UF_NOTFIX); 1872 1873 TRIVIA(("] ")); 1874 return (rc); 1875 } 1876 1877 static sfrc_t 1878 sf_term_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s) 1879 { 1880 extern time_t time; 1881 sfrc_t rc = SFRC_FAIL; 1882 1883 TRIVIA(("[sf_term_cmn, action is %s, state is %s", 1884 act_name(a), state_name(s))); 1885 ASSERT(s & (UF_FIXED | UF_NOTFIX | UF_REPLICA)); 1886 ASSERT(terminal_state(s)); 1887 1888 if (!f->uf_ufsvfsp && !(f->uf_s & (UF_UMOUNT | UF_NOTFIX))) { 1889 TRIVIA((": NULL ufsvfsp (state != UMOUNT | NOTFIX)]\n")); 1890 return (rc); 1891 } 1892 1893 switch (a) { 1894 case UFA_SET: 1895 switch (s) { 1896 case UF_NOTFIX: 1897 case UF_FIXED: 1898 { int need_lock_vfs; 1899 1900 if (f->uf_ufsvfsp && f->uf_vfs_lockp) 1901 need_lock_vfs = !MUTEX_HELD(f->uf_vfs_lockp); 1902 else 1903 need_lock_vfs = 0; 1904 1905 if (need_lock_vfs && !mutex_tryenter(f->uf_vfs_lockp)) { 1906 TRIVIA((": tryenter(vfslockp) fail; retry]\n")); 1907 f->uf_retry = 1; 1908 break; 1909 } 1910 1911 f->uf_end_tm = time; 1912 f->uf_lf.lf_lock = LOCKFS_OLOCK; 1913 f->uf_retry = 0; 1914 1915 if (f->uf_vfs_ufsfxp) 1916 f->uf_vfs_ufsfxp->fx_current = NULL; 1917 1918 if 
(need_lock_vfs) 1919 mutex_exit(f->uf_vfs_lockp); 1920 1921 cmn_err(CE_NOTE, (s & UF_NOTFIX)? "Could not fix %s": 1922 "%s is now accessible", fs_name(f)); 1923 1924 if (s & UF_FIXED) { 1925 mutex_enter(&uf_stats.ufst_mutex); 1926 uf_stats.ufst_num_fixed++; 1927 mutex_exit(&uf_stats.ufst_mutex); 1928 } 1929 (void) timeout(ufsfx_kill_fix_failure_thread, 1930 (void *)(ufsfx_tune.uft_short_err_period * hz), 1931 ufsfx_tune.uft_short_err_period * hz); 1932 rc = SFRC_SUCCESS; 1933 break; 1934 } 1935 case UF_REPLICA: 1936 1937 ASSERT(MUTEX_HELD(f->uf_vfs_lockp)); 1938 1939 /* not actually a replica? */ 1940 if (f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current && 1941 f->uf_vfs_ufsfxp->fx_current != f && 1942 !terminal_state(f->uf_vfs_ufsfxp->fx_current->uf_s)) { 1943 1944 f->uf_orig = f->uf_vfs_ufsfxp->fx_current; 1945 f->uf_retry = 0; 1946 rc = SFRC_SUCCESS; 1947 } else { 1948 TRIVIA((": NULL fx_current]\n")); 1949 f->uf_retry = 1; 1950 } 1951 1952 break; 1953 1954 default: 1955 rc = set_state(f, UF_PANIC); 1956 TRIVIA((": failed] ")); 1957 break; 1958 } 1959 break; 1960 1961 case UFA_FOUND: 1962 /* 1963 * XXX de-allocate these after some period? 1964 * XXX or move to an historical list? 1965 * XXX or have an ioctl which reaps them? 
1966 */ 1967 /* 1968 * For now, since we don't expect lots of failures 1969 * to occur (to the point of memory shortages), 1970 * just punt 1971 */ 1972 1973 /* be sure we're not wasting cpu on old failures */ 1974 if (f->uf_retry != 0) { 1975 mutex_enter(&uf_stats.ufst_mutex); 1976 uf_stats.ufst_cpu_waste++; 1977 mutex_exit(&uf_stats.ufst_mutex); 1978 f->uf_retry = 0; 1979 } 1980 rc = SFRC_SUCCESS; 1981 break; 1982 1983 default: 1984 (void) set_state(f, UF_PANIC); 1985 TRIVIA((": failed] ")); 1986 break; 1987 } 1988 1989 TRIVIA(("] ")); 1990 return (rc); 1991 } 1992 1993 static sfrc_t 1994 sf_panic( 1995 ufs_failure_t *f, 1996 ufsa_t a, 1997 ufs_failure_states_t s) 1998 { 1999 sfrc_t rc = SFRC_FAIL; 2000 2001 TRIVIA(("[sf_panic, action is %s, prev. state is %s", 2002 act_name(a), state_name(f->uf_s))); 2003 ASSERT(s & UF_PANIC); 2004 2005 switch (a) { 2006 case UFA_SET: 2007 f->uf_retry = -ufsfx_tune.uft_short_err_period; 2008 rc = SFRC_SUCCESS; 2009 break; 2010 2011 case UFA_FOUND: 2012 default: 2013 real_panic(f, " "); 2014 2015 /* LINTED: warning: logical expression always true: op "||" */ 2016 ASSERT(DEBUG); 2017 2018 (void) set_state(f, UF_UMOUNT); /* XXX UF_NOTFIX? 
*/ 2019 2020 break; 2021 } 2022 2023 TRIVIA(("] ")); 2024 return (rc); 2025 } 2026 2027 /* 2028 * minimum state function 2029 */ 2030 static sfrc_t 2031 sf_minimum( 2032 ufs_failure_t *f, 2033 ufsa_t a, /* LINTED argument unused in function: ignored */ 2034 ufs_failure_states_t ignored) 2035 { 2036 sfrc_t rc = SFRC_FAIL; 2037 2038 TRIVIA(("[sf_minimum, action is %s", act_name(a))); 2039 2040 switch (a) { 2041 case UFA_SET: 2042 f->uf_retry = 0; 2043 /* FALLTHROUGH */ 2044 2045 case UFA_FOUND: 2046 rc = SFRC_SUCCESS; 2047 break; 2048 2049 default: 2050 (void) set_state(f, UF_PANIC); 2051 TRIVIA((": failed] ")); 2052 break; 2053 } 2054 2055 TRIVIA(("] ")); 2056 return (rc); 2057 } 2058 2059 static int 2060 state_trans_valid(ufs_failure_states_t from, ufs_failure_states_t to) 2061 { 2062 ufsd_t *s; 2063 int valid; 2064 2065 HIDEOUS(("[state_trans_valid")); 2066 2067 if (from & to) 2068 return (1); 2069 2070 s = get_state_desc(to); 2071 2072 /* 2073 * extra test is necessary since we want UF_UNDEF = 0, 2074 * (to detect freshly allocated memory) 2075 * but can't check for that value with a bit test 2076 */ 2077 valid = (to & UF_INIT)? from == s->ud_prev: from & s->ud_prev; 2078 2079 HIDEOUS((": %svalid] ", valid? "": "in")); 2080 return (valid); 2081 } 2082 2083 static int 2084 terminal_state(ufs_failure_states_t state) 2085 { 2086 ufsd_t *s; 2087 2088 HIDEOUS(("[terminal_state")); 2089 2090 s = get_state_desc(state); 2091 2092 HIDEOUS((": %sterminal] ", s->ud_attr.terminal? "": "not ")); 2093 return ((int)s->ud_attr.terminal); 2094 } 2095 2096 static void 2097 alloc_lockfs_comment(ufs_failure_t *f, struct lockfs *lfp) 2098 { 2099 MINUTE(("[alloc_lockfs_comment")); 2100 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2101 2102 /* 2103 * ufs_fiolfs expects a kmem_alloc'ed comment; 2104 * it frees the comment if the lock fails 2105 * or else when the lock is unlocked. 
2106 */ 2107 2108 f->uf_lf.lf_comment = kmem_zalloc(LOCKFS_MAXCOMMENTLEN, KM_NOSLEEP); 2109 if (f->uf_lf.lf_comment) { 2110 char *from; 2111 size_t len; 2112 2113 /* 2114 * use panic string if there's no previous comment 2115 * or if we're setting the error lock 2116 */ 2117 if ((LOCKFS_IS_ELOCK(&f->uf_lf) || !lfp->lf_comment || 2118 lfp->lf_comlen <= 0)) { 2119 from = f->uf_panic_str; 2120 len = LOCKFS_MAXCOMMENTLEN; 2121 } else { 2122 from = lfp->lf_comment; 2123 len = lfp->lf_comlen; 2124 } 2125 2126 bcopy(from, f->uf_lf.lf_comment, len); 2127 f->uf_lf.lf_comlen = len; 2128 2129 } else { 2130 f->uf_lf.lf_comlen = 0; 2131 } 2132 MINUTE(("] ")); 2133 } 2134 2135 static int 2136 set_lockfs(ufs_failure_t *f, struct lockfs *lfp) 2137 { 2138 int (*handle_lockfs_rc)(ufs_failure_t *); 2139 int rc; 2140 2141 MINUTE(("[set_lockfs")); 2142 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2143 ASSERT(!vfs_lock_held(f->uf_vfsp)); 2144 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp)); 2145 2146 if (!f->uf_ufsvfsp) { 2147 MINUTE((": ufsvfsp is NULL]\n")); 2148 return (0); 2149 } 2150 2151 ASSERT(MUTEX_NOT_HELD(&f->uf_ufsvfsp->vfs_ulockfs.ul_lock)); 2152 2153 if (!f->uf_ufsvfsp->vfs_root) { 2154 MINUTE((": vfs_root is NULL]\n")); 2155 return (0); 2156 } 2157 2158 alloc_lockfs_comment(f, lfp); 2159 f->uf_lf_err = 0; 2160 2161 if (!LOCKFS_IS_ELOCK(lfp)) { 2162 lfp->lf_lock = f->uf_lf.lf_lock = LOCKFS_ELOCK; 2163 VN_HOLD(f->uf_ufsvfsp->vfs_root); 2164 f->uf_lf_err = ufs__fiolfs(f->uf_ufsvfsp->vfs_root, 2165 &f->uf_lf, 2166 /* from_user */ 0, 2167 /* from_log */ 0); 2168 VN_RELE(f->uf_ufsvfsp->vfs_root); 2169 } 2170 2171 handle_lockfs_rc = f->uf_lf_err != 0? 
lockfs_failure: lockfs_success;
	rc = handle_lockfs_rc(f);

	MINUTE(("] "));
	return (rc);
}

/*
 * React to a failed lockfs request; the error is taken from
 * f->uf_lf_err.
 *
 *   EACCES, EPERM, EIO, EROFS, EDEADLK: treated as non-transient; the
 *	record goes to UF_UMOUNT when lock+unmount was requested (and
 *	the error is not EDEADLK), else to UF_PANIC, with a real panic
 *	if even that transition fails.
 *   EBUSY, EAGAIN: a short-period retry is scheduled; unless
 *	T_DONTPEND was set on the current thread (in which case it is
 *	just cleared), a record not yet in UF_LOCKED/UF_FIXING is moved
 *	to UF_LOCKED (EBUSY) or UF_FIXING (EAGAIN).
 *   EINVAL: the record goes to UF_NOTFIX.
 *   anything else: a short-period retry is scheduled.
 *
 * Returns 0 if the record has no ufsvfs or the EBUSY/EAGAIN state
 * change fails, 1 otherwise.  Caller must hold f->uf_mutex.
 */
static int
lockfs_failure(ufs_failure_t *f)
{
	int			error;
	ufs_failure_states_t	s;

	TRIVIA(("[lockfs_failure"));
	ASSERT(MUTEX_HELD(&f->uf_mutex));

	if (!f->uf_ufsvfsp) {
		TRIVIA((": ufsvfsp is NULL]\n"));
		return (0);
	}

	error = f->uf_lf_err;
	switch (error) {
			/* non-transient errors: */
	case EACCES:	/* disk/in-core metadata reconciliation failed  */
	case EPERM:	/* inode reconciliation failed; incore inode changed? */
	case EIO:	/* device is hard-locked or not responding */
	case EROFS:	/* device is write-locked */
	case EDEADLK:	/* can't lockfs; deadlock would result; */
			/* Swapping or saving accounting records */
			/* onto this fs can cause this errno. */

		MINOR(("ufs_fiolfs(\"%s\") of %s failed: %s (%d)",
		    fs_name(f),
		    lock_name(&f->uf_lf),
		    err_name(error),
		    error));

		/*
		 * if can't get lock, then fallback to panic, unless
		 * unless unmount was requested (although unmount will
		 * probably fail if the lock failed, so we'll panic
		 * anyway
		 */

		s = ((f->uf_flags & UFSFX_LCKUMOUNT) && error != EDEADLK)?
		    UF_UMOUNT: UF_PANIC;

		if (!set_state(f, s)) {
			real_panic(f, " ");
			/*NOTREACHED*/
			break;
		}
		break;


	case EBUSY:
	case EAGAIN:

		f->uf_retry = ufsfx_tune.uft_short_err_period;
		if (curthread->t_flag & T_DONTPEND) {
			curthread->t_flag &= ~T_DONTPEND;

		} else if (!(f->uf_s & (UF_LOCKED | UF_FIXING))) {
			ufs_failure_states_t state;
			/*
			 * if we didn't know that the fix had started,
			 * take note
			 */
			state = error == EBUSY? UF_LOCKED: UF_FIXING;
			if (!set_state(f, state)) {
				TRIVIA((": failed] "));
				return (0);
			}
		}
		break;

	/* note: default precedes the final case label; order is harmless */
	default:	/* some other non-fatal error */
		MINOR(("lockfs(\"%s\") of %s returned %s (%d)",
		    lock_name(&f->uf_lf),
		    fs_name(f),
		    err_name(f->uf_lf_err),
		    f->uf_lf_err));

		f->uf_retry = ufsfx_tune.uft_short_err_period;
		break;

	case EINVAL:	/* unmounted? */
		(void) set_state(f, UF_NOTFIX);
		break;
	}
	TRIVIA(("] "));
	return (1);
}

/*
 * React to a successful lockfs request, keyed on the lock type that
 * was requested in f->uf_lf: an error lock moves the record to
 * UF_LOCKED, an unlock is reported and passed to ufsfx_unlockfs(), and
 * anything else moves the record to UF_NOTFIX.
 *
 * Returns 0 if the record has no ufsvfs or a state change fails,
 * 1 otherwise.  Caller must hold f->uf_mutex.
 */
static int
lockfs_success(ufs_failure_t *f)
{
	TRIVIA(("[lockfs_success"));
	ASSERT(MUTEX_HELD(&f->uf_mutex));

	if (!f->uf_ufsvfsp) {
		TRIVIA((": ufsvfsp is NULL]\n"));
		return (0);
	}

	switch (f->uf_lf.lf_lock) {
	case LOCKFS_ELOCK:	/* error lock worked */

		if (!set_state(f, UF_LOCKED)) {
			TRIVIA((": failed] "));
			return (0);
		}
		break;

	case LOCKFS_ULOCK:	/* unlock worked */
		/*
		 * how'd we get here?
		 * This should be done from fsck's unlock,
		 * not from this thread's context. 
2291 */ 2292 cmn_err(CE_WARN, "Unlocked error-lock of %s", fs_name(f)); 2293 ufsfx_unlockfs(f->uf_ufsvfsp); 2294 break; 2295 2296 default: 2297 if (!set_state(f, UF_NOTFIX)) { 2298 TRIVIA((": failed] ")); 2299 return (0); 2300 } 2301 break; 2302 } 2303 TRIVIA(("] ")); 2304 return (1); 2305 } 2306 2307 /* 2308 * when fsck is running it puts its pid into the lockfs 2309 * comment structure, prefaced by PIDSTR 2310 */ 2311 const char *PIDSTR = "[pid:"; 2312 static int 2313 fsck_active(ufs_failure_t *f) 2314 { 2315 char *cp; 2316 int i, found, errlocked; 2317 size_t comlen; 2318 const int PIDSTRLEN = (int)strlen(PIDSTR); 2319 struct ulockfs *ulp = &f->uf_ufsvfsp->vfs_ulockfs; 2320 2321 TRIVIA(("[fsck_active")); 2322 2323 ASSERT(f); 2324 ASSERT(f->uf_s & UF_FIXING); 2325 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2326 ASSERT(f->uf_ufsvfsp); 2327 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp)); 2328 ASSERT(MUTEX_NOT_HELD(&ulp->ul_lock)); 2329 2330 mutex_enter(&ulp->ul_lock); 2331 cp = ulp->ul_lockfs.lf_comment; 2332 comlen = ulp->ul_lockfs.lf_comlen; 2333 errlocked = (int)ULOCKFS_IS_ELOCK(ulp); 2334 mutex_exit(&ulp->ul_lock); 2335 2336 if (!cp || comlen == 0) { 2337 TRIVIA((": null comment or comlen <= 0, found:0]")); 2338 return (0); 2339 } 2340 2341 for (found = i = 0; !found && i < (comlen - PIDSTRLEN); i++, cp++) 2342 found = strncmp(cp, PIDSTR, PIDSTRLEN) == 0; 2343 2344 TRIVIA(("found:%d, is_elock:%d]", found, errlocked)); 2345 return (errlocked & found); 2346 } 2347 2348 static const char unknown_fs[] = "<unknown fs>"; 2349 static const char null_failure[] = "<NULL ufs failure record; unknown fs>"; 2350 static const char mutated_vfs_bufp[] = "<mutated vfs_bufp, unknown fs>"; 2351 static const char mutated_vfs_fs[] = "<mutated vfs_fs, unknown fs>"; 2352 2353 static char * 2354 fs_name(ufs_failure_t *f) 2355 { 2356 HIDEOUS(("[fs_name")); 2357 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2358 2359 if (!f) { 2360 HIDEOUS((": failure ptr is NULL]\n")); 2361 return ((char *)null_failure); 2362 } 
2363 2364 if (f->uf_fsname[0] != '\0') { 2365 HIDEOUS((": return (uf_fsname)]\n")); 2366 return (f->uf_fsname); 2367 } 2368 2369 if (MUTEX_HELD(f->uf_vfs_lockp)) { 2370 if (f->uf_bp != f->uf_ufsvfsp->vfs_bufp) { 2371 HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n", 2372 (void *)f->uf_bp, (void *)f->uf_ufsvfsp->vfs_bufp)); 2373 return ((char *)mutated_vfs_bufp); 2374 } 2375 if (f->uf_fs != f->uf_ufsvfsp->vfs_fs) { 2376 HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n", 2377 (void *)f->uf_fs, (void *)f->uf_ufsvfsp->vfs_fs)); 2378 return ((char *)mutated_vfs_fs); 2379 } 2380 if (f->uf_ufsvfsp && f->uf_bp && f->uf_fs && 2381 *f->uf_fs->fs_fsmnt != '\0') { 2382 HIDEOUS((": return (fs_fsmnt)]\n")); 2383 return (f->uf_fs->fs_fsmnt); 2384 } 2385 } 2386 2387 HIDEOUS((": unknown file system]\n")); 2388 return ((char *)unknown_fs); 2389 } 2390 2391 #if defined(DEBUG) 2392 static char * 2393 lock_name(struct lockfs *lfp) 2394 { 2395 struct lock_description *l; 2396 char *lname; 2397 2398 HIDEOUS(("[lock_name")); 2399 2400 lname = lock_desc[0].ld_name; 2401 for (l = &lock_desc[1]; l->ld_name != NULL; l++) { 2402 if (lfp && lfp->lf_lock == l->ld_type) { 2403 lname = l->ld_name; 2404 break; 2405 } 2406 } 2407 HIDEOUS(("]")); 2408 return (lname); 2409 } 2410 2411 static char * 2412 state_name(ufs_failure_states_t state) 2413 { 2414 ufsd_t *s; 2415 2416 HIDEOUS(("[state_name")); 2417 2418 s = get_state_desc(state); 2419 2420 HIDEOUS(("]")); 2421 return (s->ud_name); 2422 } 2423 2424 static char * 2425 err_name(int error) 2426 { 2427 struct error_description *e; 2428 2429 HIDEOUS(("[err_name")); 2430 2431 for (e = &err_desc[1]; e->ed_name != NULL; e++) { 2432 if (error == e->ed_errno) { 2433 HIDEOUS(("]")); 2434 return (e->ed_name); 2435 } 2436 } 2437 HIDEOUS(("]")); 2438 return (err_desc[0].ed_name); 2439 } 2440 2441 static char * 2442 act_name(ufsa_t action) 2443 { 2444 struct action_description *a; 2445 2446 HIDEOUS(("[act_name")); 2447 2448 for (a = &act_desc[1]; a->ad_name != 
NULL; a++) {
		if (action == a->ad_v) {
			HIDEOUS(("]"));
			return (a->ad_name);
		}
	}
	HIDEOUS(("]"));
	return (act_desc[0].ad_name);
}

/*
 * dump failure list
 *
 * Prints the optional msg, the queue counters, the uf_stats counters
 * and then every failure record on the queue.  Uses try-locks only:
 * gives up if the queue mutex is not already held and cannot be taken,
 * and skips any record whose mutex is busy.  The queue mutex is
 * released on exit only if it was acquired here.
 */
static void
dump_uf_list(char *msg)
{
	ufs_failure_t	*f;
	int		 i;
	int		 list_was_locked = MUTEX_HELD(&ufs_fix.uq_mutex);

	if (!list_was_locked && !mutex_tryenter(&ufs_fix.uq_mutex)) {
		printf("dump_uf_list: couldn't get list lock\n");
		return;
	}

	if (msg) {
		printf("\n%s", msg);
	}
	printf("\ndump_uf_list:\n\tuq_lowat: %d, uq_ne: %d\n",
	    ufs_fix.uq_lowat, ufs_fix.uq_ne);

	mutex_enter(&uf_stats.ufst_mutex);
	printf("\tuf_stats.current_races: %ld\n", uf_stats.ufst_current_races);
	printf("\tuf_stats.num_failed: %ld\n", uf_stats.ufst_num_failed);
	printf("\tuf_stats.num_fixed: %ld\n", uf_stats.ufst_num_fixed);
	printf("\tuf_stats.cpu_waste: %ld\n", uf_stats.ufst_cpu_waste);
	printf("\tuf_stats.lock_violations: %ld, unmount_failures: %ld\n",
	    uf_stats.ufst_lock_violations, uf_stats.ufst_unmount_failures);
	mutex_exit(&uf_stats.ufst_mutex);

	/* i is the 1-based position printed in front of each record */
	for (f = ufs_fix.uq_ufhead, i = 1; f; f = f->uf_next, i++) {

		if (!mutex_tryenter(&f->uf_mutex)) {
			printf("%d.\t\"skipped - try enter failed\"\n", i);
			continue;
		}

		dump_uf(f, i);

		mutex_exit(&f->uf_mutex);
	}

	printf("\n");

	if (!list_was_locked)
		mutex_exit(&ufs_fix.uq_mutex);
}

/*
 * Print one failure record; i is the ordinal shown in front of it.
 * A NULL record is reported and otherwise ignored.
 */
static void
dump_uf(ufs_failure_t *f, int i)
{
	if (!f) {
		printf("dump_uf: NULL failure record\n");
		return;
	}

	printf("%d.\t\"%s\" is %s.\n",
	    i, fs_name(f), state_name(f->uf_s));
	printf("\t\"%s\"\tAddr: 0x%p\n", f->uf_panic_str, (void *)f);
	printf("\tNext: 0x%p\t\tPrev: 0x%p\n",
	    (void *)f->uf_next, (void *)f->uf_prev);

	if (f->uf_orig)
		printf("\tOriginal failure: 0x%p \"%s\"\n",
		    (void *)f->uf_orig, f->uf_orig->uf_panic_str);

	printf("\tUfsvfs: 0x%p\t\tVfs_lockp: 0x%p\n",
	    (void *)f->uf_ufsvfsp, (void *)f->uf_vfs_lockp);
	printf("\tVfs_fsfxp: 0x%p\n", (void *)f->uf_vfs_ufsfxp);
	printf("\tVfs_bufp: 0x%p", (void *)f->uf_bp);

	/* uf_fs is only printed when a superblock buffer is recorded */
	if (f->uf_bp)
		printf("\t\tVfs_fs: 0x%p\n", (void *)f->uf_fs);
	else
		printf("\n");

	printf("\tBegin: 0x%lx\tEntered: 0x%lx\tEnd: 0x%lx\n",
	    f->uf_begin_tm, f->uf_entered_tm, f->uf_end_tm);

	printf("\tFlags: (%d) %s%s%s%s", f->uf_flags,
	    f->uf_flags & UFSFX_LCKONLY? "\"lock only\" " : "",
	    f->uf_flags & UFSFX_LCKUMOUNT? "\"lock+unmount\" " : "",
	    f->uf_flags & UFSFX_REPAIR_START? "\"started repair\" " : "",
	    f->uf_flags == 0? "<none>" : "");

	printf("\tRetry: %ld seconds\n", f->uf_retry);

	printf("\tLockfs:\ttype: %s\terror: %s (%d)\n",
	    lock_name(&f->uf_lf),
	    err_name(f->uf_lf_err), f->uf_lf_err);

}
#endif /* DEBUG */

/*
 * returns # of ufs_failures in a non-terminal state on queue
 * used to coordinate with hlock thread (see ufs_thread.c)
 * and to determine when the error lock thread may exit
 *
 * Returns -1 if the queue mutex cannot be taken without blocking.
 * Records whose mutex is busy are skipped and therefore not counted.
 */

int
ufsfx_get_failure_qlen(void)
{
	ufs_failure_t	*f;
	ufsd_t		*s;
	int		 qlen = 0;

	MINUTE(("[ufsfx_get_failure_qlen"));

	if (!mutex_tryenter(&ufs_fix.uq_mutex))
		return (-1);

	/*
	 * walk down failure list
	 */

	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {

		if (!mutex_tryenter(&f->uf_mutex))
			continue;

		s = get_state_desc(f->uf_s);

		if (s->ud_attr.terminal) {
			mutex_exit(&f->uf_mutex);
			continue;
		}

		MINUTE((": found: %s, \"%s: %s\"\n",
		    fs_name(f), state_name(f->uf_s), f->uf_panic_str));

		qlen++;
		mutex_exit(&f->uf_mutex);
	}

	mutex_exit(&ufs_fix.uq_mutex);

	MINUTE((": qlen=%d]\n", qlen));

	return
(qlen); 2598 } 2599 2600 /* 2601 * timeout routine 2602 * called to shutdown fix failure thread and server daemon 2603 */ 2604 static void 2605 ufsfx_kill_fix_failure_thread(void *arg) 2606 { 2607 clock_t odelta = (clock_t)arg; 2608 int qlen; 2609 2610 MAJOR(("[ufsfx_kill_fix_failure_thread")); 2611 2612 qlen = ufsfx_get_failure_qlen(); 2613 2614 if (qlen < 0) { 2615 clock_t delta; 2616 2617 delta = odelta << 1; 2618 if (delta <= 0) 2619 delta = INT_MAX; 2620 2621 (void) timeout(ufsfx_kill_fix_failure_thread, 2622 (void *)delta, delta); 2623 MAJOR((": rescheduled")); 2624 2625 } else if (qlen == 0) { 2626 ufs_thread_exit(&ufs_fix); 2627 MAJOR((": killed")); 2628 } 2629 /* 2630 * else 2631 * let timeout expire 2632 */ 2633 MAJOR(("]\n")); 2634 } 2635