1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/param.h> 28 #include <sys/systm.h> 29 #include <sys/errno.h> 30 #include <sys/mode.h> 31 #include <sys/sysmacros.h> 32 #include <sys/cmn_err.h> 33 #include <sys/varargs.h> 34 #include <sys/time.h> 35 #include <sys/buf.h> 36 #include <sys/kmem.h> 37 #include <sys/t_lock.h> 38 #include <sys/poll.h> 39 #include <sys/debug.h> 40 #include <sys/cred.h> 41 #include <sys/lockfs.h> 42 #include <sys/fs/ufs_fs.h> 43 #include <sys/fs/ufs_inode.h> 44 #include <sys/fs/ufs_panic.h> 45 #include <sys/fs/ufs_lockfs.h> 46 #include <sys/fs/ufs_trans.h> 47 #include <sys/fs/ufs_mount.h> 48 #include <sys/fs/ufs_prot.h> 49 #include <sys/fs/ufs_bio.h> 50 #include <sys/pathname.h> 51 #include <sys/utsname.h> 52 #include <sys/conf.h> 53 54 /* handy */ 55 #define abs(x) ((x) < 0? -(x): (x)) 56 57 #if defined(DEBUG) 58 59 #define DBGLVL_NONE 0x00000000 60 #define DBGLVL_MAJOR 0x00000100 61 #define DBGLVL_MINOR 0x00000200 62 #define DBGLVL_MINUTE 0x00000400 63 #define DBGLVL_TRIVIA 0x00000800 64 #define DBGLVL_HIDEOUS 0x00001000 65 66 #define DBGFLG_NONE 0x00000000 67 #define DBGFLG_NOPANIC 0x00000001 68 #define DBGFLG_LVLONLY 0x00000002 69 #define DBGFLG_FIXWOULDPANIC 0x00000004 70 71 #define DBGFLG_FLAGMASK 0x0000000F 72 #define DBGFLG_LEVELMASK ~DBGFLG_FLAGMASK 73 74 #define DEBUG_FLAGS (ufs_fix_failure_dbg & DBGFLG_FLAGMASK) 75 #define DEBUG_LEVEL (ufs_fix_failure_dbg & DBGFLG_LEVELMASK) 76 77 unsigned int ufs_fix_failure_dbg = DBGLVL_NONE | DBGFLG_NONE; 78 79 #define DCALL(dbg_level, call) \ 80 { \ 81 if (DEBUG_LEVEL != DBGLVL_NONE) { \ 82 if (DEBUG_FLAGS & DBGFLG_LVLONLY) { \ 83 if (DEBUG_LEVEL & dbg_level) { \ 84 call; \ 85 } \ 86 } else { \ 87 if (dbg_level <= DEBUG_LEVEL) { \ 88 call; \ 89 } \ 90 } \ 91 } \ 92 } 93 94 #define DPRINTF(dbg_level, msg) DCALL(dbg_level, printf msg) 95 96 #define MAJOR(msg) DPRINTF(DBGLVL_MAJOR, msg) 97 #define MINOR(msg) DPRINTF(DBGLVL_MINOR, msg) 98 #define MINUTE(msg) DPRINTF(DBGLVL_MINUTE, msg) 99 #define TRIVIA(msg) DPRINTF(DBGLVL_TRIVIA, msg) 100 #define HIDEOUS(msg) DPRINTF(DBGLVL_HIDEOUS, msg) 101 102 #else /* !DEBUG */ 103 104 #define DCALL(ignored_dbg_level, ignored_routine) 105 #define MAJOR(ignored) 106 #define MINOR(ignored) 107 #define MINUTE(ignored) 108 #define TRIVIA(ignored) 109 #define HIDEOUS(ignored) 110 111 #endif /* DEBUG */ 112 113 #define NULLSTR(str) (!(str) || *(str) == '\0'? "<null>" : (str)) 114 #define NULSTRING "" 115 116 /* somewhat arbitrary limits, in seconds */ 117 /* all probably ought to be different, but these are convenient for debugging */ 118 const time_t UF_TOO_LONG = 128; /* max. wait for fsck start */ 119 120 /* all of these are in units of seconds used for retry period while ... */ 121 const time_t UF_FIXSTART_PERIOD = 16; /* awaiting fsck start */ 122 const time_t UF_FIXPOLL_PERIOD = 256; /* awaiting fsck finish */ 123 const time_t UF_SHORT_ERROR_PERIOD = 4; /* after (lockfs) error */ 124 const time_t UF_LONG_ERROR_PERIOD = 512; /* after (lockfs) error */ 125 126 #define NO_ERROR 0 127 #define LOCKFS_OLOCK LOCKFS_MAXLOCK+1 128 129 const ulong_t GB = 1024 * 1024 * 1024; 130 const ulong_t SecondsPerGig = 1024; /* ~17 minutes (overestimate) */ 131 132 /* 133 * per filesystem flags 134 */ 135 const int UFSFX_PANIC = (UFSMNT_ONERROR_PANIC >> 4); 136 const int UFSFX_LCKONLY = (UFSMNT_ONERROR_LOCK >> 4); 137 const int UFSFX_LCKUMOUNT = (UFSMNT_ONERROR_UMOUNT >> 4); 138 const int UFSFX_DEFAULT = (UFSMNT_ONERROR_DEFAULT >> 4); 139 const int UFSFX_REPAIR_START = 0x10000000; 140 141 /* return protocols */ 142 143 typedef enum triage_return_code { 144 TRIAGE_DEAD = -1, 145 TRIAGE_NO_SPIRIT, 146 TRIAGE_ATTEND_TO 147 } triage_t; 148 149 typedef enum statefunc_return_code { 150 SFRC_SUCCESS = 1, 151 SFRC_FAIL = 0 152 } sfrc_t; 153 154 /* external references */ 155 /* in ufs_thread.c */ 156 extern int ufs_thread_run(struct ufs_q *, callb_cpr_t *cprinfop); 157 extern int ufs_checkaccton(vnode_t *); /* in ufs_lockfs.c */ 158 extern int ufs_checkswapon(vnode_t *); /* in ufs_lockfs.c */ 159 160 extern struct pollhead ufs_pollhd; /* in ufs_vnops.c */ 161 162 /* globals */ 163 struct ufs_q ufs_fix; 164 165 /* 166 * patchable constants: 167 * These are set in ufsfx_init() [called at modload] 168 */ 169 struct ufs_failure_tunable { 170 long uft_too_long; /* limit repair startup time */ 171 long uft_fixstart_period; /* pre-repair start period */ 172 long uft_fixpoll_period; /* post-fsck start period */ 173 long uft_short_err_period; /* post-error short period */ 174 long uft_long_err_period; /* post-error long period */ 175 } ufsfx_tune; 176 177 /* internal statistics of events */ 178 struct uf_statistics { 179 ulong_t ufst_lock_violations; 180 ulong_t ufst_current_races; 181 ulong_t ufst_unmount_failures; 182 ulong_t ufst_num_fixed; 183 ulong_t ufst_num_failed; 184 ulong_t ufst_cpu_waste; 185 time_t ufst_last_start_tm; 186 kmutex_t ufst_mutex; 187 } uf_stats; 188 189 typedef enum state_action { 190 UFA_ERROR = -1, /* internal error */ 191 UFA_FOUND, /* found uf in state */ 192 UFA_SET /* change uf to state */ 193 } ufsa_t; 194 195 /* state definition */ 196 typedef struct uf_state_desc { 197 int ud_v; /* value */ 198 char *ud_name; /* name */ 199 sfrc_t (*ud_sfp)(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 200 /* per-state actions */ 201 ufs_failure_states_t ud_prev; /* valid prev. states */ 202 203 struct uf_state_desc_attr { 204 unsigned terminal:1; /* no action req. if found */ 205 unsigned at_fail:1; /* state set by thread */ 206 /* encountering the error */ 207 unsigned unused; 208 } ud_attr; 209 } ufsd_t; 210 211 /* 212 * forward references 213 */ 214 215 /* thread to watch for failures */ 216 static void ufsfx_thread_fix_failures(void *); 217 static int ufsfx_do_failure_q(void); 218 static void ufsfx_kill_fix_failure_thread(void *); 219 220 /* routines called when failure occurs */ 221 static int ufs_fault_v(vnode_t *, char *, va_list) 222 __KVPRINTFLIKE(2); 223 static ufs_failure_t *init_failure(vnode_t *, char *, va_list) 224 __KVPRINTFLIKE(2); 225 static void queue_failure(ufs_failure_t *); 226 /*PRINTFLIKE2*/ 227 static void real_panic(ufs_failure_t *, const char *, ...) 228 __KPRINTFLIKE(2); 229 static void real_panic_v(ufs_failure_t *, const char *, va_list) 230 __KVPRINTFLIKE(2); 231 static triage_t triage(vnode_t *); 232 233 /* routines called when failure record is acted upon */ 234 static sfrc_t set_state(ufs_failure_t *, ufs_failure_states_t); 235 static int state_trans_valid(ufs_failure_states_t, ufs_failure_states_t); 236 static int terminal_state(ufs_failure_states_t); 237 238 /* routines called when states entered/found */ 239 static sfrc_t sf_minimum(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 240 static sfrc_t sf_undef(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 241 static sfrc_t sf_init(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 242 static sfrc_t sf_queue(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 243 static sfrc_t sf_found_queue(ufs_failure_t *); 244 static sfrc_t sf_nonterm_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 245 static sfrc_t sf_term_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 246 static sfrc_t sf_panic(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 247 static sfrc_t sf_set_trylck(ufs_failure_t *); 248 static sfrc_t sf_set_locked(ufs_failure_t *); 249 static sfrc_t sf_found_trylck(ufs_failure_t *); 250 static sfrc_t sf_found_lock_fix_cmn(ufs_failure_t *, ufs_failure_states_t); 251 static sfrc_t sf_found_umount(ufs_failure_t *); 252 253 /* support routines, called by sf_nonterm_cmn and sf_term_cmn */ 254 static time_t trylock_time_exceeded(ufs_failure_t *); 255 static void pester_msg(ufs_failure_t *, int); 256 static int get_lockfs_status(ufs_failure_t *, struct lockfs *); 257 static void alloc_lockfs_comment(ufs_failure_t *, struct lockfs *); 258 static int set_lockfs(ufs_failure_t *, struct lockfs *); 259 static int lockfs_failure(ufs_failure_t *); 260 static int lockfs_success(ufs_failure_t *); 261 static int fsck_active(ufs_failure_t *); 262 263 /* low-level support routines */ 264 static ufsd_t *get_state_desc(ufs_failure_states_t); 265 static char *fs_name(ufs_failure_t *); 266 267 #if defined(DEBUG) 268 static char *state_name(ufs_failure_states_t); 269 static char *lock_name(struct lockfs *); 270 static char *err_name(int); 271 static char *act_name(ufsa_t); 272 static void dump_uf_list(char *msg); 273 static void dump_uf(ufs_failure_t *, int i); 274 #endif /* DEBUG */ 275 /* 276 * 277 * State Transitions: 278 * 279 * normally: 280 * if flagged to be locked but not unmounted: (UFSMNT_ONERROR_LOCK) 281 * UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> FIXING -> FIXED 282 * 283 * The only difference between these two is that the fsck must be started 284 * manually. 285 * 286 * if flagged to be unmounted: (UFSMNT_ONERROR_UMOUNT) 287 * UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> UMOUNT -> NOTFIX 288 * 289 * if flagged to panic: (UFSMNT_ONERROR_PANIC) 290 * UNDEF -> INIT -> PANIC 291 * 292 * if a secondary panic on a file system which has an active failure 293 * record: 294 * UNDEF -> INIT -> QUEUE -> REPLICA 295 * 296 * UNDEF, INIT, QUEUE all are set in the context of the failing thread. 297 * All other states (except possibly PANIC) are set in by the monitor 298 * (lock) thread. 299 * 300 */ 301 302 ufsd_t state_desc[] = 303 { 304 { UF_ILLEGAL, "in an unknown state", sf_minimum, UF_ILLEGAL, 305 { 0, 1, 0 } }, 306 { UF_UNDEF, "undefined", sf_undef, UF_UNDEF, 307 { 0, 1, 0 } }, 308 { UF_INIT, "being initialized", sf_init, UF_UNDEF, 309 { 0, 1, 0 } }, 310 { UF_QUEUE, "queued", sf_queue, UF_INIT, 311 { 0, 1, 0 } }, 312 { UF_TRYLCK, "trying to be locked", sf_nonterm_cmn, 313 UF_QUEUE, { 0, 0, 0 } }, 314 { UF_LOCKED, "locked", sf_nonterm_cmn, 315 UF_TRYLCK | UF_FIXING, { 0, 0, 0 } }, 316 { UF_UMOUNT, "being unmounted", sf_nonterm_cmn, 317 318 #if defined(DEBUG) 319 UF_PANIC | 320 #endif /* DEBUG */ 321 UF_TRYLCK | UF_LOCKED, { 0, 0, 0 } }, 322 { UF_FIXING, "being fixed", sf_nonterm_cmn, 323 UF_LOCKED, { 0, 0, 0 } }, 324 { UF_FIXED, "fixed", sf_term_cmn, 325 UF_FIXING, { 1, 0, 0 } }, 326 { UF_NOTFIX, "not fixed", sf_term_cmn, 327 328 #if defined(DEBUG) 329 UF_PANIC | 330 #endif /* DEBUG */ 331 332 UF_QUEUE | UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING, 333 { 1, 0, 0 } }, 334 { UF_REPLICA, "a replica", sf_term_cmn, 335 UF_QUEUE, { 1, 0, 0 } }, 336 { UF_PANIC, "panicking", sf_panic, 337 /* XXX make this narrower */ UF_ALLSTATES, { 0, 0, 0 } }, 338 { UF_UNDEF, NULL, ((sfrc_t (*)()) NULL), 339 UF_UNDEF, { 0, 0, 0 } } 340 }; 341 342 /* unified collection */ 343 struct ufsfx_info { 344 struct uf_statistics *ufi_statp; 345 struct ufs_failure_tunable *ufi_tunep; 346 ufsd_t *ufi_statetab; 347 } uffsinfo; 348 349 #if defined(DEBUG) 350 struct action_description { 351 ufsa_t ad_v; 352 char *ad_name; 353 }; 354 355 #define EUNK (-1) 356 357 struct error_description { 358 int ed_errno; 359 char *ed_name; 360 } err_desc[] = 361 { 362 { EUNK, "<unexpected errno?>" }, 363 { EINVAL, "EINVAL" }, 364 { EACCES, "EACCES" }, 365 { EPERM, "EPERM" }, 366 { EIO, "EIO" }, 367 { EDEADLK, "EDEADLK" }, 368 { EBUSY, "EBUSY" }, 369 { EAGAIN, "EAGAIN" }, 370 { ERESTART, "ERESTART" }, 371 { ETIMEDOUT, "ETIMEDOUT" }, 372 { NO_ERROR, "Ok" }, 373 { EUNK, NULL } 374 }; 375 376 struct action_description act_desc[] = 377 { 378 { UFA_ERROR, "<unexpected action?>" }, 379 { UFA_FOUND, "\"found\"" }, 380 { UFA_SET, "\"set\"" }, 381 { UFA_ERROR, NULL }, 382 }; 383 384 #define LOCKFS_BADLOCK (-1) 385 386 struct lock_description { 387 int ld_type; 388 char *ld_name; 389 } lock_desc[] = 390 { 391 { LOCKFS_BADLOCK, "<unexpected lock?>" }, 392 { LOCKFS_ULOCK, "Unlock" }, 393 { LOCKFS_ELOCK, "Error Lock" }, 394 { LOCKFS_HLOCK, "Hard Lock" }, 395 { LOCKFS_OLOCK, "Old Lock" }, 396 { LOCKFS_BADLOCK, NULL } 397 }; 398 399 #endif /* DEBUG */ 400 401 /* 402 * ufs_fault, ufs_fault_v 403 * 404 * called instead of cmn_err(CE_PANIC, ...) by ufs routines 405 * when a failure is detected to put the file system into an 406 * error state (if possible) or to devolve to a panic otherwise 407 * 408 * vnode is some vnode in this file system, used to find the way 409 * to ufsvfs, vfsp etc. Since a panic can be called from many 410 * levels, the vnode is the most convenient hook to pass through. 411 * 412 */ 413 414 /*PRINTFLIKE2*/ 415 int 416 ufs_fault(vnode_t *vp, char *fmt, ...) 417 { 418 va_list adx; 419 int error; 420 421 MINOR(("[ufs_fault")); 422 423 va_start(adx, fmt); 424 error = ufs_fault_v(vp, fmt, adx); 425 va_end(adx); 426 427 MINOR((": %s (%d)]\n", err_name(error), error)); 428 return (error); 429 } 430 431 const char *nullfmt = "<null format?>"; 432 433 static int 434 ufs_fault_v(vnode_t *vp, char *fmt, va_list adx) 435 { 436 ufs_failure_t *new = NULL; 437 ufsvfs_t *ufsvfsp; 438 triage_t fix; 439 int err = ERESTART; 440 int need_vfslock; 441 442 MINOR(("[ufs_fault_v")); 443 444 if (fmt == NULL) 445 fmt = (char *)nullfmt; 446 447 fix = triage(vp); 448 449 if (vp) { 450 ufsvfsp = (struct ufsvfs *)vp->v_vfsp->vfs_data; 451 452 /* 453 * Something bad has happened. That is why we are here. 454 * 455 * In order for the bad thing to be recorded in the superblock 456 * we need to write to the superblock directly. 457 * In the case that logging is enabled the logging code 458 * would normally intercept our write as a delta to the log, 459 * thus we mark the filesystem FSBAD in any case. 460 */ 461 need_vfslock = !MUTEX_HELD(&ufsvfsp->vfs_lock); 462 463 if (need_vfslock) { 464 mutex_enter(&ufsvfsp->vfs_lock); 465 } 466 467 ufsvfsp->vfs_fs->fs_clean = FSBAD; 468 ASSERT(SEMA_HELD(&ufsvfsp->vfs_bufp->b_sem)); 469 ufsvfsp->vfs_bufp->b_flags &= 470 ~(B_ASYNC | B_READ | B_DONE | B_ERROR | B_DELWRI); 471 472 (void) bdev_strategy(ufsvfsp->vfs_bufp); 473 (void) biowait(ufsvfsp->vfs_bufp); 474 475 if (need_vfslock) { 476 mutex_exit(&ufsvfsp->vfs_lock); 477 } 478 } 479 480 switch (fix) { 481 482 default: 483 case TRIAGE_DEAD: 484 case TRIAGE_NO_SPIRIT: 485 486 real_panic_v(new, fmt, adx); 487 /* LINTED: warning: logical expression always true: op "||" */ 488 ASSERT(DEBUG); 489 err = EAGAIN; 490 491 #if defined(DEBUG) 492 if (!(DEBUG_FLAGS & DBGFLG_FIXWOULDPANIC)) { 493 break; 494 } 495 #else 496 break; 497 498 #endif /* DEBUG */ 499 /* FALLTHROUGH */ 500 501 case TRIAGE_ATTEND_TO: 502 503 /* q thread not running yet? */ 504 if (mutex_tryenter(&ufs_fix.uq_mutex)) { 505 if (!ufs_fix.uq_threadp) { 506 mutex_exit(&ufs_fix.uq_mutex); 507 ufs_thread_start(&ufs_fix, 508 ufsfx_thread_fix_failures, NULL); 509 ufs_fix.uq_threadp->t_flag |= T_DONTBLOCK; 510 mutex_enter(&ufs_fix.uq_mutex); 511 } else { 512 /* 513 * We got the lock but we are not the current 514 * threadp so we have to release the lock. 515 */ 516 mutex_exit(&ufs_fix.uq_mutex); 517 } 518 } else { 519 MINOR((": fix failure thread already running ")); 520 /* 521 * No need to log another failure as one is already 522 * being logged. 523 */ 524 break; 525 } 526 527 if (ufs_fix.uq_threadp && ufs_fix.uq_threadp == curthread) { 528 mutex_exit(&ufs_fix.uq_mutex); 529 cmn_err(CE_WARN, "ufs_fault_v: recursive ufs_fault"); 530 } else { 531 /* 532 * Must check if we actually still own the lock and 533 * if so then release the lock and move on with life. 534 */ 535 if (mutex_owner(&ufs_fix.uq_mutex) == curthread) 536 mutex_exit(&ufs_fix.uq_mutex); 537 } 538 539 new = init_failure(vp, fmt, adx); 540 if (new != NULL) { 541 queue_failure(new); 542 break; 543 } 544 real_panic_v(new, fmt, adx); 545 break; 546 547 } 548 MINOR(("] ")); 549 return (err); 550 } 551 552 /* 553 * triage() 554 * 555 * Attempt to fix iff: 556 * - the system is not already panicking 557 * - this file system isn't explicitly marked not to be fixed 558 * - we can connect to the user-level daemon 559 * These conditions are detectable later, but if we can determine 560 * them in the failing threads context the core dump may be more 561 * useful. 562 * 563 */ 564 565 static triage_t 566 triage(vnode_t *vp) 567 { 568 struct inode *ip; 569 int need_unlock_vfs; 570 int fs_flags; 571 572 MINUTE(("[triage")); 573 574 if (panicstr) { 575 MINUTE(( 576 ": already panicking: \"%s\" => TRIAGE_DEAD]\n", panicstr)); 577 return (TRIAGE_DEAD); 578 } 579 580 if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs) { 581 MINUTE(( 582 ": vp, ip or ufsvfs is NULL; can't determine fs => TRIAGE_DEAD]\n")); 583 return (TRIAGE_DEAD); 584 } 585 586 /* use tryenter and continue no matter what since we're panicky */ 587 need_unlock_vfs = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock); 588 if (need_unlock_vfs) 589 need_unlock_vfs = mutex_tryenter(&ip->i_ufsvfs->vfs_lock); 590 591 fs_flags = ip->i_ufsvfs->vfs_fsfx.fx_flags; 592 if (need_unlock_vfs) 593 mutex_exit(&ip->i_ufsvfs->vfs_lock); 594 595 if (fs_flags & UFSFX_PANIC) { 596 MINUTE(( 597 ": filesystem marked \"panic\" => TRIAGE_NO_SPIRIT]\n")); 598 return (TRIAGE_NO_SPIRIT); 599 } 600 601 if (ufs_checkaccton(vp) != 0) { 602 MINUTE(( 603 ": filesystem would deadlock (accounting) => TRIAGE_DEAD]\n")); 604 return (TRIAGE_DEAD); 605 } 606 607 if (ufs_checkswapon(vp) != 0) { 608 MINUTE(( 609 ": filesystem would deadlock (swapping) => TRIAGE_DEAD]\n")); 610 return (TRIAGE_DEAD); 611 } 612 613 MINUTE((": return TRIAGE_ATTEND_TO] ")); 614 return (TRIAGE_ATTEND_TO); 615 } 616 617 /* 618 * init failure 619 * 620 * This routine allocates a failure struct and initializes 621 * it's member elements. 622 * Space is allocated for copies of dynamic identifying fs structures 623 * passed in. Without a much more segmented kernel architecture 624 * this is as protected as we can make it (for now.) 625 */ 626 static ufs_failure_t * 627 init_failure(vnode_t *vp, char *fmt, va_list adx) 628 { 629 ufs_failure_t *new; 630 struct inode *ip; 631 int initialization_worked = 0; 632 int need_vfs_unlock; 633 634 MINOR(("[init_failure")); 635 636 new = kmem_zalloc(sizeof (ufs_failure_t), KM_NOSLEEP); 637 if (!new) { 638 MINOR((": kmem_zalloc failed]\n")); 639 return (NULL); 640 } 641 642 /* 643 * enough information to make a fix attempt possible? 644 */ 645 if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs || !vp->v_vfsp || 646 !ip->i_ufsvfs->vfs_bufp || !ITOF(ip) || !fmt) 647 goto errout; 648 649 if (vp->v_type != VREG && vp->v_type != VDIR && 650 vp->v_type != VBLK && vp->v_type != VCHR && 651 vp->v_type != VLNK && vp->v_type != VFIFO && 652 vp->v_type != VSOCK) 653 goto errout; 654 655 if (ip->i_ufsvfs->vfs_root->v_type != VREG && 656 ip->i_ufsvfs->vfs_root->v_type != VDIR && 657 ip->i_ufsvfs->vfs_root->v_type != VBLK && 658 ip->i_ufsvfs->vfs_root->v_type != VCHR && 659 ip->i_ufsvfs->vfs_root->v_type != VLNK && 660 ip->i_ufsvfs->vfs_root->v_type != VFIFO && 661 ip->i_ufsvfs->vfs_root->v_type != VSOCK) 662 goto errout; 663 664 if ((ITOF(ip)->fs_magic != FS_MAGIC) && 665 (ITOF(ip)->fs_magic != MTB_UFS_MAGIC)) 666 goto errout; 667 668 /* intialize values */ 669 670 (void) vsnprintf(new->uf_panic_str, LOCKFS_MAXCOMMENTLEN - 1, fmt, adx); 671 672 new->uf_ufsvfsp = ip->i_ufsvfs; 673 new->uf_vfsp = ip->i_vfs; 674 675 mutex_init(&new->uf_mutex, NULL, MUTEX_DEFAULT, NULL); 676 need_vfs_unlock = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock); 677 678 if (need_vfs_unlock) { 679 if (!mutex_tryenter(&ip->i_ufsvfs->vfs_lock)) { 680 /* 681 * not much alternative here, but we're panicking 682 * already, it couldn't be worse - so just 683 * proceed optimistically and take note. 684 */ 685 mutex_enter(&uf_stats.ufst_mutex); 686 uf_stats.ufst_lock_violations++; 687 mutex_exit(&uf_stats.ufst_mutex); 688 MINOR((": couldn't get vfs lock")) 689 need_vfs_unlock = 0; 690 } 691 } 692 693 if (mutex_tryenter(&new->uf_mutex)) { 694 initialization_worked = set_state(new, UF_INIT); 695 mutex_exit(&new->uf_mutex); 696 } 697 698 if (need_vfs_unlock) 699 mutex_exit(&ip->i_ufsvfs->vfs_lock); 700 701 if (initialization_worked) { 702 MINOR(("] ")); 703 return (new); 704 } 705 /* FALLTHROUGH */ 706 707 errout: 708 if (new) 709 kmem_free(new, sizeof (ufs_failure_t)); 710 MINOR((": failed]\n")); 711 return (NULL); 712 } 713 714 static void 715 queue_failure(ufs_failure_t *new) 716 { 717 MINOR(("[queue_failure")); 718 719 mutex_enter(&ufs_fix.uq_mutex); 720 721 if (ufs_fix.uq_ufhead) 722 insque(new, &ufs_fix.uq_ufhead); 723 else 724 ufs_fix.uq_ufhead = new; 725 726 if (mutex_tryenter(&new->uf_mutex)) { 727 (void) set_state(new, UF_QUEUE); 728 mutex_exit(&new->uf_mutex); 729 } 730 731 mutex_enter(&uf_stats.ufst_mutex); /* force wakeup */ 732 ufs_fix.uq_ne = ufs_fix.uq_lowat = uf_stats.ufst_num_failed; 733 mutex_exit(&uf_stats.ufst_mutex); 734 735 cv_broadcast(&ufs_fix.uq_cv); 736 737 DCALL(DBGLVL_MAJOR, cmn_err(CE_WARN, new->uf_panic_str ? 738 new->uf_panic_str : "queue_failure: NULL panic str?")); 739 mutex_exit(&ufs_fix.uq_mutex); 740 741 MINOR(("] ")); 742 } 743 744 /*PRINTFLIKE2*/ 745 static void 746 real_panic(ufs_failure_t *f, const char *fmt, ...) 747 { 748 va_list adx; 749 750 MINUTE(("[real_panic ")); 751 752 va_start(adx, fmt); 753 real_panic_v(f, fmt, adx); 754 va_end(adx); 755 756 MINUTE((": return?!]\n")); 757 } 758 759 static void 760 real_panic_v(ufs_failure_t *f, const char *fmt, va_list adx) 761 { 762 int seriousness = CE_PANIC; 763 int need_unlock; 764 765 MINUTE(("[real_panic_v ")); 766 767 if (f && f->uf_ufsvfsp) 768 TRANS_SETERROR(f->uf_ufsvfsp); 769 770 #if defined(DEBUG) 771 if (DEBUG_FLAGS & DBGFLG_NOPANIC) { 772 seriousness = CE_WARN; 773 cmn_err(CE_WARN, "real_panic: EWOULDPANIC\n"); 774 } 775 #endif /* DEBUG */ 776 777 delay(hz >> 1); /* allow previous warnings to get out */ 778 779 if (!f && fmt) 780 vcmn_err(seriousness, fmt, adx); 781 else 782 cmn_err(seriousness, f != NULL && f->uf_panic_str[0] != '\0' ? 783 f->uf_panic_str: "real_panic: <unknown panic?>"); 784 785 if (f) { 786 need_unlock = !MUTEX_HELD(&f->uf_mutex); 787 if (need_unlock) { 788 mutex_enter(&f->uf_mutex); 789 } 790 791 f->uf_retry = -1; 792 (void) set_state(f, UF_PANIC); 793 794 if (need_unlock) { 795 mutex_exit(&f->uf_mutex); 796 } 797 } 798 MINUTE((": return?!]\n")); 799 } 800 801 /* 802 * initializes ufs panic structs, locks, etc 803 */ 804 void 805 ufsfx_init(void) 806 { 807 808 MINUTE(("[ufsfx_init")); 809 810 /* patchable; unchanged while running, so no lock is needed */ 811 ufsfx_tune.uft_too_long = UF_TOO_LONG; 812 ufsfx_tune.uft_fixstart_period = UF_FIXSTART_PERIOD; 813 ufsfx_tune.uft_fixpoll_period = UF_FIXPOLL_PERIOD; 814 ufsfx_tune.uft_short_err_period = UF_SHORT_ERROR_PERIOD; 815 ufsfx_tune.uft_long_err_period = UF_LONG_ERROR_PERIOD; 816 817 uffsinfo.ufi_statp = &uf_stats; 818 uffsinfo.ufi_tunep = &ufsfx_tune; 819 uffsinfo.ufi_statetab = &state_desc[0]; 820 821 mutex_init(&uf_stats.ufst_mutex, NULL, MUTEX_DEFAULT, NULL); 822 ufs_thread_init(&ufs_fix, /* maxne */ 1); 823 824 MINUTE(("] ")); 825 } 826 827 /* 828 * initializes per-ufs values 829 * returns 0 (ok) or errno 830 */ 831 int 832 ufsfx_mount(struct ufsvfs *ufsvfsp, int flags) 833 { 834 MINUTE(("[ufsfx_mount (%d)", flags)); 835 /* don't check/need vfs_lock because it's still being initialized */ 836 837 ufsvfsp->vfs_fsfx.fx_flags = (flags & UFSMNT_ONERROR_FLGMASK) >> 4; 838 839 MINUTE((": %s: fx_flags:%ld,", 840 ufsvfsp->vfs_fs->fs_fsmnt, ufsvfsp->vfs_fsfx.fx_flags)); 841 /* 842 * onerror={panic ^ lock only ^ unmount} 843 */ 844 845 if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_PANIC) { 846 MINUTE((" PANIC")); 847 848 } else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKONLY) { 849 MINUTE((" LCKONLY")); 850 851 } else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKUMOUNT) { 852 MINUTE((" LCKUMOUNT")); 853 854 } else { 855 ufsvfsp->vfs_fsfx.fx_flags = UFSFX_DEFAULT; 856 ASSERT(ufsvfsp->vfs_fsfx.fx_flags & 857 (UFSMNT_ONERROR_FLGMASK >> 4)); 858 MINUTE((" DEFAULT")); 859 } 860 861 pollwakeup(&ufs_pollhd, POLLPRI); 862 MINUTE(("]\n")); 863 return (0); 864 } 865 866 /* 867 * ufsfx_unmount 868 * 869 * called during unmount 870 */ 871 void 872 ufsfx_unmount(struct ufsvfs *ufsvfsp) 873 { 874 ufs_failure_t *f; 875 int must_unlock_list; 876 877 MINUTE(("[ufsfx_unmount")); 878 879 if (!ufsvfsp) { 880 MINUTE((": no ufsvfsp]")); 881 return; 882 } 883 884 if ((must_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex)) != 0) 885 mutex_enter(&ufs_fix.uq_mutex); 886 887 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 888 int must_unlock_failure; 889 890 must_unlock_failure = !MUTEX_HELD(&f->uf_mutex); 891 if (must_unlock_failure) { 892 mutex_enter(&f->uf_mutex); 893 } 894 895 if (f->uf_ufsvfsp == ufsvfsp) { 896 897 /* 898 * if we owned the failure record lock, then this 899 * is probably a fix failure-triggered unmount, so 900 * the warning is not appropriate or needed 901 */ 902 903 /* XXX if rebooting don't print this? */ 904 if (!terminal_state(f->uf_s) && must_unlock_failure) { 905 cmn_err(CE_WARN, 906 "Unmounting %s while error-locked", 907 fs_name(f)); 908 } 909 910 f->uf_ufsvfsp = NULL; 911 f->uf_vfs_ufsfxp = NULL; 912 f->uf_vfs_lockp = NULL; 913 f->uf_bp = NULL; 914 f->uf_vfsp = NULL; 915 f->uf_retry = -1; 916 } 917 918 if (must_unlock_failure) 919 mutex_exit(&f->uf_mutex); 920 } 921 if (must_unlock_list) 922 mutex_exit(&ufs_fix.uq_mutex); 923 924 pollwakeup(&ufs_pollhd, POLLPRI | POLLHUP); 925 MINUTE(("] ")); 926 } 927 928 /* 929 * ufsfx_(un)lockfs 930 * 931 * provides hook from lockfs code so we can recognize unlock/relock 932 * This is called after it is certain that the (un)lock will succeed. 933 */ 934 void 935 ufsfx_unlockfs(struct ufsvfs *ufsvfsp) 936 { 937 ufs_failure_t *f; 938 int need_unlock; 939 int need_unlock_list; 940 int informed = 0; 941 942 MINUTE(("[ufsfx_unlockfs")); 943 944 if (!ufsvfsp) 945 return; 946 947 need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex); 948 949 if (need_unlock_list) 950 mutex_enter(&ufs_fix.uq_mutex); 951 952 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 953 954 need_unlock = !MUTEX_HELD(&f->uf_mutex); 955 if (need_unlock) 956 mutex_enter(&f->uf_mutex); 957 958 if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s)) { 959 if (!(f->uf_s & UF_FIXING)) { 960 /* 961 * This might happen if we don't notice that 962 * the fs gets marked FSFIX before it is 963 * marked FSCLEAN, as might occur if the 964 * the superblock was hammered directly. 965 */ 966 if (!informed) { 967 informed = 1; 968 cmn_err(CE_NOTE, 969 "Unlock of %s succeeded before " 970 "fs_clean marked FSFIX?", 971 fs_name(f)); 972 } 973 974 /* 975 * pass through fixing state so 976 * transition protocol is satisfied 977 */ 978 if (!set_state(f, UF_FIXING)) { 979 MINUTE((": failed] ")); 980 } 981 } 982 983 if (!set_state(f, UF_FIXED)) { 984 /* it's already fixed, so don't panic now */ 985 MINUTE((": failed] ")); 986 } 987 } 988 989 if (need_unlock) 990 mutex_exit(&f->uf_mutex); 991 } 992 if (need_unlock_list) 993 mutex_exit(&ufs_fix.uq_mutex); 994 MINUTE(("] ")); 995 } 996 997 void 998 ufsfx_lockfs(struct ufsvfs *ufsvfsp) 999 { 1000 ufs_failure_t *f; 1001 int need_unlock; 1002 int need_unlock_list; 1003 1004 MINUTE(("[ufsfx_lockfs")); 1005 1006 if (!ufsvfsp) 1007 return; 1008 1009 need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex); 1010 1011 if (need_unlock_list) 1012 mutex_enter(&ufs_fix.uq_mutex); 1013 1014 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 1015 1016 need_unlock = !MUTEX_HELD(&f->uf_mutex); 1017 if (need_unlock) 1018 mutex_enter(&f->uf_mutex); 1019 1020 if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s) && 1021 f->uf_s != UF_PANIC) { 1022 switch (f->uf_s) { 1023 1024 default: 1025 cmn_err(CE_WARN, 1026 "fs %s not in state " 1027 "UF_TRYLCK, UF_LOCKED or UF_FIXING", 1028 fs_name(f)); 1029 break; 1030 1031 case UF_TRYLCK: 1032 if (!set_state(f, UF_LOCKED)) { 1033 MINUTE((": failed] ")); 1034 } 1035 break; 1036 1037 case UF_LOCKED: 1038 if (!set_state(f, UF_FIXING)) { 1039 MINUTE((": failed] ")); 1040 } 1041 break; 1042 1043 case UF_FIXING: 1044 break; 1045 1046 } 1047 } 1048 1049 if (need_unlock) 1050 mutex_exit(&f->uf_mutex); 1051 } 1052 if (need_unlock_list) 1053 mutex_exit(&ufs_fix.uq_mutex); 1054 1055 MINUTE(("] ")); 1056 } 1057 1058 /* 1059 * error lock, trigger fsck and unlock those fs with failures 1060 * blatantly copied from the hlock routine, although this routine 1061 * triggers differently in order to use uq_ne as meaningful data. 1062 */ 1063 /* ARGSUSED */ 1064 void 1065 ufsfx_thread_fix_failures(void *ignored) 1066 { 1067 int retry; 1068 callb_cpr_t cprinfo; 1069 1070 CALLB_CPR_INIT(&cprinfo, &ufs_fix.uq_mutex, callb_generic_cpr, 1071 "ufsfixfail"); 1072 1073 MINUTE(("[ufsfx_thread_fix_failures] ")); 1074 1075 for (;;) { 1076 /* sleep until there is work to do */ 1077 1078 mutex_enter(&ufs_fix.uq_mutex); 1079 (void) ufs_thread_run(&ufs_fix, &cprinfo); 1080 ufs_fix.uq_ne = 0; 1081 mutex_exit(&ufs_fix.uq_mutex); 1082 1083 /* process failures on our q */ 1084 do { 1085 retry = ufsfx_do_failure_q(); 1086 if (retry) { 1087 mutex_enter(&ufs_fix.uq_mutex); 1088 CALLB_CPR_SAFE_BEGIN(&cprinfo); 1089 (void) cv_reltimedwait(&ufs_fix.uq_cv, 1090 &ufs_fix.uq_mutex, (hz * retry), 1091 TR_CLOCK_TICK); 1092 CALLB_CPR_SAFE_END(&cprinfo, 1093 &ufs_fix.uq_mutex); 1094 mutex_exit(&ufs_fix.uq_mutex); 1095 } 1096 } while (retry); 1097 } 1098 /* NOTREACHED */ 1099 } 1100 1101 1102 /* 1103 * watch for fix-on-panic work 1104 * 1105 * returns # of seconds to sleep before trying again 1106 * and zero if no retry is needed 1107 */ 1108 1109 int 1110 ufsfx_do_failure_q(void) 1111 { 1112 ufs_failure_t *f; 1113 long retry = 1; 1114 ufsd_t *s; 1115 1116 MAJOR(("[ufsfx_do_failure_q")); 1117 DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL)); 1118 1119 if (!mutex_tryenter(&ufs_fix.uq_mutex)) 1120 return (retry); 1121 1122 retry = 0; 1123 rescan_q: 1124 1125 /* 1126 * walk down failure list 1127 * depending on state of each failure, do whatever 1128 * is appropriate to move it to the next state 1129 * taking note of whether retry gets set 1130 * 1131 * retry protocol: 1132 * wakeup in shortest required time for any failure 1133 * retry == 0; nothing more to do (terminal state) 1134 * retry < 0; reprocess queue immediately, retry will 1135 * be abs(retry) for the next cycle 1136 * retry > 0; schedule wakeup for retry seconds 1137 */ 1138 1139 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 1140 1141 if (!mutex_tryenter(&f->uf_mutex)) { 1142 retry = 1; 1143 continue; 1144 } 1145 s = get_state_desc(f->uf_s); 1146 1147 MINOR((": found%s: %s, \"%s: %s\"\n", 1148 s->ud_attr.terminal ? " old" : "", 1149 fs_name(f), state_name(f->uf_s), f->uf_panic_str)); 1150 1151 if (s->ud_attr.terminal) { 1152 mutex_exit(&f->uf_mutex); 1153 continue; 1154 } 1155 1156 if (s->ud_sfp) 1157 (*s->ud_sfp)(f, UFA_FOUND, f->uf_s); 1158 1159 ASSERT(terminal_state(f->uf_s) || f->uf_retry != 0); 1160 1161 if (f->uf_retry != 0) { 1162 if (retry > f->uf_retry || retry == 0) 1163 retry = f->uf_retry; 1164 if (f->uf_retry < 0) 1165 f->uf_retry = abs(f->uf_retry); 1166 } 1167 mutex_exit(&f->uf_mutex); 1168 } 1169 1170 1171 if (retry < 0) { 1172 retry = abs(retry); 1173 goto rescan_q; 1174 } 1175 1176 mutex_exit(&ufs_fix.uq_mutex); 1177 1178 DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL)); 1179 MAJOR((": retry=%ld, good night]\n\n", retry)); 1180 1181 return (retry); 1182 } 1183 1184 static void 1185 pester_msg(ufs_failure_t *f, int seriousness) 1186 { 1187 MINUTE(("[pester_msg")); 1188 ASSERT(f->uf_s & (UF_LOCKED | UF_FIXING)); 1189 1190 /* 1191 * XXX if seems too long for this fs, poke administrator 1192 * XXX to run fsck manually (and change retry time?) 1193 */ 1194 cmn_err(seriousness, "Waiting for repair of %s to %s", 1195 fs_name(f), f->uf_s & UF_LOCKED ? "start" : "finish"); 1196 MINUTE(("]")); 1197 } 1198 1199 static time_t 1200 trylock_time_exceeded(ufs_failure_t *f) 1201 { 1202 time_t toolong; 1203 extern time_t time; 1204 1205 MINUTE(("[trylock_time_exceeded")); 1206 ASSERT(MUTEX_HELD(&f->uf_mutex)); 1207 1208 toolong = (time_t)ufsfx_tune.uft_too_long + f->uf_entered_tm; 1209 if (time > toolong) 1210 cmn_err(CE_WARN, "error-lock timeout exceeded: %s", fs_name(f)); 1211 1212 MINUTE(("] ")); 1213 return (time <= toolong? 0: time - toolong); 1214 } 1215 1216 static int 1217 get_lockfs_status(ufs_failure_t *f, struct lockfs *lfp) 1218 { 1219 MINUTE(("[get_lockfs_status")); 1220 1221 if (!f->uf_ufsvfsp) { 1222 MINUTE((": ufsvfsp is NULL]\n")); 1223 return (0); 1224 } 1225 1226 ASSERT(MUTEX_HELD(&f->uf_mutex)); 1227 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp)); 1228 ASSERT(!vfs_lock_held(f->uf_vfsp)); 1229 ASSERT(f->uf_ufsvfsp->vfs_root != NULL); 1230 1231 f->uf_lf_err = ufs_fiolfss(f->uf_ufsvfsp->vfs_root, lfp); 1232 1233 if (f->uf_lf_err) { 1234 f->uf_retry = ufsfx_tune.uft_short_err_period; 1235 } 1236 1237 MINUTE(("] ")); 1238 return (1); 1239 } 1240 1241 static sfrc_t 1242 set_state(ufs_failure_t *f, ufs_failure_states_t new_state) 1243 { 1244 ufsd_t *s; 1245 sfrc_t sfrc = SFRC_FAIL; 1246 int need_unlock; 1247 extern time_t time; 1248 1249 HIDEOUS(("[set_state: new state:%s", state_name(new_state))); 1250 ASSERT(f); 1251 ASSERT(MUTEX_HELD(&f->uf_mutex)); 1252 1253 /* 1254 * if someone else is panicking, just let panic sync proceed 1255 */ 1256 if (panicstr) { 1257 (void) set_state(f, UF_NOTFIX); 1258 HIDEOUS((": state reset: not fixed] ")); 1259 return (sfrc); 1260 } 1261 1262 /* 1263 * bad state transition, an internal error 1264 */ 1265 if (!state_trans_valid(f->uf_s, new_state)) { 1266 /* recursion */ 1267 if (!(f->uf_s & UF_PANIC) && !(new_state & UF_PANIC)) 1268 (void) set_state(f, UF_PANIC); 1269 MINOR((": state reset: transition failure (\"%s\"->\"%s\")] ", 1270 state_name(f->uf_s), state_name(new_state))); 1271 return (sfrc); 1272 } 1273 1274 s = get_state_desc(new_state); 1275 1276 need_unlock = !MUTEX_HELD(&ufs_fix.uq_mutex); 1277 if (need_unlock) 1278 mutex_enter(&ufs_fix.uq_mutex); 1279 1280 if (s->ud_attr.at_fail && ufs_fix.uq_threadp && 1281 curthread == ufs_fix.uq_threadp) { 1282 cmn_err(CE_WARN, "set_state: probable recursive panic of %s", 1283 fs_name(f)); 1284 } 1285 if (need_unlock) 1286 mutex_exit(&ufs_fix.uq_mutex); 1287 1288 /* NULL state functions always succeed */ 1289 sfrc = !s->ud_sfp? SFRC_SUCCESS: (*s->ud_sfp)(f, UFA_SET, new_state); 1290 1291 if (sfrc == SFRC_SUCCESS && f->uf_s != new_state) { 1292 f->uf_s = new_state; 1293 f->uf_entered_tm = time; 1294 f->uf_counter = 0; 1295 } 1296 1297 HIDEOUS(("]\n")); 1298 return (sfrc); 1299 } 1300 1301 static ufsd_t * 1302 get_state_desc(ufs_failure_states_t state) 1303 { 1304 ufsd_t *s; 1305 1306 HIDEOUS(("[get_state_desc")); 1307 1308 for (s = &state_desc[1]; s->ud_name != NULL; s++) { 1309 if (s->ud_v == state) { 1310 HIDEOUS(("] ")); 1311 return (s); 1312 } 1313 } 1314 1315 HIDEOUS(("] ")); 1316 return (&state_desc[0]); /* default */ 1317 } 1318 1319 static sfrc_t 1320 sf_undef(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s) 1321 { 1322 sfrc_t rc; 1323 1324 TRIVIA(("[sf_undef, action is %s, state is %s\n", 1325 act_name(a), state_name(s))); 1326 ASSERT(s == UF_UNDEF); 1327 1328 /* shouldn't find null failure records or ever set one */ 1329 rc = set_state(f, UF_NOTFIX); 1330 1331 TRIVIA(("] ")); 1332 return (rc); 1333 } 1334 1335 1336 static sfrc_t 1337 sf_init( 1338 ufs_failure_t *f, 1339 ufsa_t a, 1340 ufs_failure_states_t s) 1341 { 1342 sfrc_t rc = SFRC_FAIL; 1343 extern time_t time; 1344 1345 TRIVIA(("[sf_init, action is %s", act_name(a))); 1346 ASSERT(s & UF_INIT); 1347 1348 switch (a) { 1349 case UFA_SET: 1350 f->uf_begin_tm = time; 1351 f->uf_retry = 1; 1352 if (!f->uf_ufsvfsp) { 1353 (void) set_state(f, UF_PANIC); 1354 TRIVIA((": NULL ufsvfsp]\n")); 1355 return (rc); 1356 } 1357 /* 1358 * because we can call panic from many different levels, 1359 * we can't be sure that we've got the vfs_lock at this 1360 * point. However, there's not much alternative and if 1361 * we don't (have the lock) the worst case is we'll just 1362 * panic again 1363 */ 1364 f->uf_vfs_lockp = &f->uf_ufsvfsp->vfs_lock; 1365 f->uf_vfs_ufsfxp = &f->uf_ufsvfsp->vfs_fsfx; 1366 1367 if (!f->uf_ufsvfsp->vfs_bufp) { 1368 (void) set_state(f, UF_PANIC); 1369 TRIVIA((": NULL vfs_bufp]\n")); 1370 return (rc); 1371 } 1372 f->uf_bp = f->uf_ufsvfsp->vfs_bufp; 1373 1374 if (!f->uf_ufsvfsp->vfs_bufp->b_un.b_fs) { 1375 (void) set_state(f, UF_PANIC); 1376 TRIVIA((": NULL vfs_fs]\n")); 1377 return (rc); 1378 } 1379 1380 /* vfs_fs = vfs_bufp->b_un.b_fs */ 1381 bcopy(f->uf_ufsvfsp->vfs_fs->fs_fsmnt, f->uf_fsname, MAXMNTLEN); 1382 1383 f->uf_lf.lf_lock = LOCKFS_ELOCK; /* primer */ 1384 1385 if (!f->uf_vfsp || f->uf_vfsp->vfs_dev == NODEV) { 1386 (void) set_state(f, UF_PANIC); 1387 TRIVIA((": NULL vfsp or vfs_dev == NODEV")); 1388 return (rc); 1389 } 1390 f->uf_dev = f->uf_vfsp->vfs_dev; 1391 1392 rc = SFRC_SUCCESS; 1393 break; 1394 1395 case UFA_FOUND: 1396 default: 1397 /* failures marked init shouldn't even be on the queue yet */ 1398 rc = set_state(f, UF_QUEUE); 1399 TRIVIA((": found failure with state init]\n")); 1400 } 1401 1402 TRIVIA(("] ")); 1403 return (rc); 1404 } 1405 1406 static sfrc_t 1407 sf_queue( 1408 ufs_failure_t *f, 1409 ufsa_t a, 1410 ufs_failure_states_t s) 1411 { 1412 sfrc_t rc = SFRC_FAIL; 1413 1414 TRIVIA(("[sf_queue, action is %s", act_name(a))); 1415 ASSERT(s & UF_QUEUE); 1416 1417 if (!f->uf_ufsvfsp) { 1418 TRIVIA((": NULL ufsvfsp]\n")); 1419 return (rc); 1420 } 1421 1422 switch (a) { 1423 case UFA_FOUND: 1424 rc = sf_found_queue(f); 1425 break; 1426 1427 case UFA_SET: 1428 1429 ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex)); 1430 1431 mutex_enter(&uf_stats.ufst_mutex); 1432 uf_stats.ufst_num_failed++; 1433 mutex_exit(&uf_stats.ufst_mutex); 1434 1435 /* 1436 * if can't get the vfs lock, just wait until 1437 * UF_TRYLCK to set fx_current 1438 */ 1439 if (mutex_tryenter(f->uf_vfs_lockp)) { 1440 f->uf_vfs_ufsfxp->fx_current = f; 1441 mutex_exit(f->uf_vfs_lockp); 1442 } else { 1443 mutex_enter(&uf_stats.ufst_mutex); 1444 uf_stats.ufst_current_races++; 1445 mutex_exit(&uf_stats.ufst_mutex); 1446 } 1447 1448 f->uf_retry = 1; 1449 rc = SFRC_SUCCESS; 1450 TRIVIA(("] ")); 1451 break; 1452 1453 default: 1454 (void) set_state(f, UF_PANIC); 1455 TRIVIA((": failed] ")); 1456 } 1457 1458 return (rc); 1459 } 1460 1461 static sfrc_t 1462 sf_found_queue(ufs_failure_t *f) 1463 { 1464 int replica; 1465 sfrc_t rc = SFRC_FAIL; 1466 1467 TRIVIA(("[sf_found_queue")); 1468 1469 /* 1470 * don't need to check for null ufsvfsp because 1471 * unmount must own list's ufs_fix.uq_mutex 1472 * to mark it null and we own that lock since 1473 * we got here. 1474 */ 1475 1476 ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex)); 1477 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp)); 1478 1479 if (!mutex_tryenter(f->uf_vfs_lockp)) { 1480 TRIVIA((": tryenter(vfslockp) failed; retry]\n")); 1481 f->uf_retry = 1; 1482 return (rc); 1483 } 1484 1485 replica = f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current != NULL && 1486 f->uf_vfs_ufsfxp->fx_current != f && 1487 !terminal_state(f->uf_vfs_ufsfxp->fx_current->uf_s); 1488 1489 /* 1490 * copy general flags to this ufs_failure so we don't 1491 * need to refer back to the ufsvfs, or, more importantly, 1492 * don't need to keep acquiring (trying to acquire) vfs_lockp 1493 * 1494 * The most restrictive option wins: 1495 * panic > errlock only > errlock+unmount > repair 1496 * XXX panic > elock > elock > elock+umount 1497 */ 1498 if (f->uf_vfs_ufsfxp->fx_flags & UFSFX_PANIC) { 1499 if (!set_state(f, UF_PANIC)) { 1500 TRIVIA((": marked panic but was queued?")); 1501 real_panic(f, " "); 1502 /*NOTREACHED*/ 1503 } 1504 mutex_exit(f->uf_vfs_lockp); 1505 return (rc); 1506 } 1507 f->uf_flags = f->uf_vfs_ufsfxp->fx_flags; 1508 1509 if (replica) { 1510 if (!set_state(f, UF_REPLICA)) { 1511 f->uf_retry = 1; 1512 TRIVIA((": set to replica failed] ")); 1513 } else { 1514 TRIVIA(("] ")); 1515 } 1516 mutex_exit(f->uf_vfs_lockp); 1517 return (rc); 1518 } 1519 mutex_exit(f->uf_vfs_lockp); 1520 1521 if (!set_state(f, UF_TRYLCK)) { 1522 TRIVIA((": failed] ")); 1523 } else { 1524 rc = SFRC_SUCCESS; 1525 } 1526 return (rc); 1527 } 1528 1529 static sfrc_t 1530 sf_nonterm_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s) 1531 { 1532 sfrc_t rc = SFRC_FAIL; 1533 1534 TRIVIA(("[sf_nonterm_cmn, action: %s, %s", act_name(a), state_name(s))); 1535 ASSERT(s & (UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING)); 1536 ASSERT(!terminal_state(s)); 1537 1538 if (!f->uf_ufsvfsp && !(f->uf_s & UF_UMOUNT)) { 1539 TRIVIA((": NULL ufsvfsp (state != UMOUNT)]\n")); 1540 (void) set_state(f, UF_NOTFIX); 1541 return (rc); 1542 } 1543 1544 switch (a) { 1545 case UFA_SET: 1546 switch (s) { 1547 case UF_TRYLCK: 1548 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp)); 1549 rc = sf_set_trylck(f); 1550 break; 1551 1552 case UF_LOCKED: 1553 rc = sf_set_locked(f); 1554 break; 1555 1556 case UF_FIXING: 1557 f->uf_flags |= UFSFX_REPAIR_START; 1558 f->uf_retry = ufsfx_tune.uft_fixpoll_period; 1559 rc = SFRC_SUCCESS; 1560 break; 1561 1562 case UF_UMOUNT: 1563 f->uf_retry = -ufsfx_tune.uft_short_err_period; 1564 rc = SFRC_SUCCESS; 1565 break; 1566 1567 default: 1568 (void) set_state(f, UF_PANIC); 1569 TRIVIA((": failed] ")); 1570 } 1571 break; 1572 1573 case UFA_FOUND: 1574 1575 switch (s) { 1576 case UF_TRYLCK: 1577 rc = sf_found_trylck(f); 1578 break; 1579 1580 case UF_LOCKED: 1581 case UF_FIXING: 1582 rc = sf_found_lock_fix_cmn(f, s); 1583 break; 1584 1585 case UF_UMOUNT: 1586 rc = sf_found_umount(f); 1587 break; 1588 1589 default: 1590 (void) set_state(f, UF_PANIC); 1591 TRIVIA((": failed] ")); 1592 break; 1593 } 1594 break; 1595 default: 1596 (void) set_state(f, UF_PANIC); 1597 TRIVIA((": failed] ")); 1598 break; 1599 } 1600 1601 TRIVIA(("] ")); 1602 return (rc); 1603 } 1604 1605 static sfrc_t 1606 sf_set_trylck(ufs_failure_t *f) 1607 { 1608 TRIVIA(("[sf_set_trylck")); 1609 1610 if (!mutex_tryenter(f->uf_vfs_lockp)) { 1611 TRIVIA((": tryenter(vfslockp) failed; retry]\n")); 1612 f->uf_retry = 1; 1613 return (SFRC_FAIL); 1614 } 1615 1616 if (!f->uf_vfs_ufsfxp->fx_current) 1617 f->uf_vfs_ufsfxp->fx_current = f; 1618 1619 mutex_exit(f->uf_vfs_lockp); 1620 1621 f->uf_lf.lf_flags = 0; 1622 f->uf_lf.lf_lock = LOCKFS_ELOCK; 1623 f->uf_retry = -ufsfx_tune.uft_fixstart_period; 1624 TRIVIA(("] ")); 1625 return (SFRC_SUCCESS); 1626 } 1627 1628 static sfrc_t 1629 sf_found_trylck(ufs_failure_t *f) 1630 { 1631 struct lockfs lockfs_status; 1632 1633 TRIVIA(("[sf_found_trylck")); 1634 1635 if (trylock_time_exceeded(f) > 0) { 1636 (void) set_state(f, UF_PANIC); 1637 TRIVIA((": failed] ")); 1638 return (SFRC_FAIL); 1639 } 1640 1641 if (!get_lockfs_status(f, &lockfs_status)) { 1642 (void) set_state(f, UF_PANIC); 1643 TRIVIA((": failed] ")); 1644 return (SFRC_FAIL); 1645 } 1646 1647 if (f->uf_lf_err == NO_ERROR) 1648 f->uf_lf.lf_key = lockfs_status.lf_key; 1649 1650 if (!set_lockfs(f, &lockfs_status)) { 1651 (void) set_state(f, UF_PANIC); 1652 TRIVIA((": failed] ")); 1653 return (SFRC_FAIL); 1654 } 1655 TRIVIA(("] ")); 1656 return (SFRC_SUCCESS); 1657 } 1658 1659 static sfrc_t 1660 sf_set_locked(ufs_failure_t *f) 1661 { 1662 TRIVIA(("[sf_set_locked")); 1663 1664 f->uf_retry = -ufsfx_tune.uft_fixstart_period; 1665 1666 #if defined(DEBUG) 1667 if (f->uf_flags & UFSFX_REPAIR_START) 1668 TRIVIA(("clearing UFSFX_REPAIR_START ")); 1669 #endif /* DEBUG */ 1670 1671 f->uf_flags &= ~UFSFX_REPAIR_START; 1672 1673 if (f->uf_s & UF_TRYLCK) { 1674 cmn_err(CE_WARN, "Error-locked %s: \"%s\"", 1675 fs_name(f), f->uf_panic_str); 1676 1677 if (f->uf_flags & UFSFX_LCKONLY) 1678 cmn_err(CE_WARN, "Manual repair of %s required", 1679 fs_name(f)); 1680 } 1681 1682 /* 1683 * just reset to current state 1684 */ 1685 #if defined(DEBUG) 1686 TRIVIA(("locked->locked ")); 1687 #endif /* DEBUG */ 1688 1689 TRIVIA(("] ")); 1690 return (SFRC_SUCCESS); 1691 } 1692 1693 static sfrc_t 1694 sf_found_lock_fix_cmn(ufs_failure_t *f, ufs_failure_states_t s) 1695 { 1696 time_t toolong; 1697 extern time_t time; 1698 struct buf *bp = NULL; 1699 struct fs *dfs; 1700 time_t concerned, anxious; 1701 sfrc_t rc = SFRC_FAIL; 1702 ulong_t gb_size; 1703 1704 TRIVIA(("[sf_found_lock_fix_cmn (\"%s\")", state_name(s))); 1705 1706 if (s & UF_LOCKED) { 1707 ASSERT(MUTEX_HELD(&f->uf_mutex)); 1708 1709 toolong = 1710 time > (ufsfx_tune.uft_too_long + f->uf_entered_tm); 1711 TRIVIA(("%stoolong", !toolong? "not": "")); 1712 HIDEOUS((": time:%ld, too long:%ld, entered_tm:%ld ", 1713 time, ufsfx_tune.uft_too_long, f->uf_entered_tm)); 1714 1715 if (f->uf_flags & UFSFX_LCKUMOUNT) { 1716 if (set_state(f, UF_UMOUNT)) { 1717 TRIVIA(("] ")); 1718 rc = SFRC_SUCCESS; 1719 } else { 1720 TRIVIA((": failed] ")); 1721 f->uf_retry = 1; 1722 } 1723 return (rc); 1724 } 1725 if (!toolong) { 1726 rc = SFRC_SUCCESS; 1727 } else { 1728 if (!(f->uf_flags & UFSFX_REPAIR_START)) { 1729 cmn_err(CE_WARN, "%s repair of %s not started.", 1730 (f->uf_flags & UFSFX_LCKONLY) ? 1731 "Manual" : "Automatic", fs_name(f)); 1732 1733 f->uf_retry = ufsfx_tune.uft_long_err_period; 1734 } else { 1735 f->uf_retry = ufsfx_tune.uft_long_err_period; 1736 cmn_err(CE_WARN, "Repair of %s is not timely; " 1737 "operator attention is required.", 1738 fs_name(f)); 1739 } 1740 TRIVIA(("] ")); 1741 return (rc); 1742 } 1743 } 1744 1745 #if defined(DEBUG) 1746 else { 1747 ASSERT(s & UF_FIXING); 1748 } 1749 #endif /* DEBUG */ 1750 1751 /* 1752 * get on disk superblock; force it to really 1753 * come from the disk 1754 */ 1755 (void) bfinval(f->uf_dev, 0); 1756 bp = UFS_BREAD(f->uf_ufsvfsp, f->uf_dev, SBLOCK, SBSIZE); 1757 if (bp) { 1758 bp->b_flags |= (B_STALE | B_AGE); 1759 dfs = bp->b_un.b_fs; 1760 } 1761 1762 if (!bp || (bp->b_flags & B_ERROR) || ((dfs->fs_magic != FS_MAGIC) && 1763 (dfs->fs_magic != MTB_UFS_MAGIC))) { 1764 TRIVIA((": UFS_BREAD(SBLOCK) failed]\n")); 1765 f->uf_retry = 1; 1766 goto out; 1767 } 1768 1769 /* fsck started but we haven't noticed yet? */ 1770 if (!(s & UF_FIXING) && dfs->fs_clean == FSFIX) { 1771 if (!set_state(f, UF_FIXING)) { 1772 TRIVIA((": failed]\n")); 1773 f->uf_retry = 1; 1774 goto out; 1775 } 1776 } 1777 1778 /* fsck started but didn't succeed? */ 1779 if ((s & UF_FIXING) && ((dfs->fs_clean == FSBAD) || !fsck_active(f))) { 1780 TRIVIA((": fs_clean: %d", (int)dfs->fs_clean)); 1781 (void) set_state(f, UF_LOCKED); 1782 cmn_err(CE_WARN, "%s: Manual repair is necessary.", fs_name(f)); 1783 f->uf_retry = ufsfx_tune.uft_long_err_period; 1784 goto out; 1785 } 1786 1787 gb_size = (dfs->fs_size * dfs->fs_bshift) / GB; 1788 toolong = (time_t)((gb_size == 0? 1: gb_size) * SecondsPerGig); 1789 1790 /* fsck started but doesn't seem to be proceeding? */ 1791 if ((s & UF_FIXING) && dfs->fs_clean == FSFIX) { 1792 if (time > f->uf_entered_tm + toolong) { 1793 1794 cmn_err(CE_WARN, 1795 "Repair completion timeout exceeded on %s; " 1796 "manual fsck may be required", fs_name(f)); 1797 f->uf_retry = ufsfx_tune.uft_long_err_period; 1798 } 1799 } 1800 1801 concerned = f->uf_entered_tm + (toolong / 3); 1802 anxious = f->uf_entered_tm + ((2 * toolong) / 3); 1803 1804 if (time > concerned) 1805 pester_msg(f, time > anxious? CE_WARN: CE_NOTE); 1806 1807 TRIVIA(("] ")); 1808 1809 out: 1810 if (bp) 1811 brelse(bp); 1812 1813 return (rc); 1814 } 1815 1816 static sfrc_t 1817 sf_found_umount(ufs_failure_t *f) 1818 { 1819 extern time_t time; 1820 sfrc_t rc = SFRC_FAIL; 1821 struct vfs *vfsp = f->uf_vfsp; 1822 struct ufsvfs *ufsvfsp = f->uf_ufsvfsp; 1823 int toolong = 0; 1824 int err = 0; 1825 1826 TRIVIA(("[sf_found_umount")); 1827 1828 toolong = time > ufsfx_tune.uft_too_long + f->uf_entered_tm; 1829 if (toolong) { 1830 TRIVIA((": unmount time limit exceeded] ")); 1831 goto out; 1832 } 1833 1834 if (!vfsp || !ufsvfsp) { /* trivial case */ 1835 TRIVIA((": NULL vfsp and/or ufsvfsp, already unmounted?] ")); 1836 goto out; 1837 } 1838 1839 if (!ULOCKFS_IS_ELOCK(&ufsvfsp->vfs_ulockfs)) { 1840 TRIVIA((": !not error locked?")); 1841 err = EINVAL; 1842 goto out; 1843 } 1844 1845 /* The vn_vfsunlock will be done in dounmount() [.../common/fs/vfs.c] */ 1846 if (vn_vfswlock(vfsp->vfs_vnodecovered)) { 1847 TRIVIA((": couldn't lock coveredvp")); 1848 err = EBUSY; 1849 goto out; 1850 } 1851 1852 if ((err = dounmount(vfsp, 0, kcred)) != 0) { 1853 1854 /* take note, but not many alternatives here */ 1855 mutex_enter(&uf_stats.ufst_mutex); 1856 uf_stats.ufst_unmount_failures++; 1857 mutex_exit(&uf_stats.ufst_mutex); 1858 1859 TRIVIA((": unmount failed] ")); 1860 } else { 1861 cmn_err(CE_NOTE, "unmounted error-locked %s", fs_name(f)); 1862 } 1863 1864 out: 1865 if (toolong || (err != EBUSY && err != EAGAIN)) 1866 rc = set_state(f, UF_NOTFIX); 1867 1868 TRIVIA(("] ")); 1869 return (rc); 1870 } 1871 1872 static sfrc_t 1873 sf_term_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s) 1874 { 1875 extern time_t time; 1876 sfrc_t rc = SFRC_FAIL; 1877 1878 TRIVIA(("[sf_term_cmn, action is %s, state is %s", 1879 act_name(a), state_name(s))); 1880 ASSERT(s & (UF_FIXED | UF_NOTFIX | UF_REPLICA)); 1881 ASSERT(terminal_state(s)); 1882 1883 if (!f->uf_ufsvfsp && !(f->uf_s & (UF_UMOUNT | UF_NOTFIX))) { 1884 TRIVIA((": NULL ufsvfsp (state != UMOUNT | NOTFIX)]\n")); 1885 return (rc); 1886 } 1887 1888 switch (a) { 1889 case UFA_SET: 1890 switch (s) { 1891 case UF_NOTFIX: 1892 case UF_FIXED: 1893 { 1894 int need_lock_vfs; 1895 1896 if (f->uf_ufsvfsp && f->uf_vfs_lockp) 1897 need_lock_vfs = !MUTEX_HELD(f->uf_vfs_lockp); 1898 else 1899 need_lock_vfs = 0; 1900 1901 if (need_lock_vfs && !mutex_tryenter(f->uf_vfs_lockp)) { 1902 TRIVIA((": tryenter(vfslockp) fail; retry]\n")); 1903 f->uf_retry = 1; 1904 break; 1905 } 1906 1907 f->uf_end_tm = time; 1908 f->uf_lf.lf_lock = LOCKFS_OLOCK; 1909 f->uf_retry = 0; 1910 1911 if (f->uf_vfs_ufsfxp) 1912 f->uf_vfs_ufsfxp->fx_current = NULL; 1913 1914 if (need_lock_vfs) 1915 mutex_exit(f->uf_vfs_lockp); 1916 1917 cmn_err(CE_NOTE, (s & UF_NOTFIX)? "Could not fix %s": 1918 "%s is now accessible", fs_name(f)); 1919 1920 if (s & UF_FIXED) { 1921 mutex_enter(&uf_stats.ufst_mutex); 1922 uf_stats.ufst_num_fixed++; 1923 mutex_exit(&uf_stats.ufst_mutex); 1924 } 1925 (void) timeout(ufsfx_kill_fix_failure_thread, 1926 (void *)(ufsfx_tune.uft_short_err_period * hz), 1927 ufsfx_tune.uft_short_err_period * hz); 1928 rc = SFRC_SUCCESS; 1929 break; 1930 } 1931 case UF_REPLICA: 1932 1933 ASSERT(MUTEX_HELD(f->uf_vfs_lockp)); 1934 1935 /* not actually a replica? */ 1936 if (f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current && 1937 f->uf_vfs_ufsfxp->fx_current != f && 1938 !terminal_state( 1939 f->uf_vfs_ufsfxp->fx_current->uf_s)) { 1940 1941 f->uf_orig = f->uf_vfs_ufsfxp->fx_current; 1942 f->uf_retry = 0; 1943 rc = SFRC_SUCCESS; 1944 } else { 1945 TRIVIA((": NULL fx_current]\n")); 1946 f->uf_retry = 1; 1947 } 1948 1949 break; 1950 1951 default: 1952 rc = set_state(f, UF_PANIC); 1953 TRIVIA((": failed] ")); 1954 break; 1955 } 1956 break; 1957 1958 case UFA_FOUND: 1959 /* 1960 * XXX de-allocate these after some period? 1961 * XXX or move to an historical list? 1962 * XXX or have an ioctl which reaps them? 1963 */ 1964 /* 1965 * For now, since we don't expect lots of failures 1966 * to occur (to the point of memory shortages), 1967 * just punt 1968 */ 1969 1970 /* be sure we're not wasting cpu on old failures */ 1971 if (f->uf_retry != 0) { 1972 mutex_enter(&uf_stats.ufst_mutex); 1973 uf_stats.ufst_cpu_waste++; 1974 mutex_exit(&uf_stats.ufst_mutex); 1975 f->uf_retry = 0; 1976 } 1977 rc = SFRC_SUCCESS; 1978 break; 1979 1980 default: 1981 (void) set_state(f, UF_PANIC); 1982 TRIVIA((": failed] ")); 1983 break; 1984 } 1985 1986 TRIVIA(("] ")); 1987 return (rc); 1988 } 1989 1990 static sfrc_t 1991 sf_panic( 1992 ufs_failure_t *f, 1993 ufsa_t a, 1994 ufs_failure_states_t s) 1995 { 1996 sfrc_t rc = SFRC_FAIL; 1997 1998 TRIVIA(("[sf_panic, action is %s, prev. state is %s", 1999 act_name(a), state_name(f->uf_s))); 2000 ASSERT(s & UF_PANIC); 2001 2002 switch (a) { 2003 case UFA_SET: 2004 f->uf_retry = -ufsfx_tune.uft_short_err_period; 2005 rc = SFRC_SUCCESS; 2006 break; 2007 2008 case UFA_FOUND: 2009 default: 2010 real_panic(f, " "); 2011 2012 /* LINTED: warning: logical expression always true: op "||" */ 2013 ASSERT(DEBUG); 2014 2015 (void) set_state(f, UF_UMOUNT); /* XXX UF_NOTFIX? */ 2016 2017 break; 2018 } 2019 2020 TRIVIA(("] ")); 2021 return (rc); 2022 } 2023 2024 /* 2025 * minimum state function 2026 */ 2027 static sfrc_t 2028 sf_minimum( 2029 ufs_failure_t *f, 2030 ufsa_t a, /* LINTED argument unused in function: ignored */ 2031 ufs_failure_states_t ignored) 2032 { 2033 sfrc_t rc = SFRC_FAIL; 2034 2035 TRIVIA(("[sf_minimum, action is %s", act_name(a))); 2036 2037 switch (a) { 2038 case UFA_SET: 2039 f->uf_retry = 0; 2040 /* FALLTHROUGH */ 2041 2042 case UFA_FOUND: 2043 rc = SFRC_SUCCESS; 2044 break; 2045 2046 default: 2047 (void) set_state(f, UF_PANIC); 2048 TRIVIA((": failed] ")); 2049 break; 2050 } 2051 2052 TRIVIA(("] ")); 2053 return (rc); 2054 } 2055 2056 static int 2057 state_trans_valid(ufs_failure_states_t from, ufs_failure_states_t to) 2058 { 2059 ufsd_t *s; 2060 int valid; 2061 2062 HIDEOUS(("[state_trans_valid")); 2063 2064 if (from & to) 2065 return (1); 2066 2067 s = get_state_desc(to); 2068 2069 /* 2070 * extra test is necessary since we want UF_UNDEF = 0, 2071 * (to detect freshly allocated memory) 2072 * but can't check for that value with a bit test 2073 */ 2074 valid = (to & UF_INIT)? from == s->ud_prev: from & s->ud_prev; 2075 2076 HIDEOUS((": %svalid] ", valid? "": "in")); 2077 return (valid); 2078 } 2079 2080 static int 2081 terminal_state(ufs_failure_states_t state) 2082 { 2083 ufsd_t *s; 2084 2085 HIDEOUS(("[terminal_state")); 2086 2087 s = get_state_desc(state); 2088 2089 HIDEOUS((": %sterminal] ", s->ud_attr.terminal? "": "not ")); 2090 return ((int)s->ud_attr.terminal); 2091 } 2092 2093 static void 2094 alloc_lockfs_comment(ufs_failure_t *f, struct lockfs *lfp) 2095 { 2096 MINUTE(("[alloc_lockfs_comment")); 2097 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2098 2099 /* 2100 * ufs_fiolfs expects a kmem_alloc'ed comment; 2101 * it frees the comment if the lock fails 2102 * or else when the lock is unlocked. 2103 */ 2104 2105 f->uf_lf.lf_comment = kmem_zalloc(LOCKFS_MAXCOMMENTLEN, KM_NOSLEEP); 2106 if (f->uf_lf.lf_comment) { 2107 char *from; 2108 size_t len; 2109 2110 /* 2111 * use panic string if there's no previous comment 2112 * or if we're setting the error lock 2113 */ 2114 if ((LOCKFS_IS_ELOCK(&f->uf_lf) || !lfp->lf_comment || 2115 lfp->lf_comlen <= 0)) { 2116 from = f->uf_panic_str; 2117 len = LOCKFS_MAXCOMMENTLEN; 2118 } else { 2119 from = lfp->lf_comment; 2120 len = lfp->lf_comlen; 2121 } 2122 2123 bcopy(from, f->uf_lf.lf_comment, len); 2124 f->uf_lf.lf_comlen = len; 2125 2126 } else { 2127 f->uf_lf.lf_comlen = 0; 2128 } 2129 MINUTE(("] ")); 2130 } 2131 2132 static int 2133 set_lockfs(ufs_failure_t *f, struct lockfs *lfp) 2134 { 2135 int (*handle_lockfs_rc)(ufs_failure_t *); 2136 int rc; 2137 2138 MINUTE(("[set_lockfs")); 2139 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2140 ASSERT(!vfs_lock_held(f->uf_vfsp)); 2141 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp)); 2142 2143 if (!f->uf_ufsvfsp) { 2144 MINUTE((": ufsvfsp is NULL]\n")); 2145 return (0); 2146 } 2147 2148 ASSERT(MUTEX_NOT_HELD(&f->uf_ufsvfsp->vfs_ulockfs.ul_lock)); 2149 2150 if (!f->uf_ufsvfsp->vfs_root) { 2151 MINUTE((": vfs_root is NULL]\n")); 2152 return (0); 2153 } 2154 2155 alloc_lockfs_comment(f, lfp); 2156 f->uf_lf_err = 0; 2157 2158 if (!LOCKFS_IS_ELOCK(lfp)) { 2159 lfp->lf_lock = f->uf_lf.lf_lock = LOCKFS_ELOCK; 2160 VN_HOLD(f->uf_ufsvfsp->vfs_root); 2161 f->uf_lf_err = 2162 ufs__fiolfs(f->uf_ufsvfsp->vfs_root, 2163 &f->uf_lf, /* from_user */ 0, /* from_log */ 0); 2164 VN_RELE(f->uf_ufsvfsp->vfs_root); 2165 } 2166 2167 handle_lockfs_rc = f->uf_lf_err != 0? lockfs_failure: lockfs_success; 2168 rc = handle_lockfs_rc(f); 2169 2170 MINUTE(("] ")); 2171 return (rc); 2172 } 2173 2174 static int 2175 lockfs_failure(ufs_failure_t *f) 2176 { 2177 int error; 2178 ufs_failure_states_t s; 2179 2180 TRIVIA(("[lockfs_failure")); 2181 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2182 2183 if (!f->uf_ufsvfsp) { 2184 TRIVIA((": ufsvfsp is NULL]\n")); 2185 return (0); 2186 } 2187 2188 error = f->uf_lf_err; 2189 switch (error) { 2190 /* non-transient errors: */ 2191 case EACCES: /* disk/in-core metadata reconciliation failed */ 2192 case EPERM: /* inode reconciliation failed; incore inode changed? */ 2193 case EIO: /* device is hard-locked or not responding */ 2194 case EROFS: /* device is write-locked */ 2195 case EDEADLK: /* can't lockfs; deadlock would result; */ 2196 /* Swapping or saving accounting records */ 2197 /* onto this fs can cause this errno. */ 2198 2199 MINOR(("ufs_fiolfs(\"%s\") of %s failed: %s (%d)", 2200 fs_name(f), lock_name(&f->uf_lf), 2201 err_name(error), error)); 2202 2203 /* 2204 * if can't get lock, then fallback to panic, unless 2205 * unless unmount was requested (although unmount will 2206 * probably fail if the lock failed, so we'll panic 2207 * anyway 2208 */ 2209 2210 s = ((f->uf_flags & UFSFX_LCKUMOUNT) && error != EDEADLK) ? 2211 UF_UMOUNT: UF_PANIC; 2212 2213 if (!set_state(f, s)) { 2214 real_panic(f, " "); 2215 /*NOTREACHED*/ 2216 break; 2217 } 2218 break; 2219 2220 2221 case EBUSY: 2222 case EAGAIN: 2223 2224 f->uf_retry = ufsfx_tune.uft_short_err_period; 2225 if (curthread->t_flag & T_DONTPEND) { 2226 curthread->t_flag &= ~T_DONTPEND; 2227 2228 } else if (!(f->uf_s & (UF_LOCKED | UF_FIXING))) { 2229 ufs_failure_states_t state; 2230 /* 2231 * if we didn't know that the fix had started, 2232 * take note 2233 */ 2234 state = error == EBUSY? UF_LOCKED: UF_FIXING; 2235 if (!set_state(f, state)) { 2236 TRIVIA((": failed] ")); 2237 return (0); 2238 } 2239 } 2240 break; 2241 2242 default: /* some other non-fatal error */ 2243 MINOR(("lockfs(\"%s\") of %s returned %s (%d)", 2244 lock_name(&f->uf_lf), fs_name(f), 2245 err_name(f->uf_lf_err), f->uf_lf_err)); 2246 2247 f->uf_retry = ufsfx_tune.uft_short_err_period; 2248 break; 2249 2250 case EINVAL: /* unmounted? */ 2251 (void) set_state(f, UF_NOTFIX); 2252 break; 2253 } 2254 TRIVIA(("] ")); 2255 return (1); 2256 } 2257 2258 static int 2259 lockfs_success(ufs_failure_t *f) 2260 { 2261 TRIVIA(("[lockfs_success")); 2262 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2263 2264 if (!f->uf_ufsvfsp) { 2265 TRIVIA((": ufsvfsp is NULL]\n")); 2266 return (0); 2267 } 2268 2269 switch (f->uf_lf.lf_lock) { 2270 case LOCKFS_ELOCK: /* error lock worked */ 2271 2272 if (!set_state(f, UF_LOCKED)) { 2273 TRIVIA((": failed] ")); 2274 return (0); 2275 } 2276 break; 2277 2278 case LOCKFS_ULOCK: /* unlock worked */ 2279 /* 2280 * how'd we get here? 2281 * This should be done from fsck's unlock, 2282 * not from this thread's context. 2283 */ 2284 cmn_err(CE_WARN, "Unlocked error-lock of %s", fs_name(f)); 2285 ufsfx_unlockfs(f->uf_ufsvfsp); 2286 break; 2287 2288 default: 2289 if (!set_state(f, UF_NOTFIX)) { 2290 TRIVIA((": failed] ")); 2291 return (0); 2292 } 2293 break; 2294 } 2295 TRIVIA(("] ")); 2296 return (1); 2297 } 2298 2299 /* 2300 * when fsck is running it puts its pid into the lockfs 2301 * comment structure, prefaced by PIDSTR 2302 */ 2303 const char *PIDSTR = "[pid:"; 2304 static int 2305 fsck_active(ufs_failure_t *f) 2306 { 2307 char *cp; 2308 int i, found, errlocked; 2309 size_t comlen; 2310 const int PIDSTRLEN = (int)strlen(PIDSTR); 2311 struct ulockfs *ulp = &f->uf_ufsvfsp->vfs_ulockfs; 2312 2313 TRIVIA(("[fsck_active")); 2314 2315 ASSERT(f); 2316 ASSERT(f->uf_s & UF_FIXING); 2317 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2318 ASSERT(f->uf_ufsvfsp); 2319 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp)); 2320 ASSERT(MUTEX_NOT_HELD(&ulp->ul_lock)); 2321 2322 mutex_enter(&ulp->ul_lock); 2323 cp = ulp->ul_lockfs.lf_comment; 2324 comlen = ulp->ul_lockfs.lf_comlen; 2325 errlocked = (int)ULOCKFS_IS_ELOCK(ulp); 2326 mutex_exit(&ulp->ul_lock); 2327 2328 if (!cp || comlen == 0) { 2329 TRIVIA((": null comment or comlen <= 0, found:0]")); 2330 return (0); 2331 } 2332 2333 for (found = i = 0; !found && i < (comlen - PIDSTRLEN); i++, cp++) 2334 found = strncmp(cp, PIDSTR, PIDSTRLEN) == 0; 2335 2336 TRIVIA(("found:%d, is_elock:%d]", found, errlocked)); 2337 return (errlocked & found); 2338 } 2339 2340 static const char unknown_fs[] = "<unknown fs>"; 2341 static const char null_failure[] = "<NULL ufs failure record; unknown fs>"; 2342 static const char mutated_vfs_bufp[] = "<mutated vfs_bufp, unknown fs>"; 2343 static const char mutated_vfs_fs[] = "<mutated vfs_fs, unknown fs>"; 2344 2345 static char * 2346 fs_name(ufs_failure_t *f) 2347 { 2348 HIDEOUS(("[fs_name")); 2349 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2350 2351 if (!f) { 2352 HIDEOUS((": failure ptr is NULL]\n")); 2353 return ((char *)null_failure); 2354 } 2355 2356 if (f->uf_fsname[0] != '\0') { 2357 HIDEOUS((": return (uf_fsname)]\n")); 2358 return (f->uf_fsname); 2359 } 2360 2361 if (MUTEX_HELD(f->uf_vfs_lockp)) { 2362 if (f->uf_bp != f->uf_ufsvfsp->vfs_bufp) { 2363 HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n", 2364 (void *)f->uf_bp, (void *)f->uf_ufsvfsp->vfs_bufp)); 2365 return ((char *)mutated_vfs_bufp); 2366 } 2367 if (f->uf_fs != f->uf_ufsvfsp->vfs_fs) { 2368 HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n", 2369 (void *)f->uf_fs, (void *)f->uf_ufsvfsp->vfs_fs)); 2370 return ((char *)mutated_vfs_fs); 2371 } 2372 if (f->uf_ufsvfsp && f->uf_bp && f->uf_fs && 2373 *f->uf_fs->fs_fsmnt != '\0') { 2374 HIDEOUS((": return (fs_fsmnt)]\n")); 2375 return (f->uf_fs->fs_fsmnt); 2376 } 2377 } 2378 2379 HIDEOUS((": unknown file system]\n")); 2380 return ((char *)unknown_fs); 2381 } 2382 2383 #if defined(DEBUG) 2384 static char * 2385 lock_name(struct lockfs *lfp) 2386 { 2387 struct lock_description *l; 2388 char *lname; 2389 2390 HIDEOUS(("[lock_name")); 2391 2392 lname = lock_desc[0].ld_name; 2393 for (l = &lock_desc[1]; l->ld_name != NULL; l++) { 2394 if (lfp && lfp->lf_lock == l->ld_type) { 2395 lname = l->ld_name; 2396 break; 2397 } 2398 } 2399 HIDEOUS(("]")); 2400 return (lname); 2401 } 2402 2403 static char * 2404 state_name(ufs_failure_states_t state) 2405 { 2406 ufsd_t *s; 2407 2408 HIDEOUS(("[state_name")); 2409 2410 s = get_state_desc(state); 2411 2412 HIDEOUS(("]")); 2413 return (s->ud_name); 2414 } 2415 2416 static char * 2417 err_name(int error) 2418 { 2419 struct error_description *e; 2420 2421 HIDEOUS(("[err_name")); 2422 2423 for (e = &err_desc[1]; e->ed_name != NULL; e++) { 2424 if (error == e->ed_errno) { 2425 HIDEOUS(("]")); 2426 return (e->ed_name); 2427 } 2428 } 2429 HIDEOUS(("]")); 2430 return (err_desc[0].ed_name); 2431 } 2432 2433 static char * 2434 act_name(ufsa_t action) 2435 { 2436 struct action_description *a; 2437 2438 HIDEOUS(("[act_name")); 2439 2440 for (a = &act_desc[1]; a->ad_name != NULL; a++) { 2441 if (action == a->ad_v) { 2442 HIDEOUS(("]")); 2443 return (a->ad_name); 2444 } 2445 } 2446 HIDEOUS(("]")); 2447 return (act_desc[0].ad_name); 2448 } 2449 2450 /* 2451 * dump failure list 2452 */ 2453 static void 2454 dump_uf_list(char *msg) 2455 { 2456 ufs_failure_t *f; 2457 int i; 2458 int list_was_locked = MUTEX_HELD(&ufs_fix.uq_mutex); 2459 2460 if (!list_was_locked && !mutex_tryenter(&ufs_fix.uq_mutex)) { 2461 printf("dump_uf_list: couldn't get list lock\n"); 2462 return; 2463 } 2464 2465 if (msg) { 2466 printf("\n%s", msg); 2467 } 2468 printf("\ndump_uf_list:\n\tuq_lowat: %d, uq_ne: %d\n", 2469 ufs_fix.uq_lowat, ufs_fix.uq_ne); 2470 2471 mutex_enter(&uf_stats.ufst_mutex); 2472 printf("\tuf_stats.current_races: %ld\n", uf_stats.ufst_current_races); 2473 printf("\tuf_stats.num_failed: %ld\n", uf_stats.ufst_num_failed); 2474 printf("\tuf_stats.num_fixed: %ld\n", uf_stats.ufst_num_fixed); 2475 printf("\tuf_stats.cpu_waste: %ld\n", uf_stats.ufst_cpu_waste); 2476 printf("\tuf_stats.lock_violations: %ld, unmount_failures: %ld\n", 2477 uf_stats.ufst_lock_violations, uf_stats.ufst_unmount_failures); 2478 mutex_exit(&uf_stats.ufst_mutex); 2479 2480 for (f = ufs_fix.uq_ufhead, i = 1; f; f = f->uf_next, i++) { 2481 2482 if (!mutex_tryenter(&f->uf_mutex)) { 2483 printf("%d.\t\"skipped - try enter failed\"\n", i); 2484 continue; 2485 } 2486 2487 dump_uf(f, i); 2488 2489 mutex_exit(&f->uf_mutex); 2490 } 2491 2492 printf("\n"); 2493 2494 if (!list_was_locked) 2495 mutex_exit(&ufs_fix.uq_mutex); 2496 } 2497 2498 static void 2499 dump_uf(ufs_failure_t *f, int i) 2500 { 2501 if (!f) { 2502 printf("dump_uf: NULL failure record\n"); 2503 return; 2504 } 2505 2506 printf("%d.\t\"%s\" is %s.\n", 2507 i, fs_name(f), state_name(f->uf_s)); 2508 printf("\t\"%s\"\tAddr: 0x%p\n", f->uf_panic_str, (void *)f); 2509 printf("\tNext: 0x%p\t\tPrev: 0x%p\n", 2510 (void *)f->uf_next, (void *)f->uf_prev); 2511 2512 if (f->uf_orig) 2513 printf("\tOriginal failure: 0x%p \"%s\"\n", 2514 (void *)f->uf_orig, f->uf_orig->uf_panic_str); 2515 2516 printf("\tUfsvfs: 0x%p\t\tVfs_lockp: 0x%p\n", 2517 (void *)f->uf_ufsvfsp, (void *)f->uf_vfs_lockp); 2518 printf("\tVfs_fsfxp: 0x%p\n", (void *)f->uf_vfs_ufsfxp); 2519 printf("\tVfs_bufp: 0x%p", (void *)f->uf_bp); 2520 2521 if (f->uf_bp) 2522 printf("\t\tVfs_fs: 0x%p\n", (void *)f->uf_fs); 2523 else 2524 printf("\n"); 2525 2526 printf("\tBegin: 0x%lx\tEntered: 0x%lx\tEnd: 0x%lx\n", 2527 f->uf_begin_tm, f->uf_entered_tm, f->uf_end_tm); 2528 2529 printf("\tFlags: (%d) %s%s%s%s", f->uf_flags, 2530 f->uf_flags & UFSFX_LCKONLY? "\"lock only\" " : "", 2531 f->uf_flags & UFSFX_LCKUMOUNT? "\"lock+unmount\" " : "", 2532 f->uf_flags & UFSFX_REPAIR_START? "\"started repair\" " : "", 2533 f->uf_flags == 0? "<none>" : ""); 2534 2535 printf("\tRetry: %ld seconds\n", f->uf_retry); 2536 2537 printf("\tLockfs:\ttype: %s\terror: %s (%d)\n", 2538 lock_name(&f->uf_lf), err_name(f->uf_lf_err), f->uf_lf_err); 2539 2540 } 2541 #endif /* DEBUG */ 2542 2543 /* 2544 * returns # of ufs_failures in a non-terminal state on queue 2545 * used to coordinate with hlock thread (see ufs_thread.c) 2546 * and to determine when the error lock thread may exit 2547 */ 2548 2549 int 2550 ufsfx_get_failure_qlen(void) 2551 { 2552 ufs_failure_t *f; 2553 ufsd_t *s; 2554 int qlen = 0; 2555 2556 MINUTE(("[ufsfx_get_failure_qlen")); 2557 2558 if (!mutex_tryenter(&ufs_fix.uq_mutex)) 2559 return (-1); 2560 2561 /* 2562 * walk down failure list 2563 */ 2564 2565 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 2566 2567 if (!mutex_tryenter(&f->uf_mutex)) 2568 continue; 2569 2570 s = get_state_desc(f->uf_s); 2571 2572 if (s->ud_attr.terminal) { 2573 mutex_exit(&f->uf_mutex); 2574 continue; 2575 } 2576 2577 MINUTE((": found: %s, \"%s: %s\"\n", 2578 fs_name(f), state_name(f->uf_s), f->uf_panic_str)); 2579 2580 qlen++; 2581 mutex_exit(&f->uf_mutex); 2582 } 2583 2584 mutex_exit(&ufs_fix.uq_mutex); 2585 2586 MINUTE((": qlen=%d]\n", qlen)); 2587 2588 return (qlen); 2589 } 2590 2591 /* 2592 * timeout routine 2593 * called to shutdown fix failure thread and server daemon 2594 */ 2595 static void 2596 ufsfx_kill_fix_failure_thread(void *arg) 2597 { 2598 clock_t odelta = (clock_t)arg; 2599 int qlen; 2600 2601 MAJOR(("[ufsfx_kill_fix_failure_thread")); 2602 2603 qlen = ufsfx_get_failure_qlen(); 2604 2605 if (qlen < 0) { 2606 clock_t delta; 2607 2608 delta = odelta << 1; 2609 if (delta <= 0) 2610 delta = INT_MAX; 2611 2612 (void) timeout(ufsfx_kill_fix_failure_thread, 2613 (void *)delta, delta); 2614 MAJOR((": rescheduled")); 2615 2616 } else if (qlen == 0) { 2617 ufs_thread_exit(&ufs_fix); 2618 MAJOR((": killed")); 2619 } 2620 /* 2621 * else 2622 * let timeout expire 2623 */ 2624 MAJOR(("]\n")); 2625 } 2626