1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/errno.h> 32 #include <sys/mode.h> 33 #include <sys/sysmacros.h> 34 #include <sys/cmn_err.h> 35 #include <sys/varargs.h> 36 #include <sys/time.h> 37 #include <sys/buf.h> 38 #include <sys/kmem.h> 39 #include <sys/t_lock.h> 40 #include <sys/poll.h> 41 #include <sys/debug.h> 42 #include <sys/cred.h> 43 #include <sys/lockfs.h> 44 #include <sys/fs/ufs_fs.h> 45 #include <sys/fs/ufs_inode.h> 46 #include <sys/fs/ufs_panic.h> 47 #include <sys/fs/ufs_lockfs.h> 48 #include <sys/fs/ufs_trans.h> 49 #include <sys/fs/ufs_mount.h> 50 #include <sys/fs/ufs_prot.h> 51 #include <sys/fs/ufs_bio.h> 52 #include <sys/pathname.h> 53 #include <sys/utsname.h> 54 #include <sys/conf.h> 55 56 /* handy */ 57 #define abs(x) ((x) < 0? -(x): (x)) 58 59 #if defined(DEBUG) 60 61 #define DBGLVL_NONE 0x00000000 62 #define DBGLVL_MAJOR 0x00000100 63 #define DBGLVL_MINOR 0x00000200 64 #define DBGLVL_MINUTE 0x00000400 65 #define DBGLVL_TRIVIA 0x00000800 66 #define DBGLVL_HIDEOUS 0x00001000 67 68 #define DBGFLG_NONE 0x00000000 69 #define DBGFLG_NOPANIC 0x00000001 70 #define DBGFLG_LVLONLY 0x00000002 71 #define DBGFLG_FIXWOULDPANIC 0x00000004 72 73 #define DBGFLG_FLAGMASK 0x0000000F 74 #define DBGFLG_LEVELMASK ~DBGFLG_FLAGMASK 75 76 #define DEBUG_FLAGS (ufs_fix_failure_dbg & DBGFLG_FLAGMASK) 77 #define DEBUG_LEVEL (ufs_fix_failure_dbg & DBGFLG_LEVELMASK) 78 79 unsigned int ufs_fix_failure_dbg = DBGLVL_NONE | DBGFLG_NONE; 80 81 #define DCALL(dbg_level, call) \ 82 { \ 83 if (DEBUG_LEVEL != DBGLVL_NONE) { \ 84 if (DEBUG_FLAGS & DBGFLG_LVLONLY) { \ 85 if (DEBUG_LEVEL & dbg_level) { \ 86 call; \ 87 } \ 88 } else { \ 89 if (dbg_level <= DEBUG_LEVEL) { \ 90 call; \ 91 } \ 92 } \ 93 } \ 94 } 95 96 #define DPRINTF(dbg_level, msg) DCALL(dbg_level, printf msg) 97 98 #define MAJOR(msg) DPRINTF(DBGLVL_MAJOR, msg) 99 #define MINOR(msg) DPRINTF(DBGLVL_MINOR, msg) 100 #define MINUTE(msg) DPRINTF(DBGLVL_MINUTE, msg) 101 #define TRIVIA(msg) DPRINTF(DBGLVL_TRIVIA, msg) 102 #define HIDEOUS(msg) DPRINTF(DBGLVL_HIDEOUS, msg) 103 104 #else /* !DEBUG */ 105 106 #define DCALL(ignored_dbg_level, ignored_routine) 107 #define MAJOR(ignored) 108 #define MINOR(ignored) 109 #define MINUTE(ignored) 110 #define TRIVIA(ignored) 111 #define HIDEOUS(ignored) 112 113 #endif /* DEBUG */ 114 115 #define NULLSTR(str) (!(str) || *(str) == '\0'? "<null>" : (str)) 116 #define NULSTRING "" 117 118 /* somewhat arbitrary limits, in seconds */ 119 /* all probably ought to be different, but these are convenient for debugging */ 120 const time_t UF_TOO_LONG = 128; /* max. wait for fsck start */ 121 122 /* all of these are in units of seconds used for retry period while ... */ 123 const time_t UF_FIXSTART_PERIOD = 16; /* awaiting fsck start */ 124 const time_t UF_FIXPOLL_PERIOD = 256; /* awaiting fsck finish */ 125 const time_t UF_SHORT_ERROR_PERIOD = 4; /* after (lockfs) error */ 126 const time_t UF_LONG_ERROR_PERIOD = 512; /* after (lockfs) error */ 127 128 #define NO_ERROR 0 129 #define LOCKFS_OLOCK LOCKFS_MAXLOCK+1 130 131 const ulong_t GB = 1024 * 1024 * 1024; 132 const ulong_t SecondsPerGig = 1024; /* ~17 minutes (overestimate) */ 133 134 /* 135 * per filesystem flags 136 */ 137 const int UFSFX_PANIC = (UFSMNT_ONERROR_PANIC >> 4); 138 const int UFSFX_LCKONLY = (UFSMNT_ONERROR_LOCK >> 4); 139 const int UFSFX_LCKUMOUNT = (UFSMNT_ONERROR_UMOUNT >> 4); 140 const int UFSFX_DEFAULT = (UFSMNT_ONERROR_DEFAULT >> 4); 141 const int UFSFX_REPAIR_START = 0x10000000; 142 143 /* return protocols */ 144 145 typedef enum triage_return_code { 146 TRIAGE_DEAD = -1, 147 TRIAGE_NO_SPIRIT, 148 TRIAGE_ATTEND_TO 149 } triage_t; 150 151 typedef enum statefunc_return_code { 152 SFRC_SUCCESS = 1, 153 SFRC_FAIL = 0 154 } sfrc_t; 155 156 /* external references */ 157 /* in ufs_thread.c */ 158 extern int ufs_thread_run(struct ufs_q *, callb_cpr_t *cprinfop); 159 extern int ufs_checkaccton(vnode_t *); /* in ufs_lockfs.c */ 160 extern int ufs_checkswapon(vnode_t *); /* in ufs_lockfs.c */ 161 162 extern struct pollhead ufs_pollhd; /* in ufs_vnops.c */ 163 164 /* globals */ 165 struct ufs_q ufs_fix; 166 167 /* 168 * patchable constants: 169 * These are set in ufsfx_init() [called at modload] 170 */ 171 struct ufs_failure_tunable { 172 long uft_too_long; /* limit repair startup time */ 173 long uft_fixstart_period; /* pre-repair start period */ 174 long uft_fixpoll_period; /* post-fsck start period */ 175 long uft_short_err_period; /* post-error short period */ 176 long uft_long_err_period; /* post-error long period */ 177 } ufsfx_tune; 178 179 /* internal statistics of events */ 180 struct uf_statistics { 181 ulong_t ufst_lock_violations; 182 ulong_t ufst_current_races; 183 ulong_t ufst_unmount_failures; 184 ulong_t ufst_num_fixed; 185 ulong_t ufst_num_failed; 186 ulong_t ufst_cpu_waste; 187 time_t ufst_last_start_tm; 188 kmutex_t ufst_mutex; 189 } uf_stats; 190 191 typedef enum state_action { 192 UFA_ERROR = -1, /* internal error */ 193 UFA_FOUND, /* found uf in state */ 194 UFA_SET /* change uf to state */ 195 } ufsa_t; 196 197 /* state definition */ 198 typedef struct uf_state_desc { 199 int ud_v; /* value */ 200 char *ud_name; /* name */ 201 sfrc_t (*ud_sfp)(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 202 /* per-state actions */ 203 ufs_failure_states_t ud_prev; /* valid prev. states */ 204 205 struct uf_state_desc_attr { 206 unsigned terminal:1; /* no action req. if found */ 207 unsigned at_fail:1; /* state set by thread */ 208 /* encountering the error */ 209 unsigned unused; 210 } ud_attr; 211 } ufsd_t; 212 213 /* 214 * forward references 215 */ 216 217 /* thread to watch for failures */ 218 static void ufsfx_thread_fix_failures(void *); 219 static int ufsfx_do_failure_q(void); 220 static void ufsfx_kill_fix_failure_thread(void *); 221 222 /* routines called when failure occurs */ 223 static int ufs_fault_v(vnode_t *, char *, va_list) 224 __KVPRINTFLIKE(2); 225 static ufs_failure_t *init_failure(vnode_t *, char *, va_list) 226 __KVPRINTFLIKE(2); 227 static void queue_failure(ufs_failure_t *); 228 /*PRINTFLIKE2*/ 229 static void real_panic(ufs_failure_t *, const char *, ...) 230 __KPRINTFLIKE(2); 231 static void real_panic_v(ufs_failure_t *, const char *, va_list) 232 __KVPRINTFLIKE(2); 233 static triage_t triage(vnode_t *); 234 235 /* routines called when failure record is acted upon */ 236 static sfrc_t set_state(ufs_failure_t *, ufs_failure_states_t); 237 static int state_trans_valid(ufs_failure_states_t, ufs_failure_states_t); 238 static int terminal_state(ufs_failure_states_t); 239 240 /* routines called when states entered/found */ 241 static sfrc_t sf_minimum(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 242 static sfrc_t sf_undef(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 243 static sfrc_t sf_init(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 244 static sfrc_t sf_queue(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 245 static sfrc_t sf_found_queue(ufs_failure_t *); 246 static sfrc_t sf_nonterm_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 247 static sfrc_t sf_term_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 248 static sfrc_t sf_panic(ufs_failure_t *, ufsa_t, ufs_failure_states_t); 249 static sfrc_t sf_set_trylck(ufs_failure_t *); 250 static sfrc_t sf_set_locked(ufs_failure_t *); 251 static sfrc_t sf_found_trylck(ufs_failure_t *); 252 static sfrc_t sf_found_lock_fix_cmn(ufs_failure_t *, ufs_failure_states_t); 253 static sfrc_t sf_found_umount(ufs_failure_t *); 254 255 /* support routines, called by sf_nonterm_cmn and sf_term_cmn */ 256 static time_t trylock_time_exceeded(ufs_failure_t *); 257 static void pester_msg(ufs_failure_t *, int); 258 static int get_lockfs_status(ufs_failure_t *, struct lockfs *); 259 static void alloc_lockfs_comment(ufs_failure_t *, struct lockfs *); 260 static int set_lockfs(ufs_failure_t *, struct lockfs *); 261 static int lockfs_failure(ufs_failure_t *); 262 static int lockfs_success(ufs_failure_t *); 263 static int fsck_active(ufs_failure_t *); 264 265 /* low-level support routines */ 266 static ufsd_t *get_state_desc(ufs_failure_states_t); 267 static char *fs_name(ufs_failure_t *); 268 269 #if defined(DEBUG) 270 static char *state_name(ufs_failure_states_t); 271 static char *lock_name(struct lockfs *); 272 static char *err_name(int); 273 static char *act_name(ufsa_t); 274 static void dump_uf_list(char *msg); 275 static void dump_uf(ufs_failure_t *, int i); 276 #endif /* DEBUG */ 277 /* 278 * 279 * State Transitions: 280 * 281 * normally: 282 * if flagged to be locked but not unmounted: (UFSMNT_ONERROR_LOCK) 283 * UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> FIXING -> FIXED 284 * 285 * The only difference between these two is that the fsck must be started 286 * manually. 287 * 288 * if flagged to be unmounted: (UFSMNT_ONERROR_UMOUNT) 289 * UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> UMOUNT -> NOTFIX 290 * 291 * if flagged to panic: (UFSMNT_ONERROR_PANIC) 292 * UNDEF -> INIT -> PANIC 293 * 294 * if a secondary panic on a file system which has an active failure 295 * record: 296 * UNDEF -> INIT -> QUEUE -> REPLICA 297 * 298 * UNDEF, INIT, QUEUE all are set in the context of the failing thread. 299 * All other states (except possibly PANIC) are set in by the monitor 300 * (lock) thread. 301 * 302 */ 303 304 ufsd_t state_desc[] = 305 { 306 { UF_ILLEGAL, "in an unknown state", sf_minimum, UF_ILLEGAL, 307 { 0, 1, 0 } }, 308 { UF_UNDEF, "undefined", sf_undef, UF_UNDEF, 309 { 0, 1, 0 } }, 310 { UF_INIT, "being initialized", sf_init, UF_UNDEF, 311 { 0, 1, 0 } }, 312 { UF_QUEUE, "queued", sf_queue, UF_INIT, 313 { 0, 1, 0 } }, 314 { UF_TRYLCK, "trying to be locked", sf_nonterm_cmn, 315 UF_QUEUE, { 0, 0, 0 } }, 316 { UF_LOCKED, "locked", sf_nonterm_cmn, 317 UF_TRYLCK | UF_FIXING, { 0, 0, 0 } }, 318 { UF_UMOUNT, "being unmounted", sf_nonterm_cmn, 319 320 #if defined(DEBUG) 321 UF_PANIC | 322 #endif /* DEBUG */ 323 UF_TRYLCK | UF_LOCKED, { 0, 0, 0 } }, 324 { UF_FIXING, "being fixed", sf_nonterm_cmn, 325 UF_LOCKED, { 0, 0, 0 } }, 326 { UF_FIXED, "fixed", sf_term_cmn, 327 UF_FIXING, { 1, 0, 0 } }, 328 { UF_NOTFIX, "not fixed", sf_term_cmn, 329 330 #if defined(DEBUG) 331 UF_PANIC | 332 #endif /* DEBUG */ 333 334 UF_QUEUE | UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING, 335 { 1, 0, 0 } }, 336 { UF_REPLICA, "a replica", sf_term_cmn, 337 UF_QUEUE, { 1, 0, 0 } }, 338 { UF_PANIC, "panicking", sf_panic, 339 /* XXX make this narrower */ UF_ALLSTATES, { 0, 0, 0 } }, 340 { UF_UNDEF, NULL, ((sfrc_t (*)()) NULL), 341 UF_UNDEF, { 0, 0, 0 } } 342 }; 343 344 /* unified collection */ 345 struct ufsfx_info { 346 struct uf_statistics *ufi_statp; 347 struct ufs_failure_tunable *ufi_tunep; 348 ufsd_t *ufi_statetab; 349 } uffsinfo; 350 351 #if defined(DEBUG) 352 struct action_description { 353 ufsa_t ad_v; 354 char *ad_name; 355 }; 356 357 #define EUNK (-1) 358 359 struct error_description { 360 int ed_errno; 361 char *ed_name; 362 } err_desc[] = 363 { 364 { EUNK, "<unexpected errno?>" }, 365 { EINVAL, "EINVAL" }, 366 { EACCES, "EACCES" }, 367 { EPERM, "EPERM" }, 368 { EIO, "EIO" }, 369 { EDEADLK, "EDEADLK" }, 370 { EBUSY, "EBUSY" }, 371 { EAGAIN, "EAGAIN" }, 372 { ERESTART, "ERESTART" }, 373 { ETIMEDOUT, "ETIMEDOUT" }, 374 { NO_ERROR, "Ok" }, 375 { EUNK, NULL } 376 }; 377 378 struct action_description act_desc[] = 379 { 380 { UFA_ERROR, "<unexpected action?>" }, 381 { UFA_FOUND, "\"found\"" }, 382 { UFA_SET, "\"set\"" }, 383 { UFA_ERROR, NULL }, 384 }; 385 386 #define LOCKFS_BADLOCK (-1) 387 388 struct lock_description { 389 int ld_type; 390 char *ld_name; 391 } lock_desc[] = 392 { 393 { LOCKFS_BADLOCK, "<unexpected lock?>" }, 394 { LOCKFS_ULOCK, "Unlock" }, 395 { LOCKFS_ELOCK, "Error Lock" }, 396 { LOCKFS_HLOCK, "Hard Lock" }, 397 { LOCKFS_OLOCK, "Old Lock" }, 398 { LOCKFS_BADLOCK, NULL } 399 }; 400 401 #endif /* DEBUG */ 402 403 /* 404 * ufs_fault, ufs_fault_v 405 * 406 * called instead of cmn_err(CE_PANIC, ...) by ufs routines 407 * when a failure is detected to put the file system into an 408 * error state (if possible) or to devolve to a panic otherwise 409 * 410 * vnode is some vnode in this file system, used to find the way 411 * to ufsvfs, vfsp etc. Since a panic can be called from many 412 * levels, the vnode is the most convenient hook to pass through. 413 * 414 */ 415 416 /*PRINTFLIKE2*/ 417 int 418 ufs_fault(vnode_t *vp, char *fmt, ...) 419 { 420 va_list adx; 421 int error; 422 423 MINOR(("[ufs_fault")); 424 425 va_start(adx, fmt); 426 error = ufs_fault_v(vp, fmt, adx); 427 va_end(adx); 428 429 MINOR((": %s (%d)]\n", err_name(error), error)); 430 return (error); 431 } 432 433 const char *nullfmt = "<null format?>"; 434 435 static int 436 ufs_fault_v(vnode_t *vp, char *fmt, va_list adx) 437 { 438 ufs_failure_t *new = NULL; 439 ufsvfs_t *ufsvfsp; 440 triage_t fix; 441 int err = ERESTART; 442 int need_vfslock; 443 444 MINOR(("[ufs_fault_v")); 445 446 if (fmt == NULL) 447 fmt = (char *)nullfmt; 448 449 fix = triage(vp); 450 451 if (vp) { 452 ufsvfsp = (struct ufsvfs *)vp->v_vfsp->vfs_data; 453 454 /* 455 * Something bad has happened. That is why we are here. 456 * 457 * In order for the bad thing to be recorded in the superblock 458 * we need to write to the superblock directly. 459 * In the case that logging is enabled the logging code 460 * would normally intercept our write as a delta to the log, 461 * thus we mark the filesystem FSBAD in any case. 462 */ 463 need_vfslock = !MUTEX_HELD(&ufsvfsp->vfs_lock); 464 465 if (need_vfslock) { 466 mutex_enter(&ufsvfsp->vfs_lock); 467 } 468 469 ufsvfsp->vfs_fs->fs_clean = FSBAD; 470 ASSERT(SEMA_HELD(&ufsvfsp->vfs_bufp->b_sem)); 471 ufsvfsp->vfs_bufp->b_flags &= 472 ~(B_ASYNC | B_READ | B_DONE | B_ERROR | B_DELWRI); 473 474 (void) bdev_strategy(ufsvfsp->vfs_bufp); 475 (void) biowait(ufsvfsp->vfs_bufp); 476 477 if (need_vfslock) { 478 mutex_exit(&ufsvfsp->vfs_lock); 479 } 480 } 481 482 switch (fix) { 483 484 default: 485 case TRIAGE_DEAD: 486 case TRIAGE_NO_SPIRIT: 487 488 real_panic_v(new, fmt, adx); 489 /* LINTED: warning: logical expression always true: op "||" */ 490 ASSERT(DEBUG); 491 err = EAGAIN; 492 493 #if defined(DEBUG) 494 if (!(DEBUG_FLAGS & DBGFLG_FIXWOULDPANIC)) { 495 break; 496 } 497 /* FALLTHROUGH */ 498 499 #else 500 break; 501 502 #endif /* DEBUG */ 503 504 case TRIAGE_ATTEND_TO: 505 506 /* q thread not running yet? */ 507 if (mutex_tryenter(&ufs_fix.uq_mutex)) { 508 if (!ufs_fix.uq_threadp) { 509 mutex_exit(&ufs_fix.uq_mutex); 510 ufs_thread_start(&ufs_fix, 511 ufsfx_thread_fix_failures, NULL); 512 ufs_fix.uq_threadp->t_flag |= T_DONTBLOCK; 513 mutex_enter(&ufs_fix.uq_mutex); 514 } else { 515 /* 516 * We got the lock but we are not the current 517 * threadp so we have to release the lock. 518 */ 519 mutex_exit(&ufs_fix.uq_mutex); 520 } 521 } else { 522 MINOR((": fix failure thread already running ")); 523 /* 524 * No need to log another failure as one is already 525 * being logged. 526 */ 527 break; 528 } 529 530 if (ufs_fix.uq_threadp && ufs_fix.uq_threadp == curthread) { 531 mutex_exit(&ufs_fix.uq_mutex); 532 cmn_err(CE_WARN, "ufs_fault_v: recursive ufs_fault"); 533 } else { 534 /* 535 * Must check if we actually still own the lock and 536 * if so then release the lock and move on with life. 537 */ 538 if (mutex_owner(&ufs_fix.uq_mutex) == curthread) 539 mutex_exit(&ufs_fix.uq_mutex); 540 } 541 542 new = init_failure(vp, fmt, adx); 543 if (new != NULL) { 544 queue_failure(new); 545 break; 546 } 547 real_panic_v(new, fmt, adx); 548 break; 549 550 } 551 MINOR(("] ")); 552 return (err); 553 } 554 555 /* 556 * triage() 557 * 558 * Attempt to fix iff: 559 * - the system is not already panicking 560 * - this file system isn't explicitly marked not to be fixed 561 * - we can connect to the user-level daemon 562 * These conditions are detectable later, but if we can determine 563 * them in the failing threads context the core dump may be more 564 * useful. 565 * 566 */ 567 568 static triage_t 569 triage(vnode_t *vp) 570 { 571 struct inode *ip; 572 int need_unlock_vfs; 573 int fs_flags; 574 575 MINUTE(("[triage")); 576 577 if (panicstr) { 578 MINUTE(( 579 ": already panicking: \"%s\" => TRIAGE_DEAD]\n", panicstr)); 580 return (TRIAGE_DEAD); 581 } 582 583 if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs) { 584 MINUTE(( 585 ": vp, ip or ufsvfs is NULL; can't determine fs => TRIAGE_DEAD]\n")); 586 return (TRIAGE_DEAD); 587 } 588 589 /* use tryenter and continue no matter what since we're panicky */ 590 need_unlock_vfs = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock); 591 if (need_unlock_vfs) 592 need_unlock_vfs = mutex_tryenter(&ip->i_ufsvfs->vfs_lock); 593 594 fs_flags = ip->i_ufsvfs->vfs_fsfx.fx_flags; 595 if (need_unlock_vfs) 596 mutex_exit(&ip->i_ufsvfs->vfs_lock); 597 598 if (fs_flags & UFSFX_PANIC) { 599 MINUTE(( 600 ": filesystem marked \"panic\" => TRIAGE_NO_SPIRIT]\n")); 601 return (TRIAGE_NO_SPIRIT); 602 } 603 604 if (ufs_checkaccton(vp) != 0) { 605 MINUTE(( 606 ": filesystem would deadlock (accounting) => TRIAGE_DEAD]\n")); 607 return (TRIAGE_DEAD); 608 } 609 610 if (ufs_checkswapon(vp) != 0) { 611 MINUTE(( 612 ": filesystem would deadlock (swapping) => TRIAGE_DEAD]\n")); 613 return (TRIAGE_DEAD); 614 } 615 616 MINUTE((": return TRIAGE_ATTEND_TO] ")); 617 return (TRIAGE_ATTEND_TO); 618 } 619 620 /* 621 * init failure 622 * 623 * This routine allocates a failure struct and initializes 624 * it's member elements. 625 * Space is allocated for copies of dynamic identifying fs structures 626 * passed in. Without a much more segmented kernel architecture 627 * this is as protected as we can make it (for now.) 628 */ 629 static ufs_failure_t * 630 init_failure(vnode_t *vp, char *fmt, va_list adx) 631 { 632 ufs_failure_t *new; 633 struct inode *ip; 634 int initialization_worked = 0; 635 int need_vfs_unlock; 636 637 MINOR(("[init_failure")); 638 639 new = kmem_zalloc(sizeof (ufs_failure_t), KM_NOSLEEP); 640 if (!new) { 641 MINOR((": kmem_zalloc failed]\n")); 642 return (NULL); 643 } 644 645 /* 646 * enough information to make a fix attempt possible? 647 */ 648 if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs || !vp->v_vfsp || 649 !ip->i_ufsvfs->vfs_bufp || !ITOF(ip) || !fmt) 650 goto errout; 651 652 if (vp->v_type != VREG && vp->v_type != VDIR && 653 vp->v_type != VBLK && vp->v_type != VCHR && 654 vp->v_type != VLNK && vp->v_type != VFIFO && 655 vp->v_type != VSOCK) 656 goto errout; 657 658 if (ip->i_ufsvfs->vfs_root->v_type != VREG && 659 ip->i_ufsvfs->vfs_root->v_type != VDIR && 660 ip->i_ufsvfs->vfs_root->v_type != VBLK && 661 ip->i_ufsvfs->vfs_root->v_type != VCHR && 662 ip->i_ufsvfs->vfs_root->v_type != VLNK && 663 ip->i_ufsvfs->vfs_root->v_type != VFIFO && 664 ip->i_ufsvfs->vfs_root->v_type != VSOCK) 665 goto errout; 666 667 if ((ITOF(ip)->fs_magic != FS_MAGIC) && 668 (ITOF(ip)->fs_magic != MTB_UFS_MAGIC)) 669 goto errout; 670 671 /* intialize values */ 672 673 (void) vsnprintf(new->uf_panic_str, LOCKFS_MAXCOMMENTLEN - 1, fmt, adx); 674 675 new->uf_ufsvfsp = ip->i_ufsvfs; 676 new->uf_vfsp = ip->i_vfs; 677 678 mutex_init(&new->uf_mutex, NULL, MUTEX_DEFAULT, NULL); 679 need_vfs_unlock = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock); 680 681 if (need_vfs_unlock) { 682 if (!mutex_tryenter(&ip->i_ufsvfs->vfs_lock)) { 683 /* 684 * not much alternative here, but we're panicking 685 * already, it couldn't be worse - so just 686 * proceed optimistically and take note. 687 */ 688 mutex_enter(&uf_stats.ufst_mutex); 689 uf_stats.ufst_lock_violations++; 690 mutex_exit(&uf_stats.ufst_mutex); 691 MINOR((": couldn't get vfs lock")) 692 need_vfs_unlock = 0; 693 } 694 } 695 696 if (mutex_tryenter(&new->uf_mutex)) { 697 initialization_worked = set_state(new, UF_INIT); 698 mutex_exit(&new->uf_mutex); 699 } 700 701 if (need_vfs_unlock) 702 mutex_exit(&ip->i_ufsvfs->vfs_lock); 703 704 if (initialization_worked) { 705 MINOR(("] ")); 706 return (new); 707 } 708 /* FALLTHROUGH */ 709 710 errout: 711 if (new) 712 kmem_free(new, sizeof (ufs_failure_t)); 713 MINOR((": failed]\n")); 714 return (NULL); 715 } 716 717 static void 718 queue_failure(ufs_failure_t *new) 719 { 720 MINOR(("[queue_failure")); 721 722 mutex_enter(&ufs_fix.uq_mutex); 723 724 if (ufs_fix.uq_ufhead) 725 insque(new, &ufs_fix.uq_ufhead); 726 else 727 ufs_fix.uq_ufhead = new; 728 729 if (mutex_tryenter(&new->uf_mutex)) { 730 (void) set_state(new, UF_QUEUE); 731 mutex_exit(&new->uf_mutex); 732 } 733 734 mutex_enter(&uf_stats.ufst_mutex); /* force wakeup */ 735 ufs_fix.uq_ne = ufs_fix.uq_lowat = uf_stats.ufst_num_failed; 736 mutex_exit(&uf_stats.ufst_mutex); 737 738 cv_broadcast(&ufs_fix.uq_cv); 739 740 DCALL(DBGLVL_MAJOR, cmn_err(CE_WARN, new->uf_panic_str ? 741 new->uf_panic_str : "queue_failure: NULL panic str?")); 742 mutex_exit(&ufs_fix.uq_mutex); 743 744 MINOR(("] ")); 745 } 746 747 /*PRINTFLIKE2*/ 748 static void 749 real_panic(ufs_failure_t *f, const char *fmt, ...) 750 { 751 va_list adx; 752 753 MINUTE(("[real_panic ")); 754 755 va_start(adx, fmt); 756 real_panic_v(f, fmt, adx); 757 va_end(adx); 758 759 MINUTE((": return?!]\n")); 760 } 761 762 static void 763 real_panic_v(ufs_failure_t *f, const char *fmt, va_list adx) 764 { 765 int seriousness = CE_PANIC; 766 int need_unlock; 767 768 MINUTE(("[real_panic_v ")); 769 770 if (f && f->uf_ufsvfsp) 771 TRANS_SETERROR(f->uf_ufsvfsp); 772 773 #if defined(DEBUG) 774 if (DEBUG_FLAGS & DBGFLG_NOPANIC) { 775 seriousness = CE_WARN; 776 cmn_err(CE_WARN, "real_panic: EWOULDPANIC\n"); 777 } 778 #endif /* DEBUG */ 779 780 delay(hz >> 1); /* allow previous warnings to get out */ 781 782 if (!f && fmt) 783 vcmn_err(seriousness, fmt, adx); 784 else 785 cmn_err(seriousness, f && f->uf_panic_str? f->uf_panic_str: 786 "real_panic: <unknown panic?>"); 787 788 if (f) { 789 need_unlock = !MUTEX_HELD(&f->uf_mutex); 790 if (need_unlock) { 791 mutex_enter(&f->uf_mutex); 792 } 793 794 f->uf_retry = -1; 795 (void) set_state(f, UF_PANIC); 796 797 if (need_unlock) { 798 mutex_exit(&f->uf_mutex); 799 } 800 } 801 MINUTE((": return?!]\n")); 802 } 803 804 /* 805 * initializes ufs panic structs, locks, etc 806 */ 807 void 808 ufsfx_init(void) 809 { 810 811 MINUTE(("[ufsfx_init")); 812 813 /* patchable; unchanged while running, so no lock is needed */ 814 ufsfx_tune.uft_too_long = UF_TOO_LONG; 815 ufsfx_tune.uft_fixstart_period = UF_FIXSTART_PERIOD; 816 ufsfx_tune.uft_fixpoll_period = UF_FIXPOLL_PERIOD; 817 ufsfx_tune.uft_short_err_period = UF_SHORT_ERROR_PERIOD; 818 ufsfx_tune.uft_long_err_period = UF_LONG_ERROR_PERIOD; 819 820 uffsinfo.ufi_statp = &uf_stats; 821 uffsinfo.ufi_tunep = &ufsfx_tune; 822 uffsinfo.ufi_statetab = &state_desc[0]; 823 824 mutex_init(&uf_stats.ufst_mutex, NULL, MUTEX_DEFAULT, NULL); 825 ufs_thread_init(&ufs_fix, /* maxne */ 1); 826 827 MINUTE(("] ")); 828 } 829 830 /* 831 * initializes per-ufs values 832 * returns 0 (ok) or errno 833 */ 834 int 835 ufsfx_mount(struct ufsvfs *ufsvfsp, int flags) 836 { 837 MINUTE(("[ufsfx_mount (%d)", flags)); 838 /* don't check/need vfs_lock because it's still being initialized */ 839 840 ufsvfsp->vfs_fsfx.fx_flags = (flags & UFSMNT_ONERROR_FLGMASK) >> 4; 841 842 MINUTE((": %s: fx_flags:%ld,", 843 ufsvfsp->vfs_fs->fs_fsmnt, ufsvfsp->vfs_fsfx.fx_flags)); 844 /* 845 * onerror={panic ^ lock only ^ unmount} 846 */ 847 848 if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_PANIC) { 849 MINUTE((" PANIC")); 850 851 } else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKONLY) { 852 MINUTE((" LCKONLY")); 853 854 } else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKUMOUNT) { 855 MINUTE((" LCKUMOUNT")); 856 857 } else { 858 ufsvfsp->vfs_fsfx.fx_flags = UFSFX_DEFAULT; 859 ASSERT(ufsvfsp->vfs_fsfx.fx_flags & 860 (UFSMNT_ONERROR_FLGMASK >> 4)); 861 MINUTE((" DEFAULT")); 862 } 863 864 pollwakeup(&ufs_pollhd, POLLPRI); 865 MINUTE(("]\n")); 866 return (0); 867 } 868 869 /* 870 * ufsfx_unmount 871 * 872 * called during unmount 873 */ 874 void 875 ufsfx_unmount(struct ufsvfs *ufsvfsp) 876 { 877 ufs_failure_t *f; 878 int must_unlock_list; 879 880 MINUTE(("[ufsfx_unmount")); 881 882 if (!ufsvfsp) { 883 MINUTE((": no ufsvfsp]")); 884 return; 885 } 886 887 if ((must_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex)) != 0) 888 mutex_enter(&ufs_fix.uq_mutex); 889 890 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 891 int must_unlock_failure; 892 893 must_unlock_failure = !MUTEX_HELD(&f->uf_mutex); 894 if (must_unlock_failure) { 895 mutex_enter(&f->uf_mutex); 896 } 897 898 if (f->uf_ufsvfsp == ufsvfsp) { 899 900 /* 901 * if we owned the failure record lock, then this 902 * is probably a fix failure-triggered unmount, so 903 * the warning is not appropriate or needed 904 */ 905 906 /* XXX if rebooting don't print this? */ 907 if (!terminal_state(f->uf_s) && must_unlock_failure) { 908 cmn_err(CE_WARN, 909 "Unmounting %s while error-locked", 910 fs_name(f)); 911 } 912 913 f->uf_ufsvfsp = NULL; 914 f->uf_vfs_ufsfxp = NULL; 915 f->uf_vfs_lockp = NULL; 916 f->uf_bp = NULL; 917 f->uf_vfsp = NULL; 918 f->uf_retry = -1; 919 } 920 921 if (must_unlock_failure) 922 mutex_exit(&f->uf_mutex); 923 } 924 if (must_unlock_list) 925 mutex_exit(&ufs_fix.uq_mutex); 926 927 pollwakeup(&ufs_pollhd, POLLPRI | POLLHUP); 928 MINUTE(("] ")); 929 } 930 931 /* 932 * ufsfx_(un)lockfs 933 * 934 * provides hook from lockfs code so we can recognize unlock/relock 935 * This is called after it is certain that the (un)lock will succeed. 936 */ 937 void 938 ufsfx_unlockfs(struct ufsvfs *ufsvfsp) 939 { 940 ufs_failure_t *f; 941 int need_unlock; 942 int need_unlock_list; 943 int informed = 0; 944 945 MINUTE(("[ufsfx_unlockfs")); 946 947 if (!ufsvfsp) 948 return; 949 950 need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex); 951 952 if (need_unlock_list) 953 mutex_enter(&ufs_fix.uq_mutex); 954 955 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 956 957 need_unlock = !MUTEX_HELD(&f->uf_mutex); 958 if (need_unlock) 959 mutex_enter(&f->uf_mutex); 960 961 if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s)) { 962 if (!(f->uf_s & UF_FIXING)) { 963 /* 964 * This might happen if we don't notice that 965 * the fs gets marked FSFIX before it is 966 * marked FSCLEAN, as might occur if the 967 * the superblock was hammered directly. 968 */ 969 if (!informed) { 970 informed = 1; 971 cmn_err(CE_NOTE, 972 "Unlock of %s succeeded before " 973 "fs_clean marked FSFIX?", 974 fs_name(f)); 975 } 976 977 /* 978 * pass through fixing state so 979 * transition protocol is satisfied 980 */ 981 if (!set_state(f, UF_FIXING)) { 982 MINUTE((": failed] ")); 983 } 984 } 985 986 if (!set_state(f, UF_FIXED)) { 987 /* it's already fixed, so don't panic now */ 988 MINUTE((": failed] ")); 989 } 990 } 991 992 if (need_unlock) 993 mutex_exit(&f->uf_mutex); 994 } 995 if (need_unlock_list) 996 mutex_exit(&ufs_fix.uq_mutex); 997 MINUTE(("] ")); 998 } 999 1000 void 1001 ufsfx_lockfs(struct ufsvfs *ufsvfsp) 1002 { 1003 ufs_failure_t *f; 1004 int need_unlock; 1005 int need_unlock_list; 1006 1007 MINUTE(("[ufsfx_lockfs")); 1008 1009 if (!ufsvfsp) 1010 return; 1011 1012 need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex); 1013 1014 if (need_unlock_list) 1015 mutex_enter(&ufs_fix.uq_mutex); 1016 1017 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 1018 1019 need_unlock = !MUTEX_HELD(&f->uf_mutex); 1020 if (need_unlock) 1021 mutex_enter(&f->uf_mutex); 1022 1023 if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s) && 1024 f->uf_s != UF_PANIC) { 1025 switch (f->uf_s) { 1026 1027 default: 1028 cmn_err(CE_WARN, 1029 "fs %s not in state " 1030 "UF_TRYLCK, UF_LOCKED or UF_FIXING", 1031 fs_name(f)); 1032 break; 1033 1034 case UF_TRYLCK: 1035 if (!set_state(f, UF_LOCKED)) { 1036 MINUTE((": failed] ")); 1037 } 1038 break; 1039 1040 case UF_LOCKED: 1041 if (!set_state(f, UF_FIXING)) { 1042 MINUTE((": failed] ")); 1043 } 1044 break; 1045 1046 case UF_FIXING: 1047 break; 1048 1049 } 1050 } 1051 1052 if (need_unlock) 1053 mutex_exit(&f->uf_mutex); 1054 } 1055 if (need_unlock_list) 1056 mutex_exit(&ufs_fix.uq_mutex); 1057 1058 MINUTE(("] ")); 1059 } 1060 1061 /* 1062 * error lock, trigger fsck and unlock those fs with failures 1063 * blatantly copied from the hlock routine, although this routine 1064 * triggers differently in order to use uq_ne as meaningful data. 1065 */ 1066 /* ARGSUSED */ 1067 void 1068 ufsfx_thread_fix_failures(void *ignored) 1069 { 1070 int retry; 1071 callb_cpr_t cprinfo; 1072 1073 CALLB_CPR_INIT(&cprinfo, &ufs_fix.uq_mutex, callb_generic_cpr, 1074 "ufsfixfail"); 1075 1076 MINUTE(("[ufsfx_thread_fix_failures] ")); 1077 1078 for (;;) { 1079 /* sleep until there is work to do */ 1080 1081 mutex_enter(&ufs_fix.uq_mutex); 1082 (void) ufs_thread_run(&ufs_fix, &cprinfo); 1083 ufs_fix.uq_ne = 0; 1084 mutex_exit(&ufs_fix.uq_mutex); 1085 1086 /* process failures on our q */ 1087 do { 1088 retry = ufsfx_do_failure_q(); 1089 if (retry) { 1090 mutex_enter(&ufs_fix.uq_mutex); 1091 CALLB_CPR_SAFE_BEGIN(&cprinfo); 1092 (void) cv_timedwait(&ufs_fix.uq_cv, 1093 &ufs_fix.uq_mutex, lbolt + (hz * retry)); 1094 CALLB_CPR_SAFE_END(&cprinfo, 1095 &ufs_fix.uq_mutex); 1096 mutex_exit(&ufs_fix.uq_mutex); 1097 } 1098 } while (retry); 1099 } 1100 /* NOTREACHED */ 1101 } 1102 1103 1104 /* 1105 * watch for fix-on-panic work 1106 * 1107 * returns # of seconds to sleep before trying again 1108 * and zero if no retry is needed 1109 */ 1110 1111 int 1112 ufsfx_do_failure_q(void) 1113 { 1114 ufs_failure_t *f; 1115 long retry = 1; 1116 ufsd_t *s; 1117 1118 MAJOR(("[ufsfx_do_failure_q")); 1119 DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL)); 1120 1121 if (!mutex_tryenter(&ufs_fix.uq_mutex)) 1122 return (retry); 1123 1124 retry = 0; 1125 rescan_q: 1126 1127 /* 1128 * walk down failure list 1129 * depending on state of each failure, do whatever 1130 * is appropriate to move it to the next state 1131 * taking note of whether retry gets set 1132 * 1133 * retry protocol: 1134 * wakeup in shortest required time for any failure 1135 * retry == 0; nothing more to do (terminal state) 1136 * retry < 0; reprocess queue immediately, retry will 1137 * be abs(retry) for the next cycle 1138 * retry > 0; schedule wakeup for retry seconds 1139 */ 1140 1141 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 1142 1143 if (!mutex_tryenter(&f->uf_mutex)) { 1144 retry = 1; 1145 continue; 1146 } 1147 s = get_state_desc(f->uf_s); 1148 1149 MINOR((": found%s: %s, \"%s: %s\"\n", 1150 s->ud_attr.terminal ? " old" : "", 1151 fs_name(f), state_name(f->uf_s), f->uf_panic_str)); 1152 1153 if (s->ud_attr.terminal) { 1154 mutex_exit(&f->uf_mutex); 1155 continue; 1156 } 1157 1158 if (s->ud_sfp) 1159 (*s->ud_sfp)(f, UFA_FOUND, f->uf_s); 1160 1161 ASSERT(terminal_state(f->uf_s) || f->uf_retry != 0); 1162 1163 if (f->uf_retry != 0) { 1164 if (retry > f->uf_retry || retry == 0) 1165 retry = f->uf_retry; 1166 if (f->uf_retry < 0) 1167 f->uf_retry = abs(f->uf_retry); 1168 } 1169 mutex_exit(&f->uf_mutex); 1170 } 1171 1172 1173 if (retry < 0) { 1174 retry = abs(retry); 1175 goto rescan_q; 1176 } 1177 1178 mutex_exit(&ufs_fix.uq_mutex); 1179 1180 DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL)); 1181 MAJOR((": retry=%ld, good night]\n\n", retry)); 1182 1183 return (retry); 1184 } 1185 1186 static void 1187 pester_msg(ufs_failure_t *f, int seriousness) 1188 { 1189 MINUTE(("[pester_msg")); 1190 ASSERT(f->uf_s & (UF_LOCKED | UF_FIXING)); 1191 1192 /* 1193 * XXX if seems too long for this fs, poke administrator 1194 * XXX to run fsck manually (and change retry time?) 1195 */ 1196 cmn_err(seriousness, "Waiting for repair of %s to %s", 1197 fs_name(f), f->uf_s & UF_LOCKED ? "start" : "finish"); 1198 MINUTE(("]")); 1199 } 1200 1201 static time_t 1202 trylock_time_exceeded(ufs_failure_t *f) 1203 { 1204 time_t toolong; 1205 extern time_t time; 1206 1207 MINUTE(("[trylock_time_exceeded")); 1208 ASSERT(MUTEX_HELD(&f->uf_mutex)); 1209 1210 toolong = (time_t)ufsfx_tune.uft_too_long + f->uf_entered_tm; 1211 if (time > toolong) 1212 cmn_err(CE_WARN, "error-lock timeout exceeded: %s", fs_name(f)); 1213 1214 MINUTE(("] ")); 1215 return (time <= toolong? 0: time - toolong); 1216 } 1217 1218 static int 1219 get_lockfs_status(ufs_failure_t *f, struct lockfs *lfp) 1220 { 1221 MINUTE(("[get_lockfs_status")); 1222 1223 if (!f->uf_ufsvfsp) { 1224 MINUTE((": ufsvfsp is NULL]\n")); 1225 return (0); 1226 } 1227 1228 ASSERT(MUTEX_HELD(&f->uf_mutex)); 1229 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp)); 1230 ASSERT(!vfs_lock_held(f->uf_vfsp)); 1231 ASSERT(f->uf_ufsvfsp->vfs_root != NULL); 1232 1233 f->uf_lf_err = ufs_fiolfss(f->uf_ufsvfsp->vfs_root, lfp); 1234 1235 if (f->uf_lf_err) { 1236 f->uf_retry = ufsfx_tune.uft_short_err_period; 1237 } 1238 1239 MINUTE(("] ")); 1240 return (1); 1241 } 1242 1243 static sfrc_t 1244 set_state(ufs_failure_t *f, ufs_failure_states_t new_state) 1245 { 1246 ufsd_t *s; 1247 sfrc_t sfrc = SFRC_FAIL; 1248 int need_unlock; 1249 extern time_t time; 1250 1251 HIDEOUS(("[set_state: new state:%s", state_name(new_state))); 1252 ASSERT(f); 1253 ASSERT(MUTEX_HELD(&f->uf_mutex)); 1254 1255 /* 1256 * if someone else is panicking, just let panic sync proceed 1257 */ 1258 if (panicstr) { 1259 (void) set_state(f, UF_NOTFIX); 1260 HIDEOUS((": state reset: not fixed] ")); 1261 return (sfrc); 1262 } 1263 1264 /* 1265 * bad state transition, an internal error 1266 */ 1267 if (!state_trans_valid(f->uf_s, new_state)) { 1268 /* recursion */ 1269 if (!(f->uf_s & UF_PANIC) && !(new_state & UF_PANIC)) 1270 (void) set_state(f, UF_PANIC); 1271 MINOR((": state reset: transition failure (\"%s\"->\"%s\")] ", 1272 state_name(f->uf_s), state_name(new_state))); 1273 return (sfrc); 1274 } 1275 1276 s = get_state_desc(new_state); 1277 1278 need_unlock = !MUTEX_HELD(&ufs_fix.uq_mutex); 1279 if (need_unlock) 1280 mutex_enter(&ufs_fix.uq_mutex); 1281 1282 if (s->ud_attr.at_fail && ufs_fix.uq_threadp && 1283 curthread == ufs_fix.uq_threadp) { 1284 cmn_err(CE_WARN, "set_state: probable recursive panic of %s", 1285 fs_name(f)); 1286 } 1287 if (need_unlock) 1288 mutex_exit(&ufs_fix.uq_mutex); 1289 1290 /* NULL state functions always succeed */ 1291 sfrc = !s->ud_sfp? SFRC_SUCCESS: (*s->ud_sfp)(f, UFA_SET, new_state); 1292 1293 if (sfrc == SFRC_SUCCESS && f->uf_s != new_state) { 1294 f->uf_s = new_state; 1295 f->uf_entered_tm = time; 1296 f->uf_counter = 0; 1297 } 1298 1299 HIDEOUS(("]\n")); 1300 return (sfrc); 1301 } 1302 1303 static ufsd_t * 1304 get_state_desc(ufs_failure_states_t state) 1305 { 1306 ufsd_t *s; 1307 1308 HIDEOUS(("[get_state_desc")); 1309 1310 for (s = &state_desc[1]; s->ud_name != NULL; s++) { 1311 if (s->ud_v == state) { 1312 HIDEOUS(("] ")); 1313 return (s); 1314 } 1315 } 1316 1317 HIDEOUS(("] ")); 1318 return (&state_desc[0]); /* default */ 1319 } 1320 1321 static sfrc_t 1322 sf_undef(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s) 1323 { 1324 sfrc_t rc; 1325 1326 TRIVIA(("[sf_undef, action is %s, state is %s\n", 1327 act_name(a), state_name(s))); 1328 ASSERT(s == UF_UNDEF); 1329 1330 /* shouldn't find null failure records or ever set one */ 1331 rc = set_state(f, UF_NOTFIX); 1332 1333 TRIVIA(("] ")); 1334 return (rc); 1335 } 1336 1337 1338 static sfrc_t 1339 sf_init( 1340 ufs_failure_t *f, 1341 ufsa_t a, 1342 ufs_failure_states_t s) 1343 { 1344 sfrc_t rc = SFRC_FAIL; 1345 extern time_t time; 1346 1347 TRIVIA(("[sf_init, action is %s", act_name(a))); 1348 ASSERT(s & UF_INIT); 1349 1350 switch (a) { 1351 case UFA_SET: 1352 f->uf_begin_tm = time; 1353 f->uf_retry = 1; 1354 if (!f->uf_ufsvfsp) { 1355 (void) set_state(f, UF_PANIC); 1356 TRIVIA((": NULL ufsvfsp]\n")); 1357 return (rc); 1358 } 1359 /* 1360 * because we can call panic from many different levels, 1361 * we can't be sure that we've got the vfs_lock at this 1362 * point. However, there's not much alternative and if 1363 * we don't (have the lock) the worst case is we'll just 1364 * panic again 1365 */ 1366 f->uf_vfs_lockp = &f->uf_ufsvfsp->vfs_lock; 1367 f->uf_vfs_ufsfxp = &f->uf_ufsvfsp->vfs_fsfx; 1368 1369 if (!f->uf_ufsvfsp->vfs_bufp) { 1370 (void) set_state(f, UF_PANIC); 1371 TRIVIA((": NULL vfs_bufp]\n")); 1372 return (rc); 1373 } 1374 f->uf_bp = f->uf_ufsvfsp->vfs_bufp; 1375 1376 if (!f->uf_ufsvfsp->vfs_bufp->b_un.b_fs) { 1377 (void) set_state(f, UF_PANIC); 1378 TRIVIA((": NULL vfs_fs]\n")); 1379 return (rc); 1380 } 1381 1382 /* vfs_fs = vfs_bufp->b_un.b_fs */ 1383 bcopy(f->uf_ufsvfsp->vfs_fs->fs_fsmnt, f->uf_fsname, MAXMNTLEN); 1384 1385 f->uf_lf.lf_lock = LOCKFS_ELOCK; /* primer */ 1386 1387 if (!f->uf_vfsp || f->uf_vfsp->vfs_dev == NODEV) { 1388 (void) set_state(f, UF_PANIC); 1389 TRIVIA((": NULL vfsp or vfs_dev == NODEV")); 1390 return (rc); 1391 } 1392 f->uf_dev = f->uf_vfsp->vfs_dev; 1393 1394 rc = SFRC_SUCCESS; 1395 break; 1396 1397 case UFA_FOUND: 1398 default: 1399 /* failures marked init shouldn't even be on the queue yet */ 1400 rc = set_state(f, UF_QUEUE); 1401 TRIVIA((": found failure with state init]\n")); 1402 } 1403 1404 TRIVIA(("] ")); 1405 return (rc); 1406 } 1407 1408 static sfrc_t 1409 sf_queue( 1410 ufs_failure_t *f, 1411 ufsa_t a, 1412 ufs_failure_states_t s) 1413 { 1414 sfrc_t rc = SFRC_FAIL; 1415 1416 TRIVIA(("[sf_queue, action is %s", act_name(a))); 1417 ASSERT(s & UF_QUEUE); 1418 1419 if (!f->uf_ufsvfsp) { 1420 TRIVIA((": NULL ufsvfsp]\n")); 1421 return (rc); 1422 } 1423 1424 switch (a) { 1425 case UFA_FOUND: 1426 rc = sf_found_queue(f); 1427 break; 1428 1429 case UFA_SET: 1430 1431 ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex)); 1432 1433 mutex_enter(&uf_stats.ufst_mutex); 1434 uf_stats.ufst_num_failed++; 1435 mutex_exit(&uf_stats.ufst_mutex); 1436 1437 /* 1438 * if can't get the vfs lock, just wait until 1439 * UF_TRYLCK to set fx_current 1440 */ 1441 if (mutex_tryenter(f->uf_vfs_lockp)) { 1442 f->uf_vfs_ufsfxp->fx_current = f; 1443 mutex_exit(f->uf_vfs_lockp); 1444 } else { 1445 mutex_enter(&uf_stats.ufst_mutex); 1446 uf_stats.ufst_current_races++; 1447 mutex_exit(&uf_stats.ufst_mutex); 1448 } 1449 1450 f->uf_retry = 1; 1451 rc = SFRC_SUCCESS; 1452 TRIVIA(("] ")); 1453 break; 1454 1455 default: 1456 (void) set_state(f, UF_PANIC); 1457 TRIVIA((": failed] ")); 1458 } 1459 1460 return (rc); 1461 } 1462 1463 static sfrc_t 1464 sf_found_queue(ufs_failure_t *f) 1465 { 1466 int replica; 1467 sfrc_t rc = SFRC_FAIL; 1468 1469 TRIVIA(("[sf_found_queue")); 1470 1471 /* 1472 * don't need to check for null ufsvfsp because 1473 * unmount must own list's ufs_fix.uq_mutex 1474 * to mark it null and we own that lock since 1475 * we got here. 1476 */ 1477 1478 ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex)); 1479 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp)); 1480 1481 if (!mutex_tryenter(f->uf_vfs_lockp)) { 1482 TRIVIA((": tryenter(vfslockp) failed; retry]\n")); 1483 f->uf_retry = 1; 1484 return (rc); 1485 } 1486 1487 replica = f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current != NULL && 1488 f->uf_vfs_ufsfxp->fx_current != f && 1489 !terminal_state(f->uf_vfs_ufsfxp->fx_current->uf_s); 1490 1491 /* 1492 * copy general flags to this ufs_failure so we don't 1493 * need to refer back to the ufsvfs, or, more importantly, 1494 * don't need to keep acquiring (trying to acquire) vfs_lockp 1495 * 1496 * The most restrictive option wins: 1497 * panic > errlock only > errlock+unmount > repair 1498 * XXX panic > elock > elock > elock+umount 1499 */ 1500 if (f->uf_vfs_ufsfxp->fx_flags & UFSFX_PANIC) { 1501 if (!set_state(f, UF_PANIC)) { 1502 TRIVIA((": marked panic but was queued?")); 1503 real_panic(f, " "); 1504 /*NOTREACHED*/ 1505 } 1506 mutex_exit(f->uf_vfs_lockp); 1507 return (rc); 1508 } 1509 f->uf_flags = f->uf_vfs_ufsfxp->fx_flags; 1510 1511 if (replica) { 1512 if (!set_state(f, UF_REPLICA)) { 1513 f->uf_retry = 1; 1514 TRIVIA((": set to replica failed] ")); 1515 } else { 1516 TRIVIA(("] ")); 1517 } 1518 mutex_exit(f->uf_vfs_lockp); 1519 return (rc); 1520 } 1521 mutex_exit(f->uf_vfs_lockp); 1522 1523 if (!set_state(f, UF_TRYLCK)) { 1524 TRIVIA((": failed] ")); 1525 } else { 1526 rc = SFRC_SUCCESS; 1527 } 1528 return (rc); 1529 } 1530 1531 static sfrc_t 1532 sf_nonterm_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s) 1533 { 1534 sfrc_t rc = SFRC_FAIL; 1535 1536 TRIVIA(("[sf_nonterm_cmn, action: %s, %s", act_name(a), state_name(s))); 1537 ASSERT(s & (UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING)); 1538 ASSERT(!terminal_state(s)); 1539 1540 if (!f->uf_ufsvfsp && !(f->uf_s & UF_UMOUNT)) { 1541 TRIVIA((": NULL ufsvfsp (state != UMOUNT)]\n")); 1542 (void) set_state(f, UF_NOTFIX); 1543 return (rc); 1544 } 1545 1546 switch (a) { 1547 case UFA_SET: 1548 switch (s) { 1549 case UF_TRYLCK: 1550 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp)); 1551 rc = sf_set_trylck(f); 1552 break; 1553 1554 case UF_LOCKED: 1555 rc = sf_set_locked(f); 1556 break; 1557 1558 case UF_FIXING: 1559 f->uf_flags |= UFSFX_REPAIR_START; 1560 f->uf_retry = ufsfx_tune.uft_fixpoll_period; 1561 rc = SFRC_SUCCESS; 1562 break; 1563 1564 case UF_UMOUNT: 1565 f->uf_retry = -ufsfx_tune.uft_short_err_period; 1566 rc = SFRC_SUCCESS; 1567 break; 1568 1569 default: 1570 (void) set_state(f, UF_PANIC); 1571 TRIVIA((": failed] ")); 1572 } 1573 break; 1574 1575 case UFA_FOUND: 1576 1577 switch (s) { 1578 case UF_TRYLCK: 1579 rc = sf_found_trylck(f); 1580 break; 1581 1582 case UF_LOCKED: 1583 case UF_FIXING: 1584 rc = sf_found_lock_fix_cmn(f, s); 1585 break; 1586 1587 case UF_UMOUNT: 1588 rc = sf_found_umount(f); 1589 break; 1590 1591 default: 1592 (void) set_state(f, UF_PANIC); 1593 TRIVIA((": failed] ")); 1594 break; 1595 } 1596 break; 1597 default: 1598 (void) set_state(f, UF_PANIC); 1599 TRIVIA((": failed] ")); 1600 break; 1601 } 1602 1603 TRIVIA(("] ")); 1604 return (rc); 1605 } 1606 1607 static sfrc_t 1608 sf_set_trylck(ufs_failure_t *f) 1609 { 1610 TRIVIA(("[sf_set_trylck")); 1611 1612 if (!mutex_tryenter(f->uf_vfs_lockp)) { 1613 TRIVIA((": tryenter(vfslockp) failed; retry]\n")); 1614 f->uf_retry = 1; 1615 return (SFRC_FAIL); 1616 } 1617 1618 if (!f->uf_vfs_ufsfxp->fx_current) 1619 f->uf_vfs_ufsfxp->fx_current = f; 1620 1621 mutex_exit(f->uf_vfs_lockp); 1622 1623 f->uf_lf.lf_flags = 0; 1624 f->uf_lf.lf_lock = LOCKFS_ELOCK; 1625 f->uf_retry = -ufsfx_tune.uft_fixstart_period; 1626 TRIVIA(("] ")); 1627 return (SFRC_SUCCESS); 1628 } 1629 1630 static sfrc_t 1631 sf_found_trylck(ufs_failure_t *f) 1632 { 1633 struct lockfs lockfs_status; 1634 1635 TRIVIA(("[sf_found_trylck")); 1636 1637 if (trylock_time_exceeded(f) > 0) { 1638 (void) set_state(f, UF_PANIC); 1639 TRIVIA((": failed] ")); 1640 return (SFRC_FAIL); 1641 } 1642 1643 if (!get_lockfs_status(f, &lockfs_status)) { 1644 (void) set_state(f, UF_PANIC); 1645 TRIVIA((": failed] ")); 1646 return (SFRC_FAIL); 1647 } 1648 1649 if (f->uf_lf_err == NO_ERROR) 1650 f->uf_lf.lf_key = lockfs_status.lf_key; 1651 1652 if (!set_lockfs(f, &lockfs_status)) { 1653 (void) set_state(f, UF_PANIC); 1654 TRIVIA((": failed] ")); 1655 return (SFRC_FAIL); 1656 } 1657 TRIVIA(("] ")); 1658 return (SFRC_SUCCESS); 1659 } 1660 1661 static sfrc_t 1662 sf_set_locked(ufs_failure_t *f) 1663 { 1664 TRIVIA(("[sf_set_locked")); 1665 1666 f->uf_retry = -ufsfx_tune.uft_fixstart_period; 1667 1668 #if defined(DEBUG) 1669 if (f->uf_flags & UFSFX_REPAIR_START) 1670 TRIVIA(("clearing UFSFX_REPAIR_START ")); 1671 #endif /* DEBUG */ 1672 1673 f->uf_flags &= ~UFSFX_REPAIR_START; 1674 1675 if (f->uf_s & UF_TRYLCK) { 1676 cmn_err(CE_WARN, "Error-locked %s: \"%s\"", 1677 fs_name(f), f->uf_panic_str); 1678 1679 if (f->uf_flags & UFSFX_LCKONLY) 1680 cmn_err(CE_WARN, "Manual repair of %s required", 1681 fs_name(f)); 1682 } 1683 1684 /* 1685 * just reset to current state 1686 */ 1687 #if defined(DEBUG) 1688 TRIVIA(("locked->locked ")); 1689 #endif /* DEBUG */ 1690 1691 TRIVIA(("] ")); 1692 return (SFRC_SUCCESS); 1693 } 1694 1695 static sfrc_t 1696 sf_found_lock_fix_cmn(ufs_failure_t *f, ufs_failure_states_t s) 1697 { 1698 time_t toolong; 1699 extern time_t time; 1700 struct buf *bp = NULL; 1701 struct fs *dfs; 1702 time_t concerned, anxious; 1703 sfrc_t rc = SFRC_FAIL; 1704 ulong_t gb_size; 1705 1706 TRIVIA(("[sf_found_lock_fix_cmn (\"%s\")", state_name(s))); 1707 1708 if (s & UF_LOCKED) { 1709 ASSERT(MUTEX_HELD(&f->uf_mutex)); 1710 1711 toolong = 1712 time > (ufsfx_tune.uft_too_long + f->uf_entered_tm); 1713 TRIVIA(("%stoolong", !toolong? "not": "")); 1714 HIDEOUS((": time:%ld, too long:%ld, entered_tm:%ld ", 1715 time, ufsfx_tune.uft_too_long, f->uf_entered_tm)); 1716 1717 if (f->uf_flags & UFSFX_LCKUMOUNT) { 1718 if (set_state(f, UF_UMOUNT)) { 1719 TRIVIA(("] ")); 1720 rc = SFRC_SUCCESS; 1721 } else { 1722 TRIVIA((": failed] ")); 1723 f->uf_retry = 1; 1724 } 1725 return (rc); 1726 } 1727 if (!toolong) { 1728 rc = SFRC_SUCCESS; 1729 } else { 1730 if (!(f->uf_flags & UFSFX_REPAIR_START)) { 1731 cmn_err(CE_WARN, "%s repair of %s not started.", 1732 (f->uf_flags & UFSFX_LCKONLY) ? 1733 "Manual" : "Automatic", fs_name(f)); 1734 1735 f->uf_retry = ufsfx_tune.uft_long_err_period; 1736 } else { 1737 f->uf_retry = ufsfx_tune.uft_long_err_period; 1738 cmn_err(CE_WARN, "Repair of %s is not timely; " 1739 "operator attention is required.", 1740 fs_name(f)); 1741 } 1742 TRIVIA(("] ")); 1743 return (rc); 1744 } 1745 } 1746 1747 #if defined(DEBUG) 1748 else { 1749 ASSERT(s & UF_FIXING); 1750 } 1751 #endif /* DEBUG */ 1752 1753 /* 1754 * get on disk superblock; force it to really 1755 * come from the disk 1756 */ 1757 (void) bfinval(f->uf_dev, 0); 1758 bp = UFS_BREAD(f->uf_ufsvfsp, f->uf_dev, SBLOCK, SBSIZE); 1759 if (bp) { 1760 bp->b_flags |= (B_STALE | B_AGE); 1761 dfs = bp->b_un.b_fs; 1762 } 1763 1764 if (!bp || (bp->b_flags & B_ERROR) || ((dfs->fs_magic != FS_MAGIC) && 1765 (dfs->fs_magic != MTB_UFS_MAGIC))) { 1766 TRIVIA((": UFS_BREAD(SBLOCK) failed]\n")); 1767 f->uf_retry = 1; 1768 goto out; 1769 } 1770 1771 /* fsck started but we haven't noticed yet? */ 1772 if (!(s & UF_FIXING) && dfs->fs_clean == FSFIX) { 1773 if (!set_state(f, UF_FIXING)) { 1774 TRIVIA((": failed]\n")); 1775 f->uf_retry = 1; 1776 goto out; 1777 } 1778 } 1779 1780 /* fsck started but didn't succeed? */ 1781 if ((s & UF_FIXING) && ((dfs->fs_clean == FSBAD) || !fsck_active(f))) { 1782 TRIVIA((": fs_clean: %d", (int)dfs->fs_clean)); 1783 (void) set_state(f, UF_LOCKED); 1784 cmn_err(CE_WARN, "%s: Manual repair is necessary.", fs_name(f)); 1785 f->uf_retry = ufsfx_tune.uft_long_err_period; 1786 goto out; 1787 } 1788 1789 gb_size = (dfs->fs_size * dfs->fs_bshift) / GB; 1790 toolong = (time_t)((gb_size == 0? 1: gb_size) * SecondsPerGig); 1791 1792 /* fsck started but doesn't seem to be proceeding? */ 1793 if ((s & UF_FIXING) && dfs->fs_clean == FSFIX) { 1794 if (time > f->uf_entered_tm + toolong) { 1795 1796 cmn_err(CE_WARN, 1797 "Repair completion timeout exceeded on %s; " 1798 "manual fsck may be required", fs_name(f)); 1799 f->uf_retry = ufsfx_tune.uft_long_err_period; 1800 } 1801 } 1802 1803 concerned = f->uf_entered_tm + (toolong / 3); 1804 anxious = f->uf_entered_tm + ((2 * toolong) / 3); 1805 1806 if (time > concerned) 1807 pester_msg(f, time > anxious? CE_WARN: CE_NOTE); 1808 1809 TRIVIA(("] ")); 1810 1811 out: 1812 if (bp) 1813 brelse(bp); 1814 1815 return (rc); 1816 } 1817 1818 static sfrc_t 1819 sf_found_umount(ufs_failure_t *f) 1820 { 1821 extern time_t time; 1822 sfrc_t rc = SFRC_FAIL; 1823 struct vfs *vfsp = f->uf_vfsp; 1824 struct ufsvfs *ufsvfsp = f->uf_ufsvfsp; 1825 int toolong = 0; 1826 int err = 0; 1827 1828 TRIVIA(("[sf_found_umount")); 1829 1830 toolong = time > ufsfx_tune.uft_too_long + f->uf_entered_tm; 1831 if (toolong) { 1832 TRIVIA((": unmount time limit exceeded] ")); 1833 goto out; 1834 } 1835 1836 if (!vfsp || !ufsvfsp) { /* trivial case */ 1837 TRIVIA((": NULL vfsp and/or ufsvfsp, already unmounted?] ")); 1838 goto out; 1839 } 1840 1841 if (!ULOCKFS_IS_ELOCK(&ufsvfsp->vfs_ulockfs)) { 1842 TRIVIA((": !not error locked?")); 1843 err = EINVAL; 1844 goto out; 1845 } 1846 1847 /* The vn_vfsunlock will be done in dounmount() [.../common/fs/vfs.c] */ 1848 if (vn_vfswlock(vfsp->vfs_vnodecovered)) { 1849 TRIVIA((": couldn't lock coveredvp")); 1850 err = EBUSY; 1851 goto out; 1852 } 1853 1854 if ((err = dounmount(vfsp, 0, kcred)) != 0) { 1855 1856 /* take note, but not many alternatives here */ 1857 mutex_enter(&uf_stats.ufst_mutex); 1858 uf_stats.ufst_unmount_failures++; 1859 mutex_exit(&uf_stats.ufst_mutex); 1860 1861 TRIVIA((": unmount failed] ")); 1862 } else { 1863 cmn_err(CE_NOTE, "unmounted error-locked %s", fs_name(f)); 1864 } 1865 1866 out: 1867 if (toolong || (err != EBUSY && err != EAGAIN)) 1868 rc = set_state(f, UF_NOTFIX); 1869 1870 TRIVIA(("] ")); 1871 return (rc); 1872 } 1873 1874 static sfrc_t 1875 sf_term_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s) 1876 { 1877 extern time_t time; 1878 sfrc_t rc = SFRC_FAIL; 1879 1880 TRIVIA(("[sf_term_cmn, action is %s, state is %s", 1881 act_name(a), state_name(s))); 1882 ASSERT(s & (UF_FIXED | UF_NOTFIX | UF_REPLICA)); 1883 ASSERT(terminal_state(s)); 1884 1885 if (!f->uf_ufsvfsp && !(f->uf_s & (UF_UMOUNT | UF_NOTFIX))) { 1886 TRIVIA((": NULL ufsvfsp (state != UMOUNT | NOTFIX)]\n")); 1887 return (rc); 1888 } 1889 1890 switch (a) { 1891 case UFA_SET: 1892 switch (s) { 1893 case UF_NOTFIX: 1894 case UF_FIXED: 1895 { 1896 int need_lock_vfs; 1897 1898 if (f->uf_ufsvfsp && f->uf_vfs_lockp) 1899 need_lock_vfs = !MUTEX_HELD(f->uf_vfs_lockp); 1900 else 1901 need_lock_vfs = 0; 1902 1903 if (need_lock_vfs && !mutex_tryenter(f->uf_vfs_lockp)) { 1904 TRIVIA((": tryenter(vfslockp) fail; retry]\n")); 1905 f->uf_retry = 1; 1906 break; 1907 } 1908 1909 f->uf_end_tm = time; 1910 f->uf_lf.lf_lock = LOCKFS_OLOCK; 1911 f->uf_retry = 0; 1912 1913 if (f->uf_vfs_ufsfxp) 1914 f->uf_vfs_ufsfxp->fx_current = NULL; 1915 1916 if (need_lock_vfs) 1917 mutex_exit(f->uf_vfs_lockp); 1918 1919 cmn_err(CE_NOTE, (s & UF_NOTFIX)? "Could not fix %s": 1920 "%s is now accessible", fs_name(f)); 1921 1922 if (s & UF_FIXED) { 1923 mutex_enter(&uf_stats.ufst_mutex); 1924 uf_stats.ufst_num_fixed++; 1925 mutex_exit(&uf_stats.ufst_mutex); 1926 } 1927 (void) timeout(ufsfx_kill_fix_failure_thread, 1928 (void *)(ufsfx_tune.uft_short_err_period * hz), 1929 ufsfx_tune.uft_short_err_period * hz); 1930 rc = SFRC_SUCCESS; 1931 break; 1932 } 1933 case UF_REPLICA: 1934 1935 ASSERT(MUTEX_HELD(f->uf_vfs_lockp)); 1936 1937 /* not actually a replica? */ 1938 if (f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current && 1939 f->uf_vfs_ufsfxp->fx_current != f && 1940 !terminal_state( 1941 f->uf_vfs_ufsfxp->fx_current->uf_s)) { 1942 1943 f->uf_orig = f->uf_vfs_ufsfxp->fx_current; 1944 f->uf_retry = 0; 1945 rc = SFRC_SUCCESS; 1946 } else { 1947 TRIVIA((": NULL fx_current]\n")); 1948 f->uf_retry = 1; 1949 } 1950 1951 break; 1952 1953 default: 1954 rc = set_state(f, UF_PANIC); 1955 TRIVIA((": failed] ")); 1956 break; 1957 } 1958 break; 1959 1960 case UFA_FOUND: 1961 /* 1962 * XXX de-allocate these after some period? 1963 * XXX or move to an historical list? 1964 * XXX or have an ioctl which reaps them? 1965 */ 1966 /* 1967 * For now, since we don't expect lots of failures 1968 * to occur (to the point of memory shortages), 1969 * just punt 1970 */ 1971 1972 /* be sure we're not wasting cpu on old failures */ 1973 if (f->uf_retry != 0) { 1974 mutex_enter(&uf_stats.ufst_mutex); 1975 uf_stats.ufst_cpu_waste++; 1976 mutex_exit(&uf_stats.ufst_mutex); 1977 f->uf_retry = 0; 1978 } 1979 rc = SFRC_SUCCESS; 1980 break; 1981 1982 default: 1983 (void) set_state(f, UF_PANIC); 1984 TRIVIA((": failed] ")); 1985 break; 1986 } 1987 1988 TRIVIA(("] ")); 1989 return (rc); 1990 } 1991 1992 static sfrc_t 1993 sf_panic( 1994 ufs_failure_t *f, 1995 ufsa_t a, 1996 ufs_failure_states_t s) 1997 { 1998 sfrc_t rc = SFRC_FAIL; 1999 2000 TRIVIA(("[sf_panic, action is %s, prev. state is %s", 2001 act_name(a), state_name(f->uf_s))); 2002 ASSERT(s & UF_PANIC); 2003 2004 switch (a) { 2005 case UFA_SET: 2006 f->uf_retry = -ufsfx_tune.uft_short_err_period; 2007 rc = SFRC_SUCCESS; 2008 break; 2009 2010 case UFA_FOUND: 2011 default: 2012 real_panic(f, " "); 2013 2014 /* LINTED: warning: logical expression always true: op "||" */ 2015 ASSERT(DEBUG); 2016 2017 (void) set_state(f, UF_UMOUNT); /* XXX UF_NOTFIX? */ 2018 2019 break; 2020 } 2021 2022 TRIVIA(("] ")); 2023 return (rc); 2024 } 2025 2026 /* 2027 * minimum state function 2028 */ 2029 static sfrc_t 2030 sf_minimum( 2031 ufs_failure_t *f, 2032 ufsa_t a, /* LINTED argument unused in function: ignored */ 2033 ufs_failure_states_t ignored) 2034 { 2035 sfrc_t rc = SFRC_FAIL; 2036 2037 TRIVIA(("[sf_minimum, action is %s", act_name(a))); 2038 2039 switch (a) { 2040 case UFA_SET: 2041 f->uf_retry = 0; 2042 /* FALLTHROUGH */ 2043 2044 case UFA_FOUND: 2045 rc = SFRC_SUCCESS; 2046 break; 2047 2048 default: 2049 (void) set_state(f, UF_PANIC); 2050 TRIVIA((": failed] ")); 2051 break; 2052 } 2053 2054 TRIVIA(("] ")); 2055 return (rc); 2056 } 2057 2058 static int 2059 state_trans_valid(ufs_failure_states_t from, ufs_failure_states_t to) 2060 { 2061 ufsd_t *s; 2062 int valid; 2063 2064 HIDEOUS(("[state_trans_valid")); 2065 2066 if (from & to) 2067 return (1); 2068 2069 s = get_state_desc(to); 2070 2071 /* 2072 * extra test is necessary since we want UF_UNDEF = 0, 2073 * (to detect freshly allocated memory) 2074 * but can't check for that value with a bit test 2075 */ 2076 valid = (to & UF_INIT)? from == s->ud_prev: from & s->ud_prev; 2077 2078 HIDEOUS((": %svalid] ", valid? "": "in")); 2079 return (valid); 2080 } 2081 2082 static int 2083 terminal_state(ufs_failure_states_t state) 2084 { 2085 ufsd_t *s; 2086 2087 HIDEOUS(("[terminal_state")); 2088 2089 s = get_state_desc(state); 2090 2091 HIDEOUS((": %sterminal] ", s->ud_attr.terminal? "": "not ")); 2092 return ((int)s->ud_attr.terminal); 2093 } 2094 2095 static void 2096 alloc_lockfs_comment(ufs_failure_t *f, struct lockfs *lfp) 2097 { 2098 MINUTE(("[alloc_lockfs_comment")); 2099 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2100 2101 /* 2102 * ufs_fiolfs expects a kmem_alloc'ed comment; 2103 * it frees the comment if the lock fails 2104 * or else when the lock is unlocked. 2105 */ 2106 2107 f->uf_lf.lf_comment = kmem_zalloc(LOCKFS_MAXCOMMENTLEN, KM_NOSLEEP); 2108 if (f->uf_lf.lf_comment) { 2109 char *from; 2110 size_t len; 2111 2112 /* 2113 * use panic string if there's no previous comment 2114 * or if we're setting the error lock 2115 */ 2116 if ((LOCKFS_IS_ELOCK(&f->uf_lf) || !lfp->lf_comment || 2117 lfp->lf_comlen <= 0)) { 2118 from = f->uf_panic_str; 2119 len = LOCKFS_MAXCOMMENTLEN; 2120 } else { 2121 from = lfp->lf_comment; 2122 len = lfp->lf_comlen; 2123 } 2124 2125 bcopy(from, f->uf_lf.lf_comment, len); 2126 f->uf_lf.lf_comlen = len; 2127 2128 } else { 2129 f->uf_lf.lf_comlen = 0; 2130 } 2131 MINUTE(("] ")); 2132 } 2133 2134 static int 2135 set_lockfs(ufs_failure_t *f, struct lockfs *lfp) 2136 { 2137 int (*handle_lockfs_rc)(ufs_failure_t *); 2138 int rc; 2139 2140 MINUTE(("[set_lockfs")); 2141 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2142 ASSERT(!vfs_lock_held(f->uf_vfsp)); 2143 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp)); 2144 2145 if (!f->uf_ufsvfsp) { 2146 MINUTE((": ufsvfsp is NULL]\n")); 2147 return (0); 2148 } 2149 2150 ASSERT(MUTEX_NOT_HELD(&f->uf_ufsvfsp->vfs_ulockfs.ul_lock)); 2151 2152 if (!f->uf_ufsvfsp->vfs_root) { 2153 MINUTE((": vfs_root is NULL]\n")); 2154 return (0); 2155 } 2156 2157 alloc_lockfs_comment(f, lfp); 2158 f->uf_lf_err = 0; 2159 2160 if (!LOCKFS_IS_ELOCK(lfp)) { 2161 lfp->lf_lock = f->uf_lf.lf_lock = LOCKFS_ELOCK; 2162 VN_HOLD(f->uf_ufsvfsp->vfs_root); 2163 f->uf_lf_err = 2164 ufs__fiolfs(f->uf_ufsvfsp->vfs_root, 2165 &f->uf_lf, /* from_user */ 0, /* from_log */ 0); 2166 VN_RELE(f->uf_ufsvfsp->vfs_root); 2167 } 2168 2169 handle_lockfs_rc = f->uf_lf_err != 0? lockfs_failure: lockfs_success; 2170 rc = handle_lockfs_rc(f); 2171 2172 MINUTE(("] ")); 2173 return (rc); 2174 } 2175 2176 static int 2177 lockfs_failure(ufs_failure_t *f) 2178 { 2179 int error; 2180 ufs_failure_states_t s; 2181 2182 TRIVIA(("[lockfs_failure")); 2183 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2184 2185 if (!f->uf_ufsvfsp) { 2186 TRIVIA((": ufsvfsp is NULL]\n")); 2187 return (0); 2188 } 2189 2190 error = f->uf_lf_err; 2191 switch (error) { 2192 /* non-transient errors: */ 2193 case EACCES: /* disk/in-core metadata reconciliation failed */ 2194 case EPERM: /* inode reconciliation failed; incore inode changed? */ 2195 case EIO: /* device is hard-locked or not responding */ 2196 case EROFS: /* device is write-locked */ 2197 case EDEADLK: /* can't lockfs; deadlock would result; */ 2198 /* Swapping or saving accounting records */ 2199 /* onto this fs can cause this errno. */ 2200 2201 MINOR(("ufs_fiolfs(\"%s\") of %s failed: %s (%d)", 2202 fs_name(f), lock_name(&f->uf_lf), 2203 err_name(error), error)); 2204 2205 /* 2206 * if can't get lock, then fallback to panic, unless 2207 * unless unmount was requested (although unmount will 2208 * probably fail if the lock failed, so we'll panic 2209 * anyway 2210 */ 2211 2212 s = ((f->uf_flags & UFSFX_LCKUMOUNT) && error != EDEADLK) ? 2213 UF_UMOUNT: UF_PANIC; 2214 2215 if (!set_state(f, s)) { 2216 real_panic(f, " "); 2217 /*NOTREACHED*/ 2218 break; 2219 } 2220 break; 2221 2222 2223 case EBUSY: 2224 case EAGAIN: 2225 2226 f->uf_retry = ufsfx_tune.uft_short_err_period; 2227 if (curthread->t_flag & T_DONTPEND) { 2228 curthread->t_flag &= ~T_DONTPEND; 2229 2230 } else if (!(f->uf_s & (UF_LOCKED | UF_FIXING))) { 2231 ufs_failure_states_t state; 2232 /* 2233 * if we didn't know that the fix had started, 2234 * take note 2235 */ 2236 state = error == EBUSY? UF_LOCKED: UF_FIXING; 2237 if (!set_state(f, state)) { 2238 TRIVIA((": failed] ")); 2239 return (0); 2240 } 2241 } 2242 break; 2243 2244 default: /* some other non-fatal error */ 2245 MINOR(("lockfs(\"%s\") of %s returned %s (%d)", 2246 lock_name(&f->uf_lf), fs_name(f), 2247 err_name(f->uf_lf_err), f->uf_lf_err)); 2248 2249 f->uf_retry = ufsfx_tune.uft_short_err_period; 2250 break; 2251 2252 case EINVAL: /* unmounted? */ 2253 (void) set_state(f, UF_NOTFIX); 2254 break; 2255 } 2256 TRIVIA(("] ")); 2257 return (1); 2258 } 2259 2260 static int 2261 lockfs_success(ufs_failure_t *f) 2262 { 2263 TRIVIA(("[lockfs_success")); 2264 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2265 2266 if (!f->uf_ufsvfsp) { 2267 TRIVIA((": ufsvfsp is NULL]\n")); 2268 return (0); 2269 } 2270 2271 switch (f->uf_lf.lf_lock) { 2272 case LOCKFS_ELOCK: /* error lock worked */ 2273 2274 if (!set_state(f, UF_LOCKED)) { 2275 TRIVIA((": failed] ")); 2276 return (0); 2277 } 2278 break; 2279 2280 case LOCKFS_ULOCK: /* unlock worked */ 2281 /* 2282 * how'd we get here? 2283 * This should be done from fsck's unlock, 2284 * not from this thread's context. 2285 */ 2286 cmn_err(CE_WARN, "Unlocked error-lock of %s", fs_name(f)); 2287 ufsfx_unlockfs(f->uf_ufsvfsp); 2288 break; 2289 2290 default: 2291 if (!set_state(f, UF_NOTFIX)) { 2292 TRIVIA((": failed] ")); 2293 return (0); 2294 } 2295 break; 2296 } 2297 TRIVIA(("] ")); 2298 return (1); 2299 } 2300 2301 /* 2302 * when fsck is running it puts its pid into the lockfs 2303 * comment structure, prefaced by PIDSTR 2304 */ 2305 const char *PIDSTR = "[pid:"; 2306 static int 2307 fsck_active(ufs_failure_t *f) 2308 { 2309 char *cp; 2310 int i, found, errlocked; 2311 size_t comlen; 2312 const int PIDSTRLEN = (int)strlen(PIDSTR); 2313 struct ulockfs *ulp = &f->uf_ufsvfsp->vfs_ulockfs; 2314 2315 TRIVIA(("[fsck_active")); 2316 2317 ASSERT(f); 2318 ASSERT(f->uf_s & UF_FIXING); 2319 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2320 ASSERT(f->uf_ufsvfsp); 2321 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp)); 2322 ASSERT(MUTEX_NOT_HELD(&ulp->ul_lock)); 2323 2324 mutex_enter(&ulp->ul_lock); 2325 cp = ulp->ul_lockfs.lf_comment; 2326 comlen = ulp->ul_lockfs.lf_comlen; 2327 errlocked = (int)ULOCKFS_IS_ELOCK(ulp); 2328 mutex_exit(&ulp->ul_lock); 2329 2330 if (!cp || comlen == 0) { 2331 TRIVIA((": null comment or comlen <= 0, found:0]")); 2332 return (0); 2333 } 2334 2335 for (found = i = 0; !found && i < (comlen - PIDSTRLEN); i++, cp++) 2336 found = strncmp(cp, PIDSTR, PIDSTRLEN) == 0; 2337 2338 TRIVIA(("found:%d, is_elock:%d]", found, errlocked)); 2339 return (errlocked & found); 2340 } 2341 2342 static const char unknown_fs[] = "<unknown fs>"; 2343 static const char null_failure[] = "<NULL ufs failure record; unknown fs>"; 2344 static const char mutated_vfs_bufp[] = "<mutated vfs_bufp, unknown fs>"; 2345 static const char mutated_vfs_fs[] = "<mutated vfs_fs, unknown fs>"; 2346 2347 static char * 2348 fs_name(ufs_failure_t *f) 2349 { 2350 HIDEOUS(("[fs_name")); 2351 ASSERT(MUTEX_HELD(&f->uf_mutex)); 2352 2353 if (!f) { 2354 HIDEOUS((": failure ptr is NULL]\n")); 2355 return ((char *)null_failure); 2356 } 2357 2358 if (f->uf_fsname[0] != '\0') { 2359 HIDEOUS((": return (uf_fsname)]\n")); 2360 return (f->uf_fsname); 2361 } 2362 2363 if (MUTEX_HELD(f->uf_vfs_lockp)) { 2364 if (f->uf_bp != f->uf_ufsvfsp->vfs_bufp) { 2365 HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n", 2366 (void *)f->uf_bp, (void *)f->uf_ufsvfsp->vfs_bufp)); 2367 return ((char *)mutated_vfs_bufp); 2368 } 2369 if (f->uf_fs != f->uf_ufsvfsp->vfs_fs) { 2370 HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n", 2371 (void *)f->uf_fs, (void *)f->uf_ufsvfsp->vfs_fs)); 2372 return ((char *)mutated_vfs_fs); 2373 } 2374 if (f->uf_ufsvfsp && f->uf_bp && f->uf_fs && 2375 *f->uf_fs->fs_fsmnt != '\0') { 2376 HIDEOUS((": return (fs_fsmnt)]\n")); 2377 return (f->uf_fs->fs_fsmnt); 2378 } 2379 } 2380 2381 HIDEOUS((": unknown file system]\n")); 2382 return ((char *)unknown_fs); 2383 } 2384 2385 #if defined(DEBUG) 2386 static char * 2387 lock_name(struct lockfs *lfp) 2388 { 2389 struct lock_description *l; 2390 char *lname; 2391 2392 HIDEOUS(("[lock_name")); 2393 2394 lname = lock_desc[0].ld_name; 2395 for (l = &lock_desc[1]; l->ld_name != NULL; l++) { 2396 if (lfp && lfp->lf_lock == l->ld_type) { 2397 lname = l->ld_name; 2398 break; 2399 } 2400 } 2401 HIDEOUS(("]")); 2402 return (lname); 2403 } 2404 2405 static char * 2406 state_name(ufs_failure_states_t state) 2407 { 2408 ufsd_t *s; 2409 2410 HIDEOUS(("[state_name")); 2411 2412 s = get_state_desc(state); 2413 2414 HIDEOUS(("]")); 2415 return (s->ud_name); 2416 } 2417 2418 static char * 2419 err_name(int error) 2420 { 2421 struct error_description *e; 2422 2423 HIDEOUS(("[err_name")); 2424 2425 for (e = &err_desc[1]; e->ed_name != NULL; e++) { 2426 if (error == e->ed_errno) { 2427 HIDEOUS(("]")); 2428 return (e->ed_name); 2429 } 2430 } 2431 HIDEOUS(("]")); 2432 return (err_desc[0].ed_name); 2433 } 2434 2435 static char * 2436 act_name(ufsa_t action) 2437 { 2438 struct action_description *a; 2439 2440 HIDEOUS(("[act_name")); 2441 2442 for (a = &act_desc[1]; a->ad_name != NULL; a++) { 2443 if (action == a->ad_v) { 2444 HIDEOUS(("]")); 2445 return (a->ad_name); 2446 } 2447 } 2448 HIDEOUS(("]")); 2449 return (act_desc[0].ad_name); 2450 } 2451 2452 /* 2453 * dump failure list 2454 */ 2455 static void 2456 dump_uf_list(char *msg) 2457 { 2458 ufs_failure_t *f; 2459 int i; 2460 int list_was_locked = MUTEX_HELD(&ufs_fix.uq_mutex); 2461 2462 if (!list_was_locked && !mutex_tryenter(&ufs_fix.uq_mutex)) { 2463 printf("dump_uf_list: couldn't get list lock\n"); 2464 return; 2465 } 2466 2467 if (msg) { 2468 printf("\n%s", msg); 2469 } 2470 printf("\ndump_uf_list:\n\tuq_lowat: %d, uq_ne: %d\n", 2471 ufs_fix.uq_lowat, ufs_fix.uq_ne); 2472 2473 mutex_enter(&uf_stats.ufst_mutex); 2474 printf("\tuf_stats.current_races: %ld\n", uf_stats.ufst_current_races); 2475 printf("\tuf_stats.num_failed: %ld\n", uf_stats.ufst_num_failed); 2476 printf("\tuf_stats.num_fixed: %ld\n", uf_stats.ufst_num_fixed); 2477 printf("\tuf_stats.cpu_waste: %ld\n", uf_stats.ufst_cpu_waste); 2478 printf("\tuf_stats.lock_violations: %ld, unmount_failures: %ld\n", 2479 uf_stats.ufst_lock_violations, uf_stats.ufst_unmount_failures); 2480 mutex_exit(&uf_stats.ufst_mutex); 2481 2482 for (f = ufs_fix.uq_ufhead, i = 1; f; f = f->uf_next, i++) { 2483 2484 if (!mutex_tryenter(&f->uf_mutex)) { 2485 printf("%d.\t\"skipped - try enter failed\"\n", i); 2486 continue; 2487 } 2488 2489 dump_uf(f, i); 2490 2491 mutex_exit(&f->uf_mutex); 2492 } 2493 2494 printf("\n"); 2495 2496 if (!list_was_locked) 2497 mutex_exit(&ufs_fix.uq_mutex); 2498 } 2499 2500 static void 2501 dump_uf(ufs_failure_t *f, int i) 2502 { 2503 if (!f) { 2504 printf("dump_uf: NULL failure record\n"); 2505 return; 2506 } 2507 2508 printf("%d.\t\"%s\" is %s.\n", 2509 i, fs_name(f), state_name(f->uf_s)); 2510 printf("\t\"%s\"\tAddr: 0x%p\n", f->uf_panic_str, (void *)f); 2511 printf("\tNext: 0x%p\t\tPrev: 0x%p\n", 2512 (void *)f->uf_next, (void *)f->uf_prev); 2513 2514 if (f->uf_orig) 2515 printf("\tOriginal failure: 0x%p \"%s\"\n", 2516 (void *)f->uf_orig, f->uf_orig->uf_panic_str); 2517 2518 printf("\tUfsvfs: 0x%p\t\tVfs_lockp: 0x%p\n", 2519 (void *)f->uf_ufsvfsp, (void *)f->uf_vfs_lockp); 2520 printf("\tVfs_fsfxp: 0x%p\n", (void *)f->uf_vfs_ufsfxp); 2521 printf("\tVfs_bufp: 0x%p", (void *)f->uf_bp); 2522 2523 if (f->uf_bp) 2524 printf("\t\tVfs_fs: 0x%p\n", (void *)f->uf_fs); 2525 else 2526 printf("\n"); 2527 2528 printf("\tBegin: 0x%lx\tEntered: 0x%lx\tEnd: 0x%lx\n", 2529 f->uf_begin_tm, f->uf_entered_tm, f->uf_end_tm); 2530 2531 printf("\tFlags: (%d) %s%s%s%s", f->uf_flags, 2532 f->uf_flags & UFSFX_LCKONLY? "\"lock only\" " : "", 2533 f->uf_flags & UFSFX_LCKUMOUNT? "\"lock+unmount\" " : "", 2534 f->uf_flags & UFSFX_REPAIR_START? "\"started repair\" " : "", 2535 f->uf_flags == 0? "<none>" : ""); 2536 2537 printf("\tRetry: %ld seconds\n", f->uf_retry); 2538 2539 printf("\tLockfs:\ttype: %s\terror: %s (%d)\n", 2540 lock_name(&f->uf_lf), err_name(f->uf_lf_err), f->uf_lf_err); 2541 2542 } 2543 #endif /* DEBUG */ 2544 2545 /* 2546 * returns # of ufs_failures in a non-terminal state on queue 2547 * used to coordinate with hlock thread (see ufs_thread.c) 2548 * and to determine when the error lock thread may exit 2549 */ 2550 2551 int 2552 ufsfx_get_failure_qlen(void) 2553 { 2554 ufs_failure_t *f; 2555 ufsd_t *s; 2556 int qlen = 0; 2557 2558 MINUTE(("[ufsfx_get_failure_qlen")); 2559 2560 if (!mutex_tryenter(&ufs_fix.uq_mutex)) 2561 return (-1); 2562 2563 /* 2564 * walk down failure list 2565 */ 2566 2567 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) { 2568 2569 if (!mutex_tryenter(&f->uf_mutex)) 2570 continue; 2571 2572 s = get_state_desc(f->uf_s); 2573 2574 if (s->ud_attr.terminal) { 2575 mutex_exit(&f->uf_mutex); 2576 continue; 2577 } 2578 2579 MINUTE((": found: %s, \"%s: %s\"\n", 2580 fs_name(f), state_name(f->uf_s), f->uf_panic_str)); 2581 2582 qlen++; 2583 mutex_exit(&f->uf_mutex); 2584 } 2585 2586 mutex_exit(&ufs_fix.uq_mutex); 2587 2588 MINUTE((": qlen=%d]\n", qlen)); 2589 2590 return (qlen); 2591 } 2592 2593 /* 2594 * timeout routine 2595 * called to shutdown fix failure thread and server daemon 2596 */ 2597 static void 2598 ufsfx_kill_fix_failure_thread(void *arg) 2599 { 2600 clock_t odelta = (clock_t)arg; 2601 int qlen; 2602 2603 MAJOR(("[ufsfx_kill_fix_failure_thread")); 2604 2605 qlen = ufsfx_get_failure_qlen(); 2606 2607 if (qlen < 0) { 2608 clock_t delta; 2609 2610 delta = odelta << 1; 2611 if (delta <= 0) 2612 delta = INT_MAX; 2613 2614 (void) timeout(ufsfx_kill_fix_failure_thread, 2615 (void *)delta, delta); 2616 MAJOR((": rescheduled")); 2617 2618 } else if (qlen == 0) { 2619 ufs_thread_exit(&ufs_fix); 2620 MAJOR((": killed")); 2621 } 2622 /* 2623 * else 2624 * let timeout expire 2625 */ 2626 MAJOR(("]\n")); 2627 } 2628