1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * ntwdt driver 31 * ------------ 32 * 33 * Subsystem Overview 34 * ------------------ 35 * 36 * This is a pseudo driver for the Netra-1280 watchdog 37 * timer (WDT). It provides for an *application-driven* 38 * WDT (AWDT), not a traditional, hardware-based WDT. A 39 * hardware-based feature is already present on the 40 * Netra-1280, and it is referred to here as the 41 * System WDT (SWDT). 42 * 43 * ScApp and Solaris cooperate to provide either a SWDT or 44 * an AWDT; they are mutually-exclusive. Once in AWDT 45 * mode, one can only transition to SWDT mode via a reboot. 46 * This obviously gives priority to the AWDT and was done 47 * to handle scenarios where the customer might temporarily 48 * terminate their wdog-app in order to do some debugging, 49 * or even to load a new version of the wdog-app. 50 * 51 * The wdog-app does an open() of the /dev/ntwdt device node 52 * and then issues ioctl's to control the state of the AWDT. 
53 * The ioctl's are implemented by this driver. Only one 54 * concurrent instance of open() is allowed. On the close(), 55 * a watchdog timer still in progress is NOT terminated. 56 * This allows the global state machine to monitor the 57 * progress of a Solaris reboot. ScApp will reset Solaris 58 * (eg, send an XIR) if the actual boot/crashdump latency 59 * is larger than the current AWDT timeout. 60 * 61 * The rationale for implementing an AWDT (vs a SWDT) is 62 * that it is more sensitive to system outage scenarios than 63 * a SWDT. Eg, a system could be in such a failed state that 64 * even though its clock-interrupt could still run (and the 65 * SWDT's watchdog timer therefore re-armed), the system could 66 * in effect have a corrupt or very poor dispatch latency. 67 * An AWDT would be sensitive to dispatch latency issues, as 68 * well as problems with its own execution (eg, a hang or 69 * crash). 70 * 71 * Subsystem Interface Overview 72 * ---------------------------- 73 * 74 * This pseudo-driver does not have any 'extern' functions. 75 * 76 * All system interaction is done via the traditional driver 77 * entry points (eg, attach(9e), _init(9e)). 78 * 79 * All interaction with user is via the entry points in the 80 * 'struct cb_ops' vector (eg, open(9e), ioctl(9e), and 81 * close(9e)). 82 * 83 * Subsystem Implementation Overview 84 * --------------------------------- 85 * 86 * ScApp and Solaris (eg, ntwdt) cooperate so that a state 87 * machine global to ScApp and ntwdt is either in AWDT mode 88 * or in SWDT mode. These two peers communicate via the SBBC 89 * Mailbox that resides in IOSRAM (SBBC_MAILBOX_KEY). 90 * They use two new mailbox messages (LW8_MBOX_WDT_GET and 91 * LW8_MBOX_WDT_SET) and one new event (LW8_EVENT_SC_RESTARTED). 92 * 93 * ntwdt implements the AWDT by implementing a "virtual 94 * WDT" (VWDT). Eg, the watchdog timer is not a traditional 95 * counter in hardware, it is a variable in ntwdt's 96 * softstate. 
The wdog-app's actions cause changes to this 97 * and other variables in ntwdt's softstate. 98 * 99 * The wdog-app uses the LOMIOCDOGTIME ioctl to specify 100 * the number of seconds in the watchdog timeout (and 101 * therefore the VWDT). The wdog-app then uses the 102 * LOMIOCDOGCTL ioctl to enable the wdog. This causes 103 * ntwdt to create a Cyclic that will both decrement 104 * the VWDT and check to see if it has expired. To keep 105 * the VWDT from expiring, the wdog-app uses the 106 * LOMIOCDOGPAT ioctl to re-arm (or "pat") the watchdog. 107 * This sets the VWDT value to that specified in the 108 * last LOMIOCDOGTIME ioctl. The wdog-app can use the 109 * LOMIOCDOGSTATE ioctl to query the state of the VWDT. 110 * 111 * The wdog-app can also specify how Recovery is to be 112 * done. The only choice is whether to do a crashdump 113 * or not. If ntwdt computes a VWDT expiration, then 114 * ntwdt initiates the Recovery, else ScApp will. Eg, 115 * a hang in Solaris will be sensed by ScApp and not 116 * ntwdt. The wdog-app specifies the Recovery policy 117 * via the DOGCTL ioctl. 118 * 119 * Timeout Expiration 120 * ------------------ 121 * In our implementation, ScApp senses a watchdog 122 * expiration the same way it historically has: 123 * by reading a well-known area of IOSRAM (SBBC_TOD_KEY) 124 * to see if the timestamp associated with a 125 * Solaris-generated "heartbeat" field is older 126 * than the currently specified timeout (which is 127 * also specified in this same IOSRAM section). 128 * 129 * What is different when ntwdt is running is that 130 * ntwdt is responsible for updating the Heartbeat, 131 * and not the normal client (todsg). When ntwdt 132 * puts the system in AWDT mode, it disables todsg's 133 * updating of the Heartbeat by changing the state of 134 * a pair of kernel tunables (watchdog_activated and 135 * watchdog_enable). ntwdt then takes responsibility 136 * for updating the Heartbeat. 
It does this by 137 * updating the Heartbeat from the Cyclic that is 138 * created when the user enables the AWDT (DOGCTL) 139 * or specifies a new timeout value (DOGTIME). 140 * 141 * As long as the AWDT is enabled, ntwdt will update 142 * the real system Heartbeat. As a result, ScApp 143 * will conclude that Solaris is still running. If 144 * the user stops re-arming the VWDT or Solaris 145 * hangs (eg), ntwdt will stop updating the Heartbeat. 146 * 147 * Note that ntwdt computes expiration via the 148 * repeatedly firing Cyclic, and ScApp computes 149 * expiration via a cessation of Heartbeat update. 150 * Since Heartbeat update stops once user stops 151 * re-arming the VWDT (ie, DOGPAT ioctl), ntwdt 152 * will compute a timeout at t(x), and ScApp will 153 * compute a timeout at t(2x), where 'x' is the 154 * current timeout value. When ntwdt computes 155 * the expiration, ntwdt masks this asymmetry. 156 * 157 * Lifecycle Events 158 * ---------------- 159 * 160 * ntwdt only handles one of the coarse-grained 161 * "lifecycle events" (eg, entering OBP, shutdown, 162 * power-down, DR) that are possible during a Solaris 163 * session: a panic. (Note that ScApp handles one 164 * of the others: "entering OBP"). Other than these, 165 * a user choosing such a state transition must first 166 * use the wdog-app to disable the watchdog, else 167 * an expiration could occur. 168 * 169 * Solaris handles a panic by registering a handler 170 * that's called during the panic. The handler will 171 * set the watchdog timeout to the value specified 172 * in the NTWDT_BOOT_TIMEOUT_PROP driver Property. 173 * Again, this value should be greater than the actual 174 * Solaris reboot/crashdump latency. 175 * 176 * When the user enters OBP via the System Controller, 177 * ScApp will disable the watchdog (from ScApp's 178 * perspective), but it will not communicate this to 179 * ntwdt. 
After having exited OBP, the wdog-app can 180 * be used to enable or disable the watchdog (which 181 * will get both ScApp and ntwdt in-sync). 182 * 183 * Locking 184 * ------- 185 * 186 * ntwdt has code running at three interrupt levels as 187 * well as base level. 188 * 189 * The ioctls run at base level in User Context. The 190 * driver's entry points run at base level in Kernel 191 * Context. 192 * 193 * ntwdt's three interrupt levels are used by: 194 * 195 * o LOCK_LEVEL : 196 * the Cyclic used to manage the VWDT is initialized 197 * to CY_LOCK_LEVEL 198 * 199 * o DDI_SOFTINT_MED : 200 * the SBBC mailbox implementation registers the 201 * specified handlers at this level 202 * 203 * o DDI_SOFTINT_LOW : 204 * this level is used by two handlers. One handler 205 * is triggered by the LOCK_LEVEL Cyclic. The other 206 * handler is triggered by the DDI_SOFTINT_MED 207 * handler registered to handle SBBC mailbox events. 208 * 209 * The centralizing concept is that the ntwdt_wdog_mutex 210 * in the driver's softstate is initialized to have an 211 * interrupt-block-cookie corresponding to DDI_SOFTINT_LOW. 212 * 213 * As a result, any base level code grabs ntwdt_wdog_mutex 214 * before doing work. Also, any handler running at interrupt 215 * level higher than DDI_SOFTINT_LOW "posts down" so that 216 * a DDI_SOFTINT_LOW handler is responsible for executing 217 * the "real work". Each DDI_SOFTINT_LOW handler also 218 * first grabs ntwdt_wdog_mutex, and so base level is 219 * synchronized with all interrupt levels. 220 * 221 * Note there's another mutex in the softstate: ntwdt_mutex. 222 * This mutex has few responsibilities. However, this 223 * locking order must be followed: ntwdt_wdog_mutex is 224 * held first, and then ntwdt_mutex. This choice results 225 * from the fact that the number of dynamic call sites 226 * for ntwdt_wdog_mutex is MUCH greater than that of 227 * ntwdt_mutex. 
As a result, almost all uses of 228 * ntwdt_wdog_mutex do not even require ntwdt_mutex to 229 * be held, which saves resources. 230 * 231 * Driver Properties 232 * ----------------- 233 * 234 * "ddi-forceattach=1;" 235 * ------------------ 236 * 237 * Using this allows our driver to be automatically 238 * loaded at boot-time AND to not be removed from memory 239 * solely due to memory-pressure. 240 * 241 * Being loaded at boot allows ntwdt to (as soon as 242 * possible) tell ScApp of the current mode of the 243 * state-machine (eg, SWDT). This is needed for the case 244 * when Solaris is re-loaded while in AWDT mode; having 245 * Solaris communicate ASAP with ScApp reduces the duration 246 * of any "split-brain" scenario where ScApp and Solaris 247 * are not in the same mode. 248 * 249 * Having ntwdt remain in memory even after a close() 250 * allows ntwdt to answer any SBBC mailbox commands 251 * that ScApp sends (as the mailbox infrastructure is 252 * not torn down until ntwdt is detach()'d). Specifically, 253 * ScApp could be re-loaded after AWDT mode had been 254 * entered and the wdog-app had close()'d ntwdt. ScApp 255 * will then eventually send a LW8_EVENT_SC_RESTARTED 256 * mailbox event in order to learn the current state of 257 * state-machine. Having ntwdt remain loaded allows this 258 * event to never go unanswered. 259 * 260 * "ntwdt-boottimeout=600;" 261 * ---------------------- 262 * 263 * This specifies the watchdog timeout value (in seconds) to 264 * use when ntwdt is aware of the need to reboot/reload Solaris. 265 * 266 * ntwdt will update ScApp by setting the watchdog timeout 267 * to the specified number of seconds when either a) Solaris 268 * panics or b) the VWDT expires. Note that this is only done 269 * if the user has chosen to enable Reset. 270 * 271 * ntwdt boundary-checks the specified value, and if out-of-range, 272 * it initializes the watchdog timeout to a default value of 273 * NTWDT_DEFAULT_BOOT_TIMEOUT seconds. 
Note that this is a 274 * default value and is not a *minimum* value. The valid range 275 * for the watchdog timeout is between one second and 276 * NTWDT_MAX_TIMEOUT seconds, inclusive. 277 * 278 * If ntwdt-boottimeout is set to a value less than an actual 279 * Solaris boot's latency, ScApp will reset Solaris during boot. 280 * Note that a continuous series of ScApp-induced resets will 281 * not occur; ScApp only resets Solaris on the first transition 282 * into the watchdog-expired state. 283 */ 284 285 #include <sys/note.h> 286 #include <sys/types.h> 287 #include <sys/callb.h> 288 #include <sys/stat.h> 289 #include <sys/conf.h> 290 #include <sys/ddi.h> 291 #include <sys/sunddi.h> 292 #include <sys/modctl.h> 293 #include <sys/ddi_impldefs.h> 294 #include <sys/kmem.h> 295 #include <sys/devops.h> 296 #include <sys/cyclic.h> 297 #include <sys/uadmin.h> 298 #include <sys/lw8_impl.h> 299 #include <sys/sgsbbc.h> 300 #include <sys/sgsbbc_iosram.h> 301 #include <sys/sgsbbc_mailbox.h> 302 #include <sys/todsg.h> 303 #include <sys/mem_config.h> 304 #include <sys/lom_io.h> 305 #include <sys/reboot.h> 306 #include <sys/clock.h> 307 308 309 /* 310 * tunables 311 */ 312 int ntwdt_disable_timeout_action = 0; 313 #ifdef DEBUG 314 /* 315 * tunable to simulate a Solaris hang. If is non-zero, then 316 * no system heartbeats ("hardware patting") will be done, 317 * even though all AWDT machinery is functioning OK. 318 */ 319 int ntwdt_stop_heart; 320 #endif 321 322 /* 323 * Driver Property 324 */ 325 #define NTWDT_BOOT_TIMEOUT_PROP "ntwdt-boottimeout" 326 327 /* 328 * watchdog-timeout values (in seconds): 329 * 330 * NTWDT_DEFAULT_BOOT_TIMEOUT: the default value used if 331 * this driver is aware of the 332 * reboot. 
333 * 334 * NTWDT_MAX_TIMEOUT: max value settable by app (via the 335 * LOMIOCDOGTIME ioctl) 336 */ 337 #define NTWDT_DEFAULT_BOOT_TIMEOUT (10*60) 338 #define NTWDT_MAX_TIMEOUT (180*60) 339 340 341 #define NTWDT_CYCLIC_CHK_PERCENT (20) 342 #define NTWDT_MINOR_NODE "awdt" 343 #define OFFSET(base, field) ((char *)&base.field - (char *)&base) 344 345 #define NTWDT_SUCCESS 0 346 #define NTWDT_FAILURE 1 347 348 typedef struct { 349 callb_id_t ntwdt_panic_cb; 350 } ntwdt_callback_ids_t; 351 static ntwdt_callback_ids_t ntwdt_callback_ids; 352 353 /* MBOX_EVENT_LW8 that is sent in IOSRAM Mailbox: */ 354 static lw8_event_t lw8_event; /* payload */ 355 static sbbc_msg_t sbbc_msg; /* message */ 356 357 static ddi_softintr_t ntwdt_mbox_softint_id; 358 static ddi_softintr_t ntwdt_cyclic_softint_id; 359 360 /* 361 * VWDT (i.e., Virtual Watchdog Timer) state 362 */ 363 typedef struct { 364 kmutex_t ntwdt_wdog_mutex; 365 ddi_iblock_cookie_t ntwdt_wdog_mtx_cookie; 366 int ntwdt_wdog_enabled; /* wdog enabled ? */ 367 int ntwdt_reset_enabled; /* reset enabled ? */ 368 int ntwdt_timer_running; /* wdog running ? */ 369 int ntwdt_wdog_expired; /* wdog expired ? */ 370 int ntwdt_is_initial_enable; /* 1st wdog-enable? 
*/ 371 uint32_t ntwdt_boot_timeout; /* timeout for boot */ 372 uint32_t ntwdt_secs_remaining; /* expiration timer */ 373 uint8_t ntwdt_wdog_action; /* Reset action */ 374 uint32_t ntwdt_wdog_timeout; /* timeout in seconds */ 375 hrtime_t ntwdt_cyclic_interval; /* cyclic interval */ 376 cyc_handler_t ntwdt_cycl_hdlr; 377 cyc_time_t ntwdt_cycl_time; 378 kmutex_t ntwdt_event_lock; /* lock */ 379 uint64_t ntwdt_wdog_flags; 380 } ntwdt_wdog_t; 381 382 /* ntwdt_wdog_flags */ 383 #define NTWDT_FLAG_SKIP_CYCLIC 0x1 /* skip next Cyclic */ 384 385 /* macros to set/clear one bit in ntwdt_wdog_flags */ 386 #define NTWDT_FLAG_SET(p, f)\ 387 ((p)->ntwdt_wdog_flags |= NTWDT_FLAG_##f) 388 #define NTWDT_FLAG_CLR(p, f)\ 389 ((p)->ntwdt_wdog_flags &= ~NTWDT_FLAG_##f) 390 391 392 /* softstate */ 393 typedef struct { 394 kmutex_t ntwdt_mutex; 395 dev_info_t *ntwdt_dip; /* dip */ 396 int ntwdt_open_flag; /* file open ? */ 397 ntwdt_wdog_t *ntwdt_wdog_state; /* wdog state */ 398 cyclic_id_t ntwdt_cycl_id; 399 } ntwdt_state_t; 400 401 static void *ntwdt_statep; /* softstate */ 402 static dev_info_t *ntwdt_dip; 403 /* 404 * if non-zero, then the app-wdog feature is available on 405 * this system configuration. 406 */ 407 static int ntwdt_watchdog_available; 408 /* 409 * if non-zero, then application has used the LOMIOCDOGCTL 410 * ioctl at least once in order to Enable the app-wdog. 411 * Also, if this is non-zero, then system is in AWDT mode, 412 * else it is in SWDT mode. 
*/
static	int	ntwdt_watchdog_activated;

/* map a minor number to its softstate (NULL if none was allocated) */
#define	getstate(minor)	\
	((ntwdt_state_t *)ddi_get_soft_state(ntwdt_statep, (minor)))

/* dev_ops / cb_ops entry points */
static int	ntwdt_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int	ntwdt_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
static int	ntwdt_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
		    void **result);
static int	ntwdt_open(dev_t *, int, int, cred_t *);
static int	ntwdt_close(dev_t, int, int, cred_t *);
static int	ntwdt_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);

/* internal helpers */
static void	ntwdt_reprogram_wd(ntwdt_state_t *);
static boolean_t	ntwdt_panic_cb(void *arg, int code);
static void	ntwdt_start_timer(ntwdt_state_t *);
static void	ntwdt_stop_timer(void *);
static void	ntwdt_stop_timer_lock(void *arg);
static void	ntwdt_add_callbacks(ntwdt_state_t *ntwdt_ptr);
static void	ntwdt_remove_callbacks();
static void	ntwdt_cyclic_pat(void *arg);
static void	ntwdt_enforce_timeout();
static void	ntwdt_pat_hw_watchdog();
static int	ntwdt_set_cfgvar(int var, int val);
static void	ntwdt_set_cfgvar_noreply(int var, int val);
static int	ntwdt_read_props(ntwdt_state_t *);
static int	ntwdt_add_mbox_handlers(ntwdt_state_t *);
static int	ntwdt_set_hw_timeout(uint32_t period);
static int	ntwdt_remove_mbox_handlers(void);
static uint_t	ntwdt_event_data_handler(char *arg);
static uint_t	ntwdt_mbox_softint(char *arg);
static uint_t	ntwdt_cyclic_softint(char *arg);
static int	ntwdt_lomcmd(int cmd, intptr_t arg);
static int	ntwdt_chk_wdog_support();
static int	ntwdt_chk_sc_support();
static int	ntwdt_set_swdt_state();
static void	ntwdt_swdt_to_awdt(ntwdt_wdog_t *);
static void	ntwdt_arm_vwdt(ntwdt_wdog_t *wdog_state);
#ifdef DEBUG
static int	ntwdt_get_cfgvar(int var, int *val);
#endif

/*
 * Character/block entry points.  Only open, close and ioctl are
 * implemented by this driver; the remaining slots are filled with
 * the nulldev/nochpoll stubs.  Initializers are positional.
 */
struct cb_ops ntwdt_cb_ops = {
	ntwdt_open,	/* open  */
	ntwdt_close,	/* close */
	nulldev,	/* strategy */
	nulldev,	/* print */
	nulldev,	/* dump */
	nulldev,	/* read */
	nulldev,	/* write */
	ntwdt_ioctl,	/* ioctl */
	nulldev,	/* devmap */
	nulldev,	/* mmap */
	nulldev,	/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,	/* cb_prop_op */
	NULL,		/* streamtab  */
	D_MP | D_NEW
};

/*
 * Device operations: getinfo, attach and detach are provided by
 * this driver and ntwdt_cb_ops supplies the cb_ops vector.
 */
static struct dev_ops ntwdt_ops = {
	DEVO_REV,		/* Devo_rev */
	0,			/* Refcnt */
	ntwdt_info,		/* Info */
	nulldev,		/* Identify */
	nulldev,		/* Probe */
	ntwdt_attach,		/* Attach */
	ntwdt_detach,		/* Detach */
	nodev,			/* Reset */
	&ntwdt_cb_ops,		/* Driver operations */
	0,			/* Bus operations */
	NULL			/* Power */
};

/* module linkage passed to mod_install/mod_info/mod_remove */
static struct modldrv modldrv = {
	&mod_driverops, 		/* This one is a driver */
	"ntwdt-Netra-T12 v%I%", 	/* Name of the module. */
	&ntwdt_ops,			/* Driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};


/*
 * Flags to set in ntwdt_debug.
501 * 502 * Use either the NTWDT_DBG or NTWDT_NDBG macros 503 */ 504 #define WDT_DBG_ENTRY 0x00000001 /* drv entry points */ 505 #define WDT_DBG_HEART 0x00000002 /* system heartbeat */ 506 #define WDT_DBG_VWDT 0x00000004 /* virtual WDT */ 507 #define WDT_DBG_EVENT 0x00000010 /* SBBC Mbox events */ 508 #define WDT_DBG_PROT 0x00000020 /* SC/Solaris protocol */ 509 #define WDT_DBG_IOCTL 0x00000040 /* ioctl's */ 510 511 uint64_t ntwdt_debug; /* enables tracing of module's activity */ 512 513 /* used in non-debug version of module */ 514 #define NTWDT_NDBG(flag, msg) { if ((ntwdt_debug & (flag)) != 0) \ 515 (void) printf msg; } 516 517 #ifdef DEBUG 518 typedef struct { 519 uint32_t ntwdt_wd1; 520 uint8_t ntwdt_wd2; 521 } ntwdt_data_t; 522 523 #define NTWDTIOCSTATE _IOWR('a', 0xa, ntwdt_data_t) 524 #define NTWDTIOCPANIC _IOR('a', 0xb, uint32_t) 525 526 /* used in debug version of module */ 527 #define NTWDT_DBG(flag, msg) { if ((ntwdt_debug & (flag)) != 0) \ 528 (void) printf msg; } 529 #else 530 #define NTWDT_DBG(flag, msg) 531 #endif 532 533 534 int 535 _init(void) 536 { 537 int error = 0; 538 539 NTWDT_DBG(WDT_DBG_ENTRY, ("_init")); 540 541 /* Initialize the soft state structures */ 542 if ((error = ddi_soft_state_init(&ntwdt_statep, 543 sizeof (ntwdt_state_t), 1)) != 0) { 544 return (error); 545 } 546 547 /* Install the loadable module */ 548 if ((error = mod_install(&modlinkage)) != 0) { 549 ddi_soft_state_fini(&ntwdt_statep); 550 } 551 return (error); 552 } 553 554 int 555 _info(struct modinfo *modinfop) 556 { 557 NTWDT_DBG(WDT_DBG_ENTRY, ("_info")); 558 559 return (mod_info(&modlinkage, modinfop)); 560 } 561 562 int 563 _fini(void) 564 { 565 int error; 566 567 NTWDT_DBG(WDT_DBG_ENTRY, ("_fini")); 568 569 error = mod_remove(&modlinkage); 570 if (error == 0) { 571 ddi_soft_state_fini(&ntwdt_statep); 572 } 573 574 return (error); 575 } 576 577 static int 578 ntwdt_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 579 { 580 int instance; 581 ntwdt_state_t *ntwdt_ptr = 
NULL; 582 ntwdt_wdog_t *wdog_state = NULL; 583 cyc_handler_t *hdlr = NULL; 584 585 NTWDT_DBG(WDT_DBG_ENTRY, ("attach: dip/cmd: 0x%p/%d", 586 dip, cmd)); 587 588 switch (cmd) { 589 case DDI_ATTACH: 590 break; 591 592 case DDI_RESUME: 593 return (DDI_SUCCESS); 594 595 default: 596 return (DDI_FAILURE); 597 } 598 599 /* see if app-wdog is supported on our config */ 600 if (ntwdt_chk_wdog_support() != 0) 601 return (DDI_FAILURE); 602 603 /* (unsolicitedly) send SWDT state to ScApp via mailbox */ 604 ntwdt_set_swdt_state(); 605 606 instance = ddi_get_instance(dip); 607 ASSERT(instance == 0); 608 609 if (ddi_soft_state_zalloc(ntwdt_statep, instance) 610 != DDI_SUCCESS) { 611 return (DDI_FAILURE); 612 } 613 ntwdt_ptr = ddi_get_soft_state(ntwdt_statep, instance); 614 ASSERT(ntwdt_ptr != NULL); 615 616 ntwdt_dip = dip; 617 618 ntwdt_ptr->ntwdt_dip = dip; 619 ntwdt_ptr->ntwdt_cycl_id = CYCLIC_NONE; 620 mutex_init(&ntwdt_ptr->ntwdt_mutex, NULL, 621 MUTEX_DRIVER, NULL); 622 623 /* 624 * Initialize the watchdog structure 625 */ 626 ntwdt_ptr->ntwdt_wdog_state = 627 kmem_zalloc(sizeof (ntwdt_wdog_t), KM_SLEEP); 628 wdog_state = ntwdt_ptr->ntwdt_wdog_state; 629 630 /* 631 * Create an iblock-cookie so that ntwdt_wdog_mutex can be 632 * used at User Context and Interrupt Context. 633 */ 634 if (ddi_get_soft_iblock_cookie(dip, DDI_SOFTINT_LOW, 635 &wdog_state->ntwdt_wdog_mtx_cookie) != DDI_SUCCESS) { 636 cmn_err(CE_WARN, "init of iblock cookie failed " 637 "for ntwdt_wdog_mutex"); 638 goto err1; 639 } else { 640 mutex_init(&wdog_state->ntwdt_wdog_mutex, NULL, MUTEX_DRIVER, 641 (void *)wdog_state->ntwdt_wdog_mtx_cookie); 642 } 643 644 mutex_init(&wdog_state->ntwdt_event_lock, NULL, 645 MUTEX_DRIVER, NULL); 646 647 /* Cyclic fires once per second: */ 648 wdog_state->ntwdt_cyclic_interval = NANOSEC; 649 650 /* interpret our .conf file. 
*/ 651 (void) ntwdt_read_props(ntwdt_ptr); 652 653 /* init the Cyclic that drives the VWDT */ 654 hdlr = &wdog_state->ntwdt_cycl_hdlr; 655 hdlr->cyh_level = CY_LOCK_LEVEL; 656 hdlr->cyh_func = ntwdt_cyclic_pat; 657 hdlr->cyh_arg = (void *)ntwdt_ptr; 658 659 /* Register handler for SBBC Mailbox events */ 660 if (ntwdt_add_mbox_handlers(ntwdt_ptr) != DDI_SUCCESS) 661 goto err2; 662 663 /* Softint that will be triggered by Cyclic that drives VWDT */ 664 if (ddi_add_softintr(dip, DDI_SOFTINT_LOW, &ntwdt_cyclic_softint_id, 665 NULL, NULL, ntwdt_cyclic_softint, (caddr_t)ntwdt_ptr) 666 != DDI_SUCCESS) { 667 cmn_err(CE_WARN, "failed to add cyclic softintr"); 668 goto err3; 669 } 670 671 /* Register callbacks for various system events, e.g. panic */ 672 ntwdt_add_callbacks(ntwdt_ptr); 673 674 /* 675 * Create Minor Node as last activity. This prevents 676 * application from accessing our implementation until it 677 * is initialized. 678 */ 679 if (ddi_create_minor_node(dip, NTWDT_MINOR_NODE, S_IFCHR, 0, 680 DDI_PSEUDO, NULL) == DDI_FAILURE) { 681 cmn_err(CE_WARN, "failed to create Minor Node: %s", 682 NTWDT_MINOR_NODE); 683 goto err4; 684 } 685 686 /* Display our driver info in the banner */ 687 ddi_report_dev(dip); 688 689 return (DDI_SUCCESS); 690 691 err4: 692 ntwdt_remove_callbacks(); 693 ddi_remove_softintr(ntwdt_cyclic_softint_id); 694 err3: 695 ntwdt_remove_mbox_handlers(); 696 err2: 697 mutex_destroy(&wdog_state->ntwdt_event_lock); 698 mutex_destroy(&wdog_state->ntwdt_wdog_mutex); 699 err1: 700 kmem_free(wdog_state, sizeof (ntwdt_wdog_t)); 701 ntwdt_ptr->ntwdt_wdog_state = NULL; 702 703 mutex_destroy(&ntwdt_ptr->ntwdt_mutex); 704 ddi_soft_state_free(ntwdt_statep, instance); 705 706 ntwdt_dip = NULL; 707 708 return (DDI_FAILURE); 709 } 710 711 /* 712 * Do static checks to see if the app-wdog feature is supported in 713 * the current configuration. 
714 * 715 * If the kernel debugger was booted, then we disallow the app-wdog 716 * feature, as we assume the user will be interested more in 717 * debuggability of system than its ability to support an app-wdog. 718 * (Note that the System Watchdog (SWDT) can still be available). 719 * 720 * If the currently loaded version of ScApp does not understand one 721 * of the IOSRAM mailbox messages that is specific to the app-wdog 722 * protocol, then we disallow use of the app-wdog feature (else 723 * we could have a "split-brain" scenario where Solaris supports 724 * app-wdog but ScApp doesn't). 725 * 726 * Note that there is no *dynamic* checking of whether ScApp supports 727 * the wdog protocol. Eg, if a new version of ScApp was loaded out 728 * from under Solaris, then once in AWDT mode, Solaris has no way 729 * of knowing that (a possibly older version of) ScApp was loaded. 730 */ 731 static int 732 ntwdt_chk_wdog_support() 733 { 734 int retval = ENOTSUP; 735 int rv; 736 737 if ((boothowto & RB_DEBUG) != 0) { 738 cmn_err(CE_WARN, "kernel debugger was booted; " 739 "application watchdog is not available."); 740 return (retval); 741 } 742 743 /* 744 * if ScApp does not support the MBOX_GET cmd, then 745 * it does not support the app-wdog feature. Also, 746 * if there is *any* type of SBBC Mailbox error at 747 * this point, we will disable the app watchdog 748 * feature. 749 */ 750 if ((rv = ntwdt_chk_sc_support()) != 0) { 751 if (rv == EINVAL) 752 cmn_err(CE_WARN, "ScApp does not support " 753 "the application watchdog feature."); 754 else 755 cmn_err(CE_WARN, "SBBC mailbox had error;" 756 "application watchdog is not available."); 757 retval = rv; 758 } else { 759 ntwdt_watchdog_available = 1; 760 retval = 0; 761 } 762 763 NTWDT_DBG(WDT_DBG_PROT, ("app-wdog is %savailable", 764 (ntwdt_watchdog_available != 0) ? "" : "not ")); 765 766 return (retval); 767 } 768 769 /* 770 * Check to see if ScApp supports the app-watchdog feature. 
771 * 772 * Do this by sending one of the mailbox commands that is 773 * specific to the app-wdog protocol. If ScApp does not 774 * return an error code, we will assume it understands it 775 * (as well as the remainder of the app-wdog protocol). 776 * 777 * Notes: 778 * ntwdt_lomcmd() will return EINVAL if ScApp does not 779 * understand the message. The underlying sbbc_mbox_ 780 * utility function returns SG_MBOX_STATUS_ILLEGAL_PARAMETER 781 * ("illegal ioctl parameter"). 782 */ 783 static int 784 ntwdt_chk_sc_support() 785 { 786 lw8_get_wdt_t get_wdt; 787 788 return (ntwdt_lomcmd(LW8_MBOX_WDT_GET, (intptr_t)&get_wdt)); 789 } 790 791 static int 792 ntwdt_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 793 { 794 int instance = ddi_get_instance(dip); 795 ntwdt_state_t *ntwdt_ptr = NULL; 796 797 NTWDT_DBG(WDT_DBG_ENTRY, ("detach: dip/cmd: 0x%p/%d", 798 dip, cmd)); 799 800 ntwdt_ptr = ddi_get_soft_state(ntwdt_statep, instance); 801 if (ntwdt_ptr == NULL) { 802 return (DDI_FAILURE); 803 } 804 805 switch (cmd) { 806 case DDI_SUSPEND: 807 return (DDI_SUCCESS); 808 809 case DDI_DETACH: 810 /* 811 * release resources in opposite (LIFO) order as 812 * were allocated in attach(9f). 813 */ 814 ddi_remove_minor_node(dip, NULL); 815 816 ntwdt_stop_timer_lock((void *)ntwdt_ptr); 817 818 ntwdt_remove_callbacks(ntwdt_ptr); 819 820 ddi_remove_softintr(ntwdt_cyclic_softint_id); 821 822 ntwdt_remove_mbox_handlers(); 823 824 mutex_destroy(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_event_lock); 825 mutex_destroy(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex); 826 kmem_free(ntwdt_ptr->ntwdt_wdog_state, 827 sizeof (ntwdt_wdog_t)); 828 ntwdt_ptr->ntwdt_wdog_state = NULL; 829 830 mutex_destroy(&ntwdt_ptr->ntwdt_mutex); 831 832 ddi_soft_state_free(ntwdt_statep, instance); 833 834 ntwdt_dip = NULL; 835 return (DDI_SUCCESS); 836 837 default: 838 return (DDI_FAILURE); 839 } 840 } 841 842 /* 843 * Register the SBBC Mailbox handlers. 844 * 845 * Currently, only one handler is used. 
It processes the MBOX_EVENT_LW8 846 * Events that are sent by ScApp. Of the Events that are sent, only 847 * the Event declaring that ScApp is coming up from a reboot 848 * (LW8_EVENT_SC_RESTARTED) is processed. 849 * 850 * sbbc_mbox_reg_intr registers the handler so that it executes at 851 * a DDI_SOFTINT_MED priority. 852 */ 853 static int 854 ntwdt_add_mbox_handlers(ntwdt_state_t *ntwdt_ptr) 855 { 856 int err; 857 858 /* 859 * We need two interrupt handlers to handle the SBBC mbox 860 * events. The sbbc_mbox_xxx implementation will 861 * trigger our ntwdt_event_data_handler, which itself will 862 * trigger our ntwdt_mbox_softint. As a result, we'll 863 * register ntwdt_mbox_softint first, to ensure it cannot 864 * be called (until its caller, ntwdt_event_data_handler) 865 * is registered. 866 */ 867 868 /* 869 * add the softint that will do the real work of handling the 870 * LW8_SC_RESTARTED_EVENT sent from ScApp. 871 */ 872 if (ddi_add_softintr(ntwdt_ptr->ntwdt_dip, DDI_SOFTINT_LOW, 873 &ntwdt_mbox_softint_id, NULL, NULL, ntwdt_mbox_softint, 874 (caddr_t)ntwdt_ptr) != DDI_SUCCESS) { 875 cmn_err(CE_WARN, "Failed to add MBOX_EVENT_LW8 softintr"); 876 return (DDI_FAILURE); 877 } 878 879 /* 880 * Register an interrupt handler with the SBBC mailbox utility. 881 * This handler will get called on each event of each type of 882 * MBOX_EVENT_LW8 events. However, it will only conditionally 883 * trigger the worker-handler (ntwdt_mbox_softintr). 884 */ 885 sbbc_msg.msg_buf = (caddr_t)&lw8_event; 886 sbbc_msg.msg_len = sizeof (lw8_event); 887 888 err = sbbc_mbox_reg_intr(MBOX_EVENT_LW8, ntwdt_event_data_handler, 889 &sbbc_msg, NULL, &ntwdt_ptr->ntwdt_wdog_state->ntwdt_event_lock); 890 if (err != 0) { 891 cmn_err(CE_WARN, "Failed to register SBBC MBOX_EVENT_LW8" 892 " handler. 
err=%d", err); 893 894 ddi_remove_softintr(ntwdt_mbox_softint_id); 895 return (DDI_FAILURE); 896 } 897 898 return (DDI_SUCCESS); 899 } 900 901 /* 902 * Unregister the SBBC Mailbox handlers that were registered 903 * by ntwdt_add_mbox_handlers. 904 */ 905 static int 906 ntwdt_remove_mbox_handlers(void) 907 { 908 int rv = DDI_SUCCESS; 909 int err; 910 911 /* 912 * unregister the two handlers that cooperate to handle 913 * the LW8_SC_RESTARTED_EVENT. Note that they are unregistered 914 * in LIFO order (as compared to how they were registered). 915 */ 916 err = sbbc_mbox_unreg_intr(MBOX_EVENT_LW8, ntwdt_event_data_handler); 917 if (err != 0) { 918 cmn_err(CE_WARN, "Failed to unregister sbbc MBOX_EVENT_LW8 " 919 "handler. Err=%d", err); 920 rv = DDI_FAILURE; 921 } 922 923 /* remove the associated softint */ 924 ddi_remove_softintr(ntwdt_mbox_softint_id); 925 926 return (rv); 927 } 928 929 _NOTE(ARGSUSED(0)) 930 static int 931 ntwdt_info(dev_info_t *dip, ddi_info_cmd_t infocmd, 932 void *arg, void **result) 933 { 934 dev_t dev; 935 int instance; 936 int error = DDI_SUCCESS; 937 938 if (result == NULL) 939 return (DDI_FAILURE); 940 941 switch (infocmd) { 942 case DDI_INFO_DEVT2DEVINFO: 943 dev = (dev_t)arg; 944 if (getminor(dev) == 0) 945 *result = (void *)ntwdt_dip; 946 else 947 error = DDI_FAILURE; 948 break; 949 950 case DDI_INFO_DEVT2INSTANCE: 951 dev = (dev_t)arg; 952 instance = getminor(dev); 953 *result = (void *)(uintptr_t)instance; 954 break; 955 956 default: 957 error = DDI_FAILURE; 958 } 959 960 return (error); 961 } 962 963 /* 964 * Open the device this driver manages. 965 * 966 * Ensure the caller is a privileged process, else 967 * a non-privileged user could cause denial-of-service 968 * and/or negatively impact reliability/availability. 969 * 970 * Ensure there is only one concurrent open(). 
971 */ 972 _NOTE(ARGSUSED(1)) 973 static int 974 ntwdt_open(dev_t *devp, int flag, int otyp, cred_t *credp) 975 { 976 int inst = getminor(*devp); 977 int ret = 0; 978 ntwdt_state_t *ntwdt_ptr = getstate(inst); 979 980 NTWDT_DBG(WDT_DBG_ENTRY, ("open: inst/soft: %d/0x%p", 981 inst, ntwdt_ptr)); 982 983 /* ensure caller is a privileged process */ 984 if (drv_priv(credp) != 0) 985 return (EPERM); 986 987 /* 988 * Check for a Deferred Attach scenario. 989 * Return ENXIO so DDI framework will call 990 * attach() and then retry the open(). 991 */ 992 if (ntwdt_ptr == NULL) 993 return (ENXIO); 994 995 mutex_enter(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex); 996 mutex_enter(&ntwdt_ptr->ntwdt_mutex); 997 if (ntwdt_ptr->ntwdt_open_flag != 0) 998 ret = EAGAIN; 999 else 1000 ntwdt_ptr->ntwdt_open_flag = 1; 1001 mutex_exit(&ntwdt_ptr->ntwdt_mutex); 1002 mutex_exit(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex); 1003 1004 return (ret); 1005 } 1006 1007 /* 1008 * Close the device this driver manages. 1009 * 1010 * Notes: 1011 * 1012 * The close() can happen while the AWDT is running ! 1013 * (and nothing is done, eg, to disable the watchdog 1014 * or to stop updating the system heartbeat). This 1015 * is the desired behavior, as this allows for the 1016 * case of monitoring a Solaris reboot in terms 1017 * of watchdog expiration. 
1018 */ 1019 _NOTE(ARGSUSED(1)) 1020 static int 1021 ntwdt_close(dev_t dev, int flag, int otyp, cred_t *credp) 1022 { 1023 int inst = getminor(dev); 1024 ntwdt_state_t *ntwdt_ptr = getstate(inst); 1025 1026 NTWDT_DBG(WDT_DBG_ENTRY, ("close: inst/soft: %d/0x%p", 1027 inst, ntwdt_ptr)); 1028 1029 if (ntwdt_ptr == NULL) 1030 return (ENXIO); 1031 1032 mutex_enter(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex); 1033 mutex_enter(&ntwdt_ptr->ntwdt_mutex); 1034 if (ntwdt_ptr->ntwdt_open_flag != 0) { 1035 ntwdt_ptr->ntwdt_open_flag = 0; 1036 } 1037 mutex_exit(&ntwdt_ptr->ntwdt_mutex); 1038 mutex_exit(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex); 1039 1040 return (0); 1041 } 1042 1043 _NOTE(ARGSUSED(4)) 1044 static int 1045 ntwdt_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 1046 cred_t *credp, int *rvalp) 1047 { 1048 int inst = getminor(dev); 1049 int retval = 0; 1050 ntwdt_state_t *ntwdt_ptr = NULL; 1051 ntwdt_wdog_t *wdog_state; 1052 1053 if ((ntwdt_ptr = getstate(inst)) == NULL) 1054 return (ENXIO); 1055 1056 /* Only allow ioctl's if Solaris/ScApp support app-wdog */ 1057 if (ntwdt_watchdog_available == 0) 1058 return (ENXIO); 1059 1060 wdog_state = ntwdt_ptr->ntwdt_wdog_state; 1061 1062 switch (cmd) { 1063 case LOMIOCDOGSTATE: { 1064 /* 1065 * Return the state of the AWDT to the application. 
1066 */ 1067 lom_dogstate_t lom_dogstate; 1068 1069 mutex_enter(&wdog_state->ntwdt_wdog_mutex); 1070 lom_dogstate.reset_enable = 1071 wdog_state->ntwdt_reset_enabled; 1072 lom_dogstate.dog_enable = 1073 wdog_state->ntwdt_wdog_enabled; 1074 lom_dogstate.dog_timeout = 1075 wdog_state->ntwdt_wdog_timeout; 1076 mutex_exit(&wdog_state->ntwdt_wdog_mutex); 1077 1078 NTWDT_DBG(WDT_DBG_IOCTL, ("DOGSTATE: wdog/reset/timeout:" 1079 " %d/%d/%d", lom_dogstate.dog_enable, 1080 lom_dogstate.reset_enable, lom_dogstate.dog_timeout)); 1081 1082 if (ddi_copyout((caddr_t)&lom_dogstate, (caddr_t)arg, 1083 sizeof (lom_dogstate_t), mode) != 0) { 1084 retval = EFAULT; 1085 } 1086 break; 1087 } 1088 1089 case LOMIOCDOGCTL: { 1090 /* 1091 * Allow application to control whether watchdog 1092 * is {dis,en}abled and whether Reset is 1093 * {dis,en}abled. 1094 */ 1095 lom_dogctl_t lom_dogctl; 1096 1097 if (ddi_copyin((caddr_t)arg, (caddr_t)&lom_dogctl, 1098 sizeof (lom_dogctl_t), mode) != 0) { 1099 retval = EFAULT; 1100 break; 1101 } 1102 1103 NTWDT_DBG(WDT_DBG_IOCTL, ("DOGCTL: wdog/reset:" 1104 " %d/%d", lom_dogctl.dog_enable, 1105 lom_dogctl.reset_enable)); 1106 1107 mutex_enter(&wdog_state->ntwdt_wdog_mutex); 1108 1109 if (wdog_state->ntwdt_wdog_timeout == 0) { 1110 /* 1111 * then LOMIOCDOGTIME has never been used 1112 * to setup a valid timeout. 1113 */ 1114 retval = EINVAL; 1115 goto end; 1116 } 1117 1118 /* 1119 * Return error for the non-sensical combination: 1120 * "enable Reset" and "disable watchdog". 1121 */ 1122 if (lom_dogctl.dog_enable == 0 && 1123 lom_dogctl.reset_enable != 0) { 1124 retval = EINVAL; 1125 goto end; 1126 } 1127 1128 /* 1129 * Store the user-specified state in our softstate. 1130 * Note that our implementation here is stateless. 1131 * Eg, we do not disallow an "enable the watchdog" 1132 * command when the watchdog is currently enabled. 1133 * This is needed (at least in the case) when 1134 * the user enters OBP via ScApp/lom. 
In that case, 1135 * ScApp disables the watchdog, but does not inform 1136 * Solaris. As a result, an ensuing, unfiltered DOGCTL 1137 * to enable the watchdog is required. 1138 */ 1139 wdog_state->ntwdt_reset_enabled = 1140 lom_dogctl.reset_enable; 1141 wdog_state->ntwdt_wdog_enabled = 1142 lom_dogctl.dog_enable; 1143 1144 if (wdog_state->ntwdt_wdog_enabled != 0) { 1145 /* 1146 * then user wants to enable watchdog. 1147 * Arm the watchdog timer and start the 1148 * Cyclic, if it is not running. 1149 */ 1150 ntwdt_arm_vwdt(wdog_state); 1151 1152 if (wdog_state->ntwdt_timer_running == 0) { 1153 ntwdt_start_timer(ntwdt_ptr); 1154 } 1155 } else { 1156 /* 1157 * user wants to disable the watchdog. 1158 * Note that we do not set ntwdt_secs_remaining 1159 * to zero; that could cause a false expiration. 1160 */ 1161 if (wdog_state->ntwdt_timer_running != 0) { 1162 ntwdt_stop_timer(ntwdt_ptr); 1163 } 1164 } 1165 1166 /* 1167 * Send a permutation of mailbox commands to 1168 * ScApp that describes the current state of the 1169 * watchdog timer. Note that the permutation 1170 * depends on whether this is the first 1171 * Enabling of the watchdog or not. 1172 */ 1173 if (wdog_state->ntwdt_wdog_enabled != 0 && 1174 wdog_state->ntwdt_is_initial_enable == 0) { 1175 1176 /* switch from SWDT to AWDT mode */ 1177 ntwdt_swdt_to_awdt(wdog_state); 1178 1179 /* Tell ScApp we're in AWDT mode */ 1180 ntwdt_set_cfgvar(LW8_WDT_PROP_MODE, 1181 LW8_PROP_MODE_AWDT); 1182 } 1183 1184 /* Inform ScApp of the choices made by the app */ 1185 ntwdt_set_cfgvar(LW8_WDT_PROP_WDT, 1186 wdog_state->ntwdt_wdog_enabled); 1187 ntwdt_set_cfgvar(LW8_WDT_PROP_RECOV, 1188 wdog_state->ntwdt_reset_enabled); 1189 1190 if (wdog_state->ntwdt_wdog_enabled != 0 && 1191 wdog_state->ntwdt_is_initial_enable == 0) { 1192 /* 1193 * Clear tod_iosram_t.tod_timeout_period, 1194 * which is used in SWDT part of state 1195 * machine. (If this field is non-zero, 1196 * ScApp assumes that Solaris' SWDT is active). 
1197 * 1198 * Clearing this is useful in case SC reboots 1199 * while Solaris is running, as ScApp will read 1200 * a zero and not assume SWDT is running. 1201 */ 1202 ntwdt_set_hw_timeout(0); 1203 1204 /* "the first watchdog-enable has been seen" */ 1205 wdog_state->ntwdt_is_initial_enable = 1; 1206 } 1207 1208 mutex_exit(&wdog_state->ntwdt_wdog_mutex); 1209 break; 1210 } 1211 1212 case LOMIOCDOGTIME: { 1213 /* 1214 * Allow application to set the period (in seconds) 1215 * of the watchdog timeout. 1216 */ 1217 uint32_t lom_dogtime; 1218 1219 if (ddi_copyin((caddr_t)arg, (caddr_t)&lom_dogtime, 1220 sizeof (uint32_t), mode) != 0) { 1221 retval = EFAULT; 1222 break; 1223 } 1224 1225 NTWDT_DBG(WDT_DBG_IOCTL, ("DOGTIME: %u seconds", 1226 lom_dogtime)); 1227 1228 /* Ensure specified timeout is within range. */ 1229 if ((lom_dogtime == 0) || 1230 (lom_dogtime > NTWDT_MAX_TIMEOUT)) { 1231 retval = EINVAL; 1232 break; 1233 } 1234 1235 mutex_enter(&wdog_state->ntwdt_wdog_mutex); 1236 1237 wdog_state->ntwdt_wdog_timeout = lom_dogtime; 1238 1239 /* 1240 * If watchdog is currently running, re-arm the 1241 * watchdog timeout with the specified value. 1242 */ 1243 if (wdog_state->ntwdt_timer_running != 0) { 1244 ntwdt_arm_vwdt(wdog_state); 1245 } 1246 1247 /* Tell ScApp of the specified timeout */ 1248 ntwdt_set_cfgvar(LW8_WDT_PROP_TO, lom_dogtime); 1249 1250 mutex_exit(&wdog_state->ntwdt_wdog_mutex); 1251 break; 1252 } 1253 1254 case LOMIOCDOGPAT: { 1255 /* 1256 * Allow user to re-arm ("pat") the watchdog. 1257 */ 1258 NTWDT_DBG(WDT_DBG_IOCTL, ("DOGPAT")); 1259 1260 mutex_enter(&wdog_state->ntwdt_wdog_mutex); 1261 1262 /* 1263 * If watchdog is not enabled or underlying 1264 * Cyclic timer is not running, exit. 
1265 */ 1266 if (!(wdog_state->ntwdt_wdog_enabled && 1267 wdog_state->ntwdt_timer_running)) 1268 goto end; 1269 1270 if (wdog_state->ntwdt_wdog_expired == 0) { 1271 /* then VWDT has not expired; re-arm it */ 1272 ntwdt_arm_vwdt(wdog_state); 1273 1274 NTWDT_DBG(WDT_DBG_VWDT, ("VWDT re-armed:" 1275 " %d seconds", 1276 wdog_state->ntwdt_secs_remaining)); 1277 } 1278 1279 mutex_exit(&wdog_state->ntwdt_wdog_mutex); 1280 break; 1281 } 1282 1283 #ifdef DEBUG 1284 case NTWDTIOCPANIC: { 1285 /* 1286 * Use in unit/integration testing to test our 1287 * panic-handler code. 1288 */ 1289 cmn_err(CE_PANIC, "NTWDTIOCPANIC: force a panic"); 1290 break; 1291 } 1292 1293 case NTWDTIOCSTATE: { 1294 /* 1295 * Allow application to read wdog state from the 1296 * SC (and *not* the driver's softstate). 1297 * 1298 * Return state of: 1299 * o recovery-enabled 1300 * o current timeout value 1301 */ 1302 ntwdt_data_t ntwdt_data; 1303 int action; 1304 int timeout; 1305 int ret; 1306 1307 mutex_enter(&wdog_state->ntwdt_wdog_mutex); 1308 ret = ntwdt_get_cfgvar(LW8_WDT_PROP_TO, &timeout); 1309 ret |= ntwdt_get_cfgvar(LW8_WDT_PROP_RECOV, &action); 1310 mutex_exit(&wdog_state->ntwdt_wdog_mutex); 1311 1312 bzero((caddr_t)&ntwdt_data, sizeof (ntwdt_data)); 1313 1314 if (ret != NTWDT_SUCCESS) { 1315 retval = EIO; 1316 break; 1317 } 1318 1319 NTWDT_DBG(WDT_DBG_IOCTL, ("NTWDTIOCSTATE:" 1320 " timeout/action: %d/%d", timeout, action)); 1321 1322 ntwdt_data.ntwdt_wd1 = (uint32_t)timeout; 1323 ntwdt_data.ntwdt_wd2 = (uint8_t)action; 1324 1325 if (ddi_copyout((caddr_t)&ntwdt_data, (caddr_t)arg, 1326 sizeof (ntwdt_data_t), mode) != 0) { 1327 retval = EFAULT; 1328 } 1329 break; 1330 } 1331 #endif 1332 default: 1333 retval = EINVAL; 1334 break; 1335 } 1336 1337 return (retval); 1338 end: 1339 mutex_exit(&wdog_state->ntwdt_wdog_mutex); 1340 return (retval); 1341 } 1342 1343 /* 1344 * Arm the Virtual Watchdog Timer (VWDT). 
1345 * 1346 * Assign the current watchdog timeout (ntwdt_wdog_timeout) 1347 * to the softstate variable representing the watchdog 1348 * timer (ntwdt_secs_remaining). 1349 * 1350 * To ensure (from ntwdt's perspective) that any actual 1351 * timeout expiration is at least as large as the expected 1352 * timeout, conditionally set/clear a bit that will be 1353 * checked in the Cyclic's softint. 1354 * 1355 * If the Cyclic has been started, the goal is to ignore 1356 * the _next_ firing of the Cyclic, as that firing will 1357 * NOT represent a full, one-second period. If the Cyclic 1358 * has NOT been started yet, then do not ignore the next 1359 * Cyclic's firing, as that's the First One, and it was 1360 * programmed to fire at a specific time (see ntwdt_start_timer). 1361 */ 1362 static void 1363 ntwdt_arm_vwdt(ntwdt_wdog_t *wdog_state) 1364 { 1365 /* arm the watchdog timer (VWDT) */ 1366 wdog_state->ntwdt_secs_remaining = 1367 wdog_state->ntwdt_wdog_timeout; 1368 1369 if (wdog_state->ntwdt_timer_running != 0) 1370 NTWDT_FLAG_SET(wdog_state, SKIP_CYCLIC); 1371 else 1372 NTWDT_FLAG_CLR(wdog_state, SKIP_CYCLIC); 1373 } 1374 1375 /* 1376 * Switch from SWDT mode to AWDT mode. 1377 */ 1378 _NOTE(ARGSUSED(0)) 1379 static void 1380 ntwdt_swdt_to_awdt(ntwdt_wdog_t *wdog_state) 1381 { 1382 ASSERT(wdog_state->ntwdt_is_initial_enable == 0); 1383 1384 /* 1385 * Disable SWDT. If SWDT is currently active, 1386 * display a message so user knows that SWDT Mode 1387 * has terminated. 1388 */ 1389 if (watchdog_enable != 0 || 1390 watchdog_activated != 0) 1391 cmn_err(CE_NOTE, "Hardware watchdog disabled"); 1392 watchdog_enable = 0; 1393 watchdog_activated = 0; 1394 1395 /* "we are in AWDT mode" */ 1396 ntwdt_watchdog_activated = 1; 1397 NTWDT_DBG(WDT_DBG_VWDT, ("AWDT is enabled")); 1398 } 1399 1400 /* 1401 * This is the Cyclic that runs at a multiple of the 1402 * AWDT's watchdog-timeout period. 
This Cyclic runs at 1403 * LOCK_LEVEL (eg, CY_LOCK_LEVEL) and will post a 1404 * soft-interrupt in order to complete all processing. 1405 * 1406 * Executing at LOCK_LEVEL gives this function a high 1407 * interrupt priority, while performing its work via 1408 * a soft-interrupt allows for a consistent (eg, MT-safe) 1409 * view of driver softstate between User and Interrupt 1410 * context. 1411 * 1412 * Context: 1413 * interrupt context: Cyclic framework calls at 1414 * CY_LOCK_LEVEL (=> 10) 1415 */ 1416 _NOTE(ARGSUSED(0)) 1417 static void 1418 ntwdt_cyclic_pat(void *arg) 1419 { 1420 /* post-down to DDI_SOFTINT_LOW */ 1421 ddi_trigger_softintr(ntwdt_cyclic_softint_id); 1422 } 1423 1424 /* 1425 * This is the soft-interrupt triggered by the AWDT 1426 * Cyclic. 1427 * 1428 * This softint does all the work re: computing whether 1429 * the VWDT expired. It grabs ntwdt_wdog_mutex 1430 * so User Context code (eg, the IOCTLs) cannot run, 1431 * and then it tests whether the VWDT expired. If it 1432 * hasn't, it decrements the VWDT timer by the amount 1433 * of the Cyclic's period. If the timer has expired, 1434 * it initiates Recovery (based on what user specified 1435 * in LOMIOCDOGCTL). 1436 * 1437 * This function also updates the normal system "heartbeat". 1438 * 1439 * Context: 1440 * interrupt-context: DDI_SOFTINT_LOW 1441 */ 1442 static uint_t 1443 ntwdt_cyclic_softint(char *arg) 1444 { 1445 ntwdt_state_t *ntwdt_ptr = (ntwdt_state_t *)arg; 1446 ntwdt_wdog_t *wdog_state; 1447 1448 wdog_state = ntwdt_ptr->ntwdt_wdog_state; 1449 1450 mutex_enter(&wdog_state->ntwdt_wdog_mutex); 1451 1452 if ((wdog_state->ntwdt_wdog_flags & 1453 NTWDT_FLAG_SKIP_CYCLIC) != 0) { 1454 /* 1455 * then skip all processing by this interrupt. 1456 * (see ntwdt_arm_vwdt()). 
1457 */ 1458 wdog_state->ntwdt_wdog_flags &= ~NTWDT_FLAG_SKIP_CYCLIC; 1459 goto end; 1460 } 1461 1462 if (wdog_state->ntwdt_timer_running == 0 || 1463 (ntwdt_ptr->ntwdt_cycl_id == CYCLIC_NONE) || 1464 (wdog_state->ntwdt_wdog_enabled == 0)) 1465 goto end; 1466 1467 /* re-arm ("pat") the hardware watchdog */ 1468 ntwdt_pat_hw_watchdog(); 1469 1470 /* Decrement the VWDT and see if it has expired. */ 1471 if (--wdog_state->ntwdt_secs_remaining == 0) { 1472 1473 cmn_err(CE_WARN, "application-watchdog expired"); 1474 1475 wdog_state->ntwdt_wdog_expired = 1; 1476 1477 if (wdog_state->ntwdt_reset_enabled != 0) { 1478 /* 1479 * Update ScApp so that the new wdog-timeout 1480 * value is as specified in the 1481 * NTWDT_BOOT_TIMEOUT_PROP driver Property. 1482 * This timeout is assumedly larger than the 1483 * actual Solaris reboot time. This will allow 1484 * our forced-reboot to not cause an unplanned 1485 * (series of) watchdog expiration(s). 1486 */ 1487 if (ntwdt_disable_timeout_action == 0) 1488 ntwdt_reprogram_wd(ntwdt_ptr); 1489 1490 mutex_exit(&wdog_state->ntwdt_wdog_mutex); 1491 1492 NTWDT_DBG(WDT_DBG_VWDT, ("recovery being done")); 1493 1494 ntwdt_enforce_timeout(); 1495 } else { 1496 NTWDT_DBG(WDT_DBG_VWDT, ("no recovery being done")); 1497 1498 wdog_state->ntwdt_wdog_enabled = 0; 1499 1500 /* 1501 * Tell ScApp to disable wdog; this prevents 1502 * the "2x-timeout" artifact. Eg, Solaris 1503 * times-out at t(x) and ScApp times-out at t(2x), 1504 * where (x==ntwdt_wdog_timeout). 
1505 */ 1506 (void) ntwdt_set_cfgvar(LW8_WDT_PROP_WDT, 1507 wdog_state->ntwdt_wdog_enabled); 1508 } 1509 1510 /* Schedule Callout to stop this Cyclic */ 1511 timeout(ntwdt_stop_timer_lock, ntwdt_ptr, 0); 1512 1513 } else { 1514 _NOTE(EMPTY) 1515 NTWDT_DBG(WDT_DBG_VWDT, ("time remaining in VWDT: %d" 1516 " seconds", wdog_state->ntwdt_secs_remaining)); 1517 } 1518 end: 1519 mutex_exit(&wdog_state->ntwdt_wdog_mutex); 1520 1521 return (DDI_INTR_CLAIMED); 1522 } 1523 1524 /* 1525 * Program the AWDT watchdog-timeout value to that specified 1526 * in the NTWDT_BOOT_TIMEOUT_PROP driver Property. However, 1527 * only do this if the AWDT is in the correct state. 1528 * 1529 * Caller's Context: 1530 * o interrupt context: (from software-interrupt) 1531 * o during a panic 1532 */ 1533 static void 1534 ntwdt_reprogram_wd(ntwdt_state_t *ntwdt_ptr) 1535 { 1536 ntwdt_wdog_t *wdog_state = ntwdt_ptr->ntwdt_wdog_state; 1537 1538 /* 1539 * Program the AWDT watchdog-timeout value only if the 1540 * watchdog is enabled, the user wants to do recovery, 1541 * ("reset is enabled") and the AWDT timer is currently 1542 * running. 1543 */ 1544 if (wdog_state->ntwdt_wdog_enabled != 0 && 1545 wdog_state->ntwdt_reset_enabled != 0 && 1546 wdog_state->ntwdt_timer_running != 0) { 1547 if (ddi_in_panic() != 0) 1548 ntwdt_set_cfgvar_noreply(LW8_WDT_PROP_TO, 1549 wdog_state->ntwdt_boot_timeout); 1550 else 1551 (void) ntwdt_set_cfgvar(LW8_WDT_PROP_TO, 1552 wdog_state->ntwdt_boot_timeout); 1553 } 1554 } 1555 1556 /* 1557 * This is the callback that was registered to run during a panic. 1558 * It will set the watchdog-timeout value to be that as specified 1559 * in the NTWDT_BOOT_TIMEOUT_PROP driver Property. 1560 * 1561 * Note that unless this Property's value specifies a timeout 1562 * that's larger than the actual reboot latency, ScApp will 1563 * experience a timeout and initiate Recovery. 
1564 */ 1565 _NOTE(ARGSUSED(1)) 1566 static boolean_t 1567 ntwdt_panic_cb(void *arg, int code) 1568 { 1569 ASSERT(ddi_in_panic() != 0); 1570 1571 ntwdt_reprogram_wd((ntwdt_state_t *)arg); 1572 1573 return (B_TRUE); 1574 } 1575 1576 /* 1577 * Initialize the Cyclic that is used to monitor the VWDT. 1578 */ 1579 static void 1580 ntwdt_start_timer(ntwdt_state_t *ntwdt_ptr) 1581 { 1582 ntwdt_wdog_t *wdog_state = ntwdt_ptr->ntwdt_wdog_state; 1583 cyc_handler_t *hdlr = &wdog_state->ntwdt_cycl_hdlr; 1584 cyc_time_t *when = &wdog_state->ntwdt_cycl_time; 1585 1586 /* 1587 * Init Cyclic so its first expiry occurs wdog-timeout 1588 * seconds from the current, absolute time. 1589 */ 1590 when->cyt_interval = wdog_state->ntwdt_cyclic_interval; 1591 when->cyt_when = gethrtime() + when->cyt_interval; 1592 1593 wdog_state->ntwdt_wdog_expired = 0; 1594 wdog_state->ntwdt_timer_running = 1; 1595 1596 mutex_enter(&cpu_lock); 1597 if (ntwdt_ptr->ntwdt_cycl_id == CYCLIC_NONE) 1598 ntwdt_ptr->ntwdt_cycl_id = cyclic_add(hdlr, when); 1599 mutex_exit(&cpu_lock); 1600 1601 NTWDT_DBG(WDT_DBG_VWDT, ("AWDT's cyclic-driven timer is started")); 1602 } 1603 1604 /* 1605 * Stop the cyclic that is used to monitor the VWDT (and 1606 * was Started by ntwdt_start_timer). 1607 * 1608 * Context: per the Cyclic API, cyclic_remove cannot be called 1609 * from interrupt-context. Note that when this is 1610 * called via a Callout, it's called from base level. 
1611 */ 1612 static void 1613 ntwdt_stop_timer(void *arg) 1614 { 1615 ntwdt_state_t *ntwdt_ptr = (void *)arg; 1616 ntwdt_wdog_t *wdog_state = ntwdt_ptr->ntwdt_wdog_state; 1617 1618 mutex_enter(&cpu_lock); 1619 if (ntwdt_ptr->ntwdt_cycl_id != CYCLIC_NONE) 1620 cyclic_remove(ntwdt_ptr->ntwdt_cycl_id); 1621 mutex_exit(&cpu_lock); 1622 1623 wdog_state->ntwdt_timer_running = 0; 1624 ntwdt_ptr->ntwdt_cycl_id = CYCLIC_NONE; 1625 1626 NTWDT_DBG(WDT_DBG_VWDT, ("AWDT's cyclic-driven timer is stopped")); 1627 } 1628 1629 /* 1630 * Stop the cyclic that is used to monitor the VWDT (and 1631 * do it in a thread-safe manner). 1632 * 1633 * This is a wrapper function for the core function, 1634 * ntwdt_stop_timer. Both functions are useful, as some 1635 * callers will already have the appropriate mutex locked, and 1636 * other callers will not. 1637 */ 1638 static void 1639 ntwdt_stop_timer_lock(void *arg) 1640 { 1641 ntwdt_state_t *ntwdt_ptr = (void *)arg; 1642 ntwdt_wdog_t *wdog_state = ntwdt_ptr->ntwdt_wdog_state; 1643 1644 mutex_enter(&wdog_state->ntwdt_wdog_mutex); 1645 ntwdt_stop_timer(arg); 1646 mutex_exit(&wdog_state->ntwdt_wdog_mutex); 1647 } 1648 1649 /* 1650 * Add callbacks needed to react to major system state transitions. 1651 */ 1652 static void 1653 ntwdt_add_callbacks(ntwdt_state_t *ntwdt_ptr) 1654 { 1655 /* register a callback that's called during a panic */ 1656 ntwdt_callback_ids.ntwdt_panic_cb = callb_add(ntwdt_panic_cb, 1657 (void *)ntwdt_ptr, CB_CL_PANIC, "ntwdt_panic_cb"); 1658 } 1659 1660 /* 1661 * Remove callbacks added by ntwdt_add_callbacks. 1662 */ 1663 static void 1664 ntwdt_remove_callbacks() 1665 { 1666 callb_delete(ntwdt_callback_ids.ntwdt_panic_cb); 1667 } 1668 1669 /* 1670 * Initiate a Reset (as a result of the VWDT timeout expiring). 
1671 */ 1672 static void 1673 ntwdt_enforce_timeout() 1674 { 1675 if (ntwdt_disable_timeout_action != 0) { 1676 cmn_err(CE_NOTE, "OS timeout expired, taking no action"); 1677 return; 1678 } 1679 1680 NTWDT_DBG(WDT_DBG_VWDT, ("VWDT expired; do a crashdump")); 1681 1682 (void) kadmin(A_DUMP, AD_BOOT, NULL, kcred); 1683 cmn_err(CE_PANIC, "kadmin(A_DUMP, AD_BOOT) failed"); 1684 _NOTE(NOTREACHED) 1685 } 1686 1687 /* 1688 * Interpret the Properties from driver's config file. 1689 */ 1690 static int 1691 ntwdt_read_props(ntwdt_state_t *ntwdt_ptr) 1692 { 1693 ntwdt_wdog_t *wdog_state; 1694 int boot_timeout; 1695 1696 wdog_state = ntwdt_ptr->ntwdt_wdog_state; 1697 1698 /* 1699 * interpret Property that specifies how long 1700 * the watchdog-timeout should be set to when 1701 * Solaris panics. Assumption is that this value 1702 * is larger than the amount of time it takes 1703 * to reboot and write crashdump. If not, 1704 * ScApp could induce a reset, due to an expired 1705 * watchdog-timeout. 1706 */ 1707 wdog_state->ntwdt_boot_timeout = 1708 NTWDT_DEFAULT_BOOT_TIMEOUT; 1709 1710 boot_timeout = ddi_prop_get_int(DDI_DEV_T_ANY, 1711 ntwdt_ptr->ntwdt_dip, DDI_PROP_DONTPASS, 1712 NTWDT_BOOT_TIMEOUT_PROP, -1); 1713 1714 if (boot_timeout != -1 && boot_timeout > 0 && 1715 boot_timeout <= NTWDT_MAX_TIMEOUT) { 1716 wdog_state->ntwdt_boot_timeout = 1717 boot_timeout; 1718 } else { 1719 _NOTE(EMPTY) 1720 NTWDT_DBG(WDT_DBG_ENTRY, (NTWDT_BOOT_TIMEOUT_PROP 1721 ": using default of %d seconds.", 1722 wdog_state->ntwdt_boot_timeout)); 1723 } 1724 1725 return (DDI_SUCCESS); 1726 } 1727 1728 /* 1729 * Write state of SWDT to ScApp. 1730 * 1731 * Currently, this function is only called on attach() 1732 * of our driver. 1733 * 1734 * Note that we do not need to call this function, eg, 1735 * in response to a solicitation from ScApp (eg, 1736 * the LW8_SC_RESTARTED_EVENT). 
1737 * 1738 * Context: 1739 * called in Kernel Context 1740 */ 1741 static int 1742 ntwdt_set_swdt_state() 1743 { 1744 /* 1745 * note that ScApp only needs this one 1746 * variable when system is in SWDT mode. 1747 */ 1748 ntwdt_set_cfgvar(LW8_WDT_PROP_MODE, 1749 LW8_PROP_MODE_SWDT); 1750 1751 return (0); 1752 } 1753 1754 /* 1755 * Write all AWDT state to ScApp via the SBBC mailbox 1756 * in IOSRAM. Note that the permutation of Writes 1757 * is as specified in the design spec. 1758 * 1759 * Notes: caller must perform synchronization so that 1760 * this series of Writes is consistent as viewed 1761 * by ScApp (eg, there is no LW8_WDT_xxx mailbox 1762 * command that contains "all Properties"; each 1763 * Property must be written individually). 1764 */ 1765 static int 1766 ntwdt_set_awdt_state(ntwdt_wdog_t *rstatep) 1767 { 1768 /* ScApp expects values in this order: */ 1769 ntwdt_set_cfgvar(LW8_WDT_PROP_MODE, 1770 ntwdt_watchdog_activated != 0); 1771 ntwdt_set_cfgvar(LW8_WDT_PROP_TO, 1772 rstatep->ntwdt_wdog_timeout); 1773 ntwdt_set_cfgvar(LW8_WDT_PROP_RECOV, 1774 rstatep->ntwdt_reset_enabled); 1775 ntwdt_set_cfgvar(LW8_WDT_PROP_WDT, 1776 rstatep->ntwdt_wdog_enabled); 1777 1778 return (NTWDT_SUCCESS); 1779 } 1780 1781 /* 1782 * Write a specified WDT Property (and Value) to ScApp. 1783 * 1784 * <Property, Value> is passed in the LW8_MBOX_WDT_SET 1785 * (SBBC) mailbox message. The SBBC mailbox resides in 1786 * IOSRAM. 1787 * 1788 * Note that this function is responsible for ensuring that 1789 * a driver-specific representation of a mailbox <Value> is 1790 * mapped into the representation that is expected by ScApp 1791 * (eg, see LW8_WDT_PROP_RECOV). 1792 */ 1793 static int 1794 ntwdt_set_cfgvar(int var, int val) 1795 { 1796 int rv; 1797 int mbox_val; 1798 lw8_set_wdt_t set_wdt; 1799 1800 switch (var) { 1801 case LW8_WDT_PROP_RECOV: 1802 #ifdef DEBUG 1803 NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of 'recovery-enabled':" 1804 " %s (%d)", (val != 0) ? 
"enabled" : "disabled", val)); 1805 #endif 1806 mbox_val = (val != 0) ? LW8_PROP_RECOV_ENABLED : 1807 LW8_PROP_RECOV_DISABLED; 1808 break; 1809 1810 case LW8_WDT_PROP_WDT: 1811 #ifdef DEBUG 1812 NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of 'wdog-enabled':" 1813 " %s (%d)", (val != 0) ? "enabled" : "disabled", val)); 1814 #endif 1815 mbox_val = (val != 0) ? LW8_PROP_WDT_ENABLED : 1816 LW8_PROP_WDT_DISABLED; 1817 break; 1818 1819 case LW8_WDT_PROP_TO: 1820 #ifdef DEBUG 1821 NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of 'wdog-timeout':" 1822 " %d seconds", val)); 1823 #endif 1824 mbox_val = val; 1825 break; 1826 1827 case LW8_WDT_PROP_MODE: 1828 #ifdef DEBUG 1829 NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of 'wdog-mode':" 1830 " %s (%d)", (val != LW8_PROP_MODE_SWDT) ? 1831 "AWDT" : "SWDT", val)); 1832 #endif 1833 mbox_val = val; 1834 break; 1835 1836 default: 1837 ASSERT(0); 1838 _NOTE(NOTREACHED) 1839 } 1840 1841 set_wdt.property_id = var; 1842 set_wdt.value = mbox_val; 1843 1844 rv = ntwdt_lomcmd(LW8_MBOX_WDT_SET, (intptr_t)&set_wdt); 1845 if (rv != 0) { 1846 _NOTE(EMPTY) 1847 NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of prop/val %d/%d " 1848 "failed: %d", var, mbox_val, rv)); 1849 } 1850 1851 return (rv); 1852 } 1853 1854 static void 1855 ntwdt_set_cfgvar_noreply(int var, int val) 1856 { 1857 ntwdt_set_cfgvar(var, val); 1858 } 1859 1860 #ifdef DEBUG 1861 /* 1862 * Read a specified WDT Property from ScApp. 1863 * 1864 * <Property> is passed in the Request of the LW8_MBOX_WDT_GET 1865 * (SBBC) mailbox message, and the Property's <Value> 1866 * is returned in the message's Response. The SBBC mailbox 1867 * resides in IOSRAM. 
1868 */ 1869 static int 1870 ntwdt_get_cfgvar(int var, int *val) 1871 { 1872 lw8_get_wdt_t get_wdt; 1873 int rv; 1874 1875 rv = ntwdt_lomcmd(LW8_MBOX_WDT_GET, (intptr_t)&get_wdt); 1876 if (rv != 0) { 1877 _NOTE(EMPTY) 1878 NTWDT_DBG(WDT_DBG_PROT, ("MBOX_GET failed: %d", rv)); 1879 } else { 1880 switch (var) { 1881 case LW8_WDT_PROP_RECOV: 1882 *val = (uint8_t)get_wdt.recovery_enabled; 1883 NTWDT_DBG(WDT_DBG_PROT, ("MBOX_GET of 'reset-enabled':" 1884 " %s (%d)", (*val != 0) ? "enabled" : "disabled", 1885 *val)); 1886 break; 1887 1888 case LW8_WDT_PROP_WDT: 1889 *val = (uint8_t)get_wdt.watchdog_enabled; 1890 NTWDT_DBG(WDT_DBG_PROT, ("MBOX_GET of 'wdog-enabled':" 1891 " %s (%d)", (*val != 0) ? "enabled" : "disabled", 1892 *val)); 1893 break; 1894 1895 case LW8_WDT_PROP_TO: 1896 *val = (uint8_t)get_wdt.timeout; 1897 NTWDT_DBG(WDT_DBG_PROT, ("MBOX_GET of 'wdog-timeout':" 1898 " %d seconds", *val)); 1899 break; 1900 1901 default: 1902 ASSERT(0); 1903 _NOTE(NOTREACHED) 1904 } 1905 } 1906 1907 return (rv); 1908 } 1909 #endif 1910 1911 /* 1912 * Update the real system "heartbeat", which resides in IOSRAM. 1913 * This "heartbeat" is normally used in SWDT Mode, but when 1914 * in AWDT Mode, ScApp also uses its value to determine if Solaris 1915 * is up-and-running. 
1916 */ 1917 static void 1918 ntwdt_pat_hw_watchdog() 1919 { 1920 tod_iosram_t tod_buf; 1921 static uint32_t i_am_alive = 0; 1922 #ifdef DEBUG 1923 if (ntwdt_stop_heart != 0) 1924 return; 1925 #endif 1926 /* Update the system heartbeat */ 1927 if (i_am_alive == UINT32_MAX) 1928 i_am_alive = 0; 1929 else 1930 i_am_alive++; 1931 1932 NTWDT_DBG(WDT_DBG_HEART, ("update heartbeat: %d", 1933 i_am_alive)); 1934 1935 if (iosram_write(SBBC_TOD_KEY, OFFSET(tod_buf, tod_i_am_alive), 1936 (char *)&i_am_alive, sizeof (uint32_t))) { 1937 cmn_err(CE_WARN, "ntwdt_pat_hw_watchdog(): " 1938 "write heartbeat failed"); 1939 } 1940 } 1941 1942 /* 1943 * Write the specified value to the system's normal (IOSRAM) 1944 * location that's used to specify Solaris' watchdog-timeout 1945 * on Serengeti platforms. 1946 * 1947 * In SWDT Mode, this location can hold values [0,n). 1948 * In AWDT Mode, this location must have value 0 (else 1949 * after a ScApp-reboot, ScApp could mistakenly interpret 1950 * that the system is in SWDT Mode). 1951 */ 1952 static int 1953 ntwdt_set_hw_timeout(uint32_t period) 1954 { 1955 tod_iosram_t tod_buf; 1956 int rv; 1957 1958 rv = iosram_write(SBBC_TOD_KEY, OFFSET(tod_buf, tod_timeout_period), 1959 (char *)&period, sizeof (uint32_t)); 1960 if (rv != 0) 1961 cmn_err(CE_WARN, "write of %d for TOD timeout " 1962 "period failed: %d", period, rv); 1963 1964 return (rv); 1965 } 1966 1967 /* 1968 * Soft-interrupt handler that is triggered when ScApp wants 1969 * to know the current state of the app-wdog. 1970 * 1971 * Grab ntwdt_wdog_mutex so that we synchronize with any 1972 * concurrent User Context and Interrupt Context activity. Call 1973 * a function that writes a permutation of the watchdog state 1974 * to the SC, then release the mutex. 1975 * 1976 * We grab the mutex not only so that each variable is consistent 1977 * but also so that the *permutation* of variables is consistent. 
1978 * I.e., any set of one or more variables (that we write to SC 1979 * using multiple mailbox commands) will truly be seen as a 1980 * consistent snapshot. Note that if our protocol had a MBOX_SET 1981 * command that allowed writing all watchdog state in one 1982 * command, then the lock-hold latency would be greatly reduced. 1983 * To our advantage, this softint normally executes very 1984 * infrequently. 1985 * 1986 * Context: 1987 * called at Interrupt Context (DDI_SOFTINT_LOW) 1988 */ 1989 static uint_t 1990 ntwdt_mbox_softint(char *arg) 1991 { 1992 ntwdt_wdog_t *wdog_state; 1993 1994 wdog_state = ((ntwdt_state_t *)arg)->ntwdt_wdog_state; 1995 1996 ASSERT(wdog_state != NULL); 1997 1998 mutex_enter(&wdog_state->ntwdt_wdog_mutex); 1999 2000 /* tell ScApp state of AWDT */ 2001 ntwdt_set_awdt_state(wdog_state); 2002 2003 mutex_exit(&wdog_state->ntwdt_wdog_mutex); 2004 2005 return (DDI_INTR_CLAIMED); 2006 } 2007 2008 /* 2009 * Handle MBOX_EVENT_LW8 Events that are sent from ScApp. 2010 * 2011 * The only (sub-)type of Event we handle is the 2012 * LW8_EVENT_SC_RESTARTED Event. We handle this by triggering 2013 * a soft-interrupt only if we are in AWDT mode. 2014 * 2015 * ScApp sends this Event when it wants to learn the current 2016 * state of the AWDT variables. Design-wise, this is used to 2017 * handle the case where the SC reboots while the system is in 2018 * AWDT mode (if the SC reboots in SWDT mode, then ScApp 2019 * already knows all necessary info and therefore won't send 2020 * this Event). 2021 * 2022 * Context: 2023 * function is called in Interrupt Context (at DDI_SOFTINT_MED) 2024 * and we conditionally trigger a softint that will run at 2025 * DDI_SOFTINT_LOW. Note that function executes at 2026 * DDI_SOFTINT_MED due to how this handler was registered by 2027 * the implementation of sbbc_mbox_reg_intr(). 2028 * 2029 * Notes: 2030 * Currently, the LW8_EVENT_SC_RESTARTED Event is only sent 2031 * by SC when in AWDT mode. 
2032 */ 2033 static uint_t 2034 ntwdt_event_data_handler(char *arg) 2035 { 2036 lw8_event_t *payload; 2037 sbbc_msg_t *msg; 2038 2039 if (arg == NULL) { 2040 return (DDI_INTR_CLAIMED); 2041 } 2042 2043 msg = (sbbc_msg_t *)arg; 2044 if (msg->msg_buf == NULL) { 2045 return (DDI_INTR_CLAIMED); 2046 } 2047 2048 payload = (lw8_event_t *)msg->msg_buf; 2049 2050 switch (payload->event_type) { 2051 case LW8_EVENT_SC_RESTARTED: 2052 /* 2053 * then SC probably was rebooted, and it therefore 2054 * needs to know what the current state of AWDT is. 2055 */ 2056 NTWDT_DBG(WDT_DBG_EVENT, ("LW8_EVENT_SC_RESTARTED " 2057 "received in %s mode", 2058 (ntwdt_watchdog_activated != 0) ? "AWDT" : "SWDT")); 2059 2060 if (ntwdt_watchdog_activated != 0) { 2061 /* then system is in AWDT mode */ 2062 ddi_trigger_softintr(ntwdt_mbox_softint_id); 2063 } 2064 break; 2065 2066 default: 2067 NTWDT_DBG(WDT_DBG_EVENT, 2068 ("MBOX_EVENT_LW8: %d", payload->event_type)); 2069 break; 2070 } 2071 2072 return (DDI_INTR_CLAIMED); 2073 } 2074 2075 /* 2076 * Send an SBBC Mailbox command to ScApp. 2077 * 2078 * Use the sbbc_mbox_request_response utility function to 2079 * send the Request and receive the optional Response. 2080 * 2081 * Context: 2082 * can be called from Interrupt Context or User Context. 
2083 */ 2084 static int 2085 ntwdt_lomcmd(int cmd, intptr_t arg) 2086 { 2087 sbbc_msg_t request; 2088 sbbc_msg_t *reqp; 2089 sbbc_msg_t response; 2090 sbbc_msg_t *resp; 2091 int rv = 0; 2092 2093 reqp = &request; 2094 bzero((caddr_t)&request, sizeof (request)); 2095 reqp->msg_type.type = LW8_MBOX; 2096 reqp->msg_type.sub_type = (uint16_t)cmd; 2097 2098 resp = &response; 2099 bzero((caddr_t)&response, sizeof (response)); 2100 resp->msg_type.type = LW8_MBOX; 2101 resp->msg_type.sub_type = (uint16_t)cmd; 2102 2103 switch (cmd) { 2104 case LW8_MBOX_WDT_GET: 2105 reqp->msg_len = 0; 2106 reqp->msg_buf = (caddr_t)NULL; 2107 resp->msg_len = sizeof (lw8_get_wdt_t); 2108 resp->msg_buf = (caddr_t)arg; 2109 break; 2110 2111 case LW8_MBOX_WDT_SET: 2112 reqp->msg_len = sizeof (lw8_set_wdt_t); 2113 reqp->msg_buf = (caddr_t)arg; 2114 resp->msg_len = 0; 2115 resp->msg_buf = (caddr_t)NULL; 2116 break; 2117 2118 default: 2119 return (EINVAL); 2120 } 2121 2122 rv = sbbc_mbox_request_response(reqp, resp, 2123 LW8_DEFAULT_MAX_MBOX_WAIT_TIME); 2124 2125 if ((rv) || (resp->msg_status != SG_MBOX_STATUS_SUCCESS)) { 2126 2127 NTWDT_NDBG(WDT_DBG_PROT, ("SBBC mailbox error:" 2128 " (rv/msg_status)=(%d/%d)", rv, resp->msg_status)); 2129 2130 /* errors from sgsbbc */ 2131 if (resp->msg_status > 0) { 2132 return (resp->msg_status); 2133 } 2134 2135 /* errors from ScApp */ 2136 switch (resp->msg_status) { 2137 case SG_MBOX_STATUS_ILLEGAL_PARAMETER: 2138 /* illegal ioctl parameter */ 2139 return (EINVAL); 2140 2141 default: 2142 return (EIO); 2143 } 2144 } 2145 return (0); 2146 } 2147