1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * ntwdt driver 29 * ------------ 30 * 31 * Subsystem Overview 32 * ------------------ 33 * 34 * This is a pseudo driver for the Netra-1280 watchdog 35 * timer (WDT). It provides for an *application-driven* 36 * WDT (AWDT), not a traditional, hardware-based WDT. A 37 * hardware-based feature is already present on the 38 * Netra-1280, and it is referred to here as the 39 * System WDT (SWDT). 40 * 41 * ScApp and Solaris cooperate to provide either a SWDT or 42 * an AWDT; they are mutually-exclusive. Once in AWDT 43 * mode, one can only transition to SWDT mode via a reboot. 44 * This obviously gives priority to the AWDT and was done 45 * to handle scenarios where the customer might temporarily 46 * terminate their wdog-app in order to do some debugging, 47 * or even to load a new version of the wdog-app. 48 * 49 * The wdog-app does an open() of the /dev/ntwdt device node 50 * and then issues ioctl's to control the state of the AWDT. 51 * The ioctl's are implemented by this driver. 
Only one 52 * concurrent instance of open() is allowed. On the close(), 53 * a watchdog timer still in progress is NOT terminated. 54 * This allows the global state machine to monitor the 55 * progress of a Solaris reboot. ScApp will reset Solaris 56 * (eg, send an XIR) if the actual boot/crashdump latency 57 * is larger than the current AWDT timeout. 58 * 59 * The rationale for implementing an AWDT (vs a SWDT) is 60 * that it is more sensitive to system outage scenarios than 61 * a SWDT. Eg, a system could be in such a failed state that 62 * even though its clock-interrupt could still run (and the 63 * SWDT's watchdog timer therefore re-armed), the system could 64 * in effect have a corrupt or very poor dispatch latency. 65 * An AWDT would be sensitive to dispatch latency issues, as 66 * well as problems with its own execution (eg, a hang or 67 * crash). 68 * 69 * Subsystem Interface Overview 70 * ---------------------------- 71 * 72 * This pseudo-driver does not have any 'extern' functions. 73 * 74 * All system interaction is done via the traditional driver 75 * entry points (eg, attach(9e), _init(9e)). 76 * 77 * All interaction with user is via the entry points in the 78 * 'struct cb_ops' vector (eg, open(9e), ioctl(9e), and 79 * close(9e)). 80 * 81 * Subsystem Implementation Overview 82 * --------------------------------- 83 * 84 * ScApp and Solaris (eg, ntwdt) cooperate so that a state 85 * machine global to ScApp and ntwdt is either in AWDT mode 86 * or in SWDT mode. These two peers communicate via the SBBC 87 * Mailbox that resides in IOSRAM (SBBC_MAILBOX_KEY). 88 * They use two new mailbox messages (LW8_MBOX_WDT_GET and 89 * LW8_MBOX_WDT_SET) and one new event (LW8_EVENT_SC_RESTARTED). 90 * 91 * ntwdt implements the AWDT by implementing a "virtual 92 * WDT" (VWDT). Eg, the watchdog timer is not a traditional 93 * counter in hardware, it is a variable in ntwdt's 94 * softstate. 
The wdog-app's actions cause changes to this 95 * and other variables in ntwdt's softstate. 96 * 97 * The wdog-app uses the LOMIOCDOGTIME ioctl to specify 98 * the number of seconds in the watchdog timeout (and 99 * therefore the VWDT). The wdog-app then uses the 100 * LOMIOCDOGCTL ioctl to enable the wdog. This causes 101 * ntwdt to create a Cyclic that will both decrement 102 * the VWDT and check to see if it has expired. To keep 103 * the VWDT from expiring, the wdog-app uses the 104 * LOMIOCDOGPAT ioctl to re-arm (or "pat") the watchdog. 105 * This sets the VWDT value to that specified in the 106 * last LOMIOCDOGTIME ioctl. The wdog-app can use the 107 * LOMIOCDOGSTATE ioctl to query the state of the VWDT. 108 * 109 * The wdog-app can also specify how Recovery is to be 110 * done. The only choice is whether to do a crashdump 111 * or not. If ntwdt computes a VWDT expiration, then 112 * ntwdt initiates the Recovery, else ScApp will. Eg, 113 * a hang in Solaris will be sensed by ScApp and not 114 * ntwdt. The wdog-app specifies the Recovery policy 115 * via the DOGCTL ioctl. 116 * 117 * Timeout Expiration 118 * ------------------ 119 * In our implementation, ScApp senses a watchdog 120 * expiration the same way it historically has: 121 * by reading a well-known area of IOSRAM (SBBC_TOD_KEY) 122 * to see if the timestamp associated with a 123 * Solaris-generated "heartbeat" field is older 124 * than the currently specified timeout (which is 125 * also specified in this same IOSRAM section). 126 * 127 * What is different when ntwdt is running is that 128 * ntwdt is responsible for updating the Heartbeat, 129 * and not the normal client (todsg). When ntwdt 130 * puts the system in AWDT mode, it disables todsg's 131 * updating of the Heartbeat by changing the state of 132 * a pair of kernel tunables (watchdog_activated and 133 * watchdog_enable). ntwdt then takes responsibility 134 * for updating the Heartbeat. 
It does this by 135 * updating the Heartbeat from the Cyclic that is 136 * created when the user enables the AWDT (DOGCTL) 137 * or specifies a new timeout value (DOGTIME). 138 * 139 * As long as the AWDT is enabled, ntwdt will update 140 * the real system Heartbeat. As a result, ScApp 141 * will conclude that Solaris is still running. If 142 * the user stops re-arming the VWDT or Solaris 143 * hangs (eg), ntwdt will stop updating the Heartbeat. 144 * 145 * Note that ntwdt computes expiration via the 146 * repeatedly firing Cyclic, and ScApp computes 147 * expiration via a cessation of Heartbeat update. 148 * Since Heartbeat update stops once user stops 149 * re-arming the VWDT (ie, DOGPAT ioctl), ntwdt 150 * will compute a timeout at t(x), and ScApp will 151 * compute a timeout at t(2x), where 'x' is the 152 * current timeout value. When ntwdt computes 153 * the expiration, ntwdt masks this asymmetry. 154 * 155 * Lifecycle Events 156 * ---------------- 157 * 158 * ntwdt only handles one of the coarse-grained 159 * "lifecycle events" (eg, entering OBP, shutdown, 160 * power-down, DR) that are possible during a Solaris 161 * session: a panic. (Note that ScApp handles one 162 * of the others: "entering OBP"). Other than these, 163 * a user choosing such a state transition must first 164 * use the wdog-app to disable the watchdog, else 165 * an expiration could occur. 166 * 167 * Solaris handles a panic by registering a handler 168 * that's called during the panic. The handler will 169 * set the watchdog timeout to the value specified 170 * in the NTWDT_BOOT_TIMEOUT_PROP driver Property. 171 * Again, this value should be greater than the actual 172 * Solaris reboot/crashdump latency. 173 * 174 * When the user enters OBP via the System Controller, 175 * ScApp will disable the watchdog (from ScApp's 176 * perspective), but it will not communicate this to 177 * ntwdt. 
After having exited OBP, the wdog-app can 178 * be used to enable or disable the watchdog (which 179 * will get both ScApp and ntwdt in-sync). 180 * 181 * Locking 182 * ------- 183 * 184 * ntwdt has code running at three interrupt levels as 185 * well as base level. 186 * 187 * The ioctls run at base level in User Context. The 188 * driver's entry points run at base level in Kernel 189 * Context. 190 * 191 * ntwdt's three interrupt levels are used by: 192 * 193 * o LOCK_LEVEL : 194 * the Cyclic used to manage the VWDT is initialized 195 * to CY_LOCK_LEVEL 196 * 197 * o DDI_SOFTINT_MED : 198 * the SBBC mailbox implementation registers the 199 * specified handlers at this level 200 * 201 * o DDI_SOFTINT_LOW : 202 * this level is used by two handlers. One handler 203 * is triggered by the LOCK_LEVEL Cyclic. The other 204 * handler is triggered by the DDI_SOFTINT_MED 205 * handler registered to handle SBBC mailbox events. 206 * 207 * The centralizing concept is that the ntwdt_wdog_mutex 208 * in the driver's softstate is initialized to have an 209 * interrupt-block-cookie corresponding to DDI_SOFTINT_LOW. 210 * 211 * As a result, any base level code grabs ntwdt_wdog_mutex 212 * before doing work. Also, any handler running at interrupt 213 * level higher than DDI_SOFTINT_LOW "posts down" so that 214 * a DDI_SOFTINT_LOW handler is responsible for executing 215 * the "real work". Each DDI_SOFTINT_LOW handler also 216 * first grabs ntwdt_wdog_mutex, and so base level is 217 * synchronized with all interrupt levels. 218 * 219 * Note there's another mutex in the softstate: ntwdt_mutex. 220 * This mutex has few responsibilities. However, this 221 * locking order must be followed: ntwdt_wdog_mutex is 222 * held first, and then ntwdt_mutex. This choice results 223 * from the fact that the number of dynamic call sites 224 * for ntwdt_wdog_mutex is MUCH greater than that of 225 * ntwdt_mutex. 
 * As a result, almost all uses of
 * ntwdt_wdog_mutex do not even require ntwdt_mutex to
 * be held, which saves resources.
 *
 * Driver Properties
 * -----------------
 *
 * "ddi-forceattach=1;"
 * ------------------
 *
 * Using this allows our driver to be automatically
 * loaded at boot-time AND to not be removed from memory
 * solely due to memory-pressure.
 *
 * Being loaded at boot allows ntwdt to (as soon as
 * possible) tell ScApp of the current mode of the
 * state-machine (eg, SWDT). This is needed for the case
 * when Solaris is re-loaded while in AWDT mode; having
 * Solaris communicate ASAP with ScApp reduces the duration
 * of any "split-brain" scenario where ScApp and Solaris
 * are not in the same mode.
 *
 * Having ntwdt remain in memory even after a close()
 * allows ntwdt to answer any SBBC mailbox commands
 * that ScApp sends (as the mailbox infrastructure is
 * not torn down until ntwdt is detach()'d). Specifically,
 * ScApp could be re-loaded after AWDT mode had been
 * entered and the wdog-app had close()'d ntwdt. ScApp
 * will then eventually send a LW8_EVENT_SC_RESTARTED
 * mailbox event in order to learn the current state of
 * the state-machine. Having ntwdt remain loaded allows this
 * event to never go unanswered.
 *
 * "ntwdt-boottimeout=600;"
 * ----------------------
 *
 * This specifies the watchdog timeout value (in seconds) to
 * use when ntwdt is aware of the need to reboot/reload Solaris.
 *
 * ntwdt will update ScApp by setting the watchdog timeout
 * to the specified number of seconds when either a) Solaris
 * panics or b) the VWDT expires. Note that this is only done
 * if the user has chosen to enable Reset.
 *
 * ntwdt boundary-checks the specified value, and if out-of-range,
 * it initializes the watchdog timeout to a default value of
 * NTWDT_DEFAULT_BOOT_TIMEOUT seconds.
 * Note that this is a
 * default value and is not a *minimum* value. The valid range
 * for the watchdog timeout is between one second and
 * NTWDT_MAX_TIMEOUT seconds, inclusive.
 *
 * If ntwdt-boottimeout is set to a value less than an actual
 * Solaris boot's latency, ScApp will reset Solaris during boot.
 * Note that a continuous series of ScApp-induced resets will
 * not occur; ScApp only resets Solaris on the first transition
 * into the watchdog-expired state.
 */

#include <sys/note.h>
#include <sys/types.h>
#include <sys/callb.h>
#include <sys/stat.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/ddi_impldefs.h>
#include <sys/kmem.h>
#include <sys/devops.h>
#include <sys/cyclic.h>
#include <sys/uadmin.h>
#include <sys/lw8_impl.h>
#include <sys/sgsbbc.h>
#include <sys/sgsbbc_iosram.h>
#include <sys/sgsbbc_mailbox.h>
#include <sys/todsg.h>
#include <sys/mem_config.h>
#include <sys/lom_io.h>
#include <sys/reboot.h>
#include <sys/clock.h>


/*
 * tunables
 */
/*
 * NOTE(review): not referenced in the portion of the file reviewed here;
 * the name suggests a non-zero value suppresses the action normally taken
 * on a watchdog timeout -- confirm against the code that consumes it.
 */
int ntwdt_disable_timeout_action = 0;
#ifdef DEBUG
/*
 * tunable to simulate a Solaris hang. If it is non-zero, then
 * no system heartbeats ("hardware patting") will be done,
 * even though all AWDT machinery is functioning OK.
 */
int ntwdt_stop_heart;
#endif

/*
 * Driver Property
 */
#define	NTWDT_BOOT_TIMEOUT_PROP	"ntwdt-boottimeout"

/*
 * watchdog-timeout values (in seconds):
 *
 * NTWDT_DEFAULT_BOOT_TIMEOUT: the default value used if
 *				this driver is aware of the
 *				reboot.
331 * 332 * NTWDT_MAX_TIMEOUT: max value settable by app (via the 333 * LOMIOCDOGTIME ioctl) 334 */ 335 #define NTWDT_DEFAULT_BOOT_TIMEOUT (10*60) 336 #define NTWDT_MAX_TIMEOUT (180*60) 337 338 339 #define NTWDT_CYCLIC_CHK_PERCENT (20) 340 #define NTWDT_MINOR_NODE "awdt" 341 #define OFFSET(base, field) ((char *)&base.field - (char *)&base) 342 343 #define NTWDT_SUCCESS 0 344 #define NTWDT_FAILURE 1 345 346 typedef struct { 347 callb_id_t ntwdt_panic_cb; 348 } ntwdt_callback_ids_t; 349 static ntwdt_callback_ids_t ntwdt_callback_ids; 350 351 /* MBOX_EVENT_LW8 that is sent in IOSRAM Mailbox: */ 352 static lw8_event_t lw8_event; /* payload */ 353 static sbbc_msg_t sbbc_msg; /* message */ 354 355 static ddi_softintr_t ntwdt_mbox_softint_id; 356 static ddi_softintr_t ntwdt_cyclic_softint_id; 357 358 /* 359 * VWDT (i.e., Virtual Watchdog Timer) state 360 */ 361 typedef struct { 362 kmutex_t ntwdt_wdog_mutex; 363 ddi_iblock_cookie_t ntwdt_wdog_mtx_cookie; 364 int ntwdt_wdog_enabled; /* wdog enabled ? */ 365 int ntwdt_reset_enabled; /* reset enabled ? */ 366 int ntwdt_timer_running; /* wdog running ? */ 367 int ntwdt_wdog_expired; /* wdog expired ? */ 368 int ntwdt_is_initial_enable; /* 1st wdog-enable? 
*/ 369 uint32_t ntwdt_boot_timeout; /* timeout for boot */ 370 uint32_t ntwdt_secs_remaining; /* expiration timer */ 371 uint8_t ntwdt_wdog_action; /* Reset action */ 372 uint32_t ntwdt_wdog_timeout; /* timeout in seconds */ 373 hrtime_t ntwdt_cyclic_interval; /* cyclic interval */ 374 cyc_handler_t ntwdt_cycl_hdlr; 375 cyc_time_t ntwdt_cycl_time; 376 kmutex_t ntwdt_event_lock; /* lock */ 377 uint64_t ntwdt_wdog_flags; 378 } ntwdt_wdog_t; 379 380 /* ntwdt_wdog_flags */ 381 #define NTWDT_FLAG_SKIP_CYCLIC 0x1 /* skip next Cyclic */ 382 383 /* macros to set/clear one bit in ntwdt_wdog_flags */ 384 #define NTWDT_FLAG_SET(p, f)\ 385 ((p)->ntwdt_wdog_flags |= NTWDT_FLAG_##f) 386 #define NTWDT_FLAG_CLR(p, f)\ 387 ((p)->ntwdt_wdog_flags &= ~NTWDT_FLAG_##f) 388 389 390 /* softstate */ 391 typedef struct { 392 kmutex_t ntwdt_mutex; 393 dev_info_t *ntwdt_dip; /* dip */ 394 int ntwdt_open_flag; /* file open ? */ 395 ntwdt_wdog_t *ntwdt_wdog_state; /* wdog state */ 396 cyclic_id_t ntwdt_cycl_id; 397 } ntwdt_state_t; 398 399 static void *ntwdt_statep; /* softstate */ 400 static dev_info_t *ntwdt_dip; 401 /* 402 * if non-zero, then the app-wdog feature is available on 403 * this system configuration. 404 */ 405 static int ntwdt_watchdog_available; 406 /* 407 * if non-zero, then application has used the LOMIOCDOGCTL 408 * ioctl at least once in order to Enable the app-wdog. 409 * Also, if this is non-zero, then system is in AWDT mode, 410 * else it is in SWDT mode. 
 */
static	int	ntwdt_watchdog_activated;

/* fetch the softstate registered for the given minor (instance) number */
#define	getstate(minor)	\
	((ntwdt_state_t *)ddi_get_soft_state(ntwdt_statep, (minor)))

/* driver entry points (referenced by ntwdt_ops and ntwdt_cb_ops below) */
static int	ntwdt_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int	ntwdt_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
static int	ntwdt_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
		    void **result);
static int	ntwdt_open(dev_t *, int, int, cred_t *);
static int	ntwdt_close(dev_t, int, int, cred_t *);
static int	ntwdt_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);

/* internal helpers */
static void	ntwdt_reprogram_wd(ntwdt_state_t *);
static boolean_t	ntwdt_panic_cb(void *arg, int code);
static void	ntwdt_start_timer(ntwdt_state_t *);
static void	ntwdt_stop_timer(void *);
static void	ntwdt_stop_timer_lock(void *arg);
static void	ntwdt_add_callbacks(ntwdt_state_t *ntwdt_ptr);
static void	ntwdt_remove_callbacks();
static void	ntwdt_cyclic_pat(void *arg);
static void	ntwdt_enforce_timeout();
static void	ntwdt_pat_hw_watchdog();
static int	ntwdt_set_cfgvar(int var, int val);
static void	ntwdt_set_cfgvar_noreply(int var, int val);
static int	ntwdt_read_props(ntwdt_state_t *);
static int	ntwdt_add_mbox_handlers(ntwdt_state_t *);
static int	ntwdt_set_hw_timeout(uint32_t period);
static int	ntwdt_remove_mbox_handlers(void);
static uint_t	ntwdt_event_data_handler(char *arg);
static uint_t	ntwdt_mbox_softint(char *arg);
static uint_t	ntwdt_cyclic_softint(char *arg);
static int	ntwdt_lomcmd(int cmd, intptr_t arg);
static int	ntwdt_chk_wdog_support();
static int	ntwdt_chk_sc_support();
static int	ntwdt_set_swdt_state();
static void	ntwdt_swdt_to_awdt(ntwdt_wdog_t *);
static void	ntwdt_arm_vwdt(ntwdt_wdog_t *wdog_state);
#ifdef DEBUG
static int	ntwdt_get_cfgvar(int var, int *val);
#endif

/*
 * Character-device entry points. Only open, close and ioctl are
 * implemented by this driver; the remaining slots are filled with
 * the generic nulldev/nochpoll/ddi_prop_op routines.
 */
struct cb_ops ntwdt_cb_ops = {
	ntwdt_open,	/* open */
	ntwdt_close,	/* close */
	nulldev,	/* strategy */
	nulldev,	/* print */
	nulldev,	/* dump */
	nulldev,	/* read */
	nulldev,	/* write */
	ntwdt_ioctl,	/* ioctl */
	nulldev,	/* devmap */
	nulldev,	/* mmap */
	nulldev,	/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,	/* cb_prop_op */
	NULL,		/* streamtab */
	D_MP | D_NEW
};

/* device operations: wires attach/detach/getinfo and the cb_ops above */
static struct dev_ops ntwdt_ops = {
	DEVO_REV,		/* Devo_rev */
	0,			/* Refcnt */
	ntwdt_info,		/* Info */
	nulldev,		/* Identify */
	nulldev,		/* Probe */
	ntwdt_attach,		/* Attach */
	ntwdt_detach,		/* Detach */
	nodev,			/* Reset */
	&ntwdt_cb_ops,		/* Driver operations */
	0,			/* Bus operations */
	NULL			/* Power */
};

/* loadable-module linkage, consumed by _init/_info/_fini */
static struct modldrv modldrv = {
	&mod_driverops, 		/* This one is a driver */
	"ntwdt-Netra-T12",		/* Name of the module. */
	&ntwdt_ops,			/* Driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};


/*
 * Flags to set in ntwdt_debug.
499 * 500 * Use either the NTWDT_DBG or NTWDT_NDBG macros 501 */ 502 #define WDT_DBG_ENTRY 0x00000001 /* drv entry points */ 503 #define WDT_DBG_HEART 0x00000002 /* system heartbeat */ 504 #define WDT_DBG_VWDT 0x00000004 /* virtual WDT */ 505 #define WDT_DBG_EVENT 0x00000010 /* SBBC Mbox events */ 506 #define WDT_DBG_PROT 0x00000020 /* SC/Solaris protocol */ 507 #define WDT_DBG_IOCTL 0x00000040 /* ioctl's */ 508 509 uint64_t ntwdt_debug; /* enables tracing of module's activity */ 510 511 /* used in non-debug version of module */ 512 #define NTWDT_NDBG(flag, msg) { if ((ntwdt_debug & (flag)) != 0) \ 513 (void) printf msg; } 514 515 #ifdef DEBUG 516 typedef struct { 517 uint32_t ntwdt_wd1; 518 uint8_t ntwdt_wd2; 519 } ntwdt_data_t; 520 521 #define NTWDTIOCSTATE _IOWR('a', 0xa, ntwdt_data_t) 522 #define NTWDTIOCPANIC _IOR('a', 0xb, uint32_t) 523 524 /* used in debug version of module */ 525 #define NTWDT_DBG(flag, msg) { if ((ntwdt_debug & (flag)) != 0) \ 526 (void) printf msg; } 527 #else 528 #define NTWDT_DBG(flag, msg) 529 #endif 530 531 532 int 533 _init(void) 534 { 535 int error = 0; 536 537 NTWDT_DBG(WDT_DBG_ENTRY, ("_init")); 538 539 /* Initialize the soft state structures */ 540 if ((error = ddi_soft_state_init(&ntwdt_statep, 541 sizeof (ntwdt_state_t), 1)) != 0) { 542 return (error); 543 } 544 545 /* Install the loadable module */ 546 if ((error = mod_install(&modlinkage)) != 0) { 547 ddi_soft_state_fini(&ntwdt_statep); 548 } 549 return (error); 550 } 551 552 int 553 _info(struct modinfo *modinfop) 554 { 555 NTWDT_DBG(WDT_DBG_ENTRY, ("_info")); 556 557 return (mod_info(&modlinkage, modinfop)); 558 } 559 560 int 561 _fini(void) 562 { 563 int error; 564 565 NTWDT_DBG(WDT_DBG_ENTRY, ("_fini")); 566 567 error = mod_remove(&modlinkage); 568 if (error == 0) { 569 ddi_soft_state_fini(&ntwdt_statep); 570 } 571 572 return (error); 573 } 574 575 static int 576 ntwdt_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 577 { 578 int instance; 579 ntwdt_state_t *ntwdt_ptr = 
NULL; 580 ntwdt_wdog_t *wdog_state = NULL; 581 cyc_handler_t *hdlr = NULL; 582 583 NTWDT_DBG(WDT_DBG_ENTRY, ("attach: dip/cmd: 0x%p/%d", 584 dip, cmd)); 585 586 switch (cmd) { 587 case DDI_ATTACH: 588 break; 589 590 case DDI_RESUME: 591 return (DDI_SUCCESS); 592 593 default: 594 return (DDI_FAILURE); 595 } 596 597 /* see if app-wdog is supported on our config */ 598 if (ntwdt_chk_wdog_support() != 0) 599 return (DDI_FAILURE); 600 601 /* (unsolicitedly) send SWDT state to ScApp via mailbox */ 602 ntwdt_set_swdt_state(); 603 604 instance = ddi_get_instance(dip); 605 ASSERT(instance == 0); 606 607 if (ddi_soft_state_zalloc(ntwdt_statep, instance) 608 != DDI_SUCCESS) { 609 return (DDI_FAILURE); 610 } 611 ntwdt_ptr = ddi_get_soft_state(ntwdt_statep, instance); 612 ASSERT(ntwdt_ptr != NULL); 613 614 ntwdt_dip = dip; 615 616 ntwdt_ptr->ntwdt_dip = dip; 617 ntwdt_ptr->ntwdt_cycl_id = CYCLIC_NONE; 618 mutex_init(&ntwdt_ptr->ntwdt_mutex, NULL, 619 MUTEX_DRIVER, NULL); 620 621 /* 622 * Initialize the watchdog structure 623 */ 624 ntwdt_ptr->ntwdt_wdog_state = 625 kmem_zalloc(sizeof (ntwdt_wdog_t), KM_SLEEP); 626 wdog_state = ntwdt_ptr->ntwdt_wdog_state; 627 628 /* 629 * Create an iblock-cookie so that ntwdt_wdog_mutex can be 630 * used at User Context and Interrupt Context. 631 */ 632 if (ddi_get_soft_iblock_cookie(dip, DDI_SOFTINT_LOW, 633 &wdog_state->ntwdt_wdog_mtx_cookie) != DDI_SUCCESS) { 634 cmn_err(CE_WARN, "init of iblock cookie failed " 635 "for ntwdt_wdog_mutex"); 636 goto err1; 637 } else { 638 mutex_init(&wdog_state->ntwdt_wdog_mutex, NULL, MUTEX_DRIVER, 639 (void *)wdog_state->ntwdt_wdog_mtx_cookie); 640 } 641 642 mutex_init(&wdog_state->ntwdt_event_lock, NULL, 643 MUTEX_DRIVER, NULL); 644 645 /* Cyclic fires once per second: */ 646 wdog_state->ntwdt_cyclic_interval = NANOSEC; 647 648 /* interpret our .conf file. 
*/ 649 (void) ntwdt_read_props(ntwdt_ptr); 650 651 /* init the Cyclic that drives the VWDT */ 652 hdlr = &wdog_state->ntwdt_cycl_hdlr; 653 hdlr->cyh_level = CY_LOCK_LEVEL; 654 hdlr->cyh_func = ntwdt_cyclic_pat; 655 hdlr->cyh_arg = (void *)ntwdt_ptr; 656 657 /* Register handler for SBBC Mailbox events */ 658 if (ntwdt_add_mbox_handlers(ntwdt_ptr) != DDI_SUCCESS) 659 goto err2; 660 661 /* Softint that will be triggered by Cyclic that drives VWDT */ 662 if (ddi_add_softintr(dip, DDI_SOFTINT_LOW, &ntwdt_cyclic_softint_id, 663 NULL, NULL, ntwdt_cyclic_softint, (caddr_t)ntwdt_ptr) 664 != DDI_SUCCESS) { 665 cmn_err(CE_WARN, "failed to add cyclic softintr"); 666 goto err3; 667 } 668 669 /* Register callbacks for various system events, e.g. panic */ 670 ntwdt_add_callbacks(ntwdt_ptr); 671 672 /* 673 * Create Minor Node as last activity. This prevents 674 * application from accessing our implementation until it 675 * is initialized. 676 */ 677 if (ddi_create_minor_node(dip, NTWDT_MINOR_NODE, S_IFCHR, 0, 678 DDI_PSEUDO, NULL) == DDI_FAILURE) { 679 cmn_err(CE_WARN, "failed to create Minor Node: %s", 680 NTWDT_MINOR_NODE); 681 goto err4; 682 } 683 684 /* Display our driver info in the banner */ 685 ddi_report_dev(dip); 686 687 return (DDI_SUCCESS); 688 689 err4: 690 ntwdt_remove_callbacks(); 691 ddi_remove_softintr(ntwdt_cyclic_softint_id); 692 err3: 693 ntwdt_remove_mbox_handlers(); 694 err2: 695 mutex_destroy(&wdog_state->ntwdt_event_lock); 696 mutex_destroy(&wdog_state->ntwdt_wdog_mutex); 697 err1: 698 kmem_free(wdog_state, sizeof (ntwdt_wdog_t)); 699 ntwdt_ptr->ntwdt_wdog_state = NULL; 700 701 mutex_destroy(&ntwdt_ptr->ntwdt_mutex); 702 ddi_soft_state_free(ntwdt_statep, instance); 703 704 ntwdt_dip = NULL; 705 706 return (DDI_FAILURE); 707 } 708 709 /* 710 * Do static checks to see if the app-wdog feature is supported in 711 * the current configuration. 
712 * 713 * If the kernel debugger was booted, then we disallow the app-wdog 714 * feature, as we assume the user will be interested more in 715 * debuggability of system than its ability to support an app-wdog. 716 * (Note that the System Watchdog (SWDT) can still be available). 717 * 718 * If the currently loaded version of ScApp does not understand one 719 * of the IOSRAM mailbox messages that is specific to the app-wdog 720 * protocol, then we disallow use of the app-wdog feature (else 721 * we could have a "split-brain" scenario where Solaris supports 722 * app-wdog but ScApp doesn't). 723 * 724 * Note that there is no *dynamic* checking of whether ScApp supports 725 * the wdog protocol. Eg, if a new version of ScApp was loaded out 726 * from under Solaris, then once in AWDT mode, Solaris has no way 727 * of knowing that (a possibly older version of) ScApp was loaded. 728 */ 729 static int 730 ntwdt_chk_wdog_support() 731 { 732 int retval = ENOTSUP; 733 int rv; 734 735 if ((boothowto & RB_DEBUG) != 0) { 736 cmn_err(CE_WARN, "kernel debugger was booted; " 737 "application watchdog is not available."); 738 return (retval); 739 } 740 741 /* 742 * if ScApp does not support the MBOX_GET cmd, then 743 * it does not support the app-wdog feature. Also, 744 * if there is *any* type of SBBC Mailbox error at 745 * this point, we will disable the app watchdog 746 * feature. 747 */ 748 if ((rv = ntwdt_chk_sc_support()) != 0) { 749 if (rv == EINVAL) 750 cmn_err(CE_WARN, "ScApp does not support " 751 "the application watchdog feature."); 752 else 753 cmn_err(CE_WARN, "SBBC mailbox had error;" 754 "application watchdog is not available."); 755 retval = rv; 756 } else { 757 ntwdt_watchdog_available = 1; 758 retval = 0; 759 } 760 761 NTWDT_DBG(WDT_DBG_PROT, ("app-wdog is %savailable", 762 (ntwdt_watchdog_available != 0) ? "" : "not ")); 763 764 return (retval); 765 } 766 767 /* 768 * Check to see if ScApp supports the app-watchdog feature. 
769 * 770 * Do this by sending one of the mailbox commands that is 771 * specific to the app-wdog protocol. If ScApp does not 772 * return an error code, we will assume it understands it 773 * (as well as the remainder of the app-wdog protocol). 774 * 775 * Notes: 776 * ntwdt_lomcmd() will return EINVAL if ScApp does not 777 * understand the message. The underlying sbbc_mbox_ 778 * utility function returns SG_MBOX_STATUS_ILLEGAL_PARAMETER 779 * ("illegal ioctl parameter"). 780 */ 781 static int 782 ntwdt_chk_sc_support() 783 { 784 lw8_get_wdt_t get_wdt; 785 786 return (ntwdt_lomcmd(LW8_MBOX_WDT_GET, (intptr_t)&get_wdt)); 787 } 788 789 static int 790 ntwdt_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 791 { 792 int instance = ddi_get_instance(dip); 793 ntwdt_state_t *ntwdt_ptr = NULL; 794 795 NTWDT_DBG(WDT_DBG_ENTRY, ("detach: dip/cmd: 0x%p/%d", 796 dip, cmd)); 797 798 ntwdt_ptr = ddi_get_soft_state(ntwdt_statep, instance); 799 if (ntwdt_ptr == NULL) { 800 return (DDI_FAILURE); 801 } 802 803 switch (cmd) { 804 case DDI_SUSPEND: 805 return (DDI_SUCCESS); 806 807 case DDI_DETACH: 808 /* 809 * release resources in opposite (LIFO) order as 810 * were allocated in attach(9f). 811 */ 812 ddi_remove_minor_node(dip, NULL); 813 814 ntwdt_stop_timer_lock((void *)ntwdt_ptr); 815 816 ntwdt_remove_callbacks(ntwdt_ptr); 817 818 ddi_remove_softintr(ntwdt_cyclic_softint_id); 819 820 ntwdt_remove_mbox_handlers(); 821 822 mutex_destroy(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_event_lock); 823 mutex_destroy(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex); 824 kmem_free(ntwdt_ptr->ntwdt_wdog_state, 825 sizeof (ntwdt_wdog_t)); 826 ntwdt_ptr->ntwdt_wdog_state = NULL; 827 828 mutex_destroy(&ntwdt_ptr->ntwdt_mutex); 829 830 ddi_soft_state_free(ntwdt_statep, instance); 831 832 ntwdt_dip = NULL; 833 return (DDI_SUCCESS); 834 835 default: 836 return (DDI_FAILURE); 837 } 838 } 839 840 /* 841 * Register the SBBC Mailbox handlers. 842 * 843 * Currently, only one handler is used. 
It processes the MBOX_EVENT_LW8 844 * Events that are sent by ScApp. Of the Events that are sent, only 845 * the Event declaring that ScApp is coming up from a reboot 846 * (LW8_EVENT_SC_RESTARTED) is processed. 847 * 848 * sbbc_mbox_reg_intr registers the handler so that it executes at 849 * a DDI_SOFTINT_MED priority. 850 */ 851 static int 852 ntwdt_add_mbox_handlers(ntwdt_state_t *ntwdt_ptr) 853 { 854 int err; 855 856 /* 857 * We need two interrupt handlers to handle the SBBC mbox 858 * events. The sbbc_mbox_xxx implementation will 859 * trigger our ntwdt_event_data_handler, which itself will 860 * trigger our ntwdt_mbox_softint. As a result, we'll 861 * register ntwdt_mbox_softint first, to ensure it cannot 862 * be called (until its caller, ntwdt_event_data_handler) 863 * is registered. 864 */ 865 866 /* 867 * add the softint that will do the real work of handling the 868 * LW8_SC_RESTARTED_EVENT sent from ScApp. 869 */ 870 if (ddi_add_softintr(ntwdt_ptr->ntwdt_dip, DDI_SOFTINT_LOW, 871 &ntwdt_mbox_softint_id, NULL, NULL, ntwdt_mbox_softint, 872 (caddr_t)ntwdt_ptr) != DDI_SUCCESS) { 873 cmn_err(CE_WARN, "Failed to add MBOX_EVENT_LW8 softintr"); 874 return (DDI_FAILURE); 875 } 876 877 /* 878 * Register an interrupt handler with the SBBC mailbox utility. 879 * This handler will get called on each event of each type of 880 * MBOX_EVENT_LW8 events. However, it will only conditionally 881 * trigger the worker-handler (ntwdt_mbox_softintr). 882 */ 883 sbbc_msg.msg_buf = (caddr_t)&lw8_event; 884 sbbc_msg.msg_len = sizeof (lw8_event); 885 886 err = sbbc_mbox_reg_intr(MBOX_EVENT_LW8, ntwdt_event_data_handler, 887 &sbbc_msg, NULL, &ntwdt_ptr->ntwdt_wdog_state->ntwdt_event_lock); 888 if (err != 0) { 889 cmn_err(CE_WARN, "Failed to register SBBC MBOX_EVENT_LW8" 890 " handler. 
err=%d", err); 891 892 ddi_remove_softintr(ntwdt_mbox_softint_id); 893 return (DDI_FAILURE); 894 } 895 896 return (DDI_SUCCESS); 897 } 898 899 /* 900 * Unregister the SBBC Mailbox handlers that were registered 901 * by ntwdt_add_mbox_handlers. 902 */ 903 static int 904 ntwdt_remove_mbox_handlers(void) 905 { 906 int rv = DDI_SUCCESS; 907 int err; 908 909 /* 910 * unregister the two handlers that cooperate to handle 911 * the LW8_SC_RESTARTED_EVENT. Note that they are unregistered 912 * in LIFO order (as compared to how they were registered). 913 */ 914 err = sbbc_mbox_unreg_intr(MBOX_EVENT_LW8, ntwdt_event_data_handler); 915 if (err != 0) { 916 cmn_err(CE_WARN, "Failed to unregister sbbc MBOX_EVENT_LW8 " 917 "handler. Err=%d", err); 918 rv = DDI_FAILURE; 919 } 920 921 /* remove the associated softint */ 922 ddi_remove_softintr(ntwdt_mbox_softint_id); 923 924 return (rv); 925 } 926 927 _NOTE(ARGSUSED(0)) 928 static int 929 ntwdt_info(dev_info_t *dip, ddi_info_cmd_t infocmd, 930 void *arg, void **result) 931 { 932 dev_t dev; 933 int instance; 934 int error = DDI_SUCCESS; 935 936 if (result == NULL) 937 return (DDI_FAILURE); 938 939 switch (infocmd) { 940 case DDI_INFO_DEVT2DEVINFO: 941 dev = (dev_t)arg; 942 if (getminor(dev) == 0) 943 *result = (void *)ntwdt_dip; 944 else 945 error = DDI_FAILURE; 946 break; 947 948 case DDI_INFO_DEVT2INSTANCE: 949 dev = (dev_t)arg; 950 instance = getminor(dev); 951 *result = (void *)(uintptr_t)instance; 952 break; 953 954 default: 955 error = DDI_FAILURE; 956 } 957 958 return (error); 959 } 960 961 /* 962 * Open the device this driver manages. 963 * 964 * Ensure the caller is a privileged process, else 965 * a non-privileged user could cause denial-of-service 966 * and/or negatively impact reliability/availability. 967 * 968 * Ensure there is only one concurrent open(). 
969 */ 970 _NOTE(ARGSUSED(1)) 971 static int 972 ntwdt_open(dev_t *devp, int flag, int otyp, cred_t *credp) 973 { 974 int inst = getminor(*devp); 975 int ret = 0; 976 ntwdt_state_t *ntwdt_ptr = getstate(inst); 977 978 NTWDT_DBG(WDT_DBG_ENTRY, ("open: inst/soft: %d/0x%p", 979 inst, ntwdt_ptr)); 980 981 /* ensure caller is a privileged process */ 982 if (drv_priv(credp) != 0) 983 return (EPERM); 984 985 /* 986 * Check for a Deferred Attach scenario. 987 * Return ENXIO so DDI framework will call 988 * attach() and then retry the open(). 989 */ 990 if (ntwdt_ptr == NULL) 991 return (ENXIO); 992 993 mutex_enter(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex); 994 mutex_enter(&ntwdt_ptr->ntwdt_mutex); 995 if (ntwdt_ptr->ntwdt_open_flag != 0) 996 ret = EAGAIN; 997 else 998 ntwdt_ptr->ntwdt_open_flag = 1; 999 mutex_exit(&ntwdt_ptr->ntwdt_mutex); 1000 mutex_exit(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex); 1001 1002 return (ret); 1003 } 1004 1005 /* 1006 * Close the device this driver manages. 1007 * 1008 * Notes: 1009 * 1010 * The close() can happen while the AWDT is running ! 1011 * (and nothing is done, eg, to disable the watchdog 1012 * or to stop updating the system heartbeat). This 1013 * is the desired behavior, as this allows for the 1014 * case of monitoring a Solaris reboot in terms 1015 * of watchdog expiration. 
1016 */ 1017 _NOTE(ARGSUSED(1)) 1018 static int 1019 ntwdt_close(dev_t dev, int flag, int otyp, cred_t *credp) 1020 { 1021 int inst = getminor(dev); 1022 ntwdt_state_t *ntwdt_ptr = getstate(inst); 1023 1024 NTWDT_DBG(WDT_DBG_ENTRY, ("close: inst/soft: %d/0x%p", 1025 inst, ntwdt_ptr)); 1026 1027 if (ntwdt_ptr == NULL) 1028 return (ENXIO); 1029 1030 mutex_enter(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex); 1031 mutex_enter(&ntwdt_ptr->ntwdt_mutex); 1032 if (ntwdt_ptr->ntwdt_open_flag != 0) { 1033 ntwdt_ptr->ntwdt_open_flag = 0; 1034 } 1035 mutex_exit(&ntwdt_ptr->ntwdt_mutex); 1036 mutex_exit(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex); 1037 1038 return (0); 1039 } 1040 1041 _NOTE(ARGSUSED(4)) 1042 static int 1043 ntwdt_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 1044 cred_t *credp, int *rvalp) 1045 { 1046 int inst = getminor(dev); 1047 int retval = 0; 1048 ntwdt_state_t *ntwdt_ptr = NULL; 1049 ntwdt_wdog_t *wdog_state; 1050 1051 if ((ntwdt_ptr = getstate(inst)) == NULL) 1052 return (ENXIO); 1053 1054 /* Only allow ioctl's if Solaris/ScApp support app-wdog */ 1055 if (ntwdt_watchdog_available == 0) 1056 return (ENXIO); 1057 1058 wdog_state = ntwdt_ptr->ntwdt_wdog_state; 1059 1060 switch (cmd) { 1061 case LOMIOCDOGSTATE: { 1062 /* 1063 * Return the state of the AWDT to the application. 
1064 */ 1065 lom_dogstate_t lom_dogstate; 1066 1067 mutex_enter(&wdog_state->ntwdt_wdog_mutex); 1068 lom_dogstate.reset_enable = 1069 wdog_state->ntwdt_reset_enabled; 1070 lom_dogstate.dog_enable = 1071 wdog_state->ntwdt_wdog_enabled; 1072 lom_dogstate.dog_timeout = 1073 wdog_state->ntwdt_wdog_timeout; 1074 mutex_exit(&wdog_state->ntwdt_wdog_mutex); 1075 1076 NTWDT_DBG(WDT_DBG_IOCTL, ("DOGSTATE: wdog/reset/timeout:" 1077 " %d/%d/%d", lom_dogstate.dog_enable, 1078 lom_dogstate.reset_enable, lom_dogstate.dog_timeout)); 1079 1080 if (ddi_copyout((caddr_t)&lom_dogstate, (caddr_t)arg, 1081 sizeof (lom_dogstate_t), mode) != 0) { 1082 retval = EFAULT; 1083 } 1084 break; 1085 } 1086 1087 case LOMIOCDOGCTL: { 1088 /* 1089 * Allow application to control whether watchdog 1090 * is {dis,en}abled and whether Reset is 1091 * {dis,en}abled. 1092 */ 1093 lom_dogctl_t lom_dogctl; 1094 1095 if (ddi_copyin((caddr_t)arg, (caddr_t)&lom_dogctl, 1096 sizeof (lom_dogctl_t), mode) != 0) { 1097 retval = EFAULT; 1098 break; 1099 } 1100 1101 NTWDT_DBG(WDT_DBG_IOCTL, ("DOGCTL: wdog/reset:" 1102 " %d/%d", lom_dogctl.dog_enable, 1103 lom_dogctl.reset_enable)); 1104 1105 mutex_enter(&wdog_state->ntwdt_wdog_mutex); 1106 1107 if (wdog_state->ntwdt_wdog_timeout == 0) { 1108 /* 1109 * then LOMIOCDOGTIME has never been used 1110 * to setup a valid timeout. 1111 */ 1112 retval = EINVAL; 1113 goto end; 1114 } 1115 1116 /* 1117 * Return error for the non-sensical combination: 1118 * "enable Reset" and "disable watchdog". 1119 */ 1120 if (lom_dogctl.dog_enable == 0 && 1121 lom_dogctl.reset_enable != 0) { 1122 retval = EINVAL; 1123 goto end; 1124 } 1125 1126 /* 1127 * Store the user-specified state in our softstate. 1128 * Note that our implementation here is stateless. 1129 * Eg, we do not disallow an "enable the watchdog" 1130 * command when the watchdog is currently enabled. 1131 * This is needed (at least in the case) when 1132 * the user enters OBP via ScApp/lom. 
In that case, 1133 * ScApp disables the watchdog, but does not inform 1134 * Solaris. As a result, an ensuing, unfiltered DOGCTL 1135 * to enable the watchdog is required. 1136 */ 1137 wdog_state->ntwdt_reset_enabled = 1138 lom_dogctl.reset_enable; 1139 wdog_state->ntwdt_wdog_enabled = 1140 lom_dogctl.dog_enable; 1141 1142 if (wdog_state->ntwdt_wdog_enabled != 0) { 1143 /* 1144 * then user wants to enable watchdog. 1145 * Arm the watchdog timer and start the 1146 * Cyclic, if it is not running. 1147 */ 1148 ntwdt_arm_vwdt(wdog_state); 1149 1150 if (wdog_state->ntwdt_timer_running == 0) { 1151 ntwdt_start_timer(ntwdt_ptr); 1152 } 1153 } else { 1154 /* 1155 * user wants to disable the watchdog. 1156 * Note that we do not set ntwdt_secs_remaining 1157 * to zero; that could cause a false expiration. 1158 */ 1159 if (wdog_state->ntwdt_timer_running != 0) { 1160 ntwdt_stop_timer(ntwdt_ptr); 1161 } 1162 } 1163 1164 /* 1165 * Send a permutation of mailbox commands to 1166 * ScApp that describes the current state of the 1167 * watchdog timer. Note that the permutation 1168 * depends on whether this is the first 1169 * Enabling of the watchdog or not. 1170 */ 1171 if (wdog_state->ntwdt_wdog_enabled != 0 && 1172 wdog_state->ntwdt_is_initial_enable == 0) { 1173 1174 /* switch from SWDT to AWDT mode */ 1175 ntwdt_swdt_to_awdt(wdog_state); 1176 1177 /* Tell ScApp we're in AWDT mode */ 1178 ntwdt_set_cfgvar(LW8_WDT_PROP_MODE, 1179 LW8_PROP_MODE_AWDT); 1180 } 1181 1182 /* Inform ScApp of the choices made by the app */ 1183 ntwdt_set_cfgvar(LW8_WDT_PROP_WDT, 1184 wdog_state->ntwdt_wdog_enabled); 1185 ntwdt_set_cfgvar(LW8_WDT_PROP_RECOV, 1186 wdog_state->ntwdt_reset_enabled); 1187 1188 if (wdog_state->ntwdt_wdog_enabled != 0 && 1189 wdog_state->ntwdt_is_initial_enable == 0) { 1190 /* 1191 * Clear tod_iosram_t.tod_timeout_period, 1192 * which is used in SWDT part of state 1193 * machine. (If this field is non-zero, 1194 * ScApp assumes that Solaris' SWDT is active). 
1195 * 1196 * Clearing this is useful in case SC reboots 1197 * while Solaris is running, as ScApp will read 1198 * a zero and not assume SWDT is running. 1199 */ 1200 ntwdt_set_hw_timeout(0); 1201 1202 /* "the first watchdog-enable has been seen" */ 1203 wdog_state->ntwdt_is_initial_enable = 1; 1204 } 1205 1206 mutex_exit(&wdog_state->ntwdt_wdog_mutex); 1207 break; 1208 } 1209 1210 case LOMIOCDOGTIME: { 1211 /* 1212 * Allow application to set the period (in seconds) 1213 * of the watchdog timeout. 1214 */ 1215 uint32_t lom_dogtime; 1216 1217 if (ddi_copyin((caddr_t)arg, (caddr_t)&lom_dogtime, 1218 sizeof (uint32_t), mode) != 0) { 1219 retval = EFAULT; 1220 break; 1221 } 1222 1223 NTWDT_DBG(WDT_DBG_IOCTL, ("DOGTIME: %u seconds", 1224 lom_dogtime)); 1225 1226 /* Ensure specified timeout is within range. */ 1227 if ((lom_dogtime == 0) || 1228 (lom_dogtime > NTWDT_MAX_TIMEOUT)) { 1229 retval = EINVAL; 1230 break; 1231 } 1232 1233 mutex_enter(&wdog_state->ntwdt_wdog_mutex); 1234 1235 wdog_state->ntwdt_wdog_timeout = lom_dogtime; 1236 1237 /* 1238 * If watchdog is currently running, re-arm the 1239 * watchdog timeout with the specified value. 1240 */ 1241 if (wdog_state->ntwdt_timer_running != 0) { 1242 ntwdt_arm_vwdt(wdog_state); 1243 } 1244 1245 /* Tell ScApp of the specified timeout */ 1246 ntwdt_set_cfgvar(LW8_WDT_PROP_TO, lom_dogtime); 1247 1248 mutex_exit(&wdog_state->ntwdt_wdog_mutex); 1249 break; 1250 } 1251 1252 case LOMIOCDOGPAT: { 1253 /* 1254 * Allow user to re-arm ("pat") the watchdog. 1255 */ 1256 NTWDT_DBG(WDT_DBG_IOCTL, ("DOGPAT")); 1257 1258 mutex_enter(&wdog_state->ntwdt_wdog_mutex); 1259 1260 /* 1261 * If watchdog is not enabled or underlying 1262 * Cyclic timer is not running, exit. 
1263 */ 1264 if (!(wdog_state->ntwdt_wdog_enabled && 1265 wdog_state->ntwdt_timer_running)) 1266 goto end; 1267 1268 if (wdog_state->ntwdt_wdog_expired == 0) { 1269 /* then VWDT has not expired; re-arm it */ 1270 ntwdt_arm_vwdt(wdog_state); 1271 1272 NTWDT_DBG(WDT_DBG_VWDT, ("VWDT re-armed:" 1273 " %d seconds", 1274 wdog_state->ntwdt_secs_remaining)); 1275 } 1276 1277 mutex_exit(&wdog_state->ntwdt_wdog_mutex); 1278 break; 1279 } 1280 1281 #ifdef DEBUG 1282 case NTWDTIOCPANIC: { 1283 /* 1284 * Use in unit/integration testing to test our 1285 * panic-handler code. 1286 */ 1287 cmn_err(CE_PANIC, "NTWDTIOCPANIC: force a panic"); 1288 break; 1289 } 1290 1291 case NTWDTIOCSTATE: { 1292 /* 1293 * Allow application to read wdog state from the 1294 * SC (and *not* the driver's softstate). 1295 * 1296 * Return state of: 1297 * o recovery-enabled 1298 * o current timeout value 1299 */ 1300 ntwdt_data_t ntwdt_data; 1301 int action; 1302 int timeout; 1303 int ret; 1304 1305 mutex_enter(&wdog_state->ntwdt_wdog_mutex); 1306 ret = ntwdt_get_cfgvar(LW8_WDT_PROP_TO, &timeout); 1307 ret |= ntwdt_get_cfgvar(LW8_WDT_PROP_RECOV, &action); 1308 mutex_exit(&wdog_state->ntwdt_wdog_mutex); 1309 1310 bzero((caddr_t)&ntwdt_data, sizeof (ntwdt_data)); 1311 1312 if (ret != NTWDT_SUCCESS) { 1313 retval = EIO; 1314 break; 1315 } 1316 1317 NTWDT_DBG(WDT_DBG_IOCTL, ("NTWDTIOCSTATE:" 1318 " timeout/action: %d/%d", timeout, action)); 1319 1320 ntwdt_data.ntwdt_wd1 = (uint32_t)timeout; 1321 ntwdt_data.ntwdt_wd2 = (uint8_t)action; 1322 1323 if (ddi_copyout((caddr_t)&ntwdt_data, (caddr_t)arg, 1324 sizeof (ntwdt_data_t), mode) != 0) { 1325 retval = EFAULT; 1326 } 1327 break; 1328 } 1329 #endif 1330 default: 1331 retval = EINVAL; 1332 break; 1333 } 1334 1335 return (retval); 1336 end: 1337 mutex_exit(&wdog_state->ntwdt_wdog_mutex); 1338 return (retval); 1339 } 1340 1341 /* 1342 * Arm the Virtual Watchdog Timer (VWDT). 
1343 * 1344 * Assign the current watchdog timeout (ntwdt_wdog_timeout) 1345 * to the softstate variable representing the watchdog 1346 * timer (ntwdt_secs_remaining). 1347 * 1348 * To ensure (from ntwdt's perspective) that any actual 1349 * timeout expiration is at least as large as the expected 1350 * timeout, conditionally set/clear a bit that will be 1351 * checked in the Cyclic's softint. 1352 * 1353 * If the Cyclic has been started, the goal is to ignore 1354 * the _next_ firing of the Cyclic, as that firing will 1355 * NOT represent a full, one-second period. If the Cyclic 1356 * has NOT been started yet, then do not ignore the next 1357 * Cyclic's firing, as that's the First One, and it was 1358 * programmed to fire at a specific time (see ntwdt_start_timer). 1359 */ 1360 static void 1361 ntwdt_arm_vwdt(ntwdt_wdog_t *wdog_state) 1362 { 1363 /* arm the watchdog timer (VWDT) */ 1364 wdog_state->ntwdt_secs_remaining = 1365 wdog_state->ntwdt_wdog_timeout; 1366 1367 if (wdog_state->ntwdt_timer_running != 0) 1368 NTWDT_FLAG_SET(wdog_state, SKIP_CYCLIC); 1369 else 1370 NTWDT_FLAG_CLR(wdog_state, SKIP_CYCLIC); 1371 } 1372 1373 /* 1374 * Switch from SWDT mode to AWDT mode. 1375 */ 1376 _NOTE(ARGSUSED(0)) 1377 static void 1378 ntwdt_swdt_to_awdt(ntwdt_wdog_t *wdog_state) 1379 { 1380 ASSERT(wdog_state->ntwdt_is_initial_enable == 0); 1381 1382 /* 1383 * Disable SWDT. If SWDT is currently active, 1384 * display a message so user knows that SWDT Mode 1385 * has terminated. 1386 */ 1387 if (watchdog_enable != 0 || 1388 watchdog_activated != 0) 1389 cmn_err(CE_NOTE, "Hardware watchdog disabled"); 1390 watchdog_enable = 0; 1391 watchdog_activated = 0; 1392 1393 /* "we are in AWDT mode" */ 1394 ntwdt_watchdog_activated = 1; 1395 NTWDT_DBG(WDT_DBG_VWDT, ("AWDT is enabled")); 1396 } 1397 1398 /* 1399 * This is the Cyclic that runs at a multiple of the 1400 * AWDT's watchdog-timeout period. 
This Cyclic runs at 1401 * LOCK_LEVEL (eg, CY_LOCK_LEVEL) and will post a 1402 * soft-interrupt in order to complete all processing. 1403 * 1404 * Executing at LOCK_LEVEL gives this function a high 1405 * interrupt priority, while performing its work via 1406 * a soft-interrupt allows for a consistent (eg, MT-safe) 1407 * view of driver softstate between User and Interrupt 1408 * context. 1409 * 1410 * Context: 1411 * interrupt context: Cyclic framework calls at 1412 * CY_LOCK_LEVEL (=> 10) 1413 */ 1414 _NOTE(ARGSUSED(0)) 1415 static void 1416 ntwdt_cyclic_pat(void *arg) 1417 { 1418 /* post-down to DDI_SOFTINT_LOW */ 1419 ddi_trigger_softintr(ntwdt_cyclic_softint_id); 1420 } 1421 1422 /* 1423 * This is the soft-interrupt triggered by the AWDT 1424 * Cyclic. 1425 * 1426 * This softint does all the work re: computing whether 1427 * the VWDT expired. It grabs ntwdt_wdog_mutex 1428 * so User Context code (eg, the IOCTLs) cannot run, 1429 * and then it tests whether the VWDT expired. If it 1430 * hasn't, it decrements the VWDT timer by the amount 1431 * of the Cyclic's period. If the timer has expired, 1432 * it initiates Recovery (based on what user specified 1433 * in LOMIOCDOGCTL). 1434 * 1435 * This function also updates the normal system "heartbeat". 1436 * 1437 * Context: 1438 * interrupt-context: DDI_SOFTINT_LOW 1439 */ 1440 static uint_t 1441 ntwdt_cyclic_softint(char *arg) 1442 { 1443 ntwdt_state_t *ntwdt_ptr = (ntwdt_state_t *)arg; 1444 ntwdt_wdog_t *wdog_state; 1445 1446 wdog_state = ntwdt_ptr->ntwdt_wdog_state; 1447 1448 mutex_enter(&wdog_state->ntwdt_wdog_mutex); 1449 1450 if ((wdog_state->ntwdt_wdog_flags & 1451 NTWDT_FLAG_SKIP_CYCLIC) != 0) { 1452 /* 1453 * then skip all processing by this interrupt. 1454 * (see ntwdt_arm_vwdt()). 
1455 */ 1456 wdog_state->ntwdt_wdog_flags &= ~NTWDT_FLAG_SKIP_CYCLIC; 1457 goto end; 1458 } 1459 1460 if (wdog_state->ntwdt_timer_running == 0 || 1461 (ntwdt_ptr->ntwdt_cycl_id == CYCLIC_NONE) || 1462 (wdog_state->ntwdt_wdog_enabled == 0)) 1463 goto end; 1464 1465 /* re-arm ("pat") the hardware watchdog */ 1466 ntwdt_pat_hw_watchdog(); 1467 1468 /* Decrement the VWDT and see if it has expired. */ 1469 if (--wdog_state->ntwdt_secs_remaining == 0) { 1470 1471 cmn_err(CE_WARN, "application-watchdog expired"); 1472 1473 wdog_state->ntwdt_wdog_expired = 1; 1474 1475 if (wdog_state->ntwdt_reset_enabled != 0) { 1476 /* 1477 * Update ScApp so that the new wdog-timeout 1478 * value is as specified in the 1479 * NTWDT_BOOT_TIMEOUT_PROP driver Property. 1480 * This timeout is assumedly larger than the 1481 * actual Solaris reboot time. This will allow 1482 * our forced-reboot to not cause an unplanned 1483 * (series of) watchdog expiration(s). 1484 */ 1485 if (ntwdt_disable_timeout_action == 0) 1486 ntwdt_reprogram_wd(ntwdt_ptr); 1487 1488 mutex_exit(&wdog_state->ntwdt_wdog_mutex); 1489 1490 NTWDT_DBG(WDT_DBG_VWDT, ("recovery being done")); 1491 1492 ntwdt_enforce_timeout(); 1493 } else { 1494 NTWDT_DBG(WDT_DBG_VWDT, ("no recovery being done")); 1495 1496 wdog_state->ntwdt_wdog_enabled = 0; 1497 1498 /* 1499 * Tell ScApp to disable wdog; this prevents 1500 * the "2x-timeout" artifact. Eg, Solaris 1501 * times-out at t(x) and ScApp times-out at t(2x), 1502 * where (x==ntwdt_wdog_timeout). 
1503 */ 1504 (void) ntwdt_set_cfgvar(LW8_WDT_PROP_WDT, 1505 wdog_state->ntwdt_wdog_enabled); 1506 } 1507 1508 /* Schedule Callout to stop this Cyclic */ 1509 timeout(ntwdt_stop_timer_lock, ntwdt_ptr, 0); 1510 1511 } else { 1512 _NOTE(EMPTY) 1513 NTWDT_DBG(WDT_DBG_VWDT, ("time remaining in VWDT: %d" 1514 " seconds", wdog_state->ntwdt_secs_remaining)); 1515 } 1516 end: 1517 mutex_exit(&wdog_state->ntwdt_wdog_mutex); 1518 1519 return (DDI_INTR_CLAIMED); 1520 } 1521 1522 /* 1523 * Program the AWDT watchdog-timeout value to that specified 1524 * in the NTWDT_BOOT_TIMEOUT_PROP driver Property. However, 1525 * only do this if the AWDT is in the correct state. 1526 * 1527 * Caller's Context: 1528 * o interrupt context: (from software-interrupt) 1529 * o during a panic 1530 */ 1531 static void 1532 ntwdt_reprogram_wd(ntwdt_state_t *ntwdt_ptr) 1533 { 1534 ntwdt_wdog_t *wdog_state = ntwdt_ptr->ntwdt_wdog_state; 1535 1536 /* 1537 * Program the AWDT watchdog-timeout value only if the 1538 * watchdog is enabled, the user wants to do recovery, 1539 * ("reset is enabled") and the AWDT timer is currently 1540 * running. 1541 */ 1542 if (wdog_state->ntwdt_wdog_enabled != 0 && 1543 wdog_state->ntwdt_reset_enabled != 0 && 1544 wdog_state->ntwdt_timer_running != 0) { 1545 if (ddi_in_panic() != 0) 1546 ntwdt_set_cfgvar_noreply(LW8_WDT_PROP_TO, 1547 wdog_state->ntwdt_boot_timeout); 1548 else 1549 (void) ntwdt_set_cfgvar(LW8_WDT_PROP_TO, 1550 wdog_state->ntwdt_boot_timeout); 1551 } 1552 } 1553 1554 /* 1555 * This is the callback that was registered to run during a panic. 1556 * It will set the watchdog-timeout value to be that as specified 1557 * in the NTWDT_BOOT_TIMEOUT_PROP driver Property. 1558 * 1559 * Note that unless this Property's value specifies a timeout 1560 * that's larger than the actual reboot latency, ScApp will 1561 * experience a timeout and initiate Recovery. 
1562 */ 1563 _NOTE(ARGSUSED(1)) 1564 static boolean_t 1565 ntwdt_panic_cb(void *arg, int code) 1566 { 1567 ASSERT(ddi_in_panic() != 0); 1568 1569 ntwdt_reprogram_wd((ntwdt_state_t *)arg); 1570 1571 return (B_TRUE); 1572 } 1573 1574 /* 1575 * Initialize the Cyclic that is used to monitor the VWDT. 1576 */ 1577 static void 1578 ntwdt_start_timer(ntwdt_state_t *ntwdt_ptr) 1579 { 1580 ntwdt_wdog_t *wdog_state = ntwdt_ptr->ntwdt_wdog_state; 1581 cyc_handler_t *hdlr = &wdog_state->ntwdt_cycl_hdlr; 1582 cyc_time_t *when = &wdog_state->ntwdt_cycl_time; 1583 1584 /* 1585 * Init Cyclic so its first expiry occurs wdog-timeout 1586 * seconds from the current, absolute time. 1587 */ 1588 when->cyt_interval = wdog_state->ntwdt_cyclic_interval; 1589 when->cyt_when = gethrtime() + when->cyt_interval; 1590 1591 wdog_state->ntwdt_wdog_expired = 0; 1592 wdog_state->ntwdt_timer_running = 1; 1593 1594 mutex_enter(&cpu_lock); 1595 if (ntwdt_ptr->ntwdt_cycl_id == CYCLIC_NONE) 1596 ntwdt_ptr->ntwdt_cycl_id = cyclic_add(hdlr, when); 1597 mutex_exit(&cpu_lock); 1598 1599 NTWDT_DBG(WDT_DBG_VWDT, ("AWDT's cyclic-driven timer is started")); 1600 } 1601 1602 /* 1603 * Stop the cyclic that is used to monitor the VWDT (and 1604 * was Started by ntwdt_start_timer). 1605 * 1606 * Context: per the Cyclic API, cyclic_remove cannot be called 1607 * from interrupt-context. Note that when this is 1608 * called via a Callout, it's called from base level. 
1609 */ 1610 static void 1611 ntwdt_stop_timer(void *arg) 1612 { 1613 ntwdt_state_t *ntwdt_ptr = (void *)arg; 1614 ntwdt_wdog_t *wdog_state = ntwdt_ptr->ntwdt_wdog_state; 1615 1616 mutex_enter(&cpu_lock); 1617 if (ntwdt_ptr->ntwdt_cycl_id != CYCLIC_NONE) 1618 cyclic_remove(ntwdt_ptr->ntwdt_cycl_id); 1619 mutex_exit(&cpu_lock); 1620 1621 wdog_state->ntwdt_timer_running = 0; 1622 ntwdt_ptr->ntwdt_cycl_id = CYCLIC_NONE; 1623 1624 NTWDT_DBG(WDT_DBG_VWDT, ("AWDT's cyclic-driven timer is stopped")); 1625 } 1626 1627 /* 1628 * Stop the cyclic that is used to monitor the VWDT (and 1629 * do it in a thread-safe manner). 1630 * 1631 * This is a wrapper function for the core function, 1632 * ntwdt_stop_timer. Both functions are useful, as some 1633 * callers will already have the appropriate mutex locked, and 1634 * other callers will not. 1635 */ 1636 static void 1637 ntwdt_stop_timer_lock(void *arg) 1638 { 1639 ntwdt_state_t *ntwdt_ptr = (void *)arg; 1640 ntwdt_wdog_t *wdog_state = ntwdt_ptr->ntwdt_wdog_state; 1641 1642 mutex_enter(&wdog_state->ntwdt_wdog_mutex); 1643 ntwdt_stop_timer(arg); 1644 mutex_exit(&wdog_state->ntwdt_wdog_mutex); 1645 } 1646 1647 /* 1648 * Add callbacks needed to react to major system state transitions. 1649 */ 1650 static void 1651 ntwdt_add_callbacks(ntwdt_state_t *ntwdt_ptr) 1652 { 1653 /* register a callback that's called during a panic */ 1654 ntwdt_callback_ids.ntwdt_panic_cb = callb_add(ntwdt_panic_cb, 1655 (void *)ntwdt_ptr, CB_CL_PANIC, "ntwdt_panic_cb"); 1656 } 1657 1658 /* 1659 * Remove callbacks added by ntwdt_add_callbacks. 1660 */ 1661 static void 1662 ntwdt_remove_callbacks() 1663 { 1664 callb_delete(ntwdt_callback_ids.ntwdt_panic_cb); 1665 } 1666 1667 /* 1668 * Initiate a Reset (as a result of the VWDT timeout expiring). 
1669 */ 1670 static void 1671 ntwdt_enforce_timeout() 1672 { 1673 if (ntwdt_disable_timeout_action != 0) { 1674 cmn_err(CE_NOTE, "OS timeout expired, taking no action"); 1675 return; 1676 } 1677 1678 NTWDT_DBG(WDT_DBG_VWDT, ("VWDT expired; do a crashdump")); 1679 1680 (void) kadmin(A_DUMP, AD_BOOT, NULL, kcred); 1681 cmn_err(CE_PANIC, "kadmin(A_DUMP, AD_BOOT) failed"); 1682 _NOTE(NOTREACHED) 1683 } 1684 1685 /* 1686 * Interpret the Properties from driver's config file. 1687 */ 1688 static int 1689 ntwdt_read_props(ntwdt_state_t *ntwdt_ptr) 1690 { 1691 ntwdt_wdog_t *wdog_state; 1692 int boot_timeout; 1693 1694 wdog_state = ntwdt_ptr->ntwdt_wdog_state; 1695 1696 /* 1697 * interpret Property that specifies how long 1698 * the watchdog-timeout should be set to when 1699 * Solaris panics. Assumption is that this value 1700 * is larger than the amount of time it takes 1701 * to reboot and write crashdump. If not, 1702 * ScApp could induce a reset, due to an expired 1703 * watchdog-timeout. 1704 */ 1705 wdog_state->ntwdt_boot_timeout = 1706 NTWDT_DEFAULT_BOOT_TIMEOUT; 1707 1708 boot_timeout = ddi_prop_get_int(DDI_DEV_T_ANY, 1709 ntwdt_ptr->ntwdt_dip, DDI_PROP_DONTPASS, 1710 NTWDT_BOOT_TIMEOUT_PROP, -1); 1711 1712 if (boot_timeout != -1 && boot_timeout > 0 && 1713 boot_timeout <= NTWDT_MAX_TIMEOUT) { 1714 wdog_state->ntwdt_boot_timeout = 1715 boot_timeout; 1716 } else { 1717 _NOTE(EMPTY) 1718 NTWDT_DBG(WDT_DBG_ENTRY, (NTWDT_BOOT_TIMEOUT_PROP 1719 ": using default of %d seconds.", 1720 wdog_state->ntwdt_boot_timeout)); 1721 } 1722 1723 return (DDI_SUCCESS); 1724 } 1725 1726 /* 1727 * Write state of SWDT to ScApp. 1728 * 1729 * Currently, this function is only called on attach() 1730 * of our driver. 1731 * 1732 * Note that we do not need to call this function, eg, 1733 * in response to a solicitation from ScApp (eg, 1734 * the LW8_SC_RESTARTED_EVENT). 
1735 * 1736 * Context: 1737 * called in Kernel Context 1738 */ 1739 static int 1740 ntwdt_set_swdt_state() 1741 { 1742 /* 1743 * note that ScApp only needs this one 1744 * variable when system is in SWDT mode. 1745 */ 1746 ntwdt_set_cfgvar(LW8_WDT_PROP_MODE, 1747 LW8_PROP_MODE_SWDT); 1748 1749 return (0); 1750 } 1751 1752 /* 1753 * Write all AWDT state to ScApp via the SBBC mailbox 1754 * in IOSRAM. Note that the permutation of Writes 1755 * is as specified in the design spec. 1756 * 1757 * Notes: caller must perform synchronization so that 1758 * this series of Writes is consistent as viewed 1759 * by ScApp (eg, there is no LW8_WDT_xxx mailbox 1760 * command that contains "all Properties"; each 1761 * Property must be written individually). 1762 */ 1763 static int 1764 ntwdt_set_awdt_state(ntwdt_wdog_t *rstatep) 1765 { 1766 /* ScApp expects values in this order: */ 1767 ntwdt_set_cfgvar(LW8_WDT_PROP_MODE, 1768 ntwdt_watchdog_activated != 0); 1769 ntwdt_set_cfgvar(LW8_WDT_PROP_TO, 1770 rstatep->ntwdt_wdog_timeout); 1771 ntwdt_set_cfgvar(LW8_WDT_PROP_RECOV, 1772 rstatep->ntwdt_reset_enabled); 1773 ntwdt_set_cfgvar(LW8_WDT_PROP_WDT, 1774 rstatep->ntwdt_wdog_enabled); 1775 1776 return (NTWDT_SUCCESS); 1777 } 1778 1779 /* 1780 * Write a specified WDT Property (and Value) to ScApp. 1781 * 1782 * <Property, Value> is passed in the LW8_MBOX_WDT_SET 1783 * (SBBC) mailbox message. The SBBC mailbox resides in 1784 * IOSRAM. 1785 * 1786 * Note that this function is responsible for ensuring that 1787 * a driver-specific representation of a mailbox <Value> is 1788 * mapped into the representation that is expected by ScApp 1789 * (eg, see LW8_WDT_PROP_RECOV). 1790 */ 1791 static int 1792 ntwdt_set_cfgvar(int var, int val) 1793 { 1794 int rv; 1795 int mbox_val; 1796 lw8_set_wdt_t set_wdt; 1797 1798 switch (var) { 1799 case LW8_WDT_PROP_RECOV: 1800 #ifdef DEBUG 1801 NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of 'recovery-enabled':" 1802 " %s (%d)", (val != 0) ? 
"enabled" : "disabled", val)); 1803 #endif 1804 mbox_val = (val != 0) ? LW8_PROP_RECOV_ENABLED : 1805 LW8_PROP_RECOV_DISABLED; 1806 break; 1807 1808 case LW8_WDT_PROP_WDT: 1809 #ifdef DEBUG 1810 NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of 'wdog-enabled':" 1811 " %s (%d)", (val != 0) ? "enabled" : "disabled", val)); 1812 #endif 1813 mbox_val = (val != 0) ? LW8_PROP_WDT_ENABLED : 1814 LW8_PROP_WDT_DISABLED; 1815 break; 1816 1817 case LW8_WDT_PROP_TO: 1818 #ifdef DEBUG 1819 NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of 'wdog-timeout':" 1820 " %d seconds", val)); 1821 #endif 1822 mbox_val = val; 1823 break; 1824 1825 case LW8_WDT_PROP_MODE: 1826 #ifdef DEBUG 1827 NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of 'wdog-mode':" 1828 " %s (%d)", (val != LW8_PROP_MODE_SWDT) ? 1829 "AWDT" : "SWDT", val)); 1830 #endif 1831 mbox_val = val; 1832 break; 1833 1834 default: 1835 ASSERT(0); 1836 _NOTE(NOTREACHED) 1837 } 1838 1839 set_wdt.property_id = var; 1840 set_wdt.value = mbox_val; 1841 1842 rv = ntwdt_lomcmd(LW8_MBOX_WDT_SET, (intptr_t)&set_wdt); 1843 if (rv != 0) { 1844 _NOTE(EMPTY) 1845 NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of prop/val %d/%d " 1846 "failed: %d", var, mbox_val, rv)); 1847 } 1848 1849 return (rv); 1850 } 1851 1852 static void 1853 ntwdt_set_cfgvar_noreply(int var, int val) 1854 { 1855 ntwdt_set_cfgvar(var, val); 1856 } 1857 1858 #ifdef DEBUG 1859 /* 1860 * Read a specified WDT Property from ScApp. 1861 * 1862 * <Property> is passed in the Request of the LW8_MBOX_WDT_GET 1863 * (SBBC) mailbox message, and the Property's <Value> 1864 * is returned in the message's Response. The SBBC mailbox 1865 * resides in IOSRAM. 
1866 */ 1867 static int 1868 ntwdt_get_cfgvar(int var, int *val) 1869 { 1870 lw8_get_wdt_t get_wdt; 1871 int rv; 1872 1873 rv = ntwdt_lomcmd(LW8_MBOX_WDT_GET, (intptr_t)&get_wdt); 1874 if (rv != 0) { 1875 _NOTE(EMPTY) 1876 NTWDT_DBG(WDT_DBG_PROT, ("MBOX_GET failed: %d", rv)); 1877 } else { 1878 switch (var) { 1879 case LW8_WDT_PROP_RECOV: 1880 *val = (uint8_t)get_wdt.recovery_enabled; 1881 NTWDT_DBG(WDT_DBG_PROT, ("MBOX_GET of 'reset-enabled':" 1882 " %s (%d)", (*val != 0) ? "enabled" : "disabled", 1883 *val)); 1884 break; 1885 1886 case LW8_WDT_PROP_WDT: 1887 *val = (uint8_t)get_wdt.watchdog_enabled; 1888 NTWDT_DBG(WDT_DBG_PROT, ("MBOX_GET of 'wdog-enabled':" 1889 " %s (%d)", (*val != 0) ? "enabled" : "disabled", 1890 *val)); 1891 break; 1892 1893 case LW8_WDT_PROP_TO: 1894 *val = (uint8_t)get_wdt.timeout; 1895 NTWDT_DBG(WDT_DBG_PROT, ("MBOX_GET of 'wdog-timeout':" 1896 " %d seconds", *val)); 1897 break; 1898 1899 default: 1900 ASSERT(0); 1901 _NOTE(NOTREACHED) 1902 } 1903 } 1904 1905 return (rv); 1906 } 1907 #endif 1908 1909 /* 1910 * Update the real system "heartbeat", which resides in IOSRAM. 1911 * This "heartbeat" is normally used in SWDT Mode, but when 1912 * in AWDT Mode, ScApp also uses its value to determine if Solaris 1913 * is up-and-running. 
1914 */ 1915 static void 1916 ntwdt_pat_hw_watchdog() 1917 { 1918 tod_iosram_t tod_buf; 1919 static uint32_t i_am_alive = 0; 1920 #ifdef DEBUG 1921 if (ntwdt_stop_heart != 0) 1922 return; 1923 #endif 1924 /* Update the system heartbeat */ 1925 if (i_am_alive == UINT32_MAX) 1926 i_am_alive = 0; 1927 else 1928 i_am_alive++; 1929 1930 NTWDT_DBG(WDT_DBG_HEART, ("update heartbeat: %d", 1931 i_am_alive)); 1932 1933 if (iosram_write(SBBC_TOD_KEY, OFFSET(tod_buf, tod_i_am_alive), 1934 (char *)&i_am_alive, sizeof (uint32_t))) { 1935 cmn_err(CE_WARN, "ntwdt_pat_hw_watchdog(): " 1936 "write heartbeat failed"); 1937 } 1938 } 1939 1940 /* 1941 * Write the specified value to the system's normal (IOSRAM) 1942 * location that's used to specify Solaris' watchdog-timeout 1943 * on Serengeti platforms. 1944 * 1945 * In SWDT Mode, this location can hold values [0,n). 1946 * In AWDT Mode, this location must have value 0 (else 1947 * after a ScApp-reboot, ScApp could mistakenly interpret 1948 * that the system is in SWDT Mode). 1949 */ 1950 static int 1951 ntwdt_set_hw_timeout(uint32_t period) 1952 { 1953 tod_iosram_t tod_buf; 1954 int rv; 1955 1956 rv = iosram_write(SBBC_TOD_KEY, OFFSET(tod_buf, tod_timeout_period), 1957 (char *)&period, sizeof (uint32_t)); 1958 if (rv != 0) 1959 cmn_err(CE_WARN, "write of %d for TOD timeout " 1960 "period failed: %d", period, rv); 1961 1962 return (rv); 1963 } 1964 1965 /* 1966 * Soft-interrupt handler that is triggered when ScApp wants 1967 * to know the current state of the app-wdog. 1968 * 1969 * Grab ntwdt_wdog_mutex so that we synchronize with any 1970 * concurrent User Context and Interrupt Context activity. Call 1971 * a function that writes a permutation of the watchdog state 1972 * to the SC, then release the mutex. 1973 * 1974 * We grab the mutex not only so that each variable is consistent 1975 * but also so that the *permutation* of variables is consistent. 
1976 * I.e., any set of one or more variables (that we write to SC 1977 * using multiple mailbox commands) will truly be seen as a 1978 * consistent snapshot. Note that if our protocol had a MBOX_SET 1979 * command that allowed writing all watchdog state in one 1980 * command, then the lock-hold latency would be greatly reduced. 1981 * To our advantage, this softint normally executes very 1982 * infrequently. 1983 * 1984 * Context: 1985 * called at Interrupt Context (DDI_SOFTINT_LOW) 1986 */ 1987 static uint_t 1988 ntwdt_mbox_softint(char *arg) 1989 { 1990 ntwdt_wdog_t *wdog_state; 1991 1992 wdog_state = ((ntwdt_state_t *)arg)->ntwdt_wdog_state; 1993 1994 ASSERT(wdog_state != NULL); 1995 1996 mutex_enter(&wdog_state->ntwdt_wdog_mutex); 1997 1998 /* tell ScApp state of AWDT */ 1999 ntwdt_set_awdt_state(wdog_state); 2000 2001 mutex_exit(&wdog_state->ntwdt_wdog_mutex); 2002 2003 return (DDI_INTR_CLAIMED); 2004 } 2005 2006 /* 2007 * Handle MBOX_EVENT_LW8 Events that are sent from ScApp. 2008 * 2009 * The only (sub-)type of Event we handle is the 2010 * LW8_EVENT_SC_RESTARTED Event. We handle this by triggering 2011 * a soft-interrupt only if we are in AWDT mode. 2012 * 2013 * ScApp sends this Event when it wants to learn the current 2014 * state of the AWDT variables. Design-wise, this is used to 2015 * handle the case where the SC reboots while the system is in 2016 * AWDT mode (if the SC reboots in SWDT mode, then ScApp 2017 * already knows all necessary info and therefore won't send 2018 * this Event). 2019 * 2020 * Context: 2021 * function is called in Interrupt Context (at DDI_SOFTINT_MED) 2022 * and we conditionally trigger a softint that will run at 2023 * DDI_SOFTINT_LOW. Note that function executes at 2024 * DDI_SOFTINT_MED due to how this handler was registered by 2025 * the implementation of sbbc_mbox_reg_intr(). 2026 * 2027 * Notes: 2028 * Currently, the LW8_EVENT_SC_RESTARTED Event is only sent 2029 * by SC when in AWDT mode. 
2030 */ 2031 static uint_t 2032 ntwdt_event_data_handler(char *arg) 2033 { 2034 lw8_event_t *payload; 2035 sbbc_msg_t *msg; 2036 2037 if (arg == NULL) { 2038 return (DDI_INTR_CLAIMED); 2039 } 2040 2041 msg = (sbbc_msg_t *)arg; 2042 if (msg->msg_buf == NULL) { 2043 return (DDI_INTR_CLAIMED); 2044 } 2045 2046 payload = (lw8_event_t *)msg->msg_buf; 2047 2048 switch (payload->event_type) { 2049 case LW8_EVENT_SC_RESTARTED: 2050 /* 2051 * then SC probably was rebooted, and it therefore 2052 * needs to know what the current state of AWDT is. 2053 */ 2054 NTWDT_DBG(WDT_DBG_EVENT, ("LW8_EVENT_SC_RESTARTED " 2055 "received in %s mode", 2056 (ntwdt_watchdog_activated != 0) ? "AWDT" : "SWDT")); 2057 2058 if (ntwdt_watchdog_activated != 0) { 2059 /* then system is in AWDT mode */ 2060 ddi_trigger_softintr(ntwdt_mbox_softint_id); 2061 } 2062 break; 2063 2064 default: 2065 NTWDT_DBG(WDT_DBG_EVENT, 2066 ("MBOX_EVENT_LW8: %d", payload->event_type)); 2067 break; 2068 } 2069 2070 return (DDI_INTR_CLAIMED); 2071 } 2072 2073 /* 2074 * Send an SBBC Mailbox command to ScApp. 2075 * 2076 * Use the sbbc_mbox_request_response utility function to 2077 * send the Request and receive the optional Response. 2078 * 2079 * Context: 2080 * can be called from Interrupt Context or User Context. 
2081 */ 2082 static int 2083 ntwdt_lomcmd(int cmd, intptr_t arg) 2084 { 2085 sbbc_msg_t request; 2086 sbbc_msg_t *reqp; 2087 sbbc_msg_t response; 2088 sbbc_msg_t *resp; 2089 int rv = 0; 2090 2091 reqp = &request; 2092 bzero((caddr_t)&request, sizeof (request)); 2093 reqp->msg_type.type = LW8_MBOX; 2094 reqp->msg_type.sub_type = (uint16_t)cmd; 2095 2096 resp = &response; 2097 bzero((caddr_t)&response, sizeof (response)); 2098 resp->msg_type.type = LW8_MBOX; 2099 resp->msg_type.sub_type = (uint16_t)cmd; 2100 2101 switch (cmd) { 2102 case LW8_MBOX_WDT_GET: 2103 reqp->msg_len = 0; 2104 reqp->msg_buf = (caddr_t)NULL; 2105 resp->msg_len = sizeof (lw8_get_wdt_t); 2106 resp->msg_buf = (caddr_t)arg; 2107 break; 2108 2109 case LW8_MBOX_WDT_SET: 2110 reqp->msg_len = sizeof (lw8_set_wdt_t); 2111 reqp->msg_buf = (caddr_t)arg; 2112 resp->msg_len = 0; 2113 resp->msg_buf = (caddr_t)NULL; 2114 break; 2115 2116 default: 2117 return (EINVAL); 2118 } 2119 2120 rv = sbbc_mbox_request_response(reqp, resp, 2121 LW8_DEFAULT_MAX_MBOX_WAIT_TIME); 2122 2123 if ((rv) || (resp->msg_status != SG_MBOX_STATUS_SUCCESS)) { 2124 2125 NTWDT_NDBG(WDT_DBG_PROT, ("SBBC mailbox error:" 2126 " (rv/msg_status)=(%d/%d)", rv, resp->msg_status)); 2127 2128 /* errors from sgsbbc */ 2129 if (resp->msg_status > 0) { 2130 return (resp->msg_status); 2131 } 2132 2133 /* errors from ScApp */ 2134 switch (resp->msg_status) { 2135 case SG_MBOX_STATUS_ILLEGAL_PARAMETER: 2136 /* illegal ioctl parameter */ 2137 return (EINVAL); 2138 2139 default: 2140 return (EIO); 2141 } 2142 } 2143 return (0); 2144 } 2145