1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * ntwdt driver
29 * ------------
30 *
31 * Subsystem Overview
32 * ------------------
33 *
34 * This is a pseudo driver for the Netra-1280 watchdog
35 * timer (WDT). It provides for an *application-driven*
36 * WDT (AWDT), not a traditional, hardware-based WDT. A
37 * hardware-based feature is already present on the
38 * Netra-1280, and it is referred to here as the
39 * System WDT (SWDT).
40 *
41 * ScApp and Solaris cooperate to provide either a SWDT or
42 * an AWDT; they are mutually-exclusive. Once in AWDT
43 * mode, one can only transition to SWDT mode via a reboot.
44 * This obviously gives priority to the AWDT and was done
45 * to handle scenarios where the customer might temporarily
46 * terminate their wdog-app in order to do some debugging,
47 * or even to load a new version of the wdog-app.
48 *
49 * The wdog-app does an open() of the /dev/ntwdt device node
50 * and then issues ioctl's to control the state of the AWDT.
51 * The ioctl's are implemented by this driver. Only one
52 * concurrent instance of open() is allowed. On the close(),
53 * a watchdog timer still in progress is NOT terminated.
54 * This allows the global state machine to monitor the
55 * progress of a Solaris reboot. ScApp will reset Solaris
56 * (eg, send an XIR) if the actual boot/crashdump latency
57 * is larger than the current AWDT timeout.
58 *
59 * The rationale for implementing an AWDT (vs a SWDT) is
60 * that it is more sensitive to system outage scenarios than
61 * a SWDT. Eg, a system could be in such a failed state that
62 * even though its clock-interrupt could still run (and the
63 * SWDT's watchdog timer therefore re-armed), the system could
64 * in effect have a corrupt or very poor dispatch latency.
65 * An AWDT would be sensitive to dispatch latency issues, as
66 * well as problems with its own execution (eg, a hang or
67 * crash).
68 *
69 * Subsystem Interface Overview
70 * ----------------------------
71 *
72 * This pseudo-driver does not have any 'extern' functions.
73 *
74 * All system interaction is done via the traditional driver
75 * entry points (eg, attach(9e), _init(9e)).
76 *
77 * All interaction with user is via the entry points in the
78 * 'struct cb_ops' vector (eg, open(9e), ioctl(9e), and
79 * close(9e)).
80 *
81 * Subsystem Implementation Overview
82 * ---------------------------------
83 *
84 * ScApp and Solaris (eg, ntwdt) cooperate so that a state
85 * machine global to ScApp and ntwdt is either in AWDT mode
86 * or in SWDT mode. These two peers communicate via the SBBC
87 * Mailbox that resides in IOSRAM (SBBC_MAILBOX_KEY).
88 * They use two new mailbox messages (LW8_MBOX_WDT_GET and
89 * LW8_MBOX_WDT_SET) and one new event (LW8_EVENT_SC_RESTARTED).
90 *
91 * ntwdt implements the AWDT by implementing a "virtual
92 * WDT" (VWDT). Eg, the watchdog timer is not a traditional
93 * counter in hardware, it is a variable in ntwdt's
94 * softstate. The wdog-app's actions cause changes to this
95 * and other variables in ntwdt's softstate.
96 *
97 * The wdog-app uses the LOMIOCDOGTIME ioctl to specify
98 * the number of seconds in the watchdog timeout (and
99 * therefore the VWDT). The wdog-app then uses the
100 * LOMIOCDOGCTL ioctl to enable the wdog. This causes
101 * ntwdt to create a Cyclic that will both decrement
102 * the VWDT and check to see if it has expired. To keep
103 * the VWDT from expiring, the wdog-app uses the
104 * LOMIOCDOGPAT ioctl to re-arm (or "pat") the watchdog.
105 * This sets the VWDT value to that specified in the
106 * last LOMIOCDOGTIME ioctl. The wdog-app can use the
107 * LOMIOCDOGSTATE ioctl to query the state of the VWDT.
108 *
109 * The wdog-app can also specify how Recovery is to be
110 * done. The only choice is whether to do a crashdump
111 * or not. If ntwdt computes a VWDT expiration, then
112 * ntwdt initiates the Recovery, else ScApp will. Eg,
113 * a hang in Solaris will be sensed by ScApp and not
114 * ntwdt. The wdog-app specifies the Recovery policy
115 * via the DOGCTL ioctl.
116 *
117 * Timeout Expiration
118 * ------------------
119 * In our implementation, ScApp senses a watchdog
120 * expiration the same way it historically has:
121 * by reading a well-known area of IOSRAM (SBBC_TOD_KEY)
122 * to see if the timestamp associated with a
123 * Solaris-generated "heartbeat" field is older
124 * than the currently specified timeout (which is
125 * also specified in this same IOSRAM section).
126 *
127 * What is different when ntwdt is running is that
128 * ntwdt is responsible for updating the Heartbeat,
129 * and not the normal client (todsg). When ntwdt
130 * puts the system in AWDT mode, it disables todsg's
131 * updating of the Heartbeat by changing the state of
132 * a pair of kernel tunables (watchdog_activated and
133 * watchdog_enable). ntwdt then takes responsibility
134 * for updating the Heartbeat. It does this by
135 * updating the Heartbeat from the Cyclic that is
136 * created when the user enables the AWDT (DOGCTL)
137 * or specifies a new timeout value (DOGTIME).
138 *
139 * As long as the AWDT is enabled, ntwdt will update
140 * the real system Heartbeat. As a result, ScApp
141 * will conclude that Solaris is still running. If
142 * the user stops re-arming the VWDT or Solaris
143 * hangs (eg), ntwdt will stop updating the Heartbeat.
144 *
145 * Note that ntwdt computes expiration via the
146 * repeatedly firing Cyclic, and ScApp computes
147 * expiration via a cessation of Heartbeat update.
148 * Since Heartbeat update stops once user stops
149 * re-arming the VWDT (ie, DOGPAT ioctl), ntwdt
150 * will compute a timeout at t(x), and ScApp will
151 * compute a timeout at t(2x), where 'x' is the
152 * current timeout value. When ntwdt computes
153 * the expiration, ntwdt masks this asymmetry.
154 *
155 * Lifecycle Events
156 * ----------------
157 *
158 * ntwdt only handles one of the coarse-grained
159 * "lifecycle events" (eg, entering OBP, shutdown,
160 * power-down, DR) that are possible during a Solaris
161 * session: a panic. (Note that ScApp handles one
162 * of the others: "entering OBP"). Other than these,
163 * a user choosing such a state transition must first
164 * use the wdog-app to disable the watchdog, else
165 * an expiration could occur.
166 *
167 * Solaris handles a panic by registering a handler
168 * that's called during the panic. The handler will
169 * set the watchdog timeout to the value specified
170 * in the NTWDT_BOOT_TIMEOUT_PROP driver Property.
171 * Again, this value should be greater than the actual
172 * Solaris reboot/crashdump latency.
173 *
174 * When the user enters OBP via the System Controller,
175 * ScApp will disable the watchdog (from ScApp's
176 * perspective), but it will not communicate this to
177 * ntwdt. After having exited OBP, the wdog-app can
178 * be used to enable or disable the watchdog (which
179 * will get both ScApp and ntwdt in-sync).
180 *
181 * Locking
182 * -------
183 *
184 * ntwdt has code running at three interrupt levels as
185 * well as base level.
186 *
187 * The ioctls run at base level in User Context. The
188 * driver's entry points run at base level in Kernel
189 * Context.
190 *
191 * ntwdt's three interrupt levels are used by:
192 *
193 * o LOCK_LEVEL :
194 * the Cyclic used to manage the VWDT is initialized
195 * to CY_LOCK_LEVEL
196 *
197 * o DDI_SOFTINT_MED :
198 * the SBBC mailbox implementation registers the
199 * specified handlers at this level
200 *
201 * o DDI_SOFTINT_LOW :
202 * this level is used by two handlers. One handler
203 * is triggered by the LOCK_LEVEL Cyclic. The other
204 * handler is triggered by the DDI_SOFTINT_MED
205 * handler registered to handle SBBC mailbox events.
206 *
207 * The centralizing concept is that the ntwdt_wdog_mutex
208 * in the driver's softstate is initialized to have an
209 * interrupt-block-cookie corresponding to DDI_SOFTINT_LOW.
210 *
211 * As a result, any base level code grabs ntwdt_wdog_mutex
212 * before doing work. Also, any handler running at interrupt
213 * level higher than DDI_SOFTINT_LOW "posts down" so that
214 * a DDI_SOFTINT_LOW handler is responsible for executing
215 * the "real work". Each DDI_SOFTINT_LOW handler also
216 * first grabs ntwdt_wdog_mutex, and so base level is
217 * synchronized with all interrupt levels.
218 *
219 * Note there's another mutex in the softstate: ntwdt_mutex.
220 * This mutex has few responsibilities. However, this
221 * locking order must be followed: ntwdt_wdog_mutex is
222 * held first, and then ntwdt_mutex. This choice results
223 * from the fact that the number of dynamic call sites
224 * for ntwdt_wdog_mutex is MUCH greater than that of
225 * ntwdt_mutex. As a result, almost all uses of
226 * ntwdt_wdog_mutex do not even require ntwdt_mutex to
227 * be held, which saves resources.
228 *
229 * Driver Properties
230 * -----------------
231 *
232 * "ddi-forceattach=1;"
233 * ------------------
234 *
235 * Using this allows our driver to be automatically
236 * loaded at boot-time AND to not be removed from memory
237 * solely due to memory-pressure.
238 *
239 * Being loaded at boot allows ntwdt to (as soon as
240 * possible) tell ScApp of the current mode of the
241 * state-machine (eg, SWDT). This is needed for the case
242 * when Solaris is re-loaded while in AWDT mode; having
243 * Solaris communicate ASAP with ScApp reduces the duration
244 * of any "split-brain" scenario where ScApp and Solaris
245 * are not in the same mode.
246 *
247 * Having ntwdt remain in memory even after a close()
248 * allows ntwdt to answer any SBBC mailbox commands
249 * that ScApp sends (as the mailbox infrastructure is
250 * not torn down until ntwdt is detach()'d). Specifically,
251 * ScApp could be re-loaded after AWDT mode had been
252 * entered and the wdog-app had close()'d ntwdt. ScApp
253 * will then eventually send a LW8_EVENT_SC_RESTARTED
254 * mailbox event in order to learn the current state of
255 * state-machine. Having ntwdt remain loaded allows this
256 * event to never go unanswered.
257 *
258 * "ntwdt-boottimeout=600;"
259 * ----------------------
260 *
261 * This specifies the watchdog timeout value (in seconds) to
262 * use when ntwdt is aware of the need to reboot/reload Solaris.
263 *
264 * ntwdt will update ScApp by setting the watchdog timeout
265 * to the specified number of seconds when either a) Solaris
266 * panics or b) the VWDT expires. Note that this is only done
267 * if the user has chosen to enable Reset.
268 *
269 * ntwdt boundary-checks the specified value, and if out-of-range,
270 * it initializes the watchdog timeout to a default value of
271 * NTWDT_DEFAULT_BOOT_TIMEOUT seconds. Note that this is a
272 * default value and is not a *minimum* value. The valid range
273 * for the watchdog timeout is between one second and
274 * NTWDT_MAX_TIMEOUT seconds, inclusive.
275 *
276 * If ntwdt-boottimeout is set to a value less than an actual
277 * Solaris boot's latency, ScApp will reset Solaris during boot.
278 * Note that a continuous series of ScApp-induced resets will
279 * not occur; ScApp only resets Solaris on the first transition
280 * into the watchdog-expired state.
281 */
282
283 #include <sys/note.h>
284 #include <sys/types.h>
285 #include <sys/callb.h>
286 #include <sys/stat.h>
287 #include <sys/conf.h>
288 #include <sys/ddi.h>
289 #include <sys/sunddi.h>
290 #include <sys/modctl.h>
291 #include <sys/ddi_impldefs.h>
292 #include <sys/kmem.h>
293 #include <sys/devops.h>
294 #include <sys/cyclic.h>
295 #include <sys/uadmin.h>
296 #include <sys/lw8_impl.h>
297 #include <sys/sgsbbc.h>
298 #include <sys/sgsbbc_iosram.h>
299 #include <sys/sgsbbc_mailbox.h>
300 #include <sys/todsg.h>
301 #include <sys/mem_config.h>
302 #include <sys/lom_io.h>
303 #include <sys/reboot.h>
304 #include <sys/clock.h>
305
306
/*
 * tunables
 */
/*
 * NOTE(review): the consumer of this tunable is not visible in this
 * part of the file; going by its name, a non-zero value presumably
 * suppresses the action normally taken on a watchdog timeout --
 * confirm against the expiration path before relying on it.
 */
int ntwdt_disable_timeout_action = 0;
#ifdef DEBUG
/*
 * tunable to simulate a Solaris hang.  If it is non-zero, then
 * no system heartbeats ("hardware patting") will be done,
 * even though all AWDT machinery is functioning OK.
 */
int ntwdt_stop_heart;
#endif
319
320 /*
321 * Driver Property
322 */
323 #define NTWDT_BOOT_TIMEOUT_PROP "ntwdt-boottimeout"
324
325 /*
326 * watchdog-timeout values (in seconds):
327 *
328 * NTWDT_DEFAULT_BOOT_TIMEOUT: the default value used if
329 * this driver is aware of the
330 * reboot.
331 *
332 * NTWDT_MAX_TIMEOUT: max value settable by app (via the
333 * LOMIOCDOGTIME ioctl)
334 */
335 #define NTWDT_DEFAULT_BOOT_TIMEOUT (10*60)
336 #define NTWDT_MAX_TIMEOUT (180*60)
337
338
339 #define NTWDT_CYCLIC_CHK_PERCENT (20)
340 #define NTWDT_MINOR_NODE "awdt"
/*
 * Byte offset of member 'field' within the structure object 'base'.
 * 'base' is parenthesized so that any lvalue expression of structure
 * type (eg, *ptr) may be passed, not just a plain identifier.
 */
#define	OFFSET(base, field)	((char *)&(base).field - (char *)&(base))
342
343 #define NTWDT_SUCCESS 0
344 #define NTWDT_FAILURE 1
345
346 typedef struct {
347 callb_id_t ntwdt_panic_cb;
348 } ntwdt_callback_ids_t;
349 static ntwdt_callback_ids_t ntwdt_callback_ids;
350
351 /* MBOX_EVENT_LW8 that is sent in IOSRAM Mailbox: */
352 static lw8_event_t lw8_event; /* payload */
353 static sbbc_msg_t sbbc_msg; /* message */
354
355 static ddi_softintr_t ntwdt_mbox_softint_id;
356 static ddi_softintr_t ntwdt_cyclic_softint_id;
357
/*
 * VWDT (i.e., Virtual Watchdog Timer) state
 *
 * One instance is allocated (zeroed) by attach(9E) and is reachable
 * via the ntwdt_wdog_state member of the softstate.
 */
typedef struct {
	/*
	 * Mutex initialized in attach(9E) with ntwdt_wdog_mtx_cookie, ie,
	 * with the iblock-cookie obtained for DDI_SOFTINT_LOW.
	 */
	kmutex_t		ntwdt_wdog_mutex;
	/* soft iblock-cookie (DDI_SOFTINT_LOW) used to init the mutex above */
	ddi_iblock_cookie_t	ntwdt_wdog_mtx_cookie;
	int			ntwdt_wdog_enabled;	/* wdog enabled ? */
	int			ntwdt_reset_enabled;	/* reset enabled ? */
	int			ntwdt_timer_running;	/* wdog running ? */
	int			ntwdt_wdog_expired;	/* wdog expired ? */
	int			ntwdt_is_initial_enable; /* 1st wdog-enable? */
	uint32_t		ntwdt_boot_timeout;	/* timeout for boot */
	uint32_t		ntwdt_secs_remaining;	/* expiration timer */
	uint8_t			ntwdt_wdog_action;	/* Reset action */
	uint32_t		ntwdt_wdog_timeout;	/* timeout in seconds */
	/* cyclic interval; attach(9E) sets it to NANOSEC (once per second) */
	hrtime_t		ntwdt_cyclic_interval;
	/* Cyclic handler; attach(9E) fills in its level, function and arg */
	cyc_handler_t		ntwdt_cycl_hdlr;
	/* NOTE(review): not set in attach(9E); presumably filled in when */
	/* the Cyclic is created -- confirm against the timer-start code */
	cyc_time_t		ntwdt_cycl_time;
	/* lock handed to sbbc_mbox_reg_intr() with the MBOX_EVENT_LW8 handler */
	kmutex_t		ntwdt_event_lock;	/* lock */
	/* bit-mask of NTWDT_FLAG_* values */
	uint64_t		ntwdt_wdog_flags;
} ntwdt_wdog_t;
379
380 /* ntwdt_wdog_flags */
381 #define NTWDT_FLAG_SKIP_CYCLIC 0x1 /* skip next Cyclic */
382
383 /* macros to set/clear one bit in ntwdt_wdog_flags */
384 #define NTWDT_FLAG_SET(p, f)\
385 ((p)->ntwdt_wdog_flags |= NTWDT_FLAG_##f)
386 #define NTWDT_FLAG_CLR(p, f)\
387 ((p)->ntwdt_wdog_flags &= ~NTWDT_FLAG_##f)
388
389
/*
 * softstate
 *
 * Per-instance driver state, allocated via ddi_soft_state_zalloc()
 * in attach(9E) and looked up with getstate().
 */
typedef struct {
	/* taken after ntwdt_wdog_mutex when both locks are needed */
	kmutex_t	ntwdt_mutex;
	dev_info_t	*ntwdt_dip;		/* dip */
	/* set to 1 by open(9E) while holding both mutexes */
	int		ntwdt_open_flag;	/* file open ? */
	ntwdt_wdog_t	*ntwdt_wdog_state;	/* wdog state */
	/* initialized to CYCLIC_NONE in attach(9E) */
	cyclic_id_t	ntwdt_cycl_id;
} ntwdt_state_t;
398
399 static void *ntwdt_statep; /* softstate */
400 static dev_info_t *ntwdt_dip;
401 /*
402 * if non-zero, then the app-wdog feature is available on
403 * this system configuration.
404 */
405 static int ntwdt_watchdog_available;
406 /*
407 * if non-zero, then application has used the LOMIOCDOGCTL
408 * ioctl at least once in order to Enable the app-wdog.
409 * Also, if this is non-zero, then system is in AWDT mode,
410 * else it is in SWDT mode.
411 */
412 static int ntwdt_watchdog_activated;
413
414 #define getstate(minor) \
415 ((ntwdt_state_t *)ddi_get_soft_state(ntwdt_statep, (minor)))
416
417 static int ntwdt_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
418 static int ntwdt_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
419 static int ntwdt_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
420 void **result);
421 static int ntwdt_open(dev_t *, int, int, cred_t *);
422 static int ntwdt_close(dev_t, int, int, cred_t *);
423 static int ntwdt_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
424
425 static void ntwdt_reprogram_wd(ntwdt_state_t *);
426 static boolean_t ntwdt_panic_cb(void *arg, int code);
427 static void ntwdt_start_timer(ntwdt_state_t *);
428 static void ntwdt_stop_timer(void *);
429 static void ntwdt_stop_timer_lock(void *arg);
430 static void ntwdt_add_callbacks(ntwdt_state_t *ntwdt_ptr);
431 static void ntwdt_remove_callbacks();
432 static void ntwdt_cyclic_pat(void *arg);
433 static void ntwdt_enforce_timeout();
434 static void ntwdt_pat_hw_watchdog();
435 static int ntwdt_set_cfgvar(int var, int val);
436 static void ntwdt_set_cfgvar_noreply(int var, int val);
437 static int ntwdt_read_props(ntwdt_state_t *);
438 static int ntwdt_add_mbox_handlers(ntwdt_state_t *);
439 static int ntwdt_set_hw_timeout(uint32_t period);
440 static int ntwdt_remove_mbox_handlers(void);
441 static uint_t ntwdt_event_data_handler(char *arg);
442 static uint_t ntwdt_mbox_softint(char *arg);
443 static uint_t ntwdt_cyclic_softint(char *arg);
444 static int ntwdt_lomcmd(int cmd, intptr_t arg);
445 static int ntwdt_chk_wdog_support();
446 static int ntwdt_chk_sc_support();
447 static int ntwdt_set_swdt_state();
448 static void ntwdt_swdt_to_awdt(ntwdt_wdog_t *);
449 static void ntwdt_arm_vwdt(ntwdt_wdog_t *wdog_state);
450 #ifdef DEBUG
451 static int ntwdt_get_cfgvar(int var, int *val);
452 #endif
453
454 struct cb_ops ntwdt_cb_ops = {
455 ntwdt_open, /* open */
456 ntwdt_close, /* close */
457 nulldev, /* strategy */
458 nulldev, /* print */
459 nulldev, /* dump */
460 nulldev, /* read */
461 nulldev, /* write */
462 ntwdt_ioctl, /* ioctl */
463 nulldev, /* devmap */
464 nulldev, /* mmap */
465 nulldev, /* segmap */
466 nochpoll, /* poll */
467 ddi_prop_op, /* cb_prop_op */
468 NULL, /* streamtab */
469 D_MP | D_NEW
470 };
471
472 static struct dev_ops ntwdt_ops = {
473 DEVO_REV, /* Devo_rev */
474 0, /* Refcnt */
475 ntwdt_info, /* Info */
476 nulldev, /* Identify */
477 nulldev, /* Probe */
478 ntwdt_attach, /* Attach */
479 ntwdt_detach, /* Detach */
480 nodev, /* Reset */
481 &ntwdt_cb_ops, /* Driver operations */
482 0, /* Bus operations */
483 NULL /* Power */
484 };
485
486 static struct modldrv modldrv = {
487 &mod_driverops, /* This one is a driver */
488 "ntwdt-Netra-T12", /* Name of the module. */
489 &ntwdt_ops, /* Driver ops */
490 };
491
492 static struct modlinkage modlinkage = {
493 MODREV_1, (void *)&modldrv, NULL
494 };
495
496
497 /*
498 * Flags to set in ntwdt_debug.
499 *
500 * Use either the NTWDT_DBG or NTWDT_NDBG macros
501 */
502 #define WDT_DBG_ENTRY 0x00000001 /* drv entry points */
503 #define WDT_DBG_HEART 0x00000002 /* system heartbeat */
504 #define WDT_DBG_VWDT 0x00000004 /* virtual WDT */
505 #define WDT_DBG_EVENT 0x00000010 /* SBBC Mbox events */
506 #define WDT_DBG_PROT 0x00000020 /* SC/Solaris protocol */
507 #define WDT_DBG_IOCTL 0x00000040 /* ioctl's */
508
509 uint64_t ntwdt_debug; /* enables tracing of module's activity */
510
511 /* used in non-debug version of module */
512 #define NTWDT_NDBG(flag, msg) { if ((ntwdt_debug & (flag)) != 0) \
513 (void) printf msg; }
514
515 #ifdef DEBUG
516 typedef struct {
517 uint32_t ntwdt_wd1;
518 uint8_t ntwdt_wd2;
519 } ntwdt_data_t;
520
521 #define NTWDTIOCSTATE _IOWR('a', 0xa, ntwdt_data_t)
522 #define NTWDTIOCPANIC _IOR('a', 0xb, uint32_t)
523
524 /* used in debug version of module */
525 #define NTWDT_DBG(flag, msg) { if ((ntwdt_debug & (flag)) != 0) \
526 (void) printf msg; }
527 #else
528 #define NTWDT_DBG(flag, msg)
529 #endif
530
531
532 int
_init(void)533 _init(void)
534 {
535 int error = 0;
536
537 NTWDT_DBG(WDT_DBG_ENTRY, ("_init"));
538
539 /* Initialize the soft state structures */
540 if ((error = ddi_soft_state_init(&ntwdt_statep,
541 sizeof (ntwdt_state_t), 1)) != 0) {
542 return (error);
543 }
544
545 /* Install the loadable module */
546 if ((error = mod_install(&modlinkage)) != 0) {
547 ddi_soft_state_fini(&ntwdt_statep);
548 }
549 return (error);
550 }
551
552 int
_info(struct modinfo * modinfop)553 _info(struct modinfo *modinfop)
554 {
555 NTWDT_DBG(WDT_DBG_ENTRY, ("_info"));
556
557 return (mod_info(&modlinkage, modinfop));
558 }
559
560 int
_fini(void)561 _fini(void)
562 {
563 int error;
564
565 NTWDT_DBG(WDT_DBG_ENTRY, ("_fini"));
566
567 error = mod_remove(&modlinkage);
568 if (error == 0) {
569 ddi_soft_state_fini(&ntwdt_statep);
570 }
571
572 return (error);
573 }
574
575 static int
ntwdt_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)576 ntwdt_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
577 {
578 int instance;
579 ntwdt_state_t *ntwdt_ptr = NULL;
580 ntwdt_wdog_t *wdog_state = NULL;
581 cyc_handler_t *hdlr = NULL;
582
583 NTWDT_DBG(WDT_DBG_ENTRY, ("attach: dip/cmd: 0x%p/%d",
584 (void *)dip, cmd));
585
586 switch (cmd) {
587 case DDI_ATTACH:
588 break;
589
590 case DDI_RESUME:
591 return (DDI_SUCCESS);
592
593 default:
594 return (DDI_FAILURE);
595 }
596
597 /* see if app-wdog is supported on our config */
598 if (ntwdt_chk_wdog_support() != 0)
599 return (DDI_FAILURE);
600
601 /* (unsolicitedly) send SWDT state to ScApp via mailbox */
602 (void) ntwdt_set_swdt_state();
603
604 instance = ddi_get_instance(dip);
605 ASSERT(instance == 0);
606
607 if (ddi_soft_state_zalloc(ntwdt_statep, instance)
608 != DDI_SUCCESS) {
609 return (DDI_FAILURE);
610 }
611 ntwdt_ptr = ddi_get_soft_state(ntwdt_statep, instance);
612 ASSERT(ntwdt_ptr != NULL);
613
614 ntwdt_dip = dip;
615
616 ntwdt_ptr->ntwdt_dip = dip;
617 ntwdt_ptr->ntwdt_cycl_id = CYCLIC_NONE;
618 mutex_init(&ntwdt_ptr->ntwdt_mutex, NULL,
619 MUTEX_DRIVER, NULL);
620
621 /*
622 * Initialize the watchdog structure
623 */
624 ntwdt_ptr->ntwdt_wdog_state =
625 kmem_zalloc(sizeof (ntwdt_wdog_t), KM_SLEEP);
626 wdog_state = ntwdt_ptr->ntwdt_wdog_state;
627
628 /*
629 * Create an iblock-cookie so that ntwdt_wdog_mutex can be
630 * used at User Context and Interrupt Context.
631 */
632 if (ddi_get_soft_iblock_cookie(dip, DDI_SOFTINT_LOW,
633 &wdog_state->ntwdt_wdog_mtx_cookie) != DDI_SUCCESS) {
634 cmn_err(CE_WARN, "init of iblock cookie failed "
635 "for ntwdt_wdog_mutex");
636 goto err1;
637 } else {
638 mutex_init(&wdog_state->ntwdt_wdog_mutex, NULL, MUTEX_DRIVER,
639 (void *)wdog_state->ntwdt_wdog_mtx_cookie);
640 }
641
642 mutex_init(&wdog_state->ntwdt_event_lock, NULL,
643 MUTEX_DRIVER, NULL);
644
645 /* Cyclic fires once per second: */
646 wdog_state->ntwdt_cyclic_interval = NANOSEC;
647
648 /* interpret our .conf file. */
649 (void) ntwdt_read_props(ntwdt_ptr);
650
651 /* init the Cyclic that drives the VWDT */
652 hdlr = &wdog_state->ntwdt_cycl_hdlr;
653 hdlr->cyh_level = CY_LOCK_LEVEL;
654 hdlr->cyh_func = ntwdt_cyclic_pat;
655 hdlr->cyh_arg = (void *)ntwdt_ptr;
656
657 /* Register handler for SBBC Mailbox events */
658 if (ntwdt_add_mbox_handlers(ntwdt_ptr) != DDI_SUCCESS)
659 goto err2;
660
661 /* Softint that will be triggered by Cyclic that drives VWDT */
662 if (ddi_add_softintr(dip, DDI_SOFTINT_LOW, &ntwdt_cyclic_softint_id,
663 NULL, NULL, ntwdt_cyclic_softint, (caddr_t)ntwdt_ptr)
664 != DDI_SUCCESS) {
665 cmn_err(CE_WARN, "failed to add cyclic softintr");
666 goto err3;
667 }
668
669 /* Register callbacks for various system events, e.g. panic */
670 ntwdt_add_callbacks(ntwdt_ptr);
671
672 /*
673 * Create Minor Node as last activity. This prevents
674 * application from accessing our implementation until it
675 * is initialized.
676 */
677 if (ddi_create_minor_node(dip, NTWDT_MINOR_NODE, S_IFCHR, 0,
678 DDI_PSEUDO, 0) == DDI_FAILURE) {
679 cmn_err(CE_WARN, "failed to create Minor Node: %s",
680 NTWDT_MINOR_NODE);
681 goto err4;
682 }
683
684 /* Display our driver info in the banner */
685 ddi_report_dev(dip);
686
687 return (DDI_SUCCESS);
688
689 err4:
690 ntwdt_remove_callbacks();
691 ddi_remove_softintr(ntwdt_cyclic_softint_id);
692 err3:
693 (void) ntwdt_remove_mbox_handlers();
694 err2:
695 mutex_destroy(&wdog_state->ntwdt_event_lock);
696 mutex_destroy(&wdog_state->ntwdt_wdog_mutex);
697 err1:
698 kmem_free(wdog_state, sizeof (ntwdt_wdog_t));
699 ntwdt_ptr->ntwdt_wdog_state = NULL;
700
701 mutex_destroy(&ntwdt_ptr->ntwdt_mutex);
702 ddi_soft_state_free(ntwdt_statep, instance);
703
704 ntwdt_dip = NULL;
705
706 return (DDI_FAILURE);
707 }
708
709 /*
710 * Do static checks to see if the app-wdog feature is supported in
711 * the current configuration.
712 *
713 * If the kernel debugger was booted, then we disallow the app-wdog
714 * feature, as we assume the user will be interested more in
715 * debuggability of system than its ability to support an app-wdog.
716 * (Note that the System Watchdog (SWDT) can still be available).
717 *
718 * If the currently loaded version of ScApp does not understand one
719 * of the IOSRAM mailbox messages that is specific to the app-wdog
720 * protocol, then we disallow use of the app-wdog feature (else
721 * we could have a "split-brain" scenario where Solaris supports
722 * app-wdog but ScApp doesn't).
723 *
724 * Note that there is no *dynamic* checking of whether ScApp supports
725 * the wdog protocol. Eg, if a new version of ScApp was loaded out
726 * from under Solaris, then once in AWDT mode, Solaris has no way
727 * of knowing that (a possibly older version of) ScApp was loaded.
728 */
729 static int
ntwdt_chk_wdog_support()730 ntwdt_chk_wdog_support()
731 {
732 int retval = ENOTSUP;
733 int rv;
734
735 if ((boothowto & RB_DEBUG) != 0) {
736 cmn_err(CE_WARN, "kernel debugger was booted; "
737 "application watchdog is not available.");
738 return (retval);
739 }
740
741 /*
742 * if ScApp does not support the MBOX_GET cmd, then
743 * it does not support the app-wdog feature. Also,
744 * if there is *any* type of SBBC Mailbox error at
745 * this point, we will disable the app watchdog
746 * feature.
747 */
748 if ((rv = ntwdt_chk_sc_support()) != 0) {
749 if (rv == EINVAL)
750 cmn_err(CE_WARN, "ScApp does not support "
751 "the application watchdog feature.");
752 else
753 cmn_err(CE_WARN, "SBBC mailbox had error;"
754 "application watchdog is not available.");
755 retval = rv;
756 } else {
757 ntwdt_watchdog_available = 1;
758 retval = 0;
759 }
760
761 NTWDT_DBG(WDT_DBG_PROT, ("app-wdog is %savailable",
762 (ntwdt_watchdog_available != 0) ? "" : "not "));
763
764 return (retval);
765 }
766
767 /*
768 * Check to see if ScApp supports the app-watchdog feature.
769 *
770 * Do this by sending one of the mailbox commands that is
771 * specific to the app-wdog protocol. If ScApp does not
772 * return an error code, we will assume it understands it
773 * (as well as the remainder of the app-wdog protocol).
774 *
775 * Notes:
776 * ntwdt_lomcmd() will return EINVAL if ScApp does not
777 * understand the message. The underlying sbbc_mbox_
778 * utility function returns SG_MBOX_STATUS_ILLEGAL_PARAMETER
779 * ("illegal ioctl parameter").
780 */
781 static int
ntwdt_chk_sc_support()782 ntwdt_chk_sc_support()
783 {
784 lw8_get_wdt_t get_wdt;
785
786 return (ntwdt_lomcmd(LW8_MBOX_WDT_GET, (intptr_t)&get_wdt));
787 }
788
789 static int
ntwdt_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)790 ntwdt_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
791 {
792 int instance = ddi_get_instance(dip);
793 ntwdt_state_t *ntwdt_ptr = NULL;
794
795 NTWDT_DBG(WDT_DBG_ENTRY, ("detach: dip/cmd: 0x%p/%d",
796 (void *)dip, cmd));
797
798 ntwdt_ptr = ddi_get_soft_state(ntwdt_statep, instance);
799 if (ntwdt_ptr == NULL) {
800 return (DDI_FAILURE);
801 }
802
803 switch (cmd) {
804 case DDI_SUSPEND:
805 return (DDI_SUCCESS);
806
807 case DDI_DETACH:
808 /*
809 * release resources in opposite (LIFO) order as
810 * were allocated in attach(9f).
811 */
812 ddi_remove_minor_node(dip, NULL);
813
814 ntwdt_stop_timer_lock((void *)ntwdt_ptr);
815
816 ntwdt_remove_callbacks();
817
818 ddi_remove_softintr(ntwdt_cyclic_softint_id);
819
820 (void) ntwdt_remove_mbox_handlers();
821
822 mutex_destroy(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_event_lock);
823 mutex_destroy(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex);
824 kmem_free(ntwdt_ptr->ntwdt_wdog_state,
825 sizeof (ntwdt_wdog_t));
826 ntwdt_ptr->ntwdt_wdog_state = NULL;
827
828 mutex_destroy(&ntwdt_ptr->ntwdt_mutex);
829
830 ddi_soft_state_free(ntwdt_statep, instance);
831
832 ntwdt_dip = NULL;
833 return (DDI_SUCCESS);
834
835 default:
836 return (DDI_FAILURE);
837 }
838 }
839
840 /*
841 * Register the SBBC Mailbox handlers.
842 *
843 * Currently, only one handler is used. It processes the MBOX_EVENT_LW8
844 * Events that are sent by ScApp. Of the Events that are sent, only
845 * the Event declaring that ScApp is coming up from a reboot
846 * (LW8_EVENT_SC_RESTARTED) is processed.
847 *
848 * sbbc_mbox_reg_intr registers the handler so that it executes at
849 * a DDI_SOFTINT_MED priority.
850 */
851 static int
ntwdt_add_mbox_handlers(ntwdt_state_t * ntwdt_ptr)852 ntwdt_add_mbox_handlers(ntwdt_state_t *ntwdt_ptr)
853 {
854 int err;
855
856 /*
857 * We need two interrupt handlers to handle the SBBC mbox
858 * events. The sbbc_mbox_xxx implementation will
859 * trigger our ntwdt_event_data_handler, which itself will
860 * trigger our ntwdt_mbox_softint. As a result, we'll
861 * register ntwdt_mbox_softint first, to ensure it cannot
862 * be called (until its caller, ntwdt_event_data_handler)
863 * is registered.
864 */
865
866 /*
867 * add the softint that will do the real work of handling the
868 * LW8_SC_RESTARTED_EVENT sent from ScApp.
869 */
870 if (ddi_add_softintr(ntwdt_ptr->ntwdt_dip, DDI_SOFTINT_LOW,
871 &ntwdt_mbox_softint_id, NULL, NULL, ntwdt_mbox_softint,
872 (caddr_t)ntwdt_ptr) != DDI_SUCCESS) {
873 cmn_err(CE_WARN, "Failed to add MBOX_EVENT_LW8 softintr");
874 return (DDI_FAILURE);
875 }
876
877 /*
878 * Register an interrupt handler with the SBBC mailbox utility.
879 * This handler will get called on each event of each type of
880 * MBOX_EVENT_LW8 events. However, it will only conditionally
881 * trigger the worker-handler (ntwdt_mbox_softintr).
882 */
883 sbbc_msg.msg_buf = (caddr_t)&lw8_event;
884 sbbc_msg.msg_len = sizeof (lw8_event);
885
886 err = sbbc_mbox_reg_intr(MBOX_EVENT_LW8, ntwdt_event_data_handler,
887 &sbbc_msg, NULL, &ntwdt_ptr->ntwdt_wdog_state->ntwdt_event_lock);
888 if (err != 0) {
889 cmn_err(CE_WARN, "Failed to register SBBC MBOX_EVENT_LW8"
890 " handler. err=%d", err);
891
892 ddi_remove_softintr(ntwdt_mbox_softint_id);
893 return (DDI_FAILURE);
894 }
895
896 return (DDI_SUCCESS);
897 }
898
899 /*
900 * Unregister the SBBC Mailbox handlers that were registered
901 * by ntwdt_add_mbox_handlers.
902 */
903 static int
ntwdt_remove_mbox_handlers(void)904 ntwdt_remove_mbox_handlers(void)
905 {
906 int rv = DDI_SUCCESS;
907 int err;
908
909 /*
910 * unregister the two handlers that cooperate to handle
911 * the LW8_SC_RESTARTED_EVENT. Note that they are unregistered
912 * in LIFO order (as compared to how they were registered).
913 */
914 err = sbbc_mbox_unreg_intr(MBOX_EVENT_LW8, ntwdt_event_data_handler);
915 if (err != 0) {
916 cmn_err(CE_WARN, "Failed to unregister sbbc MBOX_EVENT_LW8 "
917 "handler. Err=%d", err);
918 rv = DDI_FAILURE;
919 }
920
921 /* remove the associated softint */
922 ddi_remove_softintr(ntwdt_mbox_softint_id);
923
924 return (rv);
925 }
926
927 _NOTE(ARGSUSED(0))
928 static int
ntwdt_info(dev_info_t * dip,ddi_info_cmd_t infocmd,void * arg,void ** result)929 ntwdt_info(dev_info_t *dip, ddi_info_cmd_t infocmd,
930 void *arg, void **result)
931 {
932 dev_t dev;
933 int instance;
934 int error = DDI_SUCCESS;
935
936 if (result == NULL)
937 return (DDI_FAILURE);
938
939 switch (infocmd) {
940 case DDI_INFO_DEVT2DEVINFO:
941 dev = (dev_t)arg;
942 if (getminor(dev) == 0)
943 *result = (void *)ntwdt_dip;
944 else
945 error = DDI_FAILURE;
946 break;
947
948 case DDI_INFO_DEVT2INSTANCE:
949 dev = (dev_t)arg;
950 instance = getminor(dev);
951 *result = (void *)(uintptr_t)instance;
952 break;
953
954 default:
955 error = DDI_FAILURE;
956 }
957
958 return (error);
959 }
960
961 /*
962 * Open the device this driver manages.
963 *
964 * Ensure the caller is a privileged process, else
965 * a non-privileged user could cause denial-of-service
966 * and/or negatively impact reliability/availability.
967 *
968 * Ensure there is only one concurrent open().
969 */
970 _NOTE(ARGSUSED(1))
971 static int
ntwdt_open(dev_t * devp,int flag,int otyp,cred_t * credp)972 ntwdt_open(dev_t *devp, int flag, int otyp, cred_t *credp)
973 {
974 int inst = getminor(*devp);
975 int ret = 0;
976 ntwdt_state_t *ntwdt_ptr = getstate(inst);
977
978 NTWDT_DBG(WDT_DBG_ENTRY, ("open: inst/soft: %d/0x%p",
979 inst, (void *)ntwdt_ptr));
980
981 /* ensure caller is a privileged process */
982 if (drv_priv(credp) != 0)
983 return (EPERM);
984
985 /*
986 * Check for a Deferred Attach scenario.
987 * Return ENXIO so DDI framework will call
988 * attach() and then retry the open().
989 */
990 if (ntwdt_ptr == NULL)
991 return (ENXIO);
992
993 mutex_enter(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex);
994 mutex_enter(&ntwdt_ptr->ntwdt_mutex);
995 if (ntwdt_ptr->ntwdt_open_flag != 0)
996 ret = EAGAIN;
997 else
998 ntwdt_ptr->ntwdt_open_flag = 1;
999 mutex_exit(&ntwdt_ptr->ntwdt_mutex);
1000 mutex_exit(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex);
1001
1002 return (ret);
1003 }
1004
1005 /*
1006 * Close the device this driver manages.
1007 *
1008 * Notes:
1009 *
1010 * The close() can happen while the AWDT is running !
1011 * (and nothing is done, eg, to disable the watchdog
1012 * or to stop updating the system heartbeat). This
1013 * is the desired behavior, as this allows for the
1014 * case of monitoring a Solaris reboot in terms
1015 * of watchdog expiration.
1016 */
1017 _NOTE(ARGSUSED(1))
1018 static int
ntwdt_close(dev_t dev,int flag,int otyp,cred_t * credp)1019 ntwdt_close(dev_t dev, int flag, int otyp, cred_t *credp)
1020 {
1021 int inst = getminor(dev);
1022 ntwdt_state_t *ntwdt_ptr = getstate(inst);
1023
1024 NTWDT_DBG(WDT_DBG_ENTRY, ("close: inst/soft: %d/0x%p",
1025 inst, (void *)ntwdt_ptr));
1026
1027 if (ntwdt_ptr == NULL)
1028 return (ENXIO);
1029
1030 mutex_enter(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex);
1031 mutex_enter(&ntwdt_ptr->ntwdt_mutex);
1032 if (ntwdt_ptr->ntwdt_open_flag != 0) {
1033 ntwdt_ptr->ntwdt_open_flag = 0;
1034 }
1035 mutex_exit(&ntwdt_ptr->ntwdt_mutex);
1036 mutex_exit(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex);
1037
1038 return (0);
1039 }
1040
1041 _NOTE(ARGSUSED(4))
1042 static int
ntwdt_ioctl(dev_t dev,int cmd,intptr_t arg,int mode,cred_t * credp,int * rvalp)1043 ntwdt_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
1044 cred_t *credp, int *rvalp)
1045 {
1046 int inst = getminor(dev);
1047 int retval = 0;
1048 ntwdt_state_t *ntwdt_ptr = NULL;
1049 ntwdt_wdog_t *wdog_state;
1050
1051 if ((ntwdt_ptr = getstate(inst)) == NULL)
1052 return (ENXIO);
1053
1054 /* Only allow ioctl's if Solaris/ScApp support app-wdog */
1055 if (ntwdt_watchdog_available == 0)
1056 return (ENXIO);
1057
1058 wdog_state = ntwdt_ptr->ntwdt_wdog_state;
1059
1060 switch (cmd) {
1061 case LOMIOCDOGSTATE: {
1062 /*
1063 * Return the state of the AWDT to the application.
1064 */
1065 lom_dogstate_t lom_dogstate;
1066
1067 mutex_enter(&wdog_state->ntwdt_wdog_mutex);
1068 lom_dogstate.reset_enable =
1069 wdog_state->ntwdt_reset_enabled;
1070 lom_dogstate.dog_enable =
1071 wdog_state->ntwdt_wdog_enabled;
1072 lom_dogstate.dog_timeout =
1073 wdog_state->ntwdt_wdog_timeout;
1074 mutex_exit(&wdog_state->ntwdt_wdog_mutex);
1075
1076 NTWDT_DBG(WDT_DBG_IOCTL, ("DOGSTATE: wdog/reset/timeout:"
1077 " %d/%d/%d", lom_dogstate.dog_enable,
1078 lom_dogstate.reset_enable, lom_dogstate.dog_timeout));
1079
1080 if (ddi_copyout((caddr_t)&lom_dogstate, (caddr_t)arg,
1081 sizeof (lom_dogstate_t), mode) != 0) {
1082 retval = EFAULT;
1083 }
1084 break;
1085 }
1086
1087 case LOMIOCDOGCTL: {
1088 /*
1089 * Allow application to control whether watchdog
1090 * is {dis,en}abled and whether Reset is
1091 * {dis,en}abled.
1092 */
1093 lom_dogctl_t lom_dogctl;
1094
1095 if (ddi_copyin((caddr_t)arg, (caddr_t)&lom_dogctl,
1096 sizeof (lom_dogctl_t), mode) != 0) {
1097 retval = EFAULT;
1098 break;
1099 }
1100
1101 NTWDT_DBG(WDT_DBG_IOCTL, ("DOGCTL: wdog/reset:"
1102 " %d/%d", lom_dogctl.dog_enable,
1103 lom_dogctl.reset_enable));
1104
1105 mutex_enter(&wdog_state->ntwdt_wdog_mutex);
1106
1107 if (wdog_state->ntwdt_wdog_timeout == 0) {
1108 /*
1109 * then LOMIOCDOGTIME has never been used
1110 * to setup a valid timeout.
1111 */
1112 retval = EINVAL;
1113 goto end;
1114 }
1115
1116 /*
1117 * Return error for the non-sensical combination:
1118 * "enable Reset" and "disable watchdog".
1119 */
1120 if (lom_dogctl.dog_enable == 0 &&
1121 lom_dogctl.reset_enable != 0) {
1122 retval = EINVAL;
1123 goto end;
1124 }
1125
1126 /*
1127 * Store the user-specified state in our softstate.
1128 * Note that our implementation here is stateless.
1129 * Eg, we do not disallow an "enable the watchdog"
1130 * command when the watchdog is currently enabled.
1131 * This is needed (at least in the case) when
1132 * the user enters OBP via ScApp/lom. In that case,
1133 * ScApp disables the watchdog, but does not inform
1134 * Solaris. As a result, an ensuing, unfiltered DOGCTL
1135 * to enable the watchdog is required.
1136 */
1137 wdog_state->ntwdt_reset_enabled =
1138 lom_dogctl.reset_enable;
1139 wdog_state->ntwdt_wdog_enabled =
1140 lom_dogctl.dog_enable;
1141
1142 if (wdog_state->ntwdt_wdog_enabled != 0) {
1143 /*
1144 * then user wants to enable watchdog.
1145 * Arm the watchdog timer and start the
1146 * Cyclic, if it is not running.
1147 */
1148 ntwdt_arm_vwdt(wdog_state);
1149
1150 if (wdog_state->ntwdt_timer_running == 0) {
1151 ntwdt_start_timer(ntwdt_ptr);
1152 }
1153 } else {
1154 /*
1155 * user wants to disable the watchdog.
1156 * Note that we do not set ntwdt_secs_remaining
1157 * to zero; that could cause a false expiration.
1158 */
1159 if (wdog_state->ntwdt_timer_running != 0) {
1160 ntwdt_stop_timer(ntwdt_ptr);
1161 }
1162 }
1163
1164 /*
1165 * Send a permutation of mailbox commands to
1166 * ScApp that describes the current state of the
1167 * watchdog timer. Note that the permutation
1168 * depends on whether this is the first
1169 * Enabling of the watchdog or not.
1170 */
1171 if (wdog_state->ntwdt_wdog_enabled != 0 &&
1172 wdog_state->ntwdt_is_initial_enable == 0) {
1173
1174 /* switch from SWDT to AWDT mode */
1175 ntwdt_swdt_to_awdt(wdog_state);
1176
1177 /* Tell ScApp we're in AWDT mode */
1178 (void) ntwdt_set_cfgvar(LW8_WDT_PROP_MODE,
1179 LW8_PROP_MODE_AWDT);
1180 }
1181
1182 /* Inform ScApp of the choices made by the app */
1183 (void) ntwdt_set_cfgvar(LW8_WDT_PROP_WDT,
1184 wdog_state->ntwdt_wdog_enabled);
1185 (void) ntwdt_set_cfgvar(LW8_WDT_PROP_RECOV,
1186 wdog_state->ntwdt_reset_enabled);
1187
1188 if (wdog_state->ntwdt_wdog_enabled != 0 &&
1189 wdog_state->ntwdt_is_initial_enable == 0) {
1190 /*
1191 * Clear tod_iosram_t.tod_timeout_period,
1192 * which is used in SWDT part of state
1193 * machine. (If this field is non-zero,
1194 * ScApp assumes that Solaris' SWDT is active).
1195 *
1196 * Clearing this is useful in case SC reboots
1197 * while Solaris is running, as ScApp will read
1198 * a zero and not assume SWDT is running.
1199 */
1200 (void) ntwdt_set_hw_timeout(0);
1201
1202 /* "the first watchdog-enable has been seen" */
1203 wdog_state->ntwdt_is_initial_enable = 1;
1204 }
1205
1206 mutex_exit(&wdog_state->ntwdt_wdog_mutex);
1207 break;
1208 }
1209
1210 case LOMIOCDOGTIME: {
1211 /*
1212 * Allow application to set the period (in seconds)
1213 * of the watchdog timeout.
1214 */
1215 uint32_t lom_dogtime;
1216
1217 if (ddi_copyin((caddr_t)arg, (caddr_t)&lom_dogtime,
1218 sizeof (uint32_t), mode) != 0) {
1219 retval = EFAULT;
1220 break;
1221 }
1222
1223 NTWDT_DBG(WDT_DBG_IOCTL, ("DOGTIME: %u seconds",
1224 lom_dogtime));
1225
1226 /* Ensure specified timeout is within range. */
1227 if ((lom_dogtime == 0) ||
1228 (lom_dogtime > NTWDT_MAX_TIMEOUT)) {
1229 retval = EINVAL;
1230 break;
1231 }
1232
1233 mutex_enter(&wdog_state->ntwdt_wdog_mutex);
1234
1235 wdog_state->ntwdt_wdog_timeout = lom_dogtime;
1236
1237 /*
1238 * If watchdog is currently running, re-arm the
1239 * watchdog timeout with the specified value.
1240 */
1241 if (wdog_state->ntwdt_timer_running != 0) {
1242 ntwdt_arm_vwdt(wdog_state);
1243 }
1244
1245 /* Tell ScApp of the specified timeout */
1246 (void) ntwdt_set_cfgvar(LW8_WDT_PROP_TO, lom_dogtime);
1247
1248 mutex_exit(&wdog_state->ntwdt_wdog_mutex);
1249 break;
1250 }
1251
1252 case LOMIOCDOGPAT: {
1253 /*
1254 * Allow user to re-arm ("pat") the watchdog.
1255 */
1256 NTWDT_DBG(WDT_DBG_IOCTL, ("DOGPAT"));
1257
1258 mutex_enter(&wdog_state->ntwdt_wdog_mutex);
1259
1260 /*
1261 * If watchdog is not enabled or underlying
1262 * Cyclic timer is not running, exit.
1263 */
1264 if (!(wdog_state->ntwdt_wdog_enabled &&
1265 wdog_state->ntwdt_timer_running))
1266 goto end;
1267
1268 if (wdog_state->ntwdt_wdog_expired == 0) {
1269 /* then VWDT has not expired; re-arm it */
1270 ntwdt_arm_vwdt(wdog_state);
1271
1272 NTWDT_DBG(WDT_DBG_VWDT, ("VWDT re-armed:"
1273 " %d seconds",
1274 wdog_state->ntwdt_secs_remaining));
1275 }
1276
1277 mutex_exit(&wdog_state->ntwdt_wdog_mutex);
1278 break;
1279 }
1280
1281 #ifdef DEBUG
1282 case NTWDTIOCPANIC: {
1283 /*
1284 * Use in unit/integration testing to test our
1285 * panic-handler code.
1286 */
1287 cmn_err(CE_PANIC, "NTWDTIOCPANIC: force a panic");
1288 break;
1289 }
1290
1291 case NTWDTIOCSTATE: {
1292 /*
1293 * Allow application to read wdog state from the
1294 * SC (and *not* the driver's softstate).
1295 *
1296 * Return state of:
1297 * o recovery-enabled
1298 * o current timeout value
1299 */
1300 ntwdt_data_t ntwdt_data;
1301 int action;
1302 int timeout;
1303 int ret;
1304
1305 mutex_enter(&wdog_state->ntwdt_wdog_mutex);
1306 ret = ntwdt_get_cfgvar(LW8_WDT_PROP_TO, &timeout);
1307 ret |= ntwdt_get_cfgvar(LW8_WDT_PROP_RECOV, &action);
1308 mutex_exit(&wdog_state->ntwdt_wdog_mutex);
1309
1310 bzero((caddr_t)&ntwdt_data, sizeof (ntwdt_data));
1311
1312 if (ret != NTWDT_SUCCESS) {
1313 retval = EIO;
1314 break;
1315 }
1316
1317 NTWDT_DBG(WDT_DBG_IOCTL, ("NTWDTIOCSTATE:"
1318 " timeout/action: %d/%d", timeout, action));
1319
1320 ntwdt_data.ntwdt_wd1 = (uint32_t)timeout;
1321 ntwdt_data.ntwdt_wd2 = (uint8_t)action;
1322
1323 if (ddi_copyout((caddr_t)&ntwdt_data, (caddr_t)arg,
1324 sizeof (ntwdt_data_t), mode) != 0) {
1325 retval = EFAULT;
1326 }
1327 break;
1328 }
1329 #endif
1330 default:
1331 retval = EINVAL;
1332 break;
1333 }
1334
1335 return (retval);
1336 end:
1337 mutex_exit(&wdog_state->ntwdt_wdog_mutex);
1338 return (retval);
1339 }
1340
1341 /*
1342 * Arm the Virtual Watchdog Timer (VWDT).
1343 *
1344 * Assign the current watchdog timeout (ntwdt_wdog_timeout)
1345 * to the softstate variable representing the watchdog
1346 * timer (ntwdt_secs_remaining).
1347 *
1348 * To ensure (from ntwdt's perspective) that any actual
1349 * timeout expiration is at least as large as the expected
1350 * timeout, conditionally set/clear a bit that will be
1351 * checked in the Cyclic's softint.
1352 *
1353 * If the Cyclic has been started, the goal is to ignore
1354 * the _next_ firing of the Cyclic, as that firing will
1355 * NOT represent a full, one-second period. If the Cyclic
1356 * has NOT been started yet, then do not ignore the next
1357 * Cyclic's firing, as that's the First One, and it was
1358 * programmed to fire at a specific time (see ntwdt_start_timer).
1359 */
1360 static void
ntwdt_arm_vwdt(ntwdt_wdog_t * wdog_state)1361 ntwdt_arm_vwdt(ntwdt_wdog_t *wdog_state)
1362 {
1363 /* arm the watchdog timer (VWDT) */
1364 wdog_state->ntwdt_secs_remaining =
1365 wdog_state->ntwdt_wdog_timeout;
1366
1367 if (wdog_state->ntwdt_timer_running != 0)
1368 NTWDT_FLAG_SET(wdog_state, SKIP_CYCLIC);
1369 else
1370 NTWDT_FLAG_CLR(wdog_state, SKIP_CYCLIC);
1371 }
1372
1373 /*
1374 * Switch from SWDT mode to AWDT mode.
1375 */
1376 _NOTE(ARGSUSED(0))
1377 static void
ntwdt_swdt_to_awdt(ntwdt_wdog_t * wdog_state)1378 ntwdt_swdt_to_awdt(ntwdt_wdog_t *wdog_state)
1379 {
1380 ASSERT(wdog_state->ntwdt_is_initial_enable == 0);
1381
1382 /*
1383 * Disable SWDT. If SWDT is currently active,
1384 * display a message so user knows that SWDT Mode
1385 * has terminated.
1386 */
1387 if (watchdog_enable != 0 ||
1388 watchdog_activated != 0)
1389 cmn_err(CE_NOTE, "Hardware watchdog disabled");
1390 watchdog_enable = 0;
1391 watchdog_activated = 0;
1392
1393 /* "we are in AWDT mode" */
1394 ntwdt_watchdog_activated = 1;
1395 NTWDT_DBG(WDT_DBG_VWDT, ("AWDT is enabled"));
1396 }
1397
1398 /*
1399 * This is the Cyclic that runs at a multiple of the
1400 * AWDT's watchdog-timeout period. This Cyclic runs at
1401 * LOCK_LEVEL (eg, CY_LOCK_LEVEL) and will post a
1402 * soft-interrupt in order to complete all processing.
1403 *
1404 * Executing at LOCK_LEVEL gives this function a high
1405 * interrupt priority, while performing its work via
1406 * a soft-interrupt allows for a consistent (eg, MT-safe)
1407 * view of driver softstate between User and Interrupt
1408 * context.
1409 *
1410 * Context:
1411 * interrupt context: Cyclic framework calls at
1412 * CY_LOCK_LEVEL (=> 10)
1413 */
1414 _NOTE(ARGSUSED(0))
1415 static void
ntwdt_cyclic_pat(void * arg)1416 ntwdt_cyclic_pat(void *arg)
1417 {
1418 /* post-down to DDI_SOFTINT_LOW */
1419 ddi_trigger_softintr(ntwdt_cyclic_softint_id);
1420 }
1421
1422 /*
1423 * This is the soft-interrupt triggered by the AWDT
1424 * Cyclic.
1425 *
1426 * This softint does all the work re: computing whether
1427 * the VWDT expired. It grabs ntwdt_wdog_mutex
1428 * so User Context code (eg, the IOCTLs) cannot run,
1429 * and then it tests whether the VWDT expired. If it
1430 * hasn't, it decrements the VWDT timer by the amount
1431 * of the Cyclic's period. If the timer has expired,
1432 * it initiates Recovery (based on what user specified
1433 * in LOMIOCDOGCTL).
1434 *
1435 * This function also updates the normal system "heartbeat".
1436 *
1437 * Context:
1438 * interrupt-context: DDI_SOFTINT_LOW
1439 */
1440 static uint_t
ntwdt_cyclic_softint(char * arg)1441 ntwdt_cyclic_softint(char *arg)
1442 {
1443 ntwdt_state_t *ntwdt_ptr = (ntwdt_state_t *)arg;
1444 ntwdt_wdog_t *wdog_state;
1445
1446 wdog_state = ntwdt_ptr->ntwdt_wdog_state;
1447
1448 mutex_enter(&wdog_state->ntwdt_wdog_mutex);
1449
1450 if ((wdog_state->ntwdt_wdog_flags &
1451 NTWDT_FLAG_SKIP_CYCLIC) != 0) {
1452 /*
1453 * then skip all processing by this interrupt.
1454 * (see ntwdt_arm_vwdt()).
1455 */
1456 wdog_state->ntwdt_wdog_flags &= ~NTWDT_FLAG_SKIP_CYCLIC;
1457 goto end;
1458 }
1459
1460 if (wdog_state->ntwdt_timer_running == 0 ||
1461 (ntwdt_ptr->ntwdt_cycl_id == CYCLIC_NONE) ||
1462 (wdog_state->ntwdt_wdog_enabled == 0))
1463 goto end;
1464
1465 /* re-arm ("pat") the hardware watchdog */
1466 ntwdt_pat_hw_watchdog();
1467
1468 /* Decrement the VWDT and see if it has expired. */
1469 if (--wdog_state->ntwdt_secs_remaining == 0) {
1470
1471 cmn_err(CE_WARN, "application-watchdog expired");
1472
1473 wdog_state->ntwdt_wdog_expired = 1;
1474
1475 if (wdog_state->ntwdt_reset_enabled != 0) {
1476 /*
1477 * Update ScApp so that the new wdog-timeout
1478 * value is as specified in the
1479 * NTWDT_BOOT_TIMEOUT_PROP driver Property.
1480 * This timeout is assumedly larger than the
1481 * actual Solaris reboot time. This will allow
1482 * our forced-reboot to not cause an unplanned
1483 * (series of) watchdog expiration(s).
1484 */
1485 if (ntwdt_disable_timeout_action == 0)
1486 ntwdt_reprogram_wd(ntwdt_ptr);
1487
1488 mutex_exit(&wdog_state->ntwdt_wdog_mutex);
1489
1490 NTWDT_DBG(WDT_DBG_VWDT, ("recovery being done"));
1491
1492 ntwdt_enforce_timeout();
1493 } else {
1494 NTWDT_DBG(WDT_DBG_VWDT, ("no recovery being done"));
1495
1496 wdog_state->ntwdt_wdog_enabled = 0;
1497
1498 /*
1499 * Tell ScApp to disable wdog; this prevents
1500 * the "2x-timeout" artifact. Eg, Solaris
1501 * times-out at t(x) and ScApp times-out at t(2x),
1502 * where (x==ntwdt_wdog_timeout).
1503 */
1504 (void) ntwdt_set_cfgvar(LW8_WDT_PROP_WDT,
1505 wdog_state->ntwdt_wdog_enabled);
1506 }
1507
1508 /* Schedule Callout to stop this Cyclic */
1509 (void) timeout(ntwdt_stop_timer_lock, ntwdt_ptr, 0);
1510
1511 } else {
1512 _NOTE(EMPTY)
1513 NTWDT_DBG(WDT_DBG_VWDT, ("time remaining in VWDT: %d"
1514 " seconds", wdog_state->ntwdt_secs_remaining));
1515 }
1516 end:
1517 mutex_exit(&wdog_state->ntwdt_wdog_mutex);
1518
1519 return (DDI_INTR_CLAIMED);
1520 }
1521
1522 /*
1523 * Program the AWDT watchdog-timeout value to that specified
1524 * in the NTWDT_BOOT_TIMEOUT_PROP driver Property. However,
1525 * only do this if the AWDT is in the correct state.
1526 *
1527 * Caller's Context:
1528 * o interrupt context: (from software-interrupt)
1529 * o during a panic
1530 */
1531 static void
ntwdt_reprogram_wd(ntwdt_state_t * ntwdt_ptr)1532 ntwdt_reprogram_wd(ntwdt_state_t *ntwdt_ptr)
1533 {
1534 ntwdt_wdog_t *wdog_state = ntwdt_ptr->ntwdt_wdog_state;
1535
1536 /*
1537 * Program the AWDT watchdog-timeout value only if the
1538 * watchdog is enabled, the user wants to do recovery,
1539 * ("reset is enabled") and the AWDT timer is currently
1540 * running.
1541 */
1542 if (wdog_state->ntwdt_wdog_enabled != 0 &&
1543 wdog_state->ntwdt_reset_enabled != 0 &&
1544 wdog_state->ntwdt_timer_running != 0) {
1545 if (ddi_in_panic() != 0)
1546 (void) ntwdt_set_cfgvar_noreply(LW8_WDT_PROP_TO,
1547 wdog_state->ntwdt_boot_timeout);
1548 else
1549 (void) ntwdt_set_cfgvar(LW8_WDT_PROP_TO,
1550 wdog_state->ntwdt_boot_timeout);
1551 }
1552 }
1553
1554 /*
1555 * This is the callback that was registered to run during a panic.
1556 * It will set the watchdog-timeout value to be that as specified
1557 * in the NTWDT_BOOT_TIMEOUT_PROP driver Property.
1558 *
1559 * Note that unless this Property's value specifies a timeout
1560 * that's larger than the actual reboot latency, ScApp will
1561 * experience a timeout and initiate Recovery.
1562 */
1563 _NOTE(ARGSUSED(1))
1564 static boolean_t
ntwdt_panic_cb(void * arg,int code)1565 ntwdt_panic_cb(void *arg, int code)
1566 {
1567 ASSERT(ddi_in_panic() != 0);
1568
1569 ntwdt_reprogram_wd((ntwdt_state_t *)arg);
1570
1571 return (B_TRUE);
1572 }
1573
1574 /*
1575 * Initialize the Cyclic that is used to monitor the VWDT.
1576 */
1577 static void
ntwdt_start_timer(ntwdt_state_t * ntwdt_ptr)1578 ntwdt_start_timer(ntwdt_state_t *ntwdt_ptr)
1579 {
1580 ntwdt_wdog_t *wdog_state = ntwdt_ptr->ntwdt_wdog_state;
1581 cyc_handler_t *hdlr = &wdog_state->ntwdt_cycl_hdlr;
1582 cyc_time_t *when = &wdog_state->ntwdt_cycl_time;
1583
1584 /*
1585 * Init Cyclic so its first expiry occurs wdog-timeout
1586 * seconds from the current, absolute time.
1587 */
1588 when->cyt_interval = wdog_state->ntwdt_cyclic_interval;
1589 when->cyt_when = gethrtime() + when->cyt_interval;
1590
1591 wdog_state->ntwdt_wdog_expired = 0;
1592 wdog_state->ntwdt_timer_running = 1;
1593
1594 mutex_enter(&cpu_lock);
1595 if (ntwdt_ptr->ntwdt_cycl_id == CYCLIC_NONE)
1596 ntwdt_ptr->ntwdt_cycl_id = cyclic_add(hdlr, when);
1597 mutex_exit(&cpu_lock);
1598
1599 NTWDT_DBG(WDT_DBG_VWDT, ("AWDT's cyclic-driven timer is started"));
1600 }
1601
1602 /*
1603 * Stop the cyclic that is used to monitor the VWDT (and
1604 * was Started by ntwdt_start_timer).
1605 *
1606 * Context: per the Cyclic API, cyclic_remove cannot be called
1607 * from interrupt-context. Note that when this is
1608 * called via a Callout, it's called from base level.
1609 */
1610 static void
ntwdt_stop_timer(void * arg)1611 ntwdt_stop_timer(void *arg)
1612 {
1613 ntwdt_state_t *ntwdt_ptr = (void *)arg;
1614 ntwdt_wdog_t *wdog_state = ntwdt_ptr->ntwdt_wdog_state;
1615
1616 mutex_enter(&cpu_lock);
1617 if (ntwdt_ptr->ntwdt_cycl_id != CYCLIC_NONE)
1618 cyclic_remove(ntwdt_ptr->ntwdt_cycl_id);
1619 mutex_exit(&cpu_lock);
1620
1621 wdog_state->ntwdt_timer_running = 0;
1622 ntwdt_ptr->ntwdt_cycl_id = CYCLIC_NONE;
1623
1624 NTWDT_DBG(WDT_DBG_VWDT, ("AWDT's cyclic-driven timer is stopped"));
1625 }
1626
1627 /*
1628 * Stop the cyclic that is used to monitor the VWDT (and
1629 * do it in a thread-safe manner).
1630 *
1631 * This is a wrapper function for the core function,
1632 * ntwdt_stop_timer. Both functions are useful, as some
1633 * callers will already have the appropriate mutex locked, and
1634 * other callers will not.
1635 */
1636 static void
ntwdt_stop_timer_lock(void * arg)1637 ntwdt_stop_timer_lock(void *arg)
1638 {
1639 ntwdt_state_t *ntwdt_ptr = (void *)arg;
1640 ntwdt_wdog_t *wdog_state = ntwdt_ptr->ntwdt_wdog_state;
1641
1642 mutex_enter(&wdog_state->ntwdt_wdog_mutex);
1643 ntwdt_stop_timer(arg);
1644 mutex_exit(&wdog_state->ntwdt_wdog_mutex);
1645 }
1646
1647 /*
1648 * Add callbacks needed to react to major system state transitions.
1649 */
1650 static void
ntwdt_add_callbacks(ntwdt_state_t * ntwdt_ptr)1651 ntwdt_add_callbacks(ntwdt_state_t *ntwdt_ptr)
1652 {
1653 /* register a callback that's called during a panic */
1654 ntwdt_callback_ids.ntwdt_panic_cb = callb_add(ntwdt_panic_cb,
1655 (void *)ntwdt_ptr, CB_CL_PANIC, "ntwdt_panic_cb");
1656 }
1657
1658 /*
1659 * Remove callbacks added by ntwdt_add_callbacks.
1660 */
1661 static void
ntwdt_remove_callbacks()1662 ntwdt_remove_callbacks()
1663 {
1664 (void) callb_delete(ntwdt_callback_ids.ntwdt_panic_cb);
1665 }
1666
1667 /*
1668 * Initiate a Reset (as a result of the VWDT timeout expiring).
1669 */
1670 static void
ntwdt_enforce_timeout()1671 ntwdt_enforce_timeout()
1672 {
1673 if (ntwdt_disable_timeout_action != 0) {
1674 cmn_err(CE_NOTE, "OS timeout expired, taking no action");
1675 return;
1676 }
1677
1678 NTWDT_DBG(WDT_DBG_VWDT, ("VWDT expired; do a crashdump"));
1679
1680 (void) kadmin(A_DUMP, AD_BOOT, NULL, kcred);
1681 cmn_err(CE_PANIC, "kadmin(A_DUMP, AD_BOOT) failed");
1682 _NOTE(NOTREACHED)
1683 }
1684
1685 /*
1686 * Interpret the Properties from driver's config file.
1687 */
1688 static int
ntwdt_read_props(ntwdt_state_t * ntwdt_ptr)1689 ntwdt_read_props(ntwdt_state_t *ntwdt_ptr)
1690 {
1691 ntwdt_wdog_t *wdog_state;
1692 int boot_timeout;
1693
1694 wdog_state = ntwdt_ptr->ntwdt_wdog_state;
1695
1696 /*
1697 * interpret Property that specifies how long
1698 * the watchdog-timeout should be set to when
1699 * Solaris panics. Assumption is that this value
1700 * is larger than the amount of time it takes
1701 * to reboot and write crashdump. If not,
1702 * ScApp could induce a reset, due to an expired
1703 * watchdog-timeout.
1704 */
1705 wdog_state->ntwdt_boot_timeout =
1706 NTWDT_DEFAULT_BOOT_TIMEOUT;
1707
1708 boot_timeout = ddi_prop_get_int(DDI_DEV_T_ANY,
1709 ntwdt_ptr->ntwdt_dip, DDI_PROP_DONTPASS,
1710 NTWDT_BOOT_TIMEOUT_PROP, -1);
1711
1712 if (boot_timeout != -1 && boot_timeout > 0 &&
1713 boot_timeout <= NTWDT_MAX_TIMEOUT) {
1714 wdog_state->ntwdt_boot_timeout =
1715 boot_timeout;
1716 } else {
1717 _NOTE(EMPTY)
1718 NTWDT_DBG(WDT_DBG_ENTRY, (NTWDT_BOOT_TIMEOUT_PROP
1719 ": using default of %d seconds.",
1720 wdog_state->ntwdt_boot_timeout));
1721 }
1722
1723 return (DDI_SUCCESS);
1724 }
1725
1726 /*
1727 * Write state of SWDT to ScApp.
1728 *
1729 * Currently, this function is only called on attach()
1730 * of our driver.
1731 *
1732 * Note that we do not need to call this function, eg,
1733 * in response to a solicitation from ScApp (eg,
1734 * the LW8_SC_RESTARTED_EVENT).
1735 *
1736 * Context:
1737 * called in Kernel Context
1738 */
1739 static int
ntwdt_set_swdt_state()1740 ntwdt_set_swdt_state()
1741 {
1742 /*
1743 * note that ScApp only needs this one
1744 * variable when system is in SWDT mode.
1745 */
1746 (void) ntwdt_set_cfgvar(LW8_WDT_PROP_MODE,
1747 LW8_PROP_MODE_SWDT);
1748
1749 return (0);
1750 }
1751
1752 /*
1753 * Write all AWDT state to ScApp via the SBBC mailbox
1754 * in IOSRAM. Note that the permutation of Writes
1755 * is as specified in the design spec.
1756 *
1757 * Notes: caller must perform synchronization so that
1758 * this series of Writes is consistent as viewed
1759 * by ScApp (eg, there is no LW8_WDT_xxx mailbox
1760 * command that contains "all Properties"; each
1761 * Property must be written individually).
1762 */
1763 static int
ntwdt_set_awdt_state(ntwdt_wdog_t * rstatep)1764 ntwdt_set_awdt_state(ntwdt_wdog_t *rstatep)
1765 {
1766 /* ScApp expects values in this order: */
1767 (void) ntwdt_set_cfgvar(LW8_WDT_PROP_MODE,
1768 ntwdt_watchdog_activated != 0);
1769 (void) ntwdt_set_cfgvar(LW8_WDT_PROP_TO,
1770 rstatep->ntwdt_wdog_timeout);
1771 (void) ntwdt_set_cfgvar(LW8_WDT_PROP_RECOV,
1772 rstatep->ntwdt_reset_enabled);
1773 (void) ntwdt_set_cfgvar(LW8_WDT_PROP_WDT,
1774 rstatep->ntwdt_wdog_enabled);
1775
1776 return (NTWDT_SUCCESS);
1777 }
1778
1779 /*
1780 * Write a specified WDT Property (and Value) to ScApp.
1781 *
1782 * <Property, Value> is passed in the LW8_MBOX_WDT_SET
1783 * (SBBC) mailbox message. The SBBC mailbox resides in
1784 * IOSRAM.
1785 *
1786 * Note that this function is responsible for ensuring that
1787 * a driver-specific representation of a mailbox <Value> is
1788 * mapped into the representation that is expected by ScApp
1789 * (eg, see LW8_WDT_PROP_RECOV).
1790 */
1791 static int
ntwdt_set_cfgvar(int var,int val)1792 ntwdt_set_cfgvar(int var, int val)
1793 {
1794 int rv;
1795 int mbox_val;
1796 lw8_set_wdt_t set_wdt;
1797
1798 switch (var) {
1799 case LW8_WDT_PROP_RECOV:
1800 #ifdef DEBUG
1801 NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of 'recovery-enabled':"
1802 " %s (%d)", (val != 0) ? "enabled" : "disabled", val));
1803 #endif
1804 mbox_val = (val != 0) ? LW8_PROP_RECOV_ENABLED :
1805 LW8_PROP_RECOV_DISABLED;
1806 break;
1807
1808 case LW8_WDT_PROP_WDT:
1809 #ifdef DEBUG
1810 NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of 'wdog-enabled':"
1811 " %s (%d)", (val != 0) ? "enabled" : "disabled", val));
1812 #endif
1813 mbox_val = (val != 0) ? LW8_PROP_WDT_ENABLED :
1814 LW8_PROP_WDT_DISABLED;
1815 break;
1816
1817 case LW8_WDT_PROP_TO:
1818 #ifdef DEBUG
1819 NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of 'wdog-timeout':"
1820 " %d seconds", val));
1821 #endif
1822 mbox_val = val;
1823 break;
1824
1825 case LW8_WDT_PROP_MODE:
1826 #ifdef DEBUG
1827 NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of 'wdog-mode':"
1828 " %s (%d)", (val != LW8_PROP_MODE_SWDT) ?
1829 "AWDT" : "SWDT", val));
1830 #endif
1831 mbox_val = val;
1832 break;
1833
1834 default:
1835 ASSERT(0);
1836 _NOTE(NOTREACHED)
1837 }
1838
1839 set_wdt.property_id = var;
1840 set_wdt.value = mbox_val;
1841
1842 rv = ntwdt_lomcmd(LW8_MBOX_WDT_SET, (intptr_t)&set_wdt);
1843 if (rv != 0) {
1844 _NOTE(EMPTY)
1845 NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of prop/val %d/%d "
1846 "failed: %d", var, mbox_val, rv));
1847 }
1848
1849 return (rv);
1850 }
1851
/*
 * Variant of ntwdt_set_cfgvar for callers that do not want a
 * result (used by ntwdt_reprogram_wd while the system is
 * panicking).  As implemented here it forwards to
 * ntwdt_set_cfgvar and discards the return value.
 */
static void
ntwdt_set_cfgvar_noreply(int var, int val)
{
	(void) ntwdt_set_cfgvar(var, val);
}
1857
#ifdef DEBUG
/*
 * Read a specified WDT Property from ScApp.
 *
 * A LW8_MBOX_WDT_GET (SBBC) mailbox message is sent, and the
 * requested Property's <Value> is extracted from the message's
 * Response.  The SBBC mailbox resides in IOSRAM.
 *
 *	var	LW8_WDT_PROP_RECOV, LW8_WDT_PROP_WDT or LW8_WDT_PROP_TO
 *	val	where the Property's value is stored on success
 *
 * Returns the result of ntwdt_lomcmd() (0 on success).  On
 * failure, *val is not written.
 *
 * The timeout is returned at full width.  The two boolean
 * Properties are narrowed to 8 bits, but a timeout is a number
 * of seconds (it is handled as a uint32_t by LOMIOCDOGTIME and
 * NTWDTIOCSTATE), so narrowing it would corrupt any value above
 * 255.
 */
static int
ntwdt_get_cfgvar(int var, int *val)
{
	lw8_get_wdt_t	get_wdt;
	int		rv;

	rv = ntwdt_lomcmd(LW8_MBOX_WDT_GET, (intptr_t)&get_wdt);
	if (rv != 0) {
		_NOTE(EMPTY)
		NTWDT_DBG(WDT_DBG_PROT, ("MBOX_GET failed: %d", rv));
	} else {
		switch (var) {
		case LW8_WDT_PROP_RECOV:
			*val = (uint8_t)get_wdt.recovery_enabled;
			NTWDT_DBG(WDT_DBG_PROT, ("MBOX_GET of 'reset-enabled':"
			    " %s (%d)", (*val != 0) ? "enabled" : "disabled",
			    *val));
			break;

		case LW8_WDT_PROP_WDT:
			*val = (uint8_t)get_wdt.watchdog_enabled;
			NTWDT_DBG(WDT_DBG_PROT, ("MBOX_GET of 'wdog-enabled':"
			    " %s (%d)", (*val != 0) ? "enabled" : "disabled",
			    *val));
			break;

		case LW8_WDT_PROP_TO:
			/* seconds; do not truncate to 8 bits */
			*val = (int)get_wdt.timeout;
			NTWDT_DBG(WDT_DBG_PROT, ("MBOX_GET of 'wdog-timeout':"
			    " %d seconds", *val));
			break;

		default:
			ASSERT(0);
			_NOTE(NOTREACHED)
		}
	}

	return (rv);
}
#endif
1908
1909 /*
1910 * Update the real system "heartbeat", which resides in IOSRAM.
1911 * This "heartbeat" is normally used in SWDT Mode, but when
1912 * in AWDT Mode, ScApp also uses its value to determine if Solaris
1913 * is up-and-running.
1914 */
1915 static void
ntwdt_pat_hw_watchdog()1916 ntwdt_pat_hw_watchdog()
1917 {
1918 tod_iosram_t tod_buf;
1919 static uint32_t i_am_alive = 0;
1920 #ifdef DEBUG
1921 if (ntwdt_stop_heart != 0)
1922 return;
1923 #endif
1924 /* Update the system heartbeat */
1925 if (i_am_alive == UINT32_MAX)
1926 i_am_alive = 0;
1927 else
1928 i_am_alive++;
1929
1930 NTWDT_DBG(WDT_DBG_HEART, ("update heartbeat: %d",
1931 i_am_alive));
1932
1933 if (iosram_write(SBBC_TOD_KEY, OFFSET(tod_buf, tod_i_am_alive),
1934 (char *)&i_am_alive, sizeof (uint32_t))) {
1935 cmn_err(CE_WARN, "ntwdt_pat_hw_watchdog(): "
1936 "write heartbeat failed");
1937 }
1938 }
1939
1940 /*
1941 * Write the specified value to the system's normal (IOSRAM)
1942 * location that's used to specify Solaris' watchdog-timeout
1943 * on Serengeti platforms.
1944 *
1945 * In SWDT Mode, this location can hold values [0,n).
1946 * In AWDT Mode, this location must have value 0 (else
1947 * after a ScApp-reboot, ScApp could mistakenly interpret
1948 * that the system is in SWDT Mode).
1949 */
1950 static int
ntwdt_set_hw_timeout(uint32_t period)1951 ntwdt_set_hw_timeout(uint32_t period)
1952 {
1953 tod_iosram_t tod_buf;
1954 int rv;
1955
1956 rv = iosram_write(SBBC_TOD_KEY, OFFSET(tod_buf, tod_timeout_period),
1957 (char *)&period, sizeof (uint32_t));
1958 if (rv != 0)
1959 cmn_err(CE_WARN, "write of %d for TOD timeout "
1960 "period failed: %d", period, rv);
1961
1962 return (rv);
1963 }
1964
1965 /*
1966 * Soft-interrupt handler that is triggered when ScApp wants
1967 * to know the current state of the app-wdog.
1968 *
1969 * Grab ntwdt_wdog_mutex so that we synchronize with any
1970 * concurrent User Context and Interrupt Context activity. Call
1971 * a function that writes a permutation of the watchdog state
1972 * to the SC, then release the mutex.
1973 *
1974 * We grab the mutex not only so that each variable is consistent
1975 * but also so that the *permutation* of variables is consistent.
1976 * I.e., any set of one or more variables (that we write to SC
1977 * using multiple mailbox commands) will truly be seen as a
1978 * consistent snapshot. Note that if our protocol had a MBOX_SET
1979 * command that allowed writing all watchdog state in one
1980 * command, then the lock-hold latency would be greatly reduced.
1981 * To our advantage, this softint normally executes very
1982 * infrequently.
1983 *
1984 * Context:
1985 * called at Interrupt Context (DDI_SOFTINT_LOW)
1986 */
1987 static uint_t
ntwdt_mbox_softint(char * arg)1988 ntwdt_mbox_softint(char *arg)
1989 {
1990 ntwdt_wdog_t *wdog_state;
1991
1992 wdog_state = ((ntwdt_state_t *)arg)->ntwdt_wdog_state;
1993
1994 ASSERT(wdog_state != NULL);
1995
1996 mutex_enter(&wdog_state->ntwdt_wdog_mutex);
1997
1998 /* tell ScApp state of AWDT */
1999 (void) ntwdt_set_awdt_state(wdog_state);
2000
2001 mutex_exit(&wdog_state->ntwdt_wdog_mutex);
2002
2003 return (DDI_INTR_CLAIMED);
2004 }
2005
2006 /*
2007 * Handle MBOX_EVENT_LW8 Events that are sent from ScApp.
2008 *
2009 * The only (sub-)type of Event we handle is the
2010 * LW8_EVENT_SC_RESTARTED Event. We handle this by triggering
2011 * a soft-interrupt only if we are in AWDT mode.
2012 *
2013 * ScApp sends this Event when it wants to learn the current
2014 * state of the AWDT variables. Design-wise, this is used to
2015 * handle the case where the SC reboots while the system is in
2016 * AWDT mode (if the SC reboots in SWDT mode, then ScApp
2017 * already knows all necessary info and therefore won't send
2018 * this Event).
2019 *
2020 * Context:
2021 * function is called in Interrupt Context (at DDI_SOFTINT_MED)
2022 * and we conditionally trigger a softint that will run at
2023 * DDI_SOFTINT_LOW. Note that function executes at
2024 * DDI_SOFTINT_MED due to how this handler was registered by
2025 * the implementation of sbbc_mbox_reg_intr().
2026 *
2027 * Notes:
2028 * Currently, the LW8_EVENT_SC_RESTARTED Event is only sent
2029 * by SC when in AWDT mode.
2030 */
2031 static uint_t
ntwdt_event_data_handler(char * arg)2032 ntwdt_event_data_handler(char *arg)
2033 {
2034 lw8_event_t *payload;
2035 sbbc_msg_t *msg;
2036
2037 if (arg == NULL) {
2038 return (DDI_INTR_CLAIMED);
2039 }
2040
2041 msg = (sbbc_msg_t *)arg;
2042 if (msg->msg_buf == NULL) {
2043 return (DDI_INTR_CLAIMED);
2044 }
2045
2046 payload = (lw8_event_t *)msg->msg_buf;
2047
2048 switch (payload->event_type) {
2049 case LW8_EVENT_SC_RESTARTED:
2050 /*
2051 * then SC probably was rebooted, and it therefore
2052 * needs to know what the current state of AWDT is.
2053 */
2054 NTWDT_DBG(WDT_DBG_EVENT, ("LW8_EVENT_SC_RESTARTED "
2055 "received in %s mode",
2056 (ntwdt_watchdog_activated != 0) ? "AWDT" : "SWDT"));
2057
2058 if (ntwdt_watchdog_activated != 0) {
2059 /* then system is in AWDT mode */
2060 ddi_trigger_softintr(ntwdt_mbox_softint_id);
2061 }
2062 break;
2063
2064 default:
2065 NTWDT_DBG(WDT_DBG_EVENT,
2066 ("MBOX_EVENT_LW8: %d", payload->event_type));
2067 break;
2068 }
2069
2070 return (DDI_INTR_CLAIMED);
2071 }
2072
2073 /*
2074 * Send an SBBC Mailbox command to ScApp.
2075 *
2076 * Use the sbbc_mbox_request_response utility function to
2077 * send the Request and receive the optional Response.
2078 *
2079 * Context:
2080 * can be called from Interrupt Context or User Context.
2081 */
2082 static int
ntwdt_lomcmd(int cmd,intptr_t arg)2083 ntwdt_lomcmd(int cmd, intptr_t arg)
2084 {
2085 sbbc_msg_t request;
2086 sbbc_msg_t *reqp;
2087 sbbc_msg_t response;
2088 sbbc_msg_t *resp;
2089 int rv = 0;
2090
2091 reqp = &request;
2092 bzero((caddr_t)&request, sizeof (request));
2093 reqp->msg_type.type = LW8_MBOX;
2094 reqp->msg_type.sub_type = (uint16_t)cmd;
2095
2096 resp = &response;
2097 bzero((caddr_t)&response, sizeof (response));
2098 resp->msg_type.type = LW8_MBOX;
2099 resp->msg_type.sub_type = (uint16_t)cmd;
2100
2101 switch (cmd) {
2102 case LW8_MBOX_WDT_GET:
2103 reqp->msg_len = 0;
2104 reqp->msg_buf = (caddr_t)NULL;
2105 resp->msg_len = sizeof (lw8_get_wdt_t);
2106 resp->msg_buf = (caddr_t)arg;
2107 break;
2108
2109 case LW8_MBOX_WDT_SET:
2110 reqp->msg_len = sizeof (lw8_set_wdt_t);
2111 reqp->msg_buf = (caddr_t)arg;
2112 resp->msg_len = 0;
2113 resp->msg_buf = (caddr_t)NULL;
2114 break;
2115
2116 default:
2117 return (EINVAL);
2118 }
2119
2120 rv = sbbc_mbox_request_response(reqp, resp,
2121 LW8_DEFAULT_MAX_MBOX_WAIT_TIME);
2122
2123 if ((rv) || (resp->msg_status != SG_MBOX_STATUS_SUCCESS)) {
2124
2125 NTWDT_NDBG(WDT_DBG_PROT, ("SBBC mailbox error:"
2126 " (rv/msg_status)=(%d/%d)", rv, resp->msg_status));
2127
2128 /* errors from sgsbbc */
2129 if (resp->msg_status > 0) {
2130 return (resp->msg_status);
2131 }
2132
2133 /* errors from ScApp */
2134 switch (resp->msg_status) {
2135 case SG_MBOX_STATUS_ILLEGAL_PARAMETER:
2136 /* illegal ioctl parameter */
2137 return (EINVAL);
2138
2139 default:
2140 return (EIO);
2141 }
2142 }
2143 return (0);
2144 }
2145