xref: /illumos-gate/usr/src/uts/sun4u/lw8/io/ntwdt.c (revision ba2be53024c0b999e74ba9adcd7d80fec5df8c57)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * ntwdt driver
31  * ------------
32  *
33  * Subsystem Overview
34  * ------------------
35  *
36  * This is a pseudo driver for the Netra-1280 watchdog
37  * timer (WDT).  It provides for an *application-driven*
38  * WDT (AWDT), not a traditional, hardware-based WDT.  A
39  * hardware-based feature is already present on the
40  * Netra-1280, and it is referred to here as the
41  * System WDT (SWDT).
42  *
43  * ScApp and Solaris cooperate to provide either a SWDT or
44  * an AWDT; they are mutually-exclusive.  Once in AWDT
45  * mode, one can only transition to SWDT mode via a reboot.
46  * This obviously gives priority to the AWDT and was done
47  * to handle scenarios where the customer might temporarily
48  * terminate their wdog-app in order to do some debugging,
49  * or even to load a new version of the wdog-app.
50  *
51  * The wdog-app does an open() of the /dev/ntwdt device node
52  * and then issues ioctl's to control the state of the AWDT.
53  * The ioctl's are implemented by this driver.  Only one
54  * concurrent instance of open() is allowed.  On the close(),
55  * a watchdog timer still in progress is NOT terminated.
56  * This allows the global state machine to monitor the
57  * progress of a Solaris reboot.  ScApp will reset Solaris
58  * (eg, send an XIR) if the actual boot/crashdump latency
59  * is larger than the current AWDT timeout.
60  *
61  * The rationale for implementing an AWDT (vs a SWDT) is
62  * that it is more sensitive to system outage scenarios than
63  * a SWDT.  Eg, a system could be in such a failed state that
64  * even though its clock-interrupt could still run (and the
65  * SWDT's watchdog timer therefore re-armed), the system could
66  * in effect have a corrupt or very poor dispatch latency.
67  * An AWDT would be sensitive to dispatch latency issues, as
68  * well as problems with its own execution (eg, a hang or
69  * crash).
70  *
71  * Subsystem Interface Overview
72  * ----------------------------
73  *
74  * This pseudo-driver does not have any 'extern' functions.
75  *
76  * All system interaction is done via the traditional driver
77  * entry points (eg, attach(9e), _init(9e)).
78  *
79  * All interaction with user is via the entry points in the
80  * 'struct cb_ops' vector (eg, open(9e), ioctl(9e), and
81  * close(9e)).
82  *
83  * Subsystem Implementation Overview
84  * ---------------------------------
85  *
86  * ScApp and Solaris (eg, ntwdt) cooperate so that a state
87  * machine global to ScApp and ntwdt is either in AWDT mode
88  * or in SWDT mode.  These two peers communicate via the SBBC
89  * Mailbox that resides in IOSRAM (SBBC_MAILBOX_KEY).
90  * They use two new mailbox messages (LW8_MBOX_WDT_GET and
91  * LW8_MBOX_WDT_SET) and one new event (LW8_EVENT_SC_RESTARTED).
92  *
93  * ntwdt implements the AWDT by implementing a "virtual
94  * WDT" (VWDT).  Eg, the watchdog timer is not a traditional
95  * counter in hardware, it is a variable in ntwdt's
96  * softstate.  The wdog-app's actions cause changes to this
97  * and other variables in ntwdt's softstate.
98  *
99  * The wdog-app uses the LOMIOCDOGTIME ioctl to specify
100  * the number of seconds in the watchdog timeout (and
101  * therefore the VWDT).  The wdog-app then uses the
102  * LOMIOCDOGCTL ioctl to enable the wdog.  This causes
103  * ntwdt to create a Cyclic that will both decrement
104  * the VWDT and check to see if it has expired.  To keep
105  * the VWDT from expiring, the wdog-app uses the
106  * LOMIOCDOGPAT ioctl to re-arm (or "pat") the watchdog.
107  * This sets the VWDT value to that specified in the
108  * last LOMIOCDOGTIME ioctl.  The wdog-app can use the
109  * LOMIOCDOGSTATE ioctl to query the state of the VWDT.
110  *
111  * The wdog-app can also specify how Recovery is to be
112  * done.  The only choice is whether to do a crashdump
113  * or not.  If ntwdt computes a VWDT expiration, then
114  * ntwdt initiates the Recovery, else ScApp will.  Eg,
115  * a hang in Solaris will be sensed by ScApp and not
116  * ntwdt.  The wdog-app specifies the Recovery policy
117  * via the DOGCTL ioctl.
118  *
119  *   Timeout Expiration
120  *   ------------------
121  *   In our implementation, ScApp senses a watchdog
122  *   expiration the same way it historically has:
123  *   by reading a well-known area of IOSRAM (SBBC_TOD_KEY)
124  *   to see if the timestamp associated with a
125  *   Solaris-generated "heartbeat" field is older
126  *   than the currently specified timeout (which is
127  *   also specified in this same IOSRAM section).
128  *
129  *   What is different when ntwdt is running is that
130  *   ntwdt is responsible for updating the Heartbeat,
131  *   and not the normal client (todsg).  When ntwdt
132  *   puts the system in AWDT mode, it disables todsg's
133  *   updating of the Heartbeat by changing the state of
134  *   a pair of kernel tunables (watchdog_activated and
135  *   watchdog_enable).  ntwdt then takes responsibility
136  *   for updating the Heartbeat.  It does this by
137  *   updating the Heartbeat from the Cyclic that is
138  *   created when the user enables the AWDT (DOGCTL)
139  *   or specifies a new timeout value (DOGTIME).
140  *
141  *   As long as the AWDT is enabled, ntwdt will update
142  *   the real system Heartbeat.  As a result, ScApp
143  *   will conclude that Solaris is still running.  If
144  *   the user stops re-arming the VWDT or Solaris
145  *   hangs (eg), ntwdt will stop updating the Heartbeat.
146  *
147  *   Note that ntwdt computes expiration via the
148  *   repeatedly firing Cyclic, and ScApp computes
149  *   expiration via a cessation of Heartbeat update.
150  *   Since Heartbeat update stops once user stops
151  *   re-arming the VWDT (ie, DOGPAT ioctl), ntwdt
152  *   will compute a timeout at t(x), and ScApp will
153  *   compute a timeout at t(2x), where 'x' is the
154  *   current timeout value.  When ntwdt computes
155  *   the expiration, ntwdt masks this asymmetry.
156  *
157  *   Lifecycle Events
158  *   ----------------
159  *
160  *   ntwdt only handles one of the coarse-grained
161  *   "lifecycle events" (eg, entering OBP, shutdown,
162  *   power-down, DR) that are possible during a Solaris
163  *   session: a panic.  (Note that ScApp handles one
164  *   of the others: "entering OBP").  Other than these,
165  *   a user choosing such a state transition must first
166  *   use the wdog-app to disable the watchdog, else
167  *   an expiration could occur.
168  *
169  *   Solaris handles a panic by registering a handler
170  *   that's called during the panic.  The handler will
171  *   set the watchdog timeout to the value specified
172  *   in the NTWDT_BOOT_TIMEOUT_PROP driver Property.
173  *   Again, this value should be greater than the actual
174  *   Solaris reboot/crashdump latency.
175  *
176  *   When the user enters OBP via the System Controller,
177  *   ScApp will disable the watchdog (from ScApp's
178  *   perspective), but it will not communicate this to
179  *   ntwdt.  After having exited OBP, the wdog-app can
180  *   be used to enable or disable the watchdog (which
181  *   will get both ScApp and ntwdt in-sync).
182  *
183  *   Locking
184  *   -------
185  *
186  *   ntwdt has code running at three interrupt levels as
187  *   well as base level.
188  *
189  *   The ioctls run at base level in User Context.  The
190  *   driver's entry points run at base level in Kernel
191  *   Context.
192  *
193  *   ntwdt's three interrupt levels are used by:
194  *
195  *    o LOCK_LEVEL :
196  *        the Cyclic used to manage the VWDT is initialized
197  *        to CY_LOCK_LEVEL
198  *
199  *    o DDI_SOFTINT_MED :
200  *        the SBBC mailbox implementation registers the
201  *        specified handlers at this level
202  *
203  *    o DDI_SOFTINT_LOW :
204  *        this level is used by two handlers.  One handler
205  *        is triggered by the LOCK_LEVEL Cyclic.  The other
206  *        handler is triggered by the DDI_SOFTINT_MED
207  *        handler registered to handle SBBC mailbox events.
208  *
209  *   The centralizing concept is that the ntwdt_wdog_mutex
210  *   in the driver's softstate is initialized to have an
211  *   interrupt-block-cookie corresponding to DDI_SOFTINT_LOW.
212  *
213  *   As a result, any base level code grabs ntwdt_wdog_mutex
214  *   before doing work.  Also, any handler running at interrupt
215  *   level higher than DDI_SOFTINT_LOW "posts down" so that
216  *   a DDI_SOFTINT_LOW handler is responsible for executing
217  *   the "real work".  Each DDI_SOFTINT_LOW handler also
218  *   first grabs ntwdt_wdog_mutex, and so base level is
219  *   synchronized with all interrupt levels.
220  *
221  *   Note there's another mutex in the softstate: ntwdt_mutex.
222  *   This mutex has few responsibilities.  However, this
223  *   locking order must be followed: ntwdt_wdog_mutex is
224  *   held first, and then ntwdt_mutex.  This choice results
225  *   from the fact that the number of dynamic call sites
226  *   for ntwdt_wdog_mutex is MUCH greater than that of
227  *   ntwdt_mutex.  As a result, almost all uses of
228  *   ntwdt_wdog_mutex do not even require ntwdt_mutex to
229  *   be held, which saves resources.
230  *
231  *   Driver Properties
232  *   -----------------
233  *
234  *   "ddi-forceattach=1;"
235  *    ------------------
236  *
237  *    Using this allows our driver to be automatically
238  *    loaded at boot-time AND to not be removed from memory
239  *    solely due to memory-pressure.
240  *
241  *    Being loaded at boot allows ntwdt to (as soon as
242  *    possible) tell ScApp of the current mode of the
243  *    state-machine (eg, SWDT).  This is needed for the case
244  *    when Solaris is re-loaded while in AWDT mode; having
245  *    Solaris communicate ASAP with ScApp reduces the duration
246  *    of any "split-brain" scenario where ScApp and Solaris
247  *    are not in the same mode.
248  *
249  *    Having ntwdt remain in memory even after a close()
250  *    allows ntwdt to answer any SBBC mailbox commands
251  *    that ScApp sends (as the mailbox infrastructure is
252  *    not torn down until ntwdt is detach()'d).  Specifically,
253  *    ScApp could be re-loaded after AWDT mode had been
254  *    entered and the wdog-app had close()'d ntwdt.  ScApp
 *    will then eventually send a LW8_EVENT_SC_RESTARTED
 *    mailbox event in order to learn the current state of
 *    the state-machine.  Having ntwdt remain loaded ensures
 *    that this event never goes unanswered.
259  *
260  *   "ntwdt-boottimeout=600;"
261  *    ----------------------
262  *
263  *    This specifies the watchdog timeout value (in seconds) to
264  *    use when ntwdt is aware of the need to reboot/reload Solaris.
265  *
266  *    ntwdt will update ScApp by setting the watchdog timeout
267  *    to the specified number of seconds when either a) Solaris
268  *    panics or b) the VWDT expires.  Note that this is only done
269  *    if the user has chosen to enable Reset.
270  *
271  *    ntwdt boundary-checks the specified value, and if out-of-range,
272  *    it initializes the watchdog timeout to a default value of
273  *    NTWDT_DEFAULT_BOOT_TIMEOUT seconds.  Note that this is a
274  *    default value and is not a *minimum* value.  The valid range
275  *    for the watchdog timeout is between one second and
276  *    NTWDT_MAX_TIMEOUT seconds, inclusive.
277  *
278  *    If ntwdt-boottimeout is set to a value less than an actual
279  *    Solaris boot's latency, ScApp will reset Solaris during boot.
280  *    Note that a continuous series of ScApp-induced resets will
281  *    not occur; ScApp only resets Solaris on the first transition
282  *    into the watchdog-expired state.
283  */
284 
285 #include <sys/note.h>
286 #include <sys/types.h>
287 #include <sys/callb.h>
288 #include <sys/stat.h>
289 #include <sys/conf.h>
290 #include <sys/ddi.h>
291 #include <sys/sunddi.h>
292 #include <sys/modctl.h>
293 #include <sys/ddi_impldefs.h>
294 #include <sys/kmem.h>
295 #include <sys/devops.h>
296 #include <sys/cyclic.h>
297 #include <sys/uadmin.h>
298 #include <sys/lw8_impl.h>
299 #include <sys/sgsbbc.h>
300 #include <sys/sgsbbc_iosram.h>
301 #include <sys/sgsbbc_mailbox.h>
302 #include <sys/todsg.h>
303 #include <sys/mem_config.h>
304 #include <sys/lom_io.h>
305 #include <sys/reboot.h>
306 #include <sys/clock.h>
307 
308 
/*
 * Tunables.
 *
 * NOTE(review): ntwdt_disable_timeout_action is presumably consulted when
 * the VWDT expires (non-zero suppressing the recovery action); confirm
 * against the expiration handler.
 */
int ntwdt_disable_timeout_action = 0;
#ifdef DEBUG
/*
 * Tunable to simulate a Solaris hang.  If it is non-zero, then
 * no system heartbeats ("hardware patting") will be done,
 * even though all AWDT machinery is functioning OK.
 */
int ntwdt_stop_heart;
#endif
321 
/*
 * Driver Property
 */
#define	NTWDT_BOOT_TIMEOUT_PROP	"ntwdt-boottimeout"

/*
 * watchdog-timeout values (in seconds):
 *
 * NTWDT_DEFAULT_BOOT_TIMEOUT: the default value used if
 *                             this driver is aware of the
 *                             reboot.
 *
 * NTWDT_MAX_TIMEOUT:  max value settable by app (via the
 *                     LOMIOCDOGTIME ioctl)
 */
#define	NTWDT_DEFAULT_BOOT_TIMEOUT	(10*60)
#define	NTWDT_MAX_TIMEOUT		(180*60)


#define	NTWDT_CYCLIC_CHK_PERCENT	(20)
#define	NTWDT_MINOR_NODE	"awdt"

/*
 * Byte offset of member 'field' within the object 'base'.  'base' is an
 * lvalue expression (not a type); it is parenthesized so that expressions
 * such as "*ptr" bind correctly before the member access.
 */
#define	OFFSET(base, field)	((char *)&(base).field - (char *)&(base))

/* generic success/failure codes */
#define	NTWDT_SUCCESS	0
#define	NTWDT_FAILURE	1
347 
348 typedef struct {
349 	callb_id_t	ntwdt_panic_cb;
350 } ntwdt_callback_ids_t;
351 static ntwdt_callback_ids_t ntwdt_callback_ids;
352 
353 /* MBOX_EVENT_LW8 that is sent in IOSRAM Mailbox: */
354 static lw8_event_t	lw8_event;		/* payload */
355 static sbbc_msg_t	sbbc_msg;		/* message */
356 
357 static ddi_softintr_t	ntwdt_mbox_softint_id;
358 static ddi_softintr_t	ntwdt_cyclic_softint_id;
359 
360 /*
361  * VWDT (i.e., Virtual Watchdog Timer) state
362  */
363 typedef struct {
364 	kmutex_t		ntwdt_wdog_mutex;
365 	ddi_iblock_cookie_t	ntwdt_wdog_mtx_cookie;
366 	int			ntwdt_wdog_enabled;	/* wdog enabled ? */
367 	int			ntwdt_reset_enabled;	/* reset enabled ? */
368 	int			ntwdt_timer_running;	/* wdog running ? */
369 	int			ntwdt_wdog_expired;	/* wdog expired ? */
370 	int			ntwdt_is_initial_enable; /* 1st wdog-enable? */
371 	uint32_t		ntwdt_boot_timeout;	/* timeout for boot */
372 	uint32_t		ntwdt_secs_remaining;	/* expiration timer */
373 	uint8_t			ntwdt_wdog_action;	/* Reset action */
374 	uint32_t		ntwdt_wdog_timeout;	/* timeout in seconds */
375 	hrtime_t		ntwdt_cyclic_interval;	/* cyclic interval */
376 	cyc_handler_t		ntwdt_cycl_hdlr;
377 	cyc_time_t		ntwdt_cycl_time;
378 	kmutex_t		ntwdt_event_lock;	/* lock */
379 	uint64_t		ntwdt_wdog_flags;
380 } ntwdt_wdog_t;
381 
382 /* ntwdt_wdog_flags */
383 #define	NTWDT_FLAG_SKIP_CYCLIC		0x1	/* skip next Cyclic */
384 
385 /* macros to set/clear one bit in ntwdt_wdog_flags */
386 #define	NTWDT_FLAG_SET(p, f)\
387 	((p)->ntwdt_wdog_flags |= NTWDT_FLAG_##f)
388 #define	NTWDT_FLAG_CLR(p, f)\
389 	((p)->ntwdt_wdog_flags &= ~NTWDT_FLAG_##f)
390 
391 
392 /* softstate */
393 typedef struct {
394 	kmutex_t		ntwdt_mutex;
395 	dev_info_t		*ntwdt_dip;		/* dip */
396 	int			ntwdt_open_flag;	/* file open ? */
397 	ntwdt_wdog_t		*ntwdt_wdog_state;	/* wdog state */
398 	cyclic_id_t		ntwdt_cycl_id;
399 } ntwdt_state_t;
400 
401 static	void		*ntwdt_statep;	/* softstate */
402 static	dev_info_t	*ntwdt_dip;
403 /*
404  * if non-zero, then the app-wdog feature is available on
405  * this system configuration.
406  */
407 static	int	ntwdt_watchdog_available;
408 /*
409  * if non-zero, then application has used the LOMIOCDOGCTL
410  * ioctl at least once in order to Enable the app-wdog.
411  * Also, if this is non-zero, then system is in AWDT mode,
412  * else it is in SWDT mode.
413  */
414 static	int	ntwdt_watchdog_activated;
415 
416 #define	getstate(minor)	\
417 	((ntwdt_state_t *)ddi_get_soft_state(ntwdt_statep, (minor)))
418 
419 static int	ntwdt_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
420 static int	ntwdt_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
421 static int	ntwdt_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
422 		    void **result);
423 static int	ntwdt_open(dev_t *, int, int, cred_t *);
424 static int	ntwdt_close(dev_t, int, int, cred_t *);
425 static int	ntwdt_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
426 
427 static void	ntwdt_reprogram_wd(ntwdt_state_t *);
428 static boolean_t	ntwdt_panic_cb(void *arg, int code);
429 static void	ntwdt_start_timer(ntwdt_state_t *);
430 static void	ntwdt_stop_timer(void *);
431 static void	ntwdt_stop_timer_lock(void *arg);
432 static void	ntwdt_add_callbacks(ntwdt_state_t *ntwdt_ptr);
433 static void	ntwdt_remove_callbacks();
434 static void	ntwdt_cyclic_pat(void *arg);
435 static void	ntwdt_enforce_timeout();
436 static void	ntwdt_pat_hw_watchdog();
437 static int	ntwdt_set_cfgvar(int var, int val);
438 static void	ntwdt_set_cfgvar_noreply(int var, int val);
439 static int	ntwdt_read_props(ntwdt_state_t *);
440 static int	ntwdt_add_mbox_handlers(ntwdt_state_t *);
441 static int	ntwdt_set_hw_timeout(uint32_t period);
442 static int	ntwdt_remove_mbox_handlers(void);
443 static uint_t	ntwdt_event_data_handler(char *arg);
444 static uint_t	ntwdt_mbox_softint(char *arg);
445 static uint_t	ntwdt_cyclic_softint(char *arg);
446 static int	ntwdt_lomcmd(int cmd, intptr_t arg);
447 static int	ntwdt_chk_wdog_support();
448 static int	ntwdt_chk_sc_support();
449 static int	ntwdt_set_swdt_state();
450 static void	ntwdt_swdt_to_awdt(ntwdt_wdog_t *);
451 static void	ntwdt_arm_vwdt(ntwdt_wdog_t *wdog_state);
452 #ifdef DEBUG
453 static int	ntwdt_get_cfgvar(int var, int *val);
454 #endif
455 
456 struct cb_ops ntwdt_cb_ops = {
457 	ntwdt_open,	/* open  */
458 	ntwdt_close,	/* close */
459 	nulldev,	/* strategy */
460 	nulldev,	/* print */
461 	nulldev,	/* dump */
462 	nulldev,	/* read */
463 	nulldev,	/* write */
464 	ntwdt_ioctl,	/* ioctl */
465 	nulldev,	/* devmap */
466 	nulldev,	/* mmap */
467 	nulldev,	/* segmap */
468 	nochpoll,	/* poll */
469 	ddi_prop_op,	/* cb_prop_op */
470 	NULL,		/* streamtab  */
471 	D_MP | D_NEW
472 };
473 
474 static struct dev_ops ntwdt_ops = {
475 	DEVO_REV,		/* Devo_rev */
476 	0,			/* Refcnt */
477 	ntwdt_info,		/* Info */
478 	nulldev,		/* Identify */
479 	nulldev,		/* Probe */
480 	ntwdt_attach,		/* Attach */
481 	ntwdt_detach,		/* Detach */
482 	nodev,			/* Reset */
483 	&ntwdt_cb_ops,		/* Driver operations */
484 	0,			/* Bus operations */
485 	NULL			/* Power */
486 };
487 
488 static struct modldrv modldrv = {
489 	&mod_driverops, 		/* This one is a driver */
490 	"ntwdt-Netra-T12 v%I%", 	/* Name of the module. */
491 	&ntwdt_ops,			/* Driver ops */
492 };
493 
494 static struct modlinkage modlinkage = {
495 	MODREV_1, (void *)&modldrv, NULL
496 };
497 
498 
/*
 * Flags to set in ntwdt_debug.
 *
 * Use either the NTWDT_DBG or NTWDT_NDBG macros
 */
#define	WDT_DBG_ENTRY	0x00000001	/* drv entry points */
#define	WDT_DBG_HEART	0x00000002	/* system heartbeat */
#define	WDT_DBG_VWDT	0x00000004	/* virtual WDT */
#define	WDT_DBG_EVENT	0x00000010	/* SBBC Mbox events */
#define	WDT_DBG_PROT	0x00000020	/* SC/Solaris protocol */
#define	WDT_DBG_IOCTL	0x00000040	/* ioctl's */

uint64_t ntwdt_debug;	/* enables tracing of module's activity */

/*
 * Tracing macros.  'flag' is one or more WDT_DBG_xxx bits; 'msg' is a
 * complete, parenthesized printf argument list, eg:
 *
 *	NTWDT_DBG(WDT_DBG_ENTRY, ("attach: cmd: %d", cmd));
 *
 * The bodies are wrapped in do { } while (0) so that each use is a
 * single statement and remains safe as the unbraced arm of an if/else.
 */

/* used in non-debug version of module */
#define	NTWDT_NDBG(flag, msg)	do { if ((ntwdt_debug & (flag)) != 0) \
	(void) printf msg; } while (0)

#ifdef DEBUG
/* argument block of the debug-only NTWDTIOCSTATE ioctl */
typedef struct {
	uint32_t	ntwdt_wd1;
	uint8_t		ntwdt_wd2;
} ntwdt_data_t;

/* debug-only ioctls */
#define	NTWDTIOCSTATE	_IOWR('a', 0xa, ntwdt_data_t)
#define	NTWDTIOCPANIC	_IOR('a',  0xb, uint32_t)

/* used in debug version of module */
#define	NTWDT_DBG(flag, msg)	do { if ((ntwdt_debug & (flag)) != 0) \
	(void) printf msg; } while (0)
#else
/* compiled out in the non-debug version of the module */
#define	NTWDT_DBG(flag, msg)
#endif
532 
533 
534 int
535 _init(void)
536 {
537 	int error = 0;
538 
539 	NTWDT_DBG(WDT_DBG_ENTRY, ("_init"));
540 
541 	/* Initialize the soft state structures */
542 	if ((error = ddi_soft_state_init(&ntwdt_statep,
543 	    sizeof (ntwdt_state_t), 1)) != 0) {
544 		return (error);
545 	}
546 
547 	/* Install the loadable module */
548 	if ((error = mod_install(&modlinkage)) != 0) {
549 		ddi_soft_state_fini(&ntwdt_statep);
550 	}
551 	return (error);
552 }
553 
554 int
555 _info(struct modinfo *modinfop)
556 {
557 	NTWDT_DBG(WDT_DBG_ENTRY, ("_info"));
558 
559 	return (mod_info(&modlinkage, modinfop));
560 }
561 
562 int
563 _fini(void)
564 {
565 	int error;
566 
567 	NTWDT_DBG(WDT_DBG_ENTRY, ("_fini"));
568 
569 	error = mod_remove(&modlinkage);
570 	if (error == 0) {
571 		ddi_soft_state_fini(&ntwdt_statep);
572 	}
573 
574 	return (error);
575 }
576 
577 static int
578 ntwdt_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
579 {
580 	int			instance;
581 	ntwdt_state_t		*ntwdt_ptr = NULL;
582 	ntwdt_wdog_t		*wdog_state = NULL;
583 	cyc_handler_t		*hdlr = NULL;
584 
585 	NTWDT_DBG(WDT_DBG_ENTRY, ("attach: dip/cmd: 0x%p/%d",
586 	    dip, cmd));
587 
588 	switch (cmd) {
589 	case DDI_ATTACH:
590 		break;
591 
592 	case DDI_RESUME:
593 		return (DDI_SUCCESS);
594 
595 	default:
596 		return (DDI_FAILURE);
597 	}
598 
599 	/* see if app-wdog is supported on our config */
600 	if (ntwdt_chk_wdog_support() != 0)
601 		return (DDI_FAILURE);
602 
603 	/* (unsolicitedly) send SWDT state to ScApp via mailbox */
604 	ntwdt_set_swdt_state();
605 
606 	instance = ddi_get_instance(dip);
607 	ASSERT(instance == 0);
608 
609 	if (ddi_soft_state_zalloc(ntwdt_statep, instance)
610 	    != DDI_SUCCESS) {
611 		return (DDI_FAILURE);
612 	}
613 	ntwdt_ptr = ddi_get_soft_state(ntwdt_statep, instance);
614 	ASSERT(ntwdt_ptr != NULL);
615 
616 	ntwdt_dip = dip;
617 
618 	ntwdt_ptr->ntwdt_dip = dip;
619 	ntwdt_ptr->ntwdt_cycl_id = CYCLIC_NONE;
620 	mutex_init(&ntwdt_ptr->ntwdt_mutex, NULL,
621 	    MUTEX_DRIVER, NULL);
622 
623 	/*
624 	 * Initialize the watchdog structure
625 	 */
626 	ntwdt_ptr->ntwdt_wdog_state =
627 	    kmem_zalloc(sizeof (ntwdt_wdog_t), KM_SLEEP);
628 	wdog_state = ntwdt_ptr->ntwdt_wdog_state;
629 
630 	/*
631 	 * Create an iblock-cookie so that ntwdt_wdog_mutex can be
632 	 * used at User Context and Interrupt Context.
633 	 */
634 	if (ddi_get_soft_iblock_cookie(dip, DDI_SOFTINT_LOW,
635 	    &wdog_state->ntwdt_wdog_mtx_cookie) != DDI_SUCCESS) {
636 		cmn_err(CE_WARN, "init of iblock cookie failed "
637 		    "for ntwdt_wdog_mutex");
638 		goto err1;
639 	} else {
640 		mutex_init(&wdog_state->ntwdt_wdog_mutex, NULL, MUTEX_DRIVER,
641 		    (void *)wdog_state->ntwdt_wdog_mtx_cookie);
642 	}
643 
644 	mutex_init(&wdog_state->ntwdt_event_lock, NULL,
645 	    MUTEX_DRIVER, NULL);
646 
647 	/* Cyclic fires once per second: */
648 	wdog_state->ntwdt_cyclic_interval = NANOSEC;
649 
650 	/* interpret our .conf file. */
651 	(void) ntwdt_read_props(ntwdt_ptr);
652 
653 	/* init the Cyclic that drives the VWDT */
654 	hdlr = &wdog_state->ntwdt_cycl_hdlr;
655 	hdlr->cyh_level = CY_LOCK_LEVEL;
656 	hdlr->cyh_func = ntwdt_cyclic_pat;
657 	hdlr->cyh_arg = (void *)ntwdt_ptr;
658 
659 	/* Register handler for SBBC Mailbox events */
660 	if (ntwdt_add_mbox_handlers(ntwdt_ptr) != DDI_SUCCESS)
661 		goto err2;
662 
663 	/* Softint that will be triggered by Cyclic that drives VWDT */
664 	if (ddi_add_softintr(dip, DDI_SOFTINT_LOW, &ntwdt_cyclic_softint_id,
665 	    NULL, NULL, ntwdt_cyclic_softint, (caddr_t)ntwdt_ptr)
666 	    != DDI_SUCCESS) {
667 		cmn_err(CE_WARN, "failed to add cyclic softintr");
668 		goto err3;
669 	}
670 
671 	/* Register callbacks for various system events, e.g. panic */
672 	ntwdt_add_callbacks(ntwdt_ptr);
673 
674 	/*
675 	 * Create Minor Node as last activity.  This prevents
676 	 * application from accessing our implementation until it
677 	 * is initialized.
678 	 */
679 	if (ddi_create_minor_node(dip, NTWDT_MINOR_NODE, S_IFCHR, 0,
680 	    DDI_PSEUDO, NULL) == DDI_FAILURE) {
681 		cmn_err(CE_WARN, "failed to create Minor Node: %s",
682 		    NTWDT_MINOR_NODE);
683 		goto err4;
684 	}
685 
686 	/* Display our driver info in the banner */
687 	ddi_report_dev(dip);
688 
689 	return (DDI_SUCCESS);
690 
691 err4:
692 	ntwdt_remove_callbacks();
693 	ddi_remove_softintr(ntwdt_cyclic_softint_id);
694 err3:
695 	ntwdt_remove_mbox_handlers();
696 err2:
697 	mutex_destroy(&wdog_state->ntwdt_event_lock);
698 	mutex_destroy(&wdog_state->ntwdt_wdog_mutex);
699 err1:
700 	kmem_free(wdog_state, sizeof (ntwdt_wdog_t));
701 	ntwdt_ptr->ntwdt_wdog_state = NULL;
702 
703 	mutex_destroy(&ntwdt_ptr->ntwdt_mutex);
704 	ddi_soft_state_free(ntwdt_statep, instance);
705 
706 	ntwdt_dip = NULL;
707 
708 	return (DDI_FAILURE);
709 }
710 
711 /*
712  * Do static checks to see if the app-wdog feature is supported in
713  * the current configuration.
714  *
715  * If the kernel debugger was booted, then we disallow the app-wdog
716  * feature, as we assume the user will be interested more in
717  * debuggability of system than its ability to support an app-wdog.
718  * (Note that the System Watchdog (SWDT) can still be available).
719  *
720  * If the currently loaded version of ScApp does not understand one
721  * of the IOSRAM mailbox messages that is specific to the app-wdog
722  * protocol, then we disallow use of the app-wdog feature (else
723  * we could have a "split-brain" scenario where Solaris supports
724  * app-wdog but ScApp doesn't).
725  *
726  * Note that there is no *dynamic* checking of whether ScApp supports
727  * the wdog protocol.  Eg, if a new version of ScApp was loaded out
728  * from under Solaris, then once in AWDT mode, Solaris has no way
729  * of knowing that (a possibly older version of) ScApp was loaded.
730  */
731 static int
732 ntwdt_chk_wdog_support()
733 {
734 	int	retval = ENOTSUP;
735 	int	rv;
736 
737 	if ((boothowto & RB_DEBUG) != 0) {
738 		cmn_err(CE_WARN, "kernel debugger was booted; "
739 		    "application watchdog is not available.");
740 		return (retval);
741 	}
742 
743 	/*
744 	 * if ScApp does not support the MBOX_GET cmd, then
745 	 * it does not support the app-wdog feature.  Also,
746 	 * if there is *any* type of SBBC Mailbox error at
747 	 * this point, we will disable the app watchdog
748 	 * feature.
749 	 */
750 	if ((rv = ntwdt_chk_sc_support()) != 0) {
751 		if (rv == EINVAL)
752 			cmn_err(CE_WARN, "ScApp does not support "
753 			    "the application watchdog feature.");
754 		else
755 			cmn_err(CE_WARN, "SBBC mailbox had error;"
756 			    "application watchdog is not available.");
757 		retval = rv;
758 	} else {
759 		ntwdt_watchdog_available = 1;
760 		retval = 0;
761 	}
762 
763 	NTWDT_DBG(WDT_DBG_PROT, ("app-wdog is %savailable",
764 	    (ntwdt_watchdog_available != 0) ? "" : "not "));
765 
766 	return (retval);
767 }
768 
769 /*
770  * Check to see if ScApp supports the app-watchdog feature.
771  *
772  * Do this by sending one of the mailbox commands that is
773  * specific to the app-wdog protocol.  If ScApp does not
774  * return an error code, we will assume it understands it
775  * (as well as the remainder of the app-wdog protocol).
776  *
777  * Notes:
778  *  ntwdt_lomcmd() will return EINVAL if ScApp does not
779  *  understand the message.  The underlying sbbc_mbox_
780  *  utility function returns SG_MBOX_STATUS_ILLEGAL_PARAMETER
781  *  ("illegal ioctl parameter").
782  */
783 static int
784 ntwdt_chk_sc_support()
785 {
786 	lw8_get_wdt_t	get_wdt;
787 
788 	return (ntwdt_lomcmd(LW8_MBOX_WDT_GET, (intptr_t)&get_wdt));
789 }
790 
791 static int
792 ntwdt_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
793 {
794 	int		instance = ddi_get_instance(dip);
795 	ntwdt_state_t	*ntwdt_ptr = NULL;
796 
797 	NTWDT_DBG(WDT_DBG_ENTRY, ("detach: dip/cmd: 0x%p/%d",
798 	    dip, cmd));
799 
800 	ntwdt_ptr = ddi_get_soft_state(ntwdt_statep, instance);
801 	if (ntwdt_ptr == NULL) {
802 		return (DDI_FAILURE);
803 	}
804 
805 	switch (cmd) {
806 	case DDI_SUSPEND:
807 		return (DDI_SUCCESS);
808 
809 	case DDI_DETACH:
810 		/*
811 		 * release resources in opposite (LIFO) order as
812 		 * were allocated in attach(9f).
813 		 */
814 		ddi_remove_minor_node(dip, NULL);
815 
816 		ntwdt_stop_timer_lock((void *)ntwdt_ptr);
817 
818 		ntwdt_remove_callbacks(ntwdt_ptr);
819 
820 		ddi_remove_softintr(ntwdt_cyclic_softint_id);
821 
822 		ntwdt_remove_mbox_handlers();
823 
824 		mutex_destroy(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_event_lock);
825 		mutex_destroy(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex);
826 		kmem_free(ntwdt_ptr->ntwdt_wdog_state,
827 		    sizeof (ntwdt_wdog_t));
828 		ntwdt_ptr->ntwdt_wdog_state = NULL;
829 
830 		mutex_destroy(&ntwdt_ptr->ntwdt_mutex);
831 
832 		ddi_soft_state_free(ntwdt_statep, instance);
833 
834 		ntwdt_dip = NULL;
835 		return (DDI_SUCCESS);
836 
837 	default:
838 		return (DDI_FAILURE);
839 	}
840 }
841 
842 /*
843  * Register the SBBC Mailbox handlers.
844  *
845  * Currently, only one handler is used.  It processes the MBOX_EVENT_LW8
846  * Events that are sent by ScApp.  Of the Events that are sent, only
847  * the Event declaring that ScApp is coming up from a reboot
848  * (LW8_EVENT_SC_RESTARTED) is processed.
849  *
850  * sbbc_mbox_reg_intr registers the handler so that it executes at
851  * a DDI_SOFTINT_MED priority.
852  */
853 static int
854 ntwdt_add_mbox_handlers(ntwdt_state_t *ntwdt_ptr)
855 {
856 	int	err;
857 
858 	/*
859 	 * We need two interrupt handlers to handle the SBBC mbox
860 	 * events.  The sbbc_mbox_xxx implementation will
861 	 * trigger our ntwdt_event_data_handler, which itself will
862 	 * trigger our ntwdt_mbox_softint.  As a result, we'll
863 	 * register ntwdt_mbox_softint first, to ensure it cannot
864 	 * be called (until its caller, ntwdt_event_data_handler)
865 	 * is registered.
866 	 */
867 
868 	/*
869 	 * add the softint that will do the real work of handling the
870 	 * LW8_SC_RESTARTED_EVENT sent from ScApp.
871 	 */
872 	if (ddi_add_softintr(ntwdt_ptr->ntwdt_dip, DDI_SOFTINT_LOW,
873 	    &ntwdt_mbox_softint_id, NULL, NULL, ntwdt_mbox_softint,
874 	    (caddr_t)ntwdt_ptr) != DDI_SUCCESS) {
875 		cmn_err(CE_WARN, "Failed to add MBOX_EVENT_LW8 softintr");
876 		return (DDI_FAILURE);
877 	}
878 
879 	/*
880 	 * Register an interrupt handler with the SBBC mailbox utility.
881 	 * This handler will get called on each event of each type of
882 	 * MBOX_EVENT_LW8 events.  However, it will only conditionally
883 	 * trigger the worker-handler (ntwdt_mbox_softintr).
884 	 */
885 	sbbc_msg.msg_buf = (caddr_t)&lw8_event;
886 	sbbc_msg.msg_len = sizeof (lw8_event);
887 
888 	err = sbbc_mbox_reg_intr(MBOX_EVENT_LW8, ntwdt_event_data_handler,
889 	    &sbbc_msg, NULL, &ntwdt_ptr->ntwdt_wdog_state->ntwdt_event_lock);
890 	if (err != 0) {
891 		cmn_err(CE_WARN, "Failed to register SBBC MBOX_EVENT_LW8"
892 		    " handler. err=%d", err);
893 
894 		ddi_remove_softintr(ntwdt_mbox_softint_id);
895 		return (DDI_FAILURE);
896 	}
897 
898 	return (DDI_SUCCESS);
899 }
900 
901 /*
902  * Unregister the SBBC Mailbox handlers that were registered
903  * by ntwdt_add_mbox_handlers.
904  */
905 static int
906 ntwdt_remove_mbox_handlers(void)
907 {
908 	int	rv = DDI_SUCCESS;
909 	int	err;
910 
911 	/*
912 	 * unregister the two handlers that cooperate to handle
913 	 * the LW8_SC_RESTARTED_EVENT.  Note that they are unregistered
914 	 * in LIFO order (as compared to how they were registered).
915 	 */
916 	err = sbbc_mbox_unreg_intr(MBOX_EVENT_LW8, ntwdt_event_data_handler);
917 	if (err != 0) {
918 		cmn_err(CE_WARN, "Failed to unregister sbbc MBOX_EVENT_LW8 "
919 		    "handler. Err=%d", err);
920 		rv = DDI_FAILURE;
921 	}
922 
923 	/* remove the associated softint */
924 	ddi_remove_softintr(ntwdt_mbox_softint_id);
925 
926 	return (rv);
927 }
928 
929 _NOTE(ARGSUSED(0))
930 static int
931 ntwdt_info(dev_info_t *dip, ddi_info_cmd_t infocmd,
932     void *arg, void **result)
933 {
934 	dev_t	dev;
935 	int	instance;
936 	int	error = DDI_SUCCESS;
937 
938 	if (result == NULL)
939 		return (DDI_FAILURE);
940 
941 	switch (infocmd) {
942 	case DDI_INFO_DEVT2DEVINFO:
943 		dev = (dev_t)arg;
944 		if (getminor(dev) == 0)
945 			*result = (void *)ntwdt_dip;
946 		else
947 			error = DDI_FAILURE;
948 		break;
949 
950 	case DDI_INFO_DEVT2INSTANCE:
951 		dev = (dev_t)arg;
952 		instance = getminor(dev);
953 		*result = (void *)(uintptr_t)instance;
954 		break;
955 
956 	default:
957 		error = DDI_FAILURE;
958 	}
959 
960 	return (error);
961 }
962 
963 /*
964  * Open the device this driver manages.
965  *
966  * Ensure the caller is a privileged process, else
967  * a non-privileged user could cause denial-of-service
968  * and/or negatively impact reliability/availability.
969  *
970  * Ensure there is only one concurrent open().
971  */
972 _NOTE(ARGSUSED(1))
973 static int
974 ntwdt_open(dev_t *devp, int flag, int otyp, cred_t *credp)
975 {
976 	int		inst = getminor(*devp);
977 	int		ret = 0;
978 	ntwdt_state_t	*ntwdt_ptr = getstate(inst);
979 
980 	NTWDT_DBG(WDT_DBG_ENTRY, ("open: inst/soft: %d/0x%p",
981 	    inst, ntwdt_ptr));
982 
983 	/* ensure caller is a privileged process */
984 	if (drv_priv(credp) != 0)
985 		return (EPERM);
986 
987 	/*
988 	 * Check for a Deferred Attach scenario.
989 	 * Return ENXIO so DDI framework will call
990 	 * attach() and then retry the open().
991 	 */
992 	if (ntwdt_ptr == NULL)
993 		return (ENXIO);
994 
995 	mutex_enter(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex);
996 	mutex_enter(&ntwdt_ptr->ntwdt_mutex);
997 	if (ntwdt_ptr->ntwdt_open_flag != 0)
998 		ret = EAGAIN;
999 	else
1000 		ntwdt_ptr->ntwdt_open_flag = 1;
1001 	mutex_exit(&ntwdt_ptr->ntwdt_mutex);
1002 	mutex_exit(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex);
1003 
1004 	return (ret);
1005 }
1006 
1007 /*
1008  * Close the device this driver manages.
1009  *
1010  * Notes:
1011  *
1012  *  The close() can happen while the AWDT is running !
1013  *  (and nothing is done, eg, to disable the watchdog
1014  *  or to stop updating the system heartbeat).  This
1015  *  is the desired behavior, as this allows for the
1016  *  case of monitoring a Solaris reboot in terms
1017  *  of watchdog expiration.
1018  */
1019 _NOTE(ARGSUSED(1))
1020 static int
1021 ntwdt_close(dev_t dev, int flag, int otyp, cred_t *credp)
1022 {
1023 	int		inst = getminor(dev);
1024 	ntwdt_state_t	*ntwdt_ptr = getstate(inst);
1025 
1026 	NTWDT_DBG(WDT_DBG_ENTRY, ("close: inst/soft: %d/0x%p",
1027 	    inst, ntwdt_ptr));
1028 
1029 	if (ntwdt_ptr == NULL)
1030 		return (ENXIO);
1031 
1032 	mutex_enter(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex);
1033 	mutex_enter(&ntwdt_ptr->ntwdt_mutex);
1034 	if (ntwdt_ptr->ntwdt_open_flag != 0) {
1035 		ntwdt_ptr->ntwdt_open_flag = 0;
1036 	}
1037 	mutex_exit(&ntwdt_ptr->ntwdt_mutex);
1038 	mutex_exit(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex);
1039 
1040 	return (0);
1041 }
1042 
1043 _NOTE(ARGSUSED(4))
1044 static int
1045 ntwdt_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
1046     cred_t *credp, int *rvalp)
1047 {
1048 	int		inst = getminor(dev);
1049 	int		retval = 0;
1050 	ntwdt_state_t	*ntwdt_ptr = NULL;
1051 	ntwdt_wdog_t	*wdog_state;
1052 
1053 	if ((ntwdt_ptr = getstate(inst)) == NULL)
1054 		return (ENXIO);
1055 
1056 	/* Only allow ioctl's if Solaris/ScApp support app-wdog */
1057 	if (ntwdt_watchdog_available == 0)
1058 		return (ENXIO);
1059 
1060 	wdog_state = ntwdt_ptr->ntwdt_wdog_state;
1061 
1062 	switch (cmd) {
1063 	case LOMIOCDOGSTATE: {
1064 		/*
1065 		 * Return the state of the AWDT to the application.
1066 		 */
1067 		lom_dogstate_t lom_dogstate;
1068 
1069 		mutex_enter(&wdog_state->ntwdt_wdog_mutex);
1070 		lom_dogstate.reset_enable =
1071 		    wdog_state->ntwdt_reset_enabled;
1072 		lom_dogstate.dog_enable =
1073 		    wdog_state->ntwdt_wdog_enabled;
1074 		lom_dogstate.dog_timeout =
1075 		    wdog_state->ntwdt_wdog_timeout;
1076 		mutex_exit(&wdog_state->ntwdt_wdog_mutex);
1077 
1078 		NTWDT_DBG(WDT_DBG_IOCTL, ("DOGSTATE: wdog/reset/timeout:"
1079 		    " %d/%d/%d", lom_dogstate.dog_enable,
1080 		    lom_dogstate.reset_enable, lom_dogstate.dog_timeout));
1081 
1082 		if (ddi_copyout((caddr_t)&lom_dogstate, (caddr_t)arg,
1083 		    sizeof (lom_dogstate_t), mode) != 0) {
1084 			retval = EFAULT;
1085 		}
1086 		break;
1087 	}
1088 
1089 	case LOMIOCDOGCTL: {
1090 		/*
1091 		 * Allow application to control whether watchdog
1092 		 * is {dis,en}abled and whether Reset is
1093 		 * {dis,en}abled.
1094 		 */
1095 		lom_dogctl_t	lom_dogctl;
1096 
1097 		if (ddi_copyin((caddr_t)arg, (caddr_t)&lom_dogctl,
1098 		    sizeof (lom_dogctl_t), mode) != 0) {
1099 			retval = EFAULT;
1100 			break;
1101 		}
1102 
1103 		NTWDT_DBG(WDT_DBG_IOCTL, ("DOGCTL: wdog/reset:"
1104 		    " %d/%d", lom_dogctl.dog_enable,
1105 		    lom_dogctl.reset_enable));
1106 
1107 		mutex_enter(&wdog_state->ntwdt_wdog_mutex);
1108 
1109 		if (wdog_state->ntwdt_wdog_timeout == 0) {
1110 			/*
1111 			 * then LOMIOCDOGTIME has never been used
1112 			 * to setup a valid timeout.
1113 			 */
1114 			retval = EINVAL;
1115 			goto end;
1116 		}
1117 
1118 		/*
1119 		 * Return error for the non-sensical combination:
1120 		 * "enable Reset" and "disable watchdog".
1121 		 */
1122 		if (lom_dogctl.dog_enable == 0 &&
1123 		    lom_dogctl.reset_enable != 0) {
1124 			retval = EINVAL;
1125 			goto end;
1126 		}
1127 
1128 		/*
1129 		 * Store the user-specified state in our softstate.
1130 		 * Note that our implementation here is stateless.
1131 		 * Eg, we do not disallow an "enable the watchdog"
1132 		 * command when the watchdog is currently enabled.
1133 		 * This is needed (at least in the case) when
1134 		 * the user enters OBP via ScApp/lom.  In that case,
1135 		 * ScApp disables the watchdog, but does not inform
1136 		 * Solaris.  As a result, an ensuing, unfiltered DOGCTL
1137 		 * to enable the watchdog is required.
1138 		 */
1139 		wdog_state->ntwdt_reset_enabled =
1140 		    lom_dogctl.reset_enable;
1141 		wdog_state->ntwdt_wdog_enabled =
1142 		    lom_dogctl.dog_enable;
1143 
1144 		if (wdog_state->ntwdt_wdog_enabled != 0) {
1145 			/*
1146 			 * then user wants to enable watchdog.
1147 			 * Arm the watchdog timer and start the
1148 			 * Cyclic, if it is not running.
1149 			 */
1150 			ntwdt_arm_vwdt(wdog_state);
1151 
1152 			if (wdog_state->ntwdt_timer_running == 0) {
1153 				ntwdt_start_timer(ntwdt_ptr);
1154 			}
1155 		} else {
1156 			/*
1157 			 * user wants to disable the watchdog.
1158 			 * Note that we do not set ntwdt_secs_remaining
1159 			 * to zero; that could cause a false expiration.
1160 			 */
1161 			if (wdog_state->ntwdt_timer_running != 0) {
1162 				ntwdt_stop_timer(ntwdt_ptr);
1163 			}
1164 		}
1165 
1166 		/*
1167 		 * Send a permutation of mailbox commands to
1168 		 * ScApp that describes the current state of the
1169 		 * watchdog timer.  Note that the permutation
1170 		 * depends on whether this is the first
1171 		 * Enabling of the watchdog or not.
1172 		 */
1173 		if (wdog_state->ntwdt_wdog_enabled != 0 &&
1174 		    wdog_state->ntwdt_is_initial_enable == 0) {
1175 
1176 			/* switch from SWDT to AWDT mode */
1177 			ntwdt_swdt_to_awdt(wdog_state);
1178 
1179 			/* Tell ScApp we're in AWDT mode */
1180 			ntwdt_set_cfgvar(LW8_WDT_PROP_MODE,
1181 			    LW8_PROP_MODE_AWDT);
1182 		}
1183 
1184 		/* Inform ScApp of the choices made by the app */
1185 		ntwdt_set_cfgvar(LW8_WDT_PROP_WDT,
1186 		    wdog_state->ntwdt_wdog_enabled);
1187 		ntwdt_set_cfgvar(LW8_WDT_PROP_RECOV,
1188 		    wdog_state->ntwdt_reset_enabled);
1189 
1190 		if (wdog_state->ntwdt_wdog_enabled != 0 &&
1191 		    wdog_state->ntwdt_is_initial_enable == 0) {
1192 			/*
1193 			 * Clear tod_iosram_t.tod_timeout_period,
1194 			 * which is used in SWDT part of state
1195 			 * machine.  (If this field is non-zero,
1196 			 * ScApp assumes that Solaris' SWDT is active).
1197 			 *
1198 			 * Clearing this is useful in case SC reboots
1199 			 * while Solaris is running, as ScApp will read
1200 			 * a zero and not assume SWDT is running.
1201 			 */
1202 			ntwdt_set_hw_timeout(0);
1203 
1204 			/* "the first watchdog-enable has been seen" */
1205 			wdog_state->ntwdt_is_initial_enable = 1;
1206 		}
1207 
1208 		mutex_exit(&wdog_state->ntwdt_wdog_mutex);
1209 		break;
1210 	}
1211 
1212 	case LOMIOCDOGTIME: {
1213 		/*
1214 		 * Allow application to set the period (in seconds)
1215 		 * of the watchdog timeout.
1216 		 */
1217 		uint32_t	lom_dogtime;
1218 
1219 		if (ddi_copyin((caddr_t)arg, (caddr_t)&lom_dogtime,
1220 		    sizeof (uint32_t), mode) != 0) {
1221 			retval = EFAULT;
1222 			break;
1223 		}
1224 
1225 		NTWDT_DBG(WDT_DBG_IOCTL, ("DOGTIME: %u seconds",
1226 		    lom_dogtime));
1227 
1228 		/* Ensure specified timeout is within range. */
1229 		if ((lom_dogtime == 0) ||
1230 		    (lom_dogtime > NTWDT_MAX_TIMEOUT)) {
1231 			retval = EINVAL;
1232 			break;
1233 		}
1234 
1235 		mutex_enter(&wdog_state->ntwdt_wdog_mutex);
1236 
1237 		wdog_state->ntwdt_wdog_timeout = lom_dogtime;
1238 
1239 		/*
1240 		 * If watchdog is currently running, re-arm the
1241 		 * watchdog timeout with the specified value.
1242 		 */
1243 		if (wdog_state->ntwdt_timer_running != 0) {
1244 			ntwdt_arm_vwdt(wdog_state);
1245 		}
1246 
1247 		/* Tell ScApp of the specified timeout */
1248 		ntwdt_set_cfgvar(LW8_WDT_PROP_TO, lom_dogtime);
1249 
1250 		mutex_exit(&wdog_state->ntwdt_wdog_mutex);
1251 		break;
1252 	}
1253 
1254 	case LOMIOCDOGPAT: {
1255 		/*
1256 		 * Allow user to re-arm ("pat") the watchdog.
1257 		 */
1258 		NTWDT_DBG(WDT_DBG_IOCTL, ("DOGPAT"));
1259 
1260 		mutex_enter(&wdog_state->ntwdt_wdog_mutex);
1261 
1262 		/*
1263 		 * If watchdog is not enabled or underlying
1264 		 * Cyclic timer is not running, exit.
1265 		 */
1266 		if (!(wdog_state->ntwdt_wdog_enabled &&
1267 		    wdog_state->ntwdt_timer_running))
1268 			goto end;
1269 
1270 		if (wdog_state->ntwdt_wdog_expired == 0) {
1271 			/* then VWDT has not expired; re-arm it */
1272 			ntwdt_arm_vwdt(wdog_state);
1273 
1274 			NTWDT_DBG(WDT_DBG_VWDT, ("VWDT re-armed:"
1275 			    " %d seconds",
1276 			    wdog_state->ntwdt_secs_remaining));
1277 		}
1278 
1279 		mutex_exit(&wdog_state->ntwdt_wdog_mutex);
1280 		break;
1281 	}
1282 
1283 #ifdef DEBUG
1284 	case NTWDTIOCPANIC: {
1285 		/*
1286 		 * Use in unit/integration testing to test our
1287 		 * panic-handler code.
1288 		 */
1289 		cmn_err(CE_PANIC, "NTWDTIOCPANIC: force a panic");
1290 		break;
1291 	}
1292 
1293 	case NTWDTIOCSTATE: {
1294 		/*
1295 		 * Allow application to read wdog state from the
1296 		 * SC (and *not* the driver's softstate).
1297 		 *
1298 		 * Return state of:
1299 		 *  o recovery-enabled
1300 		 *  o current timeout value
1301 		 */
1302 		ntwdt_data_t	ntwdt_data;
1303 		int		action;
1304 		int		timeout;
1305 		int		ret;
1306 
1307 		mutex_enter(&wdog_state->ntwdt_wdog_mutex);
1308 		ret = ntwdt_get_cfgvar(LW8_WDT_PROP_TO, &timeout);
1309 		ret |= ntwdt_get_cfgvar(LW8_WDT_PROP_RECOV, &action);
1310 		mutex_exit(&wdog_state->ntwdt_wdog_mutex);
1311 
1312 		bzero((caddr_t)&ntwdt_data, sizeof (ntwdt_data));
1313 
1314 		if (ret != NTWDT_SUCCESS) {
1315 			retval = EIO;
1316 			break;
1317 		}
1318 
1319 		NTWDT_DBG(WDT_DBG_IOCTL, ("NTWDTIOCSTATE:"
1320 		    " timeout/action: %d/%d", timeout, action));
1321 
1322 		ntwdt_data.ntwdt_wd1 = (uint32_t)timeout;
1323 		ntwdt_data.ntwdt_wd2 = (uint8_t)action;
1324 
1325 		if (ddi_copyout((caddr_t)&ntwdt_data, (caddr_t)arg,
1326 		    sizeof (ntwdt_data_t), mode) != 0) {
1327 			retval = EFAULT;
1328 		}
1329 		break;
1330 	}
1331 #endif
1332 	default:
1333 		retval = EINVAL;
1334 		break;
1335 	}
1336 
1337 	return (retval);
1338 end:
1339 	mutex_exit(&wdog_state->ntwdt_wdog_mutex);
1340 	return (retval);
1341 }
1342 
1343 /*
1344  * Arm the Virtual Watchdog Timer (VWDT).
1345  *
1346  * Assign the current watchdog timeout (ntwdt_wdog_timeout)
1347  * to the softstate variable representing the watchdog
1348  * timer (ntwdt_secs_remaining).
1349  *
1350  * To ensure (from ntwdt's perspective) that any actual
1351  * timeout expiration is at least as large as the expected
1352  * timeout, conditionally set/clear a bit that will be
1353  * checked in the Cyclic's softint.
1354  *
1355  * If the Cyclic has been started, the goal is to ignore
1356  * the _next_ firing of the Cyclic, as that firing will
1357  * NOT represent a full, one-second period.  If the Cyclic
1358  * has NOT been started yet, then do not ignore the next
1359  * Cyclic's firing, as that's the First One, and it was
1360  * programmed to fire at a specific time (see ntwdt_start_timer).
1361  */
1362 static void
1363 ntwdt_arm_vwdt(ntwdt_wdog_t *wdog_state)
1364 {
1365 	/* arm the watchdog timer (VWDT) */
1366 	wdog_state->ntwdt_secs_remaining =
1367 	    wdog_state->ntwdt_wdog_timeout;
1368 
1369 	if (wdog_state->ntwdt_timer_running != 0)
1370 		NTWDT_FLAG_SET(wdog_state, SKIP_CYCLIC);
1371 	else
1372 		NTWDT_FLAG_CLR(wdog_state, SKIP_CYCLIC);
1373 }
1374 
1375 /*
1376  * Switch from SWDT mode to AWDT mode.
1377  */
1378 _NOTE(ARGSUSED(0))
1379 static void
1380 ntwdt_swdt_to_awdt(ntwdt_wdog_t *wdog_state)
1381 {
1382 	ASSERT(wdog_state->ntwdt_is_initial_enable == 0);
1383 
1384 	/*
1385 	 * Disable SWDT.  If SWDT is currently active,
1386 	 * display a message so user knows that SWDT Mode
1387 	 * has terminated.
1388 	 */
1389 	if (watchdog_enable != 0 ||
1390 	    watchdog_activated != 0)
1391 		cmn_err(CE_NOTE, "Hardware watchdog disabled");
1392 	watchdog_enable = 0;
1393 	watchdog_activated = 0;
1394 
1395 	/* "we are in AWDT mode" */
1396 	ntwdt_watchdog_activated = 1;
1397 	NTWDT_DBG(WDT_DBG_VWDT, ("AWDT is enabled"));
1398 }
1399 
1400 /*
1401  * This is the Cyclic that runs at a multiple of the
1402  * AWDT's watchdog-timeout period.  This Cyclic runs at
1403  * LOCK_LEVEL (eg, CY_LOCK_LEVEL) and will post a
1404  * soft-interrupt in order to complete all processing.
1405  *
1406  * Executing at LOCK_LEVEL gives this function a high
1407  * interrupt priority, while performing its work via
1408  * a soft-interrupt allows for a consistent (eg, MT-safe)
1409  * view of driver softstate between User and Interrupt
1410  * context.
1411  *
1412  * Context:
1413  *  interrupt context: Cyclic framework calls at
1414  *                     CY_LOCK_LEVEL (=> 10)
1415  */
1416 _NOTE(ARGSUSED(0))
1417 static void
1418 ntwdt_cyclic_pat(void *arg)
1419 {
1420 	/* post-down to DDI_SOFTINT_LOW */
1421 	ddi_trigger_softintr(ntwdt_cyclic_softint_id);
1422 }
1423 
1424 /*
1425  * This is the soft-interrupt triggered by the AWDT
1426  * Cyclic.
1427  *
1428  * This softint does all the work re: computing whether
1429  * the VWDT expired.  It grabs ntwdt_wdog_mutex
1430  * so User Context code (eg, the IOCTLs) cannot run,
1431  * and then it tests whether the VWDT expired.  If it
1432  * hasn't, it decrements the VWDT timer by the amount
1433  * of the Cyclic's period.  If the timer has expired,
1434  * it initiates Recovery (based on what user specified
1435  * in LOMIOCDOGCTL).
1436  *
1437  * This function also updates the normal system "heartbeat".
1438  *
1439  * Context:
1440  *  interrupt-context: DDI_SOFTINT_LOW
1441  */
1442 static uint_t
1443 ntwdt_cyclic_softint(char *arg)
1444 {
1445 	ntwdt_state_t	*ntwdt_ptr = (ntwdt_state_t *)arg;
1446 	ntwdt_wdog_t	*wdog_state;
1447 
1448 	wdog_state = ntwdt_ptr->ntwdt_wdog_state;
1449 
1450 	mutex_enter(&wdog_state->ntwdt_wdog_mutex);
1451 
1452 	if ((wdog_state->ntwdt_wdog_flags &
1453 	    NTWDT_FLAG_SKIP_CYCLIC) != 0) {
1454 		/*
1455 		 * then skip all processing by this interrupt.
1456 		 * (see ntwdt_arm_vwdt()).
1457 		 */
1458 		wdog_state->ntwdt_wdog_flags &= ~NTWDT_FLAG_SKIP_CYCLIC;
1459 		goto end;
1460 	}
1461 
1462 	if (wdog_state->ntwdt_timer_running == 0 ||
1463 	    (ntwdt_ptr->ntwdt_cycl_id == CYCLIC_NONE) ||
1464 	    (wdog_state->ntwdt_wdog_enabled == 0))
1465 		goto end;
1466 
1467 	/* re-arm ("pat") the hardware watchdog */
1468 	ntwdt_pat_hw_watchdog();
1469 
1470 	/* Decrement the VWDT and see if it has expired. */
1471 	if (--wdog_state->ntwdt_secs_remaining == 0) {
1472 
1473 		cmn_err(CE_WARN, "application-watchdog expired");
1474 
1475 		wdog_state->ntwdt_wdog_expired = 1;
1476 
1477 		if (wdog_state->ntwdt_reset_enabled != 0) {
1478 			/*
1479 			 * Update ScApp so that the new wdog-timeout
1480 			 * value is as specified in the
1481 			 * NTWDT_BOOT_TIMEOUT_PROP driver Property.
1482 			 * This timeout is assumedly larger than the
1483 			 * actual Solaris reboot time.  This will allow
1484 			 * our forced-reboot to not cause an unplanned
1485 			 * (series of) watchdog expiration(s).
1486 			 */
1487 			if (ntwdt_disable_timeout_action == 0)
1488 				ntwdt_reprogram_wd(ntwdt_ptr);
1489 
1490 			mutex_exit(&wdog_state->ntwdt_wdog_mutex);
1491 
1492 			NTWDT_DBG(WDT_DBG_VWDT, ("recovery being done"));
1493 
1494 			ntwdt_enforce_timeout();
1495 		} else {
1496 			NTWDT_DBG(WDT_DBG_VWDT, ("no recovery being done"));
1497 
1498 			wdog_state->ntwdt_wdog_enabled = 0;
1499 
1500 			/*
1501 			 * Tell ScApp to disable wdog; this prevents
1502 			 * the "2x-timeout" artifact.  Eg, Solaris
1503 			 * times-out at t(x) and ScApp times-out at t(2x),
1504 			 * where (x==ntwdt_wdog_timeout).
1505 			 */
1506 			(void) ntwdt_set_cfgvar(LW8_WDT_PROP_WDT,
1507 			    wdog_state->ntwdt_wdog_enabled);
1508 		}
1509 
1510 		/* Schedule Callout to stop this Cyclic */
1511 		timeout(ntwdt_stop_timer_lock, ntwdt_ptr, 0);
1512 
1513 	} else {
1514 		_NOTE(EMPTY)
1515 		NTWDT_DBG(WDT_DBG_VWDT, ("time remaining in VWDT: %d"
1516 		    " seconds", wdog_state->ntwdt_secs_remaining));
1517 	}
1518 end:
1519 	mutex_exit(&wdog_state->ntwdt_wdog_mutex);
1520 
1521 	return (DDI_INTR_CLAIMED);
1522 }
1523 
1524 /*
1525  * Program the AWDT watchdog-timeout value to that specified
1526  * in the NTWDT_BOOT_TIMEOUT_PROP driver Property.  However,
1527  * only do this if the AWDT is in the correct state.
1528  *
1529  * Caller's Context:
1530  *  o interrupt context: (from software-interrupt)
1531  *  o during a panic
1532  */
1533 static void
1534 ntwdt_reprogram_wd(ntwdt_state_t *ntwdt_ptr)
1535 {
1536 	ntwdt_wdog_t *wdog_state = ntwdt_ptr->ntwdt_wdog_state;
1537 
1538 	/*
1539 	 * Program the AWDT watchdog-timeout value only if the
1540 	 * watchdog is enabled, the user wants to do recovery,
1541 	 * ("reset is enabled") and the AWDT timer is currently
1542 	 * running.
1543 	 */
1544 	if (wdog_state->ntwdt_wdog_enabled != 0 &&
1545 	    wdog_state->ntwdt_reset_enabled != 0 &&
1546 	    wdog_state->ntwdt_timer_running != 0) {
1547 		if (ddi_in_panic() != 0)
1548 			ntwdt_set_cfgvar_noreply(LW8_WDT_PROP_TO,
1549 			    wdog_state->ntwdt_boot_timeout);
1550 		else
1551 			(void) ntwdt_set_cfgvar(LW8_WDT_PROP_TO,
1552 			    wdog_state->ntwdt_boot_timeout);
1553 	}
1554 }
1555 
1556 /*
1557  * This is the callback that was registered to run during a panic.
1558  * It will set the watchdog-timeout value to be that as specified
1559  * in the NTWDT_BOOT_TIMEOUT_PROP driver Property.
1560  *
1561  * Note that unless this Property's value specifies a timeout
1562  * that's larger than the actual reboot latency, ScApp will
1563  * experience a timeout and initiate Recovery.
1564  */
1565 _NOTE(ARGSUSED(1))
1566 static boolean_t
1567 ntwdt_panic_cb(void *arg, int code)
1568 {
1569 	ASSERT(ddi_in_panic() != 0);
1570 
1571 	ntwdt_reprogram_wd((ntwdt_state_t *)arg);
1572 
1573 	return (B_TRUE);
1574 }
1575 
1576 /*
1577  * Initialize the Cyclic that is used to monitor the VWDT.
1578  */
1579 static void
1580 ntwdt_start_timer(ntwdt_state_t *ntwdt_ptr)
1581 {
1582 	ntwdt_wdog_t	*wdog_state = ntwdt_ptr->ntwdt_wdog_state;
1583 	cyc_handler_t	*hdlr = &wdog_state->ntwdt_cycl_hdlr;
1584 	cyc_time_t	*when = &wdog_state->ntwdt_cycl_time;
1585 
1586 	/*
1587 	 * Init Cyclic so its first expiry occurs wdog-timeout
1588 	 * seconds from the current, absolute time.
1589 	 */
1590 	when->cyt_interval = wdog_state->ntwdt_cyclic_interval;
1591 	when->cyt_when = gethrtime() + when->cyt_interval;
1592 
1593 	wdog_state->ntwdt_wdog_expired = 0;
1594 	wdog_state->ntwdt_timer_running = 1;
1595 
1596 	mutex_enter(&cpu_lock);
1597 	if (ntwdt_ptr->ntwdt_cycl_id == CYCLIC_NONE)
1598 		ntwdt_ptr->ntwdt_cycl_id = cyclic_add(hdlr, when);
1599 	mutex_exit(&cpu_lock);
1600 
1601 	NTWDT_DBG(WDT_DBG_VWDT, ("AWDT's cyclic-driven timer is started"));
1602 }
1603 
1604 /*
1605  * Stop the cyclic that is used to monitor the VWDT (and
1606  * was Started by ntwdt_start_timer).
1607  *
1608  * Context: per the Cyclic API, cyclic_remove cannot be called
1609  *          from interrupt-context.  Note that when this is
1610  *	    called via a Callout, it's called from base level.
1611  */
1612 static void
1613 ntwdt_stop_timer(void *arg)
1614 {
1615 	ntwdt_state_t	*ntwdt_ptr = (void *)arg;
1616 	ntwdt_wdog_t	*wdog_state = ntwdt_ptr->ntwdt_wdog_state;
1617 
1618 	mutex_enter(&cpu_lock);
1619 	if (ntwdt_ptr->ntwdt_cycl_id != CYCLIC_NONE)
1620 		cyclic_remove(ntwdt_ptr->ntwdt_cycl_id);
1621 	mutex_exit(&cpu_lock);
1622 
1623 	wdog_state->ntwdt_timer_running = 0;
1624 	ntwdt_ptr->ntwdt_cycl_id = CYCLIC_NONE;
1625 
1626 	NTWDT_DBG(WDT_DBG_VWDT, ("AWDT's cyclic-driven timer is stopped"));
1627 }
1628 
1629 /*
1630  * Stop the cyclic that is used to monitor the VWDT (and
1631  * do it in a thread-safe manner).
1632  *
1633  * This is a wrapper function for the core function,
1634  * ntwdt_stop_timer.  Both functions are useful, as some
1635  * callers will already have the appropriate mutex locked, and
1636  * other callers will not.
1637  */
1638 static void
1639 ntwdt_stop_timer_lock(void *arg)
1640 {
1641 	ntwdt_state_t	*ntwdt_ptr = (void *)arg;
1642 	ntwdt_wdog_t	*wdog_state = ntwdt_ptr->ntwdt_wdog_state;
1643 
1644 	mutex_enter(&wdog_state->ntwdt_wdog_mutex);
1645 	ntwdt_stop_timer(arg);
1646 	mutex_exit(&wdog_state->ntwdt_wdog_mutex);
1647 }
1648 
1649 /*
1650  * Add callbacks needed to react to major system state transitions.
1651  */
1652 static void
1653 ntwdt_add_callbacks(ntwdt_state_t *ntwdt_ptr)
1654 {
1655 	/* register a callback that's called during a panic */
1656 	ntwdt_callback_ids.ntwdt_panic_cb = callb_add(ntwdt_panic_cb,
1657 	    (void *)ntwdt_ptr, CB_CL_PANIC, "ntwdt_panic_cb");
1658 }
1659 
1660 /*
1661  * Remove callbacks added by ntwdt_add_callbacks.
1662  */
1663 static void
1664 ntwdt_remove_callbacks()
1665 {
1666 	callb_delete(ntwdt_callback_ids.ntwdt_panic_cb);
1667 }
1668 
1669 /*
1670  * Initiate a Reset (as a result of the VWDT timeout expiring).
1671  */
1672 static void
1673 ntwdt_enforce_timeout()
1674 {
1675 	if (ntwdt_disable_timeout_action != 0) {
1676 		cmn_err(CE_NOTE, "OS timeout expired, taking no action");
1677 		return;
1678 	}
1679 
1680 	NTWDT_DBG(WDT_DBG_VWDT, ("VWDT expired; do a crashdump"));
1681 
1682 	(void) kadmin(A_DUMP, AD_BOOT, NULL, kcred);
1683 	cmn_err(CE_PANIC, "kadmin(A_DUMP, AD_BOOT) failed");
1684 	_NOTE(NOTREACHED)
1685 }
1686 
1687 /*
1688  * Interpret the Properties from driver's config file.
1689  */
1690 static int
1691 ntwdt_read_props(ntwdt_state_t *ntwdt_ptr)
1692 {
1693 	ntwdt_wdog_t	*wdog_state;
1694 	int		boot_timeout;
1695 
1696 	wdog_state = ntwdt_ptr->ntwdt_wdog_state;
1697 
1698 	/*
1699 	 * interpret Property that specifies how long
1700 	 * the watchdog-timeout should be set to when
1701 	 * Solaris panics.  Assumption is that this value
1702 	 * is larger than the amount of time it takes
1703 	 * to reboot and write crashdump.  If not,
1704 	 * ScApp could induce a reset, due to an expired
1705 	 * watchdog-timeout.
1706 	 */
1707 	wdog_state->ntwdt_boot_timeout =
1708 	    NTWDT_DEFAULT_BOOT_TIMEOUT;
1709 
1710 	boot_timeout = ddi_prop_get_int(DDI_DEV_T_ANY,
1711 	    ntwdt_ptr->ntwdt_dip, DDI_PROP_DONTPASS,
1712 	    NTWDT_BOOT_TIMEOUT_PROP, -1);
1713 
1714 	if (boot_timeout != -1 && boot_timeout > 0 &&
1715 	    boot_timeout <= NTWDT_MAX_TIMEOUT) {
1716 		wdog_state->ntwdt_boot_timeout =
1717 		    boot_timeout;
1718 	} else {
1719 		_NOTE(EMPTY)
1720 		NTWDT_DBG(WDT_DBG_ENTRY, (NTWDT_BOOT_TIMEOUT_PROP
1721 		    ": using default of %d seconds.",
1722 		    wdog_state->ntwdt_boot_timeout));
1723 	}
1724 
1725 	return (DDI_SUCCESS);
1726 }
1727 
1728 /*
1729  * Write state of SWDT to ScApp.
1730  *
1731  * Currently, this function is only called on attach()
1732  * of our driver.
1733  *
1734  * Note that we do not need to call this function, eg,
1735  * in response to a solicitation from ScApp (eg,
1736  * the LW8_SC_RESTARTED_EVENT).
1737  *
1738  * Context:
1739  *  called in Kernel Context
1740  */
1741 static int
1742 ntwdt_set_swdt_state()
1743 {
1744 	/*
1745 	 * note that ScApp only needs this one
1746 	 * variable when system is in SWDT mode.
1747 	 */
1748 	ntwdt_set_cfgvar(LW8_WDT_PROP_MODE,
1749 	    LW8_PROP_MODE_SWDT);
1750 
1751 	return (0);
1752 }
1753 
1754 /*
1755  * Write all AWDT state to ScApp via the SBBC mailbox
1756  * in IOSRAM.  Note that the permutation of Writes
1757  * is as specified in the design spec.
1758  *
1759  * Notes: caller must perform synchronization so that
1760  *        this series of Writes is consistent as viewed
1761  *        by ScApp (eg, there is no LW8_WDT_xxx mailbox
1762  *        command that contains "all Properties"; each
1763  *        Property must be written individually).
1764  */
1765 static int
1766 ntwdt_set_awdt_state(ntwdt_wdog_t *rstatep)
1767 {
1768 	/* ScApp expects values in this order: */
1769 	ntwdt_set_cfgvar(LW8_WDT_PROP_MODE,
1770 	    ntwdt_watchdog_activated != 0);
1771 	ntwdt_set_cfgvar(LW8_WDT_PROP_TO,
1772 	    rstatep->ntwdt_wdog_timeout);
1773 	ntwdt_set_cfgvar(LW8_WDT_PROP_RECOV,
1774 	    rstatep->ntwdt_reset_enabled);
1775 	ntwdt_set_cfgvar(LW8_WDT_PROP_WDT,
1776 	    rstatep->ntwdt_wdog_enabled);
1777 
1778 	return (NTWDT_SUCCESS);
1779 }
1780 
1781 /*
1782  * Write a specified WDT Property (and Value) to ScApp.
1783  *
1784  * <Property, Value> is passed in the LW8_MBOX_WDT_SET
1785  * (SBBC) mailbox message.  The SBBC mailbox resides in
1786  * IOSRAM.
1787  *
1788  * Note that this function is responsible for ensuring that
1789  * a driver-specific representation of a mailbox <Value> is
1790  * mapped into the representation that is expected by ScApp
1791  * (eg, see LW8_WDT_PROP_RECOV).
1792  */
1793 static int
1794 ntwdt_set_cfgvar(int var, int val)
1795 {
1796 	int 		rv;
1797 	int 		mbox_val;
1798 	lw8_set_wdt_t	set_wdt;
1799 
1800 	switch (var) {
1801 	case LW8_WDT_PROP_RECOV:
1802 #ifdef DEBUG
1803 		NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of 'recovery-enabled':"
1804 		    " %s (%d)", (val != 0) ? "enabled" : "disabled", val));
1805 #endif
1806 		mbox_val = (val != 0) ? LW8_PROP_RECOV_ENABLED :
1807 		    LW8_PROP_RECOV_DISABLED;
1808 		break;
1809 
1810 	case LW8_WDT_PROP_WDT:
1811 #ifdef DEBUG
1812 		NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of 'wdog-enabled':"
1813 		    " %s (%d)", (val != 0) ? "enabled" : "disabled", val));
1814 #endif
1815 		mbox_val = (val != 0) ? LW8_PROP_WDT_ENABLED :
1816 		    LW8_PROP_WDT_DISABLED;
1817 		break;
1818 
1819 	case LW8_WDT_PROP_TO:
1820 #ifdef DEBUG
1821 		NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of 'wdog-timeout':"
1822 		    " %d seconds", val));
1823 #endif
1824 		mbox_val = val;
1825 		break;
1826 
1827 	case LW8_WDT_PROP_MODE:
1828 #ifdef DEBUG
1829 		NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of 'wdog-mode':"
1830 		    " %s (%d)", (val != LW8_PROP_MODE_SWDT) ?
1831 		    "AWDT" : "SWDT", val));
1832 #endif
1833 		mbox_val = val;
1834 		break;
1835 
1836 	default:
1837 		ASSERT(0);
1838 		_NOTE(NOTREACHED)
1839 	}
1840 
1841 	set_wdt.property_id = var;
1842 	set_wdt.value = mbox_val;
1843 
1844 	rv = ntwdt_lomcmd(LW8_MBOX_WDT_SET, (intptr_t)&set_wdt);
1845 	if (rv != 0) {
1846 		_NOTE(EMPTY)
1847 		NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of prop/val %d/%d "
1848 		    "failed: %d", var, mbox_val, rv));
1849 	}
1850 
1851 	return (rv);
1852 }
1853 
/*
 * Variant of ntwdt_set_cfgvar for callers that do not consume a
 * result.  It simply forwards to ntwdt_set_cfgvar and discards
 * the return value.
 */
static void
ntwdt_set_cfgvar_noreply(int var, int val)
{
	(void) ntwdt_set_cfgvar(var, val);
}
1859 
#ifdef DEBUG
/*
 * Read a specified WDT Property from ScApp.
 *
 * <Property> is passed in the Request of the LW8_MBOX_WDT_GET
 * (SBBC) mailbox message, and the Property's <Value>
 * is returned in the message's Response.  The SBBC mailbox
 * resides in IOSRAM.
 *
 * var is one of LW8_WDT_PROP_{RECOV,WDT,TO}; on success the
 * Property's value is stored in *val.
 *
 * Returns the result of ntwdt_lomcmd (0 on success).  On failure
 * *val is left untouched.
 */
static int
ntwdt_get_cfgvar(int var, int *val)
{
	lw8_get_wdt_t	get_wdt;
	int		rv;

	rv = ntwdt_lomcmd(LW8_MBOX_WDT_GET, (intptr_t)&get_wdt);
	if (rv != 0) {
		_NOTE(EMPTY)
		NTWDT_DBG(WDT_DBG_PROT, ("MBOX_GET failed: %d", rv));
	} else {
		switch (var) {
		case LW8_WDT_PROP_RECOV:
			*val = (uint8_t)get_wdt.recovery_enabled;
			NTWDT_DBG(WDT_DBG_PROT, ("MBOX_GET of 'reset-enabled':"
			    " %s (%d)", (*val != 0) ? "enabled" : "disabled",
			    *val));
			break;

		case LW8_WDT_PROP_WDT:
			*val = (uint8_t)get_wdt.watchdog_enabled;
			NTWDT_DBG(WDT_DBG_PROT, ("MBOX_GET of 'wdog-enabled':"
			    " %s (%d)", (*val != 0) ? "enabled" : "disabled",
			    *val));
			break;

		case LW8_WDT_PROP_TO:
			/*
			 * Do not narrow the timeout to 8 bits: the
			 * driver accepts timeouts up to
			 * NTWDT_MAX_TIMEOUT, and a narrowing cast
			 * would report any value above 255 modulo 256.
			 */
			*val = get_wdt.timeout;
			NTWDT_DBG(WDT_DBG_PROT, ("MBOX_GET of 'wdog-timeout':"
			    " %d seconds", *val));
			break;

		default:
			ASSERT(0);
			_NOTE(NOTREACHED)
		}
	}

	return (rv);
}
#endif
1910 
1911 /*
1912  * Update the real system "heartbeat", which resides in IOSRAM.
1913  * This "heartbeat" is normally used in SWDT Mode, but when
1914  * in AWDT Mode, ScApp also uses its value to determine if Solaris
1915  * is up-and-running.
1916  */
1917 static void
1918 ntwdt_pat_hw_watchdog()
1919 {
1920 	tod_iosram_t	tod_buf;
1921 	static uint32_t	i_am_alive = 0;
1922 #ifdef DEBUG
1923 	if (ntwdt_stop_heart != 0)
1924 		return;
1925 #endif
1926 	/* Update the system heartbeat */
1927 	if (i_am_alive == UINT32_MAX)
1928 		i_am_alive = 0;
1929 	else
1930 		i_am_alive++;
1931 
1932 	NTWDT_DBG(WDT_DBG_HEART, ("update heartbeat: %d",
1933 	    i_am_alive));
1934 
1935 	if (iosram_write(SBBC_TOD_KEY, OFFSET(tod_buf, tod_i_am_alive),
1936 			(char *)&i_am_alive, sizeof (uint32_t))) {
1937 		cmn_err(CE_WARN, "ntwdt_pat_hw_watchdog(): "
1938 		    "write heartbeat failed");
1939 	}
1940 }
1941 
1942 /*
1943  * Write the specified value to the system's normal (IOSRAM)
1944  * location that's used to specify Solaris' watchdog-timeout
1945  * on Serengeti platforms.
1946  *
1947  * In SWDT Mode, this location can hold values [0,n).
1948  * In AWDT Mode, this location must have value 0 (else
1949  * after a ScApp-reboot, ScApp could mistakenly interpret
1950  * that the system is in SWDT Mode).
1951  */
1952 static int
1953 ntwdt_set_hw_timeout(uint32_t period)
1954 {
1955 	tod_iosram_t	tod_buf;
1956 	int		rv;
1957 
1958 	rv = iosram_write(SBBC_TOD_KEY, OFFSET(tod_buf, tod_timeout_period),
1959 	    (char *)&period, sizeof (uint32_t));
1960 	if (rv != 0)
1961 		cmn_err(CE_WARN, "write of %d for TOD timeout "
1962 		    "period failed: %d", period, rv);
1963 
1964 	return (rv);
1965 }
1966 
1967 /*
1968  * Soft-interrupt handler that is triggered when ScApp wants
1969  * to know the current state of the app-wdog.
1970  *
1971  * Grab ntwdt_wdog_mutex so that we synchronize with any
1972  * concurrent User Context and Interrupt Context activity.  Call
1973  * a function that writes a permutation of the watchdog state
1974  * to the SC, then release the mutex.
1975  *
1976  * We grab the mutex not only so that each variable is consistent
1977  * but also so that the *permutation* of variables is consistent.
1978  * I.e., any set of one or more variables (that we write to SC
1979  * using multiple mailbox commands) will truly be seen as a
1980  * consistent snapshot.  Note that if our protocol had a MBOX_SET
1981  * command that allowed writing all watchdog state in one
1982  * command, then the lock-hold latency would be greatly reduced.
1983  * To our advantage, this softint normally executes very
1984  * infrequently.
1985  *
1986  * Context:
1987  *  called at Interrupt Context (DDI_SOFTINT_LOW)
1988  */
1989 static uint_t
1990 ntwdt_mbox_softint(char *arg)
1991 {
1992 	ntwdt_wdog_t	*wdog_state;
1993 
1994 	wdog_state = ((ntwdt_state_t *)arg)->ntwdt_wdog_state;
1995 
1996 	ASSERT(wdog_state != NULL);
1997 
1998 	mutex_enter(&wdog_state->ntwdt_wdog_mutex);
1999 
2000 	/* tell ScApp state of AWDT */
2001 	ntwdt_set_awdt_state(wdog_state);
2002 
2003 	mutex_exit(&wdog_state->ntwdt_wdog_mutex);
2004 
2005 	return (DDI_INTR_CLAIMED);
2006 }
2007 
2008 /*
2009  * Handle MBOX_EVENT_LW8 Events that are sent from ScApp.
2010  *
2011  * The only (sub-)type of Event we handle is the
2012  * LW8_EVENT_SC_RESTARTED Event.  We handle this by triggering
2013  * a soft-interrupt only if we are in AWDT mode.
2014  *
2015  * ScApp sends this Event when it wants to learn the current
2016  * state of the AWDT variables.  Design-wise, this is used to
2017  * handle the case where the SC reboots while the system is in
2018  * AWDT mode (if the SC reboots in SWDT mode, then ScApp
2019  * already knows all necessary info and therefore won't send
2020  * this Event).
2021  *
2022  * Context:
2023  *  function is called in Interrupt Context (at DDI_SOFTINT_MED)
2024  *  and we conditionally trigger a softint that will run at
2025  *  DDI_SOFTINT_LOW.  Note that function executes at
2026  *  DDI_SOFTINT_MED due to how this handler was registered by
2027  *  the implementation of sbbc_mbox_reg_intr().
2028  *
2029  * Notes:
2030  *  Currently, the LW8_EVENT_SC_RESTARTED Event is only sent
2031  *  by SC when in AWDT mode.
2032  */
2033 static uint_t
2034 ntwdt_event_data_handler(char *arg)
2035 {
2036 	lw8_event_t	*payload;
2037 	sbbc_msg_t	*msg;
2038 
2039 	if (arg == NULL) {
2040 		return (DDI_INTR_CLAIMED);
2041 	}
2042 
2043 	msg = (sbbc_msg_t *)arg;
2044 	if (msg->msg_buf == NULL) {
2045 		return (DDI_INTR_CLAIMED);
2046 	}
2047 
2048 	payload = (lw8_event_t *)msg->msg_buf;
2049 
2050 	switch (payload->event_type) {
2051 	case LW8_EVENT_SC_RESTARTED:
2052 		/*
2053 		 * then SC probably was rebooted, and it therefore
2054 		 * needs to know what the current state of AWDT is.
2055 		 */
2056 		NTWDT_DBG(WDT_DBG_EVENT, ("LW8_EVENT_SC_RESTARTED "
2057 		    "received in %s mode",
2058 		    (ntwdt_watchdog_activated != 0) ? "AWDT" : "SWDT"));
2059 
2060 		if (ntwdt_watchdog_activated != 0) {
2061 			/* then system is in AWDT mode */
2062 			ddi_trigger_softintr(ntwdt_mbox_softint_id);
2063 		}
2064 		break;
2065 
2066 	default:
2067 		NTWDT_DBG(WDT_DBG_EVENT,
2068 		    ("MBOX_EVENT_LW8: %d", payload->event_type));
2069 		break;
2070 	}
2071 
2072 	return (DDI_INTR_CLAIMED);
2073 }
2074 
2075 /*
2076  * Send an SBBC Mailbox command to ScApp.
2077  *
2078  * Use the sbbc_mbox_request_response utility function to
2079  * send the Request and receive the optional Response.
2080  *
2081  * Context:
2082  *  can be called from Interrupt Context or User Context.
2083  */
2084 static int
2085 ntwdt_lomcmd(int cmd, intptr_t arg)
2086 {
2087 	sbbc_msg_t	request;
2088 	sbbc_msg_t	*reqp;
2089 	sbbc_msg_t	response;
2090 	sbbc_msg_t	*resp;
2091 	int		rv = 0;
2092 
2093 	reqp = &request;
2094 	bzero((caddr_t)&request, sizeof (request));
2095 	reqp->msg_type.type = LW8_MBOX;
2096 	reqp->msg_type.sub_type = (uint16_t)cmd;
2097 
2098 	resp = &response;
2099 	bzero((caddr_t)&response, sizeof (response));
2100 	resp->msg_type.type = LW8_MBOX;
2101 	resp->msg_type.sub_type = (uint16_t)cmd;
2102 
2103 	switch (cmd) {
2104 	case LW8_MBOX_WDT_GET:
2105 		reqp->msg_len = 0;
2106 		reqp->msg_buf = (caddr_t)NULL;
2107 		resp->msg_len = sizeof (lw8_get_wdt_t);
2108 		resp->msg_buf = (caddr_t)arg;
2109 		break;
2110 
2111 	case LW8_MBOX_WDT_SET:
2112 		reqp->msg_len = sizeof (lw8_set_wdt_t);
2113 		reqp->msg_buf = (caddr_t)arg;
2114 		resp->msg_len = 0;
2115 		resp->msg_buf = (caddr_t)NULL;
2116 		break;
2117 
2118 	default:
2119 		return (EINVAL);
2120 	}
2121 
2122 	rv = sbbc_mbox_request_response(reqp, resp,
2123 		LW8_DEFAULT_MAX_MBOX_WAIT_TIME);
2124 
2125 	if ((rv) || (resp->msg_status != SG_MBOX_STATUS_SUCCESS)) {
2126 
2127 		NTWDT_NDBG(WDT_DBG_PROT, ("SBBC mailbox error:"
2128 		    " (rv/msg_status)=(%d/%d)", rv, resp->msg_status));
2129 
2130 		/* errors from sgsbbc */
2131 		if (resp->msg_status > 0) {
2132 			return (resp->msg_status);
2133 		}
2134 
2135 		/* errors from ScApp */
2136 		switch (resp->msg_status) {
2137 		case SG_MBOX_STATUS_ILLEGAL_PARAMETER:
2138 			/* illegal ioctl parameter */
2139 			return (EINVAL);
2140 
2141 		default:
2142 			return (EIO);
2143 		}
2144 	}
2145 	return (0);
2146 }
2147