xref: /titanic_44/usr/src/uts/sun4v/os/wdt.c (revision 6528affb110ab8cf8b4464874b4a07f3f937475d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/hsvc.h>
30 #include <sys/wdt.h>
31 #include <sys/cmn_err.h>
32 #include <sys/cyclic.h>
33 #include <sys/kmem.h>
34 #include <sys/systm.h>
35 #include <sys/sysmacros.h>
36 #include <sys/hypervisor_api.h>
37 #include <sys/mach_descrip.h>
38 #include <sys/mdesc.h>
39 
/*
 * Values stored in watchdog_activated.
 */
#define	WDT_ON			1
#define	WDT_OFF			0

/*
 * Watchdog timeouts and resolution, expressed in milliseconds.
 * MILLISEC defines the number of milliseconds in a second.
 */
#define	WDT_DEFAULT_RESOLUTION	(1 * MILLISEC)	/* Default resolution = 1s */
#define	WDT_MIN_TIMEOUT		(1 * MILLISEC)	/* Minimum timeout = 1s */
#define	WDT_REGULAR_TIMEOUT	(10 * MILLISEC)	/* Default timeout = 10s */
#define	WDT_LONG_TIMEOUT	(60 * MILLISEC)	/* Long timeout = 60s */

/*
 * Core hypervisor API group version checked by watchdog_init(): the
 * major number must match exactly and the minor number must be at
 * least the value given here.
 */
#define	WDT_MIN_COREAPI_MAJOR	1
#define	WDT_MIN_COREAPI_MINOR	1

static void config_watchdog(uint64_t, int);
static void watchdog_cyclic_init(hrtime_t);

/*
 * Flag used to pat/suspend/resume the watchdog timer. Holds WDT_ON or
 * WDT_OFF and is written by config_watchdog().
 */
int watchdog_activated = WDT_OFF;

/*
 * Tuneable to control watchdog functionality. Watchdog can be
 * disabled via /etc/system. It is also cleared at run time when
 * watchdog setup or a hypervisor watchdog request fails.
 */
int watchdog_enabled = 1;

/*
 * Regular watchdog timeout, in milliseconds.
 *
 * The following tuneable can be set via /etc/system to control
 * watchdog pat frequency, which is set to approximately 44% of
 * the timeout value.
 */
static uint64_t watchdog_timeout = WDT_REGULAR_TIMEOUT;

/*
 * Timeout, in milliseconds, programmed instead of watchdog_timeout
 * while panicstr is set.
 */
static uint64_t watchdog_long_timeout = WDT_LONG_TIMEOUT;

/*
 * Timeout granularity, in milliseconds. watchdog_init() overwrites it
 * with the "watchdog-resolution" machine description property, falls
 * back to WDT_DEFAULT_RESOLUTION when that value is zero or larger than
 * the default, and rounds both timeouts down to a multiple of it.
 */
static uint64_t watchdog_resolution = WDT_DEFAULT_RESOLUTION;
77 
78 void
79 watchdog_init(void)
80 {
81 	int num_nodes;
82 	int nplat;
83 	md_t *mdp;
84 	mde_cookie_t *listp = NULL;
85 	int listsz;
86 	uint64_t major;
87 	uint64_t minor;
88 	uint64_t watchdog_max_timeout;
89 	hrtime_t cyclic_interval;
90 
91 	if (!watchdog_enabled) {
92 		return;
93 	}
94 
95 	if (hsvc_version(HSVC_GROUP_CORE, &major, &minor) != 0 ||
96 		major != WDT_MIN_COREAPI_MAJOR ||
97 		minor < WDT_MIN_COREAPI_MINOR) {
98 		cmn_err(CE_NOTE, "Disabling watchdog as watchdog services are "
99 		    "not available\n");
100 		watchdog_enabled = 0;
101 		return;
102 	}
103 
104 	/*
105 	 * Get the watchdog-max-timeout and watchdog-resolution MD properties.
106 	 */
107 	if ((mdp = md_get_handle()) == NULL) {
108 		cmn_err(CE_WARN, "Unable to initialize machine description, "
109 		    "watchdog is disabled.");
110 		watchdog_enabled = 0;
111 		return;
112 	}
113 
114 	num_nodes = md_node_count(mdp);
115 	ASSERT(num_nodes > 0);
116 
117 	listsz = num_nodes * sizeof (mde_cookie_t);
118 	listp = kmem_zalloc(listsz, KM_SLEEP);
119 
120 	nplat = md_scan_dag(mdp, md_root_node(mdp),
121 	    md_find_name(mdp, "platform"), md_find_name(mdp, "fwd"), listp);
122 
123 	ASSERT(nplat == 1);
124 
125 	if (md_get_prop_val(mdp, listp[0], "watchdog-max-timeout",
126 	    &watchdog_max_timeout) || watchdog_max_timeout < WDT_MIN_TIMEOUT) {
127 		cmn_err(CE_WARN, "Invalid watchdog-max-timeout, watchdog "
128 		    "is disabled.");
129 		watchdog_enabled = 0;
130 		kmem_free(listp, listsz);
131 		(void) md_fini_handle(mdp);
132 		return;
133 	}
134 
135 	/*
136 	 * Make sure that watchdog timeout value is within limits.
137 	 */
138 	if (watchdog_timeout < WDT_MIN_TIMEOUT)
139 		watchdog_timeout = WDT_MIN_TIMEOUT;
140 	else if (watchdog_timeout > WDT_LONG_TIMEOUT)
141 		watchdog_timeout = WDT_LONG_TIMEOUT;
142 
143 	if (watchdog_timeout > watchdog_max_timeout)
144 		watchdog_timeout = watchdog_max_timeout;
145 
146 	if (watchdog_long_timeout > watchdog_max_timeout)
147 		watchdog_long_timeout = watchdog_max_timeout;
148 
149 	if (md_get_prop_val(mdp, listp[0], "watchdog-resolution",
150 	    &watchdog_resolution)) {
151 		cmn_err(CE_WARN, "Cannot read watchdog-resolution, watchdog "
152 		    "is disabled.");
153 		watchdog_enabled = 0;
154 		kmem_free(listp, listsz);
155 		(void) md_fini_handle(mdp);
156 		return;
157 	}
158 
159 	if (watchdog_resolution == 0 ||
160 	    watchdog_resolution > WDT_DEFAULT_RESOLUTION)
161 		watchdog_resolution = WDT_DEFAULT_RESOLUTION;
162 
163 	kmem_free(listp, listsz);
164 	(void) md_fini_handle(mdp);
165 
166 	/*
167 	 * round the timeout to the nearest smaller value.
168 	 */
169 	watchdog_long_timeout -=
170 	    watchdog_long_timeout % watchdog_resolution;
171 	watchdog_timeout -=
172 	    watchdog_timeout % watchdog_resolution;
173 
174 	config_watchdog(watchdog_timeout, WDT_ON);
175 
176 	/*
177 	 * Cyclic need to be fired twice the frequency of regular
178 	 * watchdog timeout. Pedantic here and setting cyclic
179 	 * frequency to approximately 44% of watchdog_timeout.
180 	 */
181 	cyclic_interval = (watchdog_timeout >> 1) - (watchdog_timeout >> 4);
182 	/*
183 	 * Note that regular timeout interval is in millisecond,
184 	 * therefore to get cyclic interval in nanosecond need to
185 	 * multiply by MICROSEC.
186 	 */
187 	cyclic_interval *= MICROSEC;
188 
189 	watchdog_cyclic_init(cyclic_interval);
190 }
191 
192 /*
193  * Pat the watchdog timer periodically using the hypervisor API.
194  * Regular pat occurs when the system runs normally.
195  * Long pat is when system panics.
196  */
197 void
198 watchdog_pat()
199 {
200 	if (watchdog_enabled && watchdog_activated) {
201 		if (panicstr)
202 			config_watchdog(watchdog_long_timeout, WDT_ON);
203 		else
204 			config_watchdog(watchdog_timeout, WDT_ON);
205 	}
206 }
207 
208 /*
209  * We don't save/restore the remaining watchdog timeout time at present.
210  */
211 void
212 watchdog_suspend()
213 {
214 	if (watchdog_enabled && watchdog_activated) {
215 		config_watchdog(0, WDT_OFF);
216 	}
217 }
218 
219 /*
220  * We don't save/restore the remaining watchdog timeout time at present.
221  */
222 void
223 watchdog_resume()
224 {
225 	if (watchdog_enabled && !watchdog_activated) {
226 		if (panicstr) {
227 			config_watchdog(watchdog_long_timeout, WDT_ON);
228 		} else {
229 			config_watchdog(watchdog_timeout, WDT_ON);
230 		}
231 	}
232 }
233 
234 void
235 watchdog_clear()
236 {
237 	if (watchdog_enabled && watchdog_activated) {
238 		config_watchdog(0, WDT_OFF);
239 	}
240 }
241 
242 static void
243 config_watchdog(uint64_t timeout, int new_state)
244 {
245 	uint64_t time_remaining;
246 	uint64_t ret;
247 
248 	watchdog_activated = new_state;
249 	ret = hv_mach_set_watchdog(timeout, &time_remaining);
250 	if (ret != H_EOK) {
251 		cmn_err(CE_WARN, "Failed to operate on the watchdog. "
252 		    "Error = 0x%lx", ret);
253 		watchdog_enabled = 0;
254 	}
255 }
256 
257 /*
258  * Once the watchdog cyclic is initialized, it won't be removed.
259  * The only way to not add the watchdog cyclic is to disable the watchdog
260  * by setting the watchdog_enabled to 0 in /etc/system file.
261  */
262 static void
263 watchdog_cyclic_init(hrtime_t wdt_cyclic_interval)
264 {
265 	cyc_handler_t hdlr;
266 	cyc_time_t when;
267 
268 	hdlr.cyh_func = (cyc_func_t)watchdog_pat;
269 	hdlr.cyh_level = CY_HIGH_LEVEL;
270 	hdlr.cyh_arg = NULL;
271 
272 	when.cyt_when = 0;
273 	when.cyt_interval = wdt_cyclic_interval;
274 
275 	mutex_enter(&cpu_lock);
276 	(void) cyclic_add(&hdlr, &when);
277 	mutex_exit(&cpu_lock);
278 }
279