xref: /freebsd/usr.sbin/watchdogd/watchdogd.c (revision 8a166cafe0965f6bd72cd3d2f5372704f05cb5e8)
1 /*-
2  * Copyright (c) 2003-2004  Sean M. Kelly <smkelly@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 /*
28  * Software watchdog daemon.
29  */
30 
#include <sys/types.h>
__FBSDID("$FreeBSD$");

#include <sys/mman.h>
#include <sys/param.h>
#include <sys/rtprio.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/watchdog.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <libutil.h>
#include <limits.h>
#include <math.h>
#include <paths.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sysexits.h>
#include <unistd.h>
54 
55 static void	parseargs(int, char *[]);
56 static void	sighandler(int);
57 static void	watchdog_loop(void);
58 static int	watchdog_init(void);
59 static int	watchdog_onoff(int onoff);
60 static int	watchdog_patpat(u_int timeout);
61 static void	usage(void);
62 
63 static int debugging = 0;
64 static int end_program = 0;
65 static const char *pidfile = _PATH_VARRUN "watchdogd.pid";
66 static u_int timeout = WD_TO_16SEC;
67 static u_int passive = 0;
68 static int is_daemon = 0;
69 static int fd = -1;
70 static int nap = 1;
71 static char *test_cmd = NULL;
72 
/*
 * malloc() tuning: request the smallest possible chunks of virtual address
 * space, so that the later mlockall() does not wire megabytes of memory the
 * process never uses.  The setting has to come from the malloc_conf string
 * because the first allocation happens before main() is entered.
 */
const char *malloc_conf = "lg_chunk:0";
80 
81 /*
82  * Periodically pat the watchdog, preventing it from firing.
83  */
84 int
85 main(int argc, char *argv[])
86 {
87 	struct rtprio rtp;
88 	struct pidfh *pfh;
89 	pid_t otherpid;
90 
91 	if (getuid() != 0)
92 		errx(EX_SOFTWARE, "not super user");
93 
94 	parseargs(argc, argv);
95 
96 	rtp.type = RTP_PRIO_REALTIME;
97 	rtp.prio = 0;
98 	if (rtprio(RTP_SET, 0, &rtp) == -1)
99 		err(EX_OSERR, "rtprio");
100 
101 	if (watchdog_init() == -1)
102 		errx(EX_SOFTWARE, "unable to initialize watchdog");
103 
104 	if (is_daemon) {
105 		if (watchdog_onoff(1) == -1)
106 			err(EX_OSERR, "patting the dog");
107 
108 		pfh = pidfile_open(pidfile, 0600, &otherpid);
109 		if (pfh == NULL) {
110 			if (errno == EEXIST) {
111 				errx(EX_SOFTWARE, "%s already running, pid: %d",
112 				    getprogname(), otherpid);
113 			}
114 			warn("Cannot open or create pidfile");
115 		}
116 
117 		if (debugging == 0 && daemon(0, 0) == -1) {
118 			watchdog_onoff(0);
119 			pidfile_remove(pfh);
120 			err(EX_OSERR, "daemon");
121 		}
122 
123 		signal(SIGHUP, SIG_IGN);
124 		signal(SIGINT, sighandler);
125 		signal(SIGTERM, sighandler);
126 
127 		pidfile_write(pfh);
128 		if (madvise(0, 0, MADV_PROTECT) != 0)
129 			warn("madvise failed");
130 		if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0)
131 			warn("mlockall failed");
132 
133 		watchdog_loop();
134 
135 		/* exiting */
136 		pidfile_remove(pfh);
137 		return (EX_OK);
138 	} else {
139 		if (passive)
140 			timeout |= WD_PASSIVE;
141 		else
142 			timeout |= WD_ACTIVE;
143 		if (watchdog_patpat(timeout) < 0)
144 			err(EX_OSERR, "patting the dog");
145 		return (EX_OK);
146 	}
147 }
148 
149 /*
150  * Catch signals and begin shutdown process.
151  */
152 static void
153 sighandler(int signum)
154 {
155 
156 	if (signum == SIGINT || signum == SIGTERM)
157 		end_program = 1;
158 }
159 
160 /*
161  * Open the watchdog device.
162  */
163 static int
164 watchdog_init(void)
165 {
166 
167 	fd = open("/dev/" _PATH_WATCHDOG, O_RDWR);
168 	if (fd >= 0)
169 		return (0);
170 	warn("Could not open watchdog device");
171 	return (-1);
172 }
173 
174 /*
175  * Main program loop which is iterated every second.
176  */
177 static void
178 watchdog_loop(void)
179 {
180 	struct stat sb;
181 	int failed;
182 
183 	while (end_program != 2) {
184 		failed = 0;
185 
186 		if (test_cmd != NULL)
187 			failed = system(test_cmd);
188 		else
189 			failed = stat("/etc", &sb);
190 
191 		if (failed == 0)
192 			watchdog_patpat(timeout|WD_ACTIVE);
193 		sleep(nap);
194 
195 		if (end_program != 0) {
196 			if (watchdog_onoff(0) == 0) {
197 				end_program = 2;
198 			} else {
199 				warnx("Could not stop the watchdog, not exiting");
200 				end_program = 0;
201 			}
202 		}
203 	}
204 }
205 
206 /*
207  * Reset the watchdog timer. This function must be called periodically
208  * to keep the watchdog from firing.
209  */
210 static int
211 watchdog_patpat(u_int t)
212 {
213 
214 	return ioctl(fd, WDIOCPATPAT, &t);
215 }
216 
217 /*
218  * Toggle the kernel's watchdog. This routine is used to enable and
219  * disable the watchdog.
220  */
221 static int
222 watchdog_onoff(int onoff)
223 {
224 
225 	if (onoff)
226 		return watchdog_patpat((timeout|WD_ACTIVE));
227 	else
228 		return watchdog_patpat(0);
229 }
230 
231 /*
232  * Tell user how to use the program.
233  */
234 static void
235 usage(void)
236 {
237 	if (is_daemon)
238 		fprintf(stderr, "usage: watchdogd [-d] [-e cmd] [-I file] [-s sleep] [-t timeout]\n");
239 	else
240 		fprintf(stderr, "usage: watchdog [-d] [-t timeout]\n");
241 	exit(EX_USAGE);
242 }
243 
244 /*
245  * Handle the few command line arguments supported.
246  */
247 static void
248 parseargs(int argc, char *argv[])
249 {
250 	int c;
251 	char *p;
252 	double a;
253 
254 	c = strlen(argv[0]);
255 	if (argv[0][c - 1] == 'd')
256 		is_daemon = 1;
257 	while ((c = getopt(argc, argv,
258 	    is_daemon ? "I:de:s:t:?" : "dt:?")) != -1) {
259 		switch (c) {
260 		case 'I':
261 			pidfile = optarg;
262 			break;
263 		case 'd':
264 			debugging = 1;
265 			break;
266 		case 'e':
267 			test_cmd = strdup(optarg);
268 			break;
269 #ifdef notyet
270 		case 'p':
271 			passive = 1;
272 			break;
273 #endif
274 		case 's':
275 			p = NULL;
276 			errno = 0;
277 			nap = strtol(optarg, &p, 0);
278 			if ((p != NULL && *p != '\0') || errno != 0)
279 				errx(EX_USAGE, "-s argument is not a number");
280 			break;
281 		case 't':
282 			p = NULL;
283 			errno = 0;
284 			a = strtod(optarg, &p);
285 			if ((p != NULL && *p != '\0') || errno != 0)
286 				errx(EX_USAGE, "-t argument is not a number");
287 			if (a < 0)
288 				errx(EX_USAGE, "-t argument must be positive");
289 			if (a == 0)
290 				timeout = WD_TO_NEVER;
291 			else
292 				timeout = flsll(a * 1e9);
293 			if (debugging)
294 				printf("Timeout is 2^%d nanoseconds\n",
295 				    timeout);
296 			break;
297 		case '?':
298 		default:
299 			usage();
300 			/* NOTREACHED */
301 		}
302 	}
303 	if (argc != optind)
304 		errx(EX_USAGE, "extra arguments.");
305 	if (is_daemon && timeout < WD_TO_1SEC)
306 		errx(EX_USAGE, "-t argument is less than one second.");
307 }
308