xref: /titanic_41/usr/src/cmd/picl/plugins/sun4u/snowbird/watchdog/piclwatchdog.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * This module is used to monitor and control watchdog timer for
31  * UltraSPARC-IIi CPU in Snowbird
32  */
33 
34 #include <stdio.h>
35 #include <fcntl.h>
36 #include <unistd.h>
37 #include <stdlib.h>
38 #include <stdarg.h>
39 #include <strings.h>
40 #include <string.h>
41 #include <ctype.h>
42 #include <alloca.h>
43 #include <limits.h>
44 #include <sys/types.h>
45 #include <sys/stat.h>
46 #include <libintl.h>
47 #include <syslog.h>
48 #include <locale.h>
49 #include <picl.h>
50 #include <picltree.h>
51 #include <libnvpair.h>
52 #include <poll.h>
53 #include <errno.h>
54 #include <syslog.h>
55 #include <sys/priocntl.h>
56 #include <sys/rtpriocntl.h>
57 #include <sys/tspriocntl.h>
58 #include <sys/fsspriocntl.h>
59 #include <stropts.h>
60 #include <synch.h>
61 #include <signal.h>
62 #include <thread.h>
63 #include <picldefs.h>
64 #include <smclib.h>
65 #include "piclwatchdog.h"
66 
67 #pragma	init(wd_picl_register)	/* init section */
68 
69 /* debug variables */
70 static  int wd_debug = 0;
71 static hrtime_t start1, end1;
72 static int count = 0;
73 typedef struct { /* used to keep track of time taken for last 5 pats */
74 	int res_seq;
75 	int req_seq;
76 	int64_t time;
77 } wd_time_t;
78 
79 #define	NUMBER_OF_READINGS	5
80 static wd_time_t time1[NUMBER_OF_READINGS];
81 
82 /* global declarations */
83 static int	wd_fd = -1;	/* fd used to send watchdog commands */
84 static int 	polling_fd = -1; /* polling thread that snoops for events */
85 static int 	wd_enable = 1;
86 static int 	state_configured = 0;	/* chassis state */
87 static int	props_created = 0;
88 static int 	wd_pat_thr_priority = -1;
89 static pid_t 	pid = -1;	/* PID that owns watchdog services */
90 static cond_t	patting_cv;
91 static mutex_t	data_lock;
92 static mutex_t	patting_lock;
93 static int32_t	pat_time = 0;
94 static thread_t	polling_thr_tid;
95 static thread_t patting_thr_tid;
96 static wd_data_t wd_data;
97 static char wd_conf[MAXPATHLEN];
98 
99 #define	NULLREAD	(int (*)(ptree_rarg_t *, void *))0
100 #define	NULLWRITE	(int (*)(ptree_warg_t *, const void *))0
101 
102 /* ptree interface */
103 static void wd_picl_register(void);
104 static void wd_picl_init(void);
105 static void wd_picl_fini(void);
106 static void wd_state_change_evhandler(const char *,
107 	const void *, size_t, void *);
108 
109 /* local functions */
110 static int wd_write_timeout(ptree_warg_t *, const void *);
111 static int wd_write_action(ptree_warg_t *, const void *);
112 static int wd_read_action(ptree_rarg_t *, void *);
113 static int wd_read_timeout(ptree_rarg_t *, void *);
114 extern char *strtok_r(char *s1, const char *s2, char **lasts);
115 extern int wd_get_chassis_type();
116 
117 static picld_plugin_reg_t wd_reg_info = {
118 	PICLD_PLUGIN_VERSION_1,
119 	PICLD_PLUGIN_CRITICAL,
120 	"SUNW_picl_watchdog",
121 	wd_picl_init,
122 	wd_picl_fini,
123 };
124 
125 /*
126  * This function parses wd.conf file to set the tunables
127  * tunables at present: patting thread priority, pat time, wd_enable
128  */
129 static void
wd_parse_config_file(char * wd_conf)130 wd_parse_config_file(char *wd_conf)
131 {
132 	FILE	*fp;
133 	char	buf[WD_CONF_MAXSIZE];
134 	char	*token, *last, *value;
135 
136 	if ((fp = fopen(wd_conf, "r")) == NULL) {
137 		return;
138 	}
139 
140 	while (fgets(buf, sizeof (buf), fp) != NULL) {
141 		if (buf[0] == '\0' || buf[0] == '#') {
142 			continue;
143 		}
144 		token = last = value = NULL;
145 		value = (char *)strtok_r((char *)buf, WD_DELIMETER, &last);
146 		if (last) {
147 			token = (char *)strtok_r(last, WD_DELIMETER, &last);
148 		} else {
149 			continue;
150 		}
151 
152 		if (value == NULL || token == NULL) {
153 			continue;
154 		}
155 		if (strcmp(token, WD_PAT_THREAD_PRIORITY) == 0) {
156 			wd_pat_thr_priority = strtol(value,
157 				(char **)NULL, 10);
158 		} else if (strcmp(token, WD_PATTING_TIME) == 0) {
159 			errno = 0;
160 			pat_time = strtol(value, (char **)NULL, 10);
161 			if (errno != 0) {
162 				pat_time = 0;
163 			}
164 		} else if (strcmp(token, WD_ENABLE) == 0) {
165 			if (strcmp(value, "false") == 0) {
166 				wd_enable = 0;
167 			}
168 		} else {	/* unknown token */
169 			continue;
170 		}
171 	}
172 	(void) fclose(fp);
173 }
174 
175 /*
176  * read the SMC watchdog registers
177  */
178 static int
wd_get_reg_dump(uint8_t buffer[])179 wd_get_reg_dump(uint8_t buffer[])
180 {
181 	int rc = 0, i;
182 	sc_reqmsg_t	req_pkt;
183 	sc_rspmsg_t	rsp_pkt;
184 
185 	/* initialize the request packet */
186 	(void) smc_init_smc_msg(&req_pkt, SMC_GET_WATCHDOG_TIMER,
187 		DEFAULT_SEQN, 0);
188 
189 	/* make a call to smc library to send cmd */
190 	if ((rc = smc_send_msg(DEFAULT_FD, &req_pkt, &rsp_pkt,
191 		WD_POLL_TIMEOUT)) != SMC_SUCCESS) {
192 		WD_DEBUG1(WD_PICL_GET_ERR, rc);
193 		return (PICL_FAILURE);
194 	}
195 
196 	/* read 8 bytes */
197 	bzero(buffer, WD_REGISTER_LEN);
198 	for (i = 0; i < WD_REGISTER_LEN; i++) {
199 		buffer[i] = rsp_pkt.data[i];
200 	}
201 	return (PICL_SUCCESS);
202 }
203 
204 /*
205  * get the HEALTHY# line state
206  * Return -1 for Error
207  *         0 for HEALTHY# down
208  *         1 for HEALTHY# up
209  */
210 static int
wd_get_healthy_status()211 wd_get_healthy_status()
212 {
213 	sc_reqmsg_t	req_pkt;
214 	sc_rspmsg_t	rsp_pkt;
215 
216 	/* initialize the request packet */
217 	(void) smc_init_smc_msg(&req_pkt, SMC_GET_EXECUTION_STATE,
218 		DEFAULT_SEQN, 0);
219 
220 	/* make a call to smc library to send cmd */
221 	if (smc_send_msg(DEFAULT_FD, &req_pkt, &rsp_pkt,
222 		WD_POLL_TIMEOUT) != SMC_SUCCESS) {
223 		return (-1);
224 	}
225 
226 	return ((rsp_pkt.data[0] & IS_HEALTHY) ? WD_HEALTHY_UP :
227 		WD_HEALTHY_DOWN);
228 }
229 
230 /*ARGSUSED*/
231 static void
event_completion_handler(char * ename,void * earg,size_t size)232 event_completion_handler(char *ename, void *earg, size_t size)
233 {
234 	free(ename);
235 	free(earg);
236 }
237 
238 /*
239  * posts picl-state-change event if there is change in watchdog-timer state
240  */
241 static picl_errno_t
post_wd_state_event(picl_nodehdl_t nodeh,char * state)242 post_wd_state_event(picl_nodehdl_t nodeh, char *state)
243 {
244 	nvlist_t	*nvl;
245 	size_t		nvl_size;
246 	char		*pack_buf = NULL;
247 	picl_errno_t	rc;
248 	char *ename = PICLEVENT_STATE_CHANGE, *evname = NULL;
249 
250 	if (state == NULL) {
251 		return (PICL_FAILURE);
252 	}
253 
254 	if ((evname = strdup(ename)) == NULL) {
255 		return (PICL_NOSPACE);
256 	}
257 
258 	if ((rc = nvlist_alloc(&nvl, NV_UNIQUE_NAME_TYPE, NULL)) != 0) {
259 		free(evname);
260 		syslog(LOG_ERR, WD_NVLIST_ERR, rc);
261 		return (PICL_FAILURE);
262 	}
263 
264 	if ((rc = nvlist_add_uint64(nvl, PICLEVENTARG_NODEHANDLE,
265 		nodeh)) != 0) {
266 		nvlist_free(nvl);
267 		free(evname);
268 		syslog(LOG_ERR, WD_NVLIST_ERR, rc);
269 		return (PICL_FAILURE);
270 	}
271 
272 	if ((rc = nvlist_add_string(nvl, PICLEVENTARG_STATE,
273 		state)) != 0) {
274 		nvlist_free(nvl);
275 		free(evname);
276 		syslog(LOG_ERR, WD_NVLIST_ERR, rc);
277 		return (PICL_FAILURE);
278 	}
279 
280 	if ((rc = nvlist_pack(nvl, &pack_buf, &nvl_size, NV_ENCODE_NATIVE,
281 		NULL)) != 0) {
282 		nvlist_free(nvl);
283 		free(evname);
284 		syslog(LOG_ERR, WD_NVLIST_ERR, rc);
285 		return (PICL_FAILURE);
286 	}
287 
288 	if ((rc = ptree_post_event(evname, pack_buf, nvl_size,
289 		event_completion_handler)) != PICL_SUCCESS) {
290 		free(pack_buf);
291 		free(evname);
292 		nvlist_free(nvl);
293 		return (rc);
294 	}
295 	nvlist_free(nvl);
296 	return (PICL_SUCCESS);
297 }
298 
299 /*
300  * Updates the State value in picl tree and posts a state-change event
301  */
302 static void
wd_picl_update_state(int level,uint8_t stat)303 wd_picl_update_state(int level, uint8_t stat)
304 {
305 	picl_errno_t rc;
306 	char 	state[PICL_PROPNAMELEN_MAX];
307 
308 	switch (stat) {
309 	case WD_ARMED:
310 		(void) strncpy(state, PICL_PROPVAL_WD_STATE_ARMED,
311 			sizeof (state));
312 		break;
313 	case WD_DISARMED:
314 		(void) strncpy(state, PICL_PROPVAL_WD_STATE_DISARMED,
315 			sizeof (state));
316 		break;
317 	case WD_EXPIRED:
318 		(void) strncpy(state, PICL_PROPVAL_WD_STATE_EXPIRED,
319 			sizeof (state));
320 		break;
321 	default:
322 		return;
323 	}
324 
325 	(void) mutex_lock(&data_lock);
326 	switch (level) {
327 	case WD1:
328 		wd_data.wd1_run_state = stat;
329 		break;
330 	case WD2:
331 		wd_data.wd2_run_state = stat;
332 		break;
333 	case WD1_2:
334 		wd_data.wd1_run_state = stat;
335 		wd_data.wd2_run_state = stat;
336 		break;
337 	default:
338 		return;
339 	}
340 	(void) mutex_unlock(&data_lock);
341 
342 	if (!state_configured) {
343 		return;
344 	}
345 
346 	switch (level) {
347 	case WD1:
348 		if ((rc = post_wd_state_event(wd_data.wd1_nodehdl,
349 			state)) != PICL_SUCCESS) {
350 			syslog(LOG_ERR, WD_PICL_POST_EVENT_ERR, rc);
351 		}
352 		break;
353 	case WD2:
354 		if ((rc = post_wd_state_event(wd_data.wd2_nodehdl,
355 			state)) != PICL_SUCCESS) {
356 			syslog(LOG_ERR, WD_PICL_POST_EVENT_ERR, rc);
357 		}
358 		break;
359 
360 	case WD1_2:
361 		if ((rc = post_wd_state_event(wd_data.wd1_nodehdl,
362 			state)) != PICL_SUCCESS) {
363 			syslog(LOG_ERR, WD_PICL_POST_EVENT_ERR, rc);
364 		}
365 		if ((rc = post_wd_state_event(wd_data.wd2_nodehdl,
366 			state)) != PICL_SUCCESS) {
367 			syslog(LOG_ERR, WD_PICL_POST_EVENT_ERR, rc);
368 		}
369 		break;
370 	}
371 }
372 
373 /*
374  * Sends a command to SMC to reset the watchdog-timers
375  */
376 static int
wd_pat()377 wd_pat()
378 {
379 	int rc = 0;
380 	static uint8_t	seq = 1;
381 	sc_reqmsg_t	req_pkt;
382 	sc_rspmsg_t	rsp_pkt;
383 
384 	if (seq < WD_MAX_SEQN) {
385 		req_pkt.hdr.msg_id = seq++;
386 	} else {
387 		seq = 1;
388 		req_pkt.hdr.msg_id = seq;
389 	}
390 
391 	if (wd_debug & WD_TIME_DEBUG) {
392 		start1 = gethrtime();
393 	}
394 
395 	/* initialize the request packet */
396 	(void) smc_init_smc_msg(&req_pkt, SMC_RESET_WATCHDOG_TIMER,
397 		DEFAULT_SEQN, 0);
398 
399 	/* make a call to smc library to send cmd */
400 	if ((rc = smc_send_msg(wd_fd, &req_pkt, &rsp_pkt,
401 		WD_POLL_TIMEOUT)) != SMC_SUCCESS) {
402 		syslog(LOG_CRIT, WD_PICL_PAT_ERR, rc);
403 		return (PICL_FAILURE);
404 	}
405 
406 	if (wd_debug & WD_TIME_DEBUG) {
407 		end1 = gethrtime();
408 		time1[count].res_seq = SC_MSG_ID(&rsp_pkt);
409 		time1[count].req_seq = SC_MSG_ID(&req_pkt);
410 		time1[count].time = (end1 - start1);
411 
412 		if (count < (NUMBER_OF_READINGS - 1)) {
413 			count++;
414 		} else {
415 			count = 0;
416 		}
417 	}
418 	return (PICL_SUCCESS);
419 }
420 
421 /* used to set the new values for watchdog and start the watchdog */
422 static int
wd_start(uchar_t action_1,uchar_t action_2,uchar_t timeout_2,uchar_t * timeout_1,uint8_t patting_option)423 wd_start(uchar_t action_1, uchar_t action_2,
424 	uchar_t timeout_2, uchar_t *timeout_1, uint8_t patting_option)
425 {
426 	int rc = 0;
427 	sc_reqmsg_t	req_pkt;
428 	sc_rspmsg_t	rsp_pkt;
429 
430 	if (timeout_1 == NULL) {
431 		return (PICL_FAILURE);
432 	}
433 
434 	req_pkt.data[0] = WD_USEFLAG_OS;
435 	req_pkt.data[1] = action_1 | action_2;	/* actions */
436 	req_pkt.data[2] = timeout_2;		/* wd timeout 2 */
437 	req_pkt.data[3] = WD_XPR_FLG_CLR_OS;	/* expiration flags */
438 	req_pkt.data[4] = timeout_1[1];		/* LSB for wd timeout 1 */
439 	req_pkt.data[5] = timeout_1[0];		/* MSB for wd timeout 1 */
440 
441 	if (patting_option == ENABLE_AUTO_PAT) {
442 		req_pkt.data[0] |= WD_ENABLE_AUTO_PAT;
443 	}
444 
445 	/* initialize the request packet */
446 	(void) smc_init_smc_msg(&req_pkt, SMC_SET_WATCHDOG_TIMER,
447 		DEFAULT_SEQN, WD_SET_CMD_DATA_LEN);
448 
449 	/* make a call to smc library to send cmd */
450 	if ((rc = smc_send_msg(wd_fd, &req_pkt, &rsp_pkt,
451 		WD_POLL_TIMEOUT)) != SMC_SUCCESS) {
452 		WD_DEBUG1(WD_PICL_START_ERR, rc);
453 		return (PICL_FAILURE);
454 	}
455 
456 	/* reset the watchdog timer */
457 	(void) smc_init_smc_msg(&req_pkt, SMC_RESET_WATCHDOG_TIMER,
458 		DEFAULT_SEQN, 0);
459 	if ((rc = smc_send_msg(wd_fd, &req_pkt, &rsp_pkt,
460 		WD_POLL_TIMEOUT)) != SMC_SUCCESS) {
461 		WD_DEBUG1(WD_PICL_START_ERR, rc);
462 		return (PICL_FAILURE);
463 	}
464 	return (PICL_SUCCESS);
465 }
466 
467 /*
468  * Validates timeout and action fields and arms the watchdog-timers
469  */
470 static int
wd_arm(uint8_t patting_option)471 wd_arm(uint8_t patting_option)
472 {
473 	int rc;
474 	uint16_t	wd_time1;
475 	uint8_t		wd_time2, wd1_action, wd2_action;
476 	uint8_t		timeout1[2];
477 
478 	if (wd_data.wd1_timeout >= 0) {
479 		wd_time1 = wd_data.wd1_timeout/WD_L1_RESOLUTION;
480 	} else {
481 		wd_time1 = 0;
482 	}
483 
484 	if (wd_data.wd2_timeout >= 0) {
485 		wd_time2 = wd_data.wd2_timeout/WD_L2_RESOLUTION;
486 	} else {
487 		wd_time2 = 0;
488 	}
489 
490 	timeout1[0] = wd_time1 >> 8;	/* MSB */
491 	timeout1[1] = wd_time1 & 0x00ff;	/* LSB */
492 
493 	/* check the HELATHY# status if action is alarm */
494 	if (wd_data.wd1_action == WD_ACTION_HEALTHY_DOWN_HOST ||
495 		wd_data.wd1_action == WD_ACTION_HEALTHY_DOWN_SAT) {
496 		rc = wd_get_healthy_status();
497 		if (rc == WD_HEALTHY_DOWN) {
498 			WD_DEBUG0(WD_HEALTHY_ERR);
499 			return (PICL_FAILURE);
500 		} else if (rc == -1) {
501 			syslog(LOG_ERR, WD_GET_HEALTH_ERR);
502 			return (PICL_FAILURE);
503 		}
504 	}
505 
506 	if (wd_data.wd1_timeout == -1) {
507 		wd1_action = WD_ACTION_NONE2;
508 	} else {
509 		wd1_action = wd_data.wd1_action;
510 	}
511 
512 	if (wd_data.wd2_timeout == -1) {
513 		wd2_action = WD_ACTION_NONE2;
514 	} else {
515 		wd2_action = wd_data.wd2_action;
516 	}
517 
518 	rc = wd_start(wd1_action, wd2_action,
519 		wd_time2, timeout1, patting_option);
520 	return (rc);
521 }
522 
523 /*
524  * This is thread is a RealTime class thread. This thread pats the
525  * watchdog-timers in regular intervals before the expiry.
526  */
527 /*ARGSUSED*/
528 static void *
wd_patting_thread(void * args)529 wd_patting_thread(void *args)
530 {
531 	time_t sec;
532 	pcinfo_t pci;
533 	long nano_sec;
534 	timestruc_t to;
535 	long sleep_time;
536 	struct timeval tp;
537 	int err, state;
538 
539 	for (;;) {
540 		(void) mutex_lock(&patting_lock);
541 		while (wd_data.wd_pat_state == WD_NORESET) {
542 			(void) cond_wait(&patting_cv, &patting_lock);
543 		}
544 		(void) mutex_unlock(&patting_lock);
545 
546 		/* reset pat-time to zero */
547 		pat_time = 0;		/* tunable */
548 		wd_parse_config_file(wd_conf);
549 
550 		if (wd_pat_thr_priority < 0 || wd_pat_thr_priority > 59) {
551 			wd_pat_thr_priority = WD_DEFAULT_THREAD_PRIORITY;
552 		}
553 
554 		/* change the priority of thread to realtime class */
555 		(void) strncpy(pci.pc_clname, "RT", sizeof (pci.pc_clname));
556 		if (priocntl(P_LWPID, P_MYID, PC_GETCID, (caddr_t)&pci) != -1) {
557 			pcparms_t pcp;
558 			rtparms_t *rtp = (rtparms_t *)pcp.pc_clparms;
559 			rtp->rt_pri = wd_pat_thr_priority;
560 			rtp->rt_tqsecs = 0;
561 			rtp->rt_tqnsecs = RT_TQDEF;
562 			pcp.pc_cid = pci.pc_cid;
563 			if (priocntl(P_LWPID, P_MYID, PC_SETPARMS,
564 				(caddr_t)&pcp) != 0) {
565 				syslog(LOG_ERR, WD_PICL_RT_THRD_FAIL);
566 			}
567 		} else {
568 			syslog(LOG_ERR, WD_PICL_RT_THRD_NO_PERM_ERR);
569 		}
570 
571 		switch (wd_data.wd1_timeout) {
572 		case 0:
573 			if (wd_arm(DISABLE_AUTO_PAT) == PICL_SUCCESS) {
574 				wd_picl_update_state(WD1, WD_ARMED);
575 				if (wd_data.wd2_timeout >= 0) {
576 					wd_picl_update_state(WD2, WD_ARMED);
577 				}
578 			} else {
579 				syslog(LOG_ERR, WD_PICL_START_ERR,
580 					PICL_FAILURE);
581 			}
582 			/* no need to pat */
583 			(void) mutex_lock(&patting_lock);
584 			wd_data.wd_pat_state = WD_NORESET;
585 			(void) mutex_unlock(&patting_lock);
586 			continue;
587 		case -1:
588 			if (wd_data.wd2_timeout < 0) {
589 				(void) mutex_lock(&patting_lock);
590 				wd_data.wd_pat_state = WD_NORESET;
591 				(void) mutex_unlock(&patting_lock);
592 				continue;
593 			}
594 			if (wd_arm(DISABLE_AUTO_PAT) == PICL_SUCCESS) {
595 				wd_picl_update_state(WD2, WD_ARMED);
596 			} else {
597 				syslog(LOG_ERR, WD_PICL_START_ERR,
598 					PICL_FAILURE);
599 			}
600 			/* no need to pat */
601 			(void) mutex_lock(&patting_lock);
602 			wd_data.wd_pat_state = WD_NORESET;
603 			(void) mutex_unlock(&patting_lock);
604 			continue;
605 		default:
606 			break;
607 		}
608 
609 		if (pat_time == 0) {
610 			if (wd_data.wd1_timeout > WD_PAT_TIME) {
611 				pat_time = WD_PAT_TIME;
612 			} else {
613 				pat_time = wd_data.wd1_timeout - 80;
614 			}
615 		}
616 		if (pat_time <= 0) {
617 			WD_DEBUG0(WD_PICL_PAT_TIME_ERR);
618 			(void) mutex_lock(&patting_lock);
619 				wd_data.wd_pat_state = WD_NORESET;
620 			(void) mutex_unlock(&patting_lock);
621 			continue;
622 		}
623 		sleep_time = wd_data.wd1_timeout - pat_time;
624 
625 		if (wd_data.wd1_timeout <= 0 || sleep_time <= 0) {
626 			WD_DEBUG0(WD_PICL_ARM_PAT_ERR);
627 			(void) mutex_lock(&patting_lock);
628 				wd_data.wd_pat_state = WD_NORESET;
629 			(void) mutex_unlock(&patting_lock);
630 			continue;
631 		} else {
632 			wd_picl_update_state(WD1, WD_ARMED);
633 		}
634 
635 		if (wd_data.wd2_timeout >= 0) {
636 			wd_picl_update_state(WD2, WD_ARMED);
637 		}
638 
639 		sec = sleep_time/1000;
640 		nano_sec = (sleep_time - (sec * 1000)) * 1000000;
641 
642 		if (wd_arm(ENABLE_AUTO_PAT) != PICL_SUCCESS) {
643 			wd_picl_update_state(WD1_2, WD_DISARMED);
644 			(void) mutex_lock(&patting_lock);
645 				wd_data.wd_pat_state = WD_NORESET;
646 			(void) mutex_unlock(&patting_lock);
647 			syslog(LOG_ERR, WD_PICL_START_ERR, PICL_FAILURE);
648 			continue;
649 		}
650 
651 		do	/* pat the watchdog until expiry or user disarm */
652 		{
653 			(void) mutex_lock(&patting_lock);
654 			state = wd_data.wd_pat_state;
655 			if (state == WD_NORESET) {
656 				(void) mutex_unlock(&patting_lock);
657 				break;
658 			}
659 			(void) gettimeofday(&tp, NULL);
660 			to.tv_sec = tp.tv_sec + sec;
661 			if ((nano_sec + (tp.tv_usec * 1000)) >= 1000000000) {
662 				to.tv_sec +=  1;
663 				to.tv_nsec = (nano_sec +
664 					(tp.tv_usec * 1000)) - 1000000000;
665 			} else {
666 				to.tv_nsec = nano_sec + (tp.tv_usec * 1000);
667 			}
668 
669 			err = cond_timedwait(&patting_cv, &patting_lock, &to);
670 			(void) mutex_unlock(&patting_lock);
671 
672 			if (err == ETIME) { /* woke up from sleep */
673 				(void) wd_pat();
674 			}
675 		} while (state == WD_RESET);
676 	}
677 	/*NOTREACHED*/
678 	return (NULL);
679 }
680 
681 /*
682  * returns 0 if owner is not alive
683  * returns 1 if owner is alive
684  * returns -1 if there is no active owner
685  */
686 static int
is_owner_alive()687 is_owner_alive()
688 {
689 	char strpid[50];
690 	struct stat buf;
691 
692 	if (pid == -1) {
693 		return (-1);
694 	}
695 
696 	/* check if the file exists or not */
697 	(void) snprintf(strpid, sizeof (pid), "/proc/%ld/status", pid);
698 	errno = 0;
699 	if (stat(strpid, &buf) == 0) {
700 		return (1);
701 	}
702 	if (errno == ENOENT) {
703 		return (0);
704 	} else {
705 		syslog(LOG_ERR, WD_GET_OWN_FAILED, errno);
706 	}
707 	return (-1);
708 }
709 
710 /*
711  * Sends a cmd to SMC to stop watchdog timers
712  */
713 static int
wd_stop()714 wd_stop()
715 {
716 	int rc = 0;
717 	sc_reqmsg_t	req_pkt;
718 	sc_rspmsg_t	rsp_pkt;
719 	uint8_t	buffer[8];
720 
721 	if (wd_get_reg_dump(buffer) != 0) {
722 		return (PICL_FAILURE);
723 	}
724 	/* clear the expiration flags */
725 	buffer[3] = 0xff;	/* expiration flags */
726 
727 	(void) memcpy(SC_MSG_DATA(&req_pkt), buffer,
728 		WD_SET_CMD_DATA_LEN);
729 
730 	/* initialize the request packet */
731 	(void) smc_init_smc_msg(&req_pkt, SMC_SET_WATCHDOG_TIMER,
732 		DEFAULT_SEQN, WD_SET_CMD_DATA_LEN);
733 
734 	/* make a call to smc library to send cmd */
735 	if ((rc = smc_send_msg(wd_fd, &req_pkt, &rsp_pkt,
736 		WD_POLL_TIMEOUT)) != SMC_SUCCESS) {
737 		syslog(LOG_ERR, WD_PICL_STOP_ERR, rc);
738 		return (PICL_FAILURE);
739 	}
740 	return (PICL_SUCCESS);
741 }
742 
743 /*
744  * Function used by volatile callback function for wd-op property
745  * under controller. This is used to arm, disarm the watchdog-timers
746  * in response to user actions
747  */
748 static int
wd_worker_function(uint8_t flag,pid_t proc_id)749 wd_worker_function(uint8_t flag, pid_t proc_id)
750 {
751 	int rc = PICL_SUCCESS;
752 	int wd1_state, wd2_state;
753 
754 	(void) mutex_lock(&data_lock);
755 	wd1_state = wd_data.wd1_run_state;
756 	wd2_state = wd_data.wd2_run_state;
757 	(void) mutex_unlock(&data_lock);
758 
759 	switch (flag) {
760 
761 	case USER_ARMED_WD:
762 
763 	/* watchdog can only be armed if all the timers are disarmed */
764 	if (wd1_state != WD_DISARMED) {
765 		WD_DEBUG0(WD_PICL_WD1_RUNNING_ERR);
766 		rc = PICL_FAILURE;
767 		break;
768 	}
769 	if (wd2_state != WD_DISARMED) {
770 		WD_DEBUG0(WD_PICL_WD2_RUNNING_ERR);
771 		rc = PICL_FAILURE;
772 		break;
773 	}
774 
775 	/* check the HELATHY# status if action is alarm */
776 	if (wd_data.wd1_timeout >= 0) {
777 		if (wd_data.wd1_action == WD_ACTION_HEALTHY_DOWN_HOST ||
778 			wd_data.wd1_action == WD_ACTION_HEALTHY_DOWN_SAT) {
779 			rc = wd_get_healthy_status();
780 			if (rc == WD_HEALTHY_DOWN) {
781 				WD_DEBUG0(WD_HEALTHY_ERR);
782 				return (PICL_FAILURE);
783 			} else if (rc == -1) {
784 				syslog(LOG_ERR, WD_GET_HEALTH_ERR);
785 				return (PICL_FAILURE);
786 			} else {
787 				rc = PICL_SUCCESS;
788 			}
789 		}
790 	}
791 
792 	/* signal the patting thread */
793 	(void) mutex_lock(&patting_lock);
794 	wd_data.wd_pat_state = WD_RESET;
795 	(void) cond_signal(&patting_cv);
796 	(void) mutex_unlock(&patting_lock);
797 	break;
798 
799 	case USER_DISARMED_WD:
800 
801 	/*
802 	 * if the caller doesnot own watchdog services,
803 	 * check to see if the owner is still alive using procfs
804 	 */
805 	if (proc_id !=  pid) {
806 		switch (is_owner_alive()) {
807 		case -1:
808 			if ((wd1_state != WD_DISARMED) ||
809 			(wd2_state != WD_DISARMED)) {
810 				break;
811 			}
812 			/* watchdog is already disarmed */
813 			WD_DEBUG0(WD_PICL_NO_WD_ERR);
814 			return (PICL_FAILURE);
815 		case 1:
816 			/* owner is still alive, deny the operation */
817 			WD_DEBUG0(WD_PICL_PERM_DENIED);
818 			return (PICL_PERMDENIED);
819 		default:
820 			break;
821 		}
822 	}
823 
824 	/* watchdog is running */
825 	if ((rc = wd_stop()) == PICL_SUCCESS) {
826 		wd_picl_update_state(WD1_2, WD_DISARMED);
827 		(void) mutex_lock(&patting_lock);
828 		wd_data.wd_pat_state = WD_NORESET;
829 		(void) cond_signal(&patting_cv);
830 		(void) mutex_unlock(&patting_lock);
831 	}
832 	break;
833 
834 	case USER_ARMED_PAT_WD: /* for debug purposes only */
835 
836 	/*
837 	 * first arm-pat operation is used for arming the watchdog
838 	 * subsequent arm-pat operations will be used for patting
839 	 * the watchdog
840 	 */
841 	/* WD is stopped */
842 	if (wd1_state == WD_DISARMED && wd2_state == WD_DISARMED) {
843 		if ((rc = wd_arm(DISABLE_AUTO_PAT)) == PICL_SUCCESS) {
844 			if (wd_data.wd1_timeout >= 0) {
845 				wd_picl_update_state(WD1, WD_ARMED);
846 			}
847 
848 			if (wd_data.wd2_timeout >= 0) {
849 				wd_picl_update_state(WD2, WD_ARMED);
850 			}
851 		} else {
852 			return (rc);
853 		}
854 	} else {	/* WD is running */
855 		if (wd1_state != WD_ARMED) {
856 			WD_DEBUG0(WD_PICL_NO_WD_ERR);
857 			return (PICL_INVALIDARG);
858 		}
859 
860 		/* check if OS is patting the watchdog or not */
861 		(void) mutex_lock(&patting_lock);
862 		if (wd_data.wd_pat_state == WD_RESET) {
863 			WD_DEBUG0(WD_PICL_TRY_PAT_ERR);
864 			(void) mutex_unlock(&patting_lock);
865 			return (PICL_INVALIDARG);
866 		}
867 
868 		/* check if the process owns the WD services */
869 		if (proc_id != pid) {
870 			WD_DEBUG0(WD_PICL_PERM_DENIED);
871 			return (PICL_PERMDENIED);
872 		}
873 		rc = wd_pat();
874 	}
875 	break;
876 
877 	default:
878 	rc = PICL_INVALIDARG;
879 	break;
880 
881 	} /* switch */
882 
883 	return (rc);
884 }
885 
886 /*ARGSUSED*/
887 static int
wd_write_op(ptree_warg_t * parg,const void * buf)888 wd_write_op(ptree_warg_t *parg, const void *buf)
889 {
890 	int rc = PICL_INVALIDARG;
891 	uint8_t	flag;
892 
893 	/* only after state is configured */
894 	if (!state_configured) {
895 		if (parg->cred.dc_pid != getpid()) {
896 			WD_DEBUG0(WD_PICL_STATE_INVALID);
897 			return (PICL_PERMDENIED);
898 		}
899 	}
900 
901 	/* only super user can write this property */
902 	if (parg->cred.dc_euid != SUPER_USER) {
903 		WD_DEBUG0(WD_NO_ROOT_PERM);
904 		return (PICL_PERMDENIED);
905 	}
906 
907 	if (strcmp((char *)buf, PICL_PROPVAL_WD_OP_ARM) == 0) {
908 		flag = USER_ARMED_WD;
909 		rc = PICL_SUCCESS;
910 	}
911 
912 	if (strcmp((char *)buf, PICL_PROPVAL_WD_OP_DISARM) == 0) {
913 		flag = USER_DISARMED_WD;
914 		rc = PICL_SUCCESS;
915 	}
916 
917 	/* for debug purpose only */
918 	if (strcmp((char *)buf, WD_ARM_PAT) == 0) {
919 		flag = USER_ARMED_PAT_WD;
920 		rc = PICL_SUCCESS;
921 	}
922 
923 	if (rc == PICL_SUCCESS) {
924 		rc = wd_worker_function(flag, parg->cred.dc_pid);
925 	} else {
926 		rc = PICL_INVALIDARG;
927 	}
928 
929 	if (rc == PICL_SUCCESS) {
930 
931 		switch (flag) {
932 		case USER_ARMED_PAT_WD:
933 		case USER_ARMED_WD:
934 
935 			/* get the process id of client */
936 			if (parg->cred.dc_pid != getpid()) {
937 				pid = parg->cred.dc_pid;
938 			} else {
939 				pid = -1;
940 			}
941 			break;
942 		case USER_DISARMED_WD:
943 			/* reset the pid */
944 			pid = -1;
945 		default:
946 			break;
947 		}
948 	}
949 	return (rc);
950 }
951 
952 /* volatile call back function to read the watchdog L1 status */
953 /*ARGSUSED*/
954 static int
wd1_read_status(ptree_rarg_t * parg,void * buf)955 wd1_read_status(ptree_rarg_t *parg, void *buf)
956 {
957 	int rc = PICL_SUCCESS;
958 
959 	(void) mutex_lock(&data_lock);
960 
961 	switch (wd_data.wd1_run_state) {
962 
963 	case WD_EXPIRED:
964 		(void) strncpy((char *)buf, PICL_PROPVAL_WD_STATE_EXPIRED,
965 			PICL_PROPNAMELEN_MAX);
966 		break;
967 
968 	case WD_DISARMED:
969 		(void) strncpy((char *)buf, PICL_PROPVAL_WD_STATE_DISARMED,
970 			PICL_PROPNAMELEN_MAX);
971 		break;
972 
973 	case WD_ARMED:
974 		(void) strncpy((char *)buf, PICL_PROPVAL_WD_STATE_ARMED,
975 			PICL_PROPNAMELEN_MAX);
976 		break;
977 
978 	default:
979 		rc = PICL_FAILURE;
980 	}
981 	(void) mutex_unlock(&data_lock);
982 	return (rc);
983 }
984 
985 /*
986  * this function is used to read the state of L2 timer
987  */
988 static int
wd_get_wd2_status(int * present_status)989 wd_get_wd2_status(int *present_status)
990 {
991 	int rc;
992 	uchar_t	buffer[WD_REGISTER_LEN];
993 
994 	bzero(buffer, WD_REGISTER_LEN);
995 	(void) mutex_lock(&data_lock);
996 	*present_status = wd_data.wd2_run_state;
997 	if (wd_data.wd2_run_state != WD_ARMED) {
998 		/* we already have the latest state */
999 		(void) mutex_unlock(&data_lock);
1000 		return (PICL_SUCCESS);
1001 	}
1002 	(void) mutex_unlock(&data_lock);
1003 
1004 	/* read watchdog registers */
1005 	if ((rc = wd_get_reg_dump(buffer)) != 0) {
1006 		return (rc);
1007 	}
1008 
1009 	if (buffer[0] & WD_WD_RUNNING) {
1010 		*present_status = WD_ARMED;
1011 		return (PICL_SUCCESS);
1012 	}
1013 
1014 	if (buffer[3] != 0) {
1015 		(void) mutex_lock(&data_lock);
1016 		*present_status = wd_data.wd2_run_state = WD_EXPIRED;
1017 		(void) mutex_unlock(&data_lock);
1018 	}
1019 	return (PICL_SUCCESS);
1020 }
1021 
1022 /* volatile call back function to read the watchdog L2 status */
1023 /*ARGSUSED*/
1024 static int
wd2_read_status(ptree_rarg_t * parg,void * buf)1025 wd2_read_status(ptree_rarg_t *parg, void *buf)
1026 {
1027 	int present_status, rc;
1028 
1029 	if ((rc = wd_get_wd2_status(&present_status)) !=
1030 		PICL_SUCCESS) {
1031 		return (rc);
1032 	}
1033 
1034 	/* copy the present state in user buffer */
1035 	switch (present_status) {
1036 	case WD_ARMED:
1037 		(void) strncpy((char *)buf, PICL_PROPVAL_WD_STATE_ARMED,
1038 			PICL_PROPNAMELEN_MAX);
1039 		break;
1040 	case WD_EXPIRED:
1041 		(void) strncpy((char *)buf, PICL_PROPVAL_WD_STATE_EXPIRED,
1042 			PICL_PROPNAMELEN_MAX);
1043 		break;
1044 	case WD_DISARMED:
1045 		(void) strncpy((char *)buf, PICL_PROPVAL_WD_STATE_DISARMED,
1046 			PICL_PROPNAMELEN_MAX);
1047 		break;
1048 	}
1049 	return (PICL_SUCCESS);
1050 }
1051 
1052 /* this thread listens for watchdog expiry events */
1053 /*ARGSUSED*/
1054 static void *
wd_polling(void * args)1055 wd_polling(void *args)
1056 {
1057 	uint8_t	stat;
1058 	int poll_retval;
1059 	struct pollfd fds;
1060 	sc_rspmsg_t rsp_pkt;
1061 	int i;
1062 
1063 	fds.fd = polling_fd;
1064 	fds.events = POLLIN | POLLPRI;
1065 	fds.revents = 0;
1066 
1067 	for (;;) {
1068 		poll_retval = poll(&fds, 1, -1);
1069 		if (props_created == 0)
1070 			continue;
1071 		switch (poll_retval) {
1072 		case 0:
1073 		break;
1074 
1075 		case -1:
1076 			syslog(LOG_ERR, WD_PICL_POLL_ERR);
1077 		break;
1078 
1079 		default:
1080 		/* something happened */
1081 		if ((read(polling_fd, &rsp_pkt,
1082 			sizeof (sc_rspmsg_t))) < 0) {
1083 			syslog(LOG_ERR, WD_PICL_SMC_READ_ERR);
1084 			break;
1085 		}
1086 
1087 		if (rsp_pkt.hdr.cmd == SMC_EXPIRED_WATCHDOG_NOTIF) {
1088 
1089 			(void) mutex_lock(&data_lock);
1090 			stat = wd_data.wd1_run_state;
1091 			(void) mutex_unlock(&data_lock);
1092 
1093 			if (stat != WD_ARMED) {
1094 				continue;
1095 			}
1096 
1097 			wd_picl_update_state(WD1, WD_EXPIRED);
1098 
1099 			(void) mutex_lock(&patting_lock);
1100 			wd_data.wd_pat_state = WD_NORESET;
1101 			(void) cond_signal(&patting_cv);
1102 
1103 			(void) mutex_unlock(&patting_lock);
1104 			syslog(LOG_WARNING, WD_WD1_EXPIRED);
1105 			if (wd_debug & WD_TIME_DEBUG) {
1106 				syslog(LOG_ERR, " latest count : %d", count);
1107 				for (i = 0; i < NUMBER_OF_READINGS; i++) {
1108 					syslog(LOG_ERR, "i = %d, req_seq = %d,"
1109 					"res_seq = %d, time = %lld nsec",
1110 						i, time1[i].req_seq,
1111 						time1[i].res_seq,
1112 						time1[i].time);
1113 				}
1114 			}
1115 			if (wd_data.reboot_action) {
1116 				wd_data.reboot_action = 0;
1117 				(void) system(SHUTDOWN_CMD);
1118 			}
1119 		}
1120 		break;
1121 
1122 		} /* switch */
1123 	}
1124 	/*NOTREACHED*/
1125 	return (NULL);
1126 }
1127 
1128 /*
1129  * This function reads the hardware state and gets the status of
1130  * watchdog-timers
1131  */
1132 static int
wd_get_status(wd_state_t * state)1133 wd_get_status(wd_state_t *state)
1134 {
1135 	picl_errno_t	rc;
1136 	uchar_t		buffer[WD_REGISTER_LEN];
1137 
1138 	bzero(buffer, WD_REGISTER_LEN);
1139 	/* read watchdog registers */
1140 	if ((rc = wd_get_reg_dump(buffer)) != 0) {
1141 		return (rc);
1142 	}
1143 
1144 	/* get action */
1145 	state->action1 = buffer[1] & 0xF0; /* most significant 4 bits */
1146 	if (state->action1 == 0x0) {
1147 		state->action1 = WD_ACTION_NONE1;
1148 	}
1149 	state->action2 = buffer[1] & 0x0F; /* least significant 4 bits */
1150 	if (state->action2 == 0x0) {
1151 		state->action2 = WD_ACTION_NONE2;
1152 	}
1153 
1154 	state->timeout2 = buffer[2];
1155 	state->timeout1[0] = buffer[5];	/* MSB */
1156 	state->timeout1[1] = buffer[4];	/* LSB */
1157 
1158 	state->present_t1[0] = buffer[7]; /* MSB */
1159 	state->present_t1[1] = buffer[6]; /* LSB */
1160 
1161 	if (buffer[0] & WD_WD_RUNNING) {
1162 		state->present_state = WD_ARMED;
1163 		return (PICL_SUCCESS);
1164 	}
1165 
1166 	if (buffer[3] != 0) {
1167 		state->present_state = WD_EXPIRED;
1168 		return (PICL_SUCCESS);
1169 	} else {
1170 		state->present_state = WD_DISARMED;
1171 		return (PICL_SUCCESS);
1172 	}
1173 }
1174 
1175 /* read the smc hardware and intialize the internal state */
1176 static void
wd_set_init_state()1177 wd_set_init_state()
1178 {
1179 	wd_state_t state;
1180 	uint16_t tmp1, tmp2, wd_time1;
1181 
1182 	if (wd_get_status(&state) != PICL_SUCCESS) {
1183 		syslog(LOG_ERR, WD_PICL_GET_STAT_ERR);
1184 		/* defualt state is expired ??? */
1185 		state.present_state = WD_EXPIRED;
1186 	}
1187 
1188 	switch (state.present_state) {
1189 	case WD_EXPIRED:
1190 	case WD_DISARMED:
1191 		if (state.present_state == WD_EXPIRED)
1192 			wd_picl_update_state(WD1_2, WD_EXPIRED);
1193 		else
1194 			wd_picl_update_state(WD1_2, WD_DISARMED);
1195 		wd_data.wd_pat_state = WD_NORESET;
1196 		wd_data.wd1_action = state.action1;
1197 		wd_data.wd2_action = state.action2;
1198 		tmp1 = state.timeout1[0] << 8;
1199 		tmp2 = state.timeout1[1];
1200 		wd_time1 = tmp1 | tmp2;
1201 		wd_data.wd1_timeout = wd_time1 * WD_L1_RESOLUTION;
1202 		wd_data.wd2_timeout = state.timeout2 * WD_L2_RESOLUTION;
1203 		break;
1204 	case WD_ARMED:
1205 		/*
1206 		 * get the present values and restart the
1207 		 * watchdog from os level and continue to pat
1208 		 */
1209 		wd_picl_update_state(WD1_2, WD_ARMED);
1210 		wd_data.wd_pat_state = WD_RESET;
1211 		wd_data.wd1_action = (state.action1 << 4);
1212 		wd_data.wd2_action = state.action2;
1213 
1214 		tmp1 = state.timeout1[0] << 8;
1215 		tmp2 = state.timeout1[1];
1216 		wd_time1 = tmp1 | tmp2;
1217 		wd_data.wd1_timeout = wd_time1 * WD_L1_RESOLUTION;
1218 		wd_data.wd2_timeout = state.timeout2 * WD_L2_RESOLUTION;
1219 		(void) wd_stop();
1220 	}
1221 }
1222 
1223 /*
1224  * wrapper for ptree interface to create property
1225  */
1226 static int
wd_create_property(int ptype,int pmode,size_t psize,char * pname,int (* readfn)(ptree_rarg_t *,void *),int (* writefn)(ptree_warg_t *,const void *),picl_nodehdl_t nodeh,picl_prophdl_t * propp,void * vbuf)1227 wd_create_property(
1228 	int		ptype,		/* PICL property type */
1229 	int		pmode,		/* PICL access mode */
1230 	size_t		psize,		/* size of PICL property */
1231 	char		*pname,		/* property name */
1232 	int		(*readfn)(ptree_rarg_t *, void *),
1233 	int		(*writefn)(ptree_warg_t *, const void *),
1234 	picl_nodehdl_t	nodeh,		/* node for property */
1235 	picl_prophdl_t	*propp,		/* pointer to prop_handle */
1236 	void		*vbuf)		/* initial value */
1237 {
1238 	picl_errno_t		rc;
1239 	ptree_propinfo_t	propinfo;
1240 
1241 	rc = ptree_init_propinfo(&propinfo, PTREE_PROPINFO_VERSION,
1242 		ptype, pmode, psize, pname, readfn, writefn);
1243 	if (rc != PICL_SUCCESS) {
1244 		syslog(LOG_ERR, WD_PICL_PROP_INIT_ERR, rc);
1245 		return (rc);
1246 	}
1247 
1248 	rc = ptree_create_and_add_prop(nodeh, &propinfo, vbuf, propp);
1249 	if (rc != PICL_SUCCESS) {
1250 		return (rc);
1251 	}
1252 
1253 	return (PICL_SUCCESS);
1254 }
1255 
1256 /* Create and add Watchdog properties */
1257 static void
wd_create_add_props()1258 wd_create_add_props()
1259 {
1260 	int rc;
1261 	picl_nodehdl_t	rooth, sysmgmt_h, platformh;
1262 	int32_t	timeout1 = 0;
1263 	int32_t	timeout2 = 0;
1264 	char		buf[PICL_WD_PROPVAL_MAX];
1265 
1266 	/* get picl root node handle */
1267 	if ((rc = ptree_get_root(&rooth)) != PICL_SUCCESS) {
1268 		syslog(LOG_ERR, WD_NODE_INIT_ERR, 1, rc);
1269 		return;
1270 	}
1271 
1272 	/* get picl platform node handle */
1273 	if ((rc = ptree_get_node_by_path(PLATFORM_PATH,
1274 		&platformh)) != PICL_SUCCESS) {
1275 		syslog(LOG_ERR, WD_NODE_INIT_ERR, 2, rc);
1276 		return;
1277 	}
1278 
1279 	/* get the picl sysmgmt node handle */
1280 	if ((rc = ptree_find_node(platformh, PICL_PROP_NAME,
1281 		PICL_PTYPE_CHARSTRING,
1282 		PICL_NODE_SYSMGMT, strlen(PICL_NODE_SYSMGMT),
1283 		&sysmgmt_h)) != PICL_SUCCESS) {
1284 		syslog(LOG_ERR, WD_NODE_INIT_ERR, 3, rc);
1285 		return;
1286 	}
1287 
1288 	/* start creating the watchdog nodes and properties */
1289 	if ((rc = ptree_create_and_add_node(sysmgmt_h, PICL_NODE_WD_CONTROLLER,
1290 		PICL_CLASS_WATCHDOG_CONTROLLER,
1291 		&(wd_data.wd_ctrl_nodehdl))) != PICL_SUCCESS) {
1292 		syslog(LOG_ERR, WD_NODE_INIT_ERR, 4, rc);
1293 		return;
1294 	}
1295 
1296 	/* Add wd-op property to watchdog controller node */
1297 	(void) strncpy(buf, "", sizeof (buf));
1298 	if ((rc = wd_create_property(PICL_PTYPE_CHARSTRING,
1299 		PICL_WRITE + PICL_VOLATILE,
1300 		PICL_PROPNAMELEN_MAX, PICL_PROP_WATCHDOG_OPERATION,
1301 		NULL, wd_write_op,
1302 		wd_data.wd_ctrl_nodehdl,
1303 		&(wd_data.wd_ops_hdl),
1304 		(void *)buf)) != PICL_SUCCESS) {
1305 		syslog(LOG_ERR, WD_NODE_INIT_ERR, 5, rc);
1306 		return;
1307 	}
1308 
1309 	/* create L1 node and add to controller */
1310 	if ((rc = ptree_create_and_add_node(wd_data.wd_ctrl_nodehdl,
1311 		PICL_NODE_WD_L1, PICL_CLASS_WATCHDOG_TIMER,
1312 		&(wd_data.wd1_nodehdl))) != PICL_SUCCESS) {
1313 		syslog(LOG_ERR, WD_NODE_INIT_ERR, 6, rc);
1314 		return;
1315 	}
1316 
1317 	/* create L2 node and add to controller */
1318 	if ((rc = ptree_create_and_add_node(wd_data.wd_ctrl_nodehdl,
1319 		PICL_NODE_WD_L2, PICL_CLASS_WATCHDOG_TIMER,
1320 		&(wd_data.wd2_nodehdl))) != PICL_SUCCESS) {
1321 		syslog(LOG_ERR, WD_NODE_INIT_ERR, 7, rc);
1322 		return;
1323 	}
1324 
1325 	/* create watchdog properties */
1326 	/* create state property here */
1327 	(void) strncpy(buf, PICL_PROPVAL_WD_STATE_DISARMED,
1328 		sizeof (buf));
1329 	if ((rc = wd_create_property(PICL_PTYPE_CHARSTRING,
1330 		PICL_READ + PICL_VOLATILE, PICL_PROPNAMELEN_MAX,
1331 		PICL_PROP_STATE, wd1_read_status, NULLWRITE,
1332 		wd_data.wd1_nodehdl,
1333 		&(wd_data.wd1_state_hdl), (void *)buf)) != PICL_SUCCESS) {
1334 		syslog(LOG_ERR, WD_NODE_INIT_ERR, 8, rc);
1335 		return;
1336 	}
1337 
1338 	if ((rc = wd_create_property(PICL_PTYPE_CHARSTRING,
1339 		PICL_READ + PICL_VOLATILE, PICL_PROPNAMELEN_MAX,
1340 		PICL_PROP_STATE, wd2_read_status, NULLWRITE,
1341 		wd_data.wd2_nodehdl,
1342 		&(wd_data.wd2_state_hdl), (void *)buf)) != PICL_SUCCESS) {
1343 		syslog(LOG_ERR, WD_NODE_INIT_ERR, 9, rc);
1344 		return;
1345 	}
1346 
1347 	/* create timeout property here */
1348 	if ((rc = wd_create_property(PICL_PTYPE_UNSIGNED_INT,
1349 		PICL_READ + PICL_WRITE + PICL_VOLATILE,
1350 		sizeof (timeout1), PICL_PROP_WATCHDOG_TIMEOUT,
1351 		wd_read_timeout, wd_write_timeout, wd_data.wd1_nodehdl,
1352 		&(wd_data.wd1_timeout_hdl), (void *)&(timeout1))) !=
1353 		PICL_SUCCESS) {
1354 		syslog(LOG_ERR, WD_NODE_INIT_ERR, 10, rc);
1355 		return;
1356 	}
1357 
1358 	if ((rc = wd_create_property(PICL_PTYPE_UNSIGNED_INT,
1359 		PICL_READ + PICL_WRITE + PICL_VOLATILE,
1360 		sizeof (wd_data.wd2_timeout), PICL_PROP_WATCHDOG_TIMEOUT,
1361 		wd_read_timeout, wd_write_timeout, wd_data.wd2_nodehdl,
1362 		&(wd_data.wd2_timeout_hdl), (void *)&(timeout2))) !=
1363 		PICL_SUCCESS) {
1364 		syslog(LOG_ERR, WD_NODE_INIT_ERR, 11, rc);
1365 		return;
1366 	}
1367 
1368 	/* create wd_action property here */
1369 	(void) strncpy(buf, PICL_PROPVAL_WD_ACTION_NONE,
1370 		sizeof (buf));
1371 	if ((rc = wd_create_property(PICL_PTYPE_CHARSTRING,
1372 		PICL_READ + PICL_WRITE + PICL_VOLATILE,
1373 		PICL_PROPNAMELEN_MAX, PICL_PROP_WATCHDOG_ACTION,
1374 		wd_read_action, wd_write_action,
1375 		wd_data.wd1_nodehdl, &(wd_data.wd1_action_hdl),
1376 		(void *)buf)) != PICL_SUCCESS) {
1377 		syslog(LOG_ERR, WD_NODE_INIT_ERR, 12, rc);
1378 		return;
1379 	}
1380 
1381 	if ((rc = wd_create_property(PICL_PTYPE_CHARSTRING,
1382 		PICL_READ + PICL_WRITE + PICL_VOLATILE,
1383 		PICL_PROPNAMELEN_MAX, PICL_PROP_WATCHDOG_ACTION,
1384 		wd_read_action, wd_write_action,
1385 		wd_data.wd2_nodehdl, &(wd_data.wd2_action_hdl),
1386 		(void *)buf)) != PICL_SUCCESS) {
1387 		syslog(LOG_ERR, WD_NODE_INIT_ERR, 13, rc);
1388 		return;
1389 	}
1390 }
1391 
1392 static int
wd_ioctl(int fd,int cmd,int len,char * buf)1393 wd_ioctl(int fd, int cmd, int len, char *buf)
1394 {
1395 	int rtnval;
1396 	struct strioctl sioc;
1397 	sioc.ic_cmd = cmd;
1398 	sioc.ic_timout = 60;
1399 	sioc.ic_len = len;
1400 	sioc.ic_dp = buf;
1401 	rtnval = ioctl(fd, I_STR, &sioc);
1402 	return (rtnval);
1403 }
1404 
1405 static int
wd_open(int attr)1406 wd_open(int attr)
1407 {
1408 	int cc;
1409 	sc_cmdspec_t wd_cmdspec;
1410 
1411 	if ((wd_fd = open(SMC_NODE, attr)) < 0) {
1412 		return (-1);
1413 	}
1414 
1415 	/* get exclusive access for set and reset commands of watchdog */
1416 	wd_cmdspec.args[0] = SMC_SET_WATCHDOG_TIMER;
1417 	wd_cmdspec.args[1] = SMC_RESET_WATCHDOG_TIMER;
1418 	wd_cmdspec.attribute = SC_ATTR_EXCLUSIVE;
1419 
1420 	cc = wd_ioctl(wd_fd, SCIOC_MSG_SPEC, 3,
1421 		(char *)&wd_cmdspec);
1422 	if (cc < 0) {
1423 		syslog(LOG_ERR, WD_PICL_EXCLUSIVE_ACCESS_ERR);
1424 		return (-1);
1425 	}
1426 	return (wd_fd);
1427 }
1428 
1429 static int
wd_open_pollfd(int attr)1430 wd_open_pollfd(int attr)
1431 {
1432 	int cc;
1433 	sc_cmdspec_t wd_cmdspec;
1434 
1435 	if ((polling_fd = open(SMC_NODE, attr)) < 0) {
1436 		return (-1);
1437 	}
1438 
1439 	/* request for watchdog expiry notification	*/
1440 	wd_cmdspec.args[0] = SMC_EXPIRED_WATCHDOG_NOTIF;
1441 	wd_cmdspec.attribute = SC_ATTR_EXCLUSIVE;
1442 
1443 	cc = wd_ioctl(polling_fd, SCIOC_MSG_SPEC, 2,
1444 		(char *)&wd_cmdspec);
1445 	if (cc < 0) {
1446 		syslog(LOG_ERR, WD_PICL_SET_ATTR_FAILED);
1447 		return (-1);
1448 	}
1449 	return (polling_fd);
1450 }
1451 
1452 /* read the ENVIRONMENT variables and initialize tunables */
1453 static void
wd_get_env()1454 wd_get_env()
1455 {
1456 	char *val;
1457 	int intval = 0;
1458 
1459 	/* read frutree debug flag value */
1460 	if (val = getenv(WATCHDOG_DEBUG)) {
1461 		errno = 0;
1462 		intval = strtol(val, (char **)NULL, 0);
1463 		if (errno == 0) {
1464 			wd_debug = intval;
1465 		}
1466 	}
1467 }
1468 
1469 /*
1470  * PTREE Entry Points
1471  */
1472 
1473 /* picl-state-change event handler */
1474 /*ARGSUSED*/
1475 static void
wd_state_change_evhandler(const char * ename,const void * earg,size_t size,void * cookie)1476 wd_state_change_evhandler(const char *ename, const void *earg,
1477 			size_t size, void *cookie)
1478 {
1479 	char 		*value;
1480 	picl_errno_t	rc;
1481 	nvlist_t	*nvlp;
1482 	picl_nodehdl_t  fruhdl;
1483 	static 		int spawn_threads = 1;
1484 	char 		name[PICL_PROPNAMELEN_MAX];
1485 
1486 	if (strcmp(ename, PICLEVENT_STATE_CHANGE)) {
1487 		return;
1488 	}
1489 
1490 	/* neglect all events if wd props are already created */
1491 	if (props_created && state_configured) {
1492 		return;
1493 	}
1494 
1495 	if (nvlist_unpack((char *)earg, size, &nvlp, NULL)) {
1496 		return;
1497 	}
1498 	if ((nvlist_lookup_uint64(nvlp, PICLEVENTARG_NODEHANDLE,
1499 		&fruhdl)) == -1) {
1500 		nvlist_free(nvlp);
1501 		return;
1502 	}
1503 	if (nvlist_lookup_string(nvlp, PICLEVENTARG_STATE, &value)) {
1504 		nvlist_free(nvlp);
1505 		return;
1506 	}
1507 
1508 	rc = ptree_get_propval_by_name(fruhdl, PICL_PROP_NAME,
1509 		(void *)name, sizeof (name));
1510 	if (rc != PICL_SUCCESS) {
1511 		nvlist_free(nvlp);
1512 		return;
1513 	}
1514 
1515 	/* care for only events on chassis node */
1516 	if (strcmp(name, PICL_NODE_CHASSIS) != 0) {
1517 		nvlist_free(nvlp);
1518 		return;
1519 	}
1520 
1521 	if (strcmp(value, PICLEVENTARGVAL_CONFIGURED) == 0) {
1522 		state_configured = 1;
1523 		nvlist_free(nvlp);
1524 		return;
1525 	}
1526 
1527 	if (strcmp(value, PICLEVENTARGVAL_CONFIGURING) != 0) {
1528 		nvlist_free(nvlp);
1529 		return;
1530 	}
1531 
1532 	if (wd_fd < 0) {
1533 		if ((wd_fd = wd_open(O_RDWR))  < 0) {
1534 			syslog(LOG_CRIT, WD_PICL_SMC_OPEN_ERR);
1535 			nvlist_free(nvlp);
1536 			return;
1537 		}
1538 	}
1539 
1540 	if (polling_fd < 0) {
1541 		if ((polling_fd = wd_open_pollfd(O_RDWR))  < 0) {
1542 			syslog(LOG_CRIT, WD_PICL_SMC_OPEN_ERR);
1543 			nvlist_free(nvlp);
1544 			return;
1545 		}
1546 	}
1547 
1548 	switch (wd_get_chassis_type()) {
1549 		case WD_HOST: /* is host */
1550 			wd_data.is_host = B_TRUE;
1551 			break;
1552 		case WD_STANDALONE: /* is satellite */
1553 			wd_data.is_host = B_FALSE;
1554 			break;
1555 		default:
1556 			nvlist_free(nvlp);
1557 			return;
1558 	}
1559 
1560 	(void) wd_create_add_props(); /* create and add properties */
1561 	props_created = 1;
1562 
1563 	/* read the hardware and initialize values */
1564 	(void) wd_set_init_state();
1565 
1566 	/* initialize wd-conf value */
1567 	(void) snprintf(wd_conf, sizeof (wd_conf), "%s/%s",
1568 		PICL_CONFIG_DIR, WD_CONF_FILE);
1569 
1570 	if (spawn_threads == 0) {
1571 		/* threads are already created */
1572 		nvlist_free(nvlp);
1573 		return;
1574 	}
1575 
1576 	/* start monitoring for the events */
1577 	if (thr_create(NULL,  NULL,  wd_polling,
1578 		NULL,  THR_BOUND, &polling_thr_tid) != 0) {
1579 		syslog(LOG_ERR, WD_PICL_THREAD_CREATE_FAILED,
1580 			"polling");
1581 		nvlist_free(nvlp);
1582 		return;
1583 	}
1584 
1585 	/* thread used to pat watchdog */
1586 	if (thr_create(NULL,  NULL,  wd_patting_thread,
1587 		NULL,  THR_BOUND, &patting_thr_tid) != 0) {
1588 		syslog(LOG_ERR, WD_PICL_THREAD_CREATE_FAILED,
1589 			"patting");
1590 		nvlist_free(nvlp);
1591 		return;
1592 	}
1593 	spawn_threads = 0;
1594 	nvlist_free(nvlp);
1595 }
1596 
1597 static void
wd_picl_register(void)1598 wd_picl_register(void)
1599 {
1600 	int rc = 0;
1601 	if ((rc = picld_plugin_register(&wd_reg_info)) != PICL_SUCCESS) {
1602 		syslog(LOG_ERR, WD_PICL_REG_ERR, rc);
1603 	}
1604 }
1605 
1606 /* entry point (initialization) */
1607 static void
wd_picl_init(void)1608 wd_picl_init(void)
1609 {
1610 	/* initialize the wd_conf path and name */
1611 	(void) snprintf(wd_conf, sizeof (wd_conf), "%s/%s",
1612 		PICL_CONFIG_DIR, WD_CONF_FILE);
1613 
1614 	/* parse configuration file and set tunables */
1615 	wd_parse_config_file(wd_conf);
1616 
1617 	/* if watchdog-enable is set to false dont intialize wd subsystem */
1618 	if (wd_enable == 0) {
1619 		return;
1620 	}
1621 
1622 	/* read watchdog related environment variables */
1623 	wd_get_env();
1624 
1625 	/* event handler for state change notifications from frutree */
1626 	(void) ptree_register_handler(PICLEVENT_STATE_CHANGE,
1627 		wd_state_change_evhandler, NULL);
1628 }
1629 
1630 static void
wd_picl_fini(void)1631 wd_picl_fini(void)
1632 {
1633 	(void) ptree_unregister_handler(PICLEVENT_STATE_CHANGE,
1634 		wd_state_change_evhandler, NULL);
1635 
1636 	state_configured = 0;	/* chassis state */
1637 	props_created = 0;
1638 	(void) ptree_delete_node(wd_data.wd_ctrl_nodehdl);
1639 	(void) ptree_destroy_node(wd_data.wd_ctrl_nodehdl);
1640 }
1641 
1642 /*
1643  * volatile function to read the timeout
1644  */
1645 static int
wd_read_timeout(ptree_rarg_t * parg,void * buf)1646 wd_read_timeout(ptree_rarg_t *parg, void *buf)
1647 {
1648 	/* update the buffer provided by user */
1649 	(void) mutex_lock(&data_lock);
1650 	if (parg->proph == wd_data.wd1_timeout_hdl) {
1651 		*(int32_t *)buf = wd_data.wd1_timeout;
1652 	} else if (parg->proph == wd_data.wd2_timeout_hdl) {
1653 		*(int32_t *)buf = wd_data.wd2_timeout;
1654 	}
1655 	(void) mutex_unlock(&data_lock);
1656 	return (PICL_SUCCESS);
1657 }
1658 
1659 /*
1660  * volatile function to read the action
1661  */
1662 static int
wd_read_action(ptree_rarg_t * parg,void * buf)1663 wd_read_action(ptree_rarg_t *parg, void *buf)
1664 {
1665 	(void) mutex_lock(&data_lock);
1666 	if (parg->proph == wd_data.wd1_action_hdl) {
1667 		switch (wd_data.wd1_action) {
1668 		case WD_ACTION_HEALTHY_DOWN_HOST:
1669 		case WD_ACTION_HEALTHY_DOWN_SAT:
1670 		(void) strcpy((char *)buf,
1671 			PICL_PROPVAL_WD_ACTION_ALARM);
1672 		break;
1673 		case WD_ACTION_NONE1:
1674 		case WD_ACTION_NONE2:
1675 		if (wd_data.reboot_action == 1) {
1676 			(void) strcpy((char *)buf,
1677 				PICL_PROPVAL_WD_ACTION_REBOOT);
1678 		} else {
1679 			(void) strcpy((char *)buf,
1680 				PICL_PROPVAL_WD_ACTION_NONE);
1681 		}
1682 		break;
1683 		}
1684 	} else if (parg->proph == wd_data.wd2_action_hdl) {
1685 		switch (wd_data.wd2_action) {
1686 		case WD_ACTION_HARD_RESET:
1687 		(void) strcpy((char *)buf,
1688 			PICL_PROPVAL_WD_ACTION_RESET);
1689 		break;
1690 		case WD_ACTION_NONE2:
1691 		(void) strcpy((char *)buf, PICL_PROPVAL_WD_ACTION_NONE);
1692 		break;
1693 		}
1694 	}
1695 	(void) mutex_unlock(&data_lock);
1696 	return (PICL_SUCCESS);
1697 }
1698 
1699 /*
1700  * volatile function to write the action
1701  * this function validates the user value before programming the
1702  * action property. Properties can be modified only when watchdog
1703  * is in disarmed state.
1704  */
1705 static int
wd_write_action(ptree_warg_t * parg,const void * buf)1706 wd_write_action(ptree_warg_t *parg, const void *buf)
1707 {
1708 	int flag = 0x0;
1709 	picl_errno_t rc = PICL_SUCCESS;
1710 	char wd_action[PICL_WD_PROPVAL_MAX];
1711 
1712 	/* only super user can write this property */
1713 	if (parg->cred.dc_euid != SUPER_USER) {
1714 		return (PICL_PERMDENIED);
1715 	}
1716 
1717 	if (parg->proph == wd_data.wd1_action_hdl) {
1718 		flag = WD1;
1719 	} else if (parg->proph == wd_data.wd2_action_hdl) {
1720 		flag = WD2;
1721 	}
1722 
1723 	/* dont allow any write operations when watchdog is armed */
1724 	(void) mutex_lock(&data_lock);
1725 	if (wd_data.wd1_run_state != WD_DISARMED ||
1726 		wd_data.wd2_run_state != WD_DISARMED) {
1727 		(void) mutex_unlock(&data_lock);
1728 		return (PICL_PERMDENIED);
1729 	}
1730 
1731 	/* validate the values and store in internal cache */
1732 	(void) strcpy(wd_action, (char *)buf);
1733 	switch (flag) {
1734 	case WD1:
1735 	if (strcmp(wd_action, PICL_PROPVAL_WD_ACTION_ALARM) == 0) {
1736 		if (wd_data.is_host)
1737 			wd_data.wd1_action = WD_ACTION_HEALTHY_DOWN_HOST;
1738 		else
1739 			wd_data.wd1_action = WD_ACTION_HEALTHY_DOWN_SAT;
1740 		wd_data.reboot_action = 0;
1741 	} else if (strcmp(wd_action, PICL_PROPVAL_WD_ACTION_NONE) == 0) {
1742 		wd_data.wd1_action = WD_ACTION_NONE1;
1743 		wd_data.reboot_action = 0;
1744 	} else if (strcmp(wd_action, PICL_PROPVAL_WD_ACTION_REBOOT) == 0) {
1745 		wd_data.wd1_action = WD_ACTION_NONE1;
1746 		wd_data.reboot_action = 1;
1747 	} else {
1748 		rc = PICL_INVALIDARG;
1749 	}
1750 	break;
1751 
1752 	case WD2:
1753 	if (strcmp(wd_action, PICL_PROPVAL_WD_ACTION_RESET) == 0) {
1754 		wd_data.wd2_action = WD_ACTION_HARD_RESET;
1755 	} else if (strcmp(wd_action, PICL_PROPVAL_WD_ACTION_NONE) == 0) {
1756 		wd_data.wd2_action = WD_ACTION_NONE2;
1757 	} else {
1758 		rc = PICL_INVALIDARG;
1759 	}
1760 	break;
1761 	}
1762 	(void) mutex_unlock(&data_lock);
1763 	return (rc);
1764 }
1765 
1766 /*
1767  * volatile function to write the timeout
1768  * this function validates the user value before programming the
1769  * timeout property. Properties can be modified only when watchdog
1770  * is in disarmed state.
1771  */
1772 static int
wd_write_timeout(ptree_warg_t * parg,const void * buf)1773 wd_write_timeout(ptree_warg_t *parg, const void *buf)
1774 {
1775 	int32_t timeout;
1776 	int flag = 0x0;
1777 
1778 	/* only super user can write this property */
1779 	if (parg->cred.dc_euid != SUPER_USER) {
1780 		return (PICL_PERMDENIED);
1781 	}
1782 
1783 	/* dont allow any write operations when watchdog is armed */
1784 	(void) mutex_lock(&data_lock);
1785 	if (wd_data.wd1_run_state != WD_DISARMED ||
1786 		wd_data.wd2_run_state != WD_DISARMED) {
1787 		(void) mutex_unlock(&data_lock);
1788 		return (PICL_PERMDENIED);
1789 	}
1790 	(void) mutex_unlock(&data_lock);
1791 
1792 	if (parg->proph == wd_data.wd1_timeout_hdl) {
1793 		flag = WD1;
1794 	} else if (parg->proph == wd_data.wd2_timeout_hdl) {
1795 		flag = WD2;
1796 	}
1797 
1798 	/* validate the timeout values */
1799 	timeout = *(int32_t *)buf;
1800 	if (timeout < -1) {
1801 		return (PICL_INVALIDARG);
1802 	}
1803 
1804 	if (timeout > 0) {
1805 		switch (flag) {
1806 		case WD1:
1807 		if ((timeout % WD_L1_RESOLUTION) != 0) {
1808 			return (PICL_INVALIDARG);
1809 		}
1810 		if ((timeout/WD_L1_RESOLUTION) > WD_MAX_L1) {
1811 			return (PICL_INVALIDARG);
1812 		}
1813 		break;
1814 		case WD2:
1815 		if ((timeout % WD_L2_RESOLUTION) != 0) {
1816 			return (PICL_INVALIDARG);
1817 		}
1818 		if ((timeout/WD_L2_RESOLUTION) > WD_MAX_L2) {
1819 							/* 255 sec */
1820 			return (PICL_INVALIDARG);
1821 		}
1822 		}
1823 	}
1824 
1825 	/* update the internal cache */
1826 	(void) mutex_lock(&data_lock);
1827 	switch (flag) {
1828 	case WD1:
1829 		wd_data.wd1_timeout = timeout;
1830 		break;
1831 	case WD2:
1832 		wd_data.wd2_timeout = timeout;
1833 		break;
1834 	}
1835 	(void) mutex_unlock(&data_lock);
1836 	return (PICL_SUCCESS);
1837 }
1838