1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * This module is used to monitor and control watchdog timer for 31 * UltraSPARC-IIi CPU in Snowbird 32 */ 33 34 #include <stdio.h> 35 #include <fcntl.h> 36 #include <unistd.h> 37 #include <stdlib.h> 38 #include <stdarg.h> 39 #include <strings.h> 40 #include <string.h> 41 #include <ctype.h> 42 #include <alloca.h> 43 #include <limits.h> 44 #include <sys/types.h> 45 #include <sys/stat.h> 46 #include <libintl.h> 47 #include <syslog.h> 48 #include <locale.h> 49 #include <picl.h> 50 #include <picltree.h> 51 #include <libnvpair.h> 52 #include <poll.h> 53 #include <errno.h> 54 #include <syslog.h> 55 #include <sys/priocntl.h> 56 #include <sys/rtpriocntl.h> 57 #include <sys/tspriocntl.h> 58 #include <sys/fsspriocntl.h> 59 #include <stropts.h> 60 #include <synch.h> 61 #include <signal.h> 62 #include <thread.h> 63 #include <picldefs.h> 64 #include <smclib.h> 65 #include "piclwatchdog.h" 66 67 #pragma init(wd_picl_register) /* init section */ 68 69 /* debug variables */ 70 static int wd_debug = 0; 71 static hrtime_t start1, end1; 72 static int count = 0; 73 typedef struct { /* used to keep track of time taken for last 5 pats */ 74 int res_seq; 75 int req_seq; 76 int64_t time; 77 } wd_time_t; 78 79 #define NUMBER_OF_READINGS 5 80 static wd_time_t time1[NUMBER_OF_READINGS]; 81 82 /* global declarations */ 83 static int wd_fd = -1; /* fd used to send watchdog commands */ 84 static int polling_fd = -1; /* polling thread that snoops for events */ 85 static int wd_enable = 1; 86 static int state_configured = 0; /* chassis state */ 87 static int props_created = 0; 88 static int wd_pat_thr_priority = -1; 89 static pid_t pid = -1; /* PID that owns watchdog services */ 90 static cond_t patting_cv; 91 static mutex_t data_lock; 92 static mutex_t patting_lock; 93 static int32_t pat_time = 0; 94 static thread_t polling_thr_tid; 95 static thread_t patting_thr_tid; 96 static wd_data_t wd_data; 97 static char wd_conf[MAXPATHLEN]; 98 99 #define NULLREAD (int (*)(ptree_rarg_t *, void *))0 100 #define NULLWRITE (int (*)(ptree_warg_t *, const void *))0 101 102 /* ptree interface */ 103 static void wd_picl_register(void); 104 static void wd_picl_init(void); 105 static void wd_picl_fini(void); 106 static void wd_state_change_evhandler(const char *, 107 const void *, size_t, void *); 108 109 /* local functions */ 110 static int wd_write_timeout(ptree_warg_t *, const void *); 111 static int wd_write_action(ptree_warg_t *, const void *); 112 static int wd_read_action(ptree_rarg_t *, void *); 113 static int wd_read_timeout(ptree_rarg_t *, void *); 114 extern char *strtok_r(char *s1, const char *s2, char **lasts); 115 extern int wd_get_chassis_type(); 116 117 static picld_plugin_reg_t wd_reg_info = { 118 PICLD_PLUGIN_VERSION_1, 119 PICLD_PLUGIN_CRITICAL, 120 "SUNW_picl_watchdog", 121 wd_picl_init, 122 wd_picl_fini, 123 }; 124 125 /* 126 * This function parses wd.conf file to set the tunables 127 * tunables at present: patting thread priority, pat time, wd_enable 128 */ 129 static void 130 wd_parse_config_file(char *wd_conf) 131 { 132 FILE *fp; 133 char buf[WD_CONF_MAXSIZE]; 134 char *token, *last, *value; 135 136 if ((fp = fopen(wd_conf, "r")) == NULL) { 137 return; 138 } 139 140 while (fgets(buf, sizeof (buf), fp) != NULL) { 141 if (buf[0] == '\0' || buf[0] == '#') { 142 continue; 143 } 144 token = last = value = NULL; 145 value = (char *)strtok_r((char *)buf, WD_DELIMETER, &last); 146 if (last) { 147 token = (char *)strtok_r(last, WD_DELIMETER, &last); 148 } else { 149 continue; 150 } 151 152 if (value == NULL || token == NULL) { 153 continue; 154 } 155 if (strcmp(token, WD_PAT_THREAD_PRIORITY) == 0) { 156 wd_pat_thr_priority = strtol(value, 157 (char **)NULL, 10); 158 } else if (strcmp(token, WD_PATTING_TIME) == 0) { 159 errno = 0; 160 pat_time = strtol(value, (char **)NULL, 10); 161 if (errno != 0) { 162 pat_time = 0; 163 } 164 } else if (strcmp(token, WD_ENABLE) == 0) { 165 if (strcmp(value, "false") == 0) { 166 wd_enable = 0; 167 } 168 } else { /* unknown token */ 169 continue; 170 } 171 } 172 (void) fclose(fp); 173 } 174 175 /* 176 * read the SMC watchdog registers 177 */ 178 static int 179 wd_get_reg_dump(uint8_t buffer[]) 180 { 181 int rc = 0, i; 182 sc_reqmsg_t req_pkt; 183 sc_rspmsg_t rsp_pkt; 184 185 /* initialize the request packet */ 186 (void) smc_init_smc_msg(&req_pkt, SMC_GET_WATCHDOG_TIMER, 187 DEFAULT_SEQN, 0); 188 189 /* make a call to smc library to send cmd */ 190 if ((rc = smc_send_msg(DEFAULT_FD, &req_pkt, &rsp_pkt, 191 WD_POLL_TIMEOUT)) != SMC_SUCCESS) { 192 WD_DEBUG1(WD_PICL_GET_ERR, rc); 193 return (PICL_FAILURE); 194 } 195 196 /* read 8 bytes */ 197 bzero(buffer, WD_REGISTER_LEN); 198 for (i = 0; i < WD_REGISTER_LEN; i++) { 199 buffer[i] = rsp_pkt.data[i]; 200 } 201 return (PICL_SUCCESS); 202 } 203 204 /* 205 * get the HEALTHY# line state 206 * Return -1 for Error 207 * 0 for HEALTHY# down 208 * 1 for HEALTHY# up 209 */ 210 static int 211 wd_get_healthy_status() 212 { 213 sc_reqmsg_t req_pkt; 214 sc_rspmsg_t rsp_pkt; 215 216 /* initialize the request packet */ 217 (void) smc_init_smc_msg(&req_pkt, SMC_GET_EXECUTION_STATE, 218 DEFAULT_SEQN, 0); 219 220 /* make a call to smc library to send cmd */ 221 if (smc_send_msg(DEFAULT_FD, &req_pkt, &rsp_pkt, 222 WD_POLL_TIMEOUT) != SMC_SUCCESS) { 223 return (-1); 224 } 225 226 return ((rsp_pkt.data[0] & IS_HEALTHY) ? WD_HEALTHY_UP : 227 WD_HEALTHY_DOWN); 228 } 229 230 /*ARGSUSED*/ 231 static void 232 event_completion_handler(char *ename, void *earg, size_t size) 233 { 234 free(ename); 235 free(earg); 236 } 237 238 /* 239 * posts picl-state-change event if there is change in watchdog-timer state 240 */ 241 static picl_errno_t 242 post_wd_state_event(picl_nodehdl_t nodeh, char *state) 243 { 244 nvlist_t *nvl; 245 size_t nvl_size; 246 char *pack_buf = NULL; 247 picl_errno_t rc; 248 char *ename = PICLEVENT_STATE_CHANGE, *evname = NULL; 249 250 if (state == NULL) { 251 return (PICL_FAILURE); 252 } 253 254 if ((evname = strdup(ename)) == NULL) { 255 return (PICL_NOSPACE); 256 } 257 258 if ((rc = nvlist_alloc(&nvl, NV_UNIQUE_NAME_TYPE, NULL)) != 0) { 259 free(evname); 260 syslog(LOG_ERR, WD_NVLIST_ERR, rc); 261 return (PICL_FAILURE); 262 } 263 264 if ((rc = nvlist_add_uint64(nvl, PICLEVENTARG_NODEHANDLE, 265 nodeh)) != 0) { 266 nvlist_free(nvl); 267 free(evname); 268 syslog(LOG_ERR, WD_NVLIST_ERR, rc); 269 return (PICL_FAILURE); 270 } 271 272 if ((rc = nvlist_add_string(nvl, PICLEVENTARG_STATE, 273 state)) != 0) { 274 nvlist_free(nvl); 275 free(evname); 276 syslog(LOG_ERR, WD_NVLIST_ERR, rc); 277 return (PICL_FAILURE); 278 } 279 280 if ((rc = nvlist_pack(nvl, &pack_buf, &nvl_size, NV_ENCODE_NATIVE, 281 NULL)) != 0) { 282 nvlist_free(nvl); 283 free(evname); 284 syslog(LOG_ERR, WD_NVLIST_ERR, rc); 285 return (PICL_FAILURE); 286 } 287 288 if ((rc = ptree_post_event(evname, pack_buf, nvl_size, 289 event_completion_handler)) != PICL_SUCCESS) { 290 free(pack_buf); 291 free(evname); 292 nvlist_free(nvl); 293 return (rc); 294 } 295 nvlist_free(nvl); 296 return (PICL_SUCCESS); 297 } 298 299 /* 300 * Updates the State value in picl tree and posts a state-change event 301 */ 302 static void 303 wd_picl_update_state(int level, uint8_t stat) 304 { 305 picl_errno_t rc; 306 char state[PICL_PROPNAMELEN_MAX]; 307 308 switch (stat) { 309 case WD_ARMED: 310 (void) strncpy(state, PICL_PROPVAL_WD_STATE_ARMED, 311 sizeof (state)); 312 break; 313 case WD_DISARMED: 314 (void) strncpy(state, PICL_PROPVAL_WD_STATE_DISARMED, 315 sizeof (state)); 316 break; 317 case WD_EXPIRED: 318 (void) strncpy(state, PICL_PROPVAL_WD_STATE_EXPIRED, 319 sizeof (state)); 320 break; 321 default: 322 return; 323 } 324 325 (void) mutex_lock(&data_lock); 326 switch (level) { 327 case WD1: 328 wd_data.wd1_run_state = stat; 329 break; 330 case WD2: 331 wd_data.wd2_run_state = stat; 332 break; 333 case WD1_2: 334 wd_data.wd1_run_state = stat; 335 wd_data.wd2_run_state = stat; 336 break; 337 default: 338 return; 339 } 340 (void) mutex_unlock(&data_lock); 341 342 if (!state_configured) { 343 return; 344 } 345 346 switch (level) { 347 case WD1: 348 if ((rc = post_wd_state_event(wd_data.wd1_nodehdl, 349 state)) != PICL_SUCCESS) { 350 syslog(LOG_ERR, WD_PICL_POST_EVENT_ERR, rc); 351 } 352 break; 353 case WD2: 354 if ((rc = post_wd_state_event(wd_data.wd2_nodehdl, 355 state)) != PICL_SUCCESS) { 356 syslog(LOG_ERR, WD_PICL_POST_EVENT_ERR, rc); 357 } 358 break; 359 360 case WD1_2: 361 if ((rc = post_wd_state_event(wd_data.wd1_nodehdl, 362 state)) != PICL_SUCCESS) { 363 syslog(LOG_ERR, WD_PICL_POST_EVENT_ERR, rc); 364 } 365 if ((rc = post_wd_state_event(wd_data.wd2_nodehdl, 366 state)) != PICL_SUCCESS) { 367 syslog(LOG_ERR, WD_PICL_POST_EVENT_ERR, rc); 368 } 369 break; 370 } 371 } 372 373 /* 374 * Sends a command to SMC to reset the watchdog-timers 375 */ 376 static int 377 wd_pat() 378 { 379 int rc = 0; 380 static uint8_t seq = 1; 381 sc_reqmsg_t req_pkt; 382 sc_rspmsg_t rsp_pkt; 383 384 if (seq < WD_MAX_SEQN) { 385 req_pkt.hdr.msg_id = seq++; 386 } else { 387 seq = 1; 388 req_pkt.hdr.msg_id = seq; 389 } 390 391 if (wd_debug & WD_TIME_DEBUG) { 392 start1 = gethrtime(); 393 } 394 395 /* initialize the request packet */ 396 (void) smc_init_smc_msg(&req_pkt, SMC_RESET_WATCHDOG_TIMER, 397 DEFAULT_SEQN, 0); 398 399 /* make a call to smc library to send cmd */ 400 if ((rc = smc_send_msg(wd_fd, &req_pkt, &rsp_pkt, 401 WD_POLL_TIMEOUT)) != SMC_SUCCESS) { 402 syslog(LOG_CRIT, WD_PICL_PAT_ERR, rc); 403 return (PICL_FAILURE); 404 } 405 406 if (wd_debug & WD_TIME_DEBUG) { 407 end1 = gethrtime(); 408 time1[count].res_seq = SC_MSG_ID(&rsp_pkt); 409 time1[count].req_seq = SC_MSG_ID(&req_pkt); 410 time1[count].time = (end1 - start1); 411 412 if (count < (NUMBER_OF_READINGS - 1)) { 413 count++; 414 } else { 415 count = 0; 416 } 417 } 418 return (PICL_SUCCESS); 419 } 420 421 /* used to set the new values for watchdog and start the watchdog */ 422 static int 423 wd_start(uchar_t action_1, uchar_t action_2, 424 uchar_t timeout_2, uchar_t *timeout_1, uint8_t patting_option) 425 { 426 int rc = 0; 427 sc_reqmsg_t req_pkt; 428 sc_rspmsg_t rsp_pkt; 429 430 if (timeout_1 == NULL) { 431 return (PICL_FAILURE); 432 } 433 434 req_pkt.data[0] = WD_USEFLAG_OS; 435 req_pkt.data[1] = action_1 | action_2; /* actions */ 436 req_pkt.data[2] = timeout_2; /* wd timeout 2 */ 437 req_pkt.data[3] = WD_XPR_FLG_CLR_OS; /* expiration flags */ 438 req_pkt.data[4] = timeout_1[1]; /* LSB for wd timeout 1 */ 439 req_pkt.data[5] = timeout_1[0]; /* MSB for wd timeout 1 */ 440 441 if (patting_option == ENABLE_AUTO_PAT) { 442 req_pkt.data[0] |= WD_ENABLE_AUTO_PAT; 443 } 444 445 /* initialize the request packet */ 446 (void) smc_init_smc_msg(&req_pkt, SMC_SET_WATCHDOG_TIMER, 447 DEFAULT_SEQN, WD_SET_CMD_DATA_LEN); 448 449 /* make a call to smc library to send cmd */ 450 if ((rc = smc_send_msg(wd_fd, &req_pkt, &rsp_pkt, 451 WD_POLL_TIMEOUT)) != SMC_SUCCESS) { 452 WD_DEBUG1(WD_PICL_START_ERR, rc); 453 return (PICL_FAILURE); 454 } 455 456 /* reset the watchdog timer */ 457 (void) smc_init_smc_msg(&req_pkt, SMC_RESET_WATCHDOG_TIMER, 458 DEFAULT_SEQN, 0); 459 if ((rc = smc_send_msg(wd_fd, &req_pkt, &rsp_pkt, 460 WD_POLL_TIMEOUT)) != SMC_SUCCESS) { 461 WD_DEBUG1(WD_PICL_START_ERR, rc); 462 return (PICL_FAILURE); 463 } 464 return (PICL_SUCCESS); 465 } 466 467 /* 468 * Validates timeout and action fields and arms the watchdog-timers 469 */ 470 static int 471 wd_arm(uint8_t patting_option) 472 { 473 int rc; 474 uint16_t wd_time1; 475 uint8_t wd_time2, wd1_action, wd2_action; 476 uint8_t timeout1[2]; 477 478 if (wd_data.wd1_timeout >= 0) { 479 wd_time1 = wd_data.wd1_timeout/WD_L1_RESOLUTION; 480 } else { 481 wd_time1 = 0; 482 } 483 484 if (wd_data.wd2_timeout >= 0) { 485 wd_time2 = wd_data.wd2_timeout/WD_L2_RESOLUTION; 486 } else { 487 wd_time2 = 0; 488 } 489 490 timeout1[0] = wd_time1 >> 8; /* MSB */ 491 timeout1[1] = wd_time1 & 0x00ff; /* LSB */ 492 493 /* check the HELATHY# status if action is alarm */ 494 if (wd_data.wd1_action == WD_ACTION_HEALTHY_DOWN_HOST || 495 wd_data.wd1_action == WD_ACTION_HEALTHY_DOWN_SAT) { 496 rc = wd_get_healthy_status(); 497 if (rc == WD_HEALTHY_DOWN) { 498 WD_DEBUG0(WD_HEALTHY_ERR); 499 return (PICL_FAILURE); 500 } else if (rc == -1) { 501 syslog(LOG_ERR, WD_GET_HEALTH_ERR); 502 return (PICL_FAILURE); 503 } 504 } 505 506 if (wd_data.wd1_timeout == -1) { 507 wd1_action = WD_ACTION_NONE2; 508 } else { 509 wd1_action = wd_data.wd1_action; 510 } 511 512 if (wd_data.wd2_timeout == -1) { 513 wd2_action = WD_ACTION_NONE2; 514 } else { 515 wd2_action = wd_data.wd2_action; 516 } 517 518 rc = wd_start(wd1_action, wd2_action, 519 wd_time2, timeout1, patting_option); 520 return (rc); 521 } 522 523 /* 524 * This is thread is a RealTime class thread. This thread pats the 525 * watchdog-timers in regular intervals before the expiry. 526 */ 527 /*ARGSUSED*/ 528 static void * 529 wd_patting_thread(void *args) 530 { 531 time_t sec; 532 pcinfo_t pci; 533 long nano_sec; 534 timestruc_t to; 535 long sleep_time; 536 struct timeval tp; 537 int err, state; 538 539 for (;;) { 540 (void) mutex_lock(&patting_lock); 541 while (wd_data.wd_pat_state == WD_NORESET) { 542 (void) cond_wait(&patting_cv, &patting_lock); 543 } 544 (void) mutex_unlock(&patting_lock); 545 546 /* reset pat-time to zero */ 547 pat_time = 0; /* tunable */ 548 wd_parse_config_file(wd_conf); 549 550 if (wd_pat_thr_priority < 0 || wd_pat_thr_priority > 59) { 551 wd_pat_thr_priority = WD_DEFAULT_THREAD_PRIORITY; 552 } 553 554 /* change the priority of thread to realtime class */ 555 (void) strncpy(pci.pc_clname, "RT", sizeof (pci.pc_clname)); 556 if (priocntl(P_LWPID, P_MYID, PC_GETCID, (caddr_t)&pci) != -1) { 557 pcparms_t pcp; 558 rtparms_t *rtp = (rtparms_t *)pcp.pc_clparms; 559 rtp->rt_pri = wd_pat_thr_priority; 560 rtp->rt_tqsecs = 0; 561 rtp->rt_tqnsecs = RT_TQDEF; 562 pcp.pc_cid = pci.pc_cid; 563 if (priocntl(P_LWPID, P_MYID, PC_SETPARMS, 564 (caddr_t)&pcp) != 0) { 565 syslog(LOG_ERR, WD_PICL_RT_THRD_FAIL); 566 } 567 } else { 568 syslog(LOG_ERR, WD_PICL_RT_THRD_NO_PERM_ERR); 569 } 570 571 switch (wd_data.wd1_timeout) { 572 case 0: 573 if (wd_arm(DISABLE_AUTO_PAT) == PICL_SUCCESS) { 574 wd_picl_update_state(WD1, WD_ARMED); 575 if (wd_data.wd2_timeout >= 0) { 576 wd_picl_update_state(WD2, WD_ARMED); 577 } 578 } else { 579 syslog(LOG_ERR, WD_PICL_START_ERR, 580 PICL_FAILURE); 581 } 582 /* no need to pat */ 583 (void) mutex_lock(&patting_lock); 584 wd_data.wd_pat_state = WD_NORESET; 585 (void) mutex_unlock(&patting_lock); 586 continue; 587 case -1: 588 if (wd_data.wd2_timeout < 0) { 589 (void) mutex_lock(&patting_lock); 590 wd_data.wd_pat_state = WD_NORESET; 591 (void) mutex_unlock(&patting_lock); 592 continue; 593 } 594 if (wd_arm(DISABLE_AUTO_PAT) == PICL_SUCCESS) { 595 wd_picl_update_state(WD2, WD_ARMED); 596 } else { 597 syslog(LOG_ERR, WD_PICL_START_ERR, 598 PICL_FAILURE); 599 } 600 /* no need to pat */ 601 (void) mutex_lock(&patting_lock); 602 wd_data.wd_pat_state = WD_NORESET; 603 (void) mutex_unlock(&patting_lock); 604 continue; 605 default: 606 break; 607 } 608 609 if (pat_time == 0) { 610 if (wd_data.wd1_timeout > WD_PAT_TIME) { 611 pat_time = WD_PAT_TIME; 612 } else { 613 pat_time = wd_data.wd1_timeout - 80; 614 } 615 } 616 if (pat_time <= 0) { 617 WD_DEBUG0(WD_PICL_PAT_TIME_ERR); 618 (void) mutex_lock(&patting_lock); 619 wd_data.wd_pat_state = WD_NORESET; 620 (void) mutex_unlock(&patting_lock); 621 continue; 622 } 623 sleep_time = wd_data.wd1_timeout - pat_time; 624 625 if (wd_data.wd1_timeout <= 0 || sleep_time <= 0) { 626 WD_DEBUG0(WD_PICL_ARM_PAT_ERR); 627 (void) mutex_lock(&patting_lock); 628 wd_data.wd_pat_state = WD_NORESET; 629 (void) mutex_unlock(&patting_lock); 630 continue; 631 } else { 632 wd_picl_update_state(WD1, WD_ARMED); 633 } 634 635 if (wd_data.wd2_timeout >= 0) { 636 wd_picl_update_state(WD2, WD_ARMED); 637 } 638 639 sec = sleep_time/1000; 640 nano_sec = (sleep_time - (sec * 1000)) * 1000000; 641 642 if (wd_arm(ENABLE_AUTO_PAT) != PICL_SUCCESS) { 643 wd_picl_update_state(WD1_2, WD_DISARMED); 644 (void) mutex_lock(&patting_lock); 645 wd_data.wd_pat_state = WD_NORESET; 646 (void) mutex_unlock(&patting_lock); 647 syslog(LOG_ERR, WD_PICL_START_ERR, PICL_FAILURE); 648 continue; 649 } 650 651 do /* pat the watchdog until expiry or user disarm */ 652 { 653 (void) mutex_lock(&patting_lock); 654 state = wd_data.wd_pat_state; 655 if (state == WD_NORESET) { 656 (void) mutex_unlock(&patting_lock); 657 break; 658 } 659 (void) gettimeofday(&tp, NULL); 660 to.tv_sec = tp.tv_sec + sec; 661 if ((nano_sec + (tp.tv_usec * 1000)) >= 1000000000) { 662 to.tv_sec += 1; 663 to.tv_nsec = (nano_sec + 664 (tp.tv_usec * 1000)) - 1000000000; 665 } else { 666 to.tv_nsec = nano_sec + (tp.tv_usec * 1000); 667 } 668 669 err = cond_timedwait(&patting_cv, &patting_lock, &to); 670 (void) mutex_unlock(&patting_lock); 671 672 if (err == ETIME) { /* woke up from sleep */ 673 (void) wd_pat(); 674 } 675 } while (state == WD_RESET); 676 } 677 /*NOTREACHED*/ 678 return (NULL); 679 } 680 681 /* 682 * returns 0 if owner is not alive 683 * returns 1 if owner is alive 684 * returns -1 if there is no active owner 685 */ 686 static int 687 is_owner_alive() 688 { 689 char strpid[50]; 690 struct stat buf; 691 692 if (pid == -1) { 693 return (-1); 694 } 695 696 /* check if the file exists or not */ 697 (void) snprintf(strpid, sizeof (pid), "/proc/%ld/status", pid); 698 errno = 0; 699 if (stat(strpid, &buf) == 0) { 700 return (1); 701 } 702 if (errno == ENOENT) { 703 return (0); 704 } else { 705 syslog(LOG_ERR, WD_GET_OWN_FAILED, errno); 706 } 707 return (-1); 708 } 709 710 /* 711 * Sends a cmd to SMC to stop watchdog timers 712 */ 713 static int 714 wd_stop() 715 { 716 int rc = 0; 717 sc_reqmsg_t req_pkt; 718 sc_rspmsg_t rsp_pkt; 719 uint8_t buffer[8]; 720 721 if (wd_get_reg_dump(buffer) != 0) { 722 return (PICL_FAILURE); 723 } 724 /* clear the expiration flags */ 725 buffer[3] = 0xff; /* expiration flags */ 726 727 (void) memcpy(SC_MSG_DATA(&req_pkt), buffer, 728 WD_SET_CMD_DATA_LEN); 729 730 /* initialize the request packet */ 731 (void) smc_init_smc_msg(&req_pkt, SMC_SET_WATCHDOG_TIMER, 732 DEFAULT_SEQN, WD_SET_CMD_DATA_LEN); 733 734 /* make a call to smc library to send cmd */ 735 if ((rc = smc_send_msg(wd_fd, &req_pkt, &rsp_pkt, 736 WD_POLL_TIMEOUT)) != SMC_SUCCESS) { 737 syslog(LOG_ERR, WD_PICL_STOP_ERR, rc); 738 return (PICL_FAILURE); 739 } 740 return (PICL_SUCCESS); 741 } 742 743 /* 744 * Function used by volatile callback function for wd-op property 745 * under controller. This is used to arm, disarm the watchdog-timers 746 * in response to user actions 747 */ 748 static int 749 wd_worker_function(uint8_t flag, pid_t proc_id) 750 { 751 int rc = PICL_SUCCESS; 752 int wd1_state, wd2_state; 753 754 (void) mutex_lock(&data_lock); 755 wd1_state = wd_data.wd1_run_state; 756 wd2_state = wd_data.wd2_run_state; 757 (void) mutex_unlock(&data_lock); 758 759 switch (flag) { 760 761 case USER_ARMED_WD: 762 763 /* watchdog can only be armed if all the timers are disarmed */ 764 if (wd1_state != WD_DISARMED) { 765 WD_DEBUG0(WD_PICL_WD1_RUNNING_ERR); 766 rc = PICL_FAILURE; 767 break; 768 } 769 if (wd2_state != WD_DISARMED) { 770 WD_DEBUG0(WD_PICL_WD2_RUNNING_ERR); 771 rc = PICL_FAILURE; 772 break; 773 } 774 775 /* check the HELATHY# status if action is alarm */ 776 if (wd_data.wd1_timeout >= 0) { 777 if (wd_data.wd1_action == WD_ACTION_HEALTHY_DOWN_HOST || 778 wd_data.wd1_action == WD_ACTION_HEALTHY_DOWN_SAT) { 779 rc = wd_get_healthy_status(); 780 if (rc == WD_HEALTHY_DOWN) { 781 WD_DEBUG0(WD_HEALTHY_ERR); 782 return (PICL_FAILURE); 783 } else if (rc == -1) { 784 syslog(LOG_ERR, WD_GET_HEALTH_ERR); 785 return (PICL_FAILURE); 786 } else { 787 rc = PICL_SUCCESS; 788 } 789 } 790 } 791 792 /* signal the patting thread */ 793 (void) mutex_lock(&patting_lock); 794 wd_data.wd_pat_state = WD_RESET; 795 (void) cond_signal(&patting_cv); 796 (void) mutex_unlock(&patting_lock); 797 break; 798 799 case USER_DISARMED_WD: 800 801 /* 802 * if the caller doesnot own watchdog services, 803 * check to see if the owner is still alive using procfs 804 */ 805 if (proc_id != pid) { 806 switch (is_owner_alive()) { 807 case -1: 808 if ((wd1_state != WD_DISARMED) || 809 (wd2_state != WD_DISARMED)) { 810 break; 811 } 812 /* watchdog is already disarmed */ 813 WD_DEBUG0(WD_PICL_NO_WD_ERR); 814 return (PICL_FAILURE); 815 case 1: 816 /* owner is still alive, deny the operation */ 817 WD_DEBUG0(WD_PICL_PERM_DENIED); 818 return (PICL_PERMDENIED); 819 default: 820 break; 821 } 822 } 823 824 /* watchdog is running */ 825 if ((rc = wd_stop()) == PICL_SUCCESS) { 826 wd_picl_update_state(WD1_2, WD_DISARMED); 827 (void) mutex_lock(&patting_lock); 828 wd_data.wd_pat_state = WD_NORESET; 829 (void) cond_signal(&patting_cv); 830 (void) mutex_unlock(&patting_lock); 831 } 832 break; 833 834 case USER_ARMED_PAT_WD: /* for debug purposes only */ 835 836 /* 837 * first arm-pat operation is used for arming the watchdog 838 * subsequent arm-pat operations will be used for patting 839 * the watchdog 840 */ 841 /* WD is stopped */ 842 if (wd1_state == WD_DISARMED && wd2_state == WD_DISARMED) { 843 if ((rc = wd_arm(DISABLE_AUTO_PAT)) == PICL_SUCCESS) { 844 if (wd_data.wd1_timeout >= 0) { 845 wd_picl_update_state(WD1, WD_ARMED); 846 } 847 848 if (wd_data.wd2_timeout >= 0) { 849 wd_picl_update_state(WD2, WD_ARMED); 850 } 851 } else { 852 return (rc); 853 } 854 } else { /* WD is running */ 855 if (wd1_state != WD_ARMED) { 856 WD_DEBUG0(WD_PICL_NO_WD_ERR); 857 return (PICL_INVALIDARG); 858 } 859 860 /* check if OS is patting the watchdog or not */ 861 (void) mutex_lock(&patting_lock); 862 if (wd_data.wd_pat_state == WD_RESET) { 863 WD_DEBUG0(WD_PICL_TRY_PAT_ERR); 864 (void) mutex_unlock(&patting_lock); 865 return (PICL_INVALIDARG); 866 } 867 868 /* check if the process owns the WD services */ 869 if (proc_id != pid) { 870 WD_DEBUG0(WD_PICL_PERM_DENIED); 871 return (PICL_PERMDENIED); 872 } 873 rc = wd_pat(); 874 } 875 break; 876 877 default: 878 rc = PICL_INVALIDARG; 879 break; 880 881 } /* switch */ 882 883 return (rc); 884 } 885 886 /*ARGSUSED*/ 887 static int 888 wd_write_op(ptree_warg_t *parg, const void *buf) 889 { 890 int rc = PICL_INVALIDARG; 891 uint8_t flag; 892 893 /* only after state is configured */ 894 if (!state_configured) { 895 if (parg->cred.dc_pid != getpid()) { 896 WD_DEBUG0(WD_PICL_STATE_INVALID); 897 return (PICL_PERMDENIED); 898 } 899 } 900 901 /* only super user can write this property */ 902 if (parg->cred.dc_euid != SUPER_USER) { 903 WD_DEBUG0(WD_NO_ROOT_PERM); 904 return (PICL_PERMDENIED); 905 } 906 907 if (strcmp((char *)buf, PICL_PROPVAL_WD_OP_ARM) == 0) { 908 flag = USER_ARMED_WD; 909 rc = PICL_SUCCESS; 910 } 911 912 if (strcmp((char *)buf, PICL_PROPVAL_WD_OP_DISARM) == 0) { 913 flag = USER_DISARMED_WD; 914 rc = PICL_SUCCESS; 915 } 916 917 /* for debug purpose only */ 918 if (strcmp((char *)buf, WD_ARM_PAT) == 0) { 919 flag = USER_ARMED_PAT_WD; 920 rc = PICL_SUCCESS; 921 } 922 923 if (rc == PICL_SUCCESS) { 924 rc = wd_worker_function(flag, parg->cred.dc_pid); 925 } else { 926 rc = PICL_INVALIDARG; 927 } 928 929 if (rc == PICL_SUCCESS) { 930 931 switch (flag) { 932 case USER_ARMED_PAT_WD: 933 case USER_ARMED_WD: 934 935 /* get the process id of client */ 936 if (parg->cred.dc_pid != getpid()) { 937 pid = parg->cred.dc_pid; 938 } else { 939 pid = -1; 940 } 941 break; 942 case USER_DISARMED_WD: 943 /* reset the pid */ 944 pid = -1; 945 default: 946 break; 947 } 948 } 949 return (rc); 950 } 951 952 /* volatile call back function to read the watchdog L1 status */ 953 /*ARGSUSED*/ 954 static int 955 wd1_read_status(ptree_rarg_t *parg, void *buf) 956 { 957 int rc = PICL_SUCCESS; 958 959 (void) mutex_lock(&data_lock); 960 961 switch (wd_data.wd1_run_state) { 962 963 case WD_EXPIRED: 964 (void) strncpy((char *)buf, PICL_PROPVAL_WD_STATE_EXPIRED, 965 PICL_PROPNAMELEN_MAX); 966 break; 967 968 case WD_DISARMED: 969 (void) strncpy((char *)buf, PICL_PROPVAL_WD_STATE_DISARMED, 970 PICL_PROPNAMELEN_MAX); 971 break; 972 973 case WD_ARMED: 974 (void) strncpy((char *)buf, PICL_PROPVAL_WD_STATE_ARMED, 975 PICL_PROPNAMELEN_MAX); 976 break; 977 978 default: 979 rc = PICL_FAILURE; 980 } 981 (void) mutex_unlock(&data_lock); 982 return (rc); 983 } 984 985 /* 986 * this function is used to read the state of L2 timer 987 */ 988 static int 989 wd_get_wd2_status(int *present_status) 990 { 991 int rc; 992 uchar_t buffer[WD_REGISTER_LEN]; 993 994 bzero(buffer, WD_REGISTER_LEN); 995 (void) mutex_lock(&data_lock); 996 *present_status = wd_data.wd2_run_state; 997 if (wd_data.wd2_run_state != WD_ARMED) { 998 /* we already have the latest state */ 999 (void) mutex_unlock(&data_lock); 1000 return (PICL_SUCCESS); 1001 } 1002 (void) mutex_unlock(&data_lock); 1003 1004 /* read watchdog registers */ 1005 if ((rc = wd_get_reg_dump(buffer)) != 0) { 1006 return (rc); 1007 } 1008 1009 if (buffer[0] & WD_WD_RUNNING) { 1010 *present_status = WD_ARMED; 1011 return (PICL_SUCCESS); 1012 } 1013 1014 if (buffer[3] != 0) { 1015 (void) mutex_lock(&data_lock); 1016 *present_status = wd_data.wd2_run_state = WD_EXPIRED; 1017 (void) mutex_unlock(&data_lock); 1018 } 1019 return (PICL_SUCCESS); 1020 } 1021 1022 /* volatile call back function to read the watchdog L2 status */ 1023 /*ARGSUSED*/ 1024 static int 1025 wd2_read_status(ptree_rarg_t *parg, void *buf) 1026 { 1027 int present_status, rc; 1028 1029 if ((rc = wd_get_wd2_status(&present_status)) != 1030 PICL_SUCCESS) { 1031 return (rc); 1032 } 1033 1034 /* copy the present state in user buffer */ 1035 switch (present_status) { 1036 case WD_ARMED: 1037 (void) strncpy((char *)buf, PICL_PROPVAL_WD_STATE_ARMED, 1038 PICL_PROPNAMELEN_MAX); 1039 break; 1040 case WD_EXPIRED: 1041 (void) strncpy((char *)buf, PICL_PROPVAL_WD_STATE_EXPIRED, 1042 PICL_PROPNAMELEN_MAX); 1043 break; 1044 case WD_DISARMED: 1045 (void) strncpy((char *)buf, PICL_PROPVAL_WD_STATE_DISARMED, 1046 PICL_PROPNAMELEN_MAX); 1047 break; 1048 } 1049 return (PICL_SUCCESS); 1050 } 1051 1052 /* this thread listens for watchdog expiry events */ 1053 /*ARGSUSED*/ 1054 static void * 1055 wd_polling(void *args) 1056 { 1057 uint8_t stat; 1058 int poll_retval; 1059 struct pollfd fds; 1060 sc_rspmsg_t rsp_pkt; 1061 int i; 1062 1063 fds.fd = polling_fd; 1064 fds.events = POLLIN | POLLPRI; 1065 fds.revents = 0; 1066 1067 for (;;) { 1068 poll_retval = poll(&fds, 1, -1); 1069 if (props_created == 0) 1070 continue; 1071 switch (poll_retval) { 1072 case 0: 1073 break; 1074 1075 case -1: 1076 syslog(LOG_ERR, WD_PICL_POLL_ERR); 1077 break; 1078 1079 default: 1080 /* something happened */ 1081 if ((read(polling_fd, &rsp_pkt, 1082 sizeof (sc_rspmsg_t))) < 0) { 1083 syslog(LOG_ERR, WD_PICL_SMC_READ_ERR); 1084 break; 1085 } 1086 1087 if (rsp_pkt.hdr.cmd == SMC_EXPIRED_WATCHDOG_NOTIF) { 1088 1089 (void) mutex_lock(&data_lock); 1090 stat = wd_data.wd1_run_state; 1091 (void) mutex_unlock(&data_lock); 1092 1093 if (stat != WD_ARMED) { 1094 continue; 1095 } 1096 1097 wd_picl_update_state(WD1, WD_EXPIRED); 1098 1099 (void) mutex_lock(&patting_lock); 1100 wd_data.wd_pat_state = WD_NORESET; 1101 (void) cond_signal(&patting_cv); 1102 1103 (void) mutex_unlock(&patting_lock); 1104 syslog(LOG_WARNING, WD_WD1_EXPIRED); 1105 if (wd_debug & WD_TIME_DEBUG) { 1106 syslog(LOG_ERR, " latest count : %d", count); 1107 for (i = 0; i < NUMBER_OF_READINGS; i++) { 1108 syslog(LOG_ERR, "i = %d, req_seq = %d," 1109 "res_seq = %d, time = %lld nsec", 1110 i, time1[i].req_seq, 1111 time1[i].res_seq, 1112 time1[i].time); 1113 } 1114 } 1115 if (wd_data.reboot_action) { 1116 wd_data.reboot_action = 0; 1117 (void) system(SHUTDOWN_CMD); 1118 } 1119 } 1120 break; 1121 1122 } /* switch */ 1123 } 1124 /*NOTREACHED*/ 1125 return (NULL); 1126 } 1127 1128 /* 1129 * This function reads the hardware state and gets the status of 1130 * watchdog-timers 1131 */ 1132 static int 1133 wd_get_status(wd_state_t *state) 1134 { 1135 picl_errno_t rc; 1136 uchar_t buffer[WD_REGISTER_LEN]; 1137 1138 bzero(buffer, WD_REGISTER_LEN); 1139 /* read watchdog registers */ 1140 if ((rc = wd_get_reg_dump(buffer)) != 0) { 1141 return (rc); 1142 } 1143 1144 /* get action */ 1145 state->action1 = buffer[1] & 0xF0; /* most significant 4 bits */ 1146 if (state->action1 == 0x0) { 1147 state->action1 = WD_ACTION_NONE1; 1148 } 1149 state->action2 = buffer[1] & 0x0F; /* least significant 4 bits */ 1150 if (state->action2 == 0x0) { 1151 state->action2 = WD_ACTION_NONE2; 1152 } 1153 1154 state->timeout2 = buffer[2]; 1155 state->timeout1[0] = buffer[5]; /* MSB */ 1156 state->timeout1[1] = buffer[4]; /* LSB */ 1157 1158 state->present_t1[0] = buffer[7]; /* MSB */ 1159 state->present_t1[1] = buffer[6]; /* LSB */ 1160 1161 if (buffer[0] & WD_WD_RUNNING) { 1162 state->present_state = WD_ARMED; 1163 return (PICL_SUCCESS); 1164 } 1165 1166 if (buffer[3] != 0) { 1167 state->present_state = WD_EXPIRED; 1168 return (PICL_SUCCESS); 1169 } else { 1170 state->present_state = WD_DISARMED; 1171 return (PICL_SUCCESS); 1172 } 1173 } 1174 1175 /* read the smc hardware and intialize the internal state */ 1176 static void 1177 wd_set_init_state() 1178 { 1179 wd_state_t state; 1180 uint16_t tmp1, tmp2, wd_time1; 1181 1182 if (wd_get_status(&state) != PICL_SUCCESS) { 1183 syslog(LOG_ERR, WD_PICL_GET_STAT_ERR); 1184 /* defualt state is expired ??? */ 1185 state.present_state = WD_EXPIRED; 1186 } 1187 1188 switch (state.present_state) { 1189 case WD_EXPIRED: 1190 case WD_DISARMED: 1191 if (state.present_state == WD_EXPIRED) 1192 wd_picl_update_state(WD1_2, WD_EXPIRED); 1193 else 1194 wd_picl_update_state(WD1_2, WD_DISARMED); 1195 wd_data.wd_pat_state = WD_NORESET; 1196 wd_data.wd1_action = state.action1; 1197 wd_data.wd2_action = state.action2; 1198 tmp1 = state.timeout1[0] << 8; 1199 tmp2 = state.timeout1[1]; 1200 wd_time1 = tmp1 | tmp2; 1201 wd_data.wd1_timeout = wd_time1 * WD_L1_RESOLUTION; 1202 wd_data.wd2_timeout = state.timeout2 * WD_L2_RESOLUTION; 1203 break; 1204 case WD_ARMED: 1205 /* 1206 * get the present values and restart the 1207 * watchdog from os level and continue to pat 1208 */ 1209 wd_picl_update_state(WD1_2, WD_ARMED); 1210 wd_data.wd_pat_state = WD_RESET; 1211 wd_data.wd1_action = (state.action1 << 4); 1212 wd_data.wd2_action = state.action2; 1213 1214 tmp1 = state.timeout1[0] << 8; 1215 tmp2 = state.timeout1[1]; 1216 wd_time1 = tmp1 | tmp2; 1217 wd_data.wd1_timeout = wd_time1 * WD_L1_RESOLUTION; 1218 wd_data.wd2_timeout = state.timeout2 * WD_L2_RESOLUTION; 1219 (void) wd_stop(); 1220 } 1221 } 1222 1223 /* 1224 * wrapper for ptree interface to create property 1225 */ 1226 static int 1227 wd_create_property( 1228 int ptype, /* PICL property type */ 1229 int pmode, /* PICL access mode */ 1230 size_t psize, /* size of PICL property */ 1231 char *pname, /* property name */ 1232 int (*readfn)(ptree_rarg_t *, void *), 1233 int (*writefn)(ptree_warg_t *, const void *), 1234 picl_nodehdl_t nodeh, /* node for property */ 1235 picl_prophdl_t *propp, /* pointer to prop_handle */ 1236 void *vbuf) /* initial value */ 1237 { 1238 picl_errno_t rc; 1239 ptree_propinfo_t propinfo; 1240 1241 rc = ptree_init_propinfo(&propinfo, PTREE_PROPINFO_VERSION, 1242 ptype, pmode, psize, pname, readfn, writefn); 1243 if (rc != PICL_SUCCESS) { 1244 syslog(LOG_ERR, WD_PICL_PROP_INIT_ERR, rc); 1245 return (rc); 1246 } 1247 1248 rc = ptree_create_and_add_prop(nodeh, &propinfo, vbuf, propp); 1249 if (rc != PICL_SUCCESS) { 1250 return (rc); 1251 } 1252 1253 return (PICL_SUCCESS); 1254 } 1255 1256 /* Create and add Watchdog properties */ 1257 static void 1258 wd_create_add_props() 1259 { 1260 int rc; 1261 picl_nodehdl_t rooth, sysmgmt_h, platformh; 1262 int32_t timeout1 = 0; 1263 int32_t timeout2 = 0; 1264 char buf[PICL_WD_PROPVAL_MAX]; 1265 1266 /* get picl root node handle */ 1267 if ((rc = ptree_get_root(&rooth)) != PICL_SUCCESS) { 1268 syslog(LOG_ERR, WD_NODE_INIT_ERR, 1, rc); 1269 return; 1270 } 1271 1272 /* get picl platform node handle */ 1273 if ((rc = ptree_get_node_by_path(PLATFORM_PATH, 1274 &platformh)) != PICL_SUCCESS) { 1275 syslog(LOG_ERR, WD_NODE_INIT_ERR, 2, rc); 1276 return; 1277 } 1278 1279 /* get the picl sysmgmt node handle */ 1280 if ((rc = ptree_find_node(platformh, PICL_PROP_NAME, 1281 PICL_PTYPE_CHARSTRING, 1282 PICL_NODE_SYSMGMT, strlen(PICL_NODE_SYSMGMT), 1283 &sysmgmt_h)) != PICL_SUCCESS) { 1284 syslog(LOG_ERR, WD_NODE_INIT_ERR, 3, rc); 1285 return; 1286 } 1287 1288 /* start creating the watchdog nodes and properties */ 1289 if ((rc = ptree_create_and_add_node(sysmgmt_h, PICL_NODE_WD_CONTROLLER, 1290 PICL_CLASS_WATCHDOG_CONTROLLER, 1291 &(wd_data.wd_ctrl_nodehdl))) != PICL_SUCCESS) { 1292 syslog(LOG_ERR, WD_NODE_INIT_ERR, 4, rc); 1293 return; 1294 } 1295 1296 /* Add wd-op property to watchdog controller node */ 1297 (void) strncpy(buf, "", sizeof (buf)); 1298 if ((rc = wd_create_property(PICL_PTYPE_CHARSTRING, 1299 PICL_WRITE + PICL_VOLATILE, 1300 PICL_PROPNAMELEN_MAX, PICL_PROP_WATCHDOG_OPERATION, 1301 NULL, wd_write_op, 1302 wd_data.wd_ctrl_nodehdl, 1303 &(wd_data.wd_ops_hdl), 1304 (void *)buf)) != PICL_SUCCESS) { 1305 syslog(LOG_ERR, WD_NODE_INIT_ERR, 5, rc); 1306 return; 1307 } 1308 1309 /* create L1 node and add to controller */ 1310 if ((rc = ptree_create_and_add_node(wd_data.wd_ctrl_nodehdl, 1311 PICL_NODE_WD_L1, PICL_CLASS_WATCHDOG_TIMER, 1312 &(wd_data.wd1_nodehdl))) != PICL_SUCCESS) { 1313 syslog(LOG_ERR, WD_NODE_INIT_ERR, 6, rc); 1314 return; 1315 } 1316 1317 /* create L2 node and add to controller */ 1318 if ((rc = ptree_create_and_add_node(wd_data.wd_ctrl_nodehdl, 1319 PICL_NODE_WD_L2, PICL_CLASS_WATCHDOG_TIMER, 1320 &(wd_data.wd2_nodehdl))) != PICL_SUCCESS) { 1321 syslog(LOG_ERR, WD_NODE_INIT_ERR, 7, rc); 1322 return; 1323 } 1324 1325 /* create watchdog properties */ 1326 /* create state property here */ 1327 (void) strncpy(buf, PICL_PROPVAL_WD_STATE_DISARMED, 1328 sizeof (buf)); 1329 if ((rc = wd_create_property(PICL_PTYPE_CHARSTRING, 1330 PICL_READ + PICL_VOLATILE, PICL_PROPNAMELEN_MAX, 1331 PICL_PROP_STATE, wd1_read_status, NULLWRITE, 1332 wd_data.wd1_nodehdl, 1333 &(wd_data.wd1_state_hdl), (void *)buf)) != PICL_SUCCESS) { 1334 syslog(LOG_ERR, WD_NODE_INIT_ERR, 8, rc); 1335 return; 1336 } 1337 1338 if ((rc = wd_create_property(PICL_PTYPE_CHARSTRING, 1339 PICL_READ + PICL_VOLATILE, PICL_PROPNAMELEN_MAX, 1340 PICL_PROP_STATE, wd2_read_status, NULLWRITE, 1341 wd_data.wd2_nodehdl, 1342 &(wd_data.wd2_state_hdl), (void *)buf)) != PICL_SUCCESS) { 1343 syslog(LOG_ERR, WD_NODE_INIT_ERR, 9, rc); 1344 return; 1345 } 1346 1347 /* create timeout property here */ 1348 if ((rc = wd_create_property(PICL_PTYPE_UNSIGNED_INT, 1349 PICL_READ + PICL_WRITE + PICL_VOLATILE, 1350 sizeof (timeout1), PICL_PROP_WATCHDOG_TIMEOUT, 1351 wd_read_timeout, wd_write_timeout, wd_data.wd1_nodehdl, 1352 &(wd_data.wd1_timeout_hdl), (void *)&(timeout1))) != 1353 PICL_SUCCESS) { 1354 syslog(LOG_ERR, WD_NODE_INIT_ERR, 10, rc); 1355 return; 1356 } 1357 1358 if ((rc = wd_create_property(PICL_PTYPE_UNSIGNED_INT, 1359 PICL_READ + PICL_WRITE + PICL_VOLATILE, 1360 sizeof (wd_data.wd2_timeout), PICL_PROP_WATCHDOG_TIMEOUT, 1361 wd_read_timeout, wd_write_timeout, wd_data.wd2_nodehdl, 1362 &(wd_data.wd2_timeout_hdl), (void *)&(timeout2))) != 1363 PICL_SUCCESS) { 1364 syslog(LOG_ERR, WD_NODE_INIT_ERR, 11, rc); 1365 return; 1366 } 1367 1368 /* create wd_action property here */ 1369 (void) strncpy(buf, PICL_PROPVAL_WD_ACTION_NONE, 1370 sizeof (buf)); 1371 if ((rc = wd_create_property(PICL_PTYPE_CHARSTRING, 1372 PICL_READ + PICL_WRITE + PICL_VOLATILE, 1373 PICL_PROPNAMELEN_MAX, PICL_PROP_WATCHDOG_ACTION, 1374 wd_read_action, wd_write_action, 1375 wd_data.wd1_nodehdl, &(wd_data.wd1_action_hdl), 1376 (void *)buf)) != PICL_SUCCESS) { 1377 syslog(LOG_ERR, WD_NODE_INIT_ERR, 12, rc); 1378 return; 1379 } 1380 1381 if ((rc = wd_create_property(PICL_PTYPE_CHARSTRING, 1382 PICL_READ + PICL_WRITE + PICL_VOLATILE, 1383 PICL_PROPNAMELEN_MAX, PICL_PROP_WATCHDOG_ACTION, 1384 wd_read_action, wd_write_action, 1385 wd_data.wd2_nodehdl, &(wd_data.wd2_action_hdl), 1386 (void *)buf)) != PICL_SUCCESS) { 1387 syslog(LOG_ERR, WD_NODE_INIT_ERR, 13, rc); 1388 return; 1389 } 1390 } 1391 1392 static int 1393 wd_ioctl(int fd, int cmd, int len, char *buf) 1394 { 1395 int rtnval; 1396 struct strioctl sioc; 1397 sioc.ic_cmd = cmd; 1398 sioc.ic_timout = 60; 1399 sioc.ic_len = len; 1400 sioc.ic_dp = buf; 1401 rtnval = ioctl(fd, I_STR, &sioc); 1402 return (rtnval); 1403 } 1404 1405 static int 1406 wd_open(int attr) 1407 { 1408 int cc; 1409 sc_cmdspec_t wd_cmdspec; 1410 1411 if ((wd_fd = open(SMC_NODE, attr)) < 0) { 1412 return (-1); 1413 } 1414 1415 /* get exclusive access for set and reset commands of watchdog */ 1416 wd_cmdspec.args[0] = SMC_SET_WATCHDOG_TIMER; 1417 wd_cmdspec.args[1] = SMC_RESET_WATCHDOG_TIMER; 1418 wd_cmdspec.attribute = SC_ATTR_EXCLUSIVE; 1419 1420 cc = wd_ioctl(wd_fd, SCIOC_MSG_SPEC, 3, 1421 (char *)&wd_cmdspec); 1422 if (cc < 0) { 1423 syslog(LOG_ERR, WD_PICL_EXCLUSIVE_ACCESS_ERR); 1424 return (-1); 1425 } 1426 return (wd_fd); 1427 } 1428 1429 static int 1430 wd_open_pollfd(int attr) 1431 { 1432 int cc; 1433 sc_cmdspec_t wd_cmdspec; 1434 1435 if ((polling_fd = open(SMC_NODE, attr)) < 0) { 1436 return (-1); 1437 } 1438 1439 /* request for watchdog expiry notification */ 1440 wd_cmdspec.args[0] = SMC_EXPIRED_WATCHDOG_NOTIF; 1441 wd_cmdspec.attribute = SC_ATTR_EXCLUSIVE; 1442 1443 cc = wd_ioctl(polling_fd, SCIOC_MSG_SPEC, 2, 1444 (char *)&wd_cmdspec); 1445 if (cc < 0) { 1446 syslog(LOG_ERR, WD_PICL_SET_ATTR_FAILED); 1447 return (-1); 1448 } 1449 return (polling_fd); 1450 } 1451 1452 /* read the ENVIRONMENT variables and initialize tunables */ 1453 static void 1454 wd_get_env() 1455 { 1456 char *val; 1457 int intval = 0; 1458 1459 /* read frutree debug flag value */ 1460 if (val = getenv(WATCHDOG_DEBUG)) { 1461 errno = 0; 1462 intval = strtol(val, (char **)NULL, 0); 1463 if (errno == 0) { 1464 wd_debug = intval; 1465 } 1466 } 1467 } 1468 1469 /* 1470 * PTREE Entry Points 1471 */ 1472 1473 /* picl-state-change event handler */ 1474 /*ARGSUSED*/ 1475 static void 1476 wd_state_change_evhandler(const char *ename, const void *earg, 1477 size_t size, void *cookie) 1478 { 1479 char *value; 1480 picl_errno_t rc; 1481 nvlist_t *nvlp; 1482 picl_nodehdl_t fruhdl; 1483 static int spawn_threads = 1; 1484 char name[PICL_PROPNAMELEN_MAX]; 1485 1486 if (strcmp(ename, PICLEVENT_STATE_CHANGE)) { 1487 return; 1488 } 1489 1490 /* neglect all events if wd props are already created */ 1491 if (props_created && state_configured) { 1492 return; 1493 } 1494 1495 if (nvlist_unpack((char *)earg, size, &nvlp, NULL)) { 1496 return; 1497 } 1498 if ((nvlist_lookup_uint64(nvlp, PICLEVENTARG_NODEHANDLE, 1499 &fruhdl)) == -1) { 1500 nvlist_free(nvlp); 1501 return; 1502 } 1503 if (nvlist_lookup_string(nvlp, PICLEVENTARG_STATE, &value)) { 1504 nvlist_free(nvlp); 1505 return; 1506 } 1507 1508 rc = ptree_get_propval_by_name(fruhdl, PICL_PROP_NAME, 1509 (void *)name, sizeof (name)); 1510 if (rc != PICL_SUCCESS) { 1511 nvlist_free(nvlp); 1512 return; 1513 } 1514 1515 /* care for only events on chassis node */ 1516 if (strcmp(name, PICL_NODE_CHASSIS) != 0) { 1517 nvlist_free(nvlp); 1518 return; 1519 } 1520 1521 if (strcmp(value, PICLEVENTARGVAL_CONFIGURED) == 0) { 1522 state_configured = 1; 1523 nvlist_free(nvlp); 1524 return; 1525 } 1526 1527 if (strcmp(value, PICLEVENTARGVAL_CONFIGURING) != 0) { 1528 nvlist_free(nvlp); 1529 return; 1530 } 1531 1532 if (wd_fd < 0) { 1533 if ((wd_fd = wd_open(O_RDWR)) < 0) { 1534 syslog(LOG_CRIT, WD_PICL_SMC_OPEN_ERR); 1535 nvlist_free(nvlp); 1536 return; 1537 } 1538 } 1539 1540 if (polling_fd < 0) { 1541 if ((polling_fd = wd_open_pollfd(O_RDWR)) < 0) { 1542 syslog(LOG_CRIT, WD_PICL_SMC_OPEN_ERR); 1543 nvlist_free(nvlp); 1544 return; 1545 } 1546 } 1547 1548 switch (wd_get_chassis_type()) { 1549 case WD_HOST: /* is host */ 1550 wd_data.is_host = B_TRUE; 1551 break; 1552 case WD_STANDALONE: /* is satellite */ 1553 wd_data.is_host = B_FALSE; 1554 break; 1555 default: 1556 nvlist_free(nvlp); 1557 return; 1558 } 1559 1560 (void) wd_create_add_props(); /* create and add properties */ 1561 props_created = 1; 1562 1563 /* read the hardware and initialize values */ 1564 (void) wd_set_init_state(); 1565 1566 /* initialize wd-conf value */ 1567 (void) snprintf(wd_conf, sizeof (wd_conf), "%s/%s", 1568 PICL_CONFIG_DIR, WD_CONF_FILE); 1569 1570 if (spawn_threads == 0) { 1571 /* threads are already created */ 1572 nvlist_free(nvlp); 1573 return; 1574 } 1575 1576 /* start monitoring for the events */ 1577 if (thr_create(NULL, NULL, wd_polling, 1578 NULL, THR_BOUND, &polling_thr_tid) != 0) { 1579 syslog(LOG_ERR, WD_PICL_THREAD_CREATE_FAILED, 1580 "polling"); 1581 nvlist_free(nvlp); 1582 return; 1583 } 1584 1585 /* thread used to pat watchdog */ 1586 if (thr_create(NULL, NULL, wd_patting_thread, 1587 NULL, THR_BOUND, &patting_thr_tid) != 0) { 1588 syslog(LOG_ERR, WD_PICL_THREAD_CREATE_FAILED, 1589 "patting"); 1590 nvlist_free(nvlp); 1591 return; 1592 } 1593 spawn_threads = 0; 1594 nvlist_free(nvlp); 1595 } 1596 1597 static void 1598 wd_picl_register(void) 1599 { 1600 int rc = 0; 1601 if ((rc = picld_plugin_register(&wd_reg_info)) != PICL_SUCCESS) { 1602 syslog(LOG_ERR, WD_PICL_REG_ERR, rc); 1603 } 1604 } 1605 1606 /* entry point (initialization) */ 1607 static void 1608 wd_picl_init(void) 1609 { 1610 /* initialize the wd_conf path and name */ 1611 (void) snprintf(wd_conf, sizeof (wd_conf), "%s/%s", 1612 PICL_CONFIG_DIR, WD_CONF_FILE); 1613 1614 /* parse configuration file and set tunables */ 1615 wd_parse_config_file(wd_conf); 1616 1617 /* if watchdog-enable is set to false dont intialize wd subsystem */ 1618 if (wd_enable == 0) { 1619 return; 1620 } 1621 1622 /* read watchdog related environment variables */ 1623 wd_get_env(); 1624 1625 /* event handler for state change notifications from frutree */ 1626 (void) ptree_register_handler(PICLEVENT_STATE_CHANGE, 1627 wd_state_change_evhandler, NULL); 1628 } 1629 1630 static void 1631 wd_picl_fini(void) 1632 { 1633 (void) ptree_unregister_handler(PICLEVENT_STATE_CHANGE, 1634 wd_state_change_evhandler, NULL); 1635 1636 state_configured = 0; /* chassis state */ 1637 props_created = 0; 1638 (void) ptree_delete_node(wd_data.wd_ctrl_nodehdl); 1639 (void) ptree_destroy_node(wd_data.wd_ctrl_nodehdl); 1640 } 1641 1642 /* 1643 * volatile function to read the timeout 1644 */ 1645 static int 1646 wd_read_timeout(ptree_rarg_t *parg, void *buf) 1647 { 1648 /* update the buffer provided by user */ 1649 (void) mutex_lock(&data_lock); 1650 if (parg->proph == wd_data.wd1_timeout_hdl) { 1651 *(int32_t *)buf = wd_data.wd1_timeout; 1652 } else if (parg->proph == wd_data.wd2_timeout_hdl) { 1653 *(int32_t *)buf = wd_data.wd2_timeout; 1654 } 1655 (void) mutex_unlock(&data_lock); 1656 return (PICL_SUCCESS); 1657 } 1658 1659 /* 1660 * volatile function to read the action 1661 */ 1662 static int 1663 wd_read_action(ptree_rarg_t *parg, void *buf) 1664 { 1665 (void) mutex_lock(&data_lock); 1666 if (parg->proph == wd_data.wd1_action_hdl) { 1667 switch (wd_data.wd1_action) { 1668 case WD_ACTION_HEALTHY_DOWN_HOST: 1669 case WD_ACTION_HEALTHY_DOWN_SAT: 1670 (void) strcpy((char *)buf, 1671 PICL_PROPVAL_WD_ACTION_ALARM); 1672 break; 1673 case WD_ACTION_NONE1: 1674 case WD_ACTION_NONE2: 1675 if (wd_data.reboot_action == 1) { 1676 (void) strcpy((char *)buf, 1677 PICL_PROPVAL_WD_ACTION_REBOOT); 1678 } else { 1679 (void) strcpy((char *)buf, 1680 PICL_PROPVAL_WD_ACTION_NONE); 1681 } 1682 break; 1683 } 1684 } else if (parg->proph == wd_data.wd2_action_hdl) { 1685 switch (wd_data.wd2_action) { 1686 case WD_ACTION_HARD_RESET: 1687 (void) strcpy((char *)buf, 1688 PICL_PROPVAL_WD_ACTION_RESET); 1689 break; 1690 case WD_ACTION_NONE2: 1691 (void) strcpy((char *)buf, PICL_PROPVAL_WD_ACTION_NONE); 1692 break; 1693 } 1694 } 1695 (void) mutex_unlock(&data_lock); 1696 return (PICL_SUCCESS); 1697 } 1698 1699 /* 1700 * volatile function to write the action 1701 * this function validates the user value before programming the 1702 * action property. Properties can be modified only when watchdog 1703 * is in disarmed state. 1704 */ 1705 static int 1706 wd_write_action(ptree_warg_t *parg, const void *buf) 1707 { 1708 int flag = 0x0; 1709 picl_errno_t rc = PICL_SUCCESS; 1710 char wd_action[PICL_WD_PROPVAL_MAX]; 1711 1712 /* only super user can write this property */ 1713 if (parg->cred.dc_euid != SUPER_USER) { 1714 return (PICL_PERMDENIED); 1715 } 1716 1717 if (parg->proph == wd_data.wd1_action_hdl) { 1718 flag = WD1; 1719 } else if (parg->proph == wd_data.wd2_action_hdl) { 1720 flag = WD2; 1721 } 1722 1723 /* dont allow any write operations when watchdog is armed */ 1724 (void) mutex_lock(&data_lock); 1725 if (wd_data.wd1_run_state != WD_DISARMED || 1726 wd_data.wd2_run_state != WD_DISARMED) { 1727 (void) mutex_unlock(&data_lock); 1728 return (PICL_PERMDENIED); 1729 } 1730 1731 /* validate the values and store in internal cache */ 1732 (void) strcpy(wd_action, (char *)buf); 1733 switch (flag) { 1734 case WD1: 1735 if (strcmp(wd_action, PICL_PROPVAL_WD_ACTION_ALARM) == 0) { 1736 if (wd_data.is_host) 1737 wd_data.wd1_action = WD_ACTION_HEALTHY_DOWN_HOST; 1738 else 1739 wd_data.wd1_action = WD_ACTION_HEALTHY_DOWN_SAT; 1740 wd_data.reboot_action = 0; 1741 } else if (strcmp(wd_action, PICL_PROPVAL_WD_ACTION_NONE) == 0) { 1742 wd_data.wd1_action = WD_ACTION_NONE1; 1743 wd_data.reboot_action = 0; 1744 } else if (strcmp(wd_action, PICL_PROPVAL_WD_ACTION_REBOOT) == 0) { 1745 wd_data.wd1_action = WD_ACTION_NONE1; 1746 wd_data.reboot_action = 1; 1747 } else { 1748 rc = PICL_INVALIDARG; 1749 } 1750 break; 1751 1752 case WD2: 1753 if (strcmp(wd_action, PICL_PROPVAL_WD_ACTION_RESET) == 0) { 1754 wd_data.wd2_action = WD_ACTION_HARD_RESET; 1755 } else if (strcmp(wd_action, PICL_PROPVAL_WD_ACTION_NONE) == 0) { 1756 wd_data.wd2_action = WD_ACTION_NONE2; 1757 } else { 1758 rc = PICL_INVALIDARG; 1759 } 1760 break; 1761 } 1762 (void) mutex_unlock(&data_lock); 1763 return (rc); 1764 } 1765 1766 /* 1767 * volatile function to write the timeout 1768 * this function validates the user value before programming the 1769 * timeout property. Properties can be modified only when watchdog 1770 * is in disarmed state. 1771 */ 1772 static int 1773 wd_write_timeout(ptree_warg_t *parg, const void *buf) 1774 { 1775 int32_t timeout; 1776 int flag = 0x0; 1777 1778 /* only super user can write this property */ 1779 if (parg->cred.dc_euid != SUPER_USER) { 1780 return (PICL_PERMDENIED); 1781 } 1782 1783 /* dont allow any write operations when watchdog is armed */ 1784 (void) mutex_lock(&data_lock); 1785 if (wd_data.wd1_run_state != WD_DISARMED || 1786 wd_data.wd2_run_state != WD_DISARMED) { 1787 (void) mutex_unlock(&data_lock); 1788 return (PICL_PERMDENIED); 1789 } 1790 (void) mutex_unlock(&data_lock); 1791 1792 if (parg->proph == wd_data.wd1_timeout_hdl) { 1793 flag = WD1; 1794 } else if (parg->proph == wd_data.wd2_timeout_hdl) { 1795 flag = WD2; 1796 } 1797 1798 /* validate the timeout values */ 1799 timeout = *(int32_t *)buf; 1800 if (timeout < -1) { 1801 return (PICL_INVALIDARG); 1802 } 1803 1804 if (timeout > 0) { 1805 switch (flag) { 1806 case WD1: 1807 if ((timeout % WD_L1_RESOLUTION) != 0) { 1808 return (PICL_INVALIDARG); 1809 } 1810 if ((timeout/WD_L1_RESOLUTION) > WD_MAX_L1) { 1811 return (PICL_INVALIDARG); 1812 } 1813 break; 1814 case WD2: 1815 if ((timeout % WD_L2_RESOLUTION) != 0) { 1816 return (PICL_INVALIDARG); 1817 } 1818 if ((timeout/WD_L2_RESOLUTION) > WD_MAX_L2) { 1819 /* 255 sec */ 1820 return (PICL_INVALIDARG); 1821 } 1822 } 1823 } 1824 1825 /* update the internal cache */ 1826 (void) mutex_lock(&data_lock); 1827 switch (flag) { 1828 case WD1: 1829 wd_data.wd1_timeout = timeout; 1830 break; 1831 case WD2: 1832 wd_data.wd2_timeout = timeout; 1833 break; 1834 } 1835 (void) mutex_unlock(&data_lock); 1836 return (PICL_SUCCESS); 1837 } 1838