1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 28 /* 29 * Serengeti Environmental Information driver (sgenv) 30 * 31 * This driver requests the environmental properties from the SC. These 32 * request-response transactions are transferred through the SBBC mailbox, 33 * between the Domain and the SC. 34 * 35 * All sensors have the same sort of properties: Low and high limits, warning 36 * thresholds, last measured value, time of measurement, units (e.g., degrees 37 * Celsius, volts, etc.), and so on. 38 * 39 * Each sensor is named by a unique Tag. The Tag identifies the geographical 40 * location of the sensor in the Serengeti, and what it is the sensor measures. 41 * 42 * Requestable sensor properties are broken into two types: Those which are 43 * quasi-constant (infrequently change) - e.g., tolerance-defining low and high 44 * limits; and those which are volatile (typically change) - e.g., the current 45 * measurement. 
46 * 47 * Unfortunately, property sets are too large to comprise a single mailbox 48 * message, so the sets are further subdivided into notionally arbitrary 49 * collections. NOTE: The SC-mailbox framework now supports fragmented messages 50 * which could allow us to request the data in larger chunks in the future. 51 * 52 * Each collection is fetched by a separate transaction. 53 * 54 * Firstly there is a transaction to obtain a list of all collections. Each non- 55 * zero key in this list is associated with one of the collections of sensors. 56 * (This sparse list of keys is then used as an index to obtain all the sensor 57 * data for each collection). 58 * 59 * For each collection, there is one request-reply transaction to obtain a list 60 * of all sensors in that collection and the limits that apply to each; and a 61 * separate request-reply transaction to obtain the measurements from the 62 * sensors in the collection. 63 * 64 * The sgenv driver assembles each property set from the constituent 65 * collections, and caches the assembled property sets into the appropriate 66 * cache (env_cache, board_cache). The caches are created at startup and are 67 * updated on receipt of events from the SC. These events (which include DR 68 * events and ENV events) notify sgenv of configuration changes and 69 * environmental state changes (such as a sensor state change, Fan speed 70 * change). 71 * 72 * The SC-APP maintains a pseudo-sensor in each collection "measuring" changes 73 * to the quasi-constants in that collection. By monitoring these pseudo-sensor 74 * measurements, the kstat driver avoids redundant or speculative re-fetches of 75 * the quasi-constant properties. 
76 */ 77 78 #include <sys/time.h> 79 #include <sys/errno.h> 80 #include <sys/kmem.h> 81 #include <sys/stat.h> 82 #include <sys/cmn_err.h> 83 #include <sys/disp.h> 84 85 #include <sys/conf.h> 86 #include <sys/modctl.h> 87 #include <sys/devops.h> 88 #include <sys/ddi.h> 89 #include <sys/sunddi.h> 90 91 #include <sys/sgevents.h> 92 #include <sys/sysevent.h> 93 #include <sys/sysevent/eventdefs.h> 94 #include <sys/sysevent/domain.h> 95 #include <sys/sysevent/env.h> 96 97 #include <sys/serengeti.h> 98 #include <sys/sgfrutypes.h> 99 100 #include <sys/sgsbbc.h> 101 #include <sys/sgsbbc_iosram.h> 102 #include <sys/sgsbbc_mailbox.h> 103 104 #include <sys/sbd_ioctl.h> /* sbd header files needed for board support */ 105 #include <sys/sbdp_priv.h> 106 #include <sys/sbd.h> 107 108 #include <sys/sgenv_impl.h> 109 110 111 /* 112 * Global Variables - can be patched from Solaris 113 * ============================================== 114 */ 115 116 /* 117 * the maximum amount of time this driver is prepared to wait for the mailbox 118 * to reply before it decides to timeout. The value is initially set in the 119 * _init() routine to the global Serengeti variable <sbbc_mbox_default_timeout> 120 * but could be tuned specifically for SGENV after booting up the system. 
121 */ 122 int sgenv_max_mbox_wait_time = 0; 123 124 #ifdef DEBUG 125 /* 126 * This variable controls the level of debug output 127 */ 128 uint_t sgenv_debug = SGENV_DEBUG_NONE; 129 #endif 130 131 132 /* 133 * Module Variables 134 * ================ 135 */ 136 137 /* 138 * Driver entry points 139 */ 140 static struct cb_ops sgenv_cb_ops = { 141 nodev, /* open() */ 142 nodev, /* close() */ 143 nodev, /* strategy() */ 144 nodev, /* print() */ 145 nodev, /* dump() */ 146 nodev, /* read() */ 147 nodev, /* write() */ 148 nodev, /* ioctl() */ 149 nodev, /* devmap() */ 150 nodev, /* mmap() */ 151 ddi_segmap, /* segmap() */ 152 nochpoll, /* poll() */ 153 ddi_prop_op, /* prop_op() */ 154 NULL, /* cb_str */ 155 D_NEW | D_MP /* cb_flag */ 156 }; 157 158 159 static struct dev_ops sgenv_ops = { 160 DEVO_REV, 161 0, /* ref count */ 162 ddi_getinfo_1to1, /* getinfo() */ 163 nulldev, /* identify() */ 164 nulldev, /* probe() */ 165 sgenv_attach, /* attach() */ 166 sgenv_detach, /* detach */ 167 nodev, /* reset */ 168 &sgenv_cb_ops, /* pointer to cb_ops structure */ 169 (struct bus_ops *)NULL, 170 nulldev, /* power() */ 171 ddi_quiesce_not_needed, /* quiesce() */ 172 }; 173 174 /* 175 * Loadable module support. 176 */ 177 extern struct mod_ops mod_driverops; 178 179 static struct modldrv modldrv = { 180 &mod_driverops, /* Type of module. This is a driver */ 181 "Environmental Driver", /* Name of the module */ 182 &sgenv_ops /* pointer to the dev_ops structure */ 183 }; 184 185 static struct modlinkage modlinkage = { 186 MODREV_1, 187 &modldrv, 188 NULL 189 }; 190 191 /* Opaque state structure pointer */ 192 static void *sgenv_statep; 193 194 /* 195 * <env_cache> is a cache of all the sensor readings which is persistent 196 * between kstat reads. It is created at init and gets updated upon receipt 197 * of events from the SC. 198 * 199 * The kstat_update function takes a copy of the non-zero entries in this 200 * cache and creates a temp buffer called env_cache_snapshot. 
The 201 * kstat_snapshot function then bcopies the env_cache_snapshot into the 202 * kstat buffer. This is done because there is no way to ensure that the 203 * env_cache won't change between the kstat_update and the kstat_snapshot 204 * which will cause problems as the update sets the ks_data_size. 205 */ 206 static env_sensor_t *env_cache[SGENV_MAX_HPU_KEYS] = {NULL}; 207 static void *env_cache_snapshot = NULL; 208 static size_t env_cache_snapshot_size = 0; 209 210 /* 211 * This is set to TRUE the first time env data is stored in the cache 212 * so that at least from then on, old data can be returned if a call to 213 * the mailbox fails. 214 */ 215 static int env_cache_updated = FALSE; 216 217 /* 218 * This lock is needed by the variable-sized kstat which returns 219 * environmental info. It prevents data-size races with kstat clients. 220 */ 221 static kmutex_t env_kstat_lock; 222 223 /* 224 * The <env_cache> can be accessed asynchronously by the polling function 225 * and the kstat_read framework. This mutex ensures that access to the data 226 * is controlled correctly. 227 */ 228 static kmutex_t env_cache_lock; 229 230 /* 231 * We need to store the last time we asked the SC for environmental information 232 * so that we do not send too many requests in a short period of time. 233 */ 234 static hrtime_t last_env_read_time = 0; 235 236 /* 237 * Variables to coordinate between the handlers which are triggered when 238 * the env cache needs to be updated and the thread which does the work. 239 */ 240 static volatile int env_thread_run = 0; 241 static kthread_t *env_thread = NULL; 242 static kt_did_t env_thread_tid; 243 244 static kcondvar_t env_flag_cond; 245 static kmutex_t env_flag_lock; 246 static boolean_t env_cache_updating = B_FALSE; 247 static boolean_t env_cache_update_needed = B_TRUE; 248 249 /* 250 * <board_cache> is a cache of all the board status info and it is persistent 251 * between kstat reads. 
252 * 253 * The kstat_update function takes a copy of the non-zero entries in this 254 * cache and copies them into the board_cache_snapshot buffer. The 255 * kstat_snapshot function then bcopies the board_cache_snapshot into the 256 * kstat buffer. This is done because there is no way to ensure that the 257 * board_cache won't change between the kstat_update and the kstat_snapshot 258 * which will cause problems as the update sets the ks_data_size. 259 */ 260 static sg_board_info_t board_cache[SG_MAX_BDS] = {NULL}; 261 static sg_board_info_t board_cache_snapshot[SG_MAX_BDS] = {NULL}; 262 static int board_cache_updated = FALSE; 263 264 /* 265 * This mutex ensures the <board_cache> is not destroyed while the board data 266 * is being collected. 267 */ 268 static kmutex_t board_cache_lock; 269 270 /* 271 * This lock is needed by the variable-sized kstat which returns 272 * board status info. It prevents data-size races with kstat clients. 273 */ 274 static kmutex_t board_kstat_lock; 275 276 /* 277 * This is a count of the number of board readings were stored by 278 * the kstat_update routine - this is needed by the kstat_snapshot routine. 279 */ 280 static int board_count = 0; 281 static int board_count_snapshot = 0; 282 283 /* 284 * We need to store the last time we asked the SC for board information 285 * so that we do not send too many requests in a short period of time. 286 */ 287 static hrtime_t last_board_read_time = 0; 288 289 /* 290 * Variables to coordinate between the handlers which are triggered when 291 * the board cache needs to be updated and the thread which does the work. 
292 */ 293 static volatile int board_thread_run = 0; 294 static kthread_t *board_thread = NULL; 295 static kt_did_t board_thread_tid; 296 static kcondvar_t board_flag_cond; 297 298 static kmutex_t board_flag_lock; 299 static boolean_t board_cache_updating = B_FALSE; 300 static boolean_t board_cache_update_needed = B_TRUE; 301 302 /* 303 * Used to keep track of the number of sensors associated with each key. 304 * The sum of all the values in this array is used to set ks_data_size. 305 */ 306 static int vol_sensor_count[SGENV_MAX_HPU_KEYS] = {0}; 307 308 /* 309 * This variable keeps a count of the number of errors that have occurred 310 * when we make calls to the mailbox for Env or Board data. 311 */ 312 static int sgenv_mbox_error_count = 0; 313 314 /* 315 * mutex which protects the keyswitch interrupt handler. 316 */ 317 static kmutex_t keysw_hdlr_lock; 318 319 /* 320 * mutex which protects the env interrupt handler. 321 */ 322 static kmutex_t env_hdlr_lock; 323 324 /* 325 * mutex which protects the DR handler interrupt handler. 326 */ 327 static kmutex_t dr_hdlr_lock; 328 329 /* 330 * Payloads of the event handlers. 331 */ 332 static sg_event_key_position_t keysw_payload; 333 static sbbc_msg_t keysw_payload_msg; 334 335 static sg_event_env_changed_t env_payload; 336 static sbbc_msg_t env_payload_msg; 337 338 static sg_event_fan_status_t fan_payload; 339 static sbbc_msg_t fan_payload_msg; 340 341 static sg_system_fru_descriptor_t dr_payload; 342 static sbbc_msg_t dr_payload_msg; 343 344 /* 345 * The following 3 arrays list all possible HPUs, Parts and Device types 346 */ 347 348 /* 349 * ensure that all possible HPUs exported, as described in the main comment 350 * in <sys/sensor_tag.h>, are accounted for here. 
351 */ 352 static const hpu_value_t hpus[] = { 353 HPU_ENTRY(SG_HPU_TYPE_UNKNOWN), 354 HPU_ENTRY(SG_HPU_TYPE_CPU_BOARD), 355 HPU_ENTRY(SG_HPU_TYPE_PCI_IO_BOARD), 356 HPU_ENTRY(SG_HPU_TYPE_CPCI_IO_BOARD), 357 HPU_ENTRY(SG_HPU_TYPE_SP_CPCI_IO_BOARD), 358 HPU_ENTRY(SG_HPU_TYPE_REPEATER_BOARD), 359 HPU_ENTRY(SG_HPU_TYPE_L2_REPEATER_BOARD), 360 HPU_ENTRY(SG_HPU_TYPE_SYSTEM_CONTROLLER_BOARD), 361 HPU_ENTRY(SG_HPU_TYPE_SP_SYSTEM_CONTROLLER_BOARD), 362 HPU_ENTRY(SG_HPU_TYPE_A123_POWER_SUPPLY), 363 HPU_ENTRY(SG_HPU_TYPE_A138_POWER_SUPPLY), 364 HPU_ENTRY(SG_HPU_TYPE_A145_POWER_SUPPLY), 365 HPU_ENTRY(SG_HPU_TYPE_A152_POWER_SUPPLY), 366 HPU_ENTRY(SG_HPU_TYPE_A153_POWER_SUPPLY), 367 HPU_ENTRY(SG_HPU_TYPE_RACK_FAN_TRAY), 368 HPU_ENTRY(SG_HPU_TYPE_SP_FAN_TRAY), 369 HPU_ENTRY(SG_HPU_TYPE_MD_TOP_IO_FAN_TRAY), 370 HPU_ENTRY(SG_HPU_TYPE_MD_BOTTOM_IO_FAN_TRAY), 371 HPU_ENTRY(SG_HPU_TYPE_R12_THREE_FAN_TRAY), 372 HPU_ENTRY(SG_HPU_TYPE_K12_IO_ONE_FAN_TRAY), 373 HPU_ENTRY(SG_HPU_TYPE_K12_CPU_THREE_FAN_TRAY), 374 HPU_ENTRY(SG_HPU_TYPE_R24_IO_FOUR_FAN_TRAY), 375 HPU_ENTRY(SG_HPU_TYPE_R24_CPU_SIX_FAN_TRAY), 376 0, (char *)NULL 377 }; 378 379 static const struct part_value parts[] = { 380 PART_VALUE(SG_SENSOR_PART_SBBC), 381 PART_VALUE(SG_SENSOR_PART_SDC), 382 PART_VALUE(SG_SENSOR_PART_AR), 383 PART_VALUE(SG_SENSOR_PART_CBH), 384 PART_VALUE(SG_SENSOR_PART_DX), 385 PART_VALUE(SG_SENSOR_PART_CHEETAH), 386 PART_VALUE(SG_SENSOR_PART_1_5_VDC), 387 PART_VALUE(SG_SENSOR_PART_3_3_VDC), 388 PART_VALUE(SG_SENSOR_PART_5_VDC), 389 PART_VALUE(SG_SENSOR_PART_12_VDC), 390 PART_VALUE(SG_SENSOR_PART_48_VDC), 391 PART_VALUE(SG_SENSOR_PART_CURRENT), 392 PART_VALUE(SG_SENSOR_PART_BOARD), 393 PART_VALUE(SG_SENSOR_PART_SCAPP), 394 PART_VALUE(SG_SENSOR_PART_SCHIZO), 395 PART_VALUE(SG_SENSOR_PART_FAN), 396 0, (char *)NULL 397 }; 398 399 static const struct type_value types[] = { 400 TYPE_VALUE(SG_SENSOR_TYPE_CURRENT, SG_CURRENT_SCALE), 401 TYPE_VALUE(SG_SENSOR_TYPE_TEMPERATURE, SG_TEMPERATURE_SCALE), 402 
TYPE_VALUE(SG_SENSOR_TYPE_1_5_VDC, SG_1_5_VDC_SCALE), 403 TYPE_VALUE(SG_SENSOR_TYPE_1_8_VDC, SG_1_8_VDC_SCALE), 404 TYPE_VALUE(SG_SENSOR_TYPE_3_3_VDC, SG_3_3_VDC_SCALE), 405 TYPE_VALUE(SG_SENSOR_TYPE_5_VDC, SG_5_VDC_SCALE), 406 TYPE_VALUE(SG_SENSOR_TYPE_12_VDC, SG_12_VDC_SCALE), 407 TYPE_VALUE(SG_SENSOR_TYPE_48_VDC, SG_48_VDC_SCALE), 408 TYPE_VALUE(SG_SENSOR_TYPE_ENVDB, 1), 409 TYPE_VALUE(SG_SENSOR_TYPE_COOLING, 1), 410 0, (char *)NULL 411 }; 412 413 int 414 _init(void) 415 { 416 int error = 0; 417 418 error = ddi_soft_state_init(&sgenv_statep, 419 sizeof (sgenv_soft_state_t), 1); 420 421 if (error) 422 return (error); 423 424 error = mod_install(&modlinkage); 425 if (error) { 426 ddi_soft_state_fini(&sgenv_statep); 427 return (error); 428 } 429 430 mutex_init(&env_kstat_lock, NULL, MUTEX_DEFAULT, NULL); 431 mutex_init(&env_cache_lock, NULL, MUTEX_DEFAULT, NULL); 432 mutex_init(&env_flag_lock, NULL, MUTEX_DEFAULT, NULL); 433 cv_init(&env_flag_cond, NULL, CV_DEFAULT, NULL); 434 435 mutex_init(&board_cache_lock, NULL, MUTEX_DEFAULT, NULL); 436 mutex_init(&board_kstat_lock, NULL, MUTEX_DEFAULT, NULL); 437 mutex_init(&board_flag_lock, NULL, MUTEX_DEFAULT, NULL); 438 cv_init(&board_flag_cond, NULL, CV_DEFAULT, NULL); 439 440 mutex_init(&keysw_hdlr_lock, NULL, MUTEX_DEFAULT, NULL); 441 mutex_init(&env_hdlr_lock, NULL, MUTEX_DEFAULT, NULL); 442 mutex_init(&dr_hdlr_lock, NULL, MUTEX_DEFAULT, NULL); 443 444 /* set the default timeout value */ 445 sgenv_max_mbox_wait_time = sbbc_mbox_default_timeout; 446 447 return (error); 448 } 449 450 451 int 452 _info(struct modinfo *modinfop) 453 { 454 return (mod_info(&modlinkage, modinfop)); 455 } 456 457 458 int 459 _fini(void) 460 { 461 int error = 0; 462 463 error = mod_remove(&modlinkage); 464 if (error) 465 return (error); 466 467 mutex_destroy(&env_kstat_lock); 468 mutex_destroy(&env_cache_lock); 469 470 mutex_destroy(&board_cache_lock); 471 mutex_destroy(&board_kstat_lock); 472 473 mutex_destroy(&keysw_hdlr_lock); 474 
mutex_destroy(&env_hdlr_lock); 475 mutex_destroy(&dr_hdlr_lock); 476 477 ddi_soft_state_fini(&sgenv_statep); 478 479 return (error); 480 } 481 482 483 static int 484 sgenv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 485 { 486 sgenv_soft_state_t *softsp; 487 488 int instance; 489 int err; 490 491 switch (cmd) { 492 case DDI_ATTACH: 493 494 instance = ddi_get_instance(dip); 495 496 /* allocate a global sgenv_soft_state structure */ 497 err = ddi_soft_state_zalloc(sgenv_statep, instance); 498 if (err != DDI_SUCCESS) { 499 cmn_err(CE_WARN, "attach: could not allocate state " 500 "structure for inst %d.", instance); 501 return (DDI_FAILURE); 502 } 503 504 softsp = ddi_get_soft_state(sgenv_statep, instance); 505 if (softsp == NULL) { 506 ddi_soft_state_free(sgenv_statep, instance); 507 cmn_err(CE_WARN, "attach: could not get state " 508 "structure for inst %d.", instance); 509 return (DDI_FAILURE); 510 } 511 512 softsp->dip = dip; 513 softsp->instance = instance; 514 515 err = sgenv_add_kstats(softsp); 516 if (err != 0) { 517 /* 518 * Some of the kstats may have been created before the 519 * error occurred in sgenv_add_kstats(), so we call 520 * sgenv_remove_kstats() which removes any kstats 521 * already created. 522 */ 523 sgenv_remove_kstats(softsp); 524 ddi_soft_state_free(sgenv_statep, instance); 525 return (DDI_FAILURE); 526 } 527 528 /* 529 * Before we setup the framework to read the data from the SC 530 * we need to ensure the caches are initialized correctly. 531 */ 532 sgenv_init_board_cache(); 533 sgenv_init_env_cache(); 534 535 /* 536 * Add the threads which will update the env and board caches 537 * and post events to Sysevent Framework in the background 538 * when the interrupt handlers watching for ENV/DR events 539 * indicate to the threads that they need to do so. 
540 */ 541 err = sgenv_create_cache_update_threads(); 542 if (err != DDI_SUCCESS) { 543 sgenv_remove_kstats(softsp); 544 ddi_soft_state_free(sgenv_statep, instance); 545 return (DDI_FAILURE); 546 } 547 548 err = ddi_create_minor_node(dip, SGENV_DRV_NAME, S_IFCHR, 549 instance, DDI_PSEUDO, NULL); 550 if (err != DDI_SUCCESS) { 551 sgenv_remove_kstats(softsp); 552 (void) sgenv_remove_cache_update_threads(); 553 ddi_soft_state_free(sgenv_statep, instance); 554 return (DDI_FAILURE); 555 } 556 557 /* 558 * Add the handlers which watch for unsolicited messages 559 * and post event to Sysevent Framework. 560 */ 561 err = sgenv_add_intr_handlers(); 562 if (err != DDI_SUCCESS) { 563 cmn_err(CE_WARN, "Failed to add event handlers"); 564 (void) sgenv_remove_intr_handlers(); 565 sgenv_remove_kstats(softsp); 566 (void) sgenv_remove_cache_update_threads(); 567 ddi_soft_state_free(sgenv_statep, instance); 568 return (DDI_FAILURE); 569 } 570 571 ddi_report_dev(dip); 572 573 return (DDI_SUCCESS); 574 575 case DDI_RESUME: 576 return (DDI_SUCCESS); 577 578 default: 579 return (DDI_FAILURE); 580 } 581 } 582 583 584 static int 585 sgenv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 586 { 587 sgenv_soft_state_t *softsp; 588 589 int instance; 590 int err; 591 592 switch (cmd) { 593 case DDI_DETACH: 594 595 instance = ddi_get_instance(dip); 596 597 softsp = ddi_get_soft_state(sgenv_statep, instance); 598 if (softsp == NULL) { 599 cmn_err(CE_WARN, "detach: could not get state " 600 "structure for inst %d.", instance); 601 return (DDI_FAILURE); 602 } 603 604 err = sgenv_remove_cache_update_threads(); 605 if (err != DDI_SUCCESS) { 606 cmn_err(CE_WARN, "Failed to remove update threads"); 607 } 608 609 /* 610 * Remove the handlers which watch for unsolicited messages 611 * and post event to Sysevent Framework. 
612 */ 613 err = sgenv_remove_intr_handlers(); 614 if (err != DDI_SUCCESS) { 615 cmn_err(CE_WARN, "Failed to remove event handlers"); 616 } 617 618 sgenv_remove_kstats(softsp); 619 620 ddi_soft_state_free(sgenv_statep, instance); 621 622 ddi_remove_minor_node(dip, NULL); 623 624 return (DDI_SUCCESS); 625 626 case DDI_SUSPEND: 627 return (DDI_SUCCESS); 628 629 default: 630 return (DDI_FAILURE); 631 } 632 } 633 634 635 static int 636 sgenv_add_kstats(sgenv_soft_state_t *softsp) 637 { 638 kstat_t *ksp; 639 kstat_named_t *keyswitch_named_data; 640 641 int inst = softsp->instance; 642 643 /* 644 * Create the 'keyswitch position' named kstat. 645 */ 646 ksp = kstat_create(SGENV_DRV_NAME, inst, SG_KEYSWITCH_KSTAT_NAME, 647 "misc", KSTAT_TYPE_NAMED, 1, NULL); 648 649 if (ksp != NULL) { 650 /* initialize the named kstat */ 651 keyswitch_named_data = (struct kstat_named *)(ksp->ks_data); 652 653 kstat_named_init(&keyswitch_named_data[0], 654 POSITION_KSTAT_NAME, 655 KSTAT_DATA_INT32); 656 657 ksp->ks_update = sgenv_keyswitch_kstat_update; 658 kstat_install(ksp); 659 660 /* update the soft state */ 661 softsp->keyswitch_ksp = ksp; 662 663 } else { 664 cmn_err(CE_WARN, "Keyswitch: kstat_create failed"); 665 return (-1); 666 } 667 668 669 /* 670 * Environmental Information. 671 */ 672 ksp = kstat_create(SGENV_DRV_NAME, inst, SG_ENV_INFO_KSTAT_NAME, 673 "misc", KSTAT_TYPE_RAW, 0, 674 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE); 675 676 if (ksp != NULL) { 677 ksp->ks_data = NULL; 678 ksp->ks_data_size = 0; 679 ksp->ks_snaptime = 0; 680 ksp->ks_update = sgenv_env_info_kstat_update; 681 ksp->ks_snapshot = sgenv_env_info_kstat_snapshot; 682 ksp->ks_lock = &env_kstat_lock; 683 kstat_install(ksp); 684 685 /* update the soft state */ 686 softsp->env_info_ksp = ksp; 687 688 } else { 689 cmn_err(CE_WARN, "Environmental Info: kstat_create failed"); 690 return (-1); 691 } 692 693 694 /* 695 * Board Status Information. 
696 */ 697 ksp = kstat_create(SGENV_DRV_NAME, inst, SG_BOARD_STATUS_KSTAT_NAME, 698 "misc", KSTAT_TYPE_RAW, 0, 699 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE); 700 701 if (ksp != NULL) { 702 ksp->ks_data = NULL; 703 ksp->ks_data_size = 0; 704 ksp->ks_snaptime = 0; 705 ksp->ks_update = sgenv_board_info_kstat_update; 706 ksp->ks_snapshot = sgenv_board_info_kstat_snapshot; 707 ksp->ks_lock = &board_kstat_lock; 708 kstat_install(ksp); 709 710 /* update the soft state */ 711 softsp->board_info_ksp = ksp; 712 713 } else { 714 cmn_err(CE_WARN, "Board Status Info: kstat_create failed"); 715 return (-1); 716 } 717 718 return (0); 719 } 720 721 722 static void 723 sgenv_remove_kstats(sgenv_soft_state_t *softsp) 724 { 725 kstat_t *ksp; 726 727 ksp = softsp->keyswitch_ksp; 728 if (ksp != NULL) { 729 softsp->keyswitch_ksp = NULL; 730 kstat_delete(ksp); 731 } 732 733 ksp = softsp->env_info_ksp; 734 if (ksp != NULL) { 735 sgenv_destroy_env_cache(); 736 softsp->env_info_ksp = NULL; 737 ksp->ks_lock = NULL; 738 kstat_delete(ksp); 739 } 740 741 ksp = softsp->board_info_ksp; 742 if (ksp != NULL) { 743 softsp->board_info_ksp = NULL; 744 ksp->ks_lock = NULL; 745 kstat_delete(ksp); 746 } 747 } 748 749 750 /* 751 * This function registers mailbox interrupt handlers to watch for certain 752 * unsolicited mailbox messages, which indicate that some event has occurred. 753 * 754 * Currently only the following events are handled: 755 * MBOX_EVENT_KEY_SWITCH 756 * MBOX_EVENT_ENV 757 * - Thresholds/Limits Exceeded 758 * - Fan Status changed 759 * 760 * ERRORS: 761 * We return DDI_FAILURE if we fail to register any one of the 762 * interrupt handlers. 763 */ 764 static int 765 sgenv_add_intr_handlers(void) 766 { 767 int err; 768 769 /* 770 * Register an interrupt handler with the sgsbbc driver for the 771 * MBOX_EVENT_KEY_SWITCH events. 772 * - The virtual keyswitch has changed, we generate a sysevent. 
773 */ 774 keysw_payload_msg.msg_buf = (caddr_t)&keysw_payload; 775 keysw_payload_msg.msg_len = sizeof (keysw_payload); 776 777 err = sbbc_mbox_reg_intr(MBOX_EVENT_KEY_SWITCH, sgenv_keyswitch_handler, 778 &keysw_payload_msg, NULL, &keysw_hdlr_lock); 779 if (err != 0) { 780 cmn_err(CE_WARN, "Failed to register MBOX_EVENT_KEY_SWITCH " 781 "handler. Err=%d", err); 782 return (DDI_FAILURE); 783 } 784 785 /* 786 * Register an interrupt handler with the sgsbbc driver for the 787 * MBOX_EVENT_ENV events. 788 * - Thresholds/Limits Exceeded, we generate a sysevent 789 * and we update our caches. 790 */ 791 env_payload_msg.msg_buf = (caddr_t)&env_payload; 792 env_payload_msg.msg_len = sizeof (env_payload); 793 794 err = sbbc_mbox_reg_intr(MBOX_EVENT_ENV, sgenv_env_data_handler, 795 &env_payload_msg, NULL, &env_hdlr_lock); 796 if (err != 0) { 797 cmn_err(CE_WARN, "Failed to register MBOX_EVENT_ENV " 798 "(env) handler. Err=%d", err); 799 return (DDI_FAILURE); 800 } 801 802 /* 803 * Register an interrupt handler with the sgsbbc driver for the 804 * MBOX_EVENT_ENV events. 805 * - Fan Status changed, we generate a sysevent, and 806 * we update the env cache only. 807 */ 808 fan_payload_msg.msg_buf = (caddr_t)&fan_payload; 809 fan_payload_msg.msg_len = sizeof (fan_payload); 810 811 err = sbbc_mbox_reg_intr(MBOX_EVENT_ENV, sgenv_fan_status_handler, 812 &fan_payload_msg, NULL, &env_hdlr_lock); 813 if (err != 0) { 814 cmn_err(CE_WARN, "Failed to register MBOX_EVENT_ENV (fan)" 815 "handler. Err=%d", err); 816 return (DDI_FAILURE); 817 } 818 819 /* 820 * Register an interrupt handler with the sgsbbc driver for the 821 * MBOX_EVENT_GENERIC events. 822 * - DR state change, we update our caches. 
823 */ 824 dr_payload_msg.msg_buf = (caddr_t)&dr_payload; 825 dr_payload_msg.msg_len = sizeof (dr_payload); 826 827 err = sbbc_mbox_reg_intr(MBOX_EVENT_GENERIC, sgenv_dr_event_handler, 828 &dr_payload_msg, NULL, &dr_hdlr_lock); 829 if (err != 0) { 830 cmn_err(CE_WARN, "Failed to register MBOX_EVENT_GENERIC (DR)" 831 "handler. Err=%d", err); 832 return (DDI_FAILURE); 833 } 834 835 return (DDI_SUCCESS); 836 } 837 838 /* 839 * This function unregisters the mailbox interrupt handlers. 840 * 841 * ERRORS: 842 * We return DDI_FAILURE if we fail to register any one of the 843 * interrupt handlers. 844 */ 845 static int 846 sgenv_remove_intr_handlers(void) 847 { 848 int rv = DDI_SUCCESS; 849 int err; 850 851 err = sbbc_mbox_unreg_intr(MBOX_EVENT_KEY_SWITCH, 852 sgenv_keyswitch_handler); 853 if (err != 0) { 854 cmn_err(CE_WARN, "Failed to unregister MBOX_EVENT_KEY_SWITCH " 855 "handler. Err=%d", err); 856 rv = DDI_FAILURE; 857 } 858 859 err = sbbc_mbox_unreg_intr(MBOX_EVENT_ENV, sgenv_env_data_handler); 860 if (err != 0) { 861 cmn_err(CE_WARN, "Failed to unregister MBOX_EVENT_ENV (env)" 862 "handler. Err=%d", err); 863 rv = DDI_FAILURE; 864 } 865 866 err = sbbc_mbox_unreg_intr(MBOX_EVENT_ENV, sgenv_fan_status_handler); 867 if (err != 0) { 868 cmn_err(CE_WARN, "Failed to unregister MBOX_EVENT_ENV (fan)" 869 "handler. Err=%d", err); 870 rv = DDI_FAILURE; 871 } 872 873 err = sbbc_mbox_unreg_intr(MBOX_EVENT_GENERIC, sgenv_dr_event_handler); 874 if (err != 0) { 875 cmn_err(CE_WARN, "Failed to unregister MBOX_EVENT_GENERIC (DR) " 876 "handler. 
Err=%d", err); 877 rv = DDI_FAILURE; 878 } 879 880 return (rv); 881 } 882 883 884 static int 885 sgenv_create_cache_update_threads(void) 886 { 887 DCMN_ERR_S(f, "sgenv_create_cache_update_threads()"); 888 889 DCMN_ERR_THREAD(CE_NOTE, "Entering %s", f); 890 891 /* Create thread to ensure env_cache is updated */ 892 env_thread_run = 1; 893 894 env_thread = thread_create(NULL, 0, sgenv_update_env_cache, 895 NULL, 0, &p0, TS_RUN, minclsyspri); 896 env_thread_tid = env_thread->t_did; 897 898 /* Create thread to ensure board_cache is updated */ 899 board_thread_run = 1; 900 901 board_thread = thread_create(NULL, 0, sgenv_update_board_cache, 902 NULL, 0, &p0, TS_RUN, minclsyspri); 903 board_thread_tid = board_thread->t_did; 904 905 DCMN_ERR_THREAD(CE_NOTE, "Exiting %s", f); 906 907 return (DDI_SUCCESS); 908 } 909 910 911 static int 912 sgenv_remove_cache_update_threads(void) 913 { 914 DCMN_ERR_S(f, "sgenv_remove_cache_update_threads()"); 915 916 DCMN_ERR_THREAD(CE_NOTE, "%s: Waiting for cache update threads", f); 917 918 /* Cause the env_cache thread to terminate. */ 919 mutex_enter(&env_flag_lock); 920 env_thread_run = 0; 921 cv_signal(&env_flag_cond); 922 mutex_exit(&env_flag_lock); 923 924 thread_join(env_thread_tid); 925 926 /* Cause the board_cache thread to terminate. 
*/ 927 mutex_enter(&board_flag_lock); 928 board_thread_run = 0; 929 cv_signal(&board_flag_cond); 930 mutex_exit(&board_flag_lock); 931 932 thread_join(board_thread_tid); 933 934 DCMN_ERR_THREAD(CE_NOTE, "%s: cache update threads finished", f); 935 936 return (DDI_SUCCESS); 937 } 938 939 940 static int 941 sgenv_keyswitch_kstat_update(kstat_t *ksp, int rw) 942 { 943 sg_keyswitch_kstat_t *keysw_data; 944 945 int8_t posn; /* keysw posn read from IO-SRAM */ 946 int size; /* size of IO-SRAM chunk */ 947 int rv = 0; /* return value of iosram_read() */ 948 949 keysw_data = (sg_keyswitch_kstat_t *)ksp->ks_data; 950 951 switch (rw) { 952 case KSTAT_WRITE: 953 /* 954 * Write not permitted 955 */ 956 return (EACCES); 957 958 case KSTAT_READ: 959 /* 960 * Get the size of the keyswitch IO-SRAM chunk. 961 * This should be one byte. 962 * 963 * If the size is not 1 byte we set the position to UNKNOWN 964 * 965 * Otherwise we read the keyswitch position from IO-SRAM. 966 * Then check that this is a valid keyswitch position. 967 * If it is not valid then something is corrupt and set 968 * the position to UNKNOWN. 969 */ 970 size = iosram_size(SBBC_KEYSWITCH_KEY); 971 if (size != 1) { 972 posn = SG_KEYSWITCH_POSN_UNKNOWN; 973 rv = -1; 974 975 } else if ((rv = iosram_read(SBBC_KEYSWITCH_KEY, 0, 976 (char *)&posn, size)) != 0) { 977 posn = SG_KEYSWITCH_POSN_UNKNOWN; 978 979 } else { 980 /* Check posn is not corrupt */ 981 switch (posn) { 982 case SG_KEYSWITCH_POSN_ON: 983 case SG_KEYSWITCH_POSN_DIAG: 984 case SG_KEYSWITCH_POSN_SECURE: 985 /* value read from kstat is OK */ 986 break; 987 988 default: 989 /* value read from kstat is corrupt */ 990 posn = SG_KEYSWITCH_POSN_UNKNOWN; 991 break; 992 } 993 } 994 995 /* Write position to kstat. 
*/ 996 keysw_data->keyswitch_position.value.i32 = posn; 997 998 return (rv); 999 1000 default: 1001 return (EINVAL); 1002 } 1003 } 1004 1005 static void 1006 sgenv_init_env_cache(void) 1007 { 1008 ASSERT(env_thread_run == 0); 1009 ASSERT(env_thread == NULL); 1010 } 1011 1012 1013 /* 1014 * This thread runs in the background and waits for an interrupt handler 1015 * registered to wait for ENV/DR events from the SC to signal/flag that we 1016 * need to update our Env Cache. 1017 */ 1018 static void 1019 sgenv_update_env_cache(void) 1020 { 1021 DCMN_ERR_S(f, "sgenv_update_env_cache()"); 1022 1023 mutex_enter(&env_flag_lock); 1024 1025 while (env_thread_run == 1) { 1026 1027 /* 1028 * We check to see if the update needed flag is set. 1029 * If it is then this means that: 1030 * 1) This is the first time through the while loop 1031 * and we need to initialize the cache. 1032 * 2) An interrupt handler was triggered while we 1033 * we were updating the env cache during the previous 1034 * iteration of the while loop and we need to refresh 1035 * the env data to ensure we are completely up to date. 1036 * 1037 * Otherwise we wait until we get a signal from one of the 1038 * interrupt handlers. 
1039 */ 1040 if (env_cache_update_needed) { 1041 DCMN_ERR_THREAD(CE_NOTE, "%s: update needed", f); 1042 1043 env_cache_update_needed = B_FALSE; 1044 1045 } else { 1046 DCMN_ERR_THREAD(CE_NOTE, "%s: Waiting for signal", f); 1047 1048 cv_wait(&env_flag_cond, &env_flag_lock); 1049 1050 /* Check if we are being asked to terminate */ 1051 if (env_thread_run == 0) { 1052 break; 1053 } 1054 1055 env_cache_updating = B_TRUE; 1056 } 1057 1058 mutex_exit(&env_flag_lock); 1059 (void) sgenv_get_env_info_data(); 1060 1061 (void) sgenv_check_sensor_thresholds(); 1062 mutex_enter(&env_flag_lock); 1063 1064 if (env_cache_update_needed == B_FALSE) 1065 env_cache_updating = B_FALSE; 1066 } 1067 1068 mutex_exit(&env_flag_lock); 1069 1070 DCMN_ERR_THREAD(CE_NOTE, "Exiting %s", f); 1071 1072 env_thread_run = -1; 1073 thread_exit(); 1074 } 1075 1076 1077 /* 1078 * We always return what is in the env_cache. It is up to the SC to ensure 1079 * that the env_cache is current by sending events to us when something 1080 * changes. The cache will then be updated by going to the SC to get the 1081 * new data. That way the kstat_update code can always be sure that it gets 1082 * current data without having to wait while the SC responds (slowly) to our 1083 * request for data. 1084 * 1085 * The way the update and snapshot code works, we cannot be guaranteed that 1086 * someone won't grab the env_cache_lock between the update and snapshot 1087 * calls so we use a temporary snapshot of the env_cache. We cannot hold 1088 * any locks across the calls from the update to the snapshot as we are 1089 * not guaranteed that the snapshot function will be called. So we create 1090 * the snapshot of the env_cache in the update routine and dump this to the 1091 * kstat user buffer in the snapshot routine. (There are error conditions in 1092 * which the snapshot will not be called by the kstat framework so we need 1093 * to handle these appropriately.) 
1094 */ 1095 static int 1096 sgenv_env_info_kstat_update(kstat_t *ksp, int rw) 1097 { 1098 DCMN_ERR_S(f, "sgenv_env_info_kstat_update()"); 1099 1100 int err = 0; 1101 int key_posn; 1102 env_sensor_t *ptr; 1103 1104 switch (rw) { 1105 case KSTAT_WRITE: 1106 /* 1107 * Write not permitted 1108 */ 1109 return (EACCES); 1110 1111 case KSTAT_READ: 1112 1113 mutex_enter(&env_cache_lock); 1114 /* 1115 * We now need to ensure that there is enough room allocated 1116 * by the kstat framework to return the data via ks_data. 1117 * It is possible there may be no data in the cache but 1118 * we still return zero sized kstats to ensure no client breaks 1119 */ 1120 sgenv_update_env_kstat_size(ksp); 1121 1122 /* 1123 * If the snapshot still has data (this could be because the 1124 * kstat framework discovered an error and did not call the 1125 * snapshot code which should have freed this buffer) we free 1126 * it here. 1127 */ 1128 if ((env_cache_snapshot != NULL) && 1129 (env_cache_snapshot_size > 0)) { 1130 DCMN_ERR_CACHE(CE_NOTE, "%s freeing " 1131 "env_cache_snapshot buf", f); 1132 kmem_free(env_cache_snapshot, env_cache_snapshot_size); 1133 } 1134 1135 /* 1136 * Create a new snapshot buffer based on ks_data_size 1137 */ 1138 env_cache_snapshot_size = ksp->ks_data_size; 1139 env_cache_snapshot = kmem_zalloc( 1140 env_cache_snapshot_size, KM_SLEEP); 1141 1142 /* 1143 * We need to take a fresh snapshot of the env_cache here. 1144 * For each sensor collection, we check to see if there is 1145 * data in the cache (ie. != NULL). If there is, we copy it 1146 * into the snapshot. 1147 */ 1148 ptr = env_cache_snapshot; 1149 for (key_posn = 0; key_posn < SGENV_MAX_HPU_KEYS; key_posn++) { 1150 if (vol_sensor_count[key_posn] <= 0) 1151 continue; 1152 1153 ASSERT(vol_sensor_count[key_posn] <= 1154 SGENV_MAX_SENSORS_PER_KEY); 1155 1156 /* 1157 * <env_cache> entry should have been allocated 1158 * in the kstat_update function already. 
1159 * 1160 * If this <env_cache> entry is NULL, then 1161 * it has already been destroyed or cleared 1162 * and the sensor readings have disappeared. 1163 */ 1164 if (env_cache[key_posn] == NULL) { 1165 DCMN_ERR(CE_NOTE, "!Cache entry %d has " 1166 "disappeared", key_posn); 1167 vol_sensor_count[key_posn] = 0; 1168 continue; 1169 } 1170 1171 bcopy(&env_cache[key_posn][0], ptr, 1172 sizeof (env_sensor_t) * 1173 vol_sensor_count[key_posn]); 1174 ptr += vol_sensor_count[key_posn]; 1175 } 1176 mutex_exit(&env_cache_lock); 1177 1178 return (err); 1179 1180 default: 1181 return (EINVAL); 1182 } 1183 } 1184 1185 static int 1186 sgenv_env_info_kstat_snapshot(kstat_t *ksp, void *buf, int rw) 1187 { 1188 DCMN_ERR_S(f, "sgenv_env_info_kstat_snapshot()"); 1189 1190 switch (rw) { 1191 case KSTAT_WRITE: 1192 /* 1193 * Write not permitted 1194 */ 1195 return (EACCES); 1196 1197 case KSTAT_READ: 1198 1199 /* 1200 * We have taken a snapshot of the env_cache in the 1201 * update routine so we simply bcopy this into the 1202 * kstat buf. No locks needed here. 1203 */ 1204 if (env_cache_snapshot_size > 0) 1205 bcopy(env_cache_snapshot, buf, env_cache_snapshot_size); 1206 1207 ksp->ks_snaptime = last_env_read_time; 1208 1209 /* 1210 * Free the memory used by the snapshot. If for some reason 1211 * the kstat framework does not call this snapshot routine, 1212 * we also have a check in the update routine so the next 1213 * time it is called it checks for this condition and frees 1214 * the snapshot buffer there. 1215 */ 1216 DCMN_ERR_CACHE(CE_NOTE, "%s freeing env_cache_snapshot buf", f); 1217 kmem_free(env_cache_snapshot, env_cache_snapshot_size); 1218 env_cache_snapshot = NULL; 1219 env_cache_snapshot_size = 0; 1220 1221 return (0); 1222 1223 default: 1224 return (EINVAL); 1225 } 1226 } 1227 1228 static void 1229 sgenv_init_board_cache(void) 1230 { 1231 int i; 1232 1233 ASSERT(board_thread_run == 0); 1234 ASSERT(board_thread == NULL); 1235 1236 /* 1237 * Init all node-ids to be -1. 
1238 */ 1239 mutex_enter(&board_cache_lock); 1240 for (i = 0; i < SG_MAX_BDS; i++) 1241 board_cache[i].node_id = (-1); 1242 mutex_exit(&board_cache_lock); 1243 } 1244 1245 1246 /* 1247 * This thread runs in the background and waits for an interrupt handler 1248 * registered to wait for DR events from the SC to signal/flag that we 1249 * need to update our Board Cache. 1250 */ 1251 static void 1252 sgenv_update_board_cache(void) 1253 { 1254 DCMN_ERR_S(f, "sgenv_update_board_cache()"); 1255 1256 mutex_enter(&board_flag_lock); 1257 1258 while (board_thread_run == 1) { 1259 1260 /* 1261 * We check to see if the update needed flag is set. 1262 * If it is then this means that: 1263 * 1) This is the first time through the while loop 1264 * and we need to initialize the cache. 1265 * 2) An interrupt handler was triggered while we 1266 * we were updating the cache during the previous 1267 * iteration of the while loop and we need to refresh 1268 * the env data to ensure we are completely up to date. 1269 * 1270 * Otherwise we wait until we get a signal from one of the 1271 * interrupt handlers. 1272 */ 1273 if (board_cache_update_needed) { 1274 DCMN_ERR_THREAD(CE_NOTE, "%s: update needed", f); 1275 board_cache_update_needed = B_FALSE; 1276 1277 } else { 1278 DCMN_ERR_THREAD(CE_NOTE, "%s: Waiting for signal", f); 1279 1280 cv_wait(&board_flag_cond, &board_flag_lock); 1281 1282 /* Check if we are being asked to terminate */ 1283 if (board_thread_run == 0) { 1284 break; 1285 } 1286 1287 board_cache_updating = B_TRUE; 1288 } 1289 1290 mutex_exit(&board_flag_lock); 1291 (void) sgenv_get_board_info_data(); 1292 mutex_enter(&board_flag_lock); 1293 1294 if (board_cache_update_needed == B_FALSE) 1295 board_cache_updating = B_FALSE; 1296 } 1297 1298 mutex_exit(&board_flag_lock); 1299 1300 DCMN_ERR_THREAD(CE_NOTE, "Exiting %s", f); 1301 1302 board_thread_run = -1; 1303 thread_exit(); 1304 } 1305 1306 1307 /* 1308 * We always return what is in the board_cache. 
It is up to the SC to ensure 1309 * that the board_cache is current by sending events to us when something 1310 * changes. The cache will then be updated by going to the SC to get the 1311 * new data. That way the kstat_update code can always be sure that it gets 1312 * current data without having to wait while the SC responds (slowly) to our 1313 * request for data. 1314 * 1315 * The way the update and snapshot code works, we cannot be guaranteed that 1316 * someone won't grab the board_cache_lock between the update and snapshot 1317 * calls so we use a snapshot buffer of the board_cache. We cannot hold 1318 * any locks across the calls from the update to the snapshot as we are 1319 * not guaranteed that the snapshot function will be called. So we create 1320 * the snapshot of the board_cache in the update routine and dump this to the 1321 * kstat user buffer in the snapshot routine. (There are error conditions in 1322 * which the snapshot will not be called by the kstat framework so we need 1323 * to handle these appropriately.) 1324 */ 1325 static int 1326 sgenv_board_info_kstat_update(kstat_t *ksp, int rw) 1327 { 1328 int i; 1329 1330 switch (rw) { 1331 case KSTAT_WRITE: 1332 /* 1333 * Write not permitted 1334 */ 1335 return (EACCES); 1336 1337 case KSTAT_READ: 1338 /* 1339 * The board_cache is created during startup, and so should be 1340 * available before a user can log in and trigger a kstat read, 1341 * but we check just in case. 1342 */ 1343 if (board_cache_updated == FALSE) 1344 return (ENXIO); 1345 1346 mutex_enter(&board_cache_lock); 1347 1348 /* 1349 * Set <ks_data_size> to the new number of board readings so 1350 * that the snapshot routine can allocate the correctly sized 1351 * kstat. 
1352 */ 1353 ksp->ks_data_size = board_count * sizeof (sg_board_info_t); 1354 1355 board_count_snapshot = board_count; 1356 1357 /* 1358 * We are now guaranteed that that board_cache is not in flux 1359 * (as we have the lock) so we take a copy of the board_cache 1360 * into the board_cache_snapshot so that the snapshot routine 1361 * can copy it from the board_cache_snapshot into the user kstat 1362 * buffer. 1363 */ 1364 for (i = 0; i < SG_MAX_BDS; i++) { 1365 board_cache_snapshot[i] = board_cache[i]; 1366 } 1367 1368 mutex_exit(&board_cache_lock); 1369 1370 return (0); 1371 1372 default: 1373 return (EINVAL); 1374 } 1375 } 1376 1377 static int 1378 sgenv_board_info_kstat_snapshot(kstat_t *ksp, void *buf, int rw) 1379 { 1380 DCMN_ERR_S(f, "sgenv_board_info_kstat_snapshot()"); 1381 1382 sg_board_info_t *bdp; 1383 int i, num_bds = 0; 1384 1385 switch (rw) { 1386 case KSTAT_WRITE: 1387 /* 1388 * Write not permitted 1389 */ 1390 return (EACCES); 1391 1392 case KSTAT_READ: 1393 1394 if (board_cache_updated == FALSE) { 1395 ksp->ks_data_size = 0; 1396 ksp->ks_data = NULL; 1397 return (ENOMEM); 1398 } 1399 1400 /* 1401 * Update the snap_time with the last time we got fresh data 1402 * from the SC. 1403 */ 1404 ksp->ks_snaptime = last_board_read_time; 1405 1406 ASSERT(board_count_snapshot <= SG_MAX_BDS); 1407 /* 1408 * For each entry in the board_cache_snapshot we check to see 1409 * if the node_id is != NULL before we copy it into 1410 * the kstat buf. 1411 */ 1412 for (i = 0; i < SG_MAX_BDS; i++) { 1413 bdp = &board_cache_snapshot[i]; 1414 DCMN_ERR_CACHE(CE_NOTE, "%s: looking at " 1415 "cache_snapshot entry[%d], node=%d", 1416 f, i, bdp->node_id); 1417 if (bdp->node_id >= 0) { 1418 /* 1419 * Need a check to ensure that the buf 1420 * is still within the allocated size. 1421 * We check how many boards are already 1422 * in the user buf before adding one. 
1423 */ 1424 num_bds++; 1425 if (num_bds > board_count_snapshot) { 1426 ksp->ks_data_size = 0; 1427 ksp->ks_data = NULL; 1428 DCMN_ERR(CE_WARN, "%s: buf overflow." 1429 " %d >= %d.", 1430 f, num_bds, board_count_snapshot); 1431 return (EIO); 1432 } 1433 1434 DCMN_ERR_CACHE(CE_NOTE, "%s: about to bcopy" 1435 " cache_snapshot entry[%d], node=%d," 1436 " board=%d", f, i, bdp->node_id, 1437 bdp->board_num); 1438 bcopy(bdp, buf, sizeof (sg_board_info_t)); 1439 buf = ((sg_board_info_t *)buf) + 1; 1440 } 1441 } 1442 return (0); 1443 1444 default: 1445 return (EINVAL); 1446 } 1447 } 1448 1449 1450 /* 1451 * This function coordinates reading the env data from the SC. 1452 * 1453 * ERROR: 1454 * If an error occurs while making a call to the mailbox and we have data 1455 * in the cache from a previous call to the SC, we return an error of 0. 1456 * That way the kstat framework will return the old data instead of 1457 * returning an error and an empty kstat. 1458 */ 1459 static int 1460 sgenv_get_env_info_data(void) 1461 { 1462 DCMN_ERR_S(f, "sgenv_get_env_info_data()"); 1463 1464 envresp_key_t new_keys[SGENV_MAX_HPU_KEYS] = {0}; 1465 envresp_key_t old_key; 1466 envresp_key_t key; 1467 1468 int i; 1469 1470 int err = 0; /* return value of func's which get env data */ 1471 int status = 0; /* reason why env data func returned an error */ 1472 1473 DCMN_ERR_EVENT(CE_NOTE, "%s: entered.", f); 1474 1475 err = sgenv_get_hpu_keys(new_keys, &status); 1476 1477 if (err != 0) { 1478 /* 1479 * If we get an error getting the key values, then we return 1480 * as we cannot proceed any farther. If there is old env data 1481 * in the cache, then we return zero so that the kstat 1482 * framework will export the old data. 
1483 */ 1484 if (env_cache_updated == FALSE) { 1485 sgenv_mbox_error_msg("HPU Keys", err, status); 1486 return (err); 1487 } else { 1488 sgenv_mbox_error_msg("HPU Keys", err, status); 1489 return (0); 1490 } 1491 } 1492 1493 1494 for (i = 0; i < SGENV_MAX_HPU_KEYS; i++) { 1495 1496 if (vol_sensor_count[i] == 0) { 1497 /* empty collection */ 1498 old_key = 0; 1499 } else { 1500 /* 1501 * populated collection: 1502 * (assert size is OK, and 1st sensor is pseudo-sensor) 1503 */ 1504 ASSERT(env_cache[i] != NULL); 1505 ASSERT(env_cache[i][0].sd_id.id.sensor_part == 1506 SG_SENSOR_PART_SCAPP); 1507 ASSERT(env_cache[i][0].sd_id.id.sensor_type == 1508 SG_SENSOR_TYPE_ENVDB); 1509 ASSERT(SG_INFO_VALUESTATUS(env_cache[i][0].sd_infostamp) 1510 == SG_INFO_VALUE_OK); 1511 1512 old_key = env_cache[i][0].sd_value; 1513 } 1514 1515 key = new_keys[i]; 1516 1517 /* 1518 * No data is associated with this key position and there was 1519 * no data on the previous read either so we simply continue 1520 * to the next key position. 1521 */ 1522 if ((key == 0) && (old_key == 0)) { 1523 ASSERT(env_cache[i] == NULL); 1524 continue; 1525 } 1526 1527 1528 /* 1529 * We need to grab this lock every time we are going to 1530 * update a HPU. However, a kstat_read can grab 1531 * the env_cache_lock when it wants to get a snapshot of 1532 * the env_cache. This has the affect of stopping the 1533 * active env_cache writer after they have updated the 1534 * active HPU, allowing the kstat_read to get a dump of 1535 * the env_cache, then the env_cache writer can resume 1536 * updating the cache. For performance it is more important 1537 * that the kstat_read completes quickly so we allow the 1538 * kstat_read to interrupt the updating of the env_cache. 1539 * The updating can take anything from a few seconds to 1540 * several minutes to complete. 
1541 */ 1542 mutex_enter(&env_cache_lock); 1543 1544 /* 1545 * If the key just read is zero, then the 1546 * group of sensors have been removed by 1547 * some means and we need to zero out 1548 * the env_cache. (this ensures that data 1549 * belonging to a removed board is not 1550 * returned) 1551 */ 1552 if (key == 0) { 1553 ASSERT(old_key != 0); 1554 (void) sgenv_clear_env_cache_entry(i); 1555 mutex_exit(&env_cache_lock); 1556 continue; 1557 } 1558 1559 /* 1560 * Check to see if this key has changed since 1561 * the last read. 1562 * 1563 * If it has changed, we need to update everything. 1564 * 1565 * If it hasn't we simply read the volatiles 1566 * and check to see if the constants have changed. 1567 */ 1568 if (key != old_key) { 1569 /* 1570 * If the key is non-zero, then a new HPU has 1571 * been added to the system or it has changed 1572 * somehow and we need to re-read everything. 1573 * (we also need to zero out the env_cache as 1574 * there may be less sensors returned now and 1575 * the old ones may not be overwritten) 1576 */ 1577 1578 /* 1579 * If the <env_cache> has not already been 1580 * allocated for this key position then we 1581 * go ahead and allocate it. 
1582 */ 1583 if (env_cache[i] == NULL) { 1584 err = sgenv_create_env_cache_entry(i); 1585 if (err == DDI_FAILURE) { 1586 mutex_exit(&env_cache_lock); 1587 continue; 1588 } 1589 } 1590 1591 err = sgenv_get_env_data(new_keys[i], i, 1592 SG_GET_ENV_CONSTANTS, &status); 1593 if (err) { 1594 err = sgenv_handle_env_data_error(err, status, 1595 i, old_key, "Constant Data"); 1596 mutex_exit(&env_cache_lock); 1597 if (err != DDI_FAILURE) { 1598 continue; 1599 } else if (env_cache_updated == TRUE) { 1600 return (0); 1601 } else { 1602 return (DDI_FAILURE); 1603 } 1604 } 1605 1606 err = sgenv_get_env_data(new_keys[i], i, 1607 SG_GET_ENV_THRESHOLDS, &status); 1608 if (err) { 1609 err = sgenv_handle_env_data_error(err, status, 1610 i, old_key, "Threshold Data"); 1611 mutex_exit(&env_cache_lock); 1612 if (err != DDI_FAILURE) { 1613 continue; 1614 } else if (env_cache_updated == TRUE) { 1615 return (0); 1616 } else { 1617 return (DDI_FAILURE); 1618 } 1619 } 1620 1621 err = sgenv_get_env_data(new_keys[i], i, 1622 SG_GET_ENV_VOLATILES, &status); 1623 if (err) { 1624 err = sgenv_handle_env_data_error(err, status, 1625 i, old_key, "Volatile Data (fresh)"); 1626 mutex_exit(&env_cache_lock); 1627 if (err != DDI_FAILURE) { 1628 continue; 1629 } else if (env_cache_updated == TRUE) { 1630 return (0); 1631 } else { 1632 return (DDI_FAILURE); 1633 } 1634 } 1635 1636 /* 1637 * As we have successfully got env data for a HPU, 1638 * we ensure <env_cache_updated> is set to TRUE so that 1639 * in the future, if an error occurs during the mailbox 1640 * transfer, we know that there is old data for at 1641 * least one HPU in the <env_cache> which could be 1642 * returned instead of returning an error to the kstat 1643 * framework indicating that we have no data to return. 1644 */ 1645 env_cache_updated = TRUE; 1646 last_env_read_time = gethrtime(); 1647 1648 } else { 1649 /* 1650 * key == old_key 1651 * 1652 * Handle the case when the value of the old key and 1653 * the new key are identical. 
1654 */ 1655 ASSERT(env_cache[i] != NULL); 1656 1657 /* 1658 * If the keys are identical, then the quasi-constants 1659 * should not have changed (and so don't need updating). 1660 * Similarly for the threshold readings. 1661 */ 1662 1663 /* Update the volatile data */ 1664 err = sgenv_get_env_data(new_keys[i], i, 1665 SG_GET_ENV_VOLATILES, &status); 1666 if (err) { 1667 err = sgenv_handle_env_data_error(err, status, 1668 i, old_key, "Volatile Data (update)"); 1669 mutex_exit(&env_cache_lock); 1670 if (err == DDI_FAILURE) { 1671 return (0); 1672 } else { 1673 continue; 1674 } 1675 } 1676 1677 } 1678 mutex_exit(&env_cache_lock); 1679 } 1680 1681 return (0); 1682 } 1683 1684 1685 static int 1686 sgenv_get_board_info_data(void) 1687 { 1688 /* 1689 * This array keeps track of the valid nodes in a system. A call is 1690 * made to OBP to get the "nodeid" property from all the ssm nodes, 1691 * and for each nodeid found, that position in the array is set to 1692 * TRUE. For a Serengeti only one position in the array will be TRUE. 
1693 */ 1694 static uint_t node_present[SSM_MAX_INSTANCES] = {SGENV_NO_NODE_EXISTS}; 1695 1696 static fn_t f = "sgenv_get_board_info_data()"; 1697 static int first_time = TRUE; 1698 1699 sbbc_msg_t req; 1700 sbbc_msg_t resp; 1701 int node; /* loop index */ 1702 int board; /* loop index */ 1703 show_board_t show_bd, *shbp = &show_bd; 1704 info_t inform; 1705 int status; /* msg_status returned by response */ 1706 int rv = 0; /* return value of call to mailbox */ 1707 sg_board_info_t *ptr; 1708 1709 DCMN_ERR_EVENT(CE_NOTE, "%s: entered.", f); 1710 1711 ASSERT(board_cache != NULL); 1712 1713 if (first_time) { 1714 sgenv_set_valid_node_positions(node_present); 1715 first_time = FALSE; 1716 } 1717 1718 for (node = 0; node < SSM_MAX_INSTANCES; node++) { 1719 1720 if (node_present[node] == SGENV_NO_NODE_EXISTS) 1721 continue; 1722 1723 for (board = 0; board < SG_MAX_BDS; board++) { 1724 1725 /* 1726 * If we have discovered in a previous call to the SC 1727 * that there is no board in this slot on this type of 1728 * chassis then we don't waste resources asking the SC 1729 * for nonexistent data. 
1730 */ 1731 if ((node_present[node] & (1 << board)) == 0) 1732 continue; 1733 1734 inform.board = board; 1735 inform.node = node; 1736 inform.revision = 0xdead; 1737 1738 req.msg_type.type = DR_MBOX; 1739 req.msg_type.sub_type = DR_MBOX_SHOW_BOARD; 1740 req.msg_status = SG_MBOX_STATUS_SUCCESS; 1741 req.msg_len = sizeof (info_t); 1742 req.msg_bytes = sizeof (info_t); 1743 req.msg_buf = (caddr_t)&inform; 1744 1745 bzero(shbp, sizeof (show_board_t)); 1746 shbp->s_cond = -1; 1747 shbp->s_power = -1; 1748 shbp->s_assigned = -1; 1749 shbp->s_claimed = -1; 1750 shbp->s_present = -1; 1751 1752 resp.msg_type.type = DR_MBOX; 1753 resp.msg_type.sub_type = DR_MBOX_SHOW_BOARD; 1754 resp.msg_bytes = sizeof (show_board_t); 1755 resp.msg_status = SG_MBOX_STATUS_SUCCESS; 1756 resp.msg_len = sizeof (show_board_t); 1757 resp.msg_buf = (caddr_t)shbp; 1758 1759 1760 /* 1761 * We want to avoid the case where an invalid time 1762 * is specified by a user (by patching the 1763 * global variable <sgenv_max_mbox_wait_time>). 1764 * 1765 * Any incorrect values are reset to the default time. 1766 */ 1767 if (sgenv_max_mbox_wait_time <= 1768 max(sbbc_mbox_min_timeout, 0)) 1769 sgenv_max_mbox_wait_time = 1770 sbbc_mbox_default_timeout; 1771 1772 rv = sbbc_mbox_request_response(&req, &resp, 1773 sgenv_max_mbox_wait_time); 1774 status = resp.msg_status; 1775 1776 if ((rv) || (status != SG_MBOX_STATUS_SUCCESS)) { 1777 /* 1778 * errors from Solaris sgsbbc driver 1779 */ 1780 if (status > SG_MBOX_STATUS_SUCCESS) { 1781 sgenv_mbox_error_msg("Board Info", rv, 1782 resp.msg_status); 1783 return (rv); 1784 } 1785 1786 /* 1787 * errors from SCAPP 1788 */ 1789 if (status == SG_MBOX_STATUS_ILLEGAL_NODE) { 1790 sgenv_mbox_error_msg("Board Info", rv, 1791 resp.msg_status); 1792 node_present[node] = 1793 SGENV_NO_NODE_EXISTS; 1794 1795 /* 1796 * No point looping through the rest of 1797 * the boards associated with this node. 
1798 */ 1799 break; 1800 1801 } else if (status == 1802 SG_MBOX_STATUS_ILLEGAL_SLOT) { 1803 1804 /* 1805 * We clear the bit representing <board> 1806 * in <node> to indicate that this slot 1807 * cannot exist on this chassis. 1808 */ 1809 node_present[node] &= (~(1 << board) & 1810 SGENV_NODE_TYPE_DS); 1811 continue; 1812 1813 } else if (status == 1814 SG_MBOX_STATUS_BOARD_ACCESS_DENIED) { 1815 /* 1816 * We cannot access data for this slot, 1817 * however we may be able to do so in 1818 * the future. We do nothing. 1819 */ 1820 rv = rv; 1821 } else { 1822 char err_msg[40]; 1823 1824 (void) sprintf(err_msg, 1825 "Board data for " 1826 "Node%d/Slot%d", node, board); 1827 sgenv_mbox_error_msg(err_msg, rv, 1828 resp.msg_status); 1829 1830 if (rv == 0) 1831 rv = status; 1832 1833 continue; 1834 } 1835 } 1836 1837 mutex_enter(&board_cache_lock); 1838 ptr = &board_cache[board]; 1839 1840 /* 1841 * Check if the SC returns data for this board. 1842 */ 1843 if (shbp->s_assigned == -1) { 1844 /* 1845 * If this cache entry used to have data and 1846 * now doesn't we decrement the board_count 1847 * clear the env_cache. The board must have 1848 * been removed. 1849 */ 1850 if (ptr->node_id != -1) { 1851 board_count--; 1852 1853 /* 1854 * clear board_cache entry by 1855 * setting node_id to -1; 1856 */ 1857 ptr->node_id = -1; 1858 DCMN_ERR_CACHE(CE_NOTE, "%s: " 1859 "Clearing cache line %d [%p]", 1860 f, board, (void *)ptr); 1861 } 1862 } else { 1863 /* 1864 * If this cache entry was previously empty 1865 * and we now have data for it we increment 1866 * the board_count. A new board must have 1867 * been added. 
1868 */ 1869 if (ptr->node_id == -1) 1870 board_count++; 1871 /* 1872 * update the board_cache entry 1873 */ 1874 DCMN_ERR_CACHE(CE_NOTE, "%s: " 1875 "Writing data for bd=%d into " 1876 " the board_cache at [%p]", 1877 f, board, (void *)ptr); 1878 ptr->node_id = node; 1879 ptr->board_num = board; 1880 ptr->condition = shbp->s_cond; 1881 ptr->assigned = shbp->s_assigned; 1882 ptr->claimed = shbp->s_claimed; 1883 ptr->present = shbp->s_present; 1884 ptr->led.led_status = 1885 shbp->s_ledstatus; 1886 last_board_read_time = gethrtime(); 1887 } 1888 mutex_exit(&board_cache_lock); 1889 } /* board */ 1890 } /* node */ 1891 1892 /* 1893 * Indicate that have managed to store valid data in the <board_cache> 1894 * at least once. 1895 */ 1896 if (board_count > 0) 1897 board_cache_updated = TRUE; 1898 1899 1900 return (rv); 1901 } 1902 1903 1904 static int 1905 sgenv_get_hpu_keys(envresp_key_t *new, int *status) 1906 { 1907 sbbc_msg_t req; /* request */ 1908 sbbc_msg_t resp; /* response */ 1909 1910 int rv; /* return value from call to mbox */ 1911 1912 req.msg_type.type = SG_ENV; 1913 req.msg_type.sub_type = SG_GET_ENV_HPU_KEYS; 1914 req.msg_status = SG_MBOX_STATUS_SUCCESS; 1915 req.msg_len = 0; 1916 req.msg_bytes = 0; 1917 1918 resp.msg_type.type = SG_ENV; 1919 resp.msg_type.sub_type = SG_GET_ENV_HPU_KEYS; 1920 resp.msg_status = SG_MBOX_STATUS_SUCCESS; 1921 resp.msg_len = sizeof (envresp_key_t) * SGENV_MAX_HPU_KEYS; 1922 resp.msg_bytes = 0; 1923 resp.msg_buf = (caddr_t)new; 1924 1925 /* 1926 * We want to avoid the case where an invalid time 1927 * is specified by a user (by patching the 1928 * global variable <sgenv_max_mbox_wait_time>). 1929 * 1930 * Any incorrect values are reset to the default time. 
1931 */ 1932 if (sgenv_max_mbox_wait_time <= max(sbbc_mbox_min_timeout, 0)) 1933 sgenv_max_mbox_wait_time = sbbc_mbox_default_timeout; 1934 1935 rv = sbbc_mbox_request_response(&req, &resp, sgenv_max_mbox_wait_time); 1936 1937 *status = resp.msg_status; 1938 1939 return (rv); 1940 } 1941 1942 1943 static int 1944 sgenv_get_env_data(envresp_key_t key, int key_posn, uint16_t flag, int *status) 1945 { 1946 /* 1947 * Only one of these buffers is ever going to be used in a call 1948 * so to save kernel stack space we use a union. 1949 */ 1950 union { 1951 envresp_constants_t con[SGENV_MAX_SENSORS_PER_KEY]; 1952 envresp_volatiles_t vol[SGENV_MAX_SENSORS_PER_KEY]; 1953 envresp_thresholds_t thr[SGENV_MAX_SENSORS_PER_KEY]; 1954 } buf; 1955 1956 sbbc_msg_t req; /* request */ 1957 sbbc_msg_t resp; /* response */ 1958 1959 int i; /* loop variable for mbox msg_buf */ 1960 int rv; /* return value from call to mbox */ 1961 1962 ASSERT(MUTEX_HELD(&env_cache_lock)); 1963 ASSERT(env_cache[key_posn] != NULL); 1964 1965 if (flag == SG_GET_ENV_CONSTANTS) { 1966 resp.msg_len = sizeof (buf.con); 1967 resp.msg_buf = (caddr_t)buf.con; 1968 1969 } else if (flag == SG_GET_ENV_VOLATILES) { 1970 resp.msg_len = sizeof (buf.vol); 1971 resp.msg_buf = (caddr_t)buf.vol; 1972 1973 } else if (flag == SG_GET_ENV_THRESHOLDS) { 1974 resp.msg_len = sizeof (buf.thr); 1975 resp.msg_buf = (caddr_t)buf.thr; 1976 1977 } else { 1978 *status = EINVAL; 1979 return (-1); 1980 } 1981 1982 req.msg_type.type = SG_ENV; 1983 req.msg_type.sub_type = flag; 1984 req.msg_status = SG_MBOX_STATUS_SUCCESS; 1985 req.msg_len = 0; 1986 req.msg_bytes = 0; 1987 req.msg_data[0] = key; 1988 1989 resp.msg_type.type = SG_ENV; 1990 resp.msg_type.sub_type = flag; 1991 resp.msg_status = SG_MBOX_STATUS_SUCCESS; 1992 resp.msg_bytes = 0; 1993 1994 /* 1995 * We want to avoid the case where an invalid time 1996 * is specified by a user (by patching the 1997 * global variable <sgenv_max_mbox_wait_time>). 
1998 * 1999 * Any incorrect values are reset to the default time. 2000 */ 2001 if (sgenv_max_mbox_wait_time <= max(sbbc_mbox_min_timeout, 0)) 2002 sgenv_max_mbox_wait_time = sbbc_mbox_default_timeout; 2003 2004 2005 rv = sbbc_mbox_request_response(&req, &resp, sgenv_max_mbox_wait_time); 2006 2007 *status = resp.msg_status; 2008 2009 /* 2010 * We now check that the data returned is valid. 2011 */ 2012 if (rv != 0) { 2013 /* 2014 * The SBBC driver encountered an error. 2015 */ 2016 return (rv); 2017 2018 } else { 2019 /* 2020 * The SC encountered an error. 2021 */ 2022 switch (*status) { 2023 case SG_MBOX_STATUS_SUCCESS: 2024 /* 2025 * No problems encountered - continue and return the 2026 * new data. 2027 */ 2028 break; 2029 2030 case ETIMEDOUT: 2031 /* 2032 * For some reason the mailbox failed to return data 2033 * and instead timed out so we return ETIMEDOUT 2034 */ 2035 return (ETIMEDOUT); 2036 2037 case ENXIO: 2038 /* 2039 * no sensors associated with this key, this may have 2040 * changed since we read the keys. 2041 */ 2042 return (ENXIO); 2043 2044 default: 2045 /* 2046 * The contents of the mbox message contain corrupt 2047 * data. Flag this as an error to be returned. 2048 */ 2049 SGENV_PRINT_MBOX_MSG((&resp), "Env info problem"); 2050 return (EINVAL); 2051 } 2052 } 2053 2054 /* 2055 * Depending on the type of data returned, save the constant/volatile 2056 * data returned in the mailbox message into the <env_cache>. 
2057 */ 2058 for (i = 0; i < resp.msg_data[0]; i++) { 2059 2060 if (flag == SG_GET_ENV_CONSTANTS) { 2061 env_cache[key_posn][i].sd_id.tag_id = 2062 buf.con[i].id.tag_id; 2063 env_cache[key_posn][i].sd_lo = 2064 buf.con[i].lo; 2065 env_cache[key_posn][i].sd_hi = 2066 buf.con[i].hi; 2067 2068 } else if (flag == SG_GET_ENV_VOLATILES) { 2069 env_cache[key_posn][i].sd_value = 2070 buf.vol[i].value; 2071 env_cache[key_posn][i].sd_infostamp = 2072 buf.vol[i].info; 2073 2074 sgenv_set_sensor_status(&env_cache[key_posn][i]); 2075 2076 } else if (flag == SG_GET_ENV_THRESHOLDS) { 2077 env_cache[key_posn][i].sd_lo_warn = 2078 buf.thr[i].lo_warn; 2079 env_cache[key_posn][i].sd_hi_warn = 2080 buf.thr[i].hi_warn; 2081 } 2082 } 2083 2084 if (flag == SG_GET_ENV_VOLATILES) 2085 vol_sensor_count[key_posn] = resp.msg_data[0]; 2086 2087 return (rv); 2088 } 2089 2090 2091 /* 2092 * This function handles any errors received from the mailbox framework while 2093 * getting environmental data. 2094 * 2095 * INPUT PARAMETERS 2096 * err - return value from call to mailbox framework. 2097 * status - message status returned by mailbox framework. 2098 * key - key from previous (if any) reading of env data. 2099 * Needed to see if we have old data in the <env_cache>. 2100 * str - String indicating what type of env request failed. 2101 * 2102 * RETURN VALUES 2103 * rv == DDI_FAILURE - there is no point in continuing processing 2104 * the data, we should exit from the kstat 2105 * framework. 2106 * rv != DDI_FAILURE - error has been handled correctly, continue 2107 * processing the data returned from the SC. 2108 */ 2109 static int 2110 sgenv_handle_env_data_error(int err, int status, int key_posn, 2111 envresp_key_t key, char *str) 2112 { 2113 int rv = DDI_SUCCESS; 2114 2115 ASSERT(str != (char *)NULL); 2116 2117 switch (err) { 2118 case ENXIO: 2119 /* 2120 * The SC has changed the env data associated with this key 2121 * since we started getting the data. 
We cannot tell if the 2122 * data has disappeared due to the removal of the board from 2123 * our Domain or just that the data has been updated. We 2124 * simply return the last known data (if possible) and the 2125 * next time we request the env data, the SC will have 2126 * finished processing this board so we will receive the 2127 * correct key values and we can get the correct data. 2128 */ 2129 DCMN_ERR_CACHE(CE_NOTE, "key @ posn %d has changed from %d" 2130 " while %s", key_posn, key, str); 2131 rv = ENXIO; 2132 break; 2133 2134 default: 2135 sgenv_mbox_error_msg(str, err, status); 2136 rv = DDI_FAILURE; 2137 break; 2138 } 2139 2140 /* 2141 * If there was no data in the <env_cache>, we need to clear the data 2142 * just added as the <env_cache> will only be partially filled. 2143 */ 2144 if (key == 0) 2145 sgenv_clear_env_cache_entry(key_posn); 2146 2147 return (rv); 2148 } 2149 2150 2151 /* 2152 * If the sensor readings for a particular collection of HPUs become invalid, 2153 * then we clear the cache by freeing up the memory. 
2154 */ 2155 static void 2156 sgenv_clear_env_cache_entry(int key_posn) 2157 { 2158 ASSERT(MUTEX_HELD(&env_cache_lock)); 2159 2160 if (env_cache[key_posn] != NULL) { 2161 kmem_free(env_cache[key_posn], sizeof (env_sensor_t) * 2162 SGENV_MAX_SENSORS_PER_KEY); 2163 env_cache[key_posn] = NULL; 2164 vol_sensor_count[key_posn] = 0; 2165 } 2166 } 2167 2168 2169 static void 2170 sgenv_mbox_error_msg(char *str, int err, int status) 2171 { 2172 /* 2173 * We update the count of errors we have encountered during calls to 2174 * the mailbox framework (unless we will cause a wraparound) 2175 */ 2176 if (sgenv_mbox_error_count < INT_MAX) 2177 sgenv_mbox_error_count++; 2178 2179 #ifdef DEBUG 2180 if ((sgenv_debug & SGENV_DEBUG_MSG) == 0) 2181 return; 2182 2183 ASSERT(str != NULL); 2184 2185 switch (err) { 2186 case ENOTSUP: 2187 DCMN_ERR(CE_WARN, "!This system configuration does not " 2188 "support SGENV"); 2189 break; 2190 case ETIMEDOUT: 2191 DCMN_ERR(CE_WARN, "!Mailbox timed out while servicing " 2192 "SGENV request for %s", str); 2193 break; 2194 default: 2195 DCMN_ERR(CE_WARN, "!Error occurred reading %s, Errno=%d," 2196 " Status=%d", str, err, status); 2197 break; 2198 } 2199 #endif 2200 } 2201 2202 2203 /* 2204 * INPUT PARAMETERS 2205 * key_posn - The position in the env_cache for which we want to 2206 * allocate space for a HPU's env data. 2207 * 2208 * ERROR VALUES 2209 * DDI_FAILURE - We failed to allocate memory for this cache entry. 2210 * There is no point asking the SC for env data for this 2211 * HPU as we will have nowhere to store it. 
2212 */ 2213 static int 2214 sgenv_create_env_cache_entry(int key_posn) 2215 { 2216 int i; /* used to loop thru each sensor to set the status */ 2217 2218 ASSERT(key_posn < SGENV_MAX_HPU_KEYS); 2219 ASSERT(key_posn >= 0); 2220 2221 env_cache[key_posn] = (env_sensor_t *)kmem_zalloc( 2222 sizeof (env_sensor_t) * SGENV_MAX_SENSORS_PER_KEY, KM_NOSLEEP); 2223 if (env_cache[key_posn] == NULL) { 2224 cmn_err(CE_WARN, "Failed to allocate memory for env_cache[%d]", 2225 key_posn); 2226 return (DDI_FAILURE); 2227 } 2228 2229 for (i = 0; i < SGENV_MAX_SENSORS_PER_KEY; i++) 2230 env_cache[key_posn][i].sd_status = SG_SENSOR_STATUS_OK; 2231 2232 return (DDI_SUCCESS); 2233 } 2234 2235 2236 static void 2237 sgenv_destroy_env_cache(void) 2238 { 2239 int i; 2240 2241 ASSERT(MUTEX_HELD(&env_cache_lock) == FALSE); 2242 mutex_enter(&env_cache_lock); 2243 for (i = 0; i < SGENV_MAX_HPU_KEYS; i++) { 2244 if (env_cache[i] != NULL) { 2245 kmem_free(env_cache[i], sizeof (env_sensor_t) * 2246 SGENV_MAX_SENSORS_PER_KEY); 2247 env_cache[i] = NULL; 2248 vol_sensor_count[i] = 0; 2249 } 2250 } 2251 env_cache_updated = FALSE; 2252 2253 mutex_exit(&env_cache_lock); 2254 } 2255 2256 static void 2257 sgenv_update_env_kstat_size(kstat_t *ksp) 2258 { 2259 int i; 2260 2261 ASSERT(MUTEX_HELD(&env_cache_lock)); 2262 2263 /* reinitialize this and recount number of sensors */ 2264 ksp->ks_data_size = 0; 2265 2266 for (i = 0; i < SGENV_MAX_HPU_KEYS; i++) { 2267 if (vol_sensor_count[i] <= 0) 2268 continue; 2269 2270 ASSERT(vol_sensor_count[i] <= SGENV_MAX_SENSORS_PER_KEY); 2271 2272 /* 2273 * increment ksp->ks_data_size by the number of 2274 * sensors in the collection <i>. 2275 */ 2276 ksp->ks_data_size += vol_sensor_count[i] * 2277 sizeof (env_sensor_t); 2278 } 2279 ASSERT(ksp->ks_data_size >= 0); 2280 } 2281 2282 2283 /* 2284 * This function is triggered by the thread that updates the env_cache. 
2285 * It checks for any sensors which have exceeded their limits/thresholds 2286 * and generates sysevents for the sensor values that have changed. 2287 */ 2288 /*ARGSUSED*/ 2289 static uint_t 2290 sgenv_check_sensor_thresholds(void) 2291 { 2292 DCMN_ERR_S(f, "sgenv_poll_env()"); 2293 2294 int key; /* loop through keys */ 2295 int i; /* loops through each sensor for each <key> */ 2296 2297 env_sensor_t sensor; 2298 env_sensor_status_t status; 2299 2300 DCMN_ERR_EVENT(CE_NOTE, "%s: just been triggered.", f); 2301 2302 mutex_enter(&env_cache_lock); 2303 2304 for (key = 0; key < SGENV_MAX_HPU_KEYS; key++) { 2305 2306 if (vol_sensor_count[key] == 0) 2307 continue; 2308 2309 for (i = 0; i < vol_sensor_count[key]; i++) { 2310 sensor = env_cache[key][i]; 2311 status = sensor.sd_status; 2312 2313 if (SG_GET_SENSOR_STATUS(status) == 2314 SG_GET_PREV_SENSOR_STATUS(status)) { 2315 continue; 2316 } 2317 2318 /* 2319 * This sensor has changed in status since the last 2320 * time we polled - we need to inform the sysevent 2321 * framework. 2322 */ 2323 switch (sensor.sd_id.id.sensor_type) { 2324 /* 2325 * we don't care about the pseudo sensors and 2326 * the Fan Status is notified by a separate 2327 * unsolicited event so we simply get the next 2328 * reading 2329 */ 2330 case SG_SENSOR_TYPE_ENVDB: 2331 case SG_SENSOR_TYPE_COOLING: 2332 continue; 2333 2334 /* 2335 * We have handled all the special cases by now. 2336 */ 2337 default: 2338 (void) sgenv_process_threshold_event(sensor); 2339 break; 2340 } 2341 2342 SGENV_PRINT_POLL_INFO(sensor); 2343 } 2344 } 2345 mutex_exit(&env_cache_lock); 2346 2347 return (DDI_SUCCESS); 2348 } 2349 2350 2351 /* 2352 * This function is passed in an array of length SSM_MAX_INSTANCES and 2353 * it searches OBP to for ssm nodes, and for each one if finds, it sets the 2354 * corresponding position in the array to TRUE. 
2355 */ 2356 static void 2357 sgenv_set_valid_node_positions(uint_t *node_present) 2358 { 2359 dev_info_t *rdip; /* root dev info ptr */ 2360 dev_info_t *dip; 2361 2362 ASSERT(node_present != NULL); 2363 2364 rdip = ddi_root_node(); 2365 2366 for (dip = ddi_get_child(rdip); dip != NULL; 2367 dip = ddi_get_next_sibling(dip)) { 2368 if (strncmp("ssm", ddi_node_name(dip), 3) == 0) { 2369 int value; 2370 2371 value = ddi_getprop(DDI_DEV_T_ANY, dip, 2372 DDI_PROP_DONTPASS, "nodeid", 0); 2373 2374 /* 2375 * If we get a valid nodeID which has not already 2376 * been found in a previous call to this function, 2377 * then we set all 10 LSB bits to indicate there may 2378 * be a board present in each slot. 2379 * 2380 * It is the job of sgenv_get_board_info_data() to weed 2381 * out the invalid cases when we don't have a 2382 * DS chassis. 2383 * 2384 * NOTE: We make the assumption that a chassis cannot 2385 * be DR'ed out, which is true for a Serengeti. 2386 * By the time WildCat need this functionality Solaris 2387 * will be able to know what kind of a chassis is 2388 * present and there will be no need to try and work 2389 * this out from the msg_status from the mailbox. 2390 */ 2391 if ((value >= 0) && 2392 (value < SSM_MAX_INSTANCES) && 2393 (node_present[value] == SGENV_NO_NODE_EXISTS)) { 2394 node_present[value] = SGENV_NODE_TYPE_DS; 2395 } 2396 2397 } 2398 } 2399 } 2400 2401 2402 static void 2403 sgenv_set_sensor_status(env_sensor_t *sensor) 2404 { 2405 env_sensor_status_t *status; 2406 2407 ASSERT(sensor != NULL); 2408 status = &sensor->sd_status; 2409 2410 /* 2411 * Save the previous status so we can compare them later 2412 */ 2413 SG_SET_PREV_SENSOR_STATUS(*status, *status); 2414 2415 switch (sensor->sd_id.id.sensor_type) { 2416 case SG_SENSOR_TYPE_ENVDB: 2417 /* 2418 * We want the status of this sensor to always be OK 2419 * The concept of limits/thresholds do not exist for it. 
2420 */ 2421 SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_OK); 2422 break; 2423 2424 case SG_SENSOR_TYPE_COOLING: 2425 /* 2426 * Fans have no concept of limits/thresholds, they have a state 2427 * which we store in the <sd_status> field so that we can see 2428 * when this state is changed. 2429 */ 2430 if (sensor->sd_value == SGENV_FAN_SPEED_HIGH) { 2431 SG_SET_SENSOR_STATUS(*status, 2432 SG_SENSOR_STATUS_FAN_HIGH); 2433 2434 } else if (sensor->sd_value == SGENV_FAN_SPEED_LOW) { 2435 SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_FAN_LOW); 2436 2437 } else if (sensor->sd_value == SGENV_FAN_SPEED_OFF) { 2438 SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_FAN_OFF); 2439 2440 } else { 2441 SG_SET_SENSOR_STATUS(*status, 2442 SG_SENSOR_STATUS_FAN_FAIL); 2443 } 2444 2445 /* 2446 * If this is the first time this fan status has been read, 2447 * then we need to initialize the previous reading to be the 2448 * same as the current reading so that an event is not 2449 * triggered. 2450 * 2451 * [ When the env_cache is being created, the status of the 2452 * sensors is set to SG_SENSOR_STATUS_OK, which is not a 2453 * valid Fan status ]. 
2454 */ 2455 if (SG_GET_PREV_SENSOR_STATUS(*status) == SG_SENSOR_STATUS_OK) { 2456 SG_SET_PREV_SENSOR_STATUS(*status, *status); 2457 } 2458 2459 break; 2460 2461 default: 2462 if (sensor->sd_value > sensor->sd_hi) { 2463 SG_SET_SENSOR_STATUS(*status, 2464 SG_SENSOR_STATUS_HI_DANGER); 2465 2466 } else if (sensor->sd_value > sensor->sd_hi_warn) { 2467 SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_HI_WARN); 2468 2469 } else if (sensor->sd_value < sensor->sd_lo) { 2470 SG_SET_SENSOR_STATUS(*status, 2471 SG_SENSOR_STATUS_LO_DANGER); 2472 2473 } else if (sensor->sd_value < sensor->sd_lo_warn) { 2474 SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_LO_WARN); 2475 2476 } else { 2477 SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_OK); 2478 } 2479 break; 2480 } 2481 } 2482 2483 2484 2485 2486 /* 2487 * This function, when given an integer arg describing a HPU type, 2488 * returns the descriptive string associated with this HPU type. 2489 */ 2490 static const char * 2491 sgenv_get_hpu_id_str(uint_t hpu_type) 2492 { 2493 const hpu_value_t *hpu_list = hpus; 2494 2495 while (hpu_list->name != (char *)NULL) { 2496 if (hpu_list->value == hpu_type) 2497 return (hpu_list->IDstr); 2498 else 2499 hpu_list++; 2500 } 2501 return ((char *)NULL); 2502 } 2503 2504 2505 /* 2506 * This function, when given an integer arg describing a sensor part, 2507 * returns the descriptive string associated with this sensor part. 2508 */ 2509 static const char * 2510 sgenv_get_part_str(uint_t sensor_part) 2511 { 2512 const part_value_t *part_list = parts; 2513 2514 while (part_list->name != (char *)NULL) { 2515 if (part_list->value == sensor_part) 2516 return (part_list->name); 2517 else 2518 part_list++; 2519 } 2520 return ((char *)NULL); 2521 } 2522 2523 2524 /* 2525 * This function, when given an integer arg describing a sensor type, 2526 * returns the descriptive string associated with this sensor type. 
2527 */ 2528 static const char * 2529 sgenv_get_type_str(uint_t sensor_type) 2530 { 2531 const type_value_t *type_list = types; 2532 2533 while (type_list->name != (char *)NULL) { 2534 if (type_list->value == sensor_type) 2535 return (type_list->name); 2536 else 2537 type_list++; 2538 } 2539 return ((char *)NULL); 2540 } 2541 2542 2543 /* 2544 * This function takes a sensor TagID and generates a string describing 2545 * where in the system the sensor is. 2546 */ 2547 static void 2548 sgenv_tagid_to_string(sensor_id_t id, char *str) 2549 { 2550 const char *hpu_str; 2551 const char *part_str; 2552 const char *type_str; 2553 2554 ASSERT(str != NULL); 2555 2556 hpu_str = sgenv_get_hpu_id_str(id.id.hpu_type); 2557 part_str = sgenv_get_part_str(id.id.sensor_part); 2558 type_str = sgenv_get_type_str(id.id.sensor_type); 2559 2560 (void) sprintf(str, 2561 "Sensor: Node=%d, Board=%s%d, Device=%s%d, Type=%s%d: reading has ", 2562 id.id.node_id, 2563 ((hpu_str != NULL) ? hpu_str : ""), 2564 id.id.hpu_slot, 2565 ((part_str != NULL) ? part_str : ""), 2566 id.id.sensor_partnum, 2567 ((type_str != NULL) ? type_str : ""), 2568 id.id.sensor_typenum); 2569 2570 } 2571 2572 2573 /* 2574 * This interrupt handler watches for unsolicited mailbox messages from the SC 2575 * telling it that the Keyswitch Position had changed. It then informs the 2576 * Sysevent Framework of this change. 
2577 */ 2578 static uint_t 2579 sgenv_keyswitch_handler(char *arg) 2580 { 2581 DCMN_ERR_S(f, "sgenv_keyswitch_handler()"); 2582 2583 sysevent_t *ev = NULL; 2584 sysevent_id_t eid; 2585 sysevent_value_t se_val; 2586 sysevent_attr_list_t *ev_attr_list = NULL; 2587 sg_event_key_position_t *payload = NULL; 2588 sbbc_msg_t *msg = NULL; 2589 int err; 2590 2591 DCMN_ERR_EVENT(CE_NOTE, "%s called", f); 2592 2593 if (arg == NULL) { 2594 DCMN_ERR_EVENT(CE_NOTE, "%s: arg == NULL", f); 2595 return (DDI_INTR_CLAIMED); 2596 } 2597 2598 msg = (sbbc_msg_t *)arg; 2599 if (msg->msg_buf == NULL) { 2600 DCMN_ERR_EVENT(CE_NOTE, "%s: msg_buf == NULL", f); 2601 return (DDI_INTR_CLAIMED); 2602 } 2603 2604 payload = (sg_event_key_position_t *)msg->msg_buf; 2605 if (payload == NULL) { 2606 DCMN_ERR_EVENT(CE_NOTE, "%s: payload == NULL", f); 2607 return (DDI_INTR_CLAIMED); 2608 } 2609 2610 DCMN_ERR_EVENT(CE_NOTE, "Key posn = %d", (int)*payload); 2611 2612 2613 /* 2614 * Allocate memory for sysevent buffer. 2615 */ 2616 ev = sysevent_alloc(EC_DOMAIN, ESC_DOMAIN_STATE_CHANGE, 2617 EP_SGENV, SE_NOSLEEP); 2618 if (ev == NULL) { 2619 cmn_err(CE_WARN, "%s: Failed to alloc mem for %s/%s event", 2620 f, EC_DOMAIN, ESC_DOMAIN_STATE_CHANGE); 2621 return (DDI_INTR_CLAIMED); 2622 } 2623 2624 2625 /* 2626 * Set the DOMAIN_WHAT_CHANGED attribute. 2627 */ 2628 se_val.value_type = SE_DATA_TYPE_STRING; 2629 se_val.value.sv_string = DOMAIN_KEYSWITCH; 2630 err = sysevent_add_attr(&ev_attr_list, DOMAIN_WHAT_CHANGED, 2631 &se_val, SE_NOSLEEP); 2632 if (err != 0) { 2633 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event", 2634 DOMAIN_WHAT_CHANGED, EC_DOMAIN, 2635 ESC_DOMAIN_STATE_CHANGE); 2636 sysevent_free(ev); 2637 return (DDI_INTR_CLAIMED); 2638 } 2639 2640 2641 /* 2642 * Log this event with sysevent framework. 
2643 */ 2644 if (sysevent_attach_attributes(ev, ev_attr_list) != 0) { 2645 cmn_err(CE_WARN, "Failed to attach attr list for %s/%s event", 2646 EC_DOMAIN, ESC_DOMAIN_STATE_CHANGE); 2647 sysevent_free_attr(ev_attr_list); 2648 sysevent_free(ev); 2649 return (DDI_INTR_CLAIMED); 2650 } 2651 err = log_sysevent(ev, SE_NOSLEEP, &eid); 2652 if (err != 0) { 2653 cmn_err(CE_WARN, "Failed to log %s/%s event", 2654 EC_DOMAIN, ESC_DOMAIN_STATE_CHANGE); 2655 sysevent_free(ev); 2656 return (DDI_INTR_CLAIMED); 2657 } 2658 2659 /* clean up */ 2660 sysevent_free(ev); 2661 2662 return (DDI_INTR_CLAIMED); 2663 } 2664 2665 2666 /* 2667 * This interrupt handler watches for unsolicited mailbox messages from the SC 2668 * telling it that an environmental sensor has exceeded a threshold/limit level 2669 * or has returned to normal having previously exceeded a threshold/limit level. 2670 * It then informs the Sysevent Framework of this change and updates the 2671 * env_cache. 2672 */ 2673 static uint_t 2674 sgenv_env_data_handler(char *arg) 2675 { 2676 DCMN_ERR_S(f, "sgenv_env_data_handler()"); 2677 2678 sg_event_env_changed_t *payload = NULL; 2679 sbbc_msg_t *msg = NULL; 2680 2681 DCMN_ERR_EVENT(CE_NOTE, "%s: just been triggered.", f); 2682 2683 if (arg == NULL) { 2684 DCMN_ERR_EVENT(CE_NOTE, "%s: arg == NULL", f); 2685 return (DDI_INTR_CLAIMED); 2686 } 2687 2688 msg = (sbbc_msg_t *)arg; 2689 2690 if (msg->msg_buf == NULL) { 2691 DCMN_ERR_EVENT(CE_NOTE, "%s: msg_buf == NULL", f); 2692 return (DDI_INTR_CLAIMED); 2693 } 2694 2695 payload = (sg_event_env_changed_t *)msg->msg_buf; 2696 2697 /* 2698 * We check the first field of the msg_buf to see if the event_type 2699 * is SC_EVENT_ENV, if it is then we handle the event. 
2700 */ 2701 if (payload->event_type != SC_EVENT_ENV) { 2702 return (DDI_INTR_CLAIMED); 2703 } 2704 2705 /* 2706 * We now need to signal to the env background thread to ask the SC 2707 * for env readings and discover which sensor caused the SC to send 2708 * the ENV event before sending a sysevent to userland. 2709 */ 2710 sgenv_indicate_cache_update_needed(ENV_CACHE); 2711 2712 return (DDI_INTR_CLAIMED); 2713 } 2714 2715 2716 /* 2717 * This interrupt handler watches for unsolicited mailbox messages from the SC 2718 * telling it that the status of a fan has changed. We register a sysevent 2719 * and trigger a softint to update the env cache. 2720 */ 2721 static uint_t 2722 sgenv_fan_status_handler(char *arg) 2723 { 2724 DCMN_ERR_S(f, "sgenv_fan_status_handler()"); 2725 2726 sysevent_t *ev = NULL; 2727 sysevent_id_t eid; 2728 sysevent_value_t se_val; 2729 sysevent_attr_list_t *ev_attr_list = NULL; 2730 sg_event_fan_status_t *payload = NULL; 2731 sbbc_msg_t *msg = NULL; 2732 char fan_str[MAXNAMELEN]; 2733 int err; 2734 2735 DCMN_ERR_EVENT(CE_NOTE, "%s: just been triggered.", f); 2736 2737 if (arg == NULL) { 2738 DCMN_ERR_EVENT(CE_NOTE, "%s: arg == NULL", f); 2739 return (DDI_INTR_CLAIMED); 2740 } 2741 2742 msg = (sbbc_msg_t *)arg; 2743 2744 /* 2745 * We check the first field of the msg_buf to see if the event_type 2746 * is SC_EVENT_FAN 2747 */ 2748 if (msg->msg_buf == NULL) { 2749 DCMN_ERR_EVENT(CE_NOTE, "%s: msg_buf == NULL", f); 2750 return (DDI_INTR_CLAIMED); 2751 } 2752 2753 payload = (sg_event_fan_status_t *)msg->msg_buf; 2754 2755 /* 2756 * If another type of ENV Event triggered this handler then we simply 2757 * return now. 2758 */ 2759 if (payload->event_type != SC_EVENT_FAN) { 2760 return (DDI_INTR_CLAIMED); 2761 } 2762 2763 /* 2764 * Allocate memory for sysevent buffer. 
2765 */ 2766 ev = sysevent_alloc(EC_ENV, ESC_ENV_FAN, EP_SGENV, SE_NOSLEEP); 2767 if (ev == NULL) { 2768 cmn_err(CE_WARN, "%s: Failed to alloc mem for %s/%s event", 2769 f, EC_ENV, ESC_ENV_FAN); 2770 return (DDI_INTR_CLAIMED); 2771 } 2772 2773 2774 /* 2775 * Set the following attributes for this event: 2776 * 2777 * ENV_FRU_ID 2778 * ENV_FRU_RESOURCE_ID 2779 * ENV_FRU_DEVICE 2780 * ENV_FRU_STATE 2781 * ENV_MSG 2782 * 2783 */ 2784 se_val.value_type = SE_DATA_TYPE_STRING; 2785 se_val.value.sv_string = ENV_RESERVED_ATTR; 2786 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_ID, &se_val, SE_NOSLEEP); 2787 if (err != 0) { 2788 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event", 2789 ENV_FRU_ID, EC_ENV, ESC_ENV_FAN); 2790 sysevent_free(ev); 2791 return (DDI_INTR_CLAIMED); 2792 } 2793 2794 se_val.value_type = SE_DATA_TYPE_STRING; 2795 se_val.value.sv_string = ENV_RESERVED_ATTR; 2796 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_RESOURCE_ID, 2797 &se_val, SE_NOSLEEP); 2798 if (err != 0) { 2799 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event", 2800 ENV_FRU_RESOURCE_ID, EC_ENV, ESC_ENV_FAN); 2801 sysevent_free_attr(ev_attr_list); 2802 sysevent_free(ev); 2803 return (DDI_INTR_CLAIMED); 2804 } 2805 2806 se_val.value_type = SE_DATA_TYPE_STRING; 2807 se_val.value.sv_string = ENV_RESERVED_ATTR; 2808 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_DEVICE, 2809 &se_val, SE_NOSLEEP); 2810 if (err != 0) { 2811 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event", 2812 ENV_FRU_DEVICE, EC_ENV, ESC_ENV_FAN); 2813 sysevent_free_attr(ev_attr_list); 2814 sysevent_free(ev); 2815 return (DDI_INTR_CLAIMED); 2816 } 2817 2818 /* 2819 * Checks the fan to see if it has failed. 
2820 */ 2821 se_val.value_type = SE_DATA_TYPE_INT32; 2822 switch (payload->fan_speed) { 2823 case SGENV_FAN_SPEED_OFF: 2824 case SGENV_FAN_SPEED_LOW: 2825 case SGENV_FAN_SPEED_HIGH: 2826 se_val.value.sv_int32 = ENV_OK; 2827 break; 2828 2829 case SGENV_FAN_SPEED_UNKNOWN: 2830 default: 2831 se_val.value.sv_int32 = ENV_FAILED; 2832 break; 2833 } 2834 2835 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_STATE, 2836 &se_val, SE_NOSLEEP); 2837 if (err != 0) { 2838 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event", 2839 ENV_FRU_STATE, EC_ENV, ESC_ENV_FAN); 2840 sysevent_free_attr(ev_attr_list); 2841 sysevent_free(ev); 2842 return (DDI_INTR_CLAIMED); 2843 } 2844 2845 2846 /* 2847 * Create the message to be sent to sysevent. 2848 */ 2849 (void) sprintf(fan_str, 2850 "The status of the fan in Node%d/Slot%d is now ", 2851 payload->node_id, payload->slot_number); 2852 switch (payload->fan_speed) { 2853 case SGENV_FAN_SPEED_OFF: 2854 (void) strcat(fan_str, SGENV_FAN_SPEED_OFF_STR); 2855 break; 2856 2857 case SGENV_FAN_SPEED_LOW: 2858 (void) strcat(fan_str, SGENV_FAN_SPEED_LOW_STR); 2859 break; 2860 2861 case SGENV_FAN_SPEED_HIGH: 2862 (void) strcat(fan_str, SGENV_FAN_SPEED_HIGH_STR); 2863 break; 2864 2865 case SGENV_FAN_SPEED_UNKNOWN: 2866 default: 2867 (void) strcat(fan_str, SGENV_FAN_SPEED_UNKNOWN_STR); 2868 break; 2869 } 2870 2871 DCMN_ERR_EVENT(CE_NOTE, "Fan: %s", fan_str); 2872 2873 se_val.value_type = SE_DATA_TYPE_STRING; 2874 se_val.value.sv_string = fan_str; 2875 err = sysevent_add_attr(&ev_attr_list, ENV_MSG, &se_val, SE_NOSLEEP); 2876 if (err != 0) { 2877 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event", 2878 ENV_MSG, EC_ENV, ESC_ENV_FAN); 2879 sysevent_free_attr(ev_attr_list); 2880 sysevent_free(ev); 2881 return (DDI_INTR_CLAIMED); 2882 } 2883 2884 2885 /* 2886 * Log this event with sysevent framework. 
2887 */ 2888 if (sysevent_attach_attributes(ev, ev_attr_list) != 0) { 2889 cmn_err(CE_WARN, "Failed to attach attr list for %s/%s event", 2890 EC_ENV, ESC_ENV_FAN); 2891 sysevent_free_attr(ev_attr_list); 2892 sysevent_free(ev); 2893 return (DDI_INTR_CLAIMED); 2894 } 2895 err = log_sysevent(ev, SE_NOSLEEP, &eid); 2896 if (err != 0) { 2897 cmn_err(CE_WARN, "Failed to log %s/%s event", 2898 EC_ENV, ESC_ENV_FAN); 2899 sysevent_free(ev); 2900 return (DDI_INTR_CLAIMED); 2901 } 2902 sysevent_free(ev); 2903 2904 /* 2905 * We now need to signal to the env background thread to ask the SC 2906 * for env readings and discover which sensor caused the SC to send 2907 * the ENV event before sending a sysevent to userland. 2908 */ 2909 sgenv_indicate_cache_update_needed(ENV_CACHE); 2910 2911 return (DDI_INTR_CLAIMED); 2912 } 2913 2914 2915 /* 2916 * This function informs the Sysevent Framework that a temperature, voltage 2917 * or current reading for a sensor has exceeded its threshold/limit value or 2918 * that the reading has returned to a safe value having exceeded its 2919 * threshold/limit value previously. 2920 */ 2921 static int 2922 sgenv_process_threshold_event(env_sensor_t sensor) 2923 { 2924 DCMN_ERR_S(f, "sgenv_process_threshold_event()"); 2925 2926 sysevent_t *ev = NULL; 2927 sysevent_id_t eid; 2928 sysevent_value_t se_val; 2929 sysevent_attr_list_t *ev_attr_list = NULL; 2930 int err; 2931 2932 char sensor_str[MAX_TAG_ID_STR_LEN]; /* holds the sensor TagID */ 2933 2934 /* 2935 * This function handles the case when a temperature reading passes 2936 * a threshold/limit level and also the case when there are power 2937 * fluctuations (voltage/current readings pass a threshold/limit level) 2938 * so we need to work out which case it is. 2939 * 2940 * if <temp_event_type> is TRUE, then need to handle an event 2941 * of type ESC_ENV_TEMP. 
2942 */ 2943 int temp_event_type; 2944 2945 switch (sensor.sd_id.id.sensor_type) { 2946 case SG_SENSOR_TYPE_TEMPERATURE: 2947 temp_event_type = TRUE; 2948 ev = sysevent_alloc(EC_ENV, ESC_ENV_TEMP, EP_SGENV, SE_NOSLEEP); 2949 if (ev == NULL) { 2950 cmn_err(CE_WARN, "Failed to allocate sysevent buffer " 2951 "for %s/%s event", EC_ENV, ESC_ENV_TEMP); 2952 return (DDI_FAILURE); 2953 } 2954 break; 2955 2956 default: 2957 temp_event_type = FALSE; 2958 ev = sysevent_alloc(EC_ENV, ESC_ENV_POWER, 2959 EP_SGENV, SE_NOSLEEP); 2960 if (ev == NULL) { 2961 cmn_err(CE_WARN, "Failed to allocate sysevent buffer " 2962 "for %s/%s event", EC_ENV, ESC_ENV_POWER); 2963 return (DDI_FAILURE); 2964 } 2965 break; 2966 } 2967 2968 2969 /* 2970 * Set the following attributes for this event: 2971 * 2972 * ENV_FRU_ID 2973 * ENV_FRU_RESOURCE_ID 2974 * ENV_FRU_DEVICE 2975 * ENV_FRU_STATE 2976 * ENV_MSG 2977 * 2978 */ 2979 se_val.value_type = SE_DATA_TYPE_STRING; 2980 se_val.value.sv_string = ENV_RESERVED_ATTR; 2981 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_ID, &se_val, SE_NOSLEEP); 2982 if (err != 0) { 2983 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event", 2984 ENV_FRU_ID, EC_ENV, 2985 (temp_event_type ? ESC_ENV_TEMP : ESC_ENV_POWER)); 2986 sysevent_free(ev); 2987 return (DDI_FAILURE); 2988 } 2989 2990 se_val.value_type = SE_DATA_TYPE_STRING; 2991 se_val.value.sv_string = ENV_RESERVED_ATTR; 2992 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_RESOURCE_ID, 2993 &se_val, SE_NOSLEEP); 2994 if (err != 0) { 2995 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event", 2996 ENV_FRU_RESOURCE_ID, EC_ENV, 2997 (temp_event_type ? 
ESC_ENV_TEMP : ESC_ENV_POWER)); 2998 sysevent_free_attr(ev_attr_list); 2999 sysevent_free(ev); 3000 return (DDI_FAILURE); 3001 } 3002 3003 se_val.value_type = SE_DATA_TYPE_STRING; 3004 se_val.value.sv_string = ENV_RESERVED_ATTR; 3005 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_DEVICE, 3006 &se_val, SE_NOSLEEP); 3007 if (err != 0) { 3008 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event", 3009 ENV_FRU_DEVICE, EC_ENV, 3010 (temp_event_type ? ESC_ENV_TEMP : ESC_ENV_POWER)); 3011 sysevent_free_attr(ev_attr_list); 3012 sysevent_free(ev); 3013 return (DDI_FAILURE); 3014 } 3015 3016 3017 /* 3018 * We need to find out the status of the reading. 3019 */ 3020 se_val.value_type = SE_DATA_TYPE_INT32; 3021 switch (SG_GET_SENSOR_STATUS(sensor.sd_status)) { 3022 case SG_SENSOR_STATUS_OK: 3023 se_val.value.sv_int32 = ENV_OK; 3024 break; 3025 3026 case SG_SENSOR_STATUS_LO_WARN: 3027 case SG_SENSOR_STATUS_HI_WARN: 3028 se_val.value.sv_int32 = ENV_WARNING; 3029 break; 3030 3031 case SG_SENSOR_STATUS_LO_DANGER: 3032 case SG_SENSOR_STATUS_HI_DANGER: 3033 default: 3034 se_val.value.sv_int32 = ENV_FAILED; 3035 break; 3036 } 3037 3038 /* 3039 * Add ENV_FRU_STATE attribute. 3040 */ 3041 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_STATE, 3042 &se_val, SE_NOSLEEP); 3043 if (err != 0) { 3044 cmn_err(CE_WARN, "Failed to add attr[%s] for %s/%s event " 3045 "(Err=%d)", ENV_FRU_STATE, EC_ENV, 3046 (temp_event_type ? ESC_ENV_TEMP: ESC_ENV_POWER), 3047 err); 3048 sysevent_free_attr(ev_attr_list); 3049 sysevent_free(ev); 3050 return (DDI_FAILURE); 3051 } 3052 3053 3054 /* 3055 * Save the sensor TagID as a string so that a meaningful message 3056 * can be passed to as part of the ENV_MSG attribute. 3057 */ 3058 sgenv_tagid_to_string(sensor.sd_id, sensor_str); 3059 3060 /* 3061 * We need to add a string stating what type of event occurred. 
3062 */ 3063 switch (SG_GET_SENSOR_STATUS(sensor.sd_status)) { 3064 case SG_SENSOR_STATUS_OK: 3065 (void) strcat(sensor_str, SGENV_EVENT_MSG_OK); 3066 break; 3067 3068 case SG_SENSOR_STATUS_LO_WARN: 3069 (void) strcat(sensor_str, SGENV_EVENT_MSG_LO_WARN); 3070 break; 3071 3072 case SG_SENSOR_STATUS_HI_WARN: 3073 (void) strcat(sensor_str, SGENV_EVENT_MSG_HI_WARN); 3074 break; 3075 3076 case SG_SENSOR_STATUS_LO_DANGER: 3077 (void) strcat(sensor_str, SGENV_EVENT_MSG_LO_DANGER); 3078 break; 3079 3080 case SG_SENSOR_STATUS_HI_DANGER: 3081 (void) strcat(sensor_str, SGENV_EVENT_MSG_HI_DANGER); 3082 break; 3083 3084 default: 3085 DCMN_ERR_EVENT(CE_NOTE, "%s: Unknown sensor status", f); 3086 (void) strcat(sensor_str, SGENV_EVENT_MSG_UNKNOWN); 3087 break; 3088 } 3089 3090 DCMN_ERR_EVENT(CE_NOTE, "Temp/Power: %s", sensor_str); 3091 3092 /* 3093 * Add ENV_MSG attribute. 3094 */ 3095 se_val.value_type = SE_DATA_TYPE_STRING; 3096 se_val.value.sv_string = sensor_str; 3097 err = sysevent_add_attr(&ev_attr_list, ENV_MSG, &se_val, SE_NOSLEEP); 3098 if (err != 0) { 3099 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event", 3100 ENV_MSG, EC_ENV, 3101 (temp_event_type ? ESC_ENV_TEMP : ESC_ENV_POWER)); 3102 sysevent_free_attr(ev_attr_list); 3103 sysevent_free(ev); 3104 return (DDI_FAILURE); 3105 } 3106 3107 3108 /* 3109 * Log this event with sysevent framework. 3110 */ 3111 if (sysevent_attach_attributes(ev, ev_attr_list) != 0) { 3112 cmn_err(CE_WARN, "Failed to attach attr list for %s/%s event", 3113 EC_ENV, 3114 (temp_event_type ? ESC_ENV_TEMP : ESC_ENV_POWER)); 3115 sysevent_free_attr(ev_attr_list); 3116 sysevent_free(ev); 3117 return (DDI_FAILURE); 3118 } 3119 err = log_sysevent(ev, SE_NOSLEEP, &eid); 3120 if (err != 0) { 3121 cmn_err(CE_WARN, "Failed to log %s/%s event", EC_ENV, 3122 (temp_event_type ? 
ESC_ENV_TEMP : ESC_ENV_POWER)); 3123 sysevent_free(ev); 3124 return (DDI_FAILURE); 3125 } 3126 sysevent_free(ev); 3127 3128 return (DDI_SUCCESS); 3129 } 3130 3131 3132 /* 3133 * This function gets called when sgenv is notified of a DR event. 3134 * We need to update the board and env caches to ensure that they 3135 * now contain the latest system information.. 3136 */ 3137 static uint_t 3138 sgenv_dr_event_handler(char *arg) 3139 { 3140 DCMN_ERR_S(f, "sgenv_dr_event_handler()"); 3141 3142 sg_system_fru_descriptor_t *payload = NULL; 3143 sbbc_msg_t *msg = NULL; 3144 3145 DCMN_ERR_EVENT(CE_NOTE, "%s: just been triggered.", f); 3146 DCMN_ERR_EVENT(CE_NOTE, "%s: Start: %lld", f, gethrtime()); 3147 3148 3149 if (arg == NULL) { 3150 DCMN_ERR_EVENT(CE_NOTE, "%s: arg == NULL", f); 3151 return (DDI_INTR_CLAIMED); 3152 } 3153 3154 msg = (sbbc_msg_t *)arg; 3155 3156 if (msg->msg_buf == NULL) { 3157 DCMN_ERR_EVENT(CE_NOTE, "%s: msg_buf == NULL", f); 3158 return (DDI_INTR_CLAIMED); 3159 } 3160 3161 payload = (sg_system_fru_descriptor_t *)msg->msg_buf; 3162 3163 /* 3164 * We check the event_details field of the msg_buf to see if 3165 * we need to invalidate the caches 3166 */ 3167 switch (payload->event_details) { 3168 case SG_EVT_BOARD_ABSENT: 3169 case SG_EVT_BOARD_PRESENT: 3170 case SG_EVT_UNASSIGN: 3171 case SG_EVT_ASSIGN: 3172 case SG_EVT_UNAVAILABLE: 3173 case SG_EVT_AVAILABLE: 3174 case SG_EVT_POWER_OFF: 3175 case SG_EVT_POWER_ON: 3176 case SG_EVT_PASSED_TEST: 3177 case SG_EVT_FAILED_TEST: 3178 /* 3179 * We now need to signal to the background threads to poll the 3180 * SC for env readings and board info which may have changed 3181 * as a result of the DR changes. This will cause the 3182 * env_cache and the board_cache to be updated. 
3183 */ 3184 DCMN_ERR_EVENT(CE_NOTE, "%s: about to signal to background " 3185 "threads due to event %d.", f, payload->event_details); 3186 3187 sgenv_indicate_cache_update_needed(ENV_CACHE); 3188 sgenv_indicate_cache_update_needed(BOARD_CACHE); 3189 3190 break; 3191 3192 default: 3193 DCMN_ERR_EVENT(CE_NOTE, "%s: Unknown DR event type.", f); 3194 break; 3195 } 3196 3197 DCMN_ERR_EVENT(CE_NOTE, "%s: Finish: %lld", f, gethrtime()); 3198 3199 return (DDI_INTR_CLAIMED); 3200 } 3201 3202 3203 /* 3204 * This function is called by the interrupt handlers watching for ENV/DR events 3205 * from the SC. It indicates to the thread responsible for the cache specified 3206 * that it needs to update its data. 3207 */ 3208 static void 3209 sgenv_indicate_cache_update_needed(int cache_type) 3210 { 3211 DCMN_ERR_S(f, "sgenv_indicate_cache_update_needed()"); 3212 3213 /* 3214 * If the cache is already being updated, we set a flag to 3215 * inform the thread that it needs to reread the data when 3216 * it is finished as we cannot be sure if the data was read 3217 * before or after the time this handler was triggered. 3218 * 3219 * Otherwise the thread is waiting for us and we signal 3220 * to it to start reading the data. 
3221 */ 3222 switch (cache_type) { 3223 case ENV_CACHE: 3224 mutex_enter(&env_flag_lock); 3225 if (env_cache_updating) { 3226 DCMN_ERR_THREAD(CE_NOTE, "%s: Thread already " 3227 "updating env cache", f); 3228 env_cache_update_needed = B_TRUE; 3229 3230 } else { 3231 DCMN_ERR_THREAD(CE_NOTE, "%s: Sending signal " 3232 "to env thread", f); 3233 cv_signal(&env_flag_cond); 3234 } 3235 mutex_exit(&env_flag_lock); 3236 break; 3237 3238 case BOARD_CACHE: 3239 mutex_enter(&board_flag_lock); 3240 if (board_cache_updating) { 3241 DCMN_ERR_THREAD(CE_NOTE, "%s: Thread already " 3242 "updating board cache", f); 3243 board_cache_update_needed = B_TRUE; 3244 3245 } else { 3246 DCMN_ERR_THREAD(CE_NOTE, "%s: Sending signal " 3247 "to board thread", f); 3248 cv_signal(&board_flag_cond); 3249 } 3250 mutex_exit(&board_flag_lock); 3251 break; 3252 3253 default: 3254 DCMN_ERR(CE_NOTE, "%s: Unknown cache type:0x%x", f, cache_type); 3255 break; 3256 } 3257 } 3258