1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 28 /* 29 * Serengeti Environmental Information driver (sgenv) 30 * 31 * This driver requests the environmental properties from the SC. These 32 * request-response transactions are transferred through the SBBC mailbox, 33 * between the Domain and the SC. 34 * 35 * All sensors have the same sort of properties: Low and high limits, warning 36 * thresholds, last measured value, time of measurement, units (e.g., degrees 37 * Celsius, volts, etc.), and so on. 38 * 39 * Each sensor is named by a unique Tag. The Tag identifies the geographical 40 * location of the sensor in the Serengeti, and what it is the sensor measures. 41 * 42 * Requestable sensor properties are broken into two types: Those which are 43 * quasi-constant (infrequently change) - e.g., tolerance-defining low and high 44 * limits; and those which are volatile (typically change) - e.g., the current 45 * measurement. 
46 * 47 * Unfortunately, property sets are too large to comprise a single mailbox 48 * message, so the sets are further subdivided into notionally arbitrary 49 * collections. NOTE: The SC-mailbox framework now supports fragmented messages 50 * which could allow us to request the data in larger chunks in the future. 51 * 52 * Each collection is fetched by a separate transaction. 53 * 54 * Firstly there is a transaction to obtain a list of all collections. Each non- 55 * zero key in this list is associated with one of the collections of sensors. 56 * (This sparse list of keys is then used as an index to obtain all the sensor 57 * data for each collection). 58 * 59 * For each collection, there is one request-reply transaction to obtain a list 60 * of all sensors in that collection and the limits that apply to each; and a 61 * separate request-reply transaction to obtain the measurements from the 62 * sensors in the collection. 63 * 64 * The sgenv driver assembles each property set from the constituent 65 * collections, and caches the assembled property sets into the appropriate 66 * cache (env_cache, board_cache). The caches are created at startup and are 67 * updated on receipt of events from the SC. These events (which include DR 68 * events and ENV events) notify sgenv of configuration changes and 69 * environmental state changes (such as a sensor state change, Fan speed 70 * change). 71 * 72 * The SC-APP maintains a pseudo-sensor in each collection "measuring" changes 73 * to the quasi-constants in that collection. By monitoring these pseudo-sensor 74 * measurements, the kstat driver avoids redundant or speculative re-fetches of 75 * the quasi-constant properties. 
76 */ 77 78 #include <sys/time.h> 79 #include <sys/errno.h> 80 #include <sys/kmem.h> 81 #include <sys/stat.h> 82 #include <sys/cmn_err.h> 83 #include <sys/disp.h> 84 85 #include <sys/conf.h> 86 #include <sys/modctl.h> 87 #include <sys/devops.h> 88 #include <sys/ddi.h> 89 #include <sys/sunddi.h> 90 91 #include <sys/sgevents.h> 92 #include <sys/sysevent.h> 93 #include <sys/sysevent/eventdefs.h> 94 #include <sys/sysevent/domain.h> 95 #include <sys/sysevent/env.h> 96 97 #include <sys/serengeti.h> 98 #include <sys/sgfrutypes.h> 99 100 #include <sys/sgsbbc.h> 101 #include <sys/sgsbbc_iosram.h> 102 #include <sys/sgsbbc_mailbox.h> 103 104 #include <sys/sbd_ioctl.h> /* sbd header files needed for board support */ 105 #include <sys/sbdp_priv.h> 106 #include <sys/sbd.h> 107 108 #include <sys/sgenv_impl.h> 109 110 111 /* 112 * Global Variables - can be patched from Solaris 113 * ============================================== 114 */ 115 116 /* 117 * the maximum amount of time this driver is prepared to wait for the mailbox 118 * to reply before it decides to timeout. The value is initially set in the 119 * _init() routine to the global Serengeti variable <sbbc_mbox_default_timeout> 120 * but could be tuned specifically for SGENV after booting up the system. 
121 */ 122 int sgenv_max_mbox_wait_time = 0; 123 124 #ifdef DEBUG 125 /* 126 * This variable controls the level of debug output 127 */ 128 uint_t sgenv_debug = SGENV_DEBUG_NONE; 129 #endif 130 131 132 /* 133 * Module Variables 134 * ================ 135 */ 136 137 /* 138 * Driver entry points 139 */ 140 static struct cb_ops sgenv_cb_ops = { 141 nodev, /* open() */ 142 nodev, /* close() */ 143 nodev, /* strategy() */ 144 nodev, /* print() */ 145 nodev, /* dump() */ 146 nodev, /* read() */ 147 nodev, /* write() */ 148 nodev, /* ioctl() */ 149 nodev, /* devmap() */ 150 nodev, /* mmap() */ 151 ddi_segmap, /* segmap() */ 152 nochpoll, /* poll() */ 153 ddi_prop_op, /* prop_op() */ 154 NULL, /* cb_str */ 155 D_NEW | D_MP /* cb_flag */ 156 }; 157 158 159 static struct dev_ops sgenv_ops = { 160 DEVO_REV, 161 0, /* ref count */ 162 ddi_getinfo_1to1, /* getinfo() */ 163 nulldev, /* identify() */ 164 nulldev, /* probe() */ 165 sgenv_attach, /* attach() */ 166 sgenv_detach, /* detach */ 167 nodev, /* reset */ 168 &sgenv_cb_ops, /* pointer to cb_ops structure */ 169 (struct bus_ops *)NULL, 170 nulldev, /* power() */ 171 ddi_quiesce_not_needed, /* quiesce() */ 172 }; 173 174 /* 175 * Loadable module support. 176 */ 177 extern struct mod_ops mod_driverops; 178 179 static struct modldrv modldrv = { 180 &mod_driverops, /* Type of module. This is a driver */ 181 "Environmental Driver", /* Name of the module */ 182 &sgenv_ops /* pointer to the dev_ops structure */ 183 }; 184 185 static struct modlinkage modlinkage = { 186 MODREV_1, 187 &modldrv, 188 NULL 189 }; 190 191 /* Opaque state structure pointer */ 192 static void *sgenv_statep; 193 194 /* 195 * <env_cache> is a cache of all the sensor readings which is persistent 196 * between kstat reads. It is created at init and gets updated upon receipt 197 * of events from the SC. 198 * 199 * The kstat_update function takes a copy of the non-zero entries in this 200 * cache and creates a temp buffer called env_cache_snapshot. 
The 201 * kstat_snapshot function then bcopies the env_cache_snapshot into the 202 * kstat buffer. This is done because there is no way to ensure that the 203 * env_cache won't change between the kstat_update and the kstat_snapshot 204 * which will cause problems as the update sets the ks_data_size. 205 */ 206 static env_sensor_t *env_cache[SGENV_MAX_HPU_KEYS] = {NULL}; 207 static void *env_cache_snapshot = NULL; 208 static size_t env_cache_snapshot_size = 0; 209 210 /* 211 * This is set to TRUE the first time env data is stored in the cache 212 * so that at least from then on, old data can be returned if a call to 213 * the mailbox fails. 214 */ 215 static int env_cache_updated = FALSE; 216 217 /* 218 * This lock is needed by the variable-sized kstat which returns 219 * environmental info. It prevents data-size races with kstat clients. 220 */ 221 static kmutex_t env_kstat_lock; 222 223 /* 224 * The <env_cache> can be accessed asynchronously by the polling function 225 * and the kstat_read framework. This mutex ensures that access to the data 226 * is controlled correctly. 227 */ 228 static kmutex_t env_cache_lock; 229 230 /* 231 * We need to store the last time we asked the SC for environmental information 232 * so that we do not send too many requests in a short period of time. 233 */ 234 static hrtime_t last_env_read_time = 0; 235 236 /* 237 * Variables to coordinate between the handlers which are triggered when 238 * the env cache needs to be updated and the thread which does the work. 239 */ 240 static volatile int env_thread_run = 0; 241 static kthread_t *env_thread = NULL; 242 static kt_did_t env_thread_tid; 243 244 static kcondvar_t env_flag_cond; 245 static kmutex_t env_flag_lock; 246 static boolean_t env_cache_updating = B_FALSE; 247 static boolean_t env_cache_update_needed = B_TRUE; 248 249 /* 250 * <board_cache> is a cache of all the board status info and it is persistent 251 * between kstat reads. 
252 * 253 * The kstat_update function takes a copy of the non-zero entries in this 254 * cache and copies them into the board_cache_snapshot buffer. The 255 * kstat_snapshot function then bcopies the board_cache_snapshot into the 256 * kstat buffer. This is done because there is no way to ensure that the 257 * board_cache won't change between the kstat_update and the kstat_snapshot 258 * which will cause problems as the update sets the ks_data_size. 259 */ 260 static sg_board_info_t board_cache[SG_MAX_BDS] = { 0 }; 261 static sg_board_info_t board_cache_snapshot[SG_MAX_BDS] = { 0 }; 262 static int board_cache_updated = FALSE; 263 264 /* 265 * This mutex ensures the <board_cache> is not destroyed while the board data 266 * is being collected. 267 */ 268 static kmutex_t board_cache_lock; 269 270 /* 271 * This lock is needed by the variable-sized kstat which returns 272 * board status info. It prevents data-size races with kstat clients. 273 */ 274 static kmutex_t board_kstat_lock; 275 276 /* 277 * This is a count of the number of board readings were stored by 278 * the kstat_update routine - this is needed by the kstat_snapshot routine. 279 */ 280 static int board_count = 0; 281 static int board_count_snapshot = 0; 282 283 /* 284 * We need to store the last time we asked the SC for board information 285 * so that we do not send too many requests in a short period of time. 286 */ 287 static hrtime_t last_board_read_time = 0; 288 289 /* 290 * Variables to coordinate between the handlers which are triggered when 291 * the board cache needs to be updated and the thread which does the work. 
292 */ 293 static volatile int board_thread_run = 0; 294 static kthread_t *board_thread = NULL; 295 static kt_did_t board_thread_tid; 296 static kcondvar_t board_flag_cond; 297 298 static kmutex_t board_flag_lock; 299 static boolean_t board_cache_updating = B_FALSE; 300 static boolean_t board_cache_update_needed = B_TRUE; 301 302 /* 303 * Used to keep track of the number of sensors associated with each key. 304 * The sum of all the values in this array is used to set ks_data_size. 305 */ 306 static int vol_sensor_count[SGENV_MAX_HPU_KEYS] = {0}; 307 308 /* 309 * This variable keeps a count of the number of errors that have occurred 310 * when we make calls to the mailbox for Env or Board data. 311 */ 312 static int sgenv_mbox_error_count = 0; 313 314 /* 315 * mutex which protects the keyswitch interrupt handler. 316 */ 317 static kmutex_t keysw_hdlr_lock; 318 319 /* 320 * mutex which protects the env interrupt handler. 321 */ 322 static kmutex_t env_hdlr_lock; 323 324 /* 325 * mutex which protects the DR handler interrupt handler. 326 */ 327 static kmutex_t dr_hdlr_lock; 328 329 /* 330 * Payloads of the event handlers. 331 */ 332 static sg_event_key_position_t keysw_payload; 333 static sbbc_msg_t keysw_payload_msg; 334 335 static sg_event_env_changed_t env_payload; 336 static sbbc_msg_t env_payload_msg; 337 338 static sg_event_fan_status_t fan_payload; 339 static sbbc_msg_t fan_payload_msg; 340 341 static sg_system_fru_descriptor_t dr_payload; 342 static sbbc_msg_t dr_payload_msg; 343 344 /* 345 * The following 3 arrays list all possible HPUs, Parts and Device types 346 */ 347 348 /* 349 * ensure that all possible HPUs exported, as described in the main comment 350 * in <sys/sensor_tag.h>, are accounted for here. 
351 */ 352 static const hpu_value_t hpus[] = { 353 HPU_ENTRY(SG_HPU_TYPE_UNKNOWN), 354 HPU_ENTRY(SG_HPU_TYPE_CPU_BOARD), 355 HPU_ENTRY(SG_HPU_TYPE_PCI_IO_BOARD), 356 HPU_ENTRY(SG_HPU_TYPE_CPCI_IO_BOARD), 357 HPU_ENTRY(SG_HPU_TYPE_SP_CPCI_IO_BOARD), 358 HPU_ENTRY(SG_HPU_TYPE_REPEATER_BOARD), 359 HPU_ENTRY(SG_HPU_TYPE_L2_REPEATER_BOARD), 360 HPU_ENTRY(SG_HPU_TYPE_SYSTEM_CONTROLLER_BOARD), 361 HPU_ENTRY(SG_HPU_TYPE_SP_SYSTEM_CONTROLLER_BOARD), 362 HPU_ENTRY(SG_HPU_TYPE_A123_POWER_SUPPLY), 363 HPU_ENTRY(SG_HPU_TYPE_A138_POWER_SUPPLY), 364 HPU_ENTRY(SG_HPU_TYPE_A145_POWER_SUPPLY), 365 HPU_ENTRY(SG_HPU_TYPE_A152_POWER_SUPPLY), 366 HPU_ENTRY(SG_HPU_TYPE_A153_POWER_SUPPLY), 367 HPU_ENTRY(SG_HPU_TYPE_RACK_FAN_TRAY), 368 HPU_ENTRY(SG_HPU_TYPE_SP_FAN_TRAY), 369 HPU_ENTRY(SG_HPU_TYPE_MD_TOP_IO_FAN_TRAY), 370 HPU_ENTRY(SG_HPU_TYPE_MD_BOTTOM_IO_FAN_TRAY), 371 HPU_ENTRY(SG_HPU_TYPE_R12_THREE_FAN_TRAY), 372 HPU_ENTRY(SG_HPU_TYPE_K12_IO_ONE_FAN_TRAY), 373 HPU_ENTRY(SG_HPU_TYPE_K12_CPU_THREE_FAN_TRAY), 374 HPU_ENTRY(SG_HPU_TYPE_R24_IO_FOUR_FAN_TRAY), 375 HPU_ENTRY(SG_HPU_TYPE_R24_CPU_SIX_FAN_TRAY), 376 0, (char *)NULL 377 }; 378 379 static const struct part_value parts[] = { 380 PART_VALUE(SG_SENSOR_PART_SBBC), 381 PART_VALUE(SG_SENSOR_PART_SDC), 382 PART_VALUE(SG_SENSOR_PART_AR), 383 PART_VALUE(SG_SENSOR_PART_CBH), 384 PART_VALUE(SG_SENSOR_PART_DX), 385 PART_VALUE(SG_SENSOR_PART_CHEETAH), 386 PART_VALUE(SG_SENSOR_PART_1_5_VDC), 387 PART_VALUE(SG_SENSOR_PART_3_3_VDC), 388 PART_VALUE(SG_SENSOR_PART_5_VDC), 389 PART_VALUE(SG_SENSOR_PART_12_VDC), 390 PART_VALUE(SG_SENSOR_PART_48_VDC), 391 PART_VALUE(SG_SENSOR_PART_CURRENT), 392 PART_VALUE(SG_SENSOR_PART_BOARD), 393 PART_VALUE(SG_SENSOR_PART_SCAPP), 394 PART_VALUE(SG_SENSOR_PART_SCHIZO), 395 PART_VALUE(SG_SENSOR_PART_FAN), 396 0, (char *)NULL 397 }; 398 399 static const struct type_value types[] = { 400 TYPE_VALUE(SG_SENSOR_TYPE_CURRENT, SG_CURRENT_SCALE), 401 TYPE_VALUE(SG_SENSOR_TYPE_TEMPERATURE, SG_TEMPERATURE_SCALE), 402 
TYPE_VALUE(SG_SENSOR_TYPE_1_5_VDC, SG_1_5_VDC_SCALE), 403 TYPE_VALUE(SG_SENSOR_TYPE_1_8_VDC, SG_1_8_VDC_SCALE), 404 TYPE_VALUE(SG_SENSOR_TYPE_3_3_VDC, SG_3_3_VDC_SCALE), 405 TYPE_VALUE(SG_SENSOR_TYPE_5_VDC, SG_5_VDC_SCALE), 406 TYPE_VALUE(SG_SENSOR_TYPE_12_VDC, SG_12_VDC_SCALE), 407 TYPE_VALUE(SG_SENSOR_TYPE_48_VDC, SG_48_VDC_SCALE), 408 TYPE_VALUE(SG_SENSOR_TYPE_ENVDB, 1), 409 TYPE_VALUE(SG_SENSOR_TYPE_COOLING, 1), 410 0, (char *)NULL 411 }; 412 413 int 414 _init(void) 415 { 416 int error = 0; 417 418 error = ddi_soft_state_init(&sgenv_statep, 419 sizeof (sgenv_soft_state_t), 1); 420 421 if (error) 422 return (error); 423 424 error = mod_install(&modlinkage); 425 if (error) { 426 ddi_soft_state_fini(&sgenv_statep); 427 return (error); 428 } 429 430 mutex_init(&env_kstat_lock, NULL, MUTEX_DEFAULT, NULL); 431 mutex_init(&env_cache_lock, NULL, MUTEX_DEFAULT, NULL); 432 mutex_init(&env_flag_lock, NULL, MUTEX_DEFAULT, NULL); 433 cv_init(&env_flag_cond, NULL, CV_DEFAULT, NULL); 434 435 mutex_init(&board_cache_lock, NULL, MUTEX_DEFAULT, NULL); 436 mutex_init(&board_kstat_lock, NULL, MUTEX_DEFAULT, NULL); 437 mutex_init(&board_flag_lock, NULL, MUTEX_DEFAULT, NULL); 438 cv_init(&board_flag_cond, NULL, CV_DEFAULT, NULL); 439 440 mutex_init(&keysw_hdlr_lock, NULL, MUTEX_DEFAULT, NULL); 441 mutex_init(&env_hdlr_lock, NULL, MUTEX_DEFAULT, NULL); 442 mutex_init(&dr_hdlr_lock, NULL, MUTEX_DEFAULT, NULL); 443 444 /* set the default timeout value */ 445 sgenv_max_mbox_wait_time = sbbc_mbox_default_timeout; 446 447 return (error); 448 } 449 450 451 int 452 _info(struct modinfo *modinfop) 453 { 454 return (mod_info(&modlinkage, modinfop)); 455 } 456 457 458 int 459 _fini(void) 460 { 461 int error = 0; 462 463 error = mod_remove(&modlinkage); 464 if (error) 465 return (error); 466 467 mutex_destroy(&env_kstat_lock); 468 mutex_destroy(&env_cache_lock); 469 470 mutex_destroy(&board_cache_lock); 471 mutex_destroy(&board_kstat_lock); 472 473 mutex_destroy(&keysw_hdlr_lock); 474 
mutex_destroy(&env_hdlr_lock); 475 mutex_destroy(&dr_hdlr_lock); 476 477 ddi_soft_state_fini(&sgenv_statep); 478 479 return (error); 480 } 481 482 483 static int 484 sgenv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 485 { 486 sgenv_soft_state_t *softsp; 487 488 int instance; 489 int err; 490 491 switch (cmd) { 492 case DDI_ATTACH: 493 494 instance = ddi_get_instance(dip); 495 496 /* allocate a global sgenv_soft_state structure */ 497 err = ddi_soft_state_zalloc(sgenv_statep, instance); 498 if (err != DDI_SUCCESS) { 499 cmn_err(CE_WARN, "attach: could not allocate state " 500 "structure for inst %d.", instance); 501 return (DDI_FAILURE); 502 } 503 504 softsp = ddi_get_soft_state(sgenv_statep, instance); 505 if (softsp == NULL) { 506 ddi_soft_state_free(sgenv_statep, instance); 507 cmn_err(CE_WARN, "attach: could not get state " 508 "structure for inst %d.", instance); 509 return (DDI_FAILURE); 510 } 511 512 softsp->dip = dip; 513 softsp->instance = instance; 514 515 err = sgenv_add_kstats(softsp); 516 if (err != 0) { 517 /* 518 * Some of the kstats may have been created before the 519 * error occurred in sgenv_add_kstats(), so we call 520 * sgenv_remove_kstats() which removes any kstats 521 * already created. 522 */ 523 sgenv_remove_kstats(softsp); 524 ddi_soft_state_free(sgenv_statep, instance); 525 return (DDI_FAILURE); 526 } 527 528 /* 529 * Before we setup the framework to read the data from the SC 530 * we need to ensure the caches are initialized correctly. 531 */ 532 sgenv_init_board_cache(); 533 sgenv_init_env_cache(); 534 535 /* 536 * Add the threads which will update the env and board caches 537 * and post events to Sysevent Framework in the background 538 * when the interrupt handlers watching for ENV/DR events 539 * indicate to the threads that they need to do so. 
540 */ 541 err = sgenv_create_cache_update_threads(); 542 if (err != DDI_SUCCESS) { 543 sgenv_remove_kstats(softsp); 544 ddi_soft_state_free(sgenv_statep, instance); 545 return (DDI_FAILURE); 546 } 547 548 err = ddi_create_minor_node(dip, SGENV_DRV_NAME, S_IFCHR, 549 instance, DDI_PSEUDO, 0); 550 if (err != DDI_SUCCESS) { 551 sgenv_remove_kstats(softsp); 552 (void) sgenv_remove_cache_update_threads(); 553 ddi_soft_state_free(sgenv_statep, instance); 554 return (DDI_FAILURE); 555 } 556 557 /* 558 * Add the handlers which watch for unsolicited messages 559 * and post event to Sysevent Framework. 560 */ 561 err = sgenv_add_intr_handlers(); 562 if (err != DDI_SUCCESS) { 563 cmn_err(CE_WARN, "Failed to add event handlers"); 564 (void) sgenv_remove_intr_handlers(); 565 sgenv_remove_kstats(softsp); 566 (void) sgenv_remove_cache_update_threads(); 567 ddi_soft_state_free(sgenv_statep, instance); 568 return (DDI_FAILURE); 569 } 570 571 ddi_report_dev(dip); 572 573 return (DDI_SUCCESS); 574 575 case DDI_RESUME: 576 return (DDI_SUCCESS); 577 578 default: 579 return (DDI_FAILURE); 580 } 581 } 582 583 584 static int 585 sgenv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 586 { 587 sgenv_soft_state_t *softsp; 588 589 int instance; 590 int err; 591 592 switch (cmd) { 593 case DDI_DETACH: 594 595 instance = ddi_get_instance(dip); 596 597 softsp = ddi_get_soft_state(sgenv_statep, instance); 598 if (softsp == NULL) { 599 cmn_err(CE_WARN, "detach: could not get state " 600 "structure for inst %d.", instance); 601 return (DDI_FAILURE); 602 } 603 604 err = sgenv_remove_cache_update_threads(); 605 if (err != DDI_SUCCESS) { 606 cmn_err(CE_WARN, "Failed to remove update threads"); 607 } 608 609 /* 610 * Remove the handlers which watch for unsolicited messages 611 * and post event to Sysevent Framework. 
612 */ 613 err = sgenv_remove_intr_handlers(); 614 if (err != DDI_SUCCESS) { 615 cmn_err(CE_WARN, "Failed to remove event handlers"); 616 } 617 618 sgenv_remove_kstats(softsp); 619 620 ddi_soft_state_free(sgenv_statep, instance); 621 622 ddi_remove_minor_node(dip, NULL); 623 624 return (DDI_SUCCESS); 625 626 case DDI_SUSPEND: 627 return (DDI_SUCCESS); 628 629 default: 630 return (DDI_FAILURE); 631 } 632 } 633 634 635 static int 636 sgenv_add_kstats(sgenv_soft_state_t *softsp) 637 { 638 kstat_t *ksp; 639 kstat_named_t *keyswitch_named_data; 640 641 int inst = softsp->instance; 642 643 /* 644 * Create the 'keyswitch position' named kstat. 645 */ 646 ksp = kstat_create(SGENV_DRV_NAME, inst, SG_KEYSWITCH_KSTAT_NAME, 647 "misc", KSTAT_TYPE_NAMED, 1, 0); 648 649 if (ksp != NULL) { 650 /* initialize the named kstat */ 651 keyswitch_named_data = (struct kstat_named *)(ksp->ks_data); 652 653 kstat_named_init(&keyswitch_named_data[0], 654 POSITION_KSTAT_NAME, 655 KSTAT_DATA_INT32); 656 657 ksp->ks_update = sgenv_keyswitch_kstat_update; 658 kstat_install(ksp); 659 660 /* update the soft state */ 661 softsp->keyswitch_ksp = ksp; 662 663 } else { 664 cmn_err(CE_WARN, "Keyswitch: kstat_create failed"); 665 return (-1); 666 } 667 668 669 /* 670 * Environmental Information. 671 */ 672 ksp = kstat_create(SGENV_DRV_NAME, inst, SG_ENV_INFO_KSTAT_NAME, 673 "misc", KSTAT_TYPE_RAW, 0, 674 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE); 675 676 if (ksp != NULL) { 677 ksp->ks_data = NULL; 678 ksp->ks_data_size = 0; 679 ksp->ks_snaptime = 0; 680 ksp->ks_update = sgenv_env_info_kstat_update; 681 ksp->ks_snapshot = sgenv_env_info_kstat_snapshot; 682 ksp->ks_lock = &env_kstat_lock; 683 kstat_install(ksp); 684 685 /* update the soft state */ 686 softsp->env_info_ksp = ksp; 687 688 } else { 689 cmn_err(CE_WARN, "Environmental Info: kstat_create failed"); 690 return (-1); 691 } 692 693 694 /* 695 * Board Status Information. 
696 */ 697 ksp = kstat_create(SGENV_DRV_NAME, inst, SG_BOARD_STATUS_KSTAT_NAME, 698 "misc", KSTAT_TYPE_RAW, 0, 699 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE); 700 701 if (ksp != NULL) { 702 ksp->ks_data = NULL; 703 ksp->ks_data_size = 0; 704 ksp->ks_snaptime = 0; 705 ksp->ks_update = sgenv_board_info_kstat_update; 706 ksp->ks_snapshot = sgenv_board_info_kstat_snapshot; 707 ksp->ks_lock = &board_kstat_lock; 708 kstat_install(ksp); 709 710 /* update the soft state */ 711 softsp->board_info_ksp = ksp; 712 713 } else { 714 cmn_err(CE_WARN, "Board Status Info: kstat_create failed"); 715 return (-1); 716 } 717 718 return (0); 719 } 720 721 722 static void 723 sgenv_remove_kstats(sgenv_soft_state_t *softsp) 724 { 725 kstat_t *ksp; 726 727 ksp = softsp->keyswitch_ksp; 728 if (ksp != NULL) { 729 softsp->keyswitch_ksp = NULL; 730 kstat_delete(ksp); 731 } 732 733 ksp = softsp->env_info_ksp; 734 if (ksp != NULL) { 735 sgenv_destroy_env_cache(); 736 softsp->env_info_ksp = NULL; 737 ksp->ks_lock = NULL; 738 kstat_delete(ksp); 739 } 740 741 ksp = softsp->board_info_ksp; 742 if (ksp != NULL) { 743 softsp->board_info_ksp = NULL; 744 ksp->ks_lock = NULL; 745 kstat_delete(ksp); 746 } 747 } 748 749 750 /* 751 * This function registers mailbox interrupt handlers to watch for certain 752 * unsolicited mailbox messages, which indicate that some event has occurred. 753 * 754 * Currently only the following events are handled: 755 * MBOX_EVENT_KEY_SWITCH 756 * MBOX_EVENT_ENV 757 * - Thresholds/Limits Exceeded 758 * - Fan Status changed 759 * 760 * ERRORS: 761 * We return DDI_FAILURE if we fail to register any one of the 762 * interrupt handlers. 763 */ 764 static int 765 sgenv_add_intr_handlers(void) 766 { 767 int err; 768 769 /* 770 * Register an interrupt handler with the sgsbbc driver for the 771 * MBOX_EVENT_KEY_SWITCH events. 772 * - The virtual keyswitch has changed, we generate a sysevent. 
773 */ 774 keysw_payload_msg.msg_buf = (caddr_t)&keysw_payload; 775 keysw_payload_msg.msg_len = sizeof (keysw_payload); 776 777 err = sbbc_mbox_reg_intr(MBOX_EVENT_KEY_SWITCH, sgenv_keyswitch_handler, 778 &keysw_payload_msg, NULL, &keysw_hdlr_lock); 779 if (err != 0) { 780 cmn_err(CE_WARN, "Failed to register MBOX_EVENT_KEY_SWITCH " 781 "handler. Err=%d", err); 782 return (DDI_FAILURE); 783 } 784 785 /* 786 * Register an interrupt handler with the sgsbbc driver for the 787 * MBOX_EVENT_ENV events. 788 * - Thresholds/Limits Exceeded, we generate a sysevent 789 * and we update our caches. 790 */ 791 env_payload_msg.msg_buf = (caddr_t)&env_payload; 792 env_payload_msg.msg_len = sizeof (env_payload); 793 794 err = sbbc_mbox_reg_intr(MBOX_EVENT_ENV, sgenv_env_data_handler, 795 &env_payload_msg, NULL, &env_hdlr_lock); 796 if (err != 0) { 797 cmn_err(CE_WARN, "Failed to register MBOX_EVENT_ENV " 798 "(env) handler. Err=%d", err); 799 return (DDI_FAILURE); 800 } 801 802 /* 803 * Register an interrupt handler with the sgsbbc driver for the 804 * MBOX_EVENT_ENV events. 805 * - Fan Status changed, we generate a sysevent, and 806 * we update the env cache only. 807 */ 808 fan_payload_msg.msg_buf = (caddr_t)&fan_payload; 809 fan_payload_msg.msg_len = sizeof (fan_payload); 810 811 err = sbbc_mbox_reg_intr(MBOX_EVENT_ENV, sgenv_fan_status_handler, 812 &fan_payload_msg, NULL, &env_hdlr_lock); 813 if (err != 0) { 814 cmn_err(CE_WARN, "Failed to register MBOX_EVENT_ENV (fan)" 815 "handler. Err=%d", err); 816 return (DDI_FAILURE); 817 } 818 819 /* 820 * Register an interrupt handler with the sgsbbc driver for the 821 * MBOX_EVENT_GENERIC events. 822 * - DR state change, we update our caches. 
823 */ 824 dr_payload_msg.msg_buf = (caddr_t)&dr_payload; 825 dr_payload_msg.msg_len = sizeof (dr_payload); 826 827 err = sbbc_mbox_reg_intr(MBOX_EVENT_GENERIC, sgenv_dr_event_handler, 828 &dr_payload_msg, NULL, &dr_hdlr_lock); 829 if (err != 0) { 830 cmn_err(CE_WARN, "Failed to register MBOX_EVENT_GENERIC (DR)" 831 "handler. Err=%d", err); 832 return (DDI_FAILURE); 833 } 834 835 return (DDI_SUCCESS); 836 } 837 838 /* 839 * This function unregisters the mailbox interrupt handlers. 840 * 841 * ERRORS: 842 * We return DDI_FAILURE if we fail to register any one of the 843 * interrupt handlers. 844 */ 845 static int 846 sgenv_remove_intr_handlers(void) 847 { 848 int rv = DDI_SUCCESS; 849 int err; 850 851 err = sbbc_mbox_unreg_intr(MBOX_EVENT_KEY_SWITCH, 852 sgenv_keyswitch_handler); 853 if (err != 0) { 854 cmn_err(CE_WARN, "Failed to unregister MBOX_EVENT_KEY_SWITCH " 855 "handler. Err=%d", err); 856 rv = DDI_FAILURE; 857 } 858 859 err = sbbc_mbox_unreg_intr(MBOX_EVENT_ENV, sgenv_env_data_handler); 860 if (err != 0) { 861 cmn_err(CE_WARN, "Failed to unregister MBOX_EVENT_ENV (env)" 862 "handler. Err=%d", err); 863 rv = DDI_FAILURE; 864 } 865 866 err = sbbc_mbox_unreg_intr(MBOX_EVENT_ENV, sgenv_fan_status_handler); 867 if (err != 0) { 868 cmn_err(CE_WARN, "Failed to unregister MBOX_EVENT_ENV (fan)" 869 "handler. Err=%d", err); 870 rv = DDI_FAILURE; 871 } 872 873 err = sbbc_mbox_unreg_intr(MBOX_EVENT_GENERIC, sgenv_dr_event_handler); 874 if (err != 0) { 875 cmn_err(CE_WARN, "Failed to unregister MBOX_EVENT_GENERIC (DR) " 876 "handler. 
Err=%d", err); 877 rv = DDI_FAILURE; 878 } 879 880 return (rv); 881 } 882 883 884 static int 885 sgenv_create_cache_update_threads(void) 886 { 887 DCMN_ERR_S(f, "sgenv_create_cache_update_threads()"); 888 889 DCMN_ERR_THREAD(CE_NOTE, "Entering %s", f); 890 891 /* Create thread to ensure env_cache is updated */ 892 env_thread_run = 1; 893 894 env_thread = thread_create(NULL, 0, sgenv_update_env_cache, 895 NULL, 0, &p0, TS_RUN, minclsyspri); 896 env_thread_tid = env_thread->t_did; 897 898 /* Create thread to ensure board_cache is updated */ 899 board_thread_run = 1; 900 901 board_thread = thread_create(NULL, 0, sgenv_update_board_cache, 902 NULL, 0, &p0, TS_RUN, minclsyspri); 903 board_thread_tid = board_thread->t_did; 904 905 DCMN_ERR_THREAD(CE_NOTE, "Exiting %s", f); 906 907 return (DDI_SUCCESS); 908 } 909 910 911 static int 912 sgenv_remove_cache_update_threads(void) 913 { 914 DCMN_ERR_S(f, "sgenv_remove_cache_update_threads()"); 915 916 DCMN_ERR_THREAD(CE_NOTE, "%s: Waiting for cache update threads", f); 917 918 /* Cause the env_cache thread to terminate. */ 919 mutex_enter(&env_flag_lock); 920 env_thread_run = 0; 921 cv_signal(&env_flag_cond); 922 mutex_exit(&env_flag_lock); 923 924 thread_join(env_thread_tid); 925 926 /* Cause the board_cache thread to terminate. 
*/ 927 mutex_enter(&board_flag_lock); 928 board_thread_run = 0; 929 cv_signal(&board_flag_cond); 930 mutex_exit(&board_flag_lock); 931 932 thread_join(board_thread_tid); 933 934 DCMN_ERR_THREAD(CE_NOTE, "%s: cache update threads finished", f); 935 936 return (DDI_SUCCESS); 937 } 938 939 940 static int 941 sgenv_keyswitch_kstat_update(kstat_t *ksp, int rw) 942 { 943 sg_keyswitch_kstat_t *keysw_data; 944 945 int8_t posn; /* keysw posn read from IO-SRAM */ 946 int size; /* size of IO-SRAM chunk */ 947 int rv = 0; /* return value of iosram_read() */ 948 949 keysw_data = (sg_keyswitch_kstat_t *)ksp->ks_data; 950 951 switch (rw) { 952 case KSTAT_WRITE: 953 /* 954 * Write not permitted 955 */ 956 return (EACCES); 957 958 case KSTAT_READ: 959 /* 960 * Get the size of the keyswitch IO-SRAM chunk. 961 * This should be one byte. 962 * 963 * If the size is not 1 byte we set the position to UNKNOWN 964 * 965 * Otherwise we read the keyswitch position from IO-SRAM. 966 * Then check that this is a valid keyswitch position. 967 * If it is not valid then something is corrupt and set 968 * the position to UNKNOWN. 969 */ 970 size = iosram_size(SBBC_KEYSWITCH_KEY); 971 if (size != 1) { 972 posn = SG_KEYSWITCH_POSN_UNKNOWN; 973 rv = -1; 974 975 } else if ((rv = iosram_read(SBBC_KEYSWITCH_KEY, 0, 976 (char *)&posn, size)) != 0) { 977 posn = SG_KEYSWITCH_POSN_UNKNOWN; 978 979 } else { 980 /* Check posn is not corrupt */ 981 switch (posn) { 982 case SG_KEYSWITCH_POSN_ON: 983 case SG_KEYSWITCH_POSN_DIAG: 984 case SG_KEYSWITCH_POSN_SECURE: 985 /* value read from kstat is OK */ 986 break; 987 988 default: 989 /* value read from kstat is corrupt */ 990 posn = SG_KEYSWITCH_POSN_UNKNOWN; 991 break; 992 } 993 } 994 995 /* Write position to kstat. 
*/ 996 keysw_data->keyswitch_position.value.i32 = posn; 997 998 return (rv); 999 1000 default: 1001 return (EINVAL); 1002 } 1003 } 1004 1005 static void 1006 sgenv_init_env_cache(void) 1007 { 1008 ASSERT(env_thread_run == 0); 1009 ASSERT(env_thread == NULL); 1010 } 1011 1012 1013 /* 1014 * This thread runs in the background and waits for an interrupt handler 1015 * registered to wait for ENV/DR events from the SC to signal/flag that we 1016 * need to update our Env Cache. 1017 */ 1018 static void 1019 sgenv_update_env_cache(void) 1020 { 1021 DCMN_ERR_S(f, "sgenv_update_env_cache()"); 1022 1023 mutex_enter(&env_flag_lock); 1024 1025 while (env_thread_run == 1) { 1026 1027 /* 1028 * We check to see if the update needed flag is set. 1029 * If it is then this means that: 1030 * 1) This is the first time through the while loop 1031 * and we need to initialize the cache. 1032 * 2) An interrupt handler was triggered while we 1033 * we were updating the env cache during the previous 1034 * iteration of the while loop and we need to refresh 1035 * the env data to ensure we are completely up to date. 1036 * 1037 * Otherwise we wait until we get a signal from one of the 1038 * interrupt handlers. 
1039 */ 1040 if (env_cache_update_needed) { 1041 DCMN_ERR_THREAD(CE_NOTE, "%s: update needed", f); 1042 1043 env_cache_update_needed = B_FALSE; 1044 1045 } else { 1046 DCMN_ERR_THREAD(CE_NOTE, "%s: Waiting for signal", f); 1047 1048 cv_wait(&env_flag_cond, &env_flag_lock); 1049 1050 /* Check if we are being asked to terminate */ 1051 if (env_thread_run == 0) { 1052 break; 1053 } 1054 1055 env_cache_updating = B_TRUE; 1056 } 1057 1058 mutex_exit(&env_flag_lock); 1059 (void) sgenv_get_env_info_data(); 1060 1061 (void) sgenv_check_sensor_thresholds(); 1062 mutex_enter(&env_flag_lock); 1063 1064 if (env_cache_update_needed == B_FALSE) 1065 env_cache_updating = B_FALSE; 1066 } 1067 1068 mutex_exit(&env_flag_lock); 1069 1070 DCMN_ERR_THREAD(CE_NOTE, "Exiting %s", f); 1071 1072 env_thread_run = -1; 1073 thread_exit(); 1074 } 1075 1076 1077 /* 1078 * We always return what is in the env_cache. It is up to the SC to ensure 1079 * that the env_cache is current by sending events to us when something 1080 * changes. The cache will then be updated by going to the SC to get the 1081 * new data. That way the kstat_update code can always be sure that it gets 1082 * current data without having to wait while the SC responds (slowly) to our 1083 * request for data. 1084 * 1085 * The way the update and snapshot code works, we cannot be guaranteed that 1086 * someone won't grab the env_cache_lock between the update and snapshot 1087 * calls so we use a temporary snapshot of the env_cache. We cannot hold 1088 * any locks across the calls from the update to the snapshot as we are 1089 * not guaranteed that the snapshot function will be called. So we create 1090 * the snapshot of the env_cache in the update routine and dump this to the 1091 * kstat user buffer in the snapshot routine. (There are error conditions in 1092 * which the snapshot will not be called by the kstat framework so we need 1093 * to handle these appropriately.) 
1094 */ 1095 static int 1096 sgenv_env_info_kstat_update(kstat_t *ksp, int rw) 1097 { 1098 DCMN_ERR_S(f, "sgenv_env_info_kstat_update()"); 1099 1100 int err = 0; 1101 int key_posn; 1102 env_sensor_t *ptr; 1103 1104 switch (rw) { 1105 case KSTAT_WRITE: 1106 /* 1107 * Write not permitted 1108 */ 1109 return (EACCES); 1110 1111 case KSTAT_READ: 1112 1113 mutex_enter(&env_cache_lock); 1114 /* 1115 * We now need to ensure that there is enough room allocated 1116 * by the kstat framework to return the data via ks_data. 1117 * It is possible there may be no data in the cache but 1118 * we still return zero sized kstats to ensure no client breaks 1119 */ 1120 sgenv_update_env_kstat_size(ksp); 1121 1122 /* 1123 * If the snapshot still has data (this could be because the 1124 * kstat framework discovered an error and did not call the 1125 * snapshot code which should have freed this buffer) we free 1126 * it here. 1127 */ 1128 if ((env_cache_snapshot != NULL) && 1129 (env_cache_snapshot_size > 0)) { 1130 DCMN_ERR_CACHE(CE_NOTE, "%s freeing " 1131 "env_cache_snapshot buf", f); 1132 kmem_free(env_cache_snapshot, env_cache_snapshot_size); 1133 } 1134 1135 /* 1136 * Create a new snapshot buffer based on ks_data_size 1137 */ 1138 env_cache_snapshot_size = ksp->ks_data_size; 1139 env_cache_snapshot = kmem_zalloc( 1140 env_cache_snapshot_size, KM_SLEEP); 1141 1142 /* 1143 * We need to take a fresh snapshot of the env_cache here. 1144 * For each sensor collection, we check to see if there is 1145 * data in the cache (ie. != NULL). If there is, we copy it 1146 * into the snapshot. 1147 */ 1148 ptr = env_cache_snapshot; 1149 for (key_posn = 0; key_posn < SGENV_MAX_HPU_KEYS; key_posn++) { 1150 if (vol_sensor_count[key_posn] <= 0) 1151 continue; 1152 1153 ASSERT(vol_sensor_count[key_posn] <= 1154 SGENV_MAX_SENSORS_PER_KEY); 1155 1156 /* 1157 * <env_cache> entry should have been allocated 1158 * in the kstat_update function already. 
1159 * 1160 * If this <env_cache> entry is NULL, then 1161 * it has already been destroyed or cleared 1162 * and the sensor readings have disappeared. 1163 */ 1164 if (env_cache[key_posn] == NULL) { 1165 DCMN_ERR(CE_NOTE, "!Cache entry %d has " 1166 "disappeared", key_posn); 1167 vol_sensor_count[key_posn] = 0; 1168 continue; 1169 } 1170 1171 bcopy(&env_cache[key_posn][0], ptr, 1172 sizeof (env_sensor_t) * 1173 vol_sensor_count[key_posn]); 1174 ptr += vol_sensor_count[key_posn]; 1175 } 1176 mutex_exit(&env_cache_lock); 1177 1178 return (err); 1179 1180 default: 1181 return (EINVAL); 1182 } 1183 } 1184 1185 static int 1186 sgenv_env_info_kstat_snapshot(kstat_t *ksp, void *buf, int rw) 1187 { 1188 DCMN_ERR_S(f, "sgenv_env_info_kstat_snapshot()"); 1189 1190 switch (rw) { 1191 case KSTAT_WRITE: 1192 /* 1193 * Write not permitted 1194 */ 1195 return (EACCES); 1196 1197 case KSTAT_READ: 1198 1199 /* 1200 * We have taken a snapshot of the env_cache in the 1201 * update routine so we simply bcopy this into the 1202 * kstat buf. No locks needed here. 1203 */ 1204 if (env_cache_snapshot_size > 0) 1205 bcopy(env_cache_snapshot, buf, env_cache_snapshot_size); 1206 1207 ksp->ks_snaptime = last_env_read_time; 1208 1209 /* 1210 * Free the memory used by the snapshot. If for some reason 1211 * the kstat framework does not call this snapshot routine, 1212 * we also have a check in the update routine so the next 1213 * time it is called it checks for this condition and frees 1214 * the snapshot buffer there. 1215 */ 1216 DCMN_ERR_CACHE(CE_NOTE, "%s freeing env_cache_snapshot buf", f); 1217 kmem_free(env_cache_snapshot, env_cache_snapshot_size); 1218 env_cache_snapshot = NULL; 1219 env_cache_snapshot_size = 0; 1220 1221 return (0); 1222 1223 default: 1224 return (EINVAL); 1225 } 1226 } 1227 1228 static void 1229 sgenv_init_board_cache(void) 1230 { 1231 int i; 1232 1233 ASSERT(board_thread_run == 0); 1234 ASSERT(board_thread == NULL); 1235 1236 /* 1237 * Init all node-ids to be -1. 
1238 */ 1239 mutex_enter(&board_cache_lock); 1240 for (i = 0; i < SG_MAX_BDS; i++) 1241 board_cache[i].node_id = (-1); 1242 mutex_exit(&board_cache_lock); 1243 } 1244 1245 1246 /* 1247 * This thread runs in the background and waits for an interrupt handler 1248 * registered to wait for DR events from the SC to signal/flag that we 1249 * need to update our Board Cache. 1250 */ 1251 static void 1252 sgenv_update_board_cache(void) 1253 { 1254 DCMN_ERR_S(f, "sgenv_update_board_cache()"); 1255 1256 mutex_enter(&board_flag_lock); 1257 1258 while (board_thread_run == 1) { 1259 1260 /* 1261 * We check to see if the update needed flag is set. 1262 * If it is then this means that: 1263 * 1) This is the first time through the while loop 1264 * and we need to initialize the cache. 1265 * 2) An interrupt handler was triggered while we 1266 * we were updating the cache during the previous 1267 * iteration of the while loop and we need to refresh 1268 * the env data to ensure we are completely up to date. 1269 * 1270 * Otherwise we wait until we get a signal from one of the 1271 * interrupt handlers. 1272 */ 1273 if (board_cache_update_needed) { 1274 DCMN_ERR_THREAD(CE_NOTE, "%s: update needed", f); 1275 board_cache_update_needed = B_FALSE; 1276 1277 } else { 1278 DCMN_ERR_THREAD(CE_NOTE, "%s: Waiting for signal", f); 1279 1280 cv_wait(&board_flag_cond, &board_flag_lock); 1281 1282 /* Check if we are being asked to terminate */ 1283 if (board_thread_run == 0) { 1284 break; 1285 } 1286 1287 board_cache_updating = B_TRUE; 1288 } 1289 1290 mutex_exit(&board_flag_lock); 1291 (void) sgenv_get_board_info_data(); 1292 mutex_enter(&board_flag_lock); 1293 1294 if (board_cache_update_needed == B_FALSE) 1295 board_cache_updating = B_FALSE; 1296 } 1297 1298 mutex_exit(&board_flag_lock); 1299 1300 DCMN_ERR_THREAD(CE_NOTE, "Exiting %s", f); 1301 1302 board_thread_run = -1; 1303 thread_exit(); 1304 } 1305 1306 1307 /* 1308 * We always return what is in the board_cache. 
It is up to the SC to ensure 1309 * that the board_cache is current by sending events to us when something 1310 * changes. The cache will then be updated by going to the SC to get the 1311 * new data. That way the kstat_update code can always be sure that it gets 1312 * current data without having to wait while the SC responds (slowly) to our 1313 * request for data. 1314 * 1315 * The way the update and snapshot code works, we cannot be guaranteed that 1316 * someone won't grab the board_cache_lock between the update and snapshot 1317 * calls so we use a snapshot buffer of the board_cache. We cannot hold 1318 * any locks across the calls from the update to the snapshot as we are 1319 * not guaranteed that the snapshot function will be called. So we create 1320 * the snapshot of the board_cache in the update routine and dump this to the 1321 * kstat user buffer in the snapshot routine. (There are error conditions in 1322 * which the snapshot will not be called by the kstat framework so we need 1323 * to handle these appropriately.) 1324 */ 1325 static int 1326 sgenv_board_info_kstat_update(kstat_t *ksp, int rw) 1327 { 1328 int i; 1329 1330 switch (rw) { 1331 case KSTAT_WRITE: 1332 /* 1333 * Write not permitted 1334 */ 1335 return (EACCES); 1336 1337 case KSTAT_READ: 1338 /* 1339 * The board_cache is created during startup, and so should be 1340 * available before a user can log in and trigger a kstat read, 1341 * but we check just in case. 1342 */ 1343 if (board_cache_updated == FALSE) 1344 return (ENXIO); 1345 1346 mutex_enter(&board_cache_lock); 1347 1348 /* 1349 * Set <ks_data_size> to the new number of board readings so 1350 * that the snapshot routine can allocate the correctly sized 1351 * kstat. 
1352 */ 1353 ksp->ks_data_size = board_count * sizeof (sg_board_info_t); 1354 1355 board_count_snapshot = board_count; 1356 1357 /* 1358 * We are now guaranteed that that board_cache is not in flux 1359 * (as we have the lock) so we take a copy of the board_cache 1360 * into the board_cache_snapshot so that the snapshot routine 1361 * can copy it from the board_cache_snapshot into the user kstat 1362 * buffer. 1363 */ 1364 for (i = 0; i < SG_MAX_BDS; i++) { 1365 board_cache_snapshot[i] = board_cache[i]; 1366 } 1367 1368 mutex_exit(&board_cache_lock); 1369 1370 return (0); 1371 1372 default: 1373 return (EINVAL); 1374 } 1375 } 1376 1377 static int 1378 sgenv_board_info_kstat_snapshot(kstat_t *ksp, void *buf, int rw) 1379 { 1380 DCMN_ERR_S(f, "sgenv_board_info_kstat_snapshot()"); 1381 1382 sg_board_info_t *bdp; 1383 int i, num_bds = 0; 1384 1385 switch (rw) { 1386 case KSTAT_WRITE: 1387 /* 1388 * Write not permitted 1389 */ 1390 return (EACCES); 1391 1392 case KSTAT_READ: 1393 1394 if (board_cache_updated == FALSE) { 1395 ksp->ks_data_size = 0; 1396 ksp->ks_data = NULL; 1397 return (ENOMEM); 1398 } 1399 1400 /* 1401 * Update the snap_time with the last time we got fresh data 1402 * from the SC. 1403 */ 1404 ksp->ks_snaptime = last_board_read_time; 1405 1406 ASSERT(board_count_snapshot <= SG_MAX_BDS); 1407 /* 1408 * For each entry in the board_cache_snapshot we check to see 1409 * if the node_id is != NULL before we copy it into 1410 * the kstat buf. 1411 */ 1412 for (i = 0; i < SG_MAX_BDS; i++) { 1413 bdp = &board_cache_snapshot[i]; 1414 DCMN_ERR_CACHE(CE_NOTE, "%s: looking at " 1415 "cache_snapshot entry[%d], node=%d", 1416 f, i, bdp->node_id); 1417 if (bdp->node_id >= 0) { 1418 /* 1419 * Need a check to ensure that the buf 1420 * is still within the allocated size. 1421 * We check how many boards are already 1422 * in the user buf before adding one. 
1423 */ 1424 num_bds++; 1425 if (num_bds > board_count_snapshot) { 1426 ksp->ks_data_size = 0; 1427 ksp->ks_data = NULL; 1428 DCMN_ERR(CE_WARN, "%s: buf overflow." 1429 " %d >= %d.", 1430 f, num_bds, board_count_snapshot); 1431 return (EIO); 1432 } 1433 1434 DCMN_ERR_CACHE(CE_NOTE, "%s: about to bcopy" 1435 " cache_snapshot entry[%d], node=%d," 1436 " board=%d", f, i, bdp->node_id, 1437 bdp->board_num); 1438 bcopy(bdp, buf, sizeof (sg_board_info_t)); 1439 buf = ((sg_board_info_t *)buf) + 1; 1440 } 1441 } 1442 return (0); 1443 1444 default: 1445 return (EINVAL); 1446 } 1447 } 1448 1449 1450 /* 1451 * This function coordinates reading the env data from the SC. 1452 * 1453 * ERROR: 1454 * If an error occurs while making a call to the mailbox and we have data 1455 * in the cache from a previous call to the SC, we return an error of 0. 1456 * That way the kstat framework will return the old data instead of 1457 * returning an error and an empty kstat. 1458 */ 1459 static int 1460 sgenv_get_env_info_data(void) 1461 { 1462 DCMN_ERR_S(f, "sgenv_get_env_info_data()"); 1463 1464 envresp_key_t new_keys[SGENV_MAX_HPU_KEYS] = {0}; 1465 envresp_key_t old_key; 1466 envresp_key_t key; 1467 1468 int i; 1469 1470 int err = 0; /* return value of func's which get env data */ 1471 int status = 0; /* reason why env data func returned an error */ 1472 1473 DCMN_ERR_EVENT(CE_NOTE, "%s: entered.", f); 1474 1475 err = sgenv_get_hpu_keys(new_keys, &status); 1476 1477 if (err != 0) { 1478 /* 1479 * If we get an error getting the key values, then we return 1480 * as we cannot proceed any farther. If there is old env data 1481 * in the cache, then we return zero so that the kstat 1482 * framework will export the old data. 
1483 */ 1484 if (env_cache_updated == FALSE) { 1485 sgenv_mbox_error_msg("HPU Keys", err, status); 1486 return (err); 1487 } else { 1488 sgenv_mbox_error_msg("HPU Keys", err, status); 1489 return (0); 1490 } 1491 } 1492 1493 1494 for (i = 0; i < SGENV_MAX_HPU_KEYS; i++) { 1495 1496 if (vol_sensor_count[i] == 0) { 1497 /* empty collection */ 1498 old_key = 0; 1499 } else { 1500 /* 1501 * populated collection: 1502 * (assert size is OK, and 1st sensor is pseudo-sensor) 1503 */ 1504 ASSERT(env_cache[i] != NULL); 1505 ASSERT(env_cache[i][0].sd_id.id.sensor_part == 1506 SG_SENSOR_PART_SCAPP); 1507 ASSERT(env_cache[i][0].sd_id.id.sensor_type == 1508 SG_SENSOR_TYPE_ENVDB); 1509 ASSERT(SG_INFO_VALUESTATUS(env_cache[i][0].sd_infostamp) 1510 == SG_INFO_VALUE_OK); 1511 1512 old_key = env_cache[i][0].sd_value; 1513 } 1514 1515 key = new_keys[i]; 1516 1517 /* 1518 * No data is associated with this key position and there was 1519 * no data on the previous read either so we simply continue 1520 * to the next key position. 1521 */ 1522 if ((key == 0) && (old_key == 0)) { 1523 ASSERT(env_cache[i] == NULL); 1524 continue; 1525 } 1526 1527 1528 /* 1529 * We need to grab this lock every time we are going to 1530 * update a HPU. However, a kstat_read can grab 1531 * the env_cache_lock when it wants to get a snapshot of 1532 * the env_cache. This has the affect of stopping the 1533 * active env_cache writer after they have updated the 1534 * active HPU, allowing the kstat_read to get a dump of 1535 * the env_cache, then the env_cache writer can resume 1536 * updating the cache. For performance it is more important 1537 * that the kstat_read completes quickly so we allow the 1538 * kstat_read to interrupt the updating of the env_cache. 1539 * The updating can take anything from a few seconds to 1540 * several minutes to complete. 
1541 */ 1542 mutex_enter(&env_cache_lock); 1543 1544 /* 1545 * If the key just read is zero, then the 1546 * group of sensors have been removed by 1547 * some means and we need to zero out 1548 * the env_cache. (this ensures that data 1549 * belonging to a removed board is not 1550 * returned) 1551 */ 1552 if (key == 0) { 1553 ASSERT(old_key != 0); 1554 (void) sgenv_clear_env_cache_entry(i); 1555 mutex_exit(&env_cache_lock); 1556 continue; 1557 } 1558 1559 /* 1560 * Check to see if this key has changed since 1561 * the last read. 1562 * 1563 * If it has changed, we need to update everything. 1564 * 1565 * If it hasn't we simply read the volatiles 1566 * and check to see if the constants have changed. 1567 */ 1568 if (key != old_key) { 1569 /* 1570 * If the key is non-zero, then a new HPU has 1571 * been added to the system or it has changed 1572 * somehow and we need to re-read everything. 1573 * (we also need to zero out the env_cache as 1574 * there may be less sensors returned now and 1575 * the old ones may not be overwritten) 1576 */ 1577 1578 /* 1579 * If the <env_cache> has not already been 1580 * allocated for this key position then we 1581 * go ahead and allocate it. 
1582 */ 1583 if (env_cache[i] == NULL) { 1584 err = sgenv_create_env_cache_entry(i); 1585 if (err == DDI_FAILURE) { 1586 mutex_exit(&env_cache_lock); 1587 continue; 1588 } 1589 } 1590 1591 err = sgenv_get_env_data(new_keys[i], i, 1592 SG_GET_ENV_CONSTANTS, &status); 1593 if (err) { 1594 err = sgenv_handle_env_data_error(err, status, 1595 i, old_key, "Constant Data"); 1596 mutex_exit(&env_cache_lock); 1597 if (err != DDI_FAILURE) { 1598 continue; 1599 } else if (env_cache_updated == TRUE) { 1600 return (0); 1601 } else { 1602 return (DDI_FAILURE); 1603 } 1604 } 1605 1606 err = sgenv_get_env_data(new_keys[i], i, 1607 SG_GET_ENV_THRESHOLDS, &status); 1608 if (err) { 1609 err = sgenv_handle_env_data_error(err, status, 1610 i, old_key, "Threshold Data"); 1611 mutex_exit(&env_cache_lock); 1612 if (err != DDI_FAILURE) { 1613 continue; 1614 } else if (env_cache_updated == TRUE) { 1615 return (0); 1616 } else { 1617 return (DDI_FAILURE); 1618 } 1619 } 1620 1621 err = sgenv_get_env_data(new_keys[i], i, 1622 SG_GET_ENV_VOLATILES, &status); 1623 if (err) { 1624 err = sgenv_handle_env_data_error(err, status, 1625 i, old_key, "Volatile Data (fresh)"); 1626 mutex_exit(&env_cache_lock); 1627 if (err != DDI_FAILURE) { 1628 continue; 1629 } else if (env_cache_updated == TRUE) { 1630 return (0); 1631 } else { 1632 return (DDI_FAILURE); 1633 } 1634 } 1635 1636 /* 1637 * As we have successfully got env data for a HPU, 1638 * we ensure <env_cache_updated> is set to TRUE so that 1639 * in the future, if an error occurs during the mailbox 1640 * transfer, we know that there is old data for at 1641 * least one HPU in the <env_cache> which could be 1642 * returned instead of returning an error to the kstat 1643 * framework indicating that we have no data to return. 1644 */ 1645 env_cache_updated = TRUE; 1646 last_env_read_time = gethrtime(); 1647 1648 } else { 1649 /* 1650 * key == old_key 1651 * 1652 * Handle the case when the value of the old key and 1653 * the new key are identical. 
1654 */ 1655 ASSERT(env_cache[i] != NULL); 1656 1657 /* 1658 * If the keys are identical, then the quasi-constants 1659 * should not have changed (and so don't need updating). 1660 * Similarly for the threshold readings. 1661 */ 1662 1663 /* Update the volatile data */ 1664 err = sgenv_get_env_data(new_keys[i], i, 1665 SG_GET_ENV_VOLATILES, &status); 1666 if (err) { 1667 err = sgenv_handle_env_data_error(err, status, 1668 i, old_key, "Volatile Data (update)"); 1669 mutex_exit(&env_cache_lock); 1670 if (err == DDI_FAILURE) { 1671 return (0); 1672 } else { 1673 continue; 1674 } 1675 } 1676 1677 } 1678 mutex_exit(&env_cache_lock); 1679 } 1680 1681 return (0); 1682 } 1683 1684 1685 static int 1686 sgenv_get_board_info_data(void) 1687 { 1688 /* 1689 * This array keeps track of the valid nodes in a system. A call is 1690 * made to OBP to get the "nodeid" property from all the ssm nodes, 1691 * and for each nodeid found, that position in the array is set to 1692 * TRUE. For a Serengeti only one position in the array will be TRUE. 
1693 */ 1694 static uint_t node_present[SSM_MAX_INSTANCES] = {SGENV_NO_NODE_EXISTS}; 1695 1696 static fn_t f = "sgenv_get_board_info_data()"; 1697 static int first_time = TRUE; 1698 1699 sbbc_msg_t req; 1700 sbbc_msg_t resp; 1701 int node; /* loop index */ 1702 int board; /* loop index */ 1703 show_board_t show_bd, *shbp = &show_bd; 1704 info_t inform; 1705 int status; /* msg_status returned by response */ 1706 int rv = 0; /* return value of call to mailbox */ 1707 sg_board_info_t *ptr; 1708 1709 DCMN_ERR_EVENT(CE_NOTE, "%s: entered.", f); 1710 1711 if (first_time) { 1712 sgenv_set_valid_node_positions(node_present); 1713 first_time = FALSE; 1714 } 1715 1716 for (node = 0; node < SSM_MAX_INSTANCES; node++) { 1717 1718 if (node_present[node] == SGENV_NO_NODE_EXISTS) 1719 continue; 1720 1721 for (board = 0; board < SG_MAX_BDS; board++) { 1722 1723 /* 1724 * If we have discovered in a previous call to the SC 1725 * that there is no board in this slot on this type of 1726 * chassis then we don't waste resources asking the SC 1727 * for nonexistent data. 
1728 */ 1729 if ((node_present[node] & (1 << board)) == 0) 1730 continue; 1731 1732 inform.board = board; 1733 inform.node = node; 1734 inform.revision = 0xdead; 1735 1736 req.msg_type.type = DR_MBOX; 1737 req.msg_type.sub_type = DR_MBOX_SHOW_BOARD; 1738 req.msg_status = SG_MBOX_STATUS_SUCCESS; 1739 req.msg_len = sizeof (info_t); 1740 req.msg_bytes = sizeof (info_t); 1741 req.msg_buf = (caddr_t)&inform; 1742 1743 bzero(shbp, sizeof (show_board_t)); 1744 shbp->s_cond = -1; 1745 shbp->s_power = -1; 1746 shbp->s_assigned = -1; 1747 shbp->s_claimed = -1; 1748 shbp->s_present = -1; 1749 1750 resp.msg_type.type = DR_MBOX; 1751 resp.msg_type.sub_type = DR_MBOX_SHOW_BOARD; 1752 resp.msg_bytes = sizeof (show_board_t); 1753 resp.msg_status = SG_MBOX_STATUS_SUCCESS; 1754 resp.msg_len = sizeof (show_board_t); 1755 resp.msg_buf = (caddr_t)shbp; 1756 1757 1758 /* 1759 * We want to avoid the case where an invalid time 1760 * is specified by a user (by patching the 1761 * global variable <sgenv_max_mbox_wait_time>). 1762 * 1763 * Any incorrect values are reset to the default time. 1764 */ 1765 if (sgenv_max_mbox_wait_time <= 1766 max(sbbc_mbox_min_timeout, 0)) 1767 sgenv_max_mbox_wait_time = 1768 sbbc_mbox_default_timeout; 1769 1770 rv = sbbc_mbox_request_response(&req, &resp, 1771 sgenv_max_mbox_wait_time); 1772 status = resp.msg_status; 1773 1774 if ((rv) || (status != SG_MBOX_STATUS_SUCCESS)) { 1775 /* 1776 * errors from Solaris sgsbbc driver 1777 */ 1778 if (status > SG_MBOX_STATUS_SUCCESS) { 1779 sgenv_mbox_error_msg("Board Info", rv, 1780 resp.msg_status); 1781 return (rv); 1782 } 1783 1784 /* 1785 * errors from SCAPP 1786 */ 1787 if (status == SG_MBOX_STATUS_ILLEGAL_NODE) { 1788 sgenv_mbox_error_msg("Board Info", rv, 1789 resp.msg_status); 1790 node_present[node] = 1791 SGENV_NO_NODE_EXISTS; 1792 1793 /* 1794 * No point looping through the rest of 1795 * the boards associated with this node. 
1796 */ 1797 break; 1798 1799 } else if (status == 1800 SG_MBOX_STATUS_ILLEGAL_SLOT) { 1801 1802 /* 1803 * We clear the bit representing <board> 1804 * in <node> to indicate that this slot 1805 * cannot exist on this chassis. 1806 */ 1807 node_present[node] &= (~(1 << board) & 1808 SGENV_NODE_TYPE_DS); 1809 continue; 1810 1811 } else if (status == 1812 SG_MBOX_STATUS_BOARD_ACCESS_DENIED) { 1813 /* 1814 * We cannot access data for this slot, 1815 * however we may be able to do so in 1816 * the future. We do nothing. 1817 */ 1818 rv = rv; 1819 } else { 1820 char err_msg[40]; 1821 1822 (void) sprintf(err_msg, 1823 "Board data for " 1824 "Node%d/Slot%d", node, board); 1825 sgenv_mbox_error_msg(err_msg, rv, 1826 resp.msg_status); 1827 1828 if (rv == 0) 1829 rv = status; 1830 1831 continue; 1832 } 1833 } 1834 1835 mutex_enter(&board_cache_lock); 1836 ptr = &board_cache[board]; 1837 1838 /* 1839 * Check if the SC returns data for this board. 1840 */ 1841 if (shbp->s_assigned == -1) { 1842 /* 1843 * If this cache entry used to have data and 1844 * now doesn't we decrement the board_count 1845 * clear the env_cache. The board must have 1846 * been removed. 1847 */ 1848 if (ptr->node_id != -1) { 1849 board_count--; 1850 1851 /* 1852 * clear board_cache entry by 1853 * setting node_id to -1; 1854 */ 1855 ptr->node_id = -1; 1856 DCMN_ERR_CACHE(CE_NOTE, "%s: " 1857 "Clearing cache line %d [%p]", 1858 f, board, (void *)ptr); 1859 } 1860 } else { 1861 /* 1862 * If this cache entry was previously empty 1863 * and we now have data for it we increment 1864 * the board_count. A new board must have 1865 * been added. 
1866 */ 1867 if (ptr->node_id == -1) 1868 board_count++; 1869 /* 1870 * update the board_cache entry 1871 */ 1872 DCMN_ERR_CACHE(CE_NOTE, "%s: " 1873 "Writing data for bd=%d into " 1874 " the board_cache at [%p]", 1875 f, board, (void *)ptr); 1876 ptr->node_id = node; 1877 ptr->board_num = board; 1878 ptr->condition = shbp->s_cond; 1879 ptr->assigned = shbp->s_assigned; 1880 ptr->claimed = shbp->s_claimed; 1881 ptr->present = shbp->s_present; 1882 ptr->led.led_status = 1883 shbp->s_ledstatus; 1884 last_board_read_time = gethrtime(); 1885 } 1886 mutex_exit(&board_cache_lock); 1887 } /* board */ 1888 } /* node */ 1889 1890 /* 1891 * Indicate that have managed to store valid data in the <board_cache> 1892 * at least once. 1893 */ 1894 if (board_count > 0) 1895 board_cache_updated = TRUE; 1896 1897 1898 return (rv); 1899 } 1900 1901 1902 static int 1903 sgenv_get_hpu_keys(envresp_key_t *new, int *status) 1904 { 1905 sbbc_msg_t req; /* request */ 1906 sbbc_msg_t resp; /* response */ 1907 1908 int rv; /* return value from call to mbox */ 1909 1910 req.msg_type.type = SG_ENV; 1911 req.msg_type.sub_type = SG_GET_ENV_HPU_KEYS; 1912 req.msg_status = SG_MBOX_STATUS_SUCCESS; 1913 req.msg_len = 0; 1914 req.msg_bytes = 0; 1915 1916 resp.msg_type.type = SG_ENV; 1917 resp.msg_type.sub_type = SG_GET_ENV_HPU_KEYS; 1918 resp.msg_status = SG_MBOX_STATUS_SUCCESS; 1919 resp.msg_len = sizeof (envresp_key_t) * SGENV_MAX_HPU_KEYS; 1920 resp.msg_bytes = 0; 1921 resp.msg_buf = (caddr_t)new; 1922 1923 /* 1924 * We want to avoid the case where an invalid time 1925 * is specified by a user (by patching the 1926 * global variable <sgenv_max_mbox_wait_time>). 1927 * 1928 * Any incorrect values are reset to the default time. 
1929 */ 1930 if (sgenv_max_mbox_wait_time <= max(sbbc_mbox_min_timeout, 0)) 1931 sgenv_max_mbox_wait_time = sbbc_mbox_default_timeout; 1932 1933 rv = sbbc_mbox_request_response(&req, &resp, sgenv_max_mbox_wait_time); 1934 1935 *status = resp.msg_status; 1936 1937 return (rv); 1938 } 1939 1940 1941 static int 1942 sgenv_get_env_data(envresp_key_t key, int key_posn, uint16_t flag, int *status) 1943 { 1944 /* 1945 * Only one of these buffers is ever going to be used in a call 1946 * so to save kernel stack space we use a union. 1947 */ 1948 union { 1949 envresp_constants_t con[SGENV_MAX_SENSORS_PER_KEY]; 1950 envresp_volatiles_t vol[SGENV_MAX_SENSORS_PER_KEY]; 1951 envresp_thresholds_t thr[SGENV_MAX_SENSORS_PER_KEY]; 1952 } buf; 1953 1954 sbbc_msg_t req; /* request */ 1955 sbbc_msg_t resp; /* response */ 1956 1957 int i; /* loop variable for mbox msg_buf */ 1958 int rv; /* return value from call to mbox */ 1959 1960 ASSERT(MUTEX_HELD(&env_cache_lock)); 1961 ASSERT(env_cache[key_posn] != NULL); 1962 1963 if (flag == SG_GET_ENV_CONSTANTS) { 1964 resp.msg_len = sizeof (buf.con); 1965 resp.msg_buf = (caddr_t)buf.con; 1966 1967 } else if (flag == SG_GET_ENV_VOLATILES) { 1968 resp.msg_len = sizeof (buf.vol); 1969 resp.msg_buf = (caddr_t)buf.vol; 1970 1971 } else if (flag == SG_GET_ENV_THRESHOLDS) { 1972 resp.msg_len = sizeof (buf.thr); 1973 resp.msg_buf = (caddr_t)buf.thr; 1974 1975 } else { 1976 *status = EINVAL; 1977 return (-1); 1978 } 1979 1980 req.msg_type.type = SG_ENV; 1981 req.msg_type.sub_type = flag; 1982 req.msg_status = SG_MBOX_STATUS_SUCCESS; 1983 req.msg_len = 0; 1984 req.msg_bytes = 0; 1985 req.msg_data[0] = key; 1986 1987 resp.msg_type.type = SG_ENV; 1988 resp.msg_type.sub_type = flag; 1989 resp.msg_status = SG_MBOX_STATUS_SUCCESS; 1990 resp.msg_bytes = 0; 1991 1992 /* 1993 * We want to avoid the case where an invalid time 1994 * is specified by a user (by patching the 1995 * global variable <sgenv_max_mbox_wait_time>). 
1996 * 1997 * Any incorrect values are reset to the default time. 1998 */ 1999 if (sgenv_max_mbox_wait_time <= max(sbbc_mbox_min_timeout, 0)) 2000 sgenv_max_mbox_wait_time = sbbc_mbox_default_timeout; 2001 2002 2003 rv = sbbc_mbox_request_response(&req, &resp, sgenv_max_mbox_wait_time); 2004 2005 *status = resp.msg_status; 2006 2007 /* 2008 * We now check that the data returned is valid. 2009 */ 2010 if (rv != 0) { 2011 /* 2012 * The SBBC driver encountered an error. 2013 */ 2014 return (rv); 2015 2016 } else { 2017 /* 2018 * The SC encountered an error. 2019 */ 2020 switch (*status) { 2021 case SG_MBOX_STATUS_SUCCESS: 2022 /* 2023 * No problems encountered - continue and return the 2024 * new data. 2025 */ 2026 break; 2027 2028 case ETIMEDOUT: 2029 /* 2030 * For some reason the mailbox failed to return data 2031 * and instead timed out so we return ETIMEDOUT 2032 */ 2033 return (ETIMEDOUT); 2034 2035 case ENXIO: 2036 /* 2037 * no sensors associated with this key, this may have 2038 * changed since we read the keys. 2039 */ 2040 return (ENXIO); 2041 2042 default: 2043 /* 2044 * The contents of the mbox message contain corrupt 2045 * data. Flag this as an error to be returned. 2046 */ 2047 SGENV_PRINT_MBOX_MSG((&resp), "Env info problem"); 2048 return (EINVAL); 2049 } 2050 } 2051 2052 /* 2053 * Depending on the type of data returned, save the constant/volatile 2054 * data returned in the mailbox message into the <env_cache>. 
2055 */ 2056 for (i = 0; i < resp.msg_data[0]; i++) { 2057 2058 if (flag == SG_GET_ENV_CONSTANTS) { 2059 env_cache[key_posn][i].sd_id.tag_id = 2060 buf.con[i].id.tag_id; 2061 env_cache[key_posn][i].sd_lo = 2062 buf.con[i].lo; 2063 env_cache[key_posn][i].sd_hi = 2064 buf.con[i].hi; 2065 2066 } else if (flag == SG_GET_ENV_VOLATILES) { 2067 env_cache[key_posn][i].sd_value = 2068 buf.vol[i].value; 2069 env_cache[key_posn][i].sd_infostamp = 2070 buf.vol[i].info; 2071 2072 sgenv_set_sensor_status(&env_cache[key_posn][i]); 2073 2074 } else if (flag == SG_GET_ENV_THRESHOLDS) { 2075 env_cache[key_posn][i].sd_lo_warn = 2076 buf.thr[i].lo_warn; 2077 env_cache[key_posn][i].sd_hi_warn = 2078 buf.thr[i].hi_warn; 2079 } 2080 } 2081 2082 if (flag == SG_GET_ENV_VOLATILES) 2083 vol_sensor_count[key_posn] = resp.msg_data[0]; 2084 2085 return (rv); 2086 } 2087 2088 2089 /* 2090 * This function handles any errors received from the mailbox framework while 2091 * getting environmental data. 2092 * 2093 * INPUT PARAMETERS 2094 * err - return value from call to mailbox framework. 2095 * status - message status returned by mailbox framework. 2096 * key - key from previous (if any) reading of env data. 2097 * Needed to see if we have old data in the <env_cache>. 2098 * str - String indicating what type of env request failed. 2099 * 2100 * RETURN VALUES 2101 * rv == DDI_FAILURE - there is no point in continuing processing 2102 * the data, we should exit from the kstat 2103 * framework. 2104 * rv != DDI_FAILURE - error has been handled correctly, continue 2105 * processing the data returned from the SC. 2106 */ 2107 static int 2108 sgenv_handle_env_data_error(int err, int status, int key_posn, 2109 envresp_key_t key, char *str) 2110 { 2111 int rv = DDI_SUCCESS; 2112 2113 ASSERT(str != (char *)NULL); 2114 2115 switch (err) { 2116 case ENXIO: 2117 /* 2118 * The SC has changed the env data associated with this key 2119 * since we started getting the data. 
We cannot tell if the 2120 * data has disappeared due to the removal of the board from 2121 * our Domain or just that the data has been updated. We 2122 * simply return the last known data (if possible) and the 2123 * next time we request the env data, the SC will have 2124 * finished processing this board so we will receive the 2125 * correct key values and we can get the correct data. 2126 */ 2127 DCMN_ERR_CACHE(CE_NOTE, "key @ posn %d has changed from %d" 2128 " while %s", key_posn, key, str); 2129 rv = ENXIO; 2130 break; 2131 2132 default: 2133 sgenv_mbox_error_msg(str, err, status); 2134 rv = DDI_FAILURE; 2135 break; 2136 } 2137 2138 /* 2139 * If there was no data in the <env_cache>, we need to clear the data 2140 * just added as the <env_cache> will only be partially filled. 2141 */ 2142 if (key == 0) 2143 sgenv_clear_env_cache_entry(key_posn); 2144 2145 return (rv); 2146 } 2147 2148 2149 /* 2150 * If the sensor readings for a particular collection of HPUs become invalid, 2151 * then we clear the cache by freeing up the memory. 
2152 */ 2153 static void 2154 sgenv_clear_env_cache_entry(int key_posn) 2155 { 2156 ASSERT(MUTEX_HELD(&env_cache_lock)); 2157 2158 if (env_cache[key_posn] != NULL) { 2159 kmem_free(env_cache[key_posn], sizeof (env_sensor_t) * 2160 SGENV_MAX_SENSORS_PER_KEY); 2161 env_cache[key_posn] = NULL; 2162 vol_sensor_count[key_posn] = 0; 2163 } 2164 } 2165 2166 2167 static void 2168 sgenv_mbox_error_msg(char *str, int err, int status) 2169 { 2170 /* 2171 * We update the count of errors we have encountered during calls to 2172 * the mailbox framework (unless we will cause a wraparound) 2173 */ 2174 if (sgenv_mbox_error_count < INT_MAX) 2175 sgenv_mbox_error_count++; 2176 2177 #ifdef DEBUG 2178 if ((sgenv_debug & SGENV_DEBUG_MSG) == 0) 2179 return; 2180 2181 ASSERT(str != NULL); 2182 2183 switch (err) { 2184 case ENOTSUP: 2185 DCMN_ERR(CE_WARN, "!This system configuration does not " 2186 "support SGENV"); 2187 break; 2188 case ETIMEDOUT: 2189 DCMN_ERR(CE_WARN, "!Mailbox timed out while servicing " 2190 "SGENV request for %s", str); 2191 break; 2192 default: 2193 DCMN_ERR(CE_WARN, "!Error occurred reading %s, Errno=%d," 2194 " Status=%d", str, err, status); 2195 break; 2196 } 2197 #endif 2198 } 2199 2200 2201 /* 2202 * INPUT PARAMETERS 2203 * key_posn - The position in the env_cache for which we want to 2204 * allocate space for a HPU's env data. 2205 * 2206 * ERROR VALUES 2207 * DDI_FAILURE - We failed to allocate memory for this cache entry. 2208 * There is no point asking the SC for env data for this 2209 * HPU as we will have nowhere to store it. 
2210 */ 2211 static int 2212 sgenv_create_env_cache_entry(int key_posn) 2213 { 2214 int i; /* used to loop thru each sensor to set the status */ 2215 2216 ASSERT(key_posn < SGENV_MAX_HPU_KEYS); 2217 ASSERT(key_posn >= 0); 2218 2219 env_cache[key_posn] = (env_sensor_t *)kmem_zalloc( 2220 sizeof (env_sensor_t) * SGENV_MAX_SENSORS_PER_KEY, KM_NOSLEEP); 2221 if (env_cache[key_posn] == NULL) { 2222 cmn_err(CE_WARN, "Failed to allocate memory for env_cache[%d]", 2223 key_posn); 2224 return (DDI_FAILURE); 2225 } 2226 2227 for (i = 0; i < SGENV_MAX_SENSORS_PER_KEY; i++) 2228 env_cache[key_posn][i].sd_status = SG_SENSOR_STATUS_OK; 2229 2230 return (DDI_SUCCESS); 2231 } 2232 2233 2234 static void 2235 sgenv_destroy_env_cache(void) 2236 { 2237 int i; 2238 2239 ASSERT(MUTEX_HELD(&env_cache_lock) == FALSE); 2240 mutex_enter(&env_cache_lock); 2241 for (i = 0; i < SGENV_MAX_HPU_KEYS; i++) { 2242 if (env_cache[i] != NULL) { 2243 kmem_free(env_cache[i], sizeof (env_sensor_t) * 2244 SGENV_MAX_SENSORS_PER_KEY); 2245 env_cache[i] = NULL; 2246 vol_sensor_count[i] = 0; 2247 } 2248 } 2249 env_cache_updated = FALSE; 2250 2251 mutex_exit(&env_cache_lock); 2252 } 2253 2254 static void 2255 sgenv_update_env_kstat_size(kstat_t *ksp) 2256 { 2257 int i; 2258 2259 ASSERT(MUTEX_HELD(&env_cache_lock)); 2260 2261 /* reinitialize this and recount number of sensors */ 2262 ksp->ks_data_size = 0; 2263 2264 for (i = 0; i < SGENV_MAX_HPU_KEYS; i++) { 2265 if (vol_sensor_count[i] <= 0) 2266 continue; 2267 2268 ASSERT(vol_sensor_count[i] <= SGENV_MAX_SENSORS_PER_KEY); 2269 2270 /* 2271 * increment ksp->ks_data_size by the number of 2272 * sensors in the collection <i>. 2273 */ 2274 ksp->ks_data_size += vol_sensor_count[i] * 2275 sizeof (env_sensor_t); 2276 } 2277 ASSERT(ksp->ks_data_size >= 0); 2278 } 2279 2280 2281 /* 2282 * This function is triggered by the thread that updates the env_cache. 
2283 * It checks for any sensors which have exceeded their limits/thresholds 2284 * and generates sysevents for the sensor values that have changed. 2285 */ 2286 /*ARGSUSED*/ 2287 static uint_t 2288 sgenv_check_sensor_thresholds(void) 2289 { 2290 DCMN_ERR_S(f, "sgenv_poll_env()"); 2291 2292 int key; /* loop through keys */ 2293 int i; /* loops through each sensor for each <key> */ 2294 2295 env_sensor_t sensor; 2296 env_sensor_status_t status; 2297 2298 DCMN_ERR_EVENT(CE_NOTE, "%s: just been triggered.", f); 2299 2300 mutex_enter(&env_cache_lock); 2301 2302 for (key = 0; key < SGENV_MAX_HPU_KEYS; key++) { 2303 2304 if (vol_sensor_count[key] == 0) 2305 continue; 2306 2307 for (i = 0; i < vol_sensor_count[key]; i++) { 2308 sensor = env_cache[key][i]; 2309 status = sensor.sd_status; 2310 2311 if (SG_GET_SENSOR_STATUS(status) == 2312 SG_GET_PREV_SENSOR_STATUS(status)) { 2313 continue; 2314 } 2315 2316 /* 2317 * This sensor has changed in status since the last 2318 * time we polled - we need to inform the sysevent 2319 * framework. 2320 */ 2321 switch (sensor.sd_id.id.sensor_type) { 2322 /* 2323 * we don't care about the pseudo sensors and 2324 * the Fan Status is notified by a separate 2325 * unsolicited event so we simply get the next 2326 * reading 2327 */ 2328 case SG_SENSOR_TYPE_ENVDB: 2329 case SG_SENSOR_TYPE_COOLING: 2330 continue; 2331 2332 /* 2333 * We have handled all the special cases by now. 2334 */ 2335 default: 2336 (void) sgenv_process_threshold_event(sensor); 2337 break; 2338 } 2339 2340 SGENV_PRINT_POLL_INFO(sensor); 2341 } 2342 } 2343 mutex_exit(&env_cache_lock); 2344 2345 return (DDI_SUCCESS); 2346 } 2347 2348 2349 /* 2350 * This function is passed in an array of length SSM_MAX_INSTANCES and 2351 * it searches OBP to for ssm nodes, and for each one if finds, it sets the 2352 * corresponding position in the array to TRUE. 
2353 */ 2354 static void 2355 sgenv_set_valid_node_positions(uint_t *node_present) 2356 { 2357 dev_info_t *rdip; /* root dev info ptr */ 2358 dev_info_t *dip; 2359 2360 ASSERT(node_present != NULL); 2361 2362 rdip = ddi_root_node(); 2363 2364 for (dip = ddi_get_child(rdip); dip != NULL; 2365 dip = ddi_get_next_sibling(dip)) { 2366 if (strncmp("ssm", ddi_node_name(dip), 3) == 0) { 2367 int value; 2368 2369 value = ddi_getprop(DDI_DEV_T_ANY, dip, 2370 DDI_PROP_DONTPASS, "nodeid", 0); 2371 2372 /* 2373 * If we get a valid nodeID which has not already 2374 * been found in a previous call to this function, 2375 * then we set all 10 LSB bits to indicate there may 2376 * be a board present in each slot. 2377 * 2378 * It is the job of sgenv_get_board_info_data() to weed 2379 * out the invalid cases when we don't have a 2380 * DS chassis. 2381 * 2382 * NOTE: We make the assumption that a chassis cannot 2383 * be DR'ed out, which is true for a Serengeti. 2384 * By the time WildCat need this functionality Solaris 2385 * will be able to know what kind of a chassis is 2386 * present and there will be no need to try and work 2387 * this out from the msg_status from the mailbox. 2388 */ 2389 if ((value >= 0) && 2390 (value < SSM_MAX_INSTANCES) && 2391 (node_present[value] == SGENV_NO_NODE_EXISTS)) { 2392 node_present[value] = SGENV_NODE_TYPE_DS; 2393 } 2394 2395 } 2396 } 2397 } 2398 2399 2400 static void 2401 sgenv_set_sensor_status(env_sensor_t *sensor) 2402 { 2403 env_sensor_status_t *status; 2404 2405 ASSERT(sensor != NULL); 2406 status = &sensor->sd_status; 2407 2408 /* 2409 * Save the previous status so we can compare them later 2410 */ 2411 SG_SET_PREV_SENSOR_STATUS(*status, *status); 2412 2413 switch (sensor->sd_id.id.sensor_type) { 2414 case SG_SENSOR_TYPE_ENVDB: 2415 /* 2416 * We want the status of this sensor to always be OK 2417 * The concept of limits/thresholds do not exist for it. 
2418 */ 2419 SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_OK); 2420 break; 2421 2422 case SG_SENSOR_TYPE_COOLING: 2423 /* 2424 * Fans have no concept of limits/thresholds, they have a state 2425 * which we store in the <sd_status> field so that we can see 2426 * when this state is changed. 2427 */ 2428 if (sensor->sd_value == SGENV_FAN_SPEED_HIGH) { 2429 SG_SET_SENSOR_STATUS(*status, 2430 SG_SENSOR_STATUS_FAN_HIGH); 2431 2432 } else if (sensor->sd_value == SGENV_FAN_SPEED_LOW) { 2433 SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_FAN_LOW); 2434 2435 } else if (sensor->sd_value == SGENV_FAN_SPEED_OFF) { 2436 SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_FAN_OFF); 2437 2438 } else { 2439 SG_SET_SENSOR_STATUS(*status, 2440 SG_SENSOR_STATUS_FAN_FAIL); 2441 } 2442 2443 /* 2444 * If this is the first time this fan status has been read, 2445 * then we need to initialize the previous reading to be the 2446 * same as the current reading so that an event is not 2447 * triggered. 2448 * 2449 * [ When the env_cache is being created, the status of the 2450 * sensors is set to SG_SENSOR_STATUS_OK, which is not a 2451 * valid Fan status ]. 
2452 */ 2453 if (SG_GET_PREV_SENSOR_STATUS(*status) == SG_SENSOR_STATUS_OK) { 2454 SG_SET_PREV_SENSOR_STATUS(*status, *status); 2455 } 2456 2457 break; 2458 2459 default: 2460 if (sensor->sd_value > sensor->sd_hi) { 2461 SG_SET_SENSOR_STATUS(*status, 2462 SG_SENSOR_STATUS_HI_DANGER); 2463 2464 } else if (sensor->sd_value > sensor->sd_hi_warn) { 2465 SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_HI_WARN); 2466 2467 } else if (sensor->sd_value < sensor->sd_lo) { 2468 SG_SET_SENSOR_STATUS(*status, 2469 SG_SENSOR_STATUS_LO_DANGER); 2470 2471 } else if (sensor->sd_value < sensor->sd_lo_warn) { 2472 SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_LO_WARN); 2473 2474 } else { 2475 SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_OK); 2476 } 2477 break; 2478 } 2479 } 2480 2481 2482 2483 2484 /* 2485 * This function, when given an integer arg describing a HPU type, 2486 * returns the descriptive string associated with this HPU type. 2487 */ 2488 static const char * 2489 sgenv_get_hpu_id_str(uint_t hpu_type) 2490 { 2491 const hpu_value_t *hpu_list = hpus; 2492 2493 while (hpu_list->name != (char *)NULL) { 2494 if (hpu_list->value == hpu_type) 2495 return (hpu_list->IDstr); 2496 else 2497 hpu_list++; 2498 } 2499 return ((char *)NULL); 2500 } 2501 2502 2503 /* 2504 * This function, when given an integer arg describing a sensor part, 2505 * returns the descriptive string associated with this sensor part. 2506 */ 2507 static const char * 2508 sgenv_get_part_str(uint_t sensor_part) 2509 { 2510 const part_value_t *part_list = parts; 2511 2512 while (part_list->name != (char *)NULL) { 2513 if (part_list->value == sensor_part) 2514 return (part_list->name); 2515 else 2516 part_list++; 2517 } 2518 return ((char *)NULL); 2519 } 2520 2521 2522 /* 2523 * This function, when given an integer arg describing a sensor type, 2524 * returns the descriptive string associated with this sensor type. 
2525 */ 2526 static const char * 2527 sgenv_get_type_str(uint_t sensor_type) 2528 { 2529 const type_value_t *type_list = types; 2530 2531 while (type_list->name != (char *)NULL) { 2532 if (type_list->value == sensor_type) 2533 return (type_list->name); 2534 else 2535 type_list++; 2536 } 2537 return ((char *)NULL); 2538 } 2539 2540 2541 /* 2542 * This function takes a sensor TagID and generates a string describing 2543 * where in the system the sensor is. 2544 */ 2545 static void 2546 sgenv_tagid_to_string(sensor_id_t id, char *str) 2547 { 2548 const char *hpu_str; 2549 const char *part_str; 2550 const char *type_str; 2551 2552 ASSERT(str != NULL); 2553 2554 hpu_str = sgenv_get_hpu_id_str(id.id.hpu_type); 2555 part_str = sgenv_get_part_str(id.id.sensor_part); 2556 type_str = sgenv_get_type_str(id.id.sensor_type); 2557 2558 (void) sprintf(str, 2559 "Sensor: Node=%d, Board=%s%d, Device=%s%d, Type=%s%d: reading has ", 2560 id.id.node_id, 2561 ((hpu_str != NULL) ? hpu_str : ""), 2562 id.id.hpu_slot, 2563 ((part_str != NULL) ? part_str : ""), 2564 id.id.sensor_partnum, 2565 ((type_str != NULL) ? type_str : ""), 2566 id.id.sensor_typenum); 2567 2568 } 2569 2570 2571 /* 2572 * This interrupt handler watches for unsolicited mailbox messages from the SC 2573 * telling it that the Keyswitch Position had changed. It then informs the 2574 * Sysevent Framework of this change. 
2575 */ 2576 static uint_t 2577 sgenv_keyswitch_handler(char *arg) 2578 { 2579 DCMN_ERR_S(f, "sgenv_keyswitch_handler()"); 2580 2581 sysevent_t *ev = NULL; 2582 sysevent_id_t eid; 2583 sysevent_value_t se_val; 2584 sysevent_attr_list_t *ev_attr_list = NULL; 2585 sg_event_key_position_t *payload = NULL; 2586 sbbc_msg_t *msg = NULL; 2587 int err; 2588 2589 DCMN_ERR_EVENT(CE_NOTE, "%s called", f); 2590 2591 if (arg == NULL) { 2592 DCMN_ERR_EVENT(CE_NOTE, "%s: arg == NULL", f); 2593 return (DDI_INTR_CLAIMED); 2594 } 2595 2596 msg = (sbbc_msg_t *)arg; 2597 if (msg->msg_buf == NULL) { 2598 DCMN_ERR_EVENT(CE_NOTE, "%s: msg_buf == NULL", f); 2599 return (DDI_INTR_CLAIMED); 2600 } 2601 2602 payload = (sg_event_key_position_t *)msg->msg_buf; 2603 if (payload == NULL) { 2604 DCMN_ERR_EVENT(CE_NOTE, "%s: payload == NULL", f); 2605 return (DDI_INTR_CLAIMED); 2606 } 2607 2608 DCMN_ERR_EVENT(CE_NOTE, "Key posn = %d", (int)*payload); 2609 2610 2611 /* 2612 * Allocate memory for sysevent buffer. 2613 */ 2614 ev = sysevent_alloc(EC_DOMAIN, ESC_DOMAIN_STATE_CHANGE, 2615 EP_SGENV, SE_NOSLEEP); 2616 if (ev == NULL) { 2617 cmn_err(CE_WARN, "%s: Failed to alloc mem for %s/%s event", 2618 f, EC_DOMAIN, ESC_DOMAIN_STATE_CHANGE); 2619 return (DDI_INTR_CLAIMED); 2620 } 2621 2622 2623 /* 2624 * Set the DOMAIN_WHAT_CHANGED attribute. 2625 */ 2626 se_val.value_type = SE_DATA_TYPE_STRING; 2627 se_val.value.sv_string = DOMAIN_KEYSWITCH; 2628 err = sysevent_add_attr(&ev_attr_list, DOMAIN_WHAT_CHANGED, 2629 &se_val, SE_NOSLEEP); 2630 if (err != 0) { 2631 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event", 2632 DOMAIN_WHAT_CHANGED, EC_DOMAIN, 2633 ESC_DOMAIN_STATE_CHANGE); 2634 sysevent_free(ev); 2635 return (DDI_INTR_CLAIMED); 2636 } 2637 2638 2639 /* 2640 * Log this event with sysevent framework. 
2641 */ 2642 if (sysevent_attach_attributes(ev, ev_attr_list) != 0) { 2643 cmn_err(CE_WARN, "Failed to attach attr list for %s/%s event", 2644 EC_DOMAIN, ESC_DOMAIN_STATE_CHANGE); 2645 sysevent_free_attr(ev_attr_list); 2646 sysevent_free(ev); 2647 return (DDI_INTR_CLAIMED); 2648 } 2649 err = log_sysevent(ev, SE_NOSLEEP, &eid); 2650 if (err != 0) { 2651 cmn_err(CE_WARN, "Failed to log %s/%s event", 2652 EC_DOMAIN, ESC_DOMAIN_STATE_CHANGE); 2653 sysevent_free(ev); 2654 return (DDI_INTR_CLAIMED); 2655 } 2656 2657 /* clean up */ 2658 sysevent_free(ev); 2659 2660 return (DDI_INTR_CLAIMED); 2661 } 2662 2663 2664 /* 2665 * This interrupt handler watches for unsolicited mailbox messages from the SC 2666 * telling it that an environmental sensor has exceeded a threshold/limit level 2667 * or has returned to normal having previously exceeded a threshold/limit level. 2668 * It then informs the Sysevent Framework of this change and updates the 2669 * env_cache. 2670 */ 2671 static uint_t 2672 sgenv_env_data_handler(char *arg) 2673 { 2674 DCMN_ERR_S(f, "sgenv_env_data_handler()"); 2675 2676 sg_event_env_changed_t *payload = NULL; 2677 sbbc_msg_t *msg = NULL; 2678 2679 DCMN_ERR_EVENT(CE_NOTE, "%s: just been triggered.", f); 2680 2681 if (arg == NULL) { 2682 DCMN_ERR_EVENT(CE_NOTE, "%s: arg == NULL", f); 2683 return (DDI_INTR_CLAIMED); 2684 } 2685 2686 msg = (sbbc_msg_t *)arg; 2687 2688 if (msg->msg_buf == NULL) { 2689 DCMN_ERR_EVENT(CE_NOTE, "%s: msg_buf == NULL", f); 2690 return (DDI_INTR_CLAIMED); 2691 } 2692 2693 payload = (sg_event_env_changed_t *)msg->msg_buf; 2694 2695 /* 2696 * We check the first field of the msg_buf to see if the event_type 2697 * is SC_EVENT_ENV, if it is then we handle the event. 
2698 */ 2699 if (payload->event_type != SC_EVENT_ENV) { 2700 return (DDI_INTR_CLAIMED); 2701 } 2702 2703 /* 2704 * We now need to signal to the env background thread to ask the SC 2705 * for env readings and discover which sensor caused the SC to send 2706 * the ENV event before sending a sysevent to userland. 2707 */ 2708 sgenv_indicate_cache_update_needed(ENV_CACHE); 2709 2710 return (DDI_INTR_CLAIMED); 2711 } 2712 2713 2714 /* 2715 * This interrupt handler watches for unsolicited mailbox messages from the SC 2716 * telling it that the status of a fan has changed. We register a sysevent 2717 * and trigger a softint to update the env cache. 2718 */ 2719 static uint_t 2720 sgenv_fan_status_handler(char *arg) 2721 { 2722 DCMN_ERR_S(f, "sgenv_fan_status_handler()"); 2723 2724 sysevent_t *ev = NULL; 2725 sysevent_id_t eid; 2726 sysevent_value_t se_val; 2727 sysevent_attr_list_t *ev_attr_list = NULL; 2728 sg_event_fan_status_t *payload = NULL; 2729 sbbc_msg_t *msg = NULL; 2730 char fan_str[MAXNAMELEN]; 2731 int err; 2732 2733 DCMN_ERR_EVENT(CE_NOTE, "%s: just been triggered.", f); 2734 2735 if (arg == NULL) { 2736 DCMN_ERR_EVENT(CE_NOTE, "%s: arg == NULL", f); 2737 return (DDI_INTR_CLAIMED); 2738 } 2739 2740 msg = (sbbc_msg_t *)arg; 2741 2742 /* 2743 * We check the first field of the msg_buf to see if the event_type 2744 * is SC_EVENT_FAN 2745 */ 2746 if (msg->msg_buf == NULL) { 2747 DCMN_ERR_EVENT(CE_NOTE, "%s: msg_buf == NULL", f); 2748 return (DDI_INTR_CLAIMED); 2749 } 2750 2751 payload = (sg_event_fan_status_t *)msg->msg_buf; 2752 2753 /* 2754 * If another type of ENV Event triggered this handler then we simply 2755 * return now. 2756 */ 2757 if (payload->event_type != SC_EVENT_FAN) { 2758 return (DDI_INTR_CLAIMED); 2759 } 2760 2761 /* 2762 * Allocate memory for sysevent buffer. 
2763 */ 2764 ev = sysevent_alloc(EC_ENV, ESC_ENV_FAN, EP_SGENV, SE_NOSLEEP); 2765 if (ev == NULL) { 2766 cmn_err(CE_WARN, "%s: Failed to alloc mem for %s/%s event", 2767 f, EC_ENV, ESC_ENV_FAN); 2768 return (DDI_INTR_CLAIMED); 2769 } 2770 2771 2772 /* 2773 * Set the following attributes for this event: 2774 * 2775 * ENV_FRU_ID 2776 * ENV_FRU_RESOURCE_ID 2777 * ENV_FRU_DEVICE 2778 * ENV_FRU_STATE 2779 * ENV_MSG 2780 * 2781 */ 2782 se_val.value_type = SE_DATA_TYPE_STRING; 2783 se_val.value.sv_string = ENV_RESERVED_ATTR; 2784 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_ID, &se_val, SE_NOSLEEP); 2785 if (err != 0) { 2786 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event", 2787 ENV_FRU_ID, EC_ENV, ESC_ENV_FAN); 2788 sysevent_free(ev); 2789 return (DDI_INTR_CLAIMED); 2790 } 2791 2792 se_val.value_type = SE_DATA_TYPE_STRING; 2793 se_val.value.sv_string = ENV_RESERVED_ATTR; 2794 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_RESOURCE_ID, 2795 &se_val, SE_NOSLEEP); 2796 if (err != 0) { 2797 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event", 2798 ENV_FRU_RESOURCE_ID, EC_ENV, ESC_ENV_FAN); 2799 sysevent_free_attr(ev_attr_list); 2800 sysevent_free(ev); 2801 return (DDI_INTR_CLAIMED); 2802 } 2803 2804 se_val.value_type = SE_DATA_TYPE_STRING; 2805 se_val.value.sv_string = ENV_RESERVED_ATTR; 2806 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_DEVICE, 2807 &se_val, SE_NOSLEEP); 2808 if (err != 0) { 2809 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event", 2810 ENV_FRU_DEVICE, EC_ENV, ESC_ENV_FAN); 2811 sysevent_free_attr(ev_attr_list); 2812 sysevent_free(ev); 2813 return (DDI_INTR_CLAIMED); 2814 } 2815 2816 /* 2817 * Checks the fan to see if it has failed. 
2818 */ 2819 se_val.value_type = SE_DATA_TYPE_INT32; 2820 switch (payload->fan_speed) { 2821 case SGENV_FAN_SPEED_OFF: 2822 case SGENV_FAN_SPEED_LOW: 2823 case SGENV_FAN_SPEED_HIGH: 2824 se_val.value.sv_int32 = ENV_OK; 2825 break; 2826 2827 case SGENV_FAN_SPEED_UNKNOWN: 2828 default: 2829 se_val.value.sv_int32 = ENV_FAILED; 2830 break; 2831 } 2832 2833 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_STATE, 2834 &se_val, SE_NOSLEEP); 2835 if (err != 0) { 2836 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event", 2837 ENV_FRU_STATE, EC_ENV, ESC_ENV_FAN); 2838 sysevent_free_attr(ev_attr_list); 2839 sysevent_free(ev); 2840 return (DDI_INTR_CLAIMED); 2841 } 2842 2843 2844 /* 2845 * Create the message to be sent to sysevent. 2846 */ 2847 (void) sprintf(fan_str, 2848 "The status of the fan in Node%d/Slot%d is now ", 2849 payload->node_id, payload->slot_number); 2850 switch (payload->fan_speed) { 2851 case SGENV_FAN_SPEED_OFF: 2852 (void) strcat(fan_str, SGENV_FAN_SPEED_OFF_STR); 2853 break; 2854 2855 case SGENV_FAN_SPEED_LOW: 2856 (void) strcat(fan_str, SGENV_FAN_SPEED_LOW_STR); 2857 break; 2858 2859 case SGENV_FAN_SPEED_HIGH: 2860 (void) strcat(fan_str, SGENV_FAN_SPEED_HIGH_STR); 2861 break; 2862 2863 case SGENV_FAN_SPEED_UNKNOWN: 2864 default: 2865 (void) strcat(fan_str, SGENV_FAN_SPEED_UNKNOWN_STR); 2866 break; 2867 } 2868 2869 DCMN_ERR_EVENT(CE_NOTE, "Fan: %s", fan_str); 2870 2871 se_val.value_type = SE_DATA_TYPE_STRING; 2872 se_val.value.sv_string = fan_str; 2873 err = sysevent_add_attr(&ev_attr_list, ENV_MSG, &se_val, SE_NOSLEEP); 2874 if (err != 0) { 2875 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event", 2876 ENV_MSG, EC_ENV, ESC_ENV_FAN); 2877 sysevent_free_attr(ev_attr_list); 2878 sysevent_free(ev); 2879 return (DDI_INTR_CLAIMED); 2880 } 2881 2882 2883 /* 2884 * Log this event with sysevent framework. 
2885 */ 2886 if (sysevent_attach_attributes(ev, ev_attr_list) != 0) { 2887 cmn_err(CE_WARN, "Failed to attach attr list for %s/%s event", 2888 EC_ENV, ESC_ENV_FAN); 2889 sysevent_free_attr(ev_attr_list); 2890 sysevent_free(ev); 2891 return (DDI_INTR_CLAIMED); 2892 } 2893 err = log_sysevent(ev, SE_NOSLEEP, &eid); 2894 if (err != 0) { 2895 cmn_err(CE_WARN, "Failed to log %s/%s event", 2896 EC_ENV, ESC_ENV_FAN); 2897 sysevent_free(ev); 2898 return (DDI_INTR_CLAIMED); 2899 } 2900 sysevent_free(ev); 2901 2902 /* 2903 * We now need to signal to the env background thread to ask the SC 2904 * for env readings and discover which sensor caused the SC to send 2905 * the ENV event before sending a sysevent to userland. 2906 */ 2907 sgenv_indicate_cache_update_needed(ENV_CACHE); 2908 2909 return (DDI_INTR_CLAIMED); 2910 } 2911 2912 2913 /* 2914 * This function informs the Sysevent Framework that a temperature, voltage 2915 * or current reading for a sensor has exceeded its threshold/limit value or 2916 * that the reading has returned to a safe value having exceeded its 2917 * threshold/limit value previously. 2918 */ 2919 static int 2920 sgenv_process_threshold_event(env_sensor_t sensor) 2921 { 2922 DCMN_ERR_S(f, "sgenv_process_threshold_event()"); 2923 2924 sysevent_t *ev = NULL; 2925 sysevent_id_t eid; 2926 sysevent_value_t se_val; 2927 sysevent_attr_list_t *ev_attr_list = NULL; 2928 int err; 2929 2930 char sensor_str[MAX_TAG_ID_STR_LEN]; /* holds the sensor TagID */ 2931 2932 /* 2933 * This function handles the case when a temperature reading passes 2934 * a threshold/limit level and also the case when there are power 2935 * fluctuations (voltage/current readings pass a threshold/limit level) 2936 * so we need to work out which case it is. 2937 * 2938 * if <temp_event_type> is TRUE, then need to handle an event 2939 * of type ESC_ENV_TEMP. 
2940 */ 2941 int temp_event_type; 2942 2943 switch (sensor.sd_id.id.sensor_type) { 2944 case SG_SENSOR_TYPE_TEMPERATURE: 2945 temp_event_type = TRUE; 2946 ev = sysevent_alloc(EC_ENV, ESC_ENV_TEMP, EP_SGENV, SE_NOSLEEP); 2947 if (ev == NULL) { 2948 cmn_err(CE_WARN, "Failed to allocate sysevent buffer " 2949 "for %s/%s event", EC_ENV, ESC_ENV_TEMP); 2950 return (DDI_FAILURE); 2951 } 2952 break; 2953 2954 default: 2955 temp_event_type = FALSE; 2956 ev = sysevent_alloc(EC_ENV, ESC_ENV_POWER, 2957 EP_SGENV, SE_NOSLEEP); 2958 if (ev == NULL) { 2959 cmn_err(CE_WARN, "Failed to allocate sysevent buffer " 2960 "for %s/%s event", EC_ENV, ESC_ENV_POWER); 2961 return (DDI_FAILURE); 2962 } 2963 break; 2964 } 2965 2966 2967 /* 2968 * Set the following attributes for this event: 2969 * 2970 * ENV_FRU_ID 2971 * ENV_FRU_RESOURCE_ID 2972 * ENV_FRU_DEVICE 2973 * ENV_FRU_STATE 2974 * ENV_MSG 2975 * 2976 */ 2977 se_val.value_type = SE_DATA_TYPE_STRING; 2978 se_val.value.sv_string = ENV_RESERVED_ATTR; 2979 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_ID, &se_val, SE_NOSLEEP); 2980 if (err != 0) { 2981 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event", 2982 ENV_FRU_ID, EC_ENV, 2983 (temp_event_type ? ESC_ENV_TEMP : ESC_ENV_POWER)); 2984 sysevent_free(ev); 2985 return (DDI_FAILURE); 2986 } 2987 2988 se_val.value_type = SE_DATA_TYPE_STRING; 2989 se_val.value.sv_string = ENV_RESERVED_ATTR; 2990 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_RESOURCE_ID, 2991 &se_val, SE_NOSLEEP); 2992 if (err != 0) { 2993 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event", 2994 ENV_FRU_RESOURCE_ID, EC_ENV, 2995 (temp_event_type ? 
ESC_ENV_TEMP : ESC_ENV_POWER)); 2996 sysevent_free_attr(ev_attr_list); 2997 sysevent_free(ev); 2998 return (DDI_FAILURE); 2999 } 3000 3001 se_val.value_type = SE_DATA_TYPE_STRING; 3002 se_val.value.sv_string = ENV_RESERVED_ATTR; 3003 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_DEVICE, 3004 &se_val, SE_NOSLEEP); 3005 if (err != 0) { 3006 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event", 3007 ENV_FRU_DEVICE, EC_ENV, 3008 (temp_event_type ? ESC_ENV_TEMP : ESC_ENV_POWER)); 3009 sysevent_free_attr(ev_attr_list); 3010 sysevent_free(ev); 3011 return (DDI_FAILURE); 3012 } 3013 3014 3015 /* 3016 * We need to find out the status of the reading. 3017 */ 3018 se_val.value_type = SE_DATA_TYPE_INT32; 3019 switch (SG_GET_SENSOR_STATUS(sensor.sd_status)) { 3020 case SG_SENSOR_STATUS_OK: 3021 se_val.value.sv_int32 = ENV_OK; 3022 break; 3023 3024 case SG_SENSOR_STATUS_LO_WARN: 3025 case SG_SENSOR_STATUS_HI_WARN: 3026 se_val.value.sv_int32 = ENV_WARNING; 3027 break; 3028 3029 case SG_SENSOR_STATUS_LO_DANGER: 3030 case SG_SENSOR_STATUS_HI_DANGER: 3031 default: 3032 se_val.value.sv_int32 = ENV_FAILED; 3033 break; 3034 } 3035 3036 /* 3037 * Add ENV_FRU_STATE attribute. 3038 */ 3039 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_STATE, 3040 &se_val, SE_NOSLEEP); 3041 if (err != 0) { 3042 cmn_err(CE_WARN, "Failed to add attr[%s] for %s/%s event " 3043 "(Err=%d)", ENV_FRU_STATE, EC_ENV, 3044 (temp_event_type ? ESC_ENV_TEMP: ESC_ENV_POWER), 3045 err); 3046 sysevent_free_attr(ev_attr_list); 3047 sysevent_free(ev); 3048 return (DDI_FAILURE); 3049 } 3050 3051 3052 /* 3053 * Save the sensor TagID as a string so that a meaningful message 3054 * can be passed to as part of the ENV_MSG attribute. 3055 */ 3056 sgenv_tagid_to_string(sensor.sd_id, sensor_str); 3057 3058 /* 3059 * We need to add a string stating what type of event occurred. 
3060 */ 3061 switch (SG_GET_SENSOR_STATUS(sensor.sd_status)) { 3062 case SG_SENSOR_STATUS_OK: 3063 (void) strcat(sensor_str, SGENV_EVENT_MSG_OK); 3064 break; 3065 3066 case SG_SENSOR_STATUS_LO_WARN: 3067 (void) strcat(sensor_str, SGENV_EVENT_MSG_LO_WARN); 3068 break; 3069 3070 case SG_SENSOR_STATUS_HI_WARN: 3071 (void) strcat(sensor_str, SGENV_EVENT_MSG_HI_WARN); 3072 break; 3073 3074 case SG_SENSOR_STATUS_LO_DANGER: 3075 (void) strcat(sensor_str, SGENV_EVENT_MSG_LO_DANGER); 3076 break; 3077 3078 case SG_SENSOR_STATUS_HI_DANGER: 3079 (void) strcat(sensor_str, SGENV_EVENT_MSG_HI_DANGER); 3080 break; 3081 3082 default: 3083 DCMN_ERR_EVENT(CE_NOTE, "%s: Unknown sensor status", f); 3084 (void) strcat(sensor_str, SGENV_EVENT_MSG_UNKNOWN); 3085 break; 3086 } 3087 3088 DCMN_ERR_EVENT(CE_NOTE, "Temp/Power: %s", sensor_str); 3089 3090 /* 3091 * Add ENV_MSG attribute. 3092 */ 3093 se_val.value_type = SE_DATA_TYPE_STRING; 3094 se_val.value.sv_string = sensor_str; 3095 err = sysevent_add_attr(&ev_attr_list, ENV_MSG, &se_val, SE_NOSLEEP); 3096 if (err != 0) { 3097 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event", 3098 ENV_MSG, EC_ENV, 3099 (temp_event_type ? ESC_ENV_TEMP : ESC_ENV_POWER)); 3100 sysevent_free_attr(ev_attr_list); 3101 sysevent_free(ev); 3102 return (DDI_FAILURE); 3103 } 3104 3105 3106 /* 3107 * Log this event with sysevent framework. 3108 */ 3109 if (sysevent_attach_attributes(ev, ev_attr_list) != 0) { 3110 cmn_err(CE_WARN, "Failed to attach attr list for %s/%s event", 3111 EC_ENV, 3112 (temp_event_type ? ESC_ENV_TEMP : ESC_ENV_POWER)); 3113 sysevent_free_attr(ev_attr_list); 3114 sysevent_free(ev); 3115 return (DDI_FAILURE); 3116 } 3117 err = log_sysevent(ev, SE_NOSLEEP, &eid); 3118 if (err != 0) { 3119 cmn_err(CE_WARN, "Failed to log %s/%s event", EC_ENV, 3120 (temp_event_type ? 
ESC_ENV_TEMP : ESC_ENV_POWER)); 3121 sysevent_free(ev); 3122 return (DDI_FAILURE); 3123 } 3124 sysevent_free(ev); 3125 3126 return (DDI_SUCCESS); 3127 } 3128 3129 3130 /* 3131 * This function gets called when sgenv is notified of a DR event. 3132 * We need to update the board and env caches to ensure that they 3133 * now contain the latest system information.. 3134 */ 3135 static uint_t 3136 sgenv_dr_event_handler(char *arg) 3137 { 3138 DCMN_ERR_S(f, "sgenv_dr_event_handler()"); 3139 3140 sg_system_fru_descriptor_t *payload = NULL; 3141 sbbc_msg_t *msg = NULL; 3142 3143 DCMN_ERR_EVENT(CE_NOTE, "%s: just been triggered.", f); 3144 DCMN_ERR_EVENT(CE_NOTE, "%s: Start: %lld", f, gethrtime()); 3145 3146 3147 if (arg == NULL) { 3148 DCMN_ERR_EVENT(CE_NOTE, "%s: arg == NULL", f); 3149 return (DDI_INTR_CLAIMED); 3150 } 3151 3152 msg = (sbbc_msg_t *)arg; 3153 3154 if (msg->msg_buf == NULL) { 3155 DCMN_ERR_EVENT(CE_NOTE, "%s: msg_buf == NULL", f); 3156 return (DDI_INTR_CLAIMED); 3157 } 3158 3159 payload = (sg_system_fru_descriptor_t *)msg->msg_buf; 3160 3161 /* 3162 * We check the event_details field of the msg_buf to see if 3163 * we need to invalidate the caches 3164 */ 3165 switch (payload->event_details) { 3166 case SG_EVT_BOARD_ABSENT: 3167 case SG_EVT_BOARD_PRESENT: 3168 case SG_EVT_UNASSIGN: 3169 case SG_EVT_ASSIGN: 3170 case SG_EVT_UNAVAILABLE: 3171 case SG_EVT_AVAILABLE: 3172 case SG_EVT_POWER_OFF: 3173 case SG_EVT_POWER_ON: 3174 case SG_EVT_PASSED_TEST: 3175 case SG_EVT_FAILED_TEST: 3176 /* 3177 * We now need to signal to the background threads to poll the 3178 * SC for env readings and board info which may have changed 3179 * as a result of the DR changes. This will cause the 3180 * env_cache and the board_cache to be updated. 
3181 */ 3182 DCMN_ERR_EVENT(CE_NOTE, "%s: about to signal to background " 3183 "threads due to event %d.", f, payload->event_details); 3184 3185 sgenv_indicate_cache_update_needed(ENV_CACHE); 3186 sgenv_indicate_cache_update_needed(BOARD_CACHE); 3187 3188 break; 3189 3190 default: 3191 DCMN_ERR_EVENT(CE_NOTE, "%s: Unknown DR event type.", f); 3192 break; 3193 } 3194 3195 DCMN_ERR_EVENT(CE_NOTE, "%s: Finish: %lld", f, gethrtime()); 3196 3197 return (DDI_INTR_CLAIMED); 3198 } 3199 3200 3201 /* 3202 * This function is called by the interrupt handlers watching for ENV/DR events 3203 * from the SC. It indicates to the thread responsible for the cache specified 3204 * that it needs to update its data. 3205 */ 3206 static void 3207 sgenv_indicate_cache_update_needed(int cache_type) 3208 { 3209 DCMN_ERR_S(f, "sgenv_indicate_cache_update_needed()"); 3210 3211 /* 3212 * If the cache is already being updated, we set a flag to 3213 * inform the thread that it needs to reread the data when 3214 * it is finished as we cannot be sure if the data was read 3215 * before or after the time this handler was triggered. 3216 * 3217 * Otherwise the thread is waiting for us and we signal 3218 * to it to start reading the data. 
3219 */ 3220 switch (cache_type) { 3221 case ENV_CACHE: 3222 mutex_enter(&env_flag_lock); 3223 if (env_cache_updating) { 3224 DCMN_ERR_THREAD(CE_NOTE, "%s: Thread already " 3225 "updating env cache", f); 3226 env_cache_update_needed = B_TRUE; 3227 3228 } else { 3229 DCMN_ERR_THREAD(CE_NOTE, "%s: Sending signal " 3230 "to env thread", f); 3231 cv_signal(&env_flag_cond); 3232 } 3233 mutex_exit(&env_flag_lock); 3234 break; 3235 3236 case BOARD_CACHE: 3237 mutex_enter(&board_flag_lock); 3238 if (board_cache_updating) { 3239 DCMN_ERR_THREAD(CE_NOTE, "%s: Thread already " 3240 "updating board cache", f); 3241 board_cache_update_needed = B_TRUE; 3242 3243 } else { 3244 DCMN_ERR_THREAD(CE_NOTE, "%s: Sending signal " 3245 "to board thread", f); 3246 cv_signal(&board_flag_cond); 3247 } 3248 mutex_exit(&board_flag_lock); 3249 break; 3250 3251 default: 3252 DCMN_ERR(CE_NOTE, "%s: Unknown cache type:0x%x", f, cache_type); 3253 break; 3254 } 3255 } 3256