1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * This file contains the Starcat Solaris Mailbox Client module. This module 31 * handles mailbox messages from the SC to the OS (as opposed to messages sent 32 * to specific drivers) and vice versa. Two task queues are created upon 33 * startup; one handles reading and processing of all incoming messages, while 34 * the other handles transmission of all outgoing messages. 
*/

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/sunddi.h>
#include <sys/errno.h>
#include <sys/cmn_err.h>
#include <sys/condvar.h>
#include <sys/mutex.h>
#include <sys/disp.h>
#include <sys/thread.h>
#include <sys/debug.h>
#include <sys/cpu_sgnblk_defs.h>
#include <sys/machsystm.h>
#include <sys/modctl.h>
#include <sys/iosramio.h>
#include <sys/mboxsc.h>
#include <sys/promif.h>
#include <sys/uadmin.h>
#include <sys/cred.h>
#include <sys/taskq.h>
#include <sys/utsname.h>
#include <sys/plat_ecc_unum.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/starcat.h>
#include <sys/plat_ecc_dimm.h>
#include <sys/plat_datapath.h>

/*
 * mailbox keys
 *
 * SCDM_KEY names the incoming mailbox (initialized with MBOXSC_MBOX_IN) and
 * DMSC_KEY the outgoing mailbox (initialized with MBOXSC_MBOX_OUT).
 */
#define	SCDM_KEY	0x5343444d	/* 'S', 'C', 'D', 'M' */
#define	DMSC_KEY	0x444d5343	/* 'D', 'M', 'S', 'C' */

/* mailbox commands */
#define	SCDM_CMD		('S' << 8)	/* generic SSP */
#define	SCDM_CMD_SUCCESS	(SCDM_CMD | 0x1)
#define	SCDM_GOTO_OBP		(SCDM_CMD | 0x2)
#define	SCDM_GOTO_PANIC		(SCDM_CMD | 0x3)
#define	SCDM_ENVIRON		(SCDM_CMD | 0x4) /* environmental intr */
#define	SCDM_SHUTDOWN		(SCDM_CMD | 0x5) /* setkeyswitch STANDBY */
#define	SCDM_GET_NODENAME	(SCDM_CMD | 0x6) /* get domain nodename */
#define	SCDM_LOG_ECC_ERROR	(SCDM_CMD | 0x7) /* ECC error logging */
#define	SCDM_LOG_ECC_INDICTMENT	(SCDM_CMD | 0x8) /* ECC indictment logging */
#define	SCDM_LOG_ECC		(SCDM_CMD | 0x9) /* ECC info */
#define	SCDM_LOG_ECC_CAP_INIT	(SCDM_CMD | 0xa) /* ECC Capability Init */
#define	SCDM_LOG_ECC_CAP_RESP	(SCDM_CMD | 0xb) /* ECC Capability Response */
#define	SCDM_DIMM_SERIAL_ID	(SCDM_CMD | 0xc) /* DIMM ser# req/resp */
#define	SCDM_DP_ERROR_MSG	(SCDM_CMD | 0xd) /* datapath error */
#define	SCDM_DP_FAULT_MSG	(SCDM_CMD | 0xe) /* datapath fault */

/*
 * general constants
 *
 * GETMSG_TIMEOUT_MS is the timeout handed to mboxsc_getmsg();
 * PUTMSG_TIMEOUT_MS is the initial value of ecc_message_timeout_ms.  The
 * MIN_/MAX_ task counts are the allocation bounds given to taskq_create()
 * for the input and output queues.
 */
#define	GETMSG_TIMEOUT_MS	500
#define	PUTMSG_TIMEOUT_MS	6000
#define	MIN_INPUTQ_TASKS	2
#define	MAX_INPUTQ_TASKS	4
#define	MIN_OUTPUTQ_TASKS	2
#define	MAX_OUTPUTQ_TASKS	512
#ifndef TRUE
#define	TRUE	1
#endif
#ifndef FALSE
#define	FALSE	0
#endif

/* timeout (in milliseconds) passed to mboxsc_putmsg() for outgoing messages */
clock_t ecc_message_timeout_ms = PUTMSG_TIMEOUT_MS;

/*
 * When a message needs to be sent to the SC, an scosmb_msgdata_t should be
 * populated with the data to be used for the message, and a call to
 * scosmb_process_output should be dispatched on the scosmb_output_taskq, with
 * the address of the scosmb_msgdata_t structure as its arg.  The "length" and
 * "data" fields can be used if the message needs to include data beyond the
 * header fields (type, cmd, and transid) and that information must be recorded
 * when the message is placed on the taskq.  If appropriate for the message type
 * (e.g. nodename info that should always be the most recent available), the
 * "data" field can be set to NULL and the additional data can be assembled
 * immediately prior to sending the message in scosmb_process_output().
 *
 * If log_error is set, any errors in delivering the message cause a
 * cmn_err() message to be issued.  If it is zero, the error is expressed
 * only through return values.
 *
 * Ownership: scosmb_process_output() frees both the structure itself and, if
 * "data" was non-NULL when it was called, "length" bytes at "data".  Both
 * must therefore come from kmem_alloc()/kmem_zalloc().
 */
typedef struct {
	uint32_t type;		/* mboxsc message type (event or reply) */
	uint32_t cmd;		/* one of the SCDM_* commands above */
	uint64_t transid;	/* transaction ID; 0 for unsolicited events */
	uint32_t length;	/* size of "data" in bytes */
	int log_error;		/* non-zero: report delivery errors */
	void *data;		/* optional payload, or NULL */
} scosmb_msgdata_t;

/*
 * Datapath error and fault messages arrive unsolicited.  The message data
 * is contained in a plat_datapath_info_t structure.
 */
typedef struct {
	uint8_t type;		/* CDS, DX, EX, CP */
	uint8_t pad;		/* for alignment */
	uint16_t cpuid;		/* Safari ID of base CPU */
	uint32_t t_value;	/* SERD timeout threshold (seconds) */
} plat_datapath_info_t;

/* externally visible routines */
void scosmb_update_nodename(uint64_t transid);

/* local routines */
static void scosmb_inbox_handler();
static void scosmb_process_input(void *unused);
static int scosmb_process_output(scosmb_msgdata_t *arg);

/*
 * local variables
 *
 * The three flags below are read and written under scosmb_mutex.
 */
/* set once a hard mailbox failure has been reported; cleared on success */
static uint8_t scosmb_mboxsc_failed = FALSE;
/* set once a send timeout has been reported; cleared on success */
static uint8_t scosmb_mboxsc_timedout = FALSE;
/* set while an unsolicited (transid 0) nodename update is queued */
static uint8_t scosmb_nodename_event_pending = FALSE;
/* prefix used in this module's cmn_err() messages */
static char scosmb_hdr[] = "SCOSMB:";
static kmutex_t scosmb_mutex;
/* queue on which scosmb_process_input() jobs are dispatched */
static taskq_t *scosmb_input_taskq = NULL;
/* queue on which scosmb_process_output() jobs are dispatched */
static taskq_t *scosmb_output_taskq = NULL;

/*
 * Ereport class name fragments, indexed by plat_datapath_info_t.type when
 * the datapath ereport class string is built.
 */
static char *dperrtype[] = {
	DP_ERROR_CDS,
	DP_ERROR_DX,
	DP_ERROR_EX,
	DP_ERROR_CP
};

/*
 * Structures from modctl.h used for loadable module support.
 * SCOSMB is a "miscellaneous" module.
 */
extern struct mod_ops mod_miscops;

static struct modlmisc modlmisc = {
	&mod_miscops,
	"Sun Fire 15000 OS Mbox Client v1.10",
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modlmisc,
	NULL
};


/*
 * _init
 *
 * Loadable module support routine.  Initializes the SCDM and DMSC mailboxes,
 * the global mutex and the input/output task queues, then installs the
 * module.  Returns 0 on success or the error from the step that failed.
186 */ 187 int 188 _init(void) 189 { 190 int error; 191 192 /* 193 * Initialize the mailboxes 194 */ 195 if ((error = mboxsc_init(SCDM_KEY, MBOXSC_MBOX_IN, 196 scosmb_inbox_handler)) != 0) { 197 cmn_err(CE_WARN, "%s mboxsc_init failed (0x%x)\n", scosmb_hdr, 198 error); 199 return (error); 200 } 201 202 if ((error = mboxsc_init(DMSC_KEY, MBOXSC_MBOX_OUT, NULL)) != 0) { 203 cmn_err(CE_WARN, "%s mboxsc_init failed (0x%x)\n", scosmb_hdr, 204 error); 205 mboxsc_fini(SCDM_KEY); 206 return (error); 207 } 208 209 /* 210 * Initialize the global lock 211 */ 212 mutex_init(&scosmb_mutex, NULL, MUTEX_DEFAULT, NULL); 213 214 /* 215 * Create the task queues used for processing input and output messages 216 */ 217 scosmb_input_taskq = taskq_create("scosmb_input_taskq", 1, 218 minclsyspri, MIN_INPUTQ_TASKS, MAX_INPUTQ_TASKS, TASKQ_PREPOPULATE); 219 scosmb_output_taskq = taskq_create("scosmb_output_taskq", 1, 220 minclsyspri, MIN_OUTPUTQ_TASKS, MAX_OUTPUTQ_TASKS, 221 TASKQ_PREPOPULATE); 222 223 /* 224 * Attempt to install the module. If unsuccessful, uninitialize 225 * everything. 226 */ 227 error = mod_install(&modlinkage); 228 if (error != 0) { 229 taskq_destroy(scosmb_output_taskq); 230 taskq_destroy(scosmb_input_taskq); 231 mutex_destroy(&scosmb_mutex); 232 mboxsc_fini(DMSC_KEY); 233 mboxsc_fini(SCDM_KEY); 234 } 235 236 return (error); 237 } 238 239 /* 240 * _fini 241 * 242 * Loadable module support routine. Since this routine shouldn't be unloaded (it 243 * provides a critical service, and its symbols may be referenced externally), 244 * EBUSY is returned to prevent unloading. 245 */ 246 int 247 _fini(void) 248 { 249 return (EBUSY); 250 } 251 252 /* 253 * _info 254 * 255 * Loadable module support routine. 256 */ 257 int 258 _info(struct modinfo *modinfop) 259 { 260 int error = 0; 261 262 error = mod_info(&modlinkage, modinfop); 263 return (error); 264 } 265 266 /* 267 * scosmb_inbox_handler() - mbox API event handler. 
268 * 269 * This routine adds an entry to the scosmb_input_taskq that will cause the 270 * scosmb_process_input() routine to be called to service the SCDM mailbox. The 271 * possibility that taskq_dispatch may fail when given KM_NOSLEEP is safely 272 * ignored because there can only be one message waiting in the mailbox at any 273 * given time, so the current message will end up being handled by one of the 274 * previously queued jobs (and a previous message presumably timed out before we 275 * got around to reading it). 276 */ 277 static void 278 scosmb_inbox_handler() 279 { 280 (void) taskq_dispatch(scosmb_input_taskq, scosmb_process_input, NULL, 281 KM_NOSLEEP); 282 } 283 284 /* 285 * dp_get_cores() 286 * 287 * Checks cpu implementation for the input cpuid and returns 288 * the number of cores. 289 * If implementation cannot be determined, returns 1 290 */ 291 static int 292 dp_get_cores(uint16_t cpuid) 293 { 294 int exp, ii, impl = 0, nc, slot; 295 296 exp = STARCAT_CPUID_TO_EXPANDER(cpuid); 297 slot = STARCAT_CPUID_TO_BOARDSLOT(cpuid); 298 if (slot == 1) 299 nc = STARCAT_SLOT1_CPU_MAX; 300 else 301 nc = plat_max_cpu_units_per_board(); 302 303 /* find first with valid implementation */ 304 for (ii = 0; ii < nc; ii++) 305 if (cpu[MAKE_CPUID(exp, slot, ii)]) { 306 impl = cpunodes[MAKE_CPUID(exp, slot, ii)]. 307 implementation; 308 break; 309 } 310 311 if (IS_JAGUAR(impl) || IS_PANTHER(impl)) 312 return (2); 313 else 314 return (1); 315 316 } 317 318 /* 319 * dp_payload_add_cpus() 320 * 321 * From datapath mailbox message, determines the number of and safari IDs 322 * for affected cpus, then adds this info to the datapath ereport. 323 * 324 * Input maxcat (if set) is a count of maxcat cpus actually present - it is 325 * a count of cpuids, which takes into account multi-core architecture. 
326 */ 327 static int 328 dp_payload_add_cpus(plat_datapath_info_t *dpmsg, nvlist_t *erp, int maxcat) 329 { 330 int jj = 0, numcpus = 0, nummaxcpus = 0; 331 int count, exp, ii, num, ncores, ret, slot, port; 332 uint16_t *dparray, cpuid; 333 uint64_t *snarray; 334 335 /* check for multiple core architectures */ 336 ncores = dp_get_cores(dpmsg->cpuid); 337 338 /* 339 * Determine the number of cpu cores impacted 340 */ 341 switch (dpmsg->type) { 342 case DP_CDS_TYPE: 343 if (maxcat) 344 nummaxcpus = ncores; 345 else 346 numcpus = ncores; 347 break; 348 349 case DP_DX_TYPE: 350 if (maxcat) 351 nummaxcpus = 2 * ncores; 352 else 353 numcpus = 2 * ncores; 354 break; 355 356 case DP_EX_TYPE: 357 if (maxcat) 358 nummaxcpus = STARCAT_SLOT1_CPU_MAX; 359 else 360 numcpus = plat_max_cpu_units_per_board(); 361 break; 362 363 case DP_CP_TYPE: 364 /* 365 * SC-DE supplies the base cpuid affected, if 366 * maxcat id was given, there's no slot 0 board 367 * present. 368 */ 369 370 if (!maxcat) { 371 /* Slot 0 id was given - set numcpus */ 372 numcpus = plat_max_cpu_units_per_board(); 373 } 374 375 /* there may/may not be maxcats. 
set a count anyway */ 376 nummaxcpus = STARCAT_SLOT1_CPU_MAX; 377 378 break; 379 380 default: 381 ASSERT(0); 382 return (-1); 383 } 384 385 /* Allocate space for cores */ 386 num = numcpus + nummaxcpus; 387 dparray = kmem_zalloc(num * sizeof (uint16_t *), KM_SLEEP); 388 389 /* 390 * populate dparray with impacted cores (only those present) 391 */ 392 exp = STARCAT_CPUID_TO_EXPANDER(dpmsg->cpuid); 393 slot = STARCAT_CPUID_TO_BOARDSLOT(dpmsg->cpuid); 394 port = STARCAT_CPUID_TO_LPORT(dpmsg->cpuid); 395 396 mutex_enter(&cpu_lock); 397 398 switch (dpmsg->type) { 399 case DP_CDS_TYPE: 400 /* 401 * For a CDS error, it's the reporting cpuid 402 * and it's other core (if present) 403 */ 404 cpuid = dpmsg->cpuid & 0xFFFB; /* core 0 */ 405 if (cpu[cpuid]) 406 dparray[jj++] = cpuid; 407 408 cpuid = dpmsg->cpuid | 0x4; /* core 1 */ 409 if (cpu[cpuid]) 410 dparray[jj++] = cpuid; 411 break; 412 413 case DP_DX_TYPE: 414 /* 415 * For a DX error, it's the reporting cpuid (all 416 * cores), and the other CPU sharing the same 417 * DX<-->DCDS interface (all cores) 418 */ 419 420 /* reporting cpuid */ 421 cpuid = dpmsg->cpuid & 0xFFFB; /* core 0 */ 422 423 if (cpu[cpuid]) 424 dparray[jj++] = cpuid; 425 426 cpuid = dpmsg->cpuid | 0x4; /* core 1 */ 427 if (cpu[cpuid]) 428 dparray[jj++] = cpuid; 429 430 /* find partner cpuid */ 431 if (port == 0 || port == 2) 432 cpuid = dpmsg->cpuid | 0x1; 433 else 434 cpuid = dpmsg->cpuid & 0xFFFE; 435 436 /* add partner cpuid */ 437 cpuid &= 0xFFFB; /* core 0 */ 438 if (cpu[cpuid]) 439 dparray[jj++] = cpuid; 440 441 cpuid |= 0x4; /* core 1 */ 442 if (cpu[cpuid]) 443 dparray[jj++] = cpuid; 444 break; 445 446 case DP_EX_TYPE: 447 /* 448 * For an EX error, it is all cpuids (all cores) 449 * on the reporting board 450 */ 451 452 if (slot == 1) /* maxcat */ 453 count = nummaxcpus; 454 else 455 count = numcpus; 456 457 for (ii = 0; ii < count; ii++) { 458 cpuid = MAKE_CPUID(exp, slot, ii); 459 if (cpu[cpuid]) 460 dparray[jj++] = cpuid; 461 } 462 break; 463 
464 case DP_CP_TYPE: 465 /* 466 * For a CP error, it is all cpuids (all cores) 467 * on both boards (SB & IO) in the boardset 468 */ 469 470 /* Do slot 0 */ 471 for (ii = 0; ii < numcpus; ii++) { 472 cpuid = MAKE_CPUID(exp, 0, ii); 473 if (cpu[cpuid]) 474 dparray[jj++] = cpuid; 475 } 476 477 /* Do slot 1 */ 478 for (ii = 0; ii < nummaxcpus; ii++) { 479 cpuid = MAKE_CPUID(exp, 1, ii); 480 if (cpu[cpuid]) 481 dparray[jj++] = cpuid; 482 } 483 break; 484 } 485 486 mutex_exit(&cpu_lock); 487 488 /* 489 * The datapath message could not be associated with any 490 * configured CPU. 491 */ 492 if (!jj) { 493 kmem_free(dparray, num * sizeof (uint16_t *)); 494 ret = nvlist_add_uint32(erp, DP_LIST_SIZE, jj); 495 ASSERT(ret == 0); 496 return (-1); 497 } 498 499 snarray = kmem_zalloc(jj * sizeof (uint64_t *), KM_SLEEP); 500 for (ii = 0; ii < jj; ii++) 501 snarray[ii] = cpunodes[dparray[ii]].device_id; 502 503 ret = nvlist_add_uint32(erp, DP_LIST_SIZE, jj); 504 ret |= nvlist_add_uint16_array(erp, DP_LIST, dparray, jj); 505 ret |= nvlist_add_uint64_array(erp, SN_LIST, snarray, jj); 506 ASSERT(ret == 0); 507 508 kmem_free(dparray, num * sizeof (uint16_t *)); 509 kmem_free(snarray, jj * sizeof (uint64_t *)); 510 511 return (0); 512 } 513 514 /* 515 * dp_trans_event() - datapath message handler. 516 * 517 * Process datapath error and fault messages received from the SC. Checks 518 * for, and disregards, messages associated with I/O boards. Otherwise, 519 * extracts message info to produce a datapath ereport. 
520 */ 521 static void 522 dp_trans_event(plat_datapath_info_t *dpmsg, int msgtype) 523 { 524 nvlist_t *erp, *detector, *hcelem; 525 char buf[FM_MAX_CLASS]; 526 int exp, slot, i, maxcat = 0; 527 528 /* check for I/O board message */ 529 exp = STARCAT_CPUID_TO_EXPANDER(dpmsg->cpuid); 530 slot = STARCAT_CPUID_TO_BOARDSLOT(dpmsg->cpuid); 531 532 if (slot) { 533 mutex_enter(&cpu_lock); 534 for (i = 0; i < STARCAT_SLOT1_CPU_MAX; i++) { 535 if (cpu[MAKE_CPUID(exp, slot, i)]) { 536 /* maxcat cpu present */ 537 maxcat++; 538 } 539 } 540 mutex_exit(&cpu_lock); 541 542 /* 543 * Ignore I/O board msg 544 */ 545 if (maxcat == 0) 546 return; 547 } 548 549 /* allocate space for ereport */ 550 erp = fm_nvlist_create(NULL); 551 552 /* 553 * 554 * Member Name Data Type Comments 555 * ----------- --------- ----------- 556 * version uint8 0 557 * class string "asic" 558 * ENA uint64 ENA Format 1 559 * detector fmri aggregated ID data for SC-DE 560 * 561 * Datapath ereport subclasses and data payloads: 562 * There will be two types of ereports (error and fault) which will be 563 * identified by the "type" member. 
564 * 565 * ereport.asic.starcat.cds.cds-dp 566 * ereport.asic.starcat.dx.dx-dp 567 * ereport.asic.starcat.sdi.sdi-dp 568 * ereport.asic.starcat.cp.cp-dp 569 * 570 * Member Name Data Type Comments 571 * ----------- --------- ----------- 572 * erptype uint16 derived from message type: error or 573 * fault 574 * t-value uint32 SC's datapath SERD timeout threshold 575 * dp-list-sz uint8 number of dp-list array elements 576 * dp-list array of uint16 Safari IDs of affected cpus 577 * sn-list array of uint64 Serial numbers of affected cpus 578 * 579 */ 580 581 /* compose common ereport elements */ 582 detector = fm_nvlist_create(NULL); 583 584 /* 585 * Create legacy FMRI for the detector 586 */ 587 switch (dpmsg->type) { 588 case DP_CDS_TYPE: 589 case DP_DX_TYPE: 590 if (slot == 1) 591 (void) snprintf(buf, FM_MAX_CLASS, "IO%d", exp); 592 else 593 (void) snprintf(buf, FM_MAX_CLASS, "SB%d", exp); 594 break; 595 596 case DP_EX_TYPE: 597 (void) snprintf(buf, FM_MAX_CLASS, "EX%d", exp); 598 break; 599 600 case DP_CP_TYPE: 601 (void) snprintf(buf, FM_MAX_CLASS, "CP"); 602 break; 603 604 default: 605 (void) snprintf(buf, FM_MAX_CLASS, "UNKNOWN"); 606 break; 607 } 608 609 hcelem = fm_nvlist_create(NULL); 610 611 (void) nvlist_add_string(hcelem, FM_FMRI_HC_NAME, FM_FMRI_LEGACY_HC); 612 (void) nvlist_add_string(hcelem, FM_FMRI_HC_ID, buf); 613 614 (void) nvlist_add_uint8(detector, FM_VERSION, FM_HC_SCHEME_VERSION); 615 (void) nvlist_add_string(detector, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC); 616 (void) nvlist_add_string(detector, FM_FMRI_HC_ROOT, ""); 617 (void) nvlist_add_uint32(detector, FM_FMRI_HC_LIST_SZ, 1); 618 (void) nvlist_add_nvlist_array(detector, FM_FMRI_HC_LIST, &hcelem, 1); 619 620 /* build ereport class name */ 621 (void) snprintf(buf, FM_MAX_CLASS, "asic.starcat.%s.%s-%s", 622 dperrtype[dpmsg->type], dperrtype[dpmsg->type], 623 FM_ERROR_DATAPATH); 624 625 fm_ereport_set(erp, FM_EREPORT_VERSION, buf, 626 fm_ena_generate(0, FM_ENA_FMT1), detector, NULL); 627 628 /* add 
payload elements */ 629 if (msgtype == SCDM_DP_ERROR_MSG) { 630 fm_payload_set(erp, 631 DP_EREPORT_TYPE, DATA_TYPE_UINT16, DP_ERROR, NULL); 632 } else { 633 fm_payload_set(erp, 634 DP_EREPORT_TYPE, DATA_TYPE_UINT16, DP_FAULT, NULL); 635 } 636 637 fm_payload_set(erp, DP_TVALUE, DATA_TYPE_UINT32, dpmsg->t_value, NULL); 638 639 if (dp_payload_add_cpus(dpmsg, erp, maxcat) == 0) { 640 /* post ereport */ 641 fm_ereport_post(erp, EVCH_SLEEP); 642 } 643 644 /* free ereport memory */ 645 fm_nvlist_destroy(erp, FM_NVA_FREE); 646 fm_nvlist_destroy(detector, FM_NVA_FREE); 647 648 } 649 650 /* 651 * scosmb_process_input() - incoming message processing routine 652 * 653 * this routine attempts to read a message from the SCDM mailbox and, if 654 * successful, processes the command. if an unrecoverable error is encountered, 655 * the scosmb_task thread will be terminated. 656 */ 657 /* ARGSUSED0 */ 658 static void 659 scosmb_process_input(void *unused) 660 { 661 int error; 662 scosmb_msgdata_t msg; 663 proc_t *initpp; 664 plat_capability_data_t *cap; /* capability msg contents ptr */ 665 int cap_size; 666 int cap_ver_len; 667 scosmb_msgdata_t *cap_msgdatap; /* capability msg response */ 668 int max_size; 669 670 /* 671 * Attempt to read a message from the SCDM mailbox. 672 * 673 * Setup a local buffer to read incoming messages from the SC. 674 */ 675 cap_ver_len = strlen(utsname.release) + strlen(utsname.version) + 2; 676 cap_size = sizeof (plat_capability_data_t) + cap_ver_len; 677 max_size = MAX(cap_size, sizeof (plat_dimm_sid_board_data_t)); 678 679 msg.type = 0; 680 msg.cmd = 0; 681 msg.transid = 0; 682 msg.length = max_size; 683 msg.log_error = 0; 684 msg.data = kmem_zalloc(max_size, KM_SLEEP); 685 686 error = mboxsc_getmsg(SCDM_KEY, &msg.type, &msg.cmd, &msg.transid, 687 &msg.length, msg.data, GETMSG_TIMEOUT_MS); 688 689 /* 690 * If EAGAIN or ETIMEDOUT was received, give up. The SC can just try 691 * again if it was important. 
If any other non-zero error was 692 * encountered, the mailbox service is broken, and there's nothing more 693 * we can do. 694 */ 695 mutex_enter(&scosmb_mutex); 696 if ((error == EAGAIN) || (error == ETIMEDOUT)) { 697 mutex_exit(&scosmb_mutex); 698 return; 699 } else if (error != 0) { 700 /* 701 * The mailbox service appears to be badly broken. If it was 702 * working previously, generate a warning and set a flag to 703 * avoid repeating the warning on subsequent failures. 704 */ 705 if (!scosmb_mboxsc_failed) { 706 scosmb_mboxsc_failed = TRUE; 707 cmn_err(CE_WARN, "%s mboxsc error (0x%x)\n", scosmb_hdr, 708 error); 709 } 710 mutex_exit(&scosmb_mutex); 711 return; 712 } else { 713 /* 714 * If the mailbox module failed previously, it appears to have 715 * recovered, so we'll want to generate a warning if it fails 716 * again. 717 */ 718 scosmb_mboxsc_failed = FALSE; 719 } 720 mutex_exit(&scosmb_mutex); 721 722 /* 723 * A message was successfully received, so go ahead and process it. 724 */ 725 switch (msg.cmd) { 726 727 case SCDM_GOTO_OBP: /* jump to OBP */ 728 debug_enter("SC requested jump to OBP"); 729 break; 730 731 case SCDM_GOTO_PANIC: /* Panic the domain */ 732 cmn_err(CE_PANIC, "%s SC requested PANIC\n", scosmb_hdr); 733 break; 734 735 case SCDM_SHUTDOWN: /* graceful shutdown */ 736 cmn_err(CE_WARN, "%s SC requested a shutdown ", scosmb_hdr); 737 (void) kadmin(A_SHUTDOWN, AD_HALT, NULL, kcred); 738 /* 739 * In the event kadmin does not bring down the 740 * domain, environmental shutdown is forced 741 */ 742 /*FALLTHROUGH*/ 743 case SCDM_ENVIRON: /* environmental shutdown */ 744 /* 745 * Send SIGPWR to init(1) it will run rc0, 746 * which will uadmin to power down. 747 */ 748 mutex_enter(&pidlock); 749 initpp = prfind(P_INITPID); 750 mutex_exit(&pidlock); 751 752 753 /* 754 * If we're still booting and init(1) isn't set up yet, 755 * simply halt. 
756 */ 757 if (initpp == NULL) { 758 extern void halt(char *); 759 cmn_err(CE_WARN, "%s Environmental Interrupt", 760 scosmb_hdr); 761 power_down((char *)NULL); 762 halt("Power off the System!\n"); 763 } 764 765 /* 766 * else, graceful shutdown with inittab and all 767 * getting involved 768 */ 769 psignal(initpp, SIGPWR); 770 break; 771 772 case SCDM_GET_NODENAME: 773 scosmb_update_nodename(msg.transid); 774 break; 775 776 case SCDM_LOG_ECC_CAP_RESP: 777 /* 778 * The SC has responded to our initiator capability message 779 * issued during the boot flow via scosmb_update_nodename(). 780 * 781 * Parse the incoming data, and appropriately set SC 782 * capabilities... 783 */ 784 cap = (plat_capability_data_t *)msg.data; 785 plat_ecc_capability_sc_set(cap->capd_capability); 786 break; 787 788 case SCDM_LOG_ECC_CAP_INIT: 789 /* 790 * The SC has initiated a capability messaging exchange with 791 * the OS. 792 * 793 * We start out just as we do for an SC response capability 794 * message, a parse of incoming data to appropriately set SC 795 * described capabilities... 796 */ 797 cap = (plat_capability_data_t *)msg.data; 798 plat_ecc_capability_sc_set(cap->capd_capability); 799 /* 800 * The next step is setting up our Response to the SC. 801 * 802 * Allocate memory for message data, initialize appropriately, 803 * and place a new job on the scosmb_output_taskq for 804 * SCDM_LOG_ECC_CAP_RESP, our OS capability messaging response 805 * to the SC initiated sequence detected here. 
806 */ 807 cap_msgdatap = kmem_zalloc(sizeof (scosmb_msgdata_t), KM_SLEEP); 808 cap_msgdatap->type = MBOXSC_MSG_EVENT; 809 cap_msgdatap->cmd = SCDM_LOG_ECC_CAP_RESP; 810 cap_msgdatap->transid = 0; 811 (void) taskq_dispatch(scosmb_output_taskq, 812 (task_func_t *)scosmb_process_output, cap_msgdatap, 813 KM_SLEEP); 814 break; 815 816 case SCDM_DP_ERROR_MSG: 817 case SCDM_DP_FAULT_MSG: 818 dp_trans_event(msg.data, msg.cmd); 819 break; 820 821 case SCDM_DIMM_SERIAL_ID: 822 (void) plat_store_mem_sids(msg.data); 823 break; 824 825 default: 826 cmn_err(CE_WARN, "%s invalid command (0x%x)\n", scosmb_hdr, 827 msg.cmd); 828 break; 829 } 830 831 /* 832 * Free up buffer for incoming messasge data that we allocated earlier 833 */ 834 kmem_free(msg.data, max_size); 835 } 836 837 /* 838 * scosmb_process_output() - outgoing message processing routine 839 * 840 * This routine handles jobs that are queued on the scosmb_output_taskq, or 841 * sent directly from scosmb_log_ecc_error. Each job corresponds to a single 842 * mailbox message that needs to be sent to the SC via the DMSC mailbox. Some 843 * processing of the message may be performed before it is sent to the SC, 844 * depending on the value of the command field. 845 */ 846 static int 847 scosmb_process_output(scosmb_msgdata_t *msgdatap) 848 { 849 int error; 850 int length; 851 char nodename[_SYS_NMLN]; 852 void *free_data; 853 int free_data_len; 854 int cap_size; 855 int cap_ver_len; 856 plat_capability_data_t *cap = NULL; 857 858 /* 859 * This shouldn't ever happen, but it can't hurt to check anyway. 860 */ 861 if (msgdatap == NULL) { 862 return (EINVAL); 863 } 864 865 /* 866 * If data was passed in, we'll need to free it before returning. 867 */ 868 free_data = msgdatap->data; 869 free_data_len = msgdatap->length; 870 871 /* 872 * Some commands may need additional processing prior to transmission. 
873 */ 874 switch (msgdatap->cmd) { 875 /* 876 * Since the SC is only interested in the most recent value of 877 * utsname.nodename, we wait until now to collect that data. We 878 * also use a global flag to prevent multiple event-type 879 * nodename messages from being queued at the same time for the 880 * same reason. 881 */ 882 case SCDM_GET_NODENAME: 883 mutex_enter(&scosmb_mutex); 884 length = strlen(utsname.nodename); 885 ASSERT(length < _SYS_NMLN); 886 if (length == 0) { 887 msgdatap->length = 0; 888 msgdatap->data = NULL; 889 } else { 890 bcopy(utsname.nodename, nodename, length); 891 nodename[length++] = '\0'; 892 msgdatap->data = nodename; 893 msgdatap->length = length; 894 } 895 if (msgdatap->transid == 0) { 896 scosmb_nodename_event_pending = FALSE; 897 } 898 mutex_exit(&scosmb_mutex); 899 break; 900 901 /* 902 * SCDM_LOG_ECC_CAP_INIT 903 * Initiator Capability message from OS to SC 904 * 905 * We construct and send an initiator capability message 906 * every time we go through scosmb_update_nodename(), which 907 * works out to getting an "initiator" capability message 908 * sent from the OS to the SC during the OS boot flow. 909 * 910 * The SC also issues a request to scosmb_update_nodename() 911 * during an SC reboot. Which results in an additional 912 * capability message exchange during SC reboot scenarios. 913 * 914 * SCDM_LOG_ECC_CAP_RESP 915 * Response Capability message from SC to OS 916 * 917 * In certain scenarios, the SC could initiate a capability 918 * messaging exchange with the OS. Processing starts in 919 * scosmb_process_input(), where we detect an incoming 920 * initiator capability message from the SC. We finish 921 * processing here, by sending a response capability message 922 * back to the SC that reflects OS capabilities. 
923 */ 924 case SCDM_LOG_ECC_CAP_INIT: 925 /*FALLTHROUGH*/ 926 case SCDM_LOG_ECC_CAP_RESP: 927 mutex_enter(&scosmb_mutex); 928 929 cap_ver_len = strlen(utsname.release) + 930 strlen(utsname.version) + 2; 931 932 cap_size = sizeof (plat_capability_data_t) + 933 cap_ver_len; 934 935 cap = kmem_zalloc(cap_size, KM_SLEEP); 936 937 cap->capd_major_version = PLAT_ECC_CAP_VERSION_MAJOR; 938 cap->capd_minor_version = PLAT_ECC_CAP_VERSION_MINOR; 939 cap->capd_msg_type = PLAT_ECC_CAPABILITY_MESSAGE; 940 cap->capd_msg_length = cap_size; 941 942 cap->capd_capability = 943 PLAT_ECC_CAPABILITY_DOMAIN_DEFAULT; 944 945 /* 946 * Build the capability solaris_version string: 947 * utsname.release + " " + utsname.version 948 */ 949 (void) snprintf(cap->capd_solaris_version, 950 cap_ver_len, "%s %s", utsname.release, 951 utsname.version); 952 953 /* 954 * The capability message is constructed, now plug it 955 * into the starcat msgdatap: 956 */ 957 msgdatap->data = (plat_capability_data_t *)cap; 958 msgdatap->length = cap_size; 959 960 /* 961 * Finished with initiator/response capability 962 * message set up. 963 * 964 * Note that after sending an "initiator" capability 965 * message, we can expect a subsequent "response" 966 * capability message from the SC, which we will 967 * pick up and minimally handle later, 968 * in scosmb_process_input(). 969 * 970 * If we're sending a "response" capability message 971 * to the SC, then we're done once the message is sent. 972 */ 973 974 if (msgdatap->transid == 0) { 975 scosmb_nodename_event_pending = FALSE; 976 } 977 mutex_exit(&scosmb_mutex); 978 break; 979 980 default: 981 break; 982 } 983 984 /* 985 * Attempt to send the message. 986 */ 987 error = mboxsc_putmsg(DMSC_KEY, msgdatap->type, msgdatap->cmd, 988 &msgdatap->transid, msgdatap->length, msgdatap->data, 989 ecc_message_timeout_ms); 990 991 /* 992 * Free any allocated memory that was passed in. 
993 */ 994 if (free_data != NULL) { 995 kmem_free(free_data, free_data_len); 996 } 997 998 if (cap != NULL) { 999 kmem_free(cap, cap_size); 1000 } 1001 1002 kmem_free(msgdatap, sizeof (scosmb_msgdata_t)); 1003 1004 /* 1005 * If EAGAIN or ETIMEDOUT was received, give up. The sender can try 1006 * again if it was important. If any other non-zero error was 1007 * encountered, the mailbox service is broken, and there's nothing more 1008 * we can do. 1009 */ 1010 mutex_enter(&scosmb_mutex); 1011 if ((error == EAGAIN) || (error == ETIMEDOUT)) { 1012 if (msgdatap->log_error && !scosmb_mboxsc_timedout) { 1013 /* 1014 * Indictment mailbox messages use the return value to 1015 * indicate a problem in the mailbox. For Error 1016 * mailbox messages, we'll have to use a syslog message. 1017 */ 1018 scosmb_mboxsc_timedout = TRUE; 1019 cmn_err(CE_NOTE, "!Solaris failed to send a message " 1020 "(0x%x/0x%x) to the System Controller. Error: %d", 1021 msgdatap->type, msgdatap->cmd, error); 1022 } 1023 } else if (error != 0) { 1024 /* 1025 * The mailbox service appears to be badly broken. If it was 1026 * working previously, generate a warning and set a flag to 1027 * avoid repeating the warning on subsequent failures. 1028 */ 1029 if (msgdatap->log_error && !scosmb_mboxsc_failed) { 1030 scosmb_mboxsc_failed = TRUE; 1031 cmn_err(CE_NOTE, "!An internal error (%d) occurred " 1032 "while processing this message (0x%x/0x%x)", 1033 error, msgdatap->type, msgdatap->cmd); 1034 } 1035 } else { 1036 /* 1037 * If the mailbox module failed previously, it appears to have 1038 * recovered, so we'll want to generate a warning if it fails 1039 * again. 1040 */ 1041 scosmb_mboxsc_failed = scosmb_mboxsc_timedout = FALSE; 1042 } 1043 mutex_exit(&scosmb_mutex); 1044 return (error); 1045 } 1046 1047 /* 1048 * scosmb_update_nodename() - nodename update routine 1049 * 1050 * this routine, which may be invoked from outside of the scosmb module, will 1051 * cause the current nodename to be sent to the SC. 
The mailbox message sent to the SC will use the indicated transaction ID,
 * and will either be a reply message if the ID is non-zero or an event
 * message if it is 0.
 *
 * Capability messaging enhancements:
 *    Every time we move through this code flow, we put an "initiator
 *    capability message" on the message output taskq.  This action will
 *    get a capability message sent to the SC from the OS during boot
 *    scenarios.  A capability message exchange will also happen for
 *    SC reboot scenarios, as the SC will initiate a nodename update
 *    as a matter of course while coming back up.
 *
 *    We'll also get an extraneous capability message sent
 *    to the SC from time to time, but that won't hurt anything.
 *
 * Both message headers allocated here are handed to scosmb_process_output()
 * through the output taskq; that routine frees them.
 */
void
scosmb_update_nodename(uint64_t transid)
{
	scosmb_msgdata_t *msgdatap, *cap_msgdatap;

	/*
	 * If we're generating an unsolicited nodename update (presumably having
	 * been called from platmod:plat_nodename_set()), there's no need to add
	 * a new job to the queue if there is already one on it that will be
	 * sending the latest nodename data.
	 */
	mutex_enter(&scosmb_mutex);
	if (transid == 0) {
		if (scosmb_nodename_event_pending) {
			mutex_exit(&scosmb_mutex);
			return;
		} else {
			/* cleared again by scosmb_process_output() */
			scosmb_nodename_event_pending = TRUE;
		}
	}
	mutex_exit(&scosmb_mutex);

	/*
	 * Allocate memory for the message data, initialize it, and place a new
	 * job on the scosmb_output_taskq for SCDM_GET_NODENAME.  The data and
	 * length fields stay zero; the nodename itself is filled in when the
	 * job runs.
	 */
	msgdatap = (scosmb_msgdata_t *)kmem_zalloc(sizeof (scosmb_msgdata_t),
	    KM_SLEEP);

	msgdatap->type = (transid == 0) ? MBOXSC_MSG_EVENT : MBOXSC_MSG_REPLY;
	msgdatap->cmd = SCDM_GET_NODENAME;
	msgdatap->transid = transid;
	msgdatap->log_error = 1;

	(void) taskq_dispatch(scosmb_output_taskq,
	    (task_func_t *)scosmb_process_output, msgdatap, KM_SLEEP);

	/*
	 * Next, allocate memory, initialize, and place a new job on the
	 * scosmb_output_taskq for SCDM_LOG_ECC_CAP_INIT.  That's a
	 * capability message, where we're the initiator.
	 */
	cap_msgdatap = kmem_zalloc(sizeof (scosmb_msgdata_t), KM_SLEEP);

	cap_msgdatap->type = (transid == 0) ?
	    MBOXSC_MSG_EVENT : MBOXSC_MSG_REPLY;
	cap_msgdatap->cmd = SCDM_LOG_ECC_CAP_INIT;
	cap_msgdatap->transid = transid;
	cap_msgdatap->log_error = 1;

	(void) taskq_dispatch(scosmb_output_taskq,
	    (task_func_t *)scosmb_process_output, cap_msgdatap, KM_SLEEP);
}

/*
 * scosmb_log_ecc_error() - Record ECC error information to SC
 *	For ECC error messages, send the messages through a taskq mechanism
 *	to prevent impaired system performance during ECC floods.  Indictment
 *	messages have already passed through a taskq, so directly call the
 *	output function.
 */
int
scosmb_log_ecc_error(plat_ecc_message_type_t msg_type, void *datap)
{
	scosmb_msgdata_t *msg_header_ptr;
	uint32_t msg_cmd, msg_length;
	int sleep_flag, log_error;
	int do_queue;		/* Set to 1 if taskq needed */

	/*
	 * Set header type and length for message
	 */
	switch (msg_type) {
	case PLAT_ECC_ERROR_MESSAGE:
		/*
		 * We do not want to sleep in an error logging thread.  So,
		 * we set the NOSLEEP flag and go through a taskq before we
		 * send the message.
1144 */ 1145 msg_cmd = SCDM_LOG_ECC_ERROR; 1146 msg_length = sizeof (plat_ecc_error_data_t); 1147 sleep_flag = KM_NOSLEEP; 1148 log_error = 1; 1149 do_queue = 1; 1150 break; 1151 case PLAT_ECC_ERROR2_MESSAGE: 1152 msg_cmd = SCDM_LOG_ECC; 1153 msg_length = sizeof (plat_ecc_error2_data_t); 1154 sleep_flag = KM_NOSLEEP; 1155 log_error = 1; 1156 do_queue = 1; 1157 break; 1158 case PLAT_ECC_INDICTMENT_MESSAGE: 1159 /* 1160 * For indictment messages, we're allowed to sleep, and we 1161 * can directly call the output function, since we've already 1162 * gone through a taskq 1163 */ 1164 msg_cmd = SCDM_LOG_ECC_INDICTMENT; 1165 msg_length = sizeof (plat_ecc_indictment_data_t); 1166 sleep_flag = KM_SLEEP; 1167 log_error = 0; 1168 do_queue = 0; 1169 break; 1170 case PLAT_ECC_INDICTMENT2_MESSAGE: 1171 /* 1172 * For indictment2 messages, we're allowed to sleep, and we 1173 * can directly call the output function, since we've already 1174 * gone through a taskq 1175 */ 1176 msg_cmd = SCDM_LOG_ECC; 1177 msg_length = sizeof (plat_ecc_indictment2_data_t); 1178 sleep_flag = KM_SLEEP; 1179 log_error = 0; 1180 do_queue = 0; 1181 break; 1182 1183 case PLAT_ECC_DIMM_SID_MESSAGE: 1184 /* 1185 * For DIMM sid request messages, we're allowed to sleep, and we 1186 * can directly call the output function, since we've already 1187 * gone through a taskq 1188 */ 1189 msg_cmd = SCDM_DIMM_SERIAL_ID; 1190 msg_length = sizeof (plat_dimm_sid_request_data_t); 1191 sleep_flag = KM_SLEEP; 1192 log_error = 0; 1193 do_queue = 0; 1194 break; 1195 1196 default: 1197 return (EINVAL); 1198 } 1199 1200 /* 1201 * Allocate memory for the mailbox message header. 
1202 */ 1203 msg_header_ptr = 1204 (scosmb_msgdata_t *)kmem_zalloc(sizeof (scosmb_msgdata_t), 1205 sleep_flag); 1206 1207 if (msg_header_ptr == NULL) { 1208 #ifdef DEBUG 1209 cmn_err(CE_WARN, "failed to allocate space for scosmb " 1210 "message header."); 1211 #endif /* DEBUG */ 1212 return (ENOMEM); 1213 } 1214 1215 msg_header_ptr->type = MBOXSC_MSG_EVENT; 1216 msg_header_ptr->cmd = msg_cmd; 1217 msg_header_ptr->transid = 0; 1218 msg_header_ptr->log_error = log_error; 1219 1220 /* 1221 * Allocate memory for the mailbox message payload. 1222 */ 1223 msg_header_ptr->length = msg_length; 1224 msg_header_ptr->data = kmem_zalloc((size_t)msg_length, sleep_flag); 1225 1226 if (msg_header_ptr->data == NULL) { 1227 #ifdef DEBUG 1228 cmn_err(CE_WARN, "failed to allocate space for scosmb " 1229 "message data."); 1230 #endif /* DEBUG */ 1231 kmem_free(msg_header_ptr, sizeof (scosmb_msgdata_t)); 1232 return (ENOMEM); 1233 } 1234 1235 bcopy(datap, msg_header_ptr->data, (size_t)msg_length); 1236 1237 /* 1238 * Based on our earlier look at the message type, we either go through 1239 * a taskq or directly call the output function. 1240 */ 1241 if (do_queue != 0) { 1242 /* 1243 * Place a new job on the scosmb_output_taskq. 1244 */ 1245 if (taskq_dispatch(scosmb_output_taskq, 1246 (task_func_t *)scosmb_process_output, 1247 (void *)msg_header_ptr, TQ_NOSLEEP) == 0) { 1248 #ifdef DEBUG 1249 cmn_err(CE_WARN, "failed to dispatch a task to send " 1250 "ECC mailbox message."); 1251 #endif /* DEBUG */ 1252 kmem_free(msg_header_ptr->data, msg_header_ptr->length); 1253 kmem_free(msg_header_ptr, sizeof (scosmb_msgdata_t)); 1254 return (ENOMEM); 1255 } 1256 return (0); 1257 } else { 1258 return (scosmb_process_output(msg_header_ptr)); 1259 } 1260 } 1261