/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * This file contains the Starcat Solaris Mailbox Client module. This module
 * handles mailbox messages from the SC to the OS (as opposed to messages sent
 * to specific drivers) and vice versa. Two task queues are created upon
 * startup; one handles reading and processing of all incoming messages, while
 * the other handles transmission of all outgoing messages.
3303831d35Sstevel */ 3403831d35Sstevel 3503831d35Sstevel #include <sys/types.h> 3603831d35Sstevel #include <sys/param.h> 3703831d35Sstevel #include <sys/systm.h> 3803831d35Sstevel #include <sys/sysmacros.h> 3903831d35Sstevel #include <sys/sunddi.h> 4003831d35Sstevel #include <sys/errno.h> 4103831d35Sstevel #include <sys/cmn_err.h> 4203831d35Sstevel #include <sys/condvar.h> 4303831d35Sstevel #include <sys/mutex.h> 4403831d35Sstevel #include <sys/disp.h> 4503831d35Sstevel #include <sys/thread.h> 4603831d35Sstevel #include <sys/debug.h> 4703831d35Sstevel #include <sys/cpu_sgnblk_defs.h> 4803831d35Sstevel #include <sys/machsystm.h> 4903831d35Sstevel #include <sys/modctl.h> 5003831d35Sstevel #include <sys/iosramio.h> 5103831d35Sstevel #include <sys/mboxsc.h> 5203831d35Sstevel #include <sys/promif.h> 5303831d35Sstevel #include <sys/uadmin.h> 5403831d35Sstevel #include <sys/cred.h> 5503831d35Sstevel #include <sys/taskq.h> 5603831d35Sstevel #include <sys/utsname.h> 5703831d35Sstevel #include <sys/plat_ecc_unum.h> 5803831d35Sstevel #include <sys/fm/protocol.h> 5903831d35Sstevel #include <sys/fm/util.h> 6003831d35Sstevel #include <sys/starcat.h> 6103831d35Sstevel #include <sys/plat_ecc_dimm.h> 6203831d35Sstevel #include <sys/plat_datapath.h> 6303831d35Sstevel 6403831d35Sstevel /* mailbox keys */ 6503831d35Sstevel #define SCDM_KEY 0x5343444d /* 'S', 'C', 'D', 'M' */ 6603831d35Sstevel #define DMSC_KEY 0x444d5343 /* 'D', 'M', 'S', 'C' */ 6703831d35Sstevel 6803831d35Sstevel /* mailbox commands */ 6903831d35Sstevel #define SCDM_CMD ('S' << 8) /* generic SSP */ 7003831d35Sstevel #define SCDM_CMD_SUCCESS (SCDM_CMD | 0x1) 7103831d35Sstevel #define SCDM_GOTO_OBP (SCDM_CMD | 0x2) 7203831d35Sstevel #define SCDM_GOTO_PANIC (SCDM_CMD | 0x3) 7303831d35Sstevel #define SCDM_ENVIRON (SCDM_CMD | 0x4) /* environmental intr */ 7403831d35Sstevel #define SCDM_SHUTDOWN (SCDM_CMD | 0x5) /* setkeyswitch STANDBY */ 7503831d35Sstevel #define SCDM_GET_NODENAME (SCDM_CMD | 0x6) /* get domain nodename */ 
7603831d35Sstevel #define SCDM_LOG_ECC_ERROR (SCDM_CMD | 0x7) /* ECC error logging */ 7703831d35Sstevel #define SCDM_LOG_ECC_INDICTMENT (SCDM_CMD | 0x8) /* ECC indictment logging */ 7803831d35Sstevel #define SCDM_LOG_ECC (SCDM_CMD | 0x9) /* ECC info */ 7903831d35Sstevel #define SCDM_LOG_ECC_CAP_INIT (SCDM_CMD | 0xa) /* ECC Capability Init */ 8003831d35Sstevel #define SCDM_LOG_ECC_CAP_RESP (SCDM_CMD | 0xb) /* ECC Capability Response */ 8103831d35Sstevel #define SCDM_DIMM_SERIAL_ID (SCDM_CMD | 0xc) /* DIMM ser# req/resp */ 8203831d35Sstevel #define SCDM_DP_ERROR_MSG (SCDM_CMD | 0xd) /* datapath error */ 8303831d35Sstevel #define SCDM_DP_FAULT_MSG (SCDM_CMD | 0xe) /* datapath fault */ 8403831d35Sstevel 8503831d35Sstevel /* general constants */ 8603831d35Sstevel #define GETMSG_TIMEOUT_MS 500 8703831d35Sstevel #define PUTMSG_TIMEOUT_MS 6000 8803831d35Sstevel #define MIN_INPUTQ_TASKS 2 8903831d35Sstevel #define MAX_INPUTQ_TASKS 4 9003831d35Sstevel #define MIN_OUTPUTQ_TASKS 2 9103831d35Sstevel #define MAX_OUTPUTQ_TASKS 512 9203831d35Sstevel #ifndef TRUE 9303831d35Sstevel #define TRUE 1 9403831d35Sstevel #endif 9503831d35Sstevel #ifndef FALSE 9603831d35Sstevel #define FALSE 0 9703831d35Sstevel #endif 9803831d35Sstevel 9903831d35Sstevel clock_t ecc_message_timeout_ms = PUTMSG_TIMEOUT_MS; 10003831d35Sstevel 10103831d35Sstevel /* 10203831d35Sstevel * When a message needs to be sent to the SC, an scosmb_msgdata_t should be 10303831d35Sstevel * populated with the data to be used for the message, and a call to 10403831d35Sstevel * scosmb_process_output should be dispatched on the scosmb_output_taskq, with 10503831d35Sstevel * the address of the scosmb_msgdata_t structure as its arg. The "length" and 10603831d35Sstevel * "data" fields can be used if the message needs to include data beyond the 10703831d35Sstevel * header fields (type, cmd, and transid) and that information must be recorded 10803831d35Sstevel * when the message is placed on the taskq. 
If appropriate for the message type 10903831d35Sstevel * (e.g. nodename info that should always be the most recent available), the 11003831d35Sstevel * "data" field can be set to NULL and the additional data can be assembled 11103831d35Sstevel * immediately prior to sending the message in scosmb_process_output(). 11203831d35Sstevel * 11303831d35Sstevel * If log_error is set, any errors in delivering the message cause a 11403831d35Sstevel * cmn_err() message to be issued. If it is zero, the error is expressed 11503831d35Sstevel * only through return values. 11603831d35Sstevel */ 11703831d35Sstevel typedef struct { 11803831d35Sstevel uint32_t type; 11903831d35Sstevel uint32_t cmd; 12003831d35Sstevel uint64_t transid; 12103831d35Sstevel uint32_t length; 12203831d35Sstevel int log_error; 12303831d35Sstevel void *data; 12403831d35Sstevel } scosmb_msgdata_t; 12503831d35Sstevel 12603831d35Sstevel /* 12703831d35Sstevel * Datapath error and fault messages arrive unsolicited. The message data 12803831d35Sstevel * is contained in a plat_datapath_info_t structure. 
12903831d35Sstevel */ 13003831d35Sstevel typedef struct { 13103831d35Sstevel uint8_t type; /* CDS, DX, EX, CP */ 13203831d35Sstevel uint8_t pad; /* for alignment */ 13303831d35Sstevel uint16_t cpuid; /* Safari ID of base CPU */ 13403831d35Sstevel uint32_t t_value; /* SERD timeout threshold (seconds) */ 13503831d35Sstevel } plat_datapath_info_t; 13603831d35Sstevel 13703831d35Sstevel /* externally visible routines */ 13803831d35Sstevel void scosmb_update_nodename(uint64_t transid); 13903831d35Sstevel 14003831d35Sstevel /* local routines */ 14103831d35Sstevel static void scosmb_inbox_handler(); 14203831d35Sstevel static void scosmb_process_input(void *unused); 14303831d35Sstevel static int scosmb_process_output(scosmb_msgdata_t *arg); 14403831d35Sstevel 14503831d35Sstevel /* local variables */ 14603831d35Sstevel static uint8_t scosmb_mboxsc_failed = FALSE; 14703831d35Sstevel static uint8_t scosmb_mboxsc_timedout = FALSE; 14803831d35Sstevel static uint8_t scosmb_nodename_event_pending = FALSE; 14903831d35Sstevel static char scosmb_hdr[] = "SCOSMB:"; 15003831d35Sstevel static kmutex_t scosmb_mutex; 15103831d35Sstevel static taskq_t *scosmb_input_taskq = NULL; 15203831d35Sstevel static taskq_t *scosmb_output_taskq = NULL; 15303831d35Sstevel 15403831d35Sstevel static char *dperrtype[] = { 15503831d35Sstevel DP_ERROR_CDS, 15603831d35Sstevel DP_ERROR_DX, 15703831d35Sstevel DP_ERROR_EX, 15803831d35Sstevel DP_ERROR_CP 15903831d35Sstevel }; 16003831d35Sstevel 16103831d35Sstevel /* 16203831d35Sstevel * Structures from modctl.h used for loadable module support. 16303831d35Sstevel * SCOSMB is a "miscellaneous" module. 
16403831d35Sstevel */ 16503831d35Sstevel extern struct mod_ops mod_miscops; 16603831d35Sstevel 16703831d35Sstevel static struct modlmisc modlmisc = { 16803831d35Sstevel &mod_miscops, 16903831d35Sstevel "Sun Fire 15000 OS Mbox Client v1.10", 17003831d35Sstevel }; 17103831d35Sstevel 17203831d35Sstevel static struct modlinkage modlinkage = { 17303831d35Sstevel MODREV_1, 17403831d35Sstevel (void *)&modlmisc, 17503831d35Sstevel NULL 17603831d35Sstevel }; 17703831d35Sstevel 17803831d35Sstevel 17903831d35Sstevel /* 18003831d35Sstevel * _init 18103831d35Sstevel * 18203831d35Sstevel * Loadable module support routine. Initializes mutex and condition variables 18303831d35Sstevel * and starts thread. 18403831d35Sstevel */ 18503831d35Sstevel int 18603831d35Sstevel _init(void) 18703831d35Sstevel { 18803831d35Sstevel int error; 18903831d35Sstevel 19003831d35Sstevel /* 19103831d35Sstevel * Initialize the mailboxes 19203831d35Sstevel */ 19303831d35Sstevel if ((error = mboxsc_init(SCDM_KEY, MBOXSC_MBOX_IN, 19403831d35Sstevel scosmb_inbox_handler)) != 0) { 19503831d35Sstevel cmn_err(CE_WARN, "%s mboxsc_init failed (0x%x)\n", scosmb_hdr, 19603831d35Sstevel error); 19703831d35Sstevel return (error); 19803831d35Sstevel } 19903831d35Sstevel 20003831d35Sstevel if ((error = mboxsc_init(DMSC_KEY, MBOXSC_MBOX_OUT, NULL)) != 0) { 20103831d35Sstevel cmn_err(CE_WARN, "%s mboxsc_init failed (0x%x)\n", scosmb_hdr, 20203831d35Sstevel error); 203*07d06da5SSurya Prakki (void) mboxsc_fini(SCDM_KEY); 20403831d35Sstevel return (error); 20503831d35Sstevel } 20603831d35Sstevel 20703831d35Sstevel /* 20803831d35Sstevel * Initialize the global lock 20903831d35Sstevel */ 21003831d35Sstevel mutex_init(&scosmb_mutex, NULL, MUTEX_DEFAULT, NULL); 21103831d35Sstevel 21203831d35Sstevel /* 21303831d35Sstevel * Create the task queues used for processing input and output messages 21403831d35Sstevel */ 21503831d35Sstevel scosmb_input_taskq = taskq_create("scosmb_input_taskq", 1, 21603831d35Sstevel minclsyspri, 
MIN_INPUTQ_TASKS, MAX_INPUTQ_TASKS, TASKQ_PREPOPULATE); 21703831d35Sstevel scosmb_output_taskq = taskq_create("scosmb_output_taskq", 1, 21803831d35Sstevel minclsyspri, MIN_OUTPUTQ_TASKS, MAX_OUTPUTQ_TASKS, 21903831d35Sstevel TASKQ_PREPOPULATE); 22003831d35Sstevel 22103831d35Sstevel /* 22203831d35Sstevel * Attempt to install the module. If unsuccessful, uninitialize 22303831d35Sstevel * everything. 22403831d35Sstevel */ 22503831d35Sstevel error = mod_install(&modlinkage); 22603831d35Sstevel if (error != 0) { 22703831d35Sstevel taskq_destroy(scosmb_output_taskq); 22803831d35Sstevel taskq_destroy(scosmb_input_taskq); 22903831d35Sstevel mutex_destroy(&scosmb_mutex); 230*07d06da5SSurya Prakki (void) mboxsc_fini(DMSC_KEY); 231*07d06da5SSurya Prakki (void) mboxsc_fini(SCDM_KEY); 23203831d35Sstevel } 23303831d35Sstevel 23403831d35Sstevel return (error); 23503831d35Sstevel } 23603831d35Sstevel 23703831d35Sstevel /* 23803831d35Sstevel * _fini 23903831d35Sstevel * 24003831d35Sstevel * Loadable module support routine. Since this routine shouldn't be unloaded (it 24103831d35Sstevel * provides a critical service, and its symbols may be referenced externally), 24203831d35Sstevel * EBUSY is returned to prevent unloading. 24303831d35Sstevel */ 24403831d35Sstevel int 24503831d35Sstevel _fini(void) 24603831d35Sstevel { 24703831d35Sstevel return (EBUSY); 24803831d35Sstevel } 24903831d35Sstevel 25003831d35Sstevel /* 25103831d35Sstevel * _info 25203831d35Sstevel * 25303831d35Sstevel * Loadable module support routine. 25403831d35Sstevel */ 25503831d35Sstevel int 25603831d35Sstevel _info(struct modinfo *modinfop) 25703831d35Sstevel { 25803831d35Sstevel int error = 0; 25903831d35Sstevel 26003831d35Sstevel error = mod_info(&modlinkage, modinfop); 26103831d35Sstevel return (error); 26203831d35Sstevel } 26303831d35Sstevel 26403831d35Sstevel /* 26503831d35Sstevel * scosmb_inbox_handler() - mbox API event handler. 
26603831d35Sstevel * 26703831d35Sstevel * This routine adds an entry to the scosmb_input_taskq that will cause the 26803831d35Sstevel * scosmb_process_input() routine to be called to service the SCDM mailbox. The 26903831d35Sstevel * possibility that taskq_dispatch may fail when given KM_NOSLEEP is safely 27003831d35Sstevel * ignored because there can only be one message waiting in the mailbox at any 27103831d35Sstevel * given time, so the current message will end up being handled by one of the 27203831d35Sstevel * previously queued jobs (and a previous message presumably timed out before we 27303831d35Sstevel * got around to reading it). 27403831d35Sstevel */ 27503831d35Sstevel static void 27603831d35Sstevel scosmb_inbox_handler() 27703831d35Sstevel { 27803831d35Sstevel (void) taskq_dispatch(scosmb_input_taskq, scosmb_process_input, NULL, 27903831d35Sstevel KM_NOSLEEP); 28003831d35Sstevel } 28103831d35Sstevel 28203831d35Sstevel /* 28303831d35Sstevel * dp_get_cores() 28403831d35Sstevel * 28503831d35Sstevel * Checks cpu implementation for the input cpuid and returns 28603831d35Sstevel * the number of cores. 28703831d35Sstevel * If implementation cannot be determined, returns 1 28803831d35Sstevel */ 28903831d35Sstevel static int 29003831d35Sstevel dp_get_cores(uint16_t cpuid) 29103831d35Sstevel { 29203831d35Sstevel int exp, ii, impl = 0, nc, slot; 29303831d35Sstevel 29403831d35Sstevel exp = STARCAT_CPUID_TO_EXPANDER(cpuid); 29503831d35Sstevel slot = STARCAT_CPUID_TO_BOARDSLOT(cpuid); 29603831d35Sstevel if (slot == 1) 29703831d35Sstevel nc = STARCAT_SLOT1_CPU_MAX; 29803831d35Sstevel else 29903831d35Sstevel nc = plat_max_cpu_units_per_board(); 30003831d35Sstevel 30103831d35Sstevel /* find first with valid implementation */ 30203831d35Sstevel for (ii = 0; ii < nc; ii++) 30303831d35Sstevel if (cpu[MAKE_CPUID(exp, slot, ii)]) { 30403831d35Sstevel impl = cpunodes[MAKE_CPUID(exp, slot, ii)]. 
30503831d35Sstevel implementation; 30603831d35Sstevel break; 30703831d35Sstevel } 30803831d35Sstevel 30903831d35Sstevel if (IS_JAGUAR(impl) || IS_PANTHER(impl)) 31003831d35Sstevel return (2); 31103831d35Sstevel else 31203831d35Sstevel return (1); 31303831d35Sstevel 31403831d35Sstevel } 31503831d35Sstevel 31603831d35Sstevel /* 31703831d35Sstevel * dp_payload_add_cpus() 31803831d35Sstevel * 31903831d35Sstevel * From datapath mailbox message, determines the number of and safari IDs 32003831d35Sstevel * for affected cpus, then adds this info to the datapath ereport. 32103831d35Sstevel * 32203831d35Sstevel * Input maxcat (if set) is a count of maxcat cpus actually present - it is 32303831d35Sstevel * a count of cpuids, which takes into account multi-core architecture. 32403831d35Sstevel */ 32503831d35Sstevel static int 32603831d35Sstevel dp_payload_add_cpus(plat_datapath_info_t *dpmsg, nvlist_t *erp, int maxcat) 32703831d35Sstevel { 32803831d35Sstevel int jj = 0, numcpus = 0, nummaxcpus = 0; 32903831d35Sstevel int count, exp, ii, num, ncores, ret, slot, port; 33003831d35Sstevel uint16_t *dparray, cpuid; 33103831d35Sstevel uint64_t *snarray; 33203831d35Sstevel 33303831d35Sstevel /* check for multiple core architectures */ 33403831d35Sstevel ncores = dp_get_cores(dpmsg->cpuid); 33503831d35Sstevel 33603831d35Sstevel /* 33703831d35Sstevel * Determine the number of cpu cores impacted 33803831d35Sstevel */ 33903831d35Sstevel switch (dpmsg->type) { 34003831d35Sstevel case DP_CDS_TYPE: 34103831d35Sstevel if (maxcat) 34203831d35Sstevel nummaxcpus = ncores; 34303831d35Sstevel else 34403831d35Sstevel numcpus = ncores; 34503831d35Sstevel break; 34603831d35Sstevel 34703831d35Sstevel case DP_DX_TYPE: 34803831d35Sstevel if (maxcat) 34903831d35Sstevel nummaxcpus = 2 * ncores; 35003831d35Sstevel else 35103831d35Sstevel numcpus = 2 * ncores; 35203831d35Sstevel break; 35303831d35Sstevel 35403831d35Sstevel case DP_EX_TYPE: 35503831d35Sstevel if (maxcat) 35603831d35Sstevel nummaxcpus = 
STARCAT_SLOT1_CPU_MAX; 35703831d35Sstevel else 35803831d35Sstevel numcpus = plat_max_cpu_units_per_board(); 35903831d35Sstevel break; 36003831d35Sstevel 36103831d35Sstevel case DP_CP_TYPE: 36203831d35Sstevel /* 36303831d35Sstevel * SC-DE supplies the base cpuid affected, if 36403831d35Sstevel * maxcat id was given, there's no slot 0 board 36503831d35Sstevel * present. 36603831d35Sstevel */ 36703831d35Sstevel 36803831d35Sstevel if (!maxcat) { 36903831d35Sstevel /* Slot 0 id was given - set numcpus */ 37003831d35Sstevel numcpus = plat_max_cpu_units_per_board(); 37103831d35Sstevel } 37203831d35Sstevel 37303831d35Sstevel /* there may/may not be maxcats. set a count anyway */ 37403831d35Sstevel nummaxcpus = STARCAT_SLOT1_CPU_MAX; 37503831d35Sstevel 37603831d35Sstevel break; 37703831d35Sstevel 37803831d35Sstevel default: 37903831d35Sstevel ASSERT(0); 38003831d35Sstevel return (-1); 38103831d35Sstevel } 38203831d35Sstevel 38303831d35Sstevel /* Allocate space for cores */ 38403831d35Sstevel num = numcpus + nummaxcpus; 38503831d35Sstevel dparray = kmem_zalloc(num * sizeof (uint16_t *), KM_SLEEP); 38603831d35Sstevel 38703831d35Sstevel /* 38803831d35Sstevel * populate dparray with impacted cores (only those present) 38903831d35Sstevel */ 39003831d35Sstevel exp = STARCAT_CPUID_TO_EXPANDER(dpmsg->cpuid); 39103831d35Sstevel slot = STARCAT_CPUID_TO_BOARDSLOT(dpmsg->cpuid); 39203831d35Sstevel port = STARCAT_CPUID_TO_LPORT(dpmsg->cpuid); 39303831d35Sstevel 39403831d35Sstevel mutex_enter(&cpu_lock); 39503831d35Sstevel 39603831d35Sstevel switch (dpmsg->type) { 39703831d35Sstevel case DP_CDS_TYPE: 39803831d35Sstevel /* 39903831d35Sstevel * For a CDS error, it's the reporting cpuid 40003831d35Sstevel * and it's other core (if present) 40103831d35Sstevel */ 40203831d35Sstevel cpuid = dpmsg->cpuid & 0xFFFB; /* core 0 */ 40303831d35Sstevel if (cpu[cpuid]) 40403831d35Sstevel dparray[jj++] = cpuid; 40503831d35Sstevel 40603831d35Sstevel cpuid = dpmsg->cpuid | 0x4; /* core 1 */ 
40703831d35Sstevel if (cpu[cpuid]) 40803831d35Sstevel dparray[jj++] = cpuid; 40903831d35Sstevel break; 41003831d35Sstevel 41103831d35Sstevel case DP_DX_TYPE: 41203831d35Sstevel /* 41303831d35Sstevel * For a DX error, it's the reporting cpuid (all 41403831d35Sstevel * cores), and the other CPU sharing the same 41503831d35Sstevel * DX<-->DCDS interface (all cores) 41603831d35Sstevel */ 41703831d35Sstevel 41803831d35Sstevel /* reporting cpuid */ 41903831d35Sstevel cpuid = dpmsg->cpuid & 0xFFFB; /* core 0 */ 42003831d35Sstevel 42103831d35Sstevel if (cpu[cpuid]) 42203831d35Sstevel dparray[jj++] = cpuid; 42303831d35Sstevel 42403831d35Sstevel cpuid = dpmsg->cpuid | 0x4; /* core 1 */ 42503831d35Sstevel if (cpu[cpuid]) 42603831d35Sstevel dparray[jj++] = cpuid; 42703831d35Sstevel 42803831d35Sstevel /* find partner cpuid */ 42903831d35Sstevel if (port == 0 || port == 2) 43003831d35Sstevel cpuid = dpmsg->cpuid | 0x1; 43103831d35Sstevel else 43203831d35Sstevel cpuid = dpmsg->cpuid & 0xFFFE; 43303831d35Sstevel 43403831d35Sstevel /* add partner cpuid */ 43503831d35Sstevel cpuid &= 0xFFFB; /* core 0 */ 43603831d35Sstevel if (cpu[cpuid]) 43703831d35Sstevel dparray[jj++] = cpuid; 43803831d35Sstevel 43903831d35Sstevel cpuid |= 0x4; /* core 1 */ 44003831d35Sstevel if (cpu[cpuid]) 44103831d35Sstevel dparray[jj++] = cpuid; 44203831d35Sstevel break; 44303831d35Sstevel 44403831d35Sstevel case DP_EX_TYPE: 44503831d35Sstevel /* 44603831d35Sstevel * For an EX error, it is all cpuids (all cores) 44703831d35Sstevel * on the reporting board 44803831d35Sstevel */ 44903831d35Sstevel 45003831d35Sstevel if (slot == 1) /* maxcat */ 45103831d35Sstevel count = nummaxcpus; 45203831d35Sstevel else 45303831d35Sstevel count = numcpus; 45403831d35Sstevel 45503831d35Sstevel for (ii = 0; ii < count; ii++) { 45603831d35Sstevel cpuid = MAKE_CPUID(exp, slot, ii); 45703831d35Sstevel if (cpu[cpuid]) 45803831d35Sstevel dparray[jj++] = cpuid; 45903831d35Sstevel } 46003831d35Sstevel break; 46103831d35Sstevel 
46203831d35Sstevel case DP_CP_TYPE: 46303831d35Sstevel /* 46403831d35Sstevel * For a CP error, it is all cpuids (all cores) 46503831d35Sstevel * on both boards (SB & IO) in the boardset 46603831d35Sstevel */ 46703831d35Sstevel 46803831d35Sstevel /* Do slot 0 */ 46903831d35Sstevel for (ii = 0; ii < numcpus; ii++) { 47003831d35Sstevel cpuid = MAKE_CPUID(exp, 0, ii); 47103831d35Sstevel if (cpu[cpuid]) 47203831d35Sstevel dparray[jj++] = cpuid; 47303831d35Sstevel } 47403831d35Sstevel 47503831d35Sstevel /* Do slot 1 */ 47603831d35Sstevel for (ii = 0; ii < nummaxcpus; ii++) { 47703831d35Sstevel cpuid = MAKE_CPUID(exp, 1, ii); 47803831d35Sstevel if (cpu[cpuid]) 47903831d35Sstevel dparray[jj++] = cpuid; 48003831d35Sstevel } 48103831d35Sstevel break; 48203831d35Sstevel } 48303831d35Sstevel 48403831d35Sstevel mutex_exit(&cpu_lock); 48503831d35Sstevel 48603831d35Sstevel /* 48703831d35Sstevel * The datapath message could not be associated with any 48803831d35Sstevel * configured CPU. 48903831d35Sstevel */ 49003831d35Sstevel if (!jj) { 49103831d35Sstevel kmem_free(dparray, num * sizeof (uint16_t *)); 49203831d35Sstevel ret = nvlist_add_uint32(erp, DP_LIST_SIZE, jj); 49303831d35Sstevel ASSERT(ret == 0); 49403831d35Sstevel return (-1); 49503831d35Sstevel } 49603831d35Sstevel 49703831d35Sstevel snarray = kmem_zalloc(jj * sizeof (uint64_t *), KM_SLEEP); 49803831d35Sstevel for (ii = 0; ii < jj; ii++) 49903831d35Sstevel snarray[ii] = cpunodes[dparray[ii]].device_id; 50003831d35Sstevel 50103831d35Sstevel ret = nvlist_add_uint32(erp, DP_LIST_SIZE, jj); 50203831d35Sstevel ret |= nvlist_add_uint16_array(erp, DP_LIST, dparray, jj); 50303831d35Sstevel ret |= nvlist_add_uint64_array(erp, SN_LIST, snarray, jj); 50403831d35Sstevel ASSERT(ret == 0); 50503831d35Sstevel 50603831d35Sstevel kmem_free(dparray, num * sizeof (uint16_t *)); 50703831d35Sstevel kmem_free(snarray, jj * sizeof (uint64_t *)); 50803831d35Sstevel 50903831d35Sstevel return (0); 51003831d35Sstevel } 51103831d35Sstevel 
51203831d35Sstevel /* 51303831d35Sstevel * dp_trans_event() - datapath message handler. 51403831d35Sstevel * 51503831d35Sstevel * Process datapath error and fault messages received from the SC. Checks 51603831d35Sstevel * for, and disregards, messages associated with I/O boards. Otherwise, 51703831d35Sstevel * extracts message info to produce a datapath ereport. 51803831d35Sstevel */ 51903831d35Sstevel static void 52003831d35Sstevel dp_trans_event(plat_datapath_info_t *dpmsg, int msgtype) 52103831d35Sstevel { 52203831d35Sstevel nvlist_t *erp, *detector, *hcelem; 52303831d35Sstevel char buf[FM_MAX_CLASS]; 52403831d35Sstevel int exp, slot, i, maxcat = 0; 52503831d35Sstevel 52603831d35Sstevel /* check for I/O board message */ 52703831d35Sstevel exp = STARCAT_CPUID_TO_EXPANDER(dpmsg->cpuid); 52803831d35Sstevel slot = STARCAT_CPUID_TO_BOARDSLOT(dpmsg->cpuid); 52903831d35Sstevel 53003831d35Sstevel if (slot) { 53103831d35Sstevel mutex_enter(&cpu_lock); 53203831d35Sstevel for (i = 0; i < STARCAT_SLOT1_CPU_MAX; i++) { 53303831d35Sstevel if (cpu[MAKE_CPUID(exp, slot, i)]) { 53403831d35Sstevel /* maxcat cpu present */ 53503831d35Sstevel maxcat++; 53603831d35Sstevel } 53703831d35Sstevel } 53803831d35Sstevel mutex_exit(&cpu_lock); 53903831d35Sstevel 54003831d35Sstevel /* 54103831d35Sstevel * Ignore I/O board msg 54203831d35Sstevel */ 54303831d35Sstevel if (maxcat == 0) 54403831d35Sstevel return; 54503831d35Sstevel } 54603831d35Sstevel 54703831d35Sstevel /* allocate space for ereport */ 54803831d35Sstevel erp = fm_nvlist_create(NULL); 54903831d35Sstevel 55003831d35Sstevel /* 55103831d35Sstevel * 55203831d35Sstevel * Member Name Data Type Comments 55303831d35Sstevel * ----------- --------- ----------- 55403831d35Sstevel * version uint8 0 55503831d35Sstevel * class string "asic" 55603831d35Sstevel * ENA uint64 ENA Format 1 55703831d35Sstevel * detector fmri aggregated ID data for SC-DE 55803831d35Sstevel * 55903831d35Sstevel * Datapath ereport subclasses and data payloads: 
56003831d35Sstevel * There will be two types of ereports (error and fault) which will be 56103831d35Sstevel * identified by the "type" member. 56203831d35Sstevel * 56303831d35Sstevel * ereport.asic.starcat.cds.cds-dp 56403831d35Sstevel * ereport.asic.starcat.dx.dx-dp 56503831d35Sstevel * ereport.asic.starcat.sdi.sdi-dp 56603831d35Sstevel * ereport.asic.starcat.cp.cp-dp 56703831d35Sstevel * 56803831d35Sstevel * Member Name Data Type Comments 56903831d35Sstevel * ----------- --------- ----------- 57003831d35Sstevel * erptype uint16 derived from message type: error or 57103831d35Sstevel * fault 57203831d35Sstevel * t-value uint32 SC's datapath SERD timeout threshold 57303831d35Sstevel * dp-list-sz uint8 number of dp-list array elements 57403831d35Sstevel * dp-list array of uint16 Safari IDs of affected cpus 57503831d35Sstevel * sn-list array of uint64 Serial numbers of affected cpus 57603831d35Sstevel * 57703831d35Sstevel */ 57803831d35Sstevel 57903831d35Sstevel /* compose common ereport elements */ 58003831d35Sstevel detector = fm_nvlist_create(NULL); 58103831d35Sstevel 58203831d35Sstevel /* 58303831d35Sstevel * Create legacy FMRI for the detector 58403831d35Sstevel */ 58503831d35Sstevel switch (dpmsg->type) { 58603831d35Sstevel case DP_CDS_TYPE: 58703831d35Sstevel case DP_DX_TYPE: 58803831d35Sstevel if (slot == 1) 58903831d35Sstevel (void) snprintf(buf, FM_MAX_CLASS, "IO%d", exp); 59003831d35Sstevel else 59103831d35Sstevel (void) snprintf(buf, FM_MAX_CLASS, "SB%d", exp); 59203831d35Sstevel break; 59303831d35Sstevel 59403831d35Sstevel case DP_EX_TYPE: 59503831d35Sstevel (void) snprintf(buf, FM_MAX_CLASS, "EX%d", exp); 59603831d35Sstevel break; 59703831d35Sstevel 59803831d35Sstevel case DP_CP_TYPE: 59903831d35Sstevel (void) snprintf(buf, FM_MAX_CLASS, "CP"); 60003831d35Sstevel break; 60103831d35Sstevel 60203831d35Sstevel default: 60303831d35Sstevel (void) snprintf(buf, FM_MAX_CLASS, "UNKNOWN"); 60403831d35Sstevel break; 60503831d35Sstevel } 60603831d35Sstevel 
60703831d35Sstevel hcelem = fm_nvlist_create(NULL); 60803831d35Sstevel 60903831d35Sstevel (void) nvlist_add_string(hcelem, FM_FMRI_HC_NAME, FM_FMRI_LEGACY_HC); 61003831d35Sstevel (void) nvlist_add_string(hcelem, FM_FMRI_HC_ID, buf); 61103831d35Sstevel 61203831d35Sstevel (void) nvlist_add_uint8(detector, FM_VERSION, FM_HC_SCHEME_VERSION); 61303831d35Sstevel (void) nvlist_add_string(detector, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC); 61403831d35Sstevel (void) nvlist_add_string(detector, FM_FMRI_HC_ROOT, ""); 61503831d35Sstevel (void) nvlist_add_uint32(detector, FM_FMRI_HC_LIST_SZ, 1); 61603831d35Sstevel (void) nvlist_add_nvlist_array(detector, FM_FMRI_HC_LIST, &hcelem, 1); 61703831d35Sstevel 61803831d35Sstevel /* build ereport class name */ 61903831d35Sstevel (void) snprintf(buf, FM_MAX_CLASS, "asic.starcat.%s.%s-%s", 62003831d35Sstevel dperrtype[dpmsg->type], dperrtype[dpmsg->type], 62103831d35Sstevel FM_ERROR_DATAPATH); 62203831d35Sstevel 62303831d35Sstevel fm_ereport_set(erp, FM_EREPORT_VERSION, buf, 62403831d35Sstevel fm_ena_generate(0, FM_ENA_FMT1), detector, NULL); 62503831d35Sstevel 62603831d35Sstevel /* add payload elements */ 62703831d35Sstevel if (msgtype == SCDM_DP_ERROR_MSG) { 62803831d35Sstevel fm_payload_set(erp, 62903831d35Sstevel DP_EREPORT_TYPE, DATA_TYPE_UINT16, DP_ERROR, NULL); 63003831d35Sstevel } else { 63103831d35Sstevel fm_payload_set(erp, 63203831d35Sstevel DP_EREPORT_TYPE, DATA_TYPE_UINT16, DP_FAULT, NULL); 63303831d35Sstevel } 63403831d35Sstevel 63503831d35Sstevel fm_payload_set(erp, DP_TVALUE, DATA_TYPE_UINT32, dpmsg->t_value, NULL); 63603831d35Sstevel 63703831d35Sstevel if (dp_payload_add_cpus(dpmsg, erp, maxcat) == 0) { 63803831d35Sstevel /* post ereport */ 63903831d35Sstevel fm_ereport_post(erp, EVCH_SLEEP); 64003831d35Sstevel } 64103831d35Sstevel 64203831d35Sstevel /* free ereport memory */ 64303831d35Sstevel fm_nvlist_destroy(erp, FM_NVA_FREE); 64403831d35Sstevel fm_nvlist_destroy(detector, FM_NVA_FREE); 64503831d35Sstevel 64603831d35Sstevel 
} 64703831d35Sstevel 64803831d35Sstevel /* 64903831d35Sstevel * scosmb_process_input() - incoming message processing routine 65003831d35Sstevel * 65103831d35Sstevel * this routine attempts to read a message from the SCDM mailbox and, if 65203831d35Sstevel * successful, processes the command. if an unrecoverable error is encountered, 65303831d35Sstevel * the scosmb_task thread will be terminated. 65403831d35Sstevel */ 65503831d35Sstevel /* ARGSUSED0 */ 65603831d35Sstevel static void 65703831d35Sstevel scosmb_process_input(void *unused) 65803831d35Sstevel { 65903831d35Sstevel int error; 66003831d35Sstevel scosmb_msgdata_t msg; 66103831d35Sstevel proc_t *initpp; 66203831d35Sstevel plat_capability_data_t *cap; /* capability msg contents ptr */ 66303831d35Sstevel int cap_size; 66403831d35Sstevel int cap_ver_len; 66503831d35Sstevel scosmb_msgdata_t *cap_msgdatap; /* capability msg response */ 66603831d35Sstevel int max_size; 66703831d35Sstevel 66803831d35Sstevel /* 66903831d35Sstevel * Attempt to read a message from the SCDM mailbox. 67003831d35Sstevel * 67103831d35Sstevel * Setup a local buffer to read incoming messages from the SC. 67203831d35Sstevel */ 67303831d35Sstevel cap_ver_len = strlen(utsname.release) + strlen(utsname.version) + 2; 67403831d35Sstevel cap_size = sizeof (plat_capability_data_t) + cap_ver_len; 67503831d35Sstevel max_size = MAX(cap_size, sizeof (plat_dimm_sid_board_data_t)); 67603831d35Sstevel 67703831d35Sstevel msg.type = 0; 67803831d35Sstevel msg.cmd = 0; 67903831d35Sstevel msg.transid = 0; 68003831d35Sstevel msg.length = max_size; 68103831d35Sstevel msg.log_error = 0; 68203831d35Sstevel msg.data = kmem_zalloc(max_size, KM_SLEEP); 68303831d35Sstevel 68403831d35Sstevel error = mboxsc_getmsg(SCDM_KEY, &msg.type, &msg.cmd, &msg.transid, 68503831d35Sstevel &msg.length, msg.data, GETMSG_TIMEOUT_MS); 68603831d35Sstevel 68703831d35Sstevel /* 68803831d35Sstevel * If EAGAIN or ETIMEDOUT was received, give up. 
The SC can just try 68903831d35Sstevel * again if it was important. If any other non-zero error was 69003831d35Sstevel * encountered, the mailbox service is broken, and there's nothing more 69103831d35Sstevel * we can do. 69203831d35Sstevel */ 69303831d35Sstevel mutex_enter(&scosmb_mutex); 69403831d35Sstevel if ((error == EAGAIN) || (error == ETIMEDOUT)) { 69503831d35Sstevel mutex_exit(&scosmb_mutex); 69603831d35Sstevel return; 69703831d35Sstevel } else if (error != 0) { 69803831d35Sstevel /* 69903831d35Sstevel * The mailbox service appears to be badly broken. If it was 70003831d35Sstevel * working previously, generate a warning and set a flag to 70103831d35Sstevel * avoid repeating the warning on subsequent failures. 70203831d35Sstevel */ 70303831d35Sstevel if (!scosmb_mboxsc_failed) { 70403831d35Sstevel scosmb_mboxsc_failed = TRUE; 70503831d35Sstevel cmn_err(CE_WARN, "%s mboxsc error (0x%x)\n", scosmb_hdr, 70603831d35Sstevel error); 70703831d35Sstevel } 70803831d35Sstevel mutex_exit(&scosmb_mutex); 70903831d35Sstevel return; 71003831d35Sstevel } else { 71103831d35Sstevel /* 71203831d35Sstevel * If the mailbox module failed previously, it appears to have 71303831d35Sstevel * recovered, so we'll want to generate a warning if it fails 71403831d35Sstevel * again. 71503831d35Sstevel */ 71603831d35Sstevel scosmb_mboxsc_failed = FALSE; 71703831d35Sstevel } 71803831d35Sstevel mutex_exit(&scosmb_mutex); 71903831d35Sstevel 72003831d35Sstevel /* 72103831d35Sstevel * A message was successfully received, so go ahead and process it. 
72203831d35Sstevel */ 72303831d35Sstevel switch (msg.cmd) { 72403831d35Sstevel 72503831d35Sstevel case SCDM_GOTO_OBP: /* jump to OBP */ 72603831d35Sstevel debug_enter("SC requested jump to OBP"); 72703831d35Sstevel break; 72803831d35Sstevel 72903831d35Sstevel case SCDM_GOTO_PANIC: /* Panic the domain */ 73003831d35Sstevel cmn_err(CE_PANIC, "%s SC requested PANIC\n", scosmb_hdr); 73103831d35Sstevel break; 73203831d35Sstevel 73303831d35Sstevel case SCDM_SHUTDOWN: /* graceful shutdown */ 73403831d35Sstevel cmn_err(CE_WARN, "%s SC requested a shutdown ", scosmb_hdr); 73503831d35Sstevel (void) kadmin(A_SHUTDOWN, AD_HALT, NULL, kcred); 73603831d35Sstevel /* 73703831d35Sstevel * In the event kadmin does not bring down the 73803831d35Sstevel * domain, environmental shutdown is forced 73903831d35Sstevel */ 74003831d35Sstevel /*FALLTHROUGH*/ 74103831d35Sstevel case SCDM_ENVIRON: /* environmental shutdown */ 74203831d35Sstevel /* 74303831d35Sstevel * Send SIGPWR to init(1) it will run rc0, 74403831d35Sstevel * which will uadmin to power down. 74503831d35Sstevel */ 74603831d35Sstevel mutex_enter(&pidlock); 74703831d35Sstevel initpp = prfind(P_INITPID); 74803831d35Sstevel mutex_exit(&pidlock); 74903831d35Sstevel 75003831d35Sstevel 75103831d35Sstevel /* 75203831d35Sstevel * If we're still booting and init(1) isn't set up yet, 75303831d35Sstevel * simply halt. 
75403831d35Sstevel */ 75503831d35Sstevel if (initpp == NULL) { 75603831d35Sstevel extern void halt(char *); 75703831d35Sstevel cmn_err(CE_WARN, "%s Environmental Interrupt", 75803831d35Sstevel scosmb_hdr); 75903831d35Sstevel power_down((char *)NULL); 76003831d35Sstevel halt("Power off the System!\n"); 76103831d35Sstevel } 76203831d35Sstevel 76303831d35Sstevel /* 76403831d35Sstevel * else, graceful shutdown with inittab and all 76503831d35Sstevel * getting involved 76603831d35Sstevel */ 76703831d35Sstevel psignal(initpp, SIGPWR); 76803831d35Sstevel break; 76903831d35Sstevel 77003831d35Sstevel case SCDM_GET_NODENAME: 77103831d35Sstevel scosmb_update_nodename(msg.transid); 77203831d35Sstevel break; 77303831d35Sstevel 77403831d35Sstevel case SCDM_LOG_ECC_CAP_RESP: 77503831d35Sstevel /* 77603831d35Sstevel * The SC has responded to our initiator capability message 77703831d35Sstevel * issued during the boot flow via scosmb_update_nodename(). 77803831d35Sstevel * 77903831d35Sstevel * Parse the incoming data, and appropriately set SC 78003831d35Sstevel * capabilities... 78103831d35Sstevel */ 78203831d35Sstevel cap = (plat_capability_data_t *)msg.data; 78303831d35Sstevel plat_ecc_capability_sc_set(cap->capd_capability); 78403831d35Sstevel break; 78503831d35Sstevel 78603831d35Sstevel case SCDM_LOG_ECC_CAP_INIT: 78703831d35Sstevel /* 78803831d35Sstevel * The SC has initiated a capability messaging exchange with 78903831d35Sstevel * the OS. 79003831d35Sstevel * 79103831d35Sstevel * We start out just as we do for an SC response capability 79203831d35Sstevel * message, a parse of incoming data to appropriately set SC 79303831d35Sstevel * described capabilities... 79403831d35Sstevel */ 79503831d35Sstevel cap = (plat_capability_data_t *)msg.data; 79603831d35Sstevel plat_ecc_capability_sc_set(cap->capd_capability); 79703831d35Sstevel /* 79803831d35Sstevel * The next step is setting up our Response to the SC. 
79903831d35Sstevel * 80003831d35Sstevel * Allocate memory for message data, initialize appropriately, 80103831d35Sstevel * and place a new job on the scosmb_output_taskq for 80203831d35Sstevel * SCDM_LOG_ECC_CAP_RESP, our OS capability messaging response 80303831d35Sstevel * to the SC initiated sequence detected here. 80403831d35Sstevel */ 80503831d35Sstevel cap_msgdatap = kmem_zalloc(sizeof (scosmb_msgdata_t), KM_SLEEP); 80603831d35Sstevel cap_msgdatap->type = MBOXSC_MSG_EVENT; 80703831d35Sstevel cap_msgdatap->cmd = SCDM_LOG_ECC_CAP_RESP; 80803831d35Sstevel cap_msgdatap->transid = 0; 80903831d35Sstevel (void) taskq_dispatch(scosmb_output_taskq, 81003831d35Sstevel (task_func_t *)scosmb_process_output, cap_msgdatap, 81103831d35Sstevel KM_SLEEP); 81203831d35Sstevel break; 81303831d35Sstevel 81403831d35Sstevel case SCDM_DP_ERROR_MSG: 81503831d35Sstevel case SCDM_DP_FAULT_MSG: 81603831d35Sstevel dp_trans_event(msg.data, msg.cmd); 81703831d35Sstevel break; 81803831d35Sstevel 81903831d35Sstevel case SCDM_DIMM_SERIAL_ID: 82003831d35Sstevel (void) plat_store_mem_sids(msg.data); 82103831d35Sstevel break; 82203831d35Sstevel 82303831d35Sstevel default: 82403831d35Sstevel cmn_err(CE_WARN, "%s invalid command (0x%x)\n", scosmb_hdr, 82503831d35Sstevel msg.cmd); 82603831d35Sstevel break; 82703831d35Sstevel } 82803831d35Sstevel 82903831d35Sstevel /* 83003831d35Sstevel * Free up buffer for incoming messasge data that we allocated earlier 83103831d35Sstevel */ 83203831d35Sstevel kmem_free(msg.data, max_size); 83303831d35Sstevel } 83403831d35Sstevel 83503831d35Sstevel /* 83603831d35Sstevel * scosmb_process_output() - outgoing message processing routine 83703831d35Sstevel * 83803831d35Sstevel * This routine handles jobs that are queued on the scosmb_output_taskq, or 83903831d35Sstevel * sent directly from scosmb_log_ecc_error. Each job corresponds to a single 84003831d35Sstevel * mailbox message that needs to be sent to the SC via the DMSC mailbox. 
Some 84103831d35Sstevel * processing of the message may be performed before it is sent to the SC, 84203831d35Sstevel * depending on the value of the command field. 84303831d35Sstevel */ 84403831d35Sstevel static int 84503831d35Sstevel scosmb_process_output(scosmb_msgdata_t *msgdatap) 84603831d35Sstevel { 84703831d35Sstevel int error; 84803831d35Sstevel int length; 84903831d35Sstevel char nodename[_SYS_NMLN]; 85003831d35Sstevel void *free_data; 85103831d35Sstevel int free_data_len; 85203831d35Sstevel int cap_size; 85303831d35Sstevel int cap_ver_len; 85403831d35Sstevel plat_capability_data_t *cap = NULL; 85503831d35Sstevel 85603831d35Sstevel /* 85703831d35Sstevel * This shouldn't ever happen, but it can't hurt to check anyway. 85803831d35Sstevel */ 85903831d35Sstevel if (msgdatap == NULL) { 86003831d35Sstevel return (EINVAL); 86103831d35Sstevel } 86203831d35Sstevel 86303831d35Sstevel /* 86403831d35Sstevel * If data was passed in, we'll need to free it before returning. 86503831d35Sstevel */ 86603831d35Sstevel free_data = msgdatap->data; 86703831d35Sstevel free_data_len = msgdatap->length; 86803831d35Sstevel 86903831d35Sstevel /* 87003831d35Sstevel * Some commands may need additional processing prior to transmission. 87103831d35Sstevel */ 87203831d35Sstevel switch (msgdatap->cmd) { 87303831d35Sstevel /* 87403831d35Sstevel * Since the SC is only interested in the most recent value of 87503831d35Sstevel * utsname.nodename, we wait until now to collect that data. We 87603831d35Sstevel * also use a global flag to prevent multiple event-type 87703831d35Sstevel * nodename messages from being queued at the same time for the 87803831d35Sstevel * same reason. 
87903831d35Sstevel */ 88003831d35Sstevel case SCDM_GET_NODENAME: 88103831d35Sstevel mutex_enter(&scosmb_mutex); 88203831d35Sstevel length = strlen(utsname.nodename); 88303831d35Sstevel ASSERT(length < _SYS_NMLN); 88403831d35Sstevel if (length == 0) { 88503831d35Sstevel msgdatap->length = 0; 88603831d35Sstevel msgdatap->data = NULL; 88703831d35Sstevel } else { 88803831d35Sstevel bcopy(utsname.nodename, nodename, length); 88903831d35Sstevel nodename[length++] = '\0'; 89003831d35Sstevel msgdatap->data = nodename; 89103831d35Sstevel msgdatap->length = length; 89203831d35Sstevel } 89303831d35Sstevel if (msgdatap->transid == 0) { 89403831d35Sstevel scosmb_nodename_event_pending = FALSE; 89503831d35Sstevel } 89603831d35Sstevel mutex_exit(&scosmb_mutex); 89703831d35Sstevel break; 89803831d35Sstevel 89903831d35Sstevel /* 90003831d35Sstevel * SCDM_LOG_ECC_CAP_INIT 90103831d35Sstevel * Initiator Capability message from OS to SC 90203831d35Sstevel * 90303831d35Sstevel * We construct and send an initiator capability message 90403831d35Sstevel * every time we go through scosmb_update_nodename(), which 90503831d35Sstevel * works out to getting an "initiator" capability message 90603831d35Sstevel * sent from the OS to the SC during the OS boot flow. 90703831d35Sstevel * 90803831d35Sstevel * The SC also issues a request to scosmb_update_nodename() 90903831d35Sstevel * during an SC reboot. Which results in an additional 91003831d35Sstevel * capability message exchange during SC reboot scenarios. 91103831d35Sstevel * 91203831d35Sstevel * SCDM_LOG_ECC_CAP_RESP 91303831d35Sstevel * Response Capability message from SC to OS 91403831d35Sstevel * 91503831d35Sstevel * In certain scenarios, the SC could initiate a capability 91603831d35Sstevel * messaging exchange with the OS. Processing starts in 91703831d35Sstevel * scosmb_process_input(), where we detect an incoming 91803831d35Sstevel * initiator capability message from the SC. 
We finish 91903831d35Sstevel * processing here, by sending a response capability message 92003831d35Sstevel * back to the SC that reflects OS capabilities. 92103831d35Sstevel */ 92203831d35Sstevel case SCDM_LOG_ECC_CAP_INIT: 92303831d35Sstevel /*FALLTHROUGH*/ 92403831d35Sstevel case SCDM_LOG_ECC_CAP_RESP: 92503831d35Sstevel mutex_enter(&scosmb_mutex); 92603831d35Sstevel 92703831d35Sstevel cap_ver_len = strlen(utsname.release) + 92803831d35Sstevel strlen(utsname.version) + 2; 92903831d35Sstevel 93003831d35Sstevel cap_size = sizeof (plat_capability_data_t) + 93103831d35Sstevel cap_ver_len; 93203831d35Sstevel 93303831d35Sstevel cap = kmem_zalloc(cap_size, KM_SLEEP); 93403831d35Sstevel 93503831d35Sstevel cap->capd_major_version = PLAT_ECC_CAP_VERSION_MAJOR; 93603831d35Sstevel cap->capd_minor_version = PLAT_ECC_CAP_VERSION_MINOR; 93703831d35Sstevel cap->capd_msg_type = PLAT_ECC_CAPABILITY_MESSAGE; 93803831d35Sstevel cap->capd_msg_length = cap_size; 93903831d35Sstevel 94003831d35Sstevel cap->capd_capability = 94103831d35Sstevel PLAT_ECC_CAPABILITY_DOMAIN_DEFAULT; 94203831d35Sstevel 94303831d35Sstevel /* 94403831d35Sstevel * Build the capability solaris_version string: 94503831d35Sstevel * utsname.release + " " + utsname.version 94603831d35Sstevel */ 94703831d35Sstevel (void) snprintf(cap->capd_solaris_version, 94803831d35Sstevel cap_ver_len, "%s %s", utsname.release, 94903831d35Sstevel utsname.version); 95003831d35Sstevel 95103831d35Sstevel /* 95203831d35Sstevel * The capability message is constructed, now plug it 95303831d35Sstevel * into the starcat msgdatap: 95403831d35Sstevel */ 95503831d35Sstevel msgdatap->data = (plat_capability_data_t *)cap; 95603831d35Sstevel msgdatap->length = cap_size; 95703831d35Sstevel 95803831d35Sstevel /* 95903831d35Sstevel * Finished with initiator/response capability 96003831d35Sstevel * message set up. 
96103831d35Sstevel * 96203831d35Sstevel * Note that after sending an "initiator" capability 96303831d35Sstevel * message, we can expect a subsequent "response" 96403831d35Sstevel * capability message from the SC, which we will 96503831d35Sstevel * pick up and minimally handle later, 96603831d35Sstevel * in scosmb_process_input(). 96703831d35Sstevel * 96803831d35Sstevel * If we're sending a "response" capability message 96903831d35Sstevel * to the SC, then we're done once the message is sent. 97003831d35Sstevel */ 97103831d35Sstevel 97203831d35Sstevel if (msgdatap->transid == 0) { 97303831d35Sstevel scosmb_nodename_event_pending = FALSE; 97403831d35Sstevel } 97503831d35Sstevel mutex_exit(&scosmb_mutex); 97603831d35Sstevel break; 97703831d35Sstevel 97803831d35Sstevel default: 97903831d35Sstevel break; 98003831d35Sstevel } 98103831d35Sstevel 98203831d35Sstevel /* 98303831d35Sstevel * Attempt to send the message. 98403831d35Sstevel */ 98503831d35Sstevel error = mboxsc_putmsg(DMSC_KEY, msgdatap->type, msgdatap->cmd, 98603831d35Sstevel &msgdatap->transid, msgdatap->length, msgdatap->data, 98703831d35Sstevel ecc_message_timeout_ms); 98803831d35Sstevel 98903831d35Sstevel /* 99003831d35Sstevel * Free any allocated memory that was passed in. 99103831d35Sstevel */ 99203831d35Sstevel if (free_data != NULL) { 99303831d35Sstevel kmem_free(free_data, free_data_len); 99403831d35Sstevel } 99503831d35Sstevel 99603831d35Sstevel if (cap != NULL) { 99703831d35Sstevel kmem_free(cap, cap_size); 99803831d35Sstevel } 99903831d35Sstevel 100003831d35Sstevel kmem_free(msgdatap, sizeof (scosmb_msgdata_t)); 100103831d35Sstevel 100203831d35Sstevel /* 100303831d35Sstevel * If EAGAIN or ETIMEDOUT was received, give up. The sender can try 100403831d35Sstevel * again if it was important. If any other non-zero error was 100503831d35Sstevel * encountered, the mailbox service is broken, and there's nothing more 100603831d35Sstevel * we can do. 
100703831d35Sstevel */ 100803831d35Sstevel mutex_enter(&scosmb_mutex); 100903831d35Sstevel if ((error == EAGAIN) || (error == ETIMEDOUT)) { 101003831d35Sstevel if (msgdatap->log_error && !scosmb_mboxsc_timedout) { 101103831d35Sstevel /* 101203831d35Sstevel * Indictment mailbox messages use the return value to 101303831d35Sstevel * indicate a problem in the mailbox. For Error 101403831d35Sstevel * mailbox messages, we'll have to use a syslog message. 101503831d35Sstevel */ 101603831d35Sstevel scosmb_mboxsc_timedout = TRUE; 101703831d35Sstevel cmn_err(CE_NOTE, "!Solaris failed to send a message " 101803831d35Sstevel "(0x%x/0x%x) to the System Controller. Error: %d", 101903831d35Sstevel msgdatap->type, msgdatap->cmd, error); 102003831d35Sstevel } 102103831d35Sstevel } else if (error != 0) { 102203831d35Sstevel /* 102303831d35Sstevel * The mailbox service appears to be badly broken. If it was 102403831d35Sstevel * working previously, generate a warning and set a flag to 102503831d35Sstevel * avoid repeating the warning on subsequent failures. 102603831d35Sstevel */ 102703831d35Sstevel if (msgdatap->log_error && !scosmb_mboxsc_failed) { 102803831d35Sstevel scosmb_mboxsc_failed = TRUE; 102903831d35Sstevel cmn_err(CE_NOTE, "!An internal error (%d) occurred " 103003831d35Sstevel "while processing this message (0x%x/0x%x)", 103103831d35Sstevel error, msgdatap->type, msgdatap->cmd); 103203831d35Sstevel } 103303831d35Sstevel } else { 103403831d35Sstevel /* 103503831d35Sstevel * If the mailbox module failed previously, it appears to have 103603831d35Sstevel * recovered, so we'll want to generate a warning if it fails 103703831d35Sstevel * again. 
103803831d35Sstevel */ 103903831d35Sstevel scosmb_mboxsc_failed = scosmb_mboxsc_timedout = FALSE; 104003831d35Sstevel } 104103831d35Sstevel mutex_exit(&scosmb_mutex); 104203831d35Sstevel return (error); 104303831d35Sstevel } 104403831d35Sstevel 104503831d35Sstevel /* 104603831d35Sstevel * scosmb_update_nodename() - nodename update routine 104703831d35Sstevel * 104803831d35Sstevel * this routine, which may be invoked from outside of the scosmb module, will 104903831d35Sstevel * cause the current nodename to be sent to the SC. The mailbox message sent to 105003831d35Sstevel * the SC will use the indicated transaction ID, and will either be a reply 105103831d35Sstevel * message if the ID is non-zero or an event message if it is 0. 105203831d35Sstevel * 105303831d35Sstevel * Capability messaging enhancements: 105403831d35Sstevel * Every time we move through this code flow, we put an "initiator 105503831d35Sstevel * capability message" on the message output taskq. This action will 105603831d35Sstevel * get a capability message sent to the SC from the OS during boot 105703831d35Sstevel * scenarios. A capability message exchange will also happen for 105803831d35Sstevel * SC reboot scenarios, as the SC will initiate a nodename update 105903831d35Sstevel * as a matter of course while coming back up. 106003831d35Sstevel * 106103831d35Sstevel * We'll also get an extraneous capability message sent 106203831d35Sstevel * to the SC from time to time, but that won't hurt anything. 
106303831d35Sstevel */ 106403831d35Sstevel void 106503831d35Sstevel scosmb_update_nodename(uint64_t transid) 106603831d35Sstevel { 106703831d35Sstevel scosmb_msgdata_t *msgdatap, *cap_msgdatap; 106803831d35Sstevel 106903831d35Sstevel /* 107003831d35Sstevel * If we're generating an unsolicited nodename update (presumably having 107103831d35Sstevel * been called from platmod:plat_nodename_set()), there's no need to add 107203831d35Sstevel * a new job to the queue if there is already one on it that will be 107303831d35Sstevel * sending the latest nodename data. 107403831d35Sstevel */ 107503831d35Sstevel mutex_enter(&scosmb_mutex); 107603831d35Sstevel if (transid == 0) { 107703831d35Sstevel if (scosmb_nodename_event_pending) { 107803831d35Sstevel mutex_exit(&scosmb_mutex); 107903831d35Sstevel return; 108003831d35Sstevel } else { 108103831d35Sstevel scosmb_nodename_event_pending = TRUE; 108203831d35Sstevel } 108303831d35Sstevel } 108403831d35Sstevel mutex_exit(&scosmb_mutex); 108503831d35Sstevel 108603831d35Sstevel /* 108703831d35Sstevel * Allocate memory for the message data, initialize it, and place a new 108803831d35Sstevel * job on the scosmb_output_taskq for SCDM_GET_NODENAME. 108903831d35Sstevel */ 109003831d35Sstevel msgdatap = (scosmb_msgdata_t *)kmem_zalloc(sizeof (scosmb_msgdata_t), 109103831d35Sstevel KM_SLEEP); 109203831d35Sstevel 109303831d35Sstevel msgdatap->type = (transid == 0) ? MBOXSC_MSG_EVENT : MBOXSC_MSG_REPLY; 109403831d35Sstevel msgdatap->cmd = SCDM_GET_NODENAME; 109503831d35Sstevel msgdatap->transid = transid; 109603831d35Sstevel msgdatap->log_error = 1; 109703831d35Sstevel 109803831d35Sstevel (void) taskq_dispatch(scosmb_output_taskq, 109903831d35Sstevel (task_func_t *)scosmb_process_output, msgdatap, KM_SLEEP); 110003831d35Sstevel 110103831d35Sstevel /* 110203831d35Sstevel * Next, allocate memory, initialize, and place a new job on the 110303831d35Sstevel * scosmb_output_taskq for SCDM_LOG_ECC_CAP_INIT. 
That's a 110403831d35Sstevel * capability message, where we're the initiator. 110503831d35Sstevel */ 110603831d35Sstevel cap_msgdatap = kmem_zalloc(sizeof (scosmb_msgdata_t), KM_SLEEP); 110703831d35Sstevel 110803831d35Sstevel cap_msgdatap->type = (transid == 0) ? 110903831d35Sstevel MBOXSC_MSG_EVENT : MBOXSC_MSG_REPLY; 111003831d35Sstevel cap_msgdatap->cmd = SCDM_LOG_ECC_CAP_INIT; 111103831d35Sstevel cap_msgdatap->transid = transid; 111203831d35Sstevel cap_msgdatap->log_error = 1; 111303831d35Sstevel 111403831d35Sstevel (void) taskq_dispatch(scosmb_output_taskq, 111503831d35Sstevel (task_func_t *)scosmb_process_output, cap_msgdatap, KM_SLEEP); 111603831d35Sstevel } 111703831d35Sstevel 111803831d35Sstevel /* 111903831d35Sstevel * scosmb_log_ecc_error() - Record ECC error information to SC 112003831d35Sstevel * For ECC error messages, send the messages through a taskq mechanism 112103831d35Sstevel * to prevent impaired system performance during ECC floods. Indictment 112203831d35Sstevel * messages have already passed through a taskq, so directly call the 112303831d35Sstevel * output function. 112403831d35Sstevel */ 112503831d35Sstevel int 112603831d35Sstevel scosmb_log_ecc_error(plat_ecc_message_type_t msg_type, void *datap) 112703831d35Sstevel { 112803831d35Sstevel scosmb_msgdata_t *msg_header_ptr; 112903831d35Sstevel uint32_t msg_cmd, msg_length; 113003831d35Sstevel int sleep_flag, log_error; 113103831d35Sstevel int do_queue; /* Set to 1 if taskq needed */ 113203831d35Sstevel 113303831d35Sstevel /* 113403831d35Sstevel * Set header type and length for message 113503831d35Sstevel */ 113603831d35Sstevel switch (msg_type) { 113703831d35Sstevel case PLAT_ECC_ERROR_MESSAGE: 113803831d35Sstevel /* 113903831d35Sstevel * We do not want to sleep in an error logging thread. So, 114003831d35Sstevel * we set the NOSLEEP flag and go through a taskq before we 114103831d35Sstevel * send the message. 
114203831d35Sstevel */ 114303831d35Sstevel msg_cmd = SCDM_LOG_ECC_ERROR; 114403831d35Sstevel msg_length = sizeof (plat_ecc_error_data_t); 114503831d35Sstevel sleep_flag = KM_NOSLEEP; 114603831d35Sstevel log_error = 1; 114703831d35Sstevel do_queue = 1; 114803831d35Sstevel break; 114903831d35Sstevel case PLAT_ECC_ERROR2_MESSAGE: 115003831d35Sstevel msg_cmd = SCDM_LOG_ECC; 115103831d35Sstevel msg_length = sizeof (plat_ecc_error2_data_t); 115203831d35Sstevel sleep_flag = KM_NOSLEEP; 115303831d35Sstevel log_error = 1; 115403831d35Sstevel do_queue = 1; 115503831d35Sstevel break; 115603831d35Sstevel case PLAT_ECC_INDICTMENT_MESSAGE: 115703831d35Sstevel /* 115803831d35Sstevel * For indictment messages, we're allowed to sleep, and we 115903831d35Sstevel * can directly call the output function, since we've already 116003831d35Sstevel * gone through a taskq 116103831d35Sstevel */ 116203831d35Sstevel msg_cmd = SCDM_LOG_ECC_INDICTMENT; 116303831d35Sstevel msg_length = sizeof (plat_ecc_indictment_data_t); 116403831d35Sstevel sleep_flag = KM_SLEEP; 116503831d35Sstevel log_error = 0; 116603831d35Sstevel do_queue = 0; 116703831d35Sstevel break; 116803831d35Sstevel case PLAT_ECC_INDICTMENT2_MESSAGE: 116903831d35Sstevel /* 117003831d35Sstevel * For indictment2 messages, we're allowed to sleep, and we 117103831d35Sstevel * can directly call the output function, since we've already 117203831d35Sstevel * gone through a taskq 117303831d35Sstevel */ 117403831d35Sstevel msg_cmd = SCDM_LOG_ECC; 117503831d35Sstevel msg_length = sizeof (plat_ecc_indictment2_data_t); 117603831d35Sstevel sleep_flag = KM_SLEEP; 117703831d35Sstevel log_error = 0; 117803831d35Sstevel do_queue = 0; 117903831d35Sstevel break; 118003831d35Sstevel 118103831d35Sstevel case PLAT_ECC_DIMM_SID_MESSAGE: 118203831d35Sstevel /* 118303831d35Sstevel * For DIMM sid request messages, we're allowed to sleep, and we 118403831d35Sstevel * can directly call the output function, since we've already 118503831d35Sstevel * gone through 
a taskq 118603831d35Sstevel */ 118703831d35Sstevel msg_cmd = SCDM_DIMM_SERIAL_ID; 118803831d35Sstevel msg_length = sizeof (plat_dimm_sid_request_data_t); 118903831d35Sstevel sleep_flag = KM_SLEEP; 119003831d35Sstevel log_error = 0; 119103831d35Sstevel do_queue = 0; 119203831d35Sstevel break; 119303831d35Sstevel 119403831d35Sstevel default: 119503831d35Sstevel return (EINVAL); 119603831d35Sstevel } 119703831d35Sstevel 119803831d35Sstevel /* 119903831d35Sstevel * Allocate memory for the mailbox message header. 120003831d35Sstevel */ 120103831d35Sstevel msg_header_ptr = 120203831d35Sstevel (scosmb_msgdata_t *)kmem_zalloc(sizeof (scosmb_msgdata_t), 120303831d35Sstevel sleep_flag); 120403831d35Sstevel 120503831d35Sstevel if (msg_header_ptr == NULL) { 120603831d35Sstevel #ifdef DEBUG 120703831d35Sstevel cmn_err(CE_WARN, "failed to allocate space for scosmb " 120803831d35Sstevel "message header."); 120903831d35Sstevel #endif /* DEBUG */ 121003831d35Sstevel return (ENOMEM); 121103831d35Sstevel } 121203831d35Sstevel 121303831d35Sstevel msg_header_ptr->type = MBOXSC_MSG_EVENT; 121403831d35Sstevel msg_header_ptr->cmd = msg_cmd; 121503831d35Sstevel msg_header_ptr->transid = 0; 121603831d35Sstevel msg_header_ptr->log_error = log_error; 121703831d35Sstevel 121803831d35Sstevel /* 121903831d35Sstevel * Allocate memory for the mailbox message payload. 
122003831d35Sstevel */ 122103831d35Sstevel msg_header_ptr->length = msg_length; 122203831d35Sstevel msg_header_ptr->data = kmem_zalloc((size_t)msg_length, sleep_flag); 122303831d35Sstevel 122403831d35Sstevel if (msg_header_ptr->data == NULL) { 122503831d35Sstevel #ifdef DEBUG 122603831d35Sstevel cmn_err(CE_WARN, "failed to allocate space for scosmb " 122703831d35Sstevel "message data."); 122803831d35Sstevel #endif /* DEBUG */ 122903831d35Sstevel kmem_free(msg_header_ptr, sizeof (scosmb_msgdata_t)); 123003831d35Sstevel return (ENOMEM); 123103831d35Sstevel } 123203831d35Sstevel 123303831d35Sstevel bcopy(datap, msg_header_ptr->data, (size_t)msg_length); 123403831d35Sstevel 123503831d35Sstevel /* 123603831d35Sstevel * Based on our earlier look at the message type, we either go through 123703831d35Sstevel * a taskq or directly call the output function. 123803831d35Sstevel */ 123903831d35Sstevel if (do_queue != 0) { 124003831d35Sstevel /* 124103831d35Sstevel * Place a new job on the scosmb_output_taskq. 124203831d35Sstevel */ 124303831d35Sstevel if (taskq_dispatch(scosmb_output_taskq, 124403831d35Sstevel (task_func_t *)scosmb_process_output, 124503831d35Sstevel (void *)msg_header_ptr, TQ_NOSLEEP) == 0) { 124603831d35Sstevel #ifdef DEBUG 124703831d35Sstevel cmn_err(CE_WARN, "failed to dispatch a task to send " 124803831d35Sstevel "ECC mailbox message."); 124903831d35Sstevel #endif /* DEBUG */ 125003831d35Sstevel kmem_free(msg_header_ptr->data, msg_header_ptr->length); 125103831d35Sstevel kmem_free(msg_header_ptr, sizeof (scosmb_msgdata_t)); 125203831d35Sstevel return (ENOMEM); 125303831d35Sstevel } 125403831d35Sstevel return (0); 125503831d35Sstevel } else { 125603831d35Sstevel return (scosmb_process_output(msg_header_ptr)); 125703831d35Sstevel } 125803831d35Sstevel } 1259