103831d35Sstevel /*
203831d35Sstevel * CDDL HEADER START
303831d35Sstevel *
403831d35Sstevel * The contents of this file are subject to the terms of the
503831d35Sstevel * Common Development and Distribution License (the "License").
603831d35Sstevel * You may not use this file except in compliance with the License.
703831d35Sstevel *
803831d35Sstevel * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
903831d35Sstevel * or http://www.opensolaris.org/os/licensing.
1003831d35Sstevel * See the License for the specific language governing permissions
1103831d35Sstevel * and limitations under the License.
1203831d35Sstevel *
1303831d35Sstevel * When distributing Covered Code, include this CDDL HEADER in each
1403831d35Sstevel * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1503831d35Sstevel * If applicable, add the following below this CDDL HEADER, with the
1603831d35Sstevel * fields enclosed by brackets "[]" replaced with your own identifying
1703831d35Sstevel * information: Portions Copyright [yyyy] [name of copyright owner]
1803831d35Sstevel *
1903831d35Sstevel * CDDL HEADER END
2003831d35Sstevel */
2103831d35Sstevel
2203831d35Sstevel /*
23*07d06da5SSurya Prakki * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
2403831d35Sstevel * Use is subject to license terms.
2503831d35Sstevel */
2603831d35Sstevel
/*
 * This file contains the Starcat Solaris Mailbox Client module.  This module
 * handles mailbox messages from the SC to the OS (as opposed to messages sent
 * to specific drivers) and vice versa.  Two task queues are created upon
 * startup; one handles reading and processing of all incoming messages, while
 * the other handles transmission of all outgoing messages.
 */
3403831d35Sstevel
3503831d35Sstevel #include <sys/types.h>
3603831d35Sstevel #include <sys/param.h>
3703831d35Sstevel #include <sys/systm.h>
3803831d35Sstevel #include <sys/sysmacros.h>
3903831d35Sstevel #include <sys/sunddi.h>
4003831d35Sstevel #include <sys/errno.h>
4103831d35Sstevel #include <sys/cmn_err.h>
4203831d35Sstevel #include <sys/condvar.h>
4303831d35Sstevel #include <sys/mutex.h>
4403831d35Sstevel #include <sys/disp.h>
4503831d35Sstevel #include <sys/thread.h>
4603831d35Sstevel #include <sys/debug.h>
4703831d35Sstevel #include <sys/cpu_sgnblk_defs.h>
4803831d35Sstevel #include <sys/machsystm.h>
4903831d35Sstevel #include <sys/modctl.h>
5003831d35Sstevel #include <sys/iosramio.h>
5103831d35Sstevel #include <sys/mboxsc.h>
5203831d35Sstevel #include <sys/promif.h>
5303831d35Sstevel #include <sys/uadmin.h>
5403831d35Sstevel #include <sys/cred.h>
5503831d35Sstevel #include <sys/taskq.h>
5603831d35Sstevel #include <sys/utsname.h>
5703831d35Sstevel #include <sys/plat_ecc_unum.h>
5803831d35Sstevel #include <sys/fm/protocol.h>
5903831d35Sstevel #include <sys/fm/util.h>
6003831d35Sstevel #include <sys/starcat.h>
6103831d35Sstevel #include <sys/plat_ecc_dimm.h>
6203831d35Sstevel #include <sys/plat_datapath.h>
6303831d35Sstevel
6403831d35Sstevel /* mailbox keys */
6503831d35Sstevel #define SCDM_KEY 0x5343444d /* 'S', 'C', 'D', 'M' */
6603831d35Sstevel #define DMSC_KEY 0x444d5343 /* 'D', 'M', 'S', 'C' */
6703831d35Sstevel
6803831d35Sstevel /* mailbox commands */
6903831d35Sstevel #define SCDM_CMD ('S' << 8) /* generic SSP */
7003831d35Sstevel #define SCDM_CMD_SUCCESS (SCDM_CMD | 0x1)
7103831d35Sstevel #define SCDM_GOTO_OBP (SCDM_CMD | 0x2)
7203831d35Sstevel #define SCDM_GOTO_PANIC (SCDM_CMD | 0x3)
7303831d35Sstevel #define SCDM_ENVIRON (SCDM_CMD | 0x4) /* environmental intr */
7403831d35Sstevel #define SCDM_SHUTDOWN (SCDM_CMD | 0x5) /* setkeyswitch STANDBY */
7503831d35Sstevel #define SCDM_GET_NODENAME (SCDM_CMD | 0x6) /* get domain nodename */
7603831d35Sstevel #define SCDM_LOG_ECC_ERROR (SCDM_CMD | 0x7) /* ECC error logging */
7703831d35Sstevel #define SCDM_LOG_ECC_INDICTMENT (SCDM_CMD | 0x8) /* ECC indictment logging */
7803831d35Sstevel #define SCDM_LOG_ECC (SCDM_CMD | 0x9) /* ECC info */
7903831d35Sstevel #define SCDM_LOG_ECC_CAP_INIT (SCDM_CMD | 0xa) /* ECC Capability Init */
8003831d35Sstevel #define SCDM_LOG_ECC_CAP_RESP (SCDM_CMD | 0xb) /* ECC Capability Response */
8103831d35Sstevel #define SCDM_DIMM_SERIAL_ID (SCDM_CMD | 0xc) /* DIMM ser# req/resp */
8203831d35Sstevel #define SCDM_DP_ERROR_MSG (SCDM_CMD | 0xd) /* datapath error */
8303831d35Sstevel #define SCDM_DP_FAULT_MSG (SCDM_CMD | 0xe) /* datapath fault */
8403831d35Sstevel
8503831d35Sstevel /* general constants */
8603831d35Sstevel #define GETMSG_TIMEOUT_MS 500
8703831d35Sstevel #define PUTMSG_TIMEOUT_MS 6000
8803831d35Sstevel #define MIN_INPUTQ_TASKS 2
8903831d35Sstevel #define MAX_INPUTQ_TASKS 4
9003831d35Sstevel #define MIN_OUTPUTQ_TASKS 2
9103831d35Sstevel #define MAX_OUTPUTQ_TASKS 512
9203831d35Sstevel #ifndef TRUE
9303831d35Sstevel #define TRUE 1
9403831d35Sstevel #endif
9503831d35Sstevel #ifndef FALSE
9603831d35Sstevel #define FALSE 0
9703831d35Sstevel #endif
9803831d35Sstevel
9903831d35Sstevel clock_t ecc_message_timeout_ms = PUTMSG_TIMEOUT_MS;
10003831d35Sstevel
/*
 * Description of one outgoing (or incoming) mailbox message.
 *
 * To send a message to the SC, populate an scosmb_msgdata_t and dispatch a
 * call to scosmb_process_output() on the scosmb_output_taskq, passing the
 * address of the structure as the argument.
 *
 * The "length" and "data" fields carry any payload beyond the header fields
 * (type, cmd, and transid) when that payload must be captured at the time the
 * message is queued.  For message types whose payload should instead reflect
 * the most recent state available (e.g. nodename info), "data" can be left
 * NULL and the payload assembled in scosmb_process_output() immediately
 * before the message is sent.
 *
 * If log_error is non-zero, any error in delivering the message causes a
 * cmn_err() message to be issued.  If it is zero, the error is reported only
 * through return values.
 */
typedef struct {
	uint32_t	type;		/* header: message type */
	uint32_t	cmd;		/* header: command */
	uint64_t	transid;	/* header: transaction id */
	uint32_t	length;		/* size of the data payload */
	int		log_error;	/* non-zero: cmn_err() on failure */
	void		*data;		/* payload, or NULL (see above) */
} scosmb_msgdata_t;
12503831d35Sstevel
/*
 * Payload of a datapath error or fault message.  These messages arrive
 * unsolicited from the SC; the message data is one of these structures.
 */
typedef struct {
	uint8_t		type;		/* CDS, DX, EX, CP */
	uint8_t		pad;		/* for alignment */
	uint16_t	cpuid;		/* Safari ID of base CPU */
	uint32_t	t_value;	/* SERD timeout threshold (seconds) */
} plat_datapath_info_t;
13603831d35Sstevel
13703831d35Sstevel /* externally visible routines */
13803831d35Sstevel void scosmb_update_nodename(uint64_t transid);
13903831d35Sstevel
14003831d35Sstevel /* local routines */
14103831d35Sstevel static void scosmb_inbox_handler();
14203831d35Sstevel static void scosmb_process_input(void *unused);
14303831d35Sstevel static int scosmb_process_output(scosmb_msgdata_t *arg);
14403831d35Sstevel
14503831d35Sstevel /* local variables */
14603831d35Sstevel static uint8_t scosmb_mboxsc_failed = FALSE;
14703831d35Sstevel static uint8_t scosmb_mboxsc_timedout = FALSE;
14803831d35Sstevel static uint8_t scosmb_nodename_event_pending = FALSE;
14903831d35Sstevel static char scosmb_hdr[] = "SCOSMB:";
15003831d35Sstevel static kmutex_t scosmb_mutex;
15103831d35Sstevel static taskq_t *scosmb_input_taskq = NULL;
15203831d35Sstevel static taskq_t *scosmb_output_taskq = NULL;
15303831d35Sstevel
15403831d35Sstevel static char *dperrtype[] = {
15503831d35Sstevel DP_ERROR_CDS,
15603831d35Sstevel DP_ERROR_DX,
15703831d35Sstevel DP_ERROR_EX,
15803831d35Sstevel DP_ERROR_CP
15903831d35Sstevel };
16003831d35Sstevel
16103831d35Sstevel /*
16203831d35Sstevel * Structures from modctl.h used for loadable module support.
16303831d35Sstevel * SCOSMB is a "miscellaneous" module.
16403831d35Sstevel */
16503831d35Sstevel extern struct mod_ops mod_miscops;
16603831d35Sstevel
16703831d35Sstevel static struct modlmisc modlmisc = {
16803831d35Sstevel &mod_miscops,
16903831d35Sstevel "Sun Fire 15000 OS Mbox Client v1.10",
17003831d35Sstevel };
17103831d35Sstevel
17203831d35Sstevel static struct modlinkage modlinkage = {
17303831d35Sstevel MODREV_1,
17403831d35Sstevel (void *)&modlmisc,
17503831d35Sstevel NULL
17603831d35Sstevel };
17703831d35Sstevel
17803831d35Sstevel
17903831d35Sstevel /*
18003831d35Sstevel * _init
18103831d35Sstevel *
18203831d35Sstevel * Loadable module support routine. Initializes mutex and condition variables
18303831d35Sstevel * and starts thread.
18403831d35Sstevel */
18503831d35Sstevel int
_init(void)18603831d35Sstevel _init(void)
18703831d35Sstevel {
18803831d35Sstevel int error;
18903831d35Sstevel
19003831d35Sstevel /*
19103831d35Sstevel * Initialize the mailboxes
19203831d35Sstevel */
19303831d35Sstevel if ((error = mboxsc_init(SCDM_KEY, MBOXSC_MBOX_IN,
19403831d35Sstevel scosmb_inbox_handler)) != 0) {
19503831d35Sstevel cmn_err(CE_WARN, "%s mboxsc_init failed (0x%x)\n", scosmb_hdr,
19603831d35Sstevel error);
19703831d35Sstevel return (error);
19803831d35Sstevel }
19903831d35Sstevel
20003831d35Sstevel if ((error = mboxsc_init(DMSC_KEY, MBOXSC_MBOX_OUT, NULL)) != 0) {
20103831d35Sstevel cmn_err(CE_WARN, "%s mboxsc_init failed (0x%x)\n", scosmb_hdr,
20203831d35Sstevel error);
203*07d06da5SSurya Prakki (void) mboxsc_fini(SCDM_KEY);
20403831d35Sstevel return (error);
20503831d35Sstevel }
20603831d35Sstevel
20703831d35Sstevel /*
20803831d35Sstevel * Initialize the global lock
20903831d35Sstevel */
21003831d35Sstevel mutex_init(&scosmb_mutex, NULL, MUTEX_DEFAULT, NULL);
21103831d35Sstevel
21203831d35Sstevel /*
21303831d35Sstevel * Create the task queues used for processing input and output messages
21403831d35Sstevel */
21503831d35Sstevel scosmb_input_taskq = taskq_create("scosmb_input_taskq", 1,
21603831d35Sstevel minclsyspri, MIN_INPUTQ_TASKS, MAX_INPUTQ_TASKS, TASKQ_PREPOPULATE);
21703831d35Sstevel scosmb_output_taskq = taskq_create("scosmb_output_taskq", 1,
21803831d35Sstevel minclsyspri, MIN_OUTPUTQ_TASKS, MAX_OUTPUTQ_TASKS,
21903831d35Sstevel TASKQ_PREPOPULATE);
22003831d35Sstevel
22103831d35Sstevel /*
22203831d35Sstevel * Attempt to install the module. If unsuccessful, uninitialize
22303831d35Sstevel * everything.
22403831d35Sstevel */
22503831d35Sstevel error = mod_install(&modlinkage);
22603831d35Sstevel if (error != 0) {
22703831d35Sstevel taskq_destroy(scosmb_output_taskq);
22803831d35Sstevel taskq_destroy(scosmb_input_taskq);
22903831d35Sstevel mutex_destroy(&scosmb_mutex);
230*07d06da5SSurya Prakki (void) mboxsc_fini(DMSC_KEY);
231*07d06da5SSurya Prakki (void) mboxsc_fini(SCDM_KEY);
23203831d35Sstevel }
23303831d35Sstevel
23403831d35Sstevel return (error);
23503831d35Sstevel }
23603831d35Sstevel
/*
 * _fini
 *
 * Loadable module support routine.  This module must stay loaded: it provides
 * a critical service and its symbols may be referenced externally.  Unloading
 * is therefore always refused by returning EBUSY.
 */
int
_fini(void)
{
	/* Unconditionally refuse the unload request. */
	return (EBUSY);
}
24903831d35Sstevel
25003831d35Sstevel /*
25103831d35Sstevel * _info
25203831d35Sstevel *
25303831d35Sstevel * Loadable module support routine.
25403831d35Sstevel */
25503831d35Sstevel int
_info(struct modinfo * modinfop)25603831d35Sstevel _info(struct modinfo *modinfop)
25703831d35Sstevel {
25803831d35Sstevel int error = 0;
25903831d35Sstevel
26003831d35Sstevel error = mod_info(&modlinkage, modinfop);
26103831d35Sstevel return (error);
26203831d35Sstevel }
26303831d35Sstevel
26403831d35Sstevel /*
26503831d35Sstevel * scosmb_inbox_handler() - mbox API event handler.
26603831d35Sstevel *
26703831d35Sstevel * This routine adds an entry to the scosmb_input_taskq that will cause the
26803831d35Sstevel * scosmb_process_input() routine to be called to service the SCDM mailbox. The
26903831d35Sstevel * possibility that taskq_dispatch may fail when given KM_NOSLEEP is safely
27003831d35Sstevel * ignored because there can only be one message waiting in the mailbox at any
27103831d35Sstevel * given time, so the current message will end up being handled by one of the
27203831d35Sstevel * previously queued jobs (and a previous message presumably timed out before we
27303831d35Sstevel * got around to reading it).
27403831d35Sstevel */
27503831d35Sstevel static void
scosmb_inbox_handler()27603831d35Sstevel scosmb_inbox_handler()
27703831d35Sstevel {
27803831d35Sstevel (void) taskq_dispatch(scosmb_input_taskq, scosmb_process_input, NULL,
27903831d35Sstevel KM_NOSLEEP);
28003831d35Sstevel }
28103831d35Sstevel
28203831d35Sstevel /*
28303831d35Sstevel * dp_get_cores()
28403831d35Sstevel *
28503831d35Sstevel * Checks cpu implementation for the input cpuid and returns
28603831d35Sstevel * the number of cores.
28703831d35Sstevel * If implementation cannot be determined, returns 1
28803831d35Sstevel */
28903831d35Sstevel static int
dp_get_cores(uint16_t cpuid)29003831d35Sstevel dp_get_cores(uint16_t cpuid)
29103831d35Sstevel {
29203831d35Sstevel int exp, ii, impl = 0, nc, slot;
29303831d35Sstevel
29403831d35Sstevel exp = STARCAT_CPUID_TO_EXPANDER(cpuid);
29503831d35Sstevel slot = STARCAT_CPUID_TO_BOARDSLOT(cpuid);
29603831d35Sstevel if (slot == 1)
29703831d35Sstevel nc = STARCAT_SLOT1_CPU_MAX;
29803831d35Sstevel else
29903831d35Sstevel nc = plat_max_cpu_units_per_board();
30003831d35Sstevel
30103831d35Sstevel /* find first with valid implementation */
30203831d35Sstevel for (ii = 0; ii < nc; ii++)
30303831d35Sstevel if (cpu[MAKE_CPUID(exp, slot, ii)]) {
30403831d35Sstevel impl = cpunodes[MAKE_CPUID(exp, slot, ii)].
30503831d35Sstevel implementation;
30603831d35Sstevel break;
30703831d35Sstevel }
30803831d35Sstevel
30903831d35Sstevel if (IS_JAGUAR(impl) || IS_PANTHER(impl))
31003831d35Sstevel return (2);
31103831d35Sstevel else
31203831d35Sstevel return (1);
31303831d35Sstevel
31403831d35Sstevel }
31503831d35Sstevel
31603831d35Sstevel /*
31703831d35Sstevel * dp_payload_add_cpus()
31803831d35Sstevel *
31903831d35Sstevel * From datapath mailbox message, determines the number of and safari IDs
32003831d35Sstevel * for affected cpus, then adds this info to the datapath ereport.
32103831d35Sstevel *
32203831d35Sstevel * Input maxcat (if set) is a count of maxcat cpus actually present - it is
32303831d35Sstevel * a count of cpuids, which takes into account multi-core architecture.
32403831d35Sstevel */
32503831d35Sstevel static int
dp_payload_add_cpus(plat_datapath_info_t * dpmsg,nvlist_t * erp,int maxcat)32603831d35Sstevel dp_payload_add_cpus(plat_datapath_info_t *dpmsg, nvlist_t *erp, int maxcat)
32703831d35Sstevel {
32803831d35Sstevel int jj = 0, numcpus = 0, nummaxcpus = 0;
32903831d35Sstevel int count, exp, ii, num, ncores, ret, slot, port;
33003831d35Sstevel uint16_t *dparray, cpuid;
33103831d35Sstevel uint64_t *snarray;
33203831d35Sstevel
33303831d35Sstevel /* check for multiple core architectures */
33403831d35Sstevel ncores = dp_get_cores(dpmsg->cpuid);
33503831d35Sstevel
33603831d35Sstevel /*
33703831d35Sstevel * Determine the number of cpu cores impacted
33803831d35Sstevel */
33903831d35Sstevel switch (dpmsg->type) {
34003831d35Sstevel case DP_CDS_TYPE:
34103831d35Sstevel if (maxcat)
34203831d35Sstevel nummaxcpus = ncores;
34303831d35Sstevel else
34403831d35Sstevel numcpus = ncores;
34503831d35Sstevel break;
34603831d35Sstevel
34703831d35Sstevel case DP_DX_TYPE:
34803831d35Sstevel if (maxcat)
34903831d35Sstevel nummaxcpus = 2 * ncores;
35003831d35Sstevel else
35103831d35Sstevel numcpus = 2 * ncores;
35203831d35Sstevel break;
35303831d35Sstevel
35403831d35Sstevel case DP_EX_TYPE:
35503831d35Sstevel if (maxcat)
35603831d35Sstevel nummaxcpus = STARCAT_SLOT1_CPU_MAX;
35703831d35Sstevel else
35803831d35Sstevel numcpus = plat_max_cpu_units_per_board();
35903831d35Sstevel break;
36003831d35Sstevel
36103831d35Sstevel case DP_CP_TYPE:
36203831d35Sstevel /*
36303831d35Sstevel * SC-DE supplies the base cpuid affected, if
36403831d35Sstevel * maxcat id was given, there's no slot 0 board
36503831d35Sstevel * present.
36603831d35Sstevel */
36703831d35Sstevel
36803831d35Sstevel if (!maxcat) {
36903831d35Sstevel /* Slot 0 id was given - set numcpus */
37003831d35Sstevel numcpus = plat_max_cpu_units_per_board();
37103831d35Sstevel }
37203831d35Sstevel
37303831d35Sstevel /* there may/may not be maxcats. set a count anyway */
37403831d35Sstevel nummaxcpus = STARCAT_SLOT1_CPU_MAX;
37503831d35Sstevel
37603831d35Sstevel break;
37703831d35Sstevel
37803831d35Sstevel default:
37903831d35Sstevel ASSERT(0);
38003831d35Sstevel return (-1);
38103831d35Sstevel }
38203831d35Sstevel
38303831d35Sstevel /* Allocate space for cores */
38403831d35Sstevel num = numcpus + nummaxcpus;
38503831d35Sstevel dparray = kmem_zalloc(num * sizeof (uint16_t *), KM_SLEEP);
38603831d35Sstevel
38703831d35Sstevel /*
38803831d35Sstevel * populate dparray with impacted cores (only those present)
38903831d35Sstevel */
39003831d35Sstevel exp = STARCAT_CPUID_TO_EXPANDER(dpmsg->cpuid);
39103831d35Sstevel slot = STARCAT_CPUID_TO_BOARDSLOT(dpmsg->cpuid);
39203831d35Sstevel port = STARCAT_CPUID_TO_LPORT(dpmsg->cpuid);
39303831d35Sstevel
39403831d35Sstevel mutex_enter(&cpu_lock);
39503831d35Sstevel
39603831d35Sstevel switch (dpmsg->type) {
39703831d35Sstevel case DP_CDS_TYPE:
39803831d35Sstevel /*
39903831d35Sstevel * For a CDS error, it's the reporting cpuid
40003831d35Sstevel * and it's other core (if present)
40103831d35Sstevel */
40203831d35Sstevel cpuid = dpmsg->cpuid & 0xFFFB; /* core 0 */
40303831d35Sstevel if (cpu[cpuid])
40403831d35Sstevel dparray[jj++] = cpuid;
40503831d35Sstevel
40603831d35Sstevel cpuid = dpmsg->cpuid | 0x4; /* core 1 */
40703831d35Sstevel if (cpu[cpuid])
40803831d35Sstevel dparray[jj++] = cpuid;
40903831d35Sstevel break;
41003831d35Sstevel
41103831d35Sstevel case DP_DX_TYPE:
41203831d35Sstevel /*
41303831d35Sstevel * For a DX error, it's the reporting cpuid (all
41403831d35Sstevel * cores), and the other CPU sharing the same
41503831d35Sstevel * DX<-->DCDS interface (all cores)
41603831d35Sstevel */
41703831d35Sstevel
41803831d35Sstevel /* reporting cpuid */
41903831d35Sstevel cpuid = dpmsg->cpuid & 0xFFFB; /* core 0 */
42003831d35Sstevel
42103831d35Sstevel if (cpu[cpuid])
42203831d35Sstevel dparray[jj++] = cpuid;
42303831d35Sstevel
42403831d35Sstevel cpuid = dpmsg->cpuid | 0x4; /* core 1 */
42503831d35Sstevel if (cpu[cpuid])
42603831d35Sstevel dparray[jj++] = cpuid;
42703831d35Sstevel
42803831d35Sstevel /* find partner cpuid */
42903831d35Sstevel if (port == 0 || port == 2)
43003831d35Sstevel cpuid = dpmsg->cpuid | 0x1;
43103831d35Sstevel else
43203831d35Sstevel cpuid = dpmsg->cpuid & 0xFFFE;
43303831d35Sstevel
43403831d35Sstevel /* add partner cpuid */
43503831d35Sstevel cpuid &= 0xFFFB; /* core 0 */
43603831d35Sstevel if (cpu[cpuid])
43703831d35Sstevel dparray[jj++] = cpuid;
43803831d35Sstevel
43903831d35Sstevel cpuid |= 0x4; /* core 1 */
44003831d35Sstevel if (cpu[cpuid])
44103831d35Sstevel dparray[jj++] = cpuid;
44203831d35Sstevel break;
44303831d35Sstevel
44403831d35Sstevel case DP_EX_TYPE:
44503831d35Sstevel /*
44603831d35Sstevel * For an EX error, it is all cpuids (all cores)
44703831d35Sstevel * on the reporting board
44803831d35Sstevel */
44903831d35Sstevel
45003831d35Sstevel if (slot == 1) /* maxcat */
45103831d35Sstevel count = nummaxcpus;
45203831d35Sstevel else
45303831d35Sstevel count = numcpus;
45403831d35Sstevel
45503831d35Sstevel for (ii = 0; ii < count; ii++) {
45603831d35Sstevel cpuid = MAKE_CPUID(exp, slot, ii);
45703831d35Sstevel if (cpu[cpuid])
45803831d35Sstevel dparray[jj++] = cpuid;
45903831d35Sstevel }
46003831d35Sstevel break;
46103831d35Sstevel
46203831d35Sstevel case DP_CP_TYPE:
46303831d35Sstevel /*
46403831d35Sstevel * For a CP error, it is all cpuids (all cores)
46503831d35Sstevel * on both boards (SB & IO) in the boardset
46603831d35Sstevel */
46703831d35Sstevel
46803831d35Sstevel /* Do slot 0 */
46903831d35Sstevel for (ii = 0; ii < numcpus; ii++) {
47003831d35Sstevel cpuid = MAKE_CPUID(exp, 0, ii);
47103831d35Sstevel if (cpu[cpuid])
47203831d35Sstevel dparray[jj++] = cpuid;
47303831d35Sstevel }
47403831d35Sstevel
47503831d35Sstevel /* Do slot 1 */
47603831d35Sstevel for (ii = 0; ii < nummaxcpus; ii++) {
47703831d35Sstevel cpuid = MAKE_CPUID(exp, 1, ii);
47803831d35Sstevel if (cpu[cpuid])
47903831d35Sstevel dparray[jj++] = cpuid;
48003831d35Sstevel }
48103831d35Sstevel break;
48203831d35Sstevel }
48303831d35Sstevel
48403831d35Sstevel mutex_exit(&cpu_lock);
48503831d35Sstevel
48603831d35Sstevel /*
48703831d35Sstevel * The datapath message could not be associated with any
48803831d35Sstevel * configured CPU.
48903831d35Sstevel */
49003831d35Sstevel if (!jj) {
49103831d35Sstevel kmem_free(dparray, num * sizeof (uint16_t *));
49203831d35Sstevel ret = nvlist_add_uint32(erp, DP_LIST_SIZE, jj);
49303831d35Sstevel ASSERT(ret == 0);
49403831d35Sstevel return (-1);
49503831d35Sstevel }
49603831d35Sstevel
49703831d35Sstevel snarray = kmem_zalloc(jj * sizeof (uint64_t *), KM_SLEEP);
49803831d35Sstevel for (ii = 0; ii < jj; ii++)
49903831d35Sstevel snarray[ii] = cpunodes[dparray[ii]].device_id;
50003831d35Sstevel
50103831d35Sstevel ret = nvlist_add_uint32(erp, DP_LIST_SIZE, jj);
50203831d35Sstevel ret |= nvlist_add_uint16_array(erp, DP_LIST, dparray, jj);
50303831d35Sstevel ret |= nvlist_add_uint64_array(erp, SN_LIST, snarray, jj);
50403831d35Sstevel ASSERT(ret == 0);
50503831d35Sstevel
50603831d35Sstevel kmem_free(dparray, num * sizeof (uint16_t *));
50703831d35Sstevel kmem_free(snarray, jj * sizeof (uint64_t *));
50803831d35Sstevel
50903831d35Sstevel return (0);
51003831d35Sstevel }
51103831d35Sstevel
51203831d35Sstevel /*
51303831d35Sstevel * dp_trans_event() - datapath message handler.
51403831d35Sstevel *
51503831d35Sstevel * Process datapath error and fault messages received from the SC. Checks
51603831d35Sstevel * for, and disregards, messages associated with I/O boards. Otherwise,
51703831d35Sstevel * extracts message info to produce a datapath ereport.
51803831d35Sstevel */
51903831d35Sstevel static void
dp_trans_event(plat_datapath_info_t * dpmsg,int msgtype)52003831d35Sstevel dp_trans_event(plat_datapath_info_t *dpmsg, int msgtype)
52103831d35Sstevel {
52203831d35Sstevel nvlist_t *erp, *detector, *hcelem;
52303831d35Sstevel char buf[FM_MAX_CLASS];
52403831d35Sstevel int exp, slot, i, maxcat = 0;
52503831d35Sstevel
52603831d35Sstevel /* check for I/O board message */
52703831d35Sstevel exp = STARCAT_CPUID_TO_EXPANDER(dpmsg->cpuid);
52803831d35Sstevel slot = STARCAT_CPUID_TO_BOARDSLOT(dpmsg->cpuid);
52903831d35Sstevel
53003831d35Sstevel if (slot) {
53103831d35Sstevel mutex_enter(&cpu_lock);
53203831d35Sstevel for (i = 0; i < STARCAT_SLOT1_CPU_MAX; i++) {
53303831d35Sstevel if (cpu[MAKE_CPUID(exp, slot, i)]) {
53403831d35Sstevel /* maxcat cpu present */
53503831d35Sstevel maxcat++;
53603831d35Sstevel }
53703831d35Sstevel }
53803831d35Sstevel mutex_exit(&cpu_lock);
53903831d35Sstevel
54003831d35Sstevel /*
54103831d35Sstevel * Ignore I/O board msg
54203831d35Sstevel */
54303831d35Sstevel if (maxcat == 0)
54403831d35Sstevel return;
54503831d35Sstevel }
54603831d35Sstevel
54703831d35Sstevel /* allocate space for ereport */
54803831d35Sstevel erp = fm_nvlist_create(NULL);
54903831d35Sstevel
55003831d35Sstevel /*
55103831d35Sstevel *
55203831d35Sstevel * Member Name Data Type Comments
55303831d35Sstevel * ----------- --------- -----------
55403831d35Sstevel * version uint8 0
55503831d35Sstevel * class string "asic"
55603831d35Sstevel * ENA uint64 ENA Format 1
55703831d35Sstevel * detector fmri aggregated ID data for SC-DE
55803831d35Sstevel *
55903831d35Sstevel * Datapath ereport subclasses and data payloads:
56003831d35Sstevel * There will be two types of ereports (error and fault) which will be
56103831d35Sstevel * identified by the "type" member.
56203831d35Sstevel *
56303831d35Sstevel * ereport.asic.starcat.cds.cds-dp
56403831d35Sstevel * ereport.asic.starcat.dx.dx-dp
56503831d35Sstevel * ereport.asic.starcat.sdi.sdi-dp
56603831d35Sstevel * ereport.asic.starcat.cp.cp-dp
56703831d35Sstevel *
56803831d35Sstevel * Member Name Data Type Comments
56903831d35Sstevel * ----------- --------- -----------
57003831d35Sstevel * erptype uint16 derived from message type: error or
57103831d35Sstevel * fault
57203831d35Sstevel * t-value uint32 SC's datapath SERD timeout threshold
57303831d35Sstevel * dp-list-sz uint8 number of dp-list array elements
57403831d35Sstevel * dp-list array of uint16 Safari IDs of affected cpus
57503831d35Sstevel * sn-list array of uint64 Serial numbers of affected cpus
57603831d35Sstevel *
57703831d35Sstevel */
57803831d35Sstevel
57903831d35Sstevel /* compose common ereport elements */
58003831d35Sstevel detector = fm_nvlist_create(NULL);
58103831d35Sstevel
58203831d35Sstevel /*
58303831d35Sstevel * Create legacy FMRI for the detector
58403831d35Sstevel */
58503831d35Sstevel switch (dpmsg->type) {
58603831d35Sstevel case DP_CDS_TYPE:
58703831d35Sstevel case DP_DX_TYPE:
58803831d35Sstevel if (slot == 1)
58903831d35Sstevel (void) snprintf(buf, FM_MAX_CLASS, "IO%d", exp);
59003831d35Sstevel else
59103831d35Sstevel (void) snprintf(buf, FM_MAX_CLASS, "SB%d", exp);
59203831d35Sstevel break;
59303831d35Sstevel
59403831d35Sstevel case DP_EX_TYPE:
59503831d35Sstevel (void) snprintf(buf, FM_MAX_CLASS, "EX%d", exp);
59603831d35Sstevel break;
59703831d35Sstevel
59803831d35Sstevel case DP_CP_TYPE:
59903831d35Sstevel (void) snprintf(buf, FM_MAX_CLASS, "CP");
60003831d35Sstevel break;
60103831d35Sstevel
60203831d35Sstevel default:
60303831d35Sstevel (void) snprintf(buf, FM_MAX_CLASS, "UNKNOWN");
60403831d35Sstevel break;
60503831d35Sstevel }
60603831d35Sstevel
60703831d35Sstevel hcelem = fm_nvlist_create(NULL);
60803831d35Sstevel
60903831d35Sstevel (void) nvlist_add_string(hcelem, FM_FMRI_HC_NAME, FM_FMRI_LEGACY_HC);
61003831d35Sstevel (void) nvlist_add_string(hcelem, FM_FMRI_HC_ID, buf);
61103831d35Sstevel
61203831d35Sstevel (void) nvlist_add_uint8(detector, FM_VERSION, FM_HC_SCHEME_VERSION);
61303831d35Sstevel (void) nvlist_add_string(detector, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
61403831d35Sstevel (void) nvlist_add_string(detector, FM_FMRI_HC_ROOT, "");
61503831d35Sstevel (void) nvlist_add_uint32(detector, FM_FMRI_HC_LIST_SZ, 1);
61603831d35Sstevel (void) nvlist_add_nvlist_array(detector, FM_FMRI_HC_LIST, &hcelem, 1);
61703831d35Sstevel
61803831d35Sstevel /* build ereport class name */
61903831d35Sstevel (void) snprintf(buf, FM_MAX_CLASS, "asic.starcat.%s.%s-%s",
62003831d35Sstevel dperrtype[dpmsg->type], dperrtype[dpmsg->type],
62103831d35Sstevel FM_ERROR_DATAPATH);
62203831d35Sstevel
62303831d35Sstevel fm_ereport_set(erp, FM_EREPORT_VERSION, buf,
62403831d35Sstevel fm_ena_generate(0, FM_ENA_FMT1), detector, NULL);
62503831d35Sstevel
62603831d35Sstevel /* add payload elements */
62703831d35Sstevel if (msgtype == SCDM_DP_ERROR_MSG) {
62803831d35Sstevel fm_payload_set(erp,
62903831d35Sstevel DP_EREPORT_TYPE, DATA_TYPE_UINT16, DP_ERROR, NULL);
63003831d35Sstevel } else {
63103831d35Sstevel fm_payload_set(erp,
63203831d35Sstevel DP_EREPORT_TYPE, DATA_TYPE_UINT16, DP_FAULT, NULL);
63303831d35Sstevel }
63403831d35Sstevel
63503831d35Sstevel fm_payload_set(erp, DP_TVALUE, DATA_TYPE_UINT32, dpmsg->t_value, NULL);
63603831d35Sstevel
63703831d35Sstevel if (dp_payload_add_cpus(dpmsg, erp, maxcat) == 0) {
63803831d35Sstevel /* post ereport */
63903831d35Sstevel fm_ereport_post(erp, EVCH_SLEEP);
64003831d35Sstevel }
64103831d35Sstevel
64203831d35Sstevel /* free ereport memory */
64303831d35Sstevel fm_nvlist_destroy(erp, FM_NVA_FREE);
64403831d35Sstevel fm_nvlist_destroy(detector, FM_NVA_FREE);
64503831d35Sstevel
64603831d35Sstevel }
64703831d35Sstevel
64803831d35Sstevel /*
64903831d35Sstevel * scosmb_process_input() - incoming message processing routine
65003831d35Sstevel *
65103831d35Sstevel * this routine attempts to read a message from the SCDM mailbox and, if
65203831d35Sstevel * successful, processes the command. if an unrecoverable error is encountered,
65303831d35Sstevel * the scosmb_task thread will be terminated.
65403831d35Sstevel */
65503831d35Sstevel /* ARGSUSED0 */
65603831d35Sstevel static void
scosmb_process_input(void * unused)65703831d35Sstevel scosmb_process_input(void *unused)
65803831d35Sstevel {
65903831d35Sstevel int error;
66003831d35Sstevel scosmb_msgdata_t msg;
66103831d35Sstevel proc_t *initpp;
66203831d35Sstevel plat_capability_data_t *cap; /* capability msg contents ptr */
66303831d35Sstevel int cap_size;
66403831d35Sstevel int cap_ver_len;
66503831d35Sstevel scosmb_msgdata_t *cap_msgdatap; /* capability msg response */
66603831d35Sstevel int max_size;
66703831d35Sstevel
66803831d35Sstevel /*
66903831d35Sstevel * Attempt to read a message from the SCDM mailbox.
67003831d35Sstevel *
67103831d35Sstevel * Setup a local buffer to read incoming messages from the SC.
67203831d35Sstevel */
67303831d35Sstevel cap_ver_len = strlen(utsname.release) + strlen(utsname.version) + 2;
67403831d35Sstevel cap_size = sizeof (plat_capability_data_t) + cap_ver_len;
67503831d35Sstevel max_size = MAX(cap_size, sizeof (plat_dimm_sid_board_data_t));
67603831d35Sstevel
67703831d35Sstevel msg.type = 0;
67803831d35Sstevel msg.cmd = 0;
67903831d35Sstevel msg.transid = 0;
68003831d35Sstevel msg.length = max_size;
68103831d35Sstevel msg.log_error = 0;
68203831d35Sstevel msg.data = kmem_zalloc(max_size, KM_SLEEP);
68303831d35Sstevel
68403831d35Sstevel error = mboxsc_getmsg(SCDM_KEY, &msg.type, &msg.cmd, &msg.transid,
68503831d35Sstevel &msg.length, msg.data, GETMSG_TIMEOUT_MS);
68603831d35Sstevel
68703831d35Sstevel /*
68803831d35Sstevel * If EAGAIN or ETIMEDOUT was received, give up. The SC can just try
68903831d35Sstevel * again if it was important. If any other non-zero error was
69003831d35Sstevel * encountered, the mailbox service is broken, and there's nothing more
69103831d35Sstevel * we can do.
69203831d35Sstevel */
69303831d35Sstevel mutex_enter(&scosmb_mutex);
69403831d35Sstevel if ((error == EAGAIN) || (error == ETIMEDOUT)) {
69503831d35Sstevel mutex_exit(&scosmb_mutex);
69603831d35Sstevel return;
69703831d35Sstevel } else if (error != 0) {
69803831d35Sstevel /*
69903831d35Sstevel * The mailbox service appears to be badly broken. If it was
70003831d35Sstevel * working previously, generate a warning and set a flag to
70103831d35Sstevel * avoid repeating the warning on subsequent failures.
70203831d35Sstevel */
70303831d35Sstevel if (!scosmb_mboxsc_failed) {
70403831d35Sstevel scosmb_mboxsc_failed = TRUE;
70503831d35Sstevel cmn_err(CE_WARN, "%s mboxsc error (0x%x)\n", scosmb_hdr,
70603831d35Sstevel error);
70703831d35Sstevel }
70803831d35Sstevel mutex_exit(&scosmb_mutex);
70903831d35Sstevel return;
71003831d35Sstevel } else {
71103831d35Sstevel /*
71203831d35Sstevel * If the mailbox module failed previously, it appears to have
71303831d35Sstevel * recovered, so we'll want to generate a warning if it fails
71403831d35Sstevel * again.
71503831d35Sstevel */
71603831d35Sstevel scosmb_mboxsc_failed = FALSE;
71703831d35Sstevel }
71803831d35Sstevel mutex_exit(&scosmb_mutex);
71903831d35Sstevel
72003831d35Sstevel /*
72103831d35Sstevel * A message was successfully received, so go ahead and process it.
72203831d35Sstevel */
72303831d35Sstevel switch (msg.cmd) {
72403831d35Sstevel
72503831d35Sstevel case SCDM_GOTO_OBP: /* jump to OBP */
72603831d35Sstevel debug_enter("SC requested jump to OBP");
72703831d35Sstevel break;
72803831d35Sstevel
72903831d35Sstevel case SCDM_GOTO_PANIC: /* Panic the domain */
73003831d35Sstevel cmn_err(CE_PANIC, "%s SC requested PANIC\n", scosmb_hdr);
73103831d35Sstevel break;
73203831d35Sstevel
73303831d35Sstevel case SCDM_SHUTDOWN: /* graceful shutdown */
73403831d35Sstevel cmn_err(CE_WARN, "%s SC requested a shutdown ", scosmb_hdr);
73503831d35Sstevel (void) kadmin(A_SHUTDOWN, AD_HALT, NULL, kcred);
73603831d35Sstevel /*
73703831d35Sstevel * In the event kadmin does not bring down the
73803831d35Sstevel * domain, environmental shutdown is forced
73903831d35Sstevel */
74003831d35Sstevel /*FALLTHROUGH*/
74103831d35Sstevel case SCDM_ENVIRON: /* environmental shutdown */
74203831d35Sstevel /*
74303831d35Sstevel * Send SIGPWR to init(1) it will run rc0,
74403831d35Sstevel * which will uadmin to power down.
74503831d35Sstevel */
74603831d35Sstevel mutex_enter(&pidlock);
74703831d35Sstevel initpp = prfind(P_INITPID);
74803831d35Sstevel mutex_exit(&pidlock);
74903831d35Sstevel
75003831d35Sstevel
75103831d35Sstevel /*
75203831d35Sstevel * If we're still booting and init(1) isn't set up yet,
75303831d35Sstevel * simply halt.
75403831d35Sstevel */
75503831d35Sstevel if (initpp == NULL) {
75603831d35Sstevel extern void halt(char *);
75703831d35Sstevel cmn_err(CE_WARN, "%s Environmental Interrupt",
75803831d35Sstevel scosmb_hdr);
75903831d35Sstevel power_down((char *)NULL);
76003831d35Sstevel halt("Power off the System!\n");
76103831d35Sstevel }
76203831d35Sstevel
76303831d35Sstevel /*
76403831d35Sstevel * else, graceful shutdown with inittab and all
76503831d35Sstevel * getting involved
76603831d35Sstevel */
76703831d35Sstevel psignal(initpp, SIGPWR);
76803831d35Sstevel break;
76903831d35Sstevel
77003831d35Sstevel case SCDM_GET_NODENAME:
77103831d35Sstevel scosmb_update_nodename(msg.transid);
77203831d35Sstevel break;
77303831d35Sstevel
77403831d35Sstevel case SCDM_LOG_ECC_CAP_RESP:
77503831d35Sstevel /*
77603831d35Sstevel * The SC has responded to our initiator capability message
77703831d35Sstevel * issued during the boot flow via scosmb_update_nodename().
77803831d35Sstevel *
77903831d35Sstevel * Parse the incoming data, and appropriately set SC
78003831d35Sstevel * capabilities...
78103831d35Sstevel */
78203831d35Sstevel cap = (plat_capability_data_t *)msg.data;
78303831d35Sstevel plat_ecc_capability_sc_set(cap->capd_capability);
78403831d35Sstevel break;
78503831d35Sstevel
78603831d35Sstevel case SCDM_LOG_ECC_CAP_INIT:
78703831d35Sstevel /*
78803831d35Sstevel * The SC has initiated a capability messaging exchange with
78903831d35Sstevel * the OS.
79003831d35Sstevel *
79103831d35Sstevel * We start out just as we do for an SC response capability
79203831d35Sstevel * message, a parse of incoming data to appropriately set SC
79303831d35Sstevel * described capabilities...
79403831d35Sstevel */
79503831d35Sstevel cap = (plat_capability_data_t *)msg.data;
79603831d35Sstevel plat_ecc_capability_sc_set(cap->capd_capability);
79703831d35Sstevel /*
79803831d35Sstevel * The next step is setting up our Response to the SC.
79903831d35Sstevel *
80003831d35Sstevel * Allocate memory for message data, initialize appropriately,
80103831d35Sstevel * and place a new job on the scosmb_output_taskq for
80203831d35Sstevel * SCDM_LOG_ECC_CAP_RESP, our OS capability messaging response
80303831d35Sstevel * to the SC initiated sequence detected here.
80403831d35Sstevel */
80503831d35Sstevel cap_msgdatap = kmem_zalloc(sizeof (scosmb_msgdata_t), KM_SLEEP);
80603831d35Sstevel cap_msgdatap->type = MBOXSC_MSG_EVENT;
80703831d35Sstevel cap_msgdatap->cmd = SCDM_LOG_ECC_CAP_RESP;
80803831d35Sstevel cap_msgdatap->transid = 0;
80903831d35Sstevel (void) taskq_dispatch(scosmb_output_taskq,
81003831d35Sstevel (task_func_t *)scosmb_process_output, cap_msgdatap,
81103831d35Sstevel KM_SLEEP);
81203831d35Sstevel break;
81303831d35Sstevel
81403831d35Sstevel case SCDM_DP_ERROR_MSG:
81503831d35Sstevel case SCDM_DP_FAULT_MSG:
81603831d35Sstevel dp_trans_event(msg.data, msg.cmd);
81703831d35Sstevel break;
81803831d35Sstevel
81903831d35Sstevel case SCDM_DIMM_SERIAL_ID:
82003831d35Sstevel (void) plat_store_mem_sids(msg.data);
82103831d35Sstevel break;
82203831d35Sstevel
82303831d35Sstevel default:
82403831d35Sstevel cmn_err(CE_WARN, "%s invalid command (0x%x)\n", scosmb_hdr,
82503831d35Sstevel msg.cmd);
82603831d35Sstevel break;
82703831d35Sstevel }
82803831d35Sstevel
82903831d35Sstevel /*
83003831d35Sstevel * Free up buffer for incoming messasge data that we allocated earlier
83103831d35Sstevel */
83203831d35Sstevel kmem_free(msg.data, max_size);
83303831d35Sstevel }
83403831d35Sstevel
83503831d35Sstevel /*
83603831d35Sstevel * scosmb_process_output() - outgoing message processing routine
83703831d35Sstevel *
83803831d35Sstevel * This routine handles jobs that are queued on the scosmb_output_taskq, or
83903831d35Sstevel * sent directly from scosmb_log_ecc_error. Each job corresponds to a single
84003831d35Sstevel * mailbox message that needs to be sent to the SC via the DMSC mailbox. Some
84103831d35Sstevel * processing of the message may be performed before it is sent to the SC,
84203831d35Sstevel * depending on the value of the command field.
84303831d35Sstevel */
84403831d35Sstevel static int
scosmb_process_output(scosmb_msgdata_t * msgdatap)84503831d35Sstevel scosmb_process_output(scosmb_msgdata_t *msgdatap)
84603831d35Sstevel {
84703831d35Sstevel int error;
84803831d35Sstevel int length;
84903831d35Sstevel char nodename[_SYS_NMLN];
85003831d35Sstevel void *free_data;
85103831d35Sstevel int free_data_len;
85203831d35Sstevel int cap_size;
85303831d35Sstevel int cap_ver_len;
85403831d35Sstevel plat_capability_data_t *cap = NULL;
85503831d35Sstevel
85603831d35Sstevel /*
85703831d35Sstevel * This shouldn't ever happen, but it can't hurt to check anyway.
85803831d35Sstevel */
85903831d35Sstevel if (msgdatap == NULL) {
86003831d35Sstevel return (EINVAL);
86103831d35Sstevel }
86203831d35Sstevel
86303831d35Sstevel /*
86403831d35Sstevel * If data was passed in, we'll need to free it before returning.
86503831d35Sstevel */
86603831d35Sstevel free_data = msgdatap->data;
86703831d35Sstevel free_data_len = msgdatap->length;
86803831d35Sstevel
86903831d35Sstevel /*
87003831d35Sstevel * Some commands may need additional processing prior to transmission.
87103831d35Sstevel */
87203831d35Sstevel switch (msgdatap->cmd) {
87303831d35Sstevel /*
87403831d35Sstevel * Since the SC is only interested in the most recent value of
87503831d35Sstevel * utsname.nodename, we wait until now to collect that data. We
87603831d35Sstevel * also use a global flag to prevent multiple event-type
87703831d35Sstevel * nodename messages from being queued at the same time for the
87803831d35Sstevel * same reason.
87903831d35Sstevel */
88003831d35Sstevel case SCDM_GET_NODENAME:
88103831d35Sstevel mutex_enter(&scosmb_mutex);
88203831d35Sstevel length = strlen(utsname.nodename);
88303831d35Sstevel ASSERT(length < _SYS_NMLN);
88403831d35Sstevel if (length == 0) {
88503831d35Sstevel msgdatap->length = 0;
88603831d35Sstevel msgdatap->data = NULL;
88703831d35Sstevel } else {
88803831d35Sstevel bcopy(utsname.nodename, nodename, length);
88903831d35Sstevel nodename[length++] = '\0';
89003831d35Sstevel msgdatap->data = nodename;
89103831d35Sstevel msgdatap->length = length;
89203831d35Sstevel }
89303831d35Sstevel if (msgdatap->transid == 0) {
89403831d35Sstevel scosmb_nodename_event_pending = FALSE;
89503831d35Sstevel }
89603831d35Sstevel mutex_exit(&scosmb_mutex);
89703831d35Sstevel break;
89803831d35Sstevel
89903831d35Sstevel /*
90003831d35Sstevel * SCDM_LOG_ECC_CAP_INIT
90103831d35Sstevel * Initiator Capability message from OS to SC
90203831d35Sstevel *
90303831d35Sstevel * We construct and send an initiator capability message
90403831d35Sstevel * every time we go through scosmb_update_nodename(), which
90503831d35Sstevel * works out to getting an "initiator" capability message
90603831d35Sstevel * sent from the OS to the SC during the OS boot flow.
90703831d35Sstevel *
90803831d35Sstevel * The SC also issues a request to scosmb_update_nodename()
90903831d35Sstevel * during an SC reboot. Which results in an additional
91003831d35Sstevel * capability message exchange during SC reboot scenarios.
91103831d35Sstevel *
91203831d35Sstevel * SCDM_LOG_ECC_CAP_RESP
91303831d35Sstevel * Response Capability message from SC to OS
91403831d35Sstevel *
91503831d35Sstevel * In certain scenarios, the SC could initiate a capability
91603831d35Sstevel * messaging exchange with the OS. Processing starts in
91703831d35Sstevel * scosmb_process_input(), where we detect an incoming
91803831d35Sstevel * initiator capability message from the SC. We finish
91903831d35Sstevel * processing here, by sending a response capability message
92003831d35Sstevel * back to the SC that reflects OS capabilities.
92103831d35Sstevel */
92203831d35Sstevel case SCDM_LOG_ECC_CAP_INIT:
92303831d35Sstevel /*FALLTHROUGH*/
92403831d35Sstevel case SCDM_LOG_ECC_CAP_RESP:
92503831d35Sstevel mutex_enter(&scosmb_mutex);
92603831d35Sstevel
92703831d35Sstevel cap_ver_len = strlen(utsname.release) +
92803831d35Sstevel strlen(utsname.version) + 2;
92903831d35Sstevel
93003831d35Sstevel cap_size = sizeof (plat_capability_data_t) +
93103831d35Sstevel cap_ver_len;
93203831d35Sstevel
93303831d35Sstevel cap = kmem_zalloc(cap_size, KM_SLEEP);
93403831d35Sstevel
93503831d35Sstevel cap->capd_major_version = PLAT_ECC_CAP_VERSION_MAJOR;
93603831d35Sstevel cap->capd_minor_version = PLAT_ECC_CAP_VERSION_MINOR;
93703831d35Sstevel cap->capd_msg_type = PLAT_ECC_CAPABILITY_MESSAGE;
93803831d35Sstevel cap->capd_msg_length = cap_size;
93903831d35Sstevel
94003831d35Sstevel cap->capd_capability =
94103831d35Sstevel PLAT_ECC_CAPABILITY_DOMAIN_DEFAULT;
94203831d35Sstevel
94303831d35Sstevel /*
94403831d35Sstevel * Build the capability solaris_version string:
94503831d35Sstevel * utsname.release + " " + utsname.version
94603831d35Sstevel */
94703831d35Sstevel (void) snprintf(cap->capd_solaris_version,
94803831d35Sstevel cap_ver_len, "%s %s", utsname.release,
94903831d35Sstevel utsname.version);
95003831d35Sstevel
95103831d35Sstevel /*
95203831d35Sstevel * The capability message is constructed, now plug it
95303831d35Sstevel * into the starcat msgdatap:
95403831d35Sstevel */
95503831d35Sstevel msgdatap->data = (plat_capability_data_t *)cap;
95603831d35Sstevel msgdatap->length = cap_size;
95703831d35Sstevel
95803831d35Sstevel /*
95903831d35Sstevel * Finished with initiator/response capability
96003831d35Sstevel * message set up.
96103831d35Sstevel *
96203831d35Sstevel * Note that after sending an "initiator" capability
96303831d35Sstevel * message, we can expect a subsequent "response"
96403831d35Sstevel * capability message from the SC, which we will
96503831d35Sstevel * pick up and minimally handle later,
96603831d35Sstevel * in scosmb_process_input().
96703831d35Sstevel *
96803831d35Sstevel * If we're sending a "response" capability message
96903831d35Sstevel * to the SC, then we're done once the message is sent.
97003831d35Sstevel */
97103831d35Sstevel
97203831d35Sstevel if (msgdatap->transid == 0) {
97303831d35Sstevel scosmb_nodename_event_pending = FALSE;
97403831d35Sstevel }
97503831d35Sstevel mutex_exit(&scosmb_mutex);
97603831d35Sstevel break;
97703831d35Sstevel
97803831d35Sstevel default:
97903831d35Sstevel break;
98003831d35Sstevel }
98103831d35Sstevel
98203831d35Sstevel /*
98303831d35Sstevel * Attempt to send the message.
98403831d35Sstevel */
98503831d35Sstevel error = mboxsc_putmsg(DMSC_KEY, msgdatap->type, msgdatap->cmd,
98603831d35Sstevel &msgdatap->transid, msgdatap->length, msgdatap->data,
98703831d35Sstevel ecc_message_timeout_ms);
98803831d35Sstevel
98903831d35Sstevel /*
99003831d35Sstevel * Free any allocated memory that was passed in.
99103831d35Sstevel */
99203831d35Sstevel if (free_data != NULL) {
99303831d35Sstevel kmem_free(free_data, free_data_len);
99403831d35Sstevel }
99503831d35Sstevel
99603831d35Sstevel if (cap != NULL) {
99703831d35Sstevel kmem_free(cap, cap_size);
99803831d35Sstevel }
99903831d35Sstevel
100003831d35Sstevel kmem_free(msgdatap, sizeof (scosmb_msgdata_t));
100103831d35Sstevel
100203831d35Sstevel /*
100303831d35Sstevel * If EAGAIN or ETIMEDOUT was received, give up. The sender can try
100403831d35Sstevel * again if it was important. If any other non-zero error was
100503831d35Sstevel * encountered, the mailbox service is broken, and there's nothing more
100603831d35Sstevel * we can do.
100703831d35Sstevel */
100803831d35Sstevel mutex_enter(&scosmb_mutex);
100903831d35Sstevel if ((error == EAGAIN) || (error == ETIMEDOUT)) {
101003831d35Sstevel if (msgdatap->log_error && !scosmb_mboxsc_timedout) {
101103831d35Sstevel /*
101203831d35Sstevel * Indictment mailbox messages use the return value to
101303831d35Sstevel * indicate a problem in the mailbox. For Error
101403831d35Sstevel * mailbox messages, we'll have to use a syslog message.
101503831d35Sstevel */
101603831d35Sstevel scosmb_mboxsc_timedout = TRUE;
101703831d35Sstevel cmn_err(CE_NOTE, "!Solaris failed to send a message "
101803831d35Sstevel "(0x%x/0x%x) to the System Controller. Error: %d",
101903831d35Sstevel msgdatap->type, msgdatap->cmd, error);
102003831d35Sstevel }
102103831d35Sstevel } else if (error != 0) {
102203831d35Sstevel /*
102303831d35Sstevel * The mailbox service appears to be badly broken. If it was
102403831d35Sstevel * working previously, generate a warning and set a flag to
102503831d35Sstevel * avoid repeating the warning on subsequent failures.
102603831d35Sstevel */
102703831d35Sstevel if (msgdatap->log_error && !scosmb_mboxsc_failed) {
102803831d35Sstevel scosmb_mboxsc_failed = TRUE;
102903831d35Sstevel cmn_err(CE_NOTE, "!An internal error (%d) occurred "
103003831d35Sstevel "while processing this message (0x%x/0x%x)",
103103831d35Sstevel error, msgdatap->type, msgdatap->cmd);
103203831d35Sstevel }
103303831d35Sstevel } else {
103403831d35Sstevel /*
103503831d35Sstevel * If the mailbox module failed previously, it appears to have
103603831d35Sstevel * recovered, so we'll want to generate a warning if it fails
103703831d35Sstevel * again.
103803831d35Sstevel */
103903831d35Sstevel scosmb_mboxsc_failed = scosmb_mboxsc_timedout = FALSE;
104003831d35Sstevel }
104103831d35Sstevel mutex_exit(&scosmb_mutex);
104203831d35Sstevel return (error);
104303831d35Sstevel }
104403831d35Sstevel
104503831d35Sstevel /*
104603831d35Sstevel * scosmb_update_nodename() - nodename update routine
104703831d35Sstevel *
104803831d35Sstevel * this routine, which may be invoked from outside of the scosmb module, will
104903831d35Sstevel * cause the current nodename to be sent to the SC. The mailbox message sent to
105003831d35Sstevel * the SC will use the indicated transaction ID, and will either be a reply
105103831d35Sstevel * message if the ID is non-zero or an event message if it is 0.
105203831d35Sstevel *
105303831d35Sstevel * Capability messaging enhancements:
105403831d35Sstevel * Every time we move through this code flow, we put an "initiator
105503831d35Sstevel * capability message" on the message output taskq. This action will
105603831d35Sstevel * get a capability message sent to the SC from the OS during boot
105703831d35Sstevel * scenarios. A capability message exchange will also happen for
105803831d35Sstevel * SC reboot scenarios, as the SC will initiate a nodename update
105903831d35Sstevel * as a matter of course while coming back up.
106003831d35Sstevel *
106103831d35Sstevel * We'll also get an extraneous capability message sent
106203831d35Sstevel * to the SC from time to time, but that won't hurt anything.
106303831d35Sstevel */
106403831d35Sstevel void
scosmb_update_nodename(uint64_t transid)106503831d35Sstevel scosmb_update_nodename(uint64_t transid)
106603831d35Sstevel {
106703831d35Sstevel scosmb_msgdata_t *msgdatap, *cap_msgdatap;
106803831d35Sstevel
106903831d35Sstevel /*
107003831d35Sstevel * If we're generating an unsolicited nodename update (presumably having
107103831d35Sstevel * been called from platmod:plat_nodename_set()), there's no need to add
107203831d35Sstevel * a new job to the queue if there is already one on it that will be
107303831d35Sstevel * sending the latest nodename data.
107403831d35Sstevel */
107503831d35Sstevel mutex_enter(&scosmb_mutex);
107603831d35Sstevel if (transid == 0) {
107703831d35Sstevel if (scosmb_nodename_event_pending) {
107803831d35Sstevel mutex_exit(&scosmb_mutex);
107903831d35Sstevel return;
108003831d35Sstevel } else {
108103831d35Sstevel scosmb_nodename_event_pending = TRUE;
108203831d35Sstevel }
108303831d35Sstevel }
108403831d35Sstevel mutex_exit(&scosmb_mutex);
108503831d35Sstevel
108603831d35Sstevel /*
108703831d35Sstevel * Allocate memory for the message data, initialize it, and place a new
108803831d35Sstevel * job on the scosmb_output_taskq for SCDM_GET_NODENAME.
108903831d35Sstevel */
109003831d35Sstevel msgdatap = (scosmb_msgdata_t *)kmem_zalloc(sizeof (scosmb_msgdata_t),
109103831d35Sstevel KM_SLEEP);
109203831d35Sstevel
109303831d35Sstevel msgdatap->type = (transid == 0) ? MBOXSC_MSG_EVENT : MBOXSC_MSG_REPLY;
109403831d35Sstevel msgdatap->cmd = SCDM_GET_NODENAME;
109503831d35Sstevel msgdatap->transid = transid;
109603831d35Sstevel msgdatap->log_error = 1;
109703831d35Sstevel
109803831d35Sstevel (void) taskq_dispatch(scosmb_output_taskq,
109903831d35Sstevel (task_func_t *)scosmb_process_output, msgdatap, KM_SLEEP);
110003831d35Sstevel
110103831d35Sstevel /*
110203831d35Sstevel * Next, allocate memory, initialize, and place a new job on the
110303831d35Sstevel * scosmb_output_taskq for SCDM_LOG_ECC_CAP_INIT. That's a
110403831d35Sstevel * capability message, where we're the initiator.
110503831d35Sstevel */
110603831d35Sstevel cap_msgdatap = kmem_zalloc(sizeof (scosmb_msgdata_t), KM_SLEEP);
110703831d35Sstevel
110803831d35Sstevel cap_msgdatap->type = (transid == 0) ?
110903831d35Sstevel MBOXSC_MSG_EVENT : MBOXSC_MSG_REPLY;
111003831d35Sstevel cap_msgdatap->cmd = SCDM_LOG_ECC_CAP_INIT;
111103831d35Sstevel cap_msgdatap->transid = transid;
111203831d35Sstevel cap_msgdatap->log_error = 1;
111303831d35Sstevel
111403831d35Sstevel (void) taskq_dispatch(scosmb_output_taskq,
111503831d35Sstevel (task_func_t *)scosmb_process_output, cap_msgdatap, KM_SLEEP);
111603831d35Sstevel }
111703831d35Sstevel
111803831d35Sstevel /*
111903831d35Sstevel * scosmb_log_ecc_error() - Record ECC error information to SC
112003831d35Sstevel * For ECC error messages, send the messages through a taskq mechanism
112103831d35Sstevel * to prevent impaired system performance during ECC floods. Indictment
112203831d35Sstevel * messages have already passed through a taskq, so directly call the
112303831d35Sstevel * output function.
112403831d35Sstevel */
112503831d35Sstevel int
scosmb_log_ecc_error(plat_ecc_message_type_t msg_type,void * datap)112603831d35Sstevel scosmb_log_ecc_error(plat_ecc_message_type_t msg_type, void *datap)
112703831d35Sstevel {
112803831d35Sstevel scosmb_msgdata_t *msg_header_ptr;
112903831d35Sstevel uint32_t msg_cmd, msg_length;
113003831d35Sstevel int sleep_flag, log_error;
113103831d35Sstevel int do_queue; /* Set to 1 if taskq needed */
113203831d35Sstevel
113303831d35Sstevel /*
113403831d35Sstevel * Set header type and length for message
113503831d35Sstevel */
113603831d35Sstevel switch (msg_type) {
113703831d35Sstevel case PLAT_ECC_ERROR_MESSAGE:
113803831d35Sstevel /*
113903831d35Sstevel * We do not want to sleep in an error logging thread. So,
114003831d35Sstevel * we set the NOSLEEP flag and go through a taskq before we
114103831d35Sstevel * send the message.
114203831d35Sstevel */
114303831d35Sstevel msg_cmd = SCDM_LOG_ECC_ERROR;
114403831d35Sstevel msg_length = sizeof (plat_ecc_error_data_t);
114503831d35Sstevel sleep_flag = KM_NOSLEEP;
114603831d35Sstevel log_error = 1;
114703831d35Sstevel do_queue = 1;
114803831d35Sstevel break;
114903831d35Sstevel case PLAT_ECC_ERROR2_MESSAGE:
115003831d35Sstevel msg_cmd = SCDM_LOG_ECC;
115103831d35Sstevel msg_length = sizeof (plat_ecc_error2_data_t);
115203831d35Sstevel sleep_flag = KM_NOSLEEP;
115303831d35Sstevel log_error = 1;
115403831d35Sstevel do_queue = 1;
115503831d35Sstevel break;
115603831d35Sstevel case PLAT_ECC_INDICTMENT_MESSAGE:
115703831d35Sstevel /*
115803831d35Sstevel * For indictment messages, we're allowed to sleep, and we
115903831d35Sstevel * can directly call the output function, since we've already
116003831d35Sstevel * gone through a taskq
116103831d35Sstevel */
116203831d35Sstevel msg_cmd = SCDM_LOG_ECC_INDICTMENT;
116303831d35Sstevel msg_length = sizeof (plat_ecc_indictment_data_t);
116403831d35Sstevel sleep_flag = KM_SLEEP;
116503831d35Sstevel log_error = 0;
116603831d35Sstevel do_queue = 0;
116703831d35Sstevel break;
116803831d35Sstevel case PLAT_ECC_INDICTMENT2_MESSAGE:
116903831d35Sstevel /*
117003831d35Sstevel * For indictment2 messages, we're allowed to sleep, and we
117103831d35Sstevel * can directly call the output function, since we've already
117203831d35Sstevel * gone through a taskq
117303831d35Sstevel */
117403831d35Sstevel msg_cmd = SCDM_LOG_ECC;
117503831d35Sstevel msg_length = sizeof (plat_ecc_indictment2_data_t);
117603831d35Sstevel sleep_flag = KM_SLEEP;
117703831d35Sstevel log_error = 0;
117803831d35Sstevel do_queue = 0;
117903831d35Sstevel break;
118003831d35Sstevel
118103831d35Sstevel case PLAT_ECC_DIMM_SID_MESSAGE:
118203831d35Sstevel /*
118303831d35Sstevel * For DIMM sid request messages, we're allowed to sleep, and we
118403831d35Sstevel * can directly call the output function, since we've already
118503831d35Sstevel * gone through a taskq
118603831d35Sstevel */
118703831d35Sstevel msg_cmd = SCDM_DIMM_SERIAL_ID;
118803831d35Sstevel msg_length = sizeof (plat_dimm_sid_request_data_t);
118903831d35Sstevel sleep_flag = KM_SLEEP;
119003831d35Sstevel log_error = 0;
119103831d35Sstevel do_queue = 0;
119203831d35Sstevel break;
119303831d35Sstevel
119403831d35Sstevel default:
119503831d35Sstevel return (EINVAL);
119603831d35Sstevel }
119703831d35Sstevel
119803831d35Sstevel /*
119903831d35Sstevel * Allocate memory for the mailbox message header.
120003831d35Sstevel */
120103831d35Sstevel msg_header_ptr =
120203831d35Sstevel (scosmb_msgdata_t *)kmem_zalloc(sizeof (scosmb_msgdata_t),
120303831d35Sstevel sleep_flag);
120403831d35Sstevel
120503831d35Sstevel if (msg_header_ptr == NULL) {
120603831d35Sstevel #ifdef DEBUG
120703831d35Sstevel cmn_err(CE_WARN, "failed to allocate space for scosmb "
120803831d35Sstevel "message header.");
120903831d35Sstevel #endif /* DEBUG */
121003831d35Sstevel return (ENOMEM);
121103831d35Sstevel }
121203831d35Sstevel
121303831d35Sstevel msg_header_ptr->type = MBOXSC_MSG_EVENT;
121403831d35Sstevel msg_header_ptr->cmd = msg_cmd;
121503831d35Sstevel msg_header_ptr->transid = 0;
121603831d35Sstevel msg_header_ptr->log_error = log_error;
121703831d35Sstevel
121803831d35Sstevel /*
121903831d35Sstevel * Allocate memory for the mailbox message payload.
122003831d35Sstevel */
122103831d35Sstevel msg_header_ptr->length = msg_length;
122203831d35Sstevel msg_header_ptr->data = kmem_zalloc((size_t)msg_length, sleep_flag);
122303831d35Sstevel
122403831d35Sstevel if (msg_header_ptr->data == NULL) {
122503831d35Sstevel #ifdef DEBUG
122603831d35Sstevel cmn_err(CE_WARN, "failed to allocate space for scosmb "
122703831d35Sstevel "message data.");
122803831d35Sstevel #endif /* DEBUG */
122903831d35Sstevel kmem_free(msg_header_ptr, sizeof (scosmb_msgdata_t));
123003831d35Sstevel return (ENOMEM);
123103831d35Sstevel }
123203831d35Sstevel
123303831d35Sstevel bcopy(datap, msg_header_ptr->data, (size_t)msg_length);
123403831d35Sstevel
123503831d35Sstevel /*
123603831d35Sstevel * Based on our earlier look at the message type, we either go through
123703831d35Sstevel * a taskq or directly call the output function.
123803831d35Sstevel */
123903831d35Sstevel if (do_queue != 0) {
124003831d35Sstevel /*
124103831d35Sstevel * Place a new job on the scosmb_output_taskq.
124203831d35Sstevel */
124303831d35Sstevel if (taskq_dispatch(scosmb_output_taskq,
124403831d35Sstevel (task_func_t *)scosmb_process_output,
124503831d35Sstevel (void *)msg_header_ptr, TQ_NOSLEEP) == 0) {
124603831d35Sstevel #ifdef DEBUG
124703831d35Sstevel cmn_err(CE_WARN, "failed to dispatch a task to send "
124803831d35Sstevel "ECC mailbox message.");
124903831d35Sstevel #endif /* DEBUG */
125003831d35Sstevel kmem_free(msg_header_ptr->data, msg_header_ptr->length);
125103831d35Sstevel kmem_free(msg_header_ptr, sizeof (scosmb_msgdata_t));
125203831d35Sstevel return (ENOMEM);
125303831d35Sstevel }
125403831d35Sstevel return (0);
125503831d35Sstevel } else {
125603831d35Sstevel return (scosmb_process_output(msg_header_ptr));
125703831d35Sstevel }
125803831d35Sstevel }
1259