/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/plat_ecc_unum.h>
#include <sys/utsname.h>
#include <sys/cmn_err.h>
#include <sys/async.h>
#include <sys/errno.h>
#include <sys/fm/protocol.h>
#include <sys/fm/cpu/UltraSPARC-III.h>
#include <sys/bl.h>
#include <sys/taskq.h>
#include <sys/condvar.h>
#include <sys/plat_ecc_dimm.h>

/*
 * Pointer to platform specific function to initialize a cache of DIMM
 * serial ids
 */
int (*p2init_sid_cache)(void);

/*
 * This file contains the common code that is used for parsing
 * ecc unum data and logging it appropriately as the platform
 * that calls this code implements.
 */

/* Forward declarations for routines referenced before their definitions. */
int plat_ecc_dispatch_task(plat_ecc_message_t *);
static void plat_ecc_send_msg(void *);

/*
 * CHECK_UNUM expands to an if-statement that executes "break" when a
 * variable named unum_ptr is NULL.  It therefore only makes sense inside
 * a switch or loop body that has a pointer called unum_ptr in scope; it
 * is used after each strchr()/strstr() step of the unum parsing in
 * plat_log_fruid_error() to leave the enclosing case early.
 */
#define CHECK_UNUM \
	if (unum_ptr == NULL) { \
		break; \
	}

/*
 * See plat_ecc_unum.h for the meaning of these variables.
6203831d35Sstevel */ 6303831d35Sstevel int ecc_log_fruid_enable = ECC_FRUID_ENABLE_DEFAULT; 6403831d35Sstevel 6503831d35Sstevel uint32_t plat_ecc_capability_map_domain = PLAT_ECC_CAPABILITY_DOMAIN_DEFAULT; 6603831d35Sstevel uint32_t plat_ecc_capability_map_sc = PLAT_ECC_CAPABILITY_SC_DEFAULT; 6703831d35Sstevel uint16_t ecc_error2_mailbox_flags = PLAT_ECC_ERROR2_SEND_DEFAULT; 6803831d35Sstevel uint16_t ecc_indictment2_mailbox_flags = PLAT_ECC_SEND_INDICT2_DEFAULT; 6903831d35Sstevel 7003831d35Sstevel /* 7103831d35Sstevel * We log all ECC errors using the function that is defined as 7203831d35Sstevel * plat_send_ecc_mailbox_msg(); We first parse the unum string and 7303831d35Sstevel * then pass the data to be logged to the plat_send_ecc_mailbox_msg 7403831d35Sstevel * function for logging. Each platform that uses this code needs to 7503831d35Sstevel * implement a suitable function for this purpose. 7603831d35Sstevel */ 7703831d35Sstevel void 7803831d35Sstevel plat_log_fruid_error(int synd_code, struct async_flt *ecc, char *unum, 7903831d35Sstevel uint64_t afsr_bit) 8003831d35Sstevel { 8103831d35Sstevel plat_ecc_error_data_t ecc_error_data; 8203831d35Sstevel enum plat_ecc_type ecc_type = PLAT_ECC_UNKNOWN; 8303831d35Sstevel int board_num; 8403831d35Sstevel int proc_position; 8503831d35Sstevel int invalid_unum = 1; 8603831d35Sstevel 8703831d35Sstevel bzero(&ecc_error_data, sizeof (plat_ecc_error_data_t)); 8803831d35Sstevel ecc_error_data.version = PLAT_ECC_VERSION; 8903831d35Sstevel 9003831d35Sstevel switch (afsr_bit) { 9103831d35Sstevel case C_AFSR_CE: 9203831d35Sstevel ecc_error_data.error_code = PLAT_ERROR_CODE_CE; 9303831d35Sstevel break; 9403831d35Sstevel case C_AFSR_UE: 9503831d35Sstevel ecc_error_data.error_code = PLAT_ERROR_CODE_UE; 9603831d35Sstevel break; 9703831d35Sstevel case C_AFSR_EDC: 9803831d35Sstevel ecc_error_data.error_code = PLAT_ERROR_CODE_EDC; 9903831d35Sstevel break; 10003831d35Sstevel case C_AFSR_EDU: 10103831d35Sstevel ecc_error_data.error_code = 
PLAT_ERROR_CODE_EDU; 10203831d35Sstevel break; 10303831d35Sstevel case C_AFSR_WDC: 10403831d35Sstevel ecc_error_data.error_code = PLAT_ERROR_CODE_WDC; 10503831d35Sstevel break; 10603831d35Sstevel case C_AFSR_WDU: 10703831d35Sstevel ecc_error_data.error_code = PLAT_ERROR_CODE_WDU; 10803831d35Sstevel break; 10903831d35Sstevel case C_AFSR_CPC: 11003831d35Sstevel ecc_error_data.error_code = PLAT_ERROR_CODE_CPC; 11103831d35Sstevel break; 11203831d35Sstevel case C_AFSR_CPU: 11303831d35Sstevel ecc_error_data.error_code = PLAT_ERROR_CODE_CPU; 11403831d35Sstevel break; 11503831d35Sstevel case C_AFSR_UCC: 11603831d35Sstevel ecc_error_data.error_code = PLAT_ERROR_CODE_UCC; 11703831d35Sstevel break; 11803831d35Sstevel case C_AFSR_UCU: 11903831d35Sstevel ecc_error_data.error_code = PLAT_ERROR_CODE_UCU; 12003831d35Sstevel break; 12103831d35Sstevel case C_AFSR_EMC: 12203831d35Sstevel ecc_error_data.error_code = PLAT_ERROR_CODE_EMC; 12303831d35Sstevel break; 12403831d35Sstevel case C_AFSR_EMU: 12503831d35Sstevel ecc_error_data.error_code = PLAT_ERROR_CODE_EMU; 12603831d35Sstevel break; 12703831d35Sstevel default: 12803831d35Sstevel /* 12903831d35Sstevel * Do not send messages with unknown error codes, since 13003831d35Sstevel * the SC will not be able to tell what type of error 13103831d35Sstevel * occurred. 13203831d35Sstevel */ 13303831d35Sstevel return; 13403831d35Sstevel } 13503831d35Sstevel 13603831d35Sstevel ecc_error_data.detecting_proc = ecc->flt_bus_id; 13703831d35Sstevel 13803831d35Sstevel if (ecc->flt_in_memory) 13903831d35Sstevel ecc_type = PLAT_ECC_MEMORY; 14003831d35Sstevel else if (ecc->flt_status & ECC_ECACHE) 14103831d35Sstevel ecc_type = PLAT_ECC_ECACHE; 14203831d35Sstevel 14303831d35Sstevel switch (ecc_type) { 14403831d35Sstevel case PLAT_ECC_MEMORY: { 14503831d35Sstevel /* 14603831d35Sstevel * The unum string is expected to be in this form: 14703831d35Sstevel * "/N0/SB12/P0/B0/D2 J13500, ..." 14803831d35Sstevel * for serengeti. 
As this code is shared with Starcat 14903831d35Sstevel * if N is missing then it is set to 0. 15003831d35Sstevel * From that we will extract the bank number, dimm 15103831d35Sstevel * number, and Jnumber. 15203831d35Sstevel */ 15303831d35Sstevel char *unum_ptr = unum; 15403831d35Sstevel char *jno_ptr = ecc_error_data.Jnumber; 15503831d35Sstevel int i; 15603831d35Sstevel 15703831d35Sstevel /* 15803831d35Sstevel * On Serengeti we expect to find 'N' in the unum string 15903831d35Sstevel * however, on Starcat 'N' does not appear in the unum string. 16003831d35Sstevel * We do not want this code to break at this point, so the 16103831d35Sstevel * unum_ptr is reset to the start of unum string if we fail 16203831d35Sstevel * to find an 'N'. 16303831d35Sstevel */ 16403831d35Sstevel unum_ptr = strchr(unum_ptr, 'N'); 16503831d35Sstevel if (unum_ptr == NULL) { 16603831d35Sstevel ecc_error_data.node_no = 0; 16703831d35Sstevel unum_ptr = unum; 16803831d35Sstevel } else { 16903831d35Sstevel unum_ptr++; 17003831d35Sstevel ecc_error_data.node_no = stoi(&unum_ptr); 17103831d35Sstevel } 17203831d35Sstevel 17303831d35Sstevel /* 17403831d35Sstevel * Now pull out the SB number 17503831d35Sstevel */ 17603831d35Sstevel unum_ptr = strstr(unum_ptr, "SB"); 17703831d35Sstevel CHECK_UNUM; 17803831d35Sstevel unum_ptr += 2; 17903831d35Sstevel board_num = stoi(&unum_ptr); 18003831d35Sstevel 18103831d35Sstevel /* 18203831d35Sstevel * Now pull out the Proc position (relative to the board) 18303831d35Sstevel */ 18403831d35Sstevel unum_ptr = strchr(unum_ptr, 'P'); 18503831d35Sstevel CHECK_UNUM; 18603831d35Sstevel unum_ptr++; 18703831d35Sstevel proc_position = stoi(&unum_ptr); 18803831d35Sstevel 18903831d35Sstevel /* 19003831d35Sstevel * Using the SB number and Proc position we create a FRU 19103831d35Sstevel * cpu id. 
19203831d35Sstevel */ 19303831d35Sstevel ecc_error_data.proc_num = 19403831d35Sstevel plat_make_fru_cpuid(board_num, 0, proc_position); 19503831d35Sstevel 19603831d35Sstevel /* 19703831d35Sstevel * Now pull out the Memory Bank number 19803831d35Sstevel */ 19903831d35Sstevel unum_ptr = strchr(unum_ptr, 'B'); 20003831d35Sstevel CHECK_UNUM; 20103831d35Sstevel unum_ptr++; 20203831d35Sstevel ecc_error_data.bank_no = (stoi(&unum_ptr) & 0x01); 20303831d35Sstevel 20403831d35Sstevel /* 20503831d35Sstevel * Now pull out the Dimm number within the Memory Bank. 20603831d35Sstevel */ 20703831d35Sstevel unum_ptr = strchr(unum_ptr, 'D'); 20803831d35Sstevel CHECK_UNUM; 20903831d35Sstevel unum_ptr++; 21003831d35Sstevel ecc_error_data.ecache_dimm_no = (stoi(&unum_ptr) & 0x03); 21103831d35Sstevel 21203831d35Sstevel /* 21303831d35Sstevel * Now pull out the J-number. 21403831d35Sstevel */ 21503831d35Sstevel unum_ptr = strchr(unum_ptr, 'J'); 21603831d35Sstevel CHECK_UNUM; 21703831d35Sstevel unum_ptr++; 21803831d35Sstevel for (i = PLAT_ECC_JNUMBER_LENGTH; 21903831d35Sstevel i > 0 && *unum_ptr >= '0' && *unum_ptr <= '9'; i--) 22003831d35Sstevel *jno_ptr++ = *unum_ptr++; 22103831d35Sstevel *jno_ptr = NULL; 22203831d35Sstevel 22303831d35Sstevel /* 22403831d35Sstevel * If we get here, we can assume the unum is valid 22503831d35Sstevel */ 22603831d35Sstevel invalid_unum = 0; 22703831d35Sstevel break; 22803831d35Sstevel } 22903831d35Sstevel case PLAT_ECC_ECACHE: { 23003831d35Sstevel /* 23103831d35Sstevel * The unum string is expected to be in this form: 23203831d35Sstevel * "[/N0/][SB|IO]12/P0/E0 J13500, ..." 23303831d35Sstevel * for serengeti. As this code is shared with Starcat 23403831d35Sstevel * if N is missing then it is set to 0. IO may only appear 23503831d35Sstevel * on Starcats. From that we will extract the bank number, 23603831d35Sstevel * dimm number, and Jnumber. 
23703831d35Sstevel */ 23803831d35Sstevel char *unum_ptr = unum; 23903831d35Sstevel char *jno_ptr = ecc_error_data.Jnumber; 24003831d35Sstevel int is_maxcat = 0; 24103831d35Sstevel int i; 24203831d35Sstevel 24303831d35Sstevel /* 24403831d35Sstevel * On Serengeti we expect to find 'N' in the unum string 24503831d35Sstevel * however, on Starcat 'N' does not appear in the unum string. 24603831d35Sstevel * We do not want this code to break at this point, so the 24703831d35Sstevel * unum_ptr is reset to the start of unum string if we fail 24803831d35Sstevel * to find an 'N'. 24903831d35Sstevel */ 25003831d35Sstevel unum_ptr = strchr(unum_ptr, 'N'); 25103831d35Sstevel if (unum_ptr == NULL) { 25203831d35Sstevel ecc_error_data.node_no = 0; 25303831d35Sstevel unum_ptr = unum; 25403831d35Sstevel } else { 25503831d35Sstevel unum_ptr++; 25603831d35Sstevel ecc_error_data.node_no = stoi(&unum_ptr); 25703831d35Sstevel } 25803831d35Sstevel 25903831d35Sstevel /* 26003831d35Sstevel * Now pull out the SB/IO number 26103831d35Sstevel */ 26203831d35Sstevel unum_ptr = strstr(unum_ptr, "SB"); 26303831d35Sstevel if (unum_ptr == NULL) { 26403831d35Sstevel 26503831d35Sstevel /* 26603831d35Sstevel * Since this is an E$ error, it must have occurred on 26703831d35Sstevel * either a System Board (represented by "SB" in the 26803831d35Sstevel * unum string) or a Maxcat board ("IO" in the unum 26903831d35Sstevel * string). Since we failed the "SB" check, we'll 27003831d35Sstevel * assume this is a maxcat board. 
27103831d35Sstevel */ 27203831d35Sstevel is_maxcat = 1; 27303831d35Sstevel unum_ptr = strstr(unum, "IO"); 27403831d35Sstevel } 27503831d35Sstevel CHECK_UNUM; 27603831d35Sstevel unum_ptr += 2; 27703831d35Sstevel board_num = stoi(&unum_ptr); 27803831d35Sstevel 27903831d35Sstevel /* 28003831d35Sstevel * Now pull out the Proc position (relative to the board) 28103831d35Sstevel */ 28203831d35Sstevel unum_ptr = strchr(unum_ptr, 'P'); 28303831d35Sstevel CHECK_UNUM; 28403831d35Sstevel unum_ptr++; 28503831d35Sstevel proc_position = stoi(&unum_ptr); 28603831d35Sstevel 28703831d35Sstevel /* 28803831d35Sstevel * Using the SB/IO number, slot 0/1 value (is_maxcat), and 28903831d35Sstevel * proc position, we create the cpu id. 29003831d35Sstevel */ 29103831d35Sstevel ecc_error_data.proc_num = plat_make_fru_cpuid(board_num, 29203831d35Sstevel is_maxcat, proc_position); 29303831d35Sstevel 29403831d35Sstevel ecc_error_data.bank_no = 0; /* not used */ 29503831d35Sstevel 29603831d35Sstevel unum_ptr = strchr(unum_ptr, 'E'); 29703831d35Sstevel CHECK_UNUM; 29803831d35Sstevel unum_ptr++; 29903831d35Sstevel ecc_error_data.ecache_dimm_no = (stoi(&unum_ptr) & 0x01); 30003831d35Sstevel 30103831d35Sstevel unum_ptr = strchr(unum_ptr, 'J'); 30203831d35Sstevel CHECK_UNUM; 30303831d35Sstevel unum_ptr++; 30403831d35Sstevel for (i = PLAT_ECC_JNUMBER_LENGTH; 30503831d35Sstevel i > 0 && *unum_ptr >= '0' && *unum_ptr <= '9'; i--) 30603831d35Sstevel *jno_ptr++ = *unum_ptr++; 30703831d35Sstevel *jno_ptr = NULL; 30803831d35Sstevel 30903831d35Sstevel /* 31003831d35Sstevel * If we get here, we can assume the unum is valid 31103831d35Sstevel */ 31203831d35Sstevel invalid_unum = 0; 31303831d35Sstevel break; 31403831d35Sstevel } 31503831d35Sstevel default: 31603831d35Sstevel /* 31703831d35Sstevel * Unknown error 31803831d35Sstevel */ 31903831d35Sstevel break; 32003831d35Sstevel } 32103831d35Sstevel 32203831d35Sstevel /* 32303831d35Sstevel * This is where CHECK_UNUM goes when it finds an error 
32403831d35Sstevel */ 32503831d35Sstevel 32603831d35Sstevel if (ECC_SYND_DATA_BEGIN <= synd_code && 32703831d35Sstevel synd_code < ECC_SYND_ECC_BEGIN) { 32803831d35Sstevel ecc_error_data.error_type = PLAT_ERROR_TYPE_SINGLE; 32903831d35Sstevel ecc_error_data.databit_type = PLAT_BIT_TYPE_DATA; 33003831d35Sstevel ecc_error_data.databit_no = synd_code; 33103831d35Sstevel } else if (ECC_SYND_ECC_BEGIN <= synd_code && 33203831d35Sstevel synd_code < ECC_SYND_MTAG_BEGIN) { 33303831d35Sstevel ecc_error_data.error_type = PLAT_ERROR_TYPE_SINGLE; 33403831d35Sstevel ecc_error_data.databit_type = PLAT_BIT_TYPE_ECC; 33503831d35Sstevel ecc_error_data.databit_no = synd_code - ECC_SYND_ECC_BEGIN; 33603831d35Sstevel } else if (ECC_SYND_MTAG_BEGIN <= synd_code && 33703831d35Sstevel synd_code < ECC_SYND_MECC_BEGIN) { 33803831d35Sstevel ecc_error_data.error_type = PLAT_ERROR_TYPE_SINGLE; 33903831d35Sstevel ecc_error_data.databit_type = PLAT_BIT_TYPE_MTAG_D; 34003831d35Sstevel ecc_error_data.databit_no = synd_code - ECC_SYND_MTAG_BEGIN; 34103831d35Sstevel } else if (ECC_SYND_MECC_BEGIN <= synd_code && 34203831d35Sstevel synd_code < ECC_SYND_M2) { 34303831d35Sstevel ecc_error_data.error_type = PLAT_ERROR_TYPE_SINGLE; 34403831d35Sstevel ecc_error_data.databit_type = PLAT_BIT_TYPE_MTAG_E; 34503831d35Sstevel ecc_error_data.databit_no = synd_code - ECC_SYND_MECC_BEGIN; 34603831d35Sstevel } else { 34703831d35Sstevel switch (synd_code) { 34803831d35Sstevel case ECC_SYND_M2: 34903831d35Sstevel ecc_error_data.error_type = PLAT_ERROR_TYPE_M2; 35003831d35Sstevel break; 35103831d35Sstevel case ECC_SYND_M3: 35203831d35Sstevel ecc_error_data.error_type = PLAT_ERROR_TYPE_M3; 35303831d35Sstevel break; 35403831d35Sstevel case ECC_SYND_M4: 35503831d35Sstevel ecc_error_data.error_type = PLAT_ERROR_TYPE_M4; 35603831d35Sstevel break; 35703831d35Sstevel case ECC_SYND_M: 35803831d35Sstevel ecc_error_data.error_type = PLAT_ERROR_TYPE_M; 35903831d35Sstevel break; 36003831d35Sstevel default: 36103831d35Sstevel 
ecc_error_data.error_type = PLAT_ERROR_TYPE_UNK; 36203831d35Sstevel break; 36303831d35Sstevel } 36403831d35Sstevel ecc_error_data.databit_type = PLAT_BIT_TYPE_MULTI; 36503831d35Sstevel ecc_error_data.databit_no = 0; /* not used */ 36603831d35Sstevel } 36703831d35Sstevel 36803831d35Sstevel #ifdef DEBUG 36903831d35Sstevel if (invalid_unum && 37003831d35Sstevel (ecc_error_data.error_code != PLAT_ERROR_CODE_UE) && 37103831d35Sstevel unum && *unum) 37203831d35Sstevel cmn_err(CE_WARN, "Unexpected unum string format: %s\n", unum); 37303831d35Sstevel #endif 37403831d35Sstevel 37503831d35Sstevel /* 37603831d35Sstevel * Send this data off as a mailbox message to the SC. 37703831d35Sstevel */ 37803831d35Sstevel (void) plat_send_ecc_mailbox_msg(PLAT_ECC_ERROR_MESSAGE, 37903831d35Sstevel &ecc_error_data); 38003831d35Sstevel } 38103831d35Sstevel 38203831d35Sstevel /* 38303831d35Sstevel * The unum string for memory is expected to be in this form: 38403831d35Sstevel * "[/N0/]SB12/P0/B0/D2 [J13500]" 38503831d35Sstevel * Or if the unum was generated as the result of a UE: 38603831d35Sstevel * "[/N0/]SB12/P0/B0 [J13500, ...]" 38703831d35Sstevel * From that we will extract the board number, processor position, 38803831d35Sstevel * bank number and jnumber. 38903831d35Sstevel * 39003831d35Sstevel * Return (1) for an invalid unum string. If the unum is for an 39103831d35Sstevel * individual DIMM and there is no jnumber, jnumber will be set 39203831d35Sstevel * to -1 and the caller can decide if the unum is valid. This 39303831d35Sstevel * is because Serengeti does not have jnumbers for bank unums 39403831d35Sstevel * which may be used to create DIMM unums (e.g. for acquiring 39503831d35Sstevel * DIMM serial ids). 
39603831d35Sstevel */ 39703831d35Sstevel 39803831d35Sstevel int 39903831d35Sstevel parse_unum_memory(char *unum, int *board, int *pos, int *bank, int *dimm, 40003831d35Sstevel int *jnumber) 40103831d35Sstevel { 40203831d35Sstevel char *c; 40303831d35Sstevel 40403831d35Sstevel if ((c = strstr(unum, "SB")) == NULL) 40503831d35Sstevel return (1); 40603831d35Sstevel c += 2; 40703831d35Sstevel *board = (uint8_t)stoi(&c); 40803831d35Sstevel 40903831d35Sstevel if (*c++ != '/' || *c++ != 'P') 41003831d35Sstevel return (1); 41103831d35Sstevel *pos = stoi(&c); 41203831d35Sstevel 41303831d35Sstevel if (*c++ != '/' || *c++ != 'B') 41403831d35Sstevel return (1); 41503831d35Sstevel *bank = stoi(&c); 41603831d35Sstevel 41703831d35Sstevel if ((c = strchr(c, 'D')) == NULL) { 41803831d35Sstevel *dimm = -1; 41903831d35Sstevel *jnumber = 0; 42003831d35Sstevel return (0); 42103831d35Sstevel } 42203831d35Sstevel c++; 42303831d35Sstevel *dimm = stoi(&c); 42403831d35Sstevel 42503831d35Sstevel if ((c = strchr(c, 'J')) == NULL) { 42603831d35Sstevel *jnumber = -1; 42703831d35Sstevel return (0); 42803831d35Sstevel } 42903831d35Sstevel 43003831d35Sstevel c++; 43103831d35Sstevel *jnumber = (uint16_t)stoi(&c); 43203831d35Sstevel 43303831d35Sstevel return (0); 43403831d35Sstevel } 43503831d35Sstevel 43603831d35Sstevel /* 43703831d35Sstevel * The unum string for ecache is expected to be in this form: 43803831d35Sstevel * "[/N0/][SB|IO]12/P0/E0 J13500, ..." 43903831d35Sstevel * From that we will extract the board number, processor position and 44003831d35Sstevel * junmber. 44103831d35Sstevel * 44203831d35Sstevel * return (1) for any invalid unum string. 
44303831d35Sstevel */ 44403831d35Sstevel static int 44503831d35Sstevel parse_unum_ecache(char *unum, int *board, int *pos, int *jnumber, int *maxcat) 44603831d35Sstevel { 44703831d35Sstevel char *c; 44803831d35Sstevel 44903831d35Sstevel if ((c = strstr(unum, "SB")) == NULL) { 45003831d35Sstevel /* 45103831d35Sstevel * Since this is an E$ error, it must have occurred on 45203831d35Sstevel * either a System Board (represented by "SB" in the 45303831d35Sstevel * unum string) or a Maxcat board ("IO" in the unum 45403831d35Sstevel * string). 45503831d35Sstevel */ 45603831d35Sstevel if ((c = strstr(unum, "IO")) == NULL) 45703831d35Sstevel return (1); 45803831d35Sstevel *maxcat = 1; 45903831d35Sstevel } 46003831d35Sstevel 46103831d35Sstevel c += 2; 46203831d35Sstevel *board = (uint8_t)stoi(&c); 46303831d35Sstevel 46403831d35Sstevel if (*c++ != '/' || *c++ != 'P') 46503831d35Sstevel return (1); 46603831d35Sstevel *pos = stoi(&c); 46703831d35Sstevel 46803831d35Sstevel if ((c = strchr(c, 'J')) == NULL) 46903831d35Sstevel return (1); 47003831d35Sstevel 47103831d35Sstevel c++; 47203831d35Sstevel *jnumber = (uint16_t)stoi(&c); 47303831d35Sstevel 47403831d35Sstevel return (0); 47503831d35Sstevel } 47603831d35Sstevel 47703831d35Sstevel /* The following array maps the error to its corresponding set */ 47803831d35Sstevel static int plat_ecc_e2d_map[PLAT_ECC_ERROR2_NUMVALS] = { 47903831d35Sstevel PLAT_ECC_ERROR2_NONE, /* 0x00 */ 48003831d35Sstevel PLAT_ECC_ERROR2_SEND_L2_XXC, /* 0x01 */ 48103831d35Sstevel PLAT_ECC_ERROR2_SEND_L2_XXU, /* 0x02 */ 48203831d35Sstevel PLAT_ECC_ERROR2_SEND_L3_XXC, /* 0x03 */ 48303831d35Sstevel PLAT_ECC_ERROR2_SEND_L3_XXU, /* 0x04 */ 48403831d35Sstevel PLAT_ECC_ERROR2_SEND_MEM_ERRS, /* 0x05 */ 48503831d35Sstevel PLAT_ECC_ERROR2_SEND_MEM_ERRS, /* 0x06 */ 48603831d35Sstevel PLAT_ECC_ERROR2_SEND_MEM_ERRS, /* 0x07 */ 48703831d35Sstevel PLAT_ECC_ERROR2_SEND_BUS_ERRS, /* 0x08 */ 48803831d35Sstevel PLAT_ECC_ERROR2_SEND_BUS_ERRS, /* 0x09 */ 48903831d35Sstevel 
PLAT_ECC_ERROR2_SEND_BUS_ERRS, /* 0x0a */ 49003831d35Sstevel PLAT_ECC_ERROR2_SEND_BUS_ERRS, /* 0x0b */ 49103831d35Sstevel PLAT_ECC_ERROR2_SEND_L2_TAG_ERRS, /* 0x0c */ 49203831d35Sstevel PLAT_ECC_ERROR2_SEND_L2_TAG_ERRS, /* 0x0d */ 49303831d35Sstevel PLAT_ECC_ERROR2_SEND_L3_TAG_ERRS, /* 0x0e */ 49403831d35Sstevel PLAT_ECC_ERROR2_SEND_L3_TAG_ERRS, /* 0x0f */ 49503831d35Sstevel PLAT_ECC_ERROR2_SEND_L1_PARITY, /* 0x10 */ 49603831d35Sstevel PLAT_ECC_ERROR2_SEND_L1_PARITY, /* 0x11 */ 49703831d35Sstevel PLAT_ECC_ERROR2_SEND_TLB_PARITY, /* 0x12 */ 49803831d35Sstevel PLAT_ECC_ERROR2_SEND_TLB_PARITY, /* 0x13 */ 49903831d35Sstevel PLAT_ECC_ERROR2_SEND_IV_ERRS, /* 0x14 */ 50003831d35Sstevel PLAT_ECC_ERROR2_SEND_IV_ERRS, /* 0x15 */ 50103831d35Sstevel PLAT_ECC_ERROR2_SEND_MTAG_XXC, /* 0x16 */ 50203831d35Sstevel PLAT_ECC_ERROR2_SEND_IV_MTAG_XXC, /* 0x17 */ 50303831d35Sstevel PLAT_ECC_ERROR2_SEND_L3_XXC, /* 0x18 */ 50403831d35Sstevel PLAT_ECC_ERROR2_SEND_PCACHE /* 0x19 */ 50503831d35Sstevel }; 50603831d35Sstevel 50703831d35Sstevel /* 50803831d35Sstevel * log enhanced error information to SC. 
50903831d35Sstevel */ 51003831d35Sstevel void 51103831d35Sstevel plat_log_fruid_error2(int msg_type, char *unum, struct async_flt *aflt, 51203831d35Sstevel plat_ecc_ch_async_flt_t *ecc_ch_flt) 51303831d35Sstevel { 51403831d35Sstevel plat_ecc_error2_data_t e2d = {0}; 51503831d35Sstevel int board, pos, bank, dimm, jnumber; 51603831d35Sstevel int maxcat = 0; 51703831d35Sstevel uint16_t flags; 51803831d35Sstevel 51903831d35Sstevel /* Check the flags */ 52003831d35Sstevel flags = plat_ecc_e2d_map[msg_type]; 52103831d35Sstevel if ((ecc_error2_mailbox_flags & flags) == 0) 52203831d35Sstevel return; 52303831d35Sstevel 52403831d35Sstevel /* Fill the header */ 52503831d35Sstevel e2d.ee2d_major_version = PLAT_ECC_ERROR2_VERSION_MAJOR; 52603831d35Sstevel e2d.ee2d_minor_version = PLAT_ECC_ERROR2_VERSION_MINOR; 52703831d35Sstevel e2d.ee2d_msg_type = PLAT_ECC_ERROR2_MESSAGE; 52803831d35Sstevel e2d.ee2d_msg_length = sizeof (plat_ecc_error2_data_t); 52903831d35Sstevel 53003831d35Sstevel /* Fill the data */ 53103831d35Sstevel if (aflt->flt_in_memory) { 53203831d35Sstevel if (parse_unum_memory(unum, &board, &pos, &bank, &dimm, 53303831d35Sstevel &jnumber) || (dimm != -1 && jnumber == -1)) 53403831d35Sstevel return; 53503831d35Sstevel /* 53603831d35Sstevel * Using the SB number and Proc position we create a FRU 53703831d35Sstevel * cpu id. 53803831d35Sstevel */ 53903831d35Sstevel e2d.ee2d_owning_proc = plat_make_fru_cpuid(board, 0, pos); 54003831d35Sstevel e2d.ee2d_jnumber = jnumber; 54103831d35Sstevel e2d.ee2d_bank_number = bank; 54203831d35Sstevel } else if (aflt->flt_status & ECC_ECACHE) { 54303831d35Sstevel if (parse_unum_ecache(unum, &board, &pos, &jnumber, &maxcat)) 54403831d35Sstevel return; 54503831d35Sstevel /* 54603831d35Sstevel * Using the SB number and Proc position we create a FRU 54703831d35Sstevel * cpu id. 
54803831d35Sstevel */ 54903831d35Sstevel e2d.ee2d_owning_proc = plat_make_fru_cpuid(board, maxcat, pos); 55003831d35Sstevel e2d.ee2d_jnumber = jnumber; 551*055d7c80Scarlsonj e2d.ee2d_bank_number = (uint8_t)-1; 55203831d35Sstevel } else { 55303831d35Sstevel /* 55403831d35Sstevel * L1 Cache 55503831d35Sstevel */ 55603831d35Sstevel e2d.ee2d_owning_proc = aflt->flt_bus_id; 557*055d7c80Scarlsonj e2d.ee2d_jnumber = (uint16_t)-1; 558*055d7c80Scarlsonj e2d.ee2d_bank_number = (uint8_t)-1; 55903831d35Sstevel } 56003831d35Sstevel 56103831d35Sstevel e2d.ee2d_type = (uint8_t)msg_type; 56203831d35Sstevel e2d.ee2d_afar_status = (uint8_t)ecc_ch_flt->ecaf_afar_status; 56303831d35Sstevel e2d.ee2d_synd_status = (uint8_t)ecc_ch_flt->ecaf_synd_status; 56403831d35Sstevel e2d.ee2d_detecting_proc = aflt->flt_bus_id; 56503831d35Sstevel e2d.ee2d_cpu_impl = cpunodes[e2d.ee2d_owning_proc].implementation; 56603831d35Sstevel e2d.ee2d_timestamp = aflt->flt_id; 56703831d35Sstevel e2d.ee2d_afsr = aflt->flt_stat; 56803831d35Sstevel e2d.ee2d_afar = aflt->flt_addr; 56903831d35Sstevel 57003831d35Sstevel e2d.ee2d_sdw_afsr = ecc_ch_flt->ecaf_sdw_afsr; 57103831d35Sstevel e2d.ee2d_sdw_afar = ecc_ch_flt->ecaf_sdw_afar; 57203831d35Sstevel e2d.ee2d_afsr_ext = ecc_ch_flt->ecaf_afsr_ext; 57303831d35Sstevel e2d.ee2d_sdw_afsr_ext = ecc_ch_flt->ecaf_sdw_afsr_ext; 57403831d35Sstevel 57503831d35Sstevel /* Send the message to SC */ 57603831d35Sstevel (void) plat_send_ecc_mailbox_msg(PLAT_ECC_ERROR2_MESSAGE, &e2d); 57703831d35Sstevel } 57803831d35Sstevel 57903831d35Sstevel uint8_t ecc_indictment_mailbox_disable = PLAT_ECC_INDICTMENT_OK; 58003831d35Sstevel uint8_t ecc_indictment_mailbox_flags = PLAT_ECC_SEND_DEFAULT_INDICT; 58103831d35Sstevel 58203831d35Sstevel /* 58303831d35Sstevel * We log all Solaris indictments of failing hardware. We pull the system 58403831d35Sstevel * board number and jnumber out of the unum string, and calculate the cpuid 58503831d35Sstevel * from some members of the unum string. 
The rest of the structure is filled 58603831d35Sstevel * in through the other arguments. The data structure is then passed to 58703831d35Sstevel * plat_ecc_dispatch_task(). This function should only be loaded into memory 58803831d35Sstevel * or called on platforms that define a plat_send_ecc_mailbox_msg() function. 58903831d35Sstevel */ 59003831d35Sstevel static int 59103831d35Sstevel plat_log_fruid_indictment(int msg_type, struct async_flt *aflt, char *unum) 59203831d35Sstevel { 59303831d35Sstevel plat_ecc_message_t *wrapperp; 59403831d35Sstevel plat_ecc_indict_msg_contents_t *contentsp; 59503831d35Sstevel char *unum_ptr; 59603831d35Sstevel int is_maxcat = 0; 59703831d35Sstevel 59803831d35Sstevel switch (ecc_indictment_mailbox_disable) { 59903831d35Sstevel case (PLAT_ECC_INDICTMENT_OK): 60003831d35Sstevel case (PLAT_ECC_INDICTMENT_SUSPECT): 60103831d35Sstevel break; 60203831d35Sstevel case (PLAT_ECC_INDICTMENT_NO_SEND): 60303831d35Sstevel default: 60403831d35Sstevel return (ECONNREFUSED); 60503831d35Sstevel } 60603831d35Sstevel 60703831d35Sstevel switch (msg_type) { 60803831d35Sstevel case (PLAT_ECC_INDICT_DIMM): 60903831d35Sstevel if ((ecc_indictment_mailbox_flags & 61003831d35Sstevel PLAT_ECC_SEND_DIMM_INDICT) == 0) 61103831d35Sstevel return (ECONNREFUSED); 61203831d35Sstevel break; 61303831d35Sstevel case (PLAT_ECC_INDICT_ECACHE_CORRECTABLES): 61403831d35Sstevel if ((ecc_indictment_mailbox_flags & 61503831d35Sstevel PLAT_ECC_SEND_ECACHE_XXC_INDICT) == 0) 61603831d35Sstevel return (ECONNREFUSED); 61703831d35Sstevel break; 61803831d35Sstevel case (PLAT_ECC_INDICT_ECACHE_UNCORRECTABLE): 61903831d35Sstevel if ((ecc_indictment_mailbox_flags & 62003831d35Sstevel PLAT_ECC_SEND_ECACHE_XXU_INDICT) == 0) 62103831d35Sstevel return (ECONNREFUSED); 62203831d35Sstevel break; 62303831d35Sstevel default: 62403831d35Sstevel return (ECONNREFUSED); 62503831d35Sstevel } 62603831d35Sstevel 62703831d35Sstevel /* LINTED: E_TRUE_LOGICAL_EXPR */ 62803831d35Sstevel ASSERT(sizeof 
(plat_ecc_indictment_data_t) == PLAT_ECC_INDICT_SIZE); 62903831d35Sstevel 63003831d35Sstevel wrapperp = (plat_ecc_message_t *) 63103831d35Sstevel kmem_zalloc(sizeof (plat_ecc_message_t), KM_SLEEP); 63203831d35Sstevel 63303831d35Sstevel wrapperp->ecc_msg_status = PLAT_ECC_NO_MSG_ACTIVE; 63403831d35Sstevel wrapperp->ecc_msg_type = PLAT_ECC_INDICTMENT_MESSAGE; 63503831d35Sstevel wrapperp->ecc_msg_len = sizeof (plat_ecc_indictment_data_t); 63603831d35Sstevel wrapperp->ecc_msg_data = kmem_zalloc(wrapperp->ecc_msg_len, KM_SLEEP); 63703831d35Sstevel 63803831d35Sstevel contentsp = &(((plat_ecc_indictment_data_t *) 63903831d35Sstevel wrapperp->ecc_msg_data)->msg_contents); 64003831d35Sstevel 64103831d35Sstevel /* 64203831d35Sstevel * Find board_num, jnumber, and proc position from the unum string. 64303831d35Sstevel * Use the board number, is_maxcat, and proc position to calculate 64403831d35Sstevel * cpuid. 64503831d35Sstevel */ 64603831d35Sstevel unum_ptr = strstr(unum, "SB"); 64703831d35Sstevel if (unum_ptr == NULL) { 64803831d35Sstevel is_maxcat = 1; 64903831d35Sstevel unum_ptr = strstr(unum, "IO"); 65003831d35Sstevel if (unum_ptr == NULL) { 65103831d35Sstevel kmem_free(wrapperp->ecc_msg_data, 65203831d35Sstevel wrapperp->ecc_msg_len); 65303831d35Sstevel kmem_free(wrapperp, sizeof (plat_ecc_message_t)); 65403831d35Sstevel return (EINVAL); 65503831d35Sstevel } 65603831d35Sstevel } 65703831d35Sstevel unum_ptr += 2; 65803831d35Sstevel contentsp->board_num = (uint8_t)stoi(&unum_ptr); 65903831d35Sstevel 66003831d35Sstevel unum_ptr = strchr(unum_ptr, 'P'); 66103831d35Sstevel if (unum_ptr == NULL) { 66203831d35Sstevel kmem_free(wrapperp->ecc_msg_data, wrapperp->ecc_msg_len); 66303831d35Sstevel kmem_free(wrapperp, sizeof (plat_ecc_message_t)); 66403831d35Sstevel return (EINVAL); 66503831d35Sstevel } 66603831d35Sstevel unum_ptr++; 66703831d35Sstevel contentsp->detecting_proc = 66803831d35Sstevel (uint16_t)plat_make_fru_cpuid(contentsp->board_num, is_maxcat, 66903831d35Sstevel 
stoi(&unum_ptr)); 67003831d35Sstevel 67103831d35Sstevel unum_ptr = strchr(unum_ptr, 'J'); 67203831d35Sstevel if (unum_ptr == NULL) { 67303831d35Sstevel kmem_free(wrapperp->ecc_msg_data, wrapperp->ecc_msg_len); 67403831d35Sstevel kmem_free(wrapperp, sizeof (plat_ecc_message_t)); 67503831d35Sstevel return (EINVAL); 67603831d35Sstevel } 67703831d35Sstevel unum_ptr++; 67803831d35Sstevel contentsp->jnumber = (uint16_t)stoi(&unum_ptr); 67903831d35Sstevel 68003831d35Sstevel /* 68103831d35Sstevel * Fill in the rest of the data 68203831d35Sstevel */ 68303831d35Sstevel contentsp->version = PLAT_ECC_INDICTMENT_VERSION; 68403831d35Sstevel contentsp->indictment_type = msg_type; 68503831d35Sstevel contentsp->indictment_uncertain = ecc_indictment_mailbox_disable; 68603831d35Sstevel contentsp->syndrome = aflt->flt_synd; 68703831d35Sstevel contentsp->afsr = aflt->flt_stat; 68803831d35Sstevel contentsp->afar = aflt->flt_addr; 68903831d35Sstevel 69003831d35Sstevel /* 69103831d35Sstevel * Build the solaris_version string: 69203831d35Sstevel */ 69303831d35Sstevel (void) snprintf(contentsp->solaris_version, 69403831d35Sstevel PLAT_ECC_VERSION_LENGTH, "%s %s", utsname.release, utsname.version); 69503831d35Sstevel 69603831d35Sstevel /* 69703831d35Sstevel * Send the data on to the queuing function 69803831d35Sstevel */ 69903831d35Sstevel return (plat_ecc_dispatch_task(wrapperp)); 70003831d35Sstevel } 70103831d35Sstevel 70203831d35Sstevel /* The following array maps the indictment to its corresponding set */ 70303831d35Sstevel static int plat_ecc_i2d_map[PLAT_ECC_INDICT2_NUMVALS] = { 70403831d35Sstevel PLAT_ECC_INDICT2_NONE, /* 0x00 */ 70503831d35Sstevel PLAT_ECC_SEND_INDICT2_L2_XXU, /* 0x01 */ 70603831d35Sstevel PLAT_ECC_SEND_INDICT2_L2_XXC_SERD, /* 0x02 */ 70703831d35Sstevel PLAT_ECC_SEND_INDICT2_L2_TAG_SERD, /* 0x03 */ 70803831d35Sstevel PLAT_ECC_SEND_INDICT2_L3_XXU, /* 0x04 */ 70903831d35Sstevel PLAT_ECC_SEND_INDICT2_L3_XXC_SERD, /* 0x05 */ 71003831d35Sstevel 
PLAT_ECC_SEND_INDICT2_L3_TAG_SERD, /* 0x06 */ 71103831d35Sstevel PLAT_ECC_SEND_INDICT2_L1_SERD, /* 0x07 */ 71203831d35Sstevel PLAT_ECC_SEND_INDICT2_L1_SERD, /* 0x08 */ 71303831d35Sstevel PLAT_ECC_SEND_INDICT2_TLB_SERD, /* 0x09 */ 71403831d35Sstevel PLAT_ECC_SEND_INDICT2_TLB_SERD, /* 0x0a */ 71503831d35Sstevel PLAT_ECC_SEND_INDICT2_FPU, /* 0x0b */ 71603831d35Sstevel PLAT_ECC_SEND_INDICT2_PCACHE_SERD /* 0x0c */ 71703831d35Sstevel }; 71803831d35Sstevel 71903831d35Sstevel static int 72003831d35Sstevel plat_log_fruid_indictment2(int msg_type, struct async_flt *aflt, char *unum) 72103831d35Sstevel { 72203831d35Sstevel plat_ecc_message_t *wrapperp; 72303831d35Sstevel plat_ecc_indictment2_data_t *i2d; 72403831d35Sstevel int board, pos, jnumber; 72503831d35Sstevel int maxcat = 0; 72603831d35Sstevel uint16_t flags; 72703831d35Sstevel 72803831d35Sstevel /* 72903831d35Sstevel * If the unum is null or empty, skip parsing it 73003831d35Sstevel */ 73103831d35Sstevel if (unum && unum[0] != '\0') { 73203831d35Sstevel if (parse_unum_ecache(unum, &board, &pos, &jnumber, &maxcat)) 73303831d35Sstevel return (EINVAL); 73403831d35Sstevel } 73503831d35Sstevel 73603831d35Sstevel if ((ecc_indictment_mailbox_disable != PLAT_ECC_INDICTMENT_OK) && 73703831d35Sstevel (ecc_indictment_mailbox_disable != PLAT_ECC_INDICTMENT_SUSPECT)) 73803831d35Sstevel return (ECONNREFUSED); 73903831d35Sstevel 74003831d35Sstevel /* Check the flags */ 74103831d35Sstevel flags = plat_ecc_i2d_map[msg_type]; 74203831d35Sstevel if ((ecc_indictment2_mailbox_flags & flags) == 0) 74303831d35Sstevel return (ECONNREFUSED); 74403831d35Sstevel 74503831d35Sstevel wrapperp = (plat_ecc_message_t *) 74603831d35Sstevel kmem_zalloc(sizeof (plat_ecc_message_t), KM_SLEEP); 74703831d35Sstevel 74803831d35Sstevel /* Initialize the wrapper */ 74903831d35Sstevel wrapperp->ecc_msg_status = PLAT_ECC_NO_MSG_ACTIVE; 75003831d35Sstevel wrapperp->ecc_msg_type = PLAT_ECC_INDICTMENT2_MESSAGE; 75103831d35Sstevel wrapperp->ecc_msg_len = sizeof 
(plat_ecc_indictment2_data_t); 75203831d35Sstevel wrapperp->ecc_msg_data = kmem_zalloc(wrapperp->ecc_msg_len, KM_SLEEP); 75303831d35Sstevel 75403831d35Sstevel i2d = (plat_ecc_indictment2_data_t *)wrapperp->ecc_msg_data; 75503831d35Sstevel 75603831d35Sstevel /* Fill the header */ 75703831d35Sstevel i2d->ei2d_major_version = PLAT_ECC_INDICT2_MAJOR_VERSION; 75803831d35Sstevel i2d->ei2d_minor_version = PLAT_ECC_INDICT2_MINOR_VERSION; 75903831d35Sstevel i2d->ei2d_msg_type = PLAT_ECC_INDICTMENT2_MESSAGE; 76003831d35Sstevel i2d->ei2d_msg_length = sizeof (plat_ecc_indictment2_data_t); 76103831d35Sstevel 76203831d35Sstevel /* Fill the data */ 76303831d35Sstevel if (unum && unum[0] != '\0') { 76403831d35Sstevel i2d->ei2d_arraigned_proc = plat_make_fru_cpuid(board, maxcat, 76503831d35Sstevel pos); 76603831d35Sstevel i2d->ei2d_board_num = board; 76703831d35Sstevel i2d->ei2d_jnumber = jnumber; 76803831d35Sstevel } else { 76903831d35Sstevel i2d->ei2d_arraigned_proc = aflt->flt_inst; 77003831d35Sstevel i2d->ei2d_board_num = (uint8_t) 77103831d35Sstevel plat_make_fru_boardnum(i2d->ei2d_arraigned_proc); 772*055d7c80Scarlsonj i2d->ei2d_jnumber = (uint16_t)-1; 77303831d35Sstevel } 77403831d35Sstevel 77503831d35Sstevel i2d->ei2d_type = msg_type; 77603831d35Sstevel i2d->ei2d_uncertain = ecc_indictment_mailbox_disable; 77703831d35Sstevel i2d->ei2d_cpu_impl = cpunodes[i2d->ei2d_arraigned_proc].implementation; 77803831d35Sstevel i2d->ei2d_timestamp = aflt->flt_id; 77903831d35Sstevel 78003831d35Sstevel /* 78103831d35Sstevel * Send the data on to the queuing function 78203831d35Sstevel */ 78303831d35Sstevel return (plat_ecc_dispatch_task(wrapperp)); 78403831d35Sstevel } 78503831d35Sstevel 78603831d35Sstevel int 78703831d35Sstevel plat_ecc_capability_send(void) 78803831d35Sstevel { 78903831d35Sstevel plat_ecc_message_t *wrapperp; 79003831d35Sstevel plat_capability_data_t *cap; 79103831d35Sstevel int ver_len; 79203831d35Sstevel 79303831d35Sstevel wrapperp = kmem_zalloc(sizeof 
(plat_ecc_message_t), KM_SLEEP); 79403831d35Sstevel 79503831d35Sstevel ver_len = strlen(utsname.release) + strlen(utsname.version) + 2; 79603831d35Sstevel 79703831d35Sstevel /* Initialize the wrapper */ 79803831d35Sstevel wrapperp->ecc_msg_status = PLAT_ECC_NO_MSG_ACTIVE; 79903831d35Sstevel wrapperp->ecc_msg_type = PLAT_ECC_CAPABILITY_MESSAGE; 80003831d35Sstevel wrapperp->ecc_msg_len = sizeof (plat_capability_data_t) + ver_len; 80103831d35Sstevel wrapperp->ecc_msg_data = kmem_zalloc(wrapperp->ecc_msg_len, KM_SLEEP); 80203831d35Sstevel 80303831d35Sstevel cap = (plat_capability_data_t *)wrapperp->ecc_msg_data; 80403831d35Sstevel 80503831d35Sstevel /* Fill the header */ 80603831d35Sstevel cap->capd_major_version = PLAT_ECC_CAP_VERSION_MAJOR; 80703831d35Sstevel cap->capd_minor_version = PLAT_ECC_CAP_VERSION_MINOR; 80803831d35Sstevel cap->capd_msg_type = PLAT_ECC_CAPABILITY_MESSAGE; 80903831d35Sstevel cap->capd_msg_length = wrapperp->ecc_msg_len; 81003831d35Sstevel 81103831d35Sstevel /* Set the default domain capability */ 81203831d35Sstevel cap->capd_capability = PLAT_ECC_CAPABILITY_DOMAIN_DEFAULT; 81303831d35Sstevel 81403831d35Sstevel /* 81503831d35Sstevel * Build the solaris_version string: 81603831d35Sstevel * utsname.release + " " + utsname.version 81703831d35Sstevel */ 81803831d35Sstevel (void) snprintf(cap->capd_solaris_version, ver_len, "%s %s", 81903831d35Sstevel utsname.release, utsname.version); 82003831d35Sstevel 82103831d35Sstevel /* 82203831d35Sstevel * Send the data on to the queuing function 82303831d35Sstevel */ 82403831d35Sstevel return (plat_ecc_dispatch_task(wrapperp)); 82503831d35Sstevel } 82603831d35Sstevel 82703831d35Sstevel int 82803831d35Sstevel plat_ecc_capability_sc_get(int type) 82903831d35Sstevel { 83003831d35Sstevel switch (type) { 83103831d35Sstevel case PLAT_ECC_ERROR_MESSAGE: 83203831d35Sstevel if (ecc_log_fruid_enable && 83303831d35Sstevel (!(plat_ecc_capability_map_sc & 83403831d35Sstevel PLAT_ECC_CAPABILITY_ERROR2))) 
83503831d35Sstevel return (1); 83603831d35Sstevel break; 83703831d35Sstevel case PLAT_ECC_ERROR2_MESSAGE: 83803831d35Sstevel if (plat_ecc_capability_map_sc & 83903831d35Sstevel PLAT_ECC_CAPABILITY_ERROR2) 84003831d35Sstevel return (1); 84103831d35Sstevel break; 84203831d35Sstevel case PLAT_ECC_INDICTMENT_MESSAGE: 84303831d35Sstevel if (!(plat_ecc_capability_map_sc & 84403831d35Sstevel PLAT_ECC_CAPABILITY_INDICT2) || 84503831d35Sstevel !(plat_ecc_capability_map_domain & 84603831d35Sstevel PLAT_ECC_CAPABILITY_FMA)) 84703831d35Sstevel return (1); 84803831d35Sstevel break; 84903831d35Sstevel case PLAT_ECC_INDICTMENT2_MESSAGE: 85003831d35Sstevel if (plat_ecc_capability_map_sc & 85103831d35Sstevel PLAT_ECC_CAPABILITY_INDICT2) 85203831d35Sstevel return (1); 85303831d35Sstevel break; 85403831d35Sstevel case PLAT_ECC_DIMM_SID_MESSAGE: 85503831d35Sstevel if (plat_ecc_capability_map_sc & 85603831d35Sstevel PLAT_ECC_CAPABILITY_DIMM_SID) 85703831d35Sstevel return (1); 85803831d35Sstevel default: 85903831d35Sstevel return (0); 86003831d35Sstevel } 86103831d35Sstevel return (0); 86203831d35Sstevel } 86303831d35Sstevel 86403831d35Sstevel int plat_ecc_cap_sc_set_cnt = 0; 86503831d35Sstevel 86603831d35Sstevel void 86703831d35Sstevel plat_ecc_capability_sc_set(uint32_t cap) 86803831d35Sstevel { 86903831d35Sstevel plat_ecc_capability_map_sc = cap; 87003831d35Sstevel 87103831d35Sstevel if (!plat_ecc_cap_sc_set_cnt && (cap & PLAT_ECC_CAPABILITY_DIMM_SID)) 87203831d35Sstevel if (p2init_sid_cache) 87303831d35Sstevel p2init_sid_cache(); 87403831d35Sstevel 87503831d35Sstevel plat_ecc_cap_sc_set_cnt++; 87603831d35Sstevel } 87703831d35Sstevel 87803831d35Sstevel /* 87903831d35Sstevel * The following table represents mapping between the indictment1 reason 88003831d35Sstevel * to its type. 
88103831d35Sstevel */ 88203831d35Sstevel 88303831d35Sstevel static plat_ecc_bl_map_t plat_ecc_bl_map_v1[] = { 88403831d35Sstevel { "l2cachedata", PLAT_ECC_INDICT_ECACHE_CORRECTABLES }, 88503831d35Sstevel { "l3cachedata", PLAT_ECC_INDICT_ECACHE_CORRECTABLES }, 88603831d35Sstevel { "l2cachedata", PLAT_ECC_INDICT_ECACHE_UNCORRECTABLE }, 88703831d35Sstevel { "l3cachedata", PLAT_ECC_INDICT_ECACHE_UNCORRECTABLE } 88803831d35Sstevel }; 88903831d35Sstevel 89003831d35Sstevel /* 89103831d35Sstevel * The following table represents mapping between the indictment2 reason 89203831d35Sstevel * to its type. 89303831d35Sstevel */ 89403831d35Sstevel 89503831d35Sstevel static plat_ecc_bl_map_t plat_ecc_bl_map_v2[] = { 89603831d35Sstevel { "l2cachedata", PLAT_ECC_INDICT2_L2_SERD }, 89703831d35Sstevel { "l3cachedata", PLAT_ECC_INDICT2_L3_SERD }, 89803831d35Sstevel { "l2cachedata", PLAT_ECC_INDICT2_L2_UE }, 89903831d35Sstevel { "l3cachedata", PLAT_ECC_INDICT2_L3_UE }, 90003831d35Sstevel { "l2cachetag", PLAT_ECC_INDICT2_L2_TAG_SERD }, 90103831d35Sstevel { "l3cachetag", PLAT_ECC_INDICT2_L3_TAG_SERD }, 90203831d35Sstevel { "icache", PLAT_ECC_INDICT2_ICACHE_SERD }, 90303831d35Sstevel { "dcache", PLAT_ECC_INDICT2_DCACHE_SERD }, 90403831d35Sstevel { "pcache", PLAT_ECC_INDICT2_PCACHE_SERD }, 90503831d35Sstevel { "itlb", PLAT_ECC_INDICT2_ITLB_SERD }, 90603831d35Sstevel { "dtlb", PLAT_ECC_INDICT2_DTLB_SERD }, 90703831d35Sstevel { "fpu", PLAT_ECC_INDICT2_FPU } 90803831d35Sstevel }; 90903831d35Sstevel 91003831d35Sstevel /* 91103831d35Sstevel * The following function returns the indictment type for a given version 91203831d35Sstevel */ 91303831d35Sstevel static int 91403831d35Sstevel flt_name_to_msg_type(const char *fault, int indict_version) 91503831d35Sstevel { 91603831d35Sstevel plat_ecc_bl_map_t *mapp; 91703831d35Sstevel char *fltnm = "fault.cpu."; 91803831d35Sstevel int mapsz; 91903831d35Sstevel char *p; 92003831d35Sstevel int i; 92103831d35Sstevel 92203831d35Sstevel /* Check if it starts with 
proper fault name */ 92303831d35Sstevel if (strncmp(fault, fltnm, strlen(fltnm)) != 0) 92403831d35Sstevel return (PLAT_ECC_INDICT_NONE); 92503831d35Sstevel 92603831d35Sstevel fault += strlen(fltnm); /* c = "ultraSPARC-IV.icache" */ 92703831d35Sstevel 92803831d35Sstevel /* Skip the cpu type */ 92903831d35Sstevel if ((p = strchr(fault, '.')) == NULL) 93003831d35Sstevel return (PLAT_ECC_INDICT_NONE); 93103831d35Sstevel 93203831d35Sstevel p++; /* skip the "." */ 93303831d35Sstevel 93403831d35Sstevel if (indict_version == 0) { 93503831d35Sstevel mapp = plat_ecc_bl_map_v1; 93603831d35Sstevel mapsz = sizeof (plat_ecc_bl_map_v1) / 93703831d35Sstevel sizeof (plat_ecc_bl_map_t); 93803831d35Sstevel } else { 93903831d35Sstevel mapp = plat_ecc_bl_map_v2; 94003831d35Sstevel mapsz = sizeof (plat_ecc_bl_map_v2) / 94103831d35Sstevel sizeof (plat_ecc_bl_map_t); 94203831d35Sstevel } 94303831d35Sstevel for (i = 0; i < mapsz; i++) { 94403831d35Sstevel if (strcmp(p, mapp[i].ebm_reason) == 0) { 94503831d35Sstevel return (mapp[i].ebm_type); 94603831d35Sstevel } 94703831d35Sstevel } 94803831d35Sstevel return (PLAT_ECC_INDICT_NONE); 94903831d35Sstevel } 95003831d35Sstevel 95103831d35Sstevel /* 95203831d35Sstevel * Blacklisting 95303831d35Sstevel */ 95403831d35Sstevel int 95503831d35Sstevel plat_blacklist(int cmd, const char *scheme, nvlist_t *fmri, const char *class) 95603831d35Sstevel { 95703831d35Sstevel struct async_flt aflt; 95803831d35Sstevel char *unum; 95903831d35Sstevel int msg_type, is_old_indict; 96003831d35Sstevel 96103831d35Sstevel if (fmri == NULL) 96203831d35Sstevel return (EINVAL); 96303831d35Sstevel if (cmd != BLIOC_INSERT) 96403831d35Sstevel return (ENOTSUP); 96503831d35Sstevel 96603831d35Sstevel /* 96703831d35Sstevel * We support both the blacklisting of CPUs via mem-schemed 96803831d35Sstevel * FMRIs that name E$ J-numbers, and CPUs via cpu-schemed FMRIs 96903831d35Sstevel * that name the cpuid. 
97003831d35Sstevel */ 97103831d35Sstevel if (strcmp(scheme, FM_FMRI_SCHEME_MEM) == 0) { 97203831d35Sstevel if (nvlist_lookup_string(fmri, FM_FMRI_MEM_UNUM, &unum)) 97303831d35Sstevel return (EINVAL); 974*055d7c80Scarlsonj aflt.flt_inst = (uint_t)-1; 97503831d35Sstevel } else if (strcmp(scheme, FM_FMRI_SCHEME_CPU) == 0) { 97603831d35Sstevel if (nvlist_lookup_uint32(fmri, FM_FMRI_CPU_ID, &aflt.flt_inst)) 97703831d35Sstevel return (EINVAL); 97803831d35Sstevel unum = NULL; 97903831d35Sstevel } else { 98003831d35Sstevel return (ENOTSUP); 98103831d35Sstevel } 98203831d35Sstevel 98303831d35Sstevel /* 98403831d35Sstevel * If the SC cannot handle indictment2, so fall back to old one. 98503831d35Sstevel * Also if the domain does not support FMA, then send only the old one. 98603831d35Sstevel */ 98703831d35Sstevel 98803831d35Sstevel is_old_indict = plat_ecc_capability_sc_get(PLAT_ECC_INDICTMENT_MESSAGE); 98903831d35Sstevel 99003831d35Sstevel if (is_old_indict) 99103831d35Sstevel msg_type = flt_name_to_msg_type(class, 0); 99203831d35Sstevel else 99303831d35Sstevel msg_type = flt_name_to_msg_type(class, 1); 99403831d35Sstevel 99503831d35Sstevel if (msg_type == PLAT_ECC_INDICT_NONE) 99603831d35Sstevel return (ENOTSUP); 99703831d35Sstevel 99803831d35Sstevel /* 99903831d35Sstevel * The current blacklisting interfaces are designed for a world where 100003831d35Sstevel * the SC is much more involved in the diagnosis and error reporting 100103831d35Sstevel * process than it is in the FMA world. As such, the existing 100203831d35Sstevel * interfaces want all kinds of information about the error that's 100303831d35Sstevel * triggering the blacklist. In the FMA world, we don't have access 100403831d35Sstevel * to any of that information by the time we're doing the blacklist, 100503831d35Sstevel * so we fake values. 
100603831d35Sstevel */ 100703831d35Sstevel aflt.flt_id = gethrtime(); 100803831d35Sstevel aflt.flt_addr = -1; 100903831d35Sstevel aflt.flt_stat = -1; 1010*055d7c80Scarlsonj aflt.flt_synd = (ushort_t)-1; 101103831d35Sstevel 101203831d35Sstevel if (is_old_indict) { 101303831d35Sstevel if (unum && unum[0] != '\0') 101403831d35Sstevel return (plat_log_fruid_indictment(msg_type, &aflt, 101503831d35Sstevel unum)); 101603831d35Sstevel else 101703831d35Sstevel return (ENOTSUP); 101803831d35Sstevel } else { 101903831d35Sstevel return (plat_log_fruid_indictment2(msg_type, &aflt, unum)); 102003831d35Sstevel } 102103831d35Sstevel } 102203831d35Sstevel 102303831d35Sstevel static kcondvar_t plat_ecc_condvar; 102403831d35Sstevel static kmutex_t plat_ecc_mutex; 102503831d35Sstevel static taskq_t *plat_ecc_taskq; 102603831d35Sstevel 102703831d35Sstevel /* 102803831d35Sstevel * plat_ecc_dispatch_task: Dispatch the task on a taskq and wait for the 102903831d35Sstevel * return value. We use cv_wait_sig to wait for the return values. If a 103003831d35Sstevel * signal interrupts us, we return EINTR. Otherwise, we return the value 103103831d35Sstevel * returned by the mailbox functions. 103203831d35Sstevel * 103303831d35Sstevel * To avoid overloading the lower-level mailbox routines, we use a taskq 103403831d35Sstevel * to serialize all messages. Currently, it is expected that only one 103503831d35Sstevel * process (fmd) will use this ioctl, so the delay caused by the taskq 103603831d35Sstevel * should not have much of an effect. 
103703831d35Sstevel */ 103803831d35Sstevel int 103903831d35Sstevel plat_ecc_dispatch_task(plat_ecc_message_t *msg) 104003831d35Sstevel { 104103831d35Sstevel int ret; 104203831d35Sstevel 104303831d35Sstevel ASSERT(msg != NULL); 104403831d35Sstevel ASSERT(plat_ecc_taskq != NULL); 104503831d35Sstevel 104603831d35Sstevel if (taskq_dispatch(plat_ecc_taskq, plat_ecc_send_msg, 104703831d35Sstevel (void *)msg, TQ_NOSLEEP) == NULL) { 104803831d35Sstevel kmem_free(msg->ecc_msg_data, msg->ecc_msg_len); 104903831d35Sstevel kmem_free(msg, sizeof (plat_ecc_message_t)); 105003831d35Sstevel return (ENOMEM); 105103831d35Sstevel } 105203831d35Sstevel mutex_enter(&plat_ecc_mutex); 105303831d35Sstevel 105403831d35Sstevel /* 105503831d35Sstevel * It's possible that the taskq function completed before we 105603831d35Sstevel * acquired the mutex. Check for this first. If this did not 105703831d35Sstevel * happen, we wait for the taskq function to signal us, or an 105803831d35Sstevel * interrupt. We also check ecc_msg_status to protect against 105903831d35Sstevel * spurious wakeups from cv_wait_sig. 
106003831d35Sstevel */ 106103831d35Sstevel if (msg->ecc_msg_status == PLAT_ECC_MSG_SENT) { 106203831d35Sstevel ret = msg->ecc_msg_ret; 106303831d35Sstevel kmem_free(msg->ecc_msg_data, msg->ecc_msg_len); 106403831d35Sstevel kmem_free(msg, sizeof (plat_ecc_message_t)); 106503831d35Sstevel } else { 106603831d35Sstevel msg->ecc_msg_status = PLAT_ECC_TASK_DISPATCHED; 106703831d35Sstevel 106803831d35Sstevel while ((ret = cv_wait_sig(&plat_ecc_condvar, 106903831d35Sstevel &plat_ecc_mutex)) != 0 && 107003831d35Sstevel msg->ecc_msg_status == PLAT_ECC_TASK_DISPATCHED) 107103831d35Sstevel ; 107203831d35Sstevel 107303831d35Sstevel if ((ret == 0) && (msg->ecc_msg_status != PLAT_ECC_MSG_SENT)) { 107403831d35Sstevel /* An interrupt was received */ 107503831d35Sstevel msg->ecc_msg_status = PLAT_ECC_INTERRUPT_RECEIVED; 107603831d35Sstevel ret = EINTR; 107703831d35Sstevel } else { 107803831d35Sstevel ret = msg->ecc_msg_ret; 107903831d35Sstevel kmem_free(msg->ecc_msg_data, msg->ecc_msg_len); 108003831d35Sstevel kmem_free(msg, sizeof (plat_ecc_message_t)); 108103831d35Sstevel } 108203831d35Sstevel } 108303831d35Sstevel mutex_exit(&plat_ecc_mutex); 108403831d35Sstevel return (ret); 108503831d35Sstevel } 108603831d35Sstevel 108703831d35Sstevel static void 108803831d35Sstevel plat_ecc_send_msg(void *arg) 108903831d35Sstevel { 109003831d35Sstevel plat_ecc_message_t *msg = arg; 109103831d35Sstevel int ret; 109203831d35Sstevel 109303831d35Sstevel /* 109403831d35Sstevel * Send this data off as a mailbox message to the SC. 109503831d35Sstevel */ 109603831d35Sstevel ret = plat_send_ecc_mailbox_msg(msg->ecc_msg_type, msg->ecc_msg_data); 109703831d35Sstevel 109803831d35Sstevel mutex_enter(&plat_ecc_mutex); 109903831d35Sstevel 110003831d35Sstevel /* 110103831d35Sstevel * If the dispatching function received an interrupt, don't bother 110203831d35Sstevel * signalling it, and throw away the results. Otherwise, set the 110303831d35Sstevel * return value and signal the condvar. 
110403831d35Sstevel */ 110503831d35Sstevel if (msg->ecc_msg_status == PLAT_ECC_INTERRUPT_RECEIVED) { 110603831d35Sstevel kmem_free(msg->ecc_msg_data, msg->ecc_msg_len); 110703831d35Sstevel kmem_free(msg, sizeof (plat_ecc_message_t)); 110803831d35Sstevel } else { 110903831d35Sstevel msg->ecc_msg_ret = ret; 111003831d35Sstevel msg->ecc_msg_status = PLAT_ECC_MSG_SENT; 111103831d35Sstevel cv_broadcast(&plat_ecc_condvar); 111203831d35Sstevel } 111303831d35Sstevel 111403831d35Sstevel mutex_exit(&plat_ecc_mutex); 111503831d35Sstevel } 111603831d35Sstevel 111703831d35Sstevel void 111803831d35Sstevel plat_ecc_init(void) 111903831d35Sstevel { 112003831d35Sstevel int bd; 112103831d35Sstevel 112203831d35Sstevel mutex_init(&plat_ecc_mutex, NULL, MUTEX_DEFAULT, NULL); 112303831d35Sstevel cv_init(&plat_ecc_condvar, NULL, CV_DEFAULT, NULL); 112403831d35Sstevel plat_ecc_taskq = taskq_create("plat_ecc_taskq", 1, minclsyspri, 112503831d35Sstevel PLAT_ECC_TASKQ_MIN, PLAT_ECC_TASKQ_MAX, TASKQ_PREPOPULATE); 112603831d35Sstevel ASSERT(plat_ecc_taskq != NULL); 112703831d35Sstevel 112803831d35Sstevel for (bd = 0; bd < plat_max_cpumem_boards(); bd++) { 112903831d35Sstevel mutex_init(&domain_dimm_sids[bd].pdsb_lock, 113003831d35Sstevel NULL, MUTEX_DEFAULT, NULL); 113103831d35Sstevel } 113203831d35Sstevel 113303831d35Sstevel } 1134