/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#ifndef _GCPU_H
#define	_GCPU_H

#include <sys/types.h>
#include <sys/cpu_module_impl.h>
#include <sys/cpu_module_ms.h>
#include <sys/ksynch.h>
#include <sys/systm.h>
#include <sys/fm/util.h>

#ifdef __cplusplus
extern "C" {
#endif

#define	GCPU_MCA_ERRS_PERCPU	10	/* errorq slots per cpu */
#define	GCPU_MCA_MIN_ERRORS	30	/* minimum total errorq slots */
#define	GCPU_MCA_MAX_ERRORS	100	/* maximum total errorq slots */

typedef struct gcpu_data gcpu_data_t;

#define	GCPU_ERRCODE_MASK_ALL	0xffff

typedef struct gcpu_error_disp {
	const char *ged_class_fmt;	/* ereport class formatter (last bit) */
	const char *ged_compound_fmt;	/* compound error formatter */
	uint64_t ged_ereport_members;	/* ereport payload members */
	uint16_t ged_errcode_mask_on;	/* errcode bits that must be set ... */
	uint16_t ged_errcode_mask_off;	/* ... and must be clear for a match */
} gcpu_error_disp_t;
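
/*
 * Illustrative sketch (not part of the original header): a candidate
 * MCA error code would typically be matched against a gcpu_error_disp_t
 * by requiring all of the "mask_on" bits and none of the "mask_off"
 * bits, roughly as below.  The helper name is hypothetical.
 */
#if 0	/* example only */
static boolean_t
gcpu_errcode_matches_example(uint16_t errcode, const gcpu_error_disp_t *ged)
{
	return ((errcode & ged->ged_errcode_mask_on) ==
	    ged->ged_errcode_mask_on &&
	    (errcode & ged->ged_errcode_mask_off) == 0);
}
#endif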

/*
 * For errorq_dispatch we need to have a single contiguous structure
 * capturing all our logout data.  We do not know in advance how many
 * error detector banks there are in this cpu model, so we'll manually
 * allocate additional space for the gcl_banks array below.
 */
typedef struct gcpu_bank_logout {
	uint64_t gbl_status;		/* MCi_STATUS value */
	uint64_t gbl_addr;		/* MCi_ADDR value */
	uint64_t gbl_misc;		/* MCi_MISC value */
	uint64_t gbl_disp;		/* Error disposition for this bank */
	uint32_t gbl_clrdefcnt;		/* Count of deferred status clears */
} gcpu_bank_logout_t;

/*
 * The data structure we "logout" all error telemetry from all banks of
 * a cpu to.  The gcl_data array declared with 1 member below will actually
 * have gcl_nbanks members - variable with the actual cpu model present.
 * After the gcl_data array there is a further model-specific array that
 * may be allocated, and gcl_ms_logout will point to that if present.
 * This cpu logout data must form one contiguous chunk of memory for
 * dispatch with errorq_dispatch.
 */
typedef struct gcpu_logout {
	gcpu_data_t *gcl_gcpu;		/* pointer to per-cpu gcpu_data_t */
	uintptr_t gcl_ip;		/* instruction pointer from #mc trap */
	uint64_t gcl_timestamp;		/* gethrtime() at logout */
	uint64_t gcl_mcg_status;	/* MCG_STATUS register value */
	uint64_t gcl_flags;		/* Flags */
	pc_t gcl_stack[FM_STK_DEPTH];	/* saved stack trace, if any */
	int gcl_stackdepth;		/* saved stack trace depth */
	int ismc;			/* is a machine check flag */
	int gcl_nbanks;			/* number of banks in array below */
	void *gcl_ms_logout;		/* Model-specific area after gcl_data */
	gcpu_bank_logout_t gcl_data[1];	/* Bank logout areas - must be last */
} gcpu_logout_t;
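
/*
 * Illustrative sketch (not part of the original header): since gcl_data
 * is declared with a single member but really holds gcl_nbanks entries,
 * with an optional model-specific area following it, a per-cpu logout
 * buffer would typically be sized along the lines shown below so that
 * the whole object is one contiguous block suitable for errorq_dispatch.
 * The helper name and the ms_logout_size parameter are hypothetical.
 */
#if 0	/* example only */
static size_t
gcpu_logout_size_example(uint_t nbanks, size_t ms_logout_size)
{
	return (sizeof (gcpu_logout_t) +
	    (nbanks - 1) * sizeof (gcpu_bank_logout_t) + ms_logout_size);
}
#endif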

/*
 * gcl_flag values
 */
#define	GCPU_GCL_F_PRIV		0x1	/* #MC during privileged code */
#define	GCPU_GCL_F_TES_P	0x2	/* MCG_CAP indicates TES_P */

struct gcpu_bios_bankcfg {
	uint64_t bios_bank_ctl;
	uint64_t bios_bank_status;
	uint64_t bios_bank_addr;
	uint64_t bios_bank_misc;
};

struct gcpu_bios_cfg {
	uint64_t bios_mcg_cap;
	uint64_t bios_mcg_ctl;
	struct gcpu_bios_bankcfg *bios_bankcfg;
};

/*
 * Event types in poll trace records.  Keep these in sync with
 * the generic cpu mdb module names for each (see gcpu_mpt_dump in mdb).
 */
#define	GCPU_MPT_WHAT_CYC_ERR		0	/* cyclic-induced poll */
#define	GCPU_MPT_WHAT_POKE_ERR		1	/* manually-induced poll */
#define	GCPU_MPT_WHAT_UNFAULTING	2	/* discarded error state */
#define	GCPU_MPT_WHAT_MC_ERR		3	/* MC# */
#define	GCPU_MPT_WHAT_CMCI_ERR		4	/* CMCI interrupt */
#define	GCPU_MPT_WHAT_XPV_VIRQ		5	/* MCA_VIRQ in dom0 */
#define	GCPU_MPT_WHAT_XPV_VIRQ_LOGOUT	6	/* MCA_VIRQ logout complete */

typedef struct gcpu_poll_trace {
	hrtime_t mpt_when;		/* timestamp of event */
	uint8_t mpt_what;		/* GCPU_MPT_WHAT_* (which event?) */
	uint8_t mpt_nerr;		/* number of errors discovered */
	uint16_t mpt_pad1;
	uint32_t mpt_pad2;
} gcpu_poll_trace_t;

typedef struct gcpu_poll_trace_ctl {
	gcpu_poll_trace_t *mptc_tbufs;	/* trace buffers */
	uint_t mptc_curtrace;		/* last buffer filled */
} gcpu_poll_trace_ctl_t;
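
/*
 * Illustrative sketch (not part of the original header): gcpu_poll_trace
 * (declared below) records each poll-related event into the ring of
 * mptc_tbufs entries, with mptc_curtrace tracking the last slot filled.
 * Recording a new event would look roughly like this; the ring size
 * GCPU_NTRACE_EXAMPLE is hypothetical and stands in for however many
 * buffers were allocated by gcpu_poll_trace_init.
 */
#if 0	/* example only */
#define	GCPU_NTRACE_EXAMPLE	16

static void
gcpu_poll_trace_example(gcpu_poll_trace_ctl_t *mptc, uint8_t what,
    uint8_t nerr)
{
	gcpu_poll_trace_t *mpt;

	mptc->mptc_curtrace = (mptc->mptc_curtrace + 1) % GCPU_NTRACE_EXAMPLE;
	mpt = &mptc->mptc_tbufs[mptc->mptc_curtrace];
	mpt->mpt_when = gethrtime();
	mpt->mpt_what = what;
	mpt->mpt_nerr = nerr;
}
#endif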

/*
 * Bookkeeping counts and durations used to decide when to enable or
 * disable cmci at runtime.
 */
typedef struct gcpu_mca_cmci {
	uint32_t cmci_cap;	/* cmci capability for this bank */
	uint32_t ncmci;		/* number of correctable errors between polls */
	uint32_t drtcmci;	/* duration of no cmci when cmci is disabled */
	uint32_t cmci_enabled;	/* cmci enable/disable status for this bank */
} gcpu_mca_cmci_t;

/* Index for gcpu_mca_logout array below */
#define	GCPU_MCA_LOGOUT_EXCEPTION	0	/* area for #MC */
#define	GCPU_MCA_LOGOUT_POLLER_1	1	/* next/prev poll area */
#define	GCPU_MCA_LOGOUT_POLLER_2	2	/* prev/next poll area */
#define	GCPU_MCA_LOGOUT_NUM		3

typedef struct gcpu_mca {
	gcpu_logout_t *gcpu_mca_logout[GCPU_MCA_LOGOUT_NUM];
	uint32_t gcpu_mca_nextpoll_idx;	/* logout area for next poll */
	struct gcpu_bios_cfg gcpu_mca_bioscfg;
	uint_t gcpu_mca_nbanks;
	size_t gcpu_mca_lgsz;		/* size of gcpu_mca_logout structs */
	uint_t gcpu_mca_flags;		/* GCPU_MCA_F_* */
	hrtime_t gcpu_mca_lastpoll;
	gcpu_poll_trace_ctl_t gcpu_polltrace;
	uint32_t gcpu_mca_first_poll_cmci_enabled; /* cmci on in first poll */
	gcpu_mca_cmci_t *gcpu_bank_cmci;
} gcpu_mca_t;

typedef struct gcpu_mce_status {
	uint_t mce_nerr;	/* total errors found in logout of all banks */
	uint64_t mce_disp;	/* Disposition information */
	uint_t mce_npcc;	/* number of errors with PCC */
	uint_t mce_npcc_ok;	/* PCC with CMS_ERRSCOPE_CURCONTEXT_OK */
	uint_t mce_nuc;		/* number of errors with UC */
	uint_t mce_nuc_ok;	/* UC with CMS_ERRSCOPE_CLEARED_UC */
	uint_t mce_nuc_poisoned; /* UC with CMS_ERRSCOPE_POISONED */
	uint_t mce_forcefatal;	/* CMS_ERRSCOPE_FORCE_FATAL */
	uint_t mce_ignored;	/* CMS_ERRSCOPE_IGNORE_ERR */
} gcpu_mce_status_t;

/*
 * Flags for gcpu_mca_flags
 */
#define	GCPU_MCA_F_UNFAULTING	0x1	/* CPU exiting faulted state */

/*
 * State shared by all cpus on a chip
 */
struct gcpu_chipshared {
	kmutex_t gcpus_cfglock;	/* serial MCA config from chip cores */
	kmutex_t gcpus_poll_lock; /* serialize pollers on the same chip */
	uint32_t gcpus_actv_banks; /* MCA bank numbers active on chip */
	volatile uint32_t gcpus_actv_cnt; /* active cpu count in this chip */
};

struct gcpu_data {
	gcpu_mca_t gcpu_mca;		/* MCA state for this CPU */
	cmi_hdl_t gcpu_hdl;		/* associated handle */
	struct gcpu_chipshared *gcpu_shared; /* Shared state for the chip */
};

#ifdef _KERNEL

struct regs;

/*
 * CMI implementation
 */
extern int gcpu_init(cmi_hdl_t, void **);
extern void gcpu_fini(cmi_hdl_t);
extern void gcpu_post_startup(cmi_hdl_t);
extern void gcpu_post_mpstartup(cmi_hdl_t);
extern void gcpu_faulted_enter(cmi_hdl_t);
extern void gcpu_faulted_exit(cmi_hdl_t);
extern void gcpu_mca_init(cmi_hdl_t);
extern void gcpu_mca_fini(cmi_hdl_t hdl);
extern cmi_errno_t gcpu_msrinject(cmi_hdl_t, cmi_mca_regs_t *, uint_t, int);
#ifndef __xpv
extern uint64_t gcpu_mca_trap(cmi_hdl_t, struct regs *);
extern void gcpu_cmci_trap(cmi_hdl_t);
extern void gcpu_hdl_poke(cmi_hdl_t);
#else
extern void gcpu_xpv_panic_callback(void);
#endif

/*
 * CMI global variable
 */
extern int cmi_enable_cmci;

/*
 * Local functions
 */
extern void gcpu_mca_poll_init(cmi_hdl_t);
extern void gcpu_mca_poll_fini(cmi_hdl_t);
extern void gcpu_mca_poll_start(cmi_hdl_t);
extern void gcpu_poll_trace_init(gcpu_poll_trace_ctl_t *);
extern void gcpu_poll_trace(gcpu_poll_trace_ctl_t *, uint8_t, uint8_t);
extern void gcpu_mca_logout(cmi_hdl_t, struct regs *, uint64_t,
    gcpu_mce_status_t *, boolean_t, int);
#ifdef __xpv
extern void gcpu_xpv_mca_init(int);
#endif /* __xpv */

#endif /* _KERNEL */

#ifdef __cplusplus
}
#endif

#endif /* _GCPU_H */