1 /****************************************************************************** 2 * arch-x86/xen-mca.h 3 * 4 * Contributed by Advanced Micro Devices, Inc. 5 * Author: Christoph Egger <Christoph.Egger@amd.com> 6 * 7 * Guest OS machine check interface to x86 Xen. 8 * 9 * Permission is hereby granted, free of charge, to any person obtaining a copy 10 * of this software and associated documentation files (the "Software"), to 11 * deal in the Software without restriction, including without limitation the 12 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 13 * sell copies of the Software, and to permit persons to whom the Software is 14 * furnished to do so, subject to the following conditions: 15 * 16 * The above copyright notice and this permission notice shall be included in 17 * all copies or substantial portions of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 * DEALINGS IN THE SOFTWARE. 26 */ 27 28 /* Full MCA functionality has the following Usecases from the guest side: 29 * 30 * Must have's: 31 * 1. Dom0 and DomU register machine check trap callback handlers 32 * (already done via "set_trap_table" hypercall) 33 * 2. Dom0 registers machine check event callback handler 34 * (doable via EVTCHNOP_bind_virq) 35 * 3. Dom0 and DomU fetches machine check data 36 * 4. Dom0 wants Xen to notify a DomU 37 * 5. Dom0 gets DomU ID from physical address 38 * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy") 39 * 40 * Nice to have's: 41 * 7. Dom0 wants Xen to deactivate a physical CPU 42 * This is better done as separate task, physical CPU hotplugging, 43 * and hypercall(s) should be sysctl's 44 * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to 45 * move a DomU (or Dom0 itself) away from a malicious page 46 * producing correctable errors. 47 * 9. offlining physical page: 48 * Xen free's and never re-uses a certain physical page. 49 * 10. Testfacility: Allow Dom0 to write values into machine check MSR's 50 * and tell Xen to trigger a machine check 51 */ 52 53 /* 54 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 55 * Use is subject to license terms. 56 */ 57 58 #ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__ 59 #define __XEN_PUBLIC_ARCH_X86_MCA_H__ 60 61 /* Hypercall */ 62 #define __HYPERVISOR_mca __HYPERVISOR_arch_0 63 64 #define XEN_MC_HCALL_SUCCESS 0 65 66 /* 67 * The xen-unstable repo has interface version 0x03000001; out interface 68 * is incompatible with that and any future minor revisions, so we 69 * choose a different version number range that is numerically less 70 * than that used in xen-unstable. 71 */ 72 #define XEN_MCA_INTERFACE_VERSION 0x01ecc001 73 74 /* IN: Dom0 calls hypercall to retrieve nonurgent telemetry */ 75 #define XEN_MC_F_NONURGENT 0x0001 76 /* IN: Dom0 calls hypercall to retrieve urgent telemetry */ 77 #define XEN_MC_F_URGENT 0x0002 78 /* IN: Dom0 acknowledges previosly-fetched telemetry */ 79 #define XEN_MC_F_ACK 0x0004 80 81 /* OUT: All is ok - all flags bits clear */ 82 #define XEN_MC_F_OK 0x0 83 /* OUT: Domain could not fetch data. */ 84 #define XEN_MC_F_FETCHFAILED 0x0001 85 /* OUT: There was no machine check data to fetch. */ 86 #define XEN_MC_F_NODATA 0x0002 87 /* OUT: Between notification time and this hypercall an other 88 * (most likely) correctable error happened. The fetched data, 89 * does not match the original machine check data. */ 90 #define XEN_MC_F_NOMATCH 0x0004 91 92 /* OUT: DomU did not register MC NMI handler. Try something else. */ 93 #define XEN_MC_F_CANNOTHANDLE 0x0008 94 /* OUT: Notifying DomU failed. Retry later or try something else. */ 95 #define XEN_MC_F_NOTDELIVERED 0x0010 96 /* Note, XEN_MC_F_CANNOTHANDLE and XEN_MC_F_NOTDELIVERED are mutually exclusive. */ 97 98 #ifndef __ASSEMBLY__ 99 100 #define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */ 101 102 /* 103 * Machine Check Architecure: 104 * structs are read-only and used to report all kinds of 105 * correctable and uncorrectable errors detected by the HW. 106 * Dom0 and DomU: register a handler to get notified. 107 * Dom0 only: Correctable errors are reported via VIRQ_MCA 108 * Dom0 and DomU: Uncorrectable errors are reported via nmi handlers 109 */ 110 #define MC_TYPE_GLOBAL 0 111 #define MC_TYPE_BANK 1 112 #define MC_TYPE_EXTENDED 2 113 114 struct mcinfo_common { 115 uint16_t type; /* structure type - one of MC_TYPE_* above */ 116 uint16_t size; /* size of this struct in bytes */ 117 }; 118 119 #define MC_FLAG_CORRECTABLE 0x00000001 120 #define MC_FLAG_UNCORRECTABLE 0x00000002 121 #define MC_FLAG_MCE 0x00000004 122 #define MC_FLAG_POLLED 0x00000008 123 124 /* contains global x86 mc information */ 125 struct mcinfo_global { 126 struct mcinfo_common common; 127 128 /* running domain at the time in error (most likely the impacted one) */ 129 uint16_t mc_domid; 130 uint32_t mc_socketid; /* physical socket of the physical core */ 131 uint16_t mc_coreid; /* physical impacted core */ 132 uint16_t mc_core_threadid; /* core thread of physical core */ 133 uint8_t mc_apicid; /* APIC id of physical core */ 134 uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */ 135 uint32_t mc_pad0; 136 uint64_t mc_gstatus; /* global status */ 137 uint32_t mc_flags; /* see MC_FLAG_* above */ 138 }; 139 140 /* contains bank local x86 mc information */ 141 struct mcinfo_bank { 142 struct mcinfo_common common; 143 144 uint16_t mc_bank; /* bank nr */ 145 uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr on dom0 146 * and if mc_addr is valid. Never valid on DomU. */ 147 uint64_t mc_status; /* bank status */ 148 uint64_t mc_addr; /* bank address, only valid 149 * if addr bit is set in mc_status */ 150 uint64_t mc_misc; 151 }; 152 153 154 struct mcinfo_msr { 155 uint64_t reg; /* MSR */ 156 uint64_t value; /* MSR value */ 157 }; 158 159 /* contains mc information from other 160 * or additional mc MSRs */ 161 struct mcinfo_extended { 162 struct mcinfo_common common; 163 164 /* You can fill up to five registers. 165 * If you need more, then use this structure 166 * multiple times. */ 167 168 uint32_t mc_msrs; /* Number of msr with valid values. */ 169 struct mcinfo_msr mc_msr[12]; 170 }; 171 172 #define MCINFO_MAXSIZE 768 173 174 typedef struct mc_info { 175 /* Number of mcinfo_* entries in mi_data */ 176 uint32_t mi_nentries; 177 178 uint8_t mi_data[MCINFO_MAXSIZE - sizeof(uint32_t)]; 179 } mc_info_t; 180 DEFINE_XEN_GUEST_HANDLE(mc_info_t); 181 182 #define __MC_MSR_ARRAYSIZE 8 183 #define __MC_NMSRS 1 184 #define MC_NCAPS 7 /* 7 CPU feature flag words */ 185 #define MC_CAPS_STD_EDX 0 /* cpuid level 0x00000001 (%edx) */ 186 #define MC_CAPS_AMD_EDX 1 /* cpuid level 0x80000001 (%edx) */ 187 #define MC_CAPS_TM 2 /* cpuid level 0x80860001 (TransMeta) */ 188 #define MC_CAPS_LINUX 3 /* Linux-defined */ 189 #define MC_CAPS_STD_ECX 4 /* cpuid level 0x00000001 (%ecx) */ 190 #define MC_CAPS_VIA 5 /* cpuid level 0xc0000001 */ 191 #define MC_CAPS_AMD_ECX 6 /* cpuid level 0x80000001 (%ecx) */ 192 193 typedef struct mcinfo_logical_cpu { 194 unsigned int mc_cpunr; 195 uint32_t mc_chipid; 196 uint16_t mc_coreid; 197 uint16_t mc_threadid; 198 uint8_t mc_apicid; 199 unsigned int mc_ncores; 200 unsigned int mc_ncores_active; 201 unsigned int mc_nthreads; 202 int mc_cpuid_level; 203 unsigned int mc_family; 204 unsigned int mc_vendor; 205 unsigned int mc_model; 206 unsigned int mc_step; 207 char mc_vendorid[16]; 208 char mc_brandid[64]; 209 uint32_t mc_cpu_caps[MC_NCAPS]; 210 unsigned int mc_cache_size; 211 unsigned int mc_cache_alignment; 212 unsigned int mc_nmsrvals; 213 struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE]; 214 } xen_mc_logical_cpu_t; 215 DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t); 216 217 /* 218 * OS's should use these instead of writing their own lookup function 219 * each with its own bugs and drawbacks. 220 * We use macros instead of static inline functions to allow guests 221 * to include this header in assembly files (*.S). 222 */ 223 /* Prototype: 224 * uint32_t x86_mcinfo_nentries(struct mc_info *mi); 225 */ 226 #define x86_mcinfo_nentries(_mi) \ 227 (_mi)->mi_nentries 228 /* Prototype: 229 * struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi); 230 */ 231 #define x86_mcinfo_first(_mi) \ 232 (struct mcinfo_common *)((_mi)->mi_data) 233 /* Prototype: 234 * struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic); 235 */ 236 #define x86_mcinfo_next(_mic) \ 237 (struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size) 238 239 /* Prototype: 240 * void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type); 241 */ 242 #define x86_mcinfo_lookup(_ret, _mi, _type) \ 243 do { \ 244 uint32_t found, i; \ 245 struct mcinfo_common *_mic; \ 246 \ 247 found = 0; \ 248 (_ret) = NULL; \ 249 if (_mi == NULL) break; \ 250 _mic = x86_mcinfo_first(_mi); \ 251 for (i = 0; i < x86_mcinfo_nentries(_mi); i++) { \ 252 if (_mic->type == (_type)) { \ 253 found = 1; \ 254 break; \ 255 } \ 256 _mic = x86_mcinfo_next(_mic); \ 257 } \ 258 (_ret) = found ? _mic : NULL; \ 259 } while (0) 260 261 262 /* Usecase 1 263 * Register machine check trap callback handler 264 * (already done via "set_trap_table" hypercall) 265 */ 266 267 /* Usecase 2 268 * Dom0 registers machine check event callback handler 269 * done by EVTCHNOP_bind_virq 270 */ 271 272 /* Usecase 3 273 * Fetch machine check data from hypervisor. 274 * Note, this hypercall is special, because both Dom0 and DomU must use this. 275 */ 276 #define XEN_MC_CMD_fetch 1 277 struct xen_mc_fetch { 278 /* IN/OUT */ 279 uint32_t flags; /* IN: XEN_MC_F_NONURGENT or XEN_MC_F_URGENT, 280 * XEN_MC_F_ACK if ack'ing an earlier fetch 281 * OUT: XEN_MC_F_OK, XEN_MC_F_FETCHFAILED, 282 * XEN_MC_F_NODATA, XEN_MC_F_NOMATCH */ 283 uint32_t data_sz; /* IN: size of data area */ 284 uint64_t fetch_id; /* OUT: id for ack; IN: id we are ack'ing */ 285 286 287 /* OUT */ 288 XEN_GUEST_HANDLE(mc_info_t) data; 289 }; 290 typedef struct xen_mc_fetch xen_mc_fetch_t; 291 DEFINE_XEN_GUEST_HANDLE(xen_mc_fetch_t); 292 293 294 /* Usecase 4 295 * This tells the hypervisor to notify a DomU about the machine check error 296 */ 297 #define XEN_MC_CMD_notifydomain 2 298 struct xen_mc_notifydomain { 299 /* IN variables. */ 300 uint16_t mc_domid; /* The unprivileged domain to notify. */ 301 uint16_t mc_vcpuid; /* The vcpu in mc_domid to notify. 302 * Usually echo'd value from the fetch hypercall. */ 303 304 /* IN/OUT variables. */ 305 uint32_t flags; /* IN: XEN_MC_F_URGENT, XEN_MC_F_TRAP 306 * OUT: XEN_MC_F_OK, XEN_MC_F_CANNOTHANDLE, 307 * XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */ 308 }; 309 typedef struct xen_mc_notifydomain xen_mc_notifydomain_t; 310 DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t); 311 312 #define XEN_MC_CMD_physcpuinfo 3 313 struct xen_mc_physcpuinfo { 314 /* IN/OUT */ 315 uint32_t ncpus; 316 uint32_t pad0; 317 /* OUT */ 318 XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info; 319 }; 320 321 #define XEN_MC_CMD_msrinject 4 322 #define MC_MSRINJ_MAXMSRS 8 323 struct xen_mc_msrinject { 324 /* IN */ 325 unsigned int mcinj_cpunr; /* target processor id */ 326 uint32_t mcinj_flags; /* see MC_MSRINJ_F_* below */ 327 uint32_t mcinj_count; /* 0 .. count-1 in array are valid */ 328 uint32_t mcinj_pad0; 329 struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS]; 330 }; 331 332 /* Flags for mcinj_flags above; bits 16-31 are reserved */ 333 #define MC_MSRINJ_F_INTERPOSE 0x1 334 335 #define XEN_MC_CMD_mceinject 5 336 struct xen_mc_mceinject { 337 unsigned int mceinj_cpunr; /* target processor id */ 338 }; 339 340 #define XEN_MC_CMD_offlinecpu 6 341 struct xen_mc_offline { 342 /* IN */ 343 unsigned int mco_cpu; 344 /* IN / OUT */ 345 int mco_flag; /* MC_CPU_P_* */ 346 }; 347 348 #define MC_CPU_P_STATUS 0x0000 349 #define MC_CPU_P_ONLINE 0x0001 350 #define MC_CPU_P_OFFLINE 0x0002 351 #define MC_CPU_P_FAULTED 0x0004 352 #define MC_CPU_P_SPARE 0x0008 353 #define MC_CPU_P_POWEROFF 0x0010 354 355 typedef union { 356 struct xen_mc_fetch mc_fetch; 357 struct xen_mc_notifydomain mc_notifydomain; 358 struct xen_mc_physcpuinfo mc_physcpuinfo; 359 struct xen_mc_msrinject mc_msrinject; 360 struct xen_mc_mceinject mc_mceinject; 361 struct xen_mc_offline mc_offline; 362 } xen_mc_arg_t; 363 364 struct xen_mc { 365 uint32_t cmd; 366 uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */ 367 xen_mc_arg_t u; 368 }; 369 typedef struct xen_mc xen_mc_t; 370 DEFINE_XEN_GUEST_HANDLE(xen_mc_t); 371 372 #endif /* __ASSEMBLY__ */ 373 374 #endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */ 375