1 /****************************************************************************** 2 * mcelog.c 3 * Driver for receiving and transferring machine check error infomation 4 * 5 * Copyright (c) 2012 Intel Corporation 6 * Author: Liu, Jinsong <jinsong.liu@intel.com> 7 * Author: Jiang, Yunhong <yunhong.jiang@intel.com> 8 * Author: Ke, Liping <liping.ke@intel.com> 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License version 2 12 * as published by the Free Software Foundation; or, when distributed 13 * separately from the Linux kernel or incorporated into other 14 * software packages, subject to the following license: 15 * 16 * Permission is hereby granted, free of charge, to any person obtaining a copy 17 * of this source file (the "Software"), to deal in the Software without 18 * restriction, including without limitation the rights to use, copy, modify, 19 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 20 * and to permit persons to whom the Software is furnished to do so, subject to 21 * the following conditions: 22 * 23 * The above copyright notice and this permission notice shall be included in 24 * all copies or substantial portions of the Software. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 32 * IN THE SOFTWARE. 33 */ 34 35 #define pr_fmt(fmt) "xen_mcelog: " fmt 36 37 #include <linux/init.h> 38 #include <linux/types.h> 39 #include <linux/kernel.h> 40 #include <linux/slab.h> 41 #include <linux/fs.h> 42 #include <linux/device.h> 43 #include <linux/miscdevice.h> 44 #include <linux/uaccess.h> 45 #include <linux/capability.h> 46 #include <linux/poll.h> 47 #include <linux/sched.h> 48 49 #include <xen/interface/xen.h> 50 #include <xen/events.h> 51 #include <xen/interface/vcpu.h> 52 #include <xen/xen.h> 53 #include <asm/xen/hypercall.h> 54 #include <asm/xen/hypervisor.h> 55 56 static struct mc_info g_mi; 57 static struct mcinfo_logical_cpu *g_physinfo; 58 static uint32_t ncpus; 59 60 static DEFINE_MUTEX(mcelog_lock); 61 62 static struct xen_mce_log xen_mcelog = { 63 .signature = XEN_MCE_LOG_SIGNATURE, 64 .len = XEN_MCE_LOG_LEN, 65 .recordlen = sizeof(struct xen_mce), 66 }; 67 68 static DEFINE_SPINLOCK(xen_mce_chrdev_state_lock); 69 static int xen_mce_chrdev_open_count; /* #times opened */ 70 static int xen_mce_chrdev_open_exclu; /* already open exclusive? */ 71 72 static DECLARE_WAIT_QUEUE_HEAD(xen_mce_chrdev_wait); 73 74 static int xen_mce_chrdev_open(struct inode *inode, struct file *file) 75 { 76 spin_lock(&xen_mce_chrdev_state_lock); 77 78 if (xen_mce_chrdev_open_exclu || 79 (xen_mce_chrdev_open_count && (file->f_flags & O_EXCL))) { 80 spin_unlock(&xen_mce_chrdev_state_lock); 81 82 return -EBUSY; 83 } 84 85 if (file->f_flags & O_EXCL) 86 xen_mce_chrdev_open_exclu = 1; 87 xen_mce_chrdev_open_count++; 88 89 spin_unlock(&xen_mce_chrdev_state_lock); 90 91 return nonseekable_open(inode, file); 92 } 93 94 static int xen_mce_chrdev_release(struct inode *inode, struct file *file) 95 { 96 spin_lock(&xen_mce_chrdev_state_lock); 97 98 xen_mce_chrdev_open_count--; 99 xen_mce_chrdev_open_exclu = 0; 100 101 spin_unlock(&xen_mce_chrdev_state_lock); 102 103 return 0; 104 } 105 106 static ssize_t xen_mce_chrdev_read(struct file *filp, char __user *ubuf, 107 size_t usize, loff_t *off) 108 { 109 char __user *buf = ubuf; 110 unsigned num; 111 int i, err; 112 113 mutex_lock(&mcelog_lock); 114 115 num = xen_mcelog.next; 116 117 /* Only supports full reads right now */ 118 err = -EINVAL; 119 if (*off != 0 || usize < XEN_MCE_LOG_LEN*sizeof(struct xen_mce)) 120 goto out; 121 122 err = 0; 123 for (i = 0; i < num; i++) { 124 struct xen_mce *m = &xen_mcelog.entry[i]; 125 126 err |= copy_to_user(buf, m, sizeof(*m)); 127 buf += sizeof(*m); 128 } 129 130 memset(xen_mcelog.entry, 0, num * sizeof(struct xen_mce)); 131 xen_mcelog.next = 0; 132 133 if (err) 134 err = -EFAULT; 135 136 out: 137 mutex_unlock(&mcelog_lock); 138 139 return err ? err : buf - ubuf; 140 } 141 142 static __poll_t xen_mce_chrdev_poll(struct file *file, poll_table *wait) 143 { 144 poll_wait(file, &xen_mce_chrdev_wait, wait); 145 146 if (xen_mcelog.next) 147 return EPOLLIN | EPOLLRDNORM; 148 149 return 0; 150 } 151 152 static long xen_mce_chrdev_ioctl(struct file *f, unsigned int cmd, 153 unsigned long arg) 154 { 155 int __user *p = (int __user *)arg; 156 157 if (!capable(CAP_SYS_ADMIN)) 158 return -EPERM; 159 160 switch (cmd) { 161 case MCE_GET_RECORD_LEN: 162 return put_user(sizeof(struct xen_mce), p); 163 case MCE_GET_LOG_LEN: 164 return put_user(XEN_MCE_LOG_LEN, p); 165 case MCE_GETCLEAR_FLAGS: { 166 unsigned flags; 167 168 do { 169 flags = xen_mcelog.flags; 170 } while (cmpxchg(&xen_mcelog.flags, flags, 0) != flags); 171 172 return put_user(flags, p); 173 } 174 default: 175 return -ENOTTY; 176 } 177 } 178 179 static const struct file_operations xen_mce_chrdev_ops = { 180 .open = xen_mce_chrdev_open, 181 .release = xen_mce_chrdev_release, 182 .read = xen_mce_chrdev_read, 183 .poll = xen_mce_chrdev_poll, 184 .unlocked_ioctl = xen_mce_chrdev_ioctl, 185 }; 186 187 static struct miscdevice xen_mce_chrdev_device = { 188 MISC_MCELOG_MINOR, 189 "mcelog", 190 &xen_mce_chrdev_ops, 191 }; 192 193 /* 194 * Caller should hold the mcelog_lock 195 */ 196 static void xen_mce_log(struct xen_mce *mce) 197 { 198 unsigned entry; 199 200 entry = xen_mcelog.next; 201 202 /* 203 * When the buffer fills up discard new entries. 204 * Assume that the earlier errors are the more 205 * interesting ones: 206 */ 207 if (entry >= XEN_MCE_LOG_LEN) { 208 set_bit(XEN_MCE_OVERFLOW, 209 (unsigned long *)&xen_mcelog.flags); 210 return; 211 } 212 213 memcpy(xen_mcelog.entry + entry, mce, sizeof(struct xen_mce)); 214 215 xen_mcelog.next++; 216 } 217 218 static int convert_log(struct mc_info *mi) 219 { 220 struct mcinfo_common *mic; 221 struct mcinfo_global *mc_global; 222 struct mcinfo_bank *mc_bank; 223 struct xen_mce m; 224 unsigned int i, j; 225 226 mic = NULL; 227 x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL); 228 if (unlikely(!mic)) { 229 pr_warn("Failed to find global error info\n"); 230 return -ENODEV; 231 } 232 233 memset(&m, 0, sizeof(struct xen_mce)); 234 235 mc_global = (struct mcinfo_global *)mic; 236 m.mcgstatus = mc_global->mc_gstatus; 237 m.apicid = mc_global->mc_apicid; 238 239 for (i = 0; i < ncpus; i++) 240 if (g_physinfo[i].mc_apicid == m.apicid) 241 break; 242 if (unlikely(i == ncpus)) { 243 pr_warn("Failed to match cpu with apicid %d\n", m.apicid); 244 return -ENODEV; 245 } 246 247 m.socketid = g_physinfo[i].mc_chipid; 248 m.cpu = m.extcpu = g_physinfo[i].mc_cpunr; 249 m.cpuvendor = (__u8)g_physinfo[i].mc_vendor; 250 for (j = 0; j < g_physinfo[i].mc_nmsrvals; ++j) 251 switch (g_physinfo[i].mc_msrvalues[j].reg) { 252 case MSR_IA32_MCG_CAP: 253 m.mcgcap = g_physinfo[i].mc_msrvalues[j].value; 254 break; 255 256 case MSR_PPIN: 257 case MSR_AMD_PPIN: 258 m.ppin = g_physinfo[i].mc_msrvalues[j].value; 259 break; 260 } 261 262 mic = NULL; 263 x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK); 264 if (unlikely(!mic)) { 265 pr_warn("Fail to find bank error info\n"); 266 return -ENODEV; 267 } 268 269 do { 270 if ((!mic) || (mic->size == 0) || 271 (mic->type != MC_TYPE_GLOBAL && 272 mic->type != MC_TYPE_BANK && 273 mic->type != MC_TYPE_EXTENDED && 274 mic->type != MC_TYPE_RECOVERY)) 275 break; 276 277 if (mic->type == MC_TYPE_BANK) { 278 mc_bank = (struct mcinfo_bank *)mic; 279 m.misc = mc_bank->mc_misc; 280 m.status = mc_bank->mc_status; 281 m.addr = mc_bank->mc_addr; 282 m.tsc = mc_bank->mc_tsc; 283 m.bank = mc_bank->mc_bank; 284 m.finished = 1; 285 /*log this record*/ 286 xen_mce_log(&m); 287 } 288 mic = x86_mcinfo_next(mic); 289 } while (1); 290 291 return 0; 292 } 293 294 static int mc_queue_handle(uint32_t flags) 295 { 296 struct xen_mc mc_op; 297 int ret = 0; 298 299 mc_op.cmd = XEN_MC_fetch; 300 set_xen_guest_handle(mc_op.u.mc_fetch.data, &g_mi); 301 do { 302 mc_op.u.mc_fetch.flags = flags; 303 ret = HYPERVISOR_mca(&mc_op); 304 if (ret) { 305 pr_err("Failed to fetch %surgent error log\n", 306 flags == XEN_MC_URGENT ? "" : "non"); 307 break; 308 } 309 310 if (mc_op.u.mc_fetch.flags & XEN_MC_NODATA || 311 mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED) 312 break; 313 else { 314 ret = convert_log(&g_mi); 315 if (ret) 316 pr_warn("Failed to convert this error log, continue acking it anyway\n"); 317 318 mc_op.u.mc_fetch.flags = flags | XEN_MC_ACK; 319 ret = HYPERVISOR_mca(&mc_op); 320 if (ret) { 321 pr_err("Failed to ack previous error log\n"); 322 break; 323 } 324 } 325 } while (1); 326 327 return ret; 328 } 329 330 /* virq handler for machine check error info*/ 331 static void xen_mce_work_fn(struct work_struct *work) 332 { 333 int err; 334 335 mutex_lock(&mcelog_lock); 336 337 /* urgent mc_info */ 338 err = mc_queue_handle(XEN_MC_URGENT); 339 if (err) 340 pr_err("Failed to handle urgent mc_info queue, continue handling nonurgent mc_info queue anyway\n"); 341 342 /* nonurgent mc_info */ 343 err = mc_queue_handle(XEN_MC_NONURGENT); 344 if (err) 345 pr_err("Failed to handle nonurgent mc_info queue\n"); 346 347 /* wake processes polling /dev/mcelog */ 348 wake_up_interruptible(&xen_mce_chrdev_wait); 349 350 mutex_unlock(&mcelog_lock); 351 } 352 static DECLARE_WORK(xen_mce_work, xen_mce_work_fn); 353 354 static irqreturn_t xen_mce_interrupt(int irq, void *dev_id) 355 { 356 schedule_work(&xen_mce_work); 357 return IRQ_HANDLED; 358 } 359 360 static int bind_virq_for_mce(void) 361 { 362 int ret; 363 struct xen_mc mc_op; 364 365 memset(&mc_op, 0, sizeof(struct xen_mc)); 366 367 /* Fetch physical CPU Numbers */ 368 mc_op.cmd = XEN_MC_physcpuinfo; 369 set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo); 370 ret = HYPERVISOR_mca(&mc_op); 371 if (ret) { 372 pr_err("Failed to get CPU numbers\n"); 373 return ret; 374 } 375 376 /* Fetch each CPU Physical Info for later reference*/ 377 ncpus = mc_op.u.mc_physcpuinfo.ncpus; 378 g_physinfo = kcalloc(ncpus, sizeof(struct mcinfo_logical_cpu), 379 GFP_KERNEL); 380 if (!g_physinfo) 381 return -ENOMEM; 382 set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo); 383 ret = HYPERVISOR_mca(&mc_op); 384 if (ret) { 385 pr_err("Failed to get CPU info\n"); 386 kfree(g_physinfo); 387 return ret; 388 } 389 390 ret = bind_virq_to_irqhandler(VIRQ_MCA, 0, 391 xen_mce_interrupt, 0, "mce", NULL); 392 if (ret < 0) { 393 pr_err("Failed to bind virq\n"); 394 kfree(g_physinfo); 395 return ret; 396 } 397 398 return 0; 399 } 400 401 static int __init xen_late_init_mcelog(void) 402 { 403 int ret; 404 405 /* Only DOM0 is responsible for MCE logging */ 406 if (!xen_initial_domain()) 407 return -ENODEV; 408 409 /* register character device /dev/mcelog for xen mcelog */ 410 ret = misc_register(&xen_mce_chrdev_device); 411 if (ret) 412 return ret; 413 414 ret = bind_virq_for_mce(); 415 if (ret) 416 goto deregister; 417 418 pr_info("/dev/mcelog registered by Xen\n"); 419 420 return 0; 421 422 deregister: 423 misc_deregister(&xen_mce_chrdev_device); 424 return ret; 425 } 426 device_initcall(xen_late_init_mcelog); 427