1 /****************************************************************************** 2 * mcelog.c 3 * Driver for receiving and transferring machine check error infomation 4 * 5 * Copyright (c) 2012 Intel Corporation 6 * Author: Liu, Jinsong <jinsong.liu@intel.com> 7 * Author: Jiang, Yunhong <yunhong.jiang@intel.com> 8 * Author: Ke, Liping <liping.ke@intel.com> 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License version 2 12 * as published by the Free Software Foundation; or, when distributed 13 * separately from the Linux kernel or incorporated into other 14 * software packages, subject to the following license: 15 * 16 * Permission is hereby granted, free of charge, to any person obtaining a copy 17 * of this source file (the "Software"), to deal in the Software without 18 * restriction, including without limitation the rights to use, copy, modify, 19 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 20 * and to permit persons to whom the Software is furnished to do so, subject to 21 * the following conditions: 22 * 23 * The above copyright notice and this permission notice shall be included in 24 * all copies or substantial portions of the Software. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 32 * IN THE SOFTWARE. 33 */ 34 35 #define pr_fmt(fmt) "xen_mcelog: " fmt 36 37 #include <linux/init.h> 38 #include <linux/types.h> 39 #include <linux/kernel.h> 40 #include <linux/slab.h> 41 #include <linux/fs.h> 42 #include <linux/device.h> 43 #include <linux/miscdevice.h> 44 #include <linux/uaccess.h> 45 #include <linux/capability.h> 46 #include <linux/poll.h> 47 #include <linux/sched.h> 48 49 #include <xen/interface/xen.h> 50 #include <xen/events.h> 51 #include <xen/interface/vcpu.h> 52 #include <xen/xen.h> 53 #include <asm/xen/hypercall.h> 54 #include <asm/xen/hypervisor.h> 55 56 static struct mc_info g_mi; 57 static struct mcinfo_logical_cpu *g_physinfo; 58 static uint32_t ncpus; 59 60 static DEFINE_MUTEX(mcelog_lock); 61 62 static struct xen_mce_log xen_mcelog = { 63 .signature = XEN_MCE_LOG_SIGNATURE, 64 .len = XEN_MCE_LOG_LEN, 65 .recordlen = sizeof(struct xen_mce), 66 }; 67 68 static DEFINE_SPINLOCK(xen_mce_chrdev_state_lock); 69 static int xen_mce_chrdev_open_count; /* #times opened */ 70 static int xen_mce_chrdev_open_exclu; /* already open exclusive? */ 71 72 static DECLARE_WAIT_QUEUE_HEAD(xen_mce_chrdev_wait); 73 74 static int xen_mce_chrdev_open(struct inode *inode, struct file *file) 75 { 76 spin_lock(&xen_mce_chrdev_state_lock); 77 78 if (xen_mce_chrdev_open_exclu || 79 (xen_mce_chrdev_open_count && (file->f_flags & O_EXCL))) { 80 spin_unlock(&xen_mce_chrdev_state_lock); 81 82 return -EBUSY; 83 } 84 85 if (file->f_flags & O_EXCL) 86 xen_mce_chrdev_open_exclu = 1; 87 xen_mce_chrdev_open_count++; 88 89 spin_unlock(&xen_mce_chrdev_state_lock); 90 91 return nonseekable_open(inode, file); 92 } 93 94 static int xen_mce_chrdev_release(struct inode *inode, struct file *file) 95 { 96 spin_lock(&xen_mce_chrdev_state_lock); 97 98 xen_mce_chrdev_open_count--; 99 xen_mce_chrdev_open_exclu = 0; 100 101 spin_unlock(&xen_mce_chrdev_state_lock); 102 103 return 0; 104 } 105 106 static ssize_t xen_mce_chrdev_read(struct file *filp, char __user *ubuf, 107 size_t usize, loff_t *off) 108 { 109 char __user *buf = ubuf; 110 unsigned num; 111 int i, err; 112 113 mutex_lock(&mcelog_lock); 114 115 num = xen_mcelog.next; 116 117 /* Only supports full reads right now */ 118 err = -EINVAL; 119 if (*off != 0 || usize < XEN_MCE_LOG_LEN*sizeof(struct xen_mce)) 120 goto out; 121 122 err = 0; 123 for (i = 0; i < num; i++) { 124 struct xen_mce *m = &xen_mcelog.entry[i]; 125 126 err |= copy_to_user(buf, m, sizeof(*m)); 127 buf += sizeof(*m); 128 } 129 130 memset(xen_mcelog.entry, 0, num * sizeof(struct xen_mce)); 131 xen_mcelog.next = 0; 132 133 if (err) 134 err = -EFAULT; 135 136 out: 137 mutex_unlock(&mcelog_lock); 138 139 return err ? err : buf - ubuf; 140 } 141 142 static __poll_t xen_mce_chrdev_poll(struct file *file, poll_table *wait) 143 { 144 poll_wait(file, &xen_mce_chrdev_wait, wait); 145 146 if (xen_mcelog.next) 147 return EPOLLIN | EPOLLRDNORM; 148 149 return 0; 150 } 151 152 static long xen_mce_chrdev_ioctl(struct file *f, unsigned int cmd, 153 unsigned long arg) 154 { 155 int __user *p = (int __user *)arg; 156 157 if (!capable(CAP_SYS_ADMIN)) 158 return -EPERM; 159 160 switch (cmd) { 161 case MCE_GET_RECORD_LEN: 162 return put_user(sizeof(struct xen_mce), p); 163 case MCE_GET_LOG_LEN: 164 return put_user(XEN_MCE_LOG_LEN, p); 165 case MCE_GETCLEAR_FLAGS: { 166 unsigned flags; 167 168 flags = xchg(&xen_mcelog.flags, 0); 169 170 return put_user(flags, p); 171 } 172 default: 173 return -ENOTTY; 174 } 175 } 176 177 static const struct file_operations xen_mce_chrdev_ops = { 178 .open = xen_mce_chrdev_open, 179 .release = xen_mce_chrdev_release, 180 .read = xen_mce_chrdev_read, 181 .poll = xen_mce_chrdev_poll, 182 .unlocked_ioctl = xen_mce_chrdev_ioctl, 183 }; 184 185 static struct miscdevice xen_mce_chrdev_device = { 186 MISC_MCELOG_MINOR, 187 "mcelog", 188 &xen_mce_chrdev_ops, 189 }; 190 191 /* 192 * Caller should hold the mcelog_lock 193 */ 194 static void xen_mce_log(struct xen_mce *mce) 195 { 196 unsigned entry; 197 198 entry = xen_mcelog.next; 199 200 /* 201 * When the buffer fills up discard new entries. 202 * Assume that the earlier errors are the more 203 * interesting ones: 204 */ 205 if (entry >= XEN_MCE_LOG_LEN) { 206 set_bit(XEN_MCE_OVERFLOW, 207 (unsigned long *)&xen_mcelog.flags); 208 return; 209 } 210 211 memcpy(xen_mcelog.entry + entry, mce, sizeof(struct xen_mce)); 212 213 xen_mcelog.next++; 214 } 215 216 static int convert_log(struct mc_info *mi) 217 { 218 struct mcinfo_common *mic; 219 struct mcinfo_global *mc_global; 220 struct mcinfo_bank *mc_bank; 221 struct xen_mce m; 222 unsigned int i, j; 223 224 mic = NULL; 225 x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL); 226 if (unlikely(!mic)) { 227 pr_warn("Failed to find global error info\n"); 228 return -ENODEV; 229 } 230 231 memset(&m, 0, sizeof(struct xen_mce)); 232 233 mc_global = (struct mcinfo_global *)mic; 234 m.mcgstatus = mc_global->mc_gstatus; 235 m.apicid = mc_global->mc_apicid; 236 237 for (i = 0; i < ncpus; i++) 238 if (g_physinfo[i].mc_apicid == m.apicid) 239 break; 240 if (unlikely(i == ncpus)) { 241 pr_warn("Failed to match cpu with apicid %d\n", m.apicid); 242 return -ENODEV; 243 } 244 245 m.socketid = g_physinfo[i].mc_chipid; 246 m.cpu = m.extcpu = g_physinfo[i].mc_cpunr; 247 m.cpuvendor = (__u8)g_physinfo[i].mc_vendor; 248 for (j = 0; j < g_physinfo[i].mc_nmsrvals; ++j) 249 switch (g_physinfo[i].mc_msrvalues[j].reg) { 250 case MSR_IA32_MCG_CAP: 251 m.mcgcap = g_physinfo[i].mc_msrvalues[j].value; 252 break; 253 254 case MSR_PPIN: 255 case MSR_AMD_PPIN: 256 m.ppin = g_physinfo[i].mc_msrvalues[j].value; 257 break; 258 } 259 260 mic = NULL; 261 x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK); 262 if (unlikely(!mic)) { 263 pr_warn("Fail to find bank error info\n"); 264 return -ENODEV; 265 } 266 267 do { 268 if ((!mic) || (mic->size == 0) || 269 (mic->type != MC_TYPE_GLOBAL && 270 mic->type != MC_TYPE_BANK && 271 mic->type != MC_TYPE_EXTENDED && 272 mic->type != MC_TYPE_RECOVERY)) 273 break; 274 275 if (mic->type == MC_TYPE_BANK) { 276 mc_bank = (struct mcinfo_bank *)mic; 277 m.misc = mc_bank->mc_misc; 278 m.status = mc_bank->mc_status; 279 m.addr = mc_bank->mc_addr; 280 m.tsc = mc_bank->mc_tsc; 281 m.bank = mc_bank->mc_bank; 282 m.finished = 1; 283 /*log this record*/ 284 xen_mce_log(&m); 285 } 286 mic = x86_mcinfo_next(mic); 287 } while (1); 288 289 return 0; 290 } 291 292 static int mc_queue_handle(uint32_t flags) 293 { 294 struct xen_mc mc_op; 295 int ret = 0; 296 297 mc_op.cmd = XEN_MC_fetch; 298 set_xen_guest_handle(mc_op.u.mc_fetch.data, &g_mi); 299 do { 300 mc_op.u.mc_fetch.flags = flags; 301 ret = HYPERVISOR_mca(&mc_op); 302 if (ret) { 303 pr_err("Failed to fetch %surgent error log\n", 304 flags == XEN_MC_URGENT ? "" : "non"); 305 break; 306 } 307 308 if (mc_op.u.mc_fetch.flags & XEN_MC_NODATA || 309 mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED) 310 break; 311 else { 312 ret = convert_log(&g_mi); 313 if (ret) 314 pr_warn("Failed to convert this error log, continue acking it anyway\n"); 315 316 mc_op.u.mc_fetch.flags = flags | XEN_MC_ACK; 317 ret = HYPERVISOR_mca(&mc_op); 318 if (ret) { 319 pr_err("Failed to ack previous error log\n"); 320 break; 321 } 322 } 323 } while (1); 324 325 return ret; 326 } 327 328 /* virq handler for machine check error info*/ 329 static void xen_mce_work_fn(struct work_struct *work) 330 { 331 int err; 332 333 mutex_lock(&mcelog_lock); 334 335 /* urgent mc_info */ 336 err = mc_queue_handle(XEN_MC_URGENT); 337 if (err) 338 pr_err("Failed to handle urgent mc_info queue, continue handling nonurgent mc_info queue anyway\n"); 339 340 /* nonurgent mc_info */ 341 err = mc_queue_handle(XEN_MC_NONURGENT); 342 if (err) 343 pr_err("Failed to handle nonurgent mc_info queue\n"); 344 345 /* wake processes polling /dev/mcelog */ 346 wake_up_interruptible(&xen_mce_chrdev_wait); 347 348 mutex_unlock(&mcelog_lock); 349 } 350 static DECLARE_WORK(xen_mce_work, xen_mce_work_fn); 351 352 static irqreturn_t xen_mce_interrupt(int irq, void *dev_id) 353 { 354 schedule_work(&xen_mce_work); 355 return IRQ_HANDLED; 356 } 357 358 static int bind_virq_for_mce(void) 359 { 360 int ret; 361 struct xen_mc mc_op; 362 363 memset(&mc_op, 0, sizeof(struct xen_mc)); 364 365 /* Fetch physical CPU Numbers */ 366 mc_op.cmd = XEN_MC_physcpuinfo; 367 set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo); 368 ret = HYPERVISOR_mca(&mc_op); 369 if (ret) { 370 pr_err("Failed to get CPU numbers\n"); 371 return ret; 372 } 373 374 /* Fetch each CPU Physical Info for later reference*/ 375 ncpus = mc_op.u.mc_physcpuinfo.ncpus; 376 g_physinfo = kcalloc(ncpus, sizeof(struct mcinfo_logical_cpu), 377 GFP_KERNEL); 378 if (!g_physinfo) 379 return -ENOMEM; 380 set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo); 381 ret = HYPERVISOR_mca(&mc_op); 382 if (ret) { 383 pr_err("Failed to get CPU info\n"); 384 kfree(g_physinfo); 385 return ret; 386 } 387 388 ret = bind_virq_to_irqhandler(VIRQ_MCA, 0, 389 xen_mce_interrupt, 0, "mce", NULL); 390 if (ret < 0) { 391 pr_err("Failed to bind virq\n"); 392 kfree(g_physinfo); 393 return ret; 394 } 395 396 return 0; 397 } 398 399 static int __init xen_late_init_mcelog(void) 400 { 401 int ret; 402 403 /* Only DOM0 is responsible for MCE logging */ 404 if (!xen_initial_domain()) 405 return -ENODEV; 406 407 /* register character device /dev/mcelog for xen mcelog */ 408 ret = misc_register(&xen_mce_chrdev_device); 409 if (ret) 410 return ret; 411 412 ret = bind_virq_for_mce(); 413 if (ret) 414 goto deregister; 415 416 pr_info("/dev/mcelog registered by Xen\n"); 417 418 return 0; 419 420 deregister: 421 misc_deregister(&xen_mce_chrdev_device); 422 return ret; 423 } 424 device_initcall(xen_late_init_mcelog); 425