1 /****************************************************************************** 2 * mcelog.c 3 * Driver for receiving and transferring machine check error infomation 4 * 5 * Copyright (c) 2012 Intel Corporation 6 * Author: Liu, Jinsong <jinsong.liu@intel.com> 7 * Author: Jiang, Yunhong <yunhong.jiang@intel.com> 8 * Author: Ke, Liping <liping.ke@intel.com> 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License version 2 12 * as published by the Free Software Foundation; or, when distributed 13 * separately from the Linux kernel or incorporated into other 14 * software packages, subject to the following license: 15 * 16 * Permission is hereby granted, free of charge, to any person obtaining a copy 17 * of this source file (the "Software"), to deal in the Software without 18 * restriction, including without limitation the rights to use, copy, modify, 19 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 20 * and to permit persons to whom the Software is furnished to do so, subject to 21 * the following conditions: 22 * 23 * The above copyright notice and this permission notice shall be included in 24 * all copies or substantial portions of the Software. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 32 * IN THE SOFTWARE. 33 */ 34 35 #include <linux/init.h> 36 #include <linux/types.h> 37 #include <linux/kernel.h> 38 #include <linux/slab.h> 39 #include <linux/fs.h> 40 #include <linux/device.h> 41 #include <linux/miscdevice.h> 42 #include <linux/uaccess.h> 43 #include <linux/capability.h> 44 #include <linux/poll.h> 45 #include <linux/sched.h> 46 47 #include <xen/interface/xen.h> 48 #include <xen/events.h> 49 #include <xen/interface/vcpu.h> 50 #include <xen/xen.h> 51 #include <asm/xen/hypercall.h> 52 #include <asm/xen/hypervisor.h> 53 54 #define XEN_MCELOG "xen_mcelog: " 55 56 static struct mc_info g_mi; 57 static struct mcinfo_logical_cpu *g_physinfo; 58 static uint32_t ncpus; 59 60 static DEFINE_MUTEX(mcelog_lock); 61 62 static struct xen_mce_log xen_mcelog = { 63 .signature = XEN_MCE_LOG_SIGNATURE, 64 .len = XEN_MCE_LOG_LEN, 65 .recordlen = sizeof(struct xen_mce), 66 }; 67 68 static DEFINE_SPINLOCK(xen_mce_chrdev_state_lock); 69 static int xen_mce_chrdev_open_count; /* #times opened */ 70 static int xen_mce_chrdev_open_exclu; /* already open exclusive? */ 71 72 static DECLARE_WAIT_QUEUE_HEAD(xen_mce_chrdev_wait); 73 74 static int xen_mce_chrdev_open(struct inode *inode, struct file *file) 75 { 76 spin_lock(&xen_mce_chrdev_state_lock); 77 78 if (xen_mce_chrdev_open_exclu || 79 (xen_mce_chrdev_open_count && (file->f_flags & O_EXCL))) { 80 spin_unlock(&xen_mce_chrdev_state_lock); 81 82 return -EBUSY; 83 } 84 85 if (file->f_flags & O_EXCL) 86 xen_mce_chrdev_open_exclu = 1; 87 xen_mce_chrdev_open_count++; 88 89 spin_unlock(&xen_mce_chrdev_state_lock); 90 91 return nonseekable_open(inode, file); 92 } 93 94 static int xen_mce_chrdev_release(struct inode *inode, struct file *file) 95 { 96 spin_lock(&xen_mce_chrdev_state_lock); 97 98 xen_mce_chrdev_open_count--; 99 xen_mce_chrdev_open_exclu = 0; 100 101 spin_unlock(&xen_mce_chrdev_state_lock); 102 103 return 0; 104 } 105 106 static ssize_t xen_mce_chrdev_read(struct file *filp, char __user *ubuf, 107 size_t usize, loff_t *off) 108 { 109 char __user *buf = ubuf; 110 unsigned num; 111 int i, err; 112 113 mutex_lock(&mcelog_lock); 114 115 num = xen_mcelog.next; 116 117 /* Only supports full reads right now */ 118 err = -EINVAL; 119 if (*off != 0 || usize < XEN_MCE_LOG_LEN*sizeof(struct xen_mce)) 120 goto out; 121 122 err = 0; 123 for (i = 0; i < num; i++) { 124 struct xen_mce *m = &xen_mcelog.entry[i]; 125 126 err |= copy_to_user(buf, m, sizeof(*m)); 127 buf += sizeof(*m); 128 } 129 130 memset(xen_mcelog.entry, 0, num * sizeof(struct xen_mce)); 131 xen_mcelog.next = 0; 132 133 if (err) 134 err = -EFAULT; 135 136 out: 137 mutex_unlock(&mcelog_lock); 138 139 return err ? err : buf - ubuf; 140 } 141 142 static unsigned int xen_mce_chrdev_poll(struct file *file, poll_table *wait) 143 { 144 poll_wait(file, &xen_mce_chrdev_wait, wait); 145 146 if (xen_mcelog.next) 147 return POLLIN | POLLRDNORM; 148 149 return 0; 150 } 151 152 static long xen_mce_chrdev_ioctl(struct file *f, unsigned int cmd, 153 unsigned long arg) 154 { 155 int __user *p = (int __user *)arg; 156 157 if (!capable(CAP_SYS_ADMIN)) 158 return -EPERM; 159 160 switch (cmd) { 161 case MCE_GET_RECORD_LEN: 162 return put_user(sizeof(struct xen_mce), p); 163 case MCE_GET_LOG_LEN: 164 return put_user(XEN_MCE_LOG_LEN, p); 165 case MCE_GETCLEAR_FLAGS: { 166 unsigned flags; 167 168 do { 169 flags = xen_mcelog.flags; 170 } while (cmpxchg(&xen_mcelog.flags, flags, 0) != flags); 171 172 return put_user(flags, p); 173 } 174 default: 175 return -ENOTTY; 176 } 177 } 178 179 static const struct file_operations xen_mce_chrdev_ops = { 180 .open = xen_mce_chrdev_open, 181 .release = xen_mce_chrdev_release, 182 .read = xen_mce_chrdev_read, 183 .poll = xen_mce_chrdev_poll, 184 .unlocked_ioctl = xen_mce_chrdev_ioctl, 185 .llseek = no_llseek, 186 }; 187 188 static struct miscdevice xen_mce_chrdev_device = { 189 MISC_MCELOG_MINOR, 190 "mcelog", 191 &xen_mce_chrdev_ops, 192 }; 193 194 /* 195 * Caller should hold the mcelog_lock 196 */ 197 static void xen_mce_log(struct xen_mce *mce) 198 { 199 unsigned entry; 200 201 entry = xen_mcelog.next; 202 203 /* 204 * When the buffer fills up discard new entries. 205 * Assume that the earlier errors are the more 206 * interesting ones: 207 */ 208 if (entry >= XEN_MCE_LOG_LEN) { 209 set_bit(XEN_MCE_OVERFLOW, 210 (unsigned long *)&xen_mcelog.flags); 211 return; 212 } 213 214 memcpy(xen_mcelog.entry + entry, mce, sizeof(struct xen_mce)); 215 216 xen_mcelog.next++; 217 } 218 219 static int convert_log(struct mc_info *mi) 220 { 221 struct mcinfo_common *mic; 222 struct mcinfo_global *mc_global; 223 struct mcinfo_bank *mc_bank; 224 struct xen_mce m; 225 uint32_t i; 226 227 mic = NULL; 228 x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL); 229 if (unlikely(!mic)) { 230 pr_warning(XEN_MCELOG "Failed to find global error info\n"); 231 return -ENODEV; 232 } 233 234 memset(&m, 0, sizeof(struct xen_mce)); 235 236 mc_global = (struct mcinfo_global *)mic; 237 m.mcgstatus = mc_global->mc_gstatus; 238 m.apicid = mc_global->mc_apicid; 239 240 for (i = 0; i < ncpus; i++) 241 if (g_physinfo[i].mc_apicid == m.apicid) 242 break; 243 if (unlikely(i == ncpus)) { 244 pr_warning(XEN_MCELOG "Failed to match cpu with apicid %d\n", 245 m.apicid); 246 return -ENODEV; 247 } 248 249 m.socketid = g_physinfo[i].mc_chipid; 250 m.cpu = m.extcpu = g_physinfo[i].mc_cpunr; 251 m.cpuvendor = (__u8)g_physinfo[i].mc_vendor; 252 m.mcgcap = g_physinfo[i].mc_msrvalues[__MC_MSR_MCGCAP].value; 253 254 mic = NULL; 255 x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK); 256 if (unlikely(!mic)) { 257 pr_warning(XEN_MCELOG "Fail to find bank error info\n"); 258 return -ENODEV; 259 } 260 261 do { 262 if ((!mic) || (mic->size == 0) || 263 (mic->type != MC_TYPE_GLOBAL && 264 mic->type != MC_TYPE_BANK && 265 mic->type != MC_TYPE_EXTENDED && 266 mic->type != MC_TYPE_RECOVERY)) 267 break; 268 269 if (mic->type == MC_TYPE_BANK) { 270 mc_bank = (struct mcinfo_bank *)mic; 271 m.misc = mc_bank->mc_misc; 272 m.status = mc_bank->mc_status; 273 m.addr = mc_bank->mc_addr; 274 m.tsc = mc_bank->mc_tsc; 275 m.bank = mc_bank->mc_bank; 276 m.finished = 1; 277 /*log this record*/ 278 xen_mce_log(&m); 279 } 280 mic = x86_mcinfo_next(mic); 281 } while (1); 282 283 return 0; 284 } 285 286 static int mc_queue_handle(uint32_t flags) 287 { 288 struct xen_mc mc_op; 289 int ret = 0; 290 291 mc_op.cmd = XEN_MC_fetch; 292 mc_op.interface_version = XEN_MCA_INTERFACE_VERSION; 293 set_xen_guest_handle(mc_op.u.mc_fetch.data, &g_mi); 294 do { 295 mc_op.u.mc_fetch.flags = flags; 296 ret = HYPERVISOR_mca(&mc_op); 297 if (ret) { 298 pr_err(XEN_MCELOG "Failed to fetch %s error log\n", 299 (flags == XEN_MC_URGENT) ? 300 "urgnet" : "nonurgent"); 301 break; 302 } 303 304 if (mc_op.u.mc_fetch.flags & XEN_MC_NODATA || 305 mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED) 306 break; 307 else { 308 ret = convert_log(&g_mi); 309 if (ret) 310 pr_warning(XEN_MCELOG 311 "Failed to convert this error log, " 312 "continue acking it anyway\n"); 313 314 mc_op.u.mc_fetch.flags = flags | XEN_MC_ACK; 315 ret = HYPERVISOR_mca(&mc_op); 316 if (ret) { 317 pr_err(XEN_MCELOG 318 "Failed to ack previous error log\n"); 319 break; 320 } 321 } 322 } while (1); 323 324 return ret; 325 } 326 327 /* virq handler for machine check error info*/ 328 static void xen_mce_work_fn(struct work_struct *work) 329 { 330 int err; 331 332 mutex_lock(&mcelog_lock); 333 334 /* urgent mc_info */ 335 err = mc_queue_handle(XEN_MC_URGENT); 336 if (err) 337 pr_err(XEN_MCELOG 338 "Failed to handle urgent mc_info queue, " 339 "continue handling nonurgent mc_info queue anyway.\n"); 340 341 /* nonurgent mc_info */ 342 err = mc_queue_handle(XEN_MC_NONURGENT); 343 if (err) 344 pr_err(XEN_MCELOG 345 "Failed to handle nonurgent mc_info queue.\n"); 346 347 /* wake processes polling /dev/mcelog */ 348 wake_up_interruptible(&xen_mce_chrdev_wait); 349 350 mutex_unlock(&mcelog_lock); 351 } 352 static DECLARE_WORK(xen_mce_work, xen_mce_work_fn); 353 354 static irqreturn_t xen_mce_interrupt(int irq, void *dev_id) 355 { 356 schedule_work(&xen_mce_work); 357 return IRQ_HANDLED; 358 } 359 360 static int bind_virq_for_mce(void) 361 { 362 int ret; 363 struct xen_mc mc_op; 364 365 memset(&mc_op, 0, sizeof(struct xen_mc)); 366 367 /* Fetch physical CPU Numbers */ 368 mc_op.cmd = XEN_MC_physcpuinfo; 369 mc_op.interface_version = XEN_MCA_INTERFACE_VERSION; 370 set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo); 371 ret = HYPERVISOR_mca(&mc_op); 372 if (ret) { 373 pr_err(XEN_MCELOG "Failed to get CPU numbers\n"); 374 return ret; 375 } 376 377 /* Fetch each CPU Physical Info for later reference*/ 378 ncpus = mc_op.u.mc_physcpuinfo.ncpus; 379 g_physinfo = kcalloc(ncpus, sizeof(struct mcinfo_logical_cpu), 380 GFP_KERNEL); 381 if (!g_physinfo) 382 return -ENOMEM; 383 set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo); 384 ret = HYPERVISOR_mca(&mc_op); 385 if (ret) { 386 pr_err(XEN_MCELOG "Failed to get CPU info\n"); 387 kfree(g_physinfo); 388 return ret; 389 } 390 391 ret = bind_virq_to_irqhandler(VIRQ_MCA, 0, 392 xen_mce_interrupt, 0, "mce", NULL); 393 if (ret < 0) { 394 pr_err(XEN_MCELOG "Failed to bind virq\n"); 395 kfree(g_physinfo); 396 return ret; 397 } 398 399 return 0; 400 } 401 402 static int __init xen_late_init_mcelog(void) 403 { 404 /* Only DOM0 is responsible for MCE logging */ 405 if (xen_initial_domain()) { 406 /* register character device /dev/mcelog for xen mcelog */ 407 if (misc_register(&xen_mce_chrdev_device)) 408 return -ENODEV; 409 return bind_virq_for_mce(); 410 } 411 412 return -ENODEV; 413 } 414 device_initcall(xen_late_init_mcelog); 415