/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 *   K. Y. Srinivasan <kys@microsoft.com>
 *
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/completion.h>
#include <linux/hyperv.h>
#include <linux/kernel_stat.h>
#include <linux/clockchips.h>
#include <linux/cpu.h>
#include <asm/hyperv.h>
#include <asm/hypervisor.h>
#include <asm/mshyperv.h>
#include <linux/notifier.h>
#include <linux/ptrace.h>
#include <linux/screen_info.h>
#include <linux/kdebug.h>
#include "hyperv_vmbus.h"

static struct acpi_device  *hv_acpi_dev;

static struct tasklet_struct msg_dpc;
static struct completion probe_event;

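/*
 * Report register state to the hypervisor through the synthetic crash
 * MSRs (HV_X64_MSR_CRASH_P0..P4), then raise HV_CRASH_CTL_CRASH_NOTIFY
 * so the host knows guest crash data is available. Only the first
 * report wins; later die/panic callbacks are ignored.
 */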
static void hyperv_report_panic(struct pt_regs *regs)
{
	static bool panic_reported;

	/*
	 * We prefer to report panic on 'die' chain as we have proper
	 * registers to report, but if we miss it (e.g. on BUG()) we need
	 * to report it on 'panic'.
	 */
	if (panic_reported)
		return;
	panic_reported = true;

	wrmsrl(HV_X64_MSR_CRASH_P0, regs->ip);
	wrmsrl(HV_X64_MSR_CRASH_P1, regs->ax);
	wrmsrl(HV_X64_MSR_CRASH_P2, regs->bx);
	wrmsrl(HV_X64_MSR_CRASH_P3, regs->cx);
	wrmsrl(HV_X64_MSR_CRASH_P4, regs->dx);

	/*
	 * Let Hyper-V know there is crash data available
	 */
	wrmsrl(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY);
}

static int hyperv_panic_event(struct notifier_block *nb, unsigned long val,
			      void *args)
{
	struct pt_regs *regs;

	regs = current_pt_regs();

	hyperv_report_panic(regs);
	return NOTIFY_DONE;
}

static int hyperv_die_event(struct notifier_block *nb, unsigned long val,
			    void *args)
{
	struct die_args *die = (struct die_args *)args;
	struct pt_regs *regs = die->regs;

	hyperv_report_panic(regs);
	return NOTIFY_DONE;
}

static struct notifier_block hyperv_die_block = {
	.notifier_call = hyperv_die_event,
};
static struct notifier_block hyperv_panic_block = {
	.notifier_call = hyperv_panic_event,
};

struct resource *hyperv_mmio;

static int vmbus_exists(void)
{
	if (hv_acpi_dev == NULL)
		return -ENODEV;

	return 0;
}

#define VMBUS_ALIAS_LEN ((sizeof((struct hv_vmbus_device_id *)0)->guid) * 2)
static void print_alias_name(struct hv_device *hv_dev, char *alias_name)
{
	int i;
	for (i = 0; i < VMBUS_ALIAS_LEN; i += 2)
		sprintf(&alias_name[i], "%02x", hv_dev->dev_type.b[i/2]);
}

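/*
 * Channels using the "monitored" notification mechanism are assigned a
 * monitor ID by the host. The shared monitor pages pack those IDs 32 to
 * a trigger group; the helpers below compute a channel's group and bit
 * offset so the sysfs attributes can read its slot.
 */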
static u8 channel_monitor_group(struct vmbus_channel *channel)
{
	return (u8)channel->offermsg.monitorid / 32;
}

static u8 channel_monitor_offset(struct vmbus_channel *channel)
{
	return (u8)channel->offermsg.monitorid % 32;
}

static u32 channel_pending(struct vmbus_channel *channel,
			   struct hv_monitor_page *monitor_page)
{
	u8 monitor_group = channel_monitor_group(channel);
	return monitor_page->trigger_group[monitor_group].pending;
}

static u32 channel_latency(struct vmbus_channel *channel,
			   struct hv_monitor_page *monitor_page)
{
	u8 monitor_group = channel_monitor_group(channel);
	u8 monitor_offset = channel_monitor_offset(channel);
	return monitor_page->latency[monitor_group][monitor_offset];
}

static u32 channel_conn_id(struct vmbus_channel *channel,
			   struct hv_monitor_page *monitor_page)
{
	u8 monitor_group = channel_monitor_group(channel);
	u8 monitor_offset = channel_monitor_offset(channel);
	return monitor_page->parameter[monitor_group][monitor_offset].connectionid.u.id;
}

static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
		       char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
}
static DEVICE_ATTR_RO(id);

static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr,
			  char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n", hv_dev->channel->state);
}
static DEVICE_ATTR_RO(state);

static ssize_t monitor_id_show(struct device *dev,
			       struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
}
static DEVICE_ATTR_RO(monitor_id);

static ssize_t class_id_show(struct device *dev,
			     struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "{%pUl}\n",
		       hv_dev->channel->offermsg.offer.if_type.b);
}
static DEVICE_ATTR_RO(class_id);

static ssize_t device_id_show(struct device *dev,
			      struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "{%pUl}\n",
		       hv_dev->channel->offermsg.offer.if_instance.b);
}
static DEVICE_ATTR_RO(device_id);

static ssize_t modalias_show(struct device *dev,
			     struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	char alias_name[VMBUS_ALIAS_LEN + 1];

	print_alias_name(hv_dev, alias_name);
	return sprintf(buf, "vmbus:%s\n", alias_name);
}
static DEVICE_ATTR_RO(modalias);

static ssize_t server_monitor_pending_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_pending(hv_dev->channel,
				       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(server_monitor_pending);

static ssize_t client_monitor_pending_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_pending(hv_dev->channel,
				       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_pending);

static ssize_t server_monitor_latency_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_latency(hv_dev->channel,
				       vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_latency);

static ssize_t client_monitor_latency_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_latency(hv_dev->channel,
				       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_latency);

static ssize_t server_monitor_conn_id_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_conn_id(hv_dev->channel,
				       vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_conn_id);

static ssize_t client_monitor_conn_id_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_conn_id(hv_dev->channel,
				       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_conn_id);

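/*
 * The remaining per-channel attributes report ring buffer state. Each
 * read takes a hv_ringbuffer_get_debuginfo() snapshot of the primary
 * channel's outbound or inbound ring, so successive reads may differ.
 */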
static ssize_t out_intr_mask_show(struct device *dev,
				  struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
	return sprintf(buf, "%d\n", outbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(out_intr_mask);

static ssize_t out_read_index_show(struct device *dev,
				   struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
	return sprintf(buf, "%d\n", outbound.current_read_index);
}
static DEVICE_ATTR_RO(out_read_index);

static ssize_t out_write_index_show(struct device *dev,
				    struct device_attribute *dev_attr,
				    char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
	return sprintf(buf, "%d\n", outbound.current_write_index);
}
static DEVICE_ATTR_RO(out_write_index);

static ssize_t out_read_bytes_avail_show(struct device *dev,
					 struct device_attribute *dev_attr,
					 char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
	return sprintf(buf, "%d\n", outbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(out_read_bytes_avail);

static ssize_t out_write_bytes_avail_show(struct device *dev,
					  struct device_attribute *dev_attr,
					  char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
	return sprintf(buf, "%d\n", outbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(out_write_bytes_avail);

static ssize_t in_intr_mask_show(struct device *dev,
				 struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	return sprintf(buf, "%d\n", inbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(in_intr_mask);

static ssize_t in_read_index_show(struct device *dev,
				  struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	return sprintf(buf, "%d\n", inbound.current_read_index);
}
static DEVICE_ATTR_RO(in_read_index);

static ssize_t in_write_index_show(struct device *dev,
				   struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	return sprintf(buf, "%d\n", inbound.current_write_index);
}
static DEVICE_ATTR_RO(in_write_index);

static ssize_t in_read_bytes_avail_show(struct device *dev,
					struct device_attribute *dev_attr,
					char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	return sprintf(buf, "%d\n", inbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(in_read_bytes_avail);

static ssize_t in_write_bytes_avail_show(struct device *dev,
					 struct device_attribute *dev_attr,
					 char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	return sprintf(buf, "%d\n", inbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(in_write_bytes_avail);

static ssize_t channel_vp_mapping_show(struct device *dev,
				       struct device_attribute *dev_attr,
				       char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct vmbus_channel *channel = hv_dev->channel, *cur_sc;
	unsigned long flags;
	int buf_size = PAGE_SIZE, n_written, tot_written;
	struct list_head *cur;

	if (!channel)
		return -ENODEV;

	tot_written = snprintf(buf, buf_size, "%u:%u\n",
		channel->offermsg.child_relid, channel->target_cpu);

	spin_lock_irqsave(&channel->lock, flags);

	list_for_each(cur, &channel->sc_list) {
		if (tot_written >= buf_size - 1)
			break;

		cur_sc = list_entry(cur, struct vmbus_channel, sc_list);
		n_written = scnprintf(buf + tot_written,
				      buf_size - tot_written,
				      "%u:%u\n",
				      cur_sc->offermsg.child_relid,
				      cur_sc->target_cpu);
		tot_written += n_written;
	}

	spin_unlock_irqrestore(&channel->lock, flags);

	return tot_written;
}
static DEVICE_ATTR_RO(channel_vp_mapping);

static ssize_t vendor_show(struct device *dev,
			   struct device_attribute *dev_attr,
			   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	return sprintf(buf, "0x%x\n", hv_dev->vendor_id);
}
static DEVICE_ATTR_RO(vendor);

static ssize_t device_show(struct device *dev,
			   struct device_attribute *dev_attr,
			   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	return sprintf(buf, "0x%x\n", hv_dev->device_id);
}
static DEVICE_ATTR_RO(device);

/* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
static struct attribute *vmbus_attrs[] = {
	&dev_attr_id.attr,
	&dev_attr_state.attr,
	&dev_attr_monitor_id.attr,
	&dev_attr_class_id.attr,
	&dev_attr_device_id.attr,
	&dev_attr_modalias.attr,
	&dev_attr_server_monitor_pending.attr,
	&dev_attr_client_monitor_pending.attr,
	&dev_attr_server_monitor_latency.attr,
	&dev_attr_client_monitor_latency.attr,
	&dev_attr_server_monitor_conn_id.attr,
	&dev_attr_client_monitor_conn_id.attr,
	&dev_attr_out_intr_mask.attr,
	&dev_attr_out_read_index.attr,
	&dev_attr_out_write_index.attr,
	&dev_attr_out_read_bytes_avail.attr,
	&dev_attr_out_write_bytes_avail.attr,
	&dev_attr_in_intr_mask.attr,
	&dev_attr_in_read_index.attr,
	&dev_attr_in_write_index.attr,
	&dev_attr_in_read_bytes_avail.attr,
	&dev_attr_in_write_bytes_avail.attr,
	&dev_attr_channel_vp_mapping.attr,
	&dev_attr_vendor.attr,
	&dev_attr_device.attr,
	NULL,
};
ATTRIBUTE_GROUPS(vmbus);

/*
 * vmbus_uevent - add uevent for our device
 *
 * This routine is invoked when a device is added or removed on the vmbus to
 * generate a uevent to udev in the userspace. The udev will then look at its
 * rule and the uevent generated here to load the appropriate driver
 *
 * The alias string will be of the form vmbus:guid where guid is the string
 * representation of the device guid (each byte of the guid will be
 * represented with two hex characters).
 */
static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env)
{
	struct hv_device *dev = device_to_hv_device(device);
	int ret;
	char alias_name[VMBUS_ALIAS_LEN + 1];

	print_alias_name(dev, alias_name);
	ret = add_uevent_var(env, "MODALIAS=vmbus:%s", alias_name);
	return ret;
}

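/*
 * For illustration: a device whose class GUID is
 * f8615163-df3e-46c5-913f-f2d2f965ed0e (the network class) would yield
 * MODALIAS=vmbus:635161f83edfc546913ff2d2f965ed0e -- the guid bytes in
 * uuid_le (little-endian) order, two hex characters each.
 */
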
static const uuid_le null_guid;

static inline bool is_null_guid(const uuid_le *guid)
{
	if (uuid_le_cmp(*guid, null_guid))
		return false;
	return true;
}

/*
 * Return a matching hv_vmbus_device_id pointer.
 * If there is no match, return NULL.
 */
static const struct hv_vmbus_device_id *hv_vmbus_get_id(
					const struct hv_vmbus_device_id *id,
					const uuid_le *guid)
{
	for (; !is_null_guid(&id->guid); id++)
		if (!uuid_le_cmp(id->guid, *guid))
			return id;

	return NULL;
}

/*
 * vmbus_match - Attempt to match the specified device to the specified driver
 */
static int vmbus_match(struct device *device, struct device_driver *driver)
{
	struct hv_driver *drv = drv_to_hv_drv(driver);
	struct hv_device *hv_dev = device_to_hv_device(device);

	/* The hv_sock driver handles all hv_sock offers. */
	if (is_hvsock_channel(hv_dev->channel))
		return drv->hvsock;

	if (hv_vmbus_get_id(drv->id_table, &hv_dev->dev_type))
		return 1;

	return 0;
}

/*
 * vmbus_probe - Add the new vmbus's child device
 */
static int vmbus_probe(struct device *child_device)
{
	int ret = 0;
	struct hv_driver *drv =
			drv_to_hv_drv(child_device->driver);
	struct hv_device *dev = device_to_hv_device(child_device);
	const struct hv_vmbus_device_id *dev_id;

	dev_id = hv_vmbus_get_id(drv->id_table, &dev->dev_type);
	if (drv->probe) {
		ret = drv->probe(dev, dev_id);
		if (ret != 0)
			pr_err("probe failed for device %s (%d)\n",
			       dev_name(child_device), ret);

	} else {
		pr_err("probe not set for driver %s\n",
		       dev_name(child_device));
		ret = -ENODEV;
	}
	return ret;
}

/*
 * vmbus_remove - Remove a vmbus device
 */
static int vmbus_remove(struct device *child_device)
{
	struct hv_driver *drv;
	struct hv_device *dev = device_to_hv_device(child_device);

	if (child_device->driver) {
		drv = drv_to_hv_drv(child_device->driver);
		if (drv->remove)
			drv->remove(dev);
	}

	return 0;
}

/*
 * vmbus_shutdown - Shutdown a vmbus device
 */
static void vmbus_shutdown(struct device *child_device)
{
	struct hv_driver *drv;
	struct hv_device *dev = device_to_hv_device(child_device);

	/* The device may not be attached yet */
	if (!child_device->driver)
		return;

	drv = drv_to_hv_drv(child_device->driver);

	if (drv->shutdown)
		drv->shutdown(dev);
}

/*
 * vmbus_device_release - Final callback release of the vmbus child device
 */
static void vmbus_device_release(struct device *device)
{
	struct hv_device *hv_dev = device_to_hv_device(device);
	struct vmbus_channel *channel = hv_dev->channel;

	hv_process_channel_removal(channel,
				   channel->offermsg.child_relid);
	kfree(hv_dev);
}

/* The one and only one */
static struct bus_type hv_bus = {
	.name =		"vmbus",
	.match =	vmbus_match,
	.shutdown =	vmbus_shutdown,
	.remove =	vmbus_remove,
	.probe =	vmbus_probe,
	.uevent =	vmbus_uevent,
	.dev_groups =	vmbus_groups,
};

struct onmessage_work_context {
	struct work_struct work;
	struct hv_message msg;
};

static void vmbus_onmessage_work(struct work_struct *work)
{
	struct onmessage_work_context *ctx;

	/* Do not process messages if we're in DISCONNECTED state */
	if (vmbus_connection.conn_state == DISCONNECTED)
		return;

	ctx = container_of(work, struct onmessage_work_context,
			   work);
	vmbus_onmessage(&ctx->msg);
	kfree(ctx);
}

static void hv_process_timer_expiration(struct hv_message *msg, int cpu)
{
	struct clock_event_device *dev = hv_context.clk_evt[cpu];

	if (dev->event_handler)
		dev->event_handler(dev);

	msg->header.message_type = HVMSG_NONE;

	/*
	 * Make sure the write to MessageType (ie set to
	 * HVMSG_NONE) happens before we read the
	 * MessagePending and EOMing. Otherwise, the EOMing
	 * will not deliver any more messages since there is
	 * no empty slot
	 */
	mb();

	if (msg->header.message_flags.msg_pending) {
		/*
		 * This will cause message queue rescan to
		 * possibly deliver another msg from the
		 * hypervisor
		 */
		wrmsrl(HV_X64_MSR_EOM, 0);
	}
}

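/*
 * vmbus_on_msg_dpc - tasklet that drains this CPU's SYNIC message page
 * slot for VMBUS_MESSAGE_SINT, dispatching each message through
 * channel_message_table. Blocking handlers are deferred to the
 * connection work queue; the slot is then cleared and EOM is signaled
 * if the host has more messages pending.
 */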
static void vmbus_on_msg_dpc(unsigned long data)
{
	int cpu = smp_processor_id();
	void *page_addr = hv_context.synic_message_page[cpu];
	struct hv_message *msg = (struct hv_message *)page_addr +
				  VMBUS_MESSAGE_SINT;
	struct vmbus_channel_message_header *hdr;
	struct vmbus_channel_message_table_entry *entry;
	struct onmessage_work_context *ctx;

	while (1) {
		if (msg->header.message_type == HVMSG_NONE)
			/* no msg */
			break;

		hdr = (struct vmbus_channel_message_header *)msg->u.payload;

		if (hdr->msgtype >= CHANNELMSG_COUNT) {
			WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype);
			goto msg_handled;
		}

		entry = &channel_message_table[hdr->msgtype];
		if (entry->handler_type == VMHT_BLOCKING) {
			ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
			if (ctx == NULL)
				continue;

			INIT_WORK(&ctx->work, vmbus_onmessage_work);
			memcpy(&ctx->msg, msg, sizeof(*msg));

			queue_work(vmbus_connection.work_queue, &ctx->work);
		} else
			entry->message_handler(hdr);

msg_handled:
		msg->header.message_type = HVMSG_NONE;

		/*
		 * Make sure the write to MessageType (ie set to
		 * HVMSG_NONE) happens before we read the
		 * MessagePending and EOMing. Otherwise, the EOMing
		 * will not deliver any more messages since there is
		 * no empty slot
		 */
		mb();

		if (msg->header.message_flags.msg_pending) {
			/*
			 * This will cause message queue rescan to
			 * possibly deliver another msg from the
			 * hypervisor
			 */
			wrmsrl(HV_X64_MSR_EOM, 0);
		}
	}
}

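/*
 * vmbus_isr - top half of the VMBus interrupt. Schedules the per-cpu
 * event tasklet when a channel event is signaled, handles timer
 * messages inline, and kicks msg_dpc for everything else.
 */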
static void vmbus_isr(void)
{
	int cpu = smp_processor_id();
	void *page_addr;
	struct hv_message *msg;
	union hv_synic_event_flags *event;
	bool handled = false;

	page_addr = hv_context.synic_event_page[cpu];
	if (page_addr == NULL)
		return;

	event = (union hv_synic_event_flags *)page_addr +
					VMBUS_MESSAGE_SINT;
	/*
	 * Check for events before checking for messages. This is the order
	 * in which events and messages are checked in Windows guests on
	 * Hyper-V, and the Windows team suggested we do the same.
	 */

	if ((vmbus_proto_version == VERSION_WS2008) ||
	    (vmbus_proto_version == VERSION_WIN7)) {

		/* Since we are a child, we only need to check bit 0 */
		if (sync_test_and_clear_bit(0,
			(unsigned long *) &event->flags32[0])) {
			handled = true;
		}
	} else {
		/*
		 * Our host is win8 or above. The signaling mechanism
		 * has changed and we can directly look at the event page.
		 * If bit n is set then we have an interrupt on the channel
		 * whose id is n.
		 */
		handled = true;
	}

	if (handled)
		tasklet_schedule(hv_context.event_dpc[cpu]);

	page_addr = hv_context.synic_message_page[cpu];
	msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;

	/* Check if there are actual msgs to be processed */
	if (msg->header.message_type != HVMSG_NONE) {
		if (msg->header.message_type == HVMSG_TIMER_EXPIRED)
			hv_process_timer_expiration(msg, cpu);
		else
			tasklet_schedule(&msg_dpc);
	}
}

/*
 * vmbus_bus_init - Main vmbus driver initialization routine.
 *
 * Here, we
 *	- initialize the vmbus driver context
 *	- invoke the vmbus hv main init routine
 *	- retrieve the channel offers
 */
static int vmbus_bus_init(void)
{
	int ret;

	/* Hypervisor initialization...setup hypercall page..etc */
	ret = hv_init();
	if (ret != 0) {
		pr_err("Unable to initialize the hypervisor - 0x%x\n", ret);
		return ret;
	}

	tasklet_init(&msg_dpc, vmbus_on_msg_dpc, 0);

	ret = bus_register(&hv_bus);
	if (ret)
		goto err_cleanup;

	hv_setup_vmbus_irq(vmbus_isr);

	ret = hv_synic_alloc();
	if (ret)
		goto err_alloc;
	/*
	 * Initialize the per-cpu interrupt state and
	 * connect to the host.
	 */
	on_each_cpu(hv_synic_init, NULL, 1);
	ret = vmbus_connect();
	if (ret)
		goto err_connect;

	if (vmbus_proto_version > VERSION_WIN7)
		cpu_hotplug_disable();

	/*
	 * Only register if the crash MSRs are available
	 */
	if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
		register_die_notifier(&hyperv_die_block);
		atomic_notifier_chain_register(&panic_notifier_list,
					       &hyperv_panic_block);
	}

	vmbus_request_offers();

	return 0;

err_connect:
	on_each_cpu(hv_synic_cleanup, NULL, 1);
err_alloc:
	hv_synic_free();
	hv_remove_vmbus_irq();

	bus_unregister(&hv_bus);

err_cleanup:
	hv_cleanup();

	return ret;
}

/**
 * __vmbus_driver_register() - Register a vmbus's driver
 * @hv_driver: Pointer to driver structure you want to register
 * @owner: owner module of the drv
 * @mod_name: module name string
 *
 * Registers the given driver with Linux through the 'driver_register()' call
 * and sets up the hyper-v vmbus handling for this driver.
 * It will return the state of the 'driver_register()' call.
 *
 */
int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner, const char *mod_name)
{
	int ret;

	pr_info("registering driver %s\n", hv_driver->name);

	ret = vmbus_exists();
	if (ret < 0)
		return ret;

	hv_driver->driver.name = hv_driver->name;
	hv_driver->driver.owner = owner;
	hv_driver->driver.mod_name = mod_name;
	hv_driver->driver.bus = &hv_bus;

	ret = driver_register(&hv_driver->driver);

	return ret;
}
EXPORT_SYMBOL_GPL(__vmbus_driver_register);

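/*
 * Typical usage from a child driver (an illustrative sketch; the names
 * are placeholders, not a real driver): fill in a struct hv_driver and
 * call the vmbus_driver_register() wrapper, which passes THIS_MODULE
 * and KBUILD_MODNAME to this function.
 *
 *	static struct hv_driver sample_drv = {
 *		.name = "sample",
 *		.id_table = sample_id_table,
 *		.probe = sample_probe,
 *		.remove = sample_remove,
 *	};
 *
 *	ret = vmbus_driver_register(&sample_drv);
 */
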
/**
 * vmbus_driver_unregister() - Unregister a vmbus's driver
 * @hv_driver: Pointer to driver structure you want to
 *             un-register
 *
 * Un-register the given driver that was previously registered with a call to
 * vmbus_driver_register()
 */
void vmbus_driver_unregister(struct hv_driver *hv_driver)
{
	pr_info("unregistering driver %s\n", hv_driver->name);

	if (!vmbus_exists())
		driver_unregister(&hv_driver->driver);
}
EXPORT_SYMBOL_GPL(vmbus_driver_unregister);

/*
 * vmbus_device_create - Creates and registers a new child device
 * on the vmbus.
 */
struct hv_device *vmbus_device_create(const uuid_le *type,
				      const uuid_le *instance,
				      struct vmbus_channel *channel)
{
	struct hv_device *child_device_obj;

	child_device_obj = kzalloc(sizeof(struct hv_device), GFP_KERNEL);
	if (!child_device_obj) {
		pr_err("Unable to allocate device object for child device\n");
		return NULL;
	}

	child_device_obj->channel = channel;
	memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le));
	memcpy(&child_device_obj->dev_instance, instance,
	       sizeof(uuid_le));
	child_device_obj->vendor_id = 0x1414; /* MSFT vendor ID */

	return child_device_obj;
}

/*
 * vmbus_device_register - Register the child device
 */
int vmbus_device_register(struct hv_device *child_device_obj)
{
	int ret = 0;

	dev_set_name(&child_device_obj->device, "vmbus_%d",
		     child_device_obj->channel->id);

	child_device_obj->device.bus = &hv_bus;
	child_device_obj->device.parent = &hv_acpi_dev->dev;
	child_device_obj->device.release = vmbus_device_release;

	/*
	 * Register with the LDM. This will kick off the driver/device
	 * binding...which will eventually call vmbus_match() and vmbus_probe()
	 */
	ret = device_register(&child_device_obj->device);

	if (ret)
		pr_err("Unable to register child device\n");
	else
		pr_debug("child device %s registered\n",
			 dev_name(&child_device_obj->device));

	return ret;
}

/*
 * vmbus_device_unregister - Remove the specified child device
 * from the vmbus.
 */
void vmbus_device_unregister(struct hv_device *device_obj)
{
	pr_debug("child device %s unregistered\n",
		 dev_name(&device_obj->device));

	/*
	 * Kick off the process of unregistering the device.
	 * This will call vmbus_remove() and eventually vmbus_device_release()
	 */
	device_unregister(&device_obj->device);
}

/*
 * VMBUS is an acpi enumerated device. Get the information we
 * need from DSDT.
 */
#define VTPM_BASE_ADDRESS 0xfed40000
static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx)
{
	resource_size_t start = 0;
	resource_size_t end = 0;
	struct resource *new_res;
	struct resource **old_res = &hyperv_mmio;
	struct resource **prev_res = NULL;

	switch (res->type) {

	/*
	 * "Address" descriptors are for bus windows. Ignore
	 * "memory" descriptors, which are for registers on
	 * devices.
	 */
	case ACPI_RESOURCE_TYPE_ADDRESS32:
		start = res->data.address32.address.minimum;
		end = res->data.address32.address.maximum;
		break;

	case ACPI_RESOURCE_TYPE_ADDRESS64:
		start = res->data.address64.address.minimum;
		end = res->data.address64.address.maximum;
		break;

	default:
		/* Unused resource type */
		return AE_OK;

	}
	/*
	 * Ignore ranges that are below 1MB, as they're not
	 * necessary or useful here.
	 */
	if (end < 0x100000)
		return AE_OK;

	new_res = kzalloc(sizeof(*new_res), GFP_ATOMIC);
	if (!new_res)
		return AE_NO_MEMORY;

	/* If this range overlaps the virtual TPM, truncate it. */
	if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS)
		end = VTPM_BASE_ADDRESS;

	new_res->name = "hyperv mmio";
	new_res->flags = IORESOURCE_MEM;
	new_res->start = start;
	new_res->end = end;

	/*
	 * Stick ranges from higher in address space at the front of the list.
	 * If two ranges are adjacent, merge them.
	 */
	do {
		if (!*old_res) {
			*old_res = new_res;
			break;
		}

		if (((*old_res)->end + 1) == new_res->start) {
			(*old_res)->end = new_res->end;
			kfree(new_res);
			break;
		}

		if ((*old_res)->start == new_res->end + 1) {
			(*old_res)->start = new_res->start;
			kfree(new_res);
			break;
		}

		if ((*old_res)->end < new_res->start) {
			new_res->sibling = *old_res;
			if (prev_res)
				(*prev_res)->sibling = new_res;
			*old_res = new_res;
			break;
		}

		prev_res = old_res;
		old_res = &(*old_res)->sibling;

	} while (1);

	return AE_OK;
}

static int vmbus_acpi_remove(struct acpi_device *device)
{
	struct resource *cur_res;
	struct resource *next_res;

	if (hyperv_mmio) {
		for (cur_res = hyperv_mmio; cur_res; cur_res = next_res) {
			next_res = cur_res->sibling;
			kfree(cur_res);
		}
	}

	return 0;
}

/**
 * vmbus_allocate_mmio() - Pick a memory-mapped I/O range.
 * @new:		If successful, supplied a pointer to the
 *			allocated MMIO space.
 * @device_obj:		Identifies the caller
 * @min:		Minimum guest physical address of the
 *			allocation
 * @max:		Maximum guest physical address
 * @size:		Size of the range to be allocated
 * @align:		Alignment of the range to be allocated
 * @fb_overlap_ok:	Whether this allocation can be allowed
 *			to overlap the video frame buffer.
 *
 * This function walks the resources granted to VMBus by the
 * _CRS object in the ACPI namespace underneath the parent
 * "bridge" whether that's a root PCI bus in the Generation 1
 * case or a Module Device in the Generation 2 case.  It then
 * attempts to allocate from the global MMIO pool in a way that
 * matches the constraints supplied in these parameters and by
 * that _CRS.
 *
 * Return: 0 on success, -errno on failure
 */
int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
			resource_size_t min, resource_size_t max,
			resource_size_t size, resource_size_t align,
			bool fb_overlap_ok)
{
	struct resource *iter;
	resource_size_t range_min, range_max, start, local_min, local_max;
	const char *dev_n = dev_name(&device_obj->device);
	u32 fb_end = screen_info.lfb_base + (screen_info.lfb_size << 1);
	int i;

	for (iter = hyperv_mmio; iter; iter = iter->sibling) {
		if ((iter->start >= max) || (iter->end <= min))
			continue;

		range_min = iter->start;
		range_max = iter->end;

		/* If this range overlaps the frame buffer, split it into
		   two tries. */
		for (i = 0; i < 2; i++) {
			local_min = range_min;
			local_max = range_max;
			if (fb_overlap_ok || (range_min >= fb_end) ||
			    (range_max <= screen_info.lfb_base)) {
				i++;
			} else {
				if ((range_min <= screen_info.lfb_base) &&
				    (range_max >= screen_info.lfb_base)) {
					/*
					 * The frame buffer is in this window,
					 * so trim this into the part that
					 * precedes the frame buffer.
					 */
					local_max = screen_info.lfb_base - 1;
					range_min = fb_end;
				} else {
					range_min = fb_end;
					continue;
				}
			}

			start = (local_min + align - 1) & ~(align - 1);
			for (; start + size - 1 <= local_max; start += align) {
				*new = request_mem_region_exclusive(start, size,
								    dev_n);
				if (*new)
					return 0;
			}
		}
	}

	return -ENXIO;
}
EXPORT_SYMBOL_GPL(vmbus_allocate_mmio);

/**
 * vmbus_cpu_number_to_vp_number() - Map CPU to VP.
 * @cpu_number: CPU number in Linux terms
 *
 * This function returns the mapping between the Linux processor
 * number and the hypervisor's virtual processor number, useful
 * in making hypercalls and such that talk about specific
 * processors.
 *
 * Return: Virtual processor number in Hyper-V terms
 */
int vmbus_cpu_number_to_vp_number(int cpu_number)
{
	return hv_context.vp_index[cpu_number];
}
EXPORT_SYMBOL_GPL(vmbus_cpu_number_to_vp_number);

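/*
 * vmbus_acpi_add - bound when the VMBUS ACPI device is found. Collects
 * MMIO windows from _CRS, walking up the ancestor chain (root PCI bus
 * on Gen1, Module Device on Gen2) until ranges are found, then
 * completes probe_event so hv_acpi_init can continue.
 */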
static int vmbus_acpi_add(struct acpi_device *device)
{
	acpi_status result;
	int ret_val = -ENODEV;
	struct acpi_device *ancestor;

	hv_acpi_dev = device;

	result = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
				     vmbus_walk_resources, NULL);

	if (ACPI_FAILURE(result))
		goto acpi_walk_err;
	/*
	 * Some ancestor of the vmbus acpi device (Gen1 or Gen2
	 * firmware) is the VMOD that has the mmio ranges. Get that.
	 */
	for (ancestor = device->parent; ancestor; ancestor = ancestor->parent) {
		result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS,
					     vmbus_walk_resources, NULL);

		if (ACPI_FAILURE(result))
			continue;
		if (hyperv_mmio)
			break;
	}
	ret_val = 0;

acpi_walk_err:
	complete(&probe_event);
	if (ret_val)
		vmbus_acpi_remove(device);
	return ret_val;
}

static const struct acpi_device_id vmbus_acpi_device_ids[] = {
	{"VMBUS", 0},
	{"VMBus", 0},
	{"", 0},
};
MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);

static struct acpi_driver vmbus_acpi_driver = {
	.name = "vmbus",
	.ids = vmbus_acpi_device_ids,
	.ops = {
		.add = vmbus_acpi_add,
		.remove = vmbus_acpi_remove,
	},
};

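/*
 * On kexec, notify the host with an unload message and tear down the
 * per-cpu SYNIC state so the new kernel can renegotiate the VMBus
 * connection from scratch.
 */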
static void hv_kexec_handler(void)
{
	int cpu;

	hv_synic_clockevents_cleanup();
	vmbus_initiate_unload();
	for_each_online_cpu(cpu)
		smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1);
	hv_cleanup();
}

static void hv_crash_handler(struct pt_regs *regs)
{
	vmbus_initiate_unload();
	/*
	 * In crash handler we can't schedule synic cleanup for all CPUs,
	 * doing the cleanup for current CPU only. This should be sufficient
	 * for kdump.
	 */
	hv_synic_cleanup(NULL);
	hv_cleanup();
}

static int __init hv_acpi_init(void)
{
	int ret, t;

	if (x86_hyper != &x86_hyper_ms_hyperv)
		return -ENODEV;

	init_completion(&probe_event);

	/*
	 * Get ACPI resources first.
	 */
	ret = acpi_bus_register_driver(&vmbus_acpi_driver);

	if (ret)
		return ret;

	t = wait_for_completion_timeout(&probe_event, 5*HZ);
	if (t == 0) {
		ret = -ETIMEDOUT;
		goto cleanup;
	}

	ret = vmbus_bus_init();
	if (ret)
		goto cleanup;

	hv_setup_kexec_handler(hv_kexec_handler);
	hv_setup_crash_handler(hv_crash_handler);

	return 0;

cleanup:
	acpi_bus_unregister_driver(&vmbus_acpi_driver);
	hv_acpi_dev = NULL;
	return ret;
}

static void __exit vmbus_exit(void)
{
	int cpu;

	hv_remove_kexec_handler();
	hv_remove_crash_handler();
	vmbus_connection.conn_state = DISCONNECTED;
	hv_synic_clockevents_cleanup();
	vmbus_disconnect();
	hv_remove_vmbus_irq();
	tasklet_kill(&msg_dpc);
	vmbus_free_channels();
	if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
		unregister_die_notifier(&hyperv_die_block);
		atomic_notifier_chain_unregister(&panic_notifier_list,
						 &hyperv_panic_block);
	}
	bus_unregister(&hv_bus);
	hv_cleanup();
	for_each_online_cpu(cpu) {
		tasklet_kill(hv_context.event_dpc[cpu]);
		smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1);
	}
	hv_synic_free();
	acpi_bus_unregister_driver(&vmbus_acpi_driver);
	if (vmbus_proto_version > VERSION_WIN7)
		cpu_hotplug_enable();
}

MODULE_LICENSE("GPL");

subsys_initcall(hv_acpi_init);
module_exit(vmbus_exit);