// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 *   K. Y. Srinivasan <kys@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/platform_device.h>
#include <linux/interrupt.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/completion.h>
#include <linux/hyperv.h>
#include <linux/kernel_stat.h>
#include <linux/of_address.h>
#include <linux/clockchips.h>
#include <linux/cpu.h>
#include <linux/sched/isolation.h>
#include <linux/sched/task_stack.h>

#include <linux/delay.h>
#include <linux/panic_notifier.h>
#include <linux/ptrace.h>
#include <linux/screen_info.h>
#include <linux/efi.h>
#include <linux/random.h>
#include <linux/kernel.h>
#include <linux/syscore_ops.h>
#include <linux/dma-map-ops.h>
#include <linux/pci.h>
#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
#include "hyperv_vmbus.h"

/*
 * A device ID added at runtime through the driver's sysfs "new_id" file,
 * linked on hv_driver.dynids.list (see vmbus_add_dynid()/vmbus_free_dynids()).
 */
struct vmbus_dynid {
	struct list_head node;
	struct hv_vmbus_device_id id;
};

/* VMBus Root Device */
static struct device *vmbus_root_device;

/* CPU-hotplug state token; presumably set at init — setup site not visible here. */
static int hyperv_cpuhp_online;

/* Per-CPU cookie for the VMBus event interrupt — usage not visible in this chunk. */
static long __percpu *vmbus_evt;

/* Values parsed from ACPI DSDT */
int vmbus_irq;
int vmbus_interrupt;

/*
 * The panic notifier below is responsible solely for unloading the
 * vmbus connection, which is necessary in a panic event.
 *
 * Notice an intricate relation of this notifier with Hyper-V
 * framebuffer panic notifier exists - we need vmbus connection alive
 * there in order to succeed, so we need to order both with each other
 * [see hvfb_on_panic()] - this is done using notifiers' priorities.
67 */ 68 static int hv_panic_vmbus_unload(struct notifier_block *nb, unsigned long val, 69 void *args) 70 { 71 vmbus_initiate_unload(true); 72 return NOTIFY_DONE; 73 } 74 static struct notifier_block hyperv_panic_vmbus_unload_block = { 75 .notifier_call = hv_panic_vmbus_unload, 76 .priority = INT_MIN + 1, /* almost the latest one to execute */ 77 }; 78 79 static const char *fb_mmio_name = "fb_range"; 80 static struct resource *fb_mmio; 81 static struct resource *hyperv_mmio; 82 static DEFINE_MUTEX(hyperv_mmio_lock); 83 84 struct device *hv_get_vmbus_root_device(void) 85 { 86 return vmbus_root_device; 87 } 88 EXPORT_SYMBOL_GPL(hv_get_vmbus_root_device); 89 90 static int vmbus_exists(void) 91 { 92 if (vmbus_root_device == NULL) 93 return -ENODEV; 94 95 return 0; 96 } 97 98 static u8 channel_monitor_group(const struct vmbus_channel *channel) 99 { 100 return (u8)channel->offermsg.monitorid / 32; 101 } 102 103 static u8 channel_monitor_offset(const struct vmbus_channel *channel) 104 { 105 return (u8)channel->offermsg.monitorid % 32; 106 } 107 108 static u32 channel_pending(const struct vmbus_channel *channel, 109 const struct hv_monitor_page *monitor_page) 110 { 111 u8 monitor_group = channel_monitor_group(channel); 112 113 return monitor_page->trigger_group[monitor_group].pending; 114 } 115 116 static u32 channel_latency(const struct vmbus_channel *channel, 117 const struct hv_monitor_page *monitor_page) 118 { 119 u8 monitor_group = channel_monitor_group(channel); 120 u8 monitor_offset = channel_monitor_offset(channel); 121 122 return monitor_page->latency[monitor_group][monitor_offset]; 123 } 124 125 static u32 channel_conn_id(struct vmbus_channel *channel, 126 struct hv_monitor_page *monitor_page) 127 { 128 u8 monitor_group = channel_monitor_group(channel); 129 u8 monitor_offset = channel_monitor_offset(channel); 130 131 return monitor_page->parameter[monitor_group][monitor_offset].connectionid.u.id; 132 } 133 134 static ssize_t id_show(struct device *dev, struct 
device_attribute *dev_attr, 135 char *buf) 136 { 137 struct hv_device *hv_dev = device_to_hv_device(dev); 138 139 if (!hv_dev->channel) 140 return -ENODEV; 141 return sysfs_emit(buf, "%d\n", hv_dev->channel->offermsg.child_relid); 142 } 143 static DEVICE_ATTR_RO(id); 144 145 static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr, 146 char *buf) 147 { 148 struct hv_device *hv_dev = device_to_hv_device(dev); 149 150 if (!hv_dev->channel) 151 return -ENODEV; 152 return sysfs_emit(buf, "%d\n", hv_dev->channel->state); 153 } 154 static DEVICE_ATTR_RO(state); 155 156 static ssize_t monitor_id_show(struct device *dev, 157 struct device_attribute *dev_attr, char *buf) 158 { 159 struct hv_device *hv_dev = device_to_hv_device(dev); 160 161 if (!hv_dev->channel) 162 return -ENODEV; 163 return sysfs_emit(buf, "%d\n", hv_dev->channel->offermsg.monitorid); 164 } 165 static DEVICE_ATTR_RO(monitor_id); 166 167 static ssize_t class_id_show(struct device *dev, 168 struct device_attribute *dev_attr, char *buf) 169 { 170 struct hv_device *hv_dev = device_to_hv_device(dev); 171 172 if (!hv_dev->channel) 173 return -ENODEV; 174 return sysfs_emit(buf, "{%pUl}\n", 175 &hv_dev->channel->offermsg.offer.if_type); 176 } 177 static DEVICE_ATTR_RO(class_id); 178 179 static ssize_t device_id_show(struct device *dev, 180 struct device_attribute *dev_attr, char *buf) 181 { 182 struct hv_device *hv_dev = device_to_hv_device(dev); 183 184 if (!hv_dev->channel) 185 return -ENODEV; 186 return sysfs_emit(buf, "{%pUl}\n", 187 &hv_dev->channel->offermsg.offer.if_instance); 188 } 189 static DEVICE_ATTR_RO(device_id); 190 191 static ssize_t modalias_show(struct device *dev, 192 struct device_attribute *dev_attr, char *buf) 193 { 194 struct hv_device *hv_dev = device_to_hv_device(dev); 195 196 return sysfs_emit(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type); 197 } 198 static DEVICE_ATTR_RO(modalias); 199 200 #ifdef CONFIG_NUMA 201 static ssize_t numa_node_show(struct device 
*dev, 202 struct device_attribute *attr, char *buf) 203 { 204 struct hv_device *hv_dev = device_to_hv_device(dev); 205 206 if (!hv_dev->channel) 207 return -ENODEV; 208 209 return sysfs_emit(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu)); 210 } 211 static DEVICE_ATTR_RO(numa_node); 212 #endif 213 214 static ssize_t server_monitor_pending_show(struct device *dev, 215 struct device_attribute *dev_attr, 216 char *buf) 217 { 218 struct hv_device *hv_dev = device_to_hv_device(dev); 219 220 if (!hv_dev->channel) 221 return -ENODEV; 222 return sysfs_emit(buf, "%d\n", channel_pending(hv_dev->channel, 223 vmbus_connection.monitor_pages[0])); 224 } 225 static DEVICE_ATTR_RO(server_monitor_pending); 226 227 static ssize_t client_monitor_pending_show(struct device *dev, 228 struct device_attribute *dev_attr, 229 char *buf) 230 { 231 struct hv_device *hv_dev = device_to_hv_device(dev); 232 233 if (!hv_dev->channel) 234 return -ENODEV; 235 return sysfs_emit(buf, "%d\n", channel_pending(hv_dev->channel, 236 vmbus_connection.monitor_pages[1])); 237 } 238 static DEVICE_ATTR_RO(client_monitor_pending); 239 240 static ssize_t server_monitor_latency_show(struct device *dev, 241 struct device_attribute *dev_attr, 242 char *buf) 243 { 244 struct hv_device *hv_dev = device_to_hv_device(dev); 245 246 if (!hv_dev->channel) 247 return -ENODEV; 248 return sysfs_emit(buf, "%d\n", channel_latency(hv_dev->channel, 249 vmbus_connection.monitor_pages[0])); 250 } 251 static DEVICE_ATTR_RO(server_monitor_latency); 252 253 static ssize_t client_monitor_latency_show(struct device *dev, 254 struct device_attribute *dev_attr, 255 char *buf) 256 { 257 struct hv_device *hv_dev = device_to_hv_device(dev); 258 259 if (!hv_dev->channel) 260 return -ENODEV; 261 return sysfs_emit(buf, "%d\n", channel_latency(hv_dev->channel, 262 vmbus_connection.monitor_pages[1])); 263 } 264 static DEVICE_ATTR_RO(client_monitor_latency); 265 266 static ssize_t server_monitor_conn_id_show(struct device *dev, 267 struct 
device_attribute *dev_attr, 268 char *buf) 269 { 270 struct hv_device *hv_dev = device_to_hv_device(dev); 271 272 if (!hv_dev->channel) 273 return -ENODEV; 274 return sysfs_emit(buf, "%d\n", channel_conn_id(hv_dev->channel, 275 vmbus_connection.monitor_pages[0])); 276 } 277 static DEVICE_ATTR_RO(server_monitor_conn_id); 278 279 static ssize_t client_monitor_conn_id_show(struct device *dev, 280 struct device_attribute *dev_attr, 281 char *buf) 282 { 283 struct hv_device *hv_dev = device_to_hv_device(dev); 284 285 if (!hv_dev->channel) 286 return -ENODEV; 287 return sysfs_emit(buf, "%d\n", channel_conn_id(hv_dev->channel, 288 vmbus_connection.monitor_pages[1])); 289 } 290 static DEVICE_ATTR_RO(client_monitor_conn_id); 291 292 static ssize_t out_intr_mask_show(struct device *dev, 293 struct device_attribute *dev_attr, char *buf) 294 { 295 struct hv_device *hv_dev = device_to_hv_device(dev); 296 struct hv_ring_buffer_debug_info outbound; 297 int ret; 298 299 if (!hv_dev->channel) 300 return -ENODEV; 301 302 ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, 303 &outbound); 304 if (ret < 0) 305 return ret; 306 307 return sysfs_emit(buf, "%d\n", outbound.current_interrupt_mask); 308 } 309 static DEVICE_ATTR_RO(out_intr_mask); 310 311 static ssize_t out_read_index_show(struct device *dev, 312 struct device_attribute *dev_attr, char *buf) 313 { 314 struct hv_device *hv_dev = device_to_hv_device(dev); 315 struct hv_ring_buffer_debug_info outbound; 316 int ret; 317 318 if (!hv_dev->channel) 319 return -ENODEV; 320 321 ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, 322 &outbound); 323 if (ret < 0) 324 return ret; 325 return sysfs_emit(buf, "%d\n", outbound.current_read_index); 326 } 327 static DEVICE_ATTR_RO(out_read_index); 328 329 static ssize_t out_write_index_show(struct device *dev, 330 struct device_attribute *dev_attr, 331 char *buf) 332 { 333 struct hv_device *hv_dev = device_to_hv_device(dev); 334 struct hv_ring_buffer_debug_info outbound; 
335 int ret; 336 337 if (!hv_dev->channel) 338 return -ENODEV; 339 340 ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, 341 &outbound); 342 if (ret < 0) 343 return ret; 344 return sysfs_emit(buf, "%d\n", outbound.current_write_index); 345 } 346 static DEVICE_ATTR_RO(out_write_index); 347 348 static ssize_t out_read_bytes_avail_show(struct device *dev, 349 struct device_attribute *dev_attr, 350 char *buf) 351 { 352 struct hv_device *hv_dev = device_to_hv_device(dev); 353 struct hv_ring_buffer_debug_info outbound; 354 int ret; 355 356 if (!hv_dev->channel) 357 return -ENODEV; 358 359 ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, 360 &outbound); 361 if (ret < 0) 362 return ret; 363 return sysfs_emit(buf, "%d\n", outbound.bytes_avail_toread); 364 } 365 static DEVICE_ATTR_RO(out_read_bytes_avail); 366 367 static ssize_t out_write_bytes_avail_show(struct device *dev, 368 struct device_attribute *dev_attr, 369 char *buf) 370 { 371 struct hv_device *hv_dev = device_to_hv_device(dev); 372 struct hv_ring_buffer_debug_info outbound; 373 int ret; 374 375 if (!hv_dev->channel) 376 return -ENODEV; 377 378 ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, 379 &outbound); 380 if (ret < 0) 381 return ret; 382 return sysfs_emit(buf, "%d\n", outbound.bytes_avail_towrite); 383 } 384 static DEVICE_ATTR_RO(out_write_bytes_avail); 385 386 static ssize_t in_intr_mask_show(struct device *dev, 387 struct device_attribute *dev_attr, char *buf) 388 { 389 struct hv_device *hv_dev = device_to_hv_device(dev); 390 struct hv_ring_buffer_debug_info inbound; 391 int ret; 392 393 if (!hv_dev->channel) 394 return -ENODEV; 395 396 ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); 397 if (ret < 0) 398 return ret; 399 400 return sysfs_emit(buf, "%d\n", inbound.current_interrupt_mask); 401 } 402 static DEVICE_ATTR_RO(in_intr_mask); 403 404 static ssize_t in_read_index_show(struct device *dev, 405 struct device_attribute *dev_attr, char *buf) 
406 { 407 struct hv_device *hv_dev = device_to_hv_device(dev); 408 struct hv_ring_buffer_debug_info inbound; 409 int ret; 410 411 if (!hv_dev->channel) 412 return -ENODEV; 413 414 ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); 415 if (ret < 0) 416 return ret; 417 418 return sysfs_emit(buf, "%d\n", inbound.current_read_index); 419 } 420 static DEVICE_ATTR_RO(in_read_index); 421 422 static ssize_t in_write_index_show(struct device *dev, 423 struct device_attribute *dev_attr, char *buf) 424 { 425 struct hv_device *hv_dev = device_to_hv_device(dev); 426 struct hv_ring_buffer_debug_info inbound; 427 int ret; 428 429 if (!hv_dev->channel) 430 return -ENODEV; 431 432 ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); 433 if (ret < 0) 434 return ret; 435 436 return sysfs_emit(buf, "%d\n", inbound.current_write_index); 437 } 438 static DEVICE_ATTR_RO(in_write_index); 439 440 static ssize_t in_read_bytes_avail_show(struct device *dev, 441 struct device_attribute *dev_attr, 442 char *buf) 443 { 444 struct hv_device *hv_dev = device_to_hv_device(dev); 445 struct hv_ring_buffer_debug_info inbound; 446 int ret; 447 448 if (!hv_dev->channel) 449 return -ENODEV; 450 451 ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); 452 if (ret < 0) 453 return ret; 454 455 return sysfs_emit(buf, "%d\n", inbound.bytes_avail_toread); 456 } 457 static DEVICE_ATTR_RO(in_read_bytes_avail); 458 459 static ssize_t in_write_bytes_avail_show(struct device *dev, 460 struct device_attribute *dev_attr, 461 char *buf) 462 { 463 struct hv_device *hv_dev = device_to_hv_device(dev); 464 struct hv_ring_buffer_debug_info inbound; 465 int ret; 466 467 if (!hv_dev->channel) 468 return -ENODEV; 469 470 ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); 471 if (ret < 0) 472 return ret; 473 474 return sysfs_emit(buf, "%d\n", inbound.bytes_avail_towrite); 475 } 476 static DEVICE_ATTR_RO(in_write_bytes_avail); 477 478 static 
ssize_t channel_vp_mapping_show(struct device *dev, 479 struct device_attribute *dev_attr, 480 char *buf) 481 { 482 struct hv_device *hv_dev = device_to_hv_device(dev); 483 struct vmbus_channel *channel = hv_dev->channel, *cur_sc; 484 int n_written; 485 struct list_head *cur; 486 487 if (!channel) 488 return -ENODEV; 489 490 mutex_lock(&vmbus_connection.channel_mutex); 491 492 n_written = sysfs_emit(buf, "%u:%u\n", 493 channel->offermsg.child_relid, 494 channel->target_cpu); 495 496 list_for_each(cur, &channel->sc_list) { 497 498 cur_sc = list_entry(cur, struct vmbus_channel, sc_list); 499 n_written += sysfs_emit_at(buf, n_written, "%u:%u\n", 500 cur_sc->offermsg.child_relid, 501 cur_sc->target_cpu); 502 } 503 504 mutex_unlock(&vmbus_connection.channel_mutex); 505 506 return n_written; 507 } 508 static DEVICE_ATTR_RO(channel_vp_mapping); 509 510 static ssize_t vendor_show(struct device *dev, 511 struct device_attribute *dev_attr, 512 char *buf) 513 { 514 struct hv_device *hv_dev = device_to_hv_device(dev); 515 516 return sysfs_emit(buf, "0x%x\n", hv_dev->vendor_id); 517 } 518 static DEVICE_ATTR_RO(vendor); 519 520 static ssize_t device_show(struct device *dev, 521 struct device_attribute *dev_attr, 522 char *buf) 523 { 524 struct hv_device *hv_dev = device_to_hv_device(dev); 525 526 return sysfs_emit(buf, "0x%x\n", hv_dev->device_id); 527 } 528 static DEVICE_ATTR_RO(device); 529 530 static ssize_t driver_override_store(struct device *dev, 531 struct device_attribute *attr, 532 const char *buf, size_t count) 533 { 534 struct hv_device *hv_dev = device_to_hv_device(dev); 535 int ret; 536 537 ret = driver_set_override(dev, &hv_dev->driver_override, buf, count); 538 if (ret) 539 return ret; 540 541 return count; 542 } 543 544 static ssize_t driver_override_show(struct device *dev, 545 struct device_attribute *attr, char *buf) 546 { 547 struct hv_device *hv_dev = device_to_hv_device(dev); 548 ssize_t len; 549 550 device_lock(dev); 551 len = sysfs_emit(buf, "%s\n", 
hv_dev->driver_override); 552 device_unlock(dev); 553 554 return len; 555 } 556 static DEVICE_ATTR_RW(driver_override); 557 558 /* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */ 559 static struct attribute *vmbus_dev_attrs[] = { 560 &dev_attr_id.attr, 561 &dev_attr_state.attr, 562 &dev_attr_monitor_id.attr, 563 &dev_attr_class_id.attr, 564 &dev_attr_device_id.attr, 565 &dev_attr_modalias.attr, 566 #ifdef CONFIG_NUMA 567 &dev_attr_numa_node.attr, 568 #endif 569 &dev_attr_server_monitor_pending.attr, 570 &dev_attr_client_monitor_pending.attr, 571 &dev_attr_server_monitor_latency.attr, 572 &dev_attr_client_monitor_latency.attr, 573 &dev_attr_server_monitor_conn_id.attr, 574 &dev_attr_client_monitor_conn_id.attr, 575 &dev_attr_out_intr_mask.attr, 576 &dev_attr_out_read_index.attr, 577 &dev_attr_out_write_index.attr, 578 &dev_attr_out_read_bytes_avail.attr, 579 &dev_attr_out_write_bytes_avail.attr, 580 &dev_attr_in_intr_mask.attr, 581 &dev_attr_in_read_index.attr, 582 &dev_attr_in_write_index.attr, 583 &dev_attr_in_read_bytes_avail.attr, 584 &dev_attr_in_write_bytes_avail.attr, 585 &dev_attr_channel_vp_mapping.attr, 586 &dev_attr_vendor.attr, 587 &dev_attr_device.attr, 588 &dev_attr_driver_override.attr, 589 NULL, 590 }; 591 592 /* 593 * Device-level attribute_group callback function. Returns the permission for 594 * each attribute, and returns 0 if an attribute is not visible. 595 */ 596 static umode_t vmbus_dev_attr_is_visible(struct kobject *kobj, 597 struct attribute *attr, int idx) 598 { 599 struct device *dev = kobj_to_dev(kobj); 600 const struct hv_device *hv_dev = device_to_hv_device(dev); 601 602 /* Hide the monitor attributes if the monitor mechanism is not used. 
*/ 603 if (!hv_dev->channel->offermsg.monitor_allocated && 604 (attr == &dev_attr_monitor_id.attr || 605 attr == &dev_attr_server_monitor_pending.attr || 606 attr == &dev_attr_client_monitor_pending.attr || 607 attr == &dev_attr_server_monitor_latency.attr || 608 attr == &dev_attr_client_monitor_latency.attr || 609 attr == &dev_attr_server_monitor_conn_id.attr || 610 attr == &dev_attr_client_monitor_conn_id.attr)) 611 return 0; 612 613 return attr->mode; 614 } 615 616 static const struct attribute_group vmbus_dev_group = { 617 .attrs = vmbus_dev_attrs, 618 .is_visible = vmbus_dev_attr_is_visible 619 }; 620 __ATTRIBUTE_GROUPS(vmbus_dev); 621 622 /* Set up the attribute for /sys/bus/vmbus/hibernation */ 623 static ssize_t hibernation_show(const struct bus_type *bus, char *buf) 624 { 625 return sprintf(buf, "%d\n", !!hv_is_hibernation_supported()); 626 } 627 628 static BUS_ATTR_RO(hibernation); 629 630 static struct attribute *vmbus_bus_attrs[] = { 631 &bus_attr_hibernation.attr, 632 NULL, 633 }; 634 static const struct attribute_group vmbus_bus_group = { 635 .attrs = vmbus_bus_attrs, 636 }; 637 __ATTRIBUTE_GROUPS(vmbus_bus); 638 639 /* 640 * vmbus_uevent - add uevent for our device 641 * 642 * This routine is invoked when a device is added or removed on the vmbus to 643 * generate a uevent to udev in the userspace. The udev will then look at its 644 * rule and the uevent generated here to load the appropriate driver 645 * 646 * The alias string will be of the form vmbus:guid where guid is the string 647 * representation of the device guid (each byte of the guid will be 648 * represented with two hex characters. 
649 */ 650 static int vmbus_uevent(const struct device *device, struct kobj_uevent_env *env) 651 { 652 const struct hv_device *dev = device_to_hv_device(device); 653 const char *format = "MODALIAS=vmbus:%*phN"; 654 655 return add_uevent_var(env, format, UUID_SIZE, &dev->dev_type); 656 } 657 658 static const struct hv_vmbus_device_id * 659 hv_vmbus_dev_match(const struct hv_vmbus_device_id *id, const guid_t *guid) 660 { 661 if (id == NULL) 662 return NULL; /* empty device table */ 663 664 for (; !guid_is_null(&id->guid); id++) 665 if (guid_equal(&id->guid, guid)) 666 return id; 667 668 return NULL; 669 } 670 671 static const struct hv_vmbus_device_id * 672 hv_vmbus_dynid_match(struct hv_driver *drv, const guid_t *guid) 673 { 674 const struct hv_vmbus_device_id *id = NULL; 675 struct vmbus_dynid *dynid; 676 677 spin_lock(&drv->dynids.lock); 678 list_for_each_entry(dynid, &drv->dynids.list, node) { 679 if (guid_equal(&dynid->id.guid, guid)) { 680 id = &dynid->id; 681 break; 682 } 683 } 684 spin_unlock(&drv->dynids.lock); 685 686 return id; 687 } 688 689 static const struct hv_vmbus_device_id vmbus_device_null; 690 691 /* 692 * Return a matching hv_vmbus_device_id pointer. 693 * If there is no match, return NULL. 
694 */ 695 static const struct hv_vmbus_device_id *hv_vmbus_get_id(const struct hv_driver *drv, 696 struct hv_device *dev) 697 { 698 const guid_t *guid = &dev->dev_type; 699 const struct hv_vmbus_device_id *id; 700 701 /* When driver_override is set, only bind to the matching driver */ 702 if (dev->driver_override && strcmp(dev->driver_override, drv->name)) 703 return NULL; 704 705 /* Look at the dynamic ids first, before the static ones */ 706 id = hv_vmbus_dynid_match((struct hv_driver *)drv, guid); 707 if (!id) 708 id = hv_vmbus_dev_match(drv->id_table, guid); 709 710 /* driver_override will always match, send a dummy id */ 711 if (!id && dev->driver_override) 712 id = &vmbus_device_null; 713 714 return id; 715 } 716 717 /* vmbus_add_dynid - add a new device ID to this driver and re-probe devices */ 718 static int vmbus_add_dynid(struct hv_driver *drv, guid_t *guid) 719 { 720 struct vmbus_dynid *dynid; 721 722 dynid = kzalloc(sizeof(*dynid), GFP_KERNEL); 723 if (!dynid) 724 return -ENOMEM; 725 726 dynid->id.guid = *guid; 727 728 spin_lock(&drv->dynids.lock); 729 list_add_tail(&dynid->node, &drv->dynids.list); 730 spin_unlock(&drv->dynids.lock); 731 732 return driver_attach(&drv->driver); 733 } 734 735 static void vmbus_free_dynids(struct hv_driver *drv) 736 { 737 struct vmbus_dynid *dynid, *n; 738 739 spin_lock(&drv->dynids.lock); 740 list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) { 741 list_del(&dynid->node); 742 kfree(dynid); 743 } 744 spin_unlock(&drv->dynids.lock); 745 } 746 747 /* 748 * store_new_id - sysfs frontend to vmbus_add_dynid() 749 * 750 * Allow GUIDs to be added to an existing driver via sysfs. 
 */
static ssize_t new_id_store(struct device_driver *driver, const char *buf,
			    size_t count)
{
	struct hv_driver *drv = drv_to_hv_drv(driver);
	guid_t guid;
	ssize_t retval;

	/* guid_parse() validates the textual GUID the user wrote. */
	retval = guid_parse(buf, &guid);
	if (retval)
		return retval;

	if (hv_vmbus_dynid_match(drv, &guid))
		return -EEXIST;

	retval = vmbus_add_dynid(drv, &guid);
	if (retval)
		return retval;
	return count;
}
static DRIVER_ATTR_WO(new_id);

/*
 * remove_id_store - sysfs frontend to remove a dynamic device ID
 *
 * Removes a dynamically added vmbus device GUID from this driver.
 */
static ssize_t remove_id_store(struct device_driver *driver, const char *buf,
			       size_t count)
{
	struct hv_driver *drv = drv_to_hv_drv(driver);
	struct vmbus_dynid *dynid, *n;
	guid_t guid;
	ssize_t retval;

	retval = guid_parse(buf, &guid);
	if (retval)
		return retval;

	/* -ENODEV unless the GUID is found below. */
	retval = -ENODEV;
	spin_lock(&drv->dynids.lock);
	list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) {
		struct hv_vmbus_device_id *id = &dynid->id;

		if (guid_equal(&id->guid, &guid)) {
			list_del(&dynid->node);
			kfree(dynid);
			retval = count;
			break;
		}
	}
	spin_unlock(&drv->dynids.lock);

	return retval;
}
static DRIVER_ATTR_WO(remove_id);

static struct attribute *vmbus_drv_attrs[] = {
	&driver_attr_new_id.attr,
	&driver_attr_remove_id.attr,
	NULL,
};
ATTRIBUTE_GROUPS(vmbus_drv);


/*
 * vmbus_match - Attempt to match the specified device to the specified driver
 */
static int vmbus_match(struct device *device, const struct device_driver *driver)
{
	const struct hv_driver *drv = drv_to_hv_drv(driver);
	struct hv_device *hv_dev = device_to_hv_device(device);

	/* The hv_sock driver handles all hv_sock offers. */
	if (is_hvsock_channel(hv_dev->channel))
		return drv->hvsock;

	if (hv_vmbus_get_id(drv, hv_dev))
		return 1;

	return 0;
}

/*
 * vmbus_probe - Add the new vmbus's child device
 */
static int vmbus_probe(struct device *child_device)
{
	int ret = 0;
	struct hv_driver *drv =
			drv_to_hv_drv(child_device->driver);
	struct hv_device *dev = device_to_hv_device(child_device);
	const struct hv_vmbus_device_id *dev_id;

	dev_id = hv_vmbus_get_id(drv, dev);
	if (drv->probe) {
		ret = drv->probe(dev, dev_id);
		if (ret != 0)
			pr_err("probe failed for device %s (%d)\n",
			       dev_name(child_device), ret);

	} else {
		pr_err("probe not set for driver %s\n",
		       dev_name(child_device));
		ret = -ENODEV;
	}
	return ret;
}

/*
 * vmbus_dma_configure -- Configure DMA coherence for VMbus device
 */
static int vmbus_dma_configure(struct device *child_device)
{
	/*
	 * On ARM64, propagate the DMA coherence setting from the top level
	 * VMbus ACPI device to the child VMbus device being added here.
	 * On x86/x64 coherence is assumed and these calls have no effect.
	 */
	hv_setup_dma_ops(child_device,
		device_get_dma_attr(vmbus_root_device) == DEV_DMA_COHERENT);
	return 0;
}

/*
 * vmbus_remove - Remove a vmbus device
 */
static void vmbus_remove(struct device *child_device)
{
	struct hv_driver *drv;
	struct hv_device *dev = device_to_hv_device(child_device);

	if (child_device->driver) {
		drv = drv_to_hv_drv(child_device->driver);
		if (drv->remove)
			drv->remove(dev);
	}
}

/*
 * vmbus_shutdown - Shutdown a vmbus device
 */
static void vmbus_shutdown(struct device *child_device)
{
	struct hv_driver *drv;
	struct hv_device *dev = device_to_hv_device(child_device);


	/* The device may not be attached yet */
	if (!child_device->driver)
		return;

	drv = drv_to_hv_drv(child_device->driver);

	if (drv->shutdown)
		drv->shutdown(dev);
}

#ifdef CONFIG_PM_SLEEP
/*
 * vmbus_suspend - Suspend a vmbus device
 */
static int vmbus_suspend(struct device *child_device)
{
	struct hv_driver *drv;
	struct hv_device *dev = device_to_hv_device(child_device);

	/* The device may not be attached yet */
	if (!child_device->driver)
		return 0;

	drv = drv_to_hv_drv(child_device->driver);
	if (!drv->suspend)
		return -EOPNOTSUPP;

	return drv->suspend(dev);
}

/*
 * vmbus_resume - Resume a vmbus device
 */
static int vmbus_resume(struct device *child_device)
{
	struct hv_driver *drv;
	struct hv_device *dev = device_to_hv_device(child_device);

	/* The device may not be attached yet */
	if (!child_device->driver)
		return 0;

	drv = drv_to_hv_drv(child_device->driver);
	if (!drv->resume)
		return -EOPNOTSUPP;

	return drv->resume(dev);
}
#else
#define vmbus_suspend NULL
#define vmbus_resume NULL
#endif /* CONFIG_PM_SLEEP */

/*
 * vmbus_device_release - Final callback release of the
 * vmbus child device
 */
static void vmbus_device_release(struct device *device)
{
	struct hv_device *hv_dev = device_to_hv_device(device);
	struct vmbus_channel *channel = hv_dev->channel;

	hv_debug_rm_dev_dir(hv_dev);

	/* channel_mutex protects channel removal against concurrent offer work. */
	mutex_lock(&vmbus_connection.channel_mutex);
	hv_process_channel_removal(channel);
	mutex_unlock(&vmbus_connection.channel_mutex);
	kfree(hv_dev);
}

/*
 * Note: we must use the "noirq" ops: see the comment before vmbus_bus_pm.
 *
 * suspend_noirq/resume_noirq are set to NULL to support Suspend-to-Idle: we
 * shouldn't suspend the vmbus devices upon Suspend-to-Idle, otherwise there
 * is no way to wake up a Generation-2 VM.
 *
 * The other 4 ops are for hibernation.
 */

static const struct dev_pm_ops vmbus_pm = {
	.suspend_noirq	= NULL,
	.resume_noirq	= NULL,
	.freeze_noirq	= vmbus_suspend,
	.thaw_noirq	= vmbus_resume,
	.poweroff_noirq	= vmbus_suspend,
	.restore_noirq	= vmbus_resume,
};

/* The one and only one */
static const struct bus_type hv_bus = {
	.name =		"vmbus",
	.match =		vmbus_match,
	.shutdown =		vmbus_shutdown,
	.remove =		vmbus_remove,
	.probe =		vmbus_probe,
	.uevent =		vmbus_uevent,
	.dma_configure =	vmbus_dma_configure,
	.dev_groups =		vmbus_dev_groups,
	.drv_groups =		vmbus_drv_groups,
	.bus_groups =		vmbus_bus_groups,
	.pm =			&vmbus_pm,
};

/* Deferred-work wrapper carrying a private copy of one host message. */
struct onmessage_work_context {
	struct work_struct work;
	struct {
		struct hv_message_header header;
		u8 payload[];
	} msg;
};

static void vmbus_onmessage_work(struct work_struct *work)
{
	struct onmessage_work_context *ctx;

	/* Do not process messages if we're in DISCONNECTED state */
	if (vmbus_connection.conn_state == DISCONNECTED)
		return;

	ctx = container_of(work, struct onmessage_work_context,
			   work);
	vmbus_onmessage((struct vmbus_channel_message_header *)
			&ctx->msg.payload);
	kfree(ctx);
}

/*
 * Tasklet handler for the per-CPU SynIC message page: validates the message,
 * then either handles it inline (non-blocking handlers) or copies it into an
 * onmessage_work_context and queues it on the appropriate work queue.
 */
void vmbus_on_msg_dpc(unsigned long data)
{
	struct hv_per_cpu_context *hv_cpu = (void *)data;
	void *page_addr = hv_cpu->synic_message_page;
	struct hv_message msg_copy, *msg = (struct hv_message *)page_addr +
				  VMBUS_MESSAGE_SINT;
	struct vmbus_channel_message_header *hdr;
	enum vmbus_channel_message_type msgtype;
	const struct vmbus_channel_message_table_entry *entry;
	struct onmessage_work_context *ctx;
	__u8 payload_size;
	u32 message_type;

	/*
	 * 'enum vmbus_channel_message_type' is supposed to always be 'u32' as
	 * it is being used in 'struct vmbus_channel_message_header' definition
	 * which is supposed to match hypervisor ABI.
	 */
	BUILD_BUG_ON(sizeof(enum vmbus_channel_message_type) != sizeof(u32));

	/*
	 * Since the message is in memory shared with the host, an erroneous or
	 * malicious Hyper-V could modify the message while vmbus_on_msg_dpc()
	 * or individual message handlers are executing; to prevent this, copy
	 * the message into private memory.
	 */
	memcpy(&msg_copy, msg, sizeof(struct hv_message));

	message_type = msg_copy.header.message_type;
	if (message_type == HVMSG_NONE)
		/* no msg */
		return;

	hdr = (struct vmbus_channel_message_header *)msg_copy.u.payload;
	msgtype = hdr->msgtype;

	trace_vmbus_on_msg_dpc(hdr);

	if (msgtype >= CHANNELMSG_COUNT) {
		WARN_ONCE(1, "unknown msgtype=%d\n", msgtype);
		goto msg_handled;
	}

	payload_size = msg_copy.header.payload_size;
	if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) {
		WARN_ONCE(1, "payload size is too large (%d)\n", payload_size);
		goto msg_handled;
	}

	entry = &channel_message_table[msgtype];

	if (!entry->message_handler)
		goto msg_handled;

	if (payload_size < entry->min_payload_len) {
		WARN_ONCE(1, "message too short: msgtype=%d len=%d\n", msgtype, payload_size);
		goto msg_handled;
	}

	if (entry->handler_type == VMHT_BLOCKING) {
		ctx = kmalloc(struct_size(ctx, msg.payload, payload_size), GFP_ATOMIC);
		if (ctx == NULL)
			return;

		INIT_WORK(&ctx->work, vmbus_onmessage_work);
		ctx->msg.header = msg_copy.header;
		memcpy(&ctx->msg.payload, msg_copy.u.payload, payload_size);

		/*
		 * The host can generate a rescind message while we
		 * may still be handling the original offer. We deal with
		 * this condition by relying on the synchronization provided
		 * by offer_in_progress and by channel_mutex. See also the
		 * inline comments in vmbus_onoffer_rescind().
		 *
		 * NOTE(review): on the ignore_any_offer_msg paths below, ctx
		 * is neither queued nor freed — looks like a deliberate
		 * "we're unloading anyway" leak; confirm.
		 */
		switch (msgtype) {
		case CHANNELMSG_RESCIND_CHANNELOFFER:
			/*
			 * If we are handling the rescind message;
			 * schedule the work on the global work queue.
			 *
			 * The OFFER message and the RESCIND message should
			 * not be handled by the same serialized work queue,
			 * because the OFFER handler may call vmbus_open(),
			 * which tries to open the channel by sending an
			 * OPEN_CHANNEL message to the host and waits for
			 * the host's response; however, if the host has
			 * rescinded the channel before it receives the
			 * OPEN_CHANNEL message, the host just silently
			 * ignores the OPEN_CHANNEL message; as a result,
			 * the guest's OFFER handler hangs for ever, if we
			 * handle the RESCIND message in the same serialized
			 * work queue: the RESCIND handler can not start to
			 * run before the OFFER handler finishes.
			 */
			if (vmbus_connection.ignore_any_offer_msg)
				break;
			queue_work(vmbus_connection.rescind_work_queue, &ctx->work);
			break;

		case CHANNELMSG_OFFERCHANNEL:
			/*
			 * The host sends the offer message of a given channel
			 * before sending the rescind message of the same
			 * channel. These messages are sent to the guest's
			 * connect CPU; the guest then starts processing them
			 * in the tasklet handler on this CPU:
			 *
			 * VMBUS_CONNECT_CPU
			 *
			 * [vmbus_on_msg_dpc()]
			 * atomic_inc()  // CHANNELMSG_OFFERCHANNEL
			 * queue_work()
			 * ...
			 * [vmbus_on_msg_dpc()]
			 * schedule_work()  // CHANNELMSG_RESCIND_CHANNELOFFER
			 *
			 * We rely on the memory-ordering properties of the
			 * queue_work() and schedule_work() primitives, which
			 * guarantee that the atomic increment will be visible
			 * to the CPUs which will execute the offer & rescind
			 * works by the time these works will start execution.
			 */
			if (vmbus_connection.ignore_any_offer_msg)
				break;
			atomic_inc(&vmbus_connection.offer_in_progress);
			fallthrough;

		default:
			queue_work(vmbus_connection.work_queue, &ctx->work);
		}
	} else
		entry->message_handler(hdr);

msg_handled:
	vmbus_signal_eom(msg, message_type);
}

#ifdef CONFIG_PM_SLEEP
/*
 * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for
 * hibernation, because hv_sock connections can not persist across hibernation.
 */
static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
{
	struct onmessage_work_context *ctx;
	struct vmbus_channel_rescind_offer *rescind;

	WARN_ON(!is_hvsock_channel(channel));

	/*
	 * Allocation size is small and the allocation should really not fail,
	 * otherwise the state of the hv_sock connections ends up in limbo.
	 */
	ctx = kzalloc(sizeof(*ctx) + sizeof(*rescind),
		      GFP_KERNEL | __GFP_NOFAIL);

	/*
	 * So far, these are not really used by Linux. Just set them to the
	 * reasonable values conforming to the definitions of the fields.
	 */
	ctx->msg.header.message_type = 1;
	ctx->msg.header.payload_size = sizeof(*rescind);

	/* These values are actually used by Linux.
	 */
	rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.payload;
	rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER;
	rescind->child_relid = channel->offermsg.child_relid;

	INIT_WORK(&ctx->work, vmbus_onmessage_work);

	queue_work(vmbus_connection.work_queue, &ctx->work);
}
#endif /* CONFIG_PM_SLEEP */

/*
 * Schedule all channels with events pending
 */
static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
{
	unsigned long *recv_int_page;
	u32 maxbits, relid;

	/*
	 * The event page can be directly checked to get the id of
	 * the channel that has the interrupt pending.
	 */
	void *page_addr = hv_cpu->synic_event_page;
	union hv_synic_event_flags *event
		= (union hv_synic_event_flags *)page_addr +
					 VMBUS_MESSAGE_SINT;

	maxbits = HV_EVENT_FLAGS_COUNT;
	recv_int_page = event->flags;

	if (unlikely(!recv_int_page))
		return;

	for_each_set_bit(relid, recv_int_page, maxbits) {
		void (*callback_fn)(void *context);
		struct vmbus_channel *channel;

		/* Atomically consume the pending bit; skip if already clear. */
		if (!sync_test_and_clear_bit(relid, recv_int_page))
			continue;

		/* Special case - vmbus channel protocol msg */
		if (relid == 0)
			continue;

		/*
		 * Pairs with the kfree_rcu() in vmbus_chan_release().
		 * Guarantees that the channel data structure doesn't
		 * get freed while the channel pointer below is being
		 * dereferenced.
		 */
		rcu_read_lock();

		/* Find channel based on relid */
		channel = relid2channel(relid);
		if (channel == NULL)
			goto sched_unlock_rcu;

		if (channel->rescind)
			goto sched_unlock_rcu;

		/*
		 * Make sure that the ring buffer data structure doesn't get
		 * freed while we dereference the ring buffer pointer.  Test
		 * for the channel's onchannel_callback being NULL within a
		 * sched_lock critical section.  See also the inline comments
		 * in vmbus_reset_channel_cb().
		 */
		spin_lock(&channel->sched_lock);

		callback_fn = channel->onchannel_callback;
		if (unlikely(callback_fn == NULL))
			goto sched_unlock;

		trace_vmbus_chan_sched(channel);

		++channel->interrupts;

		switch (channel->callback_mode) {
		case HV_CALL_ISR:
			/* Run the callback directly in interrupt context. */
			(*callback_fn)(channel->channel_callback_context);
			break;

		case HV_CALL_BATCHED:
			hv_begin_read(&channel->inbound);
			fallthrough;
		case HV_CALL_DIRECT:
			/* Defer the callback to the channel's tasklet. */
			tasklet_schedule(&channel->callback_event);
		}

	sched_unlock:
		spin_unlock(&channel->sched_lock);
	sched_unlock_rcu:
		rcu_read_unlock();
	}
}

/*
 * Top-level VMbus interrupt handler: schedule pending channel callbacks,
 * then dispatch any pending SynIC message for this CPU.
 */
static void vmbus_isr(void)
{
	struct hv_per_cpu_context *hv_cpu
		= this_cpu_ptr(hv_context.cpu_context);
	void *page_addr;
	struct hv_message *msg;

	vmbus_chan_sched(hv_cpu);

	page_addr = hv_cpu->synic_message_page;
	msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;

	/* Check if there are actual msgs to be processed */
	if (msg->header.message_type != HVMSG_NONE) {
		if (msg->header.message_type == HVMSG_TIMER_EXPIRED) {
			/* Timer messages are handled inline, then EOM'ed. */
			hv_stimer0_isr();
			vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
		} else
			/* All other messages are deferred to the msg tasklet. */
			tasklet_schedule(&hv_cpu->msg_dpc);
	}

	add_interrupt_randomness(vmbus_interrupt);
}

/* Per-cpu IRQ handler wrapper, used when vmbus_irq is a real per-cpu IRQ. */
static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
{
	vmbus_isr();
	return IRQ_HANDLED;
}

/* Work item that initializes the SynIC on the CPU it is scheduled on. */
static void vmbus_percpu_work(struct work_struct *work)
{
	unsigned int cpu = smp_processor_id();

	hv_synic_init(cpu);
}

/*
 * vmbus_bus_init -Main vmbus driver initialization routine.
 *
 * Here, we
 *	- initialize the vmbus driver context
 *	- invoke the vmbus hv main init routine
 *	- retrieve the channel offers
 */
static int vmbus_bus_init(void)
{
	int ret, cpu;
	struct work_struct __percpu *works;

	ret = hv_init();
	if (ret != 0) {
		pr_err("Unable to initialize the hypervisor - 0x%x\n", ret);
		return ret;
	}

	ret = bus_register(&hv_bus);
	if (ret)
		return ret;

	/*
	 * VMbus interrupts are best modeled as per-cpu interrupts. If
	 * on an architecture with support for per-cpu IRQs (e.g. ARM64),
	 * allocate a per-cpu IRQ using standard Linux kernel functionality.
	 * If not on such an architecture (e.g., x86/x64), then rely on
	 * code in the arch-specific portion of the code tree to connect
	 * the VMbus interrupt handler.
	 */

	if (vmbus_irq == -1) {
		hv_setup_vmbus_handler(vmbus_isr);
	} else {
		vmbus_evt = alloc_percpu(long);
		ret = request_percpu_irq(vmbus_irq, vmbus_percpu_isr,
				"Hyper-V VMbus", vmbus_evt);
		if (ret) {
			pr_err("Can't request Hyper-V VMbus IRQ %d, Err %d",
					vmbus_irq, ret);
			free_percpu(vmbus_evt);
			goto err_setup;
		}
	}

	ret = hv_synic_alloc();
	if (ret)
		goto err_alloc;

	works = alloc_percpu(struct work_struct);
	if (!works) {
		ret = -ENOMEM;
		goto err_alloc;
	}

	/*
	 * Initialize the per-cpu interrupt state and stimer state.
	 * Then connect to the host.
	 */
	cpus_read_lock();
	for_each_online_cpu(cpu) {
		struct work_struct *work = per_cpu_ptr(works, cpu);

		INIT_WORK(work, vmbus_percpu_work);
		schedule_work_on(cpu, work);
	}

	/* Wait until every online CPU has finished its SynIC init work. */
	for_each_online_cpu(cpu)
		flush_work(per_cpu_ptr(works, cpu));

	/* Register the callbacks for possible CPU online/offline'ing */
	ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
				hv_synic_init, hv_synic_cleanup);
	cpus_read_unlock();
	free_percpu(works);
	if (ret < 0)
		goto err_alloc;
	/* cpuhp_setup_state_nocalls_cpuslocked() returns the dynamic state id. */
	hyperv_cpuhp_online = ret;

	ret = vmbus_connect();
	if (ret)
		goto err_connect;

	/*
	 * Always register the vmbus unload panic notifier because we
	 * need to shut the VMbus channel connection on panic.
	 */
	atomic_notifier_chain_register(&panic_notifier_list,
				       &hyperv_panic_vmbus_unload_block);

	vmbus_request_offers();

	return 0;

err_connect:
	cpuhp_remove_state(hyperv_cpuhp_online);
err_alloc:
	hv_synic_free();
	if (vmbus_irq == -1) {
		hv_remove_vmbus_handler();
	} else {
		free_percpu_irq(vmbus_irq, vmbus_evt);
		free_percpu(vmbus_evt);
	}
err_setup:
	bus_unregister(&hv_bus);
	return ret;
}

/**
 * __vmbus_driver_register() - Register a vmbus's driver
 * @hv_driver: Pointer to driver structure you want to register
 * @owner: owner module of the drv
 * @mod_name: module name string
 *
 * Registers the given driver with Linux through the 'driver_register()' call
 * and sets up the hyper-v vmbus handling for this driver.
 * It will return the state of the 'driver_register()' call.
 *
 */
int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner, const char *mod_name)
{
	int ret;

	pr_info("registering driver %s\n", hv_driver->name);

	/* Fail early if the vmbus root device has not been set up. */
	ret = vmbus_exists();
	if (ret < 0)
		return ret;

	hv_driver->driver.name = hv_driver->name;
	hv_driver->driver.owner = owner;
	hv_driver->driver.mod_name = mod_name;
	hv_driver->driver.bus = &hv_bus;

	spin_lock_init(&hv_driver->dynids.lock);
	INIT_LIST_HEAD(&hv_driver->dynids.list);

	ret = driver_register(&hv_driver->driver);

	return ret;
}
EXPORT_SYMBOL_GPL(__vmbus_driver_register);

/**
 * vmbus_driver_unregister() - Unregister a vmbus's driver
 * @hv_driver: Pointer to driver structure you want to
 *             un-register
 *
 * Un-register the given driver that was previous registered with a call to
 * vmbus_driver_register()
 */
void vmbus_driver_unregister(struct hv_driver *hv_driver)
{
	pr_info("unregistering driver %s\n", hv_driver->name);

	/* Only unregister while the vmbus root device still exists. */
	if (!vmbus_exists()) {
		driver_unregister(&hv_driver->driver);
		vmbus_free_dynids(hv_driver);
	}
}
EXPORT_SYMBOL_GPL(vmbus_driver_unregister);


/*
 * Called when last reference to channel is gone.
 */
static void vmbus_chan_release(struct kobject *kobj)
{
	struct vmbus_channel *channel
		= container_of(kobj, struct vmbus_channel, kobj);

	kfree_rcu(channel, rcu);
}

/* Per-channel sysfs attribute: show/store operate on the vmbus_channel. */
struct vmbus_chan_attribute {
	struct attribute attr;
	ssize_t (*show)(struct vmbus_channel *chan, char *buf);
	ssize_t (*store)(struct vmbus_channel *chan,
			 const char *buf, size_t count);
};
#define VMBUS_CHAN_ATTR(_name, _mode, _show, _store) \
	struct vmbus_chan_attribute chan_attr_##_name \
		= __ATTR(_name, _mode, _show, _store)
#define VMBUS_CHAN_ATTR_RW(_name) \
	struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RW(_name)
#define VMBUS_CHAN_ATTR_RO(_name) \
	struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RO(_name)
#define VMBUS_CHAN_ATTR_WO(_name) \
	struct vmbus_chan_attribute chan_attr_##_name = __ATTR_WO(_name)

/* Route a kobject sysfs read to the channel attribute's show() callback. */
static ssize_t vmbus_chan_attr_show(struct kobject *kobj,
				    struct attribute *attr, char *buf)
{
	const struct vmbus_chan_attribute *attribute
		= container_of(attr, struct vmbus_chan_attribute, attr);
	struct vmbus_channel *chan
		= container_of(kobj, struct vmbus_channel, kobj);

	if (!attribute->show)
		return -EIO;

	return attribute->show(chan, buf);
}

/* Route a kobject sysfs write to the channel attribute's store() callback. */
static ssize_t vmbus_chan_attr_store(struct kobject *kobj,
				     struct attribute *attr, const char *buf,
				     size_t count)
{
	const struct vmbus_chan_attribute *attribute
		= container_of(attr, struct vmbus_chan_attribute, attr);
	struct vmbus_channel *chan
		= container_of(kobj, struct vmbus_channel, kobj);

	if (!attribute->store)
		return -EIO;

	return attribute->store(chan, buf, count);
}

static const struct sysfs_ops vmbus_chan_sysfs_ops = {
	.show = vmbus_chan_attr_show,
	.store = vmbus_chan_attr_store,
};

/* sysfs: interrupt_mask of the outbound ring buffer. */
static ssize_t out_mask_show(struct vmbus_channel *channel, char *buf)
{
	struct hv_ring_buffer_info *rbi = &channel->outbound;
	ssize_t ret;

	mutex_lock(&rbi->ring_buffer_mutex);
	if (!rbi->ring_buffer) {
		/* Ring buffer not allocated (channel not open). */
		mutex_unlock(&rbi->ring_buffer_mutex);
		return -EINVAL;
	}

	ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
	mutex_unlock(&rbi->ring_buffer_mutex);
	return ret;
}
static VMBUS_CHAN_ATTR_RO(out_mask);

/* sysfs: interrupt_mask of the inbound ring buffer. */
static ssize_t in_mask_show(struct vmbus_channel *channel, char *buf)
{
	struct hv_ring_buffer_info *rbi = &channel->inbound;
	ssize_t ret;

	mutex_lock(&rbi->ring_buffer_mutex);
	if (!rbi->ring_buffer) {
		mutex_unlock(&rbi->ring_buffer_mutex);
		return -EINVAL;
	}

	ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
	mutex_unlock(&rbi->ring_buffer_mutex);
	return ret;
}
static VMBUS_CHAN_ATTR_RO(in_mask);

/* sysfs: bytes currently available to read from the inbound ring buffer. */
static ssize_t read_avail_show(struct vmbus_channel *channel, char *buf)
{
	struct hv_ring_buffer_info *rbi = &channel->inbound;
	ssize_t ret;

	mutex_lock(&rbi->ring_buffer_mutex);
	if (!rbi->ring_buffer) {
		mutex_unlock(&rbi->ring_buffer_mutex);
		return -EINVAL;
	}

	ret = sprintf(buf, "%u\n", hv_get_bytes_to_read(rbi));
	mutex_unlock(&rbi->ring_buffer_mutex);
	return ret;
}
static VMBUS_CHAN_ATTR_RO(read_avail);

/* sysfs: bytes of space currently available in the outbound ring buffer. */
static ssize_t write_avail_show(struct vmbus_channel *channel, char *buf)
{
	struct hv_ring_buffer_info *rbi = &channel->outbound;
	ssize_t ret;

	mutex_lock(&rbi->ring_buffer_mutex);
	if (!rbi->ring_buffer) {
		mutex_unlock(&rbi->ring_buffer_mutex);
		return -EINVAL;
	}

	ret = sprintf(buf, "%u\n", hv_get_bytes_to_write(rbi));
	mutex_unlock(&rbi->ring_buffer_mutex);
	return ret;
}
static VMBUS_CHAN_ATTR_RO(write_avail);

/* sysfs: CPU to which the channel's interrupts are currently targeted. */
static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf)
{
	return sprintf(buf, "%u\n", channel->target_cpu);
}

int vmbus_channel_set_cpu(struct vmbus_channel *channel, u32 target_cpu)
{
	u32 origin_cpu;
	int ret = 0;

	/* Callers must hold cpus_read_lock() and the channel_mutex. */
	lockdep_assert_cpus_held();
	lockdep_assert_held(&vmbus_connection.channel_mutex);

	/* Retargeting requires the MODIFYCHANNEL message (protocol >= 4.1). */
	if (vmbus_proto_version < VERSION_WIN10_V4_1)
		return -EIO;

	/* Validate target_cpu for the cpumask_test_cpu() operation below. */
	if (target_cpu >= nr_cpumask_bits)
		return -EINVAL;

	/* Only allow targeting housekeeping (non-isolated) CPUs. */
	if (!cpumask_test_cpu(target_cpu, housekeeping_cpumask(HK_TYPE_MANAGED_IRQ)))
		return -EINVAL;

	if (!cpu_online(target_cpu))
		return -EINVAL;

	/*
	 * Synchronizes vmbus_channel_set_cpu() and channel closure:
	 *
	 * { Initially: state = CHANNEL_OPENED }
	 *
	 * CPU1				CPU2
	 *
	 * [vmbus_channel_set_cpu()]	[vmbus_disconnect_ring()]
	 *
	 * LOCK channel_mutex		LOCK channel_mutex
	 * LOAD r1 = state		LOAD r2 = state
	 * IF (r1 == CHANNEL_OPENED)	IF (r2 == CHANNEL_OPENED)
	 *   SEND MODIFYCHANNEL		  STORE state = CHANNEL_OPEN
	 *   [...]			  SEND CLOSECHANNEL
	 * UNLOCK channel_mutex		UNLOCK channel_mutex
	 *
	 * Forbids: r1 == r2 == CHANNEL_OPENED (i.e., CPU1's LOCK precedes
	 *		CPU2's LOCK) && CPU2's SEND precedes CPU1's SEND
	 *
	 * Note.  The host processes the channel messages "sequentially", in
	 * the order in which they are received on a per-partition basis.
	 */

	/*
	 * Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels;
	 * avoid sending the message and fail here for such channels.
	 */
	if (channel->state != CHANNEL_OPENED_STATE) {
		ret = -EIO;
		goto end;
	}

	origin_cpu = channel->target_cpu;
	/* Nothing to do if the channel is already on the requested CPU. */
	if (target_cpu == origin_cpu)
		goto end;

	if (vmbus_send_modifychannel(channel,
				     hv_cpu_number_to_vp_number(target_cpu))) {
		ret = -EIO;
		goto end;
	}

	/*
	 * For version before VERSION_WIN10_V5_3, the following warning holds:
	 *
	 * Warning.  At this point, there is *no* guarantee that the host will
	 * have successfully processed the vmbus_send_modifychannel() request.
	 * See the header comment of vmbus_send_modifychannel() for more info.
	 *
	 * Lags in the processing of the above vmbus_send_modifychannel() can
	 * result in missed interrupts if the "old" target CPU is taken offline
	 * before Hyper-V starts sending interrupts to the "new" target CPU.
	 * But apart from this offlining scenario, the code tolerates such
	 * lags.  It will function correctly even if a channel interrupt comes
	 * in on a CPU that is different from the channel target_cpu value.
	 */

	channel->target_cpu = target_cpu;

	/* See init_vp_index(). */
	if (hv_is_perf_channel(channel))
		hv_update_allocated_cpus(origin_cpu, target_cpu);

	/* Currently set only for storvsc channels.
*/ 1707 if (channel->change_target_cpu_callback) { 1708 (*channel->change_target_cpu_callback)(channel, 1709 origin_cpu, target_cpu); 1710 } 1711 1712 end: 1713 return ret; 1714 } 1715 1716 static ssize_t target_cpu_store(struct vmbus_channel *channel, 1717 const char *buf, size_t count) 1718 { 1719 u32 target_cpu; 1720 ssize_t ret; 1721 1722 if (sscanf(buf, "%uu", &target_cpu) != 1) 1723 return -EIO; 1724 1725 cpus_read_lock(); 1726 mutex_lock(&vmbus_connection.channel_mutex); 1727 ret = vmbus_channel_set_cpu(channel, target_cpu); 1728 mutex_unlock(&vmbus_connection.channel_mutex); 1729 cpus_read_unlock(); 1730 1731 return ret ?: count; 1732 } 1733 static VMBUS_CHAN_ATTR(cpu, 0644, target_cpu_show, target_cpu_store); 1734 1735 static ssize_t channel_pending_show(struct vmbus_channel *channel, 1736 char *buf) 1737 { 1738 return sprintf(buf, "%d\n", 1739 channel_pending(channel, 1740 vmbus_connection.monitor_pages[1])); 1741 } 1742 static VMBUS_CHAN_ATTR(pending, 0444, channel_pending_show, NULL); 1743 1744 static ssize_t channel_latency_show(struct vmbus_channel *channel, 1745 char *buf) 1746 { 1747 return sprintf(buf, "%d\n", 1748 channel_latency(channel, 1749 vmbus_connection.monitor_pages[1])); 1750 } 1751 static VMBUS_CHAN_ATTR(latency, 0444, channel_latency_show, NULL); 1752 1753 static ssize_t channel_interrupts_show(struct vmbus_channel *channel, char *buf) 1754 { 1755 return sprintf(buf, "%llu\n", channel->interrupts); 1756 } 1757 static VMBUS_CHAN_ATTR(interrupts, 0444, channel_interrupts_show, NULL); 1758 1759 static ssize_t channel_events_show(struct vmbus_channel *channel, char *buf) 1760 { 1761 return sprintf(buf, "%llu\n", channel->sig_events); 1762 } 1763 static VMBUS_CHAN_ATTR(events, 0444, channel_events_show, NULL); 1764 1765 static ssize_t channel_intr_in_full_show(struct vmbus_channel *channel, 1766 char *buf) 1767 { 1768 return sprintf(buf, "%llu\n", 1769 (unsigned long long)channel->intr_in_full); 1770 } 1771 static 
VMBUS_CHAN_ATTR(intr_in_full, 0444, channel_intr_in_full_show, NULL);

/* sysfs: channel->intr_out_empty counter. */
static ssize_t channel_intr_out_empty_show(struct vmbus_channel *channel,
					   char *buf)
{
	return sprintf(buf, "%llu\n",
		       (unsigned long long)channel->intr_out_empty);
}
static VMBUS_CHAN_ATTR(intr_out_empty, 0444, channel_intr_out_empty_show, NULL);

/* sysfs: channel->out_full_first counter. */
static ssize_t channel_out_full_first_show(struct vmbus_channel *channel,
					   char *buf)
{
	return sprintf(buf, "%llu\n",
		       (unsigned long long)channel->out_full_first);
}
static VMBUS_CHAN_ATTR(out_full_first, 0444, channel_out_full_first_show, NULL);

/* sysfs: channel->out_full_total counter. */
static ssize_t channel_out_full_total_show(struct vmbus_channel *channel,
					   char *buf)
{
	return sprintf(buf, "%llu\n",
		       (unsigned long long)channel->out_full_total);
}
static VMBUS_CHAN_ATTR(out_full_total, 0444, channel_out_full_total_show, NULL);

/* sysfs: monitor id assigned to this channel in the offer message. */
static ssize_t subchannel_monitor_id_show(struct vmbus_channel *channel,
					  char *buf)
{
	return sprintf(buf, "%u\n", channel->offermsg.monitorid);
}
static VMBUS_CHAN_ATTR(monitor_id, 0444, subchannel_monitor_id_show, NULL);

/* sysfs: sub-channel index from the offer message. */
static ssize_t subchannel_id_show(struct vmbus_channel *channel,
				  char *buf)
{
	return sprintf(buf, "%u\n",
		       channel->offermsg.offer.sub_channel_index);
}
static VMBUS_CHAN_ATTR_RO(subchannel_id);

/* mmap handler for the per-channel "ring" sysfs binary attribute. */
static int hv_mmap_ring_buffer_wrapper(struct file *filp, struct kobject *kobj,
				       const struct bin_attribute *attr,
				       struct vm_area_struct *vma)
{
	struct vmbus_channel *channel = container_of(kobj, struct vmbus_channel, kobj);

	/*
	 * hv_(create|remove)_ring_sysfs implementation ensures that mmap_ring_buffer
	 * is not NULL.
	 */
	return channel->mmap_ring_buffer(channel, vma);
}

static struct bin_attribute chan_attr_ring_buffer = {
	.attr = {
		.name = "ring",
		.mode = 0600,
	},
	.mmap = hv_mmap_ring_buffer_wrapper,
};

/* Default attributes attached to every channel's sysfs directory. */
static struct attribute *vmbus_chan_attrs[] = {
	&chan_attr_out_mask.attr,
	&chan_attr_in_mask.attr,
	&chan_attr_read_avail.attr,
	&chan_attr_write_avail.attr,
	&chan_attr_cpu.attr,
	&chan_attr_pending.attr,
	&chan_attr_latency.attr,
	&chan_attr_interrupts.attr,
	&chan_attr_events.attr,
	&chan_attr_intr_in_full.attr,
	&chan_attr_intr_out_empty.attr,
	&chan_attr_out_full_first.attr,
	&chan_attr_out_full_total.attr,
	&chan_attr_monitor_id.attr,
	&chan_attr_subchannel_id.attr,
	NULL
};

static struct bin_attribute *vmbus_chan_bin_attrs[] = {
	&chan_attr_ring_buffer,
	NULL
};

/*
 * Channel-level attribute_group callback function. Returns the permission for
 * each attribute, and returns 0 if an attribute is not visible.
 */
static umode_t vmbus_chan_attr_is_visible(struct kobject *kobj,
					  struct attribute *attr, int idx)
{
	const struct vmbus_channel *channel =
		container_of(kobj, struct vmbus_channel, kobj);

	/* Hide the monitor attributes if the monitor mechanism is not used. */
	if (!channel->offermsg.monitor_allocated &&
	    (attr == &chan_attr_pending.attr ||
	     attr == &chan_attr_latency.attr ||
	     attr == &chan_attr_monitor_id.attr))
		return 0;

	return attr->mode;
}

static umode_t vmbus_chan_bin_attr_is_visible(struct kobject *kobj,
					      const struct bin_attribute *attr, int idx)
{
	const struct vmbus_channel *channel =
		container_of(kobj, struct vmbus_channel, kobj);

	/* Hide ring attribute if channel's ring_sysfs_visible is set to false */
	if (attr == &chan_attr_ring_buffer && !channel->ring_sysfs_visible)
		return 0;

	return attr->attr.mode;
}

/* Size of the "ring" binary attribute: the channel's ring buffer size in bytes. */
static size_t vmbus_chan_bin_size(struct kobject *kobj,
				  const struct bin_attribute *bin_attr, int a)
{
	const struct vmbus_channel *channel =
		container_of(kobj, struct vmbus_channel, kobj);

	return channel->ringbuffer_pagecount << PAGE_SHIFT;
}

static const struct attribute_group vmbus_chan_group = {
	.attrs = vmbus_chan_attrs,
	.bin_attrs = vmbus_chan_bin_attrs,
	.is_visible = vmbus_chan_attr_is_visible,
	.is_bin_visible = vmbus_chan_bin_attr_is_visible,
	.bin_size = vmbus_chan_bin_size,
};

static const struct kobj_type vmbus_chan_ktype = {
	.sysfs_ops = &vmbus_chan_sysfs_ops,
	.release = vmbus_chan_release,
};

/**
 * hv_create_ring_sysfs() - create "ring" sysfs entry corresponding to ring buffers for a channel.
 * @channel: Pointer to vmbus_channel structure
 * @hv_mmap_ring_buffer: function pointer for initializing the function to be called on mmap of
 *                       channel's "ring" sysfs node, which is for the ring buffer of that channel.
 *                       Function pointer is of below type:
 *                       int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel,
 *                                                  struct vm_area_struct *vma))
 *                       This has a pointer to the channel and a pointer to vm_area_struct,
 *                       used for mmap, as arguments.
 *
 * Sysfs node for ring buffer of a channel is created along with other fields, however its
 * visibility is disabled by default. Sysfs creation needs to be controlled when the use-case
 * is running.
 * For example, HV_NIC device is used either by uio_hv_generic or hv_netvsc at any given point of
 * time, and "ring" sysfs is needed only when uio_hv_generic is bound to that device. To avoid
 * exposing the ring buffer by default, this function is reponsible to enable visibility of
 * ring for userspace to use.
 * Note: Race conditions can happen with userspace and it is not encouraged to create new
 *       use-cases for this. This was added to maintain backward compatibility, while solving
 *       one of the race conditions in uio_hv_generic while creating sysfs.
 *
 * Returns 0 on success or error code on failure.
 */
int hv_create_ring_sysfs(struct vmbus_channel *channel,
			 int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel,
						    struct vm_area_struct *vma))
{
	struct kobject *kobj = &channel->kobj;

	/* Install the mmap callback before making the attribute visible. */
	channel->mmap_ring_buffer = hv_mmap_ring_buffer;
	channel->ring_sysfs_visible = true;

	return sysfs_update_group(kobj, &vmbus_chan_group);
}
EXPORT_SYMBOL_GPL(hv_create_ring_sysfs);

/**
 * hv_remove_ring_sysfs() - remove ring sysfs entry corresponding to ring buffers for a channel.
 * @channel: Pointer to vmbus_channel structure
 *
 * Hide "ring" sysfs for a channel by changing its is_visible attribute and updating sysfs group.
 *
 * Returns 0 on success or error code on failure.
 */
int hv_remove_ring_sysfs(struct vmbus_channel *channel)
{
	struct kobject *kobj = &channel->kobj;
	int ret;

	/* Hide the attribute first, then drop the mmap callback. */
	channel->ring_sysfs_visible = false;
	ret = sysfs_update_group(kobj, &vmbus_chan_group);
	channel->mmap_ring_buffer = NULL;
	return ret;
}
EXPORT_SYMBOL_GPL(hv_remove_ring_sysfs);

/*
 * vmbus_add_channel_kobj - setup a sub-directory under device/channels
 */
int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel)
{
	const struct device *device = &dev->device;
	struct kobject *kobj = &channel->kobj;
	u32 relid = channel->offermsg.child_relid;
	int ret;

	/* The channel's sysfs directory is named after its relid. */
	kobj->kset = dev->channels_kset;
	ret = kobject_init_and_add(kobj, &vmbus_chan_ktype, NULL,
				   "%u", relid);
	if (ret) {
		kobject_put(kobj);
		return ret;
	}

	ret = sysfs_create_group(kobj, &vmbus_chan_group);

	if (ret) {
		/*
		 * The calling functions' error handling paths will cleanup the
		 * empty channel directory.
		 */
		kobject_put(kobj);
		dev_err(device, "Unable to set up channel sysfs files\n");
		return ret;
	}

	kobject_uevent(kobj, KOBJ_ADD);

	return 0;
}

/*
 * vmbus_remove_channel_attr_group - remove the channel's attribute group
 */
void vmbus_remove_channel_attr_group(struct vmbus_channel *channel)
{
	sysfs_remove_group(&channel->kobj, &vmbus_chan_group);
}

/*
 * vmbus_device_create - Creates and registers a new child device
 * on the vmbus.
2014 */ 2015 struct hv_device *vmbus_device_create(const guid_t *type, 2016 const guid_t *instance, 2017 struct vmbus_channel *channel) 2018 { 2019 struct hv_device *child_device_obj; 2020 2021 child_device_obj = kzalloc(sizeof(struct hv_device), GFP_KERNEL); 2022 if (!child_device_obj) { 2023 pr_err("Unable to allocate device object for child device\n"); 2024 return NULL; 2025 } 2026 2027 child_device_obj->channel = channel; 2028 guid_copy(&child_device_obj->dev_type, type); 2029 guid_copy(&child_device_obj->dev_instance, instance); 2030 child_device_obj->vendor_id = PCI_VENDOR_ID_MICROSOFT; 2031 2032 return child_device_obj; 2033 } 2034 2035 /* 2036 * vmbus_device_register - Register the child device 2037 */ 2038 int vmbus_device_register(struct hv_device *child_device_obj) 2039 { 2040 struct kobject *kobj = &child_device_obj->device.kobj; 2041 int ret; 2042 2043 dev_set_name(&child_device_obj->device, "%pUl", 2044 &child_device_obj->channel->offermsg.offer.if_instance); 2045 2046 child_device_obj->device.bus = &hv_bus; 2047 child_device_obj->device.parent = vmbus_root_device; 2048 child_device_obj->device.release = vmbus_device_release; 2049 2050 child_device_obj->device.dma_parms = &child_device_obj->dma_parms; 2051 child_device_obj->device.dma_mask = &child_device_obj->dma_mask; 2052 dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64)); 2053 2054 /* 2055 * Register with the LDM. 
This will kick off the driver/device 2056 * binding...which will eventually call vmbus_match() and vmbus_probe() 2057 */ 2058 ret = device_register(&child_device_obj->device); 2059 if (ret) { 2060 pr_err("Unable to register child device\n"); 2061 put_device(&child_device_obj->device); 2062 return ret; 2063 } 2064 2065 child_device_obj->channels_kset = kset_create_and_add("channels", 2066 NULL, kobj); 2067 if (!child_device_obj->channels_kset) { 2068 ret = -ENOMEM; 2069 goto err_dev_unregister; 2070 } 2071 2072 ret = vmbus_add_channel_kobj(child_device_obj, 2073 child_device_obj->channel); 2074 if (ret) { 2075 pr_err("Unable to register primary channeln"); 2076 goto err_kset_unregister; 2077 } 2078 hv_debug_add_dev_dir(child_device_obj); 2079 2080 return 0; 2081 2082 err_kset_unregister: 2083 kset_unregister(child_device_obj->channels_kset); 2084 2085 err_dev_unregister: 2086 device_unregister(&child_device_obj->device); 2087 return ret; 2088 } 2089 2090 /* 2091 * vmbus_device_unregister - Remove the specified child device 2092 * from the vmbus. 2093 */ 2094 void vmbus_device_unregister(struct hv_device *device_obj) 2095 { 2096 pr_debug("child device %s unregistered\n", 2097 dev_name(&device_obj->device)); 2098 2099 kset_unregister(device_obj->channels_kset); 2100 2101 /* 2102 * Kick off the process of unregistering the device. 2103 * This will call vmbus_remove() and eventually vmbus_device_release() 2104 */ 2105 device_unregister(&device_obj->device); 2106 } 2107 EXPORT_SYMBOL_GPL(vmbus_device_unregister); 2108 2109 #ifdef CONFIG_ACPI 2110 /* 2111 * VMBUS is an acpi enumerated device. Get the information we 2112 * need from DSDT. 
 */
static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx)
{
	resource_size_t start = 0;
	resource_size_t end = 0;
	struct resource *new_res;
	struct resource **old_res = &hyperv_mmio;
	struct resource **prev_res = NULL;
	struct resource r;

	switch (res->type) {

	/*
	 * "Address" descriptors are for bus windows. Ignore
	 * "memory" descriptors, which are for registers on
	 * devices.
	 */
	case ACPI_RESOURCE_TYPE_ADDRESS32:
		start = res->data.address32.address.minimum;
		end = res->data.address32.address.maximum;
		break;

	case ACPI_RESOURCE_TYPE_ADDRESS64:
		start = res->data.address64.address.minimum;
		end = res->data.address64.address.maximum;
		break;

	/*
	 * The IRQ information is needed only on ARM64, which Hyper-V
	 * sets up in the extended format. IRQ information is present
	 * on x86/x64 in the non-extended format but it is not used by
	 * Linux. So don't bother checking for the non-extended format.
	 */
	case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
		if (!acpi_dev_resource_interrupt(res, 0, &r)) {
			pr_err("Unable to parse Hyper-V ACPI interrupt\n");
			return AE_ERROR;
		}
		/* ARM64 INTID for VMbus */
		vmbus_interrupt = res->data.extended_irq.interrupts[0];
		/* Linux IRQ number */
		vmbus_irq = r.start;
		return AE_OK;

	default:
		/* Unused resource type */
		return AE_OK;

	}
	/*
	 * Ignore ranges that are below 1MB, as they're not
	 * necessary or useful here.
	 */
	if (end < 0x100000)
		return AE_OK;

	/* GFP_ATOMIC: this may run in a context where sleeping is unsafe */
	new_res = kzalloc(sizeof(*new_res), GFP_ATOMIC);
	if (!new_res)
		return AE_NO_MEMORY;

	/* If this range overlaps the virtual TPM, truncate it. */
	if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS)
		end = VTPM_BASE_ADDRESS;

	new_res->name = "hyperv mmio";
	new_res->flags = IORESOURCE_MEM;
	new_res->start = start;
	new_res->end = end;

	/*
	 * Insert into the sorted singly-linked hyperv_mmio list.
	 * If two ranges are adjacent, merge them.
	 */
	do {
		if (!*old_res) {
			*old_res = new_res;
			break;
		}

		/* New range starts right after the current one: extend it */
		if (((*old_res)->end + 1) == new_res->start) {
			(*old_res)->end = new_res->end;
			kfree(new_res);
			break;
		}

		/* New range ends right before the current one: extend down */
		if ((*old_res)->start == new_res->end + 1) {
			(*old_res)->start = new_res->start;
			kfree(new_res);
			break;
		}

		/* Current range lies entirely above: insert before it */
		if ((*old_res)->start > new_res->end) {
			new_res->sibling = *old_res;
			if (prev_res)
				(*prev_res)->sibling = new_res;
			*old_res = new_res;
			break;
		}

		prev_res = old_res;
		old_res = &(*old_res)->sibling;

	} while (1);

	return AE_OK;
}
#endif

/* Free the hyperv_mmio list built by vmbus_walk_resources()/vmbus_device_add() */
static void vmbus_mmio_remove(void)
{
	struct resource *cur_res;
	struct resource *next_res;

	if (hyperv_mmio) {
		if (fb_mmio) {
			__release_region(hyperv_mmio, fb_mmio->start,
					 resource_size(fb_mmio));
			fb_mmio = NULL;
		}

		for (cur_res = hyperv_mmio; cur_res; cur_res = next_res) {
			next_res = cur_res->sibling;
			kfree(cur_res);
		}
	}
}

/* Reserve the frame buffer region so a video driver can claim it later */
static void __maybe_unused vmbus_reserve_fb(void)
{
	resource_size_t start = 0, size;
	struct pci_dev *pdev;

	if (efi_enabled(EFI_BOOT)) {
		/* Gen2 VM: get FB base from EFI framebuffer */
		if (IS_ENABLED(CONFIG_SYSFB)) {
			start = screen_info.lfb_base;
			size = max_t(__u32, screen_info.lfb_size, 0x800000);
		}
	} else {
		/* Gen1 VM: get FB base from PCI */
		pdev = pci_get_device(PCI_VENDOR_ID_MICROSOFT,
				      PCI_DEVICE_ID_HYPERV_VIDEO, NULL);
		if (!pdev)
			return;

		if (pdev->resource[0].flags & IORESOURCE_MEM) {
			start =
pci_resource_start(pdev, 0);
			size = pci_resource_len(pdev, 0);
		}

		/*
		 * Release the PCI device so hyperv_drm or hyperv_fb driver can
		 * grab it later.
		 */
		pci_dev_put(pdev);
	}

	if (!start)
		return;

	/*
	 * Make a claim for the frame buffer in the resource tree under the
	 * first node, which will be the one below 4GB.  The length seems to
	 * be underreported, particularly in a Generation 1 VM.  So start out
	 * reserving a larger area and make it smaller until it succeeds.
	 */
	for (; !fb_mmio && (size >= 0x100000); size >>= 1)
		fb_mmio = __request_region(hyperv_mmio, start, size, fb_mmio_name, 0);
}

/**
 * vmbus_allocate_mmio() - Pick a memory-mapped I/O range.
 * @new:		If successful, supplied a pointer to the
 *			allocated MMIO space.
 * @device_obj:		Identifies the caller
 * @min:		Minimum guest physical address of the
 *			allocation
 * @max:		Maximum guest physical address
 * @size:		Size of the range to be allocated
 * @align:		Alignment of the range to be allocated
 * @fb_overlap_ok:	Whether this allocation can be allowed
 *			to overlap the video frame buffer.
 *
 * This function walks the resources granted to VMBus by the
 * _CRS object in the ACPI namespace underneath the parent
 * "bridge" whether that's a root PCI bus in the Generation 1
 * case or a Module Device in the Generation 2 case.  It then
 * attempts to allocate from the global MMIO pool in a way that
 * matches the constraints supplied in these parameters and by
 * that _CRS.
 *
 * Return: 0 on success, -errno on failure
 */
int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
			resource_size_t min, resource_size_t max,
			resource_size_t size, resource_size_t align,
			bool fb_overlap_ok)
{
	struct resource *iter, *shadow;
	resource_size_t range_min, range_max, start, end;
	const char *dev_n = dev_name(&device_obj->device);
	int retval;

	retval = -ENXIO;
	mutex_lock(&hyperv_mmio_lock);

	/*
	 * If overlaps with frame buffers are allowed, then first attempt to
	 * make the allocation from within the reserved region. Because it
	 * is already reserved, no shadow allocation is necessary.
	 */
	if (fb_overlap_ok && fb_mmio && !(min > fb_mmio->end) &&
	    !(max < fb_mmio->start)) {

		range_min = fb_mmio->start;
		range_max = fb_mmio->end;
		/* Round up to the requested alignment (align assumed power of 2) */
		start = (range_min + align - 1) & ~(align - 1);
		for (; start + size - 1 <= range_max; start += align) {
			*new = request_mem_region_exclusive(start, size, dev_n);
			if (*new) {
				retval = 0;
				goto exit;
			}
		}
	}

	for (iter = hyperv_mmio; iter; iter = iter->sibling) {
		/* Skip windows entirely outside the [min, max] constraint */
		if ((iter->start >= max) || (iter->end <= min))
			continue;

		range_min = iter->start;
		range_max = iter->end;
		start = (range_min + align - 1) & ~(align - 1);
		for (; start + size - 1 <= range_max; start += align) {
			end = start + size - 1;

			/* Skip the whole fb_mmio region if not fb_overlap_ok */
			if (!fb_overlap_ok && fb_mmio &&
			    (((start >= fb_mmio->start) && (start <= fb_mmio->end)) ||
			     ((end >= fb_mmio->start) && (end <= fb_mmio->end))))
				continue;

			/*
			 * "Shadow" claim in the hyperv_mmio tree mirrors the
			 * claim made in the global iomem tree below.
			 */
			shadow = __request_region(iter, start, size, NULL,
						  IORESOURCE_BUSY);
			if (!shadow)
				continue;

			*new = request_mem_region_exclusive(start, size, dev_n);
			if (*new) {
				/*
				 * Stash the real region pointer in the shadow's
				 * name so vmbus_free_mmio() can correlate them.
				 */
				shadow->name = (char *)*new;
				retval = 0;
				goto exit;
			}

			__release_region(iter, start, size);
		}
	}

exit:
	mutex_unlock(&hyperv_mmio_lock);
	return retval;
}
EXPORT_SYMBOL_GPL(vmbus_allocate_mmio);

/**
 * vmbus_free_mmio() - Free a memory-mapped I/O range.
 * @start:		Base address of region to release.
 * @size:		Size of the range to be allocated
 *
 * This function releases anything requested by
 * vmbus_mmio_allocate().
 */
void vmbus_free_mmio(resource_size_t start, resource_size_t size)
{
	struct resource *iter;

	mutex_lock(&hyperv_mmio_lock);

	/*
	 * If all bytes of the MMIO range to be released are within the
	 * special case fb_mmio shadow region, skip releasing the shadow
	 * region since no corresponding __request_region() was done
	 * in vmbus_allocate_mmio().
	 */
	if (fb_mmio && start >= fb_mmio->start &&
	    (start + size - 1 <= fb_mmio->end))
		goto skip_shadow_release;

	/* Release the shadow claim(s) made in the hyperv_mmio tree */
	for (iter = hyperv_mmio; iter; iter = iter->sibling) {
		if ((iter->start >= start + size) || (iter->end <= start))
			continue;

		__release_region(iter, start, size);
	}

skip_shadow_release:
	release_mem_region(start, size);
	mutex_unlock(&hyperv_mmio_lock);

}
EXPORT_SYMBOL_GPL(vmbus_free_mmio);

#ifdef CONFIG_ACPI
/* Probe path for ACPI-enumerated VMBus: collect MMIO/IRQ info from the DSDT */
static int vmbus_acpi_add(struct platform_device *pdev)
{
	acpi_status result;
	int ret_val = -ENODEV;
	struct acpi_device *ancestor;
	struct acpi_device *device = ACPI_COMPANION(&pdev->dev);

	vmbus_root_device = &device->dev;

	/*
	 * Older versions of Hyper-V for ARM64 fail to include the _CCA
	 * method on the top level VMbus device in the DSDT. But devices
	 * are hardware coherent in all current Hyper-V use cases, so fix
	 * up the ACPI device to behave as if _CCA is present and indicates
	 * hardware coherence.
	 */
	ACPI_COMPANION_SET(&device->dev, device);
	if (IS_ENABLED(CONFIG_ACPI_CCA_REQUIRED) &&
	    device_get_dma_attr(&device->dev) == DEV_DMA_NOT_SUPPORTED) {
		pr_info("No ACPI _CCA found; assuming coherent device I/O\n");
		device->flags.cca_seen = true;
		device->flags.coherent_dma = true;
	}

	/* Parse the VMBus device's own _CRS (MMIO windows and, on ARM64, IRQ) */
	result = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
				     vmbus_walk_resources, NULL);

	if (ACPI_FAILURE(result))
		goto acpi_walk_err;
	/*
	 * Some ancestor of the vmbus acpi device (Gen1 or Gen2
	 * firmware) is the VMOD that has the mmio ranges. Get that.
	 */
	for (ancestor = acpi_dev_parent(device);
	     ancestor && ancestor->handle != ACPI_ROOT_OBJECT;
	     ancestor = acpi_dev_parent(ancestor)) {
		result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS,
					     vmbus_walk_resources, NULL);

		if (ACPI_FAILURE(result))
			continue;
		if (hyperv_mmio) {
			vmbus_reserve_fb();
			break;
		}
	}
	ret_val = 0;

acpi_walk_err:
	/* On any failure above, free whatever MMIO list was built so far */
	if (ret_val)
		vmbus_mmio_remove();
	return ret_val;
}
#else
static int vmbus_acpi_add(struct platform_device *pdev)
{
	return 0;
}
#endif

/* Resolve the VMBus Linux IRQ and hardware IRQ number from the platform device */
static int vmbus_set_irq(struct platform_device *pdev)
{
	struct irq_data *data;
	int irq;
	irq_hw_number_t hwirq;

	irq = platform_get_irq(pdev, 0);
	/* platform_get_irq() may not return 0.
 */
	if (irq < 0)
		return irq;

	data = irq_get_irq_data(irq);
	if (!data) {
		pr_err("No interrupt data for VMBus virq %d\n", irq);
		return -ENODEV;
	}
	hwirq = irqd_to_hwirq(data);

	vmbus_irq = irq;
	vmbus_interrupt = hwirq;
	pr_debug("VMBus virq %d, hwirq %d\n", vmbus_irq, vmbus_interrupt);

	return 0;
}

/* Probe path for DT-enumerated VMBus: build hyperv_mmio from "ranges" */
static int vmbus_device_add(struct platform_device *pdev)
{
	struct resource **cur_res = &hyperv_mmio;
	struct of_range range;
	struct of_range_parser parser;
	struct device_node *np = pdev->dev.of_node;
	int ret;

	vmbus_root_device = &pdev->dev;

	ret = of_range_parser_init(&parser, np);
	if (ret)
		return ret;

	/* Only needed where there is no architectural hypervisor vector */
	if (!__is_defined(HYPERVISOR_CALLBACK_VECTOR))
		ret = vmbus_set_irq(pdev);
	if (ret)
		return ret;

	for_each_of_range(&parser, &range) {
		struct resource *res;

		res = kzalloc(sizeof(*res), GFP_KERNEL);
		if (!res) {
			vmbus_mmio_remove();
			return -ENOMEM;
		}

		res->name = "hyperv mmio";
		res->flags = range.flags;
		res->start = range.cpu_addr;
		/*
		 * NOTE(review): resource 'end' is conventionally inclusive
		 * (start + size - 1); this stores start + size — confirm
		 * against upstream before relying on the exact bound.
		 */
		res->end = range.cpu_addr + range.size;

		*cur_res = res;
		cur_res = &res->sibling;
	}

	return ret;
}

/* Dispatch to the ACPI or DT probe path depending on firmware type */
static int vmbus_platform_driver_probe(struct platform_device *pdev)
{
	if (acpi_disabled)
		return vmbus_device_add(pdev);
	else
		return vmbus_acpi_add(pdev);
}

static void vmbus_platform_driver_remove(struct platform_device *pdev)
{
	vmbus_mmio_remove();
}

#ifdef CONFIG_PM_SLEEP
static int vmbus_bus_suspend(struct device *dev)
{
	struct hv_per_cpu_context *hv_cpu = per_cpu_ptr(
			hv_context.cpu_context, VMBUS_CONNECT_CPU);
	struct vmbus_channel *channel, *sc;

	/* Briefly quiesce the message DPC while flipping the ignore flag */
	tasklet_disable(&hv_cpu->msg_dpc);
	vmbus_connection.ignore_any_offer_msg = true;
	/* The tasklet_enable() takes care of
	   providing a memory barrier */
	tasklet_enable(&hv_cpu->msg_dpc);

	/* Drain all the workqueues as we are in suspend */
	drain_workqueue(vmbus_connection.rescind_work_queue);
	drain_workqueue(vmbus_connection.work_queue);
	drain_workqueue(vmbus_connection.handle_primary_chan_wq);
	drain_workqueue(vmbus_connection.handle_sub_chan_wq);

	mutex_lock(&vmbus_connection.channel_mutex);
	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (!is_hvsock_channel(channel))
			continue;

		vmbus_force_channel_rescinded(channel);
	}
	mutex_unlock(&vmbus_connection.channel_mutex);

	/*
	 * Wait until all the sub-channels and hv_sock channels have been
	 * cleaned up. Sub-channels should be destroyed upon suspend, otherwise
	 * they would conflict with the new sub-channels that will be created
	 * in the resume path. hv_sock channels should also be destroyed, but
	 * a hv_sock channel of an established hv_sock connection can not be
	 * really destroyed since it may still be referenced by the userspace
	 * application, so we just force the hv_sock channel to be rescinded
	 * by vmbus_force_channel_rescinded(), and the userspace application
	 * will thoroughly destroy the channel after hibernation.
	 *
	 * Note: the counter nr_chan_close_on_suspend may never go above 0 if
	 * the VM has no sub-channel and hv_sock channel, e.g. a 1-vCPU VM.
	 */
	if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
		wait_for_completion(&vmbus_connection.ready_for_suspend_event);

	mutex_lock(&vmbus_connection.channel_mutex);

	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		/*
		 * Remove the channel from the array of channels and invalidate
		 * the channel's relid.  Upon resume, vmbus_onoffer() will fix
		 * up the relid (and other fields, if necessary) and add the
		 * channel back to the array.
		 */
		vmbus_channel_unmap_relid(channel);
		channel->offermsg.child_relid = INVALID_RELID;

		if (is_hvsock_channel(channel)) {
			if (!channel->rescind) {
				pr_err("hv_sock channel not rescinded!\n");
				WARN_ON_ONCE(1);
			}
			continue;
		}

		/* All sub-channels should have been closed by now */
		list_for_each_entry(sc, &channel->sc_list, sc_list) {
			pr_err("Sub-channel not deleted!\n");
			WARN_ON_ONCE(1);
		}
	}

	mutex_unlock(&vmbus_connection.channel_mutex);

	vmbus_initiate_unload(false);

	return 0;
}

static int vmbus_bus_resume(struct device *dev)
{
	struct vmbus_channel *channel;
	struct vmbus_channel_msginfo *msginfo;
	size_t msgsize;
	int ret;

	vmbus_connection.ignore_any_offer_msg = false;

	/*
	 * We only use the 'vmbus_proto_version', which was in use before
	 * hibernation, to re-negotiate with the host.
	 */
	if (!vmbus_proto_version) {
		pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version);
		return -EINVAL;
	}

	msgsize = sizeof(*msginfo) +
		  sizeof(struct vmbus_channel_initiate_contact);

	msginfo = kzalloc(msgsize, GFP_KERNEL);

	if (msginfo == NULL)
		return -ENOMEM;

	/* Re-negotiate the previously used protocol version with the host */
	ret = vmbus_negotiate_version(msginfo, vmbus_proto_version);

	kfree(msginfo);

	if (ret != 0)
		return ret;

	vmbus_request_offers();

	mutex_lock(&vmbus_connection.channel_mutex);
	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		/* A valid relid means the host re-offered this channel */
		if (channel->offermsg.child_relid != INVALID_RELID)
			continue;

		/* hvsock channels are not expected to be present.
 */
		if (is_hvsock_channel(channel))
			continue;

		pr_err("channel %pUl/%pUl not present after resume.\n",
		       &channel->offermsg.offer.if_type,
		       &channel->offermsg.offer.if_instance);
		/* ToDo: Cleanup these channels here */
	}
	mutex_unlock(&vmbus_connection.channel_mutex);

	/* Reset the event for the next suspend. */
	reinit_completion(&vmbus_connection.ready_for_suspend_event);

	return 0;
}
#else
#define vmbus_bus_suspend	NULL
#define vmbus_bus_resume	NULL
#endif /* CONFIG_PM_SLEEP */

/* Device-tree match table (Gen2-style enumeration without ACPI) */
static const __maybe_unused struct of_device_id vmbus_of_match[] = {
	{
		.compatible = "microsoft,vmbus",
	},
	{
		/* sentinel */
	},
};
MODULE_DEVICE_TABLE(of, vmbus_of_match);

/* Both historical ACPI IDs ("VMBUS" and "VMBus") are matched */
static const __maybe_unused struct acpi_device_id vmbus_acpi_device_ids[] = {
	{"VMBUS", 0},
	{"VMBus", 0},
	{"", 0},
};
MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);

/*
 * Note: we must use the "no_irq" ops, otherwise hibernation can not work with
 * PCI device assignment, because "pci_dev_pm_ops" uses the "noirq" ops: in
 * the resume path, the pci "noirq" restore op runs before "non-noirq" op (see
 * resume_target_kernel() -> dpm_resume_start(), and hibernation_restore() ->
 * dpm_resume_end()). This means vmbus_bus_resume() and the pci-hyperv's
 * resume callback must also run via the "noirq" ops.
 *
 * Set suspend_noirq/resume_noirq to NULL for Suspend-to-Idle: see the comment
 * earlier in this file before vmbus_pm.
 */

static const struct dev_pm_ops vmbus_bus_pm = {
	.suspend_noirq	= NULL,
	.resume_noirq	= NULL,
	.freeze_noirq	= vmbus_bus_suspend,
	.thaw_noirq	= vmbus_bus_resume,
	.poweroff_noirq	= vmbus_bus_suspend,
	.restore_noirq	= vmbus_bus_resume
};

static struct platform_driver vmbus_platform_driver = {
	.probe = vmbus_platform_driver_probe,
	.remove = vmbus_platform_driver_remove,
	.driver = {
		.name = "vmbus",
		.acpi_match_table = ACPI_PTR(vmbus_acpi_device_ids),
		.of_match_table = of_match_ptr(vmbus_of_match),
		.pm = &vmbus_bus_pm,
		.probe_type = PROBE_FORCE_SYNCHRONOUS,
	}
};

/* Quiesce VMBus before kexec'ing into a new kernel */
static void hv_kexec_handler(void)
{
	hv_stimer_global_cleanup();
	vmbus_initiate_unload(false);
	/* Make sure conn_state is set as hv_synic_cleanup checks for it */
	mb();
	cpuhp_remove_state(hyperv_cpuhp_online);
};	/* NOTE(review): stray ';' after function body — harmless but removable */

static void hv_crash_handler(struct pt_regs *regs)
{
	int cpu;

	vmbus_initiate_unload(true);
	/*
	 * In crash handler we can't schedule synic cleanup for all CPUs,
	 * doing the cleanup for current CPU only. This should be sufficient
	 * for kdump.
	 */
	cpu = smp_processor_id();
	hv_stimer_cleanup(cpu);
	hv_synic_disable_regs(cpu);
};	/* NOTE(review): stray ';' after function body — harmless but removable */

static int hv_synic_suspend(void)
{
	/*
	 * When we reach here, all the non-boot CPUs have been offlined.
	 * If we're in a legacy configuration where stimer Direct Mode is
	 * not enabled, the stimers on the non-boot CPUs have been unbound
	 * in hv_synic_cleanup() -> hv_stimer_legacy_cleanup() ->
	 * hv_stimer_cleanup() -> clockevents_unbind_device().
	 *
	 * hv_synic_suspend() only runs on CPU0 with interrupts disabled.
	 * Here we do not call hv_stimer_legacy_cleanup() on CPU0 because:
	 * 1) it's unnecessary as interrupts remain disabled between
	 * syscore_suspend() and syscore_resume(): see create_image() and
	 * resume_target_kernel()
	 * 2) the stimer on CPU0 is automatically disabled later by
	 * syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ...
	 * -> clockevents_shutdown() -> ... -> hv_ce_shutdown()
	 * 3) a warning would be triggered if we call
	 * clockevents_unbind_device(), which may sleep, in an
	 * interrupts-disabled context.
	 */

	hv_synic_disable_regs(0);

	return 0;
}

static void hv_synic_resume(void)
{
	hv_synic_enable_regs(0);

	/*
	 * Note: we don't need to call hv_stimer_init(0), because the timer
	 * on CPU0 is not unbound in hv_synic_suspend(), and the timer is
	 * automatically re-enabled in timekeeping_resume().
	 */
}

/* The callbacks run only on CPU0, with irqs_disabled. */
static struct syscore_ops hv_synic_syscore_ops = {
	.suspend = hv_synic_suspend,
	.resume = hv_synic_resume,
};

/* Module/init entry point: register the VMBus platform driver and bring up the bus */
static int __init hv_acpi_init(void)
{
	int ret;

	if (!hv_is_hyperv_initialized())
		return -ENODEV;

	/* Root partition (unless nested) does not use the VMBus driver proper */
	if (hv_root_partition() && !hv_nested)
		return 0;

	/*
	 * Get ACPI resources first.
	 */
	ret = platform_driver_register(&vmbus_platform_driver);
	if (ret)
		return ret;

	/* Probe ran synchronously (PROBE_FORCE_SYNCHRONOUS); check it found us */
	if (!vmbus_root_device) {
		ret = -ENODEV;
		goto cleanup;
	}

	/*
	 * If we're on an architecture with a hardcoded hypervisor
	 * vector (i.e. x86/x64), override the VMbus interrupt found
	 * in the ACPI tables. Ensure vmbus_irq is not set since the
	 * normal Linux IRQ mechanism is not used in this case.
	 */
#ifdef HYPERVISOR_CALLBACK_VECTOR
	vmbus_interrupt = HYPERVISOR_CALLBACK_VECTOR;
	vmbus_irq = -1;
#endif

	hv_debug_init();

	ret = vmbus_bus_init();
	if (ret)
		goto cleanup;

	hv_setup_kexec_handler(hv_kexec_handler);
	hv_setup_crash_handler(hv_crash_handler);

	register_syscore_ops(&hv_synic_syscore_ops);

	return 0;

cleanup:
	platform_driver_unregister(&vmbus_platform_driver);
	vmbus_root_device = NULL;
	return ret;
}

/* Module exit: tear down in reverse order of hv_acpi_init()/vmbus_bus_init() */
static void __exit vmbus_exit(void)
{
	int cpu;

	unregister_syscore_ops(&hv_synic_syscore_ops);

	hv_remove_kexec_handler();
	hv_remove_crash_handler();
	vmbus_connection.conn_state = DISCONNECTED;
	hv_stimer_global_cleanup();
	vmbus_disconnect();
	/* vmbus_irq == -1 means the architectural callback vector was used */
	if (vmbus_irq == -1) {
		hv_remove_vmbus_handler();
	} else {
		free_percpu_irq(vmbus_irq, vmbus_evt);
		free_percpu(vmbus_evt);
	}
	for_each_online_cpu(cpu) {
		struct hv_per_cpu_context *hv_cpu
			= per_cpu_ptr(hv_context.cpu_context, cpu);

		tasklet_kill(&hv_cpu->msg_dpc);
	}
	hv_debug_rm_all_dir();

	vmbus_free_channels();
	kfree(vmbus_connection.channels);

	/*
	 * The vmbus panic notifier is always registered, hence we should
	 * also unconditionally unregister it here as well.
	 */
	atomic_notifier_chain_unregister(&panic_notifier_list,
					 &hyperv_panic_vmbus_unload_block);

	bus_unregister(&hv_bus);

	cpuhp_remove_state(hyperv_cpuhp_online);
	hv_synic_free();
	platform_driver_unregister(&vmbus_platform_driver);
}


MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Microsoft Hyper-V VMBus Driver");

subsys_initcall(hv_acpi_init);
module_exit(vmbus_exit);