/*
 * KVMGT - the implementation of Intel mediated pass-through framework for KVM
 *
 * Copyright(c) 2014-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Kevin Tian <kevin.tian@intel.com>
 *    Jike Song <jike.song@intel.com>
 *    Xiaoguang Chen <xiaoguang.chen@intel.com>
 */

#include <linux/init.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/eventfd.h>
#include <linux/uuid.h>
#include <linux/hashtable.h>
#include <linux/kvm_host.h>
#include <linux/vfio.h>

#include "i915_drv.h"
#include "gvt.h"

/*
 * Temporary stubs: real page (un)pinning through VFIO is not wired up yet,
 * so these always report zero pages handled.
 */
static inline long kvmgt_pin_pages(struct device *dev, unsigned long *user_pfn,
			long npage, int prot, unsigned long *phys_pfn)
{
	return 0;
}

static inline long kvmgt_unpin_pages(struct device *dev, unsigned long *pfn,
			long npage)
{
	return 0;
}
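
/*
 * Illustrative sketch, not part of the original code: once the VFIO
 * mediated-device infrastructure is in place, the stubs above would
 * presumably just forward to the VFIO page-pinning helpers. The
 * vfio_pin_pages()/vfio_unpin_pages() prototypes assumed here come from
 * the VFIO mdev series and may differ from what finally lands, so the
 * sketch is kept compiled out.
 */
#if 0
static inline long kvmgt_pin_pages(struct device *dev, unsigned long *user_pfn,
			long npage, int prot, unsigned long *phys_pfn)
{
	/* returns the number of pages pinned, or a negative errno */
	return vfio_pin_pages(dev, user_pfn, npage, prot, phys_pfn);
}

static inline long kvmgt_unpin_pages(struct device *dev, unsigned long *pfn,
			long npage)
{
	/* returns the number of pages unpinned, or a negative errno */
	return vfio_unpin_pages(dev, pfn, npage);
}
#endif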

static const struct intel_gvt_ops *intel_gvt_ops;

/* helper macros copied from vfio-pci */
#define VFIO_PCI_OFFSET_SHIFT   40
#define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
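
/*
 * Worked example (illustrative only, using the vfio-pci uAPI region
 * numbering): with VFIO_PCI_OFFSET_SHIFT == 40, an access at file offset
 * 0x20000000040 decodes as
 *   VFIO_PCI_OFFSET_TO_INDEX(0x20000000040) == 2   (VFIO_PCI_BAR2_REGION_INDEX)
 *   0x20000000040 & VFIO_PCI_OFFSET_MASK    == 0x40 (offset within that region)
 * and VFIO_PCI_INDEX_TO_OFFSET(2) == 0x20000000000 maps back again.
 */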

struct vfio_region {
	u32 type;
	u32 subtype;
	size_t size;
	u32 flags;
};

struct kvmgt_pgfn {
	gfn_t gfn;
	struct hlist_node hnode;
};

struct kvmgt_guest_info {
	struct kvm *kvm;
	struct intel_vgpu *vgpu;
	struct kvm_page_track_notifier_node track_node;
#define NR_BKT (1 << 18)
	struct hlist_head ptable[NR_BKT];
#undef NR_BKT
};

struct gvt_dma {
	struct rb_node node;
	gfn_t gfn;
	kvm_pfn_t pfn;
};

static struct gvt_dma *__gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct rb_node *node = vgpu->vdev.cache.rb_node;
	struct gvt_dma *ret = NULL;

	while (node) {
		struct gvt_dma *itr = rb_entry(node, struct gvt_dma, node);

		if (gfn < itr->gfn)
			node = node->rb_left;
		else if (gfn > itr->gfn)
			node = node->rb_right;
		else {
			ret = itr;
			goto out;
		}
	}

out:
	return ret;
}

static kvm_pfn_t gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct gvt_dma *entry;

	mutex_lock(&vgpu->vdev.cache_lock);
	entry = __gvt_cache_find(vgpu, gfn);
	mutex_unlock(&vgpu->vdev.cache_lock);

	return entry == NULL ? 0 : entry->pfn;
}

static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, kvm_pfn_t pfn)
{
	struct gvt_dma *new, *itr;
	struct rb_node **link = &vgpu->vdev.cache.rb_node, *parent = NULL;

	new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
	if (!new)
		return;

	new->gfn = gfn;
	new->pfn = pfn;

	mutex_lock(&vgpu->vdev.cache_lock);
	while (*link) {
		parent = *link;
		itr = rb_entry(parent, struct gvt_dma, node);

		if (gfn == itr->gfn)
			goto out;
		else if (gfn < itr->gfn)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}

	rb_link_node(&new->node, parent, link);
	rb_insert_color(&new->node, &vgpu->vdev.cache);
	mutex_unlock(&vgpu->vdev.cache_lock);
	return;

out:
	mutex_unlock(&vgpu->vdev.cache_lock);
	kfree(new);
}

static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
				struct gvt_dma *entry)
{
	rb_erase(&entry->node, &vgpu->vdev.cache);
	kfree(entry);
}

static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct device *dev = vgpu->vdev.mdev;
	struct gvt_dma *this;
	unsigned long pfn;

	mutex_lock(&vgpu->vdev.cache_lock);
	this = __gvt_cache_find(vgpu, gfn);
	if (!this) {
		mutex_unlock(&vgpu->vdev.cache_lock);
		return;
	}

	pfn = this->pfn;
	WARN_ON((kvmgt_unpin_pages(dev, &pfn, 1) != 1));
	__gvt_cache_remove_entry(vgpu, this);
	mutex_unlock(&vgpu->vdev.cache_lock);
}

static void gvt_cache_init(struct intel_vgpu *vgpu)
{
	vgpu->vdev.cache = RB_ROOT;
	mutex_init(&vgpu->vdev.cache_lock);
}

static void gvt_cache_destroy(struct intel_vgpu *vgpu)
{
	struct gvt_dma *dma;
	struct rb_node *node = NULL;
	struct device *dev = vgpu->vdev.mdev;
	unsigned long pfn;

	mutex_lock(&vgpu->vdev.cache_lock);
	while ((node = rb_first(&vgpu->vdev.cache))) {
		dma = rb_entry(node, struct gvt_dma, node);
		pfn = dma->pfn;

		kvmgt_unpin_pages(dev, &pfn, 1);
		__gvt_cache_remove_entry(vgpu, dma);
	}
	mutex_unlock(&vgpu->vdev.cache_lock);
}

static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt,
		const char *name)
{
	int i;
	struct intel_vgpu_type *t;
	const char *driver_name = dev_driver_string(
			&gvt->dev_priv->drm.pdev->dev);

	for (i = 0; i < gvt->num_types; i++) {
		t = &gvt->types[i];
		if (!strncmp(t->name, name + strlen(driver_name) + 1,
			sizeof(t->name)))
			return t;
	}

	return NULL;
}

static struct attribute *type_attrs[] = {
	NULL,
};

static struct attribute_group *intel_vgpu_type_groups[] = {
	[0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
};

static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
{
	int i, j;
	struct intel_vgpu_type *type;
	struct attribute_group *group;

	for (i = 0; i < gvt->num_types; i++) {
		type = &gvt->types[i];

		group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
		if (WARN_ON(!group))
			goto unwind;

		group->name = type->name;
		group->attrs = type_attrs;
		intel_vgpu_type_groups[i] = group;
	}

	return true;

unwind:
	for (j = 0; j < i; j++) {
		group = intel_vgpu_type_groups[j];
		kfree(group);
	}

	return false;
}

static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
{
	int i;
	struct attribute_group *group;

	for (i = 0; i < gvt->num_types; i++) {
		group = intel_vgpu_type_groups[i];
		kfree(group);
	}
}

static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
{
	hash_init(info->ptable);
}

static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info)
{
	struct kvmgt_pgfn *p;
	struct hlist_node *tmp;
	int i;

	hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
		hash_del(&p->hnode);
		kfree(p);
	}
}

static struct kvmgt_pgfn *
__kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p, *res = NULL;

	hash_for_each_possible(info->ptable, p, hnode, gfn) {
		if (gfn == p->gfn) {
			res = p;
			break;
		}
	}

	return res;
}

static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info,
				gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	p = __kvmgt_protect_table_find(info, gfn);
	return !!p;
}

static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	if (kvmgt_gfn_is_write_protected(info, gfn))
		return;

	p = kmalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
	if (WARN(!p, "gfn: 0x%llx\n", gfn))
		return;

	p->gfn = gfn;
	hash_add(info->ptable, &p->hnode, gfn);
}

static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
				gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	p = __kvmgt_protect_table_find(info, gfn);
	if (p) {
		hash_del(&p->hnode);
		kfree(p);
	}
}

static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops)
{
	if (!intel_gvt_init_vgpu_type_groups(gvt))
		return -EFAULT;

	intel_gvt_ops = ops;

	/* MDEV is not yet available */
	return -ENODEV;
}

static void kvmgt_host_exit(struct device *dev, void *gvt)
{
	intel_gvt_cleanup_vgpu_type_groups(gvt);
}

static int kvmgt_write_protect_add(unsigned long handle, u64 gfn)
{
	struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle;
	struct kvm *kvm = info->kvm;
	struct kvm_memory_slot *slot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);

	spin_lock(&kvm->mmu_lock);

	if (kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_add(info, gfn);

out:
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}

static int kvmgt_write_protect_remove(unsigned long handle, u64 gfn)
{
	struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle;
	struct kvm *kvm = info->kvm;
	struct kvm_memory_slot *slot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);

	spin_lock(&kvm->mmu_lock);

	if (!kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_del(info, gfn);

out:
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}

static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
		const u8 *val, int len,
		struct kvm_page_track_notifier_node *node)
{
	struct kvmgt_guest_info *info = container_of(node,
					struct kvmgt_guest_info, track_node);

	if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
		intel_gvt_ops->emulate_mmio_write(info->vgpu, gpa,
					(void *)val, len);
}

static void kvmgt_page_track_flush_slot(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		struct kvm_page_track_notifier_node *node)
{
	int i;
	gfn_t gfn;
	struct kvmgt_guest_info *info = container_of(node,
					struct kvmgt_guest_info, track_node);

	spin_lock(&kvm->mmu_lock);
	for (i = 0; i < slot->npages; i++) {
		gfn = slot->base_gfn + i;
		if (kvmgt_gfn_is_write_protected(info, gfn)) {
			kvm_slot_page_track_remove_page(kvm, slot, gfn,
						KVM_PAGE_TRACK_WRITE);
			kvmgt_protect_table_del(info, gfn);
		}
	}
	spin_unlock(&kvm->mmu_lock);
}

static bool kvmgt_check_guest(void)
{
	unsigned int eax, ebx, ecx, edx;
	char s[12];
	unsigned int *i;

	eax = KVM_CPUID_SIGNATURE;
	ebx = ecx = edx = 0;

	asm volatile ("cpuid"
		: "+a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
		:
		: "cc", "memory");
	i = (unsigned int *)s;
	i[0] = ebx;
	i[1] = ecx;
	i[2] = edx;

	return !strncmp(s, "KVMKVMKVM", strlen("KVMKVMKVM"));
}

/*
 * NOTE:
 * It's actually impossible to check if we are running in a KVM host,
 * since the "KVM host" is simply native. So we only detect the guest here.
 */
static int kvmgt_detect_host(void)
{
#ifdef CONFIG_INTEL_IOMMU
	if (intel_iommu_gfx_mapped) {
		gvt_err("Hardware IOMMU compatibility not yet supported, try to boot with intel_iommu=igfx_off\n");
		return -ENODEV;
	}
#endif
	return kvmgt_check_guest() ? -ENODEV : 0;
}

static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle)
{
	/* nothing to do here */
	return 0;
}

static void kvmgt_detach_vgpu(unsigned long handle)
{
	/* nothing to do here */
}

static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
{
	struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle;
	struct intel_vgpu *vgpu = info->vgpu;

	if (vgpu->vdev.msi_trigger)
		return eventfd_signal(vgpu->vdev.msi_trigger, 1) == 1;

	return false;
}

static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
{
	unsigned long pfn;
	struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle;
	int rc;

	pfn = gvt_cache_find(info->vgpu, gfn);
	if (pfn != 0)
		return pfn;

	rc = kvmgt_pin_pages(info->vgpu->vdev.mdev, &gfn, 1,
				IOMMU_READ | IOMMU_WRITE, &pfn);
	if (rc != 1) {
		gvt_err("vfio_pin_pages failed for gfn: 0x%lx\n", gfn);
		return 0;
	}

	gvt_cache_add(info->vgpu, gfn, pfn);
	return pfn;
}

static void *kvmgt_gpa_to_hva(unsigned long handle, unsigned long gpa)
{
	unsigned long pfn;
	gfn_t gfn = gpa_to_gfn(gpa);

	pfn = kvmgt_gfn_to_pfn(handle, gfn);
	if (!pfn)
		return NULL;

	return (char *)pfn_to_kaddr(pfn) + offset_in_page(gpa);
}

static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len, bool write)
{
	void *hva = NULL;

	hva = kvmgt_gpa_to_hva(handle, gpa);
	if (!hva)
		return -EFAULT;

	if (write)
		memcpy(hva, buf, len);
	else
		memcpy(buf, hva, len);

	return 0;
}

static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len)
{
	return kvmgt_rw_gpa(handle, gpa, buf, len, false);
}

static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len)
{
	return kvmgt_rw_gpa(handle, gpa, buf, len, true);
}

static unsigned long kvmgt_virt_to_pfn(void *addr)
{
	return PFN_DOWN(__pa(addr));
}

struct intel_gvt_mpt kvmgt_mpt = {
	.detect_host = kvmgt_detect_host,
	.host_init = kvmgt_host_init,
	.host_exit = kvmgt_host_exit,
	.attach_vgpu = kvmgt_attach_vgpu,
	.detach_vgpu = kvmgt_detach_vgpu,
	.inject_msi = kvmgt_inject_msi,
	.from_virt_to_mfn = kvmgt_virt_to_pfn,
	.set_wp_page = kvmgt_write_protect_add,
	.unset_wp_page = kvmgt_write_protect_remove,
	.read_gpa = kvmgt_read_gpa,
	.write_gpa = kvmgt_write_gpa,
	.gfn_to_mfn = kvmgt_gfn_to_pfn,
};
EXPORT_SYMBOL_GPL(kvmgt_mpt);

static int __init kvmgt_init(void)
{
	return 0;
}

static void __exit kvmgt_exit(void)
{
}

module_init(kvmgt_init);
module_exit(kvmgt_exit);

MODULE_LICENSE("GPL and additional rights");
MODULE_AUTHOR("Intel Corporation");