xref: /linux/drivers/gpu/drm/i915/gvt/kvmgt.c (revision 372e2db7210df7c45ead46429aeb1443ba148060)
/*
 * KVMGT - the implementation of Intel mediated pass-through framework for KVM
 *
 * Copyright(c) 2014-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Kevin Tian <kevin.tian@intel.com>
 *    Jike Song <jike.song@intel.com>
 *    Xiaoguang Chen <xiaoguang.chen@intel.com>
 */

#include <linux/init.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/eventfd.h>
#include <linux/uuid.h>
#include <linux/kvm_host.h>
#include <linux/vfio.h>

#include "i915_drv.h"
#include "gvt.h"

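/*
 * Placeholder stubs: the VFIO/mdev page pinning interface is not wired up
 * at this stage (see kvmgt_host_init() below, which still fails with
 * -ENODEV), so pinning and unpinning guest pages are no-ops for now.
 */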
static inline long kvmgt_pin_pages(struct device *dev, unsigned long *user_pfn,
			long npage, int prot, unsigned long *phys_pfn)
{
	return 0;
}

static inline long kvmgt_unpin_pages(struct device *dev, unsigned long *pfn,
			long npage)
{
	return 0;
}

static const struct intel_gvt_ops *intel_gvt_ops;

/* helper macros copied from vfio-pci */
#define VFIO_PCI_OFFSET_SHIFT   40
#define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)

struct vfio_region {
	u32				type;
	u32				subtype;
	size_t				size;
	u32				flags;
};

struct kvmgt_pgfn {
	gfn_t gfn;
	struct hlist_node hnode;
};

struct kvmgt_guest_info {
	struct kvm *kvm;
	struct intel_vgpu *vgpu;
	struct kvm_page_track_notifier_node track_node;
#define NR_BKT (1 << 18)
	struct hlist_head ptable[NR_BKT];
#undef NR_BKT
};

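/*
 * A gvt_dma node caches one guest frame number (gfn) to host pfn mapping.
 * Nodes live in the per-vGPU rb-tree (vgpu->vdev.cache), keyed by gfn and
 * protected by vgpu->vdev.cache_lock, so a guest page only has to be
 * looked up and pinned once.
 */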
struct gvt_dma {
	struct rb_node node;
	gfn_t gfn;
	kvm_pfn_t pfn;
};

static struct gvt_dma *__gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct rb_node *node = vgpu->vdev.cache.rb_node;
	struct gvt_dma *ret = NULL;

	while (node) {
		struct gvt_dma *itr = rb_entry(node, struct gvt_dma, node);

		if (gfn < itr->gfn)
			node = node->rb_left;
		else if (gfn > itr->gfn)
			node = node->rb_right;
		else {
			ret = itr;
			goto out;
		}
	}

out:
	return ret;
}

static kvm_pfn_t gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct gvt_dma *entry;

	mutex_lock(&vgpu->vdev.cache_lock);
	entry = __gvt_cache_find(vgpu, gfn);
	mutex_unlock(&vgpu->vdev.cache_lock);

	return entry == NULL ? 0 : entry->pfn;
}

static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, kvm_pfn_t pfn)
{
	struct gvt_dma *new, *itr;
	struct rb_node **link = &vgpu->vdev.cache.rb_node, *parent = NULL;

	new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
	if (!new)
		return;

	new->gfn = gfn;
	new->pfn = pfn;

	mutex_lock(&vgpu->vdev.cache_lock);
	while (*link) {
		parent = *link;
		itr = rb_entry(parent, struct gvt_dma, node);

		if (gfn == itr->gfn)
			goto out;
		else if (gfn < itr->gfn)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}

	rb_link_node(&new->node, parent, link);
	rb_insert_color(&new->node, &vgpu->vdev.cache);
	mutex_unlock(&vgpu->vdev.cache_lock);
	return;

out:
	mutex_unlock(&vgpu->vdev.cache_lock);
	kfree(new);
}

static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
				struct gvt_dma *entry)
{
	rb_erase(&entry->node, &vgpu->vdev.cache);
	kfree(entry);
}

static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct device *dev = vgpu->vdev.mdev;
	struct gvt_dma *this;
	unsigned long pfn;

	mutex_lock(&vgpu->vdev.cache_lock);
	this = __gvt_cache_find(vgpu, gfn);
	if (!this) {
		mutex_unlock(&vgpu->vdev.cache_lock);
		return;
	}

	pfn = this->pfn;
	WARN_ON((kvmgt_unpin_pages(dev, &pfn, 1) != 1));
	__gvt_cache_remove_entry(vgpu, this);
	mutex_unlock(&vgpu->vdev.cache_lock);
}

static void gvt_cache_init(struct intel_vgpu *vgpu)
{
	vgpu->vdev.cache = RB_ROOT;
	mutex_init(&vgpu->vdev.cache_lock);
}

static void gvt_cache_destroy(struct intel_vgpu *vgpu)
{
	struct gvt_dma *dma;
	struct rb_node *node = NULL;
	struct device *dev = vgpu->vdev.mdev;
	unsigned long pfn;

	mutex_lock(&vgpu->vdev.cache_lock);
	while ((node = rb_first(&vgpu->vdev.cache))) {
		dma = rb_entry(node, struct gvt_dma, node);
		pfn = dma->pfn;

		kvmgt_unpin_pages(dev, &pfn, 1);
		__gvt_cache_remove_entry(vgpu, dma);
	}
	mutex_unlock(&vgpu->vdev.cache_lock);
}

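/*
 * Look up a vGPU type by its sysfs group name.  The name passed in is
 * expected to carry the parent driver name and a '-' separator as a prefix
 * (e.g. "i915-<type>"), so skip that prefix before comparing against the
 * bare type names stored in gvt->types[].
 */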
static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt,
		const char *name)
{
	int i;
	struct intel_vgpu_type *t;
	const char *driver_name = dev_driver_string(
			&gvt->dev_priv->drm.pdev->dev);

	for (i = 0; i < gvt->num_types; i++) {
		t = &gvt->types[i];
		if (!strncmp(t->name, name + strlen(driver_name) + 1,
			sizeof(t->name)))
			return t;
	}

	return NULL;
}

static struct attribute *type_attrs[] = {
	NULL,
};

static struct attribute_group *intel_vgpu_type_groups[] = {
	[0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
};

static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
{
	int i, j;
	struct intel_vgpu_type *type;
	struct attribute_group *group;

	for (i = 0; i < gvt->num_types; i++) {
		type = &gvt->types[i];

		group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
		if (WARN_ON(!group))
			goto unwind;

		group->name = type->name;
		group->attrs = type_attrs;
		intel_vgpu_type_groups[i] = group;
	}

	return true;

unwind:
	for (j = 0; j < i; j++) {
		group = intel_vgpu_type_groups[j];
		kfree(group);
	}

	return false;
}

static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
{
	int i;
	struct attribute_group *group;

	for (i = 0; i < gvt->num_types; i++) {
		group = intel_vgpu_type_groups[i];
		kfree(group);
	}
}

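/*
 * Write-protected guest page tracking.  Each write-protected gfn gets a
 * kvmgt_pgfn entry in info->ptable, a hash table keyed by gfn, so the
 * page-track callbacks below can quickly tell whether a faulting gpa
 * belongs to a page this vGPU asked to protect.
 */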
static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
{
	hash_init(info->ptable);
}

static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info)
{
	struct kvmgt_pgfn *p;
	struct hlist_node *tmp;
	int i;

	hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
		hash_del(&p->hnode);
		kfree(p);
	}
}

static struct kvmgt_pgfn *
__kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p, *res = NULL;

	hash_for_each_possible(info->ptable, p, hnode, gfn) {
		if (gfn == p->gfn) {
			res = p;
			break;
		}
	}

	return res;
}

static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info,
				gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	p = __kvmgt_protect_table_find(info, gfn);
	return !!p;
}

static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	if (kvmgt_gfn_is_write_protected(info, gfn))
		return;

	p = kmalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
	if (WARN(!p, "gfn: 0x%llx\n", gfn))
		return;

	p->gfn = gfn;
	hash_add(info->ptable, &p->hnode, gfn);
}

static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
				gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	p = __kvmgt_protect_table_find(info, gfn);
	if (p) {
		hash_del(&p->hnode);
		kfree(p);
	}
}

static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops)
{
	if (!intel_gvt_init_vgpu_type_groups(gvt))
		return -EFAULT;

	intel_gvt_ops = ops;

	/* MDEV is not yet available */
	return -ENODEV;
}

static void kvmgt_host_exit(struct device *dev, void *gvt)
{
	intel_gvt_cleanup_vgpu_type_groups(gvt);
}

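/*
 * Ask KVM to start tracking writes to @gfn.  The srcu read lock protects
 * the memslot lookup, mmu_lock serializes the page-track update, and the
 * local hash table remembers which gfns are already protected.
 */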
static int kvmgt_write_protect_add(unsigned long handle, u64 gfn)
{
	struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle;
	struct kvm *kvm = info->kvm;
	struct kvm_memory_slot *slot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);

	spin_lock(&kvm->mmu_lock);

	if (kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_add(info, gfn);

out:
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}

static int kvmgt_write_protect_remove(unsigned long handle, u64 gfn)
{
	struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle;
	struct kvm *kvm = info->kvm;
	struct kvm_memory_slot *slot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);

	spin_lock(&kvm->mmu_lock);

	if (!kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_del(info, gfn);

out:
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}

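/*
 * KVM page-track write callback: if the written gpa lies in a page we
 * asked to write-protect, forward the access to GVT-g's MMIO write
 * emulation path.
 */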
static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
		const u8 *val, int len,
		struct kvm_page_track_notifier_node *node)
{
	struct kvmgt_guest_info *info = container_of(node,
					struct kvmgt_guest_info, track_node);

	if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
		intel_gvt_ops->emulate_mmio_write(info->vgpu, gpa,
					(void *)val, len);
}

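/*
 * Memslot flush callback: the slot is being removed or moved, so drop
 * write-protection and forget every tracked gfn inside it to avoid
 * leaving stale page-track entries behind.
 */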
static void kvmgt_page_track_flush_slot(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		struct kvm_page_track_notifier_node *node)
{
	int i;
	gfn_t gfn;
	struct kvmgt_guest_info *info = container_of(node,
					struct kvmgt_guest_info, track_node);

	spin_lock(&kvm->mmu_lock);
	for (i = 0; i < slot->npages; i++) {
		gfn = slot->base_gfn + i;
		if (kvmgt_gfn_is_write_protected(info, gfn)) {
			kvm_slot_page_track_remove_page(kvm, slot, gfn,
						KVM_PAGE_TRACK_WRITE);
			kvmgt_protect_table_del(info, gfn);
		}
	}
	spin_unlock(&kvm->mmu_lock);
}

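/*
 * Query the KVM CPUID signature leaf to detect whether we are running as
 * a KVM guest: a KVM hypervisor reports "KVMKVMKVM" in ebx/ecx/edx.
 * KVMGT must run on the host, so a positive result is a reason to bail.
 */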
static bool kvmgt_check_guest(void)
{
	unsigned int eax, ebx, ecx, edx;
	char s[12];
	unsigned int *i;

	eax = KVM_CPUID_SIGNATURE;
	ebx = ecx = edx = 0;

	asm volatile ("cpuid"
		      : "+a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
		      :
		      : "cc", "memory");
	i = (unsigned int *)s;
	i[0] = ebx;
	i[1] = ecx;
	i[2] = edx;

	return !strncmp(s, "KVMKVMKVM", strlen("KVMKVMKVM"));
}

/*
 * NOTE:
 * It's actually impossible to check whether we are running on the KVM
 * host, since the "KVM host" is simply the native machine.  So we only
 * detect whether we are running as a KVM guest here.
 */
static int kvmgt_detect_host(void)
{
#ifdef CONFIG_INTEL_IOMMU
	if (intel_iommu_gfx_mapped) {
		gvt_err("Hardware IOMMU compatibility not yet supported, try to boot with intel_iommu=igfx_off\n");
		return -ENODEV;
	}
#endif
	return kvmgt_check_guest() ? -ENODEV : 0;
}

static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle)
{
	/* nothing to do here */
	return 0;
}

static void kvmgt_detach_vgpu(unsigned long handle)
{
	/* nothing to do here */
}

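/*
 * Inject a virtual MSI by signalling the eventfd that userspace registered
 * as the MSI trigger.  Returns true when the eventfd was signalled, false
 * if no trigger has been set up yet.
 */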
static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
{
	struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle;
	struct intel_vgpu *vgpu = info->vgpu;

	if (vgpu->vdev.msi_trigger)
		return eventfd_signal(vgpu->vdev.msi_trigger, 1) == 1;

	return false;
}

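/*
 * Translate a guest frame number to a host pfn: check the per-vGPU cache
 * first, otherwise pin the page through the (currently stubbed) pinning
 * helper and remember the result for later lookups.
 */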
static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
{
	unsigned long pfn;
	struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle;
	int rc;

	pfn = gvt_cache_find(info->vgpu, gfn);
	if (pfn != 0)
		return pfn;

	rc = kvmgt_pin_pages(info->vgpu->vdev.mdev, &gfn, 1,
			     IOMMU_READ | IOMMU_WRITE, &pfn);
	if (rc != 1) {
		gvt_err("vfio_pin_pages failed for gfn: 0x%lx\n", gfn);
		return 0;
	}

	gvt_cache_add(info->vgpu, gfn, pfn);
	return pfn;
}

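/*
 * Convert a guest physical address to a host virtual address by resolving
 * the pfn and taking its kernel direct-map address, keeping the offset
 * within the page.  kvmgt_rw_gpa() then simply memcpy()s through that
 * address for both reads and writes.
 */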
static void *kvmgt_gpa_to_hva(unsigned long handle, unsigned long gpa)
{
	unsigned long pfn;
	gfn_t gfn = gpa_to_gfn(gpa);

	pfn = kvmgt_gfn_to_pfn(handle, gfn);
	if (!pfn)
		return NULL;

	return (char *)pfn_to_kaddr(pfn) + offset_in_page(gpa);
}

static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len, bool write)
{
	void *hva = NULL;

	hva = kvmgt_gpa_to_hva(handle, gpa);
	if (!hva)
		return -EFAULT;

	if (write)
		memcpy(hva, buf, len);
	else
		memcpy(buf, hva, len);

	return 0;
}

static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len)
{
	return kvmgt_rw_gpa(handle, gpa, buf, len, false);
}

static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len)
{
	return kvmgt_rw_gpa(handle, gpa, buf, len, true);
}

static unsigned long kvmgt_virt_to_pfn(void *addr)
{
	return PFN_DOWN(__pa(addr));
}

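/*
 * The MPT (mediated pass-through) operations table.  The GVT-g core never
 * calls the static helpers above directly; it reaches KVMGT only through
 * these hooks, which keeps the core hypervisor-agnostic.
 */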
struct intel_gvt_mpt kvmgt_mpt = {
	.detect_host = kvmgt_detect_host,
	.host_init = kvmgt_host_init,
	.host_exit = kvmgt_host_exit,
	.attach_vgpu = kvmgt_attach_vgpu,
	.detach_vgpu = kvmgt_detach_vgpu,
	.inject_msi = kvmgt_inject_msi,
	.from_virt_to_mfn = kvmgt_virt_to_pfn,
	.set_wp_page = kvmgt_write_protect_add,
	.unset_wp_page = kvmgt_write_protect_remove,
	.read_gpa = kvmgt_read_gpa,
	.write_gpa = kvmgt_write_gpa,
	.gfn_to_mfn = kvmgt_gfn_to_pfn,
};
EXPORT_SYMBOL_GPL(kvmgt_mpt);

static int __init kvmgt_init(void)
{
	return 0;
}

static void __exit kvmgt_exit(void)
{
}

module_init(kvmgt_init);
module_exit(kvmgt_exit);

MODULE_LICENSE("GPL and additional rights");
MODULE_AUTHOR("Intel Corporation");