/*
 * GTT virtualization
 *
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Zhi Wang <zhi.a.wang@intel.com>
 *    Zhenyu Wang <zhenyuw@linux.intel.com>
 *    Xiao Zheng <xiao.zheng@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *
 */

#include <linux/vmalloc.h>

#include <drm/drm_print.h>

#include "gt/intel_gt_regs.h"

#include "gvt.h"
#include "i915_drv.h"
#include "i915_pvinfo.h"
#include "trace.h"

#if defined(VERBOSE_DEBUG)
#define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args)
#else
#define gvt_vdbg_mm(fmt, args...)
#endif

static bool enable_out_of_sync = false;
static int preallocated_oos_pages = 8192;

/*
 * Validate a gm address and the related range size within the vGPU's
 * graphics memory (aperture or hidden) space.
 */
bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
{
	if (size == 0)
		return vgpu_gmadr_is_valid(vgpu, addr);

	if (vgpu_gmadr_is_aperture(vgpu, addr) &&
	    vgpu_gmadr_is_aperture(vgpu, addr + size - 1))
		return true;
	else if (vgpu_gmadr_is_hidden(vgpu, addr) &&
		 vgpu_gmadr_is_hidden(vgpu, addr + size - 1))
		return true;

	gvt_dbg_mm("Invalid ggtt range at 0x%llx, size: 0x%x\n",
		     addr, size);
	return false;
}

#define gtt_type_is_entry(type) \
	(type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \
	 && type != GTT_TYPE_PPGTT_PTE_ENTRY \
	 && type != GTT_TYPE_PPGTT_ROOT_ENTRY)

#define gtt_type_is_pt(type) \
	(type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX)

#define gtt_type_is_pte_pt(type) \
	(type == GTT_TYPE_PPGTT_PTE_PT)

#define gtt_type_is_root_pointer(type) \
	(gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY)

#define gtt_init_entry(e, t, p, v) do { \
	(e)->type = t; \
	(e)->pdev = p; \
	memcpy(&(e)->val64, &v, sizeof(v)); \
} while (0)

/*
 * Mappings between GTT_TYPE* enumerations.
 * For a given type, the following information can be looked up:
 * - type of the next level page table
 * - type of an entry inside this level of page table
 * - type of the entry with PSE set
 *
 * If the given type doesn't carry a particular piece of information
 * (for example, an l4 root entry has no PSE bit, and a PTE page table
 * has no next level page table), GTT_TYPE_INVALID is returned for that
 * query. This is useful when traversing a page table.
 */

struct gtt_type_table_entry {
	int entry_type;
	int pt_type;
	int next_pt_type;
	int pse_entry_type;
};

#define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \
	[type] = { \
		.entry_type = e_type, \
		.pt_type = cpt_type, \
		.next_pt_type = npt_type, \
		.pse_entry_type = pse_type, \
	}

static const struct gtt_type_table_entry gtt_type_table[] = {
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
			GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
			GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	/* We take the IPS bit as 'PSE' for the PTE level. */
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_64K_ENTRY,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE,
			GTT_TYPE_GGTT_PTE,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID),
};
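
/*
 * Illustrative lookups (not from the original source): for
 * GTT_TYPE_PPGTT_PML4_PT, the next level page table type is
 * GTT_TYPE_PPGTT_PDP_PT and the entry type is GTT_TYPE_PPGTT_PML4_ENTRY.
 * Asking for the PSE type of an l4 root entry, or for the next level of
 * a PTE page table, yields GTT_TYPE_INVALID, which terminates a walk.
 */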

static inline int get_next_pt_type(int type)
{
	return gtt_type_table[type].next_pt_type;
}

static inline int get_entry_type(int type)
{
	return gtt_type_table[type].entry_type;
}

static inline int get_pse_type(int type)
{
	return gtt_type_table[type].pse_entry_type;
}

static u64 read_pte64(struct i915_ggtt *ggtt, unsigned long index)
{
	void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index;

	return readq(addr);
}

static void ggtt_invalidate(struct intel_gt *gt)
{
	intel_wakeref_t wakeref;

	wakeref = mmio_hw_access_pre(gt);
	intel_uncore_write(gt->uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	mmio_hw_access_post(gt, wakeref);
}

static void write_pte64(struct i915_ggtt *ggtt, unsigned long index, u64 pte)
{
	void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index;

	writeq(pte, addr);
}

static inline int gtt_get_entry64(void *pt,
		struct intel_gvt_gtt_entry *e,
		unsigned long index, bool hypervisor_access, unsigned long gpa,
		struct intel_vgpu *vgpu)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (WARN_ON(info->gtt_entry_size != 8))
		return -EINVAL;

	if (hypervisor_access) {
		ret = intel_gvt_read_gpa(vgpu, gpa +
				(index << info->gtt_entry_size_shift),
				&e->val64, 8);
		if (WARN_ON(ret))
			return ret;
	} else if (!pt) {
		e->val64 = read_pte64(vgpu->gvt->gt->ggtt, index);
	} else {
		e->val64 = *((u64 *)pt + index);
	}
	return 0;
}

static inline int gtt_set_entry64(void *pt,
		struct intel_gvt_gtt_entry *e,
		unsigned long index, bool hypervisor_access, unsigned long gpa,
		struct intel_vgpu *vgpu)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (WARN_ON(info->gtt_entry_size != 8))
		return -EINVAL;

	if (hypervisor_access) {
		ret = intel_gvt_write_gpa(vgpu, gpa +
				(index << info->gtt_entry_size_shift),
				&e->val64, 8);
		if (WARN_ON(ret))
			return ret;
	} else if (!pt) {
		write_pte64(vgpu->gvt->gt->ggtt, index, e->val64);
	} else {
		*((u64 *)pt + index) = e->val64;
	}
	return 0;
}

#define GTT_HAW 46

#define ADDR_1G_MASK	GENMASK_ULL(GTT_HAW - 1, 30)
#define ADDR_2M_MASK	GENMASK_ULL(GTT_HAW - 1, 21)
#define ADDR_64K_MASK	GENMASK_ULL(GTT_HAW - 1, 16)
#define ADDR_4K_MASK	GENMASK_ULL(GTT_HAW - 1, 12)

#define GTT_SPTE_FLAG_MASK GENMASK_ULL(62, 52)
#define GTT_SPTE_FLAG_64K_SPLITED BIT(52) /* splited 64K gtt entry */

#define GTT_64K_PTE_STRIDE 16
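
/*
 * Illustrative note (not from the original source): with GTT_HAW = 46,
 * ADDR_2M_MASK keeps address bits [45:21], so get_pfn() on a 2M entry
 * yields a 2M-aligned pfn. A 64K page spans 16 consecutive 4K slots,
 * hence GTT_64K_PTE_STRIDE: 64K entries are only meaningful at every
 * 16th index of a PTE page table.
 */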

static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
{
	unsigned long pfn;

	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY)
		pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT;
	else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY)
		pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT;
	else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY)
		pfn = (e->val64 & ADDR_64K_MASK) >> PAGE_SHIFT;
	else
		pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT;
	return pfn;
}

static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn)
{
	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
		e->val64 &= ~ADDR_1G_MASK;
		pfn &= (ADDR_1G_MASK >> PAGE_SHIFT);
	} else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) {
		e->val64 &= ~ADDR_2M_MASK;
		pfn &= (ADDR_2M_MASK >> PAGE_SHIFT);
	} else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) {
		e->val64 &= ~ADDR_64K_MASK;
		pfn &= (ADDR_64K_MASK >> PAGE_SHIFT);
	} else {
		e->val64 &= ~ADDR_4K_MASK;
		pfn &= (ADDR_4K_MASK >> PAGE_SHIFT);
	}

	e->val64 |= (pfn << PAGE_SHIFT);
}

static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e)
{
	return !!(e->val64 & _PAGE_PSE);
}

static void gen8_gtt_clear_pse(struct intel_gvt_gtt_entry *e)
{
	if (gen8_gtt_test_pse(e)) {
		switch (e->type) {
		case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
			e->val64 &= ~_PAGE_PSE;
			e->type = GTT_TYPE_PPGTT_PDE_ENTRY;
			break;
		case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
			e->type = GTT_TYPE_PPGTT_PDP_ENTRY;
			e->val64 &= ~_PAGE_PSE;
			break;
		default:
			WARN_ON(1);
		}
	}
}

static bool gen8_gtt_test_ips(struct intel_gvt_gtt_entry *e)
{
	if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
		return false;

	return !!(e->val64 & GEN8_PDE_IPS_64K);
}

static void gen8_gtt_clear_ips(struct intel_gvt_gtt_entry *e)
{
	if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
		return;

	e->val64 &= ~GEN8_PDE_IPS_64K;
}

static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
{
	/*
	 * i915 writes PDP root pointer registers without the present bit
	 * set and that still works, so root pointer entries need to be
	 * treated specially here.
	 */
	if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY
			|| e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
		return (e->val64 != 0);
	else
		return (e->val64 & GEN8_PAGE_PRESENT);
}

static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e)
{
	e->val64 &= ~GEN8_PAGE_PRESENT;
}

static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e)
{
	e->val64 |= GEN8_PAGE_PRESENT;
}

static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e)
{
	return !!(e->val64 & GTT_SPTE_FLAG_64K_SPLITED);
}

static void gen8_gtt_set_64k_splited(struct intel_gvt_gtt_entry *e)
{
	e->val64 |= GTT_SPTE_FLAG_64K_SPLITED;
}

static void gen8_gtt_clear_64k_splited(struct intel_gvt_gtt_entry *e)
{
	e->val64 &= ~GTT_SPTE_FLAG_64K_SPLITED;
}

/*
 * Per-platform GMA routines.
 */
static unsigned long gma_to_ggtt_pte_index(unsigned long gma)
{
	unsigned long x = (gma >> I915_GTT_PAGE_SHIFT);

	trace_gma_index(__func__, gma, x);
	return x;
}

#define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \
static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \
{ \
	unsigned long x = (exp); \
	trace_gma_index(__func__, gma, x); \
	return x; \
}

DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));
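
/*
 * Worked decomposition (illustrative, not from the original source): in a
 * 4-level PPGTT a graphics memory address splits into
 * pml4 index = gma >> 39 & 0x1ff, pdp index = gma >> 30 & 0x1ff,
 * pde index = gma >> 21 & 0x1ff and pte index = gma >> 12 & 0x1ff,
 * with the low 12 bits being the offset inside the 4K page.
 */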

static const struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = {
	.get_entry = gtt_get_entry64,
	.set_entry = gtt_set_entry64,
	.clear_present = gtt_entry_clear_present,
	.set_present = gtt_entry_set_present,
	.test_present = gen8_gtt_test_present,
	.test_pse = gen8_gtt_test_pse,
	.clear_pse = gen8_gtt_clear_pse,
	.clear_ips = gen8_gtt_clear_ips,
	.test_ips = gen8_gtt_test_ips,
	.clear_64k_splited = gen8_gtt_clear_64k_splited,
	.set_64k_splited = gen8_gtt_set_64k_splited,
	.test_64k_splited = gen8_gtt_test_64k_splited,
	.get_pfn = gen8_gtt_get_pfn,
	.set_pfn = gen8_gtt_set_pfn,
};

static const struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = {
	.gma_to_ggtt_pte_index = gma_to_ggtt_pte_index,
	.gma_to_pte_index = gen8_gma_to_pte_index,
	.gma_to_pde_index = gen8_gma_to_pde_index,
	.gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index,
	.gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index,
	.gma_to_pml4_index = gen8_gma_to_pml4_index,
};

/* Update entry type per pse and ips bit. */
static void update_entry_type_for_real(const struct intel_gvt_gtt_pte_ops *pte_ops,
	struct intel_gvt_gtt_entry *entry, bool ips)
{
	switch (entry->type) {
	case GTT_TYPE_PPGTT_PDE_ENTRY:
	case GTT_TYPE_PPGTT_PDP_ENTRY:
		if (pte_ops->test_pse(entry))
			entry->type = get_pse_type(entry->type);
		break;
	case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
		if (ips)
			entry->type = get_pse_type(entry->type);
		break;
	default:
		GEM_BUG_ON(!gtt_type_is_entry(entry->type));
	}

	GEM_BUG_ON(entry->type == GTT_TYPE_INVALID);
}

/*
 * MM helpers.
 */
static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index,
		bool guest)
{
	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT);

	entry->type = mm->ppgtt_mm.root_entry_type;
	pte_ops->get_entry(guest ? mm->ppgtt_mm.guest_pdps :
			   mm->ppgtt_mm.shadow_pdps,
			   entry, index, false, 0, mm->vgpu);
	update_entry_type_for_real(pte_ops, entry, false);
}

static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_get_root_entry(mm, entry, index, true);
}

static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_get_root_entry(mm, entry, index, false);
}

static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index,
		bool guest)
{
	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps :
			   mm->ppgtt_mm.shadow_pdps,
			   entry, index, false, 0, mm->vgpu);
}

static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_set_root_entry(mm, entry, index, false);
}

static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	entry->type = GTT_TYPE_GGTT_PTE;
	pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
			   false, 0, mm->vgpu);
}

static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
			   false, 0, mm->vgpu);
}

static void ggtt_get_host_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	pte_ops->get_entry(NULL, entry, index, false, 0, mm->vgpu);
}

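/*
 * Write a host GGTT entry on behalf of the vGPU. The value is also
 * mirrored into the vGPU's saved aperture/hidden GGTT copies in
 * ggtt_mm before being written to the hardware GGTT.
 */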
static void ggtt_set_host_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
	unsigned long offset = index;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	if (vgpu_gmadr_is_aperture(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) {
		offset -= (vgpu_aperture_gmadr_base(mm->vgpu) >> PAGE_SHIFT);
		mm->ggtt_mm.host_ggtt_aperture[offset] = entry->val64;
	} else if (vgpu_gmadr_is_hidden(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) {
		offset -= (vgpu_hidden_gmadr_base(mm->vgpu) >> PAGE_SHIFT);
		mm->ggtt_mm.host_ggtt_hidden[offset] = entry->val64;
	}

	pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu);
}

/*
 * PPGTT shadow page table helpers.
 */
static inline int ppgtt_spt_get_entry(
		struct intel_vgpu_ppgtt_spt *spt,
		void *page_table, int type,
		struct intel_gvt_gtt_entry *e, unsigned long index,
		bool guest)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	int ret;

	e->type = get_entry_type(type);

	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
		return -EINVAL;

	ret = ops->get_entry(page_table, e, index, guest,
			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
			spt->vgpu);
	if (ret)
		return ret;

	update_entry_type_for_real(ops, e, guest ?
				   spt->guest_page.pde_ips : false);

	gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
		    type, e->type, index, e->val64);
	return 0;
}

static inline int ppgtt_spt_set_entry(
		struct intel_vgpu_ppgtt_spt *spt,
		void *page_table, int type,
		struct intel_gvt_gtt_entry *e, unsigned long index,
		bool guest)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;

	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
		return -EINVAL;

	gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
		    type, e->type, index, e->val64);

	return ops->set_entry(page_table, e, index, guest,
			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
			spt->vgpu);
}

#define ppgtt_get_guest_entry(spt, e, index) \
	ppgtt_spt_get_entry(spt, NULL, \
		spt->guest_page.type, e, index, true)

#define ppgtt_set_guest_entry(spt, e, index) \
	ppgtt_spt_set_entry(spt, NULL, \
		spt->guest_page.type, e, index, true)

#define ppgtt_get_shadow_entry(spt, e, index) \
	ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \
		spt->shadow_page.type, e, index, false)

#define ppgtt_set_shadow_entry(spt, e, index) \
	ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \
		spt->shadow_page.type, e, index, false)
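
/*
 * Note: the guest accessors above reach the guest page table through its
 * guest physical address (gfn based), while the shadow accessors operate
 * on the kernel mapping of the shadow page itself.
 */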

static void *alloc_spt(gfp_t gfp_mask)
{
	struct intel_vgpu_ppgtt_spt *spt;

	spt = kzalloc_obj(*spt, gfp_mask);
	if (!spt)
		return NULL;

	spt->shadow_page.page = alloc_page(gfp_mask);
	if (!spt->shadow_page.page) {
		kfree(spt);
		return NULL;
	}
	return spt;
}

static void free_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	__free_page(spt->shadow_page.page);
	kfree(spt);
}

static int detach_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page);

static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	struct device *kdev = spt->vgpu->gvt->gt->i915->drm.dev;

	trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type);

	dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096,
		       DMA_BIDIRECTIONAL);

	radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn);

	if (spt->guest_page.gfn) {
		if (spt->guest_page.oos_page)
			detach_oos_page(spt->vgpu, spt->guest_page.oos_page);

		intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn);
	}

	list_del_init(&spt->post_shadow_list);
	free_spt(spt);
}

static void ppgtt_free_all_spt(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_ppgtt_spt *spt, *spn;
	struct radix_tree_iter iter;
	LIST_HEAD(all_spt);
	void __rcu **slot;

	rcu_read_lock();
	radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) {
		spt = radix_tree_deref_slot(slot);
		list_move(&spt->post_shadow_list, &all_spt);
	}
	rcu_read_unlock();

	list_for_each_entry_safe(spt, spn, &all_spt, post_shadow_list)
		ppgtt_free_spt(spt);
}

static int ppgtt_handle_guest_write_page_table_bytes(
		struct intel_vgpu_ppgtt_spt *spt,
		u64 pa, void *p_data, int bytes);

static int ppgtt_write_protection_handler(
		struct intel_vgpu_page_track *page_track,
		u64 gpa, void *data, int bytes)
{
	struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data;

	int ret;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	ret = ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data, bytes);
	if (ret)
		return ret;
	return ret;
}

/* Find a spt by guest gfn. */
static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn(
		struct intel_vgpu *vgpu, unsigned long gfn)
{
	struct intel_vgpu_page_track *track;

	track = intel_vgpu_find_page_track(vgpu, gfn);
	if (track && track->handler == ppgtt_write_protection_handler)
		return track->priv_data;

	return NULL;
}

/* Find the spt by shadow page mfn. */
static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn(
		struct intel_vgpu *vgpu, unsigned long mfn)
{
	return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn);
}

static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);

/* Allocate shadow page table without guest page. */
static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
		struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type)
{
	struct device *kdev = vgpu->gvt->gt->i915->drm.dev;
	struct intel_vgpu_ppgtt_spt *spt = NULL;
	dma_addr_t daddr;
	int ret;

retry:
	spt = alloc_spt(GFP_KERNEL | __GFP_ZERO);
	if (!spt) {
		if (reclaim_one_ppgtt_mm(vgpu->gvt))
			goto retry;

		gvt_vgpu_err("fail to allocate ppgtt shadow page\n");
		return ERR_PTR(-ENOMEM);
	}

	spt->vgpu = vgpu;
	atomic_set(&spt->refcount, 1);
	INIT_LIST_HEAD(&spt->post_shadow_list);

	/*
	 * Init shadow_page.
	 */
	spt->shadow_page.type = type;
	daddr = dma_map_page(kdev, spt->shadow_page.page,
			     0, 4096, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(kdev, daddr)) {
		gvt_vgpu_err("fail to map dma addr\n");
		ret = -EINVAL;
		goto err_free_spt;
	}
	spt->shadow_page.vaddr = page_address(spt->shadow_page.page);
	spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT;

	ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt);
	if (ret)
		goto err_unmap_dma;

	return spt;

err_unmap_dma:
	dma_unmap_page(kdev, daddr, PAGE_SIZE, DMA_BIDIRECTIONAL);
err_free_spt:
	free_spt(spt);
	return ERR_PTR(ret);
}

/* Allocate shadow page table associated with specific gfn. */
static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn(
		struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type,
		unsigned long gfn, bool guest_pde_ips)
{
	struct intel_vgpu_ppgtt_spt *spt;
	int ret;

	spt = ppgtt_alloc_spt(vgpu, type);
	if (IS_ERR(spt))
		return spt;

	/*
	 * Init guest_page.
	 */
	ret = intel_vgpu_register_page_track(vgpu, gfn,
			ppgtt_write_protection_handler, spt);
	if (ret) {
		ppgtt_free_spt(spt);
		return ERR_PTR(ret);
	}

	spt->guest_page.type = type;
	spt->guest_page.gfn = gfn;
	spt->guest_page.pde_ips = guest_pde_ips;

	trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);

	return spt;
}

#define pt_entry_size_shift(spt) \
	((spt)->vgpu->gvt->device_info.gtt_entry_size_shift)

#define pt_entries(spt) \
	(I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt))

#define for_each_present_guest_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); \
	     i += spt->guest_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
		if (!ppgtt_get_guest_entry(spt, e, i) && \
		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))

#define for_each_present_shadow_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); \
	     i += spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
		if (!ppgtt_get_shadow_entry(spt, e, i) && \
		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))

#define for_each_shadow_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); \
	     i += (spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1)) \
		if (!ppgtt_get_shadow_entry(spt, e, i))
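
/*
 * Illustrative usage (sketch, not part of the original code):
 *
 *	struct intel_gvt_gtt_entry e;
 *	unsigned long i;
 *
 *	for_each_present_shadow_entry(spt, &e, i)
 *		ppgtt_invalidate_pte(spt, &e);
 *
 * The iteration stride is GTT_64K_PTE_STRIDE when the page table shadows
 * 64K entries, so only the used slots are visited.
 */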

static inline void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	int v = atomic_read(&spt->refcount);

	trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));
	atomic_inc(&spt->refcount);
}

static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	int v = atomic_read(&spt->refcount);

	trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));
	return atomic_dec_return(&spt->refcount);
}

static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt);

static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
		struct intel_gvt_gtt_entry *e)
{
	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s;
	enum intel_gvt_gtt_type cur_pt_type;

	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type)));

	if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY
		&& e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
		cur_pt_type = get_next_pt_type(e->type);

		if (!gtt_type_is_pt(cur_pt_type) ||
				!gtt_type_is_pt(cur_pt_type + 1)) {
			drm_WARN(&i915->drm, 1,
				 "Invalid page table type, cur_pt_type is: %d\n",
				 cur_pt_type);
			return -EINVAL;
		}

		cur_pt_type += 1;

		if (ops->get_pfn(e) ==
			vgpu->gtt.scratch_pt[cur_pt_type].page_mfn)
			return 0;
	}
	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
	if (!s) {
		gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n",
				ops->get_pfn(e));
		return -ENXIO;
	}
	return ppgtt_invalidate_spt(s);
}

static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *entry)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	unsigned long pfn;
	int type;

	pfn = ops->get_pfn(entry);
	type = spt->shadow_page.type;

	/* Uninitialized spte or unshadowed spte. */
	if (!pfn || pfn == vgpu->gtt.scratch_pt[type].page_mfn)
		return;

	intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
}

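/*
 * Drop one reference on a shadow page table. Once the refcount reaches
 * zero, the present shadow entries are torn down (recursing into lower
 * level page tables) and the spt itself is released.
 */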
static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_entry e;
	unsigned long index;
	int ret;

	trace_spt_change(spt->vgpu->id, "die", spt,
			spt->guest_page.gfn, spt->shadow_page.type);

	if (ppgtt_put_spt(spt) > 0)
		return 0;

	for_each_present_shadow_entry(spt, &e, index) {
		switch (e.type) {
		case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
			gvt_vdbg_mm("invalidate 4K entry\n");
			ppgtt_invalidate_pte(spt, &e);
			break;
		case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
			/* We don't set up 64K shadow entries so far. */
			WARN(1, "suspicious 64K gtt entry\n");
			continue;
		case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
			gvt_vdbg_mm("invalidate 2M entry\n");
			continue;
		case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
			WARN(1, "GVT doesn't support 1GB page\n");
			continue;
		case GTT_TYPE_PPGTT_PML4_ENTRY:
		case GTT_TYPE_PPGTT_PDP_ENTRY:
		case GTT_TYPE_PPGTT_PDE_ENTRY:
			gvt_vdbg_mm("invalidate PML4/PDP/PDE entry\n");
			ret = ppgtt_invalidate_spt_by_shadow_entry(
					spt->vgpu, &e);
			if (ret)
				goto fail;
			break;
		default:
			GEM_BUG_ON(1);
		}
	}

	trace_spt_change(spt->vgpu->id, "release", spt,
			 spt->guest_page.gfn, spt->shadow_page.type);
	ppgtt_free_spt(spt);
	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n",
			spt, e.val64, e.type);
	return ret;
}

static bool vgpu_ips_enabled(struct intel_vgpu *vgpu)
{
	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;

	if (GRAPHICS_VER(dev_priv) == 9) {
		u32 ips = vgpu_vreg_t(vgpu, GEN8_GAMW_ECO_DEV_RW_IA) &
			GAMW_ECO_ENABLE_64K_IPS_FIELD;

		return ips == GAMW_ECO_ENABLE_64K_IPS_FIELD;
	} else if (GRAPHICS_VER(dev_priv) >= 11) {
		/* 64K paging is only controlled by the IPS bit in the PTE now. */
		return true;
	} else
		return false;
}

static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt);

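/*
 * Find or create the shadow page table backing a guest PDE/PDP/PML4
 * entry. If the guest toggled the IPS (64K paging) bit on a PDE, the
 * existing shadow table is cleared and re-populated.
 */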
static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
		struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we)
{
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *spt = NULL;
	bool ips = false;
	int ret;

	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type)));

	if (we->type == GTT_TYPE_PPGTT_PDE_ENTRY)
		ips = vgpu_ips_enabled(vgpu) && ops->test_ips(we);

	spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we));
	if (spt) {
		ppgtt_get_spt(spt);

		if (ips != spt->guest_page.pde_ips) {
			spt->guest_page.pde_ips = ips;

			gvt_dbg_mm("reshadow PDE since ips changed\n");
			clear_page(spt->shadow_page.vaddr);
			ret = ppgtt_populate_spt(spt);
			if (ret) {
				ppgtt_put_spt(spt);
				goto err;
			}
		}
	} else {
		int type = get_next_pt_type(we->type);

		if (!gtt_type_is_pt(type)) {
			ret = -EINVAL;
			goto err;
		}

		spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips);
		if (IS_ERR(spt)) {
			ret = PTR_ERR(spt);
			goto err;
		}

		ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn);
		if (ret)
			goto err_free_spt;

		ret = ppgtt_populate_spt(spt);
		if (ret)
			goto err_free_spt;

		trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn,
				 spt->shadow_page.type);
	}
	return spt;

err_free_spt:
	ppgtt_free_spt(spt);
	spt = NULL;
err:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
		     spt, we->val64, we->type);
	return ERR_PTR(ret);
}

static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
		struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge)
{
	const struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops;

	se->type = ge->type;
	se->val64 = ge->val64;

	/* We always split 64KB pages, so clear IPS in the shadow PDE. */
	if (se->type == GTT_TYPE_PPGTT_PDE_ENTRY)
		ops->clear_ips(se);

	ops->set_pfn(se, s->shadow_page.mfn);
}

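/*
 * Shadow a guest 2MB entry with 4K mappings: allocate a sub PTE page
 * table, map each backing 4K guest page and point the shadow PDE at the
 * new table with PSE and IPS cleared.
 */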
static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
	struct intel_gvt_gtt_entry *se)
{
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *sub_spt;
	struct intel_gvt_gtt_entry sub_se;
	unsigned long start_gfn;
	dma_addr_t dma_addr;
	unsigned long sub_index;
	int ret;

	gvt_dbg_mm("Split 2M gtt entry, index %lu\n", index);

	start_gfn = ops->get_pfn(se);

	sub_spt = ppgtt_alloc_spt(vgpu, GTT_TYPE_PPGTT_PTE_PT);
	if (IS_ERR(sub_spt))
		return PTR_ERR(sub_spt);

	for_each_shadow_entry(sub_spt, &sub_se, sub_index) {
		ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + sub_index,
						   PAGE_SIZE, &dma_addr);
		if (ret)
			goto err;
		sub_se.val64 = se->val64;

		/* Copy the PAT field from PDE. */
		sub_se.val64 &= ~_PAGE_PAT;
		sub_se.val64 |= (se->val64 & _PAGE_PAT_LARGE) >> 5;

		ops->set_pfn(&sub_se, dma_addr >> PAGE_SHIFT);
		ppgtt_set_shadow_entry(sub_spt, &sub_se, sub_index);
	}

	/* Clear dirty field. */
	se->val64 &= ~_PAGE_DIRTY;

	ops->clear_pse(se);
	ops->clear_ips(se);
	ops->set_pfn(se, sub_spt->shadow_page.mfn);
	ppgtt_set_shadow_entry(spt, se, index);
	return 0;
err:
	/* Cancel the existing address mappings of DMA addr. */
	for_each_present_shadow_entry(sub_spt, &sub_se, sub_index) {
		gvt_vdbg_mm("invalidate 4K entry\n");
		ppgtt_invalidate_pte(sub_spt, &sub_se);
	}
	/* Release the new allocated spt. */
	trace_spt_change(sub_spt->vgpu->id, "release", sub_spt,
		sub_spt->guest_page.gfn, sub_spt->shadow_page.type);
	ppgtt_free_spt(sub_spt);
	return ret;
}

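/*
 * Shadow a guest 64K entry with 4K mappings: map the 16 backing 4K guest
 * pages and write them as consecutive 4K shadow entries, each tagged with
 * the "64K split" software flag.
 */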
static int split_64KB_gtt_entry(struct intel_vgpu *vgpu,
	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
	struct intel_gvt_gtt_entry *se)
{
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry entry = *se;
	unsigned long start_gfn;
	dma_addr_t dma_addr;
	int i, ret;

	gvt_vdbg_mm("Split 64K gtt entry, index %lu\n", index);

	GEM_BUG_ON(index % GTT_64K_PTE_STRIDE);

	start_gfn = ops->get_pfn(se);

	entry.type = GTT_TYPE_PPGTT_PTE_4K_ENTRY;
	ops->set_64k_splited(&entry);

	for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
		ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + i,
						   PAGE_SIZE, &dma_addr);
		if (ret)
			return ret;

		ops->set_pfn(&entry, dma_addr >> PAGE_SHIFT);
		ppgtt_set_shadow_entry(spt, &entry, index + i);
	}
	return 0;
}

static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
	struct intel_gvt_gtt_entry *ge)
{
	const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry se = *ge;
	unsigned long gfn;
	dma_addr_t dma_addr;
	int ret;

	if (!pte_ops->test_present(ge))
		return 0;

	gfn = pte_ops->get_pfn(ge);

	switch (ge->type) {
	case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
		gvt_vdbg_mm("shadow 4K gtt entry\n");
		ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, &dma_addr);
		if (ret)
			return -ENXIO;
		break;
	case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
		gvt_vdbg_mm("shadow 64K gtt entry\n");
		/*
		 * The layout of a 64K page is special: the page size is
		 * controlled by the upper PDE. To keep it simple, we always
		 * split a 64K page into 4K pages in the shadow PT.
		 */
		return split_64KB_gtt_entry(vgpu, spt, index, &se);
	case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
		gvt_vdbg_mm("shadow 2M gtt entry\n");
		if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M) ||
		    intel_gvt_dma_map_guest_page(vgpu, gfn,
						 I915_GTT_PAGE_SIZE_2M, &dma_addr))
			return split_2MB_gtt_entry(vgpu, spt, index, &se);
		break;
	case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
		gvt_vgpu_err("GVT doesn't support 1GB entry\n");
		return -EINVAL;
	default:
		GEM_BUG_ON(1);
		return -EINVAL;
	}

	/* Successfully shadowed a 4K or 2M page (without splitting). */
	pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT);
	ppgtt_set_shadow_entry(spt, &se, index);
	return 0;
}

static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_vgpu_ppgtt_spt *s;
	struct intel_gvt_gtt_entry se, ge;
	unsigned long i;
	int ret;

	trace_spt_change(spt->vgpu->id, "born", spt,
			 spt->guest_page.gfn, spt->shadow_page.type);

	for_each_present_guest_entry(spt, &ge, i) {
		if (gtt_type_is_pt(get_next_pt_type(ge.type))) {
			s = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
			if (IS_ERR(s)) {
				ret = PTR_ERR(s);
				goto fail;
			}
			ppgtt_get_shadow_entry(spt, &se, i);
			ppgtt_generate_shadow_entry(&se, s, &ge);
			ppgtt_set_shadow_entry(spt, &se, i);
		} else {
			ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge);
			if (ret)
				goto fail;
		}
	}
	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
			spt, ge.val64, ge.type);
	return ret;
}

static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *se, unsigned long index)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	int ret;

	trace_spt_guest_change(spt->vgpu->id, "remove", spt,
			       spt->shadow_page.type, se->val64, index);

	gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n",
		    se->type, index, se->val64);

	if (!ops->test_present(se))
		return 0;

	if (ops->get_pfn(se) ==
	    vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn)
		return 0;

	if (gtt_type_is_pt(get_next_pt_type(se->type))) {
		struct intel_vgpu_ppgtt_spt *s =
			intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se));
		if (!s) {
			gvt_vgpu_err("fail to find guest page\n");
			ret = -ENXIO;
			goto fail;
		}
		ret = ppgtt_invalidate_spt(s);
		if (ret)
			goto fail;
	} else {
		/* We don't set up 64K shadow entries so far. */
		WARN(se->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY,
		     "suspicious 64K entry\n");
		ppgtt_invalidate_pte(spt, se);
	}

	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
			spt, se->val64, se->type);
	return ret;
}

static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *we, unsigned long index)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_entry m;
	struct intel_vgpu_ppgtt_spt *s;
	int ret;

	trace_spt_guest_change(spt->vgpu->id, "add", spt, spt->shadow_page.type,
			       we->val64, index);

	gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n",
		    we->type, index, we->val64);

	if (gtt_type_is_pt(get_next_pt_type(we->type))) {
		s = ppgtt_populate_spt_by_guest_entry(vgpu, we);
		if (IS_ERR(s)) {
			ret = PTR_ERR(s);
			goto fail;
		}
		ppgtt_get_shadow_entry(spt, &m, index);
		ppgtt_generate_shadow_entry(&m, s, we);
		ppgtt_set_shadow_entry(spt, &m, index);
	} else {
		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we);
		if (ret)
			goto fail;
	}
	return 0;
fail:
	gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n",
		spt, we->val64, we->type);
	return ret;
}

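/*
 * Out-of-sync (oos) page support: a PTE page table that keeps being
 * written can be taken out of write protection. A snapshot of its guest
 * content is kept in oos_page->mem, and only the entries that changed
 * are re-shadowed when the page is brought back in sync before the
 * vGPU's next workload is submitted.
 */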
sync_oos_page(struct intel_vgpu * vgpu,struct intel_vgpu_oos_page * oos_page)1340 static int sync_oos_page(struct intel_vgpu *vgpu,
1341 		struct intel_vgpu_oos_page *oos_page)
1342 {
1343 	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1344 	struct intel_gvt *gvt = vgpu->gvt;
1345 	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
1346 	struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
1347 	struct intel_gvt_gtt_entry old, new;
1348 	int index;
1349 	int ret;
1350 
1351 	trace_oos_change(vgpu->id, "sync", oos_page->id,
1352 			 spt, spt->guest_page.type);
1353 
1354 	old.type = new.type = get_entry_type(spt->guest_page.type);
1355 	old.val64 = new.val64 = 0;
1356 
1357 	for (index = 0; index < (I915_GTT_PAGE_SIZE >>
1358 				info->gtt_entry_size_shift); index++) {
1359 		ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu);
1360 		ops->get_entry(NULL, &new, index, true,
1361 			       spt->guest_page.gfn << PAGE_SHIFT, vgpu);
1362 
1363 		if (old.val64 == new.val64
1364 			&& !test_and_clear_bit(index, spt->post_shadow_bitmap))
1365 			continue;
1366 
1367 		trace_oos_sync(vgpu->id, oos_page->id,
1368 				spt, spt->guest_page.type,
1369 				new.val64, index);
1370 
1371 		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new);
1372 		if (ret)
1373 			return ret;
1374 
1375 		ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu);
1376 	}
1377 
1378 	spt->guest_page.write_cnt = 0;
1379 	list_del_init(&spt->post_shadow_list);
1380 	return 0;
1381 }
1382 
detach_oos_page(struct intel_vgpu * vgpu,struct intel_vgpu_oos_page * oos_page)1383 static int detach_oos_page(struct intel_vgpu *vgpu,
1384 		struct intel_vgpu_oos_page *oos_page)
1385 {
1386 	struct intel_gvt *gvt = vgpu->gvt;
1387 	struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
1388 
1389 	trace_oos_change(vgpu->id, "detach", oos_page->id,
1390 			 spt, spt->guest_page.type);
1391 
1392 	spt->guest_page.write_cnt = 0;
1393 	spt->guest_page.oos_page = NULL;
1394 	oos_page->spt = NULL;
1395 
1396 	list_del_init(&oos_page->vm_list);
1397 	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head);
1398 
1399 	return 0;
1400 }
1401 
attach_oos_page(struct intel_vgpu_oos_page * oos_page,struct intel_vgpu_ppgtt_spt * spt)1402 static int attach_oos_page(struct intel_vgpu_oos_page *oos_page,
1403 		struct intel_vgpu_ppgtt_spt *spt)
1404 {
1405 	struct intel_gvt *gvt = spt->vgpu->gvt;
1406 	int ret;
1407 
1408 	ret = intel_gvt_read_gpa(spt->vgpu,
1409 			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
1410 			oos_page->mem, I915_GTT_PAGE_SIZE);
1411 	if (ret)
1412 		return ret;
1413 
1414 	oos_page->spt = spt;
1415 	spt->guest_page.oos_page = oos_page;
1416 
1417 	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head);
1418 
1419 	trace_oos_change(spt->vgpu->id, "attach", oos_page->id,
1420 			 spt, spt->guest_page.type);
1421 	return 0;
1422 }
1423 
ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt * spt)1424 static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt)
1425 {
1426 	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
1427 	int ret;
1428 
1429 	ret = intel_vgpu_enable_page_track(spt->vgpu, spt->guest_page.gfn);
1430 	if (ret)
1431 		return ret;
1432 
1433 	trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id,
1434 			 spt, spt->guest_page.type);
1435 
1436 	list_del_init(&oos_page->vm_list);
1437 	return sync_oos_page(spt->vgpu, oos_page);
1438 }
1439 
ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt * spt)1440 static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt)
1441 {
1442 	struct intel_gvt *gvt = spt->vgpu->gvt;
1443 	struct intel_gvt_gtt *gtt = &gvt->gtt;
1444 	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
1445 	int ret;
1446 
1447 	WARN(oos_page, "shadow PPGTT page has already has a oos page\n");
1448 
1449 	if (list_empty(&gtt->oos_page_free_list_head)) {
1450 		oos_page = container_of(gtt->oos_page_use_list_head.next,
1451 			struct intel_vgpu_oos_page, list);
1452 		ret = ppgtt_set_guest_page_sync(oos_page->spt);
1453 		if (ret)
1454 			return ret;
1455 		ret = detach_oos_page(spt->vgpu, oos_page);
1456 		if (ret)
1457 			return ret;
1458 	} else
1459 		oos_page = container_of(gtt->oos_page_free_list_head.next,
1460 			struct intel_vgpu_oos_page, list);
1461 	return attach_oos_page(oos_page, spt);
1462 }
1463 
ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt * spt)1464 static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt)
1465 {
1466 	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
1467 
1468 	if (WARN(!oos_page, "shadow PPGTT page should have a oos page\n"))
1469 		return -EINVAL;
1470 
1471 	trace_oos_change(spt->vgpu->id, "set page out of sync", oos_page->id,
1472 			 spt, spt->guest_page.type);
1473 
1474 	list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head);
1475 	return intel_vgpu_disable_page_track(spt->vgpu, spt->guest_page.gfn);
1476 }
1477 
1478 /**
1479  * intel_vgpu_sync_oos_pages - sync all the out-of-synced shadow for vGPU
1480  * @vgpu: a vGPU
1481  *
1482  * This function is called before submitting a guest workload to host,
1483  * to sync all the out-of-synced shadow for vGPU
1484  *
1485  * Returns:
1486  * Zero on success, negative error code if failed.
1487  */
intel_vgpu_sync_oos_pages(struct intel_vgpu * vgpu)1488 int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu)
1489 {
1490 	struct list_head *pos, *n;
1491 	struct intel_vgpu_oos_page *oos_page;
1492 	int ret;
1493 
1494 	if (!enable_out_of_sync)
1495 		return 0;
1496 
1497 	list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) {
1498 		oos_page = container_of(pos,
1499 				struct intel_vgpu_oos_page, vm_list);
1500 		ret = ppgtt_set_guest_page_sync(oos_page->spt);
1501 		if (ret)
1502 			return ret;
1503 	}
1504 	return 0;
1505 }
1506 
1507 /*
1508  * The heart of PPGTT shadow page table.
1509  */
ppgtt_handle_guest_write_page_table(struct intel_vgpu_ppgtt_spt * spt,struct intel_gvt_gtt_entry * we,unsigned long index)1510 static int ppgtt_handle_guest_write_page_table(
1511 		struct intel_vgpu_ppgtt_spt *spt,
1512 		struct intel_gvt_gtt_entry *we, unsigned long index)
1513 {
1514 	struct intel_vgpu *vgpu = spt->vgpu;
1515 	int type = spt->shadow_page.type;
1516 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1517 	struct intel_gvt_gtt_entry old_se;
1518 	int new_present;
1519 	int i, ret;
1520 
1521 	new_present = ops->test_present(we);
1522 
1523 	/*
1524 	 * Adding the new entry first and then removing the old one, that can
1525 	 * guarantee the ppgtt table is validated during the window between
1526 	 * adding and removal.
1527 	 */
1528 	ppgtt_get_shadow_entry(spt, &old_se, index);
1529 
1530 	if (new_present) {
1531 		ret = ppgtt_handle_guest_entry_add(spt, we, index);
1532 		if (ret)
1533 			goto fail;
1534 	}
1535 
1536 	ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index);
1537 	if (ret)
1538 		goto fail;
1539 
1540 	if (!new_present) {
1541 		/* For 64KB splited entries, we need clear them all. */
1542 		if (ops->test_64k_splited(&old_se) &&
1543 		    !(index % GTT_64K_PTE_STRIDE)) {
1544 			gvt_vdbg_mm("remove splited 64K shadow entries\n");
1545 			for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
1546 				ops->clear_64k_splited(&old_se);
1547 				ops->set_pfn(&old_se,
1548 					vgpu->gtt.scratch_pt[type].page_mfn);
1549 				ppgtt_set_shadow_entry(spt, &old_se, index + i);
1550 			}
1551 		} else if (old_se.type == GTT_TYPE_PPGTT_PTE_2M_ENTRY ||
1552 			   old_se.type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
1553 			ops->clear_pse(&old_se);
1554 			ops->set_pfn(&old_se,
1555 				     vgpu->gtt.scratch_pt[type].page_mfn);
1556 			ppgtt_set_shadow_entry(spt, &old_se, index);
1557 		} else {
1558 			ops->set_pfn(&old_se,
1559 				     vgpu->gtt.scratch_pt[type].page_mfn);
1560 			ppgtt_set_shadow_entry(spt, &old_se, index);
1561 		}
1562 	}
1563 
1564 	return 0;
1565 fail:
1566 	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n",
1567 			spt, we->val64, we->type);
1568 	return ret;
1569 }
1570 
1571 
1572 
can_do_out_of_sync(struct intel_vgpu_ppgtt_spt * spt)1573 static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt)
1574 {
1575 	return enable_out_of_sync
1576 		&& gtt_type_is_pte_pt(spt->guest_page.type)
1577 		&& spt->guest_page.write_cnt >= 2;
1578 }
1579 
ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt * spt,unsigned long index)1580 static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt,
1581 		unsigned long index)
1582 {
1583 	set_bit(index, spt->post_shadow_bitmap);
1584 	if (!list_empty(&spt->post_shadow_list))
1585 		return;
1586 
1587 	list_add_tail(&spt->post_shadow_list,
1588 			&spt->vgpu->gtt.post_shadow_list_head);
1589 }
1590 
1591 /**
1592  * intel_vgpu_flush_post_shadow - flush the post shadow transactions
1593  * @vgpu: a vGPU
1594  *
1595  * This function is called before submitting a guest workload to host,
1596  * to flush all the post shadows for a vGPU.
1597  *
1598  * Returns:
1599  * Zero on success, negative error code if failed.
1600  */
intel_vgpu_flush_post_shadow(struct intel_vgpu * vgpu)1601 int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
1602 {
1603 	struct list_head *pos, *n;
1604 	struct intel_vgpu_ppgtt_spt *spt;
1605 	struct intel_gvt_gtt_entry ge;
1606 	unsigned long index;
1607 	int ret;
1608 
1609 	list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) {
1610 		spt = container_of(pos, struct intel_vgpu_ppgtt_spt,
1611 				post_shadow_list);
1612 
1613 		for_each_set_bit(index, spt->post_shadow_bitmap,
1614 				GTT_ENTRY_NUM_IN_ONE_PAGE) {
1615 			ppgtt_get_guest_entry(spt, &ge, index);
1616 
1617 			ret = ppgtt_handle_guest_write_page_table(spt,
1618 							&ge, index);
1619 			if (ret)
1620 				return ret;
1621 			clear_bit(index, spt->post_shadow_bitmap);
1622 		}
1623 		list_del_init(&spt->post_shadow_list);
1624 	}
1625 	return 0;
1626 }
1627 
1628 static int ppgtt_handle_guest_write_page_table_bytes(
1629 		struct intel_vgpu_ppgtt_spt *spt,
1630 		u64 pa, void *p_data, int bytes)
1631 {
1632 	struct intel_vgpu *vgpu = spt->vgpu;
1633 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1634 	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1635 	struct intel_gvt_gtt_entry we, se;
1636 	unsigned long index;
1637 	int ret;
1638 
1639 	index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift;
1640 
1641 	ppgtt_get_guest_entry(spt, &we, index);
1642 
1643 	/*
1644 	 * For a page table that holds 64K GTT entries, only PTE#0, PTE#16,
1645 	 * PTE#32, ... PTE#496 are used. Writes to the unused PTEs should be
1646 	 * ignored.
1647 	 */
1648 	if (we.type == GTT_TYPE_PPGTT_PTE_64K_ENTRY &&
1649 	    (index % GTT_64K_PTE_STRIDE)) {
1650 		gvt_vdbg_mm("Ignore write to unused PTE entry, index %lu\n",
1651 			    index);
1652 		return 0;
1653 	}
1654 
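	/*
	 * A whole-entry write is shadowed immediately. A partial write is
	 * deferred: point the shadow entry at the scratch page for now and
	 * mark the index so it is re-shadowed by the post-shadow flush.
	 */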
1655 	if (bytes == info->gtt_entry_size) {
1656 		ret = ppgtt_handle_guest_write_page_table(spt, &we, index);
1657 		if (ret)
1658 			return ret;
1659 	} else {
1660 		if (!test_bit(index, spt->post_shadow_bitmap)) {
1661 			int type = spt->shadow_page.type;
1662 
1663 			ppgtt_get_shadow_entry(spt, &se, index);
1664 			ret = ppgtt_handle_guest_entry_removal(spt, &se, index);
1665 			if (ret)
1666 				return ret;
1667 			ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
1668 			ppgtt_set_shadow_entry(spt, &se, index);
1669 		}
1670 		ppgtt_set_post_shadow(spt, index);
1671 	}
1672 
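	/*
	 * With out-of-sync enabled, mirror the write into the oos page copy
	 * (if one is attached) and switch frequently written PTE pages to
	 * out-of-sync mode.
	 */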
1673 	if (!enable_out_of_sync)
1674 		return 0;
1675 
1676 	spt->guest_page.write_cnt++;
1677 
1678 	if (spt->guest_page.oos_page)
1679 		ops->set_entry(spt->guest_page.oos_page->mem, &we, index,
1680 				false, 0, vgpu);
1681 
1682 	if (can_do_out_of_sync(spt)) {
1683 		if (!spt->guest_page.oos_page)
1684 			ppgtt_allocate_oos_page(spt);
1685 
1686 		ret = ppgtt_set_guest_page_oos(spt);
1687 		if (ret < 0)
1688 			return ret;
1689 	}
1690 	return 0;
1691 }
1692 
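/*
 * Tear down the shadow page tables of a PPGTT mm: release every present
 * shadow root entry and clear it, then mark the mm as no longer shadowed.
 */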
1693 static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
1694 {
1695 	struct intel_vgpu *vgpu = mm->vgpu;
1696 	struct intel_gvt *gvt = vgpu->gvt;
1697 	struct intel_gvt_gtt *gtt = &gvt->gtt;
1698 	const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
1699 	struct intel_gvt_gtt_entry se;
1700 	int index;
1701 
1702 	if (!mm->ppgtt_mm.shadowed)
1703 		return;
1704 
1705 	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) {
1706 		ppgtt_get_shadow_root_entry(mm, &se, index);
1707 
1708 		if (!ops->test_present(&se))
1709 			continue;
1710 
1711 		ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se);
1712 		se.val64 = 0;
1713 		ppgtt_set_shadow_root_entry(mm, &se, index);
1714 
1715 		trace_spt_guest_change(vgpu->id, "destroy root pointer",
1716 				       NULL, se.type, se.val64, index);
1717 	}
1718 
1719 	mm->ppgtt_mm.shadowed = false;
1720 }
1721 
1722 
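/*
 * Walk the guest root table and build the shadow root entries, populating a
 * shadow page table for every present guest root pointer.
 */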
1723 static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
1724 {
1725 	struct intel_vgpu *vgpu = mm->vgpu;
1726 	struct intel_gvt *gvt = vgpu->gvt;
1727 	struct intel_gvt_gtt *gtt = &gvt->gtt;
1728 	const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
1729 	struct intel_vgpu_ppgtt_spt *spt;
1730 	struct intel_gvt_gtt_entry ge, se;
1731 	int index, ret;
1732 
1733 	if (mm->ppgtt_mm.shadowed)
1734 		return 0;
1735 
1736 	if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
1737 		return -EINVAL;
1738 
1739 	mm->ppgtt_mm.shadowed = true;
1740 
1741 	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
1742 		ppgtt_get_guest_root_entry(mm, &ge, index);
1743 
1744 		if (!ops->test_present(&ge))
1745 			continue;
1746 
1747 		trace_spt_guest_change(vgpu->id, __func__, NULL,
1748 				       ge.type, ge.val64, index);
1749 
1750 		spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
1751 		if (IS_ERR(spt)) {
1752 			gvt_vgpu_err("fail to populate guest root pointer\n");
1753 			ret = PTR_ERR(spt);
1754 			goto fail;
1755 		}
1756 		ppgtt_generate_shadow_entry(&se, spt, &ge);
1757 		ppgtt_set_shadow_root_entry(mm, &se, index);
1758 
1759 		trace_spt_guest_change(vgpu->id, "populate root pointer",
1760 				       NULL, se.type, se.val64, index);
1761 	}
1762 
1763 	return 0;
1764 fail:
1765 	invalidate_ppgtt_mm(mm);
1766 	return ret;
1767 }
1768 
1769 static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu)
1770 {
1771 	struct intel_vgpu_mm *mm;
1772 
1773 	mm = kzalloc_obj(*mm);
1774 	if (!mm)
1775 		return NULL;
1776 
1777 	mm->vgpu = vgpu;
1778 	kref_init(&mm->ref);
1779 	atomic_set(&mm->pincount, 0);
1780 
1781 	return mm;
1782 }
1783 
1784 static void vgpu_free_mm(struct intel_vgpu_mm *mm)
1785 {
1786 	kfree(mm);
1787 }
1788 
1789 /**
1790  * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU
1791  * @vgpu: a vGPU
1792  * @root_entry_type: ppgtt root entry type
1793  * @pdps: guest pdps.
1794  *
1795  * This function is used to create a ppgtt mm object for a vGPU.
1796  *
1797  * Returns:
1798  * Pointer to the new mm object on success, or an ERR_PTR()-encoded negative
1799  * error code if failed.
1799  */
1800 struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
1801 		enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
1802 {
1803 	struct intel_gvt *gvt = vgpu->gvt;
1804 	struct intel_vgpu_mm *mm;
1805 	int ret;
1806 
1807 	mm = vgpu_alloc_mm(vgpu);
1808 	if (!mm)
1809 		return ERR_PTR(-ENOMEM);
1810 
1811 	mm->type = INTEL_GVT_MM_PPGTT;
1812 
1813 	GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY &&
1814 		   root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY);
1815 	mm->ppgtt_mm.root_entry_type = root_entry_type;
1816 
1817 	INIT_LIST_HEAD(&mm->ppgtt_mm.list);
1818 	INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list);
1819 	INIT_LIST_HEAD(&mm->ppgtt_mm.link);
1820 
1821 	if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
1822 		mm->ppgtt_mm.guest_pdps[0] = pdps[0];
1823 	else
1824 		memcpy(mm->ppgtt_mm.guest_pdps, pdps,
1825 		       sizeof(mm->ppgtt_mm.guest_pdps));
1826 
1827 	ret = shadow_ppgtt_mm(mm);
1828 	if (ret) {
1829 		gvt_vgpu_err("failed to shadow ppgtt mm\n");
1830 		vgpu_free_mm(mm);
1831 		return ERR_PTR(ret);
1832 	}
1833 
1834 	list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head);
1835 
1836 	mutex_lock(&gvt->gtt.ppgtt_mm_lock);
1837 	list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head);
1838 	mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
1839 
1840 	return mm;
1841 }
1842 
1843 static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
1844 {
1845 	struct intel_vgpu_mm *mm;
1846 	unsigned long nr_entries;
1847 
1848 	mm = vgpu_alloc_mm(vgpu);
1849 	if (!mm)
1850 		return ERR_PTR(-ENOMEM);
1851 
1852 	mm->type = INTEL_GVT_MM_GGTT;
1853 
1854 	nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT;
1855 	mm->ggtt_mm.virtual_ggtt =
1856 		vzalloc(array_size(nr_entries,
1857 				   vgpu->gvt->device_info.gtt_entry_size));
1858 	if (!mm->ggtt_mm.virtual_ggtt) {
1859 		vgpu_free_mm(mm);
1860 		return ERR_PTR(-ENOMEM);
1861 	}
1862 
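	/*
	 * Saved copies of the host GGTT PTEs for the aperture and hidden
	 * ranges, used to restore the mappings on resume (see
	 * intel_gvt_restore_ggtt()).
	 */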
1863 	mm->ggtt_mm.host_ggtt_aperture = vzalloc((vgpu_aperture_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64));
1864 	if (!mm->ggtt_mm.host_ggtt_aperture) {
1865 		vfree(mm->ggtt_mm.virtual_ggtt);
1866 		vgpu_free_mm(mm);
1867 		return ERR_PTR(-ENOMEM);
1868 	}
1869 
1870 	mm->ggtt_mm.host_ggtt_hidden = vzalloc((vgpu_hidden_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64));
1871 	if (!mm->ggtt_mm.host_ggtt_hidden) {
1872 		vfree(mm->ggtt_mm.host_ggtt_aperture);
1873 		vfree(mm->ggtt_mm.virtual_ggtt);
1874 		vgpu_free_mm(mm);
1875 		return ERR_PTR(-ENOMEM);
1876 	}
1877 
1878 	return mm;
1879 }
1880 
1881 /**
1882  * _intel_vgpu_mm_release - destroy a mm object
1883  * @mm_ref: a kref object
1884  *
1885  * This function is used to destroy a mm object for a vGPU
1886  *
1887  */
1888 void _intel_vgpu_mm_release(struct kref *mm_ref)
1889 {
1890 	struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);
1891 
1892 	if (GEM_WARN_ON(atomic_read(&mm->pincount)))
1893 		gvt_err("vgpu mm pin count bug detected\n");
1894 
1895 	if (mm->type == INTEL_GVT_MM_PPGTT) {
1896 		list_del(&mm->ppgtt_mm.list);
1897 
1898 		mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
1899 		list_del(&mm->ppgtt_mm.lru_list);
1900 		mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
1901 
1902 		invalidate_ppgtt_mm(mm);
1903 	} else {
1904 		vfree(mm->ggtt_mm.virtual_ggtt);
1905 		vfree(mm->ggtt_mm.host_ggtt_aperture);
1906 		vfree(mm->ggtt_mm.host_ggtt_hidden);
1907 	}
1908 
1909 	vgpu_free_mm(mm);
1910 }
1911 
1912 /**
1913  * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object
1914  * @mm: a vGPU mm object
1915  *
1916  * This function is called when a user no longer needs a vGPU mm object
1917  */
1918 void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
1919 {
1920 	atomic_dec_if_positive(&mm->pincount);
1921 }
1922 
1923 /**
1924  * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object
1925  * @mm: target vgpu mm
1926  *
1927  * This function is called when a user wants to use a vGPU mm object. If this
1928  * mm object hasn't been shadowed yet, the shadow will be populated at this
1929  * time.
1930  *
1931  * Returns:
1932  * Zero on success, negative error code if failed.
1933  */
1934 int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
1935 {
1936 	int ret;
1937 
1938 	atomic_inc(&mm->pincount);
1939 
1940 	if (mm->type == INTEL_GVT_MM_PPGTT) {
1941 		ret = shadow_ppgtt_mm(mm);
1942 		if (ret)
1943 			return ret;
1944 
1945 		mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
1946 		list_move_tail(&mm->ppgtt_mm.lru_list,
1947 			       &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head);
1948 		mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
1949 	}
1950 
1951 	return 0;
1952 }
1953 
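/*
 * Invalidate the least recently used, unpinned shadow PPGTT to reclaim host
 * resources. Returns 1 if an mm was reclaimed, 0 otherwise.
 */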
1954 static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt)
1955 {
1956 	struct intel_vgpu_mm *mm;
1957 	struct list_head *pos, *n;
1958 
1959 	mutex_lock(&gvt->gtt.ppgtt_mm_lock);
1960 
1961 	list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) {
1962 		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list);
1963 
1964 		if (atomic_read(&mm->pincount))
1965 			continue;
1966 
1967 		list_del_init(&mm->ppgtt_mm.lru_list);
1968 		mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
1969 		invalidate_ppgtt_mm(mm);
1970 		return 1;
1971 	}
1972 	mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
1973 	return 0;
1974 }
1975 
1976 /*
1977  * GMA translation APIs.
1978  */
1979 static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm,
1980 		struct intel_gvt_gtt_entry *e, unsigned long index, bool guest)
1981 {
1982 	struct intel_vgpu *vgpu = mm->vgpu;
1983 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1984 	struct intel_vgpu_ppgtt_spt *s;
1985 
1986 	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
1987 	if (!s)
1988 		return -ENXIO;
1989 
1990 	if (!guest)
1991 		ppgtt_get_shadow_entry(s, e, index);
1992 	else
1993 		ppgtt_get_guest_entry(s, e, index);
1994 	return 0;
1995 }
1996 
1997 /**
1998  * intel_vgpu_gma_to_gpa - translate a gma to GPA
1999  * @mm: mm object. could be a PPGTT or GGTT mm object
2000  * @gma: graphics memory address in this mm object
2001  *
2002  * This function is used to translate a graphics memory address in a specific
2003  * graphics memory space to a guest physical address.
2004  *
2005  * Returns:
2006  * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed.
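 *
 * A typical caller (sketch only; the exact error code used on failure is up
 * to the caller):
 *
 *	gpa = intel_vgpu_gma_to_gpa(mm, gma);
 *	if (gpa == INTEL_GVT_INVALID_ADDR)
 *		return -EFAULT;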
2007  */
2008 unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
2009 {
2010 	struct intel_vgpu *vgpu = mm->vgpu;
2011 	struct intel_gvt *gvt = vgpu->gvt;
2012 	const struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops;
2013 	const struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops;
2014 	unsigned long gpa = INTEL_GVT_INVALID_ADDR;
2015 	unsigned long gma_index[4];
2016 	struct intel_gvt_gtt_entry e;
2017 	int i, levels = 0;
2018 	int ret;
2019 
2020 	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT &&
2021 		   mm->type != INTEL_GVT_MM_PPGTT);
2022 
2023 	if (mm->type == INTEL_GVT_MM_GGTT) {
2024 		if (!vgpu_gmadr_is_valid(vgpu, gma))
2025 			goto err;
2026 
2027 		ggtt_get_guest_entry(mm, &e,
2028 			gma_ops->gma_to_ggtt_pte_index(gma));
2029 
2030 		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT)
2031 			+ (gma & ~I915_GTT_PAGE_MASK);
2032 
2033 		trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa);
2034 	} else {
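		/*
		 * Split the PPGTT GMA into per-level page table indices,
		 * starting from the shadow root entry for this address space.
		 */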
2035 		switch (mm->ppgtt_mm.root_entry_type) {
2036 		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
2037 			ppgtt_get_shadow_root_entry(mm, &e, 0);
2038 
2039 			gma_index[0] = gma_ops->gma_to_pml4_index(gma);
2040 			gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
2041 			gma_index[2] = gma_ops->gma_to_pde_index(gma);
2042 			gma_index[3] = gma_ops->gma_to_pte_index(gma);
2043 			levels = 4;
2044 			break;
2045 		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
2046 			ppgtt_get_shadow_root_entry(mm, &e,
2047 					gma_ops->gma_to_l3_pdp_index(gma));
2048 
2049 			gma_index[0] = gma_ops->gma_to_pde_index(gma);
2050 			gma_index[1] = gma_ops->gma_to_pte_index(gma);
2051 			levels = 2;
2052 			break;
2053 		default:
2054 			GEM_BUG_ON(1);
2055 		}
2056 
2057 		/* walk the shadow page table and get gpa from guest entry */
2058 		for (i = 0; i < levels; i++) {
2059 			ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
2060 				(i == levels - 1));
2061 			if (ret)
2062 				goto err;
2063 
2064 			if (!pte_ops->test_present(&e)) {
2065 				gvt_dbg_core("GMA 0x%lx is not present\n", gma);
2066 				goto err;
2067 			}
2068 		}
2069 
2070 		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) +
2071 					(gma & ~I915_GTT_PAGE_MASK);
2072 		trace_gma_translate(vgpu->id, "ppgtt", 0,
2073 				    mm->ppgtt_mm.root_entry_type, gma, gpa);
2074 	}
2075 
2076 	return gpa;
2077 err:
2078 	gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
2079 	return INTEL_GVT_INVALID_ADDR;
2080 }
2081 
2082 static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu,
2083 	unsigned int off, void *p_data, unsigned int bytes)
2084 {
2085 	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
2086 	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
2087 	unsigned long index = off >> info->gtt_entry_size_shift;
2088 	unsigned long gma;
2089 	struct intel_gvt_gtt_entry e;
2090 
2091 	if (bytes != 4 && bytes != 8)
2092 		return -EINVAL;
2093 
2094 	gma = index << I915_GTT_PAGE_SHIFT;
2095 	if (!intel_gvt_ggtt_validate_range(vgpu,
2096 					   gma, 1 << I915_GTT_PAGE_SHIFT)) {
2097 		gvt_dbg_mm("read invalid ggtt at 0x%lx\n", gma);
2098 		memset(p_data, 0, bytes);
2099 		return 0;
2100 	}
2101 
2102 	ggtt_get_guest_entry(ggtt_mm, &e, index);
2103 	memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)),
2104 			bytes);
2105 	return 0;
2106 }
2107 
2108 /**
2109  * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read
2110  * @vgpu: a vGPU
2111  * @off: register offset
2112  * @p_data: data will be returned to guest
2113  * @bytes: data length
2114  *
2115  * This function is used to emulate the GTT MMIO register read
2116  *
2117  * Returns:
2118  * Zero on success, error code if failed.
2119  */
2120 int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
2121 	void *p_data, unsigned int bytes)
2122 {
2123 	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
2124 	int ret;
2125 
2126 	if (bytes != 4 && bytes != 8)
2127 		return -EINVAL;
2128 
2129 	off -= info->gtt_start_offset;
2130 	ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes);
2131 	return ret;
2132 }
2133 
2134 static void ggtt_invalidate_pte(struct intel_vgpu *vgpu,
2135 		struct intel_gvt_gtt_entry *entry)
2136 {
2137 	const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
2138 	unsigned long pfn;
2139 
2140 	pfn = pte_ops->get_pfn(entry);
2141 	if (pfn != vgpu->gvt->gtt.scratch_mfn)
2142 		intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
2143 }
2144 
2145 static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
2146 	void *p_data, unsigned int bytes)
2147 {
2148 	struct intel_gvt *gvt = vgpu->gvt;
2149 	const struct intel_gvt_device_info *info = &gvt->device_info;
2150 	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
2151 	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
2152 	unsigned long g_gtt_index = off >> info->gtt_entry_size_shift;
2153 	unsigned long gma, gfn;
2154 	struct intel_gvt_gtt_entry e = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
2155 	struct intel_gvt_gtt_entry m = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
2156 	dma_addr_t dma_addr;
2157 	int ret;
2158 	struct intel_gvt_partial_pte *partial_pte, *pos, *n;
2159 	bool partial_update = false;
2160 
2161 	if (bytes != 4 && bytes != 8)
2162 		return -EINVAL;
2163 
2164 	gma = g_gtt_index << I915_GTT_PAGE_SHIFT;
2165 
2166 	/* the VM may configure the whole GM space when ballooning is used */
2167 	if (!vgpu_gmadr_is_valid(vgpu, gma))
2168 		return 0;
2169 
2170 	e.type = GTT_TYPE_GGTT_PTE;
2171 	memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
2172 			bytes);
2173 
2174 	/* If the GGTT entry size is 8 bytes and it is split into two 4-byte
2175 	 * writes, save the first 4 bytes in a list and update the virtual
2176 	 * PTE. Only update the shadow PTE when the second 4-byte write arrives.
2177 	 */
2178 	if (bytes < info->gtt_entry_size) {
2179 		bool found = false;
2180 
2181 		list_for_each_entry_safe(pos, n,
2182 				&ggtt_mm->ggtt_mm.partial_pte_list, list) {
2183 			if (g_gtt_index == pos->offset >>
2184 					info->gtt_entry_size_shift) {
2185 				if (off != pos->offset) {
2186 				/* the second partial part */
2187 					int last_off = pos->offset &
2188 						(info->gtt_entry_size - 1);
2189 
2190 					memcpy((void *)&e.val64 + last_off,
2191 						(void *)&pos->data + last_off,
2192 						bytes);
2193 
2194 					list_del(&pos->list);
2195 					kfree(pos);
2196 					found = true;
2197 					break;
2198 				}
2199 
2200 				/* update of the first partial part */
2201 				pos->data = e.val64;
2202 				ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
2203 				return 0;
2204 			}
2205 		}
2206 
2207 		if (!found) {
2208 			/* the first partial part */
2209 			partial_pte = kzalloc_obj(*partial_pte);
2210 			if (!partial_pte)
2211 				return -ENOMEM;
2212 			partial_pte->offset = off;
2213 			partial_pte->data = e.val64;
2214 			list_add_tail(&partial_pte->list,
2215 				&ggtt_mm->ggtt_mm.partial_pte_list);
2216 			partial_update = true;
2217 		}
2218 	}
2219 
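	/*
	 * Once a complete, present entry is available, map the guest page and
	 * point the shadow entry at the resulting DMA address; otherwise make
	 * the shadow entry point at the scratch page and clear its present bit.
	 */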
2220 	if (!partial_update && (ops->test_present(&e))) {
2221 		gfn = ops->get_pfn(&e);
2222 		m.val64 = e.val64;
2223 		m.type = e.type;
2224 
2225 		ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE,
2226 						   &dma_addr);
2227 		if (ret) {
2228 			gvt_vgpu_err("fail to populate guest ggtt entry\n");
2229 			/* The guest driver may read/write the entry while it is
2230 			 * only partially updated; the p2m mapping can fail in that
2231 			 * case, so point the shadow entry at a scratch page instead.
2232 			 */
2233 			ops->set_pfn(&m, gvt->gtt.scratch_mfn);
2234 		} else
2235 			ops->set_pfn(&m, dma_addr >> PAGE_SHIFT);
2236 	} else {
2237 		ops->set_pfn(&m, gvt->gtt.scratch_mfn);
2238 		ops->clear_present(&m);
2239 	}
2240 
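	/* Update the virtual GGTT first, then release the mapping held by the
	 * old host entry before installing the new one and flushing the GGTT.
	 */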
2241 	ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
2242 
2243 	ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index);
2244 	ggtt_invalidate_pte(vgpu, &e);
2245 
2246 	ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
2247 	ggtt_invalidate(gvt->gt);
2248 	return 0;
2249 }
2250 
2251 /*
2252  * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write
2253  * @vgpu: a vGPU
2254  * @off: register offset
2255  * @p_data: data from guest write
2256  * @bytes: data length
2257  *
2258  * This function is used to emulate the GTT MMIO register write
2259  *
2260  * Returns:
2261  * Zero on success, error code if failed.
2262  */
2263 int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
2264 		unsigned int off, void *p_data, unsigned int bytes)
2265 {
2266 	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
2267 	int ret;
2268 	struct intel_vgpu_submission *s = &vgpu->submission;
2269 	struct intel_engine_cs *engine;
2270 	int i;
2271 
2272 	if (bytes != 4 && bytes != 8)
2273 		return -EINVAL;
2274 
2275 	off -= info->gtt_start_offset;
2276 	ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);
2277 
2278 	/* If the GGTT of the last submitted context is written,
2279 	 * that context has probably been unpinned.
2280 	 * Mark the last shadowed context as invalid.
2281 	 */
2282 	for_each_engine(engine, vgpu->gvt->gt, i) {
2283 		if (!s->last_ctx[i].valid)
2284 			continue;
2285 
2286 		if (s->last_ctx[i].lrca == (off >> info->gtt_entry_size_shift))
2287 			s->last_ctx[i].valid = false;
2288 	}
2289 	return ret;
2290 }
2291 
2292 static int alloc_scratch_pages(struct intel_vgpu *vgpu,
2293 		enum intel_gvt_gtt_type type)
2294 {
2295 	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
2296 	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
2297 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
2298 	int page_entry_num = I915_GTT_PAGE_SIZE >>
2299 				vgpu->gvt->device_info.gtt_entry_size_shift;
2300 	void *scratch_pt;
2301 	int i;
2302 	struct device *dev = vgpu->gvt->gt->i915->drm.dev;
2303 	dma_addr_t daddr;
2304 
2305 	if (drm_WARN_ON(&i915->drm,
2306 			type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
2307 		return -EINVAL;
2308 
2309 	scratch_pt = (void *)get_zeroed_page(GFP_KERNEL);
2310 	if (!scratch_pt) {
2311 		gvt_vgpu_err("fail to allocate scratch page\n");
2312 		return -ENOMEM;
2313 	}
2314 
2315 	daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0, 4096, DMA_BIDIRECTIONAL);
2316 	if (dma_mapping_error(dev, daddr)) {
2317 		gvt_vgpu_err("fail to dmamap scratch_pt\n");
2318 		__free_page(virt_to_page(scratch_pt));
2319 		return -ENOMEM;
2320 	}
2321 	gtt->scratch_pt[type].page_mfn =
2322 		(unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
2323 	gtt->scratch_pt[type].page = virt_to_page(scratch_pt);
2324 	gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
2325 			vgpu->id, type, gtt->scratch_pt[type].page_mfn);
2326 
2327 	/* Build the tree by completely filling the scratch pt with entries
2328 	 * that point to the next level scratch pt or scratch page. The
2329 	 * scratch_pt[type] indicates the scratch pt/scratch page used by the
2330 	 * 'type' pt.
2331 	 * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by the
2332 	 * GTT_TYPE_PPGTT_PDE_PT level pt, which means this scratch_pt itself
2333 	 * is of type GTT_TYPE_PPGTT_PTE_PT and is filled with the scratch page mfn.
2334 	 */
2335 	if (type > GTT_TYPE_PPGTT_PTE_PT) {
2336 		struct intel_gvt_gtt_entry se;
2337 
2338 		memset(&se, 0, sizeof(struct intel_gvt_gtt_entry));
2339 		se.type = get_entry_type(type - 1);
2340 		ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);
2341 
2342 		/* The entry parameters (present/writeable/cache type) are
2343 		 * set to the same values as i915's scratch page tree.
2344 		 */
2345 		se.val64 |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
2346 		if (type == GTT_TYPE_PPGTT_PDE_PT)
2347 			se.val64 |= PPAT_CACHED;
2348 
2349 		for (i = 0; i < page_entry_num; i++)
2350 			ops->set_entry(scratch_pt, &se, i, false, 0, vgpu);
2351 	}
2352 
2353 	return 0;
2354 }
2355 
2356 static int release_scratch_page_tree(struct intel_vgpu *vgpu)
2357 {
2358 	int i;
2359 	struct device *dev = vgpu->gvt->gt->i915->drm.dev;
2360 	dma_addr_t daddr;
2361 
2362 	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
2363 		if (vgpu->gtt.scratch_pt[i].page != NULL) {
2364 			daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn <<
2365 					I915_GTT_PAGE_SHIFT);
2366 			dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);
2367 			__free_page(vgpu->gtt.scratch_pt[i].page);
2368 			vgpu->gtt.scratch_pt[i].page = NULL;
2369 			vgpu->gtt.scratch_pt[i].page_mfn = 0;
2370 		}
2371 	}
2372 
2373 	return 0;
2374 }
2375 
2376 static int create_scratch_page_tree(struct intel_vgpu *vgpu)
2377 {
2378 	int i, ret;
2379 
2380 	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
2381 		ret = alloc_scratch_pages(vgpu, i);
2382 		if (ret)
2383 			goto err;
2384 	}
2385 
2386 	return 0;
2387 
2388 err:
2389 	release_scratch_page_tree(vgpu);
2390 	return ret;
2391 }
2392 
2393 /**
2394  * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virtualization
2395  * @vgpu: a vGPU
2396  *
2397  * This function is used to initialize per-vGPU graphics memory virtualization
2398  * components.
2399  *
2400  * Returns:
2401  * Zero on success, error code if failed.
2402  */
2403 int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
2404 {
2405 	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
2406 
2407 	INIT_RADIX_TREE(&gtt->spt_tree, GFP_KERNEL);
2408 
2409 	INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head);
2410 	INIT_LIST_HEAD(&gtt->oos_page_list_head);
2411 	INIT_LIST_HEAD(&gtt->post_shadow_list_head);
2412 
2413 	gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu);
2414 	if (IS_ERR(gtt->ggtt_mm)) {
2415 		gvt_vgpu_err("fail to create mm for ggtt.\n");
2416 		return PTR_ERR(gtt->ggtt_mm);
2417 	}
2418 
2419 	intel_vgpu_reset_ggtt(vgpu, false);
2420 
2421 	INIT_LIST_HEAD(&gtt->ggtt_mm->ggtt_mm.partial_pte_list);
2422 
2423 	return create_scratch_page_tree(vgpu);
2424 }
2425 
2426 void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
2427 {
2428 	struct list_head *pos, *n;
2429 	struct intel_vgpu_mm *mm;
2430 
2431 	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
2432 		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
2433 		intel_vgpu_destroy_mm(mm);
2434 	}
2435 
2436 	if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head)))
2437 		gvt_err("vgpu ppgtt mm is not fully destroyed\n");
2438 
2439 	if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) {
2440 		gvt_err("why do we still have unfreed SPTs?\n");
2441 		ppgtt_free_all_spt(vgpu);
2442 	}
2443 }
2444 
2445 static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
2446 {
2447 	struct intel_gvt_partial_pte *pos, *next;
2448 
2449 	list_for_each_entry_safe(pos, next,
2450 				 &vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list,
2451 				 list) {
2452 		gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n",
2453 			pos->offset, pos->data);
2454 		kfree(pos);
2455 	}
2456 	intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm);
2457 	vgpu->gtt.ggtt_mm = NULL;
2458 }
2459 
2460 /**
2461  * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
2462  * @vgpu: a vGPU
2463  *
2464  * This function is used to clean up per-vGPU graphics memory virtualization
2465  * components.
2469  */
2470 void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
2471 {
2472 	intel_vgpu_destroy_all_ppgtt_mm(vgpu);
2473 	intel_vgpu_destroy_ggtt_mm(vgpu);
2474 	release_scratch_page_tree(vgpu);
2475 }
2476 
2477 static void clean_spt_oos(struct intel_gvt *gvt)
2478 {
2479 	struct intel_gvt_gtt *gtt = &gvt->gtt;
2480 	struct list_head *pos, *n;
2481 	struct intel_vgpu_oos_page *oos_page;
2482 
2483 	WARN(!list_empty(&gtt->oos_page_use_list_head),
2484 		"someone is still using oos page\n");
2485 
2486 	list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
2487 		oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
2488 		list_del(&oos_page->list);
2489 		free_page((unsigned long)oos_page->mem);
2490 		kfree(oos_page);
2491 	}
2492 }
2493 
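/*
 * Pre-allocate the pool of out-of-sync tracking pages; each one holds a copy
 * of a guest page table page while that page is out of sync.
 */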
2494 static int setup_spt_oos(struct intel_gvt *gvt)
2495 {
2496 	struct intel_gvt_gtt *gtt = &gvt->gtt;
2497 	struct intel_vgpu_oos_page *oos_page;
2498 	int i;
2499 	int ret;
2500 
2501 	INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
2502 	INIT_LIST_HEAD(&gtt->oos_page_use_list_head);
2503 
2504 	for (i = 0; i < preallocated_oos_pages; i++) {
2505 		oos_page = kzalloc_obj(*oos_page);
2506 		if (!oos_page) {
2507 			ret = -ENOMEM;
2508 			goto fail;
2509 		}
2510 		oos_page->mem = (void *)__get_free_pages(GFP_KERNEL, 0);
2511 		if (!oos_page->mem) {
2512 			ret = -ENOMEM;
2513 			kfree(oos_page);
2514 			goto fail;
2515 		}
2516 
2517 		INIT_LIST_HEAD(&oos_page->list);
2518 		INIT_LIST_HEAD(&oos_page->vm_list);
2519 		oos_page->id = i;
2520 		list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
2521 	}
2522 
2523 	gvt_dbg_mm("%d oos pages preallocated\n", i);
2524 
2525 	return 0;
2526 fail:
2527 	clean_spt_oos(gvt);
2528 	return ret;
2529 }
2530 
2531 /**
2532  * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
2533  * @vgpu: a vGPU
2534  * @pdps: pdp root array
2535  *
2536  * This function is used to find a PPGTT mm object from the mm object pool.
2537  *
2538  * Returns:
2539  * pointer to mm object on success, NULL if failed.
2540  */
2541 struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
2542 		u64 pdps[])
2543 {
2544 	struct intel_vgpu_mm *mm;
2545 	struct list_head *pos;
2546 
2547 	list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) {
2548 		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
2549 
2550 		switch (mm->ppgtt_mm.root_entry_type) {
2551 		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
2552 			if (pdps[0] == mm->ppgtt_mm.guest_pdps[0])
2553 				return mm;
2554 			break;
2555 		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
2556 			if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps,
2557 				    sizeof(mm->ppgtt_mm.guest_pdps)))
2558 				return mm;
2559 			break;
2560 		default:
2561 			GEM_BUG_ON(1);
2562 		}
2563 	}
2564 	return NULL;
2565 }
2566 
2567 /**
2568  * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object.
2569  * @vgpu: a vGPU
2570  * @root_entry_type: ppgtt root entry type
2571  * @pdps: guest pdps
2572  *
2573  * This function is used to find or create a PPGTT mm object from a guest.
2574  *
2575  * Returns:
2576  * Pointer to the mm object on success, or an ERR_PTR()-encoded negative
2577  * error code if failed.
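 *
 * A typical caller (sketch only; error handling elided):
 *
 *	mm = intel_vgpu_get_ppgtt_mm(vgpu, root_entry_type, pdps);
 *	if (IS_ERR(mm))
 *		return PTR_ERR(mm);
 *	...
 *	intel_vgpu_put_ppgtt_mm(vgpu, pdps);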
2577  */
2578 struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
2579 		enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
2580 {
2581 	struct intel_vgpu_mm *mm;
2582 
2583 	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
2584 	if (mm) {
2585 		intel_vgpu_mm_get(mm);
2586 	} else {
2587 		mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps);
2588 		if (IS_ERR(mm))
2589 			gvt_vgpu_err("fail to create mm\n");
2590 	}
2591 	return mm;
2592 }
2593 
2594 /**
2595  * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object.
2596  * @vgpu: a vGPU
2597  * @pdps: guest pdps
2598  *
2599  * This function is used to find a PPGTT mm object from a guest and drop a
2600  * reference to it, destroying it once the last reference is gone.
2600  *
2601  * Returns:
2602  * Zero on success, negative error code if failed.
2603  */
2604 int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[])
2605 {
2606 	struct intel_vgpu_mm *mm;
2607 
2608 	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
2609 	if (!mm) {
2610 		gvt_vgpu_err("fail to find ppgtt instance.\n");
2611 		return -EINVAL;
2612 	}
2613 	intel_vgpu_mm_put(mm);
2614 	return 0;
2615 }
2616 
2617 /**
2618  * intel_gvt_init_gtt - initialize mm components of a GVT device
2619  * @gvt: GVT device
2620  *
2621  * This function is called at the initialization stage, to initialize
2622  * the mm components of a GVT device.
2623  *
2624  * Returns:
2625  * zero on success, negative error code if failed.
2626  */
2627 int intel_gvt_init_gtt(struct intel_gvt *gvt)
2628 {
2629 	int ret;
2630 	void *page;
2631 	struct device *dev = gvt->gt->i915->drm.dev;
2632 	dma_addr_t daddr;
2633 
2634 	gvt_dbg_core("init gtt\n");
2635 
2636 	gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
2637 	gvt->gtt.gma_ops = &gen8_gtt_gma_ops;
2638 
2639 	page = (void *)get_zeroed_page(GFP_KERNEL);
2640 	if (!page) {
2641 		gvt_err("fail to allocate scratch ggtt page\n");
2642 		return -ENOMEM;
2643 	}
2644 
2645 	daddr = dma_map_page(dev, virt_to_page(page), 0,
2646 			4096, DMA_BIDIRECTIONAL);
2647 	if (dma_mapping_error(dev, daddr)) {
2648 		gvt_err("fail to dmamap scratch ggtt page\n");
2649 		__free_page(virt_to_page(page));
2650 		return -ENOMEM;
2651 	}
2652 
2653 	gvt->gtt.scratch_page = virt_to_page(page);
2654 	gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
2655 
2656 	if (enable_out_of_sync) {
2657 		ret = setup_spt_oos(gvt);
2658 		if (ret) {
2659 			gvt_err("fail to initialize SPT oos\n");
2660 			dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);
2661 			__free_page(gvt->gtt.scratch_page);
2662 			return ret;
2663 		}
2664 	}
2665 	INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head);
2666 	mutex_init(&gvt->gtt.ppgtt_mm_lock);
2667 	return 0;
2668 }
2669 
2670 /**
2671  * intel_gvt_clean_gtt - clean up mm components of a GVT device
2672  * @gvt: GVT device
2673  *
2674  * This function is called at the driver unloading stage, to clean up
2675  * the mm components of a GVT device.
2676  *
2677  */
2678 void intel_gvt_clean_gtt(struct intel_gvt *gvt)
2679 {
2680 	struct device *dev = gvt->gt->i915->drm.dev;
2681 	dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn <<
2682 					I915_GTT_PAGE_SHIFT);
2683 
2684 	dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);
2685 
2686 	__free_page(gvt->gtt.scratch_page);
2687 
2688 	if (enable_out_of_sync)
2689 		clean_spt_oos(gvt);
2690 }
2691 
2692 /**
2693  * intel_vgpu_invalidate_ppgtt - invalidate PPGTT instances
2694  * @vgpu: a vGPU
2695  *
2696  * This function is called to invalidate all PPGTT instances of a vGPU.
2697  *
2698  */
2699 void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu)
2700 {
2701 	struct list_head *pos, *n;
2702 	struct intel_vgpu_mm *mm;
2703 
2704 	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
2705 		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
2706 		if (mm->type == INTEL_GVT_MM_PPGTT) {
2707 			mutex_lock(&vgpu->gvt->gtt.ppgtt_mm_lock);
2708 			list_del_init(&mm->ppgtt_mm.lru_list);
2709 			mutex_unlock(&vgpu->gvt->gtt.ppgtt_mm_lock);
2710 			if (mm->ppgtt_mm.shadowed)
2711 				invalidate_ppgtt_mm(mm);
2712 		}
2713 	}
2714 }
2715 
2716 /**
2717  * intel_vgpu_reset_ggtt - reset the GGTT entry
2718  * @vgpu: a vGPU
2719  * @invalidate_old: invalidate old entries
2720  *
2721  * This function is called at the vGPU create stage
2722  * to reset all the GGTT entries.
2723  *
2724  */
2725 void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old)
2726 {
2727 	struct intel_gvt *gvt = vgpu->gvt;
2728 	const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
2729 	struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE};
2730 	struct intel_gvt_gtt_entry old_entry;
2731 	u32 index;
2732 	u32 num_entries;
2733 
2734 	pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn);
2735 	pte_ops->set_present(&entry);
2736 
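	/*
	 * Point every GGTT entry in both the aperture and the hidden range at
	 * the scratch page, optionally releasing the mappings held by the old
	 * entries first.
	 */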
2737 	index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
2738 	num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
2739 	while (num_entries--) {
2740 		if (invalidate_old) {
2741 			ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
2742 			ggtt_invalidate_pte(vgpu, &old_entry);
2743 		}
2744 		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
2745 	}
2746 
2747 	index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
2748 	num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
2749 	while (num_entries--) {
2750 		if (invalidate_old) {
2751 			ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
2752 			ggtt_invalidate_pte(vgpu, &old_entry);
2753 		}
2754 		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
2755 	}
2756 
2757 	ggtt_invalidate(gvt->gt);
2758 }
2759 
2760 /**
2761  * intel_gvt_restore_ggtt - restore all vGPU's ggtt entries
2762  * @gvt: intel gvt device
2763  *
2764  * This function is called at driver resume stage to restore
2765  * GGTT entries of every vGPU.
2766  *
2767  */
2768 void intel_gvt_restore_ggtt(struct intel_gvt *gvt)
2769 {
2770 	struct intel_vgpu *vgpu;
2771 	struct intel_vgpu_mm *mm;
2772 	int id;
2773 	gen8_pte_t pte;
2774 	u32 idx, num_low, num_hi, offset;
2775 
2776 	/* Restore dirty host ggtt for all vGPUs */
2777 	idr_for_each_entry(&(gvt)->vgpu_idr, vgpu, id) {
2778 		mm = vgpu->gtt.ggtt_mm;
2779 
2780 		num_low = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
2781 		offset = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
2782 		for (idx = 0; idx < num_low; idx++) {
2783 			pte = mm->ggtt_mm.host_ggtt_aperture[idx];
2784 			if (pte & GEN8_PAGE_PRESENT)
2785 				write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
2786 		}
2787 
2788 		num_hi = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
2789 		offset = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
2790 		for (idx = 0; idx < num_hi; idx++) {
2791 			pte = mm->ggtt_mm.host_ggtt_hidden[idx];
2792 			if (pte & GEN8_PAGE_PRESENT)
2793 				write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
2794 		}
2795 	}
2796 }
2797