xref: /linux/drivers/gpu/drm/i915/gvt/gtt.c (revision f6e8dc9edf963dbc99085e54f6ced6da9daa6100)
1 /*
2  * GTT virtualization
3  *
4  * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  *
25  * Authors:
26  *    Zhi Wang <zhi.a.wang@intel.com>
27  *    Zhenyu Wang <zhenyuw@linux.intel.com>
28  *    Xiao Zheng <xiao.zheng@intel.com>
29  *
30  * Contributors:
31  *    Min He <min.he@intel.com>
32  *    Bing Niu <bing.niu@intel.com>
33  *
34  */
35 
36 #include <drm/drm_print.h>
37 
38 #include "i915_drv.h"
39 #include "gvt.h"
40 #include "i915_pvinfo.h"
41 #include "trace.h"
42 
43 #include "gt/intel_gt_regs.h"
44 #include <linux/vmalloc.h>
45 
/*
 * Verbose MM debug logging: forwards to gvt_dbg_mm() only when this file is
 * built with VERBOSE_DEBUG defined, and expands to nothing otherwise.
 */
#if defined(VERBOSE_DEBUG)
#define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args)
#else
#define gvt_vdbg_mm(fmt, args...)
#endif
51 
/*
 * Out-of-sync (OOS) page tunables.  Their consumers are outside this part of
 * the file; NOTE(review): presumably the first gates OOS handling and the
 * second sizes a preallocated OOS page pool - confirm against the users.
 */
static bool enable_out_of_sync = false;
static int preallocated_oos_pages = 8192;
54 
55 /*
56  * validate a gm address and related range size,
57  * translate it to host gm address
58  */
59 bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
60 {
61 	if (size == 0)
62 		return vgpu_gmadr_is_valid(vgpu, addr);
63 
64 	if (vgpu_gmadr_is_aperture(vgpu, addr) &&
65 	    vgpu_gmadr_is_aperture(vgpu, addr + size - 1))
66 		return true;
67 	else if (vgpu_gmadr_is_hidden(vgpu, addr) &&
68 		 vgpu_gmadr_is_hidden(vgpu, addr + size - 1))
69 		return true;
70 
71 	gvt_dbg_mm("Invalid ggtt range at 0x%llx, size: 0x%x\n",
72 		     addr, size);
73 	return false;
74 }
75 
/*
 * Type classification helpers.  They rely on the numeric ordering of the
 * GTT_TYPE_* enumeration.  Every macro argument is parenthesized so that an
 * argument containing a low-precedence operator (e.g. a ternary) is
 * evaluated as a whole.  Note that the arguments are still evaluated more
 * than once, so they must be free of side effects.
 */

/* True for a concrete entry type, excluding the PTE/ROOT group markers. */
#define gtt_type_is_entry(type) \
	((type) > GTT_TYPE_INVALID && (type) < GTT_TYPE_PPGTT_ENTRY \
	 && (type) != GTT_TYPE_PPGTT_PTE_ENTRY \
	 && (type) != GTT_TYPE_PPGTT_ROOT_ENTRY)

/* True for any page table type. */
#define gtt_type_is_pt(type) \
	((type) >= GTT_TYPE_PPGTT_PTE_PT && (type) < GTT_TYPE_MAX)

/* True only for the PTE level page table type. */
#define gtt_type_is_pte_pt(type) \
	((type) == GTT_TYPE_PPGTT_PTE_PT)

/* True for an entry type ordered after GTT_TYPE_PPGTT_ROOT_ENTRY. */
#define gtt_type_is_root_pointer(type) \
	(gtt_type_is_entry(type) && (type) > GTT_TYPE_PPGTT_ROOT_ENTRY)

/*
 * Initialise entry @e with type @t, device @p and raw value @v.  @v must be
 * an lvalue: sizeof(@v) bytes are copied from it into @e->val64.
 */
#define gtt_init_entry(e, t, p, v) do { \
	(e)->type = (t); \
	(e)->pdev = (p); \
	memcpy(&(e)->val64, &(v), sizeof(v)); \
} while (0)
95 
/*
 * Mappings between GTT_TYPE* enumerations.
 * The following information can be looked up for a given type:
 * - type of the next level page table
 * - type of the entries inside this level page table
 * - type of the entry when its PSE bit is set
 *
 * If the given type has no such information, GTT_TYPE_INVALID is returned.
 * For example, an L4 root entry has no PSE bit, so asking for its PSE type
 * yields GTT_TYPE_INVALID; likewise a PTE page table has no next level page
 * table, so asking for its next level page table type yields
 * GTT_TYPE_INVALID. This is useful when traversing a page table.
 */

/* One row of the type mapping table; every field holds a GTT_TYPE_* value. */
struct gtt_type_table_entry {
	int entry_type;		/* type of the entries at this level */
	int pt_type;		/* page table type at this level */
	int next_pt_type;	/* type of the next level page table */
	int pse_entry_type;	/* entry type to use when PSE is set */
};

/*
 * Build the designated initializer for the table slot indexed by @type from
 * the entry, current page table, next page table and PSE entry types.
 */
#define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \
	[type] = { \
		.entry_type = e_type, \
		.pt_type = cpt_type, \
		.next_pt_type = npt_type, \
		.pse_entry_type = pse_type, \
	}
126 
127 static const struct gtt_type_table_entry gtt_type_table[] = {
128 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
129 			GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
130 			GTT_TYPE_INVALID,
131 			GTT_TYPE_PPGTT_PML4_PT,
132 			GTT_TYPE_INVALID),
133 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT,
134 			GTT_TYPE_PPGTT_PML4_ENTRY,
135 			GTT_TYPE_PPGTT_PML4_PT,
136 			GTT_TYPE_PPGTT_PDP_PT,
137 			GTT_TYPE_INVALID),
138 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY,
139 			GTT_TYPE_PPGTT_PML4_ENTRY,
140 			GTT_TYPE_PPGTT_PML4_PT,
141 			GTT_TYPE_PPGTT_PDP_PT,
142 			GTT_TYPE_INVALID),
143 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT,
144 			GTT_TYPE_PPGTT_PDP_ENTRY,
145 			GTT_TYPE_PPGTT_PDP_PT,
146 			GTT_TYPE_PPGTT_PDE_PT,
147 			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
148 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
149 			GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
150 			GTT_TYPE_INVALID,
151 			GTT_TYPE_PPGTT_PDE_PT,
152 			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
153 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY,
154 			GTT_TYPE_PPGTT_PDP_ENTRY,
155 			GTT_TYPE_PPGTT_PDP_PT,
156 			GTT_TYPE_PPGTT_PDE_PT,
157 			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
158 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT,
159 			GTT_TYPE_PPGTT_PDE_ENTRY,
160 			GTT_TYPE_PPGTT_PDE_PT,
161 			GTT_TYPE_PPGTT_PTE_PT,
162 			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
163 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY,
164 			GTT_TYPE_PPGTT_PDE_ENTRY,
165 			GTT_TYPE_PPGTT_PDE_PT,
166 			GTT_TYPE_PPGTT_PTE_PT,
167 			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
168 	/* We take IPS bit as 'PSE' for PTE level. */
169 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT,
170 			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
171 			GTT_TYPE_PPGTT_PTE_PT,
172 			GTT_TYPE_INVALID,
173 			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
174 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY,
175 			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
176 			GTT_TYPE_PPGTT_PTE_PT,
177 			GTT_TYPE_INVALID,
178 			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
179 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_64K_ENTRY,
180 			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
181 			GTT_TYPE_PPGTT_PTE_PT,
182 			GTT_TYPE_INVALID,
183 			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
184 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY,
185 			GTT_TYPE_PPGTT_PDE_ENTRY,
186 			GTT_TYPE_PPGTT_PDE_PT,
187 			GTT_TYPE_INVALID,
188 			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
189 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY,
190 			GTT_TYPE_PPGTT_PDP_ENTRY,
191 			GTT_TYPE_PPGTT_PDP_PT,
192 			GTT_TYPE_INVALID,
193 			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
194 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE,
195 			GTT_TYPE_GGTT_PTE,
196 			GTT_TYPE_INVALID,
197 			GTT_TYPE_INVALID,
198 			GTT_TYPE_INVALID),
199 };
200 
201 static inline int get_next_pt_type(int type)
202 {
203 	return gtt_type_table[type].next_pt_type;
204 }
205 
206 static inline int get_entry_type(int type)
207 {
208 	return gtt_type_table[type].entry_type;
209 }
210 
211 static inline int get_pse_type(int type)
212 {
213 	return gtt_type_table[type].pse_entry_type;
214 }
215 
216 static u64 read_pte64(struct i915_ggtt *ggtt, unsigned long index)
217 {
218 	void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index;
219 
220 	return readq(addr);
221 }
222 
223 static void ggtt_invalidate(struct intel_gt *gt)
224 {
225 	intel_wakeref_t wakeref;
226 
227 	wakeref = mmio_hw_access_pre(gt);
228 	intel_uncore_write(gt->uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
229 	mmio_hw_access_post(gt, wakeref);
230 }
231 
232 static void write_pte64(struct i915_ggtt *ggtt, unsigned long index, u64 pte)
233 {
234 	void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index;
235 
236 	writeq(pte, addr);
237 }
238 
239 static inline int gtt_get_entry64(void *pt,
240 		struct intel_gvt_gtt_entry *e,
241 		unsigned long index, bool hypervisor_access, unsigned long gpa,
242 		struct intel_vgpu *vgpu)
243 {
244 	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
245 	int ret;
246 
247 	if (WARN_ON(info->gtt_entry_size != 8))
248 		return -EINVAL;
249 
250 	if (hypervisor_access) {
251 		ret = intel_gvt_read_gpa(vgpu, gpa +
252 				(index << info->gtt_entry_size_shift),
253 				&e->val64, 8);
254 		if (WARN_ON(ret))
255 			return ret;
256 	} else if (!pt) {
257 		e->val64 = read_pte64(vgpu->gvt->gt->ggtt, index);
258 	} else {
259 		e->val64 = *((u64 *)pt + index);
260 	}
261 	return 0;
262 }
263 
264 static inline int gtt_set_entry64(void *pt,
265 		struct intel_gvt_gtt_entry *e,
266 		unsigned long index, bool hypervisor_access, unsigned long gpa,
267 		struct intel_vgpu *vgpu)
268 {
269 	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
270 	int ret;
271 
272 	if (WARN_ON(info->gtt_entry_size != 8))
273 		return -EINVAL;
274 
275 	if (hypervisor_access) {
276 		ret = intel_gvt_write_gpa(vgpu, gpa +
277 				(index << info->gtt_entry_size_shift),
278 				&e->val64, 8);
279 		if (WARN_ON(ret))
280 			return ret;
281 	} else if (!pt) {
282 		write_pte64(vgpu->gvt->gt->ggtt, index, e->val64);
283 	} else {
284 		*((u64 *)pt + index) = e->val64;
285 	}
286 	return 0;
287 }
288 
/* Upper bound (exclusive bit number) of the address masks defined below. */
#define GTT_HAW 46

/* Address field of an entry per page size: bits [GTT_HAW - 1 : size shift]. */
#define ADDR_1G_MASK	GENMASK_ULL(GTT_HAW - 1, 30)
#define ADDR_2M_MASK	GENMASK_ULL(GTT_HAW - 1, 21)
#define ADDR_64K_MASK	GENMASK_ULL(GTT_HAW - 1, 16)
#define ADDR_4K_MASK	GENMASK_ULL(GTT_HAW - 1, 12)

/* Bits 62:52 of an entry hold flags; bit 52 marks a split 64K entry. */
#define GTT_SPTE_FLAG_MASK GENMASK_ULL(62, 52)
#define GTT_SPTE_FLAG_64K_SPLITED BIT(52) /* split 64K gtt entry */

/* Index step used when walking a page table whose pde_ips flag is set. */
#define GTT_64K_PTE_STRIDE 16
300 
301 static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
302 {
303 	unsigned long pfn;
304 
305 	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY)
306 		pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT;
307 	else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY)
308 		pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT;
309 	else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY)
310 		pfn = (e->val64 & ADDR_64K_MASK) >> PAGE_SHIFT;
311 	else
312 		pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT;
313 	return pfn;
314 }
315 
316 static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn)
317 {
318 	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
319 		e->val64 &= ~ADDR_1G_MASK;
320 		pfn &= (ADDR_1G_MASK >> PAGE_SHIFT);
321 	} else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) {
322 		e->val64 &= ~ADDR_2M_MASK;
323 		pfn &= (ADDR_2M_MASK >> PAGE_SHIFT);
324 	} else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) {
325 		e->val64 &= ~ADDR_64K_MASK;
326 		pfn &= (ADDR_64K_MASK >> PAGE_SHIFT);
327 	} else {
328 		e->val64 &= ~ADDR_4K_MASK;
329 		pfn &= (ADDR_4K_MASK >> PAGE_SHIFT);
330 	}
331 
332 	e->val64 |= (pfn << PAGE_SHIFT);
333 }
334 
335 static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e)
336 {
337 	return !!(e->val64 & _PAGE_PSE);
338 }
339 
340 static void gen8_gtt_clear_pse(struct intel_gvt_gtt_entry *e)
341 {
342 	if (gen8_gtt_test_pse(e)) {
343 		switch (e->type) {
344 		case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
345 			e->val64 &= ~_PAGE_PSE;
346 			e->type = GTT_TYPE_PPGTT_PDE_ENTRY;
347 			break;
348 		case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
349 			e->type = GTT_TYPE_PPGTT_PDP_ENTRY;
350 			e->val64 &= ~_PAGE_PSE;
351 			break;
352 		default:
353 			WARN_ON(1);
354 		}
355 	}
356 }
357 
358 static bool gen8_gtt_test_ips(struct intel_gvt_gtt_entry *e)
359 {
360 	if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
361 		return false;
362 
363 	return !!(e->val64 & GEN8_PDE_IPS_64K);
364 }
365 
366 static void gen8_gtt_clear_ips(struct intel_gvt_gtt_entry *e)
367 {
368 	if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
369 		return;
370 
371 	e->val64 &= ~GEN8_PDE_IPS_64K;
372 }
373 
374 static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
375 {
376 	/*
377 	 * i915 writes PDP root pointer registers without present bit,
378 	 * it also works, so we need to treat root pointer entry
379 	 * specifically.
380 	 */
381 	if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY
382 			|| e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
383 		return (e->val64 != 0);
384 	else
385 		return (e->val64 & GEN8_PAGE_PRESENT);
386 }
387 
388 static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e)
389 {
390 	e->val64 &= ~GEN8_PAGE_PRESENT;
391 }
392 
393 static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e)
394 {
395 	e->val64 |= GEN8_PAGE_PRESENT;
396 }
397 
398 static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e)
399 {
400 	return !!(e->val64 & GTT_SPTE_FLAG_64K_SPLITED);
401 }
402 
403 static void gen8_gtt_set_64k_splited(struct intel_gvt_gtt_entry *e)
404 {
405 	e->val64 |= GTT_SPTE_FLAG_64K_SPLITED;
406 }
407 
408 static void gen8_gtt_clear_64k_splited(struct intel_gvt_gtt_entry *e)
409 {
410 	e->val64 &= ~GTT_SPTE_FLAG_64K_SPLITED;
411 }
412 
413 /*
414  * Per-platform GMA routines.
415  */
416 static unsigned long gma_to_ggtt_pte_index(unsigned long gma)
417 {
418 	unsigned long x = (gma >> I915_GTT_PAGE_SHIFT);
419 
420 	trace_gma_index(__func__, gma, x);
421 	return x;
422 }
423 
/*
 * Generate <prefix>_gma_to_<ename>_index(): a function that evaluates @exp
 * (an expression over its "gma" parameter), traces the result and returns
 * it.
 */
#define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \
static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \
{ \
	unsigned long x = (exp); \
	trace_gma_index(__func__, gma, x); \
	return x; \
}

/* PTE index: the 9 bits of gma starting at bit 12. */
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff));
/* PDE index: the 9 bits of gma starting at bit 21. */
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff));
/* L3 PDP index: the 2 bits of gma starting at bit 30. */
DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3));
/* L4 PDP index: the 9 bits of gma starting at bit 30. */
DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff));
/* PML4 index: the 9 bits of gma starting at bit 39. */
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));
437 
/*
 * Gen8 page table entry operations: binds the 64-bit entry accessors and the
 * bit/pfn helpers defined above to the generic pte_ops interface.
 */
static const struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = {
	.get_entry = gtt_get_entry64,
	.set_entry = gtt_set_entry64,
	.clear_present = gtt_entry_clear_present,
	.set_present = gtt_entry_set_present,
	.test_present = gen8_gtt_test_present,
	.test_pse = gen8_gtt_test_pse,
	.clear_pse = gen8_gtt_clear_pse,
	.clear_ips = gen8_gtt_clear_ips,
	.test_ips = gen8_gtt_test_ips,
	.clear_64k_splited = gen8_gtt_clear_64k_splited,
	.set_64k_splited = gen8_gtt_set_64k_splited,
	.test_64k_splited = gen8_gtt_test_64k_splited,
	.get_pfn = gen8_gtt_get_pfn,
	.set_pfn = gen8_gtt_set_pfn,
};
454 
/*
 * Gen8 graphics memory address operations: binds the address-to-index
 * helpers defined above to the generic gma_ops interface.
 */
static const struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = {
	.gma_to_ggtt_pte_index = gma_to_ggtt_pte_index,
	.gma_to_pte_index = gen8_gma_to_pte_index,
	.gma_to_pde_index = gen8_gma_to_pde_index,
	.gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index,
	.gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index,
	.gma_to_pml4_index = gen8_gma_to_pml4_index,
};
463 
464 /* Update entry type per pse and ips bit. */
465 static void update_entry_type_for_real(const struct intel_gvt_gtt_pte_ops *pte_ops,
466 	struct intel_gvt_gtt_entry *entry, bool ips)
467 {
468 	switch (entry->type) {
469 	case GTT_TYPE_PPGTT_PDE_ENTRY:
470 	case GTT_TYPE_PPGTT_PDP_ENTRY:
471 		if (pte_ops->test_pse(entry))
472 			entry->type = get_pse_type(entry->type);
473 		break;
474 	case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
475 		if (ips)
476 			entry->type = get_pse_type(entry->type);
477 		break;
478 	default:
479 		GEM_BUG_ON(!gtt_type_is_entry(entry->type));
480 	}
481 
482 	GEM_BUG_ON(entry->type == GTT_TYPE_INVALID);
483 }
484 
485 /*
486  * MM helpers.
487  */
488 static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm,
489 		struct intel_gvt_gtt_entry *entry, unsigned long index,
490 		bool guest)
491 {
492 	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
493 
494 	GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT);
495 
496 	entry->type = mm->ppgtt_mm.root_entry_type;
497 	pte_ops->get_entry(guest ? mm->ppgtt_mm.guest_pdps :
498 			   mm->ppgtt_mm.shadow_pdps,
499 			   entry, index, false, 0, mm->vgpu);
500 	update_entry_type_for_real(pte_ops, entry, false);
501 }
502 
503 static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm,
504 		struct intel_gvt_gtt_entry *entry, unsigned long index)
505 {
506 	_ppgtt_get_root_entry(mm, entry, index, true);
507 }
508 
509 static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm,
510 		struct intel_gvt_gtt_entry *entry, unsigned long index)
511 {
512 	_ppgtt_get_root_entry(mm, entry, index, false);
513 }
514 
515 static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm,
516 		struct intel_gvt_gtt_entry *entry, unsigned long index,
517 		bool guest)
518 {
519 	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
520 
521 	pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps :
522 			   mm->ppgtt_mm.shadow_pdps,
523 			   entry, index, false, 0, mm->vgpu);
524 }
525 
526 static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm,
527 		struct intel_gvt_gtt_entry *entry, unsigned long index)
528 {
529 	_ppgtt_set_root_entry(mm, entry, index, false);
530 }
531 
532 static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm,
533 		struct intel_gvt_gtt_entry *entry, unsigned long index)
534 {
535 	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
536 
537 	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
538 
539 	entry->type = GTT_TYPE_GGTT_PTE;
540 	pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
541 			   false, 0, mm->vgpu);
542 }
543 
544 static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm,
545 		struct intel_gvt_gtt_entry *entry, unsigned long index)
546 {
547 	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
548 
549 	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
550 
551 	pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
552 			   false, 0, mm->vgpu);
553 }
554 
555 static void ggtt_get_host_entry(struct intel_vgpu_mm *mm,
556 		struct intel_gvt_gtt_entry *entry, unsigned long index)
557 {
558 	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
559 
560 	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
561 
562 	pte_ops->get_entry(NULL, entry, index, false, 0, mm->vgpu);
563 }
564 
565 static void ggtt_set_host_entry(struct intel_vgpu_mm *mm,
566 		struct intel_gvt_gtt_entry *entry, unsigned long index)
567 {
568 	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
569 	unsigned long offset = index;
570 
571 	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
572 
573 	if (vgpu_gmadr_is_aperture(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) {
574 		offset -= (vgpu_aperture_gmadr_base(mm->vgpu) >> PAGE_SHIFT);
575 		mm->ggtt_mm.host_ggtt_aperture[offset] = entry->val64;
576 	} else if (vgpu_gmadr_is_hidden(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) {
577 		offset -= (vgpu_hidden_gmadr_base(mm->vgpu) >> PAGE_SHIFT);
578 		mm->ggtt_mm.host_ggtt_hidden[offset] = entry->val64;
579 	}
580 
581 	pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu);
582 }
583 
584 /*
585  * PPGTT shadow page table helpers.
586  */
587 static inline int ppgtt_spt_get_entry(
588 		struct intel_vgpu_ppgtt_spt *spt,
589 		void *page_table, int type,
590 		struct intel_gvt_gtt_entry *e, unsigned long index,
591 		bool guest)
592 {
593 	struct intel_gvt *gvt = spt->vgpu->gvt;
594 	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
595 	int ret;
596 
597 	e->type = get_entry_type(type);
598 
599 	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
600 		return -EINVAL;
601 
602 	ret = ops->get_entry(page_table, e, index, guest,
603 			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
604 			spt->vgpu);
605 	if (ret)
606 		return ret;
607 
608 	update_entry_type_for_real(ops, e, guest ?
609 				   spt->guest_page.pde_ips : false);
610 
611 	gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
612 		    type, e->type, index, e->val64);
613 	return 0;
614 }
615 
616 static inline int ppgtt_spt_set_entry(
617 		struct intel_vgpu_ppgtt_spt *spt,
618 		void *page_table, int type,
619 		struct intel_gvt_gtt_entry *e, unsigned long index,
620 		bool guest)
621 {
622 	struct intel_gvt *gvt = spt->vgpu->gvt;
623 	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
624 
625 	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
626 		return -EINVAL;
627 
628 	gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
629 		    type, e->type, index, e->val64);
630 
631 	return ops->set_entry(page_table, e, index, guest,
632 			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
633 			spt->vgpu);
634 }
635 
/*
 * Entry accessors for a shadow page table @spt.
 *
 * The guest variants pass no page table and request a guest access using
 * the guest page type; the shadow variants operate on the shadow page's
 * kernel mapping using the shadow page type.  @spt is parenthesized before
 * member access so that any pointer expression can be passed; it is
 * evaluated more than once and must be free of side effects.
 */
#define ppgtt_get_guest_entry(spt, e, index) \
	ppgtt_spt_get_entry(spt, NULL, \
		(spt)->guest_page.type, e, index, true)

#define ppgtt_set_guest_entry(spt, e, index) \
	ppgtt_spt_set_entry(spt, NULL, \
		(spt)->guest_page.type, e, index, true)

#define ppgtt_get_shadow_entry(spt, e, index) \
	ppgtt_spt_get_entry(spt, (spt)->shadow_page.vaddr, \
		(spt)->shadow_page.type, e, index, false)

#define ppgtt_set_shadow_entry(spt, e, index) \
	ppgtt_spt_set_entry(spt, (spt)->shadow_page.vaddr, \
		(spt)->shadow_page.type, e, index, false)
651 
652 static void *alloc_spt(gfp_t gfp_mask)
653 {
654 	struct intel_vgpu_ppgtt_spt *spt;
655 
656 	spt = kzalloc(sizeof(*spt), gfp_mask);
657 	if (!spt)
658 		return NULL;
659 
660 	spt->shadow_page.page = alloc_page(gfp_mask);
661 	if (!spt->shadow_page.page) {
662 		kfree(spt);
663 		return NULL;
664 	}
665 	return spt;
666 }
667 
668 static void free_spt(struct intel_vgpu_ppgtt_spt *spt)
669 {
670 	__free_page(spt->shadow_page.page);
671 	kfree(spt);
672 }
673 
674 static int detach_oos_page(struct intel_vgpu *vgpu,
675 		struct intel_vgpu_oos_page *oos_page);
676 
677 static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt)
678 {
679 	struct device *kdev = spt->vgpu->gvt->gt->i915->drm.dev;
680 
681 	trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type);
682 
683 	dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096,
684 		       DMA_BIDIRECTIONAL);
685 
686 	radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn);
687 
688 	if (spt->guest_page.gfn) {
689 		if (spt->guest_page.oos_page)
690 			detach_oos_page(spt->vgpu, spt->guest_page.oos_page);
691 
692 		intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn);
693 	}
694 
695 	list_del_init(&spt->post_shadow_list);
696 	free_spt(spt);
697 }
698 
699 static void ppgtt_free_all_spt(struct intel_vgpu *vgpu)
700 {
701 	struct intel_vgpu_ppgtt_spt *spt, *spn;
702 	struct radix_tree_iter iter;
703 	LIST_HEAD(all_spt);
704 	void __rcu **slot;
705 
706 	rcu_read_lock();
707 	radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) {
708 		spt = radix_tree_deref_slot(slot);
709 		list_move(&spt->post_shadow_list, &all_spt);
710 	}
711 	rcu_read_unlock();
712 
713 	list_for_each_entry_safe(spt, spn, &all_spt, post_shadow_list)
714 		ppgtt_free_spt(spt);
715 }
716 
717 static int ppgtt_handle_guest_write_page_table_bytes(
718 		struct intel_vgpu_ppgtt_spt *spt,
719 		u64 pa, void *p_data, int bytes);
720 
721 static int ppgtt_write_protection_handler(
722 		struct intel_vgpu_page_track *page_track,
723 		u64 gpa, void *data, int bytes)
724 {
725 	struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data;
726 
727 	int ret;
728 
729 	if (bytes != 4 && bytes != 8)
730 		return -EINVAL;
731 
732 	ret = ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data, bytes);
733 	if (ret)
734 		return ret;
735 	return ret;
736 }
737 
738 /* Find a spt by guest gfn. */
739 static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn(
740 		struct intel_vgpu *vgpu, unsigned long gfn)
741 {
742 	struct intel_vgpu_page_track *track;
743 
744 	track = intel_vgpu_find_page_track(vgpu, gfn);
745 	if (track && track->handler == ppgtt_write_protection_handler)
746 		return track->priv_data;
747 
748 	return NULL;
749 }
750 
751 /* Find the spt by shadow page mfn. */
752 static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn(
753 		struct intel_vgpu *vgpu, unsigned long mfn)
754 {
755 	return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn);
756 }
757 
758 static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);
759 
760 /* Allocate shadow page table without guest page. */
761 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
762 		struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type)
763 {
764 	struct device *kdev = vgpu->gvt->gt->i915->drm.dev;
765 	struct intel_vgpu_ppgtt_spt *spt = NULL;
766 	dma_addr_t daddr;
767 	int ret;
768 
769 retry:
770 	spt = alloc_spt(GFP_KERNEL | __GFP_ZERO);
771 	if (!spt) {
772 		if (reclaim_one_ppgtt_mm(vgpu->gvt))
773 			goto retry;
774 
775 		gvt_vgpu_err("fail to allocate ppgtt shadow page\n");
776 		return ERR_PTR(-ENOMEM);
777 	}
778 
779 	spt->vgpu = vgpu;
780 	atomic_set(&spt->refcount, 1);
781 	INIT_LIST_HEAD(&spt->post_shadow_list);
782 
783 	/*
784 	 * Init shadow_page.
785 	 */
786 	spt->shadow_page.type = type;
787 	daddr = dma_map_page(kdev, spt->shadow_page.page,
788 			     0, 4096, DMA_BIDIRECTIONAL);
789 	if (dma_mapping_error(kdev, daddr)) {
790 		gvt_vgpu_err("fail to map dma addr\n");
791 		ret = -EINVAL;
792 		goto err_free_spt;
793 	}
794 	spt->shadow_page.vaddr = page_address(spt->shadow_page.page);
795 	spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT;
796 
797 	ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt);
798 	if (ret)
799 		goto err_unmap_dma;
800 
801 	return spt;
802 
803 err_unmap_dma:
804 	dma_unmap_page(kdev, daddr, PAGE_SIZE, DMA_BIDIRECTIONAL);
805 err_free_spt:
806 	free_spt(spt);
807 	return ERR_PTR(ret);
808 }
809 
810 /* Allocate shadow page table associated with specific gfn. */
811 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn(
812 		struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type,
813 		unsigned long gfn, bool guest_pde_ips)
814 {
815 	struct intel_vgpu_ppgtt_spt *spt;
816 	int ret;
817 
818 	spt = ppgtt_alloc_spt(vgpu, type);
819 	if (IS_ERR(spt))
820 		return spt;
821 
822 	/*
823 	 * Init guest_page.
824 	 */
825 	ret = intel_vgpu_register_page_track(vgpu, gfn,
826 			ppgtt_write_protection_handler, spt);
827 	if (ret) {
828 		ppgtt_free_spt(spt);
829 		return ERR_PTR(ret);
830 	}
831 
832 	spt->guest_page.type = type;
833 	spt->guest_page.gfn = gfn;
834 	spt->guest_page.pde_ips = guest_pde_ips;
835 
836 	trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
837 
838 	return spt;
839 }
840 
/* log2 of the size of one page table entry for the device owning @spt. */
#define pt_entry_size_shift(spt) \
	((spt)->vgpu->gvt->device_info.gtt_entry_size_shift)

/* Number of entries held by one page table page. */
#define pt_entries(spt) \
	(I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt))

/*
 * Page table iterators.  Each one walks the entry indices of @spt with @i,
 * loads the entry into @e and runs the loop body only for entries that were
 * read successfully (and, for the "present" variants, that test as
 * present).  The index advances by GTT_64K_PTE_STRIDE when the relevant
 * pde_ips flag is set, and by 1 otherwise.
 *
 * The body is the statement of a trailing "if", so do not follow a use of
 * these macros with an "else".  @spt and @i are parenthesized so that any
 * expression can be passed; they are evaluated more than once.
 */
#define for_each_present_guest_entry(spt, e, i) \
	for ((i) = 0; (i) < pt_entries(spt); \
	     (i) += (spt)->guest_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
		if (!ppgtt_get_guest_entry(spt, e, i) && \
		    (spt)->vgpu->gvt->gtt.pte_ops->test_present(e))

#define for_each_present_shadow_entry(spt, e, i) \
	for ((i) = 0; (i) < pt_entries(spt); \
	     (i) += (spt)->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
		if (!ppgtt_get_shadow_entry(spt, e, i) && \
		    (spt)->vgpu->gvt->gtt.pte_ops->test_present(e))

#define for_each_shadow_entry(spt, e, i) \
	for ((i) = 0; (i) < pt_entries(spt); \
	     (i) += ((spt)->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1)) \
		if (!ppgtt_get_shadow_entry(spt, e, i))
863 
864 static inline void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt)
865 {
866 	int v = atomic_read(&spt->refcount);
867 
868 	trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));
869 	atomic_inc(&spt->refcount);
870 }
871 
872 static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt)
873 {
874 	int v = atomic_read(&spt->refcount);
875 
876 	trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));
877 	return atomic_dec_return(&spt->refcount);
878 }
879 
880 static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt);
881 
882 static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
883 		struct intel_gvt_gtt_entry *e)
884 {
885 	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
886 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
887 	struct intel_vgpu_ppgtt_spt *s;
888 	enum intel_gvt_gtt_type cur_pt_type;
889 
890 	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type)));
891 
892 	if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY
893 		&& e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
894 		cur_pt_type = get_next_pt_type(e->type);
895 
896 		if (!gtt_type_is_pt(cur_pt_type) ||
897 				!gtt_type_is_pt(cur_pt_type + 1)) {
898 			drm_WARN(&i915->drm, 1,
899 				 "Invalid page table type, cur_pt_type is: %d\n",
900 				 cur_pt_type);
901 			return -EINVAL;
902 		}
903 
904 		cur_pt_type += 1;
905 
906 		if (ops->get_pfn(e) ==
907 			vgpu->gtt.scratch_pt[cur_pt_type].page_mfn)
908 			return 0;
909 	}
910 	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
911 	if (!s) {
912 		gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n",
913 				ops->get_pfn(e));
914 		return -ENXIO;
915 	}
916 	return ppgtt_invalidate_spt(s);
917 }
918 
919 static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt,
920 		struct intel_gvt_gtt_entry *entry)
921 {
922 	struct intel_vgpu *vgpu = spt->vgpu;
923 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
924 	unsigned long pfn;
925 	int type;
926 
927 	pfn = ops->get_pfn(entry);
928 	type = spt->shadow_page.type;
929 
930 	/* Uninitialized spte or unshadowed spte. */
931 	if (!pfn || pfn == vgpu->gtt.scratch_pt[type].page_mfn)
932 		return;
933 
934 	intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
935 }
936 
/*
 * Drop one reference on @spt and tear it down when that was the last one.
 *
 * While other references remain this only decrements the count and returns
 * 0.  Otherwise every present shadow entry is visited: 4K PTEs have their
 * guest page mapping released, lower level page tables are invalidated
 * recursively, and 64K/2M/1G entries are skipped.  Finally the spt itself is
 * freed.
 *
 * Returns 0 on success or the error from invalidating a lower level table.
 * NOTE(review): on that error path the reference has already been dropped
 * but @spt is not freed here - confirm this is intended.
 */
static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_entry e;
	unsigned long index;
	int ret;

	trace_spt_change(spt->vgpu->id, "die", spt,
			spt->guest_page.gfn, spt->shadow_page.type);

	/* Still referenced elsewhere: nothing more to do. */
	if (ppgtt_put_spt(spt) > 0)
		return 0;

	for_each_present_shadow_entry(spt, &e, index) {
		switch (e.type) {
		case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
			gvt_vdbg_mm("invalidate 4K entry\n");
			ppgtt_invalidate_pte(spt, &e);
			break;
		case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
			/* We don't setup 64K shadow entry so far. */
			WARN(1, "suspicious 64K gtt entry\n");
			continue;
		case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
			gvt_vdbg_mm("invalidate 2M entry\n");
			continue;
		case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
			WARN(1, "GVT doesn't support 1GB page\n");
			continue;
		case GTT_TYPE_PPGTT_PML4_ENTRY:
		case GTT_TYPE_PPGTT_PDP_ENTRY:
		case GTT_TYPE_PPGTT_PDE_ENTRY:
			/* Entry points to a lower level table: recurse into it. */
			gvt_vdbg_mm("invalidate PMUL4/PDP/PDE entry\n");
			ret = ppgtt_invalidate_spt_by_shadow_entry(
					spt->vgpu, &e);
			if (ret)
				goto fail;
			break;
		default:
			GEM_BUG_ON(1);
		}
	}

	trace_spt_change(spt->vgpu->id, "release", spt,
			 spt->guest_page.gfn, spt->shadow_page.type);
	ppgtt_free_spt(spt);
	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n",
			spt, e.val64, e.type);
	return ret;
}
989 
990 static bool vgpu_ips_enabled(struct intel_vgpu *vgpu)
991 {
992 	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
993 
994 	if (GRAPHICS_VER(dev_priv) == 9) {
995 		u32 ips = vgpu_vreg_t(vgpu, GEN8_GAMW_ECO_DEV_RW_IA) &
996 			GAMW_ECO_ENABLE_64K_IPS_FIELD;
997 
998 		return ips == GAMW_ECO_ENABLE_64K_IPS_FIELD;
999 	} else if (GRAPHICS_VER(dev_priv) >= 11) {
1000 		/* 64K paging only controlled by IPS bit in PTE now. */
1001 		return true;
1002 	} else
1003 		return false;
1004 }
1005 
1006 static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt);
1007 
/*
 * Get the shadow page table for the guest page table that guest entry @we
 * points to, creating and populating it if necessary.
 *
 * If an spt already tracks that guest page, a reference is taken on it; when
 * the IPS setting derived from @we differs from the one recorded in the spt,
 * the shadow page is cleared and populated again.  Otherwise a new spt is
 * allocated for the guest page, page tracking is enabled for it and the spt
 * is populated.
 *
 * Returns the spt or an ERR_PTR() on failure.
 */
static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
		struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we)
{
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *spt = NULL;
	bool ips = false;
	int ret;

	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type)));

	/* Only a PDE entry is checked for the IPS bit. */
	if (we->type == GTT_TYPE_PPGTT_PDE_ENTRY)
		ips = vgpu_ips_enabled(vgpu) && ops->test_ips(we);

	spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we));
	if (spt) {
		ppgtt_get_spt(spt);

		if (ips != spt->guest_page.pde_ips) {
			spt->guest_page.pde_ips = ips;

			gvt_dbg_mm("reshadow PDE since ips changed\n");
			clear_page(spt->shadow_page.vaddr);
			ret = ppgtt_populate_spt(spt);
			if (ret) {
				/* Undo the reference taken above. */
				ppgtt_put_spt(spt);
				goto err;
			}
		}
	} else {
		int type = get_next_pt_type(we->type);

		if (!gtt_type_is_pt(type)) {
			ret = -EINVAL;
			goto err;
		}

		spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips);
		if (IS_ERR(spt)) {
			ret = PTR_ERR(spt);
			goto err;
		}

		ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn);
		if (ret)
			goto err_free_spt;

		ret = ppgtt_populate_spt(spt);
		if (ret)
			goto err_free_spt;

		trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn,
				 spt->shadow_page.type);
	}
	return spt;

err_free_spt:
	ppgtt_free_spt(spt);
	/* The spt is gone; make sure the message below does not print it. */
	spt = NULL;
err:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
		     spt, we->val64, we->type);
	return ERR_PTR(ret);
}
1071 
1072 static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
1073 		struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge)
1074 {
1075 	const struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops;
1076 
1077 	se->type = ge->type;
1078 	se->val64 = ge->val64;
1079 
1080 	/* Because we always split 64KB pages, so clear IPS in shadow PDE. */
1081 	if (se->type == GTT_TYPE_PPGTT_PDE_ENTRY)
1082 		ops->clear_ips(se);
1083 
1084 	ops->set_pfn(se, s->shadow_page.mfn);
1085 }
1086 
1087 static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
1088 	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
1089 	struct intel_gvt_gtt_entry *se)
1090 {
1091 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1092 	struct intel_vgpu_ppgtt_spt *sub_spt;
1093 	struct intel_gvt_gtt_entry sub_se;
1094 	unsigned long start_gfn;
1095 	dma_addr_t dma_addr;
1096 	unsigned long sub_index;
1097 	int ret;
1098 
1099 	gvt_dbg_mm("Split 2M gtt entry, index %lu\n", index);
1100 
1101 	start_gfn = ops->get_pfn(se);
1102 
1103 	sub_spt = ppgtt_alloc_spt(vgpu, GTT_TYPE_PPGTT_PTE_PT);
1104 	if (IS_ERR(sub_spt))
1105 		return PTR_ERR(sub_spt);
1106 
1107 	for_each_shadow_entry(sub_spt, &sub_se, sub_index) {
1108 		ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + sub_index,
1109 						   PAGE_SIZE, &dma_addr);
1110 		if (ret)
1111 			goto err;
1112 		sub_se.val64 = se->val64;
1113 
1114 		/* Copy the PAT field from PDE. */
1115 		sub_se.val64 &= ~_PAGE_PAT;
1116 		sub_se.val64 |= (se->val64 & _PAGE_PAT_LARGE) >> 5;
1117 
1118 		ops->set_pfn(&sub_se, dma_addr >> PAGE_SHIFT);
1119 		ppgtt_set_shadow_entry(sub_spt, &sub_se, sub_index);
1120 	}
1121 
1122 	/* Clear dirty field. */
1123 	se->val64 &= ~_PAGE_DIRTY;
1124 
1125 	ops->clear_pse(se);
1126 	ops->clear_ips(se);
1127 	ops->set_pfn(se, sub_spt->shadow_page.mfn);
1128 	ppgtt_set_shadow_entry(spt, se, index);
1129 	return 0;
1130 err:
1131 	/* Cancel the existing address mappings of DMA addr. */
1132 	for_each_present_shadow_entry(sub_spt, &sub_se, sub_index) {
1133 		gvt_vdbg_mm("invalidate 4K entry\n");
1134 		ppgtt_invalidate_pte(sub_spt, &sub_se);
1135 	}
1136 	/* Release the new allocated spt. */
1137 	trace_spt_change(sub_spt->vgpu->id, "release", sub_spt,
1138 		sub_spt->guest_page.gfn, sub_spt->shadow_page.type);
1139 	ppgtt_free_spt(sub_spt);
1140 	return ret;
1141 }
1142 
1143 static int split_64KB_gtt_entry(struct intel_vgpu *vgpu,
1144 	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
1145 	struct intel_gvt_gtt_entry *se)
1146 {
1147 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1148 	struct intel_gvt_gtt_entry entry = *se;
1149 	unsigned long start_gfn;
1150 	dma_addr_t dma_addr;
1151 	int i, ret;
1152 
1153 	gvt_vdbg_mm("Split 64K gtt entry, index %lu\n", index);
1154 
1155 	GEM_BUG_ON(index % GTT_64K_PTE_STRIDE);
1156 
1157 	start_gfn = ops->get_pfn(se);
1158 
1159 	entry.type = GTT_TYPE_PPGTT_PTE_4K_ENTRY;
1160 	ops->set_64k_splited(&entry);
1161 
1162 	for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
1163 		ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + i,
1164 						   PAGE_SIZE, &dma_addr);
1165 		if (ret)
1166 			return ret;
1167 
1168 		ops->set_pfn(&entry, dma_addr >> PAGE_SHIFT);
1169 		ppgtt_set_shadow_entry(spt, &entry, index + i);
1170 	}
1171 	return 0;
1172 }
1173 
1174 static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
1175 	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
1176 	struct intel_gvt_gtt_entry *ge)
1177 {
1178 	const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
1179 	struct intel_gvt_gtt_entry se = *ge;
1180 	unsigned long gfn;
1181 	dma_addr_t dma_addr;
1182 	int ret;
1183 
1184 	if (!pte_ops->test_present(ge))
1185 		return 0;
1186 
1187 	gfn = pte_ops->get_pfn(ge);
1188 
1189 	switch (ge->type) {
1190 	case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
1191 		gvt_vdbg_mm("shadow 4K gtt entry\n");
1192 		ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, &dma_addr);
1193 		if (ret)
1194 			return -ENXIO;
1195 		break;
1196 	case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
1197 		gvt_vdbg_mm("shadow 64K gtt entry\n");
1198 		/*
1199 		 * The layout of 64K page is special, the page size is
1200 		 * controlled by upper PDE. To be simple, we always split
1201 		 * 64K page to smaller 4K pages in shadow PT.
1202 		 */
1203 		return split_64KB_gtt_entry(vgpu, spt, index, &se);
1204 	case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
1205 		gvt_vdbg_mm("shadow 2M gtt entry\n");
1206 		if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M) ||
1207 		    intel_gvt_dma_map_guest_page(vgpu, gfn,
1208 						 I915_GTT_PAGE_SIZE_2M, &dma_addr))
1209 			return split_2MB_gtt_entry(vgpu, spt, index, &se);
1210 		break;
1211 	case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
1212 		gvt_vgpu_err("GVT doesn't support 1GB entry\n");
1213 		return -EINVAL;
1214 	default:
1215 		GEM_BUG_ON(1);
1216 		return -EINVAL;
1217 	}
1218 
1219 	/* Successfully shadowed a 4K or 2M page (without splitting). */
1220 	pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT);
1221 	ppgtt_set_shadow_entry(spt, &se, index);
1222 	return 0;
1223 }
1224 
1225 static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt)
1226 {
1227 	struct intel_vgpu *vgpu = spt->vgpu;
1228 	struct intel_vgpu_ppgtt_spt *s;
1229 	struct intel_gvt_gtt_entry se, ge;
1230 	unsigned long i;
1231 	int ret;
1232 
1233 	trace_spt_change(spt->vgpu->id, "born", spt,
1234 			 spt->guest_page.gfn, spt->shadow_page.type);
1235 
1236 	for_each_present_guest_entry(spt, &ge, i) {
1237 		if (gtt_type_is_pt(get_next_pt_type(ge.type))) {
1238 			s = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
1239 			if (IS_ERR(s)) {
1240 				ret = PTR_ERR(s);
1241 				goto fail;
1242 			}
1243 			ppgtt_get_shadow_entry(spt, &se, i);
1244 			ppgtt_generate_shadow_entry(&se, s, &ge);
1245 			ppgtt_set_shadow_entry(spt, &se, i);
1246 		} else {
1247 			ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge);
1248 			if (ret)
1249 				goto fail;
1250 		}
1251 	}
1252 	return 0;
1253 fail:
1254 	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
1255 			spt, ge.val64, ge.type);
1256 	return ret;
1257 }
1258 
1259 static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt,
1260 		struct intel_gvt_gtt_entry *se, unsigned long index)
1261 {
1262 	struct intel_vgpu *vgpu = spt->vgpu;
1263 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1264 	int ret;
1265 
1266 	trace_spt_guest_change(spt->vgpu->id, "remove", spt,
1267 			       spt->shadow_page.type, se->val64, index);
1268 
1269 	gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n",
1270 		    se->type, index, se->val64);
1271 
1272 	if (!ops->test_present(se))
1273 		return 0;
1274 
1275 	if (ops->get_pfn(se) ==
1276 	    vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn)
1277 		return 0;
1278 
1279 	if (gtt_type_is_pt(get_next_pt_type(se->type))) {
1280 		struct intel_vgpu_ppgtt_spt *s =
1281 			intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se));
1282 		if (!s) {
1283 			gvt_vgpu_err("fail to find guest page\n");
1284 			ret = -ENXIO;
1285 			goto fail;
1286 		}
1287 		ret = ppgtt_invalidate_spt(s);
1288 		if (ret)
1289 			goto fail;
1290 	} else {
1291 		/* We don't setup 64K shadow entry so far. */
1292 		WARN(se->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY,
1293 		     "suspicious 64K entry\n");
1294 		ppgtt_invalidate_pte(spt, se);
1295 	}
1296 
1297 	return 0;
1298 fail:
1299 	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
1300 			spt, se->val64, se->type);
1301 	return ret;
1302 }
1303 
1304 static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt,
1305 		struct intel_gvt_gtt_entry *we, unsigned long index)
1306 {
1307 	struct intel_vgpu *vgpu = spt->vgpu;
1308 	struct intel_gvt_gtt_entry m;
1309 	struct intel_vgpu_ppgtt_spt *s;
1310 	int ret;
1311 
1312 	trace_spt_guest_change(spt->vgpu->id, "add", spt, spt->shadow_page.type,
1313 			       we->val64, index);
1314 
1315 	gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n",
1316 		    we->type, index, we->val64);
1317 
1318 	if (gtt_type_is_pt(get_next_pt_type(we->type))) {
1319 		s = ppgtt_populate_spt_by_guest_entry(vgpu, we);
1320 		if (IS_ERR(s)) {
1321 			ret = PTR_ERR(s);
1322 			goto fail;
1323 		}
1324 		ppgtt_get_shadow_entry(spt, &m, index);
1325 		ppgtt_generate_shadow_entry(&m, s, we);
1326 		ppgtt_set_shadow_entry(spt, &m, index);
1327 	} else {
1328 		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we);
1329 		if (ret)
1330 			goto fail;
1331 	}
1332 	return 0;
1333 fail:
1334 	gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n",
1335 		spt, we->val64, we->type);
1336 	return ret;
1337 }
1338 
1339 static int sync_oos_page(struct intel_vgpu *vgpu,
1340 		struct intel_vgpu_oos_page *oos_page)
1341 {
1342 	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1343 	struct intel_gvt *gvt = vgpu->gvt;
1344 	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
1345 	struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
1346 	struct intel_gvt_gtt_entry old, new;
1347 	int index;
1348 	int ret;
1349 
1350 	trace_oos_change(vgpu->id, "sync", oos_page->id,
1351 			 spt, spt->guest_page.type);
1352 
1353 	old.type = new.type = get_entry_type(spt->guest_page.type);
1354 	old.val64 = new.val64 = 0;
1355 
1356 	for (index = 0; index < (I915_GTT_PAGE_SIZE >>
1357 				info->gtt_entry_size_shift); index++) {
1358 		ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu);
1359 		ops->get_entry(NULL, &new, index, true,
1360 			       spt->guest_page.gfn << PAGE_SHIFT, vgpu);
1361 
1362 		if (old.val64 == new.val64
1363 			&& !test_and_clear_bit(index, spt->post_shadow_bitmap))
1364 			continue;
1365 
1366 		trace_oos_sync(vgpu->id, oos_page->id,
1367 				spt, spt->guest_page.type,
1368 				new.val64, index);
1369 
1370 		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new);
1371 		if (ret)
1372 			return ret;
1373 
1374 		ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu);
1375 	}
1376 
1377 	spt->guest_page.write_cnt = 0;
1378 	list_del_init(&spt->post_shadow_list);
1379 	return 0;
1380 }
1381 
1382 static int detach_oos_page(struct intel_vgpu *vgpu,
1383 		struct intel_vgpu_oos_page *oos_page)
1384 {
1385 	struct intel_gvt *gvt = vgpu->gvt;
1386 	struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
1387 
1388 	trace_oos_change(vgpu->id, "detach", oos_page->id,
1389 			 spt, spt->guest_page.type);
1390 
1391 	spt->guest_page.write_cnt = 0;
1392 	spt->guest_page.oos_page = NULL;
1393 	oos_page->spt = NULL;
1394 
1395 	list_del_init(&oos_page->vm_list);
1396 	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head);
1397 
1398 	return 0;
1399 }
1400 
1401 static int attach_oos_page(struct intel_vgpu_oos_page *oos_page,
1402 		struct intel_vgpu_ppgtt_spt *spt)
1403 {
1404 	struct intel_gvt *gvt = spt->vgpu->gvt;
1405 	int ret;
1406 
1407 	ret = intel_gvt_read_gpa(spt->vgpu,
1408 			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
1409 			oos_page->mem, I915_GTT_PAGE_SIZE);
1410 	if (ret)
1411 		return ret;
1412 
1413 	oos_page->spt = spt;
1414 	spt->guest_page.oos_page = oos_page;
1415 
1416 	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head);
1417 
1418 	trace_oos_change(spt->vgpu->id, "attach", oos_page->id,
1419 			 spt, spt->guest_page.type);
1420 	return 0;
1421 }
1422 
1423 static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt)
1424 {
1425 	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
1426 	int ret;
1427 
1428 	ret = intel_vgpu_enable_page_track(spt->vgpu, spt->guest_page.gfn);
1429 	if (ret)
1430 		return ret;
1431 
1432 	trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id,
1433 			 spt, spt->guest_page.type);
1434 
1435 	list_del_init(&oos_page->vm_list);
1436 	return sync_oos_page(spt->vgpu, oos_page);
1437 }
1438 
1439 static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt)
1440 {
1441 	struct intel_gvt *gvt = spt->vgpu->gvt;
1442 	struct intel_gvt_gtt *gtt = &gvt->gtt;
1443 	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
1444 	int ret;
1445 
1446 	WARN(oos_page, "shadow PPGTT page has already has a oos page\n");
1447 
1448 	if (list_empty(&gtt->oos_page_free_list_head)) {
1449 		oos_page = container_of(gtt->oos_page_use_list_head.next,
1450 			struct intel_vgpu_oos_page, list);
1451 		ret = ppgtt_set_guest_page_sync(oos_page->spt);
1452 		if (ret)
1453 			return ret;
1454 		ret = detach_oos_page(spt->vgpu, oos_page);
1455 		if (ret)
1456 			return ret;
1457 	} else
1458 		oos_page = container_of(gtt->oos_page_free_list_head.next,
1459 			struct intel_vgpu_oos_page, list);
1460 	return attach_oos_page(oos_page, spt);
1461 }
1462 
1463 static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt)
1464 {
1465 	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
1466 
1467 	if (WARN(!oos_page, "shadow PPGTT page should have a oos page\n"))
1468 		return -EINVAL;
1469 
1470 	trace_oos_change(spt->vgpu->id, "set page out of sync", oos_page->id,
1471 			 spt, spt->guest_page.type);
1472 
1473 	list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head);
1474 	return intel_vgpu_disable_page_track(spt->vgpu, spt->guest_page.gfn);
1475 }
1476 
1477 /**
1478  * intel_vgpu_sync_oos_pages - sync all the out-of-synced shadow for vGPU
1479  * @vgpu: a vGPU
1480  *
1481  * This function is called before submitting a guest workload to host,
1482  * to sync all the out-of-synced shadow for vGPU
1483  *
1484  * Returns:
1485  * Zero on success, negative error code if failed.
1486  */
1487 int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu)
1488 {
1489 	struct list_head *pos, *n;
1490 	struct intel_vgpu_oos_page *oos_page;
1491 	int ret;
1492 
1493 	if (!enable_out_of_sync)
1494 		return 0;
1495 
1496 	list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) {
1497 		oos_page = container_of(pos,
1498 				struct intel_vgpu_oos_page, vm_list);
1499 		ret = ppgtt_set_guest_page_sync(oos_page->spt);
1500 		if (ret)
1501 			return ret;
1502 	}
1503 	return 0;
1504 }
1505 
1506 /*
1507  * The heart of PPGTT shadow page table.
1508  */
1509 static int ppgtt_handle_guest_write_page_table(
1510 		struct intel_vgpu_ppgtt_spt *spt,
1511 		struct intel_gvt_gtt_entry *we, unsigned long index)
1512 {
1513 	struct intel_vgpu *vgpu = spt->vgpu;
1514 	int type = spt->shadow_page.type;
1515 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1516 	struct intel_gvt_gtt_entry old_se;
1517 	int new_present;
1518 	int i, ret;
1519 
1520 	new_present = ops->test_present(we);
1521 
1522 	/*
1523 	 * Adding the new entry first and then removing the old one, that can
1524 	 * guarantee the ppgtt table is validated during the window between
1525 	 * adding and removal.
1526 	 */
1527 	ppgtt_get_shadow_entry(spt, &old_se, index);
1528 
1529 	if (new_present) {
1530 		ret = ppgtt_handle_guest_entry_add(spt, we, index);
1531 		if (ret)
1532 			goto fail;
1533 	}
1534 
1535 	ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index);
1536 	if (ret)
1537 		goto fail;
1538 
1539 	if (!new_present) {
1540 		/* For 64KB splited entries, we need clear them all. */
1541 		if (ops->test_64k_splited(&old_se) &&
1542 		    !(index % GTT_64K_PTE_STRIDE)) {
1543 			gvt_vdbg_mm("remove splited 64K shadow entries\n");
1544 			for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
1545 				ops->clear_64k_splited(&old_se);
1546 				ops->set_pfn(&old_se,
1547 					vgpu->gtt.scratch_pt[type].page_mfn);
1548 				ppgtt_set_shadow_entry(spt, &old_se, index + i);
1549 			}
1550 		} else if (old_se.type == GTT_TYPE_PPGTT_PTE_2M_ENTRY ||
1551 			   old_se.type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
1552 			ops->clear_pse(&old_se);
1553 			ops->set_pfn(&old_se,
1554 				     vgpu->gtt.scratch_pt[type].page_mfn);
1555 			ppgtt_set_shadow_entry(spt, &old_se, index);
1556 		} else {
1557 			ops->set_pfn(&old_se,
1558 				     vgpu->gtt.scratch_pt[type].page_mfn);
1559 			ppgtt_set_shadow_entry(spt, &old_se, index);
1560 		}
1561 	}
1562 
1563 	return 0;
1564 fail:
1565 	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n",
1566 			spt, we->val64, we->type);
1567 	return ret;
1568 }
1569 
1570 
1571 
1572 static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt)
1573 {
1574 	return enable_out_of_sync
1575 		&& gtt_type_is_pte_pt(spt->guest_page.type)
1576 		&& spt->guest_page.write_cnt >= 2;
1577 }
1578 
1579 static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt,
1580 		unsigned long index)
1581 {
1582 	set_bit(index, spt->post_shadow_bitmap);
1583 	if (!list_empty(&spt->post_shadow_list))
1584 		return;
1585 
1586 	list_add_tail(&spt->post_shadow_list,
1587 			&spt->vgpu->gtt.post_shadow_list_head);
1588 }
1589 
1590 /**
1591  * intel_vgpu_flush_post_shadow - flush the post shadow transactions
1592  * @vgpu: a vGPU
1593  *
1594  * This function is called before submitting a guest workload to host,
1595  * to flush all the post shadows for a vGPU.
1596  *
1597  * Returns:
1598  * Zero on success, negative error code if failed.
1599  */
1600 int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
1601 {
1602 	struct list_head *pos, *n;
1603 	struct intel_vgpu_ppgtt_spt *spt;
1604 	struct intel_gvt_gtt_entry ge;
1605 	unsigned long index;
1606 	int ret;
1607 
1608 	list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) {
1609 		spt = container_of(pos, struct intel_vgpu_ppgtt_spt,
1610 				post_shadow_list);
1611 
1612 		for_each_set_bit(index, spt->post_shadow_bitmap,
1613 				GTT_ENTRY_NUM_IN_ONE_PAGE) {
1614 			ppgtt_get_guest_entry(spt, &ge, index);
1615 
1616 			ret = ppgtt_handle_guest_write_page_table(spt,
1617 							&ge, index);
1618 			if (ret)
1619 				return ret;
1620 			clear_bit(index, spt->post_shadow_bitmap);
1621 		}
1622 		list_del_init(&spt->post_shadow_list);
1623 	}
1624 	return 0;
1625 }
1626 
1627 static int ppgtt_handle_guest_write_page_table_bytes(
1628 		struct intel_vgpu_ppgtt_spt *spt,
1629 		u64 pa, void *p_data, int bytes)
1630 {
1631 	struct intel_vgpu *vgpu = spt->vgpu;
1632 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1633 	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1634 	struct intel_gvt_gtt_entry we, se;
1635 	unsigned long index;
1636 	int ret;
1637 
1638 	index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift;
1639 
1640 	ppgtt_get_guest_entry(spt, &we, index);
1641 
1642 	/*
1643 	 * For page table which has 64K gtt entry, only PTE#0, PTE#16,
1644 	 * PTE#32, ... PTE#496 are used. Unused PTEs update should be
1645 	 * ignored.
1646 	 */
1647 	if (we.type == GTT_TYPE_PPGTT_PTE_64K_ENTRY &&
1648 	    (index % GTT_64K_PTE_STRIDE)) {
1649 		gvt_vdbg_mm("Ignore write to unused PTE entry, index %lu\n",
1650 			    index);
1651 		return 0;
1652 	}
1653 
1654 	if (bytes == info->gtt_entry_size) {
1655 		ret = ppgtt_handle_guest_write_page_table(spt, &we, index);
1656 		if (ret)
1657 			return ret;
1658 	} else {
1659 		if (!test_bit(index, spt->post_shadow_bitmap)) {
1660 			int type = spt->shadow_page.type;
1661 
1662 			ppgtt_get_shadow_entry(spt, &se, index);
1663 			ret = ppgtt_handle_guest_entry_removal(spt, &se, index);
1664 			if (ret)
1665 				return ret;
1666 			ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
1667 			ppgtt_set_shadow_entry(spt, &se, index);
1668 		}
1669 		ppgtt_set_post_shadow(spt, index);
1670 	}
1671 
1672 	if (!enable_out_of_sync)
1673 		return 0;
1674 
1675 	spt->guest_page.write_cnt++;
1676 
1677 	if (spt->guest_page.oos_page)
1678 		ops->set_entry(spt->guest_page.oos_page->mem, &we, index,
1679 				false, 0, vgpu);
1680 
1681 	if (can_do_out_of_sync(spt)) {
1682 		if (!spt->guest_page.oos_page)
1683 			ppgtt_allocate_oos_page(spt);
1684 
1685 		ret = ppgtt_set_guest_page_oos(spt);
1686 		if (ret < 0)
1687 			return ret;
1688 	}
1689 	return 0;
1690 }
1691 
1692 static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
1693 {
1694 	struct intel_vgpu *vgpu = mm->vgpu;
1695 	struct intel_gvt *gvt = vgpu->gvt;
1696 	struct intel_gvt_gtt *gtt = &gvt->gtt;
1697 	const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
1698 	struct intel_gvt_gtt_entry se;
1699 	int index;
1700 
1701 	if (!mm->ppgtt_mm.shadowed)
1702 		return;
1703 
1704 	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) {
1705 		ppgtt_get_shadow_root_entry(mm, &se, index);
1706 
1707 		if (!ops->test_present(&se))
1708 			continue;
1709 
1710 		ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se);
1711 		se.val64 = 0;
1712 		ppgtt_set_shadow_root_entry(mm, &se, index);
1713 
1714 		trace_spt_guest_change(vgpu->id, "destroy root pointer",
1715 				       NULL, se.type, se.val64, index);
1716 	}
1717 
1718 	mm->ppgtt_mm.shadowed = false;
1719 }
1720 
1721 
1722 static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
1723 {
1724 	struct intel_vgpu *vgpu = mm->vgpu;
1725 	struct intel_gvt *gvt = vgpu->gvt;
1726 	struct intel_gvt_gtt *gtt = &gvt->gtt;
1727 	const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
1728 	struct intel_vgpu_ppgtt_spt *spt;
1729 	struct intel_gvt_gtt_entry ge, se;
1730 	int index, ret;
1731 
1732 	if (mm->ppgtt_mm.shadowed)
1733 		return 0;
1734 
1735 	if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
1736 		return -EINVAL;
1737 
1738 	mm->ppgtt_mm.shadowed = true;
1739 
1740 	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
1741 		ppgtt_get_guest_root_entry(mm, &ge, index);
1742 
1743 		if (!ops->test_present(&ge))
1744 			continue;
1745 
1746 		trace_spt_guest_change(vgpu->id, __func__, NULL,
1747 				       ge.type, ge.val64, index);
1748 
1749 		spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
1750 		if (IS_ERR(spt)) {
1751 			gvt_vgpu_err("fail to populate guest root pointer\n");
1752 			ret = PTR_ERR(spt);
1753 			goto fail;
1754 		}
1755 		ppgtt_generate_shadow_entry(&se, spt, &ge);
1756 		ppgtt_set_shadow_root_entry(mm, &se, index);
1757 
1758 		trace_spt_guest_change(vgpu->id, "populate root pointer",
1759 				       NULL, se.type, se.val64, index);
1760 	}
1761 
1762 	return 0;
1763 fail:
1764 	invalidate_ppgtt_mm(mm);
1765 	return ret;
1766 }
1767 
1768 static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu)
1769 {
1770 	struct intel_vgpu_mm *mm;
1771 
1772 	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
1773 	if (!mm)
1774 		return NULL;
1775 
1776 	mm->vgpu = vgpu;
1777 	kref_init(&mm->ref);
1778 	atomic_set(&mm->pincount, 0);
1779 
1780 	return mm;
1781 }
1782 
/* Free the memory of the mm object itself; nothing it refers to is freed. */
static void vgpu_free_mm(struct intel_vgpu_mm *mm)
{
	kfree(mm);
}
1787 
1788 /**
1789  * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU
1790  * @vgpu: a vGPU
1791  * @root_entry_type: ppgtt root entry type
1792  * @pdps: guest pdps.
1793  *
1794  * This function is used to create a ppgtt mm object for a vGPU.
1795  *
1796  * Returns:
1797  * Zero on success, negative error code in pointer if failed.
1798  */
1799 struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
1800 		enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
1801 {
1802 	struct intel_gvt *gvt = vgpu->gvt;
1803 	struct intel_vgpu_mm *mm;
1804 	int ret;
1805 
1806 	mm = vgpu_alloc_mm(vgpu);
1807 	if (!mm)
1808 		return ERR_PTR(-ENOMEM);
1809 
1810 	mm->type = INTEL_GVT_MM_PPGTT;
1811 
1812 	GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY &&
1813 		   root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY);
1814 	mm->ppgtt_mm.root_entry_type = root_entry_type;
1815 
1816 	INIT_LIST_HEAD(&mm->ppgtt_mm.list);
1817 	INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list);
1818 	INIT_LIST_HEAD(&mm->ppgtt_mm.link);
1819 
1820 	if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
1821 		mm->ppgtt_mm.guest_pdps[0] = pdps[0];
1822 	else
1823 		memcpy(mm->ppgtt_mm.guest_pdps, pdps,
1824 		       sizeof(mm->ppgtt_mm.guest_pdps));
1825 
1826 	ret = shadow_ppgtt_mm(mm);
1827 	if (ret) {
1828 		gvt_vgpu_err("failed to shadow ppgtt mm\n");
1829 		vgpu_free_mm(mm);
1830 		return ERR_PTR(ret);
1831 	}
1832 
1833 	list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head);
1834 
1835 	mutex_lock(&gvt->gtt.ppgtt_mm_lock);
1836 	list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head);
1837 	mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
1838 
1839 	return mm;
1840 }
1841 
1842 static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
1843 {
1844 	struct intel_vgpu_mm *mm;
1845 	unsigned long nr_entries;
1846 
1847 	mm = vgpu_alloc_mm(vgpu);
1848 	if (!mm)
1849 		return ERR_PTR(-ENOMEM);
1850 
1851 	mm->type = INTEL_GVT_MM_GGTT;
1852 
1853 	nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT;
1854 	mm->ggtt_mm.virtual_ggtt =
1855 		vzalloc(array_size(nr_entries,
1856 				   vgpu->gvt->device_info.gtt_entry_size));
1857 	if (!mm->ggtt_mm.virtual_ggtt) {
1858 		vgpu_free_mm(mm);
1859 		return ERR_PTR(-ENOMEM);
1860 	}
1861 
1862 	mm->ggtt_mm.host_ggtt_aperture = vzalloc((vgpu_aperture_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64));
1863 	if (!mm->ggtt_mm.host_ggtt_aperture) {
1864 		vfree(mm->ggtt_mm.virtual_ggtt);
1865 		vgpu_free_mm(mm);
1866 		return ERR_PTR(-ENOMEM);
1867 	}
1868 
1869 	mm->ggtt_mm.host_ggtt_hidden = vzalloc((vgpu_hidden_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64));
1870 	if (!mm->ggtt_mm.host_ggtt_hidden) {
1871 		vfree(mm->ggtt_mm.host_ggtt_aperture);
1872 		vfree(mm->ggtt_mm.virtual_ggtt);
1873 		vgpu_free_mm(mm);
1874 		return ERR_PTR(-ENOMEM);
1875 	}
1876 
1877 	return mm;
1878 }
1879 
1880 /**
1881  * _intel_vgpu_mm_release - destroy a mm object
1882  * @mm_ref: a kref object
1883  *
1884  * This function is used to destroy a mm object for vGPU
1885  *
1886  */
1887 void _intel_vgpu_mm_release(struct kref *mm_ref)
1888 {
1889 	struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);
1890 
1891 	if (GEM_WARN_ON(atomic_read(&mm->pincount)))
1892 		gvt_err("vgpu mm pin count bug detected\n");
1893 
1894 	if (mm->type == INTEL_GVT_MM_PPGTT) {
1895 		list_del(&mm->ppgtt_mm.list);
1896 
1897 		mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
1898 		list_del(&mm->ppgtt_mm.lru_list);
1899 		mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
1900 
1901 		invalidate_ppgtt_mm(mm);
1902 	} else {
1903 		vfree(mm->ggtt_mm.virtual_ggtt);
1904 		vfree(mm->ggtt_mm.host_ggtt_aperture);
1905 		vfree(mm->ggtt_mm.host_ggtt_hidden);
1906 	}
1907 
1908 	vgpu_free_mm(mm);
1909 }
1910 
1911 /**
1912  * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object
1913  * @mm: a vGPU mm object
1914  *
1915  * This function is called when user doesn't want to use a vGPU mm object
1916  */
1917 void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
1918 {
1919 	atomic_dec_if_positive(&mm->pincount);
1920 }
1921 
1922 /**
1923  * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object
1924  * @mm: target vgpu mm
1925  *
1926  * This function is called when user wants to use a vGPU mm object. If this
1927  * mm object hasn't been shadowed yet, the shadow will be populated at this
1928  * time.
1929  *
1930  * Returns:
1931  * Zero on success, negative error code if failed.
1932  */
1933 int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
1934 {
1935 	int ret;
1936 
1937 	atomic_inc(&mm->pincount);
1938 
1939 	if (mm->type == INTEL_GVT_MM_PPGTT) {
1940 		ret = shadow_ppgtt_mm(mm);
1941 		if (ret)
1942 			return ret;
1943 
1944 		mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
1945 		list_move_tail(&mm->ppgtt_mm.lru_list,
1946 			       &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head);
1947 		mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
1948 	}
1949 
1950 	return 0;
1951 }
1952 
1953 static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt)
1954 {
1955 	struct intel_vgpu_mm *mm;
1956 	struct list_head *pos, *n;
1957 
1958 	mutex_lock(&gvt->gtt.ppgtt_mm_lock);
1959 
1960 	list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) {
1961 		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list);
1962 
1963 		if (atomic_read(&mm->pincount))
1964 			continue;
1965 
1966 		list_del_init(&mm->ppgtt_mm.lru_list);
1967 		mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
1968 		invalidate_ppgtt_mm(mm);
1969 		return 1;
1970 	}
1971 	mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
1972 	return 0;
1973 }
1974 
1975 /*
1976  * GMA translation APIs.
1977  */
1978 static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm,
1979 		struct intel_gvt_gtt_entry *e, unsigned long index, bool guest)
1980 {
1981 	struct intel_vgpu *vgpu = mm->vgpu;
1982 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1983 	struct intel_vgpu_ppgtt_spt *s;
1984 
1985 	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
1986 	if (!s)
1987 		return -ENXIO;
1988 
1989 	if (!guest)
1990 		ppgtt_get_shadow_entry(s, e, index);
1991 	else
1992 		ppgtt_get_guest_entry(s, e, index);
1993 	return 0;
1994 }
1995 
1996 /**
1997  * intel_vgpu_gma_to_gpa - translate a gma to GPA
1998  * @mm: mm object. could be a PPGTT or GGTT mm object
1999  * @gma: graphics memory address in this mm object
2000  *
2001  * This function is used to translate a graphics memory address in specific
2002  * graphics memory space to guest physical address.
2003  *
2004  * Returns:
2005  * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed.
2006  */
2007 unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
2008 {
2009 	struct intel_vgpu *vgpu = mm->vgpu;
2010 	struct intel_gvt *gvt = vgpu->gvt;
2011 	const struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops;
2012 	const struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops;
2013 	unsigned long gpa = INTEL_GVT_INVALID_ADDR;
2014 	unsigned long gma_index[4];
2015 	struct intel_gvt_gtt_entry e;
2016 	int i, levels = 0;
2017 	int ret;
2018 
2019 	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT &&
2020 		   mm->type != INTEL_GVT_MM_PPGTT);
2021 
2022 	if (mm->type == INTEL_GVT_MM_GGTT) {
2023 		if (!vgpu_gmadr_is_valid(vgpu, gma))
2024 			goto err;
2025 
2026 		ggtt_get_guest_entry(mm, &e,
2027 			gma_ops->gma_to_ggtt_pte_index(gma));
2028 
2029 		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT)
2030 			+ (gma & ~I915_GTT_PAGE_MASK);
2031 
2032 		trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa);
2033 	} else {
2034 		switch (mm->ppgtt_mm.root_entry_type) {
2035 		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
2036 			ppgtt_get_shadow_root_entry(mm, &e, 0);
2037 
2038 			gma_index[0] = gma_ops->gma_to_pml4_index(gma);
2039 			gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
2040 			gma_index[2] = gma_ops->gma_to_pde_index(gma);
2041 			gma_index[3] = gma_ops->gma_to_pte_index(gma);
2042 			levels = 4;
2043 			break;
2044 		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
2045 			ppgtt_get_shadow_root_entry(mm, &e,
2046 					gma_ops->gma_to_l3_pdp_index(gma));
2047 
2048 			gma_index[0] = gma_ops->gma_to_pde_index(gma);
2049 			gma_index[1] = gma_ops->gma_to_pte_index(gma);
2050 			levels = 2;
2051 			break;
2052 		default:
2053 			GEM_BUG_ON(1);
2054 		}
2055 
2056 		/* walk the shadow page table and get gpa from guest entry */
2057 		for (i = 0; i < levels; i++) {
2058 			ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
2059 				(i == levels - 1));
2060 			if (ret)
2061 				goto err;
2062 
2063 			if (!pte_ops->test_present(&e)) {
2064 				gvt_dbg_core("GMA 0x%lx is not present\n", gma);
2065 				goto err;
2066 			}
2067 		}
2068 
2069 		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) +
2070 					(gma & ~I915_GTT_PAGE_MASK);
2071 		trace_gma_translate(vgpu->id, "ppgtt", 0,
2072 				    mm->ppgtt_mm.root_entry_type, gma, gpa);
2073 	}
2074 
2075 	return gpa;
2076 err:
2077 	gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
2078 	return INTEL_GVT_INVALID_ADDR;
2079 }
2080 
2081 static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu,
2082 	unsigned int off, void *p_data, unsigned int bytes)
2083 {
2084 	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
2085 	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
2086 	unsigned long index = off >> info->gtt_entry_size_shift;
2087 	unsigned long gma;
2088 	struct intel_gvt_gtt_entry e;
2089 
2090 	if (bytes != 4 && bytes != 8)
2091 		return -EINVAL;
2092 
2093 	gma = index << I915_GTT_PAGE_SHIFT;
2094 	if (!intel_gvt_ggtt_validate_range(vgpu,
2095 					   gma, 1 << I915_GTT_PAGE_SHIFT)) {
2096 		gvt_dbg_mm("read invalid ggtt at 0x%lx\n", gma);
2097 		memset(p_data, 0, bytes);
2098 		return 0;
2099 	}
2100 
2101 	ggtt_get_guest_entry(ggtt_mm, &e, index);
2102 	memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)),
2103 			bytes);
2104 	return 0;
2105 }
2106 
2107 /**
2108  * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read
2109  * @vgpu: a vGPU
2110  * @off: register offset
2111  * @p_data: data will be returned to guest
2112  * @bytes: data length
2113  *
2114  * This function is used to emulate the GTT MMIO register read
2115  *
2116  * Returns:
2117  * Zero on success, error code if failed.
2118  */
2119 int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
2120 	void *p_data, unsigned int bytes)
2121 {
2122 	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
2123 	int ret;
2124 
2125 	if (bytes != 4 && bytes != 8)
2126 		return -EINVAL;
2127 
2128 	off -= info->gtt_start_offset;
2129 	ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes);
2130 	return ret;
2131 }
2132 
2133 static void ggtt_invalidate_pte(struct intel_vgpu *vgpu,
2134 		struct intel_gvt_gtt_entry *entry)
2135 {
2136 	const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
2137 	unsigned long pfn;
2138 
2139 	pfn = pte_ops->get_pfn(entry);
2140 	if (pfn != vgpu->gvt->gtt.scratch_mfn)
2141 		intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
2142 }
2143 
2144 static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
2145 	void *p_data, unsigned int bytes)
2146 {
2147 	struct intel_gvt *gvt = vgpu->gvt;
2148 	const struct intel_gvt_device_info *info = &gvt->device_info;
2149 	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
2150 	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
2151 	unsigned long g_gtt_index = off >> info->gtt_entry_size_shift;
2152 	unsigned long gma, gfn;
2153 	struct intel_gvt_gtt_entry e = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
2154 	struct intel_gvt_gtt_entry m = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
2155 	dma_addr_t dma_addr;
2156 	int ret;
2157 	struct intel_gvt_partial_pte *partial_pte, *pos, *n;
2158 	bool partial_update = false;
2159 
2160 	if (bytes != 4 && bytes != 8)
2161 		return -EINVAL;
2162 
2163 	gma = g_gtt_index << I915_GTT_PAGE_SHIFT;
2164 
2165 	/* the VM may configure the whole GM space when ballooning is used */
2166 	if (!vgpu_gmadr_is_valid(vgpu, gma))
2167 		return 0;
2168 
2169 	e.type = GTT_TYPE_GGTT_PTE;
2170 	memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
2171 			bytes);
2172 
2173 	/* If ggtt entry size is 8 bytes, and it's split into two 4 bytes
2174 	 * write, save the first 4 bytes in a list and update virtual
2175 	 * PTE. Only update shadow PTE when the second 4 bytes comes.
2176 	 */
2177 	if (bytes < info->gtt_entry_size) {
2178 		bool found = false;
2179 
2180 		list_for_each_entry_safe(pos, n,
2181 				&ggtt_mm->ggtt_mm.partial_pte_list, list) {
2182 			if (g_gtt_index == pos->offset >>
2183 					info->gtt_entry_size_shift) {
2184 				if (off != pos->offset) {
2185 					/* the second partial part*/
2186 					int last_off = pos->offset &
2187 						(info->gtt_entry_size - 1);
2188 
2189 					memcpy((void *)&e.val64 + last_off,
2190 						(void *)&pos->data + last_off,
2191 						bytes);
2192 
2193 					list_del(&pos->list);
2194 					kfree(pos);
2195 					found = true;
2196 					break;
2197 				}
2198 
2199 				/* update of the first partial part */
2200 				pos->data = e.val64;
2201 				ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
2202 				return 0;
2203 			}
2204 		}
2205 
2206 		if (!found) {
2207 			/* the first partial part */
2208 			partial_pte = kzalloc(sizeof(*partial_pte), GFP_KERNEL);
2209 			if (!partial_pte)
2210 				return -ENOMEM;
2211 			partial_pte->offset = off;
2212 			partial_pte->data = e.val64;
2213 			list_add_tail(&partial_pte->list,
2214 				&ggtt_mm->ggtt_mm.partial_pte_list);
2215 			partial_update = true;
2216 		}
2217 	}
2218 
2219 	if (!partial_update && (ops->test_present(&e))) {
2220 		gfn = ops->get_pfn(&e);
2221 		m.val64 = e.val64;
2222 		m.type = e.type;
2223 
2224 		ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE,
2225 						   &dma_addr);
2226 		if (ret) {
2227 			gvt_vgpu_err("fail to populate guest ggtt entry\n");
2228 			/* guest driver may read/write the entry when partial
2229 			 * update the entry in this situation p2m will fail
2230 			 * setting the shadow entry to point to a scratch page
2231 			 */
2232 			ops->set_pfn(&m, gvt->gtt.scratch_mfn);
2233 		} else
2234 			ops->set_pfn(&m, dma_addr >> PAGE_SHIFT);
2235 	} else {
2236 		ops->set_pfn(&m, gvt->gtt.scratch_mfn);
2237 		ops->clear_present(&m);
2238 	}
2239 
2240 	ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
2241 
2242 	ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index);
2243 	ggtt_invalidate_pte(vgpu, &e);
2244 
2245 	ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
2246 	ggtt_invalidate(gvt->gt);
2247 	return 0;
2248 }
2249 
2250 /*
2251  * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write
2252  * @vgpu: a vGPU
2253  * @off: register offset
2254  * @p_data: data from guest write
2255  * @bytes: data length
2256  *
2257  * This function is used to emulate the GTT MMIO register write
2258  *
2259  * Returns:
2260  * Zero on success, error code if failed.
2261  */
2262 int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
2263 		unsigned int off, void *p_data, unsigned int bytes)
2264 {
2265 	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
2266 	int ret;
2267 	struct intel_vgpu_submission *s = &vgpu->submission;
2268 	struct intel_engine_cs *engine;
2269 	int i;
2270 
2271 	if (bytes != 4 && bytes != 8)
2272 		return -EINVAL;
2273 
2274 	off -= info->gtt_start_offset;
2275 	ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);
2276 
2277 	/* if ggtt of last submitted context is written,
2278 	 * that context is probably got unpinned.
2279 	 * Set last shadowed ctx to invalid.
2280 	 */
2281 	for_each_engine(engine, vgpu->gvt->gt, i) {
2282 		if (!s->last_ctx[i].valid)
2283 			continue;
2284 
2285 		if (s->last_ctx[i].lrca == (off >> info->gtt_entry_size_shift))
2286 			s->last_ctx[i].valid = false;
2287 	}
2288 	return ret;
2289 }
2290 
2291 static int alloc_scratch_pages(struct intel_vgpu *vgpu,
2292 		enum intel_gvt_gtt_type type)
2293 {
2294 	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
2295 	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
2296 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
2297 	int page_entry_num = I915_GTT_PAGE_SIZE >>
2298 				vgpu->gvt->device_info.gtt_entry_size_shift;
2299 	void *scratch_pt;
2300 	int i;
2301 	struct device *dev = vgpu->gvt->gt->i915->drm.dev;
2302 	dma_addr_t daddr;
2303 
2304 	if (drm_WARN_ON(&i915->drm,
2305 			type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
2306 		return -EINVAL;
2307 
2308 	scratch_pt = (void *)get_zeroed_page(GFP_KERNEL);
2309 	if (!scratch_pt) {
2310 		gvt_vgpu_err("fail to allocate scratch page\n");
2311 		return -ENOMEM;
2312 	}
2313 
2314 	daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0, 4096, DMA_BIDIRECTIONAL);
2315 	if (dma_mapping_error(dev, daddr)) {
2316 		gvt_vgpu_err("fail to dmamap scratch_pt\n");
2317 		__free_page(virt_to_page(scratch_pt));
2318 		return -ENOMEM;
2319 	}
2320 	gtt->scratch_pt[type].page_mfn =
2321 		(unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
2322 	gtt->scratch_pt[type].page = virt_to_page(scratch_pt);
2323 	gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
2324 			vgpu->id, type, gtt->scratch_pt[type].page_mfn);
2325 
2326 	/* Build the tree by full filled the scratch pt with the entries which
2327 	 * point to the next level scratch pt or scratch page. The
2328 	 * scratch_pt[type] indicate the scratch pt/scratch page used by the
2329 	 * 'type' pt.
2330 	 * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by
2331 	 * GTT_TYPE_PPGTT_PDE_PT level pt, that means this scratch_pt it self
2332 	 * is GTT_TYPE_PPGTT_PTE_PT, and full filled by scratch page mfn.
2333 	 */
2334 	if (type > GTT_TYPE_PPGTT_PTE_PT) {
2335 		struct intel_gvt_gtt_entry se;
2336 
2337 		memset(&se, 0, sizeof(struct intel_gvt_gtt_entry));
2338 		se.type = get_entry_type(type - 1);
2339 		ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);
2340 
2341 		/* The entry parameters like present/writeable/cache type
2342 		 * set to the same as i915's scratch page tree.
2343 		 */
2344 		se.val64 |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
2345 		if (type == GTT_TYPE_PPGTT_PDE_PT)
2346 			se.val64 |= PPAT_CACHED;
2347 
2348 		for (i = 0; i < page_entry_num; i++)
2349 			ops->set_entry(scratch_pt, &se, i, false, 0, vgpu);
2350 	}
2351 
2352 	return 0;
2353 }
2354 
2355 static int release_scratch_page_tree(struct intel_vgpu *vgpu)
2356 {
2357 	int i;
2358 	struct device *dev = vgpu->gvt->gt->i915->drm.dev;
2359 	dma_addr_t daddr;
2360 
2361 	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
2362 		if (vgpu->gtt.scratch_pt[i].page != NULL) {
2363 			daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn <<
2364 					I915_GTT_PAGE_SHIFT);
2365 			dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);
2366 			__free_page(vgpu->gtt.scratch_pt[i].page);
2367 			vgpu->gtt.scratch_pt[i].page = NULL;
2368 			vgpu->gtt.scratch_pt[i].page_mfn = 0;
2369 		}
2370 	}
2371 
2372 	return 0;
2373 }
2374 
2375 static int create_scratch_page_tree(struct intel_vgpu *vgpu)
2376 {
2377 	int i, ret;
2378 
2379 	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
2380 		ret = alloc_scratch_pages(vgpu, i);
2381 		if (ret)
2382 			goto err;
2383 	}
2384 
2385 	return 0;
2386 
2387 err:
2388 	release_scratch_page_tree(vgpu);
2389 	return ret;
2390 }
2391 
2392 /**
2393  * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virulization
2394  * @vgpu: a vGPU
2395  *
2396  * This function is used to initialize per-vGPU graphics memory virtualization
2397  * components.
2398  *
2399  * Returns:
2400  * Zero on success, error code if failed.
2401  */
2402 int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
2403 {
2404 	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
2405 
2406 	INIT_RADIX_TREE(&gtt->spt_tree, GFP_KERNEL);
2407 
2408 	INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head);
2409 	INIT_LIST_HEAD(&gtt->oos_page_list_head);
2410 	INIT_LIST_HEAD(&gtt->post_shadow_list_head);
2411 
2412 	gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu);
2413 	if (IS_ERR(gtt->ggtt_mm)) {
2414 		gvt_vgpu_err("fail to create mm for ggtt.\n");
2415 		return PTR_ERR(gtt->ggtt_mm);
2416 	}
2417 
2418 	intel_vgpu_reset_ggtt(vgpu, false);
2419 
2420 	INIT_LIST_HEAD(&gtt->ggtt_mm->ggtt_mm.partial_pte_list);
2421 
2422 	return create_scratch_page_tree(vgpu);
2423 }
2424 
2425 void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
2426 {
2427 	struct list_head *pos, *n;
2428 	struct intel_vgpu_mm *mm;
2429 
2430 	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
2431 		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
2432 		intel_vgpu_destroy_mm(mm);
2433 	}
2434 
2435 	if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head)))
2436 		gvt_err("vgpu ppgtt mm is not fully destroyed\n");
2437 
2438 	if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) {
2439 		gvt_err("Why we still has spt not freed?\n");
2440 		ppgtt_free_all_spt(vgpu);
2441 	}
2442 }
2443 
2444 static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
2445 {
2446 	struct intel_gvt_partial_pte *pos, *next;
2447 
2448 	list_for_each_entry_safe(pos, next,
2449 				 &vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list,
2450 				 list) {
2451 		gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n",
2452 			pos->offset, pos->data);
2453 		kfree(pos);
2454 	}
2455 	intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm);
2456 	vgpu->gtt.ggtt_mm = NULL;
2457 }
2458 
/**
 * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
 * @vgpu: a vGPU
 *
 * This function is used to clean up per-vGPU graphics memory virtualization
 * components: all PPGTT mm objects, the GGTT mm object and finally the
 * scratch page table tree, in that order.
 */
void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
{
	/* mm objects go first, the scratch tree last */
	intel_vgpu_destroy_all_ppgtt_mm(vgpu);
	intel_vgpu_destroy_ggtt_mm(vgpu);

	release_scratch_page_tree(vgpu);
}
2475 
2476 static void clean_spt_oos(struct intel_gvt *gvt)
2477 {
2478 	struct intel_gvt_gtt *gtt = &gvt->gtt;
2479 	struct list_head *pos, *n;
2480 	struct intel_vgpu_oos_page *oos_page;
2481 
2482 	WARN(!list_empty(&gtt->oos_page_use_list_head),
2483 		"someone is still using oos page\n");
2484 
2485 	list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
2486 		oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
2487 		list_del(&oos_page->list);
2488 		free_page((unsigned long)oos_page->mem);
2489 		kfree(oos_page);
2490 	}
2491 }
2492 
2493 static int setup_spt_oos(struct intel_gvt *gvt)
2494 {
2495 	struct intel_gvt_gtt *gtt = &gvt->gtt;
2496 	struct intel_vgpu_oos_page *oos_page;
2497 	int i;
2498 	int ret;
2499 
2500 	INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
2501 	INIT_LIST_HEAD(&gtt->oos_page_use_list_head);
2502 
2503 	for (i = 0; i < preallocated_oos_pages; i++) {
2504 		oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
2505 		if (!oos_page) {
2506 			ret = -ENOMEM;
2507 			goto fail;
2508 		}
2509 		oos_page->mem = (void *)__get_free_pages(GFP_KERNEL, 0);
2510 		if (!oos_page->mem) {
2511 			ret = -ENOMEM;
2512 			kfree(oos_page);
2513 			goto fail;
2514 		}
2515 
2516 		INIT_LIST_HEAD(&oos_page->list);
2517 		INIT_LIST_HEAD(&oos_page->vm_list);
2518 		oos_page->id = i;
2519 		list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
2520 	}
2521 
2522 	gvt_dbg_mm("%d oos pages preallocated\n", i);
2523 
2524 	return 0;
2525 fail:
2526 	clean_spt_oos(gvt);
2527 	return ret;
2528 }
2529 
2530 /**
2531  * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
2532  * @vgpu: a vGPU
2533  * @pdps: pdp root array
2534  *
2535  * This function is used to find a PPGTT mm object from mm object pool
2536  *
2537  * Returns:
2538  * pointer to mm object on success, NULL if failed.
2539  */
2540 struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
2541 		u64 pdps[])
2542 {
2543 	struct intel_vgpu_mm *mm;
2544 	struct list_head *pos;
2545 
2546 	list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) {
2547 		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
2548 
2549 		switch (mm->ppgtt_mm.root_entry_type) {
2550 		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
2551 			if (pdps[0] == mm->ppgtt_mm.guest_pdps[0])
2552 				return mm;
2553 			break;
2554 		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
2555 			if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps,
2556 				    sizeof(mm->ppgtt_mm.guest_pdps)))
2557 				return mm;
2558 			break;
2559 		default:
2560 			GEM_BUG_ON(1);
2561 		}
2562 	}
2563 	return NULL;
2564 }
2565 
2566 /**
2567  * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object.
2568  * @vgpu: a vGPU
2569  * @root_entry_type: ppgtt root entry type
2570  * @pdps: guest pdps
2571  *
2572  * This function is used to find or create a PPGTT mm object from a guest.
2573  *
2574  * Returns:
2575  * Zero on success, negative error code if failed.
2576  */
2577 struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
2578 		enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
2579 {
2580 	struct intel_vgpu_mm *mm;
2581 
2582 	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
2583 	if (mm) {
2584 		intel_vgpu_mm_get(mm);
2585 	} else {
2586 		mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps);
2587 		if (IS_ERR(mm))
2588 			gvt_vgpu_err("fail to create mm\n");
2589 	}
2590 	return mm;
2591 }
2592 
2593 /**
2594  * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object.
2595  * @vgpu: a vGPU
2596  * @pdps: guest pdps
2597  *
2598  * This function is used to find a PPGTT mm object from a guest and destroy it.
2599  *
2600  * Returns:
2601  * Zero on success, negative error code if failed.
2602  */
2603 int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[])
2604 {
2605 	struct intel_vgpu_mm *mm;
2606 
2607 	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
2608 	if (!mm) {
2609 		gvt_vgpu_err("fail to find ppgtt instance.\n");
2610 		return -EINVAL;
2611 	}
2612 	intel_vgpu_mm_put(mm);
2613 	return 0;
2614 }
2615 
2616 /**
2617  * intel_gvt_init_gtt - initialize mm components of a GVT device
2618  * @gvt: GVT device
2619  *
2620  * This function is called at the initialization stage, to initialize
2621  * the mm components of a GVT device.
2622  *
2623  * Returns:
2624  * zero on success, negative error code if failed.
2625  */
2626 int intel_gvt_init_gtt(struct intel_gvt *gvt)
2627 {
2628 	int ret;
2629 	void *page;
2630 	struct device *dev = gvt->gt->i915->drm.dev;
2631 	dma_addr_t daddr;
2632 
2633 	gvt_dbg_core("init gtt\n");
2634 
2635 	gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
2636 	gvt->gtt.gma_ops = &gen8_gtt_gma_ops;
2637 
2638 	page = (void *)get_zeroed_page(GFP_KERNEL);
2639 	if (!page) {
2640 		gvt_err("fail to allocate scratch ggtt page\n");
2641 		return -ENOMEM;
2642 	}
2643 
2644 	daddr = dma_map_page(dev, virt_to_page(page), 0,
2645 			4096, DMA_BIDIRECTIONAL);
2646 	if (dma_mapping_error(dev, daddr)) {
2647 		gvt_err("fail to dmamap scratch ggtt page\n");
2648 		__free_page(virt_to_page(page));
2649 		return -ENOMEM;
2650 	}
2651 
2652 	gvt->gtt.scratch_page = virt_to_page(page);
2653 	gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
2654 
2655 	if (enable_out_of_sync) {
2656 		ret = setup_spt_oos(gvt);
2657 		if (ret) {
2658 			gvt_err("fail to initialize SPT oos\n");
2659 			dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);
2660 			__free_page(gvt->gtt.scratch_page);
2661 			return ret;
2662 		}
2663 	}
2664 	INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head);
2665 	mutex_init(&gvt->gtt.ppgtt_mm_lock);
2666 	return 0;
2667 }
2668 
2669 /**
2670  * intel_gvt_clean_gtt - clean up mm components of a GVT device
2671  * @gvt: GVT device
2672  *
2673  * This function is called at the driver unloading stage, to clean up
2674  * the mm components of a GVT device.
2675  *
2676  */
2677 void intel_gvt_clean_gtt(struct intel_gvt *gvt)
2678 {
2679 	struct device *dev = gvt->gt->i915->drm.dev;
2680 	dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn <<
2681 					I915_GTT_PAGE_SHIFT);
2682 
2683 	dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);
2684 
2685 	__free_page(gvt->gtt.scratch_page);
2686 
2687 	if (enable_out_of_sync)
2688 		clean_spt_oos(gvt);
2689 }
2690 
2691 /**
2692  * intel_vgpu_invalidate_ppgtt - invalidate PPGTT instances
2693  * @vgpu: a vGPU
2694  *
2695  * This function is called when invalidate all PPGTT instances of a vGPU.
2696  *
2697  */
2698 void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu)
2699 {
2700 	struct list_head *pos, *n;
2701 	struct intel_vgpu_mm *mm;
2702 
2703 	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
2704 		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
2705 		if (mm->type == INTEL_GVT_MM_PPGTT) {
2706 			mutex_lock(&vgpu->gvt->gtt.ppgtt_mm_lock);
2707 			list_del_init(&mm->ppgtt_mm.lru_list);
2708 			mutex_unlock(&vgpu->gvt->gtt.ppgtt_mm_lock);
2709 			if (mm->ppgtt_mm.shadowed)
2710 				invalidate_ppgtt_mm(mm);
2711 		}
2712 	}
2713 }
2714 
2715 /**
2716  * intel_vgpu_reset_ggtt - reset the GGTT entry
2717  * @vgpu: a vGPU
2718  * @invalidate_old: invalidate old entries
2719  *
2720  * This function is called at the vGPU create stage
2721  * to reset all the GGTT entries.
2722  *
2723  */
2724 void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old)
2725 {
2726 	struct intel_gvt *gvt = vgpu->gvt;
2727 	const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
2728 	struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE};
2729 	struct intel_gvt_gtt_entry old_entry;
2730 	u32 index;
2731 	u32 num_entries;
2732 
2733 	pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn);
2734 	pte_ops->set_present(&entry);
2735 
2736 	index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
2737 	num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
2738 	while (num_entries--) {
2739 		if (invalidate_old) {
2740 			ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
2741 			ggtt_invalidate_pte(vgpu, &old_entry);
2742 		}
2743 		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
2744 	}
2745 
2746 	index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
2747 	num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
2748 	while (num_entries--) {
2749 		if (invalidate_old) {
2750 			ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
2751 			ggtt_invalidate_pte(vgpu, &old_entry);
2752 		}
2753 		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
2754 	}
2755 
2756 	ggtt_invalidate(gvt->gt);
2757 }
2758 
2759 /**
2760  * intel_gvt_restore_ggtt - restore all vGPU's ggtt entries
2761  * @gvt: intel gvt device
2762  *
2763  * This function is called at driver resume stage to restore
2764  * GGTT entries of every vGPU.
2765  *
2766  */
2767 void intel_gvt_restore_ggtt(struct intel_gvt *gvt)
2768 {
2769 	struct intel_vgpu *vgpu;
2770 	struct intel_vgpu_mm *mm;
2771 	int id;
2772 	gen8_pte_t pte;
2773 	u32 idx, num_low, num_hi, offset;
2774 
2775 	/* Restore dirty host ggtt for all vGPUs */
2776 	idr_for_each_entry(&(gvt)->vgpu_idr, vgpu, id) {
2777 		mm = vgpu->gtt.ggtt_mm;
2778 
2779 		num_low = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
2780 		offset = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
2781 		for (idx = 0; idx < num_low; idx++) {
2782 			pte = mm->ggtt_mm.host_ggtt_aperture[idx];
2783 			if (pte & GEN8_PAGE_PRESENT)
2784 				write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
2785 		}
2786 
2787 		num_hi = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
2788 		offset = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
2789 		for (idx = 0; idx < num_hi; idx++) {
2790 			pte = mm->ggtt_mm.host_ggtt_hidden[idx];
2791 			if (pte & GEN8_PAGE_PRESENT)
2792 				write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
2793 		}
2794 	}
2795 }
2796