xref: /linux/drivers/gpu/drm/i915/gvt/gtt.c (revision e47a324d6f07c9ef252cfce1f14cfa5110cbed99)
1 /*
2  * GTT virtualization
3  *
4  * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  *
25  * Authors:
26  *    Zhi Wang <zhi.a.wang@intel.com>
27  *    Zhenyu Wang <zhenyuw@linux.intel.com>
28  *    Xiao Zheng <xiao.zheng@intel.com>
29  *
30  * Contributors:
31  *    Min He <min.he@intel.com>
32  *    Bing Niu <bing.niu@intel.com>
33  *
34  */
35 
36 #include "i915_drv.h"
37 #include "gvt.h"
38 #include "i915_pvinfo.h"
39 #include "trace.h"
40 
41 #include "gt/intel_gt_regs.h"
42 #include <linux/vmalloc.h>
43 
44 #if defined(VERBOSE_DEBUG)
45 #define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args)
46 #else
47 #define gvt_vdbg_mm(fmt, args...)
48 #endif
49 
50 static bool enable_out_of_sync = false;
51 static int preallocated_oos_pages = 8192;
52 
53 /*
54  * Validate a GM address and the size of the related range: the whole
55  * range must lie within either the vGPU's aperture or its hidden GM space.
56  */
57 bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
58 {
59 	if (size == 0)
60 		return vgpu_gmadr_is_valid(vgpu, addr);
61 
62 	if (vgpu_gmadr_is_aperture(vgpu, addr) &&
63 	    vgpu_gmadr_is_aperture(vgpu, addr + size - 1))
64 		return true;
65 	else if (vgpu_gmadr_is_hidden(vgpu, addr) &&
66 		 vgpu_gmadr_is_hidden(vgpu, addr + size - 1))
67 		return true;
68 
69 	gvt_dbg_mm("Invalid ggtt range at 0x%llx, size: 0x%x\n",
70 		     addr, size);
71 	return false;
72 }
73 
74 #define gtt_type_is_entry(type) \
75 	(type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \
76 	 && type != GTT_TYPE_PPGTT_PTE_ENTRY \
77 	 && type != GTT_TYPE_PPGTT_ROOT_ENTRY)
78 
79 #define gtt_type_is_pt(type) \
80 	(type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX)
81 
82 #define gtt_type_is_pte_pt(type) \
83 	(type == GTT_TYPE_PPGTT_PTE_PT)
84 
85 #define gtt_type_is_root_pointer(type) \
86 	(gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY)
87 
88 #define gtt_init_entry(e, t, p, v) do { \
89 	(e)->type = t; \
90 	(e)->pdev = p; \
91 	memcpy(&(e)->val64, &v, sizeof(v)); \
92 } while (0)
93 
94 /*
95  * Mappings between GTT_TYPE* enumerations.
96  * The following information can be looked up for a given type:
97  * - type of next level page table
98  * - type of entry inside this level page table
99  * - type of entry with PSE set
100  *
101  * If the given type doesn't carry a particular piece of information,
102  * GTT_TYPE_INVALID is returned instead. For example, requesting the PSE
103  * type of an L4 root entry, or the next level page table type of a PTE
104  * page table, both yield GTT_TYPE_INVALID, because an L4 root entry has
105  * no PSE bit and a PTE page table has no next level page table.
106  * This is useful when traversing a page table, since callers can simply
107  * check for GTT_TYPE_INVALID instead of special-casing each level.
108  */
109 
110 struct gtt_type_table_entry {
111 	int entry_type;
112 	int pt_type;
113 	int next_pt_type;
114 	int pse_entry_type;
115 };
116 
117 #define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \
118 	[type] = { \
119 		.entry_type = e_type, \
120 		.pt_type = cpt_type, \
121 		.next_pt_type = npt_type, \
122 		.pse_entry_type = pse_type, \
123 	}
124 
125 static const struct gtt_type_table_entry gtt_type_table[] = {
126 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
127 			GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
128 			GTT_TYPE_INVALID,
129 			GTT_TYPE_PPGTT_PML4_PT,
130 			GTT_TYPE_INVALID),
131 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT,
132 			GTT_TYPE_PPGTT_PML4_ENTRY,
133 			GTT_TYPE_PPGTT_PML4_PT,
134 			GTT_TYPE_PPGTT_PDP_PT,
135 			GTT_TYPE_INVALID),
136 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY,
137 			GTT_TYPE_PPGTT_PML4_ENTRY,
138 			GTT_TYPE_PPGTT_PML4_PT,
139 			GTT_TYPE_PPGTT_PDP_PT,
140 			GTT_TYPE_INVALID),
141 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT,
142 			GTT_TYPE_PPGTT_PDP_ENTRY,
143 			GTT_TYPE_PPGTT_PDP_PT,
144 			GTT_TYPE_PPGTT_PDE_PT,
145 			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
146 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
147 			GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
148 			GTT_TYPE_INVALID,
149 			GTT_TYPE_PPGTT_PDE_PT,
150 			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
151 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY,
152 			GTT_TYPE_PPGTT_PDP_ENTRY,
153 			GTT_TYPE_PPGTT_PDP_PT,
154 			GTT_TYPE_PPGTT_PDE_PT,
155 			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
156 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT,
157 			GTT_TYPE_PPGTT_PDE_ENTRY,
158 			GTT_TYPE_PPGTT_PDE_PT,
159 			GTT_TYPE_PPGTT_PTE_PT,
160 			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
161 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY,
162 			GTT_TYPE_PPGTT_PDE_ENTRY,
163 			GTT_TYPE_PPGTT_PDE_PT,
164 			GTT_TYPE_PPGTT_PTE_PT,
165 			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
166 	/* We treat the IPS bit as 'PSE' at the PTE level. */
167 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT,
168 			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
169 			GTT_TYPE_PPGTT_PTE_PT,
170 			GTT_TYPE_INVALID,
171 			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
172 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY,
173 			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
174 			GTT_TYPE_PPGTT_PTE_PT,
175 			GTT_TYPE_INVALID,
176 			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
177 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_64K_ENTRY,
178 			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
179 			GTT_TYPE_PPGTT_PTE_PT,
180 			GTT_TYPE_INVALID,
181 			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
182 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY,
183 			GTT_TYPE_PPGTT_PDE_ENTRY,
184 			GTT_TYPE_PPGTT_PDE_PT,
185 			GTT_TYPE_INVALID,
186 			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
187 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY,
188 			GTT_TYPE_PPGTT_PDP_ENTRY,
189 			GTT_TYPE_PPGTT_PDP_PT,
190 			GTT_TYPE_INVALID,
191 			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
192 	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE,
193 			GTT_TYPE_GGTT_PTE,
194 			GTT_TYPE_INVALID,
195 			GTT_TYPE_INVALID,
196 			GTT_TYPE_INVALID),
197 };
198 
199 static inline int get_next_pt_type(int type)
200 {
201 	return gtt_type_table[type].next_pt_type;
202 }
203 
204 static inline int get_entry_type(int type)
205 {
206 	return gtt_type_table[type].entry_type;
207 }
208 
209 static inline int get_pse_type(int type)
210 {
211 	return gtt_type_table[type].pse_entry_type;
212 }
213 
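/* Read a raw 64-bit PTE from the host GGTT through its CPU mapping (ggtt->gsm). */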
214 static u64 read_pte64(struct i915_ggtt *ggtt, unsigned long index)
215 {
216 	void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index;
217 
218 	return readq(addr);
219 }
220 
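/*
 * Kick the GPU to invalidate its GGTT TLB after host GGTT entries have
 * been modified, by writing the GFX_FLSH_CNTL flush control register.
 */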
221 static void ggtt_invalidate(struct intel_gt *gt)
222 {
223 	intel_wakeref_t wakeref;
224 
225 	wakeref = mmio_hw_access_pre(gt);
226 	intel_uncore_write(gt->uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
227 	mmio_hw_access_post(gt, wakeref);
228 }
229 
230 static void write_pte64(struct i915_ggtt *ggtt, unsigned long index, u64 pte)
231 {
232 	void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index;
233 
234 	writeq(pte, addr);
235 }
236 
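/*
 * Read a 64-bit GTT entry from one of three places: the guest page table
 * at the given GPA (hypervisor_access), the host GGTT (pt == NULL), or a
 * page table kept in kernel memory (pt != NULL).
 */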
237 static inline int gtt_get_entry64(void *pt,
238 		struct intel_gvt_gtt_entry *e,
239 		unsigned long index, bool hypervisor_access, unsigned long gpa,
240 		struct intel_vgpu *vgpu)
241 {
242 	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
243 	int ret;
244 
245 	if (WARN_ON(info->gtt_entry_size != 8))
246 		return -EINVAL;
247 
248 	if (hypervisor_access) {
249 		ret = intel_gvt_read_gpa(vgpu, gpa +
250 				(index << info->gtt_entry_size_shift),
251 				&e->val64, 8);
252 		if (WARN_ON(ret))
253 			return ret;
254 	} else if (!pt) {
255 		e->val64 = read_pte64(vgpu->gvt->gt->ggtt, index);
256 	} else {
257 		e->val64 = *((u64 *)pt + index);
258 	}
259 	return 0;
260 }
261 
262 static inline int gtt_set_entry64(void *pt,
263 		struct intel_gvt_gtt_entry *e,
264 		unsigned long index, bool hypervisor_access, unsigned long gpa,
265 		struct intel_vgpu *vgpu)
266 {
267 	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
268 	int ret;
269 
270 	if (WARN_ON(info->gtt_entry_size != 8))
271 		return -EINVAL;
272 
273 	if (hypervisor_access) {
274 		ret = intel_gvt_write_gpa(vgpu, gpa +
275 				(index << info->gtt_entry_size_shift),
276 				&e->val64, 8);
277 		if (WARN_ON(ret))
278 			return ret;
279 	} else if (!pt) {
280 		write_pte64(vgpu->gvt->gt->ggtt, index, e->val64);
281 	} else {
282 		*((u64 *)pt + index) = e->val64;
283 	}
284 	return 0;
285 }
286 
287 #define GTT_HAW 46
288 
289 #define ADDR_1G_MASK	GENMASK_ULL(GTT_HAW - 1, 30)
290 #define ADDR_2M_MASK	GENMASK_ULL(GTT_HAW - 1, 21)
291 #define ADDR_64K_MASK	GENMASK_ULL(GTT_HAW - 1, 16)
292 #define ADDR_4K_MASK	GENMASK_ULL(GTT_HAW - 1, 12)
293 
294 #define GTT_SPTE_FLAG_MASK GENMASK_ULL(62, 52)
295 #define GTT_SPTE_FLAG_64K_SPLITED BIT(52) /* split 64K GTT entry */
296 
297 #define GTT_64K_PTE_STRIDE 16
298 
299 static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
300 {
301 	unsigned long pfn;
302 
303 	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY)
304 		pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT;
305 	else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY)
306 		pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT;
307 	else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY)
308 		pfn = (e->val64 & ADDR_64K_MASK) >> PAGE_SHIFT;
309 	else
310 		pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT;
311 	return pfn;
312 }
313 
314 static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn)
315 {
316 	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
317 		e->val64 &= ~ADDR_1G_MASK;
318 		pfn &= (ADDR_1G_MASK >> PAGE_SHIFT);
319 	} else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) {
320 		e->val64 &= ~ADDR_2M_MASK;
321 		pfn &= (ADDR_2M_MASK >> PAGE_SHIFT);
322 	} else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) {
323 		e->val64 &= ~ADDR_64K_MASK;
324 		pfn &= (ADDR_64K_MASK >> PAGE_SHIFT);
325 	} else {
326 		e->val64 &= ~ADDR_4K_MASK;
327 		pfn &= (ADDR_4K_MASK >> PAGE_SHIFT);
328 	}
329 
330 	e->val64 |= (pfn << PAGE_SHIFT);
331 }
332 
333 static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e)
334 {
335 	return !!(e->val64 & _PAGE_PSE);
336 }
337 
338 static void gen8_gtt_clear_pse(struct intel_gvt_gtt_entry *e)
339 {
340 	if (gen8_gtt_test_pse(e)) {
341 		switch (e->type) {
342 		case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
343 			e->val64 &= ~_PAGE_PSE;
344 			e->type = GTT_TYPE_PPGTT_PDE_ENTRY;
345 			break;
346 		case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
347 			e->type = GTT_TYPE_PPGTT_PDP_ENTRY;
348 			e->val64 &= ~_PAGE_PSE;
349 			break;
350 		default:
351 			WARN_ON(1);
352 		}
353 	}
354 }
355 
356 static bool gen8_gtt_test_ips(struct intel_gvt_gtt_entry *e)
357 {
358 	if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
359 		return false;
360 
361 	return !!(e->val64 & GEN8_PDE_IPS_64K);
362 }
363 
364 static void gen8_gtt_clear_ips(struct intel_gvt_gtt_entry *e)
365 {
366 	if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
367 		return;
368 
369 	e->val64 &= ~GEN8_PDE_IPS_64K;
370 }
371 
372 static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
373 {
374 	/*
375 	 * i915 writes the PDP root pointer registers without the present
376 	 * bit set and that still works, so root pointer entries need to be
377 	 * treated specially here.
378 	 */
379 	if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY
380 			|| e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
381 		return (e->val64 != 0);
382 	else
383 		return (e->val64 & GEN8_PAGE_PRESENT);
384 }
385 
386 static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e)
387 {
388 	e->val64 &= ~GEN8_PAGE_PRESENT;
389 }
390 
391 static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e)
392 {
393 	e->val64 |= GEN8_PAGE_PRESENT;
394 }
395 
396 static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e)
397 {
398 	return !!(e->val64 & GTT_SPTE_FLAG_64K_SPLITED);
399 }
400 
401 static void gen8_gtt_set_64k_splited(struct intel_gvt_gtt_entry *e)
402 {
403 	e->val64 |= GTT_SPTE_FLAG_64K_SPLITED;
404 }
405 
406 static void gen8_gtt_clear_64k_splited(struct intel_gvt_gtt_entry *e)
407 {
408 	e->val64 &= ~GTT_SPTE_FLAG_64K_SPLITED;
409 }
410 
411 /*
412  * Per-platform GMA routines.
413  */
414 static unsigned long gma_to_ggtt_pte_index(unsigned long gma)
415 {
416 	unsigned long x = (gma >> I915_GTT_PAGE_SHIFT);
417 
418 	trace_gma_index(__func__, gma, x);
419 	return x;
420 }
421 
422 #define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \
423 static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \
424 { \
425 	unsigned long x = (exp); \
426 	trace_gma_index(__func__, gma, x); \
427 	return x; \
428 }
429 
430 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff));
431 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff));
432 DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3));
433 DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff));
434 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));
435 
436 static const struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = {
437 	.get_entry = gtt_get_entry64,
438 	.set_entry = gtt_set_entry64,
439 	.clear_present = gtt_entry_clear_present,
440 	.set_present = gtt_entry_set_present,
441 	.test_present = gen8_gtt_test_present,
442 	.test_pse = gen8_gtt_test_pse,
443 	.clear_pse = gen8_gtt_clear_pse,
444 	.clear_ips = gen8_gtt_clear_ips,
445 	.test_ips = gen8_gtt_test_ips,
446 	.clear_64k_splited = gen8_gtt_clear_64k_splited,
447 	.set_64k_splited = gen8_gtt_set_64k_splited,
448 	.test_64k_splited = gen8_gtt_test_64k_splited,
449 	.get_pfn = gen8_gtt_get_pfn,
450 	.set_pfn = gen8_gtt_set_pfn,
451 };
452 
453 static const struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = {
454 	.gma_to_ggtt_pte_index = gma_to_ggtt_pte_index,
455 	.gma_to_pte_index = gen8_gma_to_pte_index,
456 	.gma_to_pde_index = gen8_gma_to_pde_index,
457 	.gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index,
458 	.gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index,
459 	.gma_to_pml4_index = gen8_gma_to_pml4_index,
460 };
461 
462 /* Update the entry type according to the PSE and IPS bits. */
463 static void update_entry_type_for_real(const struct intel_gvt_gtt_pte_ops *pte_ops,
464 	struct intel_gvt_gtt_entry *entry, bool ips)
465 {
466 	switch (entry->type) {
467 	case GTT_TYPE_PPGTT_PDE_ENTRY:
468 	case GTT_TYPE_PPGTT_PDP_ENTRY:
469 		if (pte_ops->test_pse(entry))
470 			entry->type = get_pse_type(entry->type);
471 		break;
472 	case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
473 		if (ips)
474 			entry->type = get_pse_type(entry->type);
475 		break;
476 	default:
477 		GEM_BUG_ON(!gtt_type_is_entry(entry->type));
478 	}
479 
480 	GEM_BUG_ON(entry->type == GTT_TYPE_INVALID);
481 }
482 
483 /*
484  * MM helpers.
485  */
486 static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm,
487 		struct intel_gvt_gtt_entry *entry, unsigned long index,
488 		bool guest)
489 {
490 	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
491 
492 	GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT);
493 
494 	entry->type = mm->ppgtt_mm.root_entry_type;
495 	pte_ops->get_entry(guest ? mm->ppgtt_mm.guest_pdps :
496 			   mm->ppgtt_mm.shadow_pdps,
497 			   entry, index, false, 0, mm->vgpu);
498 	update_entry_type_for_real(pte_ops, entry, false);
499 }
500 
501 static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm,
502 		struct intel_gvt_gtt_entry *entry, unsigned long index)
503 {
504 	_ppgtt_get_root_entry(mm, entry, index, true);
505 }
506 
507 static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm,
508 		struct intel_gvt_gtt_entry *entry, unsigned long index)
509 {
510 	_ppgtt_get_root_entry(mm, entry, index, false);
511 }
512 
513 static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm,
514 		struct intel_gvt_gtt_entry *entry, unsigned long index,
515 		bool guest)
516 {
517 	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
518 
519 	pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps :
520 			   mm->ppgtt_mm.shadow_pdps,
521 			   entry, index, false, 0, mm->vgpu);
522 }
523 
524 static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm,
525 		struct intel_gvt_gtt_entry *entry, unsigned long index)
526 {
527 	_ppgtt_set_root_entry(mm, entry, index, false);
528 }
529 
530 static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm,
531 		struct intel_gvt_gtt_entry *entry, unsigned long index)
532 {
533 	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
534 
535 	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
536 
537 	entry->type = GTT_TYPE_GGTT_PTE;
538 	pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
539 			   false, 0, mm->vgpu);
540 }
541 
542 static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm,
543 		struct intel_gvt_gtt_entry *entry, unsigned long index)
544 {
545 	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
546 
547 	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
548 
549 	pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
550 			   false, 0, mm->vgpu);
551 }
552 
553 static void ggtt_get_host_entry(struct intel_vgpu_mm *mm,
554 		struct intel_gvt_gtt_entry *entry, unsigned long index)
555 {
556 	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
557 
558 	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
559 
560 	pte_ops->get_entry(NULL, entry, index, false, 0, mm->vgpu);
561 }
562 
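/*
 * Write a host GGTT entry and mirror the value into the vGPU's saved
 * host_ggtt_aperture/host_ggtt_hidden arrays, depending on which GM
 * range the index falls into.
 */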
563 static void ggtt_set_host_entry(struct intel_vgpu_mm *mm,
564 		struct intel_gvt_gtt_entry *entry, unsigned long index)
565 {
566 	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
567 	unsigned long offset = index;
568 
569 	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
570 
571 	if (vgpu_gmadr_is_aperture(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) {
572 		offset -= (vgpu_aperture_gmadr_base(mm->vgpu) >> PAGE_SHIFT);
573 		mm->ggtt_mm.host_ggtt_aperture[offset] = entry->val64;
574 	} else if (vgpu_gmadr_is_hidden(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) {
575 		offset -= (vgpu_hidden_gmadr_base(mm->vgpu) >> PAGE_SHIFT);
576 		mm->ggtt_mm.host_ggtt_hidden[offset] = entry->val64;
577 	}
578 
579 	pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu);
580 }
581 
582 /*
583  * PPGTT shadow page table helpers.
584  */
585 static inline int ppgtt_spt_get_entry(
586 		struct intel_vgpu_ppgtt_spt *spt,
587 		void *page_table, int type,
588 		struct intel_gvt_gtt_entry *e, unsigned long index,
589 		bool guest)
590 {
591 	struct intel_gvt *gvt = spt->vgpu->gvt;
592 	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
593 	int ret;
594 
595 	e->type = get_entry_type(type);
596 
597 	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
598 		return -EINVAL;
599 
600 	ret = ops->get_entry(page_table, e, index, guest,
601 			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
602 			spt->vgpu);
603 	if (ret)
604 		return ret;
605 
606 	update_entry_type_for_real(ops, e, guest ?
607 				   spt->guest_page.pde_ips : false);
608 
609 	gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
610 		    type, e->type, index, e->val64);
611 	return 0;
612 }
613 
614 static inline int ppgtt_spt_set_entry(
615 		struct intel_vgpu_ppgtt_spt *spt,
616 		void *page_table, int type,
617 		struct intel_gvt_gtt_entry *e, unsigned long index,
618 		bool guest)
619 {
620 	struct intel_gvt *gvt = spt->vgpu->gvt;
621 	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
622 
623 	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
624 		return -EINVAL;
625 
626 	gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
627 		    type, e->type, index, e->val64);
628 
629 	return ops->set_entry(page_table, e, index, guest,
630 			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
631 			spt->vgpu);
632 }
633 
634 #define ppgtt_get_guest_entry(spt, e, index) \
635 	ppgtt_spt_get_entry(spt, NULL, \
636 		spt->guest_page.type, e, index, true)
637 
638 #define ppgtt_set_guest_entry(spt, e, index) \
639 	ppgtt_spt_set_entry(spt, NULL, \
640 		spt->guest_page.type, e, index, true)
641 
642 #define ppgtt_get_shadow_entry(spt, e, index) \
643 	ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \
644 		spt->shadow_page.type, e, index, false)
645 
646 #define ppgtt_set_shadow_entry(spt, e, index) \
647 	ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \
648 		spt->shadow_page.type, e, index, false)
649 
650 static void *alloc_spt(gfp_t gfp_mask)
651 {
652 	struct intel_vgpu_ppgtt_spt *spt;
653 
654 	spt = kzalloc(sizeof(*spt), gfp_mask);
655 	if (!spt)
656 		return NULL;
657 
658 	spt->shadow_page.page = alloc_page(gfp_mask);
659 	if (!spt->shadow_page.page) {
660 		kfree(spt);
661 		return NULL;
662 	}
663 	return spt;
664 }
665 
666 static void free_spt(struct intel_vgpu_ppgtt_spt *spt)
667 {
668 	__free_page(spt->shadow_page.page);
669 	kfree(spt);
670 }
671 
672 static int detach_oos_page(struct intel_vgpu *vgpu,
673 		struct intel_vgpu_oos_page *oos_page);
674 
675 static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt)
676 {
677 	struct device *kdev = spt->vgpu->gvt->gt->i915->drm.dev;
678 
679 	trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type);
680 
681 	dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096,
682 		       DMA_BIDIRECTIONAL);
683 
684 	radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn);
685 
686 	if (spt->guest_page.gfn) {
687 		if (spt->guest_page.oos_page)
688 			detach_oos_page(spt->vgpu, spt->guest_page.oos_page);
689 
690 		intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn);
691 	}
692 
693 	list_del_init(&spt->post_shadow_list);
694 	free_spt(spt);
695 }
696 
697 static void ppgtt_free_all_spt(struct intel_vgpu *vgpu)
698 {
699 	struct intel_vgpu_ppgtt_spt *spt, *spn;
700 	struct radix_tree_iter iter;
701 	LIST_HEAD(all_spt);
702 	void __rcu **slot;
703 
704 	rcu_read_lock();
705 	radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) {
706 		spt = radix_tree_deref_slot(slot);
707 		list_move(&spt->post_shadow_list, &all_spt);
708 	}
709 	rcu_read_unlock();
710 
711 	list_for_each_entry_safe(spt, spn, &all_spt, post_shadow_list)
712 		ppgtt_free_spt(spt);
713 }
714 
715 static int ppgtt_handle_guest_write_page_table_bytes(
716 		struct intel_vgpu_ppgtt_spt *spt,
717 		u64 pa, void *p_data, int bytes);
718 
719 static int ppgtt_write_protection_handler(
720 		struct intel_vgpu_page_track *page_track,
721 		u64 gpa, void *data, int bytes)
722 {
723 	struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data;
724 	int ret;
725 
726 	if (bytes != 4 && bytes != 8)
727 		return -EINVAL;
728 
729 	ret = ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data, bytes);
730 	if (ret)
731 		return ret;
732 
733 	return 0;
734 }
735 
736 /* Find a spt by guest gfn. */
737 static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn(
738 		struct intel_vgpu *vgpu, unsigned long gfn)
739 {
740 	struct intel_vgpu_page_track *track;
741 
742 	track = intel_vgpu_find_page_track(vgpu, gfn);
743 	if (track && track->handler == ppgtt_write_protection_handler)
744 		return track->priv_data;
745 
746 	return NULL;
747 }
748 
749 /* Find the spt by shadow page mfn. */
750 static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn(
751 		struct intel_vgpu *vgpu, unsigned long mfn)
752 {
753 	return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn);
754 }
755 
756 static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);
757 
758 /* Allocate a shadow page table without a guest page. */
759 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
760 		struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type)
761 {
762 	struct device *kdev = vgpu->gvt->gt->i915->drm.dev;
763 	struct intel_vgpu_ppgtt_spt *spt = NULL;
764 	dma_addr_t daddr;
765 	int ret;
766 
767 retry:
768 	spt = alloc_spt(GFP_KERNEL | __GFP_ZERO);
769 	if (!spt) {
770 		if (reclaim_one_ppgtt_mm(vgpu->gvt))
771 			goto retry;
772 
773 		gvt_vgpu_err("fail to allocate ppgtt shadow page\n");
774 		return ERR_PTR(-ENOMEM);
775 	}
776 
777 	spt->vgpu = vgpu;
778 	atomic_set(&spt->refcount, 1);
779 	INIT_LIST_HEAD(&spt->post_shadow_list);
780 
781 	/*
782 	 * Init shadow_page.
783 	 */
784 	spt->shadow_page.type = type;
785 	daddr = dma_map_page(kdev, spt->shadow_page.page,
786 			     0, 4096, DMA_BIDIRECTIONAL);
787 	if (dma_mapping_error(kdev, daddr)) {
788 		gvt_vgpu_err("fail to map dma addr\n");
789 		ret = -EINVAL;
790 		goto err_free_spt;
791 	}
792 	spt->shadow_page.vaddr = page_address(spt->shadow_page.page);
793 	spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT;
794 
795 	ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt);
796 	if (ret)
797 		goto err_unmap_dma;
798 
799 	return spt;
800 
801 err_unmap_dma:
802 	dma_unmap_page(kdev, daddr, PAGE_SIZE, DMA_BIDIRECTIONAL);
803 err_free_spt:
804 	free_spt(spt);
805 	return ERR_PTR(ret);
806 }
807 
808 /* Allocate a shadow page table associated with a specific gfn. */
809 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn(
810 		struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type,
811 		unsigned long gfn, bool guest_pde_ips)
812 {
813 	struct intel_vgpu_ppgtt_spt *spt;
814 	int ret;
815 
816 	spt = ppgtt_alloc_spt(vgpu, type);
817 	if (IS_ERR(spt))
818 		return spt;
819 
820 	/*
821 	 * Init guest_page.
822 	 */
823 	ret = intel_vgpu_register_page_track(vgpu, gfn,
824 			ppgtt_write_protection_handler, spt);
825 	if (ret) {
826 		ppgtt_free_spt(spt);
827 		return ERR_PTR(ret);
828 	}
829 
830 	spt->guest_page.type = type;
831 	spt->guest_page.gfn = gfn;
832 	spt->guest_page.pde_ips = guest_pde_ips;
833 
834 	trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
835 
836 	return spt;
837 }
838 
839 #define pt_entry_size_shift(spt) \
840 	((spt)->vgpu->gvt->device_info.gtt_entry_size_shift)
841 
842 #define pt_entries(spt) \
843 	(I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt))
844 
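/*
 * Iterators over the entries of a page table. When the owning PDE has
 * IPS (64K pages) set, only every 16th slot (GTT_64K_PTE_STRIDE) is used,
 * so the iterators step by that stride.
 */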
845 #define for_each_present_guest_entry(spt, e, i) \
846 	for (i = 0; i < pt_entries(spt); \
847 	     i += spt->guest_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
848 		if (!ppgtt_get_guest_entry(spt, e, i) && \
849 		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))
850 
851 #define for_each_present_shadow_entry(spt, e, i) \
852 	for (i = 0; i < pt_entries(spt); \
853 	     i += spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
854 		if (!ppgtt_get_shadow_entry(spt, e, i) && \
855 		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))
856 
857 #define for_each_shadow_entry(spt, e, i) \
858 	for (i = 0; i < pt_entries(spt); \
859 	     i += (spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1)) \
860 		if (!ppgtt_get_shadow_entry(spt, e, i))
861 
862 static inline void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt)
863 {
864 	int v = atomic_read(&spt->refcount);
865 
866 	trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));
867 	atomic_inc(&spt->refcount);
868 }
869 
870 static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt)
871 {
872 	int v = atomic_read(&spt->refcount);
873 
874 	trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));
875 	return atomic_dec_return(&spt->refcount);
876 }
877 
878 static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt);
879 
880 static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
881 		struct intel_gvt_gtt_entry *e)
882 {
883 	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
884 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
885 	struct intel_vgpu_ppgtt_spt *s;
886 	enum intel_gvt_gtt_type cur_pt_type;
887 
888 	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type)));
889 
890 	if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY
891 		&& e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
892 		cur_pt_type = get_next_pt_type(e->type);
893 
894 		if (!gtt_type_is_pt(cur_pt_type) ||
895 				!gtt_type_is_pt(cur_pt_type + 1)) {
896 			drm_WARN(&i915->drm, 1,
897 				 "Invalid page table type, cur_pt_type is: %d\n",
898 				 cur_pt_type);
899 			return -EINVAL;
900 		}
901 
902 		cur_pt_type += 1;
903 
904 		if (ops->get_pfn(e) ==
905 			vgpu->gtt.scratch_pt[cur_pt_type].page_mfn)
906 			return 0;
907 	}
908 	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
909 	if (!s) {
910 		gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n",
911 				ops->get_pfn(e));
912 		return -ENXIO;
913 	}
914 	return ppgtt_invalidate_spt(s);
915 }
916 
917 static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt,
918 		struct intel_gvt_gtt_entry *entry)
919 {
920 	struct intel_vgpu *vgpu = spt->vgpu;
921 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
922 	unsigned long pfn;
923 	int type;
924 
925 	pfn = ops->get_pfn(entry);
926 	type = spt->shadow_page.type;
927 
928 	/* Uninitialized spte or unshadowed spte. */
929 	if (!pfn || pfn == vgpu->gtt.scratch_pt[type].page_mfn)
930 		return;
931 
932 	intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
933 }
934 
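/*
 * Drop a reference on a shadow page table. When the last reference goes,
 * unmap the guest pages it references, recursively invalidate any child
 * shadow tables, and free the spt itself.
 */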
935 static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
936 {
937 	struct intel_vgpu *vgpu = spt->vgpu;
938 	struct intel_gvt_gtt_entry e;
939 	unsigned long index;
940 	int ret;
941 
942 	trace_spt_change(spt->vgpu->id, "die", spt,
943 			spt->guest_page.gfn, spt->shadow_page.type);
944 
945 	if (ppgtt_put_spt(spt) > 0)
946 		return 0;
947 
948 	for_each_present_shadow_entry(spt, &e, index) {
949 		switch (e.type) {
950 		case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
951 			gvt_vdbg_mm("invalidate 4K entry\n");
952 			ppgtt_invalidate_pte(spt, &e);
953 			break;
954 		case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
955 			/* We don't set up 64K shadow entries so far. */
956 			WARN(1, "suspicious 64K gtt entry\n");
957 			continue;
958 		case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
959 			gvt_vdbg_mm("invalidate 2M entry\n");
960 			continue;
961 		case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
962 			WARN(1, "GVT doesn't support 1GB page\n");
963 			continue;
964 		case GTT_TYPE_PPGTT_PML4_ENTRY:
965 		case GTT_TYPE_PPGTT_PDP_ENTRY:
966 		case GTT_TYPE_PPGTT_PDE_ENTRY:
967 			gvt_vdbg_mm("invalidate PML4/PDP/PDE entry\n");
968 			ret = ppgtt_invalidate_spt_by_shadow_entry(
969 					spt->vgpu, &e);
970 			if (ret)
971 				goto fail;
972 			break;
973 		default:
974 			GEM_BUG_ON(1);
975 		}
976 	}
977 
978 	trace_spt_change(spt->vgpu->id, "release", spt,
979 			 spt->guest_page.gfn, spt->shadow_page.type);
980 	ppgtt_free_spt(spt);
981 	return 0;
982 fail:
983 	gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n",
984 			spt, e.val64, e.type);
985 	return ret;
986 }
987 
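/*
 * Whether the vGPU is allowed to use 64K (IPS) pages: gated by the
 * GAMW_ECO_DEV_RW_IA register on Gen9, always allowed on Gen11+.
 */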
988 static bool vgpu_ips_enabled(struct intel_vgpu *vgpu)
989 {
990 	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
991 
992 	if (GRAPHICS_VER(dev_priv) == 9) {
993 		u32 ips = vgpu_vreg_t(vgpu, GEN8_GAMW_ECO_DEV_RW_IA) &
994 			GAMW_ECO_ENABLE_64K_IPS_FIELD;
995 
996 		return ips == GAMW_ECO_ENABLE_64K_IPS_FIELD;
997 	} else if (GRAPHICS_VER(dev_priv) >= 11) {
998 		/* 64K paging is only controlled by the IPS bit in the PTE now. */
999 		return true;
1000 	} else
1001 		return false;
1002 }
1003 
1004 static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt);
1005 
1006 static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
1007 		struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we)
1008 {
1009 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1010 	struct intel_vgpu_ppgtt_spt *spt = NULL;
1011 	bool ips = false;
1012 	int ret;
1013 
1014 	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type)));
1015 
1016 	if (we->type == GTT_TYPE_PPGTT_PDE_ENTRY)
1017 		ips = vgpu_ips_enabled(vgpu) && ops->test_ips(we);
1018 
1019 	spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we));
1020 	if (spt) {
1021 		ppgtt_get_spt(spt);
1022 
1023 		if (ips != spt->guest_page.pde_ips) {
1024 			spt->guest_page.pde_ips = ips;
1025 
1026 			gvt_dbg_mm("reshadow PDE since ips changed\n");
1027 			clear_page(spt->shadow_page.vaddr);
1028 			ret = ppgtt_populate_spt(spt);
1029 			if (ret) {
1030 				ppgtt_put_spt(spt);
1031 				goto err;
1032 			}
1033 		}
1034 	} else {
1035 		int type = get_next_pt_type(we->type);
1036 
1037 		if (!gtt_type_is_pt(type)) {
1038 			ret = -EINVAL;
1039 			goto err;
1040 		}
1041 
1042 		spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips);
1043 		if (IS_ERR(spt)) {
1044 			ret = PTR_ERR(spt);
1045 			goto err;
1046 		}
1047 
1048 		ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn);
1049 		if (ret)
1050 			goto err_free_spt;
1051 
1052 		ret = ppgtt_populate_spt(spt);
1053 		if (ret)
1054 			goto err_free_spt;
1055 
1056 		trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn,
1057 				 spt->shadow_page.type);
1058 	}
1059 	return spt;
1060 
1061 err_free_spt:
1062 	ppgtt_free_spt(spt);
1063 	spt = NULL;
1064 err:
1065 	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
1066 		     spt, we->val64, we->type);
1067 	return ERR_PTR(ret);
1068 }
1069 
1070 static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
1071 		struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge)
1072 {
1073 	const struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops;
1074 
1075 	se->type = ge->type;
1076 	se->val64 = ge->val64;
1077 
1078 	/* We always split 64KB pages, so clear IPS in the shadow PDE. */
1079 	if (se->type == GTT_TYPE_PPGTT_PDE_ENTRY)
1080 		ops->clear_ips(se);
1081 
1082 	ops->set_pfn(se, s->shadow_page.mfn);
1083 }
1084 
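/*
 * Shadow a guest 2MB entry with a sub page table of 4K entries. Used when
 * the host can't provide a single 2MB DMA mapping for the guest page.
 */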
1085 static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
1086 	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
1087 	struct intel_gvt_gtt_entry *se)
1088 {
1089 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1090 	struct intel_vgpu_ppgtt_spt *sub_spt;
1091 	struct intel_gvt_gtt_entry sub_se;
1092 	unsigned long start_gfn;
1093 	dma_addr_t dma_addr;
1094 	unsigned long sub_index;
1095 	int ret;
1096 
1097 	gvt_dbg_mm("Split 2M gtt entry, index %lu\n", index);
1098 
1099 	start_gfn = ops->get_pfn(se);
1100 
1101 	sub_spt = ppgtt_alloc_spt(vgpu, GTT_TYPE_PPGTT_PTE_PT);
1102 	if (IS_ERR(sub_spt))
1103 		return PTR_ERR(sub_spt);
1104 
1105 	for_each_shadow_entry(sub_spt, &sub_se, sub_index) {
1106 		ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + sub_index,
1107 						   PAGE_SIZE, &dma_addr);
1108 		if (ret)
1109 			goto err;
1110 		sub_se.val64 = se->val64;
1111 
1112 		/* Copy the PAT field from PDE. */
1113 		sub_se.val64 &= ~_PAGE_PAT;
1114 		sub_se.val64 |= (se->val64 & _PAGE_PAT_LARGE) >> 5;
1115 
1116 		ops->set_pfn(&sub_se, dma_addr >> PAGE_SHIFT);
1117 		ppgtt_set_shadow_entry(sub_spt, &sub_se, sub_index);
1118 	}
1119 
1120 	/* Clear dirty field. */
1121 	se->val64 &= ~_PAGE_DIRTY;
1122 
1123 	ops->clear_pse(se);
1124 	ops->clear_ips(se);
1125 	ops->set_pfn(se, sub_spt->shadow_page.mfn);
1126 	ppgtt_set_shadow_entry(spt, se, index);
1127 	return 0;
1128 err:
1129 	/* Undo the DMA mappings that have already been set up. */
1130 	for_each_present_shadow_entry(sub_spt, &sub_se, sub_index) {
1131 		gvt_vdbg_mm("invalidate 4K entry\n");
1132 		ppgtt_invalidate_pte(sub_spt, &sub_se);
1133 	}
1134 	/* Release the newly allocated spt. */
1135 	trace_spt_change(sub_spt->vgpu->id, "release", sub_spt,
1136 		sub_spt->guest_page.gfn, sub_spt->shadow_page.type);
1137 	ppgtt_free_spt(sub_spt);
1138 	return ret;
1139 }
1140 
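/* Shadow a guest 64K entry as GTT_64K_PTE_STRIDE (16) consecutive 4K entries. */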
1141 static int split_64KB_gtt_entry(struct intel_vgpu *vgpu,
1142 	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
1143 	struct intel_gvt_gtt_entry *se)
1144 {
1145 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1146 	struct intel_gvt_gtt_entry entry = *se;
1147 	unsigned long start_gfn;
1148 	dma_addr_t dma_addr;
1149 	int i, ret;
1150 
1151 	gvt_vdbg_mm("Split 64K gtt entry, index %lu\n", index);
1152 
1153 	GEM_BUG_ON(index % GTT_64K_PTE_STRIDE);
1154 
1155 	start_gfn = ops->get_pfn(se);
1156 
1157 	entry.type = GTT_TYPE_PPGTT_PTE_4K_ENTRY;
1158 	ops->set_64k_splited(&entry);
1159 
1160 	for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
1161 		ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + i,
1162 						   PAGE_SIZE, &dma_addr);
1163 		if (ret)
1164 			return ret;
1165 
1166 		ops->set_pfn(&entry, dma_addr >> PAGE_SHIFT);
1167 		ppgtt_set_shadow_entry(spt, &entry, index + i);
1168 	}
1169 	return 0;
1170 }
1171 
1172 static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
1173 	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
1174 	struct intel_gvt_gtt_entry *ge)
1175 {
1176 	const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
1177 	struct intel_gvt_gtt_entry se = *ge;
1178 	unsigned long gfn;
1179 	dma_addr_t dma_addr;
1180 	int ret;
1181 
1182 	if (!pte_ops->test_present(ge))
1183 		return 0;
1184 
1185 	gfn = pte_ops->get_pfn(ge);
1186 
1187 	switch (ge->type) {
1188 	case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
1189 		gvt_vdbg_mm("shadow 4K gtt entry\n");
1190 		ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, &dma_addr);
1191 		if (ret)
1192 			return -ENXIO;
1193 		break;
1194 	case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
1195 		gvt_vdbg_mm("shadow 64K gtt entry\n");
1196 		/*
1197 		 * The 64K page layout is special: the page size is
1198 		 * controlled by the upper PDE. To keep it simple, we always
1199 		 * split a 64K page into 4K pages in the shadow PT.
1200 		 */
1201 		return split_64KB_gtt_entry(vgpu, spt, index, &se);
1202 	case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
1203 		gvt_vdbg_mm("shadow 2M gtt entry\n");
1204 		if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M) ||
1205 		    intel_gvt_dma_map_guest_page(vgpu, gfn,
1206 						 I915_GTT_PAGE_SIZE_2M, &dma_addr))
1207 			return split_2MB_gtt_entry(vgpu, spt, index, &se);
1208 		break;
1209 	case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
1210 		gvt_vgpu_err("GVT doesn't support 1GB entry\n");
1211 		return -EINVAL;
1212 	default:
1213 		GEM_BUG_ON(1);
1214 		return -EINVAL;
1215 	}
1216 
1217 	/* Successfully shadowed a 4K or 2M page (without splitting). */
1218 	pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT);
1219 	ppgtt_set_shadow_entry(spt, &se, index);
1220 	return 0;
1221 }
1222 
1223 static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt)
1224 {
1225 	struct intel_vgpu *vgpu = spt->vgpu;
1226 	struct intel_vgpu_ppgtt_spt *s;
1227 	struct intel_gvt_gtt_entry se, ge;
1228 	unsigned long i;
1229 	int ret;
1230 
1231 	trace_spt_change(spt->vgpu->id, "born", spt,
1232 			 spt->guest_page.gfn, spt->shadow_page.type);
1233 
1234 	for_each_present_guest_entry(spt, &ge, i) {
1235 		if (gtt_type_is_pt(get_next_pt_type(ge.type))) {
1236 			s = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
1237 			if (IS_ERR(s)) {
1238 				ret = PTR_ERR(s);
1239 				goto fail;
1240 			}
1241 			ppgtt_get_shadow_entry(spt, &se, i);
1242 			ppgtt_generate_shadow_entry(&se, s, &ge);
1243 			ppgtt_set_shadow_entry(spt, &se, i);
1244 		} else {
1245 			ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge);
1246 			if (ret)
1247 				goto fail;
1248 		}
1249 	}
1250 	return 0;
1251 fail:
1252 	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
1253 			spt, ge.val64, ge.type);
1254 	return ret;
1255 }
1256 
1257 static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt,
1258 		struct intel_gvt_gtt_entry *se, unsigned long index)
1259 {
1260 	struct intel_vgpu *vgpu = spt->vgpu;
1261 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1262 	int ret;
1263 
1264 	trace_spt_guest_change(spt->vgpu->id, "remove", spt,
1265 			       spt->shadow_page.type, se->val64, index);
1266 
1267 	gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n",
1268 		    se->type, index, se->val64);
1269 
1270 	if (!ops->test_present(se))
1271 		return 0;
1272 
1273 	if (ops->get_pfn(se) ==
1274 	    vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn)
1275 		return 0;
1276 
1277 	if (gtt_type_is_pt(get_next_pt_type(se->type))) {
1278 		struct intel_vgpu_ppgtt_spt *s =
1279 			intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se));
1280 		if (!s) {
1281 			gvt_vgpu_err("fail to find guest page\n");
1282 			ret = -ENXIO;
1283 			goto fail;
1284 		}
1285 		ret = ppgtt_invalidate_spt(s);
1286 		if (ret)
1287 			goto fail;
1288 	} else {
1289 		/* We don't setup 64K shadow entry so far. */
1290 		/* We don't set up 64K shadow entries so far. */
1291 		     "suspicious 64K entry\n");
1292 		ppgtt_invalidate_pte(spt, se);
1293 	}
1294 
1295 	return 0;
1296 fail:
1297 	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
1298 			spt, se->val64, se->type);
1299 	return ret;
1300 }
1301 
1302 static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt,
1303 		struct intel_gvt_gtt_entry *we, unsigned long index)
1304 {
1305 	struct intel_vgpu *vgpu = spt->vgpu;
1306 	struct intel_gvt_gtt_entry m;
1307 	struct intel_vgpu_ppgtt_spt *s;
1308 	int ret;
1309 
1310 	trace_spt_guest_change(spt->vgpu->id, "add", spt, spt->shadow_page.type,
1311 			       we->val64, index);
1312 
1313 	gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n",
1314 		    we->type, index, we->val64);
1315 
1316 	if (gtt_type_is_pt(get_next_pt_type(we->type))) {
1317 		s = ppgtt_populate_spt_by_guest_entry(vgpu, we);
1318 		if (IS_ERR(s)) {
1319 			ret = PTR_ERR(s);
1320 			goto fail;
1321 		}
1322 		ppgtt_get_shadow_entry(spt, &m, index);
1323 		ppgtt_generate_shadow_entry(&m, s, we);
1324 		ppgtt_set_shadow_entry(spt, &m, index);
1325 	} else {
1326 		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we);
1327 		if (ret)
1328 			goto fail;
1329 	}
1330 	return 0;
1331 fail:
1332 	gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n",
1333 		spt, we->val64, we->type);
1334 	return ret;
1335 }
1336 
1337 static int sync_oos_page(struct intel_vgpu *vgpu,
1338 		struct intel_vgpu_oos_page *oos_page)
1339 {
1340 	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1341 	struct intel_gvt *gvt = vgpu->gvt;
1342 	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
1343 	struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
1344 	struct intel_gvt_gtt_entry old, new;
1345 	int index;
1346 	int ret;
1347 
1348 	trace_oos_change(vgpu->id, "sync", oos_page->id,
1349 			 spt, spt->guest_page.type);
1350 
1351 	old.type = new.type = get_entry_type(spt->guest_page.type);
1352 	old.val64 = new.val64 = 0;
1353 
1354 	for (index = 0; index < (I915_GTT_PAGE_SIZE >>
1355 				info->gtt_entry_size_shift); index++) {
1356 		ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu);
1357 		ops->get_entry(NULL, &new, index, true,
1358 			       spt->guest_page.gfn << PAGE_SHIFT, vgpu);
1359 
1360 		if (old.val64 == new.val64
1361 			&& !test_and_clear_bit(index, spt->post_shadow_bitmap))
1362 			continue;
1363 
1364 		trace_oos_sync(vgpu->id, oos_page->id,
1365 				spt, spt->guest_page.type,
1366 				new.val64, index);
1367 
1368 		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new);
1369 		if (ret)
1370 			return ret;
1371 
1372 		ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu);
1373 	}
1374 
1375 	spt->guest_page.write_cnt = 0;
1376 	list_del_init(&spt->post_shadow_list);
1377 	return 0;
1378 }
1379 
1380 static int detach_oos_page(struct intel_vgpu *vgpu,
1381 		struct intel_vgpu_oos_page *oos_page)
1382 {
1383 	struct intel_gvt *gvt = vgpu->gvt;
1384 	struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
1385 
1386 	trace_oos_change(vgpu->id, "detach", oos_page->id,
1387 			 spt, spt->guest_page.type);
1388 
1389 	spt->guest_page.write_cnt = 0;
1390 	spt->guest_page.oos_page = NULL;
1391 	oos_page->spt = NULL;
1392 
1393 	list_del_init(&oos_page->vm_list);
1394 	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head);
1395 
1396 	return 0;
1397 }
1398 
1399 static int attach_oos_page(struct intel_vgpu_oos_page *oos_page,
1400 		struct intel_vgpu_ppgtt_spt *spt)
1401 {
1402 	struct intel_gvt *gvt = spt->vgpu->gvt;
1403 	int ret;
1404 
1405 	ret = intel_gvt_read_gpa(spt->vgpu,
1406 			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
1407 			oos_page->mem, I915_GTT_PAGE_SIZE);
1408 	if (ret)
1409 		return ret;
1410 
1411 	oos_page->spt = spt;
1412 	spt->guest_page.oos_page = oos_page;
1413 
1414 	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head);
1415 
1416 	trace_oos_change(spt->vgpu->id, "attach", oos_page->id,
1417 			 spt, spt->guest_page.type);
1418 	return 0;
1419 }
1420 
1421 static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt)
1422 {
1423 	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
1424 	int ret;
1425 
1426 	ret = intel_vgpu_enable_page_track(spt->vgpu, spt->guest_page.gfn);
1427 	if (ret)
1428 		return ret;
1429 
1430 	trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id,
1431 			 spt, spt->guest_page.type);
1432 
1433 	list_del_init(&oos_page->vm_list);
1434 	return sync_oos_page(spt->vgpu, oos_page);
1435 }
1436 
1437 static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt)
1438 {
1439 	struct intel_gvt *gvt = spt->vgpu->gvt;
1440 	struct intel_gvt_gtt *gtt = &gvt->gtt;
1441 	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
1442 	int ret;
1443 
1444 	WARN(oos_page, "shadow PPGTT page already has an oos page\n");
1445 
1446 	if (list_empty(&gtt->oos_page_free_list_head)) {
1447 		oos_page = container_of(gtt->oos_page_use_list_head.next,
1448 			struct intel_vgpu_oos_page, list);
1449 		ret = ppgtt_set_guest_page_sync(oos_page->spt);
1450 		if (ret)
1451 			return ret;
1452 		ret = detach_oos_page(spt->vgpu, oos_page);
1453 		if (ret)
1454 			return ret;
1455 	} else
1456 		oos_page = container_of(gtt->oos_page_free_list_head.next,
1457 			struct intel_vgpu_oos_page, list);
1458 	return attach_oos_page(oos_page, spt);
1459 }
1460 
1461 static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt)
1462 {
1463 	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
1464 
1465 	if (WARN(!oos_page, "shadow PPGTT page should have an oos page\n"))
1466 		return -EINVAL;
1467 
1468 	trace_oos_change(spt->vgpu->id, "set page out of sync", oos_page->id,
1469 			 spt, spt->guest_page.type);
1470 
1471 	list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head);
1472 	return intel_vgpu_disable_page_track(spt->vgpu, spt->guest_page.gfn);
1473 }
1474 
1475 /**
1476  * intel_vgpu_sync_oos_pages - sync all the out-of-sync shadow pages of a vGPU
1477  * @vgpu: a vGPU
1478  *
1479  * This function is called before submitting a guest workload to host,
1480  * to sync all the out-of-sync shadow pages of the vGPU.
1481  *
1482  * Returns:
1483  * Zero on success, negative error code if failed.
1484  */
1485 int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu)
1486 {
1487 	struct list_head *pos, *n;
1488 	struct intel_vgpu_oos_page *oos_page;
1489 	int ret;
1490 
1491 	if (!enable_out_of_sync)
1492 		return 0;
1493 
1494 	list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) {
1495 		oos_page = container_of(pos,
1496 				struct intel_vgpu_oos_page, vm_list);
1497 		ret = ppgtt_set_guest_page_sync(oos_page->spt);
1498 		if (ret)
1499 			return ret;
1500 	}
1501 	return 0;
1502 }
1503 
1504 /*
1505  * The heart of PPGTT shadow page table.
1506  */
1507 static int ppgtt_handle_guest_write_page_table(
1508 		struct intel_vgpu_ppgtt_spt *spt,
1509 		struct intel_gvt_gtt_entry *we, unsigned long index)
1510 {
1511 	struct intel_vgpu *vgpu = spt->vgpu;
1512 	int type = spt->shadow_page.type;
1513 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1514 	struct intel_gvt_gtt_entry old_se;
1515 	int new_present;
1516 	int i, ret;
1517 
1518 	new_present = ops->test_present(we);
1519 
1520 	/*
1521 	 * Add the new entry first and then remove the old one. This keeps
1522 	 * the ppgtt table valid during the window between the addition and
1523 	 * the removal.
1524 	 */
1525 	ppgtt_get_shadow_entry(spt, &old_se, index);
1526 
1527 	if (new_present) {
1528 		ret = ppgtt_handle_guest_entry_add(spt, we, index);
1529 		if (ret)
1530 			goto fail;
1531 	}
1532 
1533 	ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index);
1534 	if (ret)
1535 		goto fail;
1536 
1537 	if (!new_present) {
1538 		/* For split 64KB entries, we need to clear them all. */
1539 		if (ops->test_64k_splited(&old_se) &&
1540 		    !(index % GTT_64K_PTE_STRIDE)) {
1541 			gvt_vdbg_mm("remove splited 64K shadow entries\n");
1542 			for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
1543 				ops->clear_64k_splited(&old_se);
1544 				ops->set_pfn(&old_se,
1545 					vgpu->gtt.scratch_pt[type].page_mfn);
1546 				ppgtt_set_shadow_entry(spt, &old_se, index + i);
1547 			}
1548 		} else if (old_se.type == GTT_TYPE_PPGTT_PTE_2M_ENTRY ||
1549 			   old_se.type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
1550 			ops->clear_pse(&old_se);
1551 			ops->set_pfn(&old_se,
1552 				     vgpu->gtt.scratch_pt[type].page_mfn);
1553 			ppgtt_set_shadow_entry(spt, &old_se, index);
1554 		} else {
1555 			ops->set_pfn(&old_se,
1556 				     vgpu->gtt.scratch_pt[type].page_mfn);
1557 			ppgtt_set_shadow_entry(spt, &old_se, index);
1558 		}
1559 	}
1560 
1561 	return 0;
1562 fail:
1563 	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n",
1564 			spt, we->val64, we->type);
1565 	return ret;
1566 }
1567 
1568 
1569 
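/*
 * A PTE page table may go out of sync (write protection dropped) once the
 * guest has written it at least twice, so that every following PTE update
 * no longer traps; the page is re-synced before the next workload runs.
 */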
1570 static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt)
1571 {
1572 	return enable_out_of_sync
1573 		&& gtt_type_is_pte_pt(spt->guest_page.type)
1574 		&& spt->guest_page.write_cnt >= 2;
1575 }
1576 
1577 static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt,
1578 		unsigned long index)
1579 {
1580 	set_bit(index, spt->post_shadow_bitmap);
1581 	if (!list_empty(&spt->post_shadow_list))
1582 		return;
1583 
1584 	list_add_tail(&spt->post_shadow_list,
1585 			&spt->vgpu->gtt.post_shadow_list_head);
1586 }
1587 
1588 /**
1589  * intel_vgpu_flush_post_shadow - flush the post shadow transactions
1590  * @vgpu: a vGPU
1591  *
1592  * This function is called before submitting a guest workload to host,
1593  * to flush all the post shadows for a vGPU.
1594  *
1595  * Returns:
1596  * Zero on success, negative error code if failed.
1597  */
1598 int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
1599 {
1600 	struct list_head *pos, *n;
1601 	struct intel_vgpu_ppgtt_spt *spt;
1602 	struct intel_gvt_gtt_entry ge;
1603 	unsigned long index;
1604 	int ret;
1605 
1606 	list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) {
1607 		spt = container_of(pos, struct intel_vgpu_ppgtt_spt,
1608 				post_shadow_list);
1609 
1610 		for_each_set_bit(index, spt->post_shadow_bitmap,
1611 				GTT_ENTRY_NUM_IN_ONE_PAGE) {
1612 			ppgtt_get_guest_entry(spt, &ge, index);
1613 
1614 			ret = ppgtt_handle_guest_write_page_table(spt,
1615 							&ge, index);
1616 			if (ret)
1617 				return ret;
1618 			clear_bit(index, spt->post_shadow_bitmap);
1619 		}
1620 		list_del_init(&spt->post_shadow_list);
1621 	}
1622 	return 0;
1623 }
1624 
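/*
 * Handle a trapped guest write to a write-protected page table page.
 * Writes of a whole entry are shadowed immediately; partial writes are
 * marked in the post-shadow bitmap and applied later by
 * intel_vgpu_flush_post_shadow().
 */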
1625 static int ppgtt_handle_guest_write_page_table_bytes(
1626 		struct intel_vgpu_ppgtt_spt *spt,
1627 		u64 pa, void *p_data, int bytes)
1628 {
1629 	struct intel_vgpu *vgpu = spt->vgpu;
1630 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1631 	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1632 	struct intel_gvt_gtt_entry we, se;
1633 	unsigned long index;
1634 	int ret;
1635 
1636 	index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift;
1637 
1638 	ppgtt_get_guest_entry(spt, &we, index);
1639 
1640 	/*
1641 	 * For a page table that holds 64K gtt entries, only PTE#0, PTE#16,
1642 	 * PTE#32, ... PTE#496 are used. Writes to the unused PTEs should be
1643 	 * ignored.
1644 	 */
1645 	if (we.type == GTT_TYPE_PPGTT_PTE_64K_ENTRY &&
1646 	    (index % GTT_64K_PTE_STRIDE)) {
1647 		gvt_vdbg_mm("Ignore write to unused PTE entry, index %lu\n",
1648 			    index);
1649 		return 0;
1650 	}
1651 
1652 	if (bytes == info->gtt_entry_size) {
1653 		ret = ppgtt_handle_guest_write_page_table(spt, &we, index);
1654 		if (ret)
1655 			return ret;
1656 	} else {
1657 		if (!test_bit(index, spt->post_shadow_bitmap)) {
1658 			int type = spt->shadow_page.type;
1659 
1660 			ppgtt_get_shadow_entry(spt, &se, index);
1661 			ret = ppgtt_handle_guest_entry_removal(spt, &se, index);
1662 			if (ret)
1663 				return ret;
1664 			ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
1665 			ppgtt_set_shadow_entry(spt, &se, index);
1666 		}
1667 		ppgtt_set_post_shadow(spt, index);
1668 	}
1669 
1670 	if (!enable_out_of_sync)
1671 		return 0;
1672 
1673 	spt->guest_page.write_cnt++;
1674 
1675 	if (spt->guest_page.oos_page)
1676 		ops->set_entry(spt->guest_page.oos_page->mem, &we, index,
1677 				false, 0, vgpu);
1678 
1679 	if (can_do_out_of_sync(spt)) {
1680 		if (!spt->guest_page.oos_page)
1681 			ppgtt_allocate_oos_page(spt);
1682 
1683 		ret = ppgtt_set_guest_page_oos(spt);
1684 		if (ret < 0)
1685 			return ret;
1686 	}
1687 	return 0;
1688 }
1689 
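/* Tear down the shadow page tables behind a shadowed PPGTT mm. */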
1690 static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
1691 {
1692 	struct intel_vgpu *vgpu = mm->vgpu;
1693 	struct intel_gvt *gvt = vgpu->gvt;
1694 	struct intel_gvt_gtt *gtt = &gvt->gtt;
1695 	const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
1696 	struct intel_gvt_gtt_entry se;
1697 	int index;
1698 
1699 	if (!mm->ppgtt_mm.shadowed)
1700 		return;
1701 
1702 	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) {
1703 		ppgtt_get_shadow_root_entry(mm, &se, index);
1704 
1705 		if (!ops->test_present(&se))
1706 			continue;
1707 
1708 		ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se);
1709 		se.val64 = 0;
1710 		ppgtt_set_shadow_root_entry(mm, &se, index);
1711 
1712 		trace_spt_guest_change(vgpu->id, "destroy root pointer",
1713 				       NULL, se.type, se.val64, index);
1714 	}
1715 
1716 	mm->ppgtt_mm.shadowed = false;
1717 }
1718 
1719 
1720 static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
1721 {
1722 	struct intel_vgpu *vgpu = mm->vgpu;
1723 	struct intel_gvt *gvt = vgpu->gvt;
1724 	struct intel_gvt_gtt *gtt = &gvt->gtt;
1725 	const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
1726 	struct intel_vgpu_ppgtt_spt *spt;
1727 	struct intel_gvt_gtt_entry ge, se;
1728 	int index, ret;
1729 
1730 	if (mm->ppgtt_mm.shadowed)
1731 		return 0;
1732 
1733 	if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
1734 		return -EINVAL;
1735 
1736 	mm->ppgtt_mm.shadowed = true;
1737 
1738 	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
1739 		ppgtt_get_guest_root_entry(mm, &ge, index);
1740 
1741 		if (!ops->test_present(&ge))
1742 			continue;
1743 
1744 		trace_spt_guest_change(vgpu->id, __func__, NULL,
1745 				       ge.type, ge.val64, index);
1746 
1747 		spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
1748 		if (IS_ERR(spt)) {
1749 			gvt_vgpu_err("fail to populate guest root pointer\n");
1750 			ret = PTR_ERR(spt);
1751 			goto fail;
1752 		}
1753 		ppgtt_generate_shadow_entry(&se, spt, &ge);
1754 		ppgtt_set_shadow_root_entry(mm, &se, index);
1755 
1756 		trace_spt_guest_change(vgpu->id, "populate root pointer",
1757 				       NULL, se.type, se.val64, index);
1758 	}
1759 
1760 	return 0;
1761 fail:
1762 	invalidate_ppgtt_mm(mm);
1763 	return ret;
1764 }
1765 
1766 static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu)
1767 {
1768 	struct intel_vgpu_mm *mm;
1769 
1770 	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
1771 	if (!mm)
1772 		return NULL;
1773 
1774 	mm->vgpu = vgpu;
1775 	kref_init(&mm->ref);
1776 	atomic_set(&mm->pincount, 0);
1777 
1778 	return mm;
1779 }
1780 
1781 static void vgpu_free_mm(struct intel_vgpu_mm *mm)
1782 {
1783 	kfree(mm);
1784 }
1785 
1786 /**
1787  * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU
1788  * @vgpu: a vGPU
1789  * @root_entry_type: ppgtt root entry type
1790  * @pdps: guest pdps.
1791  *
1792  * This function is used to create a ppgtt mm object for a vGPU.
1793  *
1794  * Returns:
1795  * The new ppgtt mm object on success, or an ERR_PTR encoded error code on failure.
1796  */
1797 struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
1798 		enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
1799 {
1800 	struct intel_gvt *gvt = vgpu->gvt;
1801 	struct intel_vgpu_mm *mm;
1802 	int ret;
1803 
1804 	mm = vgpu_alloc_mm(vgpu);
1805 	if (!mm)
1806 		return ERR_PTR(-ENOMEM);
1807 
1808 	mm->type = INTEL_GVT_MM_PPGTT;
1809 
1810 	GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY &&
1811 		   root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY);
1812 	mm->ppgtt_mm.root_entry_type = root_entry_type;
1813 
1814 	INIT_LIST_HEAD(&mm->ppgtt_mm.list);
1815 	INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list);
1816 	INIT_LIST_HEAD(&mm->ppgtt_mm.link);
1817 
1818 	if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
1819 		mm->ppgtt_mm.guest_pdps[0] = pdps[0];
1820 	else
1821 		memcpy(mm->ppgtt_mm.guest_pdps, pdps,
1822 		       sizeof(mm->ppgtt_mm.guest_pdps));
1823 
1824 	ret = shadow_ppgtt_mm(mm);
1825 	if (ret) {
1826 		gvt_vgpu_err("failed to shadow ppgtt mm\n");
1827 		vgpu_free_mm(mm);
1828 		return ERR_PTR(ret);
1829 	}
1830 
1831 	list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head);
1832 
1833 	mutex_lock(&gvt->gtt.ppgtt_mm_lock);
1834 	list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head);
1835 	mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
1836 
1837 	return mm;
1838 }
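
/*
 * Illustrative sketch, not driver code: how a caller might create a 4-level
 * PPGTT mm object. The vgpu pointer and the pdps[] array are assumed to be
 * supplied by the caller (hypothetical here); for an L4 root only pdps[0] is
 * consumed. On failure the mm has already been freed internally, so the
 * caller only propagates the error.
 *
 *	struct intel_vgpu_mm *mm;
 *
 *	mm = intel_vgpu_create_ppgtt_mm(vgpu, GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
 *					pdps);
 *	if (IS_ERR(mm))
 *		return PTR_ERR(mm);
 */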
1839 
1840 static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
1841 {
1842 	struct intel_vgpu_mm *mm;
1843 	unsigned long nr_entries;
1844 
1845 	mm = vgpu_alloc_mm(vgpu);
1846 	if (!mm)
1847 		return ERR_PTR(-ENOMEM);
1848 
1849 	mm->type = INTEL_GVT_MM_GGTT;
1850 
1851 	nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT;
1852 	mm->ggtt_mm.virtual_ggtt =
1853 		vzalloc(array_size(nr_entries,
1854 				   vgpu->gvt->device_info.gtt_entry_size));
1855 	if (!mm->ggtt_mm.virtual_ggtt) {
1856 		vgpu_free_mm(mm);
1857 		return ERR_PTR(-ENOMEM);
1858 	}
1859 
1860 	mm->ggtt_mm.host_ggtt_aperture = vzalloc((vgpu_aperture_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64));
1861 	if (!mm->ggtt_mm.host_ggtt_aperture) {
1862 		vfree(mm->ggtt_mm.virtual_ggtt);
1863 		vgpu_free_mm(mm);
1864 		return ERR_PTR(-ENOMEM);
1865 	}
1866 
1867 	mm->ggtt_mm.host_ggtt_hidden = vzalloc((vgpu_hidden_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64));
1868 	if (!mm->ggtt_mm.host_ggtt_hidden) {
1869 		vfree(mm->ggtt_mm.host_ggtt_aperture);
1870 		vfree(mm->ggtt_mm.virtual_ggtt);
1871 		vgpu_free_mm(mm);
1872 		return ERR_PTR(-ENOMEM);
1873 	}
1874 
1875 	return mm;
1876 }
1877 
1878 /**
1879  * _intel_vgpu_mm_release - destroy a mm object
1880  * @mm_ref: a kref object
1881  *
1882  * This function is used to destroy a mm object for vGPU
1883  *
1884  */
1885 void _intel_vgpu_mm_release(struct kref *mm_ref)
1886 {
1887 	struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);
1888 
1889 	if (GEM_WARN_ON(atomic_read(&mm->pincount)))
1890 		gvt_err("vgpu mm pin count bug detected\n");
1891 
1892 	if (mm->type == INTEL_GVT_MM_PPGTT) {
1893 		list_del(&mm->ppgtt_mm.list);
1894 
1895 		mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
1896 		list_del(&mm->ppgtt_mm.lru_list);
1897 		mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
1898 
1899 		invalidate_ppgtt_mm(mm);
1900 	} else {
1901 		vfree(mm->ggtt_mm.virtual_ggtt);
1902 		vfree(mm->ggtt_mm.host_ggtt_aperture);
1903 		vfree(mm->ggtt_mm.host_ggtt_hidden);
1904 	}
1905 
1906 	vgpu_free_mm(mm);
1907 }
1908 
1909 /**
1910  * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object
1911  * @mm: a vGPU mm object
1912  *
1913  * This function is called when a user no longer needs a vGPU mm object
1914  */
1915 void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
1916 {
1917 	atomic_dec_if_positive(&mm->pincount);
1918 }
1919 
1920 /**
1921  * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object
1922  * @mm: target vgpu mm
1923  *
1924  * This function is called when a user wants to use a vGPU mm object. If this
1925  * mm object hasn't been shadowed yet, the shadow will be populated at this
1926  * time.
1927  *
1928  * Returns:
1929  * Zero on success, negative error code if failed.
1930  */
1931 int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
1932 {
1933 	int ret;
1934 
1935 	atomic_inc(&mm->pincount);
1936 
1937 	if (mm->type == INTEL_GVT_MM_PPGTT) {
1938 		ret = shadow_ppgtt_mm(mm);
1939 		if (ret)
1940 			return ret;
1941 
1942 		mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
1943 		list_move_tail(&mm->ppgtt_mm.lru_list,
1944 			       &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head);
1945 		mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
1946 	}
1947 
1948 	return 0;
1949 }
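
/*
 * Illustrative sketch, not driver code: the expected pin/unpin pairing around
 * use of a mm object. Pinning a PPGTT mm (re)populates its shadow if needed
 * and moves it to the LRU tail; a pinned mm is also skipped by
 * reclaim_one_ppgtt_mm(). submit_workload() is a hypothetical placeholder for
 * whatever consumes the mm.
 *
 *	ret = intel_vgpu_pin_mm(mm);
 *	if (ret)
 *		return ret;
 *
 *	ret = submit_workload(mm);
 *
 *	intel_vgpu_unpin_mm(mm);
 */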
1950 
1951 static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt)
1952 {
1953 	struct intel_vgpu_mm *mm;
1954 	struct list_head *pos, *n;
1955 
1956 	mutex_lock(&gvt->gtt.ppgtt_mm_lock);
1957 
1958 	list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) {
1959 		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list);
1960 
1961 		if (atomic_read(&mm->pincount))
1962 			continue;
1963 
1964 		list_del_init(&mm->ppgtt_mm.lru_list);
1965 		mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
1966 		invalidate_ppgtt_mm(mm);
1967 		return 1;
1968 	}
1969 	mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
1970 	return 0;
1971 }
1972 
1973 /*
1974  * GMA translation APIs.
1975  */
1976 static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm,
1977 		struct intel_gvt_gtt_entry *e, unsigned long index, bool guest)
1978 {
1979 	struct intel_vgpu *vgpu = mm->vgpu;
1980 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1981 	struct intel_vgpu_ppgtt_spt *s;
1982 
1983 	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
1984 	if (!s)
1985 		return -ENXIO;
1986 
1987 	if (!guest)
1988 		ppgtt_get_shadow_entry(s, e, index);
1989 	else
1990 		ppgtt_get_guest_entry(s, e, index);
1991 	return 0;
1992 }
1993 
1994 /**
1995  * intel_vgpu_gma_to_gpa - translate a gma to GPA
1996  * @mm: mm object. could be a PPGTT or GGTT mm object
1997  * @gma: graphics memory address in this mm object
1998  *
1999  * This function is used to translate a graphics memory address in specific
2000  * graphics memory space to guest physical address.
2001  *
2002  * Returns:
2003  * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed.
2004  */
2005 unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
2006 {
2007 	struct intel_vgpu *vgpu = mm->vgpu;
2008 	struct intel_gvt *gvt = vgpu->gvt;
2009 	const struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops;
2010 	const struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops;
2011 	unsigned long gpa = INTEL_GVT_INVALID_ADDR;
2012 	unsigned long gma_index[4];
2013 	struct intel_gvt_gtt_entry e;
2014 	int i, levels = 0;
2015 	int ret;
2016 
2017 	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT &&
2018 		   mm->type != INTEL_GVT_MM_PPGTT);
2019 
2020 	if (mm->type == INTEL_GVT_MM_GGTT) {
2021 		if (!vgpu_gmadr_is_valid(vgpu, gma))
2022 			goto err;
2023 
2024 		ggtt_get_guest_entry(mm, &e,
2025 			gma_ops->gma_to_ggtt_pte_index(gma));
2026 
2027 		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT)
2028 			+ (gma & ~I915_GTT_PAGE_MASK);
2029 
2030 		trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa);
2031 	} else {
2032 		switch (mm->ppgtt_mm.root_entry_type) {
2033 		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
2034 			ppgtt_get_shadow_root_entry(mm, &e, 0);
2035 
2036 			gma_index[0] = gma_ops->gma_to_pml4_index(gma);
2037 			gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
2038 			gma_index[2] = gma_ops->gma_to_pde_index(gma);
2039 			gma_index[3] = gma_ops->gma_to_pte_index(gma);
2040 			levels = 4;
2041 			break;
2042 		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
2043 			ppgtt_get_shadow_root_entry(mm, &e,
2044 					gma_ops->gma_to_l3_pdp_index(gma));
2045 
2046 			gma_index[0] = gma_ops->gma_to_pde_index(gma);
2047 			gma_index[1] = gma_ops->gma_to_pte_index(gma);
2048 			levels = 2;
2049 			break;
2050 		default:
2051 			GEM_BUG_ON(1);
2052 		}
2053 
2054 		/* walk the shadow page table and get gpa from guest entry */
2055 		for (i = 0; i < levels; i++) {
2056 			ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
2057 				(i == levels - 1));
2058 			if (ret)
2059 				goto err;
2060 
2061 			if (!pte_ops->test_present(&e)) {
2062 				gvt_dbg_core("GMA 0x%lx is not present\n", gma);
2063 				goto err;
2064 			}
2065 		}
2066 
2067 		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) +
2068 					(gma & ~I915_GTT_PAGE_MASK);
2069 		trace_gma_translate(vgpu->id, "ppgtt", 0,
2070 				    mm->ppgtt_mm.root_entry_type, gma, gpa);
2071 	}
2072 
2073 	return gpa;
2074 err:
2075 	gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
2076 	return INTEL_GVT_INVALID_ADDR;
2077 }
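
/*
 * Illustrative sketch, not driver code: translating a guest graphics memory
 * address through a mm object and checking for failure. The mm and gma values
 * are assumed to come from the caller (hypothetical here).
 *
 *	unsigned long gpa;
 *
 *	gpa = intel_vgpu_gma_to_gpa(mm, gma);
 *	if (gpa == INTEL_GVT_INVALID_ADDR)
 *		return -EFAULT;
 */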
2078 
2079 static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu,
2080 	unsigned int off, void *p_data, unsigned int bytes)
2081 {
2082 	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
2083 	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
2084 	unsigned long index = off >> info->gtt_entry_size_shift;
2085 	unsigned long gma;
2086 	struct intel_gvt_gtt_entry e;
2087 
2088 	if (bytes != 4 && bytes != 8)
2089 		return -EINVAL;
2090 
2091 	gma = index << I915_GTT_PAGE_SHIFT;
2092 	if (!intel_gvt_ggtt_validate_range(vgpu,
2093 					   gma, 1 << I915_GTT_PAGE_SHIFT)) {
2094 		gvt_dbg_mm("read invalid ggtt at 0x%lx\n", gma);
2095 		memset(p_data, 0, bytes);
2096 		return 0;
2097 	}
2098 
2099 	ggtt_get_guest_entry(ggtt_mm, &e, index);
2100 	memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)),
2101 			bytes);
2102 	return 0;
2103 }
2104 
2105 /**
2106  * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read
2107  * @vgpu: a vGPU
2108  * @off: register offset
2109  * @p_data: data to be returned to the guest
2110  * @bytes: data length
2111  *
2112  * This function is used to emulate the GTT MMIO register read
2113  *
2114  * Returns:
2115  * Zero on success, error code if failed.
2116  */
2117 int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
2118 	void *p_data, unsigned int bytes)
2119 {
2120 	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
2121 	int ret;
2122 
2123 	if (bytes != 4 && bytes != 8)
2124 		return -EINVAL;
2125 
2126 	off -= info->gtt_start_offset;
2127 	ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes);
2128 	return ret;
2129 }
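
/*
 * Worked example, illustrative only: with an 8-byte GGTT entry size
 * (gtt_entry_size_shift == 3), a guest read at gtt_start_offset + 0x10
 * becomes off = 0x10 after the subtraction above, targets GGTT index
 * 0x10 >> 3 = 2 and therefore returns the PTE covering guest graphics
 * address 2 << I915_GTT_PAGE_SHIFT = 0x2000.
 */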
2130 
2131 static void ggtt_invalidate_pte(struct intel_vgpu *vgpu,
2132 		struct intel_gvt_gtt_entry *entry)
2133 {
2134 	const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
2135 	unsigned long pfn;
2136 
2137 	pfn = pte_ops->get_pfn(entry);
2138 	if (pfn != vgpu->gvt->gtt.scratch_mfn)
2139 		intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
2140 }
2141 
2142 static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
2143 	void *p_data, unsigned int bytes)
2144 {
2145 	struct intel_gvt *gvt = vgpu->gvt;
2146 	const struct intel_gvt_device_info *info = &gvt->device_info;
2147 	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
2148 	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
2149 	unsigned long g_gtt_index = off >> info->gtt_entry_size_shift;
2150 	unsigned long gma, gfn;
2151 	struct intel_gvt_gtt_entry e = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
2152 	struct intel_gvt_gtt_entry m = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
2153 	dma_addr_t dma_addr;
2154 	int ret;
2155 	struct intel_gvt_partial_pte *partial_pte, *pos, *n;
2156 	bool partial_update = false;
2157 
2158 	if (bytes != 4 && bytes != 8)
2159 		return -EINVAL;
2160 
2161 	gma = g_gtt_index << I915_GTT_PAGE_SHIFT;
2162 
2163 	/* the VM may configure the whole GM space when ballooning is used */
2164 	if (!vgpu_gmadr_is_valid(vgpu, gma))
2165 		return 0;
2166 
2167 	e.type = GTT_TYPE_GGTT_PTE;
2168 	memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
2169 			bytes);
2170 
2171 	/* If the ggtt entry size is 8 bytes and it is split into two 4-byte
2172 	 * writes, stash the first 4 bytes in a list and update only the
2173 	 * virtual PTE; update the shadow PTE when the second half arrives
2174 	 * (see the worked example after this function). */
2175 	if (bytes < info->gtt_entry_size) {
2176 		bool found = false;
2177 
2178 		list_for_each_entry_safe(pos, n,
2179 				&ggtt_mm->ggtt_mm.partial_pte_list, list) {
2180 			if (g_gtt_index == pos->offset >>
2181 					info->gtt_entry_size_shift) {
2182 				if (off != pos->offset) {
2183 					/* the second partial part */
2184 					int last_off = pos->offset &
2185 						(info->gtt_entry_size - 1);
2186 
2187 					memcpy((void *)&e.val64 + last_off,
2188 						(void *)&pos->data + last_off,
2189 						bytes);
2190 
2191 					list_del(&pos->list);
2192 					kfree(pos);
2193 					found = true;
2194 					break;
2195 				}
2196 
2197 				/* update of the first partial part */
2198 				pos->data = e.val64;
2199 				ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
2200 				return 0;
2201 			}
2202 		}
2203 
2204 		if (!found) {
2205 			/* the first partial part */
2206 			partial_pte = kzalloc(sizeof(*partial_pte), GFP_KERNEL);
2207 			if (!partial_pte)
2208 				return -ENOMEM;
2209 			partial_pte->offset = off;
2210 			partial_pte->data = e.val64;
2211 			list_add_tail(&partial_pte->list,
2212 				&ggtt_mm->ggtt_mm.partial_pte_list);
2213 			partial_update = true;
2214 		}
2215 	}
2216 
2217 	if (!partial_update && (ops->test_present(&e))) {
2218 		gfn = ops->get_pfn(&e);
2219 		m.val64 = e.val64;
2220 		m.type = e.type;
2221 
2222 		ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE,
2223 						   &dma_addr);
2224 		if (ret) {
2225 			gvt_vgpu_err("fail to populate guest ggtt entry\n");
2226 			/* The guest driver may read/write the entry during a
2227 			 * partial update; the p2m mapping can then fail, so
2228 			 * point the shadow entry at a scratch page instead.
2229 			 */
2230 			ops->set_pfn(&m, gvt->gtt.scratch_mfn);
2231 		} else
2232 			ops->set_pfn(&m, dma_addr >> PAGE_SHIFT);
2233 	} else {
2234 		ops->set_pfn(&m, gvt->gtt.scratch_mfn);
2235 		ops->clear_present(&m);
2236 	}
2237 
2238 	ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
2239 
2240 	ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index);
2241 	ggtt_invalidate_pte(vgpu, &e);
2242 
2243 	ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
2244 	ggtt_invalidate(gvt->gt);
2245 	return 0;
2246 }
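
/*
 * Worked example, illustrative only, of the partial-update path above:
 * a guest writing the 8-byte PTE at GGTT offset 0x100 as two 4-byte MMIO
 * writes first hits off = 0x100 (lower dword). Nothing for index
 * 0x100 >> 3 = 0x20 is on partial_pte_list yet, so the lower dword is
 * stashed there, the virtual PTE is updated and the shadow PTE is left
 * pointing at the scratch page. The second write at off = 0x104 (upper
 * dword) finds the stashed half, merges both dwords into one 64-bit
 * entry, frees the list node and only then, if the merged entry is
 * present, maps the guest page and updates the shadow PTE.
 */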
2247 
2248 /**
2249  * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write
2250  * @vgpu: a vGPU
2251  * @off: register offset
2252  * @p_data: data from guest write
2253  * @bytes: data length
2254  *
2255  * This function is used to emulate the GTT MMIO register write
2256  *
2257  * Returns:
2258  * Zero on success, error code if failed.
2259  */
2260 int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
2261 		unsigned int off, void *p_data, unsigned int bytes)
2262 {
2263 	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
2264 	int ret;
2265 	struct intel_vgpu_submission *s = &vgpu->submission;
2266 	struct intel_engine_cs *engine;
2267 	int i;
2268 
2269 	if (bytes != 4 && bytes != 8)
2270 		return -EINVAL;
2271 
2272 	off -= info->gtt_start_offset;
2273 	ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);
2274 
2275 	/* If the ggtt of the last submitted context is written,
2276 	 * that context probably got unpinned.
2277 	 * Mark the last shadowed ctx as invalid.
2278 	 */
2279 	for_each_engine(engine, vgpu->gvt->gt, i) {
2280 		if (!s->last_ctx[i].valid)
2281 			continue;
2282 
2283 		if (s->last_ctx[i].lrca == (off >> info->gtt_entry_size_shift))
2284 			s->last_ctx[i].valid = false;
2285 	}
2286 	return ret;
2287 }
2288 
2289 static int alloc_scratch_pages(struct intel_vgpu *vgpu,
2290 		enum intel_gvt_gtt_type type)
2291 {
2292 	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
2293 	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
2294 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
2295 	int page_entry_num = I915_GTT_PAGE_SIZE >>
2296 				vgpu->gvt->device_info.gtt_entry_size_shift;
2297 	void *scratch_pt;
2298 	int i;
2299 	struct device *dev = vgpu->gvt->gt->i915->drm.dev;
2300 	dma_addr_t daddr;
2301 
2302 	if (drm_WARN_ON(&i915->drm,
2303 			type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
2304 		return -EINVAL;
2305 
2306 	scratch_pt = (void *)get_zeroed_page(GFP_KERNEL);
2307 	if (!scratch_pt) {
2308 		gvt_vgpu_err("fail to allocate scratch page\n");
2309 		return -ENOMEM;
2310 	}
2311 
2312 	daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0, 4096, DMA_BIDIRECTIONAL);
2313 	if (dma_mapping_error(dev, daddr)) {
2314 		gvt_vgpu_err("fail to dmamap scratch_pt\n");
2315 		__free_page(virt_to_page(scratch_pt));
2316 		return -ENOMEM;
2317 	}
2318 	gtt->scratch_pt[type].page_mfn =
2319 		(unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
2320 	gtt->scratch_pt[type].page = virt_to_page(scratch_pt);
2321 	gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
2322 			vgpu->id, type, gtt->scratch_pt[type].page_mfn);
2323 
2324 	/* Build the tree by filling the scratch pt with entries that point
2325 	 * to the next-level scratch pt or scratch page. scratch_pt[type]
2326 	 * indicates the scratch pt/scratch page used by the 'type' page
2327 	 * table.
2328 	 * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by a
2329 	 * GTT_TYPE_PPGTT_PDE_PT level pt, which means this scratch_pt itself
2330 	 * is a GTT_TYPE_PPGTT_PTE_PT, filled with the scratch page mfn.
2331 	 */
2332 	if (type > GTT_TYPE_PPGTT_PTE_PT) {
2333 		struct intel_gvt_gtt_entry se;
2334 
2335 		memset(&se, 0, sizeof(struct intel_gvt_gtt_entry));
2336 		se.type = get_entry_type(type - 1);
2337 		ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);
2338 
2339 		/* The entry parameters (present/writeable/cache type) are set
2340 		 * to the same values as in i915's scratch page tree.
2341 		 */
2342 		se.val64 |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
2343 		if (type == GTT_TYPE_PPGTT_PDE_PT)
2344 			se.val64 |= PPAT_CACHED;
2345 
2346 		for (i = 0; i < page_entry_num; i++)
2347 			ops->set_entry(scratch_pt, &se, i, false, 0, vgpu);
2348 	}
2349 
2350 	return 0;
2351 }
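
/*
 * Illustrative chain, not driver code, of what the loop above builds for a
 * 4-level PPGTT (type names abbreviated, assuming the usual
 * GTT_TYPE_PPGTT_*_PT ordering):
 *
 *	scratch_pt[PML4_PT] entries -> scratch_pt[PDP_PT] page
 *	scratch_pt[PDP_PT]  entries -> scratch_pt[PDE_PT] page
 *	scratch_pt[PDE_PT]  entries -> scratch_pt[PTE_PT] page
 *	scratch_pt[PTE_PT]  is left as a zero-filled page
 *
 * so a table walk that falls into scratch terminates harmlessly at the
 * bottom level.
 */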
2352 
2353 static int release_scratch_page_tree(struct intel_vgpu *vgpu)
2354 {
2355 	int i;
2356 	struct device *dev = vgpu->gvt->gt->i915->drm.dev;
2357 	dma_addr_t daddr;
2358 
2359 	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
2360 		if (vgpu->gtt.scratch_pt[i].page != NULL) {
2361 			daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn <<
2362 					I915_GTT_PAGE_SHIFT);
2363 			dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);
2364 			__free_page(vgpu->gtt.scratch_pt[i].page);
2365 			vgpu->gtt.scratch_pt[i].page = NULL;
2366 			vgpu->gtt.scratch_pt[i].page_mfn = 0;
2367 		}
2368 	}
2369 
2370 	return 0;
2371 }
2372 
2373 static int create_scratch_page_tree(struct intel_vgpu *vgpu)
2374 {
2375 	int i, ret;
2376 
2377 	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
2378 		ret = alloc_scratch_pages(vgpu, i);
2379 		if (ret)
2380 			goto err;
2381 	}
2382 
2383 	return 0;
2384 
2385 err:
2386 	release_scratch_page_tree(vgpu);
2387 	return ret;
2388 }
2389 
2390 /**
2391  * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virtualization
2392  * @vgpu: a vGPU
2393  *
2394  * This function is used to initialize per-vGPU graphics memory virtualization
2395  * components.
2396  *
2397  * Returns:
2398  * Zero on success, error code if failed.
2399  */
2400 int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
2401 {
2402 	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
2403 
2404 	INIT_RADIX_TREE(&gtt->spt_tree, GFP_KERNEL);
2405 
2406 	INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head);
2407 	INIT_LIST_HEAD(&gtt->oos_page_list_head);
2408 	INIT_LIST_HEAD(&gtt->post_shadow_list_head);
2409 
2410 	gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu);
2411 	if (IS_ERR(gtt->ggtt_mm)) {
2412 		gvt_vgpu_err("fail to create mm for ggtt.\n");
2413 		return PTR_ERR(gtt->ggtt_mm);
2414 	}
2415 
2416 	intel_vgpu_reset_ggtt(vgpu, false);
2417 
2418 	INIT_LIST_HEAD(&gtt->ggtt_mm->ggtt_mm.partial_pte_list);
2419 
2420 	return create_scratch_page_tree(vgpu);
2421 }
2422 
2423 void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
2424 {
2425 	struct list_head *pos, *n;
2426 	struct intel_vgpu_mm *mm;
2427 
2428 	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
2429 		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
2430 		intel_vgpu_destroy_mm(mm);
2431 	}
2432 
2433 	if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head)))
2434 		gvt_err("vgpu ppgtt mm is not fully destroyed\n");
2435 
2436 	if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) {
2437 		gvt_err("Why do we still have spt not freed?\n");
2438 		ppgtt_free_all_spt(vgpu);
2439 	}
2440 }
2441 
2442 static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
2443 {
2444 	struct intel_gvt_partial_pte *pos, *next;
2445 
2446 	list_for_each_entry_safe(pos, next,
2447 				 &vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list,
2448 				 list) {
2449 		gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n",
2450 			pos->offset, pos->data);
2451 		kfree(pos);
2452 	}
2453 	intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm);
2454 	vgpu->gtt.ggtt_mm = NULL;
2455 }
2456 
2457 /**
2458  * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
2459  * @vgpu: a vGPU
2460  *
2461  * This function is used to clean up per-vGPU graphics memory virtualization
2462  * components.
2466  */
2467 void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
2468 {
2469 	intel_vgpu_destroy_all_ppgtt_mm(vgpu);
2470 	intel_vgpu_destroy_ggtt_mm(vgpu);
2471 	release_scratch_page_tree(vgpu);
2472 }
2473 
2474 static void clean_spt_oos(struct intel_gvt *gvt)
2475 {
2476 	struct intel_gvt_gtt *gtt = &gvt->gtt;
2477 	struct list_head *pos, *n;
2478 	struct intel_vgpu_oos_page *oos_page;
2479 
2480 	WARN(!list_empty(&gtt->oos_page_use_list_head),
2481 		"someone is still using oos page\n");
2482 
2483 	list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
2484 		oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
2485 		list_del(&oos_page->list);
2486 		free_page((unsigned long)oos_page->mem);
2487 		kfree(oos_page);
2488 	}
2489 }
2490 
2491 static int setup_spt_oos(struct intel_gvt *gvt)
2492 {
2493 	struct intel_gvt_gtt *gtt = &gvt->gtt;
2494 	struct intel_vgpu_oos_page *oos_page;
2495 	int i;
2496 	int ret;
2497 
2498 	INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
2499 	INIT_LIST_HEAD(&gtt->oos_page_use_list_head);
2500 
2501 	for (i = 0; i < preallocated_oos_pages; i++) {
2502 		oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
2503 		if (!oos_page) {
2504 			ret = -ENOMEM;
2505 			goto fail;
2506 		}
2507 		oos_page->mem = (void *)__get_free_pages(GFP_KERNEL, 0);
2508 		if (!oos_page->mem) {
2509 			ret = -ENOMEM;
2510 			kfree(oos_page);
2511 			goto fail;
2512 		}
2513 
2514 		INIT_LIST_HEAD(&oos_page->list);
2515 		INIT_LIST_HEAD(&oos_page->vm_list);
2516 		oos_page->id = i;
2517 		list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
2518 	}
2519 
2520 	gvt_dbg_mm("%d oos pages preallocated\n", i);
2521 
2522 	return 0;
2523 fail:
2524 	clean_spt_oos(gvt);
2525 	return ret;
2526 }
2527 
2528 /**
2529  * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
2530  * @vgpu: a vGPU
2531  * @pdps: pdp root array
2532  *
2533  * This function is used to find a PPGTT mm object from the mm object pool
2534  *
2535  * Returns:
2536  * pointer to mm object on success, NULL if failed.
2537  */
2538 struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
2539 		u64 pdps[])
2540 {
2541 	struct intel_vgpu_mm *mm;
2542 	struct list_head *pos;
2543 
2544 	list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) {
2545 		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
2546 
2547 		switch (mm->ppgtt_mm.root_entry_type) {
2548 		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
2549 			if (pdps[0] == mm->ppgtt_mm.guest_pdps[0])
2550 				return mm;
2551 			break;
2552 		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
2553 			if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps,
2554 				    sizeof(mm->ppgtt_mm.guest_pdps)))
2555 				return mm;
2556 			break;
2557 		default:
2558 			GEM_BUG_ON(1);
2559 		}
2560 	}
2561 	return NULL;
2562 }
2563 
2564 /**
2565  * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object.
2566  * @vgpu: a vGPU
2567  * @root_entry_type: ppgtt root entry type
2568  * @pdps: guest pdps
2569  *
2570  * This function is used to find or create a PPGTT mm object from a guest.
2571  *
2572  * Returns:
2573  * pointer to the mm object on success, ERR_PTR-encoded negative error code if failed.
2574  */
2575 struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
2576 		enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
2577 {
2578 	struct intel_vgpu_mm *mm;
2579 
2580 	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
2581 	if (mm) {
2582 		intel_vgpu_mm_get(mm);
2583 	} else {
2584 		mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps);
2585 		if (IS_ERR(mm))
2586 			gvt_vgpu_err("fail to create mm\n");
2587 	}
2588 	return mm;
2589 }
2590 
2591 /**
2592  * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object.
2593  * @vgpu: a vGPU
2594  * @pdps: guest pdps
2595  *
2596  * This function is used to find a PPGTT mm object from a guest and release a reference on it.
2597  *
2598  * Returns:
2599  * Zero on success, negative error code if failed.
2600  */
2601 int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[])
2602 {
2603 	struct intel_vgpu_mm *mm;
2604 
2605 	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
2606 	if (!mm) {
2607 		gvt_vgpu_err("fail to find ppgtt instance.\n");
2608 		return -EINVAL;
2609 	}
2610 	intel_vgpu_mm_put(mm);
2611 	return 0;
2612 }
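
/*
 * Illustrative sketch, not driver code: the get/put pairing a caller handling
 * guest PPGTT create/destroy notifications would follow. vgpu,
 * root_entry_type and pdps[] are assumed to be decoded from the guest
 * notification (hypothetical here).
 *
 *	mm = intel_vgpu_get_ppgtt_mm(vgpu, root_entry_type, pdps);
 *	if (IS_ERR(mm))
 *		return PTR_ERR(mm);
 *
 * and later, when the guest tears the page table down:
 *
 *	ret = intel_vgpu_put_ppgtt_mm(vgpu, pdps);
 */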
2613 
2614 /**
2615  * intel_gvt_init_gtt - initialize mm components of a GVT device
2616  * @gvt: GVT device
2617  *
2618  * This function is called at the initialization stage, to initialize
2619  * the mm components of a GVT device.
2620  *
2621  * Returns:
2622  * zero on success, negative error code if failed.
2623  */
2624 int intel_gvt_init_gtt(struct intel_gvt *gvt)
2625 {
2626 	int ret;
2627 	void *page;
2628 	struct device *dev = gvt->gt->i915->drm.dev;
2629 	dma_addr_t daddr;
2630 
2631 	gvt_dbg_core("init gtt\n");
2632 
2633 	gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
2634 	gvt->gtt.gma_ops = &gen8_gtt_gma_ops;
2635 
2636 	page = (void *)get_zeroed_page(GFP_KERNEL);
2637 	if (!page) {
2638 		gvt_err("fail to allocate scratch ggtt page\n");
2639 		return -ENOMEM;
2640 	}
2641 
2642 	daddr = dma_map_page(dev, virt_to_page(page), 0,
2643 			4096, DMA_BIDIRECTIONAL);
2644 	if (dma_mapping_error(dev, daddr)) {
2645 		gvt_err("fail to dmamap scratch ggtt page\n");
2646 		__free_page(virt_to_page(page));
2647 		return -ENOMEM;
2648 	}
2649 
2650 	gvt->gtt.scratch_page = virt_to_page(page);
2651 	gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
2652 
2653 	if (enable_out_of_sync) {
2654 		ret = setup_spt_oos(gvt);
2655 		if (ret) {
2656 			gvt_err("fail to initialize SPT oos\n");
2657 			dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);
2658 			__free_page(gvt->gtt.scratch_page);
2659 			return ret;
2660 		}
2661 	}
2662 	INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head);
2663 	mutex_init(&gvt->gtt.ppgtt_mm_lock);
2664 	return 0;
2665 }
2666 
2667 /**
2668  * intel_gvt_clean_gtt - clean up mm components of a GVT device
2669  * @gvt: GVT device
2670  *
2671  * This function is called at the driver unloading stage, to clean up
2672  * the mm components of a GVT device.
2673  *
2674  */
2675 void intel_gvt_clean_gtt(struct intel_gvt *gvt)
2676 {
2677 	struct device *dev = gvt->gt->i915->drm.dev;
2678 	dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn <<
2679 					I915_GTT_PAGE_SHIFT);
2680 
2681 	dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);
2682 
2683 	__free_page(gvt->gtt.scratch_page);
2684 
2685 	if (enable_out_of_sync)
2686 		clean_spt_oos(gvt);
2687 }
2688 
2689 /**
2690  * intel_vgpu_invalidate_ppgtt - invalidate PPGTT instances
2691  * @vgpu: a vGPU
2692  *
2693  * This function is called to invalidate all PPGTT instances of a vGPU.
2694  *
2695  */
2696 void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu)
2697 {
2698 	struct list_head *pos, *n;
2699 	struct intel_vgpu_mm *mm;
2700 
2701 	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
2702 		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
2703 		if (mm->type == INTEL_GVT_MM_PPGTT) {
2704 			mutex_lock(&vgpu->gvt->gtt.ppgtt_mm_lock);
2705 			list_del_init(&mm->ppgtt_mm.lru_list);
2706 			mutex_unlock(&vgpu->gvt->gtt.ppgtt_mm_lock);
2707 			if (mm->ppgtt_mm.shadowed)
2708 				invalidate_ppgtt_mm(mm);
2709 		}
2710 	}
2711 }
2712 
2713 /**
2714  * intel_vgpu_reset_ggtt - reset the GGTT entry
2715  * @vgpu: a vGPU
2716  * @invalidate_old: invalidate old entries
2717  *
2718  * This function is called at the vGPU create stage
2719  * to reset all the GGTT entries.
2720  *
2721  */
2722 void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old)
2723 {
2724 	struct intel_gvt *gvt = vgpu->gvt;
2725 	const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
2726 	struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE};
2727 	struct intel_gvt_gtt_entry old_entry;
2728 	u32 index;
2729 	u32 num_entries;
2730 
2731 	pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn);
2732 	pte_ops->set_present(&entry);
2733 
2734 	index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
2735 	num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
2736 	while (num_entries--) {
2737 		if (invalidate_old) {
2738 			ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
2739 			ggtt_invalidate_pte(vgpu, &old_entry);
2740 		}
2741 		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
2742 	}
2743 
2744 	index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
2745 	num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
2746 	while (num_entries--) {
2747 		if (invalidate_old) {
2748 			ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
2749 			ggtt_invalidate_pte(vgpu, &old_entry);
2750 		}
2751 		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
2752 	}
2753 
2754 	ggtt_invalidate(gvt->gt);
2755 }
2756 
2757 /**
2758  * intel_gvt_restore_ggtt - restore all vGPU's ggtt entries
2759  * @gvt: intel gvt device
2760  *
2761  * This function is called at the driver resume stage to restore
2762  * GGTT entries of every vGPU.
2763  *
2764  */
2765 void intel_gvt_restore_ggtt(struct intel_gvt *gvt)
2766 {
2767 	struct intel_vgpu *vgpu;
2768 	struct intel_vgpu_mm *mm;
2769 	int id;
2770 	gen8_pte_t pte;
2771 	u32 idx, num_low, num_hi, offset;
2772 
2773 	/* Restore dirty host ggtt for all vGPUs */
2774 	idr_for_each_entry(&(gvt)->vgpu_idr, vgpu, id) {
2775 		mm = vgpu->gtt.ggtt_mm;
2776 
2777 		num_low = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
2778 		offset = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
2779 		for (idx = 0; idx < num_low; idx++) {
2780 			pte = mm->ggtt_mm.host_ggtt_aperture[idx];
2781 			if (pte & GEN8_PAGE_PRESENT)
2782 				write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
2783 		}
2784 
2785 		num_hi = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
2786 		offset = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
2787 		for (idx = 0; idx < num_hi; idx++) {
2788 			pte = mm->ggtt_mm.host_ggtt_hidden[idx];
2789 			if (pte & GEN8_PAGE_PRESENT)
2790 				write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
2791 		}
2792 	}
2793 }
2794