xref: /linux/drivers/gpu/drm/i915/gt/gen8_ppgtt.c (revision ad30469a841b50dbb541df4d6971d891f703c297)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2020 Intel Corporation
4  */
5 
6 #include <linux/log2.h>
7 
8 #include "gem/i915_gem_lmem.h"
9 
10 #include "gen8_ppgtt.h"
11 #include "i915_scatterlist.h"
12 #include "i915_trace.h"
13 #include "i915_pvinfo.h"
14 #include "i915_vgpu.h"
15 #include "intel_gt.h"
16 #include "intel_gtt.h"
17 
18 static u64 gen8_pde_encode(const dma_addr_t addr,
19 			   const enum i915_cache_level level)
20 {
21 	u64 pde = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
22 
23 	if (level != I915_CACHE_NONE)
24 		pde |= PPAT_CACHED_PDE;
25 	else
26 		pde |= PPAT_UNCACHED;
27 
28 	return pde;
29 }
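
/*
 * Illustrative sketch only (hypothetical helper, not part of the driver):
 * combining gen8_pde_encode() with px_dma()/px_vaddr() to point a
 * directory slot at a page table, in the style used elsewhere in this
 * file:
 *
 *	static void example_write_pde(struct i915_page_directory *pd,
 *				      unsigned int idx,
 *				      struct i915_page_table *pt)
 *	{
 *		u64 *vaddr = px_vaddr(pd);
 *
 *		vaddr[idx] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC);
 *	}
 */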
30 
31 static u64 gen8_pte_encode(dma_addr_t addr,
32 			   unsigned int pat_index,
33 			   u32 flags)
34 {
35 	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
36 
37 	if (unlikely(flags & PTE_READ_ONLY))
38 		pte &= ~GEN8_PAGE_RW;
39 
40 	/*
41 	 * For pre-gen12 platforms pat_index is the same as enum
42 	 * i915_cache_level, so the switch-case here is still valid.
43 	 * See the translation table defined by LEGACY_CACHELEVEL.
44 	 */
45 	switch (pat_index) {
46 	case I915_CACHE_NONE:
47 		pte |= PPAT_UNCACHED;
48 		break;
49 	case I915_CACHE_WT:
50 		pte |= PPAT_DISPLAY_ELLC;
51 		break;
52 	default:
53 		pte |= PPAT_CACHED;
54 		break;
55 	}
56 
57 	return pte;
58 }
59 
60 static u64 gen12_pte_encode(dma_addr_t addr,
61 			    unsigned int pat_index,
62 			    u32 flags)
63 {
64 	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
65 
66 	if (unlikely(flags & PTE_READ_ONLY))
67 		pte &= ~GEN8_PAGE_RW;
68 
69 	if (flags & PTE_LM)
70 		pte |= GEN12_PPGTT_PTE_LM;
71 
72 	if (pat_index & BIT(0))
73 		pte |= GEN12_PPGTT_PTE_PAT0;
74 
75 	if (pat_index & BIT(1))
76 		pte |= GEN12_PPGTT_PTE_PAT1;
77 
78 	if (pat_index & BIT(2))
79 		pte |= GEN12_PPGTT_PTE_PAT2;
80 
81 	if (pat_index & BIT(3))
82 		pte |= MTL_PPGTT_PTE_PAT3;
83 
84 	return pte;
85 }
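
/*
 * Hedged illustration (values follow directly from the code above, not
 * from any external spec): gen12_pte_encode() scatters the low four bits
 * of pat_index into non-contiguous PTE bits, so e.g. pat_index == 5
 * (0b0101) sets GEN12_PPGTT_PTE_PAT0 and GEN12_PPGTT_PTE_PAT2:
 *
 *	gen8_pte_t pte = gen12_pte_encode(addr, 5, PTE_LM | PTE_READ_ONLY);
 *
 *	pte == addr | GEN8_PAGE_PRESENT | GEN12_PPGTT_PTE_LM |
 *	       GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT2;
 *	(GEN8_PAGE_RW was cleared again by PTE_READ_ONLY)
 */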
86 
87 static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
88 {
89 	struct drm_i915_private *i915 = ppgtt->vm.i915;
90 	struct intel_uncore *uncore = ppgtt->vm.gt->uncore;
91 	enum vgt_g2v_type msg;
92 	int i;
93 
94 	if (create)
95 		atomic_inc(px_used(ppgtt->pd)); /* never remove */
96 	else
97 		atomic_dec(px_used(ppgtt->pd));
98 
99 	mutex_lock(&i915->vgpu.lock);
100 
101 	if (i915_vm_is_4lvl(&ppgtt->vm)) {
102 		const u64 daddr = px_dma(ppgtt->pd);
103 
104 		intel_uncore_write(uncore,
105 				   vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
106 		intel_uncore_write(uncore,
107 				   vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
108 
109 		msg = create ?
110 			VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
111 			VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY;
112 	} else {
113 		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
114 			const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
115 
116 			intel_uncore_write(uncore,
117 					   vgtif_reg(pdp[i].lo),
118 					   lower_32_bits(daddr));
119 			intel_uncore_write(uncore,
120 					   vgtif_reg(pdp[i].hi),
121 					   upper_32_bits(daddr));
122 		}
123 
124 		msg = create ?
125 			VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
126 			VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY;
127 	}
128 
129 	/* g2v_notify atomically (via hv trap) consumes the message packet. */
130 	intel_uncore_write(uncore, vgtif_reg(g2v_notify), msg);
131 
132 	mutex_unlock(&i915->vgpu.lock);
133 }
134 
135 /* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
136 #define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */
137 #define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE))
138 #define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64))
139 #define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES))
140 #define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
141 #define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
142 #define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))
143 
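/*
 * Worked example (illustrative only, derived from the macros above): with
 * 4K pages and 512 entries per level, a canonical 48b address splits as
 *
 *	__gen8_pte_index(addr, 3) -> bits 47:39 (4lvl top level index)
 *	__gen8_pte_index(addr, 2) -> bits 38:30 (PDP index)
 *	__gen8_pte_index(addr, 1) -> bits 29:21 (PD index)
 *	__gen8_pte_index(addr, 0) -> bits 20:12 (PT index)
 *
 * while gen8_pd_index() performs the same split on an address that has
 * already been shifted right by GEN8_PTE_SHIFT.
 */
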
144 #define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)
145 
146 static unsigned int
147 gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx)
148 {
149 	const int shift = gen8_pd_shift(lvl);
150 	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
151 
152 	GEM_BUG_ON(start >= end);
153 	end += ~mask >> gen8_pd_shift(1);
154 
155 	*idx = i915_pde_index(start, shift);
156 	if ((start ^ end) & mask)
157 		return GEN8_PDES - *idx;
158 	else
159 		return i915_pde_index(end, shift) - *idx;
160 }
161 
162 static bool gen8_pd_contains(u64 start, u64 end, int lvl)
163 {
164 	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
165 
166 	GEM_BUG_ON(start >= end);
167 	return (start ^ end) & mask && (start & ~mask) == 0;
168 }
169 
170 static unsigned int gen8_pt_count(u64 start, u64 end)
171 {
172 	GEM_BUG_ON(start >= end);
173 	if ((start ^ end) >> gen8_pd_shift(1))
174 		return GEN8_PDES - (start & (GEN8_PDES - 1));
175 	else
176 		return end - start;
177 }
178 
179 static unsigned int gen8_pd_top_count(const struct i915_address_space *vm)
180 {
181 	unsigned int shift = __gen8_pte_shift(vm->top);
182 
183 	return (vm->total + (1ull << shift) - 1) >> shift;
184 }
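
/*
 * Sketch of the resulting top-level sizes (illustration, assuming the
 * usual address widths):
 *
 *	4lvl, vm->total = 1ull << 48, vm->top = 3:
 *		gen8_pd_top_count() = 2^48 >> __gen8_pte_shift(3)
 *				    = 2^48 >> 39 = 512
 *	3lvl, vm->total = 1ull << 32, vm->top = 2:
 *		gen8_pd_top_count() = 2^32 >> 30 = 4 = GEN8_3LVL_PDPES
 */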
185 
186 static struct i915_page_directory *
187 gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx)
188 {
189 	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
190 
191 	if (vm->top == 2)
192 		return ppgtt->pd;
193 	else
194 		return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top));
195 }
196 
197 static struct i915_page_directory *
198 gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr)
199 {
200 	return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT);
201 }
202 
203 static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
204 				 struct i915_page_directory *pd,
205 				 int count, int lvl)
206 {
207 	if (lvl) {
208 		void **pde = pd->entry;
209 
210 		do {
211 			if (!*pde)
212 				continue;
213 
214 			__gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1);
215 		} while (pde++, --count);
216 	}
217 
218 	free_px(vm, &pd->pt, lvl);
219 }
220 
221 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
222 {
223 	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
224 
225 	if (intel_vgpu_active(vm->i915))
226 		gen8_ppgtt_notify_vgt(ppgtt, false);
227 
228 	if (ppgtt->pd)
229 		__gen8_ppgtt_cleanup(vm, ppgtt->pd,
230 				     gen8_pd_top_count(vm), vm->top);
231 
232 	free_scratch(vm);
233 }
234 
235 static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
236 			      struct i915_page_directory * const pd,
237 			      u64 start, const u64 end, int lvl)
238 {
239 	const struct drm_i915_gem_object * const scratch = vm->scratch[lvl];
240 	unsigned int idx, len;
241 
242 	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
243 
244 	len = gen8_pd_range(start, end, lvl--, &idx);
245 	GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
246 		  __func__, vm, lvl + 1, start, end,
247 		  idx, len, atomic_read(px_used(pd)));
248 	GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));
249 
250 	do {
251 		struct i915_page_table *pt = pd->entry[idx];
252 
253 		if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
254 		    gen8_pd_contains(start, end, lvl)) {
255 			GTT_TRACE("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
256 				  __func__, vm, lvl + 1, idx, start, end);
257 			clear_pd_entry(pd, idx, scratch);
258 			__gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
259 			start += (u64)I915_PDES << gen8_pd_shift(lvl);
260 			continue;
261 		}
262 
263 		if (lvl) {
264 			start = __gen8_ppgtt_clear(vm, as_pd(pt),
265 						   start, end, lvl);
266 		} else {
267 			unsigned int count;
268 			unsigned int pte = gen8_pd_index(start, 0);
269 			unsigned int num_ptes;
270 			u64 *vaddr;
271 
272 			count = gen8_pt_count(start, end);
273 			GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
274 				  __func__, vm, lvl, start, end,
275 				  gen8_pd_index(start, 0), count,
276 				  atomic_read(&pt->used));
277 			GEM_BUG_ON(!count || count >= atomic_read(&pt->used));
278 
279 			num_ptes = count;
280 			if (pt->is_compact) {
281 				GEM_BUG_ON(num_ptes % 16);
282 				GEM_BUG_ON(pte % 16);
283 				num_ptes /= 16;
284 				pte /= 16;
285 			}
286 
287 			vaddr = px_vaddr(pt);
288 			memset64(vaddr + pte,
289 				 vm->scratch[0]->encode,
290 				 num_ptes);
291 
292 			atomic_sub(count, &pt->used);
293 			start += count;
294 		}
295 
296 		if (release_pd_entry(pd, idx, pt, scratch))
297 			free_px(vm, pt, lvl);
298 	} while (idx++, --len);
299 
300 	return start;
301 }
302 
303 static void gen8_ppgtt_clear(struct i915_address_space *vm,
304 			     u64 start, u64 length)
305 {
306 	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
307 	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
308 	GEM_BUG_ON(range_overflows(start, length, vm->total));
309 
310 	start >>= GEN8_PTE_SHIFT;
311 	length >>= GEN8_PTE_SHIFT;
312 	GEM_BUG_ON(length == 0);
313 
314 	__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
315 			   start, start + length, vm->top);
316 }
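
/*
 * Minimal usage sketch (modelled on ppgtt_unbind_vma(), simplified): the
 * vfunc takes byte addresses and lengths, which gen8_ppgtt_clear()
 * converts to page indices before walking the tree:
 *
 *	vm->clear_range(vm, vma_res->start, vma_res->vma_size);
 */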
317 
318 static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
319 			       struct i915_vm_pt_stash *stash,
320 			       struct i915_page_directory * const pd,
321 			       u64 * const start, const u64 end, int lvl)
322 {
323 	unsigned int idx, len;
324 
325 	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
326 
327 	len = gen8_pd_range(*start, end, lvl--, &idx);
328 	GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
329 		  __func__, vm, lvl + 1, *start, end,
330 		  idx, len, atomic_read(px_used(pd)));
331 	GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));
332 
333 	spin_lock(&pd->lock);
334 	GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */
335 	do {
336 		struct i915_page_table *pt = pd->entry[idx];
337 
338 		if (!pt) {
339 			spin_unlock(&pd->lock);
340 
341 			GTT_TRACE("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
342 				  __func__, vm, lvl + 1, idx);
343 
344 			pt = stash->pt[!!lvl];
345 			__i915_gem_object_pin_pages(pt->base);
346 
347 			fill_px(pt, vm->scratch[lvl]->encode);
348 
349 			spin_lock(&pd->lock);
350 			if (likely(!pd->entry[idx])) {
351 				stash->pt[!!lvl] = pt->stash;
352 				atomic_set(&pt->used, 0);
353 				set_pd_entry(pd, idx, pt);
354 			} else {
355 				pt = pd->entry[idx];
356 			}
357 		}
358 
359 		if (lvl) {
360 			atomic_inc(&pt->used);
361 			spin_unlock(&pd->lock);
362 
363 			__gen8_ppgtt_alloc(vm, stash,
364 					   as_pd(pt), start, end, lvl);
365 
366 			spin_lock(&pd->lock);
367 			atomic_dec(&pt->used);
368 			GEM_BUG_ON(!atomic_read(&pt->used));
369 		} else {
370 			unsigned int count = gen8_pt_count(*start, end);
371 
372 			GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
373 				  __func__, vm, lvl, *start, end,
374 				  gen8_pd_index(*start, 0), count,
375 				  atomic_read(&pt->used));
376 
377 			atomic_add(count, &pt->used);
378 			/* All other pdes may be simultaneously removed */
379 			GEM_BUG_ON(atomic_read(&pt->used) > NALLOC * I915_PDES);
380 			*start += count;
381 		}
382 	} while (idx++, --len);
383 	spin_unlock(&pd->lock);
384 }
385 
386 static void gen8_ppgtt_alloc(struct i915_address_space *vm,
387 			     struct i915_vm_pt_stash *stash,
388 			     u64 start, u64 length)
389 {
390 	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
391 	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
392 	GEM_BUG_ON(range_overflows(start, length, vm->total));
393 
394 	start >>= GEN8_PTE_SHIFT;
395 	length >>= GEN8_PTE_SHIFT;
396 	GEM_BUG_ON(length == 0);
397 
398 	__gen8_ppgtt_alloc(vm, stash, i915_vm_to_ppgtt(vm)->pd,
399 			   &start, start + length, vm->top);
400 }
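
/*
 * A hedged sketch of the expected calling sequence (modelled on the
 * pt_stash users elsewhere in i915; simplified, with locking and error
 * handling omitted):
 *
 *	struct i915_vm_pt_stash stash = {};
 *
 *	err = i915_vm_alloc_pt_stash(vm, &stash, length);
 *	err = i915_vm_map_pt_stash(vm, &stash);
 *	vm->allocate_va_range(vm, &stash, start, length);
 *	...
 *	i915_vm_free_pt_stash(vm, &stash);
 */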
401 
402 static void __gen8_ppgtt_foreach(struct i915_address_space *vm,
403 				 struct i915_page_directory *pd,
404 				 u64 *start, u64 end, int lvl,
405 				 void (*fn)(struct i915_address_space *vm,
406 					    struct i915_page_table *pt,
407 					    void *data),
408 				 void *data)
409 {
410 	unsigned int idx, len;
411 
412 	len = gen8_pd_range(*start, end, lvl--, &idx);
413 
414 	spin_lock(&pd->lock);
415 	do {
416 		struct i915_page_table *pt = pd->entry[idx];
417 
418 		atomic_inc(&pt->used);
419 		spin_unlock(&pd->lock);
420 
421 		if (lvl) {
422 			__gen8_ppgtt_foreach(vm, as_pd(pt), start, end, lvl,
423 					     fn, data);
424 		} else {
425 			fn(vm, pt, data);
426 			*start += gen8_pt_count(*start, end);
427 		}
428 
429 		spin_lock(&pd->lock);
430 		atomic_dec(&pt->used);
431 	} while (idx++, --len);
432 	spin_unlock(&pd->lock);
433 }
434 
435 static void gen8_ppgtt_foreach(struct i915_address_space *vm,
436 			       u64 start, u64 length,
437 			       void (*fn)(struct i915_address_space *vm,
438 					  struct i915_page_table *pt,
439 					  void *data),
440 			       void *data)
441 {
442 	start >>= GEN8_PTE_SHIFT;
443 	length >>= GEN8_PTE_SHIFT;
444 
445 	__gen8_ppgtt_foreach(vm, i915_vm_to_ppgtt(vm)->pd,
446 			     &start, start + length, vm->top,
447 			     fn, data);
448 }
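
/*
 * Hedged sketch of a ->foreach() user: the callback is invoked once for
 * every live leaf page table in the (already allocated) range. A
 * hypothetical counter could look like:
 *
 *	static void count_pt(struct i915_address_space *vm,
 *			     struct i915_page_table *pt, void *data)
 *	{
 *		(*(unsigned int *)data)++;
 *	}
 *
 *	unsigned int n = 0;
 *	vm->foreach(vm, start, length, count_pt, &n);
 */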
449 
450 static __always_inline u64
451 gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
452 		      struct i915_page_directory *pdp,
453 		      struct sgt_dma *iter,
454 		      u64 idx,
455 		      unsigned int pat_index,
456 		      u32 flags)
457 {
458 	struct i915_page_directory *pd;
459 	const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, pat_index, flags);
460 	gen8_pte_t *vaddr;
461 
462 	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
463 	vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
464 	do {
465 		GEM_BUG_ON(sg_dma_len(iter->sg) < I915_GTT_PAGE_SIZE);
466 		vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
467 
468 		iter->dma += I915_GTT_PAGE_SIZE;
469 		if (iter->dma >= iter->max) {
470 			iter->sg = __sg_next(iter->sg);
471 			if (!iter->sg || sg_dma_len(iter->sg) == 0) {
472 				idx = 0;
473 				break;
474 			}
475 
476 			iter->dma = sg_dma_address(iter->sg);
477 			iter->max = iter->dma + sg_dma_len(iter->sg);
478 		}
479 
480 		if (gen8_pd_index(++idx, 0) == 0) {
481 			if (gen8_pd_index(idx, 1) == 0) {
482 				/* Limited by sg length for 3lvl */
483 				if (gen8_pd_index(idx, 2) == 0)
484 					break;
485 
486 				pd = pdp->entry[gen8_pd_index(idx, 2)];
487 			}
488 
489 			drm_clflush_virt_range(vaddr, PAGE_SIZE);
490 			vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
491 		}
492 	} while (1);
493 	drm_clflush_virt_range(vaddr, PAGE_SIZE);
494 
495 	return idx;
496 }
497 
498 static void
499 xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
500 			  struct i915_vma_resource *vma_res,
501 			  struct sgt_dma *iter,
502 			  unsigned int pat_index,
503 			  u32 flags)
504 {
505 	const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
506 	unsigned int rem = sg_dma_len(iter->sg);
507 	u64 start = vma_res->start;
508 	u64 end = start + vma_res->vma_size;
509 
510 	GEM_BUG_ON(!i915_vm_is_4lvl(vm));
511 
512 	do {
513 		struct i915_page_directory * const pdp =
514 			gen8_pdp_for_page_address(vm, start);
515 		struct i915_page_directory * const pd =
516 			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
517 		struct i915_page_table *pt =
518 			i915_pt_entry(pd, __gen8_pte_index(start, 1));
519 		gen8_pte_t encode = pte_encode;
520 		unsigned int page_size;
521 		gen8_pte_t *vaddr;
522 		u16 index, max, nent, i;
523 
524 		max = I915_PDES;
525 		nent = 1;
526 
527 		if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
528 		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
529 		    rem >= I915_GTT_PAGE_SIZE_2M &&
530 		    !__gen8_pte_index(start, 0)) {
531 			index = __gen8_pte_index(start, 1);
532 			encode |= GEN8_PDE_PS_2M;
533 			page_size = I915_GTT_PAGE_SIZE_2M;
534 
535 			vaddr = px_vaddr(pd);
536 		} else {
537 			index = __gen8_pte_index(start, 0);
538 			page_size = I915_GTT_PAGE_SIZE;
539 
540 			if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
541 				/*
542 				 * Device local-memory on these platforms should
543 				 * always use 64K pages or larger (including GTT
544 				 * alignment), so if we know the whole page-table
545 				 * needs to be filled we can always safely use
546 				 * the compact layout. Otherwise, fall back to
547 				 * the TLB hint with PS64. If this is system
548 				 * memory, we only bother with PS64.
549 				 */
550 				if ((encode & GEN12_PPGTT_PTE_LM) &&
551 				    end - start >= SZ_2M && !index) {
552 					index = __gen8_pte_index(start, 0) / 16;
553 					page_size = I915_GTT_PAGE_SIZE_64K;
554 
555 					max /= 16;
556 
557 					vaddr = px_vaddr(pd);
558 					vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;
559 
560 					pt->is_compact = true;
561 				} else if (IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
562 					   rem >= I915_GTT_PAGE_SIZE_64K &&
563 					   !(index % 16)) {
564 					encode |= GEN12_PTE_PS64;
565 					page_size = I915_GTT_PAGE_SIZE_64K;
566 					nent = 16;
567 				}
568 			}
569 
570 			vaddr = px_vaddr(pt);
571 		}
572 
573 		do {
574 			GEM_BUG_ON(rem < page_size);
575 
576 			for (i = 0; i < nent; i++) {
577 				vaddr[index++] =
578 					encode | (iter->dma + i *
579 						  I915_GTT_PAGE_SIZE);
580 			}
581 
582 			start += page_size;
583 			iter->dma += page_size;
584 			rem -= page_size;
585 			if (iter->dma >= iter->max) {
586 				iter->sg = __sg_next(iter->sg);
587 				if (!iter->sg)
588 					break;
589 
590 				rem = sg_dma_len(iter->sg);
591 				if (!rem)
592 					break;
593 
594 				iter->dma = sg_dma_address(iter->sg);
595 				iter->max = iter->dma + rem;
596 
597 				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
598 					break;
599 			}
600 		} while (rem >= page_size && index < max);
601 
602 		drm_clflush_virt_range(vaddr, PAGE_SIZE);
603 		vma_res->page_sizes_gtt |= page_size;
604 	} while (iter->sg && sg_dma_len(iter->sg));
605 }
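
/*
 * Illustrative arithmetic for the 64K paths above (derived from the code,
 * not from bspec): a page table normally holds I915_PDES == 512 4K
 * entries covering 2M. In the compact layout both the starting index and
 * the entry count are divided by 16, so at most 512 / 16 == 32 entries
 * are written per table, each mapping 64K. With the PS64 hint instead,
 * nent == 16 ordinary 4K-granule entries are written per 64K chunk.
 */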
606 
607 static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
608 				   struct i915_vma_resource *vma_res,
609 				   struct sgt_dma *iter,
610 				   unsigned int pat_index,
611 				   u32 flags)
612 {
613 	const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
614 	unsigned int rem = sg_dma_len(iter->sg);
615 	u64 start = vma_res->start;
616 
617 	GEM_BUG_ON(!i915_vm_is_4lvl(vm));
618 
619 	do {
620 		struct i915_page_directory * const pdp =
621 			gen8_pdp_for_page_address(vm, start);
622 		struct i915_page_directory * const pd =
623 			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
624 		gen8_pte_t encode = pte_encode;
625 		unsigned int maybe_64K = -1;
626 		unsigned int page_size;
627 		gen8_pte_t *vaddr;
628 		u16 index;
629 
630 		if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
631 		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
632 		    rem >= I915_GTT_PAGE_SIZE_2M &&
633 		    !__gen8_pte_index(start, 0)) {
634 			index = __gen8_pte_index(start, 1);
635 			encode |= GEN8_PDE_PS_2M;
636 			page_size = I915_GTT_PAGE_SIZE_2M;
637 
638 			vaddr = px_vaddr(pd);
639 		} else {
640 			struct i915_page_table *pt =
641 				i915_pt_entry(pd, __gen8_pte_index(start, 1));
642 
643 			index = __gen8_pte_index(start, 0);
644 			page_size = I915_GTT_PAGE_SIZE;
645 
646 			if (!index &&
647 			    vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
648 			    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
649 			    (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
650 			     rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
651 				maybe_64K = __gen8_pte_index(start, 1);
652 
653 			vaddr = px_vaddr(pt);
654 		}
655 
656 		do {
657 			GEM_BUG_ON(sg_dma_len(iter->sg) < page_size);
658 			vaddr[index++] = encode | iter->dma;
659 
660 			start += page_size;
661 			iter->dma += page_size;
662 			rem -= page_size;
663 			if (iter->dma >= iter->max) {
664 				iter->sg = __sg_next(iter->sg);
665 				if (!iter->sg)
666 					break;
667 
668 				rem = sg_dma_len(iter->sg);
669 				if (!rem)
670 					break;
671 
672 				iter->dma = sg_dma_address(iter->sg);
673 				iter->max = iter->dma + rem;
674 
675 				if (maybe_64K != -1 && index < I915_PDES &&
676 				    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
677 				      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
678 				       rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
679 					maybe_64K = -1;
680 
681 				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
682 					break;
683 			}
684 		} while (rem >= page_size && index < I915_PDES);
685 
686 		drm_clflush_virt_range(vaddr, PAGE_SIZE);
687 
688 		/*
689 		 * Is it safe to mark the 2M block as 64K? -- Either we have
690 		 * filled the whole page-table with 64K entries, or we have
691 		 * filled part of it and reached the end of the sg table and
692 		 * have enough padding.
693 		 */
694 		if (maybe_64K != -1 &&
695 		    (index == I915_PDES ||
696 		     (i915_vm_has_scratch_64K(vm) &&
697 		      !iter->sg && IS_ALIGNED(vma_res->start +
698 					      vma_res->node_size,
699 					      I915_GTT_PAGE_SIZE_2M)))) {
700 			vaddr = px_vaddr(pd);
701 			vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
702 			drm_clflush_virt_range(vaddr, PAGE_SIZE);
703 			page_size = I915_GTT_PAGE_SIZE_64K;
704 
705 			/*
706 			 * We write all 4K page entries, even when using 64K
707 			 * pages. In order to verify that the HW isn't cheating
708 			 * by using the 4K PTE instead of the 64K PTE, we want
709 			 * to remove all the surplus entries. If the HW skipped
710 			 * the 64K PTE, it will read/write into the scratch page
711 			 * instead - which we detect as missing results during
712 			 * selftests.
713 			 */
714 			if (I915_SELFTEST_ONLY(vm->scrub_64K)) {
715 				u16 i;
716 
717 				encode = vm->scratch[0]->encode;
718 				vaddr = px_vaddr(i915_pt_entry(pd, maybe_64K));
719 
720 				for (i = 1; i < index; i += 16)
721 					memset64(vaddr + i, encode, 15);
722 
723 				drm_clflush_virt_range(vaddr, PAGE_SIZE);
724 			}
725 		}
726 
727 		vma_res->page_sizes_gtt |= page_size;
728 	} while (iter->sg && sg_dma_len(iter->sg));
729 }
730 
731 static void gen8_ppgtt_insert(struct i915_address_space *vm,
732 			      struct i915_vma_resource *vma_res,
733 			      unsigned int pat_index,
734 			      u32 flags)
735 {
736 	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
737 	struct sgt_dma iter = sgt_dma(vma_res);
738 
739 	if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) {
740 		if (GRAPHICS_VER_FULL(vm->i915) >= IP_VER(12, 50))
741 			xehpsdv_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags);
742 		else
743 			gen8_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags);
744 	} else {
745 		u64 idx = vma_res->start >> GEN8_PTE_SHIFT;
746 
747 		do {
748 			struct i915_page_directory * const pdp =
749 				gen8_pdp_for_page_index(vm, idx);
750 
751 			idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx,
752 						    pat_index, flags);
753 		} while (idx);
754 
755 		vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
756 	}
757 }
758 
759 static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
760 				    dma_addr_t addr,
761 				    u64 offset,
762 				    unsigned int pat_index,
763 				    u32 flags)
764 {
765 	u64 idx = offset >> GEN8_PTE_SHIFT;
766 	struct i915_page_directory * const pdp =
767 		gen8_pdp_for_page_index(vm, idx);
768 	struct i915_page_directory *pd =
769 		i915_pd_entry(pdp, gen8_pd_index(idx, 2));
770 	struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
771 	gen8_pte_t *vaddr;
772 
773 	GEM_BUG_ON(pt->is_compact);
774 
775 	vaddr = px_vaddr(pt);
776 	vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, pat_index, flags);
777 	drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
778 }
779 
780 static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
781 					    dma_addr_t addr,
782 					    u64 offset,
783 					    unsigned int pat_index,
784 					    u32 flags)
785 {
786 	u64 idx = offset >> GEN8_PTE_SHIFT;
787 	struct i915_page_directory * const pdp =
788 		gen8_pdp_for_page_index(vm, idx);
789 	struct i915_page_directory *pd =
790 		i915_pd_entry(pdp, gen8_pd_index(idx, 2));
791 	struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
792 	gen8_pte_t *vaddr;
793 
794 	GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K));
795 	GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K));
796 
797 	/* XXX: we don't strictly need to use this layout */
798 
799 	if (!pt->is_compact) {
800 		vaddr = px_vaddr(pd);
801 		vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K;
802 		pt->is_compact = true;
803 	}
804 
805 	vaddr = px_vaddr(pt);
806 	vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, pat_index, flags);
807 }
808 
809 static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
810 				       dma_addr_t addr,
811 				       u64 offset,
812 				       unsigned int pat_index,
813 				       u32 flags)
814 {
815 	if (flags & PTE_LM)
816 		return __xehpsdv_ppgtt_insert_entry_lm(vm, addr, offset,
817 						       pat_index, flags);
818 
819 	return gen8_ppgtt_insert_entry(vm, addr, offset, pat_index, flags);
820 }
821 
822 static int gen8_init_scratch(struct i915_address_space *vm)
823 {
824 	u32 pte_flags;
825 	int ret;
826 	int i;
827 
828 	/*
829 	 * If everybody agrees not to write into the scratch page,
830 	 * we can reuse it for all vms, keeping contexts and processes separate.
831 	 */
832 	if (vm->has_read_only && vm->gt->vm && !i915_is_ggtt(vm->gt->vm)) {
833 		struct i915_address_space *clone = vm->gt->vm;
834 
835 		GEM_BUG_ON(!clone->has_read_only);
836 
837 		vm->scratch_order = clone->scratch_order;
838 		for (i = 0; i <= vm->top; i++)
839 			vm->scratch[i] = i915_gem_object_get(clone->scratch[i]);
840 
841 		return 0;
842 	}
843 
844 	ret = setup_scratch_page(vm);
845 	if (ret)
846 		return ret;
847 
848 	pte_flags = vm->has_read_only;
849 	if (i915_gem_object_is_lmem(vm->scratch[0]))
850 		pte_flags |= PTE_LM;
851 
852 	vm->scratch[0]->encode =
853 		vm->pte_encode(px_dma(vm->scratch[0]),
854 			       i915_gem_get_pat_index(vm->i915,
855 						      I915_CACHE_NONE),
856 			       pte_flags);
857 
858 	for (i = 1; i <= vm->top; i++) {
859 		struct drm_i915_gem_object *obj;
860 
861 		obj = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
862 		if (IS_ERR(obj)) {
863 			ret = PTR_ERR(obj);
864 			goto free_scratch;
865 		}
866 
867 		ret = map_pt_dma(vm, obj);
868 		if (ret) {
869 			i915_gem_object_put(obj);
870 			goto free_scratch;
871 		}
872 
873 		fill_px(obj, vm->scratch[i - 1]->encode);
874 		obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_NONE);
875 
876 		vm->scratch[i] = obj;
877 	}
878 
879 	return 0;
880 
881 free_scratch:
882 	while (i--)
883 		i915_gem_object_put(vm->scratch[i]);
884 	vm->scratch[0] = NULL;
885 	return ret;
886 }
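
/*
 * Rough picture of the scratch chain built above (illustration only):
 *
 *	scratch[0]: a data page, PTE-encoded as vm->scratch[0]->encode
 *	scratch[1]: a page table whose 512 PTEs all point at scratch[0]
 *	scratch[2]: a page directory whose PDEs all point at scratch[1]
 *	scratch[3]: (4lvl only) a PDP whose entries point at scratch[2]
 *
 * A newly allocated table at level lvl is therefore simply filled with
 * vm->scratch[lvl]->encode (see fill_px() in __gen8_ppgtt_alloc()), so
 * every unused slot resolves, level by level, to the scratch page.
 */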
887 
888 static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
889 {
890 	struct i915_address_space *vm = &ppgtt->vm;
891 	struct i915_page_directory *pd = ppgtt->pd;
892 	unsigned int idx;
893 
894 	GEM_BUG_ON(vm->top != 2);
895 	GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES);
896 
897 	for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
898 		struct i915_page_directory *pde;
899 		int err;
900 
901 		pde = alloc_pd(vm);
902 		if (IS_ERR(pde))
903 			return PTR_ERR(pde);
904 
905 		err = map_pt_dma(vm, pde->pt.base);
906 		if (err) {
907 			free_pd(vm, pde);
908 			return err;
909 		}
910 
911 		fill_px(pde, vm->scratch[1]->encode);
912 		set_pd_entry(pd, idx, pde);
913 		atomic_inc(px_used(pde)); /* keep pinned */
914 	}
915 	wmb();
916 
917 	return 0;
918 }
919 
920 static struct i915_page_directory *
921 gen8_alloc_top_pd(struct i915_address_space *vm)
922 {
923 	const unsigned int count = gen8_pd_top_count(vm);
924 	struct i915_page_directory *pd;
925 	int err;
926 
927 	GEM_BUG_ON(count > I915_PDES);
928 
929 	pd = __alloc_pd(count);
930 	if (unlikely(!pd))
931 		return ERR_PTR(-ENOMEM);
932 
933 	pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
934 	if (IS_ERR(pd->pt.base)) {
935 		err = PTR_ERR(pd->pt.base);
936 		pd->pt.base = NULL;
937 		goto err_pd;
938 	}
939 
940 	err = map_pt_dma(vm, pd->pt.base);
941 	if (err)
942 		goto err_pd;
943 
944 	fill_page_dma(px_base(pd), vm->scratch[vm->top]->encode, count);
945 	atomic_inc(px_used(pd)); /* mark as pinned */
946 	return pd;
947 
948 err_pd:
949 	free_pd(vm, pd);
950 	return ERR_PTR(err);
951 }
952 
953 /*
954  * GEN8 legacy ppgtt programming is accomplished through a maximum of 4 PDP
955  * registers, with a net effect resembling a 2-level page table in normal x86
956  * terms. Each PDP covers 1GB of memory: 4 * 512 * 512 * 4096 = 4GB of legacy
957  * 32b address space.
958  *
959  */
960 struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
961 				     unsigned long lmem_pt_obj_flags)
962 {
963 	struct i915_page_directory *pd;
964 	struct i915_ppgtt *ppgtt;
965 	int err;
966 
967 	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
968 	if (!ppgtt)
969 		return ERR_PTR(-ENOMEM);
970 
971 	ppgtt_init(ppgtt, gt, lmem_pt_obj_flags);
972 	ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
973 	ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t));
974 
975 	/*
976 	 * From bdw, there is hw support for read-only pages in the PPGTT.
977 	 *
978 	 * Gen11 has HSDES#:1807136187 unresolved. Disable ro support
979 	 * for now.
980 	 *
981 	 * Gen12 has inherited the same read-only fault issue from gen11.
982 	 */
983 	ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12);
984 
985 	if (HAS_LMEM(gt->i915))
986 		ppgtt->vm.alloc_pt_dma = alloc_pt_lmem;
987 	else
988 		ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
989 
990 	/*
991 	 * Using SMEM here instead of LMEM has the advantage of not reserving
992 	 * high performance memory for a "never" used filler page. It also
993 	 * removes the device access that would be required to initialise the
994 	 * scratch page, reducing pressure on an even scarcer resource.
995 	 */
996 	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
997 
998 	if (GRAPHICS_VER(gt->i915) >= 12)
999 		ppgtt->vm.pte_encode = gen12_pte_encode;
1000 	else
1001 		ppgtt->vm.pte_encode = gen8_pte_encode;
1002 
1003 	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
1004 	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
1005 	if (HAS_64K_PAGES(gt->i915))
1006 		ppgtt->vm.insert_page = xehpsdv_ppgtt_insert_entry;
1007 	else
1008 		ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
1009 	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
1010 	ppgtt->vm.clear_range = gen8_ppgtt_clear;
1011 	ppgtt->vm.foreach = gen8_ppgtt_foreach;
1012 	ppgtt->vm.cleanup = gen8_ppgtt_cleanup;
1013 
1014 	err = gen8_init_scratch(&ppgtt->vm);
1015 	if (err)
1016 		goto err_put;
1017 
1018 	pd = gen8_alloc_top_pd(&ppgtt->vm);
1019 	if (IS_ERR(pd)) {
1020 		err = PTR_ERR(pd);
1021 		goto err_put;
1022 	}
1023 	ppgtt->pd = pd;
1024 
1025 	if (!i915_vm_is_4lvl(&ppgtt->vm)) {
1026 		err = gen8_preallocate_top_level_pdp(ppgtt);
1027 		if (err)
1028 			goto err_put;
1029 	}
1030 
1031 	if (intel_vgpu_active(gt->i915))
1032 		gen8_ppgtt_notify_vgt(ppgtt, true);
1033 
1034 	return ppgtt;
1035 
1036 err_put:
1037 	i915_vm_put(&ppgtt->vm);
1038 	return ERR_PTR(err);
1039 }
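
/*
 * Usage sketch (simplified from callers such as i915_ppgtt_create(),
 * error handling abbreviated):
 *
 *	struct i915_ppgtt *ppgtt = gen8_ppgtt_create(gt, 0);
 *
 *	if (IS_ERR(ppgtt))
 *		return ERR_CAST(ppgtt);
 *
 *	// ppgtt->vm is ready for allocate_va_range()/insert_entries()
 *	// and is released again with i915_vm_put(&ppgtt->vm)
 */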
1040