xref: /linux/drivers/gpu/drm/i915/i915_gem_gtt.c (revision 59024954a1e7e26b62680e1f2b5725249a6c09f7)
1 /*
2  * Copyright © 2010 Daniel Vetter
3  * Copyright © 2011-2014 Intel Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  *
24  */
25 
26 #include <linux/seq_file.h>
27 #include <linux/stop_machine.h>
28 #include <drm/drmP.h>
29 #include <drm/i915_drm.h>
30 #include "i915_drv.h"
31 #include "i915_vgpu.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34 
35 /**
36  * DOC: Global GTT views
37  *
38  * Background and previous state
39  *
40  * Historically objects could exists (be bound) in global GTT space only as
41  * singular instances with a view representing all of the object's backing pages
42  * in a linear fashion. This view will be called a normal view.
43  *
44  * To support multiple views of the same object, where the number of mapped
45  * pages is not equal to the backing store, or where the layout of the pages
46  * is not linear, the concept of a GGTT view was added.
47  *
48  * One example of an alternative view is a stereo display driven by a single
49  * image. In this case we would have a framebuffer looking like this
50  * (2x2 pages):
51  *
52  *    12
53  *    34
54  *
55  * The above would represent a normal GGTT view as normally mapped for GPU or
56  * CPU rendering. In contrast, the display engine would be fed an alternative
57  * view which could look something like this:
58  *
59  *   1212
60  *   3434
61  *
62  * In this example both the size and layout of pages in the alternative view are
63  * different from the normal view.
64  *
65  * Implementation and usage
66  *
67  * GGTT views are implemented using VMAs and are distinguished via enum
68  * i915_ggtt_view_type and struct i915_ggtt_view.
69  *
70  * A new flavour of core GEM functions which work with GGTT bound objects was
71  * added with the _ggtt_ infix, and sometimes with the _view postfix, to avoid
72  * renaming in large amounts of code. They take the struct i915_ggtt_view
73  * parameter encapsulating all metadata required to implement a view.
74  *
75  * As a helper for callers which are only interested in the normal view,
76  * a globally const i915_ggtt_view_normal singleton instance exists. All old
77  * core GEM API functions, the ones not taking the view parameter, operate on
78  * or with the normal GGTT view.
79  *
80  * Code wanting to add or use a new GGTT view needs to:
81  *
82  * 1. Add a new enum with a suitable name.
83  * 2. Extend the metadata in the i915_ggtt_view structure if required.
84  * 3. Add support to i915_get_ggtt_vma_pages().
85  *
86  * New views are required to build a scatter-gather table from within the
87  * i915_get_ggtt_vma_pages() function. This table is stored in the vma.ggtt_view
88  * and exists for the lifetime of a VMA.
89  *
90  * The core API is designed to have copy semantics, which means that the passed
91  * in struct i915_ggtt_view does not need to be persistent (left around after
92  * calling the core API functions).
93  *
94  */
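/*
 * Illustrative sketch (an editorial addition, not driver code): how a caller
 * might describe and use an alternative view.  The exact pin/lookup helper
 * names and the layout metadata carried in the view's params union vary
 * between kernel versions, so everything past the .type assignment below is
 * a placeholder rather than the real API:
 *
 *	struct i915_ggtt_view view = {
 *		.type = I915_GGTT_VIEW_ROTATED,
 *		.params = ...,	(rotation/layout metadata for this view)
 *	};
 *
 * The caller then hands &view to one of the _ggtt_/_view flavoured pin
 * helpers mentioned above and uses the resulting VMA for display scanout.
 * Because the core API has copy semantics, "view" may live on the caller's
 * stack.
 */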
95 
96 static inline struct i915_ggtt *
97 i915_vm_to_ggtt(struct i915_address_space *vm)
98 {
99 	GEM_BUG_ON(!i915_is_ggtt(vm));
100 	return container_of(vm, struct i915_ggtt, base);
101 }
102 
103 static int
104 i915_get_ggtt_vma_pages(struct i915_vma *vma);
105 
106 const struct i915_ggtt_view i915_ggtt_view_normal = {
107 	.type = I915_GGTT_VIEW_NORMAL,
108 };
109 const struct i915_ggtt_view i915_ggtt_view_rotated = {
110 	.type = I915_GGTT_VIEW_ROTATED,
111 };
112 
113 int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
114 			       	int enable_ppgtt)
115 {
116 	bool has_aliasing_ppgtt;
117 	bool has_full_ppgtt;
118 	bool has_full_48bit_ppgtt;
119 
120 	has_aliasing_ppgtt = INTEL_GEN(dev_priv) >= 6;
121 	has_full_ppgtt = INTEL_GEN(dev_priv) >= 7;
122 	has_full_48bit_ppgtt =
123 	       	IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9;
124 
125 	if (intel_vgpu_active(dev_priv)) {
126 		/* emulation is too hard */
127 		has_full_ppgtt = false;
128 		has_full_48bit_ppgtt = false;
129 	}
130 
131 	if (!has_aliasing_ppgtt)
132 		return 0;
133 
134 	/*
135 	 * We don't allow disabling PPGTT for gen9+ as it's a requirement for
136 	 * execlists, the sole mechanism available to submit work.
137 	 */
138 	if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9)
139 		return 0;
140 
141 	if (enable_ppgtt == 1)
142 		return 1;
143 
144 	if (enable_ppgtt == 2 && has_full_ppgtt)
145 		return 2;
146 
147 	if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
148 		return 3;
149 
150 #ifdef CONFIG_INTEL_IOMMU
151 	/* Disable ppgtt on SNB if VT-d is on. */
152 	if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) {
153 		DRM_INFO("Disabling PPGTT because VT-d is on\n");
154 		return 0;
155 	}
156 #endif
157 
158 	/* Early VLV doesn't have this */
159 	if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) {
160 		DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
161 		return 0;
162 	}
163 
164 	if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists && has_full_ppgtt)
165 		return has_full_48bit_ppgtt ? 3 : 2;
166 	else
167 		return has_aliasing_ppgtt ? 1 : 0;
168 }
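/*
 * Worked examples of the checks above (editorial note, based only on the
 * logic in this function, assuming no active vGPU and i915.enable_ppgtt=-1,
 * i.e. "auto"): on Broadwell with execlists enabled none of the explicit
 * 0/1/2/3 branches match, so the final branch returns 3 (full 48-bit PPGTT);
 * on Haswell (gen 7) the same auto setting falls through to the last line
 * and returns 1 (aliasing PPGTT).  The sanitized value is later interpreted
 * as 0 = disabled, 1 = aliasing, 2 = full, 3 = full 48-bit.
 */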
169 
170 static int ppgtt_bind_vma(struct i915_vma *vma,
171 			  enum i915_cache_level cache_level,
172 			  u32 unused)
173 {
174 	u32 pte_flags = 0;
175 
176 	/* Currently applicable only to VLV */
177 	if (vma->obj->gt_ro)
178 		pte_flags |= PTE_READ_ONLY;
179 
180 	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
181 				cache_level, pte_flags);
182 
183 	return 0;
184 }
185 
186 static void ppgtt_unbind_vma(struct i915_vma *vma)
187 {
188 	vma->vm->clear_range(vma->vm,
189 			     vma->node.start,
190 			     vma->obj->base.size,
191 			     true);
192 }
193 
194 static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
195 				  enum i915_cache_level level,
196 				  bool valid)
197 {
198 	gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
199 	pte |= addr;
200 
201 	switch (level) {
202 	case I915_CACHE_NONE:
203 		pte |= PPAT_UNCACHED_INDEX;
204 		break;
205 	case I915_CACHE_WT:
206 		pte |= PPAT_DISPLAY_ELLC_INDEX;
207 		break;
208 	default:
209 		pte |= PPAT_CACHED_INDEX;
210 		break;
211 	}
212 
213 	return pte;
214 }
215 
216 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
217 				  const enum i915_cache_level level)
218 {
219 	gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
220 	pde |= addr;
221 	if (level != I915_CACHE_NONE)
222 		pde |= PPAT_CACHED_PDE_INDEX;
223 	else
224 		pde |= PPAT_UNCACHED_INDEX;
225 	return pde;
226 }
227 
228 #define gen8_pdpe_encode gen8_pde_encode
229 #define gen8_pml4e_encode gen8_pde_encode
230 
231 static gen6_pte_t snb_pte_encode(dma_addr_t addr,
232 				 enum i915_cache_level level,
233 				 bool valid, u32 unused)
234 {
235 	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
236 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
237 
238 	switch (level) {
239 	case I915_CACHE_L3_LLC:
240 	case I915_CACHE_LLC:
241 		pte |= GEN6_PTE_CACHE_LLC;
242 		break;
243 	case I915_CACHE_NONE:
244 		pte |= GEN6_PTE_UNCACHED;
245 		break;
246 	default:
247 		MISSING_CASE(level);
248 	}
249 
250 	return pte;
251 }
252 
253 static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
254 				 enum i915_cache_level level,
255 				 bool valid, u32 unused)
256 {
257 	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
258 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
259 
260 	switch (level) {
261 	case I915_CACHE_L3_LLC:
262 		pte |= GEN7_PTE_CACHE_L3_LLC;
263 		break;
264 	case I915_CACHE_LLC:
265 		pte |= GEN6_PTE_CACHE_LLC;
266 		break;
267 	case I915_CACHE_NONE:
268 		pte |= GEN6_PTE_UNCACHED;
269 		break;
270 	default:
271 		MISSING_CASE(level);
272 	}
273 
274 	return pte;
275 }
276 
277 static gen6_pte_t byt_pte_encode(dma_addr_t addr,
278 				 enum i915_cache_level level,
279 				 bool valid, u32 flags)
280 {
281 	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
282 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
283 
284 	if (!(flags & PTE_READ_ONLY))
285 		pte |= BYT_PTE_WRITEABLE;
286 
287 	if (level != I915_CACHE_NONE)
288 		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
289 
290 	return pte;
291 }
292 
293 static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
294 				 enum i915_cache_level level,
295 				 bool valid, u32 unused)
296 {
297 	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
298 	pte |= HSW_PTE_ADDR_ENCODE(addr);
299 
300 	if (level != I915_CACHE_NONE)
301 		pte |= HSW_WB_LLC_AGE3;
302 
303 	return pte;
304 }
305 
306 static gen6_pte_t iris_pte_encode(dma_addr_t addr,
307 				  enum i915_cache_level level,
308 				  bool valid, u32 unused)
309 {
310 	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
311 	pte |= HSW_PTE_ADDR_ENCODE(addr);
312 
313 	switch (level) {
314 	case I915_CACHE_NONE:
315 		break;
316 	case I915_CACHE_WT:
317 		pte |= HSW_WT_ELLC_LLC_AGE3;
318 		break;
319 	default:
320 		pte |= HSW_WB_ELLC_LLC_AGE3;
321 		break;
322 	}
323 
324 	return pte;
325 }
326 
327 static int __setup_page_dma(struct drm_device *dev,
328 			    struct i915_page_dma *p, gfp_t flags)
329 {
330 	struct device *device = &dev->pdev->dev;
331 
332 	p->page = alloc_page(flags);
333 	if (!p->page)
334 		return -ENOMEM;
335 
336 	p->daddr = dma_map_page(device,
337 				p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
338 
339 	if (dma_mapping_error(device, p->daddr)) {
340 		__free_page(p->page);
341 		return -EINVAL;
342 	}
343 
344 	return 0;
345 }
346 
347 static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
348 {
349 	return __setup_page_dma(dev, p, GFP_KERNEL);
350 }
351 
352 static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
353 {
354 	if (WARN_ON(!p->page))
355 		return;
356 
357 	dma_unmap_page(&dev->pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL);
358 	__free_page(p->page);
359 	memset(p, 0, sizeof(*p));
360 }
361 
362 static void *kmap_page_dma(struct i915_page_dma *p)
363 {
364 	return kmap_atomic(p->page);
365 }
366 
367 /* We use the flushing unmap only with ppgtt structures:
368  * page directories, page tables and scratch pages.
369  */
370 static void kunmap_page_dma(struct drm_device *dev, void *vaddr)
371 {
372 	/* There are only a few exceptions for gen >= 6: chv and bxt.
373 	 * And we are not sure about the latter, so play safe for now.
374 	 */
375 	if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
376 		drm_clflush_virt_range(vaddr, PAGE_SIZE);
377 
378 	kunmap_atomic(vaddr);
379 }
380 
381 #define kmap_px(px) kmap_page_dma(px_base(px))
382 #define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr))
383 
384 #define setup_px(dev, px) setup_page_dma((dev), px_base(px))
385 #define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px))
386 #define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v))
387 #define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v))
388 
389 static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p,
390 			  const uint64_t val)
391 {
392 	int i;
393 	uint64_t * const vaddr = kmap_page_dma(p);
394 
395 	for (i = 0; i < 512; i++)
396 		vaddr[i] = val;
397 
398 	kunmap_page_dma(dev, vaddr);
399 }
400 
401 static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p,
402 			     const uint32_t val32)
403 {
404 	uint64_t v = val32;
405 
406 	v = v << 32 | val32;
407 
408 	fill_page_dma(dev, p, v);
409 }
410 
411 static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev)
412 {
413 	struct i915_page_scratch *sp;
414 	int ret;
415 
416 	sp = kzalloc(sizeof(*sp), GFP_KERNEL);
417 	if (sp == NULL)
418 		return ERR_PTR(-ENOMEM);
419 
420 	ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO);
421 	if (ret) {
422 		kfree(sp);
423 		return ERR_PTR(ret);
424 	}
425 
426 	set_pages_uc(px_page(sp), 1);
427 
428 	return sp;
429 }
430 
431 static void free_scratch_page(struct drm_device *dev,
432 			      struct i915_page_scratch *sp)
433 {
434 	set_pages_wb(px_page(sp), 1);
435 
436 	cleanup_px(dev, sp);
437 	kfree(sp);
438 }
439 
440 static struct i915_page_table *alloc_pt(struct drm_device *dev)
441 {
442 	struct i915_page_table *pt;
443 	const size_t count = INTEL_INFO(dev)->gen >= 8 ?
444 		GEN8_PTES : GEN6_PTES;
445 	int ret = -ENOMEM;
446 
447 	pt = kzalloc(sizeof(*pt), GFP_KERNEL);
448 	if (!pt)
449 		return ERR_PTR(-ENOMEM);
450 
451 	pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
452 				GFP_KERNEL);
453 
454 	if (!pt->used_ptes)
455 		goto fail_bitmap;
456 
457 	ret = setup_px(dev, pt);
458 	if (ret)
459 		goto fail_page_m;
460 
461 	return pt;
462 
463 fail_page_m:
464 	kfree(pt->used_ptes);
465 fail_bitmap:
466 	kfree(pt);
467 
468 	return ERR_PTR(ret);
469 }
470 
471 static void free_pt(struct drm_device *dev, struct i915_page_table *pt)
472 {
473 	cleanup_px(dev, pt);
474 	kfree(pt->used_ptes);
475 	kfree(pt);
476 }
477 
478 static void gen8_initialize_pt(struct i915_address_space *vm,
479 			       struct i915_page_table *pt)
480 {
481 	gen8_pte_t scratch_pte;
482 
483 	scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
484 				      I915_CACHE_LLC, true);
485 
486 	fill_px(vm->dev, pt, scratch_pte);
487 }
488 
489 static void gen6_initialize_pt(struct i915_address_space *vm,
490 			       struct i915_page_table *pt)
491 {
492 	gen6_pte_t scratch_pte;
493 
494 	WARN_ON(px_dma(vm->scratch_page) == 0);
495 
496 	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
497 				     I915_CACHE_LLC, true, 0);
498 
499 	fill32_px(vm->dev, pt, scratch_pte);
500 }
501 
502 static struct i915_page_directory *alloc_pd(struct drm_device *dev)
503 {
504 	struct i915_page_directory *pd;
505 	int ret = -ENOMEM;
506 
507 	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
508 	if (!pd)
509 		return ERR_PTR(-ENOMEM);
510 
511 	pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
512 				sizeof(*pd->used_pdes), GFP_KERNEL);
513 	if (!pd->used_pdes)
514 		goto fail_bitmap;
515 
516 	ret = setup_px(dev, pd);
517 	if (ret)
518 		goto fail_page_m;
519 
520 	return pd;
521 
522 fail_page_m:
523 	kfree(pd->used_pdes);
524 fail_bitmap:
525 	kfree(pd);
526 
527 	return ERR_PTR(ret);
528 }
529 
530 static void free_pd(struct drm_device *dev, struct i915_page_directory *pd)
531 {
532 	if (px_page(pd)) {
533 		cleanup_px(dev, pd);
534 		kfree(pd->used_pdes);
535 		kfree(pd);
536 	}
537 }
538 
539 static void gen8_initialize_pd(struct i915_address_space *vm,
540 			       struct i915_page_directory *pd)
541 {
542 	gen8_pde_t scratch_pde;
543 
544 	scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
545 
546 	fill_px(vm->dev, pd, scratch_pde);
547 }
548 
549 static int __pdp_init(struct drm_device *dev,
550 		      struct i915_page_directory_pointer *pdp)
551 {
552 	size_t pdpes = I915_PDPES_PER_PDP(dev);
553 
554 	pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
555 				  sizeof(unsigned long),
556 				  GFP_KERNEL);
557 	if (!pdp->used_pdpes)
558 		return -ENOMEM;
559 
560 	pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory),
561 				      GFP_KERNEL);
562 	if (!pdp->page_directory) {
563 		kfree(pdp->used_pdpes);
564 		/* the PDP might be the statically allocated top level. Keep it
565 		 * as clean as possible */
566 		pdp->used_pdpes = NULL;
567 		return -ENOMEM;
568 	}
569 
570 	return 0;
571 }
572 
573 static void __pdp_fini(struct i915_page_directory_pointer *pdp)
574 {
575 	kfree(pdp->used_pdpes);
576 	kfree(pdp->page_directory);
577 	pdp->page_directory = NULL;
578 }
579 
580 static struct
581 i915_page_directory_pointer *alloc_pdp(struct drm_device *dev)
582 {
583 	struct i915_page_directory_pointer *pdp;
584 	int ret = -ENOMEM;
585 
586 	WARN_ON(!USES_FULL_48BIT_PPGTT(dev));
587 
588 	pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
589 	if (!pdp)
590 		return ERR_PTR(-ENOMEM);
591 
592 	ret = __pdp_init(dev, pdp);
593 	if (ret)
594 		goto fail_bitmap;
595 
596 	ret = setup_px(dev, pdp);
597 	if (ret)
598 		goto fail_page_m;
599 
600 	return pdp;
601 
602 fail_page_m:
603 	__pdp_fini(pdp);
604 fail_bitmap:
605 	kfree(pdp);
606 
607 	return ERR_PTR(ret);
608 }
609 
610 static void free_pdp(struct drm_device *dev,
611 		     struct i915_page_directory_pointer *pdp)
612 {
613 	__pdp_fini(pdp);
614 	if (USES_FULL_48BIT_PPGTT(dev)) {
615 		cleanup_px(dev, pdp);
616 		kfree(pdp);
617 	}
618 }
619 
620 static void gen8_initialize_pdp(struct i915_address_space *vm,
621 				struct i915_page_directory_pointer *pdp)
622 {
623 	gen8_ppgtt_pdpe_t scratch_pdpe;
624 
625 	scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
626 
627 	fill_px(vm->dev, pdp, scratch_pdpe);
628 }
629 
630 static void gen8_initialize_pml4(struct i915_address_space *vm,
631 				 struct i915_pml4 *pml4)
632 {
633 	gen8_ppgtt_pml4e_t scratch_pml4e;
634 
635 	scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
636 					  I915_CACHE_LLC);
637 
638 	fill_px(vm->dev, pml4, scratch_pml4e);
639 }
640 
641 static void
642 gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
643 			  struct i915_page_directory_pointer *pdp,
644 			  struct i915_page_directory *pd,
645 			  int index)
646 {
647 	gen8_ppgtt_pdpe_t *page_directorypo;
648 
649 	if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
650 		return;
651 
652 	page_directorypo = kmap_px(pdp);
653 	page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
654 	kunmap_px(ppgtt, page_directorypo);
655 }
656 
657 static void
658 gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
659 				  struct i915_pml4 *pml4,
660 				  struct i915_page_directory_pointer *pdp,
661 				  int index)
662 {
663 	gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
664 
665 	WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev));
666 	pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
667 	kunmap_px(ppgtt, pagemap);
668 }
669 
670 /* Broadwell Page Directory Pointer Descriptors */
671 static int gen8_write_pdp(struct drm_i915_gem_request *req,
672 			  unsigned entry,
673 			  dma_addr_t addr)
674 {
675 	struct intel_engine_cs *engine = req->engine;
676 	int ret;
677 
678 	BUG_ON(entry >= 4);
679 
680 	ret = intel_ring_begin(req, 6);
681 	if (ret)
682 		return ret;
683 
684 	intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
685 	intel_ring_emit_reg(engine, GEN8_RING_PDP_UDW(engine, entry));
686 	intel_ring_emit(engine, upper_32_bits(addr));
687 	intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
688 	intel_ring_emit_reg(engine, GEN8_RING_PDP_LDW(engine, entry));
689 	intel_ring_emit(engine, lower_32_bits(addr));
690 	intel_ring_advance(engine);
691 
692 	return 0;
693 }
694 
695 static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
696 				 struct drm_i915_gem_request *req)
697 {
698 	int i, ret;
699 
700 	for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
701 		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
702 
703 		ret = gen8_write_pdp(req, i, pd_daddr);
704 		if (ret)
705 			return ret;
706 	}
707 
708 	return 0;
709 }
710 
711 static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
712 			      struct drm_i915_gem_request *req)
713 {
714 	return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
715 }
716 
717 static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
718 				       struct i915_page_directory_pointer *pdp,
719 				       uint64_t start,
720 				       uint64_t length,
721 				       gen8_pte_t scratch_pte)
722 {
723 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
724 	gen8_pte_t *pt_vaddr;
725 	unsigned pdpe = gen8_pdpe_index(start);
726 	unsigned pde = gen8_pde_index(start);
727 	unsigned pte = gen8_pte_index(start);
728 	unsigned num_entries = length >> PAGE_SHIFT;
729 	unsigned last_pte, i;
730 
731 	if (WARN_ON(!pdp))
732 		return;
733 
734 	while (num_entries) {
735 		struct i915_page_directory *pd;
736 		struct i915_page_table *pt;
737 
738 		if (WARN_ON(!pdp->page_directory[pdpe]))
739 			break;
740 
741 		pd = pdp->page_directory[pdpe];
742 
743 		if (WARN_ON(!pd->page_table[pde]))
744 			break;
745 
746 		pt = pd->page_table[pde];
747 
748 		if (WARN_ON(!px_page(pt)))
749 			break;
750 
751 		last_pte = pte + num_entries;
752 		if (last_pte > GEN8_PTES)
753 			last_pte = GEN8_PTES;
754 
755 		pt_vaddr = kmap_px(pt);
756 
757 		for (i = pte; i < last_pte; i++) {
758 			pt_vaddr[i] = scratch_pte;
759 			num_entries--;
760 		}
761 
762 		kunmap_px(ppgtt, pt_vaddr);
763 
764 		pte = 0;
765 		if (++pde == I915_PDES) {
766 			if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
767 				break;
768 			pde = 0;
769 		}
770 	}
771 }
772 
773 static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
774 				   uint64_t start,
775 				   uint64_t length,
776 				   bool use_scratch)
777 {
778 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
779 	gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
780 						 I915_CACHE_LLC, use_scratch);
781 
782 	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
783 		gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
784 					   scratch_pte);
785 	} else {
786 		uint64_t pml4e;
787 		struct i915_page_directory_pointer *pdp;
788 
789 		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
790 			gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
791 						   scratch_pte);
792 		}
793 	}
794 }
795 
796 static void
797 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
798 			      struct i915_page_directory_pointer *pdp,
799 			      struct sg_page_iter *sg_iter,
800 			      uint64_t start,
801 			      enum i915_cache_level cache_level)
802 {
803 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
804 	gen8_pte_t *pt_vaddr;
805 	unsigned pdpe = gen8_pdpe_index(start);
806 	unsigned pde = gen8_pde_index(start);
807 	unsigned pte = gen8_pte_index(start);
808 
809 	pt_vaddr = NULL;
810 
811 	while (__sg_page_iter_next(sg_iter)) {
812 		if (pt_vaddr == NULL) {
813 			struct i915_page_directory *pd = pdp->page_directory[pdpe];
814 			struct i915_page_table *pt = pd->page_table[pde];
815 			pt_vaddr = kmap_px(pt);
816 		}
817 
818 		pt_vaddr[pte] =
819 			gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
820 					cache_level, true);
821 		if (++pte == GEN8_PTES) {
822 			kunmap_px(ppgtt, pt_vaddr);
823 			pt_vaddr = NULL;
824 			if (++pde == I915_PDES) {
825 				if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
826 					break;
827 				pde = 0;
828 			}
829 			pte = 0;
830 		}
831 	}
832 
833 	if (pt_vaddr)
834 		kunmap_px(ppgtt, pt_vaddr);
835 }
836 
837 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
838 				      struct sg_table *pages,
839 				      uint64_t start,
840 				      enum i915_cache_level cache_level,
841 				      u32 unused)
842 {
843 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
844 	struct sg_page_iter sg_iter;
845 
846 	__sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
847 
848 	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
849 		gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
850 					      cache_level);
851 	} else {
852 		struct i915_page_directory_pointer *pdp;
853 		uint64_t pml4e;
854 		uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
855 
856 		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
857 			gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
858 						      start, cache_level);
859 		}
860 	}
861 }
862 
863 static void gen8_free_page_tables(struct drm_device *dev,
864 				  struct i915_page_directory *pd)
865 {
866 	int i;
867 
868 	if (!px_page(pd))
869 		return;
870 
871 	for_each_set_bit(i, pd->used_pdes, I915_PDES) {
872 		if (WARN_ON(!pd->page_table[i]))
873 			continue;
874 
875 		free_pt(dev, pd->page_table[i]);
876 		pd->page_table[i] = NULL;
877 	}
878 }
879 
880 static int gen8_init_scratch(struct i915_address_space *vm)
881 {
882 	struct drm_device *dev = vm->dev;
883 	int ret;
884 
885 	vm->scratch_page = alloc_scratch_page(dev);
886 	if (IS_ERR(vm->scratch_page))
887 		return PTR_ERR(vm->scratch_page);
888 
889 	vm->scratch_pt = alloc_pt(dev);
890 	if (IS_ERR(vm->scratch_pt)) {
891 		ret = PTR_ERR(vm->scratch_pt);
892 		goto free_scratch_page;
893 	}
894 
895 	vm->scratch_pd = alloc_pd(dev);
896 	if (IS_ERR(vm->scratch_pd)) {
897 		ret = PTR_ERR(vm->scratch_pd);
898 		goto free_pt;
899 	}
900 
901 	if (USES_FULL_48BIT_PPGTT(dev)) {
902 		vm->scratch_pdp = alloc_pdp(dev);
903 		if (IS_ERR(vm->scratch_pdp)) {
904 			ret = PTR_ERR(vm->scratch_pdp);
905 			goto free_pd;
906 		}
907 	}
908 
909 	gen8_initialize_pt(vm, vm->scratch_pt);
910 	gen8_initialize_pd(vm, vm->scratch_pd);
911 	if (USES_FULL_48BIT_PPGTT(dev))
912 		gen8_initialize_pdp(vm, vm->scratch_pdp);
913 
914 	return 0;
915 
916 free_pd:
917 	free_pd(dev, vm->scratch_pd);
918 free_pt:
919 	free_pt(dev, vm->scratch_pt);
920 free_scratch_page:
921 	free_scratch_page(dev, vm->scratch_page);
922 
923 	return ret;
924 }
925 
926 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
927 {
928 	enum vgt_g2v_type msg;
929 	struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
930 	int i;
931 
932 	if (USES_FULL_48BIT_PPGTT(dev_priv)) {
933 		u64 daddr = px_dma(&ppgtt->pml4);
934 
935 		I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
936 		I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
937 
938 		msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
939 				VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
940 	} else {
941 		for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
942 			u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
943 
944 			I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
945 			I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
946 		}
947 
948 		msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
949 				VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
950 	}
951 
952 	I915_WRITE(vgtif_reg(g2v_notify), msg);
953 
954 	return 0;
955 }
956 
957 static void gen8_free_scratch(struct i915_address_space *vm)
958 {
959 	struct drm_device *dev = vm->dev;
960 
961 	if (USES_FULL_48BIT_PPGTT(dev))
962 		free_pdp(dev, vm->scratch_pdp);
963 	free_pd(dev, vm->scratch_pd);
964 	free_pt(dev, vm->scratch_pt);
965 	free_scratch_page(dev, vm->scratch_page);
966 }
967 
968 static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
969 				    struct i915_page_directory_pointer *pdp)
970 {
971 	int i;
972 
973 	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
974 		if (WARN_ON(!pdp->page_directory[i]))
975 			continue;
976 
977 		gen8_free_page_tables(dev, pdp->page_directory[i]);
978 		free_pd(dev, pdp->page_directory[i]);
979 	}
980 
981 	free_pdp(dev, pdp);
982 }
983 
984 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
985 {
986 	int i;
987 
988 	for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
989 		if (WARN_ON(!ppgtt->pml4.pdps[i]))
990 			continue;
991 
992 		gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]);
993 	}
994 
995 	cleanup_px(ppgtt->base.dev, &ppgtt->pml4);
996 }
997 
998 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
999 {
1000 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1001 
1002 	if (intel_vgpu_active(to_i915(vm->dev)))
1003 		gen8_ppgtt_notify_vgt(ppgtt, false);
1004 
1005 	if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
1006 		gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp);
1007 	else
1008 		gen8_ppgtt_cleanup_4lvl(ppgtt);
1009 
1010 	gen8_free_scratch(vm);
1011 }
1012 
1013 /**
1014  * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
1015  * @vm:	Master vm structure.
1016  * @pd:	Page directory for this address range.
1017  * @start:	Starting virtual address to begin allocations.
1018  * @length:	Size of the allocations.
1019  * @new_pts:	Bitmap set by function with new allocations. Likely used by the
1020  *		caller to free on error.
1021  *
1022  * Allocate the required number of page tables. Extremely similar to
1023  * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by
1024  * the page directory boundary (instead of the page directory pointer). That
1025  * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is
1026  * possible, and likely that the caller will need to use multiple calls of this
1027  * function to achieve the appropriate allocation.
1028  *
1029  * Return: 0 if success; negative error code otherwise.
1030  */
1031 static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
1032 				     struct i915_page_directory *pd,
1033 				     uint64_t start,
1034 				     uint64_t length,
1035 				     unsigned long *new_pts)
1036 {
1037 	struct drm_device *dev = vm->dev;
1038 	struct i915_page_table *pt;
1039 	uint32_t pde;
1040 
1041 	gen8_for_each_pde(pt, pd, start, length, pde) {
1042 		/* Don't reallocate page tables */
1043 		if (test_bit(pde, pd->used_pdes)) {
1044 			/* Scratch is never allocated this way */
1045 			WARN_ON(pt == vm->scratch_pt);
1046 			continue;
1047 		}
1048 
1049 		pt = alloc_pt(dev);
1050 		if (IS_ERR(pt))
1051 			goto unwind_out;
1052 
1053 		gen8_initialize_pt(vm, pt);
1054 		pd->page_table[pde] = pt;
1055 		__set_bit(pde, new_pts);
1056 		trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
1057 	}
1058 
1059 	return 0;
1060 
1061 unwind_out:
1062 	for_each_set_bit(pde, new_pts, I915_PDES)
1063 		free_pt(dev, pd->page_table[pde]);
1064 
1065 	return -ENOMEM;
1066 }
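/*
 * Worked example (editorial, derived from the structures above): with 4 KiB
 * pages one page directory covers 512 page tables * 512 ptes * 4 KiB = 1 GiB
 * of virtual address space.  A 4 MiB allocation starting at 0x3fe00000
 * therefore crosses the 1 GiB boundary at 0x40000000 and touches two page
 * directories, which is why gen8_alloc_va_range_3lvl() calls this function
 * once per pdpe via gen8_for_each_pdpe().
 */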
1067 
1068 /**
1069  * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
1070  * @vm:	Master vm structure.
1071  * @pdp:	Page directory pointer for this address range.
1072  * @start:	Starting virtual address to begin allocations.
1073  * @length:	Size of the allocations.
1074  * @new_pds:	Bitmap set by function with new allocations. Likely used by the
1075  *		caller to free on error.
1076  *
1077  * Allocate the required number of page directories starting at the pdpe index
1078  * of @start, and ending at the pdpe index of @start + @length. This function will skip
1079  * over already allocated page directories within the range, and only allocate
1080  * new ones, setting the appropriate pointer within the pdp as well as the
1081  * correct position in the bitmap @new_pds.
1082  *
1083  * The function will only allocate the pages within the range for a given page
1084  * directory pointer. In other words, if @start + @length straddles a virtually
1085  * addressed PDP boundary (512GB for 4k pages), there will be more allocations
1086  * required by the caller. This is not currently possible, and the BUG in the
1087  * code will prevent it.
1088  *
1089  * Return: 0 if success; negative error code otherwise.
1090  */
1091 static int
1092 gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
1093 				  struct i915_page_directory_pointer *pdp,
1094 				  uint64_t start,
1095 				  uint64_t length,
1096 				  unsigned long *new_pds)
1097 {
1098 	struct drm_device *dev = vm->dev;
1099 	struct i915_page_directory *pd;
1100 	uint32_t pdpe;
1101 	uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1102 
1103 	WARN_ON(!bitmap_empty(new_pds, pdpes));
1104 
1105 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1106 		if (test_bit(pdpe, pdp->used_pdpes))
1107 			continue;
1108 
1109 		pd = alloc_pd(dev);
1110 		if (IS_ERR(pd))
1111 			goto unwind_out;
1112 
1113 		gen8_initialize_pd(vm, pd);
1114 		pdp->page_directory[pdpe] = pd;
1115 		__set_bit(pdpe, new_pds);
1116 		trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
1117 	}
1118 
1119 	return 0;
1120 
1121 unwind_out:
1122 	for_each_set_bit(pdpe, new_pds, pdpes)
1123 		free_pd(dev, pdp->page_directory[pdpe]);
1124 
1125 	return -ENOMEM;
1126 }
1127 
1128 /**
1129  * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
1130  * @vm:	Master vm structure.
1131  * @pml4:	Page map level 4 for this address range.
1132  * @start:	Starting virtual address to begin allocations.
1133  * @length:	Size of the allocations.
1134  * @new_pdps:	Bitmap set by function with new allocations. Likely used by the
1135  *		caller to free on error.
1136  *
1137  * Allocate the required number of page directory pointers. Extremely similar to
1138  * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs().
1139  * The main difference is here we are limited by the pml4 boundary (instead of
1140  * the page directory pointer).
1141  *
1142  * Return: 0 if success; negative error code otherwise.
1143  */
1144 static int
1145 gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
1146 				  struct i915_pml4 *pml4,
1147 				  uint64_t start,
1148 				  uint64_t length,
1149 				  unsigned long *new_pdps)
1150 {
1151 	struct drm_device *dev = vm->dev;
1152 	struct i915_page_directory_pointer *pdp;
1153 	uint32_t pml4e;
1154 
1155 	WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));
1156 
1157 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1158 		if (!test_bit(pml4e, pml4->used_pml4es)) {
1159 			pdp = alloc_pdp(dev);
1160 			if (IS_ERR(pdp))
1161 				goto unwind_out;
1162 
1163 			gen8_initialize_pdp(vm, pdp);
1164 			pml4->pdps[pml4e] = pdp;
1165 			__set_bit(pml4e, new_pdps);
1166 			trace_i915_page_directory_pointer_entry_alloc(vm,
1167 								      pml4e,
1168 								      start,
1169 								      GEN8_PML4E_SHIFT);
1170 		}
1171 	}
1172 
1173 	return 0;
1174 
1175 unwind_out:
1176 	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1177 		free_pdp(dev, pml4->pdps[pml4e]);
1178 
1179 	return -ENOMEM;
1180 }
1181 
1182 static void
1183 free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
1184 {
1185 	kfree(new_pts);
1186 	kfree(new_pds);
1187 }
1188 
1189 /* Allocates the page directory bitmap and the array of page table bitmaps. Both
1190  * of these are sized based on the number of PDPEs in the system.
1191  */
1192 static
1193 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
1194 					 unsigned long **new_pts,
1195 					 uint32_t pdpes)
1196 {
1197 	unsigned long *pds;
1198 	unsigned long *pts;
1199 
1200 	pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
1201 	if (!pds)
1202 		return -ENOMEM;
1203 
1204 	pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long),
1205 		      GFP_TEMPORARY);
1206 	if (!pts)
1207 		goto err_out;
1208 
1209 	*new_pds = pds;
1210 	*new_pts = pts;
1211 
1212 	return 0;
1213 
1214 err_out:
1215 	free_gen8_temp_bitmaps(pds, pts);
1216 	return -ENOMEM;
1217 }
1218 
1219 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify
1220  * the page table structures, we mark them dirty so that
1221  * context switching/execlist queuing code takes extra steps
1222  * to ensure that tlbs are flushed.
1223  */
1224 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
1225 {
1226 	ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
1227 }
1228 
1229 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
1230 				    struct i915_page_directory_pointer *pdp,
1231 				    uint64_t start,
1232 				    uint64_t length)
1233 {
1234 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1235 	unsigned long *new_page_dirs, *new_page_tables;
1236 	struct drm_device *dev = vm->dev;
1237 	struct i915_page_directory *pd;
1238 	const uint64_t orig_start = start;
1239 	const uint64_t orig_length = length;
1240 	uint32_t pdpe;
1241 	uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1242 	int ret;
1243 
1244 	/* Wrap is never okay since we can only represent 48b, and we don't
1245 	 * actually use the other side of the canonical address space.
1246 	 */
1247 	if (WARN_ON(start + length < start))
1248 		return -ENODEV;
1249 
1250 	if (WARN_ON(start + length > vm->total))
1251 		return -ENODEV;
1252 
1253 	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1254 	if (ret)
1255 		return ret;
1256 
1257 	/* Do the allocations first so we can easily bail out */
1258 	ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
1259 						new_page_dirs);
1260 	if (ret) {
1261 		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1262 		return ret;
1263 	}
1264 
1265 	/* For every page directory referenced, allocate page tables */
1266 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1267 		ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
1268 						new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
1269 		if (ret)
1270 			goto err_out;
1271 	}
1272 
1273 	start = orig_start;
1274 	length = orig_length;
1275 
1276 	/* Allocations have completed successfully, so set the bitmaps, and do
1277 	 * the mappings. */
1278 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1279 		gen8_pde_t *const page_directory = kmap_px(pd);
1280 		struct i915_page_table *pt;
1281 		uint64_t pd_len = length;
1282 		uint64_t pd_start = start;
1283 		uint32_t pde;
1284 
1285 		/* Every pd should be allocated, we just did that above. */
1286 		WARN_ON(!pd);
1287 
1288 		gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
1289 			/* Same reasoning as pd */
1290 			WARN_ON(!pt);
1291 			WARN_ON(!pd_len);
1292 			WARN_ON(!gen8_pte_count(pd_start, pd_len));
1293 
1294 			/* Set our used ptes within the page table */
1295 			bitmap_set(pt->used_ptes,
1296 				   gen8_pte_index(pd_start),
1297 				   gen8_pte_count(pd_start, pd_len));
1298 
1299 			/* Our pde is now pointing to the pagetable, pt */
1300 			__set_bit(pde, pd->used_pdes);
1301 
1302 			/* Map the PDE to the page table */
1303 			page_directory[pde] = gen8_pde_encode(px_dma(pt),
1304 							      I915_CACHE_LLC);
1305 			trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
1306 							gen8_pte_index(start),
1307 							gen8_pte_count(start, length),
1308 							GEN8_PTES);
1309 
1310 			/* NB: We haven't yet mapped ptes to pages. At this
1311 			 * point we're still relying on insert_entries() */
1312 		}
1313 
1314 		kunmap_px(ppgtt, page_directory);
1315 		__set_bit(pdpe, pdp->used_pdpes);
1316 		gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
1317 	}
1318 
1319 	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1320 	mark_tlbs_dirty(ppgtt);
1321 	return 0;
1322 
1323 err_out:
1324 	while (pdpe--) {
1325 		unsigned long temp;
1326 
1327 		for_each_set_bit(temp, new_page_tables + pdpe *
1328 				BITS_TO_LONGS(I915_PDES), I915_PDES)
1329 			free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]);
1330 	}
1331 
1332 	for_each_set_bit(pdpe, new_page_dirs, pdpes)
1333 		free_pd(dev, pdp->page_directory[pdpe]);
1334 
1335 	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1336 	mark_tlbs_dirty(ppgtt);
1337 	return ret;
1338 }
1339 
1340 static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
1341 				    struct i915_pml4 *pml4,
1342 				    uint64_t start,
1343 				    uint64_t length)
1344 {
1345 	DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
1346 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1347 	struct i915_page_directory_pointer *pdp;
1348 	uint64_t pml4e;
1349 	int ret = 0;
1350 
1351 	/* Do the pml4 allocations first, so we don't need to track the newly
1352 	 * allocated tables below the pdp */
1353 	bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
1354 
1355 	/* The pagedirectory and pagetable allocations are done in the shared 3
1356 	 * and 4 level code. Just allocate the pdps.
1357 	 */
1358 	ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length,
1359 						new_pdps);
1360 	if (ret)
1361 		return ret;
1362 
1363 	WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
1364 	     "The allocation has spanned more than 512GB. "
1365 	     "It is highly likely this is incorrect.");
1366 
1367 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1368 		WARN_ON(!pdp);
1369 
1370 		ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
1371 		if (ret)
1372 			goto err_out;
1373 
1374 		gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e);
1375 	}
1376 
1377 	bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
1378 		  GEN8_PML4ES_PER_PML4);
1379 
1380 	return 0;
1381 
1382 err_out:
1383 	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1384 		gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]);
1385 
1386 	return ret;
1387 }
1388 
1389 static int gen8_alloc_va_range(struct i915_address_space *vm,
1390 			       uint64_t start, uint64_t length)
1391 {
1392 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1393 
1394 	if (USES_FULL_48BIT_PPGTT(vm->dev))
1395 		return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
1396 	else
1397 		return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
1398 }
1399 
1400 static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
1401 			  uint64_t start, uint64_t length,
1402 			  gen8_pte_t scratch_pte,
1403 			  struct seq_file *m)
1404 {
1405 	struct i915_page_directory *pd;
1406 	uint32_t pdpe;
1407 
1408 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1409 		struct i915_page_table *pt;
1410 		uint64_t pd_len = length;
1411 		uint64_t pd_start = start;
1412 		uint32_t pde;
1413 
1414 		if (!test_bit(pdpe, pdp->used_pdpes))
1415 			continue;
1416 
1417 		seq_printf(m, "\tPDPE #%d\n", pdpe);
1418 		gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
1419 			uint32_t  pte;
1420 			gen8_pte_t *pt_vaddr;
1421 
1422 			if (!test_bit(pde, pd->used_pdes))
1423 				continue;
1424 
1425 			pt_vaddr = kmap_px(pt);
1426 			for (pte = 0; pte < GEN8_PTES; pte += 4) {
1427 				uint64_t va =
1428 					(pdpe << GEN8_PDPE_SHIFT) |
1429 					(pde << GEN8_PDE_SHIFT) |
1430 					(pte << GEN8_PTE_SHIFT);
1431 				int i;
1432 				bool found = false;
1433 
1434 				for (i = 0; i < 4; i++)
1435 					if (pt_vaddr[pte + i] != scratch_pte)
1436 						found = true;
1437 				if (!found)
1438 					continue;
1439 
1440 				seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
1441 				for (i = 0; i < 4; i++) {
1442 					if (pt_vaddr[pte + i] != scratch_pte)
1443 						seq_printf(m, " %llx", pt_vaddr[pte + i]);
1444 					else
1445 						seq_puts(m, "  SCRATCH ");
1446 				}
1447 				seq_puts(m, "\n");
1448 			}
1449 			/* don't use kunmap_px, it could trigger
1450 			 * an unnecessary flush.
1451 			 */
1452 			kunmap_atomic(pt_vaddr);
1453 		}
1454 	}
1455 }
1456 
1457 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1458 {
1459 	struct i915_address_space *vm = &ppgtt->base;
1460 	uint64_t start = ppgtt->base.start;
1461 	uint64_t length = ppgtt->base.total;
1462 	gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
1463 						 I915_CACHE_LLC, true);
1464 
1465 	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
1466 		gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
1467 	} else {
1468 		uint64_t pml4e;
1469 		struct i915_pml4 *pml4 = &ppgtt->pml4;
1470 		struct i915_page_directory_pointer *pdp;
1471 
1472 		gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1473 			if (!test_bit(pml4e, pml4->used_pml4es))
1474 				continue;
1475 
1476 			seq_printf(m, "    PML4E #%llu\n", pml4e);
1477 			gen8_dump_pdp(pdp, start, length, scratch_pte, m);
1478 		}
1479 	}
1480 }
1481 
1482 static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
1483 {
1484 	unsigned long *new_page_dirs, *new_page_tables;
1485 	uint32_t pdpes = I915_PDPES_PER_PDP(ppgtt->base.dev);
1486 	int ret;
1487 
1488 	/* We allocate a temp bitmap for page tables for no gain,
1489 	 * but as this is for init only, let's keep things simple
1490 	 */
1491 	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1492 	if (ret)
1493 		return ret;
1494 
1495 	/* Allocate for all pdps regardless of how the ppgtt
1496 	 * was defined.
1497 	 */
1498 	ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
1499 						0, 1ULL << 32,
1500 						new_page_dirs);
1501 	if (!ret)
1502 		*ppgtt->pdp.used_pdpes = *new_page_dirs;
1503 
1504 	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1505 
1506 	return ret;
1507 }
1508 
1509 /*
1510  * GEN8 legacy ppgtt programming is accomplished through a maximum of 4 PDP
1511  * registers with a net effect resembling a 2-level page table in normal x86
1512  * terms. Each PDPE represents 1GB of memory: 4 * 512 * 512 * 4096 = 4GB of
1513  * legacy 32b address space.
1514  *
1515  */
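/*
 * Editorial sketch of the address decomposition (shift values assumed from
 * the gen8 index helpers used above; double-check against the headers): with
 * 4 KiB pages each level indexes 9 bits of the GPU virtual address,
 *
 *	pte   = (va >> 12) & 0x1ff;	entry in a page table     (2 MiB per PT)
 *	pde   = (va >> 21) & 0x1ff;	entry in a page directory (1 GiB per PD)
 *	pdpe  = (va >> 30) & 0x1ff;	entry in a PDP (512 GiB per PDP, 48b mode)
 *	pml4e =  va >> 39;		entry in the PML4 (48b mode only)
 *
 * Legacy 32b mode uses only 4 PDP entries, giving the 4GB noted above;
 * 48b mode uses 512 PML4 entries * 512 GiB = 256 TiB = 2^48 of VA.
 */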
1516 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1517 {
1518 	int ret;
1519 
1520 	ret = gen8_init_scratch(&ppgtt->base);
1521 	if (ret)
1522 		return ret;
1523 
1524 	ppgtt->base.start = 0;
1525 	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
1526 	ppgtt->base.allocate_va_range = gen8_alloc_va_range;
1527 	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
1528 	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
1529 	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
1530 	ppgtt->base.bind_vma = ppgtt_bind_vma;
1531 	ppgtt->debug_dump = gen8_dump_ppgtt;
1532 
1533 	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
1534 		ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
1535 		if (ret)
1536 			goto free_scratch;
1537 
1538 		gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);
1539 
1540 		ppgtt->base.total = 1ULL << 48;
1541 		ppgtt->switch_mm = gen8_48b_mm_switch;
1542 	} else {
1543 		ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp);
1544 		if (ret)
1545 			goto free_scratch;
1546 
1547 		ppgtt->base.total = 1ULL << 32;
1548 		ppgtt->switch_mm = gen8_legacy_mm_switch;
1549 		trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
1550 							      0, 0,
1551 							      GEN8_PML4E_SHIFT);
1552 
1553 		if (intel_vgpu_active(to_i915(ppgtt->base.dev))) {
1554 			ret = gen8_preallocate_top_level_pdps(ppgtt);
1555 			if (ret)
1556 				goto free_scratch;
1557 		}
1558 	}
1559 
1560 	if (intel_vgpu_active(to_i915(ppgtt->base.dev)))
1561 		gen8_ppgtt_notify_vgt(ppgtt, true);
1562 
1563 	return 0;
1564 
1565 free_scratch:
1566 	gen8_free_scratch(&ppgtt->base);
1567 	return ret;
1568 }
1569 
1570 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1571 {
1572 	struct i915_address_space *vm = &ppgtt->base;
1573 	struct i915_page_table *unused;
1574 	gen6_pte_t scratch_pte;
1575 	uint32_t pd_entry;
1576 	uint32_t  pte, pde;
1577 	uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
1578 
1579 	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1580 				     I915_CACHE_LLC, true, 0);
1581 
1582 	gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) {
1583 		u32 expected;
1584 		gen6_pte_t *pt_vaddr;
1585 		const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
1586 		pd_entry = readl(ppgtt->pd_addr + pde);
1587 		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
1588 
1589 		if (pd_entry != expected)
1590 			seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
1591 				   pde,
1592 				   pd_entry,
1593 				   expected);
1594 		seq_printf(m, "\tPDE: %x\n", pd_entry);
1595 
1596 		pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]);
1597 
1598 		for (pte = 0; pte < GEN6_PTES; pte+=4) {
1599 			unsigned long va =
1600 				(pde * PAGE_SIZE * GEN6_PTES) +
1601 				(pte * PAGE_SIZE);
1602 			int i;
1603 			bool found = false;
1604 			for (i = 0; i < 4; i++)
1605 				if (pt_vaddr[pte + i] != scratch_pte)
1606 					found = true;
1607 			if (!found)
1608 				continue;
1609 
1610 			seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
1611 			for (i = 0; i < 4; i++) {
1612 				if (pt_vaddr[pte + i] != scratch_pte)
1613 					seq_printf(m, " %08x", pt_vaddr[pte + i]);
1614 				else
1615 					seq_puts(m, "  SCRATCH ");
1616 			}
1617 			seq_puts(m, "\n");
1618 		}
1619 		kunmap_px(ppgtt, pt_vaddr);
1620 	}
1621 }
1622 
1623 /* Write pde (index) from the page directory @pd to the page table @pt */
1624 static void gen6_write_pde(struct i915_page_directory *pd,
1625 			    const int pde, struct i915_page_table *pt)
1626 {
1627 	/* Caller needs to make sure the write completes if necessary */
1628 	struct i915_hw_ppgtt *ppgtt =
1629 		container_of(pd, struct i915_hw_ppgtt, pd);
1630 	u32 pd_entry;
1631 
1632 	pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt));
1633 	pd_entry |= GEN6_PDE_VALID;
1634 
1635 	writel(pd_entry, ppgtt->pd_addr + pde);
1636 }
1637 
1638 /* Write all the page tables found in the ppgtt structure to incrementing page
1639  * directories. */
1640 static void gen6_write_page_range(struct drm_i915_private *dev_priv,
1641 				  struct i915_page_directory *pd,
1642 				  uint32_t start, uint32_t length)
1643 {
1644 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
1645 	struct i915_page_table *pt;
1646 	uint32_t pde;
1647 
1648 	gen6_for_each_pde(pt, pd, start, length, pde)
1649 		gen6_write_pde(pd, pde, pt);
1650 
1651 	/* Make sure write is complete before other code can use this page
1652 	 * table. Also required for WC mapped PTEs */
1653 	readl(ggtt->gsm);
1654 }
1655 
1656 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
1657 {
1658 	BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
1659 
1660 	return (ppgtt->pd.base.ggtt_offset / 64) << 16;
1661 }
1662 
1663 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
1664 			 struct drm_i915_gem_request *req)
1665 {
1666 	struct intel_engine_cs *engine = req->engine;
1667 	int ret;
1668 
1669 	/* NB: TLBs must be flushed and invalidated before a switch */
1670 	ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1671 	if (ret)
1672 		return ret;
1673 
1674 	ret = intel_ring_begin(req, 6);
1675 	if (ret)
1676 		return ret;
1677 
1678 	intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2));
1679 	intel_ring_emit_reg(engine, RING_PP_DIR_DCLV(engine));
1680 	intel_ring_emit(engine, PP_DIR_DCLV_2G);
1681 	intel_ring_emit_reg(engine, RING_PP_DIR_BASE(engine));
1682 	intel_ring_emit(engine, get_pd_offset(ppgtt));
1683 	intel_ring_emit(engine, MI_NOOP);
1684 	intel_ring_advance(engine);
1685 
1686 	return 0;
1687 }
1688 
1689 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
1690 			  struct drm_i915_gem_request *req)
1691 {
1692 	struct intel_engine_cs *engine = req->engine;
1693 	int ret;
1694 
1695 	/* NB: TLBs must be flushed and invalidated before a switch */
1696 	ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1697 	if (ret)
1698 		return ret;
1699 
1700 	ret = intel_ring_begin(req, 6);
1701 	if (ret)
1702 		return ret;
1703 
1704 	intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2));
1705 	intel_ring_emit_reg(engine, RING_PP_DIR_DCLV(engine));
1706 	intel_ring_emit(engine, PP_DIR_DCLV_2G);
1707 	intel_ring_emit_reg(engine, RING_PP_DIR_BASE(engine));
1708 	intel_ring_emit(engine, get_pd_offset(ppgtt));
1709 	intel_ring_emit(engine, MI_NOOP);
1710 	intel_ring_advance(engine);
1711 
1712 	/* XXX: RCS is the only one to auto invalidate the TLBs? */
1713 	if (engine->id != RCS) {
1714 		ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1715 		if (ret)
1716 			return ret;
1717 	}
1718 
1719 	return 0;
1720 }
1721 
1722 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
1723 			  struct drm_i915_gem_request *req)
1724 {
1725 	struct intel_engine_cs *engine = req->engine;
1726 	struct drm_i915_private *dev_priv = req->i915;
1727 
1728 	I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
1729 	I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt));
1730 	return 0;
1731 }
1732 
1733 static void gen8_ppgtt_enable(struct drm_device *dev)
1734 {
1735 	struct drm_i915_private *dev_priv = to_i915(dev);
1736 	struct intel_engine_cs *engine;
1737 
1738 	for_each_engine(engine, dev_priv) {
1739 		u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0;
1740 		I915_WRITE(RING_MODE_GEN7(engine),
1741 			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
1742 	}
1743 }
1744 
1745 static void gen7_ppgtt_enable(struct drm_device *dev)
1746 {
1747 	struct drm_i915_private *dev_priv = to_i915(dev);
1748 	struct intel_engine_cs *engine;
1749 	uint32_t ecochk, ecobits;
1750 
1751 	ecobits = I915_READ(GAC_ECO_BITS);
1752 	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1753 
1754 	ecochk = I915_READ(GAM_ECOCHK);
1755 	if (IS_HASWELL(dev)) {
1756 		ecochk |= ECOCHK_PPGTT_WB_HSW;
1757 	} else {
1758 		ecochk |= ECOCHK_PPGTT_LLC_IVB;
1759 		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1760 	}
1761 	I915_WRITE(GAM_ECOCHK, ecochk);
1762 
1763 	for_each_engine(engine, dev_priv) {
1764 		/* GFX_MODE is per-ring on gen7+ */
1765 		I915_WRITE(RING_MODE_GEN7(engine),
1766 			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1767 	}
1768 }
1769 
1770 static void gen6_ppgtt_enable(struct drm_device *dev)
1771 {
1772 	struct drm_i915_private *dev_priv = to_i915(dev);
1773 	uint32_t ecochk, gab_ctl, ecobits;
1774 
1775 	ecobits = I915_READ(GAC_ECO_BITS);
1776 	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1777 		   ECOBITS_PPGTT_CACHE64B);
1778 
1779 	gab_ctl = I915_READ(GAB_CTL);
1780 	I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1781 
1782 	ecochk = I915_READ(GAM_ECOCHK);
1783 	I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1784 
1785 	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1786 }
1787 
1788 /* PPGTT support for Sandybridge/Gen6 and later */
1789 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1790 				   uint64_t start,
1791 				   uint64_t length,
1792 				   bool use_scratch)
1793 {
1794 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1795 	gen6_pte_t *pt_vaddr, scratch_pte;
1796 	unsigned first_entry = start >> PAGE_SHIFT;
1797 	unsigned num_entries = length >> PAGE_SHIFT;
1798 	unsigned act_pt = first_entry / GEN6_PTES;
1799 	unsigned first_pte = first_entry % GEN6_PTES;
1800 	unsigned last_pte, i;
1801 
1802 	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1803 				     I915_CACHE_LLC, true, 0);
1804 
1805 	while (num_entries) {
1806 		last_pte = first_pte + num_entries;
1807 		if (last_pte > GEN6_PTES)
1808 			last_pte = GEN6_PTES;
1809 
1810 		pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
1811 
1812 		for (i = first_pte; i < last_pte; i++)
1813 			pt_vaddr[i] = scratch_pte;
1814 
1815 		kunmap_px(ppgtt, pt_vaddr);
1816 
1817 		num_entries -= last_pte - first_pte;
1818 		first_pte = 0;
1819 		act_pt++;
1820 	}
1821 }
1822 
1823 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
1824 				      struct sg_table *pages,
1825 				      uint64_t start,
1826 				      enum i915_cache_level cache_level, u32 flags)
1827 {
1828 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1829 	unsigned first_entry = start >> PAGE_SHIFT;
1830 	unsigned act_pt = first_entry / GEN6_PTES;
1831 	unsigned act_pte = first_entry % GEN6_PTES;
1832 	gen6_pte_t *pt_vaddr = NULL;
1833 	struct sgt_iter sgt_iter;
1834 	dma_addr_t addr;
1835 
1836 	for_each_sgt_dma(addr, sgt_iter, pages) {
1837 		if (pt_vaddr == NULL)
1838 			pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
1839 
1840 		pt_vaddr[act_pte] =
1841 			vm->pte_encode(addr, cache_level, true, flags);
1842 
1843 		if (++act_pte == GEN6_PTES) {
1844 			kunmap_px(ppgtt, pt_vaddr);
1845 			pt_vaddr = NULL;
1846 			act_pt++;
1847 			act_pte = 0;
1848 		}
1849 	}
1850 
1851 	if (pt_vaddr)
1852 		kunmap_px(ppgtt, pt_vaddr);
1853 }
1854 
1855 static int gen6_alloc_va_range(struct i915_address_space *vm,
1856 			       uint64_t start_in, uint64_t length_in)
1857 {
1858 	DECLARE_BITMAP(new_page_tables, I915_PDES);
1859 	struct drm_device *dev = vm->dev;
1860 	struct drm_i915_private *dev_priv = to_i915(dev);
1861 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
1862 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1863 	struct i915_page_table *pt;
1864 	uint32_t start, length, start_save, length_save;
1865 	uint32_t pde;
1866 	int ret;
1867 
1868 	if (WARN_ON(start_in + length_in > ppgtt->base.total))
1869 		return -ENODEV;
1870 
1871 	start = start_save = start_in;
1872 	length = length_save = length_in;
1873 
1874 	bitmap_zero(new_page_tables, I915_PDES);
1875 
1876 	/* The allocation is done in two stages so that we can bail out with
1877 	 * minimal amount of pain. The first stage finds new page tables that
1878 	 * need allocation. The second stage marks use ptes within the page
1879 	 * tables.
1880 	 */
1881 	gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
1882 		if (pt != vm->scratch_pt) {
1883 			WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
1884 			continue;
1885 		}
1886 
1887 		/* We've already allocated a page table */
1888 		WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
1889 
1890 		pt = alloc_pt(dev);
1891 		if (IS_ERR(pt)) {
1892 			ret = PTR_ERR(pt);
1893 			goto unwind_out;
1894 		}
1895 
1896 		gen6_initialize_pt(vm, pt);
1897 
1898 		ppgtt->pd.page_table[pde] = pt;
1899 		__set_bit(pde, new_page_tables);
1900 		trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
1901 	}
1902 
1903 	start = start_save;
1904 	length = length_save;
1905 
1906 	gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
1907 		DECLARE_BITMAP(tmp_bitmap, GEN6_PTES);
1908 
1909 		bitmap_zero(tmp_bitmap, GEN6_PTES);
1910 		bitmap_set(tmp_bitmap, gen6_pte_index(start),
1911 			   gen6_pte_count(start, length));
1912 
1913 		if (__test_and_clear_bit(pde, new_page_tables))
1914 			gen6_write_pde(&ppgtt->pd, pde, pt);
1915 
1916 		trace_i915_page_table_entry_map(vm, pde, pt,
1917 					 gen6_pte_index(start),
1918 					 gen6_pte_count(start, length),
1919 					 GEN6_PTES);
1920 		bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
1921 				GEN6_PTES);
1922 	}
1923 
1924 	WARN_ON(!bitmap_empty(new_page_tables, I915_PDES));
1925 
1926 	/* Make sure write is complete before other code can use this page
1927 	 * table. Also required for WC mapped PTEs */
1928 	readl(ggtt->gsm);
1929 
1930 	mark_tlbs_dirty(ppgtt);
1931 	return 0;
1932 
1933 unwind_out:
1934 	for_each_set_bit(pde, new_page_tables, I915_PDES) {
1935 		struct i915_page_table *pt = ppgtt->pd.page_table[pde];
1936 
1937 		ppgtt->pd.page_table[pde] = vm->scratch_pt;
1938 		free_pt(vm->dev, pt);
1939 	}
1940 
1941 	mark_tlbs_dirty(ppgtt);
1942 	return ret;
1943 }
1944 
1945 static int gen6_init_scratch(struct i915_address_space *vm)
1946 {
1947 	struct drm_device *dev = vm->dev;
1948 
1949 	vm->scratch_page = alloc_scratch_page(dev);
1950 	if (IS_ERR(vm->scratch_page))
1951 		return PTR_ERR(vm->scratch_page);
1952 
1953 	vm->scratch_pt = alloc_pt(dev);
1954 	if (IS_ERR(vm->scratch_pt)) {
1955 		free_scratch_page(dev, vm->scratch_page);
1956 		return PTR_ERR(vm->scratch_pt);
1957 	}
1958 
1959 	gen6_initialize_pt(vm, vm->scratch_pt);
1960 
1961 	return 0;
1962 }
1963 
1964 static void gen6_free_scratch(struct i915_address_space *vm)
1965 {
1966 	struct drm_device *dev = vm->dev;
1967 
1968 	free_pt(dev, vm->scratch_pt);
1969 	free_scratch_page(dev, vm->scratch_page);
1970 }
1971 
1972 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
1973 {
1974 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1975 	struct i915_page_directory *pd = &ppgtt->pd;
1976 	struct drm_device *dev = vm->dev;
1977 	struct i915_page_table *pt;
1978 	uint32_t pde;
1979 
1980 	drm_mm_remove_node(&ppgtt->node);
1981 
1982 	gen6_for_all_pdes(pt, pd, pde)
1983 		if (pt != vm->scratch_pt)
1984 			free_pt(dev, pt);
1985 
1986 	gen6_free_scratch(vm);
1987 }
1988 
1989 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
1990 {
1991 	struct i915_address_space *vm = &ppgtt->base;
1992 	struct drm_device *dev = ppgtt->base.dev;
1993 	struct drm_i915_private *dev_priv = to_i915(dev);
1994 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
1995 	bool retried = false;
1996 	int ret;
1997 
1998 	/* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
1999 	 * allocator works in address space sizes, so it's multiplied by page
2000 	 * size. We allocate at the top of the GTT to avoid fragmentation.
2001 	 */
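	/* Concretely, GEN6_PD_SIZE is presumably I915_PDES (512) pages, i.e.
	 * 2 MiB of GGTT address space, whose GGTT PTE slots double as the 512
	 * PDEs (see the ppgtt->pd_addr setup in gen6_ppgtt_init()).
	 */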
2002 	BUG_ON(!drm_mm_initialized(&ggtt->base.mm));
2003 
2004 	ret = gen6_init_scratch(vm);
2005 	if (ret)
2006 		return ret;
2007 
2008 alloc:
2009 	ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm,
2010 						  &ppgtt->node, GEN6_PD_SIZE,
2011 						  GEN6_PD_ALIGN, 0,
2012 						  0, ggtt->base.total,
2013 						  DRM_MM_TOPDOWN);
2014 	if (ret == -ENOSPC && !retried) {
2015 		ret = i915_gem_evict_something(dev, &ggtt->base,
2016 					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
2017 					       I915_CACHE_NONE,
2018 					       0, ggtt->base.total,
2019 					       0);
2020 		if (ret)
2021 			goto err_out;
2022 
2023 		retried = true;
2024 		goto alloc;
2025 	}
2026 
2027 	if (ret)
2028 		goto err_out;
2029 
2030 
2031 	if (ppgtt->node.start < ggtt->mappable_end)
2032 		DRM_DEBUG("Forced to use aperture for PDEs\n");
2033 
2034 	return 0;
2035 
2036 err_out:
2037 	gen6_free_scratch(vm);
2038 	return ret;
2039 }
2040 
2041 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
2042 {
2043 	return gen6_ppgtt_allocate_page_directories(ppgtt);
2044 }
2045 
2046 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
2047 				  uint64_t start, uint64_t length)
2048 {
2049 	struct i915_page_table *unused;
2050 	uint32_t pde;
2051 
2052 	gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde)
2053 		ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
2054 }
2055 
2056 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
2057 {
2058 	struct drm_device *dev = ppgtt->base.dev;
2059 	struct drm_i915_private *dev_priv = to_i915(dev);
2060 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2061 	int ret;
2062 
2063 	ppgtt->base.pte_encode = ggtt->base.pte_encode;
2064 	if (intel_vgpu_active(dev_priv) || IS_GEN6(dev))
2065 		ppgtt->switch_mm = gen6_mm_switch;
2066 	else if (IS_HASWELL(dev))
2067 		ppgtt->switch_mm = hsw_mm_switch;
2068 	else if (IS_GEN7(dev))
2069 		ppgtt->switch_mm = gen7_mm_switch;
2070 	else
2071 		BUG();
2072 
2073 	ret = gen6_ppgtt_alloc(ppgtt);
2074 	if (ret)
2075 		return ret;
2076 
2077 	ppgtt->base.allocate_va_range = gen6_alloc_va_range;
2078 	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
2079 	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
2080 	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
2081 	ppgtt->base.bind_vma = ppgtt_bind_vma;
2082 	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
2083 	ppgtt->base.start = 0;
2084 	ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
2085 	ppgtt->debug_dump = gen6_dump_ppgtt;
2086 
2087 	ppgtt->pd.base.ggtt_offset =
2088 		ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
2089 
2090 	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm +
2091 		ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
2092 
2093 	gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
2094 
2095 	gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
2096 
2097 	DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
2098 			 ppgtt->node.size >> 20,
2099 			 ppgtt->node.start / PAGE_SIZE);
2100 
2101 	DRM_DEBUG("Adding PPGTT at offset %x\n",
2102 		  ppgtt->pd.base.ggtt_offset << 10);
2103 
2104 	return 0;
2105 }
2106 
2107 static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
2108 {
2109 	ppgtt->base.dev = dev;
2110 
2111 	if (INTEL_INFO(dev)->gen < 8)
2112 		return gen6_ppgtt_init(ppgtt);
2113 	else
2114 		return gen8_ppgtt_init(ppgtt);
2115 }
2116 
2117 static void i915_address_space_init(struct i915_address_space *vm,
2118 				    struct drm_i915_private *dev_priv)
2119 {
2120 	drm_mm_init(&vm->mm, vm->start, vm->total);
2121 	vm->dev = &dev_priv->drm;
2122 	INIT_LIST_HEAD(&vm->active_list);
2123 	INIT_LIST_HEAD(&vm->inactive_list);
2124 	list_add_tail(&vm->global_link, &dev_priv->vm_list);
2125 }
2126 
2127 static void gtt_write_workarounds(struct drm_device *dev)
2128 {
2129 	struct drm_i915_private *dev_priv = to_i915(dev);
2130 
2131 	/* This function is for gtt related workarounds. It is called on driver
2132 	 * load and after a GPU reset, so workarounds can be placed here even
2133 	 * if they get overwritten by a GPU reset.
2134 	 */
2135 	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */
2136 	if (IS_BROADWELL(dev))
2137 		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
2138 	else if (IS_CHERRYVIEW(dev))
2139 		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
2140 	else if (IS_SKYLAKE(dev))
2141 		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
2142 	else if (IS_BROXTON(dev))
2143 		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
2144 }
2145 
2146 static int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
2147 {
2148 	struct drm_i915_private *dev_priv = to_i915(dev);
2149 	int ret = 0;
2150 
2151 	ret = __hw_ppgtt_init(dev, ppgtt);
2152 	if (ret == 0) {
2153 		kref_init(&ppgtt->ref);
2154 		i915_address_space_init(&ppgtt->base, dev_priv);
2155 	}
2156 
2157 	return ret;
2158 }
2159 
2160 int i915_ppgtt_init_hw(struct drm_device *dev)
2161 {
2162 	gtt_write_workarounds(dev);
2163 
2164 	/* In the case of execlists, PPGTT is enabled by the context descriptor
2165 	 * and the PDPs are contained within the context itself.  We don't
2166 	 * need to do anything here. */
2167 	if (i915.enable_execlists)
2168 		return 0;
2169 
2170 	if (!USES_PPGTT(dev))
2171 		return 0;
2172 
2173 	if (IS_GEN6(dev))
2174 		gen6_ppgtt_enable(dev);
2175 	else if (IS_GEN7(dev))
2176 		gen7_ppgtt_enable(dev);
2177 	else if (INTEL_INFO(dev)->gen >= 8)
2178 		gen8_ppgtt_enable(dev);
2179 	else
2180 		MISSING_CASE(INTEL_INFO(dev)->gen);
2181 
2182 	return 0;
2183 }
2184 
2185 struct i915_hw_ppgtt *
2186 i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
2187 {
2188 	struct i915_hw_ppgtt *ppgtt;
2189 	int ret;
2190 
2191 	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2192 	if (!ppgtt)
2193 		return ERR_PTR(-ENOMEM);
2194 
2195 	ret = i915_ppgtt_init(dev, ppgtt);
2196 	if (ret) {
2197 		kfree(ppgtt);
2198 		return ERR_PTR(ret);
2199 	}
2200 
2201 	ppgtt->file_priv = fpriv;
2202 
2203 	trace_i915_ppgtt_create(&ppgtt->base);
2204 
2205 	return ppgtt;
2206 }
2207 
2208 void i915_ppgtt_release(struct kref *kref)
2209 {
2210 	struct i915_hw_ppgtt *ppgtt =
2211 		container_of(kref, struct i915_hw_ppgtt, ref);
2212 
2213 	trace_i915_ppgtt_release(&ppgtt->base);
2214 
2215 	/* vmas should already be unbound */
2216 	WARN_ON(!list_empty(&ppgtt->base.active_list));
2217 	WARN_ON(!list_empty(&ppgtt->base.inactive_list));
2218 
2219 	list_del(&ppgtt->base.global_link);
2220 	drm_mm_takedown(&ppgtt->base.mm);
2221 
2222 	ppgtt->base.cleanup(&ppgtt->base);
2223 	kfree(ppgtt);
2224 }
2225 
2226 extern int intel_iommu_gfx_mapped;
2227 /* Certain Gen5 chipsets require idling the GPU before
2228  * unmapping anything from the GTT when VT-d is enabled.
2229  */
2230 static bool needs_idle_maps(struct drm_device *dev)
2231 {
2232 #ifdef CONFIG_INTEL_IOMMU
2233 	/* Query intel_iommu to see if we need the workaround. Presumably that
2234 	 * was loaded first.
2235 	 */
2236 	if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
2237 		return true;
2238 #endif
2239 	return false;
2240 }
2241 
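/* do_idling()/undo_idling() bracket GTT unmaps on the chipsets flagged
 * above: do_idling() forces an uninterruptible wait for the GPU to go idle
 * when ggtt->do_idle_maps is set and returns the previous mm.interruptible
 * value, which undo_idling() then restores. See i915_gem_gtt_finish_object()
 * for the pairing around dma_unmap_sg().
 */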
2242 static bool do_idling(struct drm_i915_private *dev_priv)
2243 {
2244 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2245 	bool ret = dev_priv->mm.interruptible;
2246 
2247 	if (unlikely(ggtt->do_idle_maps)) {
2248 		dev_priv->mm.interruptible = false;
2249 		if (i915_gem_wait_for_idle(dev_priv)) {
2250 			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
2251 			/* Wait a bit, in hopes it avoids the hang */
2252 			udelay(10);
2253 		}
2254 	}
2255 
2256 	return ret;
2257 }
2258 
2259 static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
2260 {
2261 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2262 
2263 	if (unlikely(ggtt->do_idle_maps))
2264 		dev_priv->mm.interruptible = interruptible;
2265 }
2266 
2267 void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
2268 {
2269 	struct intel_engine_cs *engine;
2270 
2271 	if (INTEL_INFO(dev_priv)->gen < 6)
2272 		return;
2273 
2274 	for_each_engine(engine, dev_priv) {
2275 		u32 fault_reg;
2276 		fault_reg = I915_READ(RING_FAULT_REG(engine));
2277 		if (fault_reg & RING_FAULT_VALID) {
2278 			DRM_DEBUG_DRIVER("Unexpected fault\n"
2279 					 "\tAddr: 0x%08lx\n"
2280 					 "\tAddress space: %s\n"
2281 					 "\tSource ID: %d\n"
2282 					 "\tType: %d\n",
2283 					 fault_reg & PAGE_MASK,
2284 					 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
2285 					 RING_FAULT_SRCID(fault_reg),
2286 					 RING_FAULT_FAULT_TYPE(fault_reg));
2287 			I915_WRITE(RING_FAULT_REG(engine),
2288 				   fault_reg & ~RING_FAULT_VALID);
2289 		}
2290 	}
2291 	POSTING_READ(RING_FAULT_REG(&dev_priv->engine[RCS]));
2292 }
2293 
2294 static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
2295 {
2296 	if (INTEL_INFO(dev_priv)->gen < 6) {
2297 		intel_gtt_chipset_flush();
2298 	} else {
2299 		I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2300 		POSTING_READ(GFX_FLSH_CNTL_GEN6);
2301 	}
2302 }
2303 
2304 void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
2305 {
2306 	struct drm_i915_private *dev_priv = to_i915(dev);
2307 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2308 
2309 	/* Don't bother messing with faults pre GEN6 as we have little
2310 	 * documentation supporting that it's a good idea.
2311 	 */
2312 	if (INTEL_INFO(dev)->gen < 6)
2313 		return;
2314 
2315 	i915_check_and_clear_faults(dev_priv);
2316 
2317 	ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total,
2318 			     true);
2319 
2320 	i915_ggtt_flush(dev_priv);
2321 }
2322 
2323 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
2324 {
2325 	if (!dma_map_sg(&obj->base.dev->pdev->dev,
2326 			obj->pages->sgl, obj->pages->nents,
2327 			PCI_DMA_BIDIRECTIONAL))
2328 		return -ENOSPC;
2329 
2330 	return 0;
2331 }
2332 
2333 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
2334 {
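	/* gen8 PTEs are 64 bits wide; where the platform lacks writeq() the
	 * entry is emitted as two 32-bit writes, low dword first.
	 */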
2335 #ifdef writeq
2336 	writeq(pte, addr);
2337 #else
2338 	iowrite32((u32)pte, addr);
2339 	iowrite32(pte >> 32, addr + 4);
2340 #endif
2341 }
2342 
2343 static void gen8_ggtt_insert_page(struct i915_address_space *vm,
2344 				  dma_addr_t addr,
2345 				  uint64_t offset,
2346 				  enum i915_cache_level level,
2347 				  u32 unused)
2348 {
2349 	struct drm_i915_private *dev_priv = to_i915(vm->dev);
2350 	gen8_pte_t __iomem *pte =
2351 		(gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
2352 		(offset >> PAGE_SHIFT);
2353 	int rpm_atomic_seq;
2354 
2355 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2356 
2357 	gen8_set_pte(pte, gen8_pte_encode(addr, level, true));
2358 
2359 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2360 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
2361 
2362 	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2363 }
2364 
2365 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2366 				     struct sg_table *st,
2367 				     uint64_t start,
2368 				     enum i915_cache_level level, u32 unused)
2369 {
2370 	struct drm_i915_private *dev_priv = to_i915(vm->dev);
2371 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2372 	struct sgt_iter sgt_iter;
2373 	gen8_pte_t __iomem *gtt_entries;
2374 	gen8_pte_t gtt_entry;
2375 	dma_addr_t addr;
2376 	int rpm_atomic_seq;
2377 	int i = 0;
2378 
2379 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2380 
2381 	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
2382 
2383 	for_each_sgt_dma(addr, sgt_iter, st) {
2384 		gtt_entry = gen8_pte_encode(addr, level, true);
2385 		gen8_set_pte(&gtt_entries[i++], gtt_entry);
2386 	}
2387 
2388 	/*
2389 	 * XXX: This serves as a posting read to make sure that the PTE has
2390 	 * actually been updated. There is some concern that, even though the
2391 	 * registers and PTEs are within the same BAR, they may be subject to
2392 	 * different (NUMA-like) access patterns. So even with the way we assume
2393 	 * the hardware should work, we must keep this posting read for paranoia.
2394 	 */
2395 	if (i != 0)
2396 		WARN_ON(readq(&gtt_entries[i-1]) != gtt_entry);
2397 
2398 	/* This next bit makes the above posting read even more important. We
2399 	 * want to flush the TLBs only after we're certain all the PTE updates
2400 	 * have finished.
2401 	 */
2402 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2403 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
2404 
2405 	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2406 }
2407 
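/* The __BKL variant below wraps gen8_ggtt_insert_entries() in
 * stop_machine(), i.e. runs the PTE writes with every other CPU quiesced.
 * gen8_gmch_probe() installs it instead of the plain callback on
 * Cherryview, presumably to serialise GGTT updates against any concurrent
 * access.
 */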
2408 struct insert_entries {
2409 	struct i915_address_space *vm;
2410 	struct sg_table *st;
2411 	uint64_t start;
2412 	enum i915_cache_level level;
2413 	u32 flags;
2414 };
2415 
2416 static int gen8_ggtt_insert_entries__cb(void *_arg)
2417 {
2418 	struct insert_entries *arg = _arg;
2419 	gen8_ggtt_insert_entries(arg->vm, arg->st,
2420 				 arg->start, arg->level, arg->flags);
2421 	return 0;
2422 }
2423 
2424 static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm,
2425 					  struct sg_table *st,
2426 					  uint64_t start,
2427 					  enum i915_cache_level level,
2428 					  u32 flags)
2429 {
2430 	struct insert_entries arg = { vm, st, start, level, flags };
2431 	stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL);
2432 }
2433 
2434 static void gen6_ggtt_insert_page(struct i915_address_space *vm,
2435 				  dma_addr_t addr,
2436 				  uint64_t offset,
2437 				  enum i915_cache_level level,
2438 				  u32 flags)
2439 {
2440 	struct drm_i915_private *dev_priv = to_i915(vm->dev);
2441 	gen6_pte_t __iomem *pte =
2442 		(gen6_pte_t __iomem *)dev_priv->ggtt.gsm +
2443 		(offset >> PAGE_SHIFT);
2444 	int rpm_atomic_seq;
2445 
2446 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2447 
2448 	iowrite32(vm->pte_encode(addr, level, true, flags), pte);
2449 
2450 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2451 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
2452 
2453 	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2454 }
2455 
2456 /*
2457  * Binds an object into the global gtt with the specified cache level. The object
2458  * will be accessible to the GPU via commands whose operands reference offsets
2459  * within the global GTT as well as accessible by the CPU through the GMADR
2460  * mapped BAR (dev_priv->mm.gtt->gtt).
2461  */
2462 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
2463 				     struct sg_table *st,
2464 				     uint64_t start,
2465 				     enum i915_cache_level level, u32 flags)
2466 {
2467 	struct drm_i915_private *dev_priv = to_i915(vm->dev);
2468 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2469 	struct sgt_iter sgt_iter;
2470 	gen6_pte_t __iomem *gtt_entries;
2471 	gen6_pte_t gtt_entry;
2472 	dma_addr_t addr;
2473 	int rpm_atomic_seq;
2474 	int i = 0;
2475 
2476 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2477 
2478 	gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
2479 
2480 	for_each_sgt_dma(addr, sgt_iter, st) {
2481 		gtt_entry = vm->pte_encode(addr, level, true, flags);
2482 		iowrite32(gtt_entry, &gtt_entries[i++]);
2483 	}
2484 
2485 	/* XXX: This serves as a posting read to make sure that the PTE has
2486 	 * actually been updated. There is some concern that, even though the
2487 	 * registers and PTEs are within the same BAR, they may be subject to
2488 	 * different (NUMA-like) access patterns. So even with the way we assume
2489 	 * the hardware should work, we must keep this posting read for paranoia.
2490 	 */
2491 	if (i != 0)
2492 		WARN_ON(readl(&gtt_entries[i-1]) != gtt_entry);
2493 
2494 	/* This next bit makes the above posting read even more important. We
2495 	 * want to flush the TLBs only after we're certain all the PTE updates
2496 	 * have finished.
2497 	 */
2498 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2499 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
2500 
2501 	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2502 }
2503 
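/* No-op clear_range: gen8_gmch_probe() installs this when full PPGTT is in
 * use and the scanout VT-d workaround is not needed, so unbinding skips
 * rewriting GGTT PTEs entirely (presumably because the GGTT then only backs
 * driver-owned mappings).
 */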
2504 static void nop_clear_range(struct i915_address_space *vm,
2505 			    uint64_t start,
2506 			    uint64_t length,
2507 			    bool use_scratch)
2508 {
2509 }
2510 
2511 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2512 				  uint64_t start,
2513 				  uint64_t length,
2514 				  bool use_scratch)
2515 {
2516 	struct drm_i915_private *dev_priv = to_i915(vm->dev);
2517 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2518 	unsigned first_entry = start >> PAGE_SHIFT;
2519 	unsigned num_entries = length >> PAGE_SHIFT;
2520 	gen8_pte_t scratch_pte, __iomem *gtt_base =
2521 		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
2522 	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2523 	int i;
2524 	int rpm_atomic_seq;
2525 
2526 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2527 
2528 	if (WARN(num_entries > max_entries,
2529 		 "First entry = %d; Num entries = %d (max=%d)\n",
2530 		 first_entry, num_entries, max_entries))
2531 		num_entries = max_entries;
2532 
2533 	scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
2534 				      I915_CACHE_LLC,
2535 				      use_scratch);
2536 	for (i = 0; i < num_entries; i++)
2537 		gen8_set_pte(&gtt_base[i], scratch_pte);
2538 	readl(gtt_base);
2539 
2540 	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2541 }
2542 
2543 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2544 				  uint64_t start,
2545 				  uint64_t length,
2546 				  bool use_scratch)
2547 {
2548 	struct drm_i915_private *dev_priv = to_i915(vm->dev);
2549 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2550 	unsigned first_entry = start >> PAGE_SHIFT;
2551 	unsigned num_entries = length >> PAGE_SHIFT;
2552 	gen6_pte_t scratch_pte, __iomem *gtt_base =
2553 		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
2554 	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2555 	int i;
2556 	int rpm_atomic_seq;
2557 
2558 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2559 
2560 	if (WARN(num_entries > max_entries,
2561 		 "First entry = %d; Num entries = %d (max=%d)\n",
2562 		 first_entry, num_entries, max_entries))
2563 		num_entries = max_entries;
2564 
2565 	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
2566 				     I915_CACHE_LLC, use_scratch, 0);
2567 
2568 	for (i = 0; i < num_entries; i++)
2569 		iowrite32(scratch_pte, &gtt_base[i]);
2570 	readl(gtt_base);
2571 
2572 	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2573 }
2574 
2575 static void i915_ggtt_insert_page(struct i915_address_space *vm,
2576 				  dma_addr_t addr,
2577 				  uint64_t offset,
2578 				  enum i915_cache_level cache_level,
2579 				  u32 unused)
2580 {
2581 	struct drm_i915_private *dev_priv = to_i915(vm->dev);
2582 	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2583 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2584 	int rpm_atomic_seq;
2585 
2586 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2587 
2588 	intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
2589 
2590 	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2591 }
2592 
2593 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
2594 				     struct sg_table *pages,
2595 				     uint64_t start,
2596 				     enum i915_cache_level cache_level, u32 unused)
2597 {
2598 	struct drm_i915_private *dev_priv = to_i915(vm->dev);
2599 	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2600 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2601 	int rpm_atomic_seq;
2602 
2603 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2604 
2605 	intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
2606 
2607 	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2608 
2609 }
2610 
2611 static void i915_ggtt_clear_range(struct i915_address_space *vm,
2612 				  uint64_t start,
2613 				  uint64_t length,
2614 				  bool unused)
2615 {
2616 	struct drm_i915_private *dev_priv = to_i915(vm->dev);
2617 	unsigned first_entry = start >> PAGE_SHIFT;
2618 	unsigned num_entries = length >> PAGE_SHIFT;
2619 	int rpm_atomic_seq;
2620 
2621 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2622 
2623 	intel_gtt_clear_range(first_entry, num_entries);
2624 
2625 	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2626 }
2627 
2628 static int ggtt_bind_vma(struct i915_vma *vma,
2629 			 enum i915_cache_level cache_level,
2630 			 u32 flags)
2631 {
2632 	struct drm_i915_gem_object *obj = vma->obj;
2633 	u32 pte_flags = 0;
2634 	int ret;
2635 
2636 	ret = i915_get_ggtt_vma_pages(vma);
2637 	if (ret)
2638 		return ret;
2639 
2640 	/* Currently applicable only to VLV */
2641 	if (obj->gt_ro)
2642 		pte_flags |= PTE_READ_ONLY;
2643 
2644 	vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages,
2645 				vma->node.start,
2646 				cache_level, pte_flags);
2647 
2648 	/*
2649 	 * Without aliasing PPGTT there's no difference between
2650 	 * GLOBAL/LOCAL_BIND, it's all the same PTEs. Hence we unconditionally
2651 	 * upgrade to both bound if we bind either, to avoid double-binding.
2652 	 */
2653 	vma->bound |= GLOBAL_BIND | LOCAL_BIND;
2654 
2655 	return 0;
2656 }
2657 
2658 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2659 				 enum i915_cache_level cache_level,
2660 				 u32 flags)
2661 {
2662 	u32 pte_flags;
2663 	int ret;
2664 
2665 	ret = i915_get_ggtt_vma_pages(vma);
2666 	if (ret)
2667 		return ret;
2668 
2669 	/* Currently applicable only to VLV */
2670 	pte_flags = 0;
2671 	if (vma->obj->gt_ro)
2672 		pte_flags |= PTE_READ_ONLY;
2673 
2674 
2675 	if (flags & GLOBAL_BIND) {
2676 		vma->vm->insert_entries(vma->vm,
2677 					vma->ggtt_view.pages,
2678 					vma->node.start,
2679 					cache_level, pte_flags);
2680 	}
2681 
2682 	if (flags & LOCAL_BIND) {
2683 		struct i915_hw_ppgtt *appgtt =
2684 			to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
2685 		appgtt->base.insert_entries(&appgtt->base,
2686 					    vma->ggtt_view.pages,
2687 					    vma->node.start,
2688 					    cache_level, pte_flags);
2689 	}
2690 
2691 	return 0;
2692 }
2693 
2694 static void ggtt_unbind_vma(struct i915_vma *vma)
2695 {
2696 	struct drm_device *dev = vma->vm->dev;
2697 	struct drm_i915_private *dev_priv = to_i915(dev);
2698 	struct drm_i915_gem_object *obj = vma->obj;
2699 	const uint64_t size = min_t(uint64_t,
2700 				    obj->base.size,
2701 				    vma->node.size);
2702 
2703 	if (vma->bound & GLOBAL_BIND) {
2704 		vma->vm->clear_range(vma->vm,
2705 				     vma->node.start,
2706 				     size,
2707 				     true);
2708 	}
2709 
2710 	if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) {
2711 		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
2712 
2713 		appgtt->base.clear_range(&appgtt->base,
2714 					 vma->node.start,
2715 					 size,
2716 					 true);
2717 	}
2718 }
2719 
2720 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
2721 {
2722 	struct drm_device *dev = obj->base.dev;
2723 	struct drm_i915_private *dev_priv = to_i915(dev);
2724 	bool interruptible;
2725 
2726 	interruptible = do_idling(dev_priv);
2727 
2728 	dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents,
2729 		     PCI_DMA_BIDIRECTIONAL);
2730 
2731 	undo_idling(dev_priv, interruptible);
2732 }
2733 
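/* Keep a one-page gap between GGTT nodes of differing "color". The color is
 * presumably the object's cache level; the callback is only installed on
 * !HAS_LLC platforms (see i915_gem_setup_global_gtt()), where adjacent,
 * differently cached objects apparently need a guard page between them.
 */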
2734 static void i915_gtt_color_adjust(struct drm_mm_node *node,
2735 				  unsigned long color,
2736 				  u64 *start,
2737 				  u64 *end)
2738 {
2739 	if (node->color != color)
2740 		*start += 4096;
2741 
2742 	if (!list_empty(&node->node_list)) {
2743 		node = list_entry(node->node_list.next,
2744 				  struct drm_mm_node,
2745 				  node_list);
2746 		if (node->allocated && node->color != color)
2747 			*end -= 4096;
2748 	}
2749 }
2750 
2751 static int i915_gem_setup_global_gtt(struct drm_device *dev,
2752 				     u64 start,
2753 				     u64 mappable_end,
2754 				     u64 end)
2755 {
2756 	/* Let GEM Manage all of the aperture.
2757 	 *
2758 	 * However, leave one page at the end still bound to the scratch page.
2759 	 * There are a number of places where the hardware apparently prefetches
2760 	 * past the end of the object, and we've seen multiple hangs with the
2761 	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
2762 	 * aperture.  One page should be enough to keep any prefetching inside
2763 	 * of the aperture.
2764 	 */
2765 	struct drm_i915_private *dev_priv = to_i915(dev);
2766 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2767 	struct drm_mm_node *entry;
2768 	struct drm_i915_gem_object *obj;
2769 	unsigned long hole_start, hole_end;
2770 	int ret;
2771 
2772 	BUG_ON(mappable_end > end);
2773 
2774 	ggtt->base.start = start;
2775 
2776 	/* Subtract the guard page before address space initialization to
2777 	 * shrink the range used by drm_mm */
2778 	ggtt->base.total = end - start - PAGE_SIZE;
2779 	i915_address_space_init(&ggtt->base, dev_priv);
2780 	ggtt->base.total += PAGE_SIZE;
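	/* Net effect: drm_mm was initialised one page short, so the guard page
	 * is never handed out, while ggtt->base.total again reports the full
	 * range. The guard page itself is scrubbed at the end of this function.
	 */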
2781 
2782 	ret = intel_vgt_balloon(dev_priv);
2783 	if (ret)
2784 		return ret;
2785 
2786 	if (!HAS_LLC(dev))
2787 		ggtt->base.mm.color_adjust = i915_gtt_color_adjust;
2788 
2789 	/* Mark any preallocated objects as occupied */
2790 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
2791 		struct i915_vma *vma = i915_gem_obj_to_vma(obj, &ggtt->base);
2792 
2793 		DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n",
2794 			      i915_gem_obj_ggtt_offset(obj), obj->base.size);
2795 
2796 		WARN_ON(i915_gem_obj_ggtt_bound(obj));
2797 		ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node);
2798 		if (ret) {
2799 			DRM_DEBUG_KMS("Reservation failed: %i\n", ret);
2800 			return ret;
2801 		}
2802 		vma->bound |= GLOBAL_BIND;
2803 		__i915_vma_set_map_and_fenceable(vma);
2804 		list_add_tail(&vma->vm_link, &ggtt->base.inactive_list);
2805 	}
2806 
2807 	/* Clear any non-preallocated blocks */
2808 	drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
2809 		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2810 			      hole_start, hole_end);
2811 		ggtt->base.clear_range(&ggtt->base, hole_start,
2812 				     hole_end - hole_start, true);
2813 	}
2814 
2815 	/* And finally clear the reserved guard page */
2816 	ggtt->base.clear_range(&ggtt->base, end - PAGE_SIZE, PAGE_SIZE, true);
2817 
2818 	if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) {
2819 		struct i915_hw_ppgtt *ppgtt;
2820 
2821 		ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2822 		if (!ppgtt)
2823 			return -ENOMEM;
2824 
2825 		ret = __hw_ppgtt_init(dev, ppgtt);
2826 		if (ret) {
2827 			ppgtt->base.cleanup(&ppgtt->base);
2828 			kfree(ppgtt);
2829 			return ret;
2830 		}
2831 
2832 		if (ppgtt->base.allocate_va_range)
2833 			ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
2834 							    ppgtt->base.total);
2835 		if (ret) {
2836 			ppgtt->base.cleanup(&ppgtt->base);
2837 			kfree(ppgtt);
2838 			return ret;
2839 		}
2840 
2841 		ppgtt->base.clear_range(&ppgtt->base,
2842 					ppgtt->base.start,
2843 					ppgtt->base.total,
2844 					true);
2845 
2846 		dev_priv->mm.aliasing_ppgtt = ppgtt;
2847 		WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma);
2848 		ggtt->base.bind_vma = aliasing_gtt_bind_vma;
2849 	}
2850 
2851 	return 0;
2852 }
2853 
2854 /**
2855  * i915_gem_init_ggtt - Initialize GEM for Global GTT
2856  * @dev: DRM device
2857  */
2858 void i915_gem_init_ggtt(struct drm_device *dev)
2859 {
2860 	struct drm_i915_private *dev_priv = to_i915(dev);
2861 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2862 
2863 	i915_gem_setup_global_gtt(dev, 0, ggtt->mappable_end, ggtt->base.total);
2864 }
2865 
2866 /**
2867  * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization
2868  * @dev: DRM device
2869  */
2870 void i915_ggtt_cleanup_hw(struct drm_device *dev)
2871 {
2872 	struct drm_i915_private *dev_priv = to_i915(dev);
2873 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2874 
2875 	if (dev_priv->mm.aliasing_ppgtt) {
2876 		struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
2877 
2878 		ppgtt->base.cleanup(&ppgtt->base);
2879 		kfree(ppgtt);
2880 	}
2881 
2882 	i915_gem_cleanup_stolen(dev);
2883 
2884 	if (drm_mm_initialized(&ggtt->base.mm)) {
2885 		intel_vgt_deballoon(dev_priv);
2886 
2887 		drm_mm_takedown(&ggtt->base.mm);
2888 		list_del(&ggtt->base.global_link);
2889 	}
2890 
2891 	ggtt->base.cleanup(&ggtt->base);
2892 }
2893 
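/* The *_get_total_gtt_size() helpers decode the GMCH control word into the
 * size in bytes of the GTT entry array (the GSM), not of the address space
 * it maps: gen6 treats the field as MiB directly, while gen8 and CHV use a
 * power-of-two encoding. The address-space size is derived from this later
 * in the probe functions.
 */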
2894 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
2895 {
2896 	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
2897 	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
2898 	return snb_gmch_ctl << 20;
2899 }
2900 
2901 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
2902 {
2903 	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
2904 	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
2905 	if (bdw_gmch_ctl)
2906 		bdw_gmch_ctl = 1 << bdw_gmch_ctl;
2907 
2908 #ifdef CONFIG_X86_32
2909 	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
2910 	if (bdw_gmch_ctl > 4)
2911 		bdw_gmch_ctl = 4;
2912 #endif
2913 
2914 	return bdw_gmch_ctl << 20;
2915 }
2916 
2917 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
2918 {
2919 	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
2920 	gmch_ctrl &= SNB_GMCH_GGMS_MASK;
2921 
2922 	if (gmch_ctrl)
2923 		return 1 << (20 + gmch_ctrl);
2924 
2925 	return 0;
2926 }
2927 
2928 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
2929 {
2930 	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
2931 	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
2932 	return snb_gmch_ctl << 25; /* 32 MB units */
2933 }
2934 
2935 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
2936 {
2937 	bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2938 	bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
2939 	return bdw_gmch_ctl << 25; /* 32 MB units */
2940 }
2941 
2942 static size_t chv_get_stolen_size(u16 gmch_ctrl)
2943 {
2944 	gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
2945 	gmch_ctrl &= SNB_GMCH_GMS_MASK;
2946 
2947 	/*
2948 	 * 0x0  to 0x10: 32MB increments starting at 0MB
2949 	 * 0x11 to 0x16: 4MB increments starting at 8MB
2950 	 * 0x17 to 0x1d: 4MB increments starting at 36MB
2951 	 */
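	/* e.g. gmch_ctrl == 0x11 yields (0x11 - 0x11 + 2) << 22 = 8 MiB and
	 * gmch_ctrl == 0x17 yields (0x17 - 0x17 + 9) << 22 = 36 MiB, matching
	 * the table above.
	 */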
2952 	if (gmch_ctrl < 0x11)
2953 		return gmch_ctrl << 25;
2954 	else if (gmch_ctrl < 0x17)
2955 		return (gmch_ctrl - 0x11 + 2) << 22;
2956 	else
2957 		return (gmch_ctrl - 0x17 + 9) << 22;
2958 }
2959 
2960 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
2961 {
2962 	gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2963 	gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;
2964 
2965 	if (gen9_gmch_ctl < 0xf0)
2966 		return gen9_gmch_ctl << 25; /* 32 MB units */
2967 	else
2968 		/* 4MB increments starting at 0xf0 for 4MB */
2969 		return (gen9_gmch_ctl - 0xf0 + 1) << 22;
2970 }
2971 
2972 static int ggtt_probe_common(struct drm_device *dev,
2973 			     size_t gtt_size)
2974 {
2975 	struct drm_i915_private *dev_priv = to_i915(dev);
2976 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2977 	struct i915_page_scratch *scratch_page;
2978 	phys_addr_t ggtt_phys_addr;
2979 
2980 	/* For Modern GENs the PTEs and register space are split in the BAR */
2981 	ggtt_phys_addr = pci_resource_start(dev->pdev, 0) +
2982 			 (pci_resource_len(dev->pdev, 0) / 2);
2983 
2984 	/*
2985 	 * On BXT writes larger than 64 bit to the GTT pagetable range will be
2986 	 * dropped. For WC mappings in general we have 64 byte burst writes
2987 	 * when the WC buffer is flushed, so we can't use it, but have to
2988 	 * resort to an uncached mapping. The WC issue is easily caught by the
2989 	 * readback check when writing GTT PTE entries.
2990 	 */
2991 	if (IS_BROXTON(dev))
2992 		ggtt->gsm = ioremap_nocache(ggtt_phys_addr, gtt_size);
2993 	else
2994 		ggtt->gsm = ioremap_wc(ggtt_phys_addr, gtt_size);
2995 	if (!ggtt->gsm) {
2996 		DRM_ERROR("Failed to map the gtt page table\n");
2997 		return -ENOMEM;
2998 	}
2999 
3000 	scratch_page = alloc_scratch_page(dev);
3001 	if (IS_ERR(scratch_page)) {
3002 		DRM_ERROR("Scratch setup failed\n");
3003 		/* iounmap will also get called at remove, but meh */
3004 		iounmap(ggtt->gsm);
3005 		return PTR_ERR(scratch_page);
3006 	}
3007 
3008 	ggtt->base.scratch_page = scratch_page;
3009 
3010 	return 0;
3011 }
3012 
3013 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
3014  * bits. When using advanced contexts each context stores its own PAT, but
3015  * writing this data shouldn't be harmful even in those cases. */
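/* GEN8_PPAT(i, x) presumably packs the PAT attribute x into byte i of a
 * 64-bit value, which is then written out as the two 32-bit halves
 * GEN8_PRIVATE_PAT_LO/HI below.
 */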
3016 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
3017 {
3018 	uint64_t pat;
3019 
3020 	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
3021 	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
3022 	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
3023 	      GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
3024 	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
3025 	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
3026 	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
3027 	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3028 
3029 	if (!USES_PPGTT(dev_priv))
3030 		/* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
3031 		 * so RTL will always use the value corresponding to
3032 		 * pat_sel = 000".
3033 		 * So let's disable cache for GGTT to avoid screen corruptions.
3034 		 * MOCS still can be used though.
3035 		 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
3036 		 * before this patch, i.e. the same uncached + snooping access
3037 		 * like on gen6/7 seems to be in effect.
3038 		 * - So this just fixes blitter/render access. Again it looks
3039 		 * like it's not just uncached access, but uncached + snooping.
3040 		 * So we can still hold onto all our assumptions wrt cpu
3041 		 * clflushing on LLC machines.
3042 		 */
3043 		pat = GEN8_PPAT(0, GEN8_PPAT_UC);
3044 
3045 	/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
3046 	 * write would work. */
3047 	I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
3048 	I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
3049 }
3050 
3051 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
3052 {
3053 	uint64_t pat;
3054 
3055 	/*
3056 	 * Map WB on BDW to snooped on CHV.
3057 	 *
3058 	 * Only the snoop bit has meaning for CHV, the rest is
3059 	 * ignored.
3060 	 *
3061 	 * The hardware will never snoop for certain types of accesses:
3062 	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
3063 	 * - PPGTT page tables
3064 	 * - some other special cycles
3065 	 *
3066 	 * As with BDW, we also need to consider the following for GT accesses:
3067 	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
3068 	 * so RTL will always use the value corresponding to
3069 	 * pat_sel = 000".
3070 	 * Which means we must set the snoop bit in PAT entry 0
3071 	 * in order to keep the global status page working.
3072 	 */
3073 	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
3074 	      GEN8_PPAT(1, 0) |
3075 	      GEN8_PPAT(2, 0) |
3076 	      GEN8_PPAT(3, 0) |
3077 	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
3078 	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
3079 	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
3080 	      GEN8_PPAT(7, CHV_PPAT_SNOOP);
3081 
3082 	I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
3083 	I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
3084 }
3085 
3086 static int gen8_gmch_probe(struct i915_ggtt *ggtt)
3087 {
3088 	struct drm_device *dev = ggtt->base.dev;
3089 	struct drm_i915_private *dev_priv = to_i915(dev);
3090 	u16 snb_gmch_ctl;
3091 	int ret;
3092 
3093 	/* TODO: We're not aware of mappable constraints on gen8 yet */
3094 	ggtt->mappable_base = pci_resource_start(dev->pdev, 2);
3095 	ggtt->mappable_end = pci_resource_len(dev->pdev, 2);
3096 
3097 	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
3098 		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
3099 
3100 	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3101 
3102 	if (INTEL_INFO(dev)->gen >= 9) {
3103 		ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl);
3104 		ggtt->size = gen8_get_total_gtt_size(snb_gmch_ctl);
3105 	} else if (IS_CHERRYVIEW(dev)) {
3106 		ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl);
3107 		ggtt->size = chv_get_total_gtt_size(snb_gmch_ctl);
3108 	} else {
3109 		ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl);
3110 		ggtt->size = gen8_get_total_gtt_size(snb_gmch_ctl);
3111 	}
3112 
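	/* ggtt->size is the size of the PTE array; with 8-byte gen8 PTEs each
	 * MiB of entries maps 512 MiB of address space (e.g. an 8 MiB GSM
	 * gives a 4 GiB GGTT).
	 */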
3113 	ggtt->base.total = (ggtt->size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
3114 
3115 	if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
3116 		chv_setup_private_ppat(dev_priv);
3117 	else
3118 		bdw_setup_private_ppat(dev_priv);
3119 
3120 	ret = ggtt_probe_common(dev, ggtt->size);
3121 
3122 	ggtt->base.bind_vma = ggtt_bind_vma;
3123 	ggtt->base.unbind_vma = ggtt_unbind_vma;
3124 	ggtt->base.insert_page = gen8_ggtt_insert_page;
3125 	ggtt->base.clear_range = nop_clear_range;
3126 	if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
3127 		ggtt->base.clear_range = gen8_ggtt_clear_range;
3128 
3129 	ggtt->base.insert_entries = gen8_ggtt_insert_entries;
3130 	if (IS_CHERRYVIEW(dev_priv))
3131 		ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL;
3132 
3133 	return ret;
3134 }
3135 
3136 static int gen6_gmch_probe(struct i915_ggtt *ggtt)
3137 {
3138 	struct drm_device *dev = ggtt->base.dev;
3139 	u16 snb_gmch_ctl;
3140 	int ret;
3141 
3142 	ggtt->mappable_base = pci_resource_start(dev->pdev, 2);
3143 	ggtt->mappable_end = pci_resource_len(dev->pdev, 2);
3144 
3145 	/* 64/512MB is the current min/max we actually know of, but this is just
3146 	 * a coarse sanity check.
3147 	 */
3148 	if ((ggtt->mappable_end < (64<<20) || (ggtt->mappable_end > (512<<20)))) {
3149 		DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end);
3150 		return -ENXIO;
3151 	}
3152 
3153 	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
3154 		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
3155 	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3156 
3157 	ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl);
3158 	ggtt->size = gen6_get_total_gtt_size(snb_gmch_ctl);
3159 	ggtt->base.total = (ggtt->size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
3160 
3161 	ret = ggtt_probe_common(dev, ggtt->size);
3162 
3163 	ggtt->base.clear_range = gen6_ggtt_clear_range;
3164 	ggtt->base.insert_page = gen6_ggtt_insert_page;
3165 	ggtt->base.insert_entries = gen6_ggtt_insert_entries;
3166 	ggtt->base.bind_vma = ggtt_bind_vma;
3167 	ggtt->base.unbind_vma = ggtt_unbind_vma;
3168 
3169 	return ret;
3170 }
3171 
3172 static void gen6_gmch_remove(struct i915_address_space *vm)
3173 {
3174 	struct i915_ggtt *ggtt = container_of(vm, struct i915_ggtt, base);
3175 
3176 	iounmap(ggtt->gsm);
3177 	free_scratch_page(vm->dev, vm->scratch_page);
3178 }
3179 
3180 static int i915_gmch_probe(struct i915_ggtt *ggtt)
3181 {
3182 	struct drm_device *dev = ggtt->base.dev;
3183 	struct drm_i915_private *dev_priv = to_i915(dev);
3184 	int ret;
3185 
3186 	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL);
3187 	if (!ret) {
3188 		DRM_ERROR("failed to set up gmch\n");
3189 		return -EIO;
3190 	}
3191 
3192 	intel_gtt_get(&ggtt->base.total, &ggtt->stolen_size,
3193 		      &ggtt->mappable_base, &ggtt->mappable_end);
3194 
3195 	ggtt->do_idle_maps = needs_idle_maps(&dev_priv->drm);
3196 	ggtt->base.insert_page = i915_ggtt_insert_page;
3197 	ggtt->base.insert_entries = i915_ggtt_insert_entries;
3198 	ggtt->base.clear_range = i915_ggtt_clear_range;
3199 	ggtt->base.bind_vma = ggtt_bind_vma;
3200 	ggtt->base.unbind_vma = ggtt_unbind_vma;
3201 
3202 	if (unlikely(ggtt->do_idle_maps))
3203 		DRM_INFO("applying Ironlake quirks for intel_iommu\n");
3204 
3205 	return 0;
3206 }
3207 
3208 static void i915_gmch_remove(struct i915_address_space *vm)
3209 {
3210 	intel_gmch_remove();
3211 }
3212 
3213 /**
3214  * i915_ggtt_init_hw - Initialize GGTT hardware
3215  * @dev: DRM device
3216  */
3217 int i915_ggtt_init_hw(struct drm_device *dev)
3218 {
3219 	struct drm_i915_private *dev_priv = to_i915(dev);
3220 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
3221 	int ret;
3222 
3223 	if (INTEL_INFO(dev)->gen <= 5) {
3224 		ggtt->probe = i915_gmch_probe;
3225 		ggtt->base.cleanup = i915_gmch_remove;
3226 	} else if (INTEL_INFO(dev)->gen < 8) {
3227 		ggtt->probe = gen6_gmch_probe;
3228 		ggtt->base.cleanup = gen6_gmch_remove;
3229 
3230 		if (HAS_EDRAM(dev))
3231 			ggtt->base.pte_encode = iris_pte_encode;
3232 		else if (IS_HASWELL(dev))
3233 			ggtt->base.pte_encode = hsw_pte_encode;
3234 		else if (IS_VALLEYVIEW(dev))
3235 			ggtt->base.pte_encode = byt_pte_encode;
3236 		else if (INTEL_INFO(dev)->gen >= 7)
3237 			ggtt->base.pte_encode = ivb_pte_encode;
3238 		else
3239 			ggtt->base.pte_encode = snb_pte_encode;
3240 	} else {
3241 		ggtt->probe = gen8_gmch_probe;
3242 		ggtt->base.cleanup = gen6_gmch_remove;
3243 	}
3244 
3245 	ggtt->base.dev = dev;
3246 	ggtt->base.is_ggtt = true;
3247 
3248 	ret = ggtt->probe(ggtt);
3249 	if (ret)
3250 		return ret;
3251 
3252 	if ((ggtt->base.total - 1) >> 32) {
3253 		DRM_ERROR("We never expected a Global GTT with more than 32 bits"
3254 			  " of address space! Found %lldM!\n",
3255 			  ggtt->base.total >> 20);
3256 		ggtt->base.total = 1ULL << 32;
3257 		ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total);
3258 	}
3259 
3260 	/*
3261 	 * Initialise stolen early so that we may reserve preallocated
3262 	 * objects for the BIOS to KMS transition.
3263 	 */
3264 	ret = i915_gem_init_stolen(dev);
3265 	if (ret)
3266 		goto out_gtt_cleanup;
3267 
3268 	/* GMADR is the PCI mmio aperture into the global GTT. */
3269 	DRM_INFO("Memory usable by graphics device = %lluM\n",
3270 		 ggtt->base.total >> 20);
3271 	DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20);
3272 	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", ggtt->stolen_size >> 20);
3273 #ifdef CONFIG_INTEL_IOMMU
3274 	if (intel_iommu_gfx_mapped)
3275 		DRM_INFO("VT-d active for gfx access\n");
3276 #endif
3277 
3278 	return 0;
3279 
3280 out_gtt_cleanup:
3281 	ggtt->base.cleanup(&ggtt->base);
3282 
3283 	return ret;
3284 }
3285 
3286 int i915_ggtt_enable_hw(struct drm_device *dev)
3287 {
3288 	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
3289 		return -EIO;
3290 
3291 	return 0;
3292 }
3293 
3294 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
3295 {
3296 	struct drm_i915_private *dev_priv = to_i915(dev);
3297 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
3298 	struct drm_i915_gem_object *obj;
3299 	struct i915_vma *vma;
3300 
3301 	i915_check_and_clear_faults(dev_priv);
3302 
3303 	/* First fill our portion of the GTT with scratch pages */
3304 	ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total,
3305 			       true);
3306 
3307 	/* Cache flush objects bound into GGTT and rebind them. */
3308 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
3309 		list_for_each_entry(vma, &obj->vma_list, obj_link) {
3310 			if (vma->vm != &ggtt->base)
3311 				continue;
3312 
3313 			WARN_ON(i915_vma_bind(vma, obj->cache_level,
3314 					      PIN_UPDATE));
3315 		}
3316 
3317 		if (obj->pin_display)
3318 			WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
3319 	}
3320 
3321 	if (INTEL_INFO(dev)->gen >= 8) {
3322 		if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
3323 			chv_setup_private_ppat(dev_priv);
3324 		else
3325 			bdw_setup_private_ppat(dev_priv);
3326 
3327 		return;
3328 	}
3329 
3330 	if (USES_PPGTT(dev)) {
3331 		struct i915_address_space *vm;
3332 
3333 		list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
3334 			/* TODO: Perhaps it shouldn't be gen6 specific */
3335 
3336 			struct i915_hw_ppgtt *ppgtt;
3337 
3338 			if (vm->is_ggtt)
3339 				ppgtt = dev_priv->mm.aliasing_ppgtt;
3340 			else
3341 				ppgtt = i915_vm_to_ppgtt(vm);
3342 
3343 			gen6_write_page_range(dev_priv, &ppgtt->pd,
3344 					      0, ppgtt->base.total);
3345 		}
3346 	}
3347 
3348 	i915_ggtt_flush(dev_priv);
3349 }
3350 
3351 static struct i915_vma *
3352 __i915_gem_vma_create(struct drm_i915_gem_object *obj,
3353 		      struct i915_address_space *vm,
3354 		      const struct i915_ggtt_view *ggtt_view)
3355 {
3356 	struct i915_vma *vma;
3357 
3358 	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
3359 		return ERR_PTR(-EINVAL);
3360 
3361 	vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
3362 	if (vma == NULL)
3363 		return ERR_PTR(-ENOMEM);
3364 
3365 	INIT_LIST_HEAD(&vma->vm_link);
3366 	INIT_LIST_HEAD(&vma->obj_link);
3367 	INIT_LIST_HEAD(&vma->exec_list);
3368 	vma->vm = vm;
3369 	vma->obj = obj;
3370 	vma->is_ggtt = i915_is_ggtt(vm);
3371 
3372 	if (i915_is_ggtt(vm))
3373 		vma->ggtt_view = *ggtt_view;
3374 	else
3375 		i915_ppgtt_get(i915_vm_to_ppgtt(vm));
3376 
3377 	list_add_tail(&vma->obj_link, &obj->vma_list);
3378 
3379 	return vma;
3380 }
3381 
3382 struct i915_vma *
3383 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
3384 				  struct i915_address_space *vm)
3385 {
3386 	struct i915_vma *vma;
3387 
3388 	vma = i915_gem_obj_to_vma(obj, vm);
3389 	if (!vma)
3390 		vma = __i915_gem_vma_create(obj, vm,
3391 					    i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL);
3392 
3393 	return vma;
3394 }
3395 
3396 struct i915_vma *
3397 i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
3398 				       const struct i915_ggtt_view *view)
3399 {
3400 	struct drm_device *dev = obj->base.dev;
3401 	struct drm_i915_private *dev_priv = to_i915(dev);
3402 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
3403 	struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
3404 
3405 	if (!vma)
3406 		vma = __i915_gem_vma_create(obj, &ggtt->base, view);
3407 
3408 	return vma;
3409 
3410 }
3411 
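/* rotate_pages() walks the source page grid column by column, bottom row
 * first, so the DMA addresses come out in the order of the grid rotated by
 * 90 degrees. For example, a 2x2 grid {0, 1, 2, 3} with stride 2 is emitted
 * as {2, 0, 3, 1}.
 */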
3412 static struct scatterlist *
3413 rotate_pages(const dma_addr_t *in, unsigned int offset,
3414 	     unsigned int width, unsigned int height,
3415 	     unsigned int stride,
3416 	     struct sg_table *st, struct scatterlist *sg)
3417 {
3418 	unsigned int column, row;
3419 	unsigned int src_idx;
3420 
3421 	for (column = 0; column < width; column++) {
3422 		src_idx = stride * (height - 1) + column;
3423 		for (row = 0; row < height; row++) {
3424 			st->nents++;
3425 			/* We don't need the pages, but need to initialize
3426 			 * the entries so the sg list can be happily traversed.
3427 			 * The only things we need are the DMA addresses.
3428 			 */
3429 			sg_set_page(sg, NULL, PAGE_SIZE, 0);
3430 			sg_dma_address(sg) = in[offset + src_idx];
3431 			sg_dma_len(sg) = PAGE_SIZE;
3432 			sg = sg_next(sg);
3433 			src_idx -= stride;
3434 		}
3435 	}
3436 
3437 	return sg;
3438 }
3439 
3440 static struct sg_table *
3441 intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
3442 			  struct drm_i915_gem_object *obj)
3443 {
3444 	const size_t n_pages = obj->base.size / PAGE_SIZE;
3445 	unsigned int size_pages = rot_info->plane[0].width * rot_info->plane[0].height;
3446 	unsigned int size_pages_uv;
3447 	struct sgt_iter sgt_iter;
3448 	dma_addr_t dma_addr;
3449 	unsigned long i;
3450 	dma_addr_t *page_addr_list;
3451 	struct sg_table *st;
3452 	unsigned int uv_start_page;
3453 	struct scatterlist *sg;
3454 	int ret = -ENOMEM;
3455 
3456 	/* Allocate a temporary list of source pages for random access. */
3457 	page_addr_list = drm_malloc_gfp(n_pages,
3458 					sizeof(dma_addr_t),
3459 					GFP_TEMPORARY);
3460 	if (!page_addr_list)
3461 		return ERR_PTR(ret);
3462 
3463 	/* Account for UV plane with NV12. */
3464 	if (rot_info->pixel_format == DRM_FORMAT_NV12)
3465 		size_pages_uv = rot_info->plane[1].width * rot_info->plane[1].height;
3466 	else
3467 		size_pages_uv = 0;
3468 
3469 	/* Allocate target SG list. */
3470 	st = kmalloc(sizeof(*st), GFP_KERNEL);
3471 	if (!st)
3472 		goto err_st_alloc;
3473 
3474 	ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL);
3475 	if (ret)
3476 		goto err_sg_alloc;
3477 
3478 	/* Populate source page list from the object. */
3479 	i = 0;
3480 	for_each_sgt_dma(dma_addr, sgt_iter, obj->pages)
3481 		page_addr_list[i++] = dma_addr;
3482 
3483 	GEM_BUG_ON(i != n_pages);
3484 	st->nents = 0;
3485 	sg = st->sgl;
3486 
3487 	/* Rotate the pages. */
3488 	sg = rotate_pages(page_addr_list, 0,
3489 			  rot_info->plane[0].width, rot_info->plane[0].height,
3490 			  rot_info->plane[0].width,
3491 			  st, sg);
3492 
3493 	/* Append the UV plane if NV12. */
3494 	if (rot_info->pixel_format == DRM_FORMAT_NV12) {
3495 		uv_start_page = size_pages;
3496 
3497 		/* Check for tile-row un-alignment. */
3498 		if (offset_in_page(rot_info->uv_offset))
3499 			uv_start_page--;
3500 
3501 		rot_info->uv_start_page = uv_start_page;
3502 
3503 		sg = rotate_pages(page_addr_list, rot_info->uv_start_page,
3504 				  rot_info->plane[1].width, rot_info->plane[1].height,
3505 				  rot_info->plane[1].width,
3506 				  st, sg);
3507 	}
3508 
3509 	DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages (%u plane 0)).\n",
3510 		      obj->base.size, rot_info->plane[0].width,
3511 		      rot_info->plane[0].height, size_pages + size_pages_uv,
3512 		      size_pages);
3513 
3514 	drm_free_large(page_addr_list);
3515 
3516 	return st;
3517 
3518 err_sg_alloc:
3519 	kfree(st);
3520 err_st_alloc:
3521 	drm_free_large(page_addr_list);
3522 
3523 	DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%d) (%ux%u tiles, %u pages (%u plane 0))\n",
3524 		      obj->base.size, ret, rot_info->plane[0].width,
3525 		      rot_info->plane[0].height, size_pages + size_pages_uv,
3526 		      size_pages);
3527 	return ERR_PTR(ret);
3528 }
3529 
3530 static struct sg_table *
3531 intel_partial_pages(const struct i915_ggtt_view *view,
3532 		    struct drm_i915_gem_object *obj)
3533 {
3534 	struct sg_table *st;
3535 	struct scatterlist *sg;
3536 	struct sg_page_iter obj_sg_iter;
3537 	int ret = -ENOMEM;
3538 
3539 	st = kmalloc(sizeof(*st), GFP_KERNEL);
3540 	if (!st)
3541 		goto err_st_alloc;
3542 
3543 	ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL);
3544 	if (ret)
3545 		goto err_sg_alloc;
3546 
3547 	sg = st->sgl;
3548 	st->nents = 0;
3549 	for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
3550 		view->params.partial.offset)
3551 	{
3552 		if (st->nents >= view->params.partial.size)
3553 			break;
3554 
3555 		sg_set_page(sg, NULL, PAGE_SIZE, 0);
3556 		sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter);
3557 		sg_dma_len(sg) = PAGE_SIZE;
3558 
3559 		sg = sg_next(sg);
3560 		st->nents++;
3561 	}
3562 
3563 	return st;
3564 
3565 err_sg_alloc:
3566 	kfree(st);
3567 err_st_alloc:
3568 	return ERR_PTR(ret);
3569 }
3570 
3571 static int
3572 i915_get_ggtt_vma_pages(struct i915_vma *vma)
3573 {
3574 	int ret = 0;
3575 
3576 	if (vma->ggtt_view.pages)
3577 		return 0;
3578 
3579 	if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
3580 		vma->ggtt_view.pages = vma->obj->pages;
3581 	else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
3582 		vma->ggtt_view.pages =
3583 			intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj);
3584 	else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
3585 		vma->ggtt_view.pages =
3586 			intel_partial_pages(&vma->ggtt_view, vma->obj);
3587 	else
3588 		WARN_ONCE(1, "GGTT view %u not implemented!\n",
3589 			  vma->ggtt_view.type);
3590 
3591 	if (!vma->ggtt_view.pages) {
3592 		DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
3593 			  vma->ggtt_view.type);
3594 		ret = -EINVAL;
3595 	} else if (IS_ERR(vma->ggtt_view.pages)) {
3596 		ret = PTR_ERR(vma->ggtt_view.pages);
3597 		vma->ggtt_view.pages = NULL;
3598 		DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3599 			  vma->ggtt_view.type, ret);
3600 	}
3601 
3602 	return ret;
3603 }
3604 
3605 /**
3606  * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
3607  * @vma: VMA to map
3608  * @cache_level: mapping cache level
3609  * @flags: flags like global or local mapping
3610  *
3611  * DMA addresses are taken from the scatter-gather table of this object (or of
3612  * this VMA in case of non-default GGTT views) and PTE entries set up.
3613  * Note that DMA addresses are also the only part of the SG table we care about.
3614  */
3615 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
3616 		  u32 flags)
3617 {
3618 	int ret;
3619 	u32 bind_flags;
3620 
3621 	if (WARN_ON(flags == 0))
3622 		return -EINVAL;
3623 
3624 	bind_flags = 0;
3625 	if (flags & PIN_GLOBAL)
3626 		bind_flags |= GLOBAL_BIND;
3627 	if (flags & PIN_USER)
3628 		bind_flags |= LOCAL_BIND;
3629 
3630 	if (flags & PIN_UPDATE)
3631 		bind_flags |= vma->bound;
3632 	else
3633 		bind_flags &= ~vma->bound;
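	/* At this point bind_flags holds only the flavours that still need
	 * PTEs written: PIN_UPDATE rewrites whatever was already bound, while
	 * otherwise the already-bound flavours are masked out above.
	 */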
3634 
3635 	if (bind_flags == 0)
3636 		return 0;
3637 
3638 	if (vma->bound == 0 && vma->vm->allocate_va_range) {
3639 		/* XXX: i915_vma_pin() will fix this +- hack */
3640 		vma->pin_count++;
3641 		trace_i915_va_alloc(vma);
3642 		ret = vma->vm->allocate_va_range(vma->vm,
3643 						 vma->node.start,
3644 						 vma->node.size);
3645 		vma->pin_count--;
3646 		if (ret)
3647 			return ret;
3648 	}
3649 
3650 	ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
3651 	if (ret)
3652 		return ret;
3653 
3654 	vma->bound |= bind_flags;
3655 
3656 	return 0;
3657 }
3658 
3659 /**
3660  * i915_ggtt_view_size - Get the size of a GGTT view.
3661  * @obj: Object the view is of.
3662  * @view: The view in question.
3663  *
3664  * Return: The size of the GGTT view in bytes.
3665  */
3666 size_t
3667 i915_ggtt_view_size(struct drm_i915_gem_object *obj,
3668 		    const struct i915_ggtt_view *view)
3669 {
3670 	if (view->type == I915_GGTT_VIEW_NORMAL) {
3671 		return obj->base.size;
3672 	} else if (view->type == I915_GGTT_VIEW_ROTATED) {
3673 		return intel_rotation_info_size(&view->params.rotated) << PAGE_SHIFT;
3674 	} else if (view->type == I915_GGTT_VIEW_PARTIAL) {
3675 		return view->params.partial.size << PAGE_SHIFT;
3676 	} else {
3677 		WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type);
3678 		return obj->base.size;
3679 	}
3680 }
3681 
3682 void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
3683 {
3684 	void __iomem *ptr;
3685 
3686 	lockdep_assert_held(&vma->vm->dev->struct_mutex);
3687 	if (WARN_ON(!vma->obj->map_and_fenceable))
3688 		return ERR_PTR(-ENODEV);
3689 
3690 	GEM_BUG_ON(!vma->is_ggtt);
3691 	GEM_BUG_ON((vma->bound & GLOBAL_BIND) == 0);
3692 
3693 	ptr = vma->iomap;
3694 	if (ptr == NULL) {
3695 		ptr = io_mapping_map_wc(i915_vm_to_ggtt(vma->vm)->mappable,
3696 					vma->node.start,
3697 					vma->node.size);
3698 		if (ptr == NULL)
3699 			return ERR_PTR(-ENOMEM);
3700 
3701 		vma->iomap = ptr;
3702 	}
3703 
3704 	vma->pin_count++;
3705 	return ptr;
3706 }
3707