xref: /linux/drivers/gpu/drm/imagination/pvr_mmu.c (revision 7f81907b7e3f93dfed2e903af52659baa4944341)
1 // SPDX-License-Identifier: GPL-2.0-only OR MIT
2 /* Copyright (c) 2023 Imagination Technologies Ltd. */
3 
4 #include "pvr_mmu.h"
5 
6 #include "pvr_ccb.h"
7 #include "pvr_device.h"
8 #include "pvr_fw.h"
9 #include "pvr_gem.h"
10 #include "pvr_power.h"
11 #include "pvr_rogue_fwif.h"
12 #include "pvr_rogue_mmu_defs.h"
13 
14 #include <drm/drm_drv.h>
15 #include <linux/atomic.h>
16 #include <linux/bitops.h>
17 #include <linux/dma-mapping.h>
18 #include <linux/kmemleak.h>
19 #include <linux/minmax.h>
20 #include <linux/property.h>
21 #include <linux/sizes.h>
22 
/*
 * Helpers for power-of-two sizes.
 *
 * PVR_SHIFT_FROM_SIZE() counts the trailing zero bits of @size_ using
 * __builtin_ctzll(), which equals log2(@size_) when @size_ is a power of two.
 * @size_ must be non-zero, as the builtin's result is undefined for zero.
 *
 * PVR_MASK_FROM_SIZE() produces the 64-bit complement of (@size_ - 1), i.e. a
 * mask that clears the offset bits below @size_ when @size_ is a power of two.
 */
#define PVR_SHIFT_FROM_SIZE(size_) (__builtin_ctzll(size_))
#define PVR_MASK_FROM_SIZE(size_) (~((size_) - U64_C(1)))
25 
/*
 * The value of the device page size (%PVR_DEVICE_PAGE_SIZE) is currently
 * pegged to the host page size (%PAGE_SIZE). The conditional chain below does
 * two things:
 *
 *  - it fails the build (#error) unless the device page size is one of the
 *    sizes handled here (4K, 16K, 64K, 256K, 1M or 2M), and
 *  - it aliases the page-size-specific ROGUE_MMUCTRL_* definitions to generic
 *    "_X" names so the MMU code below does not need to know which page size
 *    was selected.
 */
#if (PVR_DEVICE_PAGE_SIZE == SZ_4K)
# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_4KB
# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_4KB_RANGE_SHIFT
# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_4KB_RANGE_CLRMSK
#elif (PVR_DEVICE_PAGE_SIZE == SZ_16K)
# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_16KB
# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_16KB_RANGE_SHIFT
# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_16KB_RANGE_CLRMSK
#elif (PVR_DEVICE_PAGE_SIZE == SZ_64K)
# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_64KB
# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_64KB_RANGE_SHIFT
# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_64KB_RANGE_CLRMSK
#elif (PVR_DEVICE_PAGE_SIZE == SZ_256K)
# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_256KB
# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_256KB_RANGE_SHIFT
# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_256KB_RANGE_CLRMSK
#elif (PVR_DEVICE_PAGE_SIZE == SZ_1M)
# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_1MB
# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_1MB_RANGE_SHIFT
# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_1MB_RANGE_CLRMSK
#elif (PVR_DEVICE_PAGE_SIZE == SZ_2M)
# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_2MB
# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_2MB_RANGE_SHIFT
# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_2MB_RANGE_CLRMSK
#else
# error Unsupported device page size PVR_DEVICE_PAGE_SIZE
#endif
59 
/*
 * Number of entries in a level 0 page table for the selected device page size.
 * This is the base entry count (%ROGUE_MMUCTRL_ENTRIES_PT_VALUE) shifted right
 * by the difference between the device page shift and the 4KiB page shift, so
 * the count halves each time the page size doubles relative to 4KiB.
 */
#define ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X   \
	(ROGUE_MMUCTRL_ENTRIES_PT_VALUE >> \
	 (PVR_DEVICE_PAGE_SHIFT - PVR_SHIFT_FROM_SIZE(SZ_4K)))
63 
/*
 * Page table level selector for MMU cache synchronisation. The numeric values
 * 0..2 match the page table level numbering used throughout this file, with
 * -1 reserved for "no level".
 *
 * NOTE(review): no user of this enum is visible in this part of the file;
 * presumably each level maps onto the corresponding
 * PVR_MMU_SYNC_LEVEL_*_FLAGS value - confirm against the callers.
 */
enum pvr_mmu_sync_level {
	PVR_MMU_SYNC_LEVEL_NONE = -1,
	PVR_MMU_SYNC_LEVEL_0 = 0,
	PVR_MMU_SYNC_LEVEL_1 = 1,
	PVR_MMU_SYNC_LEVEL_2 = 2,
};
70 
/*
 * MMU cache flush flag sets, one per page table level. The sets are
 * cumulative: level 0 requests the PT, INTERRUPT and TLB flags, level 1 adds
 * the PD flag to the level 0 set, and level 2 adds the PC flag to the level 1
 * set.
 */
#define PVR_MMU_SYNC_LEVEL_0_FLAGS (ROGUE_FWIF_MMUCACHEDATA_FLAGS_PT | \
				    ROGUE_FWIF_MMUCACHEDATA_FLAGS_INTERRUPT | \
				    ROGUE_FWIF_MMUCACHEDATA_FLAGS_TLB)
#define PVR_MMU_SYNC_LEVEL_1_FLAGS (PVR_MMU_SYNC_LEVEL_0_FLAGS | ROGUE_FWIF_MMUCACHEDATA_FLAGS_PD)
#define PVR_MMU_SYNC_LEVEL_2_FLAGS (PVR_MMU_SYNC_LEVEL_1_FLAGS | ROGUE_FWIF_MMUCACHEDATA_FLAGS_PC)
76 
77 /**
78  * pvr_mmu_set_flush_flags() - Set MMU cache flush flags for next call to
79  *                             pvr_mmu_flush_exec().
80  * @pvr_dev: Target PowerVR device.
81  * @flags: MMU flush flags. Must be one of %PVR_MMU_SYNC_LEVEL_*_FLAGS.
82  *
83  * This function must be called following any possible change to the MMU page
84  * tables.
85  */
86 static void pvr_mmu_set_flush_flags(struct pvr_device *pvr_dev, u32 flags)
87 {
88 	atomic_fetch_or(flags, &pvr_dev->mmu_flush_cache_flags);
89 }
90 
91 /**
92  * pvr_mmu_flush_request_all() - Request flush of all MMU caches when
93  * subsequently calling pvr_mmu_flush_exec().
94  * @pvr_dev: Target PowerVR device.
95  *
96  * This function must be called following any possible change to the MMU page
97  * tables.
98  */
99 void pvr_mmu_flush_request_all(struct pvr_device *pvr_dev)
100 {
101 	pvr_mmu_set_flush_flags(pvr_dev, PVR_MMU_SYNC_LEVEL_2_FLAGS);
102 }
103 
/**
 * pvr_mmu_flush_exec() - Execute a flush of all MMU caches previously
 * requested.
 * @pvr_dev: Target PowerVR device.
 * @wait: Do not return until the flush is completed. Note that the first
 *        submission attempt always waits for completion regardless of this
 *        value; @wait only decides whether the retry issued after a GPU reset
 *        is waited on.
 *
 * This function must be called prior to submitting any new GPU job. The flush
 * will complete before the jobs are scheduled, so this can be called once after
 * a series of maps. However, a single unmap should always be immediately
 * followed by a flush and it should be explicitly waited by setting @wait.
 *
 * The pending flush flags are atomically fetched and reset to zero before the
 * command is built. If the firmware has not booted yet, or no flags are
 * pending, nothing is sent.
 *
 * As a failure to flush the MMU caches could risk memory corruption, if the
 * flush fails (implying the firmware is not responding) then the GPU is hard
 * reset and the flush is retried once. If the retry fails as well, the GPU
 * device is marked as lost.
 *
 * Returns:
 *  * 0 on success, including the case where there was nothing to flush, or
 *  * -%EIO if the device is unavailable, or
 *  * Any error returned by pvr_power_reset(), pvr_kccb_send_cmd() or
 *    pvr_kccb_wait_for_completion() on the reset-and-retry path.
 */
int pvr_mmu_flush_exec(struct pvr_device *pvr_dev, bool wait)
{
	struct rogue_fwif_kccb_cmd cmd_mmu_cache = {};
	struct rogue_fwif_mmucachedata *cmd_mmu_cache_data =
		&cmd_mmu_cache.cmd_data.mmu_cache_data;
	int err = 0;
	u32 slot;
	int idx;

	if (!drm_dev_enter(from_pvr_device(pvr_dev), &idx))
		return -EIO;

	/* Can't flush MMU if the firmware hasn't booted yet. */
	if (!pvr_dev->fw_dev.booted)
		goto err_drm_dev_exit;

	/* Claim the pending flags and reset them in one atomic step. */
	cmd_mmu_cache_data->cache_flags =
		atomic_xchg(&pvr_dev->mmu_flush_cache_flags, 0);

	/* Nothing was requested since the last flush; err is still 0 here. */
	if (!cmd_mmu_cache_data->cache_flags)
		goto err_drm_dev_exit;

	cmd_mmu_cache.cmd_type = ROGUE_FWIF_KCCB_CMD_MMUCACHE;

	pvr_fw_object_get_fw_addr(pvr_dev->fw_dev.mem.mmucache_sync_obj,
				  &cmd_mmu_cache_data->mmu_cache_sync_fw_addr);
	cmd_mmu_cache_data->mmu_cache_sync_update_value = 0;

	err = pvr_kccb_send_cmd(pvr_dev, &cmd_mmu_cache, &slot);
	if (err)
		goto err_reset_and_retry;

	/* The first attempt is always waited on, irrespective of @wait. */
	err = pvr_kccb_wait_for_completion(pvr_dev, slot, HZ, NULL);
	if (err)
		goto err_reset_and_retry;

	drm_dev_exit(idx);

	return 0;

err_reset_and_retry:
	/*
	 * Flush command failure is most likely the result of a firmware lockup. Hard
	 * reset the GPU and retry.
	 */
	err = pvr_power_reset(pvr_dev, true);
	if (err)
		goto err_drm_dev_exit; /* Device is lost. */

	/* Retry sending flush request. */
	err = pvr_kccb_send_cmd(pvr_dev, &cmd_mmu_cache, &slot);
	if (err) {
		pvr_device_lost(pvr_dev);
		goto err_drm_dev_exit;
	}

	if (wait) {
		err = pvr_kccb_wait_for_completion(pvr_dev, slot, HZ, NULL);
		if (err)
			pvr_device_lost(pvr_dev);
	}

	/* Shared exit: also reached with err == 0 on the "nothing to do" paths. */
err_drm_dev_exit:
	drm_dev_exit(idx);

	return err;
}
191 
192 /**
193  * DOC: PowerVR Virtual Memory Handling
194  */
/**
 * DOC: PowerVR Virtual Memory Handling (constants)
 *
 * .. c:macro:: PVR_IDX_INVALID
 *
 *    Default value for a u16-based index, defined as ``(u16)(-1)`` (all bits
 *    set).
 *
 *    This value cannot be zero, since zero is a valid index value.
 */
#define PVR_IDX_INVALID ((u16)(-1))
205 
206 /**
207  * DOC: MMU backing pages
208  */
/**
 * DOC: MMU backing pages (constants)
 *
 * .. c:macro:: PVR_MMU_BACKING_PAGE_SIZE
 *
 *    Page size of a PowerVR device's integrated MMU. The CPU page size must be
 *    at least as large as this value for the current implementation; this is
 *    checked at compile-time.
 */
#define PVR_MMU_BACKING_PAGE_SIZE SZ_4K
/* A single host page must be able to hold one complete backing page. */
static_assert(PAGE_SIZE >= PVR_MMU_BACKING_PAGE_SIZE);
220 
/**
 * struct pvr_mmu_backing_page - Represents a single page used to back a page
 *                              table of any level.
 * @dma_addr: DMA address of this page.
 * @host_ptr: CPU address of this page.
 * @pvr_dev: The PowerVR device to which this page is associated. **For
 *           internal use only.** A %NULL value is treated by
 *           pvr_mmu_backing_page_fini() and pvr_mmu_backing_page_sync() as
 *           "no allocation present".
 */
struct pvr_mmu_backing_page {
	dma_addr_t dma_addr;
	void *host_ptr;
/* private: internal use only */
	/* Page from alloc_page() which backs both @host_ptr and @dma_addr. */
	struct page *raw_page;
	struct pvr_device *pvr_dev;
};
236 
237 /**
238  * pvr_mmu_backing_page_init() - Initialize a MMU backing page.
239  * @page: Target backing page.
240  * @pvr_dev: Target PowerVR device.
241  *
242  * This function performs three distinct operations:
243  *
244  * 1. Allocate a single page,
245  * 2. Map the page to the CPU, and
246  * 3. Map the page to DMA-space.
247  *
248  * It is expected that @page be zeroed (e.g. from kzalloc()) before calling
249  * this function.
250  *
251  * Return:
252  *  * 0 on success, or
253  *  * -%ENOMEM if allocation of the backing page or mapping of the backing
254  *    page to DMA fails.
255  */
256 static int
257 pvr_mmu_backing_page_init(struct pvr_mmu_backing_page *page,
258 			  struct pvr_device *pvr_dev)
259 {
260 	struct device *dev = from_pvr_device(pvr_dev)->dev;
261 
262 	struct page *raw_page;
263 	pgprot_t prot;
264 	int err;
265 
266 	dma_addr_t dma_addr;
267 	void *host_ptr;
268 
269 	raw_page = alloc_page(__GFP_ZERO | GFP_KERNEL);
270 	if (!raw_page)
271 		return -ENOMEM;
272 
273 	prot = PAGE_KERNEL;
274 	if (device_get_dma_attr(dev) != DEV_DMA_COHERENT)
275 		prot = pgprot_writecombine(prot);
276 
277 	host_ptr = vmap(&raw_page, 1, VM_MAP, prot);
278 	if (!host_ptr) {
279 		err = -ENOMEM;
280 		goto err_free_page;
281 	}
282 
283 	dma_addr = dma_map_page(dev, raw_page, 0, PVR_MMU_BACKING_PAGE_SIZE,
284 				DMA_TO_DEVICE);
285 	if (dma_mapping_error(dev, dma_addr)) {
286 		err = -ENOMEM;
287 		goto err_unmap_page;
288 	}
289 
290 	page->dma_addr = dma_addr;
291 	page->host_ptr = host_ptr;
292 	page->pvr_dev = pvr_dev;
293 	page->raw_page = raw_page;
294 	kmemleak_alloc(page->host_ptr, PAGE_SIZE, 1, GFP_KERNEL);
295 
296 	return 0;
297 
298 err_unmap_page:
299 	vunmap(host_ptr);
300 
301 err_free_page:
302 	__free_page(raw_page);
303 
304 	return err;
305 }
306 
307 /**
308  * pvr_mmu_backing_page_fini() - Teardown a MMU backing page.
309  * @page: Target backing page.
310  *
311  * This function performs the mirror operations to pvr_mmu_backing_page_init(),
312  * in reverse order:
313  *
314  * 1. Unmap the page from DMA-space,
315  * 2. Unmap the page from the CPU, and
316  * 3. Free the page.
317  *
318  * It also zeros @page.
319  *
320  * It is a no-op to call this function a second (or further) time on any @page.
321  */
322 static void
323 pvr_mmu_backing_page_fini(struct pvr_mmu_backing_page *page)
324 {
325 	struct device *dev;
326 
327 	/* Do nothing if no allocation is present. */
328 	if (!page->pvr_dev)
329 		return;
330 
331 	dev = from_pvr_device(page->pvr_dev)->dev;
332 
333 	dma_unmap_page(dev, page->dma_addr, PVR_MMU_BACKING_PAGE_SIZE,
334 		       DMA_TO_DEVICE);
335 
336 	kmemleak_free(page->host_ptr);
337 	vunmap(page->host_ptr);
338 
339 	__free_page(page->raw_page);
340 
341 	memset(page, 0, sizeof(*page));
342 }
343 
344 /**
345  * pvr_mmu_backing_page_sync() - Flush a MMU backing page from the CPU to the
346  * device.
347  * @page: Target backing page.
348  * @flags: MMU flush flags. Must be one of %PVR_MMU_SYNC_LEVEL_*_FLAGS.
349  *
350  * .. caution::
351  *
352  *    **This is potentially an expensive function call.** Only call
353  *    pvr_mmu_backing_page_sync() once you're sure you have no more changes to
354  *    make to the backing page in the immediate future.
355  */
356 static void
357 pvr_mmu_backing_page_sync(struct pvr_mmu_backing_page *page, u32 flags)
358 {
359 	struct pvr_device *pvr_dev = page->pvr_dev;
360 	struct device *dev;
361 
362 	/*
363 	 * Do nothing if no allocation is present. This may be the case if
364 	 * we are unmapping pages.
365 	 */
366 	if (!pvr_dev)
367 		return;
368 
369 	dev = from_pvr_device(pvr_dev)->dev;
370 
371 	dma_sync_single_for_device(dev, page->dma_addr,
372 				   PVR_MMU_BACKING_PAGE_SIZE, DMA_TO_DEVICE);
373 
374 	pvr_mmu_set_flush_flags(pvr_dev, flags);
375 }
376 
377 /**
378  * DOC: Raw page tables
379  */
380 
/*
 * Type of the @val member of the raw entry structure for page table level
 * @level_ (0, 1 or 2).
 */
#define PVR_PAGE_TABLE_TYPEOF_ENTRY(level_) \
	typeof_member(struct pvr_page_table_l##level_##_entry_raw, val)

/*
 * Extract field @field_ of register group @name_ (PC, PD or PT) from the raw
 * entry @entry_: the field is isolated with the inverse of its CLRMSK and then
 * shifted down by its SHIFT. @level_ is accepted for symmetry with
 * PVR_PAGE_TABLE_FIELD_PREP() but is not used in the expansion.
 */
#define PVR_PAGE_TABLE_FIELD_GET(level_, name_, field_, entry_)           \
	(((entry_).val &                                           \
	  ~ROGUE_MMUCTRL_##name_##_DATA_##field_##_CLRMSK) >> \
	 ROGUE_MMUCTRL_##name_##_DATA_##field_##_SHIFT)

/*
 * Position @val_ as field @field_ of register group @name_: the value is cast
 * to the entry type of level @level_, shifted up by the field's SHIFT and
 * masked with the inverse of its CLRMSK so that out-of-range bits are dropped.
 */
#define PVR_PAGE_TABLE_FIELD_PREP(level_, name_, field_, val_)            \
	((((PVR_PAGE_TABLE_TYPEOF_ENTRY(level_))(val_))            \
	  << ROGUE_MMUCTRL_##name_##_DATA_##field_##_SHIFT) & \
	 ~ROGUE_MMUCTRL_##name_##_DATA_##field_##_CLRMSK)
393 
394 /**
395  * struct pvr_page_table_l2_entry_raw - A single entry in a level 2 page table.
396  * @val: The raw value of this entry.
397  *
398  * This type is a structure for type-checking purposes. At compile-time, its
399  * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PC_VALUE.
400  *
401  * The value stored in this structure can be decoded using the following bitmap:
402  *
403  * .. flat-table::
404  *    :widths: 1 5
405  *    :stub-columns: 1
406  *
407  *    * - 31..4
408  *      - **Level 1 Page Table Base Address:** Bits 39..12 of the L1
409  *        page table base address, which is 4KiB aligned.
410  *
411  *    * - 3..2
412  *      - *(reserved)*
413  *
414  *    * - 1
415  *      - **Pending:** When valid bit is not set, indicates that a valid
416  *        entry is pending and the MMU should wait for the driver to map
417  *        the entry. This is used to support page demand mapping of
418  *        memory.
419  *
420  *    * - 0
421  *      - **Valid:** Indicates that the entry contains a valid L1 page
422  *        table. If the valid bit is not set, then an attempted use of
423  *        the page would result in a page fault.
424  */
struct pvr_page_table_l2_entry_raw {
	u32 val;
} __packed;
/* The structure's size in bits must equal the level 2 (PC) entry size. */
static_assert(sizeof(struct pvr_page_table_l2_entry_raw) * 8 ==
	      ROGUE_MMUCTRL_ENTRY_SIZE_PC_VALUE);
430 
431 static bool
432 pvr_page_table_l2_entry_raw_is_valid(struct pvr_page_table_l2_entry_raw entry)
433 {
434 	return PVR_PAGE_TABLE_FIELD_GET(2, PC, VALID, entry);
435 }
436 
437 /**
438  * pvr_page_table_l2_entry_raw_set() - Write a valid entry into a raw level 2
439  *                                     page table.
440  * @entry: Target raw level 2 page table entry.
441  * @child_table_dma_addr: DMA address of the level 1 page table to be
442  *                        associated with @entry.
443  *
444  * When calling this function, @child_table_dma_addr must be a valid DMA
445  * address and a multiple of %ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSIZE.
446  */
447 static void
448 pvr_page_table_l2_entry_raw_set(struct pvr_page_table_l2_entry_raw *entry,
449 				dma_addr_t child_table_dma_addr)
450 {
451 	child_table_dma_addr >>= ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSHIFT;
452 
453 	WRITE_ONCE(entry->val,
454 		   PVR_PAGE_TABLE_FIELD_PREP(2, PC, VALID, true) |
455 		   PVR_PAGE_TABLE_FIELD_PREP(2, PC, ENTRY_PENDING, false) |
456 		   PVR_PAGE_TABLE_FIELD_PREP(2, PC, PD_BASE, child_table_dma_addr));
457 }
458 
459 static void
460 pvr_page_table_l2_entry_raw_clear(struct pvr_page_table_l2_entry_raw *entry)
461 {
462 	WRITE_ONCE(entry->val, 0);
463 }
464 
465 /**
466  * struct pvr_page_table_l1_entry_raw - A single entry in a level 1 page table.
467  * @val: The raw value of this entry.
468  *
469  * This type is a structure for type-checking purposes. At compile-time, its
470  * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PD_VALUE.
471  *
472  * The value stored in this structure can be decoded using the following bitmap:
473  *
474  * .. flat-table::
475  *    :widths: 1 5
476  *    :stub-columns: 1
477  *
478  *    * - 63..41
479  *      - *(reserved)*
480  *
481  *    * - 40
482  *      - **Pending:** When valid bit is not set, indicates that a valid entry
483  *        is pending and the MMU should wait for the driver to map the entry.
484  *        This is used to support page demand mapping of memory.
485  *
486  *    * - 39..5
487  *      - **Level 0 Page Table Base Address:** The way this value is
488  *        interpreted depends on the page size. Bits not specified in the
489  *        table below (e.g. bits 11..5 for page size 4KiB) should be
490  *        considered reserved.
491  *
492  *        This table shows the bits used in an L1 page table entry to
493  *        represent the Physical Table Base Address for a given Page Size.
494  *        Since each L1 page table entry covers 2MiB of address space, the
495  *        maximum page size is 2MiB.
496  *
497  *        .. flat-table::
498  *           :widths: 1 1 1 1
499  *           :header-rows: 1
500  *           :stub-columns: 1
501  *
502  *           * - Page size
503  *             - L0 page table base address bits
504  *             - Number of L0 page table entries
505  *             - Size of L0 page table
506  *
507  *           * - 4KiB
508  *             - 39..12
509  *             - 512
510  *             - 4KiB
511  *
512  *           * - 16KiB
513  *             - 39..10
514  *             - 128
515  *             - 1KiB
516  *
517  *           * - 64KiB
518  *             - 39..8
519  *             - 32
520  *             - 256B
521  *
522  *           * - 256KiB
523  *             - 39..6
524  *             - 8
525  *             - 64B
526  *
527  *           * - 1MiB
528  *             - 39..5 (4 = '0')
529  *             - 2
530  *             - 16B
531  *
532  *           * - 2MiB
533  *             - 39..5 (4..3 = '00')
534  *             - 1
535  *             - 8B
536  *
537  *    * - 4
538  *      - *(reserved)*
539  *
540  *    * - 3..1
541  *      - **Page Size:** Sets the page size, from 4KiB to 2MiB.
542  *
543  *    * - 0
544  *      - **Valid:** Indicates that the entry contains a valid L0 page table.
545  *        If the valid bit is not set, then an attempted use of the page would
546  *        result in a page fault.
547  */
struct pvr_page_table_l1_entry_raw {
	u64 val;
} __packed;
/* The structure's size in bits must equal the level 1 (PD) entry size. */
static_assert(sizeof(struct pvr_page_table_l1_entry_raw) * 8 ==
	      ROGUE_MMUCTRL_ENTRY_SIZE_PD_VALUE);
553 
554 static bool
555 pvr_page_table_l1_entry_raw_is_valid(struct pvr_page_table_l1_entry_raw entry)
556 {
557 	return PVR_PAGE_TABLE_FIELD_GET(1, PD, VALID, entry);
558 }
559 
560 /**
561  * pvr_page_table_l1_entry_raw_set() - Write a valid entry into a raw level 1
562  *                                     page table.
563  * @entry: Target raw level 1 page table entry.
564  * @child_table_dma_addr: DMA address of the level 0 page table to be
565  *                        associated with @entry.
566  *
567  * When calling this function, @child_table_dma_addr must be a valid DMA
568  * address and a multiple of 4 KiB.
569  */
570 static void
571 pvr_page_table_l1_entry_raw_set(struct pvr_page_table_l1_entry_raw *entry,
572 				dma_addr_t child_table_dma_addr)
573 {
574 	WRITE_ONCE(entry->val,
575 		   PVR_PAGE_TABLE_FIELD_PREP(1, PD, VALID, true) |
576 		   PVR_PAGE_TABLE_FIELD_PREP(1, PD, ENTRY_PENDING, false) |
577 		   PVR_PAGE_TABLE_FIELD_PREP(1, PD, PAGE_SIZE, ROGUE_MMUCTRL_PAGE_SIZE_X) |
578 		   /*
579 		    * The use of a 4K-specific macro here is correct. It is
580 		    * a future optimization to allocate sub-host-page-sized
581 		    * blocks for individual tables, so the condition that any
582 		    * page table address is aligned to the size of the
583 		    * largest (a 4KB) table currently holds.
584 		    */
585 		   (child_table_dma_addr & ~ROGUE_MMUCTRL_PT_BASE_4KB_RANGE_CLRMSK));
586 }
587 
588 static void
589 pvr_page_table_l1_entry_raw_clear(struct pvr_page_table_l1_entry_raw *entry)
590 {
591 	WRITE_ONCE(entry->val, 0);
592 }
593 
594 /**
595  * struct pvr_page_table_l0_entry_raw - A single entry in a level 0 page table.
596  * @val: The raw value of this entry.
597  *
598  * This type is a structure for type-checking purposes. At compile-time, its
599  * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PT_VALUE.
600  *
601  * The value stored in this structure can be decoded using the following bitmap:
602  *
603  * .. flat-table::
604  *    :widths: 1 5
605  *    :stub-columns: 1
606  *
607  *    * - 63
608  *      - *(reserved)*
609  *
610  *    * - 62
611  *      - **PM/FW Protect:** Indicates a protected region which only the
612  *        Parameter Manager (PM) or firmware processor can write to.
613  *
614  *    * - 61..40
615  *      - **VP Page (High):** Virtual-physical page used for Parameter Manager
616  *        (PM) memory. This field is only used if the additional level of PB
617  *        virtualization is enabled. The VP Page field is needed by the PM in
618  *        order to correctly reconstitute the free lists after render
619  *        completion. This (High) field holds bits 39..18 of the value; the
620  *        Low field holds bits 17..12. Bits 11..0 are always zero because the
621  *        value is always aligned to the 4KiB page size.
622  *
623  *    * - 39..12
624  *      - **Physical Page Address:** The way this value is interpreted depends
625  *        on the page size. Bits not specified in the table below (e.g. bits
626  *        20..12 for page size 2MiB) should be considered reserved.
627  *
628  *        This table shows the bits used in an L0 page table entry to represent
629  *        the Physical Page Address for a given page size (as defined in the
630  *        associated L1 page table entry).
631  *
632  *        .. flat-table::
633  *           :widths: 1 1
634  *           :header-rows: 1
635  *           :stub-columns: 1
636  *
637  *           * - Page size
638  *             - Physical address bits
639  *
640  *           * - 4KiB
641  *             - 39..12
642  *
643  *           * - 16KiB
644  *             - 39..14
645  *
646  *           * - 64KiB
647  *             - 39..16
648  *
649  *           * - 256KiB
650  *             - 39..18
651  *
652  *           * - 1MiB
653  *             - 39..20
654  *
655  *           * - 2MiB
656  *             - 39..21
657  *
658  *    * - 11..6
659  *      - **VP Page (Low):** Continuation of VP Page (High).
660  *
661  *    * - 5
662  *      - **Pending:** When valid bit is not set, indicates that a valid entry
663  *        is pending and the MMU should wait for the driver to map the entry.
664  *        This is used to support page demand mapping of memory.
665  *
666  *    * - 4
667  *      - **PM Src:** Set on Parameter Manager (PM) allocated page table
668  *        entries when indicated by the PM. Note that this bit will only be set
669  *        by the PM, not by the device driver.
670  *
671  *    * - 3
672  *      - **SLC Bypass Control:** Specifies requests to this page should bypass
673  *        the System Level Cache (SLC), if enabled in SLC configuration.
674  *
675  *    * - 2
676  *      - **Cache Coherency:** Indicates that the page is coherent (i.e. it
677  *        does not require a cache flush between operations on the CPU and the
678  *        device).
679  *
680  *    * - 1
681  *      - **Read Only:** If set, this bit indicates that the page is read only.
682  *        An attempted write to this page would result in a write-protection
683  *        fault.
684  *
685  *    * - 0
686  *      - **Valid:** Indicates that the entry contains a valid page. If the
687  *        valid bit is not set, then an attempted use of the page would result
688  *        in a page fault.
689  */
struct pvr_page_table_l0_entry_raw {
	u64 val;
} __packed;
/* The structure's size in bits must equal the level 0 (PT) entry size. */
static_assert(sizeof(struct pvr_page_table_l0_entry_raw) * 8 ==
	      ROGUE_MMUCTRL_ENTRY_SIZE_PT_VALUE);
695 
/**
 * struct pvr_page_flags_raw - The configurable flags from a single entry in a
 *                             level 0 page table.
 * @val: The raw value of these flags. Since these are a strict subset of
 *       &struct pvr_page_table_l0_entry_raw, that type is used for the member
 *       here.
 *
 * The flags stored in this type are: PM/FW Protect; SLC Bypass Control; Cache
 * Coherency, and Read Only (bits 62, 3, 2 and 1 respectively).
 *
 * This type should never be instantiated directly; instead use
 * pvr_page_flags_raw_create() to ensure only valid bits of @val are set.
 */
struct pvr_page_flags_raw {
	struct pvr_page_table_l0_entry_raw val;
} __packed;
/* The wrapper must add no storage beyond the level 0 entry it contains. */
static_assert(sizeof(struct pvr_page_flags_raw) ==
	      sizeof(struct pvr_page_table_l0_entry_raw));
713 
714 static bool
715 pvr_page_table_l0_entry_raw_is_valid(struct pvr_page_table_l0_entry_raw entry)
716 {
717 	return PVR_PAGE_TABLE_FIELD_GET(0, PT, VALID, entry);
718 }
719 
720 /**
721  * pvr_page_table_l0_entry_raw_set() - Write a valid entry into a raw level 0
722  *                                     page table.
723  * @entry: Target raw level 0 page table entry.
724  * @dma_addr: DMA address of the physical page to be associated with @entry.
725  * @flags: Options to be set on @entry.
726  *
727  * When calling this function, @child_table_dma_addr must be a valid DMA
728  * address and a multiple of %PVR_DEVICE_PAGE_SIZE.
729  *
730  * The @flags parameter is directly assigned into @entry. It is the callers
731  * responsibility to ensure that only bits specified in
732  * &struct pvr_page_flags_raw are set in @flags.
733  */
734 static void
735 pvr_page_table_l0_entry_raw_set(struct pvr_page_table_l0_entry_raw *entry,
736 				dma_addr_t dma_addr,
737 				struct pvr_page_flags_raw flags)
738 {
739 	WRITE_ONCE(entry->val, PVR_PAGE_TABLE_FIELD_PREP(0, PT, VALID, true) |
740 			       PVR_PAGE_TABLE_FIELD_PREP(0, PT, ENTRY_PENDING, false) |
741 			       (dma_addr & ~ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK) |
742 			       flags.val.val);
743 }
744 
745 static void
746 pvr_page_table_l0_entry_raw_clear(struct pvr_page_table_l0_entry_raw *entry)
747 {
748 	WRITE_ONCE(entry->val, 0);
749 }
750 
751 /**
752  * pvr_page_flags_raw_create() - Initialize the flag bits of a raw level 0 page
753  *                               table entry.
754  * @read_only: This page is read-only (see: Read Only).
755  * @cache_coherent: This page does not require cache flushes (see: Cache
756  *                  Coherency).
757  * @slc_bypass: This page bypasses the device cache (see: SLC Bypass Control).
758  * @pm_fw_protect: This page is only for use by the firmware or Parameter
759  *                 Manager (see PM/FW Protect).
760  *
761  * For more details on the use of these four options, see their respective
762  * entries in the table under &struct pvr_page_table_l0_entry_raw.
763  *
764  * Return:
765  * A new &struct pvr_page_flags_raw instance which can be passed directly to
766  * pvr_page_table_l0_entry_raw_set() or pvr_page_table_l0_insert().
767  */
768 static struct pvr_page_flags_raw
769 pvr_page_flags_raw_create(bool read_only, bool cache_coherent, bool slc_bypass,
770 			  bool pm_fw_protect)
771 {
772 	struct pvr_page_flags_raw flags;
773 
774 	flags.val.val =
775 		PVR_PAGE_TABLE_FIELD_PREP(0, PT, READ_ONLY, read_only) |
776 		PVR_PAGE_TABLE_FIELD_PREP(0, PT, CC, cache_coherent) |
777 		PVR_PAGE_TABLE_FIELD_PREP(0, PT, SLC_BYPASS_CTRL, slc_bypass) |
778 		PVR_PAGE_TABLE_FIELD_PREP(0, PT, PM_META_PROTECT, pm_fw_protect);
779 
780 	return flags;
781 }
782 
/**
 * struct pvr_page_table_l2_raw - The raw data of a level 2 page table.
 *
 * This type is a structure for type-checking purposes. At compile-time, its
 * size is checked to be exactly %PVR_MMU_BACKING_PAGE_SIZE.
 */
struct pvr_page_table_l2_raw {
	/** @entries: The raw values of this table. */
	struct pvr_page_table_l2_entry_raw
		entries[ROGUE_MMUCTRL_ENTRIES_PC_VALUE];
} __packed;
/* A level 2 table fills its backing page exactly. */
static_assert(sizeof(struct pvr_page_table_l2_raw) == PVR_MMU_BACKING_PAGE_SIZE);
795 
/**
 * struct pvr_page_table_l1_raw - The raw data of a level 1 page table.
 *
 * This type is a structure for type-checking purposes. At compile-time, its
 * size is checked to be exactly %PVR_MMU_BACKING_PAGE_SIZE.
 */
struct pvr_page_table_l1_raw {
	/** @entries: The raw values of this table. */
	struct pvr_page_table_l1_entry_raw
		entries[ROGUE_MMUCTRL_ENTRIES_PD_VALUE];
} __packed;
/* A level 1 table fills its backing page exactly. */
static_assert(sizeof(struct pvr_page_table_l1_raw) == PVR_MMU_BACKING_PAGE_SIZE);
808 
/**
 * struct pvr_page_table_l0_raw - The raw data of a level 0 page table.
 *
 * This type is a structure for type-checking purposes. At compile-time, its
 * size is checked to be no larger than %PVR_MMU_BACKING_PAGE_SIZE.
 *
 * .. caution::
 *
 *    The size of level 0 page tables is variable depending on the page size
 *    specified in the associated level 1 page table entry. Since the device
 *    page size in use is pegged to the host page size, it cannot vary at
 *    runtime. This structure is therefore only defined to contain the required
 *    number of entries for the current device page size. **You should never
 *    read or write beyond the last supported entry.**
 */
struct pvr_page_table_l0_raw {
	/** @entries: The raw values of this table. */
	struct pvr_page_table_l0_entry_raw
		entries[ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X];
} __packed;
/*
 * Unlike the higher levels, a level 0 table may be smaller than its backing
 * page, hence "<=" rather than "==".
 */
static_assert(sizeof(struct pvr_page_table_l0_raw) <= PVR_MMU_BACKING_PAGE_SIZE);
830 
831 /**
832  * DOC: Mirror page tables
833  */
834 
/*
 * Forward declarations: the mirror page table types hold pointers to one
 * another, so the lower-level types must be named before they are defined.
 */
struct pvr_page_table_l1;
struct pvr_page_table_l0;
841 
/**
 * struct pvr_page_table_l2 - A wrapped level 2 page table.
 *
 * To access the raw part of this table, use pvr_page_table_l2_get_raw().
 * Alternatively to access a raw entry directly, use
 * pvr_page_table_l2_get_entry_raw().
 *
 * A level 2 page table forms the root of the page table tree structure, so
 * this type has no &parent or &parent_idx members.
 */
struct pvr_page_table_l2 {
	/**
	 * @entries: The children of this node in the page table tree
	 * structure. These are also mirror tables. The indexing of this array
	 * is identical to that of the raw equivalent
	 * (&pvr_page_table_l2_raw.entries).
	 */
	struct pvr_page_table_l1 *entries[ROGUE_MMUCTRL_ENTRIES_PC_VALUE];

	/**
	 * @backing_page: A handle to the memory which holds the raw
	 * equivalent of this table. **For internal use only.**
	 */
	struct pvr_mmu_backing_page backing_page;

	/**
	 * @entry_count: The current number of valid entries (that we know of)
	 * in this table. This value is essentially a refcount - the table is
	 * destroyed when this value is decremented to zero by
	 * pvr_page_table_l2_remove().
	 */
	u16 entry_count;
};
875 
876 /**
877  * pvr_page_table_l2_init() - Initialize a level 2 page table.
878  * @table: Target level 2 page table.
879  * @pvr_dev: Target PowerVR device
880  *
881  * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
882  * this function.
883  *
884  * Return:
885  *  * 0 on success, or
886  *  * Any error encountered while intializing &table->backing_page using
887  *    pvr_mmu_backing_page_init().
888  */
889 static int
890 pvr_page_table_l2_init(struct pvr_page_table_l2 *table,
891 		       struct pvr_device *pvr_dev)
892 {
893 	return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
894 }
895 
896 /**
897  * pvr_page_table_l2_fini() - Teardown a level 2 page table.
898  * @table: Target level 2 page table.
899  *
900  * It is an error to attempt to use @table after calling this function.
901  */
902 static void
903 pvr_page_table_l2_fini(struct pvr_page_table_l2 *table)
904 {
905 	pvr_mmu_backing_page_fini(&table->backing_page);
906 }
907 
908 /**
909  * pvr_page_table_l2_sync() - Flush a level 2 page table from the CPU to the
910  *                            device.
911  * @table: Target level 2 page table.
912  *
913  * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
914  * warning there applies here too: **Only call pvr_page_table_l2_sync() once
915  * you're sure you have no more changes to make to** @table **in the immediate
916  * future.**
917  *
918  * If child level 1 page tables of @table also need to be flushed, this should
919  * be done first using pvr_page_table_l1_sync() *before* calling this function.
920  */
921 static void
922 pvr_page_table_l2_sync(struct pvr_page_table_l2 *table)
923 {
924 	pvr_mmu_backing_page_sync(&table->backing_page, PVR_MMU_SYNC_LEVEL_2_FLAGS);
925 }
926 
927 /**
928  * pvr_page_table_l2_get_raw() - Access the raw equivalent of a mirror level 2
929  *                               page table.
930  * @table: Target level 2 page table.
931  *
932  * Essentially returns the CPU address of the raw equivalent of @table, cast to
933  * a &struct pvr_page_table_l2_raw pointer.
934  *
935  * You probably want to call pvr_page_table_l2_get_entry_raw() instead.
936  *
937  * Return:
938  * The raw equivalent of @table.
939  */
940 static struct pvr_page_table_l2_raw *
941 pvr_page_table_l2_get_raw(struct pvr_page_table_l2 *table)
942 {
943 	return table->backing_page.host_ptr;
944 }
945 
946 /**
947  * pvr_page_table_l2_get_entry_raw() - Access an entry from the raw equivalent
948  *                                     of a mirror level 2 page table.
949  * @table: Target level 2 page table.
950  * @idx: Index of the entry to access.
951  *
952  * Technically this function returns a pointer to a slot in a raw level 2 page
953  * table, since the returned "entry" is not guaranteed to be valid. The caller
954  * must verify the validity of the entry at the returned address (perhaps using
955  * pvr_page_table_l2_entry_raw_is_valid()) before reading or overwriting it.
956  *
957  * The value of @idx is not checked here; it is the callers responsibility to
958  * ensure @idx refers to a valid index within @table before dereferencing the
959  * returned pointer.
960  *
961  * Return:
962  * A pointer to the requested raw level 2 page table entry.
963  */
964 static struct pvr_page_table_l2_entry_raw *
965 pvr_page_table_l2_get_entry_raw(struct pvr_page_table_l2 *table, u16 idx)
966 {
967 	return &pvr_page_table_l2_get_raw(table)->entries[idx];
968 }
969 
970 /**
971  * pvr_page_table_l2_entry_is_valid() - Check if a level 2 page table entry is
972  *                                      marked as valid.
973  * @table: Target level 2 page table.
974  * @idx: Index of the entry to check.
975  *
976  * The value of @idx is not checked here; it is the callers responsibility to
977  * ensure @idx refers to a valid index within @table before calling this
978  * function.
979  */
980 static bool
981 pvr_page_table_l2_entry_is_valid(struct pvr_page_table_l2 *table, u16 idx)
982 {
983 	struct pvr_page_table_l2_entry_raw entry_raw =
984 		*pvr_page_table_l2_get_entry_raw(table, idx);
985 
986 	return pvr_page_table_l2_entry_raw_is_valid(entry_raw);
987 }
988 
/**
 * struct pvr_page_table_l1 - A wrapped level 1 page table.
 *
 * To access the raw part of this table, use pvr_page_table_l1_get_raw().
 * Alternatively to access a raw entry directly, use
 * pvr_page_table_l1_get_entry_raw().
 */
struct pvr_page_table_l1 {
	/**
	 * @entries: The children of this node in the page table tree
	 * structure. These are also mirror tables. The indexing of this array
	 * is identical to that of the raw equivalent
	 * (&pvr_page_table_l1_raw.entries).
	 */
	struct pvr_page_table_l0 *entries[ROGUE_MMUCTRL_ENTRIES_PD_VALUE];

	/**
	 * @backing_page: A handle to the memory which holds the raw
	 * equivalent of this table. **For internal use only.**
	 */
	struct pvr_mmu_backing_page backing_page;

	union {
		/**
		 * @parent: The parent of this node in the page table tree structure.
		 *
		 * This is also a mirror table.
		 *
		 * Only valid when the L1 page table is active. When the L1 page table
		 * has been removed and queued for destruction, the next_free field
		 * should be used instead.
		 */
		struct pvr_page_table_l2 *parent;

		/**
		 * @next_free: Pointer to the next L1 page table to take/free.
		 *
		 * Used to form a linked list of L1 page tables. This is used
		 * when preallocating tables and when the page table has been
		 * removed and queued for destruction.
		 *
		 * Because this member shares storage with @parent, writing it
		 * overwrites @parent.
		 */
		struct pvr_page_table_l1 *next_free;
	};

	/**
	 * @parent_idx: The index of the entry in the parent table (see
	 * @parent) which corresponds to this table. Set to %PVR_IDX_INVALID
	 * while this table is not linked into a parent table.
	 */
	u16 parent_idx;

	/**
	 * @entry_count: The current number of valid entries (that we know of)
	 * in this table. This value is essentially a refcount - when it is
	 * decremented to zero by pvr_page_table_l1_remove(), the table is
	 * unlinked from its parent (if it has one) and queued on the free
	 * list of the MMU op context performing the removal.
	 */
	u16 entry_count;
};
1047 
1048 /**
1049  * pvr_page_table_l1_init() - Initialize a level 1 page table.
1050  * @table: Target level 1 page table.
1051  * @pvr_dev: Target PowerVR device
1052  *
1053  * When this function returns successfully, @table is still not considered
1054  * valid. It must be inserted into the page table tree structure with
1055  * pvr_page_table_l2_insert() before it is ready for use.
1056  *
1057  * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
1058  * this function.
1059  *
1060  * Return:
1061  *  * 0 on success, or
1062  *  * Any error encountered while intializing &table->backing_page using
1063  *    pvr_mmu_backing_page_init().
1064  */
1065 static int
1066 pvr_page_table_l1_init(struct pvr_page_table_l1 *table,
1067 		       struct pvr_device *pvr_dev)
1068 {
1069 	table->parent_idx = PVR_IDX_INVALID;
1070 
1071 	return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
1072 }
1073 
1074 /**
1075  * pvr_page_table_l1_free() - Teardown a level 1 page table.
1076  * @table: Target level 1 page table.
1077  *
1078  * It is an error to attempt to use @table after calling this function, even
1079  * indirectly. This includes calling pvr_page_table_l2_remove(), which must
1080  * be called *before* pvr_page_table_l1_free().
1081  */
1082 static void
1083 pvr_page_table_l1_free(struct pvr_page_table_l1 *table)
1084 {
1085 	pvr_mmu_backing_page_fini(&table->backing_page);
1086 	kfree(table);
1087 }
1088 
1089 /**
1090  * pvr_page_table_l1_sync() - Flush a level 1 page table from the CPU to the
1091  *                            device.
1092  * @table: Target level 1 page table.
1093  *
1094  * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
1095  * warning there applies here too: **Only call pvr_page_table_l1_sync() once
1096  * you're sure you have no more changes to make to** @table **in the immediate
1097  * future.**
1098  *
1099  * If child level 0 page tables of @table also need to be flushed, this should
1100  * be done first using pvr_page_table_l0_sync() *before* calling this function.
1101  */
1102 static void
1103 pvr_page_table_l1_sync(struct pvr_page_table_l1 *table)
1104 {
1105 	pvr_mmu_backing_page_sync(&table->backing_page, PVR_MMU_SYNC_LEVEL_1_FLAGS);
1106 }
1107 
1108 /**
1109  * pvr_page_table_l1_get_raw() - Access the raw equivalent of a mirror level 1
1110  *                               page table.
1111  * @table: Target level 1 page table.
1112  *
1113  * Essentially returns the CPU address of the raw equivalent of @table, cast to
1114  * a &struct pvr_page_table_l1_raw pointer.
1115  *
1116  * You probably want to call pvr_page_table_l1_get_entry_raw() instead.
1117  *
1118  * Return:
1119  * The raw equivalent of @table.
1120  */
1121 static struct pvr_page_table_l1_raw *
1122 pvr_page_table_l1_get_raw(struct pvr_page_table_l1 *table)
1123 {
1124 	return table->backing_page.host_ptr;
1125 }
1126 
1127 /**
1128  * pvr_page_table_l1_get_entry_raw() - Access an entry from the raw equivalent
1129  *                                     of a mirror level 1 page table.
1130  * @table: Target level 1 page table.
1131  * @idx: Index of the entry to access.
1132  *
1133  * Technically this function returns a pointer to a slot in a raw level 1 page
1134  * table, since the returned "entry" is not guaranteed to be valid. The caller
1135  * must verify the validity of the entry at the returned address (perhaps using
1136  * pvr_page_table_l1_entry_raw_is_valid()) before reading or overwriting it.
1137  *
1138  * The value of @idx is not checked here; it is the callers responsibility to
1139  * ensure @idx refers to a valid index within @table before dereferencing the
1140  * returned pointer.
1141  *
1142  * Return:
1143  * A pointer to the requested raw level 1 page table entry.
1144  */
1145 static struct pvr_page_table_l1_entry_raw *
1146 pvr_page_table_l1_get_entry_raw(struct pvr_page_table_l1 *table, u16 idx)
1147 {
1148 	return &pvr_page_table_l1_get_raw(table)->entries[idx];
1149 }
1150 
1151 /**
1152  * pvr_page_table_l1_entry_is_valid() - Check if a level 1 page table entry is
1153  *                                      marked as valid.
1154  * @table: Target level 1 page table.
1155  * @idx: Index of the entry to check.
1156  *
1157  * The value of @idx is not checked here; it is the callers responsibility to
1158  * ensure @idx refers to a valid index within @table before calling this
1159  * function.
1160  */
1161 static bool
1162 pvr_page_table_l1_entry_is_valid(struct pvr_page_table_l1 *table, u16 idx)
1163 {
1164 	struct pvr_page_table_l1_entry_raw entry_raw =
1165 		*pvr_page_table_l1_get_entry_raw(table, idx);
1166 
1167 	return pvr_page_table_l1_entry_raw_is_valid(entry_raw);
1168 }
1169 
/**
 * struct pvr_page_table_l0 - A wrapped level 0 page table.
 *
 * To access the raw part of this table, use pvr_page_table_l0_get_raw().
 * Alternatively to access a raw entry directly, use
 * pvr_page_table_l0_get_entry_raw().
 *
 * There is no mirror representation of an individual page, so this type has no
 * &entries member.
 */
struct pvr_page_table_l0 {
	/**
	 * @backing_page: A handle to the memory which holds the raw
	 * equivalent of this table. **For internal use only.**
	 */
	struct pvr_mmu_backing_page backing_page;

	union {
		/**
		 * @parent: The parent of this node in the page table tree structure.
		 *
		 * This is also a mirror table.
		 *
		 * Only valid when the L0 page table is active. When the L0 page table
		 * has been removed and queued for destruction, the next_free field
		 * should be used instead.
		 */
		struct pvr_page_table_l1 *parent;

		/**
		 * @next_free: Pointer to the next L0 page table to take/free.
		 *
		 * Used to form a linked list of L0 page tables. This is used
		 * when preallocating tables and when the page table has been
		 * removed and queued for destruction.
		 *
		 * Because this member shares storage with @parent, writing it
		 * overwrites @parent.
		 */
		struct pvr_page_table_l0 *next_free;
	};

	/**
	 * @parent_idx: The index of the entry in the parent table (see
	 * @parent) which corresponds to this table. Set to %PVR_IDX_INVALID
	 * while this table is not linked into a parent table.
	 */
	u16 parent_idx;

	/**
	 * @entry_count: The current number of valid entries (that we know of)
	 * in this table. This value is essentially a refcount - when it is
	 * decremented to zero by pvr_page_table_l0_remove(), the table is
	 * unlinked from its parent (if it has one) and queued on the free
	 * list of the MMU op context performing the removal.
	 */
	u16 entry_count;
};
1223 
1224 /**
1225  * pvr_page_table_l0_init() - Initialize a level 0 page table.
1226  * @table: Target level 0 page table.
1227  * @pvr_dev: Target PowerVR device
1228  *
1229  * When this function returns successfully, @table is still not considered
1230  * valid. It must be inserted into the page table tree structure with
1231  * pvr_page_table_l1_insert() before it is ready for use.
1232  *
1233  * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
1234  * this function.
1235  *
1236  * Return:
1237  *  * 0 on success, or
1238  *  * Any error encountered while intializing &table->backing_page using
1239  *    pvr_mmu_backing_page_init().
1240  */
1241 static int
1242 pvr_page_table_l0_init(struct pvr_page_table_l0 *table,
1243 		       struct pvr_device *pvr_dev)
1244 {
1245 	table->parent_idx = PVR_IDX_INVALID;
1246 
1247 	return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
1248 }
1249 
1250 /**
1251  * pvr_page_table_l0_free() - Teardown a level 0 page table.
1252  * @table: Target level 0 page table.
1253  *
1254  * It is an error to attempt to use @table after calling this function, even
1255  * indirectly. This includes calling pvr_page_table_l1_remove(), which must
1256  * be called *before* pvr_page_table_l0_free().
1257  */
1258 static void
1259 pvr_page_table_l0_free(struct pvr_page_table_l0 *table)
1260 {
1261 	pvr_mmu_backing_page_fini(&table->backing_page);
1262 	kfree(table);
1263 }
1264 
1265 /**
1266  * pvr_page_table_l0_sync() - Flush a level 0 page table from the CPU to the
1267  *                            device.
1268  * @table: Target level 0 page table.
1269  *
1270  * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
1271  * warning there applies here too: **Only call pvr_page_table_l0_sync() once
1272  * you're sure you have no more changes to make to** @table **in the immediate
1273  * future.**
1274  *
1275  * If child pages of @table also need to be flushed, this should be done first
1276  * using a DMA sync function (e.g. dma_sync_sg_for_device()) *before* calling
1277  * this function.
1278  */
1279 static void
1280 pvr_page_table_l0_sync(struct pvr_page_table_l0 *table)
1281 {
1282 	pvr_mmu_backing_page_sync(&table->backing_page, PVR_MMU_SYNC_LEVEL_0_FLAGS);
1283 }
1284 
1285 /**
1286  * pvr_page_table_l0_get_raw() - Access the raw equivalent of a mirror level 0
1287  *                               page table.
1288  * @table: Target level 0 page table.
1289  *
1290  * Essentially returns the CPU address of the raw equivalent of @table, cast to
1291  * a &struct pvr_page_table_l0_raw pointer.
1292  *
1293  * You probably want to call pvr_page_table_l0_get_entry_raw() instead.
1294  *
1295  * Return:
1296  * The raw equivalent of @table.
1297  */
1298 static struct pvr_page_table_l0_raw *
1299 pvr_page_table_l0_get_raw(struct pvr_page_table_l0 *table)
1300 {
1301 	return table->backing_page.host_ptr;
1302 }
1303 
1304 /**
1305  * pvr_page_table_l0_get_entry_raw() - Access an entry from the raw equivalent
1306  *                                     of a mirror level 0 page table.
1307  * @table: Target level 0 page table.
1308  * @idx: Index of the entry to access.
1309  *
1310  * Technically this function returns a pointer to a slot in a raw level 0 page
1311  * table, since the returned "entry" is not guaranteed to be valid. The caller
1312  * must verify the validity of the entry at the returned address (perhaps using
1313  * pvr_page_table_l0_entry_raw_is_valid()) before reading or overwriting it.
1314  *
1315  * The value of @idx is not checked here; it is the callers responsibility to
1316  * ensure @idx refers to a valid index within @table before dereferencing the
1317  * returned pointer. This is espcially important for level 0 page tables, which
1318  * can have a variable number of entries.
1319  *
1320  * Return:
1321  * A pointer to the requested raw level 0 page table entry.
1322  */
1323 static struct pvr_page_table_l0_entry_raw *
1324 pvr_page_table_l0_get_entry_raw(struct pvr_page_table_l0 *table, u16 idx)
1325 {
1326 	return &pvr_page_table_l0_get_raw(table)->entries[idx];
1327 }
1328 
1329 /**
1330  * pvr_page_table_l0_entry_is_valid() - Check if a level 0 page table entry is
1331  *                                      marked as valid.
1332  * @table: Target level 0 page table.
1333  * @idx: Index of the entry to check.
1334  *
1335  * The value of @idx is not checked here; it is the callers responsibility to
1336  * ensure @idx refers to a valid index within @table before calling this
1337  * function.
1338  */
1339 static bool
1340 pvr_page_table_l0_entry_is_valid(struct pvr_page_table_l0 *table, u16 idx)
1341 {
1342 	struct pvr_page_table_l0_entry_raw entry_raw =
1343 		*pvr_page_table_l0_get_entry_raw(table, idx);
1344 
1345 	return pvr_page_table_l0_entry_raw_is_valid(entry_raw);
1346 }
1347 
/**
 * struct pvr_mmu_context - context holding data for operations at page
 * catalogue level, intended for use with a VM context.
 *
 * The level 2 page table is embedded directly in this structure rather than
 * being referenced through a pointer.
 */
struct pvr_mmu_context {
	/** @pvr_dev: The PVR device associated with the owning VM context. */
	struct pvr_device *pvr_dev;

	/** @page_table_l2: The MMU table root. */
	struct pvr_page_table_l2 page_table_l2;
};
1359 
/**
 * struct pvr_page_table_ptr - A reference to a single physical page as indexed
 * by the page table structure.
 *
 * Intended for embedding in a &struct pvr_mmu_op_context.
 */
struct pvr_page_table_ptr {
	/**
	 * @l1_table: A cached handle to the level 1 page table the
	 * context is currently traversing.
	 *
	 * Set by pvr_page_table_l2_insert() and
	 * pvr_page_table_l1_get_or_insert(); reset to %NULL by
	 * pvr_page_table_l2_remove().
	 */
	struct pvr_page_table_l1 *l1_table;

	/**
	 * @l0_table: A cached handle to the level 0 page table the
	 * context is currently traversing.
	 *
	 * Set by pvr_page_table_l1_insert() and
	 * pvr_page_table_l0_get_or_insert(); reset to %NULL by
	 * pvr_page_table_l1_remove().
	 */
	struct pvr_page_table_l0 *l0_table;

	/**
	 * @l2_idx: Index into the level 2 page table the context is
	 * currently referencing.
	 */
	u16 l2_idx;

	/**
	 * @l1_idx: Index into the level 1 page table the context is
	 * currently referencing.
	 */
	u16 l1_idx;

	/**
	 * @l0_idx: Index into the level 0 page table the context is
	 * currently referencing.
	 */
	u16 l0_idx;
};
1397 
/**
 * struct pvr_mmu_op_context - context holding data for individual
 * device-virtual mapping operations. Intended for use with a VM bind operation.
 */
struct pvr_mmu_op_context {
	/** @mmu_ctx: The MMU context associated with the owning VM context. */
	struct pvr_mmu_context *mmu_ctx;

	/** @map: Data specifically for map operations. */
	struct {
		/**
		 * @sgt: Scatter gather table containing pages pinned for use by
		 * this context - these are currently pinned when initialising
		 * the VM bind operation.
		 */
		struct sg_table *sgt;

		/** @sgt_offset: Start address of the device-virtual mapping. */
		u64 sgt_offset;

		/**
		 * @l1_prealloc_tables: Preallocated L1 page table objects for
		 * use by this context when creating a page mapping. Linked list
		 * (chained through &pvr_page_table_l1.next_free) fully created
		 * during initialisation. pvr_page_table_l1_get_or_insert() pops
		 * tables from the head of this list.
		 */
		struct pvr_page_table_l1 *l1_prealloc_tables;

		/**
		 * @l0_prealloc_tables: Preallocated L0 page table objects for
		 * use by this context when creating a page mapping. Linked list
		 * (chained through &pvr_page_table_l0.next_free) fully created
		 * during initialisation. pvr_page_table_l0_get_or_insert() pops
		 * tables from the head of this list.
		 */
		struct pvr_page_table_l0 *l0_prealloc_tables;
	} map;

	/** @unmap: Data specifically for unmap operations. */
	struct {
		/**
		 * @l1_free_tables: Collects L1 page table objects freed by
		 * unmap ops. Linked list (chained through
		 * &pvr_page_table_l1.next_free), empty at creation.
		 * pvr_page_table_l2_remove() pushes tables onto its head.
		 */
		struct pvr_page_table_l1 *l1_free_tables;

		/**
		 * @l0_free_tables: Collects L0 page table objects freed by
		 * unmap ops. Linked list (chained through
		 * &pvr_page_table_l0.next_free), empty at creation.
		 * pvr_page_table_l1_remove() pushes tables onto its head.
		 */
		struct pvr_page_table_l0 *l0_free_tables;
	} unmap;

	/**
	 * @curr_page: A reference to a single physical page as indexed by the
	 * page table structure.
	 */
	struct pvr_page_table_ptr curr_page;

	/**
	 * @sync_level_required: The maximum level of the page table tree
	 * structure which has (possibly) been modified since it was last
	 * flushed to the device.
	 *
	 * This field should only be set with pvr_mmu_op_context_require_sync()
	 * or indirectly by pvr_mmu_op_context_sync_partial().
	 */
	enum pvr_mmu_sync_level sync_level_required;
};
1464 
1465 /**
1466  * pvr_page_table_l2_insert() - Insert an entry referring to a level 1 page
1467  * table into a level 2 page table.
1468  * @op_ctx: Target MMU op context pointing at the entry to insert the L1 page
1469  * table into.
1470  * @child_table: Target level 1 page table to be referenced by the new entry.
1471  *
1472  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1473  * valid L2 entry.
1474  *
1475  * It is the caller's responsibility to execute any memory barries to ensure
1476  * that the creation of @child_table is ordered before the L2 entry is inserted.
1477  */
1478 static void
1479 pvr_page_table_l2_insert(struct pvr_mmu_op_context *op_ctx,
1480 			 struct pvr_page_table_l1 *child_table)
1481 {
1482 	struct pvr_page_table_l2 *l2_table =
1483 		&op_ctx->mmu_ctx->page_table_l2;
1484 	struct pvr_page_table_l2_entry_raw *entry_raw =
1485 		pvr_page_table_l2_get_entry_raw(l2_table,
1486 						op_ctx->curr_page.l2_idx);
1487 
1488 	pvr_page_table_l2_entry_raw_set(entry_raw,
1489 					child_table->backing_page.dma_addr);
1490 
1491 	child_table->parent = l2_table;
1492 	child_table->parent_idx = op_ctx->curr_page.l2_idx;
1493 	l2_table->entries[op_ctx->curr_page.l2_idx] = child_table;
1494 	++l2_table->entry_count;
1495 	op_ctx->curr_page.l1_table = child_table;
1496 }
1497 
1498 /**
1499  * pvr_page_table_l2_remove() - Remove a level 1 page table from a level 2 page
1500  * table.
1501  * @op_ctx: Target MMU op context pointing at the L2 entry to remove.
1502  *
1503  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1504  * valid L2 entry.
1505  */
1506 static void
1507 pvr_page_table_l2_remove(struct pvr_mmu_op_context *op_ctx)
1508 {
1509 	struct pvr_page_table_l2 *l2_table =
1510 		&op_ctx->mmu_ctx->page_table_l2;
1511 	struct pvr_page_table_l2_entry_raw *entry_raw =
1512 		pvr_page_table_l2_get_entry_raw(l2_table,
1513 						op_ctx->curr_page.l1_table->parent_idx);
1514 
1515 	WARN_ON(op_ctx->curr_page.l1_table->parent != l2_table);
1516 
1517 	pvr_page_table_l2_entry_raw_clear(entry_raw);
1518 
1519 	l2_table->entries[op_ctx->curr_page.l1_table->parent_idx] = NULL;
1520 	op_ctx->curr_page.l1_table->parent_idx = PVR_IDX_INVALID;
1521 	op_ctx->curr_page.l1_table->next_free = op_ctx->unmap.l1_free_tables;
1522 	op_ctx->unmap.l1_free_tables = op_ctx->curr_page.l1_table;
1523 	op_ctx->curr_page.l1_table = NULL;
1524 
1525 	--l2_table->entry_count;
1526 }
1527 
1528 /**
1529  * pvr_page_table_l1_insert() - Insert an entry referring to a level 0 page
1530  * table into a level 1 page table.
1531  * @op_ctx: Target MMU op context pointing at the entry to insert the L0 page
1532  * table into.
1533  * @child_table: L0 page table to insert.
1534  *
1535  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1536  * valid L1 entry.
1537  *
1538  * It is the caller's responsibility to execute any memory barries to ensure
1539  * that the creation of @child_table is ordered before the L1 entry is inserted.
1540  */
1541 static void
1542 pvr_page_table_l1_insert(struct pvr_mmu_op_context *op_ctx,
1543 			 struct pvr_page_table_l0 *child_table)
1544 {
1545 	struct pvr_page_table_l1_entry_raw *entry_raw =
1546 		pvr_page_table_l1_get_entry_raw(op_ctx->curr_page.l1_table,
1547 						op_ctx->curr_page.l1_idx);
1548 
1549 	pvr_page_table_l1_entry_raw_set(entry_raw,
1550 					child_table->backing_page.dma_addr);
1551 
1552 	child_table->parent = op_ctx->curr_page.l1_table;
1553 	child_table->parent_idx = op_ctx->curr_page.l1_idx;
1554 	op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l1_idx] = child_table;
1555 	++op_ctx->curr_page.l1_table->entry_count;
1556 	op_ctx->curr_page.l0_table = child_table;
1557 }
1558 
1559 /**
1560  * pvr_page_table_l1_remove() - Remove a level 0 page table from a level 1 page
1561  *                              table.
1562  * @op_ctx: Target MMU op context pointing at the L1 entry to remove.
1563  *
1564  * If this function results in the L1 table becoming empty, it will be removed
1565  * from its parent level 2 page table and destroyed.
1566  *
1567  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1568  * valid L1 entry.
1569  */
1570 static void
1571 pvr_page_table_l1_remove(struct pvr_mmu_op_context *op_ctx)
1572 {
1573 	struct pvr_page_table_l1_entry_raw *entry_raw =
1574 		pvr_page_table_l1_get_entry_raw(op_ctx->curr_page.l0_table->parent,
1575 						op_ctx->curr_page.l0_table->parent_idx);
1576 
1577 	WARN_ON(op_ctx->curr_page.l0_table->parent !=
1578 		op_ctx->curr_page.l1_table);
1579 
1580 	pvr_page_table_l1_entry_raw_clear(entry_raw);
1581 
1582 	op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l0_table->parent_idx] = NULL;
1583 	op_ctx->curr_page.l0_table->parent_idx = PVR_IDX_INVALID;
1584 	op_ctx->curr_page.l0_table->next_free = op_ctx->unmap.l0_free_tables;
1585 	op_ctx->unmap.l0_free_tables = op_ctx->curr_page.l0_table;
1586 	op_ctx->curr_page.l0_table = NULL;
1587 
1588 	if (--op_ctx->curr_page.l1_table->entry_count == 0) {
1589 		/* Clear the parent L2 page table entry. */
1590 		if (op_ctx->curr_page.l1_table->parent_idx != PVR_IDX_INVALID)
1591 			pvr_page_table_l2_remove(op_ctx);
1592 	}
1593 }
1594 
1595 /**
1596  * pvr_page_table_l0_insert() - Insert an entry referring to a physical page
1597  * into a level 0 page table.
1598  * @op_ctx: Target MMU op context pointing at the L0 entry to insert.
1599  * @dma_addr: Target DMA address to be referenced by the new entry.
1600  * @flags: Page options to be stored in the new entry.
1601  *
1602  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1603  * valid L0 entry.
1604  */
1605 static void
1606 pvr_page_table_l0_insert(struct pvr_mmu_op_context *op_ctx,
1607 			 dma_addr_t dma_addr, struct pvr_page_flags_raw flags)
1608 {
1609 	struct pvr_page_table_l0_entry_raw *entry_raw =
1610 		pvr_page_table_l0_get_entry_raw(op_ctx->curr_page.l0_table,
1611 						op_ctx->curr_page.l0_idx);
1612 
1613 	pvr_page_table_l0_entry_raw_set(entry_raw, dma_addr, flags);
1614 
1615 	/*
1616 	 * There is no entry to set here - we don't keep a mirror of
1617 	 * individual pages.
1618 	 */
1619 
1620 	++op_ctx->curr_page.l0_table->entry_count;
1621 }
1622 
1623 /**
1624  * pvr_page_table_l0_remove() - Remove a physical page from a level 0 page
1625  * table.
1626  * @op_ctx: Target MMU op context pointing at the L0 entry to remove.
1627  *
1628  * If this function results in the L0 table becoming empty, it will be removed
1629  * from its parent L1 page table and destroyed.
1630  *
1631  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1632  * valid L0 entry.
1633  */
1634 static void
1635 pvr_page_table_l0_remove(struct pvr_mmu_op_context *op_ctx)
1636 {
1637 	struct pvr_page_table_l0_entry_raw *entry_raw =
1638 		pvr_page_table_l0_get_entry_raw(op_ctx->curr_page.l0_table,
1639 						op_ctx->curr_page.l0_idx);
1640 
1641 	pvr_page_table_l0_entry_raw_clear(entry_raw);
1642 
1643 	/*
1644 	 * There is no entry to clear here - we don't keep a mirror of
1645 	 * individual pages.
1646 	 */
1647 
1648 	if (--op_ctx->curr_page.l0_table->entry_count == 0) {
1649 		/* Clear the parent L1 page table entry. */
1650 		if (op_ctx->curr_page.l0_table->parent_idx != PVR_IDX_INVALID)
1651 			pvr_page_table_l1_remove(op_ctx);
1652 	}
1653 }
1654 
1655 /**
1656  * DOC: Page table index utilities
1657  */
1658 
1659 /**
1660  * pvr_page_table_l2_idx() - Calculate the level 2 page table index for a
1661  *                           device-virtual address.
1662  * @device_addr: Target device-virtual address.
1663  *
1664  * This function does not perform any bounds checking - it is the caller's
1665  * responsibility to ensure that @device_addr is valid before interpreting
1666  * the result.
1667  *
1668  * Return:
1669  * The index into a level 2 page table corresponding to @device_addr.
1670  */
1671 static u16
1672 pvr_page_table_l2_idx(u64 device_addr)
1673 {
1674 	return (device_addr & ~ROGUE_MMUCTRL_VADDR_PC_INDEX_CLRMSK) >>
1675 	       ROGUE_MMUCTRL_VADDR_PC_INDEX_SHIFT;
1676 }
1677 
1678 /**
1679  * pvr_page_table_l1_idx() - Calculate the level 1 page table index for a
1680  *                           device-virtual address.
1681  * @device_addr: Target device-virtual address.
1682  *
1683  * This function does not perform any bounds checking - it is the caller's
1684  * responsibility to ensure that @device_addr is valid before interpreting
1685  * the result.
1686  *
1687  * Return:
1688  * The index into a level 1 page table corresponding to @device_addr.
1689  */
1690 static u16
1691 pvr_page_table_l1_idx(u64 device_addr)
1692 {
1693 	return (device_addr & ~ROGUE_MMUCTRL_VADDR_PD_INDEX_CLRMSK) >>
1694 	       ROGUE_MMUCTRL_VADDR_PD_INDEX_SHIFT;
1695 }
1696 
1697 /**
1698  * pvr_page_table_l0_idx() - Calculate the level 0 page table index for a
1699  *                           device-virtual address.
1700  * @device_addr: Target device-virtual address.
1701  *
1702  * This function does not perform any bounds checking - it is the caller's
1703  * responsibility to ensure that @device_addr is valid before interpreting
1704  * the result.
1705  *
1706  * Return:
1707  * The index into a level 0 page table corresponding to @device_addr.
1708  */
1709 static u16
1710 pvr_page_table_l0_idx(u64 device_addr)
1711 {
1712 	return (device_addr & ~ROGUE_MMUCTRL_VADDR_PT_INDEX_CLRMSK) >>
1713 	       ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT;
1714 }
1715 
1716 /**
1717  * DOC: High-level page table operations
1718  */
1719 
1720 /**
1721  * pvr_page_table_l1_get_or_insert() - Retrieves (optionally inserting if
1722  * necessary) a level 1 page table from the specified level 2 page table entry.
1723  * @op_ctx: Target MMU op context.
1724  * @should_insert: [IN] Specifies whether new page tables should be inserted
1725  * when empty page table entries are encountered during traversal.
1726  *
1727  * Return:
1728  *  * 0 on success, or
1729  *
1730  *    If @should_insert is %false:
1731  *     * -%ENXIO if a level 1 page table would have been inserted.
1732  *
1733  *    If @should_insert is %true:
1734  *     * Any error encountered while inserting the level 1 page table.
1735  */
1736 static int
1737 pvr_page_table_l1_get_or_insert(struct pvr_mmu_op_context *op_ctx,
1738 				bool should_insert)
1739 {
1740 	struct pvr_page_table_l2 *l2_table =
1741 		&op_ctx->mmu_ctx->page_table_l2;
1742 	struct pvr_page_table_l1 *table;
1743 
1744 	if (pvr_page_table_l2_entry_is_valid(l2_table,
1745 					     op_ctx->curr_page.l2_idx)) {
1746 		op_ctx->curr_page.l1_table =
1747 			l2_table->entries[op_ctx->curr_page.l2_idx];
1748 		return 0;
1749 	}
1750 
1751 	if (!should_insert)
1752 		return -ENXIO;
1753 
1754 	/* Take a prealloced table. */
1755 	table = op_ctx->map.l1_prealloc_tables;
1756 	if (!table)
1757 		return -ENOMEM;
1758 
1759 	/* Pop */
1760 	op_ctx->map.l1_prealloc_tables = table->next_free;
1761 	table->next_free = NULL;
1762 
1763 	/* Ensure new table is fully written out before adding to L2 page table. */
1764 	wmb();
1765 
1766 	pvr_page_table_l2_insert(op_ctx, table);
1767 
1768 	return 0;
1769 }
1770 
1771 /**
1772  * pvr_page_table_l0_get_or_insert() - Retrieves (optionally inserting if
1773  * necessary) a level 0 page table from the specified level 1 page table entry.
1774  * @op_ctx: Target MMU op context.
1775  * @should_insert: [IN] Specifies whether new page tables should be inserted
1776  * when empty page table entries are encountered during traversal.
1777  *
1778  * Return:
1779  *  * 0 on success,
1780  *
1781  *    If @should_insert is %false:
1782  *     * -%ENXIO if a level 0 page table would have been inserted.
1783  *
1784  *    If @should_insert is %true:
1785  *     * Any error encountered while inserting the level 0 page table.
1786  */
1787 static int
1788 pvr_page_table_l0_get_or_insert(struct pvr_mmu_op_context *op_ctx,
1789 				bool should_insert)
1790 {
1791 	struct pvr_page_table_l0 *table;
1792 
1793 	if (pvr_page_table_l1_entry_is_valid(op_ctx->curr_page.l1_table,
1794 					     op_ctx->curr_page.l1_idx)) {
1795 		op_ctx->curr_page.l0_table =
1796 			op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l1_idx];
1797 		return 0;
1798 	}
1799 
1800 	if (!should_insert)
1801 		return -ENXIO;
1802 
1803 	/* Take a prealloced table. */
1804 	table = op_ctx->map.l0_prealloc_tables;
1805 	if (!table)
1806 		return -ENOMEM;
1807 
1808 	/* Pop */
1809 	op_ctx->map.l0_prealloc_tables = table->next_free;
1810 	table->next_free = NULL;
1811 
1812 	/* Ensure new table is fully written out before adding to L1 page table. */
1813 	wmb();
1814 
1815 	pvr_page_table_l1_insert(op_ctx, table);
1816 
1817 	return 0;
1818 }
1819 
1820 /**
1821  * pvr_mmu_context_create() - Create an MMU context.
1822  * @pvr_dev: PVR device associated with owning VM context.
1823  *
1824  * Returns:
1825  *  * Newly created MMU context object on success, or
1826  *  * -%ENOMEM if no memory is available,
1827  *  * Any error code returned by pvr_page_table_l2_init().
1828  */
1829 struct pvr_mmu_context *pvr_mmu_context_create(struct pvr_device *pvr_dev)
1830 {
1831 	struct pvr_mmu_context *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
1832 	int err;
1833 
1834 	if (!ctx)
1835 		return ERR_PTR(-ENOMEM);
1836 
1837 	err = pvr_page_table_l2_init(&ctx->page_table_l2, pvr_dev);
1838 	if (err)
1839 		return ERR_PTR(err);
1840 
1841 	ctx->pvr_dev = pvr_dev;
1842 
1843 	return ctx;
1844 }
1845 
1846 /**
1847  * pvr_mmu_context_destroy() - Destroy an MMU context.
1848  * @ctx: Target MMU context.
1849  */
1850 void pvr_mmu_context_destroy(struct pvr_mmu_context *ctx)
1851 {
1852 	pvr_page_table_l2_fini(&ctx->page_table_l2);
1853 	kfree(ctx);
1854 }
1855 
1856 /**
1857  * pvr_mmu_get_root_table_dma_addr() - Get the DMA address of the root of the
1858  * page table structure behind a VM context.
1859  * @ctx: Target MMU context.
1860  */
1861 dma_addr_t pvr_mmu_get_root_table_dma_addr(struct pvr_mmu_context *ctx)
1862 {
1863 	return ctx->page_table_l2.backing_page.dma_addr;
1864 }
1865 
1866 /**
1867  * pvr_page_table_l1_alloc() - Allocate a l1 page_table object.
1868  * @ctx: MMU context of owning VM context.
1869  *
1870  * Returns:
1871  *  * Newly created page table object on success, or
1872  *  * -%ENOMEM if no memory is available,
1873  *  * Any error code returned by pvr_page_table_l1_init().
1874  */
1875 static struct pvr_page_table_l1 *
1876 pvr_page_table_l1_alloc(struct pvr_mmu_context *ctx)
1877 {
1878 	int err;
1879 
1880 	struct pvr_page_table_l1 *table =
1881 		kzalloc(sizeof(*table), GFP_KERNEL);
1882 
1883 	if (!table)
1884 		return ERR_PTR(-ENOMEM);
1885 
1886 	err = pvr_page_table_l1_init(table, ctx->pvr_dev);
1887 	if (err) {
1888 		kfree(table);
1889 		return ERR_PTR(err);
1890 	}
1891 
1892 	return table;
1893 }
1894 
1895 /**
1896  * pvr_page_table_l0_alloc() - Allocate a l0 page_table object.
1897  * @ctx: MMU context of owning VM context.
1898  *
1899  * Returns:
1900  *  * Newly created page table object on success, or
1901  *  * -%ENOMEM if no memory is available,
1902  *  * Any error code returned by pvr_page_table_l0_init().
1903  */
1904 static struct pvr_page_table_l0 *
1905 pvr_page_table_l0_alloc(struct pvr_mmu_context *ctx)
1906 {
1907 	int err;
1908 
1909 	struct pvr_page_table_l0 *table =
1910 		kzalloc(sizeof(*table), GFP_KERNEL);
1911 
1912 	if (!table)
1913 		return ERR_PTR(-ENOMEM);
1914 
1915 	err = pvr_page_table_l0_init(table, ctx->pvr_dev);
1916 	if (err) {
1917 		kfree(table);
1918 		return ERR_PTR(err);
1919 	}
1920 
1921 	return table;
1922 }
1923 
1924 /**
1925  * pvr_mmu_op_context_require_sync() - Mark an MMU op context as requiring a
1926  * sync operation for the referenced page tables up to a specified level.
1927  * @op_ctx: Target MMU op context.
1928  * @level: Maximum page table level for which a sync is required.
1929  */
1930 static void
1931 pvr_mmu_op_context_require_sync(struct pvr_mmu_op_context *op_ctx,
1932 				enum pvr_mmu_sync_level level)
1933 {
1934 	if (op_ctx->sync_level_required < level)
1935 		op_ctx->sync_level_required = level;
1936 }
1937 
1938 /**
1939  * pvr_mmu_op_context_sync_manual() - Trigger a sync of some or all of the
1940  * page tables referenced by a MMU op context.
1941  * @op_ctx: Target MMU op context.
1942  * @level: Maximum page table level to sync.
1943  *
1944  * Do not call this function directly. Instead use
1945  * pvr_mmu_op_context_sync_partial() which is checked against the current
1946  * value of &op_ctx->sync_level_required as set by
1947  * pvr_mmu_op_context_require_sync().
1948  */
1949 static void
1950 pvr_mmu_op_context_sync_manual(struct pvr_mmu_op_context *op_ctx,
1951 			       enum pvr_mmu_sync_level level)
1952 {
1953 	/*
1954 	 * We sync the page table levels in ascending order (starting from the
1955 	 * leaf node) to ensure consistency.
1956 	 */
1957 
1958 	WARN_ON(level < PVR_MMU_SYNC_LEVEL_NONE);
1959 
1960 	if (level <= PVR_MMU_SYNC_LEVEL_NONE)
1961 		return;
1962 
1963 	if (op_ctx->curr_page.l0_table)
1964 		pvr_page_table_l0_sync(op_ctx->curr_page.l0_table);
1965 
1966 	if (level < PVR_MMU_SYNC_LEVEL_1)
1967 		return;
1968 
1969 	if (op_ctx->curr_page.l1_table)
1970 		pvr_page_table_l1_sync(op_ctx->curr_page.l1_table);
1971 
1972 	if (level < PVR_MMU_SYNC_LEVEL_2)
1973 		return;
1974 
1975 	pvr_page_table_l2_sync(&op_ctx->mmu_ctx->page_table_l2);
1976 }
1977 
1978 /**
1979  * pvr_mmu_op_context_sync_partial() - Trigger a sync of some or all of the
1980  * page tables referenced by a MMU op context.
1981  * @op_ctx: Target MMU op context.
1982  * @level: Requested page table level to sync up to (inclusive).
1983  *
1984  * If @level is greater than the maximum level recorded by @op_ctx as requiring
1985  * a sync operation, only the previously recorded maximum will be used.
1986  *
1987  * Additionally, if @level is greater than or equal to the maximum level
1988  * recorded by @op_ctx as requiring a sync operation, that maximum level will be
1989  * reset as a full sync will be performed. This is equivalent to calling
1990  * pvr_mmu_op_context_sync().
1991  */
1992 static void
1993 pvr_mmu_op_context_sync_partial(struct pvr_mmu_op_context *op_ctx,
1994 				enum pvr_mmu_sync_level level)
1995 {
1996 	/*
1997 	 * If the requested sync level is greater than or equal to the
1998 	 * currently required sync level, we do two things:
1999 	 *  * Don't waste time syncing levels we haven't previously marked as
2000 	 *    requiring a sync, and
2001 	 *  * Reset the required sync level since we are about to sync
2002 	 *    everything that was previously marked as requiring a sync.
2003 	 */
2004 	if (level >= op_ctx->sync_level_required) {
2005 		level = op_ctx->sync_level_required;
2006 		op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
2007 	}
2008 
2009 	pvr_mmu_op_context_sync_manual(op_ctx, level);
2010 }
2011 
2012 /**
2013  * pvr_mmu_op_context_sync() - Trigger a sync of every page table referenced by
2014  * a MMU op context.
2015  * @op_ctx: Target MMU op context.
2016  *
2017  * The maximum level marked internally as requiring a sync will be reset so
2018  * that subsequent calls to this function will be no-ops unless @op_ctx is
2019  * otherwise updated.
2020  */
2021 static void
2022 pvr_mmu_op_context_sync(struct pvr_mmu_op_context *op_ctx)
2023 {
2024 	pvr_mmu_op_context_sync_manual(op_ctx, op_ctx->sync_level_required);
2025 
2026 	op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
2027 }
2028 
2029 /**
2030  * pvr_mmu_op_context_load_tables() - Load pointers to tables in each level of
2031  * the page table tree structure needed to reference the physical page
2032  * referenced by a MMU op context.
2033  * @op_ctx: Target MMU op context.
2034  * @should_create: Specifies whether new page tables should be created when
2035  * empty page table entries are encountered during traversal.
2036  * @load_level_required: Maximum page table level to load.
2037  *
2038  * If @should_create is %true, this function may modify the stored required
2039  * sync level of @op_ctx as new page tables are created and inserted into their
2040  * respective parents.
2041  *
2042  * Since there is only one root page table, it is technically incorrect to call
2043  * this function with a value of @load_level_required greater than or equal to
2044  * the root level number. However, this is not explicitly disallowed here.
2045  *
2046  * Return:
2047  *  * 0 on success,
2048  *  * Any error returned by pvr_page_table_l1_get_or_create() if
2049  *    @load_level_required >= 1 except -%ENXIO, or
2050  *  * Any error returned by pvr_page_table_l0_get_or_create() if
2051  *    @load_level_required >= 0 except -%ENXIO.
2052  */
2053 static int
2054 pvr_mmu_op_context_load_tables(struct pvr_mmu_op_context *op_ctx,
2055 			       bool should_create,
2056 			       enum pvr_mmu_sync_level load_level_required)
2057 {
2058 	const struct pvr_page_table_l1 *l1_head_before =
2059 		op_ctx->map.l1_prealloc_tables;
2060 	const struct pvr_page_table_l0 *l0_head_before =
2061 		op_ctx->map.l0_prealloc_tables;
2062 	int err;
2063 
2064 	/* Clear tables we're about to fetch in case of error states. */
2065 	if (load_level_required >= PVR_MMU_SYNC_LEVEL_1)
2066 		op_ctx->curr_page.l1_table = NULL;
2067 
2068 	if (load_level_required >= PVR_MMU_SYNC_LEVEL_0)
2069 		op_ctx->curr_page.l0_table = NULL;
2070 
2071 	/* Get or create L1 page table. */
2072 	if (load_level_required >= PVR_MMU_SYNC_LEVEL_1) {
2073 		err = pvr_page_table_l1_get_or_insert(op_ctx, should_create);
2074 		if (err) {
2075 			/*
2076 			 * If @should_create is %false and no L1 page table was
2077 			 * found, return early but without an error. Since
2078 			 * pvr_page_table_l1_get_or_create() can only return
2079 			 * -%ENXIO if @should_create is %false, there is no
2080 			 * need to check it here.
2081 			 */
2082 			if (err == -ENXIO)
2083 				err = 0;
2084 
2085 			return err;
2086 		}
2087 	}
2088 
2089 	/* Get or create L0 page table. */
2090 	if (load_level_required >= PVR_MMU_SYNC_LEVEL_0) {
2091 		err = pvr_page_table_l0_get_or_insert(op_ctx, should_create);
2092 		if (err) {
2093 			/*
2094 			 * If @should_create is %false and no L0 page table was
2095 			 * found, return early but without an error. Since
2096 			 * pvr_page_table_l0_get_or_insert() can only return
2097 			 * -%ENXIO if @should_create is %false, there is no
2098 			 * need to check it here.
2099 			 */
2100 			if (err == -ENXIO)
2101 				err = 0;
2102 
2103 			/*
2104 			 * At this point, an L1 page table could have been
2105 			 * inserted but is now empty due to the failed attempt
2106 			 * at inserting an L0 page table. In this instance, we
2107 			 * must remove the empty L1 page table ourselves as
2108 			 * pvr_page_table_l1_remove() is never called as part
2109 			 * of the error path in
2110 			 * pvr_page_table_l0_get_or_insert().
2111 			 */
2112 			if (l1_head_before != op_ctx->map.l1_prealloc_tables) {
2113 				pvr_page_table_l2_remove(op_ctx);
2114 				pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_2);
2115 			}
2116 
2117 			return err;
2118 		}
2119 	}
2120 
2121 	/*
2122 	 * A sync is only needed if table objects were inserted. This can be
2123 	 * inferred by checking if the pointer at the head of the linked list
2124 	 * has changed.
2125 	 */
2126 	if (l1_head_before != op_ctx->map.l1_prealloc_tables)
2127 		pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_2);
2128 	else if (l0_head_before != op_ctx->map.l0_prealloc_tables)
2129 		pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_1);
2130 
2131 	return 0;
2132 }
2133 
2134 /**
2135  * pvr_mmu_op_context_set_curr_page() - Reassign the current page of an MMU op
2136  * context, syncing any page tables previously assigned to it which are no
2137  * longer relevant.
2138  * @op_ctx: Target MMU op context.
2139  * @device_addr: New pointer target.
2140  * @should_create: Specify whether new page tables should be created when
2141  * empty page table entries are encountered during traversal.
2142  *
2143  * This function performs a full sync on the pointer, regardless of which
2144  * levels are modified.
2145  *
2146  * Return:
2147  *  * 0 on success, or
2148  *  * Any error returned by pvr_mmu_op_context_load_tables().
2149  */
2150 static int
2151 pvr_mmu_op_context_set_curr_page(struct pvr_mmu_op_context *op_ctx,
2152 				 u64 device_addr, bool should_create)
2153 {
2154 	pvr_mmu_op_context_sync(op_ctx);
2155 
2156 	op_ctx->curr_page.l2_idx = pvr_page_table_l2_idx(device_addr);
2157 	op_ctx->curr_page.l1_idx = pvr_page_table_l1_idx(device_addr);
2158 	op_ctx->curr_page.l0_idx = pvr_page_table_l0_idx(device_addr);
2159 	op_ctx->curr_page.l1_table = NULL;
2160 	op_ctx->curr_page.l0_table = NULL;
2161 
2162 	return pvr_mmu_op_context_load_tables(op_ctx, should_create,
2163 					      PVR_MMU_SYNC_LEVEL_1);
2164 }
2165 
2166 /**
2167  * pvr_mmu_op_context_next_page() - Advance the current page of an MMU op
2168  * context.
2169  * @op_ctx: Target MMU op context.
2170  * @should_create: Specify whether new page tables should be created when
2171  * empty page table entries are encountered during traversal.
2172  *
2173  * If @should_create is %false, it is the caller's responsibility to verify that
2174  * the state of the table references in @op_ctx is valid on return. If -%ENXIO
2175  * is returned, at least one of the table references is invalid. It should be
2176  * noted that @op_ctx as a whole will be left in a valid state if -%ENXIO is
2177  * returned, unlike other error codes. The caller should check which references
2178  * are invalid by comparing them to %NULL. Only &@ptr->l2_table is guaranteed
2179  * to be valid, since it represents the root of the page table tree structure.
2180  *
2181  * Return:
2182  *  * 0 on success,
2183  *  * -%EPERM if the operation would wrap at the top of the page table
2184  *    hierarchy,
2185  *  * -%ENXIO if @should_create is %false and a page table of any level would
2186  *    have otherwise been created, or
2187  *  * Any error returned while attempting to create missing page tables if
2188  *    @should_create is %true.
2189  */
2190 static int
2191 pvr_mmu_op_context_next_page(struct pvr_mmu_op_context *op_ctx,
2192 			     bool should_create)
2193 {
2194 	s8 load_level_required = PVR_MMU_SYNC_LEVEL_NONE;
2195 
2196 	if (++op_ctx->curr_page.l0_idx != ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X)
2197 		goto load_tables;
2198 
2199 	op_ctx->curr_page.l0_idx = 0;
2200 	load_level_required = PVR_MMU_SYNC_LEVEL_0;
2201 
2202 	if (++op_ctx->curr_page.l1_idx != ROGUE_MMUCTRL_ENTRIES_PD_VALUE)
2203 		goto load_tables;
2204 
2205 	op_ctx->curr_page.l1_idx = 0;
2206 	load_level_required = PVR_MMU_SYNC_LEVEL_1;
2207 
2208 	if (++op_ctx->curr_page.l2_idx != ROGUE_MMUCTRL_ENTRIES_PC_VALUE)
2209 		goto load_tables;
2210 
2211 	/*
2212 	 * If the pattern continued, we would set &op_ctx->curr_page.l2_idx to
2213 	 * zero here. However, that would wrap the top layer of the page table
2214 	 * hierarchy which is not a valid operation. Instead, we warn and return
2215 	 * an error.
2216 	 */
2217 	WARN(true,
2218 	     "%s(%p) attempted to loop the top of the page table hierarchy",
2219 	     __func__, op_ctx);
2220 	return -EPERM;
2221 
2222 	/* If indices have wrapped, we need to load new tables. */
2223 load_tables:
2224 	/* First, flush tables which will be unloaded. */
2225 	pvr_mmu_op_context_sync_partial(op_ctx, load_level_required);
2226 
2227 	/* Then load tables from the required level down. */
2228 	return pvr_mmu_op_context_load_tables(op_ctx, should_create,
2229 					      load_level_required);
2230 }
2231 
2232 /**
2233  * DOC: Single page operations
2234  */
2235 
2236 /**
2237  * pvr_page_create() - Create a device-virtual memory page and insert it into
2238  * a level 0 page table.
2239  * @op_ctx: Target MMU op context pointing at the device-virtual address of the
2240  * target page.
2241  * @dma_addr: DMA address of the physical page backing the created page.
2242  * @flags: Page options saved on the level 0 page table entry for reading by
2243  *         the device.
2244  *
2245  * Return:
2246  *  * 0 on success, or
2247  *  * -%EEXIST if the requested page already exists.
2248  */
2249 static int
2250 pvr_page_create(struct pvr_mmu_op_context *op_ctx, dma_addr_t dma_addr,
2251 		struct pvr_page_flags_raw flags)
2252 {
2253 	/* Do not create a new page if one already exists. */
2254 	if (pvr_page_table_l0_entry_is_valid(op_ctx->curr_page.l0_table,
2255 					     op_ctx->curr_page.l0_idx)) {
2256 		return -EEXIST;
2257 	}
2258 
2259 	pvr_page_table_l0_insert(op_ctx, dma_addr, flags);
2260 
2261 	pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0);
2262 
2263 	return 0;
2264 }
2265 
2266 /**
2267  * pvr_page_destroy() - Destroy a device page after removing it from its
2268  * parent level 0 page table.
2269  * @op_ctx: Target MMU op context.
2270  */
2271 static void
2272 pvr_page_destroy(struct pvr_mmu_op_context *op_ctx)
2273 {
2274 	/* Do nothing if the page does not exist. */
2275 	if (!pvr_page_table_l0_entry_is_valid(op_ctx->curr_page.l0_table,
2276 					      op_ctx->curr_page.l0_idx)) {
2277 		return;
2278 	}
2279 
2280 	/* Clear the parent L0 page table entry. */
2281 	pvr_page_table_l0_remove(op_ctx);
2282 
2283 	pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0);
2284 }
2285 
2286 /**
2287  * pvr_mmu_op_context_destroy() - Destroy an MMU op context.
2288  * @op_ctx: Target MMU op context.
2289  */
2290 void pvr_mmu_op_context_destroy(struct pvr_mmu_op_context *op_ctx)
2291 {
2292 	const bool flush_caches =
2293 		op_ctx->sync_level_required != PVR_MMU_SYNC_LEVEL_NONE;
2294 
2295 	pvr_mmu_op_context_sync(op_ctx);
2296 
2297 	/* Unmaps should be flushed immediately. Map flushes can be deferred. */
2298 	if (flush_caches && !op_ctx->map.sgt)
2299 		pvr_mmu_flush_exec(op_ctx->mmu_ctx->pvr_dev, true);
2300 
2301 	while (op_ctx->map.l0_prealloc_tables) {
2302 		struct pvr_page_table_l0 *tmp = op_ctx->map.l0_prealloc_tables;
2303 
2304 		op_ctx->map.l0_prealloc_tables =
2305 			op_ctx->map.l0_prealloc_tables->next_free;
2306 		pvr_page_table_l0_free(tmp);
2307 	}
2308 
2309 	while (op_ctx->map.l1_prealloc_tables) {
2310 		struct pvr_page_table_l1 *tmp = op_ctx->map.l1_prealloc_tables;
2311 
2312 		op_ctx->map.l1_prealloc_tables =
2313 			op_ctx->map.l1_prealloc_tables->next_free;
2314 		pvr_page_table_l1_free(tmp);
2315 	}
2316 
2317 	while (op_ctx->unmap.l0_free_tables) {
2318 		struct pvr_page_table_l0 *tmp = op_ctx->unmap.l0_free_tables;
2319 
2320 		op_ctx->unmap.l0_free_tables =
2321 			op_ctx->unmap.l0_free_tables->next_free;
2322 		pvr_page_table_l0_free(tmp);
2323 	}
2324 
2325 	while (op_ctx->unmap.l1_free_tables) {
2326 		struct pvr_page_table_l1 *tmp = op_ctx->unmap.l1_free_tables;
2327 
2328 		op_ctx->unmap.l1_free_tables =
2329 			op_ctx->unmap.l1_free_tables->next_free;
2330 		pvr_page_table_l1_free(tmp);
2331 	}
2332 
2333 	kfree(op_ctx);
2334 }
2335 
2336 /**
2337  * pvr_mmu_op_context_create() - Create an MMU op context.
2338  * @ctx: MMU context associated with owning VM context.
2339  * @sgt: Scatter gather table containing pages pinned for use by this context.
2340  * @sgt_offset: Start offset of the requested device-virtual memory mapping.
2341  * @size: Size in bytes of the requested device-virtual memory mapping. For an
2342  * unmapping, this should be zero so that no page tables are allocated.
2343  *
2344  * Returns:
2345  *  * Newly created MMU op context object on success, or
2346  *  * -%ENOMEM if no memory is available,
2347  *  * Any error code returned by pvr_page_table_l2_init().
2348  */
2349 struct pvr_mmu_op_context *
2350 pvr_mmu_op_context_create(struct pvr_mmu_context *ctx, struct sg_table *sgt,
2351 			  u64 sgt_offset, u64 size)
2352 {
2353 	int err;
2354 
2355 	struct pvr_mmu_op_context *op_ctx =
2356 		kzalloc(sizeof(*op_ctx), GFP_KERNEL);
2357 
2358 	if (!op_ctx)
2359 		return ERR_PTR(-ENOMEM);
2360 
2361 	op_ctx->mmu_ctx = ctx;
2362 	op_ctx->map.sgt = sgt;
2363 	op_ctx->map.sgt_offset = sgt_offset;
2364 	op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
2365 
2366 	if (size) {
2367 		/*
2368 		 * The number of page table objects we need to prealloc is
2369 		 * indicated by the mapping size, start offset and the sizes
2370 		 * of the areas mapped per PT or PD. The range calculation is
2371 		 * identical to that for the index into a table for a device
2372 		 * address, so we reuse those functions here.
2373 		 */
2374 		const u32 l1_start_idx = pvr_page_table_l2_idx(sgt_offset);
2375 		const u32 l1_end_idx = pvr_page_table_l2_idx(sgt_offset + size);
2376 		const u32 l1_count = l1_end_idx - l1_start_idx + 1;
2377 		const u32 l0_start_idx = pvr_page_table_l1_idx(sgt_offset);
2378 		const u32 l0_end_idx = pvr_page_table_l1_idx(sgt_offset + size);
2379 		const u32 l0_count = l0_end_idx - l0_start_idx + 1;
2380 
2381 		/*
2382 		 * Alloc and push page table entries until we have enough of
2383 		 * each type, ending with linked lists of l0 and l1 entries in
2384 		 * reverse order.
2385 		 */
2386 		for (int i = 0; i < l1_count; i++) {
2387 			struct pvr_page_table_l1 *l1_tmp =
2388 				pvr_page_table_l1_alloc(ctx);
2389 
2390 			err = PTR_ERR_OR_ZERO(l1_tmp);
2391 			if (err)
2392 				goto err_cleanup;
2393 
2394 			l1_tmp->next_free = op_ctx->map.l1_prealloc_tables;
2395 			op_ctx->map.l1_prealloc_tables = l1_tmp;
2396 		}
2397 
2398 		for (int i = 0; i < l0_count; i++) {
2399 			struct pvr_page_table_l0 *l0_tmp =
2400 				pvr_page_table_l0_alloc(ctx);
2401 
2402 			err = PTR_ERR_OR_ZERO(l0_tmp);
2403 			if (err)
2404 				goto err_cleanup;
2405 
2406 			l0_tmp->next_free = op_ctx->map.l0_prealloc_tables;
2407 			op_ctx->map.l0_prealloc_tables = l0_tmp;
2408 		}
2409 	}
2410 
2411 	return op_ctx;
2412 
2413 err_cleanup:
2414 	pvr_mmu_op_context_destroy(op_ctx);
2415 
2416 	return ERR_PTR(err);
2417 }
2418 
2419 /**
2420  * pvr_mmu_op_context_unmap_curr_page() - Unmap pages from a memory context
2421  * starting from the current page of an MMU op context.
2422  * @op_ctx: Target MMU op context pointing at the first page to unmap.
2423  * @nr_pages: Number of pages to unmap.
2424  *
2425  * Return:
2426  *  * 0 on success, or
2427  *  * Any error encountered while advancing @op_ctx.curr_page with
2428  *    pvr_mmu_op_context_next_page() (except -%ENXIO).
2429  */
2430 static int
2431 pvr_mmu_op_context_unmap_curr_page(struct pvr_mmu_op_context *op_ctx,
2432 				   u64 nr_pages)
2433 {
2434 	int err;
2435 
2436 	if (nr_pages == 0)
2437 		return 0;
2438 
2439 	/*
2440 	 * Destroy first page outside loop, as it doesn't require a page
2441 	 * advance beforehand. If the L0 page table reference in
2442 	 * @op_ctx.curr_page is %NULL, there cannot be a mapped page at
2443 	 * @op_ctx.curr_page (so skip ahead).
2444 	 */
2445 	if (op_ctx->curr_page.l0_table)
2446 		pvr_page_destroy(op_ctx);
2447 
2448 	for (u64 page = 1; page < nr_pages; ++page) {
2449 		err = pvr_mmu_op_context_next_page(op_ctx, false);
2450 		/*
2451 		 * If the page table tree structure at @op_ctx.curr_page is
2452 		 * incomplete, skip ahead. We don't care about unmapping pages
2453 		 * that cannot exist.
2454 		 *
2455 		 * FIXME: This could be made more efficient by jumping ahead
2456 		 * using pvr_mmu_op_context_set_curr_page().
2457 		 */
2458 		if (err == -ENXIO)
2459 			continue;
2460 		else if (err)
2461 			return err;
2462 
2463 		pvr_page_destroy(op_ctx);
2464 	}
2465 
2466 	return 0;
2467 }
2468 
2469 /**
2470  * pvr_mmu_unmap() - Unmap pages from a memory context.
2471  * @op_ctx: Target MMU op context.
2472  * @device_addr: First device-virtual address to unmap.
2473  * @size: Size in bytes to unmap.
2474  *
2475  * The total amount of device-virtual memory unmapped is
2476  * @nr_pages * %PVR_DEVICE_PAGE_SIZE.
2477  *
2478  * Returns:
2479  *  * 0 on success, or
2480  *  * Any error code returned by pvr_page_table_ptr_init(), or
2481  *  * Any error code returned by pvr_page_table_ptr_unmap().
2482  */
2483 int pvr_mmu_unmap(struct pvr_mmu_op_context *op_ctx, u64 device_addr, u64 size)
2484 {
2485 	int err = pvr_mmu_op_context_set_curr_page(op_ctx, device_addr, false);
2486 
2487 	if (err)
2488 		return err;
2489 
2490 	return pvr_mmu_op_context_unmap_curr_page(op_ctx,
2491 						  size >> PVR_DEVICE_PAGE_SHIFT);
2492 }
2493 
2494 /**
2495  * pvr_mmu_map_sgl() - Map part of a scatter-gather table entry to
2496  * device-virtual memory.
2497  * @op_ctx: Target MMU op context pointing to the first page that should be
2498  * mapped.
2499  * @sgl: Target scatter-gather table entry.
2500  * @offset: Offset into @sgl to map from. Must result in a starting address
2501  * from @sgl which is CPU page-aligned.
2502  * @size: Size of the memory to be mapped in bytes. Must be a non-zero multiple
2503  * of the device page size.
2504  * @page_flags: Page options to be applied to every device-virtual memory page
2505  * in the created mapping.
2506  *
2507  * Return:
2508  *  * 0 on success,
2509  *  * -%EINVAL if the range specified by @offset and @size is not completely
2510  *    within @sgl, or
2511  *  * Any error encountered while creating a page with pvr_page_create(), or
2512  *  * Any error encountered while advancing @op_ctx.curr_page with
2513  *    pvr_mmu_op_context_next_page().
2514  */
2515 static int
2516 pvr_mmu_map_sgl(struct pvr_mmu_op_context *op_ctx, struct scatterlist *sgl,
2517 		u64 offset, u64 size, struct pvr_page_flags_raw page_flags)
2518 {
2519 	const unsigned int pages = size >> PVR_DEVICE_PAGE_SHIFT;
2520 	dma_addr_t dma_addr = sg_dma_address(sgl) + offset;
2521 	const unsigned int dma_len = sg_dma_len(sgl);
2522 	struct pvr_page_table_ptr ptr_copy;
2523 	unsigned int page;
2524 	int err;
2525 
2526 	if (size > dma_len || offset > dma_len - size)
2527 		return -EINVAL;
2528 
2529 	/*
2530 	 * Before progressing, save a copy of the start pointer so we can use
2531 	 * it again if we enter an error state and have to destroy pages.
2532 	 */
2533 	memcpy(&ptr_copy, &op_ctx->curr_page, sizeof(ptr_copy));
2534 
2535 	/*
2536 	 * Create first page outside loop, as it doesn't require a page advance
2537 	 * beforehand.
2538 	 */
2539 	err = pvr_page_create(op_ctx, dma_addr, page_flags);
2540 	if (err)
2541 		return err;
2542 
2543 	for (page = 1; page < pages; ++page) {
2544 		err = pvr_mmu_op_context_next_page(op_ctx, true);
2545 		if (err)
2546 			goto err_destroy_pages;
2547 
2548 		dma_addr += PVR_DEVICE_PAGE_SIZE;
2549 
2550 		err = pvr_page_create(op_ctx, dma_addr, page_flags);
2551 		if (err)
2552 			goto err_destroy_pages;
2553 	}
2554 
2555 	return 0;
2556 
2557 err_destroy_pages:
2558 	memcpy(&op_ctx->curr_page, &ptr_copy, sizeof(op_ctx->curr_page));
2559 	err = pvr_mmu_op_context_unmap_curr_page(op_ctx, page);
2560 
2561 	return err;
2562 }
2563 
2564 /**
2565  * pvr_mmu_map() - Map an object's virtual memory to physical memory.
2566  * @op_ctx: Target MMU op context.
2567  * @size: Size of memory to be mapped in bytes. Must be a non-zero multiple
2568  * of the device page size.
2569  * @flags: Flags from pvr_gem_object associated with the mapping.
2570  * @device_addr: Virtual device address to map to. Must be device page-aligned.
2571  *
2572  * Returns:
2573  *  * 0 on success, or
2574  *  * Any error code returned by pvr_page_table_ptr_init(), or
2575  *  * Any error code returned by pvr_mmu_map_sgl(), or
2576  *  * Any error code returned by pvr_page_table_ptr_next_page().
2577  */
2578 int pvr_mmu_map(struct pvr_mmu_op_context *op_ctx, u64 size, u64 flags,
2579 		u64 device_addr)
2580 {
2581 	struct pvr_page_table_ptr ptr_copy;
2582 	struct pvr_page_flags_raw flags_raw;
2583 	struct scatterlist *sgl;
2584 	u64 mapped_size = 0;
2585 	unsigned int count;
2586 	int err;
2587 
2588 	if (!size)
2589 		return 0;
2590 
2591 	if ((op_ctx->map.sgt_offset | size) & ~PVR_DEVICE_PAGE_MASK)
2592 		return -EINVAL;
2593 
2594 	err = pvr_mmu_op_context_set_curr_page(op_ctx, device_addr, true);
2595 	if (err)
2596 		return -EINVAL;
2597 
2598 	memcpy(&ptr_copy, &op_ctx->curr_page, sizeof(ptr_copy));
2599 
2600 	flags_raw = pvr_page_flags_raw_create(false, false,
2601 					      flags & DRM_PVR_BO_BYPASS_DEVICE_CACHE,
2602 					      flags & DRM_PVR_BO_PM_FW_PROTECT);
2603 
2604 	/* Map scatter gather table */
2605 	for_each_sgtable_dma_sg(op_ctx->map.sgt, sgl, count) {
2606 		const size_t sgl_len = sg_dma_len(sgl);
2607 		u64 sgl_offset, map_sgl_len;
2608 
2609 		if (sgl_len <= op_ctx->map.sgt_offset) {
2610 			op_ctx->map.sgt_offset -= sgl_len;
2611 			continue;
2612 		}
2613 
2614 		sgl_offset = op_ctx->map.sgt_offset;
2615 		map_sgl_len = min_t(u64, sgl_len - sgl_offset, size - mapped_size);
2616 
2617 		err = pvr_mmu_map_sgl(op_ctx, sgl, sgl_offset, map_sgl_len,
2618 				      flags_raw);
2619 		if (err)
2620 			break;
2621 
2622 		/*
2623 		 * Flag the L0 page table as requiring a flush when the MMU op
2624 		 * context is destroyed.
2625 		 */
2626 		pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0);
2627 
2628 		op_ctx->map.sgt_offset = 0;
2629 		mapped_size += map_sgl_len;
2630 
2631 		if (mapped_size >= size)
2632 			break;
2633 
2634 		err = pvr_mmu_op_context_next_page(op_ctx, true);
2635 		if (err)
2636 			break;
2637 	}
2638 
2639 	if (err && mapped_size) {
2640 		memcpy(&op_ctx->curr_page, &ptr_copy, sizeof(op_ctx->curr_page));
2641 		pvr_mmu_op_context_unmap_curr_page(op_ctx,
2642 						   mapped_size >> PVR_DEVICE_PAGE_SHIFT);
2643 	}
2644 
2645 	return err;
2646 }
2647