xref: /linux/drivers/gpu/drm/imagination/pvr_mmu.c (revision 0f657938e4345a77be871d906f3e0de3c58a7a49)
1 // SPDX-License-Identifier: GPL-2.0-only OR MIT
2 /* Copyright (c) 2023 Imagination Technologies Ltd. */
3 
4 #include "pvr_mmu.h"
5 
6 #include "pvr_ccb.h"
7 #include "pvr_device.h"
8 #include "pvr_fw.h"
9 #include "pvr_gem.h"
10 #include "pvr_power.h"
11 #include "pvr_rogue_fwif.h"
12 #include "pvr_rogue_mmu_defs.h"
13 
14 #include <drm/drm_drv.h>
15 #include <linux/atomic.h>
16 #include <linux/bitops.h>
17 #include <linux/dma-mapping.h>
18 #include <linux/kmemleak.h>
19 #include <linux/minmax.h>
20 #include <linux/sizes.h>
21 
22 #define PVR_SHIFT_FROM_SIZE(size_) (__builtin_ctzll(size_))
23 #define PVR_MASK_FROM_SIZE(size_) (~((size_) - U64_C(1)))
24 
25 /*
26  * The value of the device page size (%PVR_DEVICE_PAGE_SIZE) is currently
27  * pegged to the host page size (%PAGE_SIZE). This chunk of macro goodness both
28  * ensures that the selected host page size corresponds to a valid device page
29  * size and sets up values needed by the MMU code below.
30  */
31 #if (PVR_DEVICE_PAGE_SIZE == SZ_4K)
32 # define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_4KB
33 # define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_4KB_RANGE_SHIFT
34 # define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_4KB_RANGE_CLRMSK
35 #elif (PVR_DEVICE_PAGE_SIZE == SZ_16K)
36 # define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_16KB
37 # define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_16KB_RANGE_SHIFT
38 # define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_16KB_RANGE_CLRMSK
39 #elif (PVR_DEVICE_PAGE_SIZE == SZ_64K)
40 # define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_64KB
41 # define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_64KB_RANGE_SHIFT
42 # define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_64KB_RANGE_CLRMSK
43 #elif (PVR_DEVICE_PAGE_SIZE == SZ_256K)
44 # define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_256KB
45 # define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_256KB_RANGE_SHIFT
46 # define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_256KB_RANGE_CLRMSK
47 #elif (PVR_DEVICE_PAGE_SIZE == SZ_1M)
48 # define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_1MB
49 # define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_1MB_RANGE_SHIFT
50 # define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_1MB_RANGE_CLRMSK
51 #elif (PVR_DEVICE_PAGE_SIZE == SZ_2M)
52 # define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_2MB
53 # define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_2MB_RANGE_SHIFT
54 # define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_2MB_RANGE_CLRMSK
55 #else
56 # error Unsupported device page size PVR_DEVICE_PAGE_SIZE
57 #endif
58 
59 #define ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X   \
60 	(ROGUE_MMUCTRL_ENTRIES_PT_VALUE >> \
61 	 (PVR_DEVICE_PAGE_SHIFT - PVR_SHIFT_FROM_SIZE(SZ_4K)))
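
/*
 * Worked example (illustrative, not part of the upstream source): with a
 * 16KiB device page size, the helpers above evaluate as follows, matching
 * the level 0 table sizes documented further down in this file:
 *
 *	PVR_SHIFT_FROM_SIZE(SZ_16K)       == 14
 *	PVR_MASK_FROM_SIZE(SZ_16K)        == ~(SZ_16K - 1)   (low 14 bits clear)
 *	ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X  == ROGUE_MMUCTRL_ENTRIES_PT_VALUE >> 2
 *
 * i.e. a level 0 table holds a quarter of the entries it would hold with
 * 4KiB device pages, because each entry now spans four times as much
 * device-virtual space.
 */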
62 
63 enum pvr_mmu_sync_level {
64 	PVR_MMU_SYNC_LEVEL_NONE = -1,
65 	PVR_MMU_SYNC_LEVEL_0 = 0,
66 	PVR_MMU_SYNC_LEVEL_1 = 1,
67 	PVR_MMU_SYNC_LEVEL_2 = 2,
68 };
69 
70 #define PVR_MMU_SYNC_LEVEL_0_FLAGS (ROGUE_FWIF_MMUCACHEDATA_FLAGS_PT | \
71 				    ROGUE_FWIF_MMUCACHEDATA_FLAGS_INTERRUPT | \
72 				    ROGUE_FWIF_MMUCACHEDATA_FLAGS_TLB)
73 #define PVR_MMU_SYNC_LEVEL_1_FLAGS (PVR_MMU_SYNC_LEVEL_0_FLAGS | ROGUE_FWIF_MMUCACHEDATA_FLAGS_PD)
74 #define PVR_MMU_SYNC_LEVEL_2_FLAGS (PVR_MMU_SYNC_LEVEL_1_FLAGS | ROGUE_FWIF_MMUCACHEDATA_FLAGS_PC)
75 
76 /**
77  * pvr_mmu_set_flush_flags() - Set MMU cache flush flags for next call to
78  *                             pvr_mmu_flush_exec().
79  * @pvr_dev: Target PowerVR device.
80  * @flags: MMU flush flags. Must be one of %PVR_MMU_SYNC_LEVEL_*_FLAGS.
81  *
82  * This function must be called following any possible change to the MMU page
83  * tables.
84  */
85 static void pvr_mmu_set_flush_flags(struct pvr_device *pvr_dev, u32 flags)
86 {
87 	atomic_fetch_or(flags, &pvr_dev->mmu_flush_cache_flags);
88 }
89 
90 /**
91  * pvr_mmu_flush_request_all() - Request flush of all MMU caches when
92  * subsequently calling pvr_mmu_flush_exec().
93  * @pvr_dev: Target PowerVR device.
94  *
95  * This function must be called following any possible change to the MMU page
96  * tables.
97  */
98 void pvr_mmu_flush_request_all(struct pvr_device *pvr_dev)
99 {
100 	pvr_mmu_set_flush_flags(pvr_dev, PVR_MMU_SYNC_LEVEL_2_FLAGS);
101 }
102 
103 /**
104  * pvr_mmu_flush_exec() - Execute a flush of all MMU caches previously
105  * requested.
106  * @pvr_dev: Target PowerVR device.
107  * @wait: Do not return until the flush is completed.
108  *
109  * This function must be called prior to submitting any new GPU job. The flush
110  * will complete before the jobs are scheduled, so this can be called once after
111  * a series of maps. However, a single unmap should always be immediately
112  * followed by a flush which should be explicitly waited on by setting @wait.
113  *
114  * As a failure to flush the MMU caches could risk memory corruption, if the
115  * flush fails (implying the firmware is not responding) then the GPU device is
116  * marked as lost.
117  *
118  * Return:
119  *  * 0 on success when @wait is true, or
120  *  * -%EIO if the device is unavailable, or
121  *  * Any error encountered while submitting the flush command via the KCCB.
122  */
123 int pvr_mmu_flush_exec(struct pvr_device *pvr_dev, bool wait)
124 {
125 	struct rogue_fwif_kccb_cmd cmd_mmu_cache = {};
126 	struct rogue_fwif_mmucachedata *cmd_mmu_cache_data =
127 		&cmd_mmu_cache.cmd_data.mmu_cache_data;
128 	int err = 0;
129 	u32 slot;
130 	int idx;
131 
132 	if (!drm_dev_enter(from_pvr_device(pvr_dev), &idx))
133 		return -EIO;
134 
135 	/* Can't flush MMU if the firmware hasn't booted yet. */
136 	if (!pvr_dev->fw_dev.booted)
137 		goto err_drm_dev_exit;
138 
139 	cmd_mmu_cache_data->cache_flags =
140 		atomic_xchg(&pvr_dev->mmu_flush_cache_flags, 0);
141 
142 	if (!cmd_mmu_cache_data->cache_flags)
143 		goto err_drm_dev_exit;
144 
145 	cmd_mmu_cache.cmd_type = ROGUE_FWIF_KCCB_CMD_MMUCACHE;
146 
147 	pvr_fw_object_get_fw_addr(pvr_dev->fw_dev.mem.mmucache_sync_obj,
148 				  &cmd_mmu_cache_data->mmu_cache_sync_fw_addr);
149 	cmd_mmu_cache_data->mmu_cache_sync_update_value = 0;
150 
151 	err = pvr_kccb_send_cmd(pvr_dev, &cmd_mmu_cache, &slot);
152 	if (err)
153 		goto err_reset_and_retry;
154 
155 	err = pvr_kccb_wait_for_completion(pvr_dev, slot, HZ, NULL);
156 	if (err)
157 		goto err_reset_and_retry;
158 
159 	drm_dev_exit(idx);
160 
161 	return 0;
162 
163 err_reset_and_retry:
164 	/*
165 	 * Flush command failure is most likely the result of a firmware lockup. Hard
166 	 * reset the GPU and retry.
167 	 */
168 	err = pvr_power_reset(pvr_dev, true);
169 	if (err)
170 		goto err_drm_dev_exit; /* Device is lost. */
171 
172 	/* Retry sending flush request. */
173 	err = pvr_kccb_send_cmd(pvr_dev, &cmd_mmu_cache, &slot);
174 	if (err) {
175 		pvr_device_lost(pvr_dev);
176 		goto err_drm_dev_exit;
177 	}
178 
179 	if (wait) {
180 		err = pvr_kccb_wait_for_completion(pvr_dev, slot, HZ, NULL);
181 		if (err)
182 			pvr_device_lost(pvr_dev);
183 	}
184 
185 err_drm_dev_exit:
186 	drm_dev_exit(idx);
187 
188 	return err;
189 }
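
/*
 * Usage sketch (illustrative only; the caller and error handling are
 * hypothetical, not lifted from the driver): a batch of map operations can
 * share a single deferred flush before job submission, whereas an unmap is
 * immediately followed by a flush that is waited on:
 *
 *	// After unmapping, request and synchronously execute a full flush.
 *	pvr_mmu_flush_request_all(pvr_dev);
 *	err = pvr_mmu_flush_exec(pvr_dev, true);
 *	if (err)
 *		return err;	// Device has been marked as lost.
 *
 *	// Before scheduling new GPU jobs, a non-waiting flush is sufficient.
 *	err = pvr_mmu_flush_exec(pvr_dev, false);
 */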
190 
191 /**
192  * DOC: PowerVR Virtual Memory Handling
193  */
194 /**
195  * DOC: PowerVR Virtual Memory Handling (constants)
196  *
197  * .. c:macro:: PVR_IDX_INVALID
198  *
199  *    Default value for a u16-based index.
200  *
201  *    This value cannot be zero, since zero is a valid index value.
202  */
203 #define PVR_IDX_INVALID ((u16)(-1))
204 
205 /**
206  * DOC: MMU backing pages
207  */
208 /**
209  * DOC: MMU backing pages (constants)
210  *
211  * .. c:macro:: PVR_MMU_BACKING_PAGE_SIZE
212  *
213  *    Page size of a PowerVR device's integrated MMU. The CPU page size must be
214  *    at least as large as this value for the current implementation; this is
215  *    checked at compile-time.
216  */
217 #define PVR_MMU_BACKING_PAGE_SIZE SZ_4K
218 static_assert(PAGE_SIZE >= PVR_MMU_BACKING_PAGE_SIZE);
219 
220 /**
221  * struct pvr_mmu_backing_page - Represents a single page used to back a page
222  *                              table of any level.
223  * @dma_addr: DMA address of this page.
224  * @host_ptr: CPU address of this page.
225  * @pvr_dev: The PowerVR device to which this page is associated. **For
226  *           internal use only.**
227  */
228 struct pvr_mmu_backing_page {
229 	dma_addr_t dma_addr;
230 	void *host_ptr;
231 /* private: internal use only */
232 	struct page *raw_page;
233 	struct pvr_device *pvr_dev;
234 };
235 
236 /**
237  * pvr_mmu_backing_page_init() - Initialize an MMU backing page.
238  * @page: Target backing page.
239  * @pvr_dev: Target PowerVR device.
240  *
241  * This function performs three distinct operations:
242  *
243  * 1. Allocate a single page,
244  * 2. Map the page to the CPU, and
245  * 3. Map the page to DMA-space.
246  *
247  * It is expected that @page be zeroed (e.g. from kzalloc()) before calling
248  * this function.
249  *
250  * Return:
251  *  * 0 on success, or
252  *  * -%ENOMEM if allocation of the backing page or mapping of the backing
253  *    page to DMA fails.
254  */
255 static int
256 pvr_mmu_backing_page_init(struct pvr_mmu_backing_page *page,
257 			  struct pvr_device *pvr_dev)
258 {
259 	struct device *dev = from_pvr_device(pvr_dev)->dev;
260 
261 	struct page *raw_page;
262 	int err;
263 
264 	dma_addr_t dma_addr;
265 	void *host_ptr;
266 
267 	raw_page = alloc_page(__GFP_ZERO | GFP_KERNEL);
268 	if (!raw_page)
269 		return -ENOMEM;
270 
271 	host_ptr = vmap(&raw_page, 1, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
272 	if (!host_ptr) {
273 		err = -ENOMEM;
274 		goto err_free_page;
275 	}
276 
277 	dma_addr = dma_map_page(dev, raw_page, 0, PVR_MMU_BACKING_PAGE_SIZE,
278 				DMA_TO_DEVICE);
279 	if (dma_mapping_error(dev, dma_addr)) {
280 		err = -ENOMEM;
281 		goto err_unmap_page;
282 	}
283 
284 	page->dma_addr = dma_addr;
285 	page->host_ptr = host_ptr;
286 	page->pvr_dev = pvr_dev;
287 	page->raw_page = raw_page;
288 	kmemleak_alloc(page->host_ptr, PAGE_SIZE, 1, GFP_KERNEL);
289 
290 	return 0;
291 
292 err_unmap_page:
293 	vunmap(host_ptr);
294 
295 err_free_page:
296 	__free_page(raw_page);
297 
298 	return err;
299 }
300 
301 /**
302  * pvr_mmu_backing_page_fini() - Tear down an MMU backing page.
303  * @page: Target backing page.
304  *
305  * This function performs the mirror operations to pvr_mmu_backing_page_init(),
306  * in reverse order:
307  *
308  * 1. Unmap the page from DMA-space,
309  * 2. Unmap the page from the CPU, and
310  * 3. Free the page.
311  *
312  * It also zeros @page.
313  *
314  * It is a no-op to call this function a second (or further) time on any @page.
315  */
316 static void
317 pvr_mmu_backing_page_fini(struct pvr_mmu_backing_page *page)
318 {
319 	struct device *dev = from_pvr_device(page->pvr_dev)->dev;
320 
321 	/* Do nothing if no allocation is present. */
322 	if (!page->pvr_dev)
323 		return;
324 
325 	dma_unmap_page(dev, page->dma_addr, PVR_MMU_BACKING_PAGE_SIZE,
326 		       DMA_TO_DEVICE);
327 
328 	kmemleak_free(page->host_ptr);
329 	vunmap(page->host_ptr);
330 
331 	__free_page(page->raw_page);
332 
333 	memset(page, 0, sizeof(*page));
334 }
335 
336 /**
337  * pvr_mmu_backing_page_sync() - Flush an MMU backing page from the CPU to the
338  * device.
339  * @page: Target backing page.
340  * @flags: MMU flush flags. Must be one of %PVR_MMU_SYNC_LEVEL_*_FLAGS.
341  *
342  * .. caution::
343  *
344  *    **This is potentially an expensive function call.** Only call
345  *    pvr_mmu_backing_page_sync() once you're sure you have no more changes to
346  *    make to the backing page in the immediate future.
347  */
348 static void
349 pvr_mmu_backing_page_sync(struct pvr_mmu_backing_page *page, u32 flags)
350 {
351 	struct pvr_device *pvr_dev = page->pvr_dev;
352 	struct device *dev;
353 
354 	/*
355 	 * Do nothing if no allocation is present. This may be the case if
356 	 * we are unmapping pages.
357 	 */
358 	if (!pvr_dev)
359 		return;
360 
361 	dev = from_pvr_device(pvr_dev)->dev;
362 
363 	dma_sync_single_for_device(dev, page->dma_addr,
364 				   PVR_MMU_BACKING_PAGE_SIZE, DMA_TO_DEVICE);
365 
366 	pvr_mmu_set_flush_flags(pvr_dev, flags);
367 }
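
/*
 * Lifecycle sketch (illustrative; local names are hypothetical): a backing
 * page is initialised once, written through @host_ptr, synced to the device
 * and finally torn down. This mirrors how the page table wrappers below use
 * the API:
 *
 *	struct pvr_mmu_backing_page page = {};
 *	int err = pvr_mmu_backing_page_init(&page, pvr_dev);
 *
 *	if (err)
 *		return err;
 *
 *	// Populate the table through the CPU mapping...
 *	memset(page.host_ptr, 0, PVR_MMU_BACKING_PAGE_SIZE);
 *	// ...then flush it to the device and request an MMU cache flush.
 *	pvr_mmu_backing_page_sync(&page, PVR_MMU_SYNC_LEVEL_0_FLAGS);
 *
 *	pvr_mmu_backing_page_fini(&page);
 */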
368 
369 /**
370  * DOC: Raw page tables
371  */
372 
373 #define PVR_PAGE_TABLE_TYPEOF_ENTRY(level_) \
374 	typeof_member(struct pvr_page_table_l##level_##_entry_raw, val)
375 
376 #define PVR_PAGE_TABLE_FIELD_GET(level_, name_, field_, entry_)           \
377 	(((entry_).val &                                           \
378 	  ~ROGUE_MMUCTRL_##name_##_DATA_##field_##_CLRMSK) >> \
379 	 ROGUE_MMUCTRL_##name_##_DATA_##field_##_SHIFT)
380 
381 #define PVR_PAGE_TABLE_FIELD_PREP(level_, name_, field_, val_)            \
382 	((((PVR_PAGE_TABLE_TYPEOF_ENTRY(level_))(val_))            \
383 	  << ROGUE_MMUCTRL_##name_##_DATA_##field_##_SHIFT) & \
384 	 ~ROGUE_MMUCTRL_##name_##_DATA_##field_##_CLRMSK)
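
/*
 * Expansion sketch (illustrative): for a level 2 "valid" field query,
 *
 *	PVR_PAGE_TABLE_FIELD_GET(2, PC, VALID, entry)
 *
 * token-pastes into
 *
 *	(((entry).val & ~ROGUE_MMUCTRL_PC_DATA_VALID_CLRMSK) >>
 *	 ROGUE_MMUCTRL_PC_DATA_VALID_SHIFT)
 *
 * while PVR_PAGE_TABLE_TYPEOF_ENTRY(2) resolves to the type of
 * &struct pvr_page_table_l2_entry_raw.val (a u32), so the _PREP() variant
 * casts and shifts the value into place before masking.
 */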
385 
386 /**
387  * struct pvr_page_table_l2_entry_raw - A single entry in a level 2 page table.
388  * @val: The raw value of this entry.
389  *
390  * This type is a structure for type-checking purposes. At compile-time, its
391  * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PC_VALUE.
392  *
393  * The value stored in this structure can be decoded using the following bitmap:
394  *
395  * .. flat-table::
396  *    :widths: 1 5
397  *    :stub-columns: 1
398  *
399  *    * - 31..4
400  *      - **Level 1 Page Table Base Address:** Bits 39..12 of the L1
401  *        page table base address, which is 4KiB aligned.
402  *
403  *    * - 3..2
404  *      - *(reserved)*
405  *
406  *    * - 1
407  *      - **Pending:** When the valid bit is not set, indicates that a valid
408  *        entry is pending and the MMU should wait for the driver to map
409  *        the entry. This is used to support page demand mapping of
410  *        memory.
411  *
412  *    * - 0
413  *      - **Valid:** Indicates that the entry contains a valid L1 page
414  *        table. If the valid bit is not set, then an attempted use of
415  *        the page would result in a page fault.
416  */
417 struct pvr_page_table_l2_entry_raw {
418 	u32 val;
419 } __packed;
420 static_assert(sizeof(struct pvr_page_table_l2_entry_raw) * 8 ==
421 	      ROGUE_MMUCTRL_ENTRY_SIZE_PC_VALUE);
422 
423 static bool
424 pvr_page_table_l2_entry_raw_is_valid(struct pvr_page_table_l2_entry_raw entry)
425 {
426 	return PVR_PAGE_TABLE_FIELD_GET(2, PC, VALID, entry);
427 }
428 
429 /**
430  * pvr_page_table_l2_entry_raw_set() - Write a valid entry into a raw level 2
431  *                                     page table.
432  * @entry: Target raw level 2 page table entry.
433  * @child_table_dma_addr: DMA address of the level 1 page table to be
434  *                        associated with @entry.
435  *
436  * When calling this function, @child_table_dma_addr must be a valid DMA
437  * address and a multiple of %ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSIZE.
438  */
439 static void
440 pvr_page_table_l2_entry_raw_set(struct pvr_page_table_l2_entry_raw *entry,
441 				dma_addr_t child_table_dma_addr)
442 {
443 	child_table_dma_addr >>= ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSHIFT;
444 
445 	WRITE_ONCE(entry->val,
446 		   PVR_PAGE_TABLE_FIELD_PREP(2, PC, VALID, true) |
447 		   PVR_PAGE_TABLE_FIELD_PREP(2, PC, ENTRY_PENDING, false) |
448 		   PVR_PAGE_TABLE_FIELD_PREP(2, PC, PD_BASE, child_table_dma_addr));
449 }
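
/*
 * Worked example (illustrative; it assumes the alignment and shift values
 * implied by the bit layout documented above, i.e. an ALIGNSHIFT of 12 and
 * a PD_BASE field starting at bit 4): for a level 1 table backed at DMA
 * address 0x12345000,
 *
 *	0x12345000 >> 12  == 0x12345       (4KiB-aligned base, bits 39..12)
 *	0x12345    << 4   == 0x00123450    (placed in entry bits 31..4)
 *	entry->val        == 0x00123451    (with the valid bit set)
 *
 * The pending bit stays clear because the entry is immediately usable.
 */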
450 
451 static void
452 pvr_page_table_l2_entry_raw_clear(struct pvr_page_table_l2_entry_raw *entry)
453 {
454 	WRITE_ONCE(entry->val, 0);
455 }
456 
457 /**
458  * struct pvr_page_table_l1_entry_raw - A single entry in a level 1 page table.
459  * @val: The raw value of this entry.
460  *
461  * This type is a structure for type-checking purposes. At compile-time, its
462  * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PD_VALUE.
463  *
464  * The value stored in this structure can be decoded using the following bitmap:
465  *
466  * .. flat-table::
467  *    :widths: 1 5
468  *    :stub-columns: 1
469  *
470  *    * - 63..41
471  *      - *(reserved)*
472  *
473  *    * - 40
474  *      - **Pending:** When the valid bit is not set, indicates that a valid entry
475  *        is pending and the MMU should wait for the driver to map the entry.
476  *        This is used to support page demand mapping of memory.
477  *
478  *    * - 39..5
479  *      - **Level 0 Page Table Base Address:** The way this value is
480  *        interpreted depends on the page size. Bits not specified in the
481  *        table below (e.g. bits 11..5 for page size 4KiB) should be
482  *        considered reserved.
483  *
484  *        This table shows the bits used in an L1 page table entry to
485  *        represent the Physical Table Base Address for a given Page Size.
486  *        Since each L1 page table entry covers 2MiB of address space, the
487  *        maximum page size is 2MiB.
488  *
489  *        .. flat-table::
490  *           :widths: 1 1 1 1
491  *           :header-rows: 1
492  *           :stub-columns: 1
493  *
494  *           * - Page size
495  *             - L0 page table base address bits
496  *             - Number of L0 page table entries
497  *             - Size of L0 page table
498  *
499  *           * - 4KiB
500  *             - 39..12
501  *             - 512
502  *             - 4KiB
503  *
504  *           * - 16KiB
505  *             - 39..10
506  *             - 128
507  *             - 1KiB
508  *
509  *           * - 64KiB
510  *             - 39..8
511  *             - 32
512  *             - 256B
513  *
514  *           * - 256KiB
515  *             - 39..6
516  *             - 8
517  *             - 64B
518  *
519  *           * - 1MiB
520  *             - 39..5 (4 = '0')
521  *             - 2
522  *             - 16B
523  *
524  *           * - 2MiB
525  *             - 39..5 (4..3 = '00')
526  *             - 1
527  *             - 8B
528  *
529  *    * - 4
530  *      - *(reserved)*
531  *
532  *    * - 3..1
533  *      - **Page Size:** Sets the page size, from 4KiB to 2MiB.
534  *
535  *    * - 0
536  *      - **Valid:** Indicates that the entry contains a valid L0 page table.
537  *        If the valid bit is not set, then an attempted use of the page would
538  *        result in a page fault.
539  */
540 struct pvr_page_table_l1_entry_raw {
541 	u64 val;
542 } __packed;
543 static_assert(sizeof(struct pvr_page_table_l1_entry_raw) * 8 ==
544 	      ROGUE_MMUCTRL_ENTRY_SIZE_PD_VALUE);
545 
546 static bool
547 pvr_page_table_l1_entry_raw_is_valid(struct pvr_page_table_l1_entry_raw entry)
548 {
549 	return PVR_PAGE_TABLE_FIELD_GET(1, PD, VALID, entry);
550 }
551 
552 /**
553  * pvr_page_table_l1_entry_raw_set() - Write a valid entry into a raw level 1
554  *                                     page table.
555  * @entry: Target raw level 1 page table entry.
556  * @child_table_dma_addr: DMA address of the level 0 page table to be
557  *                        associated with @entry.
558  *
559  * When calling this function, @child_table_dma_addr must be a valid DMA
560  * address and a multiple of 4 KiB.
561  */
562 static void
563 pvr_page_table_l1_entry_raw_set(struct pvr_page_table_l1_entry_raw *entry,
564 				dma_addr_t child_table_dma_addr)
565 {
566 	WRITE_ONCE(entry->val,
567 		   PVR_PAGE_TABLE_FIELD_PREP(1, PD, VALID, true) |
568 		   PVR_PAGE_TABLE_FIELD_PREP(1, PD, ENTRY_PENDING, false) |
569 		   PVR_PAGE_TABLE_FIELD_PREP(1, PD, PAGE_SIZE, ROGUE_MMUCTRL_PAGE_SIZE_X) |
570 		   /*
571 		    * The use of a 4K-specific macro here is correct. It is
572 		    * a future optimization to allocate sub-host-page-sized
573 		    * blocks for individual tables, so the condition that any
574 		    * page table address is aligned to the size of the
575 		    * largest (a 4KB) table currently holds.
576 		    */
577 		   (child_table_dma_addr & ~ROGUE_MMUCTRL_PT_BASE_4KB_RANGE_CLRMSK));
578 }
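
/*
 * Worked example (illustrative, assuming a 4KiB device page size): for a
 * level 0 table backed at DMA address 0x45678000, the entry packs
 *
 *	bit  0      - valid          == 1
 *	bits 3..1   - page size      == ROGUE_MMUCTRL_PAGE_SIZE_4KB
 *	bits 39..12 - L0 table base  == 0x45678 (address bits 39..12)
 *
 * with the pending bit (bit 40) left clear. Only the base-address bits
 * allowed by ROGUE_MMUCTRL_PT_BASE_4KB_RANGE_CLRMSK are taken from the DMA
 * address, which is why the alignment requirement above is 4KiB.
 */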
579 
580 static void
581 pvr_page_table_l1_entry_raw_clear(struct pvr_page_table_l1_entry_raw *entry)
582 {
583 	WRITE_ONCE(entry->val, 0);
584 }
585 
586 /**
587  * struct pvr_page_table_l0_entry_raw - A single entry in a level 0 page table.
588  * @val: The raw value of this entry.
589  *
590  * This type is a structure for type-checking purposes. At compile-time, its
591  * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PT_VALUE.
592  *
593  * The value stored in this structure can be decoded using the following bitmap:
594  *
595  * .. flat-table::
596  *    :widths: 1 5
597  *    :stub-columns: 1
598  *
599  *    * - 63
600  *      - *(reserved)*
601  *
602  *    * - 62
603  *      - **PM/FW Protect:** Indicates a protected region which only the
604  *        Parameter Manager (PM) or firmware processor can write to.
605  *
606  *    * - 61..40
607  *      - **VP Page (High):** Virtual-physical page used for Parameter Manager
608  *        (PM) memory. This field is only used if the additional level of PB
609  *        virtualization is enabled. The VP Page field is needed by the PM in
610  *        order to correctly reconstitute the free lists after render
611  *        completion. This (High) field holds bits 39..18 of the value; the
612  *        Low field holds bits 17..12. Bits 11..0 are always zero because the
613  *        value is always aligned to the 4KiB page size.
614  *
615  *    * - 39..12
616  *      - **Physical Page Address:** The way this value is interpreted depends
617  *        on the page size. Bits not specified in the table below (e.g. bits
618  *        20..12 for page size 2MiB) should be considered reserved.
619  *
620  *        This table shows the bits used in an L0 page table entry to represent
621  *        the Physical Page Address for a given page size (as defined in the
622  *        associated L1 page table entry).
623  *
624  *        .. flat-table::
625  *           :widths: 1 1
626  *           :header-rows: 1
627  *           :stub-columns: 1
628  *
629  *           * - Page size
630  *             - Physical address bits
631  *
632  *           * - 4KiB
633  *             - 39..12
634  *
635  *           * - 16KiB
636  *             - 39..14
637  *
638  *           * - 64KiB
639  *             - 39..16
640  *
641  *           * - 256KiB
642  *             - 39..18
643  *
644  *           * - 1MiB
645  *             - 39..20
646  *
647  *           * - 2MiB
648  *             - 39..21
649  *
650  *    * - 11..6
651  *      - **VP Page (Low):** Continuation of VP Page (High).
652  *
653  *    * - 5
654  *      - **Pending:** When the valid bit is not set, indicates that a valid entry
655  *        is pending and the MMU should wait for the driver to map the entry.
656  *        This is used to support page demand mapping of memory.
657  *
658  *    * - 4
659  *      - **PM Src:** Set on Parameter Manager (PM) allocated page table
660  *        entries when indicated by the PM. Note that this bit will only be set
661  *        by the PM, not by the device driver.
662  *
663  *    * - 3
664  *      - **SLC Bypass Control:** Specifies requests to this page should bypass
665  *        the System Level Cache (SLC), if enabled in SLC configuration.
666  *
667  *    * - 2
668  *      - **Cache Coherency:** Indicates that the page is coherent (i.e. it
669  *        does not require a cache flush between operations on the CPU and the
670  *        device).
671  *
672  *    * - 1
673  *      - **Read Only:** If set, this bit indicates that the page is read only.
674  *        An attempted write to this page would result in a write-protection
675  *        fault.
676  *
677  *    * - 0
678  *      - **Valid:** Indicates that the entry contains a valid page. If the
679  *        valid bit is not set, then an attempted use of the page would result
680  *        in a page fault.
681  */
682 struct pvr_page_table_l0_entry_raw {
683 	u64 val;
684 } __packed;
685 static_assert(sizeof(struct pvr_page_table_l0_entry_raw) * 8 ==
686 	      ROGUE_MMUCTRL_ENTRY_SIZE_PT_VALUE);
687 
688 /**
689  * struct pvr_page_flags_raw - The configurable flags from a single entry in a
690  *                             level 0 page table.
691  * @val: The raw value of these flags. Since these are a strict subset of
692  *       &struct pvr_page_table_l0_entry_raw, use that type for our member here.
693  *
694  * The flags stored in this type are: PM/FW Protect; SLC Bypass Control; Cache
695  * Coherency, and Read Only (bits 62, 3, 2 and 1 respectively).
696  *
697  * This type should never be instantiated directly; instead use
698  * pvr_page_flags_raw_create() to ensure only valid bits of @val are set.
699  */
700 struct pvr_page_flags_raw {
701 	struct pvr_page_table_l0_entry_raw val;
702 } __packed;
703 static_assert(sizeof(struct pvr_page_flags_raw) ==
704 	      sizeof(struct pvr_page_table_l0_entry_raw));
705 
706 static bool
707 pvr_page_table_l0_entry_raw_is_valid(struct pvr_page_table_l0_entry_raw entry)
708 {
709 	return PVR_PAGE_TABLE_FIELD_GET(0, PT, VALID, entry);
710 }
711 
712 /**
713  * pvr_page_table_l0_entry_raw_set() - Write a valid entry into a raw level 0
714  *                                     page table.
715  * @entry: Target raw level 0 page table entry.
716  * @dma_addr: DMA address of the physical page to be associated with @entry.
717  * @flags: Options to be set on @entry.
718  *
719  * When calling this function, @dma_addr must be a valid DMA
720  * address and a multiple of %PVR_DEVICE_PAGE_SIZE.
721  *
722  * The @flags parameter is directly assigned into @entry. It is the caller's
723  * responsibility to ensure that only bits specified in
724  * &struct pvr_page_flags_raw are set in @flags.
725  */
726 static void
727 pvr_page_table_l0_entry_raw_set(struct pvr_page_table_l0_entry_raw *entry,
728 				dma_addr_t dma_addr,
729 				struct pvr_page_flags_raw flags)
730 {
731 	WRITE_ONCE(entry->val, PVR_PAGE_TABLE_FIELD_PREP(0, PT, VALID, true) |
732 			       PVR_PAGE_TABLE_FIELD_PREP(0, PT, ENTRY_PENDING, false) |
733 			       (dma_addr & ~ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK) |
734 			       flags.val.val);
735 }
736 
737 static void
738 pvr_page_table_l0_entry_raw_clear(struct pvr_page_table_l0_entry_raw *entry)
739 {
740 	WRITE_ONCE(entry->val, 0);
741 }
742 
743 /**
744  * pvr_page_flags_raw_create() - Initialize the flag bits of a raw level 0 page
745  *                               table entry.
746  * @read_only: This page is read-only (see: Read Only).
747  * @cache_coherent: This page does not require cache flushes (see: Cache
748  *                  Coherency).
749  * @slc_bypass: This page bypasses the device cache (see: SLC Bypass Control).
750  * @pm_fw_protect: This page is only for use by the firmware or Parameter
751  *                 Manager (see PM/FW Protect).
752  *
753  * For more details on the use of these four options, see their respective
754  * entries in the table under &struct pvr_page_table_l0_entry_raw.
755  *
756  * Return:
757  * A new &struct pvr_page_flags_raw instance which can be passed directly to
758  * pvr_page_table_l0_entry_raw_set() or pvr_page_table_l0_insert().
759  */
760 static struct pvr_page_flags_raw
761 pvr_page_flags_raw_create(bool read_only, bool cache_coherent, bool slc_bypass,
762 			  bool pm_fw_protect)
763 {
764 	struct pvr_page_flags_raw flags;
765 
766 	flags.val.val =
767 		PVR_PAGE_TABLE_FIELD_PREP(0, PT, READ_ONLY, read_only) |
768 		PVR_PAGE_TABLE_FIELD_PREP(0, PT, CC, cache_coherent) |
769 		PVR_PAGE_TABLE_FIELD_PREP(0, PT, SLC_BYPASS_CTRL, slc_bypass) |
770 		PVR_PAGE_TABLE_FIELD_PREP(0, PT, PM_META_PROTECT, pm_fw_protect);
771 
772 	return flags;
773 }
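
/*
 * Usage sketch (illustrative; the surrounding variables are hypothetical):
 * building the flags for a read-only, cache-coherent page and writing a
 * level 0 entry with them:
 *
 *	struct pvr_page_flags_raw flags =
 *		pvr_page_flags_raw_create(true, true, false, false);
 *
 *	pvr_page_table_l0_entry_raw_set(entry, dma_addr, flags);
 *
 * Because the flag bits are a strict subset of the level 0 entry bits, the
 * value can simply be OR'd into the entry by the setter above.
 */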
774 
775 /**
776  * struct pvr_page_table_l2_raw - The raw data of a level 2 page table.
777  *
778  * This type is a structure for type-checking purposes. At compile-time, its
779  * size is checked against %PVR_MMU_BACKING_PAGE_SIZE.
780  */
781 struct pvr_page_table_l2_raw {
782 	/** @entries: The raw values of this table. */
783 	struct pvr_page_table_l2_entry_raw
784 		entries[ROGUE_MMUCTRL_ENTRIES_PC_VALUE];
785 } __packed;
786 static_assert(sizeof(struct pvr_page_table_l2_raw) == PVR_MMU_BACKING_PAGE_SIZE);
787 
788 /**
789  * struct pvr_page_table_l1_raw - The raw data of a level 1 page table.
790  *
791  * This type is a structure for type-checking purposes. At compile-time, its
792  * size is checked against %PVR_MMU_BACKING_PAGE_SIZE.
793  */
794 struct pvr_page_table_l1_raw {
795 	/** @entries: The raw values of this table. */
796 	struct pvr_page_table_l1_entry_raw
797 		entries[ROGUE_MMUCTRL_ENTRIES_PD_VALUE];
798 } __packed;
799 static_assert(sizeof(struct pvr_page_table_l1_raw) == PVR_MMU_BACKING_PAGE_SIZE);
800 
801 /**
802  * struct pvr_page_table_l0_raw - The raw data of a level 0 page table.
803  *
804  * This type is a structure for type-checking purposes. At compile-time, its
805  * size is checked against %PVR_MMU_BACKING_PAGE_SIZE.
806  *
807  * .. caution::
808  *
809  *    The size of level 0 page tables is variable depending on the page size
810  *    specified in the associated level 1 page table entry. Since the device
811  *    page size in use is pegged to the host page size, it cannot vary at
812  *    runtime. This structure is therefore only defined to contain the required
813  *    number of entries for the current device page size. **You should never
814  *    read or write beyond the last supported entry.**
815  */
816 struct pvr_page_table_l0_raw {
817 	/** @entries: The raw values of this table. */
818 	struct pvr_page_table_l0_entry_raw
819 		entries[ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X];
820 } __packed;
821 static_assert(sizeof(struct pvr_page_table_l0_raw) <= PVR_MMU_BACKING_PAGE_SIZE);
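
/*
 * Size check sketch (illustrative): with 8-byte entries, the level 0 table
 * sizes follow the table in the level 1 entry documentation above, e.g.
 *
 *	4KiB device pages:  512 entries * 8 bytes == 4KiB  (full backing page)
 *	16KiB device pages: 128 entries * 8 bytes == 1KiB
 *	2MiB device pages:    1 entry   * 8 bytes == 8B
 *
 * which is why the assertion above uses <= rather than ==: only the 4KiB
 * case fills the whole backing page.
 */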
822 
823 /**
824  * DOC: Mirror page tables
825  */
826 
827 /*
828  * We pre-declare these types because they cross-depend on pointers to each
829  * other.
830  */
831 struct pvr_page_table_l1;
832 struct pvr_page_table_l0;
833 
834 /**
835  * struct pvr_page_table_l2 - A wrapped level 2 page table.
836  *
837  * To access the raw part of this table, use pvr_page_table_l2_get_raw().
838  * Alternatively, to access a raw entry directly, use
839  * pvr_page_table_l2_get_entry_raw().
840  *
841  * A level 2 page table forms the root of the page table tree structure, so
842  * this type has no &parent or &parent_idx members.
843  */
844 struct pvr_page_table_l2 {
845 	/**
846 	 * @entries: The children of this node in the page table tree
847 	 * structure. These are also mirror tables. The indexing of this array
848 	 * is identical to that of the raw equivalent
849 	 * (&pvr_page_table_l1_raw.entries).
850 	 */
851 	struct pvr_page_table_l1 *entries[ROGUE_MMUCTRL_ENTRIES_PC_VALUE];
852 
853 	/**
854 	 * @backing_page: A handle to the memory which holds the raw
855 	 * equivalent of this table. **For internal use only.**
856 	 */
857 	struct pvr_mmu_backing_page backing_page;
858 
859 	/**
860 	 * @entry_count: The current number of valid entries (that we know of)
861 	 * in this table. This value is essentially a refcount - the table is
862 	 * destroyed when this value is decremented to zero by
863 	 * pvr_page_table_l2_remove().
864 	 */
865 	u16 entry_count;
866 };
867 
868 /**
869  * pvr_page_table_l2_init() - Initialize a level 2 page table.
870  * @table: Target level 2 page table.
871  * @pvr_dev: Target PowerVR device
872  *
873  * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
874  * this function.
875  *
876  * Return:
877  *  * 0 on success, or
878  *  * Any error encountered while initializing &table->backing_page using
879  *    pvr_mmu_backing_page_init().
880  */
881 static int
882 pvr_page_table_l2_init(struct pvr_page_table_l2 *table,
883 		       struct pvr_device *pvr_dev)
884 {
885 	return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
886 }
887 
888 /**
889  * pvr_page_table_l2_fini() - Teardown a level 2 page table.
890  * @table: Target level 2 page table.
891  *
892  * It is an error to attempt to use @table after calling this function.
893  */
894 static void
895 pvr_page_table_l2_fini(struct pvr_page_table_l2 *table)
896 {
897 	pvr_mmu_backing_page_fini(&table->backing_page);
898 }
899 
900 /**
901  * pvr_page_table_l2_sync() - Flush a level 2 page table from the CPU to the
902  *                            device.
903  * @table: Target level 2 page table.
904  *
905  * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
906  * warning there applies here too: **Only call pvr_page_table_l2_sync() once
907  * you're sure you have no more changes to make to** @table **in the immediate
908  * future.**
909  *
910  * If child level 1 page tables of @table also need to be flushed, this should
911  * be done first using pvr_page_table_l1_sync() *before* calling this function.
912  */
913 static void
914 pvr_page_table_l2_sync(struct pvr_page_table_l2 *table)
915 {
916 	pvr_mmu_backing_page_sync(&table->backing_page, PVR_MMU_SYNC_LEVEL_2_FLAGS);
917 }
918 
919 /**
920  * pvr_page_table_l2_get_raw() - Access the raw equivalent of a mirror level 2
921  *                               page table.
922  * @table: Target level 2 page table.
923  *
924  * Essentially returns the CPU address of the raw equivalent of @table, cast to
925  * a &struct pvr_page_table_l2_raw pointer.
926  *
927  * You probably want to call pvr_page_table_l2_get_entry_raw() instead.
928  *
929  * Return:
930  * The raw equivalent of @table.
931  */
932 static struct pvr_page_table_l2_raw *
933 pvr_page_table_l2_get_raw(struct pvr_page_table_l2 *table)
934 {
935 	return table->backing_page.host_ptr;
936 }
937 
938 /**
939  * pvr_page_table_l2_get_entry_raw() - Access an entry from the raw equivalent
940  *                                     of a mirror level 2 page table.
941  * @table: Target level 2 page table.
942  * @idx: Index of the entry to access.
943  *
944  * Technically this function returns a pointer to a slot in a raw level 2 page
945  * table, since the returned "entry" is not guaranteed to be valid. The caller
946  * must verify the validity of the entry at the returned address (perhaps using
947  * pvr_page_table_l2_entry_raw_is_valid()) before reading or overwriting it.
948  *
949  * The value of @idx is not checked here; it is the caller's responsibility to
950  * ensure @idx refers to a valid index within @table before dereferencing the
951  * returned pointer.
952  *
953  * Return:
954  * A pointer to the requested raw level 2 page table entry.
955  */
956 static struct pvr_page_table_l2_entry_raw *
957 pvr_page_table_l2_get_entry_raw(struct pvr_page_table_l2 *table, u16 idx)
958 {
959 	return &pvr_page_table_l2_get_raw(table)->entries[idx];
960 }
961 
962 /**
963  * pvr_page_table_l2_entry_is_valid() - Check if a level 2 page table entry is
964  *                                      marked as valid.
965  * @table: Target level 2 page table.
966  * @idx: Index of the entry to check.
967  *
968  * The value of @idx is not checked here; it is the caller's responsibility to
969  * ensure @idx refers to a valid index within @table before calling this
970  * function.
971  */
972 static bool
973 pvr_page_table_l2_entry_is_valid(struct pvr_page_table_l2 *table, u16 idx)
974 {
975 	struct pvr_page_table_l2_entry_raw entry_raw =
976 		*pvr_page_table_l2_get_entry_raw(table, idx);
977 
978 	return pvr_page_table_l2_entry_raw_is_valid(entry_raw);
979 }
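
/*
 * Usage sketch (illustrative, hypothetical caller): the raw accessors above
 * return a slot rather than a guaranteed-valid entry, so validity is checked
 * before the mirror entry is dereferenced:
 *
 *	if (pvr_page_table_l2_entry_is_valid(l2_table, idx)) {
 *		struct pvr_page_table_l1 *l1_table = l2_table->entries[idx];
 *
 *		// ... operate on the mirror level 1 table ...
 *	}
 */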
980 
981 /**
982  * struct pvr_page_table_l1 - A wrapped level 1 page table.
983  *
984  * To access the raw part of this table, use pvr_page_table_l1_get_raw().
985  * Alternatively, to access a raw entry directly, use
986  * pvr_page_table_l1_get_entry_raw().
987  */
988 struct pvr_page_table_l1 {
989 	/**
990 	 * @entries: The children of this node in the page table tree
991 	 * structure. These are also mirror tables. The indexing of this array
992 	 * is identical to that of the raw equivalent
993 	 * (&pvr_page_table_l0_raw.entries).
994 	 */
995 	struct pvr_page_table_l0 *entries[ROGUE_MMUCTRL_ENTRIES_PD_VALUE];
996 
997 	/**
998 	 * @backing_page: A handle to the memory which holds the raw
999 	 * equivalent of this table. **For internal use only.**
1000 	 */
1001 	struct pvr_mmu_backing_page backing_page;
1002 
1003 	union {
1004 		/**
1005 		 * @parent: The parent of this node in the page table tree structure.
1006 		 *
1007 		 * This is also a mirror table.
1008 		 *
1009 		 * Only valid when the L1 page table is active. When the L1 page table
1010 		 * has been removed and queued for destruction, the next_free field
1011 		 * should be used instead.
1012 		 */
1013 		struct pvr_page_table_l2 *parent;
1014 
1015 		/**
1016 		 * @next_free: Pointer to the next L1 page table to take/free.
1017 		 *
1018 		 * Used to form a linked list of L1 page tables. This is used
1019 		 * when preallocating tables and when the page table has been
1020 		 * removed and queued for destruction.
1021 		 */
1022 		struct pvr_page_table_l1 *next_free;
1023 	};
1024 
1025 	/**
1026 	 * @parent_idx: The index of the entry in the parent table (see
1027 	 * @parent) which corresponds to this table.
1028 	 */
1029 	u16 parent_idx;
1030 
1031 	/**
1032 	 * @entry_count: The current number of valid entries (that we know of)
1033 	 * in this table. This value is essentially a refcount - the table is
1034 	 * destroyed when this value is decremented to zero by
1035 	 * pvr_page_table_l1_remove().
1036 	 */
1037 	u16 entry_count;
1038 };
1039 
1040 /**
1041  * pvr_page_table_l1_init() - Initialize a level 1 page table.
1042  * @table: Target level 1 page table.
1043  * @pvr_dev: Target PowerVR device
1044  *
1045  * When this function returns successfully, @table is still not considered
1046  * valid. It must be inserted into the page table tree structure with
1047  * pvr_page_table_l2_insert() before it is ready for use.
1048  *
1049  * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
1050  * this function.
1051  *
1052  * Return:
1053  *  * 0 on success, or
1054  *  * Any error encountered while initializing &table->backing_page using
1055  *    pvr_mmu_backing_page_init().
1056  */
1057 static int
1058 pvr_page_table_l1_init(struct pvr_page_table_l1 *table,
1059 		       struct pvr_device *pvr_dev)
1060 {
1061 	table->parent_idx = PVR_IDX_INVALID;
1062 
1063 	return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
1064 }
1065 
1066 /**
1067  * pvr_page_table_l1_free() - Teardown a level 1 page table.
1068  * @table: Target level 1 page table.
1069  *
1070  * It is an error to attempt to use @table after calling this function, even
1071  * indirectly. This includes calling pvr_page_table_l2_remove(), which must
1072  * be called *before* pvr_page_table_l1_free().
1073  */
1074 static void
1075 pvr_page_table_l1_free(struct pvr_page_table_l1 *table)
1076 {
1077 	pvr_mmu_backing_page_fini(&table->backing_page);
1078 	kfree(table);
1079 }
1080 
1081 /**
1082  * pvr_page_table_l1_sync() - Flush a level 1 page table from the CPU to the
1083  *                            device.
1084  * @table: Target level 1 page table.
1085  *
1086  * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
1087  * warning there applies here too: **Only call pvr_page_table_l1_sync() once
1088  * you're sure you have no more changes to make to** @table **in the immediate
1089  * future.**
1090  *
1091  * If child level 0 page tables of @table also need to be flushed, this should
1092  * be done first using pvr_page_table_l0_sync() *before* calling this function.
1093  */
1094 static void
1095 pvr_page_table_l1_sync(struct pvr_page_table_l1 *table)
1096 {
1097 	pvr_mmu_backing_page_sync(&table->backing_page, PVR_MMU_SYNC_LEVEL_1_FLAGS);
1098 }
1099 
1100 /**
1101  * pvr_page_table_l1_get_raw() - Access the raw equivalent of a mirror level 1
1102  *                               page table.
1103  * @table: Target level 1 page table.
1104  *
1105  * Essentially returns the CPU address of the raw equivalent of @table, cast to
1106  * a &struct pvr_page_table_l1_raw pointer.
1107  *
1108  * You probably want to call pvr_page_table_l1_get_entry_raw() instead.
1109  *
1110  * Return:
1111  * The raw equivalent of @table.
1112  */
1113 static struct pvr_page_table_l1_raw *
1114 pvr_page_table_l1_get_raw(struct pvr_page_table_l1 *table)
1115 {
1116 	return table->backing_page.host_ptr;
1117 }
1118 
1119 /**
1120  * pvr_page_table_l1_get_entry_raw() - Access an entry from the raw equivalent
1121  *                                     of a mirror level 1 page table.
1122  * @table: Target level 1 page table.
1123  * @idx: Index of the entry to access.
1124  *
1125  * Technically this function returns a pointer to a slot in a raw level 1 page
1126  * table, since the returned "entry" is not guaranteed to be valid. The caller
1127  * must verify the validity of the entry at the returned address (perhaps using
1128  * pvr_page_table_l1_entry_raw_is_valid()) before reading or overwriting it.
1129  *
1130  * The value of @idx is not checked here; it is the caller's responsibility to
1131  * ensure @idx refers to a valid index within @table before dereferencing the
1132  * returned pointer.
1133  *
1134  * Return:
1135  * A pointer to the requested raw level 1 page table entry.
1136  */
1137 static struct pvr_page_table_l1_entry_raw *
1138 pvr_page_table_l1_get_entry_raw(struct pvr_page_table_l1 *table, u16 idx)
1139 {
1140 	return &pvr_page_table_l1_get_raw(table)->entries[idx];
1141 }
1142 
1143 /**
1144  * pvr_page_table_l1_entry_is_valid() - Check if a level 1 page table entry is
1145  *                                      marked as valid.
1146  * @table: Target level 1 page table.
1147  * @idx: Index of the entry to check.
1148  *
1149  * The value of @idx is not checked here; it is the caller's responsibility to
1150  * ensure @idx refers to a valid index within @table before calling this
1151  * function.
1152  */
1153 static bool
1154 pvr_page_table_l1_entry_is_valid(struct pvr_page_table_l1 *table, u16 idx)
1155 {
1156 	struct pvr_page_table_l1_entry_raw entry_raw =
1157 		*pvr_page_table_l1_get_entry_raw(table, idx);
1158 
1159 	return pvr_page_table_l1_entry_raw_is_valid(entry_raw);
1160 }
1161 
1162 /**
1163  * struct pvr_page_table_l0 - A wrapped level 0 page table.
1164  *
1165  * To access the raw part of this table, use pvr_page_table_l0_get_raw().
1166  * Alternatively, to access a raw entry directly, use
1167  * pvr_page_table_l0_get_entry_raw().
1168  *
1169  * There is no mirror representation of an individual page, so this type has no
1170  * &entries member.
1171  */
1172 struct pvr_page_table_l0 {
1173 	/**
1174 	 * @backing_page: A handle to the memory which holds the raw
1175 	 * equivalent of this table. **For internal use only.**
1176 	 */
1177 	struct pvr_mmu_backing_page backing_page;
1178 
1179 	union {
1180 		/**
1181 		 * @parent: The parent of this node in the page table tree structure.
1182 		 *
1183 		 * This is also a mirror table.
1184 		 *
1185 		 * Only valid when the L0 page table is active. When the L0 page table
1186 		 * has been removed and queued for destruction, the next_free field
1187 		 * should be used instead.
1188 		 */
1189 		struct pvr_page_table_l1 *parent;
1190 
1191 		/**
1192 		 * @next_free: Pointer to the next L0 page table to take/free.
1193 		 *
1194 		 * Used to form a linked list of L0 page tables. This is used
1195 		 * when preallocating tables and when the page table has been
1196 		 * removed and queued for destruction.
1197 		 */
1198 		struct pvr_page_table_l0 *next_free;
1199 	};
1200 
1201 	/**
1202 	 * @parent_idx: The index of the entry in the parent table (see
1203 	 * @parent) which corresponds to this table.
1204 	 */
1205 	u16 parent_idx;
1206 
1207 	/**
1208 	 * @entry_count: The current number of valid entries (that we know of)
1209 	 * in this table. This value is essentially a refcount - the table is
1210 	 * destroyed when this value is decremented to zero by
1211 	 * pvr_page_table_l0_remove().
1212 	 */
1213 	u16 entry_count;
1214 };
1215 
1216 /**
1217  * pvr_page_table_l0_init() - Initialize a level 0 page table.
1218  * @table: Target level 0 page table.
1219  * @pvr_dev: Target PowerVR device
1220  *
1221  * When this function returns successfully, @table is still not considered
1222  * valid. It must be inserted into the page table tree structure with
1223  * pvr_page_table_l1_insert() before it is ready for use.
1224  *
1225  * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
1226  * this function.
1227  *
1228  * Return:
1229  *  * 0 on success, or
1230  *  * Any error encountered while initializing &table->backing_page using
1231  *    pvr_mmu_backing_page_init().
1232  */
1233 static int
1234 pvr_page_table_l0_init(struct pvr_page_table_l0 *table,
1235 		       struct pvr_device *pvr_dev)
1236 {
1237 	table->parent_idx = PVR_IDX_INVALID;
1238 
1239 	return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
1240 }
1241 
1242 /**
1243  * pvr_page_table_l0_free() - Teardown a level 0 page table.
1244  * @table: Target level 0 page table.
1245  *
1246  * It is an error to attempt to use @table after calling this function, even
1247  * indirectly. This includes calling pvr_page_table_l1_remove(), which must
1248  * be called *before* pvr_page_table_l0_free().
1249  */
1250 static void
1251 pvr_page_table_l0_free(struct pvr_page_table_l0 *table)
1252 {
1253 	pvr_mmu_backing_page_fini(&table->backing_page);
1254 	kfree(table);
1255 }
1256 
1257 /**
1258  * pvr_page_table_l0_sync() - Flush a level 0 page table from the CPU to the
1259  *                            device.
1260  * @table: Target level 0 page table.
1261  *
1262  * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
1263  * warning there applies here too: **Only call pvr_page_table_l0_sync() once
1264  * you're sure you have no more changes to make to** @table **in the immediate
1265  * future.**
1266  *
1267  * If child pages of @table also need to be flushed, this should be done first
1268  * using a DMA sync function (e.g. dma_sync_sg_for_device()) *before* calling
1269  * this function.
1270  */
1271 static void
1272 pvr_page_table_l0_sync(struct pvr_page_table_l0 *table)
1273 {
1274 	pvr_mmu_backing_page_sync(&table->backing_page, PVR_MMU_SYNC_LEVEL_0_FLAGS);
1275 }
1276 
1277 /**
1278  * pvr_page_table_l0_get_raw() - Access the raw equivalent of a mirror level 0
1279  *                               page table.
1280  * @table: Target level 0 page table.
1281  *
1282  * Essentially returns the CPU address of the raw equivalent of @table, cast to
1283  * a &struct pvr_page_table_l0_raw pointer.
1284  *
1285  * You probably want to call pvr_page_table_l0_get_entry_raw() instead.
1286  *
1287  * Return:
1288  * The raw equivalent of @table.
1289  */
1290 static struct pvr_page_table_l0_raw *
1291 pvr_page_table_l0_get_raw(struct pvr_page_table_l0 *table)
1292 {
1293 	return table->backing_page.host_ptr;
1294 }
1295 
1296 /**
1297  * pvr_page_table_l0_get_entry_raw() - Access an entry from the raw equivalent
1298  *                                     of a mirror level 0 page table.
1299  * @table: Target level 0 page table.
1300  * @idx: Index of the entry to access.
1301  *
1302  * Technically this function returns a pointer to a slot in a raw level 0 page
1303  * table, since the returned "entry" is not guaranteed to be valid. The caller
1304  * must verify the validity of the entry at the returned address (perhaps using
1305  * pvr_page_table_l0_entry_raw_is_valid()) before reading or overwriting it.
1306  *
1307  * The value of @idx is not checked here; it is the caller's responsibility to
1308  * ensure @idx refers to a valid index within @table before dereferencing the
1309  * returned pointer. This is especially important for level 0 page tables, which
1310  * can have a variable number of entries.
1311  *
1312  * Return:
1313  * A pointer to the requested raw level 0 page table entry.
1314  */
1315 static struct pvr_page_table_l0_entry_raw *
1316 pvr_page_table_l0_get_entry_raw(struct pvr_page_table_l0 *table, u16 idx)
1317 {
1318 	return &pvr_page_table_l0_get_raw(table)->entries[idx];
1319 }
1320 
1321 /**
1322  * pvr_page_table_l0_entry_is_valid() - Check if a level 0 page table entry is
1323  *                                      marked as valid.
1324  * @table: Target level 0 page table.
1325  * @idx: Index of the entry to check.
1326  *
1327  * The value of @idx is not checked here; it is the caller's responsibility to
1328  * ensure @idx refers to a valid index within @table before calling this
1329  * function.
1330  */
1331 static bool
1332 pvr_page_table_l0_entry_is_valid(struct pvr_page_table_l0 *table, u16 idx)
1333 {
1334 	struct pvr_page_table_l0_entry_raw entry_raw =
1335 		*pvr_page_table_l0_get_entry_raw(table, idx);
1336 
1337 	return pvr_page_table_l0_entry_raw_is_valid(entry_raw);
1338 }
1339 
1340 /**
1341  * struct pvr_mmu_context - context holding data for operations at page
1342  * catalogue level, intended for use with a VM context.
1343  */
1344 struct pvr_mmu_context {
1345 	/** @pvr_dev: The PVR device associated with the owning VM context. */
1346 	struct pvr_device *pvr_dev;
1347 
1348 	/** @page_table_l2: The MMU table root. */
1349 	struct pvr_page_table_l2 page_table_l2;
1350 };
1351 
1352 /**
1353  * struct pvr_page_table_ptr - A reference to a single physical page as indexed
1354  * by the page table structure.
1355  *
1356  * Intended for embedding in a &struct pvr_mmu_op_context.
1357  */
1358 struct pvr_page_table_ptr {
1359 	/**
1360 	 * @l1_table: A cached handle to the level 1 page table the
1361 	 * context is currently traversing.
1362 	 */
1363 	struct pvr_page_table_l1 *l1_table;
1364 
1365 	/**
1366 	 * @l0_table: A cached handle to the level 0 page table the
1367 	 * context is currently traversing.
1368 	 */
1369 	struct pvr_page_table_l0 *l0_table;
1370 
1371 	/**
1372 	 * @l2_idx: Index into the level 2 page table the context is
1373 	 * currently referencing.
1374 	 */
1375 	u16 l2_idx;
1376 
1377 	/**
1378 	 * @l1_idx: Index into the level 1 page table the context is
1379 	 * currently referencing.
1380 	 */
1381 	u16 l1_idx;
1382 
1383 	/**
1384 	 * @l0_idx: Index into the level 0 page table the context is
1385 	 * currently referencing.
1386 	 */
1387 	u16 l0_idx;
1388 };
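
/*
 * Index decomposition sketch (illustrative, for a 4KiB device page size and
 * the table sizes implied by the static_asserts above: 1024 L2 entries,
 * 512 L1 entries and 512 L0 entries): a 40-bit device-virtual address splits
 * into the three indices cached in &struct pvr_page_table_ptr as
 *
 *	l2_idx = (addr >> 30) & 0x3ff;   // 1GiB per L2 entry
 *	l1_idx = (addr >> 21) & 0x1ff;   // 2MiB per L1 entry
 *	l0_idx = (addr >> 12) & 0x1ff;   // one device page per L0 entry
 *
 * The index helpers in the following section compute these values for the
 * configured device page size.
 */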
1389 
1390 /**
1391  * struct pvr_mmu_op_context - context holding data for individual
1392  * device-virtual mapping operations. Intended for use with a VM bind operation.
1393  */
1394 struct pvr_mmu_op_context {
1395 	/** @mmu_ctx: The MMU context associated with the owning VM context. */
1396 	struct pvr_mmu_context *mmu_ctx;
1397 
1398 	/** @map: Data specifically for map operations. */
1399 	struct {
1400 		/**
1401 		 * @sgt: Scatter gather table containing pages pinned for use by
1402 		 * this context - these are currently pinned when initialising
1403 		 * the VM bind operation.
1404 		 */
1405 		struct sg_table *sgt;
1406 
1407 		/** @sgt_offset: Start address of the device-virtual mapping. */
1408 		u64 sgt_offset;
1409 
1410 		/**
1411 		 * @l1_prealloc_tables: Preallocated l1 page table objects
1412 		 * used by this context when creating a page mapping. Linked list
1413 		 * fully created during initialisation.
1414 		 */
1415 		struct pvr_page_table_l1 *l1_prealloc_tables;
1416 
1417 		/**
1418 		 * @l0_prealloc_tables: Preallocated l0 page table objects
1419 		 * used by this context when creating a page mapping. Linked list
1420 		 * fully created during initialisation.
1421 		 */
1422 		struct pvr_page_table_l0 *l0_prealloc_tables;
1423 	} map;
1424 
1425 	/** @unmap: Data specifically for unmap operations. */
1426 	struct {
1427 		/**
1428 		 * @l1_free_tables: Collects page table objects freed by unmap
1429 		 * ops. Linked list empty at creation.
1430 		 */
1431 		struct pvr_page_table_l1 *l1_free_tables;
1432 
1433 		/**
1434 		 * @l0_free_tables: Collects page table objects freed by unmap
1435 		 * ops. Linked list empty at creation.
1436 		 */
1437 		struct pvr_page_table_l0 *l0_free_tables;
1438 	} unmap;
1439 
1440 	/**
1441 	 * @curr_page: A reference to a single physical page as indexed by the
1442 	 * page table structure.
1443 	 */
1444 	struct pvr_page_table_ptr curr_page;
1445 
1446 	/**
1447 	 * @sync_level_required: The maximum level of the page table tree
1448 	 * structure which has (possibly) been modified since it was last
1449 	 * flushed to the device.
1450 	 *
1451 	 * This field should only be set with pvr_mmu_op_context_require_sync()
1452 	 * or indirectly by pvr_mmu_op_context_sync_partial().
1453 	 */
1454 	enum pvr_mmu_sync_level sync_level_required;
1455 };
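
/*
 * List handling sketch (illustrative): the preallocated and to-be-freed page
 * tables are kept on singly linked lists threaded through the @next_free
 * member of the union in each table, used as a simple stack. Removal, for
 * example, pushes a detached level 1 table like so (as done in
 * pvr_page_table_l2_remove() below):
 *
 *	table->next_free = op_ctx->unmap.l1_free_tables;
 *	op_ctx->unmap.l1_free_tables = table;
 *
 * and taking a preallocated table is the mirror pop:
 *
 *	table = op_ctx->map.l1_prealloc_tables;
 *	op_ctx->map.l1_prealloc_tables = table->next_free;
 */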
1456 
1457 /**
1458  * pvr_page_table_l2_insert() - Insert an entry referring to a level 1 page
1459  * table into a level 2 page table.
1460  * @op_ctx: Target MMU op context pointing at the entry to insert the L1 page
1461  * table into.
1462  * @child_table: Target level 1 page table to be referenced by the new entry.
1463  *
1464  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1465  * valid L2 entry.
1466  *
1467  * It is the caller's responsibility to execute any memory barriers to ensure
1468  * that the creation of @child_table is ordered before the L2 entry is inserted.
1469  */
1470 static void
1471 pvr_page_table_l2_insert(struct pvr_mmu_op_context *op_ctx,
1472 			 struct pvr_page_table_l1 *child_table)
1473 {
1474 	struct pvr_page_table_l2 *l2_table =
1475 		&op_ctx->mmu_ctx->page_table_l2;
1476 	struct pvr_page_table_l2_entry_raw *entry_raw =
1477 		pvr_page_table_l2_get_entry_raw(l2_table,
1478 						op_ctx->curr_page.l2_idx);
1479 
1480 	pvr_page_table_l2_entry_raw_set(entry_raw,
1481 					child_table->backing_page.dma_addr);
1482 
1483 	child_table->parent = l2_table;
1484 	child_table->parent_idx = op_ctx->curr_page.l2_idx;
1485 	l2_table->entries[op_ctx->curr_page.l2_idx] = child_table;
1486 	++l2_table->entry_count;
1487 	op_ctx->curr_page.l1_table = child_table;
1488 }
1489 
1490 /**
1491  * pvr_page_table_l2_remove() - Remove a level 1 page table from a level 2 page
1492  * table.
1493  * @op_ctx: Target MMU op context pointing at the L2 entry to remove.
1494  *
1495  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1496  * valid L2 entry.
1497  */
1498 static void
1499 pvr_page_table_l2_remove(struct pvr_mmu_op_context *op_ctx)
1500 {
1501 	struct pvr_page_table_l2 *l2_table =
1502 		&op_ctx->mmu_ctx->page_table_l2;
1503 	struct pvr_page_table_l2_entry_raw *entry_raw =
1504 		pvr_page_table_l2_get_entry_raw(l2_table,
1505 						op_ctx->curr_page.l1_table->parent_idx);
1506 
1507 	WARN_ON(op_ctx->curr_page.l1_table->parent != l2_table);
1508 
1509 	pvr_page_table_l2_entry_raw_clear(entry_raw);
1510 
1511 	l2_table->entries[op_ctx->curr_page.l1_table->parent_idx] = NULL;
1512 	op_ctx->curr_page.l1_table->parent_idx = PVR_IDX_INVALID;
1513 	op_ctx->curr_page.l1_table->next_free = op_ctx->unmap.l1_free_tables;
1514 	op_ctx->unmap.l1_free_tables = op_ctx->curr_page.l1_table;
1515 	op_ctx->curr_page.l1_table = NULL;
1516 
1517 	--l2_table->entry_count;
1518 }
1519 
1520 /**
1521  * pvr_page_table_l1_insert() - Insert an entry referring to a level 0 page
1522  * table into a level 1 page table.
1523  * @op_ctx: Target MMU op context pointing at the entry to insert the L0 page
1524  * table into.
1525  * @child_table: L0 page table to insert.
1526  *
1527  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1528  * valid L1 entry.
1529  *
1530  * It is the caller's responsibility to execute any memory barriers to ensure
1531  * that the creation of @child_table is ordered before the L1 entry is inserted.
1532  */
1533 static void
1534 pvr_page_table_l1_insert(struct pvr_mmu_op_context *op_ctx,
1535 			 struct pvr_page_table_l0 *child_table)
1536 {
1537 	struct pvr_page_table_l1_entry_raw *entry_raw =
1538 		pvr_page_table_l1_get_entry_raw(op_ctx->curr_page.l1_table,
1539 						op_ctx->curr_page.l1_idx);
1540 
1541 	pvr_page_table_l1_entry_raw_set(entry_raw,
1542 					child_table->backing_page.dma_addr);
1543 
1544 	child_table->parent = op_ctx->curr_page.l1_table;
1545 	child_table->parent_idx = op_ctx->curr_page.l1_idx;
1546 	op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l1_idx] = child_table;
1547 	++op_ctx->curr_page.l1_table->entry_count;
1548 	op_ctx->curr_page.l0_table = child_table;
1549 }
1550 
1551 /**
1552  * pvr_page_table_l1_remove() - Remove a level 0 page table from a level 1 page
1553  *                              table.
1554  * @op_ctx: Target MMU op context pointing at the L1 entry to remove.
1555  *
1556  * If this function results in the L1 table becoming empty, it will be removed
1557  * from its parent level 2 page table and destroyed.
1558  *
1559  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1560  * valid L1 entry.
1561  */
1562 static void
1563 pvr_page_table_l1_remove(struct pvr_mmu_op_context *op_ctx)
1564 {
1565 	struct pvr_page_table_l1_entry_raw *entry_raw =
1566 		pvr_page_table_l1_get_entry_raw(op_ctx->curr_page.l0_table->parent,
1567 						op_ctx->curr_page.l0_table->parent_idx);
1568 
1569 	WARN_ON(op_ctx->curr_page.l0_table->parent !=
1570 		op_ctx->curr_page.l1_table);
1571 
1572 	pvr_page_table_l1_entry_raw_clear(entry_raw);
1573 
1574 	op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l0_table->parent_idx] = NULL;
1575 	op_ctx->curr_page.l0_table->parent_idx = PVR_IDX_INVALID;
1576 	op_ctx->curr_page.l0_table->next_free = op_ctx->unmap.l0_free_tables;
1577 	op_ctx->unmap.l0_free_tables = op_ctx->curr_page.l0_table;
1578 	op_ctx->curr_page.l0_table = NULL;
1579 
1580 	if (--op_ctx->curr_page.l1_table->entry_count == 0) {
1581 		/* Clear the parent L2 page table entry. */
1582 		if (op_ctx->curr_page.l1_table->parent_idx != PVR_IDX_INVALID)
1583 			pvr_page_table_l2_remove(op_ctx);
1584 	}
1585 }
1586 
1587 /**
1588  * pvr_page_table_l0_insert() - Insert an entry referring to a physical page
1589  * into a level 0 page table.
1590  * @op_ctx: Target MMU op context pointing at the L0 entry to insert.
1591  * @dma_addr: Target DMA address to be referenced by the new entry.
1592  * @flags: Page options to be stored in the new entry.
1593  *
1594  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1595  * valid L0 entry.
1596  */
1597 static void
1598 pvr_page_table_l0_insert(struct pvr_mmu_op_context *op_ctx,
1599 			 dma_addr_t dma_addr, struct pvr_page_flags_raw flags)
1600 {
1601 	struct pvr_page_table_l0_entry_raw *entry_raw =
1602 		pvr_page_table_l0_get_entry_raw(op_ctx->curr_page.l0_table,
1603 						op_ctx->curr_page.l0_idx);
1604 
1605 	pvr_page_table_l0_entry_raw_set(entry_raw, dma_addr, flags);
1606 
1607 	/*
1608 	 * There is no entry to set here - we don't keep a mirror of
1609 	 * individual pages.
1610 	 */
1611 
1612 	++op_ctx->curr_page.l0_table->entry_count;
1613 }
1614 
1615 /**
1616  * pvr_page_table_l0_remove() - Remove a physical page from a level 0 page
1617  * table.
1618  * @op_ctx: Target MMU op context pointing at the L0 entry to remove.
1619  *
1620  * If this function results in the L0 table becoming empty, it will be removed
1621  * from its parent L1 page table and destroyed.
1622  *
1623  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1624  * valid L0 entry.
1625  */
1626 static void
1627 pvr_page_table_l0_remove(struct pvr_mmu_op_context *op_ctx)
1628 {
1629 	struct pvr_page_table_l0_entry_raw *entry_raw =
1630 		pvr_page_table_l0_get_entry_raw(op_ctx->curr_page.l0_table,
1631 						op_ctx->curr_page.l0_idx);
1632 
1633 	pvr_page_table_l0_entry_raw_clear(entry_raw);
1634 
1635 	/*
1636 	 * There is no entry to clear here - we don't keep a mirror of
1637 	 * individual pages.
1638 	 */
1639 
1640 	if (--op_ctx->curr_page.l0_table->entry_count == 0) {
1641 		/* Clear the parent L1 page table entry. */
1642 		if (op_ctx->curr_page.l0_table->parent_idx != PVR_IDX_INVALID)
1643 			pvr_page_table_l1_remove(op_ctx);
1644 	}
1645 }
1646 
1647 /**
1648  * DOC: Page table index utilities
1649  */
1650 
1651 /**
1652  * pvr_page_table_l2_idx() - Calculate the level 2 page table index for a
1653  *                           device-virtual address.
1654  * @device_addr: Target device-virtual address.
1655  *
1656  * This function does not perform any bounds checking - it is the caller's
1657  * responsibility to ensure that @device_addr is valid before interpreting
1658  * the result.
1659  *
1660  * Return:
1661  * The index into a level 2 page table corresponding to @device_addr.
1662  */
1663 static u16
1664 pvr_page_table_l2_idx(u64 device_addr)
1665 {
1666 	return (device_addr & ~ROGUE_MMUCTRL_VADDR_PC_INDEX_CLRMSK) >>
1667 	       ROGUE_MMUCTRL_VADDR_PC_INDEX_SHIFT;
1668 }
1669 
1670 /**
1671  * pvr_page_table_l1_idx() - Calculate the level 1 page table index for a
1672  *                           device-virtual address.
1673  * @device_addr: Target device-virtual address.
1674  *
1675  * This function does not perform any bounds checking - it is the caller's
1676  * responsibility to ensure that @device_addr is valid before interpreting
1677  * the result.
1678  *
1679  * Return:
1680  * The index into a level 1 page table corresponding to @device_addr.
1681  */
1682 static u16
1683 pvr_page_table_l1_idx(u64 device_addr)
1684 {
1685 	return (device_addr & ~ROGUE_MMUCTRL_VADDR_PD_INDEX_CLRMSK) >>
1686 	       ROGUE_MMUCTRL_VADDR_PD_INDEX_SHIFT;
1687 }
1688 
1689 /**
1690  * pvr_page_table_l0_idx() - Calculate the level 0 page table index for a
1691  *                           device-virtual address.
1692  * @device_addr: Target device-virtual address.
1693  *
1694  * This function does not perform any bounds checking - it is the caller's
1695  * responsibility to ensure that @device_addr is valid before interpreting
1696  * the result.
1697  *
1698  * Return:
1699  * The index into a level 0 page table corresponding to @device_addr.
1700  */
1701 static u16
1702 pvr_page_table_l0_idx(u64 device_addr)
1703 {
1704 	return (device_addr & ~ROGUE_MMUCTRL_VADDR_PT_INDEX_CLRMSK) >>
1705 	       ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT;
1706 }
1707 
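/*
 * Illustrative sketch (not part of the driver): the three helpers above are
 * normally used together to split a device-virtual address into the indices
 * needed to walk the page table tree, which is exactly what
 * pvr_mmu_op_context_set_curr_page() does further down:
 *
 *	u16 l2_idx = pvr_page_table_l2_idx(device_addr);
 *	u16 l1_idx = pvr_page_table_l1_idx(device_addr);
 *	u16 l0_idx = pvr_page_table_l0_idx(device_addr);
 *
 * The local variable names are hypothetical; only the helper calls are real.
 */
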
1708 /**
1709  * DOC: High-level page table operations
1710  */
1711 
1712 /**
1713  * pvr_page_table_l1_get_or_insert() - Retrieves (optionally inserting if
1714  * necessary) a level 1 page table from the specified level 2 page table entry.
1715  * @op_ctx: Target MMU op context.
1716  * @should_insert: [IN] Specifies whether new page tables should be inserted
1717  * when empty page table entries are encountered during traversal.
1718  *
1719  * Return:
1720  *  * 0 on success, or
1721  *
1722  *    If @should_insert is %false:
1723  *     * -%ENXIO if a level 1 page table would have been inserted.
1724  *
1725  *    If @should_insert is %true:
1726  *     * Any error encountered while inserting the level 1 page table.
1727  */
1728 static int
1729 pvr_page_table_l1_get_or_insert(struct pvr_mmu_op_context *op_ctx,
1730 				bool should_insert)
1731 {
1732 	struct pvr_page_table_l2 *l2_table =
1733 		&op_ctx->mmu_ctx->page_table_l2;
1734 	struct pvr_page_table_l1 *table;
1735 
1736 	if (pvr_page_table_l2_entry_is_valid(l2_table,
1737 					     op_ctx->curr_page.l2_idx)) {
1738 		op_ctx->curr_page.l1_table =
1739 			l2_table->entries[op_ctx->curr_page.l2_idx];
1740 		return 0;
1741 	}
1742 
1743 	if (!should_insert)
1744 		return -ENXIO;
1745 
1746 	/* Take a preallocated table. */
1747 	table = op_ctx->map.l1_prealloc_tables;
1748 	if (!table)
1749 		return -ENOMEM;
1750 
1751 	/* Pop */
1752 	op_ctx->map.l1_prealloc_tables = table->next_free;
1753 	table->next_free = NULL;
1754 
1755 	/* Ensure new table is fully written out before adding to L2 page table. */
1756 	wmb();
1757 
1758 	pvr_page_table_l2_insert(op_ctx, table);
1759 
1760 	return 0;
1761 }
1762 
1763 /**
1764  * pvr_page_table_l0_get_or_insert() - Retrieves (optionally inserting if
1765  * necessary) a level 0 page table from the specified level 1 page table entry.
1766  * @op_ctx: Target MMU op context.
1767  * @should_insert: [IN] Specifies whether new page tables should be inserted
1768  * when empty page table entries are encountered during traversal.
1769  *
1770  * Return:
1771  *  * 0 on success,
1772  *
1773  *    If @should_insert is %false:
1774  *     * -%ENXIO if a level 0 page table would have been inserted.
1775  *
1776  *    If @should_insert is %true:
1777  *     * Any error encountered while inserting the level 0 page table.
1778  */
1779 static int
1780 pvr_page_table_l0_get_or_insert(struct pvr_mmu_op_context *op_ctx,
1781 				bool should_insert)
1782 {
1783 	struct pvr_page_table_l0 *table;
1784 
1785 	if (pvr_page_table_l1_entry_is_valid(op_ctx->curr_page.l1_table,
1786 					     op_ctx->curr_page.l1_idx)) {
1787 		op_ctx->curr_page.l0_table =
1788 			op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l1_idx];
1789 		return 0;
1790 	}
1791 
1792 	if (!should_insert)
1793 		return -ENXIO;
1794 
1795 	/* Take a preallocated table. */
1796 	table = op_ctx->map.l0_prealloc_tables;
1797 	if (!table)
1798 		return -ENOMEM;
1799 
1800 	/* Pop */
1801 	op_ctx->map.l0_prealloc_tables = table->next_free;
1802 	table->next_free = NULL;
1803 
1804 	/* Ensure new table is fully written out before adding to L1 page table. */
1805 	wmb();
1806 
1807 	pvr_page_table_l1_insert(op_ctx, table);
1808 
1809 	return 0;
1810 }
1811 
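/*
 * Illustrative sketch (not part of the driver): the prealloc lists consumed
 * by the two helpers above are plain singly-linked stacks threaded through
 * each table's &next_free member. Push, as done by
 * pvr_mmu_op_context_create() below:
 *
 *	table->next_free = op_ctx->map.l1_prealloc_tables;
 *	op_ctx->map.l1_prealloc_tables = table;
 *
 * Pop, as done above:
 *
 *	table = op_ctx->map.l1_prealloc_tables;
 *	op_ctx->map.l1_prealloc_tables = table->next_free;
 *	table->next_free = NULL;
 *
 * The remove paths push detached tables onto &op_ctx->unmap.l1_free_tables
 * and &op_ctx->unmap.l0_free_tables in the same way.
 */
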
1812 /**
1813  * pvr_mmu_context_create() - Create an MMU context.
1814  * @pvr_dev: PVR device associated with owning VM context.
1815  *
1816  * Returns:
1817  *  * Newly created MMU context object on success, or
1818  *  * -%ENOMEM if no memory is available,
1819  *  * Any error code returned by pvr_page_table_l2_init().
1820  */
1821 struct pvr_mmu_context *pvr_mmu_context_create(struct pvr_device *pvr_dev)
1822 {
1823 	struct pvr_mmu_context *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
1824 	int err;
1825 
1826 	if (!ctx)
1827 		return ERR_PTR(-ENOMEM);
1828 
	err = pvr_page_table_l2_init(&ctx->page_table_l2, pvr_dev);
	if (err) {
		kfree(ctx);
		return ERR_PTR(err);
	}
1832 
1833 	ctx->pvr_dev = pvr_dev;
1834 
1835 	return ctx;
1836 }
1837 
1838 /**
1839  * pvr_mmu_context_destroy() - Destroy an MMU context.
1840  * @ctx: Target MMU context.
1841  */
1842 void pvr_mmu_context_destroy(struct pvr_mmu_context *ctx)
1843 {
1844 	pvr_page_table_l2_fini(&ctx->page_table_l2);
1845 	kfree(ctx);
1846 }
1847 
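/*
 * Illustrative usage sketch (not part of the driver): the owning VM context
 * is assumed to pair the two functions above roughly as follows, with error
 * handling reduced to a minimum:
 *
 *	struct pvr_mmu_context *mmu_ctx = pvr_mmu_context_create(pvr_dev);
 *
 *	if (IS_ERR(mmu_ctx))
 *		return PTR_ERR(mmu_ctx);
 *
 *	... use mmu_ctx, e.g. via pvr_mmu_get_root_table_dma_addr() ...
 *
 *	pvr_mmu_context_destroy(mmu_ctx);
 */
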
1848 /**
1849  * pvr_mmu_get_root_table_dma_addr() - Get the DMA address of the root of the
1850  * page table structure behind a VM context.
1851  * @ctx: Target MMU context.
1852  */
1853 dma_addr_t pvr_mmu_get_root_table_dma_addr(struct pvr_mmu_context *ctx)
1854 {
1855 	return ctx->page_table_l2.backing_page.dma_addr;
1856 }
1857 
1858 /**
1859  * pvr_page_table_l1_alloc() - Allocate an L1 page table object.
1860  * @ctx: MMU context of owning VM context.
1861  *
1862  * Returns:
1863  *  * Newly created page table object on success, or
1864  *  * -%ENOMEM if no memory is available,
1865  *  * Any error code returned by pvr_page_table_l1_init().
1866  */
1867 static struct pvr_page_table_l1 *
1868 pvr_page_table_l1_alloc(struct pvr_mmu_context *ctx)
1869 {
1870 	int err;
1871 
1872 	struct pvr_page_table_l1 *table =
1873 		kzalloc(sizeof(*table), GFP_KERNEL);
1874 
1875 	if (!table)
1876 		return ERR_PTR(-ENOMEM);
1877 
1878 	err = pvr_page_table_l1_init(table, ctx->pvr_dev);
1879 	if (err) {
1880 		kfree(table);
1881 		return ERR_PTR(err);
1882 	}
1883 
1884 	return table;
1885 }
1886 
1887 /**
1888  * pvr_page_table_l0_alloc() - Allocate an L0 page table object.
1889  * @ctx: MMU context of owning VM context.
1890  *
1891  * Returns:
1892  *  * Newly created page table object on success, or
1893  *  * -%ENOMEM if no memory is available,
1894  *  * Any error code returned by pvr_page_table_l0_init().
1895  */
1896 static struct pvr_page_table_l0 *
1897 pvr_page_table_l0_alloc(struct pvr_mmu_context *ctx)
1898 {
1899 	int err;
1900 
1901 	struct pvr_page_table_l0 *table =
1902 		kzalloc(sizeof(*table), GFP_KERNEL);
1903 
1904 	if (!table)
1905 		return ERR_PTR(-ENOMEM);
1906 
1907 	err = pvr_page_table_l0_init(table, ctx->pvr_dev);
1908 	if (err) {
1909 		kfree(table);
1910 		return ERR_PTR(err);
1911 	}
1912 
1913 	return table;
1914 }
1915 
1916 /**
1917  * pvr_mmu_op_context_require_sync() - Mark an MMU op context as requiring a
1918  * sync operation for the referenced page tables up to a specified level.
1919  * @op_ctx: Target MMU op context.
1920  * @level: Maximum page table level for which a sync is required.
1921  */
1922 static void
1923 pvr_mmu_op_context_require_sync(struct pvr_mmu_op_context *op_ctx,
1924 				enum pvr_mmu_sync_level level)
1925 {
1926 	if (op_ctx->sync_level_required < level)
1927 		op_ctx->sync_level_required = level;
1928 }
1929 
1930 /**
1931  * pvr_mmu_op_context_sync_manual() - Trigger a sync of some or all of the
1932  * page tables referenced by an MMU op context.
1933  * @op_ctx: Target MMU op context.
1934  * @level: Maximum page table level to sync.
1935  *
1936  * Do not call this function directly. Instead use
1937  * pvr_mmu_op_context_sync_partial(), which takes the current value of
1938  * &op_ctx->sync_level_required (as set by pvr_mmu_op_context_require_sync())
1939  * into account.
1940  */
1941 static void
1942 pvr_mmu_op_context_sync_manual(struct pvr_mmu_op_context *op_ctx,
1943 			       enum pvr_mmu_sync_level level)
1944 {
1945 	/*
1946 	 * We sync the page table levels in ascending order (starting from the
1947 	 * leaf node) to ensure consistency.
1948 	 */
1949 
1950 	WARN_ON(level < PVR_MMU_SYNC_LEVEL_NONE);
1951 
1952 	if (level <= PVR_MMU_SYNC_LEVEL_NONE)
1953 		return;
1954 
1955 	if (op_ctx->curr_page.l0_table)
1956 		pvr_page_table_l0_sync(op_ctx->curr_page.l0_table);
1957 
1958 	if (level < PVR_MMU_SYNC_LEVEL_1)
1959 		return;
1960 
1961 	if (op_ctx->curr_page.l1_table)
1962 		pvr_page_table_l1_sync(op_ctx->curr_page.l1_table);
1963 
1964 	if (level < PVR_MMU_SYNC_LEVEL_2)
1965 		return;
1966 
1967 	pvr_page_table_l2_sync(&op_ctx->mmu_ctx->page_table_l2);
1968 }
1969 
1970 /**
1971  * pvr_mmu_op_context_sync_partial() - Trigger a sync of some or all of the
1972  * page tables referenced by an MMU op context.
1973  * @op_ctx: Target MMU op context.
1974  * @level: Requested page table level to sync up to (inclusive).
1975  *
1976  * If @level is greater than the maximum level recorded by @op_ctx as requiring
1977  * a sync operation, only the previously recorded maximum will be used.
1978  *
1979  * Additionally, if @level is greater than or equal to the maximum level
1980  * recorded by @op_ctx as requiring a sync operation, that maximum level will be
1981  * reset as a full sync will be performed. This is equivalent to calling
1982  * pvr_mmu_op_context_sync().
1983  */
1984 static void
1985 pvr_mmu_op_context_sync_partial(struct pvr_mmu_op_context *op_ctx,
1986 				enum pvr_mmu_sync_level level)
1987 {
1988 	/*
1989 	 * If the requested sync level is greater than or equal to the
1990 	 * currently required sync level, we do two things:
1991 	 *  * Don't waste time syncing levels we haven't previously marked as
1992 	 *    requiring a sync, and
1993 	 *  * Reset the required sync level since we are about to sync
1994 	 *    everything that was previously marked as requiring a sync.
1995 	 */
1996 	if (level >= op_ctx->sync_level_required) {
1997 		level = op_ctx->sync_level_required;
1998 		op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
1999 	}
2000 
2001 	pvr_mmu_op_context_sync_manual(op_ctx, level);
2002 }
2003 
2004 /**
2005  * pvr_mmu_op_context_sync() - Trigger a sync of every page table referenced by
2006  * an MMU op context.
2007  * @op_ctx: Target MMU op context.
2008  *
2009  * The maximum level marked internally as requiring a sync will be reset so
2010  * that subsequent calls to this function will be no-ops unless @op_ctx is
2011  * otherwise updated.
2012  */
2013 static void
2014 pvr_mmu_op_context_sync(struct pvr_mmu_op_context *op_ctx)
2015 {
2016 	pvr_mmu_op_context_sync_manual(op_ctx, op_ctx->sync_level_required);
2017 
2018 	op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
2019 }
2020 
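/*
 * Illustrative sketch (not part of the driver): the sync helpers above act as
 * a "mark dirty" / "write back" pair. Code that modifies a page table entry
 * records the deepest level it touched, and the accumulated levels are then
 * flushed in a single pass, e.g.:
 *
 *	pvr_page_table_l0_insert(op_ctx, dma_addr, flags);
 *	pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0);
 *	...
 *	pvr_mmu_op_context_sync(op_ctx);
 *
 * pvr_mmu_op_context_sync_partial() is only used when part of the tree is
 * about to be unloaded, as in pvr_mmu_op_context_next_page() below.
 */
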
2021 /**
2022  * pvr_mmu_op_context_load_tables() - Load pointers to tables in each level of
2023  * the page table tree structure needed to reference the physical page
2024  * referenced by an MMU op context.
2025  * @op_ctx: Target MMU op context.
2026  * @should_create: Specifies whether new page tables should be created when
2027  * empty page table entries are encountered during traversal.
2028  * @load_level_required: Maximum page table level to load.
2029  *
2030  * If @should_create is %true, this function may modify the stored required
2031  * sync level of @op_ctx as new page tables are created and inserted into their
2032  * respective parents.
2033  *
2034  * Since there is only one root page table, it is technically incorrect to call
2035  * this function with a value of @load_level_required greater than or equal to
2036  * the root level number. However, this is not explicitly disallowed here.
2037  *
2038  * Return:
2039  *  * 0 on success,
2040  *  * Any error returned by pvr_page_table_l1_get_or_create() if
2041  *  * Any error returned by pvr_page_table_l1_get_or_insert() if
2042  *    @load_level_required >= 1 except -%ENXIO, or
2043  *  * Any error returned by pvr_page_table_l0_get_or_insert() if
2044  *    @load_level_required >= 0 except -%ENXIO.
2045 static int
2046 pvr_mmu_op_context_load_tables(struct pvr_mmu_op_context *op_ctx,
2047 			       bool should_create,
2048 			       enum pvr_mmu_sync_level load_level_required)
2049 {
2050 	const struct pvr_page_table_l1 *l1_head_before =
2051 		op_ctx->map.l1_prealloc_tables;
2052 	const struct pvr_page_table_l0 *l0_head_before =
2053 		op_ctx->map.l0_prealloc_tables;
2054 	int err;
2055 
2056 	/* Clear tables we're about to fetch in case of error states. */
2057 	if (load_level_required >= PVR_MMU_SYNC_LEVEL_1)
2058 		op_ctx->curr_page.l1_table = NULL;
2059 
2060 	if (load_level_required >= PVR_MMU_SYNC_LEVEL_0)
2061 		op_ctx->curr_page.l0_table = NULL;
2062 
2063 	/* Get or create L1 page table. */
2064 	if (load_level_required >= PVR_MMU_SYNC_LEVEL_1) {
2065 		err = pvr_page_table_l1_get_or_insert(op_ctx, should_create);
2066 		if (err) {
2067 			/*
2068 			 * If @should_create is %false and no L1 page table was
2069 			 * found, return early but without an error. Since
2070 			 * pvr_page_table_l1_get_or_insert() can only return
2071 			 * -%ENXIO if @should_create is %false, there is no
2072 			 * need to check it here.
2073 			 */
2074 			if (err == -ENXIO)
2075 				err = 0;
2076 
2077 			return err;
2078 		}
2079 	}
2080 
2081 	/* Get or create L0 page table. */
2082 	if (load_level_required >= PVR_MMU_SYNC_LEVEL_0) {
2083 		err = pvr_page_table_l0_get_or_insert(op_ctx, should_create);
2084 		if (err) {
2085 			/*
2086 			 * If @should_create is %false and no L0 page table was
2087 			 * found, return early but without an error. Since
2088 			 * pvr_page_table_l0_get_or_insert() can only return
2089 			 * -%ENXIO if @should_create is %false, there is no
2090 			 * need to check it here.
2091 			 */
2092 			if (err == -ENXIO)
2093 				err = 0;
2094 
2095 			/*
2096 			 * At this point, an L1 page table could have been
2097 			 * inserted but is now empty due to the failed attempt
2098 			 * at inserting an L0 page table. In this instance, we
2099 			 * must remove the empty L1 page table ourselves as
2100 			 * pvr_page_table_l1_remove() is never called as part
2101 			 * of the error path in
2102 			 * pvr_page_table_l0_get_or_insert().
2103 			 */
2104 			if (l1_head_before != op_ctx->map.l1_prealloc_tables) {
2105 				pvr_page_table_l2_remove(op_ctx);
2106 				pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_2);
2107 			}
2108 
2109 			return err;
2110 		}
2111 	}
2112 
2113 	/*
2114 	 * A sync is only needed if table objects were inserted. This can be
2115 	 * inferred by checking if the pointer at the head of the linked list
2116 	 * has changed.
2117 	 */
2118 	if (l1_head_before != op_ctx->map.l1_prealloc_tables)
2119 		pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_2);
2120 	else if (l0_head_before != op_ctx->map.l0_prealloc_tables)
2121 		pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_1);
2122 
2123 	return 0;
2124 }
2125 
2126 /**
2127  * pvr_mmu_op_context_set_curr_page() - Reassign the current page of an MMU op
2128  * context, syncing any page tables previously assigned to it which are no
2129  * longer relevant.
2130  * @op_ctx: Target MMU op context.
2131  * @device_addr: Device-virtual address of the new current page.
2132  * @should_create: Specify whether new page tables should be created when
2133  * empty page table entries are encountered during traversal.
2134  *
2135  * This function performs a full sync of the op context's page tables before
2136  * reassigning the current page, regardless of which levels actually change.
2137  *
2138  * Return:
2139  *  * 0 on success, or
2140  *  * Any error returned by pvr_mmu_op_context_load_tables().
2141  */
2142 static int
2143 pvr_mmu_op_context_set_curr_page(struct pvr_mmu_op_context *op_ctx,
2144 				 u64 device_addr, bool should_create)
2145 {
2146 	pvr_mmu_op_context_sync(op_ctx);
2147 
2148 	op_ctx->curr_page.l2_idx = pvr_page_table_l2_idx(device_addr);
2149 	op_ctx->curr_page.l1_idx = pvr_page_table_l1_idx(device_addr);
2150 	op_ctx->curr_page.l0_idx = pvr_page_table_l0_idx(device_addr);
2151 	op_ctx->curr_page.l1_table = NULL;
2152 	op_ctx->curr_page.l0_table = NULL;
2153 
2154 	return pvr_mmu_op_context_load_tables(op_ctx, should_create,
2155 					      PVR_MMU_SYNC_LEVEL_1);
2156 }
2157 
2158 /**
2159  * pvr_mmu_op_context_next_page() - Advance the current page of an MMU op
2160  * context.
2161  * @op_ctx: Target MMU op context.
2162  * @should_create: Specify whether new page tables should be created when
2163  * empty page table entries are encountered during traversal.
2164  *
2165  * If @should_create is %false, it is the caller's responsibility to verify that
2166  * the state of the table references in @op_ctx is valid on return. If -%ENXIO
2167  * is returned, at least one of the table references is invalid. It should be
2168  * noted that @op_ctx as a whole will be left in a valid state if -%ENXIO is
2169  * returned, unlike other error codes. The caller should check which references
2170  * are invalid by comparing them to %NULL. Only &@ptr->l2_table is guaranteed
2171  * are invalid by comparing them to %NULL. Only the root of the page table
2172  * tree structure, &op_ctx->mmu_ctx->page_table_l2, is guaranteed to be valid.
2173  * Return:
2174  *  * 0 on success,
2175  *  * -%EPERM if the operation would wrap at the top of the page table
2176  *    hierarchy,
2177  *  * -%ENXIO if @should_create is %false and a page table of any level would
2178  *    have otherwise been created, or
2179  *  * Any error returned while attempting to create missing page tables if
2180  *    @should_create is %true.
2181  */
2182 static int
2183 pvr_mmu_op_context_next_page(struct pvr_mmu_op_context *op_ctx,
2184 			     bool should_create)
2185 {
2186 	s8 load_level_required = PVR_MMU_SYNC_LEVEL_NONE;
2187 
2188 	if (++op_ctx->curr_page.l0_idx != ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X)
2189 		goto load_tables;
2190 
2191 	op_ctx->curr_page.l0_idx = 0;
2192 	load_level_required = PVR_MMU_SYNC_LEVEL_0;
2193 
2194 	if (++op_ctx->curr_page.l1_idx != ROGUE_MMUCTRL_ENTRIES_PD_VALUE)
2195 		goto load_tables;
2196 
2197 	op_ctx->curr_page.l1_idx = 0;
2198 	load_level_required = PVR_MMU_SYNC_LEVEL_1;
2199 
2200 	if (++op_ctx->curr_page.l2_idx != ROGUE_MMUCTRL_ENTRIES_PC_VALUE)
2201 		goto load_tables;
2202 
2203 	/*
2204 	 * If the pattern continued, we would set &op_ctx->curr_page.l2_idx to
2205 	 * zero here. However, that would wrap the top layer of the page table
2206 	 * hierarchy which is not a valid operation. Instead, we warn and return
2207 	 * an error.
2208 	 */
2209 	WARN(true,
2210 	     "%s(%p) attempted to loop the top of the page table hierarchy",
2211 	     __func__, op_ctx);
2212 	return -EPERM;
2213 
2214 	/* If indices have wrapped, we need to load new tables. */
2215 load_tables:
2216 	/* First, flush tables which will be unloaded. */
2217 	pvr_mmu_op_context_sync_partial(op_ctx, load_level_required);
2218 
2219 	/* Then load tables from the required level down. */
2220 	return pvr_mmu_op_context_load_tables(op_ctx, should_create,
2221 					      load_level_required);
2222 }
2223 
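/*
 * Illustrative sketch (not part of the driver): together with
 * pvr_mmu_op_context_set_curr_page(), the function above forms the basic
 * iteration pattern used by the map and unmap paths below. Assuming nr_pages
 * device pages starting at device_addr:
 *
 *	err = pvr_mmu_op_context_set_curr_page(op_ctx, device_addr, true);
 *	if (err)
 *		return err;
 *
 *	for (u64 page = 0; page < nr_pages; ++page) {
 *		if (page != 0) {
 *			err = pvr_mmu_op_context_next_page(op_ctx, true);
 *			if (err)
 *				return err;
 *		}
 *
 *		... operate on op_ctx->curr_page ...
 *	}
 */
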
2224 /**
2225  * DOC: Single page operations
2226  */
2227 
2228 /**
2229  * pvr_page_create() - Create a device-virtual memory page and insert it into
2230  * a level 0 page table.
2231  * @op_ctx: Target MMU op context pointing at the device-virtual address of the
2232  * target page.
2233  * @dma_addr: DMA address of the physical page backing the created page.
2234  * @flags: Page options saved on the level 0 page table entry for reading by
2235  *         the device.
2236  *
2237  * Return:
2238  *  * 0 on success, or
2239  *  * -%EEXIST if the requested page already exists.
2240  */
2241 static int
2242 pvr_page_create(struct pvr_mmu_op_context *op_ctx, dma_addr_t dma_addr,
2243 		struct pvr_page_flags_raw flags)
2244 {
2245 	/* Do not create a new page if one already exists. */
2246 	if (pvr_page_table_l0_entry_is_valid(op_ctx->curr_page.l0_table,
2247 					     op_ctx->curr_page.l0_idx)) {
2248 		return -EEXIST;
2249 	}
2250 
2251 	pvr_page_table_l0_insert(op_ctx, dma_addr, flags);
2252 
2253 	pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0);
2254 
2255 	return 0;
2256 }
2257 
2258 /**
2259  * pvr_page_destroy() - Destroy a device page after removing it from its
2260  * parent level 0 page table.
2261  * @op_ctx: Target MMU op context.
2262  */
2263 static void
2264 pvr_page_destroy(struct pvr_mmu_op_context *op_ctx)
2265 {
2266 	/* Do nothing if the page does not exist. */
2267 	if (!pvr_page_table_l0_entry_is_valid(op_ctx->curr_page.l0_table,
2268 					      op_ctx->curr_page.l0_idx)) {
2269 		return;
2270 	}
2271 
2272 	/* Clear the parent L0 page table entry. */
2273 	pvr_page_table_l0_remove(op_ctx);
2274 
2275 	pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0);
2276 }
2277 
2278 /**
2279  * pvr_mmu_op_context_destroy() - Destroy an MMU op context.
2280  * @op_ctx: Target MMU op context.
2281  */
2282 void pvr_mmu_op_context_destroy(struct pvr_mmu_op_context *op_ctx)
2283 {
2284 	const bool flush_caches =
2285 		op_ctx->sync_level_required != PVR_MMU_SYNC_LEVEL_NONE;
2286 
2287 	pvr_mmu_op_context_sync(op_ctx);
2288 
2289 	/* Unmaps should be flushed immediately. Map flushes can be deferred. */
2290 	if (flush_caches && !op_ctx->map.sgt)
2291 		pvr_mmu_flush_exec(op_ctx->mmu_ctx->pvr_dev, true);
2292 
2293 	while (op_ctx->map.l0_prealloc_tables) {
2294 		struct pvr_page_table_l0 *tmp = op_ctx->map.l0_prealloc_tables;
2295 
2296 		op_ctx->map.l0_prealloc_tables =
2297 			op_ctx->map.l0_prealloc_tables->next_free;
2298 		pvr_page_table_l0_free(tmp);
2299 	}
2300 
2301 	while (op_ctx->map.l1_prealloc_tables) {
2302 		struct pvr_page_table_l1 *tmp = op_ctx->map.l1_prealloc_tables;
2303 
2304 		op_ctx->map.l1_prealloc_tables =
2305 			op_ctx->map.l1_prealloc_tables->next_free;
2306 		pvr_page_table_l1_free(tmp);
2307 	}
2308 
2309 	while (op_ctx->unmap.l0_free_tables) {
2310 		struct pvr_page_table_l0 *tmp = op_ctx->unmap.l0_free_tables;
2311 
2312 		op_ctx->unmap.l0_free_tables =
2313 			op_ctx->unmap.l0_free_tables->next_free;
2314 		pvr_page_table_l0_free(tmp);
2315 	}
2316 
2317 	while (op_ctx->unmap.l1_free_tables) {
2318 		struct pvr_page_table_l1 *tmp = op_ctx->unmap.l1_free_tables;
2319 
2320 		op_ctx->unmap.l1_free_tables =
2321 			op_ctx->unmap.l1_free_tables->next_free;
2322 		pvr_page_table_l1_free(tmp);
2323 	}
2324 
2325 	kfree(op_ctx);
2326 }
2327 
2328 /**
2329  * pvr_mmu_op_context_create() - Create an MMU op context.
2330  * @ctx: MMU context associated with owning VM context.
2331  * @sgt: Scatter gather table containing pages pinned for use by this context.
2332  * @sgt_offset: Start offset of the requested device-virtual memory mapping.
2333  * @size: Size in bytes of the requested device-virtual memory mapping. For an
2334  * unmapping, this should be zero so that no page tables are allocated.
2335  *
2336  * Returns:
2337  *  * Newly created MMU op context object on success, or
2338  *  * -%ENOMEM if no memory is available,
2339  *  * Any error code returned by pvr_page_table_l1_alloc() or pvr_page_table_l0_alloc().
2340  */
2341 struct pvr_mmu_op_context *
2342 pvr_mmu_op_context_create(struct pvr_mmu_context *ctx, struct sg_table *sgt,
2343 			  u64 sgt_offset, u64 size)
2344 {
2345 	int err;
2346 
2347 	struct pvr_mmu_op_context *op_ctx =
2348 		kzalloc(sizeof(*op_ctx), GFP_KERNEL);
2349 
2350 	if (!op_ctx)
2351 		return ERR_PTR(-ENOMEM);
2352 
2353 	op_ctx->mmu_ctx = ctx;
2354 	op_ctx->map.sgt = sgt;
2355 	op_ctx->map.sgt_offset = sgt_offset;
2356 	op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
2357 
2358 	if (size) {
2359 		/*
2360 		 * The number of page table objects we need to prealloc is
2361 		 * indicated by the mapping size, start offset and the sizes
2362 		 * of the areas mapped per PT or PD. The range calculation is
2363 		 * identical to that for the index into a table for a device
2364 		 * address, so we reuse those functions here.
2365 		 */
2366 		const u32 l1_start_idx = pvr_page_table_l2_idx(sgt_offset);
2367 		const u32 l1_end_idx = pvr_page_table_l2_idx(sgt_offset + size);
2368 		const u32 l1_count = l1_end_idx - l1_start_idx + 1;
2369 		const u32 l0_start_idx = pvr_page_table_l1_idx(sgt_offset);
2370 		const u32 l0_end_idx = pvr_page_table_l1_idx(sgt_offset + size);
2371 		const u32 l0_count = l0_end_idx - l0_start_idx + 1;
2372 
2373 		/*
2374 		 * Alloc and push page table entries until we have enough of
2375 		 * each type, ending with linked lists of l0 and l1 entries in
2376 		 * reverse order.
2377 		 */
2378 		for (int i = 0; i < l1_count; i++) {
2379 			struct pvr_page_table_l1 *l1_tmp =
2380 				pvr_page_table_l1_alloc(ctx);
2381 
2382 			err = PTR_ERR_OR_ZERO(l1_tmp);
2383 			if (err)
2384 				goto err_cleanup;
2385 
2386 			l1_tmp->next_free = op_ctx->map.l1_prealloc_tables;
2387 			op_ctx->map.l1_prealloc_tables = l1_tmp;
2388 		}
2389 
2390 		for (int i = 0; i < l0_count; i++) {
2391 			struct pvr_page_table_l0 *l0_tmp =
2392 				pvr_page_table_l0_alloc(ctx);
2393 
2394 			err = PTR_ERR_OR_ZERO(l0_tmp);
2395 			if (err)
2396 				goto err_cleanup;
2397 
2398 			l0_tmp->next_free = op_ctx->map.l0_prealloc_tables;
2399 			op_ctx->map.l0_prealloc_tables = l0_tmp;
2400 		}
2401 	}
2402 
2403 	return op_ctx;
2404 
2405 err_cleanup:
2406 	pvr_mmu_op_context_destroy(op_ctx);
2407 
2408 	return ERR_PTR(err);
2409 }
2410 
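/*
 * Illustrative usage sketch (not part of the driver): a non-zero @size
 * preallocates the L1 and L0 tables the mapping may need, so the map path
 * below does not normally allocate memory itself; a zero @size skips
 * preallocation entirely. In this sketch the unmap case is assumed to pass a
 * %NULL @sgt, which is consistent with pvr_mmu_op_context_destroy() above
 * using a %NULL &op_ctx->map.sgt to tell unmaps from maps when flushing.
 *
 *	For a map:
 *		op_ctx = pvr_mmu_op_context_create(mmu_ctx, sgt, offset, size);
 *
 *	For an unmap:
 *		op_ctx = pvr_mmu_op_context_create(mmu_ctx, NULL, 0, 0);
 */
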
2411 /**
2412  * pvr_mmu_op_context_unmap_curr_page() - Unmap pages from a memory context
2413  * starting from the current page of an MMU op context.
2414  * @op_ctx: Target MMU op context pointing at the first page to unmap.
2415  * @nr_pages: Number of pages to unmap.
2416  *
2417  * Return:
2418  *  * 0 on success, or
2419  *  * Any error encountered while advancing @op_ctx.curr_page with
2420  *    pvr_mmu_op_context_next_page() (except -%ENXIO).
2421  */
2422 static int
2423 pvr_mmu_op_context_unmap_curr_page(struct pvr_mmu_op_context *op_ctx,
2424 				   u64 nr_pages)
2425 {
2426 	int err;
2427 
2428 	if (nr_pages == 0)
2429 		return 0;
2430 
2431 	/*
2432 	 * Destroy first page outside loop, as it doesn't require a page
2433 	 * advance beforehand. If the L0 page table reference in
2434 	 * @op_ctx.curr_page is %NULL, there cannot be a mapped page at
2435 	 * @op_ctx.curr_page (so skip ahead).
2436 	 */
2437 	if (op_ctx->curr_page.l0_table)
2438 		pvr_page_destroy(op_ctx);
2439 
2440 	for (u64 page = 1; page < nr_pages; ++page) {
2441 		err = pvr_mmu_op_context_next_page(op_ctx, false);
2442 		/*
2443 		 * If the page table tree structure at @op_ctx.curr_page is
2444 		 * incomplete, skip ahead. We don't care about unmapping pages
2445 		 * that cannot exist.
2446 		 *
2447 		 * FIXME: This could be made more efficient by jumping ahead
2448 		 * using pvr_mmu_op_context_set_curr_page().
2449 		 */
2450 		if (err == -ENXIO)
2451 			continue;
2452 		else if (err)
2453 			return err;
2454 
2455 		pvr_page_destroy(op_ctx);
2456 	}
2457 
2458 	return 0;
2459 }
2460 
2461 /**
2462  * pvr_mmu_unmap() - Unmap pages from a memory context.
2463  * @op_ctx: Target MMU op context.
2464  * @device_addr: First device-virtual address to unmap.
2465  * @size: Size in bytes to unmap.
2466  *
2467  * The total amount of device-virtual memory unmapped is @size rounded down
2468  * to a multiple of the device page size (%PVR_DEVICE_PAGE_SIZE).
2469  *
2470  * Returns:
2471  *  * 0 on success, or
2472  *  * Any error code returned by pvr_mmu_op_context_set_curr_page(), or
2473  *  * Any error code returned by pvr_mmu_op_context_unmap_curr_page().
2474  */
2475 int pvr_mmu_unmap(struct pvr_mmu_op_context *op_ctx, u64 device_addr, u64 size)
2476 {
2477 	int err = pvr_mmu_op_context_set_curr_page(op_ctx, device_addr, false);
2478 
2479 	if (err)
2480 		return err;
2481 
2482 	return pvr_mmu_op_context_unmap_curr_page(op_ctx,
2483 						  size >> PVR_DEVICE_PAGE_SHIFT);
2484 }
2485 
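/*
 * Illustrative usage sketch (not part of the driver): unmapping a previously
 * mapped range, assuming @op_ctx was created for an unmap (zero size; see
 * pvr_mmu_op_context_create() above):
 *
 *	err = pvr_mmu_unmap(op_ctx, device_addr, size);
 *
 *	pvr_mmu_op_context_destroy(op_ctx);
 *
 *	return err;
 */
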
2486 /**
2487  * pvr_mmu_map_sgl() - Map part of a scatter-gather table entry to
2488  * device-virtual memory.
2489  * @op_ctx: Target MMU op context pointing to the first page that should be
2490  * mapped.
2491  * @sgl: Target scatter-gather table entry.
2492  * @offset: Offset into @sgl to map from. Must result in a starting address
2493  * from @sgl which is CPU page-aligned.
2494  * @size: Size of the memory to be mapped in bytes. Must be a non-zero multiple
2495  * of the device page size.
2496  * @page_flags: Page options to be applied to every device-virtual memory page
2497  * in the created mapping.
2498  *
2499  * Return:
2500  *  * 0 on success,
2501  *  * -%EINVAL if the range specified by @offset and @size is not completely
2502  *    within @sgl, or
2503  *  * Any error encountered while creating a page with pvr_page_create(), or
2504  *  * Any error encountered while advancing @op_ctx.curr_page with
2505  *    pvr_mmu_op_context_next_page().
2506  */
2507 static int
2508 pvr_mmu_map_sgl(struct pvr_mmu_op_context *op_ctx, struct scatterlist *sgl,
2509 		u64 offset, u64 size, struct pvr_page_flags_raw page_flags)
2510 {
2511 	const unsigned int pages = size >> PVR_DEVICE_PAGE_SHIFT;
2512 	dma_addr_t dma_addr = sg_dma_address(sgl) + offset;
2513 	const unsigned int dma_len = sg_dma_len(sgl);
2514 	struct pvr_page_table_ptr ptr_copy;
2515 	unsigned int page;
2516 	int err;
2517 
2518 	if (size > dma_len || offset > dma_len - size)
2519 		return -EINVAL;
2520 
2521 	/*
2522 	 * Before progressing, save a copy of the start pointer so we can use
2523 	 * it again if we enter an error state and have to destroy pages.
2524 	 */
2525 	memcpy(&ptr_copy, &op_ctx->curr_page, sizeof(ptr_copy));
2526 
2527 	/*
2528 	 * Create first page outside loop, as it doesn't require a page advance
2529 	 * beforehand.
2530 	 */
2531 	err = pvr_page_create(op_ctx, dma_addr, page_flags);
2532 	if (err)
2533 		return err;
2534 
2535 	for (page = 1; page < pages; ++page) {
2536 		err = pvr_mmu_op_context_next_page(op_ctx, true);
2537 		if (err)
2538 			goto err_destroy_pages;
2539 
2540 		dma_addr += PVR_DEVICE_PAGE_SIZE;
2541 
2542 		err = pvr_page_create(op_ctx, dma_addr, page_flags);
2543 		if (err)
2544 			goto err_destroy_pages;
2545 	}
2546 
2547 	return 0;
2548 
2549 err_destroy_pages:
2550 	memcpy(&op_ctx->curr_page, &ptr_copy, sizeof(op_ctx->curr_page));
2551 	err = pvr_mmu_op_context_unmap_curr_page(op_ctx, page);
2552 
2553 	return err;
2554 }
2555 
2556 /**
2557  * pvr_mmu_map() - Map an object's virtual memory to physical memory.
2558  * @op_ctx: Target MMU op context.
2559  * @size: Size of memory to be mapped in bytes. Must be a non-zero multiple
2560  * of the device page size.
2561  * @flags: Flags from pvr_gem_object associated with the mapping.
2562  * @device_addr: Virtual device address to map to. Must be device page-aligned.
2563  *
2564  * Returns:
2565  *  * 0 on success, or
2566  *  * -%EINVAL if the mapping is not device page-aligned or the starting page
2567  *    cannot be set to @device_addr, or
2568  *  * Any error code returned by pvr_mmu_map_sgl() or pvr_mmu_op_context_next_page().
2569  */
2570 int pvr_mmu_map(struct pvr_mmu_op_context *op_ctx, u64 size, u64 flags,
2571 		u64 device_addr)
2572 {
2573 	struct pvr_page_table_ptr ptr_copy;
2574 	struct pvr_page_flags_raw flags_raw;
2575 	struct scatterlist *sgl;
2576 	u64 mapped_size = 0;
2577 	unsigned int count;
2578 	int err;
2579 
2580 	if (!size)
2581 		return 0;
2582 
2583 	if ((op_ctx->map.sgt_offset | size) & ~PVR_DEVICE_PAGE_MASK)
2584 		return -EINVAL;
2585 
2586 	err = pvr_mmu_op_context_set_curr_page(op_ctx, device_addr, true);
2587 	if (err)
2588 		return -EINVAL;
2589 
2590 	memcpy(&ptr_copy, &op_ctx->curr_page, sizeof(ptr_copy));
2591 
2592 	flags_raw = pvr_page_flags_raw_create(false, false,
2593 					      flags & DRM_PVR_BO_BYPASS_DEVICE_CACHE,
2594 					      flags & DRM_PVR_BO_PM_FW_PROTECT);
2595 
2596 	/* Map scatter gather table */
2597 	for_each_sgtable_dma_sg(op_ctx->map.sgt, sgl, count) {
2598 		const size_t sgl_len = sg_dma_len(sgl);
2599 		u64 sgl_offset, map_sgl_len;
2600 
2601 		if (sgl_len <= op_ctx->map.sgt_offset) {
2602 			op_ctx->map.sgt_offset -= sgl_len;
2603 			continue;
2604 		}
2605 
2606 		sgl_offset = op_ctx->map.sgt_offset;
2607 		map_sgl_len = min_t(u64, sgl_len - sgl_offset, size - mapped_size);
2608 
2609 		err = pvr_mmu_map_sgl(op_ctx, sgl, sgl_offset, map_sgl_len,
2610 				      flags_raw);
2611 		if (err)
2612 			break;
2613 
2614 		/*
2615 		 * Flag the L0 page table as requiring a flush when the MMU op
2616 		 * context is destroyed.
2617 		 */
2618 		pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0);
2619 
2620 		op_ctx->map.sgt_offset = 0;
2621 		mapped_size += map_sgl_len;
2622 
2623 		if (mapped_size >= size)
2624 			break;
2625 
2626 		err = pvr_mmu_op_context_next_page(op_ctx, true);
2627 		if (err)
2628 			break;
2629 	}
2630 
2631 	if (err && mapped_size) {
2632 		memcpy(&op_ctx->curr_page, &ptr_copy, sizeof(op_ctx->curr_page));
2633 		pvr_mmu_op_context_unmap_curr_page(op_ctx,
2634 						   mapped_size >> PVR_DEVICE_PAGE_SHIFT);
2635 	}
2636 
2637 	return err;
2638 }
2639
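/*
 * Illustrative end-to-end sketch (not part of the driver): how the public
 * entry points in this file are expected to combine for a single mapping.
 * The caller is assumed to have pinned @sgt and to hold any locks it needs;
 * all names other than the pvr_mmu_*() calls are hypothetical.
 *
 *	struct pvr_mmu_op_context *op_ctx =
 *		pvr_mmu_op_context_create(mmu_ctx, sgt, sgt_offset, size);
 *	int err;
 *
 *	if (IS_ERR(op_ctx))
 *		return PTR_ERR(op_ctx);
 *
 *	err = pvr_mmu_map(op_ctx, size, flags, device_addr);
 *
 *	pvr_mmu_op_context_destroy(op_ctx);
 *
 *	return err;
 *
 * Destroying the op context syncs the backing pages of any page tables that
 * were modified and, for unmap contexts, also flushes the MMU caches
 * immediately; map flushes may be deferred (see pvr_mmu_op_context_destroy()
 * above).
 */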