xref: /linux/drivers/gpu/drm/imagination/pvr_mmu.c (revision ab779466166348eecf17d20f620aa9a47965c934)
1 // SPDX-License-Identifier: GPL-2.0-only OR MIT
2 /* Copyright (c) 2023 Imagination Technologies Ltd. */
3 
4 #include "pvr_mmu.h"
5 
6 #include "pvr_ccb.h"
7 #include "pvr_device.h"
8 #include "pvr_fw.h"
9 #include "pvr_gem.h"
10 #include "pvr_power.h"
11 #include "pvr_rogue_fwif.h"
12 #include "pvr_rogue_mmu_defs.h"
13 
14 #include <drm/drm_drv.h>
15 #include <linux/atomic.h>
16 #include <linux/bitops.h>
17 #include <linux/dma-mapping.h>
18 #include <linux/kmemleak.h>
19 #include <linux/minmax.h>
20 #include <linux/sizes.h>
21 
22 #define PVR_SHIFT_FROM_SIZE(size_) (__builtin_ctzll(size_))
23 #define PVR_MASK_FROM_SIZE(size_) (~((size_) - U64_C(1)))
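/*
 * A quick worked example of the helpers above (illustrative): for a 16KiB
 * device page,
 *
 *	PVR_SHIFT_FROM_SIZE(SZ_16K) == 14	(SZ_16K == 1 << 14)
 *	PVR_MASK_FROM_SIZE(SZ_16K)  == ~U64_C(0x3fff)
 *
 * i.e. the shift recovers the page-offset width and the mask clears the
 * in-page offset bits.
 */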
24 
25 /*
26  * The value of the device page size (%PVR_DEVICE_PAGE_SIZE) is currently
27  * pegged to the host page size (%PAGE_SIZE). This chunk of macro goodness both
28  * ensures that the selected host page size corresponds to a valid device page
29  * size and sets up values needed by the MMU code below.
30  */
31 #if (PVR_DEVICE_PAGE_SIZE == SZ_4K)
32 # define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_4KB
33 # define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_4KB_RANGE_SHIFT
34 # define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_4KB_RANGE_CLRMSK
35 #elif (PVR_DEVICE_PAGE_SIZE == SZ_16K)
36 # define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_16KB
37 # define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_16KB_RANGE_SHIFT
38 # define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_16KB_RANGE_CLRMSK
39 #elif (PVR_DEVICE_PAGE_SIZE == SZ_64K)
40 # define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_64KB
41 # define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_64KB_RANGE_SHIFT
42 # define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_64KB_RANGE_CLRMSK
43 #elif (PVR_DEVICE_PAGE_SIZE == SZ_256K)
44 # define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_256KB
45 # define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_256KB_RANGE_SHIFT
46 # define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_256KB_RANGE_CLRMSK
47 #elif (PVR_DEVICE_PAGE_SIZE == SZ_1M)
48 # define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_1MB
49 # define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_1MB_RANGE_SHIFT
50 # define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_1MB_RANGE_CLRMSK
51 #elif (PVR_DEVICE_PAGE_SIZE == SZ_2M)
52 # define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_2MB
53 # define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_2MB_RANGE_SHIFT
54 # define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_2MB_RANGE_CLRMSK
55 #else
56 # error Unsupported device page size PVR_DEVICE_PAGE_SIZE
57 #endif
58 
59 #define ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X   \
60 	(ROGUE_MMUCTRL_ENTRIES_PT_VALUE >> \
61 	 (PVR_DEVICE_PAGE_SHIFT - PVR_SHIFT_FROM_SIZE(SZ_4K)))
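/*
 * For example (entry counts implied by the L0 page table size table
 * documented with &struct pvr_page_table_l1_entry_raw below): with 4KiB
 * device pages the shift difference is zero, so the full 512-entry count is
 * used; with 16KiB device pages the count is divided by four, giving 128
 * entries.
 */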
62 
63 enum pvr_mmu_sync_level {
64 	PVR_MMU_SYNC_LEVEL_NONE = -1,
65 	PVR_MMU_SYNC_LEVEL_0 = 0,
66 	PVR_MMU_SYNC_LEVEL_1 = 1,
67 	PVR_MMU_SYNC_LEVEL_2 = 2,
68 };
69 
70 #define PVR_MMU_SYNC_LEVEL_0_FLAGS (ROGUE_FWIF_MMUCACHEDATA_FLAGS_PT | \
71 				    ROGUE_FWIF_MMUCACHEDATA_FLAGS_INTERRUPT | \
72 				    ROGUE_FWIF_MMUCACHEDATA_FLAGS_TLB)
73 #define PVR_MMU_SYNC_LEVEL_1_FLAGS (PVR_MMU_SYNC_LEVEL_0_FLAGS | ROGUE_FWIF_MMUCACHEDATA_FLAGS_PD)
74 #define PVR_MMU_SYNC_LEVEL_2_FLAGS (PVR_MMU_SYNC_LEVEL_1_FLAGS | ROGUE_FWIF_MMUCACHEDATA_FLAGS_PC)
75 
76 /**
77  * pvr_mmu_set_flush_flags() - Set MMU cache flush flags for next call to
78  *                             pvr_mmu_flush_exec().
79  * @pvr_dev: Target PowerVR device.
80  * @flags: MMU flush flags. Must be one of %PVR_MMU_SYNC_LEVEL_*_FLAGS.
81  *
82  * This function must be called following any possible change to the MMU page
83  * tables.
84  */
85 static void pvr_mmu_set_flush_flags(struct pvr_device *pvr_dev, u32 flags)
86 {
87 	atomic_fetch_or(flags, &pvr_dev->mmu_flush_cache_flags);
88 }
89 
90 /**
91  * pvr_mmu_flush_request_all() - Request flush of all MMU caches when
92  * subsequently calling pvr_mmu_flush_exec().
93  * @pvr_dev: Target PowerVR device.
94  *
95  * This function must be called following any possible change to the MMU page
96  * tables.
97  */
98 void pvr_mmu_flush_request_all(struct pvr_device *pvr_dev)
99 {
100 	pvr_mmu_set_flush_flags(pvr_dev, PVR_MMU_SYNC_LEVEL_2_FLAGS);
101 }
102 
103 /**
104  * pvr_mmu_flush_exec() - Execute a flush of all MMU caches previously
105  * requested.
106  * @pvr_dev: Target PowerVR device.
107  * @wait: Do not return until the flush is completed.
108  *
109  * This function must be called prior to submitting any new GPU job. The flush
110  * will complete before the jobs are scheduled, so this can be called once after
111  * a series of maps. However, a single unmap should always be immediately
112  * followed by a flush that is explicitly waited upon by setting @wait.
113  *
114  * As a failure to flush the MMU caches could risk memory corruption, if the
115  * flush fails (implying the firmware is not responding) then the GPU device is
116  * marked as lost.
117  *
118  * Return:
119  *  * 0 on success when @wait is true, or
120  *  * -%EIO if the device is unavailable, or
121  *  * Any error encountered while submitting the flush command via the KCCB.
122  */
123 int pvr_mmu_flush_exec(struct pvr_device *pvr_dev, bool wait)
124 {
125 	struct rogue_fwif_kccb_cmd cmd_mmu_cache = {};
126 	struct rogue_fwif_mmucachedata *cmd_mmu_cache_data =
127 		&cmd_mmu_cache.cmd_data.mmu_cache_data;
128 	int err = 0;
129 	u32 slot;
130 	int idx;
131 
132 	if (!drm_dev_enter(from_pvr_device(pvr_dev), &idx))
133 		return -EIO;
134 
135 	/* Can't flush MMU if the firmware hasn't booted yet. */
136 	if (!pvr_dev->fw_dev.booted)
137 		goto err_drm_dev_exit;
138 
139 	cmd_mmu_cache_data->cache_flags =
140 		atomic_xchg(&pvr_dev->mmu_flush_cache_flags, 0);
141 
142 	if (!cmd_mmu_cache_data->cache_flags)
143 		goto err_drm_dev_exit;
144 
145 	cmd_mmu_cache.cmd_type = ROGUE_FWIF_KCCB_CMD_MMUCACHE;
146 
147 	pvr_fw_object_get_fw_addr(pvr_dev->fw_dev.mem.mmucache_sync_obj,
148 				  &cmd_mmu_cache_data->mmu_cache_sync_fw_addr);
149 	cmd_mmu_cache_data->mmu_cache_sync_update_value = 0;
150 
151 	err = pvr_kccb_send_cmd(pvr_dev, &cmd_mmu_cache, &slot);
152 	if (err)
153 		goto err_reset_and_retry;
154 
155 	err = pvr_kccb_wait_for_completion(pvr_dev, slot, HZ, NULL);
156 	if (err)
157 		goto err_reset_and_retry;
158 
159 	drm_dev_exit(idx);
160 
161 	return 0;
162 
163 err_reset_and_retry:
164 	/*
165 	 * Flush command failure is most likely the result of a firmware lockup. Hard
166 	 * reset the GPU and retry.
167 	 */
168 	err = pvr_power_reset(pvr_dev, true);
169 	if (err)
170 		goto err_drm_dev_exit; /* Device is lost. */
171 
172 	/* Retry sending flush request. */
173 	err = pvr_kccb_send_cmd(pvr_dev, &cmd_mmu_cache, &slot);
174 	if (err) {
175 		pvr_device_lost(pvr_dev);
176 		goto err_drm_dev_exit;
177 	}
178 
179 	if (wait) {
180 		err = pvr_kccb_wait_for_completion(pvr_dev, slot, HZ, NULL);
181 		if (err)
182 			pvr_device_lost(pvr_dev);
183 	}
184 
185 err_drm_dev_exit:
186 	drm_dev_exit(idx);
187 
188 	return err;
189 }
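/*
 * Minimal usage sketch (illustrative; pvr_dev and err are placeholders for
 * the caller's context). After a batch of map operations the flush can be
 * deferred and need not be waited upon; a lone unmap is flushed immediately
 * with @wait set, as described above:
 *
 *	After a series of maps:
 *		pvr_mmu_flush_request_all(pvr_dev);
 *		err = pvr_mmu_flush_exec(pvr_dev, false);
 *
 *	After a single unmap:
 *		pvr_mmu_flush_request_all(pvr_dev);
 *		err = pvr_mmu_flush_exec(pvr_dev, true);
 */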
190 
191 /**
192  * DOC: PowerVR Virtual Memory Handling
193  */
194 /**
195  * DOC: PowerVR Virtual Memory Handling (constants)
196  *
197  * .. c:macro:: PVR_IDX_INVALID
198  *
199  *    Default value for a u16-based index.
200  *
201  *    This value cannot be zero, since zero is a valid index value.
202  */
203 #define PVR_IDX_INVALID ((u16)(-1))
204 
205 /**
206  * DOC: MMU backing pages
207  */
208 /**
209  * DOC: MMU backing pages (constants)
210  *
211  * .. c:macro:: PVR_MMU_BACKING_PAGE_SIZE
212  *
213  *    Page size of a PowerVR device's integrated MMU. The CPU page size must be
214  *    at least as large as this value for the current implementation; this is
215  *    checked at compile-time.
216  */
217 #define PVR_MMU_BACKING_PAGE_SIZE SZ_4K
218 static_assert(PAGE_SIZE >= PVR_MMU_BACKING_PAGE_SIZE);
219 
220 /**
221  * struct pvr_mmu_backing_page - Represents a single page used to back a page
222  *                              table of any level.
223  * @dma_addr: DMA address of this page.
224  * @host_ptr: CPU address of this page.
225  * @pvr_dev: The PowerVR device with which this page is associated. **For
226  *           internal use only.**
227  */
228 struct pvr_mmu_backing_page {
229 	dma_addr_t dma_addr;
230 	void *host_ptr;
231 /* private: internal use only */
232 	struct page *raw_page;
233 	struct pvr_device *pvr_dev;
234 };
235 
236 /**
237  * pvr_mmu_backing_page_init() - Initialize an MMU backing page.
238  * @page: Target backing page.
239  * @pvr_dev: Target PowerVR device.
240  *
241  * This function performs three distinct operations:
242  *
243  * 1. Allocate a single page,
244  * 2. Map the page to the CPU, and
245  * 3. Map the page to DMA-space.
246  *
247  * It is expected that @page be zeroed (e.g. from kzalloc()) before calling
248  * this function.
249  *
250  * Return:
251  *  * 0 on success, or
252  *  * -%ENOMEM if allocation of the backing page or mapping of the backing
253  *    page to DMA fails.
254  */
255 static int
256 pvr_mmu_backing_page_init(struct pvr_mmu_backing_page *page,
257 			  struct pvr_device *pvr_dev)
258 {
259 	struct device *dev = from_pvr_device(pvr_dev)->dev;
260 
261 	struct page *raw_page;
262 	int err;
263 
264 	dma_addr_t dma_addr;
265 	void *host_ptr;
266 
267 	raw_page = alloc_page(__GFP_ZERO | GFP_KERNEL);
268 	if (!raw_page)
269 		return -ENOMEM;
270 
271 	host_ptr = vmap(&raw_page, 1, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
272 	if (!host_ptr) {
273 		err = -ENOMEM;
274 		goto err_free_page;
275 	}
276 
277 	dma_addr = dma_map_page(dev, raw_page, 0, PVR_MMU_BACKING_PAGE_SIZE,
278 				DMA_TO_DEVICE);
279 	if (dma_mapping_error(dev, dma_addr)) {
280 		err = -ENOMEM;
281 		goto err_unmap_page;
282 	}
283 
284 	page->dma_addr = dma_addr;
285 	page->host_ptr = host_ptr;
286 	page->pvr_dev = pvr_dev;
287 	page->raw_page = raw_page;
288 	kmemleak_alloc(page->host_ptr, PAGE_SIZE, 1, GFP_KERNEL);
289 
290 	return 0;
291 
292 err_unmap_page:
293 	vunmap(host_ptr);
294 
295 err_free_page:
296 	__free_page(raw_page);
297 
298 	return err;
299 }
300 
301 /**
302  * pvr_mmu_backing_page_fini() - Teardown an MMU backing page.
303  * @page: Target backing page.
304  *
305  * This function performs the mirror operations to pvr_mmu_backing_page_init(),
306  * in reverse order:
307  *
308  * 1. Unmap the page from DMA-space,
309  * 2. Unmap the page from the CPU, and
310  * 3. Free the page.
311  *
312  * It also zeros @page.
313  *
314  * It is a no-op to call this function a second (or further) time on any @page.
315  */
316 static void
317 pvr_mmu_backing_page_fini(struct pvr_mmu_backing_page *page)
318 {
319 	struct device *dev = from_pvr_device(page->pvr_dev)->dev;
320 
321 	/* Do nothing if no allocation is present. */
322 	if (!page->pvr_dev)
323 		return;
324 
325 	dma_unmap_page(dev, page->dma_addr, PVR_MMU_BACKING_PAGE_SIZE,
326 		       DMA_TO_DEVICE);
327 
328 	kmemleak_free(page->host_ptr);
329 	vunmap(page->host_ptr);
330 
331 	__free_page(page->raw_page);
332 
333 	memset(page, 0, sizeof(*page));
334 }
335 
336 /**
337  * pvr_mmu_backing_page_sync() - Flush an MMU backing page from the CPU to the
338  *                              device.
339  * @page: Target backing page.
 * @flags: MMU flush flags. Must be one of %PVR_MMU_SYNC_LEVEL_*_FLAGS.
340  *
341  * .. caution::
342  *
343  *    **This is potentially an expensive function call.** Only call
344  *    pvr_mmu_backing_page_sync() once you're sure you have no more changes to
345  *    make to the backing page in the immediate future.
346  */
347 static void
348 pvr_mmu_backing_page_sync(struct pvr_mmu_backing_page *page, u32 flags)
349 {
350 	struct pvr_device *pvr_dev = page->pvr_dev;
351 	struct device *dev;
352 
353 	/*
354 	 * Do nothing if no allocation is present. This may be the case if
355 	 * we are unmapping pages.
356 	 */
357 	if (!pvr_dev)
358 		return;
359 
360 	dev = from_pvr_device(pvr_dev)->dev;
361 
362 	dma_sync_single_for_device(dev, page->dma_addr,
363 				   PVR_MMU_BACKING_PAGE_SIZE, DMA_TO_DEVICE);
364 
365 	pvr_mmu_set_flush_flags(pvr_dev, flags);
366 }
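/*
 * Lifecycle sketch for the helpers above (illustrative; pvr_dev and err are
 * placeholders, and the real callers are the page table init/fini/sync
 * wrappers further down in this file):
 *
 *	struct pvr_mmu_backing_page page = {};
 *
 *	err = pvr_mmu_backing_page_init(&page, pvr_dev);
 *	... write raw entries through page.host_ptr ...
 *	pvr_mmu_backing_page_sync(&page, PVR_MMU_SYNC_LEVEL_0_FLAGS);
 *	pvr_mmu_backing_page_fini(&page);	(idempotent; zeroes the struct)
 */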
367 
368 /**
369  * DOC: Raw page tables
370  */
371 
372 #define PVR_PAGE_TABLE_TYPEOF_ENTRY(level_) \
373 	typeof_member(struct pvr_page_table_l##level_##_entry_raw, val)
374 
375 #define PVR_PAGE_TABLE_FIELD_GET(level_, name_, field_, entry_)           \
376 	(((entry_).val &                                           \
377 	  ~ROGUE_MMUCTRL_##name_##_DATA_##field_##_CLRMSK) >> \
378 	 ROGUE_MMUCTRL_##name_##_DATA_##field_##_SHIFT)
379 
380 #define PVR_PAGE_TABLE_FIELD_PREP(level_, name_, field_, val_)            \
381 	((((PVR_PAGE_TABLE_TYPEOF_ENTRY(level_))(val_))            \
382 	  << ROGUE_MMUCTRL_##name_##_DATA_##field_##_SHIFT) & \
383 	 ~ROGUE_MMUCTRL_##name_##_DATA_##field_##_CLRMSK)
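/*
 * For reference, the token pasting above means that, for example,
 * PVR_PAGE_TABLE_FIELD_PREP(2, PC, VALID, true) expands to the value of
 * `true` cast to the type of &pvr_page_table_l2_entry_raw.val, shifted left
 * by ROGUE_MMUCTRL_PC_DATA_VALID_SHIFT and masked with
 * ~ROGUE_MMUCTRL_PC_DATA_VALID_CLRMSK.
 */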
384 
385 /**
386  * struct pvr_page_table_l2_entry_raw - A single entry in a level 2 page table.
387  * @val: The raw value of this entry.
388  *
389  * This type is a structure for type-checking purposes. At compile-time, its
390  * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PC_VALUE.
391  *
392  * The value stored in this structure can be decoded using the following bitmap:
393  *
394  * .. flat-table::
395  *    :widths: 1 5
396  *    :stub-columns: 1
397  *
398  *    * - 31..4
399  *      - **Level 1 Page Table Base Address:** Bits 39..12 of the L1
400  *        page table base address, which is 4KiB aligned.
401  *
402  *    * - 3..2
403  *      - *(reserved)*
404  *
405  *    * - 1
406  *      - **Pending:** When the valid bit is not set, indicates that a valid
407  *        entry is pending and the MMU should wait for the driver to map
408  *        the entry. This is used to support page demand mapping of
409  *        memory.
410  *
411  *    * - 0
412  *      - **Valid:** Indicates that the entry contains a valid L1 page
413  *        table. If the valid bit is not set, then an attempted use of
414  *        the page would result in a page fault.
415  */
416 struct pvr_page_table_l2_entry_raw {
417 	u32 val;
418 } __packed;
419 static_assert(sizeof(struct pvr_page_table_l2_entry_raw) * 8 ==
420 	      ROGUE_MMUCTRL_ENTRY_SIZE_PC_VALUE);
421 
422 static bool
423 pvr_page_table_l2_entry_raw_is_valid(struct pvr_page_table_l2_entry_raw entry)
424 {
425 	return PVR_PAGE_TABLE_FIELD_GET(2, PC, VALID, entry);
426 }
427 
428 /**
429  * pvr_page_table_l2_entry_raw_set() - Write a valid entry into a raw level 2
430  *                                     page table.
431  * @entry: Target raw level 2 page table entry.
432  * @child_table_dma_addr: DMA address of the level 1 page table to be
433  *                        associated with @entry.
434  *
435  * When calling this function, @child_table_dma_addr must be a valid DMA
436  * address and a multiple of %ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSIZE.
437  */
438 static void
439 pvr_page_table_l2_entry_raw_set(struct pvr_page_table_l2_entry_raw *entry,
440 				dma_addr_t child_table_dma_addr)
441 {
442 	child_table_dma_addr >>= ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSHIFT;
443 
444 	WRITE_ONCE(entry->val,
445 		   PVR_PAGE_TABLE_FIELD_PREP(2, PC, VALID, true) |
446 		   PVR_PAGE_TABLE_FIELD_PREP(2, PC, ENTRY_PENDING, false) |
447 		   PVR_PAGE_TABLE_FIELD_PREP(2, PC, PD_BASE, child_table_dma_addr));
448 }
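/*
 * Worked example (hypothetical address; field positions taken from the
 * bitmap documented above): an L1 table backed at DMA address 0x123000,
 * which is 4KiB aligned, is shifted down by the PD base align shift (12,
 * since address bits 39..12 land in entry bits 31..4) to give 0x123. With
 * the valid bit set and the pending bit clear, the resulting raw entry
 * value is 0x1231.
 */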
449 
450 static void
451 pvr_page_table_l2_entry_raw_clear(struct pvr_page_table_l2_entry_raw *entry)
452 {
453 	WRITE_ONCE(entry->val, 0);
454 }
455 
456 /**
457  * struct pvr_page_table_l1_entry_raw - A single entry in a level 1 page table.
458  * @val: The raw value of this entry.
459  *
460  * This type is a structure for type-checking purposes. At compile-time, its
461  * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PD_VALUE.
462  *
463  * The value stored in this structure can be decoded using the following bitmap:
464  *
465  * .. flat-table::
466  *    :widths: 1 5
467  *    :stub-columns: 1
468  *
469  *    * - 63..41
470  *      - *(reserved)*
471  *
472  *    * - 40
473  *      - **Pending:** When the valid bit is not set, indicates that a valid entry
474  *        is pending and the MMU should wait for the driver to map the entry.
475  *        This is used to support page demand mapping of memory.
476  *
477  *    * - 39..5
478  *      - **Level 0 Page Table Base Address:** The way this value is
479  *        interpreted depends on the page size. Bits not specified in the
480  *        table below (e.g. bits 11..5 for page size 4KiB) should be
481  *        considered reserved.
482  *
483  *        This table shows the bits used in an L1 page table entry to
484  *        represent the Physical Table Base Address for a given Page Size.
485  *        Since each L1 page table entry covers 2MiB of address space, the
486  *        maximum page size is 2MiB.
487  *
488  *        .. flat-table::
489  *           :widths: 1 1 1 1
490  *           :header-rows: 1
491  *           :stub-columns: 1
492  *
493  *           * - Page size
494  *             - L0 page table base address bits
495  *             - Number of L0 page table entries
496  *             - Size of L0 page table
497  *
498  *           * - 4KiB
499  *             - 39..12
500  *             - 512
501  *             - 4KiB
502  *
503  *           * - 16KiB
504  *             - 39..10
505  *             - 128
506  *             - 1KiB
507  *
508  *           * - 64KiB
509  *             - 39..8
510  *             - 32
511  *             - 256B
512  *
513  *           * - 256KiB
514  *             - 39..6
515  *             - 8
516  *             - 64B
517  *
518  *           * - 1MiB
519  *             - 39..5 (4 = '0')
520  *             - 2
521  *             - 16B
522  *
523  *           * - 2MiB
524  *             - 39..5 (4..3 = '00')
525  *             - 1
526  *             - 8B
527  *
528  *    * - 4
529  *      - *(reserved)*
530  *
531  *    * - 3..1
532  *      - **Page Size:** Sets the page size, from 4KiB to 2MiB.
533  *
534  *    * - 0
535  *      - **Valid:** Indicates that the entry contains a valid L0 page table.
536  *        If the valid bit is not set, then an attempted use of the page would
537  *        result in a page fault.
538  */
539 struct pvr_page_table_l1_entry_raw {
540 	u64 val;
541 } __packed;
542 static_assert(sizeof(struct pvr_page_table_l1_entry_raw) * 8 ==
543 	      ROGUE_MMUCTRL_ENTRY_SIZE_PD_VALUE);
544 
545 static bool
546 pvr_page_table_l1_entry_raw_is_valid(struct pvr_page_table_l1_entry_raw entry)
547 {
548 	return PVR_PAGE_TABLE_FIELD_GET(1, PD, VALID, entry);
549 }
550 
551 /**
552  * pvr_page_table_l1_entry_raw_set() - Write a valid entry into a raw level 1
553  *                                     page table.
554  * @entry: Target raw level 1 page table entry.
555  * @child_table_dma_addr: DMA address of the level 0 page table to be
556  *                        associated with @entry.
557  *
558  * When calling this function, @child_table_dma_addr must be a valid DMA
559  * address and a multiple of 4 KiB.
560  */
561 static void
562 pvr_page_table_l1_entry_raw_set(struct pvr_page_table_l1_entry_raw *entry,
563 				dma_addr_t child_table_dma_addr)
564 {
565 	WRITE_ONCE(entry->val,
566 		   PVR_PAGE_TABLE_FIELD_PREP(1, PD, VALID, true) |
567 		   PVR_PAGE_TABLE_FIELD_PREP(1, PD, ENTRY_PENDING, false) |
568 		   PVR_PAGE_TABLE_FIELD_PREP(1, PD, PAGE_SIZE, ROGUE_MMUCTRL_PAGE_SIZE_X) |
569 		   /*
570 		    * The use of a 4K-specific macro here is correct. It is
571 		    * a future optimization to allocate sub-host-page-sized
572 		    * blocks for individual tables, so the condition that any
573 		    * page table address is aligned to the size of the
574 		    * largest (a 4KB) table currently holds.
575 		    */
576 		   (child_table_dma_addr & ~ROGUE_MMUCTRL_PT_BASE_4KB_RANGE_CLRMSK));
577 }
578 
579 static void
580 pvr_page_table_l1_entry_raw_clear(struct pvr_page_table_l1_entry_raw *entry)
581 {
582 	WRITE_ONCE(entry->val, 0);
583 }
584 
585 /**
586  * struct pvr_page_table_l0_entry_raw - A single entry in a level 0 page table.
587  * @val: The raw value of this entry.
588  *
589  * This type is a structure for type-checking purposes. At compile-time, its
590  * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PT_VALUE.
591  *
592  * The value stored in this structure can be decoded using the following bitmap:
593  *
594  * .. flat-table::
595  *    :widths: 1 5
596  *    :stub-columns: 1
597  *
598  *    * - 63
599  *      - *(reserved)*
600  *
601  *    * - 62
602  *      - **PM/FW Protect:** Indicates a protected region which only the
603  *        Parameter Manager (PM) or firmware processor can write to.
604  *
605  *    * - 61..40
606  *      - **VP Page (High):** Virtual-physical page used for Parameter Manager
607  *        (PM) memory. This field is only used if the additional level of PB
608  *        virtualization is enabled. The VP Page field is needed by the PM in
609  *        order to correctly reconstitute the free lists after render
610  *        completion. This (High) field holds bits 39..18 of the value; the
611  *        Low field holds bits 17..12. Bits 11..0 are always zero because the
612  *        value is always aligned to the 4KiB page size.
613  *
614  *    * - 39..12
615  *      - **Physical Page Address:** The way this value is interpreted depends
616  *        on the page size. Bits not specified in the table below (e.g. bits
617  *        20..12 for page size 2MiB) should be considered reserved.
618  *
619  *        This table shows the bits used in an L0 page table entry to represent
620  *        the Physical Page Address for a given page size (as defined in the
621  *        associated L1 page table entry).
622  *
623  *        .. flat-table::
624  *           :widths: 1 1
625  *           :header-rows: 1
626  *           :stub-columns: 1
627  *
628  *           * - Page size
629  *             - Physical address bits
630  *
631  *           * - 4KiB
632  *             - 39..12
633  *
634  *           * - 16KiB
635  *             - 39..14
636  *
637  *           * - 64KiB
638  *             - 39..16
639  *
640  *           * - 256KiB
641  *             - 39..18
642  *
643  *           * - 1MiB
644  *             - 39..20
645  *
646  *           * - 2MiB
647  *             - 39..21
648  *
649  *    * - 11..6
650  *      - **VP Page (Low):** Continuation of VP Page (High).
651  *
652  *    * - 5
653  *      - **Pending:** When the valid bit is not set, indicates that a valid entry
654  *        is pending and the MMU should wait for the driver to map the entry.
655  *        This is used to support page demand mapping of memory.
656  *
657  *    * - 4
658  *      - **PM Src:** Set on Parameter Manager (PM) allocated page table
659  *        entries when indicated by the PM. Note that this bit will only be set
660  *        by the PM, not by the device driver.
661  *
662  *    * - 3
663  *      - **SLC Bypass Control:** Specifies requests to this page should bypass
664  *        the System Level Cache (SLC), if enabled in SLC configuration.
665  *
666  *    * - 2
667  *      - **Cache Coherency:** Indicates that the page is coherent (i.e. it
668  *        does not require a cache flush between operations on the CPU and the
669  *        device).
670  *
671  *    * - 1
672  *      - **Read Only:** If set, this bit indicates that the page is read only.
673  *        An attempted write to this page would result in a write-protection
674  *        fault.
675  *
676  *    * - 0
677  *      - **Valid:** Indicates that the entry contains a valid page. If the
678  *        valid bit is not set, then an attempted use of the page would result
679  *        in a page fault.
680  */
681 struct pvr_page_table_l0_entry_raw {
682 	u64 val;
683 } __packed;
684 static_assert(sizeof(struct pvr_page_table_l0_entry_raw) * 8 ==
685 	      ROGUE_MMUCTRL_ENTRY_SIZE_PT_VALUE);
686 
687 /**
688  * struct pvr_page_flags_raw - The configurable flags from a single entry in a
689  *                             level 0 page table.
690  * @val: The raw value of these flags. Since these are a strict subset of
691  *       &struct pvr_page_table_l0_entry_raw, we use that type for the member here.
692  *
693  * The flags stored in this type are: PM/FW Protect; SLC Bypass Control; Cache
694  * Coherency, and Read Only (bits 62, 3, 2 and 1 respectively).
695  *
696  * This type should never be instantiated directly; instead use
697  * pvr_page_flags_raw_create() to ensure only valid bits of @val are set.
698  */
699 struct pvr_page_flags_raw {
700 	struct pvr_page_table_l0_entry_raw val;
701 } __packed;
702 static_assert(sizeof(struct pvr_page_flags_raw) ==
703 	      sizeof(struct pvr_page_table_l0_entry_raw));
704 
705 static bool
706 pvr_page_table_l0_entry_raw_is_valid(struct pvr_page_table_l0_entry_raw entry)
707 {
708 	return PVR_PAGE_TABLE_FIELD_GET(0, PT, VALID, entry);
709 }
710 
711 /**
712  * pvr_page_table_l0_entry_raw_set() - Write a valid entry into a raw level 0
713  *                                     page table.
714  * @entry: Target raw level 0 page table entry.
715  * @dma_addr: DMA address of the physical page to be associated with @entry.
716  * @flags: Options to be set on @entry.
717  *
718  * When calling this function, @dma_addr must be a valid DMA
719  * address and a multiple of %PVR_DEVICE_PAGE_SIZE.
720  *
721  * The @flags parameter is directly assigned into @entry. It is the caller's
722  * responsibility to ensure that only bits specified in
723  * &struct pvr_page_flags_raw are set in @flags.
724  */
725 static void
726 pvr_page_table_l0_entry_raw_set(struct pvr_page_table_l0_entry_raw *entry,
727 				dma_addr_t dma_addr,
728 				struct pvr_page_flags_raw flags)
729 {
730 	WRITE_ONCE(entry->val, PVR_PAGE_TABLE_FIELD_PREP(0, PT, VALID, true) |
731 			       PVR_PAGE_TABLE_FIELD_PREP(0, PT, ENTRY_PENDING, false) |
732 			       (dma_addr & ~ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK) |
733 			       flags.val.val);
734 }
735 
736 static void
737 pvr_page_table_l0_entry_raw_clear(struct pvr_page_table_l0_entry_raw *entry)
738 {
739 	WRITE_ONCE(entry->val, 0);
740 }
741 
742 /**
743  * pvr_page_flags_raw_create() - Initialize the flag bits of a raw level 0 page
744  *                               table entry.
745  * @read_only: This page is read-only (see: Read Only).
746  * @cache_coherent: This page does not require cache flushes (see: Cache
747  *                  Coherency).
748  * @slc_bypass: This page bypasses the device cache (see: SLC Bypass Control).
749  * @pm_fw_protect: This page is only for use by the firmware or Parameter
750  *                 Manager (see PM/FW Protect).
751  *
752  * For more details on the use of these four options, see their respective
753  * entries in the table under &struct pvr_page_table_l0_entry_raw.
754  *
755  * Return:
756  * A new &struct pvr_page_flags_raw instance which can be passed directly to
757  * pvr_page_table_l0_entry_raw_set() or pvr_page_table_l0_insert().
758  */
759 static struct pvr_page_flags_raw
760 pvr_page_flags_raw_create(bool read_only, bool cache_coherent, bool slc_bypass,
761 			  bool pm_fw_protect)
762 {
763 	struct pvr_page_flags_raw flags;
764 
765 	flags.val.val =
766 		PVR_PAGE_TABLE_FIELD_PREP(0, PT, READ_ONLY, read_only) |
767 		PVR_PAGE_TABLE_FIELD_PREP(0, PT, CC, cache_coherent) |
768 		PVR_PAGE_TABLE_FIELD_PREP(0, PT, SLC_BYPASS_CTRL, slc_bypass) |
769 		PVR_PAGE_TABLE_FIELD_PREP(0, PT, PM_META_PROTECT, pm_fw_protect);
770 
771 	return flags;
772 }
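/*
 * Usage sketch (illustrative; entry and dma_addr are placeholders): build
 * the flags once, then hand them to the L0 entry writer. For a read-only,
 * non-coherent, non-bypass, non-protected page:
 *
 *	struct pvr_page_flags_raw flags =
 *		pvr_page_flags_raw_create(true, false, false, false);
 *
 *	pvr_page_table_l0_entry_raw_set(entry, dma_addr, flags);
 */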
773 
774 /**
775  * struct pvr_page_table_l2_raw - The raw data of a level 2 page table.
776  *
777  * This type is a structure for type-checking purposes. At compile-time, its
778  * size is checked against %PVR_MMU_BACKING_PAGE_SIZE.
779  */
780 struct pvr_page_table_l2_raw {
781 	/** @entries: The raw values of this table. */
782 	struct pvr_page_table_l2_entry_raw
783 		entries[ROGUE_MMUCTRL_ENTRIES_PC_VALUE];
784 } __packed;
785 static_assert(sizeof(struct pvr_page_table_l2_raw) == PVR_MMU_BACKING_PAGE_SIZE);
786 
787 /**
788  * struct pvr_page_table_l1_raw - The raw data of a level 1 page table.
789  *
790  * This type is a structure for type-checking purposes. At compile-time, its
791  * size is checked against %PVR_MMU_BACKING_PAGE_SIZE.
792  */
793 struct pvr_page_table_l1_raw {
794 	/** @entries: The raw values of this table. */
795 	struct pvr_page_table_l1_entry_raw
796 		entries[ROGUE_MMUCTRL_ENTRIES_PD_VALUE];
797 } __packed;
798 static_assert(sizeof(struct pvr_page_table_l1_raw) == PVR_MMU_BACKING_PAGE_SIZE);
799 
800 /**
801  * struct pvr_page_table_l0_raw - The raw data of a level 0 page table.
802  *
803  * This type is a structure for type-checking purposes. At compile-time, its
804  * size is checked against %PVR_MMU_BACKING_PAGE_SIZE.
805  *
806  * .. caution::
807  *
808  *    The size of level 0 page tables is variable depending on the page size
809  *    specified in the associated level 1 page table entry. Since the device
810  *    page size in use is pegged to the host page size, it cannot vary at
811  *    runtime. This structure is therefore only defined to contain the required
812  *    number of entries for the current device page size. **You should never
813  *    read or write beyond the last supported entry.**
814  */
815 struct pvr_page_table_l0_raw {
816 	/** @entries: The raw values of this table. */
817 	struct pvr_page_table_l0_entry_raw
818 		entries[ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X];
819 } __packed;
820 static_assert(sizeof(struct pvr_page_table_l0_raw) <= PVR_MMU_BACKING_PAGE_SIZE);
821 
822 /**
823  * DOC: Mirror page tables
824  */
825 
826 /*
827  * We pre-declare these types because they cross-depend on pointers to each
828  * other.
829  */
830 struct pvr_page_table_l1;
831 struct pvr_page_table_l0;
832 
833 /**
834  * struct pvr_page_table_l2 - A wrapped level 2 page table.
835  *
836  * To access the raw part of this table, use pvr_page_table_l2_get_raw().
837  * Alternatively to access a raw entry directly, use
838  * pvr_page_table_l2_get_entry_raw().
839  *
840  * A level 2 page table forms the root of the page table tree structure, so
841  * this type has no &parent or &parent_idx members.
842  */
843 struct pvr_page_table_l2 {
844 	/**
845 	 * @entries: The children of this node in the page table tree
846 	 * structure. These are also mirror tables. The indexing of this array
847 	 * is identical to that of the raw equivalent
848 	 * (&pvr_page_table_l1_raw.entries).
849 	 */
850 	struct pvr_page_table_l1 *entries[ROGUE_MMUCTRL_ENTRIES_PC_VALUE];
851 
852 	/**
853 	 * @backing_page: A handle to the memory which holds the raw
854 	 * equivalent of this table. **For internal use only.**
855 	 */
856 	struct pvr_mmu_backing_page backing_page;
857 
858 	/**
859 	 * @entry_count: The current number of valid entries (that we know of)
860 	 * in this table. This value is essentially a refcount - the table is
861 	 * destroyed when this value is decremented to zero by
862 	 * pvr_page_table_l2_remove().
863 	 */
864 	u16 entry_count;
865 };
866 
867 /**
868  * pvr_page_table_l2_init() - Initialize a level 2 page table.
869  * @table: Target level 2 page table.
870  * @pvr_dev: Target PowerVR device
871  *
872  * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
873  * this function.
874  *
875  * Return:
876  *  * 0 on success, or
877  *  * Any error encountered while initializing &table->backing_page using
878  *    pvr_mmu_backing_page_init().
879  */
880 static int
881 pvr_page_table_l2_init(struct pvr_page_table_l2 *table,
882 		       struct pvr_device *pvr_dev)
883 {
884 	return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
885 }
886 
887 /**
888  * pvr_page_table_l2_fini() - Teardown a level 2 page table.
889  * @table: Target level 2 page table.
890  *
891  * It is an error to attempt to use @table after calling this function.
892  */
893 static void
894 pvr_page_table_l2_fini(struct pvr_page_table_l2 *table)
895 {
896 	pvr_mmu_backing_page_fini(&table->backing_page);
897 }
898 
899 /**
900  * pvr_page_table_l2_sync() - Flush a level 2 page table from the CPU to the
901  *                            device.
902  * @table: Target level 2 page table.
903  *
904  * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
905  * warning there applies here too: **Only call pvr_page_table_l2_sync() once
906  * you're sure you have no more changes to make to** @table **in the immediate
907  * future.**
908  *
909  * If child level 1 page tables of @table also need to be flushed, this should
910  * be done first using pvr_page_table_l1_sync() *before* calling this function.
911  */
912 static void
913 pvr_page_table_l2_sync(struct pvr_page_table_l2 *table)
914 {
915 	pvr_mmu_backing_page_sync(&table->backing_page, PVR_MMU_SYNC_LEVEL_2_FLAGS);
916 }
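/*
 * Ordering sketch (illustrative; l0, l1, l2 and pvr_dev are placeholders):
 * when several levels of a branch have been modified, sync from the leaves
 * towards the root, then execute the MMU cache flush that the sync helpers
 * have requested:
 *
 *	pvr_page_table_l0_sync(l0);
 *	pvr_page_table_l1_sync(l1);
 *	pvr_page_table_l2_sync(l2);
 *	pvr_mmu_flush_exec(pvr_dev, false);
 */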
917 
918 /**
919  * pvr_page_table_l2_get_raw() - Access the raw equivalent of a mirror level 2
920  *                               page table.
921  * @table: Target level 2 page table.
922  *
923  * Essentially returns the CPU address of the raw equivalent of @table, cast to
924  * a &struct pvr_page_table_l2_raw pointer.
925  *
926  * You probably want to call pvr_page_table_l2_get_entry_raw() instead.
927  *
928  * Return:
929  * The raw equivalent of @table.
930  */
931 static struct pvr_page_table_l2_raw *
932 pvr_page_table_l2_get_raw(struct pvr_page_table_l2 *table)
933 {
934 	return table->backing_page.host_ptr;
935 }
936 
937 /**
938  * pvr_page_table_l2_get_entry_raw() - Access an entry from the raw equivalent
939  *                                     of a mirror level 2 page table.
940  * @table: Target level 2 page table.
941  * @idx: Index of the entry to access.
942  *
943  * Technically this function returns a pointer to a slot in a raw level 2 page
944  * table, since the returned "entry" is not guaranteed to be valid. The caller
945  * must verify the validity of the entry at the returned address (perhaps using
946  * pvr_page_table_l2_entry_raw_is_valid()) before reading or overwriting it.
947  *
948  * The value of @idx is not checked here; it is the caller's responsibility to
949  * ensure @idx refers to a valid index within @table before dereferencing the
950  * returned pointer.
951  *
952  * Return:
953  * A pointer to the requested raw level 2 page table entry.
954  */
955 static struct pvr_page_table_l2_entry_raw *
956 pvr_page_table_l2_get_entry_raw(struct pvr_page_table_l2 *table, u16 idx)
957 {
958 	return &pvr_page_table_l2_get_raw(table)->entries[idx];
959 }
960 
961 /**
962  * pvr_page_table_l2_entry_is_valid() - Check if a level 2 page table entry is
963  *                                      marked as valid.
964  * @table: Target level 2 page table.
965  * @idx: Index of the entry to check.
966  *
967  * The value of @idx is not checked here; it is the caller's responsibility to
968  * ensure @idx refers to a valid index within @table before calling this
969  * function.
970  */
971 static bool
972 pvr_page_table_l2_entry_is_valid(struct pvr_page_table_l2 *table, u16 idx)
973 {
974 	struct pvr_page_table_l2_entry_raw entry_raw =
975 		*pvr_page_table_l2_get_entry_raw(table, idx);
976 
977 	return pvr_page_table_l2_entry_raw_is_valid(entry_raw);
978 }
979 
980 /**
981  * struct pvr_page_table_l1 - A wrapped level 1 page table.
982  *
983  * To access the raw part of this table, use pvr_page_table_l1_get_raw().
984  * Alternatively to access a raw entry directly, use
985  * pvr_page_table_l1_get_entry_raw().
986  */
987 struct pvr_page_table_l1 {
988 	/**
989 	 * @entries: The children of this node in the page table tree
990 	 * structure. These are also mirror tables. The indexing of this array
991 	 * is identical to that of the raw equivalent
992 	 * (&pvr_page_table_l1_raw.entries).
993 	 */
994 	struct pvr_page_table_l0 *entries[ROGUE_MMUCTRL_ENTRIES_PD_VALUE];
995 
996 	/**
997 	 * @backing_page: A handle to the memory which holds the raw
998 	 * equivalent of this table. **For internal use only.**
999 	 */
1000 	struct pvr_mmu_backing_page backing_page;
1001 
1002 	union {
1003 		/**
1004 		 * @parent: The parent of this node in the page table tree structure.
1005 		 *
1006 		 * This is also a mirror table.
1007 		 *
1008 		 * Only valid when the L1 page table is active. When the L1 page table
1009 		 * has been removed and queued for destruction, the next_free field
1010 		 * should be used instead.
1011 		 */
1012 		struct pvr_page_table_l2 *parent;
1013 
1014 		/**
1015 		 * @next_free: Pointer to the next L1 page table to take/free.
1016 		 *
1017 		 * Used to form a linked list of L1 page tables. This is used
1018 		 * when preallocating tables and when the page table has been
1019 		 * removed and queued for destruction.
1020 		 */
1021 		struct pvr_page_table_l1 *next_free;
1022 	};
1023 
1024 	/**
1025 	 * @parent_idx: The index of the entry in the parent table (see
1026 	 * @parent) which corresponds to this table.
1027 	 */
1028 	u16 parent_idx;
1029 
1030 	/**
1031 	 * @entry_count: The current number of valid entries (that we know of)
1032 	 * in this table. This value is essentially a refcount - the table is
1033 	 * destroyed when this value is decremented to zero by
1034 	 * pvr_page_table_l1_remove().
1035 	 */
1036 	u16 entry_count;
1037 };
1038 
1039 /**
1040  * pvr_page_table_l1_init() - Initialize a level 1 page table.
1041  * @table: Target level 1 page table.
1042  * @pvr_dev: Target PowerVR device
1043  *
1044  * When this function returns successfully, @table is still not considered
1045  * valid. It must be inserted into the page table tree structure with
1046  * pvr_page_table_l2_insert() before it is ready for use.
1047  *
1048  * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
1049  * this function.
1050  *
1051  * Return:
1052  *  * 0 on success, or
1053  *  * Any error encountered while initializing &table->backing_page using
1054  *    pvr_mmu_backing_page_init().
1055  */
1056 static int
1057 pvr_page_table_l1_init(struct pvr_page_table_l1 *table,
1058 		       struct pvr_device *pvr_dev)
1059 {
1060 	table->parent_idx = PVR_IDX_INVALID;
1061 
1062 	return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
1063 }
1064 
1065 /**
1066  * pvr_page_table_l1_free() - Teardown a level 1 page table.
1067  * @table: Target level 1 page table.
1068  *
1069  * It is an error to attempt to use @table after calling this function, even
1070  * indirectly. This includes calling pvr_page_table_l2_remove(), which must
1071  * be called *before* pvr_page_table_l1_free().
1072  */
1073 static void
1074 pvr_page_table_l1_free(struct pvr_page_table_l1 *table)
1075 {
1076 	pvr_mmu_backing_page_fini(&table->backing_page);
1077 	kfree(table);
1078 }
1079 
1080 /**
1081  * pvr_page_table_l1_sync() - Flush a level 1 page table from the CPU to the
1082  *                            device.
1083  * @table: Target level 1 page table.
1084  *
1085  * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
1086  * warning there applies here too: **Only call pvr_page_table_l1_sync() once
1087  * you're sure you have no more changes to make to** @table **in the immediate
1088  * future.**
1089  *
1090  * If child level 0 page tables of @table also need to be flushed, this should
1091  * be done first using pvr_page_table_l0_sync() *before* calling this function.
1092  */
1093 static void
1094 pvr_page_table_l1_sync(struct pvr_page_table_l1 *table)
1095 {
1096 	pvr_mmu_backing_page_sync(&table->backing_page, PVR_MMU_SYNC_LEVEL_1_FLAGS);
1097 }
1098 
1099 /**
1100  * pvr_page_table_l1_get_raw() - Access the raw equivalent of a mirror level 1
1101  *                               page table.
1102  * @table: Target level 1 page table.
1103  *
1104  * Essentially returns the CPU address of the raw equivalent of @table, cast to
1105  * a &struct pvr_page_table_l1_raw pointer.
1106  *
1107  * You probably want to call pvr_page_table_l1_get_entry_raw() instead.
1108  *
1109  * Return:
1110  * The raw equivalent of @table.
1111  */
1112 static struct pvr_page_table_l1_raw *
1113 pvr_page_table_l1_get_raw(struct pvr_page_table_l1 *table)
1114 {
1115 	return table->backing_page.host_ptr;
1116 }
1117 
1118 /**
1119  * pvr_page_table_l1_get_entry_raw() - Access an entry from the raw equivalent
1120  *                                     of a mirror level 1 page table.
1121  * @table: Target level 1 page table.
1122  * @idx: Index of the entry to access.
1123  *
1124  * Technically this function returns a pointer to a slot in a raw level 1 page
1125  * table, since the returned "entry" is not guaranteed to be valid. The caller
1126  * must verify the validity of the entry at the returned address (perhaps using
1127  * pvr_page_table_l1_entry_raw_is_valid()) before reading or overwriting it.
1128  *
1129  * The value of @idx is not checked here; it is the caller's responsibility to
1130  * ensure @idx refers to a valid index within @table before dereferencing the
1131  * returned pointer.
1132  *
1133  * Return:
1134  * A pointer to the requested raw level 1 page table entry.
1135  */
1136 static struct pvr_page_table_l1_entry_raw *
1137 pvr_page_table_l1_get_entry_raw(struct pvr_page_table_l1 *table, u16 idx)
1138 {
1139 	return &pvr_page_table_l1_get_raw(table)->entries[idx];
1140 }
1141 
1142 /**
1143  * pvr_page_table_l1_entry_is_valid() - Check if a level 1 page table entry is
1144  *                                      marked as valid.
1145  * @table: Target level 1 page table.
1146  * @idx: Index of the entry to check.
1147  *
1148  * The value of @idx is not checked here; it is the caller's responsibility to
1149  * ensure @idx refers to a valid index within @table before calling this
1150  * function.
1151  */
1152 static bool
1153 pvr_page_table_l1_entry_is_valid(struct pvr_page_table_l1 *table, u16 idx)
1154 {
1155 	struct pvr_page_table_l1_entry_raw entry_raw =
1156 		*pvr_page_table_l1_get_entry_raw(table, idx);
1157 
1158 	return pvr_page_table_l1_entry_raw_is_valid(entry_raw);
1159 }
1160 
1161 /**
1162  * struct pvr_page_table_l0 - A wrapped level 0 page table.
1163  *
1164  * To access the raw part of this table, use pvr_page_table_l0_get_raw().
1165  * Alternatively to access a raw entry directly, use
1166  * pvr_page_table_l0_get_entry_raw().
1167  *
1168  * There is no mirror representation of an individual page, so this type has no
1169  * &entries member.
1170  */
1171 struct pvr_page_table_l0 {
1172 	/**
1173 	 * @backing_page: A handle to the memory which holds the raw
1174 	 * equivalent of this table. **For internal use only.**
1175 	 */
1176 	struct pvr_mmu_backing_page backing_page;
1177 
1178 	union {
1179 		/**
1180 		 * @parent: The parent of this node in the page table tree structure.
1181 		 *
1182 		 * This is also a mirror table.
1183 		 *
1184 		 * Only valid when the L0 page table is active. When the L0 page table
1185 		 * has been removed and queued for destruction, the next_free field
1186 		 * should be used instead.
1187 		 */
1188 		struct pvr_page_table_l1 *parent;
1189 
1190 		/**
1191 		 * @next_free: Pointer to the next L0 page table to take/free.
1192 		 *
1193 		 * Used to form a linked list of L0 page tables. This is used
1194 		 * when preallocating tables and when the page table has been
1195 		 * removed and queued for destruction.
1196 		 */
1197 		struct pvr_page_table_l0 *next_free;
1198 	};
1199 
1200 	/**
1201 	 * @parent_idx: The index of the entry in the parent table (see
1202 	 * @parent) which corresponds to this table.
1203 	 */
1204 	u16 parent_idx;
1205 
1206 	/**
1207 	 * @entry_count: The current number of valid entries (that we know of)
1208 	 * in this table. This value is essentially a refcount - the table is
1209 	 * destroyed when this value is decremented to zero by
1210 	 * pvr_page_table_l0_remove().
1211 	 */
1212 	u16 entry_count;
1213 };
1214 
1215 /**
1216  * pvr_page_table_l0_init() - Initialize a level 0 page table.
1217  * @table: Target level 0 page table.
1218  * @pvr_dev: Target PowerVR device
1219  *
1220  * When this function returns successfully, @table is still not considered
1221  * valid. It must be inserted into the page table tree structure with
1222  * pvr_page_table_l1_insert() before it is ready for use.
1223  *
1224  * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
1225  * this function.
1226  *
1227  * Return:
1228  *  * 0 on success, or
1229  *  * Any error encountered while initializing &table->backing_page using
1230  *    pvr_mmu_backing_page_init().
1231  */
1232 static int
1233 pvr_page_table_l0_init(struct pvr_page_table_l0 *table,
1234 		       struct pvr_device *pvr_dev)
1235 {
1236 	table->parent_idx = PVR_IDX_INVALID;
1237 
1238 	return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
1239 }
1240 
1241 /**
1242  * pvr_page_table_l0_free() - Teardown a level 0 page table.
1243  * @table: Target level 0 page table.
1244  *
1245  * It is an error to attempt to use @table after calling this function, even
1246  * indirectly. This includes calling pvr_page_table_l1_remove(), which must
1247  * be called *before* pvr_page_table_l0_free().
1248  */
1249 static void
1250 pvr_page_table_l0_free(struct pvr_page_table_l0 *table)
1251 {
1252 	pvr_mmu_backing_page_fini(&table->backing_page);
1253 	kfree(table);
1254 }
1255 
1256 /**
1257  * pvr_page_table_l0_sync() - Flush a level 0 page table from the CPU to the
1258  *                            device.
1259  * @table: Target level 0 page table.
1260  *
1261  * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
1262  * warning there applies here too: **Only call pvr_page_table_l0_sync() once
1263  * you're sure you have no more changes to make to** @table **in the immediate
1264  * future.**
1265  *
1266  * If child pages of @table also need to be flushed, this should be done first
1267  * using a DMA sync function (e.g. dma_sync_sg_for_device()) *before* calling
1268  * this function.
1269  */
1270 static void
1271 pvr_page_table_l0_sync(struct pvr_page_table_l0 *table)
1272 {
1273 	pvr_mmu_backing_page_sync(&table->backing_page, PVR_MMU_SYNC_LEVEL_0_FLAGS);
1274 }
1275 
1276 /**
1277  * pvr_page_table_l0_get_raw() - Access the raw equivalent of a mirror level 0
1278  *                               page table.
1279  * @table: Target level 0 page table.
1280  *
1281  * Essentially returns the CPU address of the raw equivalent of @table, cast to
1282  * a &struct pvr_page_table_l0_raw pointer.
1283  *
1284  * You probably want to call pvr_page_table_l0_get_entry_raw() instead.
1285  *
1286  * Return:
1287  * The raw equivalent of @table.
1288  */
1289 static struct pvr_page_table_l0_raw *
1290 pvr_page_table_l0_get_raw(struct pvr_page_table_l0 *table)
1291 {
1292 	return table->backing_page.host_ptr;
1293 }
1294 
1295 /**
1296  * pvr_page_table_l0_get_entry_raw() - Access an entry from the raw equivalent
1297  *                                     of a mirror level 0 page table.
1298  * @table: Target level 0 page table.
1299  * @idx: Index of the entry to access.
1300  *
1301  * Technically this function returns a pointer to a slot in a raw level 0 page
1302  * table, since the returned "entry" is not guaranteed to be valid. The caller
1303  * must verify the validity of the entry at the returned address (perhaps using
1304  * pvr_page_table_l0_entry_raw_is_valid()) before reading or overwriting it.
1305  *
1306  * The value of @idx is not checked here; it is the caller's responsibility to
1307  * ensure @idx refers to a valid index within @table before dereferencing the
1308  * returned pointer. This is especially important for level 0 page tables, which
1309  * can have a variable number of entries.
1310  *
1311  * Return:
1312  * A pointer to the requested raw level 0 page table entry.
1313  */
1314 static struct pvr_page_table_l0_entry_raw *
1315 pvr_page_table_l0_get_entry_raw(struct pvr_page_table_l0 *table, u16 idx)
1316 {
1317 	return &pvr_page_table_l0_get_raw(table)->entries[idx];
1318 }
1319 
1320 /**
1321  * pvr_page_table_l0_entry_is_valid() - Check if a level 0 page table entry is
1322  *                                      marked as valid.
1323  * @table: Target level 0 page table.
1324  * @idx: Index of the entry to check.
1325  *
1326  * The value of @idx is not checked here; it is the caller's responsibility to
1327  * ensure @idx refers to a valid index within @table before calling this
1328  * function.
1329  */
1330 static bool
1331 pvr_page_table_l0_entry_is_valid(struct pvr_page_table_l0 *table, u16 idx)
1332 {
1333 	struct pvr_page_table_l0_entry_raw entry_raw =
1334 		*pvr_page_table_l0_get_entry_raw(table, idx);
1335 
1336 	return pvr_page_table_l0_entry_raw_is_valid(entry_raw);
1337 }
1338 
1339 /**
1340  * struct pvr_mmu_context - context holding data for operations at page
1341  * catalogue level, intended for use with a VM context.
1342  */
1343 struct pvr_mmu_context {
1344 	/** @pvr_dev: The PVR device associated with the owning VM context. */
1345 	struct pvr_device *pvr_dev;
1346 
1347 	/** @page_table_l2: The MMU table root. */
1348 	struct pvr_page_table_l2 page_table_l2;
1349 };
1350 
1351 /**
1352  * struct pvr_page_table_ptr - A reference to a single physical page as indexed
1353  * by the page table structure.
1354  *
1355  * Intended for embedding in a &struct pvr_mmu_op_context.
1356  */
1357 struct pvr_page_table_ptr {
1358 	/**
1359 	 * @l1_table: A cached handle to the level 1 page table the
1360 	 * context is currently traversing.
1361 	 */
1362 	struct pvr_page_table_l1 *l1_table;
1363 
1364 	/**
1365 	 * @l0_table: A cached handle to the level 0 page table the
1366 	 * context is currently traversing.
1367 	 */
1368 	struct pvr_page_table_l0 *l0_table;
1369 
1370 	/**
1371 	 * @l2_idx: Index into the level 2 page table the context is
1372 	 * currently referencing.
1373 	 */
1374 	u16 l2_idx;
1375 
1376 	/**
1377 	 * @l1_idx: Index into the level 1 page table the context is
1378 	 * currently referencing.
1379 	 */
1380 	u16 l1_idx;
1381 
1382 	/**
1383 	 * @l0_idx: Index into the level 0 page table the context is
1384 	 * currently referencing.
1385 	 */
1386 	u16 l0_idx;
1387 };
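/*
 * For orientation (derived from the table sizes above and assuming the
 * default 4KiB device page, which implies 512-entry L0/L1 tables and a
 * 1024-entry page catalogue): the three indices partition a device-virtual
 * address roughly as
 *
 *	l0_idx = (device_addr >> 12) & 0x1ff;	4KiB page within 2MiB
 *	l1_idx = (device_addr >> 21) & 0x1ff;	2MiB range within 1GiB
 *	l2_idx = (device_addr >> 30) & 0x3ff;	1GiB range within the catalogue
 *
 * The exact helpers are defined under "Page table index utilities" below.
 */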
1388 
1389 /**
1390  * struct pvr_mmu_op_context - context holding data for individual
1391  * device-virtual mapping operations. Intended for use with a VM bind operation.
1392  */
1393 struct pvr_mmu_op_context {
1394 	/** @mmu_ctx: The MMU context associated with the owning VM context. */
1395 	struct pvr_mmu_context *mmu_ctx;
1396 
1397 	/** @map: Data specifically for map operations. */
1398 	struct {
1399 		/**
1400 		 * @sgt: Scatter gather table containing pages pinned for use by
1401 		 * this context - these are currently pinned when initialising
1402 		 * the VM bind operation.
1403 		 */
1404 		struct sg_table *sgt;
1405 
1406 		/** @sgt_offset: Start address of the device-virtual mapping. */
1407 		u64 sgt_offset;
1408 
1409 		/**
1410 		 * @l1_prealloc_tables: Preallocated l1 page table objects
1411 		 * use by this context when creating a page mapping. Linked list
1412 		 * fully created during initialisation.
1413 		 */
1414 		struct pvr_page_table_l1 *l1_prealloc_tables;
1415 
1416 		/**
1417 		 * @l0_prealloc_tables: Preallocated l0 page table objects
1418 		 * use by this context when creating a page mapping. Linked list
1419 		 * fully created during initialisation.
1420 		 */
1421 		struct pvr_page_table_l0 *l0_prealloc_tables;
1422 	} map;
1423 
1424 	/** @unmap: Data specifically for unmap operations. */
1425 	struct {
1426 		/**
1427 		 * @l1_free_tables: Collects page table objects freed by unmap
1428 		 * ops. Linked list empty at creation.
1429 		 */
1430 		struct pvr_page_table_l1 *l1_free_tables;
1431 
1432 		/**
1433 		 * @l0_free_tables: Collects page table objects freed by unmap
1434 		 * ops. Linked list empty at creation.
1435 		 */
1436 		struct pvr_page_table_l0 *l0_free_tables;
1437 	} unmap;
1438 
1439 	/**
1440 	 * @curr_page: A reference to a single physical page as indexed by the
1441 	 * page table structure.
1442 	 */
1443 	struct pvr_page_table_ptr curr_page;
1444 
1445 	/**
1446 	 * @sync_level_required: The maximum level of the page table tree
1447 	 * structure which has (possibly) been modified since it was last
1448 	 * flushed to the device.
1449 	 *
1450 	 * This field should only be set with pvr_mmu_op_context_require_sync()
1451 	 * or indirectly by pvr_mmu_op_context_sync_partial().
1452 	 */
1453 	enum pvr_mmu_sync_level sync_level_required;
1454 };
1455 
1456 /**
1457  * pvr_page_table_l2_insert() - Insert an entry referring to a level 1 page
1458  * table into a level 2 page table.
1459  * @op_ctx: Target MMU op context pointing at the entry to insert the L1 page
1460  * table into.
1461  * @child_table: Target level 1 page table to be referenced by the new entry.
1462  *
1463  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1464  * valid L2 entry.
1465  *
1466  * It is the caller's responsibility to execute any memory barriers to ensure
1467  * that the creation of @child_table is ordered before the L2 entry is inserted.
1468  */
1469 static void
1470 pvr_page_table_l2_insert(struct pvr_mmu_op_context *op_ctx,
1471 			 struct pvr_page_table_l1 *child_table)
1472 {
1473 	struct pvr_page_table_l2 *l2_table =
1474 		&op_ctx->mmu_ctx->page_table_l2;
1475 	struct pvr_page_table_l2_entry_raw *entry_raw =
1476 		pvr_page_table_l2_get_entry_raw(l2_table,
1477 						op_ctx->curr_page.l2_idx);
1478 
1479 	pvr_page_table_l2_entry_raw_set(entry_raw,
1480 					child_table->backing_page.dma_addr);
1481 
1482 	child_table->parent = l2_table;
1483 	child_table->parent_idx = op_ctx->curr_page.l2_idx;
1484 	l2_table->entries[op_ctx->curr_page.l2_idx] = child_table;
1485 	++l2_table->entry_count;
1486 	op_ctx->curr_page.l1_table = child_table;
1487 }
1488 
1489 /**
1490  * pvr_page_table_l2_remove() - Remove a level 1 page table from a level 2 page
1491  * table.
1492  * @op_ctx: Target MMU op context pointing at the L2 entry to remove.
1493  *
1494  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1495  * valid L2 entry.
1496  */
1497 static void
1498 pvr_page_table_l2_remove(struct pvr_mmu_op_context *op_ctx)
1499 {
1500 	struct pvr_page_table_l2 *l2_table =
1501 		&op_ctx->mmu_ctx->page_table_l2;
1502 	struct pvr_page_table_l2_entry_raw *entry_raw =
1503 		pvr_page_table_l2_get_entry_raw(l2_table,
1504 						op_ctx->curr_page.l1_table->parent_idx);
1505 
1506 	WARN_ON(op_ctx->curr_page.l1_table->parent != l2_table);
1507 
1508 	pvr_page_table_l2_entry_raw_clear(entry_raw);
1509 
1510 	l2_table->entries[op_ctx->curr_page.l1_table->parent_idx] = NULL;
1511 	op_ctx->curr_page.l1_table->parent_idx = PVR_IDX_INVALID;
1512 	op_ctx->curr_page.l1_table->next_free = op_ctx->unmap.l1_free_tables;
1513 	op_ctx->unmap.l1_free_tables = op_ctx->curr_page.l1_table;
1514 	op_ctx->curr_page.l1_table = NULL;
1515 
1516 	--l2_table->entry_count;
1517 }
1518 
1519 /**
1520  * pvr_page_table_l1_insert() - Insert an entry referring to a level 0 page
1521  * table into a level 1 page table.
1522  * @op_ctx: Target MMU op context pointing at the entry to insert the L0 page
1523  * table into.
1524  * @child_table: L0 page table to insert.
1525  *
1526  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1527  * valid L1 entry.
1528  *
1529  * It is the caller's responsibility to execute any memory barriers to ensure
1530  * that the creation of @child_table is ordered before the L1 entry is inserted.
1531  */
1532 static void
1533 pvr_page_table_l1_insert(struct pvr_mmu_op_context *op_ctx,
1534 			 struct pvr_page_table_l0 *child_table)
1535 {
1536 	struct pvr_page_table_l1_entry_raw *entry_raw =
1537 		pvr_page_table_l1_get_entry_raw(op_ctx->curr_page.l1_table,
1538 						op_ctx->curr_page.l1_idx);
1539 
1540 	pvr_page_table_l1_entry_raw_set(entry_raw,
1541 					child_table->backing_page.dma_addr);
1542 
1543 	child_table->parent = op_ctx->curr_page.l1_table;
1544 	child_table->parent_idx = op_ctx->curr_page.l1_idx;
1545 	op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l1_idx] = child_table;
1546 	++op_ctx->curr_page.l1_table->entry_count;
1547 	op_ctx->curr_page.l0_table = child_table;
1548 }
1549 
1550 /**
1551  * pvr_page_table_l1_remove() - Remove a level 0 page table from a level 1 page
1552  *                              table.
1553  * @op_ctx: Target MMU op context pointing at the L1 entry to remove.
1554  *
1555  * If this function results in the L1 table becoming empty, it will be removed
1556  * from its parent level 2 page table and destroyed.
1557  *
1558  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1559  * valid L1 entry.
1560  */
1561 static void
1562 pvr_page_table_l1_remove(struct pvr_mmu_op_context *op_ctx)
1563 {
1564 	struct pvr_page_table_l1_entry_raw *entry_raw =
1565 		pvr_page_table_l1_get_entry_raw(op_ctx->curr_page.l0_table->parent,
1566 						op_ctx->curr_page.l0_table->parent_idx);
1567 
1568 	WARN_ON(op_ctx->curr_page.l0_table->parent !=
1569 		op_ctx->curr_page.l1_table);
1570 
1571 	pvr_page_table_l1_entry_raw_clear(entry_raw);
1572 
1573 	op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l0_table->parent_idx] = NULL;
1574 	op_ctx->curr_page.l0_table->parent_idx = PVR_IDX_INVALID;
1575 	op_ctx->curr_page.l0_table->next_free = op_ctx->unmap.l0_free_tables;
1576 	op_ctx->unmap.l0_free_tables = op_ctx->curr_page.l0_table;
1577 	op_ctx->curr_page.l0_table = NULL;
1578 
1579 	if (--op_ctx->curr_page.l1_table->entry_count == 0) {
1580 		/* Clear the parent L2 page table entry. */
1581 		if (op_ctx->curr_page.l1_table->parent_idx != PVR_IDX_INVALID)
1582 			pvr_page_table_l2_remove(op_ctx);
1583 	}
1584 }
1585 
1586 /**
1587  * pvr_page_table_l0_insert() - Insert an entry referring to a physical page
1588  * into a level 0 page table.
1589  * @op_ctx: Target MMU op context pointing at the L0 entry to insert.
1590  * @dma_addr: Target DMA address to be referenced by the new entry.
1591  * @flags: Page options to be stored in the new entry.
1592  *
1593  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1594  * valid L0 entry.
1595  */
1596 static void
1597 pvr_page_table_l0_insert(struct pvr_mmu_op_context *op_ctx,
1598 			 dma_addr_t dma_addr, struct pvr_page_flags_raw flags)
1599 {
1600 	struct pvr_page_table_l0_entry_raw *entry_raw =
1601 		pvr_page_table_l0_get_entry_raw(op_ctx->curr_page.l0_table,
1602 						op_ctx->curr_page.l0_idx);
1603 
1604 	pvr_page_table_l0_entry_raw_set(entry_raw, dma_addr, flags);
1605 
1606 	/*
1607 	 * There is no entry to set here - we don't keep a mirror of
1608 	 * individual pages.
1609 	 */
1610 
1611 	++op_ctx->curr_page.l0_table->entry_count;
1612 }
1613 
1614 /**
1615  * pvr_page_table_l0_remove() - Remove a physical page from a level 0 page
1616  * table.
1617  * @op_ctx: Target MMU op context pointing at the L0 entry to remove.
1618  *
1619  * If this function results in the L0 table becoming empty, it will be removed
1620  * from its parent L1 page table and queued for destruction.
1621  *
1622  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1623  * valid L0 entry.
1624  */
1625 static void
1626 pvr_page_table_l0_remove(struct pvr_mmu_op_context *op_ctx)
1627 {
1628 	struct pvr_page_table_l0_entry_raw *entry_raw =
1629 		pvr_page_table_l0_get_entry_raw(op_ctx->curr_page.l0_table,
1630 						op_ctx->curr_page.l0_idx);
1631 
1632 	pvr_page_table_l0_entry_raw_clear(entry_raw);
1633 
1634 	/*
1635 	 * There is no entry to clear here - we don't keep a mirror of
1636 	 * individual pages.
1637 	 */
1638 
1639 	if (--op_ctx->curr_page.l0_table->entry_count == 0) {
1640 		/* Clear the parent L1 page table entry. */
1641 		if (op_ctx->curr_page.l0_table->parent_idx != PVR_IDX_INVALID)
1642 			pvr_page_table_l1_remove(op_ctx);
1643 	}
1644 }
1645 
1646 /**
1647  * DOC: Page table index utilities
1648  */
1649 
1650 /**
1651  * pvr_page_table_l2_idx() - Calculate the level 2 page table index for a
1652  *                           device-virtual address.
1653  * @device_addr: Target device-virtual address.
1654  *
1655  * This function does not perform any bounds checking - it is the caller's
1656  * responsibility to ensure that @device_addr is valid before interpreting
1657  * the result.
1658  *
1659  * Return:
1660  * The index into a level 2 page table corresponding to @device_addr.
1661  */
1662 static u16
1663 pvr_page_table_l2_idx(u64 device_addr)
1664 {
1665 	return (device_addr & ~ROGUE_MMUCTRL_VADDR_PC_INDEX_CLRMSK) >>
1666 	       ROGUE_MMUCTRL_VADDR_PC_INDEX_SHIFT;
1667 }
1668 
1669 /**
1670  * pvr_page_table_l1_idx() - Calculate the level 1 page table index for a
1671  *                           device-virtual address.
1672  * @device_addr: Target device-virtual address.
1673  *
1674  * This function does not perform any bounds checking - it is the caller's
1675  * responsibility to ensure that @device_addr is valid before interpreting
1676  * the result.
1677  *
1678  * Return:
1679  * The index into a level 1 page table corresponding to @device_addr.
1680  */
1681 static u16
1682 pvr_page_table_l1_idx(u64 device_addr)
1683 {
1684 	return (device_addr & ~ROGUE_MMUCTRL_VADDR_PD_INDEX_CLRMSK) >>
1685 	       ROGUE_MMUCTRL_VADDR_PD_INDEX_SHIFT;
1686 }
1687 
1688 /**
1689  * pvr_page_table_l0_idx() - Calculate the level 0 page table index for a
1690  *                           device-virtual address.
1691  * @device_addr: Target device-virtual address.
1692  *
1693  * This function does not perform any bounds checking - it is the caller's
1694  * responsibility to ensure that @device_addr is valid before interpreting
1695  * the result.
1696  *
1697  * Return:
1698  * The index into a level 0 page table corresponding to @device_addr.
1699  */
1700 static u16
1701 pvr_page_table_l0_idx(u64 device_addr)
1702 {
1703 	return (device_addr & ~ROGUE_MMUCTRL_VADDR_PT_INDEX_CLRMSK) >>
1704 	       ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT;
1705 }
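
/*
 * Worked sketch of the three index helpers above, assuming the 4 KiB device
 * page configuration and the 40-bit Rogue virtual address layout (the
 * authoritative shift/mask values are the ROGUE_MMUCTRL_* definitions, not
 * the literal bit positions quoted here):
 *
 *	u16 l2_idx = pvr_page_table_l2_idx(addr);  (bits 39..30, 1 GiB per entry)
 *	u16 l1_idx = pvr_page_table_l1_idx(addr);  (bits 29..21, 2 MiB per entry)
 *	u16 l0_idx = pvr_page_table_l0_idx(addr);  (bits 20..12, 4 KiB per entry)
 *
 * Larger device page sizes widen the page-offset field and shrink the L0
 * index range accordingly (see ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X).
 */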
1706 
1707 /**
1708  * DOC: High-level page table operations
1709  */
1710 
1711 /**
1712  * pvr_page_table_l1_get_or_insert() - Retrieves (optionally inserting if
1713  * necessary) a level 1 page table from the specified level 2 page table entry.
1714  * @op_ctx: Target MMU op context.
1715  * @should_insert: [IN] Specifies whether new page tables should be inserted
1716  * when empty page table entries are encountered during traversal.
1717  *
1718  * Return:
1719  *  * 0 on success, or
1720  *
1721  *    If @should_insert is %false:
1722  *     * -%ENXIO if a level 1 page table would have been inserted.
1723  *
1724  *    If @should_insert is %true:
1725  *     * Any error encountered while inserting the level 1 page table.
1726  */
1727 static int
1728 pvr_page_table_l1_get_or_insert(struct pvr_mmu_op_context *op_ctx,
1729 				bool should_insert)
1730 {
1731 	struct pvr_page_table_l2 *l2_table =
1732 		&op_ctx->mmu_ctx->page_table_l2;
1733 	struct pvr_page_table_l1 *table;
1734 
1735 	if (pvr_page_table_l2_entry_is_valid(l2_table,
1736 					     op_ctx->curr_page.l2_idx)) {
1737 		op_ctx->curr_page.l1_table =
1738 			l2_table->entries[op_ctx->curr_page.l2_idx];
1739 		return 0;
1740 	}
1741 
1742 	if (!should_insert)
1743 		return -ENXIO;
1744 
1745 	/* Take a prealloced table. */
1746 	table = op_ctx->map.l1_prealloc_tables;
1747 	if (!table)
1748 		return -ENOMEM;
1749 
1750 	/* Pop */
1751 	op_ctx->map.l1_prealloc_tables = table->next_free;
1752 	table->next_free = NULL;
1753 
1754 	/* Ensure new table is fully written out before adding to L2 page table. */
1755 	wmb();
1756 
1757 	pvr_page_table_l2_insert(op_ctx, table);
1758 
1759 	return 0;
1760 }
1761 
1762 /**
1763  * pvr_page_table_l0_get_or_insert() - Retrieves (optionally inserting if
1764  * necessary) a level 0 page table from the specified level 1 page table entry.
1765  * @op_ctx: Target MMU op context.
1766  * @should_insert: [IN] Specifies whether new page tables should be inserted
1767  * when empty page table entries are encountered during traversal.
1768  *
1769  * Return:
1770  *  * 0 on success,
1771  *
1772  *    If @should_insert is %false:
1773  *     * -%ENXIO if a level 0 page table would have been inserted.
1774  *
1775  *    If @should_insert is %true:
1776  *     * Any error encountered while inserting the level 0 page table.
1777  */
1778 static int
1779 pvr_page_table_l0_get_or_insert(struct pvr_mmu_op_context *op_ctx,
1780 				bool should_insert)
1781 {
1782 	struct pvr_page_table_l0 *table;
1783 
1784 	if (pvr_page_table_l1_entry_is_valid(op_ctx->curr_page.l1_table,
1785 					     op_ctx->curr_page.l1_idx)) {
1786 		op_ctx->curr_page.l0_table =
1787 			op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l1_idx];
1788 		return 0;
1789 	}
1790 
1791 	if (!should_insert)
1792 		return -ENXIO;
1793 
1794 	/* Take a prealloced table. */
1795 	table = op_ctx->map.l0_prealloc_tables;
1796 	if (!table)
1797 		return -ENOMEM;
1798 
1799 	/* Pop */
1800 	op_ctx->map.l0_prealloc_tables = table->next_free;
1801 	table->next_free = NULL;
1802 
1803 	/* Ensure new table is fully written out before adding to L1 page table. */
1804 	wmb();
1805 
1806 	pvr_page_table_l1_insert(op_ctx, table);
1807 
1808 	return 0;
1809 }
1810 
1811 /**
1812  * pvr_mmu_context_create() - Create an MMU context.
1813  * @pvr_dev: PVR device associated with owning VM context.
1814  *
1815  * Returns:
1816  *  * Newly created MMU context object on success, or
1817  *  * -%ENOMEM if no memory is available,
1818  *  * Any error code returned by pvr_page_table_l2_init().
1819  */
1820 struct pvr_mmu_context *pvr_mmu_context_create(struct pvr_device *pvr_dev)
1821 {
1822 	struct pvr_mmu_context *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
1823 	int err;
1824 
1825 	if (!ctx)
1826 		return ERR_PTR(-ENOMEM);
1827 
1828 	err = pvr_page_table_l2_init(&ctx->page_table_l2, pvr_dev);
1829 	if (err) {
		kfree(ctx);
		return ERR_PTR(err);
	}
1831 
1832 	ctx->pvr_dev = pvr_dev;
1833 
1834 	return ctx;
1835 }
1836 
1837 /**
1838  * pvr_mmu_context_destroy() - Destroy an MMU context.
1839  * @ctx: Target MMU context.
1840  */
1841 void pvr_mmu_context_destroy(struct pvr_mmu_context *ctx)
1842 {
1843 	pvr_page_table_l2_fini(&ctx->page_table_l2);
1844 	kfree(ctx);
1845 }
1846 
1847 /**
1848  * pvr_mmu_get_root_table_dma_addr() - Get the DMA address of the root of the
1849  * page table structure behind a VM context.
1850  * @ctx: Target MMU context.
1851  */
1852 dma_addr_t pvr_mmu_get_root_table_dma_addr(struct pvr_mmu_context *ctx)
1853 {
1854 	return ctx->page_table_l2.backing_page.dma_addr;
1855 }
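
/*
 * Illustrative lifecycle of an MMU context (a sketch, not a real caller;
 * error handling trimmed). The root table address returned above is what the
 * owning VM context needs so the firmware can point the GPU MMU at this page
 * table tree:
 *
 *	struct pvr_mmu_context *mmu_ctx = pvr_mmu_context_create(pvr_dev);
 *	dma_addr_t root;
 *
 *	if (IS_ERR(mmu_ctx))
 *		return PTR_ERR(mmu_ctx);
 *
 *	root = pvr_mmu_get_root_table_dma_addr(mmu_ctx);
 *	... hand root to the firmware memory context ...
 *
 *	pvr_mmu_context_destroy(mmu_ctx);
 */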
1856 
1857 /**
1858  * pvr_page_table_l1_alloc() - Allocate an L1 page table object.
1859  * @ctx: MMU context of owning VM context.
1860  *
1861  * Returns:
1862  *  * Newly created page table object on success, or
1863  *  * -%ENOMEM if no memory is available,
1864  *  * Any error code returned by pvr_page_table_l1_init().
1865  */
1866 static struct pvr_page_table_l1 *
1867 pvr_page_table_l1_alloc(struct pvr_mmu_context *ctx)
1868 {
1869 	int err;
1870 
1871 	struct pvr_page_table_l1 *table =
1872 		kzalloc(sizeof(*table), GFP_KERNEL);
1873 
1874 	if (!table)
1875 		return ERR_PTR(-ENOMEM);
1876 
1877 	err = pvr_page_table_l1_init(table, ctx->pvr_dev);
1878 	if (err) {
1879 		kfree(table);
1880 		return ERR_PTR(err);
1881 	}
1882 
1883 	return table;
1884 }
1885 
1886 /**
1887  * pvr_page_table_l0_alloc() - Allocate an L0 page table object.
1888  * @ctx: MMU context of owning VM context.
1889  *
1890  * Returns:
1891  *  * Newly created page table object on success, or
1892  *  * -%ENOMEM if no memory is available,
1893  *  * Any error code returned by pvr_page_table_l0_init().
1894  */
1895 static struct pvr_page_table_l0 *
1896 pvr_page_table_l0_alloc(struct pvr_mmu_context *ctx)
1897 {
1898 	int err;
1899 
1900 	struct pvr_page_table_l0 *table =
1901 		kzalloc(sizeof(*table), GFP_KERNEL);
1902 
1903 	if (!table)
1904 		return ERR_PTR(-ENOMEM);
1905 
1906 	err = pvr_page_table_l0_init(table, ctx->pvr_dev);
1907 	if (err) {
1908 		kfree(table);
1909 		return ERR_PTR(err);
1910 	}
1911 
1912 	return table;
1913 }
1914 
1915 /**
1916  * pvr_mmu_op_context_require_sync() - Mark an MMU op context as requiring a
1917  * sync operation for the referenced page tables up to a specified level.
1918  * @op_ctx: Target MMU op context.
1919  * @level: Maximum page table level for which a sync is required.
1920  */
1921 static void
1922 pvr_mmu_op_context_require_sync(struct pvr_mmu_op_context *op_ctx,
1923 				enum pvr_mmu_sync_level level)
1924 {
1925 	if (op_ctx->sync_level_required < level)
1926 		op_ctx->sync_level_required = level;
1927 }
1928 
1929 /**
1930  * pvr_mmu_op_context_sync_manual() - Trigger a sync of some or all of the
1931  * page tables referenced by an MMU op context.
1932  * @op_ctx: Target MMU op context.
1933  * @level: Maximum page table level to sync.
1934  *
1935  * Do not call this function directly. Instead use
1936  * pvr_mmu_op_context_sync_partial() which is checked against the current
1937  * value of &op_ctx->sync_level_required as set by
1938  * pvr_mmu_op_context_require_sync().
1939  */
1940 static void
1941 pvr_mmu_op_context_sync_manual(struct pvr_mmu_op_context *op_ctx,
1942 			       enum pvr_mmu_sync_level level)
1943 {
1944 	/*
1945 	 * We sync the page table levels in ascending order (starting from the
1946 	 * leaf node) to ensure consistency.
1947 	 */
1948 
1949 	WARN_ON(level < PVR_MMU_SYNC_LEVEL_NONE);
1950 
1951 	if (level <= PVR_MMU_SYNC_LEVEL_NONE)
1952 		return;
1953 
1954 	if (op_ctx->curr_page.l0_table)
1955 		pvr_page_table_l0_sync(op_ctx->curr_page.l0_table);
1956 
1957 	if (level < PVR_MMU_SYNC_LEVEL_1)
1958 		return;
1959 
1960 	if (op_ctx->curr_page.l1_table)
1961 		pvr_page_table_l1_sync(op_ctx->curr_page.l1_table);
1962 
1963 	if (level < PVR_MMU_SYNC_LEVEL_2)
1964 		return;
1965 
1966 	pvr_page_table_l2_sync(&op_ctx->mmu_ctx->page_table_l2);
1967 }
1968 
1969 /**
1970  * pvr_mmu_op_context_sync_partial() - Trigger a sync of some or all of the
1971  * page tables referenced by an MMU op context.
1972  * @op_ctx: Target MMU op context.
1973  * @level: Requested page table level to sync up to (inclusive).
1974  *
1975  * If @level is greater than the maximum level recorded by @op_ctx as requiring
1976  * a sync operation, only the previously recorded maximum will be used.
1977  *
1978  * Additionally, if @level is greater than or equal to the maximum level
1979  * recorded by @op_ctx as requiring a sync operation, that maximum level will be
1980  * reset as a full sync will be performed. This is equivalent to calling
1981  * pvr_mmu_op_context_sync().
1982  */
1983 static void
1984 pvr_mmu_op_context_sync_partial(struct pvr_mmu_op_context *op_ctx,
1985 				enum pvr_mmu_sync_level level)
1986 {
1987 	/*
1988 	 * If the requested sync level is greater than or equal to the
1989 	 * currently required sync level, we do two things:
1990 	 *  * Don't waste time syncing levels we haven't previously marked as
1991 	 *    requiring a sync, and
1992 	 *  * Reset the required sync level since we are about to sync
1993 	 *    everything that was previously marked as requiring a sync.
1994 	 */
1995 	if (level >= op_ctx->sync_level_required) {
1996 		level = op_ctx->sync_level_required;
1997 		op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
1998 	}
1999 
2000 	pvr_mmu_op_context_sync_manual(op_ctx, level);
2001 }
2002 
2003 /**
2004  * pvr_mmu_op_context_sync() - Trigger a sync of every page table referenced by
2005  * an MMU op context.
2006  * @op_ctx: Target MMU op context.
2007  *
2008  * The maximum level marked internally as requiring a sync will be reset so
2009  * that subsequent calls to this function will be no-ops unless @op_ctx is
2010  * otherwise updated.
2011  */
2012 static void
2013 pvr_mmu_op_context_sync(struct pvr_mmu_op_context *op_ctx)
2014 {
2015 	pvr_mmu_op_context_sync_manual(op_ctx, op_ctx->sync_level_required);
2016 
2017 	op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
2018 }
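
/*
 * Typical interplay of the sync helpers above (illustrative): code that
 * modifies page table entries records the deepest level it touched with
 * pvr_mmu_op_context_require_sync(); a later single call to
 * pvr_mmu_op_context_sync() then flushes from the leaf level up to that
 * recorded maximum and resets it.
 *
 *	pvr_page_table_l0_insert(op_ctx, dma_addr, flags);
 *	pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0);
 *	...
 *	pvr_mmu_op_context_sync(op_ctx);
 */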
2019 
2020 /**
2021  * pvr_mmu_op_context_load_tables() - Load pointers to tables in each level of
2022  * the page table tree structure needed to reference the physical page
2023  * referenced by an MMU op context.
2024  * @op_ctx: Target MMU op context.
2025  * @should_create: Specifies whether new page tables should be created when
2026  * empty page table entries are encountered during traversal.
2027  * @load_level_required: Maximum page table level to load.
2028  *
2029  * If @should_create is %true, this function may modify the stored required
2030  * sync level of @op_ctx as new page tables are created and inserted into their
2031  * respective parents.
2032  *
2033  * Since there is only one root page table, it is technically incorrect to call
2034  * this function with a value of @load_level_required greater than or equal to
2035  * the root level number. However, this is not explicitly disallowed here.
2036  *
2037  * Return:
2038  *  * 0 on success,
2039  *  * Any error returned by pvr_page_table_l1_get_or_insert() if
2040  *    @load_level_required >= 1 except -%ENXIO, or
2041  *  * Any error returned by pvr_page_table_l0_get_or_insert() if
2042  *    @load_level_required >= 0 except -%ENXIO.
2043  */
2044 static int
2045 pvr_mmu_op_context_load_tables(struct pvr_mmu_op_context *op_ctx,
2046 			       bool should_create,
2047 			       enum pvr_mmu_sync_level load_level_required)
2048 {
2049 	const struct pvr_page_table_l1 *l1_head_before =
2050 		op_ctx->map.l1_prealloc_tables;
2051 	const struct pvr_page_table_l0 *l0_head_before =
2052 		op_ctx->map.l0_prealloc_tables;
2053 	int err;
2054 
2055 	/* Clear tables we're about to fetch in case of error states. */
2056 	if (load_level_required >= PVR_MMU_SYNC_LEVEL_1)
2057 		op_ctx->curr_page.l1_table = NULL;
2058 
2059 	if (load_level_required >= PVR_MMU_SYNC_LEVEL_0)
2060 		op_ctx->curr_page.l0_table = NULL;
2061 
2062 	/* Get or create L1 page table. */
2063 	if (load_level_required >= PVR_MMU_SYNC_LEVEL_1) {
2064 		err = pvr_page_table_l1_get_or_insert(op_ctx, should_create);
2065 		if (err) {
2066 			/*
2067 			 * If @should_create is %false and no L1 page table was
2068 			 * found, return early but without an error. Since
2069 			 * pvr_page_table_l1_get_or_insert() can only return
2070 			 * -%ENXIO if @should_create is %false, there is no
2071 			 * need to check it here.
2072 			 */
2073 			if (err == -ENXIO)
2074 				err = 0;
2075 
2076 			return err;
2077 		}
2078 	}
2079 
2080 	/* Get or create L0 page table. */
2081 	if (load_level_required >= PVR_MMU_SYNC_LEVEL_0) {
2082 		err = pvr_page_table_l0_get_or_insert(op_ctx, should_create);
2083 		if (err) {
2084 			/*
2085 			 * If @should_create is %false and no L0 page table was
2086 			 * found, return early but without an error. Since
2087 			 * pvr_page_table_l0_get_or_insert() can only return
2088 			 * -%ENXIO if @should_create is %false, there is no
2089 			 * need to check it here.
2090 			 */
2091 			if (err == -ENXIO)
2092 				err = 0;
2093 
2094 			/*
2095 			 * At this point, an L1 page table could have been
2096 			 * inserted but is now empty due to the failed attempt
2097 			 * at inserting an L0 page table. In this instance, we
2098 			 * must remove the empty L1 page table ourselves as
2099 			 * pvr_page_table_l1_remove() is never called as part
2100 			 * of the error path in
2101 			 * pvr_page_table_l0_get_or_insert().
2102 			 */
2103 			if (l1_head_before != op_ctx->map.l1_prealloc_tables) {
2104 				pvr_page_table_l2_remove(op_ctx);
2105 				pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_2);
2106 			}
2107 
2108 			return err;
2109 		}
2110 	}
2111 
2112 	/*
2113 	 * A sync is only needed if table objects were inserted. This can be
2114 	 * inferred by checking if the pointer at the head of the linked list
2115 	 * has changed.
2116 	 */
2117 	if (l1_head_before != op_ctx->map.l1_prealloc_tables)
2118 		pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_2);
2119 	else if (l0_head_before != op_ctx->map.l0_prealloc_tables)
2120 		pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_1);
2121 
2122 	return 0;
2123 }
2124 
2125 /**
2126  * pvr_mmu_op_context_set_curr_page() - Reassign the current page of an MMU op
2127  * context, syncing any page tables previously assigned to it which are no
2128  * longer relevant.
2129  * @op_ctx: Target MMU op context.
2130  * @device_addr: New pointer target.
2131  * @should_create: Specify whether new page tables should be created when
2132  * empty page table entries are encountered during traversal.
2133  *
2134  * This function performs a full sync of the page tables previously
2135  * referenced by @op_ctx, regardless of which levels are modified.
2136  *
2137  * Return:
2138  *  * 0 on success, or
2139  *  * Any error returned by pvr_mmu_op_context_load_tables().
2140  */
2141 static int
2142 pvr_mmu_op_context_set_curr_page(struct pvr_mmu_op_context *op_ctx,
2143 				 u64 device_addr, bool should_create)
2144 {
2145 	pvr_mmu_op_context_sync(op_ctx);
2146 
2147 	op_ctx->curr_page.l2_idx = pvr_page_table_l2_idx(device_addr);
2148 	op_ctx->curr_page.l1_idx = pvr_page_table_l1_idx(device_addr);
2149 	op_ctx->curr_page.l0_idx = pvr_page_table_l0_idx(device_addr);
2150 	op_ctx->curr_page.l1_table = NULL;
2151 	op_ctx->curr_page.l0_table = NULL;
2152 
2153 	return pvr_mmu_op_context_load_tables(op_ctx, should_create,
2154 					      PVR_MMU_SYNC_LEVEL_1);
2155 }
2156 
2157 /**
2158  * pvr_mmu_op_context_next_page() - Advance the current page of an MMU op
2159  * context.
2160  * @op_ctx: Target MMU op context.
2161  * @should_create: Specify whether new page tables should be created when
2162  * empty page table entries are encountered during traversal.
2163  *
2164  * If @should_create is %false, it is the caller's responsibility to verify that
2165  * the state of the table references in @op_ctx is valid on return. If -%ENXIO
2166  * is returned, at least one of the table references is invalid. It should be
2167  * noted that @op_ctx as a whole will be left in a valid state if -%ENXIO is
2168  * returned, unlike other error codes. The caller should check which references
2169  * are invalid by comparing them to %NULL. Only the L2 root page table,
2170  * &op_ctx->mmu_ctx->page_table_l2, is guaranteed to be valid.
2171  *
2172  * Return:
2173  *  * 0 on success,
2174  *  * -%EPERM if the operation would wrap at the top of the page table
2175  *    hierarchy,
2176  *  * -%ENXIO if @should_create is %false and a page table of any level would
2177  *    have otherwise been created, or
2178  *  * Any error returned while attempting to create missing page tables if
2179  *    @should_create is %true.
2180  */
2181 static int
2182 pvr_mmu_op_context_next_page(struct pvr_mmu_op_context *op_ctx,
2183 			     bool should_create)
2184 {
2185 	s8 load_level_required = PVR_MMU_SYNC_LEVEL_NONE;
2186 
2187 	if (++op_ctx->curr_page.l0_idx != ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X)
2188 		goto load_tables;
2189 
2190 	op_ctx->curr_page.l0_idx = 0;
2191 	load_level_required = PVR_MMU_SYNC_LEVEL_0;
2192 
2193 	if (++op_ctx->curr_page.l1_idx != ROGUE_MMUCTRL_ENTRIES_PD_VALUE)
2194 		goto load_tables;
2195 
2196 	op_ctx->curr_page.l1_idx = 0;
2197 	load_level_required = PVR_MMU_SYNC_LEVEL_1;
2198 
2199 	if (++op_ctx->curr_page.l2_idx != ROGUE_MMUCTRL_ENTRIES_PC_VALUE)
2200 		goto load_tables;
2201 
2202 	/*
2203 	 * If the pattern continued, we would set &op_ctx->curr_page.l2_idx to
2204 	 * zero here. However, that would wrap the top layer of the page table
2205 	 * hierarchy which is not a valid operation. Instead, we warn and return
2206 	 * an error.
2207 	 */
2208 	WARN(true,
2209 	     "%s(%p) attempted to loop the top of the page table hierarchy",
2210 	     __func__, op_ctx);
2211 	return -EPERM;
2212 
2213 	/* If indices have wrapped, we need to load new tables. */
2214 load_tables:
2215 	/* First, flush tables which will be unloaded. */
2216 	pvr_mmu_op_context_sync_partial(op_ctx, load_level_required);
2217 
2218 	/* Then load tables from the required level down. */
2219 	return pvr_mmu_op_context_load_tables(op_ctx, should_create,
2220 					      load_level_required);
2221 }
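
/*
 * The two helpers above are used together when walking a contiguous
 * device-virtual range: position once, then advance one device page at a
 * time. A sketch of the pattern used by pvr_mmu_map()/pvr_mmu_unmap() and
 * their helpers below (device_addr and nr_pages are hypothetical):
 *
 *	err = pvr_mmu_op_context_set_curr_page(op_ctx, device_addr, true);
 *	if (err)
 *		return err;
 *
 *	... operate on the first page ...
 *
 *	for (u64 page = 1; page < nr_pages; ++page) {
 *		err = pvr_mmu_op_context_next_page(op_ctx, true);
 *		if (err)
 *			return err;
 *
 *		... operate on the page now referenced by op_ctx->curr_page ...
 *	}
 */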
2222 
2223 /**
2224  * DOC: Single page operations
2225  */
2226 
2227 /**
2228  * pvr_page_create() - Create a device-virtual memory page and insert it into
2229  * a level 0 page table.
2230  * @op_ctx: Target MMU op context pointing at the device-virtual address of the
2231  * target page.
2232  * @dma_addr: DMA address of the physical page backing the created page.
2233  * @flags: Page options saved on the level 0 page table entry for reading by
2234  *         the device.
2235  *
2236  * Return:
2237  *  * 0 on success, or
2238  *  * -%EEXIST if the requested page already exists.
2239  */
2240 static int
2241 pvr_page_create(struct pvr_mmu_op_context *op_ctx, dma_addr_t dma_addr,
2242 		struct pvr_page_flags_raw flags)
2243 {
2244 	/* Do not create a new page if one already exists. */
2245 	if (pvr_page_table_l0_entry_is_valid(op_ctx->curr_page.l0_table,
2246 					     op_ctx->curr_page.l0_idx)) {
2247 		return -EEXIST;
2248 	}
2249 
2250 	pvr_page_table_l0_insert(op_ctx, dma_addr, flags);
2251 
2252 	pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0);
2253 
2254 	return 0;
2255 }
2256 
2257 /**
2258  * pvr_page_destroy() - Destroy a device page after removing it from its
2259  * parent level 0 page table.
2260  * @op_ctx: Target MMU op context.
2261  */
2262 static void
2263 pvr_page_destroy(struct pvr_mmu_op_context *op_ctx)
2264 {
2265 	/* Do nothing if the page does not exist. */
2266 	if (!pvr_page_table_l0_entry_is_valid(op_ctx->curr_page.l0_table,
2267 					      op_ctx->curr_page.l0_idx)) {
2268 		return;
2269 	}
2270 
2271 	/* Clear the parent L0 page table entry. */
2272 	pvr_page_table_l0_remove(op_ctx);
2273 
2274 	pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0);
2275 }
2276 
2277 /**
2278  * pvr_mmu_op_context_destroy() - Destroy an MMU op context.
2279  * @op_ctx: Target MMU op context.
2280  */
2281 void pvr_mmu_op_context_destroy(struct pvr_mmu_op_context *op_ctx)
2282 {
2283 	const bool flush_caches =
2284 		op_ctx->sync_level_required != PVR_MMU_SYNC_LEVEL_NONE;
2285 
2286 	pvr_mmu_op_context_sync(op_ctx);
2287 
2288 	/* Unmaps should be flushed immediately. Map flushes can be deferred. */
2289 	if (flush_caches && !op_ctx->map.sgt)
2290 		pvr_mmu_flush_exec(op_ctx->mmu_ctx->pvr_dev, true);
2291 
2292 	while (op_ctx->map.l0_prealloc_tables) {
2293 		struct pvr_page_table_l0 *tmp = op_ctx->map.l0_prealloc_tables;
2294 
2295 		op_ctx->map.l0_prealloc_tables =
2296 			op_ctx->map.l0_prealloc_tables->next_free;
2297 		pvr_page_table_l0_free(tmp);
2298 	}
2299 
2300 	while (op_ctx->map.l1_prealloc_tables) {
2301 		struct pvr_page_table_l1 *tmp = op_ctx->map.l1_prealloc_tables;
2302 
2303 		op_ctx->map.l1_prealloc_tables =
2304 			op_ctx->map.l1_prealloc_tables->next_free;
2305 		pvr_page_table_l1_free(tmp);
2306 	}
2307 
2308 	while (op_ctx->unmap.l0_free_tables) {
2309 		struct pvr_page_table_l0 *tmp = op_ctx->unmap.l0_free_tables;
2310 
2311 		op_ctx->unmap.l0_free_tables =
2312 			op_ctx->unmap.l0_free_tables->next_free;
2313 		pvr_page_table_l0_free(tmp);
2314 	}
2315 
2316 	while (op_ctx->unmap.l1_free_tables) {
2317 		struct pvr_page_table_l1 *tmp = op_ctx->unmap.l1_free_tables;
2318 
2319 		op_ctx->unmap.l1_free_tables =
2320 			op_ctx->unmap.l1_free_tables->next_free;
2321 		pvr_page_table_l1_free(tmp);
2322 	}
2323 
2324 	kfree(op_ctx);
2325 }
2326 
2327 /**
2328  * pvr_mmu_op_context_create() - Create an MMU op context.
2329  * @ctx: MMU context associated with owning VM context.
2330  * @sgt: Scatter gather table containing pages pinned for use by this context.
2331  * @sgt_offset: Start offset of the requested device-virtual memory mapping.
2332  * @size: Size in bytes of the requested device-virtual memory mapping. For an
2333  * unmapping, this should be zero so that no page tables are allocated.
2334  *
2335  * Returns:
2336  *  * Newly created MMU op context object on success, or
2337  *  * -%ENOMEM if no memory is available,
2338  *  * Any error code returned by pvr_page_table_l1_alloc() or pvr_page_table_l0_alloc().
2339  */
2340 struct pvr_mmu_op_context *
2341 pvr_mmu_op_context_create(struct pvr_mmu_context *ctx, struct sg_table *sgt,
2342 			  u64 sgt_offset, u64 size)
2343 {
2344 	int err;
2345 
2346 	struct pvr_mmu_op_context *op_ctx =
2347 		kzalloc(sizeof(*op_ctx), GFP_KERNEL);
2348 
2349 	if (!op_ctx)
2350 		return ERR_PTR(-ENOMEM);
2351 
2352 	op_ctx->mmu_ctx = ctx;
2353 	op_ctx->map.sgt = sgt;
2354 	op_ctx->map.sgt_offset = sgt_offset;
2355 	op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
2356 
2357 	if (size) {
2358 		/*
2359 		 * The number of page table objects we need to prealloc is
2360 		 * indicated by the mapping size, start offset and the sizes
2361 		 * of the areas mapped per PT or PD. The range calculation is
2362 		 * identical to that for the index into a table for a device
2363 		 * address, so we reuse those functions here.
2364 		 */
2365 		const u32 l1_start_idx = pvr_page_table_l2_idx(sgt_offset);
2366 		const u32 l1_end_idx = pvr_page_table_l2_idx(sgt_offset + size);
2367 		const u32 l1_count = l1_end_idx - l1_start_idx + 1;
2368 		const u32 l0_start_idx = pvr_page_table_l1_idx(sgt_offset);
2369 		const u32 l0_end_idx = pvr_page_table_l1_idx(sgt_offset + size);
2370 		const u32 l0_count = l0_end_idx - l0_start_idx + 1;
2371 
2372 		/*
2373 		 * Alloc and push page table entries until we have enough of
2374 		 * each type, ending with linked lists of l0 and l1 entries in
2375 		 * reverse order.
2376 		 */
2377 		for (int i = 0; i < l1_count; i++) {
2378 			struct pvr_page_table_l1 *l1_tmp =
2379 				pvr_page_table_l1_alloc(ctx);
2380 
2381 			err = PTR_ERR_OR_ZERO(l1_tmp);
2382 			if (err)
2383 				goto err_cleanup;
2384 
2385 			l1_tmp->next_free = op_ctx->map.l1_prealloc_tables;
2386 			op_ctx->map.l1_prealloc_tables = l1_tmp;
2387 		}
2388 
2389 		for (int i = 0; i < l0_count; i++) {
2390 			struct pvr_page_table_l0 *l0_tmp =
2391 				pvr_page_table_l0_alloc(ctx);
2392 
2393 			err = PTR_ERR_OR_ZERO(l0_tmp);
2394 			if (err)
2395 				goto err_cleanup;
2396 
2397 			l0_tmp->next_free = op_ctx->map.l0_prealloc_tables;
2398 			op_ctx->map.l0_prealloc_tables = l0_tmp;
2399 		}
2400 	}
2401 
2402 	return op_ctx;
2403 
2404 err_cleanup:
2405 	pvr_mmu_op_context_destroy(op_ctx);
2406 
2407 	return ERR_PTR(err);
2408 }
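
/*
 * Worked example of the preallocation counts above, assuming 4 KiB device
 * pages (the input values are hypothetical): for sgt_offset = 0x1fe000 and
 * size = 0x4000, the range [0x1fe000, 0x202000) crosses one 2 MiB (L1 entry)
 * boundary but stays within a single 1 GiB (L2 entry) region, so:
 *
 *	l0_count = pvr_page_table_l1_idx(0x202000) -
 *		   pvr_page_table_l1_idx(0x1fe000) + 1    (= 1 - 0 + 1 = 2)
 *	l1_count = pvr_page_table_l2_idx(0x202000) -
 *		   pvr_page_table_l2_idx(0x1fe000) + 1    (= 0 - 0 + 1 = 1)
 *
 * i.e. two L0 tables and one L1 table are pushed onto the prealloc lists.
 * Tables that turn out to already exist are simply left on the lists and
 * freed by pvr_mmu_op_context_destroy().
 */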
2409 
2410 /**
2411  * pvr_mmu_op_context_unmap_curr_page() - Unmap pages from a memory context
2412  * starting from the current page of an MMU op context.
2413  * @op_ctx: Target MMU op context pointing at the first page to unmap.
2414  * @nr_pages: Number of pages to unmap.
2415  *
2416  * Return:
2417  *  * 0 on success, or
2418  *  * Any error encountered while advancing @op_ctx.curr_page with
2419  *    pvr_mmu_op_context_next_page() (except -%ENXIO).
2420  */
2421 static int
2422 pvr_mmu_op_context_unmap_curr_page(struct pvr_mmu_op_context *op_ctx,
2423 				   u64 nr_pages)
2424 {
2425 	int err;
2426 
2427 	if (nr_pages == 0)
2428 		return 0;
2429 
2430 	/*
2431 	 * Destroy first page outside loop, as it doesn't require a page
2432 	 * advance beforehand. If the L0 page table reference in
2433 	 * @op_ctx.curr_page is %NULL, there cannot be a mapped page at
2434 	 * @op_ctx.curr_page (so skip ahead).
2435 	 */
2436 	if (op_ctx->curr_page.l0_table)
2437 		pvr_page_destroy(op_ctx);
2438 
2439 	for (u64 page = 1; page < nr_pages; ++page) {
2440 		err = pvr_mmu_op_context_next_page(op_ctx, false);
2441 		/*
2442 		 * If the page table tree structure at @op_ctx.curr_page is
2443 		 * incomplete, skip ahead. We don't care about unmapping pages
2444 		 * that cannot exist.
2445 		 *
2446 		 * FIXME: This could be made more efficient by jumping ahead
2447 		 * using pvr_mmu_op_context_set_curr_page().
2448 		 */
2449 		if (err == -ENXIO)
2450 			continue;
2451 		else if (err)
2452 			return err;
2453 
2454 		pvr_page_destroy(op_ctx);
2455 	}
2456 
2457 	return 0;
2458 }
2459 
2460 /**
2461  * pvr_mmu_unmap() - Unmap pages from a memory context.
2462  * @op_ctx: Target MMU op context.
2463  * @device_addr: First device-virtual address to unmap.
2464  * @size: Size in bytes to unmap.
2465  *
2466  * The number of device pages unmapped is
2467  * @size / %PVR_DEVICE_PAGE_SIZE.
2468  *
2469  * Returns:
2470  *  * 0 on success, or
2471  *  * Any error code returned by pvr_mmu_op_context_set_curr_page(), or
2472  *  * Any error code returned by pvr_mmu_op_context_unmap_curr_page().
2473  */
2474 int pvr_mmu_unmap(struct pvr_mmu_op_context *op_ctx, u64 device_addr, u64 size)
2475 {
2476 	int err = pvr_mmu_op_context_set_curr_page(op_ctx, device_addr, false);
2477 
2478 	if (err)
2479 		return err;
2480 
2481 	return pvr_mmu_op_context_unmap_curr_page(op_ctx,
2482 						  size >> PVR_DEVICE_PAGE_SHIFT);
2483 }
2484 
2485 /**
2486  * pvr_mmu_map_sgl() - Map part of a scatter-gather table entry to
2487  * device-virtual memory.
2488  * @op_ctx: Target MMU op context pointing to the first page that should be
2489  * mapped.
2490  * @sgl: Target scatter-gather table entry.
2491  * @offset: Offset into @sgl to map from. Must result in a starting address
2492  * from @sgl which is CPU page-aligned.
2493  * @size: Size of the memory to be mapped in bytes. Must be a non-zero multiple
2494  * of the device page size.
2495  * @page_flags: Page options to be applied to every device-virtual memory page
2496  * in the created mapping.
2497  *
2498  * Return:
2499  *  * 0 on success,
2500  *  * -%EINVAL if the range specified by @offset and @size is not completely
2501  *    within @sgl, or
2502  *  * Any error encountered while creating a page with pvr_page_create(), or
2503  *  * Any error encountered while advancing @op_ctx.curr_page with
2504  *    pvr_mmu_op_context_next_page().
2505  */
2506 static int
2507 pvr_mmu_map_sgl(struct pvr_mmu_op_context *op_ctx, struct scatterlist *sgl,
2508 		u64 offset, u64 size, struct pvr_page_flags_raw page_flags)
2509 {
2510 	const unsigned int pages = size >> PVR_DEVICE_PAGE_SHIFT;
2511 	dma_addr_t dma_addr = sg_dma_address(sgl) + offset;
2512 	const unsigned int dma_len = sg_dma_len(sgl);
2513 	struct pvr_page_table_ptr ptr_copy;
2514 	unsigned int page;
2515 	int err;
2516 
2517 	if (size > dma_len || offset > dma_len - size)
2518 		return -EINVAL;
2519 
2520 	/*
2521 	 * Before progressing, save a copy of the start pointer so we can use
2522 	 * it again if we enter an error state and have to destroy pages.
2523 	 */
2524 	memcpy(&ptr_copy, &op_ctx->curr_page, sizeof(ptr_copy));
2525 
2526 	/*
2527 	 * Create first page outside loop, as it doesn't require a page advance
2528 	 * beforehand.
2529 	 */
2530 	err = pvr_page_create(op_ctx, dma_addr, page_flags);
2531 	if (err)
2532 		return err;
2533 
2534 	for (page = 1; page < pages; ++page) {
2535 		err = pvr_mmu_op_context_next_page(op_ctx, true);
2536 		if (err)
2537 			goto err_destroy_pages;
2538 
2539 		dma_addr += PVR_DEVICE_PAGE_SIZE;
2540 
2541 		err = pvr_page_create(op_ctx, dma_addr, page_flags);
2542 		if (err)
2543 			goto err_destroy_pages;
2544 	}
2545 
2546 	return 0;
2547 
2548 err_destroy_pages:
2549 	memcpy(&op_ctx->curr_page, &ptr_copy, sizeof(op_ctx->curr_page));
2550 	err = pvr_mmu_op_context_unmap_curr_page(op_ctx, page);
2551 
2552 	return err;
2553 }
2554 
2555 /**
2556  * pvr_mmu_map() - Map an object's virtual memory to physical memory.
2557  * @op_ctx: Target MMU op context.
2558  * @size: Size of memory to be mapped in bytes. Must be a non-zero multiple
2559  * of the device page size.
2560  * @flags: Flags from pvr_gem_object associated with the mapping.
2561  * @device_addr: Virtual device address to map to. Must be device page-aligned.
2562  *
2563  * Returns:
2564  *  * 0 on success, or
2565  *  * Any error code returned by pvr_page_table_ptr_init(), or
2566  *  * Any error code returned by pvr_mmu_map_sgl(), or
2567  *  * Any error code returned by pvr_page_table_ptr_next_page().
2568  */
2569 int pvr_mmu_map(struct pvr_mmu_op_context *op_ctx, u64 size, u64 flags,
2570 		u64 device_addr)
2571 {
2572 	struct pvr_page_table_ptr ptr_copy;
2573 	struct pvr_page_flags_raw flags_raw;
2574 	struct scatterlist *sgl;
2575 	u64 mapped_size = 0;
2576 	unsigned int count;
2577 	int err;
2578 
2579 	if (!size)
2580 		return 0;
2581 
2582 	if ((op_ctx->map.sgt_offset | size) & ~PVR_DEVICE_PAGE_MASK)
2583 		return -EINVAL;
2584 
2585 	err = pvr_mmu_op_context_set_curr_page(op_ctx, device_addr, true);
2586 	if (err)
2587 		return -EINVAL;
2588 
2589 	memcpy(&ptr_copy, &op_ctx->curr_page, sizeof(ptr_copy));
2590 
2591 	flags_raw = pvr_page_flags_raw_create(false, false,
2592 					      flags & DRM_PVR_BO_BYPASS_DEVICE_CACHE,
2593 					      flags & DRM_PVR_BO_PM_FW_PROTECT);
2594 
2595 	/* Map scatter gather table */
2596 	for_each_sgtable_dma_sg(op_ctx->map.sgt, sgl, count) {
2597 		const size_t sgl_len = sg_dma_len(sgl);
2598 		u64 sgl_offset, map_sgl_len;
2599 
2600 		if (sgl_len <= op_ctx->map.sgt_offset) {
2601 			op_ctx->map.sgt_offset -= sgl_len;
2602 			continue;
2603 		}
2604 
2605 		sgl_offset = op_ctx->map.sgt_offset;
2606 		map_sgl_len = min_t(u64, sgl_len - sgl_offset, size - mapped_size);
2607 
2608 		err = pvr_mmu_map_sgl(op_ctx, sgl, sgl_offset, map_sgl_len,
2609 				      flags_raw);
2610 		if (err)
2611 			break;
2612 
2613 		/*
2614 		 * Flag the L0 page table as requiring a flush when the MMU op
2615 		 * context is destroyed.
2616 		 */
2617 		pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0);
2618 
2619 		op_ctx->map.sgt_offset = 0;
2620 		mapped_size += map_sgl_len;
2621 
2622 		if (mapped_size >= size)
2623 			break;
2624 
2625 		err = pvr_mmu_op_context_next_page(op_ctx, true);
2626 		if (err)
2627 			break;
2628 	}
2629 
2630 	if (err && mapped_size) {
2631 		memcpy(&op_ctx->curr_page, &ptr_copy, sizeof(op_ctx->curr_page));
2632 		pvr_mmu_op_context_unmap_curr_page(op_ctx,
2633 						   mapped_size >> PVR_DEVICE_PAGE_SHIFT);
2634 	}
2635 
2636 	return err;
2637 }
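
/*
 * End-to-end sketch of the public mapping API in this file (illustrative
 * only; locking and firmware flush ordering are the caller's responsibility
 * and error handling is trimmed; sgt, size, flags and device_addr are
 * hypothetical):
 *
 *	struct pvr_mmu_op_context *op_ctx;
 *
 *	// Map: preallocate enough page tables for the worst case, then map.
 *	op_ctx = pvr_mmu_op_context_create(mmu_ctx, sgt, 0, size);
 *	if (IS_ERR(op_ctx))
 *		return PTR_ERR(op_ctx);
 *
 *	err = pvr_mmu_map(op_ctx, size, flags, device_addr);
 *
 *	// Destroying the op context performs any outstanding sync and
 *	// releases unused preallocated tables.
 *	pvr_mmu_op_context_destroy(op_ctx);
 *
 *	// Unmap: pass size 0 so no page tables are preallocated; tables freed
 *	// by the unmap are collected on the op context and released here too.
 *	op_ctx = pvr_mmu_op_context_create(mmu_ctx, NULL, 0, 0);
 *	if (IS_ERR(op_ctx))
 *		return PTR_ERR(op_ctx);
 *
 *	err = pvr_mmu_unmap(op_ctx, device_addr, size);
 *	pvr_mmu_op_context_destroy(op_ctx);
 */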
2638