xref: /linux/drivers/gpu/drm/imagination/pvr_mmu.c (revision ea518afc992032f7570c0a89ac9240b387dc0faf)
1 // SPDX-License-Identifier: GPL-2.0-only OR MIT
2 /* Copyright (c) 2023 Imagination Technologies Ltd. */
3 
4 #include "pvr_mmu.h"
5 
6 #include "pvr_ccb.h"
7 #include "pvr_device.h"
8 #include "pvr_fw.h"
9 #include "pvr_gem.h"
10 #include "pvr_power.h"
11 #include "pvr_rogue_fwif.h"
12 #include "pvr_rogue_mmu_defs.h"
13 
14 #include <drm/drm_drv.h>
15 #include <linux/atomic.h>
16 #include <linux/bitops.h>
17 #include <linux/dma-mapping.h>
18 #include <linux/kmemleak.h>
19 #include <linux/minmax.h>
20 #include <linux/sizes.h>
21 
/*
 * Index of the lowest set bit of @size_, i.e. log2(@size_) when @size_ is a
 * power of two. @size_ must be non-zero: __builtin_ctzll(0) is undefined.
 */
#define PVR_SHIFT_FROM_SIZE(size_)	(__builtin_ctzll(size_))
/*
 * 64-bit mask in which every bit below a power-of-two @size_ is cleared and
 * every bit from @size_ upwards is set.
 */
#define PVR_MASK_FROM_SIZE(size_)	(~((size_) - U64_C(1)))
24 
/*
 * The device page size (%PVR_DEVICE_PAGE_SIZE) is currently tied to the host
 * page size (%PAGE_SIZE). The conditional chain below picks the
 * ROGUE_MMUCTRL_* page-size value, range shift and range clear-mask that match
 * that size for use by the MMU code in this file, and fails the build for any
 * size that has no corresponding device page size.
 */
#if PVR_DEVICE_PAGE_SIZE == SZ_4K
# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_4KB
# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_4KB_RANGE_SHIFT
# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_4KB_RANGE_CLRMSK
#elif PVR_DEVICE_PAGE_SIZE == SZ_16K
# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_16KB
# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_16KB_RANGE_SHIFT
# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_16KB_RANGE_CLRMSK
#elif PVR_DEVICE_PAGE_SIZE == SZ_64K
# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_64KB
# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_64KB_RANGE_SHIFT
# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_64KB_RANGE_CLRMSK
#elif PVR_DEVICE_PAGE_SIZE == SZ_256K
# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_256KB
# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_256KB_RANGE_SHIFT
# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_256KB_RANGE_CLRMSK
#elif PVR_DEVICE_PAGE_SIZE == SZ_1M
# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_1MB
# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_1MB_RANGE_SHIFT
# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_1MB_RANGE_CLRMSK
#elif PVR_DEVICE_PAGE_SIZE == SZ_2M
# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_2MB
# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_2MB_RANGE_SHIFT
# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_2MB_RANGE_CLRMSK
#else
# error Unsupported device page size PVR_DEVICE_PAGE_SIZE
#endif
58 
/*
 * Number of entries in a level 0 page table for the device page size in use:
 * ROGUE_MMUCTRL_ENTRIES_PT_VALUE shifted right by the difference between the
 * device page shift and the 4KiB shift, so the count halves each time the
 * page size doubles.
 */
#define ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X \
	(ROGUE_MMUCTRL_ENTRIES_PT_VALUE >> \
	 (PVR_DEVICE_PAGE_SHIFT - PVR_SHIFT_FROM_SIZE(SZ_4K)))
62 
/*
 * Page table levels that can be named in a sync request. The values 0..2
 * follow the level numbering used by the PVR_MMU_SYNC_LEVEL_*_FLAGS macros.
 * NOTE(review): NONE (-1) presumably means "nothing to sync"; its users are
 * outside this part of the file.
 */
enum pvr_mmu_sync_level {
	PVR_MMU_SYNC_LEVEL_NONE = -1,
	PVR_MMU_SYNC_LEVEL_0,
	PVR_MMU_SYNC_LEVEL_1,
	PVR_MMU_SYNC_LEVEL_2,
};
69 
/*
 * MMU cache flush flag sets, one per page table level. Each level includes
 * everything from the level below it: level 0 covers PT, TLB and INTERRUPT,
 * level 1 adds PD, and level 2 adds PC.
 */
#define PVR_MMU_SYNC_LEVEL_0_FLAGS \
	(ROGUE_FWIF_MMUCACHEDATA_FLAGS_PT | \
	 ROGUE_FWIF_MMUCACHEDATA_FLAGS_INTERRUPT | \
	 ROGUE_FWIF_MMUCACHEDATA_FLAGS_TLB)
#define PVR_MMU_SYNC_LEVEL_1_FLAGS \
	(PVR_MMU_SYNC_LEVEL_0_FLAGS | ROGUE_FWIF_MMUCACHEDATA_FLAGS_PD)
#define PVR_MMU_SYNC_LEVEL_2_FLAGS \
	(PVR_MMU_SYNC_LEVEL_1_FLAGS | ROGUE_FWIF_MMUCACHEDATA_FLAGS_PC)
75 
76 /**
77  * pvr_mmu_set_flush_flags() - Set MMU cache flush flags for next call to
78  *                             pvr_mmu_flush_exec().
79  * @pvr_dev: Target PowerVR device.
80  * @flags: MMU flush flags. Must be one of %PVR_MMU_SYNC_LEVEL_*_FLAGS.
81  *
82  * This function must be called following any possible change to the MMU page
83  * tables.
84  */
85 static void pvr_mmu_set_flush_flags(struct pvr_device *pvr_dev, u32 flags)
86 {
87 	atomic_fetch_or(flags, &pvr_dev->mmu_flush_cache_flags);
88 }
89 
90 /**
91  * pvr_mmu_flush_request_all() - Request flush of all MMU caches when
92  * subsequently calling pvr_mmu_flush_exec().
93  * @pvr_dev: Target PowerVR device.
94  *
95  * This function must be called following any possible change to the MMU page
96  * tables.
97  */
98 void pvr_mmu_flush_request_all(struct pvr_device *pvr_dev)
99 {
100 	pvr_mmu_set_flush_flags(pvr_dev, PVR_MMU_SYNC_LEVEL_2_FLAGS);
101 }
102 
103 /**
104  * pvr_mmu_flush_exec() - Execute a flush of all MMU caches previously
105  * requested.
106  * @pvr_dev: Target PowerVR device.
107  * @wait: Do not return until the flush is completed.
108  *
109  * This function must be called prior to submitting any new GPU job. The flush
110  * will complete before the jobs are scheduled, so this can be called once after
111  * a series of maps. However, a single unmap should always be immediately
112  * followed by a flush and it should be explicitly waited by setting @wait.
113  *
114  * As a failure to flush the MMU caches could risk memory corruption, if the
115  * flush fails (implying the firmware is not responding) then the GPU device is
116  * marked as lost.
117  *
118  * Returns:
119  *  * 0 on success when @wait is true, or
120  *  * -%EIO if the device is unavailable, or
121  *  * Any error encountered while submitting the flush command via the KCCB.
122  */
123 int pvr_mmu_flush_exec(struct pvr_device *pvr_dev, bool wait)
124 {
125 	struct rogue_fwif_kccb_cmd cmd_mmu_cache = {};
126 	struct rogue_fwif_mmucachedata *cmd_mmu_cache_data =
127 		&cmd_mmu_cache.cmd_data.mmu_cache_data;
128 	int err = 0;
129 	u32 slot;
130 	int idx;
131 
132 	if (!drm_dev_enter(from_pvr_device(pvr_dev), &idx))
133 		return -EIO;
134 
135 	/* Can't flush MMU if the firmware hasn't booted yet. */
136 	if (!pvr_dev->fw_dev.booted)
137 		goto err_drm_dev_exit;
138 
139 	cmd_mmu_cache_data->cache_flags =
140 		atomic_xchg(&pvr_dev->mmu_flush_cache_flags, 0);
141 
142 	if (!cmd_mmu_cache_data->cache_flags)
143 		goto err_drm_dev_exit;
144 
145 	cmd_mmu_cache.cmd_type = ROGUE_FWIF_KCCB_CMD_MMUCACHE;
146 
147 	pvr_fw_object_get_fw_addr(pvr_dev->fw_dev.mem.mmucache_sync_obj,
148 				  &cmd_mmu_cache_data->mmu_cache_sync_fw_addr);
149 	cmd_mmu_cache_data->mmu_cache_sync_update_value = 0;
150 
151 	err = pvr_kccb_send_cmd(pvr_dev, &cmd_mmu_cache, &slot);
152 	if (err)
153 		goto err_reset_and_retry;
154 
155 	err = pvr_kccb_wait_for_completion(pvr_dev, slot, HZ, NULL);
156 	if (err)
157 		goto err_reset_and_retry;
158 
159 	drm_dev_exit(idx);
160 
161 	return 0;
162 
163 err_reset_and_retry:
164 	/*
165 	 * Flush command failure is most likely the result of a firmware lockup. Hard
166 	 * reset the GPU and retry.
167 	 */
168 	err = pvr_power_reset(pvr_dev, true);
169 	if (err)
170 		goto err_drm_dev_exit; /* Device is lost. */
171 
172 	/* Retry sending flush request. */
173 	err = pvr_kccb_send_cmd(pvr_dev, &cmd_mmu_cache, &slot);
174 	if (err) {
175 		pvr_device_lost(pvr_dev);
176 		goto err_drm_dev_exit;
177 	}
178 
179 	if (wait) {
180 		err = pvr_kccb_wait_for_completion(pvr_dev, slot, HZ, NULL);
181 		if (err)
182 			pvr_device_lost(pvr_dev);
183 	}
184 
185 err_drm_dev_exit:
186 	drm_dev_exit(idx);
187 
188 	return err;
189 }
190 
191 /**
192  * DOC: PowerVR Virtual Memory Handling
193  */
/**
 * DOC: PowerVR Virtual Memory Handling (constants)
 *
 * .. c:macro:: PVR_IDX_INVALID
 *
 *    "No index" marker for u16-based indices: all sixteen bits set.
 *
 *    Zero cannot serve this purpose because zero is a valid index value.
 */
#define PVR_IDX_INVALID ((u16)-1)
204 
205 /**
206  * DOC: MMU backing pages
207  */
208 /**
209  * DOC: MMU backing pages (constants)
210  *
211  * .. c:macro:: PVR_MMU_BACKING_PAGE_SIZE
212  *
213  *    Page size of a PowerVR device's integrated MMU. The CPU page size must be
214  *    at least as large as this value for the current implementation; this is
215  *    checked at compile-time.
216  */
217 #define PVR_MMU_BACKING_PAGE_SIZE SZ_4K
218 static_assert(PAGE_SIZE >= PVR_MMU_BACKING_PAGE_SIZE);
219 
220 /**
221  * struct pvr_mmu_backing_page - Represents a single page used to back a page
222  *                              table of any level.
223  * @dma_addr: DMA address of this page.
224  * @host_ptr: CPU address of this page.
225  * @pvr_dev: The PowerVR device to which this page is associated. **For
226  *           internal use only.**
227  */
228 struct pvr_mmu_backing_page {
229 	dma_addr_t dma_addr;
230 	void *host_ptr;
231 /* private: internal use only */
232 	struct page *raw_page;
233 	struct pvr_device *pvr_dev;
234 };
235 
236 /**
237  * pvr_mmu_backing_page_init() - Initialize a MMU backing page.
238  * @page: Target backing page.
239  * @pvr_dev: Target PowerVR device.
240  *
241  * This function performs three distinct operations:
242  *
243  * 1. Allocate a single page,
244  * 2. Map the page to the CPU, and
245  * 3. Map the page to DMA-space.
246  *
247  * It is expected that @page be zeroed (e.g. from kzalloc()) before calling
248  * this function.
249  *
250  * Return:
251  *  * 0 on success, or
252  *  * -%ENOMEM if allocation of the backing page or mapping of the backing
253  *    page to DMA fails.
254  */
255 static int
256 pvr_mmu_backing_page_init(struct pvr_mmu_backing_page *page,
257 			  struct pvr_device *pvr_dev)
258 {
259 	struct device *dev = from_pvr_device(pvr_dev)->dev;
260 
261 	struct page *raw_page;
262 	int err;
263 
264 	dma_addr_t dma_addr;
265 	void *host_ptr;
266 
267 	raw_page = alloc_page(__GFP_ZERO | GFP_KERNEL);
268 	if (!raw_page)
269 		return -ENOMEM;
270 
271 	host_ptr = vmap(&raw_page, 1, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
272 	if (!host_ptr) {
273 		err = -ENOMEM;
274 		goto err_free_page;
275 	}
276 
277 	dma_addr = dma_map_page(dev, raw_page, 0, PVR_MMU_BACKING_PAGE_SIZE,
278 				DMA_TO_DEVICE);
279 	if (dma_mapping_error(dev, dma_addr)) {
280 		err = -ENOMEM;
281 		goto err_unmap_page;
282 	}
283 
284 	page->dma_addr = dma_addr;
285 	page->host_ptr = host_ptr;
286 	page->pvr_dev = pvr_dev;
287 	page->raw_page = raw_page;
288 	kmemleak_alloc(page->host_ptr, PAGE_SIZE, 1, GFP_KERNEL);
289 
290 	return 0;
291 
292 err_unmap_page:
293 	vunmap(host_ptr);
294 
295 err_free_page:
296 	__free_page(raw_page);
297 
298 	return err;
299 }
300 
301 /**
302  * pvr_mmu_backing_page_fini() - Teardown a MMU backing page.
303  * @page: Target backing page.
304  *
305  * This function performs the mirror operations to pvr_mmu_backing_page_init(),
306  * in reverse order:
307  *
308  * 1. Unmap the page from DMA-space,
309  * 2. Unmap the page from the CPU, and
310  * 3. Free the page.
311  *
312  * It also zeros @page.
313  *
314  * It is a no-op to call this function a second (or further) time on any @page.
315  */
316 static void
317 pvr_mmu_backing_page_fini(struct pvr_mmu_backing_page *page)
318 {
319 	struct device *dev;
320 
321 	/* Do nothing if no allocation is present. */
322 	if (!page->pvr_dev)
323 		return;
324 
325 	dev = from_pvr_device(page->pvr_dev)->dev;
326 
327 	dma_unmap_page(dev, page->dma_addr, PVR_MMU_BACKING_PAGE_SIZE,
328 		       DMA_TO_DEVICE);
329 
330 	kmemleak_free(page->host_ptr);
331 	vunmap(page->host_ptr);
332 
333 	__free_page(page->raw_page);
334 
335 	memset(page, 0, sizeof(*page));
336 }
337 
338 /**
339  * pvr_mmu_backing_page_sync() - Flush a MMU backing page from the CPU to the
340  * device.
341  * @page: Target backing page.
342  * @flags: MMU flush flags. Must be one of %PVR_MMU_SYNC_LEVEL_*_FLAGS.
343  *
344  * .. caution::
345  *
346  *    **This is potentially an expensive function call.** Only call
347  *    pvr_mmu_backing_page_sync() once you're sure you have no more changes to
348  *    make to the backing page in the immediate future.
349  */
350 static void
351 pvr_mmu_backing_page_sync(struct pvr_mmu_backing_page *page, u32 flags)
352 {
353 	struct pvr_device *pvr_dev = page->pvr_dev;
354 	struct device *dev;
355 
356 	/*
357 	 * Do nothing if no allocation is present. This may be the case if
358 	 * we are unmapping pages.
359 	 */
360 	if (!pvr_dev)
361 		return;
362 
363 	dev = from_pvr_device(pvr_dev)->dev;
364 
365 	dma_sync_single_for_device(dev, page->dma_addr,
366 				   PVR_MMU_BACKING_PAGE_SIZE, DMA_TO_DEVICE);
367 
368 	pvr_mmu_set_flush_flags(pvr_dev, flags);
369 }
370 
371 /**
372  * DOC: Raw page tables
373  */
374 
/* Type of the @val member of a raw level @level_ page table entry. */
#define PVR_PAGE_TABLE_TYPEOF_ENTRY(level_) \
	typeof_member(struct pvr_page_table_l##level_##_entry_raw, val)

/*
 * Read field @field_ of register group @name_ from raw entry @entry_: keep
 * only the bits outside the field's CLRMSK, then shift them down by the
 * field's SHIFT. @level_ is not used by this macro.
 */
#define PVR_PAGE_TABLE_FIELD_GET(level_, name_, field_, entry_) \
	(((entry_).val & ~ROGUE_MMUCTRL_##name_##_DATA_##field_##_CLRMSK) >> \
	 ROGUE_MMUCTRL_##name_##_DATA_##field_##_SHIFT)

/*
 * Build the bits for field @field_ of register group @name_ from @val_:
 * convert @val_ to the entry type of level @level_, shift it up by the field's
 * SHIFT and discard anything that falls outside the field.
 */
#define PVR_PAGE_TABLE_FIELD_PREP(level_, name_, field_, val_) \
	((((PVR_PAGE_TABLE_TYPEOF_ENTRY(level_))(val_)) \
	  << ROGUE_MMUCTRL_##name_##_DATA_##field_##_SHIFT) & \
	 ~ROGUE_MMUCTRL_##name_##_DATA_##field_##_CLRMSK)
387 
388 /**
389  * struct pvr_page_table_l2_entry_raw - A single entry in a level 2 page table.
390  * @val: The raw value of this entry.
391  *
392  * This type is a structure for type-checking purposes. At compile-time, its
393  * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PC_VALUE.
394  *
395  * The value stored in this structure can be decoded using the following bitmap:
396  *
397  * .. flat-table::
398  *    :widths: 1 5
399  *    :stub-columns: 1
400  *
401  *    * - 31..4
402  *      - **Level 1 Page Table Base Address:** Bits 39..12 of the L1
403  *        page table base address, which is 4KiB aligned.
404  *
405  *    * - 3..2
406  *      - *(reserved)*
407  *
408  *    * - 1
409  *      - **Pending:** When valid bit is not set, indicates that a valid
410  *        entry is pending and the MMU should wait for the driver to map
411  *        the entry. This is used to support page demand mapping of
412  *        memory.
413  *
414  *    * - 0
415  *      - **Valid:** Indicates that the entry contains a valid L1 page
416  *        table. If the valid bit is not set, then an attempted use of
417  *        the page would result in a page fault.
418  */
419 struct pvr_page_table_l2_entry_raw {
420 	u32 val;
421 } __packed;
422 static_assert(sizeof(struct pvr_page_table_l2_entry_raw) * 8 ==
423 	      ROGUE_MMUCTRL_ENTRY_SIZE_PC_VALUE);
424 
425 static bool
426 pvr_page_table_l2_entry_raw_is_valid(struct pvr_page_table_l2_entry_raw entry)
427 {
428 	return PVR_PAGE_TABLE_FIELD_GET(2, PC, VALID, entry);
429 }
430 
431 /**
432  * pvr_page_table_l2_entry_raw_set() - Write a valid entry into a raw level 2
433  *                                     page table.
434  * @entry: Target raw level 2 page table entry.
435  * @child_table_dma_addr: DMA address of the level 1 page table to be
436  *                        associated with @entry.
437  *
438  * When calling this function, @child_table_dma_addr must be a valid DMA
439  * address and a multiple of %ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSIZE.
440  */
441 static void
442 pvr_page_table_l2_entry_raw_set(struct pvr_page_table_l2_entry_raw *entry,
443 				dma_addr_t child_table_dma_addr)
444 {
445 	child_table_dma_addr >>= ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSHIFT;
446 
447 	WRITE_ONCE(entry->val,
448 		   PVR_PAGE_TABLE_FIELD_PREP(2, PC, VALID, true) |
449 		   PVR_PAGE_TABLE_FIELD_PREP(2, PC, ENTRY_PENDING, false) |
450 		   PVR_PAGE_TABLE_FIELD_PREP(2, PC, PD_BASE, child_table_dma_addr));
451 }
452 
453 static void
454 pvr_page_table_l2_entry_raw_clear(struct pvr_page_table_l2_entry_raw *entry)
455 {
456 	WRITE_ONCE(entry->val, 0);
457 }
458 
459 /**
460  * struct pvr_page_table_l1_entry_raw - A single entry in a level 1 page table.
461  * @val: The raw value of this entry.
462  *
463  * This type is a structure for type-checking purposes. At compile-time, its
464  * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PD_VALUE.
465  *
466  * The value stored in this structure can be decoded using the following bitmap:
467  *
468  * .. flat-table::
469  *    :widths: 1 5
470  *    :stub-columns: 1
471  *
472  *    * - 63..41
473  *      - *(reserved)*
474  *
475  *    * - 40
476  *      - **Pending:** When valid bit is not set, indicates that a valid entry
477  *        is pending and the MMU should wait for the driver to map the entry.
478  *        This is used to support page demand mapping of memory.
479  *
480  *    * - 39..5
481  *      - **Level 0 Page Table Base Address:** The way this value is
482  *        interpreted depends on the page size. Bits not specified in the
483  *        table below (e.g. bits 11..5 for page size 4KiB) should be
484  *        considered reserved.
485  *
486  *        This table shows the bits used in an L1 page table entry to
487  *        represent the Physical Table Base Address for a given Page Size.
488  *        Since each L1 page table entry covers 2MiB of address space, the
489  *        maximum page size is 2MiB.
490  *
491  *        .. flat-table::
492  *           :widths: 1 1 1 1
493  *           :header-rows: 1
494  *           :stub-columns: 1
495  *
496  *           * - Page size
497  *             - L0 page table base address bits
498  *             - Number of L0 page table entries
499  *             - Size of L0 page table
500  *
501  *           * - 4KiB
502  *             - 39..12
503  *             - 512
504  *             - 4KiB
505  *
506  *           * - 16KiB
507  *             - 39..10
508  *             - 128
509  *             - 1KiB
510  *
511  *           * - 64KiB
512  *             - 39..8
513  *             - 32
514  *             - 256B
515  *
516  *           * - 256KiB
517  *             - 39..6
518  *             - 8
519  *             - 64B
520  *
521  *           * - 1MiB
522  *             - 39..5 (4 = '0')
523  *             - 2
524  *             - 16B
525  *
526  *           * - 2MiB
527  *             - 39..5 (4..3 = '00')
528  *             - 1
529  *             - 8B
530  *
531  *    * - 4
532  *      - *(reserved)*
533  *
534  *    * - 3..1
535  *      - **Page Size:** Sets the page size, from 4KiB to 2MiB.
536  *
537  *    * - 0
538  *      - **Valid:** Indicates that the entry contains a valid L0 page table.
539  *        If the valid bit is not set, then an attempted use of the page would
540  *        result in a page fault.
541  */
542 struct pvr_page_table_l1_entry_raw {
543 	u64 val;
544 } __packed;
545 static_assert(sizeof(struct pvr_page_table_l1_entry_raw) * 8 ==
546 	      ROGUE_MMUCTRL_ENTRY_SIZE_PD_VALUE);
547 
548 static bool
549 pvr_page_table_l1_entry_raw_is_valid(struct pvr_page_table_l1_entry_raw entry)
550 {
551 	return PVR_PAGE_TABLE_FIELD_GET(1, PD, VALID, entry);
552 }
553 
554 /**
555  * pvr_page_table_l1_entry_raw_set() - Write a valid entry into a raw level 1
556  *                                     page table.
557  * @entry: Target raw level 1 page table entry.
558  * @child_table_dma_addr: DMA address of the level 0 page table to be
559  *                        associated with @entry.
560  *
561  * When calling this function, @child_table_dma_addr must be a valid DMA
562  * address and a multiple of 4 KiB.
563  */
564 static void
565 pvr_page_table_l1_entry_raw_set(struct pvr_page_table_l1_entry_raw *entry,
566 				dma_addr_t child_table_dma_addr)
567 {
568 	WRITE_ONCE(entry->val,
569 		   PVR_PAGE_TABLE_FIELD_PREP(1, PD, VALID, true) |
570 		   PVR_PAGE_TABLE_FIELD_PREP(1, PD, ENTRY_PENDING, false) |
571 		   PVR_PAGE_TABLE_FIELD_PREP(1, PD, PAGE_SIZE, ROGUE_MMUCTRL_PAGE_SIZE_X) |
572 		   /*
573 		    * The use of a 4K-specific macro here is correct. It is
574 		    * a future optimization to allocate sub-host-page-sized
575 		    * blocks for individual tables, so the condition that any
576 		    * page table address is aligned to the size of the
577 		    * largest (a 4KB) table currently holds.
578 		    */
579 		   (child_table_dma_addr & ~ROGUE_MMUCTRL_PT_BASE_4KB_RANGE_CLRMSK));
580 }
581 
582 static void
583 pvr_page_table_l1_entry_raw_clear(struct pvr_page_table_l1_entry_raw *entry)
584 {
585 	WRITE_ONCE(entry->val, 0);
586 }
587 
588 /**
589  * struct pvr_page_table_l0_entry_raw - A single entry in a level 0 page table.
590  * @val: The raw value of this entry.
591  *
592  * This type is a structure for type-checking purposes. At compile-time, its
593  * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PT_VALUE.
594  *
595  * The value stored in this structure can be decoded using the following bitmap:
596  *
597  * .. flat-table::
598  *    :widths: 1 5
599  *    :stub-columns: 1
600  *
601  *    * - 63
602  *      - *(reserved)*
603  *
604  *    * - 62
605  *      - **PM/FW Protect:** Indicates a protected region which only the
606  *        Parameter Manager (PM) or firmware processor can write to.
607  *
608  *    * - 61..40
609  *      - **VP Page (High):** Virtual-physical page used for Parameter Manager
610  *        (PM) memory. This field is only used if the additional level of PB
611  *        virtualization is enabled. The VP Page field is needed by the PM in
612  *        order to correctly reconstitute the free lists after render
613  *        completion. This (High) field holds bits 39..18 of the value; the
614  *        Low field holds bits 17..12. Bits 11..0 are always zero because the
615  *        value is always aligned to the 4KiB page size.
616  *
617  *    * - 39..12
618  *      - **Physical Page Address:** The way this value is interpreted depends
619  *        on the page size. Bits not specified in the table below (e.g. bits
620  *        20..12 for page size 2MiB) should be considered reserved.
621  *
622  *        This table shows the bits used in an L0 page table entry to represent
623  *        the Physical Page Address for a given page size (as defined in the
624  *        associated L1 page table entry).
625  *
626  *        .. flat-table::
627  *           :widths: 1 1
628  *           :header-rows: 1
629  *           :stub-columns: 1
630  *
631  *           * - Page size
632  *             - Physical address bits
633  *
634  *           * - 4KiB
635  *             - 39..12
636  *
637  *           * - 16KiB
638  *             - 39..14
639  *
640  *           * - 64KiB
641  *             - 39..16
642  *
643  *           * - 256KiB
644  *             - 39..18
645  *
646  *           * - 1MiB
647  *             - 39..20
648  *
649  *           * - 2MiB
650  *             - 39..21
651  *
652  *    * - 11..6
653  *      - **VP Page (Low):** Continuation of VP Page (High).
654  *
655  *    * - 5
656  *      - **Pending:** When valid bit is not set, indicates that a valid entry
657  *        is pending and the MMU should wait for the driver to map the entry.
658  *        This is used to support page demand mapping of memory.
659  *
660  *    * - 4
661  *      - **PM Src:** Set on Parameter Manager (PM) allocated page table
662  *        entries when indicated by the PM. Note that this bit will only be set
663  *        by the PM, not by the device driver.
664  *
665  *    * - 3
666  *      - **SLC Bypass Control:** Specifies requests to this page should bypass
667  *        the System Level Cache (SLC), if enabled in SLC configuration.
668  *
669  *    * - 2
670  *      - **Cache Coherency:** Indicates that the page is coherent (i.e. it
671  *        does not require a cache flush between operations on the CPU and the
672  *        device).
673  *
674  *    * - 1
675  *      - **Read Only:** If set, this bit indicates that the page is read only.
676  *        An attempted write to this page would result in a write-protection
677  *        fault.
678  *
679  *    * - 0
680  *      - **Valid:** Indicates that the entry contains a valid page. If the
681  *        valid bit is not set, then an attempted use of the page would result
682  *        in a page fault.
683  */
684 struct pvr_page_table_l0_entry_raw {
685 	u64 val;
686 } __packed;
687 static_assert(sizeof(struct pvr_page_table_l0_entry_raw) * 8 ==
688 	      ROGUE_MMUCTRL_ENTRY_SIZE_PT_VALUE);
689 
690 /**
691  * struct pvr_page_flags_raw - The configurable flags from a single entry in a
692  *                             level 0 page table.
693  * @val: The raw value of these flags. Since these are a strict subset of
694  *       &struct pvr_page_table_l0_entry_raw; use that type for our member here.
695  *
696  * The flags stored in this type are: PM/FW Protect; SLC Bypass Control; Cache
697  * Coherency, and Read Only (bits 62, 3, 2 and 1 respectively).
698  *
699  * This type should never be instantiated directly; instead use
700  * pvr_page_flags_raw_create() to ensure only valid bits of @val are set.
701  */
702 struct pvr_page_flags_raw {
703 	struct pvr_page_table_l0_entry_raw val;
704 } __packed;
705 static_assert(sizeof(struct pvr_page_flags_raw) ==
706 	      sizeof(struct pvr_page_table_l0_entry_raw));
707 
708 static bool
709 pvr_page_table_l0_entry_raw_is_valid(struct pvr_page_table_l0_entry_raw entry)
710 {
711 	return PVR_PAGE_TABLE_FIELD_GET(0, PT, VALID, entry);
712 }
713 
714 /**
715  * pvr_page_table_l0_entry_raw_set() - Write a valid entry into a raw level 0
716  *                                     page table.
717  * @entry: Target raw level 0 page table entry.
718  * @dma_addr: DMA address of the physical page to be associated with @entry.
719  * @flags: Options to be set on @entry.
720  *
721  * When calling this function, @child_table_dma_addr must be a valid DMA
722  * address and a multiple of %PVR_DEVICE_PAGE_SIZE.
723  *
724  * The @flags parameter is directly assigned into @entry. It is the callers
725  * responsibility to ensure that only bits specified in
726  * &struct pvr_page_flags_raw are set in @flags.
727  */
728 static void
729 pvr_page_table_l0_entry_raw_set(struct pvr_page_table_l0_entry_raw *entry,
730 				dma_addr_t dma_addr,
731 				struct pvr_page_flags_raw flags)
732 {
733 	WRITE_ONCE(entry->val, PVR_PAGE_TABLE_FIELD_PREP(0, PT, VALID, true) |
734 			       PVR_PAGE_TABLE_FIELD_PREP(0, PT, ENTRY_PENDING, false) |
735 			       (dma_addr & ~ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK) |
736 			       flags.val.val);
737 }
738 
739 static void
740 pvr_page_table_l0_entry_raw_clear(struct pvr_page_table_l0_entry_raw *entry)
741 {
742 	WRITE_ONCE(entry->val, 0);
743 }
744 
745 /**
746  * pvr_page_flags_raw_create() - Initialize the flag bits of a raw level 0 page
747  *                               table entry.
748  * @read_only: This page is read-only (see: Read Only).
749  * @cache_coherent: This page does not require cache flushes (see: Cache
750  *                  Coherency).
751  * @slc_bypass: This page bypasses the device cache (see: SLC Bypass Control).
752  * @pm_fw_protect: This page is only for use by the firmware or Parameter
753  *                 Manager (see PM/FW Protect).
754  *
755  * For more details on the use of these four options, see their respective
756  * entries in the table under &struct pvr_page_table_l0_entry_raw.
757  *
758  * Return:
759  * A new &struct pvr_page_flags_raw instance which can be passed directly to
760  * pvr_page_table_l0_entry_raw_set() or pvr_page_table_l0_insert().
761  */
762 static struct pvr_page_flags_raw
763 pvr_page_flags_raw_create(bool read_only, bool cache_coherent, bool slc_bypass,
764 			  bool pm_fw_protect)
765 {
766 	struct pvr_page_flags_raw flags;
767 
768 	flags.val.val =
769 		PVR_PAGE_TABLE_FIELD_PREP(0, PT, READ_ONLY, read_only) |
770 		PVR_PAGE_TABLE_FIELD_PREP(0, PT, CC, cache_coherent) |
771 		PVR_PAGE_TABLE_FIELD_PREP(0, PT, SLC_BYPASS_CTRL, slc_bypass) |
772 		PVR_PAGE_TABLE_FIELD_PREP(0, PT, PM_META_PROTECT, pm_fw_protect);
773 
774 	return flags;
775 }
776 
777 /**
778  * struct pvr_page_table_l2_raw - The raw data of a level 2 page table.
779  *
780  * This type is a structure for type-checking purposes. At compile-time, its
781  * size is checked against %PVR_MMU_BACKING_PAGE_SIZE.
782  */
783 struct pvr_page_table_l2_raw {
784 	/** @entries: The raw values of this table. */
785 	struct pvr_page_table_l2_entry_raw
786 		entries[ROGUE_MMUCTRL_ENTRIES_PC_VALUE];
787 } __packed;
788 static_assert(sizeof(struct pvr_page_table_l2_raw) == PVR_MMU_BACKING_PAGE_SIZE);
789 
790 /**
791  * struct pvr_page_table_l1_raw - The raw data of a level 1 page table.
792  *
793  * This type is a structure for type-checking purposes. At compile-time, its
794  * size is checked against %PVR_MMU_BACKING_PAGE_SIZE.
795  */
796 struct pvr_page_table_l1_raw {
797 	/** @entries: The raw values of this table. */
798 	struct pvr_page_table_l1_entry_raw
799 		entries[ROGUE_MMUCTRL_ENTRIES_PD_VALUE];
800 } __packed;
801 static_assert(sizeof(struct pvr_page_table_l1_raw) == PVR_MMU_BACKING_PAGE_SIZE);
802 
803 /**
804  * struct pvr_page_table_l0_raw - The raw data of a level 0 page table.
805  *
806  * This type is a structure for type-checking purposes. At compile-time, its
807  * size is checked against %PVR_MMU_BACKING_PAGE_SIZE.
808  *
809  * .. caution::
810  *
811  *    The size of level 0 page tables is variable depending on the page size
812  *    specified in the associated level 1 page table entry. Since the device
813  *    page size in use is pegged to the host page size, it cannot vary at
814  *    runtime. This structure is therefore only defined to contain the required
815  *    number of entries for the current device page size. **You should never
816  *    read or write beyond the last supported entry.**
817  */
818 struct pvr_page_table_l0_raw {
819 	/** @entries: The raw values of this table. */
820 	struct pvr_page_table_l0_entry_raw
821 		entries[ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X];
822 } __packed;
823 static_assert(sizeof(struct pvr_page_table_l0_raw) <= PVR_MMU_BACKING_PAGE_SIZE);
824 
825 /**
826  * DOC: Mirror page tables
827  */
828 
/*
 * Forward declarations: the mirror page table types below hold pointers to
 * one another, so the lower-level types must be nameable before they are
 * defined.
 */
struct pvr_page_table_l0;
struct pvr_page_table_l1;
835 
836 /**
837  * struct pvr_page_table_l2 - A wrapped level 2 page table.
838  *
839  * To access the raw part of this table, use pvr_page_table_l2_get_raw().
840  * Alternatively to access a raw entry directly, use
841  * pvr_page_table_l2_get_entry_raw().
842  *
843  * A level 2 page table forms the root of the page table tree structure, so
844  * this type has no &parent or &parent_idx members.
845  */
846 struct pvr_page_table_l2 {
847 	/**
848 	 * @entries: The children of this node in the page table tree
849 	 * structure. These are also mirror tables. The indexing of this array
850 	 * is identical to that of the raw equivalent
851 	 * (&pvr_page_table_l1_raw.entries).
852 	 */
853 	struct pvr_page_table_l1 *entries[ROGUE_MMUCTRL_ENTRIES_PC_VALUE];
854 
855 	/**
856 	 * @backing_page: A handle to the memory which holds the raw
857 	 * equivalent of this table. **For internal use only.**
858 	 */
859 	struct pvr_mmu_backing_page backing_page;
860 
861 	/**
862 	 * @entry_count: The current number of valid entries (that we know of)
863 	 * in this table. This value is essentially a refcount - the table is
864 	 * destroyed when this value is decremented to zero by
865 	 * pvr_page_table_l2_remove().
866 	 */
867 	u16 entry_count;
868 };
869 
870 /**
871  * pvr_page_table_l2_init() - Initialize a level 2 page table.
872  * @table: Target level 2 page table.
873  * @pvr_dev: Target PowerVR device
874  *
875  * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
876  * this function.
877  *
878  * Return:
879  *  * 0 on success, or
880  *  * Any error encountered while intializing &table->backing_page using
881  *    pvr_mmu_backing_page_init().
882  */
883 static int
884 pvr_page_table_l2_init(struct pvr_page_table_l2 *table,
885 		       struct pvr_device *pvr_dev)
886 {
887 	return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
888 }
889 
890 /**
891  * pvr_page_table_l2_fini() - Teardown a level 2 page table.
892  * @table: Target level 2 page table.
893  *
894  * It is an error to attempt to use @table after calling this function.
895  */
896 static void
897 pvr_page_table_l2_fini(struct pvr_page_table_l2 *table)
898 {
899 	pvr_mmu_backing_page_fini(&table->backing_page);
900 }
901 
902 /**
903  * pvr_page_table_l2_sync() - Flush a level 2 page table from the CPU to the
904  *                            device.
905  * @table: Target level 2 page table.
906  *
907  * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
908  * warning there applies here too: **Only call pvr_page_table_l2_sync() once
909  * you're sure you have no more changes to make to** @table **in the immediate
910  * future.**
911  *
912  * If child level 1 page tables of @table also need to be flushed, this should
913  * be done first using pvr_page_table_l1_sync() *before* calling this function.
914  */
915 static void
916 pvr_page_table_l2_sync(struct pvr_page_table_l2 *table)
917 {
918 	pvr_mmu_backing_page_sync(&table->backing_page, PVR_MMU_SYNC_LEVEL_2_FLAGS);
919 }
920 
921 /**
922  * pvr_page_table_l2_get_raw() - Access the raw equivalent of a mirror level 2
923  *                               page table.
924  * @table: Target level 2 page table.
925  *
926  * Essentially returns the CPU address of the raw equivalent of @table, cast to
927  * a &struct pvr_page_table_l2_raw pointer.
928  *
929  * You probably want to call pvr_page_table_l2_get_entry_raw() instead.
930  *
931  * Return:
932  * The raw equivalent of @table.
933  */
934 static struct pvr_page_table_l2_raw *
935 pvr_page_table_l2_get_raw(struct pvr_page_table_l2 *table)
936 {
937 	return table->backing_page.host_ptr;
938 }
939 
940 /**
941  * pvr_page_table_l2_get_entry_raw() - Access an entry from the raw equivalent
942  *                                     of a mirror level 2 page table.
943  * @table: Target level 2 page table.
944  * @idx: Index of the entry to access.
945  *
946  * Technically this function returns a pointer to a slot in a raw level 2 page
947  * table, since the returned "entry" is not guaranteed to be valid. The caller
948  * must verify the validity of the entry at the returned address (perhaps using
949  * pvr_page_table_l2_entry_raw_is_valid()) before reading or overwriting it.
950  *
951  * The value of @idx is not checked here; it is the callers responsibility to
952  * ensure @idx refers to a valid index within @table before dereferencing the
953  * returned pointer.
954  *
955  * Return:
956  * A pointer to the requested raw level 2 page table entry.
957  */
958 static struct pvr_page_table_l2_entry_raw *
959 pvr_page_table_l2_get_entry_raw(struct pvr_page_table_l2 *table, u16 idx)
960 {
961 	return &pvr_page_table_l2_get_raw(table)->entries[idx];
962 }
963 
964 /**
965  * pvr_page_table_l2_entry_is_valid() - Check if a level 2 page table entry is
966  *                                      marked as valid.
967  * @table: Target level 2 page table.
968  * @idx: Index of the entry to check.
969  *
970  * The value of @idx is not checked here; it is the callers responsibility to
971  * ensure @idx refers to a valid index within @table before calling this
972  * function.
973  */
974 static bool
975 pvr_page_table_l2_entry_is_valid(struct pvr_page_table_l2 *table, u16 idx)
976 {
977 	struct pvr_page_table_l2_entry_raw entry_raw =
978 		*pvr_page_table_l2_get_entry_raw(table, idx);
979 
980 	return pvr_page_table_l2_entry_raw_is_valid(entry_raw);
981 }
982 
983 /**
984  * struct pvr_page_table_l1 - A wrapped level 1 page table.
985  *
986  * To access the raw part of this table, use pvr_page_table_l1_get_raw().
987  * Alternatively to access a raw entry directly, use
988  * pvr_page_table_l1_get_entry_raw().
989  */
990 struct pvr_page_table_l1 {
991 	/**
992 	 * @entries: The children of this node in the page table tree
993 	 * structure. These are also mirror tables. The indexing of this array
994 	 * is identical to that of the raw equivalent
995 	 * (&pvr_page_table_l0_raw.entries).
996 	 */
997 	struct pvr_page_table_l0 *entries[ROGUE_MMUCTRL_ENTRIES_PD_VALUE];
998 
999 	/**
1000 	 * @backing_page: A handle to the memory which holds the raw
1001 	 * equivalent of this table. **For internal use only.**
1002 	 */
1003 	struct pvr_mmu_backing_page backing_page;
1004 
1005 	union {
1006 		/**
1007 		 * @parent: The parent of this node in the page table tree structure.
1008 		 *
1009 		 * This is also a mirror table.
1010 		 *
1011 		 * Only valid when the L1 page table is active. When the L1 page table
1012 		 * has been removed and queued for destruction, the next_free field
1013 		 * should be used instead.
1014 		 */
1015 		struct pvr_page_table_l2 *parent;
1016 
1017 		/**
1018 		 * @next_free: Pointer to the next L1 page table to take/free.
1019 		 *
1020 		 * Used to form a linked list of L1 page tables. This is used
1021 		 * when preallocating tables and when the page table has been
1022 		 * removed and queued for destruction.
1023 		 */
1024 		struct pvr_page_table_l1 *next_free;
1025 	};
1026 
1027 	/**
1028 	 * @parent_idx: The index of the entry in the parent table (see
1029 	 * @parent) which corresponds to this table.
1030 	 */
1031 	u16 parent_idx;
1032 
1033 	/**
1034 	 * @entry_count: The current number of valid entries (that we know of)
1035 	 * in this table. This value is essentially a refcount - the table is
1036 	 * destroyed when this value is decremented to zero by
1037 	 * pvr_page_table_l1_remove().
1038 	 */
1039 	u16 entry_count;
1040 };
1041 
1042 /**
1043  * pvr_page_table_l1_init() - Initialize a level 1 page table.
1044  * @table: Target level 1 page table.
1045  * @pvr_dev: Target PowerVR device
1046  *
1047  * When this function returns successfully, @table is still not considered
1048  * valid. It must be inserted into the page table tree structure with
1049  * pvr_page_table_l2_insert() before it is ready for use.
1050  *
1051  * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
1052  * this function.
1053  *
1054  * Return:
1055  *  * 0 on success, or
1056  *  * Any error encountered while intializing &table->backing_page using
1057  *    pvr_mmu_backing_page_init().
1058  */
1059 static int
1060 pvr_page_table_l1_init(struct pvr_page_table_l1 *table,
1061 		       struct pvr_device *pvr_dev)
1062 {
1063 	table->parent_idx = PVR_IDX_INVALID;
1064 
1065 	return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
1066 }
1067 
1068 /**
1069  * pvr_page_table_l1_free() - Teardown a level 1 page table.
1070  * @table: Target level 1 page table.
1071  *
1072  * It is an error to attempt to use @table after calling this function, even
1073  * indirectly. This includes calling pvr_page_table_l2_remove(), which must
1074  * be called *before* pvr_page_table_l1_free().
1075  */
1076 static void
1077 pvr_page_table_l1_free(struct pvr_page_table_l1 *table)
1078 {
1079 	pvr_mmu_backing_page_fini(&table->backing_page);
1080 	kfree(table);
1081 }
1082 
1083 /**
1084  * pvr_page_table_l1_sync() - Flush a level 1 page table from the CPU to the
1085  *                            device.
1086  * @table: Target level 1 page table.
1087  *
1088  * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
1089  * warning there applies here too: **Only call pvr_page_table_l1_sync() once
1090  * you're sure you have no more changes to make to** @table **in the immediate
1091  * future.**
1092  *
1093  * If child level 0 page tables of @table also need to be flushed, this should
1094  * be done first using pvr_page_table_l0_sync() *before* calling this function.
1095  */
1096 static void
1097 pvr_page_table_l1_sync(struct pvr_page_table_l1 *table)
1098 {
1099 	pvr_mmu_backing_page_sync(&table->backing_page, PVR_MMU_SYNC_LEVEL_1_FLAGS);
1100 }
1101 
1102 /**
1103  * pvr_page_table_l1_get_raw() - Access the raw equivalent of a mirror level 1
1104  *                               page table.
1105  * @table: Target level 1 page table.
1106  *
1107  * Essentially returns the CPU address of the raw equivalent of @table, cast to
1108  * a &struct pvr_page_table_l1_raw pointer.
1109  *
1110  * You probably want to call pvr_page_table_l1_get_entry_raw() instead.
1111  *
1112  * Return:
1113  * The raw equivalent of @table.
1114  */
1115 static struct pvr_page_table_l1_raw *
1116 pvr_page_table_l1_get_raw(struct pvr_page_table_l1 *table)
1117 {
1118 	return table->backing_page.host_ptr;
1119 }
1120 
1121 /**
1122  * pvr_page_table_l1_get_entry_raw() - Access an entry from the raw equivalent
1123  *                                     of a mirror level 1 page table.
1124  * @table: Target level 1 page table.
1125  * @idx: Index of the entry to access.
1126  *
1127  * Technically this function returns a pointer to a slot in a raw level 1 page
1128  * table, since the returned "entry" is not guaranteed to be valid. The caller
1129  * must verify the validity of the entry at the returned address (perhaps using
1130  * pvr_page_table_l1_entry_raw_is_valid()) before reading or overwriting it.
1131  *
1132  * The value of @idx is not checked here; it is the callers responsibility to
1133  * ensure @idx refers to a valid index within @table before dereferencing the
1134  * returned pointer.
1135  *
1136  * Return:
1137  * A pointer to the requested raw level 1 page table entry.
1138  */
1139 static struct pvr_page_table_l1_entry_raw *
1140 pvr_page_table_l1_get_entry_raw(struct pvr_page_table_l1 *table, u16 idx)
1141 {
1142 	return &pvr_page_table_l1_get_raw(table)->entries[idx];
1143 }
1144 
1145 /**
1146  * pvr_page_table_l1_entry_is_valid() - Check if a level 1 page table entry is
1147  *                                      marked as valid.
1148  * @table: Target level 1 page table.
1149  * @idx: Index of the entry to check.
1150  *
1151  * The value of @idx is not checked here; it is the callers responsibility to
1152  * ensure @idx refers to a valid index within @table before calling this
1153  * function.
1154  */
1155 static bool
1156 pvr_page_table_l1_entry_is_valid(struct pvr_page_table_l1 *table, u16 idx)
1157 {
1158 	struct pvr_page_table_l1_entry_raw entry_raw =
1159 		*pvr_page_table_l1_get_entry_raw(table, idx);
1160 
1161 	return pvr_page_table_l1_entry_raw_is_valid(entry_raw);
1162 }
1163 
1164 /**
1165  * struct pvr_page_table_l0 - A wrapped level 0 page table.
1166  *
1167  * To access the raw part of this table, use pvr_page_table_l0_get_raw().
1168  * Alternatively to access a raw entry directly, use
1169  * pvr_page_table_l0_get_entry_raw().
1170  *
1171  * There is no mirror representation of an individual page, so this type has no
1172  * &entries member.
1173  */
1174 struct pvr_page_table_l0 {
1175 	/**
1176 	 * @backing_page: A handle to the memory which holds the raw
1177 	 * equivalent of this table. **For internal use only.**
1178 	 */
1179 	struct pvr_mmu_backing_page backing_page;
1180 
1181 	union {
1182 		/**
1183 		 * @parent: The parent of this node in the page table tree structure.
1184 		 *
1185 		 * This is also a mirror table.
1186 		 *
1187 		 * Only valid when the L0 page table is active. When the L0 page table
1188 		 * has been removed and queued for destruction, the next_free field
1189 		 * should be used instead.
1190 		 */
1191 		struct pvr_page_table_l1 *parent;
1192 
1193 		/**
1194 		 * @next_free: Pointer to the next L0 page table to take/free.
1195 		 *
1196 		 * Used to form a linked list of L0 page tables. This is used
1197 		 * when preallocating tables and when the page table has been
1198 		 * removed and queued for destruction.
1199 		 */
1200 		struct pvr_page_table_l0 *next_free;
1201 	};
1202 
1203 	/**
1204 	 * @parent_idx: The index of the entry in the parent table (see
1205 	 * @parent) which corresponds to this table.
1206 	 */
1207 	u16 parent_idx;
1208 
1209 	/**
1210 	 * @entry_count: The current number of valid entries (that we know of)
1211 	 * in this table. This value is essentially a refcount - the table is
1212 	 * destroyed when this value is decremented to zero by
1213 	 * pvr_page_table_l0_remove().
1214 	 */
1215 	u16 entry_count;
1216 };
1217 
1218 /**
1219  * pvr_page_table_l0_init() - Initialize a level 0 page table.
1220  * @table: Target level 0 page table.
1221  * @pvr_dev: Target PowerVR device
1222  *
1223  * When this function returns successfully, @table is still not considered
1224  * valid. It must be inserted into the page table tree structure with
1225  * pvr_page_table_l1_insert() before it is ready for use.
1226  *
1227  * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
1228  * this function.
1229  *
1230  * Return:
1231  *  * 0 on success, or
1232  *  * Any error encountered while intializing &table->backing_page using
1233  *    pvr_mmu_backing_page_init().
1234  */
1235 static int
1236 pvr_page_table_l0_init(struct pvr_page_table_l0 *table,
1237 		       struct pvr_device *pvr_dev)
1238 {
1239 	table->parent_idx = PVR_IDX_INVALID;
1240 
1241 	return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
1242 }
1243 
1244 /**
1245  * pvr_page_table_l0_free() - Teardown a level 0 page table.
1246  * @table: Target level 0 page table.
1247  *
1248  * It is an error to attempt to use @table after calling this function, even
1249  * indirectly. This includes calling pvr_page_table_l1_remove(), which must
1250  * be called *before* pvr_page_table_l0_free().
1251  */
1252 static void
1253 pvr_page_table_l0_free(struct pvr_page_table_l0 *table)
1254 {
1255 	pvr_mmu_backing_page_fini(&table->backing_page);
1256 	kfree(table);
1257 }
1258 
1259 /**
1260  * pvr_page_table_l0_sync() - Flush a level 0 page table from the CPU to the
1261  *                            device.
1262  * @table: Target level 0 page table.
1263  *
1264  * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
1265  * warning there applies here too: **Only call pvr_page_table_l0_sync() once
1266  * you're sure you have no more changes to make to** @table **in the immediate
1267  * future.**
1268  *
1269  * If child pages of @table also need to be flushed, this should be done first
1270  * using a DMA sync function (e.g. dma_sync_sg_for_device()) *before* calling
1271  * this function.
1272  */
1273 static void
1274 pvr_page_table_l0_sync(struct pvr_page_table_l0 *table)
1275 {
1276 	pvr_mmu_backing_page_sync(&table->backing_page, PVR_MMU_SYNC_LEVEL_0_FLAGS);
1277 }
1278 
1279 /**
1280  * pvr_page_table_l0_get_raw() - Access the raw equivalent of a mirror level 0
1281  *                               page table.
1282  * @table: Target level 0 page table.
1283  *
1284  * Essentially returns the CPU address of the raw equivalent of @table, cast to
1285  * a &struct pvr_page_table_l0_raw pointer.
1286  *
1287  * You probably want to call pvr_page_table_l0_get_entry_raw() instead.
1288  *
1289  * Return:
1290  * The raw equivalent of @table.
1291  */
1292 static struct pvr_page_table_l0_raw *
1293 pvr_page_table_l0_get_raw(struct pvr_page_table_l0 *table)
1294 {
1295 	return table->backing_page.host_ptr;
1296 }
1297 
1298 /**
1299  * pvr_page_table_l0_get_entry_raw() - Access an entry from the raw equivalent
1300  *                                     of a mirror level 0 page table.
1301  * @table: Target level 0 page table.
1302  * @idx: Index of the entry to access.
1303  *
1304  * Technically this function returns a pointer to a slot in a raw level 0 page
1305  * table, since the returned "entry" is not guaranteed to be valid. The caller
1306  * must verify the validity of the entry at the returned address (perhaps using
1307  * pvr_page_table_l0_entry_raw_is_valid()) before reading or overwriting it.
1308  *
1309  * The value of @idx is not checked here; it is the callers responsibility to
1310  * ensure @idx refers to a valid index within @table before dereferencing the
1311  * returned pointer. This is espcially important for level 0 page tables, which
1312  * can have a variable number of entries.
1313  *
1314  * Return:
1315  * A pointer to the requested raw level 0 page table entry.
1316  */
1317 static struct pvr_page_table_l0_entry_raw *
1318 pvr_page_table_l0_get_entry_raw(struct pvr_page_table_l0 *table, u16 idx)
1319 {
1320 	return &pvr_page_table_l0_get_raw(table)->entries[idx];
1321 }
1322 
1323 /**
1324  * pvr_page_table_l0_entry_is_valid() - Check if a level 0 page table entry is
1325  *                                      marked as valid.
1326  * @table: Target level 0 page table.
1327  * @idx: Index of the entry to check.
1328  *
1329  * The value of @idx is not checked here; it is the callers responsibility to
1330  * ensure @idx refers to a valid index within @table before calling this
1331  * function.
1332  */
1333 static bool
1334 pvr_page_table_l0_entry_is_valid(struct pvr_page_table_l0 *table, u16 idx)
1335 {
1336 	struct pvr_page_table_l0_entry_raw entry_raw =
1337 		*pvr_page_table_l0_get_entry_raw(table, idx);
1338 
1339 	return pvr_page_table_l0_entry_raw_is_valid(entry_raw);
1340 }
1341 
1342 /**
1343  * struct pvr_mmu_context - context holding data for operations at page
1344  * catalogue level, intended for use with a VM context.
1345  */
1346 struct pvr_mmu_context {
1347 	/** @pvr_dev: The PVR device associated with the owning VM context. */
1348 	struct pvr_device *pvr_dev;
1349 
1350 	/** @page_table_l2: The MMU table root. */
1351 	struct pvr_page_table_l2 page_table_l2;
1352 };
1353 
1354 /**
1355  * struct pvr_page_table_ptr - A reference to a single physical page as indexed
1356  * by the page table structure.
1357  *
1358  * Intended for embedding in a &struct pvr_mmu_op_context.
1359  */
1360 struct pvr_page_table_ptr {
1361 	/**
1362 	 * @l1_table: A cached handle to the level 1 page table the
1363 	 * context is currently traversing.
1364 	 */
1365 	struct pvr_page_table_l1 *l1_table;
1366 
1367 	/**
1368 	 * @l0_table: A cached handle to the level 0 page table the
1369 	 * context is currently traversing.
1370 	 */
1371 	struct pvr_page_table_l0 *l0_table;
1372 
1373 	/**
1374 	 * @l2_idx: Index into the level 2 page table the context is
1375 	 * currently referencing.
1376 	 */
1377 	u16 l2_idx;
1378 
1379 	/**
1380 	 * @l1_idx: Index into the level 1 page table the context is
1381 	 * currently referencing.
1382 	 */
1383 	u16 l1_idx;
1384 
1385 	/**
1386 	 * @l0_idx: Index into the level 0 page table the context is
1387 	 * currently referencing.
1388 	 */
1389 	u16 l0_idx;
1390 };
1391 
1392 /**
1393  * struct pvr_mmu_op_context - context holding data for individual
1394  * device-virtual mapping operations. Intended for use with a VM bind operation.
1395  */
1396 struct pvr_mmu_op_context {
1397 	/** @mmu_ctx: The MMU context associated with the owning VM context. */
1398 	struct pvr_mmu_context *mmu_ctx;
1399 
1400 	/** @map: Data specifically for map operations. */
1401 	struct {
1402 		/**
1403 		 * @sgt: Scatter gather table containing pages pinned for use by
1404 		 * this context - these are currently pinned when initialising
1405 		 * the VM bind operation.
1406 		 */
1407 		struct sg_table *sgt;
1408 
1409 		/** @sgt_offset: Start address of the device-virtual mapping. */
1410 		u64 sgt_offset;
1411 
1412 		/**
1413 		 * @l1_prealloc_tables: Preallocated l1 page table objects
1414 		 * use by this context when creating a page mapping. Linked list
1415 		 * fully created during initialisation.
1416 		 */
1417 		struct pvr_page_table_l1 *l1_prealloc_tables;
1418 
1419 		/**
1420 		 * @l0_prealloc_tables: Preallocated l0 page table objects
1421 		 * use by this context when creating a page mapping. Linked list
1422 		 * fully created during initialisation.
1423 		 */
1424 		struct pvr_page_table_l0 *l0_prealloc_tables;
1425 	} map;
1426 
1427 	/** @unmap: Data specifically for unmap operations. */
1428 	struct {
1429 		/**
1430 		 * @l1_free_tables: Collects page table objects freed by unmap
1431 		 * ops. Linked list empty at creation.
1432 		 */
1433 		struct pvr_page_table_l1 *l1_free_tables;
1434 
1435 		/**
1436 		 * @l0_free_tables: Collects page table objects freed by unmap
1437 		 * ops. Linked list empty at creation.
1438 		 */
1439 		struct pvr_page_table_l0 *l0_free_tables;
1440 	} unmap;
1441 
1442 	/**
1443 	 * @curr_page: A reference to a single physical page as indexed by the
1444 	 * page table structure.
1445 	 */
1446 	struct pvr_page_table_ptr curr_page;
1447 
1448 	/**
1449 	 * @sync_level_required: The maximum level of the page table tree
1450 	 * structure which has (possibly) been modified since it was last
1451 	 * flushed to the device.
1452 	 *
1453 	 * This field should only be set with pvr_mmu_op_context_require_sync()
1454 	 * or indirectly by pvr_mmu_op_context_sync_partial().
1455 	 */
1456 	enum pvr_mmu_sync_level sync_level_required;
1457 };
1458 
1459 /**
1460  * pvr_page_table_l2_insert() - Insert an entry referring to a level 1 page
1461  * table into a level 2 page table.
1462  * @op_ctx: Target MMU op context pointing at the entry to insert the L1 page
1463  * table into.
1464  * @child_table: Target level 1 page table to be referenced by the new entry.
1465  *
1466  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1467  * valid L2 entry.
1468  *
1469  * It is the caller's responsibility to execute any memory barries to ensure
1470  * that the creation of @child_table is ordered before the L2 entry is inserted.
1471  */
1472 static void
1473 pvr_page_table_l2_insert(struct pvr_mmu_op_context *op_ctx,
1474 			 struct pvr_page_table_l1 *child_table)
1475 {
1476 	struct pvr_page_table_l2 *l2_table =
1477 		&op_ctx->mmu_ctx->page_table_l2;
1478 	struct pvr_page_table_l2_entry_raw *entry_raw =
1479 		pvr_page_table_l2_get_entry_raw(l2_table,
1480 						op_ctx->curr_page.l2_idx);
1481 
1482 	pvr_page_table_l2_entry_raw_set(entry_raw,
1483 					child_table->backing_page.dma_addr);
1484 
1485 	child_table->parent = l2_table;
1486 	child_table->parent_idx = op_ctx->curr_page.l2_idx;
1487 	l2_table->entries[op_ctx->curr_page.l2_idx] = child_table;
1488 	++l2_table->entry_count;
1489 	op_ctx->curr_page.l1_table = child_table;
1490 }
1491 
1492 /**
1493  * pvr_page_table_l2_remove() - Remove a level 1 page table from a level 2 page
1494  * table.
1495  * @op_ctx: Target MMU op context pointing at the L2 entry to remove.
1496  *
1497  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1498  * valid L2 entry.
1499  */
1500 static void
1501 pvr_page_table_l2_remove(struct pvr_mmu_op_context *op_ctx)
1502 {
1503 	struct pvr_page_table_l2 *l2_table =
1504 		&op_ctx->mmu_ctx->page_table_l2;
1505 	struct pvr_page_table_l2_entry_raw *entry_raw =
1506 		pvr_page_table_l2_get_entry_raw(l2_table,
1507 						op_ctx->curr_page.l1_table->parent_idx);
1508 
1509 	WARN_ON(op_ctx->curr_page.l1_table->parent != l2_table);
1510 
1511 	pvr_page_table_l2_entry_raw_clear(entry_raw);
1512 
1513 	l2_table->entries[op_ctx->curr_page.l1_table->parent_idx] = NULL;
1514 	op_ctx->curr_page.l1_table->parent_idx = PVR_IDX_INVALID;
1515 	op_ctx->curr_page.l1_table->next_free = op_ctx->unmap.l1_free_tables;
1516 	op_ctx->unmap.l1_free_tables = op_ctx->curr_page.l1_table;
1517 	op_ctx->curr_page.l1_table = NULL;
1518 
1519 	--l2_table->entry_count;
1520 }
1521 
1522 /**
1523  * pvr_page_table_l1_insert() - Insert an entry referring to a level 0 page
1524  * table into a level 1 page table.
1525  * @op_ctx: Target MMU op context pointing at the entry to insert the L0 page
1526  * table into.
1527  * @child_table: L0 page table to insert.
1528  *
1529  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1530  * valid L1 entry.
1531  *
1532  * It is the caller's responsibility to execute any memory barries to ensure
1533  * that the creation of @child_table is ordered before the L1 entry is inserted.
1534  */
1535 static void
1536 pvr_page_table_l1_insert(struct pvr_mmu_op_context *op_ctx,
1537 			 struct pvr_page_table_l0 *child_table)
1538 {
1539 	struct pvr_page_table_l1_entry_raw *entry_raw =
1540 		pvr_page_table_l1_get_entry_raw(op_ctx->curr_page.l1_table,
1541 						op_ctx->curr_page.l1_idx);
1542 
1543 	pvr_page_table_l1_entry_raw_set(entry_raw,
1544 					child_table->backing_page.dma_addr);
1545 
1546 	child_table->parent = op_ctx->curr_page.l1_table;
1547 	child_table->parent_idx = op_ctx->curr_page.l1_idx;
1548 	op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l1_idx] = child_table;
1549 	++op_ctx->curr_page.l1_table->entry_count;
1550 	op_ctx->curr_page.l0_table = child_table;
1551 }
1552 
1553 /**
1554  * pvr_page_table_l1_remove() - Remove a level 0 page table from a level 1 page
1555  *                              table.
1556  * @op_ctx: Target MMU op context pointing at the L1 entry to remove.
1557  *
1558  * If this function results in the L1 table becoming empty, it will be removed
1559  * from its parent level 2 page table and destroyed.
1560  *
1561  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1562  * valid L1 entry.
1563  */
1564 static void
1565 pvr_page_table_l1_remove(struct pvr_mmu_op_context *op_ctx)
1566 {
1567 	struct pvr_page_table_l1_entry_raw *entry_raw =
1568 		pvr_page_table_l1_get_entry_raw(op_ctx->curr_page.l0_table->parent,
1569 						op_ctx->curr_page.l0_table->parent_idx);
1570 
1571 	WARN_ON(op_ctx->curr_page.l0_table->parent !=
1572 		op_ctx->curr_page.l1_table);
1573 
1574 	pvr_page_table_l1_entry_raw_clear(entry_raw);
1575 
1576 	op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l0_table->parent_idx] = NULL;
1577 	op_ctx->curr_page.l0_table->parent_idx = PVR_IDX_INVALID;
1578 	op_ctx->curr_page.l0_table->next_free = op_ctx->unmap.l0_free_tables;
1579 	op_ctx->unmap.l0_free_tables = op_ctx->curr_page.l0_table;
1580 	op_ctx->curr_page.l0_table = NULL;
1581 
1582 	if (--op_ctx->curr_page.l1_table->entry_count == 0) {
1583 		/* Clear the parent L2 page table entry. */
1584 		if (op_ctx->curr_page.l1_table->parent_idx != PVR_IDX_INVALID)
1585 			pvr_page_table_l2_remove(op_ctx);
1586 	}
1587 }
1588 
1589 /**
1590  * pvr_page_table_l0_insert() - Insert an entry referring to a physical page
1591  * into a level 0 page table.
1592  * @op_ctx: Target MMU op context pointing at the L0 entry to insert.
1593  * @dma_addr: Target DMA address to be referenced by the new entry.
1594  * @flags: Page options to be stored in the new entry.
1595  *
1596  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1597  * valid L0 entry.
1598  */
1599 static void
1600 pvr_page_table_l0_insert(struct pvr_mmu_op_context *op_ctx,
1601 			 dma_addr_t dma_addr, struct pvr_page_flags_raw flags)
1602 {
1603 	struct pvr_page_table_l0_entry_raw *entry_raw =
1604 		pvr_page_table_l0_get_entry_raw(op_ctx->curr_page.l0_table,
1605 						op_ctx->curr_page.l0_idx);
1606 
1607 	pvr_page_table_l0_entry_raw_set(entry_raw, dma_addr, flags);
1608 
1609 	/*
1610 	 * There is no entry to set here - we don't keep a mirror of
1611 	 * individual pages.
1612 	 */
1613 
1614 	++op_ctx->curr_page.l0_table->entry_count;
1615 }
1616 
1617 /**
1618  * pvr_page_table_l0_remove() - Remove a physical page from a level 0 page
1619  * table.
1620  * @op_ctx: Target MMU op context pointing at the L0 entry to remove.
1621  *
1622  * If this function results in the L0 table becoming empty, it will be removed
1623  * from its parent L1 page table and destroyed.
1624  *
1625  * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
1626  * valid L0 entry.
1627  */
1628 static void
1629 pvr_page_table_l0_remove(struct pvr_mmu_op_context *op_ctx)
1630 {
1631 	struct pvr_page_table_l0_entry_raw *entry_raw =
1632 		pvr_page_table_l0_get_entry_raw(op_ctx->curr_page.l0_table,
1633 						op_ctx->curr_page.l0_idx);
1634 
1635 	pvr_page_table_l0_entry_raw_clear(entry_raw);
1636 
1637 	/*
1638 	 * There is no entry to clear here - we don't keep a mirror of
1639 	 * individual pages.
1640 	 */
1641 
1642 	if (--op_ctx->curr_page.l0_table->entry_count == 0) {
1643 		/* Clear the parent L1 page table entry. */
1644 		if (op_ctx->curr_page.l0_table->parent_idx != PVR_IDX_INVALID)
1645 			pvr_page_table_l1_remove(op_ctx);
1646 	}
1647 }
1648 
1649 /**
1650  * DOC: Page table index utilities
1651  */
1652 
1653 /**
1654  * pvr_page_table_l2_idx() - Calculate the level 2 page table index for a
1655  *                           device-virtual address.
1656  * @device_addr: Target device-virtual address.
1657  *
1658  * This function does not perform any bounds checking - it is the caller's
1659  * responsibility to ensure that @device_addr is valid before interpreting
1660  * the result.
1661  *
1662  * Return:
1663  * The index into a level 2 page table corresponding to @device_addr.
1664  */
1665 static u16
1666 pvr_page_table_l2_idx(u64 device_addr)
1667 {
1668 	return (device_addr & ~ROGUE_MMUCTRL_VADDR_PC_INDEX_CLRMSK) >>
1669 	       ROGUE_MMUCTRL_VADDR_PC_INDEX_SHIFT;
1670 }
1671 
1672 /**
1673  * pvr_page_table_l1_idx() - Calculate the level 1 page table index for a
1674  *                           device-virtual address.
1675  * @device_addr: Target device-virtual address.
1676  *
1677  * This function does not perform any bounds checking - it is the caller's
1678  * responsibility to ensure that @device_addr is valid before interpreting
1679  * the result.
1680  *
1681  * Return:
1682  * The index into a level 1 page table corresponding to @device_addr.
1683  */
1684 static u16
1685 pvr_page_table_l1_idx(u64 device_addr)
1686 {
1687 	return (device_addr & ~ROGUE_MMUCTRL_VADDR_PD_INDEX_CLRMSK) >>
1688 	       ROGUE_MMUCTRL_VADDR_PD_INDEX_SHIFT;
1689 }
1690 
1691 /**
1692  * pvr_page_table_l0_idx() - Calculate the level 0 page table index for a
1693  *                           device-virtual address.
1694  * @device_addr: Target device-virtual address.
1695  *
1696  * This function does not perform any bounds checking - it is the caller's
1697  * responsibility to ensure that @device_addr is valid before interpreting
1698  * the result.
1699  *
1700  * Return:
1701  * The index into a level 0 page table corresponding to @device_addr.
1702  */
1703 static u16
1704 pvr_page_table_l0_idx(u64 device_addr)
1705 {
1706 	return (device_addr & ~ROGUE_MMUCTRL_VADDR_PT_INDEX_CLRMSK) >>
1707 	       ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT;
1708 }
1709 
1710 /**
1711  * DOC: High-level page table operations
1712  */
1713 
1714 /**
1715  * pvr_page_table_l1_get_or_insert() - Retrieves (optionally inserting if
1716  * necessary) a level 1 page table from the specified level 2 page table entry.
1717  * @op_ctx: Target MMU op context.
1718  * @should_insert: [IN] Specifies whether new page tables should be inserted
1719  * when empty page table entries are encountered during traversal.
1720  *
1721  * Return:
1722  *  * 0 on success, or
1723  *
1724  *    If @should_insert is %false:
1725  *     * -%ENXIO if a level 1 page table would have been inserted.
1726  *
1727  *    If @should_insert is %true:
1728  *     * Any error encountered while inserting the level 1 page table.
1729  */
1730 static int
1731 pvr_page_table_l1_get_or_insert(struct pvr_mmu_op_context *op_ctx,
1732 				bool should_insert)
1733 {
1734 	struct pvr_page_table_l2 *l2_table =
1735 		&op_ctx->mmu_ctx->page_table_l2;
1736 	struct pvr_page_table_l1 *table;
1737 
1738 	if (pvr_page_table_l2_entry_is_valid(l2_table,
1739 					     op_ctx->curr_page.l2_idx)) {
1740 		op_ctx->curr_page.l1_table =
1741 			l2_table->entries[op_ctx->curr_page.l2_idx];
1742 		return 0;
1743 	}
1744 
1745 	if (!should_insert)
1746 		return -ENXIO;
1747 
1748 	/* Take a prealloced table. */
1749 	table = op_ctx->map.l1_prealloc_tables;
1750 	if (!table)
1751 		return -ENOMEM;
1752 
1753 	/* Pop */
1754 	op_ctx->map.l1_prealloc_tables = table->next_free;
1755 	table->next_free = NULL;
1756 
1757 	/* Ensure new table is fully written out before adding to L2 page table. */
1758 	wmb();
1759 
1760 	pvr_page_table_l2_insert(op_ctx, table);
1761 
1762 	return 0;
1763 }
1764 
1765 /**
1766  * pvr_page_table_l0_get_or_insert() - Retrieves (optionally inserting if
1767  * necessary) a level 0 page table from the specified level 1 page table entry.
1768  * @op_ctx: Target MMU op context.
1769  * @should_insert: [IN] Specifies whether new page tables should be inserted
1770  * when empty page table entries are encountered during traversal.
1771  *
1772  * Return:
1773  *  * 0 on success,
1774  *
1775  *    If @should_insert is %false:
1776  *     * -%ENXIO if a level 0 page table would have been inserted.
1777  *
1778  *    If @should_insert is %true:
1779  *     * Any error encountered while inserting the level 0 page table.
1780  */
1781 static int
1782 pvr_page_table_l0_get_or_insert(struct pvr_mmu_op_context *op_ctx,
1783 				bool should_insert)
1784 {
1785 	struct pvr_page_table_l0 *table;
1786 
1787 	if (pvr_page_table_l1_entry_is_valid(op_ctx->curr_page.l1_table,
1788 					     op_ctx->curr_page.l1_idx)) {
1789 		op_ctx->curr_page.l0_table =
1790 			op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l1_idx];
1791 		return 0;
1792 	}
1793 
1794 	if (!should_insert)
1795 		return -ENXIO;
1796 
1797 	/* Take a prealloced table. */
1798 	table = op_ctx->map.l0_prealloc_tables;
1799 	if (!table)
1800 		return -ENOMEM;
1801 
1802 	/* Pop */
1803 	op_ctx->map.l0_prealloc_tables = table->next_free;
1804 	table->next_free = NULL;
1805 
1806 	/* Ensure new table is fully written out before adding to L1 page table. */
1807 	wmb();
1808 
1809 	pvr_page_table_l1_insert(op_ctx, table);
1810 
1811 	return 0;
1812 }
1813 
1814 /**
1815  * pvr_mmu_context_create() - Create an MMU context.
1816  * @pvr_dev: PVR device associated with owning VM context.
1817  *
1818  * Returns:
1819  *  * Newly created MMU context object on success, or
1820  *  * -%ENOMEM if no memory is available,
1821  *  * Any error code returned by pvr_page_table_l2_init().
1822  */
1823 struct pvr_mmu_context *pvr_mmu_context_create(struct pvr_device *pvr_dev)
1824 {
1825 	struct pvr_mmu_context *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
1826 	int err;
1827 
1828 	if (!ctx)
1829 		return ERR_PTR(-ENOMEM);
1830 
1831 	err = pvr_page_table_l2_init(&ctx->page_table_l2, pvr_dev);
1832 	if (err)
1833 		return ERR_PTR(err);
1834 
1835 	ctx->pvr_dev = pvr_dev;
1836 
1837 	return ctx;
1838 }
1839 
1840 /**
1841  * pvr_mmu_context_destroy() - Destroy an MMU context.
1842  * @ctx: Target MMU context.
1843  */
1844 void pvr_mmu_context_destroy(struct pvr_mmu_context *ctx)
1845 {
1846 	pvr_page_table_l2_fini(&ctx->page_table_l2);
1847 	kfree(ctx);
1848 }
1849 
1850 /**
1851  * pvr_mmu_get_root_table_dma_addr() - Get the DMA address of the root of the
1852  * page table structure behind a VM context.
1853  * @ctx: Target MMU context.
1854  */
1855 dma_addr_t pvr_mmu_get_root_table_dma_addr(struct pvr_mmu_context *ctx)
1856 {
1857 	return ctx->page_table_l2.backing_page.dma_addr;
1858 }
1859 
1860 /**
1861  * pvr_page_table_l1_alloc() - Allocate a l1 page_table object.
1862  * @ctx: MMU context of owning VM context.
1863  *
1864  * Returns:
1865  *  * Newly created page table object on success, or
1866  *  * -%ENOMEM if no memory is available,
1867  *  * Any error code returned by pvr_page_table_l1_init().
1868  */
1869 static struct pvr_page_table_l1 *
1870 pvr_page_table_l1_alloc(struct pvr_mmu_context *ctx)
1871 {
1872 	int err;
1873 
1874 	struct pvr_page_table_l1 *table =
1875 		kzalloc(sizeof(*table), GFP_KERNEL);
1876 
1877 	if (!table)
1878 		return ERR_PTR(-ENOMEM);
1879 
1880 	err = pvr_page_table_l1_init(table, ctx->pvr_dev);
1881 	if (err) {
1882 		kfree(table);
1883 		return ERR_PTR(err);
1884 	}
1885 
1886 	return table;
1887 }
1888 
1889 /**
1890  * pvr_page_table_l0_alloc() - Allocate a l0 page_table object.
1891  * @ctx: MMU context of owning VM context.
1892  *
1893  * Returns:
1894  *  * Newly created page table object on success, or
1895  *  * -%ENOMEM if no memory is available,
1896  *  * Any error code returned by pvr_page_table_l0_init().
1897  */
1898 static struct pvr_page_table_l0 *
1899 pvr_page_table_l0_alloc(struct pvr_mmu_context *ctx)
1900 {
1901 	int err;
1902 
1903 	struct pvr_page_table_l0 *table =
1904 		kzalloc(sizeof(*table), GFP_KERNEL);
1905 
1906 	if (!table)
1907 		return ERR_PTR(-ENOMEM);
1908 
1909 	err = pvr_page_table_l0_init(table, ctx->pvr_dev);
1910 	if (err) {
1911 		kfree(table);
1912 		return ERR_PTR(err);
1913 	}
1914 
1915 	return table;
1916 }
1917 
1918 /**
1919  * pvr_mmu_op_context_require_sync() - Mark an MMU op context as requiring a
1920  * sync operation for the referenced page tables up to a specified level.
1921  * @op_ctx: Target MMU op context.
1922  * @level: Maximum page table level for which a sync is required.
1923  */
1924 static void
1925 pvr_mmu_op_context_require_sync(struct pvr_mmu_op_context *op_ctx,
1926 				enum pvr_mmu_sync_level level)
1927 {
1928 	if (op_ctx->sync_level_required < level)
1929 		op_ctx->sync_level_required = level;
1930 }
1931 
1932 /**
1933  * pvr_mmu_op_context_sync_manual() - Trigger a sync of some or all of the
1934  * page tables referenced by a MMU op context.
1935  * @op_ctx: Target MMU op context.
1936  * @level: Maximum page table level to sync.
1937  *
1938  * Do not call this function directly. Instead use
1939  * pvr_mmu_op_context_sync_partial() which is checked against the current
1940  * value of &op_ctx->sync_level_required as set by
1941  * pvr_mmu_op_context_require_sync().
1942  */
1943 static void
1944 pvr_mmu_op_context_sync_manual(struct pvr_mmu_op_context *op_ctx,
1945 			       enum pvr_mmu_sync_level level)
1946 {
1947 	/*
1948 	 * We sync the page table levels in ascending order (starting from the
1949 	 * leaf node) to ensure consistency.
1950 	 */
1951 
1952 	WARN_ON(level < PVR_MMU_SYNC_LEVEL_NONE);
1953 
1954 	if (level <= PVR_MMU_SYNC_LEVEL_NONE)
1955 		return;
1956 
1957 	if (op_ctx->curr_page.l0_table)
1958 		pvr_page_table_l0_sync(op_ctx->curr_page.l0_table);
1959 
1960 	if (level < PVR_MMU_SYNC_LEVEL_1)
1961 		return;
1962 
1963 	if (op_ctx->curr_page.l1_table)
1964 		pvr_page_table_l1_sync(op_ctx->curr_page.l1_table);
1965 
1966 	if (level < PVR_MMU_SYNC_LEVEL_2)
1967 		return;
1968 
1969 	pvr_page_table_l2_sync(&op_ctx->mmu_ctx->page_table_l2);
1970 }
1971 
1972 /**
1973  * pvr_mmu_op_context_sync_partial() - Trigger a sync of some or all of the
1974  * page tables referenced by a MMU op context.
1975  * @op_ctx: Target MMU op context.
1976  * @level: Requested page table level to sync up to (inclusive).
1977  *
1978  * If @level is greater than the maximum level recorded by @op_ctx as requiring
1979  * a sync operation, only the previously recorded maximum will be used.
1980  *
1981  * Additionally, if @level is greater than or equal to the maximum level
1982  * recorded by @op_ctx as requiring a sync operation, that maximum level will be
1983  * reset as a full sync will be performed. This is equivalent to calling
1984  * pvr_mmu_op_context_sync().
1985  */
1986 static void
1987 pvr_mmu_op_context_sync_partial(struct pvr_mmu_op_context *op_ctx,
1988 				enum pvr_mmu_sync_level level)
1989 {
1990 	/*
1991 	 * If the requested sync level is greater than or equal to the
1992 	 * currently required sync level, we do two things:
1993 	 *  * Don't waste time syncing levels we haven't previously marked as
1994 	 *    requiring a sync, and
1995 	 *  * Reset the required sync level since we are about to sync
1996 	 *    everything that was previously marked as requiring a sync.
1997 	 */
1998 	if (level >= op_ctx->sync_level_required) {
1999 		level = op_ctx->sync_level_required;
2000 		op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
2001 	}
2002 
2003 	pvr_mmu_op_context_sync_manual(op_ctx, level);
2004 }
2005 
2006 /**
2007  * pvr_mmu_op_context_sync() - Trigger a sync of every page table referenced by
2008  * a MMU op context.
2009  * @op_ctx: Target MMU op context.
2010  *
2011  * The maximum level marked internally as requiring a sync will be reset so
2012  * that subsequent calls to this function will be no-ops unless @op_ctx is
2013  * otherwise updated.
2014  */
2015 static void
2016 pvr_mmu_op_context_sync(struct pvr_mmu_op_context *op_ctx)
2017 {
2018 	pvr_mmu_op_context_sync_manual(op_ctx, op_ctx->sync_level_required);
2019 
2020 	op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
2021 }
2022 
2023 /**
2024  * pvr_mmu_op_context_load_tables() - Load pointers to tables in each level of
2025  * the page table tree structure needed to reference the physical page
2026  * referenced by a MMU op context.
2027  * @op_ctx: Target MMU op context.
2028  * @should_create: Specifies whether new page tables should be created when
2029  * empty page table entries are encountered during traversal.
2030  * @load_level_required: Maximum page table level to load.
2031  *
2032  * If @should_create is %true, this function may modify the stored required
2033  * sync level of @op_ctx as new page tables are created and inserted into their
2034  * respective parents.
2035  *
2036  * Since there is only one root page table, it is technically incorrect to call
2037  * this function with a value of @load_level_required greater than or equal to
2038  * the root level number. However, this is not explicitly disallowed here.
2039  *
2040  * Return:
2041  *  * 0 on success,
2042  *  * Any error returned by pvr_page_table_l1_get_or_create() if
2043  *    @load_level_required >= 1 except -%ENXIO, or
2044  *  * Any error returned by pvr_page_table_l0_get_or_create() if
2045  *    @load_level_required >= 0 except -%ENXIO.
2046  */
2047 static int
2048 pvr_mmu_op_context_load_tables(struct pvr_mmu_op_context *op_ctx,
2049 			       bool should_create,
2050 			       enum pvr_mmu_sync_level load_level_required)
2051 {
2052 	const struct pvr_page_table_l1 *l1_head_before =
2053 		op_ctx->map.l1_prealloc_tables;
2054 	const struct pvr_page_table_l0 *l0_head_before =
2055 		op_ctx->map.l0_prealloc_tables;
2056 	int err;
2057 
2058 	/* Clear tables we're about to fetch in case of error states. */
2059 	if (load_level_required >= PVR_MMU_SYNC_LEVEL_1)
2060 		op_ctx->curr_page.l1_table = NULL;
2061 
2062 	if (load_level_required >= PVR_MMU_SYNC_LEVEL_0)
2063 		op_ctx->curr_page.l0_table = NULL;
2064 
2065 	/* Get or create L1 page table. */
2066 	if (load_level_required >= PVR_MMU_SYNC_LEVEL_1) {
2067 		err = pvr_page_table_l1_get_or_insert(op_ctx, should_create);
2068 		if (err) {
2069 			/*
2070 			 * If @should_create is %false and no L1 page table was
2071 			 * found, return early but without an error. Since
2072 			 * pvr_page_table_l1_get_or_create() can only return
2073 			 * -%ENXIO if @should_create is %false, there is no
2074 			 * need to check it here.
2075 			 */
2076 			if (err == -ENXIO)
2077 				err = 0;
2078 
2079 			return err;
2080 		}
2081 	}
2082 
2083 	/* Get or create L0 page table. */
2084 	if (load_level_required >= PVR_MMU_SYNC_LEVEL_0) {
2085 		err = pvr_page_table_l0_get_or_insert(op_ctx, should_create);
2086 		if (err) {
2087 			/*
2088 			 * If @should_create is %false and no L0 page table was
2089 			 * found, return early but without an error. Since
2090 			 * pvr_page_table_l0_get_or_insert() can only return
2091 			 * -%ENXIO if @should_create is %false, there is no
2092 			 * need to check it here.
2093 			 */
2094 			if (err == -ENXIO)
2095 				err = 0;
2096 
2097 			/*
2098 			 * At this point, an L1 page table could have been
2099 			 * inserted but is now empty due to the failed attempt
2100 			 * at inserting an L0 page table. In this instance, we
2101 			 * must remove the empty L1 page table ourselves as
2102 			 * pvr_page_table_l1_remove() is never called as part
2103 			 * of the error path in
2104 			 * pvr_page_table_l0_get_or_insert().
2105 			 */
2106 			if (l1_head_before != op_ctx->map.l1_prealloc_tables) {
2107 				pvr_page_table_l2_remove(op_ctx);
2108 				pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_2);
2109 			}
2110 
2111 			return err;
2112 		}
2113 	}
2114 
2115 	/*
2116 	 * A sync is only needed if table objects were inserted. This can be
2117 	 * inferred by checking if the pointer at the head of the linked list
2118 	 * has changed.
2119 	 */
2120 	if (l1_head_before != op_ctx->map.l1_prealloc_tables)
2121 		pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_2);
2122 	else if (l0_head_before != op_ctx->map.l0_prealloc_tables)
2123 		pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_1);
2124 
2125 	return 0;
2126 }
2127 
2128 /**
2129  * pvr_mmu_op_context_set_curr_page() - Reassign the current page of an MMU op
2130  * context, syncing any page tables previously assigned to it which are no
2131  * longer relevant.
2132  * @op_ctx: Target MMU op context.
2133  * @device_addr: New pointer target.
2134  * @should_create: Specify whether new page tables should be created when
2135  * empty page table entries are encountered during traversal.
2136  *
2137  * This function performs a full sync on the pointer, regardless of which
2138  * levels are modified.
2139  *
2140  * Return:
2141  *  * 0 on success, or
2142  *  * Any error returned by pvr_mmu_op_context_load_tables().
2143  */
2144 static int
2145 pvr_mmu_op_context_set_curr_page(struct pvr_mmu_op_context *op_ctx,
2146 				 u64 device_addr, bool should_create)
2147 {
2148 	pvr_mmu_op_context_sync(op_ctx);
2149 
2150 	op_ctx->curr_page.l2_idx = pvr_page_table_l2_idx(device_addr);
2151 	op_ctx->curr_page.l1_idx = pvr_page_table_l1_idx(device_addr);
2152 	op_ctx->curr_page.l0_idx = pvr_page_table_l0_idx(device_addr);
2153 	op_ctx->curr_page.l1_table = NULL;
2154 	op_ctx->curr_page.l0_table = NULL;
2155 
2156 	return pvr_mmu_op_context_load_tables(op_ctx, should_create,
2157 					      PVR_MMU_SYNC_LEVEL_1);
2158 }
2159 
2160 /**
2161  * pvr_mmu_op_context_next_page() - Advance the current page of an MMU op
2162  * context.
2163  * @op_ctx: Target MMU op context.
2164  * @should_create: Specify whether new page tables should be created when
2165  * empty page table entries are encountered during traversal.
2166  *
2167  * If @should_create is %false, it is the caller's responsibility to verify that
2168  * the state of the table references in @op_ctx is valid on return. If -%ENXIO
2169  * is returned, at least one of the table references is invalid. It should be
2170  * noted that @op_ctx as a whole will be left in a valid state if -%ENXIO is
2171  * returned, unlike other error codes. The caller should check which references
2172  * are invalid by comparing them to %NULL. Only &@ptr->l2_table is guaranteed
2173  * to be valid, since it represents the root of the page table tree structure.
2174  *
2175  * Return:
2176  *  * 0 on success,
2177  *  * -%EPERM if the operation would wrap at the top of the page table
2178  *    hierarchy,
2179  *  * -%ENXIO if @should_create is %false and a page table of any level would
2180  *    have otherwise been created, or
2181  *  * Any error returned while attempting to create missing page tables if
2182  *    @should_create is %true.
2183  */
2184 static int
2185 pvr_mmu_op_context_next_page(struct pvr_mmu_op_context *op_ctx,
2186 			     bool should_create)
2187 {
2188 	s8 load_level_required = PVR_MMU_SYNC_LEVEL_NONE;
2189 
2190 	if (++op_ctx->curr_page.l0_idx != ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X)
2191 		goto load_tables;
2192 
2193 	op_ctx->curr_page.l0_idx = 0;
2194 	load_level_required = PVR_MMU_SYNC_LEVEL_0;
2195 
2196 	if (++op_ctx->curr_page.l1_idx != ROGUE_MMUCTRL_ENTRIES_PD_VALUE)
2197 		goto load_tables;
2198 
2199 	op_ctx->curr_page.l1_idx = 0;
2200 	load_level_required = PVR_MMU_SYNC_LEVEL_1;
2201 
2202 	if (++op_ctx->curr_page.l2_idx != ROGUE_MMUCTRL_ENTRIES_PC_VALUE)
2203 		goto load_tables;
2204 
2205 	/*
2206 	 * If the pattern continued, we would set &op_ctx->curr_page.l2_idx to
2207 	 * zero here. However, that would wrap the top layer of the page table
2208 	 * hierarchy which is not a valid operation. Instead, we warn and return
2209 	 * an error.
2210 	 */
2211 	WARN(true,
2212 	     "%s(%p) attempted to loop the top of the page table hierarchy",
2213 	     __func__, op_ctx);
2214 	return -EPERM;
2215 
2216 	/* If indices have wrapped, we need to load new tables. */
2217 load_tables:
2218 	/* First, flush tables which will be unloaded. */
2219 	pvr_mmu_op_context_sync_partial(op_ctx, load_level_required);
2220 
2221 	/* Then load tables from the required level down. */
2222 	return pvr_mmu_op_context_load_tables(op_ctx, should_create,
2223 					      load_level_required);
2224 }
2225 
2226 /**
2227  * DOC: Single page operations
2228  */
2229 
2230 /**
2231  * pvr_page_create() - Create a device-virtual memory page and insert it into
2232  * a level 0 page table.
2233  * @op_ctx: Target MMU op context pointing at the device-virtual address of the
2234  * target page.
2235  * @dma_addr: DMA address of the physical page backing the created page.
2236  * @flags: Page options saved on the level 0 page table entry for reading by
2237  *         the device.
2238  *
2239  * Return:
2240  *  * 0 on success, or
2241  *  * -%EEXIST if the requested page already exists.
2242  */
2243 static int
2244 pvr_page_create(struct pvr_mmu_op_context *op_ctx, dma_addr_t dma_addr,
2245 		struct pvr_page_flags_raw flags)
2246 {
2247 	/* Do not create a new page if one already exists. */
2248 	if (pvr_page_table_l0_entry_is_valid(op_ctx->curr_page.l0_table,
2249 					     op_ctx->curr_page.l0_idx)) {
2250 		return -EEXIST;
2251 	}
2252 
2253 	pvr_page_table_l0_insert(op_ctx, dma_addr, flags);
2254 
2255 	pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0);
2256 
2257 	return 0;
2258 }
2259 
2260 /**
2261  * pvr_page_destroy() - Destroy a device page after removing it from its
2262  * parent level 0 page table.
2263  * @op_ctx: Target MMU op context.
2264  */
2265 static void
2266 pvr_page_destroy(struct pvr_mmu_op_context *op_ctx)
2267 {
2268 	/* Do nothing if the page does not exist. */
2269 	if (!pvr_page_table_l0_entry_is_valid(op_ctx->curr_page.l0_table,
2270 					      op_ctx->curr_page.l0_idx)) {
2271 		return;
2272 	}
2273 
2274 	/* Clear the parent L0 page table entry. */
2275 	pvr_page_table_l0_remove(op_ctx);
2276 
2277 	pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0);
2278 }
2279 
2280 /**
2281  * pvr_mmu_op_context_destroy() - Destroy an MMU op context.
2282  * @op_ctx: Target MMU op context.
2283  */
2284 void pvr_mmu_op_context_destroy(struct pvr_mmu_op_context *op_ctx)
2285 {
2286 	const bool flush_caches =
2287 		op_ctx->sync_level_required != PVR_MMU_SYNC_LEVEL_NONE;
2288 
2289 	pvr_mmu_op_context_sync(op_ctx);
2290 
2291 	/* Unmaps should be flushed immediately. Map flushes can be deferred. */
2292 	if (flush_caches && !op_ctx->map.sgt)
2293 		pvr_mmu_flush_exec(op_ctx->mmu_ctx->pvr_dev, true);
2294 
2295 	while (op_ctx->map.l0_prealloc_tables) {
2296 		struct pvr_page_table_l0 *tmp = op_ctx->map.l0_prealloc_tables;
2297 
2298 		op_ctx->map.l0_prealloc_tables =
2299 			op_ctx->map.l0_prealloc_tables->next_free;
2300 		pvr_page_table_l0_free(tmp);
2301 	}
2302 
2303 	while (op_ctx->map.l1_prealloc_tables) {
2304 		struct pvr_page_table_l1 *tmp = op_ctx->map.l1_prealloc_tables;
2305 
2306 		op_ctx->map.l1_prealloc_tables =
2307 			op_ctx->map.l1_prealloc_tables->next_free;
2308 		pvr_page_table_l1_free(tmp);
2309 	}
2310 
2311 	while (op_ctx->unmap.l0_free_tables) {
2312 		struct pvr_page_table_l0 *tmp = op_ctx->unmap.l0_free_tables;
2313 
2314 		op_ctx->unmap.l0_free_tables =
2315 			op_ctx->unmap.l0_free_tables->next_free;
2316 		pvr_page_table_l0_free(tmp);
2317 	}
2318 
2319 	while (op_ctx->unmap.l1_free_tables) {
2320 		struct pvr_page_table_l1 *tmp = op_ctx->unmap.l1_free_tables;
2321 
2322 		op_ctx->unmap.l1_free_tables =
2323 			op_ctx->unmap.l1_free_tables->next_free;
2324 		pvr_page_table_l1_free(tmp);
2325 	}
2326 
2327 	kfree(op_ctx);
2328 }
2329 
2330 /**
2331  * pvr_mmu_op_context_create() - Create an MMU op context.
2332  * @ctx: MMU context associated with owning VM context.
2333  * @sgt: Scatter gather table containing pages pinned for use by this context.
2334  * @sgt_offset: Start offset of the requested device-virtual memory mapping.
2335  * @size: Size in bytes of the requested device-virtual memory mapping. For an
2336  * unmapping, this should be zero so that no page tables are allocated.
2337  *
2338  * Returns:
2339  *  * Newly created MMU op context object on success, or
2340  *  * -%ENOMEM if no memory is available,
2341  *  * Any error code returned by pvr_page_table_l2_init().
2342  */
2343 struct pvr_mmu_op_context *
2344 pvr_mmu_op_context_create(struct pvr_mmu_context *ctx, struct sg_table *sgt,
2345 			  u64 sgt_offset, u64 size)
2346 {
2347 	int err;
2348 
2349 	struct pvr_mmu_op_context *op_ctx =
2350 		kzalloc(sizeof(*op_ctx), GFP_KERNEL);
2351 
2352 	if (!op_ctx)
2353 		return ERR_PTR(-ENOMEM);
2354 
2355 	op_ctx->mmu_ctx = ctx;
2356 	op_ctx->map.sgt = sgt;
2357 	op_ctx->map.sgt_offset = sgt_offset;
2358 	op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
2359 
2360 	if (size) {
2361 		/*
2362 		 * The number of page table objects we need to prealloc is
2363 		 * indicated by the mapping size, start offset and the sizes
2364 		 * of the areas mapped per PT or PD. The range calculation is
2365 		 * identical to that for the index into a table for a device
2366 		 * address, so we reuse those functions here.
2367 		 */
2368 		const u32 l1_start_idx = pvr_page_table_l2_idx(sgt_offset);
2369 		const u32 l1_end_idx = pvr_page_table_l2_idx(sgt_offset + size);
2370 		const u32 l1_count = l1_end_idx - l1_start_idx + 1;
2371 		const u32 l0_start_idx = pvr_page_table_l1_idx(sgt_offset);
2372 		const u32 l0_end_idx = pvr_page_table_l1_idx(sgt_offset + size);
2373 		const u32 l0_count = l0_end_idx - l0_start_idx + 1;
2374 
2375 		/*
2376 		 * Alloc and push page table entries until we have enough of
2377 		 * each type, ending with linked lists of l0 and l1 entries in
2378 		 * reverse order.
2379 		 */
2380 		for (int i = 0; i < l1_count; i++) {
2381 			struct pvr_page_table_l1 *l1_tmp =
2382 				pvr_page_table_l1_alloc(ctx);
2383 
2384 			err = PTR_ERR_OR_ZERO(l1_tmp);
2385 			if (err)
2386 				goto err_cleanup;
2387 
2388 			l1_tmp->next_free = op_ctx->map.l1_prealloc_tables;
2389 			op_ctx->map.l1_prealloc_tables = l1_tmp;
2390 		}
2391 
2392 		for (int i = 0; i < l0_count; i++) {
2393 			struct pvr_page_table_l0 *l0_tmp =
2394 				pvr_page_table_l0_alloc(ctx);
2395 
2396 			err = PTR_ERR_OR_ZERO(l0_tmp);
2397 			if (err)
2398 				goto err_cleanup;
2399 
2400 			l0_tmp->next_free = op_ctx->map.l0_prealloc_tables;
2401 			op_ctx->map.l0_prealloc_tables = l0_tmp;
2402 		}
2403 	}
2404 
2405 	return op_ctx;
2406 
2407 err_cleanup:
2408 	pvr_mmu_op_context_destroy(op_ctx);
2409 
2410 	return ERR_PTR(err);
2411 }
2412 
2413 /**
2414  * pvr_mmu_op_context_unmap_curr_page() - Unmap pages from a memory context
2415  * starting from the current page of an MMU op context.
2416  * @op_ctx: Target MMU op context pointing at the first page to unmap.
2417  * @nr_pages: Number of pages to unmap.
2418  *
2419  * Return:
2420  *  * 0 on success, or
2421  *  * Any error encountered while advancing @op_ctx.curr_page with
2422  *    pvr_mmu_op_context_next_page() (except -%ENXIO).
2423  */
2424 static int
2425 pvr_mmu_op_context_unmap_curr_page(struct pvr_mmu_op_context *op_ctx,
2426 				   u64 nr_pages)
2427 {
2428 	int err;
2429 
2430 	if (nr_pages == 0)
2431 		return 0;
2432 
2433 	/*
2434 	 * Destroy first page outside loop, as it doesn't require a page
2435 	 * advance beforehand. If the L0 page table reference in
2436 	 * @op_ctx.curr_page is %NULL, there cannot be a mapped page at
2437 	 * @op_ctx.curr_page (so skip ahead).
2438 	 */
2439 	if (op_ctx->curr_page.l0_table)
2440 		pvr_page_destroy(op_ctx);
2441 
2442 	for (u64 page = 1; page < nr_pages; ++page) {
2443 		err = pvr_mmu_op_context_next_page(op_ctx, false);
2444 		/*
2445 		 * If the page table tree structure at @op_ctx.curr_page is
2446 		 * incomplete, skip ahead. We don't care about unmapping pages
2447 		 * that cannot exist.
2448 		 *
2449 		 * FIXME: This could be made more efficient by jumping ahead
2450 		 * using pvr_mmu_op_context_set_curr_page().
2451 		 */
2452 		if (err == -ENXIO)
2453 			continue;
2454 		else if (err)
2455 			return err;
2456 
2457 		pvr_page_destroy(op_ctx);
2458 	}
2459 
2460 	return 0;
2461 }
2462 
2463 /**
2464  * pvr_mmu_unmap() - Unmap pages from a memory context.
2465  * @op_ctx: Target MMU op context.
2466  * @device_addr: First device-virtual address to unmap.
2467  * @size: Size in bytes to unmap.
2468  *
2469  * The total amount of device-virtual memory unmapped is
2470  * @nr_pages * %PVR_DEVICE_PAGE_SIZE.
2471  *
2472  * Returns:
2473  *  * 0 on success, or
2474  *  * Any error code returned by pvr_page_table_ptr_init(), or
2475  *  * Any error code returned by pvr_page_table_ptr_unmap().
2476  */
2477 int pvr_mmu_unmap(struct pvr_mmu_op_context *op_ctx, u64 device_addr, u64 size)
2478 {
2479 	int err = pvr_mmu_op_context_set_curr_page(op_ctx, device_addr, false);
2480 
2481 	if (err)
2482 		return err;
2483 
2484 	return pvr_mmu_op_context_unmap_curr_page(op_ctx,
2485 						  size >> PVR_DEVICE_PAGE_SHIFT);
2486 }
2487 
2488 /**
2489  * pvr_mmu_map_sgl() - Map part of a scatter-gather table entry to
2490  * device-virtual memory.
2491  * @op_ctx: Target MMU op context pointing to the first page that should be
2492  * mapped.
2493  * @sgl: Target scatter-gather table entry.
2494  * @offset: Offset into @sgl to map from. Must result in a starting address
2495  * from @sgl which is CPU page-aligned.
2496  * @size: Size of the memory to be mapped in bytes. Must be a non-zero multiple
2497  * of the device page size.
2498  * @page_flags: Page options to be applied to every device-virtual memory page
2499  * in the created mapping.
2500  *
2501  * Return:
2502  *  * 0 on success,
2503  *  * -%EINVAL if the range specified by @offset and @size is not completely
2504  *    within @sgl, or
2505  *  * Any error encountered while creating a page with pvr_page_create(), or
2506  *  * Any error encountered while advancing @op_ctx.curr_page with
2507  *    pvr_mmu_op_context_next_page().
2508  */
2509 static int
2510 pvr_mmu_map_sgl(struct pvr_mmu_op_context *op_ctx, struct scatterlist *sgl,
2511 		u64 offset, u64 size, struct pvr_page_flags_raw page_flags)
2512 {
2513 	const unsigned int pages = size >> PVR_DEVICE_PAGE_SHIFT;
2514 	dma_addr_t dma_addr = sg_dma_address(sgl) + offset;
2515 	const unsigned int dma_len = sg_dma_len(sgl);
2516 	struct pvr_page_table_ptr ptr_copy;
2517 	unsigned int page;
2518 	int err;
2519 
2520 	if (size > dma_len || offset > dma_len - size)
2521 		return -EINVAL;
2522 
2523 	/*
2524 	 * Before progressing, save a copy of the start pointer so we can use
2525 	 * it again if we enter an error state and have to destroy pages.
2526 	 */
2527 	memcpy(&ptr_copy, &op_ctx->curr_page, sizeof(ptr_copy));
2528 
2529 	/*
2530 	 * Create first page outside loop, as it doesn't require a page advance
2531 	 * beforehand.
2532 	 */
2533 	err = pvr_page_create(op_ctx, dma_addr, page_flags);
2534 	if (err)
2535 		return err;
2536 
2537 	for (page = 1; page < pages; ++page) {
2538 		err = pvr_mmu_op_context_next_page(op_ctx, true);
2539 		if (err)
2540 			goto err_destroy_pages;
2541 
2542 		dma_addr += PVR_DEVICE_PAGE_SIZE;
2543 
2544 		err = pvr_page_create(op_ctx, dma_addr, page_flags);
2545 		if (err)
2546 			goto err_destroy_pages;
2547 	}
2548 
2549 	return 0;
2550 
2551 err_destroy_pages:
2552 	memcpy(&op_ctx->curr_page, &ptr_copy, sizeof(op_ctx->curr_page));
2553 	err = pvr_mmu_op_context_unmap_curr_page(op_ctx, page);
2554 
2555 	return err;
2556 }
2557 
2558 /**
2559  * pvr_mmu_map() - Map an object's virtual memory to physical memory.
2560  * @op_ctx: Target MMU op context.
2561  * @size: Size of memory to be mapped in bytes. Must be a non-zero multiple
2562  * of the device page size.
2563  * @flags: Flags from pvr_gem_object associated with the mapping.
2564  * @device_addr: Virtual device address to map to. Must be device page-aligned.
2565  *
2566  * Returns:
2567  *  * 0 on success, or
2568  *  * Any error code returned by pvr_page_table_ptr_init(), or
2569  *  * Any error code returned by pvr_mmu_map_sgl(), or
2570  *  * Any error code returned by pvr_page_table_ptr_next_page().
2571  */
2572 int pvr_mmu_map(struct pvr_mmu_op_context *op_ctx, u64 size, u64 flags,
2573 		u64 device_addr)
2574 {
2575 	struct pvr_page_table_ptr ptr_copy;
2576 	struct pvr_page_flags_raw flags_raw;
2577 	struct scatterlist *sgl;
2578 	u64 mapped_size = 0;
2579 	unsigned int count;
2580 	int err;
2581 
2582 	if (!size)
2583 		return 0;
2584 
2585 	if ((op_ctx->map.sgt_offset | size) & ~PVR_DEVICE_PAGE_MASK)
2586 		return -EINVAL;
2587 
2588 	err = pvr_mmu_op_context_set_curr_page(op_ctx, device_addr, true);
2589 	if (err)
2590 		return -EINVAL;
2591 
2592 	memcpy(&ptr_copy, &op_ctx->curr_page, sizeof(ptr_copy));
2593 
2594 	flags_raw = pvr_page_flags_raw_create(false, false,
2595 					      flags & DRM_PVR_BO_BYPASS_DEVICE_CACHE,
2596 					      flags & DRM_PVR_BO_PM_FW_PROTECT);
2597 
2598 	/* Map scatter gather table */
2599 	for_each_sgtable_dma_sg(op_ctx->map.sgt, sgl, count) {
2600 		const size_t sgl_len = sg_dma_len(sgl);
2601 		u64 sgl_offset, map_sgl_len;
2602 
2603 		if (sgl_len <= op_ctx->map.sgt_offset) {
2604 			op_ctx->map.sgt_offset -= sgl_len;
2605 			continue;
2606 		}
2607 
2608 		sgl_offset = op_ctx->map.sgt_offset;
2609 		map_sgl_len = min_t(u64, sgl_len - sgl_offset, size - mapped_size);
2610 
2611 		err = pvr_mmu_map_sgl(op_ctx, sgl, sgl_offset, map_sgl_len,
2612 				      flags_raw);
2613 		if (err)
2614 			break;
2615 
2616 		/*
2617 		 * Flag the L0 page table as requiring a flush when the MMU op
2618 		 * context is destroyed.
2619 		 */
2620 		pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0);
2621 
2622 		op_ctx->map.sgt_offset = 0;
2623 		mapped_size += map_sgl_len;
2624 
2625 		if (mapped_size >= size)
2626 			break;
2627 
2628 		err = pvr_mmu_op_context_next_page(op_ctx, true);
2629 		if (err)
2630 			break;
2631 	}
2632 
2633 	if (err && mapped_size) {
2634 		memcpy(&op_ctx->curr_page, &ptr_copy, sizeof(op_ctx->curr_page));
2635 		pvr_mmu_op_context_unmap_curr_page(op_ctx,
2636 						   mapped_size >> PVR_DEVICE_PAGE_SHIFT);
2637 	}
2638 
2639 	return err;
2640 }
2641