xref: /linux/arch/arm64/include/asm/kvm_pgtable.h (revision a11de92523f75a8140cf8eea3ce9b628f7a3cc77)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2020 Google LLC
4  * Author: Will Deacon <will@kernel.org>
5  */
6 
7 #ifndef __ARM64_KVM_PGTABLE_H__
8 #define __ARM64_KVM_PGTABLE_H__
9 
10 #include <linux/bits.h>
11 #include <linux/kvm_host.h>
12 #include <linux/types.h>
13 
/*
 * Maximum number of page-table levels. kvm_granule_shift() assumes this
 * value is 4.
 */
#define KVM_PGTABLE_MAX_LEVELS		4U
15 
16 static inline u64 kvm_get_parange(u64 mmfr0)
17 {
18 	u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
19 				ID_AA64MMFR0_PARANGE_SHIFT);
20 	if (parange > ID_AA64MMFR0_PARANGE_MAX)
21 		parange = ID_AA64MMFR0_PARANGE_MAX;
22 
23 	return parange;
24 }
25 
/* A page-table entry, held as a raw 64-bit value. */
typedef u64 kvm_pte_t;

/* Bit 0 of an entry; this is the bit tested by kvm_pte_valid(). */
#define KVM_PTE_VALID			BIT(0)

/*
 * Bits [47:PAGE_SHIFT] of an entry are kept by kvm_pte_to_phys() as the
 * same-numbered bits of the physical address. When PAGE_SHIFT is 16,
 * bits [15:12] of the entry additionally supply physical address
 * bits [51:48].
 */
#define KVM_PTE_ADDR_MASK		GENMASK(47, PAGE_SHIFT)
#define KVM_PTE_ADDR_51_48		GENMASK(15, 12)
32 
33 static inline bool kvm_pte_valid(kvm_pte_t pte)
34 {
35 	return pte & KVM_PTE_VALID;
36 }
37 
38 static inline u64 kvm_pte_to_phys(kvm_pte_t pte)
39 {
40 	u64 pa = pte & KVM_PTE_ADDR_MASK;
41 
42 	if (PAGE_SHIFT == 16)
43 		pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
44 
45 	return pa;
46 }
47 
48 static inline u64 kvm_granule_shift(u32 level)
49 {
50 	/* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
51 	return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
52 }
53 
54 static inline u64 kvm_granule_size(u32 level)
55 {
56 	return BIT(kvm_granule_shift(level));
57 }
58 
59 static inline bool kvm_level_supports_block_mapping(u32 level)
60 {
61 	/*
62 	 * Reject invalid block mappings and don't bother with 4TB mappings for
63 	 * 52-bit PAs.
64 	 */
65 	return !(level == 0 || (PAGE_SIZE != SZ_4K && level == 1));
66 }
67 
/**
 * struct kvm_pgtable_mm_ops - Memory management callbacks.
 * @zalloc_page:		Allocate a single zeroed memory page.
 *				The @arg parameter can be used by the walker
 *				to pass a memcache. The initial refcount of
 *				the page is 1.
 * @zalloc_pages_exact:		Allocate an exact number of zeroed memory pages.
 *				The @size parameter is in bytes, and is rounded
 *				up to the next page boundary. The resulting
 *				allocation is physically contiguous.
 * @free_pages_exact:		Free an exact number of memory pages previously
 *				allocated by zalloc_pages_exact.
 * @get_page:			Increment the refcount on a page.
 * @put_page:			Decrement the refcount on a page. When the
 *				refcount reaches 0 the page is automatically
 *				freed.
 * @page_count:			Return the refcount of a page.
 * @phys_to_virt:		Convert a physical address into a virtual
 *				address mapped in the current context.
 * @virt_to_phys:		Convert a virtual address mapped in the current
 *				context into a physical address.
 * @dcache_clean_inval_poc:	Clean and invalidate the data cache to the PoC
 *				for the specified memory address range.
 * @icache_inval_pou:		Invalidate the instruction cache to the PoU
 *				for the specified memory address range.
 */
struct kvm_pgtable_mm_ops {
	void*		(*zalloc_page)(void *arg);
	void*		(*zalloc_pages_exact)(size_t size);
	void		(*free_pages_exact)(void *addr, size_t size);
	void		(*get_page)(void *addr);
	void		(*put_page)(void *addr);
	int		(*page_count)(void *addr);
	void*		(*phys_to_virt)(phys_addr_t phys);
	phys_addr_t	(*virt_to_phys)(void *addr);
	void		(*dcache_clean_inval_poc)(void *addr, size_t size);
	void		(*icache_inval_pou)(void *addr, size_t size);
};
106 
/**
 * enum kvm_pgtable_stage2_flags - Stage-2 page-table flags.
 * @KVM_PGTABLE_S2_NOFWB:	Don't enforce Normal-WB even if the CPUs have
 *				ARM64_HAS_STAGE2_FWB.
 * @KVM_PGTABLE_S2_IDMAP:	Only use identity mappings.
 *
 * Each flag is a distinct single bit (bit 0 and bit 1 respectively).
 */
enum kvm_pgtable_stage2_flags {
	KVM_PGTABLE_S2_NOFWB			= BIT(0),
	KVM_PGTABLE_S2_IDMAP			= BIT(1),
};
117 
/**
 * enum kvm_pgtable_prot - Page-table permissions and attributes.
 * @KVM_PGTABLE_PROT_X:		Execute permission.
 * @KVM_PGTABLE_PROT_W:		Write permission.
 * @KVM_PGTABLE_PROT_R:		Read permission.
 * @KVM_PGTABLE_PROT_DEVICE:	Device attributes.
 * @KVM_PGTABLE_PROT_SW0:	Software bit 0.
 * @KVM_PGTABLE_PROT_SW1:	Software bit 1.
 * @KVM_PGTABLE_PROT_SW2:	Software bit 2.
 * @KVM_PGTABLE_PROT_SW3:	Software bit 3.
 *
 * The permission and device flags occupy bits 0-3 of the value, while the
 * four software bits occupy bits 55-58.
 */
enum kvm_pgtable_prot {
	KVM_PGTABLE_PROT_X			= BIT(0),
	KVM_PGTABLE_PROT_W			= BIT(1),
	KVM_PGTABLE_PROT_R			= BIT(2),

	KVM_PGTABLE_PROT_DEVICE			= BIT(3),

	KVM_PGTABLE_PROT_SW0			= BIT(55),
	KVM_PGTABLE_PROT_SW1			= BIT(56),
	KVM_PGTABLE_PROT_SW2			= BIT(57),
	KVM_PGTABLE_PROT_SW3			= BIT(58),
};
141 
/* Shorthand combinations of the read, write and execute permission bits. */
#define KVM_PGTABLE_PROT_RW	(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
#define KVM_PGTABLE_PROT_RWX	(KVM_PGTABLE_PROT_RW | KVM_PGTABLE_PROT_X)

/* Host memory is read/write/execute; host MMIO is read/write only. */
#define PKVM_HOST_MEM_PROT	KVM_PGTABLE_PROT_RWX
#define PKVM_HOST_MMIO_PROT	KVM_PGTABLE_PROT_RW

/*
 * PAGE_HYP is read/write, PAGE_HYP_EXEC is read/execute, PAGE_HYP_RO is
 * read-only and PAGE_HYP_DEVICE is PAGE_HYP with device attributes added.
 */
#define PAGE_HYP		KVM_PGTABLE_PROT_RW
#define PAGE_HYP_EXEC		(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X)
#define PAGE_HYP_RO		(KVM_PGTABLE_PROT_R)
#define PAGE_HYP_DEVICE		(PAGE_HYP | KVM_PGTABLE_PROT_DEVICE)
152 
/*
 * Type of the force_pte_cb member of struct kvm_pgtable: a function that
 * returns true if page level mappings must be used instead of block
 * mappings. It receives an address range (@addr, @end) and the protection
 * @prot.
 * NOTE(review): whether @end is inclusive or exclusive is not visible from
 * this header - confirm against the implementation.
 */
typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
					   enum kvm_pgtable_prot prot);
155 
/**
 * struct kvm_pgtable - KVM page-table.
 * @ia_bits:		Maximum input address size, in bits.
 * @start_level:	Level at which the page-table walk starts.
 * @pgd:		Pointer to the first top-level entry of the page-table.
 * @mm_ops:		Memory management callbacks.
 * @mmu:		Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
 * @flags:		Stage-2 page-table flags. Stage-2 only.
 * @force_pte_cb:	Function that returns true if page level mappings must
 *			be used instead of block mappings. Stage-2 only.
 */
struct kvm_pgtable {
	u32					ia_bits;
	u32					start_level;
	kvm_pte_t				*pgd;
	struct kvm_pgtable_mm_ops		*mm_ops;

	/* Stage-2 only */
	struct kvm_s2_mmu			*mmu;
	enum kvm_pgtable_stage2_flags		flags;
	kvm_pgtable_force_pte_cb_t		force_pte_cb;
};
178 
/**
 * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk.
 * @KVM_PGTABLE_WALK_LEAF:		Visit leaf entries, including invalid
 *					entries.
 * @KVM_PGTABLE_WALK_TABLE_PRE:		Visit table entries before their
 *					children.
 * @KVM_PGTABLE_WALK_TABLE_POST:	Visit table entries after their
 *					children.
 *
 * Each flag is a distinct single bit; the flags member of
 * struct kvm_pgtable_walker holds a bitwise-OR of them.
 */
enum kvm_pgtable_walk_flags {
	KVM_PGTABLE_WALK_LEAF			= BIT(0),
	KVM_PGTABLE_WALK_TABLE_PRE		= BIT(1),
	KVM_PGTABLE_WALK_TABLE_POST		= BIT(2),
};
193 
/*
 * Type of the callback stored in the cb member of struct kvm_pgtable_walker
 * and invoked during a page-table walk. As documented for
 * kvm_pgtable_walk(), returning a negative error code terminates the walk
 * immediately with that error code.
 *
 * @ptep points at a page-table entry and @level is a page-table level;
 * @arg is the walker's opaque argument.
 * NOTE(review): presumably @addr/@end bound the range still to be walked and
 * @flag is the single walk flag that triggered this invocation - confirm
 * against the walker implementation.
 */
typedef int (*kvm_pgtable_visitor_fn_t)(u64 addr, u64 end, u32 level,
					kvm_pte_t *ptep,
					enum kvm_pgtable_walk_flags flag,
					void * const arg);
198 
/**
 * struct kvm_pgtable_walker - Hook into a page-table walk.
 * @cb:		Callback function to invoke during the walk.
 * @arg:	Argument passed to the callback function.
 * @flags:	Bitwise-OR of flags to identify the entry types on which to
 *		invoke the callback function.
 *
 * Every member is const-qualified, so a walker has to be fully described
 * when it is initialised.
 */
struct kvm_pgtable_walker {
	const kvm_pgtable_visitor_fn_t		cb;
	void * const				arg;
	const enum kvm_pgtable_walk_flags	flags;
};
211 
212 /**
213  * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
214  * @pgt:	Uninitialised page-table structure to initialise.
215  * @va_bits:	Maximum virtual address bits.
216  * @mm_ops:	Memory management callbacks.
217  *
218  * Return: 0 on success, negative error code on failure.
219  */
220 int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
221 			 struct kvm_pgtable_mm_ops *mm_ops);
222 
223 /**
224  * kvm_pgtable_hyp_destroy() - Destroy an unused hypervisor stage-1 page-table.
225  * @pgt:	Page-table structure initialised by kvm_pgtable_hyp_init().
226  *
227  * The page-table is assumed to be unreachable by any hardware walkers prior
228  * to freeing and therefore no TLB invalidation is performed.
229  */
230 void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt);
231 
232 /**
233  * kvm_pgtable_hyp_map() - Install a mapping in a hypervisor stage-1 page-table.
234  * @pgt:	Page-table structure initialised by kvm_pgtable_hyp_init().
235  * @addr:	Virtual address at which to place the mapping.
236  * @size:	Size of the mapping.
237  * @phys:	Physical address of the memory to map.
238  * @prot:	Permissions and attributes for the mapping.
239  *
240  * The offset of @addr within a page is ignored, @size is rounded-up to
241  * the next page boundary and @phys is rounded-down to the previous page
242  * boundary.
243  *
244  * If device attributes are not explicitly requested in @prot, then the
245  * mapping will be normal, cacheable. Attempts to install a new mapping
246  * for a virtual address that is already mapped will be rejected with an
247  * error and a WARN().
248  *
249  * Return: 0 on success, negative error code on failure.
250  */
251 int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
252 			enum kvm_pgtable_prot prot);
253 
254 /**
255  * kvm_get_vtcr() - Helper to construct VTCR_EL2
256  * @mmfr0:	Sanitized value of SYS_ID_AA64MMFR0_EL1 register.
257  * @mmfr1:	Sanitized value of SYS_ID_AA64MMFR1_EL1 register.
 * @phys_shift:	Value to set in VTCR_EL2.T0SZ.
259  *
260  * The VTCR value is common across all the physical CPUs on the system.
261  * We use system wide sanitised values to fill in different fields,
262  * except for Hardware Management of Access Flags. HA Flag is set
263  * unconditionally on all CPUs, as it is safe to run with or without
264  * the feature and the bit is RES0 on CPUs that don't support it.
265  *
266  * Return: VTCR_EL2 value
267  */
268 u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
269 
270 /**
271  * __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
272  * @pgt:	Uninitialised page-table structure to initialise.
273  * @arch:	Arch-specific KVM structure representing the guest virtual
274  *		machine.
275  * @mm_ops:	Memory management callbacks.
276  * @flags:	Stage-2 configuration flags.
277  * @force_pte_cb: Function that returns true if page level mappings must
278  *		be used instead of block mappings.
279  *
280  * Return: 0 on success, negative error code on failure.
281  */
282 int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
283 			      struct kvm_pgtable_mm_ops *mm_ops,
284 			      enum kvm_pgtable_stage2_flags flags,
285 			      kvm_pgtable_force_pte_cb_t force_pte_cb);
286 
/*
 * Convenience wrapper around __kvm_pgtable_stage2_init() that passes no
 * stage-2 flags (0) and no force_pte_cb callback (NULL).
 */
#define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \
	__kvm_pgtable_stage2_init(pgt, arch, mm_ops, 0, NULL)
289 
290 /**
291  * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
292  * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
293  *
294  * The page-table is assumed to be unreachable by any hardware walkers prior
295  * to freeing and therefore no TLB invalidation is performed.
296  */
297 void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
298 
299 /**
300  * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
301  * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
302  * @addr:	Intermediate physical address at which to place the mapping.
303  * @size:	Size of the mapping.
304  * @phys:	Physical address of the memory to map.
305  * @prot:	Permissions and attributes for the mapping.
306  * @mc:		Cache of pre-allocated and zeroed memory from which to allocate
307  *		page-table pages.
308  *
309  * The offset of @addr within a page is ignored, @size is rounded-up to
310  * the next page boundary and @phys is rounded-down to the previous page
311  * boundary.
312  *
313  * If device attributes are not explicitly requested in @prot, then the
314  * mapping will be normal, cacheable.
315  *
316  * Note that the update of a valid leaf PTE in this function will be aborted,
317  * if it's trying to recreate the exact same mapping or only change the access
318  * permissions. Instead, the vCPU will exit one more time from guest if still
319  * needed and then go through the path of relaxing permissions.
320  *
321  * Note that this function will both coalesce existing table entries and split
322  * existing block mappings, relying on page-faults to fault back areas outside
323  * of the new mapping lazily.
324  *
325  * Return: 0 on success, negative error code on failure.
326  */
327 int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
328 			   u64 phys, enum kvm_pgtable_prot prot,
329 			   void *mc);
330 
331 /**
332  * kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to
333  *				    track ownership.
334  * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
335  * @addr:	Base intermediate physical address to annotate.
336  * @size:	Size of the annotated range.
337  * @mc:		Cache of pre-allocated and zeroed memory from which to allocate
338  *		page-table pages.
339  * @owner_id:	Unique identifier for the owner of the page.
340  *
341  * By default, all page-tables are owned by identifier 0. This function can be
 * used to mark portions of the IPA space as owned by other entities. When a
 * stage 2 is used with identity-mappings, these annotations allow the
 * page-table data structure to be used as a simple rmap.
345  *
346  * Return: 0 on success, negative error code on failure.
347  */
348 int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
349 				 void *mc, u8 owner_id);
350 
351 /**
352  * kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table.
353  * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
354  * @addr:	Intermediate physical address from which to remove the mapping.
355  * @size:	Size of the mapping.
356  *
357  * The offset of @addr within a page is ignored and @size is rounded-up to
358  * the next page boundary.
359  *
360  * TLB invalidation is performed for each page-table entry cleared during the
361  * unmapping operation and the reference count for the page-table page
362  * containing the cleared entry is decremented, with unreferenced pages being
363  * freed. Unmapping a cacheable page will ensure that it is clean to the PoC if
364  * FWB is not supported by the CPU.
365  *
366  * Return: 0 on success, negative error code on failure.
367  */
368 int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
369 
370 /**
371  * kvm_pgtable_stage2_wrprotect() - Write-protect guest stage-2 address range
372  *                                  without TLB invalidation.
373  * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address from which to write-protect.
375  * @size:	Size of the range.
376  *
377  * The offset of @addr within a page is ignored and @size is rounded-up to
378  * the next page boundary.
379  *
380  * Note that it is the caller's responsibility to invalidate the TLB after
381  * calling this function to ensure that the updated permissions are visible
382  * to the CPUs.
383  *
384  * Return: 0 on success, negative error code on failure.
385  */
386 int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
387 
388 /**
389  * kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry.
390  * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
391  * @addr:	Intermediate physical address to identify the page-table entry.
392  *
393  * The offset of @addr within a page is ignored.
394  *
395  * If there is a valid, leaf page-table entry used to translate @addr, then
396  * set the access flag in that entry.
397  *
398  * Return: The old page-table entry prior to setting the flag, 0 on failure.
399  */
400 kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr);
401 
402 /**
403  * kvm_pgtable_stage2_mkold() - Clear the access flag in a page-table entry.
404  * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
405  * @addr:	Intermediate physical address to identify the page-table entry.
406  *
407  * The offset of @addr within a page is ignored.
408  *
409  * If there is a valid, leaf page-table entry used to translate @addr, then
410  * clear the access flag in that entry.
411  *
412  * Note that it is the caller's responsibility to invalidate the TLB after
413  * calling this function to ensure that the updated permissions are visible
414  * to the CPUs.
415  *
416  * Return: The old page-table entry prior to clearing the flag, 0 on failure.
417  */
418 kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr);
419 
420 /**
421  * kvm_pgtable_stage2_relax_perms() - Relax the permissions enforced by a
422  *				      page-table entry.
423  * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
424  * @addr:	Intermediate physical address to identify the page-table entry.
425  * @prot:	Additional permissions to grant for the mapping.
426  *
427  * The offset of @addr within a page is ignored.
428  *
429  * If there is a valid, leaf page-table entry used to translate @addr, then
430  * relax the permissions in that entry according to the read, write and
431  * execute permissions specified by @prot. No permissions are removed, and
432  * TLB invalidation is performed after updating the entry. Software bits cannot
433  * be set or cleared using kvm_pgtable_stage2_relax_perms().
434  *
435  * Return: 0 on success, negative error code on failure.
436  */
437 int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
438 				   enum kvm_pgtable_prot prot);
439 
440 /**
441  * kvm_pgtable_stage2_is_young() - Test whether a page-table entry has the
442  *				   access flag set.
443  * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
444  * @addr:	Intermediate physical address to identify the page-table entry.
445  *
446  * The offset of @addr within a page is ignored.
447  *
448  * Return: True if the page-table entry has the access flag set, false otherwise.
449  */
450 bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr);
451 
452 /**
 * kvm_pgtable_stage2_flush() - Clean and invalidate data cache to Point of
 *				Coherency for guest stage-2 address range.
456  * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
457  * @addr:	Intermediate physical address from which to flush.
458  * @size:	Size of the range.
459  *
460  * The offset of @addr within a page is ignored and @size is rounded-up to
461  * the next page boundary.
462  *
463  * Return: 0 on success, negative error code on failure.
464  */
465 int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
466 
467 /**
468  * kvm_pgtable_walk() - Walk a page-table.
469  * @pgt:	Page-table structure initialised by kvm_pgtable_*_init().
470  * @addr:	Input address for the start of the walk.
471  * @size:	Size of the range to walk.
472  * @walker:	Walker callback description.
473  *
474  * The offset of @addr within a page is ignored and @size is rounded-up to
475  * the next page boundary.
476  *
477  * The walker will walk the page-table entries corresponding to the input
478  * address range specified, visiting entries according to the walker flags.
479  * Invalid entries are treated as leaf entries. Leaf entries are reloaded
480  * after invoking the walker callback, allowing the walker to descend into
481  * a newly installed table.
482  *
483  * Returning a negative error code from the walker callback function will
484  * terminate the walk immediately with the same error code.
485  *
486  * Return: 0 on success, negative error code on failure.
487  */
488 int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
489 		     struct kvm_pgtable_walker *walker);
490 
491 /**
492  * kvm_pgtable_get_leaf() - Walk a page-table and retrieve the leaf entry
493  *			    with its level.
494  * @pgt:	Page-table structure initialised by kvm_pgtable_*_init()
495  *		or a similar initialiser.
496  * @addr:	Input address for the start of the walk.
497  * @ptep:	Pointer to storage for the retrieved PTE.
498  * @level:	Pointer to storage for the level of the retrieved PTE.
499  *
500  * The offset of @addr within a page is ignored.
501  *
502  * The walker will walk the page-table entries corresponding to the input
503  * address specified, retrieving the leaf corresponding to this address.
504  * Invalid entries are treated as leaf entries.
505  *
506  * Return: 0 on success, negative error code on failure.
507  */
508 int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
509 			 kvm_pte_t *ptep, u32 *level);
510 
511 /**
512  * kvm_pgtable_stage2_pte_prot() - Retrieve the protection attributes of a
513  *				   stage-2 Page-Table Entry.
514  * @pte:	Page-table entry
515  *
516  * Return: protection attributes of the page-table entry in the enum
517  *	   kvm_pgtable_prot format.
518  */
519 enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte);
520 
521 /**
522  * kvm_pgtable_hyp_pte_prot() - Retrieve the protection attributes of a stage-1
523  *				Page-Table Entry.
524  * @pte:	Page-table entry
525  *
526  * Return: protection attributes of the page-table entry in the enum
527  *	   kvm_pgtable_prot format.
528  */
529 enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
530 #endif	/* __ARM64_KVM_PGTABLE_H__ */
531