xref: /linux/arch/arm64/mm/pageattr.c (revision 53597deca0e38c30e6cd4ba2114fa42d2bcd85bb)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2014, The Linux Foundation. All rights reserved.
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mem_encrypt.h>
#include <linux/sched.h>
#include <linux/vmalloc.h>
#include <linux/pagewalk.h>

#include <asm/cacheflush.h>
#include <asm/pgtable-prot.h>
#include <asm/set_memory.h>
#include <asm/tlbflush.h>
#include <asm/kfence.h>

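/*
 * Masks handed to the page-table walk: bits in clear_mask are cleared from
 * each descriptor first, then bits in set_mask are set.
 */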
struct page_change_data {
	pgprot_t set_mask;
	pgprot_t clear_mask;
};

static ptdesc_t set_pageattr_masks(ptdesc_t val, struct mm_walk *walk)
{
	struct page_change_data *masks = walk->private;

	/*
	 * Some users clear and set bits which alias each other (e.g. PTE_NG and
	 * PTE_PRESENT_INVALID). It is therefore important that we always clear
	 * first then set.
	 */
	val &= ~(pgprot_val(masks->clear_mask));
	val |= (pgprot_val(masks->set_mask));

	return val;
}

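/*
 * The walk callbacks below apply the masks to each leaf entry they visit.
 * Block entries at the PUD/PMD level are only updated when the walked range
 * covers them in full; a partially covered block mapping is a caller bug,
 * since the caller is expected to have split such mappings beforehand.
 */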
static int pageattr_pud_entry(pud_t *pud, unsigned long addr,
			      unsigned long next, struct mm_walk *walk)
{
	pud_t val = pudp_get(pud);

	if (pud_leaf(val)) {
		if (WARN_ON_ONCE((next - addr) != PUD_SIZE))
			return -EINVAL;
		val = __pud(set_pageattr_masks(pud_val(val), walk));
		set_pud(pud, val);
		walk->action = ACTION_CONTINUE;
	}

	return 0;
}

static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr,
			      unsigned long next, struct mm_walk *walk)
{
	pmd_t val = pmdp_get(pmd);

	if (pmd_leaf(val)) {
		if (WARN_ON_ONCE((next - addr) != PMD_SIZE))
			return -EINVAL;
		val = __pmd(set_pageattr_masks(pmd_val(val), walk));
		set_pmd(pmd, val);
		walk->action = ACTION_CONTINUE;
	}

	return 0;
}

static int pageattr_pte_entry(pte_t *pte, unsigned long addr,
			      unsigned long next, struct mm_walk *walk)
{
	pte_t val = __ptep_get(pte);

	val = __pte(set_pageattr_masks(pte_val(val), walk));
	__set_pte(pte, val);

	return 0;
}

static const struct mm_walk_ops pageattr_ops = {
	.pud_entry	= pageattr_pud_entry,
	.pmd_entry	= pageattr_pmd_entry,
	.pte_entry	= pageattr_pte_entry,
};

bool rodata_full __ro_after_init = true;

bool can_set_direct_map(void)
{
	/*
	 * rodata_full, DEBUG_PAGEALLOC and a Realm guest all require the
	 * linear map to be mapped at page granularity, so that it is possible
	 * to protect/unprotect single pages.
	 *
	 * The KFENCE pool requires page-granular mapping if initialized late.
	 *
	 * Realms need to make pages shared/protected at page granularity.
	 */
	return rodata_full || debug_pagealloc_enabled() ||
		arm64_kfence_can_set_direct_map() || is_realm_world();
}

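/*
 * Apply the given masks to every page-table entry covering [start, start +
 * size). Leaf mappings straddling either end of the range are split first,
 * and the walk runs with lazy MMU updates enabled. No TLB maintenance is
 * performed here; that is left to the callers.
 */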
static int update_range_prot(unsigned long start, unsigned long size,
			     pgprot_t set_mask, pgprot_t clear_mask)
{
	struct page_change_data data;
	int ret;

	data.set_mask = set_mask;
	data.clear_mask = clear_mask;

	ret = split_kernel_leaf_mapping(start, start + size);
	if (WARN_ON_ONCE(ret))
		return ret;

	lazy_mmu_mode_enable();

	/*
	 * The caller must ensure that the range we are operating on does not
	 * partially overlap a block mapping, or a cont mapping. Any such case
	 * must be eliminated by splitting the mapping.
	 */
	ret = walk_kernel_page_table_range_lockless(start, start + size,
						    &pageattr_ops, NULL, &data);
	lazy_mmu_mode_disable();

	return ret;
}

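/*
 * Like update_range_prot(), but also take care of the TLB: flush the range
 * unless the change is the one transition (present-invalid to valid) that
 * cannot leave stale entries behind.
 */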
static int __change_memory_common(unsigned long start, unsigned long size,
				  pgprot_t set_mask, pgprot_t clear_mask)
{
	int ret;

	ret = update_range_prot(start, size, set_mask, clear_mask);

	/*
	 * If the memory is being switched from present-invalid to valid
	 * without changing any other bits, then a TLBI isn't required as a
	 * non-valid entry cannot be cached in the TLB.
	 */
	if (pgprot_val(set_mask) != PTE_PRESENT_VALID_KERNEL ||
	    pgprot_val(clear_mask) != PTE_PRESENT_INVALID)
		flush_tlb_kernel_range(start, start + size);
	return ret;
}

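/*
 * Change the permissions of @numpages pages starting at @addr. The range
 * must be covered by a single VM_ALLOC vmalloc area with no huge mappings;
 * when read-only protection is involved and rodata_full is set, the linear
 * alias of every backing page is updated as well.
 */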
static int change_memory_common(unsigned long addr, int numpages,
				pgprot_t set_mask, pgprot_t clear_mask)
{
	unsigned long start = addr;
	unsigned long size = PAGE_SIZE * numpages;
	unsigned long end = start + size;
	struct vm_struct *area;
	int ret;

	if (!PAGE_ALIGNED(addr)) {
		start &= PAGE_MASK;
		end = start + size;
		WARN_ON_ONCE(1);
	}

	/*
	 * Kernel VA mappings are always live, and splitting live section
	 * mappings into page mappings may cause TLB conflicts. This means
	 * we have to ensure that changing the permission bits of the range
	 * we are operating on does not result in such splitting.
	 *
	 * Let's restrict ourselves to mappings created by vmalloc (or vmap).
	 * Disallow VM_ALLOW_HUGE_VMAP mappings to guarantee that only page
	 * mappings are updated and splitting is never needed.
	 *
	 * So check whether the [addr, addr + size) interval is entirely
	 * covered by precisely one VM area that has the VM_ALLOC flag set.
	 */
	area = find_vm_area((void *)addr);
	if (!area ||
	    ((unsigned long)kasan_reset_tag((void *)end) >
	     (unsigned long)kasan_reset_tag(area->addr) + area->size) ||
	    ((area->flags & (VM_ALLOC | VM_ALLOW_HUGE_VMAP)) != VM_ALLOC))
		return -EINVAL;

	if (!numpages)
		return 0;

	/*
	 * If we are manipulating read-only permissions, apply the same
	 * change to the linear mapping of the pages that back this VM area.
	 */
	if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
			    pgprot_val(clear_mask) == PTE_RDONLY)) {
		unsigned long idx = ((unsigned long)kasan_reset_tag((void *)start) -
				     (unsigned long)kasan_reset_tag(area->addr))
				    >> PAGE_SHIFT;
		for (; numpages; idx++, numpages--) {
			ret = __change_memory_common((u64)page_address(area->pages[idx]),
						     PAGE_SIZE, set_mask, clear_mask);
			if (ret)
				return ret;
		}
	}

	/*
	 * Get rid of potentially aliasing lazily unmapped vm areas that may
	 * have permissions set that deviate from the ones we are setting here.
	 */
	vm_unmap_aliases();

	return __change_memory_common(start, size, set_mask, clear_mask);
}

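/*
 * set_memory_ro()/set_memory_rw() toggle write permission, and
 * set_memory_nx()/set_memory_x() toggle execute permission (PXN, plus the
 * BTI guarded-page bit where the kernel supports it), for whole pages of a
 * vmalloc'd mapping.
 *
 * Illustrative usage sketch (not taken from this file); assumes a
 * hypothetical caller that owns a 4-page vmalloc buffer and restores
 * write permission before freeing it:
 *
 *	int err;
 *	void *buf = vmalloc(4 * PAGE_SIZE);
 *
 *	if (!buf)
 *		return -ENOMEM;
 *	err = set_memory_ro((unsigned long)buf, 4);
 *	...
 *	err = set_memory_rw((unsigned long)buf, 4);
 *	vfree(buf);
 */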
int set_memory_ro(unsigned long addr, int numpages)
{
	return change_memory_common(addr, numpages,
					__pgprot(PTE_RDONLY),
					__pgprot(PTE_WRITE));
}

int set_memory_rw(unsigned long addr, int numpages)
{
	return change_memory_common(addr, numpages,
					__pgprot(PTE_WRITE),
					__pgprot(PTE_RDONLY));
}

int set_memory_nx(unsigned long addr, int numpages)
{
	return change_memory_common(addr, numpages,
					__pgprot(PTE_PXN),
					__pgprot(PTE_MAYBE_GP));
}

int set_memory_x(unsigned long addr, int numpages)
{
	return change_memory_common(addr, numpages,
					__pgprot(PTE_MAYBE_GP),
					__pgprot(PTE_PXN));
}

int set_memory_valid(unsigned long addr, int numpages, int enable)
{
	if (enable)
		return __change_memory_common(addr, PAGE_SIZE * numpages,
					__pgprot(PTE_PRESENT_VALID_KERNEL),
					__pgprot(PTE_PRESENT_INVALID));
	else
		return __change_memory_common(addr, PAGE_SIZE * numpages,
					__pgprot(PTE_PRESENT_INVALID),
					__pgprot(PTE_PRESENT_VALID_KERNEL));
}

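/*
 * set_direct_map_invalid_noflush() marks the linear-map entry for @page
 * invalid; set_direct_map_default_noflush() restores it to the default
 * valid, writable state. Neither performs TLB maintenance, and both are
 * no-ops when the linear map cannot be modified at page granularity.
 */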
int set_direct_map_invalid_noflush(struct page *page)
{
	pgprot_t clear_mask = __pgprot(PTE_PRESENT_VALID_KERNEL);
	pgprot_t set_mask = __pgprot(PTE_PRESENT_INVALID);

	if (!can_set_direct_map())
		return 0;

	return update_range_prot((unsigned long)page_address(page),
				 PAGE_SIZE, set_mask, clear_mask);
}

int set_direct_map_default_noflush(struct page *page)
{
	pgprot_t set_mask = __pgprot(PTE_PRESENT_VALID_KERNEL | PTE_WRITE);
	pgprot_t clear_mask = __pgprot(PTE_PRESENT_INVALID | PTE_RDONLY);

	if (!can_set_direct_map())
		return 0;

	return update_range_prot((unsigned long)page_address(page),
				 PAGE_SIZE, set_mask, clear_mask);
}

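/*
 * Transition a linear-map range between protected (encrypted) and shared
 * (decrypted) state in a Realm: invalidate the mapping, use RSI to change
 * the state of the underlying memory range, then make the mapping valid
 * again with PROT_NS_SHARED set or cleared as appropriate.
 */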
static int __set_memory_enc_dec(unsigned long addr,
				int numpages,
				bool encrypt)
{
	unsigned long set_prot = 0, clear_prot = 0;
	phys_addr_t start, end;
	int ret;

	if (!is_realm_world())
		return 0;

	if (!__is_lm_address(addr))
		return -EINVAL;

	start = __virt_to_phys(addr);
	end = start + numpages * PAGE_SIZE;

	if (encrypt)
		clear_prot = PROT_NS_SHARED;
	else
		set_prot = PROT_NS_SHARED;

	/*
	 * Break the mapping before we make any changes to avoid stale TLB
	 * entries or Synchronous External Aborts caused by RIPAS_EMPTY.
	 */
	ret = __change_memory_common(addr, PAGE_SIZE * numpages,
				     __pgprot(set_prot | PTE_PRESENT_INVALID),
				     __pgprot(clear_prot | PTE_PRESENT_VALID_KERNEL));

	if (ret)
		return ret;

	if (encrypt)
		ret = rsi_set_memory_range_protected(start, end);
	else
		ret = rsi_set_memory_range_shared(start, end);

	if (ret)
		return ret;

	return __change_memory_common(addr, PAGE_SIZE * numpages,
				      __pgprot(PTE_PRESENT_VALID_KERNEL),
				      __pgprot(PTE_PRESENT_INVALID));
}

static int realm_set_memory_encrypted(unsigned long addr, int numpages)
{
	int ret = __set_memory_enc_dec(addr, numpages, true);

	/*
	 * If the request to change state fails, then the only sensible course
	 * of action for the caller is to leak the memory.
	 */
	WARN(ret, "Failed to encrypt memory, %d pages will be leaked",
	     numpages);

	return ret;
}

static int realm_set_memory_decrypted(unsigned long addr, int numpages)
{
	int ret = __set_memory_enc_dec(addr, numpages, false);

	WARN(ret, "Failed to decrypt memory, %d pages will be leaked",
	     numpages);

	return ret;
}

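/*
 * Hook the Realm transitions above into the generic arm64 memory-encryption
 * interface, so that set_memory_encrypted()/set_memory_decrypted() are
 * routed here when running in a Realm guest.
 */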
static const struct arm64_mem_crypt_ops realm_crypt_ops = {
	.encrypt = realm_set_memory_encrypted,
	.decrypt = realm_set_memory_decrypted,
};

int realm_register_memory_enc_ops(void)
{
	return arm64_mem_crypt_ops_register(&realm_crypt_ops);
}

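/*
 * Make the linear-map entries for @nr pages starting at @page valid or
 * invalid, without flushing the TLB.
 */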
int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid)
{
	unsigned long addr = (unsigned long)page_address(page);

	if (!can_set_direct_map())
		return 0;

	return set_memory_valid(addr, nr, valid);
}

#ifdef CONFIG_DEBUG_PAGEALLOC
/*
 * This does - apart from the return value - the same thing as
 * set_direct_map_valid_noflush().
 *
 * TODO: unify the two, or document the conceptual differences.
 */
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
	if (!can_set_direct_map())
		return;

	set_memory_valid((unsigned long)page_address(page), numpages, enable);
}
#endif /* CONFIG_DEBUG_PAGEALLOC */

/*
 * This function is used to determine if a linear map page has been marked as
 * not-valid. Walk the page table and check the PTE_VALID bit.
 *
 * Because this is only called on the kernel linear map, p?d_sect() implies
 * p?d_present(). When debug_pagealloc is enabled, section mappings are
 * disabled.
 */
bool kernel_page_present(struct page *page)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp, pud;
	pmd_t *pmdp, pmd;
	pte_t *ptep;
	unsigned long addr = (unsigned long)page_address(page);

	pgdp = pgd_offset_k(addr);
	if (pgd_none(READ_ONCE(*pgdp)))
		return false;

	p4dp = p4d_offset(pgdp, addr);
	if (p4d_none(READ_ONCE(*p4dp)))
		return false;

	pudp = pud_offset(p4dp, addr);
	pud = READ_ONCE(*pudp);
	if (pud_none(pud))
		return false;
	if (pud_leaf(pud))
		return pud_valid(pud);

	pmdp = pmd_offset(pudp, addr);
	pmd = READ_ONCE(*pmdp);
	if (pmd_none(pmd))
		return false;
	if (pmd_leaf(pmd))
		return pmd_valid(pmd);

	ptep = pte_offset_kernel(pmdp, addr);
	return pte_valid(__ptep_get(ptep));
}