xref: /linux/arch/s390/boot/vmem.c (revision d0d106a2bd21499901299160744e5fe9f4c83ddb)
// SPDX-License-Identifier: GPL-2.0
#include <linux/sched/task.h>
#include <linux/pgtable.h>
#include <linux/kasan.h>
#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/facility.h>
#include <asm/sections.h>
#include <asm/ctlreg.h>
#include <asm/physmem_info.h>
#include <asm/maccess.h>
#include <asm/abs_lowcore.h>
#include "decompressor.h"
#include "boot.h"

struct ctlreg __bootdata_preserved(s390_invalid_asce);

#ifdef CONFIG_PROC_FS
atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
#endif

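/*
 * The decompressor has no symbolic access to the decompressed kernel image;
 * kernel symbols are reached via the addresses recorded in the vmlinux
 * descriptor instead.
 */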
#define init_mm			(*(struct mm_struct *)vmlinux.init_mm_off)
#define swapper_pg_dir		vmlinux.swapper_pg_dir_off
#define invalid_pg_dir		vmlinux.invalid_pg_dir_off

enum populate_mode {
	POPULATE_NONE,
	POPULATE_DIRECT,
	POPULATE_LOWCORE,
	POPULATE_ABS_LOWCORE,
	POPULATE_IDENTITY,
	POPULATE_KERNEL,
#ifdef CONFIG_KASAN
	POPULATE_KASAN_MAP_SHADOW,
	POPULATE_KASAN_ZERO_SHADOW,
	POPULATE_KASAN_SHALLOW
#endif
};

static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode);

#ifdef CONFIG_KASAN

#define kasan_early_shadow_page	vmlinux.kasan_early_shadow_page_off
#define kasan_early_shadow_pte	((pte_t *)vmlinux.kasan_early_shadow_pte_off)
#define kasan_early_shadow_pmd	((pmd_t *)vmlinux.kasan_early_shadow_pmd_off)
#define kasan_early_shadow_pud	((pud_t *)vmlinux.kasan_early_shadow_pud_off)
#define kasan_early_shadow_p4d	((p4d_t *)vmlinux.kasan_early_shadow_p4d_off)
#define __sha(x)		((unsigned long)kasan_mem_to_shadow((void *)x))

static pte_t pte_z;

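/*
 * Populate the kasan shadow of the [start, end) virtual address range
 * in the requested mode.
 */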
static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode)
{
	start = PAGE_ALIGN_DOWN(__sha(start));
	end = PAGE_ALIGN(__sha(end));
	pgtable_populate(start, end, mode);
}

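/*
 * Set up the early shadow tables and populate the kasan shadow:
 * real shadow memory for usable physical memory and the kernel image,
 * the shared zero shadow for everything that is never tracked, and a
 * shallow (top-level only) mapping for the vmalloc and modules areas.
 */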
static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end)
{
	pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
	pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY);
	p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY);
	unsigned long memgap_start = 0;
	unsigned long start, end;
	int i;

	pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO));
	crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z));
	crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
	crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
	memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
	__arch_set_page_dat(kasan_early_shadow_p4d, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pud, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pmd, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pte, 1);

	for_each_physmem_usable_range(i, &start, &end) {
		kasan_populate((unsigned long)__identity_va(start),
			       (unsigned long)__identity_va(end),
			       POPULATE_KASAN_MAP_SHADOW);
		if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260) {
			kasan_populate((unsigned long)__identity_va(memgap_start),
				       (unsigned long)__identity_va(start),
				       POPULATE_KASAN_ZERO_SHADOW);
		}
		memgap_start = end;
	}
	kasan_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KASAN_MAP_SHADOW);
	kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW);
	kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW);
	/* shallowly populate kasan shadow for vmalloc and modules */
	kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW);
	/* populate kasan shadow for untracked memory */
	kasan_populate((unsigned long)__identity_va(ident_map_size), VMALLOC_START,
		       POPULATE_KASAN_ZERO_SHADOW);
	kasan_populate(kernel_end, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW);
}

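/*
 * The kasan_*_populate_zero_shadow() helpers map a properly aligned
 * region of at least table size onto the shared read-only zero shadow
 * at the highest possible table level, instead of allocating real
 * shadow memory for it.
 */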
static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) {
		pgd_populate(&init_mm, pgd, kasan_early_shadow_p4d);
		return true;
	}
	return false;
}

static bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, P4D_SIZE) && end - addr >= P4D_SIZE) {
		p4d_populate(&init_mm, p4d, kasan_early_shadow_pud);
		return true;
	}
	return false;
}

static bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE) {
		pud_populate(&init_mm, pud, kasan_early_shadow_pmd);
		return true;
	}
	return false;
}

static bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) {
		pmd_populate(&init_mm, pmd, kasan_early_shadow_pte);
		return true;
	}
	return false;
}

static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW) {
		set_pte(pte, pte_z);
		return true;
	}
	return false;
}
#else

static inline void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end)
{
}

static inline bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static inline bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static inline bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static inline bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static inline bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode)
{
	return false;
}

#endif

/*
 * Mimic virt_to_kpte() in the absence of the init_mm symbol.
 * The pmd NULL check is skipped though.
 */
static inline pte_t *__virt_to_kpte(unsigned long va)
{
	return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va);
}

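/*
 * Allocate and initialize a new region or segment (CRST) table and
 * mark its pages as DAT tables.
 */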
static void *boot_crst_alloc(unsigned long val)
{
	unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER;
	unsigned long *table;

	table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size);
	crst_table_init(table, val);
	__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
	return table;
}

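/*
 * Allocate a page table. A 4KB page fits two 2KB page tables, so hand
 * out the page one half at a time.
 */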
static pte_t *boot_pte_alloc(void)
{
	static void *pte_leftover;
	pte_t *pte;

	/*
	 * Handling pte_leftover this way helps to avoid memory
	 * fragmentation during POPULATE_KASAN_MAP_SHADOW when EDAT is off.
	 */
	if (!pte_leftover) {
		pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
		pte = pte_leftover + _PAGE_TABLE_SIZE;
		__arch_set_page_dat(pte, 1);
	} else {
		pte = pte_leftover;
		pte_leftover = NULL;
	}

	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
	return pte;
}

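/*
 * Return the physical address that backs the given virtual address for
 * the given populate mode. For POPULATE_KASAN_MAP_SHADOW fresh, zeroed
 * shadow memory is allocated instead; POPULATE_NONE yields no backing.
 */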
static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_mode mode)
{
	switch (mode) {
	case POPULATE_NONE:
		return -1;
	case POPULATE_DIRECT:
		return addr;
	case POPULATE_LOWCORE:
		return __lowcore_pa(addr);
	case POPULATE_ABS_LOWCORE:
		return __abs_lowcore_pa(addr);
	case POPULATE_KERNEL:
		return __kernel_pa(addr);
	case POPULATE_IDENTITY:
		return __identity_pa(addr);
#ifdef CONFIG_KASAN
	case POPULATE_KASAN_MAP_SHADOW:
		addr = physmem_alloc_top_down(RR_VMEM, size, size);
		memset((void *)addr, 0, size);
		return addr;
#endif
	default:
		return -1;
	}
}

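/*
 * Large mappings are only used for the direct, identity and kernel
 * mappings, and only if the hardware supports them: EDAT1 enables 1MB
 * segment (pmd) mappings, EDAT2 enables 2GB region third (pud) mappings.
 */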
static bool large_allowed(enum populate_mode mode)
{
	return (mode == POPULATE_DIRECT) || (mode == POPULATE_IDENTITY) || (mode == POPULATE_KERNEL);
}

static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end,
			  enum populate_mode mode)
{
	unsigned long size = end - addr;

	return machine.has_edat2 && large_allowed(mode) &&
	       IS_ALIGNED(addr, PUD_SIZE) && (size >= PUD_SIZE) &&
	       IS_ALIGNED(_pa(addr, size, mode), PUD_SIZE);
}

static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end,
			  enum populate_mode mode)
{
	unsigned long size = end - addr;

	return machine.has_edat1 && large_allowed(mode) &&
	       IS_ALIGNED(addr, PMD_SIZE) && (size >= PMD_SIZE) &&
	       IS_ALIGNED(_pa(addr, size, mode), PMD_SIZE);
}

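/*
 * Install pte entries for the [addr, end) range, either pointing to the
 * shared kasan zero shadow page or to memory provided by _pa().
 */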
static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long pages = 0;
	pte_t *pte, entry;

	pte = pte_offset_kernel(pmd, addr);
	for (; addr < end; addr += PAGE_SIZE, pte++) {
		if (pte_none(*pte)) {
			if (kasan_pte_populate_zero_shadow(pte, mode))
				continue;
			entry = __pte(_pa(addr, PAGE_SIZE, mode));
			entry = set_pte_bit(entry, PAGE_KERNEL);
			set_pte(pte, entry);
			pages++;
		}
	}
	if (mode == POPULATE_IDENTITY)
		update_page_count(PG_DIRECT_MAP_4K, pages);
}

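/*
 * Walk and populate one table level for the [addr, end) range: map to
 * the kasan zero shadow if possible, use a large entry if alignment,
 * size and hardware allow it, and otherwise descend to the next lower
 * level, allocating it first if necessary. pgtable_pud_populate() and
 * pgtable_p4d_populate() below follow the same pattern.
 */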
static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long next, pages = 0;
	pmd_t *pmd, entry;
	pte_t *pte;

	pmd = pmd_offset(pud, addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd)) {
			if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode))
				continue;
			if (can_large_pmd(pmd, addr, next, mode)) {
				entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode));
				entry = set_pmd_bit(entry, SEGMENT_KERNEL);
				set_pmd(pmd, entry);
				pages++;
				continue;
			}
			pte = boot_pte_alloc();
			pmd_populate(&init_mm, pmd, pte);
		} else if (pmd_leaf(*pmd)) {
			continue;
		}
		pgtable_pte_populate(pmd, addr, next, mode);
	}
	if (mode == POPULATE_IDENTITY)
		update_page_count(PG_DIRECT_MAP_1M, pages);
}

static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long next, pages = 0;
	pud_t *pud, entry;
	pmd_t *pmd;

	pud = pud_offset(p4d, addr);
	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);
		if (pud_none(*pud)) {
			if (kasan_pud_populate_zero_shadow(pud, addr, next, mode))
				continue;
			if (can_large_pud(pud, addr, next, mode)) {
				entry = __pud(_pa(addr, _REGION3_SIZE, mode));
				entry = set_pud_bit(entry, REGION3_KERNEL);
				set_pud(pud, entry);
				pages++;
				continue;
			}
			pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);
			pud_populate(&init_mm, pud, pmd);
		} else if (pud_leaf(*pud)) {
			continue;
		}
		pgtable_pmd_populate(pud, addr, next, mode);
	}
	if (mode == POPULATE_IDENTITY)
		update_page_count(PG_DIRECT_MAP_2G, pages);
}

static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long next;
	p4d_t *p4d;
	pud_t *pud;

	p4d = p4d_offset(pgd, addr);
	for (; addr < end; addr = next, p4d++) {
		next = p4d_addr_end(addr, end);
		if (p4d_none(*p4d)) {
			if (kasan_p4d_populate_zero_shadow(p4d, addr, next, mode))
				continue;
			pud = boot_crst_alloc(_REGION3_ENTRY_EMPTY);
			p4d_populate(&init_mm, p4d, pud);
		}
		pgtable_pud_populate(p4d, addr, next, mode);
	}
}

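/*
 * Create page tables for the [addr, end) virtual address range in the
 * given populate mode, starting at the top-level (pgd) table. For
 * POPULATE_KASAN_SHALLOW only the top level is populated.
 */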
static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode)
{
	unsigned long next;
	pgd_t *pgd;
	p4d_t *p4d;

	pgd = pgd_offset(&init_mm, addr);
	for (; addr < end; addr = next, pgd++) {
		next = pgd_addr_end(addr, end);
		if (pgd_none(*pgd)) {
			if (kasan_pgd_populate_zero_shadow(pgd, addr, next, mode))
				continue;
			p4d = boot_crst_alloc(_REGION2_ENTRY_EMPTY);
			pgd_populate(&init_mm, pgd, p4d);
		}
#ifdef CONFIG_KASAN
		if (mode == POPULATE_KASAN_SHALLOW)
			continue;
#endif
		pgtable_p4d_populate(pgd, addr, next, mode);
	}
}

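/*
 * Create the kernel page tables: the lowcore, identity, kernel image,
 * amode31 and absolute lowcore mappings as well as the kasan shadow,
 * then load the resulting ASCE into the relevant control registers.
 */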
void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit)
{
	unsigned long lowcore_address = 0;
	unsigned long start, end;
	unsigned long asce_type;
	unsigned long asce_bits;
	pgd_t *init_mm_pgd;
	int i;

	/*
	 * Mark whole memory as no-dat. This must be done before any
	 * page tables are allocated or kernel image built-in pages are
	 * marked as DAT tables.
	 */
	for_each_physmem_online_range(i, &start, &end)
		__arch_set_page_nodat((void *)start, (end - start) >> PAGE_SHIFT);

	/*
	 * init_mm->pgd contains the virtual address of swapper_pg_dir,
	 * which is unusable at this stage since DAT is still off. Swap
	 * it for the physical address of swapper_pg_dir and restore
	 * the virtual address after all page tables have been created.
	 */
	init_mm_pgd = init_mm.pgd;
	init_mm.pgd = (pgd_t *)swapper_pg_dir;

	if (asce_limit == _REGION1_SIZE) {
		asce_type = _REGION2_ENTRY_EMPTY;
		asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
	} else {
		asce_type = _REGION3_ENTRY_EMPTY;
		asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
	}
	s390_invalid_asce.val = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;

	crst_table_init((unsigned long *)swapper_pg_dir, asce_type);
	crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
	__arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER);

	if (relocate_lowcore)
		lowcore_address = LOWCORE_ALT_ADDRESS;

	/*
	 * To allow prefixing, the lowcore must be mapped with 4KB pages.
	 * To prevent creation of a large page at address 0, map the
	 * lowcore first and create the identity mapping only afterwards.
	 */
	pgtable_populate(lowcore_address,
			 lowcore_address + sizeof(struct lowcore),
			 POPULATE_LOWCORE);
	for_each_physmem_usable_range(i, &start, &end) {
		pgtable_populate((unsigned long)__identity_va(start),
				 (unsigned long)__identity_va(end),
				 POPULATE_IDENTITY);
	}

	/*
	 * The [kernel_start..kernel_start + TEXT_OFFSET] region is never
	 * accessed, as per the linker script:
	 *
	 *	. = TEXT_OFFSET;
	 *
	 * Therefore, skip mapping TEXT_OFFSET bytes to prevent access to
	 * the [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET]
	 * region.
	 */
	pgtable_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KERNEL);
	pgtable_populate(AMODE31_START, AMODE31_END, POPULATE_DIRECT);
	pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
			 POPULATE_ABS_LOWCORE);
	pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE,
			 POPULATE_NONE);
	memcpy_real_ptep = __identity_va(__virt_to_kpte(__memcpy_real_area));

	kasan_populate_shadow(kernel_start, kernel_end);

	get_lowcore()->kernel_asce.val = swapper_pg_dir | asce_bits;
	get_lowcore()->user_asce = s390_invalid_asce;

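	/*
	 * Control register 1 holds the primary ASCE, control register 7
	 * the secondary and control register 13 the home space ASCE:
	 * load the kernel ASCE into CR1 and CR13, and the invalid ASCE
	 * into CR7.
	 */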
	local_ctl_load(1, &get_lowcore()->kernel_asce);
	local_ctl_load(7, &get_lowcore()->user_asce);
	local_ctl_load(13, &get_lowcore()->kernel_asce);

	init_mm.context.asce = get_lowcore()->kernel_asce.val;
	init_mm.pgd = init_mm_pgd;
}