1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (C) 2019 SiFive
4 */
5
6 #include <linux/efi.h>
7 #include <linux/init.h>
8 #include <linux/debugfs.h>
9 #include <linux/memory_hotplug.h>
10 #include <linux/seq_file.h>
11 #include <linux/ptdump.h>
12
13 #include <linux/pgtable.h>
14 #include <asm/kasan.h>
15
/*
 * printf-style output to the seq_file @m that is silently skipped when @m
 * is NULL. ptdump_check_wx() runs the walker with a NULL seq_file, so all
 * dumper output goes through this guard. @m is evaluated twice.
 */
#define pt_dump_seq_printf(m, fmt, ...)			\
({							\
	if (m)						\
		seq_printf(m, fmt, ##__VA_ARGS__);	\
})
21
/*
 * Write the fixed string @fmt to the seq_file @m, doing nothing when @m is
 * NULL. The string is emitted verbatim with seq_puts() rather than being
 * interpreted as a printf format, so a literal '%' in it is harmless.
 * @m is evaluated twice.
 */
#define pt_dump_seq_puts(m, fmt)	\
({					\
	if (m)				\
		seq_puts(m, fmt);	\
})
27
/*
 * The page dumper groups page table entries of the same type into a single
 * description. It uses pg_state to track the range information while
 * iterating over the pte entries. When the continuity is broken it then
 * dumps out a description of the range.
 */
struct pg_state {
	/* Generic walker state; note_page() gets back to pg_state via container_of() */
	struct ptdump_state ptdump;
	/* Output file, or NULL when nothing is printed (see ptdump_check_wx()) */
	struct seq_file *seq;
	/* Marker of the region being dumped; marker[1] holds the next boundary */
	const struct addr_marker *marker;
	/* Virtual address at which the range being accumulated starts */
	unsigned long start_address;
	/* Physical address of the first entry of the current range */
	unsigned long start_pa;
	/* Physical address of the latest entry seen; not read by the code visible here */
	unsigned long last_pa;
	/* Page table level of the current range, -1 until the first entry is noted */
	int level;
	/* Protection bits shared by every entry of the current range */
	u64 current_prot;
	/* When true, ranges that are both writable and executable are reported */
	bool check_wx;
	/* Number of pages found in W+X ranges (only updated when check_wx is set) */
	unsigned long wx_pages;
};
46
/*
 * Address marker: names the region that begins at start_address. Marker
 * tables are walked in order by note_page(), which prints "---[ name ]---"
 * once the dumped address reaches the next marker's start_address.
 */
struct addr_marker {
	unsigned long start_address;	/* first virtual address of the region */
	const char *name;		/* label printed in the dump */
};
52
/*
 * Private information for debugfs: one instance is attached to each debugfs
 * file created in ptdump_init() and handed to ptdump_walk() on read.
 */
struct ptd_mm_info {
	struct mm_struct *mm;			/* address space whose page tables are walked */
	const struct addr_marker *markers;	/* region markers used to annotate the dump */
	unsigned long base_addr;		/* first address of the walked range */
	unsigned long end;			/* end address of the walked range */
};
60
/*
 * Indices into address_markers[]. ptdump_init() uses them to fill in the
 * start addresses, so the enumerators (including the #ifdef'ed ones) must
 * stay in the same order as the entries of that table.
 */
enum address_markers_idx {
	FIXMAP_START_NR,
	FIXMAP_END_NR,
	PCI_IO_START_NR,
	PCI_IO_END_NR,
#ifdef CONFIG_SPARSEMEM_VMEMMAP
	VMEMMAP_START_NR,
	VMEMMAP_END_NR,
#endif
	VMALLOC_START_NR,
	VMALLOC_END_NR,
	PAGE_OFFSET_NR,
#ifdef CONFIG_KASAN
	KASAN_SHADOW_START_NR,
	KASAN_SHADOW_END_NR,
#endif
#ifdef CONFIG_64BIT
	MODULES_MAPPING_NR,
	KERNEL_MAPPING_NR,
#endif
	/* Terminating entry of the marker table */
	END_OF_SPACE_NR
};
83
84 static struct addr_marker address_markers[] = {
85 {0, "Fixmap start"},
86 {0, "Fixmap end"},
87 {0, "PCI I/O start"},
88 {0, "PCI I/O end"},
89 #ifdef CONFIG_SPARSEMEM_VMEMMAP
90 {0, "vmemmap start"},
91 {0, "vmemmap end"},
92 #endif
93 {0, "vmalloc() area"},
94 {0, "vmalloc() end"},
95 {0, "Linear mapping"},
96 #ifdef CONFIG_KASAN
97 {0, "Kasan shadow start"},
98 {0, "Kasan shadow end"},
99 #endif
100 #ifdef CONFIG_64BIT
101 {0, "Modules/BPF mapping"},
102 {0, "Kernel mapping"},
103 #endif
104 {-1, NULL},
105 };
106
107 static struct ptd_mm_info kernel_ptd_info = {
108 .mm = &init_mm,
109 .markers = address_markers,
110 .base_addr = 0,
111 .end = ULONG_MAX,
112 };
113
#ifdef CONFIG_EFI
/* Region markers for the EFI page table dump; the -1 entry ends the table. */
static struct addr_marker efi_addr_markers[] = {
	{ .start_address = 0,		.name = "UEFI runtime start" },
	{ .start_address = SZ_1G,	.name = "UEFI runtime end" },
	{ .start_address = -1,		.name = NULL }
};

/* Dump description for efi_mm, covering addresses from 0 up to SZ_2G. */
static struct ptd_mm_info efi_ptd_info = {
	.markers	= efi_addr_markers,
	.mm		= &efi_mm,
	.base_addr	= 0,
	.end		= SZ_2G,
};
#endif
128
/*
 * Page Table Entry: describes how one group of PTE bits is rendered by
 * dump_prot().
 */
struct prot_bits {
	u64 mask;		/* PTE bits covered by this entry */
	/*
	 * Text shown when any bit of mask is set. For the _PAGE_SOFT and
	 * _PAGE_MTMASK_SVPBMT entries this is a format string that dump_prot()
	 * fills in.
	 */
	const char *set;
	const char *clear;	/* text shown when none of the bits is set */
};
135
136 static const struct prot_bits pte_bits[] = {
137 {
138 #ifdef CONFIG_64BIT
139 .mask = _PAGE_NAPOT,
140 .set = "N",
141 .clear = ".",
142 }, {
143 .mask = _PAGE_MTMASK_SVPBMT,
144 .set = "MT(%s)",
145 .clear = " .. ",
146 }, {
147 #endif
148 .mask = _PAGE_SOFT,
149 .set = "RSW(%d)",
150 .clear = " .. ",
151 }, {
152 .mask = _PAGE_DIRTY,
153 .set = "D",
154 .clear = ".",
155 }, {
156 .mask = _PAGE_ACCESSED,
157 .set = "A",
158 .clear = ".",
159 }, {
160 .mask = _PAGE_GLOBAL,
161 .set = "G",
162 .clear = ".",
163 }, {
164 .mask = _PAGE_USER,
165 .set = "U",
166 .clear = ".",
167 }, {
168 .mask = _PAGE_EXEC,
169 .set = "X",
170 .clear = ".",
171 }, {
172 .mask = _PAGE_WRITE,
173 .set = "W",
174 .clear = ".",
175 }, {
176 .mask = _PAGE_READ,
177 .set = "R",
178 .clear = ".",
179 }, {
180 .mask = _PAGE_PRESENT,
181 .set = "V",
182 .clear = ".",
183 }
184 };
185
/* Page Level: per-level display name and the set of PTE bits tracked for it */
struct pg_level {
	const char *name;	/* label printed by dump_addr() for this level */
	u64 mask;		/* bits kept from an entry's value; built in ptdump_init() */
};
191
192 static struct pg_level pg_level[] = {
193 { /* pgd */
194 .name = "PGD",
195 }, { /* p4d */
196 .name = (CONFIG_PGTABLE_LEVELS > 4) ? "P4D" : "PGD",
197 }, { /* pud */
198 .name = (CONFIG_PGTABLE_LEVELS > 3) ? "PUD" : "PGD",
199 }, { /* pmd */
200 .name = (CONFIG_PGTABLE_LEVELS > 2) ? "PMD" : "PGD",
201 }, { /* pte */
202 .name = "PTE",
203 },
204 };
205
dump_prot(struct pg_state * st)206 static void dump_prot(struct pg_state *st)
207 {
208 unsigned int i;
209
210 for (i = 0; i < ARRAY_SIZE(pte_bits); i++) {
211 char s[7];
212 unsigned long val;
213
214 val = st->current_prot & pte_bits[i].mask;
215 if (val) {
216 if (pte_bits[i].mask == _PAGE_SOFT)
217 sprintf(s, pte_bits[i].set, val >> 8);
218 #ifdef CONFIG_64BIT
219 else if (pte_bits[i].mask == _PAGE_MTMASK_SVPBMT) {
220 if (val == _PAGE_NOCACHE_SVPBMT)
221 sprintf(s, pte_bits[i].set, "NC");
222 else if (val == _PAGE_IO_SVPBMT)
223 sprintf(s, pte_bits[i].set, "IO");
224 else
225 sprintf(s, pte_bits[i].set, "??");
226 }
227 #endif
228 else
229 sprintf(s, "%s", pte_bits[i].set);
230 } else {
231 sprintf(s, "%s", pte_bits[i].clear);
232 }
233
234 pt_dump_seq_printf(st->seq, " %s", s);
235 }
236 }
237
238 #ifdef CONFIG_64BIT
239 #define ADDR_FORMAT "0x%016lx"
240 #else
241 #define ADDR_FORMAT "0x%08lx"
242 #endif
dump_addr(struct pg_state * st,unsigned long addr)243 static void dump_addr(struct pg_state *st, unsigned long addr)
244 {
245 static const char units[] = "KMGTPE";
246 const char *unit = units;
247 unsigned long delta;
248
249 pt_dump_seq_printf(st->seq, ADDR_FORMAT "-" ADDR_FORMAT " ",
250 st->start_address, addr);
251
252 pt_dump_seq_printf(st->seq, " " ADDR_FORMAT " ", st->start_pa);
253 delta = (addr - st->start_address) >> 10;
254
255 while (!(delta & 1023) && unit[1]) {
256 delta >>= 10;
257 unit++;
258 }
259
260 pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit,
261 pg_level[st->level].name);
262 }
263
note_prot_wx(struct pg_state * st,unsigned long addr)264 static void note_prot_wx(struct pg_state *st, unsigned long addr)
265 {
266 if (!st->check_wx)
267 return;
268
269 if ((st->current_prot & (_PAGE_WRITE | _PAGE_EXEC)) !=
270 (_PAGE_WRITE | _PAGE_EXEC))
271 return;
272
273 WARN_ONCE(1, "riscv/mm: Found insecure W+X mapping at address %p/%pS\n",
274 (void *)st->start_address, (void *)st->start_address);
275
276 st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
277 }
278
/*
 * Walker callback, invoked with the address, level and raw value of each
 * entry. Consecutive entries with the same level and protection bits that
 * stay inside the current marker region are merged into one range; when
 * that continuity breaks, the finished range is reported and a new one is
 * started at @addr.
 *
 * @pt_st: walker state embedded in the dumper's struct pg_state
 * @addr:  virtual address of the entry
 * @level: page table level of the entry; a negative level yields no
 *         protection bits
 * @val:   raw entry value
 */
static void note_page(struct ptdump_state *pt_st, unsigned long addr,
		      int level, u64 val)
{
	struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
	u64 pa = PFN_PHYS(pte_pfn(__pte(val)));
	u64 prot = (level >= 0) ? (val & pg_level[level].mask) : 0;

	/* Very first entry: just open a range and print the first marker */
	if (st->level == -1) {
		st->level = level;
		st->current_prot = prot;
		st->start_address = addr;
		st->start_pa = pa;
		st->last_pa = pa;
		pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
		return;
	}

	/* Same kind of entry inside the same region: the range keeps growing */
	if (prot == st->current_prot && level == st->level &&
	    addr < st->marker[1].start_address) {
		st->last_pa = pa;
		return;
	}

	/* Continuity broken: report the finished range if it had any bits set */
	if (st->current_prot) {
		note_prot_wx(st, addr);
		dump_addr(st, addr);
		dump_prot(st);
		pt_dump_seq_puts(st->seq, "\n");
	}

	/* Print the header of every marker region that @addr has reached */
	while (addr >= st->marker[1].start_address) {
		st->marker++;
		pt_dump_seq_printf(st->seq, "---[ %s ]---\n",
				   st->marker->name);
	}

	/* Open the next range at the current entry */
	st->start_address = addr;
	st->start_pa = pa;
	st->last_pa = pa;
	st->current_prot = prot;
	st->level = level;
}
320
ptdump_walk(struct seq_file * s,struct ptd_mm_info * pinfo)321 static void ptdump_walk(struct seq_file *s, struct ptd_mm_info *pinfo)
322 {
323 struct pg_state st = {
324 .seq = s,
325 .marker = pinfo->markers,
326 .level = -1,
327 .ptdump = {
328 .note_page = note_page,
329 .range = (struct ptdump_range[]) {
330 {pinfo->base_addr, pinfo->end},
331 {0, 0}
332 }
333 }
334 };
335
336 ptdump_walk_pgd(&st.ptdump, pinfo->mm, NULL);
337 }
338
ptdump_check_wx(void)339 bool ptdump_check_wx(void)
340 {
341 struct pg_state st = {
342 .seq = NULL,
343 .marker = (struct addr_marker[]) {
344 {0, NULL},
345 {-1, NULL},
346 },
347 .level = -1,
348 .check_wx = true,
349 .ptdump = {
350 .note_page = note_page,
351 .range = (struct ptdump_range[]) {
352 {KERN_VIRT_START, ULONG_MAX},
353 {0, 0}
354 }
355 }
356 };
357
358 ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
359
360 if (st.wx_pages) {
361 pr_warn("Checked W+X mappings: failed, %lu W+X pages found\n",
362 st.wx_pages);
363
364 return false;
365 } else {
366 pr_info("Checked W+X mappings: passed, no W+X pages found\n");
367
368 return true;
369 }
370 }
371
ptdump_show(struct seq_file * m,void * v)372 static int ptdump_show(struct seq_file *m, void *v)
373 {
374 get_online_mems();
375 ptdump_walk(m, m->private);
376 put_online_mems();
377
378 return 0;
379 }
380
381 DEFINE_SHOW_ATTRIBUTE(ptdump);
382
ptdump_init(void)383 static int __init ptdump_init(void)
384 {
385 unsigned int i, j;
386
387 address_markers[FIXMAP_START_NR].start_address = FIXADDR_START;
388 address_markers[FIXMAP_END_NR].start_address = FIXADDR_TOP;
389 address_markers[PCI_IO_START_NR].start_address = PCI_IO_START;
390 address_markers[PCI_IO_END_NR].start_address = PCI_IO_END;
391 #ifdef CONFIG_SPARSEMEM_VMEMMAP
392 address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START;
393 address_markers[VMEMMAP_END_NR].start_address = VMEMMAP_END;
394 #endif
395 address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
396 address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
397 address_markers[PAGE_OFFSET_NR].start_address = PAGE_OFFSET;
398 #ifdef CONFIG_KASAN
399 address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START;
400 address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END;
401 #endif
402 #ifdef CONFIG_64BIT
403 address_markers[MODULES_MAPPING_NR].start_address = MODULES_VADDR;
404 address_markers[KERNEL_MAPPING_NR].start_address = kernel_map.virt_addr;
405 #endif
406
407 kernel_ptd_info.base_addr = KERN_VIRT_START;
408
409 pg_level[1].name = pgtable_l5_enabled ? "P4D" : "PGD";
410 pg_level[2].name = pgtable_l4_enabled ? "PUD" : "PGD";
411
412 for (i = 0; i < ARRAY_SIZE(pg_level); i++)
413 for (j = 0; j < ARRAY_SIZE(pte_bits); j++)
414 pg_level[i].mask |= pte_bits[j].mask;
415
416 debugfs_create_file("kernel_page_tables", 0400, NULL, &kernel_ptd_info,
417 &ptdump_fops);
418 #ifdef CONFIG_EFI
419 if (efi_enabled(EFI_RUNTIME_SERVICES))
420 debugfs_create_file("efi_page_tables", 0400, NULL, &efi_ptd_info,
421 &ptdump_fops);
422 #endif
423
424 return 0;
425 }
426
427 device_initcall(ptdump_init);
428