1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 *
26 * Copyright 2018 Joyent, Inc.
27 */
28
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/archsystm.h>
32 #include <sys/debug.h>
33 #include <sys/bootconf.h>
34 #include <sys/bootsvcs.h>
35 #include <sys/bootinfo.h>
36 #include <sys/mman.h>
37 #include <sys/cmn_err.h>
38 #include <sys/param.h>
39 #include <sys/machparam.h>
40 #include <sys/machsystm.h>
41 #include <sys/promif.h>
42 #include <sys/kobj.h>
43 #ifdef __xpv
44 #include <sys/hypervisor.h>
45 #endif
46 #include <vm/kboot_mmu.h>
47 #include <vm/hat_pte.h>
48 #include <vm/hat_i86.h>
49 #include <vm/seg_kmem.h>
50
#if 0
/*
 * Joe's debug printing
 */
#define	DBG(x) \
	bop_printf(NULL, "kboot_mmu.c: %s is %" PRIx64 "\n", #x, (uint64_t)(x));
#else
#define	DBG(x)	/* naught */
#endif

/*
 * Page table and memory stuff.
 */
static caddr_t window;		/* VA used to temporarily map page table pages */
static caddr_t pte_to_window;	/* VA of the PTE that maps "window" itself */

/*
 * these are needed by mmu_init()
 */
int kbm_nx_support = 0;		/* NX bit in PTEs is in use */
int kbm_pae_support = 0;	/* PAE is 64 bit Page table entries */
int kbm_pge_support = 0;	/* PGE is Page table global bit enabled */
int kbm_largepage_support = 0;	/* boot loader says large pages are usable */
uint_t kbm_nucleus_size = 0;	/* from bi_kseg_size; see kbm_init() */

/*
 * Helpers describing the page size at a given page table level, driven
 * by the runtime shift_amt table selected in kbm_init().
 */
#define	BOOT_SHIFT(l)	(shift_amt[l])		/* VA shift for level l */
#define	BOOT_SZ(l)	((size_t)1 << BOOT_SHIFT(l))	/* pagesize at level l */
#define	BOOT_OFFSET(l)	(BOOT_SZ(l) - 1)	/* offset within a page */
#define	BOOT_MASK(l)	(~BOOT_OFFSET(l))	/* mask selecting page address */
80
/*
 * Initialize memory management parameters for boot time page table management
 */
void
kbm_init(struct xboot_info *bi)
{
	/*
	 * Copy the MMU configuration the boot loader discovered out of
	 * the xboot_info handoff structure.
	 */
	kbm_nucleus_size = (uintptr_t)bi->bi_kseg_size;
	kbm_largepage_support = bi->bi_use_largepage;
	kbm_nx_support = bi->bi_use_nx;
	kbm_pae_support = bi->bi_use_pae;
	kbm_pge_support = bi->bi_use_pge;
	window = bi->bi_pt_window;
	DBG(window);
	pte_to_window = bi->bi_pte_to_pt_window;
	DBG(pte_to_window);

	/*
	 * Derive page table geometry from the paging mode: with PAE we
	 * have 8 byte PTEs, 512 entries per table, 2 Meg large pages and
	 * top_level 3 (a 4 level tree); without it, 4 byte PTEs, 1024
	 * entries, 4 Meg large pages and top_level 1 (a 2 level tree).
	 */
	if (kbm_pae_support) {
		shift_amt = shift_amt_pae;
		ptes_per_table = 512;
		pte_size = 8;
		lpagesize = TWO_MEG;
		top_level = 3;
	} else {
		shift_amt = shift_amt_nopae;
		ptes_per_table = 1024;
		pte_size = 4;
		lpagesize = FOUR_MEG;
		top_level = 1;
	}

#ifdef __xpv
	/* Under Xen, pick up the start info and the MFN list/count. */
	xen_info = bi->bi_xen_start_info;
	mfn_list = (mfn_t *)xen_info->mfn_list;
	DBG(mfn_list);
	mfn_count = xen_info->nr_pages;
	DBG(mfn_count);
#endif
	top_page_table = bi->bi_top_page_table;
	DBG(top_page_table);
}
123
/*
 * Change the addressible page table window to point at a given page
 * (physaddr) and return the window's VA.  The writeable argument is
 * honored only under Xen; on bare metal the window is always mapped
 * writable.
 */
/*ARGSUSED*/
void *
kbm_remap_window(paddr_t physaddr, int writeable)
{
	x86pte_t pt_bits = PT_NOCONSIST | PT_VALID | PT_WRITABLE;

	DBG(physaddr);

#ifdef __xpv
	/* strip PT_WRITABLE when a read-only window was requested */
	if (!writeable)
		pt_bits &= ~PT_WRITABLE;
	if (HYPERVISOR_update_va_mapping((uintptr_t)window,
	    pa_to_ma(physaddr) | pt_bits, UVMF_INVLPG | UVMF_LOCAL) < 0)
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	/* rewrite the PTE backing the window VA, then flush the TLB entry */
	if (kbm_pae_support)
		*((x86pte_t *)pte_to_window) = physaddr | pt_bits;
	else
		*((x86pte32_t *)pte_to_window) = physaddr | pt_bits;
	mmu_invlpg(window);
#endif
	DBG(window);
	return (window);
}
151
/*
 * Add a mapping for the physical page at the given virtual address.
 * level selects the pagesize (level >= 1 gives a large page mapping);
 * is_kernel requests PT_GLOBAL when the processor supports PGE.
 */
void
kbm_map(uintptr_t va, paddr_t pa, uint_t level, uint_t is_kernel)
{
	x86pte_t *ptep;
	paddr_t pte_physaddr;
	x86pte_t pteval;

	/* Once the kernel HAT is running, boot mappings must not change. */
	if (khat_running)
		panic("kbm_map() called too late");

	pteval = pa_to_ma(pa) | PT_NOCONSIST | PT_VALID | PT_WRITABLE;
	if (level >= 1)
		pteval |= PT_PAGESIZE;
	if (kbm_pge_support && is_kernel)
		pteval |= PT_GLOBAL;

#ifdef __xpv
	/*
	 * try update_va_mapping first - fails if page table is missing.
	 */
	if (HYPERVISOR_update_va_mapping(va, pteval,
	    UVMF_INVLPG | UVMF_LOCAL) == 0)
		return;
#endif

	/*
	 * Find the pte that will map this address. This creates any
	 * missing intermediate level page tables.
	 */
	ptep = find_pte(va, &pte_physaddr, level, 0);
	if (ptep == NULL)
		bop_panic("kbm_map: find_pte returned NULL");

#ifdef __xpv
	/* with the intermediate tables in place, retry the hypercall */
	if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL))
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	/* store the entry with the width matching the paging mode */
	if (kbm_pae_support)
		*ptep = pteval;
	else
		*((x86pte32_t *)ptep) = pteval;
	mmu_invlpg((caddr_t)va);	/* flush any stale TLB entry */
#endif
}
199
#ifdef __xpv

/*
 * Add a mapping for the machine page at the given virtual address.
 * (Xen only: ma is a machine address, not a pseudo-physical one.)
 */
void
kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level)
{
	paddr_t pte_physaddr;
	x86pte_t pteval;

	pteval = ma | PT_NOCONSIST | PT_VALID | PT_REF | PT_WRITABLE;
	if (level == 1)
		pteval |= PT_PAGESIZE;	/* large page mapping */

	/*
	 * try update_va_mapping first - fails if page table is missing.
	 */
	if (HYPERVISOR_update_va_mapping(va,
	    pteval, UVMF_INVLPG | UVMF_LOCAL) == 0)
		return;

	/*
	 * Find the pte that will map this address. This creates any
	 * missing intermediate level page tables
	 */
	(void) find_pte(va, &pte_physaddr, level, 0);

	/* with the intermediate tables in place, the hypercall must work */
	if (HYPERVISOR_update_va_mapping(va,
	    pteval, UVMF_INVLPG | UVMF_LOCAL) != 0)
		bop_panic("HYPERVISOR_update_va_mapping failed");
}

#endif /* __xpv */
234
235
/*
 * Probe the boot time page tables to find the first mapping
 * including va (or higher) and return non-zero if one is found.
 * va is updated to the starting address, len to the pagesize,
 * pfn to the mapped page frame and prot to the access permissions.
 *
 * Note that if va is in the middle of a large page, the returned va
 * will be less than what was asked for.
 */
int
kbm_probe(uintptr_t *va, size_t *len, pfn_t *pfn, uint_t *prot)
{
	uintptr_t probe_va;
	x86pte_t *ptep;
	paddr_t pte_physaddr;
	x86pte_t pte_val;
	level_t l;

	if (khat_running)
		panic("kbm_probe() called too late");

	/* defaults for the "nothing found" case */
	*len = 0;
	*pfn = PFN_INVALID;
	*prot = 0;
	probe_va = *va;
restart_new_va:
	l = top_level;	/* walk down from the top of the page table tree */
	for (;;) {
		/* skip over the VA hole, if any */
		if (IN_VA_HOLE(probe_va))
			probe_va = mmu.hole_end;

		/* hypervisor-reserved VAs: skip (Xen) or give up (metal) */
		if (IN_HYPERVISOR_VA(probe_va))
#if defined(__xpv)
			probe_va = HYPERVISOR_VIRT_END;
#else
			return (0);
#endif

		/*
		 * If we don't have a valid PTP/PTE at this level
		 * then we can bump VA by this level's pagesize and try again.
		 * When the probe_va wraps around, we are done.
		 */
		ptep = find_pte(probe_va, &pte_physaddr, l, 1);
		if (ptep == NULL)
			bop_panic("kbm_probe: find_pte returned NULL");
		if (kbm_pae_support)
			pte_val = *ptep;
		else
			pte_val = *((x86pte32_t *)ptep);
		if (!PTE_ISVALID(pte_val)) {
			probe_va = (probe_va & BOOT_MASK(l)) + BOOT_SZ(l);
			if (probe_va <= *va)
				return (0);	/* wrapped around: done */
			goto restart_new_va;
		}

		/*
		 * If this entry is a pointer to a lower level page table
		 * go down to it.
		 */
		if (!PTE_ISPAGE(pte_val, l)) {
			ASSERT(l > 0);
			--l;
			continue;
		}

		/*
		 * We found a boot level page table entry
		 */
		*len = BOOT_SZ(l);
		*va = probe_va & ~(*len - 1);	/* round to mapping start */
		*pfn = PTE2PFN(pte_val, l);

		*prot = PROT_READ | PROT_EXEC;
		if (PTE_GET(pte_val, PT_WRITABLE))
			*prot |= PROT_WRITE;

		/*
		 * pt_nx is cleared if processor doesn't support NX bit
		 */
		if (PTE_GET(pte_val, mmu.pt_nx))
			*prot &= ~PROT_EXEC;

		return (1);
	}
}
323
324
/*
 * Destroy a boot loader page table 4K mapping.  Silently does nothing
 * if there is no PTE for the given VA.
 */
void
kbm_unmap(uintptr_t va)
{
	if (khat_running)
		panic("kbm_unmap() called too late");
	else {
#ifdef __xpv
		/* zero the entry via hypercall; failure is ignored */
		(void) HYPERVISOR_update_va_mapping(va, 0,
		    UVMF_INVLPG | UVMF_LOCAL);
#else
		x86pte_t *ptep;
		level_t level = 0;
		uint_t probe_only = 1;	/* don't create missing tables */

		ptep = find_pte(va, NULL, level, probe_only);
		if (ptep == NULL)
			return;

		if (kbm_pae_support)
			*ptep = 0;
		else
			*((x86pte32_t *)ptep) = 0;
		mmu_invlpg((caddr_t)va);	/* flush the stale TLB entry */
#endif
	}
}
354
355
/*
 * Change a boot loader page table 4K mapping.
 * Returns the pfn of the old mapping, or PFN_INVALID if the old entry
 * was invalid or could not be translated back to a physical address.
 */
pfn_t
kbm_remap(uintptr_t va, pfn_t pfn)
{
	x86pte_t *ptep;
	level_t level = 0;
	uint_t probe_only = 1;	/* the PTE must already exist */
	x86pte_t pte_val = pa_to_ma(pfn_to_pa(pfn)) | PT_WRITABLE |
	    PT_NOCONSIST | PT_VALID;
	x86pte_t old_pte;

	if (khat_running)
		panic("kbm_remap() called too late");
	ptep = find_pte(va, NULL, level, probe_only);
	if (ptep == NULL)
		bop_panic("kbm_remap: find_pte returned NULL");

	/* remember the old entry so we can hand back its pfn */
	if (kbm_pae_support)
		old_pte = *ptep;
	else
		old_pte = *((x86pte32_t *)ptep);

#ifdef __xpv
	if (HYPERVISOR_update_va_mapping(va, pte_val, UVMF_INVLPG | UVMF_LOCAL))
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	if (kbm_pae_support)
		*((x86pte_t *)ptep) = pte_val;
	else
		*((x86pte32_t *)ptep) = pte_val;
	mmu_invlpg((caddr_t)va);
#endif

	if (!(old_pte & PT_VALID) || ma_to_pa(old_pte) == -1)
		return (PFN_INVALID);
	return (mmu_btop(ma_to_pa(old_pte)));
}
396
397
/*
 * Change a boot loader page table 4K mapping to read only.
 */
void
kbm_read_only(uintptr_t va, paddr_t pa)
{
	/* note PT_WRITABLE is deliberately absent from the new entry */
	x86pte_t pte_val = pa_to_ma(pa) |
	    PT_NOCONSIST | PT_REF | PT_MOD | PT_VALID;

#ifdef __xpv
	if (HYPERVISOR_update_va_mapping(va, pte_val, UVMF_INVLPG | UVMF_LOCAL))
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	x86pte_t *ptep;
	level_t level = 0;

	ptep = find_pte(va, NULL, level, 0);
	if (ptep == NULL)
		bop_panic("kbm_read_only: find_pte returned NULL");

	if (kbm_pae_support)
		*ptep = pte_val;
	else
		*((x86pte32_t *)ptep) = pte_val;
	mmu_invlpg((caddr_t)va);
#endif
}
425
426 /*
427 * interfaces for kernel debugger to access physical memory
428 */
429 static x86pte_t save_pte;
430
431 void *
kbm_push(paddr_t pa)432 kbm_push(paddr_t pa)
433 {
434 static int first_time = 1;
435
436 if (first_time) {
437 first_time = 0;
438 return (window);
439 }
440
441 if (kbm_pae_support)
442 save_pte = *((x86pte_t *)pte_to_window);
443 else
444 save_pte = *((x86pte32_t *)pte_to_window);
445 return (kbm_remap_window(pa, 0));
446 }
447
/*
 * Restore the window mapping saved by the most recent kbm_push().
 */
void
kbm_pop(void)
{
#ifdef __xpv
	if (HYPERVISOR_update_va_mapping((uintptr_t)window, save_pte,
	    UVMF_INVLPG | UVMF_LOCAL) < 0)
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	/* put the saved PTE back and flush the window's TLB entry */
	if (kbm_pae_support)
		*((x86pte_t *)pte_to_window) = save_pte;
	else
		*((x86pte32_t *)pte_to_window) = save_pte;
	mmu_invlpg(window);
#endif
}
463
464 x86pte_t
get_pteval(paddr_t table,uint_t index)465 get_pteval(paddr_t table, uint_t index)
466 {
467 void *table_ptr = kbm_remap_window(table, 0);
468
469 if (kbm_pae_support)
470 return (((x86pte_t *)table_ptr)[index]);
471 return (((x86pte32_t *)table_ptr)[index]);
472 }
473
#ifndef __xpv
/*
 * Store "pteval" at entry "index" of the page table at physical address
 * "table", using the entry width appropriate for the paging mode.
 */
void
set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
{
	void *table_ptr = kbm_remap_window(table, 0);
	if (kbm_pae_support)
		((x86pte_t *)table_ptr)[index] = pteval;
	else
		((x86pte32_t *)table_ptr)[index] = pteval;
	/*
	 * NOTE(review): kbm_init() only ever sets top_level to 1 or 3, so
	 * this condition appears unreachable here -- presumably a leftover
	 * from 32-bit PAE (top_level == 2) support.  Confirm before
	 * removing, since top_level is a global that other boot code may
	 * also set.
	 */
	if (level == top_level && level == 2)
		reload_cr3();
}
#endif
487
/*
 * Allocate and zero a new page table page, filling in *pteval with the
 * entry that should point to it.  Returns the physical address of the
 * new table.
 */
paddr_t
make_ptable(x86pte_t *pteval, uint_t level)
{
	paddr_t new_table;
	void *table_ptr;

	new_table = do_bop_phys_alloc(MMU_PAGESIZE, MMU_PAGESIZE);
	table_ptr = kbm_remap_window(new_table, 1);
	bzero(table_ptr, MMU_PAGESIZE);
#ifdef __xpv
	/* Remove write permission to the new page table. */
	(void) kbm_remap_window(new_table, 0);
#endif

	/*
	 * NOTE(review): as in set_pteval(), top_level is only ever 1 or 3
	 * per kbm_init(), so the first branch appears unreachable here
	 * (likely vestigial 32-bit PAE support) and the else branch is
	 * what actually runs.  Confirm before simplifying.
	 */
	if (level == top_level && level == 2)
		*pteval = pa_to_ma(new_table) | PT_VALID;
	else
		*pteval = pa_to_ma(new_table) |
		    PT_VALID | PT_REF | PT_USER | PT_WRITABLE;

	return (new_table);
}
510
511 x86pte_t *
map_pte(paddr_t table,uint_t index)512 map_pte(paddr_t table, uint_t index)
513 {
514 void *table_ptr = kbm_remap_window(table, 0);
515 return ((x86pte_t *)((caddr_t)table_ptr + index * pte_size));
516 }
517