/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/debug.h>
#include <sys/bootconf.h>
#include <sys/bootsvcs.h>
#include <sys/bootinfo.h>
#include <sys/mman.h>
#include <sys/cmn_err.h>
#include <sys/param.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/promif.h>
#include <sys/kobj.h>
#ifdef __xpv
#include <sys/hypervisor.h>
#endif
#include <vm/kboot_mmu.h>
#include <vm/hat_pte.h>
#include <vm/hat_i86.h>
#include <vm/seg_kmem.h>

#if 0
/*
 * Joe's debug printing
 */
#define	DBG(x) \
	bop_printf(NULL, "kboot_mmu.c: %s is %" PRIx64 "\n", #x, (uint64_t)(x));
#else
#define	DBG(x)	/* naught */
#endif

/*
 * The page table window: a VA reserved for temporarily mapping any
 * physical page, and the VA of the PTE that maps the window itself.
 */
static caddr_t window;
static caddr_t pte_to_window;
/*
 * These are needed by mmu_init()
 */
int kbm_nx_support = 0;		/* NX bit in PTEs is in use */
int kbm_pae_support = 0;	/* PAE (64-bit page table entries) is in use */
int kbm_pge_support = 0;	/* PGE (page table global bit) is enabled */
int kbm_largepage_support = 0;
uint_t kbm_nucleus_size = 0;

#define	BOOT_SHIFT(l)	(shift_amt[l])
#define	BOOT_SZ(l)	((size_t)1 << BOOT_SHIFT(l))
#define	BOOT_OFFSET(l)	(BOOT_SZ(l) - 1)
#define	BOOT_MASK(l)	(~BOOT_OFFSET(l))

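/*
 * Illustrative note (not from the original source): with 4K base
 * pages, shift_amt[0] is 12 in both the PAE and non-PAE tables, so
 * BOOT_SZ(0) is 0x1000, BOOT_OFFSET(0) is 0xfff, and BOOT_MASK(0)
 * rounds an address down to its page, e.g.
 * (0x12345 & BOOT_MASK(0)) == 0x12000.
 */
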
/*
 * Initialize memory management parameters for boot time page table
 * management.
 */
void
kbm_init(struct xboot_info *bi)
{
	/*
	 * configure mmu information
	 */
	kbm_nucleus_size = (uintptr_t)bi->bi_kseg_size;
	kbm_largepage_support = bi->bi_use_largepage;
	kbm_nx_support = bi->bi_use_nx;
	kbm_pae_support = bi->bi_use_pae;
	kbm_pge_support = bi->bi_use_pge;
	window = bi->bi_pt_window;
	DBG(window);
	pte_to_window = bi->bi_pte_to_pt_window;
	DBG(pte_to_window);
	if (kbm_pae_support) {
		shift_amt = shift_amt_pae;
		ptes_per_table = 512;
		pte_size = 8;
		lpagesize = TWO_MEG;
#ifdef __amd64
		top_level = 3;
#else
		top_level = 2;
#endif
	} else {
		shift_amt = shift_amt_nopae;
		ptes_per_table = 1024;
		pte_size = 4;
		lpagesize = FOUR_MEG;
		top_level = 1;
	}

#ifdef __xpv
	xen_info = bi->bi_xen_start_info;
	mfn_list = (mfn_t *)xen_info->mfn_list;
	DBG(mfn_list);
	mfn_count = xen_info->nr_pages;
	DBG(mfn_count);
#endif
	top_page_table = bi->bi_top_page_table;
	DBG(top_page_table);
}

/*
 * Change the addressable page table window to point at a given page.
 */
/*ARGSUSED*/
void *
kbm_remap_window(paddr_t physaddr, int writeable)
{
	x86pte_t pt_bits = PT_NOCONSIST | PT_VALID | PT_WRITABLE;

	DBG(physaddr);

#ifdef __xpv
	if (!writeable)
		pt_bits &= ~PT_WRITABLE;
	if (HYPERVISOR_update_va_mapping((uintptr_t)window,
	    pa_to_ma(physaddr) | pt_bits, UVMF_INVLPG | UVMF_LOCAL) < 0)
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	if (kbm_pae_support)
		*((x86pte_t *)pte_to_window) = physaddr | pt_bits;
	else
		*((x86pte32_t *)pte_to_window) = physaddr | pt_bits;
	mmu_tlbflush_entry(window);
#endif
	DBG(window);
	return (window);
}
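
/*
 * Illustrative sketch only, not compiled and not part of this file's
 * interfaces: peek_phys_word() is a hypothetical helper showing how
 * early boot code can read physical memory through the shared page
 * table window before the real HAT is running.
 */
#if 0
static uint32_t
peek_phys_word(paddr_t pa)
{
	/* map the page containing pa through the window, read-only */
	caddr_t va = kbm_remap_window(pa & ~(paddr_t)MMU_PAGEOFFSET, 0);

	/* read the word at pa's offset within that page */
	return (*(uint32_t *)(va + (pa & MMU_PAGEOFFSET)));
}
#endif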

/*
 * Add a mapping for the physical page at the given virtual address.
 */
void
kbm_map(uintptr_t va, paddr_t pa, uint_t level, uint_t is_kernel)
{
	x86pte_t *ptep;
	paddr_t pte_physaddr;
	x86pte_t pteval;

	if (khat_running)
		panic("kbm_map() called too late");

	pteval = pa_to_ma(pa) | PT_NOCONSIST | PT_VALID | PT_WRITABLE;
	if (level >= 1)
		pteval |= PT_PAGESIZE;
	if (kbm_pge_support && is_kernel)
		pteval |= PT_GLOBAL;

#ifdef __xpv
	/*
	 * Try update_va_mapping first; it fails if the page table is missing.
	 */
	if (HYPERVISOR_update_va_mapping(va, pteval,
	    UVMF_INVLPG | UVMF_LOCAL) == 0)
		return;
#endif

	/*
	 * Find the pte that will map this address. This creates any
	 * missing intermediate level page tables.
	 */
	ptep = find_pte(va, &pte_physaddr, level, 0);
	if (ptep == NULL)
		bop_panic("kbm_map: find_pte returned NULL");

#ifdef __xpv
	if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL))
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	if (kbm_pae_support)
		*ptep = pteval;
	else
		*((x86pte32_t *)ptep) = pteval;
	mmu_tlbflush_entry((caddr_t)va);
#endif
}
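
/*
 * Illustrative sketch only, not compiled: a hypothetical helper that
 * maps a physically contiguous, page aligned range with 4K kernel
 * pages via kbm_map(); level 0 selects 4K pages and is_kernel requests
 * a global mapping when PGE is supported.
 */
#if 0
static void
map_range_example(uintptr_t va, paddr_t pa, size_t len)
{
	while (len != 0) {
		kbm_map(va, pa, 0, 1);	/* level 0 == 4K kernel page */
		va += MMU_PAGESIZE;
		pa += MMU_PAGESIZE;
		len -= MMU_PAGESIZE;
	}
}
#endif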

#ifdef __xpv

/*
 * Add a mapping for the machine page at the given virtual address.
 */
void
kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level)
{
	paddr_t pte_physaddr;
	x86pte_t pteval;

	pteval = ma | PT_NOCONSIST | PT_VALID | PT_REF | PT_WRITABLE;
	if (level == 1)
		pteval |= PT_PAGESIZE;

	/*
	 * Try update_va_mapping first; it fails if the page table is missing.
	 */
	if (HYPERVISOR_update_va_mapping(va,
	    pteval, UVMF_INVLPG | UVMF_LOCAL) == 0)
		return;

	/*
	 * Find the pte that will map this address. This creates any
	 * missing intermediate level page tables.
	 */
	(void) find_pte(va, &pte_physaddr, level, 0);

	if (HYPERVISOR_update_va_mapping(va,
	    pteval, UVMF_INVLPG | UVMF_LOCAL) != 0)
		bop_panic("HYPERVISOR_update_va_mapping failed");
}

#endif /* __xpv */


/*
 * Probe the boot time page tables to find the first mapping
 * including va (or higher) and return non-zero if one is found.
 * va is updated to the starting address and len to the pagesize.
 * pfn is set to the page frame number of the first mapped page.
 *
 * Note that if va is in the middle of a large page, the returned va
 * will be less than what was asked for.
 */
int
kbm_probe(uintptr_t *va, size_t *len, pfn_t *pfn, uint_t *prot)
{
	uintptr_t probe_va;
	x86pte_t *ptep;
	paddr_t pte_physaddr;
	x86pte_t pte_val;
	level_t l;

	if (khat_running)
		panic("kbm_probe() called too late");
	*len = 0;
	*pfn = PFN_INVALID;
	*prot = 0;
	probe_va = *va;
restart_new_va:
	l = top_level;
	for (;;) {
		if (IN_VA_HOLE(probe_va))
			probe_va = mmu.hole_end;

		if (IN_HYPERVISOR_VA(probe_va))
#if defined(__amd64) && defined(__xpv)
			probe_va = HYPERVISOR_VIRT_END;
#else
			return (0);
#endif

		/*
		 * If we don't have a valid PTP/PTE at this level
		 * then we can bump VA by this level's pagesize and try again.
		 * When the probe_va wraps around, we are done.
		 */
		ptep = find_pte(probe_va, &pte_physaddr, l, 1);
		if (ptep == NULL)
			bop_panic("kbm_probe: find_pte returned NULL");
		if (kbm_pae_support)
			pte_val = *ptep;
		else
			pte_val = *((x86pte32_t *)ptep);
		if (!PTE_ISVALID(pte_val)) {
			probe_va = (probe_va & BOOT_MASK(l)) + BOOT_SZ(l);
			if (probe_va <= *va)
				return (0);
			goto restart_new_va;
		}

		/*
		 * If this entry is a pointer to a lower level page table
		 * go down to it.
		 */
		if (!PTE_ISPAGE(pte_val, l)) {
			ASSERT(l > 0);
			--l;
			continue;
		}

		/*
		 * We found a boot level page table entry
		 */
		*len = BOOT_SZ(l);
		*va = probe_va & ~(*len - 1);
		*pfn = PTE2PFN(pte_val, l);

		*prot = PROT_READ | PROT_EXEC;
		if (PTE_GET(pte_val, PT_WRITABLE))
			*prot |= PROT_WRITE;

		/*
		 * mmu.pt_nx is zero if the processor doesn't support the
		 * NX bit, so PROT_EXEC is left intact in that case.
		 */
		if (PTE_GET(pte_val, mmu.pt_nx))
			*prot &= ~PROT_EXEC;

		return (1);
	}
}
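
/*
 * Illustrative sketch only, not compiled: enumerating every boot-time
 * mapping with kbm_probe(). Each successful probe rewrites va and len
 * to describe the mapping found at or above the requested address, so
 * stepping va past the returned range finds the next mapping.
 */
#if 0
static void
walk_boot_mappings(void)
{
	uintptr_t va = 0;
	size_t len;
	pfn_t pfn;
	uint_t prot;

	while (kbm_probe(&va, &len, &pfn, &prot) != 0) {
		/* ... examine mapping [va, va + len) backed by pfn ... */
		va += len;
	}
}
#endif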


/*
 * Destroy a boot loader page table 4K mapping.
 */
void
kbm_unmap(uintptr_t va)
{
	if (khat_running)
		panic("kbm_unmap() called too late");
	else {
#ifdef __xpv
		(void) HYPERVISOR_update_va_mapping(va, 0,
		    UVMF_INVLPG | UVMF_LOCAL);
#else
		x86pte_t *ptep;
		level_t level = 0;
		uint_t probe_only = 1;

		ptep = find_pte(va, NULL, level, probe_only);
		if (ptep == NULL)
			return;

		if (kbm_pae_support)
			*ptep = 0;
		else
			*((x86pte32_t *)ptep) = 0;
		mmu_tlbflush_entry((caddr_t)va);
#endif
	}
}


/*
 * Change a boot loader page table 4K mapping.
 * Returns the pfn of the old mapping.
 */
pfn_t
kbm_remap(uintptr_t va, pfn_t pfn)
{
	x86pte_t *ptep;
	level_t level = 0;
	uint_t probe_only = 1;
	x86pte_t pte_val = pa_to_ma(pfn_to_pa(pfn)) | PT_WRITABLE |
	    PT_NOCONSIST | PT_VALID;
	x86pte_t old_pte;

	if (khat_running)
		panic("kbm_remap() called too late");
	ptep = find_pte(va, NULL, level, probe_only);
	if (ptep == NULL)
		bop_panic("kbm_remap: find_pte returned NULL");

	if (kbm_pae_support)
		old_pte = *ptep;
	else
		old_pte = *((x86pte32_t *)ptep);

#ifdef __xpv
	if (HYPERVISOR_update_va_mapping(va, pte_val, UVMF_INVLPG | UVMF_LOCAL))
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	if (kbm_pae_support)
		*((x86pte_t *)ptep) = pte_val;
	else
		*((x86pte32_t *)ptep) = pte_val;
	mmu_tlbflush_entry((caddr_t)va);
#endif

	if (!(old_pte & PT_VALID) || ma_to_pa(old_pte) == -1)
		return (PFN_INVALID);
	return (mmu_btop(ma_to_pa(old_pte)));
}
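
/*
 * Illustrative sketch only, not compiled: replacing the frame behind
 * an existing 4K boot mapping with kbm_remap(), which hands back the
 * old pfn so the displaced frame can be reused.
 */
#if 0
static void
replace_frame_example(uintptr_t va, pfn_t new_pfn)
{
	pfn_t old_pfn = kbm_remap(va, new_pfn);

	if (old_pfn != PFN_INVALID) {
		/* ... old_pfn no longer backs va and can be recycled ... */
	}
}
#endif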


/*
 * Change a boot loader page table 4K mapping to read only.
 */
void
kbm_read_only(uintptr_t va, paddr_t pa)
{
	x86pte_t pte_val = pa_to_ma(pa) |
	    PT_NOCONSIST | PT_REF | PT_MOD | PT_VALID;

#ifdef __xpv
	if (HYPERVISOR_update_va_mapping(va, pte_val, UVMF_INVLPG | UVMF_LOCAL))
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	x86pte_t *ptep;
	level_t level = 0;

	ptep = find_pte(va, NULL, level, 0);
	if (ptep == NULL)
		bop_panic("kbm_read_only: find_pte returned NULL");

	if (kbm_pae_support)
		*ptep = pte_val;
	else
		*((x86pte32_t *)ptep) = pte_val;
	mmu_tlbflush_entry((caddr_t)va);
#endif
}

/*
 * Interfaces for the kernel debugger to access physical memory.
 */
static x86pte_t save_pte;

void *
kbm_push(paddr_t pa)
{
	static int first_time = 1;

	if (first_time) {
		first_time = 0;
		return (window);
	}

	if (kbm_pae_support)
		save_pte = *((x86pte_t *)pte_to_window);
	else
		save_pte = *((x86pte32_t *)pte_to_window);
	return (kbm_remap_window(pa, 0));
}

void
kbm_pop(void)
{
#ifdef __xpv
	if (HYPERVISOR_update_va_mapping((uintptr_t)window, save_pte,
	    UVMF_INVLPG | UVMF_LOCAL) < 0)
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	if (kbm_pae_support)
		*((x86pte_t *)pte_to_window) = save_pte;
	else
		*((x86pte32_t *)pte_to_window) = save_pte;
	mmu_tlbflush_entry(window);
#endif
}
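
/*
 * Illustrative sketch only, not compiled: the push/pop pairing a
 * kernel debugger would use to copy out physical memory; len is
 * assumed not to cross a page boundary.
 */
#if 0
static void
debugger_peek_example(paddr_t pa, void *buf, size_t len)
{
	/* kbm_push() saves the window's current PTE before remapping */
	caddr_t va = kbm_push(pa & ~(paddr_t)MMU_PAGEOFFSET);

	bcopy(va + (pa & MMU_PAGEOFFSET), buf, len);
	kbm_pop();			/* restores the saved mapping */
}
#endif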

x86pte_t
get_pteval(paddr_t table, uint_t index)
{
	void *table_ptr = kbm_remap_window(table, 0);

	if (kbm_pae_support)
		return (((x86pte_t *)table_ptr)[index]);
	return (((x86pte32_t *)table_ptr)[index]);
}

#ifndef __xpv
void
set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
{
	void *table_ptr = kbm_remap_window(table, 0);
	if (kbm_pae_support)
		((x86pte_t *)table_ptr)[index] = pteval;
	else
		((x86pte32_t *)table_ptr)[index] = pteval;

	/*
	 * On 32-bit PAE the top level PDPTEs are cached by the CPU when
	 * %cr3 is loaded, so changing one requires a %cr3 reload.
	 */
	if (level == top_level && level == 2)
		reload_cr3();
}
#endif

paddr_t
make_ptable(x86pte_t *pteval, uint_t level)
{
	paddr_t new_table;
	void *table_ptr;

	new_table = do_bop_phys_alloc(MMU_PAGESIZE, MMU_PAGESIZE);
	table_ptr = kbm_remap_window(new_table, 1);
	bzero(table_ptr, MMU_PAGESIZE);
#ifdef __xpv
	/* Remove write permission to the new page table. */
	(void) kbm_remap_window(new_table, 0);
#endif

	/*
	 * 32-bit PAE PDPTEs only support a limited set of attribute
	 * bits, so just mark the entry valid in that case.
	 */
	if (level == top_level && level == 2)
		*pteval = pa_to_ma(new_table) | PT_VALID;
	else
		*pteval = pa_to_ma(new_table) |
		    PT_VALID | PT_REF | PT_USER | PT_WRITABLE;

	return (new_table);
}

x86pte_t *
map_pte(paddr_t table, uint_t index)
{
	void *table_ptr = kbm_remap_window(table, 0);
	return ((x86pte_t *)((caddr_t)table_ptr + index * pte_size));
}