xref: /illumos-gate/usr/src/uts/i86pc/vm/kboot_mmu.c (revision b1e2e3fb17324e9ddf43db264a0c64da7756d9e6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright 2018 Joyent, Inc.
27  */
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/archsystm.h>
32 #include <sys/debug.h>
33 #include <sys/bootconf.h>
34 #include <sys/bootsvcs.h>
35 #include <sys/bootinfo.h>
36 #include <sys/mman.h>
37 #include <sys/cmn_err.h>
38 #include <sys/param.h>
39 #include <sys/machparam.h>
40 #include <sys/machsystm.h>
41 #include <sys/promif.h>
42 #include <sys/kobj.h>
43 #ifdef __xpv
44 #include <sys/hypervisor.h>
45 #endif
46 #include <vm/kboot_mmu.h>
47 #include <vm/hat_pte.h>
48 #include <vm/hat_i86.h>
49 #include <vm/seg_kmem.h>
50 
51 #if 0
52 /*
53  * Joe's debug printing
54  */
55 #define	DBG(x)    \
56 	bop_printf(NULL, "kboot_mmu.c: %s is %" PRIx64 "\n", #x, (uint64_t)(x));
57 #else
58 #define	DBG(x)	/* naught */
59 #endif
60 
61 /*
62  * Page table and memory stuff.
63  */
64 static caddr_t window;
65 static caddr_t pte_to_window;
66 
67 /*
68  * this are needed by mmu_init()
69  */
70 int kbm_nx_support = 0;		/* NX bit in PTEs is in use */
71 int kbm_pae_support = 0;	/* PAE is 64 bit Page table entries */
72 int kbm_pge_support = 0;	/* PGE is Page table global bit enabled */
73 int kbm_largepage_support = 0;
74 uint_t kbm_nucleus_size = 0;
75 
76 #define	BOOT_SHIFT(l)	(shift_amt[l])
77 #define	BOOT_SZ(l)	((size_t)1 << BOOT_SHIFT(l))
78 #define	BOOT_OFFSET(l)	(BOOT_SZ(l) - 1)
79 #define	BOOT_MASK(l)	(~BOOT_OFFSET(l))
80 
81 /*
82  * Initialize memory management parameters for boot time page table management
83  */
84 void
85 kbm_init(struct xboot_info *bi)
86 {
87 	/*
88 	 * configure mmu information
89 	 */
90 	kbm_nucleus_size = (uintptr_t)bi->bi_kseg_size;
91 	kbm_largepage_support = bi->bi_use_largepage;
92 	kbm_nx_support = bi->bi_use_nx;
93 	kbm_pae_support = bi->bi_use_pae;
94 	kbm_pge_support = bi->bi_use_pge;
95 	window = bi->bi_pt_window;
96 	DBG(window);
97 	pte_to_window = bi->bi_pte_to_pt_window;
98 	DBG(pte_to_window);
99 	if (kbm_pae_support) {
100 		shift_amt = shift_amt_pae;
101 		ptes_per_table = 512;
102 		pte_size = 8;
103 		lpagesize = TWO_MEG;
104 #ifdef __amd64
105 		top_level = 3;
106 #else
107 		top_level = 2;
108 #endif
109 	} else {
110 		shift_amt = shift_amt_nopae;
111 		ptes_per_table = 1024;
112 		pte_size = 4;
113 		lpagesize = FOUR_MEG;
114 		top_level = 1;
115 	}
116 
117 #ifdef __xpv
118 	xen_info = bi->bi_xen_start_info;
119 	mfn_list = (mfn_t *)xen_info->mfn_list;
120 	DBG(mfn_list);
121 	mfn_count = xen_info->nr_pages;
122 	DBG(mfn_count);
123 #endif
124 	top_page_table = bi->bi_top_page_table;
125 	DBG(top_page_table);
126 }
127 
128 /*
129  * Change the addressible page table window to point at a given page
130  */
131 /*ARGSUSED*/
132 void *
133 kbm_remap_window(paddr_t physaddr, int writeable)
134 {
135 	x86pte_t pt_bits = PT_NOCONSIST | PT_VALID | PT_WRITABLE;
136 
137 	DBG(physaddr);
138 
139 #ifdef __xpv
140 	if (!writeable)
141 		pt_bits &= ~PT_WRITABLE;
142 	if (HYPERVISOR_update_va_mapping((uintptr_t)window,
143 	    pa_to_ma(physaddr) | pt_bits, UVMF_INVLPG | UVMF_LOCAL) < 0)
144 		bop_panic("HYPERVISOR_update_va_mapping() failed");
145 #else
146 	if (kbm_pae_support)
147 		*((x86pte_t *)pte_to_window) = physaddr | pt_bits;
148 	else
149 		*((x86pte32_t *)pte_to_window) = physaddr | pt_bits;
150 	mmu_invlpg(window);
151 #endif
152 	DBG(window);
153 	return (window);
154 }
155 
156 /*
157  * Add a mapping for the physical page at the given virtual address.
158  */
159 void
160 kbm_map(uintptr_t va, paddr_t pa, uint_t level, uint_t is_kernel)
161 {
162 	x86pte_t *ptep;
163 	paddr_t pte_physaddr;
164 	x86pte_t pteval;
165 
166 	if (khat_running)
167 		panic("kbm_map() called too late");
168 
169 	pteval = pa_to_ma(pa) | PT_NOCONSIST | PT_VALID | PT_WRITABLE;
170 	if (level >= 1)
171 		pteval |= PT_PAGESIZE;
172 	if (kbm_pge_support && is_kernel)
173 		pteval |= PT_GLOBAL;
174 
175 #ifdef __xpv
176 	/*
177 	 * try update_va_mapping first - fails if page table is missing.
178 	 */
179 	if (HYPERVISOR_update_va_mapping(va, pteval,
180 	    UVMF_INVLPG | UVMF_LOCAL) == 0)
181 		return;
182 #endif
183 
184 	/*
185 	 * Find the pte that will map this address. This creates any
186 	 * missing intermediate level page tables.
187 	 */
188 	ptep = find_pte(va, &pte_physaddr, level, 0);
189 	if (ptep == NULL)
190 		bop_panic("kbm_map: find_pte returned NULL");
191 
192 #ifdef __xpv
193 	if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL))
194 		bop_panic("HYPERVISOR_update_va_mapping() failed");
195 #else
196 	if (kbm_pae_support)
197 		*ptep = pteval;
198 	else
199 		*((x86pte32_t *)ptep) = pteval;
200 	mmu_invlpg((caddr_t)va);
201 #endif
202 }
203 
204 #ifdef __xpv
205 
206 /*
207  * Add a mapping for the machine page at the given virtual address.
208  */
209 void
210 kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level)
211 {
212 	paddr_t pte_physaddr;
213 	x86pte_t pteval;
214 
215 	pteval = ma | PT_NOCONSIST | PT_VALID | PT_REF | PT_WRITABLE;
216 	if (level == 1)
217 		pteval |= PT_PAGESIZE;
218 
219 	/*
220 	 * try update_va_mapping first - fails if page table is missing.
221 	 */
222 	if (HYPERVISOR_update_va_mapping(va,
223 	    pteval, UVMF_INVLPG | UVMF_LOCAL) == 0)
224 		return;
225 
226 	/*
227 	 * Find the pte that will map this address. This creates any
228 	 * missing intermediate level page tables
229 	 */
230 	(void) find_pte(va, &pte_physaddr, level, 0);
231 
232 	if (HYPERVISOR_update_va_mapping(va,
233 	    pteval, UVMF_INVLPG | UVMF_LOCAL) != 0)
234 		bop_panic("HYPERVISOR_update_va_mapping failed");
235 }
236 
237 #endif /* __xpv */
238 
239 
240 /*
241  * Probe the boot time page tables to find the first mapping
242  * including va (or higher) and return non-zero if one is found.
243  * va is updated to the starting address and len to the pagesize.
244  * pp will be set to point to the 1st page_t of the mapped page(s).
245  *
246  * Note that if va is in the middle of a large page, the returned va
247  * will be less than what was asked for.
248  */
249 int
250 kbm_probe(uintptr_t *va, size_t *len, pfn_t *pfn, uint_t *prot)
251 {
252 	uintptr_t	probe_va;
253 	x86pte_t	*ptep;
254 	paddr_t		pte_physaddr;
255 	x86pte_t	pte_val;
256 	level_t		l;
257 
258 	if (khat_running)
259 		panic("kbm_probe() called too late");
260 	*len = 0;
261 	*pfn = PFN_INVALID;
262 	*prot = 0;
263 	probe_va = *va;
264 restart_new_va:
265 	l = top_level;
266 	for (;;) {
267 		if (IN_VA_HOLE(probe_va))
268 			probe_va = mmu.hole_end;
269 
270 		if (IN_HYPERVISOR_VA(probe_va))
271 #if defined(__amd64) && defined(__xpv)
272 			probe_va = HYPERVISOR_VIRT_END;
273 #else
274 			return (0);
275 #endif
276 
277 		/*
278 		 * If we don't have a valid PTP/PTE at this level
279 		 * then we can bump VA by this level's pagesize and try again.
280 		 * When the probe_va wraps around, we are done.
281 		 */
282 		ptep = find_pte(probe_va, &pte_physaddr, l, 1);
283 		if (ptep == NULL)
284 			bop_panic("kbm_probe: find_pte returned NULL");
285 		if (kbm_pae_support)
286 			pte_val = *ptep;
287 		else
288 			pte_val = *((x86pte32_t *)ptep);
289 		if (!PTE_ISVALID(pte_val)) {
290 			probe_va = (probe_va & BOOT_MASK(l)) + BOOT_SZ(l);
291 			if (probe_va <= *va)
292 				return (0);
293 			goto restart_new_va;
294 		}
295 
296 		/*
297 		 * If this entry is a pointer to a lower level page table
298 		 * go down to it.
299 		 */
300 		if (!PTE_ISPAGE(pte_val, l)) {
301 			ASSERT(l > 0);
302 			--l;
303 			continue;
304 		}
305 
306 		/*
307 		 * We found a boot level page table entry
308 		 */
309 		*len = BOOT_SZ(l);
310 		*va = probe_va & ~(*len - 1);
311 		*pfn = PTE2PFN(pte_val, l);
312 
313 
314 		*prot = PROT_READ | PROT_EXEC;
315 		if (PTE_GET(pte_val, PT_WRITABLE))
316 			*prot |= PROT_WRITE;
317 
318 		/*
319 		 * pt_nx is cleared if processor doesn't support NX bit
320 		 */
321 		if (PTE_GET(pte_val, mmu.pt_nx))
322 			*prot &= ~PROT_EXEC;
323 
324 		return (1);
325 	}
326 }
327 
328 
329 /*
330  * Destroy a boot loader page table 4K mapping.
331  */
332 void
333 kbm_unmap(uintptr_t va)
334 {
335 	if (khat_running)
336 		panic("kbm_unmap() called too late");
337 	else {
338 #ifdef __xpv
339 		(void) HYPERVISOR_update_va_mapping(va, 0,
340 		    UVMF_INVLPG | UVMF_LOCAL);
341 #else
342 		x86pte_t *ptep;
343 		level_t	level = 0;
344 		uint_t  probe_only = 1;
345 
346 		ptep = find_pte(va, NULL, level, probe_only);
347 		if (ptep == NULL)
348 			return;
349 
350 		if (kbm_pae_support)
351 			*ptep = 0;
352 		else
353 			*((x86pte32_t *)ptep) = 0;
354 		mmu_invlpg((caddr_t)va);
355 #endif
356 	}
357 }
358 
359 
360 /*
361  * Change a boot loader page table 4K mapping.
362  * Returns the pfn of the old mapping.
363  */
364 pfn_t
365 kbm_remap(uintptr_t va, pfn_t pfn)
366 {
367 	x86pte_t *ptep;
368 	level_t	level = 0;
369 	uint_t  probe_only = 1;
370 	x86pte_t pte_val = pa_to_ma(pfn_to_pa(pfn)) | PT_WRITABLE |
371 	    PT_NOCONSIST | PT_VALID;
372 	x86pte_t old_pte;
373 
374 	if (khat_running)
375 		panic("kbm_remap() called too late");
376 	ptep = find_pte(va, NULL, level, probe_only);
377 	if (ptep == NULL)
378 		bop_panic("kbm_remap: find_pte returned NULL");
379 
380 	if (kbm_pae_support)
381 		old_pte = *ptep;
382 	else
383 		old_pte = *((x86pte32_t *)ptep);
384 
385 #ifdef __xpv
386 	if (HYPERVISOR_update_va_mapping(va, pte_val, UVMF_INVLPG | UVMF_LOCAL))
387 		bop_panic("HYPERVISOR_update_va_mapping() failed");
388 #else
389 	if (kbm_pae_support)
390 		*((x86pte_t *)ptep) = pte_val;
391 	else
392 		*((x86pte32_t *)ptep) = pte_val;
393 	mmu_invlpg((caddr_t)va);
394 #endif
395 
396 	if (!(old_pte & PT_VALID) || ma_to_pa(old_pte) == -1)
397 		return (PFN_INVALID);
398 	return (mmu_btop(ma_to_pa(old_pte)));
399 }
400 
401 
402 /*
403  * Change a boot loader page table 4K mapping to read only.
404  */
405 void
406 kbm_read_only(uintptr_t va, paddr_t pa)
407 {
408 	x86pte_t pte_val = pa_to_ma(pa) |
409 	    PT_NOCONSIST | PT_REF | PT_MOD | PT_VALID;
410 
411 #ifdef __xpv
412 	if (HYPERVISOR_update_va_mapping(va, pte_val, UVMF_INVLPG | UVMF_LOCAL))
413 		bop_panic("HYPERVISOR_update_va_mapping() failed");
414 #else
415 	x86pte_t *ptep;
416 	level_t	level = 0;
417 
418 	ptep = find_pte(va, NULL, level, 0);
419 	if (ptep == NULL)
420 		bop_panic("kbm_read_only: find_pte returned NULL");
421 
422 	if (kbm_pae_support)
423 		*ptep = pte_val;
424 	else
425 		*((x86pte32_t *)ptep) = pte_val;
426 	mmu_invlpg((caddr_t)va);
427 #endif
428 }
429 
430 /*
431  * interfaces for kernel debugger to access physical memory
432  */
433 static x86pte_t save_pte;
434 
435 void *
436 kbm_push(paddr_t pa)
437 {
438 	static int first_time = 1;
439 
440 	if (first_time) {
441 		first_time = 0;
442 		return (window);
443 	}
444 
445 	if (kbm_pae_support)
446 		save_pte = *((x86pte_t *)pte_to_window);
447 	else
448 		save_pte = *((x86pte32_t *)pte_to_window);
449 	return (kbm_remap_window(pa, 0));
450 }
451 
452 void
453 kbm_pop(void)
454 {
455 #ifdef __xpv
456 	if (HYPERVISOR_update_va_mapping((uintptr_t)window, save_pte,
457 	    UVMF_INVLPG | UVMF_LOCAL) < 0)
458 		bop_panic("HYPERVISOR_update_va_mapping() failed");
459 #else
460 	if (kbm_pae_support)
461 		*((x86pte_t *)pte_to_window) = save_pte;
462 	else
463 		*((x86pte32_t *)pte_to_window) = save_pte;
464 	mmu_invlpg(window);
465 #endif
466 }
467 
468 x86pte_t
469 get_pteval(paddr_t table, uint_t index)
470 {
471 	void *table_ptr = kbm_remap_window(table, 0);
472 
473 	if (kbm_pae_support)
474 		return (((x86pte_t *)table_ptr)[index]);
475 	return (((x86pte32_t *)table_ptr)[index]);
476 }
477 
478 #ifndef __xpv
479 void
480 set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
481 {
482 	void *table_ptr = kbm_remap_window(table, 0);
483 	if (kbm_pae_support)
484 		((x86pte_t *)table_ptr)[index] = pteval;
485 	else
486 		((x86pte32_t *)table_ptr)[index] = pteval;
487 	if (level == top_level && level == 2)
488 		reload_cr3();
489 }
490 #endif
491 
492 paddr_t
493 make_ptable(x86pte_t *pteval, uint_t level)
494 {
495 	paddr_t new_table;
496 	void *table_ptr;
497 
498 	new_table = do_bop_phys_alloc(MMU_PAGESIZE, MMU_PAGESIZE);
499 	table_ptr = kbm_remap_window(new_table, 1);
500 	bzero(table_ptr, MMU_PAGESIZE);
501 #ifdef __xpv
502 	/* Remove write permission to the new page table.  */
503 	(void) kbm_remap_window(new_table, 0);
504 #endif
505 
506 	if (level == top_level && level == 2)
507 		*pteval = pa_to_ma(new_table) | PT_VALID;
508 	else
509 		*pteval = pa_to_ma(new_table) |
510 		    PT_VALID | PT_REF | PT_USER | PT_WRITABLE;
511 
512 	return (new_table);
513 }
514 
515 x86pte_t *
516 map_pte(paddr_t table, uint_t index)
517 {
518 	void *table_ptr = kbm_remap_window(table, 0);
519 	return ((x86pte_t *)((caddr_t)table_ptr + index * pte_size));
520 }
521