xref: /illumos-gate/usr/src/uts/i86pc/vm/kboot_mmu.c (revision 1fa2a66491e7d8ae0be84e7da4da8e812480c710)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright 2018 Joyent, Inc.
27  */
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/archsystm.h>
32 #include <sys/debug.h>
33 #include <sys/bootconf.h>
34 #include <sys/bootsvcs.h>
35 #include <sys/bootinfo.h>
36 #include <sys/mman.h>
37 #include <sys/cmn_err.h>
38 #include <sys/param.h>
39 #include <sys/machparam.h>
40 #include <sys/machsystm.h>
41 #include <sys/promif.h>
42 #include <sys/kobj.h>
43 #ifdef __xpv
44 #include <sys/hypervisor.h>
45 #endif
46 #include <vm/kboot_mmu.h>
47 #include <vm/hat_pte.h>
48 #include <vm/hat_i86.h>
49 #include <vm/seg_kmem.h>
50 
#if 0
/*
 * Joe's debug printing
 */
#define	DBG(x)    \
	bop_printf(NULL, "kboot_mmu.c: %s is %" PRIx64 "\n", #x, (uint64_t)(x));
#else
#define	DBG(x)	/* naught */
#endif

/*
 * Page table and memory stuff.
 */
static caddr_t window;		/* VA used to temporarily map page table pages */
static caddr_t pte_to_window;	/* VA of the PTE that maps "window" itself */

/*
 * these are needed by mmu_init()
 */
int kbm_nx_support = 0;		/* NX bit in PTEs is in use */
int kbm_pae_support = 0;	/* PAE is 64 bit Page table entries */
int kbm_pge_support = 0;	/* PGE is Page table global bit enabled */
int kbm_largepage_support = 0;	/* boot loader used large page mappings */
uint_t kbm_nucleus_size = 0;	/* kernel nucleus size (from bi_kseg_size) */

/*
 * Pagesize/alignment helpers for page table level l, driven by the
 * shift_amt table selected in kbm_init() (PAE vs. non-PAE).
 */
#define	BOOT_SHIFT(l)	(shift_amt[l])
#define	BOOT_SZ(l)	((size_t)1 << BOOT_SHIFT(l))
#define	BOOT_OFFSET(l)	(BOOT_SZ(l) - 1)
#define	BOOT_MASK(l)	(~BOOT_OFFSET(l))
80 
/*
 * Initialize memory management parameters for boot time page table management
 *
 * Caches the MMU capability flags and page table window addresses handed
 * to us by the boot loader in *bi, then selects the PAE or non-PAE page
 * table geometry (PTE size, entries per table, large page size, number of
 * levels) used by the rest of this file.
 */
void
kbm_init(struct xboot_info *bi)
{
	/*
	 * configure mmu information
	 */
	kbm_nucleus_size = (uintptr_t)bi->bi_kseg_size;
	kbm_largepage_support = bi->bi_use_largepage;
	kbm_nx_support = bi->bi_use_nx;
	kbm_pae_support = bi->bi_use_pae;
	kbm_pge_support = bi->bi_use_pge;
	window = bi->bi_pt_window;
	DBG(window);
	pte_to_window = bi->bi_pte_to_pt_window;
	DBG(pte_to_window);
	if (kbm_pae_support) {
		/* PAE: 8 byte PTEs, 512 entries per table, 2 Meg large pages */
		shift_amt = shift_amt_pae;
		ptes_per_table = 512;
		pte_size = 8;
		lpagesize = TWO_MEG;
		top_level = 3;
	} else {
		/* non-PAE: 4 byte PTEs, 1024 entries, 4 Meg large pages */
		shift_amt = shift_amt_nopae;
		ptes_per_table = 1024;
		pte_size = 4;
		lpagesize = FOUR_MEG;
		top_level = 1;
	}

#ifdef __xpv
	/* under the hypervisor, also pick up the pfn->mfn list from Xen */
	xen_info = bi->bi_xen_start_info;
	mfn_list = (mfn_t *)xen_info->mfn_list;
	DBG(mfn_list);
	mfn_count = xen_info->nr_pages;
	DBG(mfn_count);
#endif
	top_page_table = bi->bi_top_page_table;
	DBG(top_page_table);
}
123 
/*
 * Change the addressible page table window to point at a given page
 *
 * Rewrites the PTE backing "window" (through pte_to_window) so the window
 * VA maps the physical page at physaddr, invalidating the old translation.
 * Returns the window VA.  The "writeable" argument is only honored under
 * the hypervisor (page table pages must be mapped read-only there — see
 * make_ptable()); on bare metal the window is always mapped writable.
 */
/*ARGSUSED*/
void *
kbm_remap_window(paddr_t physaddr, int writeable)
{
	x86pte_t pt_bits = PT_NOCONSIST | PT_VALID | PT_WRITABLE;

	DBG(physaddr);

#ifdef __xpv
	if (!writeable)
		pt_bits &= ~PT_WRITABLE;
	if (HYPERVISOR_update_va_mapping((uintptr_t)window,
	    pa_to_ma(physaddr) | pt_bits, UVMF_INVLPG | UVMF_LOCAL) < 0)
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	/* store a PTE of the width matching the active page table format */
	if (kbm_pae_support)
		*((x86pte_t *)pte_to_window) = physaddr | pt_bits;
	else
		*((x86pte32_t *)pte_to_window) = physaddr | pt_bits;
	mmu_invlpg(window);
#endif
	DBG(window);
	return (window);
}
151 
/*
 * Add a mapping for the physical page at the given virtual address.
 *
 * "level" selects the page size: level 0 is a 4K page, level 1 and above
 * set PT_PAGESIZE for a large page mapping.  "is_kernel" requests a
 * global mapping (PT_GLOBAL) when PGE is supported.  Must not be called
 * once the kernel HAT has taken over (khat_running).
 */
void
kbm_map(uintptr_t va, paddr_t pa, uint_t level, uint_t is_kernel)
{
	x86pte_t *ptep;
	paddr_t pte_physaddr;
	x86pte_t pteval;

	if (khat_running)
		panic("kbm_map() called too late");

	pteval = pa_to_ma(pa) | PT_NOCONSIST | PT_VALID | PT_WRITABLE;
	if (level >= 1)
		pteval |= PT_PAGESIZE;
	if (kbm_pge_support && is_kernel)
		pteval |= PT_GLOBAL;

#ifdef __xpv
	/*
	 * try update_va_mapping first - fails if page table is missing.
	 */
	if (HYPERVISOR_update_va_mapping(va, pteval,
	    UVMF_INVLPG | UVMF_LOCAL) == 0)
		return;
#endif

	/*
	 * Find the pte that will map this address. This creates any
	 * missing intermediate level page tables.
	 */
	ptep = find_pte(va, &pte_physaddr, level, 0);
	if (ptep == NULL)
		bop_panic("kbm_map: find_pte returned NULL");

#ifdef __xpv
	/* retry now that find_pte() has built the intermediate tables */
	if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL))
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	if (kbm_pae_support)
		*ptep = pteval;
	else
		*((x86pte32_t *)ptep) = pteval;
	mmu_invlpg((caddr_t)va);
#endif
}
199 
200 #ifdef __xpv
201 
202 /*
203  * Add a mapping for the machine page at the given virtual address.
204  */
205 void
206 kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level)
207 {
208 	paddr_t pte_physaddr;
209 	x86pte_t pteval;
210 
211 	pteval = ma | PT_NOCONSIST | PT_VALID | PT_REF | PT_WRITABLE;
212 	if (level == 1)
213 		pteval |= PT_PAGESIZE;
214 
215 	/*
216 	 * try update_va_mapping first - fails if page table is missing.
217 	 */
218 	if (HYPERVISOR_update_va_mapping(va,
219 	    pteval, UVMF_INVLPG | UVMF_LOCAL) == 0)
220 		return;
221 
222 	/*
223 	 * Find the pte that will map this address. This creates any
224 	 * missing intermediate level page tables
225 	 */
226 	(void) find_pte(va, &pte_physaddr, level, 0);
227 
228 	if (HYPERVISOR_update_va_mapping(va,
229 	    pteval, UVMF_INVLPG | UVMF_LOCAL) != 0)
230 		bop_panic("HYPERVISOR_update_va_mapping failed");
231 }
232 
233 #endif /* __xpv */
234 
235 
/*
 * Probe the boot time page tables to find the first mapping
 * including va (or higher) and return non-zero if one is found.
 * va is updated to the starting address and len to the pagesize.
 * *pfn is set to the pfn of the first mapped page.
 *
 * Note that if va is in the middle of a large page, the returned va
 * will be less than what was asked for.
 */
int
kbm_probe(uintptr_t *va, size_t *len, pfn_t *pfn, uint_t *prot)
{
	uintptr_t	probe_va;
	x86pte_t	*ptep;
	paddr_t		pte_physaddr;
	x86pte_t	pte_val;
	level_t		l;

	if (khat_running)
		panic("kbm_probe() called too late");

	/* defaults for the "no mapping found" case */
	*len = 0;
	*pfn = PFN_INVALID;
	*prot = 0;
	probe_va = *va;
restart_new_va:
	l = top_level;
	for (;;) {
		/* skip the non-canonical VA hole and hypervisor ranges */
		if (IN_VA_HOLE(probe_va))
			probe_va = mmu.hole_end;

		if (IN_HYPERVISOR_VA(probe_va))
#if defined(__xpv)
			probe_va = HYPERVISOR_VIRT_END;
#else
			return (0);
#endif

		/*
		 * If we don't have a valid PTP/PTE at this level
		 * then we can bump VA by this level's pagesize and try again.
		 * When the probe_va wraps around, we are done.
		 */
		ptep = find_pte(probe_va, &pte_physaddr, l, 1);
		if (ptep == NULL)
			bop_panic("kbm_probe: find_pte returned NULL");
		if (kbm_pae_support)
			pte_val = *ptep;
		else
			pte_val = *((x86pte32_t *)ptep);
		if (!PTE_ISVALID(pte_val)) {
			probe_va = (probe_va & BOOT_MASK(l)) + BOOT_SZ(l);
			if (probe_va <= *va)
				return (0);
			goto restart_new_va;
		}

		/*
		 * If this entry is a pointer to a lower level page table
		 * go down to it.
		 */
		if (!PTE_ISPAGE(pte_val, l)) {
			ASSERT(l > 0);
			--l;
			continue;
		}

		/*
		 * We found a boot level page table entry
		 */
		*len = BOOT_SZ(l);
		*va = probe_va & ~(*len - 1);	/* round down to page start */
		*pfn = PTE2PFN(pte_val, l);


		*prot = PROT_READ | PROT_EXEC;
		if (PTE_GET(pte_val, PT_WRITABLE))
			*prot |= PROT_WRITE;

		/*
		 * pt_nx is cleared if processor doesn't support NX bit
		 */
		if (PTE_GET(pte_val, mmu.pt_nx))
			*prot &= ~PROT_EXEC;

		return (1);
	}
}
323 
324 
325 /*
326  * Destroy a boot loader page table 4K mapping.
327  */
328 void
329 kbm_unmap(uintptr_t va)
330 {
331 	if (khat_running)
332 		panic("kbm_unmap() called too late");
333 	else {
334 #ifdef __xpv
335 		(void) HYPERVISOR_update_va_mapping(va, 0,
336 		    UVMF_INVLPG | UVMF_LOCAL);
337 #else
338 		x86pte_t *ptep;
339 		level_t	level = 0;
340 		uint_t  probe_only = 1;
341 
342 		ptep = find_pte(va, NULL, level, probe_only);
343 		if (ptep == NULL)
344 			return;
345 
346 		if (kbm_pae_support)
347 			*ptep = 0;
348 		else
349 			*((x86pte32_t *)ptep) = 0;
350 		mmu_invlpg((caddr_t)va);
351 #endif
352 	}
353 }
354 
355 
/*
 * Change a boot loader page table 4K mapping.
 * Returns the pfn of the old mapping.
 */
pfn_t
kbm_remap(uintptr_t va, pfn_t pfn)
{
	x86pte_t *ptep;
	level_t	level = 0;
	uint_t  probe_only = 1;
	x86pte_t pte_val = pa_to_ma(pfn_to_pa(pfn)) | PT_WRITABLE |
	    PT_NOCONSIST | PT_VALID;
	x86pte_t old_pte;

	if (khat_running)
		panic("kbm_remap() called too late");
	ptep = find_pte(va, NULL, level, probe_only);
	if (ptep == NULL)
		bop_panic("kbm_remap: find_pte returned NULL");

	/* capture the prior entry before overwriting it */
	if (kbm_pae_support)
		old_pte = *ptep;
	else
		old_pte = *((x86pte32_t *)ptep);

#ifdef __xpv
	if (HYPERVISOR_update_va_mapping(va, pte_val, UVMF_INVLPG | UVMF_LOCAL))
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	if (kbm_pae_support)
		*((x86pte_t *)ptep) = pte_val;
	else
		*((x86pte32_t *)ptep) = pte_val;
	mmu_invlpg((caddr_t)va);
#endif

	/* PFN_INVALID when there was no valid/translatable prior mapping */
	if (!(old_pte & PT_VALID) || ma_to_pa(old_pte) == -1)
		return (PFN_INVALID);
	return (mmu_btop(ma_to_pa(old_pte)));
}
396 
397 
398 /*
399  * Change a boot loader page table 4K mapping to read only.
400  */
401 void
402 kbm_read_only(uintptr_t va, paddr_t pa)
403 {
404 	x86pte_t pte_val = pa_to_ma(pa) |
405 	    PT_NOCONSIST | PT_REF | PT_MOD | PT_VALID;
406 
407 #ifdef __xpv
408 	if (HYPERVISOR_update_va_mapping(va, pte_val, UVMF_INVLPG | UVMF_LOCAL))
409 		bop_panic("HYPERVISOR_update_va_mapping() failed");
410 #else
411 	x86pte_t *ptep;
412 	level_t	level = 0;
413 
414 	ptep = find_pte(va, NULL, level, 0);
415 	if (ptep == NULL)
416 		bop_panic("kbm_read_only: find_pte returned NULL");
417 
418 	if (kbm_pae_support)
419 		*ptep = pte_val;
420 	else
421 		*((x86pte32_t *)ptep) = pte_val;
422 	mmu_invlpg((caddr_t)va);
423 #endif
424 }
425 
426 /*
427  * interfaces for kernel debugger to access physical memory
428  */
429 static x86pte_t save_pte;
430 
431 void *
432 kbm_push(paddr_t pa)
433 {
434 	static int first_time = 1;
435 
436 	if (first_time) {
437 		first_time = 0;
438 		return (window);
439 	}
440 
441 	if (kbm_pae_support)
442 		save_pte = *((x86pte_t *)pte_to_window);
443 	else
444 		save_pte = *((x86pte32_t *)pte_to_window);
445 	return (kbm_remap_window(pa, 0));
446 }
447 
448 void
449 kbm_pop(void)
450 {
451 #ifdef __xpv
452 	if (HYPERVISOR_update_va_mapping((uintptr_t)window, save_pte,
453 	    UVMF_INVLPG | UVMF_LOCAL) < 0)
454 		bop_panic("HYPERVISOR_update_va_mapping() failed");
455 #else
456 	if (kbm_pae_support)
457 		*((x86pte_t *)pte_to_window) = save_pte;
458 	else
459 		*((x86pte32_t *)pte_to_window) = save_pte;
460 	mmu_invlpg(window);
461 #endif
462 }
463 
464 x86pte_t
465 get_pteval(paddr_t table, uint_t index)
466 {
467 	void *table_ptr = kbm_remap_window(table, 0);
468 
469 	if (kbm_pae_support)
470 		return (((x86pte_t *)table_ptr)[index]);
471 	return (((x86pte32_t *)table_ptr)[index]);
472 }
473 
474 #ifndef __xpv
475 void
476 set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
477 {
478 	void *table_ptr = kbm_remap_window(table, 0);
479 	if (kbm_pae_support)
480 		((x86pte_t *)table_ptr)[index] = pteval;
481 	else
482 		((x86pte32_t *)table_ptr)[index] = pteval;
483 	if (level == top_level && level == 2)
484 		reload_cr3();
485 }
486 #endif
487 
/*
 * Allocate and zero a new page table page and build the PTP entry that
 * should point to it.  Returns the physical address of the new table;
 * *pteval receives the entry for the caller to install in the parent.
 */
paddr_t
make_ptable(x86pte_t *pteval, uint_t level)
{
	paddr_t new_table;
	void *table_ptr;

	new_table = do_bop_phys_alloc(MMU_PAGESIZE, MMU_PAGESIZE);
	table_ptr = kbm_remap_window(new_table, 1);
	bzero(table_ptr, MMU_PAGESIZE);
#ifdef __xpv
	/* Remove write permission to the new page table.  */
	(void) kbm_remap_window(new_table, 0);
#endif

	/*
	 * A level 2 top-level table entry carries only PT_VALID; every
	 * other PTP is also marked referenced, user and writable.
	 */
	if (level == top_level && level == 2)
		*pteval = pa_to_ma(new_table) | PT_VALID;
	else
		*pteval = pa_to_ma(new_table) |
		    PT_VALID | PT_REF | PT_USER | PT_WRITABLE;

	return (new_table);
}
510 
511 x86pte_t *
512 map_pte(paddr_t table, uint_t index)
513 {
514 	void *table_ptr = kbm_remap_window(table, 0);
515 	return ((x86pte_t *)((caddr_t)table_ptr + index * pte_size));
516 }
517