/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/debug.h>
#include <sys/bootconf.h>
#include <sys/bootsvcs.h>
#include <sys/bootinfo.h>
#include <sys/mman.h>
#include <sys/cmn_err.h>
#include <sys/param.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/promif.h>
#include <sys/kobj.h>
#ifdef __xpv
#include <sys/hypervisor.h>
#endif
#include <vm/kboot_mmu.h>
#include <vm/hat_pte.h>
#include <vm/hat_i86.h>
#include <vm/seg_kmem.h>

#if 0
/*
 * Joe's debug printing
 */
#define	DBG(x)    \
	bop_printf(NULL, "kboot_mmu.c: %s is %" PRIx64 "\n", #x, (uint64_t)(x));
#else
#define	DBG(x)	/* naught */
#endif

/*
 * The page table window: a page of VA used to temporarily map arbitrary
 * physical pages, and the VA of the PTE that controls the window's mapping.
 */
static caddr_t window;
static caddr_t pte_to_window;

/*
 * These are needed by mmu_init()
 */
int kbm_nx_support = 0;		/* NX bit in PTEs is in use */
int kbm_pae_support = 0;	/* PAE (64 bit page table entries) in use */
int kbm_pge_support = 0;	/* PGE (page table global bit) is enabled */
int kbm_largepage_support = 0;
uint_t kbm_nucleus_size = 0;

#define	BOOT_SHIFT(l)	(shift_amt[l])
#define	BOOT_SZ(l)	((size_t)1 << BOOT_SHIFT(l))
#define	BOOT_OFFSET(l)	(BOOT_SZ(l) - 1)
#define	BOOT_MASK(l)	(~BOOT_OFFSET(l))

/*
 * Initialize memory management parameters for boot time page table management
 */
void
kbm_init(struct xboot_info *bi)
{
	/*
	 * configure mmu information
	 */
	kbm_nucleus_size = (uintptr_t)bi->bi_kseg_size;
	kbm_largepage_support = bi->bi_use_largepage;
	kbm_nx_support = bi->bi_use_nx;
	kbm_pae_support = bi->bi_use_pae;
	kbm_pge_support = bi->bi_use_pge;
	window = bi->bi_pt_window;
	DBG(window);
	pte_to_window = bi->bi_pte_to_pt_window;
	DBG(pte_to_window);
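	/*
	 * Select the page table geometry: PAE tables (also used by 64 bit
	 * kernels) have 8 byte PTEs, 512 entries per 4K table and 2 Meg
	 * large pages; non-PAE 32 bit tables have 4 byte PTEs, 1024
	 * entries and 4 Meg large pages. top_level is the zero-based
	 * level of the root table: 3 for 64 bit, 2 for 32 bit PAE and
	 * 1 for non-PAE.
	 */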
	if (kbm_pae_support) {
		shift_amt = shift_amt_pae;
		ptes_per_table = 512;
		pte_size = 8;
		lpagesize = TWO_MEG;
#ifdef __amd64
		top_level = 3;
#else
		top_level = 2;
#endif
	} else {
		shift_amt = shift_amt_nopae;
		ptes_per_table = 1024;
		pte_size = 4;
		lpagesize = FOUR_MEG;
		top_level = 1;
	}

#ifdef __xpv
	xen_info = bi->bi_xen_start_info;
	mfn_list = (mfn_t *)xen_info->mfn_list;
	DBG(mfn_list);
	mfn_count = xen_info->nr_pages;
	DBG(mfn_count);
#endif
	top_page_table = bi->bi_top_page_table;
	DBG(top_page_table);
}

/*
 * Change the addressable page table window to point at a given page
 */
/*ARGSUSED*/
void *
kbm_remap_window(paddr_t physaddr, int writeable)
{
	x86pte_t pt_bits = PT_NOCONSIST | PT_VALID | PT_WRITABLE;

	DBG(physaddr);

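	/*
	 * Under the hypervisor the window must be mapped read-only when
	 * writeable is 0, since writable mappings of active page table
	 * pages are refused. On bare metal the argument is ignored and
	 * the window is always mapped writable (hence ARGSUSED above).
	 */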
#ifdef __xpv
	if (!writeable)
		pt_bits &= ~PT_WRITABLE;
	if (HYPERVISOR_update_va_mapping((uintptr_t)window,
	    pa_to_ma(physaddr) | pt_bits, UVMF_INVLPG | UVMF_LOCAL) < 0)
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	if (kbm_pae_support)
		*((x86pte_t *)pte_to_window) = physaddr | pt_bits;
	else
		*((x86pte32_t *)pte_to_window) = physaddr | pt_bits;
	mmu_tlbflush_entry(window);
#endif
	DBG(window);
	return (window);
}

/*
 * Add a mapping for the physical page at the given virtual address.
 */
void
kbm_map(uintptr_t va, paddr_t pa, uint_t level, uint_t is_kernel)
{
	x86pte_t *ptep;
	paddr_t pte_physaddr;
	x86pte_t pteval;

	if (khat_running)
		panic("kbm_map() called too late");

	pteval = pa_to_ma(pa) | PT_NOCONSIST | PT_VALID | PT_WRITABLE;
	if (level >= 1)
		pteval |= PT_PAGESIZE;
	if (kbm_pge_support && is_kernel)
		pteval |= PT_GLOBAL;

#ifdef __xpv
	/*
	 * try update_va_mapping first - fails if page table is missing.
	 */
	if (HYPERVISOR_update_va_mapping(va, pteval,
	    UVMF_INVLPG | UVMF_LOCAL) == 0)
		return;
#endif

	/*
	 * Find the pte that will map this address. This creates any
	 * missing intermediate level page tables.
	 */
	ptep = find_pte(va, &pte_physaddr, level, 0);
	if (ptep == NULL)
		bop_panic("kbm_map: find_pte returned NULL");

#ifdef __xpv
	if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL))
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	if (kbm_pae_support)
		*ptep = pteval;
	else
		*((x86pte32_t *)ptep) = pteval;
	mmu_tlbflush_entry((caddr_t)va);
#endif
}

#ifdef __xpv

/*
 * Add a mapping for the machine page at the given virtual address.
 */
void
kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level)
{
	paddr_t pte_physaddr;
	x86pte_t pteval;

	pteval = ma | PT_NOCONSIST | PT_VALID | PT_REF | PT_WRITABLE;
	if (level == 1)
		pteval |= PT_PAGESIZE;

	/*
	 * try update_va_mapping first - fails if page table is missing.
	 */
	if (HYPERVISOR_update_va_mapping(va,
	    pteval, UVMF_INVLPG | UVMF_LOCAL) == 0)
		return;

	/*
	 * Find the pte that will map this address. This creates any
	 * missing intermediate level page tables.
	 */
	(void) find_pte(va, &pte_physaddr, level, 0);

	if (HYPERVISOR_update_va_mapping(va,
	    pteval, UVMF_INVLPG | UVMF_LOCAL) != 0)
		bop_panic("HYPERVISOR_update_va_mapping failed");
}

#endif /* __xpv */


/*
 * Probe the boot time page tables to find the first mapping
 * including va (or higher) and return non-zero if one is found.
 * va is updated to the starting address and len to the pagesize.
 * pfn is set to the matching page frame number and prot to the
 * mapping's protection bits.
 *
 * Note that if va is in the middle of a large page, the returned va
 * will be less than what was asked for.
 */
int
kbm_probe(uintptr_t *va, size_t *len, pfn_t *pfn, uint_t *prot)
{
	uintptr_t	probe_va;
	x86pte_t	*ptep;
	paddr_t		pte_physaddr;
	x86pte_t	pte_val;
	level_t		l;

	if (khat_running)
		panic("kbm_probe() called too late");
	*len = 0;
	*pfn = PFN_INVALID;
	*prot = 0;
	probe_va = *va;
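	/*
	 * Walk the tables from the top level down. Whenever an invalid
	 * entry is found, bump probe_va to the next boundary at the
	 * current level and restart from the top; if probe_va wraps back
	 * to or below the original va, nothing further is mapped.
	 */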
restart_new_va:
	l = top_level;
	for (;;) {
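		/*
		 * Skip VA ranges that can never be mapped: the 64 bit
		 * VA hole, and the hypervisor's reserved range (which
		 * on anything but a 64 bit xpv kernel ends the probe).
		 */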
		if (IN_VA_HOLE(probe_va))
			probe_va = mmu.hole_end;

		if (IN_HYPERVISOR_VA(probe_va))
#if defined(__amd64) && defined(__xpv)
			probe_va = HYPERVISOR_VIRT_END;
#else
			return (0);
#endif

		/*
		 * If we don't have a valid PTP/PTE at this level
		 * then we can bump VA by this level's pagesize and try again.
		 * When the probe_va wraps around, we are done.
		 */
		ptep = find_pte(probe_va, &pte_physaddr, l, 1);
		if (ptep == NULL)
			bop_panic("kbm_probe: find_pte returned NULL");
		if (kbm_pae_support)
			pte_val = *ptep;
		else
			pte_val = *((x86pte32_t *)ptep);
		if (!PTE_ISVALID(pte_val)) {
			probe_va = (probe_va & BOOT_MASK(l)) + BOOT_SZ(l);
			if (probe_va <= *va)
				return (0);
			goto restart_new_va;
		}

		/*
		 * If this entry is a pointer to a lower level page table
		 * go down to it.
		 */
		if (!PTE_ISPAGE(pte_val, l)) {
			ASSERT(l > 0);
			--l;
			continue;
		}

		/*
		 * We found a boot level page table entry
		 */
		*len = BOOT_SZ(l);
		*va = probe_va & ~(*len - 1);
		*pfn = PTE2PFN(pte_val, l);

		*prot = PROT_READ | PROT_EXEC;
		if (PTE_GET(pte_val, PT_WRITABLE))
			*prot |= PROT_WRITE;

		/*
		 * pt_nx is cleared if the processor doesn't support the NX bit
		 */
		if (PTE_GET(pte_val, mmu.pt_nx))
			*prot &= ~PROT_EXEC;

		return (1);
	}
}


/*
 * Destroy a boot loader page table 4K mapping.
 */
void
kbm_unmap(uintptr_t va)
{
	if (khat_running)
		panic("kbm_unmap() called too late");
	else {
#ifdef __xpv
		(void) HYPERVISOR_update_va_mapping(va, 0,
		    UVMF_INVLPG | UVMF_LOCAL);
#else
		x86pte_t *ptep;
		level_t	level = 0;
		uint_t  probe_only = 1;

		ptep = find_pte(va, NULL, level, probe_only);
		if (ptep == NULL)
			return;

		if (kbm_pae_support)
			*ptep = 0;
		else
			*((x86pte32_t *)ptep) = 0;
		mmu_tlbflush_entry((caddr_t)va);
#endif
	}
}


/*
 * Change a boot loader page table 4K mapping.
 * Returns the pfn of the old mapping.
 */
pfn_t
kbm_remap(uintptr_t va, pfn_t pfn)
{
	x86pte_t *ptep;
	level_t	level = 0;
	uint_t  probe_only = 1;
	x86pte_t pte_val = pa_to_ma(pfn_to_pa(pfn)) | PT_WRITABLE |
	    PT_NOCONSIST | PT_VALID;
	x86pte_t old_pte;

	if (khat_running)
		panic("kbm_remap() called too late");
	ptep = find_pte(va, NULL, level, probe_only);
	if (ptep == NULL)
		bop_panic("kbm_remap: find_pte returned NULL");

	if (kbm_pae_support)
		old_pte = *ptep;
	else
		old_pte = *((x86pte32_t *)ptep);

#ifdef __xpv
	if (HYPERVISOR_update_va_mapping(va, pte_val, UVMF_INVLPG | UVMF_LOCAL))
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	if (kbm_pae_support)
		*((x86pte_t *)ptep) = pte_val;
	else
		*((x86pte32_t *)ptep) = pte_val;
	mmu_tlbflush_entry((caddr_t)va);
#endif

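	/*
	 * Report PFN_INVALID if the old PTE was invalid, or if (under
	 * xpv) its machine address has no pseudo-physical translation.
	 */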
	if (!(old_pte & PT_VALID) || ma_to_pa(old_pte) == -1)
		return (PFN_INVALID);
	return (mmu_btop(ma_to_pa(old_pte)));
}


/*
 * Change a boot loader page table 4K mapping to read only.
 */
void
kbm_read_only(uintptr_t va, paddr_t pa)
{
	x86pte_t pte_val = pa_to_ma(pa) |
	    PT_NOCONSIST | PT_REF | PT_MOD | PT_VALID;

#ifdef __xpv
	if (HYPERVISOR_update_va_mapping(va, pte_val, UVMF_INVLPG | UVMF_LOCAL))
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	x86pte_t *ptep;
	level_t	level = 0;

	ptep = find_pte(va, NULL, level, 0);
	if (ptep == NULL)
		bop_panic("kbm_read_only: find_pte returned NULL");

	if (kbm_pae_support)
		*ptep = pte_val;
	else
		*((x86pte32_t *)ptep) = pte_val;
	mmu_tlbflush_entry((caddr_t)va);
#endif
}

/*
 * Interfaces for the kernel debugger to access physical memory.
 */
static x86pte_t save_pte;

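/*
 * kbm_push() maps a physical page at the window for the debugger and
 * kbm_pop() restores the previous mapping. Only a single level of push
 * is supported, since just one saved PTE is kept.
 */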
void *
kbm_push(paddr_t pa)
{
	static int first_time = 1;

	if (first_time) {
		first_time = 0;
		return (window);
	}

	if (kbm_pae_support)
		save_pte = *((x86pte_t *)pte_to_window);
	else
		save_pte = *((x86pte32_t *)pte_to_window);
	return (kbm_remap_window(pa, 0));
}

void
kbm_pop(void)
{
#ifdef __xpv
	if (HYPERVISOR_update_va_mapping((uintptr_t)window, save_pte,
	    UVMF_INVLPG | UVMF_LOCAL) < 0)
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	if (kbm_pae_support)
		*((x86pte_t *)pte_to_window) = save_pte;
	else
		*((x86pte32_t *)pte_to_window) = save_pte;
	mmu_tlbflush_entry(window);
#endif
}

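/*
 * The accessors below are used by the boot-time page table walking code.
 * They all share the single page table window, so only one page table
 * is addressable at a time.
 */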
x86pte_t
get_pteval(paddr_t table, uint_t index)
{
	void *table_ptr = kbm_remap_window(table, 0);

	if (kbm_pae_support)
		return (((x86pte_t *)table_ptr)[index]);
	return (((x86pte32_t *)table_ptr)[index]);
}

#ifndef __xpv
void
set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
{
	void *table_ptr = kbm_remap_window(table, 0);

	if (kbm_pae_support)
		((x86pte_t *)table_ptr)[index] = pteval;
	else
		((x86pte32_t *)table_ptr)[index] = pteval;
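	/*
	 * Under 32 bit PAE (top_level == 2) the processor caches the four
	 * PDPT entries when %cr3 is loaded, so a change to the top table
	 * only takes effect after %cr3 is reloaded.
	 */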
	if (level == top_level && level == 2)
		reload_cr3();
}
#endif

paddr_t
make_ptable(x86pte_t *pteval, uint_t level)
{
	paddr_t new_table;
	void *table_ptr;

	new_table = do_bop_phys_alloc(MMU_PAGESIZE, MMU_PAGESIZE);
	table_ptr = kbm_remap_window(new_table, 1);
	bzero(table_ptr, MMU_PAGESIZE);
#ifdef __xpv
	/* Remove write permission to the new page table. */
	(void) kbm_remap_window(new_table, 0);
#endif

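	/*
	 * PAE PDPT entries allow only a limited set of attribute bits, so
	 * a 32 bit PAE top level entry gets just PT_VALID; entries at all
	 * other levels get full permissions.
	 */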
	if (level == top_level && level == 2)
		*pteval = pa_to_ma(new_table) | PT_VALID;
	else
		*pteval = pa_to_ma(new_table) |
		    PT_VALID | PT_REF | PT_USER | PT_WRITABLE;

	return (new_table);
}

x86pte_t *
map_pte(paddr_t table, uint_t index)
{
	void *table_ptr = kbm_remap_window(table, 0);
	return ((x86pte_t *)((caddr_t)table_ptr + index * pte_size));
}
519