/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/t_lock.h>
#include <sys/memlist.h>
#include <sys/cpuvar.h>
#include <sys/vmem.h>
#include <sys/mman.h>
#include <sys/vm.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/vm_machparam.h>
#include <sys/tss.h>
#include <sys/vnode.h>
#include <vm/hat.h>
#include <vm/anon.h>
#include <vm/as.h>
#include <vm/page.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_map.h>
#include <vm/hat_i86.h>
#include <sys/promif.h>
#include <sys/x86_archext.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/sunddi.h>
#include <sys/ddidmareq.h>
#include <sys/controlregs.h>
#include <sys/reboot.h>
#include <sys/kdi.h>
#include <sys/bootconf.h>
#include <sys/bootsvcs.h>
#include <sys/bootinfo.h>
#include <vm/kboot_mmu.h>

#ifdef __xpv
#include <sys/hypervisor.h>
#endif

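/*
 * Map "pgcnt" contiguous device pages starting at pfn "pf" and return the
 * kernel virtual address of the mapping.  The VA range comes from
 * heap_arena; pages that have a page_t are loaded with hat_memload(),
 * raw frames (no page_t) with hat_devload().  All translations are
 * loaded locked (HAT_LOAD_LOCK) and marked HAT_NOSYNC.
 */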
caddr_t
i86devmap(pfn_t pf, pgcnt_t pgcnt, uint_t prot)
{
	caddr_t addr;
	caddr_t addr1;
	page_t *pp;

	addr1 = addr = vmem_alloc(heap_arena, mmu_ptob(pgcnt), VM_SLEEP);

	for (; pgcnt != 0; addr += MMU_PAGESIZE, ++pf, --pgcnt) {
		pp = page_numtopp_nolock(pf);
		if (pp == NULL) {
			hat_devload(kas.a_hat, addr, MMU_PAGESIZE, pf,
			    prot | HAT_NOSYNC, HAT_LOAD_LOCK);
		} else {
			hat_memload(kas.a_hat, addr, pp,
			    prot | HAT_NOSYNC, HAT_LOAD_LOCK);
		}
	}

	return (addr1);
}

/*
 * This routine is like page_numtopp, but accepts only free pages, which
 * it allocates (unfrees) and returns with the exclusive lock held.
 * It is used by machdep.c/dma_init() to find contiguous free pages.
 *
 * XXX this and some others should probably be in vm_machdep.c
 */
page_t *
page_numtopp_alloc(pfn_t pfnum)
{
	page_t *pp;

retry:
	pp = page_numtopp_nolock(pfnum);
	if (pp == NULL) {
		return (NULL);
	}

	if (!page_trylock(pp, SE_EXCL)) {
		return (NULL);
	}

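	/*
	 * Between the nolock lookup above and acquiring the lock, the
	 * page identity may have changed (e.g. due to relocation), so
	 * re-check that this page_t still names pfnum and retry if not.
	 */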
	if (page_pptonum(pp) != pfnum) {
		page_unlock(pp);
		goto retry;
	}

	if (!PP_ISFREE(pp)) {
		page_unlock(pp);
		return (NULL);
	}
	if (pp->p_szc) {
		page_demote_free_pages(pp);
		page_unlock(pp);
		goto retry;
	}

	/* If associated with a vnode, destroy mappings */

	if (pp->p_vnode) {

		page_destroy_free(pp);

		if (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_NO_RECLAIM)) {
			return (NULL);
		}

		if (page_pptonum(pp) != pfnum) {
			page_unlock(pp);
			goto retry;
		}
	}

	if (!PP_ISFREE(pp)) {
		page_unlock(pp);
		return (NULL);
	}

	if (!page_reclaim(pp, (kmutex_t *)NULL))
		return (NULL);

	return (pp);
}
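
/*
 * A rough sketch of how a caller might collect a physically contiguous
 * run of free pages with this routine (illustrative only; the names below
 * are hypothetical, not taken from dma_init()).  Each page that comes back
 * is held SE_EXCL, so a caller must unlock or free anything already
 * claimed when a lookup fails:
 *
 *	for (i = 0; i < npages; i++) {
 *		pplist[i] = page_numtopp_alloc(base_pfn + i);
 *		if (pplist[i] == NULL)
 *			goto unwind;
 *	}
 */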

/*
 * Flag is not set early in boot. Once it is set we are no longer
 * using boot's page tables.
 */
uint_t khat_running = 0;

/*
 * This procedure is callable only while the boot loader is in charge of the
 * MMU. It assumes that PA == VA for page table pointers. It doesn't live in
 * kboot_mmu.c since it's used from common code.
 */
pfn_t
va_to_pfn(void *vaddr)
{
	uintptr_t des_va = ALIGN2PAGE(vaddr);
	uintptr_t va = des_va;
	size_t len;
	uint_t prot;
	pfn_t pfn;

	if (khat_running)
		panic("va_to_pfn(): called too late\n");

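	/*
	 * kbm_probe() finds the first boot-loader mapping at or above *va
	 * and rewrites va/len/pfn/prot to describe it.  If the mapping it
	 * returns starts beyond des_va, the requested address isn't mapped;
	 * if it starts below, des_va lies inside a large mapping and the
	 * pfn is adjusted by the offset into it.
	 */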
	if (kbm_probe(&va, &len, &pfn, &prot) == 0)
		return (PFN_INVALID);
	if (va > des_va)
		return (PFN_INVALID);
	if (va < des_va)
		pfn += mmu_btop(des_va - va);
	return (pfn);
}

/*
 * Initialize a special area in the kernel that always holds some PTEs for
 * faster performance. This always holds segmap's PTEs.
 * In the 32 bit kernel this maps the kernel heap too.
 */
void
hat_kmap_init(uintptr_t base, size_t len)
{
	uintptr_t	map_addr;	/* base rounded down to large page size */
	uintptr_t	map_eaddr;	/* base + len rounded up */
	size_t		map_len;
	caddr_t		ptes;		/* mapping area in kernel for kmap ptes */
	size_t		window_size;	/* size of mapping area for ptes */
	ulong_t		htable_cnt;	/* # of page tables to cover map_len */
	ulong_t		i;
	htable_t	*ht;
	uintptr_t	va;

	/*
	 * We have to map in an area that matches an entire page table.
	 * The PTEs are large page aligned to avoid spurious pagefaults
	 * on the hypervisor.
	 */
	map_addr = base & LEVEL_MASK(1);
	map_eaddr = (base + len + LEVEL_SIZE(1) - 1) & LEVEL_MASK(1);
	map_len = map_eaddr - map_addr;
	window_size = mmu_btop(map_len) * mmu.pte_size;
	window_size = (window_size + LEVEL_SIZE(1)) & LEVEL_MASK(1);
	htable_cnt = map_len >> LEVEL_SHIFT(1);
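
	/*
	 * Illustrative numbers (assuming 8 byte PTEs and a 2MB level 1
	 * page size): a 1GB kmap range covers 256K small pages, so it
	 * needs 512 level 1 page tables and roughly 2MB of VA to window
	 * their PTEs, rounded up to a level 1 boundary.
	 */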

	/*
	 * allocate vmem for the kmap_ptes
	 */
	ptes = vmem_xalloc(heap_arena, window_size, LEVEL_SIZE(1), 0,
	    0, NULL, NULL, VM_SLEEP);
	mmu.kmap_htables =
	    kmem_alloc(htable_cnt * sizeof (htable_t *), KM_SLEEP);

	/*
	 * Map the page tables that cover kmap into the allocated range.
	 * Note we don't ever htable_release() the kmap page tables - they
	 * can't ever be stolen, freed, etc.
	 */
	for (va = map_addr, i = 0; i < htable_cnt; va += LEVEL_SIZE(1), ++i) {
		ht = htable_create(kas.a_hat, va, 0, NULL);
		if (ht == NULL)
			panic("hat_kmap_init: ht == NULL");
		mmu.kmap_htables[i] = ht;

		hat_devload(kas.a_hat, ptes + i * MMU_PAGESIZE,
		    MMU_PAGESIZE, ht->ht_pfn,
#ifdef __xpv
		    PROT_READ | HAT_NOSYNC | HAT_UNORDERED_OK,
#else
		    PROT_READ | PROT_WRITE | HAT_NOSYNC | HAT_UNORDERED_OK,
#endif
		    HAT_LOAD | HAT_LOAD_NOCONSIST);
	}

	/*
	 * set information in mmu to activate handling of kmap
	 */
	mmu.kmap_addr = map_addr;
	mmu.kmap_eaddr = map_eaddr;
	mmu.kmap_ptes = (x86pte_t *)ptes;
}
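
/*
 * Once this window is established, the PTE for any VA in the kmap range
 * can be located by simple arithmetic.  A sketch of the lookup (roughly
 * what the kmap fast paths in hat_i86.c do; PT_INDEX_PTR() just scales
 * the index by mmu.pte_size):
 *
 *	pg_index = mmu_btop(va - mmu.kmap_addr);
 *	ptep = PT_INDEX_PTR(mmu.kmap_ptes, pg_index);
 */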

extern caddr_t kpm_vbase;
extern size_t kpm_size;

#ifdef __xpv
/*
 * Create the initial segkpm mappings for the hypervisor. To avoid having
 * to deal with page tables being read only, we make all mappings
 * read only at first.
 */
static void
xen_kpm_create(paddr_t paddr, level_t lvl)
{
	ulong_t pg_off;

	for (pg_off = 0; pg_off < LEVEL_SIZE(lvl); pg_off += MMU_PAGESIZE) {
		kbm_map((uintptr_t)kpm_vbase + paddr, (paddr_t)0, 0, 1);
		kbm_read_only((uintptr_t)kpm_vbase + paddr + pg_off,
		    paddr + pg_off);
	}
}

/*
 * Try to make all kpm mappings writable. Failures are ok, as those
 * are just pagetable, GDT, etc. pages.
 */
static void
xen_kpm_finish_init(void)
{
	pfn_t gdtpfn = mmu_btop(CPU->cpu_m.mcpu_gdtpa);
	pfn_t pfn;
	page_t *pp;

	for (pfn = 0; pfn < mfn_count; ++pfn) {
		/*
		 * skip gdt
		 */
		if (pfn == gdtpfn)
			continue;

		/*
		 * p_index is a hint that this is a pagetable
		 */
		pp = page_numtopp_nolock(pfn);
		if (pp && pp->p_index) {
			pp->p_index = 0;
			continue;
		}
		(void) xen_kpm_page(pfn, PT_VALID | PT_WRITABLE);
	}
}
#endif

/*
 * Routine to pre-allocate data structures for hat_kern_setup(). It computes
 * how many pagetables it needs by walking the boot loader's page tables.
 */
/*ARGSUSED*/
void
hat_kern_alloc(
	caddr_t	segmap_base,
	size_t	segmap_size,
	caddr_t	ekernelheap)
{
	uintptr_t	last_va = (uintptr_t)-1;	/* catch 1st time */
	uintptr_t	va = 0;
	size_t		size;
	pfn_t		pfn;
	uint_t		prot;
	uint_t		table_cnt = 1;
	uint_t		mapping_cnt;
	level_t		start_level;
	level_t		l;
	struct memlist	*pmem;
	level_t		lpagel = mmu.max_page_level;
	uint64_t	paddr;
	int64_t		psize;
	int		nwindows;

	if (kpm_size > 0) {
		/*
		 * Create the kpm page tables. When running on the
		 * hypervisor these are made read/only at first.
		 * Later we'll add write permission where possible.
		 */
		for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
			paddr = pmem->ml_address;
			psize = pmem->ml_size;
			while (psize >= MMU_PAGESIZE) {
				/* find the largest page size */
				for (l = lpagel; l > 0; l--) {
					if ((paddr & LEVEL_OFFSET(l)) == 0 &&
					    psize > LEVEL_SIZE(l))
						break;
				}

#if defined(__xpv)
				/*
				 * Create read/only mappings to avoid
				 * conflicting with pagetable usage
				 */
				xen_kpm_create(paddr, l);
#else
				kbm_map((uintptr_t)kpm_vbase + paddr, paddr,
				    l, 1);
#endif
				paddr += LEVEL_SIZE(l);
				psize -= LEVEL_SIZE(l);
			}
		}
	}

	/*
	 * If this machine doesn't have a kpm segment, we need to allocate
	 * a small number of 'windows' which can be used to map pagetables.
	 */
	nwindows = (kpm_size == 0) ? 2 * NCPU : 0;

#if defined(__xpv)
	/*
	 * On a hypervisor, these windows are also used by the xpv_panic
	 * code, where we need one window for each level of the pagetable
	 * hierarchy.
	 */
	nwindows = MAX(nwindows, mmu.max_level);
#endif

	if (nwindows != 0) {
		/*
		 * Create the page windows and 1 page of VA in
		 * which we map the PTEs of those windows.
		 */
		mmu.pwin_base = vmem_xalloc(heap_arena, nwindows * MMU_PAGESIZE,
		    LEVEL_SIZE(1), 0, 0, NULL, NULL, VM_SLEEP);
		ASSERT(nwindows <= MMU_PAGESIZE / mmu.pte_size);
		mmu.pwin_pte_va = vmem_xalloc(heap_arena, MMU_PAGESIZE,
		    MMU_PAGESIZE, 0, 0, NULL, NULL, VM_SLEEP);

		/*
		 * Find/Create the page table window mappings.
		 */
		paddr = 0;
		(void) find_pte((uintptr_t)mmu.pwin_base, &paddr, 0, 0);
		ASSERT(paddr != 0);
		ASSERT((paddr & MMU_PAGEOFFSET) == 0);
		mmu.pwin_pte_pa = paddr;
#ifdef __xpv
		(void) find_pte((uintptr_t)mmu.pwin_pte_va, NULL, 0, 0);
		kbm_read_only((uintptr_t)mmu.pwin_pte_va, mmu.pwin_pte_pa);
#else
		kbm_map((uintptr_t)mmu.pwin_pte_va, mmu.pwin_pte_pa, 0, 1);
#endif
	}

	/*
	 * Walk the boot loader's page tables and figure out
	 * how many tables and page mappings there will be.
	 */
	while (kbm_probe(&va, &size, &pfn, &prot) != 0) {
		/*
		 * At each level, if the last_va falls into a new htable,
		 * increment table_cnt. We can stop at the 1st level where
		 * they are in the same htable.
		 */
		start_level = 0;
		while (start_level <= mmu.max_page_level) {
			if (size == LEVEL_SIZE(start_level))
				break;
			start_level++;
		}

		for (l = start_level; l < mmu.max_level; ++l) {
			if (va >> LEVEL_SHIFT(l + 1) ==
			    last_va >> LEVEL_SHIFT(l + 1))
				break;
			++table_cnt;
		}
		last_va = va;
		l = (start_level == 0) ? 1 : start_level;
		va = (va & LEVEL_MASK(l)) + LEVEL_SIZE(l);
	}
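
	/*
	 * For example (illustrative): two successive 4K mappings whose
	 * addresses share the same level 1 region add nothing to
	 * table_cnt, while a mapping that crosses into a new level 1
	 * region adds one table, plus one more for each higher level
	 * region boundary it also crosses.
	 */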

	/*
	 * Besides the boot loader mappings, we're going to fill in
	 * the entire top level page table for the kernel. Make sure there's
	 * enough reserve for that too.
	 */
	table_cnt += mmu.top_level_count - ((kernelbase >>
	    LEVEL_SHIFT(mmu.max_level)) & (mmu.top_level_count - 1));
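	/*
	 * (That is the number of top level entries at or above kernelbase,
	 * each of which may need its own lower level table.)
	 */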

#if defined(__i386)
	/*
	 * The 32 bit PAE hat allocates tables one level below the top when
	 * kernelbase isn't 1 Gig aligned. We'll just be sloppy and allocate
	 * a bunch more to the reserve. Any unused will be returned later.
	 * Note we've already counted these mappings, just not the extra
	 * pagetables.
	 */
	if (mmu.pae_hat != 0 && (kernelbase & LEVEL_OFFSET(mmu.max_level)) != 0)
		table_cnt += mmu.ptes_per_table -
		    ((kernelbase & LEVEL_OFFSET(mmu.max_level)) >>
		    LEVEL_SHIFT(mmu.max_level - 1));
#endif

	/*
	 * Add 1/4 more into table_cnt for extra slop. The unused
	 * slop is freed back when we htable_adjust_reserve() later.
	 */
	table_cnt += table_cnt >> 2;

	/*
	 * We only need mapping entries (hments) for shared pages.
	 * This should be far, far fewer than the total possible.
	 * We'll allocate enough for 1/16 of all possible PTEs.
	 */
	mapping_cnt = (table_cnt * mmu.ptes_per_table) >> 4;

	/*
	 * Now create the initial htable/hment reserves
	 */
	htable_initial_reserve(table_cnt);
	hment_reserve(mapping_cnt);
	x86pte_cpu_init(CPU);
}


/*
 * This routine handles the work of creating the kernel's initial mappings
 * by deciphering the mappings in the page tables created by the boot program.
 *
 * We maintain large page mappings, but only to a level 1 pagesize.
 * The boot loader can only add new mappings once this function starts.
 * In particular it cannot change the pagesize used for any existing
 * mappings or this code breaks!
 */

void
hat_kern_setup(void)
{
	/*
	 * Attach htables to the existing pagetables
	 */
	/* BEGIN CSTYLED */
	htable_attach(kas.a_hat, 0, mmu.max_level, NULL,
#ifdef __xpv
	    mmu_btop(xen_info->pt_base - ONE_GIG));
#else
	    mmu_btop(getcr3()));
#endif
	/* END CSTYLED */

#if defined(__i386) && !defined(__xpv)
	CPU->cpu_tss->tss_cr3 = dftss0->tss_cr3 = getcr3();
#endif /* __i386 */

#if defined(__xpv) && defined(__amd64)
	/*
	 * Try to make the kpm mappings r/w. Failures here are OK, as
	 * it's probably just a pagetable
	 */
	xen_kpm_finish_init();
#endif

	/*
	 * The kernel HAT is now officially open for business.
	 */
	khat_running = 1;

	CPUSET_ATOMIC_ADD(kas.a_hat->hat_cpus, CPU->cpu_id);
	CPU->cpu_current_hat = kas.a_hat;
}