/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/t_lock.h>
#include <sys/memlist.h>
#include <sys/cpuvar.h>
#include <sys/vmem.h>
#include <sys/mman.h>
#include <sys/vm.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/vm_machparam.h>
#include <sys/tss.h>
#include <sys/vnode.h>
#include <vm/hat.h>
#include <vm/anon.h>
#include <vm/as.h>
#include <vm/page.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_map.h>
#include <vm/hat_i86.h>
#include <sys/promif.h>
#include <sys/x86_archext.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/sunddi.h>
#include <sys/ddidmareq.h>
#include <sys/controlregs.h>
#include <sys/reboot.h>
#include <sys/kdi.h>
#include <sys/bootconf.h>
#include <sys/bootsvcs.h>
#include <sys/bootinfo.h>
#include <vm/kboot_mmu.h>

#ifdef __xpv
#include <sys/hypervisor.h>
#endif

/*
 * Allocate kernel heap VA for pgcnt pages and load locked mappings to the
 * physical range starting at page frame "pf". Frames backed by a page_t are
 * loaded with hat_memload(); raw device frames with hat_devload(). Returns
 * the base VA of the new range.
 */
caddr_t
i86devmap(pfn_t pf, pgcnt_t pgcnt, uint_t prot)
{
	caddr_t addr;
	caddr_t addr1;
	page_t *pp;

	addr1 = addr = vmem_alloc(heap_arena, mmu_ptob(pgcnt), VM_SLEEP);

	for (; pgcnt != 0; addr += MMU_PAGESIZE, ++pf, --pgcnt) {
		pp = page_numtopp_nolock(pf);
		if (pp == NULL) {
			hat_devload(kas.a_hat, addr, MMU_PAGESIZE, pf,
			    prot | HAT_NOSYNC, HAT_LOAD_LOCK);
		} else {
			hat_memload(kas.a_hat, addr, pp,
			    prot | HAT_NOSYNC, HAT_LOAD_LOCK);
		}
	}

	return (addr1);
}
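/*
 * Illustrative use of i86devmap() (hypothetical values): mapping a 4-page
 * MMIO region whose physical base address is "pa" for read/write access:
 *
 *	caddr_t va = i86devmap(mmu_btop(pa), 4, PROT_READ | PROT_WRITE);
 *
 * Because the mappings are loaded with HAT_LOAD_LOCK, they remain resident
 * until explicitly unloaded and the VA is returned to heap_arena.
 */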
/*
 * This routine is like page_numtopp, but accepts only free pages, which
 * it allocates (unfrees) and returns with the exclusive lock held.
 * It is used by machdep.c/dma_init() to find contiguous free pages.
 *
 * XXX this and some others should probably be in vm_machdep.c
 */
page_t *
page_numtopp_alloc(pfn_t pfnum)
{
	page_t *pp;

retry:
	pp = page_numtopp_nolock(pfnum);
	if (pp == NULL) {
		return (NULL);
	}

	if (!page_trylock(pp, SE_EXCL)) {
		return (NULL);
	}

	if (page_pptonum(pp) != pfnum) {
		page_unlock(pp);
		goto retry;
	}

	if (!PP_ISFREE(pp)) {
		page_unlock(pp);
		return (NULL);
	}
	if (pp->p_szc) {
		page_demote_free_pages(pp);
		page_unlock(pp);
		goto retry;
	}

	/* If associated with a vnode, destroy mappings */

	if (pp->p_vnode) {

		page_destroy_free(pp);

		if (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_NO_RECLAIM)) {
			return (NULL);
		}

		if (page_pptonum(pp) != pfnum) {
			page_unlock(pp);
			goto retry;
		}
	}

	if (!PP_ISFREE(pp)) {
		page_unlock(pp);
		return (NULL);
	}

	if (!page_reclaim(pp, (kmutex_t *)NULL))
		return (NULL);

	return (pp);
}

/*
 * Flag is not set early in boot. Once it is set we are no longer
 * using boot's page tables.
 */
uint_t khat_running = 0;

/*
 * This procedure is callable only while the boot loader is in charge of the
 * MMU. It assumes that PA == VA for page table pointers. It doesn't live in
 * kboot_mmu.c since it's used from common code.
 */
pfn_t
va_to_pfn(void *vaddr)
{
	uintptr_t des_va = ALIGN2PAGE(vaddr);
	uintptr_t va = des_va;
	size_t len;
	uint_t prot;
	pfn_t pfn;

	if (khat_running)
		panic("va_to_pfn(): called too late\n");

	if (kbm_probe(&va, &len, &pfn, &prot) == 0)
		return (PFN_INVALID);
	if (va > des_va)
		return (PFN_INVALID);
	if (va < des_va)
		pfn += mmu_btop(des_va - va);
	return (pfn);
}
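/*
 * A worked example of the large-page adjustment above (illustrative
 * numbers): if vaddr falls 0x3000 bytes into a 2MB boot mapping,
 * kbm_probe() reports the mapping's 2MB-aligned base in "va" along with
 * the pfn of that base. Since va < des_va, the result is
 * pfn + mmu_btop(0x3000), i.e. the base pfn plus 3 pages.
 */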
/*
 * Initialize a special area in the kernel that always holds some PTEs for
 * faster performance. This always holds segmap's PTEs.
 * In the 32 bit kernel this maps the kernel heap too.
 */
void
hat_kmap_init(uintptr_t base, size_t len)
{
	uintptr_t	map_addr;	/* base rounded down to large page size */
	uintptr_t	map_eaddr;	/* base + len rounded up */
	size_t		map_len;
	caddr_t		ptes;		/* mapping area in kernel for kmap ptes */
	size_t		window_size;	/* size of mapping area for ptes */
	ulong_t		htable_cnt;	/* # of page tables to cover map_len */
	ulong_t		i;
	htable_t	*ht;
	uintptr_t	va;

	/*
	 * We have to map in an area that matches an entire page table.
	 * The PTEs are large page aligned to avoid spurious pagefaults
	 * on the hypervisor.
	 */
	map_addr = base & LEVEL_MASK(1);
	map_eaddr = (base + len + LEVEL_SIZE(1) - 1) & LEVEL_MASK(1);
	map_len = map_eaddr - map_addr;
	window_size = mmu_btop(map_len) * mmu.pte_size;
	window_size = (window_size + LEVEL_SIZE(1)) & LEVEL_MASK(1);
	htable_cnt = map_len >> LEVEL_SHIFT(1);

	/*
	 * allocate vmem for the kmap_ptes
	 */
	ptes = vmem_xalloc(heap_arena, window_size, LEVEL_SIZE(1), 0,
	    0, NULL, NULL, VM_SLEEP);
	mmu.kmap_htables =
	    kmem_alloc(htable_cnt * sizeof (htable_t *), KM_SLEEP);

	/*
	 * Map the page tables that cover kmap into the allocated range.
	 * Note we don't ever htable_release() the kmap page tables - they
	 * can't ever be stolen, freed, etc.
	 */
	for (va = map_addr, i = 0; i < htable_cnt; va += LEVEL_SIZE(1), ++i) {
		ht = htable_create(kas.a_hat, va, 0, NULL);
		if (ht == NULL)
			panic("hat_kmap_init: ht == NULL");
		mmu.kmap_htables[i] = ht;

		hat_devload(kas.a_hat, ptes + i * MMU_PAGESIZE,
		    MMU_PAGESIZE, ht->ht_pfn,
#ifdef __xpv
		    PROT_READ | HAT_NOSYNC | HAT_UNORDERED_OK,
#else
		    PROT_READ | PROT_WRITE | HAT_NOSYNC | HAT_UNORDERED_OK,
#endif
		    HAT_LOAD | HAT_LOAD_NOCONSIST);
	}

	/*
	 * set information in mmu to activate handling of kmap
	 */
	mmu.kmap_addr = map_addr;
	mmu.kmap_eaddr = map_eaddr;
	mmu.kmap_ptes = (x86pte_t *)ptes;
}
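/*
 * Sizing sanity check for the window math above (illustrative, assuming
 * a 64-bit kernel with 8-byte PTEs and a 512MB kmap range): mmu_btop(512MB)
 * yields 128K PTE slots, or 1MB of PTE window, which the rounding step pads
 * up to the 2MB large-page boundary; htable_cnt = 512MB >> 21 = 256 level-0
 * page tables, each one mapped into the window (read-only under the
 * hypervisor, since the hypervisor must validate every PTE update).
 */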
extern caddr_t kpm_vbase;
extern size_t kpm_size;

#ifdef __xpv
/*
 * Create the initial segkpm mappings for the hypervisor. To avoid having
 * to deal with page tables being read only, we make all mappings
 * read only at first.
 */
static void
xen_kpm_create(paddr_t paddr, level_t lvl)
{
	ulong_t pg_off;

	for (pg_off = 0; pg_off < LEVEL_SIZE(lvl); pg_off += MMU_PAGESIZE) {
		kbm_map((uintptr_t)kpm_vbase + paddr, (paddr_t)0, 0, 1);
		kbm_read_only((uintptr_t)kpm_vbase + paddr + pg_off,
		    paddr + pg_off);
	}
}

/*
 * Try to make all kpm mappings writable. Failures are ok, as those
 * are just pagetable, GDT, etc. pages.
 */
static void
xen_kpm_finish_init(void)
{
	pfn_t gdtpfn = mmu_btop(CPU->cpu_m.mcpu_gdtpa);
	pfn_t pfn;
	page_t *pp;

	for (pfn = 0; pfn < mfn_count; ++pfn) {
		/*
		 * skip gdt
		 */
		if (pfn == gdtpfn)
			continue;

		/*
		 * p_index is a hint that this is a pagetable
		 */
		pp = page_numtopp_nolock(pfn);
		if (pp && pp->p_index) {
			pp->p_index = 0;
			continue;
		}
		(void) xen_kpm_page(pfn, PT_VALID | PT_WRITABLE);
	}
}
#endif
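/*
 * Design note on the read-only-first dance above: under the hypervisor a
 * frame that is in use as a page table may never have a writable mapping
 * anywhere, so segkpm starts out entirely read-only. xen_kpm_finish_init()
 * then upgrades to writable wherever the hypervisor permits, skipping the
 * GDT frame and anything p_index hints is a pagetable.
 */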
/*
 * Routine to pre-allocate data structures for hat_kern_setup(). It computes
 * how many pagetables it needs by walking the boot loader's page tables.
 */
/*ARGSUSED*/
void
hat_kern_alloc(
	caddr_t	segmap_base,
	size_t	segmap_size,
	caddr_t	ekernelheap)
{
	uintptr_t	last_va = (uintptr_t)-1;	/* catch 1st time */
	uintptr_t	va = 0;
	size_t		size;
	pfn_t		pfn;
	uint_t		prot;
	uint_t		table_cnt = 1;
	uint_t		mapping_cnt;
	level_t		start_level;
	level_t		l;
	struct memlist	*pmem;
	level_t		lpagel = mmu.max_page_level;
	uint64_t	paddr;
	int64_t		psize;
	int		nwindows;

	if (kpm_size > 0) {
		/*
		 * Create the kpm page tables. When running on the
		 * hypervisor these are made read/only at first.
		 * Later we'll add write permission where possible.
		 */
		for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
			paddr = pmem->ml_address;
			psize = pmem->ml_size;
			while (psize >= MMU_PAGESIZE) {
				/* find the largest page size */
				for (l = lpagel; l > 0; l--) {
					if ((paddr & LEVEL_OFFSET(l)) == 0 &&
					    psize > LEVEL_SIZE(l))
						break;
				}

#if defined(__xpv)
				/*
				 * Create read/only mappings to avoid
				 * conflicting with pagetable usage
				 */
				xen_kpm_create(paddr, l);
#else
				kbm_map((uintptr_t)kpm_vbase + paddr, paddr,
				    l, 1);
#endif
				paddr += LEVEL_SIZE(l);
				psize -= LEVEL_SIZE(l);
			}
		}
	}

	/*
	 * If this machine doesn't have a kpm segment, we need to allocate
	 * a small number of 'windows' which can be used to map pagetables.
	 */
	nwindows = (kpm_size == 0) ? 2 * NCPU : 0;

#if defined(__xpv)
	/*
	 * On a hypervisor, these windows are also used by the xpv_panic
	 * code, where we need one window for each level of the pagetable
	 * hierarchy.
	 */
	nwindows = MAX(nwindows, mmu.max_level);
#endif

	if (nwindows != 0) {
		/*
		 * Create the page windows and 1 page of VA in
		 * which we map the PTEs of those windows.
		 */
		mmu.pwin_base = vmem_xalloc(heap_arena,
		    nwindows * MMU_PAGESIZE, LEVEL_SIZE(1), 0, 0, NULL,
		    NULL, VM_SLEEP);
		ASSERT(nwindows <= MMU_PAGESIZE / mmu.pte_size);
		mmu.pwin_pte_va = vmem_xalloc(heap_arena, MMU_PAGESIZE,
		    MMU_PAGESIZE, 0, 0, NULL, NULL, VM_SLEEP);

		/*
		 * Find/Create the page table window mappings.
		 */
		paddr = 0;
		(void) find_pte((uintptr_t)mmu.pwin_base, &paddr, 0, 0);
		ASSERT(paddr != 0);
		ASSERT((paddr & MMU_PAGEOFFSET) == 0);
		mmu.pwin_pte_pa = paddr;
#ifdef __xpv
		(void) find_pte((uintptr_t)mmu.pwin_pte_va, NULL, 0, 0);
		kbm_read_only((uintptr_t)mmu.pwin_pte_va, mmu.pwin_pte_pa);
#else
		kbm_map((uintptr_t)mmu.pwin_pte_va, mmu.pwin_pte_pa, 0, 1);
#endif
	}
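	/*
	 * Scale check for the window setup above (illustrative, assuming
	 * NCPU of 32, 8-byte PTEs, and no kpm segment): nwindows = 64, and
	 * the 64 window PTEs occupy only 512 bytes, comfortably within the
	 * single MMU_PAGESIZE page of PTE space the ASSERT insists on.
	 */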
	/*
	 * Walk the boot loader's page tables and figure out
	 * how many tables and page mappings there will be.
	 */
	while (kbm_probe(&va, &size, &pfn, &prot) != 0) {
		/*
		 * At each level, if the last_va falls into a new htable,
		 * increment table_cnt. We can stop at the 1st level where
		 * they are in the same htable.
		 */
		start_level = 0;
		while (start_level <= mmu.max_page_level) {
			if (size == LEVEL_SIZE(start_level))
				break;
			start_level++;
		}

		for (l = start_level; l < mmu.max_level; ++l) {
			if (va >> LEVEL_SHIFT(l + 1) ==
			    last_va >> LEVEL_SHIFT(l + 1))
				break;
			++table_cnt;
		}
		last_va = va;
		l = (start_level == 0) ? 1 : start_level;
		va = (va & LEVEL_MASK(l)) + LEVEL_SIZE(l);
	}

	/*
	 * Besides the boot loader mappings, we're going to fill in
	 * the entire top level page table for the kernel. Make sure there's
	 * enough reserve for that too.
	 */
	table_cnt += mmu.top_level_count - ((kernelbase >>
	    LEVEL_SHIFT(mmu.max_level)) & (mmu.top_level_count - 1));

#if defined(__i386)
	/*
	 * The 32 bit PAE hat allocates tables one level below the top when
	 * kernelbase isn't 1 Gig aligned. We'll just be sloppy and allocate
	 * a bunch more to the reserve. Any unused will be returned later.
	 * Note we've already counted these mappings, just not the extra
	 * pagetables.
	 */
	if (mmu.pae_hat != 0 && (kernelbase & LEVEL_OFFSET(mmu.max_level)) != 0)
		table_cnt += mmu.ptes_per_table -
		    ((kernelbase & LEVEL_OFFSET(mmu.max_level)) >>
		    LEVEL_SHIFT(mmu.max_level - 1));
#endif

	/*
	 * Add 1/4 more into table_cnt for extra slop. The unused
	 * slop is freed back when we htable_adjust_reserve() later.
	 */
	table_cnt += table_cnt >> 2;

	/*
	 * We only need mapping entries (hments) for shared pages.
	 * This should be far, far fewer than the total possible;
	 * we'll allocate enough for 1/16 of all possible PTEs.
	 */
	mapping_cnt = (table_cnt * mmu.ptes_per_table) >> 4;

	/*
	 * Now create the initial htable/hment reserves
	 */
	htable_initial_reserve(table_cnt);
	hment_reserve(mapping_cnt);
	x86pte_cpu_init(CPU);
}
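/*
 * For the top-level reservation in hat_kern_alloc() above (illustrative
 * numbers): with 512 top-level entries and a kernelbase whose top-level
 * index works out to 507, the kernel owns entries 507 through 511, so
 * 512 - 507 = 5 extra tables are reserved beyond what the boot loader
 * walk counted.
 */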
/*
 * This routine handles the work of creating the kernel's initial mappings
 * by deciphering the mappings in the page tables created by the boot program.
 *
 * We maintain large page mappings, but only to a level 1 pagesize.
 * The boot loader can only add new mappings once this function starts.
 * In particular it cannot change the pagesize used for any existing
 * mappings or this code breaks!
 */

void
hat_kern_setup(void)
{
	/*
	 * Attach htables to the existing pagetables
	 */
	/* BEGIN CSTYLED */
	htable_attach(kas.a_hat, 0, mmu.max_level, NULL,
#ifdef __xpv
	    mmu_btop(xen_info->pt_base - ONE_GIG));
#else
	    mmu_btop(getcr3()));
#endif
	/* END CSTYLED */

#if defined(__i386) && !defined(__xpv)
	CPU->cpu_tss->tss_cr3 = dftss0->tss_cr3 = getcr3();
#endif /* __i386 */

#if defined(__xpv) && defined(__amd64)
	/*
	 * Try to make the kpm mappings r/w. Failures here are OK, as
	 * it's probably just a pagetable.
	 */
	xen_kpm_finish_init();
#endif

	/*
	 * The kernel HAT is now officially open for business.
	 */
	khat_running = 1;

	CPUSET_ATOMIC_ADD(kas.a_hat->hat_cpus, CPU->cpu_id);
	CPU->cpu_current_hat = kas.a_hat;
}
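/*
 * Note on ordering: khat_running is set only after htable_attach() has
 * adopted boot's page tables, since boot-time-only paths such as
 * va_to_pfn() key off it to panic on calls made too late.
 */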