/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E%	SMI"

#include <sys/t_lock.h>
#include <sys/memlist.h>
#include <sys/cpuvar.h>
#include <sys/vmem.h>
#include <sys/mman.h>
#include <sys/vm.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/vm_machparam.h>
#include <sys/tss.h>
#include <sys/vnode.h>
#include <vm/hat.h>
#include <vm/anon.h>
#include <vm/as.h>
#include <vm/page.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_map.h>
#include <vm/hat_i86.h>
#include <sys/promif.h>
#include <sys/x86_archext.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/sunddi.h>
#include <sys/ddidmareq.h>
#include <sys/controlregs.h>
#include <sys/reboot.h>
#include <sys/kdi.h>
#include <sys/bootconf.h>
#include <sys/bootsvcs.h>
#include <sys/bootinfo.h>
#include <vm/kboot_mmu.h>

/*
 * Map pgcnt physical pages, starting at page frame pf, into freshly
 * allocated kernel heap VA with the given protections and return the
 * starting virtual address.  Pages that have no page_t are loaded as
 * device mappings.
 */
caddr_t
i86devmap(pfn_t pf, pgcnt_t pgcnt, uint_t prot)
{
        caddr_t addr;
        caddr_t addr1;
        page_t *pp;

        addr1 = addr = vmem_alloc(heap_arena, mmu_ptob(pgcnt), VM_SLEEP);

        for (; pgcnt != 0; addr += MMU_PAGESIZE, ++pf, --pgcnt) {
                pp = page_numtopp_nolock(pf);
                if (pp == NULL) {
                        hat_devload(kas.a_hat, addr, MMU_PAGESIZE, pf,
                            prot | HAT_NOSYNC, HAT_LOAD_LOCK);
                } else {
                        hat_memload(kas.a_hat, addr, pp,
                            prot | HAT_NOSYNC, HAT_LOAD_LOCK);
                }
        }

        return (addr1);
}
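/*
 * A minimal usage sketch for i86devmap() from a hypothetical caller (not
 * part of this file): the mapping is loaded locked, so the caller is
 * assumed to unload it and return the VA to the heap arena when done,
 * roughly:
 *
 *	caddr_t va = i86devmap(pfn, 4, PROT_READ | PROT_WRITE);
 *	... use the 4 pages at va ...
 *	hat_unload(kas.a_hat, va, mmu_ptob(4), HAT_UNLOAD_UNLOCK);
 *	vmem_free(heap_arena, va, mmu_ptob(4));
 */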
/*
 * This routine is like page_numtopp, but accepts only free pages, which
 * it allocates (unfrees) and returns with the exclusive lock held.
 * It is used by machdep.c/dma_init() to find contiguous free pages.
 *
 * XXX this and some others should probably be in vm_machdep.c
 */
page_t *
page_numtopp_alloc(pfn_t pfnum)
{
        page_t *pp;

retry:
        pp = page_numtopp_nolock(pfnum);
        if (pp == NULL) {
                return (NULL);
        }

        if (!page_trylock(pp, SE_EXCL)) {
                return (NULL);
        }

        if (page_pptonum(pp) != pfnum) {
                page_unlock(pp);
                goto retry;
        }

        if (!PP_ISFREE(pp)) {
                page_unlock(pp);
                return (NULL);
        }
        if (pp->p_szc) {
                page_demote_free_pages(pp);
                page_unlock(pp);
                goto retry;
        }

        /* If associated with a vnode, destroy mappings */

        if (pp->p_vnode) {

                page_destroy_free(pp);

                if (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_NO_RECLAIM)) {
                        return (NULL);
                }

                if (page_pptonum(pp) != pfnum) {
                        page_unlock(pp);
                        goto retry;
                }
        }

        if (!PP_ISFREE(pp) || !page_reclaim(pp, (kmutex_t *)NULL)) {
                page_unlock(pp);
                return (NULL);
        }

        return (pp);
}

/*
 * Flag is not set early in boot. Once it is set we are no longer
 * using boot's page tables.
 */
uint_t khat_running = 0;

/*
 * This procedure is callable only while the boot loader is in charge of the
 * MMU. It assumes that PA == VA for page table pointers. It doesn't live in
 * kboot_mmu.c since it's used from common code.
 */
pfn_t
va_to_pfn(void *vaddr)
{
        uintptr_t des_va = ALIGN2PAGE(vaddr);
        uintptr_t va = des_va;
        size_t len;
        uint_t prot;
        pfn_t pfn;

        if (khat_running)
                panic("va_to_pfn(): called too late\n");

        if (kbm_probe(&va, &len, &pfn, &prot) == 0)
                return (PFN_INVALID);
        if (va > des_va)
                return (PFN_INVALID);
        if (va < des_va)
                pfn += mmu_btop(des_va - va);
        return (pfn);
}
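/*
 * Sketch for a hypothetical early-boot caller (not defined in this file):
 * a full physical address can be recovered from va_to_pfn() by adding back
 * the page offset, roughly:
 *
 *	pfn_t pfn = va_to_pfn(p);
 *	if (pfn != PFN_INVALID)
 *		pa = mmu_ptob((paddr_t)pfn) | ((uintptr_t)p & MMU_PAGEOFFSET);
 */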
/*
 * Initialize a special area in the kernel that always holds some PTEs for
 * faster performance. This always holds segmap's PTEs.
 * In the 32 bit kernel this maps the kernel heap too.
 */
void
hat_kmap_init(uintptr_t base, size_t len)
{
        uintptr_t map_addr;     /* base rounded down to large page size */
        uintptr_t map_eaddr;    /* base + len rounded up */
        size_t map_len;
        caddr_t ptes;           /* mapping area in kernel for kmap ptes */
        size_t window_size;     /* size of mapping area for ptes */
        ulong_t htable_cnt;     /* # of page tables to cover map_len */
        ulong_t i;
        htable_t *ht;
        uintptr_t va;

        /*
         * We have to map in an area that matches an entire page table.
         */
        map_addr = base & LEVEL_MASK(1);
        map_eaddr = (base + len + LEVEL_SIZE(1) - 1) & LEVEL_MASK(1);
        map_len = map_eaddr - map_addr;
        window_size = mmu_btop(map_len) * mmu.pte_size;
        window_size = (window_size + LEVEL_SIZE(1)) & LEVEL_MASK(1);
        htable_cnt = map_len >> LEVEL_SHIFT(1);

        /*
         * allocate vmem for the kmap_ptes
         */
        ptes = vmem_xalloc(heap_arena, window_size, LEVEL_SIZE(1), 0,
            0, NULL, NULL, VM_SLEEP);
        mmu.kmap_htables =
            kmem_alloc(htable_cnt * sizeof (htable_t *), KM_SLEEP);

        /*
         * Map the page tables that cover kmap into the allocated range.
         * Note we don't ever htable_release() the kmap page tables - they
         * can't ever be stolen, freed, etc.
         */
        for (va = map_addr, i = 0; i < htable_cnt; va += LEVEL_SIZE(1), ++i) {
                ht = htable_create(kas.a_hat, va, 0, NULL);
                if (ht == NULL)
                        panic("hat_kmap_init: ht == NULL");
                mmu.kmap_htables[i] = ht;

                hat_devload(kas.a_hat, ptes + i * MMU_PAGESIZE,
                    MMU_PAGESIZE, ht->ht_pfn,
                    PROT_READ | PROT_WRITE | HAT_NOSYNC | HAT_UNORDERED_OK,
                    HAT_LOAD | HAT_LOAD_NOCONSIST);
        }

        /*
         * set information in mmu to activate handling of kmap
         */
        mmu.kmap_addr = map_addr;
        mmu.kmap_eaddr = map_eaddr;
        mmu.kmap_ptes = (x86pte_t *)ptes;
}
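/*
 * Sketch of how the window set up above is expected to be used (assumed
 * from hat_kmap_load() in hat_i86.c, not defined here): the PTE mapping a
 * kmap virtual address can be reached directly through mmu.kmap_ptes
 * instead of doing an htable walk, roughly:
 *
 *	pg_index = mmu_btop(va - mmu.kmap_addr);
 *	pte_ptr = PT_INDEX_PTR(mmu.kmap_ptes, pg_index);
 */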
extern caddr_t kpm_vbase;
extern size_t kpm_size;

/*
 * Routine to pre-allocate data structures for hat_kern_setup(). It computes
 * how many pagetables it needs by walking the boot loader's page tables.
 */
/*ARGSUSED*/
void
hat_kern_alloc(
        caddr_t segmap_base,
        size_t  segmap_size,
        caddr_t ekernelheap)
{
        uintptr_t       last_va = (uintptr_t)-1;        /* catch 1st time */
        uintptr_t       va = 0;
        size_t          size;
        pfn_t           pfn;
        uint_t          prot;
        uint_t          table_cnt = 1;
        uint_t          mapping_cnt;
        level_t         start_level;
        level_t         l;
        struct memlist  *pmem;
        level_t         lpagel = mmu.max_page_level;
        uint64_t        paddr;
        int64_t         psize;

        if (kpm_size > 0) {
                /*
                 * Create the kpm page tables.
                 */
                for (pmem = phys_install; pmem; pmem = pmem->next) {
                        paddr = pmem->address;
                        psize = pmem->size;
                        while (psize >= MMU_PAGESIZE) {
                                if ((paddr & LEVEL_OFFSET(lpagel)) == 0 &&
                                    psize > LEVEL_SIZE(lpagel))
                                        l = lpagel;
                                else
                                        l = 0;
                                kbm_map((uintptr_t)kpm_vbase + paddr, paddr,
                                    l, 1);
                                paddr += LEVEL_SIZE(l);
                                psize -= LEVEL_SIZE(l);
                        }
                }
        } else {
                /*
                 * Create the page windows and 1 page of VA in
                 * which we map the PTEs of those windows.
                 */
                mmu.pwin_base = vmem_xalloc(heap_arena,
                    2 * NCPU * MMU_PAGESIZE,
                    LEVEL_SIZE(1), 0, 0, NULL, NULL, VM_SLEEP);
                ASSERT(NCPU * 2 <= MMU_PAGESIZE / mmu.pte_size);
                mmu.pwin_pte_va = vmem_xalloc(heap_arena, MMU_PAGESIZE,
                    MMU_PAGESIZE, 0, 0, NULL, NULL, VM_SLEEP);

                /*
                 * Find/Create the page table window mappings.
                 */
                paddr = 0;
                (void) find_pte((uintptr_t)mmu.pwin_base, &paddr, 0, 0);
                ASSERT(paddr != 0);
                ASSERT((paddr & MMU_PAGEOFFSET) == 0);
                mmu.pwin_pte_pa = paddr;
                kbm_map((uintptr_t)mmu.pwin_pte_va, mmu.pwin_pte_pa, 0, 1);
        }

        /*
         * Walk the boot loader's page tables and figure out
         * how many tables and page mappings there will be.
         */
        while (kbm_probe(&va, &size, &pfn, &prot) != 0) {
                /*
                 * At each level, if the last_va falls into a new htable,
                 * increment table_cnt. We can stop at the 1st level where
                 * they are in the same htable.
                 */
                if (size == MMU_PAGESIZE)
                        start_level = 0;
                else
                        start_level = 1;

                for (l = start_level; l < mmu.max_level; ++l) {
                        if (va >> LEVEL_SHIFT(l + 1) ==
                            last_va >> LEVEL_SHIFT(l + 1))
                                break;
                        ++table_cnt;
                }
                last_va = va;
                va = (va & LEVEL_MASK(1)) + LEVEL_SIZE(1);
        }

        /*
         * Besides the boot loader mappings, we're going to fill in
         * the entire top level page table for the kernel. Make sure there's
         * enough reserve for that too.
         */
        table_cnt += mmu.top_level_count - ((kernelbase >>
            LEVEL_SHIFT(mmu.max_level)) & (mmu.top_level_count - 1));

#if defined(__i386)
        /*
         * The 32 bit PAE hat allocates tables one level below the top when
         * kernelbase isn't 1 Gig aligned. We'll just be sloppy and allocate
         * a bunch more to the reserve. Any unused will be returned later.
         * Note we've already counted these mappings, just not the extra
         * pagetables.
         */
        if (mmu.pae_hat != 0 && (kernelbase & LEVEL_OFFSET(mmu.max_level)) != 0)
                table_cnt += mmu.ptes_per_table -
                    ((kernelbase & LEVEL_OFFSET(mmu.max_level)) >>
                    LEVEL_SHIFT(mmu.max_level - 1));
#endif

        /*
         * Add 1/4 more into table_cnt for extra slop. The unused
         * slop is freed back when we htable_adjust_reserve() later.
         */
        table_cnt += table_cnt >> 2;

        /*
         * We only need mapping entries (hments) for shared pages.
         * This should be far, far fewer than the total possible,
         * so we'll allocate enough for 1/16 of all possible PTEs.
         */
        mapping_cnt = (table_cnt * mmu.ptes_per_table) >> 4;

        /*
         * Now create the initial htable/hment reserves
         */
        htable_initial_reserve(table_cnt);
        hment_reserve(mapping_cnt);
        x86pte_cpu_init(CPU);
}

/*
 * This routine handles the work of creating the kernel's initial mappings
 * by deciphering the mappings in the page tables created by the boot program.
 *
 * We maintain large page mappings, but only to a level 1 pagesize.
 * The boot loader can only add new mappings once this function starts.
 * In particular it cannot change the pagesize used for any existing
 * mappings or this code breaks!
 */
void
hat_kern_setup(void)
{
        /*
         * Attach htables to the existing pagetables
         */
        htable_attach(kas.a_hat, 0, mmu.max_level, NULL,
            mmu_btop(getcr3()));

#if defined(__i386)
        CPU->cpu_tss->tss_cr3 = dftss0.tss_cr3 = getcr3();
#endif /* __i386 */

        /*
         * The kernel HAT is now officially open for business.
         */
        khat_running = 1;

        CPUSET_ATOMIC_ADD(kas.a_hat->hat_cpus, CPU->cpu_id);
        CPU->cpu_current_hat = kas.a_hat;
}
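/*
 * Rough boot-time ordering of the routines above, summarized here for
 * orientation (assumed from the startup code, not taken from this file):
 *
 *	hat_kern_alloc()	- sizes and reserves htables/hments while the
 *				  boot loader still owns the MMU
 *	hat_kern_setup()	- attaches htables to the boot-created page
 *				  tables and sets khat_running
 *	hat_kmap_init()		- sets up the segmap PTE window once the
 *				  kernel HAT is running
 */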