/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski <raj@semihalf.com>
 * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
 * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Some hw specific parts of this pmap were derived or influenced
 * by NetBSD's ibm4xx pmap module. More generic code is shared with
 * a few other pmap modules from the FreeBSD tree.
 */

/*
 * VM layout notes:
 *
 * Kernel and user threads run within one common virtual address space
 * defined by AS=0.
 *
 * 32-bit pmap:
 * Virtual address space layout:
 * -----------------------------
 * 0x0000_0000 - 0x7fff_ffff : user process
 * 0x8000_0000 - 0xbfff_ffff : pmap_mapdev()-ed area (PCI/PCIE etc.)
 * 0xc000_0000 - 0xc0ff_ffff : kernel reserved
 *   0xc000_0000 - data_end  : kernel code+data, env, metadata etc.
 * 0xc100_0000 - 0xffff_ffff : KVA
 *   0xc100_0000 - 0xc100_3fff : reserved for page zero/copy
 *   0xc100_4000 - 0xc200_3fff : reserved for ptbl bufs
 *   0xc200_4000 - 0xc200_8fff : guard page + kstack0
 *   0xc200_9000 - 0xfeef_ffff : actual free KVA space
 *
 * 64-bit pmap:
 * Virtual address space layout:
 * -----------------------------
 * 0x0000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff : user process
 *   0x0000_0000_0000_0000 - 0x8fff_ffff_ffff_ffff : text, data, heap, maps, libraries
 *   0x9000_0000_0000_0000 - 0xafff_ffff_ffff_ffff : mmio region
 *   0xb000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff : stack
 * 0xc000_0000_0000_0000 - 0xcfff_ffff_ffff_ffff : kernel reserved
 *   0xc000_0000_0000_0000 - endkernel-1 : kernel code & data
 *   endkernel - msgbufp-1 : flat device tree
 *   msgbufp - ptbl_bufs-1 : message buffer
 *   ptbl_bufs - kernel_pdir-1 : kernel page tables
 *   kernel_pdir - kernel_pp2d-1 : kernel page directory
 *   kernel_pp2d - . : kernel pointers to page directory
 *   pmap_zero_copy_min - crashdumpmap-1 : reserved for page zero/copy
 *   crashdumpmap - ptbl_buf_pool_vabase-1 : reserved for ptbl bufs
 *   ptbl_buf_pool_vabase - virtual_avail-1 : user page directories and page tables
 *   virtual_avail - 0xcfff_ffff_ffff_ffff : actual free KVA space
 * 0xd000_0000_0000_0000 - 0xdfff_ffff_ffff_ffff : coprocessor region
 * 0xe000_0000_0000_0000 - 0xefff_ffff_ffff_ffff : mmio region
 * 0xf000_0000_0000_0000 - 0xffff_ffff_ffff_ffff : direct map
 *   0xf000_0000_0000_0000 - +Maxmem : physmem map
 *   - 0xffff_ffff_ffff_ffff : device direct map
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_kstack_pages.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/malloc.h>
#include <sys/ktr.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/queue.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/linker.h>
#include <sys/msgbuf.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_param.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/uma.h>

#include <machine/_inttypes.h>
#include <machine/cpu.h>
#include <machine/pcb.h>
#include <machine/platform.h>

#include <machine/tlb.h>
#include <machine/spr.h>
#include <machine/md_var.h>
#include <machine/mmuvar.h>
#include <machine/pmap.h>
#include <machine/pte.h>

#include "mmu_if.h"

#define SPARSE_MAPDEV
#ifdef DEBUG
#define debugf(fmt, args...) printf(fmt, ##args)
#else
#define debugf(fmt, args...)
#endif

#ifdef __powerpc64__
#define	PRI0ptrX	"016lx"
#else
#define	PRI0ptrX	"08x"
#endif

#define TODO			panic("%s: not implemented", __func__);

extern unsigned char _etext[];
extern unsigned char _end[];

extern uint32_t *bootinfo;

vm_paddr_t kernload;
vm_offset_t kernstart;
vm_size_t kernsize;

/* Message buffer and tables. */
static vm_offset_t data_start;
static vm_size_t data_end;

/* Phys/avail memory regions. */
static struct mem_region *availmem_regions;
static int availmem_regions_sz;
static struct mem_region *physmem_regions;
static int physmem_regions_sz;

/* Reserved KVA space and mutex for mmu_booke_zero_page. */
static vm_offset_t zero_page_va;
static struct mtx zero_page_mutex;

static struct mtx tlbivax_mutex;

/* Reserved KVA space and mutex for mmu_booke_copy_page. */
static vm_offset_t copy_page_src_va;
static vm_offset_t copy_page_dst_va;
static struct mtx copy_page_mutex;

/**************************************************************************/
/* PMAP */
/**************************************************************************/

static int mmu_booke_enter_locked(mmu_t, pmap_t, vm_offset_t, vm_page_t,
    vm_prot_t, u_int flags, int8_t psind);

unsigned int kptbl_min;		/* Index of the first kernel ptbl. */
unsigned int kernel_ptbls;	/* Number of KVA ptbls.
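 * Computed in mmu_booke_bootstrap() as
 * howmany(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, PDIR_SIZE).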
*/ 180 #ifdef __powerpc64__ 181 unsigned int kernel_pdirs; 182 #endif 183 184 /* 185 * If user pmap is processed with mmu_booke_remove and the resident count 186 * drops to 0, there are no more pages to remove, so we need not continue. 187 */ 188 #define PMAP_REMOVE_DONE(pmap) \ 189 ((pmap) != kernel_pmap && (pmap)->pm_stats.resident_count == 0) 190 191 #if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__) 192 extern int elf32_nxstack; 193 #endif 194 195 /**************************************************************************/ 196 /* TLB and TID handling */ 197 /**************************************************************************/ 198 199 /* Translation ID busy table */ 200 static volatile pmap_t tidbusy[MAXCPU][TID_MAX + 1]; 201 202 /* 203 * TLB0 capabilities (entry, way numbers etc.). These can vary between e500 204 * core revisions and should be read from h/w registers during early config. 205 */ 206 uint32_t tlb0_entries; 207 uint32_t tlb0_ways; 208 uint32_t tlb0_entries_per_way; 209 uint32_t tlb1_entries; 210 211 #define TLB0_ENTRIES (tlb0_entries) 212 #define TLB0_WAYS (tlb0_ways) 213 #define TLB0_ENTRIES_PER_WAY (tlb0_entries_per_way) 214 215 #define TLB1_ENTRIES (tlb1_entries) 216 217 static vm_offset_t tlb1_map_base = VM_MAXUSER_ADDRESS + PAGE_SIZE; 218 219 static tlbtid_t tid_alloc(struct pmap *); 220 static void tid_flush(tlbtid_t tid); 221 222 #ifdef __powerpc64__ 223 static void tlb_print_entry(int, uint32_t, uint64_t, uint32_t, uint32_t); 224 #else 225 static void tlb_print_entry(int, uint32_t, uint32_t, uint32_t, uint32_t); 226 #endif 227 228 static void tlb1_read_entry(tlb_entry_t *, unsigned int); 229 static void tlb1_write_entry(tlb_entry_t *, unsigned int); 230 static int tlb1_iomapped(int, vm_paddr_t, vm_size_t, vm_offset_t *); 231 static vm_size_t tlb1_mapin_region(vm_offset_t, vm_paddr_t, vm_size_t); 232 233 static vm_size_t tsize2size(unsigned int); 234 static unsigned int size2tsize(vm_size_t); 235 static unsigned int ilog2(unsigned int); 236 237 static void set_mas4_defaults(void); 238 239 static inline void tlb0_flush_entry(vm_offset_t); 240 static inline unsigned int tlb0_tableidx(vm_offset_t, unsigned int); 241 242 /**************************************************************************/ 243 /* Page table management */ 244 /**************************************************************************/ 245 246 static struct rwlock_padalign pvh_global_lock; 247 248 /* Data for the pv entry allocation mechanism */ 249 static uma_zone_t pvzone; 250 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; 251 252 #define PV_ENTRY_ZONE_MIN 2048 /* min pv entries in uma zone */ 253 254 #ifndef PMAP_SHPGPERPROC 255 #define PMAP_SHPGPERPROC 200 256 #endif 257 258 static void ptbl_init(void); 259 static struct ptbl_buf *ptbl_buf_alloc(void); 260 static void ptbl_buf_free(struct ptbl_buf *); 261 static void ptbl_free_pmap_ptbl(pmap_t, pte_t *); 262 263 #ifdef __powerpc64__ 264 static pte_t *ptbl_alloc(mmu_t, pmap_t, pte_t **, 265 unsigned int, boolean_t); 266 static void ptbl_free(mmu_t, pmap_t, pte_t **, unsigned int); 267 static void ptbl_hold(mmu_t, pmap_t, pte_t **, unsigned int); 268 static int ptbl_unhold(mmu_t, pmap_t, vm_offset_t); 269 #else 270 static pte_t *ptbl_alloc(mmu_t, pmap_t, unsigned int, boolean_t); 271 static void ptbl_free(mmu_t, pmap_t, unsigned int); 272 static void ptbl_hold(mmu_t, pmap_t, unsigned int); 273 static int ptbl_unhold(mmu_t, pmap_t, unsigned int); 274 #endif 275 276 static vm_paddr_t pte_vatopa(mmu_t, 
pmap_t, vm_offset_t); 277 static int pte_enter(mmu_t, pmap_t, vm_page_t, vm_offset_t, uint32_t, boolean_t); 278 static int pte_remove(mmu_t, pmap_t, vm_offset_t, uint8_t); 279 static pte_t *pte_find(mmu_t, pmap_t, vm_offset_t); 280 static void kernel_pte_alloc(vm_offset_t, vm_offset_t, vm_offset_t); 281 282 static pv_entry_t pv_alloc(void); 283 static void pv_free(pv_entry_t); 284 static void pv_insert(pmap_t, vm_offset_t, vm_page_t); 285 static void pv_remove(pmap_t, vm_offset_t, vm_page_t); 286 287 static void booke_pmap_init_qpages(void); 288 289 /* Number of kva ptbl buffers, each covering one ptbl (PTBL_PAGES). */ 290 #ifdef __powerpc64__ 291 #define PTBL_BUFS (16UL * 16 * 16) 292 #else 293 #define PTBL_BUFS (128 * 16) 294 #endif 295 296 struct ptbl_buf { 297 TAILQ_ENTRY(ptbl_buf) link; /* list link */ 298 vm_offset_t kva; /* va of mapping */ 299 }; 300 301 /* ptbl free list and a lock used for access synchronization. */ 302 static TAILQ_HEAD(, ptbl_buf) ptbl_buf_freelist; 303 static struct mtx ptbl_buf_freelist_lock; 304 305 /* Base address of kva space allocated fot ptbl bufs. */ 306 static vm_offset_t ptbl_buf_pool_vabase; 307 308 /* Pointer to ptbl_buf structures. */ 309 static struct ptbl_buf *ptbl_bufs; 310 311 #ifdef SMP 312 extern tlb_entry_t __boot_tlb1[]; 313 void pmap_bootstrap_ap(volatile uint32_t *); 314 #endif 315 316 /* 317 * Kernel MMU interface 318 */ 319 static void mmu_booke_clear_modify(mmu_t, vm_page_t); 320 static void mmu_booke_copy(mmu_t, pmap_t, pmap_t, vm_offset_t, 321 vm_size_t, vm_offset_t); 322 static void mmu_booke_copy_page(mmu_t, vm_page_t, vm_page_t); 323 static void mmu_booke_copy_pages(mmu_t, vm_page_t *, 324 vm_offset_t, vm_page_t *, vm_offset_t, int); 325 static int mmu_booke_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, 326 vm_prot_t, u_int flags, int8_t psind); 327 static void mmu_booke_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, 328 vm_page_t, vm_prot_t); 329 static void mmu_booke_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, 330 vm_prot_t); 331 static vm_paddr_t mmu_booke_extract(mmu_t, pmap_t, vm_offset_t); 332 static vm_page_t mmu_booke_extract_and_hold(mmu_t, pmap_t, vm_offset_t, 333 vm_prot_t); 334 static void mmu_booke_init(mmu_t); 335 static boolean_t mmu_booke_is_modified(mmu_t, vm_page_t); 336 static boolean_t mmu_booke_is_prefaultable(mmu_t, pmap_t, vm_offset_t); 337 static boolean_t mmu_booke_is_referenced(mmu_t, vm_page_t); 338 static int mmu_booke_ts_referenced(mmu_t, vm_page_t); 339 static vm_offset_t mmu_booke_map(mmu_t, vm_offset_t *, vm_paddr_t, vm_paddr_t, 340 int); 341 static int mmu_booke_mincore(mmu_t, pmap_t, vm_offset_t, 342 vm_paddr_t *); 343 static void mmu_booke_object_init_pt(mmu_t, pmap_t, vm_offset_t, 344 vm_object_t, vm_pindex_t, vm_size_t); 345 static boolean_t mmu_booke_page_exists_quick(mmu_t, pmap_t, vm_page_t); 346 static void mmu_booke_page_init(mmu_t, vm_page_t); 347 static int mmu_booke_page_wired_mappings(mmu_t, vm_page_t); 348 static void mmu_booke_pinit(mmu_t, pmap_t); 349 static void mmu_booke_pinit0(mmu_t, pmap_t); 350 static void mmu_booke_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, 351 vm_prot_t); 352 static void mmu_booke_qenter(mmu_t, vm_offset_t, vm_page_t *, int); 353 static void mmu_booke_qremove(mmu_t, vm_offset_t, int); 354 static void mmu_booke_release(mmu_t, pmap_t); 355 static void mmu_booke_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t); 356 static void mmu_booke_remove_all(mmu_t, vm_page_t); 357 static void mmu_booke_remove_write(mmu_t, vm_page_t); 358 static void 
mmu_booke_unwire(mmu_t, pmap_t, vm_offset_t, vm_offset_t); 359 static void mmu_booke_zero_page(mmu_t, vm_page_t); 360 static void mmu_booke_zero_page_area(mmu_t, vm_page_t, int, int); 361 static void mmu_booke_activate(mmu_t, struct thread *); 362 static void mmu_booke_deactivate(mmu_t, struct thread *); 363 static void mmu_booke_bootstrap(mmu_t, vm_offset_t, vm_offset_t); 364 static void *mmu_booke_mapdev(mmu_t, vm_paddr_t, vm_size_t); 365 static void *mmu_booke_mapdev_attr(mmu_t, vm_paddr_t, vm_size_t, vm_memattr_t); 366 static void mmu_booke_unmapdev(mmu_t, vm_offset_t, vm_size_t); 367 static vm_paddr_t mmu_booke_kextract(mmu_t, vm_offset_t); 368 static void mmu_booke_kenter(mmu_t, vm_offset_t, vm_paddr_t); 369 static void mmu_booke_kenter_attr(mmu_t, vm_offset_t, vm_paddr_t, vm_memattr_t); 370 static void mmu_booke_kremove(mmu_t, vm_offset_t); 371 static boolean_t mmu_booke_dev_direct_mapped(mmu_t, vm_paddr_t, vm_size_t); 372 static void mmu_booke_sync_icache(mmu_t, pmap_t, vm_offset_t, 373 vm_size_t); 374 static void mmu_booke_dumpsys_map(mmu_t, vm_paddr_t pa, size_t, 375 void **); 376 static void mmu_booke_dumpsys_unmap(mmu_t, vm_paddr_t pa, size_t, 377 void *); 378 static void mmu_booke_scan_init(mmu_t); 379 static vm_offset_t mmu_booke_quick_enter_page(mmu_t mmu, vm_page_t m); 380 static void mmu_booke_quick_remove_page(mmu_t mmu, vm_offset_t addr); 381 static int mmu_booke_change_attr(mmu_t mmu, vm_offset_t addr, 382 vm_size_t sz, vm_memattr_t mode); 383 384 static mmu_method_t mmu_booke_methods[] = { 385 /* pmap dispatcher interface */ 386 MMUMETHOD(mmu_clear_modify, mmu_booke_clear_modify), 387 MMUMETHOD(mmu_copy, mmu_booke_copy), 388 MMUMETHOD(mmu_copy_page, mmu_booke_copy_page), 389 MMUMETHOD(mmu_copy_pages, mmu_booke_copy_pages), 390 MMUMETHOD(mmu_enter, mmu_booke_enter), 391 MMUMETHOD(mmu_enter_object, mmu_booke_enter_object), 392 MMUMETHOD(mmu_enter_quick, mmu_booke_enter_quick), 393 MMUMETHOD(mmu_extract, mmu_booke_extract), 394 MMUMETHOD(mmu_extract_and_hold, mmu_booke_extract_and_hold), 395 MMUMETHOD(mmu_init, mmu_booke_init), 396 MMUMETHOD(mmu_is_modified, mmu_booke_is_modified), 397 MMUMETHOD(mmu_is_prefaultable, mmu_booke_is_prefaultable), 398 MMUMETHOD(mmu_is_referenced, mmu_booke_is_referenced), 399 MMUMETHOD(mmu_ts_referenced, mmu_booke_ts_referenced), 400 MMUMETHOD(mmu_map, mmu_booke_map), 401 MMUMETHOD(mmu_mincore, mmu_booke_mincore), 402 MMUMETHOD(mmu_object_init_pt, mmu_booke_object_init_pt), 403 MMUMETHOD(mmu_page_exists_quick,mmu_booke_page_exists_quick), 404 MMUMETHOD(mmu_page_init, mmu_booke_page_init), 405 MMUMETHOD(mmu_page_wired_mappings, mmu_booke_page_wired_mappings), 406 MMUMETHOD(mmu_pinit, mmu_booke_pinit), 407 MMUMETHOD(mmu_pinit0, mmu_booke_pinit0), 408 MMUMETHOD(mmu_protect, mmu_booke_protect), 409 MMUMETHOD(mmu_qenter, mmu_booke_qenter), 410 MMUMETHOD(mmu_qremove, mmu_booke_qremove), 411 MMUMETHOD(mmu_release, mmu_booke_release), 412 MMUMETHOD(mmu_remove, mmu_booke_remove), 413 MMUMETHOD(mmu_remove_all, mmu_booke_remove_all), 414 MMUMETHOD(mmu_remove_write, mmu_booke_remove_write), 415 MMUMETHOD(mmu_sync_icache, mmu_booke_sync_icache), 416 MMUMETHOD(mmu_unwire, mmu_booke_unwire), 417 MMUMETHOD(mmu_zero_page, mmu_booke_zero_page), 418 MMUMETHOD(mmu_zero_page_area, mmu_booke_zero_page_area), 419 MMUMETHOD(mmu_activate, mmu_booke_activate), 420 MMUMETHOD(mmu_deactivate, mmu_booke_deactivate), 421 MMUMETHOD(mmu_quick_enter_page, mmu_booke_quick_enter_page), 422 MMUMETHOD(mmu_quick_remove_page, mmu_booke_quick_remove_page), 423 424 /* Internal 
interfaces */ 425 MMUMETHOD(mmu_bootstrap, mmu_booke_bootstrap), 426 MMUMETHOD(mmu_dev_direct_mapped,mmu_booke_dev_direct_mapped), 427 MMUMETHOD(mmu_mapdev, mmu_booke_mapdev), 428 MMUMETHOD(mmu_mapdev_attr, mmu_booke_mapdev_attr), 429 MMUMETHOD(mmu_kenter, mmu_booke_kenter), 430 MMUMETHOD(mmu_kenter_attr, mmu_booke_kenter_attr), 431 MMUMETHOD(mmu_kextract, mmu_booke_kextract), 432 MMUMETHOD(mmu_kremove, mmu_booke_kremove), 433 MMUMETHOD(mmu_unmapdev, mmu_booke_unmapdev), 434 MMUMETHOD(mmu_change_attr, mmu_booke_change_attr), 435 436 /* dumpsys() support */ 437 MMUMETHOD(mmu_dumpsys_map, mmu_booke_dumpsys_map), 438 MMUMETHOD(mmu_dumpsys_unmap, mmu_booke_dumpsys_unmap), 439 MMUMETHOD(mmu_scan_init, mmu_booke_scan_init), 440 441 { 0, 0 } 442 }; 443 444 MMU_DEF(booke_mmu, MMU_TYPE_BOOKE, mmu_booke_methods, 0); 445 446 static __inline uint32_t 447 tlb_calc_wimg(vm_paddr_t pa, vm_memattr_t ma) 448 { 449 uint32_t attrib; 450 int i; 451 452 if (ma != VM_MEMATTR_DEFAULT) { 453 switch (ma) { 454 case VM_MEMATTR_UNCACHEABLE: 455 return (MAS2_I | MAS2_G); 456 case VM_MEMATTR_WRITE_COMBINING: 457 case VM_MEMATTR_WRITE_BACK: 458 case VM_MEMATTR_PREFETCHABLE: 459 return (MAS2_I); 460 case VM_MEMATTR_WRITE_THROUGH: 461 return (MAS2_W | MAS2_M); 462 case VM_MEMATTR_CACHEABLE: 463 return (MAS2_M); 464 } 465 } 466 467 /* 468 * Assume the page is cache inhibited and access is guarded unless 469 * it's in our available memory array. 470 */ 471 attrib = _TLB_ENTRY_IO; 472 for (i = 0; i < physmem_regions_sz; i++) { 473 if ((pa >= physmem_regions[i].mr_start) && 474 (pa < (physmem_regions[i].mr_start + 475 physmem_regions[i].mr_size))) { 476 attrib = _TLB_ENTRY_MEM; 477 break; 478 } 479 } 480 481 return (attrib); 482 } 483 484 static inline void 485 tlb_miss_lock(void) 486 { 487 #ifdef SMP 488 struct pcpu *pc; 489 490 if (!smp_started) 491 return; 492 493 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 494 if (pc != pcpup) { 495 496 CTR3(KTR_PMAP, "%s: tlb miss LOCK of CPU=%d, " 497 "tlb_lock=%p", __func__, pc->pc_cpuid, pc->pc_booke_tlb_lock); 498 499 KASSERT((pc->pc_cpuid != PCPU_GET(cpuid)), 500 ("tlb_miss_lock: tried to lock self")); 501 502 tlb_lock(pc->pc_booke_tlb_lock); 503 504 CTR1(KTR_PMAP, "%s: locked", __func__); 505 } 506 } 507 #endif 508 } 509 510 static inline void 511 tlb_miss_unlock(void) 512 { 513 #ifdef SMP 514 struct pcpu *pc; 515 516 if (!smp_started) 517 return; 518 519 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 520 if (pc != pcpup) { 521 CTR2(KTR_PMAP, "%s: tlb miss UNLOCK of CPU=%d", 522 __func__, pc->pc_cpuid); 523 524 tlb_unlock(pc->pc_booke_tlb_lock); 525 526 CTR1(KTR_PMAP, "%s: unlocked", __func__); 527 } 528 } 529 #endif 530 } 531 532 /* Return number of entries in TLB0. */ 533 static __inline void 534 tlb0_get_tlbconf(void) 535 { 536 uint32_t tlb0_cfg; 537 538 tlb0_cfg = mfspr(SPR_TLB0CFG); 539 tlb0_entries = tlb0_cfg & TLBCFG_NENTRY_MASK; 540 tlb0_ways = (tlb0_cfg & TLBCFG_ASSOC_MASK) >> TLBCFG_ASSOC_SHIFT; 541 tlb0_entries_per_way = tlb0_entries / tlb0_ways; 542 } 543 544 /* Return number of entries in TLB1. */ 545 static __inline void 546 tlb1_get_tlbconf(void) 547 { 548 uint32_t tlb1_cfg; 549 550 tlb1_cfg = mfspr(SPR_TLB1CFG); 551 tlb1_entries = tlb1_cfg & TLBCFG_NENTRY_MASK; 552 } 553 554 /**************************************************************************/ 555 /* Page table related */ 556 /**************************************************************************/ 557 558 #ifdef __powerpc64__ 559 /* Initialize pool of kva ptbl buffers. 
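 * Each buffer reserves enough contiguous KVA to map either a page table or
 * a page directory, so the per-buffer stride below is
 * MAX(PTBL_PAGES, PDIR_PAGES) pages carved out of ptbl_buf_pool_vabase.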
*/ 560 static void 561 ptbl_init(void) 562 { 563 int i; 564 565 mtx_init(&ptbl_buf_freelist_lock, "ptbl bufs lock", NULL, MTX_DEF); 566 TAILQ_INIT(&ptbl_buf_freelist); 567 568 for (i = 0; i < PTBL_BUFS; i++) { 569 ptbl_bufs[i].kva = ptbl_buf_pool_vabase + 570 i * MAX(PTBL_PAGES,PDIR_PAGES) * PAGE_SIZE; 571 TAILQ_INSERT_TAIL(&ptbl_buf_freelist, &ptbl_bufs[i], link); 572 } 573 } 574 575 /* Get an sf_buf from the freelist. */ 576 static struct ptbl_buf * 577 ptbl_buf_alloc(void) 578 { 579 struct ptbl_buf *buf; 580 581 mtx_lock(&ptbl_buf_freelist_lock); 582 buf = TAILQ_FIRST(&ptbl_buf_freelist); 583 if (buf != NULL) 584 TAILQ_REMOVE(&ptbl_buf_freelist, buf, link); 585 mtx_unlock(&ptbl_buf_freelist_lock); 586 587 return (buf); 588 } 589 590 /* Return ptbl buff to free pool. */ 591 static void 592 ptbl_buf_free(struct ptbl_buf *buf) 593 { 594 mtx_lock(&ptbl_buf_freelist_lock); 595 TAILQ_INSERT_TAIL(&ptbl_buf_freelist, buf, link); 596 mtx_unlock(&ptbl_buf_freelist_lock); 597 } 598 599 /* 600 * Search the list of allocated ptbl bufs and find on list of allocated ptbls 601 */ 602 static void 603 ptbl_free_pmap_ptbl(pmap_t pmap, pte_t * ptbl) 604 { 605 struct ptbl_buf *pbuf; 606 607 TAILQ_FOREACH(pbuf, &pmap->pm_ptbl_list, link) { 608 if (pbuf->kva == (vm_offset_t) ptbl) { 609 /* Remove from pmap ptbl buf list. */ 610 TAILQ_REMOVE(&pmap->pm_ptbl_list, pbuf, link); 611 612 /* Free corresponding ptbl buf. */ 613 ptbl_buf_free(pbuf); 614 615 break; 616 } 617 } 618 } 619 620 /* Get a pointer to a PTE in a page table. */ 621 static __inline pte_t * 622 pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va) 623 { 624 pte_t **pdir; 625 pte_t *ptbl; 626 627 KASSERT((pmap != NULL), ("pte_find: invalid pmap")); 628 629 pdir = pmap->pm_pp2d[PP2D_IDX(va)]; 630 if (!pdir) 631 return NULL; 632 ptbl = pdir[PDIR_IDX(va)]; 633 return ((ptbl != NULL) ? &ptbl[PTBL_IDX(va)] : NULL); 634 } 635 636 /* 637 * Search the list of allocated pdir bufs and find on list of allocated pdirs 638 */ 639 static void 640 ptbl_free_pmap_pdir(mmu_t mmu, pmap_t pmap, pte_t ** pdir) 641 { 642 struct ptbl_buf *pbuf; 643 644 TAILQ_FOREACH(pbuf, &pmap->pm_pdir_list, link) { 645 if (pbuf->kva == (vm_offset_t) pdir) { 646 /* Remove from pmap ptbl buf list. */ 647 TAILQ_REMOVE(&pmap->pm_pdir_list, pbuf, link); 648 649 /* Free corresponding pdir buf. */ 650 ptbl_buf_free(pbuf); 651 652 break; 653 } 654 } 655 } 656 /* Free pdir pages and invalidate pdir entry. */ 657 static void 658 pdir_free(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx) 659 { 660 pte_t **pdir; 661 vm_paddr_t pa; 662 vm_offset_t va; 663 vm_page_t m; 664 int i; 665 666 pdir = pmap->pm_pp2d[pp2d_idx]; 667 668 KASSERT((pdir != NULL), ("pdir_free: null pdir")); 669 670 pmap->pm_pp2d[pp2d_idx] = NULL; 671 672 for (i = 0; i < PDIR_PAGES; i++) { 673 va = ((vm_offset_t) pdir + (i * PAGE_SIZE)); 674 pa = pte_vatopa(mmu, kernel_pmap, va); 675 m = PHYS_TO_VM_PAGE(pa); 676 vm_page_free_zero(m); 677 atomic_subtract_int(&vm_cnt.v_wire_count, 1); 678 pmap_kremove(va); 679 } 680 681 ptbl_free_pmap_pdir(mmu, pmap, pdir); 682 } 683 684 /* 685 * Decrement pdir pages hold count and attempt to free pdir pages. Called 686 * when removing directory entry from pdir. 687 * 688 * Return 1 if pdir pages were freed. 
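 * The hold count lives in the wire_count of each backing page; all
 * PDIR_PAGES pages carry the same value, so only the last one is checked.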
689 */ 690 static int 691 pdir_unhold(mmu_t mmu, pmap_t pmap, u_int pp2d_idx) 692 { 693 pte_t **pdir; 694 vm_paddr_t pa; 695 vm_page_t m; 696 int i; 697 698 KASSERT((pmap != kernel_pmap), 699 ("pdir_unhold: unholding kernel pdir!")); 700 701 pdir = pmap->pm_pp2d[pp2d_idx]; 702 703 KASSERT(((vm_offset_t) pdir >= VM_MIN_KERNEL_ADDRESS), 704 ("pdir_unhold: non kva pdir")); 705 706 /* decrement hold count */ 707 for (i = 0; i < PDIR_PAGES; i++) { 708 pa = pte_vatopa(mmu, kernel_pmap, 709 (vm_offset_t) pdir + (i * PAGE_SIZE)); 710 m = PHYS_TO_VM_PAGE(pa); 711 m->wire_count--; 712 } 713 714 /* 715 * Free pdir pages if there are no dir entries in this pdir. 716 * wire_count has the same value for all ptbl pages, so check the 717 * last page. 718 */ 719 if (m->wire_count == 0) { 720 pdir_free(mmu, pmap, pp2d_idx); 721 return (1); 722 } 723 return (0); 724 } 725 726 /* 727 * Increment hold count for pdir pages. This routine is used when new ptlb 728 * entry is being inserted into pdir. 729 */ 730 static void 731 pdir_hold(mmu_t mmu, pmap_t pmap, pte_t ** pdir) 732 { 733 vm_paddr_t pa; 734 vm_page_t m; 735 int i; 736 737 KASSERT((pmap != kernel_pmap), 738 ("pdir_hold: holding kernel pdir!")); 739 740 KASSERT((pdir != NULL), ("pdir_hold: null pdir")); 741 742 for (i = 0; i < PDIR_PAGES; i++) { 743 pa = pte_vatopa(mmu, kernel_pmap, 744 (vm_offset_t) pdir + (i * PAGE_SIZE)); 745 m = PHYS_TO_VM_PAGE(pa); 746 m->wire_count++; 747 } 748 } 749 750 /* Allocate page table. */ 751 static pte_t * 752 ptbl_alloc(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx, 753 boolean_t nosleep) 754 { 755 vm_page_t mtbl [PTBL_PAGES]; 756 vm_page_t m; 757 struct ptbl_buf *pbuf; 758 unsigned int pidx; 759 pte_t *ptbl; 760 int i, j; 761 int req; 762 763 KASSERT((pdir[pdir_idx] == NULL), 764 ("%s: valid ptbl entry exists!", __func__)); 765 766 pbuf = ptbl_buf_alloc(); 767 if (pbuf == NULL) 768 panic("%s: couldn't alloc kernel virtual memory", __func__); 769 770 ptbl = (pte_t *) pbuf->kva; 771 772 for (i = 0; i < PTBL_PAGES; i++) { 773 pidx = (PTBL_PAGES * pdir_idx) + i; 774 req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; 775 while ((m = vm_page_alloc(NULL, pidx, req)) == NULL) { 776 PMAP_UNLOCK(pmap); 777 rw_wunlock(&pvh_global_lock); 778 if (nosleep) { 779 ptbl_free_pmap_ptbl(pmap, ptbl); 780 for (j = 0; j < i; j++) 781 vm_page_free(mtbl[j]); 782 atomic_subtract_int(&vm_cnt.v_wire_count, i); 783 return (NULL); 784 } 785 VM_WAIT; 786 rw_wlock(&pvh_global_lock); 787 PMAP_LOCK(pmap); 788 } 789 mtbl[i] = m; 790 } 791 792 /* Mapin allocated pages into kernel_pmap. */ 793 mmu_booke_qenter(mmu, (vm_offset_t) ptbl, mtbl, PTBL_PAGES); 794 /* Zero whole ptbl. */ 795 bzero((caddr_t) ptbl, PTBL_PAGES * PAGE_SIZE); 796 797 /* Add pbuf to the pmap ptbl bufs list. */ 798 TAILQ_INSERT_TAIL(&pmap->pm_ptbl_list, pbuf, link); 799 800 return (ptbl); 801 } 802 803 /* Free ptbl pages and invalidate pdir entry. 
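 * (The 32-bit variant further below additionally takes tlbivax_mutex and
 * tlb_miss_lock() while clearing the pdir slot.)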
*/ 804 static void 805 ptbl_free(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx) 806 { 807 pte_t *ptbl; 808 vm_paddr_t pa; 809 vm_offset_t va; 810 vm_page_t m; 811 int i; 812 813 ptbl = pdir[pdir_idx]; 814 815 KASSERT((ptbl != NULL), ("ptbl_free: null ptbl")); 816 817 pdir[pdir_idx] = NULL; 818 819 for (i = 0; i < PTBL_PAGES; i++) { 820 va = ((vm_offset_t) ptbl + (i * PAGE_SIZE)); 821 pa = pte_vatopa(mmu, kernel_pmap, va); 822 m = PHYS_TO_VM_PAGE(pa); 823 vm_page_free_zero(m); 824 atomic_subtract_int(&vm_cnt.v_wire_count, 1); 825 pmap_kremove(va); 826 } 827 828 ptbl_free_pmap_ptbl(pmap, ptbl); 829 } 830 831 /* 832 * Decrement ptbl pages hold count and attempt to free ptbl pages. Called 833 * when removing pte entry from ptbl. 834 * 835 * Return 1 if ptbl pages were freed. 836 */ 837 static int 838 ptbl_unhold(mmu_t mmu, pmap_t pmap, vm_offset_t va) 839 { 840 pte_t *ptbl; 841 vm_paddr_t pa; 842 vm_page_t m; 843 u_int pp2d_idx; 844 pte_t **pdir; 845 u_int pdir_idx; 846 int i; 847 848 pp2d_idx = PP2D_IDX(va); 849 pdir_idx = PDIR_IDX(va); 850 851 KASSERT((pmap != kernel_pmap), 852 ("ptbl_unhold: unholding kernel ptbl!")); 853 854 pdir = pmap->pm_pp2d[pp2d_idx]; 855 ptbl = pdir[pdir_idx]; 856 857 KASSERT(((vm_offset_t) ptbl >= VM_MIN_KERNEL_ADDRESS), 858 ("ptbl_unhold: non kva ptbl")); 859 860 /* decrement hold count */ 861 for (i = 0; i < PTBL_PAGES; i++) { 862 pa = pte_vatopa(mmu, kernel_pmap, 863 (vm_offset_t) ptbl + (i * PAGE_SIZE)); 864 m = PHYS_TO_VM_PAGE(pa); 865 m->wire_count--; 866 } 867 868 /* 869 * Free ptbl pages if there are no pte entries in this ptbl. 870 * wire_count has the same value for all ptbl pages, so check the 871 * last page. 872 */ 873 if (m->wire_count == 0) { 874 /* A pair of indirect entries might point to this ptbl page */ 875 #if 0 876 tlb_flush_entry(pmap, va & ~((2UL * PAGE_SIZE_1M) - 1), 877 TLB_SIZE_1M, MAS6_SIND); 878 tlb_flush_entry(pmap, (va & ~((2UL * PAGE_SIZE_1M) - 1)) | PAGE_SIZE_1M, 879 TLB_SIZE_1M, MAS6_SIND); 880 #endif 881 ptbl_free(mmu, pmap, pdir, pdir_idx); 882 pdir_unhold(mmu, pmap, pp2d_idx); 883 return (1); 884 } 885 return (0); 886 } 887 888 /* 889 * Increment hold count for ptbl pages. This routine is used when new pte 890 * entry is being inserted into ptbl. 891 */ 892 static void 893 ptbl_hold(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx) 894 { 895 vm_paddr_t pa; 896 pte_t *ptbl; 897 vm_page_t m; 898 int i; 899 900 KASSERT((pmap != kernel_pmap), 901 ("ptbl_hold: holding kernel ptbl!")); 902 903 ptbl = pdir[pdir_idx]; 904 905 KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl")); 906 907 for (i = 0; i < PTBL_PAGES; i++) { 908 pa = pte_vatopa(mmu, kernel_pmap, 909 (vm_offset_t) ptbl + (i * PAGE_SIZE)); 910 m = PHYS_TO_VM_PAGE(pa); 911 m->wire_count++; 912 } 913 } 914 #else 915 916 /* Initialize pool of kva ptbl buffers. */ 917 static void 918 ptbl_init(void) 919 { 920 int i; 921 922 CTR3(KTR_PMAP, "%s: s (ptbl_bufs = 0x%08x size 0x%08x)", __func__, 923 (uint32_t)ptbl_bufs, sizeof(struct ptbl_buf) * PTBL_BUFS); 924 CTR3(KTR_PMAP, "%s: s (ptbl_buf_pool_vabase = 0x%08x size = 0x%08x)", 925 __func__, ptbl_buf_pool_vabase, PTBL_BUFS * PTBL_PAGES * PAGE_SIZE); 926 927 mtx_init(&ptbl_buf_freelist_lock, "ptbl bufs lock", NULL, MTX_DEF); 928 TAILQ_INIT(&ptbl_buf_freelist); 929 930 for (i = 0; i < PTBL_BUFS; i++) { 931 ptbl_bufs[i].kva = 932 ptbl_buf_pool_vabase + i * PTBL_PAGES * PAGE_SIZE; 933 TAILQ_INSERT_TAIL(&ptbl_buf_freelist, &ptbl_bufs[i], link); 934 } 935 } 936 937 /* Get a ptbl_buf from the freelist. 
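 * Returns NULL when the pool is exhausted; the freelist is protected by
 * ptbl_buf_freelist_lock.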
*/ 938 static struct ptbl_buf * 939 ptbl_buf_alloc(void) 940 { 941 struct ptbl_buf *buf; 942 943 mtx_lock(&ptbl_buf_freelist_lock); 944 buf = TAILQ_FIRST(&ptbl_buf_freelist); 945 if (buf != NULL) 946 TAILQ_REMOVE(&ptbl_buf_freelist, buf, link); 947 mtx_unlock(&ptbl_buf_freelist_lock); 948 949 CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf); 950 951 return (buf); 952 } 953 954 /* Return ptbl buff to free pool. */ 955 static void 956 ptbl_buf_free(struct ptbl_buf *buf) 957 { 958 959 CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf); 960 961 mtx_lock(&ptbl_buf_freelist_lock); 962 TAILQ_INSERT_TAIL(&ptbl_buf_freelist, buf, link); 963 mtx_unlock(&ptbl_buf_freelist_lock); 964 } 965 966 /* 967 * Search the list of allocated ptbl bufs and find on list of allocated ptbls 968 */ 969 static void 970 ptbl_free_pmap_ptbl(pmap_t pmap, pte_t *ptbl) 971 { 972 struct ptbl_buf *pbuf; 973 974 CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl); 975 976 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 977 978 TAILQ_FOREACH(pbuf, &pmap->pm_ptbl_list, link) 979 if (pbuf->kva == (vm_offset_t)ptbl) { 980 /* Remove from pmap ptbl buf list. */ 981 TAILQ_REMOVE(&pmap->pm_ptbl_list, pbuf, link); 982 983 /* Free corresponding ptbl buf. */ 984 ptbl_buf_free(pbuf); 985 break; 986 } 987 } 988 989 /* Allocate page table. */ 990 static pte_t * 991 ptbl_alloc(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx, boolean_t nosleep) 992 { 993 vm_page_t mtbl[PTBL_PAGES]; 994 vm_page_t m; 995 struct ptbl_buf *pbuf; 996 unsigned int pidx; 997 pte_t *ptbl; 998 int i, j; 999 1000 CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, 1001 (pmap == kernel_pmap), pdir_idx); 1002 1003 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 1004 ("ptbl_alloc: invalid pdir_idx")); 1005 KASSERT((pmap->pm_pdir[pdir_idx] == NULL), 1006 ("pte_alloc: valid ptbl entry exists!")); 1007 1008 pbuf = ptbl_buf_alloc(); 1009 if (pbuf == NULL) 1010 panic("pte_alloc: couldn't alloc kernel virtual memory"); 1011 1012 ptbl = (pte_t *)pbuf->kva; 1013 1014 CTR2(KTR_PMAP, "%s: ptbl kva = %p", __func__, ptbl); 1015 1016 for (i = 0; i < PTBL_PAGES; i++) { 1017 pidx = (PTBL_PAGES * pdir_idx) + i; 1018 while ((m = vm_page_alloc(NULL, pidx, 1019 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { 1020 PMAP_UNLOCK(pmap); 1021 rw_wunlock(&pvh_global_lock); 1022 if (nosleep) { 1023 ptbl_free_pmap_ptbl(pmap, ptbl); 1024 for (j = 0; j < i; j++) 1025 vm_page_free(mtbl[j]); 1026 atomic_subtract_int(&vm_cnt.v_wire_count, i); 1027 return (NULL); 1028 } 1029 VM_WAIT; 1030 rw_wlock(&pvh_global_lock); 1031 PMAP_LOCK(pmap); 1032 } 1033 mtbl[i] = m; 1034 } 1035 1036 /* Map allocated pages into kernel_pmap. */ 1037 mmu_booke_qenter(mmu, (vm_offset_t)ptbl, mtbl, PTBL_PAGES); 1038 1039 /* Zero whole ptbl. */ 1040 bzero((caddr_t)ptbl, PTBL_PAGES * PAGE_SIZE); 1041 1042 /* Add pbuf to the pmap ptbl bufs list. */ 1043 TAILQ_INSERT_TAIL(&pmap->pm_ptbl_list, pbuf, link); 1044 1045 return (ptbl); 1046 } 1047 1048 /* Free ptbl pages and invalidate pdir entry. 
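 * The pdir slot is cleared under tlbivax_mutex and tlb_miss_lock() so other
 * CPUs cannot walk the table while it is being released; only then are the
 * backing pages unmapped and freed.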
*/ 1049 static void 1050 ptbl_free(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) 1051 { 1052 pte_t *ptbl; 1053 vm_paddr_t pa; 1054 vm_offset_t va; 1055 vm_page_t m; 1056 int i; 1057 1058 CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, 1059 (pmap == kernel_pmap), pdir_idx); 1060 1061 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 1062 ("ptbl_free: invalid pdir_idx")); 1063 1064 ptbl = pmap->pm_pdir[pdir_idx]; 1065 1066 CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl); 1067 1068 KASSERT((ptbl != NULL), ("ptbl_free: null ptbl")); 1069 1070 /* 1071 * Invalidate the pdir entry as soon as possible, so that other CPUs 1072 * don't attempt to look up the page tables we are releasing. 1073 */ 1074 mtx_lock_spin(&tlbivax_mutex); 1075 tlb_miss_lock(); 1076 1077 pmap->pm_pdir[pdir_idx] = NULL; 1078 1079 tlb_miss_unlock(); 1080 mtx_unlock_spin(&tlbivax_mutex); 1081 1082 for (i = 0; i < PTBL_PAGES; i++) { 1083 va = ((vm_offset_t)ptbl + (i * PAGE_SIZE)); 1084 pa = pte_vatopa(mmu, kernel_pmap, va); 1085 m = PHYS_TO_VM_PAGE(pa); 1086 vm_page_free_zero(m); 1087 atomic_subtract_int(&vm_cnt.v_wire_count, 1); 1088 mmu_booke_kremove(mmu, va); 1089 } 1090 1091 ptbl_free_pmap_ptbl(pmap, ptbl); 1092 } 1093 1094 /* 1095 * Decrement ptbl pages hold count and attempt to free ptbl pages. 1096 * Called when removing pte entry from ptbl. 1097 * 1098 * Return 1 if ptbl pages were freed. 1099 */ 1100 static int 1101 ptbl_unhold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) 1102 { 1103 pte_t *ptbl; 1104 vm_paddr_t pa; 1105 vm_page_t m; 1106 int i; 1107 1108 CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, 1109 (pmap == kernel_pmap), pdir_idx); 1110 1111 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 1112 ("ptbl_unhold: invalid pdir_idx")); 1113 KASSERT((pmap != kernel_pmap), 1114 ("ptbl_unhold: unholding kernel ptbl!")); 1115 1116 ptbl = pmap->pm_pdir[pdir_idx]; 1117 1118 //debugf("ptbl_unhold: ptbl = 0x%08x\n", (u_int32_t)ptbl); 1119 KASSERT(((vm_offset_t)ptbl >= VM_MIN_KERNEL_ADDRESS), 1120 ("ptbl_unhold: non kva ptbl")); 1121 1122 /* decrement hold count */ 1123 for (i = 0; i < PTBL_PAGES; i++) { 1124 pa = pte_vatopa(mmu, kernel_pmap, 1125 (vm_offset_t)ptbl + (i * PAGE_SIZE)); 1126 m = PHYS_TO_VM_PAGE(pa); 1127 m->wire_count--; 1128 } 1129 1130 /* 1131 * Free ptbl pages if there are no pte etries in this ptbl. 1132 * wire_count has the same value for all ptbl pages, so check the last 1133 * page. 1134 */ 1135 if (m->wire_count == 0) { 1136 ptbl_free(mmu, pmap, pdir_idx); 1137 1138 //debugf("ptbl_unhold: e (freed ptbl)\n"); 1139 return (1); 1140 } 1141 1142 return (0); 1143 } 1144 1145 /* 1146 * Increment hold count for ptbl pages. This routine is used when a new pte 1147 * entry is being inserted into the ptbl. 
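 * The count is kept in the wire_count of every page backing the ptbl and
 * is dropped again by ptbl_unhold() when a pte is removed.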
1148 */ 1149 static void 1150 ptbl_hold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) 1151 { 1152 vm_paddr_t pa; 1153 pte_t *ptbl; 1154 vm_page_t m; 1155 int i; 1156 1157 CTR3(KTR_PMAP, "%s: pmap = %p pdir_idx = %d", __func__, pmap, 1158 pdir_idx); 1159 1160 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 1161 ("ptbl_hold: invalid pdir_idx")); 1162 KASSERT((pmap != kernel_pmap), 1163 ("ptbl_hold: holding kernel ptbl!")); 1164 1165 ptbl = pmap->pm_pdir[pdir_idx]; 1166 1167 KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl")); 1168 1169 for (i = 0; i < PTBL_PAGES; i++) { 1170 pa = pte_vatopa(mmu, kernel_pmap, 1171 (vm_offset_t)ptbl + (i * PAGE_SIZE)); 1172 m = PHYS_TO_VM_PAGE(pa); 1173 m->wire_count++; 1174 } 1175 } 1176 #endif 1177 1178 /* Allocate pv_entry structure. */ 1179 pv_entry_t 1180 pv_alloc(void) 1181 { 1182 pv_entry_t pv; 1183 1184 pv_entry_count++; 1185 if (pv_entry_count > pv_entry_high_water) 1186 pagedaemon_wakeup(); 1187 pv = uma_zalloc(pvzone, M_NOWAIT); 1188 1189 return (pv); 1190 } 1191 1192 /* Free pv_entry structure. */ 1193 static __inline void 1194 pv_free(pv_entry_t pve) 1195 { 1196 1197 pv_entry_count--; 1198 uma_zfree(pvzone, pve); 1199 } 1200 1201 1202 /* Allocate and initialize pv_entry structure. */ 1203 static void 1204 pv_insert(pmap_t pmap, vm_offset_t va, vm_page_t m) 1205 { 1206 pv_entry_t pve; 1207 1208 //int su = (pmap == kernel_pmap); 1209 //debugf("pv_insert: s (su = %d pmap = 0x%08x va = 0x%08x m = 0x%08x)\n", su, 1210 // (u_int32_t)pmap, va, (u_int32_t)m); 1211 1212 pve = pv_alloc(); 1213 if (pve == NULL) 1214 panic("pv_insert: no pv entries!"); 1215 1216 pve->pv_pmap = pmap; 1217 pve->pv_va = va; 1218 1219 /* add to pv_list */ 1220 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1221 rw_assert(&pvh_global_lock, RA_WLOCKED); 1222 1223 TAILQ_INSERT_TAIL(&m->md.pv_list, pve, pv_link); 1224 1225 //debugf("pv_insert: e\n"); 1226 } 1227 1228 /* Destroy pv entry. */ 1229 static void 1230 pv_remove(pmap_t pmap, vm_offset_t va, vm_page_t m) 1231 { 1232 pv_entry_t pve; 1233 1234 //int su = (pmap == kernel_pmap); 1235 //debugf("pv_remove: s (su = %d pmap = 0x%08x va = 0x%08x)\n", su, (u_int32_t)pmap, va); 1236 1237 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1238 rw_assert(&pvh_global_lock, RA_WLOCKED); 1239 1240 /* find pv entry */ 1241 TAILQ_FOREACH(pve, &m->md.pv_list, pv_link) { 1242 if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) { 1243 /* remove from pv_list */ 1244 TAILQ_REMOVE(&m->md.pv_list, pve, pv_link); 1245 if (TAILQ_EMPTY(&m->md.pv_list)) 1246 vm_page_aflag_clear(m, PGA_WRITEABLE); 1247 1248 /* free pv entry struct */ 1249 pv_free(pve); 1250 break; 1251 } 1252 } 1253 1254 //debugf("pv_remove: e\n"); 1255 } 1256 1257 #ifdef __powerpc64__ 1258 /* 1259 * Clean pte entry, try to free page table page if requested. 1260 * 1261 * Return 1 if ptbl pages were freed, otherwise return 0. 1262 */ 1263 static int 1264 pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, u_int8_t flags) 1265 { 1266 vm_page_t m; 1267 pte_t *pte; 1268 1269 pte = pte_find(mmu, pmap, va); 1270 KASSERT(pte != NULL, ("%s: NULL pte", __func__)); 1271 1272 if (!PTE_ISVALID(pte)) 1273 return (0); 1274 1275 /* Get vm_page_t for mapped pte. */ 1276 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 1277 1278 if (PTE_ISWIRED(pte)) 1279 pmap->pm_stats.wired_count--; 1280 1281 /* Handle managed entry. */ 1282 if (PTE_ISMANAGED(pte)) { 1283 1284 /* Handle modified pages. */ 1285 if (PTE_ISMODIFIED(pte)) 1286 vm_page_dirty(m); 1287 1288 /* Referenced pages. 
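 * Propagate the referenced bit to the vm_page before the pte is cleared.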
*/ 1289 if (PTE_ISREFERENCED(pte)) 1290 vm_page_aflag_set(m, PGA_REFERENCED); 1291 1292 /* Remove pv_entry from pv_list. */ 1293 pv_remove(pmap, va, m); 1294 } else if (m->md.pv_tracked) { 1295 pv_remove(pmap, va, m); 1296 if (TAILQ_EMPTY(&m->md.pv_list)) 1297 m->md.pv_tracked = false; 1298 } 1299 mtx_lock_spin(&tlbivax_mutex); 1300 tlb_miss_lock(); 1301 1302 tlb0_flush_entry(va); 1303 *pte = 0; 1304 1305 tlb_miss_unlock(); 1306 mtx_unlock_spin(&tlbivax_mutex); 1307 1308 pmap->pm_stats.resident_count--; 1309 1310 if (flags & PTBL_UNHOLD) { 1311 return (ptbl_unhold(mmu, pmap, va)); 1312 } 1313 return (0); 1314 } 1315 1316 /* 1317 * allocate a page of pointers to page directories, do not preallocate the 1318 * page tables 1319 */ 1320 static pte_t ** 1321 pdir_alloc(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx, bool nosleep) 1322 { 1323 vm_page_t mtbl [PDIR_PAGES]; 1324 vm_page_t m; 1325 struct ptbl_buf *pbuf; 1326 pte_t **pdir; 1327 unsigned int pidx; 1328 int i; 1329 int req; 1330 1331 pbuf = ptbl_buf_alloc(); 1332 1333 if (pbuf == NULL) 1334 panic("%s: couldn't alloc kernel virtual memory", __func__); 1335 1336 /* Allocate pdir pages, this will sleep! */ 1337 for (i = 0; i < PDIR_PAGES; i++) { 1338 pidx = (PDIR_PAGES * pp2d_idx) + i; 1339 req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; 1340 while ((m = vm_page_alloc(NULL, pidx, req)) == NULL) { 1341 PMAP_UNLOCK(pmap); 1342 VM_WAIT; 1343 PMAP_LOCK(pmap); 1344 } 1345 mtbl[i] = m; 1346 } 1347 1348 /* Mapin allocated pages into kernel_pmap. */ 1349 pdir = (pte_t **) pbuf->kva; 1350 pmap_qenter((vm_offset_t) pdir, mtbl, PDIR_PAGES); 1351 1352 /* Zero whole pdir. */ 1353 bzero((caddr_t) pdir, PDIR_PAGES * PAGE_SIZE); 1354 1355 /* Add pdir to the pmap pdir bufs list. */ 1356 TAILQ_INSERT_TAIL(&pmap->pm_pdir_list, pbuf, link); 1357 1358 return pdir; 1359 } 1360 1361 /* 1362 * Insert PTE for a given page and virtual address. 1363 */ 1364 static int 1365 pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags, 1366 boolean_t nosleep) 1367 { 1368 unsigned int pp2d_idx = PP2D_IDX(va); 1369 unsigned int pdir_idx = PDIR_IDX(va); 1370 unsigned int ptbl_idx = PTBL_IDX(va); 1371 pte_t *ptbl, *pte; 1372 pte_t **pdir; 1373 1374 /* Get the page directory pointer. */ 1375 pdir = pmap->pm_pp2d[pp2d_idx]; 1376 if (pdir == NULL) 1377 pdir = pdir_alloc(mmu, pmap, pp2d_idx, nosleep); 1378 1379 /* Get the page table pointer. */ 1380 ptbl = pdir[pdir_idx]; 1381 1382 if (ptbl == NULL) { 1383 /* Allocate page table pages. */ 1384 ptbl = ptbl_alloc(mmu, pmap, pdir, pdir_idx, nosleep); 1385 if (ptbl == NULL) { 1386 KASSERT(nosleep, ("nosleep and NULL ptbl")); 1387 return (ENOMEM); 1388 } 1389 } else { 1390 /* 1391 * Check if there is valid mapping for requested va, if there 1392 * is, remove it. 1393 */ 1394 pte = &pdir[pdir_idx][ptbl_idx]; 1395 if (PTE_ISVALID(pte)) { 1396 pte_remove(mmu, pmap, va, PTBL_HOLD); 1397 } else { 1398 /* 1399 * pte is not used, increment hold count for ptbl 1400 * pages. 1401 */ 1402 if (pmap != kernel_pmap) 1403 ptbl_hold(mmu, pmap, pdir, pdir_idx); 1404 } 1405 } 1406 1407 if (pdir[pdir_idx] == NULL) { 1408 if (pmap != kernel_pmap && pmap->pm_pp2d[pp2d_idx] != NULL) 1409 pdir_hold(mmu, pmap, pdir); 1410 pdir[pdir_idx] = ptbl; 1411 } 1412 if (pmap->pm_pp2d[pp2d_idx] == NULL) 1413 pmap->pm_pp2d[pp2d_idx] = pdir; 1414 1415 /* 1416 * Insert pv_entry into pv_list for mapped page if part of managed 1417 * memory. 1418 */ 1419 if ((m->oflags & VPO_UNMANAGED) == 0) { 1420 flags |= PTE_MANAGED; 1421 1422 /* Create and insert pv entry. 
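 * (pv_insert() panics if the pv entry zone is exhausted.)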
*/ 1423 pv_insert(pmap, va, m); 1424 } 1425 1426 mtx_lock_spin(&tlbivax_mutex); 1427 tlb_miss_lock(); 1428 1429 tlb0_flush_entry(va); 1430 pmap->pm_stats.resident_count++; 1431 pte = &pdir[pdir_idx][ptbl_idx]; 1432 *pte = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m)); 1433 *pte |= (PTE_VALID | flags); 1434 1435 tlb_miss_unlock(); 1436 mtx_unlock_spin(&tlbivax_mutex); 1437 1438 return (0); 1439 } 1440 1441 /* Return the pa for the given pmap/va. */ 1442 static vm_paddr_t 1443 pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1444 { 1445 vm_paddr_t pa = 0; 1446 pte_t *pte; 1447 1448 pte = pte_find(mmu, pmap, va); 1449 if ((pte != NULL) && PTE_ISVALID(pte)) 1450 pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); 1451 return (pa); 1452 } 1453 1454 1455 /* allocate pte entries to manage (addr & mask) to (addr & mask) + size */ 1456 static void 1457 kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr, vm_offset_t pdir) 1458 { 1459 int i, j; 1460 vm_offset_t va; 1461 pte_t *pte; 1462 1463 va = addr; 1464 /* Initialize kernel pdir */ 1465 for (i = 0; i < kernel_pdirs; i++) { 1466 kernel_pmap->pm_pp2d[i + PP2D_IDX(va)] = 1467 (pte_t **)(pdir + (i * PAGE_SIZE * PDIR_PAGES)); 1468 for (j = PDIR_IDX(va + (i * PAGE_SIZE * PDIR_NENTRIES * PTBL_NENTRIES)); 1469 j < PDIR_NENTRIES; j++) { 1470 kernel_pmap->pm_pp2d[i + PP2D_IDX(va)][j] = 1471 (pte_t *)(pdir + (kernel_pdirs * PAGE_SIZE * PDIR_PAGES) + 1472 (((i * PDIR_NENTRIES) + j) * PAGE_SIZE * PTBL_PAGES)); 1473 } 1474 } 1475 1476 /* 1477 * Fill in PTEs covering kernel code and data. They are not required 1478 * for address translation, as this area is covered by static TLB1 1479 * entries, but for pte_vatopa() to work correctly with kernel area 1480 * addresses. 1481 */ 1482 for (va = addr; va < data_end; va += PAGE_SIZE) { 1483 pte = &(kernel_pmap->pm_pp2d[PP2D_IDX(va)][PDIR_IDX(va)][PTBL_IDX(va)]); 1484 *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart)); 1485 *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | 1486 PTE_VALID | PTE_PS_4KB; 1487 } 1488 } 1489 #else 1490 /* 1491 * Clean pte entry, try to free page table page if requested. 1492 * 1493 * Return 1 if ptbl pages were freed, otherwise return 0. 1494 */ 1495 static int 1496 pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, uint8_t flags) 1497 { 1498 unsigned int pdir_idx = PDIR_IDX(va); 1499 unsigned int ptbl_idx = PTBL_IDX(va); 1500 vm_page_t m; 1501 pte_t *ptbl; 1502 pte_t *pte; 1503 1504 //int su = (pmap == kernel_pmap); 1505 //debugf("pte_remove: s (su = %d pmap = 0x%08x va = 0x%08x flags = %d)\n", 1506 // su, (u_int32_t)pmap, va, flags); 1507 1508 ptbl = pmap->pm_pdir[pdir_idx]; 1509 KASSERT(ptbl, ("pte_remove: null ptbl")); 1510 1511 pte = &ptbl[ptbl_idx]; 1512 1513 if (pte == NULL || !PTE_ISVALID(pte)) 1514 return (0); 1515 1516 if (PTE_ISWIRED(pte)) 1517 pmap->pm_stats.wired_count--; 1518 1519 /* Get vm_page_t for mapped pte. */ 1520 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 1521 1522 /* Handle managed entry. */ 1523 if (PTE_ISMANAGED(pte)) { 1524 1525 if (PTE_ISMODIFIED(pte)) 1526 vm_page_dirty(m); 1527 1528 if (PTE_ISREFERENCED(pte)) 1529 vm_page_aflag_set(m, PGA_REFERENCED); 1530 1531 pv_remove(pmap, va, m); 1532 } else if (m->md.pv_tracked) { 1533 /* 1534 * Always pv_insert()/pv_remove() on MPC85XX, in case DPAA is 1535 * used. This is needed by the NCSW support code for fast 1536 * VA<->PA translation. 
1537 */ 1538 pv_remove(pmap, va, m); 1539 if (TAILQ_EMPTY(&m->md.pv_list)) 1540 m->md.pv_tracked = false; 1541 } 1542 1543 mtx_lock_spin(&tlbivax_mutex); 1544 tlb_miss_lock(); 1545 1546 tlb0_flush_entry(va); 1547 *pte = 0; 1548 1549 tlb_miss_unlock(); 1550 mtx_unlock_spin(&tlbivax_mutex); 1551 1552 pmap->pm_stats.resident_count--; 1553 1554 if (flags & PTBL_UNHOLD) { 1555 //debugf("pte_remove: e (unhold)\n"); 1556 return (ptbl_unhold(mmu, pmap, pdir_idx)); 1557 } 1558 1559 //debugf("pte_remove: e\n"); 1560 return (0); 1561 } 1562 1563 /* 1564 * Insert PTE for a given page and virtual address. 1565 */ 1566 static int 1567 pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags, 1568 boolean_t nosleep) 1569 { 1570 unsigned int pdir_idx = PDIR_IDX(va); 1571 unsigned int ptbl_idx = PTBL_IDX(va); 1572 pte_t *ptbl, *pte; 1573 1574 CTR4(KTR_PMAP, "%s: su = %d pmap = %p va = %p", __func__, 1575 pmap == kernel_pmap, pmap, va); 1576 1577 /* Get the page table pointer. */ 1578 ptbl = pmap->pm_pdir[pdir_idx]; 1579 1580 if (ptbl == NULL) { 1581 /* Allocate page table pages. */ 1582 ptbl = ptbl_alloc(mmu, pmap, pdir_idx, nosleep); 1583 if (ptbl == NULL) { 1584 KASSERT(nosleep, ("nosleep and NULL ptbl")); 1585 return (ENOMEM); 1586 } 1587 } else { 1588 /* 1589 * Check if there is valid mapping for requested 1590 * va, if there is, remove it. 1591 */ 1592 pte = &pmap->pm_pdir[pdir_idx][ptbl_idx]; 1593 if (PTE_ISVALID(pte)) { 1594 pte_remove(mmu, pmap, va, PTBL_HOLD); 1595 } else { 1596 /* 1597 * pte is not used, increment hold count 1598 * for ptbl pages. 1599 */ 1600 if (pmap != kernel_pmap) 1601 ptbl_hold(mmu, pmap, pdir_idx); 1602 } 1603 } 1604 1605 /* 1606 * Insert pv_entry into pv_list for mapped page if part of managed 1607 * memory. 1608 */ 1609 if ((m->oflags & VPO_UNMANAGED) == 0) { 1610 flags |= PTE_MANAGED; 1611 1612 /* Create and insert pv entry. */ 1613 pv_insert(pmap, va, m); 1614 } 1615 1616 pmap->pm_stats.resident_count++; 1617 1618 mtx_lock_spin(&tlbivax_mutex); 1619 tlb_miss_lock(); 1620 1621 tlb0_flush_entry(va); 1622 if (pmap->pm_pdir[pdir_idx] == NULL) { 1623 /* 1624 * If we just allocated a new page table, hook it in 1625 * the pdir. 1626 */ 1627 pmap->pm_pdir[pdir_idx] = ptbl; 1628 } 1629 pte = &(pmap->pm_pdir[pdir_idx][ptbl_idx]); 1630 *pte = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m)); 1631 *pte |= (PTE_VALID | flags | PTE_PS_4KB); /* 4KB pages only */ 1632 1633 tlb_miss_unlock(); 1634 mtx_unlock_spin(&tlbivax_mutex); 1635 return (0); 1636 } 1637 1638 /* Return the pa for the given pmap/va. */ 1639 static vm_paddr_t 1640 pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1641 { 1642 vm_paddr_t pa = 0; 1643 pte_t *pte; 1644 1645 pte = pte_find(mmu, pmap, va); 1646 if ((pte != NULL) && PTE_ISVALID(pte)) 1647 pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); 1648 return (pa); 1649 } 1650 1651 /* Get a pointer to a PTE in a page table. */ 1652 static pte_t * 1653 pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1654 { 1655 unsigned int pdir_idx = PDIR_IDX(va); 1656 unsigned int ptbl_idx = PTBL_IDX(va); 1657 1658 KASSERT((pmap != NULL), ("pte_find: invalid pmap")); 1659 1660 if (pmap->pm_pdir[pdir_idx]) 1661 return (&(pmap->pm_pdir[pdir_idx][ptbl_idx])); 1662 1663 return (NULL); 1664 } 1665 1666 /* Set up kernel page tables. 
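 * Hook the statically allocated kernel ptbls into pm_pdir starting at
 * kptbl_min, then pre-fill PTEs for the kernel image so that pte_vatopa()
 * works on kernel addresses even though translation is served by TLB1.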
*/ 1667 static void 1668 kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr, vm_offset_t pdir) 1669 { 1670 int i; 1671 vm_offset_t va; 1672 pte_t *pte; 1673 1674 /* Initialize kernel pdir */ 1675 for (i = 0; i < kernel_ptbls; i++) 1676 kernel_pmap->pm_pdir[kptbl_min + i] = 1677 (pte_t *)(pdir + (i * PAGE_SIZE * PTBL_PAGES)); 1678 1679 /* 1680 * Fill in PTEs covering kernel code and data. They are not required 1681 * for address translation, as this area is covered by static TLB1 1682 * entries, but for pte_vatopa() to work correctly with kernel area 1683 * addresses. 1684 */ 1685 for (va = addr; va < data_end; va += PAGE_SIZE) { 1686 pte = &(kernel_pmap->pm_pdir[PDIR_IDX(va)][PTBL_IDX(va)]); 1687 *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart)); 1688 *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | 1689 PTE_VALID | PTE_PS_4KB; 1690 } 1691 } 1692 #endif 1693 1694 /**************************************************************************/ 1695 /* PMAP related */ 1696 /**************************************************************************/ 1697 1698 /* 1699 * This is called during booke_init, before the system is really initialized. 1700 */ 1701 static void 1702 mmu_booke_bootstrap(mmu_t mmu, vm_offset_t start, vm_offset_t kernelend) 1703 { 1704 vm_paddr_t phys_kernelend; 1705 struct mem_region *mp, *mp1; 1706 int cnt, i, j; 1707 vm_paddr_t s, e, sz; 1708 vm_paddr_t physsz, hwphyssz; 1709 u_int phys_avail_count; 1710 vm_size_t kstack0_sz; 1711 vm_offset_t kernel_pdir, kstack0; 1712 vm_paddr_t kstack0_phys; 1713 void *dpcpu; 1714 1715 debugf("mmu_booke_bootstrap: entered\n"); 1716 1717 /* Set interesting system properties */ 1718 hw_direct_map = 0; 1719 #if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__) 1720 elf32_nxstack = 1; 1721 #endif 1722 1723 /* Initialize invalidation mutex */ 1724 mtx_init(&tlbivax_mutex, "tlbivax", NULL, MTX_SPIN); 1725 1726 /* Read TLB0 size and associativity. */ 1727 tlb0_get_tlbconf(); 1728 1729 /* 1730 * Align kernel start and end address (kernel image). 1731 * Note that kernel end does not necessarily relate to kernsize. 1732 * kernsize is the size of the kernel that is actually mapped. 1733 */ 1734 kernstart = trunc_page(start); 1735 data_start = round_page(kernelend); 1736 data_end = data_start; 1737 1738 /* 1739 * Addresses of preloaded modules (like file systems) use 1740 * physical addresses. Make sure we relocate those into 1741 * virtual addresses. 1742 */ 1743 preload_addr_relocate = kernstart - kernload; 1744 1745 /* Allocate the dynamic per-cpu area. */ 1746 dpcpu = (void *)data_end; 1747 data_end += DPCPU_SIZE; 1748 1749 /* Allocate space for the message buffer. */ 1750 msgbufp = (struct msgbuf *)data_end; 1751 data_end += msgbufsize; 1752 debugf(" msgbufp at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", 1753 (uintptr_t)msgbufp, data_end); 1754 1755 data_end = round_page(data_end); 1756 1757 /* Allocate space for ptbl_bufs. */ 1758 ptbl_bufs = (struct ptbl_buf *)data_end; 1759 data_end += sizeof(struct ptbl_buf) * PTBL_BUFS; 1760 debugf(" ptbl_bufs at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", 1761 (uintptr_t)ptbl_bufs, data_end); 1762 1763 data_end = round_page(data_end); 1764 1765 /* Allocate PTE tables for kernel KVA. 
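 * One ptbl is needed per PDIR_SIZE of KVA:
 *   kernel_ptbls = howmany(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
 *       PDIR_SIZE)
 * and, on powerpc64, one pdir (PDIR_PAGES pages) per PDIR_NENTRIES ptbls.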
*/ 1766 kernel_pdir = data_end; 1767 kernel_ptbls = howmany(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, 1768 PDIR_SIZE); 1769 #ifdef __powerpc64__ 1770 kernel_pdirs = howmany(kernel_ptbls, PDIR_NENTRIES); 1771 data_end += kernel_pdirs * PDIR_PAGES * PAGE_SIZE; 1772 #endif 1773 data_end += kernel_ptbls * PTBL_PAGES * PAGE_SIZE; 1774 debugf(" kernel ptbls: %d\n", kernel_ptbls); 1775 debugf(" kernel pdir at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", 1776 kernel_pdir, data_end); 1777 1778 debugf(" data_end: 0x%"PRI0ptrX"\n", data_end); 1779 if (data_end - kernstart > kernsize) { 1780 kernsize += tlb1_mapin_region(kernstart + kernsize, 1781 kernload + kernsize, (data_end - kernstart) - kernsize); 1782 } 1783 data_end = kernstart + kernsize; 1784 debugf(" updated data_end: 0x%"PRI0ptrX"\n", data_end); 1785 1786 /* 1787 * Clear the structures - note we can only do it safely after the 1788 * possible additional TLB1 translations are in place (above) so that 1789 * all range up to the currently calculated 'data_end' is covered. 1790 */ 1791 dpcpu_init(dpcpu, 0); 1792 memset((void *)ptbl_bufs, 0, sizeof(struct ptbl_buf) * PTBL_SIZE); 1793 #ifdef __powerpc64__ 1794 memset((void *)kernel_pdir, 0, 1795 kernel_pdirs * PDIR_PAGES * PAGE_SIZE + 1796 kernel_ptbls * PTBL_PAGES * PAGE_SIZE); 1797 #else 1798 memset((void *)kernel_pdir, 0, kernel_ptbls * PTBL_PAGES * PAGE_SIZE); 1799 #endif 1800 1801 /*******************************************************/ 1802 /* Set the start and end of kva. */ 1803 /*******************************************************/ 1804 virtual_avail = round_page(data_end); 1805 virtual_end = VM_MAX_KERNEL_ADDRESS; 1806 1807 /* Allocate KVA space for page zero/copy operations. */ 1808 zero_page_va = virtual_avail; 1809 virtual_avail += PAGE_SIZE; 1810 copy_page_src_va = virtual_avail; 1811 virtual_avail += PAGE_SIZE; 1812 copy_page_dst_va = virtual_avail; 1813 virtual_avail += PAGE_SIZE; 1814 debugf("zero_page_va = 0x%08x\n", zero_page_va); 1815 debugf("copy_page_src_va = 0x%08x\n", copy_page_src_va); 1816 debugf("copy_page_dst_va = 0x%08x\n", copy_page_dst_va); 1817 1818 /* Initialize page zero/copy mutexes. */ 1819 mtx_init(&zero_page_mutex, "mmu_booke_zero_page", NULL, MTX_DEF); 1820 mtx_init(©_page_mutex, "mmu_booke_copy_page", NULL, MTX_DEF); 1821 1822 /* Allocate KVA space for ptbl bufs. */ 1823 ptbl_buf_pool_vabase = virtual_avail; 1824 virtual_avail += PTBL_BUFS * PTBL_PAGES * PAGE_SIZE; 1825 debugf("ptbl_buf_pool_vabase = 0x%08x end = 0x%08x\n", 1826 ptbl_buf_pool_vabase, virtual_avail); 1827 1828 /* Calculate corresponding physical addresses for the kernel region. */ 1829 phys_kernelend = kernload + kernsize; 1830 debugf("kernel image and allocated data:\n"); 1831 debugf(" kernload = 0x%09llx\n", (uint64_t)kernload); 1832 debugf(" kernstart = 0x%08x\n", kernstart); 1833 debugf(" kernsize = 0x%08x\n", kernsize); 1834 1835 if (sizeof(phys_avail) / sizeof(phys_avail[0]) < availmem_regions_sz) 1836 panic("mmu_booke_bootstrap: phys_avail too small"); 1837 1838 /* 1839 * Remove kernel physical address range from avail regions list. Page 1840 * align all regions. Non-page aligned memory isn't very interesting 1841 * to us. Also, sort the entries for ascending addresses. 
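 * For example, an avail region that spans the whole kernel image is split
 * in two: the part above phys_kernelend is appended as a new entry and the
 * part below kernload is kept in place.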
1842 */ 1843 1844 /* Retrieve phys/avail mem regions */ 1845 mem_regions(&physmem_regions, &physmem_regions_sz, 1846 &availmem_regions, &availmem_regions_sz); 1847 sz = 0; 1848 cnt = availmem_regions_sz; 1849 debugf("processing avail regions:\n"); 1850 for (mp = availmem_regions; mp->mr_size; mp++) { 1851 s = mp->mr_start; 1852 e = mp->mr_start + mp->mr_size; 1853 debugf(" %09jx-%09jx -> ", (uintmax_t)s, (uintmax_t)e); 1854 /* Check whether this region holds all of the kernel. */ 1855 if (s < kernload && e > phys_kernelend) { 1856 availmem_regions[cnt].mr_start = phys_kernelend; 1857 availmem_regions[cnt++].mr_size = e - phys_kernelend; 1858 e = kernload; 1859 } 1860 /* Look whether this regions starts within the kernel. */ 1861 if (s >= kernload && s < phys_kernelend) { 1862 if (e <= phys_kernelend) 1863 goto empty; 1864 s = phys_kernelend; 1865 } 1866 /* Now look whether this region ends within the kernel. */ 1867 if (e > kernload && e <= phys_kernelend) { 1868 if (s >= kernload) 1869 goto empty; 1870 e = kernload; 1871 } 1872 /* Now page align the start and size of the region. */ 1873 s = round_page(s); 1874 e = trunc_page(e); 1875 if (e < s) 1876 e = s; 1877 sz = e - s; 1878 debugf("%09jx-%09jx = %jx\n", 1879 (uintmax_t)s, (uintmax_t)e, (uintmax_t)sz); 1880 1881 /* Check whether some memory is left here. */ 1882 if (sz == 0) { 1883 empty: 1884 memmove(mp, mp + 1, 1885 (cnt - (mp - availmem_regions)) * sizeof(*mp)); 1886 cnt--; 1887 mp--; 1888 continue; 1889 } 1890 1891 /* Do an insertion sort. */ 1892 for (mp1 = availmem_regions; mp1 < mp; mp1++) 1893 if (s < mp1->mr_start) 1894 break; 1895 if (mp1 < mp) { 1896 memmove(mp1 + 1, mp1, (char *)mp - (char *)mp1); 1897 mp1->mr_start = s; 1898 mp1->mr_size = sz; 1899 } else { 1900 mp->mr_start = s; 1901 mp->mr_size = sz; 1902 } 1903 } 1904 availmem_regions_sz = cnt; 1905 1906 /*******************************************************/ 1907 /* Steal physical memory for kernel stack from the end */ 1908 /* of the first avail region */ 1909 /*******************************************************/ 1910 kstack0_sz = kstack_pages * PAGE_SIZE; 1911 kstack0_phys = availmem_regions[0].mr_start + 1912 availmem_regions[0].mr_size; 1913 kstack0_phys -= kstack0_sz; 1914 availmem_regions[0].mr_size -= kstack0_sz; 1915 1916 /*******************************************************/ 1917 /* Fill in phys_avail table, based on availmem_regions */ 1918 /*******************************************************/ 1919 phys_avail_count = 0; 1920 physsz = 0; 1921 hwphyssz = 0; 1922 TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz); 1923 1924 debugf("fill in phys_avail:\n"); 1925 for (i = 0, j = 0; i < availmem_regions_sz; i++, j += 2) { 1926 1927 debugf(" region: 0x%jx - 0x%jx (0x%jx)\n", 1928 (uintmax_t)availmem_regions[i].mr_start, 1929 (uintmax_t)availmem_regions[i].mr_start + 1930 availmem_regions[i].mr_size, 1931 (uintmax_t)availmem_regions[i].mr_size); 1932 1933 if (hwphyssz != 0 && 1934 (physsz + availmem_regions[i].mr_size) >= hwphyssz) { 1935 debugf(" hw.physmem adjust\n"); 1936 if (physsz < hwphyssz) { 1937 phys_avail[j] = availmem_regions[i].mr_start; 1938 phys_avail[j + 1] = 1939 availmem_regions[i].mr_start + 1940 hwphyssz - physsz; 1941 physsz = hwphyssz; 1942 phys_avail_count++; 1943 } 1944 break; 1945 } 1946 1947 phys_avail[j] = availmem_regions[i].mr_start; 1948 phys_avail[j + 1] = availmem_regions[i].mr_start + 1949 availmem_regions[i].mr_size; 1950 phys_avail_count++; 1951 physsz += availmem_regions[i].mr_size; 1952 } 1953 physmem = 
btoc(physsz); 1954 1955 /* Calculate the last available physical address. */ 1956 for (i = 0; phys_avail[i + 2] != 0; i += 2) 1957 ; 1958 Maxmem = powerpc_btop(phys_avail[i + 1]); 1959 1960 debugf("Maxmem = 0x%08lx\n", Maxmem); 1961 debugf("phys_avail_count = %d\n", phys_avail_count); 1962 debugf("physsz = 0x%09jx physmem = %jd (0x%09jx)\n", 1963 (uintmax_t)physsz, (uintmax_t)physmem, (uintmax_t)physmem); 1964 1965 /*******************************************************/ 1966 /* Initialize (statically allocated) kernel pmap. */ 1967 /*******************************************************/ 1968 PMAP_LOCK_INIT(kernel_pmap); 1969 #ifndef __powerpc64__ 1970 kptbl_min = VM_MIN_KERNEL_ADDRESS / PDIR_SIZE; 1971 #endif 1972 1973 debugf("kernel_pmap = 0x%"PRI0ptrX"\n", (uintptr_t)kernel_pmap); 1974 kernel_pte_alloc(virtual_avail, kernstart, kernel_pdir); 1975 for (i = 0; i < MAXCPU; i++) { 1976 kernel_pmap->pm_tid[i] = TID_KERNEL; 1977 1978 /* Initialize each CPU's tidbusy entry 0 with kernel_pmap */ 1979 tidbusy[i][TID_KERNEL] = kernel_pmap; 1980 } 1981 1982 /* Mark kernel_pmap active on all CPUs */ 1983 CPU_FILL(&kernel_pmap->pm_active); 1984 1985 /* 1986 * Initialize the global pv list lock. 1987 */ 1988 rw_init(&pvh_global_lock, "pmap pv global"); 1989 1990 /*******************************************************/ 1991 /* Final setup */ 1992 /*******************************************************/ 1993 1994 /* Enter kstack0 into kernel map, provide guard page */ 1995 kstack0 = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE; 1996 thread0.td_kstack = kstack0; 1997 thread0.td_kstack_pages = kstack_pages; 1998 1999 debugf("kstack_sz = 0x%08x\n", kstack0_sz); 2000 debugf("kstack0_phys at 0x%09llx - 0x%09llx\n", 2001 kstack0_phys, kstack0_phys + kstack0_sz); 2002 debugf("kstack0 at 0x%"PRI0ptrX" - 0x%"PRI0ptrX"\n", 2003 kstack0, kstack0 + kstack0_sz); 2004 2005 virtual_avail += KSTACK_GUARD_PAGES * PAGE_SIZE + kstack0_sz; 2006 for (i = 0; i < kstack_pages; i++) { 2007 mmu_booke_kenter(mmu, kstack0, kstack0_phys); 2008 kstack0 += PAGE_SIZE; 2009 kstack0_phys += PAGE_SIZE; 2010 } 2011 2012 pmap_bootstrapped = 1; 2013 2014 debugf("virtual_avail = %"PRI0ptrX"\n", virtual_avail); 2015 debugf("virtual_end = %"PRI0ptrX"\n", virtual_end); 2016 2017 debugf("mmu_booke_bootstrap: exit\n"); 2018 } 2019 2020 #ifdef SMP 2021 void 2022 tlb1_ap_prep(void) 2023 { 2024 tlb_entry_t *e, tmp; 2025 unsigned int i; 2026 2027 /* Prepare TLB1 image for AP processors */ 2028 e = __boot_tlb1; 2029 for (i = 0; i < TLB1_ENTRIES; i++) { 2030 tlb1_read_entry(&tmp, i); 2031 2032 if ((tmp.mas1 & MAS1_VALID) && (tmp.mas2 & _TLB_ENTRY_SHARED)) 2033 memcpy(e++, &tmp, sizeof(tmp)); 2034 } 2035 } 2036 2037 void 2038 pmap_bootstrap_ap(volatile uint32_t *trcp __unused) 2039 { 2040 int i; 2041 2042 /* 2043 * Finish TLB1 configuration: the BSP already set up its TLB1 and we 2044 * have the snapshot of its contents in the s/w __boot_tlb1[] table 2045 * created by tlb1_ap_prep(), so use these values directly to 2046 * (re)program AP's TLB1 hardware. 2047 * 2048 * Start at index 1 because index 0 has the kernel map. 
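 *
 * Only entries the BSP tagged _TLB_ENTRY_SHARED were snapshotted by
 * tlb1_ap_prep(), so strictly BSP-local mappings are not replicated
 * onto the APs here.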
2049 */ 2050 for (i = 1; i < TLB1_ENTRIES; i++) { 2051 if (__boot_tlb1[i].mas1 & MAS1_VALID) 2052 tlb1_write_entry(&__boot_tlb1[i], i); 2053 } 2054 2055 set_mas4_defaults(); 2056 } 2057 #endif 2058 2059 static void 2060 booke_pmap_init_qpages(void) 2061 { 2062 struct pcpu *pc; 2063 int i; 2064 2065 CPU_FOREACH(i) { 2066 pc = pcpu_find(i); 2067 pc->pc_qmap_addr = kva_alloc(PAGE_SIZE); 2068 if (pc->pc_qmap_addr == 0) 2069 panic("pmap_init_qpages: unable to allocate KVA"); 2070 } 2071 } 2072 2073 SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, booke_pmap_init_qpages, NULL); 2074 2075 /* 2076 * Get the physical page address for the given pmap/virtual address. 2077 */ 2078 static vm_paddr_t 2079 mmu_booke_extract(mmu_t mmu, pmap_t pmap, vm_offset_t va) 2080 { 2081 vm_paddr_t pa; 2082 2083 PMAP_LOCK(pmap); 2084 pa = pte_vatopa(mmu, pmap, va); 2085 PMAP_UNLOCK(pmap); 2086 2087 return (pa); 2088 } 2089 2090 /* 2091 * Extract the physical page address associated with the given 2092 * kernel virtual address. 2093 */ 2094 static vm_paddr_t 2095 mmu_booke_kextract(mmu_t mmu, vm_offset_t va) 2096 { 2097 tlb_entry_t e; 2098 vm_paddr_t p = 0; 2099 int i; 2100 2101 if (va >= VM_MIN_KERNEL_ADDRESS && va <= VM_MAX_KERNEL_ADDRESS) 2102 p = pte_vatopa(mmu, kernel_pmap, va); 2103 2104 if (p == 0) { 2105 /* Check TLB1 mappings */ 2106 for (i = 0; i < TLB1_ENTRIES; i++) { 2107 tlb1_read_entry(&e, i); 2108 if (!(e.mas1 & MAS1_VALID)) 2109 continue; 2110 if (va >= e.virt && va < e.virt + e.size) 2111 return (e.phys + (va - e.virt)); 2112 } 2113 } 2114 2115 return (p); 2116 } 2117 2118 /* 2119 * Initialize the pmap module. 2120 * Called by vm_init, to initialize any structures that the pmap 2121 * system needs to map virtual memory. 2122 */ 2123 static void 2124 mmu_booke_init(mmu_t mmu) 2125 { 2126 int shpgperproc = PMAP_SHPGPERPROC; 2127 2128 /* 2129 * Initialize the address space (zone) for the pv entries. Set a 2130 * high water mark so that the system can recover from excessive 2131 * numbers of pv entries. 2132 */ 2133 pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL, 2134 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); 2135 2136 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 2137 pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count; 2138 2139 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); 2140 pv_entry_high_water = 9 * (pv_entry_max / 10); 2141 2142 uma_zone_reserve_kva(pvzone, pv_entry_max); 2143 2144 /* Pre-fill pvzone with initial number of pv entries. */ 2145 uma_prealloc(pvzone, PV_ENTRY_ZONE_MIN); 2146 2147 /* Initialize ptbl allocation. */ 2148 ptbl_init(); 2149 } 2150 2151 /* 2152 * Map a list of wired pages into kernel virtual address space. This is 2153 * intended for temporary mappings which do not need page modification or 2154 * references recorded. Existing mappings in the region are overwritten. 2155 */ 2156 static void 2157 mmu_booke_qenter(mmu_t mmu, vm_offset_t sva, vm_page_t *m, int count) 2158 { 2159 vm_offset_t va; 2160 2161 va = sva; 2162 while (count-- > 0) { 2163 mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(*m)); 2164 va += PAGE_SIZE; 2165 m++; 2166 } 2167 } 2168 2169 /* 2170 * Remove page mappings from kernel virtual address space. Intended for 2171 * temporary mappings entered by mmu_booke_qenter. 
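 *
 * An illustrative (hypothetical) pairing with mmu_booke_qenter, normally
 * reached through the pmap_qenter()/pmap_qremove() interfaces:
 *
 *	va = kva_alloc(npages * PAGE_SIZE);
 *	mmu_booke_qenter(mmu, va, pages, npages);
 *	... access the pages through va ...
 *	mmu_booke_qremove(mmu, va, npages);
 *	kva_free(va, npages * PAGE_SIZE);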
2172 */ 2173 static void 2174 mmu_booke_qremove(mmu_t mmu, vm_offset_t sva, int count) 2175 { 2176 vm_offset_t va; 2177 2178 va = sva; 2179 while (count-- > 0) { 2180 mmu_booke_kremove(mmu, va); 2181 va += PAGE_SIZE; 2182 } 2183 } 2184 2185 /* 2186 * Map a wired page into kernel virtual address space. 2187 */ 2188 static void 2189 mmu_booke_kenter(mmu_t mmu, vm_offset_t va, vm_paddr_t pa) 2190 { 2191 2192 mmu_booke_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT); 2193 } 2194 2195 static void 2196 mmu_booke_kenter_attr(mmu_t mmu, vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma) 2197 { 2198 uint32_t flags; 2199 pte_t *pte; 2200 2201 KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && 2202 (va <= VM_MAX_KERNEL_ADDRESS)), ("mmu_booke_kenter: invalid va")); 2203 2204 flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID; 2205 flags |= tlb_calc_wimg(pa, ma) << PTE_MAS2_SHIFT; 2206 flags |= PTE_PS_4KB; 2207 2208 pte = pte_find(mmu, kernel_pmap, va); 2209 KASSERT((pte != NULL), ("mmu_booke_kenter: invalid va. NULL PTE")); 2210 2211 mtx_lock_spin(&tlbivax_mutex); 2212 tlb_miss_lock(); 2213 2214 if (PTE_ISVALID(pte)) { 2215 2216 CTR1(KTR_PMAP, "%s: replacing entry!", __func__); 2217 2218 /* Flush entry from TLB0 */ 2219 tlb0_flush_entry(va); 2220 } 2221 2222 *pte = PTE_RPN_FROM_PA(pa) | flags; 2223 2224 //debugf("mmu_booke_kenter: pdir_idx = %d ptbl_idx = %d va=0x%08x " 2225 // "pa=0x%08x rpn=0x%08x flags=0x%08x\n", 2226 // pdir_idx, ptbl_idx, va, pa, pte->rpn, pte->flags); 2227 2228 /* Flush the real memory from the instruction cache. */ 2229 if ((flags & (PTE_I | PTE_G)) == 0) 2230 __syncicache((void *)va, PAGE_SIZE); 2231 2232 tlb_miss_unlock(); 2233 mtx_unlock_spin(&tlbivax_mutex); 2234 } 2235 2236 /* 2237 * Remove a page from kernel page table. 2238 */ 2239 static void 2240 mmu_booke_kremove(mmu_t mmu, vm_offset_t va) 2241 { 2242 pte_t *pte; 2243 2244 CTR2(KTR_PMAP,"%s: s (va = 0x%08x)\n", __func__, va); 2245 2246 KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && 2247 (va <= VM_MAX_KERNEL_ADDRESS)), 2248 ("mmu_booke_kremove: invalid va")); 2249 2250 pte = pte_find(mmu, kernel_pmap, va); 2251 2252 if (!PTE_ISVALID(pte)) { 2253 2254 CTR1(KTR_PMAP, "%s: invalid pte", __func__); 2255 2256 return; 2257 } 2258 2259 mtx_lock_spin(&tlbivax_mutex); 2260 tlb_miss_lock(); 2261 2262 /* Invalidate entry in TLB0, update PTE. */ 2263 tlb0_flush_entry(va); 2264 *pte = 0; 2265 2266 tlb_miss_unlock(); 2267 mtx_unlock_spin(&tlbivax_mutex); 2268 } 2269 2270 /* 2271 * Initialize pmap associated with process 0. 2272 */ 2273 static void 2274 mmu_booke_pinit0(mmu_t mmu, pmap_t pmap) 2275 { 2276 2277 PMAP_LOCK_INIT(pmap); 2278 mmu_booke_pinit(mmu, pmap); 2279 PCPU_SET(curpmap, pmap); 2280 } 2281 2282 /* 2283 * Initialize a preallocated and zeroed pmap structure, 2284 * such as one in a vmspace structure. 
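 *
 * Initialization amounts to marking every per-CPU TID as TID_NONE (a
 * translation ID is only allocated lazily on first activation) and
 * resetting the page directory pointers and the ptbl list.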
2285 */ 2286 static void 2287 mmu_booke_pinit(mmu_t mmu, pmap_t pmap) 2288 { 2289 int i; 2290 2291 CTR4(KTR_PMAP, "%s: pmap = %p, proc %d '%s'", __func__, pmap, 2292 curthread->td_proc->p_pid, curthread->td_proc->p_comm); 2293 2294 KASSERT((pmap != kernel_pmap), ("pmap_pinit: initializing kernel_pmap")); 2295 2296 for (i = 0; i < MAXCPU; i++) 2297 pmap->pm_tid[i] = TID_NONE; 2298 CPU_ZERO(&kernel_pmap->pm_active); 2299 bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); 2300 #ifdef __powerpc64__ 2301 bzero(&pmap->pm_pp2d, sizeof(pte_t **) * PP2D_NENTRIES); 2302 TAILQ_INIT(&pmap->pm_pdir_list); 2303 #else 2304 bzero(&pmap->pm_pdir, sizeof(pte_t *) * PDIR_NENTRIES); 2305 #endif 2306 TAILQ_INIT(&pmap->pm_ptbl_list); 2307 } 2308 2309 /* 2310 * Release any resources held by the given physical map. 2311 * Called when a pmap initialized by mmu_booke_pinit is being released. 2312 * Should only be called if the map contains no valid mappings. 2313 */ 2314 static void 2315 mmu_booke_release(mmu_t mmu, pmap_t pmap) 2316 { 2317 2318 KASSERT(pmap->pm_stats.resident_count == 0, 2319 ("pmap_release: pmap resident count %ld != 0", 2320 pmap->pm_stats.resident_count)); 2321 } 2322 2323 /* 2324 * Insert the given physical page at the specified virtual address in the 2325 * target physical map with the protection requested. If specified the page 2326 * will be wired down. 2327 */ 2328 static int 2329 mmu_booke_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 2330 vm_prot_t prot, u_int flags, int8_t psind) 2331 { 2332 int error; 2333 2334 rw_wlock(&pvh_global_lock); 2335 PMAP_LOCK(pmap); 2336 error = mmu_booke_enter_locked(mmu, pmap, va, m, prot, flags, psind); 2337 PMAP_UNLOCK(pmap); 2338 rw_wunlock(&pvh_global_lock); 2339 return (error); 2340 } 2341 2342 static int 2343 mmu_booke_enter_locked(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 2344 vm_prot_t prot, u_int pmap_flags, int8_t psind __unused) 2345 { 2346 pte_t *pte; 2347 vm_paddr_t pa; 2348 uint32_t flags; 2349 int error, su, sync; 2350 2351 pa = VM_PAGE_TO_PHYS(m); 2352 su = (pmap == kernel_pmap); 2353 sync = 0; 2354 2355 //debugf("mmu_booke_enter_locked: s (pmap=0x%08x su=%d tid=%d m=0x%08x va=0x%08x " 2356 // "pa=0x%08x prot=0x%08x flags=%#x)\n", 2357 // (u_int32_t)pmap, su, pmap->pm_tid, 2358 // (u_int32_t)m, va, pa, prot, flags); 2359 2360 if (su) { 2361 KASSERT(((va >= virtual_avail) && 2362 (va <= VM_MAX_KERNEL_ADDRESS)), 2363 ("mmu_booke_enter_locked: kernel pmap, non kernel va")); 2364 } else { 2365 KASSERT((va <= VM_MAXUSER_ADDRESS), 2366 ("mmu_booke_enter_locked: user pmap, non user va")); 2367 } 2368 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) 2369 VM_OBJECT_ASSERT_LOCKED(m->object); 2370 2371 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2372 2373 /* 2374 * If there is an existing mapping, and the physical address has not 2375 * changed, must be protection or wiring change. 2376 */ 2377 if (((pte = pte_find(mmu, pmap, va)) != NULL) && 2378 (PTE_ISVALID(pte)) && (PTE_PA(pte) == pa)) { 2379 2380 /* 2381 * Before actually updating pte->flags we calculate and 2382 * prepare its new value in a helper var. 2383 */ 2384 flags = *pte; 2385 flags &= ~(PTE_UW | PTE_UX | PTE_SW | PTE_SX | PTE_MODIFIED); 2386 2387 /* Wiring change, just update stats. 
*/ 2388 if ((pmap_flags & PMAP_ENTER_WIRED) != 0) { 2389 if (!PTE_ISWIRED(pte)) { 2390 flags |= PTE_WIRED; 2391 pmap->pm_stats.wired_count++; 2392 } 2393 } else { 2394 if (PTE_ISWIRED(pte)) { 2395 flags &= ~PTE_WIRED; 2396 pmap->pm_stats.wired_count--; 2397 } 2398 } 2399 2400 if (prot & VM_PROT_WRITE) { 2401 /* Add write permissions. */ 2402 flags |= PTE_SW; 2403 if (!su) 2404 flags |= PTE_UW; 2405 2406 if ((flags & PTE_MANAGED) != 0) 2407 vm_page_aflag_set(m, PGA_WRITEABLE); 2408 } else { 2409 /* Handle modified pages, sense modify status. */ 2410 2411 /* 2412 * The PTE_MODIFIED flag could be set by underlying 2413 * TLB misses since we last read it (above), possibly 2414 * other CPUs could update it so we check in the PTE 2415 * directly rather than rely on that saved local flags 2416 * copy. 2417 */ 2418 if (PTE_ISMODIFIED(pte)) 2419 vm_page_dirty(m); 2420 } 2421 2422 if (prot & VM_PROT_EXECUTE) { 2423 flags |= PTE_SX; 2424 if (!su) 2425 flags |= PTE_UX; 2426 2427 /* 2428 * Check existing flags for execute permissions: if we 2429 * are turning execute permissions on, icache should 2430 * be flushed. 2431 */ 2432 if ((*pte & (PTE_UX | PTE_SX)) == 0) 2433 sync++; 2434 } 2435 2436 flags &= ~PTE_REFERENCED; 2437 2438 /* 2439 * The new flags value is all calculated -- only now actually 2440 * update the PTE. 2441 */ 2442 mtx_lock_spin(&tlbivax_mutex); 2443 tlb_miss_lock(); 2444 2445 tlb0_flush_entry(va); 2446 *pte &= ~PTE_FLAGS_MASK; 2447 *pte |= flags; 2448 2449 tlb_miss_unlock(); 2450 mtx_unlock_spin(&tlbivax_mutex); 2451 2452 } else { 2453 /* 2454 * If there is an existing mapping, but it's for a different 2455 * physical address, pte_enter() will delete the old mapping. 2456 */ 2457 //if ((pte != NULL) && PTE_ISVALID(pte)) 2458 // debugf("mmu_booke_enter_locked: replace\n"); 2459 //else 2460 // debugf("mmu_booke_enter_locked: new\n"); 2461 2462 /* Now set up the flags and install the new mapping. */ 2463 flags = (PTE_SR | PTE_VALID); 2464 flags |= PTE_M; 2465 2466 if (!su) 2467 flags |= PTE_UR; 2468 2469 if (prot & VM_PROT_WRITE) { 2470 flags |= PTE_SW; 2471 if (!su) 2472 flags |= PTE_UW; 2473 2474 if ((m->oflags & VPO_UNMANAGED) == 0) 2475 vm_page_aflag_set(m, PGA_WRITEABLE); 2476 } 2477 2478 if (prot & VM_PROT_EXECUTE) { 2479 flags |= PTE_SX; 2480 if (!su) 2481 flags |= PTE_UX; 2482 } 2483 2484 /* If its wired update stats. */ 2485 if ((pmap_flags & PMAP_ENTER_WIRED) != 0) 2486 flags |= PTE_WIRED; 2487 2488 error = pte_enter(mmu, pmap, m, va, flags, 2489 (pmap_flags & PMAP_ENTER_NOSLEEP) != 0); 2490 if (error != 0) 2491 return (KERN_RESOURCE_SHORTAGE); 2492 2493 if ((flags & PMAP_ENTER_WIRED) != 0) 2494 pmap->pm_stats.wired_count++; 2495 2496 /* Flush the real memory from the instruction cache. */ 2497 if (prot & VM_PROT_EXECUTE) 2498 sync++; 2499 } 2500 2501 if (sync && (su || pmap == PCPU_GET(curpmap))) { 2502 __syncicache((void *)va, PAGE_SIZE); 2503 sync = 0; 2504 } 2505 2506 return (KERN_SUCCESS); 2507 } 2508 2509 /* 2510 * Maps a sequence of resident pages belonging to the same object. 2511 * The sequence begins with the given page m_start. This page is 2512 * mapped at the given virtual address start. Each subsequent page is 2513 * mapped at a virtual address that is offset from start by the same 2514 * amount as the page is offset from m_start within the object. The 2515 * last page in the sequence is the page with the largest offset from 2516 * m_start that can be mapped at a virtual address less than the given 2517 * virtual address end. 
Not every virtual page between start and end 2518 * is mapped; only those for which a resident page exists with the 2519 * corresponding offset from m_start are mapped. 2520 */ 2521 static void 2522 mmu_booke_enter_object(mmu_t mmu, pmap_t pmap, vm_offset_t start, 2523 vm_offset_t end, vm_page_t m_start, vm_prot_t prot) 2524 { 2525 vm_page_t m; 2526 vm_pindex_t diff, psize; 2527 2528 VM_OBJECT_ASSERT_LOCKED(m_start->object); 2529 2530 psize = atop(end - start); 2531 m = m_start; 2532 rw_wlock(&pvh_global_lock); 2533 PMAP_LOCK(pmap); 2534 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 2535 mmu_booke_enter_locked(mmu, pmap, start + ptoa(diff), m, 2536 prot & (VM_PROT_READ | VM_PROT_EXECUTE), 2537 PMAP_ENTER_NOSLEEP, 0); 2538 m = TAILQ_NEXT(m, listq); 2539 } 2540 rw_wunlock(&pvh_global_lock); 2541 PMAP_UNLOCK(pmap); 2542 } 2543 2544 static void 2545 mmu_booke_enter_quick(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 2546 vm_prot_t prot) 2547 { 2548 2549 rw_wlock(&pvh_global_lock); 2550 PMAP_LOCK(pmap); 2551 mmu_booke_enter_locked(mmu, pmap, va, m, 2552 prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP, 2553 0); 2554 rw_wunlock(&pvh_global_lock); 2555 PMAP_UNLOCK(pmap); 2556 } 2557 2558 /* 2559 * Remove the given range of addresses from the specified map. 2560 * 2561 * It is assumed that the start and end are properly rounded to the page size. 2562 */ 2563 static void 2564 mmu_booke_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_offset_t endva) 2565 { 2566 pte_t *pte; 2567 uint8_t hold_flag; 2568 2569 int su = (pmap == kernel_pmap); 2570 2571 //debugf("mmu_booke_remove: s (su = %d pmap=0x%08x tid=%d va=0x%08x endva=0x%08x)\n", 2572 // su, (u_int32_t)pmap, pmap->pm_tid, va, endva); 2573 2574 if (su) { 2575 KASSERT(((va >= virtual_avail) && 2576 (va <= VM_MAX_KERNEL_ADDRESS)), 2577 ("mmu_booke_remove: kernel pmap, non kernel va")); 2578 } else { 2579 KASSERT((va <= VM_MAXUSER_ADDRESS), 2580 ("mmu_booke_remove: user pmap, non user va")); 2581 } 2582 2583 if (PMAP_REMOVE_DONE(pmap)) { 2584 //debugf("mmu_booke_remove: e (empty)\n"); 2585 return; 2586 } 2587 2588 hold_flag = PTBL_HOLD_FLAG(pmap); 2589 //debugf("mmu_booke_remove: hold_flag = %d\n", hold_flag); 2590 2591 rw_wlock(&pvh_global_lock); 2592 PMAP_LOCK(pmap); 2593 for (; va < endva; va += PAGE_SIZE) { 2594 pte = pte_find(mmu, pmap, va); 2595 if ((pte != NULL) && PTE_ISVALID(pte)) 2596 pte_remove(mmu, pmap, va, hold_flag); 2597 } 2598 PMAP_UNLOCK(pmap); 2599 rw_wunlock(&pvh_global_lock); 2600 2601 //debugf("mmu_booke_remove: e\n"); 2602 } 2603 2604 /* 2605 * Remove physical page from all pmaps in which it resides. 2606 */ 2607 static void 2608 mmu_booke_remove_all(mmu_t mmu, vm_page_t m) 2609 { 2610 pv_entry_t pv, pvn; 2611 uint8_t hold_flag; 2612 2613 rw_wlock(&pvh_global_lock); 2614 for (pv = TAILQ_FIRST(&m->md.pv_list); pv != NULL; pv = pvn) { 2615 pvn = TAILQ_NEXT(pv, pv_link); 2616 2617 PMAP_LOCK(pv->pv_pmap); 2618 hold_flag = PTBL_HOLD_FLAG(pv->pv_pmap); 2619 pte_remove(mmu, pv->pv_pmap, pv->pv_va, hold_flag); 2620 PMAP_UNLOCK(pv->pv_pmap); 2621 } 2622 vm_page_aflag_clear(m, PGA_WRITEABLE); 2623 rw_wunlock(&pvh_global_lock); 2624 } 2625 2626 /* 2627 * Map a range of physical addresses into kernel virtual address space. 
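 *
 * The range [pa_start, pa_end) is entered page by page starting at *virt;
 * on return *virt has been advanced past the mapping and the original
 * starting VA is returned.  For illustration, mapping three pages with
 * *virt == V returns V and leaves *virt == V + 3 * PAGE_SIZE.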
2628 */ 2629 static vm_offset_t 2630 mmu_booke_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start, 2631 vm_paddr_t pa_end, int prot) 2632 { 2633 vm_offset_t sva = *virt; 2634 vm_offset_t va = sva; 2635 2636 //debugf("mmu_booke_map: s (sva = 0x%08x pa_start = 0x%08x pa_end = 0x%08x)\n", 2637 // sva, pa_start, pa_end); 2638 2639 while (pa_start < pa_end) { 2640 mmu_booke_kenter(mmu, va, pa_start); 2641 va += PAGE_SIZE; 2642 pa_start += PAGE_SIZE; 2643 } 2644 *virt = va; 2645 2646 //debugf("mmu_booke_map: e (va = 0x%08x)\n", va); 2647 return (sva); 2648 } 2649 2650 /* 2651 * The pmap must be activated before it's address space can be accessed in any 2652 * way. 2653 */ 2654 static void 2655 mmu_booke_activate(mmu_t mmu, struct thread *td) 2656 { 2657 pmap_t pmap; 2658 u_int cpuid; 2659 2660 pmap = &td->td_proc->p_vmspace->vm_pmap; 2661 2662 CTR5(KTR_PMAP, "%s: s (td = %p, proc = '%s', id = %d, pmap = 0x%08x)", 2663 __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap); 2664 2665 KASSERT((pmap != kernel_pmap), ("mmu_booke_activate: kernel_pmap!")); 2666 2667 sched_pin(); 2668 2669 cpuid = PCPU_GET(cpuid); 2670 CPU_SET_ATOMIC(cpuid, &pmap->pm_active); 2671 PCPU_SET(curpmap, pmap); 2672 2673 if (pmap->pm_tid[cpuid] == TID_NONE) 2674 tid_alloc(pmap); 2675 2676 /* Load PID0 register with pmap tid value. */ 2677 mtspr(SPR_PID0, pmap->pm_tid[cpuid]); 2678 __asm __volatile("isync"); 2679 2680 mtspr(SPR_DBCR0, td->td_pcb->pcb_cpu.booke.dbcr0); 2681 2682 sched_unpin(); 2683 2684 CTR3(KTR_PMAP, "%s: e (tid = %d for '%s')", __func__, 2685 pmap->pm_tid[PCPU_GET(cpuid)], td->td_proc->p_comm); 2686 } 2687 2688 /* 2689 * Deactivate the specified process's address space. 2690 */ 2691 static void 2692 mmu_booke_deactivate(mmu_t mmu, struct thread *td) 2693 { 2694 pmap_t pmap; 2695 2696 pmap = &td->td_proc->p_vmspace->vm_pmap; 2697 2698 CTR5(KTR_PMAP, "%s: td=%p, proc = '%s', id = %d, pmap = 0x%08x", 2699 __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap); 2700 2701 td->td_pcb->pcb_cpu.booke.dbcr0 = mfspr(SPR_DBCR0); 2702 2703 CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmap->pm_active); 2704 PCPU_SET(curpmap, NULL); 2705 } 2706 2707 /* 2708 * Copy the range specified by src_addr/len 2709 * from the source map to the range dst_addr/len 2710 * in the destination map. 2711 * 2712 * This routine is only advisory and need not do anything. 2713 */ 2714 static void 2715 mmu_booke_copy(mmu_t mmu, pmap_t dst_pmap, pmap_t src_pmap, 2716 vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) 2717 { 2718 2719 } 2720 2721 /* 2722 * Set the physical protection on the specified range of this map as requested. 2723 */ 2724 static void 2725 mmu_booke_protect(mmu_t mmu, pmap_t pmap, vm_offset_t sva, vm_offset_t eva, 2726 vm_prot_t prot) 2727 { 2728 vm_offset_t va; 2729 vm_page_t m; 2730 pte_t *pte; 2731 2732 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 2733 mmu_booke_remove(mmu, pmap, sva, eva); 2734 return; 2735 } 2736 2737 if (prot & VM_PROT_WRITE) 2738 return; 2739 2740 PMAP_LOCK(pmap); 2741 for (va = sva; va < eva; va += PAGE_SIZE) { 2742 if ((pte = pte_find(mmu, pmap, va)) != NULL) { 2743 if (PTE_ISVALID(pte)) { 2744 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 2745 2746 mtx_lock_spin(&tlbivax_mutex); 2747 tlb_miss_lock(); 2748 2749 /* Handle modified pages. 
*/ 2750 if (PTE_ISMODIFIED(pte) && PTE_ISMANAGED(pte)) 2751 vm_page_dirty(m); 2752 2753 tlb0_flush_entry(va); 2754 *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED); 2755 2756 tlb_miss_unlock(); 2757 mtx_unlock_spin(&tlbivax_mutex); 2758 } 2759 } 2760 } 2761 PMAP_UNLOCK(pmap); 2762 } 2763 2764 /* 2765 * Clear the write and modified bits in each of the given page's mappings. 2766 */ 2767 static void 2768 mmu_booke_remove_write(mmu_t mmu, vm_page_t m) 2769 { 2770 pv_entry_t pv; 2771 pte_t *pte; 2772 2773 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2774 ("mmu_booke_remove_write: page %p is not managed", m)); 2775 2776 /* 2777 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 2778 * set by another thread while the object is locked. Thus, 2779 * if PGA_WRITEABLE is clear, no page table entries need updating. 2780 */ 2781 VM_OBJECT_ASSERT_WLOCKED(m->object); 2782 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 2783 return; 2784 rw_wlock(&pvh_global_lock); 2785 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 2786 PMAP_LOCK(pv->pv_pmap); 2787 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) { 2788 if (PTE_ISVALID(pte)) { 2789 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 2790 2791 mtx_lock_spin(&tlbivax_mutex); 2792 tlb_miss_lock(); 2793 2794 /* Handle modified pages. */ 2795 if (PTE_ISMODIFIED(pte)) 2796 vm_page_dirty(m); 2797 2798 /* Flush mapping from TLB0. */ 2799 *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED); 2800 2801 tlb_miss_unlock(); 2802 mtx_unlock_spin(&tlbivax_mutex); 2803 } 2804 } 2805 PMAP_UNLOCK(pv->pv_pmap); 2806 } 2807 vm_page_aflag_clear(m, PGA_WRITEABLE); 2808 rw_wunlock(&pvh_global_lock); 2809 } 2810 2811 static void 2812 mmu_booke_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) 2813 { 2814 pte_t *pte; 2815 pmap_t pmap; 2816 vm_page_t m; 2817 vm_offset_t addr; 2818 vm_paddr_t pa = 0; 2819 int active, valid; 2820 2821 va = trunc_page(va); 2822 sz = round_page(sz); 2823 2824 rw_wlock(&pvh_global_lock); 2825 pmap = PCPU_GET(curpmap); 2826 active = (pm == kernel_pmap || pm == pmap) ? 1 : 0; 2827 while (sz > 0) { 2828 PMAP_LOCK(pm); 2829 pte = pte_find(mmu, pm, va); 2830 valid = (pte != NULL && PTE_ISVALID(pte)) ? 1 : 0; 2831 if (valid) 2832 pa = PTE_PA(pte); 2833 PMAP_UNLOCK(pm); 2834 if (valid) { 2835 if (!active) { 2836 /* Create a mapping in the active pmap. */ 2837 addr = 0; 2838 m = PHYS_TO_VM_PAGE(pa); 2839 PMAP_LOCK(pmap); 2840 pte_enter(mmu, pmap, m, addr, 2841 PTE_SR | PTE_VALID | PTE_UR, FALSE); 2842 __syncicache((void *)addr, PAGE_SIZE); 2843 pte_remove(mmu, pmap, addr, PTBL_UNHOLD); 2844 PMAP_UNLOCK(pmap); 2845 } else 2846 __syncicache((void *)va, PAGE_SIZE); 2847 } 2848 va += PAGE_SIZE; 2849 sz -= PAGE_SIZE; 2850 } 2851 rw_wunlock(&pvh_global_lock); 2852 } 2853 2854 /* 2855 * Atomically extract and hold the physical page with the given 2856 * pmap and virtual address pair if that mapping permits the given 2857 * protection. 
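 *
 * An illustrative (hypothetical) caller that only holds the page when it
 * is currently writable in the given pmap:
 *
 *	m = mmu_booke_extract_and_hold(mmu, pmap, va, VM_PROT_WRITE);
 *	if (m != NULL) {
 *		... use the page ...
 *		vm_page_unhold(m);
 *	}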
2858 */ 2859 static vm_page_t 2860 mmu_booke_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, 2861 vm_prot_t prot) 2862 { 2863 pte_t *pte; 2864 vm_page_t m; 2865 uint32_t pte_wbit; 2866 vm_paddr_t pa; 2867 2868 m = NULL; 2869 pa = 0; 2870 PMAP_LOCK(pmap); 2871 retry: 2872 pte = pte_find(mmu, pmap, va); 2873 if ((pte != NULL) && PTE_ISVALID(pte)) { 2874 if (pmap == kernel_pmap) 2875 pte_wbit = PTE_SW; 2876 else 2877 pte_wbit = PTE_UW; 2878 2879 if ((*pte & pte_wbit) || ((prot & VM_PROT_WRITE) == 0)) { 2880 if (vm_page_pa_tryrelock(pmap, PTE_PA(pte), &pa)) 2881 goto retry; 2882 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 2883 vm_page_hold(m); 2884 } 2885 } 2886 2887 PA_UNLOCK_COND(pa); 2888 PMAP_UNLOCK(pmap); 2889 return (m); 2890 } 2891 2892 /* 2893 * Initialize a vm_page's machine-dependent fields. 2894 */ 2895 static void 2896 mmu_booke_page_init(mmu_t mmu, vm_page_t m) 2897 { 2898 2899 m->md.pv_tracked = 0; 2900 TAILQ_INIT(&m->md.pv_list); 2901 } 2902 2903 /* 2904 * mmu_booke_zero_page_area zeros the specified hardware page by 2905 * mapping it into virtual memory and using bzero to clear 2906 * its contents. 2907 * 2908 * off and size must reside within a single page. 2909 */ 2910 static void 2911 mmu_booke_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) 2912 { 2913 vm_offset_t va; 2914 2915 /* XXX KASSERT off and size are within a single page? */ 2916 2917 mtx_lock(&zero_page_mutex); 2918 va = zero_page_va; 2919 2920 mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); 2921 bzero((caddr_t)va + off, size); 2922 mmu_booke_kremove(mmu, va); 2923 2924 mtx_unlock(&zero_page_mutex); 2925 } 2926 2927 /* 2928 * mmu_booke_zero_page zeros the specified hardware page. 2929 */ 2930 static void 2931 mmu_booke_zero_page(mmu_t mmu, vm_page_t m) 2932 { 2933 vm_offset_t off, va; 2934 2935 mtx_lock(&zero_page_mutex); 2936 va = zero_page_va; 2937 2938 mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); 2939 for (off = 0; off < PAGE_SIZE; off += cacheline_size) 2940 __asm __volatile("dcbz 0,%0" :: "r"(va + off)); 2941 mmu_booke_kremove(mmu, va); 2942 2943 mtx_unlock(&zero_page_mutex); 2944 } 2945 2946 /* 2947 * mmu_booke_copy_page copies the specified (machine independent) page by 2948 * mapping the page into virtual memory and using memcopy to copy the page, 2949 * one machine dependent page at a time. 
 */
static void
mmu_booke_copy_page(mmu_t mmu, vm_page_t sm, vm_page_t dm)
{
	vm_offset_t sva, dva;

	sva = copy_page_src_va;
	dva = copy_page_dst_va;

	mtx_lock(&copy_page_mutex);
	mmu_booke_kenter(mmu, sva, VM_PAGE_TO_PHYS(sm));
	mmu_booke_kenter(mmu, dva, VM_PAGE_TO_PHYS(dm));
	memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE);
	mmu_booke_kremove(mmu, dva);
	mmu_booke_kremove(mmu, sva);
	mtx_unlock(&copy_page_mutex);
}

static inline void
mmu_booke_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset,
    vm_page_t *mb, vm_offset_t b_offset, int xfersize)
{
	void *a_cp, *b_cp;
	vm_offset_t a_pg_offset, b_pg_offset;
	int cnt;

	mtx_lock(&copy_page_mutex);
	while (xfersize > 0) {
		a_pg_offset = a_offset & PAGE_MASK;
		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
		mmu_booke_kenter(mmu, copy_page_src_va,
		    VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT]));
		a_cp = (char *)copy_page_src_va + a_pg_offset;
		b_pg_offset = b_offset & PAGE_MASK;
		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
		mmu_booke_kenter(mmu, copy_page_dst_va,
		    VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT]));
		b_cp = (char *)copy_page_dst_va + b_pg_offset;
		bcopy(a_cp, b_cp, cnt);
		mmu_booke_kremove(mmu, copy_page_dst_va);
		mmu_booke_kremove(mmu, copy_page_src_va);
		a_offset += cnt;
		b_offset += cnt;
		xfersize -= cnt;
	}
	mtx_unlock(&copy_page_mutex);
}

static vm_offset_t
mmu_booke_quick_enter_page(mmu_t mmu, vm_page_t m)
{
	vm_paddr_t paddr;
	vm_offset_t qaddr;
	uint32_t flags;
	pte_t *pte;

	paddr = VM_PAGE_TO_PHYS(m);

	flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID;
	flags |= tlb_calc_wimg(paddr, pmap_page_get_memattr(m)) << PTE_MAS2_SHIFT;
	flags |= PTE_PS_4KB;

	critical_enter();
	qaddr = PCPU_GET(qmap_addr);

	pte = pte_find(mmu, kernel_pmap, qaddr);

	KASSERT(*pte == 0, ("mmu_booke_quick_enter_page: PTE busy"));

	/*
	 * XXX: tlbivax is broadcast to other cores, but qaddr should
	 * not be present in other TLBs.  Is there a better instruction
	 * sequence to use? Or just forget it & use mmu_booke_kenter()...
	 */
	__asm __volatile("tlbivax 0, %0" :: "r"(qaddr & MAS2_EPN_MASK));
	__asm __volatile("isync; msync");

	*pte = PTE_RPN_FROM_PA(paddr) | flags;

	/* Flush the real memory from the instruction cache. */
	if ((flags & (PTE_I | PTE_G)) == 0)
		__syncicache((void *)qaddr, PAGE_SIZE);

	return (qaddr);
}

static void
mmu_booke_quick_remove_page(mmu_t mmu, vm_offset_t addr)
{
	pte_t *pte;

	pte = pte_find(mmu, kernel_pmap, addr);

	KASSERT(PCPU_GET(qmap_addr) == addr,
	    ("mmu_booke_quick_remove_page: invalid address"));
	KASSERT(*pte != 0,
	    ("mmu_booke_quick_remove_page: PTE not in use"));

	*pte = 0;
	critical_exit();
}

/*
 * Return whether or not the specified physical page was modified
 * in any of physical maps.
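 *
 * The modified state is tracked in software: the TLB miss code sets
 * PTE_MODIFIED when a store goes through a valid, writable mapping, so
 * scanning the page's pv list for that bit is sufficient here.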
3055 */ 3056 static boolean_t 3057 mmu_booke_is_modified(mmu_t mmu, vm_page_t m) 3058 { 3059 pte_t *pte; 3060 pv_entry_t pv; 3061 boolean_t rv; 3062 3063 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3064 ("mmu_booke_is_modified: page %p is not managed", m)); 3065 rv = FALSE; 3066 3067 /* 3068 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 3069 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 3070 * is clear, no PTEs can be modified. 3071 */ 3072 VM_OBJECT_ASSERT_WLOCKED(m->object); 3073 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 3074 return (rv); 3075 rw_wlock(&pvh_global_lock); 3076 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3077 PMAP_LOCK(pv->pv_pmap); 3078 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3079 PTE_ISVALID(pte)) { 3080 if (PTE_ISMODIFIED(pte)) 3081 rv = TRUE; 3082 } 3083 PMAP_UNLOCK(pv->pv_pmap); 3084 if (rv) 3085 break; 3086 } 3087 rw_wunlock(&pvh_global_lock); 3088 return (rv); 3089 } 3090 3091 /* 3092 * Return whether or not the specified virtual address is eligible 3093 * for prefault. 3094 */ 3095 static boolean_t 3096 mmu_booke_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t addr) 3097 { 3098 3099 return (FALSE); 3100 } 3101 3102 /* 3103 * Return whether or not the specified physical page was referenced 3104 * in any physical maps. 3105 */ 3106 static boolean_t 3107 mmu_booke_is_referenced(mmu_t mmu, vm_page_t m) 3108 { 3109 pte_t *pte; 3110 pv_entry_t pv; 3111 boolean_t rv; 3112 3113 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3114 ("mmu_booke_is_referenced: page %p is not managed", m)); 3115 rv = FALSE; 3116 rw_wlock(&pvh_global_lock); 3117 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3118 PMAP_LOCK(pv->pv_pmap); 3119 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3120 PTE_ISVALID(pte)) { 3121 if (PTE_ISREFERENCED(pte)) 3122 rv = TRUE; 3123 } 3124 PMAP_UNLOCK(pv->pv_pmap); 3125 if (rv) 3126 break; 3127 } 3128 rw_wunlock(&pvh_global_lock); 3129 return (rv); 3130 } 3131 3132 /* 3133 * Clear the modify bits on the specified physical page. 3134 */ 3135 static void 3136 mmu_booke_clear_modify(mmu_t mmu, vm_page_t m) 3137 { 3138 pte_t *pte; 3139 pv_entry_t pv; 3140 3141 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3142 ("mmu_booke_clear_modify: page %p is not managed", m)); 3143 VM_OBJECT_ASSERT_WLOCKED(m->object); 3144 KASSERT(!vm_page_xbusied(m), 3145 ("mmu_booke_clear_modify: page %p is exclusive busied", m)); 3146 3147 /* 3148 * If the page is not PG_AWRITEABLE, then no PTEs can be modified. 3149 * If the object containing the page is locked and the page is not 3150 * exclusive busied, then PG_AWRITEABLE cannot be concurrently set. 3151 */ 3152 if ((m->aflags & PGA_WRITEABLE) == 0) 3153 return; 3154 rw_wlock(&pvh_global_lock); 3155 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3156 PMAP_LOCK(pv->pv_pmap); 3157 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3158 PTE_ISVALID(pte)) { 3159 mtx_lock_spin(&tlbivax_mutex); 3160 tlb_miss_lock(); 3161 3162 if (*pte & (PTE_SW | PTE_UW | PTE_MODIFIED)) { 3163 tlb0_flush_entry(pv->pv_va); 3164 *pte &= ~(PTE_SW | PTE_UW | PTE_MODIFIED | 3165 PTE_REFERENCED); 3166 } 3167 3168 tlb_miss_unlock(); 3169 mtx_unlock_spin(&tlbivax_mutex); 3170 } 3171 PMAP_UNLOCK(pv->pv_pmap); 3172 } 3173 rw_wunlock(&pvh_global_lock); 3174 } 3175 3176 /* 3177 * Return a count of reference bits for a page, clearing those bits. 
3178 * It is not necessary for every reference bit to be cleared, but it 3179 * is necessary that 0 only be returned when there are truly no 3180 * reference bits set. 3181 * 3182 * As an optimization, update the page's dirty field if a modified bit is 3183 * found while counting reference bits. This opportunistic update can be 3184 * performed at low cost and can eliminate the need for some future calls 3185 * to pmap_is_modified(). However, since this function stops after 3186 * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some 3187 * dirty pages. Those dirty pages will only be detected by a future call 3188 * to pmap_is_modified(). 3189 */ 3190 static int 3191 mmu_booke_ts_referenced(mmu_t mmu, vm_page_t m) 3192 { 3193 pte_t *pte; 3194 pv_entry_t pv; 3195 int count; 3196 3197 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3198 ("mmu_booke_ts_referenced: page %p is not managed", m)); 3199 count = 0; 3200 rw_wlock(&pvh_global_lock); 3201 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3202 PMAP_LOCK(pv->pv_pmap); 3203 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3204 PTE_ISVALID(pte)) { 3205 if (PTE_ISMODIFIED(pte)) 3206 vm_page_dirty(m); 3207 if (PTE_ISREFERENCED(pte)) { 3208 mtx_lock_spin(&tlbivax_mutex); 3209 tlb_miss_lock(); 3210 3211 tlb0_flush_entry(pv->pv_va); 3212 *pte &= ~PTE_REFERENCED; 3213 3214 tlb_miss_unlock(); 3215 mtx_unlock_spin(&tlbivax_mutex); 3216 3217 if (++count >= PMAP_TS_REFERENCED_MAX) { 3218 PMAP_UNLOCK(pv->pv_pmap); 3219 break; 3220 } 3221 } 3222 } 3223 PMAP_UNLOCK(pv->pv_pmap); 3224 } 3225 rw_wunlock(&pvh_global_lock); 3226 return (count); 3227 } 3228 3229 /* 3230 * Clear the wired attribute from the mappings for the specified range of 3231 * addresses in the given pmap. Every valid mapping within that range must 3232 * have the wired attribute set. In contrast, invalid mappings cannot have 3233 * the wired attribute set, so they are ignored. 3234 * 3235 * The wired attribute of the page table entry is not a hardware feature, so 3236 * there is no need to invalidate any TLB entries. 3237 */ 3238 static void 3239 mmu_booke_unwire(mmu_t mmu, pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 3240 { 3241 vm_offset_t va; 3242 pte_t *pte; 3243 3244 PMAP_LOCK(pmap); 3245 for (va = sva; va < eva; va += PAGE_SIZE) { 3246 if ((pte = pte_find(mmu, pmap, va)) != NULL && 3247 PTE_ISVALID(pte)) { 3248 if (!PTE_ISWIRED(pte)) 3249 panic("mmu_booke_unwire: pte %p isn't wired", 3250 pte); 3251 *pte &= ~PTE_WIRED; 3252 pmap->pm_stats.wired_count--; 3253 } 3254 } 3255 PMAP_UNLOCK(pmap); 3256 3257 } 3258 3259 /* 3260 * Return true if the pmap's pv is one of the first 16 pvs linked to from this 3261 * page. This count may be changed upwards or downwards in the future; it is 3262 * only necessary that true be returned for a small subset of pmaps for proper 3263 * page aging. 3264 */ 3265 static boolean_t 3266 mmu_booke_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m) 3267 { 3268 pv_entry_t pv; 3269 int loops; 3270 boolean_t rv; 3271 3272 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3273 ("mmu_booke_page_exists_quick: page %p is not managed", m)); 3274 loops = 0; 3275 rv = FALSE; 3276 rw_wlock(&pvh_global_lock); 3277 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3278 if (pv->pv_pmap == pmap) { 3279 rv = TRUE; 3280 break; 3281 } 3282 if (++loops >= 16) 3283 break; 3284 } 3285 rw_wunlock(&pvh_global_lock); 3286 return (rv); 3287 } 3288 3289 /* 3290 * Return the number of managed mappings to the given physical page that are 3291 * wired. 
3292 */ 3293 static int 3294 mmu_booke_page_wired_mappings(mmu_t mmu, vm_page_t m) 3295 { 3296 pv_entry_t pv; 3297 pte_t *pte; 3298 int count = 0; 3299 3300 if ((m->oflags & VPO_UNMANAGED) != 0) 3301 return (count); 3302 rw_wlock(&pvh_global_lock); 3303 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3304 PMAP_LOCK(pv->pv_pmap); 3305 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) 3306 if (PTE_ISVALID(pte) && PTE_ISWIRED(pte)) 3307 count++; 3308 PMAP_UNLOCK(pv->pv_pmap); 3309 } 3310 rw_wunlock(&pvh_global_lock); 3311 return (count); 3312 } 3313 3314 static int 3315 mmu_booke_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size) 3316 { 3317 int i; 3318 vm_offset_t va; 3319 3320 /* 3321 * This currently does not work for entries that 3322 * overlap TLB1 entries. 3323 */ 3324 for (i = 0; i < TLB1_ENTRIES; i ++) { 3325 if (tlb1_iomapped(i, pa, size, &va) == 0) 3326 return (0); 3327 } 3328 3329 return (EFAULT); 3330 } 3331 3332 void 3333 mmu_booke_dumpsys_map(mmu_t mmu, vm_paddr_t pa, size_t sz, void **va) 3334 { 3335 vm_paddr_t ppa; 3336 vm_offset_t ofs; 3337 vm_size_t gran; 3338 3339 /* Minidumps are based on virtual memory addresses. */ 3340 if (do_minidump) { 3341 *va = (void *)(vm_offset_t)pa; 3342 return; 3343 } 3344 3345 /* Raw physical memory dumps don't have a virtual address. */ 3346 /* We always map a 256MB page at 256M. */ 3347 gran = 256 * 1024 * 1024; 3348 ppa = rounddown2(pa, gran); 3349 ofs = pa - ppa; 3350 *va = (void *)gran; 3351 tlb1_set_entry((vm_offset_t)va, ppa, gran, _TLB_ENTRY_IO); 3352 3353 if (sz > (gran - ofs)) 3354 tlb1_set_entry((vm_offset_t)(va + gran), ppa + gran, gran, 3355 _TLB_ENTRY_IO); 3356 } 3357 3358 void 3359 mmu_booke_dumpsys_unmap(mmu_t mmu, vm_paddr_t pa, size_t sz, void *va) 3360 { 3361 vm_paddr_t ppa; 3362 vm_offset_t ofs; 3363 vm_size_t gran; 3364 tlb_entry_t e; 3365 int i; 3366 3367 /* Minidumps are based on virtual memory addresses. */ 3368 /* Nothing to do... */ 3369 if (do_minidump) 3370 return; 3371 3372 for (i = 0; i < TLB1_ENTRIES; i++) { 3373 tlb1_read_entry(&e, i); 3374 if (!(e.mas1 & MAS1_VALID)) 3375 break; 3376 } 3377 3378 /* Raw physical memory dumps don't have a virtual address. */ 3379 i--; 3380 e.mas1 = 0; 3381 e.mas2 = 0; 3382 e.mas3 = 0; 3383 tlb1_write_entry(&e, i); 3384 3385 gran = 256 * 1024 * 1024; 3386 ppa = rounddown2(pa, gran); 3387 ofs = pa - ppa; 3388 if (sz > (gran - ofs)) { 3389 i--; 3390 e.mas1 = 0; 3391 e.mas2 = 0; 3392 e.mas3 = 0; 3393 tlb1_write_entry(&e, i); 3394 } 3395 } 3396 3397 extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1]; 3398 3399 void 3400 mmu_booke_scan_init(mmu_t mmu) 3401 { 3402 vm_offset_t va; 3403 pte_t *pte; 3404 int i; 3405 3406 if (!do_minidump) { 3407 /* Initialize phys. segments for dumpsys(). */ 3408 memset(&dump_map, 0, sizeof(dump_map)); 3409 mem_regions(&physmem_regions, &physmem_regions_sz, &availmem_regions, 3410 &availmem_regions_sz); 3411 for (i = 0; i < physmem_regions_sz; i++) { 3412 dump_map[i].pa_start = physmem_regions[i].mr_start; 3413 dump_map[i].pa_size = physmem_regions[i].mr_size; 3414 } 3415 return; 3416 } 3417 3418 /* Virtual segments for minidumps: */ 3419 memset(&dump_map, 0, sizeof(dump_map)); 3420 3421 /* 1st: kernel .data and .bss. */ 3422 dump_map[0].pa_start = trunc_page((uintptr_t)_etext); 3423 dump_map[0].pa_size = 3424 round_page((uintptr_t)_end) - dump_map[0].pa_start; 3425 3426 /* 2nd: msgbuf and tables (see pmap_bootstrap()). */ 3427 dump_map[1].pa_start = data_start; 3428 dump_map[1].pa_size = data_end - data_start; 3429 3430 /* 3rd: kernel VM. 
*/ 3431 va = dump_map[1].pa_start + dump_map[1].pa_size; 3432 /* Find start of next chunk (from va). */ 3433 while (va < virtual_end) { 3434 /* Don't dump the buffer cache. */ 3435 if (va >= kmi.buffer_sva && va < kmi.buffer_eva) { 3436 va = kmi.buffer_eva; 3437 continue; 3438 } 3439 pte = pte_find(mmu, kernel_pmap, va); 3440 if (pte != NULL && PTE_ISVALID(pte)) 3441 break; 3442 va += PAGE_SIZE; 3443 } 3444 if (va < virtual_end) { 3445 dump_map[2].pa_start = va; 3446 va += PAGE_SIZE; 3447 /* Find last page in chunk. */ 3448 while (va < virtual_end) { 3449 /* Don't run into the buffer cache. */ 3450 if (va == kmi.buffer_sva) 3451 break; 3452 pte = pte_find(mmu, kernel_pmap, va); 3453 if (pte == NULL || !PTE_ISVALID(pte)) 3454 break; 3455 va += PAGE_SIZE; 3456 } 3457 dump_map[2].pa_size = va - dump_map[2].pa_start; 3458 } 3459 } 3460 3461 /* 3462 * Map a set of physical memory pages into the kernel virtual address space. 3463 * Return a pointer to where it is mapped. This routine is intended to be used 3464 * for mapping device memory, NOT real memory. 3465 */ 3466 static void * 3467 mmu_booke_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size) 3468 { 3469 3470 return (mmu_booke_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT)); 3471 } 3472 3473 static void * 3474 mmu_booke_mapdev_attr(mmu_t mmu, vm_paddr_t pa, vm_size_t size, vm_memattr_t ma) 3475 { 3476 tlb_entry_t e; 3477 void *res; 3478 uintptr_t va, tmpva; 3479 vm_size_t sz; 3480 int i; 3481 3482 /* 3483 * Check if this is premapped in TLB1. Note: this should probably also 3484 * check whether a sequence of TLB1 entries exist that match the 3485 * requirement, but now only checks the easy case. 3486 */ 3487 for (i = 0; i < TLB1_ENTRIES; i++) { 3488 tlb1_read_entry(&e, i); 3489 if (!(e.mas1 & MAS1_VALID)) 3490 continue; 3491 if (pa >= e.phys && 3492 (pa + size) <= (e.phys + e.size) && 3493 (ma == VM_MEMATTR_DEFAULT || 3494 tlb_calc_wimg(pa, ma) == 3495 (e.mas2 & (MAS2_WIMGE_MASK & ~_TLB_ENTRY_SHARED)))) 3496 return (void *)(e.virt + 3497 (vm_offset_t)(pa - e.phys)); 3498 } 3499 3500 size = roundup(size, PAGE_SIZE); 3501 3502 /* 3503 * The device mapping area is between VM_MAXUSER_ADDRESS and 3504 * VM_MIN_KERNEL_ADDRESS. This gives 1GB of device addressing. 3505 */ 3506 #ifdef SPARSE_MAPDEV 3507 /* 3508 * With a sparse mapdev, align to the largest starting region. This 3509 * could feasibly be optimized for a 'best-fit' alignment, but that 3510 * calculation could be very costly. 3511 * Align to the smaller of: 3512 * - first set bit in overlap of (pa & size mask) 3513 * - largest size envelope 3514 * 3515 * It's possible the device mapping may start at a PA that's not larger 3516 * than the size mask, so we need to offset in to maximize the TLB entry 3517 * range and minimize the number of used TLB entries. 3518 */ 3519 do { 3520 tmpva = tlb1_map_base; 3521 sz = ffsl(((1 << flsl(size-1)) - 1) & pa); 3522 sz = sz ? 
min(roundup(sz + 3, 4), flsl(size) - 1) : flsl(size) - 1; 3523 va = roundup(tlb1_map_base, 1 << sz) | (((1 << sz) - 1) & pa); 3524 #ifdef __powerpc64__ 3525 } while (!atomic_cmpset_long(&tlb1_map_base, tmpva, va + size)); 3526 #else 3527 } while (!atomic_cmpset_int(&tlb1_map_base, tmpva, va + size)); 3528 #endif 3529 #else 3530 #ifdef __powerpc64__ 3531 va = atomic_fetchadd_long(&tlb1_map_base, size); 3532 #else 3533 va = atomic_fetchadd_int(&tlb1_map_base, size); 3534 #endif 3535 #endif 3536 res = (void *)va; 3537 3538 do { 3539 sz = 1 << (ilog2(size) & ~1); 3540 /* Align size to PA */ 3541 if (pa % sz != 0) { 3542 do { 3543 sz >>= 2; 3544 } while (pa % sz != 0); 3545 } 3546 /* Now align from there to VA */ 3547 if (va % sz != 0) { 3548 do { 3549 sz >>= 2; 3550 } while (va % sz != 0); 3551 } 3552 if (bootverbose) 3553 printf("Wiring VA=%lx to PA=%jx (size=%lx)\n", 3554 va, (uintmax_t)pa, sz); 3555 if (tlb1_set_entry(va, pa, sz, 3556 _TLB_ENTRY_SHARED | tlb_calc_wimg(pa, ma)) < 0) 3557 return (NULL); 3558 size -= sz; 3559 pa += sz; 3560 va += sz; 3561 } while (size > 0); 3562 3563 return (res); 3564 } 3565 3566 /* 3567 * 'Unmap' a range mapped by mmu_booke_mapdev(). 3568 */ 3569 static void 3570 mmu_booke_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size) 3571 { 3572 #ifdef SUPPORTS_SHRINKING_TLB1 3573 vm_offset_t base, offset; 3574 3575 /* 3576 * Unmap only if this is inside kernel virtual space. 3577 */ 3578 if ((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)) { 3579 base = trunc_page(va); 3580 offset = va & PAGE_MASK; 3581 size = roundup(offset + size, PAGE_SIZE); 3582 kva_free(base, size); 3583 } 3584 #endif 3585 } 3586 3587 /* 3588 * mmu_booke_object_init_pt preloads the ptes for a given object into the 3589 * specified pmap. This eliminates the blast of soft faults on process startup 3590 * and immediately after an mmap. 3591 */ 3592 static void 3593 mmu_booke_object_init_pt(mmu_t mmu, pmap_t pmap, vm_offset_t addr, 3594 vm_object_t object, vm_pindex_t pindex, vm_size_t size) 3595 { 3596 3597 VM_OBJECT_ASSERT_WLOCKED(object); 3598 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 3599 ("mmu_booke_object_init_pt: non-device object")); 3600 } 3601 3602 /* 3603 * Perform the pmap work for mincore. 3604 */ 3605 static int 3606 mmu_booke_mincore(mmu_t mmu, pmap_t pmap, vm_offset_t addr, 3607 vm_paddr_t *locked_pa) 3608 { 3609 3610 /* XXX: this should be implemented at some point */ 3611 return (0); 3612 } 3613 3614 static int 3615 mmu_booke_change_attr(mmu_t mmu, vm_offset_t addr, vm_size_t sz, 3616 vm_memattr_t mode) 3617 { 3618 vm_offset_t va; 3619 pte_t *pte; 3620 int i, j; 3621 tlb_entry_t e; 3622 3623 /* Check TLB1 mappings */ 3624 for (i = 0; i < TLB1_ENTRIES; i++) { 3625 tlb1_read_entry(&e, i); 3626 if (!(e.mas1 & MAS1_VALID)) 3627 continue; 3628 if (addr >= e.virt && addr < e.virt + e.size) 3629 break; 3630 } 3631 if (i < TLB1_ENTRIES) { 3632 /* Only allow full mappings to be modified for now. */ 3633 /* Validate the range. */ 3634 for (j = i, va = addr; va < addr + sz; va += e.size, j++) { 3635 tlb1_read_entry(&e, j); 3636 if (va != e.virt || (sz - (va - addr) < e.size)) 3637 return (EINVAL); 3638 } 3639 for (va = addr; va < addr + sz; va += e.size, i++) { 3640 tlb1_read_entry(&e, i); 3641 e.mas2 &= ~MAS2_WIMGE_MASK; 3642 e.mas2 |= tlb_calc_wimg(e.phys, mode); 3643 3644 /* 3645 * Write it out to the TLB. Should really re-sync with other 3646 * cores. 
3647 */ 3648 tlb1_write_entry(&e, i); 3649 } 3650 return (0); 3651 } 3652 3653 /* Not in TLB1, try through pmap */ 3654 /* First validate the range. */ 3655 for (va = addr; va < addr + sz; va += PAGE_SIZE) { 3656 pte = pte_find(mmu, kernel_pmap, va); 3657 if (pte == NULL || !PTE_ISVALID(pte)) 3658 return (EINVAL); 3659 } 3660 3661 mtx_lock_spin(&tlbivax_mutex); 3662 tlb_miss_lock(); 3663 for (va = addr; va < addr + sz; va += PAGE_SIZE) { 3664 pte = pte_find(mmu, kernel_pmap, va); 3665 *pte &= ~(PTE_MAS2_MASK << PTE_MAS2_SHIFT); 3666 *pte |= tlb_calc_wimg(PTE_PA(pte), mode) << PTE_MAS2_SHIFT; 3667 tlb0_flush_entry(va); 3668 } 3669 tlb_miss_unlock(); 3670 mtx_unlock_spin(&tlbivax_mutex); 3671 3672 return (0); 3673 } 3674 3675 /**************************************************************************/ 3676 /* TID handling */ 3677 /**************************************************************************/ 3678 3679 /* 3680 * Allocate a TID. If necessary, steal one from someone else. 3681 * The new TID is flushed from the TLB before returning. 3682 */ 3683 static tlbtid_t 3684 tid_alloc(pmap_t pmap) 3685 { 3686 tlbtid_t tid; 3687 int thiscpu; 3688 3689 KASSERT((pmap != kernel_pmap), ("tid_alloc: kernel pmap")); 3690 3691 CTR2(KTR_PMAP, "%s: s (pmap = %p)", __func__, pmap); 3692 3693 thiscpu = PCPU_GET(cpuid); 3694 3695 tid = PCPU_GET(tid_next); 3696 if (tid > TID_MAX) 3697 tid = TID_MIN; 3698 PCPU_SET(tid_next, tid + 1); 3699 3700 /* If we are stealing TID then clear the relevant pmap's field */ 3701 if (tidbusy[thiscpu][tid] != NULL) { 3702 3703 CTR2(KTR_PMAP, "%s: warning: stealing tid %d", __func__, tid); 3704 3705 tidbusy[thiscpu][tid]->pm_tid[thiscpu] = TID_NONE; 3706 3707 /* Flush all entries from TLB0 matching this TID. */ 3708 tid_flush(tid); 3709 } 3710 3711 tidbusy[thiscpu][tid] = pmap; 3712 pmap->pm_tid[thiscpu] = tid; 3713 __asm __volatile("msync; isync"); 3714 3715 CTR3(KTR_PMAP, "%s: e (%02d next = %02d)", __func__, tid, 3716 PCPU_GET(tid_next)); 3717 3718 return (tid); 3719 } 3720 3721 /**************************************************************************/ 3722 /* TLB0 handling */ 3723 /**************************************************************************/ 3724 3725 static void 3726 #ifdef __powerpc64__ 3727 tlb_print_entry(int i, uint32_t mas1, uint64_t mas2, uint32_t mas3, 3728 #else 3729 tlb_print_entry(int i, uint32_t mas1, uint32_t mas2, uint32_t mas3, 3730 #endif 3731 uint32_t mas7) 3732 { 3733 int as; 3734 char desc[3]; 3735 tlbtid_t tid; 3736 vm_size_t size; 3737 unsigned int tsize; 3738 3739 desc[2] = '\0'; 3740 if (mas1 & MAS1_VALID) 3741 desc[0] = 'V'; 3742 else 3743 desc[0] = ' '; 3744 3745 if (mas1 & MAS1_IPROT) 3746 desc[1] = 'P'; 3747 else 3748 desc[1] = ' '; 3749 3750 as = (mas1 & MAS1_TS_MASK) ? 1 : 0; 3751 tid = MAS1_GETTID(mas1); 3752 3753 tsize = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; 3754 size = 0; 3755 if (tsize) 3756 size = tsize2size(tsize); 3757 3758 debugf("%3d: (%s) [AS=%d] " 3759 "sz = 0x%08x tsz = %d tid = %d mas1 = 0x%08x " 3760 "mas2(va) = 0x%"PRI0ptrX" mas3(pa) = 0x%08x mas7 = 0x%08x\n", 3761 i, desc, as, size, tsize, tid, mas1, mas2, mas3, mas7); 3762 } 3763 3764 /* Convert TLB0 va and way number to tlb0[] table index. */ 3765 static inline unsigned int 3766 tlb0_tableidx(vm_offset_t va, unsigned int way) 3767 { 3768 unsigned int idx; 3769 3770 idx = (way * TLB0_ENTRIES_PER_WAY); 3771 idx += (va & MAS2_TLB0_ENTRY_IDX_MASK) >> MAS2_TLB0_ENTRY_IDX_SHIFT; 3772 return (idx); 3773 } 3774 3775 /* 3776 * Invalidate TLB0 entry. 
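 *
 * tlbivax invalidates the matching entry and is broadcast to the other
 * cores; the trailing tlbsync/msync pair makes the invalidation visible
 * system-wide.  Callers must hold tlbivax_mutex, which serializes the
 * invalidate/sync sequence.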
3777 */ 3778 static inline void 3779 tlb0_flush_entry(vm_offset_t va) 3780 { 3781 3782 CTR2(KTR_PMAP, "%s: s va=0x%08x", __func__, va); 3783 3784 mtx_assert(&tlbivax_mutex, MA_OWNED); 3785 3786 __asm __volatile("tlbivax 0, %0" :: "r"(va & MAS2_EPN_MASK)); 3787 __asm __volatile("isync; msync"); 3788 __asm __volatile("tlbsync; msync"); 3789 3790 CTR1(KTR_PMAP, "%s: e", __func__); 3791 } 3792 3793 /* Print out contents of the MAS registers for each TLB0 entry */ 3794 void 3795 tlb0_print_tlbentries(void) 3796 { 3797 uint32_t mas0, mas1, mas3, mas7; 3798 #ifdef __powerpc64__ 3799 uint64_t mas2; 3800 #else 3801 uint32_t mas2; 3802 #endif 3803 int entryidx, way, idx; 3804 3805 debugf("TLB0 entries:\n"); 3806 for (way = 0; way < TLB0_WAYS; way ++) 3807 for (entryidx = 0; entryidx < TLB0_ENTRIES_PER_WAY; entryidx++) { 3808 3809 mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way); 3810 mtspr(SPR_MAS0, mas0); 3811 __asm __volatile("isync"); 3812 3813 mas2 = entryidx << MAS2_TLB0_ENTRY_IDX_SHIFT; 3814 mtspr(SPR_MAS2, mas2); 3815 3816 __asm __volatile("isync; tlbre"); 3817 3818 mas1 = mfspr(SPR_MAS1); 3819 mas2 = mfspr(SPR_MAS2); 3820 mas3 = mfspr(SPR_MAS3); 3821 mas7 = mfspr(SPR_MAS7); 3822 3823 idx = tlb0_tableidx(mas2, way); 3824 tlb_print_entry(idx, mas1, mas2, mas3, mas7); 3825 } 3826 } 3827 3828 /**************************************************************************/ 3829 /* TLB1 handling */ 3830 /**************************************************************************/ 3831 3832 /* 3833 * TLB1 mapping notes: 3834 * 3835 * TLB1[0] Kernel text and data. 3836 * TLB1[1-15] Additional kernel text and data mappings (if required), PCI 3837 * windows, other devices mappings. 3838 */ 3839 3840 /* 3841 * Read an entry from given TLB1 slot. 3842 */ 3843 void 3844 tlb1_read_entry(tlb_entry_t *entry, unsigned int slot) 3845 { 3846 register_t msr; 3847 uint32_t mas0; 3848 3849 KASSERT((entry != NULL), ("%s(): Entry is NULL!", __func__)); 3850 3851 msr = mfmsr(); 3852 __asm __volatile("wrteei 0"); 3853 3854 mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(slot); 3855 mtspr(SPR_MAS0, mas0); 3856 __asm __volatile("isync; tlbre"); 3857 3858 entry->mas1 = mfspr(SPR_MAS1); 3859 entry->mas2 = mfspr(SPR_MAS2); 3860 entry->mas3 = mfspr(SPR_MAS3); 3861 3862 switch ((mfpvr() >> 16) & 0xFFFF) { 3863 case FSL_E500v2: 3864 case FSL_E500mc: 3865 case FSL_E5500: 3866 case FSL_E6500: 3867 entry->mas7 = mfspr(SPR_MAS7); 3868 break; 3869 default: 3870 entry->mas7 = 0; 3871 break; 3872 } 3873 mtmsr(msr); 3874 3875 entry->virt = entry->mas2 & MAS2_EPN_MASK; 3876 entry->phys = ((vm_paddr_t)(entry->mas7 & MAS7_RPN) << 32) | 3877 (entry->mas3 & MAS3_RPN); 3878 entry->size = 3879 tsize2size((entry->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT); 3880 } 3881 3882 struct tlbwrite_args { 3883 tlb_entry_t *e; 3884 unsigned int idx; 3885 }; 3886 3887 static void 3888 tlb1_write_entry_int(void *arg) 3889 { 3890 struct tlbwrite_args *args = arg; 3891 uint32_t mas0; 3892 3893 /* Select entry */ 3894 mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(args->idx); 3895 3896 mtspr(SPR_MAS0, mas0); 3897 __asm __volatile("isync"); 3898 mtspr(SPR_MAS1, args->e->mas1); 3899 __asm __volatile("isync"); 3900 mtspr(SPR_MAS2, args->e->mas2); 3901 __asm __volatile("isync"); 3902 mtspr(SPR_MAS3, args->e->mas3); 3903 __asm __volatile("isync"); 3904 switch ((mfpvr() >> 16) & 0xFFFF) { 3905 case FSL_E500mc: 3906 case FSL_E5500: 3907 case FSL_E6500: 3908 mtspr(SPR_MAS8, 0); 3909 __asm __volatile("isync"); 3910 /* FALLTHROUGH */ 3911 case FSL_E500v2: 3912 mtspr(SPR_MAS7, args->e->mas7); 3913 __asm 
__volatile("isync"); 3914 break; 3915 default: 3916 break; 3917 } 3918 3919 __asm __volatile("tlbwe; isync; msync"); 3920 3921 } 3922 3923 static void 3924 tlb1_write_entry_sync(void *arg) 3925 { 3926 /* Empty synchronization point for smp_rendezvous(). */ 3927 } 3928 3929 /* 3930 * Write given entry to TLB1 hardware. 3931 */ 3932 static void 3933 tlb1_write_entry(tlb_entry_t *e, unsigned int idx) 3934 { 3935 struct tlbwrite_args args; 3936 3937 args.e = e; 3938 args.idx = idx; 3939 3940 #ifdef SMP 3941 if ((e->mas2 & _TLB_ENTRY_SHARED) && smp_started) { 3942 mb(); 3943 smp_rendezvous(tlb1_write_entry_sync, 3944 tlb1_write_entry_int, 3945 tlb1_write_entry_sync, &args); 3946 } else 3947 #endif 3948 { 3949 register_t msr; 3950 3951 msr = mfmsr(); 3952 __asm __volatile("wrteei 0"); 3953 tlb1_write_entry_int(&args); 3954 mtmsr(msr); 3955 } 3956 } 3957 3958 /* 3959 * Return the largest uint value log such that 2^log <= num. 3960 */ 3961 static unsigned int 3962 ilog2(unsigned int num) 3963 { 3964 int lz; 3965 3966 __asm ("cntlzw %0, %1" : "=r" (lz) : "r" (num)); 3967 return (31 - lz); 3968 } 3969 3970 /* 3971 * Convert TLB TSIZE value to mapped region size. 3972 */ 3973 static vm_size_t 3974 tsize2size(unsigned int tsize) 3975 { 3976 3977 /* 3978 * size = 4^tsize KB 3979 * size = 4^tsize * 2^10 = 2^(2 * tsize - 10) 3980 */ 3981 3982 return ((1 << (2 * tsize)) * 1024); 3983 } 3984 3985 /* 3986 * Convert region size (must be power of 4) to TLB TSIZE value. 3987 */ 3988 static unsigned int 3989 size2tsize(vm_size_t size) 3990 { 3991 3992 return (ilog2(size) / 2 - 5); 3993 } 3994 3995 /* 3996 * Register permanent kernel mapping in TLB1. 3997 * 3998 * Entries are created starting from index 0 (current free entry is 3999 * kept in tlb1_idx) and are not supposed to be invalidated. 4000 */ 4001 int 4002 tlb1_set_entry(vm_offset_t va, vm_paddr_t pa, vm_size_t size, 4003 uint32_t flags) 4004 { 4005 tlb_entry_t e; 4006 uint32_t ts, tid; 4007 int tsize, index; 4008 4009 for (index = 0; index < TLB1_ENTRIES; index++) { 4010 tlb1_read_entry(&e, index); 4011 if ((e.mas1 & MAS1_VALID) == 0) 4012 break; 4013 /* Check if we're just updating the flags, and update them. */ 4014 if (e.phys == pa && e.virt == va && e.size == size) { 4015 e.mas2 = (va & MAS2_EPN_MASK) | flags; 4016 tlb1_write_entry(&e, index); 4017 return (0); 4018 } 4019 } 4020 if (index >= TLB1_ENTRIES) { 4021 printf("tlb1_set_entry: TLB1 full!\n"); 4022 return (-1); 4023 } 4024 4025 /* Convert size to TSIZE */ 4026 tsize = size2tsize(size); 4027 4028 tid = (TID_KERNEL << MAS1_TID_SHIFT) & MAS1_TID_MASK; 4029 /* XXX TS is hard coded to 0 for now as we only use single address space */ 4030 ts = (0 << MAS1_TS_SHIFT) & MAS1_TS_MASK; 4031 4032 e.phys = pa; 4033 e.virt = va; 4034 e.size = size; 4035 e.mas1 = MAS1_VALID | MAS1_IPROT | ts | tid; 4036 e.mas1 |= ((tsize << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK); 4037 e.mas2 = (va & MAS2_EPN_MASK) | flags; 4038 4039 /* Set supervisor RWX permission bits */ 4040 e.mas3 = (pa & MAS3_RPN) | MAS3_SR | MAS3_SW | MAS3_SX; 4041 e.mas7 = (pa >> 32) & MAS7_RPN; 4042 4043 tlb1_write_entry(&e, index); 4044 4045 /* 4046 * XXX in general TLB1 updates should be propagated between CPUs, 4047 * since current design assumes to have the same TLB1 set-up on all 4048 * cores. 4049 */ 4050 return (0); 4051 } 4052 4053 /* 4054 * Map in contiguous RAM region into the TLB1 using maximum of 4055 * KERNEL_REGION_MAX_TLB_ENTRIES entries. 
/*
 * Map a contiguous RAM region into TLB1 using at most
 * KERNEL_REGION_MAX_TLB_ENTRIES entries.
 *
 * If necessary, round up the last entry size and return the total size
 * used by all allocated entries.
 */
vm_size_t
tlb1_mapin_region(vm_offset_t va, vm_paddr_t pa, vm_size_t size)
{
	vm_size_t pgs[KERNEL_REGION_MAX_TLB_ENTRIES];
	vm_size_t mapped, pgsz, base, mask;
	int idx, nents;

	/* Round up to the next 1M */
	size = roundup2(size, 1 << 20);

	mapped = 0;
	idx = 0;
	base = va;
	pgsz = 64*1024*1024;
	while (mapped < size) {
		while (mapped < size && idx < KERNEL_REGION_MAX_TLB_ENTRIES) {
			while (pgsz > (size - mapped))
				pgsz >>= 2;
			pgs[idx++] = pgsz;
			mapped += pgsz;
		}

		/* We under-map. Correct for this. */
		if (mapped < size) {
			while (pgs[idx - 1] == pgsz) {
				idx--;
				mapped -= pgsz;
			}
			/* XXX We may increase beyond our starting point. */
			pgsz <<= 2;
			pgs[idx++] = pgsz;
			mapped += pgsz;
		}
	}

	nents = idx;
	mask = pgs[0] - 1;
	/* Align address to the boundary */
	if (va & mask) {
		va = (va + mask) & ~mask;
		pa = (pa + mask) & ~mask;
	}

	for (idx = 0; idx < nents; idx++) {
		pgsz = pgs[idx];
		debugf("%u: %llx -> %jx, size=%jx\n", idx,
		    (unsigned long long)pa, (uintmax_t)va, (uintmax_t)pgsz);
		tlb1_set_entry(va, pa, pgsz,
		    _TLB_ENTRY_SHARED | _TLB_ENTRY_MEM);
		pa += pgsz;
		va += pgsz;
	}

	mapped = (va - base);
	printf("mapped size 0x%"PRI0ptrX" (wasted space 0x%"PRIxPTR")\n",
	    mapped, mapped - size);
	return (mapped);
}

/*
 * TLB1 initialization routine, to be called after the very first
 * assembler level setup done in locore.S.
 */
void
tlb1_init()
{
	uint32_t mas0, mas1, mas2, mas3, mas7;
	uint32_t tsz;

	tlb1_get_tlbconf();

	mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(0);
	mtspr(SPR_MAS0, mas0);
	__asm __volatile("isync; tlbre");

	mas1 = mfspr(SPR_MAS1);
	mas2 = mfspr(SPR_MAS2);
	mas3 = mfspr(SPR_MAS3);
	mas7 = mfspr(SPR_MAS7);

	kernload = ((vm_paddr_t)(mas7 & MAS7_RPN) << 32) |
	    (mas3 & MAS3_RPN);

	tsz = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
	kernsize += (tsz > 0) ? tsize2size(tsz) : 0;

	/* Setup TLB miss defaults */
	set_mas4_defaults();
}
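/*
 * Worked example for tlb1_mapin_region() above (illustrative): a 96 MB
 * region is covered by starting with a 64 MB page and shrinking the page
 * size whenever it exceeds what is left to map:
 *
 *	tlb1_mapin_region(va, pa, 96 * 1024 * 1024);
 *	<selects pgs[] = { 64 MB, 16 MB, 16 MB } and returns 96 MB>
 *
 * Only when the region cannot be covered within
 * KERNEL_REGION_MAX_TLB_ENTRIES entries does the correction step grow the
 * page size again, which may round the last entry up beyond the requested
 * size.
 */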
4158 */ 4159 void 4160 pmap_early_io_unmap(vm_offset_t va, vm_size_t size) 4161 { 4162 int i; 4163 tlb_entry_t e; 4164 vm_size_t isize; 4165 4166 size = roundup(size, PAGE_SIZE); 4167 isize = size; 4168 for (i = 0; i < TLB1_ENTRIES && size > 0; i++) { 4169 tlb1_read_entry(&e, i); 4170 if (!(e.mas1 & MAS1_VALID)) 4171 continue; 4172 if (va <= e.virt && (va + isize) >= (e.virt + e.size)) { 4173 size -= e.size; 4174 e.mas1 &= ~MAS1_VALID; 4175 tlb1_write_entry(&e, i); 4176 } 4177 } 4178 if (tlb1_map_base == va + isize) 4179 tlb1_map_base -= isize; 4180 } 4181 4182 vm_offset_t 4183 pmap_early_io_map(vm_paddr_t pa, vm_size_t size) 4184 { 4185 vm_paddr_t pa_base; 4186 vm_offset_t va, sz; 4187 int i; 4188 tlb_entry_t e; 4189 4190 KASSERT(!pmap_bootstrapped, ("Do not use after PMAP is up!")); 4191 4192 for (i = 0; i < TLB1_ENTRIES; i++) { 4193 tlb1_read_entry(&e, i); 4194 if (!(e.mas1 & MAS1_VALID)) 4195 continue; 4196 if (pa >= e.phys && (pa + size) <= 4197 (e.phys + e.size)) 4198 return (e.virt + (pa - e.phys)); 4199 } 4200 4201 pa_base = rounddown(pa, PAGE_SIZE); 4202 size = roundup(size + (pa - pa_base), PAGE_SIZE); 4203 tlb1_map_base = roundup2(tlb1_map_base, 1 << (ilog2(size) & ~1)); 4204 va = tlb1_map_base + (pa - pa_base); 4205 4206 do { 4207 sz = 1 << (ilog2(size) & ~1); 4208 tlb1_set_entry(tlb1_map_base, pa_base, sz, 4209 _TLB_ENTRY_SHARED | _TLB_ENTRY_IO); 4210 size -= sz; 4211 pa_base += sz; 4212 tlb1_map_base += sz; 4213 } while (size > 0); 4214 4215 return (va); 4216 } 4217 4218 void 4219 pmap_track_page(pmap_t pmap, vm_offset_t va) 4220 { 4221 vm_paddr_t pa; 4222 vm_page_t page; 4223 struct pv_entry *pve; 4224 4225 va = trunc_page(va); 4226 pa = pmap_kextract(va); 4227 page = PHYS_TO_VM_PAGE(pa); 4228 4229 rw_wlock(&pvh_global_lock); 4230 PMAP_LOCK(pmap); 4231 4232 TAILQ_FOREACH(pve, &page->md.pv_list, pv_link) { 4233 if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) { 4234 goto out; 4235 } 4236 } 4237 page->md.pv_tracked = true; 4238 pv_insert(pmap, va, page); 4239 out: 4240 PMAP_UNLOCK(pmap); 4241 rw_wunlock(&pvh_global_lock); 4242 } 4243 4244 4245 /* 4246 * Setup MAS4 defaults. 4247 * These values are loaded to MAS0-2 on a TLB miss. 4248 */ 4249 static void 4250 set_mas4_defaults(void) 4251 { 4252 uint32_t mas4; 4253 4254 /* Defaults: TLB0, PID0, TSIZED=4K */ 4255 mas4 = MAS4_TLBSELD0; 4256 mas4 |= (TLB_SIZE_4K << MAS4_TSIZED_SHIFT) & MAS4_TSIZED_MASK; 4257 #ifdef SMP 4258 mas4 |= MAS4_MD; 4259 #endif 4260 mtspr(SPR_MAS4, mas4); 4261 __asm __volatile("isync"); 4262 } 4263 4264 /* 4265 * Print out contents of the MAS registers for each TLB1 entry 4266 */ 4267 void 4268 tlb1_print_tlbentries(void) 4269 { 4270 uint32_t mas0, mas1, mas3, mas7; 4271 #ifdef __powerpc64__ 4272 uint64_t mas2; 4273 #else 4274 uint32_t mas2; 4275 #endif 4276 int i; 4277 4278 debugf("TLB1 entries:\n"); 4279 for (i = 0; i < TLB1_ENTRIES; i++) { 4280 4281 mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(i); 4282 mtspr(SPR_MAS0, mas0); 4283 4284 __asm __volatile("isync; tlbre"); 4285 4286 mas1 = mfspr(SPR_MAS1); 4287 mas2 = mfspr(SPR_MAS2); 4288 mas3 = mfspr(SPR_MAS3); 4289 mas7 = mfspr(SPR_MAS7); 4290 4291 tlb_print_entry(i, mas1, mas2, mas3, mas7); 4292 } 4293 } 4294 4295 /* 4296 * Return 0 if the physical IO range is encompassed by one of the 4297 * the TLB1 entries, otherwise return related error code. 
4298 */ 4299 static int 4300 tlb1_iomapped(int i, vm_paddr_t pa, vm_size_t size, vm_offset_t *va) 4301 { 4302 uint32_t prot; 4303 vm_paddr_t pa_start; 4304 vm_paddr_t pa_end; 4305 unsigned int entry_tsize; 4306 vm_size_t entry_size; 4307 tlb_entry_t e; 4308 4309 *va = (vm_offset_t)NULL; 4310 4311 tlb1_read_entry(&e, i); 4312 /* Skip invalid entries */ 4313 if (!(e.mas1 & MAS1_VALID)) 4314 return (EINVAL); 4315 4316 /* 4317 * The entry must be cache-inhibited, guarded, and r/w 4318 * so it can function as an i/o page 4319 */ 4320 prot = e.mas2 & (MAS2_I | MAS2_G); 4321 if (prot != (MAS2_I | MAS2_G)) 4322 return (EPERM); 4323 4324 prot = e.mas3 & (MAS3_SR | MAS3_SW); 4325 if (prot != (MAS3_SR | MAS3_SW)) 4326 return (EPERM); 4327 4328 /* The address should be within the entry range. */ 4329 entry_tsize = (e.mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; 4330 KASSERT((entry_tsize), ("tlb1_iomapped: invalid entry tsize")); 4331 4332 entry_size = tsize2size(entry_tsize); 4333 pa_start = (((vm_paddr_t)e.mas7 & MAS7_RPN) << 32) | 4334 (e.mas3 & MAS3_RPN); 4335 pa_end = pa_start + entry_size; 4336 4337 if ((pa < pa_start) || ((pa + size) > pa_end)) 4338 return (ERANGE); 4339 4340 /* Return virtual address of this mapping. */ 4341 *va = (e.mas2 & MAS2_EPN_MASK) + (pa - pa_start); 4342 return (0); 4343 } 4344 4345 /* 4346 * Invalidate all TLB0 entries which match the given TID. Note this is 4347 * dedicated for cases when invalidations should NOT be propagated to other 4348 * CPUs. 4349 */ 4350 static void 4351 tid_flush(tlbtid_t tid) 4352 { 4353 register_t msr; 4354 uint32_t mas0, mas1, mas2; 4355 int entry, way; 4356 4357 4358 /* Don't evict kernel translations */ 4359 if (tid == TID_KERNEL) 4360 return; 4361 4362 msr = mfmsr(); 4363 __asm __volatile("wrteei 0"); 4364 4365 for (way = 0; way < TLB0_WAYS; way++) 4366 for (entry = 0; entry < TLB0_ENTRIES_PER_WAY; entry++) { 4367 4368 mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way); 4369 mtspr(SPR_MAS0, mas0); 4370 __asm __volatile("isync"); 4371 4372 mas2 = entry << MAS2_TLB0_ENTRY_IDX_SHIFT; 4373 mtspr(SPR_MAS2, mas2); 4374 4375 __asm __volatile("isync; tlbre"); 4376 4377 mas1 = mfspr(SPR_MAS1); 4378 4379 if (!(mas1 & MAS1_VALID)) 4380 continue; 4381 if (((mas1 & MAS1_TID_MASK) >> MAS1_TID_SHIFT) != tid) 4382 continue; 4383 mas1 &= ~MAS1_VALID; 4384 mtspr(SPR_MAS1, mas1); 4385 __asm __volatile("isync; tlbwe; isync; msync"); 4386 } 4387 mtmsr(msr); 4388 } 4389