/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski <raj@semihalf.com>
 * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
 * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Some hw specific parts of this pmap were derived or influenced
 * by NetBSD's ibm4xx pmap module. More generic code is shared with
 * a few other pmap modules from the FreeBSD tree.
 */

/*
 * VM layout notes:
 *
 * Kernel and user threads run within one common virtual address space
 * defined by AS=0.
 *
 * 32-bit pmap:
 * Virtual address space layout:
 * -----------------------------
 * 0x0000_0000 - 0x7fff_ffff   : user process
 * 0x8000_0000 - 0xbfff_ffff   : pmap_mapdev()-ed area (PCI/PCIE etc.)
 * 0xc000_0000 - 0xc0ff_ffff   : kernel reserved
 *   0xc000_0000 - data_end    : kernel code+data, env, metadata etc.
 * 0xc100_0000 - 0xffff_ffff   : KVA
 *   0xc100_0000 - 0xc100_3fff : reserved for page zero/copy
 *   0xc100_4000 - 0xc200_3fff : reserved for ptbl bufs
 *   0xc200_4000 - 0xc200_8fff : guard page + kstack0
 *   0xc200_9000 - 0xfeef_ffff : actual free KVA space
 *
 * 64-bit pmap:
 * Virtual address space layout:
 * -----------------------------
 * 0x0000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff   : user process
 *   0x0000_0000_0000_0000 - 0x8fff_ffff_ffff_ffff : text, data, heap, maps, libraries
 *   0x9000_0000_0000_0000 - 0xafff_ffff_ffff_ffff : mmio region
 *   0xb000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff : stack
 * 0xc000_0000_0000_0000 - 0xcfff_ffff_ffff_ffff   : kernel reserved
 *   0xc000_0000_0000_0000 - endkernel-1           : kernel code & data
 *   endkernel - msgbufp-1                         : flat device tree
 *   msgbufp - ptbl_bufs-1                         : message buffer
 *   ptbl_bufs - kernel_pdir-1                     : kernel page tables
 *   kernel_pdir - kernel_pp2d-1                   : kernel page directory
 *   kernel_pp2d - .                               : kernel pointers to page directory
 *   pmap_zero_copy_min - crashdumpmap-1           : reserved for page zero/copy
 *   crashdumpmap - ptbl_buf_pool_vabase-1         : reserved for ptbl bufs
 *   ptbl_buf_pool_vabase - virtual_avail-1        : user page directories and page tables
 *   virtual_avail - 0xcfff_ffff_ffff_ffff         : actual free KVA space
 * 0xd000_0000_0000_0000 - 0xdfff_ffff_ffff_ffff   : coprocessor region
 * 0xe000_0000_0000_0000 - 0xefff_ffff_ffff_ffff   : mmio region
 * 0xf000_0000_0000_0000 - 0xffff_ffff_ffff_ffff   : direct map
 *   0xf000_0000_0000_0000 - +Maxmem               : physmem map
 *   - 0xffff_ffff_ffff_ffff                       : device direct map
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_kstack_pages.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/malloc.h>
#include <sys/ktr.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/queue.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/linker.h>
#include <sys/msgbuf.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_param.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/uma.h>

#include <machine/_inttypes.h>
#include <machine/cpu.h>
#include <machine/pcb.h>
#include <machine/platform.h>

#include <machine/tlb.h>
#include <machine/spr.h>
#include <machine/md_var.h>
#include <machine/mmuvar.h>
#include <machine/pmap.h>
#include <machine/pte.h>

#include "mmu_if.h"

#define SPARSE_MAPDEV
#ifdef DEBUG
#define debugf(fmt, args...) printf(fmt, ##args)
#else
#define debugf(fmt, args...)
#endif

#ifdef __powerpc64__
#define	PRI0ptrX	"016lx"
#else
#define	PRI0ptrX	"08x"
#endif

#define TODO			panic("%s: not implemented", __func__);

extern unsigned char _etext[];
extern unsigned char _end[];

extern uint32_t *bootinfo;

vm_paddr_t kernload;
vm_offset_t kernstart;
vm_size_t kernsize;

/* Message buffer and tables. */
static vm_offset_t data_start;
static vm_size_t data_end;

/* Phys/avail memory regions. */
static struct mem_region *availmem_regions;
static int availmem_regions_sz;
static struct mem_region *physmem_regions;
static int physmem_regions_sz;

/* Reserved KVA space and mutex for mmu_booke_zero_page. */
static vm_offset_t zero_page_va;
static struct mtx zero_page_mutex;

static struct mtx tlbivax_mutex;

/* Reserved KVA space and mutex for mmu_booke_copy_page. */
static vm_offset_t copy_page_src_va;
static vm_offset_t copy_page_dst_va;
static struct mtx copy_page_mutex;

/**************************************************************************/
/* PMAP */
/**************************************************************************/

static int mmu_booke_enter_locked(mmu_t, pmap_t, vm_offset_t, vm_page_t,
    vm_prot_t, u_int flags, int8_t psind);

unsigned int kptbl_min;		/* Index of the first kernel ptbl. */
unsigned int kernel_ptbls;	/* Number of KVA ptbls.
*/ 180 #ifdef __powerpc64__ 181 unsigned int kernel_pdirs; 182 #endif 183 184 /* 185 * If user pmap is processed with mmu_booke_remove and the resident count 186 * drops to 0, there are no more pages to remove, so we need not continue. 187 */ 188 #define PMAP_REMOVE_DONE(pmap) \ 189 ((pmap) != kernel_pmap && (pmap)->pm_stats.resident_count == 0) 190 191 #if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__) 192 extern int elf32_nxstack; 193 #endif 194 195 /**************************************************************************/ 196 /* TLB and TID handling */ 197 /**************************************************************************/ 198 199 /* Translation ID busy table */ 200 static volatile pmap_t tidbusy[MAXCPU][TID_MAX + 1]; 201 202 /* 203 * TLB0 capabilities (entry, way numbers etc.). These can vary between e500 204 * core revisions and should be read from h/w registers during early config. 205 */ 206 uint32_t tlb0_entries; 207 uint32_t tlb0_ways; 208 uint32_t tlb0_entries_per_way; 209 uint32_t tlb1_entries; 210 211 #define TLB0_ENTRIES (tlb0_entries) 212 #define TLB0_WAYS (tlb0_ways) 213 #define TLB0_ENTRIES_PER_WAY (tlb0_entries_per_way) 214 215 #define TLB1_ENTRIES (tlb1_entries) 216 217 static vm_offset_t tlb1_map_base = VM_MAXUSER_ADDRESS + PAGE_SIZE; 218 219 static tlbtid_t tid_alloc(struct pmap *); 220 static void tid_flush(tlbtid_t tid); 221 222 #ifdef __powerpc64__ 223 static void tlb_print_entry(int, uint32_t, uint64_t, uint32_t, uint32_t); 224 #else 225 static void tlb_print_entry(int, uint32_t, uint32_t, uint32_t, uint32_t); 226 #endif 227 228 static void tlb1_read_entry(tlb_entry_t *, unsigned int); 229 static void tlb1_write_entry(tlb_entry_t *, unsigned int); 230 static int tlb1_iomapped(int, vm_paddr_t, vm_size_t, vm_offset_t *); 231 static vm_size_t tlb1_mapin_region(vm_offset_t, vm_paddr_t, vm_size_t); 232 233 static vm_size_t tsize2size(unsigned int); 234 static unsigned int size2tsize(vm_size_t); 235 static unsigned int ilog2(unsigned int); 236 237 static void set_mas4_defaults(void); 238 239 static inline void tlb0_flush_entry(vm_offset_t); 240 static inline unsigned int tlb0_tableidx(vm_offset_t, unsigned int); 241 242 /**************************************************************************/ 243 /* Page table management */ 244 /**************************************************************************/ 245 246 static struct rwlock_padalign pvh_global_lock; 247 248 /* Data for the pv entry allocation mechanism */ 249 static uma_zone_t pvzone; 250 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; 251 252 #define PV_ENTRY_ZONE_MIN 2048 /* min pv entries in uma zone */ 253 254 #ifndef PMAP_SHPGPERPROC 255 #define PMAP_SHPGPERPROC 200 256 #endif 257 258 static void ptbl_init(void); 259 static struct ptbl_buf *ptbl_buf_alloc(void); 260 static void ptbl_buf_free(struct ptbl_buf *); 261 static void ptbl_free_pmap_ptbl(pmap_t, pte_t *); 262 263 #ifdef __powerpc64__ 264 static pte_t *ptbl_alloc(mmu_t, pmap_t, pte_t **, 265 unsigned int, boolean_t); 266 static void ptbl_free(mmu_t, pmap_t, pte_t **, unsigned int); 267 static void ptbl_hold(mmu_t, pmap_t, pte_t **, unsigned int); 268 static int ptbl_unhold(mmu_t, pmap_t, vm_offset_t); 269 #else 270 static pte_t *ptbl_alloc(mmu_t, pmap_t, unsigned int, boolean_t); 271 static void ptbl_free(mmu_t, pmap_t, unsigned int); 272 static void ptbl_hold(mmu_t, pmap_t, unsigned int); 273 static int ptbl_unhold(mmu_t, pmap_t, unsigned int); 274 #endif 275 276 static vm_paddr_t pte_vatopa(mmu_t, 
pmap_t, vm_offset_t); 277 static int pte_enter(mmu_t, pmap_t, vm_page_t, vm_offset_t, uint32_t, boolean_t); 278 static int pte_remove(mmu_t, pmap_t, vm_offset_t, uint8_t); 279 static pte_t *pte_find(mmu_t, pmap_t, vm_offset_t); 280 static void kernel_pte_alloc(vm_offset_t, vm_offset_t, vm_offset_t); 281 282 static pv_entry_t pv_alloc(void); 283 static void pv_free(pv_entry_t); 284 static void pv_insert(pmap_t, vm_offset_t, vm_page_t); 285 static void pv_remove(pmap_t, vm_offset_t, vm_page_t); 286 287 static void booke_pmap_init_qpages(void); 288 289 /* Number of kva ptbl buffers, each covering one ptbl (PTBL_PAGES). */ 290 #ifdef __powerpc64__ 291 #define PTBL_BUFS (16UL * 16 * 16) 292 #else 293 #define PTBL_BUFS (128 * 16) 294 #endif 295 296 struct ptbl_buf { 297 TAILQ_ENTRY(ptbl_buf) link; /* list link */ 298 vm_offset_t kva; /* va of mapping */ 299 }; 300 301 /* ptbl free list and a lock used for access synchronization. */ 302 static TAILQ_HEAD(, ptbl_buf) ptbl_buf_freelist; 303 static struct mtx ptbl_buf_freelist_lock; 304 305 /* Base address of kva space allocated fot ptbl bufs. */ 306 static vm_offset_t ptbl_buf_pool_vabase; 307 308 /* Pointer to ptbl_buf structures. */ 309 static struct ptbl_buf *ptbl_bufs; 310 311 #ifdef SMP 312 extern tlb_entry_t __boot_tlb1[]; 313 void pmap_bootstrap_ap(volatile uint32_t *); 314 #endif 315 316 /* 317 * Kernel MMU interface 318 */ 319 static void mmu_booke_clear_modify(mmu_t, vm_page_t); 320 static void mmu_booke_copy(mmu_t, pmap_t, pmap_t, vm_offset_t, 321 vm_size_t, vm_offset_t); 322 static void mmu_booke_copy_page(mmu_t, vm_page_t, vm_page_t); 323 static void mmu_booke_copy_pages(mmu_t, vm_page_t *, 324 vm_offset_t, vm_page_t *, vm_offset_t, int); 325 static int mmu_booke_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, 326 vm_prot_t, u_int flags, int8_t psind); 327 static void mmu_booke_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, 328 vm_page_t, vm_prot_t); 329 static void mmu_booke_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, 330 vm_prot_t); 331 static vm_paddr_t mmu_booke_extract(mmu_t, pmap_t, vm_offset_t); 332 static vm_page_t mmu_booke_extract_and_hold(mmu_t, pmap_t, vm_offset_t, 333 vm_prot_t); 334 static void mmu_booke_init(mmu_t); 335 static boolean_t mmu_booke_is_modified(mmu_t, vm_page_t); 336 static boolean_t mmu_booke_is_prefaultable(mmu_t, pmap_t, vm_offset_t); 337 static boolean_t mmu_booke_is_referenced(mmu_t, vm_page_t); 338 static int mmu_booke_ts_referenced(mmu_t, vm_page_t); 339 static vm_offset_t mmu_booke_map(mmu_t, vm_offset_t *, vm_paddr_t, vm_paddr_t, 340 int); 341 static int mmu_booke_mincore(mmu_t, pmap_t, vm_offset_t, 342 vm_paddr_t *); 343 static void mmu_booke_object_init_pt(mmu_t, pmap_t, vm_offset_t, 344 vm_object_t, vm_pindex_t, vm_size_t); 345 static boolean_t mmu_booke_page_exists_quick(mmu_t, pmap_t, vm_page_t); 346 static void mmu_booke_page_init(mmu_t, vm_page_t); 347 static int mmu_booke_page_wired_mappings(mmu_t, vm_page_t); 348 static void mmu_booke_pinit(mmu_t, pmap_t); 349 static void mmu_booke_pinit0(mmu_t, pmap_t); 350 static void mmu_booke_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, 351 vm_prot_t); 352 static void mmu_booke_qenter(mmu_t, vm_offset_t, vm_page_t *, int); 353 static void mmu_booke_qremove(mmu_t, vm_offset_t, int); 354 static void mmu_booke_release(mmu_t, pmap_t); 355 static void mmu_booke_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t); 356 static void mmu_booke_remove_all(mmu_t, vm_page_t); 357 static void mmu_booke_remove_write(mmu_t, vm_page_t); 358 static void 
mmu_booke_unwire(mmu_t, pmap_t, vm_offset_t, vm_offset_t); 359 static void mmu_booke_zero_page(mmu_t, vm_page_t); 360 static void mmu_booke_zero_page_area(mmu_t, vm_page_t, int, int); 361 static void mmu_booke_activate(mmu_t, struct thread *); 362 static void mmu_booke_deactivate(mmu_t, struct thread *); 363 static void mmu_booke_bootstrap(mmu_t, vm_offset_t, vm_offset_t); 364 static void *mmu_booke_mapdev(mmu_t, vm_paddr_t, vm_size_t); 365 static void *mmu_booke_mapdev_attr(mmu_t, vm_paddr_t, vm_size_t, vm_memattr_t); 366 static void mmu_booke_unmapdev(mmu_t, vm_offset_t, vm_size_t); 367 static vm_paddr_t mmu_booke_kextract(mmu_t, vm_offset_t); 368 static void mmu_booke_kenter(mmu_t, vm_offset_t, vm_paddr_t); 369 static void mmu_booke_kenter_attr(mmu_t, vm_offset_t, vm_paddr_t, vm_memattr_t); 370 static void mmu_booke_kremove(mmu_t, vm_offset_t); 371 static boolean_t mmu_booke_dev_direct_mapped(mmu_t, vm_paddr_t, vm_size_t); 372 static void mmu_booke_sync_icache(mmu_t, pmap_t, vm_offset_t, 373 vm_size_t); 374 static void mmu_booke_dumpsys_map(mmu_t, vm_paddr_t pa, size_t, 375 void **); 376 static void mmu_booke_dumpsys_unmap(mmu_t, vm_paddr_t pa, size_t, 377 void *); 378 static void mmu_booke_scan_init(mmu_t); 379 static vm_offset_t mmu_booke_quick_enter_page(mmu_t mmu, vm_page_t m); 380 static void mmu_booke_quick_remove_page(mmu_t mmu, vm_offset_t addr); 381 static int mmu_booke_change_attr(mmu_t mmu, vm_offset_t addr, 382 vm_size_t sz, vm_memattr_t mode); 383 384 static mmu_method_t mmu_booke_methods[] = { 385 /* pmap dispatcher interface */ 386 MMUMETHOD(mmu_clear_modify, mmu_booke_clear_modify), 387 MMUMETHOD(mmu_copy, mmu_booke_copy), 388 MMUMETHOD(mmu_copy_page, mmu_booke_copy_page), 389 MMUMETHOD(mmu_copy_pages, mmu_booke_copy_pages), 390 MMUMETHOD(mmu_enter, mmu_booke_enter), 391 MMUMETHOD(mmu_enter_object, mmu_booke_enter_object), 392 MMUMETHOD(mmu_enter_quick, mmu_booke_enter_quick), 393 MMUMETHOD(mmu_extract, mmu_booke_extract), 394 MMUMETHOD(mmu_extract_and_hold, mmu_booke_extract_and_hold), 395 MMUMETHOD(mmu_init, mmu_booke_init), 396 MMUMETHOD(mmu_is_modified, mmu_booke_is_modified), 397 MMUMETHOD(mmu_is_prefaultable, mmu_booke_is_prefaultable), 398 MMUMETHOD(mmu_is_referenced, mmu_booke_is_referenced), 399 MMUMETHOD(mmu_ts_referenced, mmu_booke_ts_referenced), 400 MMUMETHOD(mmu_map, mmu_booke_map), 401 MMUMETHOD(mmu_mincore, mmu_booke_mincore), 402 MMUMETHOD(mmu_object_init_pt, mmu_booke_object_init_pt), 403 MMUMETHOD(mmu_page_exists_quick,mmu_booke_page_exists_quick), 404 MMUMETHOD(mmu_page_init, mmu_booke_page_init), 405 MMUMETHOD(mmu_page_wired_mappings, mmu_booke_page_wired_mappings), 406 MMUMETHOD(mmu_pinit, mmu_booke_pinit), 407 MMUMETHOD(mmu_pinit0, mmu_booke_pinit0), 408 MMUMETHOD(mmu_protect, mmu_booke_protect), 409 MMUMETHOD(mmu_qenter, mmu_booke_qenter), 410 MMUMETHOD(mmu_qremove, mmu_booke_qremove), 411 MMUMETHOD(mmu_release, mmu_booke_release), 412 MMUMETHOD(mmu_remove, mmu_booke_remove), 413 MMUMETHOD(mmu_remove_all, mmu_booke_remove_all), 414 MMUMETHOD(mmu_remove_write, mmu_booke_remove_write), 415 MMUMETHOD(mmu_sync_icache, mmu_booke_sync_icache), 416 MMUMETHOD(mmu_unwire, mmu_booke_unwire), 417 MMUMETHOD(mmu_zero_page, mmu_booke_zero_page), 418 MMUMETHOD(mmu_zero_page_area, mmu_booke_zero_page_area), 419 MMUMETHOD(mmu_activate, mmu_booke_activate), 420 MMUMETHOD(mmu_deactivate, mmu_booke_deactivate), 421 MMUMETHOD(mmu_quick_enter_page, mmu_booke_quick_enter_page), 422 MMUMETHOD(mmu_quick_remove_page, mmu_booke_quick_remove_page), 423 424 /* Internal 
interfaces */ 425 MMUMETHOD(mmu_bootstrap, mmu_booke_bootstrap), 426 MMUMETHOD(mmu_dev_direct_mapped,mmu_booke_dev_direct_mapped), 427 MMUMETHOD(mmu_mapdev, mmu_booke_mapdev), 428 MMUMETHOD(mmu_mapdev_attr, mmu_booke_mapdev_attr), 429 MMUMETHOD(mmu_kenter, mmu_booke_kenter), 430 MMUMETHOD(mmu_kenter_attr, mmu_booke_kenter_attr), 431 MMUMETHOD(mmu_kextract, mmu_booke_kextract), 432 MMUMETHOD(mmu_kremove, mmu_booke_kremove), 433 MMUMETHOD(mmu_unmapdev, mmu_booke_unmapdev), 434 MMUMETHOD(mmu_change_attr, mmu_booke_change_attr), 435 436 /* dumpsys() support */ 437 MMUMETHOD(mmu_dumpsys_map, mmu_booke_dumpsys_map), 438 MMUMETHOD(mmu_dumpsys_unmap, mmu_booke_dumpsys_unmap), 439 MMUMETHOD(mmu_scan_init, mmu_booke_scan_init), 440 441 { 0, 0 } 442 }; 443 444 MMU_DEF(booke_mmu, MMU_TYPE_BOOKE, mmu_booke_methods, 0); 445 446 static __inline uint32_t 447 tlb_calc_wimg(vm_paddr_t pa, vm_memattr_t ma) 448 { 449 uint32_t attrib; 450 int i; 451 452 if (ma != VM_MEMATTR_DEFAULT) { 453 switch (ma) { 454 case VM_MEMATTR_UNCACHEABLE: 455 return (MAS2_I | MAS2_G); 456 case VM_MEMATTR_WRITE_COMBINING: 457 case VM_MEMATTR_WRITE_BACK: 458 case VM_MEMATTR_PREFETCHABLE: 459 return (MAS2_I); 460 case VM_MEMATTR_WRITE_THROUGH: 461 return (MAS2_W | MAS2_M); 462 case VM_MEMATTR_CACHEABLE: 463 return (MAS2_M); 464 } 465 } 466 467 /* 468 * Assume the page is cache inhibited and access is guarded unless 469 * it's in our available memory array. 470 */ 471 attrib = _TLB_ENTRY_IO; 472 for (i = 0; i < physmem_regions_sz; i++) { 473 if ((pa >= physmem_regions[i].mr_start) && 474 (pa < (physmem_regions[i].mr_start + 475 physmem_regions[i].mr_size))) { 476 attrib = _TLB_ENTRY_MEM; 477 break; 478 } 479 } 480 481 return (attrib); 482 } 483 484 static inline void 485 tlb_miss_lock(void) 486 { 487 #ifdef SMP 488 struct pcpu *pc; 489 490 if (!smp_started) 491 return; 492 493 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 494 if (pc != pcpup) { 495 496 CTR3(KTR_PMAP, "%s: tlb miss LOCK of CPU=%d, " 497 "tlb_lock=%p", __func__, pc->pc_cpuid, pc->pc_booke_tlb_lock); 498 499 KASSERT((pc->pc_cpuid != PCPU_GET(cpuid)), 500 ("tlb_miss_lock: tried to lock self")); 501 502 tlb_lock(pc->pc_booke_tlb_lock); 503 504 CTR1(KTR_PMAP, "%s: locked", __func__); 505 } 506 } 507 #endif 508 } 509 510 static inline void 511 tlb_miss_unlock(void) 512 { 513 #ifdef SMP 514 struct pcpu *pc; 515 516 if (!smp_started) 517 return; 518 519 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 520 if (pc != pcpup) { 521 CTR2(KTR_PMAP, "%s: tlb miss UNLOCK of CPU=%d", 522 __func__, pc->pc_cpuid); 523 524 tlb_unlock(pc->pc_booke_tlb_lock); 525 526 CTR1(KTR_PMAP, "%s: unlocked", __func__); 527 } 528 } 529 #endif 530 } 531 532 /* Return number of entries in TLB0. */ 533 static __inline void 534 tlb0_get_tlbconf(void) 535 { 536 uint32_t tlb0_cfg; 537 538 tlb0_cfg = mfspr(SPR_TLB0CFG); 539 tlb0_entries = tlb0_cfg & TLBCFG_NENTRY_MASK; 540 tlb0_ways = (tlb0_cfg & TLBCFG_ASSOC_MASK) >> TLBCFG_ASSOC_SHIFT; 541 tlb0_entries_per_way = tlb0_entries / tlb0_ways; 542 } 543 544 /* Return number of entries in TLB1. */ 545 static __inline void 546 tlb1_get_tlbconf(void) 547 { 548 uint32_t tlb1_cfg; 549 550 tlb1_cfg = mfspr(SPR_TLB1CFG); 551 tlb1_entries = tlb1_cfg & TLBCFG_NENTRY_MASK; 552 } 553 554 /**************************************************************************/ 555 /* Page table related */ 556 /**************************************************************************/ 557 558 #ifdef __powerpc64__ 559 /* Initialize pool of kva ptbl buffers. 
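 *
 * Each of the PTBL_BUFS buffers is assigned a fixed KVA slot sized for the
 * larger of a page table and a page directory, because on 64-bit the same
 * pool backs both.  Roughly (a sketch of the assignment done in the loop
 * below, using the macros defined above):
 *
 *	slot = MAX(PTBL_PAGES, PDIR_PAGES) * PAGE_SIZE;
 *	ptbl_bufs[i].kva = ptbl_buf_pool_vabase + i * slot;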
*/ 560 static void 561 ptbl_init(void) 562 { 563 int i; 564 565 mtx_init(&ptbl_buf_freelist_lock, "ptbl bufs lock", NULL, MTX_DEF); 566 TAILQ_INIT(&ptbl_buf_freelist); 567 568 for (i = 0; i < PTBL_BUFS; i++) { 569 ptbl_bufs[i].kva = ptbl_buf_pool_vabase + 570 i * MAX(PTBL_PAGES,PDIR_PAGES) * PAGE_SIZE; 571 TAILQ_INSERT_TAIL(&ptbl_buf_freelist, &ptbl_bufs[i], link); 572 } 573 } 574 575 /* Get an sf_buf from the freelist. */ 576 static struct ptbl_buf * 577 ptbl_buf_alloc(void) 578 { 579 struct ptbl_buf *buf; 580 581 mtx_lock(&ptbl_buf_freelist_lock); 582 buf = TAILQ_FIRST(&ptbl_buf_freelist); 583 if (buf != NULL) 584 TAILQ_REMOVE(&ptbl_buf_freelist, buf, link); 585 mtx_unlock(&ptbl_buf_freelist_lock); 586 587 return (buf); 588 } 589 590 /* Return ptbl buff to free pool. */ 591 static void 592 ptbl_buf_free(struct ptbl_buf *buf) 593 { 594 mtx_lock(&ptbl_buf_freelist_lock); 595 TAILQ_INSERT_TAIL(&ptbl_buf_freelist, buf, link); 596 mtx_unlock(&ptbl_buf_freelist_lock); 597 } 598 599 /* 600 * Search the list of allocated ptbl bufs and find on list of allocated ptbls 601 */ 602 static void 603 ptbl_free_pmap_ptbl(pmap_t pmap, pte_t * ptbl) 604 { 605 struct ptbl_buf *pbuf; 606 607 TAILQ_FOREACH(pbuf, &pmap->pm_ptbl_list, link) { 608 if (pbuf->kva == (vm_offset_t) ptbl) { 609 /* Remove from pmap ptbl buf list. */ 610 TAILQ_REMOVE(&pmap->pm_ptbl_list, pbuf, link); 611 612 /* Free corresponding ptbl buf. */ 613 ptbl_buf_free(pbuf); 614 615 break; 616 } 617 } 618 } 619 620 /* Get a pointer to a PTE in a page table. */ 621 static __inline pte_t * 622 pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va) 623 { 624 pte_t **pdir; 625 pte_t *ptbl; 626 627 KASSERT((pmap != NULL), ("pte_find: invalid pmap")); 628 629 pdir = pmap->pm_pp2d[PP2D_IDX(va)]; 630 if (!pdir) 631 return NULL; 632 ptbl = pdir[PDIR_IDX(va)]; 633 return ((ptbl != NULL) ? &ptbl[PTBL_IDX(va)] : NULL); 634 } 635 636 /* 637 * Search the list of allocated pdir bufs and find on list of allocated pdirs 638 */ 639 static void 640 ptbl_free_pmap_pdir(mmu_t mmu, pmap_t pmap, pte_t ** pdir) 641 { 642 struct ptbl_buf *pbuf; 643 644 TAILQ_FOREACH(pbuf, &pmap->pm_pdir_list, link) { 645 if (pbuf->kva == (vm_offset_t) pdir) { 646 /* Remove from pmap ptbl buf list. */ 647 TAILQ_REMOVE(&pmap->pm_pdir_list, pbuf, link); 648 649 /* Free corresponding pdir buf. */ 650 ptbl_buf_free(pbuf); 651 652 break; 653 } 654 } 655 } 656 /* Free pdir pages and invalidate pdir entry. */ 657 static void 658 pdir_free(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx) 659 { 660 pte_t **pdir; 661 vm_paddr_t pa; 662 vm_offset_t va; 663 vm_page_t m; 664 int i; 665 666 pdir = pmap->pm_pp2d[pp2d_idx]; 667 668 KASSERT((pdir != NULL), ("pdir_free: null pdir")); 669 670 pmap->pm_pp2d[pp2d_idx] = NULL; 671 672 for (i = 0; i < PDIR_PAGES; i++) { 673 va = ((vm_offset_t) pdir + (i * PAGE_SIZE)); 674 pa = pte_vatopa(mmu, kernel_pmap, va); 675 m = PHYS_TO_VM_PAGE(pa); 676 vm_page_free_zero(m); 677 atomic_subtract_int(&vm_cnt.v_wire_count, 1); 678 pmap_kremove(va); 679 } 680 681 ptbl_free_pmap_pdir(mmu, pmap, pdir); 682 } 683 684 /* 685 * Decrement pdir pages hold count and attempt to free pdir pages. Called 686 * when removing directory entry from pdir. 687 * 688 * Return 1 if pdir pages were freed. 
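 *
 * The hold count is kept in the wire_count of the vm_page_t's backing the
 * pdir.  All PDIR_PAGES pages are kept at the same value, so after the
 * decrement loop it is enough to test the last page examined for zero.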
689 */ 690 static int 691 pdir_unhold(mmu_t mmu, pmap_t pmap, u_int pp2d_idx) 692 { 693 pte_t **pdir; 694 vm_paddr_t pa; 695 vm_page_t m; 696 int i; 697 698 KASSERT((pmap != kernel_pmap), 699 ("pdir_unhold: unholding kernel pdir!")); 700 701 pdir = pmap->pm_pp2d[pp2d_idx]; 702 703 KASSERT(((vm_offset_t) pdir >= VM_MIN_KERNEL_ADDRESS), 704 ("pdir_unhold: non kva pdir")); 705 706 /* decrement hold count */ 707 for (i = 0; i < PDIR_PAGES; i++) { 708 pa = pte_vatopa(mmu, kernel_pmap, 709 (vm_offset_t) pdir + (i * PAGE_SIZE)); 710 m = PHYS_TO_VM_PAGE(pa); 711 m->wire_count--; 712 } 713 714 /* 715 * Free pdir pages if there are no dir entries in this pdir. 716 * wire_count has the same value for all ptbl pages, so check the 717 * last page. 718 */ 719 if (m->wire_count == 0) { 720 pdir_free(mmu, pmap, pp2d_idx); 721 return (1); 722 } 723 return (0); 724 } 725 726 /* 727 * Increment hold count for pdir pages. This routine is used when new ptlb 728 * entry is being inserted into pdir. 729 */ 730 static void 731 pdir_hold(mmu_t mmu, pmap_t pmap, pte_t ** pdir) 732 { 733 vm_paddr_t pa; 734 vm_page_t m; 735 int i; 736 737 KASSERT((pmap != kernel_pmap), 738 ("pdir_hold: holding kernel pdir!")); 739 740 KASSERT((pdir != NULL), ("pdir_hold: null pdir")); 741 742 for (i = 0; i < PDIR_PAGES; i++) { 743 pa = pte_vatopa(mmu, kernel_pmap, 744 (vm_offset_t) pdir + (i * PAGE_SIZE)); 745 m = PHYS_TO_VM_PAGE(pa); 746 m->wire_count++; 747 } 748 } 749 750 /* Allocate page table. */ 751 static pte_t * 752 ptbl_alloc(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx, 753 boolean_t nosleep) 754 { 755 vm_page_t mtbl [PTBL_PAGES]; 756 vm_page_t m; 757 struct ptbl_buf *pbuf; 758 unsigned int pidx; 759 pte_t *ptbl; 760 int i, j; 761 int req; 762 763 KASSERT((pdir[pdir_idx] == NULL), 764 ("%s: valid ptbl entry exists!", __func__)); 765 766 pbuf = ptbl_buf_alloc(); 767 if (pbuf == NULL) 768 panic("%s: couldn't alloc kernel virtual memory", __func__); 769 770 ptbl = (pte_t *) pbuf->kva; 771 772 for (i = 0; i < PTBL_PAGES; i++) { 773 pidx = (PTBL_PAGES * pdir_idx) + i; 774 req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; 775 while ((m = vm_page_alloc(NULL, pidx, req)) == NULL) { 776 PMAP_UNLOCK(pmap); 777 rw_wunlock(&pvh_global_lock); 778 if (nosleep) { 779 ptbl_free_pmap_ptbl(pmap, ptbl); 780 for (j = 0; j < i; j++) 781 vm_page_free(mtbl[j]); 782 atomic_subtract_int(&vm_cnt.v_wire_count, i); 783 return (NULL); 784 } 785 VM_WAIT; 786 rw_wlock(&pvh_global_lock); 787 PMAP_LOCK(pmap); 788 } 789 mtbl[i] = m; 790 } 791 792 /* Mapin allocated pages into kernel_pmap. */ 793 mmu_booke_qenter(mmu, (vm_offset_t) ptbl, mtbl, PTBL_PAGES); 794 /* Zero whole ptbl. */ 795 bzero((caddr_t) ptbl, PTBL_PAGES * PAGE_SIZE); 796 797 /* Add pbuf to the pmap ptbl bufs list. */ 798 TAILQ_INSERT_TAIL(&pmap->pm_ptbl_list, pbuf, link); 799 800 return (ptbl); 801 } 802 803 /* Free ptbl pages and invalidate pdir entry. 
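 *
 * Note the ordering below: the pdir slot is cleared first, then the backing
 * pages are unmapped from kernel_pmap and freed, and finally the KVA slot
 * is returned to the buffer pool via ptbl_free_pmap_ptbl().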
*/ 804 static void 805 ptbl_free(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx) 806 { 807 pte_t *ptbl; 808 vm_paddr_t pa; 809 vm_offset_t va; 810 vm_page_t m; 811 int i; 812 813 ptbl = pdir[pdir_idx]; 814 815 KASSERT((ptbl != NULL), ("ptbl_free: null ptbl")); 816 817 pdir[pdir_idx] = NULL; 818 819 for (i = 0; i < PTBL_PAGES; i++) { 820 va = ((vm_offset_t) ptbl + (i * PAGE_SIZE)); 821 pa = pte_vatopa(mmu, kernel_pmap, va); 822 m = PHYS_TO_VM_PAGE(pa); 823 vm_page_free_zero(m); 824 atomic_subtract_int(&vm_cnt.v_wire_count, 1); 825 pmap_kremove(va); 826 } 827 828 ptbl_free_pmap_ptbl(pmap, ptbl); 829 } 830 831 /* 832 * Decrement ptbl pages hold count and attempt to free ptbl pages. Called 833 * when removing pte entry from ptbl. 834 * 835 * Return 1 if ptbl pages were freed. 836 */ 837 static int 838 ptbl_unhold(mmu_t mmu, pmap_t pmap, vm_offset_t va) 839 { 840 pte_t *ptbl; 841 vm_paddr_t pa; 842 vm_page_t m; 843 u_int pp2d_idx; 844 pte_t **pdir; 845 u_int pdir_idx; 846 int i; 847 848 pp2d_idx = PP2D_IDX(va); 849 pdir_idx = PDIR_IDX(va); 850 851 KASSERT((pmap != kernel_pmap), 852 ("ptbl_unhold: unholding kernel ptbl!")); 853 854 pdir = pmap->pm_pp2d[pp2d_idx]; 855 ptbl = pdir[pdir_idx]; 856 857 KASSERT(((vm_offset_t) ptbl >= VM_MIN_KERNEL_ADDRESS), 858 ("ptbl_unhold: non kva ptbl")); 859 860 /* decrement hold count */ 861 for (i = 0; i < PTBL_PAGES; i++) { 862 pa = pte_vatopa(mmu, kernel_pmap, 863 (vm_offset_t) ptbl + (i * PAGE_SIZE)); 864 m = PHYS_TO_VM_PAGE(pa); 865 m->wire_count--; 866 } 867 868 /* 869 * Free ptbl pages if there are no pte entries in this ptbl. 870 * wire_count has the same value for all ptbl pages, so check the 871 * last page. 872 */ 873 if (m->wire_count == 0) { 874 /* A pair of indirect entries might point to this ptbl page */ 875 #if 0 876 tlb_flush_entry(pmap, va & ~((2UL * PAGE_SIZE_1M) - 1), 877 TLB_SIZE_1M, MAS6_SIND); 878 tlb_flush_entry(pmap, (va & ~((2UL * PAGE_SIZE_1M) - 1)) | PAGE_SIZE_1M, 879 TLB_SIZE_1M, MAS6_SIND); 880 #endif 881 ptbl_free(mmu, pmap, pdir, pdir_idx); 882 pdir_unhold(mmu, pmap, pp2d_idx); 883 return (1); 884 } 885 return (0); 886 } 887 888 /* 889 * Increment hold count for ptbl pages. This routine is used when new pte 890 * entry is being inserted into ptbl. 891 */ 892 static void 893 ptbl_hold(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx) 894 { 895 vm_paddr_t pa; 896 pte_t *ptbl; 897 vm_page_t m; 898 int i; 899 900 KASSERT((pmap != kernel_pmap), 901 ("ptbl_hold: holding kernel ptbl!")); 902 903 ptbl = pdir[pdir_idx]; 904 905 KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl")); 906 907 for (i = 0; i < PTBL_PAGES; i++) { 908 pa = pte_vatopa(mmu, kernel_pmap, 909 (vm_offset_t) ptbl + (i * PAGE_SIZE)); 910 m = PHYS_TO_VM_PAGE(pa); 911 m->wire_count++; 912 } 913 } 914 #else 915 916 /* Initialize pool of kva ptbl buffers. */ 917 static void 918 ptbl_init(void) 919 { 920 int i; 921 922 CTR3(KTR_PMAP, "%s: s (ptbl_bufs = 0x%08x size 0x%08x)", __func__, 923 (uint32_t)ptbl_bufs, sizeof(struct ptbl_buf) * PTBL_BUFS); 924 CTR3(KTR_PMAP, "%s: s (ptbl_buf_pool_vabase = 0x%08x size = 0x%08x)", 925 __func__, ptbl_buf_pool_vabase, PTBL_BUFS * PTBL_PAGES * PAGE_SIZE); 926 927 mtx_init(&ptbl_buf_freelist_lock, "ptbl bufs lock", NULL, MTX_DEF); 928 TAILQ_INIT(&ptbl_buf_freelist); 929 930 for (i = 0; i < PTBL_BUFS; i++) { 931 ptbl_bufs[i].kva = 932 ptbl_buf_pool_vabase + i * PTBL_PAGES * PAGE_SIZE; 933 TAILQ_INSERT_TAIL(&ptbl_buf_freelist, &ptbl_bufs[i], link); 934 } 935 } 936 937 /* Get a ptbl_buf from the freelist. 
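 *
 * Returns NULL once all PTBL_BUFS slots are in use.  Typical use, as in
 * ptbl_alloc() below (which treats pool exhaustion as fatal):
 *
 *	pbuf = ptbl_buf_alloc();
 *	if (pbuf == NULL)
 *		panic("pte_alloc: couldn't alloc kernel virtual memory");
 *	ptbl = (pte_t *)pbuf->kva;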
*/ 938 static struct ptbl_buf * 939 ptbl_buf_alloc(void) 940 { 941 struct ptbl_buf *buf; 942 943 mtx_lock(&ptbl_buf_freelist_lock); 944 buf = TAILQ_FIRST(&ptbl_buf_freelist); 945 if (buf != NULL) 946 TAILQ_REMOVE(&ptbl_buf_freelist, buf, link); 947 mtx_unlock(&ptbl_buf_freelist_lock); 948 949 CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf); 950 951 return (buf); 952 } 953 954 /* Return ptbl buff to free pool. */ 955 static void 956 ptbl_buf_free(struct ptbl_buf *buf) 957 { 958 959 CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf); 960 961 mtx_lock(&ptbl_buf_freelist_lock); 962 TAILQ_INSERT_TAIL(&ptbl_buf_freelist, buf, link); 963 mtx_unlock(&ptbl_buf_freelist_lock); 964 } 965 966 /* 967 * Search the list of allocated ptbl bufs and find on list of allocated ptbls 968 */ 969 static void 970 ptbl_free_pmap_ptbl(pmap_t pmap, pte_t *ptbl) 971 { 972 struct ptbl_buf *pbuf; 973 974 CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl); 975 976 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 977 978 TAILQ_FOREACH(pbuf, &pmap->pm_ptbl_list, link) 979 if (pbuf->kva == (vm_offset_t)ptbl) { 980 /* Remove from pmap ptbl buf list. */ 981 TAILQ_REMOVE(&pmap->pm_ptbl_list, pbuf, link); 982 983 /* Free corresponding ptbl buf. */ 984 ptbl_buf_free(pbuf); 985 break; 986 } 987 } 988 989 /* Allocate page table. */ 990 static pte_t * 991 ptbl_alloc(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx, boolean_t nosleep) 992 { 993 vm_page_t mtbl[PTBL_PAGES]; 994 vm_page_t m; 995 struct ptbl_buf *pbuf; 996 unsigned int pidx; 997 pte_t *ptbl; 998 int i, j; 999 1000 CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, 1001 (pmap == kernel_pmap), pdir_idx); 1002 1003 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 1004 ("ptbl_alloc: invalid pdir_idx")); 1005 KASSERT((pmap->pm_pdir[pdir_idx] == NULL), 1006 ("pte_alloc: valid ptbl entry exists!")); 1007 1008 pbuf = ptbl_buf_alloc(); 1009 if (pbuf == NULL) 1010 panic("pte_alloc: couldn't alloc kernel virtual memory"); 1011 1012 ptbl = (pte_t *)pbuf->kva; 1013 1014 CTR2(KTR_PMAP, "%s: ptbl kva = %p", __func__, ptbl); 1015 1016 for (i = 0; i < PTBL_PAGES; i++) { 1017 pidx = (PTBL_PAGES * pdir_idx) + i; 1018 while ((m = vm_page_alloc(NULL, pidx, 1019 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { 1020 PMAP_UNLOCK(pmap); 1021 rw_wunlock(&pvh_global_lock); 1022 if (nosleep) { 1023 ptbl_free_pmap_ptbl(pmap, ptbl); 1024 for (j = 0; j < i; j++) 1025 vm_page_free(mtbl[j]); 1026 atomic_subtract_int(&vm_cnt.v_wire_count, i); 1027 return (NULL); 1028 } 1029 VM_WAIT; 1030 rw_wlock(&pvh_global_lock); 1031 PMAP_LOCK(pmap); 1032 } 1033 mtbl[i] = m; 1034 } 1035 1036 /* Map allocated pages into kernel_pmap. */ 1037 mmu_booke_qenter(mmu, (vm_offset_t)ptbl, mtbl, PTBL_PAGES); 1038 1039 /* Zero whole ptbl. */ 1040 bzero((caddr_t)ptbl, PTBL_PAGES * PAGE_SIZE); 1041 1042 /* Add pbuf to the pmap ptbl bufs list. */ 1043 TAILQ_INSERT_TAIL(&pmap->pm_ptbl_list, pbuf, link); 1044 1045 return (ptbl); 1046 } 1047 1048 /* Free ptbl pages and invalidate pdir entry. 
*/ 1049 static void 1050 ptbl_free(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) 1051 { 1052 pte_t *ptbl; 1053 vm_paddr_t pa; 1054 vm_offset_t va; 1055 vm_page_t m; 1056 int i; 1057 1058 CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, 1059 (pmap == kernel_pmap), pdir_idx); 1060 1061 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 1062 ("ptbl_free: invalid pdir_idx")); 1063 1064 ptbl = pmap->pm_pdir[pdir_idx]; 1065 1066 CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl); 1067 1068 KASSERT((ptbl != NULL), ("ptbl_free: null ptbl")); 1069 1070 /* 1071 * Invalidate the pdir entry as soon as possible, so that other CPUs 1072 * don't attempt to look up the page tables we are releasing. 1073 */ 1074 mtx_lock_spin(&tlbivax_mutex); 1075 tlb_miss_lock(); 1076 1077 pmap->pm_pdir[pdir_idx] = NULL; 1078 1079 tlb_miss_unlock(); 1080 mtx_unlock_spin(&tlbivax_mutex); 1081 1082 for (i = 0; i < PTBL_PAGES; i++) { 1083 va = ((vm_offset_t)ptbl + (i * PAGE_SIZE)); 1084 pa = pte_vatopa(mmu, kernel_pmap, va); 1085 m = PHYS_TO_VM_PAGE(pa); 1086 vm_page_free_zero(m); 1087 atomic_subtract_int(&vm_cnt.v_wire_count, 1); 1088 mmu_booke_kremove(mmu, va); 1089 } 1090 1091 ptbl_free_pmap_ptbl(pmap, ptbl); 1092 } 1093 1094 /* 1095 * Decrement ptbl pages hold count and attempt to free ptbl pages. 1096 * Called when removing pte entry from ptbl. 1097 * 1098 * Return 1 if ptbl pages were freed. 1099 */ 1100 static int 1101 ptbl_unhold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) 1102 { 1103 pte_t *ptbl; 1104 vm_paddr_t pa; 1105 vm_page_t m; 1106 int i; 1107 1108 CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, 1109 (pmap == kernel_pmap), pdir_idx); 1110 1111 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 1112 ("ptbl_unhold: invalid pdir_idx")); 1113 KASSERT((pmap != kernel_pmap), 1114 ("ptbl_unhold: unholding kernel ptbl!")); 1115 1116 ptbl = pmap->pm_pdir[pdir_idx]; 1117 1118 //debugf("ptbl_unhold: ptbl = 0x%08x\n", (u_int32_t)ptbl); 1119 KASSERT(((vm_offset_t)ptbl >= VM_MIN_KERNEL_ADDRESS), 1120 ("ptbl_unhold: non kva ptbl")); 1121 1122 /* decrement hold count */ 1123 for (i = 0; i < PTBL_PAGES; i++) { 1124 pa = pte_vatopa(mmu, kernel_pmap, 1125 (vm_offset_t)ptbl + (i * PAGE_SIZE)); 1126 m = PHYS_TO_VM_PAGE(pa); 1127 m->wire_count--; 1128 } 1129 1130 /* 1131 * Free ptbl pages if there are no pte etries in this ptbl. 1132 * wire_count has the same value for all ptbl pages, so check the last 1133 * page. 1134 */ 1135 if (m->wire_count == 0) { 1136 ptbl_free(mmu, pmap, pdir_idx); 1137 1138 //debugf("ptbl_unhold: e (freed ptbl)\n"); 1139 return (1); 1140 } 1141 1142 return (0); 1143 } 1144 1145 /* 1146 * Increment hold count for ptbl pages. This routine is used when a new pte 1147 * entry is being inserted into the ptbl. 
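 * The count lives in the wire_count of the pages backing the ptbl, as in
 * ptbl_unhold() above; pte_enter() calls this only for user pmaps when it
 * reuses an existing ptbl for a previously unused pte slot, and the
 * matching decrement happens when PTBL_UNHOLD is passed to pte_remove().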
1148 */ 1149 static void 1150 ptbl_hold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) 1151 { 1152 vm_paddr_t pa; 1153 pte_t *ptbl; 1154 vm_page_t m; 1155 int i; 1156 1157 CTR3(KTR_PMAP, "%s: pmap = %p pdir_idx = %d", __func__, pmap, 1158 pdir_idx); 1159 1160 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 1161 ("ptbl_hold: invalid pdir_idx")); 1162 KASSERT((pmap != kernel_pmap), 1163 ("ptbl_hold: holding kernel ptbl!")); 1164 1165 ptbl = pmap->pm_pdir[pdir_idx]; 1166 1167 KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl")); 1168 1169 for (i = 0; i < PTBL_PAGES; i++) { 1170 pa = pte_vatopa(mmu, kernel_pmap, 1171 (vm_offset_t)ptbl + (i * PAGE_SIZE)); 1172 m = PHYS_TO_VM_PAGE(pa); 1173 m->wire_count++; 1174 } 1175 } 1176 #endif 1177 1178 /* Allocate pv_entry structure. */ 1179 pv_entry_t 1180 pv_alloc(void) 1181 { 1182 pv_entry_t pv; 1183 1184 pv_entry_count++; 1185 if (pv_entry_count > pv_entry_high_water) 1186 pagedaemon_wakeup(); 1187 pv = uma_zalloc(pvzone, M_NOWAIT); 1188 1189 return (pv); 1190 } 1191 1192 /* Free pv_entry structure. */ 1193 static __inline void 1194 pv_free(pv_entry_t pve) 1195 { 1196 1197 pv_entry_count--; 1198 uma_zfree(pvzone, pve); 1199 } 1200 1201 1202 /* Allocate and initialize pv_entry structure. */ 1203 static void 1204 pv_insert(pmap_t pmap, vm_offset_t va, vm_page_t m) 1205 { 1206 pv_entry_t pve; 1207 1208 //int su = (pmap == kernel_pmap); 1209 //debugf("pv_insert: s (su = %d pmap = 0x%08x va = 0x%08x m = 0x%08x)\n", su, 1210 // (u_int32_t)pmap, va, (u_int32_t)m); 1211 1212 pve = pv_alloc(); 1213 if (pve == NULL) 1214 panic("pv_insert: no pv entries!"); 1215 1216 pve->pv_pmap = pmap; 1217 pve->pv_va = va; 1218 1219 /* add to pv_list */ 1220 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1221 rw_assert(&pvh_global_lock, RA_WLOCKED); 1222 1223 TAILQ_INSERT_TAIL(&m->md.pv_list, pve, pv_link); 1224 1225 //debugf("pv_insert: e\n"); 1226 } 1227 1228 /* Destroy pv entry. */ 1229 static void 1230 pv_remove(pmap_t pmap, vm_offset_t va, vm_page_t m) 1231 { 1232 pv_entry_t pve; 1233 1234 //int su = (pmap == kernel_pmap); 1235 //debugf("pv_remove: s (su = %d pmap = 0x%08x va = 0x%08x)\n", su, (u_int32_t)pmap, va); 1236 1237 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1238 rw_assert(&pvh_global_lock, RA_WLOCKED); 1239 1240 /* find pv entry */ 1241 TAILQ_FOREACH(pve, &m->md.pv_list, pv_link) { 1242 if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) { 1243 /* remove from pv_list */ 1244 TAILQ_REMOVE(&m->md.pv_list, pve, pv_link); 1245 if (TAILQ_EMPTY(&m->md.pv_list)) 1246 vm_page_aflag_clear(m, PGA_WRITEABLE); 1247 1248 /* free pv entry struct */ 1249 pv_free(pve); 1250 break; 1251 } 1252 } 1253 1254 //debugf("pv_remove: e\n"); 1255 } 1256 1257 #ifdef __powerpc64__ 1258 /* 1259 * Clean pte entry, try to free page table page if requested. 1260 * 1261 * Return 1 if ptbl pages were freed, otherwise return 0. 1262 */ 1263 static int 1264 pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, u_int8_t flags) 1265 { 1266 vm_page_t m; 1267 pte_t *pte; 1268 1269 pte = pte_find(mmu, pmap, va); 1270 KASSERT(pte != NULL, ("%s: NULL pte", __func__)); 1271 1272 if (!PTE_ISVALID(pte)) 1273 return (0); 1274 1275 /* Get vm_page_t for mapped pte. */ 1276 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 1277 1278 if (PTE_ISWIRED(pte)) 1279 pmap->pm_stats.wired_count--; 1280 1281 /* Handle managed entry. */ 1282 if (PTE_ISMANAGED(pte)) { 1283 1284 /* Handle modified pages. */ 1285 if (PTE_ISMODIFIED(pte)) 1286 vm_page_dirty(m); 1287 1288 /* Referenced pages. 
*/ 1289 if (PTE_ISREFERENCED(pte)) 1290 vm_page_aflag_set(m, PGA_REFERENCED); 1291 1292 /* Remove pv_entry from pv_list. */ 1293 pv_remove(pmap, va, m); 1294 } 1295 mtx_lock_spin(&tlbivax_mutex); 1296 tlb_miss_lock(); 1297 1298 tlb0_flush_entry(va); 1299 *pte = 0; 1300 1301 tlb_miss_unlock(); 1302 mtx_unlock_spin(&tlbivax_mutex); 1303 1304 pmap->pm_stats.resident_count--; 1305 1306 if (flags & PTBL_UNHOLD) { 1307 return (ptbl_unhold(mmu, pmap, va)); 1308 } 1309 return (0); 1310 } 1311 1312 /* 1313 * allocate a page of pointers to page directories, do not preallocate the 1314 * page tables 1315 */ 1316 static pte_t ** 1317 pdir_alloc(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx, bool nosleep) 1318 { 1319 vm_page_t mtbl [PDIR_PAGES]; 1320 vm_page_t m; 1321 struct ptbl_buf *pbuf; 1322 pte_t **pdir; 1323 unsigned int pidx; 1324 int i; 1325 int req; 1326 1327 pbuf = ptbl_buf_alloc(); 1328 1329 if (pbuf == NULL) 1330 panic("%s: couldn't alloc kernel virtual memory", __func__); 1331 1332 /* Allocate pdir pages, this will sleep! */ 1333 for (i = 0; i < PDIR_PAGES; i++) { 1334 pidx = (PDIR_PAGES * pp2d_idx) + i; 1335 req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; 1336 while ((m = vm_page_alloc(NULL, pidx, req)) == NULL) { 1337 PMAP_UNLOCK(pmap); 1338 VM_WAIT; 1339 PMAP_LOCK(pmap); 1340 } 1341 mtbl[i] = m; 1342 } 1343 1344 /* Mapin allocated pages into kernel_pmap. */ 1345 pdir = (pte_t **) pbuf->kva; 1346 pmap_qenter((vm_offset_t) pdir, mtbl, PDIR_PAGES); 1347 1348 /* Zero whole pdir. */ 1349 bzero((caddr_t) pdir, PDIR_PAGES * PAGE_SIZE); 1350 1351 /* Add pdir to the pmap pdir bufs list. */ 1352 TAILQ_INSERT_TAIL(&pmap->pm_pdir_list, pbuf, link); 1353 1354 return pdir; 1355 } 1356 1357 /* 1358 * Insert PTE for a given page and virtual address. 1359 */ 1360 static int 1361 pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags, 1362 boolean_t nosleep) 1363 { 1364 unsigned int pp2d_idx = PP2D_IDX(va); 1365 unsigned int pdir_idx = PDIR_IDX(va); 1366 unsigned int ptbl_idx = PTBL_IDX(va); 1367 pte_t *ptbl, *pte; 1368 pte_t **pdir; 1369 1370 /* Get the page directory pointer. */ 1371 pdir = pmap->pm_pp2d[pp2d_idx]; 1372 if (pdir == NULL) 1373 pdir = pdir_alloc(mmu, pmap, pp2d_idx, nosleep); 1374 1375 /* Get the page table pointer. */ 1376 ptbl = pdir[pdir_idx]; 1377 1378 if (ptbl == NULL) { 1379 /* Allocate page table pages. */ 1380 ptbl = ptbl_alloc(mmu, pmap, pdir, pdir_idx, nosleep); 1381 if (ptbl == NULL) { 1382 KASSERT(nosleep, ("nosleep and NULL ptbl")); 1383 return (ENOMEM); 1384 } 1385 } else { 1386 /* 1387 * Check if there is valid mapping for requested va, if there 1388 * is, remove it. 1389 */ 1390 pte = &pdir[pdir_idx][ptbl_idx]; 1391 if (PTE_ISVALID(pte)) { 1392 pte_remove(mmu, pmap, va, PTBL_HOLD); 1393 } else { 1394 /* 1395 * pte is not used, increment hold count for ptbl 1396 * pages. 1397 */ 1398 if (pmap != kernel_pmap) 1399 ptbl_hold(mmu, pmap, pdir, pdir_idx); 1400 } 1401 } 1402 1403 if (pdir[pdir_idx] == NULL) { 1404 if (pmap != kernel_pmap && pmap->pm_pp2d[pp2d_idx] != NULL) 1405 pdir_hold(mmu, pmap, pdir); 1406 pdir[pdir_idx] = ptbl; 1407 } 1408 if (pmap->pm_pp2d[pp2d_idx] == NULL) 1409 pmap->pm_pp2d[pp2d_idx] = pdir; 1410 1411 /* 1412 * Insert pv_entry into pv_list for mapped page if part of managed 1413 * memory. 1414 */ 1415 if ((m->oflags & VPO_UNMANAGED) == 0) { 1416 flags |= PTE_MANAGED; 1417 1418 /* Create and insert pv entry. 
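		 * A pv entry simply records the (pmap, va) pair and is linked
		 * on m->md.pv_list, so all mappings of this physical page can
		 * later be found via the page, e.g. by mmu_booke_remove_all().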
*/ 1419 pv_insert(pmap, va, m); 1420 } 1421 1422 mtx_lock_spin(&tlbivax_mutex); 1423 tlb_miss_lock(); 1424 1425 tlb0_flush_entry(va); 1426 pmap->pm_stats.resident_count++; 1427 pte = &pdir[pdir_idx][ptbl_idx]; 1428 *pte = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m)); 1429 *pte |= (PTE_VALID | flags); 1430 1431 tlb_miss_unlock(); 1432 mtx_unlock_spin(&tlbivax_mutex); 1433 1434 return (0); 1435 } 1436 1437 /* Return the pa for the given pmap/va. */ 1438 static vm_paddr_t 1439 pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1440 { 1441 vm_paddr_t pa = 0; 1442 pte_t *pte; 1443 1444 pte = pte_find(mmu, pmap, va); 1445 if ((pte != NULL) && PTE_ISVALID(pte)) 1446 pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); 1447 return (pa); 1448 } 1449 1450 1451 /* allocate pte entries to manage (addr & mask) to (addr & mask) + size */ 1452 static void 1453 kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr, vm_offset_t pdir) 1454 { 1455 int i, j; 1456 vm_offset_t va; 1457 pte_t *pte; 1458 1459 va = addr; 1460 /* Initialize kernel pdir */ 1461 for (i = 0; i < kernel_pdirs; i++) { 1462 kernel_pmap->pm_pp2d[i + PP2D_IDX(va)] = 1463 (pte_t **)(pdir + (i * PAGE_SIZE * PDIR_PAGES)); 1464 for (j = PDIR_IDX(va + (i * PAGE_SIZE * PDIR_NENTRIES * PTBL_NENTRIES)); 1465 j < PDIR_NENTRIES; j++) { 1466 kernel_pmap->pm_pp2d[i + PP2D_IDX(va)][j] = 1467 (pte_t *)(pdir + (kernel_pdirs * PAGE_SIZE * PDIR_PAGES) + 1468 (((i * PDIR_NENTRIES) + j) * PAGE_SIZE * PTBL_PAGES)); 1469 } 1470 } 1471 1472 /* 1473 * Fill in PTEs covering kernel code and data. They are not required 1474 * for address translation, as this area is covered by static TLB1 1475 * entries, but for pte_vatopa() to work correctly with kernel area 1476 * addresses. 1477 */ 1478 for (va = addr; va < data_end; va += PAGE_SIZE) { 1479 pte = &(kernel_pmap->pm_pp2d[PP2D_IDX(va)][PDIR_IDX(va)][PTBL_IDX(va)]); 1480 *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart)); 1481 *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | 1482 PTE_VALID | PTE_PS_4KB; 1483 } 1484 } 1485 #else 1486 /* 1487 * Clean pte entry, try to free page table page if requested. 1488 * 1489 * Return 1 if ptbl pages were freed, otherwise return 0. 1490 */ 1491 static int 1492 pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, uint8_t flags) 1493 { 1494 unsigned int pdir_idx = PDIR_IDX(va); 1495 unsigned int ptbl_idx = PTBL_IDX(va); 1496 vm_page_t m; 1497 pte_t *ptbl; 1498 pte_t *pte; 1499 1500 //int su = (pmap == kernel_pmap); 1501 //debugf("pte_remove: s (su = %d pmap = 0x%08x va = 0x%08x flags = %d)\n", 1502 // su, (u_int32_t)pmap, va, flags); 1503 1504 ptbl = pmap->pm_pdir[pdir_idx]; 1505 KASSERT(ptbl, ("pte_remove: null ptbl")); 1506 1507 pte = &ptbl[ptbl_idx]; 1508 1509 if (pte == NULL || !PTE_ISVALID(pte)) 1510 return (0); 1511 1512 if (PTE_ISWIRED(pte)) 1513 pmap->pm_stats.wired_count--; 1514 1515 /* Get vm_page_t for mapped pte. */ 1516 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 1517 1518 /* Handle managed entry. */ 1519 if (PTE_ISMANAGED(pte)) { 1520 1521 if (PTE_ISMODIFIED(pte)) 1522 vm_page_dirty(m); 1523 1524 if (PTE_ISREFERENCED(pte)) 1525 vm_page_aflag_set(m, PGA_REFERENCED); 1526 1527 pv_remove(pmap, va, m); 1528 } else if (m->md.pv_tracked) { 1529 /* 1530 * Always pv_insert()/pv_remove() on MPC85XX, in case DPAA is 1531 * used. This is needed by the NCSW support code for fast 1532 * VA<->PA translation. 
1533 */ 1534 pv_remove(pmap, va, m); 1535 if (TAILQ_EMPTY(&m->md.pv_list)) 1536 m->md.pv_tracked = false; 1537 } 1538 1539 mtx_lock_spin(&tlbivax_mutex); 1540 tlb_miss_lock(); 1541 1542 tlb0_flush_entry(va); 1543 *pte = 0; 1544 1545 tlb_miss_unlock(); 1546 mtx_unlock_spin(&tlbivax_mutex); 1547 1548 pmap->pm_stats.resident_count--; 1549 1550 if (flags & PTBL_UNHOLD) { 1551 //debugf("pte_remove: e (unhold)\n"); 1552 return (ptbl_unhold(mmu, pmap, pdir_idx)); 1553 } 1554 1555 //debugf("pte_remove: e\n"); 1556 return (0); 1557 } 1558 1559 /* 1560 * Insert PTE for a given page and virtual address. 1561 */ 1562 static int 1563 pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags, 1564 boolean_t nosleep) 1565 { 1566 unsigned int pdir_idx = PDIR_IDX(va); 1567 unsigned int ptbl_idx = PTBL_IDX(va); 1568 pte_t *ptbl, *pte; 1569 1570 CTR4(KTR_PMAP, "%s: su = %d pmap = %p va = %p", __func__, 1571 pmap == kernel_pmap, pmap, va); 1572 1573 /* Get the page table pointer. */ 1574 ptbl = pmap->pm_pdir[pdir_idx]; 1575 1576 if (ptbl == NULL) { 1577 /* Allocate page table pages. */ 1578 ptbl = ptbl_alloc(mmu, pmap, pdir_idx, nosleep); 1579 if (ptbl == NULL) { 1580 KASSERT(nosleep, ("nosleep and NULL ptbl")); 1581 return (ENOMEM); 1582 } 1583 } else { 1584 /* 1585 * Check if there is valid mapping for requested 1586 * va, if there is, remove it. 1587 */ 1588 pte = &pmap->pm_pdir[pdir_idx][ptbl_idx]; 1589 if (PTE_ISVALID(pte)) { 1590 pte_remove(mmu, pmap, va, PTBL_HOLD); 1591 } else { 1592 /* 1593 * pte is not used, increment hold count 1594 * for ptbl pages. 1595 */ 1596 if (pmap != kernel_pmap) 1597 ptbl_hold(mmu, pmap, pdir_idx); 1598 } 1599 } 1600 1601 /* 1602 * Insert pv_entry into pv_list for mapped page if part of managed 1603 * memory. 1604 */ 1605 if ((m->oflags & VPO_UNMANAGED) == 0) { 1606 flags |= PTE_MANAGED; 1607 1608 /* Create and insert pv entry. */ 1609 pv_insert(pmap, va, m); 1610 } 1611 1612 pmap->pm_stats.resident_count++; 1613 1614 mtx_lock_spin(&tlbivax_mutex); 1615 tlb_miss_lock(); 1616 1617 tlb0_flush_entry(va); 1618 if (pmap->pm_pdir[pdir_idx] == NULL) { 1619 /* 1620 * If we just allocated a new page table, hook it in 1621 * the pdir. 1622 */ 1623 pmap->pm_pdir[pdir_idx] = ptbl; 1624 } 1625 pte = &(pmap->pm_pdir[pdir_idx][ptbl_idx]); 1626 *pte = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m)); 1627 *pte |= (PTE_VALID | flags | PTE_PS_4KB); /* 4KB pages only */ 1628 1629 tlb_miss_unlock(); 1630 mtx_unlock_spin(&tlbivax_mutex); 1631 return (0); 1632 } 1633 1634 /* Return the pa for the given pmap/va. */ 1635 static vm_paddr_t 1636 pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1637 { 1638 vm_paddr_t pa = 0; 1639 pte_t *pte; 1640 1641 pte = pte_find(mmu, pmap, va); 1642 if ((pte != NULL) && PTE_ISVALID(pte)) 1643 pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); 1644 return (pa); 1645 } 1646 1647 /* Get a pointer to a PTE in a page table. */ 1648 static pte_t * 1649 pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1650 { 1651 unsigned int pdir_idx = PDIR_IDX(va); 1652 unsigned int ptbl_idx = PTBL_IDX(va); 1653 1654 KASSERT((pmap != NULL), ("pte_find: invalid pmap")); 1655 1656 if (pmap->pm_pdir[pdir_idx]) 1657 return (&(pmap->pm_pdir[pdir_idx][ptbl_idx])); 1658 1659 return (NULL); 1660 } 1661 1662 /* Set up kernel page tables. 
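 *
 * The ptbls backing kernel KVA were carved out of the kernel image as one
 * contiguous block ('pdir', i.e. kernel_pdir) in mmu_booke_bootstrap();
 * here they are only hooked into kernel_pmap->pm_pdir.  A sketch of the
 * resulting two-level lookup, as done by pte_find()/pte_vatopa() above:
 *
 *	pte = &pmap->pm_pdir[PDIR_IDX(va)][PTBL_IDX(va)];
 *	pa  = PTE_PA(pte) | (va & PTE_PA_MASK);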
*/ 1663 static void 1664 kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr, vm_offset_t pdir) 1665 { 1666 int i; 1667 vm_offset_t va; 1668 pte_t *pte; 1669 1670 /* Initialize kernel pdir */ 1671 for (i = 0; i < kernel_ptbls; i++) 1672 kernel_pmap->pm_pdir[kptbl_min + i] = 1673 (pte_t *)(pdir + (i * PAGE_SIZE * PTBL_PAGES)); 1674 1675 /* 1676 * Fill in PTEs covering kernel code and data. They are not required 1677 * for address translation, as this area is covered by static TLB1 1678 * entries, but for pte_vatopa() to work correctly with kernel area 1679 * addresses. 1680 */ 1681 for (va = addr; va < data_end; va += PAGE_SIZE) { 1682 pte = &(kernel_pmap->pm_pdir[PDIR_IDX(va)][PTBL_IDX(va)]); 1683 *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart)); 1684 *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | 1685 PTE_VALID | PTE_PS_4KB; 1686 } 1687 } 1688 #endif 1689 1690 /**************************************************************************/ 1691 /* PMAP related */ 1692 /**************************************************************************/ 1693 1694 /* 1695 * This is called during booke_init, before the system is really initialized. 1696 */ 1697 static void 1698 mmu_booke_bootstrap(mmu_t mmu, vm_offset_t start, vm_offset_t kernelend) 1699 { 1700 vm_paddr_t phys_kernelend; 1701 struct mem_region *mp, *mp1; 1702 int cnt, i, j; 1703 vm_paddr_t s, e, sz; 1704 vm_paddr_t physsz, hwphyssz; 1705 u_int phys_avail_count; 1706 vm_size_t kstack0_sz; 1707 vm_offset_t kernel_pdir, kstack0; 1708 vm_paddr_t kstack0_phys; 1709 void *dpcpu; 1710 1711 debugf("mmu_booke_bootstrap: entered\n"); 1712 1713 /* Set interesting system properties */ 1714 hw_direct_map = 0; 1715 #if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__) 1716 elf32_nxstack = 1; 1717 #endif 1718 1719 /* Initialize invalidation mutex */ 1720 mtx_init(&tlbivax_mutex, "tlbivax", NULL, MTX_SPIN); 1721 1722 /* Read TLB0 size and associativity. */ 1723 tlb0_get_tlbconf(); 1724 1725 /* 1726 * Align kernel start and end address (kernel image). 1727 * Note that kernel end does not necessarily relate to kernsize. 1728 * kernsize is the size of the kernel that is actually mapped. 1729 */ 1730 kernstart = trunc_page(start); 1731 data_start = round_page(kernelend); 1732 data_end = data_start; 1733 1734 /* 1735 * Addresses of preloaded modules (like file systems) use 1736 * physical addresses. Make sure we relocate those into 1737 * virtual addresses. 1738 */ 1739 preload_addr_relocate = kernstart - kernload; 1740 1741 /* Allocate the dynamic per-cpu area. */ 1742 dpcpu = (void *)data_end; 1743 data_end += DPCPU_SIZE; 1744 1745 /* Allocate space for the message buffer. */ 1746 msgbufp = (struct msgbuf *)data_end; 1747 data_end += msgbufsize; 1748 debugf(" msgbufp at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", 1749 (uintptr_t)msgbufp, data_end); 1750 1751 data_end = round_page(data_end); 1752 1753 /* Allocate space for ptbl_bufs. */ 1754 ptbl_bufs = (struct ptbl_buf *)data_end; 1755 data_end += sizeof(struct ptbl_buf) * PTBL_BUFS; 1756 debugf(" ptbl_bufs at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", 1757 (uintptr_t)ptbl_bufs, data_end); 1758 1759 data_end = round_page(data_end); 1760 1761 /* Allocate PTE tables for kernel KVA. 
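	 * One ptbl maps PDIR_SIZE bytes of KVA, so covering the whole
	 * kernel VA range takes howmany(VM_MAX_KERNEL_ADDRESS -
	 * VM_MIN_KERNEL_ADDRESS, PDIR_SIZE) ptbls, i.e. kernel_ptbls *
	 * PTBL_PAGES * PAGE_SIZE bytes carved out of the image right after
	 * ptbl_bufs (plus kernel_pdirs * PDIR_PAGES * PAGE_SIZE of page
	 * directories in the 64-bit case).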
*/ 1762 kernel_pdir = data_end; 1763 kernel_ptbls = howmany(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, 1764 PDIR_SIZE); 1765 #ifdef __powerpc64__ 1766 kernel_pdirs = howmany(kernel_ptbls, PDIR_NENTRIES); 1767 data_end += kernel_pdirs * PDIR_PAGES * PAGE_SIZE; 1768 #endif 1769 data_end += kernel_ptbls * PTBL_PAGES * PAGE_SIZE; 1770 debugf(" kernel ptbls: %d\n", kernel_ptbls); 1771 debugf(" kernel pdir at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", 1772 kernel_pdir, data_end); 1773 1774 debugf(" data_end: 0x%"PRI0ptrX"\n", data_end); 1775 if (data_end - kernstart > kernsize) { 1776 kernsize += tlb1_mapin_region(kernstart + kernsize, 1777 kernload + kernsize, (data_end - kernstart) - kernsize); 1778 } 1779 data_end = kernstart + kernsize; 1780 debugf(" updated data_end: 0x%"PRI0ptrX"\n", data_end); 1781 1782 /* 1783 * Clear the structures - note we can only do it safely after the 1784 * possible additional TLB1 translations are in place (above) so that 1785 * all range up to the currently calculated 'data_end' is covered. 1786 */ 1787 dpcpu_init(dpcpu, 0); 1788 memset((void *)ptbl_bufs, 0, sizeof(struct ptbl_buf) * PTBL_SIZE); 1789 #ifdef __powerpc64__ 1790 memset((void *)kernel_pdir, 0, 1791 kernel_pdirs * PDIR_PAGES * PAGE_SIZE + 1792 kernel_ptbls * PTBL_PAGES * PAGE_SIZE); 1793 #else 1794 memset((void *)kernel_pdir, 0, kernel_ptbls * PTBL_PAGES * PAGE_SIZE); 1795 #endif 1796 1797 /*******************************************************/ 1798 /* Set the start and end of kva. */ 1799 /*******************************************************/ 1800 virtual_avail = round_page(data_end); 1801 virtual_end = VM_MAX_KERNEL_ADDRESS; 1802 1803 /* Allocate KVA space for page zero/copy operations. */ 1804 zero_page_va = virtual_avail; 1805 virtual_avail += PAGE_SIZE; 1806 copy_page_src_va = virtual_avail; 1807 virtual_avail += PAGE_SIZE; 1808 copy_page_dst_va = virtual_avail; 1809 virtual_avail += PAGE_SIZE; 1810 debugf("zero_page_va = 0x%08x\n", zero_page_va); 1811 debugf("copy_page_src_va = 0x%08x\n", copy_page_src_va); 1812 debugf("copy_page_dst_va = 0x%08x\n", copy_page_dst_va); 1813 1814 /* Initialize page zero/copy mutexes. */ 1815 mtx_init(&zero_page_mutex, "mmu_booke_zero_page", NULL, MTX_DEF); 1816 mtx_init(©_page_mutex, "mmu_booke_copy_page", NULL, MTX_DEF); 1817 1818 /* Allocate KVA space for ptbl bufs. */ 1819 ptbl_buf_pool_vabase = virtual_avail; 1820 virtual_avail += PTBL_BUFS * PTBL_PAGES * PAGE_SIZE; 1821 debugf("ptbl_buf_pool_vabase = 0x%08x end = 0x%08x\n", 1822 ptbl_buf_pool_vabase, virtual_avail); 1823 1824 /* Calculate corresponding physical addresses for the kernel region. */ 1825 phys_kernelend = kernload + kernsize; 1826 debugf("kernel image and allocated data:\n"); 1827 debugf(" kernload = 0x%09llx\n", (uint64_t)kernload); 1828 debugf(" kernstart = 0x%08x\n", kernstart); 1829 debugf(" kernsize = 0x%08x\n", kernsize); 1830 1831 if (sizeof(phys_avail) / sizeof(phys_avail[0]) < availmem_regions_sz) 1832 panic("mmu_booke_bootstrap: phys_avail too small"); 1833 1834 /* 1835 * Remove kernel physical address range from avail regions list. Page 1836 * align all regions. Non-page aligned memory isn't very interesting 1837 * to us. Also, sort the entries for ascending addresses. 
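	 *
	 * Three overlap cases are handled in the loop below:
	 *  - a region spanning the whole kernel is split: the tail past
	 *    phys_kernelend is queued as a new region and the original is
	 *    truncated to end at kernload;
	 *  - a region starting inside the kernel has its start bumped to
	 *    phys_kernelend (or is dropped if fully contained);
	 *  - a region ending inside the kernel has its end pulled back to
	 *    kernload.
	 * For example (hypothetical addresses), 0x0000_0000-0x2000_0000 with
	 * the kernel at 0x0100_0000-0x0180_0000 becomes the two regions
	 * 0x0000_0000-0x0100_0000 and 0x0180_0000-0x2000_0000.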
1838 */ 1839 1840 /* Retrieve phys/avail mem regions */ 1841 mem_regions(&physmem_regions, &physmem_regions_sz, 1842 &availmem_regions, &availmem_regions_sz); 1843 sz = 0; 1844 cnt = availmem_regions_sz; 1845 debugf("processing avail regions:\n"); 1846 for (mp = availmem_regions; mp->mr_size; mp++) { 1847 s = mp->mr_start; 1848 e = mp->mr_start + mp->mr_size; 1849 debugf(" %09jx-%09jx -> ", (uintmax_t)s, (uintmax_t)e); 1850 /* Check whether this region holds all of the kernel. */ 1851 if (s < kernload && e > phys_kernelend) { 1852 availmem_regions[cnt].mr_start = phys_kernelend; 1853 availmem_regions[cnt++].mr_size = e - phys_kernelend; 1854 e = kernload; 1855 } 1856 /* Look whether this regions starts within the kernel. */ 1857 if (s >= kernload && s < phys_kernelend) { 1858 if (e <= phys_kernelend) 1859 goto empty; 1860 s = phys_kernelend; 1861 } 1862 /* Now look whether this region ends within the kernel. */ 1863 if (e > kernload && e <= phys_kernelend) { 1864 if (s >= kernload) 1865 goto empty; 1866 e = kernload; 1867 } 1868 /* Now page align the start and size of the region. */ 1869 s = round_page(s); 1870 e = trunc_page(e); 1871 if (e < s) 1872 e = s; 1873 sz = e - s; 1874 debugf("%09jx-%09jx = %jx\n", 1875 (uintmax_t)s, (uintmax_t)e, (uintmax_t)sz); 1876 1877 /* Check whether some memory is left here. */ 1878 if (sz == 0) { 1879 empty: 1880 memmove(mp, mp + 1, 1881 (cnt - (mp - availmem_regions)) * sizeof(*mp)); 1882 cnt--; 1883 mp--; 1884 continue; 1885 } 1886 1887 /* Do an insertion sort. */ 1888 for (mp1 = availmem_regions; mp1 < mp; mp1++) 1889 if (s < mp1->mr_start) 1890 break; 1891 if (mp1 < mp) { 1892 memmove(mp1 + 1, mp1, (char *)mp - (char *)mp1); 1893 mp1->mr_start = s; 1894 mp1->mr_size = sz; 1895 } else { 1896 mp->mr_start = s; 1897 mp->mr_size = sz; 1898 } 1899 } 1900 availmem_regions_sz = cnt; 1901 1902 /*******************************************************/ 1903 /* Steal physical memory for kernel stack from the end */ 1904 /* of the first avail region */ 1905 /*******************************************************/ 1906 kstack0_sz = kstack_pages * PAGE_SIZE; 1907 kstack0_phys = availmem_regions[0].mr_start + 1908 availmem_regions[0].mr_size; 1909 kstack0_phys -= kstack0_sz; 1910 availmem_regions[0].mr_size -= kstack0_sz; 1911 1912 /*******************************************************/ 1913 /* Fill in phys_avail table, based on availmem_regions */ 1914 /*******************************************************/ 1915 phys_avail_count = 0; 1916 physsz = 0; 1917 hwphyssz = 0; 1918 TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz); 1919 1920 debugf("fill in phys_avail:\n"); 1921 for (i = 0, j = 0; i < availmem_regions_sz; i++, j += 2) { 1922 1923 debugf(" region: 0x%jx - 0x%jx (0x%jx)\n", 1924 (uintmax_t)availmem_regions[i].mr_start, 1925 (uintmax_t)availmem_regions[i].mr_start + 1926 availmem_regions[i].mr_size, 1927 (uintmax_t)availmem_regions[i].mr_size); 1928 1929 if (hwphyssz != 0 && 1930 (physsz + availmem_regions[i].mr_size) >= hwphyssz) { 1931 debugf(" hw.physmem adjust\n"); 1932 if (physsz < hwphyssz) { 1933 phys_avail[j] = availmem_regions[i].mr_start; 1934 phys_avail[j + 1] = 1935 availmem_regions[i].mr_start + 1936 hwphyssz - physsz; 1937 physsz = hwphyssz; 1938 phys_avail_count++; 1939 } 1940 break; 1941 } 1942 1943 phys_avail[j] = availmem_regions[i].mr_start; 1944 phys_avail[j + 1] = availmem_regions[i].mr_start + 1945 availmem_regions[i].mr_size; 1946 phys_avail_count++; 1947 physsz += availmem_regions[i].mr_size; 1948 } 1949 physmem = 
btoc(physsz); 1950 1951 /* Calculate the last available physical address. */ 1952 for (i = 0; phys_avail[i + 2] != 0; i += 2) 1953 ; 1954 Maxmem = powerpc_btop(phys_avail[i + 1]); 1955 1956 debugf("Maxmem = 0x%08lx\n", Maxmem); 1957 debugf("phys_avail_count = %d\n", phys_avail_count); 1958 debugf("physsz = 0x%09jx physmem = %jd (0x%09jx)\n", 1959 (uintmax_t)physsz, (uintmax_t)physmem, (uintmax_t)physmem); 1960 1961 /*******************************************************/ 1962 /* Initialize (statically allocated) kernel pmap. */ 1963 /*******************************************************/ 1964 PMAP_LOCK_INIT(kernel_pmap); 1965 #ifndef __powerpc64__ 1966 kptbl_min = VM_MIN_KERNEL_ADDRESS / PDIR_SIZE; 1967 #endif 1968 1969 debugf("kernel_pmap = 0x%"PRI0ptrX"\n", (uintptr_t)kernel_pmap); 1970 kernel_pte_alloc(virtual_avail, kernstart, kernel_pdir); 1971 for (i = 0; i < MAXCPU; i++) { 1972 kernel_pmap->pm_tid[i] = TID_KERNEL; 1973 1974 /* Initialize each CPU's tidbusy entry 0 with kernel_pmap */ 1975 tidbusy[i][TID_KERNEL] = kernel_pmap; 1976 } 1977 1978 /* Mark kernel_pmap active on all CPUs */ 1979 CPU_FILL(&kernel_pmap->pm_active); 1980 1981 /* 1982 * Initialize the global pv list lock. 1983 */ 1984 rw_init(&pvh_global_lock, "pmap pv global"); 1985 1986 /*******************************************************/ 1987 /* Final setup */ 1988 /*******************************************************/ 1989 1990 /* Enter kstack0 into kernel map, provide guard page */ 1991 kstack0 = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE; 1992 thread0.td_kstack = kstack0; 1993 thread0.td_kstack_pages = kstack_pages; 1994 1995 debugf("kstack_sz = 0x%08x\n", kstack0_sz); 1996 debugf("kstack0_phys at 0x%09llx - 0x%09llx\n", 1997 kstack0_phys, kstack0_phys + kstack0_sz); 1998 debugf("kstack0 at 0x%"PRI0ptrX" - 0x%"PRI0ptrX"\n", 1999 kstack0, kstack0 + kstack0_sz); 2000 2001 virtual_avail += KSTACK_GUARD_PAGES * PAGE_SIZE + kstack0_sz; 2002 for (i = 0; i < kstack_pages; i++) { 2003 mmu_booke_kenter(mmu, kstack0, kstack0_phys); 2004 kstack0 += PAGE_SIZE; 2005 kstack0_phys += PAGE_SIZE; 2006 } 2007 2008 pmap_bootstrapped = 1; 2009 2010 debugf("virtual_avail = %"PRI0ptrX"\n", virtual_avail); 2011 debugf("virtual_end = %"PRI0ptrX"\n", virtual_end); 2012 2013 debugf("mmu_booke_bootstrap: exit\n"); 2014 } 2015 2016 #ifdef SMP 2017 void 2018 tlb1_ap_prep(void) 2019 { 2020 tlb_entry_t *e, tmp; 2021 unsigned int i; 2022 2023 /* Prepare TLB1 image for AP processors */ 2024 e = __boot_tlb1; 2025 for (i = 0; i < TLB1_ENTRIES; i++) { 2026 tlb1_read_entry(&tmp, i); 2027 2028 if ((tmp.mas1 & MAS1_VALID) && (tmp.mas2 & _TLB_ENTRY_SHARED)) 2029 memcpy(e++, &tmp, sizeof(tmp)); 2030 } 2031 } 2032 2033 void 2034 pmap_bootstrap_ap(volatile uint32_t *trcp __unused) 2035 { 2036 int i; 2037 2038 /* 2039 * Finish TLB1 configuration: the BSP already set up its TLB1 and we 2040 * have the snapshot of its contents in the s/w __boot_tlb1[] table 2041 * created by tlb1_ap_prep(), so use these values directly to 2042 * (re)program AP's TLB1 hardware. 2043 * 2044 * Start at index 1 because index 0 has the kernel map. 
2045 */ 2046 for (i = 1; i < TLB1_ENTRIES; i++) { 2047 if (__boot_tlb1[i].mas1 & MAS1_VALID) 2048 tlb1_write_entry(&__boot_tlb1[i], i); 2049 } 2050 2051 set_mas4_defaults(); 2052 } 2053 #endif 2054 2055 static void 2056 booke_pmap_init_qpages(void) 2057 { 2058 struct pcpu *pc; 2059 int i; 2060 2061 CPU_FOREACH(i) { 2062 pc = pcpu_find(i); 2063 pc->pc_qmap_addr = kva_alloc(PAGE_SIZE); 2064 if (pc->pc_qmap_addr == 0) 2065 panic("pmap_init_qpages: unable to allocate KVA"); 2066 } 2067 } 2068 2069 SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, booke_pmap_init_qpages, NULL); 2070 2071 /* 2072 * Get the physical page address for the given pmap/virtual address. 2073 */ 2074 static vm_paddr_t 2075 mmu_booke_extract(mmu_t mmu, pmap_t pmap, vm_offset_t va) 2076 { 2077 vm_paddr_t pa; 2078 2079 PMAP_LOCK(pmap); 2080 pa = pte_vatopa(mmu, pmap, va); 2081 PMAP_UNLOCK(pmap); 2082 2083 return (pa); 2084 } 2085 2086 /* 2087 * Extract the physical page address associated with the given 2088 * kernel virtual address. 2089 */ 2090 static vm_paddr_t 2091 mmu_booke_kextract(mmu_t mmu, vm_offset_t va) 2092 { 2093 tlb_entry_t e; 2094 vm_paddr_t p = 0; 2095 int i; 2096 2097 if (va >= VM_MIN_KERNEL_ADDRESS && va <= VM_MAX_KERNEL_ADDRESS) 2098 p = pte_vatopa(mmu, kernel_pmap, va); 2099 2100 if (p == 0) { 2101 /* Check TLB1 mappings */ 2102 for (i = 0; i < TLB1_ENTRIES; i++) { 2103 tlb1_read_entry(&e, i); 2104 if (!(e.mas1 & MAS1_VALID)) 2105 continue; 2106 if (va >= e.virt && va < e.virt + e.size) 2107 return (e.phys + (va - e.virt)); 2108 } 2109 } 2110 2111 return (p); 2112 } 2113 2114 /* 2115 * Initialize the pmap module. 2116 * Called by vm_init, to initialize any structures that the pmap 2117 * system needs to map virtual memory. 2118 */ 2119 static void 2120 mmu_booke_init(mmu_t mmu) 2121 { 2122 int shpgperproc = PMAP_SHPGPERPROC; 2123 2124 /* 2125 * Initialize the address space (zone) for the pv entries. Set a 2126 * high water mark so that the system can recover from excessive 2127 * numbers of pv entries. 2128 */ 2129 pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL, 2130 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); 2131 2132 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 2133 pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count; 2134 2135 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); 2136 pv_entry_high_water = 9 * (pv_entry_max / 10); 2137 2138 uma_zone_reserve_kva(pvzone, pv_entry_max); 2139 2140 /* Pre-fill pvzone with initial number of pv entries. */ 2141 uma_prealloc(pvzone, PV_ENTRY_ZONE_MIN); 2142 2143 /* Initialize ptbl allocation. */ 2144 ptbl_init(); 2145 } 2146 2147 /* 2148 * Map a list of wired pages into kernel virtual address space. This is 2149 * intended for temporary mappings which do not need page modification or 2150 * references recorded. Existing mappings in the region are overwritten. 2151 */ 2152 static void 2153 mmu_booke_qenter(mmu_t mmu, vm_offset_t sva, vm_page_t *m, int count) 2154 { 2155 vm_offset_t va; 2156 2157 va = sva; 2158 while (count-- > 0) { 2159 mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(*m)); 2160 va += PAGE_SIZE; 2161 m++; 2162 } 2163 } 2164 2165 /* 2166 * Remove page mappings from kernel virtual address space. Intended for 2167 * temporary mappings entered by mmu_booke_qenter. 
2168 */ 2169 static void 2170 mmu_booke_qremove(mmu_t mmu, vm_offset_t sva, int count) 2171 { 2172 vm_offset_t va; 2173 2174 va = sva; 2175 while (count-- > 0) { 2176 mmu_booke_kremove(mmu, va); 2177 va += PAGE_SIZE; 2178 } 2179 } 2180 2181 /* 2182 * Map a wired page into kernel virtual address space. 2183 */ 2184 static void 2185 mmu_booke_kenter(mmu_t mmu, vm_offset_t va, vm_paddr_t pa) 2186 { 2187 2188 mmu_booke_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT); 2189 } 2190 2191 static void 2192 mmu_booke_kenter_attr(mmu_t mmu, vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma) 2193 { 2194 uint32_t flags; 2195 pte_t *pte; 2196 2197 KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && 2198 (va <= VM_MAX_KERNEL_ADDRESS)), ("mmu_booke_kenter: invalid va")); 2199 2200 flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID; 2201 flags |= tlb_calc_wimg(pa, ma) << PTE_MAS2_SHIFT; 2202 flags |= PTE_PS_4KB; 2203 2204 pte = pte_find(mmu, kernel_pmap, va); 2205 KASSERT((pte != NULL), ("mmu_booke_kenter: invalid va. NULL PTE")); 2206 2207 mtx_lock_spin(&tlbivax_mutex); 2208 tlb_miss_lock(); 2209 2210 if (PTE_ISVALID(pte)) { 2211 2212 CTR1(KTR_PMAP, "%s: replacing entry!", __func__); 2213 2214 /* Flush entry from TLB0 */ 2215 tlb0_flush_entry(va); 2216 } 2217 2218 *pte = PTE_RPN_FROM_PA(pa) | flags; 2219 2220 //debugf("mmu_booke_kenter: pdir_idx = %d ptbl_idx = %d va=0x%08x " 2221 // "pa=0x%08x rpn=0x%08x flags=0x%08x\n", 2222 // pdir_idx, ptbl_idx, va, pa, pte->rpn, pte->flags); 2223 2224 /* Flush the real memory from the instruction cache. */ 2225 if ((flags & (PTE_I | PTE_G)) == 0) 2226 __syncicache((void *)va, PAGE_SIZE); 2227 2228 tlb_miss_unlock(); 2229 mtx_unlock_spin(&tlbivax_mutex); 2230 } 2231 2232 /* 2233 * Remove a page from kernel page table. 2234 */ 2235 static void 2236 mmu_booke_kremove(mmu_t mmu, vm_offset_t va) 2237 { 2238 pte_t *pte; 2239 2240 CTR2(KTR_PMAP,"%s: s (va = 0x%08x)\n", __func__, va); 2241 2242 KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && 2243 (va <= VM_MAX_KERNEL_ADDRESS)), 2244 ("mmu_booke_kremove: invalid va")); 2245 2246 pte = pte_find(mmu, kernel_pmap, va); 2247 2248 if (!PTE_ISVALID(pte)) { 2249 2250 CTR1(KTR_PMAP, "%s: invalid pte", __func__); 2251 2252 return; 2253 } 2254 2255 mtx_lock_spin(&tlbivax_mutex); 2256 tlb_miss_lock(); 2257 2258 /* Invalidate entry in TLB0, update PTE. */ 2259 tlb0_flush_entry(va); 2260 *pte = 0; 2261 2262 tlb_miss_unlock(); 2263 mtx_unlock_spin(&tlbivax_mutex); 2264 } 2265 2266 /* 2267 * Initialize pmap associated with process 0. 2268 */ 2269 static void 2270 mmu_booke_pinit0(mmu_t mmu, pmap_t pmap) 2271 { 2272 2273 PMAP_LOCK_INIT(pmap); 2274 mmu_booke_pinit(mmu, pmap); 2275 PCPU_SET(curpmap, pmap); 2276 } 2277 2278 /* 2279 * Initialize a preallocated and zeroed pmap structure, 2280 * such as one in a vmspace structure. 
2281 */ 2282 static void 2283 mmu_booke_pinit(mmu_t mmu, pmap_t pmap) 2284 { 2285 int i; 2286 2287 CTR4(KTR_PMAP, "%s: pmap = %p, proc %d '%s'", __func__, pmap, 2288 curthread->td_proc->p_pid, curthread->td_proc->p_comm); 2289 2290 KASSERT((pmap != kernel_pmap), ("pmap_pinit: initializing kernel_pmap")); 2291 2292 for (i = 0; i < MAXCPU; i++) 2293 pmap->pm_tid[i] = TID_NONE; 2294 CPU_ZERO(&kernel_pmap->pm_active); 2295 bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); 2296 #ifdef __powerpc64__ 2297 bzero(&pmap->pm_pp2d, sizeof(pte_t **) * PP2D_NENTRIES); 2298 TAILQ_INIT(&pmap->pm_pdir_list); 2299 #else 2300 bzero(&pmap->pm_pdir, sizeof(pte_t *) * PDIR_NENTRIES); 2301 #endif 2302 TAILQ_INIT(&pmap->pm_ptbl_list); 2303 } 2304 2305 /* 2306 * Release any resources held by the given physical map. 2307 * Called when a pmap initialized by mmu_booke_pinit is being released. 2308 * Should only be called if the map contains no valid mappings. 2309 */ 2310 static void 2311 mmu_booke_release(mmu_t mmu, pmap_t pmap) 2312 { 2313 2314 KASSERT(pmap->pm_stats.resident_count == 0, 2315 ("pmap_release: pmap resident count %ld != 0", 2316 pmap->pm_stats.resident_count)); 2317 } 2318 2319 /* 2320 * Insert the given physical page at the specified virtual address in the 2321 * target physical map with the protection requested. If specified the page 2322 * will be wired down. 2323 */ 2324 static int 2325 mmu_booke_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 2326 vm_prot_t prot, u_int flags, int8_t psind) 2327 { 2328 int error; 2329 2330 rw_wlock(&pvh_global_lock); 2331 PMAP_LOCK(pmap); 2332 error = mmu_booke_enter_locked(mmu, pmap, va, m, prot, flags, psind); 2333 PMAP_UNLOCK(pmap); 2334 rw_wunlock(&pvh_global_lock); 2335 return (error); 2336 } 2337 2338 static int 2339 mmu_booke_enter_locked(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 2340 vm_prot_t prot, u_int pmap_flags, int8_t psind __unused) 2341 { 2342 pte_t *pte; 2343 vm_paddr_t pa; 2344 uint32_t flags; 2345 int error, su, sync; 2346 2347 pa = VM_PAGE_TO_PHYS(m); 2348 su = (pmap == kernel_pmap); 2349 sync = 0; 2350 2351 //debugf("mmu_booke_enter_locked: s (pmap=0x%08x su=%d tid=%d m=0x%08x va=0x%08x " 2352 // "pa=0x%08x prot=0x%08x flags=%#x)\n", 2353 // (u_int32_t)pmap, su, pmap->pm_tid, 2354 // (u_int32_t)m, va, pa, prot, flags); 2355 2356 if (su) { 2357 KASSERT(((va >= virtual_avail) && 2358 (va <= VM_MAX_KERNEL_ADDRESS)), 2359 ("mmu_booke_enter_locked: kernel pmap, non kernel va")); 2360 } else { 2361 KASSERT((va <= VM_MAXUSER_ADDRESS), 2362 ("mmu_booke_enter_locked: user pmap, non user va")); 2363 } 2364 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) 2365 VM_OBJECT_ASSERT_LOCKED(m->object); 2366 2367 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2368 2369 /* 2370 * If there is an existing mapping, and the physical address has not 2371 * changed, must be protection or wiring change. 2372 */ 2373 if (((pte = pte_find(mmu, pmap, va)) != NULL) && 2374 (PTE_ISVALID(pte)) && (PTE_PA(pte) == pa)) { 2375 2376 /* 2377 * Before actually updating pte->flags we calculate and 2378 * prepare its new value in a helper var. 2379 */ 2380 flags = *pte; 2381 flags &= ~(PTE_UW | PTE_UX | PTE_SW | PTE_SX | PTE_MODIFIED); 2382 2383 /* Wiring change, just update stats. 
*/ 2384 if ((pmap_flags & PMAP_ENTER_WIRED) != 0) { 2385 if (!PTE_ISWIRED(pte)) { 2386 flags |= PTE_WIRED; 2387 pmap->pm_stats.wired_count++; 2388 } 2389 } else { 2390 if (PTE_ISWIRED(pte)) { 2391 flags &= ~PTE_WIRED; 2392 pmap->pm_stats.wired_count--; 2393 } 2394 } 2395 2396 if (prot & VM_PROT_WRITE) { 2397 /* Add write permissions. */ 2398 flags |= PTE_SW; 2399 if (!su) 2400 flags |= PTE_UW; 2401 2402 if ((flags & PTE_MANAGED) != 0) 2403 vm_page_aflag_set(m, PGA_WRITEABLE); 2404 } else { 2405 /* Handle modified pages, sense modify status. */ 2406 2407 /* 2408 * The PTE_MODIFIED flag could be set by underlying 2409 * TLB misses since we last read it (above), possibly 2410 * other CPUs could update it so we check in the PTE 2411 * directly rather than rely on that saved local flags 2412 * copy. 2413 */ 2414 if (PTE_ISMODIFIED(pte)) 2415 vm_page_dirty(m); 2416 } 2417 2418 if (prot & VM_PROT_EXECUTE) { 2419 flags |= PTE_SX; 2420 if (!su) 2421 flags |= PTE_UX; 2422 2423 /* 2424 * Check existing flags for execute permissions: if we 2425 * are turning execute permissions on, icache should 2426 * be flushed. 2427 */ 2428 if ((*pte & (PTE_UX | PTE_SX)) == 0) 2429 sync++; 2430 } 2431 2432 flags &= ~PTE_REFERENCED; 2433 2434 /* 2435 * The new flags value is all calculated -- only now actually 2436 * update the PTE. 2437 */ 2438 mtx_lock_spin(&tlbivax_mutex); 2439 tlb_miss_lock(); 2440 2441 tlb0_flush_entry(va); 2442 *pte &= ~PTE_FLAGS_MASK; 2443 *pte |= flags; 2444 2445 tlb_miss_unlock(); 2446 mtx_unlock_spin(&tlbivax_mutex); 2447 2448 } else { 2449 /* 2450 * If there is an existing mapping, but it's for a different 2451 * physical address, pte_enter() will delete the old mapping. 2452 */ 2453 //if ((pte != NULL) && PTE_ISVALID(pte)) 2454 // debugf("mmu_booke_enter_locked: replace\n"); 2455 //else 2456 // debugf("mmu_booke_enter_locked: new\n"); 2457 2458 /* Now set up the flags and install the new mapping. */ 2459 flags = (PTE_SR | PTE_VALID); 2460 flags |= PTE_M; 2461 2462 if (!su) 2463 flags |= PTE_UR; 2464 2465 if (prot & VM_PROT_WRITE) { 2466 flags |= PTE_SW; 2467 if (!su) 2468 flags |= PTE_UW; 2469 2470 if ((m->oflags & VPO_UNMANAGED) == 0) 2471 vm_page_aflag_set(m, PGA_WRITEABLE); 2472 } 2473 2474 if (prot & VM_PROT_EXECUTE) { 2475 flags |= PTE_SX; 2476 if (!su) 2477 flags |= PTE_UX; 2478 } 2479 2480 /* If its wired update stats. */ 2481 if ((pmap_flags & PMAP_ENTER_WIRED) != 0) 2482 flags |= PTE_WIRED; 2483 2484 error = pte_enter(mmu, pmap, m, va, flags, 2485 (pmap_flags & PMAP_ENTER_NOSLEEP) != 0); 2486 if (error != 0) 2487 return (KERN_RESOURCE_SHORTAGE); 2488 2489 if ((flags & PMAP_ENTER_WIRED) != 0) 2490 pmap->pm_stats.wired_count++; 2491 2492 /* Flush the real memory from the instruction cache. */ 2493 if (prot & VM_PROT_EXECUTE) 2494 sync++; 2495 } 2496 2497 if (sync && (su || pmap == PCPU_GET(curpmap))) { 2498 __syncicache((void *)va, PAGE_SIZE); 2499 sync = 0; 2500 } 2501 2502 return (KERN_SUCCESS); 2503 } 2504 2505 /* 2506 * Maps a sequence of resident pages belonging to the same object. 2507 * The sequence begins with the given page m_start. This page is 2508 * mapped at the given virtual address start. Each subsequent page is 2509 * mapped at a virtual address that is offset from start by the same 2510 * amount as the page is offset from m_start within the object. The 2511 * last page in the sequence is the page with the largest offset from 2512 * m_start that can be mapped at a virtual address less than the given 2513 * virtual address end. 
Not every virtual page between start and end 2514 * is mapped; only those for which a resident page exists with the 2515 * corresponding offset from m_start are mapped. 2516 */ 2517 static void 2518 mmu_booke_enter_object(mmu_t mmu, pmap_t pmap, vm_offset_t start, 2519 vm_offset_t end, vm_page_t m_start, vm_prot_t prot) 2520 { 2521 vm_page_t m; 2522 vm_pindex_t diff, psize; 2523 2524 VM_OBJECT_ASSERT_LOCKED(m_start->object); 2525 2526 psize = atop(end - start); 2527 m = m_start; 2528 rw_wlock(&pvh_global_lock); 2529 PMAP_LOCK(pmap); 2530 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 2531 mmu_booke_enter_locked(mmu, pmap, start + ptoa(diff), m, 2532 prot & (VM_PROT_READ | VM_PROT_EXECUTE), 2533 PMAP_ENTER_NOSLEEP, 0); 2534 m = TAILQ_NEXT(m, listq); 2535 } 2536 rw_wunlock(&pvh_global_lock); 2537 PMAP_UNLOCK(pmap); 2538 } 2539 2540 static void 2541 mmu_booke_enter_quick(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 2542 vm_prot_t prot) 2543 { 2544 2545 rw_wlock(&pvh_global_lock); 2546 PMAP_LOCK(pmap); 2547 mmu_booke_enter_locked(mmu, pmap, va, m, 2548 prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP, 2549 0); 2550 rw_wunlock(&pvh_global_lock); 2551 PMAP_UNLOCK(pmap); 2552 } 2553 2554 /* 2555 * Remove the given range of addresses from the specified map. 2556 * 2557 * It is assumed that the start and end are properly rounded to the page size. 2558 */ 2559 static void 2560 mmu_booke_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_offset_t endva) 2561 { 2562 pte_t *pte; 2563 uint8_t hold_flag; 2564 2565 int su = (pmap == kernel_pmap); 2566 2567 //debugf("mmu_booke_remove: s (su = %d pmap=0x%08x tid=%d va=0x%08x endva=0x%08x)\n", 2568 // su, (u_int32_t)pmap, pmap->pm_tid, va, endva); 2569 2570 if (su) { 2571 KASSERT(((va >= virtual_avail) && 2572 (va <= VM_MAX_KERNEL_ADDRESS)), 2573 ("mmu_booke_remove: kernel pmap, non kernel va")); 2574 } else { 2575 KASSERT((va <= VM_MAXUSER_ADDRESS), 2576 ("mmu_booke_remove: user pmap, non user va")); 2577 } 2578 2579 if (PMAP_REMOVE_DONE(pmap)) { 2580 //debugf("mmu_booke_remove: e (empty)\n"); 2581 return; 2582 } 2583 2584 hold_flag = PTBL_HOLD_FLAG(pmap); 2585 //debugf("mmu_booke_remove: hold_flag = %d\n", hold_flag); 2586 2587 rw_wlock(&pvh_global_lock); 2588 PMAP_LOCK(pmap); 2589 for (; va < endva; va += PAGE_SIZE) { 2590 pte = pte_find(mmu, pmap, va); 2591 if ((pte != NULL) && PTE_ISVALID(pte)) 2592 pte_remove(mmu, pmap, va, hold_flag); 2593 } 2594 PMAP_UNLOCK(pmap); 2595 rw_wunlock(&pvh_global_lock); 2596 2597 //debugf("mmu_booke_remove: e\n"); 2598 } 2599 2600 /* 2601 * Remove physical page from all pmaps in which it resides. 2602 */ 2603 static void 2604 mmu_booke_remove_all(mmu_t mmu, vm_page_t m) 2605 { 2606 pv_entry_t pv, pvn; 2607 uint8_t hold_flag; 2608 2609 rw_wlock(&pvh_global_lock); 2610 for (pv = TAILQ_FIRST(&m->md.pv_list); pv != NULL; pv = pvn) { 2611 pvn = TAILQ_NEXT(pv, pv_link); 2612 2613 PMAP_LOCK(pv->pv_pmap); 2614 hold_flag = PTBL_HOLD_FLAG(pv->pv_pmap); 2615 pte_remove(mmu, pv->pv_pmap, pv->pv_va, hold_flag); 2616 PMAP_UNLOCK(pv->pv_pmap); 2617 } 2618 vm_page_aflag_clear(m, PGA_WRITEABLE); 2619 rw_wunlock(&pvh_global_lock); 2620 } 2621 2622 /* 2623 * Map a range of physical addresses into kernel virtual address space. 
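 *
 * This is the back end of pmap_map(), used by early-boot code to carve
 * mappings out of the KVA pointed at by '*virt'.  A hypothetical caller
 * (names made up) looks like:
 *
 *	vm_offset_t va = virtual_avail;
 *	void *p;
 *
 *	p = (void *)pmap_map(&va, buf_pa, buf_pa + buf_size,
 *	    VM_PROT_READ | VM_PROT_WRITE);
 *	virtual_avail = va;
 *
 * The return value is the start of the new mapping and '*virt' is
 * advanced past it.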
2624 */ 2625 static vm_offset_t 2626 mmu_booke_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start, 2627 vm_paddr_t pa_end, int prot) 2628 { 2629 vm_offset_t sva = *virt; 2630 vm_offset_t va = sva; 2631 2632 //debugf("mmu_booke_map: s (sva = 0x%08x pa_start = 0x%08x pa_end = 0x%08x)\n", 2633 // sva, pa_start, pa_end); 2634 2635 while (pa_start < pa_end) { 2636 mmu_booke_kenter(mmu, va, pa_start); 2637 va += PAGE_SIZE; 2638 pa_start += PAGE_SIZE; 2639 } 2640 *virt = va; 2641 2642 //debugf("mmu_booke_map: e (va = 0x%08x)\n", va); 2643 return (sva); 2644 } 2645 2646 /* 2647 * The pmap must be activated before it's address space can be accessed in any 2648 * way. 2649 */ 2650 static void 2651 mmu_booke_activate(mmu_t mmu, struct thread *td) 2652 { 2653 pmap_t pmap; 2654 u_int cpuid; 2655 2656 pmap = &td->td_proc->p_vmspace->vm_pmap; 2657 2658 CTR5(KTR_PMAP, "%s: s (td = %p, proc = '%s', id = %d, pmap = 0x%08x)", 2659 __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap); 2660 2661 KASSERT((pmap != kernel_pmap), ("mmu_booke_activate: kernel_pmap!")); 2662 2663 sched_pin(); 2664 2665 cpuid = PCPU_GET(cpuid); 2666 CPU_SET_ATOMIC(cpuid, &pmap->pm_active); 2667 PCPU_SET(curpmap, pmap); 2668 2669 if (pmap->pm_tid[cpuid] == TID_NONE) 2670 tid_alloc(pmap); 2671 2672 /* Load PID0 register with pmap tid value. */ 2673 mtspr(SPR_PID0, pmap->pm_tid[cpuid]); 2674 __asm __volatile("isync"); 2675 2676 mtspr(SPR_DBCR0, td->td_pcb->pcb_cpu.booke.dbcr0); 2677 2678 sched_unpin(); 2679 2680 CTR3(KTR_PMAP, "%s: e (tid = %d for '%s')", __func__, 2681 pmap->pm_tid[PCPU_GET(cpuid)], td->td_proc->p_comm); 2682 } 2683 2684 /* 2685 * Deactivate the specified process's address space. 2686 */ 2687 static void 2688 mmu_booke_deactivate(mmu_t mmu, struct thread *td) 2689 { 2690 pmap_t pmap; 2691 2692 pmap = &td->td_proc->p_vmspace->vm_pmap; 2693 2694 CTR5(KTR_PMAP, "%s: td=%p, proc = '%s', id = %d, pmap = 0x%08x", 2695 __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap); 2696 2697 td->td_pcb->pcb_cpu.booke.dbcr0 = mfspr(SPR_DBCR0); 2698 2699 CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmap->pm_active); 2700 PCPU_SET(curpmap, NULL); 2701 } 2702 2703 /* 2704 * Copy the range specified by src_addr/len 2705 * from the source map to the range dst_addr/len 2706 * in the destination map. 2707 * 2708 * This routine is only advisory and need not do anything. 2709 */ 2710 static void 2711 mmu_booke_copy(mmu_t mmu, pmap_t dst_pmap, pmap_t src_pmap, 2712 vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) 2713 { 2714 2715 } 2716 2717 /* 2718 * Set the physical protection on the specified range of this map as requested. 2719 */ 2720 static void 2721 mmu_booke_protect(mmu_t mmu, pmap_t pmap, vm_offset_t sva, vm_offset_t eva, 2722 vm_prot_t prot) 2723 { 2724 vm_offset_t va; 2725 vm_page_t m; 2726 pte_t *pte; 2727 2728 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 2729 mmu_booke_remove(mmu, pmap, sva, eva); 2730 return; 2731 } 2732 2733 if (prot & VM_PROT_WRITE) 2734 return; 2735 2736 PMAP_LOCK(pmap); 2737 for (va = sva; va < eva; va += PAGE_SIZE) { 2738 if ((pte = pte_find(mmu, pmap, va)) != NULL) { 2739 if (PTE_ISVALID(pte)) { 2740 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 2741 2742 mtx_lock_spin(&tlbivax_mutex); 2743 tlb_miss_lock(); 2744 2745 /* Handle modified pages. 
*/ 2746 if (PTE_ISMODIFIED(pte) && PTE_ISMANAGED(pte)) 2747 vm_page_dirty(m); 2748 2749 tlb0_flush_entry(va); 2750 *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED); 2751 2752 tlb_miss_unlock(); 2753 mtx_unlock_spin(&tlbivax_mutex); 2754 } 2755 } 2756 } 2757 PMAP_UNLOCK(pmap); 2758 } 2759 2760 /* 2761 * Clear the write and modified bits in each of the given page's mappings. 2762 */ 2763 static void 2764 mmu_booke_remove_write(mmu_t mmu, vm_page_t m) 2765 { 2766 pv_entry_t pv; 2767 pte_t *pte; 2768 2769 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2770 ("mmu_booke_remove_write: page %p is not managed", m)); 2771 2772 /* 2773 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 2774 * set by another thread while the object is locked. Thus, 2775 * if PGA_WRITEABLE is clear, no page table entries need updating. 2776 */ 2777 VM_OBJECT_ASSERT_WLOCKED(m->object); 2778 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 2779 return; 2780 rw_wlock(&pvh_global_lock); 2781 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 2782 PMAP_LOCK(pv->pv_pmap); 2783 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) { 2784 if (PTE_ISVALID(pte)) { 2785 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 2786 2787 mtx_lock_spin(&tlbivax_mutex); 2788 tlb_miss_lock(); 2789 2790 /* Handle modified pages. */ 2791 if (PTE_ISMODIFIED(pte)) 2792 vm_page_dirty(m); 2793 2794 /* Flush mapping from TLB0. */ 2795 *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED); 2796 2797 tlb_miss_unlock(); 2798 mtx_unlock_spin(&tlbivax_mutex); 2799 } 2800 } 2801 PMAP_UNLOCK(pv->pv_pmap); 2802 } 2803 vm_page_aflag_clear(m, PGA_WRITEABLE); 2804 rw_wunlock(&pvh_global_lock); 2805 } 2806 2807 static void 2808 mmu_booke_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) 2809 { 2810 pte_t *pte; 2811 pmap_t pmap; 2812 vm_page_t m; 2813 vm_offset_t addr; 2814 vm_paddr_t pa = 0; 2815 int active, valid; 2816 2817 va = trunc_page(va); 2818 sz = round_page(sz); 2819 2820 rw_wlock(&pvh_global_lock); 2821 pmap = PCPU_GET(curpmap); 2822 active = (pm == kernel_pmap || pm == pmap) ? 1 : 0; 2823 while (sz > 0) { 2824 PMAP_LOCK(pm); 2825 pte = pte_find(mmu, pm, va); 2826 valid = (pte != NULL && PTE_ISVALID(pte)) ? 1 : 0; 2827 if (valid) 2828 pa = PTE_PA(pte); 2829 PMAP_UNLOCK(pm); 2830 if (valid) { 2831 if (!active) { 2832 /* Create a mapping in the active pmap. */ 2833 addr = 0; 2834 m = PHYS_TO_VM_PAGE(pa); 2835 PMAP_LOCK(pmap); 2836 pte_enter(mmu, pmap, m, addr, 2837 PTE_SR | PTE_VALID | PTE_UR, FALSE); 2838 __syncicache((void *)addr, PAGE_SIZE); 2839 pte_remove(mmu, pmap, addr, PTBL_UNHOLD); 2840 PMAP_UNLOCK(pmap); 2841 } else 2842 __syncicache((void *)va, PAGE_SIZE); 2843 } 2844 va += PAGE_SIZE; 2845 sz -= PAGE_SIZE; 2846 } 2847 rw_wunlock(&pvh_global_lock); 2848 } 2849 2850 /* 2851 * Atomically extract and hold the physical page with the given 2852 * pmap and virtual address pair if that mapping permits the given 2853 * protection. 
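 *
 * Sketch of a hypothetical caller pinning a user page only if it is
 * writable through the given pmap:
 *
 *	vm_page_t m;
 *
 *	m = pmap_extract_and_hold(pmap, uva, VM_PROT_WRITE);
 *	if (m != NULL) {
 *		... operate on the held page ...
 *		vm_page_unhold(m);
 *	}
 *
 * A valid read-only mapping satisfies VM_PROT_READ requests but yields
 * NULL when VM_PROT_WRITE is asked for.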
2854 */ 2855 static vm_page_t 2856 mmu_booke_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, 2857 vm_prot_t prot) 2858 { 2859 pte_t *pte; 2860 vm_page_t m; 2861 uint32_t pte_wbit; 2862 vm_paddr_t pa; 2863 2864 m = NULL; 2865 pa = 0; 2866 PMAP_LOCK(pmap); 2867 retry: 2868 pte = pte_find(mmu, pmap, va); 2869 if ((pte != NULL) && PTE_ISVALID(pte)) { 2870 if (pmap == kernel_pmap) 2871 pte_wbit = PTE_SW; 2872 else 2873 pte_wbit = PTE_UW; 2874 2875 if ((*pte & pte_wbit) || ((prot & VM_PROT_WRITE) == 0)) { 2876 if (vm_page_pa_tryrelock(pmap, PTE_PA(pte), &pa)) 2877 goto retry; 2878 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 2879 vm_page_hold(m); 2880 } 2881 } 2882 2883 PA_UNLOCK_COND(pa); 2884 PMAP_UNLOCK(pmap); 2885 return (m); 2886 } 2887 2888 /* 2889 * Initialize a vm_page's machine-dependent fields. 2890 */ 2891 static void 2892 mmu_booke_page_init(mmu_t mmu, vm_page_t m) 2893 { 2894 2895 m->md.pv_tracked = 0; 2896 TAILQ_INIT(&m->md.pv_list); 2897 } 2898 2899 /* 2900 * mmu_booke_zero_page_area zeros the specified hardware page by 2901 * mapping it into virtual memory and using bzero to clear 2902 * its contents. 2903 * 2904 * off and size must reside within a single page. 2905 */ 2906 static void 2907 mmu_booke_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) 2908 { 2909 vm_offset_t va; 2910 2911 /* XXX KASSERT off and size are within a single page? */ 2912 2913 mtx_lock(&zero_page_mutex); 2914 va = zero_page_va; 2915 2916 mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); 2917 bzero((caddr_t)va + off, size); 2918 mmu_booke_kremove(mmu, va); 2919 2920 mtx_unlock(&zero_page_mutex); 2921 } 2922 2923 /* 2924 * mmu_booke_zero_page zeros the specified hardware page. 2925 */ 2926 static void 2927 mmu_booke_zero_page(mmu_t mmu, vm_page_t m) 2928 { 2929 vm_offset_t off, va; 2930 2931 mtx_lock(&zero_page_mutex); 2932 va = zero_page_va; 2933 2934 mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); 2935 for (off = 0; off < PAGE_SIZE; off += cacheline_size) 2936 __asm __volatile("dcbz 0,%0" :: "r"(va + off)); 2937 mmu_booke_kremove(mmu, va); 2938 2939 mtx_unlock(&zero_page_mutex); 2940 } 2941 2942 /* 2943 * mmu_booke_copy_page copies the specified (machine independent) page by 2944 * mapping the page into virtual memory and using memcopy to copy the page, 2945 * one machine dependent page at a time. 
2946 */ 2947 static void 2948 mmu_booke_copy_page(mmu_t mmu, vm_page_t sm, vm_page_t dm) 2949 { 2950 vm_offset_t sva, dva; 2951 2952 sva = copy_page_src_va; 2953 dva = copy_page_dst_va; 2954 2955 mtx_lock(©_page_mutex); 2956 mmu_booke_kenter(mmu, sva, VM_PAGE_TO_PHYS(sm)); 2957 mmu_booke_kenter(mmu, dva, VM_PAGE_TO_PHYS(dm)); 2958 memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE); 2959 mmu_booke_kremove(mmu, dva); 2960 mmu_booke_kremove(mmu, sva); 2961 mtx_unlock(©_page_mutex); 2962 } 2963 2964 static inline void 2965 mmu_booke_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, 2966 vm_page_t *mb, vm_offset_t b_offset, int xfersize) 2967 { 2968 void *a_cp, *b_cp; 2969 vm_offset_t a_pg_offset, b_pg_offset; 2970 int cnt; 2971 2972 mtx_lock(©_page_mutex); 2973 while (xfersize > 0) { 2974 a_pg_offset = a_offset & PAGE_MASK; 2975 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 2976 mmu_booke_kenter(mmu, copy_page_src_va, 2977 VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])); 2978 a_cp = (char *)copy_page_src_va + a_pg_offset; 2979 b_pg_offset = b_offset & PAGE_MASK; 2980 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 2981 mmu_booke_kenter(mmu, copy_page_dst_va, 2982 VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])); 2983 b_cp = (char *)copy_page_dst_va + b_pg_offset; 2984 bcopy(a_cp, b_cp, cnt); 2985 mmu_booke_kremove(mmu, copy_page_dst_va); 2986 mmu_booke_kremove(mmu, copy_page_src_va); 2987 a_offset += cnt; 2988 b_offset += cnt; 2989 xfersize -= cnt; 2990 } 2991 mtx_unlock(©_page_mutex); 2992 } 2993 2994 static vm_offset_t 2995 mmu_booke_quick_enter_page(mmu_t mmu, vm_page_t m) 2996 { 2997 vm_paddr_t paddr; 2998 vm_offset_t qaddr; 2999 uint32_t flags; 3000 pte_t *pte; 3001 3002 paddr = VM_PAGE_TO_PHYS(m); 3003 3004 flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID; 3005 flags |= tlb_calc_wimg(paddr, pmap_page_get_memattr(m)) << PTE_MAS2_SHIFT; 3006 flags |= PTE_PS_4KB; 3007 3008 critical_enter(); 3009 qaddr = PCPU_GET(qmap_addr); 3010 3011 pte = pte_find(mmu, kernel_pmap, qaddr); 3012 3013 KASSERT(*pte == 0, ("mmu_booke_quick_enter_page: PTE busy")); 3014 3015 /* 3016 * XXX: tlbivax is broadcast to other cores, but qaddr should 3017 * not be present in other TLBs. Is there a better instruction 3018 * sequence to use? Or just forget it & use mmu_booke_kenter()... 3019 */ 3020 __asm __volatile("tlbivax 0, %0" :: "r"(qaddr & MAS2_EPN_MASK)); 3021 __asm __volatile("isync; msync"); 3022 3023 *pte = PTE_RPN_FROM_PA(paddr) | flags; 3024 3025 /* Flush the real memory from the instruction cache. */ 3026 if ((flags & (PTE_I | PTE_G)) == 0) 3027 __syncicache((void *)qaddr, PAGE_SIZE); 3028 3029 return (qaddr); 3030 } 3031 3032 static void 3033 mmu_booke_quick_remove_page(mmu_t mmu, vm_offset_t addr) 3034 { 3035 pte_t *pte; 3036 3037 pte = pte_find(mmu, kernel_pmap, addr); 3038 3039 KASSERT(PCPU_GET(qmap_addr) == addr, 3040 ("mmu_booke_quick_remove_page: invalid address")); 3041 KASSERT(*pte != 0, 3042 ("mmu_booke_quick_remove_page: PTE not in use")); 3043 3044 *pte = 0; 3045 critical_exit(); 3046 } 3047 3048 /* 3049 * Return whether or not the specified physical page was modified 3050 * in any of physical maps. 
3051 */ 3052 static boolean_t 3053 mmu_booke_is_modified(mmu_t mmu, vm_page_t m) 3054 { 3055 pte_t *pte; 3056 pv_entry_t pv; 3057 boolean_t rv; 3058 3059 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3060 ("mmu_booke_is_modified: page %p is not managed", m)); 3061 rv = FALSE; 3062 3063 /* 3064 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 3065 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 3066 * is clear, no PTEs can be modified. 3067 */ 3068 VM_OBJECT_ASSERT_WLOCKED(m->object); 3069 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 3070 return (rv); 3071 rw_wlock(&pvh_global_lock); 3072 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3073 PMAP_LOCK(pv->pv_pmap); 3074 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3075 PTE_ISVALID(pte)) { 3076 if (PTE_ISMODIFIED(pte)) 3077 rv = TRUE; 3078 } 3079 PMAP_UNLOCK(pv->pv_pmap); 3080 if (rv) 3081 break; 3082 } 3083 rw_wunlock(&pvh_global_lock); 3084 return (rv); 3085 } 3086 3087 /* 3088 * Return whether or not the specified virtual address is eligible 3089 * for prefault. 3090 */ 3091 static boolean_t 3092 mmu_booke_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t addr) 3093 { 3094 3095 return (FALSE); 3096 } 3097 3098 /* 3099 * Return whether or not the specified physical page was referenced 3100 * in any physical maps. 3101 */ 3102 static boolean_t 3103 mmu_booke_is_referenced(mmu_t mmu, vm_page_t m) 3104 { 3105 pte_t *pte; 3106 pv_entry_t pv; 3107 boolean_t rv; 3108 3109 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3110 ("mmu_booke_is_referenced: page %p is not managed", m)); 3111 rv = FALSE; 3112 rw_wlock(&pvh_global_lock); 3113 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3114 PMAP_LOCK(pv->pv_pmap); 3115 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3116 PTE_ISVALID(pte)) { 3117 if (PTE_ISREFERENCED(pte)) 3118 rv = TRUE; 3119 } 3120 PMAP_UNLOCK(pv->pv_pmap); 3121 if (rv) 3122 break; 3123 } 3124 rw_wunlock(&pvh_global_lock); 3125 return (rv); 3126 } 3127 3128 /* 3129 * Clear the modify bits on the specified physical page. 3130 */ 3131 static void 3132 mmu_booke_clear_modify(mmu_t mmu, vm_page_t m) 3133 { 3134 pte_t *pte; 3135 pv_entry_t pv; 3136 3137 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3138 ("mmu_booke_clear_modify: page %p is not managed", m)); 3139 VM_OBJECT_ASSERT_WLOCKED(m->object); 3140 KASSERT(!vm_page_xbusied(m), 3141 ("mmu_booke_clear_modify: page %p is exclusive busied", m)); 3142 3143 /* 3144 * If the page is not PG_AWRITEABLE, then no PTEs can be modified. 3145 * If the object containing the page is locked and the page is not 3146 * exclusive busied, then PG_AWRITEABLE cannot be concurrently set. 3147 */ 3148 if ((m->aflags & PGA_WRITEABLE) == 0) 3149 return; 3150 rw_wlock(&pvh_global_lock); 3151 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3152 PMAP_LOCK(pv->pv_pmap); 3153 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3154 PTE_ISVALID(pte)) { 3155 mtx_lock_spin(&tlbivax_mutex); 3156 tlb_miss_lock(); 3157 3158 if (*pte & (PTE_SW | PTE_UW | PTE_MODIFIED)) { 3159 tlb0_flush_entry(pv->pv_va); 3160 *pte &= ~(PTE_SW | PTE_UW | PTE_MODIFIED | 3161 PTE_REFERENCED); 3162 } 3163 3164 tlb_miss_unlock(); 3165 mtx_unlock_spin(&tlbivax_mutex); 3166 } 3167 PMAP_UNLOCK(pv->pv_pmap); 3168 } 3169 rw_wunlock(&pvh_global_lock); 3170 } 3171 3172 /* 3173 * Return a count of reference bits for a page, clearing those bits. 
3174 * It is not necessary for every reference bit to be cleared, but it 3175 * is necessary that 0 only be returned when there are truly no 3176 * reference bits set. 3177 * 3178 * As an optimization, update the page's dirty field if a modified bit is 3179 * found while counting reference bits. This opportunistic update can be 3180 * performed at low cost and can eliminate the need for some future calls 3181 * to pmap_is_modified(). However, since this function stops after 3182 * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some 3183 * dirty pages. Those dirty pages will only be detected by a future call 3184 * to pmap_is_modified(). 3185 */ 3186 static int 3187 mmu_booke_ts_referenced(mmu_t mmu, vm_page_t m) 3188 { 3189 pte_t *pte; 3190 pv_entry_t pv; 3191 int count; 3192 3193 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3194 ("mmu_booke_ts_referenced: page %p is not managed", m)); 3195 count = 0; 3196 rw_wlock(&pvh_global_lock); 3197 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3198 PMAP_LOCK(pv->pv_pmap); 3199 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3200 PTE_ISVALID(pte)) { 3201 if (PTE_ISMODIFIED(pte)) 3202 vm_page_dirty(m); 3203 if (PTE_ISREFERENCED(pte)) { 3204 mtx_lock_spin(&tlbivax_mutex); 3205 tlb_miss_lock(); 3206 3207 tlb0_flush_entry(pv->pv_va); 3208 *pte &= ~PTE_REFERENCED; 3209 3210 tlb_miss_unlock(); 3211 mtx_unlock_spin(&tlbivax_mutex); 3212 3213 if (++count >= PMAP_TS_REFERENCED_MAX) { 3214 PMAP_UNLOCK(pv->pv_pmap); 3215 break; 3216 } 3217 } 3218 } 3219 PMAP_UNLOCK(pv->pv_pmap); 3220 } 3221 rw_wunlock(&pvh_global_lock); 3222 return (count); 3223 } 3224 3225 /* 3226 * Clear the wired attribute from the mappings for the specified range of 3227 * addresses in the given pmap. Every valid mapping within that range must 3228 * have the wired attribute set. In contrast, invalid mappings cannot have 3229 * the wired attribute set, so they are ignored. 3230 * 3231 * The wired attribute of the page table entry is not a hardware feature, so 3232 * there is no need to invalidate any TLB entries. 3233 */ 3234 static void 3235 mmu_booke_unwire(mmu_t mmu, pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 3236 { 3237 vm_offset_t va; 3238 pte_t *pte; 3239 3240 PMAP_LOCK(pmap); 3241 for (va = sva; va < eva; va += PAGE_SIZE) { 3242 if ((pte = pte_find(mmu, pmap, va)) != NULL && 3243 PTE_ISVALID(pte)) { 3244 if (!PTE_ISWIRED(pte)) 3245 panic("mmu_booke_unwire: pte %p isn't wired", 3246 pte); 3247 *pte &= ~PTE_WIRED; 3248 pmap->pm_stats.wired_count--; 3249 } 3250 } 3251 PMAP_UNLOCK(pmap); 3252 3253 } 3254 3255 /* 3256 * Return true if the pmap's pv is one of the first 16 pvs linked to from this 3257 * page. This count may be changed upwards or downwards in the future; it is 3258 * only necessary that true be returned for a small subset of pmaps for proper 3259 * page aging. 3260 */ 3261 static boolean_t 3262 mmu_booke_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m) 3263 { 3264 pv_entry_t pv; 3265 int loops; 3266 boolean_t rv; 3267 3268 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3269 ("mmu_booke_page_exists_quick: page %p is not managed", m)); 3270 loops = 0; 3271 rv = FALSE; 3272 rw_wlock(&pvh_global_lock); 3273 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3274 if (pv->pv_pmap == pmap) { 3275 rv = TRUE; 3276 break; 3277 } 3278 if (++loops >= 16) 3279 break; 3280 } 3281 rw_wunlock(&pvh_global_lock); 3282 return (rv); 3283 } 3284 3285 /* 3286 * Return the number of managed mappings to the given physical page that are 3287 * wired. 
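 *
 * For example, a page entered with PMAP_ENTER_WIRED into two different
 * pmaps reports 2 here, while a third, unwired, mapping of the same page
 * does not change the count; only managed mappings reachable through the
 * page's pv list are examined.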
3288 */ 3289 static int 3290 mmu_booke_page_wired_mappings(mmu_t mmu, vm_page_t m) 3291 { 3292 pv_entry_t pv; 3293 pte_t *pte; 3294 int count = 0; 3295 3296 if ((m->oflags & VPO_UNMANAGED) != 0) 3297 return (count); 3298 rw_wlock(&pvh_global_lock); 3299 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3300 PMAP_LOCK(pv->pv_pmap); 3301 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) 3302 if (PTE_ISVALID(pte) && PTE_ISWIRED(pte)) 3303 count++; 3304 PMAP_UNLOCK(pv->pv_pmap); 3305 } 3306 rw_wunlock(&pvh_global_lock); 3307 return (count); 3308 } 3309 3310 static int 3311 mmu_booke_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size) 3312 { 3313 int i; 3314 vm_offset_t va; 3315 3316 /* 3317 * This currently does not work for entries that 3318 * overlap TLB1 entries. 3319 */ 3320 for (i = 0; i < TLB1_ENTRIES; i ++) { 3321 if (tlb1_iomapped(i, pa, size, &va) == 0) 3322 return (0); 3323 } 3324 3325 return (EFAULT); 3326 } 3327 3328 void 3329 mmu_booke_dumpsys_map(mmu_t mmu, vm_paddr_t pa, size_t sz, void **va) 3330 { 3331 vm_paddr_t ppa; 3332 vm_offset_t ofs; 3333 vm_size_t gran; 3334 3335 /* Minidumps are based on virtual memory addresses. */ 3336 if (do_minidump) { 3337 *va = (void *)(vm_offset_t)pa; 3338 return; 3339 } 3340 3341 /* Raw physical memory dumps don't have a virtual address. */ 3342 /* We always map a 256MB page at 256M. */ 3343 gran = 256 * 1024 * 1024; 3344 ppa = rounddown2(pa, gran); 3345 ofs = pa - ppa; 3346 *va = (void *)gran; 3347 tlb1_set_entry((vm_offset_t)va, ppa, gran, _TLB_ENTRY_IO); 3348 3349 if (sz > (gran - ofs)) 3350 tlb1_set_entry((vm_offset_t)(va + gran), ppa + gran, gran, 3351 _TLB_ENTRY_IO); 3352 } 3353 3354 void 3355 mmu_booke_dumpsys_unmap(mmu_t mmu, vm_paddr_t pa, size_t sz, void *va) 3356 { 3357 vm_paddr_t ppa; 3358 vm_offset_t ofs; 3359 vm_size_t gran; 3360 tlb_entry_t e; 3361 int i; 3362 3363 /* Minidumps are based on virtual memory addresses. */ 3364 /* Nothing to do... */ 3365 if (do_minidump) 3366 return; 3367 3368 for (i = 0; i < TLB1_ENTRIES; i++) { 3369 tlb1_read_entry(&e, i); 3370 if (!(e.mas1 & MAS1_VALID)) 3371 break; 3372 } 3373 3374 /* Raw physical memory dumps don't have a virtual address. */ 3375 i--; 3376 e.mas1 = 0; 3377 e.mas2 = 0; 3378 e.mas3 = 0; 3379 tlb1_write_entry(&e, i); 3380 3381 gran = 256 * 1024 * 1024; 3382 ppa = rounddown2(pa, gran); 3383 ofs = pa - ppa; 3384 if (sz > (gran - ofs)) { 3385 i--; 3386 e.mas1 = 0; 3387 e.mas2 = 0; 3388 e.mas3 = 0; 3389 tlb1_write_entry(&e, i); 3390 } 3391 } 3392 3393 extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1]; 3394 3395 void 3396 mmu_booke_scan_init(mmu_t mmu) 3397 { 3398 vm_offset_t va; 3399 pte_t *pte; 3400 int i; 3401 3402 if (!do_minidump) { 3403 /* Initialize phys. segments for dumpsys(). */ 3404 memset(&dump_map, 0, sizeof(dump_map)); 3405 mem_regions(&physmem_regions, &physmem_regions_sz, &availmem_regions, 3406 &availmem_regions_sz); 3407 for (i = 0; i < physmem_regions_sz; i++) { 3408 dump_map[i].pa_start = physmem_regions[i].mr_start; 3409 dump_map[i].pa_size = physmem_regions[i].mr_size; 3410 } 3411 return; 3412 } 3413 3414 /* Virtual segments for minidumps: */ 3415 memset(&dump_map, 0, sizeof(dump_map)); 3416 3417 /* 1st: kernel .data and .bss. */ 3418 dump_map[0].pa_start = trunc_page((uintptr_t)_etext); 3419 dump_map[0].pa_size = 3420 round_page((uintptr_t)_end) - dump_map[0].pa_start; 3421 3422 /* 2nd: msgbuf and tables (see pmap_bootstrap()). */ 3423 dump_map[1].pa_start = data_start; 3424 dump_map[1].pa_size = data_end - data_start; 3425 3426 /* 3rd: kernel VM. 
*/ 3427 va = dump_map[1].pa_start + dump_map[1].pa_size; 3428 /* Find start of next chunk (from va). */ 3429 while (va < virtual_end) { 3430 /* Don't dump the buffer cache. */ 3431 if (va >= kmi.buffer_sva && va < kmi.buffer_eva) { 3432 va = kmi.buffer_eva; 3433 continue; 3434 } 3435 pte = pte_find(mmu, kernel_pmap, va); 3436 if (pte != NULL && PTE_ISVALID(pte)) 3437 break; 3438 va += PAGE_SIZE; 3439 } 3440 if (va < virtual_end) { 3441 dump_map[2].pa_start = va; 3442 va += PAGE_SIZE; 3443 /* Find last page in chunk. */ 3444 while (va < virtual_end) { 3445 /* Don't run into the buffer cache. */ 3446 if (va == kmi.buffer_sva) 3447 break; 3448 pte = pte_find(mmu, kernel_pmap, va); 3449 if (pte == NULL || !PTE_ISVALID(pte)) 3450 break; 3451 va += PAGE_SIZE; 3452 } 3453 dump_map[2].pa_size = va - dump_map[2].pa_start; 3454 } 3455 } 3456 3457 /* 3458 * Map a set of physical memory pages into the kernel virtual address space. 3459 * Return a pointer to where it is mapped. This routine is intended to be used 3460 * for mapping device memory, NOT real memory. 3461 */ 3462 static void * 3463 mmu_booke_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size) 3464 { 3465 3466 return (mmu_booke_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT)); 3467 } 3468 3469 static void * 3470 mmu_booke_mapdev_attr(mmu_t mmu, vm_paddr_t pa, vm_size_t size, vm_memattr_t ma) 3471 { 3472 tlb_entry_t e; 3473 void *res; 3474 uintptr_t va, tmpva; 3475 vm_size_t sz; 3476 int i; 3477 3478 /* 3479 * Check if this is premapped in TLB1. Note: this should probably also 3480 * check whether a sequence of TLB1 entries exist that match the 3481 * requirement, but now only checks the easy case. 3482 */ 3483 for (i = 0; i < TLB1_ENTRIES; i++) { 3484 tlb1_read_entry(&e, i); 3485 if (!(e.mas1 & MAS1_VALID)) 3486 continue; 3487 if (pa >= e.phys && 3488 (pa + size) <= (e.phys + e.size) && 3489 (ma == VM_MEMATTR_DEFAULT || 3490 tlb_calc_wimg(pa, ma) == 3491 (e.mas2 & (MAS2_WIMGE_MASK & ~_TLB_ENTRY_SHARED)))) 3492 return (void *)(e.virt + 3493 (vm_offset_t)(pa - e.phys)); 3494 } 3495 3496 size = roundup(size, PAGE_SIZE); 3497 3498 /* 3499 * The device mapping area is between VM_MAXUSER_ADDRESS and 3500 * VM_MIN_KERNEL_ADDRESS. This gives 1GB of device addressing. 3501 */ 3502 #ifdef SPARSE_MAPDEV 3503 /* 3504 * With a sparse mapdev, align to the largest starting region. This 3505 * could feasibly be optimized for a 'best-fit' alignment, but that 3506 * calculation could be very costly. 3507 * Align to the smaller of: 3508 * - first set bit in overlap of (pa & size mask) 3509 * - largest size envelope 3510 * 3511 * It's possible the device mapping may start at a PA that's not larger 3512 * than the size mask, so we need to offset in to maximize the TLB entry 3513 * range and minimize the number of used TLB entries. 3514 */ 3515 do { 3516 tmpva = tlb1_map_base; 3517 sz = ffsl(((1 << flsl(size-1)) - 1) & pa); 3518 sz = sz ? 
min(roundup(sz + 3, 4), flsl(size) - 1) : flsl(size) - 1; 3519 va = roundup(tlb1_map_base, 1 << sz) | (((1 << sz) - 1) & pa); 3520 #ifdef __powerpc64__ 3521 } while (!atomic_cmpset_long(&tlb1_map_base, tmpva, va + size)); 3522 #else 3523 } while (!atomic_cmpset_int(&tlb1_map_base, tmpva, va + size)); 3524 #endif 3525 #else 3526 #ifdef __powerpc64__ 3527 va = atomic_fetchadd_long(&tlb1_map_base, size); 3528 #else 3529 va = atomic_fetchadd_int(&tlb1_map_base, size); 3530 #endif 3531 #endif 3532 res = (void *)va; 3533 3534 do { 3535 sz = 1 << (ilog2(size) & ~1); 3536 /* Align size to PA */ 3537 if (pa % sz != 0) { 3538 do { 3539 sz >>= 2; 3540 } while (pa % sz != 0); 3541 } 3542 /* Now align from there to VA */ 3543 if (va % sz != 0) { 3544 do { 3545 sz >>= 2; 3546 } while (va % sz != 0); 3547 } 3548 if (bootverbose) 3549 printf("Wiring VA=%lx to PA=%jx (size=%lx)\n", 3550 va, (uintmax_t)pa, sz); 3551 if (tlb1_set_entry(va, pa, sz, 3552 _TLB_ENTRY_SHARED | tlb_calc_wimg(pa, ma)) < 0) 3553 return (NULL); 3554 size -= sz; 3555 pa += sz; 3556 va += sz; 3557 } while (size > 0); 3558 3559 return (res); 3560 } 3561 3562 /* 3563 * 'Unmap' a range mapped by mmu_booke_mapdev(). 3564 */ 3565 static void 3566 mmu_booke_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size) 3567 { 3568 #ifdef SUPPORTS_SHRINKING_TLB1 3569 vm_offset_t base, offset; 3570 3571 /* 3572 * Unmap only if this is inside kernel virtual space. 3573 */ 3574 if ((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)) { 3575 base = trunc_page(va); 3576 offset = va & PAGE_MASK; 3577 size = roundup(offset + size, PAGE_SIZE); 3578 kva_free(base, size); 3579 } 3580 #endif 3581 } 3582 3583 /* 3584 * mmu_booke_object_init_pt preloads the ptes for a given object into the 3585 * specified pmap. This eliminates the blast of soft faults on process startup 3586 * and immediately after an mmap. 3587 */ 3588 static void 3589 mmu_booke_object_init_pt(mmu_t mmu, pmap_t pmap, vm_offset_t addr, 3590 vm_object_t object, vm_pindex_t pindex, vm_size_t size) 3591 { 3592 3593 VM_OBJECT_ASSERT_WLOCKED(object); 3594 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 3595 ("mmu_booke_object_init_pt: non-device object")); 3596 } 3597 3598 /* 3599 * Perform the pmap work for mincore. 3600 */ 3601 static int 3602 mmu_booke_mincore(mmu_t mmu, pmap_t pmap, vm_offset_t addr, 3603 vm_paddr_t *locked_pa) 3604 { 3605 3606 /* XXX: this should be implemented at some point */ 3607 return (0); 3608 } 3609 3610 static int 3611 mmu_booke_change_attr(mmu_t mmu, vm_offset_t addr, vm_size_t sz, 3612 vm_memattr_t mode) 3613 { 3614 vm_offset_t va; 3615 pte_t *pte; 3616 int i, j; 3617 tlb_entry_t e; 3618 3619 /* Check TLB1 mappings */ 3620 for (i = 0; i < TLB1_ENTRIES; i++) { 3621 tlb1_read_entry(&e, i); 3622 if (!(e.mas1 & MAS1_VALID)) 3623 continue; 3624 if (addr >= e.virt && addr < e.virt + e.size) 3625 break; 3626 } 3627 if (i < TLB1_ENTRIES) { 3628 /* Only allow full mappings to be modified for now. */ 3629 /* Validate the range. */ 3630 for (j = i, va = addr; va < addr + sz; va += e.size, j++) { 3631 tlb1_read_entry(&e, j); 3632 if (va != e.virt || (sz - (va - addr) < e.size)) 3633 return (EINVAL); 3634 } 3635 for (va = addr; va < addr + sz; va += e.size, i++) { 3636 tlb1_read_entry(&e, i); 3637 e.mas2 &= ~MAS2_WIMGE_MASK; 3638 e.mas2 |= tlb_calc_wimg(e.phys, mode); 3639 3640 /* 3641 * Write it out to the TLB. Should really re-sync with other 3642 * cores. 
3643 */ 3644 tlb1_write_entry(&e, i); 3645 } 3646 return (0); 3647 } 3648 3649 /* Not in TLB1, try through pmap */ 3650 /* First validate the range. */ 3651 for (va = addr; va < addr + sz; va += PAGE_SIZE) { 3652 pte = pte_find(mmu, kernel_pmap, va); 3653 if (pte == NULL || !PTE_ISVALID(pte)) 3654 return (EINVAL); 3655 } 3656 3657 mtx_lock_spin(&tlbivax_mutex); 3658 tlb_miss_lock(); 3659 for (va = addr; va < addr + sz; va += PAGE_SIZE) { 3660 pte = pte_find(mmu, kernel_pmap, va); 3661 *pte &= ~(PTE_MAS2_MASK << PTE_MAS2_SHIFT); 3662 *pte |= tlb_calc_wimg(PTE_PA(pte), mode) << PTE_MAS2_SHIFT; 3663 tlb0_flush_entry(va); 3664 } 3665 tlb_miss_unlock(); 3666 mtx_unlock_spin(&tlbivax_mutex); 3667 3668 return (0); 3669 } 3670 3671 /**************************************************************************/ 3672 /* TID handling */ 3673 /**************************************************************************/ 3674 3675 /* 3676 * Allocate a TID. If necessary, steal one from someone else. 3677 * The new TID is flushed from the TLB before returning. 3678 */ 3679 static tlbtid_t 3680 tid_alloc(pmap_t pmap) 3681 { 3682 tlbtid_t tid; 3683 int thiscpu; 3684 3685 KASSERT((pmap != kernel_pmap), ("tid_alloc: kernel pmap")); 3686 3687 CTR2(KTR_PMAP, "%s: s (pmap = %p)", __func__, pmap); 3688 3689 thiscpu = PCPU_GET(cpuid); 3690 3691 tid = PCPU_GET(tid_next); 3692 if (tid > TID_MAX) 3693 tid = TID_MIN; 3694 PCPU_SET(tid_next, tid + 1); 3695 3696 /* If we are stealing TID then clear the relevant pmap's field */ 3697 if (tidbusy[thiscpu][tid] != NULL) { 3698 3699 CTR2(KTR_PMAP, "%s: warning: stealing tid %d", __func__, tid); 3700 3701 tidbusy[thiscpu][tid]->pm_tid[thiscpu] = TID_NONE; 3702 3703 /* Flush all entries from TLB0 matching this TID. */ 3704 tid_flush(tid); 3705 } 3706 3707 tidbusy[thiscpu][tid] = pmap; 3708 pmap->pm_tid[thiscpu] = tid; 3709 __asm __volatile("msync; isync"); 3710 3711 CTR3(KTR_PMAP, "%s: e (%02d next = %02d)", __func__, tid, 3712 PCPU_GET(tid_next)); 3713 3714 return (tid); 3715 } 3716 3717 /**************************************************************************/ 3718 /* TLB0 handling */ 3719 /**************************************************************************/ 3720 3721 static void 3722 #ifdef __powerpc64__ 3723 tlb_print_entry(int i, uint32_t mas1, uint64_t mas2, uint32_t mas3, 3724 #else 3725 tlb_print_entry(int i, uint32_t mas1, uint32_t mas2, uint32_t mas3, 3726 #endif 3727 uint32_t mas7) 3728 { 3729 int as; 3730 char desc[3]; 3731 tlbtid_t tid; 3732 vm_size_t size; 3733 unsigned int tsize; 3734 3735 desc[2] = '\0'; 3736 if (mas1 & MAS1_VALID) 3737 desc[0] = 'V'; 3738 else 3739 desc[0] = ' '; 3740 3741 if (mas1 & MAS1_IPROT) 3742 desc[1] = 'P'; 3743 else 3744 desc[1] = ' '; 3745 3746 as = (mas1 & MAS1_TS_MASK) ? 1 : 0; 3747 tid = MAS1_GETTID(mas1); 3748 3749 tsize = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; 3750 size = 0; 3751 if (tsize) 3752 size = tsize2size(tsize); 3753 3754 debugf("%3d: (%s) [AS=%d] " 3755 "sz = 0x%08x tsz = %d tid = %d mas1 = 0x%08x " 3756 "mas2(va) = 0x%"PRI0ptrX" mas3(pa) = 0x%08x mas7 = 0x%08x\n", 3757 i, desc, as, size, tsize, tid, mas1, mas2, mas3, mas7); 3758 } 3759 3760 /* Convert TLB0 va and way number to tlb0[] table index. */ 3761 static inline unsigned int 3762 tlb0_tableidx(vm_offset_t va, unsigned int way) 3763 { 3764 unsigned int idx; 3765 3766 idx = (way * TLB0_ENTRIES_PER_WAY); 3767 idx += (va & MAS2_TLB0_ENTRY_IDX_MASK) >> MAS2_TLB0_ENTRY_IDX_SHIFT; 3768 return (idx); 3769 } 3770 3771 /* 3772 * Invalidate TLB0 entry. 
3773 */ 3774 static inline void 3775 tlb0_flush_entry(vm_offset_t va) 3776 { 3777 3778 CTR2(KTR_PMAP, "%s: s va=0x%08x", __func__, va); 3779 3780 mtx_assert(&tlbivax_mutex, MA_OWNED); 3781 3782 __asm __volatile("tlbivax 0, %0" :: "r"(va & MAS2_EPN_MASK)); 3783 __asm __volatile("isync; msync"); 3784 __asm __volatile("tlbsync; msync"); 3785 3786 CTR1(KTR_PMAP, "%s: e", __func__); 3787 } 3788 3789 /* Print out contents of the MAS registers for each TLB0 entry */ 3790 void 3791 tlb0_print_tlbentries(void) 3792 { 3793 uint32_t mas0, mas1, mas3, mas7; 3794 #ifdef __powerpc64__ 3795 uint64_t mas2; 3796 #else 3797 uint32_t mas2; 3798 #endif 3799 int entryidx, way, idx; 3800 3801 debugf("TLB0 entries:\n"); 3802 for (way = 0; way < TLB0_WAYS; way ++) 3803 for (entryidx = 0; entryidx < TLB0_ENTRIES_PER_WAY; entryidx++) { 3804 3805 mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way); 3806 mtspr(SPR_MAS0, mas0); 3807 __asm __volatile("isync"); 3808 3809 mas2 = entryidx << MAS2_TLB0_ENTRY_IDX_SHIFT; 3810 mtspr(SPR_MAS2, mas2); 3811 3812 __asm __volatile("isync; tlbre"); 3813 3814 mas1 = mfspr(SPR_MAS1); 3815 mas2 = mfspr(SPR_MAS2); 3816 mas3 = mfspr(SPR_MAS3); 3817 mas7 = mfspr(SPR_MAS7); 3818 3819 idx = tlb0_tableidx(mas2, way); 3820 tlb_print_entry(idx, mas1, mas2, mas3, mas7); 3821 } 3822 } 3823 3824 /**************************************************************************/ 3825 /* TLB1 handling */ 3826 /**************************************************************************/ 3827 3828 /* 3829 * TLB1 mapping notes: 3830 * 3831 * TLB1[0] Kernel text and data. 3832 * TLB1[1-15] Additional kernel text and data mappings (if required), PCI 3833 * windows, other devices mappings. 3834 */ 3835 3836 /* 3837 * Read an entry from given TLB1 slot. 3838 */ 3839 void 3840 tlb1_read_entry(tlb_entry_t *entry, unsigned int slot) 3841 { 3842 register_t msr; 3843 uint32_t mas0; 3844 3845 KASSERT((entry != NULL), ("%s(): Entry is NULL!", __func__)); 3846 3847 msr = mfmsr(); 3848 __asm __volatile("wrteei 0"); 3849 3850 mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(slot); 3851 mtspr(SPR_MAS0, mas0); 3852 __asm __volatile("isync; tlbre"); 3853 3854 entry->mas1 = mfspr(SPR_MAS1); 3855 entry->mas2 = mfspr(SPR_MAS2); 3856 entry->mas3 = mfspr(SPR_MAS3); 3857 3858 switch ((mfpvr() >> 16) & 0xFFFF) { 3859 case FSL_E500v2: 3860 case FSL_E500mc: 3861 case FSL_E5500: 3862 case FSL_E6500: 3863 entry->mas7 = mfspr(SPR_MAS7); 3864 break; 3865 default: 3866 entry->mas7 = 0; 3867 break; 3868 } 3869 mtmsr(msr); 3870 3871 entry->virt = entry->mas2 & MAS2_EPN_MASK; 3872 entry->phys = ((vm_paddr_t)(entry->mas7 & MAS7_RPN) << 32) | 3873 (entry->mas3 & MAS3_RPN); 3874 entry->size = 3875 tsize2size((entry->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT); 3876 } 3877 3878 struct tlbwrite_args { 3879 tlb_entry_t *e; 3880 unsigned int idx; 3881 }; 3882 3883 static void 3884 tlb1_write_entry_int(void *arg) 3885 { 3886 struct tlbwrite_args *args = arg; 3887 uint32_t mas0; 3888 3889 /* Select entry */ 3890 mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(args->idx); 3891 3892 mtspr(SPR_MAS0, mas0); 3893 __asm __volatile("isync"); 3894 mtspr(SPR_MAS1, args->e->mas1); 3895 __asm __volatile("isync"); 3896 mtspr(SPR_MAS2, args->e->mas2); 3897 __asm __volatile("isync"); 3898 mtspr(SPR_MAS3, args->e->mas3); 3899 __asm __volatile("isync"); 3900 switch ((mfpvr() >> 16) & 0xFFFF) { 3901 case FSL_E500mc: 3902 case FSL_E5500: 3903 case FSL_E6500: 3904 mtspr(SPR_MAS8, 0); 3905 __asm __volatile("isync"); 3906 /* FALLTHROUGH */ 3907 case FSL_E500v2: 3908 mtspr(SPR_MAS7, args->e->mas7); 3909 __asm 
__volatile("isync"); 3910 break; 3911 default: 3912 break; 3913 } 3914 3915 __asm __volatile("tlbwe; isync; msync"); 3916 3917 } 3918 3919 static void 3920 tlb1_write_entry_sync(void *arg) 3921 { 3922 /* Empty synchronization point for smp_rendezvous(). */ 3923 } 3924 3925 /* 3926 * Write given entry to TLB1 hardware. 3927 */ 3928 static void 3929 tlb1_write_entry(tlb_entry_t *e, unsigned int idx) 3930 { 3931 struct tlbwrite_args args; 3932 3933 args.e = e; 3934 args.idx = idx; 3935 3936 #ifdef SMP 3937 if ((e->mas2 & _TLB_ENTRY_SHARED) && smp_started) { 3938 mb(); 3939 smp_rendezvous(tlb1_write_entry_sync, 3940 tlb1_write_entry_int, 3941 tlb1_write_entry_sync, &args); 3942 } else 3943 #endif 3944 { 3945 register_t msr; 3946 3947 msr = mfmsr(); 3948 __asm __volatile("wrteei 0"); 3949 tlb1_write_entry_int(&args); 3950 mtmsr(msr); 3951 } 3952 } 3953 3954 /* 3955 * Return the largest uint value log such that 2^log <= num. 3956 */ 3957 static unsigned int 3958 ilog2(unsigned int num) 3959 { 3960 int lz; 3961 3962 __asm ("cntlzw %0, %1" : "=r" (lz) : "r" (num)); 3963 return (31 - lz); 3964 } 3965 3966 /* 3967 * Convert TLB TSIZE value to mapped region size. 3968 */ 3969 static vm_size_t 3970 tsize2size(unsigned int tsize) 3971 { 3972 3973 /* 3974 * size = 4^tsize KB 3975 * size = 4^tsize * 2^10 = 2^(2 * tsize - 10) 3976 */ 3977 3978 return ((1 << (2 * tsize)) * 1024); 3979 } 3980 3981 /* 3982 * Convert region size (must be power of 4) to TLB TSIZE value. 3983 */ 3984 static unsigned int 3985 size2tsize(vm_size_t size) 3986 { 3987 3988 return (ilog2(size) / 2 - 5); 3989 } 3990 3991 /* 3992 * Register permanent kernel mapping in TLB1. 3993 * 3994 * Entries are created starting from index 0 (current free entry is 3995 * kept in tlb1_idx) and are not supposed to be invalidated. 3996 */ 3997 int 3998 tlb1_set_entry(vm_offset_t va, vm_paddr_t pa, vm_size_t size, 3999 uint32_t flags) 4000 { 4001 tlb_entry_t e; 4002 uint32_t ts, tid; 4003 int tsize, index; 4004 4005 for (index = 0; index < TLB1_ENTRIES; index++) { 4006 tlb1_read_entry(&e, index); 4007 if ((e.mas1 & MAS1_VALID) == 0) 4008 break; 4009 /* Check if we're just updating the flags, and update them. */ 4010 if (e.phys == pa && e.virt == va && e.size == size) { 4011 e.mas2 = (va & MAS2_EPN_MASK) | flags; 4012 tlb1_write_entry(&e, index); 4013 return (0); 4014 } 4015 } 4016 if (index >= TLB1_ENTRIES) { 4017 printf("tlb1_set_entry: TLB1 full!\n"); 4018 return (-1); 4019 } 4020 4021 /* Convert size to TSIZE */ 4022 tsize = size2tsize(size); 4023 4024 tid = (TID_KERNEL << MAS1_TID_SHIFT) & MAS1_TID_MASK; 4025 /* XXX TS is hard coded to 0 for now as we only use single address space */ 4026 ts = (0 << MAS1_TS_SHIFT) & MAS1_TS_MASK; 4027 4028 e.phys = pa; 4029 e.virt = va; 4030 e.size = size; 4031 e.mas1 = MAS1_VALID | MAS1_IPROT | ts | tid; 4032 e.mas1 |= ((tsize << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK); 4033 e.mas2 = (va & MAS2_EPN_MASK) | flags; 4034 4035 /* Set supervisor RWX permission bits */ 4036 e.mas3 = (pa & MAS3_RPN) | MAS3_SR | MAS3_SW | MAS3_SX; 4037 e.mas7 = (pa >> 32) & MAS7_RPN; 4038 4039 tlb1_write_entry(&e, index); 4040 4041 /* 4042 * XXX in general TLB1 updates should be propagated between CPUs, 4043 * since current design assumes to have the same TLB1 set-up on all 4044 * cores. 4045 */ 4046 return (0); 4047 } 4048 4049 /* 4050 * Map in contiguous RAM region into the TLB1 using maximum of 4051 * KERNEL_REGION_MAX_TLB_ENTRIES entries. 
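 * (For instance, a 96 MB region is covered by three entries of 64 MB,
 * 16 MB and 16 MB: candidate entry sizes start at 64 MB and shrink by
 * powers of 4 until they fit the remainder.)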
/*
 * Map a contiguous RAM region into TLB1 using at most
 * KERNEL_REGION_MAX_TLB_ENTRIES entries.
 *
 * If necessary, round up the last entry size; return the total size
 * used by all allocated entries.
 */
vm_size_t
tlb1_mapin_region(vm_offset_t va, vm_paddr_t pa, vm_size_t size)
{
	vm_size_t pgs[KERNEL_REGION_MAX_TLB_ENTRIES];
	vm_size_t mapped, pgsz, base, mask;
	int idx, nents;

	/* Round up to the next 1M */
	size = roundup2(size, 1 << 20);

	mapped = 0;
	idx = 0;
	base = va;
	pgsz = 64*1024*1024;
	while (mapped < size) {
		while (mapped < size && idx < KERNEL_REGION_MAX_TLB_ENTRIES) {
			while (pgsz > (size - mapped))
				pgsz >>= 2;
			pgs[idx++] = pgsz;
			mapped += pgsz;
		}

		/* We under-map. Correct for this. */
		if (mapped < size) {
			while (pgs[idx - 1] == pgsz) {
				idx--;
				mapped -= pgsz;
			}
			/* XXX We may increase beyond our starting point. */
			pgsz <<= 2;
			pgs[idx++] = pgsz;
			mapped += pgsz;
		}
	}

	nents = idx;
	mask = pgs[0] - 1;
	/* Align address to the boundary */
	if (va & mask) {
		va = (va + mask) & ~mask;
		pa = (pa + mask) & ~mask;
	}

	for (idx = 0; idx < nents; idx++) {
		pgsz = pgs[idx];
		debugf("%u: %llx -> %x, size=%x\n", idx, pa, va, pgsz);
		tlb1_set_entry(va, pa, pgsz,
		    _TLB_ENTRY_SHARED | _TLB_ENTRY_MEM);
		pa += pgsz;
		va += pgsz;
	}

	mapped = (va - base);
	printf("mapped size 0x%"PRI0ptrX" (wasted space 0x%"PRIxPTR")\n",
	    mapped, mapped - size);
	return (mapped);
}

/*
 * TLB1 initialization routine, to be called after the very first
 * assembler-level setup done in locore.S.
 */
void
tlb1_init(void)
{
	uint32_t mas0, mas1, mas2, mas3, mas7;
	uint32_t tsz;

	tlb1_get_tlbconf();

	mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(0);
	mtspr(SPR_MAS0, mas0);
	__asm __volatile("isync; tlbre");

	mas1 = mfspr(SPR_MAS1);
	mas2 = mfspr(SPR_MAS2);
	mas3 = mfspr(SPR_MAS3);
	mas7 = mfspr(SPR_MAS7);

	kernload = ((vm_paddr_t)(mas7 & MAS7_RPN) << 32) |
	    (mas3 & MAS3_RPN);

	tsz = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
	kernsize += (tsz > 0) ? tsize2size(tsz) : 0;

	/* Setup TLB miss defaults */
	set_mas4_defaults();
}
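
/*
 * Worked example for tlb1_mapin_region() above (added for illustration,
 * not part of the original code): for a 40 MB region the loop starts with
 * a 64 MB candidate page size and shrinks it by factors of 4 until it no
 * longer exceeds the remaining size, ending up with four entries of
 * 16 MB + 16 MB + 4 MB + 4 MB = 40 MB.  The base virtual and physical
 * addresses are then aligned up to the first (largest) page size, 16 MB
 * here, before the entries are written with tlb1_set_entry().
 */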
/*
 * pmap_early_io_unmap() should be used shortly after pmap_early_io_map(),
 * as in the following snippet, with no further early I/O allocations in
 * between:
 *
 * x = pmap_early_io_map(...);
 * <do something with x>
 * pmap_early_io_unmap(x, size);
 */
void
pmap_early_io_unmap(vm_offset_t va, vm_size_t size)
{
	int i;
	tlb_entry_t e;
	vm_size_t isize;

	size = roundup(size, PAGE_SIZE);
	isize = size;
	for (i = 0; i < TLB1_ENTRIES && size > 0; i++) {
		tlb1_read_entry(&e, i);
		if (!(e.mas1 & MAS1_VALID))
			continue;
		if (va <= e.virt && (va + isize) >= (e.virt + e.size)) {
			size -= e.size;
			e.mas1 &= ~MAS1_VALID;
			tlb1_write_entry(&e, i);
		}
	}
	if (tlb1_map_base == va + isize)
		tlb1_map_base -= isize;
}

vm_offset_t
pmap_early_io_map(vm_paddr_t pa, vm_size_t size)
{
	vm_paddr_t pa_base;
	vm_offset_t va, sz;
	int i;
	tlb_entry_t e;

	KASSERT(!pmap_bootstrapped, ("Do not use after PMAP is up!"));

	for (i = 0; i < TLB1_ENTRIES; i++) {
		tlb1_read_entry(&e, i);
		if (!(e.mas1 & MAS1_VALID))
			continue;
		if (pa >= e.phys && (pa + size) <=
		    (e.phys + e.size))
			return (e.virt + (pa - e.phys));
	}

	pa_base = rounddown(pa, PAGE_SIZE);
	size = roundup(size + (pa - pa_base), PAGE_SIZE);
	tlb1_map_base = roundup2(tlb1_map_base, 1 << (ilog2(size) & ~1));
	va = tlb1_map_base + (pa - pa_base);

	do {
		sz = 1 << (ilog2(size) & ~1);
		tlb1_set_entry(tlb1_map_base, pa_base, sz,
		    _TLB_ENTRY_SHARED | _TLB_ENTRY_IO);
		size -= sz;
		pa_base += sz;
		tlb1_map_base += sz;
	} while (size > 0);

	return (va);
}

void
pmap_track_page(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t pa;
	vm_page_t page;
	struct pv_entry *pve;

	va = trunc_page(va);
	pa = pmap_kextract(va);

	rw_wlock(&pvh_global_lock);
	PMAP_LOCK(pmap);
	page = PHYS_TO_VM_PAGE(pa);

	TAILQ_FOREACH(pve, &page->md.pv_list, pv_link) {
		if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) {
			goto out;
		}
	}
	page->md.pv_tracked = true;
	pv_insert(pmap, va, page);
out:
	PMAP_UNLOCK(pmap);
	rw_wunlock(&pvh_global_lock);
}

/*
 * Set up MAS4 defaults.
 * These values are loaded to MAS0-2 on a TLB miss.
 */
static void
set_mas4_defaults(void)
{
	uint32_t mas4;

	/* Defaults: TLB0, PID0, TSIZED=4K */
	mas4 = MAS4_TLBSELD0;
	mas4 |= (TLB_SIZE_4K << MAS4_TSIZED_SHIFT) & MAS4_TSIZED_MASK;
#ifdef SMP
	mas4 |= MAS4_MD;
#endif
	mtspr(SPR_MAS4, mas4);
	__asm __volatile("isync");
}

/*
 * Print out contents of the MAS registers for each TLB1 entry
 */
void
tlb1_print_tlbentries(void)
{
	uint32_t mas0, mas1, mas3, mas7;
#ifdef __powerpc64__
	uint64_t mas2;
#else
	uint32_t mas2;
#endif
	int i;

	debugf("TLB1 entries:\n");
	for (i = 0; i < TLB1_ENTRIES; i++) {

		mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(i);
		mtspr(SPR_MAS0, mas0);

		__asm __volatile("isync; tlbre");

		mas1 = mfspr(SPR_MAS1);
		mas2 = mfspr(SPR_MAS2);
		mas3 = mfspr(SPR_MAS3);
		mas7 = mfspr(SPR_MAS7);

		tlb_print_entry(i, mas1, mas2, mas3, mas7);
	}
}
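
/*
 * Illustrative decode (added for exposition, not part of the original
 * code), assuming a hypothetical TLB1 entry that maps 64 MB of RAM at
 * va 0xc0000000 to pa 0: the print routine above would show MAS1 with
 * MAS1_VALID, MAS1_IPROT and a TSIZE field of 8 (4^8 KB = 64 MB), MAS2
 * with EPN 0xc0000000, MAS3 with RPN 0 plus MAS3_SR/SW/SX, and MAS7 = 0;
 * tlb1_read_entry() would decode the same entry to virt = 0xc0000000,
 * phys = 0 and size = 0x4000000.  This mirrors the encoding performed in
 * tlb1_set_entry() above.
 */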
/*
 * Return 0 if the physical IO range is encompassed by one of the TLB1
 * entries, otherwise return the related error code.
 */
static int
tlb1_iomapped(int i, vm_paddr_t pa, vm_size_t size, vm_offset_t *va)
{
	uint32_t prot;
	vm_paddr_t pa_start;
	vm_paddr_t pa_end;
	unsigned int entry_tsize;
	vm_size_t entry_size;
	tlb_entry_t e;

	*va = (vm_offset_t)NULL;

	tlb1_read_entry(&e, i);
	/* Skip invalid entries */
	if (!(e.mas1 & MAS1_VALID))
		return (EINVAL);

	/*
	 * The entry must be cache-inhibited, guarded, and r/w
	 * so it can function as an i/o page.
	 */
	prot = e.mas2 & (MAS2_I | MAS2_G);
	if (prot != (MAS2_I | MAS2_G))
		return (EPERM);

	prot = e.mas3 & (MAS3_SR | MAS3_SW);
	if (prot != (MAS3_SR | MAS3_SW))
		return (EPERM);

	/* The address should be within the entry range. */
	entry_tsize = (e.mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
	KASSERT((entry_tsize), ("tlb1_iomapped: invalid entry tsize"));

	entry_size = tsize2size(entry_tsize);
	pa_start = (((vm_paddr_t)e.mas7 & MAS7_RPN) << 32) |
	    (e.mas3 & MAS3_RPN);
	pa_end = pa_start + entry_size;

	if ((pa < pa_start) || ((pa + size) > pa_end))
		return (ERANGE);

	/* Return virtual address of this mapping. */
	*va = (e.mas2 & MAS2_EPN_MASK) + (pa - pa_start);
	return (0);
}

/*
 * Invalidate all TLB0 entries which match the given TID. Note this is
 * dedicated for cases when invalidations should NOT be propagated to other
 * CPUs.
 */
static void
tid_flush(tlbtid_t tid)
{
	register_t msr;
	uint32_t mas0, mas1, mas2;
	int entry, way;

	/* Don't evict kernel translations */
	if (tid == TID_KERNEL)
		return;

	msr = mfmsr();
	__asm __volatile("wrteei 0");

	for (way = 0; way < TLB0_WAYS; way++)
		for (entry = 0; entry < TLB0_ENTRIES_PER_WAY; entry++) {

			mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way);
			mtspr(SPR_MAS0, mas0);
			__asm __volatile("isync");

			mas2 = entry << MAS2_TLB0_ENTRY_IDX_SHIFT;
			mtspr(SPR_MAS2, mas2);

			__asm __volatile("isync; tlbre");

			mas1 = mfspr(SPR_MAS1);

			if (!(mas1 & MAS1_VALID))
				continue;
			if (((mas1 & MAS1_TID_MASK) >> MAS1_TID_SHIFT) != tid)
				continue;
			mas1 &= ~MAS1_VALID;
			mtspr(SPR_MAS1, mas1);
			__asm __volatile("isync; tlbwe; isync; msync");
		}
	mtmsr(msr);
}
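
/*
 * Illustrative note (added for exposition, not part of the original code):
 * unlike tlb0_flush_entry() earlier in this file, which uses tlbivax and
 * tlbsync and therefore propagates the invalidation, tid_flush() above only
 * rewrites the local core's TLB0 entries with tlbwe.  A caller recycling an
 * address-space TID on the local CPU would simply do something like:
 *
 *	tid_flush(old_tid);		(old_tid is a hypothetical name)
 *
 * with no additional locking, relying on the wrteei 0 / mtmsr pair inside
 * the function to keep interrupts from clobbering the MAS registers
 * mid-update.
 */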