/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski <raj@semihalf.com>
 * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
 * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Some hw specific parts of this pmap were derived or influenced
 * by NetBSD's ibm4xx pmap module. More generic code is shared with
 * a few other pmap modules from the FreeBSD tree.
 */

/*
 * VM layout notes:
 *
 * Kernel and user threads run within one common virtual address space
 * defined by AS=0.
 *
 * 32-bit pmap:
 * Virtual address space layout:
 * -----------------------------
 * 0x0000_0000 - 0x7fff_ffff   : user process
 * 0x8000_0000 - 0xbfff_ffff   : pmap_mapdev()-ed area (PCI/PCIE etc.)
 * 0xc000_0000 - 0xc0ff_ffff   : kernel reserved
 *   0xc000_0000 - data_end    : kernel code+data, env, metadata etc.
 * 0xc100_0000 - 0xffff_ffff   : KVA
 *   0xc100_0000 - 0xc100_3fff : reserved for page zero/copy
 *   0xc100_4000 - 0xc200_3fff : reserved for ptbl bufs
 *   0xc200_4000 - 0xc200_8fff : guard page + kstack0
 *   0xc200_9000 - 0xfeef_ffff : actual free KVA space
 *
 * 64-bit pmap:
 * Virtual address space layout:
 * -----------------------------
 * 0x0000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff    : user process
 *   0x0000_0000_0000_0000 - 0x8fff_ffff_ffff_ffff  : text, data, heap, maps, libraries
 *   0x9000_0000_0000_0000 - 0xafff_ffff_ffff_ffff  : mmio region
 *   0xb000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff  : stack
 * 0xc000_0000_0000_0000 - 0xcfff_ffff_ffff_ffff    : kernel reserved
 *   0xc000_0000_0000_0000 - endkernel-1            : kernel code & data
 *               endkernel - msgbufp-1              : flat device tree
 *                 msgbufp - kernel_pdir-1          : message buffer
 *             kernel_pdir - kernel_pp2d-1          : kernel page directory
 *             kernel_pp2d - .                      : kernel pointers to page directory
 *      pmap_zero_copy_min - crashdumpmap-1         : reserved for page zero/copy
 *            crashdumpmap - ptbl_buf_pool_vabase-1 : reserved for ptbl bufs
 *    ptbl_buf_pool_vabase - virtual_avail-1        : user page directories and page tables
 *           virtual_avail - 0xcfff_ffff_ffff_ffff  : actual free KVA space
 * 0xd000_0000_0000_0000 - 0xdfff_ffff_ffff_ffff    : coprocessor region
 * 0xe000_0000_0000_0000 - 0xefff_ffff_ffff_ffff    : mmio region
 * 0xf000_0000_0000_0000 - 0xffff_ffff_ffff_ffff    : direct map
 *   0xf000_0000_0000_0000 - +Maxmem                : physmem map
 *                         - 0xffff_ffff_ffff_ffff  : device direct map
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_kstack_pages.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/malloc.h>
#include <sys/ktr.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/queue.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/linker.h>
#include <sys/msgbuf.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_param.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>
#include <vm/uma.h>

#include <machine/_inttypes.h>
#include <machine/cpu.h>
#include <machine/pcb.h>
#include <machine/platform.h>

#include <machine/tlb.h>
#include <machine/spr.h>
#include <machine/md_var.h>
#include <machine/mmuvar.h>
#include <machine/pmap.h>
#include <machine/pte.h>

#include <ddb/ddb.h>

#include "mmu_if.h"

#define	SPARSE_MAPDEV
#ifdef DEBUG
#define debugf(fmt, args...) printf(fmt, ##args)
#else
#define debugf(fmt, args...)
#endif

#ifdef __powerpc64__
#define	PRI0ptrX	"016lx"
#else
#define	PRI0ptrX	"08x"
#endif

#define TODO	panic("%s: not implemented", __func__);

extern unsigned char _etext[];
extern unsigned char _end[];

extern uint32_t *bootinfo;

vm_paddr_t kernload;
vm_offset_t kernstart;
vm_size_t kernsize;

/* Message buffer and tables. */
static vm_offset_t data_start;
static vm_size_t data_end;

/* Phys/avail memory regions. */
static struct mem_region *availmem_regions;
static int availmem_regions_sz;
static struct mem_region *physmem_regions;
static int physmem_regions_sz;

/* Reserved KVA space and mutex for mmu_booke_zero_page. */
static vm_offset_t zero_page_va;
static struct mtx zero_page_mutex;

static struct mtx tlbivax_mutex;

/* Reserved KVA space and mutex for mmu_booke_copy_page. */
static vm_offset_t copy_page_src_va;
static vm_offset_t copy_page_dst_va;
static struct mtx copy_page_mutex;

/**************************************************************************/
/* PMAP */
/**************************************************************************/

static int mmu_booke_enter_locked(mmu_t, pmap_t, vm_offset_t, vm_page_t,
    vm_prot_t, u_int flags, int8_t psind);

unsigned int kptbl_min;		/* Index of the first kernel ptbl.
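/*
 * Illustrative sketch (not compiled): on the 64-bit pmap the "physmem map"
 * region described in the layout notes above lets the kernel reach any
 * physical page without a per-page mapping.  Assuming DMAP_BASE_ADDRESS
 * names the start of that 0xf000_0000_0000_0000 region, the
 * PHYS_TO_DMAP()/DMAP_TO_PHYS() pairs used later in this file amount to a
 * constant offset:
 *
 *	vm_offset_t va = DMAP_BASE_ADDRESS + pa;	roughly PHYS_TO_DMAP(pa)
 *	vm_paddr_t  pa = va - DMAP_BASE_ADDRESS;	roughly DMAP_TO_PHYS(va)
 *
 * which is why, for example, pdir_alloc() below can take a usable pointer to
 * a freshly allocated page directory page straight from its physical address.
 */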
*/ 183 unsigned int kernel_ptbls; /* Number of KVA ptbls. */ 184 #ifdef __powerpc64__ 185 unsigned int kernel_pdirs; 186 #endif 187 static uma_zone_t ptbl_root_zone; 188 189 /* 190 * If user pmap is processed with mmu_booke_remove and the resident count 191 * drops to 0, there are no more pages to remove, so we need not continue. 192 */ 193 #define PMAP_REMOVE_DONE(pmap) \ 194 ((pmap) != kernel_pmap && (pmap)->pm_stats.resident_count == 0) 195 196 #if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__) 197 extern int elf32_nxstack; 198 #endif 199 200 /**************************************************************************/ 201 /* TLB and TID handling */ 202 /**************************************************************************/ 203 204 /* Translation ID busy table */ 205 static volatile pmap_t tidbusy[MAXCPU][TID_MAX + 1]; 206 207 /* 208 * TLB0 capabilities (entry, way numbers etc.). These can vary between e500 209 * core revisions and should be read from h/w registers during early config. 210 */ 211 uint32_t tlb0_entries; 212 uint32_t tlb0_ways; 213 uint32_t tlb0_entries_per_way; 214 uint32_t tlb1_entries; 215 216 #define TLB0_ENTRIES (tlb0_entries) 217 #define TLB0_WAYS (tlb0_ways) 218 #define TLB0_ENTRIES_PER_WAY (tlb0_entries_per_way) 219 220 #define TLB1_ENTRIES (tlb1_entries) 221 222 static vm_offset_t tlb1_map_base = VM_MAXUSER_ADDRESS + PAGE_SIZE; 223 224 static tlbtid_t tid_alloc(struct pmap *); 225 static void tid_flush(tlbtid_t tid); 226 227 #ifdef DDB 228 #ifdef __powerpc64__ 229 static void tlb_print_entry(int, uint32_t, uint64_t, uint32_t, uint32_t); 230 #else 231 static void tlb_print_entry(int, uint32_t, uint32_t, uint32_t, uint32_t); 232 #endif 233 #endif 234 235 static void tlb1_read_entry(tlb_entry_t *, unsigned int); 236 static void tlb1_write_entry(tlb_entry_t *, unsigned int); 237 static int tlb1_iomapped(int, vm_paddr_t, vm_size_t, vm_offset_t *); 238 static vm_size_t tlb1_mapin_region(vm_offset_t, vm_paddr_t, vm_size_t); 239 240 static vm_size_t tsize2size(unsigned int); 241 static unsigned int size2tsize(vm_size_t); 242 static unsigned int ilog2(unsigned long); 243 244 static void set_mas4_defaults(void); 245 246 static inline void tlb0_flush_entry(vm_offset_t); 247 static inline unsigned int tlb0_tableidx(vm_offset_t, unsigned int); 248 249 /**************************************************************************/ 250 /* Page table management */ 251 /**************************************************************************/ 252 253 static struct rwlock_padalign pvh_global_lock; 254 255 /* Data for the pv entry allocation mechanism */ 256 static uma_zone_t pvzone; 257 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; 258 259 #define PV_ENTRY_ZONE_MIN 2048 /* min pv entries in uma zone */ 260 261 #ifndef PMAP_SHPGPERPROC 262 #define PMAP_SHPGPERPROC 200 263 #endif 264 265 #ifdef __powerpc64__ 266 #define PMAP_ROOT_SIZE (sizeof(pte_t***) * PP2D_NENTRIES) 267 static pte_t *ptbl_alloc(mmu_t, pmap_t, pte_t **, 268 unsigned int, boolean_t); 269 static void ptbl_free(mmu_t, pmap_t, pte_t **, unsigned int, vm_page_t); 270 static void ptbl_hold(mmu_t, pmap_t, pte_t **, unsigned int); 271 static int ptbl_unhold(mmu_t, pmap_t, vm_offset_t); 272 #else 273 #define PMAP_ROOT_SIZE (sizeof(pte_t**) * PDIR_NENTRIES) 274 static void ptbl_init(void); 275 static struct ptbl_buf *ptbl_buf_alloc(void); 276 static void ptbl_buf_free(struct ptbl_buf *); 277 static void ptbl_free_pmap_ptbl(pmap_t, pte_t *); 278 279 static pte_t *ptbl_alloc(mmu_t, pmap_t, 
unsigned int, boolean_t); 280 static void ptbl_free(mmu_t, pmap_t, unsigned int); 281 static void ptbl_hold(mmu_t, pmap_t, unsigned int); 282 static int ptbl_unhold(mmu_t, pmap_t, unsigned int); 283 #endif 284 285 static vm_paddr_t pte_vatopa(mmu_t, pmap_t, vm_offset_t); 286 static int pte_enter(mmu_t, pmap_t, vm_page_t, vm_offset_t, uint32_t, boolean_t); 287 static int pte_remove(mmu_t, pmap_t, vm_offset_t, uint8_t); 288 static pte_t *pte_find(mmu_t, pmap_t, vm_offset_t); 289 static void kernel_pte_alloc(vm_offset_t, vm_offset_t, vm_offset_t); 290 291 static pv_entry_t pv_alloc(void); 292 static void pv_free(pv_entry_t); 293 static void pv_insert(pmap_t, vm_offset_t, vm_page_t); 294 static void pv_remove(pmap_t, vm_offset_t, vm_page_t); 295 296 static void booke_pmap_init_qpages(void); 297 298 struct ptbl_buf { 299 TAILQ_ENTRY(ptbl_buf) link; /* list link */ 300 vm_offset_t kva; /* va of mapping */ 301 }; 302 303 #ifndef __powerpc64__ 304 /* Number of kva ptbl buffers, each covering one ptbl (PTBL_PAGES). */ 305 #define PTBL_BUFS (128 * 16) 306 307 /* ptbl free list and a lock used for access synchronization. */ 308 static TAILQ_HEAD(, ptbl_buf) ptbl_buf_freelist; 309 static struct mtx ptbl_buf_freelist_lock; 310 311 /* Base address of kva space allocated fot ptbl bufs. */ 312 static vm_offset_t ptbl_buf_pool_vabase; 313 314 /* Pointer to ptbl_buf structures. */ 315 static struct ptbl_buf *ptbl_bufs; 316 #endif 317 318 #ifdef SMP 319 extern tlb_entry_t __boot_tlb1[]; 320 void pmap_bootstrap_ap(volatile uint32_t *); 321 #endif 322 323 /* 324 * Kernel MMU interface 325 */ 326 static void mmu_booke_clear_modify(mmu_t, vm_page_t); 327 static void mmu_booke_copy(mmu_t, pmap_t, pmap_t, vm_offset_t, 328 vm_size_t, vm_offset_t); 329 static void mmu_booke_copy_page(mmu_t, vm_page_t, vm_page_t); 330 static void mmu_booke_copy_pages(mmu_t, vm_page_t *, 331 vm_offset_t, vm_page_t *, vm_offset_t, int); 332 static int mmu_booke_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, 333 vm_prot_t, u_int flags, int8_t psind); 334 static void mmu_booke_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, 335 vm_page_t, vm_prot_t); 336 static void mmu_booke_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, 337 vm_prot_t); 338 static vm_paddr_t mmu_booke_extract(mmu_t, pmap_t, vm_offset_t); 339 static vm_page_t mmu_booke_extract_and_hold(mmu_t, pmap_t, vm_offset_t, 340 vm_prot_t); 341 static void mmu_booke_init(mmu_t); 342 static boolean_t mmu_booke_is_modified(mmu_t, vm_page_t); 343 static boolean_t mmu_booke_is_prefaultable(mmu_t, pmap_t, vm_offset_t); 344 static boolean_t mmu_booke_is_referenced(mmu_t, vm_page_t); 345 static int mmu_booke_ts_referenced(mmu_t, vm_page_t); 346 static vm_offset_t mmu_booke_map(mmu_t, vm_offset_t *, vm_paddr_t, vm_paddr_t, 347 int); 348 static int mmu_booke_mincore(mmu_t, pmap_t, vm_offset_t, 349 vm_paddr_t *); 350 static void mmu_booke_object_init_pt(mmu_t, pmap_t, vm_offset_t, 351 vm_object_t, vm_pindex_t, vm_size_t); 352 static boolean_t mmu_booke_page_exists_quick(mmu_t, pmap_t, vm_page_t); 353 static void mmu_booke_page_init(mmu_t, vm_page_t); 354 static int mmu_booke_page_wired_mappings(mmu_t, vm_page_t); 355 static void mmu_booke_pinit(mmu_t, pmap_t); 356 static void mmu_booke_pinit0(mmu_t, pmap_t); 357 static void mmu_booke_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, 358 vm_prot_t); 359 static void mmu_booke_qenter(mmu_t, vm_offset_t, vm_page_t *, int); 360 static void mmu_booke_qremove(mmu_t, vm_offset_t, int); 361 static void mmu_booke_release(mmu_t, pmap_t); 362 
static void mmu_booke_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t); 363 static void mmu_booke_remove_all(mmu_t, vm_page_t); 364 static void mmu_booke_remove_write(mmu_t, vm_page_t); 365 static void mmu_booke_unwire(mmu_t, pmap_t, vm_offset_t, vm_offset_t); 366 static void mmu_booke_zero_page(mmu_t, vm_page_t); 367 static void mmu_booke_zero_page_area(mmu_t, vm_page_t, int, int); 368 static void mmu_booke_activate(mmu_t, struct thread *); 369 static void mmu_booke_deactivate(mmu_t, struct thread *); 370 static void mmu_booke_bootstrap(mmu_t, vm_offset_t, vm_offset_t); 371 static void *mmu_booke_mapdev(mmu_t, vm_paddr_t, vm_size_t); 372 static void *mmu_booke_mapdev_attr(mmu_t, vm_paddr_t, vm_size_t, vm_memattr_t); 373 static void mmu_booke_unmapdev(mmu_t, vm_offset_t, vm_size_t); 374 static vm_paddr_t mmu_booke_kextract(mmu_t, vm_offset_t); 375 static void mmu_booke_kenter(mmu_t, vm_offset_t, vm_paddr_t); 376 static void mmu_booke_kenter_attr(mmu_t, vm_offset_t, vm_paddr_t, vm_memattr_t); 377 static void mmu_booke_kremove(mmu_t, vm_offset_t); 378 static boolean_t mmu_booke_dev_direct_mapped(mmu_t, vm_paddr_t, vm_size_t); 379 static void mmu_booke_sync_icache(mmu_t, pmap_t, vm_offset_t, 380 vm_size_t); 381 static void mmu_booke_dumpsys_map(mmu_t, vm_paddr_t pa, size_t, 382 void **); 383 static void mmu_booke_dumpsys_unmap(mmu_t, vm_paddr_t pa, size_t, 384 void *); 385 static void mmu_booke_scan_init(mmu_t); 386 static vm_offset_t mmu_booke_quick_enter_page(mmu_t mmu, vm_page_t m); 387 static void mmu_booke_quick_remove_page(mmu_t mmu, vm_offset_t addr); 388 static int mmu_booke_change_attr(mmu_t mmu, vm_offset_t addr, 389 vm_size_t sz, vm_memattr_t mode); 390 static int mmu_booke_map_user_ptr(mmu_t mmu, pmap_t pm, 391 volatile const void *uaddr, void **kaddr, size_t ulen, size_t *klen); 392 static int mmu_booke_decode_kernel_ptr(mmu_t mmu, vm_offset_t addr, 393 int *is_user, vm_offset_t *decoded_addr); 394 395 396 static mmu_method_t mmu_booke_methods[] = { 397 /* pmap dispatcher interface */ 398 MMUMETHOD(mmu_clear_modify, mmu_booke_clear_modify), 399 MMUMETHOD(mmu_copy, mmu_booke_copy), 400 MMUMETHOD(mmu_copy_page, mmu_booke_copy_page), 401 MMUMETHOD(mmu_copy_pages, mmu_booke_copy_pages), 402 MMUMETHOD(mmu_enter, mmu_booke_enter), 403 MMUMETHOD(mmu_enter_object, mmu_booke_enter_object), 404 MMUMETHOD(mmu_enter_quick, mmu_booke_enter_quick), 405 MMUMETHOD(mmu_extract, mmu_booke_extract), 406 MMUMETHOD(mmu_extract_and_hold, mmu_booke_extract_and_hold), 407 MMUMETHOD(mmu_init, mmu_booke_init), 408 MMUMETHOD(mmu_is_modified, mmu_booke_is_modified), 409 MMUMETHOD(mmu_is_prefaultable, mmu_booke_is_prefaultable), 410 MMUMETHOD(mmu_is_referenced, mmu_booke_is_referenced), 411 MMUMETHOD(mmu_ts_referenced, mmu_booke_ts_referenced), 412 MMUMETHOD(mmu_map, mmu_booke_map), 413 MMUMETHOD(mmu_mincore, mmu_booke_mincore), 414 MMUMETHOD(mmu_object_init_pt, mmu_booke_object_init_pt), 415 MMUMETHOD(mmu_page_exists_quick,mmu_booke_page_exists_quick), 416 MMUMETHOD(mmu_page_init, mmu_booke_page_init), 417 MMUMETHOD(mmu_page_wired_mappings, mmu_booke_page_wired_mappings), 418 MMUMETHOD(mmu_pinit, mmu_booke_pinit), 419 MMUMETHOD(mmu_pinit0, mmu_booke_pinit0), 420 MMUMETHOD(mmu_protect, mmu_booke_protect), 421 MMUMETHOD(mmu_qenter, mmu_booke_qenter), 422 MMUMETHOD(mmu_qremove, mmu_booke_qremove), 423 MMUMETHOD(mmu_release, mmu_booke_release), 424 MMUMETHOD(mmu_remove, mmu_booke_remove), 425 MMUMETHOD(mmu_remove_all, mmu_booke_remove_all), 426 MMUMETHOD(mmu_remove_write, mmu_booke_remove_write), 427 
MMUMETHOD(mmu_sync_icache, mmu_booke_sync_icache), 428 MMUMETHOD(mmu_unwire, mmu_booke_unwire), 429 MMUMETHOD(mmu_zero_page, mmu_booke_zero_page), 430 MMUMETHOD(mmu_zero_page_area, mmu_booke_zero_page_area), 431 MMUMETHOD(mmu_activate, mmu_booke_activate), 432 MMUMETHOD(mmu_deactivate, mmu_booke_deactivate), 433 MMUMETHOD(mmu_quick_enter_page, mmu_booke_quick_enter_page), 434 MMUMETHOD(mmu_quick_remove_page, mmu_booke_quick_remove_page), 435 436 /* Internal interfaces */ 437 MMUMETHOD(mmu_bootstrap, mmu_booke_bootstrap), 438 MMUMETHOD(mmu_dev_direct_mapped,mmu_booke_dev_direct_mapped), 439 MMUMETHOD(mmu_mapdev, mmu_booke_mapdev), 440 MMUMETHOD(mmu_mapdev_attr, mmu_booke_mapdev_attr), 441 MMUMETHOD(mmu_kenter, mmu_booke_kenter), 442 MMUMETHOD(mmu_kenter_attr, mmu_booke_kenter_attr), 443 MMUMETHOD(mmu_kextract, mmu_booke_kextract), 444 MMUMETHOD(mmu_kremove, mmu_booke_kremove), 445 MMUMETHOD(mmu_unmapdev, mmu_booke_unmapdev), 446 MMUMETHOD(mmu_change_attr, mmu_booke_change_attr), 447 MMUMETHOD(mmu_map_user_ptr, mmu_booke_map_user_ptr), 448 MMUMETHOD(mmu_decode_kernel_ptr, mmu_booke_decode_kernel_ptr), 449 450 /* dumpsys() support */ 451 MMUMETHOD(mmu_dumpsys_map, mmu_booke_dumpsys_map), 452 MMUMETHOD(mmu_dumpsys_unmap, mmu_booke_dumpsys_unmap), 453 MMUMETHOD(mmu_scan_init, mmu_booke_scan_init), 454 455 { 0, 0 } 456 }; 457 458 MMU_DEF(booke_mmu, MMU_TYPE_BOOKE, mmu_booke_methods, 0); 459 460 static __inline uint32_t 461 tlb_calc_wimg(vm_paddr_t pa, vm_memattr_t ma) 462 { 463 uint32_t attrib; 464 int i; 465 466 if (ma != VM_MEMATTR_DEFAULT) { 467 switch (ma) { 468 case VM_MEMATTR_UNCACHEABLE: 469 return (MAS2_I | MAS2_G); 470 case VM_MEMATTR_WRITE_COMBINING: 471 case VM_MEMATTR_WRITE_BACK: 472 case VM_MEMATTR_PREFETCHABLE: 473 return (MAS2_I); 474 case VM_MEMATTR_WRITE_THROUGH: 475 return (MAS2_W | MAS2_M); 476 case VM_MEMATTR_CACHEABLE: 477 return (MAS2_M); 478 } 479 } 480 481 /* 482 * Assume the page is cache inhibited and access is guarded unless 483 * it's in our available memory array. 484 */ 485 attrib = _TLB_ENTRY_IO; 486 for (i = 0; i < physmem_regions_sz; i++) { 487 if ((pa >= physmem_regions[i].mr_start) && 488 (pa < (physmem_regions[i].mr_start + 489 physmem_regions[i].mr_size))) { 490 attrib = _TLB_ENTRY_MEM; 491 break; 492 } 493 } 494 495 return (attrib); 496 } 497 498 static inline void 499 tlb_miss_lock(void) 500 { 501 #ifdef SMP 502 struct pcpu *pc; 503 504 if (!smp_started) 505 return; 506 507 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 508 if (pc != pcpup) { 509 510 CTR3(KTR_PMAP, "%s: tlb miss LOCK of CPU=%d, " 511 "tlb_lock=%p", __func__, pc->pc_cpuid, pc->pc_booke.tlb_lock); 512 513 KASSERT((pc->pc_cpuid != PCPU_GET(cpuid)), 514 ("tlb_miss_lock: tried to lock self")); 515 516 tlb_lock(pc->pc_booke.tlb_lock); 517 518 CTR1(KTR_PMAP, "%s: locked", __func__); 519 } 520 } 521 #endif 522 } 523 524 static inline void 525 tlb_miss_unlock(void) 526 { 527 #ifdef SMP 528 struct pcpu *pc; 529 530 if (!smp_started) 531 return; 532 533 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 534 if (pc != pcpup) { 535 CTR2(KTR_PMAP, "%s: tlb miss UNLOCK of CPU=%d", 536 __func__, pc->pc_cpuid); 537 538 tlb_unlock(pc->pc_booke.tlb_lock); 539 540 CTR1(KTR_PMAP, "%s: unlocked", __func__); 541 } 542 } 543 #endif 544 } 545 546 /* Return number of entries in TLB0. 
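/*
 * Illustrative sketch (not compiled): PTE updates in this file bracket the
 * actual store with the two locks used above, first tlbivax_mutex (so that
 * only one CPU runs the tlbivax/tlbsync invalidation sequence at a time) and
 * then tlb_miss_lock() (so that the software TLB miss handlers on the other
 * CPUs are held off while the entry changes):
 *
 *	mtx_lock_spin(&tlbivax_mutex);
 *	tlb_miss_lock();
 *
 *	tlb0_flush_entry(va);
 *	*pte = new_pte_value;		hypothetical new PTE contents
 *
 *	tlb_miss_unlock();
 *	mtx_unlock_spin(&tlbivax_mutex);
 */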
*/ 547 static __inline void 548 tlb0_get_tlbconf(void) 549 { 550 uint32_t tlb0_cfg; 551 552 tlb0_cfg = mfspr(SPR_TLB0CFG); 553 tlb0_entries = tlb0_cfg & TLBCFG_NENTRY_MASK; 554 tlb0_ways = (tlb0_cfg & TLBCFG_ASSOC_MASK) >> TLBCFG_ASSOC_SHIFT; 555 tlb0_entries_per_way = tlb0_entries / tlb0_ways; 556 } 557 558 /* Return number of entries in TLB1. */ 559 static __inline void 560 tlb1_get_tlbconf(void) 561 { 562 uint32_t tlb1_cfg; 563 564 tlb1_cfg = mfspr(SPR_TLB1CFG); 565 tlb1_entries = tlb1_cfg & TLBCFG_NENTRY_MASK; 566 } 567 568 /**************************************************************************/ 569 /* Page table related */ 570 /**************************************************************************/ 571 572 #ifdef __powerpc64__ 573 /* Initialize pool of kva ptbl buffers. */ 574 static void 575 ptbl_init(void) 576 { 577 } 578 579 /* Get a pointer to a PTE in a page table. */ 580 static __inline pte_t * 581 pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va) 582 { 583 pte_t **pdir; 584 pte_t *ptbl; 585 586 KASSERT((pmap != NULL), ("pte_find: invalid pmap")); 587 588 pdir = pmap->pm_pp2d[PP2D_IDX(va)]; 589 if (!pdir) 590 return NULL; 591 ptbl = pdir[PDIR_IDX(va)]; 592 return ((ptbl != NULL) ? &ptbl[PTBL_IDX(va)] : NULL); 593 } 594 595 /* 596 * allocate a page of pointers to page directories, do not preallocate the 597 * page tables 598 */ 599 static pte_t ** 600 pdir_alloc(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx, bool nosleep) 601 { 602 vm_page_t m; 603 pte_t **pdir; 604 int req; 605 606 req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; 607 while ((m = vm_page_alloc(NULL, pp2d_idx, req)) == NULL) { 608 PMAP_UNLOCK(pmap); 609 if (nosleep) { 610 return (NULL); 611 } 612 vm_wait(NULL); 613 PMAP_LOCK(pmap); 614 } 615 616 /* Zero whole ptbl. */ 617 pdir = (pte_t **)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 618 mmu_booke_zero_page(mmu, m); 619 620 return (pdir); 621 } 622 623 /* Free pdir pages and invalidate pdir entry. */ 624 static void 625 pdir_free(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx, vm_page_t m) 626 { 627 pte_t **pdir; 628 629 pdir = pmap->pm_pp2d[pp2d_idx]; 630 631 KASSERT((pdir != NULL), ("pdir_free: null pdir")); 632 633 pmap->pm_pp2d[pp2d_idx] = NULL; 634 635 vm_wire_sub(1); 636 vm_page_free_zero(m); 637 } 638 639 /* 640 * Decrement pdir pages hold count and attempt to free pdir pages. Called 641 * when removing directory entry from pdir. 642 * 643 * Return 1 if pdir pages were freed. 644 */ 645 static int 646 pdir_unhold(mmu_t mmu, pmap_t pmap, u_int pp2d_idx) 647 { 648 pte_t **pdir; 649 vm_paddr_t pa; 650 vm_page_t m; 651 652 KASSERT((pmap != kernel_pmap), 653 ("pdir_unhold: unholding kernel pdir!")); 654 655 pdir = pmap->pm_pp2d[pp2d_idx]; 656 657 /* decrement hold count */ 658 pa = DMAP_TO_PHYS((vm_offset_t) pdir); 659 m = PHYS_TO_VM_PAGE(pa); 660 661 /* 662 * Free pdir page if there are no dir entries in this pdir. 663 */ 664 m->wire_count--; 665 if (m->wire_count == 0) { 666 pdir_free(mmu, pmap, pp2d_idx, m); 667 return (1); 668 } 669 return (0); 670 } 671 672 /* 673 * Increment hold count for pdir pages. This routine is used when new ptlb 674 * entry is being inserted into pdir. 675 */ 676 static void 677 pdir_hold(mmu_t mmu, pmap_t pmap, pte_t ** pdir) 678 { 679 vm_page_t m; 680 681 KASSERT((pmap != kernel_pmap), 682 ("pdir_hold: holding kernel pdir!")); 683 684 KASSERT((pdir != NULL), ("pdir_hold: null pdir")); 685 686 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pdir)); 687 m->wire_count++; 688 } 689 690 /* Allocate page table. 
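/*
 * Illustrative sketch (not compiled): the 64-bit pmap resolves a virtual
 * address through three levels, exactly as pte_find() above does, so a fully
 * expanded lookup is just chained array indexing:
 *
 *	pte_t ***pp2d = pmap->pm_pp2d;		root, one page of pointers
 *	pte_t  **pdir = pp2d[PP2D_IDX(va)];	page directory
 *	pte_t   *ptbl = pdir[PDIR_IDX(va)];	page table
 *	pte_t    *pte = &ptbl[PTBL_IDX(va)];	final PTE
 *
 * pdir_alloc()/ptbl_alloc() below fill in the two intermediate levels on
 * demand, and the matching unhold routines tear them down again when their
 * last entry goes away.
 */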
*/ 691 static pte_t * 692 ptbl_alloc(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx, 693 boolean_t nosleep) 694 { 695 vm_page_t m; 696 pte_t *ptbl; 697 int req; 698 699 KASSERT((pdir[pdir_idx] == NULL), 700 ("%s: valid ptbl entry exists!", __func__)); 701 702 req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; 703 while ((m = vm_page_alloc(NULL, pdir_idx, req)) == NULL) { 704 PMAP_UNLOCK(pmap); 705 rw_wunlock(&pvh_global_lock); 706 if (nosleep) { 707 return (NULL); 708 } 709 vm_wait(NULL); 710 rw_wlock(&pvh_global_lock); 711 PMAP_LOCK(pmap); 712 } 713 714 /* Zero whole ptbl. */ 715 ptbl = (pte_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 716 mmu_booke_zero_page(mmu, m); 717 718 return (ptbl); 719 } 720 721 /* Free ptbl pages and invalidate pdir entry. */ 722 static void 723 ptbl_free(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx, vm_page_t m) 724 { 725 pte_t *ptbl; 726 727 ptbl = pdir[pdir_idx]; 728 729 KASSERT((ptbl != NULL), ("ptbl_free: null ptbl")); 730 731 pdir[pdir_idx] = NULL; 732 733 vm_wire_sub(1); 734 vm_page_free_zero(m); 735 } 736 737 /* 738 * Decrement ptbl pages hold count and attempt to free ptbl pages. Called 739 * when removing pte entry from ptbl. 740 * 741 * Return 1 if ptbl pages were freed. 742 */ 743 static int 744 ptbl_unhold(mmu_t mmu, pmap_t pmap, vm_offset_t va) 745 { 746 pte_t *ptbl; 747 vm_page_t m; 748 u_int pp2d_idx; 749 pte_t **pdir; 750 u_int pdir_idx; 751 752 pp2d_idx = PP2D_IDX(va); 753 pdir_idx = PDIR_IDX(va); 754 755 KASSERT((pmap != kernel_pmap), 756 ("ptbl_unhold: unholding kernel ptbl!")); 757 758 pdir = pmap->pm_pp2d[pp2d_idx]; 759 ptbl = pdir[pdir_idx]; 760 761 /* decrement hold count */ 762 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) ptbl)); 763 764 /* 765 * Free ptbl pages if there are no pte entries in this ptbl. 766 * wire_count has the same value for all ptbl pages, so check the 767 * last page. 768 */ 769 m->wire_count--; 770 if (m->wire_count == 0) { 771 ptbl_free(mmu, pmap, pdir, pdir_idx, m); 772 pdir_unhold(mmu, pmap, pp2d_idx); 773 return (1); 774 } 775 return (0); 776 } 777 778 /* 779 * Increment hold count for ptbl pages. This routine is used when new pte 780 * entry is being inserted into ptbl. 781 */ 782 static void 783 ptbl_hold(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx) 784 { 785 pte_t *ptbl; 786 vm_page_t m; 787 788 KASSERT((pmap != kernel_pmap), 789 ("ptbl_hold: holding kernel ptbl!")); 790 791 ptbl = pdir[pdir_idx]; 792 793 KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl")); 794 795 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) ptbl)); 796 m->wire_count++; 797 } 798 #else 799 800 /* Initialize pool of kva ptbl buffers. */ 801 static void 802 ptbl_init(void) 803 { 804 int i; 805 806 CTR3(KTR_PMAP, "%s: s (ptbl_bufs = 0x%08x size 0x%08x)", __func__, 807 (uint32_t)ptbl_bufs, sizeof(struct ptbl_buf) * PTBL_BUFS); 808 CTR3(KTR_PMAP, "%s: s (ptbl_buf_pool_vabase = 0x%08x size = 0x%08x)", 809 __func__, ptbl_buf_pool_vabase, PTBL_BUFS * PTBL_PAGES * PAGE_SIZE); 810 811 mtx_init(&ptbl_buf_freelist_lock, "ptbl bufs lock", NULL, MTX_DEF); 812 TAILQ_INIT(&ptbl_buf_freelist); 813 814 for (i = 0; i < PTBL_BUFS; i++) { 815 ptbl_bufs[i].kva = 816 ptbl_buf_pool_vabase + i * PTBL_PAGES * PAGE_SIZE; 817 TAILQ_INSERT_TAIL(&ptbl_buf_freelist, &ptbl_bufs[i], link); 818 } 819 } 820 821 /* Get a ptbl_buf from the freelist. 
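/*
 * Illustrative sketch (not compiled): in the 64-bit routines above, the
 * wire_count of the vm_page backing a ptbl (or pdir) page doubles as a
 * reference count.  Roughly:
 *
 *	pte_enter(va), new PTE                 ->  ptbl_hold()
 *	pte_remove(va, PTBL_UNHOLD), last PTE  ->  ptbl_unhold()
 *	    ptbl_unhold(): wire_count hits 0   ->  ptbl_free() + pdir_unhold()
 *	    pdir_unhold(): wire_count hits 0   ->  pdir_free()
 *
 * so an empty page table page, and in turn an empty page directory page, is
 * returned to the VM system as soon as its last user disappears.
 */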
*/ 822 static struct ptbl_buf * 823 ptbl_buf_alloc(void) 824 { 825 struct ptbl_buf *buf; 826 827 mtx_lock(&ptbl_buf_freelist_lock); 828 buf = TAILQ_FIRST(&ptbl_buf_freelist); 829 if (buf != NULL) 830 TAILQ_REMOVE(&ptbl_buf_freelist, buf, link); 831 mtx_unlock(&ptbl_buf_freelist_lock); 832 833 CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf); 834 835 return (buf); 836 } 837 838 /* Return ptbl buff to free pool. */ 839 static void 840 ptbl_buf_free(struct ptbl_buf *buf) 841 { 842 843 CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf); 844 845 mtx_lock(&ptbl_buf_freelist_lock); 846 TAILQ_INSERT_TAIL(&ptbl_buf_freelist, buf, link); 847 mtx_unlock(&ptbl_buf_freelist_lock); 848 } 849 850 /* 851 * Search the list of allocated ptbl bufs and find on list of allocated ptbls 852 */ 853 static void 854 ptbl_free_pmap_ptbl(pmap_t pmap, pte_t *ptbl) 855 { 856 struct ptbl_buf *pbuf; 857 858 CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl); 859 860 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 861 862 TAILQ_FOREACH(pbuf, &pmap->pm_ptbl_list, link) 863 if (pbuf->kva == (vm_offset_t)ptbl) { 864 /* Remove from pmap ptbl buf list. */ 865 TAILQ_REMOVE(&pmap->pm_ptbl_list, pbuf, link); 866 867 /* Free corresponding ptbl buf. */ 868 ptbl_buf_free(pbuf); 869 break; 870 } 871 } 872 873 /* Allocate page table. */ 874 static pte_t * 875 ptbl_alloc(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx, boolean_t nosleep) 876 { 877 vm_page_t mtbl[PTBL_PAGES]; 878 vm_page_t m; 879 struct ptbl_buf *pbuf; 880 unsigned int pidx; 881 pte_t *ptbl; 882 int i, j; 883 884 CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, 885 (pmap == kernel_pmap), pdir_idx); 886 887 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 888 ("ptbl_alloc: invalid pdir_idx")); 889 KASSERT((pmap->pm_pdir[pdir_idx] == NULL), 890 ("pte_alloc: valid ptbl entry exists!")); 891 892 pbuf = ptbl_buf_alloc(); 893 if (pbuf == NULL) 894 panic("pte_alloc: couldn't alloc kernel virtual memory"); 895 896 ptbl = (pte_t *)pbuf->kva; 897 898 CTR2(KTR_PMAP, "%s: ptbl kva = %p", __func__, ptbl); 899 900 for (i = 0; i < PTBL_PAGES; i++) { 901 pidx = (PTBL_PAGES * pdir_idx) + i; 902 while ((m = vm_page_alloc(NULL, pidx, 903 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { 904 PMAP_UNLOCK(pmap); 905 rw_wunlock(&pvh_global_lock); 906 if (nosleep) { 907 ptbl_free_pmap_ptbl(pmap, ptbl); 908 for (j = 0; j < i; j++) 909 vm_page_free(mtbl[j]); 910 vm_wire_sub(i); 911 return (NULL); 912 } 913 vm_wait(NULL); 914 rw_wlock(&pvh_global_lock); 915 PMAP_LOCK(pmap); 916 } 917 mtbl[i] = m; 918 } 919 920 /* Map allocated pages into kernel_pmap. */ 921 mmu_booke_qenter(mmu, (vm_offset_t)ptbl, mtbl, PTBL_PAGES); 922 923 /* Zero whole ptbl. */ 924 bzero((caddr_t)ptbl, PTBL_PAGES * PAGE_SIZE); 925 926 /* Add pbuf to the pmap ptbl bufs list. */ 927 TAILQ_INSERT_TAIL(&pmap->pm_ptbl_list, pbuf, link); 928 929 return (ptbl); 930 } 931 932 /* Free ptbl pages and invalidate pdir entry. 
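/*
 * Note on the allocation loops above: when vm_page_alloc() fails and the
 * caller allowed sleeping (nosleep == FALSE), the pmap lock and the global
 * pv list lock are dropped around vm_wait() so the page daemon can make
 * progress, and are re-taken before retrying.  Callers therefore have to
 * assume that a sleeping ptbl_alloc()/pdir_alloc() may have temporarily
 * released those locks.
 */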
*/ 933 static void 934 ptbl_free(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) 935 { 936 pte_t *ptbl; 937 vm_paddr_t pa; 938 vm_offset_t va; 939 vm_page_t m; 940 int i; 941 942 CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, 943 (pmap == kernel_pmap), pdir_idx); 944 945 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 946 ("ptbl_free: invalid pdir_idx")); 947 948 ptbl = pmap->pm_pdir[pdir_idx]; 949 950 CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl); 951 952 KASSERT((ptbl != NULL), ("ptbl_free: null ptbl")); 953 954 /* 955 * Invalidate the pdir entry as soon as possible, so that other CPUs 956 * don't attempt to look up the page tables we are releasing. 957 */ 958 mtx_lock_spin(&tlbivax_mutex); 959 tlb_miss_lock(); 960 961 pmap->pm_pdir[pdir_idx] = NULL; 962 963 tlb_miss_unlock(); 964 mtx_unlock_spin(&tlbivax_mutex); 965 966 for (i = 0; i < PTBL_PAGES; i++) { 967 va = ((vm_offset_t)ptbl + (i * PAGE_SIZE)); 968 pa = pte_vatopa(mmu, kernel_pmap, va); 969 m = PHYS_TO_VM_PAGE(pa); 970 vm_page_free_zero(m); 971 vm_wire_sub(1); 972 mmu_booke_kremove(mmu, va); 973 } 974 975 ptbl_free_pmap_ptbl(pmap, ptbl); 976 } 977 978 /* 979 * Decrement ptbl pages hold count and attempt to free ptbl pages. 980 * Called when removing pte entry from ptbl. 981 * 982 * Return 1 if ptbl pages were freed. 983 */ 984 static int 985 ptbl_unhold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) 986 { 987 pte_t *ptbl; 988 vm_paddr_t pa; 989 vm_page_t m; 990 int i; 991 992 CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, 993 (pmap == kernel_pmap), pdir_idx); 994 995 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 996 ("ptbl_unhold: invalid pdir_idx")); 997 KASSERT((pmap != kernel_pmap), 998 ("ptbl_unhold: unholding kernel ptbl!")); 999 1000 ptbl = pmap->pm_pdir[pdir_idx]; 1001 1002 //debugf("ptbl_unhold: ptbl = 0x%08x\n", (u_int32_t)ptbl); 1003 KASSERT(((vm_offset_t)ptbl >= VM_MIN_KERNEL_ADDRESS), 1004 ("ptbl_unhold: non kva ptbl")); 1005 1006 /* decrement hold count */ 1007 for (i = 0; i < PTBL_PAGES; i++) { 1008 pa = pte_vatopa(mmu, kernel_pmap, 1009 (vm_offset_t)ptbl + (i * PAGE_SIZE)); 1010 m = PHYS_TO_VM_PAGE(pa); 1011 m->wire_count--; 1012 } 1013 1014 /* 1015 * Free ptbl pages if there are no pte etries in this ptbl. 1016 * wire_count has the same value for all ptbl pages, so check the last 1017 * page. 1018 */ 1019 if (m->wire_count == 0) { 1020 ptbl_free(mmu, pmap, pdir_idx); 1021 1022 //debugf("ptbl_unhold: e (freed ptbl)\n"); 1023 return (1); 1024 } 1025 1026 return (0); 1027 } 1028 1029 /* 1030 * Increment hold count for ptbl pages. This routine is used when a new pte 1031 * entry is being inserted into the ptbl. 1032 */ 1033 static void 1034 ptbl_hold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) 1035 { 1036 vm_paddr_t pa; 1037 pte_t *ptbl; 1038 vm_page_t m; 1039 int i; 1040 1041 CTR3(KTR_PMAP, "%s: pmap = %p pdir_idx = %d", __func__, pmap, 1042 pdir_idx); 1043 1044 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 1045 ("ptbl_hold: invalid pdir_idx")); 1046 KASSERT((pmap != kernel_pmap), 1047 ("ptbl_hold: holding kernel ptbl!")); 1048 1049 ptbl = pmap->pm_pdir[pdir_idx]; 1050 1051 KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl")); 1052 1053 for (i = 0; i < PTBL_PAGES; i++) { 1054 pa = pte_vatopa(mmu, kernel_pmap, 1055 (vm_offset_t)ptbl + (i * PAGE_SIZE)); 1056 m = PHYS_TO_VM_PAGE(pa); 1057 m->wire_count++; 1058 } 1059 } 1060 #endif 1061 1062 /* Allocate pv_entry structure. 
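/*
 * Illustrative numbers (assumed, not from a real system): mmu_booke_init()
 * below sizes the pv entry zone as
 *
 *	pv_entry_max        = shpgperproc * maxproc + v_page_count
 *	pv_entry_high_water = 9 * (pv_entry_max / 10)
 *
 * so with the default PMAP_SHPGPERPROC of 200, a hypothetical maxproc of
 * 4000 and 500000 physical pages this gives pv_entry_max = 1300000 and a
 * high-water mark of 1170000; pv_alloc() starts waking the page daemon once
 * the entry count passes that mark.
 */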
*/ 1063 pv_entry_t 1064 pv_alloc(void) 1065 { 1066 pv_entry_t pv; 1067 1068 pv_entry_count++; 1069 if (pv_entry_count > pv_entry_high_water) 1070 pagedaemon_wakeup(0); /* XXX powerpc NUMA */ 1071 pv = uma_zalloc(pvzone, M_NOWAIT); 1072 1073 return (pv); 1074 } 1075 1076 /* Free pv_entry structure. */ 1077 static __inline void 1078 pv_free(pv_entry_t pve) 1079 { 1080 1081 pv_entry_count--; 1082 uma_zfree(pvzone, pve); 1083 } 1084 1085 1086 /* Allocate and initialize pv_entry structure. */ 1087 static void 1088 pv_insert(pmap_t pmap, vm_offset_t va, vm_page_t m) 1089 { 1090 pv_entry_t pve; 1091 1092 //int su = (pmap == kernel_pmap); 1093 //debugf("pv_insert: s (su = %d pmap = 0x%08x va = 0x%08x m = 0x%08x)\n", su, 1094 // (u_int32_t)pmap, va, (u_int32_t)m); 1095 1096 pve = pv_alloc(); 1097 if (pve == NULL) 1098 panic("pv_insert: no pv entries!"); 1099 1100 pve->pv_pmap = pmap; 1101 pve->pv_va = va; 1102 1103 /* add to pv_list */ 1104 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1105 rw_assert(&pvh_global_lock, RA_WLOCKED); 1106 1107 TAILQ_INSERT_TAIL(&m->md.pv_list, pve, pv_link); 1108 1109 //debugf("pv_insert: e\n"); 1110 } 1111 1112 /* Destroy pv entry. */ 1113 static void 1114 pv_remove(pmap_t pmap, vm_offset_t va, vm_page_t m) 1115 { 1116 pv_entry_t pve; 1117 1118 //int su = (pmap == kernel_pmap); 1119 //debugf("pv_remove: s (su = %d pmap = 0x%08x va = 0x%08x)\n", su, (u_int32_t)pmap, va); 1120 1121 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1122 rw_assert(&pvh_global_lock, RA_WLOCKED); 1123 1124 /* find pv entry */ 1125 TAILQ_FOREACH(pve, &m->md.pv_list, pv_link) { 1126 if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) { 1127 /* remove from pv_list */ 1128 TAILQ_REMOVE(&m->md.pv_list, pve, pv_link); 1129 if (TAILQ_EMPTY(&m->md.pv_list)) 1130 vm_page_aflag_clear(m, PGA_WRITEABLE); 1131 1132 /* free pv entry struct */ 1133 pv_free(pve); 1134 break; 1135 } 1136 } 1137 1138 //debugf("pv_remove: e\n"); 1139 } 1140 1141 #ifdef __powerpc64__ 1142 /* 1143 * Clean pte entry, try to free page table page if requested. 1144 * 1145 * Return 1 if ptbl pages were freed, otherwise return 0. 1146 */ 1147 static int 1148 pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, u_int8_t flags) 1149 { 1150 vm_page_t m; 1151 pte_t *pte; 1152 1153 pte = pte_find(mmu, pmap, va); 1154 KASSERT(pte != NULL, ("%s: NULL pte", __func__)); 1155 1156 if (!PTE_ISVALID(pte)) 1157 return (0); 1158 1159 /* Get vm_page_t for mapped pte. */ 1160 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 1161 1162 if (PTE_ISWIRED(pte)) 1163 pmap->pm_stats.wired_count--; 1164 1165 /* Handle managed entry. */ 1166 if (PTE_ISMANAGED(pte)) { 1167 1168 /* Handle modified pages. */ 1169 if (PTE_ISMODIFIED(pte)) 1170 vm_page_dirty(m); 1171 1172 /* Referenced pages. */ 1173 if (PTE_ISREFERENCED(pte)) 1174 vm_page_aflag_set(m, PGA_REFERENCED); 1175 1176 /* Remove pv_entry from pv_list. */ 1177 pv_remove(pmap, va, m); 1178 } else if (m->md.pv_tracked) { 1179 pv_remove(pmap, va, m); 1180 if (TAILQ_EMPTY(&m->md.pv_list)) 1181 m->md.pv_tracked = false; 1182 } 1183 mtx_lock_spin(&tlbivax_mutex); 1184 tlb_miss_lock(); 1185 1186 tlb0_flush_entry(va); 1187 *pte = 0; 1188 1189 tlb_miss_unlock(); 1190 mtx_unlock_spin(&tlbivax_mutex); 1191 1192 pmap->pm_stats.resident_count--; 1193 1194 if (flags & PTBL_UNHOLD) { 1195 return (ptbl_unhold(mmu, pmap, va)); 1196 } 1197 return (0); 1198 } 1199 1200 /* 1201 * Insert PTE for a given page and virtual address. 
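/*
 * Illustrative sketch (not compiled): the flags argument of pte_remove()
 * controls whether the page table itself may be torn down.  pte_enter()
 * below replaces an existing valid mapping with
 *
 *	pte_remove(mmu, pmap, va, PTBL_HOLD);	keep the ptbl, reuse the slot
 *
 * while a teardown path that is finished with the address would instead use
 *
 *	pte_remove(mmu, pmap, va, PTBL_UNHOLD);	allow ptbl_unhold()/free
 *
 * Only the PTBL_UNHOLD case can cascade into freeing the ptbl and pdir pages.
 */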
1202 */ 1203 static int 1204 pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags, 1205 boolean_t nosleep) 1206 { 1207 unsigned int pp2d_idx = PP2D_IDX(va); 1208 unsigned int pdir_idx = PDIR_IDX(va); 1209 unsigned int ptbl_idx = PTBL_IDX(va); 1210 pte_t *ptbl, *pte, pte_tmp; 1211 pte_t **pdir; 1212 1213 /* Get the page directory pointer. */ 1214 pdir = pmap->pm_pp2d[pp2d_idx]; 1215 if (pdir == NULL) 1216 pdir = pdir_alloc(mmu, pmap, pp2d_idx, nosleep); 1217 1218 /* Get the page table pointer. */ 1219 ptbl = pdir[pdir_idx]; 1220 1221 if (ptbl == NULL) { 1222 /* Allocate page table pages. */ 1223 ptbl = ptbl_alloc(mmu, pmap, pdir, pdir_idx, nosleep); 1224 if (ptbl == NULL) { 1225 KASSERT(nosleep, ("nosleep and NULL ptbl")); 1226 return (ENOMEM); 1227 } 1228 pte = &ptbl[ptbl_idx]; 1229 } else { 1230 /* 1231 * Check if there is valid mapping for requested va, if there 1232 * is, remove it. 1233 */ 1234 pte = &ptbl[ptbl_idx]; 1235 if (PTE_ISVALID(pte)) { 1236 pte_remove(mmu, pmap, va, PTBL_HOLD); 1237 } else { 1238 /* 1239 * pte is not used, increment hold count for ptbl 1240 * pages. 1241 */ 1242 if (pmap != kernel_pmap) 1243 ptbl_hold(mmu, pmap, pdir, pdir_idx); 1244 } 1245 } 1246 1247 if (pdir[pdir_idx] == NULL) { 1248 if (pmap != kernel_pmap && pmap->pm_pp2d[pp2d_idx] != NULL) 1249 pdir_hold(mmu, pmap, pdir); 1250 pdir[pdir_idx] = ptbl; 1251 } 1252 if (pmap->pm_pp2d[pp2d_idx] == NULL) 1253 pmap->pm_pp2d[pp2d_idx] = pdir; 1254 1255 /* 1256 * Insert pv_entry into pv_list for mapped page if part of managed 1257 * memory. 1258 */ 1259 if ((m->oflags & VPO_UNMANAGED) == 0) { 1260 flags |= PTE_MANAGED; 1261 1262 /* Create and insert pv entry. */ 1263 pv_insert(pmap, va, m); 1264 } 1265 1266 pmap->pm_stats.resident_count++; 1267 1268 pte_tmp = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m)); 1269 pte_tmp |= (PTE_VALID | flags); 1270 1271 mtx_lock_spin(&tlbivax_mutex); 1272 tlb_miss_lock(); 1273 1274 tlb0_flush_entry(va); 1275 *pte = pte_tmp; 1276 1277 tlb_miss_unlock(); 1278 mtx_unlock_spin(&tlbivax_mutex); 1279 1280 return (0); 1281 } 1282 1283 /* Return the pa for the given pmap/va. */ 1284 static vm_paddr_t 1285 pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1286 { 1287 vm_paddr_t pa = 0; 1288 pte_t *pte; 1289 1290 pte = pte_find(mmu, pmap, va); 1291 if ((pte != NULL) && PTE_ISVALID(pte)) 1292 pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); 1293 return (pa); 1294 } 1295 1296 1297 /* allocate pte entries to manage (addr & mask) to (addr & mask) + size */ 1298 static void 1299 kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr, vm_offset_t pdir) 1300 { 1301 int i, j; 1302 vm_offset_t va; 1303 pte_t *pte; 1304 1305 va = addr; 1306 /* Initialize kernel pdir */ 1307 for (i = 0; i < kernel_pdirs; i++) { 1308 kernel_pmap->pm_pp2d[i + PP2D_IDX(va)] = 1309 (pte_t **)(pdir + (i * PAGE_SIZE * PDIR_PAGES)); 1310 for (j = PDIR_IDX(va + (i * PAGE_SIZE * PDIR_NENTRIES * PTBL_NENTRIES)); 1311 j < PDIR_NENTRIES; j++) { 1312 kernel_pmap->pm_pp2d[i + PP2D_IDX(va)][j] = 1313 (pte_t *)(pdir + (kernel_pdirs * PAGE_SIZE) + 1314 (((i * PDIR_NENTRIES) + j) * PAGE_SIZE)); 1315 } 1316 } 1317 1318 /* 1319 * Fill in PTEs covering kernel code and data. They are not required 1320 * for address translation, as this area is covered by static TLB1 1321 * entries, but for pte_vatopa() to work correctly with kernel area 1322 * addresses. 
1323 */ 1324 for (va = addr; va < data_end; va += PAGE_SIZE) { 1325 pte = &(kernel_pmap->pm_pp2d[PP2D_IDX(va)][PDIR_IDX(va)][PTBL_IDX(va)]); 1326 *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart)); 1327 *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | 1328 PTE_VALID | PTE_PS_4KB; 1329 } 1330 } 1331 #else 1332 /* 1333 * Clean pte entry, try to free page table page if requested. 1334 * 1335 * Return 1 if ptbl pages were freed, otherwise return 0. 1336 */ 1337 static int 1338 pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, uint8_t flags) 1339 { 1340 unsigned int pdir_idx = PDIR_IDX(va); 1341 unsigned int ptbl_idx = PTBL_IDX(va); 1342 vm_page_t m; 1343 pte_t *ptbl; 1344 pte_t *pte; 1345 1346 //int su = (pmap == kernel_pmap); 1347 //debugf("pte_remove: s (su = %d pmap = 0x%08x va = 0x%08x flags = %d)\n", 1348 // su, (u_int32_t)pmap, va, flags); 1349 1350 ptbl = pmap->pm_pdir[pdir_idx]; 1351 KASSERT(ptbl, ("pte_remove: null ptbl")); 1352 1353 pte = &ptbl[ptbl_idx]; 1354 1355 if (pte == NULL || !PTE_ISVALID(pte)) 1356 return (0); 1357 1358 if (PTE_ISWIRED(pte)) 1359 pmap->pm_stats.wired_count--; 1360 1361 /* Get vm_page_t for mapped pte. */ 1362 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 1363 1364 /* Handle managed entry. */ 1365 if (PTE_ISMANAGED(pte)) { 1366 1367 if (PTE_ISMODIFIED(pte)) 1368 vm_page_dirty(m); 1369 1370 if (PTE_ISREFERENCED(pte)) 1371 vm_page_aflag_set(m, PGA_REFERENCED); 1372 1373 pv_remove(pmap, va, m); 1374 } else if (m->md.pv_tracked) { 1375 /* 1376 * Always pv_insert()/pv_remove() on MPC85XX, in case DPAA is 1377 * used. This is needed by the NCSW support code for fast 1378 * VA<->PA translation. 1379 */ 1380 pv_remove(pmap, va, m); 1381 if (TAILQ_EMPTY(&m->md.pv_list)) 1382 m->md.pv_tracked = false; 1383 } 1384 1385 mtx_lock_spin(&tlbivax_mutex); 1386 tlb_miss_lock(); 1387 1388 tlb0_flush_entry(va); 1389 *pte = 0; 1390 1391 tlb_miss_unlock(); 1392 mtx_unlock_spin(&tlbivax_mutex); 1393 1394 pmap->pm_stats.resident_count--; 1395 1396 if (flags & PTBL_UNHOLD) { 1397 //debugf("pte_remove: e (unhold)\n"); 1398 return (ptbl_unhold(mmu, pmap, pdir_idx)); 1399 } 1400 1401 //debugf("pte_remove: e\n"); 1402 return (0); 1403 } 1404 1405 /* 1406 * Insert PTE for a given page and virtual address. 1407 */ 1408 static int 1409 pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags, 1410 boolean_t nosleep) 1411 { 1412 unsigned int pdir_idx = PDIR_IDX(va); 1413 unsigned int ptbl_idx = PTBL_IDX(va); 1414 pte_t *ptbl, *pte, pte_tmp; 1415 1416 CTR4(KTR_PMAP, "%s: su = %d pmap = %p va = %p", __func__, 1417 pmap == kernel_pmap, pmap, va); 1418 1419 /* Get the page table pointer. */ 1420 ptbl = pmap->pm_pdir[pdir_idx]; 1421 1422 if (ptbl == NULL) { 1423 /* Allocate page table pages. */ 1424 ptbl = ptbl_alloc(mmu, pmap, pdir_idx, nosleep); 1425 if (ptbl == NULL) { 1426 KASSERT(nosleep, ("nosleep and NULL ptbl")); 1427 return (ENOMEM); 1428 } 1429 pmap->pm_pdir[pdir_idx] = ptbl; 1430 pte = &ptbl[ptbl_idx]; 1431 } else { 1432 /* 1433 * Check if there is valid mapping for requested 1434 * va, if there is, remove it. 1435 */ 1436 pte = &pmap->pm_pdir[pdir_idx][ptbl_idx]; 1437 if (PTE_ISVALID(pte)) { 1438 pte_remove(mmu, pmap, va, PTBL_HOLD); 1439 } else { 1440 /* 1441 * pte is not used, increment hold count 1442 * for ptbl pages. 1443 */ 1444 if (pmap != kernel_pmap) 1445 ptbl_hold(mmu, pmap, pdir_idx); 1446 } 1447 } 1448 1449 /* 1450 * Insert pv_entry into pv_list for mapped page if part of managed 1451 * memory. 
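/*
 * Pages entered without VPO_UNMANAGED (ordinary anonymous and file-backed
 * pages) get a pv entry here, which is what later lets routines such as
 * mmu_booke_remove_all() and mmu_booke_ts_referenced() find every mapping of
 * a page by walking m->md.pv_list.  Unmanaged pages (device memory and pages
 * entered through the kenter path) are deliberately left out of that list.
 */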
1452 */ 1453 if ((m->oflags & VPO_UNMANAGED) == 0) { 1454 flags |= PTE_MANAGED; 1455 1456 /* Create and insert pv entry. */ 1457 pv_insert(pmap, va, m); 1458 } 1459 1460 pmap->pm_stats.resident_count++; 1461 1462 pte_tmp = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m)); 1463 pte_tmp |= (PTE_VALID | flags | PTE_PS_4KB); /* 4KB pages only */ 1464 1465 mtx_lock_spin(&tlbivax_mutex); 1466 tlb_miss_lock(); 1467 1468 tlb0_flush_entry(va); 1469 *pte = pte_tmp; 1470 1471 tlb_miss_unlock(); 1472 mtx_unlock_spin(&tlbivax_mutex); 1473 return (0); 1474 } 1475 1476 /* Return the pa for the given pmap/va. */ 1477 static vm_paddr_t 1478 pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1479 { 1480 vm_paddr_t pa = 0; 1481 pte_t *pte; 1482 1483 pte = pte_find(mmu, pmap, va); 1484 if ((pte != NULL) && PTE_ISVALID(pte)) 1485 pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); 1486 return (pa); 1487 } 1488 1489 /* Get a pointer to a PTE in a page table. */ 1490 static pte_t * 1491 pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1492 { 1493 unsigned int pdir_idx = PDIR_IDX(va); 1494 unsigned int ptbl_idx = PTBL_IDX(va); 1495 1496 KASSERT((pmap != NULL), ("pte_find: invalid pmap")); 1497 1498 if (pmap->pm_pdir[pdir_idx]) 1499 return (&(pmap->pm_pdir[pdir_idx][ptbl_idx])); 1500 1501 return (NULL); 1502 } 1503 1504 /* Set up kernel page tables. */ 1505 static void 1506 kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr, vm_offset_t pdir) 1507 { 1508 int i; 1509 vm_offset_t va; 1510 pte_t *pte; 1511 1512 /* Initialize kernel pdir */ 1513 for (i = 0; i < kernel_ptbls; i++) 1514 kernel_pmap->pm_pdir[kptbl_min + i] = 1515 (pte_t *)(pdir + (i * PAGE_SIZE * PTBL_PAGES)); 1516 1517 /* 1518 * Fill in PTEs covering kernel code and data. They are not required 1519 * for address translation, as this area is covered by static TLB1 1520 * entries, but for pte_vatopa() to work correctly with kernel area 1521 * addresses. 1522 */ 1523 for (va = addr; va < data_end; va += PAGE_SIZE) { 1524 pte = &(kernel_pmap->pm_pdir[PDIR_IDX(va)][PTBL_IDX(va)]); 1525 *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart)); 1526 *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | 1527 PTE_VALID | PTE_PS_4KB; 1528 } 1529 } 1530 #endif 1531 1532 /**************************************************************************/ 1533 /* PMAP related */ 1534 /**************************************************************************/ 1535 1536 /* 1537 * This is called during booke_init, before the system is really initialized. 1538 */ 1539 static void 1540 mmu_booke_bootstrap(mmu_t mmu, vm_offset_t start, vm_offset_t kernelend) 1541 { 1542 vm_paddr_t phys_kernelend; 1543 struct mem_region *mp, *mp1; 1544 int cnt, i, j; 1545 vm_paddr_t s, e, sz; 1546 vm_paddr_t physsz, hwphyssz; 1547 u_int phys_avail_count; 1548 vm_size_t kstack0_sz; 1549 vm_offset_t kernel_pdir, kstack0; 1550 vm_paddr_t kstack0_phys; 1551 void *dpcpu; 1552 vm_offset_t kernel_ptbl_root; 1553 1554 debugf("mmu_booke_bootstrap: entered\n"); 1555 1556 /* Set interesting system properties */ 1557 #ifdef __powerpc64__ 1558 hw_direct_map = 1; 1559 #else 1560 hw_direct_map = 0; 1561 #endif 1562 #if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__) 1563 elf32_nxstack = 1; 1564 #endif 1565 1566 /* Initialize invalidation mutex */ 1567 mtx_init(&tlbivax_mutex, "tlbivax", NULL, MTX_SPIN); 1568 1569 /* Read TLB0 size and associativity. */ 1570 tlb0_get_tlbconf(); 1571 1572 /* 1573 * Align kernel start and end address (kernel image). 1574 * Note that kernel end does not necessarily relate to kernsize. 
1575 * kernsize is the size of the kernel that is actually mapped. 1576 */ 1577 kernstart = trunc_page(start); 1578 data_start = round_page(kernelend); 1579 data_end = data_start; 1580 1581 /* Allocate the dynamic per-cpu area. */ 1582 dpcpu = (void *)data_end; 1583 data_end += DPCPU_SIZE; 1584 1585 /* Allocate space for the message buffer. */ 1586 msgbufp = (struct msgbuf *)data_end; 1587 data_end += msgbufsize; 1588 debugf(" msgbufp at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", 1589 (uintptr_t)msgbufp, data_end); 1590 1591 data_end = round_page(data_end); 1592 1593 #ifdef __powerpc64__ 1594 kernel_ptbl_root = data_end; 1595 data_end += PP2D_NENTRIES * sizeof(pte_t**); 1596 #else 1597 /* Allocate space for ptbl_bufs. */ 1598 ptbl_bufs = (struct ptbl_buf *)data_end; 1599 data_end += sizeof(struct ptbl_buf) * PTBL_BUFS; 1600 debugf(" ptbl_bufs at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", 1601 (uintptr_t)ptbl_bufs, data_end); 1602 1603 data_end = round_page(data_end); 1604 kernel_ptbl_root = data_end; 1605 data_end += PDIR_NENTRIES * sizeof(pte_t*); 1606 #endif 1607 1608 /* Allocate PTE tables for kernel KVA. */ 1609 kernel_pdir = data_end; 1610 kernel_ptbls = howmany(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, 1611 PDIR_SIZE); 1612 #ifdef __powerpc64__ 1613 kernel_pdirs = howmany(kernel_ptbls, PDIR_NENTRIES); 1614 data_end += kernel_pdirs * PDIR_PAGES * PAGE_SIZE; 1615 #endif 1616 data_end += kernel_ptbls * PTBL_PAGES * PAGE_SIZE; 1617 debugf(" kernel ptbls: %d\n", kernel_ptbls); 1618 debugf(" kernel pdir at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", 1619 kernel_pdir, data_end); 1620 1621 debugf(" data_end: 0x%"PRI0ptrX"\n", data_end); 1622 if (data_end - kernstart > kernsize) { 1623 kernsize += tlb1_mapin_region(kernstart + kernsize, 1624 kernload + kernsize, (data_end - kernstart) - kernsize); 1625 } 1626 data_end = kernstart + kernsize; 1627 debugf(" updated data_end: 0x%"PRI0ptrX"\n", data_end); 1628 1629 /* 1630 * Clear the structures - note we can only do it safely after the 1631 * possible additional TLB1 translations are in place (above) so that 1632 * all range up to the currently calculated 'data_end' is covered. 1633 */ 1634 dpcpu_init(dpcpu, 0); 1635 #ifdef __powerpc64__ 1636 memset((void *)kernel_pdir, 0, 1637 kernel_pdirs * PDIR_PAGES * PAGE_SIZE + 1638 kernel_ptbls * PTBL_PAGES * PAGE_SIZE); 1639 #else 1640 memset((void *)ptbl_bufs, 0, sizeof(struct ptbl_buf) * PTBL_SIZE); 1641 memset((void *)kernel_pdir, 0, kernel_ptbls * PTBL_PAGES * PAGE_SIZE); 1642 #endif 1643 1644 /*******************************************************/ 1645 /* Set the start and end of kva. */ 1646 /*******************************************************/ 1647 virtual_avail = round_page(data_end); 1648 virtual_end = VM_MAX_KERNEL_ADDRESS; 1649 1650 /* Allocate KVA space for page zero/copy operations. */ 1651 zero_page_va = virtual_avail; 1652 virtual_avail += PAGE_SIZE; 1653 copy_page_src_va = virtual_avail; 1654 virtual_avail += PAGE_SIZE; 1655 copy_page_dst_va = virtual_avail; 1656 virtual_avail += PAGE_SIZE; 1657 debugf("zero_page_va = 0x%"PRI0ptrX"\n", zero_page_va); 1658 debugf("copy_page_src_va = 0x%"PRI0ptrX"\n", copy_page_src_va); 1659 debugf("copy_page_dst_va = 0x%"PRI0ptrX"\n", copy_page_dst_va); 1660 1661 /* Initialize page zero/copy mutexes. */ 1662 mtx_init(&zero_page_mutex, "mmu_booke_zero_page", NULL, MTX_DEF); 1663 mtx_init(©_page_mutex, "mmu_booke_copy_page", NULL, MTX_DEF); 1664 1665 #ifndef __powerpc64__ 1666 /* Allocate KVA space for ptbl bufs. 
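/*
 * Illustrative numbers (assumed, not from a real config): the reservation
 * made just below is PTBL_BUFS * PTBL_PAGES * PAGE_SIZE bytes of KVA.  With
 * PTBL_BUFS = 128 * 16 = 2048 as defined above, and assuming PTBL_PAGES = 2
 * and a 4 KB PAGE_SIZE, that is 2048 * 8 KB = 16 MB of kernel virtual space
 * set aside for mapping user page tables on the 32-bit pmap.
 */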
*/ 1667 ptbl_buf_pool_vabase = virtual_avail; 1668 virtual_avail += PTBL_BUFS * PTBL_PAGES * PAGE_SIZE; 1669 debugf("ptbl_buf_pool_vabase = 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", 1670 ptbl_buf_pool_vabase, virtual_avail); 1671 #endif 1672 1673 /* Calculate corresponding physical addresses for the kernel region. */ 1674 phys_kernelend = kernload + kernsize; 1675 debugf("kernel image and allocated data:\n"); 1676 debugf(" kernload = 0x%09llx\n", (uint64_t)kernload); 1677 debugf(" kernstart = 0x%"PRI0ptrX"\n", kernstart); 1678 debugf(" kernsize = 0x%"PRI0ptrX"\n", kernsize); 1679 1680 /* 1681 * Remove kernel physical address range from avail regions list. Page 1682 * align all regions. Non-page aligned memory isn't very interesting 1683 * to us. Also, sort the entries for ascending addresses. 1684 */ 1685 1686 /* Retrieve phys/avail mem regions */ 1687 mem_regions(&physmem_regions, &physmem_regions_sz, 1688 &availmem_regions, &availmem_regions_sz); 1689 1690 if (nitems(phys_avail) < availmem_regions_sz) 1691 panic("mmu_booke_bootstrap: phys_avail too small"); 1692 1693 sz = 0; 1694 cnt = availmem_regions_sz; 1695 debugf("processing avail regions:\n"); 1696 for (mp = availmem_regions; mp->mr_size; mp++) { 1697 s = mp->mr_start; 1698 e = mp->mr_start + mp->mr_size; 1699 debugf(" %09jx-%09jx -> ", (uintmax_t)s, (uintmax_t)e); 1700 /* Check whether this region holds all of the kernel. */ 1701 if (s < kernload && e > phys_kernelend) { 1702 availmem_regions[cnt].mr_start = phys_kernelend; 1703 availmem_regions[cnt++].mr_size = e - phys_kernelend; 1704 e = kernload; 1705 } 1706 /* Look whether this regions starts within the kernel. */ 1707 if (s >= kernload && s < phys_kernelend) { 1708 if (e <= phys_kernelend) 1709 goto empty; 1710 s = phys_kernelend; 1711 } 1712 /* Now look whether this region ends within the kernel. */ 1713 if (e > kernload && e <= phys_kernelend) { 1714 if (s >= kernload) 1715 goto empty; 1716 e = kernload; 1717 } 1718 /* Now page align the start and size of the region. */ 1719 s = round_page(s); 1720 e = trunc_page(e); 1721 if (e < s) 1722 e = s; 1723 sz = e - s; 1724 debugf("%09jx-%09jx = %jx\n", 1725 (uintmax_t)s, (uintmax_t)e, (uintmax_t)sz); 1726 1727 /* Check whether some memory is left here. */ 1728 if (sz == 0) { 1729 empty: 1730 memmove(mp, mp + 1, 1731 (cnt - (mp - availmem_regions)) * sizeof(*mp)); 1732 cnt--; 1733 mp--; 1734 continue; 1735 } 1736 1737 /* Do an insertion sort. 
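/*
 * Worked example for the region trimming above (hypothetical addresses):
 * with kernload = 0x0100_0000, phys_kernelend = 0x0180_0000 and an avail
 * region 0x0000_0000 - 0x2000_0000, the first test splits it into
 *
 *	0x0000_0000 - 0x0100_0000	(end clipped to kernload)
 *	0x0180_0000 - 0x2000_0000	(appended as a new region)
 *
 * after which both pieces are page-aligned and insertion-sorted by start
 * address, so phys_avail[] ends up ordered and free of the kernel image.
 */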
*/ 1738 for (mp1 = availmem_regions; mp1 < mp; mp1++) 1739 if (s < mp1->mr_start) 1740 break; 1741 if (mp1 < mp) { 1742 memmove(mp1 + 1, mp1, (char *)mp - (char *)mp1); 1743 mp1->mr_start = s; 1744 mp1->mr_size = sz; 1745 } else { 1746 mp->mr_start = s; 1747 mp->mr_size = sz; 1748 } 1749 } 1750 availmem_regions_sz = cnt; 1751 1752 /*******************************************************/ 1753 /* Steal physical memory for kernel stack from the end */ 1754 /* of the first avail region */ 1755 /*******************************************************/ 1756 kstack0_sz = kstack_pages * PAGE_SIZE; 1757 kstack0_phys = availmem_regions[0].mr_start + 1758 availmem_regions[0].mr_size; 1759 kstack0_phys -= kstack0_sz; 1760 availmem_regions[0].mr_size -= kstack0_sz; 1761 1762 /*******************************************************/ 1763 /* Fill in phys_avail table, based on availmem_regions */ 1764 /*******************************************************/ 1765 phys_avail_count = 0; 1766 physsz = 0; 1767 hwphyssz = 0; 1768 TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz); 1769 1770 debugf("fill in phys_avail:\n"); 1771 for (i = 0, j = 0; i < availmem_regions_sz; i++, j += 2) { 1772 1773 debugf(" region: 0x%jx - 0x%jx (0x%jx)\n", 1774 (uintmax_t)availmem_regions[i].mr_start, 1775 (uintmax_t)availmem_regions[i].mr_start + 1776 availmem_regions[i].mr_size, 1777 (uintmax_t)availmem_regions[i].mr_size); 1778 1779 if (hwphyssz != 0 && 1780 (physsz + availmem_regions[i].mr_size) >= hwphyssz) { 1781 debugf(" hw.physmem adjust\n"); 1782 if (physsz < hwphyssz) { 1783 phys_avail[j] = availmem_regions[i].mr_start; 1784 phys_avail[j + 1] = 1785 availmem_regions[i].mr_start + 1786 hwphyssz - physsz; 1787 physsz = hwphyssz; 1788 phys_avail_count++; 1789 } 1790 break; 1791 } 1792 1793 phys_avail[j] = availmem_regions[i].mr_start; 1794 phys_avail[j + 1] = availmem_regions[i].mr_start + 1795 availmem_regions[i].mr_size; 1796 phys_avail_count++; 1797 physsz += availmem_regions[i].mr_size; 1798 } 1799 physmem = btoc(physsz); 1800 1801 /* Calculate the last available physical address. */ 1802 for (i = 0; phys_avail[i + 2] != 0; i += 2) 1803 ; 1804 Maxmem = powerpc_btop(phys_avail[i + 1]); 1805 1806 debugf("Maxmem = 0x%08lx\n", Maxmem); 1807 debugf("phys_avail_count = %d\n", phys_avail_count); 1808 debugf("physsz = 0x%09jx physmem = %jd (0x%09jx)\n", 1809 (uintmax_t)physsz, (uintmax_t)physmem, (uintmax_t)physmem); 1810 1811 #ifdef __powerpc64__ 1812 /* 1813 * Map the physical memory contiguously in TLB1. 1814 * Round so it fits into a single mapping. 1815 */ 1816 tlb1_mapin_region(DMAP_BASE_ADDRESS, 0, 1817 phys_avail[i + 1]); 1818 #endif 1819 1820 /*******************************************************/ 1821 /* Initialize (statically allocated) kernel pmap. 
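/*
 * Worked example for the hw.physmem clamp above (hypothetical values): with
 * two avail regions of 1 GB each and the tunable hw.physmem set to 1.5 GB,
 * the first region is entered whole (physsz = 1 GB), and the second is
 * truncated to hwphyssz - physsz = 512 MB before the loop breaks, so
 * phys_avail[] never describes more memory than the administrator asked for.
 */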
*/ 1822 /*******************************************************/ 1823 PMAP_LOCK_INIT(kernel_pmap); 1824 #ifndef __powerpc64__ 1825 kptbl_min = VM_MIN_KERNEL_ADDRESS / PDIR_SIZE; 1826 #endif 1827 #ifdef __powerpc64__ 1828 kernel_pmap->pm_pp2d = (pte_t ***)kernel_ptbl_root; 1829 #else 1830 kernel_pmap->pm_pdir = (pte_t **)kernel_ptbl_root; 1831 #endif 1832 1833 debugf("kernel_pmap = 0x%"PRI0ptrX"\n", (uintptr_t)kernel_pmap); 1834 kernel_pte_alloc(virtual_avail, kernstart, kernel_pdir); 1835 for (i = 0; i < MAXCPU; i++) { 1836 kernel_pmap->pm_tid[i] = TID_KERNEL; 1837 1838 /* Initialize each CPU's tidbusy entry 0 with kernel_pmap */ 1839 tidbusy[i][TID_KERNEL] = kernel_pmap; 1840 } 1841 1842 /* Mark kernel_pmap active on all CPUs */ 1843 CPU_FILL(&kernel_pmap->pm_active); 1844 1845 /* 1846 * Initialize the global pv list lock. 1847 */ 1848 rw_init(&pvh_global_lock, "pmap pv global"); 1849 1850 /*******************************************************/ 1851 /* Final setup */ 1852 /*******************************************************/ 1853 1854 /* Enter kstack0 into kernel map, provide guard page */ 1855 kstack0 = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE; 1856 thread0.td_kstack = kstack0; 1857 thread0.td_kstack_pages = kstack_pages; 1858 1859 debugf("kstack_sz = 0x%08x\n", kstack0_sz); 1860 debugf("kstack0_phys at 0x%09llx - 0x%09llx\n", 1861 kstack0_phys, kstack0_phys + kstack0_sz); 1862 debugf("kstack0 at 0x%"PRI0ptrX" - 0x%"PRI0ptrX"\n", 1863 kstack0, kstack0 + kstack0_sz); 1864 1865 virtual_avail += KSTACK_GUARD_PAGES * PAGE_SIZE + kstack0_sz; 1866 for (i = 0; i < kstack_pages; i++) { 1867 mmu_booke_kenter(mmu, kstack0, kstack0_phys); 1868 kstack0 += PAGE_SIZE; 1869 kstack0_phys += PAGE_SIZE; 1870 } 1871 1872 pmap_bootstrapped = 1; 1873 1874 debugf("virtual_avail = %"PRI0ptrX"\n", virtual_avail); 1875 debugf("virtual_end = %"PRI0ptrX"\n", virtual_end); 1876 1877 debugf("mmu_booke_bootstrap: exit\n"); 1878 } 1879 1880 #ifdef SMP 1881 void 1882 tlb1_ap_prep(void) 1883 { 1884 tlb_entry_t *e, tmp; 1885 unsigned int i; 1886 1887 /* Prepare TLB1 image for AP processors */ 1888 e = __boot_tlb1; 1889 for (i = 0; i < TLB1_ENTRIES; i++) { 1890 tlb1_read_entry(&tmp, i); 1891 1892 if ((tmp.mas1 & MAS1_VALID) && (tmp.mas2 & _TLB_ENTRY_SHARED)) 1893 memcpy(e++, &tmp, sizeof(tmp)); 1894 } 1895 } 1896 1897 void 1898 pmap_bootstrap_ap(volatile uint32_t *trcp __unused) 1899 { 1900 int i; 1901 1902 /* 1903 * Finish TLB1 configuration: the BSP already set up its TLB1 and we 1904 * have the snapshot of its contents in the s/w __boot_tlb1[] table 1905 * created by tlb1_ap_prep(), so use these values directly to 1906 * (re)program AP's TLB1 hardware. 1907 * 1908 * Start at index 1 because index 0 has the kernel map. 1909 */ 1910 for (i = 1; i < TLB1_ENTRIES; i++) { 1911 if (__boot_tlb1[i].mas1 & MAS1_VALID) 1912 tlb1_write_entry(&__boot_tlb1[i], i); 1913 } 1914 1915 set_mas4_defaults(); 1916 } 1917 #endif 1918 1919 static void 1920 booke_pmap_init_qpages(void) 1921 { 1922 struct pcpu *pc; 1923 int i; 1924 1925 CPU_FOREACH(i) { 1926 pc = pcpu_find(i); 1927 pc->pc_qmap_addr = kva_alloc(PAGE_SIZE); 1928 if (pc->pc_qmap_addr == 0) 1929 panic("pmap_init_qpages: unable to allocate KVA"); 1930 } 1931 } 1932 1933 SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, booke_pmap_init_qpages, NULL); 1934 1935 /* 1936 * Get the physical page address for the given pmap/virtual address. 
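/*
 * Illustrative layout (assumed values) for the kstack0 setup above: with
 * KSTACK_GUARD_PAGES = 1 and kstack_pages = 4, the bootstrap carves
 *
 *	virtual_avail + 0x0000	guard page, left unmapped
 *	virtual_avail + 0x1000	kstack0, 4 pages mapped with mmu_booke_kenter()
 *				to kstack0_phys .. kstack0_phys + 4 pages
 *
 * and then advances virtual_avail past the whole 5-page block, so a stack
 * overflow of thread0 faults on the unmapped guard page instead of silently
 * corrupting whatever gets allocated next.
 */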
1937 */ 1938 static vm_paddr_t 1939 mmu_booke_extract(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1940 { 1941 vm_paddr_t pa; 1942 1943 PMAP_LOCK(pmap); 1944 pa = pte_vatopa(mmu, pmap, va); 1945 PMAP_UNLOCK(pmap); 1946 1947 return (pa); 1948 } 1949 1950 /* 1951 * Extract the physical page address associated with the given 1952 * kernel virtual address. 1953 */ 1954 static vm_paddr_t 1955 mmu_booke_kextract(mmu_t mmu, vm_offset_t va) 1956 { 1957 tlb_entry_t e; 1958 vm_paddr_t p = 0; 1959 int i; 1960 1961 #ifdef __powerpc64__ 1962 if (va >= DMAP_BASE_ADDRESS && va <= DMAP_MAX_ADDRESS) 1963 return (DMAP_TO_PHYS(va)); 1964 #endif 1965 1966 if (va >= VM_MIN_KERNEL_ADDRESS && va <= VM_MAX_KERNEL_ADDRESS) 1967 p = pte_vatopa(mmu, kernel_pmap, va); 1968 1969 if (p == 0) { 1970 /* Check TLB1 mappings */ 1971 for (i = 0; i < TLB1_ENTRIES; i++) { 1972 tlb1_read_entry(&e, i); 1973 if (!(e.mas1 & MAS1_VALID)) 1974 continue; 1975 if (va >= e.virt && va < e.virt + e.size) 1976 return (e.phys + (va - e.virt)); 1977 } 1978 } 1979 1980 return (p); 1981 } 1982 1983 /* 1984 * Initialize the pmap module. 1985 * Called by vm_init, to initialize any structures that the pmap 1986 * system needs to map virtual memory. 1987 */ 1988 static void 1989 mmu_booke_init(mmu_t mmu) 1990 { 1991 int shpgperproc = PMAP_SHPGPERPROC; 1992 1993 /* 1994 * Initialize the address space (zone) for the pv entries. Set a 1995 * high water mark so that the system can recover from excessive 1996 * numbers of pv entries. 1997 */ 1998 pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL, 1999 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); 2000 2001 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 2002 pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count; 2003 2004 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); 2005 pv_entry_high_water = 9 * (pv_entry_max / 10); 2006 2007 uma_zone_reserve_kva(pvzone, pv_entry_max); 2008 2009 /* Pre-fill pvzone with initial number of pv entries. */ 2010 uma_prealloc(pvzone, PV_ENTRY_ZONE_MIN); 2011 2012 /* Create a UMA zone for page table roots. */ 2013 ptbl_root_zone = uma_zcreate("pmap root", PMAP_ROOT_SIZE, 2014 NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, UMA_ZONE_VM); 2015 2016 /* Initialize ptbl allocation. */ 2017 ptbl_init(); 2018 } 2019 2020 /* 2021 * Map a list of wired pages into kernel virtual address space. This is 2022 * intended for temporary mappings which do not need page modification or 2023 * references recorded. Existing mappings in the region are overwritten. 2024 */ 2025 static void 2026 mmu_booke_qenter(mmu_t mmu, vm_offset_t sva, vm_page_t *m, int count) 2027 { 2028 vm_offset_t va; 2029 2030 va = sva; 2031 while (count-- > 0) { 2032 mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(*m)); 2033 va += PAGE_SIZE; 2034 m++; 2035 } 2036 } 2037 2038 /* 2039 * Remove page mappings from kernel virtual address space. Intended for 2040 * temporary mappings entered by mmu_booke_qenter. 2041 */ 2042 static void 2043 mmu_booke_qremove(mmu_t mmu, vm_offset_t sva, int count) 2044 { 2045 vm_offset_t va; 2046 2047 va = sva; 2048 while (count-- > 0) { 2049 mmu_booke_kremove(mmu, va); 2050 va += PAGE_SIZE; 2051 } 2052 } 2053 2054 /* 2055 * Map a wired page into kernel virtual address space. 
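 * This is a thin wrapper around mmu_booke_kenter_attr() using
 * VM_MEMATTR_DEFAULT for the memory attributes.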
2056 */ 2057 static void 2058 mmu_booke_kenter(mmu_t mmu, vm_offset_t va, vm_paddr_t pa) 2059 { 2060 2061 mmu_booke_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT); 2062 } 2063 2064 static void 2065 mmu_booke_kenter_attr(mmu_t mmu, vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma) 2066 { 2067 uint32_t flags; 2068 pte_t *pte; 2069 2070 KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && 2071 (va <= VM_MAX_KERNEL_ADDRESS)), ("mmu_booke_kenter: invalid va")); 2072 2073 flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID; 2074 flags |= tlb_calc_wimg(pa, ma) << PTE_MAS2_SHIFT; 2075 flags |= PTE_PS_4KB; 2076 2077 pte = pte_find(mmu, kernel_pmap, va); 2078 KASSERT((pte != NULL), ("mmu_booke_kenter: invalid va. NULL PTE")); 2079 2080 mtx_lock_spin(&tlbivax_mutex); 2081 tlb_miss_lock(); 2082 2083 if (PTE_ISVALID(pte)) { 2084 2085 CTR1(KTR_PMAP, "%s: replacing entry!", __func__); 2086 2087 /* Flush entry from TLB0 */ 2088 tlb0_flush_entry(va); 2089 } 2090 2091 *pte = PTE_RPN_FROM_PA(pa) | flags; 2092 2093 //debugf("mmu_booke_kenter: pdir_idx = %d ptbl_idx = %d va=0x%08x " 2094 // "pa=0x%08x rpn=0x%08x flags=0x%08x\n", 2095 // pdir_idx, ptbl_idx, va, pa, pte->rpn, pte->flags); 2096 2097 /* Flush the real memory from the instruction cache. */ 2098 if ((flags & (PTE_I | PTE_G)) == 0) 2099 __syncicache((void *)va, PAGE_SIZE); 2100 2101 tlb_miss_unlock(); 2102 mtx_unlock_spin(&tlbivax_mutex); 2103 } 2104 2105 /* 2106 * Remove a page from kernel page table. 2107 */ 2108 static void 2109 mmu_booke_kremove(mmu_t mmu, vm_offset_t va) 2110 { 2111 pte_t *pte; 2112 2113 CTR2(KTR_PMAP,"%s: s (va = 0x%"PRI0ptrX")\n", __func__, va); 2114 2115 KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && 2116 (va <= VM_MAX_KERNEL_ADDRESS)), 2117 ("mmu_booke_kremove: invalid va")); 2118 2119 pte = pte_find(mmu, kernel_pmap, va); 2120 2121 if (!PTE_ISVALID(pte)) { 2122 2123 CTR1(KTR_PMAP, "%s: invalid pte", __func__); 2124 2125 return; 2126 } 2127 2128 mtx_lock_spin(&tlbivax_mutex); 2129 tlb_miss_lock(); 2130 2131 /* Invalidate entry in TLB0, update PTE. */ 2132 tlb0_flush_entry(va); 2133 *pte = 0; 2134 2135 tlb_miss_unlock(); 2136 mtx_unlock_spin(&tlbivax_mutex); 2137 } 2138 2139 /* 2140 * Provide a kernel pointer corresponding to a given userland pointer. 2141 * The returned pointer is valid until the next time this function is 2142 * called in this thread. This is used internally in copyin/copyout. 2143 */ 2144 int 2145 mmu_booke_map_user_ptr(mmu_t mmu, pmap_t pm, volatile const void *uaddr, 2146 void **kaddr, size_t ulen, size_t *klen) 2147 { 2148 2149 if ((uintptr_t)uaddr + ulen > VM_MAXUSER_ADDRESS + PAGE_SIZE) 2150 return (EFAULT); 2151 2152 *kaddr = (void *)(uintptr_t)uaddr; 2153 if (klen) 2154 *klen = ulen; 2155 2156 return (0); 2157 } 2158 2159 /* 2160 * Figure out where a given kernel pointer (usually in a fault) points 2161 * to from the VM's perspective, potentially remapping into userland's 2162 * address space. 2163 */ 2164 static int 2165 mmu_booke_decode_kernel_ptr(mmu_t mmu, vm_offset_t addr, int *is_user, 2166 vm_offset_t *decoded_addr) 2167 { 2168 2169 if (addr < VM_MAXUSER_ADDRESS) 2170 *is_user = 1; 2171 else 2172 *is_user = 0; 2173 2174 *decoded_addr = addr; 2175 return (0); 2176 } 2177 2178 /* 2179 * Initialize pmap associated with process 0. 
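 * Only the pmap lock needs explicit initialization here; the rest is
 * done by mmu_booke_pinit(), after which the pmap is installed as
 * curpmap.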
2180 */ 2181 static void 2182 mmu_booke_pinit0(mmu_t mmu, pmap_t pmap) 2183 { 2184 2185 PMAP_LOCK_INIT(pmap); 2186 mmu_booke_pinit(mmu, pmap); 2187 PCPU_SET(curpmap, pmap); 2188 } 2189 2190 /* 2191 * Initialize a preallocated and zeroed pmap structure, 2192 * such as one in a vmspace structure. 2193 */ 2194 static void 2195 mmu_booke_pinit(mmu_t mmu, pmap_t pmap) 2196 { 2197 int i; 2198 2199 CTR4(KTR_PMAP, "%s: pmap = %p, proc %d '%s'", __func__, pmap, 2200 curthread->td_proc->p_pid, curthread->td_proc->p_comm); 2201 2202 KASSERT((pmap != kernel_pmap), ("pmap_pinit: initializing kernel_pmap")); 2203 2204 for (i = 0; i < MAXCPU; i++) 2205 pmap->pm_tid[i] = TID_NONE; 2206 CPU_ZERO(&kernel_pmap->pm_active); 2207 bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); 2208 #ifdef __powerpc64__ 2209 pmap->pm_pp2d = uma_zalloc(ptbl_root_zone, M_WAITOK); 2210 bzero(pmap->pm_pp2d, sizeof(pte_t **) * PP2D_NENTRIES); 2211 #else 2212 pmap->pm_pdir = uma_zalloc(ptbl_root_zone, M_WAITOK); 2213 bzero(pmap->pm_pdir, sizeof(pte_t *) * PDIR_NENTRIES); 2214 TAILQ_INIT(&pmap->pm_ptbl_list); 2215 #endif 2216 } 2217 2218 /* 2219 * Release any resources held by the given physical map. 2220 * Called when a pmap initialized by mmu_booke_pinit is being released. 2221 * Should only be called if the map contains no valid mappings. 2222 */ 2223 static void 2224 mmu_booke_release(mmu_t mmu, pmap_t pmap) 2225 { 2226 2227 KASSERT(pmap->pm_stats.resident_count == 0, 2228 ("pmap_release: pmap resident count %ld != 0", 2229 pmap->pm_stats.resident_count)); 2230 #ifdef __powerpc64__ 2231 uma_zfree(ptbl_root_zone, pmap->pm_pp2d); 2232 #else 2233 uma_zfree(ptbl_root_zone, pmap->pm_pdir); 2234 #endif 2235 } 2236 2237 /* 2238 * Insert the given physical page at the specified virtual address in the 2239 * target physical map with the protection requested. If specified the page 2240 * will be wired down. 2241 */ 2242 static int 2243 mmu_booke_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 2244 vm_prot_t prot, u_int flags, int8_t psind) 2245 { 2246 int error; 2247 2248 rw_wlock(&pvh_global_lock); 2249 PMAP_LOCK(pmap); 2250 error = mmu_booke_enter_locked(mmu, pmap, va, m, prot, flags, psind); 2251 PMAP_UNLOCK(pmap); 2252 rw_wunlock(&pvh_global_lock); 2253 return (error); 2254 } 2255 2256 static int 2257 mmu_booke_enter_locked(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 2258 vm_prot_t prot, u_int pmap_flags, int8_t psind __unused) 2259 { 2260 pte_t *pte; 2261 vm_paddr_t pa; 2262 uint32_t flags; 2263 int error, su, sync; 2264 2265 pa = VM_PAGE_TO_PHYS(m); 2266 su = (pmap == kernel_pmap); 2267 sync = 0; 2268 2269 //debugf("mmu_booke_enter_locked: s (pmap=0x%08x su=%d tid=%d m=0x%08x va=0x%08x " 2270 // "pa=0x%08x prot=0x%08x flags=%#x)\n", 2271 // (u_int32_t)pmap, su, pmap->pm_tid, 2272 // (u_int32_t)m, va, pa, prot, flags); 2273 2274 if (su) { 2275 KASSERT(((va >= virtual_avail) && 2276 (va <= VM_MAX_KERNEL_ADDRESS)), 2277 ("mmu_booke_enter_locked: kernel pmap, non kernel va")); 2278 } else { 2279 KASSERT((va <= VM_MAXUSER_ADDRESS), 2280 ("mmu_booke_enter_locked: user pmap, non user va")); 2281 } 2282 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) 2283 VM_OBJECT_ASSERT_LOCKED(m->object); 2284 2285 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2286 2287 /* 2288 * If there is an existing mapping, and the physical address has not 2289 * changed, must be protection or wiring change. 
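	 * In that case only the PTE flags are recomputed and rewritten in
	 * place.  Otherwise pte_enter() below installs a new mapping,
	 * replacing any previous one for a different physical address.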
2290 */ 2291 if (((pte = pte_find(mmu, pmap, va)) != NULL) && 2292 (PTE_ISVALID(pte)) && (PTE_PA(pte) == pa)) { 2293 2294 /* 2295 * Before actually updating pte->flags we calculate and 2296 * prepare its new value in a helper var. 2297 */ 2298 flags = *pte; 2299 flags &= ~(PTE_UW | PTE_UX | PTE_SW | PTE_SX | PTE_MODIFIED); 2300 2301 /* Wiring change, just update stats. */ 2302 if ((pmap_flags & PMAP_ENTER_WIRED) != 0) { 2303 if (!PTE_ISWIRED(pte)) { 2304 flags |= PTE_WIRED; 2305 pmap->pm_stats.wired_count++; 2306 } 2307 } else { 2308 if (PTE_ISWIRED(pte)) { 2309 flags &= ~PTE_WIRED; 2310 pmap->pm_stats.wired_count--; 2311 } 2312 } 2313 2314 if (prot & VM_PROT_WRITE) { 2315 /* Add write permissions. */ 2316 flags |= PTE_SW; 2317 if (!su) 2318 flags |= PTE_UW; 2319 2320 if ((flags & PTE_MANAGED) != 0) 2321 vm_page_aflag_set(m, PGA_WRITEABLE); 2322 } else { 2323 /* Handle modified pages, sense modify status. */ 2324 2325 /* 2326 * The PTE_MODIFIED flag could be set by underlying 2327 * TLB misses since we last read it (above), possibly 2328 * other CPUs could update it so we check in the PTE 2329 * directly rather than rely on that saved local flags 2330 * copy. 2331 */ 2332 if (PTE_ISMODIFIED(pte)) 2333 vm_page_dirty(m); 2334 } 2335 2336 if (prot & VM_PROT_EXECUTE) { 2337 flags |= PTE_SX; 2338 if (!su) 2339 flags |= PTE_UX; 2340 2341 /* 2342 * Check existing flags for execute permissions: if we 2343 * are turning execute permissions on, icache should 2344 * be flushed. 2345 */ 2346 if ((*pte & (PTE_UX | PTE_SX)) == 0) 2347 sync++; 2348 } 2349 2350 flags &= ~PTE_REFERENCED; 2351 2352 /* 2353 * The new flags value is all calculated -- only now actually 2354 * update the PTE. 2355 */ 2356 mtx_lock_spin(&tlbivax_mutex); 2357 tlb_miss_lock(); 2358 2359 tlb0_flush_entry(va); 2360 *pte &= ~PTE_FLAGS_MASK; 2361 *pte |= flags; 2362 2363 tlb_miss_unlock(); 2364 mtx_unlock_spin(&tlbivax_mutex); 2365 2366 } else { 2367 /* 2368 * If there is an existing mapping, but it's for a different 2369 * physical address, pte_enter() will delete the old mapping. 2370 */ 2371 //if ((pte != NULL) && PTE_ISVALID(pte)) 2372 // debugf("mmu_booke_enter_locked: replace\n"); 2373 //else 2374 // debugf("mmu_booke_enter_locked: new\n"); 2375 2376 /* Now set up the flags and install the new mapping. */ 2377 flags = (PTE_SR | PTE_VALID); 2378 flags |= PTE_M; 2379 2380 if (!su) 2381 flags |= PTE_UR; 2382 2383 if (prot & VM_PROT_WRITE) { 2384 flags |= PTE_SW; 2385 if (!su) 2386 flags |= PTE_UW; 2387 2388 if ((m->oflags & VPO_UNMANAGED) == 0) 2389 vm_page_aflag_set(m, PGA_WRITEABLE); 2390 } 2391 2392 if (prot & VM_PROT_EXECUTE) { 2393 flags |= PTE_SX; 2394 if (!su) 2395 flags |= PTE_UX; 2396 } 2397 2398 /* If its wired update stats. */ 2399 if ((pmap_flags & PMAP_ENTER_WIRED) != 0) 2400 flags |= PTE_WIRED; 2401 2402 error = pte_enter(mmu, pmap, m, va, flags, 2403 (pmap_flags & PMAP_ENTER_NOSLEEP) != 0); 2404 if (error != 0) 2405 return (KERN_RESOURCE_SHORTAGE); 2406 2407 if ((flags & PMAP_ENTER_WIRED) != 0) 2408 pmap->pm_stats.wired_count++; 2409 2410 /* Flush the real memory from the instruction cache. */ 2411 if (prot & VM_PROT_EXECUTE) 2412 sync++; 2413 } 2414 2415 if (sync && (su || pmap == PCPU_GET(curpmap))) { 2416 __syncicache((void *)va, PAGE_SIZE); 2417 sync = 0; 2418 } 2419 2420 return (KERN_SUCCESS); 2421 } 2422 2423 /* 2424 * Maps a sequence of resident pages belonging to the same object. 2425 * The sequence begins with the given page m_start. This page is 2426 * mapped at the given virtual address start. 
Each subsequent page is 2427 * mapped at a virtual address that is offset from start by the same 2428 * amount as the page is offset from m_start within the object. The 2429 * last page in the sequence is the page with the largest offset from 2430 * m_start that can be mapped at a virtual address less than the given 2431 * virtual address end. Not every virtual page between start and end 2432 * is mapped; only those for which a resident page exists with the 2433 * corresponding offset from m_start are mapped. 2434 */ 2435 static void 2436 mmu_booke_enter_object(mmu_t mmu, pmap_t pmap, vm_offset_t start, 2437 vm_offset_t end, vm_page_t m_start, vm_prot_t prot) 2438 { 2439 vm_page_t m; 2440 vm_pindex_t diff, psize; 2441 2442 VM_OBJECT_ASSERT_LOCKED(m_start->object); 2443 2444 psize = atop(end - start); 2445 m = m_start; 2446 rw_wlock(&pvh_global_lock); 2447 PMAP_LOCK(pmap); 2448 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 2449 mmu_booke_enter_locked(mmu, pmap, start + ptoa(diff), m, 2450 prot & (VM_PROT_READ | VM_PROT_EXECUTE), 2451 PMAP_ENTER_NOSLEEP, 0); 2452 m = TAILQ_NEXT(m, listq); 2453 } 2454 rw_wunlock(&pvh_global_lock); 2455 PMAP_UNLOCK(pmap); 2456 } 2457 2458 static void 2459 mmu_booke_enter_quick(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 2460 vm_prot_t prot) 2461 { 2462 2463 rw_wlock(&pvh_global_lock); 2464 PMAP_LOCK(pmap); 2465 mmu_booke_enter_locked(mmu, pmap, va, m, 2466 prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP, 2467 0); 2468 rw_wunlock(&pvh_global_lock); 2469 PMAP_UNLOCK(pmap); 2470 } 2471 2472 /* 2473 * Remove the given range of addresses from the specified map. 2474 * 2475 * It is assumed that the start and end are properly rounded to the page size. 2476 */ 2477 static void 2478 mmu_booke_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_offset_t endva) 2479 { 2480 pte_t *pte; 2481 uint8_t hold_flag; 2482 2483 int su = (pmap == kernel_pmap); 2484 2485 //debugf("mmu_booke_remove: s (su = %d pmap=0x%08x tid=%d va=0x%08x endva=0x%08x)\n", 2486 // su, (u_int32_t)pmap, pmap->pm_tid, va, endva); 2487 2488 if (su) { 2489 KASSERT(((va >= virtual_avail) && 2490 (va <= VM_MAX_KERNEL_ADDRESS)), 2491 ("mmu_booke_remove: kernel pmap, non kernel va")); 2492 } else { 2493 KASSERT((va <= VM_MAXUSER_ADDRESS), 2494 ("mmu_booke_remove: user pmap, non user va")); 2495 } 2496 2497 if (PMAP_REMOVE_DONE(pmap)) { 2498 //debugf("mmu_booke_remove: e (empty)\n"); 2499 return; 2500 } 2501 2502 hold_flag = PTBL_HOLD_FLAG(pmap); 2503 //debugf("mmu_booke_remove: hold_flag = %d\n", hold_flag); 2504 2505 rw_wlock(&pvh_global_lock); 2506 PMAP_LOCK(pmap); 2507 for (; va < endva; va += PAGE_SIZE) { 2508 pte = pte_find(mmu, pmap, va); 2509 if ((pte != NULL) && PTE_ISVALID(pte)) 2510 pte_remove(mmu, pmap, va, hold_flag); 2511 } 2512 PMAP_UNLOCK(pmap); 2513 rw_wunlock(&pvh_global_lock); 2514 2515 //debugf("mmu_booke_remove: e\n"); 2516 } 2517 2518 /* 2519 * Remove physical page from all pmaps in which it resides. 
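 * The page's pv list is walked and every mapping is torn down under the
 * owning pmap's lock; PGA_WRITEABLE is cleared once no mappings remain.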
2520 */ 2521 static void 2522 mmu_booke_remove_all(mmu_t mmu, vm_page_t m) 2523 { 2524 pv_entry_t pv, pvn; 2525 uint8_t hold_flag; 2526 2527 rw_wlock(&pvh_global_lock); 2528 for (pv = TAILQ_FIRST(&m->md.pv_list); pv != NULL; pv = pvn) { 2529 pvn = TAILQ_NEXT(pv, pv_link); 2530 2531 PMAP_LOCK(pv->pv_pmap); 2532 hold_flag = PTBL_HOLD_FLAG(pv->pv_pmap); 2533 pte_remove(mmu, pv->pv_pmap, pv->pv_va, hold_flag); 2534 PMAP_UNLOCK(pv->pv_pmap); 2535 } 2536 vm_page_aflag_clear(m, PGA_WRITEABLE); 2537 rw_wunlock(&pvh_global_lock); 2538 } 2539 2540 /* 2541 * Map a range of physical addresses into kernel virtual address space. 2542 */ 2543 static vm_offset_t 2544 mmu_booke_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start, 2545 vm_paddr_t pa_end, int prot) 2546 { 2547 vm_offset_t sva = *virt; 2548 vm_offset_t va = sva; 2549 2550 //debugf("mmu_booke_map: s (sva = 0x%08x pa_start = 0x%08x pa_end = 0x%08x)\n", 2551 // sva, pa_start, pa_end); 2552 2553 while (pa_start < pa_end) { 2554 mmu_booke_kenter(mmu, va, pa_start); 2555 va += PAGE_SIZE; 2556 pa_start += PAGE_SIZE; 2557 } 2558 *virt = va; 2559 2560 //debugf("mmu_booke_map: e (va = 0x%08x)\n", va); 2561 return (sva); 2562 } 2563 2564 /* 2565 * The pmap must be activated before it's address space can be accessed in any 2566 * way. 2567 */ 2568 static void 2569 mmu_booke_activate(mmu_t mmu, struct thread *td) 2570 { 2571 pmap_t pmap; 2572 u_int cpuid; 2573 2574 pmap = &td->td_proc->p_vmspace->vm_pmap; 2575 2576 CTR5(KTR_PMAP, "%s: s (td = %p, proc = '%s', id = %d, pmap = 0x%"PRI0ptrX")", 2577 __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap); 2578 2579 KASSERT((pmap != kernel_pmap), ("mmu_booke_activate: kernel_pmap!")); 2580 2581 sched_pin(); 2582 2583 cpuid = PCPU_GET(cpuid); 2584 CPU_SET_ATOMIC(cpuid, &pmap->pm_active); 2585 PCPU_SET(curpmap, pmap); 2586 2587 if (pmap->pm_tid[cpuid] == TID_NONE) 2588 tid_alloc(pmap); 2589 2590 /* Load PID0 register with pmap tid value. */ 2591 mtspr(SPR_PID0, pmap->pm_tid[cpuid]); 2592 __asm __volatile("isync"); 2593 2594 mtspr(SPR_DBCR0, td->td_pcb->pcb_cpu.booke.dbcr0); 2595 2596 sched_unpin(); 2597 2598 CTR3(KTR_PMAP, "%s: e (tid = %d for '%s')", __func__, 2599 pmap->pm_tid[PCPU_GET(cpuid)], td->td_proc->p_comm); 2600 } 2601 2602 /* 2603 * Deactivate the specified process's address space. 2604 */ 2605 static void 2606 mmu_booke_deactivate(mmu_t mmu, struct thread *td) 2607 { 2608 pmap_t pmap; 2609 2610 pmap = &td->td_proc->p_vmspace->vm_pmap; 2611 2612 CTR5(KTR_PMAP, "%s: td=%p, proc = '%s', id = %d, pmap = 0x%"PRI0ptrX, 2613 __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap); 2614 2615 td->td_pcb->pcb_cpu.booke.dbcr0 = mfspr(SPR_DBCR0); 2616 2617 CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmap->pm_active); 2618 PCPU_SET(curpmap, NULL); 2619 } 2620 2621 /* 2622 * Copy the range specified by src_addr/len 2623 * from the source map to the range dst_addr/len 2624 * in the destination map. 2625 * 2626 * This routine is only advisory and need not do anything. 2627 */ 2628 static void 2629 mmu_booke_copy(mmu_t mmu, pmap_t dst_pmap, pmap_t src_pmap, 2630 vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) 2631 { 2632 2633 } 2634 2635 /* 2636 * Set the physical protection on the specified range of this map as requested. 
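 * Removing read access removes the mappings entirely, requests that add
 * write access are ignored (write permission is granted on demand), and
 * anything else strips the UW/SW (and MODIFIED) bits, dirtying modified
 * managed pages first.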
2637 */ 2638 static void 2639 mmu_booke_protect(mmu_t mmu, pmap_t pmap, vm_offset_t sva, vm_offset_t eva, 2640 vm_prot_t prot) 2641 { 2642 vm_offset_t va; 2643 vm_page_t m; 2644 pte_t *pte; 2645 2646 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 2647 mmu_booke_remove(mmu, pmap, sva, eva); 2648 return; 2649 } 2650 2651 if (prot & VM_PROT_WRITE) 2652 return; 2653 2654 PMAP_LOCK(pmap); 2655 for (va = sva; va < eva; va += PAGE_SIZE) { 2656 if ((pte = pte_find(mmu, pmap, va)) != NULL) { 2657 if (PTE_ISVALID(pte)) { 2658 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 2659 2660 mtx_lock_spin(&tlbivax_mutex); 2661 tlb_miss_lock(); 2662 2663 /* Handle modified pages. */ 2664 if (PTE_ISMODIFIED(pte) && PTE_ISMANAGED(pte)) 2665 vm_page_dirty(m); 2666 2667 tlb0_flush_entry(va); 2668 *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED); 2669 2670 tlb_miss_unlock(); 2671 mtx_unlock_spin(&tlbivax_mutex); 2672 } 2673 } 2674 } 2675 PMAP_UNLOCK(pmap); 2676 } 2677 2678 /* 2679 * Clear the write and modified bits in each of the given page's mappings. 2680 */ 2681 static void 2682 mmu_booke_remove_write(mmu_t mmu, vm_page_t m) 2683 { 2684 pv_entry_t pv; 2685 pte_t *pte; 2686 2687 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2688 ("mmu_booke_remove_write: page %p is not managed", m)); 2689 2690 /* 2691 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 2692 * set by another thread while the object is locked. Thus, 2693 * if PGA_WRITEABLE is clear, no page table entries need updating. 2694 */ 2695 VM_OBJECT_ASSERT_WLOCKED(m->object); 2696 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 2697 return; 2698 rw_wlock(&pvh_global_lock); 2699 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 2700 PMAP_LOCK(pv->pv_pmap); 2701 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) { 2702 if (PTE_ISVALID(pte)) { 2703 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 2704 2705 mtx_lock_spin(&tlbivax_mutex); 2706 tlb_miss_lock(); 2707 2708 /* Handle modified pages. */ 2709 if (PTE_ISMODIFIED(pte)) 2710 vm_page_dirty(m); 2711 2712 /* Flush mapping from TLB0. */ 2713 *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED); 2714 2715 tlb_miss_unlock(); 2716 mtx_unlock_spin(&tlbivax_mutex); 2717 } 2718 } 2719 PMAP_UNLOCK(pv->pv_pmap); 2720 } 2721 vm_page_aflag_clear(m, PGA_WRITEABLE); 2722 rw_wunlock(&pvh_global_lock); 2723 } 2724 2725 static void 2726 mmu_booke_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) 2727 { 2728 pte_t *pte; 2729 vm_paddr_t pa = 0; 2730 int sync_sz, valid; 2731 #ifndef __powerpc64__ 2732 pmap_t pmap; 2733 vm_page_t m; 2734 vm_offset_t addr; 2735 int active; 2736 #endif 2737 2738 #ifndef __powerpc64__ 2739 rw_wlock(&pvh_global_lock); 2740 pmap = PCPU_GET(curpmap); 2741 active = (pm == kernel_pmap || pm == pmap) ? 1 : 0; 2742 #endif 2743 while (sz > 0) { 2744 PMAP_LOCK(pm); 2745 pte = pte_find(mmu, pm, va); 2746 valid = (pte != NULL && PTE_ISVALID(pte)) ? 1 : 0; 2747 if (valid) 2748 pa = PTE_PA(pte); 2749 PMAP_UNLOCK(pm); 2750 sync_sz = PAGE_SIZE - (va & PAGE_MASK); 2751 sync_sz = min(sync_sz, sz); 2752 if (valid) { 2753 #ifdef __powerpc64__ 2754 pa += (va & PAGE_MASK); 2755 __syncicache((void *)PHYS_TO_DMAP(pa), sync_sz); 2756 #else 2757 if (!active) { 2758 /* Create a mapping in the active pmap. 
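				 * The page is entered at VA 0 of the current
				 * pmap just long enough to sync the icache
				 * through it, then the temporary mapping is
				 * removed again.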
*/ 2759 addr = 0; 2760 m = PHYS_TO_VM_PAGE(pa); 2761 PMAP_LOCK(pmap); 2762 pte_enter(mmu, pmap, m, addr, 2763 PTE_SR | PTE_VALID, FALSE); 2764 addr += (va & PAGE_MASK); 2765 __syncicache((void *)addr, sync_sz); 2766 pte_remove(mmu, pmap, addr, PTBL_UNHOLD); 2767 PMAP_UNLOCK(pmap); 2768 } else 2769 __syncicache((void *)va, sync_sz); 2770 #endif 2771 } 2772 va += sync_sz; 2773 sz -= sync_sz; 2774 } 2775 #ifndef __powerpc64__ 2776 rw_wunlock(&pvh_global_lock); 2777 #endif 2778 } 2779 2780 /* 2781 * Atomically extract and hold the physical page with the given 2782 * pmap and virtual address pair if that mapping permits the given 2783 * protection. 2784 */ 2785 static vm_page_t 2786 mmu_booke_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, 2787 vm_prot_t prot) 2788 { 2789 pte_t *pte; 2790 vm_page_t m; 2791 uint32_t pte_wbit; 2792 vm_paddr_t pa; 2793 2794 m = NULL; 2795 pa = 0; 2796 PMAP_LOCK(pmap); 2797 retry: 2798 pte = pte_find(mmu, pmap, va); 2799 if ((pte != NULL) && PTE_ISVALID(pte)) { 2800 if (pmap == kernel_pmap) 2801 pte_wbit = PTE_SW; 2802 else 2803 pte_wbit = PTE_UW; 2804 2805 if ((*pte & pte_wbit) || ((prot & VM_PROT_WRITE) == 0)) { 2806 if (vm_page_pa_tryrelock(pmap, PTE_PA(pte), &pa)) 2807 goto retry; 2808 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 2809 m->wire_count++; 2810 } 2811 } 2812 2813 PA_UNLOCK_COND(pa); 2814 PMAP_UNLOCK(pmap); 2815 return (m); 2816 } 2817 2818 /* 2819 * Initialize a vm_page's machine-dependent fields. 2820 */ 2821 static void 2822 mmu_booke_page_init(mmu_t mmu, vm_page_t m) 2823 { 2824 2825 m->md.pv_tracked = 0; 2826 TAILQ_INIT(&m->md.pv_list); 2827 } 2828 2829 /* 2830 * mmu_booke_zero_page_area zeros the specified hardware page by 2831 * mapping it into virtual memory and using bzero to clear 2832 * its contents. 2833 * 2834 * off and size must reside within a single page. 2835 */ 2836 static void 2837 mmu_booke_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) 2838 { 2839 vm_offset_t va; 2840 2841 /* XXX KASSERT off and size are within a single page? */ 2842 2843 #ifdef __powerpc64__ 2844 va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2845 bzero((caddr_t)va + off, size); 2846 #else 2847 mtx_lock(&zero_page_mutex); 2848 va = zero_page_va; 2849 2850 mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); 2851 bzero((caddr_t)va + off, size); 2852 mmu_booke_kremove(mmu, va); 2853 2854 mtx_unlock(&zero_page_mutex); 2855 #endif 2856 } 2857 2858 /* 2859 * mmu_booke_zero_page zeros the specified hardware page. 2860 */ 2861 static void 2862 mmu_booke_zero_page(mmu_t mmu, vm_page_t m) 2863 { 2864 vm_offset_t off, va; 2865 2866 #ifdef __powerpc64__ 2867 va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2868 2869 for (off = 0; off < PAGE_SIZE; off += cacheline_size) 2870 __asm __volatile("dcbz 0,%0" :: "r"(va + off)); 2871 #else 2872 va = zero_page_va; 2873 mtx_lock(&zero_page_mutex); 2874 2875 mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); 2876 2877 for (off = 0; off < PAGE_SIZE; off += cacheline_size) 2878 __asm __volatile("dcbz 0,%0" :: "r"(va + off)); 2879 2880 mmu_booke_kremove(mmu, va); 2881 2882 mtx_unlock(&zero_page_mutex); 2883 #endif 2884 } 2885 2886 /* 2887 * mmu_booke_copy_page copies the specified (machine independent) page by 2888 * mapping the page into virtual memory and using memcopy to copy the page, 2889 * one machine dependent page at a time. 
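 * On 64-bit both pages are reached through the direct map; on 32-bit a
 * pair of reserved KVA pages (copy_page_src_va/copy_page_dst_va) is used
 * under copy_page_mutex.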
2890 */ 2891 static void 2892 mmu_booke_copy_page(mmu_t mmu, vm_page_t sm, vm_page_t dm) 2893 { 2894 vm_offset_t sva, dva; 2895 2896 #ifdef __powerpc64__ 2897 sva = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(sm)); 2898 dva = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dm)); 2899 memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE); 2900 #else 2901 sva = copy_page_src_va; 2902 dva = copy_page_dst_va; 2903 2904 mtx_lock(©_page_mutex); 2905 mmu_booke_kenter(mmu, sva, VM_PAGE_TO_PHYS(sm)); 2906 mmu_booke_kenter(mmu, dva, VM_PAGE_TO_PHYS(dm)); 2907 2908 memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE); 2909 2910 mmu_booke_kremove(mmu, dva); 2911 mmu_booke_kremove(mmu, sva); 2912 mtx_unlock(©_page_mutex); 2913 #endif 2914 } 2915 2916 static inline void 2917 mmu_booke_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, 2918 vm_page_t *mb, vm_offset_t b_offset, int xfersize) 2919 { 2920 void *a_cp, *b_cp; 2921 vm_offset_t a_pg_offset, b_pg_offset; 2922 int cnt; 2923 2924 #ifdef __powerpc64__ 2925 vm_page_t pa, pb; 2926 2927 while (xfersize > 0) { 2928 a_pg_offset = a_offset & PAGE_MASK; 2929 pa = ma[a_offset >> PAGE_SHIFT]; 2930 b_pg_offset = b_offset & PAGE_MASK; 2931 pb = mb[b_offset >> PAGE_SHIFT]; 2932 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 2933 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 2934 a_cp = (caddr_t)((uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pa)) + 2935 a_pg_offset); 2936 b_cp = (caddr_t)((uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pb)) + 2937 b_pg_offset); 2938 bcopy(a_cp, b_cp, cnt); 2939 a_offset += cnt; 2940 b_offset += cnt; 2941 xfersize -= cnt; 2942 } 2943 #else 2944 mtx_lock(©_page_mutex); 2945 while (xfersize > 0) { 2946 a_pg_offset = a_offset & PAGE_MASK; 2947 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 2948 mmu_booke_kenter(mmu, copy_page_src_va, 2949 VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])); 2950 a_cp = (char *)copy_page_src_va + a_pg_offset; 2951 b_pg_offset = b_offset & PAGE_MASK; 2952 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 2953 mmu_booke_kenter(mmu, copy_page_dst_va, 2954 VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])); 2955 b_cp = (char *)copy_page_dst_va + b_pg_offset; 2956 bcopy(a_cp, b_cp, cnt); 2957 mmu_booke_kremove(mmu, copy_page_dst_va); 2958 mmu_booke_kremove(mmu, copy_page_src_va); 2959 a_offset += cnt; 2960 b_offset += cnt; 2961 xfersize -= cnt; 2962 } 2963 mtx_unlock(©_page_mutex); 2964 #endif 2965 } 2966 2967 static vm_offset_t 2968 mmu_booke_quick_enter_page(mmu_t mmu, vm_page_t m) 2969 { 2970 #ifdef __powerpc64__ 2971 return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); 2972 #else 2973 vm_paddr_t paddr; 2974 vm_offset_t qaddr; 2975 uint32_t flags; 2976 pte_t *pte; 2977 2978 paddr = VM_PAGE_TO_PHYS(m); 2979 2980 flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID; 2981 flags |= tlb_calc_wimg(paddr, pmap_page_get_memattr(m)) << PTE_MAS2_SHIFT; 2982 flags |= PTE_PS_4KB; 2983 2984 critical_enter(); 2985 qaddr = PCPU_GET(qmap_addr); 2986 2987 pte = pte_find(mmu, kernel_pmap, qaddr); 2988 2989 KASSERT(*pte == 0, ("mmu_booke_quick_enter_page: PTE busy")); 2990 2991 /* 2992 * XXX: tlbivax is broadcast to other cores, but qaddr should 2993 * not be present in other TLBs. Is there a better instruction 2994 * sequence to use? Or just forget it & use mmu_booke_kenter()... 2995 */ 2996 __asm __volatile("tlbivax 0, %0" :: "r"(qaddr & MAS2_EPN_MASK)); 2997 __asm __volatile("isync; msync"); 2998 2999 *pte = PTE_RPN_FROM_PA(paddr) | flags; 3000 3001 /* Flush the real memory from the instruction cache. 
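	 * This is only done for cacheable, non-guarded mappings, i.e. when
	 * both PTE_I and PTE_G are clear.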
*/ 3002 if ((flags & (PTE_I | PTE_G)) == 0) 3003 __syncicache((void *)qaddr, PAGE_SIZE); 3004 3005 return (qaddr); 3006 #endif 3007 } 3008 3009 static void 3010 mmu_booke_quick_remove_page(mmu_t mmu, vm_offset_t addr) 3011 { 3012 #ifndef __powerpc64__ 3013 pte_t *pte; 3014 3015 pte = pte_find(mmu, kernel_pmap, addr); 3016 3017 KASSERT(PCPU_GET(qmap_addr) == addr, 3018 ("mmu_booke_quick_remove_page: invalid address")); 3019 KASSERT(*pte != 0, 3020 ("mmu_booke_quick_remove_page: PTE not in use")); 3021 3022 *pte = 0; 3023 critical_exit(); 3024 #endif 3025 } 3026 3027 /* 3028 * Return whether or not the specified physical page was modified 3029 * in any of physical maps. 3030 */ 3031 static boolean_t 3032 mmu_booke_is_modified(mmu_t mmu, vm_page_t m) 3033 { 3034 pte_t *pte; 3035 pv_entry_t pv; 3036 boolean_t rv; 3037 3038 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3039 ("mmu_booke_is_modified: page %p is not managed", m)); 3040 rv = FALSE; 3041 3042 /* 3043 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 3044 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 3045 * is clear, no PTEs can be modified. 3046 */ 3047 VM_OBJECT_ASSERT_WLOCKED(m->object); 3048 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 3049 return (rv); 3050 rw_wlock(&pvh_global_lock); 3051 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3052 PMAP_LOCK(pv->pv_pmap); 3053 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3054 PTE_ISVALID(pte)) { 3055 if (PTE_ISMODIFIED(pte)) 3056 rv = TRUE; 3057 } 3058 PMAP_UNLOCK(pv->pv_pmap); 3059 if (rv) 3060 break; 3061 } 3062 rw_wunlock(&pvh_global_lock); 3063 return (rv); 3064 } 3065 3066 /* 3067 * Return whether or not the specified virtual address is eligible 3068 * for prefault. 3069 */ 3070 static boolean_t 3071 mmu_booke_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t addr) 3072 { 3073 3074 return (FALSE); 3075 } 3076 3077 /* 3078 * Return whether or not the specified physical page was referenced 3079 * in any physical maps. 3080 */ 3081 static boolean_t 3082 mmu_booke_is_referenced(mmu_t mmu, vm_page_t m) 3083 { 3084 pte_t *pte; 3085 pv_entry_t pv; 3086 boolean_t rv; 3087 3088 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3089 ("mmu_booke_is_referenced: page %p is not managed", m)); 3090 rv = FALSE; 3091 rw_wlock(&pvh_global_lock); 3092 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3093 PMAP_LOCK(pv->pv_pmap); 3094 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3095 PTE_ISVALID(pte)) { 3096 if (PTE_ISREFERENCED(pte)) 3097 rv = TRUE; 3098 } 3099 PMAP_UNLOCK(pv->pv_pmap); 3100 if (rv) 3101 break; 3102 } 3103 rw_wunlock(&pvh_global_lock); 3104 return (rv); 3105 } 3106 3107 /* 3108 * Clear the modify bits on the specified physical page. 3109 */ 3110 static void 3111 mmu_booke_clear_modify(mmu_t mmu, vm_page_t m) 3112 { 3113 pte_t *pte; 3114 pv_entry_t pv; 3115 3116 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3117 ("mmu_booke_clear_modify: page %p is not managed", m)); 3118 VM_OBJECT_ASSERT_WLOCKED(m->object); 3119 KASSERT(!vm_page_xbusied(m), 3120 ("mmu_booke_clear_modify: page %p is exclusive busied", m)); 3121 3122 /* 3123 * If the page is not PG_AWRITEABLE, then no PTEs can be modified. 3124 * If the object containing the page is locked and the page is not 3125 * exclusive busied, then PG_AWRITEABLE cannot be concurrently set. 
3126 */ 3127 if ((m->aflags & PGA_WRITEABLE) == 0) 3128 return; 3129 rw_wlock(&pvh_global_lock); 3130 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3131 PMAP_LOCK(pv->pv_pmap); 3132 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3133 PTE_ISVALID(pte)) { 3134 mtx_lock_spin(&tlbivax_mutex); 3135 tlb_miss_lock(); 3136 3137 if (*pte & (PTE_SW | PTE_UW | PTE_MODIFIED)) { 3138 tlb0_flush_entry(pv->pv_va); 3139 *pte &= ~(PTE_SW | PTE_UW | PTE_MODIFIED | 3140 PTE_REFERENCED); 3141 } 3142 3143 tlb_miss_unlock(); 3144 mtx_unlock_spin(&tlbivax_mutex); 3145 } 3146 PMAP_UNLOCK(pv->pv_pmap); 3147 } 3148 rw_wunlock(&pvh_global_lock); 3149 } 3150 3151 /* 3152 * Return a count of reference bits for a page, clearing those bits. 3153 * It is not necessary for every reference bit to be cleared, but it 3154 * is necessary that 0 only be returned when there are truly no 3155 * reference bits set. 3156 * 3157 * As an optimization, update the page's dirty field if a modified bit is 3158 * found while counting reference bits. This opportunistic update can be 3159 * performed at low cost and can eliminate the need for some future calls 3160 * to pmap_is_modified(). However, since this function stops after 3161 * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some 3162 * dirty pages. Those dirty pages will only be detected by a future call 3163 * to pmap_is_modified(). 3164 */ 3165 static int 3166 mmu_booke_ts_referenced(mmu_t mmu, vm_page_t m) 3167 { 3168 pte_t *pte; 3169 pv_entry_t pv; 3170 int count; 3171 3172 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3173 ("mmu_booke_ts_referenced: page %p is not managed", m)); 3174 count = 0; 3175 rw_wlock(&pvh_global_lock); 3176 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3177 PMAP_LOCK(pv->pv_pmap); 3178 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3179 PTE_ISVALID(pte)) { 3180 if (PTE_ISMODIFIED(pte)) 3181 vm_page_dirty(m); 3182 if (PTE_ISREFERENCED(pte)) { 3183 mtx_lock_spin(&tlbivax_mutex); 3184 tlb_miss_lock(); 3185 3186 tlb0_flush_entry(pv->pv_va); 3187 *pte &= ~PTE_REFERENCED; 3188 3189 tlb_miss_unlock(); 3190 mtx_unlock_spin(&tlbivax_mutex); 3191 3192 if (++count >= PMAP_TS_REFERENCED_MAX) { 3193 PMAP_UNLOCK(pv->pv_pmap); 3194 break; 3195 } 3196 } 3197 } 3198 PMAP_UNLOCK(pv->pv_pmap); 3199 } 3200 rw_wunlock(&pvh_global_lock); 3201 return (count); 3202 } 3203 3204 /* 3205 * Clear the wired attribute from the mappings for the specified range of 3206 * addresses in the given pmap. Every valid mapping within that range must 3207 * have the wired attribute set. In contrast, invalid mappings cannot have 3208 * the wired attribute set, so they are ignored. 3209 * 3210 * The wired attribute of the page table entry is not a hardware feature, so 3211 * there is no need to invalidate any TLB entries. 3212 */ 3213 static void 3214 mmu_booke_unwire(mmu_t mmu, pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 3215 { 3216 vm_offset_t va; 3217 pte_t *pte; 3218 3219 PMAP_LOCK(pmap); 3220 for (va = sva; va < eva; va += PAGE_SIZE) { 3221 if ((pte = pte_find(mmu, pmap, va)) != NULL && 3222 PTE_ISVALID(pte)) { 3223 if (!PTE_ISWIRED(pte)) 3224 panic("mmu_booke_unwire: pte %p isn't wired", 3225 pte); 3226 *pte &= ~PTE_WIRED; 3227 pmap->pm_stats.wired_count--; 3228 } 3229 } 3230 PMAP_UNLOCK(pmap); 3231 3232 } 3233 3234 /* 3235 * Return true if the pmap's pv is one of the first 16 pvs linked to from this 3236 * page. 
This count may be changed upwards or downwards in the future; it is 3237 * only necessary that true be returned for a small subset of pmaps for proper 3238 * page aging. 3239 */ 3240 static boolean_t 3241 mmu_booke_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m) 3242 { 3243 pv_entry_t pv; 3244 int loops; 3245 boolean_t rv; 3246 3247 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3248 ("mmu_booke_page_exists_quick: page %p is not managed", m)); 3249 loops = 0; 3250 rv = FALSE; 3251 rw_wlock(&pvh_global_lock); 3252 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3253 if (pv->pv_pmap == pmap) { 3254 rv = TRUE; 3255 break; 3256 } 3257 if (++loops >= 16) 3258 break; 3259 } 3260 rw_wunlock(&pvh_global_lock); 3261 return (rv); 3262 } 3263 3264 /* 3265 * Return the number of managed mappings to the given physical page that are 3266 * wired. 3267 */ 3268 static int 3269 mmu_booke_page_wired_mappings(mmu_t mmu, vm_page_t m) 3270 { 3271 pv_entry_t pv; 3272 pte_t *pte; 3273 int count = 0; 3274 3275 if ((m->oflags & VPO_UNMANAGED) != 0) 3276 return (count); 3277 rw_wlock(&pvh_global_lock); 3278 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3279 PMAP_LOCK(pv->pv_pmap); 3280 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) 3281 if (PTE_ISVALID(pte) && PTE_ISWIRED(pte)) 3282 count++; 3283 PMAP_UNLOCK(pv->pv_pmap); 3284 } 3285 rw_wunlock(&pvh_global_lock); 3286 return (count); 3287 } 3288 3289 static int 3290 mmu_booke_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size) 3291 { 3292 int i; 3293 vm_offset_t va; 3294 3295 /* 3296 * This currently does not work for entries that 3297 * overlap TLB1 entries. 3298 */ 3299 for (i = 0; i < TLB1_ENTRIES; i ++) { 3300 if (tlb1_iomapped(i, pa, size, &va) == 0) 3301 return (0); 3302 } 3303 3304 return (EFAULT); 3305 } 3306 3307 void 3308 mmu_booke_dumpsys_map(mmu_t mmu, vm_paddr_t pa, size_t sz, void **va) 3309 { 3310 vm_paddr_t ppa; 3311 vm_offset_t ofs; 3312 vm_size_t gran; 3313 3314 /* Minidumps are based on virtual memory addresses. */ 3315 if (do_minidump) { 3316 *va = (void *)(vm_offset_t)pa; 3317 return; 3318 } 3319 3320 /* Raw physical memory dumps don't have a virtual address. */ 3321 /* We always map a 256MB page at 256M. */ 3322 gran = 256 * 1024 * 1024; 3323 ppa = rounddown2(pa, gran); 3324 ofs = pa - ppa; 3325 *va = (void *)gran; 3326 tlb1_set_entry((vm_offset_t)va, ppa, gran, _TLB_ENTRY_IO); 3327 3328 if (sz > (gran - ofs)) 3329 tlb1_set_entry((vm_offset_t)(va + gran), ppa + gran, gran, 3330 _TLB_ENTRY_IO); 3331 } 3332 3333 void 3334 mmu_booke_dumpsys_unmap(mmu_t mmu, vm_paddr_t pa, size_t sz, void *va) 3335 { 3336 vm_paddr_t ppa; 3337 vm_offset_t ofs; 3338 vm_size_t gran; 3339 tlb_entry_t e; 3340 int i; 3341 3342 /* Minidumps are based on virtual memory addresses. */ 3343 /* Nothing to do... */ 3344 if (do_minidump) 3345 return; 3346 3347 for (i = 0; i < TLB1_ENTRIES; i++) { 3348 tlb1_read_entry(&e, i); 3349 if (!(e.mas1 & MAS1_VALID)) 3350 break; 3351 } 3352 3353 /* Raw physical memory dumps don't have a virtual address. 
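	 * Instead, invalidate the 256MB TLB1 window(s) that
	 * mmu_booke_dumpsys_map() installed; being the most recently added,
	 * they occupy the last valid slots found above.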
*/ 3354 i--; 3355 e.mas1 = 0; 3356 e.mas2 = 0; 3357 e.mas3 = 0; 3358 tlb1_write_entry(&e, i); 3359 3360 gran = 256 * 1024 * 1024; 3361 ppa = rounddown2(pa, gran); 3362 ofs = pa - ppa; 3363 if (sz > (gran - ofs)) { 3364 i--; 3365 e.mas1 = 0; 3366 e.mas2 = 0; 3367 e.mas3 = 0; 3368 tlb1_write_entry(&e, i); 3369 } 3370 } 3371 3372 extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1]; 3373 3374 void 3375 mmu_booke_scan_init(mmu_t mmu) 3376 { 3377 vm_offset_t va; 3378 pte_t *pte; 3379 int i; 3380 3381 if (!do_minidump) { 3382 /* Initialize phys. segments for dumpsys(). */ 3383 memset(&dump_map, 0, sizeof(dump_map)); 3384 mem_regions(&physmem_regions, &physmem_regions_sz, &availmem_regions, 3385 &availmem_regions_sz); 3386 for (i = 0; i < physmem_regions_sz; i++) { 3387 dump_map[i].pa_start = physmem_regions[i].mr_start; 3388 dump_map[i].pa_size = physmem_regions[i].mr_size; 3389 } 3390 return; 3391 } 3392 3393 /* Virtual segments for minidumps: */ 3394 memset(&dump_map, 0, sizeof(dump_map)); 3395 3396 /* 1st: kernel .data and .bss. */ 3397 dump_map[0].pa_start = trunc_page((uintptr_t)_etext); 3398 dump_map[0].pa_size = 3399 round_page((uintptr_t)_end) - dump_map[0].pa_start; 3400 3401 /* 2nd: msgbuf and tables (see pmap_bootstrap()). */ 3402 dump_map[1].pa_start = data_start; 3403 dump_map[1].pa_size = data_end - data_start; 3404 3405 /* 3rd: kernel VM. */ 3406 va = dump_map[1].pa_start + dump_map[1].pa_size; 3407 /* Find start of next chunk (from va). */ 3408 while (va < virtual_end) { 3409 /* Don't dump the buffer cache. */ 3410 if (va >= kmi.buffer_sva && va < kmi.buffer_eva) { 3411 va = kmi.buffer_eva; 3412 continue; 3413 } 3414 pte = pte_find(mmu, kernel_pmap, va); 3415 if (pte != NULL && PTE_ISVALID(pte)) 3416 break; 3417 va += PAGE_SIZE; 3418 } 3419 if (va < virtual_end) { 3420 dump_map[2].pa_start = va; 3421 va += PAGE_SIZE; 3422 /* Find last page in chunk. */ 3423 while (va < virtual_end) { 3424 /* Don't run into the buffer cache. */ 3425 if (va == kmi.buffer_sva) 3426 break; 3427 pte = pte_find(mmu, kernel_pmap, va); 3428 if (pte == NULL || !PTE_ISVALID(pte)) 3429 break; 3430 va += PAGE_SIZE; 3431 } 3432 dump_map[2].pa_size = va - dump_map[2].pa_start; 3433 } 3434 } 3435 3436 /* 3437 * Map a set of physical memory pages into the kernel virtual address space. 3438 * Return a pointer to where it is mapped. This routine is intended to be used 3439 * for mapping device memory, NOT real memory. 3440 */ 3441 static void * 3442 mmu_booke_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size) 3443 { 3444 3445 return (mmu_booke_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT)); 3446 } 3447 3448 static void * 3449 mmu_booke_mapdev_attr(mmu_t mmu, vm_paddr_t pa, vm_size_t size, vm_memattr_t ma) 3450 { 3451 tlb_entry_t e; 3452 void *res; 3453 uintptr_t va, tmpva; 3454 vm_size_t sz; 3455 int i; 3456 3457 /* 3458 * Check if this is premapped in TLB1. Note: this should probably also 3459 * check whether a sequence of TLB1 entries exist that match the 3460 * requirement, but now only checks the easy case. 
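	 * Here the "easy case" means a single valid entry that covers
	 * [pa, pa + size) completely with a compatible WIMG attribute.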
3461 */ 3462 for (i = 0; i < TLB1_ENTRIES; i++) { 3463 tlb1_read_entry(&e, i); 3464 if (!(e.mas1 & MAS1_VALID)) 3465 continue; 3466 if (pa >= e.phys && 3467 (pa + size) <= (e.phys + e.size) && 3468 (ma == VM_MEMATTR_DEFAULT || 3469 tlb_calc_wimg(pa, ma) == 3470 (e.mas2 & (MAS2_WIMGE_MASK & ~_TLB_ENTRY_SHARED)))) 3471 return (void *)(e.virt + 3472 (vm_offset_t)(pa - e.phys)); 3473 } 3474 3475 size = roundup(size, PAGE_SIZE); 3476 3477 /* 3478 * The device mapping area is between VM_MAXUSER_ADDRESS and 3479 * VM_MIN_KERNEL_ADDRESS. This gives 1GB of device addressing. 3480 */ 3481 #ifdef SPARSE_MAPDEV 3482 /* 3483 * With a sparse mapdev, align to the largest starting region. This 3484 * could feasibly be optimized for a 'best-fit' alignment, but that 3485 * calculation could be very costly. 3486 * Align to the smaller of: 3487 * - first set bit in overlap of (pa & size mask) 3488 * - largest size envelope 3489 * 3490 * It's possible the device mapping may start at a PA that's not larger 3491 * than the size mask, so we need to offset in to maximize the TLB entry 3492 * range and minimize the number of used TLB entries. 3493 */ 3494 do { 3495 tmpva = tlb1_map_base; 3496 sz = ffsl(((1 << flsl(size-1)) - 1) & pa); 3497 sz = sz ? min(roundup(sz + 3, 4), flsl(size) - 1) : flsl(size) - 1; 3498 va = roundup(tlb1_map_base, 1 << sz) | (((1 << sz) - 1) & pa); 3499 #ifdef __powerpc64__ 3500 } while (!atomic_cmpset_long(&tlb1_map_base, tmpva, va + size)); 3501 #else 3502 } while (!atomic_cmpset_int(&tlb1_map_base, tmpva, va + size)); 3503 #endif 3504 #else 3505 #ifdef __powerpc64__ 3506 va = atomic_fetchadd_long(&tlb1_map_base, size); 3507 #else 3508 va = atomic_fetchadd_int(&tlb1_map_base, size); 3509 #endif 3510 #endif 3511 res = (void *)va; 3512 3513 do { 3514 sz = 1 << (ilog2(size) & ~1); 3515 /* Align size to PA */ 3516 if (pa % sz != 0) { 3517 do { 3518 sz >>= 2; 3519 } while (pa % sz != 0); 3520 } 3521 /* Now align from there to VA */ 3522 if (va % sz != 0) { 3523 do { 3524 sz >>= 2; 3525 } while (va % sz != 0); 3526 } 3527 if (bootverbose) 3528 printf("Wiring VA=%lx to PA=%jx (size=%lx)\n", 3529 va, (uintmax_t)pa, sz); 3530 if (tlb1_set_entry(va, pa, sz, 3531 _TLB_ENTRY_SHARED | tlb_calc_wimg(pa, ma)) < 0) 3532 return (NULL); 3533 size -= sz; 3534 pa += sz; 3535 va += sz; 3536 } while (size > 0); 3537 3538 return (res); 3539 } 3540 3541 /* 3542 * 'Unmap' a range mapped by mmu_booke_mapdev(). 3543 */ 3544 static void 3545 mmu_booke_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size) 3546 { 3547 #ifdef SUPPORTS_SHRINKING_TLB1 3548 vm_offset_t base, offset; 3549 3550 /* 3551 * Unmap only if this is inside kernel virtual space. 3552 */ 3553 if ((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)) { 3554 base = trunc_page(va); 3555 offset = va & PAGE_MASK; 3556 size = roundup(offset + size, PAGE_SIZE); 3557 kva_free(base, size); 3558 } 3559 #endif 3560 } 3561 3562 /* 3563 * mmu_booke_object_init_pt preloads the ptes for a given object into the 3564 * specified pmap. This eliminates the blast of soft faults on process startup 3565 * and immediately after an mmap. 3566 */ 3567 static void 3568 mmu_booke_object_init_pt(mmu_t mmu, pmap_t pmap, vm_offset_t addr, 3569 vm_object_t object, vm_pindex_t pindex, vm_size_t size) 3570 { 3571 3572 VM_OBJECT_ASSERT_WLOCKED(object); 3573 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 3574 ("mmu_booke_object_init_pt: non-device object")); 3575 } 3576 3577 /* 3578 * Perform the pmap work for mincore. 
3579 */ 3580 static int 3581 mmu_booke_mincore(mmu_t mmu, pmap_t pmap, vm_offset_t addr, 3582 vm_paddr_t *locked_pa) 3583 { 3584 3585 /* XXX: this should be implemented at some point */ 3586 return (0); 3587 } 3588 3589 static int 3590 mmu_booke_change_attr(mmu_t mmu, vm_offset_t addr, vm_size_t sz, 3591 vm_memattr_t mode) 3592 { 3593 vm_offset_t va; 3594 pte_t *pte; 3595 int i, j; 3596 tlb_entry_t e; 3597 3598 /* Check TLB1 mappings */ 3599 for (i = 0; i < TLB1_ENTRIES; i++) { 3600 tlb1_read_entry(&e, i); 3601 if (!(e.mas1 & MAS1_VALID)) 3602 continue; 3603 if (addr >= e.virt && addr < e.virt + e.size) 3604 break; 3605 } 3606 if (i < TLB1_ENTRIES) { 3607 /* Only allow full mappings to be modified for now. */ 3608 /* Validate the range. */ 3609 for (j = i, va = addr; va < addr + sz; va += e.size, j++) { 3610 tlb1_read_entry(&e, j); 3611 if (va != e.virt || (sz - (va - addr) < e.size)) 3612 return (EINVAL); 3613 } 3614 for (va = addr; va < addr + sz; va += e.size, i++) { 3615 tlb1_read_entry(&e, i); 3616 e.mas2 &= ~MAS2_WIMGE_MASK; 3617 e.mas2 |= tlb_calc_wimg(e.phys, mode); 3618 3619 /* 3620 * Write it out to the TLB. Should really re-sync with other 3621 * cores. 3622 */ 3623 tlb1_write_entry(&e, i); 3624 } 3625 return (0); 3626 } 3627 3628 /* Not in TLB1, try through pmap */ 3629 /* First validate the range. */ 3630 for (va = addr; va < addr + sz; va += PAGE_SIZE) { 3631 pte = pte_find(mmu, kernel_pmap, va); 3632 if (pte == NULL || !PTE_ISVALID(pte)) 3633 return (EINVAL); 3634 } 3635 3636 mtx_lock_spin(&tlbivax_mutex); 3637 tlb_miss_lock(); 3638 for (va = addr; va < addr + sz; va += PAGE_SIZE) { 3639 pte = pte_find(mmu, kernel_pmap, va); 3640 *pte &= ~(PTE_MAS2_MASK << PTE_MAS2_SHIFT); 3641 *pte |= tlb_calc_wimg(PTE_PA(pte), mode) << PTE_MAS2_SHIFT; 3642 tlb0_flush_entry(va); 3643 } 3644 tlb_miss_unlock(); 3645 mtx_unlock_spin(&tlbivax_mutex); 3646 3647 return (0); 3648 } 3649 3650 /**************************************************************************/ 3651 /* TID handling */ 3652 /**************************************************************************/ 3653 3654 /* 3655 * Allocate a TID. If necessary, steal one from someone else. 3656 * The new TID is flushed from the TLB before returning. 3657 */ 3658 static tlbtid_t 3659 tid_alloc(pmap_t pmap) 3660 { 3661 tlbtid_t tid; 3662 int thiscpu; 3663 3664 KASSERT((pmap != kernel_pmap), ("tid_alloc: kernel pmap")); 3665 3666 CTR2(KTR_PMAP, "%s: s (pmap = %p)", __func__, pmap); 3667 3668 thiscpu = PCPU_GET(cpuid); 3669 3670 tid = PCPU_GET(booke.tid_next); 3671 if (tid > TID_MAX) 3672 tid = TID_MIN; 3673 PCPU_SET(booke.tid_next, tid + 1); 3674 3675 /* If we are stealing TID then clear the relevant pmap's field */ 3676 if (tidbusy[thiscpu][tid] != NULL) { 3677 3678 CTR2(KTR_PMAP, "%s: warning: stealing tid %d", __func__, tid); 3679 3680 tidbusy[thiscpu][tid]->pm_tid[thiscpu] = TID_NONE; 3681 3682 /* Flush all entries from TLB0 matching this TID. */ 3683 tid_flush(tid); 3684 } 3685 3686 tidbusy[thiscpu][tid] = pmap; 3687 pmap->pm_tid[thiscpu] = tid; 3688 __asm __volatile("msync; isync"); 3689 3690 CTR3(KTR_PMAP, "%s: e (%02d next = %02d)", __func__, tid, 3691 PCPU_GET(booke.tid_next)); 3692 3693 return (tid); 3694 } 3695 3696 /**************************************************************************/ 3697 /* TLB0 handling */ 3698 /**************************************************************************/ 3699 3700 /* Convert TLB0 va and way number to tlb0[] table index. 
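 * idx = way * TLB0_ENTRIES_PER_WAY + set, with the set number taken from
 * the low EPN bits of the VA.  For instance, assuming a 512-entry, 4-way
 * TLB0 (128 entries per way), way 2 and set 5 give index 2 * 128 + 5 = 261.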
*/ 3701 static inline unsigned int 3702 tlb0_tableidx(vm_offset_t va, unsigned int way) 3703 { 3704 unsigned int idx; 3705 3706 idx = (way * TLB0_ENTRIES_PER_WAY); 3707 idx += (va & MAS2_TLB0_ENTRY_IDX_MASK) >> MAS2_TLB0_ENTRY_IDX_SHIFT; 3708 return (idx); 3709 } 3710 3711 /* 3712 * Invalidate TLB0 entry. 3713 */ 3714 static inline void 3715 tlb0_flush_entry(vm_offset_t va) 3716 { 3717 3718 CTR2(KTR_PMAP, "%s: s va=0x%08x", __func__, va); 3719 3720 mtx_assert(&tlbivax_mutex, MA_OWNED); 3721 3722 __asm __volatile("tlbivax 0, %0" :: "r"(va & MAS2_EPN_MASK)); 3723 __asm __volatile("isync; msync"); 3724 __asm __volatile("tlbsync; msync"); 3725 3726 CTR1(KTR_PMAP, "%s: e", __func__); 3727 } 3728 3729 3730 /**************************************************************************/ 3731 /* TLB1 handling */ 3732 /**************************************************************************/ 3733 3734 /* 3735 * TLB1 mapping notes: 3736 * 3737 * TLB1[0] Kernel text and data. 3738 * TLB1[1-15] Additional kernel text and data mappings (if required), PCI 3739 * windows, other devices mappings. 3740 */ 3741 3742 /* 3743 * Read an entry from given TLB1 slot. 3744 */ 3745 void 3746 tlb1_read_entry(tlb_entry_t *entry, unsigned int slot) 3747 { 3748 register_t msr; 3749 uint32_t mas0; 3750 3751 KASSERT((entry != NULL), ("%s(): Entry is NULL!", __func__)); 3752 3753 msr = mfmsr(); 3754 __asm __volatile("wrteei 0"); 3755 3756 mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(slot); 3757 mtspr(SPR_MAS0, mas0); 3758 __asm __volatile("isync; tlbre"); 3759 3760 entry->mas1 = mfspr(SPR_MAS1); 3761 entry->mas2 = mfspr(SPR_MAS2); 3762 entry->mas3 = mfspr(SPR_MAS3); 3763 3764 switch ((mfpvr() >> 16) & 0xFFFF) { 3765 case FSL_E500v2: 3766 case FSL_E500mc: 3767 case FSL_E5500: 3768 case FSL_E6500: 3769 entry->mas7 = mfspr(SPR_MAS7); 3770 break; 3771 default: 3772 entry->mas7 = 0; 3773 break; 3774 } 3775 __asm __volatile("wrtee %0" :: "r"(msr)); 3776 3777 entry->virt = entry->mas2 & MAS2_EPN_MASK; 3778 entry->phys = ((vm_paddr_t)(entry->mas7 & MAS7_RPN) << 32) | 3779 (entry->mas3 & MAS3_RPN); 3780 entry->size = 3781 tsize2size((entry->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT); 3782 } 3783 3784 struct tlbwrite_args { 3785 tlb_entry_t *e; 3786 unsigned int idx; 3787 }; 3788 3789 static void 3790 tlb1_write_entry_int(void *arg) 3791 { 3792 struct tlbwrite_args *args = arg; 3793 uint32_t mas0; 3794 3795 /* Select entry */ 3796 mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(args->idx); 3797 3798 mtspr(SPR_MAS0, mas0); 3799 mtspr(SPR_MAS1, args->e->mas1); 3800 mtspr(SPR_MAS2, args->e->mas2); 3801 mtspr(SPR_MAS3, args->e->mas3); 3802 switch ((mfpvr() >> 16) & 0xFFFF) { 3803 case FSL_E500mc: 3804 case FSL_E5500: 3805 case FSL_E6500: 3806 mtspr(SPR_MAS8, 0); 3807 /* FALLTHROUGH */ 3808 case FSL_E500v2: 3809 mtspr(SPR_MAS7, args->e->mas7); 3810 break; 3811 default: 3812 break; 3813 } 3814 3815 __asm __volatile("isync; tlbwe; isync; msync"); 3816 3817 } 3818 3819 static void 3820 tlb1_write_entry_sync(void *arg) 3821 { 3822 /* Empty synchronization point for smp_rendezvous(). */ 3823 } 3824 3825 /* 3826 * Write given entry to TLB1 hardware. 
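 * Entries marked _TLB_ENTRY_SHARED are written on all cores via
 * smp_rendezvous() once SMP is up; private entries are written locally
 * with external interrupts disabled.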
3827 */ 3828 static void 3829 tlb1_write_entry(tlb_entry_t *e, unsigned int idx) 3830 { 3831 struct tlbwrite_args args; 3832 3833 args.e = e; 3834 args.idx = idx; 3835 3836 #ifdef SMP 3837 if ((e->mas2 & _TLB_ENTRY_SHARED) && smp_started) { 3838 mb(); 3839 smp_rendezvous(tlb1_write_entry_sync, 3840 tlb1_write_entry_int, 3841 tlb1_write_entry_sync, &args); 3842 } else 3843 #endif 3844 { 3845 register_t msr; 3846 3847 msr = mfmsr(); 3848 __asm __volatile("wrteei 0"); 3849 tlb1_write_entry_int(&args); 3850 __asm __volatile("wrtee %0" :: "r"(msr)); 3851 } 3852 } 3853 3854 /* 3855 * Return the largest uint value log such that 2^log <= num. 3856 */ 3857 static unsigned int 3858 ilog2(unsigned long num) 3859 { 3860 long lz; 3861 3862 #ifdef __powerpc64__ 3863 __asm ("cntlzd %0, %1" : "=r" (lz) : "r" (num)); 3864 return (63 - lz); 3865 #else 3866 __asm ("cntlzw %0, %1" : "=r" (lz) : "r" (num)); 3867 return (31 - lz); 3868 #endif 3869 } 3870 3871 /* 3872 * Convert TLB TSIZE value to mapped region size. 3873 */ 3874 static vm_size_t 3875 tsize2size(unsigned int tsize) 3876 { 3877 3878 /* 3879 * size = 4^tsize KB 3880 * size = 4^tsize * 2^10 = 2^(2 * tsize - 10) 3881 */ 3882 3883 return ((1 << (2 * tsize)) * 1024); 3884 } 3885 3886 /* 3887 * Convert region size (must be power of 4) to TLB TSIZE value. 3888 */ 3889 static unsigned int 3890 size2tsize(vm_size_t size) 3891 { 3892 3893 return (ilog2(size) / 2 - 5); 3894 } 3895 3896 /* 3897 * Register permanent kernel mapping in TLB1. 3898 * 3899 * Entries are created starting from index 0 (current free entry is 3900 * kept in tlb1_idx) and are not supposed to be invalidated. 3901 */ 3902 int 3903 tlb1_set_entry(vm_offset_t va, vm_paddr_t pa, vm_size_t size, 3904 uint32_t flags) 3905 { 3906 tlb_entry_t e; 3907 uint32_t ts, tid; 3908 int tsize, index; 3909 3910 for (index = 0; index < TLB1_ENTRIES; index++) { 3911 tlb1_read_entry(&e, index); 3912 if ((e.mas1 & MAS1_VALID) == 0) 3913 break; 3914 /* Check if we're just updating the flags, and update them. */ 3915 if (e.phys == pa && e.virt == va && e.size == size) { 3916 e.mas2 = (va & MAS2_EPN_MASK) | flags; 3917 tlb1_write_entry(&e, index); 3918 return (0); 3919 } 3920 } 3921 if (index >= TLB1_ENTRIES) { 3922 printf("tlb1_set_entry: TLB1 full!\n"); 3923 return (-1); 3924 } 3925 3926 /* Convert size to TSIZE */ 3927 tsize = size2tsize(size); 3928 3929 tid = (TID_KERNEL << MAS1_TID_SHIFT) & MAS1_TID_MASK; 3930 /* XXX TS is hard coded to 0 for now as we only use single address space */ 3931 ts = (0 << MAS1_TS_SHIFT) & MAS1_TS_MASK; 3932 3933 e.phys = pa; 3934 e.virt = va; 3935 e.size = size; 3936 e.mas1 = MAS1_VALID | MAS1_IPROT | ts | tid; 3937 e.mas1 |= ((tsize << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK); 3938 e.mas2 = (va & MAS2_EPN_MASK) | flags; 3939 3940 /* Set supervisor RWX permission bits */ 3941 e.mas3 = (pa & MAS3_RPN) | MAS3_SR | MAS3_SW | MAS3_SX; 3942 e.mas7 = (pa >> 32) & MAS7_RPN; 3943 3944 tlb1_write_entry(&e, index); 3945 3946 /* 3947 * XXX in general TLB1 updates should be propagated between CPUs, 3948 * since current design assumes to have the same TLB1 set-up on all 3949 * cores. 3950 */ 3951 return (0); 3952 } 3953 3954 /* 3955 * Map in contiguous RAM region into the TLB1 using maximum of 3956 * KERNEL_REGION_MAX_TLB_ENTRIES entries. 3957 * 3958 * If necessary round up last entry size and return total size 3959 * used by all allocated entries. 
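 * Entry sizes are powers of 4 chosen greedily from 64MB downwards; if the
 * entry budget would be exceeded, trailing entries are coalesced into a
 * single larger one, which may over-map.  As an illustration, a 96MB
 * region is covered by 64MB + 16MB + 16MB entries.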
3960 */ 3961 vm_size_t 3962 tlb1_mapin_region(vm_offset_t va, vm_paddr_t pa, vm_size_t size) 3963 { 3964 vm_size_t pgs[KERNEL_REGION_MAX_TLB_ENTRIES]; 3965 vm_size_t mapped, pgsz, base, mask; 3966 int idx, nents; 3967 3968 /* Round up to the next 1M */ 3969 size = roundup2(size, 1 << 20); 3970 3971 mapped = 0; 3972 idx = 0; 3973 base = va; 3974 pgsz = 64*1024*1024; 3975 while (mapped < size) { 3976 while (mapped < size && idx < KERNEL_REGION_MAX_TLB_ENTRIES) { 3977 while (pgsz > (size - mapped)) 3978 pgsz >>= 2; 3979 pgs[idx++] = pgsz; 3980 mapped += pgsz; 3981 } 3982 3983 /* We under-map. Correct for this. */ 3984 if (mapped < size) { 3985 while (pgs[idx - 1] == pgsz) { 3986 idx--; 3987 mapped -= pgsz; 3988 } 3989 /* XXX We may increase beyond out starting point. */ 3990 pgsz <<= 2; 3991 pgs[idx++] = pgsz; 3992 mapped += pgsz; 3993 } 3994 } 3995 3996 nents = idx; 3997 mask = pgs[0] - 1; 3998 /* Align address to the boundary */ 3999 if (va & mask) { 4000 va = (va + mask) & ~mask; 4001 pa = (pa + mask) & ~mask; 4002 } 4003 4004 for (idx = 0; idx < nents; idx++) { 4005 pgsz = pgs[idx]; 4006 debugf("%u: %llx -> %jx, size=%jx\n", idx, pa, 4007 (uintmax_t)va, (uintmax_t)pgsz); 4008 tlb1_set_entry(va, pa, pgsz, 4009 _TLB_ENTRY_SHARED | _TLB_ENTRY_MEM); 4010 pa += pgsz; 4011 va += pgsz; 4012 } 4013 4014 mapped = (va - base); 4015 if (bootverbose) 4016 printf("mapped size 0x%"PRIxPTR" (wasted space 0x%"PRIxPTR")\n", 4017 mapped, mapped - size); 4018 return (mapped); 4019 } 4020 4021 /* 4022 * TLB1 initialization routine, to be called after the very first 4023 * assembler level setup done in locore.S. 4024 */ 4025 void 4026 tlb1_init() 4027 { 4028 uint32_t mas0, mas1, mas2, mas3, mas7; 4029 uint32_t tsz; 4030 4031 tlb1_get_tlbconf(); 4032 4033 mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(0); 4034 mtspr(SPR_MAS0, mas0); 4035 __asm __volatile("isync; tlbre"); 4036 4037 mas1 = mfspr(SPR_MAS1); 4038 mas2 = mfspr(SPR_MAS2); 4039 mas3 = mfspr(SPR_MAS3); 4040 mas7 = mfspr(SPR_MAS7); 4041 4042 kernload = ((vm_paddr_t)(mas7 & MAS7_RPN) << 32) | 4043 (mas3 & MAS3_RPN); 4044 4045 tsz = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; 4046 kernsize += (tsz > 0) ? tsize2size(tsz) : 0; 4047 4048 /* Setup TLB miss defaults */ 4049 set_mas4_defaults(); 4050 } 4051 4052 /* 4053 * pmap_early_io_unmap() should be used in short conjunction with 4054 * pmap_early_io_map(), as in the following snippet: 4055 * 4056 * x = pmap_early_io_map(...); 4057 * <do something with x> 4058 * pmap_early_io_unmap(x, size); 4059 * 4060 * And avoiding more allocations between. 
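 * Note that tlb1_map_base is only rolled back when the region being
 * released is the most recently mapped one.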

/*
 * TLB1 initialization routine, to be called after the very first
 * assembler-level setup done in locore.S.
 */
void
tlb1_init()
{
	uint32_t mas0, mas1, mas2, mas3, mas7;
	uint32_t tsz;

	tlb1_get_tlbconf();

	mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(0);
	mtspr(SPR_MAS0, mas0);
	__asm __volatile("isync; tlbre");

	mas1 = mfspr(SPR_MAS1);
	mas2 = mfspr(SPR_MAS2);
	mas3 = mfspr(SPR_MAS3);
	mas7 = mfspr(SPR_MAS7);

	kernload = ((vm_paddr_t)(mas7 & MAS7_RPN) << 32) |
	    (mas3 & MAS3_RPN);

	tsz = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
	kernsize += (tsz > 0) ? tsize2size(tsz) : 0;

	/* Setup TLB miss defaults */
	set_mas4_defaults();
}

/*
 * pmap_early_io_unmap() should be used shortly after pmap_early_io_map(),
 * as in the following snippet:
 *
 * x = pmap_early_io_map(...);
 * <do something with x>
 * pmap_early_io_unmap(x, size);
 *
 * and with no other early mappings or allocations made in between.
 */
void
pmap_early_io_unmap(vm_offset_t va, vm_size_t size)
{
	int i;
	tlb_entry_t e;
	vm_size_t isize;

	size = roundup(size, PAGE_SIZE);
	isize = size;
	for (i = 0; i < TLB1_ENTRIES && size > 0; i++) {
		tlb1_read_entry(&e, i);
		if (!(e.mas1 & MAS1_VALID))
			continue;
		if (va <= e.virt && (va + isize) >= (e.virt + e.size)) {
			size -= e.size;
			e.mas1 &= ~MAS1_VALID;
			tlb1_write_entry(&e, i);
		}
	}
	if (tlb1_map_base == va + isize)
		tlb1_map_base -= isize;
}

vm_offset_t
pmap_early_io_map(vm_paddr_t pa, vm_size_t size)
{
	vm_paddr_t pa_base;
	vm_offset_t va, sz;
	int i;
	tlb_entry_t e;

	KASSERT(!pmap_bootstrapped, ("Do not use after PMAP is up!"));

	for (i = 0; i < TLB1_ENTRIES; i++) {
		tlb1_read_entry(&e, i);
		if (!(e.mas1 & MAS1_VALID))
			continue;
		if (pa >= e.phys && (pa + size) <=
		    (e.phys + e.size))
			return (e.virt + (pa - e.phys));
	}

	pa_base = rounddown(pa, PAGE_SIZE);
	size = roundup(size + (pa - pa_base), PAGE_SIZE);
	tlb1_map_base = roundup2(tlb1_map_base, 1 << (ilog2(size) & ~1));
	va = tlb1_map_base + (pa - pa_base);

	do {
		sz = 1 << (ilog2(size) & ~1);
		tlb1_set_entry(tlb1_map_base, pa_base, sz,
		    _TLB_ENTRY_SHARED | _TLB_ENTRY_IO);
		size -= sz;
		pa_base += sz;
		tlb1_map_base += sz;
	} while (size > 0);

	return (va);
}
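
/*
 * A minimal usage sketch, following the pattern described above
 * pmap_early_io_unmap() (the physical address and size below are
 * hypothetical, purely for illustration):
 *
 *	vm_offset_t va;
 *
 *	va = pmap_early_io_map(0xffe04500, 0x100);
 *	<access the device registers through va>
 *	pmap_early_io_unmap(va, 0x100);
 *
 * pmap_early_io_map() asserts that the pmap is not yet bootstrapped,
 * so both calls belong strictly in early boot code.
 */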

void
pmap_track_page(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t pa;
	vm_page_t page;
	struct pv_entry *pve;

	va = trunc_page(va);
	pa = pmap_kextract(va);
	page = PHYS_TO_VM_PAGE(pa);

	rw_wlock(&pvh_global_lock);
	PMAP_LOCK(pmap);

	TAILQ_FOREACH(pve, &page->md.pv_list, pv_link) {
		if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) {
			goto out;
		}
	}
	page->md.pv_tracked = true;
	pv_insert(pmap, va, page);
out:
	PMAP_UNLOCK(pmap);
	rw_wunlock(&pvh_global_lock);
}

/*
 * Set up MAS4 defaults.
 * These values are loaded to MAS0-2 on a TLB miss.
 */
static void
set_mas4_defaults(void)
{
	uint32_t mas4;

	/* Defaults: TLB0, PID0, TSIZED=4K */
	mas4 = MAS4_TLBSELD0;
	mas4 |= (TLB_SIZE_4K << MAS4_TSIZED_SHIFT) & MAS4_TSIZED_MASK;
#ifdef SMP
	mas4 |= MAS4_MD;
#endif
	mtspr(SPR_MAS4, mas4);
	__asm __volatile("isync");
}

/*
 * Return 0 if the physical IO range is encompassed by one of the
 * TLB1 entries, otherwise return the related error code.
 */
static int
tlb1_iomapped(int i, vm_paddr_t pa, vm_size_t size, vm_offset_t *va)
{
	uint32_t prot;
	vm_paddr_t pa_start;
	vm_paddr_t pa_end;
	unsigned int entry_tsize;
	vm_size_t entry_size;
	tlb_entry_t e;

	*va = (vm_offset_t)NULL;

	tlb1_read_entry(&e, i);
	/* Skip invalid entries */
	if (!(e.mas1 & MAS1_VALID))
		return (EINVAL);

	/*
	 * The entry must be cache-inhibited, guarded, and r/w
	 * so it can function as an i/o page
	 */
	prot = e.mas2 & (MAS2_I | MAS2_G);
	if (prot != (MAS2_I | MAS2_G))
		return (EPERM);

	prot = e.mas3 & (MAS3_SR | MAS3_SW);
	if (prot != (MAS3_SR | MAS3_SW))
		return (EPERM);

	/* The address should be within the entry range. */
	entry_tsize = (e.mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
	KASSERT((entry_tsize), ("tlb1_iomapped: invalid entry tsize"));

	entry_size = tsize2size(entry_tsize);
	pa_start = (((vm_paddr_t)e.mas7 & MAS7_RPN) << 32) |
	    (e.mas3 & MAS3_RPN);
	pa_end = pa_start + entry_size;

	if ((pa < pa_start) || ((pa + size) > pa_end))
		return (ERANGE);

	/* Return virtual address of this mapping. */
	*va = (e.mas2 & MAS2_EPN_MASK) + (pa - pa_start);
	return (0);
}

/*
 * Invalidate all TLB0 entries which match the given TID. Note this is
 * dedicated for cases when invalidations should NOT be propagated to other
 * CPUs.
 */
static void
tid_flush(tlbtid_t tid)
{
	register_t msr;
	uint32_t mas0, mas1, mas2;
	int entry, way;

	/* Don't evict kernel translations */
	if (tid == TID_KERNEL)
		return;

	msr = mfmsr();
	__asm __volatile("wrteei 0");

	/*
	 * Newer cores (e500mc and later) have tlbilx, which doesn't
	 * broadcast, so use it for PID invalidation.
	 */
	switch ((mfpvr() >> 16) & 0xffff) {
	case FSL_E500mc:
	case FSL_E5500:
	case FSL_E6500:
		mtspr(SPR_MAS6, tid << MAS6_SPID0_SHIFT);
		/* tlbilxpid */
		__asm __volatile("isync; .long 0x7c000024; isync; msync");
		__asm __volatile("wrtee %0" :: "r"(msr));
		return;
	}

	for (way = 0; way < TLB0_WAYS; way++)
		for (entry = 0; entry < TLB0_ENTRIES_PER_WAY; entry++) {

			mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way);
			mtspr(SPR_MAS0, mas0);

			mas2 = entry << MAS2_TLB0_ENTRY_IDX_SHIFT;
			mtspr(SPR_MAS2, mas2);

			__asm __volatile("isync; tlbre");

			mas1 = mfspr(SPR_MAS1);

			if (!(mas1 & MAS1_VALID))
				continue;
			if (((mas1 & MAS1_TID_MASK) >> MAS1_TID_SHIFT) != tid)
				continue;
			mas1 &= ~MAS1_VALID;
			mtspr(SPR_MAS1, mas1);
			__asm __volatile("isync; tlbwe; isync; msync");
		}
	__asm __volatile("wrtee %0" :: "r"(msr));
}
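
/*
 * The DDB helpers below are reached from the kernel debugger as
 * "show tlb0" and "show tlb1"; each walks the corresponding TLB array
 * and dumps the raw MAS register contents via tlb_print_entry().
 */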

#ifdef DDB
/* Print out contents of the MAS registers for each TLB0 entry */
static void
#ifdef __powerpc64__
tlb_print_entry(int i, uint32_t mas1, uint64_t mas2, uint32_t mas3,
#else
tlb_print_entry(int i, uint32_t mas1, uint32_t mas2, uint32_t mas3,
#endif
    uint32_t mas7)
{
	int as;
	char desc[3];
	tlbtid_t tid;
	vm_size_t size;
	unsigned int tsize;

	desc[2] = '\0';
	if (mas1 & MAS1_VALID)
		desc[0] = 'V';
	else
		desc[0] = ' ';

	if (mas1 & MAS1_IPROT)
		desc[1] = 'P';
	else
		desc[1] = ' ';

	as = (mas1 & MAS1_TS_MASK) ? 1 : 0;
	tid = MAS1_GETTID(mas1);

	tsize = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
	size = 0;
	if (tsize)
		size = tsize2size(tsize);

	printf("%3d: (%s) [AS=%d] "
	    "sz = 0x%08x tsz = %d tid = %d mas1 = 0x%08x "
	    "mas2(va) = 0x%"PRI0ptrX" mas3(pa) = 0x%08x mas7 = 0x%08x\n",
	    i, desc, as, size, tsize, tid, mas1, mas2, mas3, mas7);
}

DB_SHOW_COMMAND(tlb0, tlb0_print_tlbentries)
{
	uint32_t mas0, mas1, mas3, mas7;
#ifdef __powerpc64__
	uint64_t mas2;
#else
	uint32_t mas2;
#endif
	int entryidx, way, idx;

	printf("TLB0 entries:\n");
	for (way = 0; way < TLB0_WAYS; way++)
		for (entryidx = 0; entryidx < TLB0_ENTRIES_PER_WAY; entryidx++) {

			mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way);
			mtspr(SPR_MAS0, mas0);

			mas2 = entryidx << MAS2_TLB0_ENTRY_IDX_SHIFT;
			mtspr(SPR_MAS2, mas2);

			__asm __volatile("isync; tlbre");

			mas1 = mfspr(SPR_MAS1);
			mas2 = mfspr(SPR_MAS2);
			mas3 = mfspr(SPR_MAS3);
			mas7 = mfspr(SPR_MAS7);

			idx = tlb0_tableidx(mas2, way);
			tlb_print_entry(idx, mas1, mas2, mas3, mas7);
		}
}

/*
 * Print out contents of the MAS registers for each TLB1 entry
 */
DB_SHOW_COMMAND(tlb1, tlb1_print_tlbentries)
{
	uint32_t mas0, mas1, mas3, mas7;
#ifdef __powerpc64__
	uint64_t mas2;
#else
	uint32_t mas2;
#endif
	int i;

	printf("TLB1 entries:\n");
	for (i = 0; i < TLB1_ENTRIES; i++) {

		mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(i);
		mtspr(SPR_MAS0, mas0);

		__asm __volatile("isync; tlbre");

		mas1 = mfspr(SPR_MAS1);
		mas2 = mfspr(SPR_MAS2);
		mas3 = mfspr(SPR_MAS3);
		mas7 = mfspr(SPR_MAS7);

		tlb_print_entry(i, mas1, mas2, mas3, mas7);
	}
}
#endif