1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski <raj@semihalf.com> 5 * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 20 * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 22 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * Some hw specific parts of this pmap were derived or influenced 29 * by NetBSD's ibm4xx pmap module. More generic code is shared with 30 * a few other pmap modules from the FreeBSD tree. 31 */ 32 33 /* 34 * VM layout notes: 35 * 36 * Kernel and user threads run within one common virtual address space 37 * defined by AS=0. 38 * 39 * 32-bit pmap: 40 * Virtual address space layout: 41 * ----------------------------- 42 * 0x0000_0000 - 0x7fff_ffff : user process 43 * 0x8000_0000 - 0xbfff_ffff : pmap_mapdev()-ed area (PCI/PCIE etc.) 44 * 0xc000_0000 - 0xc0ff_ffff : kernel reserved 45 * 0xc000_0000 - data_end : kernel code+data, env, metadata etc. 46 * 0xc100_0000 - 0xffff_ffff : KVA 47 * 0xc100_0000 - 0xc100_3fff : reserved for page zero/copy 48 * 0xc100_4000 - 0xc200_3fff : reserved for ptbl bufs 49 * 0xc200_4000 - 0xc200_8fff : guard page + kstack0 50 * 0xc200_9000 - 0xfeef_ffff : actual free KVA space 51 * 52 * 64-bit pmap: 53 * Virtual address space layout: 54 * ----------------------------- 55 * 0x0000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff : user process 56 * 0x0000_0000_0000_0000 - 0x8fff_ffff_ffff_ffff : text, data, heap, maps, libraries 57 * 0x9000_0000_0000_0000 - 0xafff_ffff_ffff_ffff : mmio region 58 * 0xb000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff : stack 59 * 0xc000_0000_0000_0000 - 0xcfff_ffff_ffff_ffff : kernel reserved 60 * 0xc000_0000_0000_0000 - endkernel-1 : kernel code & data 61 * endkernel - msgbufp-1 : flat device tree 62 * msgbufp - kernel_pdir-1 : message buffer 63 * kernel_pdir - kernel_pp2d-1 : kernel page directory 64 * kernel_pp2d - . 
: kernel pointers to page directory 65 * pmap_zero_copy_min - crashdumpmap-1 : reserved for page zero/copy 66 * crashdumpmap - ptbl_buf_pool_vabase-1 : reserved for ptbl bufs 67 * ptbl_buf_pool_vabase - virtual_avail-1 : user page directories and page tables 68 * virtual_avail - 0xcfff_ffff_ffff_ffff : actual free KVA space 69 * 0xd000_0000_0000_0000 - 0xdfff_ffff_ffff_ffff : coprocessor region 70 * 0xe000_0000_0000_0000 - 0xefff_ffff_ffff_ffff : mmio region 71 * 0xf000_0000_0000_0000 - 0xffff_ffff_ffff_ffff : direct map 72 * 0xf000_0000_0000_0000 - +Maxmem : physmem map 73 * - 0xffff_ffff_ffff_ffff : device direct map 74 */ 75 76 #include <sys/cdefs.h> 77 __FBSDID("$FreeBSD$"); 78 79 #include "opt_ddb.h" 80 #include "opt_kstack_pages.h" 81 82 #include <sys/param.h> 83 #include <sys/conf.h> 84 #include <sys/malloc.h> 85 #include <sys/ktr.h> 86 #include <sys/proc.h> 87 #include <sys/user.h> 88 #include <sys/queue.h> 89 #include <sys/systm.h> 90 #include <sys/kernel.h> 91 #include <sys/kerneldump.h> 92 #include <sys/linker.h> 93 #include <sys/msgbuf.h> 94 #include <sys/lock.h> 95 #include <sys/mutex.h> 96 #include <sys/rwlock.h> 97 #include <sys/sched.h> 98 #include <sys/smp.h> 99 #include <sys/vmmeter.h> 100 101 #include <vm/vm.h> 102 #include <vm/vm_page.h> 103 #include <vm/vm_kern.h> 104 #include <vm/vm_pageout.h> 105 #include <vm/vm_extern.h> 106 #include <vm/vm_object.h> 107 #include <vm/vm_param.h> 108 #include <vm/vm_map.h> 109 #include <vm/vm_pager.h> 110 #include <vm/vm_phys.h> 111 #include <vm/vm_pagequeue.h> 112 #include <vm/uma.h> 113 114 #include <machine/_inttypes.h> 115 #include <machine/cpu.h> 116 #include <machine/pcb.h> 117 #include <machine/platform.h> 118 119 #include <machine/tlb.h> 120 #include <machine/spr.h> 121 #include <machine/md_var.h> 122 #include <machine/mmuvar.h> 123 #include <machine/pmap.h> 124 #include <machine/pte.h> 125 126 #include <ddb/ddb.h> 127 128 #include "mmu_if.h" 129 130 #define SPARSE_MAPDEV 131 #ifdef DEBUG 132 #define debugf(fmt, args...) printf(fmt, ##args) 133 #else 134 #define debugf(fmt, args...) 135 #endif 136 137 #ifdef __powerpc64__ 138 #define PRI0ptrX "016lx" 139 #else 140 #define PRI0ptrX "08x" 141 #endif 142 143 #define TODO panic("%s: not implemented", __func__); 144 145 extern unsigned char _etext[]; 146 extern unsigned char _end[]; 147 148 extern uint32_t *bootinfo; 149 150 vm_paddr_t kernload; 151 vm_offset_t kernstart; 152 vm_size_t kernsize; 153 154 /* Message buffer and tables. */ 155 static vm_offset_t data_start; 156 static vm_size_t data_end; 157 158 /* Phys/avail memory regions. */ 159 static struct mem_region *availmem_regions; 160 static int availmem_regions_sz; 161 static struct mem_region *physmem_regions; 162 static int physmem_regions_sz; 163 164 /* Reserved KVA space and mutex for mmu_booke_zero_page. */ 165 static vm_offset_t zero_page_va; 166 static struct mtx zero_page_mutex; 167 168 static struct mtx tlbivax_mutex; 169 170 /* Reserved KVA space and mutex for mmu_booke_copy_page. */ 171 static vm_offset_t copy_page_src_va; 172 static vm_offset_t copy_page_dst_va; 173 static struct mtx copy_page_mutex; 174 175 /**************************************************************************/ 176 /* PMAP */ 177 /**************************************************************************/ 178 179 static int mmu_booke_enter_locked(mmu_t, pmap_t, vm_offset_t, vm_page_t, 180 vm_prot_t, u_int flags, int8_t psind); 181 182 unsigned int kptbl_min; /* Index of the first kernel ptbl. 
*/ 183 unsigned int kernel_ptbls; /* Number of KVA ptbls. */ 184 #ifdef __powerpc64__ 185 unsigned int kernel_pdirs; 186 #endif 187 static uma_zone_t ptbl_root_zone; 188 189 /* 190 * If user pmap is processed with mmu_booke_remove and the resident count 191 * drops to 0, there are no more pages to remove, so we need not continue. 192 */ 193 #define PMAP_REMOVE_DONE(pmap) \ 194 ((pmap) != kernel_pmap && (pmap)->pm_stats.resident_count == 0) 195 196 #if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__) 197 extern int elf32_nxstack; 198 #endif 199 200 /**************************************************************************/ 201 /* TLB and TID handling */ 202 /**************************************************************************/ 203 204 /* Translation ID busy table */ 205 static volatile pmap_t tidbusy[MAXCPU][TID_MAX + 1]; 206 207 /* 208 * TLB0 capabilities (entry, way numbers etc.). These can vary between e500 209 * core revisions and should be read from h/w registers during early config. 210 */ 211 uint32_t tlb0_entries; 212 uint32_t tlb0_ways; 213 uint32_t tlb0_entries_per_way; 214 uint32_t tlb1_entries; 215 216 #define TLB0_ENTRIES (tlb0_entries) 217 #define TLB0_WAYS (tlb0_ways) 218 #define TLB0_ENTRIES_PER_WAY (tlb0_entries_per_way) 219 220 #define TLB1_ENTRIES (tlb1_entries) 221 222 static vm_offset_t tlb1_map_base = (vm_offset_t)VM_MAXUSER_ADDRESS + PAGE_SIZE; 223 224 static tlbtid_t tid_alloc(struct pmap *); 225 static void tid_flush(tlbtid_t tid); 226 227 #ifdef DDB 228 #ifdef __powerpc64__ 229 static void tlb_print_entry(int, uint32_t, uint64_t, uint32_t, uint32_t); 230 #else 231 static void tlb_print_entry(int, uint32_t, uint32_t, uint32_t, uint32_t); 232 #endif 233 #endif 234 235 static void tlb1_read_entry(tlb_entry_t *, unsigned int); 236 static void tlb1_write_entry(tlb_entry_t *, unsigned int); 237 static int tlb1_iomapped(int, vm_paddr_t, vm_size_t, vm_offset_t *); 238 static vm_size_t tlb1_mapin_region(vm_offset_t, vm_paddr_t, vm_size_t); 239 240 static vm_size_t tsize2size(unsigned int); 241 static unsigned int size2tsize(vm_size_t); 242 static unsigned int ilog2(unsigned long); 243 244 static void set_mas4_defaults(void); 245 246 static inline void tlb0_flush_entry(vm_offset_t); 247 static inline unsigned int tlb0_tableidx(vm_offset_t, unsigned int); 248 249 /**************************************************************************/ 250 /* Page table management */ 251 /**************************************************************************/ 252 253 static struct rwlock_padalign pvh_global_lock; 254 255 /* Data for the pv entry allocation mechanism */ 256 static uma_zone_t pvzone; 257 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; 258 259 #define PV_ENTRY_ZONE_MIN 2048 /* min pv entries in uma zone */ 260 261 #ifndef PMAP_SHPGPERPROC 262 #define PMAP_SHPGPERPROC 200 263 #endif 264 265 #ifdef __powerpc64__ 266 #define PMAP_ROOT_SIZE (sizeof(pte_t***) * PP2D_NENTRIES) 267 static pte_t *ptbl_alloc(mmu_t, pmap_t, pte_t **, 268 unsigned int, boolean_t); 269 static void ptbl_free(mmu_t, pmap_t, pte_t **, unsigned int, vm_page_t); 270 static void ptbl_hold(mmu_t, pmap_t, pte_t **, unsigned int); 271 static int ptbl_unhold(mmu_t, pmap_t, vm_offset_t); 272 #else 273 #define PMAP_ROOT_SIZE (sizeof(pte_t**) * PDIR_NENTRIES) 274 static void ptbl_init(void); 275 static struct ptbl_buf *ptbl_buf_alloc(void); 276 static void ptbl_buf_free(struct ptbl_buf *); 277 static void ptbl_free_pmap_ptbl(pmap_t, pte_t *); 278 279 static pte_t 
*ptbl_alloc(mmu_t, pmap_t, unsigned int, boolean_t); 280 static void ptbl_free(mmu_t, pmap_t, unsigned int); 281 static void ptbl_hold(mmu_t, pmap_t, unsigned int); 282 static int ptbl_unhold(mmu_t, pmap_t, unsigned int); 283 #endif 284 285 static vm_paddr_t pte_vatopa(mmu_t, pmap_t, vm_offset_t); 286 static int pte_enter(mmu_t, pmap_t, vm_page_t, vm_offset_t, uint32_t, boolean_t); 287 static int pte_remove(mmu_t, pmap_t, vm_offset_t, uint8_t); 288 static pte_t *pte_find(mmu_t, pmap_t, vm_offset_t); 289 static void kernel_pte_alloc(vm_offset_t, vm_offset_t, vm_offset_t); 290 291 static pv_entry_t pv_alloc(void); 292 static void pv_free(pv_entry_t); 293 static void pv_insert(pmap_t, vm_offset_t, vm_page_t); 294 static void pv_remove(pmap_t, vm_offset_t, vm_page_t); 295 296 static void booke_pmap_init_qpages(void); 297 298 struct ptbl_buf { 299 TAILQ_ENTRY(ptbl_buf) link; /* list link */ 300 vm_offset_t kva; /* va of mapping */ 301 }; 302 303 #ifndef __powerpc64__ 304 /* Number of kva ptbl buffers, each covering one ptbl (PTBL_PAGES). */ 305 #define PTBL_BUFS (128 * 16) 306 307 /* ptbl free list and a lock used for access synchronization. */ 308 static TAILQ_HEAD(, ptbl_buf) ptbl_buf_freelist; 309 static struct mtx ptbl_buf_freelist_lock; 310 311 /* Base address of kva space allocated fot ptbl bufs. */ 312 static vm_offset_t ptbl_buf_pool_vabase; 313 314 /* Pointer to ptbl_buf structures. */ 315 static struct ptbl_buf *ptbl_bufs; 316 #endif 317 318 #ifdef SMP 319 extern tlb_entry_t __boot_tlb1[]; 320 void pmap_bootstrap_ap(volatile uint32_t *); 321 #endif 322 323 /* 324 * Kernel MMU interface 325 */ 326 static void mmu_booke_clear_modify(mmu_t, vm_page_t); 327 static void mmu_booke_copy(mmu_t, pmap_t, pmap_t, vm_offset_t, 328 vm_size_t, vm_offset_t); 329 static void mmu_booke_copy_page(mmu_t, vm_page_t, vm_page_t); 330 static void mmu_booke_copy_pages(mmu_t, vm_page_t *, 331 vm_offset_t, vm_page_t *, vm_offset_t, int); 332 static int mmu_booke_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, 333 vm_prot_t, u_int flags, int8_t psind); 334 static void mmu_booke_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, 335 vm_page_t, vm_prot_t); 336 static void mmu_booke_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, 337 vm_prot_t); 338 static vm_paddr_t mmu_booke_extract(mmu_t, pmap_t, vm_offset_t); 339 static vm_page_t mmu_booke_extract_and_hold(mmu_t, pmap_t, vm_offset_t, 340 vm_prot_t); 341 static void mmu_booke_init(mmu_t); 342 static boolean_t mmu_booke_is_modified(mmu_t, vm_page_t); 343 static boolean_t mmu_booke_is_prefaultable(mmu_t, pmap_t, vm_offset_t); 344 static boolean_t mmu_booke_is_referenced(mmu_t, vm_page_t); 345 static int mmu_booke_ts_referenced(mmu_t, vm_page_t); 346 static vm_offset_t mmu_booke_map(mmu_t, vm_offset_t *, vm_paddr_t, vm_paddr_t, 347 int); 348 static int mmu_booke_mincore(mmu_t, pmap_t, vm_offset_t, 349 vm_paddr_t *); 350 static void mmu_booke_object_init_pt(mmu_t, pmap_t, vm_offset_t, 351 vm_object_t, vm_pindex_t, vm_size_t); 352 static boolean_t mmu_booke_page_exists_quick(mmu_t, pmap_t, vm_page_t); 353 static void mmu_booke_page_init(mmu_t, vm_page_t); 354 static int mmu_booke_page_wired_mappings(mmu_t, vm_page_t); 355 static void mmu_booke_pinit(mmu_t, pmap_t); 356 static void mmu_booke_pinit0(mmu_t, pmap_t); 357 static void mmu_booke_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, 358 vm_prot_t); 359 static void mmu_booke_qenter(mmu_t, vm_offset_t, vm_page_t *, int); 360 static void mmu_booke_qremove(mmu_t, vm_offset_t, int); 361 static void 
mmu_booke_release(mmu_t, pmap_t); 362 static void mmu_booke_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t); 363 static void mmu_booke_remove_all(mmu_t, vm_page_t); 364 static void mmu_booke_remove_write(mmu_t, vm_page_t); 365 static void mmu_booke_unwire(mmu_t, pmap_t, vm_offset_t, vm_offset_t); 366 static void mmu_booke_zero_page(mmu_t, vm_page_t); 367 static void mmu_booke_zero_page_area(mmu_t, vm_page_t, int, int); 368 static void mmu_booke_activate(mmu_t, struct thread *); 369 static void mmu_booke_deactivate(mmu_t, struct thread *); 370 static void mmu_booke_bootstrap(mmu_t, vm_offset_t, vm_offset_t); 371 static void *mmu_booke_mapdev(mmu_t, vm_paddr_t, vm_size_t); 372 static void *mmu_booke_mapdev_attr(mmu_t, vm_paddr_t, vm_size_t, vm_memattr_t); 373 static void mmu_booke_unmapdev(mmu_t, vm_offset_t, vm_size_t); 374 static vm_paddr_t mmu_booke_kextract(mmu_t, vm_offset_t); 375 static void mmu_booke_kenter(mmu_t, vm_offset_t, vm_paddr_t); 376 static void mmu_booke_kenter_attr(mmu_t, vm_offset_t, vm_paddr_t, vm_memattr_t); 377 static void mmu_booke_kremove(mmu_t, vm_offset_t); 378 static boolean_t mmu_booke_dev_direct_mapped(mmu_t, vm_paddr_t, vm_size_t); 379 static void mmu_booke_sync_icache(mmu_t, pmap_t, vm_offset_t, 380 vm_size_t); 381 static void mmu_booke_dumpsys_map(mmu_t, vm_paddr_t pa, size_t, 382 void **); 383 static void mmu_booke_dumpsys_unmap(mmu_t, vm_paddr_t pa, size_t, 384 void *); 385 static void mmu_booke_scan_init(mmu_t); 386 static vm_offset_t mmu_booke_quick_enter_page(mmu_t mmu, vm_page_t m); 387 static void mmu_booke_quick_remove_page(mmu_t mmu, vm_offset_t addr); 388 static int mmu_booke_change_attr(mmu_t mmu, vm_offset_t addr, 389 vm_size_t sz, vm_memattr_t mode); 390 static int mmu_booke_map_user_ptr(mmu_t mmu, pmap_t pm, 391 volatile const void *uaddr, void **kaddr, size_t ulen, size_t *klen); 392 static int mmu_booke_decode_kernel_ptr(mmu_t mmu, vm_offset_t addr, 393 int *is_user, vm_offset_t *decoded_addr); 394 395 396 static mmu_method_t mmu_booke_methods[] = { 397 /* pmap dispatcher interface */ 398 MMUMETHOD(mmu_clear_modify, mmu_booke_clear_modify), 399 MMUMETHOD(mmu_copy, mmu_booke_copy), 400 MMUMETHOD(mmu_copy_page, mmu_booke_copy_page), 401 MMUMETHOD(mmu_copy_pages, mmu_booke_copy_pages), 402 MMUMETHOD(mmu_enter, mmu_booke_enter), 403 MMUMETHOD(mmu_enter_object, mmu_booke_enter_object), 404 MMUMETHOD(mmu_enter_quick, mmu_booke_enter_quick), 405 MMUMETHOD(mmu_extract, mmu_booke_extract), 406 MMUMETHOD(mmu_extract_and_hold, mmu_booke_extract_and_hold), 407 MMUMETHOD(mmu_init, mmu_booke_init), 408 MMUMETHOD(mmu_is_modified, mmu_booke_is_modified), 409 MMUMETHOD(mmu_is_prefaultable, mmu_booke_is_prefaultable), 410 MMUMETHOD(mmu_is_referenced, mmu_booke_is_referenced), 411 MMUMETHOD(mmu_ts_referenced, mmu_booke_ts_referenced), 412 MMUMETHOD(mmu_map, mmu_booke_map), 413 MMUMETHOD(mmu_mincore, mmu_booke_mincore), 414 MMUMETHOD(mmu_object_init_pt, mmu_booke_object_init_pt), 415 MMUMETHOD(mmu_page_exists_quick,mmu_booke_page_exists_quick), 416 MMUMETHOD(mmu_page_init, mmu_booke_page_init), 417 MMUMETHOD(mmu_page_wired_mappings, mmu_booke_page_wired_mappings), 418 MMUMETHOD(mmu_pinit, mmu_booke_pinit), 419 MMUMETHOD(mmu_pinit0, mmu_booke_pinit0), 420 MMUMETHOD(mmu_protect, mmu_booke_protect), 421 MMUMETHOD(mmu_qenter, mmu_booke_qenter), 422 MMUMETHOD(mmu_qremove, mmu_booke_qremove), 423 MMUMETHOD(mmu_release, mmu_booke_release), 424 MMUMETHOD(mmu_remove, mmu_booke_remove), 425 MMUMETHOD(mmu_remove_all, mmu_booke_remove_all), 426 MMUMETHOD(mmu_remove_write, 
mmu_booke_remove_write), 427 MMUMETHOD(mmu_sync_icache, mmu_booke_sync_icache), 428 MMUMETHOD(mmu_unwire, mmu_booke_unwire), 429 MMUMETHOD(mmu_zero_page, mmu_booke_zero_page), 430 MMUMETHOD(mmu_zero_page_area, mmu_booke_zero_page_area), 431 MMUMETHOD(mmu_activate, mmu_booke_activate), 432 MMUMETHOD(mmu_deactivate, mmu_booke_deactivate), 433 MMUMETHOD(mmu_quick_enter_page, mmu_booke_quick_enter_page), 434 MMUMETHOD(mmu_quick_remove_page, mmu_booke_quick_remove_page), 435 436 /* Internal interfaces */ 437 MMUMETHOD(mmu_bootstrap, mmu_booke_bootstrap), 438 MMUMETHOD(mmu_dev_direct_mapped,mmu_booke_dev_direct_mapped), 439 MMUMETHOD(mmu_mapdev, mmu_booke_mapdev), 440 MMUMETHOD(mmu_mapdev_attr, mmu_booke_mapdev_attr), 441 MMUMETHOD(mmu_kenter, mmu_booke_kenter), 442 MMUMETHOD(mmu_kenter_attr, mmu_booke_kenter_attr), 443 MMUMETHOD(mmu_kextract, mmu_booke_kextract), 444 MMUMETHOD(mmu_kremove, mmu_booke_kremove), 445 MMUMETHOD(mmu_unmapdev, mmu_booke_unmapdev), 446 MMUMETHOD(mmu_change_attr, mmu_booke_change_attr), 447 MMUMETHOD(mmu_map_user_ptr, mmu_booke_map_user_ptr), 448 MMUMETHOD(mmu_decode_kernel_ptr, mmu_booke_decode_kernel_ptr), 449 450 /* dumpsys() support */ 451 MMUMETHOD(mmu_dumpsys_map, mmu_booke_dumpsys_map), 452 MMUMETHOD(mmu_dumpsys_unmap, mmu_booke_dumpsys_unmap), 453 MMUMETHOD(mmu_scan_init, mmu_booke_scan_init), 454 455 { 0, 0 } 456 }; 457 458 MMU_DEF(booke_mmu, MMU_TYPE_BOOKE, mmu_booke_methods, 0); 459 460 static __inline uint32_t 461 tlb_calc_wimg(vm_paddr_t pa, vm_memattr_t ma) 462 { 463 uint32_t attrib; 464 int i; 465 466 if (ma != VM_MEMATTR_DEFAULT) { 467 switch (ma) { 468 case VM_MEMATTR_UNCACHEABLE: 469 return (MAS2_I | MAS2_G); 470 case VM_MEMATTR_WRITE_COMBINING: 471 case VM_MEMATTR_WRITE_BACK: 472 case VM_MEMATTR_PREFETCHABLE: 473 return (MAS2_I); 474 case VM_MEMATTR_WRITE_THROUGH: 475 return (MAS2_W | MAS2_M); 476 case VM_MEMATTR_CACHEABLE: 477 return (MAS2_M); 478 } 479 } 480 481 /* 482 * Assume the page is cache inhibited and access is guarded unless 483 * it's in our available memory array. 484 */ 485 attrib = _TLB_ENTRY_IO; 486 for (i = 0; i < physmem_regions_sz; i++) { 487 if ((pa >= physmem_regions[i].mr_start) && 488 (pa < (physmem_regions[i].mr_start + 489 physmem_regions[i].mr_size))) { 490 attrib = _TLB_ENTRY_MEM; 491 break; 492 } 493 } 494 495 return (attrib); 496 } 497 498 static inline void 499 tlb_miss_lock(void) 500 { 501 #ifdef SMP 502 struct pcpu *pc; 503 504 if (!smp_started) 505 return; 506 507 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 508 if (pc != pcpup) { 509 510 CTR3(KTR_PMAP, "%s: tlb miss LOCK of CPU=%d, " 511 "tlb_lock=%p", __func__, pc->pc_cpuid, pc->pc_booke.tlb_lock); 512 513 KASSERT((pc->pc_cpuid != PCPU_GET(cpuid)), 514 ("tlb_miss_lock: tried to lock self")); 515 516 tlb_lock(pc->pc_booke.tlb_lock); 517 518 CTR1(KTR_PMAP, "%s: locked", __func__); 519 } 520 } 521 #endif 522 } 523 524 static inline void 525 tlb_miss_unlock(void) 526 { 527 #ifdef SMP 528 struct pcpu *pc; 529 530 if (!smp_started) 531 return; 532 533 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 534 if (pc != pcpup) { 535 CTR2(KTR_PMAP, "%s: tlb miss UNLOCK of CPU=%d", 536 __func__, pc->pc_cpuid); 537 538 tlb_unlock(pc->pc_booke.tlb_lock); 539 540 CTR1(KTR_PMAP, "%s: unlocked", __func__); 541 } 542 } 543 #endif 544 } 545 546 /* Return number of entries in TLB0. 
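 */

/*
 * Summary of the WIMG computation above: tlb_calc_wimg() collapses a
 * vm_memattr_t into the MAS2 WIMG bits shared by PTEs and TLB entries.
 * A physical address that is not covered by physmem_regions[] is treated
 * as device memory and gets _TLB_ENTRY_IO (cache-inhibited and guarded),
 * while RAM gets _TLB_ENTRY_MEM.  The typical use, as in
 * mmu_booke_kenter_attr() later in this file, is roughly:
 *
 *	flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID;
 *	flags |= tlb_calc_wimg(pa, ma) << PTE_MAS2_SHIFT;
 */

/* Return number of entries in TLB0.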
*/ 547 static __inline void 548 tlb0_get_tlbconf(void) 549 { 550 uint32_t tlb0_cfg; 551 552 tlb0_cfg = mfspr(SPR_TLB0CFG); 553 tlb0_entries = tlb0_cfg & TLBCFG_NENTRY_MASK; 554 tlb0_ways = (tlb0_cfg & TLBCFG_ASSOC_MASK) >> TLBCFG_ASSOC_SHIFT; 555 tlb0_entries_per_way = tlb0_entries / tlb0_ways; 556 } 557 558 /* Return number of entries in TLB1. */ 559 static __inline void 560 tlb1_get_tlbconf(void) 561 { 562 uint32_t tlb1_cfg; 563 564 tlb1_cfg = mfspr(SPR_TLB1CFG); 565 tlb1_entries = tlb1_cfg & TLBCFG_NENTRY_MASK; 566 } 567 568 /**************************************************************************/ 569 /* Page table related */ 570 /**************************************************************************/ 571 572 #ifdef __powerpc64__ 573 /* Initialize pool of kva ptbl buffers. */ 574 static void 575 ptbl_init(void) 576 { 577 } 578 579 /* Get a pointer to a PTE in a page table. */ 580 static __inline pte_t * 581 pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va) 582 { 583 pte_t **pdir; 584 pte_t *ptbl; 585 586 KASSERT((pmap != NULL), ("pte_find: invalid pmap")); 587 588 pdir = pmap->pm_pp2d[PP2D_IDX(va)]; 589 if (!pdir) 590 return NULL; 591 ptbl = pdir[PDIR_IDX(va)]; 592 return ((ptbl != NULL) ? &ptbl[PTBL_IDX(va)] : NULL); 593 } 594 595 /* 596 * allocate a page of pointers to page directories, do not preallocate the 597 * page tables 598 */ 599 static pte_t ** 600 pdir_alloc(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx, bool nosleep) 601 { 602 vm_page_t m; 603 pte_t **pdir; 604 int req; 605 606 req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; 607 while ((m = vm_page_alloc(NULL, pp2d_idx, req)) == NULL) { 608 PMAP_UNLOCK(pmap); 609 if (nosleep) { 610 return (NULL); 611 } 612 vm_wait(NULL); 613 PMAP_LOCK(pmap); 614 } 615 616 /* Zero whole ptbl. */ 617 pdir = (pte_t **)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 618 mmu_booke_zero_page(mmu, m); 619 620 return (pdir); 621 } 622 623 /* Free pdir pages and invalidate pdir entry. */ 624 static void 625 pdir_free(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx, vm_page_t m) 626 { 627 pte_t **pdir; 628 629 pdir = pmap->pm_pp2d[pp2d_idx]; 630 631 KASSERT((pdir != NULL), ("pdir_free: null pdir")); 632 633 pmap->pm_pp2d[pp2d_idx] = NULL; 634 635 vm_wire_sub(1); 636 vm_page_free_zero(m); 637 } 638 639 /* 640 * Decrement pdir pages hold count and attempt to free pdir pages. Called 641 * when removing directory entry from pdir. 642 * 643 * Return 1 if pdir pages were freed. 644 */ 645 static int 646 pdir_unhold(mmu_t mmu, pmap_t pmap, u_int pp2d_idx) 647 { 648 pte_t **pdir; 649 vm_paddr_t pa; 650 vm_page_t m; 651 652 KASSERT((pmap != kernel_pmap), 653 ("pdir_unhold: unholding kernel pdir!")); 654 655 pdir = pmap->pm_pp2d[pp2d_idx]; 656 657 /* decrement hold count */ 658 pa = DMAP_TO_PHYS((vm_offset_t) pdir); 659 m = PHYS_TO_VM_PAGE(pa); 660 661 /* 662 * Free pdir page if there are no dir entries in this pdir. 663 */ 664 m->wire_count--; 665 if (m->wire_count == 0) { 666 pdir_free(mmu, pmap, pp2d_idx, m); 667 return (1); 668 } 669 return (0); 670 } 671 672 /* 673 * Increment hold count for pdir pages. This routine is used when new ptlb 674 * entry is being inserted into pdir. 675 */ 676 static void 677 pdir_hold(mmu_t mmu, pmap_t pmap, pte_t ** pdir) 678 { 679 vm_page_t m; 680 681 KASSERT((pmap != kernel_pmap), 682 ("pdir_hold: holding kernel pdir!")); 683 684 KASSERT((pdir != NULL), ("pdir_hold: null pdir")); 685 686 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pdir)); 687 m->wire_count++; 688 } 689 690 /* Allocate page table. 
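 */

/*
 * The 64-bit pmap is a three-level tree rooted at pm_pp2d: a page of
 * pointers to page directories (indexed by PP2D_IDX(va)), each page
 * directory holding pointers to page tables (PDIR_IDX(va)), which in
 * turn hold the PTEs themselves (PTBL_IDX(va)).  A lookup is therefore
 * equivalent to the pte_find() above:
 *
 *	pte_t **pdir = pmap->pm_pp2d[PP2D_IDX(va)];
 *	pte_t *ptbl = (pdir != NULL) ? pdir[PDIR_IDX(va)] : NULL;
 *	pte_t *pte = (ptbl != NULL) ? &ptbl[PTBL_IDX(va)] : NULL;
 *
 * pdir_alloc()/pdir_free() above and ptbl_alloc()/ptbl_free() below
 * manage the middle and leaf levels; their hold counts live in the
 * wire_count of the backing vm_page (see pdir_hold()/ptbl_hold()).
 */

/* Allocate page table.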
*/ 691 static pte_t * 692 ptbl_alloc(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx, 693 boolean_t nosleep) 694 { 695 vm_page_t m; 696 pte_t *ptbl; 697 int req; 698 699 KASSERT((pdir[pdir_idx] == NULL), 700 ("%s: valid ptbl entry exists!", __func__)); 701 702 req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; 703 while ((m = vm_page_alloc(NULL, pdir_idx, req)) == NULL) { 704 PMAP_UNLOCK(pmap); 705 rw_wunlock(&pvh_global_lock); 706 if (nosleep) { 707 return (NULL); 708 } 709 vm_wait(NULL); 710 rw_wlock(&pvh_global_lock); 711 PMAP_LOCK(pmap); 712 } 713 714 /* Zero whole ptbl. */ 715 ptbl = (pte_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 716 mmu_booke_zero_page(mmu, m); 717 718 return (ptbl); 719 } 720 721 /* Free ptbl pages and invalidate pdir entry. */ 722 static void 723 ptbl_free(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx, vm_page_t m) 724 { 725 pte_t *ptbl; 726 727 ptbl = pdir[pdir_idx]; 728 729 KASSERT((ptbl != NULL), ("ptbl_free: null ptbl")); 730 731 pdir[pdir_idx] = NULL; 732 733 vm_wire_sub(1); 734 vm_page_free_zero(m); 735 } 736 737 /* 738 * Decrement ptbl pages hold count and attempt to free ptbl pages. Called 739 * when removing pte entry from ptbl. 740 * 741 * Return 1 if ptbl pages were freed. 742 */ 743 static int 744 ptbl_unhold(mmu_t mmu, pmap_t pmap, vm_offset_t va) 745 { 746 pte_t *ptbl; 747 vm_page_t m; 748 u_int pp2d_idx; 749 pte_t **pdir; 750 u_int pdir_idx; 751 752 pp2d_idx = PP2D_IDX(va); 753 pdir_idx = PDIR_IDX(va); 754 755 KASSERT((pmap != kernel_pmap), 756 ("ptbl_unhold: unholding kernel ptbl!")); 757 758 pdir = pmap->pm_pp2d[pp2d_idx]; 759 ptbl = pdir[pdir_idx]; 760 761 /* decrement hold count */ 762 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) ptbl)); 763 764 /* 765 * Free ptbl pages if there are no pte entries in this ptbl. 766 * wire_count has the same value for all ptbl pages, so check the 767 * last page. 768 */ 769 m->wire_count--; 770 if (m->wire_count == 0) { 771 ptbl_free(mmu, pmap, pdir, pdir_idx, m); 772 pdir_unhold(mmu, pmap, pp2d_idx); 773 return (1); 774 } 775 return (0); 776 } 777 778 /* 779 * Increment hold count for ptbl pages. This routine is used when new pte 780 * entry is being inserted into ptbl. 781 */ 782 static void 783 ptbl_hold(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx) 784 { 785 pte_t *ptbl; 786 vm_page_t m; 787 788 KASSERT((pmap != kernel_pmap), 789 ("ptbl_hold: holding kernel ptbl!")); 790 791 ptbl = pdir[pdir_idx]; 792 793 KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl")); 794 795 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) ptbl)); 796 m->wire_count++; 797 } 798 #else 799 800 /* Initialize pool of kva ptbl buffers. */ 801 static void 802 ptbl_init(void) 803 { 804 int i; 805 806 CTR3(KTR_PMAP, "%s: s (ptbl_bufs = 0x%08x size 0x%08x)", __func__, 807 (uint32_t)ptbl_bufs, sizeof(struct ptbl_buf) * PTBL_BUFS); 808 CTR3(KTR_PMAP, "%s: s (ptbl_buf_pool_vabase = 0x%08x size = 0x%08x)", 809 __func__, ptbl_buf_pool_vabase, PTBL_BUFS * PTBL_PAGES * PAGE_SIZE); 810 811 mtx_init(&ptbl_buf_freelist_lock, "ptbl bufs lock", NULL, MTX_DEF); 812 TAILQ_INIT(&ptbl_buf_freelist); 813 814 for (i = 0; i < PTBL_BUFS; i++) { 815 ptbl_bufs[i].kva = 816 ptbl_buf_pool_vabase + i * PTBL_PAGES * PAGE_SIZE; 817 TAILQ_INSERT_TAIL(&ptbl_buf_freelist, &ptbl_bufs[i], link); 818 } 819 } 820 821 /* Get a ptbl_buf from the freelist. 
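 */

/*
 * In the 32-bit pmap, user page tables live in dedicated KVA rather than
 * in a direct map.  ptbl_init() above carves ptbl_buf_pool_vabase into
 * PTBL_BUFS slices of PTBL_PAGES * PAGE_SIZE bytes each and puts the
 * describing ptbl_buf structures on a free list.  ptbl_alloc() below
 * pairs one such buf with freshly allocated physical pages, roughly:
 *
 *	pbuf = ptbl_buf_alloc();
 *	ptbl = (pte_t *)pbuf->kva;
 *	... vm_page_alloc() PTBL_PAGES pages into mtbl[] ...
 *	mmu_booke_qenter(mmu, (vm_offset_t)ptbl, mtbl, PTBL_PAGES);
 *
 * and ptbl_free()/ptbl_free_pmap_ptbl() undo the pairing when the table
 * is released.
 */

/* Get a ptbl_buf from the freelist.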
*/ 822 static struct ptbl_buf * 823 ptbl_buf_alloc(void) 824 { 825 struct ptbl_buf *buf; 826 827 mtx_lock(&ptbl_buf_freelist_lock); 828 buf = TAILQ_FIRST(&ptbl_buf_freelist); 829 if (buf != NULL) 830 TAILQ_REMOVE(&ptbl_buf_freelist, buf, link); 831 mtx_unlock(&ptbl_buf_freelist_lock); 832 833 CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf); 834 835 return (buf); 836 } 837 838 /* Return ptbl buff to free pool. */ 839 static void 840 ptbl_buf_free(struct ptbl_buf *buf) 841 { 842 843 CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf); 844 845 mtx_lock(&ptbl_buf_freelist_lock); 846 TAILQ_INSERT_TAIL(&ptbl_buf_freelist, buf, link); 847 mtx_unlock(&ptbl_buf_freelist_lock); 848 } 849 850 /* 851 * Search the list of allocated ptbl bufs and find on list of allocated ptbls 852 */ 853 static void 854 ptbl_free_pmap_ptbl(pmap_t pmap, pte_t *ptbl) 855 { 856 struct ptbl_buf *pbuf; 857 858 CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl); 859 860 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 861 862 TAILQ_FOREACH(pbuf, &pmap->pm_ptbl_list, link) 863 if (pbuf->kva == (vm_offset_t)ptbl) { 864 /* Remove from pmap ptbl buf list. */ 865 TAILQ_REMOVE(&pmap->pm_ptbl_list, pbuf, link); 866 867 /* Free corresponding ptbl buf. */ 868 ptbl_buf_free(pbuf); 869 break; 870 } 871 } 872 873 /* Allocate page table. */ 874 static pte_t * 875 ptbl_alloc(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx, boolean_t nosleep) 876 { 877 vm_page_t mtbl[PTBL_PAGES]; 878 vm_page_t m; 879 struct ptbl_buf *pbuf; 880 unsigned int pidx; 881 pte_t *ptbl; 882 int i, j; 883 884 CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, 885 (pmap == kernel_pmap), pdir_idx); 886 887 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 888 ("ptbl_alloc: invalid pdir_idx")); 889 KASSERT((pmap->pm_pdir[pdir_idx] == NULL), 890 ("pte_alloc: valid ptbl entry exists!")); 891 892 pbuf = ptbl_buf_alloc(); 893 if (pbuf == NULL) 894 panic("pte_alloc: couldn't alloc kernel virtual memory"); 895 896 ptbl = (pte_t *)pbuf->kva; 897 898 CTR2(KTR_PMAP, "%s: ptbl kva = %p", __func__, ptbl); 899 900 for (i = 0; i < PTBL_PAGES; i++) { 901 pidx = (PTBL_PAGES * pdir_idx) + i; 902 while ((m = vm_page_alloc(NULL, pidx, 903 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { 904 PMAP_UNLOCK(pmap); 905 rw_wunlock(&pvh_global_lock); 906 if (nosleep) { 907 ptbl_free_pmap_ptbl(pmap, ptbl); 908 for (j = 0; j < i; j++) 909 vm_page_free(mtbl[j]); 910 vm_wire_sub(i); 911 return (NULL); 912 } 913 vm_wait(NULL); 914 rw_wlock(&pvh_global_lock); 915 PMAP_LOCK(pmap); 916 } 917 mtbl[i] = m; 918 } 919 920 /* Map allocated pages into kernel_pmap. */ 921 mmu_booke_qenter(mmu, (vm_offset_t)ptbl, mtbl, PTBL_PAGES); 922 923 /* Zero whole ptbl. */ 924 bzero((caddr_t)ptbl, PTBL_PAGES * PAGE_SIZE); 925 926 /* Add pbuf to the pmap ptbl bufs list. */ 927 TAILQ_INSERT_TAIL(&pmap->pm_ptbl_list, pbuf, link); 928 929 return (ptbl); 930 } 931 932 /* Free ptbl pages and invalidate pdir entry. 
 */
static void
ptbl_free(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx)
{
	pte_t *ptbl;
	vm_paddr_t pa;
	vm_offset_t va;
	vm_page_t m;
	int i;

	CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap,
	    (pmap == kernel_pmap), pdir_idx);

	KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)),
	    ("ptbl_free: invalid pdir_idx"));

	ptbl = pmap->pm_pdir[pdir_idx];

	CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl);

	KASSERT((ptbl != NULL), ("ptbl_free: null ptbl"));

	/*
	 * Invalidate the pdir entry as soon as possible, so that other CPUs
	 * don't attempt to look up the page tables we are releasing.
	 */
	mtx_lock_spin(&tlbivax_mutex);
	tlb_miss_lock();

	pmap->pm_pdir[pdir_idx] = NULL;

	tlb_miss_unlock();
	mtx_unlock_spin(&tlbivax_mutex);

	for (i = 0; i < PTBL_PAGES; i++) {
		va = ((vm_offset_t)ptbl + (i * PAGE_SIZE));
		pa = pte_vatopa(mmu, kernel_pmap, va);
		m = PHYS_TO_VM_PAGE(pa);
		vm_page_free_zero(m);
		vm_wire_sub(1);
		mmu_booke_kremove(mmu, va);
	}

	ptbl_free_pmap_ptbl(pmap, ptbl);
}

/*
 * Decrement ptbl pages hold count and attempt to free ptbl pages.
 * Called when removing pte entry from ptbl.
 *
 * Return 1 if ptbl pages were freed.
 */
static int
ptbl_unhold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx)
{
	pte_t *ptbl;
	vm_paddr_t pa;
	vm_page_t m;
	int i;

	CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap,
	    (pmap == kernel_pmap), pdir_idx);

	KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)),
	    ("ptbl_unhold: invalid pdir_idx"));
	KASSERT((pmap != kernel_pmap),
	    ("ptbl_unhold: unholding kernel ptbl!"));

	ptbl = pmap->pm_pdir[pdir_idx];

	//debugf("ptbl_unhold: ptbl = 0x%08x\n", (u_int32_t)ptbl);
	KASSERT(((vm_offset_t)ptbl >= VM_MIN_KERNEL_ADDRESS),
	    ("ptbl_unhold: non kva ptbl"));

	/* decrement hold count */
	for (i = 0; i < PTBL_PAGES; i++) {
		pa = pte_vatopa(mmu, kernel_pmap,
		    (vm_offset_t)ptbl + (i * PAGE_SIZE));
		m = PHYS_TO_VM_PAGE(pa);
		m->wire_count--;
	}

	/*
	 * Free ptbl pages if there are no pte entries in this ptbl.
	 * wire_count has the same value for all ptbl pages, so check the last
	 * page.
	 */
	if (m->wire_count == 0) {
		ptbl_free(mmu, pmap, pdir_idx);

		//debugf("ptbl_unhold: e (freed ptbl)\n");
		return (1);
	}

	return (0);
}

/*
 * Increment hold count for ptbl pages. This routine is used when a new pte
 * entry is being inserted into the ptbl.
 */
static void
ptbl_hold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx)
{
	vm_paddr_t pa;
	pte_t *ptbl;
	vm_page_t m;
	int i;

	CTR3(KTR_PMAP, "%s: pmap = %p pdir_idx = %d", __func__, pmap,
	    pdir_idx);

	KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)),
	    ("ptbl_hold: invalid pdir_idx"));
	KASSERT((pmap != kernel_pmap),
	    ("ptbl_hold: holding kernel ptbl!"));

	ptbl = pmap->pm_pdir[pdir_idx];

	KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl"));

	for (i = 0; i < PTBL_PAGES; i++) {
		pa = pte_vatopa(mmu, kernel_pmap,
		    (vm_offset_t)ptbl + (i * PAGE_SIZE));
		m = PHYS_TO_VM_PAGE(pa);
		m->wire_count++;
	}
}
#endif

/* Allocate pv_entry structure.
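 */

/*
 * pv_entry bookkeeping: every managed mapping is recorded as a
 * (pmap, va) pair on the owning vm_page's md.pv_list, so the
 * page-oriented operations in this file can find all mappings of a page.
 * pte_enter() calls pv_insert() when it installs a managed mapping and
 * pte_remove() calls pv_remove() when it tears one down, roughly:
 *
 *	if ((m->oflags & VPO_UNMANAGED) == 0) {
 *		flags |= PTE_MANAGED;
 *		pv_insert(pmap, va, m);
 *	}
 *
 * pv_alloc() below wakes the pagedaemon once pv_entry_count exceeds
 * pv_entry_high_water (9/10 of pv_entry_max, set in mmu_booke_init()).
 */

/* Allocate pv_entry structure.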
*/ 1063 pv_entry_t 1064 pv_alloc(void) 1065 { 1066 pv_entry_t pv; 1067 1068 pv_entry_count++; 1069 if (pv_entry_count > pv_entry_high_water) 1070 pagedaemon_wakeup(0); /* XXX powerpc NUMA */ 1071 pv = uma_zalloc(pvzone, M_NOWAIT); 1072 1073 return (pv); 1074 } 1075 1076 /* Free pv_entry structure. */ 1077 static __inline void 1078 pv_free(pv_entry_t pve) 1079 { 1080 1081 pv_entry_count--; 1082 uma_zfree(pvzone, pve); 1083 } 1084 1085 1086 /* Allocate and initialize pv_entry structure. */ 1087 static void 1088 pv_insert(pmap_t pmap, vm_offset_t va, vm_page_t m) 1089 { 1090 pv_entry_t pve; 1091 1092 //int su = (pmap == kernel_pmap); 1093 //debugf("pv_insert: s (su = %d pmap = 0x%08x va = 0x%08x m = 0x%08x)\n", su, 1094 // (u_int32_t)pmap, va, (u_int32_t)m); 1095 1096 pve = pv_alloc(); 1097 if (pve == NULL) 1098 panic("pv_insert: no pv entries!"); 1099 1100 pve->pv_pmap = pmap; 1101 pve->pv_va = va; 1102 1103 /* add to pv_list */ 1104 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1105 rw_assert(&pvh_global_lock, RA_WLOCKED); 1106 1107 TAILQ_INSERT_TAIL(&m->md.pv_list, pve, pv_link); 1108 1109 //debugf("pv_insert: e\n"); 1110 } 1111 1112 /* Destroy pv entry. */ 1113 static void 1114 pv_remove(pmap_t pmap, vm_offset_t va, vm_page_t m) 1115 { 1116 pv_entry_t pve; 1117 1118 //int su = (pmap == kernel_pmap); 1119 //debugf("pv_remove: s (su = %d pmap = 0x%08x va = 0x%08x)\n", su, (u_int32_t)pmap, va); 1120 1121 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1122 rw_assert(&pvh_global_lock, RA_WLOCKED); 1123 1124 /* find pv entry */ 1125 TAILQ_FOREACH(pve, &m->md.pv_list, pv_link) { 1126 if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) { 1127 /* remove from pv_list */ 1128 TAILQ_REMOVE(&m->md.pv_list, pve, pv_link); 1129 if (TAILQ_EMPTY(&m->md.pv_list)) 1130 vm_page_aflag_clear(m, PGA_WRITEABLE); 1131 1132 /* free pv entry struct */ 1133 pv_free(pve); 1134 break; 1135 } 1136 } 1137 1138 //debugf("pv_remove: e\n"); 1139 } 1140 1141 #ifdef __powerpc64__ 1142 /* 1143 * Clean pte entry, try to free page table page if requested. 1144 * 1145 * Return 1 if ptbl pages were freed, otherwise return 0. 1146 */ 1147 static int 1148 pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, u_int8_t flags) 1149 { 1150 vm_page_t m; 1151 pte_t *pte; 1152 1153 pte = pte_find(mmu, pmap, va); 1154 KASSERT(pte != NULL, ("%s: NULL pte", __func__)); 1155 1156 if (!PTE_ISVALID(pte)) 1157 return (0); 1158 1159 /* Get vm_page_t for mapped pte. */ 1160 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 1161 1162 if (PTE_ISWIRED(pte)) 1163 pmap->pm_stats.wired_count--; 1164 1165 /* Handle managed entry. */ 1166 if (PTE_ISMANAGED(pte)) { 1167 1168 /* Handle modified pages. */ 1169 if (PTE_ISMODIFIED(pte)) 1170 vm_page_dirty(m); 1171 1172 /* Referenced pages. */ 1173 if (PTE_ISREFERENCED(pte)) 1174 vm_page_aflag_set(m, PGA_REFERENCED); 1175 1176 /* Remove pv_entry from pv_list. */ 1177 pv_remove(pmap, va, m); 1178 } else if (m->md.pv_tracked) { 1179 pv_remove(pmap, va, m); 1180 if (TAILQ_EMPTY(&m->md.pv_list)) 1181 m->md.pv_tracked = false; 1182 } 1183 mtx_lock_spin(&tlbivax_mutex); 1184 tlb_miss_lock(); 1185 1186 tlb0_flush_entry(va); 1187 *pte = 0; 1188 1189 tlb_miss_unlock(); 1190 mtx_unlock_spin(&tlbivax_mutex); 1191 1192 pmap->pm_stats.resident_count--; 1193 1194 if (flags & PTBL_UNHOLD) { 1195 return (ptbl_unhold(mmu, pmap, va)); 1196 } 1197 return (0); 1198 } 1199 1200 /* 1201 * Insert PTE for a given page and virtual address. 
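 */

/*
 * PTE updates in this file follow a common discipline: take
 * tlbivax_mutex and the remote TLB miss locks, invalidate any stale
 * TLB0 entry for the VA, then store the new PTE value, e.g. as in
 * pte_enter()/pte_remove():
 *
 *	mtx_lock_spin(&tlbivax_mutex);
 *	tlb_miss_lock();
 *	tlb0_flush_entry(va);
 *	*pte = pte_tmp;
 *	tlb_miss_unlock();
 *	mtx_unlock_spin(&tlbivax_mutex);
 *
 * which keeps the software page tables and TLB0 consistent with the
 * TLB miss handlers running on other CPUs.
 */

/*
 * Insert PTE for a given page and virtual address.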
1202 */ 1203 static int 1204 pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags, 1205 boolean_t nosleep) 1206 { 1207 unsigned int pp2d_idx = PP2D_IDX(va); 1208 unsigned int pdir_idx = PDIR_IDX(va); 1209 unsigned int ptbl_idx = PTBL_IDX(va); 1210 pte_t *ptbl, *pte, pte_tmp; 1211 pte_t **pdir; 1212 1213 /* Get the page directory pointer. */ 1214 pdir = pmap->pm_pp2d[pp2d_idx]; 1215 if (pdir == NULL) 1216 pdir = pdir_alloc(mmu, pmap, pp2d_idx, nosleep); 1217 1218 /* Get the page table pointer. */ 1219 ptbl = pdir[pdir_idx]; 1220 1221 if (ptbl == NULL) { 1222 /* Allocate page table pages. */ 1223 ptbl = ptbl_alloc(mmu, pmap, pdir, pdir_idx, nosleep); 1224 if (ptbl == NULL) { 1225 KASSERT(nosleep, ("nosleep and NULL ptbl")); 1226 return (ENOMEM); 1227 } 1228 pte = &ptbl[ptbl_idx]; 1229 } else { 1230 /* 1231 * Check if there is valid mapping for requested va, if there 1232 * is, remove it. 1233 */ 1234 pte = &ptbl[ptbl_idx]; 1235 if (PTE_ISVALID(pte)) { 1236 pte_remove(mmu, pmap, va, PTBL_HOLD); 1237 } else { 1238 /* 1239 * pte is not used, increment hold count for ptbl 1240 * pages. 1241 */ 1242 if (pmap != kernel_pmap) 1243 ptbl_hold(mmu, pmap, pdir, pdir_idx); 1244 } 1245 } 1246 1247 if (pdir[pdir_idx] == NULL) { 1248 if (pmap != kernel_pmap && pmap->pm_pp2d[pp2d_idx] != NULL) 1249 pdir_hold(mmu, pmap, pdir); 1250 pdir[pdir_idx] = ptbl; 1251 } 1252 if (pmap->pm_pp2d[pp2d_idx] == NULL) 1253 pmap->pm_pp2d[pp2d_idx] = pdir; 1254 1255 /* 1256 * Insert pv_entry into pv_list for mapped page if part of managed 1257 * memory. 1258 */ 1259 if ((m->oflags & VPO_UNMANAGED) == 0) { 1260 flags |= PTE_MANAGED; 1261 1262 /* Create and insert pv entry. */ 1263 pv_insert(pmap, va, m); 1264 } 1265 1266 pmap->pm_stats.resident_count++; 1267 1268 pte_tmp = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m)); 1269 pte_tmp |= (PTE_VALID | flags); 1270 1271 mtx_lock_spin(&tlbivax_mutex); 1272 tlb_miss_lock(); 1273 1274 tlb0_flush_entry(va); 1275 *pte = pte_tmp; 1276 1277 tlb_miss_unlock(); 1278 mtx_unlock_spin(&tlbivax_mutex); 1279 1280 return (0); 1281 } 1282 1283 /* Return the pa for the given pmap/va. */ 1284 static vm_paddr_t 1285 pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1286 { 1287 vm_paddr_t pa = 0; 1288 pte_t *pte; 1289 1290 pte = pte_find(mmu, pmap, va); 1291 if ((pte != NULL) && PTE_ISVALID(pte)) 1292 pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); 1293 return (pa); 1294 } 1295 1296 1297 /* allocate pte entries to manage (addr & mask) to (addr & mask) + size */ 1298 static void 1299 kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr, vm_offset_t pdir) 1300 { 1301 int i, j; 1302 vm_offset_t va; 1303 pte_t *pte; 1304 1305 va = addr; 1306 /* Initialize kernel pdir */ 1307 for (i = 0; i < kernel_pdirs; i++) { 1308 kernel_pmap->pm_pp2d[i + PP2D_IDX(va)] = 1309 (pte_t **)(pdir + (i * PAGE_SIZE * PDIR_PAGES)); 1310 for (j = PDIR_IDX(va + (i * PAGE_SIZE * PDIR_NENTRIES * PTBL_NENTRIES)); 1311 j < PDIR_NENTRIES; j++) { 1312 kernel_pmap->pm_pp2d[i + PP2D_IDX(va)][j] = 1313 (pte_t *)(pdir + (kernel_pdirs * PAGE_SIZE) + 1314 (((i * PDIR_NENTRIES) + j) * PAGE_SIZE)); 1315 } 1316 } 1317 1318 /* 1319 * Fill in PTEs covering kernel code and data. They are not required 1320 * for address translation, as this area is covered by static TLB1 1321 * entries, but for pte_vatopa() to work correctly with kernel area 1322 * addresses. 
1323 */ 1324 for (va = addr; va < data_end; va += PAGE_SIZE) { 1325 pte = &(kernel_pmap->pm_pp2d[PP2D_IDX(va)][PDIR_IDX(va)][PTBL_IDX(va)]); 1326 *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart)); 1327 *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | 1328 PTE_VALID | PTE_PS_4KB; 1329 } 1330 } 1331 #else 1332 /* 1333 * Clean pte entry, try to free page table page if requested. 1334 * 1335 * Return 1 if ptbl pages were freed, otherwise return 0. 1336 */ 1337 static int 1338 pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, uint8_t flags) 1339 { 1340 unsigned int pdir_idx = PDIR_IDX(va); 1341 unsigned int ptbl_idx = PTBL_IDX(va); 1342 vm_page_t m; 1343 pte_t *ptbl; 1344 pte_t *pte; 1345 1346 //int su = (pmap == kernel_pmap); 1347 //debugf("pte_remove: s (su = %d pmap = 0x%08x va = 0x%08x flags = %d)\n", 1348 // su, (u_int32_t)pmap, va, flags); 1349 1350 ptbl = pmap->pm_pdir[pdir_idx]; 1351 KASSERT(ptbl, ("pte_remove: null ptbl")); 1352 1353 pte = &ptbl[ptbl_idx]; 1354 1355 if (pte == NULL || !PTE_ISVALID(pte)) 1356 return (0); 1357 1358 if (PTE_ISWIRED(pte)) 1359 pmap->pm_stats.wired_count--; 1360 1361 /* Get vm_page_t for mapped pte. */ 1362 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 1363 1364 /* Handle managed entry. */ 1365 if (PTE_ISMANAGED(pte)) { 1366 1367 if (PTE_ISMODIFIED(pte)) 1368 vm_page_dirty(m); 1369 1370 if (PTE_ISREFERENCED(pte)) 1371 vm_page_aflag_set(m, PGA_REFERENCED); 1372 1373 pv_remove(pmap, va, m); 1374 } else if (m->md.pv_tracked) { 1375 /* 1376 * Always pv_insert()/pv_remove() on MPC85XX, in case DPAA is 1377 * used. This is needed by the NCSW support code for fast 1378 * VA<->PA translation. 1379 */ 1380 pv_remove(pmap, va, m); 1381 if (TAILQ_EMPTY(&m->md.pv_list)) 1382 m->md.pv_tracked = false; 1383 } 1384 1385 mtx_lock_spin(&tlbivax_mutex); 1386 tlb_miss_lock(); 1387 1388 tlb0_flush_entry(va); 1389 *pte = 0; 1390 1391 tlb_miss_unlock(); 1392 mtx_unlock_spin(&tlbivax_mutex); 1393 1394 pmap->pm_stats.resident_count--; 1395 1396 if (flags & PTBL_UNHOLD) { 1397 //debugf("pte_remove: e (unhold)\n"); 1398 return (ptbl_unhold(mmu, pmap, pdir_idx)); 1399 } 1400 1401 //debugf("pte_remove: e\n"); 1402 return (0); 1403 } 1404 1405 /* 1406 * Insert PTE for a given page and virtual address. 1407 */ 1408 static int 1409 pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags, 1410 boolean_t nosleep) 1411 { 1412 unsigned int pdir_idx = PDIR_IDX(va); 1413 unsigned int ptbl_idx = PTBL_IDX(va); 1414 pte_t *ptbl, *pte, pte_tmp; 1415 1416 CTR4(KTR_PMAP, "%s: su = %d pmap = %p va = %p", __func__, 1417 pmap == kernel_pmap, pmap, va); 1418 1419 /* Get the page table pointer. */ 1420 ptbl = pmap->pm_pdir[pdir_idx]; 1421 1422 if (ptbl == NULL) { 1423 /* Allocate page table pages. */ 1424 ptbl = ptbl_alloc(mmu, pmap, pdir_idx, nosleep); 1425 if (ptbl == NULL) { 1426 KASSERT(nosleep, ("nosleep and NULL ptbl")); 1427 return (ENOMEM); 1428 } 1429 pmap->pm_pdir[pdir_idx] = ptbl; 1430 pte = &ptbl[ptbl_idx]; 1431 } else { 1432 /* 1433 * Check if there is valid mapping for requested 1434 * va, if there is, remove it. 1435 */ 1436 pte = &pmap->pm_pdir[pdir_idx][ptbl_idx]; 1437 if (PTE_ISVALID(pte)) { 1438 pte_remove(mmu, pmap, va, PTBL_HOLD); 1439 } else { 1440 /* 1441 * pte is not used, increment hold count 1442 * for ptbl pages. 1443 */ 1444 if (pmap != kernel_pmap) 1445 ptbl_hold(mmu, pmap, pdir_idx); 1446 } 1447 } 1448 1449 /* 1450 * Insert pv_entry into pv_list for mapped page if part of managed 1451 * memory. 
1452 */ 1453 if ((m->oflags & VPO_UNMANAGED) == 0) { 1454 flags |= PTE_MANAGED; 1455 1456 /* Create and insert pv entry. */ 1457 pv_insert(pmap, va, m); 1458 } 1459 1460 pmap->pm_stats.resident_count++; 1461 1462 pte_tmp = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m)); 1463 pte_tmp |= (PTE_VALID | flags | PTE_PS_4KB); /* 4KB pages only */ 1464 1465 mtx_lock_spin(&tlbivax_mutex); 1466 tlb_miss_lock(); 1467 1468 tlb0_flush_entry(va); 1469 *pte = pte_tmp; 1470 1471 tlb_miss_unlock(); 1472 mtx_unlock_spin(&tlbivax_mutex); 1473 return (0); 1474 } 1475 1476 /* Return the pa for the given pmap/va. */ 1477 static vm_paddr_t 1478 pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1479 { 1480 vm_paddr_t pa = 0; 1481 pte_t *pte; 1482 1483 pte = pte_find(mmu, pmap, va); 1484 if ((pte != NULL) && PTE_ISVALID(pte)) 1485 pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); 1486 return (pa); 1487 } 1488 1489 /* Get a pointer to a PTE in a page table. */ 1490 static pte_t * 1491 pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1492 { 1493 unsigned int pdir_idx = PDIR_IDX(va); 1494 unsigned int ptbl_idx = PTBL_IDX(va); 1495 1496 KASSERT((pmap != NULL), ("pte_find: invalid pmap")); 1497 1498 if (pmap->pm_pdir[pdir_idx]) 1499 return (&(pmap->pm_pdir[pdir_idx][ptbl_idx])); 1500 1501 return (NULL); 1502 } 1503 1504 /* Set up kernel page tables. */ 1505 static void 1506 kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr, vm_offset_t pdir) 1507 { 1508 int i; 1509 vm_offset_t va; 1510 pte_t *pte; 1511 1512 /* Initialize kernel pdir */ 1513 for (i = 0; i < kernel_ptbls; i++) 1514 kernel_pmap->pm_pdir[kptbl_min + i] = 1515 (pte_t *)(pdir + (i * PAGE_SIZE * PTBL_PAGES)); 1516 1517 /* 1518 * Fill in PTEs covering kernel code and data. They are not required 1519 * for address translation, as this area is covered by static TLB1 1520 * entries, but for pte_vatopa() to work correctly with kernel area 1521 * addresses. 1522 */ 1523 for (va = addr; va < data_end; va += PAGE_SIZE) { 1524 pte = &(kernel_pmap->pm_pdir[PDIR_IDX(va)][PTBL_IDX(va)]); 1525 *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart)); 1526 *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | 1527 PTE_VALID | PTE_PS_4KB; 1528 } 1529 } 1530 #endif 1531 1532 /**************************************************************************/ 1533 /* PMAP related */ 1534 /**************************************************************************/ 1535 1536 /* 1537 * This is called during booke_init, before the system is really initialized. 1538 */ 1539 static void 1540 mmu_booke_bootstrap(mmu_t mmu, vm_offset_t start, vm_offset_t kernelend) 1541 { 1542 vm_paddr_t phys_kernelend; 1543 struct mem_region *mp, *mp1; 1544 int cnt, i, j; 1545 vm_paddr_t s, e, sz; 1546 vm_paddr_t physsz, hwphyssz; 1547 u_int phys_avail_count; 1548 vm_size_t kstack0_sz; 1549 vm_offset_t kernel_pdir, kstack0; 1550 vm_paddr_t kstack0_phys; 1551 void *dpcpu; 1552 vm_offset_t kernel_ptbl_root; 1553 1554 debugf("mmu_booke_bootstrap: entered\n"); 1555 1556 /* Set interesting system properties */ 1557 #ifdef __powerpc64__ 1558 hw_direct_map = 1; 1559 #else 1560 hw_direct_map = 0; 1561 #endif 1562 #if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__) 1563 elf32_nxstack = 1; 1564 #endif 1565 1566 /* Initialize invalidation mutex */ 1567 mtx_init(&tlbivax_mutex, "tlbivax", NULL, MTX_SPIN); 1568 1569 /* Read TLB0 size and associativity. */ 1570 tlb0_get_tlbconf(); 1571 1572 /* 1573 * Align kernel start and end address (kernel image). 1574 * Note that kernel end does not necessarily relate to kernsize. 
 * kernsize is the size of the kernel that is actually mapped.
 */
	data_start = round_page(kernelend);
	data_end = data_start;

	/* Allocate the dynamic per-cpu area. */
	dpcpu = (void *)data_end;
	data_end += DPCPU_SIZE;

	/* Allocate space for the message buffer. */
	msgbufp = (struct msgbuf *)data_end;
	data_end += msgbufsize;
	debugf(" msgbufp at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n",
	    (uintptr_t)msgbufp, data_end);

	data_end = round_page(data_end);

#ifdef __powerpc64__
	kernel_ptbl_root = data_end;
	data_end += PP2D_NENTRIES * sizeof(pte_t**);
#else
	/* Allocate space for ptbl_bufs. */
	ptbl_bufs = (struct ptbl_buf *)data_end;
	data_end += sizeof(struct ptbl_buf) * PTBL_BUFS;
	debugf(" ptbl_bufs at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n",
	    (uintptr_t)ptbl_bufs, data_end);

	data_end = round_page(data_end);
	kernel_ptbl_root = data_end;
	data_end += PDIR_NENTRIES * sizeof(pte_t*);
#endif

	/* Allocate PTE tables for kernel KVA. */
	kernel_pdir = data_end;
	kernel_ptbls = howmany(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
	    PDIR_SIZE);
#ifdef __powerpc64__
	kernel_pdirs = howmany(kernel_ptbls, PDIR_NENTRIES);
	data_end += kernel_pdirs * PDIR_PAGES * PAGE_SIZE;
#endif
	data_end += kernel_ptbls * PTBL_PAGES * PAGE_SIZE;
	debugf(" kernel ptbls: %d\n", kernel_ptbls);
	debugf(" kernel pdir at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n",
	    kernel_pdir, data_end);

	debugf(" data_end: 0x%"PRI0ptrX"\n", data_end);
	if (data_end - kernstart > kernsize) {
		kernsize += tlb1_mapin_region(kernstart + kernsize,
		    kernload + kernsize, (data_end - kernstart) - kernsize);
	}
	data_end = kernstart + kernsize;
	debugf(" updated data_end: 0x%"PRI0ptrX"\n", data_end);

	/*
	 * Clear the structures - note we can only do it safely after the
	 * possible additional TLB1 translations are in place (above) so that
	 * all range up to the currently calculated 'data_end' is covered.
	 */
	dpcpu_init(dpcpu, 0);
#ifdef __powerpc64__
	memset((void *)kernel_pdir, 0,
	    kernel_pdirs * PDIR_PAGES * PAGE_SIZE +
	    kernel_ptbls * PTBL_PAGES * PAGE_SIZE);
#else
	memset((void *)ptbl_bufs, 0, sizeof(struct ptbl_buf) * PTBL_BUFS);
	memset((void *)kernel_pdir, 0, kernel_ptbls * PTBL_PAGES * PAGE_SIZE);
#endif

	/*******************************************************/
	/* Set the start and end of kva. */
	/*******************************************************/
	virtual_avail = round_page(data_end);
	virtual_end = VM_MAX_KERNEL_ADDRESS;

	/* Allocate KVA space for page zero/copy operations. */
	zero_page_va = virtual_avail;
	virtual_avail += PAGE_SIZE;
	copy_page_src_va = virtual_avail;
	virtual_avail += PAGE_SIZE;
	copy_page_dst_va = virtual_avail;
	virtual_avail += PAGE_SIZE;
	debugf("zero_page_va = 0x%"PRI0ptrX"\n", zero_page_va);
	debugf("copy_page_src_va = 0x%"PRI0ptrX"\n", copy_page_src_va);
	debugf("copy_page_dst_va = 0x%"PRI0ptrX"\n", copy_page_dst_va);

	/* Initialize page zero/copy mutexes. */
	mtx_init(&zero_page_mutex, "mmu_booke_zero_page", NULL, MTX_DEF);
	mtx_init(&copy_page_mutex, "mmu_booke_copy_page", NULL, MTX_DEF);

#ifndef __powerpc64__
	/* Allocate KVA space for ptbl bufs. */
	ptbl_buf_pool_vabase = virtual_avail;
	virtual_avail += PTBL_BUFS * PTBL_PAGES * PAGE_SIZE;
	debugf("ptbl_buf_pool_vabase = 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n",
	    ptbl_buf_pool_vabase, virtual_avail);
#endif

	/* Calculate corresponding physical addresses for the kernel region. */
	phys_kernelend = kernload + kernsize;
	debugf("kernel image and allocated data:\n");
	debugf(" kernload = 0x%09llx\n", (uint64_t)kernload);
	debugf(" kernstart = 0x%"PRI0ptrX"\n", kernstart);
	debugf(" kernsize = 0x%"PRI0ptrX"\n", kernsize);

	/*
	 * Remove kernel physical address range from avail regions list. Page
	 * align all regions. Non-page aligned memory isn't very interesting
	 * to us. Also, sort the entries for ascending addresses.
	 */

	/* Retrieve phys/avail mem regions */
	mem_regions(&physmem_regions, &physmem_regions_sz,
	    &availmem_regions, &availmem_regions_sz);

	if (PHYS_AVAIL_ENTRIES < availmem_regions_sz)
		panic("mmu_booke_bootstrap: phys_avail too small");

	sz = 0;
	cnt = availmem_regions_sz;
	debugf("processing avail regions:\n");
	for (mp = availmem_regions; mp->mr_size; mp++) {
		s = mp->mr_start;
		e = mp->mr_start + mp->mr_size;
		debugf(" %09jx-%09jx -> ", (uintmax_t)s, (uintmax_t)e);
		/* Check whether this region holds all of the kernel. */
		if (s < kernload && e > phys_kernelend) {
			availmem_regions[cnt].mr_start = phys_kernelend;
			availmem_regions[cnt++].mr_size = e - phys_kernelend;
			e = kernload;
		}
		/* Look whether this region starts within the kernel. */
		if (s >= kernload && s < phys_kernelend) {
			if (e <= phys_kernelend)
				goto empty;
			s = phys_kernelend;
		}
		/* Now look whether this region ends within the kernel. */
		if (e > kernload && e <= phys_kernelend) {
			if (s >= kernload)
				goto empty;
			e = kernload;
		}
		/* Now page align the start and size of the region. */
		s = round_page(s);
		e = trunc_page(e);
		if (e < s)
			e = s;
		sz = e - s;
		debugf("%09jx-%09jx = %jx\n",
		    (uintmax_t)s, (uintmax_t)e, (uintmax_t)sz);

		/* Check whether some memory is left here. */
		if (sz == 0) {
empty:
			memmove(mp, mp + 1,
			    (cnt - (mp - availmem_regions)) * sizeof(*mp));
			cnt--;
			mp--;
			continue;
		}

		/* Do an insertion sort.
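		 */

		/*
		 * Example of the trimming above, with made-up addresses:
		 * if an avail region spans 0x00000000-0x04000000 and the
		 * kernel occupies 0x01000000-0x01400000 (kernload to
		 * phys_kernelend), the first test appends a new region
		 * 0x01400000-0x04000000 at availmem_regions[cnt] and clips
		 * the current one to 0x00000000-0x01000000.  Regions that
		 * fall entirely inside the kernel are dropped via the
		 * 'empty' label.  Whatever survives is page-aligned and then
		 * insertion-sorted by start address below, so phys_avail[]
		 * can later be filled in ascending order.
		 */

		/* Do an insertion sort.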
*/ 1737 for (mp1 = availmem_regions; mp1 < mp; mp1++) 1738 if (s < mp1->mr_start) 1739 break; 1740 if (mp1 < mp) { 1741 memmove(mp1 + 1, mp1, (char *)mp - (char *)mp1); 1742 mp1->mr_start = s; 1743 mp1->mr_size = sz; 1744 } else { 1745 mp->mr_start = s; 1746 mp->mr_size = sz; 1747 } 1748 } 1749 availmem_regions_sz = cnt; 1750 1751 /*******************************************************/ 1752 /* Steal physical memory for kernel stack from the end */ 1753 /* of the first avail region */ 1754 /*******************************************************/ 1755 kstack0_sz = kstack_pages * PAGE_SIZE; 1756 kstack0_phys = availmem_regions[0].mr_start + 1757 availmem_regions[0].mr_size; 1758 kstack0_phys -= kstack0_sz; 1759 availmem_regions[0].mr_size -= kstack0_sz; 1760 1761 /*******************************************************/ 1762 /* Fill in phys_avail table, based on availmem_regions */ 1763 /*******************************************************/ 1764 phys_avail_count = 0; 1765 physsz = 0; 1766 hwphyssz = 0; 1767 TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz); 1768 1769 debugf("fill in phys_avail:\n"); 1770 for (i = 0, j = 0; i < availmem_regions_sz; i++, j += 2) { 1771 1772 debugf(" region: 0x%jx - 0x%jx (0x%jx)\n", 1773 (uintmax_t)availmem_regions[i].mr_start, 1774 (uintmax_t)availmem_regions[i].mr_start + 1775 availmem_regions[i].mr_size, 1776 (uintmax_t)availmem_regions[i].mr_size); 1777 1778 if (hwphyssz != 0 && 1779 (physsz + availmem_regions[i].mr_size) >= hwphyssz) { 1780 debugf(" hw.physmem adjust\n"); 1781 if (physsz < hwphyssz) { 1782 phys_avail[j] = availmem_regions[i].mr_start; 1783 phys_avail[j + 1] = 1784 availmem_regions[i].mr_start + 1785 hwphyssz - physsz; 1786 physsz = hwphyssz; 1787 phys_avail_count++; 1788 } 1789 break; 1790 } 1791 1792 phys_avail[j] = availmem_regions[i].mr_start; 1793 phys_avail[j + 1] = availmem_regions[i].mr_start + 1794 availmem_regions[i].mr_size; 1795 phys_avail_count++; 1796 physsz += availmem_regions[i].mr_size; 1797 } 1798 physmem = btoc(physsz); 1799 1800 /* Calculate the last available physical address. */ 1801 for (i = 0; phys_avail[i + 2] != 0; i += 2) 1802 ; 1803 Maxmem = powerpc_btop(phys_avail[i + 1]); 1804 1805 debugf("Maxmem = 0x%08lx\n", Maxmem); 1806 debugf("phys_avail_count = %d\n", phys_avail_count); 1807 debugf("physsz = 0x%09jx physmem = %jd (0x%09jx)\n", 1808 (uintmax_t)physsz, (uintmax_t)physmem, (uintmax_t)physmem); 1809 1810 #ifdef __powerpc64__ 1811 /* 1812 * Map the physical memory contiguously in TLB1. 1813 * Round so it fits into a single mapping. 1814 */ 1815 tlb1_mapin_region(DMAP_BASE_ADDRESS, 0, 1816 phys_avail[i + 1]); 1817 #endif 1818 1819 /*******************************************************/ 1820 /* Initialize (statically allocated) kernel pmap. 
*/ 1821 /*******************************************************/ 1822 PMAP_LOCK_INIT(kernel_pmap); 1823 #ifndef __powerpc64__ 1824 kptbl_min = VM_MIN_KERNEL_ADDRESS / PDIR_SIZE; 1825 #endif 1826 #ifdef __powerpc64__ 1827 kernel_pmap->pm_pp2d = (pte_t ***)kernel_ptbl_root; 1828 #else 1829 kernel_pmap->pm_pdir = (pte_t **)kernel_ptbl_root; 1830 #endif 1831 1832 debugf("kernel_pmap = 0x%"PRI0ptrX"\n", (uintptr_t)kernel_pmap); 1833 kernel_pte_alloc(virtual_avail, kernstart, kernel_pdir); 1834 for (i = 0; i < MAXCPU; i++) { 1835 kernel_pmap->pm_tid[i] = TID_KERNEL; 1836 1837 /* Initialize each CPU's tidbusy entry 0 with kernel_pmap */ 1838 tidbusy[i][TID_KERNEL] = kernel_pmap; 1839 } 1840 1841 /* Mark kernel_pmap active on all CPUs */ 1842 CPU_FILL(&kernel_pmap->pm_active); 1843 1844 /* 1845 * Initialize the global pv list lock. 1846 */ 1847 rw_init(&pvh_global_lock, "pmap pv global"); 1848 1849 /*******************************************************/ 1850 /* Final setup */ 1851 /*******************************************************/ 1852 1853 /* Enter kstack0 into kernel map, provide guard page */ 1854 kstack0 = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE; 1855 thread0.td_kstack = kstack0; 1856 thread0.td_kstack_pages = kstack_pages; 1857 1858 debugf("kstack_sz = 0x%08x\n", kstack0_sz); 1859 debugf("kstack0_phys at 0x%09llx - 0x%09llx\n", 1860 kstack0_phys, kstack0_phys + kstack0_sz); 1861 debugf("kstack0 at 0x%"PRI0ptrX" - 0x%"PRI0ptrX"\n", 1862 kstack0, kstack0 + kstack0_sz); 1863 1864 virtual_avail += KSTACK_GUARD_PAGES * PAGE_SIZE + kstack0_sz; 1865 for (i = 0; i < kstack_pages; i++) { 1866 mmu_booke_kenter(mmu, kstack0, kstack0_phys); 1867 kstack0 += PAGE_SIZE; 1868 kstack0_phys += PAGE_SIZE; 1869 } 1870 1871 pmap_bootstrapped = 1; 1872 1873 debugf("virtual_avail = %"PRI0ptrX"\n", virtual_avail); 1874 debugf("virtual_end = %"PRI0ptrX"\n", virtual_end); 1875 1876 debugf("mmu_booke_bootstrap: exit\n"); 1877 } 1878 1879 #ifdef SMP 1880 void 1881 tlb1_ap_prep(void) 1882 { 1883 tlb_entry_t *e, tmp; 1884 unsigned int i; 1885 1886 /* Prepare TLB1 image for AP processors */ 1887 e = __boot_tlb1; 1888 for (i = 0; i < TLB1_ENTRIES; i++) { 1889 tlb1_read_entry(&tmp, i); 1890 1891 if ((tmp.mas1 & MAS1_VALID) && (tmp.mas2 & _TLB_ENTRY_SHARED)) 1892 memcpy(e++, &tmp, sizeof(tmp)); 1893 } 1894 } 1895 1896 void 1897 pmap_bootstrap_ap(volatile uint32_t *trcp __unused) 1898 { 1899 int i; 1900 1901 /* 1902 * Finish TLB1 configuration: the BSP already set up its TLB1 and we 1903 * have the snapshot of its contents in the s/w __boot_tlb1[] table 1904 * created by tlb1_ap_prep(), so use these values directly to 1905 * (re)program AP's TLB1 hardware. 1906 * 1907 * Start at index 1 because index 0 has the kernel map. 1908 */ 1909 for (i = 1; i < TLB1_ENTRIES; i++) { 1910 if (__boot_tlb1[i].mas1 & MAS1_VALID) 1911 tlb1_write_entry(&__boot_tlb1[i], i); 1912 } 1913 1914 set_mas4_defaults(); 1915 } 1916 #endif 1917 1918 static void 1919 booke_pmap_init_qpages(void) 1920 { 1921 struct pcpu *pc; 1922 int i; 1923 1924 CPU_FOREACH(i) { 1925 pc = pcpu_find(i); 1926 pc->pc_qmap_addr = kva_alloc(PAGE_SIZE); 1927 if (pc->pc_qmap_addr == 0) 1928 panic("pmap_init_qpages: unable to allocate KVA"); 1929 } 1930 } 1931 1932 SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, booke_pmap_init_qpages, NULL); 1933 1934 /* 1935 * Get the physical page address for the given pmap/virtual address. 
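 */

/*
 * SMP note: tlb1_ap_prep() above runs on the BSP and snapshots every
 * valid TLB1 entry marked _TLB_ENTRY_SHARED into the static
 * __boot_tlb1[] table.  Each AP then replays that snapshot in
 * pmap_bootstrap_ap(), starting at index 1 because index 0 already
 * holds the kernel map, before setting the MAS4 defaults:
 *
 *	for (i = 1; i < TLB1_ENTRIES; i++)
 *		if (__boot_tlb1[i].mas1 & MAS1_VALID)
 *			tlb1_write_entry(&__boot_tlb1[i], i);
 *
 * so all CPUs end up with the same shared static translations.
 */

/*
 * Get the physical page address for the given pmap/virtual address.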
1936 */ 1937 static vm_paddr_t 1938 mmu_booke_extract(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1939 { 1940 vm_paddr_t pa; 1941 1942 PMAP_LOCK(pmap); 1943 pa = pte_vatopa(mmu, pmap, va); 1944 PMAP_UNLOCK(pmap); 1945 1946 return (pa); 1947 } 1948 1949 /* 1950 * Extract the physical page address associated with the given 1951 * kernel virtual address. 1952 */ 1953 static vm_paddr_t 1954 mmu_booke_kextract(mmu_t mmu, vm_offset_t va) 1955 { 1956 tlb_entry_t e; 1957 vm_paddr_t p = 0; 1958 int i; 1959 1960 #ifdef __powerpc64__ 1961 if (va >= DMAP_BASE_ADDRESS && va <= DMAP_MAX_ADDRESS) 1962 return (DMAP_TO_PHYS(va)); 1963 #endif 1964 1965 if (va >= VM_MIN_KERNEL_ADDRESS && va <= VM_MAX_KERNEL_ADDRESS) 1966 p = pte_vatopa(mmu, kernel_pmap, va); 1967 1968 if (p == 0) { 1969 /* Check TLB1 mappings */ 1970 for (i = 0; i < TLB1_ENTRIES; i++) { 1971 tlb1_read_entry(&e, i); 1972 if (!(e.mas1 & MAS1_VALID)) 1973 continue; 1974 if (va >= e.virt && va < e.virt + e.size) 1975 return (e.phys + (va - e.virt)); 1976 } 1977 } 1978 1979 return (p); 1980 } 1981 1982 /* 1983 * Initialize the pmap module. 1984 * Called by vm_init, to initialize any structures that the pmap 1985 * system needs to map virtual memory. 1986 */ 1987 static void 1988 mmu_booke_init(mmu_t mmu) 1989 { 1990 int shpgperproc = PMAP_SHPGPERPROC; 1991 1992 /* 1993 * Initialize the address space (zone) for the pv entries. Set a 1994 * high water mark so that the system can recover from excessive 1995 * numbers of pv entries. 1996 */ 1997 pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL, 1998 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); 1999 2000 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 2001 pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count; 2002 2003 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); 2004 pv_entry_high_water = 9 * (pv_entry_max / 10); 2005 2006 uma_zone_reserve_kva(pvzone, pv_entry_max); 2007 2008 /* Pre-fill pvzone with initial number of pv entries. */ 2009 uma_prealloc(pvzone, PV_ENTRY_ZONE_MIN); 2010 2011 /* Create a UMA zone for page table roots. */ 2012 ptbl_root_zone = uma_zcreate("pmap root", PMAP_ROOT_SIZE, 2013 NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, UMA_ZONE_VM); 2014 2015 /* Initialize ptbl allocation. */ 2016 ptbl_init(); 2017 } 2018 2019 /* 2020 * Map a list of wired pages into kernel virtual address space. This is 2021 * intended for temporary mappings which do not need page modification or 2022 * references recorded. Existing mappings in the region are overwritten. 2023 */ 2024 static void 2025 mmu_booke_qenter(mmu_t mmu, vm_offset_t sva, vm_page_t *m, int count) 2026 { 2027 vm_offset_t va; 2028 2029 va = sva; 2030 while (count-- > 0) { 2031 mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(*m)); 2032 va += PAGE_SIZE; 2033 m++; 2034 } 2035 } 2036 2037 /* 2038 * Remove page mappings from kernel virtual address space. Intended for 2039 * temporary mappings entered by mmu_booke_qenter. 2040 */ 2041 static void 2042 mmu_booke_qremove(mmu_t mmu, vm_offset_t sva, int count) 2043 { 2044 vm_offset_t va; 2045 2046 va = sva; 2047 while (count-- > 0) { 2048 mmu_booke_kremove(mmu, va); 2049 va += PAGE_SIZE; 2050 } 2051 } 2052 2053 /* 2054 * Map a wired page into kernel virtual address space. 
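 *
 * A minimal sketch of the kenter/kremove pairing used elsewhere in this
 * file, e.g. by mmu_booke_zero_page_area() (assumptions: 'va' is a
 * page-aligned KVA the caller has reserved and 'm' is the page to map;
 * both names are illustrative):
 *
 * mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m));
 * bzero((caddr_t)va, PAGE_SIZE);
 * mmu_booke_kremove(mmu, va);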
2055 */ 2056 static void 2057 mmu_booke_kenter(mmu_t mmu, vm_offset_t va, vm_paddr_t pa) 2058 { 2059 2060 mmu_booke_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT); 2061 } 2062 2063 static void 2064 mmu_booke_kenter_attr(mmu_t mmu, vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma) 2065 { 2066 uint32_t flags; 2067 pte_t *pte; 2068 2069 KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && 2070 (va <= VM_MAX_KERNEL_ADDRESS)), ("mmu_booke_kenter: invalid va")); 2071 2072 flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID; 2073 flags |= tlb_calc_wimg(pa, ma) << PTE_MAS2_SHIFT; 2074 flags |= PTE_PS_4KB; 2075 2076 pte = pte_find(mmu, kernel_pmap, va); 2077 KASSERT((pte != NULL), ("mmu_booke_kenter: invalid va. NULL PTE")); 2078 2079 mtx_lock_spin(&tlbivax_mutex); 2080 tlb_miss_lock(); 2081 2082 if (PTE_ISVALID(pte)) { 2083 2084 CTR1(KTR_PMAP, "%s: replacing entry!", __func__); 2085 2086 /* Flush entry from TLB0 */ 2087 tlb0_flush_entry(va); 2088 } 2089 2090 *pte = PTE_RPN_FROM_PA(pa) | flags; 2091 2092 //debugf("mmu_booke_kenter: pdir_idx = %d ptbl_idx = %d va=0x%08x " 2093 // "pa=0x%08x rpn=0x%08x flags=0x%08x\n", 2094 // pdir_idx, ptbl_idx, va, pa, pte->rpn, pte->flags); 2095 2096 /* Flush the real memory from the instruction cache. */ 2097 if ((flags & (PTE_I | PTE_G)) == 0) 2098 __syncicache((void *)va, PAGE_SIZE); 2099 2100 tlb_miss_unlock(); 2101 mtx_unlock_spin(&tlbivax_mutex); 2102 } 2103 2104 /* 2105 * Remove a page from kernel page table. 2106 */ 2107 static void 2108 mmu_booke_kremove(mmu_t mmu, vm_offset_t va) 2109 { 2110 pte_t *pte; 2111 2112 CTR2(KTR_PMAP,"%s: s (va = 0x%"PRI0ptrX")\n", __func__, va); 2113 2114 KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && 2115 (va <= VM_MAX_KERNEL_ADDRESS)), 2116 ("mmu_booke_kremove: invalid va")); 2117 2118 pte = pte_find(mmu, kernel_pmap, va); 2119 2120 if (!PTE_ISVALID(pte)) { 2121 2122 CTR1(KTR_PMAP, "%s: invalid pte", __func__); 2123 2124 return; 2125 } 2126 2127 mtx_lock_spin(&tlbivax_mutex); 2128 tlb_miss_lock(); 2129 2130 /* Invalidate entry in TLB0, update PTE. */ 2131 tlb0_flush_entry(va); 2132 *pte = 0; 2133 2134 tlb_miss_unlock(); 2135 mtx_unlock_spin(&tlbivax_mutex); 2136 } 2137 2138 /* 2139 * Provide a kernel pointer corresponding to a given userland pointer. 2140 * The returned pointer is valid until the next time this function is 2141 * called in this thread. This is used internally in copyin/copyout. 2142 */ 2143 int 2144 mmu_booke_map_user_ptr(mmu_t mmu, pmap_t pm, volatile const void *uaddr, 2145 void **kaddr, size_t ulen, size_t *klen) 2146 { 2147 2148 if (trunc_page((uintptr_t)uaddr + ulen) > VM_MAXUSER_ADDRESS) 2149 return (EFAULT); 2150 2151 *kaddr = (void *)(uintptr_t)uaddr; 2152 if (klen) 2153 *klen = ulen; 2154 2155 return (0); 2156 } 2157 2158 /* 2159 * Figure out where a given kernel pointer (usually in a fault) points 2160 * to from the VM's perspective, potentially remapping into userland's 2161 * address space. 2162 */ 2163 static int 2164 mmu_booke_decode_kernel_ptr(mmu_t mmu, vm_offset_t addr, int *is_user, 2165 vm_offset_t *decoded_addr) 2166 { 2167 2168 if (trunc_page(addr) <= VM_MAXUSER_ADDRESS) 2169 *is_user = 1; 2170 else 2171 *is_user = 0; 2172 2173 *decoded_addr = addr; 2174 return (0); 2175 } 2176 2177 /* 2178 * Initialize pmap associated with process 0. 
2179 */ 2180 static void 2181 mmu_booke_pinit0(mmu_t mmu, pmap_t pmap) 2182 { 2183 2184 PMAP_LOCK_INIT(pmap); 2185 mmu_booke_pinit(mmu, pmap); 2186 PCPU_SET(curpmap, pmap); 2187 } 2188 2189 /* 2190 * Initialize a preallocated and zeroed pmap structure, 2191 * such as one in a vmspace structure. 2192 */ 2193 static void 2194 mmu_booke_pinit(mmu_t mmu, pmap_t pmap) 2195 { 2196 int i; 2197 2198 CTR4(KTR_PMAP, "%s: pmap = %p, proc %d '%s'", __func__, pmap, 2199 curthread->td_proc->p_pid, curthread->td_proc->p_comm); 2200 2201 KASSERT((pmap != kernel_pmap), ("pmap_pinit: initializing kernel_pmap")); 2202 2203 for (i = 0; i < MAXCPU; i++) 2204 pmap->pm_tid[i] = TID_NONE; 2205 CPU_ZERO(&kernel_pmap->pm_active); 2206 bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); 2207 #ifdef __powerpc64__ 2208 pmap->pm_pp2d = uma_zalloc(ptbl_root_zone, M_WAITOK); 2209 bzero(pmap->pm_pp2d, sizeof(pte_t **) * PP2D_NENTRIES); 2210 #else 2211 pmap->pm_pdir = uma_zalloc(ptbl_root_zone, M_WAITOK); 2212 bzero(pmap->pm_pdir, sizeof(pte_t *) * PDIR_NENTRIES); 2213 TAILQ_INIT(&pmap->pm_ptbl_list); 2214 #endif 2215 } 2216 2217 /* 2218 * Release any resources held by the given physical map. 2219 * Called when a pmap initialized by mmu_booke_pinit is being released. 2220 * Should only be called if the map contains no valid mappings. 2221 */ 2222 static void 2223 mmu_booke_release(mmu_t mmu, pmap_t pmap) 2224 { 2225 2226 KASSERT(pmap->pm_stats.resident_count == 0, 2227 ("pmap_release: pmap resident count %ld != 0", 2228 pmap->pm_stats.resident_count)); 2229 #ifdef __powerpc64__ 2230 uma_zfree(ptbl_root_zone, pmap->pm_pp2d); 2231 #else 2232 uma_zfree(ptbl_root_zone, pmap->pm_pdir); 2233 #endif 2234 } 2235 2236 /* 2237 * Insert the given physical page at the specified virtual address in the 2238 * target physical map with the protection requested. If specified the page 2239 * will be wired down. 2240 */ 2241 static int 2242 mmu_booke_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 2243 vm_prot_t prot, u_int flags, int8_t psind) 2244 { 2245 int error; 2246 2247 rw_wlock(&pvh_global_lock); 2248 PMAP_LOCK(pmap); 2249 error = mmu_booke_enter_locked(mmu, pmap, va, m, prot, flags, psind); 2250 PMAP_UNLOCK(pmap); 2251 rw_wunlock(&pvh_global_lock); 2252 return (error); 2253 } 2254 2255 static int 2256 mmu_booke_enter_locked(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 2257 vm_prot_t prot, u_int pmap_flags, int8_t psind __unused) 2258 { 2259 pte_t *pte; 2260 vm_paddr_t pa; 2261 uint32_t flags; 2262 int error, su, sync; 2263 2264 pa = VM_PAGE_TO_PHYS(m); 2265 su = (pmap == kernel_pmap); 2266 sync = 0; 2267 2268 //debugf("mmu_booke_enter_locked: s (pmap=0x%08x su=%d tid=%d m=0x%08x va=0x%08x " 2269 // "pa=0x%08x prot=0x%08x flags=%#x)\n", 2270 // (u_int32_t)pmap, su, pmap->pm_tid, 2271 // (u_int32_t)m, va, pa, prot, flags); 2272 2273 if (su) { 2274 KASSERT(((va >= virtual_avail) && 2275 (va <= VM_MAX_KERNEL_ADDRESS)), 2276 ("mmu_booke_enter_locked: kernel pmap, non kernel va")); 2277 } else { 2278 KASSERT((va <= VM_MAXUSER_ADDRESS), 2279 ("mmu_booke_enter_locked: user pmap, non user va")); 2280 } 2281 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) 2282 VM_OBJECT_ASSERT_LOCKED(m->object); 2283 2284 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2285 2286 /* 2287 * If there is an existing mapping, and the physical address has not 2288 * changed, must be protection or wiring change. 
2289 */ 2290 if (((pte = pte_find(mmu, pmap, va)) != NULL) && 2291 (PTE_ISVALID(pte)) && (PTE_PA(pte) == pa)) { 2292 2293 /* 2294 * Before actually updating pte->flags we calculate and 2295 * prepare its new value in a helper var. 2296 */ 2297 flags = *pte; 2298 flags &= ~(PTE_UW | PTE_UX | PTE_SW | PTE_SX | PTE_MODIFIED); 2299 2300 /* Wiring change, just update stats. */ 2301 if ((pmap_flags & PMAP_ENTER_WIRED) != 0) { 2302 if (!PTE_ISWIRED(pte)) { 2303 flags |= PTE_WIRED; 2304 pmap->pm_stats.wired_count++; 2305 } 2306 } else { 2307 if (PTE_ISWIRED(pte)) { 2308 flags &= ~PTE_WIRED; 2309 pmap->pm_stats.wired_count--; 2310 } 2311 } 2312 2313 if (prot & VM_PROT_WRITE) { 2314 /* Add write permissions. */ 2315 flags |= PTE_SW; 2316 if (!su) 2317 flags |= PTE_UW; 2318 2319 if ((flags & PTE_MANAGED) != 0) 2320 vm_page_aflag_set(m, PGA_WRITEABLE); 2321 } else { 2322 /* Handle modified pages, sense modify status. */ 2323 2324 /* 2325 * The PTE_MODIFIED flag could be set by underlying 2326 * TLB misses since we last read it (above), possibly 2327 * other CPUs could update it so we check in the PTE 2328 * directly rather than rely on that saved local flags 2329 * copy. 2330 */ 2331 if (PTE_ISMODIFIED(pte)) 2332 vm_page_dirty(m); 2333 } 2334 2335 if (prot & VM_PROT_EXECUTE) { 2336 flags |= PTE_SX; 2337 if (!su) 2338 flags |= PTE_UX; 2339 2340 /* 2341 * Check existing flags for execute permissions: if we 2342 * are turning execute permissions on, icache should 2343 * be flushed. 2344 */ 2345 if ((*pte & (PTE_UX | PTE_SX)) == 0) 2346 sync++; 2347 } 2348 2349 flags &= ~PTE_REFERENCED; 2350 2351 /* 2352 * The new flags value is all calculated -- only now actually 2353 * update the PTE. 2354 */ 2355 mtx_lock_spin(&tlbivax_mutex); 2356 tlb_miss_lock(); 2357 2358 tlb0_flush_entry(va); 2359 *pte &= ~PTE_FLAGS_MASK; 2360 *pte |= flags; 2361 2362 tlb_miss_unlock(); 2363 mtx_unlock_spin(&tlbivax_mutex); 2364 2365 } else { 2366 /* 2367 * If there is an existing mapping, but it's for a different 2368 * physical address, pte_enter() will delete the old mapping. 2369 */ 2370 //if ((pte != NULL) && PTE_ISVALID(pte)) 2371 // debugf("mmu_booke_enter_locked: replace\n"); 2372 //else 2373 // debugf("mmu_booke_enter_locked: new\n"); 2374 2375 /* Now set up the flags and install the new mapping. */ 2376 flags = (PTE_SR | PTE_VALID); 2377 flags |= PTE_M; 2378 2379 if (!su) 2380 flags |= PTE_UR; 2381 2382 if (prot & VM_PROT_WRITE) { 2383 flags |= PTE_SW; 2384 if (!su) 2385 flags |= PTE_UW; 2386 2387 if ((m->oflags & VPO_UNMANAGED) == 0) 2388 vm_page_aflag_set(m, PGA_WRITEABLE); 2389 } 2390 2391 if (prot & VM_PROT_EXECUTE) { 2392 flags |= PTE_SX; 2393 if (!su) 2394 flags |= PTE_UX; 2395 } 2396 2397 /* If its wired update stats. */ 2398 if ((pmap_flags & PMAP_ENTER_WIRED) != 0) 2399 flags |= PTE_WIRED; 2400 2401 error = pte_enter(mmu, pmap, m, va, flags, 2402 (pmap_flags & PMAP_ENTER_NOSLEEP) != 0); 2403 if (error != 0) 2404 return (KERN_RESOURCE_SHORTAGE); 2405 2406 if ((flags & PMAP_ENTER_WIRED) != 0) 2407 pmap->pm_stats.wired_count++; 2408 2409 /* Flush the real memory from the instruction cache. */ 2410 if (prot & VM_PROT_EXECUTE) 2411 sync++; 2412 } 2413 2414 if (sync && (su || pmap == PCPU_GET(curpmap))) { 2415 __syncicache((void *)va, PAGE_SIZE); 2416 sync = 0; 2417 } 2418 2419 return (KERN_SUCCESS); 2420 } 2421 2422 /* 2423 * Maps a sequence of resident pages belonging to the same object. 2424 * The sequence begins with the given page m_start. This page is 2425 * mapped at the given virtual address start. 
Each subsequent page is 2426 * mapped at a virtual address that is offset from start by the same 2427 * amount as the page is offset from m_start within the object. The 2428 * last page in the sequence is the page with the largest offset from 2429 * m_start that can be mapped at a virtual address less than the given 2430 * virtual address end. Not every virtual page between start and end 2431 * is mapped; only those for which a resident page exists with the 2432 * corresponding offset from m_start are mapped. 2433 */ 2434 static void 2435 mmu_booke_enter_object(mmu_t mmu, pmap_t pmap, vm_offset_t start, 2436 vm_offset_t end, vm_page_t m_start, vm_prot_t prot) 2437 { 2438 vm_page_t m; 2439 vm_pindex_t diff, psize; 2440 2441 VM_OBJECT_ASSERT_LOCKED(m_start->object); 2442 2443 psize = atop(end - start); 2444 m = m_start; 2445 rw_wlock(&pvh_global_lock); 2446 PMAP_LOCK(pmap); 2447 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 2448 mmu_booke_enter_locked(mmu, pmap, start + ptoa(diff), m, 2449 prot & (VM_PROT_READ | VM_PROT_EXECUTE), 2450 PMAP_ENTER_NOSLEEP, 0); 2451 m = TAILQ_NEXT(m, listq); 2452 } 2453 rw_wunlock(&pvh_global_lock); 2454 PMAP_UNLOCK(pmap); 2455 } 2456 2457 static void 2458 mmu_booke_enter_quick(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 2459 vm_prot_t prot) 2460 { 2461 2462 rw_wlock(&pvh_global_lock); 2463 PMAP_LOCK(pmap); 2464 mmu_booke_enter_locked(mmu, pmap, va, m, 2465 prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP, 2466 0); 2467 rw_wunlock(&pvh_global_lock); 2468 PMAP_UNLOCK(pmap); 2469 } 2470 2471 /* 2472 * Remove the given range of addresses from the specified map. 2473 * 2474 * It is assumed that the start and end are properly rounded to the page size. 2475 */ 2476 static void 2477 mmu_booke_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_offset_t endva) 2478 { 2479 pte_t *pte; 2480 uint8_t hold_flag; 2481 2482 int su = (pmap == kernel_pmap); 2483 2484 //debugf("mmu_booke_remove: s (su = %d pmap=0x%08x tid=%d va=0x%08x endva=0x%08x)\n", 2485 // su, (u_int32_t)pmap, pmap->pm_tid, va, endva); 2486 2487 if (su) { 2488 KASSERT(((va >= virtual_avail) && 2489 (va <= VM_MAX_KERNEL_ADDRESS)), 2490 ("mmu_booke_remove: kernel pmap, non kernel va")); 2491 } else { 2492 KASSERT((va <= VM_MAXUSER_ADDRESS), 2493 ("mmu_booke_remove: user pmap, non user va")); 2494 } 2495 2496 if (PMAP_REMOVE_DONE(pmap)) { 2497 //debugf("mmu_booke_remove: e (empty)\n"); 2498 return; 2499 } 2500 2501 hold_flag = PTBL_HOLD_FLAG(pmap); 2502 //debugf("mmu_booke_remove: hold_flag = %d\n", hold_flag); 2503 2504 rw_wlock(&pvh_global_lock); 2505 PMAP_LOCK(pmap); 2506 for (; va < endva; va += PAGE_SIZE) { 2507 pte = pte_find(mmu, pmap, va); 2508 if ((pte != NULL) && PTE_ISVALID(pte)) 2509 pte_remove(mmu, pmap, va, hold_flag); 2510 } 2511 PMAP_UNLOCK(pmap); 2512 rw_wunlock(&pvh_global_lock); 2513 2514 //debugf("mmu_booke_remove: e\n"); 2515 } 2516 2517 /* 2518 * Remove physical page from all pmaps in which it resides. 
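 *
 * Caller-side sketch (assumption: this method backs pmap_remove_all(),
 * which the VM system invokes before a managed page is freed or reused):
 *
 * pmap_remove_all(m);
 * KASSERT(TAILQ_EMPTY(&m->md.pv_list), ("page %p still mapped", m));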
2519 */ 2520 static void 2521 mmu_booke_remove_all(mmu_t mmu, vm_page_t m) 2522 { 2523 pv_entry_t pv, pvn; 2524 uint8_t hold_flag; 2525 2526 rw_wlock(&pvh_global_lock); 2527 for (pv = TAILQ_FIRST(&m->md.pv_list); pv != NULL; pv = pvn) { 2528 pvn = TAILQ_NEXT(pv, pv_link); 2529 2530 PMAP_LOCK(pv->pv_pmap); 2531 hold_flag = PTBL_HOLD_FLAG(pv->pv_pmap); 2532 pte_remove(mmu, pv->pv_pmap, pv->pv_va, hold_flag); 2533 PMAP_UNLOCK(pv->pv_pmap); 2534 } 2535 vm_page_aflag_clear(m, PGA_WRITEABLE); 2536 rw_wunlock(&pvh_global_lock); 2537 } 2538 2539 /* 2540 * Map a range of physical addresses into kernel virtual address space. 2541 */ 2542 static vm_offset_t 2543 mmu_booke_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start, 2544 vm_paddr_t pa_end, int prot) 2545 { 2546 vm_offset_t sva = *virt; 2547 vm_offset_t va = sva; 2548 2549 //debugf("mmu_booke_map: s (sva = 0x%08x pa_start = 0x%08x pa_end = 0x%08x)\n", 2550 // sva, pa_start, pa_end); 2551 2552 while (pa_start < pa_end) { 2553 mmu_booke_kenter(mmu, va, pa_start); 2554 va += PAGE_SIZE; 2555 pa_start += PAGE_SIZE; 2556 } 2557 *virt = va; 2558 2559 //debugf("mmu_booke_map: e (va = 0x%08x)\n", va); 2560 return (sva); 2561 } 2562 2563 /* 2564 * The pmap must be activated before it's address space can be accessed in any 2565 * way. 2566 */ 2567 static void 2568 mmu_booke_activate(mmu_t mmu, struct thread *td) 2569 { 2570 pmap_t pmap; 2571 u_int cpuid; 2572 2573 pmap = &td->td_proc->p_vmspace->vm_pmap; 2574 2575 CTR5(KTR_PMAP, "%s: s (td = %p, proc = '%s', id = %d, pmap = 0x%"PRI0ptrX")", 2576 __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap); 2577 2578 KASSERT((pmap != kernel_pmap), ("mmu_booke_activate: kernel_pmap!")); 2579 2580 sched_pin(); 2581 2582 cpuid = PCPU_GET(cpuid); 2583 CPU_SET_ATOMIC(cpuid, &pmap->pm_active); 2584 PCPU_SET(curpmap, pmap); 2585 2586 if (pmap->pm_tid[cpuid] == TID_NONE) 2587 tid_alloc(pmap); 2588 2589 /* Load PID0 register with pmap tid value. */ 2590 mtspr(SPR_PID0, pmap->pm_tid[cpuid]); 2591 __asm __volatile("isync"); 2592 2593 mtspr(SPR_DBCR0, td->td_pcb->pcb_cpu.booke.dbcr0); 2594 2595 sched_unpin(); 2596 2597 CTR3(KTR_PMAP, "%s: e (tid = %d for '%s')", __func__, 2598 pmap->pm_tid[PCPU_GET(cpuid)], td->td_proc->p_comm); 2599 } 2600 2601 /* 2602 * Deactivate the specified process's address space. 2603 */ 2604 static void 2605 mmu_booke_deactivate(mmu_t mmu, struct thread *td) 2606 { 2607 pmap_t pmap; 2608 2609 pmap = &td->td_proc->p_vmspace->vm_pmap; 2610 2611 CTR5(KTR_PMAP, "%s: td=%p, proc = '%s', id = %d, pmap = 0x%"PRI0ptrX, 2612 __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap); 2613 2614 td->td_pcb->pcb_cpu.booke.dbcr0 = mfspr(SPR_DBCR0); 2615 2616 CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmap->pm_active); 2617 PCPU_SET(curpmap, NULL); 2618 } 2619 2620 /* 2621 * Copy the range specified by src_addr/len 2622 * from the source map to the range dst_addr/len 2623 * in the destination map. 2624 * 2625 * This routine is only advisory and need not do anything. 2626 */ 2627 static void 2628 mmu_booke_copy(mmu_t mmu, pmap_t dst_pmap, pmap_t src_pmap, 2629 vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) 2630 { 2631 2632 } 2633 2634 /* 2635 * Set the physical protection on the specified range of this map as requested. 
2636 */ 2637 static void 2638 mmu_booke_protect(mmu_t mmu, pmap_t pmap, vm_offset_t sva, vm_offset_t eva, 2639 vm_prot_t prot) 2640 { 2641 vm_offset_t va; 2642 vm_page_t m; 2643 pte_t *pte; 2644 2645 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 2646 mmu_booke_remove(mmu, pmap, sva, eva); 2647 return; 2648 } 2649 2650 if (prot & VM_PROT_WRITE) 2651 return; 2652 2653 PMAP_LOCK(pmap); 2654 for (va = sva; va < eva; va += PAGE_SIZE) { 2655 if ((pte = pte_find(mmu, pmap, va)) != NULL) { 2656 if (PTE_ISVALID(pte)) { 2657 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 2658 2659 mtx_lock_spin(&tlbivax_mutex); 2660 tlb_miss_lock(); 2661 2662 /* Handle modified pages. */ 2663 if (PTE_ISMODIFIED(pte) && PTE_ISMANAGED(pte)) 2664 vm_page_dirty(m); 2665 2666 tlb0_flush_entry(va); 2667 *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED); 2668 2669 tlb_miss_unlock(); 2670 mtx_unlock_spin(&tlbivax_mutex); 2671 } 2672 } 2673 } 2674 PMAP_UNLOCK(pmap); 2675 } 2676 2677 /* 2678 * Clear the write and modified bits in each of the given page's mappings. 2679 */ 2680 static void 2681 mmu_booke_remove_write(mmu_t mmu, vm_page_t m) 2682 { 2683 pv_entry_t pv; 2684 pte_t *pte; 2685 2686 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2687 ("mmu_booke_remove_write: page %p is not managed", m)); 2688 2689 /* 2690 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 2691 * set by another thread while the object is locked. Thus, 2692 * if PGA_WRITEABLE is clear, no page table entries need updating. 2693 */ 2694 VM_OBJECT_ASSERT_WLOCKED(m->object); 2695 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 2696 return; 2697 rw_wlock(&pvh_global_lock); 2698 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 2699 PMAP_LOCK(pv->pv_pmap); 2700 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) { 2701 if (PTE_ISVALID(pte)) { 2702 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 2703 2704 mtx_lock_spin(&tlbivax_mutex); 2705 tlb_miss_lock(); 2706 2707 /* Handle modified pages. */ 2708 if (PTE_ISMODIFIED(pte)) 2709 vm_page_dirty(m); 2710 2711 /* Flush mapping from TLB0. */ 2712 *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED); 2713 2714 tlb_miss_unlock(); 2715 mtx_unlock_spin(&tlbivax_mutex); 2716 } 2717 } 2718 PMAP_UNLOCK(pv->pv_pmap); 2719 } 2720 vm_page_aflag_clear(m, PGA_WRITEABLE); 2721 rw_wunlock(&pvh_global_lock); 2722 } 2723 2724 static void 2725 mmu_booke_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) 2726 { 2727 pte_t *pte; 2728 vm_paddr_t pa = 0; 2729 int sync_sz, valid; 2730 #ifndef __powerpc64__ 2731 pmap_t pmap; 2732 vm_page_t m; 2733 vm_offset_t addr; 2734 int active; 2735 #endif 2736 2737 #ifndef __powerpc64__ 2738 rw_wlock(&pvh_global_lock); 2739 pmap = PCPU_GET(curpmap); 2740 active = (pm == kernel_pmap || pm == pmap) ? 1 : 0; 2741 #endif 2742 while (sz > 0) { 2743 PMAP_LOCK(pm); 2744 pte = pte_find(mmu, pm, va); 2745 valid = (pte != NULL && PTE_ISVALID(pte)) ? 1 : 0; 2746 if (valid) 2747 pa = PTE_PA(pte); 2748 PMAP_UNLOCK(pm); 2749 sync_sz = PAGE_SIZE - (va & PAGE_MASK); 2750 sync_sz = min(sync_sz, sz); 2751 if (valid) { 2752 #ifdef __powerpc64__ 2753 pa += (va & PAGE_MASK); 2754 __syncicache((void *)PHYS_TO_DMAP(pa), sync_sz); 2755 #else 2756 if (!active) { 2757 /* Create a mapping in the active pmap. 
*/ 2758 addr = 0; 2759 m = PHYS_TO_VM_PAGE(pa); 2760 PMAP_LOCK(pmap); 2761 pte_enter(mmu, pmap, m, addr, 2762 PTE_SR | PTE_VALID, FALSE); 2763 addr += (va & PAGE_MASK); 2764 __syncicache((void *)addr, sync_sz); 2765 pte_remove(mmu, pmap, addr, PTBL_UNHOLD); 2766 PMAP_UNLOCK(pmap); 2767 } else 2768 __syncicache((void *)va, sync_sz); 2769 #endif 2770 } 2771 va += sync_sz; 2772 sz -= sync_sz; 2773 } 2774 #ifndef __powerpc64__ 2775 rw_wunlock(&pvh_global_lock); 2776 #endif 2777 } 2778 2779 /* 2780 * Atomically extract and hold the physical page with the given 2781 * pmap and virtual address pair if that mapping permits the given 2782 * protection. 2783 */ 2784 static vm_page_t 2785 mmu_booke_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, 2786 vm_prot_t prot) 2787 { 2788 pte_t *pte; 2789 vm_page_t m; 2790 uint32_t pte_wbit; 2791 vm_paddr_t pa; 2792 2793 m = NULL; 2794 pa = 0; 2795 PMAP_LOCK(pmap); 2796 retry: 2797 pte = pte_find(mmu, pmap, va); 2798 if ((pte != NULL) && PTE_ISVALID(pte)) { 2799 if (pmap == kernel_pmap) 2800 pte_wbit = PTE_SW; 2801 else 2802 pte_wbit = PTE_UW; 2803 2804 if ((*pte & pte_wbit) || ((prot & VM_PROT_WRITE) == 0)) { 2805 if (vm_page_pa_tryrelock(pmap, PTE_PA(pte), &pa)) 2806 goto retry; 2807 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 2808 m->wire_count++; 2809 } 2810 } 2811 2812 PA_UNLOCK_COND(pa); 2813 PMAP_UNLOCK(pmap); 2814 return (m); 2815 } 2816 2817 /* 2818 * Initialize a vm_page's machine-dependent fields. 2819 */ 2820 static void 2821 mmu_booke_page_init(mmu_t mmu, vm_page_t m) 2822 { 2823 2824 m->md.pv_tracked = 0; 2825 TAILQ_INIT(&m->md.pv_list); 2826 } 2827 2828 /* 2829 * mmu_booke_zero_page_area zeros the specified hardware page by 2830 * mapping it into virtual memory and using bzero to clear 2831 * its contents. 2832 * 2833 * off and size must reside within a single page. 2834 */ 2835 static void 2836 mmu_booke_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) 2837 { 2838 vm_offset_t va; 2839 2840 /* XXX KASSERT off and size are within a single page? */ 2841 2842 #ifdef __powerpc64__ 2843 va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2844 bzero((caddr_t)va + off, size); 2845 #else 2846 mtx_lock(&zero_page_mutex); 2847 va = zero_page_va; 2848 2849 mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); 2850 bzero((caddr_t)va + off, size); 2851 mmu_booke_kremove(mmu, va); 2852 2853 mtx_unlock(&zero_page_mutex); 2854 #endif 2855 } 2856 2857 /* 2858 * mmu_booke_zero_page zeros the specified hardware page. 2859 */ 2860 static void 2861 mmu_booke_zero_page(mmu_t mmu, vm_page_t m) 2862 { 2863 vm_offset_t off, va; 2864 2865 #ifdef __powerpc64__ 2866 va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2867 2868 for (off = 0; off < PAGE_SIZE; off += cacheline_size) 2869 __asm __volatile("dcbz 0,%0" :: "r"(va + off)); 2870 #else 2871 va = zero_page_va; 2872 mtx_lock(&zero_page_mutex); 2873 2874 mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); 2875 2876 for (off = 0; off < PAGE_SIZE; off += cacheline_size) 2877 __asm __volatile("dcbz 0,%0" :: "r"(va + off)); 2878 2879 mmu_booke_kremove(mmu, va); 2880 2881 mtx_unlock(&zero_page_mutex); 2882 #endif 2883 } 2884 2885 /* 2886 * mmu_booke_copy_page copies the specified (machine independent) page by 2887 * mapping the page into virtual memory and using memcopy to copy the page, 2888 * one machine dependent page at a time. 
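 *
 * (On powerpc64 both pages are reached through the direct map; on 32-bit
 * a pair of reserved KVA slots, copy_page_src_va and copy_page_dst_va,
 * is used under copy_page_mutex.)
 *
 * Caller-side sketch (assumption: this backs pmap_copy_page(); src_m and
 * dst_m are illustrative names):
 *
 * pmap_copy_page(src_m, dst_m);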
2889 */
2890 static void
2891 mmu_booke_copy_page(mmu_t mmu, vm_page_t sm, vm_page_t dm)
2892 {
2893 vm_offset_t sva, dva;
2894 
2895 #ifdef __powerpc64__
2896 sva = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(sm));
2897 dva = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dm));
2898 memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE);
2899 #else
2900 sva = copy_page_src_va;
2901 dva = copy_page_dst_va;
2902 
2903 mtx_lock(&copy_page_mutex);
2904 mmu_booke_kenter(mmu, sva, VM_PAGE_TO_PHYS(sm));
2905 mmu_booke_kenter(mmu, dva, VM_PAGE_TO_PHYS(dm));
2906 
2907 memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE);
2908 
2909 mmu_booke_kremove(mmu, dva);
2910 mmu_booke_kremove(mmu, sva);
2911 mtx_unlock(&copy_page_mutex);
2912 #endif
2913 }
2914 
2915 static inline void
2916 mmu_booke_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset,
2917 vm_page_t *mb, vm_offset_t b_offset, int xfersize)
2918 {
2919 void *a_cp, *b_cp;
2920 vm_offset_t a_pg_offset, b_pg_offset;
2921 int cnt;
2922 
2923 #ifdef __powerpc64__
2924 vm_page_t pa, pb;
2925 
2926 while (xfersize > 0) {
2927 a_pg_offset = a_offset & PAGE_MASK;
2928 pa = ma[a_offset >> PAGE_SHIFT];
2929 b_pg_offset = b_offset & PAGE_MASK;
2930 pb = mb[b_offset >> PAGE_SHIFT];
2931 cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
2932 cnt = min(cnt, PAGE_SIZE - b_pg_offset);
2933 a_cp = (caddr_t)((uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pa)) +
2934 a_pg_offset);
2935 b_cp = (caddr_t)((uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pb)) +
2936 b_pg_offset);
2937 bcopy(a_cp, b_cp, cnt);
2938 a_offset += cnt;
2939 b_offset += cnt;
2940 xfersize -= cnt;
2941 }
2942 #else
2943 mtx_lock(&copy_page_mutex);
2944 while (xfersize > 0) {
2945 a_pg_offset = a_offset & PAGE_MASK;
2946 cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
2947 mmu_booke_kenter(mmu, copy_page_src_va,
2948 VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT]));
2949 a_cp = (char *)copy_page_src_va + a_pg_offset;
2950 b_pg_offset = b_offset & PAGE_MASK;
2951 cnt = min(cnt, PAGE_SIZE - b_pg_offset);
2952 mmu_booke_kenter(mmu, copy_page_dst_va,
2953 VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT]));
2954 b_cp = (char *)copy_page_dst_va + b_pg_offset;
2955 bcopy(a_cp, b_cp, cnt);
2956 mmu_booke_kremove(mmu, copy_page_dst_va);
2957 mmu_booke_kremove(mmu, copy_page_src_va);
2958 a_offset += cnt;
2959 b_offset += cnt;
2960 xfersize -= cnt;
2961 }
2962 mtx_unlock(&copy_page_mutex);
2963 #endif
2964 }
2965 
2966 static vm_offset_t
2967 mmu_booke_quick_enter_page(mmu_t mmu, vm_page_t m)
2968 {
2969 #ifdef __powerpc64__
2970 return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)));
2971 #else
2972 vm_paddr_t paddr;
2973 vm_offset_t qaddr;
2974 uint32_t flags;
2975 pte_t *pte;
2976 
2977 paddr = VM_PAGE_TO_PHYS(m);
2978 
2979 flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID;
2980 flags |= tlb_calc_wimg(paddr, pmap_page_get_memattr(m)) << PTE_MAS2_SHIFT;
2981 flags |= PTE_PS_4KB;
2982 
2983 critical_enter();
2984 qaddr = PCPU_GET(qmap_addr);
2985 
2986 pte = pte_find(mmu, kernel_pmap, qaddr);
2987 
2988 KASSERT(*pte == 0, ("mmu_booke_quick_enter_page: PTE busy"));
2989 
2990 /*
2991 * XXX: tlbivax is broadcast to other cores, but qaddr should
2992 * not be present in other TLBs. Is there a better instruction
2993 * sequence to use? Or just forget it & use mmu_booke_kenter()...
2994 */
2995 __asm __volatile("tlbivax 0, %0" :: "r"(qaddr & MAS2_EPN_MASK));
2996 __asm __volatile("isync; msync");
2997 
2998 *pte = PTE_RPN_FROM_PA(paddr) | flags;
2999 
3000 /* Flush the real memory from the instruction cache.
*/ 3001 if ((flags & (PTE_I | PTE_G)) == 0) 3002 __syncicache((void *)qaddr, PAGE_SIZE); 3003 3004 return (qaddr); 3005 #endif 3006 } 3007 3008 static void 3009 mmu_booke_quick_remove_page(mmu_t mmu, vm_offset_t addr) 3010 { 3011 #ifndef __powerpc64__ 3012 pte_t *pte; 3013 3014 pte = pte_find(mmu, kernel_pmap, addr); 3015 3016 KASSERT(PCPU_GET(qmap_addr) == addr, 3017 ("mmu_booke_quick_remove_page: invalid address")); 3018 KASSERT(*pte != 0, 3019 ("mmu_booke_quick_remove_page: PTE not in use")); 3020 3021 *pte = 0; 3022 critical_exit(); 3023 #endif 3024 } 3025 3026 /* 3027 * Return whether or not the specified physical page was modified 3028 * in any of physical maps. 3029 */ 3030 static boolean_t 3031 mmu_booke_is_modified(mmu_t mmu, vm_page_t m) 3032 { 3033 pte_t *pte; 3034 pv_entry_t pv; 3035 boolean_t rv; 3036 3037 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3038 ("mmu_booke_is_modified: page %p is not managed", m)); 3039 rv = FALSE; 3040 3041 /* 3042 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 3043 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 3044 * is clear, no PTEs can be modified. 3045 */ 3046 VM_OBJECT_ASSERT_WLOCKED(m->object); 3047 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 3048 return (rv); 3049 rw_wlock(&pvh_global_lock); 3050 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3051 PMAP_LOCK(pv->pv_pmap); 3052 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3053 PTE_ISVALID(pte)) { 3054 if (PTE_ISMODIFIED(pte)) 3055 rv = TRUE; 3056 } 3057 PMAP_UNLOCK(pv->pv_pmap); 3058 if (rv) 3059 break; 3060 } 3061 rw_wunlock(&pvh_global_lock); 3062 return (rv); 3063 } 3064 3065 /* 3066 * Return whether or not the specified virtual address is eligible 3067 * for prefault. 3068 */ 3069 static boolean_t 3070 mmu_booke_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t addr) 3071 { 3072 3073 return (FALSE); 3074 } 3075 3076 /* 3077 * Return whether or not the specified physical page was referenced 3078 * in any physical maps. 3079 */ 3080 static boolean_t 3081 mmu_booke_is_referenced(mmu_t mmu, vm_page_t m) 3082 { 3083 pte_t *pte; 3084 pv_entry_t pv; 3085 boolean_t rv; 3086 3087 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3088 ("mmu_booke_is_referenced: page %p is not managed", m)); 3089 rv = FALSE; 3090 rw_wlock(&pvh_global_lock); 3091 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3092 PMAP_LOCK(pv->pv_pmap); 3093 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3094 PTE_ISVALID(pte)) { 3095 if (PTE_ISREFERENCED(pte)) 3096 rv = TRUE; 3097 } 3098 PMAP_UNLOCK(pv->pv_pmap); 3099 if (rv) 3100 break; 3101 } 3102 rw_wunlock(&pvh_global_lock); 3103 return (rv); 3104 } 3105 3106 /* 3107 * Clear the modify bits on the specified physical page. 3108 */ 3109 static void 3110 mmu_booke_clear_modify(mmu_t mmu, vm_page_t m) 3111 { 3112 pte_t *pte; 3113 pv_entry_t pv; 3114 3115 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3116 ("mmu_booke_clear_modify: page %p is not managed", m)); 3117 VM_OBJECT_ASSERT_WLOCKED(m->object); 3118 KASSERT(!vm_page_xbusied(m), 3119 ("mmu_booke_clear_modify: page %p is exclusive busied", m)); 3120 3121 /* 3122 * If the page is not PG_AWRITEABLE, then no PTEs can be modified. 3123 * If the object containing the page is locked and the page is not 3124 * exclusive busied, then PG_AWRITEABLE cannot be concurrently set. 
3125 */ 3126 if ((m->aflags & PGA_WRITEABLE) == 0) 3127 return; 3128 rw_wlock(&pvh_global_lock); 3129 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3130 PMAP_LOCK(pv->pv_pmap); 3131 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3132 PTE_ISVALID(pte)) { 3133 mtx_lock_spin(&tlbivax_mutex); 3134 tlb_miss_lock(); 3135 3136 if (*pte & (PTE_SW | PTE_UW | PTE_MODIFIED)) { 3137 tlb0_flush_entry(pv->pv_va); 3138 *pte &= ~(PTE_SW | PTE_UW | PTE_MODIFIED | 3139 PTE_REFERENCED); 3140 } 3141 3142 tlb_miss_unlock(); 3143 mtx_unlock_spin(&tlbivax_mutex); 3144 } 3145 PMAP_UNLOCK(pv->pv_pmap); 3146 } 3147 rw_wunlock(&pvh_global_lock); 3148 } 3149 3150 /* 3151 * Return a count of reference bits for a page, clearing those bits. 3152 * It is not necessary for every reference bit to be cleared, but it 3153 * is necessary that 0 only be returned when there are truly no 3154 * reference bits set. 3155 * 3156 * As an optimization, update the page's dirty field if a modified bit is 3157 * found while counting reference bits. This opportunistic update can be 3158 * performed at low cost and can eliminate the need for some future calls 3159 * to pmap_is_modified(). However, since this function stops after 3160 * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some 3161 * dirty pages. Those dirty pages will only be detected by a future call 3162 * to pmap_is_modified(). 3163 */ 3164 static int 3165 mmu_booke_ts_referenced(mmu_t mmu, vm_page_t m) 3166 { 3167 pte_t *pte; 3168 pv_entry_t pv; 3169 int count; 3170 3171 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3172 ("mmu_booke_ts_referenced: page %p is not managed", m)); 3173 count = 0; 3174 rw_wlock(&pvh_global_lock); 3175 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3176 PMAP_LOCK(pv->pv_pmap); 3177 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3178 PTE_ISVALID(pte)) { 3179 if (PTE_ISMODIFIED(pte)) 3180 vm_page_dirty(m); 3181 if (PTE_ISREFERENCED(pte)) { 3182 mtx_lock_spin(&tlbivax_mutex); 3183 tlb_miss_lock(); 3184 3185 tlb0_flush_entry(pv->pv_va); 3186 *pte &= ~PTE_REFERENCED; 3187 3188 tlb_miss_unlock(); 3189 mtx_unlock_spin(&tlbivax_mutex); 3190 3191 if (++count >= PMAP_TS_REFERENCED_MAX) { 3192 PMAP_UNLOCK(pv->pv_pmap); 3193 break; 3194 } 3195 } 3196 } 3197 PMAP_UNLOCK(pv->pv_pmap); 3198 } 3199 rw_wunlock(&pvh_global_lock); 3200 return (count); 3201 } 3202 3203 /* 3204 * Clear the wired attribute from the mappings for the specified range of 3205 * addresses in the given pmap. Every valid mapping within that range must 3206 * have the wired attribute set. In contrast, invalid mappings cannot have 3207 * the wired attribute set, so they are ignored. 3208 * 3209 * The wired attribute of the page table entry is not a hardware feature, so 3210 * there is no need to invalidate any TLB entries. 3211 */ 3212 static void 3213 mmu_booke_unwire(mmu_t mmu, pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 3214 { 3215 vm_offset_t va; 3216 pte_t *pte; 3217 3218 PMAP_LOCK(pmap); 3219 for (va = sva; va < eva; va += PAGE_SIZE) { 3220 if ((pte = pte_find(mmu, pmap, va)) != NULL && 3221 PTE_ISVALID(pte)) { 3222 if (!PTE_ISWIRED(pte)) 3223 panic("mmu_booke_unwire: pte %p isn't wired", 3224 pte); 3225 *pte &= ~PTE_WIRED; 3226 pmap->pm_stats.wired_count--; 3227 } 3228 } 3229 PMAP_UNLOCK(pmap); 3230 3231 } 3232 3233 /* 3234 * Return true if the pmap's pv is one of the first 16 pvs linked to from this 3235 * page. 
This count may be changed upwards or downwards in the future; it is 3236 * only necessary that true be returned for a small subset of pmaps for proper 3237 * page aging. 3238 */ 3239 static boolean_t 3240 mmu_booke_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m) 3241 { 3242 pv_entry_t pv; 3243 int loops; 3244 boolean_t rv; 3245 3246 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3247 ("mmu_booke_page_exists_quick: page %p is not managed", m)); 3248 loops = 0; 3249 rv = FALSE; 3250 rw_wlock(&pvh_global_lock); 3251 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3252 if (pv->pv_pmap == pmap) { 3253 rv = TRUE; 3254 break; 3255 } 3256 if (++loops >= 16) 3257 break; 3258 } 3259 rw_wunlock(&pvh_global_lock); 3260 return (rv); 3261 } 3262 3263 /* 3264 * Return the number of managed mappings to the given physical page that are 3265 * wired. 3266 */ 3267 static int 3268 mmu_booke_page_wired_mappings(mmu_t mmu, vm_page_t m) 3269 { 3270 pv_entry_t pv; 3271 pte_t *pte; 3272 int count = 0; 3273 3274 if ((m->oflags & VPO_UNMANAGED) != 0) 3275 return (count); 3276 rw_wlock(&pvh_global_lock); 3277 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3278 PMAP_LOCK(pv->pv_pmap); 3279 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) 3280 if (PTE_ISVALID(pte) && PTE_ISWIRED(pte)) 3281 count++; 3282 PMAP_UNLOCK(pv->pv_pmap); 3283 } 3284 rw_wunlock(&pvh_global_lock); 3285 return (count); 3286 } 3287 3288 static int 3289 mmu_booke_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size) 3290 { 3291 int i; 3292 vm_offset_t va; 3293 3294 /* 3295 * This currently does not work for entries that 3296 * overlap TLB1 entries. 3297 */ 3298 for (i = 0; i < TLB1_ENTRIES; i ++) { 3299 if (tlb1_iomapped(i, pa, size, &va) == 0) 3300 return (0); 3301 } 3302 3303 return (EFAULT); 3304 } 3305 3306 void 3307 mmu_booke_dumpsys_map(mmu_t mmu, vm_paddr_t pa, size_t sz, void **va) 3308 { 3309 vm_paddr_t ppa; 3310 vm_offset_t ofs; 3311 vm_size_t gran; 3312 3313 /* Minidumps are based on virtual memory addresses. */ 3314 if (do_minidump) { 3315 *va = (void *)(vm_offset_t)pa; 3316 return; 3317 } 3318 3319 /* Raw physical memory dumps don't have a virtual address. */ 3320 /* We always map a 256MB page at 256M. */ 3321 gran = 256 * 1024 * 1024; 3322 ppa = rounddown2(pa, gran); 3323 ofs = pa - ppa; 3324 *va = (void *)gran; 3325 tlb1_set_entry((vm_offset_t)va, ppa, gran, _TLB_ENTRY_IO); 3326 3327 if (sz > (gran - ofs)) 3328 tlb1_set_entry((vm_offset_t)(va + gran), ppa + gran, gran, 3329 _TLB_ENTRY_IO); 3330 } 3331 3332 void 3333 mmu_booke_dumpsys_unmap(mmu_t mmu, vm_paddr_t pa, size_t sz, void *va) 3334 { 3335 vm_paddr_t ppa; 3336 vm_offset_t ofs; 3337 vm_size_t gran; 3338 tlb_entry_t e; 3339 int i; 3340 3341 /* Minidumps are based on virtual memory addresses. */ 3342 /* Nothing to do... */ 3343 if (do_minidump) 3344 return; 3345 3346 for (i = 0; i < TLB1_ENTRIES; i++) { 3347 tlb1_read_entry(&e, i); 3348 if (!(e.mas1 & MAS1_VALID)) 3349 break; 3350 } 3351 3352 /* Raw physical memory dumps don't have a virtual address. 
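 *
 * (The loop above stopped at the first free TLB1 slot, so slot i - 1 is
 * the last entry installed by mmu_booke_dumpsys_map(); it is invalidated
 * below, and a second entry is torn down as well if the dump region
 * spilled past one 256MB granule.)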
*/ 3353 i--; 3354 e.mas1 = 0; 3355 e.mas2 = 0; 3356 e.mas3 = 0; 3357 tlb1_write_entry(&e, i); 3358 3359 gran = 256 * 1024 * 1024; 3360 ppa = rounddown2(pa, gran); 3361 ofs = pa - ppa; 3362 if (sz > (gran - ofs)) { 3363 i--; 3364 e.mas1 = 0; 3365 e.mas2 = 0; 3366 e.mas3 = 0; 3367 tlb1_write_entry(&e, i); 3368 } 3369 } 3370 3371 extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1]; 3372 3373 void 3374 mmu_booke_scan_init(mmu_t mmu) 3375 { 3376 vm_offset_t va; 3377 pte_t *pte; 3378 int i; 3379 3380 if (!do_minidump) { 3381 /* Initialize phys. segments for dumpsys(). */ 3382 memset(&dump_map, 0, sizeof(dump_map)); 3383 mem_regions(&physmem_regions, &physmem_regions_sz, &availmem_regions, 3384 &availmem_regions_sz); 3385 for (i = 0; i < physmem_regions_sz; i++) { 3386 dump_map[i].pa_start = physmem_regions[i].mr_start; 3387 dump_map[i].pa_size = physmem_regions[i].mr_size; 3388 } 3389 return; 3390 } 3391 3392 /* Virtual segments for minidumps: */ 3393 memset(&dump_map, 0, sizeof(dump_map)); 3394 3395 /* 1st: kernel .data and .bss. */ 3396 dump_map[0].pa_start = trunc_page((uintptr_t)_etext); 3397 dump_map[0].pa_size = 3398 round_page((uintptr_t)_end) - dump_map[0].pa_start; 3399 3400 /* 2nd: msgbuf and tables (see pmap_bootstrap()). */ 3401 dump_map[1].pa_start = data_start; 3402 dump_map[1].pa_size = data_end - data_start; 3403 3404 /* 3rd: kernel VM. */ 3405 va = dump_map[1].pa_start + dump_map[1].pa_size; 3406 /* Find start of next chunk (from va). */ 3407 while (va < virtual_end) { 3408 /* Don't dump the buffer cache. */ 3409 if (va >= kmi.buffer_sva && va < kmi.buffer_eva) { 3410 va = kmi.buffer_eva; 3411 continue; 3412 } 3413 pte = pte_find(mmu, kernel_pmap, va); 3414 if (pte != NULL && PTE_ISVALID(pte)) 3415 break; 3416 va += PAGE_SIZE; 3417 } 3418 if (va < virtual_end) { 3419 dump_map[2].pa_start = va; 3420 va += PAGE_SIZE; 3421 /* Find last page in chunk. */ 3422 while (va < virtual_end) { 3423 /* Don't run into the buffer cache. */ 3424 if (va == kmi.buffer_sva) 3425 break; 3426 pte = pte_find(mmu, kernel_pmap, va); 3427 if (pte == NULL || !PTE_ISVALID(pte)) 3428 break; 3429 va += PAGE_SIZE; 3430 } 3431 dump_map[2].pa_size = va - dump_map[2].pa_start; 3432 } 3433 } 3434 3435 /* 3436 * Map a set of physical memory pages into the kernel virtual address space. 3437 * Return a pointer to where it is mapped. This routine is intended to be used 3438 * for mapping device memory, NOT real memory. 3439 */ 3440 static void * 3441 mmu_booke_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size) 3442 { 3443 3444 return (mmu_booke_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT)); 3445 } 3446 3447 static void * 3448 mmu_booke_mapdev_attr(mmu_t mmu, vm_paddr_t pa, vm_size_t size, vm_memattr_t ma) 3449 { 3450 tlb_entry_t e; 3451 void *res; 3452 uintptr_t va, tmpva; 3453 vm_size_t sz; 3454 int i; 3455 3456 /* 3457 * Check if this is premapped in TLB1. Note: this should probably also 3458 * check whether a sequence of TLB1 entries exist that match the 3459 * requirement, but now only checks the easy case. 
3460 */ 3461 for (i = 0; i < TLB1_ENTRIES; i++) { 3462 tlb1_read_entry(&e, i); 3463 if (!(e.mas1 & MAS1_VALID)) 3464 continue; 3465 if (pa >= e.phys && 3466 (pa + size) <= (e.phys + e.size) && 3467 (ma == VM_MEMATTR_DEFAULT || 3468 tlb_calc_wimg(pa, ma) == 3469 (e.mas2 & (MAS2_WIMGE_MASK & ~_TLB_ENTRY_SHARED)))) 3470 return (void *)(e.virt + 3471 (vm_offset_t)(pa - e.phys)); 3472 } 3473 3474 size = roundup(size, PAGE_SIZE); 3475 3476 /* 3477 * The device mapping area is between VM_MAXUSER_ADDRESS and 3478 * VM_MIN_KERNEL_ADDRESS. This gives 1GB of device addressing. 3479 */ 3480 #ifdef SPARSE_MAPDEV 3481 /* 3482 * With a sparse mapdev, align to the largest starting region. This 3483 * could feasibly be optimized for a 'best-fit' alignment, but that 3484 * calculation could be very costly. 3485 * Align to the smaller of: 3486 * - first set bit in overlap of (pa & size mask) 3487 * - largest size envelope 3488 * 3489 * It's possible the device mapping may start at a PA that's not larger 3490 * than the size mask, so we need to offset in to maximize the TLB entry 3491 * range and minimize the number of used TLB entries. 3492 */ 3493 do { 3494 tmpva = tlb1_map_base; 3495 sz = ffsl(((1 << flsl(size-1)) - 1) & pa); 3496 sz = sz ? min(roundup(sz + 3, 4), flsl(size) - 1) : flsl(size) - 1; 3497 va = roundup(tlb1_map_base, 1 << sz) | (((1 << sz) - 1) & pa); 3498 #ifdef __powerpc64__ 3499 } while (!atomic_cmpset_long(&tlb1_map_base, tmpva, va + size)); 3500 #else 3501 } while (!atomic_cmpset_int(&tlb1_map_base, tmpva, va + size)); 3502 #endif 3503 #else 3504 #ifdef __powerpc64__ 3505 va = atomic_fetchadd_long(&tlb1_map_base, size); 3506 #else 3507 va = atomic_fetchadd_int(&tlb1_map_base, size); 3508 #endif 3509 #endif 3510 res = (void *)va; 3511 3512 do { 3513 sz = 1 << (ilog2(size) & ~1); 3514 /* Align size to PA */ 3515 if (pa % sz != 0) { 3516 do { 3517 sz >>= 2; 3518 } while (pa % sz != 0); 3519 } 3520 /* Now align from there to VA */ 3521 if (va % sz != 0) { 3522 do { 3523 sz >>= 2; 3524 } while (va % sz != 0); 3525 } 3526 if (bootverbose) 3527 printf("Wiring VA=%lx to PA=%jx (size=%lx)\n", 3528 va, (uintmax_t)pa, sz); 3529 if (tlb1_set_entry(va, pa, sz, 3530 _TLB_ENTRY_SHARED | tlb_calc_wimg(pa, ma)) < 0) 3531 return (NULL); 3532 size -= sz; 3533 pa += sz; 3534 va += sz; 3535 } while (size > 0); 3536 3537 return (res); 3538 } 3539 3540 /* 3541 * 'Unmap' a range mapped by mmu_booke_mapdev(). 3542 */ 3543 static void 3544 mmu_booke_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size) 3545 { 3546 #ifdef SUPPORTS_SHRINKING_TLB1 3547 vm_offset_t base, offset; 3548 3549 /* 3550 * Unmap only if this is inside kernel virtual space. 3551 */ 3552 if ((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)) { 3553 base = trunc_page(va); 3554 offset = va & PAGE_MASK; 3555 size = roundup(offset + size, PAGE_SIZE); 3556 kva_free(base, size); 3557 } 3558 #endif 3559 } 3560 3561 /* 3562 * mmu_booke_object_init_pt preloads the ptes for a given object into the 3563 * specified pmap. This eliminates the blast of soft faults on process startup 3564 * and immediately after an mmap. 3565 */ 3566 static void 3567 mmu_booke_object_init_pt(mmu_t mmu, pmap_t pmap, vm_offset_t addr, 3568 vm_object_t object, vm_pindex_t pindex, vm_size_t size) 3569 { 3570 3571 VM_OBJECT_ASSERT_WLOCKED(object); 3572 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 3573 ("mmu_booke_object_init_pt: non-device object")); 3574 } 3575 3576 /* 3577 * Perform the pmap work for mincore. 
3578 */ 3579 static int 3580 mmu_booke_mincore(mmu_t mmu, pmap_t pmap, vm_offset_t addr, 3581 vm_paddr_t *locked_pa) 3582 { 3583 3584 /* XXX: this should be implemented at some point */ 3585 return (0); 3586 } 3587 3588 static int 3589 mmu_booke_change_attr(mmu_t mmu, vm_offset_t addr, vm_size_t sz, 3590 vm_memattr_t mode) 3591 { 3592 vm_offset_t va; 3593 pte_t *pte; 3594 int i, j; 3595 tlb_entry_t e; 3596 3597 /* Check TLB1 mappings */ 3598 for (i = 0; i < TLB1_ENTRIES; i++) { 3599 tlb1_read_entry(&e, i); 3600 if (!(e.mas1 & MAS1_VALID)) 3601 continue; 3602 if (addr >= e.virt && addr < e.virt + e.size) 3603 break; 3604 } 3605 if (i < TLB1_ENTRIES) { 3606 /* Only allow full mappings to be modified for now. */ 3607 /* Validate the range. */ 3608 for (j = i, va = addr; va < addr + sz; va += e.size, j++) { 3609 tlb1_read_entry(&e, j); 3610 if (va != e.virt || (sz - (va - addr) < e.size)) 3611 return (EINVAL); 3612 } 3613 for (va = addr; va < addr + sz; va += e.size, i++) { 3614 tlb1_read_entry(&e, i); 3615 e.mas2 &= ~MAS2_WIMGE_MASK; 3616 e.mas2 |= tlb_calc_wimg(e.phys, mode); 3617 3618 /* 3619 * Write it out to the TLB. Should really re-sync with other 3620 * cores. 3621 */ 3622 tlb1_write_entry(&e, i); 3623 } 3624 return (0); 3625 } 3626 3627 /* Not in TLB1, try through pmap */ 3628 /* First validate the range. */ 3629 for (va = addr; va < addr + sz; va += PAGE_SIZE) { 3630 pte = pte_find(mmu, kernel_pmap, va); 3631 if (pte == NULL || !PTE_ISVALID(pte)) 3632 return (EINVAL); 3633 } 3634 3635 mtx_lock_spin(&tlbivax_mutex); 3636 tlb_miss_lock(); 3637 for (va = addr; va < addr + sz; va += PAGE_SIZE) { 3638 pte = pte_find(mmu, kernel_pmap, va); 3639 *pte &= ~(PTE_MAS2_MASK << PTE_MAS2_SHIFT); 3640 *pte |= tlb_calc_wimg(PTE_PA(pte), mode) << PTE_MAS2_SHIFT; 3641 tlb0_flush_entry(va); 3642 } 3643 tlb_miss_unlock(); 3644 mtx_unlock_spin(&tlbivax_mutex); 3645 3646 return (0); 3647 } 3648 3649 /**************************************************************************/ 3650 /* TID handling */ 3651 /**************************************************************************/ 3652 3653 /* 3654 * Allocate a TID. If necessary, steal one from someone else. 3655 * The new TID is flushed from the TLB before returning. 3656 */ 3657 static tlbtid_t 3658 tid_alloc(pmap_t pmap) 3659 { 3660 tlbtid_t tid; 3661 int thiscpu; 3662 3663 KASSERT((pmap != kernel_pmap), ("tid_alloc: kernel pmap")); 3664 3665 CTR2(KTR_PMAP, "%s: s (pmap = %p)", __func__, pmap); 3666 3667 thiscpu = PCPU_GET(cpuid); 3668 3669 tid = PCPU_GET(booke.tid_next); 3670 if (tid > TID_MAX) 3671 tid = TID_MIN; 3672 PCPU_SET(booke.tid_next, tid + 1); 3673 3674 /* If we are stealing TID then clear the relevant pmap's field */ 3675 if (tidbusy[thiscpu][tid] != NULL) { 3676 3677 CTR2(KTR_PMAP, "%s: warning: stealing tid %d", __func__, tid); 3678 3679 tidbusy[thiscpu][tid]->pm_tid[thiscpu] = TID_NONE; 3680 3681 /* Flush all entries from TLB0 matching this TID. */ 3682 tid_flush(tid); 3683 } 3684 3685 tidbusy[thiscpu][tid] = pmap; 3686 pmap->pm_tid[thiscpu] = tid; 3687 __asm __volatile("msync; isync"); 3688 3689 CTR3(KTR_PMAP, "%s: e (%02d next = %02d)", __func__, tid, 3690 PCPU_GET(booke.tid_next)); 3691 3692 return (tid); 3693 } 3694 3695 /**************************************************************************/ 3696 /* TLB0 handling */ 3697 /**************************************************************************/ 3698 3699 /* Convert TLB0 va and way number to tlb0[] table index. 
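 *
 * (Layout sketch: entries are grouped by way, so way 'w' occupies indexes
 * [w * TLB0_ENTRIES_PER_WAY, (w + 1) * TLB0_ENTRIES_PER_WAY) and the set
 * number extracted from the EPN bits of 'va' selects the slot within that
 * group.)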
*/ 3700 static inline unsigned int 3701 tlb0_tableidx(vm_offset_t va, unsigned int way) 3702 { 3703 unsigned int idx; 3704 3705 idx = (way * TLB0_ENTRIES_PER_WAY); 3706 idx += (va & MAS2_TLB0_ENTRY_IDX_MASK) >> MAS2_TLB0_ENTRY_IDX_SHIFT; 3707 return (idx); 3708 } 3709 3710 /* 3711 * Invalidate TLB0 entry. 3712 */ 3713 static inline void 3714 tlb0_flush_entry(vm_offset_t va) 3715 { 3716 3717 CTR2(KTR_PMAP, "%s: s va=0x%08x", __func__, va); 3718 3719 mtx_assert(&tlbivax_mutex, MA_OWNED); 3720 3721 __asm __volatile("tlbivax 0, %0" :: "r"(va & MAS2_EPN_MASK)); 3722 __asm __volatile("isync; msync"); 3723 __asm __volatile("tlbsync; msync"); 3724 3725 CTR1(KTR_PMAP, "%s: e", __func__); 3726 } 3727 3728 3729 /**************************************************************************/ 3730 /* TLB1 handling */ 3731 /**************************************************************************/ 3732 3733 /* 3734 * TLB1 mapping notes: 3735 * 3736 * TLB1[0] Kernel text and data. 3737 * TLB1[1-15] Additional kernel text and data mappings (if required), PCI 3738 * windows, other devices mappings. 3739 */ 3740 3741 /* 3742 * Read an entry from given TLB1 slot. 3743 */ 3744 void 3745 tlb1_read_entry(tlb_entry_t *entry, unsigned int slot) 3746 { 3747 register_t msr; 3748 uint32_t mas0; 3749 3750 KASSERT((entry != NULL), ("%s(): Entry is NULL!", __func__)); 3751 3752 msr = mfmsr(); 3753 __asm __volatile("wrteei 0"); 3754 3755 mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(slot); 3756 mtspr(SPR_MAS0, mas0); 3757 __asm __volatile("isync; tlbre"); 3758 3759 entry->mas1 = mfspr(SPR_MAS1); 3760 entry->mas2 = mfspr(SPR_MAS2); 3761 entry->mas3 = mfspr(SPR_MAS3); 3762 3763 switch ((mfpvr() >> 16) & 0xFFFF) { 3764 case FSL_E500v2: 3765 case FSL_E500mc: 3766 case FSL_E5500: 3767 case FSL_E6500: 3768 entry->mas7 = mfspr(SPR_MAS7); 3769 break; 3770 default: 3771 entry->mas7 = 0; 3772 break; 3773 } 3774 __asm __volatile("wrtee %0" :: "r"(msr)); 3775 3776 entry->virt = entry->mas2 & MAS2_EPN_MASK; 3777 entry->phys = ((vm_paddr_t)(entry->mas7 & MAS7_RPN) << 32) | 3778 (entry->mas3 & MAS3_RPN); 3779 entry->size = 3780 tsize2size((entry->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT); 3781 } 3782 3783 struct tlbwrite_args { 3784 tlb_entry_t *e; 3785 unsigned int idx; 3786 }; 3787 3788 static void 3789 tlb1_write_entry_int(void *arg) 3790 { 3791 struct tlbwrite_args *args = arg; 3792 uint32_t mas0; 3793 3794 /* Select entry */ 3795 mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(args->idx); 3796 3797 mtspr(SPR_MAS0, mas0); 3798 mtspr(SPR_MAS1, args->e->mas1); 3799 mtspr(SPR_MAS2, args->e->mas2); 3800 mtspr(SPR_MAS3, args->e->mas3); 3801 switch ((mfpvr() >> 16) & 0xFFFF) { 3802 case FSL_E500mc: 3803 case FSL_E5500: 3804 case FSL_E6500: 3805 mtspr(SPR_MAS8, 0); 3806 /* FALLTHROUGH */ 3807 case FSL_E500v2: 3808 mtspr(SPR_MAS7, args->e->mas7); 3809 break; 3810 default: 3811 break; 3812 } 3813 3814 __asm __volatile("isync; tlbwe; isync; msync"); 3815 3816 } 3817 3818 static void 3819 tlb1_write_entry_sync(void *arg) 3820 { 3821 /* Empty synchronization point for smp_rendezvous(). */ 3822 } 3823 3824 /* 3825 * Write given entry to TLB1 hardware. 
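 *
 * Typical read-modify-write usage, as in mmu_booke_change_attr() above
 * (sketch; 'idx' and 'mode' are illustrative):
 *
 * tlb_entry_t e;
 *
 * tlb1_read_entry(&e, idx);
 * e.mas2 &= ~MAS2_WIMGE_MASK;
 * e.mas2 |= tlb_calc_wimg(e.phys, mode);
 * tlb1_write_entry(&e, idx);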
3826 */ 3827 static void 3828 tlb1_write_entry(tlb_entry_t *e, unsigned int idx) 3829 { 3830 struct tlbwrite_args args; 3831 3832 args.e = e; 3833 args.idx = idx; 3834 3835 #ifdef SMP 3836 if ((e->mas2 & _TLB_ENTRY_SHARED) && smp_started) { 3837 mb(); 3838 smp_rendezvous(tlb1_write_entry_sync, 3839 tlb1_write_entry_int, 3840 tlb1_write_entry_sync, &args); 3841 } else 3842 #endif 3843 { 3844 register_t msr; 3845 3846 msr = mfmsr(); 3847 __asm __volatile("wrteei 0"); 3848 tlb1_write_entry_int(&args); 3849 __asm __volatile("wrtee %0" :: "r"(msr)); 3850 } 3851 } 3852 3853 /* 3854 * Return the largest uint value log such that 2^log <= num. 3855 */ 3856 static unsigned int 3857 ilog2(unsigned long num) 3858 { 3859 long lz; 3860 3861 #ifdef __powerpc64__ 3862 __asm ("cntlzd %0, %1" : "=r" (lz) : "r" (num)); 3863 return (63 - lz); 3864 #else 3865 __asm ("cntlzw %0, %1" : "=r" (lz) : "r" (num)); 3866 return (31 - lz); 3867 #endif 3868 } 3869 3870 /* 3871 * Convert TLB TSIZE value to mapped region size. 3872 */ 3873 static vm_size_t 3874 tsize2size(unsigned int tsize) 3875 { 3876 3877 /* 3878 * size = 4^tsize KB 3879 * size = 4^tsize * 2^10 = 2^(2 * tsize - 10) 3880 */ 3881 3882 return ((1 << (2 * tsize)) * 1024); 3883 } 3884 3885 /* 3886 * Convert region size (must be power of 4) to TLB TSIZE value. 3887 */ 3888 static unsigned int 3889 size2tsize(vm_size_t size) 3890 { 3891 3892 return (ilog2(size) / 2 - 5); 3893 } 3894 3895 /* 3896 * Register permanent kernel mapping in TLB1. 3897 * 3898 * Entries are created starting from index 0 (current free entry is 3899 * kept in tlb1_idx) and are not supposed to be invalidated. 3900 */ 3901 int 3902 tlb1_set_entry(vm_offset_t va, vm_paddr_t pa, vm_size_t size, 3903 uint32_t flags) 3904 { 3905 tlb_entry_t e; 3906 uint32_t ts, tid; 3907 int tsize, index; 3908 3909 for (index = 0; index < TLB1_ENTRIES; index++) { 3910 tlb1_read_entry(&e, index); 3911 if ((e.mas1 & MAS1_VALID) == 0) 3912 break; 3913 /* Check if we're just updating the flags, and update them. */ 3914 if (e.phys == pa && e.virt == va && e.size == size) { 3915 e.mas2 = (va & MAS2_EPN_MASK) | flags; 3916 tlb1_write_entry(&e, index); 3917 return (0); 3918 } 3919 } 3920 if (index >= TLB1_ENTRIES) { 3921 printf("tlb1_set_entry: TLB1 full!\n"); 3922 return (-1); 3923 } 3924 3925 /* Convert size to TSIZE */ 3926 tsize = size2tsize(size); 3927 3928 tid = (TID_KERNEL << MAS1_TID_SHIFT) & MAS1_TID_MASK; 3929 /* XXX TS is hard coded to 0 for now as we only use single address space */ 3930 ts = (0 << MAS1_TS_SHIFT) & MAS1_TS_MASK; 3931 3932 e.phys = pa; 3933 e.virt = va; 3934 e.size = size; 3935 e.mas1 = MAS1_VALID | MAS1_IPROT | ts | tid; 3936 e.mas1 |= ((tsize << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK); 3937 e.mas2 = (va & MAS2_EPN_MASK) | flags; 3938 3939 /* Set supervisor RWX permission bits */ 3940 e.mas3 = (pa & MAS3_RPN) | MAS3_SR | MAS3_SW | MAS3_SX; 3941 e.mas7 = (pa >> 32) & MAS7_RPN; 3942 3943 tlb1_write_entry(&e, index); 3944 3945 /* 3946 * XXX in general TLB1 updates should be propagated between CPUs, 3947 * since current design assumes to have the same TLB1 set-up on all 3948 * cores. 3949 */ 3950 return (0); 3951 } 3952 3953 /* 3954 * Map in contiguous RAM region into the TLB1 using maximum of 3955 * KERNEL_REGION_MAX_TLB_ENTRIES entries. 3956 * 3957 * If necessary round up last entry size and return total size 3958 * used by all allocated entries. 
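 *
 * Bootstrap-time sketch, mirroring the 64-bit call in
 * mmu_booke_bootstrap() that wires all of physical memory into the
 * direct map (phys_avail[i + 1] is the highest available physical
 * address at that point):
 *
 * tlb1_mapin_region(DMAP_BASE_ADDRESS, 0, phys_avail[i + 1]);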
3959 */ 3960 vm_size_t 3961 tlb1_mapin_region(vm_offset_t va, vm_paddr_t pa, vm_size_t size) 3962 { 3963 vm_size_t pgs[KERNEL_REGION_MAX_TLB_ENTRIES]; 3964 vm_size_t mapped, pgsz, base, mask; 3965 int idx, nents; 3966 3967 /* Round up to the next 1M */ 3968 size = roundup2(size, 1 << 20); 3969 3970 mapped = 0; 3971 idx = 0; 3972 base = va; 3973 pgsz = 64*1024*1024; 3974 while (mapped < size) { 3975 while (mapped < size && idx < KERNEL_REGION_MAX_TLB_ENTRIES) { 3976 while (pgsz > (size - mapped)) 3977 pgsz >>= 2; 3978 pgs[idx++] = pgsz; 3979 mapped += pgsz; 3980 } 3981 3982 /* We under-map. Correct for this. */ 3983 if (mapped < size) { 3984 while (pgs[idx - 1] == pgsz) { 3985 idx--; 3986 mapped -= pgsz; 3987 } 3988 /* XXX We may increase beyond out starting point. */ 3989 pgsz <<= 2; 3990 pgs[idx++] = pgsz; 3991 mapped += pgsz; 3992 } 3993 } 3994 3995 nents = idx; 3996 mask = pgs[0] - 1; 3997 /* Align address to the boundary */ 3998 if (va & mask) { 3999 va = (va + mask) & ~mask; 4000 pa = (pa + mask) & ~mask; 4001 } 4002 4003 for (idx = 0; idx < nents; idx++) { 4004 pgsz = pgs[idx]; 4005 debugf("%u: %llx -> %jx, size=%jx\n", idx, pa, 4006 (uintmax_t)va, (uintmax_t)pgsz); 4007 tlb1_set_entry(va, pa, pgsz, 4008 _TLB_ENTRY_SHARED | _TLB_ENTRY_MEM); 4009 pa += pgsz; 4010 va += pgsz; 4011 } 4012 4013 mapped = (va - base); 4014 if (bootverbose) 4015 printf("mapped size 0x%"PRIxPTR" (wasted space 0x%"PRIxPTR")\n", 4016 mapped, mapped - size); 4017 return (mapped); 4018 } 4019 4020 /* 4021 * TLB1 initialization routine, to be called after the very first 4022 * assembler level setup done in locore.S. 4023 */ 4024 void 4025 tlb1_init() 4026 { 4027 vm_offset_t mas2; 4028 uint32_t mas0, mas1, mas3, mas7; 4029 uint32_t tsz; 4030 4031 tlb1_get_tlbconf(); 4032 4033 mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(0); 4034 mtspr(SPR_MAS0, mas0); 4035 __asm __volatile("isync; tlbre"); 4036 4037 mas1 = mfspr(SPR_MAS1); 4038 mas2 = mfspr(SPR_MAS2); 4039 mas3 = mfspr(SPR_MAS3); 4040 mas7 = mfspr(SPR_MAS7); 4041 4042 kernload = ((vm_paddr_t)(mas7 & MAS7_RPN) << 32) | 4043 (mas3 & MAS3_RPN); 4044 4045 tsz = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; 4046 kernsize += (tsz > 0) ? tsize2size(tsz) : 0; 4047 kernstart = trunc_page(mas2); 4048 4049 /* Setup TLB miss defaults */ 4050 set_mas4_defaults(); 4051 } 4052 4053 /* 4054 * pmap_early_io_unmap() should be used in short conjunction with 4055 * pmap_early_io_map(), as in the following snippet: 4056 * 4057 * x = pmap_early_io_map(...); 4058 * <do something with x> 4059 * pmap_early_io_unmap(x, size); 4060 * 4061 * And avoiding more allocations between. 
/*
 * TLB1 initialization routine, to be called after the very first
 * assembler-level setup performed in locore.S.
 */
void
tlb1_init()
{
        vm_offset_t mas2;
        uint32_t mas0, mas1, mas3, mas7;
        uint32_t tsz;

        tlb1_get_tlbconf();

        mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(0);
        mtspr(SPR_MAS0, mas0);
        __asm __volatile("isync; tlbre");

        mas1 = mfspr(SPR_MAS1);
        mas2 = mfspr(SPR_MAS2);
        mas3 = mfspr(SPR_MAS3);
        mas7 = mfspr(SPR_MAS7);

        kernload = ((vm_paddr_t)(mas7 & MAS7_RPN) << 32) |
            (mas3 & MAS3_RPN);

        tsz = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
        kernsize += (tsz > 0) ? tsize2size(tsz) : 0;
        kernstart = trunc_page(mas2);

        /* Set up TLB miss defaults. */
        set_mas4_defaults();
}

/*
 * pmap_early_io_unmap() should be used shortly after pmap_early_io_map(),
 * as in the following snippet:
 *
 * x = pmap_early_io_map(...);
 * <do something with x>
 * pmap_early_io_unmap(x, size);
 *
 * with no other early I/O mappings made in between.
 */
void
pmap_early_io_unmap(vm_offset_t va, vm_size_t size)
{
        int i;
        tlb_entry_t e;
        vm_size_t isize;

        size = roundup(size, PAGE_SIZE);
        isize = size;
        for (i = 0; i < TLB1_ENTRIES && size > 0; i++) {
                tlb1_read_entry(&e, i);
                if (!(e.mas1 & MAS1_VALID))
                        continue;
                if (va <= e.virt && (va + isize) >= (e.virt + e.size)) {
                        size -= e.size;
                        e.mas1 &= ~MAS1_VALID;
                        tlb1_write_entry(&e, i);
                }
        }
        if (tlb1_map_base == va + isize)
                tlb1_map_base -= isize;
}

vm_offset_t
pmap_early_io_map(vm_paddr_t pa, vm_size_t size)
{
        vm_paddr_t pa_base;
        vm_offset_t va, sz;
        int i;
        tlb_entry_t e;

        KASSERT(!pmap_bootstrapped, ("Do not use after PMAP is up!"));

        for (i = 0; i < TLB1_ENTRIES; i++) {
                tlb1_read_entry(&e, i);
                if (!(e.mas1 & MAS1_VALID))
                        continue;
                if (pa >= e.phys && (pa + size) <=
                    (e.phys + e.size))
                        return (e.virt + (pa - e.phys));
        }

        pa_base = rounddown(pa, PAGE_SIZE);
        size = roundup(size + (pa - pa_base), PAGE_SIZE);
        tlb1_map_base = roundup2(tlb1_map_base, 1 << (ilog2(size) & ~1));
        va = tlb1_map_base + (pa - pa_base);

        do {
                sz = 1 << (ilog2(size) & ~1);
                tlb1_set_entry(tlb1_map_base, pa_base, sz,
                    _TLB_ENTRY_SHARED | _TLB_ENTRY_IO);
                size -= sz;
                pa_base += sz;
                tlb1_map_base += sz;
        } while (size > 0);

        return (va);
}
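/*
 * Illustrative example for pmap_early_io_map() above (addresses are made
 * up): mapping a device register block at pa 0xffe04500 with size 0x100
 * rounds pa_base down to 0xffe04000 and the size up to a single 4KB entry
 * (2^12, i.e. a power of 4 as required for TSIZE), and returns
 * tlb1_map_base + 0x500 so the caller sees the original page offset.
 */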
void
pmap_track_page(pmap_t pmap, vm_offset_t va)
{
        vm_paddr_t pa;
        vm_page_t page;
        struct pv_entry *pve;

        va = trunc_page(va);
        pa = pmap_kextract(va);
        page = PHYS_TO_VM_PAGE(pa);

        rw_wlock(&pvh_global_lock);
        PMAP_LOCK(pmap);

        TAILQ_FOREACH(pve, &page->md.pv_list, pv_link) {
                if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) {
                        goto out;
                }
        }
        page->md.pv_tracked = true;
        pv_insert(pmap, va, page);
out:
        PMAP_UNLOCK(pmap);
        rw_wunlock(&pvh_global_lock);
}

/*
 * Set up MAS4 defaults.
 * These values are loaded into MAS0-MAS2 on a TLB miss.
 */
static void
set_mas4_defaults(void)
{
        uint32_t mas4;

        /* Defaults: TLB0, PID0, TSIZED=4K */
        mas4 = MAS4_TLBSELD0;
        mas4 |= (TLB_SIZE_4K << MAS4_TSIZED_SHIFT) & MAS4_TSIZED_MASK;
#ifdef SMP
        mas4 |= MAS4_MD;
#endif
        mtspr(SPR_MAS4, mas4);
        __asm __volatile("isync");
}

/*
 * Return 0 if the physical I/O range is encompassed by one of the
 * TLB1 entries; otherwise return the related error code.
 */
static int
tlb1_iomapped(int i, vm_paddr_t pa, vm_size_t size, vm_offset_t *va)
{
        uint32_t prot;
        vm_paddr_t pa_start;
        vm_paddr_t pa_end;
        unsigned int entry_tsize;
        vm_size_t entry_size;
        tlb_entry_t e;

        *va = (vm_offset_t)NULL;

        tlb1_read_entry(&e, i);
        /* Skip invalid entries. */
        if (!(e.mas1 & MAS1_VALID))
                return (EINVAL);

        /*
         * The entry must be cache-inhibited, guarded, and r/w
         * so it can function as an I/O page.
         */
        prot = e.mas2 & (MAS2_I | MAS2_G);
        if (prot != (MAS2_I | MAS2_G))
                return (EPERM);

        prot = e.mas3 & (MAS3_SR | MAS3_SW);
        if (prot != (MAS3_SR | MAS3_SW))
                return (EPERM);

        /* The address should be within the entry range. */
        entry_tsize = (e.mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
        KASSERT((entry_tsize), ("tlb1_iomapped: invalid entry tsize"));

        entry_size = tsize2size(entry_tsize);
        pa_start = (((vm_paddr_t)e.mas7 & MAS7_RPN) << 32) |
            (e.mas3 & MAS3_RPN);
        pa_end = pa_start + entry_size;

        if ((pa < pa_start) || ((pa + size) > pa_end))
                return (ERANGE);

        /* Return the virtual address of this mapping. */
        *va = (e.mas2 & MAS2_EPN_MASK) + (pa - pa_start);
        return (0);
}

/*
 * Invalidate all TLB0 entries which match the given TID. Note this is
 * dedicated to cases where invalidations should NOT be propagated to other
 * CPUs.
 */
static void
tid_flush(tlbtid_t tid)
{
        register_t msr;
        uint32_t mas0, mas1, mas2;
        int entry, way;

        /* Don't evict kernel translations. */
        if (tid == TID_KERNEL)
                return;

        msr = mfmsr();
        __asm __volatile("wrteei 0");

        /*
         * Newer cores (e500mc and later) have tlbilx, which doesn't
         * broadcast, so use it for PID invalidation.
         */
        switch ((mfpvr() >> 16) & 0xffff) {
        case FSL_E500mc:
        case FSL_E5500:
        case FSL_E6500:
                mtspr(SPR_MAS6, tid << MAS6_SPID0_SHIFT);
                /* tlbilxpid */
                __asm __volatile("isync; .long 0x7c000024; isync; msync");
                __asm __volatile("wrtee %0" :: "r"(msr));
                return;
        }

        for (way = 0; way < TLB0_WAYS; way++)
                for (entry = 0; entry < TLB0_ENTRIES_PER_WAY; entry++) {

                        mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way);
                        mtspr(SPR_MAS0, mas0);

                        mas2 = entry << MAS2_TLB0_ENTRY_IDX_SHIFT;
                        mtspr(SPR_MAS2, mas2);

                        __asm __volatile("isync; tlbre");

                        mas1 = mfspr(SPR_MAS1);

                        if (!(mas1 & MAS1_VALID))
                                continue;
                        if (((mas1 & MAS1_TID_MASK) >> MAS1_TID_SHIFT) != tid)
                                continue;
                        mas1 &= ~MAS1_VALID;
                        mtspr(SPR_MAS1, mas1);
                        __asm __volatile("isync; tlbwe; isync; msync");
                }
        __asm __volatile("wrtee %0" :: "r"(msr));
}
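/*
 * Note on the fallback path in tid_flush() above (descriptive only): on
 * cores without tlbilx the flush walks every TLB0 way and set, reads each
 * entry with tlbre (MAS0 selects the way, MAS2 the set index), and
 * rewrites any entry whose TID matches with MAS1_VALID cleared.
 */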
#ifdef DDB
/* Print out contents of the MAS registers for each TLB0 entry. */
static void
#ifdef __powerpc64__
tlb_print_entry(int i, uint32_t mas1, uint64_t mas2, uint32_t mas3,
#else
tlb_print_entry(int i, uint32_t mas1, uint32_t mas2, uint32_t mas3,
#endif
    uint32_t mas7)
{
        int as;
        char desc[3];
        tlbtid_t tid;
        vm_size_t size;
        unsigned int tsize;

        desc[2] = '\0';
        if (mas1 & MAS1_VALID)
                desc[0] = 'V';
        else
                desc[0] = ' ';

        if (mas1 & MAS1_IPROT)
                desc[1] = 'P';
        else
                desc[1] = ' ';

        as = (mas1 & MAS1_TS_MASK) ? 1 : 0;
        tid = MAS1_GETTID(mas1);

        tsize = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
        size = 0;
        if (tsize)
                size = tsize2size(tsize);

        printf("%3d: (%s) [AS=%d] "
            "sz = 0x%08jx tsz = %d tid = %d mas1 = 0x%08x "
            "mas2(va) = 0x%"PRI0ptrX" mas3(pa) = 0x%08x mas7 = 0x%08x\n",
            i, desc, as, (uintmax_t)size, tsize, tid, mas1, mas2, mas3, mas7);
}

DB_SHOW_COMMAND(tlb0, tlb0_print_tlbentries)
{
        uint32_t mas0, mas1, mas3, mas7;
#ifdef __powerpc64__
        uint64_t mas2;
#else
        uint32_t mas2;
#endif
        int entryidx, way, idx;

        printf("TLB0 entries:\n");
        for (way = 0; way < TLB0_WAYS; way++)
                for (entryidx = 0; entryidx < TLB0_ENTRIES_PER_WAY; entryidx++) {

                        mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way);
                        mtspr(SPR_MAS0, mas0);

                        mas2 = entryidx << MAS2_TLB0_ENTRY_IDX_SHIFT;
                        mtspr(SPR_MAS2, mas2);

                        __asm __volatile("isync; tlbre");

                        mas1 = mfspr(SPR_MAS1);
                        mas2 = mfspr(SPR_MAS2);
                        mas3 = mfspr(SPR_MAS3);
                        mas7 = mfspr(SPR_MAS7);

                        idx = tlb0_tableidx(mas2, way);
                        tlb_print_entry(idx, mas1, mas2, mas3, mas7);
                }
}

/*
 * Print out contents of the MAS registers for each TLB1 entry.
 */
DB_SHOW_COMMAND(tlb1, tlb1_print_tlbentries)
{
        uint32_t mas0, mas1, mas3, mas7;
#ifdef __powerpc64__
        uint64_t mas2;
#else
        uint32_t mas2;
#endif
        int i;

        printf("TLB1 entries:\n");
        for (i = 0; i < TLB1_ENTRIES; i++) {

                mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(i);
                mtspr(SPR_MAS0, mas0);

                __asm __volatile("isync; tlbre");

                mas1 = mfspr(SPR_MAS1);
                mas2 = mfspr(SPR_MAS2);
                mas3 = mfspr(SPR_MAS3);
                mas7 = mfspr(SPR_MAS7);

                tlb_print_entry(i, mas1, mas2, mas3, mas7);
        }
}
#endif
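/*
 * Illustrative usage (assuming the standard DB_SHOW_COMMAND wiring): the
 * commands above are reached from the ddb prompt as "show tlb0" and
 * "show tlb1".
 */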