1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski <raj@semihalf.com> 5 * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 20 * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 22 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * Some hw specific parts of this pmap were derived or influenced 29 * by NetBSD's ibm4xx pmap module. More generic code is shared with 30 * a few other pmap modules from the FreeBSD tree. 31 */ 32 33 /* 34 * VM layout notes: 35 * 36 * Kernel and user threads run within one common virtual address space 37 * defined by AS=0. 38 * 39 * 32-bit pmap: 40 * Virtual address space layout: 41 * ----------------------------- 42 * 0x0000_0000 - 0x7fff_ffff : user process 43 * 0x8000_0000 - 0xbfff_ffff : pmap_mapdev()-ed area (PCI/PCIE etc.) 44 * 0xc000_0000 - 0xc0ff_ffff : kernel reserved 45 * 0xc000_0000 - data_end : kernel code+data, env, metadata etc. 46 * 0xc100_0000 - 0xffff_ffff : KVA 47 * 0xc100_0000 - 0xc100_3fff : reserved for page zero/copy 48 * 0xc100_4000 - 0xc200_3fff : reserved for ptbl bufs 49 * 0xc200_4000 - 0xc200_8fff : guard page + kstack0 50 * 0xc200_9000 - 0xfeef_ffff : actual free KVA space 51 * 52 * 64-bit pmap: 53 * Virtual address space layout: 54 * ----------------------------- 55 * 0x0000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff : user process 56 * 0x0000_0000_0000_0000 - 0x8fff_ffff_ffff_ffff : text, data, heap, maps, libraries 57 * 0x9000_0000_0000_0000 - 0xafff_ffff_ffff_ffff : mmio region 58 * 0xb000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff : stack 59 * 0xc000_0000_0000_0000 - 0xcfff_ffff_ffff_ffff : kernel reserved 60 * 0xc000_0000_0000_0000 - endkernel-1 : kernel code & data 61 * endkernel - msgbufp-1 : flat device tree 62 * msgbufp - kernel_pdir-1 : message buffer 63 * kernel_pdir - kernel_pp2d-1 : kernel page directory 64 * kernel_pp2d - . 
: kernel pointers to page directory 65 * pmap_zero_copy_min - crashdumpmap-1 : reserved for page zero/copy 66 * crashdumpmap - ptbl_buf_pool_vabase-1 : reserved for ptbl bufs 67 * ptbl_buf_pool_vabase - virtual_avail-1 : user page directories and page tables 68 * virtual_avail - 0xcfff_ffff_ffff_ffff : actual free KVA space 69 * 0xd000_0000_0000_0000 - 0xdfff_ffff_ffff_ffff : coprocessor region 70 * 0xe000_0000_0000_0000 - 0xefff_ffff_ffff_ffff : mmio region 71 * 0xf000_0000_0000_0000 - 0xffff_ffff_ffff_ffff : direct map 72 * 0xf000_0000_0000_0000 - +Maxmem : physmem map 73 * - 0xffff_ffff_ffff_ffff : device direct map 74 */ 75 76 #include <sys/cdefs.h> 77 __FBSDID("$FreeBSD$"); 78 79 #include "opt_ddb.h" 80 #include "opt_kstack_pages.h" 81 82 #include <sys/param.h> 83 #include <sys/conf.h> 84 #include <sys/malloc.h> 85 #include <sys/ktr.h> 86 #include <sys/proc.h> 87 #include <sys/user.h> 88 #include <sys/queue.h> 89 #include <sys/systm.h> 90 #include <sys/kernel.h> 91 #include <sys/kerneldump.h> 92 #include <sys/linker.h> 93 #include <sys/msgbuf.h> 94 #include <sys/lock.h> 95 #include <sys/mutex.h> 96 #include <sys/rwlock.h> 97 #include <sys/sched.h> 98 #include <sys/smp.h> 99 #include <sys/vmmeter.h> 100 101 #include <vm/vm.h> 102 #include <vm/vm_page.h> 103 #include <vm/vm_kern.h> 104 #include <vm/vm_pageout.h> 105 #include <vm/vm_extern.h> 106 #include <vm/vm_object.h> 107 #include <vm/vm_param.h> 108 #include <vm/vm_map.h> 109 #include <vm/vm_pager.h> 110 #include <vm/vm_phys.h> 111 #include <vm/vm_pagequeue.h> 112 #include <vm/uma.h> 113 114 #include <machine/_inttypes.h> 115 #include <machine/cpu.h> 116 #include <machine/pcb.h> 117 #include <machine/platform.h> 118 119 #include <machine/tlb.h> 120 #include <machine/spr.h> 121 #include <machine/md_var.h> 122 #include <machine/mmuvar.h> 123 #include <machine/pmap.h> 124 #include <machine/pte.h> 125 126 #include <ddb/ddb.h> 127 128 #include "mmu_if.h" 129 130 #define SPARSE_MAPDEV 131 #ifdef DEBUG 132 #define debugf(fmt, args...) printf(fmt, ##args) 133 #else 134 #define debugf(fmt, args...) 135 #endif 136 137 #ifdef __powerpc64__ 138 #define PRI0ptrX "016lx" 139 #else 140 #define PRI0ptrX "08x" 141 #endif 142 143 #define TODO panic("%s: not implemented", __func__); 144 145 extern unsigned char _etext[]; 146 extern unsigned char _end[]; 147 148 extern uint32_t *bootinfo; 149 150 vm_paddr_t kernload; 151 vm_offset_t kernstart; 152 vm_size_t kernsize; 153 154 /* Message buffer and tables. */ 155 static vm_offset_t data_start; 156 static vm_size_t data_end; 157 158 /* Phys/avail memory regions. */ 159 static struct mem_region *availmem_regions; 160 static int availmem_regions_sz; 161 static struct mem_region *physmem_regions; 162 static int physmem_regions_sz; 163 164 /* Reserved KVA space and mutex for mmu_booke_zero_page. */ 165 static vm_offset_t zero_page_va; 166 static struct mtx zero_page_mutex; 167 168 static struct mtx tlbivax_mutex; 169 170 /* Reserved KVA space and mutex for mmu_booke_copy_page. */ 171 static vm_offset_t copy_page_src_va; 172 static vm_offset_t copy_page_dst_va; 173 static struct mtx copy_page_mutex; 174 175 /**************************************************************************/ 176 /* PMAP */ 177 /**************************************************************************/ 178 179 static int mmu_booke_enter_locked(mmu_t, pmap_t, vm_offset_t, vm_page_t, 180 vm_prot_t, u_int flags, int8_t psind); 181 182 unsigned int kptbl_min; /* Index of the first kernel ptbl. 
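 *
 * For reference (32-bit): each pdir entry covers PDIR_SIZE bytes of KVA,
 * and mmu_booke_bootstrap() / kernel_pte_alloc() derive these values as
 *
 *	kptbl_min    = VM_MIN_KERNEL_ADDRESS / PDIR_SIZE;
 *	kernel_ptbls = howmany(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
 *	    PDIR_SIZE);
 *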
*/ 183 unsigned int kernel_ptbls; /* Number of KVA ptbls. */ 184 #ifdef __powerpc64__ 185 unsigned int kernel_pdirs; 186 #endif 187 188 /* 189 * If user pmap is processed with mmu_booke_remove and the resident count 190 * drops to 0, there are no more pages to remove, so we need not continue. 191 */ 192 #define PMAP_REMOVE_DONE(pmap) \ 193 ((pmap) != kernel_pmap && (pmap)->pm_stats.resident_count == 0) 194 195 #if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__) 196 extern int elf32_nxstack; 197 #endif 198 199 /**************************************************************************/ 200 /* TLB and TID handling */ 201 /**************************************************************************/ 202 203 /* Translation ID busy table */ 204 static volatile pmap_t tidbusy[MAXCPU][TID_MAX + 1]; 205 206 /* 207 * TLB0 capabilities (entry, way numbers etc.). These can vary between e500 208 * core revisions and should be read from h/w registers during early config. 209 */ 210 uint32_t tlb0_entries; 211 uint32_t tlb0_ways; 212 uint32_t tlb0_entries_per_way; 213 uint32_t tlb1_entries; 214 215 #define TLB0_ENTRIES (tlb0_entries) 216 #define TLB0_WAYS (tlb0_ways) 217 #define TLB0_ENTRIES_PER_WAY (tlb0_entries_per_way) 218 219 #define TLB1_ENTRIES (tlb1_entries) 220 221 static vm_offset_t tlb1_map_base = VM_MAXUSER_ADDRESS + PAGE_SIZE; 222 223 static tlbtid_t tid_alloc(struct pmap *); 224 static void tid_flush(tlbtid_t tid); 225 226 #ifdef DDB 227 #ifdef __powerpc64__ 228 static void tlb_print_entry(int, uint32_t, uint64_t, uint32_t, uint32_t); 229 #else 230 static void tlb_print_entry(int, uint32_t, uint32_t, uint32_t, uint32_t); 231 #endif 232 #endif 233 234 static void tlb1_read_entry(tlb_entry_t *, unsigned int); 235 static void tlb1_write_entry(tlb_entry_t *, unsigned int); 236 static int tlb1_iomapped(int, vm_paddr_t, vm_size_t, vm_offset_t *); 237 static vm_size_t tlb1_mapin_region(vm_offset_t, vm_paddr_t, vm_size_t); 238 239 static vm_size_t tsize2size(unsigned int); 240 static unsigned int size2tsize(vm_size_t); 241 static unsigned int ilog2(unsigned long); 242 243 static void set_mas4_defaults(void); 244 245 static inline void tlb0_flush_entry(vm_offset_t); 246 static inline unsigned int tlb0_tableidx(vm_offset_t, unsigned int); 247 248 /**************************************************************************/ 249 /* Page table management */ 250 /**************************************************************************/ 251 252 static struct rwlock_padalign pvh_global_lock; 253 254 /* Data for the pv entry allocation mechanism */ 255 static uma_zone_t pvzone; 256 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; 257 258 #define PV_ENTRY_ZONE_MIN 2048 /* min pv entries in uma zone */ 259 260 #ifndef PMAP_SHPGPERPROC 261 #define PMAP_SHPGPERPROC 200 262 #endif 263 264 #ifdef __powerpc64__ 265 static pte_t *ptbl_alloc(mmu_t, pmap_t, pte_t **, 266 unsigned int, boolean_t); 267 static void ptbl_free(mmu_t, pmap_t, pte_t **, unsigned int); 268 static void ptbl_hold(mmu_t, pmap_t, pte_t **, unsigned int); 269 static int ptbl_unhold(mmu_t, pmap_t, vm_offset_t); 270 #else 271 static void ptbl_init(void); 272 static struct ptbl_buf *ptbl_buf_alloc(void); 273 static void ptbl_buf_free(struct ptbl_buf *); 274 static void ptbl_free_pmap_ptbl(pmap_t, pte_t *); 275 276 static pte_t *ptbl_alloc(mmu_t, pmap_t, unsigned int, boolean_t); 277 static void ptbl_free(mmu_t, pmap_t, unsigned int); 278 static void ptbl_hold(mmu_t, pmap_t, unsigned int); 279 static int ptbl_unhold(mmu_t, 
pmap_t, unsigned int); 280 #endif 281 282 static vm_paddr_t pte_vatopa(mmu_t, pmap_t, vm_offset_t); 283 static int pte_enter(mmu_t, pmap_t, vm_page_t, vm_offset_t, uint32_t, boolean_t); 284 static int pte_remove(mmu_t, pmap_t, vm_offset_t, uint8_t); 285 static pte_t *pte_find(mmu_t, pmap_t, vm_offset_t); 286 static void kernel_pte_alloc(vm_offset_t, vm_offset_t, vm_offset_t); 287 288 static pv_entry_t pv_alloc(void); 289 static void pv_free(pv_entry_t); 290 static void pv_insert(pmap_t, vm_offset_t, vm_page_t); 291 static void pv_remove(pmap_t, vm_offset_t, vm_page_t); 292 293 static void booke_pmap_init_qpages(void); 294 295 struct ptbl_buf { 296 TAILQ_ENTRY(ptbl_buf) link; /* list link */ 297 vm_offset_t kva; /* va of mapping */ 298 }; 299 300 #ifndef __powerpc64__ 301 /* Number of kva ptbl buffers, each covering one ptbl (PTBL_PAGES). */ 302 #define PTBL_BUFS (128 * 16) 303 304 /* ptbl free list and a lock used for access synchronization. */ 305 static TAILQ_HEAD(, ptbl_buf) ptbl_buf_freelist; 306 static struct mtx ptbl_buf_freelist_lock; 307 308 /* Base address of kva space allocated fot ptbl bufs. */ 309 static vm_offset_t ptbl_buf_pool_vabase; 310 311 /* Pointer to ptbl_buf structures. */ 312 static struct ptbl_buf *ptbl_bufs; 313 #endif 314 315 #ifdef SMP 316 extern tlb_entry_t __boot_tlb1[]; 317 void pmap_bootstrap_ap(volatile uint32_t *); 318 #endif 319 320 /* 321 * Kernel MMU interface 322 */ 323 static void mmu_booke_clear_modify(mmu_t, vm_page_t); 324 static void mmu_booke_copy(mmu_t, pmap_t, pmap_t, vm_offset_t, 325 vm_size_t, vm_offset_t); 326 static void mmu_booke_copy_page(mmu_t, vm_page_t, vm_page_t); 327 static void mmu_booke_copy_pages(mmu_t, vm_page_t *, 328 vm_offset_t, vm_page_t *, vm_offset_t, int); 329 static int mmu_booke_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, 330 vm_prot_t, u_int flags, int8_t psind); 331 static void mmu_booke_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, 332 vm_page_t, vm_prot_t); 333 static void mmu_booke_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, 334 vm_prot_t); 335 static vm_paddr_t mmu_booke_extract(mmu_t, pmap_t, vm_offset_t); 336 static vm_page_t mmu_booke_extract_and_hold(mmu_t, pmap_t, vm_offset_t, 337 vm_prot_t); 338 static void mmu_booke_init(mmu_t); 339 static boolean_t mmu_booke_is_modified(mmu_t, vm_page_t); 340 static boolean_t mmu_booke_is_prefaultable(mmu_t, pmap_t, vm_offset_t); 341 static boolean_t mmu_booke_is_referenced(mmu_t, vm_page_t); 342 static int mmu_booke_ts_referenced(mmu_t, vm_page_t); 343 static vm_offset_t mmu_booke_map(mmu_t, vm_offset_t *, vm_paddr_t, vm_paddr_t, 344 int); 345 static int mmu_booke_mincore(mmu_t, pmap_t, vm_offset_t, 346 vm_paddr_t *); 347 static void mmu_booke_object_init_pt(mmu_t, pmap_t, vm_offset_t, 348 vm_object_t, vm_pindex_t, vm_size_t); 349 static boolean_t mmu_booke_page_exists_quick(mmu_t, pmap_t, vm_page_t); 350 static void mmu_booke_page_init(mmu_t, vm_page_t); 351 static int mmu_booke_page_wired_mappings(mmu_t, vm_page_t); 352 static void mmu_booke_pinit(mmu_t, pmap_t); 353 static void mmu_booke_pinit0(mmu_t, pmap_t); 354 static void mmu_booke_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, 355 vm_prot_t); 356 static void mmu_booke_qenter(mmu_t, vm_offset_t, vm_page_t *, int); 357 static void mmu_booke_qremove(mmu_t, vm_offset_t, int); 358 static void mmu_booke_release(mmu_t, pmap_t); 359 static void mmu_booke_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t); 360 static void mmu_booke_remove_all(mmu_t, vm_page_t); 361 static void mmu_booke_remove_write(mmu_t, 
vm_page_t); 362 static void mmu_booke_unwire(mmu_t, pmap_t, vm_offset_t, vm_offset_t); 363 static void mmu_booke_zero_page(mmu_t, vm_page_t); 364 static void mmu_booke_zero_page_area(mmu_t, vm_page_t, int, int); 365 static void mmu_booke_activate(mmu_t, struct thread *); 366 static void mmu_booke_deactivate(mmu_t, struct thread *); 367 static void mmu_booke_bootstrap(mmu_t, vm_offset_t, vm_offset_t); 368 static void *mmu_booke_mapdev(mmu_t, vm_paddr_t, vm_size_t); 369 static void *mmu_booke_mapdev_attr(mmu_t, vm_paddr_t, vm_size_t, vm_memattr_t); 370 static void mmu_booke_unmapdev(mmu_t, vm_offset_t, vm_size_t); 371 static vm_paddr_t mmu_booke_kextract(mmu_t, vm_offset_t); 372 static void mmu_booke_kenter(mmu_t, vm_offset_t, vm_paddr_t); 373 static void mmu_booke_kenter_attr(mmu_t, vm_offset_t, vm_paddr_t, vm_memattr_t); 374 static void mmu_booke_kremove(mmu_t, vm_offset_t); 375 static boolean_t mmu_booke_dev_direct_mapped(mmu_t, vm_paddr_t, vm_size_t); 376 static void mmu_booke_sync_icache(mmu_t, pmap_t, vm_offset_t, 377 vm_size_t); 378 static void mmu_booke_dumpsys_map(mmu_t, vm_paddr_t pa, size_t, 379 void **); 380 static void mmu_booke_dumpsys_unmap(mmu_t, vm_paddr_t pa, size_t, 381 void *); 382 static void mmu_booke_scan_init(mmu_t); 383 static vm_offset_t mmu_booke_quick_enter_page(mmu_t mmu, vm_page_t m); 384 static void mmu_booke_quick_remove_page(mmu_t mmu, vm_offset_t addr); 385 static int mmu_booke_change_attr(mmu_t mmu, vm_offset_t addr, 386 vm_size_t sz, vm_memattr_t mode); 387 static int mmu_booke_map_user_ptr(mmu_t mmu, pmap_t pm, 388 volatile const void *uaddr, void **kaddr, size_t ulen, size_t *klen); 389 static int mmu_booke_decode_kernel_ptr(mmu_t mmu, vm_offset_t addr, 390 int *is_user, vm_offset_t *decoded_addr); 391 392 393 static mmu_method_t mmu_booke_methods[] = { 394 /* pmap dispatcher interface */ 395 MMUMETHOD(mmu_clear_modify, mmu_booke_clear_modify), 396 MMUMETHOD(mmu_copy, mmu_booke_copy), 397 MMUMETHOD(mmu_copy_page, mmu_booke_copy_page), 398 MMUMETHOD(mmu_copy_pages, mmu_booke_copy_pages), 399 MMUMETHOD(mmu_enter, mmu_booke_enter), 400 MMUMETHOD(mmu_enter_object, mmu_booke_enter_object), 401 MMUMETHOD(mmu_enter_quick, mmu_booke_enter_quick), 402 MMUMETHOD(mmu_extract, mmu_booke_extract), 403 MMUMETHOD(mmu_extract_and_hold, mmu_booke_extract_and_hold), 404 MMUMETHOD(mmu_init, mmu_booke_init), 405 MMUMETHOD(mmu_is_modified, mmu_booke_is_modified), 406 MMUMETHOD(mmu_is_prefaultable, mmu_booke_is_prefaultable), 407 MMUMETHOD(mmu_is_referenced, mmu_booke_is_referenced), 408 MMUMETHOD(mmu_ts_referenced, mmu_booke_ts_referenced), 409 MMUMETHOD(mmu_map, mmu_booke_map), 410 MMUMETHOD(mmu_mincore, mmu_booke_mincore), 411 MMUMETHOD(mmu_object_init_pt, mmu_booke_object_init_pt), 412 MMUMETHOD(mmu_page_exists_quick,mmu_booke_page_exists_quick), 413 MMUMETHOD(mmu_page_init, mmu_booke_page_init), 414 MMUMETHOD(mmu_page_wired_mappings, mmu_booke_page_wired_mappings), 415 MMUMETHOD(mmu_pinit, mmu_booke_pinit), 416 MMUMETHOD(mmu_pinit0, mmu_booke_pinit0), 417 MMUMETHOD(mmu_protect, mmu_booke_protect), 418 MMUMETHOD(mmu_qenter, mmu_booke_qenter), 419 MMUMETHOD(mmu_qremove, mmu_booke_qremove), 420 MMUMETHOD(mmu_release, mmu_booke_release), 421 MMUMETHOD(mmu_remove, mmu_booke_remove), 422 MMUMETHOD(mmu_remove_all, mmu_booke_remove_all), 423 MMUMETHOD(mmu_remove_write, mmu_booke_remove_write), 424 MMUMETHOD(mmu_sync_icache, mmu_booke_sync_icache), 425 MMUMETHOD(mmu_unwire, mmu_booke_unwire), 426 MMUMETHOD(mmu_zero_page, mmu_booke_zero_page), 427 MMUMETHOD(mmu_zero_page_area, 
mmu_booke_zero_page_area), 428 MMUMETHOD(mmu_activate, mmu_booke_activate), 429 MMUMETHOD(mmu_deactivate, mmu_booke_deactivate), 430 MMUMETHOD(mmu_quick_enter_page, mmu_booke_quick_enter_page), 431 MMUMETHOD(mmu_quick_remove_page, mmu_booke_quick_remove_page), 432 433 /* Internal interfaces */ 434 MMUMETHOD(mmu_bootstrap, mmu_booke_bootstrap), 435 MMUMETHOD(mmu_dev_direct_mapped,mmu_booke_dev_direct_mapped), 436 MMUMETHOD(mmu_mapdev, mmu_booke_mapdev), 437 MMUMETHOD(mmu_mapdev_attr, mmu_booke_mapdev_attr), 438 MMUMETHOD(mmu_kenter, mmu_booke_kenter), 439 MMUMETHOD(mmu_kenter_attr, mmu_booke_kenter_attr), 440 MMUMETHOD(mmu_kextract, mmu_booke_kextract), 441 MMUMETHOD(mmu_kremove, mmu_booke_kremove), 442 MMUMETHOD(mmu_unmapdev, mmu_booke_unmapdev), 443 MMUMETHOD(mmu_change_attr, mmu_booke_change_attr), 444 MMUMETHOD(mmu_map_user_ptr, mmu_booke_map_user_ptr), 445 MMUMETHOD(mmu_decode_kernel_ptr, mmu_booke_decode_kernel_ptr), 446 447 /* dumpsys() support */ 448 MMUMETHOD(mmu_dumpsys_map, mmu_booke_dumpsys_map), 449 MMUMETHOD(mmu_dumpsys_unmap, mmu_booke_dumpsys_unmap), 450 MMUMETHOD(mmu_scan_init, mmu_booke_scan_init), 451 452 { 0, 0 } 453 }; 454 455 MMU_DEF(booke_mmu, MMU_TYPE_BOOKE, mmu_booke_methods, 0); 456 457 static __inline uint32_t 458 tlb_calc_wimg(vm_paddr_t pa, vm_memattr_t ma) 459 { 460 uint32_t attrib; 461 int i; 462 463 if (ma != VM_MEMATTR_DEFAULT) { 464 switch (ma) { 465 case VM_MEMATTR_UNCACHEABLE: 466 return (MAS2_I | MAS2_G); 467 case VM_MEMATTR_WRITE_COMBINING: 468 case VM_MEMATTR_WRITE_BACK: 469 case VM_MEMATTR_PREFETCHABLE: 470 return (MAS2_I); 471 case VM_MEMATTR_WRITE_THROUGH: 472 return (MAS2_W | MAS2_M); 473 case VM_MEMATTR_CACHEABLE: 474 return (MAS2_M); 475 } 476 } 477 478 /* 479 * Assume the page is cache inhibited and access is guarded unless 480 * it's in our available memory array. 481 */ 482 attrib = _TLB_ENTRY_IO; 483 for (i = 0; i < physmem_regions_sz; i++) { 484 if ((pa >= physmem_regions[i].mr_start) && 485 (pa < (physmem_regions[i].mr_start + 486 physmem_regions[i].mr_size))) { 487 attrib = _TLB_ENTRY_MEM; 488 break; 489 } 490 } 491 492 return (attrib); 493 } 494 495 static inline void 496 tlb_miss_lock(void) 497 { 498 #ifdef SMP 499 struct pcpu *pc; 500 501 if (!smp_started) 502 return; 503 504 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 505 if (pc != pcpup) { 506 507 CTR3(KTR_PMAP, "%s: tlb miss LOCK of CPU=%d, " 508 "tlb_lock=%p", __func__, pc->pc_cpuid, pc->pc_booke.tlb_lock); 509 510 KASSERT((pc->pc_cpuid != PCPU_GET(cpuid)), 511 ("tlb_miss_lock: tried to lock self")); 512 513 tlb_lock(pc->pc_booke.tlb_lock); 514 515 CTR1(KTR_PMAP, "%s: locked", __func__); 516 } 517 } 518 #endif 519 } 520 521 static inline void 522 tlb_miss_unlock(void) 523 { 524 #ifdef SMP 525 struct pcpu *pc; 526 527 if (!smp_started) 528 return; 529 530 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 531 if (pc != pcpup) { 532 CTR2(KTR_PMAP, "%s: tlb miss UNLOCK of CPU=%d", 533 __func__, pc->pc_cpuid); 534 535 tlb_unlock(pc->pc_booke.tlb_lock); 536 537 CTR1(KTR_PMAP, "%s: unlocked", __func__); 538 } 539 } 540 #endif 541 } 542 543 /* Return number of entries in TLB0. */ 544 static __inline void 545 tlb0_get_tlbconf(void) 546 { 547 uint32_t tlb0_cfg; 548 549 tlb0_cfg = mfspr(SPR_TLB0CFG); 550 tlb0_entries = tlb0_cfg & TLBCFG_NENTRY_MASK; 551 tlb0_ways = (tlb0_cfg & TLBCFG_ASSOC_MASK) >> TLBCFG_ASSOC_SHIFT; 552 tlb0_entries_per_way = tlb0_entries / tlb0_ways; 553 } 554 555 /* Return number of entries in TLB1. 
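 *
 * Reference sketch: TLB0 on these cores is a set-associative array whose
 * geometry is read by tlb0_get_tlbconf() above, with
 *
 *	nsets = TLB0_ENTRIES / TLB0_WAYS = TLB0_ENTRIES_PER_WAY
 *
 * In the usual e500 arrangement the set is selected by the low-order EPN
 * bits, i.e. roughly (va >> PAGE_SHIFT) & (TLB0_ENTRIES_PER_WAY - 1);
 * tlb0_tableidx(), declared above, combines that set number with a way
 * number into a flat entry index using this geometry.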
 */
static __inline void
tlb1_get_tlbconf(void)
{
	uint32_t tlb1_cfg;

	tlb1_cfg = mfspr(SPR_TLB1CFG);
	tlb1_entries = tlb1_cfg & TLBCFG_NENTRY_MASK;
}

/**************************************************************************/
/* Page table related */
/**************************************************************************/

#ifdef __powerpc64__
/* Initialize pool of kva ptbl buffers. */
static void
ptbl_init(void)
{
}

/* Get a pointer to a PTE in a page table. */
static __inline pte_t *
pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va)
{
	pte_t **pdir;
	pte_t *ptbl;

	KASSERT((pmap != NULL), ("pte_find: invalid pmap"));

	pdir = pmap->pm_pp2d[PP2D_IDX(va)];
	if (!pdir)
		return (NULL);
	ptbl = pdir[PDIR_IDX(va)];
	return ((ptbl != NULL) ? &ptbl[PTBL_IDX(va)] : NULL);
}

/*
 * Allocate a page of pointers to page directories; do not preallocate the
 * page tables.
 */
static pte_t **
pdir_alloc(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx, bool nosleep)
{
	vm_page_t m;
	pte_t **pdir;
	int req;

	KASSERT((pmap->pm_pp2d[pp2d_idx] == NULL),
	    ("%s: valid pdir entry exists!", __func__));

	req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
	while ((m = vm_page_alloc(NULL, pp2d_idx, req)) == NULL) {
		PMAP_UNLOCK(pmap);
		if (nosleep) {
			return (NULL);
		}
		vm_wait(NULL);
		PMAP_LOCK(pmap);
	}

	/* Zero the whole pdir page. */
	pdir = (pte_t **)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
	bzero(pdir, PAGE_SIZE);

	return (pdir);
}

/* Free pdir pages and invalidate pdir entry. */
static void
pdir_free(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx)
{
	pte_t **pdir;
	vm_paddr_t pa;
	vm_offset_t va;
	vm_page_t m;

	pdir = pmap->pm_pp2d[pp2d_idx];

	KASSERT((pdir != NULL), ("pdir_free: null pdir"));

	pmap->pm_pp2d[pp2d_idx] = NULL;

	va = (vm_offset_t) pdir;
	pa = DMAP_TO_PHYS(va);
	m = PHYS_TO_VM_PAGE(pa);
	vm_page_free_zero(m);
}

/*
 * Decrement pdir pages hold count and attempt to free pdir pages. Called
 * when removing directory entry from pdir.
 *
 * Return 1 if pdir pages were freed.
 */
static int
pdir_unhold(mmu_t mmu, pmap_t pmap, u_int pp2d_idx)
{
	pte_t **pdir;
	vm_paddr_t pa;
	vm_page_t m;

	KASSERT((pmap != kernel_pmap),
	    ("pdir_unhold: unholding kernel pdir!"));

	pdir = pmap->pm_pp2d[pp2d_idx];

	/* decrement hold count */
	pa = DMAP_TO_PHYS((vm_offset_t) pdir);
	m = PHYS_TO_VM_PAGE(pa);

	/*
	 * Free pdir page if there are no dir entries in this pdir.
	 */
	if (vm_page_unwire_noq(m)) {
		pdir_free(mmu, pmap, pp2d_idx);
		return (1);
	}
	return (0);
}

/*
 * Increment hold count for pdir pages. This routine is used when a new ptbl
 * entry is being inserted into pdir.
 */
static void
pdir_hold(mmu_t mmu, pmap_t pmap, pte_t **pdir)
{
	vm_page_t m;

	KASSERT((pmap != kernel_pmap),
	    ("pdir_hold: holding kernel pdir!"));

	KASSERT((pdir != NULL), ("pdir_hold: null pdir"));

	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pdir));
	vm_page_wire(m);
}

/* Allocate page table.
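 *
 * For reference, the 64-bit pmap resolves a VA through three levels, as
 * pte_find() above does (illustrative only):
 *
 *	pdir = pmap->pm_pp2d[PP2D_IDX(va)];	page of ptbl pointers
 *	ptbl = pdir[PDIR_IDX(va)];		page table (array of pte_t)
 *	pte  = &ptbl[PTBL_IDX(va)];		final PTE
 *
 * pdir_alloc()/pdir_hold()/pdir_unhold() above manage the middle level in
 * the same way ptbl_alloc()/ptbl_hold()/ptbl_unhold() below manage the
 * leaf level.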
*/ 695 static pte_t * 696 ptbl_alloc(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx, 697 boolean_t nosleep) 698 { 699 vm_page_t m; 700 pte_t *ptbl; 701 int req; 702 703 KASSERT((pdir[pdir_idx] == NULL), 704 ("%s: valid ptbl entry exists!", __func__)); 705 706 req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; 707 while ((m = vm_page_alloc(NULL, pdir_idx, req)) == NULL) { 708 PMAP_UNLOCK(pmap); 709 rw_wunlock(&pvh_global_lock); 710 if (nosleep) { 711 return (NULL); 712 } 713 vm_wait(NULL); 714 rw_wlock(&pvh_global_lock); 715 PMAP_LOCK(pmap); 716 } 717 718 /* Zero whole ptbl. */ 719 ptbl = (pte_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 720 bzero(ptbl, PAGE_SIZE); 721 722 return (ptbl); 723 } 724 725 /* Free ptbl pages and invalidate pdir entry. */ 726 static void 727 ptbl_free(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx) 728 { 729 pte_t *ptbl; 730 vm_paddr_t pa; 731 vm_offset_t va; 732 vm_page_t m; 733 734 ptbl = pdir[pdir_idx]; 735 736 KASSERT((ptbl != NULL), ("ptbl_free: null ptbl")); 737 738 pdir[pdir_idx] = NULL; 739 740 va = (vm_offset_t) ptbl; 741 pa = DMAP_TO_PHYS(va); 742 m = PHYS_TO_VM_PAGE(pa); 743 vm_page_free_zero(m); 744 } 745 746 /* 747 * Decrement ptbl pages hold count and attempt to free ptbl pages. Called 748 * when removing pte entry from ptbl. 749 * 750 * Return 1 if ptbl pages were freed. 751 */ 752 static int 753 ptbl_unhold(mmu_t mmu, pmap_t pmap, vm_offset_t va) 754 { 755 pte_t *ptbl; 756 vm_page_t m; 757 u_int pp2d_idx; 758 pte_t **pdir; 759 u_int pdir_idx; 760 761 pp2d_idx = PP2D_IDX(va); 762 pdir_idx = PDIR_IDX(va); 763 764 KASSERT((pmap != kernel_pmap), 765 ("ptbl_unhold: unholding kernel ptbl!")); 766 767 pdir = pmap->pm_pp2d[pp2d_idx]; 768 ptbl = pdir[pdir_idx]; 769 770 /* decrement hold count */ 771 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) ptbl)); 772 773 /* 774 * Free ptbl pages if there are no pte entries in this ptbl. 775 * wire_count has the same value for all ptbl pages, so check the 776 * last page. 777 */ 778 if (vm_page_unwire_noq(m)) { 779 ptbl_free(mmu, pmap, pdir, pdir_idx); 780 pdir_unhold(mmu, pmap, pp2d_idx); 781 return (1); 782 } 783 return (0); 784 } 785 786 /* 787 * Increment hold count for ptbl pages. This routine is used when new pte 788 * entry is being inserted into ptbl. 789 */ 790 static void 791 ptbl_hold(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx) 792 { 793 pte_t *ptbl; 794 vm_page_t m; 795 796 KASSERT((pmap != kernel_pmap), 797 ("ptbl_hold: holding kernel ptbl!")); 798 799 ptbl = pdir[pdir_idx]; 800 801 KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl")); 802 803 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) ptbl)); 804 vm_page_wire(m); 805 } 806 #else 807 808 /* Initialize pool of kva ptbl buffers. */ 809 static void 810 ptbl_init(void) 811 { 812 int i; 813 814 CTR3(KTR_PMAP, "%s: s (ptbl_bufs = 0x%08x size 0x%08x)", __func__, 815 (uint32_t)ptbl_bufs, sizeof(struct ptbl_buf) * PTBL_BUFS); 816 CTR3(KTR_PMAP, "%s: s (ptbl_buf_pool_vabase = 0x%08x size = 0x%08x)", 817 __func__, ptbl_buf_pool_vabase, PTBL_BUFS * PTBL_PAGES * PAGE_SIZE); 818 819 mtx_init(&ptbl_buf_freelist_lock, "ptbl bufs lock", NULL, MTX_DEF); 820 TAILQ_INIT(&ptbl_buf_freelist); 821 822 for (i = 0; i < PTBL_BUFS; i++) { 823 ptbl_bufs[i].kva = 824 ptbl_buf_pool_vabase + i * PTBL_PAGES * PAGE_SIZE; 825 TAILQ_INSERT_TAIL(&ptbl_buf_freelist, &ptbl_bufs[i], link); 826 } 827 } 828 829 /* Get a ptbl_buf from the freelist. 
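 *
 * Each ptbl_buf describes a fixed KVA slot of PTBL_PAGES pages carved out
 * of ptbl_buf_pool_vabase by ptbl_init() above:
 *
 *	ptbl_bufs[i].kva = ptbl_buf_pool_vabase + i * PTBL_PAGES * PAGE_SIZE;
 *
 * so grabbing a buf below is just a freelist removal; no KVA is allocated
 * at ptbl_alloc() time, only physical pages are.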
*/ 830 static struct ptbl_buf * 831 ptbl_buf_alloc(void) 832 { 833 struct ptbl_buf *buf; 834 835 mtx_lock(&ptbl_buf_freelist_lock); 836 buf = TAILQ_FIRST(&ptbl_buf_freelist); 837 if (buf != NULL) 838 TAILQ_REMOVE(&ptbl_buf_freelist, buf, link); 839 mtx_unlock(&ptbl_buf_freelist_lock); 840 841 CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf); 842 843 return (buf); 844 } 845 846 /* Return ptbl buff to free pool. */ 847 static void 848 ptbl_buf_free(struct ptbl_buf *buf) 849 { 850 851 CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf); 852 853 mtx_lock(&ptbl_buf_freelist_lock); 854 TAILQ_INSERT_TAIL(&ptbl_buf_freelist, buf, link); 855 mtx_unlock(&ptbl_buf_freelist_lock); 856 } 857 858 /* 859 * Search the list of allocated ptbl bufs and find on list of allocated ptbls 860 */ 861 static void 862 ptbl_free_pmap_ptbl(pmap_t pmap, pte_t *ptbl) 863 { 864 struct ptbl_buf *pbuf; 865 866 CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl); 867 868 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 869 870 TAILQ_FOREACH(pbuf, &pmap->pm_ptbl_list, link) 871 if (pbuf->kva == (vm_offset_t)ptbl) { 872 /* Remove from pmap ptbl buf list. */ 873 TAILQ_REMOVE(&pmap->pm_ptbl_list, pbuf, link); 874 875 /* Free corresponding ptbl buf. */ 876 ptbl_buf_free(pbuf); 877 break; 878 } 879 } 880 881 /* Allocate page table. */ 882 static pte_t * 883 ptbl_alloc(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx, boolean_t nosleep) 884 { 885 vm_page_t mtbl[PTBL_PAGES]; 886 vm_page_t m; 887 struct ptbl_buf *pbuf; 888 unsigned int pidx; 889 pte_t *ptbl; 890 int i, j; 891 892 CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, 893 (pmap == kernel_pmap), pdir_idx); 894 895 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 896 ("ptbl_alloc: invalid pdir_idx")); 897 KASSERT((pmap->pm_pdir[pdir_idx] == NULL), 898 ("pte_alloc: valid ptbl entry exists!")); 899 900 pbuf = ptbl_buf_alloc(); 901 if (pbuf == NULL) 902 panic("pte_alloc: couldn't alloc kernel virtual memory"); 903 904 ptbl = (pte_t *)pbuf->kva; 905 906 CTR2(KTR_PMAP, "%s: ptbl kva = %p", __func__, ptbl); 907 908 for (i = 0; i < PTBL_PAGES; i++) { 909 pidx = (PTBL_PAGES * pdir_idx) + i; 910 while ((m = vm_page_alloc(NULL, pidx, 911 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { 912 PMAP_UNLOCK(pmap); 913 rw_wunlock(&pvh_global_lock); 914 if (nosleep) { 915 ptbl_free_pmap_ptbl(pmap, ptbl); 916 for (j = 0; j < i; j++) 917 vm_page_free(mtbl[j]); 918 vm_wire_sub(i); 919 return (NULL); 920 } 921 vm_wait(NULL); 922 rw_wlock(&pvh_global_lock); 923 PMAP_LOCK(pmap); 924 } 925 mtbl[i] = m; 926 } 927 928 /* Map allocated pages into kernel_pmap. */ 929 mmu_booke_qenter(mmu, (vm_offset_t)ptbl, mtbl, PTBL_PAGES); 930 931 /* Zero whole ptbl. */ 932 bzero((caddr_t)ptbl, PTBL_PAGES * PAGE_SIZE); 933 934 /* Add pbuf to the pmap ptbl bufs list. */ 935 TAILQ_INSERT_TAIL(&pmap->pm_ptbl_list, pbuf, link); 936 937 return (ptbl); 938 } 939 940 /* Free ptbl pages and invalidate pdir entry. 
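 *
 * A note on the allocation path above: with nosleep == FALSE, ptbl_alloc()
 * may drop and re-take both the pmap lock and pvh_global_lock around
 * vm_wait(), so callers must not rely on state sampled before the call.
 * The caller pattern used by pte_enter() below is, in sketch form:
 *
 *	ptbl = ptbl_alloc(mmu, pmap, pdir_idx, nosleep);
 *	if (ptbl == NULL) {
 *		KASSERT(nosleep, ("nosleep and NULL ptbl"));
 *		return (ENOMEM);
 *	}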
*/ 941 static void 942 ptbl_free(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) 943 { 944 pte_t *ptbl; 945 vm_paddr_t pa; 946 vm_offset_t va; 947 vm_page_t m; 948 int i; 949 950 CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, 951 (pmap == kernel_pmap), pdir_idx); 952 953 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 954 ("ptbl_free: invalid pdir_idx")); 955 956 ptbl = pmap->pm_pdir[pdir_idx]; 957 958 CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl); 959 960 KASSERT((ptbl != NULL), ("ptbl_free: null ptbl")); 961 962 /* 963 * Invalidate the pdir entry as soon as possible, so that other CPUs 964 * don't attempt to look up the page tables we are releasing. 965 */ 966 mtx_lock_spin(&tlbivax_mutex); 967 tlb_miss_lock(); 968 969 pmap->pm_pdir[pdir_idx] = NULL; 970 971 tlb_miss_unlock(); 972 mtx_unlock_spin(&tlbivax_mutex); 973 974 for (i = 0; i < PTBL_PAGES; i++) { 975 va = ((vm_offset_t)ptbl + (i * PAGE_SIZE)); 976 pa = pte_vatopa(mmu, kernel_pmap, va); 977 m = PHYS_TO_VM_PAGE(pa); 978 vm_page_free_zero(m); 979 vm_wire_sub(1); 980 mmu_booke_kremove(mmu, va); 981 } 982 983 ptbl_free_pmap_ptbl(pmap, ptbl); 984 } 985 986 /* 987 * Decrement ptbl pages hold count and attempt to free ptbl pages. 988 * Called when removing pte entry from ptbl. 989 * 990 * Return 1 if ptbl pages were freed. 991 */ 992 static int 993 ptbl_unhold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) 994 { 995 pte_t *ptbl; 996 vm_paddr_t pa; 997 vm_page_t m; 998 int i; 999 1000 CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, 1001 (pmap == kernel_pmap), pdir_idx); 1002 1003 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 1004 ("ptbl_unhold: invalid pdir_idx")); 1005 KASSERT((pmap != kernel_pmap), 1006 ("ptbl_unhold: unholding kernel ptbl!")); 1007 1008 ptbl = pmap->pm_pdir[pdir_idx]; 1009 1010 //debugf("ptbl_unhold: ptbl = 0x%08x\n", (u_int32_t)ptbl); 1011 KASSERT(((vm_offset_t)ptbl >= VM_MIN_KERNEL_ADDRESS), 1012 ("ptbl_unhold: non kva ptbl")); 1013 1014 /* decrement hold count */ 1015 for (i = 0; i < PTBL_PAGES; i++) { 1016 pa = pte_vatopa(mmu, kernel_pmap, 1017 (vm_offset_t)ptbl + (i * PAGE_SIZE)); 1018 m = PHYS_TO_VM_PAGE(pa); 1019 m->wire_count--; 1020 } 1021 1022 /* 1023 * Free ptbl pages if there are no pte etries in this ptbl. 1024 * wire_count has the same value for all ptbl pages, so check the last 1025 * page. 1026 */ 1027 if (m->wire_count == 0) { 1028 ptbl_free(mmu, pmap, pdir_idx); 1029 1030 //debugf("ptbl_unhold: e (freed ptbl)\n"); 1031 return (1); 1032 } 1033 1034 return (0); 1035 } 1036 1037 /* 1038 * Increment hold count for ptbl pages. This routine is used when a new pte 1039 * entry is being inserted into the ptbl. 1040 */ 1041 static void 1042 ptbl_hold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) 1043 { 1044 vm_paddr_t pa; 1045 pte_t *ptbl; 1046 vm_page_t m; 1047 int i; 1048 1049 CTR3(KTR_PMAP, "%s: pmap = %p pdir_idx = %d", __func__, pmap, 1050 pdir_idx); 1051 1052 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 1053 ("ptbl_hold: invalid pdir_idx")); 1054 KASSERT((pmap != kernel_pmap), 1055 ("ptbl_hold: holding kernel ptbl!")); 1056 1057 ptbl = pmap->pm_pdir[pdir_idx]; 1058 1059 KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl")); 1060 1061 for (i = 0; i < PTBL_PAGES; i++) { 1062 pa = pte_vatopa(mmu, kernel_pmap, 1063 (vm_offset_t)ptbl + (i * PAGE_SIZE)); 1064 m = PHYS_TO_VM_PAGE(pa); 1065 m->wire_count++; 1066 } 1067 } 1068 #endif 1069 1070 /* Allocate pv_entry structure. 
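 *
 * For reference, the water marks used here are set up in mmu_booke_init():
 *
 *	pv_entry_max        = shpgperproc * maxproc + vm_cnt.v_page_count;
 *	pv_entry_high_water = 9 * (pv_entry_max / 10);
 *
 * i.e. the pagedaemon is woken once roughly 90% of the pv entry budget is
 * in use.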
*/ 1071 pv_entry_t 1072 pv_alloc(void) 1073 { 1074 pv_entry_t pv; 1075 1076 pv_entry_count++; 1077 if (pv_entry_count > pv_entry_high_water) 1078 pagedaemon_wakeup(0); /* XXX powerpc NUMA */ 1079 pv = uma_zalloc(pvzone, M_NOWAIT); 1080 1081 return (pv); 1082 } 1083 1084 /* Free pv_entry structure. */ 1085 static __inline void 1086 pv_free(pv_entry_t pve) 1087 { 1088 1089 pv_entry_count--; 1090 uma_zfree(pvzone, pve); 1091 } 1092 1093 1094 /* Allocate and initialize pv_entry structure. */ 1095 static void 1096 pv_insert(pmap_t pmap, vm_offset_t va, vm_page_t m) 1097 { 1098 pv_entry_t pve; 1099 1100 //int su = (pmap == kernel_pmap); 1101 //debugf("pv_insert: s (su = %d pmap = 0x%08x va = 0x%08x m = 0x%08x)\n", su, 1102 // (u_int32_t)pmap, va, (u_int32_t)m); 1103 1104 pve = pv_alloc(); 1105 if (pve == NULL) 1106 panic("pv_insert: no pv entries!"); 1107 1108 pve->pv_pmap = pmap; 1109 pve->pv_va = va; 1110 1111 /* add to pv_list */ 1112 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1113 rw_assert(&pvh_global_lock, RA_WLOCKED); 1114 1115 TAILQ_INSERT_TAIL(&m->md.pv_list, pve, pv_link); 1116 1117 //debugf("pv_insert: e\n"); 1118 } 1119 1120 /* Destroy pv entry. */ 1121 static void 1122 pv_remove(pmap_t pmap, vm_offset_t va, vm_page_t m) 1123 { 1124 pv_entry_t pve; 1125 1126 //int su = (pmap == kernel_pmap); 1127 //debugf("pv_remove: s (su = %d pmap = 0x%08x va = 0x%08x)\n", su, (u_int32_t)pmap, va); 1128 1129 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1130 rw_assert(&pvh_global_lock, RA_WLOCKED); 1131 1132 /* find pv entry */ 1133 TAILQ_FOREACH(pve, &m->md.pv_list, pv_link) { 1134 if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) { 1135 /* remove from pv_list */ 1136 TAILQ_REMOVE(&m->md.pv_list, pve, pv_link); 1137 if (TAILQ_EMPTY(&m->md.pv_list)) 1138 vm_page_aflag_clear(m, PGA_WRITEABLE); 1139 1140 /* free pv entry struct */ 1141 pv_free(pve); 1142 break; 1143 } 1144 } 1145 1146 //debugf("pv_remove: e\n"); 1147 } 1148 1149 #ifdef __powerpc64__ 1150 /* 1151 * Clean pte entry, try to free page table page if requested. 1152 * 1153 * Return 1 if ptbl pages were freed, otherwise return 0. 1154 */ 1155 static int 1156 pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, u_int8_t flags) 1157 { 1158 vm_page_t m; 1159 pte_t *pte; 1160 1161 pte = pte_find(mmu, pmap, va); 1162 KASSERT(pte != NULL, ("%s: NULL pte", __func__)); 1163 1164 if (!PTE_ISVALID(pte)) 1165 return (0); 1166 1167 /* Get vm_page_t for mapped pte. */ 1168 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 1169 1170 if (PTE_ISWIRED(pte)) 1171 pmap->pm_stats.wired_count--; 1172 1173 /* Handle managed entry. */ 1174 if (PTE_ISMANAGED(pte)) { 1175 1176 /* Handle modified pages. */ 1177 if (PTE_ISMODIFIED(pte)) 1178 vm_page_dirty(m); 1179 1180 /* Referenced pages. */ 1181 if (PTE_ISREFERENCED(pte)) 1182 vm_page_aflag_set(m, PGA_REFERENCED); 1183 1184 /* Remove pv_entry from pv_list. */ 1185 pv_remove(pmap, va, m); 1186 } else if (m->md.pv_tracked) { 1187 pv_remove(pmap, va, m); 1188 if (TAILQ_EMPTY(&m->md.pv_list)) 1189 m->md.pv_tracked = false; 1190 } 1191 mtx_lock_spin(&tlbivax_mutex); 1192 tlb_miss_lock(); 1193 1194 tlb0_flush_entry(va); 1195 *pte = 0; 1196 1197 tlb_miss_unlock(); 1198 mtx_unlock_spin(&tlbivax_mutex); 1199 1200 pmap->pm_stats.resident_count--; 1201 1202 if (flags & PTBL_UNHOLD) { 1203 return (ptbl_unhold(mmu, pmap, va)); 1204 } 1205 return (0); 1206 } 1207 1208 /* 1209 * Insert PTE for a given page and virtual address. 
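 *
 * Replacing an existing mapping and filling an empty slot differ only in
 * hold-count handling: a valid old PTE is torn down via
 * pte_remove(..., PTBL_HOLD) so the ptbl stays held, while an empty slot
 * bumps the pdir/ptbl hold counts instead.  The final update is always of
 * the form (sketch):
 *
 *	pte_tmp = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m)) | PTE_VALID | flags;
 *	tlb0_flush_entry(va);
 *	*pte = pte_tmp;
 *
 * done while holding tlbivax_mutex and the TLB miss locks.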
1210 */ 1211 static int 1212 pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags, 1213 boolean_t nosleep) 1214 { 1215 unsigned int pp2d_idx = PP2D_IDX(va); 1216 unsigned int pdir_idx = PDIR_IDX(va); 1217 unsigned int ptbl_idx = PTBL_IDX(va); 1218 pte_t *ptbl, *pte, pte_tmp; 1219 pte_t **pdir; 1220 1221 /* Get the page directory pointer. */ 1222 pdir = pmap->pm_pp2d[pp2d_idx]; 1223 if (pdir == NULL) 1224 pdir = pdir_alloc(mmu, pmap, pp2d_idx, nosleep); 1225 1226 /* Get the page table pointer. */ 1227 ptbl = pdir[pdir_idx]; 1228 1229 if (ptbl == NULL) { 1230 /* Allocate page table pages. */ 1231 ptbl = ptbl_alloc(mmu, pmap, pdir, pdir_idx, nosleep); 1232 if (ptbl == NULL) { 1233 KASSERT(nosleep, ("nosleep and NULL ptbl")); 1234 return (ENOMEM); 1235 } 1236 pte = &ptbl[ptbl_idx]; 1237 } else { 1238 /* 1239 * Check if there is valid mapping for requested va, if there 1240 * is, remove it. 1241 */ 1242 pte = &ptbl[ptbl_idx]; 1243 if (PTE_ISVALID(pte)) { 1244 pte_remove(mmu, pmap, va, PTBL_HOLD); 1245 } else { 1246 /* 1247 * pte is not used, increment hold count for ptbl 1248 * pages. 1249 */ 1250 if (pmap != kernel_pmap) 1251 ptbl_hold(mmu, pmap, pdir, pdir_idx); 1252 } 1253 } 1254 1255 if (pdir[pdir_idx] == NULL) { 1256 if (pmap != kernel_pmap && pmap->pm_pp2d[pp2d_idx] != NULL) 1257 pdir_hold(mmu, pmap, pdir); 1258 pdir[pdir_idx] = ptbl; 1259 } 1260 if (pmap->pm_pp2d[pp2d_idx] == NULL) 1261 pmap->pm_pp2d[pp2d_idx] = pdir; 1262 1263 /* 1264 * Insert pv_entry into pv_list for mapped page if part of managed 1265 * memory. 1266 */ 1267 if ((m->oflags & VPO_UNMANAGED) == 0) { 1268 flags |= PTE_MANAGED; 1269 1270 /* Create and insert pv entry. */ 1271 pv_insert(pmap, va, m); 1272 } 1273 1274 pmap->pm_stats.resident_count++; 1275 1276 pte_tmp = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m)); 1277 pte_tmp |= (PTE_VALID | flags); 1278 1279 mtx_lock_spin(&tlbivax_mutex); 1280 tlb_miss_lock(); 1281 1282 tlb0_flush_entry(va); 1283 *pte = pte_tmp; 1284 1285 tlb_miss_unlock(); 1286 mtx_unlock_spin(&tlbivax_mutex); 1287 1288 return (0); 1289 } 1290 1291 /* Return the pa for the given pmap/va. */ 1292 static vm_paddr_t 1293 pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1294 { 1295 vm_paddr_t pa = 0; 1296 pte_t *pte; 1297 1298 pte = pte_find(mmu, pmap, va); 1299 if ((pte != NULL) && PTE_ISVALID(pte)) 1300 pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); 1301 return (pa); 1302 } 1303 1304 1305 /* allocate pte entries to manage (addr & mask) to (addr & mask) + size */ 1306 static void 1307 kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr, vm_offset_t pdir) 1308 { 1309 int i, j; 1310 vm_offset_t va; 1311 pte_t *pte; 1312 1313 va = addr; 1314 /* Initialize kernel pdir */ 1315 for (i = 0; i < kernel_pdirs; i++) { 1316 kernel_pmap->pm_pp2d[i + PP2D_IDX(va)] = 1317 (pte_t **)(pdir + (i * PAGE_SIZE * PDIR_PAGES)); 1318 for (j = PDIR_IDX(va + (i * PAGE_SIZE * PDIR_NENTRIES * PTBL_NENTRIES)); 1319 j < PDIR_NENTRIES; j++) { 1320 kernel_pmap->pm_pp2d[i + PP2D_IDX(va)][j] = 1321 (pte_t *)(pdir + (kernel_pdirs * PAGE_SIZE) + 1322 (((i * PDIR_NENTRIES) + j) * PAGE_SIZE)); 1323 } 1324 } 1325 1326 /* 1327 * Fill in PTEs covering kernel code and data. They are not required 1328 * for address translation, as this area is covered by static TLB1 1329 * entries, but for pte_vatopa() to work correctly with kernel area 1330 * addresses. 
1331 */ 1332 for (va = addr; va < data_end; va += PAGE_SIZE) { 1333 pte = &(kernel_pmap->pm_pp2d[PP2D_IDX(va)][PDIR_IDX(va)][PTBL_IDX(va)]); 1334 *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart)); 1335 *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | 1336 PTE_VALID | PTE_PS_4KB; 1337 } 1338 } 1339 #else 1340 /* 1341 * Clean pte entry, try to free page table page if requested. 1342 * 1343 * Return 1 if ptbl pages were freed, otherwise return 0. 1344 */ 1345 static int 1346 pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, uint8_t flags) 1347 { 1348 unsigned int pdir_idx = PDIR_IDX(va); 1349 unsigned int ptbl_idx = PTBL_IDX(va); 1350 vm_page_t m; 1351 pte_t *ptbl; 1352 pte_t *pte; 1353 1354 //int su = (pmap == kernel_pmap); 1355 //debugf("pte_remove: s (su = %d pmap = 0x%08x va = 0x%08x flags = %d)\n", 1356 // su, (u_int32_t)pmap, va, flags); 1357 1358 ptbl = pmap->pm_pdir[pdir_idx]; 1359 KASSERT(ptbl, ("pte_remove: null ptbl")); 1360 1361 pte = &ptbl[ptbl_idx]; 1362 1363 if (pte == NULL || !PTE_ISVALID(pte)) 1364 return (0); 1365 1366 if (PTE_ISWIRED(pte)) 1367 pmap->pm_stats.wired_count--; 1368 1369 /* Get vm_page_t for mapped pte. */ 1370 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 1371 1372 /* Handle managed entry. */ 1373 if (PTE_ISMANAGED(pte)) { 1374 1375 if (PTE_ISMODIFIED(pte)) 1376 vm_page_dirty(m); 1377 1378 if (PTE_ISREFERENCED(pte)) 1379 vm_page_aflag_set(m, PGA_REFERENCED); 1380 1381 pv_remove(pmap, va, m); 1382 } else if (m->md.pv_tracked) { 1383 /* 1384 * Always pv_insert()/pv_remove() on MPC85XX, in case DPAA is 1385 * used. This is needed by the NCSW support code for fast 1386 * VA<->PA translation. 1387 */ 1388 pv_remove(pmap, va, m); 1389 if (TAILQ_EMPTY(&m->md.pv_list)) 1390 m->md.pv_tracked = false; 1391 } 1392 1393 mtx_lock_spin(&tlbivax_mutex); 1394 tlb_miss_lock(); 1395 1396 tlb0_flush_entry(va); 1397 *pte = 0; 1398 1399 tlb_miss_unlock(); 1400 mtx_unlock_spin(&tlbivax_mutex); 1401 1402 pmap->pm_stats.resident_count--; 1403 1404 if (flags & PTBL_UNHOLD) { 1405 //debugf("pte_remove: e (unhold)\n"); 1406 return (ptbl_unhold(mmu, pmap, pdir_idx)); 1407 } 1408 1409 //debugf("pte_remove: e\n"); 1410 return (0); 1411 } 1412 1413 /* 1414 * Insert PTE for a given page and virtual address. 1415 */ 1416 static int 1417 pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags, 1418 boolean_t nosleep) 1419 { 1420 unsigned int pdir_idx = PDIR_IDX(va); 1421 unsigned int ptbl_idx = PTBL_IDX(va); 1422 pte_t *ptbl, *pte, pte_tmp; 1423 1424 CTR4(KTR_PMAP, "%s: su = %d pmap = %p va = %p", __func__, 1425 pmap == kernel_pmap, pmap, va); 1426 1427 /* Get the page table pointer. */ 1428 ptbl = pmap->pm_pdir[pdir_idx]; 1429 1430 if (ptbl == NULL) { 1431 /* Allocate page table pages. */ 1432 ptbl = ptbl_alloc(mmu, pmap, pdir_idx, nosleep); 1433 if (ptbl == NULL) { 1434 KASSERT(nosleep, ("nosleep and NULL ptbl")); 1435 return (ENOMEM); 1436 } 1437 pmap->pm_pdir[pdir_idx] = ptbl; 1438 pte = &ptbl[ptbl_idx]; 1439 } else { 1440 /* 1441 * Check if there is valid mapping for requested 1442 * va, if there is, remove it. 1443 */ 1444 pte = &pmap->pm_pdir[pdir_idx][ptbl_idx]; 1445 if (PTE_ISVALID(pte)) { 1446 pte_remove(mmu, pmap, va, PTBL_HOLD); 1447 } else { 1448 /* 1449 * pte is not used, increment hold count 1450 * for ptbl pages. 1451 */ 1452 if (pmap != kernel_pmap) 1453 ptbl_hold(mmu, pmap, pdir_idx); 1454 } 1455 } 1456 1457 /* 1458 * Insert pv_entry into pv_list for mapped page if part of managed 1459 * memory. 
1460 */ 1461 if ((m->oflags & VPO_UNMANAGED) == 0) { 1462 flags |= PTE_MANAGED; 1463 1464 /* Create and insert pv entry. */ 1465 pv_insert(pmap, va, m); 1466 } 1467 1468 pmap->pm_stats.resident_count++; 1469 1470 pte_tmp = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m)); 1471 pte_tmp |= (PTE_VALID | flags | PTE_PS_4KB); /* 4KB pages only */ 1472 1473 mtx_lock_spin(&tlbivax_mutex); 1474 tlb_miss_lock(); 1475 1476 tlb0_flush_entry(va); 1477 *pte = pte_tmp; 1478 1479 tlb_miss_unlock(); 1480 mtx_unlock_spin(&tlbivax_mutex); 1481 return (0); 1482 } 1483 1484 /* Return the pa for the given pmap/va. */ 1485 static vm_paddr_t 1486 pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1487 { 1488 vm_paddr_t pa = 0; 1489 pte_t *pte; 1490 1491 pte = pte_find(mmu, pmap, va); 1492 if ((pte != NULL) && PTE_ISVALID(pte)) 1493 pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); 1494 return (pa); 1495 } 1496 1497 /* Get a pointer to a PTE in a page table. */ 1498 static pte_t * 1499 pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1500 { 1501 unsigned int pdir_idx = PDIR_IDX(va); 1502 unsigned int ptbl_idx = PTBL_IDX(va); 1503 1504 KASSERT((pmap != NULL), ("pte_find: invalid pmap")); 1505 1506 if (pmap->pm_pdir[pdir_idx]) 1507 return (&(pmap->pm_pdir[pdir_idx][ptbl_idx])); 1508 1509 return (NULL); 1510 } 1511 1512 /* Set up kernel page tables. */ 1513 static void 1514 kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr, vm_offset_t pdir) 1515 { 1516 int i; 1517 vm_offset_t va; 1518 pte_t *pte; 1519 1520 /* Initialize kernel pdir */ 1521 for (i = 0; i < kernel_ptbls; i++) 1522 kernel_pmap->pm_pdir[kptbl_min + i] = 1523 (pte_t *)(pdir + (i * PAGE_SIZE * PTBL_PAGES)); 1524 1525 /* 1526 * Fill in PTEs covering kernel code and data. They are not required 1527 * for address translation, as this area is covered by static TLB1 1528 * entries, but for pte_vatopa() to work correctly with kernel area 1529 * addresses. 1530 */ 1531 for (va = addr; va < data_end; va += PAGE_SIZE) { 1532 pte = &(kernel_pmap->pm_pdir[PDIR_IDX(va)][PTBL_IDX(va)]); 1533 *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart)); 1534 *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | 1535 PTE_VALID | PTE_PS_4KB; 1536 } 1537 } 1538 #endif 1539 1540 /**************************************************************************/ 1541 /* PMAP related */ 1542 /**************************************************************************/ 1543 1544 /* 1545 * This is called during booke_init, before the system is really initialized. 1546 */ 1547 static void 1548 mmu_booke_bootstrap(mmu_t mmu, vm_offset_t start, vm_offset_t kernelend) 1549 { 1550 vm_paddr_t phys_kernelend; 1551 struct mem_region *mp, *mp1; 1552 int cnt, i, j; 1553 vm_paddr_t s, e, sz; 1554 vm_paddr_t physsz, hwphyssz; 1555 u_int phys_avail_count; 1556 vm_size_t kstack0_sz; 1557 vm_offset_t kernel_pdir, kstack0; 1558 vm_paddr_t kstack0_phys; 1559 void *dpcpu; 1560 1561 debugf("mmu_booke_bootstrap: entered\n"); 1562 1563 /* Set interesting system properties */ 1564 #ifdef __powerpc64__ 1565 hw_direct_map = 1; 1566 #else 1567 hw_direct_map = 0; 1568 #endif 1569 #if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__) 1570 elf32_nxstack = 1; 1571 #endif 1572 1573 /* Initialize invalidation mutex */ 1574 mtx_init(&tlbivax_mutex, "tlbivax", NULL, MTX_SPIN); 1575 1576 /* Read TLB0 size and associativity. */ 1577 tlb0_get_tlbconf(); 1578 1579 /* 1580 * Align kernel start and end address (kernel image). 1581 * Note that kernel end does not necessarily relate to kernsize. 
1582 * kernsize is the size of the kernel that is actually mapped. 1583 */ 1584 kernstart = trunc_page(start); 1585 data_start = round_page(kernelend); 1586 data_end = data_start; 1587 1588 /* Allocate the dynamic per-cpu area. */ 1589 dpcpu = (void *)data_end; 1590 data_end += DPCPU_SIZE; 1591 1592 /* Allocate space for the message buffer. */ 1593 msgbufp = (struct msgbuf *)data_end; 1594 data_end += msgbufsize; 1595 debugf(" msgbufp at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", 1596 (uintptr_t)msgbufp, data_end); 1597 1598 data_end = round_page(data_end); 1599 1600 #ifndef __powerpc64__ 1601 /* Allocate space for ptbl_bufs. */ 1602 ptbl_bufs = (struct ptbl_buf *)data_end; 1603 data_end += sizeof(struct ptbl_buf) * PTBL_BUFS; 1604 debugf(" ptbl_bufs at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", 1605 (uintptr_t)ptbl_bufs, data_end); 1606 1607 data_end = round_page(data_end); 1608 #endif 1609 1610 /* Allocate PTE tables for kernel KVA. */ 1611 kernel_pdir = data_end; 1612 kernel_ptbls = howmany(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, 1613 PDIR_SIZE); 1614 #ifdef __powerpc64__ 1615 kernel_pdirs = howmany(kernel_ptbls, PDIR_NENTRIES); 1616 data_end += kernel_pdirs * PDIR_PAGES * PAGE_SIZE; 1617 #endif 1618 data_end += kernel_ptbls * PTBL_PAGES * PAGE_SIZE; 1619 debugf(" kernel ptbls: %d\n", kernel_ptbls); 1620 debugf(" kernel pdir at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", 1621 kernel_pdir, data_end); 1622 1623 debugf(" data_end: 0x%"PRI0ptrX"\n", data_end); 1624 if (data_end - kernstart > kernsize) { 1625 kernsize += tlb1_mapin_region(kernstart + kernsize, 1626 kernload + kernsize, (data_end - kernstart) - kernsize); 1627 } 1628 data_end = kernstart + kernsize; 1629 debugf(" updated data_end: 0x%"PRI0ptrX"\n", data_end); 1630 1631 /* 1632 * Clear the structures - note we can only do it safely after the 1633 * possible additional TLB1 translations are in place (above) so that 1634 * all range up to the currently calculated 'data_end' is covered. 1635 */ 1636 dpcpu_init(dpcpu, 0); 1637 #ifdef __powerpc64__ 1638 memset((void *)kernel_pdir, 0, 1639 kernel_pdirs * PDIR_PAGES * PAGE_SIZE + 1640 kernel_ptbls * PTBL_PAGES * PAGE_SIZE); 1641 #else 1642 memset((void *)ptbl_bufs, 0, sizeof(struct ptbl_buf) * PTBL_SIZE); 1643 memset((void *)kernel_pdir, 0, kernel_ptbls * PTBL_PAGES * PAGE_SIZE); 1644 #endif 1645 1646 /*******************************************************/ 1647 /* Set the start and end of kva. */ 1648 /*******************************************************/ 1649 virtual_avail = round_page(data_end); 1650 virtual_end = VM_MAX_KERNEL_ADDRESS; 1651 1652 /* Allocate KVA space for page zero/copy operations. */ 1653 zero_page_va = virtual_avail; 1654 virtual_avail += PAGE_SIZE; 1655 copy_page_src_va = virtual_avail; 1656 virtual_avail += PAGE_SIZE; 1657 copy_page_dst_va = virtual_avail; 1658 virtual_avail += PAGE_SIZE; 1659 debugf("zero_page_va = 0x%"PRI0ptrX"\n", zero_page_va); 1660 debugf("copy_page_src_va = 0x%"PRI0ptrX"\n", copy_page_src_va); 1661 debugf("copy_page_dst_va = 0x%"PRI0ptrX"\n", copy_page_dst_va); 1662 1663 /* Initialize page zero/copy mutexes. */ 1664 mtx_init(&zero_page_mutex, "mmu_booke_zero_page", NULL, MTX_DEF); 1665 mtx_init(©_page_mutex, "mmu_booke_copy_page", NULL, MTX_DEF); 1666 1667 #ifndef __powerpc64__ 1668 /* Allocate KVA space for ptbl bufs. 
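	 *
	 * At this point virtual_avail has been advanced past the fixed
	 * carve-outs made above, in order (sketch):
	 *
	 *	zero_page_va		1 page
	 *	copy_page_src_va	1 page
	 *	copy_page_dst_va	1 page
	 *	ptbl_buf_pool_vabase	PTBL_BUFS * PTBL_PAGES pages
	 *				(32-bit only, reserved just below)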
	 */
	ptbl_buf_pool_vabase = virtual_avail;
	virtual_avail += PTBL_BUFS * PTBL_PAGES * PAGE_SIZE;
	debugf("ptbl_buf_pool_vabase = 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n",
	    ptbl_buf_pool_vabase, virtual_avail);
#endif

	/* Calculate corresponding physical addresses for the kernel region. */
	phys_kernelend = kernload + kernsize;
	debugf("kernel image and allocated data:\n");
	debugf(" kernload = 0x%09llx\n", (uint64_t)kernload);
	debugf(" kernstart = 0x%"PRI0ptrX"\n", kernstart);
	debugf(" kernsize = 0x%"PRI0ptrX"\n", kernsize);

	/*
	 * Remove kernel physical address range from avail regions list. Page
	 * align all regions. Non-page aligned memory isn't very interesting
	 * to us. Also, sort the entries for ascending addresses.
	 */

	/* Retrieve phys/avail mem regions */
	mem_regions(&physmem_regions, &physmem_regions_sz,
	    &availmem_regions, &availmem_regions_sz);

	if (nitems(phys_avail) < availmem_regions_sz)
		panic("mmu_booke_bootstrap: phys_avail too small");

	sz = 0;
	cnt = availmem_regions_sz;
	debugf("processing avail regions:\n");
	for (mp = availmem_regions; mp->mr_size; mp++) {
		s = mp->mr_start;
		e = mp->mr_start + mp->mr_size;
		debugf(" %09jx-%09jx -> ", (uintmax_t)s, (uintmax_t)e);
		/* Check whether this region holds all of the kernel. */
		if (s < kernload && e > phys_kernelend) {
			availmem_regions[cnt].mr_start = phys_kernelend;
			availmem_regions[cnt++].mr_size = e - phys_kernelend;
			e = kernload;
		}
		/* Check whether this region starts within the kernel. */
		if (s >= kernload && s < phys_kernelend) {
			if (e <= phys_kernelend)
				goto empty;
			s = phys_kernelend;
		}
		/* Now check whether this region ends within the kernel. */
		if (e > kernload && e <= phys_kernelend) {
			if (s >= kernload)
				goto empty;
			e = kernload;
		}
		/* Now page align the start and size of the region. */
		s = round_page(s);
		e = trunc_page(e);
		if (e < s)
			e = s;
		sz = e - s;
		debugf("%09jx-%09jx = %jx\n",
		    (uintmax_t)s, (uintmax_t)e, (uintmax_t)sz);

		/* Check whether some memory is left here. */
		if (sz == 0) {
		empty:
			memmove(mp, mp + 1,
			    (cnt - (mp - availmem_regions)) * sizeof(*mp));
			cnt--;
			mp--;
			continue;
		}

		/* Do an insertion sort.
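		 *
		 * Worked example of the carve-out above (hypothetical
		 * numbers): an avail region [0, 0x8000_0000) with the kernel
		 * at [kernload, phys_kernelend) = [0x0100_0000, 0x01c0_0000)
		 * is split into [0, 0x0100_0000) plus a new entry
		 * [0x01c0_0000, 0x8000_0000) appended at
		 * availmem_regions[cnt++].  The appended piece lands out of
		 * order, which is one reason the regions are insertion
		 * sorted here.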
*/ 1740 for (mp1 = availmem_regions; mp1 < mp; mp1++) 1741 if (s < mp1->mr_start) 1742 break; 1743 if (mp1 < mp) { 1744 memmove(mp1 + 1, mp1, (char *)mp - (char *)mp1); 1745 mp1->mr_start = s; 1746 mp1->mr_size = sz; 1747 } else { 1748 mp->mr_start = s; 1749 mp->mr_size = sz; 1750 } 1751 } 1752 availmem_regions_sz = cnt; 1753 1754 /*******************************************************/ 1755 /* Steal physical memory for kernel stack from the end */ 1756 /* of the first avail region */ 1757 /*******************************************************/ 1758 kstack0_sz = kstack_pages * PAGE_SIZE; 1759 kstack0_phys = availmem_regions[0].mr_start + 1760 availmem_regions[0].mr_size; 1761 kstack0_phys -= kstack0_sz; 1762 availmem_regions[0].mr_size -= kstack0_sz; 1763 1764 /*******************************************************/ 1765 /* Fill in phys_avail table, based on availmem_regions */ 1766 /*******************************************************/ 1767 phys_avail_count = 0; 1768 physsz = 0; 1769 hwphyssz = 0; 1770 TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz); 1771 1772 debugf("fill in phys_avail:\n"); 1773 for (i = 0, j = 0; i < availmem_regions_sz; i++, j += 2) { 1774 1775 debugf(" region: 0x%jx - 0x%jx (0x%jx)\n", 1776 (uintmax_t)availmem_regions[i].mr_start, 1777 (uintmax_t)availmem_regions[i].mr_start + 1778 availmem_regions[i].mr_size, 1779 (uintmax_t)availmem_regions[i].mr_size); 1780 1781 if (hwphyssz != 0 && 1782 (physsz + availmem_regions[i].mr_size) >= hwphyssz) { 1783 debugf(" hw.physmem adjust\n"); 1784 if (physsz < hwphyssz) { 1785 phys_avail[j] = availmem_regions[i].mr_start; 1786 phys_avail[j + 1] = 1787 availmem_regions[i].mr_start + 1788 hwphyssz - physsz; 1789 physsz = hwphyssz; 1790 phys_avail_count++; 1791 } 1792 break; 1793 } 1794 1795 phys_avail[j] = availmem_regions[i].mr_start; 1796 phys_avail[j + 1] = availmem_regions[i].mr_start + 1797 availmem_regions[i].mr_size; 1798 phys_avail_count++; 1799 physsz += availmem_regions[i].mr_size; 1800 } 1801 physmem = btoc(physsz); 1802 1803 /* Calculate the last available physical address. */ 1804 for (i = 0; phys_avail[i + 2] != 0; i += 2) 1805 ; 1806 Maxmem = powerpc_btop(phys_avail[i + 1]); 1807 1808 debugf("Maxmem = 0x%08lx\n", Maxmem); 1809 debugf("phys_avail_count = %d\n", phys_avail_count); 1810 debugf("physsz = 0x%09jx physmem = %jd (0x%09jx)\n", 1811 (uintmax_t)physsz, (uintmax_t)physmem, (uintmax_t)physmem); 1812 1813 #ifdef __powerpc64__ 1814 /* 1815 * Map the physical memory contiguously in TLB1. 1816 * Round so it fits into a single mapping. 1817 */ 1818 tlb1_mapin_region(DMAP_BASE_ADDRESS, 0, 1819 phys_avail[i + 1]); 1820 #endif 1821 1822 /*******************************************************/ 1823 /* Initialize (statically allocated) kernel pmap. */ 1824 /*******************************************************/ 1825 PMAP_LOCK_INIT(kernel_pmap); 1826 #ifndef __powerpc64__ 1827 kptbl_min = VM_MIN_KERNEL_ADDRESS / PDIR_SIZE; 1828 #endif 1829 1830 debugf("kernel_pmap = 0x%"PRI0ptrX"\n", (uintptr_t)kernel_pmap); 1831 kernel_pte_alloc(virtual_avail, kernstart, kernel_pdir); 1832 for (i = 0; i < MAXCPU; i++) { 1833 kernel_pmap->pm_tid[i] = TID_KERNEL; 1834 1835 /* Initialize each CPU's tidbusy entry 0 with kernel_pmap */ 1836 tidbusy[i][TID_KERNEL] = kernel_pmap; 1837 } 1838 1839 /* Mark kernel_pmap active on all CPUs */ 1840 CPU_FILL(&kernel_pmap->pm_active); 1841 1842 /* 1843 * Initialize the global pv list lock. 
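	 *
	 * (For reference, phys_avail[] as filled in above is a list of
	 * (start, end) physical address pairs terminated by a zero entry;
	 * Maxmem is taken from the end address of the last pair:
	 *
	 *	for (i = 0; phys_avail[i + 2] != 0; i += 2)
	 *		;
	 *	Maxmem = powerpc_btop(phys_avail[i + 1]);
	 *
	 * so Maxmem reflects the highest usable physical page.)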
1844 */ 1845 rw_init(&pvh_global_lock, "pmap pv global"); 1846 1847 /*******************************************************/ 1848 /* Final setup */ 1849 /*******************************************************/ 1850 1851 /* Enter kstack0 into kernel map, provide guard page */ 1852 kstack0 = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE; 1853 thread0.td_kstack = kstack0; 1854 thread0.td_kstack_pages = kstack_pages; 1855 1856 debugf("kstack_sz = 0x%08x\n", kstack0_sz); 1857 debugf("kstack0_phys at 0x%09llx - 0x%09llx\n", 1858 kstack0_phys, kstack0_phys + kstack0_sz); 1859 debugf("kstack0 at 0x%"PRI0ptrX" - 0x%"PRI0ptrX"\n", 1860 kstack0, kstack0 + kstack0_sz); 1861 1862 virtual_avail += KSTACK_GUARD_PAGES * PAGE_SIZE + kstack0_sz; 1863 for (i = 0; i < kstack_pages; i++) { 1864 mmu_booke_kenter(mmu, kstack0, kstack0_phys); 1865 kstack0 += PAGE_SIZE; 1866 kstack0_phys += PAGE_SIZE; 1867 } 1868 1869 pmap_bootstrapped = 1; 1870 1871 debugf("virtual_avail = %"PRI0ptrX"\n", virtual_avail); 1872 debugf("virtual_end = %"PRI0ptrX"\n", virtual_end); 1873 1874 debugf("mmu_booke_bootstrap: exit\n"); 1875 } 1876 1877 #ifdef SMP 1878 void 1879 tlb1_ap_prep(void) 1880 { 1881 tlb_entry_t *e, tmp; 1882 unsigned int i; 1883 1884 /* Prepare TLB1 image for AP processors */ 1885 e = __boot_tlb1; 1886 for (i = 0; i < TLB1_ENTRIES; i++) { 1887 tlb1_read_entry(&tmp, i); 1888 1889 if ((tmp.mas1 & MAS1_VALID) && (tmp.mas2 & _TLB_ENTRY_SHARED)) 1890 memcpy(e++, &tmp, sizeof(tmp)); 1891 } 1892 } 1893 1894 void 1895 pmap_bootstrap_ap(volatile uint32_t *trcp __unused) 1896 { 1897 int i; 1898 1899 /* 1900 * Finish TLB1 configuration: the BSP already set up its TLB1 and we 1901 * have the snapshot of its contents in the s/w __boot_tlb1[] table 1902 * created by tlb1_ap_prep(), so use these values directly to 1903 * (re)program AP's TLB1 hardware. 1904 * 1905 * Start at index 1 because index 0 has the kernel map. 1906 */ 1907 for (i = 1; i < TLB1_ENTRIES; i++) { 1908 if (__boot_tlb1[i].mas1 & MAS1_VALID) 1909 tlb1_write_entry(&__boot_tlb1[i], i); 1910 } 1911 1912 set_mas4_defaults(); 1913 } 1914 #endif 1915 1916 static void 1917 booke_pmap_init_qpages(void) 1918 { 1919 struct pcpu *pc; 1920 int i; 1921 1922 CPU_FOREACH(i) { 1923 pc = pcpu_find(i); 1924 pc->pc_qmap_addr = kva_alloc(PAGE_SIZE); 1925 if (pc->pc_qmap_addr == 0) 1926 panic("pmap_init_qpages: unable to allocate KVA"); 1927 } 1928 } 1929 1930 SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, booke_pmap_init_qpages, NULL); 1931 1932 /* 1933 * Get the physical page address for the given pmap/virtual address. 1934 */ 1935 static vm_paddr_t 1936 mmu_booke_extract(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1937 { 1938 vm_paddr_t pa; 1939 1940 PMAP_LOCK(pmap); 1941 pa = pte_vatopa(mmu, pmap, va); 1942 PMAP_UNLOCK(pmap); 1943 1944 return (pa); 1945 } 1946 1947 /* 1948 * Extract the physical page address associated with the given 1949 * kernel virtual address. 1950 */ 1951 static vm_paddr_t 1952 mmu_booke_kextract(mmu_t mmu, vm_offset_t va) 1953 { 1954 tlb_entry_t e; 1955 vm_paddr_t p = 0; 1956 int i; 1957 1958 if (va >= VM_MIN_KERNEL_ADDRESS && va <= VM_MAX_KERNEL_ADDRESS) 1959 p = pte_vatopa(mmu, kernel_pmap, va); 1960 1961 if (p == 0) { 1962 /* Check TLB1 mappings */ 1963 for (i = 0; i < TLB1_ENTRIES; i++) { 1964 tlb1_read_entry(&e, i); 1965 if (!(e.mas1 & MAS1_VALID)) 1966 continue; 1967 if (va >= e.virt && va < e.virt + e.size) 1968 return (e.phys + (va - e.virt)); 1969 } 1970 } 1971 1972 return (p); 1973 } 1974 1975 /* 1976 * Initialize the pmap module. 
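 * (It creates the pv entry UMA zone, sizes it from the vm.pmap.shpgperproc
 * and vm.pmap.pv_entries tunables, and initializes the ptbl allocator.)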
1977 * Called by vm_init, to initialize any structures that the pmap 1978 * system needs to map virtual memory. 1979 */ 1980 static void 1981 mmu_booke_init(mmu_t mmu) 1982 { 1983 int shpgperproc = PMAP_SHPGPERPROC; 1984 1985 /* 1986 * Initialize the address space (zone) for the pv entries. Set a 1987 * high water mark so that the system can recover from excessive 1988 * numbers of pv entries. 1989 */ 1990 pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL, 1991 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); 1992 1993 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 1994 pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count; 1995 1996 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); 1997 pv_entry_high_water = 9 * (pv_entry_max / 10); 1998 1999 uma_zone_reserve_kva(pvzone, pv_entry_max); 2000 2001 /* Pre-fill pvzone with initial number of pv entries. */ 2002 uma_prealloc(pvzone, PV_ENTRY_ZONE_MIN); 2003 2004 /* Initialize ptbl allocation. */ 2005 ptbl_init(); 2006 } 2007 2008 /* 2009 * Map a list of wired pages into kernel virtual address space. This is 2010 * intended for temporary mappings which do not need page modification or 2011 * references recorded. Existing mappings in the region are overwritten. 2012 */ 2013 static void 2014 mmu_booke_qenter(mmu_t mmu, vm_offset_t sva, vm_page_t *m, int count) 2015 { 2016 vm_offset_t va; 2017 2018 va = sva; 2019 while (count-- > 0) { 2020 mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(*m)); 2021 va += PAGE_SIZE; 2022 m++; 2023 } 2024 } 2025 2026 /* 2027 * Remove page mappings from kernel virtual address space. Intended for 2028 * temporary mappings entered by mmu_booke_qenter. 2029 */ 2030 static void 2031 mmu_booke_qremove(mmu_t mmu, vm_offset_t sva, int count) 2032 { 2033 vm_offset_t va; 2034 2035 va = sva; 2036 while (count-- > 0) { 2037 mmu_booke_kremove(mmu, va); 2038 va += PAGE_SIZE; 2039 } 2040 } 2041 2042 /* 2043 * Map a wired page into kernel virtual address space. 2044 */ 2045 static void 2046 mmu_booke_kenter(mmu_t mmu, vm_offset_t va, vm_paddr_t pa) 2047 { 2048 2049 mmu_booke_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT); 2050 } 2051 2052 static void 2053 mmu_booke_kenter_attr(mmu_t mmu, vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma) 2054 { 2055 uint32_t flags; 2056 pte_t *pte; 2057 2058 KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && 2059 (va <= VM_MAX_KERNEL_ADDRESS)), ("mmu_booke_kenter: invalid va")); 2060 2061 flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID; 2062 flags |= tlb_calc_wimg(pa, ma) << PTE_MAS2_SHIFT; 2063 flags |= PTE_PS_4KB; 2064 2065 pte = pte_find(mmu, kernel_pmap, va); 2066 KASSERT((pte != NULL), ("mmu_booke_kenter: invalid va. NULL PTE")); 2067 2068 mtx_lock_spin(&tlbivax_mutex); 2069 tlb_miss_lock(); 2070 2071 if (PTE_ISVALID(pte)) { 2072 2073 CTR1(KTR_PMAP, "%s: replacing entry!", __func__); 2074 2075 /* Flush entry from TLB0 */ 2076 tlb0_flush_entry(va); 2077 } 2078 2079 *pte = PTE_RPN_FROM_PA(pa) | flags; 2080 2081 //debugf("mmu_booke_kenter: pdir_idx = %d ptbl_idx = %d va=0x%08x " 2082 // "pa=0x%08x rpn=0x%08x flags=0x%08x\n", 2083 // pdir_idx, ptbl_idx, va, pa, pte->rpn, pte->flags); 2084 2085 /* Flush the real memory from the instruction cache. */ 2086 if ((flags & (PTE_I | PTE_G)) == 0) 2087 __syncicache((void *)va, PAGE_SIZE); 2088 2089 tlb_miss_unlock(); 2090 mtx_unlock_spin(&tlbivax_mutex); 2091 } 2092 2093 /* 2094 * Remove a page from kernel page table. 
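 * This is the counterpart of mmu_booke_kenter(): the PTE for the given KVA
 * is cleared and the stale TLB0 entry is invalidated under the tlbivax
 * mutex and TLB miss lock.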
2095 */ 2096 static void 2097 mmu_booke_kremove(mmu_t mmu, vm_offset_t va) 2098 { 2099 pte_t *pte; 2100 2101 CTR2(KTR_PMAP,"%s: s (va = 0x%"PRI0ptrX")\n", __func__, va); 2102 2103 KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && 2104 (va <= VM_MAX_KERNEL_ADDRESS)), 2105 ("mmu_booke_kremove: invalid va")); 2106 2107 pte = pte_find(mmu, kernel_pmap, va); 2108 2109 if (!PTE_ISVALID(pte)) { 2110 2111 CTR1(KTR_PMAP, "%s: invalid pte", __func__); 2112 2113 return; 2114 } 2115 2116 mtx_lock_spin(&tlbivax_mutex); 2117 tlb_miss_lock(); 2118 2119 /* Invalidate entry in TLB0, update PTE. */ 2120 tlb0_flush_entry(va); 2121 *pte = 0; 2122 2123 tlb_miss_unlock(); 2124 mtx_unlock_spin(&tlbivax_mutex); 2125 } 2126 2127 /* 2128 * Provide a kernel pointer corresponding to a given userland pointer. 2129 * The returned pointer is valid until the next time this function is 2130 * called in this thread. This is used internally in copyin/copyout. 2131 */ 2132 int 2133 mmu_booke_map_user_ptr(mmu_t mmu, pmap_t pm, volatile const void *uaddr, 2134 void **kaddr, size_t ulen, size_t *klen) 2135 { 2136 2137 if ((uintptr_t)uaddr + ulen > VM_MAXUSER_ADDRESS + PAGE_SIZE) 2138 return (EFAULT); 2139 2140 *kaddr = (void *)(uintptr_t)uaddr; 2141 if (klen) 2142 *klen = ulen; 2143 2144 return (0); 2145 } 2146 2147 /* 2148 * Figure out where a given kernel pointer (usually in a fault) points 2149 * to from the VM's perspective, potentially remapping into userland's 2150 * address space. 2151 */ 2152 static int 2153 mmu_booke_decode_kernel_ptr(mmu_t mmu, vm_offset_t addr, int *is_user, 2154 vm_offset_t *decoded_addr) 2155 { 2156 2157 if (addr < VM_MAXUSER_ADDRESS) 2158 *is_user = 1; 2159 else 2160 *is_user = 0; 2161 2162 *decoded_addr = addr; 2163 return (0); 2164 } 2165 2166 /* 2167 * Initialize pmap associated with process 0. 2168 */ 2169 static void 2170 mmu_booke_pinit0(mmu_t mmu, pmap_t pmap) 2171 { 2172 2173 PMAP_LOCK_INIT(pmap); 2174 mmu_booke_pinit(mmu, pmap); 2175 PCPU_SET(curpmap, pmap); 2176 } 2177 2178 /* 2179 * Initialize a preallocated and zeroed pmap structure, 2180 * such as one in a vmspace structure. 2181 */ 2182 static void 2183 mmu_booke_pinit(mmu_t mmu, pmap_t pmap) 2184 { 2185 int i; 2186 2187 CTR4(KTR_PMAP, "%s: pmap = %p, proc %d '%s'", __func__, pmap, 2188 curthread->td_proc->p_pid, curthread->td_proc->p_comm); 2189 2190 KASSERT((pmap != kernel_pmap), ("pmap_pinit: initializing kernel_pmap")); 2191 2192 for (i = 0; i < MAXCPU; i++) 2193 pmap->pm_tid[i] = TID_NONE; 2194 CPU_ZERO(&kernel_pmap->pm_active); 2195 bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); 2196 #ifdef __powerpc64__ 2197 bzero(&pmap->pm_pp2d, sizeof(pte_t **) * PP2D_NENTRIES); 2198 TAILQ_INIT(&pmap->pm_pdir_list); 2199 #else 2200 bzero(&pmap->pm_pdir, sizeof(pte_t *) * PDIR_NENTRIES); 2201 #endif 2202 TAILQ_INIT(&pmap->pm_ptbl_list); 2203 } 2204 2205 /* 2206 * Release any resources held by the given physical map. 2207 * Called when a pmap initialized by mmu_booke_pinit is being released. 2208 * Should only be called if the map contains no valid mappings. 2209 */ 2210 static void 2211 mmu_booke_release(mmu_t mmu, pmap_t pmap) 2212 { 2213 2214 KASSERT(pmap->pm_stats.resident_count == 0, 2215 ("pmap_release: pmap resident count %ld != 0", 2216 pmap->pm_stats.resident_count)); 2217 } 2218 2219 /* 2220 * Insert the given physical page at the specified virtual address in the 2221 * target physical map with the protection requested. If specified the page 2222 * will be wired down. 
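 * Returns KERN_SUCCESS, or KERN_RESOURCE_SHORTAGE when pte_enter() cannot
 * allocate a page table page (e.g. under PMAP_ENTER_NOSLEEP).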
2223 */ 2224 static int 2225 mmu_booke_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 2226 vm_prot_t prot, u_int flags, int8_t psind) 2227 { 2228 int error; 2229 2230 rw_wlock(&pvh_global_lock); 2231 PMAP_LOCK(pmap); 2232 error = mmu_booke_enter_locked(mmu, pmap, va, m, prot, flags, psind); 2233 PMAP_UNLOCK(pmap); 2234 rw_wunlock(&pvh_global_lock); 2235 return (error); 2236 } 2237 2238 static int 2239 mmu_booke_enter_locked(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 2240 vm_prot_t prot, u_int pmap_flags, int8_t psind __unused) 2241 { 2242 pte_t *pte; 2243 vm_paddr_t pa; 2244 uint32_t flags; 2245 int error, su, sync; 2246 2247 pa = VM_PAGE_TO_PHYS(m); 2248 su = (pmap == kernel_pmap); 2249 sync = 0; 2250 2251 //debugf("mmu_booke_enter_locked: s (pmap=0x%08x su=%d tid=%d m=0x%08x va=0x%08x " 2252 // "pa=0x%08x prot=0x%08x flags=%#x)\n", 2253 // (u_int32_t)pmap, su, pmap->pm_tid, 2254 // (u_int32_t)m, va, pa, prot, flags); 2255 2256 if (su) { 2257 KASSERT(((va >= virtual_avail) && 2258 (va <= VM_MAX_KERNEL_ADDRESS)), 2259 ("mmu_booke_enter_locked: kernel pmap, non kernel va")); 2260 } else { 2261 KASSERT((va <= VM_MAXUSER_ADDRESS), 2262 ("mmu_booke_enter_locked: user pmap, non user va")); 2263 } 2264 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) 2265 VM_OBJECT_ASSERT_LOCKED(m->object); 2266 2267 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2268 2269 /* 2270 * If there is an existing mapping, and the physical address has not 2271 * changed, must be protection or wiring change. 2272 */ 2273 if (((pte = pte_find(mmu, pmap, va)) != NULL) && 2274 (PTE_ISVALID(pte)) && (PTE_PA(pte) == pa)) { 2275 2276 /* 2277 * Before actually updating pte->flags we calculate and 2278 * prepare its new value in a helper var. 2279 */ 2280 flags = *pte; 2281 flags &= ~(PTE_UW | PTE_UX | PTE_SW | PTE_SX | PTE_MODIFIED); 2282 2283 /* Wiring change, just update stats. */ 2284 if ((pmap_flags & PMAP_ENTER_WIRED) != 0) { 2285 if (!PTE_ISWIRED(pte)) { 2286 flags |= PTE_WIRED; 2287 pmap->pm_stats.wired_count++; 2288 } 2289 } else { 2290 if (PTE_ISWIRED(pte)) { 2291 flags &= ~PTE_WIRED; 2292 pmap->pm_stats.wired_count--; 2293 } 2294 } 2295 2296 if (prot & VM_PROT_WRITE) { 2297 /* Add write permissions. */ 2298 flags |= PTE_SW; 2299 if (!su) 2300 flags |= PTE_UW; 2301 2302 if ((flags & PTE_MANAGED) != 0) 2303 vm_page_aflag_set(m, PGA_WRITEABLE); 2304 } else { 2305 /* Handle modified pages, sense modify status. */ 2306 2307 /* 2308 * The PTE_MODIFIED flag could be set by underlying 2309 * TLB misses since we last read it (above), possibly 2310 * other CPUs could update it so we check in the PTE 2311 * directly rather than rely on that saved local flags 2312 * copy. 2313 */ 2314 if (PTE_ISMODIFIED(pte)) 2315 vm_page_dirty(m); 2316 } 2317 2318 if (prot & VM_PROT_EXECUTE) { 2319 flags |= PTE_SX; 2320 if (!su) 2321 flags |= PTE_UX; 2322 2323 /* 2324 * Check existing flags for execute permissions: if we 2325 * are turning execute permissions on, icache should 2326 * be flushed. 2327 */ 2328 if ((*pte & (PTE_UX | PTE_SX)) == 0) 2329 sync++; 2330 } 2331 2332 flags &= ~PTE_REFERENCED; 2333 2334 /* 2335 * The new flags value is all calculated -- only now actually 2336 * update the PTE. 
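 * The write is done with the tlbivax mutex and TLB miss lock held, and the
 * old TLB0 entry is flushed first, so a concurrent TLB miss cannot reload
 * the stale flags.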
2337 */ 2338 mtx_lock_spin(&tlbivax_mutex); 2339 tlb_miss_lock(); 2340 2341 tlb0_flush_entry(va); 2342 *pte &= ~PTE_FLAGS_MASK; 2343 *pte |= flags; 2344 2345 tlb_miss_unlock(); 2346 mtx_unlock_spin(&tlbivax_mutex); 2347 2348 } else { 2349 /* 2350 * If there is an existing mapping, but it's for a different 2351 * physical address, pte_enter() will delete the old mapping. 2352 */ 2353 //if ((pte != NULL) && PTE_ISVALID(pte)) 2354 // debugf("mmu_booke_enter_locked: replace\n"); 2355 //else 2356 // debugf("mmu_booke_enter_locked: new\n"); 2357 2358 /* Now set up the flags and install the new mapping. */ 2359 flags = (PTE_SR | PTE_VALID); 2360 flags |= PTE_M; 2361 2362 if (!su) 2363 flags |= PTE_UR; 2364 2365 if (prot & VM_PROT_WRITE) { 2366 flags |= PTE_SW; 2367 if (!su) 2368 flags |= PTE_UW; 2369 2370 if ((m->oflags & VPO_UNMANAGED) == 0) 2371 vm_page_aflag_set(m, PGA_WRITEABLE); 2372 } 2373 2374 if (prot & VM_PROT_EXECUTE) { 2375 flags |= PTE_SX; 2376 if (!su) 2377 flags |= PTE_UX; 2378 } 2379 2380 /* If its wired update stats. */ 2381 if ((pmap_flags & PMAP_ENTER_WIRED) != 0) 2382 flags |= PTE_WIRED; 2383 2384 error = pte_enter(mmu, pmap, m, va, flags, 2385 (pmap_flags & PMAP_ENTER_NOSLEEP) != 0); 2386 if (error != 0) 2387 return (KERN_RESOURCE_SHORTAGE); 2388 2389 if ((flags & PMAP_ENTER_WIRED) != 0) 2390 pmap->pm_stats.wired_count++; 2391 2392 /* Flush the real memory from the instruction cache. */ 2393 if (prot & VM_PROT_EXECUTE) 2394 sync++; 2395 } 2396 2397 if (sync && (su || pmap == PCPU_GET(curpmap))) { 2398 __syncicache((void *)va, PAGE_SIZE); 2399 sync = 0; 2400 } 2401 2402 return (KERN_SUCCESS); 2403 } 2404 2405 /* 2406 * Maps a sequence of resident pages belonging to the same object. 2407 * The sequence begins with the given page m_start. This page is 2408 * mapped at the given virtual address start. Each subsequent page is 2409 * mapped at a virtual address that is offset from start by the same 2410 * amount as the page is offset from m_start within the object. The 2411 * last page in the sequence is the page with the largest offset from 2412 * m_start that can be mapped at a virtual address less than the given 2413 * virtual address end. Not every virtual page between start and end 2414 * is mapped; only those for which a resident page exists with the 2415 * corresponding offset from m_start are mapped. 2416 */ 2417 static void 2418 mmu_booke_enter_object(mmu_t mmu, pmap_t pmap, vm_offset_t start, 2419 vm_offset_t end, vm_page_t m_start, vm_prot_t prot) 2420 { 2421 vm_page_t m; 2422 vm_pindex_t diff, psize; 2423 2424 VM_OBJECT_ASSERT_LOCKED(m_start->object); 2425 2426 psize = atop(end - start); 2427 m = m_start; 2428 rw_wlock(&pvh_global_lock); 2429 PMAP_LOCK(pmap); 2430 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 2431 mmu_booke_enter_locked(mmu, pmap, start + ptoa(diff), m, 2432 prot & (VM_PROT_READ | VM_PROT_EXECUTE), 2433 PMAP_ENTER_NOSLEEP, 0); 2434 m = TAILQ_NEXT(m, listq); 2435 } 2436 rw_wunlock(&pvh_global_lock); 2437 PMAP_UNLOCK(pmap); 2438 } 2439 2440 static void 2441 mmu_booke_enter_quick(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 2442 vm_prot_t prot) 2443 { 2444 2445 rw_wlock(&pvh_global_lock); 2446 PMAP_LOCK(pmap); 2447 mmu_booke_enter_locked(mmu, pmap, va, m, 2448 prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP, 2449 0); 2450 rw_wunlock(&pvh_global_lock); 2451 PMAP_UNLOCK(pmap); 2452 } 2453 2454 /* 2455 * Remove the given range of addresses from the specified map. 
2456 * 2457 * It is assumed that the start and end are properly rounded to the page size. 2458 */ 2459 static void 2460 mmu_booke_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_offset_t endva) 2461 { 2462 pte_t *pte; 2463 uint8_t hold_flag; 2464 2465 int su = (pmap == kernel_pmap); 2466 2467 //debugf("mmu_booke_remove: s (su = %d pmap=0x%08x tid=%d va=0x%08x endva=0x%08x)\n", 2468 // su, (u_int32_t)pmap, pmap->pm_tid, va, endva); 2469 2470 if (su) { 2471 KASSERT(((va >= virtual_avail) && 2472 (va <= VM_MAX_KERNEL_ADDRESS)), 2473 ("mmu_booke_remove: kernel pmap, non kernel va")); 2474 } else { 2475 KASSERT((va <= VM_MAXUSER_ADDRESS), 2476 ("mmu_booke_remove: user pmap, non user va")); 2477 } 2478 2479 if (PMAP_REMOVE_DONE(pmap)) { 2480 //debugf("mmu_booke_remove: e (empty)\n"); 2481 return; 2482 } 2483 2484 hold_flag = PTBL_HOLD_FLAG(pmap); 2485 //debugf("mmu_booke_remove: hold_flag = %d\n", hold_flag); 2486 2487 rw_wlock(&pvh_global_lock); 2488 PMAP_LOCK(pmap); 2489 for (; va < endva; va += PAGE_SIZE) { 2490 pte = pte_find(mmu, pmap, va); 2491 if ((pte != NULL) && PTE_ISVALID(pte)) 2492 pte_remove(mmu, pmap, va, hold_flag); 2493 } 2494 PMAP_UNLOCK(pmap); 2495 rw_wunlock(&pvh_global_lock); 2496 2497 //debugf("mmu_booke_remove: e\n"); 2498 } 2499 2500 /* 2501 * Remove physical page from all pmaps in which it resides. 2502 */ 2503 static void 2504 mmu_booke_remove_all(mmu_t mmu, vm_page_t m) 2505 { 2506 pv_entry_t pv, pvn; 2507 uint8_t hold_flag; 2508 2509 rw_wlock(&pvh_global_lock); 2510 for (pv = TAILQ_FIRST(&m->md.pv_list); pv != NULL; pv = pvn) { 2511 pvn = TAILQ_NEXT(pv, pv_link); 2512 2513 PMAP_LOCK(pv->pv_pmap); 2514 hold_flag = PTBL_HOLD_FLAG(pv->pv_pmap); 2515 pte_remove(mmu, pv->pv_pmap, pv->pv_va, hold_flag); 2516 PMAP_UNLOCK(pv->pv_pmap); 2517 } 2518 vm_page_aflag_clear(m, PGA_WRITEABLE); 2519 rw_wunlock(&pvh_global_lock); 2520 } 2521 2522 /* 2523 * Map a range of physical addresses into kernel virtual address space. 2524 */ 2525 static vm_offset_t 2526 mmu_booke_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start, 2527 vm_paddr_t pa_end, int prot) 2528 { 2529 vm_offset_t sva = *virt; 2530 vm_offset_t va = sva; 2531 2532 //debugf("mmu_booke_map: s (sva = 0x%08x pa_start = 0x%08x pa_end = 0x%08x)\n", 2533 // sva, pa_start, pa_end); 2534 2535 while (pa_start < pa_end) { 2536 mmu_booke_kenter(mmu, va, pa_start); 2537 va += PAGE_SIZE; 2538 pa_start += PAGE_SIZE; 2539 } 2540 *virt = va; 2541 2542 //debugf("mmu_booke_map: e (va = 0x%08x)\n", va); 2543 return (sva); 2544 } 2545 2546 /* 2547 * The pmap must be activated before it's address space can be accessed in any 2548 * way. 2549 */ 2550 static void 2551 mmu_booke_activate(mmu_t mmu, struct thread *td) 2552 { 2553 pmap_t pmap; 2554 u_int cpuid; 2555 2556 pmap = &td->td_proc->p_vmspace->vm_pmap; 2557 2558 CTR5(KTR_PMAP, "%s: s (td = %p, proc = '%s', id = %d, pmap = 0x%"PRI0ptrX")", 2559 __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap); 2560 2561 KASSERT((pmap != kernel_pmap), ("mmu_booke_activate: kernel_pmap!")); 2562 2563 sched_pin(); 2564 2565 cpuid = PCPU_GET(cpuid); 2566 CPU_SET_ATOMIC(cpuid, &pmap->pm_active); 2567 PCPU_SET(curpmap, pmap); 2568 2569 if (pmap->pm_tid[cpuid] == TID_NONE) 2570 tid_alloc(pmap); 2571 2572 /* Load PID0 register with pmap tid value. 
*/ 2573 mtspr(SPR_PID0, pmap->pm_tid[cpuid]); 2574 __asm __volatile("isync"); 2575 2576 mtspr(SPR_DBCR0, td->td_pcb->pcb_cpu.booke.dbcr0); 2577 2578 sched_unpin(); 2579 2580 CTR3(KTR_PMAP, "%s: e (tid = %d for '%s')", __func__, 2581 pmap->pm_tid[PCPU_GET(cpuid)], td->td_proc->p_comm); 2582 } 2583 2584 /* 2585 * Deactivate the specified process's address space. 2586 */ 2587 static void 2588 mmu_booke_deactivate(mmu_t mmu, struct thread *td) 2589 { 2590 pmap_t pmap; 2591 2592 pmap = &td->td_proc->p_vmspace->vm_pmap; 2593 2594 CTR5(KTR_PMAP, "%s: td=%p, proc = '%s', id = %d, pmap = 0x%"PRI0ptrX, 2595 __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap); 2596 2597 td->td_pcb->pcb_cpu.booke.dbcr0 = mfspr(SPR_DBCR0); 2598 2599 CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmap->pm_active); 2600 PCPU_SET(curpmap, NULL); 2601 } 2602 2603 /* 2604 * Copy the range specified by src_addr/len 2605 * from the source map to the range dst_addr/len 2606 * in the destination map. 2607 * 2608 * This routine is only advisory and need not do anything. 2609 */ 2610 static void 2611 mmu_booke_copy(mmu_t mmu, pmap_t dst_pmap, pmap_t src_pmap, 2612 vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) 2613 { 2614 2615 } 2616 2617 /* 2618 * Set the physical protection on the specified range of this map as requested. 2619 */ 2620 static void 2621 mmu_booke_protect(mmu_t mmu, pmap_t pmap, vm_offset_t sva, vm_offset_t eva, 2622 vm_prot_t prot) 2623 { 2624 vm_offset_t va; 2625 vm_page_t m; 2626 pte_t *pte; 2627 2628 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 2629 mmu_booke_remove(mmu, pmap, sva, eva); 2630 return; 2631 } 2632 2633 if (prot & VM_PROT_WRITE) 2634 return; 2635 2636 PMAP_LOCK(pmap); 2637 for (va = sva; va < eva; va += PAGE_SIZE) { 2638 if ((pte = pte_find(mmu, pmap, va)) != NULL) { 2639 if (PTE_ISVALID(pte)) { 2640 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 2641 2642 mtx_lock_spin(&tlbivax_mutex); 2643 tlb_miss_lock(); 2644 2645 /* Handle modified pages. */ 2646 if (PTE_ISMODIFIED(pte) && PTE_ISMANAGED(pte)) 2647 vm_page_dirty(m); 2648 2649 tlb0_flush_entry(va); 2650 *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED); 2651 2652 tlb_miss_unlock(); 2653 mtx_unlock_spin(&tlbivax_mutex); 2654 } 2655 } 2656 } 2657 PMAP_UNLOCK(pmap); 2658 } 2659 2660 /* 2661 * Clear the write and modified bits in each of the given page's mappings. 2662 */ 2663 static void 2664 mmu_booke_remove_write(mmu_t mmu, vm_page_t m) 2665 { 2666 pv_entry_t pv; 2667 pte_t *pte; 2668 2669 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2670 ("mmu_booke_remove_write: page %p is not managed", m)); 2671 2672 /* 2673 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 2674 * set by another thread while the object is locked. Thus, 2675 * if PGA_WRITEABLE is clear, no page table entries need updating. 2676 */ 2677 VM_OBJECT_ASSERT_WLOCKED(m->object); 2678 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 2679 return; 2680 rw_wlock(&pvh_global_lock); 2681 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 2682 PMAP_LOCK(pv->pv_pmap); 2683 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) { 2684 if (PTE_ISVALID(pte)) { 2685 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 2686 2687 mtx_lock_spin(&tlbivax_mutex); 2688 tlb_miss_lock(); 2689 2690 /* Handle modified pages. */ 2691 if (PTE_ISMODIFIED(pte)) 2692 vm_page_dirty(m); 2693 2694 /* Flush mapping from TLB0. 
*/ 2695 *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED); 2696 2697 tlb_miss_unlock(); 2698 mtx_unlock_spin(&tlbivax_mutex); 2699 } 2700 } 2701 PMAP_UNLOCK(pv->pv_pmap); 2702 } 2703 vm_page_aflag_clear(m, PGA_WRITEABLE); 2704 rw_wunlock(&pvh_global_lock); 2705 } 2706 2707 static void 2708 mmu_booke_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) 2709 { 2710 pte_t *pte; 2711 vm_paddr_t pa = 0; 2712 int sync_sz, valid; 2713 #ifndef __powerpc64__ 2714 pmap_t pmap; 2715 vm_page_t m; 2716 vm_offset_t addr; 2717 int active; 2718 #endif 2719 2720 #ifndef __powerpc64__ 2721 rw_wlock(&pvh_global_lock); 2722 pmap = PCPU_GET(curpmap); 2723 active = (pm == kernel_pmap || pm == pmap) ? 1 : 0; 2724 #endif 2725 while (sz > 0) { 2726 PMAP_LOCK(pm); 2727 pte = pte_find(mmu, pm, va); 2728 valid = (pte != NULL && PTE_ISVALID(pte)) ? 1 : 0; 2729 if (valid) 2730 pa = PTE_PA(pte); 2731 PMAP_UNLOCK(pm); 2732 sync_sz = PAGE_SIZE - (va & PAGE_MASK); 2733 sync_sz = min(sync_sz, sz); 2734 if (valid) { 2735 #ifdef __powerpc64__ 2736 pa += (va & PAGE_MASK); 2737 __syncicache((void *)PHYS_TO_DMAP(pa), sync_sz); 2738 #else 2739 if (!active) { 2740 /* Create a mapping in the active pmap. */ 2741 addr = 0; 2742 m = PHYS_TO_VM_PAGE(pa); 2743 PMAP_LOCK(pmap); 2744 pte_enter(mmu, pmap, m, addr, 2745 PTE_SR | PTE_VALID, FALSE); 2746 addr += (va & PAGE_MASK); 2747 __syncicache((void *)addr, sync_sz); 2748 pte_remove(mmu, pmap, addr, PTBL_UNHOLD); 2749 PMAP_UNLOCK(pmap); 2750 } else 2751 __syncicache((void *)va, sync_sz); 2752 #endif 2753 } 2754 va += sync_sz; 2755 sz -= sync_sz; 2756 } 2757 #ifndef __powerpc64__ 2758 rw_wunlock(&pvh_global_lock); 2759 #endif 2760 } 2761 2762 /* 2763 * Atomically extract and hold the physical page with the given 2764 * pmap and virtual address pair if that mapping permits the given 2765 * protection. 2766 */ 2767 static vm_page_t 2768 mmu_booke_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, 2769 vm_prot_t prot) 2770 { 2771 pte_t *pte; 2772 vm_page_t m; 2773 uint32_t pte_wbit; 2774 vm_paddr_t pa; 2775 2776 m = NULL; 2777 pa = 0; 2778 PMAP_LOCK(pmap); 2779 retry: 2780 pte = pte_find(mmu, pmap, va); 2781 if ((pte != NULL) && PTE_ISVALID(pte)) { 2782 if (pmap == kernel_pmap) 2783 pte_wbit = PTE_SW; 2784 else 2785 pte_wbit = PTE_UW; 2786 2787 if ((*pte & pte_wbit) || ((prot & VM_PROT_WRITE) == 0)) { 2788 if (vm_page_pa_tryrelock(pmap, PTE_PA(pte), &pa)) 2789 goto retry; 2790 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 2791 vm_page_wire(m); 2792 } 2793 } 2794 2795 PA_UNLOCK_COND(pa); 2796 PMAP_UNLOCK(pmap); 2797 return (m); 2798 } 2799 2800 /* 2801 * Initialize a vm_page's machine-dependent fields. 2802 */ 2803 static void 2804 mmu_booke_page_init(mmu_t mmu, vm_page_t m) 2805 { 2806 2807 m->md.pv_tracked = 0; 2808 TAILQ_INIT(&m->md.pv_list); 2809 } 2810 2811 /* 2812 * mmu_booke_zero_page_area zeros the specified hardware page by 2813 * mapping it into virtual memory and using bzero to clear 2814 * its contents. 2815 * 2816 * off and size must reside within a single page. 2817 */ 2818 static void 2819 mmu_booke_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) 2820 { 2821 vm_offset_t va; 2822 2823 /* XXX KASSERT off and size are within a single page? 
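 * A check along the lines of
 *	KASSERT(off + size <= PAGE_SIZE, ("%s: bad range", __func__));
 * would do here (sketch only; not currently present).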
*/ 2824 2825 #ifdef __powerpc64__ 2826 va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2827 bzero((caddr_t)va + off, size); 2828 #else 2829 mtx_lock(&zero_page_mutex); 2830 va = zero_page_va; 2831 2832 mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); 2833 bzero((caddr_t)va + off, size); 2834 mmu_booke_kremove(mmu, va); 2835 2836 mtx_unlock(&zero_page_mutex); 2837 #endif 2838 } 2839 2840 /* 2841 * mmu_booke_zero_page zeros the specified hardware page. 2842 */ 2843 static void 2844 mmu_booke_zero_page(mmu_t mmu, vm_page_t m) 2845 { 2846 vm_offset_t off, va; 2847 2848 #ifdef __powerpc64__ 2849 va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2850 2851 for (off = 0; off < PAGE_SIZE; off += cacheline_size) 2852 __asm __volatile("dcbz 0,%0" :: "r"(va + off)); 2853 #else 2854 va = zero_page_va; 2855 mtx_lock(&zero_page_mutex); 2856 2857 mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); 2858 2859 for (off = 0; off < PAGE_SIZE; off += cacheline_size) 2860 __asm __volatile("dcbz 0,%0" :: "r"(va + off)); 2861 2862 mmu_booke_kremove(mmu, va); 2863 2864 mtx_unlock(&zero_page_mutex); 2865 #endif 2866 } 2867 2868 /* 2869 * mmu_booke_copy_page copies the specified (machine independent) page by 2870 * mapping the page into virtual memory and using memcopy to copy the page, 2871 * one machine dependent page at a time. 2872 */ 2873 static void 2874 mmu_booke_copy_page(mmu_t mmu, vm_page_t sm, vm_page_t dm) 2875 { 2876 vm_offset_t sva, dva; 2877 2878 #ifdef __powerpc64__ 2879 sva = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(sm)); 2880 dva = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dm)); 2881 memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE); 2882 #else 2883 sva = copy_page_src_va; 2884 dva = copy_page_dst_va; 2885 2886 mtx_lock(©_page_mutex); 2887 mmu_booke_kenter(mmu, sva, VM_PAGE_TO_PHYS(sm)); 2888 mmu_booke_kenter(mmu, dva, VM_PAGE_TO_PHYS(dm)); 2889 2890 memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE); 2891 2892 mmu_booke_kremove(mmu, dva); 2893 mmu_booke_kremove(mmu, sva); 2894 mtx_unlock(©_page_mutex); 2895 #endif 2896 } 2897 2898 static inline void 2899 mmu_booke_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, 2900 vm_page_t *mb, vm_offset_t b_offset, int xfersize) 2901 { 2902 void *a_cp, *b_cp; 2903 vm_offset_t a_pg_offset, b_pg_offset; 2904 int cnt; 2905 2906 #ifdef __powerpc64__ 2907 vm_page_t pa, pb; 2908 2909 while (xfersize > 0) { 2910 a_pg_offset = a_offset & PAGE_MASK; 2911 pa = ma[a_offset >> PAGE_SHIFT]; 2912 b_pg_offset = b_offset & PAGE_MASK; 2913 pb = mb[b_offset >> PAGE_SHIFT]; 2914 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 2915 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 2916 a_cp = (caddr_t)((uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pa)) + 2917 a_pg_offset); 2918 b_cp = (caddr_t)((uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pb)) + 2919 b_pg_offset); 2920 bcopy(a_cp, b_cp, cnt); 2921 a_offset += cnt; 2922 b_offset += cnt; 2923 xfersize -= cnt; 2924 } 2925 #else 2926 mtx_lock(©_page_mutex); 2927 while (xfersize > 0) { 2928 a_pg_offset = a_offset & PAGE_MASK; 2929 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 2930 mmu_booke_kenter(mmu, copy_page_src_va, 2931 VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])); 2932 a_cp = (char *)copy_page_src_va + a_pg_offset; 2933 b_pg_offset = b_offset & PAGE_MASK; 2934 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 2935 mmu_booke_kenter(mmu, copy_page_dst_va, 2936 VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])); 2937 b_cp = (char *)copy_page_dst_va + b_pg_offset; 2938 bcopy(a_cp, b_cp, cnt); 2939 mmu_booke_kremove(mmu, copy_page_dst_va); 2940 mmu_booke_kremove(mmu, copy_page_src_va); 2941 a_offset += cnt; 2942 
b_offset += cnt; 2943 xfersize -= cnt; 2944 } 2945 mtx_unlock(©_page_mutex); 2946 #endif 2947 } 2948 2949 static vm_offset_t 2950 mmu_booke_quick_enter_page(mmu_t mmu, vm_page_t m) 2951 { 2952 #ifdef __powerpc64__ 2953 return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); 2954 #else 2955 vm_paddr_t paddr; 2956 vm_offset_t qaddr; 2957 uint32_t flags; 2958 pte_t *pte; 2959 2960 paddr = VM_PAGE_TO_PHYS(m); 2961 2962 flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID; 2963 flags |= tlb_calc_wimg(paddr, pmap_page_get_memattr(m)) << PTE_MAS2_SHIFT; 2964 flags |= PTE_PS_4KB; 2965 2966 critical_enter(); 2967 qaddr = PCPU_GET(qmap_addr); 2968 2969 pte = pte_find(mmu, kernel_pmap, qaddr); 2970 2971 KASSERT(*pte == 0, ("mmu_booke_quick_enter_page: PTE busy")); 2972 2973 /* 2974 * XXX: tlbivax is broadcast to other cores, but qaddr should 2975 * not be present in other TLBs. Is there a better instruction 2976 * sequence to use? Or just forget it & use mmu_booke_kenter()... 2977 */ 2978 __asm __volatile("tlbivax 0, %0" :: "r"(qaddr & MAS2_EPN_MASK)); 2979 __asm __volatile("isync; msync"); 2980 2981 *pte = PTE_RPN_FROM_PA(paddr) | flags; 2982 2983 /* Flush the real memory from the instruction cache. */ 2984 if ((flags & (PTE_I | PTE_G)) == 0) 2985 __syncicache((void *)qaddr, PAGE_SIZE); 2986 2987 return (qaddr); 2988 #endif 2989 } 2990 2991 static void 2992 mmu_booke_quick_remove_page(mmu_t mmu, vm_offset_t addr) 2993 { 2994 #ifndef __powerpc64__ 2995 pte_t *pte; 2996 2997 pte = pte_find(mmu, kernel_pmap, addr); 2998 2999 KASSERT(PCPU_GET(qmap_addr) == addr, 3000 ("mmu_booke_quick_remove_page: invalid address")); 3001 KASSERT(*pte != 0, 3002 ("mmu_booke_quick_remove_page: PTE not in use")); 3003 3004 *pte = 0; 3005 critical_exit(); 3006 #endif 3007 } 3008 3009 /* 3010 * Return whether or not the specified physical page was modified 3011 * in any of physical maps. 3012 */ 3013 static boolean_t 3014 mmu_booke_is_modified(mmu_t mmu, vm_page_t m) 3015 { 3016 pte_t *pte; 3017 pv_entry_t pv; 3018 boolean_t rv; 3019 3020 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3021 ("mmu_booke_is_modified: page %p is not managed", m)); 3022 rv = FALSE; 3023 3024 /* 3025 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 3026 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 3027 * is clear, no PTEs can be modified. 3028 */ 3029 VM_OBJECT_ASSERT_WLOCKED(m->object); 3030 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 3031 return (rv); 3032 rw_wlock(&pvh_global_lock); 3033 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3034 PMAP_LOCK(pv->pv_pmap); 3035 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3036 PTE_ISVALID(pte)) { 3037 if (PTE_ISMODIFIED(pte)) 3038 rv = TRUE; 3039 } 3040 PMAP_UNLOCK(pv->pv_pmap); 3041 if (rv) 3042 break; 3043 } 3044 rw_wunlock(&pvh_global_lock); 3045 return (rv); 3046 } 3047 3048 /* 3049 * Return whether or not the specified virtual address is eligible 3050 * for prefault. 3051 */ 3052 static boolean_t 3053 mmu_booke_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t addr) 3054 { 3055 3056 return (FALSE); 3057 } 3058 3059 /* 3060 * Return whether or not the specified physical page was referenced 3061 * in any physical maps. 
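 * The page's pv list is walked and the scan stops at the first mapping
 * whose PTE has PTE_REFERENCED set.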
3062 */ 3063 static boolean_t 3064 mmu_booke_is_referenced(mmu_t mmu, vm_page_t m) 3065 { 3066 pte_t *pte; 3067 pv_entry_t pv; 3068 boolean_t rv; 3069 3070 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3071 ("mmu_booke_is_referenced: page %p is not managed", m)); 3072 rv = FALSE; 3073 rw_wlock(&pvh_global_lock); 3074 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3075 PMAP_LOCK(pv->pv_pmap); 3076 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3077 PTE_ISVALID(pte)) { 3078 if (PTE_ISREFERENCED(pte)) 3079 rv = TRUE; 3080 } 3081 PMAP_UNLOCK(pv->pv_pmap); 3082 if (rv) 3083 break; 3084 } 3085 rw_wunlock(&pvh_global_lock); 3086 return (rv); 3087 } 3088 3089 /* 3090 * Clear the modify bits on the specified physical page. 3091 */ 3092 static void 3093 mmu_booke_clear_modify(mmu_t mmu, vm_page_t m) 3094 { 3095 pte_t *pte; 3096 pv_entry_t pv; 3097 3098 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3099 ("mmu_booke_clear_modify: page %p is not managed", m)); 3100 VM_OBJECT_ASSERT_WLOCKED(m->object); 3101 KASSERT(!vm_page_xbusied(m), 3102 ("mmu_booke_clear_modify: page %p is exclusive busied", m)); 3103 3104 /* 3105 * If the page is not PG_AWRITEABLE, then no PTEs can be modified. 3106 * If the object containing the page is locked and the page is not 3107 * exclusive busied, then PG_AWRITEABLE cannot be concurrently set. 3108 */ 3109 if ((m->aflags & PGA_WRITEABLE) == 0) 3110 return; 3111 rw_wlock(&pvh_global_lock); 3112 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3113 PMAP_LOCK(pv->pv_pmap); 3114 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3115 PTE_ISVALID(pte)) { 3116 mtx_lock_spin(&tlbivax_mutex); 3117 tlb_miss_lock(); 3118 3119 if (*pte & (PTE_SW | PTE_UW | PTE_MODIFIED)) { 3120 tlb0_flush_entry(pv->pv_va); 3121 *pte &= ~(PTE_SW | PTE_UW | PTE_MODIFIED | 3122 PTE_REFERENCED); 3123 } 3124 3125 tlb_miss_unlock(); 3126 mtx_unlock_spin(&tlbivax_mutex); 3127 } 3128 PMAP_UNLOCK(pv->pv_pmap); 3129 } 3130 rw_wunlock(&pvh_global_lock); 3131 } 3132 3133 /* 3134 * Return a count of reference bits for a page, clearing those bits. 3135 * It is not necessary for every reference bit to be cleared, but it 3136 * is necessary that 0 only be returned when there are truly no 3137 * reference bits set. 3138 * 3139 * As an optimization, update the page's dirty field if a modified bit is 3140 * found while counting reference bits. This opportunistic update can be 3141 * performed at low cost and can eliminate the need for some future calls 3142 * to pmap_is_modified(). However, since this function stops after 3143 * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some 3144 * dirty pages. Those dirty pages will only be detected by a future call 3145 * to pmap_is_modified(). 
3146 */ 3147 static int 3148 mmu_booke_ts_referenced(mmu_t mmu, vm_page_t m) 3149 { 3150 pte_t *pte; 3151 pv_entry_t pv; 3152 int count; 3153 3154 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3155 ("mmu_booke_ts_referenced: page %p is not managed", m)); 3156 count = 0; 3157 rw_wlock(&pvh_global_lock); 3158 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3159 PMAP_LOCK(pv->pv_pmap); 3160 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3161 PTE_ISVALID(pte)) { 3162 if (PTE_ISMODIFIED(pte)) 3163 vm_page_dirty(m); 3164 if (PTE_ISREFERENCED(pte)) { 3165 mtx_lock_spin(&tlbivax_mutex); 3166 tlb_miss_lock(); 3167 3168 tlb0_flush_entry(pv->pv_va); 3169 *pte &= ~PTE_REFERENCED; 3170 3171 tlb_miss_unlock(); 3172 mtx_unlock_spin(&tlbivax_mutex); 3173 3174 if (++count >= PMAP_TS_REFERENCED_MAX) { 3175 PMAP_UNLOCK(pv->pv_pmap); 3176 break; 3177 } 3178 } 3179 } 3180 PMAP_UNLOCK(pv->pv_pmap); 3181 } 3182 rw_wunlock(&pvh_global_lock); 3183 return (count); 3184 } 3185 3186 /* 3187 * Clear the wired attribute from the mappings for the specified range of 3188 * addresses in the given pmap. Every valid mapping within that range must 3189 * have the wired attribute set. In contrast, invalid mappings cannot have 3190 * the wired attribute set, so they are ignored. 3191 * 3192 * The wired attribute of the page table entry is not a hardware feature, so 3193 * there is no need to invalidate any TLB entries. 3194 */ 3195 static void 3196 mmu_booke_unwire(mmu_t mmu, pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 3197 { 3198 vm_offset_t va; 3199 pte_t *pte; 3200 3201 PMAP_LOCK(pmap); 3202 for (va = sva; va < eva; va += PAGE_SIZE) { 3203 if ((pte = pte_find(mmu, pmap, va)) != NULL && 3204 PTE_ISVALID(pte)) { 3205 if (!PTE_ISWIRED(pte)) 3206 panic("mmu_booke_unwire: pte %p isn't wired", 3207 pte); 3208 *pte &= ~PTE_WIRED; 3209 pmap->pm_stats.wired_count--; 3210 } 3211 } 3212 PMAP_UNLOCK(pmap); 3213 3214 } 3215 3216 /* 3217 * Return true if the pmap's pv is one of the first 16 pvs linked to from this 3218 * page. This count may be changed upwards or downwards in the future; it is 3219 * only necessary that true be returned for a small subset of pmaps for proper 3220 * page aging. 3221 */ 3222 static boolean_t 3223 mmu_booke_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m) 3224 { 3225 pv_entry_t pv; 3226 int loops; 3227 boolean_t rv; 3228 3229 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3230 ("mmu_booke_page_exists_quick: page %p is not managed", m)); 3231 loops = 0; 3232 rv = FALSE; 3233 rw_wlock(&pvh_global_lock); 3234 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3235 if (pv->pv_pmap == pmap) { 3236 rv = TRUE; 3237 break; 3238 } 3239 if (++loops >= 16) 3240 break; 3241 } 3242 rw_wunlock(&pvh_global_lock); 3243 return (rv); 3244 } 3245 3246 /* 3247 * Return the number of managed mappings to the given physical page that are 3248 * wired. 
3249 */ 3250 static int 3251 mmu_booke_page_wired_mappings(mmu_t mmu, vm_page_t m) 3252 { 3253 pv_entry_t pv; 3254 pte_t *pte; 3255 int count = 0; 3256 3257 if ((m->oflags & VPO_UNMANAGED) != 0) 3258 return (count); 3259 rw_wlock(&pvh_global_lock); 3260 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3261 PMAP_LOCK(pv->pv_pmap); 3262 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) 3263 if (PTE_ISVALID(pte) && PTE_ISWIRED(pte)) 3264 count++; 3265 PMAP_UNLOCK(pv->pv_pmap); 3266 } 3267 rw_wunlock(&pvh_global_lock); 3268 return (count); 3269 } 3270 3271 static int 3272 mmu_booke_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size) 3273 { 3274 int i; 3275 vm_offset_t va; 3276 3277 /* 3278 * This currently does not work for entries that 3279 * overlap TLB1 entries. 3280 */ 3281 for (i = 0; i < TLB1_ENTRIES; i ++) { 3282 if (tlb1_iomapped(i, pa, size, &va) == 0) 3283 return (0); 3284 } 3285 3286 return (EFAULT); 3287 } 3288 3289 void 3290 mmu_booke_dumpsys_map(mmu_t mmu, vm_paddr_t pa, size_t sz, void **va) 3291 { 3292 vm_paddr_t ppa; 3293 vm_offset_t ofs; 3294 vm_size_t gran; 3295 3296 /* Minidumps are based on virtual memory addresses. */ 3297 if (do_minidump) { 3298 *va = (void *)(vm_offset_t)pa; 3299 return; 3300 } 3301 3302 /* Raw physical memory dumps don't have a virtual address. */ 3303 /* We always map a 256MB page at 256M. */ 3304 gran = 256 * 1024 * 1024; 3305 ppa = rounddown2(pa, gran); 3306 ofs = pa - ppa; 3307 *va = (void *)gran; 3308 tlb1_set_entry((vm_offset_t)va, ppa, gran, _TLB_ENTRY_IO); 3309 3310 if (sz > (gran - ofs)) 3311 tlb1_set_entry((vm_offset_t)(va + gran), ppa + gran, gran, 3312 _TLB_ENTRY_IO); 3313 } 3314 3315 void 3316 mmu_booke_dumpsys_unmap(mmu_t mmu, vm_paddr_t pa, size_t sz, void *va) 3317 { 3318 vm_paddr_t ppa; 3319 vm_offset_t ofs; 3320 vm_size_t gran; 3321 tlb_entry_t e; 3322 int i; 3323 3324 /* Minidumps are based on virtual memory addresses. */ 3325 /* Nothing to do... */ 3326 if (do_minidump) 3327 return; 3328 3329 for (i = 0; i < TLB1_ENTRIES; i++) { 3330 tlb1_read_entry(&e, i); 3331 if (!(e.mas1 & MAS1_VALID)) 3332 break; 3333 } 3334 3335 /* Raw physical memory dumps don't have a virtual address. */ 3336 i--; 3337 e.mas1 = 0; 3338 e.mas2 = 0; 3339 e.mas3 = 0; 3340 tlb1_write_entry(&e, i); 3341 3342 gran = 256 * 1024 * 1024; 3343 ppa = rounddown2(pa, gran); 3344 ofs = pa - ppa; 3345 if (sz > (gran - ofs)) { 3346 i--; 3347 e.mas1 = 0; 3348 e.mas2 = 0; 3349 e.mas3 = 0; 3350 tlb1_write_entry(&e, i); 3351 } 3352 } 3353 3354 extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1]; 3355 3356 void 3357 mmu_booke_scan_init(mmu_t mmu) 3358 { 3359 vm_offset_t va; 3360 pte_t *pte; 3361 int i; 3362 3363 if (!do_minidump) { 3364 /* Initialize phys. segments for dumpsys(). */ 3365 memset(&dump_map, 0, sizeof(dump_map)); 3366 mem_regions(&physmem_regions, &physmem_regions_sz, &availmem_regions, 3367 &availmem_regions_sz); 3368 for (i = 0; i < physmem_regions_sz; i++) { 3369 dump_map[i].pa_start = physmem_regions[i].mr_start; 3370 dump_map[i].pa_size = physmem_regions[i].mr_size; 3371 } 3372 return; 3373 } 3374 3375 /* Virtual segments for minidumps: */ 3376 memset(&dump_map, 0, sizeof(dump_map)); 3377 3378 /* 1st: kernel .data and .bss. */ 3379 dump_map[0].pa_start = trunc_page((uintptr_t)_etext); 3380 dump_map[0].pa_size = 3381 round_page((uintptr_t)_end) - dump_map[0].pa_start; 3382 3383 /* 2nd: msgbuf and tables (see pmap_bootstrap()). */ 3384 dump_map[1].pa_start = data_start; 3385 dump_map[1].pa_size = data_end - data_start; 3386 3387 /* 3rd: kernel VM. 
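 * Scan forward from the end of the 2nd segment for the first valid kernel
 * PTE (skipping the buffer cache), then extend the chunk until the first
 * invalid PTE; the result becomes dump_map[2].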
*/ 3388 va = dump_map[1].pa_start + dump_map[1].pa_size; 3389 /* Find start of next chunk (from va). */ 3390 while (va < virtual_end) { 3391 /* Don't dump the buffer cache. */ 3392 if (va >= kmi.buffer_sva && va < kmi.buffer_eva) { 3393 va = kmi.buffer_eva; 3394 continue; 3395 } 3396 pte = pte_find(mmu, kernel_pmap, va); 3397 if (pte != NULL && PTE_ISVALID(pte)) 3398 break; 3399 va += PAGE_SIZE; 3400 } 3401 if (va < virtual_end) { 3402 dump_map[2].pa_start = va; 3403 va += PAGE_SIZE; 3404 /* Find last page in chunk. */ 3405 while (va < virtual_end) { 3406 /* Don't run into the buffer cache. */ 3407 if (va == kmi.buffer_sva) 3408 break; 3409 pte = pte_find(mmu, kernel_pmap, va); 3410 if (pte == NULL || !PTE_ISVALID(pte)) 3411 break; 3412 va += PAGE_SIZE; 3413 } 3414 dump_map[2].pa_size = va - dump_map[2].pa_start; 3415 } 3416 } 3417 3418 /* 3419 * Map a set of physical memory pages into the kernel virtual address space. 3420 * Return a pointer to where it is mapped. This routine is intended to be used 3421 * for mapping device memory, NOT real memory. 3422 */ 3423 static void * 3424 mmu_booke_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size) 3425 { 3426 3427 return (mmu_booke_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT)); 3428 } 3429 3430 static void * 3431 mmu_booke_mapdev_attr(mmu_t mmu, vm_paddr_t pa, vm_size_t size, vm_memattr_t ma) 3432 { 3433 tlb_entry_t e; 3434 void *res; 3435 uintptr_t va, tmpva; 3436 vm_size_t sz; 3437 int i; 3438 3439 /* 3440 * Check if this is premapped in TLB1. Note: this should probably also 3441 * check whether a sequence of TLB1 entries exist that match the 3442 * requirement, but now only checks the easy case. 3443 */ 3444 for (i = 0; i < TLB1_ENTRIES; i++) { 3445 tlb1_read_entry(&e, i); 3446 if (!(e.mas1 & MAS1_VALID)) 3447 continue; 3448 if (pa >= e.phys && 3449 (pa + size) <= (e.phys + e.size) && 3450 (ma == VM_MEMATTR_DEFAULT || 3451 tlb_calc_wimg(pa, ma) == 3452 (e.mas2 & (MAS2_WIMGE_MASK & ~_TLB_ENTRY_SHARED)))) 3453 return (void *)(e.virt + 3454 (vm_offset_t)(pa - e.phys)); 3455 } 3456 3457 size = roundup(size, PAGE_SIZE); 3458 3459 /* 3460 * The device mapping area is between VM_MAXUSER_ADDRESS and 3461 * VM_MIN_KERNEL_ADDRESS. This gives 1GB of device addressing. 3462 */ 3463 #ifdef SPARSE_MAPDEV 3464 /* 3465 * With a sparse mapdev, align to the largest starting region. This 3466 * could feasibly be optimized for a 'best-fit' alignment, but that 3467 * calculation could be very costly. 3468 * Align to the smaller of: 3469 * - first set bit in overlap of (pa & size mask) 3470 * - largest size envelope 3471 * 3472 * It's possible the device mapping may start at a PA that's not larger 3473 * than the size mask, so we need to offset in to maximize the TLB entry 3474 * range and minimize the number of used TLB entries. 3475 */ 3476 do { 3477 tmpva = tlb1_map_base; 3478 sz = ffsl(((1 << flsl(size-1)) - 1) & pa); 3479 sz = sz ? 
min(roundup(sz + 3, 4), flsl(size) - 1) : flsl(size) - 1; 3480 va = roundup(tlb1_map_base, 1 << sz) | (((1 << sz) - 1) & pa); 3481 #ifdef __powerpc64__ 3482 } while (!atomic_cmpset_long(&tlb1_map_base, tmpva, va + size)); 3483 #else 3484 } while (!atomic_cmpset_int(&tlb1_map_base, tmpva, va + size)); 3485 #endif 3486 #else 3487 #ifdef __powerpc64__ 3488 va = atomic_fetchadd_long(&tlb1_map_base, size); 3489 #else 3490 va = atomic_fetchadd_int(&tlb1_map_base, size); 3491 #endif 3492 #endif 3493 res = (void *)va; 3494 3495 do { 3496 sz = 1 << (ilog2(size) & ~1); 3497 /* Align size to PA */ 3498 if (pa % sz != 0) { 3499 do { 3500 sz >>= 2; 3501 } while (pa % sz != 0); 3502 } 3503 /* Now align from there to VA */ 3504 if (va % sz != 0) { 3505 do { 3506 sz >>= 2; 3507 } while (va % sz != 0); 3508 } 3509 if (bootverbose) 3510 printf("Wiring VA=%lx to PA=%jx (size=%lx)\n", 3511 va, (uintmax_t)pa, sz); 3512 if (tlb1_set_entry(va, pa, sz, 3513 _TLB_ENTRY_SHARED | tlb_calc_wimg(pa, ma)) < 0) 3514 return (NULL); 3515 size -= sz; 3516 pa += sz; 3517 va += sz; 3518 } while (size > 0); 3519 3520 return (res); 3521 } 3522 3523 /* 3524 * 'Unmap' a range mapped by mmu_booke_mapdev(). 3525 */ 3526 static void 3527 mmu_booke_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size) 3528 { 3529 #ifdef SUPPORTS_SHRINKING_TLB1 3530 vm_offset_t base, offset; 3531 3532 /* 3533 * Unmap only if this is inside kernel virtual space. 3534 */ 3535 if ((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)) { 3536 base = trunc_page(va); 3537 offset = va & PAGE_MASK; 3538 size = roundup(offset + size, PAGE_SIZE); 3539 kva_free(base, size); 3540 } 3541 #endif 3542 } 3543 3544 /* 3545 * mmu_booke_object_init_pt preloads the ptes for a given object into the 3546 * specified pmap. This eliminates the blast of soft faults on process startup 3547 * and immediately after an mmap. 3548 */ 3549 static void 3550 mmu_booke_object_init_pt(mmu_t mmu, pmap_t pmap, vm_offset_t addr, 3551 vm_object_t object, vm_pindex_t pindex, vm_size_t size) 3552 { 3553 3554 VM_OBJECT_ASSERT_WLOCKED(object); 3555 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 3556 ("mmu_booke_object_init_pt: non-device object")); 3557 } 3558 3559 /* 3560 * Perform the pmap work for mincore. 3561 */ 3562 static int 3563 mmu_booke_mincore(mmu_t mmu, pmap_t pmap, vm_offset_t addr, 3564 vm_paddr_t *locked_pa) 3565 { 3566 3567 /* XXX: this should be implemented at some point */ 3568 return (0); 3569 } 3570 3571 static int 3572 mmu_booke_change_attr(mmu_t mmu, vm_offset_t addr, vm_size_t sz, 3573 vm_memattr_t mode) 3574 { 3575 vm_offset_t va; 3576 pte_t *pte; 3577 int i, j; 3578 tlb_entry_t e; 3579 3580 /* Check TLB1 mappings */ 3581 for (i = 0; i < TLB1_ENTRIES; i++) { 3582 tlb1_read_entry(&e, i); 3583 if (!(e.mas1 & MAS1_VALID)) 3584 continue; 3585 if (addr >= e.virt && addr < e.virt + e.size) 3586 break; 3587 } 3588 if (i < TLB1_ENTRIES) { 3589 /* Only allow full mappings to be modified for now. */ 3590 /* Validate the range. */ 3591 for (j = i, va = addr; va < addr + sz; va += e.size, j++) { 3592 tlb1_read_entry(&e, j); 3593 if (va != e.virt || (sz - (va - addr) < e.size)) 3594 return (EINVAL); 3595 } 3596 for (va = addr; va < addr + sz; va += e.size, i++) { 3597 tlb1_read_entry(&e, i); 3598 e.mas2 &= ~MAS2_WIMGE_MASK; 3599 e.mas2 |= tlb_calc_wimg(e.phys, mode); 3600 3601 /* 3602 * Write it out to the TLB. Should really re-sync with other 3603 * cores. 
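 * (tlb1_write_entry() does rendezvous the other CPUs for entries marked
 * _TLB_ENTRY_SHARED once SMP is started; non-shared entries are only
 * updated locally.)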
3604 */ 3605 tlb1_write_entry(&e, i); 3606 } 3607 return (0); 3608 } 3609 3610 /* Not in TLB1, try through pmap */ 3611 /* First validate the range. */ 3612 for (va = addr; va < addr + sz; va += PAGE_SIZE) { 3613 pte = pte_find(mmu, kernel_pmap, va); 3614 if (pte == NULL || !PTE_ISVALID(pte)) 3615 return (EINVAL); 3616 } 3617 3618 mtx_lock_spin(&tlbivax_mutex); 3619 tlb_miss_lock(); 3620 for (va = addr; va < addr + sz; va += PAGE_SIZE) { 3621 pte = pte_find(mmu, kernel_pmap, va); 3622 *pte &= ~(PTE_MAS2_MASK << PTE_MAS2_SHIFT); 3623 *pte |= tlb_calc_wimg(PTE_PA(pte), mode) << PTE_MAS2_SHIFT; 3624 tlb0_flush_entry(va); 3625 } 3626 tlb_miss_unlock(); 3627 mtx_unlock_spin(&tlbivax_mutex); 3628 3629 return (0); 3630 } 3631 3632 /**************************************************************************/ 3633 /* TID handling */ 3634 /**************************************************************************/ 3635 3636 /* 3637 * Allocate a TID. If necessary, steal one from someone else. 3638 * The new TID is flushed from the TLB before returning. 3639 */ 3640 static tlbtid_t 3641 tid_alloc(pmap_t pmap) 3642 { 3643 tlbtid_t tid; 3644 int thiscpu; 3645 3646 KASSERT((pmap != kernel_pmap), ("tid_alloc: kernel pmap")); 3647 3648 CTR2(KTR_PMAP, "%s: s (pmap = %p)", __func__, pmap); 3649 3650 thiscpu = PCPU_GET(cpuid); 3651 3652 tid = PCPU_GET(booke.tid_next); 3653 if (tid > TID_MAX) 3654 tid = TID_MIN; 3655 PCPU_SET(booke.tid_next, tid + 1); 3656 3657 /* If we are stealing TID then clear the relevant pmap's field */ 3658 if (tidbusy[thiscpu][tid] != NULL) { 3659 3660 CTR2(KTR_PMAP, "%s: warning: stealing tid %d", __func__, tid); 3661 3662 tidbusy[thiscpu][tid]->pm_tid[thiscpu] = TID_NONE; 3663 3664 /* Flush all entries from TLB0 matching this TID. */ 3665 tid_flush(tid); 3666 } 3667 3668 tidbusy[thiscpu][tid] = pmap; 3669 pmap->pm_tid[thiscpu] = tid; 3670 __asm __volatile("msync; isync"); 3671 3672 CTR3(KTR_PMAP, "%s: e (%02d next = %02d)", __func__, tid, 3673 PCPU_GET(booke.tid_next)); 3674 3675 return (tid); 3676 } 3677 3678 /**************************************************************************/ 3679 /* TLB0 handling */ 3680 /**************************************************************************/ 3681 3682 /* Convert TLB0 va and way number to tlb0[] table index. */ 3683 static inline unsigned int 3684 tlb0_tableidx(vm_offset_t va, unsigned int way) 3685 { 3686 unsigned int idx; 3687 3688 idx = (way * TLB0_ENTRIES_PER_WAY); 3689 idx += (va & MAS2_TLB0_ENTRY_IDX_MASK) >> MAS2_TLB0_ENTRY_IDX_SHIFT; 3690 return (idx); 3691 } 3692 3693 /* 3694 * Invalidate TLB0 entry. 3695 */ 3696 static inline void 3697 tlb0_flush_entry(vm_offset_t va) 3698 { 3699 3700 CTR2(KTR_PMAP, "%s: s va=0x%08x", __func__, va); 3701 3702 mtx_assert(&tlbivax_mutex, MA_OWNED); 3703 3704 __asm __volatile("tlbivax 0, %0" :: "r"(va & MAS2_EPN_MASK)); 3705 __asm __volatile("isync; msync"); 3706 __asm __volatile("tlbsync; msync"); 3707 3708 CTR1(KTR_PMAP, "%s: e", __func__); 3709 } 3710 3711 3712 /**************************************************************************/ 3713 /* TLB1 handling */ 3714 /**************************************************************************/ 3715 3716 /* 3717 * TLB1 mapping notes: 3718 * 3719 * TLB1[0] Kernel text and data. 3720 * TLB1[1-15] Additional kernel text and data mappings (if required), PCI 3721 * windows, other devices mappings. 3722 */ 3723 3724 /* 3725 * Read an entry from given TLB1 slot. 
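 * External interrupts are disabled around the MAS0/tlbre/MAS read sequence
 * so an interrupt handler cannot clobber the MAS registers mid-read.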
3726 */ 3727 void 3728 tlb1_read_entry(tlb_entry_t *entry, unsigned int slot) 3729 { 3730 register_t msr; 3731 uint32_t mas0; 3732 3733 KASSERT((entry != NULL), ("%s(): Entry is NULL!", __func__)); 3734 3735 msr = mfmsr(); 3736 __asm __volatile("wrteei 0"); 3737 3738 mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(slot); 3739 mtspr(SPR_MAS0, mas0); 3740 __asm __volatile("isync; tlbre"); 3741 3742 entry->mas1 = mfspr(SPR_MAS1); 3743 entry->mas2 = mfspr(SPR_MAS2); 3744 entry->mas3 = mfspr(SPR_MAS3); 3745 3746 switch ((mfpvr() >> 16) & 0xFFFF) { 3747 case FSL_E500v2: 3748 case FSL_E500mc: 3749 case FSL_E5500: 3750 case FSL_E6500: 3751 entry->mas7 = mfspr(SPR_MAS7); 3752 break; 3753 default: 3754 entry->mas7 = 0; 3755 break; 3756 } 3757 __asm __volatile("wrtee %0" :: "r"(msr)); 3758 3759 entry->virt = entry->mas2 & MAS2_EPN_MASK; 3760 entry->phys = ((vm_paddr_t)(entry->mas7 & MAS7_RPN) << 32) | 3761 (entry->mas3 & MAS3_RPN); 3762 entry->size = 3763 tsize2size((entry->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT); 3764 } 3765 3766 struct tlbwrite_args { 3767 tlb_entry_t *e; 3768 unsigned int idx; 3769 }; 3770 3771 static void 3772 tlb1_write_entry_int(void *arg) 3773 { 3774 struct tlbwrite_args *args = arg; 3775 uint32_t mas0; 3776 3777 /* Select entry */ 3778 mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(args->idx); 3779 3780 mtspr(SPR_MAS0, mas0); 3781 mtspr(SPR_MAS1, args->e->mas1); 3782 mtspr(SPR_MAS2, args->e->mas2); 3783 mtspr(SPR_MAS3, args->e->mas3); 3784 switch ((mfpvr() >> 16) & 0xFFFF) { 3785 case FSL_E500mc: 3786 case FSL_E5500: 3787 case FSL_E6500: 3788 mtspr(SPR_MAS8, 0); 3789 /* FALLTHROUGH */ 3790 case FSL_E500v2: 3791 mtspr(SPR_MAS7, args->e->mas7); 3792 break; 3793 default: 3794 break; 3795 } 3796 3797 __asm __volatile("isync; tlbwe; isync; msync"); 3798 3799 } 3800 3801 static void 3802 tlb1_write_entry_sync(void *arg) 3803 { 3804 /* Empty synchronization point for smp_rendezvous(). */ 3805 } 3806 3807 /* 3808 * Write given entry to TLB1 hardware. 3809 */ 3810 static void 3811 tlb1_write_entry(tlb_entry_t *e, unsigned int idx) 3812 { 3813 struct tlbwrite_args args; 3814 3815 args.e = e; 3816 args.idx = idx; 3817 3818 #ifdef SMP 3819 if ((e->mas2 & _TLB_ENTRY_SHARED) && smp_started) { 3820 mb(); 3821 smp_rendezvous(tlb1_write_entry_sync, 3822 tlb1_write_entry_int, 3823 tlb1_write_entry_sync, &args); 3824 } else 3825 #endif 3826 { 3827 register_t msr; 3828 3829 msr = mfmsr(); 3830 __asm __volatile("wrteei 0"); 3831 tlb1_write_entry_int(&args); 3832 __asm __volatile("wrtee %0" :: "r"(msr)); 3833 } 3834 } 3835 3836 /* 3837 * Return the largest uint value log such that 2^log <= num. 3838 */ 3839 static unsigned int 3840 ilog2(unsigned long num) 3841 { 3842 long lz; 3843 3844 #ifdef __powerpc64__ 3845 __asm ("cntlzd %0, %1" : "=r" (lz) : "r" (num)); 3846 return (63 - lz); 3847 #else 3848 __asm ("cntlzw %0, %1" : "=r" (lz) : "r" (num)); 3849 return (31 - lz); 3850 #endif 3851 } 3852 3853 /* 3854 * Convert TLB TSIZE value to mapped region size. 3855 */ 3856 static vm_size_t 3857 tsize2size(unsigned int tsize) 3858 { 3859 3860 /* 3861 * size = 4^tsize KB 3862 * size = 4^tsize * 2^10 = 2^(2 * tsize - 10) 3863 */ 3864 3865 return ((1 << (2 * tsize)) * 1024); 3866 } 3867 3868 /* 3869 * Convert region size (must be power of 4) to TLB TSIZE value. 3870 */ 3871 static unsigned int 3872 size2tsize(vm_size_t size) 3873 { 3874 3875 return (ilog2(size) / 2 - 5); 3876 } 3877 3878 /* 3879 * Register permanent kernel mapping in TLB1. 
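 * The size must be a power of 4 so that size2tsize() yields a valid MAS1
 * TSIZE; e.g. a 16MB entry gets TSIZE 7 (4^7 KB = 16MB), and tsize2size(7)
 * maps back to 16MB.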
3880 * 3881 * Entries are created starting from index 0 (current free entry is 3882 * kept in tlb1_idx) and are not supposed to be invalidated. 3883 */ 3884 int 3885 tlb1_set_entry(vm_offset_t va, vm_paddr_t pa, vm_size_t size, 3886 uint32_t flags) 3887 { 3888 tlb_entry_t e; 3889 uint32_t ts, tid; 3890 int tsize, index; 3891 3892 for (index = 0; index < TLB1_ENTRIES; index++) { 3893 tlb1_read_entry(&e, index); 3894 if ((e.mas1 & MAS1_VALID) == 0) 3895 break; 3896 /* Check if we're just updating the flags, and update them. */ 3897 if (e.phys == pa && e.virt == va && e.size == size) { 3898 e.mas2 = (va & MAS2_EPN_MASK) | flags; 3899 tlb1_write_entry(&e, index); 3900 return (0); 3901 } 3902 } 3903 if (index >= TLB1_ENTRIES) { 3904 printf("tlb1_set_entry: TLB1 full!\n"); 3905 return (-1); 3906 } 3907 3908 /* Convert size to TSIZE */ 3909 tsize = size2tsize(size); 3910 3911 tid = (TID_KERNEL << MAS1_TID_SHIFT) & MAS1_TID_MASK; 3912 /* XXX TS is hard coded to 0 for now as we only use single address space */ 3913 ts = (0 << MAS1_TS_SHIFT) & MAS1_TS_MASK; 3914 3915 e.phys = pa; 3916 e.virt = va; 3917 e.size = size; 3918 e.mas1 = MAS1_VALID | MAS1_IPROT | ts | tid; 3919 e.mas1 |= ((tsize << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK); 3920 e.mas2 = (va & MAS2_EPN_MASK) | flags; 3921 3922 /* Set supervisor RWX permission bits */ 3923 e.mas3 = (pa & MAS3_RPN) | MAS3_SR | MAS3_SW | MAS3_SX; 3924 e.mas7 = (pa >> 32) & MAS7_RPN; 3925 3926 tlb1_write_entry(&e, index); 3927 3928 /* 3929 * XXX in general TLB1 updates should be propagated between CPUs, 3930 * since current design assumes to have the same TLB1 set-up on all 3931 * cores. 3932 */ 3933 return (0); 3934 } 3935 3936 /* 3937 * Map in contiguous RAM region into the TLB1 using maximum of 3938 * KERNEL_REGION_MAX_TLB_ENTRIES entries. 3939 * 3940 * If necessary round up last entry size and return total size 3941 * used by all allocated entries. 3942 */ 3943 vm_size_t 3944 tlb1_mapin_region(vm_offset_t va, vm_paddr_t pa, vm_size_t size) 3945 { 3946 vm_size_t pgs[KERNEL_REGION_MAX_TLB_ENTRIES]; 3947 vm_size_t mapped, pgsz, base, mask; 3948 int idx, nents; 3949 3950 /* Round up to the next 1M */ 3951 size = roundup2(size, 1 << 20); 3952 3953 mapped = 0; 3954 idx = 0; 3955 base = va; 3956 pgsz = 64*1024*1024; 3957 while (mapped < size) { 3958 while (mapped < size && idx < KERNEL_REGION_MAX_TLB_ENTRIES) { 3959 while (pgsz > (size - mapped)) 3960 pgsz >>= 2; 3961 pgs[idx++] = pgsz; 3962 mapped += pgsz; 3963 } 3964 3965 /* We under-map. Correct for this. */ 3966 if (mapped < size) { 3967 while (pgs[idx - 1] == pgsz) { 3968 idx--; 3969 mapped -= pgsz; 3970 } 3971 /* XXX We may increase beyond out starting point. */ 3972 pgsz <<= 2; 3973 pgs[idx++] = pgsz; 3974 mapped += pgsz; 3975 } 3976 } 3977 3978 nents = idx; 3979 mask = pgs[0] - 1; 3980 /* Align address to the boundary */ 3981 if (va & mask) { 3982 va = (va + mask) & ~mask; 3983 pa = (pa + mask) & ~mask; 3984 } 3985 3986 for (idx = 0; idx < nents; idx++) { 3987 pgsz = pgs[idx]; 3988 debugf("%u: %llx -> %jx, size=%jx\n", idx, pa, 3989 (uintmax_t)va, (uintmax_t)pgsz); 3990 tlb1_set_entry(va, pa, pgsz, 3991 _TLB_ENTRY_SHARED | _TLB_ENTRY_MEM); 3992 pa += pgsz; 3993 va += pgsz; 3994 } 3995 3996 mapped = (va - base); 3997 if (bootverbose) 3998 printf("mapped size 0x%"PRIxPTR" (wasted space 0x%"PRIxPTR")\n", 3999 mapped, mapped - size); 4000 return (mapped); 4001 } 4002 4003 /* 4004 * TLB1 initialization routine, to be called after the very first 4005 * assembler level setup done in locore.S. 
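 * It reads back TLB1[0] (the boot-time kernel mapping set up in locore) to
 * recover kernload and to account for that mapping's size in kernsize, and
 * then programs the MAS4 defaults.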
4006 */ 4007 void 4008 tlb1_init() 4009 { 4010 uint32_t mas0, mas1, mas2, mas3, mas7; 4011 uint32_t tsz; 4012 4013 tlb1_get_tlbconf(); 4014 4015 mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(0); 4016 mtspr(SPR_MAS0, mas0); 4017 __asm __volatile("isync; tlbre"); 4018 4019 mas1 = mfspr(SPR_MAS1); 4020 mas2 = mfspr(SPR_MAS2); 4021 mas3 = mfspr(SPR_MAS3); 4022 mas7 = mfspr(SPR_MAS7); 4023 4024 kernload = ((vm_paddr_t)(mas7 & MAS7_RPN) << 32) | 4025 (mas3 & MAS3_RPN); 4026 4027 tsz = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; 4028 kernsize += (tsz > 0) ? tsize2size(tsz) : 0; 4029 4030 /* Setup TLB miss defaults */ 4031 set_mas4_defaults(); 4032 } 4033 4034 /* 4035 * pmap_early_io_unmap() should be used in short conjunction with 4036 * pmap_early_io_map(), as in the following snippet: 4037 * 4038 * x = pmap_early_io_map(...); 4039 * <do something with x> 4040 * pmap_early_io_unmap(x, size); 4041 * 4042 * And avoiding more allocations between. 4043 */ 4044 void 4045 pmap_early_io_unmap(vm_offset_t va, vm_size_t size) 4046 { 4047 int i; 4048 tlb_entry_t e; 4049 vm_size_t isize; 4050 4051 size = roundup(size, PAGE_SIZE); 4052 isize = size; 4053 for (i = 0; i < TLB1_ENTRIES && size > 0; i++) { 4054 tlb1_read_entry(&e, i); 4055 if (!(e.mas1 & MAS1_VALID)) 4056 continue; 4057 if (va <= e.virt && (va + isize) >= (e.virt + e.size)) { 4058 size -= e.size; 4059 e.mas1 &= ~MAS1_VALID; 4060 tlb1_write_entry(&e, i); 4061 } 4062 } 4063 if (tlb1_map_base == va + isize) 4064 tlb1_map_base -= isize; 4065 } 4066 4067 vm_offset_t 4068 pmap_early_io_map(vm_paddr_t pa, vm_size_t size) 4069 { 4070 vm_paddr_t pa_base; 4071 vm_offset_t va, sz; 4072 int i; 4073 tlb_entry_t e; 4074 4075 KASSERT(!pmap_bootstrapped, ("Do not use after PMAP is up!")); 4076 4077 for (i = 0; i < TLB1_ENTRIES; i++) { 4078 tlb1_read_entry(&e, i); 4079 if (!(e.mas1 & MAS1_VALID)) 4080 continue; 4081 if (pa >= e.phys && (pa + size) <= 4082 (e.phys + e.size)) 4083 return (e.virt + (pa - e.phys)); 4084 } 4085 4086 pa_base = rounddown(pa, PAGE_SIZE); 4087 size = roundup(size + (pa - pa_base), PAGE_SIZE); 4088 tlb1_map_base = roundup2(tlb1_map_base, 1 << (ilog2(size) & ~1)); 4089 va = tlb1_map_base + (pa - pa_base); 4090 4091 do { 4092 sz = 1 << (ilog2(size) & ~1); 4093 tlb1_set_entry(tlb1_map_base, pa_base, sz, 4094 _TLB_ENTRY_SHARED | _TLB_ENTRY_IO); 4095 size -= sz; 4096 pa_base += sz; 4097 tlb1_map_base += sz; 4098 } while (size > 0); 4099 4100 return (va); 4101 } 4102 4103 void 4104 pmap_track_page(pmap_t pmap, vm_offset_t va) 4105 { 4106 vm_paddr_t pa; 4107 vm_page_t page; 4108 struct pv_entry *pve; 4109 4110 va = trunc_page(va); 4111 pa = pmap_kextract(va); 4112 page = PHYS_TO_VM_PAGE(pa); 4113 4114 rw_wlock(&pvh_global_lock); 4115 PMAP_LOCK(pmap); 4116 4117 TAILQ_FOREACH(pve, &page->md.pv_list, pv_link) { 4118 if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) { 4119 goto out; 4120 } 4121 } 4122 page->md.pv_tracked = true; 4123 pv_insert(pmap, va, page); 4124 out: 4125 PMAP_UNLOCK(pmap); 4126 rw_wunlock(&pvh_global_lock); 4127 } 4128 4129 4130 /* 4131 * Setup MAS4 defaults. 4132 * These values are loaded to MAS0-2 on a TLB miss. 
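 *
 * On a miss the core seeds MAS0.TLBSEL from MAS4.TLBSELD, MAS1.TSIZE
 * from MAS4.TSIZED and the MAS2 WIMGE bits from their MAS4 defaults,
 * so with the values chosen below a software reload targets TLB0 with
 * a 4KB page size (and, on SMP, with the M bit set so reloads default
 * to coherent memory).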
4133 */ 4134 static void 4135 set_mas4_defaults(void) 4136 { 4137 uint32_t mas4; 4138 4139 /* Defaults: TLB0, PID0, TSIZED=4K */ 4140 mas4 = MAS4_TLBSELD0; 4141 mas4 |= (TLB_SIZE_4K << MAS4_TSIZED_SHIFT) & MAS4_TSIZED_MASK; 4142 #ifdef SMP 4143 mas4 |= MAS4_MD; 4144 #endif 4145 mtspr(SPR_MAS4, mas4); 4146 __asm __volatile("isync"); 4147 } 4148 4149 4150 /* 4151 * Return 0 if the physical IO range is encompassed by one of the 4152 * the TLB1 entries, otherwise return related error code. 4153 */ 4154 static int 4155 tlb1_iomapped(int i, vm_paddr_t pa, vm_size_t size, vm_offset_t *va) 4156 { 4157 uint32_t prot; 4158 vm_paddr_t pa_start; 4159 vm_paddr_t pa_end; 4160 unsigned int entry_tsize; 4161 vm_size_t entry_size; 4162 tlb_entry_t e; 4163 4164 *va = (vm_offset_t)NULL; 4165 4166 tlb1_read_entry(&e, i); 4167 /* Skip invalid entries */ 4168 if (!(e.mas1 & MAS1_VALID)) 4169 return (EINVAL); 4170 4171 /* 4172 * The entry must be cache-inhibited, guarded, and r/w 4173 * so it can function as an i/o page 4174 */ 4175 prot = e.mas2 & (MAS2_I | MAS2_G); 4176 if (prot != (MAS2_I | MAS2_G)) 4177 return (EPERM); 4178 4179 prot = e.mas3 & (MAS3_SR | MAS3_SW); 4180 if (prot != (MAS3_SR | MAS3_SW)) 4181 return (EPERM); 4182 4183 /* The address should be within the entry range. */ 4184 entry_tsize = (e.mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; 4185 KASSERT((entry_tsize), ("tlb1_iomapped: invalid entry tsize")); 4186 4187 entry_size = tsize2size(entry_tsize); 4188 pa_start = (((vm_paddr_t)e.mas7 & MAS7_RPN) << 32) | 4189 (e.mas3 & MAS3_RPN); 4190 pa_end = pa_start + entry_size; 4191 4192 if ((pa < pa_start) || ((pa + size) > pa_end)) 4193 return (ERANGE); 4194 4195 /* Return virtual address of this mapping. */ 4196 *va = (e.mas2 & MAS2_EPN_MASK) + (pa - pa_start); 4197 return (0); 4198 } 4199 4200 /* 4201 * Invalidate all TLB0 entries which match the given TID. Note this is 4202 * dedicated for cases when invalidations should NOT be propagated to other 4203 * CPUs. 4204 */ 4205 static void 4206 tid_flush(tlbtid_t tid) 4207 { 4208 register_t msr; 4209 uint32_t mas0, mas1, mas2; 4210 int entry, way; 4211 4212 4213 /* Don't evict kernel translations */ 4214 if (tid == TID_KERNEL) 4215 return; 4216 4217 msr = mfmsr(); 4218 __asm __volatile("wrteei 0"); 4219 4220 /* 4221 * Newer (e500mc and later) have tlbilx, which doesn't broadcast, so use 4222 * it for PID invalidation. 
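	 * Unlike tlbivax, tlbilx acts only on the executing core, which is
	 * what is wanted here since tid_flush() must not propagate
	 * invalidations to other CPUs; the PID to invalidate is supplied
	 * through MAS6.SPID, set up just below.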
4223 */ 4224 switch ((mfpvr() >> 16) & 0xffff) { 4225 case FSL_E500mc: 4226 case FSL_E5500: 4227 case FSL_E6500: 4228 mtspr(SPR_MAS6, tid << MAS6_SPID0_SHIFT); 4229 /* tlbilxpid */ 4230 __asm __volatile("isync; .long 0x7c000024; isync; msync"); 4231 __asm __volatile("wrtee %0" :: "r"(msr)); 4232 return; 4233 } 4234 4235 for (way = 0; way < TLB0_WAYS; way++) 4236 for (entry = 0; entry < TLB0_ENTRIES_PER_WAY; entry++) { 4237 4238 mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way); 4239 mtspr(SPR_MAS0, mas0); 4240 4241 mas2 = entry << MAS2_TLB0_ENTRY_IDX_SHIFT; 4242 mtspr(SPR_MAS2, mas2); 4243 4244 __asm __volatile("isync; tlbre"); 4245 4246 mas1 = mfspr(SPR_MAS1); 4247 4248 if (!(mas1 & MAS1_VALID)) 4249 continue; 4250 if (((mas1 & MAS1_TID_MASK) >> MAS1_TID_SHIFT) != tid) 4251 continue; 4252 mas1 &= ~MAS1_VALID; 4253 mtspr(SPR_MAS1, mas1); 4254 __asm __volatile("isync; tlbwe; isync; msync"); 4255 } 4256 __asm __volatile("wrtee %0" :: "r"(msr)); 4257 } 4258 4259 #ifdef DDB 4260 /* Print out contents of the MAS registers for each TLB0 entry */ 4261 static void 4262 #ifdef __powerpc64__ 4263 tlb_print_entry(int i, uint32_t mas1, uint64_t mas2, uint32_t mas3, 4264 #else 4265 tlb_print_entry(int i, uint32_t mas1, uint32_t mas2, uint32_t mas3, 4266 #endif 4267 uint32_t mas7) 4268 { 4269 int as; 4270 char desc[3]; 4271 tlbtid_t tid; 4272 vm_size_t size; 4273 unsigned int tsize; 4274 4275 desc[2] = '\0'; 4276 if (mas1 & MAS1_VALID) 4277 desc[0] = 'V'; 4278 else 4279 desc[0] = ' '; 4280 4281 if (mas1 & MAS1_IPROT) 4282 desc[1] = 'P'; 4283 else 4284 desc[1] = ' '; 4285 4286 as = (mas1 & MAS1_TS_MASK) ? 1 : 0; 4287 tid = MAS1_GETTID(mas1); 4288 4289 tsize = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; 4290 size = 0; 4291 if (tsize) 4292 size = tsize2size(tsize); 4293 4294 printf("%3d: (%s) [AS=%d] " 4295 "sz = 0x%08x tsz = %d tid = %d mas1 = 0x%08x " 4296 "mas2(va) = 0x%"PRI0ptrX" mas3(pa) = 0x%08x mas7 = 0x%08x\n", 4297 i, desc, as, size, tsize, tid, mas1, mas2, mas3, mas7); 4298 } 4299 4300 DB_SHOW_COMMAND(tlb0, tlb0_print_tlbentries) 4301 { 4302 uint32_t mas0, mas1, mas3, mas7; 4303 #ifdef __powerpc64__ 4304 uint64_t mas2; 4305 #else 4306 uint32_t mas2; 4307 #endif 4308 int entryidx, way, idx; 4309 4310 printf("TLB0 entries:\n"); 4311 for (way = 0; way < TLB0_WAYS; way ++) 4312 for (entryidx = 0; entryidx < TLB0_ENTRIES_PER_WAY; entryidx++) { 4313 4314 mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way); 4315 mtspr(SPR_MAS0, mas0); 4316 4317 mas2 = entryidx << MAS2_TLB0_ENTRY_IDX_SHIFT; 4318 mtspr(SPR_MAS2, mas2); 4319 4320 __asm __volatile("isync; tlbre"); 4321 4322 mas1 = mfspr(SPR_MAS1); 4323 mas2 = mfspr(SPR_MAS2); 4324 mas3 = mfspr(SPR_MAS3); 4325 mas7 = mfspr(SPR_MAS7); 4326 4327 idx = tlb0_tableidx(mas2, way); 4328 tlb_print_entry(idx, mas1, mas2, mas3, mas7); 4329 } 4330 } 4331 4332 /* 4333 * Print out contents of the MAS registers for each TLB1 entry 4334 */ 4335 DB_SHOW_COMMAND(tlb1, tlb1_print_tlbentries) 4336 { 4337 uint32_t mas0, mas1, mas3, mas7; 4338 #ifdef __powerpc64__ 4339 uint64_t mas2; 4340 #else 4341 uint32_t mas2; 4342 #endif 4343 int i; 4344 4345 printf("TLB1 entries:\n"); 4346 for (i = 0; i < TLB1_ENTRIES; i++) { 4347 4348 mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(i); 4349 mtspr(SPR_MAS0, mas0); 4350 4351 __asm __volatile("isync; tlbre"); 4352 4353 mas1 = mfspr(SPR_MAS1); 4354 mas2 = mfspr(SPR_MAS2); 4355 mas3 = mfspr(SPR_MAS3); 4356 mas7 = mfspr(SPR_MAS7); 4357 4358 tlb_print_entry(i, mas1, mas2, mas3, mas7); 4359 } 4360 } 4361 #endif 4362
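
/*
 * From the kernel debugger the commands above are reached as
 * "show tlb0" and "show tlb1" and print one line per entry in the
 * format produced by tlb_print_entry(), e.g. (shortened, illustrative
 * output only):
 *
 *	db> show tlb1
 *	TLB1 entries:
 *	  0: (VP) [AS=0] sz = 0x04000000 tsz = 8 tid = 0 mas1 = ...
 */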