1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski <raj@semihalf.com> 5 * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 20 * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 22 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * Some hw specific parts of this pmap were derived or influenced 29 * by NetBSD's ibm4xx pmap module. More generic code is shared with 30 * a few other pmap modules from the FreeBSD tree. 31 */ 32 33 /* 34 * VM layout notes: 35 * 36 * Kernel and user threads run within one common virtual address space 37 * defined by AS=0. 38 * 39 * 32-bit pmap: 40 * Virtual address space layout: 41 * ----------------------------- 42 * 0x0000_0000 - 0x7fff_ffff : user process 43 * 0x8000_0000 - 0xbfff_ffff : pmap_mapdev()-ed area (PCI/PCIE etc.) 44 * 0xc000_0000 - 0xc0ff_ffff : kernel reserved 45 * 0xc000_0000 - data_end : kernel code+data, env, metadata etc. 46 * 0xc100_0000 - 0xffff_ffff : KVA 47 * 0xc100_0000 - 0xc100_3fff : reserved for page zero/copy 48 * 0xc100_4000 - 0xc200_3fff : reserved for ptbl bufs 49 * 0xc200_4000 - 0xc200_8fff : guard page + kstack0 50 * 0xc200_9000 - 0xfeef_ffff : actual free KVA space 51 * 52 * 64-bit pmap: 53 * Virtual address space layout: 54 * ----------------------------- 55 * 0x0000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff : user process 56 * 0x0000_0000_0000_0000 - 0x8fff_ffff_ffff_ffff : text, data, heap, maps, libraries 57 * 0x9000_0000_0000_0000 - 0xafff_ffff_ffff_ffff : mmio region 58 * 0xb000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff : stack 59 * 0xc000_0000_0000_0000 - 0xcfff_ffff_ffff_ffff : kernel reserved 60 * 0xc000_0000_0000_0000 - endkernel-1 : kernel code & data 61 * endkernel - msgbufp-1 : flat device tree 62 * msgbufp - kernel_pdir-1 : message buffer 63 * kernel_pdir - kernel_pp2d-1 : kernel page directory 64 * kernel_pp2d - . 
: kernel pointers to page directory 65 * pmap_zero_copy_min - crashdumpmap-1 : reserved for page zero/copy 66 * crashdumpmap - ptbl_buf_pool_vabase-1 : reserved for ptbl bufs 67 * ptbl_buf_pool_vabase - virtual_avail-1 : user page directories and page tables 68 * virtual_avail - 0xcfff_ffff_ffff_ffff : actual free KVA space 69 * 0xd000_0000_0000_0000 - 0xdfff_ffff_ffff_ffff : coprocessor region 70 * 0xe000_0000_0000_0000 - 0xefff_ffff_ffff_ffff : mmio region 71 * 0xf000_0000_0000_0000 - 0xffff_ffff_ffff_ffff : direct map 72 * 0xf000_0000_0000_0000 - +Maxmem : physmem map 73 * - 0xffff_ffff_ffff_ffff : device direct map 74 */ 75 76 #include <sys/cdefs.h> 77 __FBSDID("$FreeBSD$"); 78 79 #include "opt_ddb.h" 80 #include "opt_kstack_pages.h" 81 82 #include <sys/param.h> 83 #include <sys/conf.h> 84 #include <sys/malloc.h> 85 #include <sys/ktr.h> 86 #include <sys/proc.h> 87 #include <sys/user.h> 88 #include <sys/queue.h> 89 #include <sys/systm.h> 90 #include <sys/kernel.h> 91 #include <sys/kerneldump.h> 92 #include <sys/linker.h> 93 #include <sys/msgbuf.h> 94 #include <sys/lock.h> 95 #include <sys/mutex.h> 96 #include <sys/rwlock.h> 97 #include <sys/sched.h> 98 #include <sys/smp.h> 99 #include <sys/vmmeter.h> 100 101 #include <vm/vm.h> 102 #include <vm/vm_page.h> 103 #include <vm/vm_kern.h> 104 #include <vm/vm_pageout.h> 105 #include <vm/vm_extern.h> 106 #include <vm/vm_object.h> 107 #include <vm/vm_param.h> 108 #include <vm/vm_map.h> 109 #include <vm/vm_pager.h> 110 #include <vm/vm_phys.h> 111 #include <vm/vm_pagequeue.h> 112 #include <vm/uma.h> 113 114 #include <machine/_inttypes.h> 115 #include <machine/cpu.h> 116 #include <machine/pcb.h> 117 #include <machine/platform.h> 118 119 #include <machine/tlb.h> 120 #include <machine/spr.h> 121 #include <machine/md_var.h> 122 #include <machine/mmuvar.h> 123 #include <machine/pmap.h> 124 #include <machine/pte.h> 125 126 #include <ddb/ddb.h> 127 128 #include "mmu_if.h" 129 130 #define SPARSE_MAPDEV 131 #ifdef DEBUG 132 #define debugf(fmt, args...) printf(fmt, ##args) 133 #else 134 #define debugf(fmt, args...) 135 #endif 136 137 #ifdef __powerpc64__ 138 #define PRI0ptrX "016lx" 139 #else 140 #define PRI0ptrX "08x" 141 #endif 142 143 #define TODO panic("%s: not implemented", __func__); 144 145 extern unsigned char _etext[]; 146 extern unsigned char _end[]; 147 148 extern uint32_t *bootinfo; 149 150 vm_paddr_t kernload; 151 vm_offset_t kernstart; 152 vm_size_t kernsize; 153 154 /* Message buffer and tables. */ 155 static vm_offset_t data_start; 156 static vm_size_t data_end; 157 158 /* Phys/avail memory regions. */ 159 static struct mem_region *availmem_regions; 160 static int availmem_regions_sz; 161 static struct mem_region *physmem_regions; 162 static int physmem_regions_sz; 163 164 #ifndef __powerpc64__ 165 /* Reserved KVA space and mutex for mmu_booke_zero_page. */ 166 static vm_offset_t zero_page_va; 167 static struct mtx zero_page_mutex; 168 169 /* Reserved KVA space and mutex for mmu_booke_copy_page. */ 170 static vm_offset_t copy_page_src_va; 171 static vm_offset_t copy_page_dst_va; 172 static struct mtx copy_page_mutex; 173 #endif 174 175 static struct mtx tlbivax_mutex; 176 177 /**************************************************************************/ 178 /* PMAP */ 179 /**************************************************************************/ 180 181 static int mmu_booke_enter_locked(mmu_t, pmap_t, vm_offset_t, vm_page_t, 182 vm_prot_t, u_int flags, int8_t psind); 183 184 unsigned int kptbl_min; /* Index of the first kernel ptbl. 
*/ 185 unsigned int kernel_ptbls; /* Number of KVA ptbls. */ 186 #ifdef __powerpc64__ 187 unsigned int kernel_pdirs; 188 #endif 189 static uma_zone_t ptbl_root_zone; 190 191 /* 192 * If user pmap is processed with mmu_booke_remove and the resident count 193 * drops to 0, there are no more pages to remove, so we need not continue. 194 */ 195 #define PMAP_REMOVE_DONE(pmap) \ 196 ((pmap) != kernel_pmap && (pmap)->pm_stats.resident_count == 0) 197 198 #if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__) 199 extern int elf32_nxstack; 200 #endif 201 202 /**************************************************************************/ 203 /* TLB and TID handling */ 204 /**************************************************************************/ 205 206 /* Translation ID busy table */ 207 static volatile pmap_t tidbusy[MAXCPU][TID_MAX + 1]; 208 209 /* 210 * TLB0 capabilities (entry, way numbers etc.). These can vary between e500 211 * core revisions and should be read from h/w registers during early config. 212 */ 213 uint32_t tlb0_entries; 214 uint32_t tlb0_ways; 215 uint32_t tlb0_entries_per_way; 216 uint32_t tlb1_entries; 217 218 #define TLB0_ENTRIES (tlb0_entries) 219 #define TLB0_WAYS (tlb0_ways) 220 #define TLB0_ENTRIES_PER_WAY (tlb0_entries_per_way) 221 222 #define TLB1_ENTRIES (tlb1_entries) 223 224 /* 225 * Base of the pmap_mapdev() region. On 32-bit it immediately follows the 226 * userspace address range. On On 64-bit it's far above, at (1 << 63), and 227 * ranges up to the DMAP, giving 62 bits of PA allowed. This is far larger than 228 * the widest Book-E address bus, the e6500 has a 40-bit PA space. This allows 229 * us to map akin to the DMAP, with addresses identical to the PA, offset by the 230 * base. 231 */ 232 #ifdef __powerpc64__ 233 #define VM_MAPDEV_BASE 0x8000000000000000 234 #define VM_MAPDEV_PA_MAX 0x4000000000000000 /* Don't encroach on DMAP */ 235 #else 236 #define VM_MAPDEV_BASE ((vm_offset_t)VM_MAXUSER_ADDRESS + PAGE_SIZE) 237 #endif 238 239 static vm_offset_t tlb1_map_base = VM_MAPDEV_BASE; 240 241 static tlbtid_t tid_alloc(struct pmap *); 242 static void tid_flush(tlbtid_t tid); 243 244 #ifdef DDB 245 #ifdef __powerpc64__ 246 static void tlb_print_entry(int, uint32_t, uint64_t, uint32_t, uint32_t); 247 #else 248 static void tlb_print_entry(int, uint32_t, uint32_t, uint32_t, uint32_t); 249 #endif 250 #endif 251 252 static void tlb1_read_entry(tlb_entry_t *, unsigned int); 253 static void tlb1_write_entry(tlb_entry_t *, unsigned int); 254 static int tlb1_iomapped(int, vm_paddr_t, vm_size_t, vm_offset_t *); 255 static vm_size_t tlb1_mapin_region(vm_offset_t, vm_paddr_t, vm_size_t, int); 256 257 static vm_size_t tsize2size(unsigned int); 258 static unsigned int size2tsize(vm_size_t); 259 static unsigned long ilog2(unsigned long); 260 261 static void set_mas4_defaults(void); 262 263 static inline void tlb0_flush_entry(vm_offset_t); 264 static inline unsigned int tlb0_tableidx(vm_offset_t, unsigned int); 265 266 /**************************************************************************/ 267 /* Page table management */ 268 /**************************************************************************/ 269 270 static struct rwlock_padalign pvh_global_lock; 271 272 /* Data for the pv entry allocation mechanism */ 273 static uma_zone_t pvzone; 274 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; 275 276 #define PV_ENTRY_ZONE_MIN 2048 /* min pv entries in uma zone */ 277 278 #ifndef PMAP_SHPGPERPROC 279 #define PMAP_SHPGPERPROC 200 280 #endif 281 282 
#ifdef __powerpc64__ 283 #define PMAP_ROOT_SIZE (sizeof(pte_t***) * PP2D_NENTRIES) 284 static pte_t *ptbl_alloc(mmu_t, pmap_t, pte_t **, 285 unsigned int, boolean_t); 286 static void ptbl_free(mmu_t, pmap_t, pte_t **, unsigned int, vm_page_t); 287 static void ptbl_hold(mmu_t, pmap_t, pte_t **, unsigned int); 288 static int ptbl_unhold(mmu_t, pmap_t, vm_offset_t); 289 #else 290 #define PMAP_ROOT_SIZE (sizeof(pte_t**) * PDIR_NENTRIES) 291 static void ptbl_init(void); 292 static struct ptbl_buf *ptbl_buf_alloc(void); 293 static void ptbl_buf_free(struct ptbl_buf *); 294 static void ptbl_free_pmap_ptbl(pmap_t, pte_t *); 295 296 static pte_t *ptbl_alloc(mmu_t, pmap_t, unsigned int, boolean_t); 297 static void ptbl_free(mmu_t, pmap_t, unsigned int); 298 static void ptbl_hold(mmu_t, pmap_t, unsigned int); 299 static int ptbl_unhold(mmu_t, pmap_t, unsigned int); 300 #endif 301 302 static vm_paddr_t pte_vatopa(mmu_t, pmap_t, vm_offset_t); 303 static int pte_enter(mmu_t, pmap_t, vm_page_t, vm_offset_t, uint32_t, boolean_t); 304 static int pte_remove(mmu_t, pmap_t, vm_offset_t, uint8_t); 305 static pte_t *pte_find(mmu_t, pmap_t, vm_offset_t); 306 static void kernel_pte_alloc(vm_offset_t, vm_offset_t, vm_offset_t); 307 308 static pv_entry_t pv_alloc(void); 309 static void pv_free(pv_entry_t); 310 static void pv_insert(pmap_t, vm_offset_t, vm_page_t); 311 static void pv_remove(pmap_t, vm_offset_t, vm_page_t); 312 313 static void booke_pmap_init_qpages(void); 314 315 struct ptbl_buf { 316 TAILQ_ENTRY(ptbl_buf) link; /* list link */ 317 vm_offset_t kva; /* va of mapping */ 318 }; 319 320 #ifndef __powerpc64__ 321 /* Number of kva ptbl buffers, each covering one ptbl (PTBL_PAGES). */ 322 #define PTBL_BUFS (128 * 16) 323 324 /* ptbl free list and a lock used for access synchronization. */ 325 static TAILQ_HEAD(, ptbl_buf) ptbl_buf_freelist; 326 static struct mtx ptbl_buf_freelist_lock; 327 328 /* Base address of kva space allocated fot ptbl bufs. */ 329 static vm_offset_t ptbl_buf_pool_vabase; 330 331 /* Pointer to ptbl_buf structures. 
*/ 332 static struct ptbl_buf *ptbl_bufs; 333 #endif 334 335 #ifdef SMP 336 extern tlb_entry_t __boot_tlb1[]; 337 void pmap_bootstrap_ap(volatile uint32_t *); 338 #endif 339 340 /* 341 * Kernel MMU interface 342 */ 343 static void mmu_booke_clear_modify(mmu_t, vm_page_t); 344 static void mmu_booke_copy(mmu_t, pmap_t, pmap_t, vm_offset_t, 345 vm_size_t, vm_offset_t); 346 static void mmu_booke_copy_page(mmu_t, vm_page_t, vm_page_t); 347 static void mmu_booke_copy_pages(mmu_t, vm_page_t *, 348 vm_offset_t, vm_page_t *, vm_offset_t, int); 349 static int mmu_booke_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, 350 vm_prot_t, u_int flags, int8_t psind); 351 static void mmu_booke_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, 352 vm_page_t, vm_prot_t); 353 static void mmu_booke_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, 354 vm_prot_t); 355 static vm_paddr_t mmu_booke_extract(mmu_t, pmap_t, vm_offset_t); 356 static vm_page_t mmu_booke_extract_and_hold(mmu_t, pmap_t, vm_offset_t, 357 vm_prot_t); 358 static void mmu_booke_init(mmu_t); 359 static boolean_t mmu_booke_is_modified(mmu_t, vm_page_t); 360 static boolean_t mmu_booke_is_prefaultable(mmu_t, pmap_t, vm_offset_t); 361 static boolean_t mmu_booke_is_referenced(mmu_t, vm_page_t); 362 static int mmu_booke_ts_referenced(mmu_t, vm_page_t); 363 static vm_offset_t mmu_booke_map(mmu_t, vm_offset_t *, vm_paddr_t, vm_paddr_t, 364 int); 365 static int mmu_booke_mincore(mmu_t, pmap_t, vm_offset_t, 366 vm_paddr_t *); 367 static void mmu_booke_object_init_pt(mmu_t, pmap_t, vm_offset_t, 368 vm_object_t, vm_pindex_t, vm_size_t); 369 static boolean_t mmu_booke_page_exists_quick(mmu_t, pmap_t, vm_page_t); 370 static void mmu_booke_page_init(mmu_t, vm_page_t); 371 static int mmu_booke_page_wired_mappings(mmu_t, vm_page_t); 372 static void mmu_booke_pinit(mmu_t, pmap_t); 373 static void mmu_booke_pinit0(mmu_t, pmap_t); 374 static void mmu_booke_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, 375 vm_prot_t); 376 static void mmu_booke_qenter(mmu_t, vm_offset_t, vm_page_t *, int); 377 static void mmu_booke_qremove(mmu_t, vm_offset_t, int); 378 static void mmu_booke_release(mmu_t, pmap_t); 379 static void mmu_booke_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t); 380 static void mmu_booke_remove_all(mmu_t, vm_page_t); 381 static void mmu_booke_remove_write(mmu_t, vm_page_t); 382 static void mmu_booke_unwire(mmu_t, pmap_t, vm_offset_t, vm_offset_t); 383 static void mmu_booke_zero_page(mmu_t, vm_page_t); 384 static void mmu_booke_zero_page_area(mmu_t, vm_page_t, int, int); 385 static void mmu_booke_activate(mmu_t, struct thread *); 386 static void mmu_booke_deactivate(mmu_t, struct thread *); 387 static void mmu_booke_bootstrap(mmu_t, vm_offset_t, vm_offset_t); 388 static void *mmu_booke_mapdev(mmu_t, vm_paddr_t, vm_size_t); 389 static void *mmu_booke_mapdev_attr(mmu_t, vm_paddr_t, vm_size_t, vm_memattr_t); 390 static void mmu_booke_unmapdev(mmu_t, vm_offset_t, vm_size_t); 391 static vm_paddr_t mmu_booke_kextract(mmu_t, vm_offset_t); 392 static void mmu_booke_kenter(mmu_t, vm_offset_t, vm_paddr_t); 393 static void mmu_booke_kenter_attr(mmu_t, vm_offset_t, vm_paddr_t, vm_memattr_t); 394 static void mmu_booke_kremove(mmu_t, vm_offset_t); 395 static boolean_t mmu_booke_dev_direct_mapped(mmu_t, vm_paddr_t, vm_size_t); 396 static void mmu_booke_sync_icache(mmu_t, pmap_t, vm_offset_t, 397 vm_size_t); 398 static void mmu_booke_dumpsys_map(mmu_t, vm_paddr_t pa, size_t, 399 void **); 400 static void mmu_booke_dumpsys_unmap(mmu_t, vm_paddr_t pa, size_t, 401 void *); 
402 static void mmu_booke_scan_init(mmu_t); 403 static vm_offset_t mmu_booke_quick_enter_page(mmu_t mmu, vm_page_t m); 404 static void mmu_booke_quick_remove_page(mmu_t mmu, vm_offset_t addr); 405 static int mmu_booke_change_attr(mmu_t mmu, vm_offset_t addr, 406 vm_size_t sz, vm_memattr_t mode); 407 static int mmu_booke_map_user_ptr(mmu_t mmu, pmap_t pm, 408 volatile const void *uaddr, void **kaddr, size_t ulen, size_t *klen); 409 static int mmu_booke_decode_kernel_ptr(mmu_t mmu, vm_offset_t addr, 410 int *is_user, vm_offset_t *decoded_addr); 411 static void mmu_booke_page_array_startup(mmu_t , long); 412 413 414 static mmu_method_t mmu_booke_methods[] = { 415 /* pmap dispatcher interface */ 416 MMUMETHOD(mmu_clear_modify, mmu_booke_clear_modify), 417 MMUMETHOD(mmu_copy, mmu_booke_copy), 418 MMUMETHOD(mmu_copy_page, mmu_booke_copy_page), 419 MMUMETHOD(mmu_copy_pages, mmu_booke_copy_pages), 420 MMUMETHOD(mmu_enter, mmu_booke_enter), 421 MMUMETHOD(mmu_enter_object, mmu_booke_enter_object), 422 MMUMETHOD(mmu_enter_quick, mmu_booke_enter_quick), 423 MMUMETHOD(mmu_extract, mmu_booke_extract), 424 MMUMETHOD(mmu_extract_and_hold, mmu_booke_extract_and_hold), 425 MMUMETHOD(mmu_init, mmu_booke_init), 426 MMUMETHOD(mmu_is_modified, mmu_booke_is_modified), 427 MMUMETHOD(mmu_is_prefaultable, mmu_booke_is_prefaultable), 428 MMUMETHOD(mmu_is_referenced, mmu_booke_is_referenced), 429 MMUMETHOD(mmu_ts_referenced, mmu_booke_ts_referenced), 430 MMUMETHOD(mmu_map, mmu_booke_map), 431 MMUMETHOD(mmu_mincore, mmu_booke_mincore), 432 MMUMETHOD(mmu_object_init_pt, mmu_booke_object_init_pt), 433 MMUMETHOD(mmu_page_exists_quick,mmu_booke_page_exists_quick), 434 MMUMETHOD(mmu_page_init, mmu_booke_page_init), 435 MMUMETHOD(mmu_page_wired_mappings, mmu_booke_page_wired_mappings), 436 MMUMETHOD(mmu_pinit, mmu_booke_pinit), 437 MMUMETHOD(mmu_pinit0, mmu_booke_pinit0), 438 MMUMETHOD(mmu_protect, mmu_booke_protect), 439 MMUMETHOD(mmu_qenter, mmu_booke_qenter), 440 MMUMETHOD(mmu_qremove, mmu_booke_qremove), 441 MMUMETHOD(mmu_release, mmu_booke_release), 442 MMUMETHOD(mmu_remove, mmu_booke_remove), 443 MMUMETHOD(mmu_remove_all, mmu_booke_remove_all), 444 MMUMETHOD(mmu_remove_write, mmu_booke_remove_write), 445 MMUMETHOD(mmu_sync_icache, mmu_booke_sync_icache), 446 MMUMETHOD(mmu_unwire, mmu_booke_unwire), 447 MMUMETHOD(mmu_zero_page, mmu_booke_zero_page), 448 MMUMETHOD(mmu_zero_page_area, mmu_booke_zero_page_area), 449 MMUMETHOD(mmu_activate, mmu_booke_activate), 450 MMUMETHOD(mmu_deactivate, mmu_booke_deactivate), 451 MMUMETHOD(mmu_quick_enter_page, mmu_booke_quick_enter_page), 452 MMUMETHOD(mmu_quick_remove_page, mmu_booke_quick_remove_page), 453 MMUMETHOD(mmu_page_array_startup, mmu_booke_page_array_startup), 454 455 /* Internal interfaces */ 456 MMUMETHOD(mmu_bootstrap, mmu_booke_bootstrap), 457 MMUMETHOD(mmu_dev_direct_mapped,mmu_booke_dev_direct_mapped), 458 MMUMETHOD(mmu_mapdev, mmu_booke_mapdev), 459 MMUMETHOD(mmu_mapdev_attr, mmu_booke_mapdev_attr), 460 MMUMETHOD(mmu_kenter, mmu_booke_kenter), 461 MMUMETHOD(mmu_kenter_attr, mmu_booke_kenter_attr), 462 MMUMETHOD(mmu_kextract, mmu_booke_kextract), 463 MMUMETHOD(mmu_kremove, mmu_booke_kremove), 464 MMUMETHOD(mmu_unmapdev, mmu_booke_unmapdev), 465 MMUMETHOD(mmu_change_attr, mmu_booke_change_attr), 466 MMUMETHOD(mmu_map_user_ptr, mmu_booke_map_user_ptr), 467 MMUMETHOD(mmu_decode_kernel_ptr, mmu_booke_decode_kernel_ptr), 468 469 /* dumpsys() support */ 470 MMUMETHOD(mmu_dumpsys_map, mmu_booke_dumpsys_map), 471 MMUMETHOD(mmu_dumpsys_unmap, mmu_booke_dumpsys_unmap), 472 
MMUMETHOD(mmu_scan_init, mmu_booke_scan_init), 473 474 { 0, 0 } 475 }; 476 477 MMU_DEF(booke_mmu, MMU_TYPE_BOOKE, mmu_booke_methods, 0); 478 479 static __inline uint32_t 480 tlb_calc_wimg(vm_paddr_t pa, vm_memattr_t ma) 481 { 482 uint32_t attrib; 483 int i; 484 485 if (ma != VM_MEMATTR_DEFAULT) { 486 switch (ma) { 487 case VM_MEMATTR_UNCACHEABLE: 488 return (MAS2_I | MAS2_G); 489 case VM_MEMATTR_WRITE_COMBINING: 490 case VM_MEMATTR_WRITE_BACK: 491 case VM_MEMATTR_PREFETCHABLE: 492 return (MAS2_I); 493 case VM_MEMATTR_WRITE_THROUGH: 494 return (MAS2_W | MAS2_M); 495 case VM_MEMATTR_CACHEABLE: 496 return (MAS2_M); 497 } 498 } 499 500 /* 501 * Assume the page is cache inhibited and access is guarded unless 502 * it's in our available memory array. 503 */ 504 attrib = _TLB_ENTRY_IO; 505 for (i = 0; i < physmem_regions_sz; i++) { 506 if ((pa >= physmem_regions[i].mr_start) && 507 (pa < (physmem_regions[i].mr_start + 508 physmem_regions[i].mr_size))) { 509 attrib = _TLB_ENTRY_MEM; 510 break; 511 } 512 } 513 514 return (attrib); 515 } 516 517 static inline void 518 tlb_miss_lock(void) 519 { 520 #ifdef SMP 521 struct pcpu *pc; 522 523 if (!smp_started) 524 return; 525 526 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 527 if (pc != pcpup) { 528 529 CTR3(KTR_PMAP, "%s: tlb miss LOCK of CPU=%d, " 530 "tlb_lock=%p", __func__, pc->pc_cpuid, pc->pc_booke.tlb_lock); 531 532 KASSERT((pc->pc_cpuid != PCPU_GET(cpuid)), 533 ("tlb_miss_lock: tried to lock self")); 534 535 tlb_lock(pc->pc_booke.tlb_lock); 536 537 CTR1(KTR_PMAP, "%s: locked", __func__); 538 } 539 } 540 #endif 541 } 542 543 static inline void 544 tlb_miss_unlock(void) 545 { 546 #ifdef SMP 547 struct pcpu *pc; 548 549 if (!smp_started) 550 return; 551 552 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 553 if (pc != pcpup) { 554 CTR2(KTR_PMAP, "%s: tlb miss UNLOCK of CPU=%d", 555 __func__, pc->pc_cpuid); 556 557 tlb_unlock(pc->pc_booke.tlb_lock); 558 559 CTR1(KTR_PMAP, "%s: unlocked", __func__); 560 } 561 } 562 #endif 563 } 564 565 /* Return number of entries in TLB0. */ 566 static __inline void 567 tlb0_get_tlbconf(void) 568 { 569 uint32_t tlb0_cfg; 570 571 tlb0_cfg = mfspr(SPR_TLB0CFG); 572 tlb0_entries = tlb0_cfg & TLBCFG_NENTRY_MASK; 573 tlb0_ways = (tlb0_cfg & TLBCFG_ASSOC_MASK) >> TLBCFG_ASSOC_SHIFT; 574 tlb0_entries_per_way = tlb0_entries / tlb0_ways; 575 } 576 577 /* Return number of entries in TLB1. */ 578 static __inline void 579 tlb1_get_tlbconf(void) 580 { 581 uint32_t tlb1_cfg; 582 583 tlb1_cfg = mfspr(SPR_TLB1CFG); 584 tlb1_entries = tlb1_cfg & TLBCFG_NENTRY_MASK; 585 } 586 587 /**************************************************************************/ 588 /* Page table related */ 589 /**************************************************************************/ 590 591 #ifdef __powerpc64__ 592 /* Initialize pool of kva ptbl buffers. */ 593 static void 594 ptbl_init(void) 595 { 596 } 597 598 /* Get a pointer to a PTE in a page table. */ 599 static __inline pte_t * 600 pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va) 601 { 602 pte_t **pdir; 603 pte_t *ptbl; 604 605 KASSERT((pmap != NULL), ("pte_find: invalid pmap")); 606 607 pdir = pmap->pm_pp2d[PP2D_IDX(va)]; 608 if (!pdir) 609 return NULL; 610 ptbl = pdir[PDIR_IDX(va)]; 611 return ((ptbl != NULL) ? 
&ptbl[PTBL_IDX(va)] : NULL); 612 } 613 614 /* 615 * allocate a page of pointers to page directories, do not preallocate the 616 * page tables 617 */ 618 static pte_t ** 619 pdir_alloc(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx, bool nosleep) 620 { 621 vm_page_t m; 622 pte_t **pdir; 623 int req; 624 625 req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; 626 while ((m = vm_page_alloc(NULL, pp2d_idx, req)) == NULL) { 627 PMAP_UNLOCK(pmap); 628 if (nosleep) { 629 return (NULL); 630 } 631 vm_wait(NULL); 632 PMAP_LOCK(pmap); 633 } 634 635 /* Zero whole ptbl. */ 636 pdir = (pte_t **)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 637 mmu_booke_zero_page(mmu, m); 638 639 return (pdir); 640 } 641 642 /* Free pdir pages and invalidate pdir entry. */ 643 static void 644 pdir_free(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx, vm_page_t m) 645 { 646 pte_t **pdir; 647 648 pdir = pmap->pm_pp2d[pp2d_idx]; 649 650 KASSERT((pdir != NULL), ("pdir_free: null pdir")); 651 652 pmap->pm_pp2d[pp2d_idx] = NULL; 653 654 vm_wire_sub(1); 655 vm_page_free_zero(m); 656 } 657 658 /* 659 * Decrement pdir pages hold count and attempt to free pdir pages. Called 660 * when removing directory entry from pdir. 661 * 662 * Return 1 if pdir pages were freed. 663 */ 664 static int 665 pdir_unhold(mmu_t mmu, pmap_t pmap, u_int pp2d_idx) 666 { 667 pte_t **pdir; 668 vm_paddr_t pa; 669 vm_page_t m; 670 671 KASSERT((pmap != kernel_pmap), 672 ("pdir_unhold: unholding kernel pdir!")); 673 674 pdir = pmap->pm_pp2d[pp2d_idx]; 675 676 /* decrement hold count */ 677 pa = DMAP_TO_PHYS((vm_offset_t) pdir); 678 m = PHYS_TO_VM_PAGE(pa); 679 680 /* 681 * Free pdir page if there are no dir entries in this pdir. 682 */ 683 m->ref_count--; 684 if (m->ref_count == 0) { 685 pdir_free(mmu, pmap, pp2d_idx, m); 686 return (1); 687 } 688 return (0); 689 } 690 691 /* 692 * Increment hold count for pdir pages. This routine is used when new ptlb 693 * entry is being inserted into pdir. 694 */ 695 static void 696 pdir_hold(mmu_t mmu, pmap_t pmap, pte_t ** pdir) 697 { 698 vm_page_t m; 699 700 KASSERT((pmap != kernel_pmap), 701 ("pdir_hold: holding kernel pdir!")); 702 703 KASSERT((pdir != NULL), ("pdir_hold: null pdir")); 704 705 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pdir)); 706 m->ref_count++; 707 } 708 709 /* Allocate page table. */ 710 static pte_t * 711 ptbl_alloc(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx, 712 boolean_t nosleep) 713 { 714 vm_page_t m; 715 pte_t *ptbl; 716 int req; 717 718 KASSERT((pdir[pdir_idx] == NULL), 719 ("%s: valid ptbl entry exists!", __func__)); 720 721 req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; 722 while ((m = vm_page_alloc(NULL, pdir_idx, req)) == NULL) { 723 if (nosleep) 724 return (NULL); 725 PMAP_UNLOCK(pmap); 726 rw_wunlock(&pvh_global_lock); 727 vm_wait(NULL); 728 rw_wlock(&pvh_global_lock); 729 PMAP_LOCK(pmap); 730 } 731 732 /* Zero whole ptbl. */ 733 ptbl = (pte_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 734 mmu_booke_zero_page(mmu, m); 735 736 return (ptbl); 737 } 738 739 /* Free ptbl pages and invalidate pdir entry. */ 740 static void 741 ptbl_free(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx, vm_page_t m) 742 { 743 pte_t *ptbl; 744 745 ptbl = pdir[pdir_idx]; 746 747 KASSERT((ptbl != NULL), ("ptbl_free: null ptbl")); 748 749 pdir[pdir_idx] = NULL; 750 751 vm_wire_sub(1); 752 vm_page_free_zero(m); 753 } 754 755 /* 756 * Decrement ptbl pages hold count and attempt to free ptbl pages. Called 757 * when removing pte entry from ptbl. 758 * 759 * Return 1 if ptbl pages were freed. 
760 */ 761 static int 762 ptbl_unhold(mmu_t mmu, pmap_t pmap, vm_offset_t va) 763 { 764 pte_t *ptbl; 765 vm_page_t m; 766 u_int pp2d_idx; 767 pte_t **pdir; 768 u_int pdir_idx; 769 770 pp2d_idx = PP2D_IDX(va); 771 pdir_idx = PDIR_IDX(va); 772 773 KASSERT((pmap != kernel_pmap), 774 ("ptbl_unhold: unholding kernel ptbl!")); 775 776 pdir = pmap->pm_pp2d[pp2d_idx]; 777 ptbl = pdir[pdir_idx]; 778 779 /* decrement hold count */ 780 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) ptbl)); 781 782 /* 783 * Free ptbl pages if there are no pte entries in this ptbl. 784 * ref_count has the same value for all ptbl pages, so check the 785 * last page. 786 */ 787 m->ref_count--; 788 if (m->ref_count == 0) { 789 ptbl_free(mmu, pmap, pdir, pdir_idx, m); 790 pdir_unhold(mmu, pmap, pp2d_idx); 791 return (1); 792 } 793 return (0); 794 } 795 796 /* 797 * Increment hold count for ptbl pages. This routine is used when new pte 798 * entry is being inserted into ptbl. 799 */ 800 static void 801 ptbl_hold(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx) 802 { 803 pte_t *ptbl; 804 vm_page_t m; 805 806 KASSERT((pmap != kernel_pmap), 807 ("ptbl_hold: holding kernel ptbl!")); 808 809 ptbl = pdir[pdir_idx]; 810 811 KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl")); 812 813 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) ptbl)); 814 m->ref_count++; 815 } 816 #else 817 818 /* Initialize pool of kva ptbl buffers. */ 819 static void 820 ptbl_init(void) 821 { 822 int i; 823 824 CTR3(KTR_PMAP, "%s: s (ptbl_bufs = 0x%08x size 0x%08x)", __func__, 825 (uint32_t)ptbl_bufs, sizeof(struct ptbl_buf) * PTBL_BUFS); 826 CTR3(KTR_PMAP, "%s: s (ptbl_buf_pool_vabase = 0x%08x size = 0x%08x)", 827 __func__, ptbl_buf_pool_vabase, PTBL_BUFS * PTBL_PAGES * PAGE_SIZE); 828 829 mtx_init(&ptbl_buf_freelist_lock, "ptbl bufs lock", NULL, MTX_DEF); 830 TAILQ_INIT(&ptbl_buf_freelist); 831 832 for (i = 0; i < PTBL_BUFS; i++) { 833 ptbl_bufs[i].kva = 834 ptbl_buf_pool_vabase + i * PTBL_PAGES * PAGE_SIZE; 835 TAILQ_INSERT_TAIL(&ptbl_buf_freelist, &ptbl_bufs[i], link); 836 } 837 } 838 839 /* Get a ptbl_buf from the freelist. */ 840 static struct ptbl_buf * 841 ptbl_buf_alloc(void) 842 { 843 struct ptbl_buf *buf; 844 845 mtx_lock(&ptbl_buf_freelist_lock); 846 buf = TAILQ_FIRST(&ptbl_buf_freelist); 847 if (buf != NULL) 848 TAILQ_REMOVE(&ptbl_buf_freelist, buf, link); 849 mtx_unlock(&ptbl_buf_freelist_lock); 850 851 CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf); 852 853 return (buf); 854 } 855 856 /* Return ptbl buff to free pool. */ 857 static void 858 ptbl_buf_free(struct ptbl_buf *buf) 859 { 860 861 CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf); 862 863 mtx_lock(&ptbl_buf_freelist_lock); 864 TAILQ_INSERT_TAIL(&ptbl_buf_freelist, buf, link); 865 mtx_unlock(&ptbl_buf_freelist_lock); 866 } 867 868 /* 869 * Search the list of allocated ptbl bufs and find on list of allocated ptbls 870 */ 871 static void 872 ptbl_free_pmap_ptbl(pmap_t pmap, pte_t *ptbl) 873 { 874 struct ptbl_buf *pbuf; 875 876 CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl); 877 878 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 879 880 TAILQ_FOREACH(pbuf, &pmap->pm_ptbl_list, link) 881 if (pbuf->kva == (vm_offset_t)ptbl) { 882 /* Remove from pmap ptbl buf list. */ 883 TAILQ_REMOVE(&pmap->pm_ptbl_list, pbuf, link); 884 885 /* Free corresponding ptbl buf. */ 886 ptbl_buf_free(pbuf); 887 break; 888 } 889 } 890 891 /* Allocate page table. 
*/ 892 static pte_t * 893 ptbl_alloc(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx, boolean_t nosleep) 894 { 895 vm_page_t mtbl[PTBL_PAGES]; 896 vm_page_t m; 897 struct ptbl_buf *pbuf; 898 unsigned int pidx; 899 pte_t *ptbl; 900 int i, j; 901 902 CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, 903 (pmap == kernel_pmap), pdir_idx); 904 905 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 906 ("ptbl_alloc: invalid pdir_idx")); 907 KASSERT((pmap->pm_pdir[pdir_idx] == NULL), 908 ("pte_alloc: valid ptbl entry exists!")); 909 910 pbuf = ptbl_buf_alloc(); 911 if (pbuf == NULL) 912 panic("pte_alloc: couldn't alloc kernel virtual memory"); 913 914 ptbl = (pte_t *)pbuf->kva; 915 916 CTR2(KTR_PMAP, "%s: ptbl kva = %p", __func__, ptbl); 917 918 for (i = 0; i < PTBL_PAGES; i++) { 919 pidx = (PTBL_PAGES * pdir_idx) + i; 920 while ((m = vm_page_alloc(NULL, pidx, 921 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { 922 if (nosleep) { 923 ptbl_free_pmap_ptbl(pmap, ptbl); 924 for (j = 0; j < i; j++) 925 vm_page_free(mtbl[j]); 926 vm_wire_sub(i); 927 return (NULL); 928 } 929 PMAP_UNLOCK(pmap); 930 rw_wunlock(&pvh_global_lock); 931 vm_wait(NULL); 932 rw_wlock(&pvh_global_lock); 933 PMAP_LOCK(pmap); 934 } 935 mtbl[i] = m; 936 } 937 938 /* Map allocated pages into kernel_pmap. */ 939 mmu_booke_qenter(mmu, (vm_offset_t)ptbl, mtbl, PTBL_PAGES); 940 941 /* Zero whole ptbl. */ 942 bzero((caddr_t)ptbl, PTBL_PAGES * PAGE_SIZE); 943 944 /* Add pbuf to the pmap ptbl bufs list. */ 945 TAILQ_INSERT_TAIL(&pmap->pm_ptbl_list, pbuf, link); 946 947 return (ptbl); 948 } 949 950 /* Free ptbl pages and invalidate pdir entry. */ 951 static void 952 ptbl_free(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) 953 { 954 pte_t *ptbl; 955 vm_paddr_t pa; 956 vm_offset_t va; 957 vm_page_t m; 958 int i; 959 960 CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, 961 (pmap == kernel_pmap), pdir_idx); 962 963 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 964 ("ptbl_free: invalid pdir_idx")); 965 966 ptbl = pmap->pm_pdir[pdir_idx]; 967 968 CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl); 969 970 KASSERT((ptbl != NULL), ("ptbl_free: null ptbl")); 971 972 /* 973 * Invalidate the pdir entry as soon as possible, so that other CPUs 974 * don't attempt to look up the page tables we are releasing. 975 */ 976 mtx_lock_spin(&tlbivax_mutex); 977 tlb_miss_lock(); 978 979 pmap->pm_pdir[pdir_idx] = NULL; 980 981 tlb_miss_unlock(); 982 mtx_unlock_spin(&tlbivax_mutex); 983 984 for (i = 0; i < PTBL_PAGES; i++) { 985 va = ((vm_offset_t)ptbl + (i * PAGE_SIZE)); 986 pa = pte_vatopa(mmu, kernel_pmap, va); 987 m = PHYS_TO_VM_PAGE(pa); 988 vm_page_free_zero(m); 989 vm_wire_sub(1); 990 mmu_booke_kremove(mmu, va); 991 } 992 993 ptbl_free_pmap_ptbl(pmap, ptbl); 994 } 995 996 /* 997 * Decrement ptbl pages hold count and attempt to free ptbl pages. 998 * Called when removing pte entry from ptbl. 999 * 1000 * Return 1 if ptbl pages were freed. 
1001 */ 1002 static int 1003 ptbl_unhold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) 1004 { 1005 pte_t *ptbl; 1006 vm_paddr_t pa; 1007 vm_page_t m; 1008 int i; 1009 1010 CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, 1011 (pmap == kernel_pmap), pdir_idx); 1012 1013 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 1014 ("ptbl_unhold: invalid pdir_idx")); 1015 KASSERT((pmap != kernel_pmap), 1016 ("ptbl_unhold: unholding kernel ptbl!")); 1017 1018 ptbl = pmap->pm_pdir[pdir_idx]; 1019 1020 //debugf("ptbl_unhold: ptbl = 0x%08x\n", (u_int32_t)ptbl); 1021 KASSERT(((vm_offset_t)ptbl >= VM_MIN_KERNEL_ADDRESS), 1022 ("ptbl_unhold: non kva ptbl")); 1023 1024 /* decrement hold count */ 1025 for (i = 0; i < PTBL_PAGES; i++) { 1026 pa = pte_vatopa(mmu, kernel_pmap, 1027 (vm_offset_t)ptbl + (i * PAGE_SIZE)); 1028 m = PHYS_TO_VM_PAGE(pa); 1029 m->ref_count--; 1030 } 1031 1032 /* 1033 * Free ptbl pages if there are no pte etries in this ptbl. 1034 * ref_count has the same value for all ptbl pages, so check the last 1035 * page. 1036 */ 1037 if (m->ref_count == 0) { 1038 ptbl_free(mmu, pmap, pdir_idx); 1039 1040 //debugf("ptbl_unhold: e (freed ptbl)\n"); 1041 return (1); 1042 } 1043 1044 return (0); 1045 } 1046 1047 /* 1048 * Increment hold count for ptbl pages. This routine is used when a new pte 1049 * entry is being inserted into the ptbl. 1050 */ 1051 static void 1052 ptbl_hold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) 1053 { 1054 vm_paddr_t pa; 1055 pte_t *ptbl; 1056 vm_page_t m; 1057 int i; 1058 1059 CTR3(KTR_PMAP, "%s: pmap = %p pdir_idx = %d", __func__, pmap, 1060 pdir_idx); 1061 1062 KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), 1063 ("ptbl_hold: invalid pdir_idx")); 1064 KASSERT((pmap != kernel_pmap), 1065 ("ptbl_hold: holding kernel ptbl!")); 1066 1067 ptbl = pmap->pm_pdir[pdir_idx]; 1068 1069 KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl")); 1070 1071 for (i = 0; i < PTBL_PAGES; i++) { 1072 pa = pte_vatopa(mmu, kernel_pmap, 1073 (vm_offset_t)ptbl + (i * PAGE_SIZE)); 1074 m = PHYS_TO_VM_PAGE(pa); 1075 m->ref_count++; 1076 } 1077 } 1078 #endif 1079 1080 /* Allocate pv_entry structure. */ 1081 pv_entry_t 1082 pv_alloc(void) 1083 { 1084 pv_entry_t pv; 1085 1086 pv_entry_count++; 1087 if (pv_entry_count > pv_entry_high_water) 1088 pagedaemon_wakeup(0); /* XXX powerpc NUMA */ 1089 pv = uma_zalloc(pvzone, M_NOWAIT); 1090 1091 return (pv); 1092 } 1093 1094 /* Free pv_entry structure. */ 1095 static __inline void 1096 pv_free(pv_entry_t pve) 1097 { 1098 1099 pv_entry_count--; 1100 uma_zfree(pvzone, pve); 1101 } 1102 1103 1104 /* Allocate and initialize pv_entry structure. */ 1105 static void 1106 pv_insert(pmap_t pmap, vm_offset_t va, vm_page_t m) 1107 { 1108 pv_entry_t pve; 1109 1110 //int su = (pmap == kernel_pmap); 1111 //debugf("pv_insert: s (su = %d pmap = 0x%08x va = 0x%08x m = 0x%08x)\n", su, 1112 // (u_int32_t)pmap, va, (u_int32_t)m); 1113 1114 pve = pv_alloc(); 1115 if (pve == NULL) 1116 panic("pv_insert: no pv entries!"); 1117 1118 pve->pv_pmap = pmap; 1119 pve->pv_va = va; 1120 1121 /* add to pv_list */ 1122 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1123 rw_assert(&pvh_global_lock, RA_WLOCKED); 1124 1125 TAILQ_INSERT_TAIL(&m->md.pv_list, pve, pv_link); 1126 1127 //debugf("pv_insert: e\n"); 1128 } 1129 1130 /* Destroy pv entry. 
*/ 1131 static void 1132 pv_remove(pmap_t pmap, vm_offset_t va, vm_page_t m) 1133 { 1134 pv_entry_t pve; 1135 1136 //int su = (pmap == kernel_pmap); 1137 //debugf("pv_remove: s (su = %d pmap = 0x%08x va = 0x%08x)\n", su, (u_int32_t)pmap, va); 1138 1139 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1140 rw_assert(&pvh_global_lock, RA_WLOCKED); 1141 1142 /* find pv entry */ 1143 TAILQ_FOREACH(pve, &m->md.pv_list, pv_link) { 1144 if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) { 1145 /* remove from pv_list */ 1146 TAILQ_REMOVE(&m->md.pv_list, pve, pv_link); 1147 if (TAILQ_EMPTY(&m->md.pv_list)) 1148 vm_page_aflag_clear(m, PGA_WRITEABLE); 1149 1150 /* free pv entry struct */ 1151 pv_free(pve); 1152 break; 1153 } 1154 } 1155 1156 //debugf("pv_remove: e\n"); 1157 } 1158 1159 #ifdef __powerpc64__ 1160 /* 1161 * Clean pte entry, try to free page table page if requested. 1162 * 1163 * Return 1 if ptbl pages were freed, otherwise return 0. 1164 */ 1165 static int 1166 pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, u_int8_t flags) 1167 { 1168 vm_page_t m; 1169 pte_t *pte; 1170 1171 pte = pte_find(mmu, pmap, va); 1172 KASSERT(pte != NULL, ("%s: NULL pte", __func__)); 1173 1174 if (!PTE_ISVALID(pte)) 1175 return (0); 1176 1177 /* Get vm_page_t for mapped pte. */ 1178 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 1179 1180 if (PTE_ISWIRED(pte)) 1181 pmap->pm_stats.wired_count--; 1182 1183 /* Handle managed entry. */ 1184 if (PTE_ISMANAGED(pte)) { 1185 1186 /* Handle modified pages. */ 1187 if (PTE_ISMODIFIED(pte)) 1188 vm_page_dirty(m); 1189 1190 /* Referenced pages. */ 1191 if (PTE_ISREFERENCED(pte)) 1192 vm_page_aflag_set(m, PGA_REFERENCED); 1193 1194 /* Remove pv_entry from pv_list. */ 1195 pv_remove(pmap, va, m); 1196 } else if (pmap == kernel_pmap && m && m->md.pv_tracked) { 1197 pv_remove(pmap, va, m); 1198 if (TAILQ_EMPTY(&m->md.pv_list)) 1199 m->md.pv_tracked = false; 1200 } 1201 mtx_lock_spin(&tlbivax_mutex); 1202 tlb_miss_lock(); 1203 1204 tlb0_flush_entry(va); 1205 *pte = 0; 1206 1207 tlb_miss_unlock(); 1208 mtx_unlock_spin(&tlbivax_mutex); 1209 1210 pmap->pm_stats.resident_count--; 1211 1212 if (flags & PTBL_UNHOLD) { 1213 return (ptbl_unhold(mmu, pmap, va)); 1214 } 1215 return (0); 1216 } 1217 1218 /* 1219 * Insert PTE for a given page and virtual address. 1220 */ 1221 static int 1222 pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags, 1223 boolean_t nosleep) 1224 { 1225 unsigned int pp2d_idx = PP2D_IDX(va); 1226 unsigned int pdir_idx = PDIR_IDX(va); 1227 unsigned int ptbl_idx = PTBL_IDX(va); 1228 pte_t *ptbl, *pte, pte_tmp; 1229 pte_t **pdir; 1230 1231 /* Get the page directory pointer. */ 1232 pdir = pmap->pm_pp2d[pp2d_idx]; 1233 if (pdir == NULL) 1234 pdir = pdir_alloc(mmu, pmap, pp2d_idx, nosleep); 1235 1236 /* Get the page table pointer. */ 1237 ptbl = pdir[pdir_idx]; 1238 1239 if (ptbl == NULL) { 1240 /* Allocate page table pages. */ 1241 ptbl = ptbl_alloc(mmu, pmap, pdir, pdir_idx, nosleep); 1242 if (ptbl == NULL) { 1243 KASSERT(nosleep, ("nosleep and NULL ptbl")); 1244 return (ENOMEM); 1245 } 1246 pte = &ptbl[ptbl_idx]; 1247 } else { 1248 /* 1249 * Check if there is valid mapping for requested va, if there 1250 * is, remove it. 1251 */ 1252 pte = &ptbl[ptbl_idx]; 1253 if (PTE_ISVALID(pte)) { 1254 pte_remove(mmu, pmap, va, PTBL_HOLD); 1255 } else { 1256 /* 1257 * pte is not used, increment hold count for ptbl 1258 * pages. 
1259 */ 1260 if (pmap != kernel_pmap) 1261 ptbl_hold(mmu, pmap, pdir, pdir_idx); 1262 } 1263 } 1264 1265 if (pdir[pdir_idx] == NULL) { 1266 if (pmap != kernel_pmap && pmap->pm_pp2d[pp2d_idx] != NULL) 1267 pdir_hold(mmu, pmap, pdir); 1268 pdir[pdir_idx] = ptbl; 1269 } 1270 if (pmap->pm_pp2d[pp2d_idx] == NULL) 1271 pmap->pm_pp2d[pp2d_idx] = pdir; 1272 1273 /* 1274 * Insert pv_entry into pv_list for mapped page if part of managed 1275 * memory. 1276 */ 1277 if ((m->oflags & VPO_UNMANAGED) == 0) { 1278 flags |= PTE_MANAGED; 1279 1280 /* Create and insert pv entry. */ 1281 pv_insert(pmap, va, m); 1282 } 1283 1284 pmap->pm_stats.resident_count++; 1285 1286 pte_tmp = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m)); 1287 pte_tmp |= (PTE_VALID | flags); 1288 1289 mtx_lock_spin(&tlbivax_mutex); 1290 tlb_miss_lock(); 1291 1292 tlb0_flush_entry(va); 1293 *pte = pte_tmp; 1294 1295 tlb_miss_unlock(); 1296 mtx_unlock_spin(&tlbivax_mutex); 1297 1298 return (0); 1299 } 1300 1301 /* Return the pa for the given pmap/va. */ 1302 static vm_paddr_t 1303 pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1304 { 1305 vm_paddr_t pa = 0; 1306 pte_t *pte; 1307 1308 pte = pte_find(mmu, pmap, va); 1309 if ((pte != NULL) && PTE_ISVALID(pte)) 1310 pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); 1311 return (pa); 1312 } 1313 1314 1315 /* allocate pte entries to manage (addr & mask) to (addr & mask) + size */ 1316 static void 1317 kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr, vm_offset_t pdir) 1318 { 1319 int i, j; 1320 vm_offset_t va; 1321 pte_t *pte; 1322 1323 va = addr; 1324 /* Initialize kernel pdir */ 1325 for (i = 0; i < kernel_pdirs; i++) { 1326 kernel_pmap->pm_pp2d[i + PP2D_IDX(va)] = 1327 (pte_t **)(pdir + (i * PAGE_SIZE * PDIR_PAGES)); 1328 for (j = PDIR_IDX(va + (i * PAGE_SIZE * PDIR_NENTRIES * PTBL_NENTRIES)); 1329 j < PDIR_NENTRIES; j++) { 1330 kernel_pmap->pm_pp2d[i + PP2D_IDX(va)][j] = 1331 (pte_t *)(pdir + (kernel_pdirs * PAGE_SIZE) + 1332 (((i * PDIR_NENTRIES) + j) * PAGE_SIZE)); 1333 } 1334 } 1335 1336 /* 1337 * Fill in PTEs covering kernel code and data. They are not required 1338 * for address translation, as this area is covered by static TLB1 1339 * entries, but for pte_vatopa() to work correctly with kernel area 1340 * addresses. 1341 */ 1342 for (va = addr; va < data_end; va += PAGE_SIZE) { 1343 pte = &(kernel_pmap->pm_pp2d[PP2D_IDX(va)][PDIR_IDX(va)][PTBL_IDX(va)]); 1344 *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart)); 1345 *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | 1346 PTE_VALID | PTE_PS_4KB; 1347 } 1348 } 1349 #else 1350 /* 1351 * Clean pte entry, try to free page table page if requested. 1352 * 1353 * Return 1 if ptbl pages were freed, otherwise return 0. 1354 */ 1355 static int 1356 pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, uint8_t flags) 1357 { 1358 unsigned int pdir_idx = PDIR_IDX(va); 1359 unsigned int ptbl_idx = PTBL_IDX(va); 1360 vm_page_t m; 1361 pte_t *ptbl; 1362 pte_t *pte; 1363 1364 //int su = (pmap == kernel_pmap); 1365 //debugf("pte_remove: s (su = %d pmap = 0x%08x va = 0x%08x flags = %d)\n", 1366 // su, (u_int32_t)pmap, va, flags); 1367 1368 ptbl = pmap->pm_pdir[pdir_idx]; 1369 KASSERT(ptbl, ("pte_remove: null ptbl")); 1370 1371 pte = &ptbl[ptbl_idx]; 1372 1373 if (pte == NULL || !PTE_ISVALID(pte)) 1374 return (0); 1375 1376 if (PTE_ISWIRED(pte)) 1377 pmap->pm_stats.wired_count--; 1378 1379 /* Get vm_page_t for mapped pte. */ 1380 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 1381 1382 /* Handle managed entry. 
*/ 1383 if (PTE_ISMANAGED(pte)) { 1384 1385 if (PTE_ISMODIFIED(pte)) 1386 vm_page_dirty(m); 1387 1388 if (PTE_ISREFERENCED(pte)) 1389 vm_page_aflag_set(m, PGA_REFERENCED); 1390 1391 pv_remove(pmap, va, m); 1392 } else if (pmap == kernel_pmap && m && m->md.pv_tracked) { 1393 /* 1394 * Always pv_insert()/pv_remove() on MPC85XX, in case DPAA is 1395 * used. This is needed by the NCSW support code for fast 1396 * VA<->PA translation. 1397 */ 1398 pv_remove(pmap, va, m); 1399 if (TAILQ_EMPTY(&m->md.pv_list)) 1400 m->md.pv_tracked = false; 1401 } 1402 1403 mtx_lock_spin(&tlbivax_mutex); 1404 tlb_miss_lock(); 1405 1406 tlb0_flush_entry(va); 1407 *pte = 0; 1408 1409 tlb_miss_unlock(); 1410 mtx_unlock_spin(&tlbivax_mutex); 1411 1412 pmap->pm_stats.resident_count--; 1413 1414 if (flags & PTBL_UNHOLD) { 1415 //debugf("pte_remove: e (unhold)\n"); 1416 return (ptbl_unhold(mmu, pmap, pdir_idx)); 1417 } 1418 1419 //debugf("pte_remove: e\n"); 1420 return (0); 1421 } 1422 1423 /* 1424 * Insert PTE for a given page and virtual address. 1425 */ 1426 static int 1427 pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags, 1428 boolean_t nosleep) 1429 { 1430 unsigned int pdir_idx = PDIR_IDX(va); 1431 unsigned int ptbl_idx = PTBL_IDX(va); 1432 pte_t *ptbl, *pte, pte_tmp; 1433 1434 CTR4(KTR_PMAP, "%s: su = %d pmap = %p va = %p", __func__, 1435 pmap == kernel_pmap, pmap, va); 1436 1437 /* Get the page table pointer. */ 1438 ptbl = pmap->pm_pdir[pdir_idx]; 1439 1440 if (ptbl == NULL) { 1441 /* Allocate page table pages. */ 1442 ptbl = ptbl_alloc(mmu, pmap, pdir_idx, nosleep); 1443 if (ptbl == NULL) { 1444 KASSERT(nosleep, ("nosleep and NULL ptbl")); 1445 return (ENOMEM); 1446 } 1447 pmap->pm_pdir[pdir_idx] = ptbl; 1448 pte = &ptbl[ptbl_idx]; 1449 } else { 1450 /* 1451 * Check if there is valid mapping for requested 1452 * va, if there is, remove it. 1453 */ 1454 pte = &pmap->pm_pdir[pdir_idx][ptbl_idx]; 1455 if (PTE_ISVALID(pte)) { 1456 pte_remove(mmu, pmap, va, PTBL_HOLD); 1457 } else { 1458 /* 1459 * pte is not used, increment hold count 1460 * for ptbl pages. 1461 */ 1462 if (pmap != kernel_pmap) 1463 ptbl_hold(mmu, pmap, pdir_idx); 1464 } 1465 } 1466 1467 /* 1468 * Insert pv_entry into pv_list for mapped page if part of managed 1469 * memory. 1470 */ 1471 if ((m->oflags & VPO_UNMANAGED) == 0) { 1472 flags |= PTE_MANAGED; 1473 1474 /* Create and insert pv entry. */ 1475 pv_insert(pmap, va, m); 1476 } 1477 1478 pmap->pm_stats.resident_count++; 1479 1480 pte_tmp = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m)); 1481 pte_tmp |= (PTE_VALID | flags | PTE_PS_4KB); /* 4KB pages only */ 1482 1483 mtx_lock_spin(&tlbivax_mutex); 1484 tlb_miss_lock(); 1485 1486 tlb0_flush_entry(va); 1487 *pte = pte_tmp; 1488 1489 tlb_miss_unlock(); 1490 mtx_unlock_spin(&tlbivax_mutex); 1491 return (0); 1492 } 1493 1494 /* Return the pa for the given pmap/va. */ 1495 static vm_paddr_t 1496 pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1497 { 1498 vm_paddr_t pa = 0; 1499 pte_t *pte; 1500 1501 pte = pte_find(mmu, pmap, va); 1502 if ((pte != NULL) && PTE_ISVALID(pte)) 1503 pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); 1504 return (pa); 1505 } 1506 1507 /* Get a pointer to a PTE in a page table. 
*/ 1508 static pte_t * 1509 pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1510 { 1511 unsigned int pdir_idx = PDIR_IDX(va); 1512 unsigned int ptbl_idx = PTBL_IDX(va); 1513 1514 KASSERT((pmap != NULL), ("pte_find: invalid pmap")); 1515 1516 if (pmap->pm_pdir[pdir_idx]) 1517 return (&(pmap->pm_pdir[pdir_idx][ptbl_idx])); 1518 1519 return (NULL); 1520 } 1521 1522 /* Set up kernel page tables. */ 1523 static void 1524 kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr, vm_offset_t pdir) 1525 { 1526 int i; 1527 vm_offset_t va; 1528 pte_t *pte; 1529 1530 /* Initialize kernel pdir */ 1531 for (i = 0; i < kernel_ptbls; i++) 1532 kernel_pmap->pm_pdir[kptbl_min + i] = 1533 (pte_t *)(pdir + (i * PAGE_SIZE * PTBL_PAGES)); 1534 1535 /* 1536 * Fill in PTEs covering kernel code and data. They are not required 1537 * for address translation, as this area is covered by static TLB1 1538 * entries, but for pte_vatopa() to work correctly with kernel area 1539 * addresses. 1540 */ 1541 for (va = addr; va < data_end; va += PAGE_SIZE) { 1542 pte = &(kernel_pmap->pm_pdir[PDIR_IDX(va)][PTBL_IDX(va)]); 1543 *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart)); 1544 *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | 1545 PTE_VALID | PTE_PS_4KB; 1546 } 1547 } 1548 #endif 1549 1550 /**************************************************************************/ 1551 /* PMAP related */ 1552 /**************************************************************************/ 1553 1554 /* 1555 * This is called during booke_init, before the system is really initialized. 1556 */ 1557 static void 1558 mmu_booke_bootstrap(mmu_t mmu, vm_offset_t start, vm_offset_t kernelend) 1559 { 1560 vm_paddr_t phys_kernelend; 1561 struct mem_region *mp, *mp1; 1562 int cnt, i, j; 1563 vm_paddr_t s, e, sz; 1564 vm_paddr_t physsz, hwphyssz; 1565 u_int phys_avail_count; 1566 vm_size_t kstack0_sz; 1567 vm_offset_t kernel_pdir, kstack0; 1568 vm_paddr_t kstack0_phys; 1569 void *dpcpu; 1570 vm_offset_t kernel_ptbl_root; 1571 1572 debugf("mmu_booke_bootstrap: entered\n"); 1573 1574 /* Set interesting system properties */ 1575 #ifdef __powerpc64__ 1576 hw_direct_map = 1; 1577 #else 1578 hw_direct_map = 0; 1579 #endif 1580 #if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__) 1581 elf32_nxstack = 1; 1582 #endif 1583 1584 /* Initialize invalidation mutex */ 1585 mtx_init(&tlbivax_mutex, "tlbivax", NULL, MTX_SPIN); 1586 1587 /* Read TLB0 size and associativity. */ 1588 tlb0_get_tlbconf(); 1589 1590 /* 1591 * Align kernel start and end address (kernel image). 1592 * Note that kernel end does not necessarily relate to kernsize. 1593 * kernsize is the size of the kernel that is actually mapped. 1594 */ 1595 data_start = round_page(kernelend); 1596 data_end = data_start; 1597 1598 /* Allocate the dynamic per-cpu area. */ 1599 dpcpu = (void *)data_end; 1600 data_end += DPCPU_SIZE; 1601 1602 /* Allocate space for the message buffer. */ 1603 msgbufp = (struct msgbuf *)data_end; 1604 data_end += msgbufsize; 1605 debugf(" msgbufp at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", 1606 (uintptr_t)msgbufp, data_end); 1607 1608 data_end = round_page(data_end); 1609 1610 #ifdef __powerpc64__ 1611 kernel_ptbl_root = data_end; 1612 data_end += PP2D_NENTRIES * sizeof(pte_t**); 1613 #else 1614 /* Allocate space for ptbl_bufs. 
*/ 1615 ptbl_bufs = (struct ptbl_buf *)data_end; 1616 data_end += sizeof(struct ptbl_buf) * PTBL_BUFS; 1617 debugf(" ptbl_bufs at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", 1618 (uintptr_t)ptbl_bufs, data_end); 1619 1620 data_end = round_page(data_end); 1621 kernel_ptbl_root = data_end; 1622 data_end += PDIR_NENTRIES * sizeof(pte_t*); 1623 #endif 1624 1625 /* Allocate PTE tables for kernel KVA. */ 1626 kernel_pdir = data_end; 1627 kernel_ptbls = howmany(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, 1628 PDIR_SIZE); 1629 #ifdef __powerpc64__ 1630 kernel_pdirs = howmany(kernel_ptbls, PDIR_NENTRIES); 1631 data_end += kernel_pdirs * PDIR_PAGES * PAGE_SIZE; 1632 #endif 1633 data_end += kernel_ptbls * PTBL_PAGES * PAGE_SIZE; 1634 debugf(" kernel ptbls: %d\n", kernel_ptbls); 1635 debugf(" kernel pdir at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", 1636 kernel_pdir, data_end); 1637 1638 /* Retrieve phys/avail mem regions */ 1639 mem_regions(&physmem_regions, &physmem_regions_sz, 1640 &availmem_regions, &availmem_regions_sz); 1641 1642 if (PHYS_AVAIL_ENTRIES < availmem_regions_sz) 1643 panic("mmu_booke_bootstrap: phys_avail too small"); 1644 1645 data_end = round_page(data_end); 1646 vm_page_array = (vm_page_t)data_end; 1647 /* 1648 * Get a rough idea (upper bound) on the size of the page array. The 1649 * vm_page_array will not handle any more pages than we have in the 1650 * avail_regions array, and most likely much less. 1651 */ 1652 sz = 0; 1653 for (mp = availmem_regions; mp->mr_size; mp++) { 1654 sz += mp->mr_size; 1655 } 1656 sz = (round_page(sz) / (PAGE_SIZE + sizeof(struct vm_page))); 1657 data_end += round_page(sz * sizeof(struct vm_page)); 1658 1659 /* Pre-round up to 1MB. This wastes some space, but saves TLB entries */ 1660 data_end = roundup2(data_end, 1 << 20); 1661 1662 debugf(" data_end: 0x%"PRI0ptrX"\n", data_end); 1663 debugf(" kernstart: %#zx\n", kernstart); 1664 debugf(" kernsize: %#zx\n", kernsize); 1665 1666 if (data_end - kernstart > kernsize) { 1667 kernsize += tlb1_mapin_region(kernstart + kernsize, 1668 kernload + kernsize, (data_end - kernstart) - kernsize, 1669 _TLB_ENTRY_MEM); 1670 } 1671 data_end = kernstart + kernsize; 1672 debugf(" updated data_end: 0x%"PRI0ptrX"\n", data_end); 1673 1674 /* 1675 * Clear the structures - note we can only do it safely after the 1676 * possible additional TLB1 translations are in place (above) so that 1677 * all range up to the currently calculated 'data_end' is covered. 1678 */ 1679 dpcpu_init(dpcpu, 0); 1680 #ifdef __powerpc64__ 1681 memset((void *)kernel_pdir, 0, 1682 kernel_pdirs * PDIR_PAGES * PAGE_SIZE + 1683 kernel_ptbls * PTBL_PAGES * PAGE_SIZE); 1684 #else 1685 memset((void *)ptbl_bufs, 0, sizeof(struct ptbl_buf) * PTBL_SIZE); 1686 memset((void *)kernel_pdir, 0, kernel_ptbls * PTBL_PAGES * PAGE_SIZE); 1687 #endif 1688 1689 /*******************************************************/ 1690 /* Set the start and end of kva. */ 1691 /*******************************************************/ 1692 virtual_avail = round_page(data_end); 1693 virtual_end = VM_MAX_KERNEL_ADDRESS; 1694 1695 #ifndef __powerpc64__ 1696 /* Allocate KVA space for page zero/copy operations. 
*/ 1697 zero_page_va = virtual_avail; 1698 virtual_avail += PAGE_SIZE; 1699 copy_page_src_va = virtual_avail; 1700 virtual_avail += PAGE_SIZE; 1701 copy_page_dst_va = virtual_avail; 1702 virtual_avail += PAGE_SIZE; 1703 debugf("zero_page_va = 0x%"PRI0ptrX"\n", zero_page_va); 1704 debugf("copy_page_src_va = 0x%"PRI0ptrX"\n", copy_page_src_va); 1705 debugf("copy_page_dst_va = 0x%"PRI0ptrX"\n", copy_page_dst_va); 1706 1707 /* Initialize page zero/copy mutexes. */ 1708 mtx_init(&zero_page_mutex, "mmu_booke_zero_page", NULL, MTX_DEF); 1709 mtx_init(©_page_mutex, "mmu_booke_copy_page", NULL, MTX_DEF); 1710 1711 /* Allocate KVA space for ptbl bufs. */ 1712 ptbl_buf_pool_vabase = virtual_avail; 1713 virtual_avail += PTBL_BUFS * PTBL_PAGES * PAGE_SIZE; 1714 debugf("ptbl_buf_pool_vabase = 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", 1715 ptbl_buf_pool_vabase, virtual_avail); 1716 #endif 1717 1718 /* Calculate corresponding physical addresses for the kernel region. */ 1719 phys_kernelend = kernload + kernsize; 1720 debugf("kernel image and allocated data:\n"); 1721 debugf(" kernload = 0x%09jx\n", (uintmax_t)kernload); 1722 debugf(" kernstart = 0x%"PRI0ptrX"\n", kernstart); 1723 debugf(" kernsize = 0x%"PRI0ptrX"\n", kernsize); 1724 1725 /* 1726 * Remove kernel physical address range from avail regions list. Page 1727 * align all regions. Non-page aligned memory isn't very interesting 1728 * to us. Also, sort the entries for ascending addresses. 1729 */ 1730 1731 sz = 0; 1732 cnt = availmem_regions_sz; 1733 debugf("processing avail regions:\n"); 1734 for (mp = availmem_regions; mp->mr_size; mp++) { 1735 s = mp->mr_start; 1736 e = mp->mr_start + mp->mr_size; 1737 debugf(" %09jx-%09jx -> ", (uintmax_t)s, (uintmax_t)e); 1738 /* Check whether this region holds all of the kernel. */ 1739 if (s < kernload && e > phys_kernelend) { 1740 availmem_regions[cnt].mr_start = phys_kernelend; 1741 availmem_regions[cnt++].mr_size = e - phys_kernelend; 1742 e = kernload; 1743 } 1744 /* Look whether this regions starts within the kernel. */ 1745 if (s >= kernload && s < phys_kernelend) { 1746 if (e <= phys_kernelend) 1747 goto empty; 1748 s = phys_kernelend; 1749 } 1750 /* Now look whether this region ends within the kernel. */ 1751 if (e > kernload && e <= phys_kernelend) { 1752 if (s >= kernload) 1753 goto empty; 1754 e = kernload; 1755 } 1756 /* Now page align the start and size of the region. */ 1757 s = round_page(s); 1758 e = trunc_page(e); 1759 if (e < s) 1760 e = s; 1761 sz = e - s; 1762 debugf("%09jx-%09jx = %jx\n", 1763 (uintmax_t)s, (uintmax_t)e, (uintmax_t)sz); 1764 1765 /* Check whether some memory is left here. */ 1766 if (sz == 0) { 1767 empty: 1768 memmove(mp, mp + 1, 1769 (cnt - (mp - availmem_regions)) * sizeof(*mp)); 1770 cnt--; 1771 mp--; 1772 continue; 1773 } 1774 1775 /* Do an insertion sort. 
*/ 1776 for (mp1 = availmem_regions; mp1 < mp; mp1++) 1777 if (s < mp1->mr_start) 1778 break; 1779 if (mp1 < mp) { 1780 memmove(mp1 + 1, mp1, (char *)mp - (char *)mp1); 1781 mp1->mr_start = s; 1782 mp1->mr_size = sz; 1783 } else { 1784 mp->mr_start = s; 1785 mp->mr_size = sz; 1786 } 1787 } 1788 availmem_regions_sz = cnt; 1789 1790 /*******************************************************/ 1791 /* Steal physical memory for kernel stack from the end */ 1792 /* of the first avail region */ 1793 /*******************************************************/ 1794 kstack0_sz = kstack_pages * PAGE_SIZE; 1795 kstack0_phys = availmem_regions[0].mr_start + 1796 availmem_regions[0].mr_size; 1797 kstack0_phys -= kstack0_sz; 1798 availmem_regions[0].mr_size -= kstack0_sz; 1799 1800 /*******************************************************/ 1801 /* Fill in phys_avail table, based on availmem_regions */ 1802 /*******************************************************/ 1803 phys_avail_count = 0; 1804 physsz = 0; 1805 hwphyssz = 0; 1806 TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz); 1807 1808 debugf("fill in phys_avail:\n"); 1809 for (i = 0, j = 0; i < availmem_regions_sz; i++, j += 2) { 1810 1811 debugf(" region: 0x%jx - 0x%jx (0x%jx)\n", 1812 (uintmax_t)availmem_regions[i].mr_start, 1813 (uintmax_t)availmem_regions[i].mr_start + 1814 availmem_regions[i].mr_size, 1815 (uintmax_t)availmem_regions[i].mr_size); 1816 1817 if (hwphyssz != 0 && 1818 (physsz + availmem_regions[i].mr_size) >= hwphyssz) { 1819 debugf(" hw.physmem adjust\n"); 1820 if (physsz < hwphyssz) { 1821 phys_avail[j] = availmem_regions[i].mr_start; 1822 phys_avail[j + 1] = 1823 availmem_regions[i].mr_start + 1824 hwphyssz - physsz; 1825 physsz = hwphyssz; 1826 phys_avail_count++; 1827 dump_avail[j] = phys_avail[j]; 1828 dump_avail[j + 1] = phys_avail[j + 1]; 1829 } 1830 break; 1831 } 1832 1833 phys_avail[j] = availmem_regions[i].mr_start; 1834 phys_avail[j + 1] = availmem_regions[i].mr_start + 1835 availmem_regions[i].mr_size; 1836 phys_avail_count++; 1837 physsz += availmem_regions[i].mr_size; 1838 dump_avail[j] = phys_avail[j]; 1839 dump_avail[j + 1] = phys_avail[j + 1]; 1840 } 1841 physmem = btoc(physsz); 1842 1843 /* Calculate the last available physical address. */ 1844 for (i = 0; phys_avail[i + 2] != 0; i += 2) 1845 ; 1846 Maxmem = powerpc_btop(phys_avail[i + 1]); 1847 1848 debugf("Maxmem = 0x%08lx\n", Maxmem); 1849 debugf("phys_avail_count = %d\n", phys_avail_count); 1850 debugf("physsz = 0x%09jx physmem = %jd (0x%09jx)\n", 1851 (uintmax_t)physsz, (uintmax_t)physmem, (uintmax_t)physmem); 1852 1853 #ifdef __powerpc64__ 1854 /* 1855 * Map the physical memory contiguously in TLB1. 1856 * Round so it fits into a single mapping. 1857 */ 1858 tlb1_mapin_region(DMAP_BASE_ADDRESS, 0, 1859 phys_avail[i + 1], _TLB_ENTRY_MEM); 1860 #endif 1861 1862 /*******************************************************/ 1863 /* Initialize (statically allocated) kernel pmap. 
*/ 1864 /*******************************************************/ 1865 PMAP_LOCK_INIT(kernel_pmap); 1866 #ifdef __powerpc64__ 1867 kernel_pmap->pm_pp2d = (pte_t ***)kernel_ptbl_root; 1868 #else 1869 kptbl_min = VM_MIN_KERNEL_ADDRESS / PDIR_SIZE; 1870 kernel_pmap->pm_pdir = (pte_t **)kernel_ptbl_root; 1871 #endif 1872 1873 debugf("kernel_pmap = 0x%"PRI0ptrX"\n", (uintptr_t)kernel_pmap); 1874 kernel_pte_alloc(virtual_avail, kernstart, kernel_pdir); 1875 for (i = 0; i < MAXCPU; i++) { 1876 kernel_pmap->pm_tid[i] = TID_KERNEL; 1877 1878 /* Initialize each CPU's tidbusy entry 0 with kernel_pmap */ 1879 tidbusy[i][TID_KERNEL] = kernel_pmap; 1880 } 1881 1882 /* Mark kernel_pmap active on all CPUs */ 1883 CPU_FILL(&kernel_pmap->pm_active); 1884 1885 /* 1886 * Initialize the global pv list lock. 1887 */ 1888 rw_init(&pvh_global_lock, "pmap pv global"); 1889 1890 /*******************************************************/ 1891 /* Final setup */ 1892 /*******************************************************/ 1893 1894 /* Enter kstack0 into kernel map, provide guard page */ 1895 kstack0 = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE; 1896 thread0.td_kstack = kstack0; 1897 thread0.td_kstack_pages = kstack_pages; 1898 1899 debugf("kstack_sz = 0x%08jx\n", (uintmax_t)kstack0_sz); 1900 debugf("kstack0_phys at 0x%09jx - 0x%09jx\n", 1901 (uintmax_t)kstack0_phys, (uintmax_t)kstack0_phys + kstack0_sz); 1902 debugf("kstack0 at 0x%"PRI0ptrX" - 0x%"PRI0ptrX"\n", 1903 kstack0, kstack0 + kstack0_sz); 1904 1905 virtual_avail += KSTACK_GUARD_PAGES * PAGE_SIZE + kstack0_sz; 1906 for (i = 0; i < kstack_pages; i++) { 1907 mmu_booke_kenter(mmu, kstack0, kstack0_phys); 1908 kstack0 += PAGE_SIZE; 1909 kstack0_phys += PAGE_SIZE; 1910 } 1911 1912 pmap_bootstrapped = 1; 1913 1914 debugf("virtual_avail = %"PRI0ptrX"\n", virtual_avail); 1915 debugf("virtual_end = %"PRI0ptrX"\n", virtual_end); 1916 1917 debugf("mmu_booke_bootstrap: exit\n"); 1918 } 1919 1920 #ifdef SMP 1921 void 1922 tlb1_ap_prep(void) 1923 { 1924 tlb_entry_t *e, tmp; 1925 unsigned int i; 1926 1927 /* Prepare TLB1 image for AP processors */ 1928 e = __boot_tlb1; 1929 for (i = 0; i < TLB1_ENTRIES; i++) { 1930 tlb1_read_entry(&tmp, i); 1931 1932 if ((tmp.mas1 & MAS1_VALID) && (tmp.mas2 & _TLB_ENTRY_SHARED)) 1933 memcpy(e++, &tmp, sizeof(tmp)); 1934 } 1935 } 1936 1937 void 1938 pmap_bootstrap_ap(volatile uint32_t *trcp __unused) 1939 { 1940 int i; 1941 1942 /* 1943 * Finish TLB1 configuration: the BSP already set up its TLB1 and we 1944 * have the snapshot of its contents in the s/w __boot_tlb1[] table 1945 * created by tlb1_ap_prep(), so use these values directly to 1946 * (re)program AP's TLB1 hardware. 1947 * 1948 * Start at index 1 because index 0 has the kernel map. 1949 */ 1950 for (i = 1; i < TLB1_ENTRIES; i++) { 1951 if (__boot_tlb1[i].mas1 & MAS1_VALID) 1952 tlb1_write_entry(&__boot_tlb1[i], i); 1953 } 1954 1955 set_mas4_defaults(); 1956 } 1957 #endif 1958 1959 static void 1960 booke_pmap_init_qpages(void) 1961 { 1962 struct pcpu *pc; 1963 int i; 1964 1965 CPU_FOREACH(i) { 1966 pc = pcpu_find(i); 1967 pc->pc_qmap_addr = kva_alloc(PAGE_SIZE); 1968 if (pc->pc_qmap_addr == 0) 1969 panic("pmap_init_qpages: unable to allocate KVA"); 1970 } 1971 } 1972 1973 SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, booke_pmap_init_qpages, NULL); 1974 1975 /* 1976 * Get the physical page address for the given pmap/virtual address. 
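 *
 * This is what backs the machine-independent pmap_extract() interface on
 * Book-E. A rough usage sketch (hypothetical caller, error handling
 * omitted):
 *
 *	pa = pmap_extract(pmap, va);
 *	if (pa == 0)
 *		... no valid mapping at va ...
 *
 * A return value of 0 means no valid mapping was found; the lookup is
 * done with only the pmap lock held.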
1977 */ 1978 static vm_paddr_t 1979 mmu_booke_extract(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1980 { 1981 vm_paddr_t pa; 1982 1983 PMAP_LOCK(pmap); 1984 pa = pte_vatopa(mmu, pmap, va); 1985 PMAP_UNLOCK(pmap); 1986 1987 return (pa); 1988 } 1989 1990 /* 1991 * Extract the physical page address associated with the given 1992 * kernel virtual address. 1993 */ 1994 static vm_paddr_t 1995 mmu_booke_kextract(mmu_t mmu, vm_offset_t va) 1996 { 1997 tlb_entry_t e; 1998 vm_paddr_t p = 0; 1999 int i; 2000 2001 #ifdef __powerpc64__ 2002 if (va >= DMAP_BASE_ADDRESS && va <= DMAP_MAX_ADDRESS) 2003 return (DMAP_TO_PHYS(va)); 2004 #endif 2005 2006 if (va >= VM_MIN_KERNEL_ADDRESS && va <= VM_MAX_KERNEL_ADDRESS) 2007 p = pte_vatopa(mmu, kernel_pmap, va); 2008 2009 if (p == 0) { 2010 /* Check TLB1 mappings */ 2011 for (i = 0; i < TLB1_ENTRIES; i++) { 2012 tlb1_read_entry(&e, i); 2013 if (!(e.mas1 & MAS1_VALID)) 2014 continue; 2015 if (va >= e.virt && va < e.virt + e.size) 2016 return (e.phys + (va - e.virt)); 2017 } 2018 } 2019 2020 return (p); 2021 } 2022 2023 /* 2024 * Initialize the pmap module. 2025 * Called by vm_init, to initialize any structures that the pmap 2026 * system needs to map virtual memory. 2027 */ 2028 static void 2029 mmu_booke_init(mmu_t mmu) 2030 { 2031 int shpgperproc = PMAP_SHPGPERPROC; 2032 2033 /* 2034 * Initialize the address space (zone) for the pv entries. Set a 2035 * high water mark so that the system can recover from excessive 2036 * numbers of pv entries. 2037 */ 2038 pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL, 2039 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); 2040 2041 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 2042 pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count; 2043 2044 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); 2045 pv_entry_high_water = 9 * (pv_entry_max / 10); 2046 2047 uma_zone_reserve_kva(pvzone, pv_entry_max); 2048 2049 /* Pre-fill pvzone with initial number of pv entries. */ 2050 uma_prealloc(pvzone, PV_ENTRY_ZONE_MIN); 2051 2052 /* Create a UMA zone for page table roots. */ 2053 ptbl_root_zone = uma_zcreate("pmap root", PMAP_ROOT_SIZE, 2054 NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, UMA_ZONE_VM); 2055 2056 /* Initialize ptbl allocation. */ 2057 ptbl_init(); 2058 } 2059 2060 /* 2061 * Map a list of wired pages into kernel virtual address space. This is 2062 * intended for temporary mappings which do not need page modification or 2063 * references recorded. Existing mappings in the region are overwritten. 2064 */ 2065 static void 2066 mmu_booke_qenter(mmu_t mmu, vm_offset_t sva, vm_page_t *m, int count) 2067 { 2068 vm_offset_t va; 2069 2070 va = sva; 2071 while (count-- > 0) { 2072 mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(*m)); 2073 va += PAGE_SIZE; 2074 m++; 2075 } 2076 } 2077 2078 /* 2079 * Remove page mappings from kernel virtual address space. Intended for 2080 * temporary mappings entered by mmu_booke_qenter. 2081 */ 2082 static void 2083 mmu_booke_qremove(mmu_t mmu, vm_offset_t sva, int count) 2084 { 2085 vm_offset_t va; 2086 2087 va = sva; 2088 while (count-- > 0) { 2089 mmu_booke_kremove(mmu, va); 2090 va += PAGE_SIZE; 2091 } 2092 } 2093 2094 /* 2095 * Map a wired page into kernel virtual address space. 
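 *
 * A minimal usage sketch (assumes va was obtained from the kernel KVA
 * arena, e.g. via kva_alloc(); error handling omitted):
 *
 *	va = kva_alloc(PAGE_SIZE);
 *	mmu_booke_kenter(mmu, va, pa);
 *	... access the frame through va ...
 *	mmu_booke_kremove(mmu, va);
 *	kva_free(va, PAGE_SIZE);
 *
 * The mapping is created wired, with supervisor RWX permissions and WIMG
 * attributes derived from tlb_calc_wimg() for the given physical address.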
2096 */ 2097 static void 2098 mmu_booke_kenter(mmu_t mmu, vm_offset_t va, vm_paddr_t pa) 2099 { 2100 2101 mmu_booke_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT); 2102 } 2103 2104 static void 2105 mmu_booke_kenter_attr(mmu_t mmu, vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma) 2106 { 2107 uint32_t flags; 2108 pte_t *pte; 2109 2110 KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && 2111 (va <= VM_MAX_KERNEL_ADDRESS)), ("mmu_booke_kenter: invalid va")); 2112 2113 flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID; 2114 flags |= tlb_calc_wimg(pa, ma) << PTE_MAS2_SHIFT; 2115 flags |= PTE_PS_4KB; 2116 2117 pte = pte_find(mmu, kernel_pmap, va); 2118 KASSERT((pte != NULL), ("mmu_booke_kenter: invalid va. NULL PTE")); 2119 2120 mtx_lock_spin(&tlbivax_mutex); 2121 tlb_miss_lock(); 2122 2123 if (PTE_ISVALID(pte)) { 2124 2125 CTR1(KTR_PMAP, "%s: replacing entry!", __func__); 2126 2127 /* Flush entry from TLB0 */ 2128 tlb0_flush_entry(va); 2129 } 2130 2131 *pte = PTE_RPN_FROM_PA(pa) | flags; 2132 2133 //debugf("mmu_booke_kenter: pdir_idx = %d ptbl_idx = %d va=0x%08x " 2134 // "pa=0x%08x rpn=0x%08x flags=0x%08x\n", 2135 // pdir_idx, ptbl_idx, va, pa, pte->rpn, pte->flags); 2136 2137 /* Flush the real memory from the instruction cache. */ 2138 if ((flags & (PTE_I | PTE_G)) == 0) 2139 __syncicache((void *)va, PAGE_SIZE); 2140 2141 tlb_miss_unlock(); 2142 mtx_unlock_spin(&tlbivax_mutex); 2143 } 2144 2145 /* 2146 * Remove a page from kernel page table. 2147 */ 2148 static void 2149 mmu_booke_kremove(mmu_t mmu, vm_offset_t va) 2150 { 2151 pte_t *pte; 2152 2153 CTR2(KTR_PMAP,"%s: s (va = 0x%"PRI0ptrX")\n", __func__, va); 2154 2155 KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && 2156 (va <= VM_MAX_KERNEL_ADDRESS)), 2157 ("mmu_booke_kremove: invalid va")); 2158 2159 pte = pte_find(mmu, kernel_pmap, va); 2160 2161 if (!PTE_ISVALID(pte)) { 2162 2163 CTR1(KTR_PMAP, "%s: invalid pte", __func__); 2164 2165 return; 2166 } 2167 2168 mtx_lock_spin(&tlbivax_mutex); 2169 tlb_miss_lock(); 2170 2171 /* Invalidate entry in TLB0, update PTE. */ 2172 tlb0_flush_entry(va); 2173 *pte = 0; 2174 2175 tlb_miss_unlock(); 2176 mtx_unlock_spin(&tlbivax_mutex); 2177 } 2178 2179 /* 2180 * Provide a kernel pointer corresponding to a given userland pointer. 2181 * The returned pointer is valid until the next time this function is 2182 * called in this thread. This is used internally in copyin/copyout. 2183 */ 2184 int 2185 mmu_booke_map_user_ptr(mmu_t mmu, pmap_t pm, volatile const void *uaddr, 2186 void **kaddr, size_t ulen, size_t *klen) 2187 { 2188 2189 if (trunc_page((uintptr_t)uaddr + ulen) > VM_MAXUSER_ADDRESS) 2190 return (EFAULT); 2191 2192 *kaddr = (void *)(uintptr_t)uaddr; 2193 if (klen) 2194 *klen = ulen; 2195 2196 return (0); 2197 } 2198 2199 /* 2200 * Figure out where a given kernel pointer (usually in a fault) points 2201 * to from the VM's perspective, potentially remapping into userland's 2202 * address space. 2203 */ 2204 static int 2205 mmu_booke_decode_kernel_ptr(mmu_t mmu, vm_offset_t addr, int *is_user, 2206 vm_offset_t *decoded_addr) 2207 { 2208 2209 if (trunc_page(addr) <= VM_MAXUSER_ADDRESS) 2210 *is_user = 1; 2211 else 2212 *is_user = 0; 2213 2214 *decoded_addr = addr; 2215 return (0); 2216 } 2217 2218 /* 2219 * Initialize pmap associated with process 0. 
2220 */ 2221 static void 2222 mmu_booke_pinit0(mmu_t mmu, pmap_t pmap) 2223 { 2224 2225 PMAP_LOCK_INIT(pmap); 2226 mmu_booke_pinit(mmu, pmap); 2227 PCPU_SET(curpmap, pmap); 2228 } 2229 2230 /* 2231 * Initialize a preallocated and zeroed pmap structure, 2232 * such as one in a vmspace structure. 2233 */ 2234 static void 2235 mmu_booke_pinit(mmu_t mmu, pmap_t pmap) 2236 { 2237 int i; 2238 2239 CTR4(KTR_PMAP, "%s: pmap = %p, proc %d '%s'", __func__, pmap, 2240 curthread->td_proc->p_pid, curthread->td_proc->p_comm); 2241 2242 KASSERT((pmap != kernel_pmap), ("pmap_pinit: initializing kernel_pmap")); 2243 2244 for (i = 0; i < MAXCPU; i++) 2245 pmap->pm_tid[i] = TID_NONE; 2246 CPU_ZERO(&kernel_pmap->pm_active); 2247 bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); 2248 #ifdef __powerpc64__ 2249 pmap->pm_pp2d = uma_zalloc(ptbl_root_zone, M_WAITOK); 2250 bzero(pmap->pm_pp2d, sizeof(pte_t **) * PP2D_NENTRIES); 2251 #else 2252 pmap->pm_pdir = uma_zalloc(ptbl_root_zone, M_WAITOK); 2253 bzero(pmap->pm_pdir, sizeof(pte_t *) * PDIR_NENTRIES); 2254 TAILQ_INIT(&pmap->pm_ptbl_list); 2255 #endif 2256 } 2257 2258 /* 2259 * Release any resources held by the given physical map. 2260 * Called when a pmap initialized by mmu_booke_pinit is being released. 2261 * Should only be called if the map contains no valid mappings. 2262 */ 2263 static void 2264 mmu_booke_release(mmu_t mmu, pmap_t pmap) 2265 { 2266 2267 KASSERT(pmap->pm_stats.resident_count == 0, 2268 ("pmap_release: pmap resident count %ld != 0", 2269 pmap->pm_stats.resident_count)); 2270 #ifdef __powerpc64__ 2271 uma_zfree(ptbl_root_zone, pmap->pm_pp2d); 2272 #else 2273 uma_zfree(ptbl_root_zone, pmap->pm_pdir); 2274 #endif 2275 } 2276 2277 /* 2278 * Insert the given physical page at the specified virtual address in the 2279 * target physical map with the protection requested. If specified the page 2280 * will be wired down. 2281 */ 2282 static int 2283 mmu_booke_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 2284 vm_prot_t prot, u_int flags, int8_t psind) 2285 { 2286 int error; 2287 2288 rw_wlock(&pvh_global_lock); 2289 PMAP_LOCK(pmap); 2290 error = mmu_booke_enter_locked(mmu, pmap, va, m, prot, flags, psind); 2291 PMAP_UNLOCK(pmap); 2292 rw_wunlock(&pvh_global_lock); 2293 return (error); 2294 } 2295 2296 static int 2297 mmu_booke_enter_locked(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 2298 vm_prot_t prot, u_int pmap_flags, int8_t psind __unused) 2299 { 2300 pte_t *pte; 2301 vm_paddr_t pa; 2302 uint32_t flags; 2303 int error, su, sync; 2304 2305 pa = VM_PAGE_TO_PHYS(m); 2306 su = (pmap == kernel_pmap); 2307 sync = 0; 2308 2309 //debugf("mmu_booke_enter_locked: s (pmap=0x%08x su=%d tid=%d m=0x%08x va=0x%08x " 2310 // "pa=0x%08x prot=0x%08x flags=%#x)\n", 2311 // (u_int32_t)pmap, su, pmap->pm_tid, 2312 // (u_int32_t)m, va, pa, prot, flags); 2313 2314 if (su) { 2315 KASSERT(((va >= virtual_avail) && 2316 (va <= VM_MAX_KERNEL_ADDRESS)), 2317 ("mmu_booke_enter_locked: kernel pmap, non kernel va")); 2318 } else { 2319 KASSERT((va <= VM_MAXUSER_ADDRESS), 2320 ("mmu_booke_enter_locked: user pmap, non user va")); 2321 } 2322 if ((m->oflags & VPO_UNMANAGED) == 0) { 2323 if ((pmap_flags & PMAP_ENTER_QUICK_LOCKED) == 0) 2324 VM_PAGE_OBJECT_BUSY_ASSERT(m); 2325 else 2326 VM_OBJECT_ASSERT_LOCKED(m->object); 2327 } 2328 2329 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2330 2331 /* 2332 * If there is an existing mapping, and the physical address has not 2333 * changed, must be protection or wiring change. 
2334 */ 2335 if (((pte = pte_find(mmu, pmap, va)) != NULL) && 2336 (PTE_ISVALID(pte)) && (PTE_PA(pte) == pa)) { 2337 2338 /* 2339 * Before actually updating pte->flags we calculate and 2340 * prepare its new value in a helper var. 2341 */ 2342 flags = *pte; 2343 flags &= ~(PTE_UW | PTE_UX | PTE_SW | PTE_SX | PTE_MODIFIED); 2344 2345 /* Wiring change, just update stats. */ 2346 if ((pmap_flags & PMAP_ENTER_WIRED) != 0) { 2347 if (!PTE_ISWIRED(pte)) { 2348 flags |= PTE_WIRED; 2349 pmap->pm_stats.wired_count++; 2350 } 2351 } else { 2352 if (PTE_ISWIRED(pte)) { 2353 flags &= ~PTE_WIRED; 2354 pmap->pm_stats.wired_count--; 2355 } 2356 } 2357 2358 if (prot & VM_PROT_WRITE) { 2359 /* Add write permissions. */ 2360 flags |= PTE_SW; 2361 if (!su) 2362 flags |= PTE_UW; 2363 2364 if ((flags & PTE_MANAGED) != 0) 2365 vm_page_aflag_set(m, PGA_WRITEABLE); 2366 } else { 2367 /* Handle modified pages, sense modify status. */ 2368 2369 /* 2370 * The PTE_MODIFIED flag could be set by underlying 2371 * TLB misses since we last read it (above), possibly 2372 * other CPUs could update it so we check in the PTE 2373 * directly rather than rely on that saved local flags 2374 * copy. 2375 */ 2376 if (PTE_ISMODIFIED(pte)) 2377 vm_page_dirty(m); 2378 } 2379 2380 if (prot & VM_PROT_EXECUTE) { 2381 flags |= PTE_SX; 2382 if (!su) 2383 flags |= PTE_UX; 2384 2385 /* 2386 * Check existing flags for execute permissions: if we 2387 * are turning execute permissions on, icache should 2388 * be flushed. 2389 */ 2390 if ((*pte & (PTE_UX | PTE_SX)) == 0) 2391 sync++; 2392 } 2393 2394 flags &= ~PTE_REFERENCED; 2395 2396 /* 2397 * The new flags value is all calculated -- only now actually 2398 * update the PTE. 2399 */ 2400 mtx_lock_spin(&tlbivax_mutex); 2401 tlb_miss_lock(); 2402 2403 tlb0_flush_entry(va); 2404 *pte &= ~PTE_FLAGS_MASK; 2405 *pte |= flags; 2406 2407 tlb_miss_unlock(); 2408 mtx_unlock_spin(&tlbivax_mutex); 2409 2410 } else { 2411 /* 2412 * If there is an existing mapping, but it's for a different 2413 * physical address, pte_enter() will delete the old mapping. 2414 */ 2415 //if ((pte != NULL) && PTE_ISVALID(pte)) 2416 // debugf("mmu_booke_enter_locked: replace\n"); 2417 //else 2418 // debugf("mmu_booke_enter_locked: new\n"); 2419 2420 /* Now set up the flags and install the new mapping. */ 2421 flags = (PTE_SR | PTE_VALID); 2422 flags |= PTE_M; 2423 2424 if (!su) 2425 flags |= PTE_UR; 2426 2427 if (prot & VM_PROT_WRITE) { 2428 flags |= PTE_SW; 2429 if (!su) 2430 flags |= PTE_UW; 2431 2432 if ((m->oflags & VPO_UNMANAGED) == 0) 2433 vm_page_aflag_set(m, PGA_WRITEABLE); 2434 } 2435 2436 if (prot & VM_PROT_EXECUTE) { 2437 flags |= PTE_SX; 2438 if (!su) 2439 flags |= PTE_UX; 2440 } 2441 2442 /* If its wired update stats. */ 2443 if ((pmap_flags & PMAP_ENTER_WIRED) != 0) 2444 flags |= PTE_WIRED; 2445 2446 error = pte_enter(mmu, pmap, m, va, flags, 2447 (pmap_flags & PMAP_ENTER_NOSLEEP) != 0); 2448 if (error != 0) 2449 return (KERN_RESOURCE_SHORTAGE); 2450 2451 if ((flags & PMAP_ENTER_WIRED) != 0) 2452 pmap->pm_stats.wired_count++; 2453 2454 /* Flush the real memory from the instruction cache. */ 2455 if (prot & VM_PROT_EXECUTE) 2456 sync++; 2457 } 2458 2459 if (sync && (su || pmap == PCPU_GET(curpmap))) { 2460 __syncicache((void *)va, PAGE_SIZE); 2461 sync = 0; 2462 } 2463 2464 return (KERN_SUCCESS); 2465 } 2466 2467 /* 2468 * Maps a sequence of resident pages belonging to the same object. 2469 * The sequence begins with the given page m_start. This page is 2470 * mapped at the given virtual address start. 
Each subsequent page is 2471 * mapped at a virtual address that is offset from start by the same 2472 * amount as the page is offset from m_start within the object. The 2473 * last page in the sequence is the page with the largest offset from 2474 * m_start that can be mapped at a virtual address less than the given 2475 * virtual address end. Not every virtual page between start and end 2476 * is mapped; only those for which a resident page exists with the 2477 * corresponding offset from m_start are mapped. 2478 */ 2479 static void 2480 mmu_booke_enter_object(mmu_t mmu, pmap_t pmap, vm_offset_t start, 2481 vm_offset_t end, vm_page_t m_start, vm_prot_t prot) 2482 { 2483 vm_page_t m; 2484 vm_pindex_t diff, psize; 2485 2486 VM_OBJECT_ASSERT_LOCKED(m_start->object); 2487 2488 psize = atop(end - start); 2489 m = m_start; 2490 rw_wlock(&pvh_global_lock); 2491 PMAP_LOCK(pmap); 2492 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 2493 mmu_booke_enter_locked(mmu, pmap, start + ptoa(diff), m, 2494 prot & (VM_PROT_READ | VM_PROT_EXECUTE), 2495 PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, 0); 2496 m = TAILQ_NEXT(m, listq); 2497 } 2498 PMAP_UNLOCK(pmap); 2499 rw_wunlock(&pvh_global_lock); 2500 } 2501 2502 static void 2503 mmu_booke_enter_quick(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 2504 vm_prot_t prot) 2505 { 2506 2507 rw_wlock(&pvh_global_lock); 2508 PMAP_LOCK(pmap); 2509 mmu_booke_enter_locked(mmu, pmap, va, m, 2510 prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP | 2511 PMAP_ENTER_QUICK_LOCKED, 0); 2512 PMAP_UNLOCK(pmap); 2513 rw_wunlock(&pvh_global_lock); 2514 } 2515 2516 /* 2517 * Remove the given range of addresses from the specified map. 2518 * 2519 * It is assumed that the start and end are properly rounded to the page size. 2520 */ 2521 static void 2522 mmu_booke_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_offset_t endva) 2523 { 2524 pte_t *pte; 2525 uint8_t hold_flag; 2526 2527 int su = (pmap == kernel_pmap); 2528 2529 //debugf("mmu_booke_remove: s (su = %d pmap=0x%08x tid=%d va=0x%08x endva=0x%08x)\n", 2530 // su, (u_int32_t)pmap, pmap->pm_tid, va, endva); 2531 2532 if (su) { 2533 KASSERT(((va >= virtual_avail) && 2534 (va <= VM_MAX_KERNEL_ADDRESS)), 2535 ("mmu_booke_remove: kernel pmap, non kernel va")); 2536 } else { 2537 KASSERT((va <= VM_MAXUSER_ADDRESS), 2538 ("mmu_booke_remove: user pmap, non user va")); 2539 } 2540 2541 if (PMAP_REMOVE_DONE(pmap)) { 2542 //debugf("mmu_booke_remove: e (empty)\n"); 2543 return; 2544 } 2545 2546 hold_flag = PTBL_HOLD_FLAG(pmap); 2547 //debugf("mmu_booke_remove: hold_flag = %d\n", hold_flag); 2548 2549 rw_wlock(&pvh_global_lock); 2550 PMAP_LOCK(pmap); 2551 for (; va < endva; va += PAGE_SIZE) { 2552 pte = pte_find(mmu, pmap, va); 2553 if ((pte != NULL) && PTE_ISVALID(pte)) 2554 pte_remove(mmu, pmap, va, hold_flag); 2555 } 2556 PMAP_UNLOCK(pmap); 2557 rw_wunlock(&pvh_global_lock); 2558 2559 //debugf("mmu_booke_remove: e\n"); 2560 } 2561 2562 /* 2563 * Remove physical page from all pmaps in which it resides. 
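 *
 * This backs pmap_remove_all(): every pv entry on the page's pv list is
 * torn down under the global pv lock, and PGA_WRITEABLE is cleared once
 * no mappings remain.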
2564 */ 2565 static void 2566 mmu_booke_remove_all(mmu_t mmu, vm_page_t m) 2567 { 2568 pv_entry_t pv, pvn; 2569 uint8_t hold_flag; 2570 2571 rw_wlock(&pvh_global_lock); 2572 for (pv = TAILQ_FIRST(&m->md.pv_list); pv != NULL; pv = pvn) { 2573 pvn = TAILQ_NEXT(pv, pv_link); 2574 2575 PMAP_LOCK(pv->pv_pmap); 2576 hold_flag = PTBL_HOLD_FLAG(pv->pv_pmap); 2577 pte_remove(mmu, pv->pv_pmap, pv->pv_va, hold_flag); 2578 PMAP_UNLOCK(pv->pv_pmap); 2579 } 2580 vm_page_aflag_clear(m, PGA_WRITEABLE); 2581 rw_wunlock(&pvh_global_lock); 2582 } 2583 2584 /* 2585 * Map a range of physical addresses into kernel virtual address space. 2586 */ 2587 static vm_offset_t 2588 mmu_booke_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start, 2589 vm_paddr_t pa_end, int prot) 2590 { 2591 vm_offset_t sva = *virt; 2592 vm_offset_t va = sva; 2593 2594 #ifdef __powerpc64__ 2595 /* XXX: Handle memory not starting at 0x0. */ 2596 if (pa_end < ctob(Maxmem)) 2597 return (PHYS_TO_DMAP(pa_start)); 2598 #endif 2599 2600 while (pa_start < pa_end) { 2601 mmu_booke_kenter(mmu, va, pa_start); 2602 va += PAGE_SIZE; 2603 pa_start += PAGE_SIZE; 2604 } 2605 *virt = va; 2606 2607 return (sva); 2608 } 2609 2610 /* 2611 * The pmap must be activated before it's address space can be accessed in any 2612 * way. 2613 */ 2614 static void 2615 mmu_booke_activate(mmu_t mmu, struct thread *td) 2616 { 2617 pmap_t pmap; 2618 u_int cpuid; 2619 2620 pmap = &td->td_proc->p_vmspace->vm_pmap; 2621 2622 CTR5(KTR_PMAP, "%s: s (td = %p, proc = '%s', id = %d, pmap = 0x%"PRI0ptrX")", 2623 __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap); 2624 2625 KASSERT((pmap != kernel_pmap), ("mmu_booke_activate: kernel_pmap!")); 2626 2627 sched_pin(); 2628 2629 cpuid = PCPU_GET(cpuid); 2630 CPU_SET_ATOMIC(cpuid, &pmap->pm_active); 2631 PCPU_SET(curpmap, pmap); 2632 2633 if (pmap->pm_tid[cpuid] == TID_NONE) 2634 tid_alloc(pmap); 2635 2636 /* Load PID0 register with pmap tid value. */ 2637 mtspr(SPR_PID0, pmap->pm_tid[cpuid]); 2638 __asm __volatile("isync"); 2639 2640 mtspr(SPR_DBCR0, td->td_pcb->pcb_cpu.booke.dbcr0); 2641 2642 sched_unpin(); 2643 2644 CTR3(KTR_PMAP, "%s: e (tid = %d for '%s')", __func__, 2645 pmap->pm_tid[PCPU_GET(cpuid)], td->td_proc->p_comm); 2646 } 2647 2648 /* 2649 * Deactivate the specified process's address space. 2650 */ 2651 static void 2652 mmu_booke_deactivate(mmu_t mmu, struct thread *td) 2653 { 2654 pmap_t pmap; 2655 2656 pmap = &td->td_proc->p_vmspace->vm_pmap; 2657 2658 CTR5(KTR_PMAP, "%s: td=%p, proc = '%s', id = %d, pmap = 0x%"PRI0ptrX, 2659 __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap); 2660 2661 td->td_pcb->pcb_cpu.booke.dbcr0 = mfspr(SPR_DBCR0); 2662 2663 CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmap->pm_active); 2664 PCPU_SET(curpmap, NULL); 2665 } 2666 2667 /* 2668 * Copy the range specified by src_addr/len 2669 * from the source map to the range dst_addr/len 2670 * in the destination map. 2671 * 2672 * This routine is only advisory and need not do anything. 2673 */ 2674 static void 2675 mmu_booke_copy(mmu_t mmu, pmap_t dst_pmap, pmap_t src_pmap, 2676 vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) 2677 { 2678 2679 } 2680 2681 /* 2682 * Set the physical protection on the specified range of this map as requested. 
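 *
 * Only protection downgrades are meaningful here: VM_PROT_NONE removes
 * the range outright, a request that still includes VM_PROT_WRITE is a
 * no-op, and anything else just strips the write (UW/SW) bits, e.g.
 * (sketch):
 *
 *	pmap_protect(pmap, sva, eva, VM_PROT_READ);
 *
 * which leaves the range mapped with its remaining permissions intact.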
2683 */ 2684 static void 2685 mmu_booke_protect(mmu_t mmu, pmap_t pmap, vm_offset_t sva, vm_offset_t eva, 2686 vm_prot_t prot) 2687 { 2688 vm_offset_t va; 2689 vm_page_t m; 2690 pte_t *pte; 2691 2692 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 2693 mmu_booke_remove(mmu, pmap, sva, eva); 2694 return; 2695 } 2696 2697 if (prot & VM_PROT_WRITE) 2698 return; 2699 2700 PMAP_LOCK(pmap); 2701 for (va = sva; va < eva; va += PAGE_SIZE) { 2702 if ((pte = pte_find(mmu, pmap, va)) != NULL) { 2703 if (PTE_ISVALID(pte)) { 2704 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 2705 2706 mtx_lock_spin(&tlbivax_mutex); 2707 tlb_miss_lock(); 2708 2709 /* Handle modified pages. */ 2710 if (PTE_ISMODIFIED(pte) && PTE_ISMANAGED(pte)) 2711 vm_page_dirty(m); 2712 2713 tlb0_flush_entry(va); 2714 *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED); 2715 2716 tlb_miss_unlock(); 2717 mtx_unlock_spin(&tlbivax_mutex); 2718 } 2719 } 2720 } 2721 PMAP_UNLOCK(pmap); 2722 } 2723 2724 /* 2725 * Clear the write and modified bits in each of the given page's mappings. 2726 */ 2727 static void 2728 mmu_booke_remove_write(mmu_t mmu, vm_page_t m) 2729 { 2730 pv_entry_t pv; 2731 pte_t *pte; 2732 2733 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2734 ("mmu_booke_remove_write: page %p is not managed", m)); 2735 vm_page_assert_busied(m); 2736 2737 if (!pmap_page_is_write_mapped(m)) 2738 return; 2739 rw_wlock(&pvh_global_lock); 2740 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 2741 PMAP_LOCK(pv->pv_pmap); 2742 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) { 2743 if (PTE_ISVALID(pte)) { 2744 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 2745 2746 mtx_lock_spin(&tlbivax_mutex); 2747 tlb_miss_lock(); 2748 2749 /* Handle modified pages. */ 2750 if (PTE_ISMODIFIED(pte)) 2751 vm_page_dirty(m); 2752 2753 /* Flush mapping from TLB0. */ 2754 *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED); 2755 2756 tlb_miss_unlock(); 2757 mtx_unlock_spin(&tlbivax_mutex); 2758 } 2759 } 2760 PMAP_UNLOCK(pv->pv_pmap); 2761 } 2762 vm_page_aflag_clear(m, PGA_WRITEABLE); 2763 rw_wunlock(&pvh_global_lock); 2764 } 2765 2766 static void 2767 mmu_booke_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) 2768 { 2769 pte_t *pte; 2770 vm_paddr_t pa = 0; 2771 int sync_sz, valid; 2772 #ifndef __powerpc64__ 2773 pmap_t pmap; 2774 vm_page_t m; 2775 vm_offset_t addr; 2776 int active; 2777 #endif 2778 2779 #ifndef __powerpc64__ 2780 rw_wlock(&pvh_global_lock); 2781 pmap = PCPU_GET(curpmap); 2782 active = (pm == kernel_pmap || pm == pmap) ? 1 : 0; 2783 #endif 2784 while (sz > 0) { 2785 PMAP_LOCK(pm); 2786 pte = pte_find(mmu, pm, va); 2787 valid = (pte != NULL && PTE_ISVALID(pte)) ? 1 : 0; 2788 if (valid) 2789 pa = PTE_PA(pte); 2790 PMAP_UNLOCK(pm); 2791 sync_sz = PAGE_SIZE - (va & PAGE_MASK); 2792 sync_sz = min(sync_sz, sz); 2793 if (valid) { 2794 #ifdef __powerpc64__ 2795 pa += (va & PAGE_MASK); 2796 __syncicache((void *)PHYS_TO_DMAP(pa), sync_sz); 2797 #else 2798 if (!active) { 2799 /* Create a mapping in the active pmap. 
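				 * The page is entered read-only at VA 0 of
				 * the current pmap just long enough to run
				 * __syncicache() on it, then torn down
				 * again; only the offset within the page is
				 * added so the right cache lines are
				 * touched.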
*/ 2800 addr = 0; 2801 m = PHYS_TO_VM_PAGE(pa); 2802 PMAP_LOCK(pmap); 2803 pte_enter(mmu, pmap, m, addr, 2804 PTE_SR | PTE_VALID, FALSE); 2805 addr += (va & PAGE_MASK); 2806 __syncicache((void *)addr, sync_sz); 2807 pte_remove(mmu, pmap, addr, PTBL_UNHOLD); 2808 PMAP_UNLOCK(pmap); 2809 } else 2810 __syncicache((void *)va, sync_sz); 2811 #endif 2812 } 2813 va += sync_sz; 2814 sz -= sync_sz; 2815 } 2816 #ifndef __powerpc64__ 2817 rw_wunlock(&pvh_global_lock); 2818 #endif 2819 } 2820 2821 /* 2822 * Atomically extract and hold the physical page with the given 2823 * pmap and virtual address pair if that mapping permits the given 2824 * protection. 2825 */ 2826 static vm_page_t 2827 mmu_booke_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, 2828 vm_prot_t prot) 2829 { 2830 pte_t *pte; 2831 vm_page_t m; 2832 uint32_t pte_wbit; 2833 2834 m = NULL; 2835 PMAP_LOCK(pmap); 2836 pte = pte_find(mmu, pmap, va); 2837 if ((pte != NULL) && PTE_ISVALID(pte)) { 2838 if (pmap == kernel_pmap) 2839 pte_wbit = PTE_SW; 2840 else 2841 pte_wbit = PTE_UW; 2842 2843 if ((*pte & pte_wbit) != 0 || (prot & VM_PROT_WRITE) == 0) { 2844 m = PHYS_TO_VM_PAGE(PTE_PA(pte)); 2845 if (!vm_page_wire_mapped(m)) 2846 m = NULL; 2847 } 2848 } 2849 PMAP_UNLOCK(pmap); 2850 return (m); 2851 } 2852 2853 /* 2854 * Initialize a vm_page's machine-dependent fields. 2855 */ 2856 static void 2857 mmu_booke_page_init(mmu_t mmu, vm_page_t m) 2858 { 2859 2860 m->md.pv_tracked = 0; 2861 TAILQ_INIT(&m->md.pv_list); 2862 } 2863 2864 /* 2865 * mmu_booke_zero_page_area zeros the specified hardware page by 2866 * mapping it into virtual memory and using bzero to clear 2867 * its contents. 2868 * 2869 * off and size must reside within a single page. 2870 */ 2871 static void 2872 mmu_booke_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) 2873 { 2874 vm_offset_t va; 2875 2876 /* XXX KASSERT off and size are within a single page? */ 2877 2878 #ifdef __powerpc64__ 2879 va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2880 bzero((caddr_t)va + off, size); 2881 #else 2882 mtx_lock(&zero_page_mutex); 2883 va = zero_page_va; 2884 2885 mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); 2886 bzero((caddr_t)va + off, size); 2887 mmu_booke_kremove(mmu, va); 2888 2889 mtx_unlock(&zero_page_mutex); 2890 #endif 2891 } 2892 2893 /* 2894 * mmu_booke_zero_page zeros the specified hardware page. 2895 */ 2896 static void 2897 mmu_booke_zero_page(mmu_t mmu, vm_page_t m) 2898 { 2899 vm_offset_t off, va; 2900 2901 #ifdef __powerpc64__ 2902 va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2903 2904 for (off = 0; off < PAGE_SIZE; off += cacheline_size) 2905 __asm __volatile("dcbz 0,%0" :: "r"(va + off)); 2906 #else 2907 va = zero_page_va; 2908 mtx_lock(&zero_page_mutex); 2909 2910 mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); 2911 2912 for (off = 0; off < PAGE_SIZE; off += cacheline_size) 2913 __asm __volatile("dcbz 0,%0" :: "r"(va + off)); 2914 2915 mmu_booke_kremove(mmu, va); 2916 2917 mtx_unlock(&zero_page_mutex); 2918 #endif 2919 } 2920 2921 /* 2922 * mmu_booke_copy_page copies the specified (machine independent) page by 2923 * mapping the page into virtual memory and using memcopy to copy the page, 2924 * one machine dependent page at a time. 
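 *
 * On powerpc64 the direct map is used instead, so no temporary KVA
 * mappings (or the copy_page mutex) are needed there.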
 */
static void
mmu_booke_copy_page(mmu_t mmu, vm_page_t sm, vm_page_t dm)
{
	vm_offset_t sva, dva;

#ifdef __powerpc64__
	sva = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(sm));
	dva = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dm));
	memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE);
#else
	sva = copy_page_src_va;
	dva = copy_page_dst_va;

	mtx_lock(&copy_page_mutex);
	mmu_booke_kenter(mmu, sva, VM_PAGE_TO_PHYS(sm));
	mmu_booke_kenter(mmu, dva, VM_PAGE_TO_PHYS(dm));

	memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE);

	mmu_booke_kremove(mmu, dva);
	mmu_booke_kremove(mmu, sva);
	mtx_unlock(&copy_page_mutex);
#endif
}

static inline void
mmu_booke_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset,
    vm_page_t *mb, vm_offset_t b_offset, int xfersize)
{
	void *a_cp, *b_cp;
	vm_offset_t a_pg_offset, b_pg_offset;
	int cnt;

#ifdef __powerpc64__
	vm_page_t pa, pb;

	while (xfersize > 0) {
		a_pg_offset = a_offset & PAGE_MASK;
		pa = ma[a_offset >> PAGE_SHIFT];
		b_pg_offset = b_offset & PAGE_MASK;
		pb = mb[b_offset >> PAGE_SHIFT];
		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
		a_cp = (caddr_t)((uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pa)) +
		    a_pg_offset);
		b_cp = (caddr_t)((uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pb)) +
		    b_pg_offset);
		bcopy(a_cp, b_cp, cnt);
		a_offset += cnt;
		b_offset += cnt;
		xfersize -= cnt;
	}
#else
	mtx_lock(&copy_page_mutex);
	while (xfersize > 0) {
		a_pg_offset = a_offset & PAGE_MASK;
		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
		mmu_booke_kenter(mmu, copy_page_src_va,
		    VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT]));
		a_cp = (char *)copy_page_src_va + a_pg_offset;
		b_pg_offset = b_offset & PAGE_MASK;
		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
		mmu_booke_kenter(mmu, copy_page_dst_va,
		    VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT]));
		b_cp = (char *)copy_page_dst_va + b_pg_offset;
		bcopy(a_cp, b_cp, cnt);
		mmu_booke_kremove(mmu, copy_page_dst_va);
		mmu_booke_kremove(mmu, copy_page_src_va);
		a_offset += cnt;
		b_offset += cnt;
		xfersize -= cnt;
	}
	mtx_unlock(&copy_page_mutex);
#endif
}

static vm_offset_t
mmu_booke_quick_enter_page(mmu_t mmu, vm_page_t m)
{
#ifdef __powerpc64__
	return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)));
#else
	vm_paddr_t paddr;
	vm_offset_t qaddr;
	uint32_t flags;
	pte_t *pte;

	paddr = VM_PAGE_TO_PHYS(m);

	flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID;
	flags |= tlb_calc_wimg(paddr, pmap_page_get_memattr(m)) << PTE_MAS2_SHIFT;
	flags |= PTE_PS_4KB;

	critical_enter();
	qaddr = PCPU_GET(qmap_addr);

	pte = pte_find(mmu, kernel_pmap, qaddr);

	KASSERT(*pte == 0, ("mmu_booke_quick_enter_page: PTE busy"));

	/*
	 * XXX: tlbivax is broadcast to other cores, but qaddr should
	 * not be present in other TLBs. Is there a better instruction
	 * sequence to use? Or just forget it & use mmu_booke_kenter()...
	 */
	__asm __volatile("tlbivax 0, %0" :: "r"(qaddr & MAS2_EPN_MASK));
	__asm __volatile("isync; msync");

	*pte = PTE_RPN_FROM_PA(paddr) | flags;

	/* Flush the real memory from the instruction cache.
*/ 3037 if ((flags & (PTE_I | PTE_G)) == 0) 3038 __syncicache((void *)qaddr, PAGE_SIZE); 3039 3040 return (qaddr); 3041 #endif 3042 } 3043 3044 static void 3045 mmu_booke_quick_remove_page(mmu_t mmu, vm_offset_t addr) 3046 { 3047 #ifndef __powerpc64__ 3048 pte_t *pte; 3049 3050 pte = pte_find(mmu, kernel_pmap, addr); 3051 3052 KASSERT(PCPU_GET(qmap_addr) == addr, 3053 ("mmu_booke_quick_remove_page: invalid address")); 3054 KASSERT(*pte != 0, 3055 ("mmu_booke_quick_remove_page: PTE not in use")); 3056 3057 *pte = 0; 3058 critical_exit(); 3059 #endif 3060 } 3061 3062 /* 3063 * Return whether or not the specified physical page was modified 3064 * in any of physical maps. 3065 */ 3066 static boolean_t 3067 mmu_booke_is_modified(mmu_t mmu, vm_page_t m) 3068 { 3069 pte_t *pte; 3070 pv_entry_t pv; 3071 boolean_t rv; 3072 3073 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3074 ("mmu_booke_is_modified: page %p is not managed", m)); 3075 rv = FALSE; 3076 3077 /* 3078 * If the page is not busied then this check is racy. 3079 */ 3080 if (!pmap_page_is_write_mapped(m)) 3081 return (FALSE); 3082 3083 rw_wlock(&pvh_global_lock); 3084 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3085 PMAP_LOCK(pv->pv_pmap); 3086 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3087 PTE_ISVALID(pte)) { 3088 if (PTE_ISMODIFIED(pte)) 3089 rv = TRUE; 3090 } 3091 PMAP_UNLOCK(pv->pv_pmap); 3092 if (rv) 3093 break; 3094 } 3095 rw_wunlock(&pvh_global_lock); 3096 return (rv); 3097 } 3098 3099 /* 3100 * Return whether or not the specified virtual address is eligible 3101 * for prefault. 3102 */ 3103 static boolean_t 3104 mmu_booke_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t addr) 3105 { 3106 3107 return (FALSE); 3108 } 3109 3110 /* 3111 * Return whether or not the specified physical page was referenced 3112 * in any physical maps. 3113 */ 3114 static boolean_t 3115 mmu_booke_is_referenced(mmu_t mmu, vm_page_t m) 3116 { 3117 pte_t *pte; 3118 pv_entry_t pv; 3119 boolean_t rv; 3120 3121 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3122 ("mmu_booke_is_referenced: page %p is not managed", m)); 3123 rv = FALSE; 3124 rw_wlock(&pvh_global_lock); 3125 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3126 PMAP_LOCK(pv->pv_pmap); 3127 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3128 PTE_ISVALID(pte)) { 3129 if (PTE_ISREFERENCED(pte)) 3130 rv = TRUE; 3131 } 3132 PMAP_UNLOCK(pv->pv_pmap); 3133 if (rv) 3134 break; 3135 } 3136 rw_wunlock(&pvh_global_lock); 3137 return (rv); 3138 } 3139 3140 /* 3141 * Clear the modify bits on the specified physical page. 
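 *
 * Write permission (UW/SW) is revoked along with PTE_MODIFIED, so any
 * later store through a remaining mapping has to fault before the page
 * can be dirtied again.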
3142 */ 3143 static void 3144 mmu_booke_clear_modify(mmu_t mmu, vm_page_t m) 3145 { 3146 pte_t *pte; 3147 pv_entry_t pv; 3148 3149 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3150 ("mmu_booke_clear_modify: page %p is not managed", m)); 3151 vm_page_assert_busied(m); 3152 3153 if (!pmap_page_is_write_mapped(m)) 3154 return; 3155 3156 rw_wlock(&pvh_global_lock); 3157 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3158 PMAP_LOCK(pv->pv_pmap); 3159 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3160 PTE_ISVALID(pte)) { 3161 mtx_lock_spin(&tlbivax_mutex); 3162 tlb_miss_lock(); 3163 3164 if (*pte & (PTE_SW | PTE_UW | PTE_MODIFIED)) { 3165 tlb0_flush_entry(pv->pv_va); 3166 *pte &= ~(PTE_SW | PTE_UW | PTE_MODIFIED | 3167 PTE_REFERENCED); 3168 } 3169 3170 tlb_miss_unlock(); 3171 mtx_unlock_spin(&tlbivax_mutex); 3172 } 3173 PMAP_UNLOCK(pv->pv_pmap); 3174 } 3175 rw_wunlock(&pvh_global_lock); 3176 } 3177 3178 /* 3179 * Return a count of reference bits for a page, clearing those bits. 3180 * It is not necessary for every reference bit to be cleared, but it 3181 * is necessary that 0 only be returned when there are truly no 3182 * reference bits set. 3183 * 3184 * As an optimization, update the page's dirty field if a modified bit is 3185 * found while counting reference bits. This opportunistic update can be 3186 * performed at low cost and can eliminate the need for some future calls 3187 * to pmap_is_modified(). However, since this function stops after 3188 * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some 3189 * dirty pages. Those dirty pages will only be detected by a future call 3190 * to pmap_is_modified(). 3191 */ 3192 static int 3193 mmu_booke_ts_referenced(mmu_t mmu, vm_page_t m) 3194 { 3195 pte_t *pte; 3196 pv_entry_t pv; 3197 int count; 3198 3199 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3200 ("mmu_booke_ts_referenced: page %p is not managed", m)); 3201 count = 0; 3202 rw_wlock(&pvh_global_lock); 3203 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3204 PMAP_LOCK(pv->pv_pmap); 3205 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && 3206 PTE_ISVALID(pte)) { 3207 if (PTE_ISMODIFIED(pte)) 3208 vm_page_dirty(m); 3209 if (PTE_ISREFERENCED(pte)) { 3210 mtx_lock_spin(&tlbivax_mutex); 3211 tlb_miss_lock(); 3212 3213 tlb0_flush_entry(pv->pv_va); 3214 *pte &= ~PTE_REFERENCED; 3215 3216 tlb_miss_unlock(); 3217 mtx_unlock_spin(&tlbivax_mutex); 3218 3219 if (++count >= PMAP_TS_REFERENCED_MAX) { 3220 PMAP_UNLOCK(pv->pv_pmap); 3221 break; 3222 } 3223 } 3224 } 3225 PMAP_UNLOCK(pv->pv_pmap); 3226 } 3227 rw_wunlock(&pvh_global_lock); 3228 return (count); 3229 } 3230 3231 /* 3232 * Clear the wired attribute from the mappings for the specified range of 3233 * addresses in the given pmap. Every valid mapping within that range must 3234 * have the wired attribute set. In contrast, invalid mappings cannot have 3235 * the wired attribute set, so they are ignored. 3236 * 3237 * The wired attribute of the page table entry is not a hardware feature, so 3238 * there is no need to invalidate any TLB entries. 
3239 */ 3240 static void 3241 mmu_booke_unwire(mmu_t mmu, pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 3242 { 3243 vm_offset_t va; 3244 pte_t *pte; 3245 3246 PMAP_LOCK(pmap); 3247 for (va = sva; va < eva; va += PAGE_SIZE) { 3248 if ((pte = pte_find(mmu, pmap, va)) != NULL && 3249 PTE_ISVALID(pte)) { 3250 if (!PTE_ISWIRED(pte)) 3251 panic("mmu_booke_unwire: pte %p isn't wired", 3252 pte); 3253 *pte &= ~PTE_WIRED; 3254 pmap->pm_stats.wired_count--; 3255 } 3256 } 3257 PMAP_UNLOCK(pmap); 3258 3259 } 3260 3261 /* 3262 * Return true if the pmap's pv is one of the first 16 pvs linked to from this 3263 * page. This count may be changed upwards or downwards in the future; it is 3264 * only necessary that true be returned for a small subset of pmaps for proper 3265 * page aging. 3266 */ 3267 static boolean_t 3268 mmu_booke_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m) 3269 { 3270 pv_entry_t pv; 3271 int loops; 3272 boolean_t rv; 3273 3274 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3275 ("mmu_booke_page_exists_quick: page %p is not managed", m)); 3276 loops = 0; 3277 rv = FALSE; 3278 rw_wlock(&pvh_global_lock); 3279 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3280 if (pv->pv_pmap == pmap) { 3281 rv = TRUE; 3282 break; 3283 } 3284 if (++loops >= 16) 3285 break; 3286 } 3287 rw_wunlock(&pvh_global_lock); 3288 return (rv); 3289 } 3290 3291 /* 3292 * Return the number of managed mappings to the given physical page that are 3293 * wired. 3294 */ 3295 static int 3296 mmu_booke_page_wired_mappings(mmu_t mmu, vm_page_t m) 3297 { 3298 pv_entry_t pv; 3299 pte_t *pte; 3300 int count = 0; 3301 3302 if ((m->oflags & VPO_UNMANAGED) != 0) 3303 return (count); 3304 rw_wlock(&pvh_global_lock); 3305 TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { 3306 PMAP_LOCK(pv->pv_pmap); 3307 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) 3308 if (PTE_ISVALID(pte) && PTE_ISWIRED(pte)) 3309 count++; 3310 PMAP_UNLOCK(pv->pv_pmap); 3311 } 3312 rw_wunlock(&pvh_global_lock); 3313 return (count); 3314 } 3315 3316 static int 3317 mmu_booke_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size) 3318 { 3319 int i; 3320 vm_offset_t va; 3321 3322 /* 3323 * This currently does not work for entries that 3324 * overlap TLB1 entries. 3325 */ 3326 for (i = 0; i < TLB1_ENTRIES; i ++) { 3327 if (tlb1_iomapped(i, pa, size, &va) == 0) 3328 return (0); 3329 } 3330 3331 return (EFAULT); 3332 } 3333 3334 void 3335 mmu_booke_dumpsys_map(mmu_t mmu, vm_paddr_t pa, size_t sz, void **va) 3336 { 3337 vm_paddr_t ppa; 3338 vm_offset_t ofs; 3339 vm_size_t gran; 3340 3341 /* Minidumps are based on virtual memory addresses. */ 3342 if (do_minidump) { 3343 *va = (void *)(vm_offset_t)pa; 3344 return; 3345 } 3346 3347 /* Raw physical memory dumps don't have a virtual address. */ 3348 /* We always map a 256MB page at 256M. */ 3349 gran = 256 * 1024 * 1024; 3350 ppa = rounddown2(pa, gran); 3351 ofs = pa - ppa; 3352 *va = (void *)gran; 3353 tlb1_set_entry((vm_offset_t)va, ppa, gran, _TLB_ENTRY_IO); 3354 3355 if (sz > (gran - ofs)) 3356 tlb1_set_entry((vm_offset_t)(va + gran), ppa + gran, gran, 3357 _TLB_ENTRY_IO); 3358 } 3359 3360 void 3361 mmu_booke_dumpsys_unmap(mmu_t mmu, vm_paddr_t pa, size_t sz, void *va) 3362 { 3363 vm_paddr_t ppa; 3364 vm_offset_t ofs; 3365 vm_size_t gran; 3366 tlb_entry_t e; 3367 int i; 3368 3369 /* Minidumps are based on virtual memory addresses. */ 3370 /* Nothing to do... 
*/ 3371 if (do_minidump) 3372 return; 3373 3374 for (i = 0; i < TLB1_ENTRIES; i++) { 3375 tlb1_read_entry(&e, i); 3376 if (!(e.mas1 & MAS1_VALID)) 3377 break; 3378 } 3379 3380 /* Raw physical memory dumps don't have a virtual address. */ 3381 i--; 3382 e.mas1 = 0; 3383 e.mas2 = 0; 3384 e.mas3 = 0; 3385 tlb1_write_entry(&e, i); 3386 3387 gran = 256 * 1024 * 1024; 3388 ppa = rounddown2(pa, gran); 3389 ofs = pa - ppa; 3390 if (sz > (gran - ofs)) { 3391 i--; 3392 e.mas1 = 0; 3393 e.mas2 = 0; 3394 e.mas3 = 0; 3395 tlb1_write_entry(&e, i); 3396 } 3397 } 3398 3399 extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1]; 3400 3401 void 3402 mmu_booke_scan_init(mmu_t mmu) 3403 { 3404 vm_offset_t va; 3405 pte_t *pte; 3406 int i; 3407 3408 if (!do_minidump) { 3409 /* Initialize phys. segments for dumpsys(). */ 3410 memset(&dump_map, 0, sizeof(dump_map)); 3411 mem_regions(&physmem_regions, &physmem_regions_sz, &availmem_regions, 3412 &availmem_regions_sz); 3413 for (i = 0; i < physmem_regions_sz; i++) { 3414 dump_map[i].pa_start = physmem_regions[i].mr_start; 3415 dump_map[i].pa_size = physmem_regions[i].mr_size; 3416 } 3417 return; 3418 } 3419 3420 /* Virtual segments for minidumps: */ 3421 memset(&dump_map, 0, sizeof(dump_map)); 3422 3423 /* 1st: kernel .data and .bss. */ 3424 dump_map[0].pa_start = trunc_page((uintptr_t)_etext); 3425 dump_map[0].pa_size = 3426 round_page((uintptr_t)_end) - dump_map[0].pa_start; 3427 3428 /* 2nd: msgbuf and tables (see pmap_bootstrap()). */ 3429 dump_map[1].pa_start = data_start; 3430 dump_map[1].pa_size = data_end - data_start; 3431 3432 /* 3rd: kernel VM. */ 3433 va = dump_map[1].pa_start + dump_map[1].pa_size; 3434 /* Find start of next chunk (from va). */ 3435 while (va < virtual_end) { 3436 /* Don't dump the buffer cache. */ 3437 if (va >= kmi.buffer_sva && va < kmi.buffer_eva) { 3438 va = kmi.buffer_eva; 3439 continue; 3440 } 3441 pte = pte_find(mmu, kernel_pmap, va); 3442 if (pte != NULL && PTE_ISVALID(pte)) 3443 break; 3444 va += PAGE_SIZE; 3445 } 3446 if (va < virtual_end) { 3447 dump_map[2].pa_start = va; 3448 va += PAGE_SIZE; 3449 /* Find last page in chunk. */ 3450 while (va < virtual_end) { 3451 /* Don't run into the buffer cache. */ 3452 if (va == kmi.buffer_sva) 3453 break; 3454 pte = pte_find(mmu, kernel_pmap, va); 3455 if (pte == NULL || !PTE_ISVALID(pte)) 3456 break; 3457 va += PAGE_SIZE; 3458 } 3459 dump_map[2].pa_size = va - dump_map[2].pa_start; 3460 } 3461 } 3462 3463 /* 3464 * Map a set of physical memory pages into the kernel virtual address space. 3465 * Return a pointer to where it is mapped. This routine is intended to be used 3466 * for mapping device memory, NOT real memory. 3467 */ 3468 static void * 3469 mmu_booke_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size) 3470 { 3471 3472 return (mmu_booke_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT)); 3473 } 3474 3475 static int 3476 tlb1_find_pa(vm_paddr_t pa, tlb_entry_t *e) 3477 { 3478 int i; 3479 3480 for (i = 0; i < TLB1_ENTRIES; i++) { 3481 tlb1_read_entry(e, i); 3482 if ((e->mas1 & MAS1_VALID) == 0) 3483 return (i); 3484 } 3485 return (-1); 3486 } 3487 3488 static void * 3489 mmu_booke_mapdev_attr(mmu_t mmu, vm_paddr_t pa, vm_size_t size, vm_memattr_t ma) 3490 { 3491 tlb_entry_t e; 3492 vm_paddr_t tmppa; 3493 #ifndef __powerpc64__ 3494 uintptr_t tmpva; 3495 #endif 3496 uintptr_t va; 3497 vm_size_t sz; 3498 int i; 3499 int wimge; 3500 3501 /* 3502 * Check if this is premapped in TLB1. 
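	 * If the whole requested range is already covered by a run of
	 * valid TLB1 entries with matching WIMGE attributes, reuse the
	 * existing virtual address instead of consuming more entries.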
3503 */ 3504 sz = size; 3505 tmppa = pa; 3506 va = ~0; 3507 wimge = tlb_calc_wimg(pa, ma); 3508 for (i = 0; i < TLB1_ENTRIES; i++) { 3509 tlb1_read_entry(&e, i); 3510 if (!(e.mas1 & MAS1_VALID)) 3511 continue; 3512 if (wimge != (e.mas2 & (MAS2_WIMGE_MASK & ~_TLB_ENTRY_SHARED))) 3513 continue; 3514 if (tmppa >= e.phys && tmppa < e.phys + e.size) { 3515 va = e.virt + (pa - e.phys); 3516 tmppa = e.phys + e.size; 3517 sz -= MIN(sz, e.size); 3518 while (sz > 0 && (i = tlb1_find_pa(tmppa, &e)) != -1) { 3519 if (wimge != (e.mas2 & (MAS2_WIMGE_MASK & ~_TLB_ENTRY_SHARED))) 3520 break; 3521 sz -= MIN(sz, e.size); 3522 tmppa = e.phys + e.size; 3523 } 3524 if (sz != 0) 3525 break; 3526 return ((void *)va); 3527 } 3528 } 3529 3530 size = roundup(size, PAGE_SIZE); 3531 3532 #ifdef __powerpc64__ 3533 KASSERT(pa < VM_MAPDEV_PA_MAX, 3534 ("Unsupported physical address! %lx", pa)); 3535 va = VM_MAPDEV_BASE + pa; 3536 #else 3537 /* 3538 * The device mapping area is between VM_MAXUSER_ADDRESS and 3539 * VM_MIN_KERNEL_ADDRESS. This gives 1GB of device addressing. 3540 */ 3541 #ifdef SPARSE_MAPDEV 3542 /* 3543 * With a sparse mapdev, align to the largest starting region. This 3544 * could feasibly be optimized for a 'best-fit' alignment, but that 3545 * calculation could be very costly. 3546 * Align to the smaller of: 3547 * - first set bit in overlap of (pa & size mask) 3548 * - largest size envelope 3549 * 3550 * It's possible the device mapping may start at a PA that's not larger 3551 * than the size mask, so we need to offset in to maximize the TLB entry 3552 * range and minimize the number of used TLB entries. 3553 */ 3554 do { 3555 tmpva = tlb1_map_base; 3556 sz = ffsl((~((1 << flsl(size-1)) - 1)) & pa); 3557 sz = sz ? min(roundup(sz + 3, 4), flsl(size) - 1) : flsl(size) - 1; 3558 va = roundup(tlb1_map_base, 1 << sz) | (((1 << sz) - 1) & pa); 3559 } while (!atomic_cmpset_int(&tlb1_map_base, tmpva, va + size)); 3560 va = atomic_fetchadd_int(&tlb1_map_base, size); 3561 #endif 3562 #endif 3563 3564 if (tlb1_mapin_region(va, pa, size, tlb_calc_wimg(pa, ma)) != size) 3565 return (NULL); 3566 3567 return ((void *)va); 3568 } 3569 3570 /* 3571 * 'Unmap' a range mapped by mmu_booke_mapdev(). 3572 */ 3573 static void 3574 mmu_booke_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size) 3575 { 3576 #ifdef SUPPORTS_SHRINKING_TLB1 3577 vm_offset_t base, offset; 3578 3579 /* 3580 * Unmap only if this is inside kernel virtual space. 3581 */ 3582 if ((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)) { 3583 base = trunc_page(va); 3584 offset = va & PAGE_MASK; 3585 size = roundup(offset + size, PAGE_SIZE); 3586 kva_free(base, size); 3587 } 3588 #endif 3589 } 3590 3591 /* 3592 * mmu_booke_object_init_pt preloads the ptes for a given object into the 3593 * specified pmap. This eliminates the blast of soft faults on process startup 3594 * and immediately after an mmap. 3595 */ 3596 static void 3597 mmu_booke_object_init_pt(mmu_t mmu, pmap_t pmap, vm_offset_t addr, 3598 vm_object_t object, vm_pindex_t pindex, vm_size_t size) 3599 { 3600 3601 VM_OBJECT_ASSERT_WLOCKED(object); 3602 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 3603 ("mmu_booke_object_init_pt: non-device object")); 3604 } 3605 3606 /* 3607 * Perform the pmap work for mincore. 
3608 */ 3609 static int 3610 mmu_booke_mincore(mmu_t mmu, pmap_t pmap, vm_offset_t addr, 3611 vm_paddr_t *pap) 3612 { 3613 3614 /* XXX: this should be implemented at some point */ 3615 return (0); 3616 } 3617 3618 static int 3619 mmu_booke_change_attr(mmu_t mmu, vm_offset_t addr, vm_size_t sz, 3620 vm_memattr_t mode) 3621 { 3622 vm_offset_t va; 3623 pte_t *pte; 3624 int i, j; 3625 tlb_entry_t e; 3626 3627 addr = trunc_page(addr); 3628 3629 /* Only allow changes to mapped kernel addresses. This includes: 3630 * - KVA 3631 * - DMAP (powerpc64) 3632 * - Device mappings 3633 */ 3634 if (addr <= VM_MAXUSER_ADDRESS || 3635 #ifdef __powerpc64__ 3636 (addr >= tlb1_map_base && addr < DMAP_BASE_ADDRESS) || 3637 (addr > DMAP_MAX_ADDRESS && addr < VM_MIN_KERNEL_ADDRESS) || 3638 #else 3639 (addr >= tlb1_map_base && addr < VM_MIN_KERNEL_ADDRESS) || 3640 #endif 3641 (addr > VM_MAX_KERNEL_ADDRESS)) 3642 return (EINVAL); 3643 3644 /* Check TLB1 mappings */ 3645 for (i = 0; i < TLB1_ENTRIES; i++) { 3646 tlb1_read_entry(&e, i); 3647 if (!(e.mas1 & MAS1_VALID)) 3648 continue; 3649 if (addr >= e.virt && addr < e.virt + e.size) 3650 break; 3651 } 3652 if (i < TLB1_ENTRIES) { 3653 /* Only allow full mappings to be modified for now. */ 3654 /* Validate the range. */ 3655 for (j = i, va = addr; va < addr + sz; va += e.size, j++) { 3656 tlb1_read_entry(&e, j); 3657 if (va != e.virt || (sz - (va - addr) < e.size)) 3658 return (EINVAL); 3659 } 3660 for (va = addr; va < addr + sz; va += e.size, i++) { 3661 tlb1_read_entry(&e, i); 3662 e.mas2 &= ~MAS2_WIMGE_MASK; 3663 e.mas2 |= tlb_calc_wimg(e.phys, mode); 3664 3665 /* 3666 * Write it out to the TLB. Should really re-sync with other 3667 * cores. 3668 */ 3669 tlb1_write_entry(&e, i); 3670 } 3671 return (0); 3672 } 3673 3674 /* Not in TLB1, try through pmap */ 3675 /* First validate the range. */ 3676 for (va = addr; va < addr + sz; va += PAGE_SIZE) { 3677 pte = pte_find(mmu, kernel_pmap, va); 3678 if (pte == NULL || !PTE_ISVALID(pte)) 3679 return (EINVAL); 3680 } 3681 3682 mtx_lock_spin(&tlbivax_mutex); 3683 tlb_miss_lock(); 3684 for (va = addr; va < addr + sz; va += PAGE_SIZE) { 3685 pte = pte_find(mmu, kernel_pmap, va); 3686 *pte &= ~(PTE_MAS2_MASK << PTE_MAS2_SHIFT); 3687 *pte |= tlb_calc_wimg(PTE_PA(pte), mode) << PTE_MAS2_SHIFT; 3688 tlb0_flush_entry(va); 3689 } 3690 tlb_miss_unlock(); 3691 mtx_unlock_spin(&tlbivax_mutex); 3692 3693 return (0); 3694 } 3695 3696 static void 3697 mmu_booke_page_array_startup(mmu_t mmu, long pages) 3698 { 3699 vm_page_array_size = pages; 3700 } 3701 3702 /**************************************************************************/ 3703 /* TID handling */ 3704 /**************************************************************************/ 3705 3706 /* 3707 * Allocate a TID. If necessary, steal one from someone else. 3708 * The new TID is flushed from the TLB before returning. 
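 *
 * TIDs are handed out round-robin per CPU from the PCPU tid_next counter,
 * wrapping from TID_MAX back to TID_MIN; stealing simply marks the victim
 * pmap's TID as TID_NONE so it will allocate a fresh one the next time it
 * is activated on this CPU.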
3709 */ 3710 static tlbtid_t 3711 tid_alloc(pmap_t pmap) 3712 { 3713 tlbtid_t tid; 3714 int thiscpu; 3715 3716 KASSERT((pmap != kernel_pmap), ("tid_alloc: kernel pmap")); 3717 3718 CTR2(KTR_PMAP, "%s: s (pmap = %p)", __func__, pmap); 3719 3720 thiscpu = PCPU_GET(cpuid); 3721 3722 tid = PCPU_GET(booke.tid_next); 3723 if (tid > TID_MAX) 3724 tid = TID_MIN; 3725 PCPU_SET(booke.tid_next, tid + 1); 3726 3727 /* If we are stealing TID then clear the relevant pmap's field */ 3728 if (tidbusy[thiscpu][tid] != NULL) { 3729 3730 CTR2(KTR_PMAP, "%s: warning: stealing tid %d", __func__, tid); 3731 3732 tidbusy[thiscpu][tid]->pm_tid[thiscpu] = TID_NONE; 3733 3734 /* Flush all entries from TLB0 matching this TID. */ 3735 tid_flush(tid); 3736 } 3737 3738 tidbusy[thiscpu][tid] = pmap; 3739 pmap->pm_tid[thiscpu] = tid; 3740 __asm __volatile("msync; isync"); 3741 3742 CTR3(KTR_PMAP, "%s: e (%02d next = %02d)", __func__, tid, 3743 PCPU_GET(booke.tid_next)); 3744 3745 return (tid); 3746 } 3747 3748 /**************************************************************************/ 3749 /* TLB0 handling */ 3750 /**************************************************************************/ 3751 3752 /* Convert TLB0 va and way number to tlb0[] table index. */ 3753 static inline unsigned int 3754 tlb0_tableidx(vm_offset_t va, unsigned int way) 3755 { 3756 unsigned int idx; 3757 3758 idx = (way * TLB0_ENTRIES_PER_WAY); 3759 idx += (va & MAS2_TLB0_ENTRY_IDX_MASK) >> MAS2_TLB0_ENTRY_IDX_SHIFT; 3760 return (idx); 3761 } 3762 3763 /* 3764 * Invalidate TLB0 entry. 3765 */ 3766 static inline void 3767 tlb0_flush_entry(vm_offset_t va) 3768 { 3769 3770 CTR2(KTR_PMAP, "%s: s va=0x%08x", __func__, va); 3771 3772 mtx_assert(&tlbivax_mutex, MA_OWNED); 3773 3774 __asm __volatile("tlbivax 0, %0" :: "r"(va & MAS2_EPN_MASK)); 3775 __asm __volatile("isync; msync"); 3776 __asm __volatile("tlbsync; msync"); 3777 3778 CTR1(KTR_PMAP, "%s: e", __func__); 3779 } 3780 3781 3782 /**************************************************************************/ 3783 /* TLB1 handling */ 3784 /**************************************************************************/ 3785 3786 /* 3787 * TLB1 mapping notes: 3788 * 3789 * TLB1[0] Kernel text and data. 3790 * TLB1[1-15] Additional kernel text and data mappings (if required), PCI 3791 * windows, other devices mappings. 3792 */ 3793 3794 /* 3795 * Read an entry from given TLB1 slot. 
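 *
 * External interrupts are disabled around the tlbre sequence so nothing
 * can preempt us and clobber the MAS registers between the read and the
 * copy-out below.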
3796 */ 3797 void 3798 tlb1_read_entry(tlb_entry_t *entry, unsigned int slot) 3799 { 3800 register_t msr; 3801 uint32_t mas0; 3802 3803 KASSERT((entry != NULL), ("%s(): Entry is NULL!", __func__)); 3804 3805 msr = mfmsr(); 3806 __asm __volatile("wrteei 0"); 3807 3808 mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(slot); 3809 mtspr(SPR_MAS0, mas0); 3810 __asm __volatile("isync; tlbre"); 3811 3812 entry->mas1 = mfspr(SPR_MAS1); 3813 entry->mas2 = mfspr(SPR_MAS2); 3814 entry->mas3 = mfspr(SPR_MAS3); 3815 3816 switch ((mfpvr() >> 16) & 0xFFFF) { 3817 case FSL_E500v2: 3818 case FSL_E500mc: 3819 case FSL_E5500: 3820 case FSL_E6500: 3821 entry->mas7 = mfspr(SPR_MAS7); 3822 break; 3823 default: 3824 entry->mas7 = 0; 3825 break; 3826 } 3827 __asm __volatile("wrtee %0" :: "r"(msr)); 3828 3829 entry->virt = entry->mas2 & MAS2_EPN_MASK; 3830 entry->phys = ((vm_paddr_t)(entry->mas7 & MAS7_RPN) << 32) | 3831 (entry->mas3 & MAS3_RPN); 3832 entry->size = 3833 tsize2size((entry->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT); 3834 } 3835 3836 struct tlbwrite_args { 3837 tlb_entry_t *e; 3838 unsigned int idx; 3839 }; 3840 3841 static uint32_t 3842 tlb1_find_free(void) 3843 { 3844 tlb_entry_t e; 3845 int i; 3846 3847 for (i = 0; i < TLB1_ENTRIES; i++) { 3848 tlb1_read_entry(&e, i); 3849 if ((e.mas1 & MAS1_VALID) == 0) 3850 return (i); 3851 } 3852 return (-1); 3853 } 3854 3855 static void 3856 tlb1_write_entry_int(void *arg) 3857 { 3858 struct tlbwrite_args *args = arg; 3859 uint32_t idx, mas0; 3860 3861 idx = args->idx; 3862 if (idx == -1) { 3863 idx = tlb1_find_free(); 3864 if (idx == -1) 3865 panic("No free TLB1 entries!\n"); 3866 } 3867 /* Select entry */ 3868 mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(idx); 3869 3870 mtspr(SPR_MAS0, mas0); 3871 mtspr(SPR_MAS1, args->e->mas1); 3872 mtspr(SPR_MAS2, args->e->mas2); 3873 mtspr(SPR_MAS3, args->e->mas3); 3874 switch ((mfpvr() >> 16) & 0xFFFF) { 3875 case FSL_E500mc: 3876 case FSL_E5500: 3877 case FSL_E6500: 3878 mtspr(SPR_MAS8, 0); 3879 /* FALLTHROUGH */ 3880 case FSL_E500v2: 3881 mtspr(SPR_MAS7, args->e->mas7); 3882 break; 3883 default: 3884 break; 3885 } 3886 3887 __asm __volatile("isync; tlbwe; isync; msync"); 3888 3889 } 3890 3891 static void 3892 tlb1_write_entry_sync(void *arg) 3893 { 3894 /* Empty synchronization point for smp_rendezvous(). */ 3895 } 3896 3897 /* 3898 * Write given entry to TLB1 hardware. 3899 */ 3900 static void 3901 tlb1_write_entry(tlb_entry_t *e, unsigned int idx) 3902 { 3903 struct tlbwrite_args args; 3904 3905 args.e = e; 3906 args.idx = idx; 3907 3908 #ifdef SMP 3909 if ((e->mas2 & _TLB_ENTRY_SHARED) && smp_started) { 3910 mb(); 3911 smp_rendezvous(tlb1_write_entry_sync, 3912 tlb1_write_entry_int, 3913 tlb1_write_entry_sync, &args); 3914 } else 3915 #endif 3916 { 3917 register_t msr; 3918 3919 msr = mfmsr(); 3920 __asm __volatile("wrteei 0"); 3921 tlb1_write_entry_int(&args); 3922 __asm __volatile("wrtee %0" :: "r"(msr)); 3923 } 3924 } 3925 3926 /* 3927 * Return the largest uint value log such that 2^log <= num. 3928 */ 3929 static unsigned long 3930 ilog2(unsigned long num) 3931 { 3932 long lz; 3933 3934 #ifdef __powerpc64__ 3935 __asm ("cntlzd %0, %1" : "=r" (lz) : "r" (num)); 3936 return (63 - lz); 3937 #else 3938 __asm ("cntlzw %0, %1" : "=r" (lz) : "r" (num)); 3939 return (31 - lz); 3940 #endif 3941 } 3942 3943 /* 3944 * Convert TLB TSIZE value to mapped region size. 
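 *
 * Book-E TSIZE encodes power-of-4 sizes: for example tsize 1 is 4 KB,
 * tsize 5 is 1 MB and tsize 9 is 256 MB, i.e. 2^(2 * tsize + 10) bytes.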
/*
 * Convert TLB TSIZE value to mapped region size.
 */
static vm_size_t
tsize2size(unsigned int tsize)
{

	/*
	 * size = 4^tsize KB
	 * size = 4^tsize * 2^10 = 2^(2 * tsize + 10)
	 */

	return ((1 << (2 * tsize)) * 1024);
}

/*
 * Convert region size (must be power of 4) to TLB TSIZE value.
 */
static unsigned int
size2tsize(vm_size_t size)
{

	return (ilog2(size) / 2 - 5);
}

/*
 * Register a permanent kernel mapping in TLB1.
 *
 * If an entry already covers the given range, only its flags are updated;
 * otherwise a free slot is picked when the entry is written out. These
 * entries are IPROT-protected and are not supposed to be invalidated.
 */
int
tlb1_set_entry(vm_offset_t va, vm_paddr_t pa, vm_size_t size,
    uint32_t flags)
{
	tlb_entry_t e;
	uint32_t ts, tid;
	int tsize, index;

	/* First try to update an existing entry. */
	for (index = 0; index < TLB1_ENTRIES; index++) {
		tlb1_read_entry(&e, index);
		/* Check if we're just updating the flags, and update them. */
		if (e.phys == pa && e.virt == va && e.size == size) {
			e.mas2 = (va & MAS2_EPN_MASK) | flags;
			tlb1_write_entry(&e, index);
			return (0);
		}
	}

	/* Convert size to TSIZE */
	tsize = size2tsize(size);

	tid = (TID_KERNEL << MAS1_TID_SHIFT) & MAS1_TID_MASK;
	/* XXX TS is hard coded to 0 for now as we only use a single address space. */
	ts = (0 << MAS1_TS_SHIFT) & MAS1_TS_MASK;

	e.phys = pa;
	e.virt = va;
	e.size = size;
	e.mas1 = MAS1_VALID | MAS1_IPROT | ts | tid;
	e.mas1 |= ((tsize << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK);
	e.mas2 = (va & MAS2_EPN_MASK) | flags;

	/* Set supervisor RWX permission bits */
	e.mas3 = (pa & MAS3_RPN) | MAS3_SR | MAS3_SW | MAS3_SX;
	e.mas7 = (pa >> 32) & MAS7_RPN;

	tlb1_write_entry(&e, -1);

	return (0);
}
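
/*
 * Worked example (illustrative) for the TSIZE conversions used above:
 * for a 16 MB mapping, size2tsize(16 MB) = ilog2(0x1000000) / 2 - 5 =
 * 24 / 2 - 5 = 7, and tsize2size(7) = 4^7 KB = 16384 KB = 16 MB, so the
 * two conversions round-trip for any power-of-4 size TLB1 can encode.
 */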
/*
 * Map a contiguous RAM region into TLB1.
 */
static vm_size_t
tlb1_mapin_region(vm_offset_t va, vm_paddr_t pa, vm_size_t size, int wimge)
{
	vm_offset_t base;
	vm_size_t mapped, sz, ssize;

	mapped = 0;
	base = va;
	ssize = size;

	while (size > 0) {
		sz = 1UL << (ilog2(size) & ~1);
		/* Align size to PA */
		if (pa % sz != 0) {
			do {
				sz >>= 2;
			} while (pa % sz != 0);
		}
		/* Now align from there to VA */
		if (va % sz != 0) {
			do {
				sz >>= 2;
			} while (va % sz != 0);
		}
#ifdef __powerpc64__
		/*
		 * Clamp TLB1 entries to 4G.
		 *
		 * While the e6500 supports up to 1TB mappings, the e5500
		 * only supports up to 4G mappings. (0b1011)
		 *
		 * If any e6500 machines capable of supporting a very
		 * large amount of memory appear in the future, we can
		 * revisit this.
		 *
		 * For now, though, since we have plenty of space in TLB1,
		 * always avoid creating entries larger than 4GB.
		 */
		sz = MIN(sz, 1UL << 32);
#endif
		if (bootverbose)
			printf("Wiring VA=%p to PA=%jx (size=%lx)\n",
			    (void *)va, (uintmax_t)pa, (long)sz);
		if (tlb1_set_entry(va, pa, sz,
		    _TLB_ENTRY_SHARED | wimge) < 0)
			return (mapped);
		size -= sz;
		pa += sz;
		va += sz;
	}

	mapped = (va - base);
	if (bootverbose)
		printf("mapped size 0x%"PRIxPTR" (wasted space 0x%"PRIxPTR")\n",
		    mapped, mapped - ssize);

	return (mapped);
}

/*
 * TLB1 initialization routine, to be called after the very first
 * assembler level setup done in locore.S.
 */
void
tlb1_init()
{
	vm_offset_t mas2;
	uint32_t mas0, mas1, mas3, mas7;
	uint32_t tsz;

	tlb1_get_tlbconf();

	mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(0);
	mtspr(SPR_MAS0, mas0);
	__asm __volatile("isync; tlbre");

	mas1 = mfspr(SPR_MAS1);
	mas2 = mfspr(SPR_MAS2);
	mas3 = mfspr(SPR_MAS3);
	mas7 = mfspr(SPR_MAS7);

	kernload = ((vm_paddr_t)(mas7 & MAS7_RPN) << 32) |
	    (mas3 & MAS3_RPN);

	tsz = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
	kernsize += (tsz > 0) ? tsize2size(tsz) : 0;
	kernstart = trunc_page(mas2);

	/* Setup TLB miss defaults */
	set_mas4_defaults();
}

/*
 * pmap_early_io_unmap() should be used shortly after pmap_early_io_map(),
 * as in the following snippet:
 *
 *	x = pmap_early_io_map(...);
 *	<do something with x>
 *	pmap_early_io_unmap(x, size);
 *
 * and with no other early mappings created in between.
 */
void
pmap_early_io_unmap(vm_offset_t va, vm_size_t size)
{
	int i;
	tlb_entry_t e;
	vm_size_t isize;

	size = roundup(size, PAGE_SIZE);
	isize = size;
	for (i = 0; i < TLB1_ENTRIES && size > 0; i++) {
		tlb1_read_entry(&e, i);
		if (!(e.mas1 & MAS1_VALID))
			continue;
		if (va <= e.virt && (va + isize) >= (e.virt + e.size)) {
			size -= e.size;
			e.mas1 &= ~MAS1_VALID;
			tlb1_write_entry(&e, i);
		}
	}
	if (tlb1_map_base == va + isize)
		tlb1_map_base -= isize;
}

vm_offset_t
pmap_early_io_map(vm_paddr_t pa, vm_size_t size)
{
	vm_paddr_t pa_base;
	vm_offset_t va, sz;
	int i;
	tlb_entry_t e;

	KASSERT(!pmap_bootstrapped, ("Do not use after PMAP is up!"));

	for (i = 0; i < TLB1_ENTRIES; i++) {
		tlb1_read_entry(&e, i);
		if (!(e.mas1 & MAS1_VALID))
			continue;
		if (pa >= e.phys && (pa + size) <=
		    (e.phys + e.size))
			return (e.virt + (pa - e.phys));
	}

	pa_base = rounddown(pa, PAGE_SIZE);
	size = roundup(size + (pa - pa_base), PAGE_SIZE);
	tlb1_map_base = roundup2(tlb1_map_base, 1 << (ilog2(size) & ~1));
	va = tlb1_map_base + (pa - pa_base);

	do {
		sz = 1 << (ilog2(size) & ~1);
		tlb1_set_entry(tlb1_map_base, pa_base, sz,
		    _TLB_ENTRY_SHARED | _TLB_ENTRY_IO);
		size -= sz;
		pa_base += sz;
		tlb1_map_base += sz;
	} while (size > 0);

	return (va);
}
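
/*
 * Illustration only: both tlb1_mapin_region() and pmap_early_io_map()
 * carve a region into power-of-4 sized, naturally aligned chunks via
 * "1 << (ilog2(size) & ~1)". For example, a 48 MB request starting at a
 * 16 MB aligned address is covered by three 16 MB TLB1 entries, since
 * ilog2(48 MB) == 25, 25 & ~1 == 24 and 1 << 24 == 16 MB on each pass.
 */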
void
pmap_track_page(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t pa;
	vm_page_t page;
	struct pv_entry *pve;

	va = trunc_page(va);
	pa = pmap_kextract(va);
	page = PHYS_TO_VM_PAGE(pa);

	rw_wlock(&pvh_global_lock);
	PMAP_LOCK(pmap);

	TAILQ_FOREACH(pve, &page->md.pv_list, pv_link) {
		if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) {
			goto out;
		}
	}
	page->md.pv_tracked = true;
	pv_insert(pmap, va, page);
out:
	PMAP_UNLOCK(pmap);
	rw_wunlock(&pvh_global_lock);
}

/*
 * Setup MAS4 defaults.
 * These values are loaded into MAS0-MAS2 on a TLB miss.
 */
static void
set_mas4_defaults(void)
{
	uint32_t mas4;

	/* Defaults: TLB0, PID0, TSIZED=4K */
	mas4 = MAS4_TLBSELD0;
	mas4 |= (TLB_SIZE_4K << MAS4_TSIZED_SHIFT) & MAS4_TSIZED_MASK;
#ifdef SMP
	mas4 |= MAS4_MD;
#endif
	mtspr(SPR_MAS4, mas4);
	__asm __volatile("isync");
}

/*
 * Return 0 if the physical IO range is encompassed by one of the
 * TLB1 entries, otherwise return the related error code.
 */
static int
tlb1_iomapped(int i, vm_paddr_t pa, vm_size_t size, vm_offset_t *va)
{
	uint32_t prot;
	vm_paddr_t pa_start;
	vm_paddr_t pa_end;
	unsigned int entry_tsize;
	vm_size_t entry_size;
	tlb_entry_t e;

	*va = (vm_offset_t)NULL;

	tlb1_read_entry(&e, i);
	/* Skip invalid entries */
	if (!(e.mas1 & MAS1_VALID))
		return (EINVAL);

	/*
	 * The entry must be cache-inhibited, guarded, and r/w
	 * so it can function as an i/o page.
	 */
	prot = e.mas2 & (MAS2_I | MAS2_G);
	if (prot != (MAS2_I | MAS2_G))
		return (EPERM);

	prot = e.mas3 & (MAS3_SR | MAS3_SW);
	if (prot != (MAS3_SR | MAS3_SW))
		return (EPERM);

	/* The address should be within the entry range. */
	entry_tsize = (e.mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
	KASSERT((entry_tsize), ("tlb1_iomapped: invalid entry tsize"));

	entry_size = tsize2size(entry_tsize);
	pa_start = (((vm_paddr_t)e.mas7 & MAS7_RPN) << 32) |
	    (e.mas3 & MAS3_RPN);
	pa_end = pa_start + entry_size;

	if ((pa < pa_start) || ((pa + size) > pa_end))
		return (ERANGE);

	/* Return virtual address of this mapping. */
	*va = (e.mas2 & MAS2_EPN_MASK) + (pa - pa_start);
	return (0);
}
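
/*
 * Illustration only (not compiled): a hypothetical caller of
 * tlb1_iomapped() would typically probe every TLB1 slot and reuse an
 * existing I/O mapping when one already covers the requested physical
 * range; "i", "pa", "size" and "va" below stand for the caller's locals.
 */
#if 0
	for (i = 0; i < TLB1_ENTRIES; i++)
		if (tlb1_iomapped(i, pa, size, &va) == 0)
			return (va);
#endif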
/*
 * Invalidate all TLB0 entries which match the given TID. Note this is
 * dedicated for cases when invalidations should NOT be propagated to other
 * CPUs.
 */
static void
tid_flush(tlbtid_t tid)
{
	register_t msr;
	uint32_t mas0, mas1, mas2;
	int entry, way;

	/* Don't evict kernel translations */
	if (tid == TID_KERNEL)
		return;

	msr = mfmsr();
	__asm __volatile("wrteei 0");

	/*
	 * Newer cores (e500mc and later) have tlbilx, which doesn't
	 * broadcast, so use it for PID invalidation.
	 */
	switch ((mfpvr() >> 16) & 0xffff) {
	case FSL_E500mc:
	case FSL_E5500:
	case FSL_E6500:
		mtspr(SPR_MAS6, tid << MAS6_SPID0_SHIFT);
		/* tlbilxpid */
		__asm __volatile("isync; .long 0x7c200024; isync; msync");
		__asm __volatile("wrtee %0" :: "r"(msr));
		return;
	}

	for (way = 0; way < TLB0_WAYS; way++)
		for (entry = 0; entry < TLB0_ENTRIES_PER_WAY; entry++) {
			mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way);
			mtspr(SPR_MAS0, mas0);

			mas2 = entry << MAS2_TLB0_ENTRY_IDX_SHIFT;
			mtspr(SPR_MAS2, mas2);

			__asm __volatile("isync; tlbre");

			mas1 = mfspr(SPR_MAS1);

			if (!(mas1 & MAS1_VALID))
				continue;
			if (((mas1 & MAS1_TID_MASK) >> MAS1_TID_SHIFT) != tid)
				continue;
			mas1 &= ~MAS1_VALID;
			mtspr(SPR_MAS1, mas1);
			__asm __volatile("isync; tlbwe; isync; msync");
		}
	__asm __volatile("wrtee %0" :: "r"(msr));
}

#ifdef DDB
/* Print out contents of the MAS registers for each TLB0 entry */
static void
#ifdef __powerpc64__
tlb_print_entry(int i, uint32_t mas1, uint64_t mas2, uint32_t mas3,
#else
tlb_print_entry(int i, uint32_t mas1, uint32_t mas2, uint32_t mas3,
#endif
    uint32_t mas7)
{
	int as;
	char desc[3];
	tlbtid_t tid;
	vm_size_t size;
	unsigned int tsize;

	desc[2] = '\0';
	if (mas1 & MAS1_VALID)
		desc[0] = 'V';
	else
		desc[0] = ' ';

	if (mas1 & MAS1_IPROT)
		desc[1] = 'P';
	else
		desc[1] = ' ';

	as = (mas1 & MAS1_TS_MASK) ? 1 : 0;
	tid = MAS1_GETTID(mas1);

	tsize = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
	size = 0;
	if (tsize)
		size = tsize2size(tsize);

	printf("%3d: (%s) [AS=%d] "
	    "sz = 0x%jx tsz = %d tid = %d mas1 = 0x%08x "
	    "mas2(va) = 0x%"PRI0ptrX" mas3(pa) = 0x%08x mas7 = 0x%08x\n",
	    i, desc, as, (uintmax_t)size, tsize, tid, mas1, mas2, mas3, mas7);
}

DB_SHOW_COMMAND(tlb0, tlb0_print_tlbentries)
{
	uint32_t mas0, mas1, mas3, mas7;
#ifdef __powerpc64__
	uint64_t mas2;
#else
	uint32_t mas2;
#endif
	int entryidx, way, idx;

	printf("TLB0 entries:\n");
	for (way = 0; way < TLB0_WAYS; way++)
		for (entryidx = 0; entryidx < TLB0_ENTRIES_PER_WAY; entryidx++) {
			mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way);
			mtspr(SPR_MAS0, mas0);

			mas2 = entryidx << MAS2_TLB0_ENTRY_IDX_SHIFT;
			mtspr(SPR_MAS2, mas2);

			__asm __volatile("isync; tlbre");

			mas1 = mfspr(SPR_MAS1);
			mas2 = mfspr(SPR_MAS2);
			mas3 = mfspr(SPR_MAS3);
			mas7 = mfspr(SPR_MAS7);

			idx = tlb0_tableidx(mas2, way);
			tlb_print_entry(idx, mas1, mas2, mas3, mas7);
		}
}

/*
 * Print out contents of the MAS registers for each TLB1 entry
 */
DB_SHOW_COMMAND(tlb1, tlb1_print_tlbentries)
{
	uint32_t mas0, mas1, mas3, mas7;
#ifdef __powerpc64__
	uint64_t mas2;
#else
	uint32_t mas2;
#endif
	int i;

	printf("TLB1 entries:\n");
	for (i = 0; i < TLB1_ENTRIES; i++) {
		mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(i);
		mtspr(SPR_MAS0, mas0);

		__asm __volatile("isync; tlbre");

		mas1 = mfspr(SPR_MAS1);
		mas2 = mfspr(SPR_MAS2);
		mas3 = mfspr(SPR_MAS3);
		mas7 = mfspr(SPR_MAS7);

		tlb_print_entry(i, mas1, mas2, mas3, mas7);
	}
}
#endif
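
/*
 * Illustration only: with DDB compiled in, the commands registered above
 * are invoked from the debugger prompt as "show tlb0" and "show tlb1";
 * each output line is produced by tlb_print_entry() and lists the entry
 * index, the V (valid) and P (IPROT) flags, the address space, the
 * mapping size/TSIZE/TID and the raw MAS1/MAS2/MAS3/MAS7 values.
 */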