/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski <raj@semihalf.com>
 * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
 * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Some hw specific parts of this pmap were derived or influenced
 * by NetBSD's ibm4xx pmap module. More generic code is shared with
 * a few other pmap modules from the FreeBSD tree.
 */

/*
 * VM layout notes:
 *
 * Kernel and user threads run within one common virtual address space
 * defined by AS=0.
 *
 * 32-bit pmap:
 * Virtual address space layout:
 * -----------------------------
 * 0x0000_0000 - 0x7fff_ffff	: user process
 * 0x8000_0000 - 0xbfff_ffff	: pmap_mapdev()-ed area (PCI/PCIE etc.)
 * 0xc000_0000 - 0xc0ff_ffff	: kernel reserved
 *   0xc000_0000 - data_end	: kernel code+data, env, metadata etc.
 * 0xc100_0000 - 0xffff_ffff	: KVA
 *   0xc100_0000 - 0xc100_3fff	: reserved for page zero/copy
 *   0xc100_4000 - 0xc200_3fff	: reserved for ptbl bufs
 *   0xc200_4000 - 0xc200_8fff	: guard page + kstack0
 *   0xc200_9000 - 0xfeef_ffff	: actual free KVA space
 *
 * 64-bit pmap:
 * Virtual address space layout:
 * -----------------------------
 * 0x0000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff	: user process
 *   0x0000_0000_0000_0000 - 0x8fff_ffff_ffff_ffff	: text, data, heap, maps, libraries
 *   0x9000_0000_0000_0000 - 0xafff_ffff_ffff_ffff	: mmio region
 *   0xb000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff	: stack
 * 0xc000_0000_0000_0000 - 0xcfff_ffff_ffff_ffff	: kernel reserved
 *   0xc000_0000_0000_0000 - endkernel-1		: kernel code & data
 *               endkernel - msgbufp-1			: flat device tree
 *                 msgbufp - kernel_pdir-1		: message buffer
 *             kernel_pdir - kernel_pp2d-1		: kernel page directory
 *             kernel_pp2d - .				: kernel pointers to page directory
 *       pmap_zero_copy_min - crashdumpmap-1		: reserved for page zero/copy
 *              crashdumpmap - ptbl_buf_pool_vabase-1	: reserved for ptbl bufs
 *      ptbl_buf_pool_vabase - virtual_avail-1		: user page directories and page tables
 *             virtual_avail - 0xcfff_ffff_ffff_ffff	: actual free KVA space
 * 0xd000_0000_0000_0000 - 0xdfff_ffff_ffff_ffff	: coprocessor region
 * 0xe000_0000_0000_0000 - 0xefff_ffff_ffff_ffff	: mmio region
 * 0xf000_0000_0000_0000 - 0xffff_ffff_ffff_ffff	: direct map
 *   0xf000_0000_0000_0000 - +Maxmem			: physmem map
 *                         - 0xffff_ffff_ffff_ffff	: device direct map
 */

#include <sys/cdefs.h>
#include "opt_ddb.h"
#include "opt_kstack_pages.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/malloc.h>
#include <sys/ktr.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/queue.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/linker.h>
#include <sys/msgbuf.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>
#include <vm/vm_dumpset.h>
#include <vm/uma.h>

#include <machine/_inttypes.h>
#include <machine/cpu.h>
#include <machine/pcb.h>
#include <machine/platform.h>

#include <machine/tlb.h>
#include <machine/spr.h>
#include <machine/md_var.h>
#include <machine/mmuvar.h>
#include <machine/pmap.h>
#include <machine/pte.h>

#include <ddb/ddb.h>

#define SPARSE_MAPDEV

/* Use power-of-two mappings in mmu_booke_mapdev(), to save entries. */
#define POW2_MAPPINGS

#ifdef DEBUG
#define debugf(fmt, args...) printf(fmt, ##args)
#define	__debug_used
#else
#define debugf(fmt, args...)
#define	__debug_used	__unused
#endif

#ifdef __powerpc64__
#define	PRI0ptrX	"016lx"
#else
#define	PRI0ptrX	"08x"
#endif

#define TODO			panic("%s: not implemented", __func__);

extern unsigned char _etext[];
extern unsigned char _end[];

extern uint32_t *bootinfo;

vm_paddr_t kernload;
vm_offset_t kernstart;
vm_size_t kernsize;

/* Message buffer and tables. */
static vm_offset_t data_start;
static vm_size_t data_end;

/* Phys/avail memory regions. */
static struct mem_region *availmem_regions;
static int availmem_regions_sz;
static struct mem_region *physmem_regions;
static int physmem_regions_sz;

#ifndef __powerpc64__
/* Reserved KVA space and mutex for mmu_booke_zero_page. */
static vm_offset_t zero_page_va;
static struct mtx zero_page_mutex;

/* Reserved KVA space and mutex for mmu_booke_copy_page. */
static vm_offset_t copy_page_src_va;
static vm_offset_t copy_page_dst_va;
static struct mtx copy_page_mutex;
#endif

static struct mtx tlbivax_mutex;

/**************************************************************************/
/* PMAP */
/**************************************************************************/

static int mmu_booke_enter_locked(pmap_t, vm_offset_t, vm_page_t,
    vm_prot_t, u_int flags, int8_t psind);

unsigned int kptbl_min;		/* Index of the first kernel ptbl. */
static uma_zone_t ptbl_root_zone;

/*
 * If user pmap is processed with mmu_booke_remove and the resident count
 * drops to 0, there are no more pages to remove, so we need not continue.
 */
#define PMAP_REMOVE_DONE(pmap) \
	((pmap) != kernel_pmap && (pmap)->pm_stats.resident_count == 0)

#if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__)
extern int elf32_nxstack;
#endif

/**************************************************************************/
/* TLB and TID handling */
/**************************************************************************/

/* Translation ID busy table */
static volatile pmap_t tidbusy[MAXCPU][TID_MAX + 1];

/*
 * TLB0 capabilities (entry, way numbers etc.). These can vary between e500
 * core revisions and should be read from h/w registers during early config.
 */
uint32_t tlb0_entries;
uint32_t tlb0_ways;
uint32_t tlb0_entries_per_way;
uint32_t tlb1_entries;

#define TLB0_ENTRIES		(tlb0_entries)
#define TLB0_WAYS		(tlb0_ways)
#define TLB0_ENTRIES_PER_WAY	(tlb0_entries_per_way)

#define TLB1_ENTRIES (tlb1_entries)

static tlbtid_t tid_alloc(struct pmap *);

#ifdef DDB
#ifdef __powerpc64__
static void tlb_print_entry(int, uint32_t, uint64_t, uint32_t, uint32_t);
#else
static void tlb_print_entry(int, uint32_t, uint32_t, uint32_t, uint32_t);
#endif
#endif

static void tlb1_read_entry(tlb_entry_t *, unsigned int);
static void tlb1_write_entry(tlb_entry_t *, unsigned int);
static int tlb1_iomapped(int, vm_paddr_t, vm_size_t, vm_offset_t *);
static vm_size_t tlb1_mapin_region(vm_offset_t, vm_paddr_t, vm_size_t, int);

static __inline uint32_t tlb_calc_wimg(vm_paddr_t pa, vm_memattr_t ma);

static vm_size_t tsize2size(unsigned int);
static unsigned int size2tsize(vm_size_t);
static unsigned long ilog2(unsigned long);

static void set_mas4_defaults(void);

static inline void tlb0_flush_entry(vm_offset_t);
static inline unsigned int tlb0_tableidx(vm_offset_t, unsigned int);

/**************************************************************************/
/* Page table management */
/**************************************************************************/

static struct rwlock_padalign pvh_global_lock;

/* Data for the pv entry allocation mechanism */
static uma_zone_t pvzone;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;

#define PV_ENTRY_ZONE_MIN	2048	/* min pv entries in uma zone */

#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC	200
#endif

static vm_paddr_t pte_vatopa(pmap_t, vm_offset_t);
static int pte_enter(pmap_t, vm_page_t, vm_offset_t, uint32_t, bool);
static int pte_remove(pmap_t, vm_offset_t, uint8_t);
static pte_t *pte_find(pmap_t, vm_offset_t);
static void kernel_pte_alloc(vm_offset_t, vm_offset_t);

static pv_entry_t pv_alloc(void);
static void pv_free(pv_entry_t);
static void pv_insert(pmap_t, vm_offset_t, vm_page_t);
static void pv_remove(pmap_t, vm_offset_t, vm_page_t);

static void booke_pmap_init_qpages(void);

static inline void tlb_miss_lock(void);
static inline void tlb_miss_unlock(void);

#ifdef SMP
extern tlb_entry_t __boot_tlb1[];
void pmap_bootstrap_ap(volatile uint32_t *);
#endif

/*
 * Kernel MMU interface
 */
static void mmu_booke_clear_modify(vm_page_t);
static void mmu_booke_copy(pmap_t, pmap_t, vm_offset_t,
    vm_size_t, vm_offset_t);
static void mmu_booke_copy_page(vm_page_t, vm_page_t);
static void mmu_booke_copy_pages(vm_page_t *,
    vm_offset_t, vm_page_t *, vm_offset_t, int);
static int mmu_booke_enter(pmap_t, vm_offset_t, vm_page_t,
    vm_prot_t, u_int flags, int8_t psind);
static void mmu_booke_enter_object(pmap_t, vm_offset_t, vm_offset_t,
    vm_page_t, vm_prot_t);
static void mmu_booke_enter_quick(pmap_t, vm_offset_t, vm_page_t,
    vm_prot_t);
static vm_paddr_t mmu_booke_extract(pmap_t, vm_offset_t);
static vm_page_t mmu_booke_extract_and_hold(pmap_t, vm_offset_t,
    vm_prot_t);
static void mmu_booke_init(void);
static bool mmu_booke_is_modified(vm_page_t);
static bool mmu_booke_is_prefaultable(pmap_t, vm_offset_t);
static bool mmu_booke_is_referenced(vm_page_t);
static int mmu_booke_ts_referenced(vm_page_t);
static vm_offset_t mmu_booke_map(vm_offset_t *, vm_paddr_t, vm_paddr_t,
    int);
static int mmu_booke_mincore(pmap_t, vm_offset_t,
    vm_paddr_t *);
static void mmu_booke_object_init_pt(pmap_t, vm_offset_t,
    vm_object_t, vm_pindex_t, vm_size_t);
static bool mmu_booke_page_exists_quick(pmap_t, vm_page_t);
static void mmu_booke_page_init(vm_page_t);
static int mmu_booke_page_wired_mappings(vm_page_t);
static int mmu_booke_pinit(pmap_t);
static void mmu_booke_pinit0(pmap_t);
static void mmu_booke_protect(pmap_t, vm_offset_t, vm_offset_t,
    vm_prot_t);
static void mmu_booke_qenter(vm_offset_t, vm_page_t *, int);
static void mmu_booke_qremove(vm_offset_t, int);
static void mmu_booke_release(pmap_t);
static void mmu_booke_remove(pmap_t, vm_offset_t, vm_offset_t);
static void mmu_booke_remove_all(vm_page_t);
static void mmu_booke_remove_write(vm_page_t);
static void mmu_booke_unwire(pmap_t, vm_offset_t, vm_offset_t);
static void mmu_booke_zero_page(vm_page_t);
static void mmu_booke_zero_page_area(vm_page_t, int, int);
static void mmu_booke_activate(struct thread *);
static void mmu_booke_deactivate(struct thread *);
static void mmu_booke_bootstrap(vm_offset_t, vm_offset_t);
static void *mmu_booke_mapdev(vm_paddr_t, vm_size_t);
static void *mmu_booke_mapdev_attr(vm_paddr_t, vm_size_t, vm_memattr_t);
static void mmu_booke_unmapdev(void *, vm_size_t);
static vm_paddr_t mmu_booke_kextract(vm_offset_t);
static void mmu_booke_kenter(vm_offset_t, vm_paddr_t);
static void mmu_booke_kenter_attr(vm_offset_t, vm_paddr_t, vm_memattr_t);
static void mmu_booke_kremove(vm_offset_t);
static int mmu_booke_dev_direct_mapped(vm_paddr_t, vm_size_t);
static void mmu_booke_sync_icache(pmap_t, vm_offset_t,
    vm_size_t);
static void mmu_booke_dumpsys_map(vm_paddr_t pa, size_t,
    void **);
static void mmu_booke_dumpsys_unmap(vm_paddr_t pa, size_t,
    void *);
static void mmu_booke_scan_init(void);
static vm_offset_t mmu_booke_quick_enter_page(vm_page_t m);
static void mmu_booke_quick_remove_page(vm_offset_t addr);
static int mmu_booke_change_attr(vm_offset_t addr,
    vm_size_t sz, vm_memattr_t mode);
static int mmu_booke_decode_kernel_ptr(vm_offset_t addr,
    int *is_user, vm_offset_t *decoded_addr);
static void mmu_booke_page_array_startup(long);
static bool mmu_booke_page_is_mapped(vm_page_t m);
static bool mmu_booke_ps_enabled(pmap_t pmap);

static struct pmap_funcs mmu_booke_methods = {
	/* pmap dispatcher interface */
	.clear_modify = mmu_booke_clear_modify,
	.copy = mmu_booke_copy,
	.copy_page = mmu_booke_copy_page,
	.copy_pages = mmu_booke_copy_pages,
	.enter = mmu_booke_enter,
	.enter_object = mmu_booke_enter_object,
	.enter_quick = mmu_booke_enter_quick,
	.extract = mmu_booke_extract,
	.extract_and_hold = mmu_booke_extract_and_hold,
	.init = mmu_booke_init,
	.is_modified = mmu_booke_is_modified,
	.is_prefaultable = mmu_booke_is_prefaultable,
	.is_referenced = mmu_booke_is_referenced,
	.ts_referenced = mmu_booke_ts_referenced,
	.map = mmu_booke_map,
	.mincore = mmu_booke_mincore,
	.object_init_pt = mmu_booke_object_init_pt,
	.page_exists_quick = mmu_booke_page_exists_quick,
	.page_init = mmu_booke_page_init,
	.page_wired_mappings = mmu_booke_page_wired_mappings,
	.pinit = mmu_booke_pinit,
	.pinit0 = mmu_booke_pinit0,
	.protect = mmu_booke_protect,
	.qenter = mmu_booke_qenter,
	.qremove = mmu_booke_qremove,
	.release = mmu_booke_release,
	.remove = mmu_booke_remove,
	.remove_all = mmu_booke_remove_all,
	.remove_write = mmu_booke_remove_write,
	.sync_icache = mmu_booke_sync_icache,
	.unwire = mmu_booke_unwire,
	.zero_page = mmu_booke_zero_page,
	.zero_page_area = mmu_booke_zero_page_area,
	.activate = mmu_booke_activate,
	.deactivate = mmu_booke_deactivate,
	.quick_enter_page =  mmu_booke_quick_enter_page,
	.quick_remove_page =  mmu_booke_quick_remove_page,
	.page_array_startup = mmu_booke_page_array_startup,
	.page_is_mapped = mmu_booke_page_is_mapped,
	.ps_enabled = mmu_booke_ps_enabled,

	/* Internal interfaces */
	.bootstrap = mmu_booke_bootstrap,
	.dev_direct_mapped = mmu_booke_dev_direct_mapped,
	.mapdev = mmu_booke_mapdev,
	.mapdev_attr = mmu_booke_mapdev_attr,
	.kenter = mmu_booke_kenter,
	.kenter_attr = mmu_booke_kenter_attr,
	.kextract = mmu_booke_kextract,
	.kremove = mmu_booke_kremove,
	.unmapdev = mmu_booke_unmapdev,
	.change_attr = mmu_booke_change_attr,
	.decode_kernel_ptr =  mmu_booke_decode_kernel_ptr,

	/* dumpsys() support */
	.dumpsys_map_chunk = mmu_booke_dumpsys_map,
	.dumpsys_unmap_chunk = mmu_booke_dumpsys_unmap,
	.dumpsys_pa_init = mmu_booke_scan_init,
};

MMU_DEF(booke_mmu, MMU_TYPE_BOOKE, mmu_booke_methods);

#ifdef __powerpc64__
#include "pmap_64.c"
#else
#include "pmap_32.c"
#endif

static vm_offset_t tlb1_map_base = VM_MAPDEV_BASE;

static __inline uint32_t
tlb_calc_wimg(vm_paddr_t pa, vm_memattr_t ma)
{
	uint32_t attrib;
	int i;

	if (ma != VM_MEMATTR_DEFAULT) {
		switch (ma) {
		case VM_MEMATTR_UNCACHEABLE:
			return (MAS2_I | MAS2_G);
		case VM_MEMATTR_WRITE_COMBINING:
		case VM_MEMATTR_WRITE_BACK:
		case VM_MEMATTR_PREFETCHABLE:
			return (MAS2_I);
		case VM_MEMATTR_WRITE_THROUGH:
			return (MAS2_W | MAS2_M);
		case VM_MEMATTR_CACHEABLE:
			return (MAS2_M);
		}
	}

	/*
	 * Assume the page is cache inhibited and access is guarded unless
	 * it's in our available memory array.
	 */
	attrib = _TLB_ENTRY_IO;
	for (i = 0; i < physmem_regions_sz; i++) {
		if ((pa >= physmem_regions[i].mr_start) &&
		    (pa < (physmem_regions[i].mr_start +
		    physmem_regions[i].mr_size))) {
			attrib = _TLB_ENTRY_MEM;
			break;
		}
	}

	return (attrib);
}

static inline void
tlb_miss_lock(void)
{
#ifdef SMP
	struct pcpu *pc;

	if (!smp_started)
		return;

	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
		if (pc != pcpup) {
			CTR3(KTR_PMAP, "%s: tlb miss LOCK of CPU=%d, "
			    "tlb_lock=%p", __func__, pc->pc_cpuid, pc->pc_booke.tlb_lock);

			KASSERT((pc->pc_cpuid != PCPU_GET(cpuid)),
			    ("tlb_miss_lock: tried to lock self"));

			tlb_lock(pc->pc_booke.tlb_lock);

			CTR1(KTR_PMAP, "%s: locked", __func__);
		}
	}
#endif
}

static inline void
tlb_miss_unlock(void)
{
#ifdef SMP
	struct pcpu *pc;

	if (!smp_started)
		return;

	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
		if (pc != pcpup) {
			CTR2(KTR_PMAP, "%s: tlb miss UNLOCK of CPU=%d",
			    __func__, pc->pc_cpuid);

			tlb_unlock(pc->pc_booke.tlb_lock);

			CTR1(KTR_PMAP, "%s: unlocked", __func__);
		}
	}
#endif
}

/* Return number of entries in TLB0. */
static __inline void
tlb0_get_tlbconf(void)
{
	uint32_t tlb0_cfg;

	tlb0_cfg = mfspr(SPR_TLB0CFG);
	tlb0_entries = tlb0_cfg & TLBCFG_NENTRY_MASK;
	tlb0_ways = (tlb0_cfg & TLBCFG_ASSOC_MASK) >> TLBCFG_ASSOC_SHIFT;
	tlb0_entries_per_way = tlb0_entries / tlb0_ways;
}

/* Return number of entries in TLB1. */
static __inline void
tlb1_get_tlbconf(void)
{
	uint32_t tlb1_cfg;

	tlb1_cfg = mfspr(SPR_TLB1CFG);
	tlb1_entries = tlb1_cfg & TLBCFG_NENTRY_MASK;
}

/**************************************************************************/
/* Page table related */
/**************************************************************************/

/* Allocate pv_entry structure. */
pv_entry_t
pv_alloc(void)
{
	pv_entry_t pv;

	pv_entry_count++;
	if (pv_entry_count > pv_entry_high_water)
		pagedaemon_wakeup(0); /* XXX powerpc NUMA */
	pv = uma_zalloc(pvzone, M_NOWAIT);

	return (pv);
}

/* Free pv_entry structure. */
static __inline void
pv_free(pv_entry_t pve)
{

	pv_entry_count--;
	uma_zfree(pvzone, pve);
}

/* Allocate and initialize pv_entry structure. */
static void
pv_insert(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	pv_entry_t pve;

	//int su = (pmap == kernel_pmap);
	//debugf("pv_insert: s (su = %d pmap = 0x%08x va = 0x%08x m = 0x%08x)\n", su,
	//	(u_int32_t)pmap, va, (u_int32_t)m);

	pve = pv_alloc();
	if (pve == NULL)
		panic("pv_insert: no pv entries!");

	pve->pv_pmap = pmap;
	pve->pv_va = va;

	/* add to pv_list */
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	rw_assert(&pvh_global_lock, RA_WLOCKED);

	TAILQ_INSERT_TAIL(&m->md.pv_list, pve, pv_link);

	//debugf("pv_insert: e\n");
}

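/*
 * Note: pv_alloc() allocates with M_NOWAIT and may return NULL under memory
 * pressure, which pv_insert() treats as fatal.  Both pv_insert() and
 * pv_remove() rely on the caller holding the pmap lock and the global pv
 * list lock, as asserted in both helpers.
 */
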
/* Destroy pv entry. */
static void
pv_remove(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	pv_entry_t pve;

	//int su = (pmap == kernel_pmap);
	//debugf("pv_remove: s (su = %d pmap = 0x%08x va = 0x%08x)\n", su, (u_int32_t)pmap, va);

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	rw_assert(&pvh_global_lock, RA_WLOCKED);

	/* find pv entry */
	TAILQ_FOREACH(pve, &m->md.pv_list, pv_link) {
		if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) {
			/* remove from pv_list */
			TAILQ_REMOVE(&m->md.pv_list, pve, pv_link);
			if (TAILQ_EMPTY(&m->md.pv_list))
				vm_page_aflag_clear(m, PGA_WRITEABLE);

			/* free pv entry struct */
			pv_free(pve);
			break;
		}
	}

	//debugf("pv_remove: e\n");
}

/**************************************************************************/
/* PMAP related */
/**************************************************************************/

/*
 * This is called during booke_init, before the system is really initialized.
 */
static void
mmu_booke_bootstrap(vm_offset_t start, vm_offset_t kernelend)
{
	vm_paddr_t phys_kernelend;
	struct mem_region *mp, *mp1;
	int cnt, i, j;
	vm_paddr_t s, e, sz;
	vm_paddr_t physsz, hwphyssz;
	u_int phys_avail_count __debug_used;
	vm_size_t kstack0_sz;
	vm_paddr_t kstack0_phys;
	vm_offset_t kstack0;
	void *dpcpu;

	debugf("mmu_booke_bootstrap: entered\n");

	/* Set interesting system properties */
#ifdef __powerpc64__
	hw_direct_map = 1;
#else
	hw_direct_map = 0;
#endif
#if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__)
	elf32_nxstack = 1;
#endif

	/* Initialize invalidation mutex */
	mtx_init(&tlbivax_mutex, "tlbivax", NULL, MTX_SPIN);

	/* Read TLB0 size and associativity. */
	tlb0_get_tlbconf();

	/*
	 * Align kernel start and end address (kernel image).
	 * Note that kernel end does not necessarily relate to kernsize.
	 * kernsize is the size of the kernel that is actually mapped.
	 */
	data_start = round_page(kernelend);
	data_end = data_start;

	/* Allocate the dynamic per-cpu area. */
	dpcpu = (void *)data_end;
	data_end += DPCPU_SIZE;

	/* Allocate space for the message buffer. */
	msgbufp = (struct msgbuf *)data_end;
	data_end += msgbufsize;
	debugf(" msgbufp at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n",
	    (uintptr_t)msgbufp, data_end);

	data_end = round_page(data_end);
	data_end = round_page(mmu_booke_alloc_kernel_pgtables(data_end));

	/* Retrieve phys/avail mem regions */
	mem_regions(&physmem_regions, &physmem_regions_sz,
	    &availmem_regions, &availmem_regions_sz);

	if (PHYS_AVAIL_ENTRIES < availmem_regions_sz)
		panic("mmu_booke_bootstrap: phys_avail too small");

	data_end = round_page(data_end);
	vm_page_array = (vm_page_t)data_end;
	/*
	 * Get a rough idea (upper bound) on the size of the page array.  The
	 * vm_page_array will not handle any more pages than we have in the
	 * avail_regions array, and most likely much less.
	 */
	sz = 0;
	for (mp = availmem_regions; mp->mr_size; mp++) {
		sz += mp->mr_size;
	}
	sz = (round_page(sz) / (PAGE_SIZE + sizeof(struct vm_page)));
	data_end += round_page(sz * sizeof(struct vm_page));

	/* Pre-round up to 1MB.  This wastes some space, but saves TLB entries */
	data_end = roundup2(data_end, 1 << 20);

	debugf(" data_end: 0x%"PRI0ptrX"\n", data_end);
	debugf(" kernstart: %#zx\n", kernstart);
	debugf(" kernsize: %#zx\n", kernsize);

	if (data_end - kernstart > kernsize) {
		kernsize += tlb1_mapin_region(kernstart + kernsize,
		    kernload + kernsize, (data_end - kernstart) - kernsize,
		    _TLB_ENTRY_MEM);
	}
	data_end = kernstart + kernsize;
	debugf(" updated data_end: 0x%"PRI0ptrX"\n", data_end);

	/*
	 * Clear the structures - note we can only do it safely after the
	 * possible additional TLB1 translations are in place (above) so that
	 * all range up to the currently calculated 'data_end' is covered.
	 */
	bzero((void *)data_start, data_end - data_start);
	dpcpu_init(dpcpu, 0);

	/*******************************************************/
	/* Set the start and end of kva. */
	/*******************************************************/
	virtual_avail = round_page(data_end);
	virtual_end = VM_MAX_KERNEL_ADDRESS;

#ifndef __powerpc64__
	/* Allocate KVA space for page zero/copy operations. */
	zero_page_va = virtual_avail;
	virtual_avail += PAGE_SIZE;
	copy_page_src_va = virtual_avail;
	virtual_avail += PAGE_SIZE;
	copy_page_dst_va = virtual_avail;
	virtual_avail += PAGE_SIZE;
	debugf("zero_page_va = 0x%"PRI0ptrX"\n", zero_page_va);
	debugf("copy_page_src_va = 0x%"PRI0ptrX"\n", copy_page_src_va);
	debugf("copy_page_dst_va = 0x%"PRI0ptrX"\n", copy_page_dst_va);

	/* Initialize page zero/copy mutexes. */
	mtx_init(&zero_page_mutex, "mmu_booke_zero_page", NULL, MTX_DEF);
	mtx_init(&copy_page_mutex, "mmu_booke_copy_page", NULL, MTX_DEF);

	/* Allocate KVA space for ptbl bufs. */
	ptbl_buf_pool_vabase = virtual_avail;
	virtual_avail += PTBL_BUFS * PTBL_PAGES * PAGE_SIZE;
	debugf("ptbl_buf_pool_vabase = 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n",
	    ptbl_buf_pool_vabase, virtual_avail);
#endif
#ifdef __powerpc64__
	/* Allocate KVA space for crashdumpmap. */
	crashdumpmap = (caddr_t)virtual_avail;
	virtual_avail += MAXDUMPPGS * PAGE_SIZE;
#endif

	/* Calculate corresponding physical addresses for the kernel region. */
	phys_kernelend = kernload + kernsize;
	debugf("kernel image and allocated data:\n");
	debugf(" kernload = 0x%09jx\n", (uintmax_t)kernload);
	debugf(" kernstart = 0x%"PRI0ptrX"\n", kernstart);
	debugf(" kernsize = 0x%"PRI0ptrX"\n", kernsize);

	/*
	 * Remove kernel physical address range from avail regions list. Page
	 * align all regions.  Non-page aligned memory isn't very interesting
	 * to us.  Also, sort the entries for ascending addresses.
	 */

	sz = 0;
	cnt = availmem_regions_sz;
	debugf("processing avail regions:\n");
	for (mp = availmem_regions; mp->mr_size; mp++) {
		s = mp->mr_start;
		e = mp->mr_start + mp->mr_size;
		debugf(" %09jx-%09jx -> ", (uintmax_t)s, (uintmax_t)e);
		/* Check whether this region holds all of the kernel. */
		if (s < kernload && e > phys_kernelend) {
			availmem_regions[cnt].mr_start = phys_kernelend;
			availmem_regions[cnt++].mr_size = e - phys_kernelend;
			e = kernload;
		}
		/* Look whether this region starts within the kernel. */
		if (s >= kernload && s < phys_kernelend) {
			if (e <= phys_kernelend)
				goto empty;
			s = phys_kernelend;
		}
		/* Now look whether this region ends within the kernel. */
		if (e > kernload && e <= phys_kernelend) {
			if (s >= kernload)
				goto empty;
			e = kernload;
		}
		/* Now page align the start and size of the region. */
		s = round_page(s);
		e = trunc_page(e);
		if (e < s)
			e = s;
		sz = e - s;
		debugf("%09jx-%09jx = %jx\n",
		    (uintmax_t)s, (uintmax_t)e, (uintmax_t)sz);

		/* Check whether some memory is left here. */
		if (sz == 0) {
		empty:
			memmove(mp, mp + 1,
			    (cnt - (mp - availmem_regions)) * sizeof(*mp));
			cnt--;
			mp--;
			continue;
		}

		/* Do an insertion sort. */
		for (mp1 = availmem_regions; mp1 < mp; mp1++)
			if (s < mp1->mr_start)
				break;
		if (mp1 < mp) {
			memmove(mp1 + 1, mp1, (char *)mp - (char *)mp1);
			mp1->mr_start = s;
			mp1->mr_size = sz;
		} else {
			mp->mr_start = s;
			mp->mr_size = sz;
		}
	}
	availmem_regions_sz = cnt;

	/*******************************************************/
	/* Steal physical memory for kernel stack from the end */
	/* of the first avail region                           */
	/*******************************************************/
	kstack0_sz = kstack_pages * PAGE_SIZE;
	kstack0_phys = availmem_regions[0].mr_start +
	    availmem_regions[0].mr_size;
	kstack0_phys -= kstack0_sz;
	availmem_regions[0].mr_size -= kstack0_sz;

	/*******************************************************/
	/* Fill in phys_avail table, based on availmem_regions */
	/*******************************************************/
	phys_avail_count = 0;
	physsz = 0;
	hwphyssz = 0;
	TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz);

	debugf("fill in phys_avail:\n");
	for (i = 0, j = 0; i < availmem_regions_sz; i++, j += 2) {
		debugf(" region: 0x%jx - 0x%jx (0x%jx)\n",
		    (uintmax_t)availmem_regions[i].mr_start,
		    (uintmax_t)availmem_regions[i].mr_start +
			availmem_regions[i].mr_size,
		    (uintmax_t)availmem_regions[i].mr_size);

		if (hwphyssz != 0 &&
		    (physsz + availmem_regions[i].mr_size) >= hwphyssz) {
			debugf(" hw.physmem adjust\n");
			if (physsz < hwphyssz) {
				phys_avail[j] = availmem_regions[i].mr_start;
				phys_avail[j + 1] =
				    availmem_regions[i].mr_start +
				    hwphyssz - physsz;
				physsz = hwphyssz;
				phys_avail_count++;
				dump_avail[j] = phys_avail[j];
				dump_avail[j + 1] = phys_avail[j + 1];
			}
			break;
		}

		phys_avail[j] = availmem_regions[i].mr_start;
		phys_avail[j + 1] = availmem_regions[i].mr_start +
		    availmem_regions[i].mr_size;
		phys_avail_count++;
		physsz += availmem_regions[i].mr_size;
		dump_avail[j] = phys_avail[j];
		dump_avail[j + 1] = phys_avail[j + 1];
	}
	physmem = btoc(physsz);

	/* Calculate the last available physical address. */
	for (i = 0; phys_avail[i + 2] != 0; i += 2)
		;
	Maxmem = powerpc_btop(phys_avail[i + 1]);

	debugf("Maxmem = 0x%08lx\n", Maxmem);
	debugf("phys_avail_count = %d\n", phys_avail_count);
	debugf("physsz = 0x%09jx physmem = %jd (0x%09jx)\n",
	    (uintmax_t)physsz, (uintmax_t)physmem, (uintmax_t)physmem);

#ifdef __powerpc64__
	/*
	 * Map the physical memory contiguously in TLB1.
	 * Round so it fits into a single mapping.
	 */
	tlb1_mapin_region(DMAP_BASE_ADDRESS, 0,
	    phys_avail[i + 1], _TLB_ENTRY_MEM);
#endif

	/*******************************************************/
	/* Initialize (statically allocated) kernel pmap. */
	/*******************************************************/
	PMAP_LOCK_INIT(kernel_pmap);

	debugf("kernel_pmap = 0x%"PRI0ptrX"\n", (uintptr_t)kernel_pmap);
	kernel_pte_alloc(virtual_avail, kernstart);
	for (i = 0; i < MAXCPU; i++) {
		kernel_pmap->pm_tid[i] = TID_KERNEL;

		/* Initialize each CPU's tidbusy entry 0 with kernel_pmap */
		tidbusy[i][TID_KERNEL] = kernel_pmap;
	}

	/* Mark kernel_pmap active on all CPUs */
	CPU_FILL(&kernel_pmap->pm_active);

	/*
	 * Initialize the global pv list lock.
	 */
	rw_init(&pvh_global_lock, "pmap pv global");

	/*******************************************************/
	/* Final setup */
	/*******************************************************/

	/* Enter kstack0 into kernel map, provide guard page */
	kstack0 = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE;
	thread0.td_kstack = kstack0;
	thread0.td_kstack_pages = kstack_pages;

	debugf("kstack_sz = 0x%08jx\n", (uintmax_t)kstack0_sz);
	debugf("kstack0_phys at 0x%09jx - 0x%09jx\n",
	    (uintmax_t)kstack0_phys, (uintmax_t)kstack0_phys + kstack0_sz);
	debugf("kstack0 at 0x%"PRI0ptrX" - 0x%"PRI0ptrX"\n",
	    kstack0, kstack0 + kstack0_sz);

	virtual_avail += KSTACK_GUARD_PAGES * PAGE_SIZE + kstack0_sz;
	for (i = 0; i < kstack_pages; i++) {
		mmu_booke_kenter(kstack0, kstack0_phys);
		kstack0 += PAGE_SIZE;
		kstack0_phys += PAGE_SIZE;
	}

	pmap_bootstrapped = 1;

	debugf("virtual_avail = %"PRI0ptrX"\n", virtual_avail);
	debugf("virtual_end   = %"PRI0ptrX"\n", virtual_end);

	debugf("mmu_booke_bootstrap: exit\n");
}

#ifdef SMP
void
tlb1_ap_prep(void)
{
	tlb_entry_t *e, tmp;
	unsigned int i;

	/* Prepare TLB1 image for AP processors */
	e = __boot_tlb1;
	for (i = 0; i < TLB1_ENTRIES; i++) {
		tlb1_read_entry(&tmp, i);

		if ((tmp.mas1 & MAS1_VALID) && (tmp.mas2 & _TLB_ENTRY_SHARED))
			memcpy(e++, &tmp, sizeof(tmp));
	}
}

void
pmap_bootstrap_ap(volatile uint32_t *trcp __unused)
{
	int i;

	/*
	 * Finish TLB1 configuration: the BSP already set up its TLB1 and we
	 * have the snapshot of its contents in the s/w __boot_tlb1[] table
	 * created by tlb1_ap_prep(), so use these values directly to
	 * (re)program AP's TLB1 hardware.
	 *
	 * Start at index 1 because index 0 has the kernel map.
	 */
	for (i = 1; i < TLB1_ENTRIES; i++) {
		if (__boot_tlb1[i].mas1 & MAS1_VALID)
			tlb1_write_entry(&__boot_tlb1[i], i);
	}

	set_mas4_defaults();
}
#endif

static void
booke_pmap_init_qpages(void)
{
	struct pcpu *pc;
	int i;

	CPU_FOREACH(i) {
		pc = pcpu_find(i);
		pc->pc_qmap_addr = kva_alloc(PAGE_SIZE);
		if (pc->pc_qmap_addr == 0)
			panic("pmap_init_qpages: unable to allocate KVA");
	}
}

SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, booke_pmap_init_qpages, NULL);

/*
 * Get the physical page address for the given pmap/virtual address.
 */
static vm_paddr_t
mmu_booke_extract(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t pa;

	PMAP_LOCK(pmap);
	pa = pte_vatopa(pmap, va);
	PMAP_UNLOCK(pmap);

	return (pa);
}

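/*
 * Illustrative sketch (not part of the original file): these methods are
 * normally reached through the machine-independent pmap layer, which
 * dispatches into mmu_booke_methods.  Kept under #if 0 so it is not compiled.
 */
#if 0
	/* Physical address backing a mapping in 'pmap' (0 if none). */
	pa = pmap_extract(pmap, va);

	/* Physical address backing a kernel VA, including TLB1 mappings. */
	pa = pmap_kextract(kva);
#endif
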
/*
 * Extract the physical page address associated with the given
 * kernel virtual address.
 */
static vm_paddr_t
mmu_booke_kextract(vm_offset_t va)
{
	tlb_entry_t e;
	vm_paddr_t p = 0;
	int i;

#ifdef __powerpc64__
	if (va >= DMAP_BASE_ADDRESS && va <= DMAP_MAX_ADDRESS)
		return (DMAP_TO_PHYS(va));
#endif

	if (va >= VM_MIN_KERNEL_ADDRESS && va <= VM_MAX_KERNEL_ADDRESS)
		p = pte_vatopa(kernel_pmap, va);

	if (p == 0) {
		/* Check TLB1 mappings */
		for (i = 0; i < TLB1_ENTRIES; i++) {
			tlb1_read_entry(&e, i);
			if (!(e.mas1 & MAS1_VALID))
				continue;
			if (va >= e.virt && va < e.virt + e.size)
				return (e.phys + (va - e.virt));
		}
	}

	return (p);
}

/*
 * Initialize the pmap module.
 * Called by vm_init, to initialize any structures that the pmap
 * system needs to map virtual memory.
 */
static void
mmu_booke_init(void)
{
	int shpgperproc = PMAP_SHPGPERPROC;

	/*
	 * Initialize the address space (zone) for the pv entries.  Set a
	 * high water mark so that the system can recover from excessive
	 * numbers of pv entries.
	 */
	pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);

	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
	pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count;

	TUNABLE_INT_FETCH("vm.pmap.pv_entry_max", &pv_entry_max);
	pv_entry_high_water = 9 * (pv_entry_max / 10);

	uma_zone_reserve_kva(pvzone, pv_entry_max);

	/* Pre-fill pvzone with initial number of pv entries. */
	uma_prealloc(pvzone, PV_ENTRY_ZONE_MIN);

	/* Create a UMA zone for page table roots. */
	ptbl_root_zone = uma_zcreate("pmap root", PMAP_ROOT_SIZE,
	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, UMA_ZONE_VM);

	/* Initialize ptbl allocation. */
	ptbl_init();
}

/*
 * Map a list of wired pages into kernel virtual address space.  This is
 * intended for temporary mappings which do not need page modification or
 * references recorded.  Existing mappings in the region are overwritten.
 */
static void
mmu_booke_qenter(vm_offset_t sva, vm_page_t *m, int count)
{
	vm_offset_t va;

	va = sva;
	while (count-- > 0) {
		mmu_booke_kenter(va, VM_PAGE_TO_PHYS(*m));
		va += PAGE_SIZE;
		m++;
	}
}

/*
 * Remove page mappings from kernel virtual address space.  Intended for
 * temporary mappings entered by mmu_booke_qenter.
 */
static void
mmu_booke_qremove(vm_offset_t sva, int count)
{
	vm_offset_t va;

	va = sva;
	while (count-- > 0) {
		mmu_booke_kremove(va);
		va += PAGE_SIZE;
	}
}

/*
 * Map a wired page into kernel virtual address space.
 */
static void
mmu_booke_kenter(vm_offset_t va, vm_paddr_t pa)
{

	mmu_booke_kenter_attr(va, pa, VM_MEMATTR_DEFAULT);
}

static void
mmu_booke_kenter_attr(vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma)
{
	uint32_t flags;
	pte_t *pte;

	KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) &&
	    (va <= VM_MAX_KERNEL_ADDRESS)), ("mmu_booke_kenter: invalid va"));

	flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID;
	flags |= tlb_calc_wimg(pa, ma) << PTE_MAS2_SHIFT;
	flags |= PTE_PS_4KB;

	pte = pte_find(kernel_pmap, va);
NULL PTE")); 1152 1153 mtx_lock_spin(&tlbivax_mutex); 1154 tlb_miss_lock(); 1155 1156 if (PTE_ISVALID(pte)) { 1157 CTR1(KTR_PMAP, "%s: replacing entry!", __func__); 1158 1159 /* Flush entry from TLB0 */ 1160 tlb0_flush_entry(va); 1161 } 1162 1163 *pte = PTE_RPN_FROM_PA(pa) | flags; 1164 1165 //debugf("mmu_booke_kenter: pdir_idx = %d ptbl_idx = %d va=0x%08x " 1166 // "pa=0x%08x rpn=0x%08x flags=0x%08x\n", 1167 // pdir_idx, ptbl_idx, va, pa, pte->rpn, pte->flags); 1168 1169 /* Flush the real memory from the instruction cache. */ 1170 if ((flags & (PTE_I | PTE_G)) == 0) 1171 __syncicache((void *)va, PAGE_SIZE); 1172 1173 tlb_miss_unlock(); 1174 mtx_unlock_spin(&tlbivax_mutex); 1175 } 1176 1177 /* 1178 * Remove a page from kernel page table. 1179 */ 1180 static void 1181 mmu_booke_kremove(vm_offset_t va) 1182 { 1183 pte_t *pte; 1184 1185 CTR2(KTR_PMAP,"%s: s (va = 0x%"PRI0ptrX")\n", __func__, va); 1186 1187 KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && 1188 (va <= VM_MAX_KERNEL_ADDRESS)), 1189 ("mmu_booke_kremove: invalid va")); 1190 1191 pte = pte_find(kernel_pmap, va); 1192 1193 if (!PTE_ISVALID(pte)) { 1194 CTR1(KTR_PMAP, "%s: invalid pte", __func__); 1195 1196 return; 1197 } 1198 1199 mtx_lock_spin(&tlbivax_mutex); 1200 tlb_miss_lock(); 1201 1202 /* Invalidate entry in TLB0, update PTE. */ 1203 tlb0_flush_entry(va); 1204 *pte = 0; 1205 1206 tlb_miss_unlock(); 1207 mtx_unlock_spin(&tlbivax_mutex); 1208 } 1209 1210 /* 1211 * Figure out where a given kernel pointer (usually in a fault) points 1212 * to from the VM's perspective, potentially remapping into userland's 1213 * address space. 1214 */ 1215 static int 1216 mmu_booke_decode_kernel_ptr(vm_offset_t addr, int *is_user, 1217 vm_offset_t *decoded_addr) 1218 { 1219 1220 if (trunc_page(addr) <= VM_MAXUSER_ADDRESS) 1221 *is_user = 1; 1222 else 1223 *is_user = 0; 1224 1225 *decoded_addr = addr; 1226 return (0); 1227 } 1228 1229 static bool 1230 mmu_booke_page_is_mapped(vm_page_t m) 1231 { 1232 1233 return (!TAILQ_EMPTY(&(m)->md.pv_list)); 1234 } 1235 1236 static bool 1237 mmu_booke_ps_enabled(pmap_t pmap __unused) 1238 { 1239 return (false); 1240 } 1241 1242 /* 1243 * Initialize pmap associated with process 0. 1244 */ 1245 static void 1246 mmu_booke_pinit0(pmap_t pmap) 1247 { 1248 1249 PMAP_LOCK_INIT(pmap); 1250 mmu_booke_pinit(pmap); 1251 PCPU_SET(curpmap, pmap); 1252 } 1253 1254 /* 1255 * Insert the given physical page at the specified virtual address in the 1256 * target physical map with the protection requested. If specified the page 1257 * will be wired down. 
/*
 * Insert the given physical page at the specified virtual address in the
 * target physical map with the protection requested.  If specified the page
 * will be wired down.
 */
static int
mmu_booke_enter(pmap_t pmap, vm_offset_t va, vm_page_t m,
    vm_prot_t prot, u_int flags, int8_t psind)
{
	int error;

	rw_wlock(&pvh_global_lock);
	PMAP_LOCK(pmap);
	error = mmu_booke_enter_locked(pmap, va, m, prot, flags, psind);
	PMAP_UNLOCK(pmap);
	rw_wunlock(&pvh_global_lock);
	return (error);
}

static int
mmu_booke_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
    vm_prot_t prot, u_int pmap_flags, int8_t psind __unused)
{
	pte_t *pte;
	vm_paddr_t pa;
	pte_t flags;
	int error, su, sync;

	pa = VM_PAGE_TO_PHYS(m);
	su = (pmap == kernel_pmap);
	sync = 0;

	//debugf("mmu_booke_enter_locked: s (pmap=0x%08x su=%d tid=%d m=0x%08x va=0x%08x "
	//    "pa=0x%08x prot=0x%08x flags=%#x)\n",
	//    (u_int32_t)pmap, su, pmap->pm_tid,
	//    (u_int32_t)m, va, pa, prot, flags);

	if (su) {
		KASSERT(((va >= virtual_avail) &&
		    (va <= VM_MAX_KERNEL_ADDRESS)),
		    ("mmu_booke_enter_locked: kernel pmap, non kernel va"));
	} else {
		KASSERT((va <= VM_MAXUSER_ADDRESS),
		    ("mmu_booke_enter_locked: user pmap, non user va"));
	}
	if ((m->oflags & VPO_UNMANAGED) == 0) {
		if ((pmap_flags & PMAP_ENTER_QUICK_LOCKED) == 0)
			VM_PAGE_OBJECT_BUSY_ASSERT(m);
		else
			VM_OBJECT_ASSERT_LOCKED(m->object);
	}

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	/*
	 * If there is an existing mapping, and the physical address has not
	 * changed, must be protection or wiring change.
	 */
	if (((pte = pte_find(pmap, va)) != NULL) &&
	    (PTE_ISVALID(pte)) && (PTE_PA(pte) == pa)) {

		/*
		 * Before actually updating pte->flags we calculate and
		 * prepare its new value in a helper var.
		 */
		flags = *pte;
		flags &= ~(PTE_UW | PTE_UX | PTE_SW | PTE_SX | PTE_MODIFIED);

		/* Wiring change, just update stats. */
		if ((pmap_flags & PMAP_ENTER_WIRED) != 0) {
			if (!PTE_ISWIRED(pte)) {
				flags |= PTE_WIRED;
				pmap->pm_stats.wired_count++;
			}
		} else {
			if (PTE_ISWIRED(pte)) {
				flags &= ~PTE_WIRED;
				pmap->pm_stats.wired_count--;
			}
		}

		if (prot & VM_PROT_WRITE) {
			/* Add write permissions. */
			flags |= PTE_SW;
			if (!su)
				flags |= PTE_UW;

			if ((flags & PTE_MANAGED) != 0)
				vm_page_aflag_set(m, PGA_WRITEABLE);
		} else {
			/* Handle modified pages, sense modify status. */

			/*
			 * The PTE_MODIFIED flag could be set by underlying
			 * TLB misses since we last read it (above), possibly
			 * other CPUs could update it so we check in the PTE
			 * directly rather than rely on that saved local flags
			 * copy.
			 */
			if (PTE_ISMODIFIED(pte))
				vm_page_dirty(m);
		}

		if (prot & VM_PROT_EXECUTE) {
			flags |= PTE_SX;
			if (!su)
				flags |= PTE_UX;

			/*
			 * Check existing flags for execute permissions: if we
			 * are turning execute permissions on, icache should
			 * be flushed.
			 */
			if ((*pte & (PTE_UX | PTE_SX)) == 0)
				sync++;
		}

		flags &= ~PTE_REFERENCED;

		/*
		 * The new flags value is all calculated -- only now actually
		 * update the PTE.
		 */
		mtx_lock_spin(&tlbivax_mutex);
		tlb_miss_lock();

		tlb0_flush_entry(va);
		*pte &= ~PTE_FLAGS_MASK;
		*pte |= flags;

		tlb_miss_unlock();
		mtx_unlock_spin(&tlbivax_mutex);

	} else {
		/*
		 * If there is an existing mapping, but it's for a different
		 * physical address, pte_enter() will delete the old mapping.
		 */
		//if ((pte != NULL) && PTE_ISVALID(pte))
		//	debugf("mmu_booke_enter_locked: replace\n");
		//else
		//	debugf("mmu_booke_enter_locked: new\n");

		/* Now set up the flags and install the new mapping. */
		flags = (PTE_SR | PTE_VALID);
		flags |= PTE_M;

		if (!su)
			flags |= PTE_UR;

		if (prot & VM_PROT_WRITE) {
			flags |= PTE_SW;
			if (!su)
				flags |= PTE_UW;

			if ((m->oflags & VPO_UNMANAGED) == 0)
				vm_page_aflag_set(m, PGA_WRITEABLE);
		}

		if (prot & VM_PROT_EXECUTE) {
			flags |= PTE_SX;
			if (!su)
				flags |= PTE_UX;
		}

		/* If it's wired, update stats. */
		if ((pmap_flags & PMAP_ENTER_WIRED) != 0)
			flags |= PTE_WIRED;

		error = pte_enter(pmap, m, va, flags,
		    (pmap_flags & PMAP_ENTER_NOSLEEP) != 0);
		if (error != 0)
			return (KERN_RESOURCE_SHORTAGE);

		if ((flags & PMAP_ENTER_WIRED) != 0)
			pmap->pm_stats.wired_count++;

		/* Flush the real memory from the instruction cache. */
		if (prot & VM_PROT_EXECUTE)
			sync++;
	}

	if (sync && (su || pmap == PCPU_GET(curpmap))) {
		__syncicache((void *)va, PAGE_SIZE);
		sync = 0;
	}

	return (KERN_SUCCESS);
}

/*
 * Maps a sequence of resident pages belonging to the same object.
 * The sequence begins with the given page m_start.  This page is
 * mapped at the given virtual address start.  Each subsequent page is
 * mapped at a virtual address that is offset from start by the same
 * amount as the page is offset from m_start within the object.  The
 * last page in the sequence is the page with the largest offset from
 * m_start that can be mapped at a virtual address less than the given
 * virtual address end.  Not every virtual page between start and end
 * is mapped; only those for which a resident page exists with the
 * corresponding offset from m_start are mapped.
 */
static void
mmu_booke_enter_object(pmap_t pmap, vm_offset_t start,
    vm_offset_t end, vm_page_t m_start, vm_prot_t prot)
{
	vm_page_t m;
	vm_pindex_t diff, psize;

	VM_OBJECT_ASSERT_LOCKED(m_start->object);

	psize = atop(end - start);
	m = m_start;
	rw_wlock(&pvh_global_lock);
	PMAP_LOCK(pmap);
	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
		mmu_booke_enter_locked(pmap, start + ptoa(diff), m,
		    prot & (VM_PROT_READ | VM_PROT_EXECUTE),
		    PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, 0);
		m = TAILQ_NEXT(m, listq);
	}
	PMAP_UNLOCK(pmap);
	rw_wunlock(&pvh_global_lock);
}

static void
mmu_booke_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m,
    vm_prot_t prot)
{

	rw_wlock(&pvh_global_lock);
	PMAP_LOCK(pmap);
	mmu_booke_enter_locked(pmap, va, m,
	    prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP |
	    PMAP_ENTER_QUICK_LOCKED, 0);
	PMAP_UNLOCK(pmap);
	rw_wunlock(&pvh_global_lock);
}

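/*
 * Note: both wrappers above deliberately strip VM_PROT_WRITE and pass
 * PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, so prefaulted mappings are
 * created read/execute only and never sleep; write access, if needed, is
 * added later through a full mmu_booke_enter() call.
 */
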
/*
 * Remove the given range of addresses from the specified map.
 *
 * It is assumed that the start and end are properly rounded to the page size.
 */
static void
mmu_booke_remove(pmap_t pmap, vm_offset_t va, vm_offset_t endva)
{
	pte_t *pte;
	uint8_t hold_flag;

	int su = (pmap == kernel_pmap);

	//debugf("mmu_booke_remove: s (su = %d pmap=0x%08x tid=%d va=0x%08x endva=0x%08x)\n",
	//    su, (u_int32_t)pmap, pmap->pm_tid, va, endva);

	if (su) {
		KASSERT(((va >= virtual_avail) &&
		    (va <= VM_MAX_KERNEL_ADDRESS)),
		    ("mmu_booke_remove: kernel pmap, non kernel va"));
	} else {
		KASSERT((va <= VM_MAXUSER_ADDRESS),
		    ("mmu_booke_remove: user pmap, non user va"));
	}

	if (PMAP_REMOVE_DONE(pmap)) {
		//debugf("mmu_booke_remove: e (empty)\n");
		return;
	}

	hold_flag = PTBL_HOLD_FLAG(pmap);
	//debugf("mmu_booke_remove: hold_flag = %d\n", hold_flag);

	rw_wlock(&pvh_global_lock);
	PMAP_LOCK(pmap);
	for (; va < endva; va += PAGE_SIZE) {
		pte = pte_find_next(pmap, &va);
		if ((pte == NULL) || !PTE_ISVALID(pte))
			break;
		if (va >= endva)
			break;
		pte_remove(pmap, va, hold_flag);
	}
	PMAP_UNLOCK(pmap);
	rw_wunlock(&pvh_global_lock);

	//debugf("mmu_booke_remove: e\n");
}

/*
 * Remove physical page from all pmaps in which it resides.
 */
static void
mmu_booke_remove_all(vm_page_t m)
{
	pv_entry_t pv, pvn;
	uint8_t hold_flag;

	rw_wlock(&pvh_global_lock);
	TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_link, pvn) {
		PMAP_LOCK(pv->pv_pmap);
		hold_flag = PTBL_HOLD_FLAG(pv->pv_pmap);
		pte_remove(pv->pv_pmap, pv->pv_va, hold_flag);
		PMAP_UNLOCK(pv->pv_pmap);
	}
	vm_page_aflag_clear(m, PGA_WRITEABLE);
	rw_wunlock(&pvh_global_lock);
}

/*
 * Map a range of physical addresses into kernel virtual address space.
 */
static vm_offset_t
mmu_booke_map(vm_offset_t *virt, vm_paddr_t pa_start,
    vm_paddr_t pa_end, int prot)
{
	vm_offset_t sva = *virt;
	vm_offset_t va = sva;

#ifdef __powerpc64__
	/* XXX: Handle memory not starting at 0x0. */
	if (pa_end < ctob(Maxmem))
		return (PHYS_TO_DMAP(pa_start));
#endif

	while (pa_start < pa_end) {
		mmu_booke_kenter(va, pa_start);
		va += PAGE_SIZE;
		pa_start += PAGE_SIZE;
	}
	*virt = va;

	return (sva);
}

/*
 * The pmap must be activated before its address space can be accessed in any
 * way.
 */
static void
mmu_booke_activate(struct thread *td)
{
	pmap_t pmap;
	u_int cpuid;

	pmap = &td->td_proc->p_vmspace->vm_pmap;

	CTR5(KTR_PMAP, "%s: s (td = %p, proc = '%s', id = %d, pmap = 0x%"PRI0ptrX")",
	    __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap);

	KASSERT((pmap != kernel_pmap), ("mmu_booke_activate: kernel_pmap!"));

	sched_pin();

	cpuid = PCPU_GET(cpuid);
	CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
	PCPU_SET(curpmap, pmap);

	if (pmap->pm_tid[cpuid] == TID_NONE)
		tid_alloc(pmap);

	/* Load PID0 register with pmap tid value. */
	mtspr(SPR_PID0, pmap->pm_tid[cpuid]);
	__asm __volatile("isync");

	mtspr(SPR_DBCR0, td->td_pcb->pcb_cpu.booke.dbcr0);

	sched_unpin();

	CTR3(KTR_PMAP, "%s: e (tid = %d for '%s')", __func__,
	    pmap->pm_tid[PCPU_GET(cpuid)], td->td_proc->p_comm);
}

/*
 * Deactivate the specified process's address space.
 */
static void
mmu_booke_deactivate(struct thread *td)
{
	pmap_t pmap;

	pmap = &td->td_proc->p_vmspace->vm_pmap;

	CTR5(KTR_PMAP, "%s: td=%p, proc = '%s', id = %d, pmap = 0x%"PRI0ptrX,
	    __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap);

	td->td_pcb->pcb_cpu.booke.dbcr0 = mfspr(SPR_DBCR0);

	CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmap->pm_active);
	PCPU_SET(curpmap, NULL);
}

/*
 * Copy the range specified by src_addr/len
 * from the source map to the range dst_addr/len
 * in the destination map.
 *
 * This routine is only advisory and need not do anything.
 */
static void
mmu_booke_copy(pmap_t dst_pmap, pmap_t src_pmap,
    vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr)
{

}

/*
 * Set the physical protection on the specified range of this map as requested.
 */
static void
mmu_booke_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
    vm_prot_t prot)
{
	vm_offset_t va;
	vm_page_t m;
	pte_t *pte;

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		mmu_booke_remove(pmap, sva, eva);
		return;
	}

	if (prot & VM_PROT_WRITE)
		return;

	PMAP_LOCK(pmap);
	for (va = sva; va < eva; va += PAGE_SIZE) {
		if ((pte = pte_find(pmap, va)) != NULL) {
			if (PTE_ISVALID(pte)) {
				m = PHYS_TO_VM_PAGE(PTE_PA(pte));

				mtx_lock_spin(&tlbivax_mutex);
				tlb_miss_lock();

				/* Handle modified pages. */
				if (PTE_ISMODIFIED(pte) && PTE_ISMANAGED(pte))
					vm_page_dirty(m);

				tlb0_flush_entry(va);
				*pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED);

				tlb_miss_unlock();
				mtx_unlock_spin(&tlbivax_mutex);
			}
		}
	}
	PMAP_UNLOCK(pmap);
}

/*
 * Clear the write and modified bits in each of the given page's mappings.
 */
static void
mmu_booke_remove_write(vm_page_t m)
{
	pv_entry_t pv;
	pte_t *pte;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("mmu_booke_remove_write: page %p is not managed", m));
	vm_page_assert_busied(m);

	if (!pmap_page_is_write_mapped(m))
		return;
	rw_wlock(&pvh_global_lock);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
		PMAP_LOCK(pv->pv_pmap);
		if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL) {
			if (PTE_ISVALID(pte)) {
				m = PHYS_TO_VM_PAGE(PTE_PA(pte));

				mtx_lock_spin(&tlbivax_mutex);
				tlb_miss_lock();

				/* Handle modified pages. */
				if (PTE_ISMODIFIED(pte))
					vm_page_dirty(m);

				/* Flush mapping from TLB0. */
				*pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED);

				tlb_miss_unlock();
				mtx_unlock_spin(&tlbivax_mutex);
			}
		}
		PMAP_UNLOCK(pv->pv_pmap);
	}
	vm_page_aflag_clear(m, PGA_WRITEABLE);
	rw_wunlock(&pvh_global_lock);
}

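/*
 * Note: mmu_booke_extract_and_hold() below only returns (and wires) the page
 * when the existing PTE already satisfies the requested protection; for
 * VM_PROT_WRITE this means PTE_SW (kernel) or PTE_UW (user) must already be
 * set.
 */
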
/*
 * Atomically extract and hold the physical page with the given
 * pmap and virtual address pair if that mapping permits the given
 * protection.
 */
static vm_page_t
mmu_booke_extract_and_hold(pmap_t pmap, vm_offset_t va,
    vm_prot_t prot)
{
	pte_t *pte;
	vm_page_t m;
	uint32_t pte_wbit;

	m = NULL;
	PMAP_LOCK(pmap);
	pte = pte_find(pmap, va);
	if ((pte != NULL) && PTE_ISVALID(pte)) {
		if (pmap == kernel_pmap)
			pte_wbit = PTE_SW;
		else
			pte_wbit = PTE_UW;

		if ((*pte & pte_wbit) != 0 || (prot & VM_PROT_WRITE) == 0) {
			m = PHYS_TO_VM_PAGE(PTE_PA(pte));
			if (!vm_page_wire_mapped(m))
				m = NULL;
		}
	}
	PMAP_UNLOCK(pmap);
	return (m);
}

/*
 * Initialize a vm_page's machine-dependent fields.
 */
static void
mmu_booke_page_init(vm_page_t m)
{

	m->md.pv_tracked = 0;
	TAILQ_INIT(&m->md.pv_list);
}

/*
 * Return whether or not the specified physical page was modified
 * in any of physical maps.
 */
static bool
mmu_booke_is_modified(vm_page_t m)
{
	pte_t *pte;
	pv_entry_t pv;
	bool rv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("mmu_booke_is_modified: page %p is not managed", m));
	rv = false;

	/*
	 * If the page is not busied then this check is racy.
	 */
	if (!pmap_page_is_write_mapped(m))
		return (false);

	rw_wlock(&pvh_global_lock);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
		PMAP_LOCK(pv->pv_pmap);
		if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL &&
		    PTE_ISVALID(pte)) {
			if (PTE_ISMODIFIED(pte))
				rv = true;
		}
		PMAP_UNLOCK(pv->pv_pmap);
		if (rv)
			break;
	}
	rw_wunlock(&pvh_global_lock);
	return (rv);
}

/*
 * Return whether or not the specified virtual address is eligible
 * for prefault.
 */
static bool
mmu_booke_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{

	return (false);
}

/*
 * Return whether or not the specified physical page was referenced
 * in any physical maps.
 */
static bool
mmu_booke_is_referenced(vm_page_t m)
{
	pte_t *pte;
	pv_entry_t pv;
	bool rv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("mmu_booke_is_referenced: page %p is not managed", m));
	rv = false;
	rw_wlock(&pvh_global_lock);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
		PMAP_LOCK(pv->pv_pmap);
		if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL &&
		    PTE_ISVALID(pte)) {
			if (PTE_ISREFERENCED(pte))
				rv = true;
		}
		PMAP_UNLOCK(pv->pv_pmap);
		if (rv)
			break;
	}
	rw_wunlock(&pvh_global_lock);
	return (rv);
}

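/*
 * Note: PTE_REFERENCED and PTE_MODIFIED are maintained in the PTE by
 * software (the TLB miss path may set them behind our back, as noted in
 * mmu_booke_enter_locked()), so clearing them below also requires flushing
 * the stale entry from TLB0 while holding tlbivax_mutex and the per-CPU
 * TLB miss locks.
 */
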
/*
 * Clear the modify bits on the specified physical page.
 */
static void
mmu_booke_clear_modify(vm_page_t m)
{
	pte_t *pte;
	pv_entry_t pv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("mmu_booke_clear_modify: page %p is not managed", m));
	vm_page_assert_busied(m);

	if (!pmap_page_is_write_mapped(m))
		return;

	rw_wlock(&pvh_global_lock);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
		PMAP_LOCK(pv->pv_pmap);
		if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL &&
		    PTE_ISVALID(pte)) {
			mtx_lock_spin(&tlbivax_mutex);
			tlb_miss_lock();

			if (*pte & (PTE_SW | PTE_UW | PTE_MODIFIED)) {
				tlb0_flush_entry(pv->pv_va);
				*pte &= ~(PTE_SW | PTE_UW | PTE_MODIFIED |
				    PTE_REFERENCED);
			}

			tlb_miss_unlock();
			mtx_unlock_spin(&tlbivax_mutex);
		}
		PMAP_UNLOCK(pv->pv_pmap);
	}
	rw_wunlock(&pvh_global_lock);
}

/*
 * Return a count of reference bits for a page, clearing those bits.
 * It is not necessary for every reference bit to be cleared, but it
 * is necessary that 0 only be returned when there are truly no
 * reference bits set.
 *
 * As an optimization, update the page's dirty field if a modified bit is
 * found while counting reference bits.  This opportunistic update can be
 * performed at low cost and can eliminate the need for some future calls
 * to pmap_is_modified().  However, since this function stops after
 * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some
 * dirty pages.  Those dirty pages will only be detected by a future call
 * to pmap_is_modified().
 */
static int
mmu_booke_ts_referenced(vm_page_t m)
{
	pte_t *pte;
	pv_entry_t pv;
	int count;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("mmu_booke_ts_referenced: page %p is not managed", m));
	count = 0;
	rw_wlock(&pvh_global_lock);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
		PMAP_LOCK(pv->pv_pmap);
		if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL &&
		    PTE_ISVALID(pte)) {
			if (PTE_ISMODIFIED(pte))
				vm_page_dirty(m);
			if (PTE_ISREFERENCED(pte)) {
				mtx_lock_spin(&tlbivax_mutex);
				tlb_miss_lock();

				tlb0_flush_entry(pv->pv_va);
				*pte &= ~PTE_REFERENCED;

				tlb_miss_unlock();
				mtx_unlock_spin(&tlbivax_mutex);

				if (++count >= PMAP_TS_REFERENCED_MAX) {
					PMAP_UNLOCK(pv->pv_pmap);
					break;
				}
			}
		}
		PMAP_UNLOCK(pv->pv_pmap);
	}
	rw_wunlock(&pvh_global_lock);
	return (count);
}

/*
 * Clear the wired attribute from the mappings for the specified range of
 * addresses in the given pmap.  Every valid mapping within that range must
 * have the wired attribute set.  In contrast, invalid mappings cannot have
 * the wired attribute set, so they are ignored.
 *
 * The wired attribute of the page table entry is not a hardware feature, so
 * there is no need to invalidate any TLB entries.
 */
static void
mmu_booke_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t va;
	pte_t *pte;

	PMAP_LOCK(pmap);
	for (va = sva; va < eva; va += PAGE_SIZE) {
		if ((pte = pte_find(pmap, va)) != NULL &&
		    PTE_ISVALID(pte)) {
			if (!PTE_ISWIRED(pte))
				panic("mmu_booke_unwire: pte %p isn't wired",
				    pte);
			*pte &= ~PTE_WIRED;
			pmap->pm_stats.wired_count--;
		}
	}
	PMAP_UNLOCK(pmap);
}

/*
 * Return true if the pmap's pv is one of the first 16 pvs linked to from this
 * page.  This count may be changed upwards or downwards in the future; it is
 * only necessary that true be returned for a small subset of pmaps for proper
 * page aging.
 */
static bool
mmu_booke_page_exists_quick(pmap_t pmap, vm_page_t m)
{
	pv_entry_t pv;
	int loops;
	bool rv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("mmu_booke_page_exists_quick: page %p is not managed", m));
	loops = 0;
	rv = false;
	rw_wlock(&pvh_global_lock);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
		if (pv->pv_pmap == pmap) {
			rv = true;
			break;
		}
		if (++loops >= 16)
			break;
	}
	rw_wunlock(&pvh_global_lock);
	return (rv);
}

/*
 * Return the number of managed mappings to the given physical page that are
 * wired.
 */
static int
mmu_booke_page_wired_mappings(vm_page_t m)
{
	pv_entry_t pv;
	pte_t *pte;
	int count = 0;

	if ((m->oflags & VPO_UNMANAGED) != 0)
		return (count);
	rw_wlock(&pvh_global_lock);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
		PMAP_LOCK(pv->pv_pmap);
		if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL)
			if (PTE_ISVALID(pte) && PTE_ISWIRED(pte))
				count++;
		PMAP_UNLOCK(pv->pv_pmap);
	}
	rw_wunlock(&pvh_global_lock);
	return (count);
}

static int
mmu_booke_dev_direct_mapped(vm_paddr_t pa, vm_size_t size)
{
	int i;
	vm_offset_t va;

	/*
	 * This currently does not work for entries that
	 * overlap TLB1 entries.
	 */
	for (i = 0; i < TLB1_ENTRIES; i++) {
		if (tlb1_iomapped(i, pa, size, &va) == 0)
			return (0);
	}

	return (EFAULT);
}

void
mmu_booke_dumpsys_map(vm_paddr_t pa, size_t sz, void **va)
{
	vm_paddr_t ppa;
	vm_offset_t ofs;
	vm_size_t gran;

	/* Minidumps are based on virtual memory addresses. */
	if (do_minidump) {
		*va = (void *)(vm_offset_t)pa;
		return;
	}

	/* Raw physical memory dumps don't have a virtual address. */
	/* We always map a 256MB page at 256M. */
	gran = 256 * 1024 * 1024;
	ppa = rounddown2(pa, gran);
	ofs = pa - ppa;
	*va = (void *)gran;
	tlb1_set_entry((vm_offset_t)*va, ppa, gran, _TLB_ENTRY_IO);

	if (sz > (gran - ofs))
		tlb1_set_entry((vm_offset_t)*va + gran, ppa + gran, gran,
		    _TLB_ENTRY_IO);
}

void
mmu_booke_dumpsys_unmap(vm_paddr_t pa, size_t sz, void *va)
{
	vm_paddr_t ppa;
	vm_offset_t ofs;
	vm_size_t gran;
	tlb_entry_t e;
	int i;

	/* Minidumps are based on virtual memory addresses. */
	/* Nothing to do... */
	if (do_minidump)
		return;

	for (i = 0; i < TLB1_ENTRIES; i++) {
		tlb1_read_entry(&e, i);
		if (!(e.mas1 & MAS1_VALID))
			break;
	}
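
	/*
	 * The scan above stopped at the first free TLB1 slot, so the
	 * mapping(s) most recently installed by mmu_booke_dumpsys_map()
	 * occupy the slot(s) immediately below it.
	 */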
	/* Raw physical memory dumps don't have a virtual address. */
	i--;
	e.mas1 = 0;
	e.mas2 = 0;
	e.mas3 = 0;
	tlb1_write_entry(&e, i);

	gran = 256 * 1024 * 1024;
	ppa = rounddown2(pa, gran);
	ofs = pa - ppa;
	if (sz > (gran - ofs)) {
		i--;
		e.mas1 = 0;
		e.mas2 = 0;
		e.mas3 = 0;
		tlb1_write_entry(&e, i);
	}
}

extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1];

void
mmu_booke_scan_init(void)
{
	vm_offset_t va;
	pte_t *pte;
	int i;

	if (!do_minidump) {
		/* Initialize phys. segments for dumpsys(). */
		memset(&dump_map, 0, sizeof(dump_map));
		mem_regions(&physmem_regions, &physmem_regions_sz, &availmem_regions,
		    &availmem_regions_sz);
		for (i = 0; i < physmem_regions_sz; i++) {
			dump_map[i].pa_start = physmem_regions[i].mr_start;
			dump_map[i].pa_size = physmem_regions[i].mr_size;
		}
		return;
	}

	/* Virtual segments for minidumps: */
	memset(&dump_map, 0, sizeof(dump_map));

	/* 1st: kernel .data and .bss. */
	dump_map[0].pa_start = trunc_page((uintptr_t)_etext);
	dump_map[0].pa_size =
	    round_page((uintptr_t)_end) - dump_map[0].pa_start;

	/* 2nd: msgbuf and tables (see pmap_bootstrap()). */
	dump_map[1].pa_start = data_start;
	dump_map[1].pa_size = data_end - data_start;

	/* 3rd: kernel VM. */
	va = dump_map[1].pa_start + dump_map[1].pa_size;
	/* Find start of next chunk (from va). */
	while (va < virtual_end) {
		/* Don't dump the buffer cache. */
		if (va >= kmi.buffer_sva && va < kmi.buffer_eva) {
			va = kmi.buffer_eva;
			continue;
		}
		pte = pte_find(kernel_pmap, va);
		if (pte != NULL && PTE_ISVALID(pte))
			break;
		va += PAGE_SIZE;
	}
	if (va < virtual_end) {
		dump_map[2].pa_start = va;
		va += PAGE_SIZE;
		/* Find last page in chunk. */
		while (va < virtual_end) {
			/* Don't run into the buffer cache. */
			if (va == kmi.buffer_sva)
				break;
			pte = pte_find(kernel_pmap, va);
			if (pte == NULL || !PTE_ISVALID(pte))
				break;
			va += PAGE_SIZE;
		}
		dump_map[2].pa_size = va - dump_map[2].pa_start;
	}
}

/*
 * Map a set of physical memory pages into the kernel virtual address space.
 * Return a pointer to where it is mapped.  This routine is intended to be used
 * for mapping device memory, NOT real memory.
 */
static void *
mmu_booke_mapdev(vm_paddr_t pa, vm_size_t size)
{

	return (mmu_booke_mapdev_attr(pa, size, VM_MEMATTR_DEFAULT));
}

static int
tlb1_find_pa(vm_paddr_t pa, tlb_entry_t *e)
{
	int i;

	for (i = 0; i < TLB1_ENTRIES; i++) {
		tlb1_read_entry(e, i);
		if ((e->mas1 & MAS1_VALID) == 0)
			continue;
		if (e->phys == pa)
			return (i);
	}
	return (-1);
}

static void *
mmu_booke_mapdev_attr(vm_paddr_t pa, vm_size_t size, vm_memattr_t ma)
{
	tlb_entry_t e;
	vm_paddr_t tmppa;
#ifndef __powerpc64__
	uintptr_t tmpva;
#endif
	uintptr_t va, retva;
	vm_size_t sz;
	int i;
	int wimge;

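	/*
	 * Strategy: first try to reuse an existing TLB1 mapping that already
	 * covers the requested range with a compatible WIMG; otherwise pick a
	 * virtual address (a fixed offset from the PA on powerpc64, carved
	 * out of tlb1_map_base otherwise) and create new entries below.
	 */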
	/*
	 * Check if this is premapped in TLB1.
	 */
	sz = size;
	tmppa = pa;
	va = ~0;
	wimge = tlb_calc_wimg(pa, ma);
	for (i = 0; i < TLB1_ENTRIES; i++) {
		tlb1_read_entry(&e, i);
		if (!(e.mas1 & MAS1_VALID))
			continue;
		if (wimge != (e.mas2 & (MAS2_WIMGE_MASK & ~_TLB_ENTRY_SHARED)))
			continue;
		if (tmppa >= e.phys && tmppa < e.phys + e.size) {
			va = e.virt + (pa - e.phys);
			tmppa = e.phys + e.size;
			sz -= MIN(sz, e.size - (pa - e.phys));
			while (sz > 0 && (i = tlb1_find_pa(tmppa, &e)) != -1) {
				if (wimge != (e.mas2 & (MAS2_WIMGE_MASK & ~_TLB_ENTRY_SHARED)))
					break;
				sz -= MIN(sz, e.size);
				tmppa = e.phys + e.size;
			}
			if (sz != 0)
				break;
			return ((void *)va);
		}
	}

	size = roundup(size, PAGE_SIZE);

#ifdef __powerpc64__
	KASSERT(pa < VM_MAPDEV_PA_MAX,
	    ("Unsupported physical address! %lx", pa));
	va = VM_MAPDEV_BASE + pa;
	retva = va;
#ifdef POW2_MAPPINGS
	/*
	 * Align the mapping to a power of 2 size, taking into account that we
	 * may need to increase the size multiple times to satisfy the size and
	 * alignment requirements.
	 *
	 * This works in the general case because it's very rare (near never?)
	 * to have different access properties (WIMG) within a single
	 * power-of-two region.  If a design does call for that, POW2_MAPPINGS
	 * can be undefined, and exact mappings will be used instead.
	 */
	sz = size;
	size = roundup2(size, 1 << ilog2(size));
	while (rounddown2(va, size) + size < va + sz)
		size <<= 1;
	va = rounddown2(va, size);
	pa = rounddown2(pa, size);
#endif
#else
	/*
	 * The device mapping area is between VM_MAXUSER_ADDRESS and
	 * VM_MIN_KERNEL_ADDRESS.  This gives 1GB of device addressing.
	 */
#ifdef SPARSE_MAPDEV
	/*
	 * With a sparse mapdev, align to the largest starting region.  This
	 * could feasibly be optimized for a 'best-fit' alignment, but that
	 * calculation could be very costly.
	 * Align to the smaller of:
	 * - first set bit in overlap of (pa & size mask)
	 * - largest size envelope
	 *
	 * It's possible the device mapping may start at a PA that's not larger
	 * than the size mask, so we need to offset in to maximize the TLB entry
	 * range and minimize the number of used TLB entries.
	 */
	do {
		tmpva = tlb1_map_base;
		sz = ffsl((~((1 << flsl(size-1)) - 1)) & pa);
		sz = sz ? min(roundup(sz + 3, 4), flsl(size) - 1) : flsl(size) - 1;
		va = roundup(tlb1_map_base, 1 << sz) | (((1 << sz) - 1) & pa);
	} while (!atomic_cmpset_int(&tlb1_map_base, tmpva, va + size));
#endif
	va = atomic_fetchadd_int(&tlb1_map_base, size);
	retva = va;
#endif

	if (tlb1_mapin_region(va, pa, size, tlb_calc_wimg(pa, ma)) != size)
		return (NULL);

	return ((void *)retva);
}
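
/*
 * Illustrative usage (not part of this file): drivers normally reach the
 * mapdev/unmapdev routines above and below through the machine-independent
 * wrappers, e.g.:
 *
 *	void *regs;
 *
 *	regs = pmap_mapdev_attr(pa, size, VM_MEMATTR_DEFAULT);
 *	...
 *	pmap_unmapdev(regs, size);
 */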

/*
 * 'Unmap' a range mapped by mmu_booke_mapdev().
 */
static void
mmu_booke_unmapdev(void *p, vm_size_t size)
{
#ifdef SUPPORTS_SHRINKING_TLB1
	vm_offset_t base, offset, va;

	/*
	 * Unmap only if this is inside kernel virtual space.
	 */
	va = (vm_offset_t)p;
	if ((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)) {
		base = trunc_page(va);
		offset = va & PAGE_MASK;
		size = roundup(offset + size, PAGE_SIZE);
		mmu_booke_qremove(base, atop(size));
		kva_free(base, size);
	}
#endif
}

/*
 * mmu_booke_object_init_pt preloads the ptes for a given object into the
 * specified pmap.  This eliminates the blast of soft faults on process startup
 * and immediately after an mmap.
 */
static void
mmu_booke_object_init_pt(pmap_t pmap, vm_offset_t addr,
    vm_object_t object, vm_pindex_t pindex, vm_size_t size)
{

	VM_OBJECT_ASSERT_WLOCKED(object);
	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
	    ("mmu_booke_object_init_pt: non-device object"));
}

/*
 * Perform the pmap work for mincore.
 */
static int
mmu_booke_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *pap)
{

	/* XXX: this should be implemented at some point */
	return (0);
}

static int
mmu_booke_change_attr(vm_offset_t addr, vm_size_t sz, vm_memattr_t mode)
{
	vm_offset_t va;
	pte_t *pte;
	int i, j;
	tlb_entry_t e;

	addr = trunc_page(addr);

	/* Only allow changes to mapped kernel addresses.  This includes:
	 * - KVA
	 * - DMAP (powerpc64)
	 * - Device mappings
	 */
	if (addr <= VM_MAXUSER_ADDRESS ||
#ifdef __powerpc64__
	    (addr >= tlb1_map_base && addr < DMAP_BASE_ADDRESS) ||
	    (addr > DMAP_MAX_ADDRESS && addr < VM_MIN_KERNEL_ADDRESS) ||
#else
	    (addr >= tlb1_map_base && addr < VM_MIN_KERNEL_ADDRESS) ||
#endif
	    (addr > VM_MAX_KERNEL_ADDRESS))
		return (EINVAL);

	/* Check TLB1 mappings */
	for (i = 0; i < TLB1_ENTRIES; i++) {
		tlb1_read_entry(&e, i);
		if (!(e.mas1 & MAS1_VALID))
			continue;
		if (addr >= e.virt && addr < e.virt + e.size)
			break;
	}
	if (i < TLB1_ENTRIES) {
		/* Only allow full mappings to be modified for now. */
		/* Validate the range. */
		for (j = i, va = addr; va < addr + sz; va += e.size, j++) {
			tlb1_read_entry(&e, j);
			if (va != e.virt || (sz - (va - addr) < e.size))
				return (EINVAL);
		}
		for (va = addr; va < addr + sz; va += e.size, i++) {
			tlb1_read_entry(&e, i);
			e.mas2 &= ~MAS2_WIMGE_MASK;
			e.mas2 |= tlb_calc_wimg(e.phys, mode);

			/*
			 * Write it out to the TLB.  Should really re-sync with other
			 * cores.
			 */
			tlb1_write_entry(&e, i);
		}
		return (0);
	}

	/* Not in TLB1, try through pmap */
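	/*
	 * Two passes: validate every page of the range first, so that a
	 * missing or invalid PTE cannot leave the range half-converted, then
	 * rewrite the WIMG bits and flush TLB0 under the tlbivax lock.
	 */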
	/* First validate the range. */
	for (va = addr; va < addr + sz; va += PAGE_SIZE) {
		pte = pte_find(kernel_pmap, va);
		if (pte == NULL || !PTE_ISVALID(pte))
			return (EINVAL);
	}

	mtx_lock_spin(&tlbivax_mutex);
	tlb_miss_lock();
	for (va = addr; va < addr + sz; va += PAGE_SIZE) {
		pte = pte_find(kernel_pmap, va);
		*pte &= ~(PTE_MAS2_MASK << PTE_MAS2_SHIFT);
		*pte |= tlb_calc_wimg(PTE_PA(pte), mode) << PTE_MAS2_SHIFT;
		tlb0_flush_entry(va);
	}
	tlb_miss_unlock();
	mtx_unlock_spin(&tlbivax_mutex);

	return (0);
}

static void
mmu_booke_page_array_startup(long pages)
{
	vm_page_array_size = pages;
}

/**************************************************************************/
/* TID handling                                                           */
/**************************************************************************/

/*
 * Allocate a TID.  If necessary, steal one from someone else.
 * The new TID is flushed from the TLB before returning.
 */
static tlbtid_t
tid_alloc(pmap_t pmap)
{
	tlbtid_t tid;
	int thiscpu;

	KASSERT((pmap != kernel_pmap), ("tid_alloc: kernel pmap"));

	CTR2(KTR_PMAP, "%s: s (pmap = %p)", __func__, pmap);

	thiscpu = PCPU_GET(cpuid);

	tid = PCPU_GET(booke.tid_next);
	if (tid > TID_MAX)
		tid = TID_MIN;
	PCPU_SET(booke.tid_next, tid + 1);

	/* If we are stealing a TID, clear the relevant pmap's field. */
	if (tidbusy[thiscpu][tid] != NULL) {
		CTR2(KTR_PMAP, "%s: warning: stealing tid %d", __func__, tid);

		tidbusy[thiscpu][tid]->pm_tid[thiscpu] = TID_NONE;

		/* Flush all entries from TLB0 matching this TID. */
		tid_flush(tid);
	}

	tidbusy[thiscpu][tid] = pmap;
	pmap->pm_tid[thiscpu] = tid;
	__asm __volatile("msync; isync");

	CTR3(KTR_PMAP, "%s: e (%02d next = %02d)", __func__, tid,
	    PCPU_GET(booke.tid_next));

	return (tid);
}

/**************************************************************************/
/* TLB0 handling                                                          */
/**************************************************************************/

/* Convert TLB0 va and way number to tlb0[] table index. */
static inline unsigned int
tlb0_tableidx(vm_offset_t va, unsigned int way)
{
	unsigned int idx;

	idx = (way * TLB0_ENTRIES_PER_WAY);
	idx += (va & MAS2_TLB0_ENTRY_IDX_MASK) >> MAS2_TLB0_ENTRY_IDX_SHIFT;
	return (idx);
}

/*
 * Invalidate TLB0 entry.
 */
static inline void
tlb0_flush_entry(vm_offset_t va)
{

	CTR2(KTR_PMAP, "%s: s va=0x%08x", __func__, va);

	mtx_assert(&tlbivax_mutex, MA_OWNED);

	__asm __volatile("tlbivax 0, %0" :: "r"(va & MAS2_EPN_MASK));
	__asm __volatile("isync; msync");
	__asm __volatile("tlbsync; msync");

	CTR1(KTR_PMAP, "%s: e", __func__);
}

/**************************************************************************/
/* TLB1 handling                                                          */
/**************************************************************************/

/*
 * TLB1 mapping notes:
 *
 * TLB1[0]	Kernel text and data.
 * TLB1[1-15]	Additional kernel text and data mappings (if required), PCI
 *		windows, other device mappings.
 */

/*
 * Read an entry from given TLB1 slot.
 */
void
tlb1_read_entry(tlb_entry_t *entry, unsigned int slot)
{
	register_t msr;
	uint32_t mas0;

	KASSERT((entry != NULL), ("%s(): Entry is NULL!", __func__));

	msr = mfmsr();
	__asm __volatile("wrteei 0");

	mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(slot);
	mtspr(SPR_MAS0, mas0);
	__asm __volatile("isync; tlbre");

	entry->mas1 = mfspr(SPR_MAS1);
	entry->mas2 = mfspr(SPR_MAS2);
	entry->mas3 = mfspr(SPR_MAS3);

	switch ((mfpvr() >> 16) & 0xFFFF) {
	case FSL_E500v2:
	case FSL_E500mc:
	case FSL_E5500:
	case FSL_E6500:
		entry->mas7 = mfspr(SPR_MAS7);
		break;
	default:
		entry->mas7 = 0;
		break;
	}
	__asm __volatile("wrtee %0" :: "r"(msr));

	entry->virt = entry->mas2 & MAS2_EPN_MASK;
	entry->phys = ((vm_paddr_t)(entry->mas7 & MAS7_RPN) << 32) |
	    (entry->mas3 & MAS3_RPN);
	entry->size =
	    tsize2size((entry->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT);
}

struct tlbwrite_args {
	tlb_entry_t *e;
	unsigned int idx;
};

static uint32_t
tlb1_find_free(void)
{
	tlb_entry_t e;
	int i;

	for (i = 0; i < TLB1_ENTRIES; i++) {
		tlb1_read_entry(&e, i);
		if ((e.mas1 & MAS1_VALID) == 0)
			return (i);
	}
	return (-1);
}

static void
tlb1_purge_va_range(vm_offset_t va, vm_size_t size)
{
	tlb_entry_t e;
	int i;

	for (i = 0; i < TLB1_ENTRIES; i++) {
		tlb1_read_entry(&e, i);
		if ((e.mas1 & MAS1_VALID) == 0)
			continue;
		if ((e.mas2 & MAS2_EPN_MASK) >= va &&
		    (e.mas2 & MAS2_EPN_MASK) < va + size) {
			mtspr(SPR_MAS1, e.mas1 & ~MAS1_VALID);
			__asm __volatile("isync; tlbwe; isync; msync");
		}
	}
}

static void
tlb1_write_entry_int(void *arg)
{
	struct tlbwrite_args *args = arg;
	uint32_t idx, mas0;

	idx = args->idx;
	if (idx == -1) {
		tlb1_purge_va_range(args->e->virt, args->e->size);
		idx = tlb1_find_free();
		if (idx == -1)
			panic("No free TLB1 entries!\n");
	}
	/* Select entry */
	mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(idx);

	mtspr(SPR_MAS0, mas0);
	mtspr(SPR_MAS1, args->e->mas1);
	mtspr(SPR_MAS2, args->e->mas2);
	mtspr(SPR_MAS3, args->e->mas3);
	switch ((mfpvr() >> 16) & 0xFFFF) {
	case FSL_E500mc:
	case FSL_E5500:
	case FSL_E6500:
		mtspr(SPR_MAS8, 0);
		/* FALLTHROUGH */
	case FSL_E500v2:
		mtspr(SPR_MAS7, args->e->mas7);
		break;
	default:
		break;
	}

	__asm __volatile("isync; tlbwe; isync; msync");
}

static void
tlb1_write_entry_sync(void *arg)
{
	/* Empty synchronization point for smp_rendezvous(). */
}

/*
 * Write given entry to TLB1 hardware.
 */
static void
tlb1_write_entry(tlb_entry_t *e, unsigned int idx)
{
	struct tlbwrite_args args;

	args.e = e;
	args.idx = idx;

#ifdef SMP
	if ((e->mas2 & _TLB_ENTRY_SHARED) && smp_started) {
		mb();
		smp_rendezvous(tlb1_write_entry_sync,
		    tlb1_write_entry_int,
		    tlb1_write_entry_sync, &args);
	} else
#endif
	{
		register_t msr;

		msr = mfmsr();
		__asm __volatile("wrteei 0");
		tlb1_write_entry_int(&args);
		__asm __volatile("wrtee %0" :: "r"(msr));
	}
}

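/*
 * Worked example for the two conversions below (illustrative): TSIZE encodes
 * power-of-4 sizes in KB, so tsize 7 corresponds to 4^7 KB = 16 MB and
 * tsize 9 to 4^9 KB = 256 MB (the granule used by mmu_booke_dumpsys_map()).
 * Going the other way, size2tsize(16 MB) = ilog2(16 MB) / 2 - 5 =
 * 24 / 2 - 5 = 7.
 */
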
/*
 * Convert TLB TSIZE value to mapped region size.
 */
static vm_size_t
tsize2size(unsigned int tsize)
{

	/*
	 * size = 4^tsize KB
	 * size = 4^tsize * 2^10 = 2^(2 * tsize + 10)
	 */

	return ((1 << (2 * tsize)) * 1024);
}

/*
 * Convert region size (must be power of 4) to TLB TSIZE value.
 */
static unsigned int
size2tsize(vm_size_t size)
{

	return (ilog2(size) / 2 - 5);
}

/*
 * Register permanent kernel mapping in TLB1.
 *
 * Entries are created in the first free slot (see tlb1_write_entry()) and
 * are not supposed to be invalidated.
 */
int
tlb1_set_entry(vm_offset_t va, vm_paddr_t pa, vm_size_t size,
    uint32_t flags)
{
	tlb_entry_t e;
	uint32_t ts, tid;
	int tsize, index;

	/* First try to update an existing entry. */
	for (index = 0; index < TLB1_ENTRIES; index++) {
		tlb1_read_entry(&e, index);
		/* Check if we're just updating the flags, and update them. */
		if (e.phys == pa && e.virt == va && e.size == size) {
			e.mas2 = (va & MAS2_EPN_MASK) | flags;
			tlb1_write_entry(&e, index);
			return (0);
		}
	}

	/* Convert size to TSIZE */
	tsize = size2tsize(size);

	tid = (TID_KERNEL << MAS1_TID_SHIFT) & MAS1_TID_MASK;
	/* XXX TS is hard coded to 0 for now as we only use single address space */
	ts = (0 << MAS1_TS_SHIFT) & MAS1_TS_MASK;

	e.phys = pa;
	e.virt = va;
	e.size = size;
	e.mas1 = MAS1_VALID | MAS1_IPROT | ts | tid;
	e.mas1 |= ((tsize << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK);
	e.mas2 = (va & MAS2_EPN_MASK) | flags;

	/* Set supervisor RWX permission bits */
	e.mas3 = (pa & MAS3_RPN) | MAS3_SR | MAS3_SW | MAS3_SX;
	e.mas7 = (pa >> 32) & MAS7_RPN;

	tlb1_write_entry(&e, -1);

	return (0);
}

/*
 * Map a contiguous RAM region into TLB1.
 */
static vm_size_t
tlb1_mapin_region(vm_offset_t va, vm_paddr_t pa, vm_size_t size, int wimge)
{
	vm_offset_t base;
	vm_size_t mapped, sz, ssize;

	mapped = 0;
	base = va;
	ssize = size;

	while (size > 0) {
		sz = 1UL << (ilog2(size) & ~1);
		/* Align size to PA */
		if (pa % sz != 0) {
			do {
				sz >>= 2;
			} while (pa % sz != 0);
		}
		/* Now align from there to VA */
		if (va % sz != 0) {
			do {
				sz >>= 2;
			} while (va % sz != 0);
		}
#ifdef __powerpc64__
		/*
		 * Clamp TLB1 entries to 4G.
		 *
		 * While the e6500 supports up to 1TB mappings, the e5500
		 * only supports up to 4G mappings. (0b1011)
		 *
		 * If any e6500 machines capable of supporting a very
		 * large amount of memory appear in the future, we can
		 * revisit this.
		 *
		 * For now, though, since we have plenty of space in TLB1,
		 * always avoid creating entries larger than 4GB.
		 */
		sz = MIN(sz, 1UL << 32);
#endif
		if (bootverbose)
			printf("Wiring VA=%p to PA=%jx (size=%lx)\n",
			    (void *)va, (uintmax_t)pa, (long)sz);
		if (tlb1_set_entry(va, pa, sz,
		    _TLB_ENTRY_SHARED | wimge) < 0)
			return (mapped);
		size -= sz;
		pa += sz;
		va += sz;
	}

	mapped = (va - base);
	if (bootverbose)
		printf("mapped size 0x%"PRIxPTR" (wasted space 0x%"PRIxPTR")\n",
		    mapped, mapped - ssize);

	return (mapped);
}

/*
 * TLB1 initialization routine, to be called after the very first
 * assembler level setup done in locore.S.
 */
void
tlb1_init(void)
{
	vm_offset_t mas2;
	uint32_t mas0, mas1, mas3, mas7;
	uint32_t tsz;

	tlb1_get_tlbconf();

	mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(0);
	mtspr(SPR_MAS0, mas0);
	__asm __volatile("isync; tlbre");

	mas1 = mfspr(SPR_MAS1);
	mas2 = mfspr(SPR_MAS2);
	mas3 = mfspr(SPR_MAS3);
	mas7 = mfspr(SPR_MAS7);

	kernload = ((vm_paddr_t)(mas7 & MAS7_RPN) << 32) |
	    (mas3 & MAS3_RPN);

	tsz = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
	kernsize += (tsz > 0) ? tsize2size(tsz) : 0;
	kernstart = trunc_page(mas2);

	/* Set up TLB miss defaults. */
	set_mas4_defaults();
}

/*
 * pmap_early_io_unmap() should be used shortly after pmap_early_io_map(),
 * with no further allocations in between, as in the following snippet:
 *
 * x = pmap_early_io_map(...);
 * <do something with x>
 * pmap_early_io_unmap(x, size);
 */
void
pmap_early_io_unmap(vm_offset_t va, vm_size_t size)
{
	int i;
	tlb_entry_t e;
	vm_size_t isize;

	size = roundup(size, PAGE_SIZE);
	isize = size;
	for (i = 0; i < TLB1_ENTRIES && size > 0; i++) {
		tlb1_read_entry(&e, i);
		if (!(e.mas1 & MAS1_VALID))
			continue;
		if (va <= e.virt && (va + isize) >= (e.virt + e.size)) {
			size -= e.size;
			e.mas1 &= ~MAS1_VALID;
			tlb1_write_entry(&e, i);
		}
	}
	if (tlb1_map_base == va + isize)
		tlb1_map_base -= isize;
}

vm_offset_t
pmap_early_io_map(vm_paddr_t pa, vm_size_t size)
{
	vm_paddr_t pa_base;
	vm_offset_t va, sz;
	int i;
	tlb_entry_t e;

	KASSERT(!pmap_bootstrapped, ("Do not use after PMAP is up!"));

	for (i = 0; i < TLB1_ENTRIES; i++) {
		tlb1_read_entry(&e, i);
		if (!(e.mas1 & MAS1_VALID))
			continue;
		if (pa >= e.phys && (pa + size) <=
		    (e.phys + e.size))
			return (e.virt + (pa - e.phys));
	}

	pa_base = rounddown(pa, PAGE_SIZE);
	size = roundup(size + (pa - pa_base), PAGE_SIZE);
	tlb1_map_base = roundup2(tlb1_map_base, 1 << (ilog2(size) & ~1));
	va = tlb1_map_base + (pa - pa_base);

	do {
		sz = 1 << (ilog2(size) & ~1);
		tlb1_set_entry(tlb1_map_base, pa_base, sz,
		    _TLB_ENTRY_SHARED | _TLB_ENTRY_IO);
		size -= sz;
		pa_base += sz;
		tlb1_map_base += sz;
	} while (size > 0);

	return (va);
}

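/*
 * Record the (pmap, va) mapping on the pv list of the physical page backing
 * 'va' (unless it is already there) and mark the page as pv_tracked, so the
 * reference/modify scans above visit this mapping as well.
 */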
void
pmap_track_page(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t pa;
	vm_page_t page;
	struct pv_entry *pve;

	va = trunc_page(va);
	pa = pmap_kextract(va);
	page = PHYS_TO_VM_PAGE(pa);

	rw_wlock(&pvh_global_lock);
	PMAP_LOCK(pmap);

	TAILQ_FOREACH(pve, &page->md.pv_list, pv_link) {
		if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) {
			goto out;
		}
	}
	page->md.pv_tracked = true;
	pv_insert(pmap, va, page);
out:
	PMAP_UNLOCK(pmap);
	rw_wunlock(&pvh_global_lock);
}

/*
 * Set up MAS4 defaults.
 * These values are loaded into MAS0-MAS2 on a TLB miss.
 */
static void
set_mas4_defaults(void)
{
	uint32_t mas4;

	/* Defaults: TLB0, PID0, TSIZED=4K */
	mas4 = MAS4_TLBSELD0;
	mas4 |= (TLB_SIZE_4K << MAS4_TSIZED_SHIFT) & MAS4_TSIZED_MASK;
#ifdef SMP
	mas4 |= MAS4_MD;
#endif
	mtspr(SPR_MAS4, mas4);
	__asm __volatile("isync");
}

/*
 * Return 0 if the physical IO range is encompassed by one of the
 * TLB1 entries, otherwise return related error code.
 */
static int
tlb1_iomapped(int i, vm_paddr_t pa, vm_size_t size, vm_offset_t *va)
{
	uint32_t prot;
	vm_paddr_t pa_start;
	vm_paddr_t pa_end;
	unsigned int entry_tsize;
	vm_size_t entry_size;
	tlb_entry_t e;

	*va = (vm_offset_t)NULL;

	tlb1_read_entry(&e, i);
	/* Skip invalid entries */
	if (!(e.mas1 & MAS1_VALID))
		return (EINVAL);

	/*
	 * The entry must be cache-inhibited, guarded, and r/w
	 * so it can function as an I/O page.
	 */
	prot = e.mas2 & (MAS2_I | MAS2_G);
	if (prot != (MAS2_I | MAS2_G))
		return (EPERM);

	prot = e.mas3 & (MAS3_SR | MAS3_SW);
	if (prot != (MAS3_SR | MAS3_SW))
		return (EPERM);

	/* The address should be within the entry range. */
	entry_tsize = (e.mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
	KASSERT((entry_tsize), ("tlb1_iomapped: invalid entry tsize"));

	entry_size = tsize2size(entry_tsize);
	pa_start = (((vm_paddr_t)e.mas7 & MAS7_RPN) << 32) |
	    (e.mas3 & MAS3_RPN);
	pa_end = pa_start + entry_size;

	if ((pa < pa_start) || ((pa + size) > pa_end))
		return (ERANGE);

	/* Return virtual address of this mapping. */
	*va = (e.mas2 & MAS2_EPN_MASK) + (pa - pa_start);
	return (0);
}

#ifdef DDB
/* Print out contents of the MAS registers for each TLB0 entry */
static void
#ifdef __powerpc64__
tlb_print_entry(int i, uint32_t mas1, uint64_t mas2, uint32_t mas3,
#else
tlb_print_entry(int i, uint32_t mas1, uint32_t mas2, uint32_t mas3,
#endif
    uint32_t mas7)
{
	int as;
	char desc[3];
	tlbtid_t tid;
	vm_size_t size;
	unsigned int tsize;

	desc[2] = '\0';
	if (mas1 & MAS1_VALID)
		desc[0] = 'V';
	else
		desc[0] = ' ';

	if (mas1 & MAS1_IPROT)
		desc[1] = 'P';
	else
		desc[1] = ' ';

	as = (mas1 & MAS1_TS_MASK) ? 1 : 0;
	tid = MAS1_GETTID(mas1);

	tsize = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
	size = 0;
	if (tsize)
		size = tsize2size(tsize);

	printf("%3d: (%s) [AS=%d] "
	    "sz = 0x%jx tsz = %d tid = %d mas1 = 0x%08x "
	    "mas2(va) = 0x%"PRI0ptrX" mas3(pa) = 0x%08x mas7 = 0x%08x\n",
	    i, desc, as, (uintmax_t)size, tsize, tid, mas1, mas2, mas3, mas7);
}

DB_SHOW_COMMAND(tlb0, tlb0_print_tlbentries)
{
	uint32_t mas0, mas1, mas3, mas7;
#ifdef __powerpc64__
	uint64_t mas2;
#else
	uint32_t mas2;
#endif
	int entryidx, way, idx;

	printf("TLB0 entries:\n");
	for (way = 0; way < TLB0_WAYS; way++)
		for (entryidx = 0; entryidx < TLB0_ENTRIES_PER_WAY; entryidx++) {
			mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way);
			mtspr(SPR_MAS0, mas0);

			mas2 = entryidx << MAS2_TLB0_ENTRY_IDX_SHIFT;
			mtspr(SPR_MAS2, mas2);

			__asm __volatile("isync; tlbre");

			mas1 = mfspr(SPR_MAS1);
			mas2 = mfspr(SPR_MAS2);
			mas3 = mfspr(SPR_MAS3);
			mas7 = mfspr(SPR_MAS7);

			idx = tlb0_tableidx(mas2, way);
			tlb_print_entry(idx, mas1, mas2, mas3, mas7);
		}
}

/*
 * Print out contents of the MAS registers for each TLB1 entry
 */
DB_SHOW_COMMAND(tlb1, tlb1_print_tlbentries)
{
	uint32_t mas0, mas1, mas3, mas7;
#ifdef __powerpc64__
	uint64_t mas2;
#else
	uint32_t mas2;
#endif
	int i;

	printf("TLB1 entries:\n");
	for (i = 0; i < TLB1_ENTRIES; i++) {
		mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(i);
		mtspr(SPR_MAS0, mas0);

		__asm __volatile("isync; tlbre");

		mas1 = mfspr(SPR_MAS1);
		mas2 = mfspr(SPR_MAS2);
		mas3 = mfspr(SPR_MAS3);
		mas7 = mfspr(SPR_MAS7);

		tlb_print_entry(i, mas1, mas2, mas3, mas7);
	}
}
#endif
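
/*
 * Illustrative ddb(4) usage for the commands above: "show tlb0" and
 * "show tlb1" at the db> prompt dump the decoded MAS register contents of
 * every TLB0 and TLB1 entry, respectively, via tlb_print_entry().
 */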