1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <vm/hat.h> 28 #include <vm/hat_sfmmu.h> 29 #include <vm/page.h> 30 #include <sys/pte.h> 31 #include <sys/systm.h> 32 #include <sys/mman.h> 33 #include <sys/sysmacros.h> 34 #include <sys/machparam.h> 35 #include <sys/vtrace.h> 36 #include <sys/kmem.h> 37 #include <sys/mmu.h> 38 #include <sys/cmn_err.h> 39 #include <sys/cpu.h> 40 #include <sys/cpuvar.h> 41 #include <sys/debug.h> 42 #include <sys/lgrp.h> 43 #include <sys/archsystm.h> 44 #include <sys/machsystm.h> 45 #include <sys/vmsystm.h> 46 #include <sys/bitmap.h> 47 #include <vm/rm.h> 48 #include <vm/vm_dep.h> 49 #include <sys/t_lock.h> 50 #include <sys/vm_machparam.h> 51 #include <sys/promif.h> 52 #include <sys/prom_isa.h> 53 #include <sys/prom_plat.h> 54 #include <sys/prom_debug.h> 55 #include <sys/privregs.h> 56 #include <sys/bootconf.h> 57 #include <sys/memlist.h> 58 #include <sys/memlist_plat.h> 59 #include <sys/cpu_module.h> 60 #include <sys/reboot.h> 61 #include <sys/kdi.h> 62 #include <sys/hypervisor_api.h> 63 #include <sys/hsvc.h> 64 65 /* 66 * External routines and data structures 67 */ 68 extern void sfmmu_cache_flushcolor(int, pfn_t); 69 extern uint_t mmu_page_sizes; 70 71 /* 72 * Static routines 73 */ 74 static void sfmmu_set_tlb(void); 75 76 /* 77 * Global Data: 78 */ 79 caddr_t textva, datava; 80 tte_t ktext_tte, kdata_tte; /* ttes for kernel text and data */ 81 82 int enable_bigktsb = 1; 83 int shtsb4m_first = 0; 84 85 tte_t bigktsb_ttes[MAX_BIGKTSB_TTES]; 86 int bigktsb_nttes = 0; 87 88 /* 89 * Controls the logic which enables the use of the 90 * QUAD_LDD_PHYS ASI for TSB accesses. 91 */ 92 int ktsb_phys = 1; 93 94 #ifdef SET_MMU_STATS 95 struct mmu_stat mmu_stat_area[NCPU]; 96 #endif /* SET_MMU_STATS */ 97 98 #ifdef DEBUG 99 /* 100 * The following two variables control if the hypervisor/hardware will 101 * be used to do the TSB table walk for kernel and user contexts. 102 */ 103 int hv_use_0_tsb = 1; 104 int hv_use_non0_tsb = 1; 105 #endif /* DEBUG */ 106 107 static void 108 sfmmu_set_fault_status_area(void) 109 { 110 caddr_t mmfsa_va; 111 extern caddr_t mmu_fault_status_area; 112 113 mmfsa_va = 114 mmu_fault_status_area + (MMFSA_SIZE * getprocessorid()); 115 set_mmfsa_scratchpad(mmfsa_va); 116 prom_set_mmfsa_traptable(&trap_table, va_to_pa(mmfsa_va)); 117 } 118 119 void 120 sfmmu_set_tsbs() 121 { 122 uint64_t rv; 123 struct hv_tsb_block *hvbp = &ksfmmup->sfmmu_hvblock; 124 125 #ifdef DEBUG 126 if (hv_use_0_tsb == 0) 127 return; 128 #endif /* DEBUG */ 129 130 rv = hv_set_ctx0(hvbp->hv_tsb_info_cnt, 131 hvbp->hv_tsb_info_pa); 132 if (rv != H_EOK) 133 prom_printf("cpu%d: hv_set_ctx0() returned %lx\n", 134 getprocessorid(), rv); 135 136 #ifdef SET_MMU_STATS 137 ASSERT(getprocessorid() < NCPU); 138 rv = hv_mmu_set_stat_area(va_to_pa(&mmu_stat_area[getprocessorid()]), 139 sizeof (mmu_stat_area[0])); 140 if (rv != H_EOK) 141 prom_printf("cpu%d: hv_mmu_set_stat_area() returned %lx\n", 142 getprocessorid(), rv); 143 #endif /* SET_MMU_STATS */ 144 } 145 146 /* 147 * This routine remaps the kernel using large ttes 148 * All entries except locked ones will be removed from the tlb. 149 * It assumes that both the text and data segments reside in a separate 150 * 4mb virtual and physical contigous memory chunk. This routine 151 * is only executed by the first cpu. The remaining cpus execute 152 * sfmmu_mp_startup() instead. 153 * XXX It assumes that the start of the text segment is KERNELBASE. It should 154 * actually be based on start. 155 */ 156 void 157 sfmmu_remap_kernel(void) 158 { 159 pfn_t pfn; 160 uint_t attr; 161 int flags; 162 163 extern char end[]; 164 extern struct as kas; 165 166 textva = (caddr_t)(KERNELBASE & MMU_PAGEMASK4M); 167 pfn = va_to_pfn(textva); 168 if (pfn == PFN_INVALID) 169 prom_panic("can't find kernel text pfn"); 170 pfn &= TTE_PFNMASK(TTE4M); 171 172 attr = PROC_TEXT | HAT_NOSYNC | HAT_ATTR_NOSOFTEXEC; 173 flags = HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD; 174 sfmmu_memtte(&ktext_tte, pfn, attr, TTE4M); 175 /* 176 * We set the lock bit in the tte to lock the translation in 177 * the tlb. 178 */ 179 TTE_SET_LOCKED(&ktext_tte); 180 sfmmu_tteload(kas.a_hat, &ktext_tte, textva, NULL, flags); 181 182 datava = (caddr_t)((uintptr_t)end & MMU_PAGEMASK4M); 183 pfn = va_to_pfn(datava); 184 if (pfn == PFN_INVALID) 185 prom_panic("can't find kernel data pfn"); 186 pfn &= TTE_PFNMASK(TTE4M); 187 188 attr = PROC_DATA | HAT_NOSYNC | HAT_ATTR_NOSOFTEXEC; 189 sfmmu_memtte(&kdata_tte, pfn, attr, TTE4M); 190 /* 191 * We set the lock bit in the tte to lock the translation in 192 * the tlb. We also set the mod bit to avoid taking dirty bit 193 * traps on kernel data. 194 */ 195 TTE_SET_LOCKED(&kdata_tte); 196 TTE_SET_LOFLAGS(&kdata_tte, 0, TTE_HWWR_INT); 197 sfmmu_tteload(kas.a_hat, &kdata_tte, datava, 198 (struct page *)NULL, flags); 199 200 /* 201 * create bigktsb ttes if necessary. 202 */ 203 if (enable_bigktsb) { 204 int i = 0; 205 caddr_t va = ktsb_base; 206 size_t tsbsz = ktsb_sz; 207 tte_t tte; 208 209 ASSERT(va >= datava + MMU_PAGESIZE4M); 210 ASSERT(tsbsz >= MMU_PAGESIZE4M); 211 ASSERT(IS_P2ALIGNED(tsbsz, tsbsz)); 212 ASSERT(IS_P2ALIGNED(va, tsbsz)); 213 attr = PROC_DATA | HAT_NOSYNC | HAT_ATTR_NOSOFTEXEC; 214 while (tsbsz != 0) { 215 ASSERT(i < MAX_BIGKTSB_TTES); 216 pfn = va_to_pfn(va); 217 ASSERT(pfn != PFN_INVALID); 218 ASSERT((pfn & ~TTE_PFNMASK(TTE4M)) == 0); 219 sfmmu_memtte(&tte, pfn, attr, TTE4M); 220 ASSERT(TTE_IS_MOD(&tte)); 221 /* 222 * No need to lock if we use physical addresses. 223 * Since we invalidate the kernel TSB using virtual 224 * addresses, it's an optimization to load them now 225 * so that we won't have to load them later. 226 */ 227 if (!ktsb_phys) { 228 TTE_SET_LOCKED(&tte); 229 } 230 sfmmu_tteload(kas.a_hat, &tte, va, NULL, flags); 231 bigktsb_ttes[i] = tte; 232 va += MMU_PAGESIZE4M; 233 tsbsz -= MMU_PAGESIZE4M; 234 i++; 235 } 236 bigktsb_nttes = i; 237 } 238 239 sfmmu_set_tlb(); 240 } 241 242 /* 243 * Setup the kernel's locked tte's 244 */ 245 void 246 sfmmu_set_tlb(void) 247 { 248 (void) hv_mmu_map_perm_addr(textva, KCONTEXT, *(uint64_t *)&ktext_tte, 249 MAP_ITLB | MAP_DTLB); 250 (void) hv_mmu_map_perm_addr(datava, KCONTEXT, *(uint64_t *)&kdata_tte, 251 MAP_DTLB); 252 253 if (!ktsb_phys && enable_bigktsb) { 254 int i; 255 caddr_t va = ktsb_base; 256 uint64_t tte; 257 258 ASSERT(bigktsb_nttes <= MAX_BIGKTSB_TTES); 259 for (i = 0; i < bigktsb_nttes; i++) { 260 tte = *(uint64_t *)&bigktsb_ttes[i]; 261 (void) hv_mmu_map_perm_addr(va, KCONTEXT, tte, 262 MAP_DTLB); 263 va += MMU_PAGESIZE4M; 264 } 265 } 266 } 267 268 /* 269 * This routine is executed by all other cpus except the first one 270 * at initialization time. It is responsible for taking over the 271 * mmu from the prom. We follow these steps. 272 * Lock the kernel's ttes in the TLB 273 * Initialize the tsb hardware registers 274 * Take over the trap table 275 * Flush the prom's locked entries from the TLB 276 */ 277 void 278 sfmmu_mp_startup(void) 279 { 280 sfmmu_set_tlb(); 281 setwstate(WSTATE_KERN); 282 /* 283 * sfmmu_set_fault_status_area() takes over trap_table 284 */ 285 sfmmu_set_fault_status_area(); 286 sfmmu_set_tsbs(); 287 install_va_to_tte(); 288 } 289 290 void 291 kdi_tlb_page_lock(caddr_t va, int do_dtlb) 292 { 293 tte_t tte; 294 pfn_t pfn = va_to_pfn(va); 295 uint64_t ret; 296 297 sfmmu_memtte(&tte, pfn, PROC_TEXT | HAT_NOSYNC | HAT_ATTR_NOSOFTEXEC, 298 TTE8K); 299 ret = hv_mmu_map_perm_addr(va, KCONTEXT, *(uint64_t *)&tte, 300 MAP_ITLB | (do_dtlb ? MAP_DTLB : 0)); 301 302 if (ret != H_EOK) { 303 cmn_err(CE_PANIC, "cpu%d: cannot set permanent mapping for " 304 "va=0x%p, hv error code 0x%lx", 305 getprocessorid(), (void *)va, ret); 306 } 307 } 308 309 void 310 kdi_tlb_page_unlock(caddr_t va, int do_dtlb) 311 { 312 (void) hv_mmu_unmap_perm_addr(va, KCONTEXT, 313 MAP_ITLB | (do_dtlb ? MAP_DTLB : 0)); 314 } 315 316 /* 317 * Clear machine specific TSB information for a user process 318 */ 319 void 320 sfmmu_clear_utsbinfo() 321 { 322 (void) hv_set_ctxnon0(0, NULL); 323 } 324 325 /* 326 * The tsbord[] array is set up to translate from the order of tsbs in the sfmmu 327 * list to the order of tsbs in the tsb descriptor array passed to the hv, which 328 * is the search order used during Hardware Table Walk. 329 * So, the tsb with index i in the sfmmu list will have search order tsbord[i]. 330 * 331 * The order of tsbs in the sfmmu list will be as follows: 332 * 333 * 0 8K - 512K private TSB 334 * 1 4M - 256M private TSB 335 * 2 8K - 512K shared TSB 336 * 3 4M - 256M shared TSB 337 * 338 * Shared TSBs are only used if a process is part of an SCD. 339 * 340 * So, e.g. tsbord[3] = 1; 341 * corresponds to searching the shared 4M TSB second. 342 * 343 * The search order is selected so that the 8K-512K private TSB is always first. 344 * Currently shared context is not expected to map many 8K-512K pages that cause 345 * TLB misses so we order the shared TSB for 4M-256M pages in front of the 346 * shared TSB for 8K-512K pages. We also expect more TLB misses against private 347 * context mappings than shared context mappings and place private TSBs ahead of 348 * shared TSBs in descriptor order. The shtsb4m_first /etc/system tuneable can 349 * be used to change the default ordering of private and shared TSBs for 350 * 4M-256M pages. 351 */ 352 void 353 sfmmu_setup_tsbinfo(sfmmu_t *sfmmup) 354 { 355 struct tsb_info *tsbinfop; 356 hv_tsb_info_t *tdp; 357 int i; 358 int j; 359 int scd = 0; 360 int tsbord[NHV_TSB_INFO]; 361 362 #ifdef DEBUG 363 ASSERT(max_mmu_ctxdoms > 0); 364 if (sfmmup != ksfmmup) { 365 /* Process should have INVALID_CONTEXT on all MMUs. */ 366 for (i = 0; i < max_mmu_ctxdoms; i++) { 367 ASSERT(sfmmup->sfmmu_ctxs[i].cnum == INVALID_CONTEXT); 368 } 369 } 370 #endif 371 372 tsbinfop = sfmmup->sfmmu_tsb; 373 if (tsbinfop == NULL) { 374 sfmmup->sfmmu_hvblock.hv_tsb_info_pa = (uint64_t)-1; 375 sfmmup->sfmmu_hvblock.hv_tsb_info_cnt = 0; 376 return; 377 } 378 379 ASSERT(sfmmup != ksfmmup || sfmmup->sfmmu_scdp == NULL); 380 ASSERT(sfmmup->sfmmu_scdp == NULL || 381 sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb != NULL); 382 383 tsbord[0] = 0; 384 if (sfmmup->sfmmu_scdp == NULL) { 385 tsbord[1] = 1; 386 } else { 387 struct tsb_info *scd8ktsbp = 388 sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb; 389 ulong_t shared_4mttecnt = 0; 390 ulong_t priv_4mttecnt = 0; 391 int scd4mtsb = (scd8ktsbp->tsb_next != NULL); 392 393 for (i = TTE4M; i < MMU_PAGE_SIZES; i++) { 394 if (scd4mtsb) { 395 shared_4mttecnt += 396 sfmmup->sfmmu_scdismttecnt[i] + 397 sfmmup->sfmmu_scdrttecnt[i]; 398 } 399 if (tsbinfop->tsb_next != NULL) { 400 priv_4mttecnt += sfmmup->sfmmu_ttecnt[i] + 401 sfmmup->sfmmu_ismttecnt[i]; 402 } 403 } 404 if (tsbinfop->tsb_next == NULL) { 405 if (shared_4mttecnt) { 406 tsbord[1] = 2; 407 tsbord[2] = 1; 408 } else { 409 tsbord[1] = 1; 410 tsbord[2] = 2; 411 } 412 } else if (priv_4mttecnt) { 413 if (shared_4mttecnt) { 414 tsbord[1] = shtsb4m_first ? 2 : 1; 415 tsbord[2] = 3; 416 tsbord[3] = shtsb4m_first ? 1 : 2; 417 } else { 418 tsbord[1] = 1; 419 tsbord[2] = 2; 420 tsbord[3] = 3; 421 } 422 } else if (shared_4mttecnt) { 423 tsbord[1] = 3; 424 tsbord[2] = 2; 425 tsbord[3] = 1; 426 } else { 427 tsbord[1] = 2; 428 tsbord[2] = 1; 429 tsbord[3] = 3; 430 } 431 } 432 433 ASSERT(tsbinfop != NULL); 434 for (i = 0; tsbinfop != NULL && i < NHV_TSB_INFO; i++) { 435 if (i == 0) { 436 tdp = &sfmmup->sfmmu_hvblock.hv_tsb_info[i]; 437 sfmmup->sfmmu_hvblock.hv_tsb_info_pa = va_to_pa(tdp); 438 } 439 440 441 j = tsbord[i]; 442 443 tdp = &sfmmup->sfmmu_hvblock.hv_tsb_info[j]; 444 445 ASSERT(tsbinfop->tsb_ttesz_mask != 0); 446 tdp->hvtsb_idxpgsz = lowbit(tsbinfop->tsb_ttesz_mask) - 1; 447 tdp->hvtsb_assoc = 1; 448 tdp->hvtsb_ntte = TSB_ENTRIES(tsbinfop->tsb_szc); 449 tdp->hvtsb_ctx_index = scd; 450 tdp->hvtsb_pgszs = tsbinfop->tsb_ttesz_mask; 451 tdp->hvtsb_rsvd = 0; 452 tdp->hvtsb_pa = tsbinfop->tsb_pa; 453 454 tsbinfop = tsbinfop->tsb_next; 455 if (tsbinfop == NULL && !scd && sfmmup->sfmmu_scdp != NULL) { 456 tsbinfop = 457 sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb; 458 scd = 1; 459 } 460 } 461 sfmmup->sfmmu_hvblock.hv_tsb_info_cnt = i; 462 ASSERT(tsbinfop == NULL); 463 } 464 465 /* 466 * Invalidate a TSB via processor specific TSB invalidation routine 467 */ 468 void 469 sfmmu_inv_tsb(caddr_t tsb_base, uint_t tsb_bytes) 470 { 471 extern void cpu_inv_tsb(caddr_t, uint_t); 472 473 cpu_inv_tsb(tsb_base, tsb_bytes); 474 } 475 476 /* 477 * Completely flush the D-cache on all cpus. 478 * Not applicable to sun4v. 479 */ 480 void 481 sfmmu_cache_flushall() 482 { 483 } 484 485 /* 486 * Initialise the real address field in sfmmu_pgsz_order. 487 */ 488 void 489 sfmmu_init_pgsz_hv(sfmmu_t *sfmmup) 490 { 491 int i; 492 493 /* 494 * Initialize mmu counts for pagesize register programming. 495 */ 496 for (i = 0; i < max_mmu_page_sizes; i++) { 497 sfmmup->sfmmu_mmuttecnt[i] = 0; 498 } 499 500 sfmmup->sfmmu_pgsz_order.hv_pgsz_order_pa = 501 va_to_pa(&sfmmup->sfmmu_pgsz_order.hv_pgsz_order); 502 } 503