1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <vm/hat.h> 28 #include <vm/hat_sfmmu.h> 29 #include <vm/page.h> 30 #include <sys/pte.h> 31 #include <sys/systm.h> 32 #include <sys/mman.h> 33 #include <sys/sysmacros.h> 34 #include <sys/machparam.h> 35 #include <sys/vtrace.h> 36 #include <sys/kmem.h> 37 #include <sys/mmu.h> 38 #include <sys/cmn_err.h> 39 #include <sys/cpu.h> 40 #include <sys/cpuvar.h> 41 #include <sys/debug.h> 42 #include <sys/lgrp.h> 43 #include <sys/archsystm.h> 44 #include <sys/machsystm.h> 45 #include <sys/vmsystm.h> 46 #include <sys/bitmap.h> 47 #include <vm/rm.h> 48 #include <sys/t_lock.h> 49 #include <sys/vm_machparam.h> 50 #include <sys/promif.h> 51 #include <sys/prom_isa.h> 52 #include <sys/prom_plat.h> 53 #include <sys/prom_debug.h> 54 #include <sys/privregs.h> 55 #include <sys/bootconf.h> 56 #include <sys/memlist.h> 57 #include <sys/memlist_plat.h> 58 #include <sys/cpu_module.h> 59 #include <sys/reboot.h> 60 #include <sys/kdi.h> 61 #include <sys/hypervisor_api.h> 62 63 /* 64 * External routines and data structures 65 */ 66 extern void sfmmu_cache_flushcolor(int, pfn_t); 67 extern uint_t mmu_page_sizes; 68 69 /* 70 * Static routines 71 */ 72 static void sfmmu_set_tlb(void); 73 74 /* 75 * Global Data: 76 */ 77 caddr_t textva, datava; 78 tte_t ktext_tte, kdata_tte; /* ttes for kernel text and data */ 79 80 int enable_bigktsb = 1; 81 int shtsb4m_first = 0; 82 83 tte_t bigktsb_ttes[MAX_BIGKTSB_TTES]; 84 int bigktsb_nttes = 0; 85 86 /* 87 * Controls the logic which enables the use of the 88 * QUAD_LDD_PHYS ASI for TSB accesses. 89 */ 90 int ktsb_phys = 1; 91 92 #ifdef SET_MMU_STATS 93 struct mmu_stat mmu_stat_area[NCPU]; 94 #endif /* SET_MMU_STATS */ 95 96 #ifdef DEBUG 97 /* 98 * The following two variables control if the hypervisor/hardware will 99 * be used to do the TSB table walk for kernel and user contexts. 100 */ 101 int hv_use_0_tsb = 1; 102 int hv_use_non0_tsb = 1; 103 #endif /* DEBUG */ 104 105 static void 106 sfmmu_set_fault_status_area(void) 107 { 108 caddr_t mmfsa_va; 109 extern caddr_t mmu_fault_status_area; 110 111 mmfsa_va = 112 mmu_fault_status_area + (MMFSA_SIZE * getprocessorid()); 113 set_mmfsa_scratchpad(mmfsa_va); 114 prom_set_mmfsa_traptable(&trap_table, va_to_pa(mmfsa_va)); 115 } 116 117 void 118 sfmmu_set_tsbs() 119 { 120 uint64_t rv; 121 struct hv_tsb_block *hvbp = &ksfmmup->sfmmu_hvblock; 122 123 #ifdef DEBUG 124 if (hv_use_0_tsb == 0) 125 return; 126 #endif /* DEBUG */ 127 128 rv = hv_set_ctx0(hvbp->hv_tsb_info_cnt, 129 hvbp->hv_tsb_info_pa); 130 if (rv != H_EOK) 131 prom_printf("cpu%d: hv_set_ctx0() returned %lx\n", 132 getprocessorid(), rv); 133 134 #ifdef SET_MMU_STATS 135 ASSERT(getprocessorid() < NCPU); 136 rv = hv_mmu_set_stat_area(va_to_pa(&mmu_stat_area[getprocessorid()]), 137 sizeof (mmu_stat_area[0])); 138 if (rv != H_EOK) 139 prom_printf("cpu%d: hv_mmu_set_stat_area() returned %lx\n", 140 getprocessorid(), rv); 141 #endif /* SET_MMU_STATS */ 142 } 143 144 /* 145 * This routine remaps the kernel using large ttes 146 * All entries except locked ones will be removed from the tlb. 147 * It assumes that both the text and data segments reside in a separate 148 * 4mb virtual and physical contigous memory chunk. This routine 149 * is only executed by the first cpu. The remaining cpus execute 150 * sfmmu_mp_startup() instead. 151 * XXX It assumes that the start of the text segment is KERNELBASE. It should 152 * actually be based on start. 153 */ 154 void 155 sfmmu_remap_kernel(void) 156 { 157 pfn_t pfn; 158 uint_t attr; 159 int flags; 160 161 extern char end[]; 162 extern struct as kas; 163 164 textva = (caddr_t)(KERNELBASE & MMU_PAGEMASK4M); 165 pfn = va_to_pfn(textva); 166 if (pfn == PFN_INVALID) 167 prom_panic("can't find kernel text pfn"); 168 pfn &= TTE_PFNMASK(TTE4M); 169 170 attr = PROC_TEXT | HAT_NOSYNC; 171 flags = HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD; 172 sfmmu_memtte(&ktext_tte, pfn, attr, TTE4M); 173 /* 174 * We set the lock bit in the tte to lock the translation in 175 * the tlb. 176 */ 177 TTE_SET_LOCKED(&ktext_tte); 178 sfmmu_tteload(kas.a_hat, &ktext_tte, textva, NULL, flags); 179 180 datava = (caddr_t)((uintptr_t)end & MMU_PAGEMASK4M); 181 pfn = va_to_pfn(datava); 182 if (pfn == PFN_INVALID) 183 prom_panic("can't find kernel data pfn"); 184 pfn &= TTE_PFNMASK(TTE4M); 185 186 attr = PROC_DATA | HAT_NOSYNC; 187 sfmmu_memtte(&kdata_tte, pfn, attr, TTE4M); 188 /* 189 * We set the lock bit in the tte to lock the translation in 190 * the tlb. We also set the mod bit to avoid taking dirty bit 191 * traps on kernel data. 192 */ 193 TTE_SET_LOCKED(&kdata_tte); 194 TTE_SET_LOFLAGS(&kdata_tte, 0, TTE_HWWR_INT); 195 sfmmu_tteload(kas.a_hat, &kdata_tte, datava, 196 (struct page *)NULL, flags); 197 198 /* 199 * create bigktsb ttes if necessary. 200 */ 201 if (enable_bigktsb) { 202 int i = 0; 203 caddr_t va = ktsb_base; 204 size_t tsbsz = ktsb_sz; 205 tte_t tte; 206 207 ASSERT(va >= datava + MMU_PAGESIZE4M); 208 ASSERT(tsbsz >= MMU_PAGESIZE4M); 209 ASSERT(IS_P2ALIGNED(tsbsz, tsbsz)); 210 ASSERT(IS_P2ALIGNED(va, tsbsz)); 211 attr = PROC_DATA | HAT_NOSYNC; 212 while (tsbsz != 0) { 213 ASSERT(i < MAX_BIGKTSB_TTES); 214 pfn = va_to_pfn(va); 215 ASSERT(pfn != PFN_INVALID); 216 ASSERT((pfn & ~TTE_PFNMASK(TTE4M)) == 0); 217 sfmmu_memtte(&tte, pfn, attr, TTE4M); 218 ASSERT(TTE_IS_MOD(&tte)); 219 /* 220 * No need to lock if we use physical addresses. 221 * Since we invalidate the kernel TSB using virtual 222 * addresses, it's an optimization to load them now 223 * so that we won't have to load them later. 224 */ 225 if (!ktsb_phys) { 226 TTE_SET_LOCKED(&tte); 227 } 228 sfmmu_tteload(kas.a_hat, &tte, va, NULL, flags); 229 bigktsb_ttes[i] = tte; 230 va += MMU_PAGESIZE4M; 231 tsbsz -= MMU_PAGESIZE4M; 232 i++; 233 } 234 bigktsb_nttes = i; 235 } 236 237 sfmmu_set_tlb(); 238 } 239 240 /* 241 * Setup the kernel's locked tte's 242 */ 243 void 244 sfmmu_set_tlb(void) 245 { 246 (void) hv_mmu_map_perm_addr(textva, KCONTEXT, *(uint64_t *)&ktext_tte, 247 MAP_ITLB | MAP_DTLB); 248 (void) hv_mmu_map_perm_addr(datava, KCONTEXT, *(uint64_t *)&kdata_tte, 249 MAP_DTLB); 250 251 if (!ktsb_phys && enable_bigktsb) { 252 int i; 253 caddr_t va = ktsb_base; 254 uint64_t tte; 255 256 ASSERT(bigktsb_nttes <= MAX_BIGKTSB_TTES); 257 for (i = 0; i < bigktsb_nttes; i++) { 258 tte = *(uint64_t *)&bigktsb_ttes[i]; 259 (void) hv_mmu_map_perm_addr(va, KCONTEXT, tte, 260 MAP_DTLB); 261 va += MMU_PAGESIZE4M; 262 } 263 } 264 } 265 266 /* 267 * This routine is executed by all other cpus except the first one 268 * at initialization time. It is responsible for taking over the 269 * mmu from the prom. We follow these steps. 270 * Lock the kernel's ttes in the TLB 271 * Initialize the tsb hardware registers 272 * Take over the trap table 273 * Flush the prom's locked entries from the TLB 274 */ 275 void 276 sfmmu_mp_startup(void) 277 { 278 sfmmu_set_tlb(); 279 setwstate(WSTATE_KERN); 280 /* 281 * sfmmu_set_fault_status_area() takes over trap_table 282 */ 283 sfmmu_set_fault_status_area(); 284 sfmmu_set_tsbs(); 285 install_va_to_tte(); 286 } 287 288 void 289 kdi_tlb_page_lock(caddr_t va, int do_dtlb) 290 { 291 tte_t tte; 292 pfn_t pfn = va_to_pfn(va); 293 uint64_t ret; 294 295 sfmmu_memtte(&tte, pfn, (PROC_TEXT | HAT_NOSYNC), TTE8K); 296 ret = hv_mmu_map_perm_addr(va, KCONTEXT, *(uint64_t *)&tte, 297 MAP_ITLB | (do_dtlb ? MAP_DTLB : 0)); 298 299 if (ret != H_EOK) { 300 cmn_err(CE_PANIC, "cpu%d: cannot set permanent mapping for " 301 "va=0x%p, hv error code 0x%lx", 302 getprocessorid(), (void *)va, ret); 303 } 304 } 305 306 void 307 kdi_tlb_page_unlock(caddr_t va, int do_dtlb) 308 { 309 (void) hv_mmu_unmap_perm_addr(va, KCONTEXT, 310 MAP_ITLB | (do_dtlb ? MAP_DTLB : 0)); 311 } 312 313 /* 314 * Clear machine specific TSB information for a user process 315 */ 316 void 317 sfmmu_clear_utsbinfo() 318 { 319 (void) hv_set_ctxnon0(0, NULL); 320 } 321 322 /* 323 * The tsbord[] array is set up to translate from the order of tsbs in the sfmmu 324 * list to the order of tsbs in the tsb descriptor array passed to the hv, which 325 * is the search order used during Hardware Table Walk. 326 * So, the tsb with index i in the sfmmu list will have search order tsbord[i]. 327 * 328 * The order of tsbs in the sfmmu list will be as follows: 329 * 330 * 0 8K - 512K private TSB 331 * 1 4M - 256M private TSB 332 * 2 8K - 512K shared TSB 333 * 3 4M - 256M shared TSB 334 * 335 * Shared TSBs are only used if a process is part of an SCD. 336 * 337 * So, e.g. tsbord[3] = 1; 338 * corresponds to searching the shared 4M TSB second. 339 * 340 * The search order is selected so that the 8K-512K private TSB is always first. 341 * Currently shared context is not expected to map many 8K-512K pages that cause 342 * TLB misses so we order the shared TSB for 4M-256M pages in front of the 343 * shared TSB for 8K-512K pages. We also expect more TLB misses against private 344 * context mappings than shared context mappings and place private TSBs ahead of 345 * shared TSBs in descriptor order. The shtsb4m_first /etc/system tuneable can 346 * be used to change the default ordering of private and shared TSBs for 347 * 4M-256M pages. 348 */ 349 void 350 sfmmu_setup_tsbinfo(sfmmu_t *sfmmup) 351 { 352 struct tsb_info *tsbinfop; 353 hv_tsb_info_t *tdp; 354 int i; 355 int j; 356 int scd = 0; 357 int tsbord[NHV_TSB_INFO]; 358 359 #ifdef DEBUG 360 ASSERT(max_mmu_ctxdoms > 0); 361 if (sfmmup != ksfmmup) { 362 /* Process should have INVALID_CONTEXT on all MMUs. */ 363 for (i = 0; i < max_mmu_ctxdoms; i++) { 364 ASSERT(sfmmup->sfmmu_ctxs[i].cnum == INVALID_CONTEXT); 365 } 366 } 367 #endif 368 369 tsbinfop = sfmmup->sfmmu_tsb; 370 if (tsbinfop == NULL) { 371 sfmmup->sfmmu_hvblock.hv_tsb_info_pa = (uint64_t)-1; 372 sfmmup->sfmmu_hvblock.hv_tsb_info_cnt = 0; 373 return; 374 } 375 376 ASSERT(sfmmup != ksfmmup || sfmmup->sfmmu_scdp == NULL); 377 ASSERT(sfmmup->sfmmu_scdp == NULL || 378 sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb != NULL); 379 380 tsbord[0] = 0; 381 if (sfmmup->sfmmu_scdp == NULL) { 382 tsbord[1] = 1; 383 } else { 384 struct tsb_info *scd8ktsbp = 385 sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb; 386 ulong_t shared_4mttecnt = 0; 387 ulong_t priv_4mttecnt = 0; 388 int scd4mtsb = (scd8ktsbp->tsb_next != NULL); 389 390 for (i = TTE4M; i < MMU_PAGE_SIZES; i++) { 391 if (scd4mtsb) { 392 shared_4mttecnt += 393 sfmmup->sfmmu_scdismttecnt[i] + 394 sfmmup->sfmmu_scdrttecnt[i]; 395 } 396 if (tsbinfop->tsb_next != NULL) { 397 priv_4mttecnt += sfmmup->sfmmu_ttecnt[i] + 398 sfmmup->sfmmu_ismttecnt[i]; 399 } 400 } 401 if (tsbinfop->tsb_next == NULL) { 402 if (shared_4mttecnt) { 403 tsbord[1] = 2; 404 tsbord[2] = 1; 405 } else { 406 tsbord[1] = 1; 407 tsbord[2] = 2; 408 } 409 } else if (priv_4mttecnt) { 410 if (shared_4mttecnt) { 411 tsbord[1] = shtsb4m_first ? 2 : 1; 412 tsbord[2] = 3; 413 tsbord[3] = shtsb4m_first ? 1 : 2; 414 } else { 415 tsbord[1] = 1; 416 tsbord[2] = 2; 417 tsbord[3] = 3; 418 } 419 } else if (shared_4mttecnt) { 420 tsbord[1] = 3; 421 tsbord[2] = 2; 422 tsbord[3] = 1; 423 } else { 424 tsbord[1] = 2; 425 tsbord[2] = 1; 426 tsbord[3] = 3; 427 } 428 } 429 430 ASSERT(tsbinfop != NULL); 431 for (i = 0; tsbinfop != NULL && i < NHV_TSB_INFO; i++) { 432 if (i == 0) { 433 tdp = &sfmmup->sfmmu_hvblock.hv_tsb_info[i]; 434 sfmmup->sfmmu_hvblock.hv_tsb_info_pa = va_to_pa(tdp); 435 } 436 437 438 j = tsbord[i]; 439 440 tdp = &sfmmup->sfmmu_hvblock.hv_tsb_info[j]; 441 442 ASSERT(tsbinfop->tsb_ttesz_mask != 0); 443 tdp->hvtsb_idxpgsz = lowbit(tsbinfop->tsb_ttesz_mask) - 1; 444 tdp->hvtsb_assoc = 1; 445 tdp->hvtsb_ntte = TSB_ENTRIES(tsbinfop->tsb_szc); 446 tdp->hvtsb_ctx_index = scd; 447 tdp->hvtsb_pgszs = tsbinfop->tsb_ttesz_mask; 448 tdp->hvtsb_rsvd = 0; 449 tdp->hvtsb_pa = tsbinfop->tsb_pa; 450 451 tsbinfop = tsbinfop->tsb_next; 452 if (tsbinfop == NULL && !scd && sfmmup->sfmmu_scdp != NULL) { 453 tsbinfop = 454 sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb; 455 scd = 1; 456 } 457 } 458 sfmmup->sfmmu_hvblock.hv_tsb_info_cnt = i; 459 ASSERT(tsbinfop == NULL); 460 } 461 462 /* 463 * Invalidate a TSB via processor specific TSB invalidation routine 464 */ 465 void 466 sfmmu_inv_tsb(caddr_t tsb_base, uint_t tsb_bytes) 467 { 468 extern void cpu_inv_tsb(caddr_t, uint_t); 469 470 cpu_inv_tsb(tsb_base, tsb_bytes); 471 } 472 473 /* 474 * Completely flush the D-cache on all cpus. 475 * Not applicable to sun4v. 476 */ 477 void 478 sfmmu_cache_flushall() 479 { 480 } 481