1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <vm/hat.h> 28 #include <vm/hat_sfmmu.h> 29 #include <vm/page.h> 30 #include <sys/pte.h> 31 #include <sys/systm.h> 32 #include <sys/mman.h> 33 #include <sys/sysmacros.h> 34 #include <sys/machparam.h> 35 #include <sys/vtrace.h> 36 #include <sys/kmem.h> 37 #include <sys/mmu.h> 38 #include <sys/cmn_err.h> 39 #include <sys/cpu.h> 40 #include <sys/cpuvar.h> 41 #include <sys/debug.h> 42 #include <sys/lgrp.h> 43 #include <sys/archsystm.h> 44 #include <sys/machsystm.h> 45 #include <sys/vmsystm.h> 46 #include <sys/bitmap.h> 47 #include <vm/rm.h> 48 #include <sys/t_lock.h> 49 #include <sys/vm_machparam.h> 50 #include <sys/promif.h> 51 #include <sys/prom_isa.h> 52 #include <sys/prom_plat.h> 53 #include <sys/prom_debug.h> 54 #include <sys/privregs.h> 55 #include <sys/bootconf.h> 56 #include <sys/memlist.h> 57 #include <sys/memlist_plat.h> 58 #include <sys/cpu_module.h> 59 #include <sys/reboot.h> 60 #include <sys/kdi.h> 61 #include <sys/hypervisor_api.h> 62 #include <sys/hsvc.h> 63 64 /* 65 * External routines and data structures 66 */ 67 extern void sfmmu_cache_flushcolor(int, pfn_t); 68 extern uint_t mmu_page_sizes; 69 70 /* 71 * Static routines 72 */ 73 static void sfmmu_set_tlb(void); 74 75 /* 76 * Global Data: 77 */ 78 caddr_t textva, datava; 79 tte_t ktext_tte, kdata_tte; /* ttes for kernel text and data */ 80 81 int enable_bigktsb = 1; 82 int shtsb4m_first = 0; 83 84 tte_t bigktsb_ttes[MAX_BIGKTSB_TTES]; 85 int bigktsb_nttes = 0; 86 87 /* 88 * Controls the logic which enables the use of the 89 * QUAD_LDD_PHYS ASI for TSB accesses. 90 */ 91 int ktsb_phys = 1; 92 93 #ifdef SET_MMU_STATS 94 struct mmu_stat mmu_stat_area[NCPU]; 95 #endif /* SET_MMU_STATS */ 96 97 #ifdef DEBUG 98 /* 99 * The following two variables control if the hypervisor/hardware will 100 * be used to do the TSB table walk for kernel and user contexts. 101 */ 102 int hv_use_0_tsb = 1; 103 int hv_use_non0_tsb = 1; 104 #endif /* DEBUG */ 105 106 static void 107 sfmmu_set_fault_status_area(void) 108 { 109 caddr_t mmfsa_va; 110 extern caddr_t mmu_fault_status_area; 111 112 mmfsa_va = 113 mmu_fault_status_area + (MMFSA_SIZE * getprocessorid()); 114 set_mmfsa_scratchpad(mmfsa_va); 115 prom_set_mmfsa_traptable(&trap_table, va_to_pa(mmfsa_va)); 116 } 117 118 void 119 sfmmu_set_tsbs() 120 { 121 uint64_t rv; 122 struct hv_tsb_block *hvbp = &ksfmmup->sfmmu_hvblock; 123 124 #ifdef DEBUG 125 if (hv_use_0_tsb == 0) 126 return; 127 #endif /* DEBUG */ 128 129 rv = hv_set_ctx0(hvbp->hv_tsb_info_cnt, 130 hvbp->hv_tsb_info_pa); 131 if (rv != H_EOK) 132 prom_printf("cpu%d: hv_set_ctx0() returned %lx\n", 133 getprocessorid(), rv); 134 135 #ifdef SET_MMU_STATS 136 ASSERT(getprocessorid() < NCPU); 137 rv = hv_mmu_set_stat_area(va_to_pa(&mmu_stat_area[getprocessorid()]), 138 sizeof (mmu_stat_area[0])); 139 if (rv != H_EOK) 140 prom_printf("cpu%d: hv_mmu_set_stat_area() returned %lx\n", 141 getprocessorid(), rv); 142 #endif /* SET_MMU_STATS */ 143 } 144 145 /* 146 * This routine remaps the kernel using large ttes 147 * All entries except locked ones will be removed from the tlb. 148 * It assumes that both the text and data segments reside in a separate 149 * 4mb virtual and physical contigous memory chunk. This routine 150 * is only executed by the first cpu. The remaining cpus execute 151 * sfmmu_mp_startup() instead. 152 * XXX It assumes that the start of the text segment is KERNELBASE. It should 153 * actually be based on start. 154 */ 155 void 156 sfmmu_remap_kernel(void) 157 { 158 pfn_t pfn; 159 uint_t attr; 160 int flags; 161 162 extern char end[]; 163 extern struct as kas; 164 165 textva = (caddr_t)(KERNELBASE & MMU_PAGEMASK4M); 166 pfn = va_to_pfn(textva); 167 if (pfn == PFN_INVALID) 168 prom_panic("can't find kernel text pfn"); 169 pfn &= TTE_PFNMASK(TTE4M); 170 171 attr = PROC_TEXT | HAT_NOSYNC | HAT_ATTR_NOSOFTEXEC; 172 flags = HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD; 173 sfmmu_memtte(&ktext_tte, pfn, attr, TTE4M); 174 /* 175 * We set the lock bit in the tte to lock the translation in 176 * the tlb. 177 */ 178 TTE_SET_LOCKED(&ktext_tte); 179 sfmmu_tteload(kas.a_hat, &ktext_tte, textva, NULL, flags); 180 181 datava = (caddr_t)((uintptr_t)end & MMU_PAGEMASK4M); 182 pfn = va_to_pfn(datava); 183 if (pfn == PFN_INVALID) 184 prom_panic("can't find kernel data pfn"); 185 pfn &= TTE_PFNMASK(TTE4M); 186 187 attr = PROC_DATA | HAT_NOSYNC | HAT_ATTR_NOSOFTEXEC; 188 sfmmu_memtte(&kdata_tte, pfn, attr, TTE4M); 189 /* 190 * We set the lock bit in the tte to lock the translation in 191 * the tlb. We also set the mod bit to avoid taking dirty bit 192 * traps on kernel data. 193 */ 194 TTE_SET_LOCKED(&kdata_tte); 195 TTE_SET_LOFLAGS(&kdata_tte, 0, TTE_HWWR_INT); 196 sfmmu_tteload(kas.a_hat, &kdata_tte, datava, 197 (struct page *)NULL, flags); 198 199 /* 200 * create bigktsb ttes if necessary. 201 */ 202 if (enable_bigktsb) { 203 int i = 0; 204 caddr_t va = ktsb_base; 205 size_t tsbsz = ktsb_sz; 206 tte_t tte; 207 208 ASSERT(va >= datava + MMU_PAGESIZE4M); 209 ASSERT(tsbsz >= MMU_PAGESIZE4M); 210 ASSERT(IS_P2ALIGNED(tsbsz, tsbsz)); 211 ASSERT(IS_P2ALIGNED(va, tsbsz)); 212 attr = PROC_DATA | HAT_NOSYNC | HAT_ATTR_NOSOFTEXEC; 213 while (tsbsz != 0) { 214 ASSERT(i < MAX_BIGKTSB_TTES); 215 pfn = va_to_pfn(va); 216 ASSERT(pfn != PFN_INVALID); 217 ASSERT((pfn & ~TTE_PFNMASK(TTE4M)) == 0); 218 sfmmu_memtte(&tte, pfn, attr, TTE4M); 219 ASSERT(TTE_IS_MOD(&tte)); 220 /* 221 * No need to lock if we use physical addresses. 222 * Since we invalidate the kernel TSB using virtual 223 * addresses, it's an optimization to load them now 224 * so that we won't have to load them later. 225 */ 226 if (!ktsb_phys) { 227 TTE_SET_LOCKED(&tte); 228 } 229 sfmmu_tteload(kas.a_hat, &tte, va, NULL, flags); 230 bigktsb_ttes[i] = tte; 231 va += MMU_PAGESIZE4M; 232 tsbsz -= MMU_PAGESIZE4M; 233 i++; 234 } 235 bigktsb_nttes = i; 236 } 237 238 sfmmu_set_tlb(); 239 } 240 241 /* 242 * Setup the kernel's locked tte's 243 */ 244 void 245 sfmmu_set_tlb(void) 246 { 247 (void) hv_mmu_map_perm_addr(textva, KCONTEXT, *(uint64_t *)&ktext_tte, 248 MAP_ITLB | MAP_DTLB); 249 (void) hv_mmu_map_perm_addr(datava, KCONTEXT, *(uint64_t *)&kdata_tte, 250 MAP_DTLB); 251 252 if (!ktsb_phys && enable_bigktsb) { 253 int i; 254 caddr_t va = ktsb_base; 255 uint64_t tte; 256 257 ASSERT(bigktsb_nttes <= MAX_BIGKTSB_TTES); 258 for (i = 0; i < bigktsb_nttes; i++) { 259 tte = *(uint64_t *)&bigktsb_ttes[i]; 260 (void) hv_mmu_map_perm_addr(va, KCONTEXT, tte, 261 MAP_DTLB); 262 va += MMU_PAGESIZE4M; 263 } 264 } 265 } 266 267 /* 268 * This routine is executed by all other cpus except the first one 269 * at initialization time. It is responsible for taking over the 270 * mmu from the prom. We follow these steps. 271 * Lock the kernel's ttes in the TLB 272 * Initialize the tsb hardware registers 273 * Take over the trap table 274 * Flush the prom's locked entries from the TLB 275 */ 276 void 277 sfmmu_mp_startup(void) 278 { 279 sfmmu_set_tlb(); 280 setwstate(WSTATE_KERN); 281 /* 282 * sfmmu_set_fault_status_area() takes over trap_table 283 */ 284 sfmmu_set_fault_status_area(); 285 sfmmu_set_tsbs(); 286 install_va_to_tte(); 287 } 288 289 void 290 kdi_tlb_page_lock(caddr_t va, int do_dtlb) 291 { 292 tte_t tte; 293 pfn_t pfn = va_to_pfn(va); 294 uint64_t ret; 295 296 sfmmu_memtte(&tte, pfn, PROC_TEXT | HAT_NOSYNC | HAT_ATTR_NOSOFTEXEC, 297 TTE8K); 298 ret = hv_mmu_map_perm_addr(va, KCONTEXT, *(uint64_t *)&tte, 299 MAP_ITLB | (do_dtlb ? MAP_DTLB : 0)); 300 301 if (ret != H_EOK) { 302 cmn_err(CE_PANIC, "cpu%d: cannot set permanent mapping for " 303 "va=0x%p, hv error code 0x%lx", 304 getprocessorid(), (void *)va, ret); 305 } 306 } 307 308 void 309 kdi_tlb_page_unlock(caddr_t va, int do_dtlb) 310 { 311 (void) hv_mmu_unmap_perm_addr(va, KCONTEXT, 312 MAP_ITLB | (do_dtlb ? MAP_DTLB : 0)); 313 } 314 315 /* 316 * Clear machine specific TSB information for a user process 317 */ 318 void 319 sfmmu_clear_utsbinfo() 320 { 321 (void) hv_set_ctxnon0(0, NULL); 322 } 323 324 /* 325 * The tsbord[] array is set up to translate from the order of tsbs in the sfmmu 326 * list to the order of tsbs in the tsb descriptor array passed to the hv, which 327 * is the search order used during Hardware Table Walk. 328 * So, the tsb with index i in the sfmmu list will have search order tsbord[i]. 329 * 330 * The order of tsbs in the sfmmu list will be as follows: 331 * 332 * 0 8K - 512K private TSB 333 * 1 4M - 256M private TSB 334 * 2 8K - 512K shared TSB 335 * 3 4M - 256M shared TSB 336 * 337 * Shared TSBs are only used if a process is part of an SCD. 338 * 339 * So, e.g. tsbord[3] = 1; 340 * corresponds to searching the shared 4M TSB second. 341 * 342 * The search order is selected so that the 8K-512K private TSB is always first. 343 * Currently shared context is not expected to map many 8K-512K pages that cause 344 * TLB misses so we order the shared TSB for 4M-256M pages in front of the 345 * shared TSB for 8K-512K pages. We also expect more TLB misses against private 346 * context mappings than shared context mappings and place private TSBs ahead of 347 * shared TSBs in descriptor order. The shtsb4m_first /etc/system tuneable can 348 * be used to change the default ordering of private and shared TSBs for 349 * 4M-256M pages. 350 */ 351 void 352 sfmmu_setup_tsbinfo(sfmmu_t *sfmmup) 353 { 354 struct tsb_info *tsbinfop; 355 hv_tsb_info_t *tdp; 356 int i; 357 int j; 358 int scd = 0; 359 int tsbord[NHV_TSB_INFO]; 360 361 #ifdef DEBUG 362 ASSERT(max_mmu_ctxdoms > 0); 363 if (sfmmup != ksfmmup) { 364 /* Process should have INVALID_CONTEXT on all MMUs. */ 365 for (i = 0; i < max_mmu_ctxdoms; i++) { 366 ASSERT(sfmmup->sfmmu_ctxs[i].cnum == INVALID_CONTEXT); 367 } 368 } 369 #endif 370 371 tsbinfop = sfmmup->sfmmu_tsb; 372 if (tsbinfop == NULL) { 373 sfmmup->sfmmu_hvblock.hv_tsb_info_pa = (uint64_t)-1; 374 sfmmup->sfmmu_hvblock.hv_tsb_info_cnt = 0; 375 return; 376 } 377 378 ASSERT(sfmmup != ksfmmup || sfmmup->sfmmu_scdp == NULL); 379 ASSERT(sfmmup->sfmmu_scdp == NULL || 380 sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb != NULL); 381 382 tsbord[0] = 0; 383 if (sfmmup->sfmmu_scdp == NULL) { 384 tsbord[1] = 1; 385 } else { 386 struct tsb_info *scd8ktsbp = 387 sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb; 388 ulong_t shared_4mttecnt = 0; 389 ulong_t priv_4mttecnt = 0; 390 int scd4mtsb = (scd8ktsbp->tsb_next != NULL); 391 392 for (i = TTE4M; i < MMU_PAGE_SIZES; i++) { 393 if (scd4mtsb) { 394 shared_4mttecnt += 395 sfmmup->sfmmu_scdismttecnt[i] + 396 sfmmup->sfmmu_scdrttecnt[i]; 397 } 398 if (tsbinfop->tsb_next != NULL) { 399 priv_4mttecnt += sfmmup->sfmmu_ttecnt[i] + 400 sfmmup->sfmmu_ismttecnt[i]; 401 } 402 } 403 if (tsbinfop->tsb_next == NULL) { 404 if (shared_4mttecnt) { 405 tsbord[1] = 2; 406 tsbord[2] = 1; 407 } else { 408 tsbord[1] = 1; 409 tsbord[2] = 2; 410 } 411 } else if (priv_4mttecnt) { 412 if (shared_4mttecnt) { 413 tsbord[1] = shtsb4m_first ? 2 : 1; 414 tsbord[2] = 3; 415 tsbord[3] = shtsb4m_first ? 1 : 2; 416 } else { 417 tsbord[1] = 1; 418 tsbord[2] = 2; 419 tsbord[3] = 3; 420 } 421 } else if (shared_4mttecnt) { 422 tsbord[1] = 3; 423 tsbord[2] = 2; 424 tsbord[3] = 1; 425 } else { 426 tsbord[1] = 2; 427 tsbord[2] = 1; 428 tsbord[3] = 3; 429 } 430 } 431 432 ASSERT(tsbinfop != NULL); 433 for (i = 0; tsbinfop != NULL && i < NHV_TSB_INFO; i++) { 434 if (i == 0) { 435 tdp = &sfmmup->sfmmu_hvblock.hv_tsb_info[i]; 436 sfmmup->sfmmu_hvblock.hv_tsb_info_pa = va_to_pa(tdp); 437 } 438 439 440 j = tsbord[i]; 441 442 tdp = &sfmmup->sfmmu_hvblock.hv_tsb_info[j]; 443 444 ASSERT(tsbinfop->tsb_ttesz_mask != 0); 445 tdp->hvtsb_idxpgsz = lowbit(tsbinfop->tsb_ttesz_mask) - 1; 446 tdp->hvtsb_assoc = 1; 447 tdp->hvtsb_ntte = TSB_ENTRIES(tsbinfop->tsb_szc); 448 tdp->hvtsb_ctx_index = scd; 449 tdp->hvtsb_pgszs = tsbinfop->tsb_ttesz_mask; 450 tdp->hvtsb_rsvd = 0; 451 tdp->hvtsb_pa = tsbinfop->tsb_pa; 452 453 tsbinfop = tsbinfop->tsb_next; 454 if (tsbinfop == NULL && !scd && sfmmup->sfmmu_scdp != NULL) { 455 tsbinfop = 456 sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb; 457 scd = 1; 458 } 459 } 460 sfmmup->sfmmu_hvblock.hv_tsb_info_cnt = i; 461 ASSERT(tsbinfop == NULL); 462 } 463 464 /* 465 * Invalidate a TSB via processor specific TSB invalidation routine 466 */ 467 void 468 sfmmu_inv_tsb(caddr_t tsb_base, uint_t tsb_bytes) 469 { 470 extern void cpu_inv_tsb(caddr_t, uint_t); 471 472 cpu_inv_tsb(tsb_base, tsb_bytes); 473 } 474 475 /* 476 * Completely flush the D-cache on all cpus. 477 * Not applicable to sun4v. 478 */ 479 void 480 sfmmu_cache_flushall() 481 { 482 } 483