1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <vm/hat.h> 30 #include <vm/hat_sfmmu.h> 31 #include <vm/page.h> 32 #include <sys/pte.h> 33 #include <sys/systm.h> 34 #include <sys/mman.h> 35 #include <sys/sysmacros.h> 36 #include <sys/machparam.h> 37 #include <sys/vtrace.h> 38 #include <sys/kmem.h> 39 #include <sys/mmu.h> 40 #include <sys/cmn_err.h> 41 #include <sys/cpu.h> 42 #include <sys/cpuvar.h> 43 #include <sys/debug.h> 44 #include <sys/lgrp.h> 45 #include <sys/archsystm.h> 46 #include <sys/machsystm.h> 47 #include <sys/vmsystm.h> 48 #include <sys/bitmap.h> 49 #include <vm/rm.h> 50 #include <sys/t_lock.h> 51 #include <sys/vm_machparam.h> 52 #include <sys/promif.h> 53 #include <sys/prom_isa.h> 54 #include <sys/prom_plat.h> 55 #include <sys/prom_debug.h> 56 #include <sys/privregs.h> 57 #include <sys/bootconf.h> 58 #include <sys/memlist.h> 59 #include <sys/memlist_plat.h> 60 #include <sys/cpu_module.h> 61 #include <sys/reboot.h> 62 #include <sys/kdi.h> 63 #include <sys/hypervisor_api.h> 64 65 /* 66 * External routines and data structures 67 */ 68 extern void sfmmu_cache_flushcolor(int, pfn_t); 69 extern uint_t mmu_page_sizes; 70 71 /* 72 * Static routines 73 */ 74 static void sfmmu_set_tlb(void); 75 76 /* 77 * Global Data: 78 */ 79 caddr_t textva, datava; 80 tte_t ktext_tte, kdata_tte; /* ttes for kernel text and data */ 81 82 int enable_bigktsb = 1; 83 int shtsb4m_first = 0; 84 85 tte_t bigktsb_ttes[MAX_BIGKTSB_TTES]; 86 int bigktsb_nttes = 0; 87 88 /* 89 * Controls the logic which enables the use of the 90 * QUAD_LDD_PHYS ASI for TSB accesses. 91 */ 92 int ktsb_phys = 1; 93 94 #ifdef SET_MMU_STATS 95 struct mmu_stat mmu_stat_area[NCPU]; 96 #endif /* SET_MMU_STATS */ 97 98 #ifdef DEBUG 99 /* 100 * The following two variables control if the hypervisor/hardware will 101 * be used to do the TSB table walk for kernel and user contexts. 102 */ 103 int hv_use_0_tsb = 1; 104 int hv_use_non0_tsb = 1; 105 #endif /* DEBUG */ 106 107 static void 108 sfmmu_set_fault_status_area(void) 109 { 110 caddr_t mmfsa_va; 111 extern caddr_t mmu_fault_status_area; 112 113 mmfsa_va = 114 mmu_fault_status_area + (MMFSA_SIZE * getprocessorid()); 115 set_mmfsa_scratchpad(mmfsa_va); 116 prom_set_mmfsa_traptable(&trap_table, va_to_pa(mmfsa_va)); 117 } 118 119 void 120 sfmmu_set_tsbs() 121 { 122 uint64_t rv; 123 struct hv_tsb_block *hvbp = &ksfmmup->sfmmu_hvblock; 124 125 #ifdef DEBUG 126 if (hv_use_0_tsb == 0) 127 return; 128 #endif /* DEBUG */ 129 130 rv = hv_set_ctx0(hvbp->hv_tsb_info_cnt, 131 hvbp->hv_tsb_info_pa); 132 if (rv != H_EOK) 133 prom_printf("cpu%d: hv_set_ctx0() returned %lx\n", 134 getprocessorid(), rv); 135 136 #ifdef SET_MMU_STATS 137 ASSERT(getprocessorid() < NCPU); 138 rv = hv_mmu_set_stat_area(va_to_pa(&mmu_stat_area[getprocessorid()]), 139 sizeof (mmu_stat_area[0])); 140 if (rv != H_EOK) 141 prom_printf("cpu%d: hv_mmu_set_stat_area() returned %lx\n", 142 getprocessorid(), rv); 143 #endif /* SET_MMU_STATS */ 144 } 145 146 /* 147 * This routine remaps the kernel using large ttes 148 * All entries except locked ones will be removed from the tlb. 149 * It assumes that both the text and data segments reside in a separate 150 * 4mb virtual and physical contigous memory chunk. This routine 151 * is only executed by the first cpu. The remaining cpus execute 152 * sfmmu_mp_startup() instead. 153 * XXX It assumes that the start of the text segment is KERNELBASE. It should 154 * actually be based on start. 155 */ 156 void 157 sfmmu_remap_kernel(void) 158 { 159 pfn_t pfn; 160 uint_t attr; 161 int flags; 162 163 extern char end[]; 164 extern struct as kas; 165 166 textva = (caddr_t)(KERNELBASE & MMU_PAGEMASK4M); 167 pfn = va_to_pfn(textva); 168 if (pfn == PFN_INVALID) 169 prom_panic("can't find kernel text pfn"); 170 pfn &= TTE_PFNMASK(TTE4M); 171 172 attr = PROC_TEXT | HAT_NOSYNC; 173 flags = HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD; 174 sfmmu_memtte(&ktext_tte, pfn, attr, TTE4M); 175 /* 176 * We set the lock bit in the tte to lock the translation in 177 * the tlb. 178 */ 179 TTE_SET_LOCKED(&ktext_tte); 180 sfmmu_tteload(kas.a_hat, &ktext_tte, textva, NULL, flags); 181 182 datava = (caddr_t)((uintptr_t)end & MMU_PAGEMASK4M); 183 pfn = va_to_pfn(datava); 184 if (pfn == PFN_INVALID) 185 prom_panic("can't find kernel data pfn"); 186 pfn &= TTE_PFNMASK(TTE4M); 187 188 attr = PROC_DATA | HAT_NOSYNC; 189 sfmmu_memtte(&kdata_tte, pfn, attr, TTE4M); 190 /* 191 * We set the lock bit in the tte to lock the translation in 192 * the tlb. We also set the mod bit to avoid taking dirty bit 193 * traps on kernel data. 194 */ 195 TTE_SET_LOCKED(&kdata_tte); 196 TTE_SET_LOFLAGS(&kdata_tte, 0, TTE_HWWR_INT); 197 sfmmu_tteload(kas.a_hat, &kdata_tte, datava, 198 (struct page *)NULL, flags); 199 200 /* 201 * create bigktsb ttes if necessary. 202 */ 203 if (enable_bigktsb) { 204 int i = 0; 205 caddr_t va = ktsb_base; 206 size_t tsbsz = ktsb_sz; 207 tte_t tte; 208 209 ASSERT(va >= datava + MMU_PAGESIZE4M); 210 ASSERT(tsbsz >= MMU_PAGESIZE4M); 211 ASSERT(IS_P2ALIGNED(tsbsz, tsbsz)); 212 ASSERT(IS_P2ALIGNED(va, tsbsz)); 213 attr = PROC_DATA | HAT_NOSYNC; 214 while (tsbsz != 0) { 215 ASSERT(i < MAX_BIGKTSB_TTES); 216 pfn = va_to_pfn(va); 217 ASSERT(pfn != PFN_INVALID); 218 ASSERT((pfn & ~TTE_PFNMASK(TTE4M)) == 0); 219 sfmmu_memtte(&tte, pfn, attr, TTE4M); 220 ASSERT(TTE_IS_MOD(&tte)); 221 /* 222 * No need to lock if we use physical addresses. 223 * Since we invalidate the kernel TSB using virtual 224 * addresses, it's an optimization to load them now 225 * so that we won't have to load them later. 226 */ 227 if (!ktsb_phys) { 228 TTE_SET_LOCKED(&tte); 229 } 230 sfmmu_tteload(kas.a_hat, &tte, va, NULL, flags); 231 bigktsb_ttes[i] = tte; 232 va += MMU_PAGESIZE4M; 233 tsbsz -= MMU_PAGESIZE4M; 234 i++; 235 } 236 bigktsb_nttes = i; 237 } 238 239 sfmmu_set_tlb(); 240 } 241 242 /* 243 * Setup the kernel's locked tte's 244 */ 245 void 246 sfmmu_set_tlb(void) 247 { 248 (void) hv_mmu_map_perm_addr(textva, KCONTEXT, *(uint64_t *)&ktext_tte, 249 MAP_ITLB | MAP_DTLB); 250 (void) hv_mmu_map_perm_addr(datava, KCONTEXT, *(uint64_t *)&kdata_tte, 251 MAP_DTLB); 252 253 if (!ktsb_phys && enable_bigktsb) { 254 int i; 255 caddr_t va = ktsb_base; 256 uint64_t tte; 257 258 ASSERT(bigktsb_nttes <= MAX_BIGKTSB_TTES); 259 for (i = 0; i < bigktsb_nttes; i++) { 260 tte = *(uint64_t *)&bigktsb_ttes[i]; 261 (void) hv_mmu_map_perm_addr(va, KCONTEXT, tte, 262 MAP_DTLB); 263 va += MMU_PAGESIZE4M; 264 } 265 } 266 } 267 268 /* 269 * This routine is executed by all other cpus except the first one 270 * at initialization time. It is responsible for taking over the 271 * mmu from the prom. We follow these steps. 272 * Lock the kernel's ttes in the TLB 273 * Initialize the tsb hardware registers 274 * Take over the trap table 275 * Flush the prom's locked entries from the TLB 276 */ 277 void 278 sfmmu_mp_startup(void) 279 { 280 sfmmu_set_tlb(); 281 setwstate(WSTATE_KERN); 282 /* 283 * sfmmu_set_fault_status_area() takes over trap_table 284 */ 285 sfmmu_set_fault_status_area(); 286 sfmmu_set_tsbs(); 287 install_va_to_tte(); 288 } 289 290 void 291 kdi_tlb_page_lock(caddr_t va, int do_dtlb) 292 { 293 tte_t tte; 294 pfn_t pfn = va_to_pfn(va); 295 uint64_t ret; 296 297 sfmmu_memtte(&tte, pfn, (PROC_TEXT | HAT_NOSYNC), TTE8K); 298 ret = hv_mmu_map_perm_addr(va, KCONTEXT, *(uint64_t *)&tte, 299 MAP_ITLB | (do_dtlb ? MAP_DTLB : 0)); 300 301 if (ret != H_EOK) { 302 cmn_err(CE_PANIC, "cpu%d: cannot set permanent mapping for " 303 "va=0x%p, hv error code 0x%lx", 304 getprocessorid(), (void *)va, ret); 305 } 306 } 307 308 void 309 kdi_tlb_page_unlock(caddr_t va, int do_dtlb) 310 { 311 (void) hv_mmu_unmap_perm_addr(va, KCONTEXT, 312 MAP_ITLB | (do_dtlb ? MAP_DTLB : 0)); 313 } 314 315 /* 316 * Clear machine specific TSB information for a user process 317 */ 318 void 319 sfmmu_clear_utsbinfo() 320 { 321 (void) hv_set_ctxnon0(0, NULL); 322 } 323 324 /* 325 * The tsbord[] array is set up to translate from the order of tsbs in the sfmmu 326 * list to the order of tsbs in the tsb descriptor array passed to the hv, which 327 * is the search order used during Hardware Table Walk. 328 * So, the tsb with index i in the sfmmu list will have search order tsbord[i]. 329 * 330 * The order of tsbs in the sfmmu list will be as follows: 331 * 332 * 0 8K - 512K private TSB 333 * 1 4M - 256M private TSB 334 * 2 8K - 512K shared TSB 335 * 3 4M - 256M shared TSB 336 * 337 * Shared TSBs are only used if a process is part of an SCD. 338 * 339 * So, e.g. tsbord[3] = 1; 340 * corresponds to searching the shared 4M TSB second. 341 * 342 * The search order is selected so that the 8K-512K private TSB is always first. 343 * Currently shared context is not expected to map many 8K-512K pages that cause 344 * TLB misses so we order the shared TSB for 4M-256M pages in front of the 345 * shared TSB for 8K-512K pages. We also expect more TLB misses against private 346 * context mappings than shared context mappings and place private TSBs ahead of 347 * shared TSBs in descriptor order. The shtsb4m_first /etc/system tuneable can 348 * be used to change the default ordering of private and shared TSBs for 349 * 4M-256M pages. 350 */ 351 void 352 sfmmu_setup_tsbinfo(sfmmu_t *sfmmup) 353 { 354 struct tsb_info *tsbinfop; 355 hv_tsb_info_t *tdp; 356 int i; 357 int j; 358 int scd = 0; 359 int tsbord[NHV_TSB_INFO]; 360 361 #ifdef DEBUG 362 ASSERT(max_mmu_ctxdoms > 0); 363 if (sfmmup != ksfmmup) { 364 /* Process should have INVALID_CONTEXT on all MMUs. */ 365 for (i = 0; i < max_mmu_ctxdoms; i++) { 366 ASSERT(sfmmup->sfmmu_ctxs[i].cnum == INVALID_CONTEXT); 367 } 368 } 369 #endif 370 371 tsbinfop = sfmmup->sfmmu_tsb; 372 if (tsbinfop == NULL) { 373 sfmmup->sfmmu_hvblock.hv_tsb_info_pa = (uint64_t)-1; 374 sfmmup->sfmmu_hvblock.hv_tsb_info_cnt = 0; 375 return; 376 } 377 378 ASSERT(sfmmup != ksfmmup || sfmmup->sfmmu_scdp == NULL); 379 ASSERT(sfmmup->sfmmu_scdp == NULL || 380 sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb != NULL); 381 382 tsbord[0] = 0; 383 if (sfmmup->sfmmu_scdp == NULL) { 384 tsbord[1] = 1; 385 } else { 386 struct tsb_info *scd8ktsbp = 387 sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb; 388 ulong_t shared_4mttecnt = 0; 389 ulong_t priv_4mttecnt = 0; 390 int scd4mtsb = (scd8ktsbp->tsb_next != NULL); 391 392 for (i = TTE4M; i < MMU_PAGE_SIZES; i++) { 393 if (scd4mtsb) { 394 shared_4mttecnt += 395 sfmmup->sfmmu_scdismttecnt[i] + 396 sfmmup->sfmmu_scdrttecnt[i]; 397 } 398 if (tsbinfop->tsb_next != NULL) { 399 priv_4mttecnt += sfmmup->sfmmu_ttecnt[i] + 400 sfmmup->sfmmu_ismttecnt[i]; 401 } 402 } 403 if (tsbinfop->tsb_next == NULL) { 404 if (shared_4mttecnt) { 405 tsbord[1] = 2; 406 tsbord[2] = 1; 407 } else { 408 tsbord[1] = 1; 409 tsbord[2] = 2; 410 } 411 } else if (priv_4mttecnt) { 412 if (shared_4mttecnt) { 413 tsbord[1] = shtsb4m_first ? 2 : 1; 414 tsbord[2] = 3; 415 tsbord[3] = shtsb4m_first ? 1 : 2; 416 } else { 417 tsbord[1] = 1; 418 tsbord[2] = 2; 419 tsbord[3] = 3; 420 } 421 } else if (shared_4mttecnt) { 422 tsbord[1] = 3; 423 tsbord[2] = 2; 424 tsbord[3] = 1; 425 } else { 426 tsbord[1] = 2; 427 tsbord[2] = 1; 428 tsbord[3] = 3; 429 } 430 } 431 432 ASSERT(tsbinfop != NULL); 433 for (i = 0; tsbinfop != NULL && i < NHV_TSB_INFO; i++) { 434 if (i == 0) { 435 tdp = &sfmmup->sfmmu_hvblock.hv_tsb_info[i]; 436 sfmmup->sfmmu_hvblock.hv_tsb_info_pa = va_to_pa(tdp); 437 } 438 439 440 j = tsbord[i]; 441 442 tdp = &sfmmup->sfmmu_hvblock.hv_tsb_info[j]; 443 444 ASSERT(tsbinfop->tsb_ttesz_mask != 0); 445 tdp->hvtsb_idxpgsz = lowbit(tsbinfop->tsb_ttesz_mask) - 1; 446 tdp->hvtsb_assoc = 1; 447 tdp->hvtsb_ntte = TSB_ENTRIES(tsbinfop->tsb_szc); 448 tdp->hvtsb_ctx_index = scd; 449 tdp->hvtsb_pgszs = tsbinfop->tsb_ttesz_mask; 450 tdp->hvtsb_rsvd = 0; 451 tdp->hvtsb_pa = tsbinfop->tsb_pa; 452 453 tsbinfop = tsbinfop->tsb_next; 454 if (tsbinfop == NULL && !scd && sfmmup->sfmmu_scdp != NULL) { 455 tsbinfop = 456 sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb; 457 scd = 1; 458 } 459 } 460 sfmmup->sfmmu_hvblock.hv_tsb_info_cnt = i; 461 ASSERT(tsbinfop == NULL); 462 } 463 464 /* 465 * Invalidate a TSB via processor specific TSB invalidation routine 466 */ 467 void 468 sfmmu_inv_tsb(caddr_t tsb_base, uint_t tsb_bytes) 469 { 470 extern void cpu_inv_tsb(caddr_t, uint_t); 471 472 cpu_inv_tsb(tsb_base, tsb_bytes); 473 } 474 475 /* 476 * Completely flush the D-cache on all cpus. 477 * Not applicable to sun4v. 478 */ 479 void 480 sfmmu_cache_flushall() 481 { 482 } 483