1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <sys/types.h>
27 #include <vm/hat.h>
28 #include <vm/hat_sfmmu.h>
29 #include <vm/page.h>
30 #include <sys/pte.h>
31 #include <sys/systm.h>
32 #include <sys/mman.h>
33 #include <sys/sysmacros.h>
34 #include <sys/machparam.h>
35 #include <sys/vtrace.h>
36 #include <sys/kmem.h>
37 #include <sys/mmu.h>
38 #include <sys/cmn_err.h>
39 #include <sys/cpu.h>
40 #include <sys/cpuvar.h>
41 #include <sys/debug.h>
42 #include <sys/lgrp.h>
43 #include <sys/archsystm.h>
44 #include <sys/machsystm.h>
45 #include <sys/vmsystm.h>
46 #include <sys/bitmap.h>
47 #include <vm/rm.h>
48 #include <sys/t_lock.h>
49 #include <sys/vm_machparam.h>
50 #include <sys/promif.h>
51 #include <sys/prom_isa.h>
52 #include <sys/prom_plat.h>
53 #include <sys/prom_debug.h>
54 #include <sys/privregs.h>
55 #include <sys/bootconf.h>
56 #include <sys/memlist.h>
57 #include <sys/memlist_plat.h>
58 #include <sys/cpu_module.h>
59 #include <sys/reboot.h>
60 #include <sys/kdi.h>
61 #include <sys/hypervisor_api.h>
62
63 /*
64 * External routines and data structures
65 */
66 extern void sfmmu_cache_flushcolor(int, pfn_t);
67 extern uint_t mmu_page_sizes;
68
69 /*
70 * Static routines
71 */
72 static void sfmmu_set_tlb(void);
73
74 /*
75 * Global Data:
76 */
77 caddr_t textva, datava;
78 tte_t ktext_tte, kdata_tte; /* ttes for kernel text and data */
79
80 int enable_bigktsb = 1;
81 int shtsb4m_first = 0;
82
83 tte_t bigktsb_ttes[MAX_BIGKTSB_TTES];
84 int bigktsb_nttes = 0;
85
86 /*
87 * Controls the logic which enables the use of the
88 * QUAD_LDD_PHYS ASI for TSB accesses.
89 */
90 int ktsb_phys = 1;
91
92 #ifdef SET_MMU_STATS
93 struct mmu_stat mmu_stat_area[NCPU];
94 #endif /* SET_MMU_STATS */
95
96 #ifdef DEBUG
97 /*
98 * The following two variables control if the hypervisor/hardware will
99 * be used to do the TSB table walk for kernel and user contexts.
100 */
101 int hv_use_0_tsb = 1;
102 int hv_use_non0_tsb = 1;
103 #endif /* DEBUG */
104
105 static void
sfmmu_set_fault_status_area(void)106 sfmmu_set_fault_status_area(void)
107 {
108 caddr_t mmfsa_va;
109 extern caddr_t mmu_fault_status_area;
110
111 mmfsa_va =
112 mmu_fault_status_area + (MMFSA_SIZE * getprocessorid());
113 set_mmfsa_scratchpad(mmfsa_va);
114 prom_set_mmfsa_traptable(&trap_table, va_to_pa(mmfsa_va));
115 }
116
117 void
sfmmu_set_tsbs()118 sfmmu_set_tsbs()
119 {
120 uint64_t rv;
121 struct hv_tsb_block *hvbp = &ksfmmup->sfmmu_hvblock;
122
123 #ifdef DEBUG
124 if (hv_use_0_tsb == 0)
125 return;
126 #endif /* DEBUG */
127
128 rv = hv_set_ctx0(hvbp->hv_tsb_info_cnt,
129 hvbp->hv_tsb_info_pa);
130 if (rv != H_EOK)
131 prom_printf("cpu%d: hv_set_ctx0() returned %lx\n",
132 getprocessorid(), rv);
133
134 #ifdef SET_MMU_STATS
135 ASSERT(getprocessorid() < NCPU);
136 rv = hv_mmu_set_stat_area(va_to_pa(&mmu_stat_area[getprocessorid()]),
137 sizeof (mmu_stat_area[0]));
138 if (rv != H_EOK)
139 prom_printf("cpu%d: hv_mmu_set_stat_area() returned %lx\n",
140 getprocessorid(), rv);
141 #endif /* SET_MMU_STATS */
142 }
143
144 /*
145 * This routine remaps the kernel using large ttes
146 * All entries except locked ones will be removed from the tlb.
147 * It assumes that both the text and data segments reside in a separate
148 * 4mb virtual and physical contigous memory chunk. This routine
149 * is only executed by the first cpu. The remaining cpus execute
150 * sfmmu_mp_startup() instead.
151 * XXX It assumes that the start of the text segment is KERNELBASE. It should
152 * actually be based on start.
153 */
154 void
sfmmu_remap_kernel(void)155 sfmmu_remap_kernel(void)
156 {
157 pfn_t pfn;
158 uint_t attr;
159 int flags;
160
161 extern char end[];
162 extern struct as kas;
163
164 textva = (caddr_t)(KERNELBASE & MMU_PAGEMASK4M);
165 pfn = va_to_pfn(textva);
166 if (pfn == PFN_INVALID)
167 prom_panic("can't find kernel text pfn");
168 pfn &= TTE_PFNMASK(TTE4M);
169
170 attr = PROC_TEXT | HAT_NOSYNC;
171 flags = HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD;
172 sfmmu_memtte(&ktext_tte, pfn, attr, TTE4M);
173 /*
174 * We set the lock bit in the tte to lock the translation in
175 * the tlb.
176 */
177 TTE_SET_LOCKED(&ktext_tte);
178 sfmmu_tteload(kas.a_hat, &ktext_tte, textva, NULL, flags);
179
180 datava = (caddr_t)((uintptr_t)end & MMU_PAGEMASK4M);
181 pfn = va_to_pfn(datava);
182 if (pfn == PFN_INVALID)
183 prom_panic("can't find kernel data pfn");
184 pfn &= TTE_PFNMASK(TTE4M);
185
186 attr = PROC_DATA | HAT_NOSYNC;
187 sfmmu_memtte(&kdata_tte, pfn, attr, TTE4M);
188 /*
189 * We set the lock bit in the tte to lock the translation in
190 * the tlb. We also set the mod bit to avoid taking dirty bit
191 * traps on kernel data.
192 */
193 TTE_SET_LOCKED(&kdata_tte);
194 TTE_SET_LOFLAGS(&kdata_tte, 0, TTE_HWWR_INT);
195 sfmmu_tteload(kas.a_hat, &kdata_tte, datava,
196 (struct page *)NULL, flags);
197
198 /*
199 * create bigktsb ttes if necessary.
200 */
201 if (enable_bigktsb) {
202 int i = 0;
203 caddr_t va = ktsb_base;
204 size_t tsbsz = ktsb_sz;
205 tte_t tte;
206
207 ASSERT(va >= datava + MMU_PAGESIZE4M);
208 ASSERT(tsbsz >= MMU_PAGESIZE4M);
209 ASSERT(IS_P2ALIGNED(tsbsz, tsbsz));
210 ASSERT(IS_P2ALIGNED(va, tsbsz));
211 attr = PROC_DATA | HAT_NOSYNC;
212 while (tsbsz != 0) {
213 ASSERT(i < MAX_BIGKTSB_TTES);
214 pfn = va_to_pfn(va);
215 ASSERT(pfn != PFN_INVALID);
216 ASSERT((pfn & ~TTE_PFNMASK(TTE4M)) == 0);
217 sfmmu_memtte(&tte, pfn, attr, TTE4M);
218 ASSERT(TTE_IS_MOD(&tte));
219 /*
220 * No need to lock if we use physical addresses.
221 * Since we invalidate the kernel TSB using virtual
222 * addresses, it's an optimization to load them now
223 * so that we won't have to load them later.
224 */
225 if (!ktsb_phys) {
226 TTE_SET_LOCKED(&tte);
227 }
228 sfmmu_tteload(kas.a_hat, &tte, va, NULL, flags);
229 bigktsb_ttes[i] = tte;
230 va += MMU_PAGESIZE4M;
231 tsbsz -= MMU_PAGESIZE4M;
232 i++;
233 }
234 bigktsb_nttes = i;
235 }
236
237 sfmmu_set_tlb();
238 }
239
240 /*
241 * Setup the kernel's locked tte's
242 */
243 void
sfmmu_set_tlb(void)244 sfmmu_set_tlb(void)
245 {
246 (void) hv_mmu_map_perm_addr(textva, KCONTEXT, *(uint64_t *)&ktext_tte,
247 MAP_ITLB | MAP_DTLB);
248 (void) hv_mmu_map_perm_addr(datava, KCONTEXT, *(uint64_t *)&kdata_tte,
249 MAP_DTLB);
250
251 if (!ktsb_phys && enable_bigktsb) {
252 int i;
253 caddr_t va = ktsb_base;
254 uint64_t tte;
255
256 ASSERT(bigktsb_nttes <= MAX_BIGKTSB_TTES);
257 for (i = 0; i < bigktsb_nttes; i++) {
258 tte = *(uint64_t *)&bigktsb_ttes[i];
259 (void) hv_mmu_map_perm_addr(va, KCONTEXT, tte,
260 MAP_DTLB);
261 va += MMU_PAGESIZE4M;
262 }
263 }
264 }
265
266 /*
267 * This routine is executed by all other cpus except the first one
268 * at initialization time. It is responsible for taking over the
269 * mmu from the prom. We follow these steps.
270 * Lock the kernel's ttes in the TLB
271 * Initialize the tsb hardware registers
272 * Take over the trap table
273 * Flush the prom's locked entries from the TLB
274 */
275 void
sfmmu_mp_startup(void)276 sfmmu_mp_startup(void)
277 {
278 sfmmu_set_tlb();
279 setwstate(WSTATE_KERN);
280 /*
281 * sfmmu_set_fault_status_area() takes over trap_table
282 */
283 sfmmu_set_fault_status_area();
284 sfmmu_set_tsbs();
285 install_va_to_tte();
286 }
287
288 void
kdi_tlb_page_lock(caddr_t va,int do_dtlb)289 kdi_tlb_page_lock(caddr_t va, int do_dtlb)
290 {
291 tte_t tte;
292 pfn_t pfn = va_to_pfn(va);
293 uint64_t ret;
294
295 sfmmu_memtte(&tte, pfn, (PROC_TEXT | HAT_NOSYNC), TTE8K);
296 ret = hv_mmu_map_perm_addr(va, KCONTEXT, *(uint64_t *)&tte,
297 MAP_ITLB | (do_dtlb ? MAP_DTLB : 0));
298
299 if (ret != H_EOK) {
300 cmn_err(CE_PANIC, "cpu%d: cannot set permanent mapping for "
301 "va=0x%p, hv error code 0x%lx",
302 getprocessorid(), (void *)va, ret);
303 }
304 }
305
306 void
kdi_tlb_page_unlock(caddr_t va,int do_dtlb)307 kdi_tlb_page_unlock(caddr_t va, int do_dtlb)
308 {
309 (void) hv_mmu_unmap_perm_addr(va, KCONTEXT,
310 MAP_ITLB | (do_dtlb ? MAP_DTLB : 0));
311 }
312
/*
 * Clear machine specific TSB information for a user process.
 *
 * Tells the hypervisor that the non-zero (user) contexts have no TSB
 * descriptors: a count of 0 and a zero descriptor address.  The
 * address is written as a plain 0 rather than NULL because, like the
 * second argument of hv_set_ctx0(), it is presumably a physical
 * address and not a pointer; a literal 0 is valid in either case.
 * The hypervisor's return value is ignored.
 */
void
sfmmu_clear_utsbinfo(void)
{
	(void) hv_set_ctxnon0(0, 0);
}
321
322 /*
323 * The tsbord[] array is set up to translate from the order of tsbs in the sfmmu
324 * list to the order of tsbs in the tsb descriptor array passed to the hv, which
325 * is the search order used during Hardware Table Walk.
326 * So, the tsb with index i in the sfmmu list will have search order tsbord[i].
327 *
328 * The order of tsbs in the sfmmu list will be as follows:
329 *
330 * 0 8K - 512K private TSB
331 * 1 4M - 256M private TSB
332 * 2 8K - 512K shared TSB
333 * 3 4M - 256M shared TSB
334 *
335 * Shared TSBs are only used if a process is part of an SCD.
336 *
337 * So, e.g. tsbord[3] = 1;
338 * corresponds to searching the shared 4M TSB second.
339 *
340 * The search order is selected so that the 8K-512K private TSB is always first.
341 * Currently shared context is not expected to map many 8K-512K pages that cause
342 * TLB misses so we order the shared TSB for 4M-256M pages in front of the
343 * shared TSB for 8K-512K pages. We also expect more TLB misses against private
344 * context mappings than shared context mappings and place private TSBs ahead of
345 * shared TSBs in descriptor order. The shtsb4m_first /etc/system tuneable can
346 * be used to change the default ordering of private and shared TSBs for
347 * 4M-256M pages.
348 */
349 void
sfmmu_setup_tsbinfo(sfmmu_t * sfmmup)350 sfmmu_setup_tsbinfo(sfmmu_t *sfmmup)
351 {
352 struct tsb_info *tsbinfop;
353 hv_tsb_info_t *tdp;
354 int i;
355 int j;
356 int scd = 0;
357 int tsbord[NHV_TSB_INFO];
358
359 #ifdef DEBUG
360 ASSERT(max_mmu_ctxdoms > 0);
361 if (sfmmup != ksfmmup) {
362 /* Process should have INVALID_CONTEXT on all MMUs. */
363 for (i = 0; i < max_mmu_ctxdoms; i++) {
364 ASSERT(sfmmup->sfmmu_ctxs[i].cnum == INVALID_CONTEXT);
365 }
366 }
367 #endif
368
369 tsbinfop = sfmmup->sfmmu_tsb;
370 if (tsbinfop == NULL) {
371 sfmmup->sfmmu_hvblock.hv_tsb_info_pa = (uint64_t)-1;
372 sfmmup->sfmmu_hvblock.hv_tsb_info_cnt = 0;
373 return;
374 }
375
376 ASSERT(sfmmup != ksfmmup || sfmmup->sfmmu_scdp == NULL);
377 ASSERT(sfmmup->sfmmu_scdp == NULL ||
378 sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb != NULL);
379
380 tsbord[0] = 0;
381 if (sfmmup->sfmmu_scdp == NULL) {
382 tsbord[1] = 1;
383 } else {
384 struct tsb_info *scd8ktsbp =
385 sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb;
386 ulong_t shared_4mttecnt = 0;
387 ulong_t priv_4mttecnt = 0;
388 int scd4mtsb = (scd8ktsbp->tsb_next != NULL);
389
390 for (i = TTE4M; i < MMU_PAGE_SIZES; i++) {
391 if (scd4mtsb) {
392 shared_4mttecnt +=
393 sfmmup->sfmmu_scdismttecnt[i] +
394 sfmmup->sfmmu_scdrttecnt[i];
395 }
396 if (tsbinfop->tsb_next != NULL) {
397 priv_4mttecnt += sfmmup->sfmmu_ttecnt[i] +
398 sfmmup->sfmmu_ismttecnt[i];
399 }
400 }
401 if (tsbinfop->tsb_next == NULL) {
402 if (shared_4mttecnt) {
403 tsbord[1] = 2;
404 tsbord[2] = 1;
405 } else {
406 tsbord[1] = 1;
407 tsbord[2] = 2;
408 }
409 } else if (priv_4mttecnt) {
410 if (shared_4mttecnt) {
411 tsbord[1] = shtsb4m_first ? 2 : 1;
412 tsbord[2] = 3;
413 tsbord[3] = shtsb4m_first ? 1 : 2;
414 } else {
415 tsbord[1] = 1;
416 tsbord[2] = 2;
417 tsbord[3] = 3;
418 }
419 } else if (shared_4mttecnt) {
420 tsbord[1] = 3;
421 tsbord[2] = 2;
422 tsbord[3] = 1;
423 } else {
424 tsbord[1] = 2;
425 tsbord[2] = 1;
426 tsbord[3] = 3;
427 }
428 }
429
430 ASSERT(tsbinfop != NULL);
431 for (i = 0; tsbinfop != NULL && i < NHV_TSB_INFO; i++) {
432 if (i == 0) {
433 tdp = &sfmmup->sfmmu_hvblock.hv_tsb_info[i];
434 sfmmup->sfmmu_hvblock.hv_tsb_info_pa = va_to_pa(tdp);
435 }
436
437
438 j = tsbord[i];
439
440 tdp = &sfmmup->sfmmu_hvblock.hv_tsb_info[j];
441
442 ASSERT(tsbinfop->tsb_ttesz_mask != 0);
443 tdp->hvtsb_idxpgsz = lowbit(tsbinfop->tsb_ttesz_mask) - 1;
444 tdp->hvtsb_assoc = 1;
445 tdp->hvtsb_ntte = TSB_ENTRIES(tsbinfop->tsb_szc);
446 tdp->hvtsb_ctx_index = scd;
447 tdp->hvtsb_pgszs = tsbinfop->tsb_ttesz_mask;
448 tdp->hvtsb_rsvd = 0;
449 tdp->hvtsb_pa = tsbinfop->tsb_pa;
450
451 tsbinfop = tsbinfop->tsb_next;
452 if (tsbinfop == NULL && !scd && sfmmup->sfmmu_scdp != NULL) {
453 tsbinfop =
454 sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb;
455 scd = 1;
456 }
457 }
458 sfmmup->sfmmu_hvblock.hv_tsb_info_cnt = i;
459 ASSERT(tsbinfop == NULL);
460 }
461
462 /*
463 * Invalidate a TSB via processor specific TSB invalidation routine
464 */
465 void
sfmmu_inv_tsb(caddr_t tsb_base,uint_t tsb_bytes)466 sfmmu_inv_tsb(caddr_t tsb_base, uint_t tsb_bytes)
467 {
468 extern void cpu_inv_tsb(caddr_t, uint_t);
469
470 cpu_inv_tsb(tsb_base, tsb_bytes);
471 }
472
/*
 * Completely flush the D-cache on all cpus.
 * Not applicable to sun4v, so this is intentionally a no-op.
 */
void
sfmmu_cache_flushall()
{
	/* nothing to do on sun4v */
}
481