/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/t_lock.h>
#include <sys/vmem.h>
#include <sys/mman.h>
#include <sys/vm.h>
#include <sys/cpu.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/atomic.h>
#include <vm/as.h>
#include <vm/hat.h>
#include <vm/page.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/hat_sfmmu.h>
#include <sys/debug.h>
#include <sys/cpu_module.h>
#include <sys/mem_cage.h>

/*
 * A quick way to generate a cache consistent address to map in a page.
 * users: ppcopy, pagezero, /proc, dev/mem
 *
 * The ppmapin/ppmapout routines provide a quick way of generating a cache
 * consistent address by reserving a given amount of kernel address space.
 * The base is PPMAPBASE and its size is PPMAPSIZE.  This memory is divided
 * into x number of sets, where x is the number of colors for the virtual
 * cache.  The number of colors is how many times a page can be mapped
 * simultaneously in the cache.  For direct map caches this translates to
 * the number of pages in the cache.
 * Each set will be assigned a group of virtual pages from the reserved memory
 * depending on its virtual color.
 * When trying to assign a virtual address we will find out the color for the
 * physical page in question (if applicable).  Then we will try to find an
 * available virtual page from the set of the appropriate color.
 */

#define	clsettoarray(color, set) ((color * nsets) + set)
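
/*
 * Illustrative example of the layout built by ppmapinit() below (the sizes
 * are hypothetical, not a statement about any particular cpu): with 8K
 * pages and shm_alignment of 64K, ppmap_pages is 8 colors and nsets is
 * PPMAPSIZE / 64K.  The virtual page reserved for (color c, set s) is
 *
 *	PPMAPBASE + (c * MMU_PAGESIZE) + (s * shm_alignment)
 *
 * and is recorded in ppmap_vaddrs[clsettoarray(c, s)], i.e. at index
 * (c * nsets) + s.
 */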

int pp_slots = 4;		/* small default, tuned by cpu module */

/* tuned by cpu module, default is "safe" */
int pp_consistent_coloring = PPAGE_STORES_POLLUTE | PPAGE_LOADS_POLLUTE;

static caddr_t	ppmap_vaddrs[PPMAPSIZE / MMU_PAGESIZE];
static int	nsets;			/* number of sets */
static int	ppmap_pages;		/* generate align mask */
static int	ppmap_shift;		/* set selector */

#ifdef PPDEBUG
#define	MAXCOLORS	16		/* for debug only */
static int	ppalloc_noslot = 0;	/* # of allocations from kernelmap */
static int	align_hits[MAXCOLORS];
static int	pp_allocs;		/* # of ppmapin requests */
#endif /* PPDEBUG */

/*
 * There are only 64 TLB entries on spitfire, 16 on cheetah
 * (fully-associative TLB) so we allow the cpu module to tune the
 * number to use here via pp_slots.
 */
static struct ppmap_va {
	caddr_t	ppmap_slots[MAXPP_SLOTS];
} ppmap_va[NCPU];

void
ppmapinit(void)
{
	int color, nset, setsize;
	caddr_t va;

	ASSERT(pp_slots <= MAXPP_SLOTS);

	va = (caddr_t)PPMAPBASE;
	if (cache & CACHE_VAC) {
		int a;

		ppmap_pages = mmu_btop(shm_alignment);
		nsets = PPMAPSIZE / shm_alignment;
		setsize = shm_alignment;
		ppmap_shift = MMU_PAGESHIFT;
		a = ppmap_pages;
		while (a >>= 1)
			ppmap_shift++;
	} else {
		/*
		 * If we do not have a virtual indexed cache we simply
		 * have only one set containing all pages.
		 */
		ppmap_pages = 1;
		nsets = mmu_btop(PPMAPSIZE);
		setsize = MMU_PAGESIZE;
		ppmap_shift = MMU_PAGESHIFT;
	}
	for (color = 0; color < ppmap_pages; color++) {
		for (nset = 0; nset < nsets; nset++) {
			ppmap_vaddrs[clsettoarray(color, nset)] =
			    (caddr_t)((uintptr_t)va + (nset * setsize));
		}
		va += MMU_PAGESIZE;
	}
}
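
/*
 * Note on ppmap_shift as computed above: it ends up as
 * MMU_PAGESHIFT + log2(ppmap_pages), so (va >> ppmap_shift) strips both
 * the page offset and the color bits.  For example (hypothetical sizes
 * only), with 8K pages and 8 colors, ppmap_shift = 13 + 3 = 16, and
 * ppmapout() recovers the set with
 *
 *	nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1);
 *
 * The (nsets - 1) mask relies on nsets being a power of two.
 */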

/*
 * Allocate a cache consistent virtual address to map a page, pp,
 * with protection, vprot; and map it in the MMU, using the most
 * efficient means possible.  The argument avoid is a virtual address
 * hint which when masked yields an offset into a virtual cache
 * that should be avoided when allocating an address to map in a
 * page.  An avoid arg of -1 means you don't care, for instance pagezero.
 *
 * machine dependent, depends on virtual address space layout,
 * understands that all kernel addresses have bit 31 set.
 *
 * NOTE: For sun4 platforms the meaning of the hint argument is opposite from
 * that found in other architectures.  In other architectures the hint
 * (called avoid) was used to ask ppmapin to NOT use the specified cache color.
 * This was used to avoid virtual cache thrashing in the bcopy.  Unfortunately
 * in the case of a COW, this later on caused a cache aliasing conflict.  In
 * sun4, the bcopy routine uses the block ld/st instructions so we don't have
 * to worry about virtual cache thrashing.  Actually, by using the hint to
 * choose the right color we can almost guarantee a cache conflict will not
 * occur.
 */

caddr_t
ppmapin(page_t *pp, uint_t vprot, caddr_t hint)
{
	int color, nset, index, start;
	caddr_t va;

#ifdef PPDEBUG
	pp_allocs++;
#endif /* PPDEBUG */
	if (cache & CACHE_VAC) {
		color = sfmmu_get_ppvcolor(pp);
		if (color == -1) {
			if ((intptr_t)hint != -1L) {
				color = addr_to_vcolor(hint);
			} else {
				color = addr_to_vcolor(mmu_ptob(pp->p_pagenum));
			}
		}

	} else {
		/*
		 * For physical caches, we can pick any address we want.
		 */
		color = 0;
	}

	start = color;
	do {
		for (nset = 0; nset < nsets; nset++) {
			index = clsettoarray(color, nset);
			va = ppmap_vaddrs[index];
			if (va != NULL) {
#ifdef PPDEBUG
				align_hits[color]++;
#endif /* PPDEBUG */
				if (atomic_cas_ptr(&ppmap_vaddrs[index],
				    va, NULL) == va) {
					hat_memload(kas.a_hat, va, pp,
					    vprot | HAT_NOSYNC,
					    HAT_LOAD_LOCK);
					return (va);
				}
			}
		}
		/*
		 * first pick didn't succeed, try another
		 */
		if (++color == ppmap_pages)
			color = 0;
	} while (color != start);

#ifdef PPDEBUG
	ppalloc_noslot++;
#endif /* PPDEBUG */

	/*
	 * No free slots; get a random one from the kernel heap area.
	 */
	va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	hat_memload(kas.a_hat, va, pp, vprot | HAT_NOSYNC, HAT_LOAD_LOCK);

	return (va);
}
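
/*
 * Illustrative usage (a sketch mirroring what ppcopy() below does, not a
 * new interface): every ppmapin() must be paired with a ppmapout() on the
 * address it returned, whether that address came from ppmap_vaddrs[] or
 * from heap_arena:
 *
 *	va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
 *	... access the page through va ...
 *	ppmapout(va);
 */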

void
ppmapout(caddr_t va)
{
	int color, nset, index;

	if (va >= kernelheap && va < ekernelheap) {
		/*
		 * Space came from the kernel heap arena; flush the page
		 * and return the space.
		 */
		hat_unload(kas.a_hat, va, PAGESIZE,
		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
		vmem_free(heap_arena, va, PAGESIZE);
	} else {
		/*
		 * Space came from ppmap_vaddrs[], give it back.
		 */
		color = addr_to_vcolor(va);
		ASSERT((cache & CACHE_VAC) ? (color < ppmap_pages) : 1);

		nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1);
		index = clsettoarray(color, nset);
		hat_unload(kas.a_hat, va, PAGESIZE,
		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));

		ASSERT(ppmap_vaddrs[index] == NULL);
		ppmap_vaddrs[index] = va;
	}
}

#ifdef DEBUG
#define	PP_STAT_ADD(stat)	(stat)++
uint_t pload, ploadfail;
uint_t ppzero, ppzero_short;
#else
#define	PP_STAT_ADD(stat)
#endif /* DEBUG */

/*
 * Find a slot in the per CPU page copy area and load a locked TLB entry
 * on the running cpu.  We don't call the hat layer to load the tte since
 * the mapping is only temporary.  If the thread migrates it'll get a TLB
 * miss trap and the TLB/TSB miss handler will panic since there is no
 * official hat record of this mapping.
 */
static caddr_t
pp_load_tlb(processorid_t cpu, caddr_t **pslot, page_t *pp, uint_t prot)
{
	struct ppmap_va	*ppmap;
	tte_t		tte;
	caddr_t		*myslot;
	caddr_t		va;
	long		i, start, stride;
	int		vcolor;
	uint_t		flags, strict_flag;

	PP_STAT_ADD(pload);

	ppmap = &ppmap_va[cpu];
	va = (caddr_t)(PPMAP_FAST_BASE + (MMU_PAGESIZE * MAXPP_SLOTS) * cpu);
	myslot = ppmap->ppmap_slots;
	ASSERT(addr_to_vcolor(va) == 0);

	if (prot & TTE_HWWR_INT) {
		flags = PPAGE_STORE_VCOLORING | PPAGE_STORES_POLLUTE;
		strict_flag = PPAGE_STORES_POLLUTE;
	} else {
		flags = PPAGE_LOAD_VCOLORING | PPAGE_LOADS_POLLUTE;
		strict_flag = PPAGE_LOADS_POLLUTE;
	}

	/*
	 * If consistent handling is required then keep the current
	 * vcolor of the page.  Furthermore, if loads or stores can
	 * pollute the VAC then using a "new" page (unassigned vcolor)
	 * won't work and we have to return a failure.
	 */
	if (pp_consistent_coloring & flags) {
		vcolor = sfmmu_get_ppvcolor(pp);
		if ((vcolor == -1) &&
		    (pp_consistent_coloring & strict_flag))
			return (NULL);
		/* else keep the current vcolor of the page */
	} else {
		vcolor = -1;
	}

	if (vcolor != -1) {
		va += MMU_PAGESIZE * vcolor;
		start = vcolor;
		stride = ppmap_pages;	/* number of colors */
		myslot += vcolor;
	} else {
		start = 0;
		stride = 1;
	}

	for (i = start; i < pp_slots; i += stride) {
		if (*myslot == NULL) {
			if (atomic_cas_ptr(myslot, NULL, va) == NULL)
				break;
		}
		myslot += stride;
		va += MMU_PAGESIZE * stride;
	}

	if (i >= pp_slots) {
		PP_STAT_ADD(ploadfail);
		return (NULL);
	}

	ASSERT(vcolor == -1 || addr_to_vcolor(va) == vcolor);

	/*
	 * Now we have a slot we can use, make the tte.
	 */
	tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(pp->p_pagenum);
	tte.tte_intlo = TTE_PFN_INTLO(pp->p_pagenum) | TTE_CP_INT |
	    TTE_CV_INT | TTE_PRIV_INT | TTE_LCK_INT | prot;

	ASSERT(CPU->cpu_id == cpu);
	sfmmu_dtlb_ld_kva(va, &tte);

	*pslot = myslot;	/* Return ptr to the slot we used. */

	return (va);
}
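
/*
 * A worked example of the slot search above (the numbers are hypothetical):
 * with ppmap_pages = 4 colors and pp_slots = 8, a page whose vcolor is 1
 * may only land in slots 1 and 5 (start = 1, stride = 4), so the returned
 * va keeps the page's virtual color.  With vcolor == -1 every slot 0..7
 * is a candidate (start = 0, stride = 1).
 */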

static void
pp_unload_tlb(caddr_t *pslot, caddr_t va)
{
	ASSERT(*pslot == va);

	vtag_flushpage(va, (uint64_t)ksfmmup);
	*pslot = NULL;			/* release the slot */
}
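
/*
 * Note, drawn from the callers below: pp_load_tlb() and pp_unload_tlb()
 * must run on the same cpu, so callers such as ppcopy_common() and
 * pagezero() wrap the load/use/unload sequence in kpreempt_disable()/
 * kpreempt_enable(); the locked TLB entry exists only in that cpu's TLB
 * and has no hat record.
 */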

/*
 * Common copy routine which attempts to use hwblkpagecopy.  If this routine
 * can't be used, failure (0) will be returned.  Otherwise, a PAGESIZE page
 * will be copied and success (1) will be returned.
 */
int
ppcopy_common(page_t *fm_pp, page_t *to_pp)
{
	caddr_t fm_va, to_va;
	caddr_t *fm_slot, *to_slot;
	processorid_t cpu;
	label_t ljb;
	int ret = 1;

	ASSERT(fm_pp != NULL && PAGE_LOCKED(fm_pp));
	ASSERT(to_pp != NULL && PAGE_LOCKED(to_pp));

	/*
	 * If we can't use VIS block loads and stores we can't use
	 * pp_load_tlb/pp_unload_tlb due to the possibility of
	 * d$ aliasing.
	 */
	if (!use_hw_bcopy && (cache & CACHE_VAC))
		return (0);

	kpreempt_disable();
	cpu = CPU->cpu_id;
	fm_va = pp_load_tlb(cpu, &fm_slot, fm_pp, 0);
	if (fm_va == NULL) {
		kpreempt_enable();
		return (0);
	}
	to_va = pp_load_tlb(cpu, &to_slot, to_pp, TTE_HWWR_INT);
	if (to_va == NULL) {
		pp_unload_tlb(fm_slot, fm_va);
		kpreempt_enable();
		return (0);
	}
	if (on_fault(&ljb)) {
		ret = 0;
		goto faulted;
	}
	hwblkpagecopy(fm_va, to_va);
	no_fault();
faulted:
	ASSERT(CPU->cpu_id == cpu);
	pp_unload_tlb(fm_slot, fm_va);
	pp_unload_tlb(to_slot, to_va);
	kpreempt_enable();
	return (ret);
}

/*
 * Routine to copy kernel pages during relocation.  It will copy one
 * PAGESIZE page to another PAGESIZE page.  This function may be called
 * above LOCK_LEVEL so it should not grab any locks.
 */
void
ppcopy_kernel__relocatable(page_t *fm_pp, page_t *to_pp)
{
	uint64_t fm_pa, to_pa;
	size_t nbytes;

	fm_pa = (uint64_t)(fm_pp->p_pagenum) << MMU_PAGESHIFT;
	to_pa = (uint64_t)(to_pp->p_pagenum) << MMU_PAGESHIFT;

	nbytes = MMU_PAGESIZE;

	for (; nbytes > 0; fm_pa += 32, to_pa += 32, nbytes -= 32)
		hw_pa_bcopy32(fm_pa, to_pa);
}
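
/*
 * The loop above moves the page in 32-byte physical chunks, i.e.
 * MMU_PAGESIZE / 32 calls to hw_pa_bcopy32() (256 iterations for an 8K
 * page, to give one illustrative figure), so no virtual mapping and no
 * locks are needed while relocating kernel pages.
 */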

/*
 * Copy the data from the physical page represented by "fm_pp" to
 * that represented by "to_pp".
 *
 * Try the per cpu mapping first; if that fails, call ppmapin()
 * to map the pages.
 *
 * Returns one on success or zero on some sort of fault while doing the copy.
 */
int
ppcopy(page_t *fm_pp, page_t *to_pp)
{
	caddr_t fm_va, to_va;
	label_t ljb;
	int ret = 1;
	boolean_t use_kpm = B_FALSE;

	/* Try the fast path first */
	if (ppcopy_common(fm_pp, to_pp))
		return (1);

	/*
	 * Try to map using KPM if enabled and we are the cageout thread.
	 * If it fails, fall back to ppmapin/ppmapout.
	 */

	if (kpm_enable) {
		if (curthread == kcage_cageout_thread)
			use_kpm = B_TRUE;
	}

	if (use_kpm) {
		if ((fm_va = hat_kpm_mapin(fm_pp, NULL)) == NULL ||
		    (to_va = hat_kpm_mapin(to_pp, NULL)) == NULL) {
			if (fm_va != NULL)
				hat_kpm_mapout(fm_pp, NULL, fm_va);
			use_kpm = B_FALSE;
		}
	}

	if (use_kpm == B_FALSE) {
		/* do the slow path */
		fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1);
		to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va);
		if (on_fault(&ljb)) {
			ret = 0;
			goto faulted;
		}
	}
	bcopy(fm_va, to_va, PAGESIZE);
	no_fault();
faulted:
	/* unmap */
	if (use_kpm == B_TRUE) {
		hat_kpm_mapout(fm_pp, NULL, fm_va);
		hat_kpm_mapout(to_pp, NULL, to_va);
	} else {
		ppmapout(fm_va);
		ppmapout(to_va);
	}
	return (ret);
}
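
/*
 * To summarize the fallback chain in ppcopy(), as derived from the code
 * above: first the per-cpu locked-TLB path in ppcopy_common(), then a KPM
 * mapping (only for the cageout thread, and only when kpm_enable is set),
 * and finally the generic ppmapin()/ppmapout() slow path.
 */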

/*
 * Zero the physical page from off to off + len given by `pp'
 * without changing the reference and modified bits of the page.
 *
 * Again, we'll try the per cpu mapping first.
 */
void
pagezero(page_t *pp, uint_t off, uint_t len)
{
	caddr_t va;
	caddr_t *slot;
	int fast = 1;
	processorid_t cpu;
	extern int hwblkclr(void *, size_t);
	extern int use_hw_bzero;

	ASSERT((int)len > 0 && (int)off >= 0 && off + len <= PAGESIZE);
	ASSERT(PAGE_LOCKED(pp));

	PP_STAT_ADD(ppzero);

	if (len != MMU_PAGESIZE || !use_hw_bzero) {
		/*
		 * Since the fast path doesn't do anything about
		 * VAC coloring, we make sure bcopy h/w will be used.
		 */
		fast = 0;
		va = NULL;
		PP_STAT_ADD(ppzero_short);
	}

	kpreempt_disable();

	if (fast) {
		cpu = CPU->cpu_id;
		va = pp_load_tlb(cpu, &slot, pp, TTE_HWWR_INT);
	}

	if (va == NULL) {
		/*
		 * We are here because either len != MMU_PAGESIZE,
		 * use_hw_bzero is disabled, or pp_load_tlb() returned NULL.
		 */
		va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
		fast = 0;
	}

	if (hwblkclr(va + off, len)) {
		/*
		 * We may not have used the block commit ASI,
		 * so flush the I-$ manually.
		 */

		ASSERT(fast == 0);

		sync_icache(va + off, len);
	} else {
		/*
		 * We have used block commit and flushed the I-$.  However we
		 * still may have an instruction in the pipeline.  Only a flush
		 * instruction will invalidate that.
		 */
		doflush(va);
	}

	if (fast) {
		ASSERT(CPU->cpu_id == cpu);
		pp_unload_tlb(slot, va);
	} else {
		ppmapout(va);
	}

	kpreempt_enable();
}