xref: /titanic_52/usr/src/uts/sun4u/os/ppage.c (revision 03831d35f7499c87d51205817c93e9a8d42c4bae)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/archsystm.h>
32 #include <sys/machsystm.h>
33 #include <sys/t_lock.h>
34 #include <sys/vmem.h>
35 #include <sys/mman.h>
36 #include <sys/vm.h>
37 #include <sys/cpu.h>
38 #include <sys/cmn_err.h>
39 #include <sys/cpuvar.h>
40 #include <sys/atomic.h>
41 #include <vm/as.h>
42 #include <vm/hat.h>
43 #include <vm/as.h>
44 #include <vm/page.h>
45 #include <vm/seg.h>
46 #include <vm/seg_kmem.h>
47 #include <vm/hat_sfmmu.h>
48 #include <sys/debug.h>
49 #include <sys/cpu_module.h>
50 
51 /*
52  * A quick way to generate a cache consistent address to map in a page.
53  * users: ppcopy, pagezero, /proc, dev/mem
54  *
55  * The ppmapin/ppmapout routines provide a quick way of generating a cache
56  * consistent address by reserving a given amount of kernel address space.
57  * The base is PPMAPBASE and its size is PPMAPSIZE.  This memory is divided
58  * into x number of sets, where x is the number of colors for the virtual
59  * cache. The number of colors is how many times a page can be mapped
60  * simultaneously in the cache.  For direct map caches this translates to
61  * the number of pages in the cache.
62  * Each set will be assigned a group of virtual pages from the reserved memory
63  * depending on its virtual color.
64  * When trying to assign a virtual address we will find out the color for the
65  * physical page in question (if applicable).  Then we will try to find an
66  * available virtual page from the set of the appropriate color.
67  */
68 
/*
 * Map a (color, set) pair to its index in ppmap_vaddrs[].  The array is
 * laid out color-major: the nsets entries of one color are contiguous.
 * Both arguments are parenthesized so that expression arguments (for
 * example "a + b" or "x & mask") are grouped as the caller intended.
 */
#define	clsettoarray(color, set) (((color) * nsets) + (set))
70 
/*
 * Number of per-CPU slots pp_load_tlb() will search.  ppmapinit() asserts
 * that this never exceeds MAXPP_SLOTS, the size of each per-CPU slot array.
 */
int pp_slots = 4;		/* small default, tuned by cpu module */

/*
 * Bit mask of PPAGE_* flags consulted by pp_load_tlb() to decide whether a
 * page's existing virtual color must be kept for loads and/or stores.
 */
/* tuned by cpu module, default is "safe" */
int pp_consistent_coloring = PPAGE_STORES_POLLUTE | PPAGE_LOADS_POLLUTE;

/*
 * Pool of reserved virtual addresses handed out by ppmapin().  An entry
 * holds the address while it is free and NULL while it is in use; entries
 * are indexed with clsettoarray(color, set).
 */
static caddr_t	ppmap_vaddrs[PPMAPSIZE / MMU_PAGESIZE];
static int	nsets;			/* number of sets */
/* number of colors (pages per set); 1 when the cache is not a VAC */
static int	ppmap_pages;		/* generate align mask */
/* right-shift applied to a va in ppmapout() to recover its set number */
static int	ppmap_shift;		/* set selector */

/* Statistics kept only when PPDEBUG is defined. */
#ifdef PPDEBUG
#define		MAXCOLORS	16	/* for debug only */
static int	ppalloc_noslot = 0;	/* # of allocations from kernelmap */
static int	align_hits[MAXCOLORS];	/* per-color count of free slots seen */
static int	pp_allocs;		/* # of ppmapin requests */
#endif /* PPDEBUG */
87 
/*
 * There are only 64 TLB entries on spitfire, 16 on cheetah
 * (fully-associative TLB) so we allow the cpu module to tune the
 * number to use here via pp_slots.
 *
 * One slot array per CPU, indexed by cpu id in pp_load_tlb().  A slot is
 * NULL while free; pp_load_tlb() stores the mapped virtual address in it
 * and pp_unload_tlb() resets it to NULL.
 */
static struct ppmap_va {
	caddr_t	ppmap_slots[MAXPP_SLOTS];
} ppmap_va[NCPU];
96 
97 void
98 ppmapinit(void)
99 {
100 	int color, nset, setsize;
101 	caddr_t va;
102 
103 	ASSERT(pp_slots <= MAXPP_SLOTS);
104 
105 	va = (caddr_t)PPMAPBASE;
106 	if (cache & CACHE_VAC) {
107 		int a;
108 
109 		ppmap_pages = mmu_btop(shm_alignment);
110 		nsets = PPMAPSIZE / shm_alignment;
111 		setsize = shm_alignment;
112 		ppmap_shift = MMU_PAGESHIFT;
113 		a = ppmap_pages;
114 		while (a >>= 1)
115 			ppmap_shift++;
116 	} else {
117 		/*
118 		 * If we do not have a virtual indexed cache we simply
119 		 * have only one set containing all pages.
120 		 */
121 		ppmap_pages = 1;
122 		nsets = mmu_btop(PPMAPSIZE);
123 		setsize = MMU_PAGESIZE;
124 		ppmap_shift = MMU_PAGESHIFT;
125 	}
126 	for (color = 0; color < ppmap_pages; color++) {
127 		for (nset = 0; nset < nsets; nset++) {
128 			ppmap_vaddrs[clsettoarray(color, nset)] =
129 			    (caddr_t)((uintptr_t)va + (nset * setsize));
130 		}
131 		va += MMU_PAGESIZE;
132 	}
133 }
134 
135 /*
136  * Allocate a cache consistent virtual address to map a page, pp,
137  * with protection, vprot; and map it in the MMU, using the most
138  * efficient means possible.  The argument avoid is a virtual address
139  * hint which when masked yields an offset into a virtual cache
140  * that should be avoided when allocating an address to map in a
141  * page.  An avoid arg of -1 means you don't care, for instance pagezero.
142  *
143  * machine dependent, depends on virtual address space layout,
144  * understands that all kernel addresses have bit 31 set.
145  *
146  * NOTE: For sun4 platforms the meaning of the hint argument is opposite from
147  * that found in other architectures.  In other architectures the hint
148  * (called avoid) was used to ask ppmapin to NOT use the specified cache color.
149  * This was used to avoid virtual cache trashing in the bcopy.  Unfortunately
150  * in the case of a COW,  this later on caused a cache aliasing conflict.  In
151  * sun4, the bcopy routine uses the block ld/st instructions so we don't have
152  * to worry about virtual cache trashing.  Actually, by using the hint to choose
153  * the right color we can almost guarantee a cache conflict will not occur.
154  */
155 
156 caddr_t
157 ppmapin(page_t *pp, uint_t vprot, caddr_t hint)
158 {
159 	int color, nset, index, start;
160 	caddr_t va;
161 
162 #ifdef PPDEBUG
163 	pp_allocs++;
164 #endif /* PPDEBUG */
165 	if (cache & CACHE_VAC) {
166 		color = sfmmu_get_ppvcolor(pp);
167 		if (color == -1) {
168 			if ((intptr_t)hint != -1L) {
169 				color = addr_to_vcolor(hint);
170 			} else {
171 				color = addr_to_vcolor(mmu_ptob(pp->p_pagenum));
172 			}
173 		}
174 
175 	} else {
176 		/*
177 		 * For physical caches, we can pick any address we want.
178 		 */
179 		color = 0;
180 	}
181 
182 	start = color;
183 	do {
184 		for (nset = 0; nset < nsets; nset++) {
185 			index = clsettoarray(color, nset);
186 			va = ppmap_vaddrs[index];
187 			if (va != NULL) {
188 #ifdef PPDEBUG
189 				align_hits[color]++;
190 #endif /* PPDEBUG */
191 				if (casptr(&ppmap_vaddrs[index],
192 				    va, NULL) == va) {
193 					hat_memload(kas.a_hat, va, pp,
194 						vprot | HAT_NOSYNC,
195 						HAT_LOAD_LOCK);
196 					return (va);
197 				}
198 			}
199 		}
200 		/*
201 		 * first pick didn't succeed, try another
202 		 */
203 		if (++color == ppmap_pages)
204 			color = 0;
205 	} while (color != start);
206 
207 #ifdef PPDEBUG
208 	ppalloc_noslot++;
209 #endif /* PPDEBUG */
210 
211 	/*
212 	 * No free slots; get a random one from the kernel heap area.
213 	 */
214 	va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
215 
216 	hat_memload(kas.a_hat, va, pp, vprot | HAT_NOSYNC, HAT_LOAD_LOCK);
217 
218 	return (va);
219 
220 }
221 
222 void
223 ppmapout(caddr_t va)
224 {
225 	int color, nset, index;
226 
227 	if (va >= kernelheap && va < ekernelheap) {
228 		/*
229 		 * Space came from kernelmap, flush the page and
230 		 * return the space.
231 		 */
232 		hat_unload(kas.a_hat, va, PAGESIZE,
233 		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
234 		vmem_free(heap_arena, va, PAGESIZE);
235 	} else {
236 		/*
237 		 * Space came from ppmap_vaddrs[], give it back.
238 		 */
239 		color = addr_to_vcolor(va);
240 		ASSERT((cache & CACHE_VAC)? (color < ppmap_pages) : 1);
241 
242 		nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1);
243 		index = clsettoarray(color, nset);
244 		hat_unload(kas.a_hat, va, PAGESIZE,
245 		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
246 
247 		ASSERT(ppmap_vaddrs[index] == NULL);
248 		ppmap_vaddrs[index] = va;
249 	}
250 }
251 
/*
 * Event counters for the per-CPU fast path.  They exist only in DEBUG
 * builds; otherwise PP_STAT_ADD() expands to nothing.
 *
 * pload        - calls to pp_load_tlb()
 * ploadfail    - pp_load_tlb() calls that found no free slot
 * ppzero       - calls to pagezero()
 * ppzero_short - pagezero() calls that skipped the fast path up front
 */
#ifdef DEBUG
#define	PP_STAT_ADD(stat)	(stat)++
uint_t pload, ploadfail;
uint_t ppzero, ppzero_short;
#else
#define	PP_STAT_ADD(stat)
#endif /* DEBUG */
259 
260 /*
261  * Find a slot in per CPU page copy area. Load up a locked TLB in the
262  * running cpu. We don't call hat layer to load up the tte since the
263  * mapping is only temporary. If the thread migrates it'll get a TLB
264  * miss trap and TLB/TSB miss handler will panic since there is no
265  * official hat record of this mapping.
266  */
267 static caddr_t
268 pp_load_tlb(processorid_t cpu, caddr_t **pslot, page_t *pp, uint_t prot)
269 {
270 	struct ppmap_va	*ppmap;
271 	tte_t		tte;
272 	caddr_t		*myslot;
273 	caddr_t		va;
274 	long		i, start, stride;
275 	int		vcolor;
276 	uint_t		flags, strict_flag;
277 
278 	PP_STAT_ADD(pload);
279 
280 	ppmap = &ppmap_va[cpu];
281 	va = (caddr_t)(PPMAP_FAST_BASE + (MMU_PAGESIZE * MAXPP_SLOTS) * cpu);
282 	myslot = ppmap->ppmap_slots;
283 	ASSERT(addr_to_vcolor(va) == 0);
284 
285 	if (prot & TTE_HWWR_INT) {
286 		flags = PPAGE_STORE_VCOLORING | PPAGE_STORES_POLLUTE;
287 		strict_flag = PPAGE_STORES_POLLUTE;
288 	} else {
289 		flags = PPAGE_LOAD_VCOLORING | PPAGE_LOADS_POLLUTE;
290 		strict_flag = PPAGE_LOADS_POLLUTE;
291 	}
292 
293 	/*
294 	 * If consistent handling is required then keep the current
295 	 * vcolor of the page.  Furthermore, if loads or stores can
296 	 * pollute the VAC then using a "new" page (unassigned vcolor)
297 	 * won't work and we have to return a failure.
298 	 */
299 	if (pp_consistent_coloring & flags) {
300 		vcolor = sfmmu_get_ppvcolor(pp);
301 		if ((vcolor == -1) &&
302 		    (pp_consistent_coloring & strict_flag))
303 			return (NULL);
304 		/* else keep the current vcolor of the page */
305 	} else {
306 		vcolor = -1;
307 	}
308 
309 	if (vcolor != -1) {
310 		va += MMU_PAGESIZE * vcolor;
311 		start = vcolor;
312 		stride = ppmap_pages; /* number of colors */
313 		myslot += vcolor;
314 	} else {
315 		start = 0;
316 		stride = 1;
317 	}
318 
319 	for (i = start; i < pp_slots; i += stride) {
320 		if (*myslot == NULL) {
321 			if (casptr(myslot, NULL, va) == NULL)
322 				break;
323 		}
324 		myslot += stride;
325 		va += MMU_PAGESIZE * stride;
326 	}
327 
328 	if (i >= pp_slots) {
329 		PP_STAT_ADD(ploadfail);
330 		return (NULL);
331 	}
332 
333 	ASSERT(vcolor == -1 || addr_to_vcolor(va) == vcolor);
334 
335 	/*
336 	 * Now we have a slot we can use, make the tte.
337 	 */
338 	tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(pp->p_pagenum);
339 	tte.tte_intlo = TTE_PFN_INTLO(pp->p_pagenum) | TTE_CP_INT |
340 	    TTE_CV_INT | TTE_PRIV_INT | TTE_LCK_INT | prot;
341 
342 	ASSERT(CPU->cpu_id == cpu);
343 	sfmmu_dtlb_ld(va, KCONTEXT, &tte);
344 
345 	*pslot = myslot;	/* Return ptr to the slot we used. */
346 
347 	return (va);
348 }
349 
/*
 * Undo a successful pp_load_tlb(): flush va in the kernel context with
 * vtag_flushpage() and mark the per-CPU slot free again.
 *
 * pslot - slot pointer handed back by pp_load_tlb()
 * va    - address returned by that same pp_load_tlb() call
 */
static void
pp_unload_tlb(caddr_t *pslot, caddr_t va)
{
	/* the slot must still record the address being released */
	ASSERT(*pslot == va);

	/*
	 * NOTE(review): the flush precedes the release, presumably so the
	 * slot is never visible as free while its mapping is still loaded
	 * -- confirm before reordering.
	 */
	vtag_flushpage(va, KCONTEXT);
	*pslot = NULL;				/* release the slot */
}
358 
359 /*
360  * Common copy routine which attempts to use hwblkpagecopy.  If this routine
361  * can't be used, failure (0) will be returned.  Otherwise, a PAGESIZE page
362  * will be copied and success (1) will be returned.
363  */
364 int
365 ppcopy_common(page_t *fm_pp, page_t *to_pp)
366 {
367 	caddr_t fm_va, to_va;
368 	caddr_t	*fm_slot, *to_slot;
369 	processorid_t cpu;
370 
371 	ASSERT(PAGE_LOCKED(fm_pp));
372 	ASSERT(PAGE_LOCKED(to_pp));
373 
374 	/*
375 	 * If we can't use VIS block loads and stores we can't use
376 	 * pp_load_tlb/pp_unload_tlb due to the possibility of
377 	 * d$ aliasing.
378 	 */
379 	if (!use_hw_bcopy && (cache & CACHE_VAC))
380 		return (0);
381 
382 	kpreempt_disable();
383 	cpu = CPU->cpu_id;
384 	fm_va = pp_load_tlb(cpu, &fm_slot, fm_pp, 0);
385 	if (fm_va == NULL) {
386 		kpreempt_enable();
387 		return (0);
388 	}
389 	to_va = pp_load_tlb(cpu, &to_slot, to_pp, TTE_HWWR_INT);
390 	if (to_va == NULL) {
391 		pp_unload_tlb(fm_slot, fm_va);
392 		kpreempt_enable();
393 		return (0);
394 	}
395 	hwblkpagecopy(fm_va, to_va);
396 	ASSERT(CPU->cpu_id == cpu);
397 	pp_unload_tlb(fm_slot, fm_va);
398 	pp_unload_tlb(to_slot, to_va);
399 	kpreempt_enable();
400 	return (1);
401 }
402 
403 /*
404  * Routine to copy kernel pages during relocation.  It will copy one
405  * PAGESIZE page to another PAGESIZE page.  This function may be called
406  * above LOCK_LEVEL so it should not grab any locks.
407  */
408 void
409 ppcopy_kernel__relocatable(page_t *fm_pp, page_t *to_pp)
410 {
411 	uint64_t fm_pa, to_pa;
412 	size_t nbytes;
413 
414 	fm_pa = (uint64_t)(fm_pp->p_pagenum) << MMU_PAGESHIFT;
415 	to_pa = (uint64_t)(to_pp->p_pagenum) << MMU_PAGESHIFT;
416 
417 	nbytes = MMU_PAGESIZE;
418 
419 	for (; nbytes > 0; fm_pa += 32, to_pa += 32, nbytes -= 32)
420 		hw_pa_bcopy32(fm_pa, to_pa);
421 }
422 
423 /*
424  * Copy the data from the physical page represented by "frompp" to
425  * that represented by "topp".
426  *
427  * Try to use per cpu mapping first, if that fails then call pp_mapin
428  * to load it.
429  */
430 void
431 ppcopy(page_t *fm_pp, page_t *to_pp)
432 {
433 	caddr_t fm_va, to_va;
434 
435 	/* Try the fast path first */
436 	if (ppcopy_common(fm_pp, to_pp))
437 		return;
438 
439 	/* Fast path failed, so we need to do the slow path. */
440 	fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1);
441 	to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va);
442 	bcopy(fm_va, to_va, PAGESIZE);
443 	ppmapout(fm_va);
444 	ppmapout(to_va);
445 }
446 
447 /*
448  * Zero the physical page from off to off + len given by `pp'
449  * without changing the reference and modified bits of page.
450  *
451  * Again, we'll try per cpu mapping first.
452  */
453 void
454 pagezero(page_t *pp, uint_t off, uint_t len)
455 {
456 	caddr_t va;
457 	caddr_t *slot;
458 	int fast = 1;
459 	processorid_t cpu;
460 	extern int hwblkclr(void *, size_t);
461 	extern int use_hw_bzero;
462 
463 	ASSERT((int)len > 0 && (int)off >= 0 && off + len <= PAGESIZE);
464 	ASSERT(PAGE_LOCKED(pp));
465 
466 	PP_STAT_ADD(ppzero);
467 
468 	if (len != MMU_PAGESIZE || !use_hw_bzero) {
469 		/*
470 		 * Since the fast path doesn't do anything about
471 		 * VAC coloring, we make sure bcopy h/w will be used.
472 		 */
473 		fast = 0;
474 		va = NULL;
475 		PP_STAT_ADD(ppzero_short);
476 	}
477 
478 	kpreempt_disable();
479 
480 	if (fast) {
481 		cpu = CPU->cpu_id;
482 		va = pp_load_tlb(cpu, &slot, pp, TTE_HWWR_INT);
483 	}
484 
485 	if (va == NULL) {
486 		/*
487 		 * We are here either length != MMU_PAGESIZE or pp_load_tlb()
488 		 * returns NULL or use_hw_bzero is disabled.
489 		 */
490 		va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
491 		fast = 0;
492 	}
493 
494 	if (hwblkclr(va + off, len)) {
495 		/*
496 		 * We may not have used block commit asi.
497 		 * So flush the I-$ manually
498 		 */
499 
500 		ASSERT(fast == 0);
501 
502 		sync_icache(va + off, len);
503 	} else {
504 		/*
505 		 * We have used blk commit, and flushed the I-$. However we
506 		 * still may have an instruction in the pipeline. Only a flush
507 		 * instruction will invalidate that.
508 		 */
509 		doflush(va);
510 	}
511 
512 	if (fast) {
513 		ASSERT(CPU->cpu_id == cpu);
514 		pp_unload_tlb(slot, va);
515 	} else {
516 		ppmapout(va);
517 	}
518 
519 	kpreempt_enable();
520 }
521