xref: /titanic_52/usr/src/uts/sun4u/vm/mach_vm_dep.c (revision 449975fd500a154ec93bafe3fc6a5913c5bb82a5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /*	All Rights Reserved   */
28 
29 /*
30  * Portions of this source code were derived from Berkeley 4.3 BSD
31  * under license from the Regents of the University of California.
32  */
33 
34 #pragma ident	"%Z%%M%	%I%	%E% SMI"
35 
36 /*
37  * UNIX machine dependent virtual memory support.
38  */
39 
40 #include <sys/vm.h>
41 #include <sys/exec.h>
42 #include <sys/cmn_err.h>
43 #include <sys/cpu_module.h>
44 #include <sys/cpu.h>
45 #include <sys/elf_SPARC.h>
46 #include <sys/archsystm.h>
47 #include <vm/hat_sfmmu.h>
48 #include <sys/memnode.h>
49 #include <sys/mem_cage.h>
50 #include <vm/vm_dep.h>
51 
#if defined(__sparcv9) && defined(SF_ERRATA_57)
/*
 * Lower bound consulted by map_addr_proc(): for a 64-bit address space, a
 * chosen address below this limit is discarded and NULL is handed back.
 */
caddr_t errata57_limit;
#endif

/*
 * Page coloring state.  All of these start out as zero in this file.
 * NOTE(review): presumably filled in by platform/CPU startup code -- confirm.
 */
uint_t page_colors = 0;
uint_t page_colors_mask = 0;
uint_t page_coloring_shift = 0;
int consistent_coloring;

/*
 * MMU page size and hash configuration, seeded from the compile-time
 * defaults.  map_addr_proc() only considers 32M and 256M alignment when
 * mmu_page_sizes equals max_mmu_page_sizes.
 */
uint_t mmu_page_sizes = DEFAULT_MMU_PAGE_SIZES;
uint_t max_mmu_page_sizes = MMU_PAGE_SIZES;
uint_t mmu_hashcnt = DEFAULT_MAX_HASHCNT;
uint_t max_mmu_hashcnt = MAX_HASHCNT;
size_t mmu_ism_pagesize = DEFAULT_ISM_PAGESIZE;
66 
/*
 * The sun4u hardware mapping sizes which will always be supported are
 * 8K, 64K, 512K and 4M.  If sun4u based machines need to support other
 * page sizes, platform or cpu specific routines need to modify the value.
 * The base pagesize (p_szc == 0) must always be supported by the hardware.
 *
 * The mask holds one bit per TTE size code (bit number == size code).
 */
int mmu_exported_pagesize_mask = (1 << TTE8K) | (1 << TTE64K) |
	(1 << TTE512K) | (1 << TTE4M);
/*
 * NOTE(review): presumably the count of page sizes exported via the mask
 * above; it is not assigned in this file -- confirm against its users.
 */
uint_t mmu_exported_page_sizes;

/*
 * Translation tables with one slot per MMU page size.
 * NOTE(review): judging by the names these map between kernel size codes
 * and user-visible size codes; they are not filled in here -- confirm.
 */
uint_t szc_2_userszc[MMU_PAGE_SIZES];
uint_t userszc_2_szc[MMU_PAGE_SIZES];
79 
/* Defined elsewhere; not referenced by the code visible in this file. */
extern uint_t vac_colors_mask;
extern int vac_shift;

/*
 * Table of hardware page sizes, smallest first.  Each entry lists the page
 * size in bytes, the matching shift, and the size expressed as a count of
 * base (MMU_PAGESIZE) pages.  An all-zero entry terminates the table.
 *
 * map_pgsz() indexes this table with the value returned by
 * hat_preferred_pgsz() and reads the hp_size member.
 */
hw_pagesize_t hw_page_array[] = {
	{MMU_PAGESIZE, MMU_PAGESHIFT, MMU_PAGESIZE >> MMU_PAGESHIFT},
	{MMU_PAGESIZE64K, MMU_PAGESHIFT64K, MMU_PAGESIZE64K >> MMU_PAGESHIFT},
	{MMU_PAGESIZE512K, MMU_PAGESHIFT512K,
	    MMU_PAGESIZE512K >> MMU_PAGESHIFT},
	{MMU_PAGESIZE4M, MMU_PAGESHIFT4M, MMU_PAGESIZE4M >> MMU_PAGESHIFT},
	{MMU_PAGESIZE32M, MMU_PAGESHIFT32M, MMU_PAGESIZE32M >> MMU_PAGESHIFT},
	{MMU_PAGESIZE256M, MMU_PAGESHIFT256M,
	    MMU_PAGESIZE256M >> MMU_PAGESHIFT},
	{0, 0, 0}
};
94 
/*
 * use_text_pgsz64k, use_initdata_pgsz64k and use_text_pgsz4m
 * can be set in platform or CPU specific code, but the user can change the
 * default values via /etc/system.
 */

int	use_text_pgsz64k = 0;
int	use_text_pgsz4m = 0;
int	use_initdata_pgsz64k = 0;

/*
 * The disable_text_largepages and disable_initdata_largepages bitmasks are
 * set in platform or CPU specific code to disable page sizes that should not
 * be used.  These variables normally shouldn't be changed via /etc/system.
 * A particular page size for text or initialized data will be used by
 * default only if the corresponding use_* variable is set to 1 AND that page
 * size is not disabled in the corresponding disable_* bitmask variable.
 *
 * Each bit position is a TTE size code, so by default 4M and 64K are
 * disabled for text and 64K is disabled for initialized data.
 */

int disable_text_largepages = (1 << TTE4M) | (1 << TTE64K);
int disable_initdata_largepages = (1 << TTE64K);

/*
 * Minimum segment size tunables before 64K or 4M large pages
 * should be used to map it.  Each defaults to one page of the
 * corresponding large page size.
 */
size_t text_pgsz64k_minsize = MMU_PAGESIZE64K;
size_t text_pgsz4m_minsize = MMU_PAGESIZE4M;
size_t initdata_pgsz64k_minsize = MMU_PAGESIZE64K;

/*
 * NOTE(review): presumably an upper bound on the large page size used for
 * shared memory, with ULONG_MAX meaning "no limit" -- confirm against its
 * users; it is not referenced in this file.
 */
size_t max_shm_lpsize = ULONG_MAX;
126 
127 /*
128  * map_addr_proc() is the routine called when the system is to
129  * choose an address for the user.  We will pick an address
130  * range which is just below the current stack limit.  The
131  * algorithm used for cache consistency on machines with virtual
132  * address caches is such that offset 0 in the vnode is always
133  * on a shm_alignment'ed aligned address.  Unfortunately, this
134  * means that vnodes which are demand paged will not be mapped
135  * cache consistently with the executable images.  When the
136  * cache alignment for a given object is inconsistent, the
137  * lower level code must manage the translations so that this
138  * is not seen here (at the cost of efficiency, of course).
139  *
140  * addrp is a value/result parameter.
141  *	On input it is a hint from the user to be used in a completely
142  *	machine dependent fashion.  For MAP_ALIGN, addrp contains the
143  *	minimal alignment.
144  *
145  *	On output it is NULL if no address can be found in the current
146  *	processes address space or else an address that is currently
147  *	not mapped for len bytes with a page of red zone on either side.
148  *	If vacalign is true, then the selected address will obey the alignment
149  *	constraints of a vac machine based on the given off value.
150  */
151 /*ARGSUSED4*/
152 void
153 map_addr_proc(caddr_t *addrp, size_t len, offset_t off, int vacalign,
154     caddr_t userlimit, struct proc *p, uint_t flags)
155 {
156 	struct as *as = p->p_as;
157 	caddr_t addr;
158 	caddr_t base;
159 	size_t slen;
160 	uintptr_t align_amount;
161 	int allow_largepage_alignment = 1;
162 
163 	base = p->p_brkbase;
164 	if (userlimit < as->a_userlimit) {
165 		/*
166 		 * This happens when a program wants to map something in
167 		 * a range that's accessible to a program in a smaller
168 		 * address space.  For example, a 64-bit program might
169 		 * be calling mmap32(2) to guarantee that the returned
170 		 * address is below 4Gbytes.
171 		 */
172 		ASSERT(userlimit > base);
173 		slen = userlimit - base;
174 	} else {
175 		slen = p->p_usrstack - base - (((size_t)rctl_enforced_value(
176 		    rctlproc_legacy[RLIMIT_STACK], p->p_rctls, p) + PAGEOFFSET)
177 		    & PAGEMASK);
178 	}
179 	len = (len + PAGEOFFSET) & PAGEMASK;
180 
181 	/*
182 	 * Redzone for each side of the request. This is done to leave
183 	 * one page unmapped between segments. This is not required, but
184 	 * it's useful for the user because if their program strays across
185 	 * a segment boundary, it will catch a fault immediately making
186 	 * debugging a little easier.
187 	 */
188 	len += (2 * PAGESIZE);
189 
190 	/*
191 	 *  If the request is larger than the size of a particular
192 	 *  mmu level, then we use that level to map the request.
193 	 *  But this requires that both the virtual and the physical
194 	 *  addresses be aligned with respect to that level, so we
195 	 *  do the virtual bit of nastiness here.
196 	 *
197 	 *  For 32-bit processes, only those which have specified
198 	 *  MAP_ALIGN or an addr will be aligned on a page size > 4MB. Otherwise
199 	 *  we can potentially waste up to 256MB of the 4G process address
200 	 *  space just for alignment.
201 	 */
202 	if (p->p_model == DATAMODEL_ILP32 && ((flags & MAP_ALIGN) == 0 ||
203 	    ((uintptr_t)*addrp) != 0)) {
204 		allow_largepage_alignment = 0;
205 	}
206 	if ((mmu_page_sizes == max_mmu_page_sizes) &&
207 	    allow_largepage_alignment &&
208 		(len >= MMU_PAGESIZE256M)) {	/* 256MB mappings */
209 		align_amount = MMU_PAGESIZE256M;
210 	} else if ((mmu_page_sizes == max_mmu_page_sizes) &&
211 	    allow_largepage_alignment &&
212 		(len >= MMU_PAGESIZE32M)) {	/* 32MB mappings */
213 		align_amount = MMU_PAGESIZE32M;
214 	} else if (len >= MMU_PAGESIZE4M) {  /* 4MB mappings */
215 		align_amount = MMU_PAGESIZE4M;
216 	} else if (len >= MMU_PAGESIZE512K) { /* 512KB mappings */
217 		align_amount = MMU_PAGESIZE512K;
218 	} else if (len >= MMU_PAGESIZE64K) { /* 64KB mappings */
219 		align_amount = MMU_PAGESIZE64K;
220 	} else  {
221 		/*
222 		 * Align virtual addresses on a 64K boundary to ensure
223 		 * that ELF shared libraries are mapped with the appropriate
224 		 * alignment constraints by the run-time linker.
225 		 */
226 		align_amount = ELF_SPARC_MAXPGSZ;
227 		if ((flags & MAP_ALIGN) && ((uintptr_t)*addrp != 0) &&
228 			((uintptr_t)*addrp < align_amount))
229 			align_amount = (uintptr_t)*addrp;
230 	}
231 
232 	/*
233 	 * 64-bit processes require 1024K alignment of ELF shared libraries.
234 	 */
235 	if (p->p_model == DATAMODEL_LP64)
236 		align_amount = MAX(align_amount, ELF_SPARCV9_MAXPGSZ);
237 #ifdef VAC
238 	if (vac && vacalign && (align_amount < shm_alignment))
239 		align_amount = shm_alignment;
240 #endif
241 
242 	if ((flags & MAP_ALIGN) && ((uintptr_t)*addrp > align_amount)) {
243 		align_amount = (uintptr_t)*addrp;
244 	}
245 	len += align_amount;
246 
247 	/*
248 	 * Look for a large enough hole starting below the stack limit.
249 	 * After finding it, use the upper part.  Addition of PAGESIZE is
250 	 * for the redzone as described above.
251 	 */
252 	as_purge(as);
253 	if (as_gap(as, len, &base, &slen, AH_HI, NULL) == 0) {
254 		caddr_t as_addr;
255 
256 		addr = base + slen - len + PAGESIZE;
257 		as_addr = addr;
258 		/*
259 		 * Round address DOWN to the alignment amount,
260 		 * add the offset, and if this address is less
261 		 * than the original address, add alignment amount.
262 		 */
263 		addr = (caddr_t)((uintptr_t)addr & (~(align_amount - 1l)));
264 		addr += (long)(off & (align_amount - 1l));
265 		if (addr < as_addr) {
266 			addr += align_amount;
267 		}
268 
269 		ASSERT(addr <= (as_addr + align_amount));
270 		ASSERT(((uintptr_t)addr & (align_amount - 1l)) ==
271 		    ((uintptr_t)(off & (align_amount - 1l))));
272 		*addrp = addr;
273 
274 #if defined(SF_ERRATA_57)
275 		if (AS_TYPE_64BIT(as) && addr < errata57_limit) {
276 			*addrp = NULL;
277 		}
278 #endif
279 	} else {
280 		*addrp = NULL;	/* no more virtual space */
281 	}
282 }
283 
/*
 * Platforms with smaller or larger TLBs may wish to change this.  Most
 * sun4u platforms can hold 1024 8K entries by default and most processes
 * are observed to be < 6MB on these machines, so we decide to move up
 * here to give ourselves some wiggle room for other, smaller segments.
 */
int auto_lpg_tlb_threshold = 768;
/* Smallest and largest size codes for automatic large pages: both 4M. */
int auto_lpg_minszc = TTE4M;
int auto_lpg_maxszc = TTE4M;
/*
 * Default page sizes, all set to the base page size here.
 * NOTE(review): presumably consumed by the map_pgszheap(), map_pgszstk()
 * and map_pgszva() helpers that map_pgsz() calls -- confirm.
 */
size_t auto_lpg_heap_default = MMU_PAGESIZE;
size_t auto_lpg_stack_default = MMU_PAGESIZE;
size_t auto_lpg_va_default = MMU_PAGESIZE;
/* map_pgsz() suggests a remap only for lengths strictly above this value. */
size_t auto_lpg_remap_threshold = 0;
/*
 * Number of pages in 1 GB.  Don't enable automatic large pages if we have
 * fewer than this many pages.
 */
pgcnt_t auto_lpg_min_physmem = 1 << (30 - MMU_PAGESHIFT);
302 
303 /*
304  * Suggest a page size to be used to map a segment of type maptype and length
305  * len.  Returns a page size (not a size code).
306  * If remap is non-NULL, fill in a value suggesting whether or not to remap
307  * this segment.
308  */
309 size_t
310 map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int *remap)
311 {
312 	uint_t	n;
313 	size_t	pgsz = 0;
314 
315 	if (remap)
316 		*remap = (len > auto_lpg_remap_threshold);
317 
318 	switch (maptype) {
319 	case MAPPGSZ_ISM:
320 		n = hat_preferred_pgsz(p->p_as->a_hat, addr, len, maptype);
321 		pgsz = hw_page_array[n].hp_size;
322 
323 		/*
324 		 * For non-Panther systems, the following code sets the [D]ISM
325 		 * pagesize to 4M if either of the DTLBs happens to be
326 		 * programmed to a different large pagesize.
327 		 * The Panther code might hit this case as well,
328 		 * if and only if the addr is not aligned to >= 4M.
329 		 */
330 		if ((pgsz > 0) && (pgsz < MMU_PAGESIZE4M))
331 			pgsz = MMU_PAGESIZE4M;
332 		break;
333 
334 	case MAPPGSZ_VA:
335 		n = hat_preferred_pgsz(p->p_as->a_hat, addr, len, maptype);
336 		pgsz = hw_page_array[n].hp_size;
337 		if ((pgsz <= MMU_PAGESIZE) ||
338 		    !IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(len, pgsz))
339 			pgsz = map_pgszva(p, addr, len);
340 		break;
341 
342 	case MAPPGSZ_STK:
343 		pgsz = map_pgszstk(p, addr, len);
344 		break;
345 
346 	case MAPPGSZ_HEAP:
347 		pgsz = map_pgszheap(p, addr, len);
348 		break;
349 	}
350 	return (pgsz);
351 }
352 
/*
 * Platform-dependent page scrub call.
 *
 * pp, off and len are passed unchanged to pagezero(), so the scrubbed range
 * is whatever pagezero() clears for those arguments.
 */
void
pagescrub(page_t *pp, uint_t off, uint_t len)
{
	/*
	 * For now, we rely on the fact that pagezero() will
	 * always clear UEs.
	 * NOTE(review): "UEs" presumably means uncorrectable errors -- confirm.
	 */
	pagezero(pp, off, len);
}
365 
/*
 * Calls cpu_flush_ecache().  The va and len arguments are not used, so the
 * requested range does not narrow the operation.
 */
/*ARGSUSED*/
void
sync_data_memory(caddr_t va, size_t len)
{
	cpu_flush_ecache();
}
372 
373 /*
374  * platform specific large pages for kernel heap support
375  */
376 void
377 mmu_init_kcontext()
378 {
379 	extern void set_kcontextreg();
380 
381 	if (kcontextreg)
382 		set_kcontextreg();
383 }
384 
/*
 * Contiguous memory initialization hook.  Intentionally empty on this
 * platform.
 */
void
contig_mem_init(void)
{
	/* not applicable to sun4u */
}
390