xref: /titanic_51/usr/src/uts/i86pc/vm/vm_dep.h (revision 99ebb4ca412cb0a19d77a3899a87c055b9c30fa8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * UNIX machine dependent virtual memory support.
28  */
29 
30 #ifndef	_VM_DEP_H
31 #define	_VM_DEP_H
32 
33 #pragma ident	"%Z%%M%	%I%	%E% SMI"
34 
35 #ifdef	__cplusplus
36 extern "C" {
37 #endif
38 
39 #include <sys/clock.h>
40 #include <vm/hat_pte.h>
41 
42 /*
43  * WARNING: vm_dep.h is included by files in common. As such, macros
44  * dependent upon PTE36 such as LARGEPAGESIZE cannot be used in this file.
45  */
46 
47 #define	GETTICK()	tsc_read()
48 
49 /* memranges in descending order */
50 extern pfn_t		*memranges;
51 
/*
 * MEMRANGEHI(mtype) yields the highest pfn covered by memranges[mtype]:
 * one below the base of the next-higher range, or physmax for range 0
 * (memranges[] is kept in descending order).
 * MEMRANGELO(mtype) yields the lowest pfn of memranges[mtype].
 *
 * The argument is fully parenthesized so that an expression such as
 * (x & 3) is evaluated as a unit.
 */
#define	MEMRANGEHI(mtype)						\
	(((mtype) > 0) ? memranges[(mtype) - 1] - 1 : physmax)
#define	MEMRANGELO(mtype)	(memranges[(mtype)])
55 
56 #define	MTYPE_FREEMEM(mt)						\
57 	(mnoderanges[mt].mnr_mt_clpgcnt +				\
58 	    mnoderanges[mt].mnr_mt_flpgcnt +				\
59 	    mnoderanges[mt].mnr_mt_lgpgcnt)
60 
61 /*
62  * combined memory ranges from mnode and memranges[] to manage single
63  * mnode/mtype dimension in the page lists.
64  */
/*
 * One element of the mnoderanges[] array, which the macros in this file
 * index by mtype.
 */
typedef struct {
	pfn_t	mnr_pfnlo;		/* low pfn; read by MNODETYPE_2_PFN */
	pfn_t	mnr_pfnhi;		/* high pfn; read by MNODETYPE_2_PFN */
	int	mnr_mnode;		/* mnode this range belongs to */
	int	mnr_memrange;		/* index into memranges[] */
	/* maintain page list stats */
	pgcnt_t	mnr_mt_pgmax;		/* mnode/mtype max page cnt */
	pgcnt_t	mnr_mt_clpgcnt;		/* cache list cnt */
	pgcnt_t	mnr_mt_flpgcnt;		/* free list cnt - small pages */
	pgcnt_t	mnr_mt_lgpgcnt;		/* free list cnt - large pages */
#ifdef DEBUG
	/*
	 * DEBUG-only per-page-size statistics.  PLCNT_INIT points mnr_mts
	 * at an array of mmu_page_sizes entries; PLCNT_DO updates them.
	 */
	struct mnr_mts {		/* mnode/mtype szc stats */
		pgcnt_t	mnr_mts_pgcnt;	/* page count for this szc */
		int	mnr_mts_colors;	/* page_get_pagecolors(szc) */
		pgcnt_t *mnr_mtsc_pgcnt; /* per-color counts, indexed by bin */
	} 	*mnr_mts;
#endif
} mnoderange_t;
83 
84 #ifdef DEBUG
85 #define	PLCNT_SZ(ctrs_sz) {						\
86 	int	szc, colors;						\
87 	ctrs_sz += mnoderangecnt * sizeof (struct mnr_mts) *		\
88 	    mmu_page_sizes;						\
89 	for (szc = 0; szc < mmu_page_sizes; szc++) {			\
90 		colors = page_get_pagecolors(szc);			\
91 		ctrs_sz += mnoderangecnt * sizeof (pgcnt_t) * colors;	\
92 	}								\
93 }
94 
95 #define	PLCNT_INIT(addr) {						\
96 	int	mt, szc, colors;					\
97 	for (mt = 0; mt < mnoderangecnt; mt++) {			\
98 		mnoderanges[mt].mnr_mts = (struct mnr_mts *)addr;	\
99 		addr += (sizeof (struct mnr_mts) * mmu_page_sizes);	\
100 		for (szc = 0; szc < mmu_page_sizes; szc++) {		\
101 			colors = page_get_pagecolors(szc);		\
102 			mnoderanges[mt].mnr_mts[szc].mnr_mts_colors =	\
103 			    colors;					\
104 			mnoderanges[mt].mnr_mts[szc].mnr_mtsc_pgcnt =	\
105 			    (pgcnt_t *)addr;				\
106 			addr += (sizeof (pgcnt_t) * colors);		\
107 		}							\
108 	}								\
109 }
/*
 * DEBUG variant: adjust the page list counters of mnoderanges[mtype] by cnt.
 *
 * Exactly one of the cache-list, large-page free-list or small-page
 * free-list totals is updated, chosen by PG_CACHE_LIST in flags and then
 * by szc.  The per-szc total and the per-color (bin) count for pp are
 * updated as well.
 *
 * Macro arguments are parenthesized so compound expressions (for example
 * "a | b" passed as flags) are evaluated as a unit.
 */
#define	PLCNT_DO(pp, mtype, szc, cnt, flags) {				\
	int	bin = PP_2_BIN(pp);					\
	if ((flags) & PG_CACHE_LIST)					\
		atomic_add_long(&mnoderanges[(mtype)].			\
		    mnr_mt_clpgcnt, (cnt));				\
	else if ((szc))							\
		atomic_add_long(&mnoderanges[(mtype)].			\
		    mnr_mt_lgpgcnt, (cnt));				\
	else								\
		atomic_add_long(&mnoderanges[(mtype)].			\
		    mnr_mt_flpgcnt, (cnt));				\
	atomic_add_long(&mnoderanges[(mtype)].mnr_mts[(szc)].		\
	    mnr_mts_pgcnt, (cnt));					\
	atomic_add_long(&mnoderanges[(mtype)].mnr_mts[(szc)].		\
	    mnr_mtsc_pgcnt[bin], (cnt));				\
}
126 #else
127 #define	PLCNT_SZ(ctrs_sz)
128 #define	PLCNT_INIT(base)
/*
 * Non-DEBUG variant: adjust one page list total of mnoderanges[mtype]
 * by cnt.  PG_CACHE_LIST in flags selects the cache-list count; otherwise
 * a non-zero szc selects the large-page free-list count and szc 0 the
 * small-page free-list count.  pp is unused here.
 *
 * Macro arguments are parenthesized so compound expressions (for example
 * "a | b" passed as flags) are evaluated as a unit.
 */
#define	PLCNT_DO(pp, mtype, szc, cnt, flags) {				\
	if ((flags) & PG_CACHE_LIST)					\
		atomic_add_long(&mnoderanges[(mtype)].			\
		    mnr_mt_clpgcnt, (cnt));				\
	else if ((szc))							\
		atomic_add_long(&mnoderanges[(mtype)].			\
		    mnr_mt_lgpgcnt, (cnt));				\
	else								\
		atomic_add_long(&mnoderanges[(mtype)].			\
		    mnr_mt_flpgcnt, (cnt));				\
}
140 #endif
141 
/*
 * Account for pp being added to a page list: add the number of base pages
 * in a page of size szc to freemem4g (only when physmax4g is set and mtype
 * is at or below mtype4g) and to the per-mtype counters via PLCNT_DO.
 *
 * The count is computed in long to match its declared type, and mtype is
 * parenthesized so expression arguments compare correctly.  mnode is unused.
 */
#define	PLCNT_INCR(pp, mnode, mtype, szc, flags) {			\
	long	cnt = (1L << PAGE_BSZS_SHIFT(szc));			\
	ASSERT((mtype) == PP_2_MTYPE(pp));				\
	if (physmax4g && (mtype) <= mtype4g)				\
		atomic_add_long(&freemem4g, cnt);			\
	PLCNT_DO(pp, mtype, szc, cnt, flags);				\
}
149 
/*
 * Account for pp being removed from a page list: subtract the number of
 * base pages in a page of size szc from freemem4g (only when physmax4g is
 * set and mtype is at or below mtype4g) and from the per-mtype counters
 * via PLCNT_DO.
 *
 * The negative count is formed by negating a positive shift; left-shifting
 * a negative value, as "(-1) << n" does, is undefined behavior in C.
 * mnode is unused.
 */
#define	PLCNT_DECR(pp, mnode, mtype, szc, flags) {			\
	long	cnt = -(1L << PAGE_BSZS_SHIFT(szc));			\
	ASSERT((mtype) == PP_2_MTYPE(pp));				\
	if (physmax4g && (mtype) <= mtype4g)				\
		atomic_add_long(&freemem4g, cnt);			\
	PLCNT_DO(pp, mtype, szc, cnt, flags);				\
}
157 
158 /*
159  * macros to update page list max counts.  no-op on x86.
160  */
161 #define	PLCNT_XFER_NORELOC(pp)
162 
/*
 * Adjust the max page count for the range containing pfn by cnt.
 * cnt is converted to long, the type mtype_modify_max() is declared to
 * take, and is parenthesized so the cast applies to the whole expression.
 */
#define	PLCNT_MODIFY_MAX(pfn, cnt)	mtype_modify_max((pfn), (long)(cnt))
164 
165 extern mnoderange_t	*mnoderanges;
166 extern int		mnoderangecnt;
167 extern int		mtype4g;
168 
169 /*
170  * 4g memory management variables for systems with more than 4g of memory:
171  *
172  * physical memory below 4g is required for 32bit dma devices and, currently,
173  * for kmem memory. On systems with more than 4g of memory, the pool of memory
174  * below 4g can be depleted without any paging activity given that there is
175  * likely to be sufficient memory above 4g.
176  *
177  * physmax4g is set true if the largest pfn is over 4g. The rest of the
178  * 4g memory management code is enabled only when physmax4g is true.
179  *
 * maxmem4g is the count of the maximum number of pages on the page lists
 * with physical addresses below 4g. It can be a lot less than 4g given that
 * BIOS may reserve large chunks of space below 4g for hot plug pci devices,
 * agp aperture etc.
184  *
185  * freemem4g maintains the count of the number of available pages on the
186  * page lists with physical addresses below 4g.
187  *
188  * DESFREE4G specifies the desired amount of below 4g memory. It defaults to
189  * 6% (desfree4gshift = 4) of maxmem4g.
190  *
191  * RESTRICT4G_ALLOC returns true if freemem4g falls below DESFREE4G
192  * and the amount of physical memory above 4g is greater than freemem4g.
193  * In this case, page_get_* routines will restrict below 4g allocations
194  * for requests that don't specifically require it.
195  */
196 
197 extern int		physmax4g;
198 extern pgcnt_t		maxmem4g;
199 extern pgcnt_t		freemem4g;
200 extern int		lotsfree4gshift;
201 extern int		desfree4gshift;
202 #define	LOTSFREE4G	(maxmem4g >> lotsfree4gshift)
203 #define	DESFREE4G	(maxmem4g >> desfree4gshift)
204 
205 #define	RESTRICT4G_ALLOC					\
206 	(physmax4g && (freemem4g < DESFREE4G) && ((freemem4g << 1) < freemem))
207 
208 /*
209  * 16m memory management:
210  *
211  * reserve some amount of physical memory below 16m for legacy devices.
212  *
 * RESTRICT16M_ALLOC returns true if there are sufficient free pages above
 * 16m or if the 16m pool drops below DESFREE16M.
215  *
216  * In this case, general page allocations via page_get_{free,cache}list
217  * routines will be restricted from allocating from the 16m pool. Allocations
218  * that require specific pfn ranges (page_get_anylist) and PG_PANIC allocations
219  * are not restricted.
220  */
221 
/* free pages counted for mtype 0, used here as the below-16m pool */
#define	FREEMEM16M	MTYPE_FREEMEM(0)
#define	DESFREE16M	desfree16m
/*
 * Non-zero when the allocation should be steered away from the 16m pool:
 * freemem is non-zero, PG_PANIC is not set in flags, and either freemem is
 * at least FREEMEM16M or FREEMEM16M is less than DESFREE16M + pgcnt.
 *
 * Note that the first parameter is named freemem and therefore hides the
 * global of the same name inside the expansion.  All arguments are
 * parenthesized so expression arguments are evaluated as a unit.
 */
#define	RESTRICT16M_ALLOC(freemem, pgcnt, flags)		\
	(((freemem) != 0) && (((flags) & PG_PANIC) == 0) &&	\
	    (((freemem) >= (FREEMEM16M)) ||			\
	    ((FREEMEM16M) < (DESFREE16M + (pgcnt)))))
228 extern pgcnt_t		desfree16m;
229 
230 extern int		restricted_kmemalloc;
231 extern int		memrange_num(pfn_t);
232 extern int		pfn_2_mtype(pfn_t);
233 extern int		mtype_func(int, int, uint_t);
234 extern void		mtype_modify_max(pfn_t, long);
235 extern int		mnode_pgcnt(int);
236 extern int		mnode_range_cnt(int);
237 
238 #define	NUM_MEM_RANGES	4		/* memory range types */
239 
240 /*
241  * candidate counters in vm_pagelist.c are indexed by color and range
242  */
243 #define	MAX_MNODE_MRANGES	NUM_MEM_RANGES
244 #define	MNODE_RANGE_CNT(mnode)	mnode_range_cnt(mnode)
245 #define	MNODE_MAX_MRANGE(mnode)	(memrange_num(mem_node_config[mnode].physbase))
246 #define	MTYPE_2_MRANGE(mnode, mtype)	\
247 	(mnode_maxmrange[mnode] - mnoderanges[mtype].mnr_memrange)
248 
249 /*
250  * Per page size free lists. Allocated dynamically.
251  * dimensions [mtype][mmu_page_sizes][colors]
252  *
253  * mtype specifies a physical memory range with a unique mnode.
254  */
255 
256 extern page_t ****page_freelists;
257 
/*
 * Head of the free list for the given mtype, page size code and color.
 * The expansion is an lvalue.  mnode is accepted but not used.
 */
#define	PAGE_FREELISTS(mnode, szc, color, mtype)		\
	(page_freelists[mtype][szc][(color)])
260 
261 /*
262  * For now there is only a single size cache list. Allocated dynamically.
263  * dimensions [mtype][colors]
264  *
265  * mtype specifies a physical memory range with a unique mnode.
266  */
267 extern page_t ***page_cachelists;
268 
/*
 * Head of the cache list for the given mtype and color.
 * The expansion is an lvalue.  mnode is accepted but not used.
 */
#define	PAGE_CACHELISTS(mnode, color, mtype)		\
	(page_cachelists[mtype][(color)])
271 
272 /*
273  * There are mutexes for both the page freelist
274  * and the page cachelist.  We want enough locks to make contention
275  * reasonable, but not too many -- otherwise page_freelist_lock() gets
276  * so expensive that it becomes the bottleneck!
277  */
278 
279 #define	NPC_MUTEX	16
280 
281 extern kmutex_t	*fpc_mutex[NPC_MUTEX];
282 extern kmutex_t	*cpc_mutex[NPC_MUTEX];
283 
284 extern page_t *page_get_mnode_freelist(int, uint_t, int, uchar_t, uint_t);
285 extern page_t *page_get_mnode_cachelist(uint_t, uint_t, int, int);
286 
287 #define	PAGE_GET_COLOR_SHIFT(szc, nszc)                          \
288 	    (hw_page_array[(nszc)].hp_shift - hw_page_array[(szc)].hp_shift)
289 
290 #define	PFN_2_COLOR(pfn, szc)						\
291 	(((pfn) & page_colors_mask) >>			                \
292 	(hw_page_array[szc].hp_shift - hw_page_array[0].hp_shift))
293 
294 #define	PNUM_SIZE(szc)							\
295 	(hw_page_array[(szc)].hp_pgcnt)
296 #define	PNUM_SHIFT(szc)							\
297 	(hw_page_array[(szc)].hp_shift - hw_page_array[0].hp_shift)
298 #define	PAGE_GET_SHIFT(szc)						\
299 	(hw_page_array[(szc)].hp_shift)
300 #define	PAGE_GET_PAGECOLORS(szc)					\
301 	(hw_page_array[(szc)].hp_colors)
302 
303 /*
304  * This macro calculates the next sequential pfn with the specified
305  * color using color equivalency mask
306  */
/*
 * Advances the lvalue pfn in place.  With spfn = pfn >> PAGE_BSZS_SHIFT(szc):
 *   - if spfn already matches color under ceq_mask, step forward by one
 *     full stride (ceq_mask + 1) of szc-sized pages;
 *   - otherwise replace the masked bits of spfn with color, and add one
 *     stride if that did not move past spfn.
 * ceq_mask must be of the form 2^n - 1 and color must fit inside it.
 * color_mask is accepted but not used by this implementation.
 *
 * The whole expansion is one compound statement (the ASSERTs sit inside
 * the braces, after the declarations) so that the macro behaves as a
 * single statement, e.g. as the body of an unbraced if.
 */
#define	PAGE_NEXT_PFN_FOR_COLOR(pfn, szc, color, ceq_mask, color_mask)        \
	{								      \
		uint_t	pfn_shift = PAGE_BSZS_SHIFT(szc);                     \
		pfn_t	spfn = (pfn) >> pfn_shift;                            \
		pfn_t	stride = (ceq_mask) + 1;                              \
		ASSERT(((color) & ~(ceq_mask)) == 0);                         \
		ASSERT((((ceq_mask) + 1) & (ceq_mask)) == 0);                 \
		if (((spfn ^ (color)) & (ceq_mask)) == 0) {                   \
			(pfn) += stride << pfn_shift;                         \
		} else {                                                      \
			(pfn) = (spfn & ~(pfn_t)(ceq_mask)) | (color);        \
			(pfn) = ((pfn) > spfn ? (pfn) : (pfn) + stride) <<    \
			    pfn_shift;                                        \
		}                                                             \
	}
321 
322 /* get the color equivalency mask for the next szc */
323 #define	PAGE_GET_NSZ_MASK(szc, mask)                                         \
324 	((mask) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc)))
325 
326 /* get the color of the next szc */
327 #define	PAGE_GET_NSZ_COLOR(szc, color)                                       \
328 	((color) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc)))
329 
/*
 * page_t accessors.  pp is parenthesized in every expansion so that
 * arguments such as &page or (pp + 1) dereference the intended object.
 */

/* Find the bin for the given page if it was of size szc */
#define	PP_2_BIN_SZC(pp, szc)	(PFN_2_COLOR((pp)->p_pagenum, szc))

/* Bin for the page at its current size code */
#define	PP_2_BIN(pp)		(PP_2_BIN_SZC(pp, (pp)->p_szc))

#define	PP_2_MEM_NODE(pp)	(PFN_2_MEM_NODE((pp)->p_pagenum))
#define	PP_2_MTYPE(pp)		(pfn_2_mtype((pp)->p_pagenum))
#define	PP_2_SZC(pp)		((pp)->p_szc)
338 
/* number of base pages in one page of size code szc */
#define	SZCPAGES(szc)		(1 << PAGE_BSZS_SHIFT(szc))
/*
 * pfnum rounded down to a multiple of SZCPAGES(szc).  pfnum is
 * parenthesized so an expression argument is masked as a whole.
 */
#define	PFN_BASE(pfnum, szc)	((pfnum) & ~(SZCPAGES(szc) - 1))
341 
342 /*
343  * this structure is used for walking free page lists
344  * controls when to split large pages into smaller pages,
345  * and when to coalesce smaller pages into larger pages
346  */
347 typedef struct page_list_walker {
348 	uint_t	plw_colors;		/* num of colors for szc */
349 	uint_t  plw_color_mask;		/* colors-1 */
350 	uint_t	plw_bin_step;		/* next bin: 1 or 2 */
351 	uint_t  plw_count;		/* loop count */
352 	uint_t	plw_bin0;		/* starting bin */
353 	uint_t  plw_bin_marker;		/* bin after initial jump */
354 	uint_t  plw_bin_split_prev;	/* last bin we tried to split */
355 	uint_t  plw_do_split;		/* set if OK to split */
356 	uint_t  plw_split_next;		/* next bin to split */
357 	uint_t	plw_ceq_dif;		/* number of different color groups */
358 					/* to check */
359 	uint_t	plw_ceq_mask[MMU_PAGE_SIZES + 1]; /* color equiv mask */
360 	uint_t	plw_bins[MMU_PAGE_SIZES + 1];	/* num of bins */
361 } page_list_walker_t;
362 
363 void	page_list_walk_init(uchar_t szc, uint_t flags, uint_t bin,
364     int can_split, int use_ceq, page_list_walker_t *plw);
365 
366 uint_t	page_list_walk_next_bin(uchar_t szc, uint_t bin,
367     page_list_walker_t *plw);
368 
369 extern struct cpu	cpus[];
370 #define	CPU0		cpus
371 
372 #if defined(__amd64)
373 
374 /*
375  * set the mtype range (called from page_get_{free,cache}list)
376  *   - set range to above 4g if the system has more than 4g of memory and the
377  *   amount of memory below 4g runs low. If not, set range to above 16m if
378  *   16m threshold is reached otherwise set range to all of memory
379  *   starting from the hi pfns.
380  *
381  * page_get_anylist gets its mtype range from the specified ddi_dma_attr_t.
382  */
383 #define	MTYPE_INIT(mtype, vp, vaddr, flags, pgsz) {			\
384 	mtype = mnoderangecnt - 1;					\
385 	if (RESTRICT4G_ALLOC) {						\
386 		VM_STAT_ADD(vmm_vmstats.restrict4gcnt);			\
387 		/* here only for > 4g systems */			\
388 		flags |= PGI_MT_RANGE4G;				\
389 	} else if (RESTRICT16M_ALLOC(freemem, btop(pgsz), flags)) {	\
390 		flags |= PGI_MT_RANGE16M;				\
391 	} else {							\
392 		VM_STAT_ADD(vmm_vmstats.unrestrict16mcnt);		\
393 		VM_STAT_COND_ADD((flags & PG_PANIC),			\
394 		    vmm_vmstats.pgpanicalloc);				\
395 		flags |= PGI_MT_RANGE0;					\
396 	}								\
397 }
398 
399 #elif defined(__i386)
400 
401 /*
402  * set the mtype range
403  *   - kmem requests needs to be below 4g if restricted_kmemalloc is set.
404  *   - for non kmem requests, set range to above 4g if the amount of memory
405  *   below 4g runs low.
406  */
407 
408 #define	MTYPE_INIT(mtype, vp, vaddr, flags, pgsz) {			\
409 	if (restricted_kmemalloc && (vp) == &kvp &&			\
410 	    (caddr_t)(vaddr) >= kernelheap &&				\
411 	    (caddr_t)(vaddr) < ekernelheap) {				\
412 		ASSERT(physmax4g);					\
413 		mtype = mtype4g;					\
414 		if (RESTRICT16M_ALLOC(freemem4g - btop(pgsz),		\
415 		    btop(pgsz), flags)) {				\
416 			flags |= PGI_MT_RANGE16M;			\
417 		} else {						\
418 			VM_STAT_ADD(vmm_vmstats.unrestrict16mcnt);	\
419 			VM_STAT_COND_ADD((flags & PG_PANIC),		\
420 			    vmm_vmstats.pgpanicalloc);			\
421 			flags |= PGI_MT_RANGE0;				\
422 		}							\
423 	} else {							\
424 		mtype = mnoderangecnt - 1;				\
425 		if (RESTRICT4G_ALLOC) {					\
426 			VM_STAT_ADD(vmm_vmstats.restrict4gcnt);		\
427 			/* here only for > 4g systems */		\
428 			flags |= PGI_MT_RANGE4G;			\
429 		} else if (RESTRICT16M_ALLOC(freemem, btop(pgsz),	\
430 		    flags)) {						\
431 			flags |= PGI_MT_RANGE16M;			\
432 		} else {						\
433 			VM_STAT_ADD(vmm_vmstats.unrestrict16mcnt);	\
434 			VM_STAT_COND_ADD((flags & PG_PANIC),		\
435 			    vmm_vmstats.pgpanicalloc);			\
436 			flags |= PGI_MT_RANGE0;				\
437 		}							\
438 	}								\
439 }
440 
441 #endif	/* __i386 */
442 
443 /*
444  * macros to loop through the mtype range (page_get_mnode_{free,cache,any}list,
445  * and page_get_contig_pages)
446  *
447  * MTYPE_START sets the initial mtype. -1 if the mtype range specified does
448  * not contain mnode.
449  *
450  * MTYPE_NEXT sets the next mtype. -1 if there are no more valid
451  * mtype in the range.
452  */
453 
454 #define	MTYPE_START(mnode, mtype, flags)				\
455 	(mtype = mtype_func(mnode, mtype, flags))
456 
/*
 * Step mtype to the next candidate: when any PGI_MT_RANGE bit is set in
 * flags, ask mtype_func() with PGI_MT_NEXT added; otherwise there is no
 * next mtype and mtype becomes -1.
 *
 * flags is parenthesized so that an expression argument is tested and
 * OR-ed as a unit.
 */
#define	MTYPE_NEXT(mnode, mtype, flags) {				\
	if ((flags) & PGI_MT_RANGE) {					\
		mtype = mtype_func(mnode, mtype,			\
		    (flags) | PGI_MT_NEXT);				\
	} else {							\
		mtype = -1;						\
	}								\
}
464 
465 /* mtype init for page_get_replacement_page */
466 
467 #define	MTYPE_PGR_INIT(mtype, flags, pp, mnode, pgcnt) {		\
468 	mtype = mnoderangecnt - 1;					\
469 	if (RESTRICT16M_ALLOC(freemem, pgcnt, flags)) {			\
470 		flags |= PGI_MT_RANGE16M;				\
471 	} else {							\
472 		VM_STAT_ADD(vmm_vmstats.unrestrict16mcnt);		\
473 		flags |= PGI_MT_RANGE0;					\
474 	}								\
475 }
476 
477 #define	MNODE_PGCNT(mnode)		mnode_pgcnt(mnode)
478 
/*
 * Copy the pfn bounds of mnoderanges[mtype] into the lvalues pfnlo and
 * pfnhi, asserting that the entry belongs to mnode.
 *
 * The statements are wrapped in braces so the macro expands to a single
 * compound statement; previously only the ASSERT would have been governed
 * by an enclosing unbraced if.
 */
#define	MNODETYPE_2_PFN(mnode, mtype, pfnlo, pfnhi) {			\
	ASSERT(mnoderanges[mtype].mnr_mnode == (mnode));		\
	(pfnlo) = mnoderanges[mtype].mnr_pfnlo;				\
	(pfnhi) = mnoderanges[mtype].mnr_pfnhi;				\
}
483 
/*
 * Address of the mutex guarding the list for (mnode, bin): taken from
 * fpc_mutex[] when PG_FREE_LIST is set in flags, otherwise from
 * cpc_mutex[].  bin is folded onto the NPC_MUTEX lock rows by masking.
 * flags and mnode are parenthesized so expression arguments are
 * evaluated as a unit.
 */
#define	PC_BIN_MUTEX(mnode, bin, flags) (((flags) & PG_FREE_LIST) ?	\
	&fpc_mutex[(bin) & (NPC_MUTEX - 1)][(mnode)] :			\
	&cpc_mutex[(bin) & (NPC_MUTEX - 1)][(mnode)])
487 
488 #define	FPC_MUTEX(mnode, i)	(&fpc_mutex[i][mnode])
489 #define	CPC_MUTEX(mnode, i)	(&cpc_mutex[i][mnode])
490 
491 #ifdef DEBUG
492 #define	CHK_LPG(pp, szc)	chk_lpg(pp, szc)
493 extern void	chk_lpg(page_t *, uchar_t);
494 #else
495 #define	CHK_LPG(pp, szc)
496 #endif
497 
498 #define	FULL_REGION_CNT(rg_szc)	\
499 	(LEVEL_SIZE(rg_szc) >> LEVEL_SHIFT(rg_szc - 1))
500 
/*
 * Step back from pp by the offset of its pfn within a szc-sized group,
 * giving the page_t of the first page of that group.
 */
#define	PP_GROUPLEADER(pp, szc) \
	((pp) - (int)((pp)->p_pagenum & (SZCPAGES(szc) - 1)))

/* Root page_t according to pp's own p_szc; pp itself when p_szc is 0 */
#define	PP_PAGEROOT(pp) ((pp)->p_szc != 0 ? \
	PP_GROUPLEADER((pp), (pp)->p_szc) : (pp))
508 
509 /*
510  * The counter base must be per page_counter element to prevent
511  * races when re-indexing, and the base page size element should
512  * be aligned on a boundary of the given region size.
513  *
514  * We also round up the number of pages spanned by the counters
515  * for a given region to PC_BASE_ALIGN in certain situations to simplify
516  * the coding for some non-performance critical routines.
517  */
518 
519 #define	PC_BASE_ALIGN		((pfn_t)1 << PAGE_BSZS_SHIFT(MMU_PAGE_SIZES-1))
520 #define	PC_BASE_ALIGN_MASK	(PC_BASE_ALIGN - 1)
521 
522 /*
523  * cpu/mmu-dependent vm variables
524  */
525 extern uint_t mmu_page_sizes;
526 extern uint_t mmu_exported_page_sizes;
527 
528 /* For x86, userszc is the same as the kernel's szc */
529 #define	USERSZC_2_SZC(userszc)	(userszc)
530 #define	SZC_2_USERSZC(szc)	(szc)
531 
532 /*
533  * for hw_page_map_t, sized to hold the ratio of large page to base
534  * pagesize (1024 max)
535  */
536 typedef	short	hpmctr_t;
537 
538 /*
 * get the setsize of the current cpu - assume homogeneous for x86
540  */
541 extern int	l2cache_sz, l2cache_linesz, l2cache_assoc;
542 
543 #define	L2CACHE_ALIGN		l2cache_linesz
544 #define	L2CACHE_ALIGN_MAX	64
/*
 * L2 cache size divided by its associativity; falls back to MMU_PAGESIZE
 * while l2cache_assoc is zero so that no division by zero can occur.
 */
#define	CPUSETSIZE()		\
	((l2cache_assoc == 0) ? MMU_PAGESIZE : (l2cache_sz / l2cache_assoc))
547 
548 /*
549  * Return the log2(pagesize(szc) / MMU_PAGESIZE) --- or the shift count
550  * for the number of base pages in this pagesize
551  */
552 #define	PAGE_BSZS_SHIFT(szc) (LEVEL_SHIFT(szc) - MMU_PAGESHIFT)
553 
554 /*
555  * Internal PG_ flags.
556  */
557 #define	PGI_RELOCONLY	0x010000	/* opposite of PG_NORELOC */
558 #define	PGI_NOCAGE	0x020000	/* cage is disabled */
559 #define	PGI_PGCPHIPRI	0x040000	/* page_get_contig_page pri alloc */
560 #define	PGI_PGCPSZC0	0x080000	/* relocate base pagesize page */
561 
562 /*
563  * PGI range flags - should not overlap PGI flags
564  */
565 #define	PGI_MT_RANGE0	0x1000000	/* mtype range to 0 */
566 #define	PGI_MT_RANGE16M	0x2000000	/* mtype range to 16m */
567 #define	PGI_MT_RANGE4G	0x4000000	/* mtype range to 4g */
568 #define	PGI_MT_NEXT	0x8000000	/* get next mtype */
569 #define	PGI_MT_RANGE	(PGI_MT_RANGE0 | PGI_MT_RANGE16M | PGI_MT_RANGE4G)
570 
571 /*
572  * Maximum and default values for user heap, stack, private and shared
573  * anonymous memory, and user text and initialized data.
574  * Used by map_pgsz*() routines.
575  */
576 extern size_t max_uheap_lpsize;
577 extern size_t default_uheap_lpsize;
578 extern size_t max_ustack_lpsize;
579 extern size_t default_ustack_lpsize;
580 extern size_t max_privmap_lpsize;
581 extern size_t max_uidata_lpsize;
582 extern size_t max_utext_lpsize;
583 extern size_t max_shm_lpsize;
584 extern size_t mcntl0_lpsize;
585 
586 /*
587  * Sanity control. Don't use large pages regardless of user
588  * settings if there's less than priv or shm_lpg_min_physmem memory installed.
589  * The units for this variable are 8K pages.
590  */
591 extern pgcnt_t privm_lpg_min_physmem;
592 extern pgcnt_t shm_lpg_min_physmem;
593 
594 /*
595  * hash as and addr to get a bin.
596  */
597 
598 #define	AS_2_BIN(as, seg, vp, addr, bin, szc)				    \
599 	bin = (((((uintptr_t)(addr) >> PAGESHIFT) + ((uintptr_t)(as) >> 4)) \
600 	    & page_colors_mask) >>					    \
601 	    (hw_page_array[szc].hp_shift - hw_page_array[0].hp_shift))
602 
603 /*
604  * cpu private vm data - accessed thru CPU->cpu_vm_data
605  *	vc_pnum_memseg: tracks last memseg visited in page_numtopp_nolock()
606  *	vc_pnext_memseg: tracks last memseg visited in page_nextn()
 *	vc_kmptr: original unaligned kmem pointer for this vm_cpu_data_t
 *	vc_kmsize: original kmem size for this vm_cpu_data_t
609  */
610 
611 typedef struct {
612 	struct memseg	*vc_pnum_memseg;
613 	struct memseg	*vc_pnext_memseg;
614 	void		*vc_kmptr;
615 	size_t		vc_kmsize;
616 } vm_cpu_data_t;
617 
618 /* allocation size to ensure vm_cpu_data_t resides in its own cache line */
619 #define	VM_CPU_DATA_PADSIZE						\
620 	(P2ROUNDUP(sizeof (vm_cpu_data_t), L2CACHE_ALIGN_MAX))
621 
622 /* for boot cpu before kmem is initialized */
623 extern char	vm_cpu_data0[];
624 
625 /*
626  * When a bin is empty, and we can't satisfy a color request correctly,
627  * we scan.  If we assume that the programs have reasonable spatial
628  * behavior, then it will not be a good idea to use the adjacent color.
629  * Using the adjacent color would result in virtually adjacent addresses
630  * mapping into the same spot in the cache.  So, if we stumble across
631  * an empty bin, skip a bunch before looking.  After the first skip,
632  * then just look one bin at a time so we don't miss our cache on
633  * every look. Be sure to check every bin.  Page_create() will panic
634  * if we miss a page.
635  *
 * This also explains the `<=' in the for loops in both page_get_freelist()
 * and page_get_cachelist().  Since we checked the target bin, skipped
 * a bunch, then continued one at a time, we wind up checking the target bin
 * twice to make sure we get all of the bins.
640  */
641 #define	BIN_STEP	19
642 
643 #ifdef VM_STATS
644 struct vmm_vmstats_str {
645 	ulong_t pgf_alloc[MMU_PAGE_SIZES];	/* page_get_freelist */
646 	ulong_t pgf_allocok[MMU_PAGE_SIZES];
647 	ulong_t pgf_allocokrem[MMU_PAGE_SIZES];
648 	ulong_t pgf_allocfailed[MMU_PAGE_SIZES];
649 	ulong_t	pgf_allocdeferred;
650 	ulong_t	pgf_allocretry[MMU_PAGE_SIZES];
651 	ulong_t pgc_alloc;			/* page_get_cachelist */
652 	ulong_t pgc_allocok;
653 	ulong_t pgc_allocokrem;
654 	ulong_t pgc_allocokdeferred;
655 	ulong_t pgc_allocfailed;
656 	ulong_t	pgcp_alloc[MMU_PAGE_SIZES];	/* page_get_contig_pages */
657 	ulong_t	pgcp_allocfailed[MMU_PAGE_SIZES];
658 	ulong_t	pgcp_allocempty[MMU_PAGE_SIZES];
659 	ulong_t	pgcp_allocok[MMU_PAGE_SIZES];
660 	ulong_t	ptcp[MMU_PAGE_SIZES];		/* page_trylock_contig_pages */
661 	ulong_t	ptcpfreethresh[MMU_PAGE_SIZES];
662 	ulong_t	ptcpfailexcl[MMU_PAGE_SIZES];
663 	ulong_t	ptcpfailszc[MMU_PAGE_SIZES];
664 	ulong_t	ptcpfailcage[MMU_PAGE_SIZES];
665 	ulong_t	ptcpok[MMU_PAGE_SIZES];
666 	ulong_t	pgmf_alloc[MMU_PAGE_SIZES];	/* page_get_mnode_freelist */
667 	ulong_t	pgmf_allocfailed[MMU_PAGE_SIZES];
668 	ulong_t	pgmf_allocempty[MMU_PAGE_SIZES];
669 	ulong_t	pgmf_allocok[MMU_PAGE_SIZES];
670 	ulong_t	pgmc_alloc;			/* page_get_mnode_cachelist */
671 	ulong_t	pgmc_allocfailed;
672 	ulong_t	pgmc_allocempty;
673 	ulong_t	pgmc_allocok;
674 	ulong_t	pladd_free[MMU_PAGE_SIZES];	/* page_list_add/sub */
675 	ulong_t	plsub_free[MMU_PAGE_SIZES];
676 	ulong_t	pladd_cache;
677 	ulong_t	plsub_cache;
678 	ulong_t	plsubpages_szcbig;
679 	ulong_t	plsubpages_szc0;
680 	ulong_t	pfs_req[MMU_PAGE_SIZES];	/* page_freelist_split */
681 	ulong_t	pfs_demote[MMU_PAGE_SIZES];
682 	ulong_t	pfc_coalok[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
683 	ulong_t	ppr_reloc[MMU_PAGE_SIZES];	/* page_relocate */
684 	ulong_t ppr_relocnoroot[MMU_PAGE_SIZES];
685 	ulong_t ppr_reloc_replnoroot[MMU_PAGE_SIZES];
686 	ulong_t ppr_relocnolock[MMU_PAGE_SIZES];
687 	ulong_t ppr_relocnomem[MMU_PAGE_SIZES];
688 	ulong_t ppr_relocok[MMU_PAGE_SIZES];
689 	/* page coalesce counter */
690 	ulong_t page_ctrs_coalesce[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
691 	/* candidates useful */
692 	ulong_t page_ctrs_cands_skip[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
693 	/* ctrs changed after locking */
694 	ulong_t page_ctrs_changed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
695 	/* page_freelist_coalesce failed */
696 	ulong_t page_ctrs_failed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
697 	ulong_t page_ctrs_coalesce_all;	/* page coalesce all counter */
698 	ulong_t page_ctrs_cands_skip_all; /* candidates useful for all func */
699 	ulong_t	restrict4gcnt;
700 	ulong_t	unrestrict16mcnt;	/* non-DMA 16m allocs allowed */
701 	ulong_t	pgpanicalloc;		/* PG_PANIC allocation */
702 };
703 extern struct vmm_vmstats_str vmm_vmstats;
704 #endif	/* VM_STATS */
705 
706 extern size_t page_ctrs_sz(void);
707 extern caddr_t page_ctrs_alloc(caddr_t);
708 extern void page_ctr_sub(int, int, page_t *, int);
709 extern page_t *page_freelist_split(uchar_t,
710     uint_t, int, int, pfn_t, page_list_walker_t *);
711 extern page_t *page_freelist_coalesce(int, uchar_t, uint_t, uint_t, int,
712     pfn_t);
713 extern uint_t page_get_pagecolors(uint_t);
714 
715 #ifdef	__cplusplus
716 }
717 #endif
718 
719 #endif	/* _VM_DEP_H */
720