xref: /illumos-gate/usr/src/uts/sun4/vm/vm_dep.h (revision eda50310abb3984bab11856a2aca8936d26881cb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * UNIX machine dependent virtual memory support.
28  */
29 
30 #ifndef	_VM_DEP_H
31 #define	_VM_DEP_H
32 
33 #pragma ident	"%Z%%M%	%I%	%E% SMI"
34 
35 #ifdef	__cplusplus
36 extern "C" {
37 #endif
38 
39 #include <vm/hat_sfmmu.h>
40 #include <sys/archsystm.h>
41 #include <sys/memnode.h>
42 
/*
 * Platform hook for reading the tick value: on this platform it expands
 * to a plain call to gettick(), which is declared outside this header.
 */
#define	GETTICK()	gettick()
44 
45 /*
46  * Per page size free lists. Allocated dynamically.
47  */
#define	MAX_MEM_TYPES	2	/* 0 = reloc, 1 = noreloc */
#define	MTYPE_RELOC	0
#define	MTYPE_NORELOC	1

/* Memory type of a page: NORELOC when PP_ISNORELOC(pp) holds, else RELOC. */
#define	PP_2_MTYPE(pp)	(PP_ISNORELOC(pp) ? MTYPE_NORELOC : MTYPE_RELOC)

/*
 * Pick the starting mtype from the allocation flags: PG_NORELOC selects
 * MTYPE_NORELOC, anything else MTYPE_RELOC.  vp, vaddr and pgsz are not
 * used on this platform.  mtype must be an lvalue.  The expansion carries
 * its own trailing semicolon, which is kept so existing callers that rely
 * on it keep compiling.
 */
#define	MTYPE_INIT(mtype, vp, vaddr, flags, pgsz)			\
	(mtype) = ((flags) & PG_NORELOC) ? MTYPE_NORELOC : MTYPE_RELOC;

/*
 * mtype init for page_get_replacement_page.  Same selection as
 * MTYPE_INIT; pp, mnode and pgcnt are not used on this platform.
 */
#define	MTYPE_PGR_INIT(mtype, flags, pp, mnode, pgcnt)			\
	(mtype) = ((flags) & PG_NORELOC) ? MTYPE_NORELOC : MTYPE_RELOC;

/*
 * Store the pfn range of a memory node into pfnlo/pfnhi (both lvalues),
 * taken from mem_node_config[mnode].  Asserts that mtype is not
 * MTYPE_NORELOC.  Expands to three statements, so it must not be used as
 * the unbraced body of an if/else or loop.
 */
#define	MNODETYPE_2_PFN(mnode, mtype, pfnlo, pfnhi)			\
	ASSERT((mtype) != MTYPE_NORELOC);				\
	(pfnlo) = mem_node_config[mnode].physbase;			\
	(pfnhi) = mem_node_config[mnode].physmax;
65 
66 /*
67  * candidate counters in vm_pagelist.c are indexed by color and range
68  */
/*
 * On this platform the ranges of a memory node are just its memory types:
 * every node has MAX_MEM_TYPES ranges, the highest range index is
 * MAX_MEM_TYPES - 1, and the range index of an mtype is the mtype itself.
 * The mnode argument is ignored by all three macros.
 */
#define	MAX_MNODE_MRANGES		MAX_MEM_TYPES
#define	MNODE_RANGE_CNT(mnode)		MAX_MNODE_MRANGES
#define	MNODE_MAX_MRANGE(mnode)		(MAX_MEM_TYPES - 1)
#define	MTYPE_2_MRANGE(mnode, mtype)	(mtype)
73 
74 /*
75  * Internal PG_ flags.
76  */
/* The four internal flags below occupy bits 16 through 19. */
#define	PGI_RELOCONLY	0x10000	/* acts in the opposite sense to PG_NORELOC */
#define	PGI_NOCAGE	0x20000	/* indicates Cage is disabled */
#define	PGI_PGCPHIPRI	0x40000	/* page_get_contig_page priority allocation */
#define	PGI_PGCPSZC0	0x80000	/* relocate base pagesize page */

/*
 * PGI mtype flags - should not overlap PGI flags
 * (they occupy bits 24 and 25).
 */
#define	PGI_MT_RANGE	0x1000000	/* mtype range */
#define	PGI_MT_NEXT	0x2000000	/* get next mtype */
87 
/*
 * List heads.  A free list head is reached as
 * page_freelists[szc][mtype][mnode][color]; a cache list head as
 * page_cachelists[mtype][mnode][color] (the cache list has no page size
 * dimension).
 */
extern page_t ***page_freelists[MMU_PAGE_SIZES][MAX_MEM_TYPES];
extern page_t ***page_cachelists[MAX_MEM_TYPES];

/* lvalue naming the free list head for (mnode, szc, color, mtype) */
#define	PAGE_FREELISTS(mnode, szc, color, mtype) \
	(*(page_freelists[szc][mtype][mnode] + (color)))

/* lvalue naming the cache list head for (mnode, color, mtype) */
#define	PAGE_CACHELISTS(mnode, color, mtype) \
	(*(page_cachelists[mtype][mnode] + (color)))
96 
97 /*
98  * There are 'page_colors' colors/bins.  Spread them out under a
99  * couple of locks.  There are mutexes for both the page freelist
100  * and the page cachelist.  We want enough locks to make contention
101  * reasonable, but not too many -- otherwise page_freelist_lock() gets
102  * so expensive that it becomes the bottleneck!
103  */
/*
 * Number of locks per list kind.  PC_BIN_MUTEX picks a lock with
 * (bin) & (NPC_MUTEX - 1), so this value must stay a power of two.
 */
#define	NPC_MUTEX	16

/* Both arrays are indexed [lock index][mnode] (see FPC_MUTEX/CPC_MUTEX). */
extern kmutex_t	*fpc_mutex[NPC_MUTEX];
extern kmutex_t	*cpc_mutex[NPC_MUTEX];
108 
109 /*
110  * cpu specific color conversion functions
111  */
/*
 * Each hook below is declared weak.  The macros in this header compare
 * the hook's address against NULL and fall back to a generic computation
 * based on hw_page_array[] when the hook is not linked in.
 */
extern uint_t page_get_nsz_color_mask_cpu(uchar_t, uint_t);
#pragma weak page_get_nsz_color_mask_cpu

extern uint_t page_get_nsz_color_cpu(uchar_t, uint_t);
#pragma weak page_get_nsz_color_cpu

extern uint_t page_get_color_shift_cpu(uchar_t, uchar_t);
#pragma weak page_get_color_shift_cpu

extern pfn_t page_next_pfn_for_color_cpu(pfn_t,
    uchar_t, uint_t, uint_t, uint_t);
#pragma weak page_next_pfn_for_color_cpu

extern uint_t  page_pfn_2_color_cpu(pfn_t, uchar_t);
#pragma weak page_pfn_2_color_cpu
127 
/*
 * Shift that converts a color of page size szc into a color of page size
 * nszc.  Uses page_get_color_shift_cpu() when that weak hook is present;
 * otherwise the difference of the two sizes' hp_shift values.
 */
#define	PAGE_GET_COLOR_SHIFT(szc, nszc)				\
	((&page_get_color_shift_cpu != NULL) ?			\
	    page_get_color_shift_cpu(szc, nszc) :		\
	    (hw_page_array[(nszc)].hp_shift -			\
		hw_page_array[(szc)].hp_shift))
133 
/*
 * Color (bin) of pfn when it is treated as part of a page of size code
 * szc.  When the weak page_pfn_2_color_cpu() hook is linked in, it
 * decides.  Otherwise the base-page color bits of pfn are selected with
 * (hw_page_array[0].hp_colors - 1) and shifted right by the difference
 * between the szc page shift and the base page shift.  That mask is only
 * meaningful when hw_page_array[0].hp_colors is a power of two.
 *
 * Both arguments are parenthesized so that expression arguments such as
 * "a | b" are masked as a whole.
 */
#define	PFN_2_COLOR(pfn, szc)					\
	((&page_pfn_2_color_cpu != NULL) ?			\
	    page_pfn_2_color_cpu((pfn), (szc)) :		\
	    (((pfn) & (hw_page_array[0].hp_colors - 1)) >>	\
		(hw_page_array[(szc)].hp_shift -		\
		    hw_page_array[0].hp_shift)))
140 
/* Accessors for the per page size entries of hw_page_array[]. */

/* hp_pgcnt of page size szc */
#define	PNUM_SIZE(szc)							\
	(hw_page_array[(szc)].hp_pgcnt)
/* shift of page size szc relative to the base page size (index 0) */
#define	PNUM_SHIFT(szc)							\
	(hw_page_array[(szc)].hp_shift - hw_page_array[0].hp_shift)
/* hp_shift of page size szc */
#define	PAGE_GET_SHIFT(szc)						\
	(hw_page_array[(szc)].hp_shift)
/* hp_colors of page size szc */
#define	PAGE_GET_PAGECOLORS(szc)					\
	(hw_page_array[(szc)].hp_colors)
149 
/*
 * This macro calculates the next sequential pfn with the specified
 * color using color equivalency mask.
 *
 * pfn is both input and output and must be an lvalue.  color must have no
 * bits set outside ceq_mask, and ceq_mask + 1 must be a power of two;
 * both conditions are asserted.
 *
 * When the weak page_next_pfn_for_color_cpu() hook is present it does the
 * whole computation.  Otherwise, working in units of szc-sized pages
 * (spfn = pfn >> PAGE_BSZS_SHIFT(szc)) with stride = ceq_mask + 1:
 *   - if spfn already has the requested color under ceq_mask, pfn is
 *     advanced by one stride;
 *   - otherwise the masked bits of spfn are replaced by color, one stride
 *     is added if the result is not greater than spfn, and the value is
 *     shifted back into base-page units.
 *
 * The expansion is an ASSERT statement followed by an if/else, so it must
 * be used as a statement of its own and never as the unbraced body of
 * another if/else or loop.  It declares the locals pfn_shift, spfn and
 * stride; argument expressions must not refer to caller variables with
 * those names.
 */
#define	PAGE_NEXT_PFN_FOR_COLOR(pfn, szc, color, ceq_mask, color_mask)        \
	ASSERT(((color) & ~(ceq_mask)) == 0);                                 \
	if (&page_next_pfn_for_color_cpu == NULL) {                           \
		uint_t	pfn_shift = PAGE_BSZS_SHIFT(szc);                     \
		pfn_t	spfn = pfn >> pfn_shift;                              \
		pfn_t	stride = (ceq_mask) + 1;                              \
		ASSERT((((ceq_mask) + 1) & (ceq_mask)) == 0);                 \
		if (((spfn ^ (color)) & (ceq_mask)) == 0) {                   \
			pfn += stride << pfn_shift;                           \
		} else {                                                      \
			pfn = (spfn & ~(pfn_t)(ceq_mask)) | (color);          \
			pfn = (pfn > spfn ? pfn : pfn + stride) << pfn_shift; \
		}                                                             \
	} else {                                                              \
		pfn = page_next_pfn_for_color_cpu(pfn, szc, color,	      \
		    ceq_mask, color_mask);                                    \
	}
171 
/*
 * get the color equivalency mask for the next szc: the weak
 * page_get_nsz_color_mask_cpu() hook when present, otherwise mask shifted
 * right by the page shift difference between szc + 1 and szc.
 */
#define	PAGE_GET_NSZ_MASK(szc, mask)                                         \
	((&page_get_nsz_color_mask_cpu == NULL) ?                            \
	    ((mask) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc))) :  \
	    page_get_nsz_color_mask_cpu(szc, mask))

/*
 * get the color of the next szc: the weak page_get_nsz_color_cpu() hook
 * when present, otherwise color shifted right by the page shift
 * difference between szc + 1 and szc.
 */
#define	PAGE_GET_NSZ_COLOR(szc, color)                                       \
	((&page_get_nsz_color_cpu == NULL) ?                                 \
	    ((color) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc))) : \
	    page_get_nsz_color_cpu(szc, color))
183 
184 /* Find the bin for the given page if it was of size szc */
/*
 * Bin of page pp if it were of size szc: PFN_2_COLOR applied to the
 * page's p_pagenum.  pp is parenthesized before "->" so that pointer
 * expressions such as "pp + 1" work as arguments.
 */
#define	PP_2_BIN_SZC(pp, szc)	(PFN_2_COLOR((pp)->p_pagenum, szc))

/* Bin of page pp at its own size code (p_szc). */
#define	PP_2_BIN(pp)		(PP_2_BIN_SZC(pp, (pp)->p_szc))

/* Memory node of page pp: PFN_2_MEM_NODE applied to its p_pagenum. */
#define	PP_2_MEM_NODE(pp)	(PFN_2_MEM_NODE((pp)->p_pagenum))
190 
/*
 * Address of the mutex covering (mnode, bin): taken from fpc_mutex when
 * PG_FREE_LIST is set in flags, otherwise from cpc_mutex.  The bin is
 * folded onto the NPC_MUTEX locks with a mask, which relies on NPC_MUTEX
 * being a power of two.  flags is parenthesized so that an expression
 * argument such as "a | b" is tested as a whole.
 */
#define	PC_BIN_MUTEX(mnode, bin, flags) (((flags) & PG_FREE_LIST) ?	\
	&fpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode] :			\
	&cpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode])

/* Address of free list / cache list mutex number i of memory node mnode. */
#define	FPC_MUTEX(mnode, i)	(&fpc_mutex[i][mnode])
#define	CPC_MUTEX(mnode, i)	(&cpc_mutex[i][mnode])
197 
/*
 * Round pfnum down to the first base-page pfn of the szc-sized page that
 * contains it.  The mask is built in pfn_t so the shift is never done in
 * int, and pfnum is parenthesized so that expression arguments are
 * masked as a whole.
 */
#define	PFN_BASE(pfnum, szc)	\
	((pfnum) & ~(((pfn_t)1 << PAGE_BSZS_SHIFT(szc)) - 1))
199 
/*
 * This structure is used for walking free page lists.  It controls when
 * to split large pages into smaller pages, and when to coalesce smaller
 * pages into larger pages.
 *
 * The two trailing arrays have MMU_PAGE_SIZES + 1 entries.
 */
typedef struct page_list_walker {
	uint_t	plw_colors;		/* num of colors for szc */
	uint_t  plw_color_mask;		/* colors-1 */
	uint_t	plw_bin_step;		/* next bin: 1 or 2 */
	uint_t  plw_count;		/* loop count */
	uint_t	plw_bin0;		/* starting bin */
	uint_t  plw_bin_marker;		/* bin after initial jump */
	uint_t  plw_bin_split_prev;	/* last bin we tried to split */
	uint_t  plw_do_split;		/* set if OK to split */
	uint_t  plw_split_next;		/* next bin to split */
	uint_t	plw_ceq_dif;		/* number of different color groups */
					/* to check */
	uint_t	plw_ceq_mask[MMU_PAGE_SIZES + 1]; /* color equiv mask */
	uint_t	plw_bins[MMU_PAGE_SIZES + 1];	/* num of bins */
} page_list_walker_t;

/*
 * Defined outside this header; presumably fills in *plw for a walk that
 * starts at bin for page size szc - confirm against the implementation.
 */
void	page_list_walk_init(uchar_t szc, uint_t flags, uint_t bin,
    int can_split, int use_ceq, page_list_walker_t *plw);
223 
/* NOTE(review): no use of hpmctr_t is visible in this header. */
typedef	char	hpmctr_t;

/*
 * CHK_LPG calls chk_lpg() in DEBUG builds and expands to nothing
 * otherwise, so its arguments are not evaluated in non-DEBUG builds.
 */
#ifdef DEBUG
#define	CHK_LPG(pp, szc)	chk_lpg(pp, szc)
extern void	chk_lpg(page_t *, uchar_t);
#else
#define	CHK_LPG(pp, szc)
#endif
232 
/*
 * page list count per mnode and type.
 *
 * plcnt_t is itself a two-dimensional array type, indexed
 * [mnode][mtype].  DEBUG builds additionally keep, for every page size,
 * a total count, the number of colors and a pointer to per color bin
 * counts (the storage for those is set up by PLCNT_INIT).
 */
typedef	struct {
	pgcnt_t	plc_mt_pgmax;		/* max page cnt */
	pgcnt_t plc_mt_clpgcnt;		/* cache list cnt */
	pgcnt_t plc_mt_flpgcnt;		/* free list cnt - small pages */
	pgcnt_t plc_mt_lgpgcnt;		/* free list cnt - large pages */
#ifdef DEBUG
	struct {
		pgcnt_t plc_mts_pgcnt;	/* per page size count */
		int	plc_mts_colors;
		pgcnt_t	*plc_mtsc_pgcnt; /* per color bin count */
	} plc_mts[MMU_PAGE_SIZES];
#endif
} plcnt_t[MAX_MEM_NODES][MAX_MEM_TYPES];
249 
#ifdef DEBUG

/*
 * Add to ctrs_sz (an lvalue) the number of bytes needed for the per color
 * bin counters: one pgcnt_t per color, per memory type, per memory node,
 * for every page size.
 */
#define	PLCNT_SZ(ctrs_sz) {						\
	int	szc;							\
	for (szc = 0; szc < mmu_page_sizes; szc++) {			\
		int	colors = page_get_pagecolors(szc);		\
		(ctrs_sz) += (max_mem_nodes * MAX_MEM_TYPES *		\
		    colors * sizeof (pgcnt_t));				\
	}								\
}

/*
 * Hand out the storage starting at base (an lvalue that is advanced by
 * the macro, in bytes) to the per color bin counter pointers, in the same
 * page size / node / type order that PLCNT_SZ sized it for, and record
 * the color count of each page size.
 */
#define	PLCNT_INIT(base) {						\
	int	mn, mt, szc, colors;					\
	for (szc = 0; szc < mmu_page_sizes; szc++) {			\
		colors = page_get_pagecolors(szc);			\
		for (mn = 0; mn < max_mem_nodes; mn++) {		\
			for (mt = 0; mt < MAX_MEM_TYPES; mt++) {	\
				plcnt[mn][mt].plc_mts[szc].		\
				    plc_mts_colors = colors;		\
				plcnt[mn][mt].plc_mts[szc].		\
				    plc_mtsc_pgcnt = (pgcnt_t *)(base);	\
				(base) += (colors * sizeof (pgcnt_t));	\
			}						\
		}							\
	}								\
}

/*
 * Add cnt (may be negative) to the counters of plcnt[mn][mtype]: the
 * cache list count when PG_CACHE_LIST is set in flags, else the large
 * page count when szc is non-zero, else the small page free list count.
 * DEBUG builds also update the per page size and per color bin counts.
 */
#define	PLCNT_DO(pp, mn, mtype, szc, cnt, flags) {			\
	int	bin = PP_2_BIN(pp);					\
	if ((flags) & PG_CACHE_LIST)					\
		atomic_add_long(&plcnt[mn][mtype].plc_mt_clpgcnt,	\
		    (cnt));						\
	else if (szc)							\
		atomic_add_long(&plcnt[mn][mtype].plc_mt_lgpgcnt,	\
		    (cnt));						\
	else								\
		atomic_add_long(&plcnt[mn][mtype].plc_mt_flpgcnt,	\
		    (cnt));						\
	atomic_add_long(&plcnt[mn][mtype].plc_mts[szc].plc_mts_pgcnt,	\
	    (cnt));							\
	atomic_add_long(&plcnt[mn][mtype].plc_mts[szc].			\
	    plc_mtsc_pgcnt[bin], (cnt));				\
}

#else

/* No per color bin counters outside DEBUG builds. */
#define	PLCNT_SZ(ctrs_sz)

#define	PLCNT_INIT(base)

/* PG_FREE_LIST may not be explicitly set in flags for large pages */

/*
 * Add cnt (may be negative) to the counters of plcnt[mn][mtype]: the
 * cache list count when PG_CACHE_LIST is set in flags, else the large
 * page count when szc is non-zero, else the small page free list count.
 * pp is unused in this variant.
 */
#define	PLCNT_DO(pp, mn, mtype, szc, cnt, flags) {			\
	if ((flags) & PG_CACHE_LIST)					\
		atomic_add_long(&plcnt[mn][mtype].plc_mt_clpgcnt,	\
		    (cnt));						\
	else if (szc)							\
		atomic_add_long(&plcnt[mn][mtype].plc_mt_lgpgcnt,	\
		    (cnt));						\
	else								\
		atomic_add_long(&plcnt[mn][mtype].plc_mt_flpgcnt,	\
		    (cnt));						\
}

#endif
309 
/*
 * Account for one szc-sized page being added to a list: the page lists
 * counts grow by the number of base pages it spans,
 * 1 << PAGE_BSZS_SHIFT(szc).  The shift is done in long, the type of cnt.
 */
#define	PLCNT_INCR(pp, mn, mtype, szc, flags) {				\
	long	cnt = (1L << PAGE_BSZS_SHIFT(szc));			\
	PLCNT_DO(pp, mn, mtype, szc, cnt, flags);			\
}
314 
/*
 * Account for one szc-sized page being removed from a list: the page
 * list counts shrink by the number of base pages it spans.  The value is
 * formed by negating a positive shift, because left-shifting a negative
 * value is undefined behavior in C.
 */
#define	PLCNT_DECR(pp, mn, mtype, szc, flags) {				\
	long	cnt = -(1L << PAGE_BSZS_SHIFT(szc));			\
	PLCNT_DO(pp, mn, mtype, szc, cnt, flags);			\
}
319 
320 /*
321  * macros to update page list max counts - done when pages transferred
322  * from RELOC to NORELOC mtype (kcage_init or kcage_assimilate_page).
323  */
324 
/*
 * Move the base pages spanned by pp from the RELOC max count to the
 * NORELOC max count of pp's memory node.  pp is parenthesized on every
 * use (including the argument handed to PP_2_MEM_NODE), and the page
 * count is computed in long.
 */
#define	PLCNT_XFER_NORELOC(pp) {					\
	long	cnt = (1L << PAGE_BSZS_SHIFT((pp)->p_szc));		\
	int	mn = PP_2_MEM_NODE((pp));				\
	atomic_add_long(&plcnt[mn][MTYPE_NORELOC].plc_mt_pgmax, cnt);	\
	atomic_add_long(&plcnt[mn][MTYPE_RELOC].plc_mt_pgmax, -cnt);	\
}
331 
/*
 * macro to modify the page list max counts when memory is added to
 * the page lists during startup (add_physmem) or during a DR operation
 * when memory is added (kphysm_add_memory_dynamic) or deleted
 * (kphysm_del_cleanup).
 *
 * cnt is added to the RELOC max count of the memory node that contains
 * pfn; pass a negative cnt to lower the count.
 */
#define	PLCNT_MODIFY_MAX(pfn, cnt) {					\
	int	mn = PFN_2_MEM_NODE(pfn);				\
	atomic_add_long(&plcnt[mn][MTYPE_RELOC].plc_mt_pgmax, (cnt));	\
}

/* The counters themselves, indexed [mnode][mtype]. */
extern plcnt_t	plcnt;
344 
/*
 * Pages currently on the lists of memory node mn: cache list, small page
 * free list and large page free list counts, summed over both memory
 * types.  The fields are read without any locking in this macro.
 */
#define	MNODE_PGCNT(mn)							\
	(plcnt[mn][MTYPE_RELOC].plc_mt_clpgcnt +			\
	    plcnt[mn][MTYPE_NORELOC].plc_mt_clpgcnt +			\
	    plcnt[mn][MTYPE_RELOC].plc_mt_flpgcnt +			\
	    plcnt[mn][MTYPE_NORELOC].plc_mt_flpgcnt +			\
	    plcnt[mn][MTYPE_RELOC].plc_mt_lgpgcnt +			\
	    plcnt[mn][MTYPE_NORELOC].plc_mt_lgpgcnt)

/* Same sum restricted to a single memory type of node mn. */
#define	MNODETYPE_PGCNT(mn, mtype)					\
	(plcnt[mn][mtype].plc_mt_clpgcnt +				\
	    plcnt[mn][mtype].plc_mt_flpgcnt +				\
	    plcnt[mn][mtype].plc_mt_lgpgcnt)
357 
/*
 * macros to loop through the mtype range - MTYPE_START returns -1 in
 * mtype if no pages in mnode/mtype and possibly NEXT mtype.
 *
 * If the max page count of (mnode, mtype) is zero, the decision is handed
 * to MTYPE_NEXT, which may rewrite both mtype and flags; otherwise mtype
 * is left unchanged.  mtype and flags must therefore be lvalues.
 */
#define	MTYPE_START(mnode, mtype, flags) {				\
	if (plcnt[mnode][mtype].plc_mt_pgmax == 0) {			\
		ASSERT(MNODETYPE_PGCNT(mnode, mtype) == 0);		\
		MTYPE_NEXT(mnode, mtype, flags);			\
	}								\
}
368 
/*
 * if allocation from the RELOC pool failed and there is sufficient cage
 * memory, attempt to allocate from the NORELOC pool.
 *
 * The switch to MTYPE_NORELOC happens only when none of PG_NORELOC,
 * PGI_NOCAGE and PGI_RELOCONLY is set in flags, kcage_freemem is at least
 * kcage_lotsfree, and the node's NORELOC max page count is non-zero; in
 * that case PG_NORELOC is also OR-ed into flags.  In every other case
 * mtype is set to -1.  mtype and flags must be lvalues.
 */
#define	MTYPE_NEXT(mnode, mtype, flags) { 				\
	if (!(flags & (PG_NORELOC | PGI_NOCAGE | PGI_RELOCONLY)) &&	\
	    (kcage_freemem >= kcage_lotsfree)) {			\
		if (plcnt[mnode][MTYPE_NORELOC].plc_mt_pgmax == 0) {	\
			ASSERT(MNODETYPE_PGCNT(mnode, MTYPE_NORELOC) == 0); \
			mtype = -1;					\
		} else {						\
			mtype = MTYPE_NORELOC;				\
			flags |= PG_NORELOC;				\
		}							\
	} else {							\
		mtype = -1;						\
	}								\
}
387 
388 /*
389  * get the ecache setsize for the current cpu.
390  */
/* Ecache set size / associativity recorded in cpunodes[] for the current cpu. */
#define	CPUSETSIZE()	(cpunodes[CPU->cpu_id].ecache_setsize)
#define	CPUASSOC()	(cpunodes[CPU->cpu_id].ecache_associativity)

/*
 * Pointer to cpu0.  The expansion is parenthesized so that postfix uses
 * such as CPU0->cpu_id bind to the pointer, not to cpu0 itself.
 */
extern struct cpu	cpu0;
#define	CPU0		(&cpu0)
396 
/* Shift from base pages to szc-sized pages; delegates to TTE_BSZS_SHIFT. */
#define	PAGE_BSZS_SHIFT(szc)	TTE_BSZS_SHIFT(szc)
/*
 * For sfmmu each larger page is 8 times the size of the previous
 * size page, so the count is the same for every region size and rg_szc
 * is ignored.
 */
#define	FULL_REGION_CNT(rg_szc)	(8)
403 
404 /*
405  * The counter base must be per page_counter element to prevent
406  * races when re-indexing, and the base page size element should
407  * be aligned on a boundary of the given region size.
408  *
409  * We also round up the number of pages spanned by the counters
410  * for a given region to PC_BASE_ALIGN in certain situations to simplify
411  * the coding for some non-performance critical routines.
412  */
/*
 * Number of base pages in a page of the largest size code in use
 * (mmu_page_sizes - 1), and the matching mask.  mmu_page_sizes is a
 * variable, so these are not compile-time constants.
 */
#define	PC_BASE_ALIGN		((pfn_t)1 << PAGE_BSZS_SHIFT(mmu_page_sizes-1))
#define	PC_BASE_ALIGN_MASK	(PC_BASE_ALIGN - 1)

/*
 * L2CACHE_ALIGN is the run-time value ecache_alignsize; L2CACHE_ALIGN_MAX
 * is a compile-time constant used where a constant size is needed (see
 * VM_CPU_DATA_PADSIZE).
 */
extern int ecache_alignsize;
#define	L2CACHE_ALIGN		ecache_alignsize
#define	L2CACHE_ALIGN_MAX	512
419 
/* Page coloring / virtual cache variables used by AS_2_BIN. */
extern int consistent_coloring;
extern uint_t vac_colors_mask;
extern int vac_size;
extern int vac_shift;

/*
 * Auto large page selection support variables. Some CPU
 * implementations may differ from the defaults and will need
 * to change these.  All of them are defined outside this header.
 */
extern int auto_lpg_tlb_threshold;
extern int auto_lpg_minszc;
extern int auto_lpg_maxszc;
extern size_t auto_lpg_heap_default;
extern size_t auto_lpg_stack_default;
extern size_t auto_lpg_va_default;
extern size_t auto_lpg_remap_threshold;
extern pgcnt_t auto_lpg_min_physmem;
438 
/*
 * AS_2_BIN macro controls the page coloring policy.
 * 0 (default) uses various vaddr bits
 * 1 virtual=paddr
 * 2 bin hopping
 *
 * The policy is selected by consistent_coloring; any other value logs a
 * warning and falls through to policy 0.  The result is stored in bin,
 * which must be an lvalue, and is asserted to be a valid color for szc.
 *
 * Policy 0 builds a pseudo pfn from addr and takes its color.  When vp is
 * a swap vnode and seg is a segvn segment, the per-as color bin is added
 * as a slew so that different address spaces do not line up.
 * Policy 1 takes the color of addr >> MMU_PAGESHIFT directly.
 * Policy 2 advances the per-as color bin until it matches the virtual
 * color of addr, uses it, and then updates the per-as coloring fields.
 *
 * The expansion is a switch statement followed by an ASSERT statement, so
 * it must be used as a statement of its own and never as the unbraced
 * body of an if/else or loop.
 */
#define	AS_2_BIN(as, seg, vp, addr, bin, szc)				\
switch (consistent_coloring) {						\
	default:                                                        \
		cmn_err(CE_WARN,					\
			"AS_2_BIN: bad consistent coloring value");	\
		/* assume default algorithm -> continue */		\
	case 0: {                                                       \
		int slew = 0;						\
		pfn_t pfn;                                              \
                                                                        \
		if (vp != NULL && IS_SWAPVP(vp) &&			\
		    seg->s_ops == &segvn_ops)				\
			slew = as_color_bin(as);			\
                                                                        \
		pfn = ((uintptr_t)addr >> MMU_PAGESHIFT) +		\
			(((uintptr_t)addr >> page_coloring_shift) <<	\
			(vac_shift - MMU_PAGESHIFT));			\
		if ((szc) == 0 ||					\
		    (szc == 1 && &page_pfn_2_color_cpu == NULL &&	\
		    CPUASSOC() > PNUM_SIZE(1))) {			\
			pfn += slew;					\
			bin = PFN_2_COLOR(pfn, szc);			\
		} else {						\
			bin = PFN_2_COLOR(pfn, szc);			\
			bin += slew >> (vac_shift - MMU_PAGESHIFT);	\
			bin &= hw_page_array[(szc)].hp_colors - 1;	\
		}							\
		break;                                                  \
	}                                                               \
	case 1:                                                         \
		bin = PFN_2_COLOR(((uintptr_t)addr >> MMU_PAGESHIFT),   \
					szc);	                        \
		break;                                                  \
	case 2: {                                                       \
		int cnt = as_color_bin(as);				\
		uint_t color_mask = page_get_pagecolors(0) - 1;		\
                                                                        \
		/* make sure physical color aligns with vac color */	\
		while ((cnt & vac_colors_mask) !=			\
		    addr_to_vcolor(addr)) {				\
			cnt++;						\
		}                                                       \
		bin = cnt = cnt & color_mask;			        \
		bin >>= PAGE_GET_COLOR_SHIFT(0, szc);                   \
		/* update per as page coloring fields */		\
		cnt = (cnt + 1) & color_mask;			        \
		if (cnt == (as_color_start(as) & color_mask)) {	        \
			cnt = as_color_start(as) = as_color_start(as) + \
				PGCLR_LOOPFACTOR;			\
		}                                                       \
		as_color_bin(as) = cnt & color_mask;		        \
		break;                                                  \
	}								\
}									\
	ASSERT(bin < page_get_pagecolors(szc));
501 
/*
 * cpu private vm data - accessed thru CPU->cpu_vm_data
 *	vc_pnum_memseg: tracks last memseg visited in page_numtopp_nolock()
 *	vc_pnext_memseg: tracks last memseg visited in page_nextn()
 *	vc_kmptr: unaligned kmem pointer for this vm_cpu_data_t
 *	vc_kmsize: original kmem size for this vm_cpu_data_t
 */

typedef struct {
	struct memseg	*vc_pnum_memseg;
	struct memseg	*vc_pnext_memseg;
	void		*vc_kmptr;
	size_t		vc_kmsize;
} vm_cpu_data_t;

/*
 * allocation size to ensure vm_cpu_data_t resides in its own cache line:
 * sizeof (vm_cpu_data_t) rounded up to a multiple of L2CACHE_ALIGN_MAX.
 */
#define	VM_CPU_DATA_PADSIZE						\
	(P2ROUNDUP(sizeof (vm_cpu_data_t), L2CACHE_ALIGN_MAX))

/* for boot cpu before kmem is initialized */
extern char	vm_cpu_data0[];
523 
/*
 * Function to get an ecache color bin: F(as, cnt, vcolor).
 * The goals of this function are:
 * - to spread a process's physical pages across the entire ecache to
 *	maximize its use.
 * - to minimize vac flushes caused when we reuse a physical page on a
 *	different vac color than it was previously used.
 * - to prevent all processes from using the exact same colors and
 *	thrashing each other.
 *
 * cnt is a bin ptr kept on a per as basis.  As we page_create we increment
 * the ptr so we spread out the physical pages to cover the entire ecache.
 * The virtual color is made a subset of the physical color in order to
 * minimize virtual cache flushing.
 * We add in the as to spread out different as.  This happens when we
 * initialize the start count value.
 * sizeof(struct as) is 60 so we shift by 3 to get into the bit range
 * that will tend to change.  For example, on spitfire based machines
 * (vcshft == 1) contiguous as are spread by ~6 bins.
 * vcshft provides for proper virtual color alignment.
 * In theory cnt should be updated using cas only but if we are off by one
 * or 2 it is no big deal.
 * We also keep a start value which is used to randomize on what bin we
 * start counting when it is time to start another loop.  This avoids
 * contiguous allocations of ecache size pointing to the same bin.
 * Why 3?  It seems to work OK.  Better than 7 or anything larger.
 */
#define	PGCLR_LOOPFACTOR 3
552 
/*
 * When a bin is empty, and we can't satisfy a color request correctly,
 * we scan.  If we assume that the programs have reasonable spatial
 * behavior, then it will not be a good idea to use the adjacent color.
 * Using the adjacent color would result in virtually adjacent addresses
 * mapping into the same spot in the cache.  So, if we stumble across
 * an empty bin, skip a bunch before looking.  After the first skip,
 * then just look one bin at a time so we don't miss our cache on
 * every look.  Be sure to check every bin.  Page_create() will panic
 * if we miss a page.
 *
 * This also explains the `<=' in the for loops in both page_get_freelist()
 * and page_get_cachelist().  Since we checked the target bin, skipped
 * a bunch, then continued one at a time, we wind up checking the target
 * bin twice to make sure we get all of the bins.
 */
#define	BIN_STEP	20
570 
#ifdef VM_STATS
/*
 * Page allocator statistics; only present when VM_STATS is defined.
 * Counters dimensioned by MMU_PAGE_SIZES are presumably indexed by page
 * size code, and those with a second MAX_MNODE_MRANGES dimension by
 * memory range as well - confirm against the code that updates them.
 */
struct vmm_vmstats_str {
	ulong_t pgf_alloc[MMU_PAGE_SIZES];	/* page_get_freelist */
	ulong_t pgf_allocok[MMU_PAGE_SIZES];
	ulong_t pgf_allocokrem[MMU_PAGE_SIZES];
	ulong_t pgf_allocfailed[MMU_PAGE_SIZES];
	ulong_t pgf_allocdeferred;
	ulong_t	pgf_allocretry[MMU_PAGE_SIZES];
	ulong_t pgc_alloc;			/* page_get_cachelist */
	ulong_t pgc_allocok;
	ulong_t pgc_allocokrem;
	ulong_t	pgc_allocokdeferred;
	ulong_t pgc_allocfailed;
	ulong_t	pgcp_alloc[MMU_PAGE_SIZES];	/* page_get_contig_pages */
	ulong_t	pgcp_allocfailed[MMU_PAGE_SIZES];
	ulong_t	pgcp_allocempty[MMU_PAGE_SIZES];
	ulong_t	pgcp_allocok[MMU_PAGE_SIZES];
	ulong_t	ptcp[MMU_PAGE_SIZES];		/* page_trylock_contig_pages */
	ulong_t	ptcpfreethresh[MMU_PAGE_SIZES];
	ulong_t	ptcpfailexcl[MMU_PAGE_SIZES];
	ulong_t	ptcpfailszc[MMU_PAGE_SIZES];
	ulong_t	ptcpfailcage[MMU_PAGE_SIZES];
	ulong_t	ptcpok[MMU_PAGE_SIZES];
	ulong_t	pgmf_alloc[MMU_PAGE_SIZES];	/* page_get_mnode_freelist */
	ulong_t	pgmf_allocfailed[MMU_PAGE_SIZES];
	ulong_t	pgmf_allocempty[MMU_PAGE_SIZES];
	ulong_t	pgmf_allocok[MMU_PAGE_SIZES];
	ulong_t	pgmc_alloc;			/* page_get_mnode_cachelist */
	ulong_t	pgmc_allocfailed;
	ulong_t	pgmc_allocempty;
	ulong_t	pgmc_allocok;
	ulong_t	pladd_free[MMU_PAGE_SIZES];	/* page_list_add/sub */
	ulong_t	plsub_free[MMU_PAGE_SIZES];
	ulong_t	pladd_cache;
	ulong_t	plsub_cache;
	ulong_t	plsubpages_szcbig;
	ulong_t	plsubpages_szc0;
	ulong_t	pfs_req[MMU_PAGE_SIZES];	/* page_freelist_split */
	ulong_t	pfs_demote[MMU_PAGE_SIZES];
	ulong_t	pfc_coalok[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
	ulong_t ppr_reloc[MMU_PAGE_SIZES];	/* page_relocate */
	ulong_t ppr_relocok[MMU_PAGE_SIZES];
	ulong_t ppr_relocnoroot[MMU_PAGE_SIZES];
	ulong_t ppr_reloc_replnoroot[MMU_PAGE_SIZES];
	ulong_t ppr_relocnolock[MMU_PAGE_SIZES];
	ulong_t ppr_relocnomem[MMU_PAGE_SIZES];
	ulong_t ppr_krelocfail[MMU_PAGE_SIZES];
	/* page coalesce counter */
	ulong_t	page_ctrs_coalesce[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
	/* candidates useful */
	ulong_t	page_ctrs_cands_skip[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
	/* ctrs changed after locking */
	ulong_t	page_ctrs_changed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
	/* page_freelist_coalesce failed */
	ulong_t	page_ctrs_failed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
	ulong_t	page_ctrs_coalesce_all;	/* page coalesce all counter */
	ulong_t	page_ctrs_cands_skip_all; /* candidates useful for all func */
};
/* The single statistics instance, defined outside this header. */
extern struct vmm_vmstats_str vmm_vmstats;
#endif	/* VM_STATS */
631 
/*
 * Used to hold off page relocations into the cage until OBP has completed
 * its boot-time handoff of its resources to the kernel.  Defined outside
 * this header.
 */
extern int page_relocate_ready;
637 
638 /*
639  * cpu/mmu-dependent vm variables may be reset at bootup.
640  */
extern uint_t mmu_page_sizes;
extern uint_t max_mmu_page_sizes;
extern uint_t mmu_hashcnt;
extern uint_t max_mmu_hashcnt;
extern size_t mmu_ism_pagesize;
extern int mmu_exported_pagesize_mask;
extern uint_t mmu_exported_page_sizes;
/* Translation tables between kernel page size codes and user size codes. */
extern uint_t szc_2_userszc[];
extern uint_t userszc_2_szc[];

/* Plain table lookups; the index is not range checked here. */
#define	USERSZC_2_SZC(userszc)	(userszc_2_szc[userszc])
#define	SZC_2_USERSZC(szc)	(szc_2_userszc[szc])
653 
654 /*
655  * Platform specific page routines
656  */
extern void mach_page_add(page_t **, page_t *);
extern void mach_page_sub(page_t **, page_t *);
extern uint_t page_get_pagecolors(uint_t);
/* ppcopy_kernel() is an alias for ppcopy_kernel__relocatable(). */
extern void ppcopy_kernel__relocatable(page_t *, page_t *);
#define	ppcopy_kernel(p1, p2)	ppcopy_kernel__relocatable(p1, p2)
662 
663 /*
664  * platform specific large pages for kernel heap support
665  */
extern size_t get_segkmem_lpsize(size_t lpsize);
extern size_t mmu_get_kernel_lpsize(size_t lpsize);
extern void mmu_init_kernel_pgsz(struct hat *hat);
/*
 * Declared with an explicit (void) parameter list so that this is a real
 * prototype and calls that pass arguments are diagnosed.
 */
extern void mmu_init_kcontext(void);
extern uint64_t kcontextreg;
671 
672 #ifdef	__cplusplus
673 }
674 #endif
675 
676 #endif	/* _VM_DEP_H */
677