xref: /illumos-gate/usr/src/uts/i86pc/vm/hat_pte.h (revision f498645a3eecf2ddd304b4ea9c7f1b4c155ff79e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef	_VM_HAT_PTE_H
28 #define	_VM_HAT_PTE_H
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #ifdef	__cplusplus
33 extern "C" {
34 #endif
35 
36 
37 #include <sys/types.h>
38 
39 /*
40  * Defines for the bits in X86 and AMD64 Page Tables
41  *
42  * Notes:
43  *
44  * Largepages and PAT bits:
45  *
46  * bit 7 at level 0 is the PAT bit
47  * bit 7 above level 0 is the Pagesize bit (set for large page)
48  * bit 12 (when a large page) is the PAT bit
49  *
50  * In Solaris the PAT/PWT/PCD values are set up so that:
51  *
52  * PAT & PWT -> Write Protected
53  * PAT & PCD -> Write Combining
54  * PAT by itself (PWT == 0 && PCD == 0) yields uncacheable (same as PCD == 1)
55  *
56  *
57  * Permission bits:
58  *
59  * - PT_USER must be set in all levels for user pages
60  * - PT_WRITE must be set in all levels for user writable pages
61  * - PT_NX applies if set at any level
62  *
63  * For these, we use the "allow" settings in all tables above level 0 and only
64  * ever disable things in PTEs.
65  *
66  * The use of PT_GLOBAL and PT_NX depend on being enabled in processor
67  * control registers. Hence, we use a variable to reference these bit
68  * masks. During hat_kern_setup() if the feature isn't enabled we
69  * clear out the variables.
70  */
#define	PT_VALID	(1 << 0)	/* entry holds a valid translation */
#define	PT_WRITABLE	(1 << 1)	/* page may be written */
#define	PT_USER		(1 << 2)	/* user mode may access the page */
#define	PT_WRITETHRU	(1 << 3)	/* write back caching off (non-PAT) */
#define	PT_NOCACHE	(1 << 4)	/* page is uncacheable (non-PAT) */
#define	PT_REF		(1 << 5)	/* page has been referenced */
#define	PT_MOD		(1 << 6)	/* page has been modified */
#define	PT_PAGESIZE	(1 << 7)	/* above level 0: a large page */
#define	PT_PAT_4K	(1 << 7)	/* at level 0: write combining */
#define	PT_GLOBAL	(1 << 8)	/* mapping is global */
#define	PT_SOFTWARE	(0x7 << 9)	/* bits 9-11: available for software */

#define	PT_PAT_LARGE	(1 << 12)	/* PAT bit when mapping a large page */

/* bits used in page table pointers */
#define	PT_PTPBITS	(PT_VALID | PT_WRITABLE | PT_USER | PT_REF)
/* bits 0-11, for masking off flag bits */
#define	PT_FLAGBITS	((1 << 12) - 1)
87 
/*
 * The HAT uses two of the software bits to track attributes of a mapping:
 *
 * PT_NOSYNC    - The PT_REF/PT_MOD bits are not sync'd to page_t, so the
 *                hat installs them as always set.
 *
 * PT_NOCONSIST - There is no hment entry for this mapping.
 */
#define	PT_NOSYNC	(1 << 9)	/* PTE came from HAT_NOSYNC */
#define	PT_NOCONSIST	(1 << 10)	/* PTE came from HAT_LOAD_NOCONSIST */
98 
/*
 * Basic PTE field accessors: extract the bits selected by a mask, OR flag
 * bits in, or mask flag bits out. PTE_CLR converts the mask to x86pte_t
 * before complementing it, so bits above the width of the mask's own type
 * are preserved in the PTE.
 */
#define	PTE_GET(p, f)	((p) & (f))
#define	PTE_SET(p, f)	((p) |= (f))
#define	PTE_CLR(p, f)	((p) &= ~(x86pte_t)(f))

/*
 * Non-zero when a pagetable entry or pointer has its valid bit set.
 */
#define	PTE_ISVALID(p)		PTE_GET(p, PT_VALID)

/*
 * True when the PTE maps a large page: it must sit above level 0 and have
 * the pagesize bit set.
 */
#define	PTE_IS_LGPG(p, l)	((l) > 0 && PTE_GET((p), PT_PAGESIZE))

/*
 * True when a valid PTE represents a page rather than a pointer to another
 * page table, i.e. it is at level 0 or has the pagesize bit set.
 */
#define	PTE_ISPAGE(p, l)	\
	(PTE_ISVALID(p) && ((l) == 0 || PTE_GET((p), PT_PAGESIZE)))

/*
 * Compare two PTEs while ignoring any difference in the REF/MOD bits.
 */
#define	PTE_EQUIV(a, b)	\
	(((a) | (PT_REF | PT_MOD)) == ((b) | (PT_REF | PT_MOD)))

/*
 * Shorthand for converting a PTE to its pfn. The physical address mask
 * depends on whether the PTE maps a large page at the given level.
 */
#define	PTE2PFN(p, l)	\
	mmu_btop(PTE_GET((p), \
	    PTE_IS_LGPG((p), (l)) ? PT_PADDR_LGPG : PT_PADDR))
132 
/*
 * The software representation of a single Page Table Entry is always a
 * 64 bit unsigned int. If running a non-PAE hat, the page table access
 * routines know to extend/shorten it to 32 bits.
 *
 * PT_NX is bit 63. The physical address field of an entry ends at bit 51:
 * x86 physical addresses are at most 52 bits wide and bits 52-62 of an
 * entry are not address bits, so the masks below must not include them
 * (otherwise a stray bit there would leak into the pfn computed from a PTE).
 * The large page mask also leaves out bit 12, which is the PAT bit
 * (PT_PAT_LARGE) in a large page entry.
 */
typedef uint64_t x86pte_t;
typedef uint32_t x86pte32_t;
#define	PT_NX		(0x8000000000000000ull)
#define	PT_PADDR	(0x000ffffffffff000ull)
#define	PT_PADDR_LGPG	(0x000fffffffffe000ull)	/* phys addr for large pages */
143 
/*
 * Build a page table entry from a pfn and a level. The pfn is widened to
 * x86pte_t and shifted by MMU_PAGESHIFT, then combined with the mmu's
 * per-level bits: MAKEPTE uses pte_bits[l] (leaf PTE), while MAKEPTP uses
 * ptp_bits[l + 1] (interior PTP).
 */
#define	MAKEPTE(pfn, l)	\
	(mmu.pte_bits[(l)] | ((x86pte_t)(pfn) << MMU_PAGESHIFT))
#define	MAKEPTP(pfn, l)	\
	(mmu.ptp_bits[(l) + 1] | ((x86pte_t)(pfn) << MMU_PAGESHIFT))
151 
/*
 * A "level" identifies where a page table is used in the hardware address
 * translation steps. Level numbers map onto the table names found in
 * AMD/Intel architecture documents as follows:
 *
 *	Level #		AMD/INTEL name
 *	-------		----------------------
 *	   3		Page Map Level 4
 *	   2		Page Directory Pointer
 *	   1		Page Directory
 *	   0		Page Table
 *
 * With this numbering, the values 0 and 1 can correspond to the pagesize
 * codes used for MPSS support. The maximum level at which a large page can
 * exist is a constant for now; that may change in future processors.
 *
 * "level_t" is a signed type so that countdown loops work:
 *	level_t	l;
 *	...
 *	while (--l >= 0)
 *		...
 */
#define	MAX_NUM_LEVEL		4
#define	MAX_PAGE_LEVEL		1			/* for now.. sigh */
typedef	int16_t level_t;

/* per-level lookups into the mmu tables */
#define	LEVEL_SHIFT(l)	(mmu.level_shift[(l)])
#define	LEVEL_SIZE(l)	(mmu.level_size[(l)])
#define	LEVEL_OFFSET(l)	(mmu.level_offset[(l)])
#define	LEVEL_MASK(l)	(mmu.level_mask[(l)])
182 
/*
 * For 32 bit PAE support: the page frame numbers that correspond to 4Gig
 * and 64Gig, and tests for whether a PFN lies at or above those limits.
 */
#define	PFN_4G		(4ull * ((1 << 30) / MMU_PAGESIZE))
#define	PFN_64G		(64ull * ((1 << 30) / MMU_PAGESIZE))
#define	PFN_ABOVE4G(pfn) (PFN_4G <= (pfn))
#define	PFN_ABOVE64G(pfn) (PFN_64G <= (pfn))
191 
/*
 * CR3 holds the physical address of the top level page table, so its value
 * is the table's pfn converted to a byte address.
 */
#define	MAKECR3(pfn)	(mmu_ptob(pfn))
196 
197 /*
198  * HAT/MMU parameters that depend on kernel mode and/or processor type
199  */
200 struct htable;
201 struct hat_mmu_info {
202 	x86pte_t pt_nx;		/* either 0 or PT_NX */
203 	x86pte_t pt_global;	/* either 0 or PT_GLOBAL */
204 
205 	pfn_t highest_pfn;
206 
207 	uint_t num_level;	/* number of page table levels in use */
208 	uint_t max_level;	/* just num_level - 1 */
209 	uint_t max_page_level;	/* maximum level at which we can map a page */
210 	uint_t ptes_per_table;	/* # of entries in lower level page tables */
211 	uint_t top_level_count;	/* # of entries in top most level page table */
212 
213 	uint_t	hash_cnt;	/* cnt of entries in htable_hash_cache */
214 	uint_t	vlp_hash_cnt;	/* cnt of entries in vlp htable_hash_cache */
215 
216 	uint_t pae_hat;		/* either 0 or 1 */
217 
218 	uintptr_t hole_start;	/* start of VA hole (or -1 if none) */
219 	uintptr_t hole_end;	/* end of VA hole (or 0 if none) */
220 
221 	struct htable **kmap_htables; /* htables for segmap + 32 bit heap */
222 	x86pte_t *kmap_ptes;	/* mapping of pagetables that map kmap */
223 	uintptr_t kmap_addr;	/* start addr of kmap */
224 	uintptr_t kmap_eaddr;	/* end addr of kmap */
225 
226 	uint_t pte_size;	/* either 4 or 8 */
227 	uint_t pte_size_shift;	/* either 2 or 3 */
228 	x86pte_t ptp_bits[MAX_NUM_LEVEL];	/* bits set for interior PTP */
229 	x86pte_t pte_bits[MAX_NUM_LEVEL];	/* bits set for leaf PTE */
230 
231 	/*
232 	 * The following tables are equivalent to PAGEXXXXX at different levels
233 	 * in the page table hierarchy.
234 	 */
235 	uint_t level_shift[MAX_NUM_LEVEL];	/* PAGESHIFT for given level */
236 	uintptr_t level_size[MAX_NUM_LEVEL];	/* PAGESIZE for given level */
237 	uintptr_t level_offset[MAX_NUM_LEVEL];	/* PAGEOFFSET for given level */
238 	uintptr_t level_mask[MAX_NUM_LEVEL];	/* PAGEMASK for given level */
239 
240 	uint_t tlb_entries[MAX_NUM_LEVEL];	/* tlb entries per pagesize */
241 };
242 
243 
#if defined(_KERNEL)

extern struct hat_mmu_info mmu;

/*
 * Architecture dependent definitions.
 *
 * IN_VA_HOLE(va): AMD64 has the concept of a VA hole, tested here against
 * mmu.hole_start and mmu.hole_end; the i386 version is always 0. This
 * might need to be made model specific eventually.
 *
 * ATOMIC_LOAD64(ptr, pte): PTE loads are atomic in the 64 bit kernel, so a
 * plain load is used there; the 32 bit kernel needs cas64.
 *
 * FMT_PTE: format string for a PTE value.
 */
#if defined(__amd64)

#define	FMT_PTE "%lx"
#define	ATOMIC_LOAD64(ptr, pte) ((pte) = *(ptr))

#if defined(lint)
#define	IN_VA_HOLE(va)	(__lintzero)
#else
#define	IN_VA_HOLE(va)	(mmu.hole_start <= (va) && (va) < mmu.hole_end)
#endif	/* lint */

#elif defined(__i386)

#define	FMT_PTE "%llx"
#define	ATOMIC_LOAD64(ptr, pte) (((pte) = *(ptr)),			\
	((pte) = cas64(ptr, pte, pte)))

#if defined(lint)
#define	IN_VA_HOLE(va)	(__lintzero)
#else
#define	IN_VA_HOLE(va)	(0)
#endif	/* lint */

#endif	/* __amd64 / __i386 */

#endif	/* _KERNEL */
280 
281 
282 #ifdef	__cplusplus
283 }
284 #endif
285 
286 #endif	/* _VM_HAT_PTE_H */
287