xref: /illumos-gate/usr/src/uts/i86pc/vm/hat_pte.h (revision bb57d1f5164aca913cbd286ae1b61c896167cfa7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #ifndef	_VM_HAT_PTE_H
27 #define	_VM_HAT_PTE_H
28 
29 #pragma ident	"%Z%%M%	%I%	%E% SMI"
30 
31 #ifdef	__cplusplus
32 extern "C" {
33 #endif
34 
35 #include <sys/types.h>
36 #include <sys/mach_mmu.h>
37 
/*
 * macros to get/set/clear the PTE fields
 *
 * PTE_SET(p, f) - OR the bits of f into p; p must be a modifiable lvalue.
 * PTE_CLR(p, f) - clear the bits of f in p; p must be a modifiable lvalue.
 *		   f is cast to x86pte_t before it is complemented, so the
 *		   mask is always built at the full width of x86pte_t.
 * PTE_GET(p, f) - evaluates to the bits of p selected by f. The result is
 *		   the masked value itself, it is not normalized to 0 or 1.
 */
#define	PTE_SET(p, f)	((p) |= (f))
#define	PTE_CLR(p, f)	((p) &= ~(x86pte_t)(f))
#define	PTE_GET(p, f)	((p) & (f))
44 
/*
 * Test the PT_VALID bit of a pagetable entry or pagetable pointer.
 * Evaluates to the masked bit (non-zero when the entry is valid).
 */
#define	PTE_ISVALID(p)		((p) & (PT_VALID))
49 
/*
 * True (1) when the entry has PT_PAGESIZE set and lives above level 0,
 * i.e. it maps a large page. The level is tested first, so the PTE is not
 * examined at all for level 0.
 */
#define	PTE_IS_LGPG(p, l)	(0 < (l) && PTE_GET((p), PT_PAGESIZE) != 0)
54 
/*
 * Does this entry map a page, as opposed to pointing at a lower level
 * page table? It must be valid, and either sit at level 0 or have
 * PT_PAGESIZE set.
 */
#define	PTE_ISPAGE(p, l)	\
	(PTE_GET((p), PT_VALID) != 0 && \
	((l) == 0 || PTE_GET((p), PT_PAGESIZE) != 0))
60 
61 /*
62  * Handy macro to check if 2 PTE's are the same - ignores REF/MOD bits.
63  * On the 64 bit hypervisor we also have to ignore the high order
64  * software bits and the global/user bit which are set/cleared
65  * capriciously (by the hypervisor!)
66  */
67 #if defined(__amd64) && defined(__xpv)
68 #define	PT_IGNORE	((0x7fful << 52) | PT_GLOBAL | PT_USER)
69 #else
70 #define	PT_IGNORE	(0)
71 #endif
72 #define	PTE_EQUIV(a, b)	 (((a) | (PT_IGNORE | PT_REF | PT_MOD)) == \
73 	((b) | (PT_IGNORE | PT_REF | PT_MOD)))
74 
/*
 * Shorthand for converting a PTE to its pfn.
 *
 * PTE2MFN() masks the PTE with PT_PADDR_LGPG when it maps a large page at
 * the given level and with PT_PADDR otherwise, then converts the address
 * to a page number with mmu_btop(). In __xpv builds PTE2PFN() goes through
 * pte2pfn() instead; everywhere else it is the same as PTE2MFN().
 */
#define	PTE2MFN(p, l)	\
	mmu_btop((p) & (PTE_IS_LGPG((p), (l)) ? PT_PADDR_LGPG : PT_PADDR))
#ifdef __xpv
#define	PTE2PFN(p, l) pte2pfn(p, l)
#else
#define	PTE2PFN(p, l) PTE2MFN(p, l)
#endif
85 
/*
 * PT_NX is bit 63. PT_PADDR selects bits 12 through 51 of an entry.
 * PT_PADDR_LGPG is PT_PADDR without bit 12.
 */
#define	PT_NX		(1ull << 63)
#define	PT_PADDR	(((1ull << 52) - 1) & ~((1ull << 12) - 1))
#define	PT_PADDR_LGPG	(PT_PADDR & ~(1ull << 12)) /* phys addr for large pages */
89 
/*
 * Macros to create a PTP or PTE from the pfn and level
 *
 * MAKEPTP(pfn, l) - entry pointing at a page table of level l; it uses the
 *		     interior bits kept for level l + 1, mmu.ptp_bits[l + 1].
 * MAKEPTE(pfn, l) - leaf entry for level l, using mmu.pte_bits[l].
 *
 * Every macro argument is parenthesized where it is used, so compound
 * expressions such as "base | off" can be passed for pfn.
 */
#ifdef __xpv

/*
 * we use the highest order bit in physical address pfns to mark foreign mfns
 */
#ifdef _LP64
#define	PFN_IS_FOREIGN_MFN (1ul << 51)
#else
#define	PFN_IS_FOREIGN_MFN (1ul << 31)
#endif

#define	MAKEPTP(pfn, l)	\
	(pa_to_ma(pfn_to_pa(pfn)) | mmu.ptp_bits[(l) + 1])
/*
 * A pfn carrying the foreign marker has the marker stripped and is used
 * without the pa_to_ma() conversion; PT_FOREIGN, PT_REF and PT_MOD are
 * set in the resulting PTE. Any other pfn goes through pa_to_ma().
 */
#define	MAKEPTE(pfn, l) \
	(((pfn) & PFN_IS_FOREIGN_MFN) ? \
	((pfn_to_pa((pfn) & ~PFN_IS_FOREIGN_MFN) | mmu.pte_bits[(l)]) | \
	PT_FOREIGN | PT_REF | PT_MOD) : \
	(pa_to_ma(pfn_to_pa(pfn)) | mmu.pte_bits[(l)]))
#else
#define	MAKEPTP(pfn, l)	\
	(pfn_to_pa(pfn) | mmu.ptp_bits[(l) + 1])
#define	MAKEPTE(pfn, l)	\
	(pfn_to_pa(pfn) | mmu.pte_bits[(l)])
#endif
117 
118 /*
 * The idea of "level" refers to the level where the page table is used in
 * the hardware address translation steps. The level values correspond to the
121  * following names of tables used in AMD/Intel architecture documents:
122  *
123  *	AMD/INTEL name		Level #
124  *	----------------------	-------
125  *	Page Map Level 4	   3
126  *	Page Directory Pointer	   2
127  *	Page Directory		   1
128  *	Page Table		   0
129  *
130  * The numbering scheme is such that the values of 0 and 1 can correspond to
131  * the pagesize codes used for MPSS support. For now the Maximum level at
132  * which you can have a large page is a constant, that may change in
133  * future processors.
134  *
135  * The type of "level_t" is signed so that it can be used like:
136  *	level_t	l;
137  *	...
138  *	while (--l >= 0)
139  *		...
140  */
#define	MAX_NUM_LEVEL		4	/* size of the per-level mmu arrays */
#define	MAX_PAGE_LEVEL		2	/* max level that can map a large page */
typedef	int8_t level_t;			/* signed on purpose, see above */
/* Per-level lookups into the tables kept in the global "mmu". */
#define	LEVEL_SHIFT(l)	(mmu.level_shift[l])
#define	LEVEL_SIZE(l)	(mmu.level_size[l])
#define	LEVEL_OFFSET(l)	(mmu.level_offset[l])
#define	LEVEL_MASK(l)	(mmu.level_mask[l])
148 
/*
 * pfn thresholds for 32 bit PAE support.
 *
 * PFN_4G and PFN_64G are the page numbers of the first page at 4Gig and at
 * 64Gig. PFN_ABOVE4G() and PFN_ABOVE64G() are true for a pfn at or beyond
 * the respective boundary.
 */
#define	PFN_4G		(4ull * ((1 << 30) / MMU_PAGESIZE))
#define	PFN_64G		(64ull * ((1 << 30) / MMU_PAGESIZE))
#define	PFN_ABOVE4G(pfn) (PFN_4G <= (pfn))
#define	PFN_ABOVE64G(pfn) (PFN_64G <= (pfn))
157 
/*
 * The CR3 register holds the physical address of the top level page table.
 *
 * MAKECR3() turns the pfn of that table into a byte address with
 * mmu_ptob(); no other bits are OR'd into the value here.
 */
#define	MAKECR3(pfn)	mmu_ptob(pfn)
162 
/*
 * HAT/MMU parameters that depend on kernel mode and/or processor type
 *
 * The macros in this file read a single instance of this structure through
 * the global named "mmu". The per-level arrays are all sized by
 * MAX_NUM_LEVEL and indexed by level number.
 */
struct htable;
struct hat_mmu_info {
	x86pte_t pt_nx;		/* either 0 or PT_NX */
	x86pte_t pt_global;	/* either 0 or PT_GLOBAL */

	/* NOTE(review): presumably the largest pfn in use - confirm at setter */
	pfn_t highest_pfn;

	uint_t num_level;	/* number of page table levels in use */
	uint_t max_level;	/* just num_level - 1 */
	uint_t max_page_level;	/* maximum level at which we can map a page */
	uint_t umax_page_level; /* max user page map level */
	uint_t ptes_per_table;	/* # of entries in lower level page tables */
	uint_t top_level_count;	/* # of entries in top most level page table */

	uint_t	hash_cnt;	/* cnt of entries in htable_hash_cache */
	uint_t	vlp_hash_cnt;	/* cnt of entries in vlp htable_hash_cache */

	uint_t pae_hat;		/* either 0 or 1 */

	/* bounds tested by IN_VA_HOLE(): hole_start <= va < hole_end */
	uintptr_t hole_start;	/* start of VA hole (or -1 if none) */
	uintptr_t hole_end;	/* end of VA hole (or 0 if none) */

	struct htable **kmap_htables; /* htables for segmap + 32 bit heap */
	x86pte_t *kmap_ptes;	/* mapping of pagetables that map kmap */
	uintptr_t kmap_addr;	/* start addr of kmap */
	uintptr_t kmap_eaddr;	/* end addr of kmap */

	uint_t pte_size;	/* either 4 or 8 */
	uint_t pte_size_shift;	/* either 2 or 3 */
	/* ptp_bits[] is indexed with (l) + 1 by MAKEPTP(), pte_bits[] with l */
	x86pte_t ptp_bits[MAX_NUM_LEVEL];	/* bits set for interior PTP */
	x86pte_t pte_bits[MAX_NUM_LEVEL];	/* bits set for leaf PTE */

	/*
	 * A range of VA used to window pages in the i86pc/vm code.
	 * See PWIN_XXX macros.
	 */
	caddr_t	pwin_base;	/* VA of window 0, see PWIN_VA() */
	caddr_t	pwin_pte_va;	/* VA of window 0's pte, see PWIN_PTE_VA() */
	paddr_t	pwin_pte_pa;	/* PA of window 0's pte, see PWIN_PTE_PA() */

	/*
	 * The following tables are equivalent to PAGEXXXXX at different levels
	 * in the page table hierarchy.
	 */
	uint_t level_shift[MAX_NUM_LEVEL];	/* PAGESHIFT for given level */
	uintptr_t level_size[MAX_NUM_LEVEL];	/* PAGESIZE for given level */
	uintptr_t level_offset[MAX_NUM_LEVEL];	/* PAGEOFFSET for given level */
	uintptr_t level_mask[MAX_NUM_LEVEL];	/* PAGEMASK for given level */
};
215 
216 
217 #if defined(_KERNEL)
218 
/*
 * Macros to access the HAT's private page windows. They're used for
 * accessing pagetables, ppcopy() and page_zero().
 *
 * Each cpuid owns two consecutive window indexes:
 *	PWIN_TABLE(cpuid)	- the even index of the pair
 *	PWIN_SRC(cpuid)		- the odd index that follows it
 *
 * For a window index x:
 *	PWIN_VA(x)	- the virtual address of the window
 *	PWIN_PTE_VA(x)	- the virtual address of the pte that maps the window
 *	PWIN_PTE_PA(x)	- the physical address of the pte that maps the window
 */
#define	PWIN_TABLE(cpuid)	((cpuid) * 2)
#define	PWIN_SRC(cpuid)		(PWIN_TABLE(cpuid) + 1)	/* for x86pte_copy() */
#define	PWIN_VA(x)		(mmu.pwin_base + ((x) << MMU_PAGESHIFT))
#define	PWIN_PTE_VA(x)		(mmu.pwin_pte_va + ((x) << mmu.pte_size_shift))
#define	PWIN_PTE_PA(x)		(mmu.pwin_pte_pa + ((x) << mmu.pte_size_shift))
233 
/*
 * The concept of a VA hole exists in AMD64. This might need to be made
 * model specific eventually.
 *
 * In the 64 bit kernel PTE loads are atomic, but need cas64 on 32 bit kernel.
 *
 * IN_VA_HOLE(va)	- true when hole_start <= va < hole_end (amd64 only,
 *			  otherwise __lintzero)
 * FMT_PTE		- printf-style format for one PTE value
 * GET_PTE(ptr)		- read the PTE at ptr
 * SET_PTE(ptr, pte)	- store pte at ptr
 * CAS_PTE(ptr, x, y)	- compare-and-swap on the PTE at ptr, done with
 *			  cas64() or cas32() according to the PTE width
 *
 * The pte argument of SET_PTE() is parenthesized at each use so that a
 * compound expression (e.g. "pte | PT_REF") is stored as written.
 */
#if defined(__amd64)

#ifdef lint
#define	IN_VA_HOLE(va)	(__lintzero)
#else
#define	IN_VA_HOLE(va)	(mmu.hole_start <= (va) && (va) < mmu.hole_end)
#endif

#define	FMT_PTE "0x%lx"
#define	GET_PTE(ptr)		(*(x86pte_t *)(ptr))
#define	SET_PTE(ptr, pte)	(*(x86pte_t *)(ptr) = (pte))
#define	CAS_PTE(ptr, x, y)	cas64(ptr, x, y)

#elif defined(__i386)

#define	IN_VA_HOLE(va)	(__lintzero)

#define	FMT_PTE "0x%llx"

/* on 32 bit kernels, 64 bit loads aren't atomic, use get_pte64() */
extern x86pte_t get_pte64(x86pte_t *ptr);
#define	GET_PTE(ptr)	(mmu.pae_hat ? get_pte64(ptr) : *(x86pte32_t *)(ptr))
/*
 * With a PAE hat the upper 32 bits are stored first, then the lower 32
 * bits; without PAE only the lower 32 bits are stored. The value is
 * widened to x86pte_t before the shift so that a 32 bit argument is never
 * shifted by its own width.
 */
#define	SET_PTE(ptr, pte)						\
	((mmu.pae_hat ?							\
	((x86pte32_t *)(ptr))[1] = (x86pte32_t)((x86pte_t)(pte) >> 32) : 0), \
	*(x86pte32_t *)(ptr) = (x86pte32_t)(pte))
#define	CAS_PTE(ptr, x, y)			\
	(mmu.pae_hat ? cas64(ptr, x, y) :	\
	cas32((uint32_t *)(ptr), (uint32_t)(x), (uint32_t)(y)))

#endif	/* __i386 */
270 
/*
 * Locate entry x of a page table. The byte offset of the entry is the
 * index shifted by mmu.pte_size_shift.
 *
 * PT_INDEX_PTR(p, x)	   - p is the table's address; yields a pointer to
 *			     the entry
 * PT_INDEX_PHYSADDR(p, x) - p is the table's physical address; yields the
 *			     physical address of the entry as a paddr_t
 */
#define	PT_INDEX_PTR(p, x) \
	((x86pte_t *)(((x) << mmu.pte_size_shift) + (uintptr_t)(p)))

#define	PT_INDEX_PHYSADDR(p, x) \
	(((x) << mmu.pte_size_shift) + (paddr_t)(p))
283 
/*
 * From pfn to bytes, careful not to lose bits on PAE.
 *
 * The pfn is cast to paddr_t before it is handed to mmu_ptob(), so the
 * conversion is carried out at the width of paddr_t rather than at the
 * width of the caller's pfn type.
 */
#define	pfn_to_pa(pfn) (mmu_ptob((paddr_t)(pfn)))
288 
#ifdef __xpv
/* out-of-line PTE to pfn conversion, used by PTE2PFN() in __xpv builds */
extern pfn_t pte2pfn(x86pte_t pte, level_t l);
#endif

/* the hat_mmu_info instance that the macros in this file reference as "mmu" */
extern struct hat_mmu_info mmu;
294 
295 #endif	/* _KERNEL */
296 
297 
298 #ifdef	__cplusplus
299 }
300 #endif
301 
302 #endif	/* _VM_HAT_PTE_H */
303