xref: /titanic_52/usr/src/uts/i86pc/vm/hat_pte.h (revision a29e56d91db891741f1af9f6bbd3e3c3cac5f19b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #ifndef	_VM_HAT_PTE_H
27 #define	_VM_HAT_PTE_H
28 
29 #ifdef	__cplusplus
30 extern "C" {
31 #endif
32 
33 #include <sys/types.h>
34 #include <sys/mach_mmu.h>
35 
/*
 * Basic bit-field accessors for a page table entry.
 *
 *	PTE_SET(pte, bits)	OR bits into pte; pte must be an lvalue
 *	PTE_CLR(pte, bits)	clear bits in pte; pte must be an lvalue
 *	PTE_GET(pte, bits)	yield pte masked by bits (pte is unchanged)
 *
 * PTE_CLR converts the mask to x86pte_t before complementing it, so the
 * complement is always computed at the full width of a PTE.
 */
#define	PTE_SET(pte, bits)	((pte) |= (bits))
#define	PTE_CLR(pte, bits)	((pte) &= ~(x86pte_t)(bits))
#define	PTE_GET(pte, bits)	((pte) & (bits))
42 
/*
 * Test the PT_VALID bit of a pagetable entry or pagetable pointer.
 * The result is the masked bit itself (non-zero when valid), as produced
 * by PTE_GET().
 */
#define	PTE_ISVALID(pte)	PTE_GET(pte, PT_VALID)
47 
/*
 * True when the entry maps a large page: the level must be above 0 and
 * the entry must have PT_PAGESIZE set.
 */
#define	PTE_IS_LGPG(pte, lvl)	((lvl) > 0 && PTE_GET((pte), PT_PAGESIZE))
52 
/*
 * True when a valid entry maps a page rather than pointing at a lower
 * level page table: either it lives at level 0, or it has PT_PAGESIZE set.
 */
#define	PTE_ISPAGE(pte, lvl)	\
	(PTE_ISVALID(pte) && ((lvl) == 0 || PTE_GET(pte, PT_PAGESIZE)))
58 
/*
 * Compare two PTEs for equivalence, disregarding bits that may differ
 * without the entries being considered different: PT_REF and PT_MOD
 * always, plus whatever PT_IGNORE names.
 *
 * PT_IGNORE is empty except on the 64 bit hypervisor, where the high order
 * software bits and the global/user bits are set/cleared capriciously (by
 * the hypervisor!) and so have to be ignored as well.
 */
#if defined(__amd64) && defined(__xpv)
#define	PT_IGNORE	((0x7fful << 52) | PT_GLOBAL | PT_USER)
#else
#define	PT_IGNORE	(0)
#endif
#define	PTE_EQUIV(pte1, pte2)	\
	(((pte1) | (PT_IGNORE | PT_REF | PT_MOD)) == \
	((pte2) | (PT_IGNORE | PT_REF | PT_MOD)))
72 
/*
 * Shorthand for converting a PTE to its frame number.
 *
 * PTE2MFN masks the entry with PT_PADDR_LGPG when PTE_IS_LGPG() reports a
 * large page and with PT_PADDR otherwise, then converts the resulting
 * byte address with mmu_btop().
 *
 * PTE2PFN is PTE2MFN, except under __xpv where it calls pte2pfn().
 *
 * Note that the pte argument appears more than once in the expansion.
 */
#define	PTE2MFN(pte, lvl)	\
	mmu_btop(PTE_GET((pte), \
	    PTE_IS_LGPG((pte), (lvl)) ? PT_PADDR_LGPG : PT_PADDR))
#ifndef __xpv
#define	PTE2PFN(pte, lvl) PTE2MFN(pte, lvl)
#else
#define	PTE2PFN(pte, lvl) pte2pfn(pte, lvl)
#endif
83 
/*
 * PTE bit masks:
 *	PT_NX		bit 63
 *	PT_PADDR	bits 12 through 51
 *	PT_PADDR_LGPG	bits 13 through 51 (phys addr for large pages)
 */
#define	PT_PADDR_LGPG	(0x000fffffffffe000ULL)
#define	PT_PADDR	(0x000ffffffffff000ULL)
#define	PT_NX		(0x8000000000000000ULL)
87 
88 /*
89  * Macros to create a PTP or PTE from the pfn and level
90  */
91 #ifdef __xpv
92 
/*
 * Foreign mfns are marked by setting the highest order bit used for
 * physical address pfns: bit 51 when _LP64 is defined, bit 31 otherwise.
 */
#ifndef _LP64
#define	PFN_IS_FOREIGN_MFN (1ul << 31)
#else
#define	PFN_IS_FOREIGN_MFN (1ul << 51)
#endif
101 
/*
 * Hypervisor (__xpv) variants.
 *
 * MAKEPTP converts the pfn to a physical address, passes it through
 * pa_to_ma() and ORs in the PTP bits for the level above l.
 *
 * MAKEPTE distinguishes two cases:
 *  - pfn carries PFN_IS_FOREIGN_MFN: the marker bit is stripped, the
 *    resulting address is used as is (no pa_to_ma()), and PT_FOREIGN,
 *    PT_REF and PT_MOD are added to the level's PTE bits.
 *  - otherwise: the address goes through pa_to_ma() and only the level's
 *    PTE bits are added.
 *
 * Every use of pfn is parenthesized so that an expression argument such
 * as "a | b" is tested and masked as a whole. pfn is still expanded more
 * than once, so avoid arguments with side effects.
 */
#define	MAKEPTP(pfn, l)	\
	(pa_to_ma(pfn_to_pa(pfn)) | mmu.ptp_bits[(l) + 1])
#define	MAKEPTE(pfn, l) \
	(((pfn) & PFN_IS_FOREIGN_MFN) ? \
	((pfn_to_pa((pfn) & ~PFN_IS_FOREIGN_MFN) | mmu.pte_bits[l]) | \
	PT_FOREIGN | PT_REF | PT_MOD) : \
	(pa_to_ma(pfn_to_pa(pfn)) | mmu.pte_bits[l]))
109 #else
/*
 * Native (non-hypervisor) variants: convert the pfn to a physical address
 * and OR in the level dependent bits. MAKEPTP takes its bits from the
 * ptp_bits[] slot one above lvl; MAKEPTE uses pte_bits[] at lvl itself.
 */
#define	MAKEPTP(frame, lvl)	\
	(pfn_to_pa(frame) | mmu.ptp_bits[(lvl) + 1])
#define	MAKEPTE(frame, lvl)	\
	(pfn_to_pa(frame) | mmu.pte_bits[lvl])
114 #endif
115 
/*
 * A "level" identifies where a page table is used in the hardware address
 * translation steps. The level values correspond to the following names of
 * tables used in AMD/Intel architecture documents:
 *
 *	AMD/INTEL name		Level #
 *	----------------------	-------
 *	Page Map Level 4	   3
 *	Page Directory Pointer	   2
 *	Page Directory		   1
 *	Page Table		   0
 *
 * The numbering is chosen so that the values 0 and 1 can correspond to
 * the pagesize codes used for MPSS support. For now the maximum level at
 * which a large page can exist is a constant; that may change in future
 * processors.
 *
 * level_t is deliberately a signed type so that countdown loops work:
 *	level_t	l;
 *	...
 *	while (--l >= 0)
 *		...
 *
 * The LEVEL_* macros look up the per-level shift, size, offset and mask
 * in the corresponding arrays of the global mmu structure.
 */
#define	MAX_NUM_LEVEL		4
#define	MAX_PAGE_LEVEL		2
typedef	int8_t level_t;
#define	LEVEL_SHIFT(lvl)	(mmu.level_shift[lvl])
#define	LEVEL_SIZE(lvl)		(mmu.level_size[lvl])
#define	LEVEL_OFFSET(lvl)	(mmu.level_offset[lvl])
#define	LEVEL_MASK(lvl)		(mmu.level_mask[lvl])
146 
/*
 * Page frame number thresholds for 32 bit PAE support.
 *
 * PFN_4G and PFN_64G are the number of MMU_PAGESIZE pages in 4 Gig and
 * 64 Gig respectively; PFN_ABOVE4G() / PFN_ABOVE64G() test whether a pfn
 * is at or beyond the corresponding threshold.
 */
#define	PFN_4G		(4ull * (1024 * 1024 * 1024 / MMU_PAGESIZE))
#define	PFN_64G		(64ull * (1024 * 1024 * 1024 / MMU_PAGESIZE))
#define	PFN_ABOVE4G(frame) ((frame) >= PFN_4G)
#define	PFN_ABOVE64G(frame) ((frame) >= PFN_64G)
155 
/*
 * Build a CR3 value from the pfn of the top level page table. The CR3
 * register holds that table's physical address, so this is simply a
 * page-to-byte conversion via mmu_ptob().
 */
#define	MAKECR3(top_pfn)	mmu_ptob(top_pfn)
160 
/*
 * HAT/MMU parameters that depend on kernel mode and/or processor type
 *
 * One instance of this structure, "mmu", is declared extern further down
 * in this file (under _KERNEL); it is what the LEVEL_*, MAKEPTP/MAKEPTE,
 * PWIN_* and PT_INDEX_* macros read.
 */
struct htable;	/* only referenced through pointers below */
struct hat_mmu_info {
	x86pte_t pt_nx;		/* either 0 or PT_NX */
	x86pte_t pt_global;	/* either 0 or PT_GLOBAL */

	/* NOTE(review): presumably the largest usable pfn -- confirm */
	pfn_t highest_pfn;

	uint_t num_level;	/* number of page table levels in use */
	uint_t max_level;	/* just num_level - 1 */
	uint_t max_page_level;	/* maximum level at which we can map a page */
	uint_t umax_page_level; /* max user page map level */
	uint_t ptes_per_table;	/* # of entries in lower level page tables */
	uint_t top_level_count;	/* # of entries in top most level page table */

	uint_t	hash_cnt;	/* cnt of entries in htable_hash_cache */
	uint_t	vlp_hash_cnt;	/* cnt of entries in vlp htable_hash_cache */

	/* on 32 bit kernels GET_PTE/SET_PTE/CAS_PTE switch on this flag */
	uint_t pae_hat;		/* either 0 or 1 */

	uintptr_t hole_start;	/* start of VA hole (or -1 if none) */
	uintptr_t hole_end;	/* end of VA hole (or 0 if none) */

	struct htable **kmap_htables; /* htables for segmap + 32 bit heap */
	x86pte_t *kmap_ptes;	/* mapping of pagetables that map kmap */
	uintptr_t kmap_addr;	/* start addr of kmap */
	uintptr_t kmap_eaddr;	/* end addr of kmap */

	uint_t pte_size;	/* either 4 or 8 */
	uint_t pte_size_shift;	/* either 2 or 3 */
	/* MAKEPTP() indexes ptp_bits[] with level + 1 */
	x86pte_t ptp_bits[MAX_NUM_LEVEL];	/* bits set for interior PTP */
	/* MAKEPTE() indexes pte_bits[] with the level itself */
	x86pte_t pte_bits[MAX_NUM_LEVEL];	/* bits set for leaf PTE */

	/*
	 * A range of VA used to window pages in the i86pc/vm code.
	 * See PWIN_XXX macros: pwin_base backs PWIN_VA(), pwin_pte_va backs
	 * PWIN_PTE_VA() and pwin_pte_pa backs PWIN_PTE_PA().
	 */
	caddr_t	pwin_base;
	caddr_t	pwin_pte_va;
	paddr_t	pwin_pte_pa;

	/*
	 * The following tables are equivalent to PAGEXXXXX at different levels
	 * in the page table hierarchy. They are read through the LEVEL_SHIFT,
	 * LEVEL_SIZE, LEVEL_OFFSET and LEVEL_MASK macros.
	 */
	uint_t level_shift[MAX_NUM_LEVEL];	/* PAGESHIFT for given level */
	uintptr_t level_size[MAX_NUM_LEVEL];	/* PAGESIZE for given level */
	uintptr_t level_offset[MAX_NUM_LEVEL];	/* PAGEOFFSET for given level */
	uintptr_t level_mask[MAX_NUM_LEVEL];	/* PAGEMASK for given level */
};
213 
214 
215 #if defined(_KERNEL)
216 
/*
 * Accessors for the HAT's private page windows, which are used for
 * accessing pagetables, ppcopy() and page_zero().
 *
 * Each cpu has two window slots; the first two macros turn a cpuid into
 * the slot index for a particular use:
 *	PWIN_TABLE(cpuid)	even slot, cpuid * 2
 *	PWIN_SRC(cpuid)		odd slot, cpuid * 2 + 1 (for x86pte_copy())
 *
 * The remaining three take a slot index and give:
 *	PWIN_VA(x)	the virtual address of the window
 *	PWIN_PTE_VA(x)	the virtual address of the pte that maps the window
 *	PWIN_PTE_PA(x)	the physical address of the pte that maps the window
 */
#define	PWIN_TABLE(cpuid)	((cpuid) * 2)
#define	PWIN_SRC(cpuid)		((cpuid) * 2 + 1)	/* for x86pte_copy() */
#define	PWIN_VA(slot)		(mmu.pwin_base + ((slot) << MMU_PAGESHIFT))
#define	PWIN_PTE_VA(slot)	\
	(mmu.pwin_pte_va + ((slot) << mmu.pte_size_shift))
#define	PWIN_PTE_PA(slot)	\
	(mmu.pwin_pte_pa + ((slot) << mmu.pte_size_shift))
231 
232 /*
233  * The concept of a VA hole exists in AMD64. This might need to be made
234  * model specific eventually.
235  *
236  * In the 64 bit kernel PTE loads are atomic, but need atomic_cas_64 on 32
237  * bit kernel.
238  */
239 #if defined(__amd64)
240 
/*
 * amd64 definitions.
 *
 * IN_VA_HOLE(va) is true when va lies in [mmu.hole_start, mmu.hole_end);
 * under lint it expands to __lintzero instead.
 *
 * A PTE is accessed as a single x86pte_t, so GET_PTE and SET_PTE are a
 * plain load and store, and CAS_PTE maps directly to atomic_cas_64().
 * FMT_PTE is the printf-style format used for a PTE value.
 */
#if defined(lint)
#define	IN_VA_HOLE(va)	(__lintzero)
#else
#define	IN_VA_HOLE(va)	(mmu.hole_start <= (va) && (va) < mmu.hole_end)
#endif

#define	FMT_PTE "0x%lx"
#define	GET_PTE(ptep)		(*(x86pte_t *)(ptep))
#define	SET_PTE(ptep, val)	(*(x86pte_t *)(ptep) = val)
#define	CAS_PTE(ptep, expect, update)	atomic_cas_64(ptep, expect, update)
251 
252 #elif defined(__i386)
253 
/*
 * i386 definitions: IN_VA_HOLE() ignores its argument and expands to
 * __lintzero, and a PTE value is printed with a long long format.
 */
#define	IN_VA_HOLE(va)	(__lintzero)

#define	FMT_PTE "0x%llx"
257 
258 /* on 32 bit kernels, 64 bit loads aren't atomic, use get_pte64() */
259 extern x86pte_t get_pte64(x86pte_t *ptr);
260 #define	GET_PTE(ptr)	(mmu.pae_hat ? get_pte64(ptr) : *(x86pte32_t *)(ptr))
/*
 * Store a PTE on a 32 bit kernel.
 *
 * With mmu.pae_hat set the entry is written as two 32 bit stores: bits
 * 63..32 into word [1] first, then bits 31..0 into word [0]. Without it
 * only the single 32 bit word is written. The value of the expression is
 * the low 32 bit word that was stored.
 *
 * pte is parenthesized wherever it is used so that an expression argument
 * such as "a | b" is shifted and stored as a whole. ptr and pte are still
 * expanded more than once, so avoid arguments with side effects.
 */
#define	SET_PTE(ptr, pte)						\
	((mmu.pae_hat ? ((x86pte32_t *)(ptr))[1] = ((pte) >> 32) : 0),	\
	*(x86pte32_t *)(ptr) = (pte))
/*
 * Compare-and-swap a PTE on a 32 bit kernel: atomic_cas_64() on the whole
 * entry when mmu.pae_hat is set, otherwise atomic_cas_32() with the
 * pointer and both values narrowed to 32 bits.
 */
#define	CAS_PTE(ptep, expect, update)				\
	(mmu.pae_hat ? atomic_cas_64(ptep, expect, update) :	\
	atomic_cas_32((uint32_t *)(ptep), (uint32_t)(expect),	\
	(uint32_t)(update)))
267 
268 #endif	/* __i386 */
269 
/*
 * Given the address of a page table and an entry index, return a pointer
 * to that entry. The byte offset is the index scaled by the current PTE
 * size (mmu.pte_size_shift).
 */
#define	PT_INDEX_PTR(table, idx) \
	((x86pte_t *)((uintptr_t)(table) + ((idx) << mmu.pte_size_shift)))
275 
/*
 * Given the physical address of a page table and an entry index, return
 * the physical address of that entry. The byte offset is the index scaled
 * by the current PTE size (mmu.pte_size_shift).
 */
#define	PT_INDEX_PHYSADDR(table_pa, idx) \
	((paddr_t)(table_pa) + ((idx) << mmu.pte_size_shift))
282 
/*
 * Convert a pfn to a byte address. The pfn is widened to paddr_t before
 * the conversion so that no bits are lost on PAE.
 */
#define	pfn_to_pa(frame) (mmu_ptob((paddr_t)(frame)))
287 
#ifdef __xpv
/* PTE to pfn conversion that PTE2PFN() expands to under __xpv. */
extern pfn_t pte2pfn(x86pte_t, level_t);
#endif

/* The hat_mmu_info instance that the macros in this file refer to as "mmu". */
extern struct hat_mmu_info mmu;
293 
294 #endif	/* _KERNEL */
295 
296 
297 #ifdef	__cplusplus
298 }
299 #endif
300 
301 #endif	/* _VM_HAT_PTE_H */
302