/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2018 Joyent, Inc.
 */

#ifndef	_VM_HAT_PTE_H
#define	_VM_HAT_PTE_H

#ifdef	__cplusplus
extern "C" {
#endif

#include <sys/types.h>
#include <sys/mach_mmu.h>

/*
 * Macros to get/set/clear the PTE fields.
 *
 * PTE_SET and PTE_CLR assign to "p", so "p" must be a modifiable lvalue;
 * PTE_GET yields the bits of "p" selected by the mask "f".
 * PTE_CLR casts "f" to x86pte_t before complementing it, so a mask of a
 * narrower unsigned type does not clear bits above its own width.
 */
#define	PTE_SET(p, f)	((p) |= (f))
#define	PTE_CLR(p, f)	((p) &= ~(x86pte_t)(f))
#define	PTE_GET(p, f)	((p) & (f))

/*
 * Handy macro to check if a pagetable entry or pointer is valid.
 * Evaluates to the PT_VALID bit of "p": non-zero when set, zero otherwise.
 */
#define	PTE_ISVALID(p)		PTE_GET(p, PT_VALID)

/*
 * Does a PTE map a large page?
 * True only when the level "l" is above 0 and PT_PAGESIZE is set in "p".
 */
#define	PTE_IS_LGPG(p, l)	((l) > 0 && PTE_GET((p), PT_PAGESIZE))

/*
 * Does this PTE represent a page (not a pointer to another page table)?
 * The entry must be valid, and either sit at level 0 or have PT_PAGESIZE set.
 */
#define	PTE_ISPAGE(p, l)	\
	(PTE_ISVALID(p) && ((l) == 0 || PTE_GET(p, PT_PAGESIZE)))

/*
 * Handy macro to check if 2 PTE's are the same - ignores REF/MOD bits.
 * On the 64 bit hypervisor we also have to ignore the high order
 * software bits and the global/user bit which are set/cleared
 * capriciously (by the hypervisor!)
 *
 * The comparison forces every ignored bit on in both operands, so two
 * entries compare equal exactly when all of their remaining bits match.
 */
#if defined(__amd64) && defined(__xpv)
#define	PT_IGNORE	((0x7fful << 52) | PT_GLOBAL | PT_USER)
#else
#define	PT_IGNORE	(0)
#endif
#define	PTE_EQUIV(a, b)	 (((a) | (PT_IGNORE | PT_REF | PT_MOD)) == \
	((b) | (PT_IGNORE | PT_REF | PT_MOD)))

/*
 * Shorthand for converting a PTE to its pfn.
 *
 * PTE2MFN masks the entry with PT_PADDR_LGPG when it is a large page at
 * level "l" and with PT_PADDR otherwise, then converts bytes to a page
 * number. Under __xpv, PTE2PFN defers to pte2pfn() instead.
 */
#define	PTE2MFN(p, l)	\
	mmu_btop(PTE_GET((p), PTE_IS_LGPG((p), (l)) ? PT_PADDR_LGPG : PT_PADDR))
#ifdef __xpv
#define	PTE2PFN(p, l) pte2pfn(p, l)
#else
#define	PTE2PFN(p, l) PTE2MFN(p, l)
#endif

/*
 * PT_NX is bit 63 of an entry. PT_PADDR selects bits 12 through 51, the
 * physical address field; PT_PADDR_LGPG is the same field without bit 12.
 */
#define	PT_NX		(0x8000000000000000ull)
#define	PT_PADDR	(0x000ffffffffff000ull)
#define	PT_PADDR_LGPG	(0x000fffffffffe000ull)	/* phys addr for large pages */

/*
 * Macros to create a PTP or PTE from the pfn and level.
 *
 * MAKEPTP ors in mmu.ptp_bits[l + 1]; MAKEPTE ors in mmu.pte_bits[l].
 * Under __xpv the address is translated with pa_to_ma(), except for pfns
 * tagged with PFN_IS_FOREIGN_MFN: the tag is stripped and the entry is
 * marked PT_FOREIGN | PT_REF | PT_MOD instead.
 */
#ifdef __xpv

/*
 * we use the highest order bit in physical address pfns to mark foreign mfns
 */
#ifdef _LP64
#define	PFN_IS_FOREIGN_MFN (1ul << 51)
#else
#define	PFN_IS_FOREIGN_MFN (1ul << 31)
#endif

#define	MAKEPTP(pfn, l)	\
	(pa_to_ma(pfn_to_pa(pfn)) | mmu.ptp_bits[(l) + 1])
/* "pfn" is parenthesized so an expression argument binds as a whole */
#define	MAKEPTE(pfn, l) \
	(((pfn) & PFN_IS_FOREIGN_MFN) ? \
	((pfn_to_pa((pfn) & ~PFN_IS_FOREIGN_MFN) | mmu.pte_bits[l]) | \
	PT_FOREIGN | PT_REF | PT_MOD) : \
	(pa_to_ma(pfn_to_pa(pfn)) | mmu.pte_bits[l]))
#else
#define	MAKEPTP(pfn, l)	\
	(pfn_to_pa(pfn) | mmu.ptp_bits[(l) + 1])
#define	MAKEPTE(pfn, l)	\
	(pfn_to_pa(pfn) | mmu.pte_bits[l])
#endif

/*
 * The idea of "level" refers to the level where the page table is used in
 * the hardware address translation steps. The level values correspond to the
 * following names of tables used in AMD/Intel architecture documents:
 *
 *	AMD/INTEL name		Level #
 *	----------------------	-------
 *	Page Map Level 4	   3
 *	Page Directory Pointer	   2
 *	Page Directory		   1
 *	Page Table		   0
 *
 * The numbering scheme is such that the values of 0 and 1 can correspond to
 * the pagesize codes used for MPSS support. For now the Maximum level at
 * which you can have a large page is a constant, that may change in
 * future processors.
 *
 * The type of "level_t" is signed so that it can be used like:
 *	level_t	l;
 *	...
 *	while (--l >= 0)
 *		...
 *
 * The LEVEL_XXX macros look up the per-level value in the "mmu" tables.
 */
#define	MAX_NUM_LEVEL		4
#define	MAX_PAGE_LEVEL		2
#define	MIN_PAGE_LEVEL		0
typedef	int8_t level_t;
#define	LEVEL_SHIFT(l)	(mmu.level_shift[l])
#define	LEVEL_SIZE(l)	(mmu.level_size[l])
#define	LEVEL_OFFSET(l)	(mmu.level_offset[l])
#define	LEVEL_MASK(l)	(mmu.level_mask[l])

/*
 * Macros to:
 * Check for a PFN above 4Gig and 64Gig for 32 bit PAE support
 *
 * PFN_4G and PFN_64G are the page frame numbers of the first page at 4GB
 * and 64GB; the PFN_ABOVE macros are true for pfns at or beyond them.
 */
#define	PFN_4G		(4ull * (1024 * 1024 * 1024 / MMU_PAGESIZE))
#define	PFN_64G		(64ull * (1024 * 1024 * 1024 / MMU_PAGESIZE))
#define	PFN_ABOVE4G(pfn) ((pfn) >= PFN_4G)
#define	PFN_ABOVE64G(pfn) ((pfn) >= PFN_64G)

/*
 * The CR3 register holds the physical address of the top level page table,
 * along with the current PCID if any.
 *
 * "pcid" is parenthesized so that any expression (for example a
 * conditional) can be passed without it re-associating with the "|".
 */
#define	MAKECR3(pfn, pcid)	(mmu_ptob(pfn) | (pcid))

164 /*
165  * HAT/MMU parameters that depend on kernel mode and/or processor type
166  */
167 struct htable;
168 struct hat_mmu_info {
169 	x86pte_t pt_nx;		/* either 0 or PT_NX */
170 	x86pte_t pt_global;	/* either 0 or PT_GLOBAL */
171 
172 	pfn_t highest_pfn;
173 
174 	uint_t num_level;	/* number of page table levels in use */
175 	uint_t max_level;	/* just num_level - 1 */
176 	uint_t max_page_level;	/* maximum level at which we can map a page */
177 	uint_t umax_page_level; /* max user page map level */
178 	uint_t ptes_per_table;	/* # of entries in lower level page tables */
179 	uint_t top_level_count;	/* # of entries in top-level page table */
180 	uint_t top_level_uslots; /* # of user slots in top-level page table */
181 	uint_t num_copied_ents;	/* # of PCP-copied PTEs to create */
182 	/* 32-bit versions of values */
183 	uint_t top_level_uslots32;
184 	uint_t max_level32;
185 	uint_t num_copied_ents32;
186 
187 	uint_t hash_cnt;	/* cnt of entries in htable_hash_cache */
188 	uint_t hat32_hash_cnt;	/* cnt of entries in 32-bit htable_hash_cache */
189 
190 	uint_t pae_hat;		/* either 0 or 1 */
191 
192 	uintptr_t hole_start;	/* start of VA hole (or -1 if none) */
193 	uintptr_t hole_end;	/* end of VA hole (or 0 if none) */
194 
195 	struct htable **kmap_htables; /* htables for segmap + 32 bit heap */
196 	x86pte_t *kmap_ptes;	/* mapping of pagetables that map kmap */
197 	uintptr_t kmap_addr;	/* start addr of kmap */
198 	uintptr_t kmap_eaddr;	/* end addr of kmap */
199 
200 	uint_t pte_size;	/* either 4 or 8 */
201 	uint_t pte_size_shift;	/* either 2 or 3 */
202 	x86pte_t ptp_bits[MAX_NUM_LEVEL];	/* bits set for interior PTP */
203 	x86pte_t pte_bits[MAX_NUM_LEVEL];	/* bits set for leaf PTE */
204 
205 	/*
206 	 * A range of VA used to window pages in the i86pc/vm code.
207 	 * See PWIN_XXX macros.
208 	 */
209 	caddr_t	pwin_base;
210 	caddr_t	pwin_pte_va;
211 	paddr_t	pwin_pte_pa;
212 
213 	/*
214 	 * The following tables are equivalent to PAGEXXXXX at different levels
215 	 * in the page table hierarchy.
216 	 */
217 	uint_t level_shift[MAX_NUM_LEVEL];	/* PAGESHIFT for given level */
218 	uintptr_t level_size[MAX_NUM_LEVEL];	/* PAGESIZE for given level */
219 	uintptr_t level_offset[MAX_NUM_LEVEL];	/* PAGEOFFSET for given level */
220 	uintptr_t level_mask[MAX_NUM_LEVEL];	/* PAGEMASK for given level */
221 };
222 
223 
#if defined(_KERNEL)

/*
 * Macros to access the HAT's private page windows. They're used for
 * accessing pagetables, ppcopy() and page_zero().
 * The 1st two macros are used to get an index for the particular use.
 * The next three give you:
 * - the virtual address of the window
 * - the virtual address of the pte that maps the window
 * - the physical address of the pte that maps the window
 *
 * Each cpuid owns two consecutive window indexes: the even one from
 * PWIN_TABLE() and the odd one from PWIN_SRC().
 */
#define	PWIN_TABLE(cpuid)	((cpuid) * 2)
#define	PWIN_SRC(cpuid)		((cpuid) * 2 + 1)	/* for x86pte_copy() */
#define	PWIN_VA(x)		(mmu.pwin_base + ((x) << MMU_PAGESHIFT))
#define	PWIN_PTE_VA(x)		(mmu.pwin_pte_va + ((x) << mmu.pte_size_shift))
#define	PWIN_PTE_PA(x)		(mmu.pwin_pte_pa + ((x) << mmu.pte_size_shift))

/*
 * The concept of a VA hole exists in AMD64. This might need to be made
 * model specific eventually.
 *
 * In the 64 bit kernel PTE loads are atomic, but need atomic_cas_64 on 32
 * bit kernel.
 *
 * GET_PTE, SET_PTE and CAS_PTE read, write and compare-and-swap the entry
 * that "ptr" points at; FMT_PTE is the printf-style format for an entry.
 * The "pte" argument of SET_PTE is parenthesized so that an expression
 * argument is stored (and, on i386, shifted) as a whole.
 */
#if defined(__amd64)

#ifdef lint
#define	IN_VA_HOLE(va)	(__lintzero)
#else
#define	IN_VA_HOLE(va)	(mmu.hole_start <= (va) && (va) < mmu.hole_end)
#endif

#define	FMT_PTE "0x%lx"
#define	GET_PTE(ptr)		(*(x86pte_t *)(ptr))
#define	SET_PTE(ptr, pte)	(*(x86pte_t *)(ptr) = (pte))
#define	CAS_PTE(ptr, x, y)	atomic_cas_64(ptr, x, y)

#elif defined(__i386)

#define	IN_VA_HOLE(va)	(__lintzero)

#define	FMT_PTE "0x%llx"

/* on 32 bit kernels, 64 bit loads aren't atomic, use get_pte64() */
extern x86pte_t get_pte64(x86pte_t *ptr);
#define	GET_PTE(ptr)	(mmu.pae_hat ? get_pte64(ptr) : *(x86pte32_t *)(ptr))
/* with PAE the upper 32 bits are stored first, then the lower 32 bits */
#define	SET_PTE(ptr, pte)						\
	((mmu.pae_hat ? ((x86pte32_t *)(ptr))[1] = ((pte) >> 32) : 0),	\
	*(x86pte32_t *)(ptr) = (pte))
#define	CAS_PTE(ptr, x, y)			\
	(mmu.pae_hat ? atomic_cas_64(ptr, x, y) :	\
	atomic_cas_32((uint32_t *)(ptr), (uint32_t)(x), (uint32_t)(y)))

#endif	/* __i386 */

/*
 * Return a pointer to the pte entry at the given index within a page table.
 * The byte offset is the index shifted left by mmu.pte_size_shift.
 */
#define	PT_INDEX_PTR(p, x) \
	((x86pte_t *)((uintptr_t)(p) + ((x) << mmu.pte_size_shift)))

/*
 * Return the physical address of the pte entry at the given index within a
 * page table. "p" is converted to paddr_t before the byte offset is added.
 */
#define	PT_INDEX_PHYSADDR(p, x) \
	((paddr_t)(p) + ((x) << mmu.pte_size_shift))

/*
 * From pfn to bytes, careful not to lose bits on PAE.
 * The pfn is converted to paddr_t before mmu_ptob() is applied to it.
 */
#define	pfn_to_pa(pfn) (mmu_ptob((paddr_t)(pfn)))

/*
 * pte2pfn() backs PTE2PFN() under __xpv; "mmu" is the hat_mmu_info instance
 * that the macros in this header read.
 */
#ifdef __xpv
extern pfn_t pte2pfn(x86pte_t, level_t);
#endif

extern struct hat_mmu_info mmu;

#endif	/* _KERNEL */


#ifdef	__cplusplus
}
#endif

#endif	/* _VM_HAT_PTE_H */