xref: /illumos-gate/usr/src/uts/i86pc/vm/htable.h (revision e1a4a99e6f424cd8d62deb51dccd37f0406e7204)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef	_VM_HTABLE_H
28 #define	_VM_HTABLE_H
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #ifdef	__cplusplus
33 extern "C" {
34 #endif
35 
36 #if defined(__GNUC__) && defined(_ASM_INLINES) && defined(_KERNEL)
37 #include <asm/htable.h>
38 #endif
39 
40 extern void atomic_andb(uint8_t *addr, uint8_t value);	/* by name: atomic *addr &= value */
41 extern void atomic_orb(uint8_t *addr, uint8_t value);	/* by name: atomic *addr |= value */
42 extern void atomic_inc16(uint16_t *addr);	/* used by HTABLE_INC() */
43 extern void atomic_dec16(uint16_t *addr);	/* used by HTABLE_DEC() */
44 extern void mmu_tlbflush_entry(caddr_t addr);	/* by name: flush TLB entry for addr */
45 
46 /*
47  * Each hardware page table has an htable_t describing it.
48  *
49  * We use a reference counter mechanism to detect when we can free an htable.
50  * In the implementation the reference count is split into 2 separate counters:
51  *
52  *	ht_busy is a traditional reference count of uses of the htable pointer
53  *
54  *	ht_valid_cnt is a count of how many references are implied by valid
55  *	         PTE/PTP entries in the pagetable
56  *
57  * ht_busy is only incremented by htable_lookup() or htable_create()
58  * while holding the appropriate hash_table mutex. While installing a new
59  * valid PTE or PTP, in order to increment ht_valid_cnt a thread must have
60  * done an htable_lookup() or htable_create() but not the htable_release() yet.
61  *
62  * htable_release(), while holding the mutex, can know that if
63  * ht_busy == 1 and ht_valid_cnt == 0, the htable can be freed.
64  *
65  * The fields have been ordered to make htable_lookup() fast. Hence,
66  * ht_hat, ht_vaddr, ht_level and ht_next need to be clustered together.
67  */
68 struct htable {
69 	struct htable	*ht_next;	/* forward link for hash table */
70 	struct hat	*ht_hat;	/* hat this mapping comes from */
71 	uintptr_t	ht_vaddr;	/* virt addr at start of this table */
72 	level_t		ht_level;	/* page table level: 0=4K, 1=2M, ... */
73 	uint16_t	ht_flags;	/* HTABLE_* flag bits, see below */
74 	int16_t		ht_busy;	/* ref count of htable pointer uses */
75 	uint16_t	ht_num_ptes;	/* # of PTEs in page table */
76 	int16_t		ht_valid_cnt;	/* # of valid entries in this table */
77 	uint32_t	ht_lock_cnt;	/* # of locked entries in this table */
78 					/* never used for kernel hat */
79 	pfn_t		ht_pfn;		/* pfn of page of the pagetable */
80 	struct htable	*ht_prev;	/* backward link for hash table */
81 	struct htable	*ht_parent;	/* htable that points to this htable */
82 	struct htable	*ht_shares;	/* for HTABLE_SHARED_PFN only */
83 };
84 typedef struct htable htable_t;
85 
86 /*
87  * Flag values for the htable ht_flags field:
88  *
89  * HTABLE_VLP - this is the top level htable of a VLP HAT.
90  *
91  * HTABLE_SHARED_PFN - this htable had its PFN assigned from sharing another
92  * 	htable (see ht_shares). Used by hat_share() for ISM.
93  */
94 #define	HTABLE_VLP		(0x0001)
95 #define	HTABLE_SHARED_PFN	(0x0002)
96 
97 /*
98  * The htable hash table hashing function.  The 28 is so that high
99  * order bits are included in the hash index to skew the wrap
100  * around of addresses. Even though the hash buckets are stored per
101  * hat we include the value of hat pointer in the hash function so
102  * that the secondary hash for the htable mutex winds up being different in
103  * every address space. NOTE(review): mask assumes power-of-2 hat_num_hash.
104  */
105 #define	HTABLE_HASH(hat, va, lvl)					\
106 	((((va) >> LEVEL_SHIFT(1)) + ((va) >> 28) + (lvl) +		\
107 	((uintptr_t)(hat) >> 4)) & ((hat)->hat_num_hash - 1))
108 
109 /*
110  * For 32 bit, access to page table entries is done via the page table's PFN and
111  * the index of the PTE. We use a CPU specific mapping (a la ppcopy) to map
112  * in page tables on an "as needed" basis.
113  *
114  * 64 bit kernels will use seg_kpm style mappings and avoid any overhead.
115  *
116  * The code uses compare and swap instructions to read/write PTEs to
117  * avoid atomicity problems, since PTEs can be 8 bytes on 32 bit systems.
118  * Again this can be optimized on 64 bit systems, since aligned load/store
119  * will naturally be atomic.
120  *
121  * Each CPU gets a unique hat_cpu_info structure in cpu_hat_info.
122  */
123 struct hat_cpu_info {
124 	pfn_t hci_mapped_pfn;		/* pfn of currently mapped page table */
125 	x86pte_t *hci_pagetable_va;	/* VA to use for mappings */
126 	x86pte_t *hci_kernel_pte;	/* kernel PTE for hci_pagetable_va */
127 	kmutex_t hci_mutex;		/* mutex to ensure sequential usage */
128 #if defined(__amd64)
129 	pfn_t	hci_vlp_pfn;		/* pfn of hci_vlp_l3ptes */
130 	x86pte_t *hci_vlp_l3ptes;	/* VLP Level==3 pagetable (top) */
131 	x86pte_t *hci_vlp_l2ptes;	/* VLP Level==2 pagetable */
132 #endif	/* __amd64 */
133 };
134 
135 
136 /*
137  * HTABLE_LAST_PAGE(ht): the last page aligned VA mapped by an htable.
138  *
139  * NEXT_ENTRY_VA(va, l): given a va and a level, the virtual address of the
140  * start of the next page at that level.  On amd64 the VA hole is skipped.
141  * Arguments are fully parenthesized but may be evaluated more than once.
142  * XX64 - The check for the VA hole needs to be better generalized.
143  */
144 #if defined(__amd64)
145 
146 #define	HTABLE_LAST_PAGE(ht)						\
147 	((ht)->ht_level == mmu.max_level ? ((uintptr_t)0UL - MMU_PAGESIZE) :\
148 	((ht)->ht_vaddr - MMU_PAGESIZE +				\
149 	((uintptr_t)((ht)->ht_num_ptes) << LEVEL_SHIFT((ht)->ht_level))))
150 
151 #define	NEXT_ENTRY_VA(va, l)	\
152 	((((va) & LEVEL_MASK(l)) + LEVEL_SIZE(l)) == mmu.hole_start ?	\
153 	mmu.hole_end : (((va) & LEVEL_MASK(l)) + LEVEL_SIZE(l)))
154 
155 #elif defined(__i386)
156 
157 #define	HTABLE_LAST_PAGE(ht)	((ht)->ht_vaddr - MMU_PAGESIZE + \
158 	((uintptr_t)((ht)->ht_num_ptes) << LEVEL_SHIFT((ht)->ht_level)))
159 
160 #define	NEXT_ENTRY_VA(va, l) (((va) & LEVEL_MASK(l)) + LEVEL_SIZE(l))
161 
162 #endif
163 
164 #if defined(_KERNEL)
165 
166 /*
167  * Initialization function called from hat_init().
168  */
169 extern void htable_init(void);
170 
171 /*
172  * Functions to lookup, or "lookup and create", the htable corresponding
173  * to the virtual address "vaddr" in the "hat" at the given "level" of
174  * page tables. htable_lookup() may return NULL if no such entry exists.
175  *
176  * On return the given htable is marked busy (a shared lock) - this prevents
177  * the htable from being stolen or freed until htable_release() is called.
178  *
179  * If kalloc_flag is set on an htable_create() we can't call kmem allocation
180  * routines for this htable, since it's for the kernel hat itself.
181  * NOTE(review): no kalloc_flag parameter appears in the prototype below.
182  * htable_acquire() is used when an htable pointer has been extracted from
183  * an hment and we need to get a reference to the htable.
184  */
185 extern htable_t *htable_lookup(struct hat *hat, uintptr_t vaddr, level_t level);
186 extern htable_t *htable_create(struct hat *hat, uintptr_t vaddr, level_t level,
187 	htable_t *shared);
188 extern void htable_acquire(htable_t *);
189 
190 extern void htable_release(htable_t *ht);	/* ends the busy hold */
191 
192 /*
193  * Frees all remaining htables for a hat. Called after the hat is no
194  * longer in use by any thread.
195  */
196 extern void htable_purge_hat(struct hat *hat);
197 
198 /*
199  * Find the htable, page table entry index, and PTE of the given virtual
200  * address.  If not found, returns NULL. When found, returns the htable_t *,
201  * sets entry, and has a hold on the htable.
202  */
203 extern htable_t *htable_getpte(struct hat *, uintptr_t, uint_t *, x86pte_t *,
204 	level_t);	/* presumably (hat, va, &entry, &pte, level) - confirm */
205 
206 /*
207  * Similar to htable_getpte(), except that this only succeeds if a valid
208  * page mapping is present.
209  */
210 extern htable_t *htable_getpage(struct hat *hat, uintptr_t va, uint_t *entry);
211 
212 /*
213  * Called to allocate initial/additional htables for the reserve.
214  */
215 extern void htable_initial_reserve(uint_t);	/* arg presumably a count - confirm */
216 extern void htable_reserve(uint_t);	/* arg presumably a count - confirm */
217 
218 /*
219  * Used to readjust the htable reserve after the reserve list has been used.
220  * Also called after boot to release leftover boot reserves.
221  */
222 extern void htable_adjust_reserve(void);
223 
224 /*
225  * Routine to find the next populated htable at or above a given virtual
226  * address. Can specify an upper limit, or HTABLE_WALK_TO_END to indicate
227  * that it should search the entire address space.  Similar to
228  * htable_getpte(), but used for walking through address ranges. It can be
229  * used like this:
230  *
231  *	va = ...
232  *	ht = NULL;
233  *	while (va < end_va) {
234  *		pte = htable_walk(hat, &ht, &va, end_va);
235  *		if (!pte)
236  *			break;
237  *
238  *		... code to operate on page at va ...
239  *
240  *		va += LEVEL_SIZE(ht->ht_level);
241  *	}
242  *	if (ht)
243  *		htable_release(ht);
244  * Per the example: a 0 return ends the walk; caller releases a non-NULL ht.
245  */
246 extern x86pte_t htable_walk(struct hat *hat, htable_t **ht, uintptr_t *va,
247 	uintptr_t eaddr);
248 
249 #define	HTABLE_WALK_TO_END ((uintptr_t)-1)	/* all-ones uintptr_t */
250 
251 /*
252  * Utilities to convert between virtual addresses and page table entry indices.
253  */
254 extern uint_t htable_va2entry(uintptr_t va, htable_t *ht);
255 extern uintptr_t htable_e2va(htable_t *ht, uint_t entry);
256 
257 /*
258  * Interfaces that provide access to page table entries via the htable.
259  *
260  * Note that all accesses except x86pte_copy() and x86pte_zero() are atomic.
261  */
262 extern void	x86pte_cpu_init(cpu_t *, void *);
263 
264 extern x86pte_t	x86pte_get(htable_t *, uint_t entry);
265 /* NOTE(review): parameter name "new" below is a C++ keyword, despite the extern "C" guard */
266 extern x86pte_t	x86pte_set(htable_t *, uint_t entry, x86pte_t new, void *);
267 
268 extern x86pte_t x86pte_invalidate_pfn(htable_t *ht, uint_t entry, pfn_t pfn,
269 	void *pte_ptr);
270 /* presumably a compare-and-swap: install new only if the entry still equals old - confirm */
271 extern x86pte_t x86pte_update(htable_t *ht, uint_t entry,
272 	x86pte_t old, x86pte_t new);
273 /* by parameter names: cnt entries starting at entry, src to dest; not atomic (see above) */
274 extern void	x86pte_copy(htable_t *src, htable_t *dest, uint_t entry,
275 	uint_t cnt);
276 /* by name: zeroes cnt entries starting at entry; not atomic (see above) */
277 extern void	x86pte_zero(htable_t *ht, uint_t entry, uint_t cnt);
278 
279 
280 /*
281  * Atomic counter updates; these are actually inlines for "lock; incw", etc.
282  */
283 #define	HTABLE_INC(x)	atomic_inc16((uint16_t *)&(x))	/* x: 16-bit lvalue */
284 #define	HTABLE_DEC(x)	atomic_dec16((uint16_t *)&(x))	/* x: 16-bit lvalue */
285 #define	HTABLE_LOCK_INC(ht)	atomic_add_32(&(ht)->ht_lock_cnt, 1)
286 #define	HTABLE_LOCK_DEC(ht)	atomic_add_32(&(ht)->ht_lock_cnt, -1)
287 
288 #endif	/* _KERNEL */
289 
290 
291 #ifdef	__cplusplus
292 }
293 #endif
294 
295 #endif	/* _VM_HTABLE_H */
296