xref: /titanic_52/usr/src/uts/i86pc/vm/htable.h (revision 4e5fbfeda6c7dee3dd62538723087263e6de8e18)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #ifndef	_VM_HTABLE_H
27 #define	_VM_HTABLE_H
28 
29 #pragma ident	"%Z%%M%	%I%	%E% SMI"
30 
31 #ifdef	__cplusplus
32 extern "C" {
33 #endif
34 
35 #if defined(__GNUC__) && defined(_ASM_INLINES) && defined(_KERNEL)
36 #include <asm/htable.h>
37 #endif
38 
39 extern void atomic_andb(uint8_t *addr, uint8_t value);	/* byte-wide operand */
40 extern void atomic_orb(uint8_t *addr, uint8_t value);	/* byte-wide operand */
41 extern void atomic_inc16(uint16_t *addr);	/* used by HTABLE_INC() */
42 extern void atomic_dec16(uint16_t *addr);	/* used by HTABLE_DEC() */
43 extern void mmu_tlbflush_entry(caddr_t addr);	/* not used in this header */
44 
45 /*
46  * Each hardware page table has an htable_t describing it.
47  *
48  * We use a reference counter mechanism to detect when we can free an htable.
49  * In the implementation the reference count is split into 2 separate counters:
50  *
51  *	ht_busy is a traditional reference count of uses of the htable pointer
52  *
53  *	ht_valid_cnt is a count of how many references are implied by
54  *	         valid PTE/PTP entries in the pagetable
55  *
56  * ht_busy is only incremented by htable_lookup() or htable_create()
57  * while holding the appropriate hash_table mutex. While installing a new
58  * valid PTE or PTP, in order to increment ht_valid_cnt a thread must have
59  * done an htable_lookup() or htable_create() but not the htable_release yet.
60  *
61  * htable_release(), while holding the mutex, can know that if
62  * busy == 1 and valid_cnt == 0, the htable can be free'd.
63  *
64  * The fields have been ordered to make htable_lookup() fast. Hence,
65  * ht_hat, ht_vaddr, ht_level and ht_next need to be clustered together.
66  */
67 struct htable {
68 	struct htable	*ht_next;	/* forward link for hash table */
69 	struct hat	*ht_hat;	/* hat this mapping comes from */
70 	uintptr_t	ht_vaddr;	/* virt addr at start of this table */
71 	int8_t		ht_level;	/* page table level: 0=4K, 1=2M, ... */
72 	uint8_t		ht_flags;	/* HTABLE_VLP, HTABLE_SHARED_PFN */
73 	int16_t		ht_busy;	/* ref count of htable pointer uses */
74 	int16_t		ht_valid_cnt;	/* # of valid PTE/PTP entries in table */
75 	uint32_t	ht_lock_cnt;	/* # of locked entries in this table */
76 					/* never used for kernel hat */
77 	pfn_t		ht_pfn;		/* pfn of page of the pagetable */
78 	struct htable	*ht_prev;	/* backward link for hash table */
79 	struct htable	*ht_parent;	/* htable that points to this htable */
80 	struct htable	*ht_shares;	/* for HTABLE_SHARED_PFN only */
81 };
82 typedef struct htable htable_t;
83 
84 /*
85  * Flags values for htable ht_flags field:
86  *
87  * HTABLE_VLP - this is the top level htable of a VLP HAT.
88  *
89  * HTABLE_SHARED_PFN - this htable had its PFN assigned from sharing another
90  * 	htable. Used by hat_share() for ISM.
91  */
92 #define	HTABLE_VLP		(0x01)	/* top level htable of VLP HAT */
93 #define	HTABLE_SHARED_PFN	(0x02)	/* PFN shared from other htable */
94 
95 /*
96  * The htable hash table hashing function.  The 28 is so that high order bits
97  * are included in the hash index to skew the wrap around of addresses. Even
98  * though the hash buckets are stored per hat we include the value of hat
99  * pointer in the hash function so that the secondary hash for the htable
100  * mutex winds up being different in every address space.
101  * Note: the macro evaluates both "hat" and "va" twice.
102  */
103 #define	HTABLE_HASH(hat, va, lvl)					\
104 	((((va) >> LEVEL_SHIFT(1)) + ((va) >> 28) + (lvl) +		\
105 	((uintptr_t)(hat) >> 4)) & ((hat)->hat_num_hash - 1))
106 
107 /*
108  * Per-CPU HAT state: each CPU gets a unique hat_cpu_info in cpu_hat_info.
109  */
110 struct hat_cpu_info {
111 	kmutex_t hci_mutex;		/* mutex to ensure sequential usage */
112 #if defined(__amd64)			/* VLP fields: amd64 only */
113 	pfn_t	hci_vlp_pfn;		/* pfn of hci_vlp_l3ptes */
114 	x86pte_t *hci_vlp_l3ptes;	/* VLP Level==3 pagetable (top) */
115 	x86pte_t *hci_vlp_l2ptes;	/* VLP Level==2 pagetable */
116 #endif	/* __amd64 */
117 };
118 
119 
120 /*
121  * HTABLE_NUM_PTES(ht): entries per pagetable (PAE: 4 for VLP, else 512).
122  * HTABLE_LAST_PAGE(ht): the last page aligned VA mapped by an htable.
123  * NEXT_ENTRY_VA(va, l): given a va and a level, the virtual address of the
124  * start of the next page at that level.  Arguments are evaluated repeatedly.
125  *
126  * XX64 - The check for the VA hole needs to be better generalized.
127  */
128 #define	HTABLE_NUM_PTES_PAE(ht)		\
129 	(((ht)->ht_flags & HTABLE_VLP) ? 4 : 512)
130 #if defined(__amd64)
131 #define	HTABLE_NUM_PTES(ht)	HTABLE_NUM_PTES_PAE(ht)
132 /* The top level table ends at the last page of the address space. */
133 #define	HTABLE_LAST_PAGE(ht)						\
134 	((ht)->ht_level == mmu.max_level ? ((uintptr_t)0UL - MMU_PAGESIZE) :\
135 	((ht)->ht_vaddr - MMU_PAGESIZE +				\
136 	((uintptr_t)HTABLE_NUM_PTES(ht) << LEVEL_SHIFT((ht)->ht_level))))
137 /* Stepping onto mmu.hole_start skips ahead to mmu.hole_end. */
138 #define	NEXT_ENTRY_VA(va, l)	\
139 	(((va) & LEVEL_MASK(l)) + LEVEL_SIZE(l) == mmu.hole_start ?	\
140 	mmu.hole_end : ((va) & LEVEL_MASK(l)) + LEVEL_SIZE(l))
141 
142 #elif defined(__i386)
143 /* Non-PAE pagetables have 1024 entries. */
144 #define	HTABLE_NUM_PTES(ht)	(!mmu.pae_hat ? 1024 : HTABLE_NUM_PTES_PAE(ht))
145 
146 #define	HTABLE_LAST_PAGE(ht)	((ht)->ht_vaddr - MMU_PAGESIZE + \
147 	((uintptr_t)HTABLE_NUM_PTES(ht) << LEVEL_SHIFT((ht)->ht_level)))
148 
149 #define	NEXT_ENTRY_VA(va, l) (((va) & LEVEL_MASK(l)) + LEVEL_SIZE(l))
150 
151 #endif	/* __amd64 / __i386 */
152 
153 #if defined(_KERNEL)
154 
155 /*
156  * Initialization function; called from hat_init().
157  */
158 extern void htable_init(void);
159 
160 /*
161  * Functions to lookup, or "lookup and create", the htable corresponding
162  * to the virtual address "vaddr"  in the "hat" at the given "level" of
163  * page tables. htable_lookup() may return NULL if no such entry exists.
164  *
165  * On return the given htable is marked busy (a shared lock) - this prevents
166  * the htable from being stolen or freed until htable_release() is called.
167  *
168  * If kalloc_flag is set on an htable_create() we can't call kmem allocation
169  * routines for this htable, since it's for the kernel hat itself.
170  * (NOTE(review): htable_create() below has no kalloc_flag argument - stale?)
171  * htable_acquire() is used when an htable pointer has been extracted from
172  * an hment and we need to get a reference to the htable.
173  */
174 extern htable_t *htable_lookup(struct hat *hat, uintptr_t vaddr, level_t level);
175 extern htable_t *htable_create(struct hat *hat, uintptr_t vaddr, level_t level,
176 	htable_t *shared);
177 extern void htable_acquire(htable_t *ht);
178 
179 extern void htable_release(htable_t *ht);
180 extern void htable_destroy(htable_t *ht);
181 
182 /*
183  * Free all remaining htables for a hat. Called after the hat is no
184  * longer in use by any thread.
185  */
186 extern void htable_purge_hat(struct hat *hat);
187 
188 /*
189  * Find the htable, page table entry index, and PTE of the given virtual
190  * address.  If not found returns NULL. When found, returns the htable_t *,
191  * sets entry, and has a hold on the htable.
192  */
193 extern htable_t *htable_getpte(struct hat *hat, uintptr_t vaddr, uint_t *entry,
194 	x86pte_t *pte, level_t level);
195 
196 /*
197  * Similar to htable_getpte(), except that this only succeeds if a valid
198  * page mapping is present.
199  */
200 extern htable_t *htable_getpage(struct hat *hat, uintptr_t va, uint_t *entry);
201 
202 /*
203  * Called to allocate initial/additional htables for reserve.
204  */
205 extern void htable_initial_reserve(uint_t count);
206 extern void htable_reserve(uint_t count);
207 
208 /*
209  * Used to readjust the htable reserve after the reserve list has been used.
210  * Also called after boot to release left over boot reserves.
211  */
212 extern void htable_adjust_reserve(void);
213 
214 /*
215  * Attach initial pagetables as htables.
216  */
217 extern void htable_attach(struct hat *hat, uintptr_t base, level_t level,
218     struct htable *parent, pfn_t pfn);
219 
220 /*
221  * Return the number of pages mapped by a hat.
222  */
223 extern pgcnt_t htable_count_pages(struct hat *hat);
224 
225 /*
226  * Routine to find the next populated htable at or above a given virtual
227  * address. Can specify an upper limit, or HTABLE_WALK_TO_END to indicate
228  * that it should search the entire address space.  Similar to
229  * htable_getpte(), but used for walking through address ranges. It can be
230  * used like this:
231  *
232  *	va = ...
233  *	ht = NULL;
234  *	while (va < end_va) {
235  *		pte = htable_walk(hat, &ht, &va, end_va);
236  *		if (!pte)
237  *			break;
238  *
239  *		... code to operate on page at va ...
240  *
241  *		va += LEVEL_SIZE(ht->ht_level);
242  *	}
243  *	if (ht)
244  *		htable_release(ht);
245  *
246  */
247 extern x86pte_t htable_walk(struct hat *hat, htable_t **ht, uintptr_t *va,
248 	uintptr_t eaddr);
249 /* "eaddr" value for htable_walk() that searches the entire address space. */
250 #define	HTABLE_WALK_TO_END ((uintptr_t)-1)
251 
252 /*
253  * Utilities to convert between virtual addresses and page table entry indices.
254  */
255 extern uint_t htable_va2entry(uintptr_t va, htable_t *ht);
256 extern uintptr_t htable_e2va(htable_t *ht, uint_t entry);
257 
258 /*
259  * Interfaces that provide access to page table entries via the htable.
260  *
261  * Note that all accesses except x86pte_copy() and x86pte_zero() are atomic.
262  */
263 extern void	x86pte_cpu_init(cpu_t *cpu);
264 extern void	x86pte_cpu_fini(cpu_t *cpu);
265 
266 extern x86pte_t	x86pte_get(htable_t *ht, uint_t entry);
267 
268 /*
269  * x86pte_set returns LPAGE_ERROR if it's asked to overwrite a page table
270  * link with a large page mapping.
271  */
272 #define	LPAGE_ERROR (-(x86pte_t)1)
273 extern x86pte_t	x86pte_set(htable_t *ht, uint_t entry, x86pte_t new, void *ptr);
274 
275 extern x86pte_t x86pte_inval(htable_t *ht, uint_t entry,
276 	x86pte_t old, x86pte_t *ptr);
277 
278 extern x86pte_t x86pte_update(htable_t *ht, uint_t entry,
279 	x86pte_t old, x86pte_t new);
280 
281 extern void	x86pte_copy(htable_t *src, htable_t *dest, uint_t entry,
282 	uint_t cnt);
283 
284 /*
285  * Access to a pagetable knowing only the pfn.
286  */
287 extern x86pte_t *x86pte_mapin(pfn_t pfn, uint_t index, htable_t *ht);
288 extern void x86pte_mapout(void);
289 
290 /*
291  * Atomic counter updates; actually inlines for "lock; incw", "lock; decw", etc.
292  */
293 #define	HTABLE_INC(x)	atomic_inc16((uint16_t *)&(x))	/* x: 16-bit counter */
294 #define	HTABLE_DEC(x)	atomic_dec16((uint16_t *)&(x))	/* x: 16-bit counter */
295 #define	HTABLE_LOCK_INC(ht)	atomic_add_32(&(ht)->ht_lock_cnt, 1)
296 #define	HTABLE_LOCK_DEC(ht)	atomic_add_32(&(ht)->ht_lock_cnt, -1)
297 
298 #endif	/* _KERNEL */
299 
300 
301 #ifdef	__cplusplus
302 }
303 #endif
304 
305 #endif	/* _VM_HTABLE_H */
306