xref: /illumos-gate/usr/src/uts/i86pc/vm/hat_i86.h (revision 74ecdb5171c9f3673b9393b1a3dc6f3a65e93895)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright (c) 2014 by Delphix. All rights reserved.
27  * Copyright 2018 Joyent, Inc.
28  */
29 
30 #ifndef	_VM_HAT_I86_H
31 #define	_VM_HAT_I86_H
32 
33 
34 #ifdef	__cplusplus
35 extern "C" {
36 #endif
37 
38 /*
39  * VM - Hardware Address Translation management.
40  *
41  * This file describes the contents of the x86_64 HAT data structures.
42  */
43 #include <sys/types.h>
44 #include <sys/t_lock.h>
45 #include <sys/cpuvar.h>
46 #include <sys/x_call.h>
47 #include <vm/seg.h>
48 #include <vm/page.h>
49 #include <sys/vmparam.h>
50 #include <sys/vm_machparam.h>
51 #include <sys/promif.h>
52 #include <vm/hat_pte.h>
53 #include <vm/htable.h>
54 #include <vm/hment.h>
55 
56 /*
57  * The essential data types involved:
58  *
59  * htable_t	- There is one of these for each page table and it is used
60  *		by the HAT to manage the page table.
61  *
62  * hment_t	- Links together multiple PTEs to a single page.
63  */
64 
65 /*
66  * Maximum number of per-CPU pagetable entries that we'll need to cache in the
67  * HAT. See the big theory statement in uts/i86pc/vm/hat_i86.c for more
68  * information.
69  */
70 #if defined(__xpv)
71 /*
72  * The Xen hypervisor does not use per-CPU pagetables (PCP). Keep a one-entry
73  * array anyway so that the hat_copied_ptes[] member of struct hat does not
74  * have to be made conditional.
75  */
76 #define	MAX_COPIED_PTES	1
77 #else
78 /*
79  * The 64-bit kernel may have up to 512 PTEs present in it for a given process.
80  */
81 #define	MAX_COPIED_PTES	512
82 #endif	/* __xpv */
83 
84 #define	TOP_LEVEL(h)	(((h)->hat_max_level))	/* top level of hat h */
85 
86 /*
87  * The hat struct exists for each address space.
88  */
89 struct hat {
90 	kmutex_t	hat_mutex;	/* mutex embedded in this hat */
91 	struct as	*hat_as;	/* address space this hat exists for */
92 	uint_t		hat_stats;
93 	pgcnt_t		hat_pages_mapped[MAX_PAGE_LEVEL + 1]; /* by level */
94 	pgcnt_t		hat_ism_pgcnt;
95 	cpuset_t	hat_cpus;
96 	uint16_t	hat_flags;	/* HAT_* flag bits defined below */
97 	uint8_t		hat_max_level;	/* top level of this HAT */
98 	uint_t		hat_num_copied;	/* Actual num of hat_copied_ptes[] */
99 	htable_t	*hat_htable;	/* top level htable */
100 	struct hat	*hat_next;	/* NOTE(review): presumably list links, */
101 	struct hat	*hat_prev;	/* under hat_list_lock -- confirm */
102 	uint_t		hat_num_hash;	/* number of htable hash buckets */
103 	htable_t	**hat_ht_hash;	/* htable hash buckets */
104 	htable_t	*hat_ht_cached;	/* cached free htables */
105 	x86pte_t	hat_copied_ptes[MAX_COPIED_PTES];
106 #if defined(__amd64) && defined(__xpv)
107 	pfn_t		hat_user_ptable; /* alt top ptable for user mode */
108 #endif
109 };
110 typedef struct hat hat_t;
111 
/*
 * Bump or drop, via the atomic_{inc,dec}_ulong() primitives, the count of
 * pages a hat has mapped at the given page table level.
 *
 * Neither macro ends in a semicolon: each expands to exactly one expression,
 * so the caller supplies the terminating ';' and the macros are safe to use
 * as the unbraced body of an if/else.
 */
#define	PGCNT_INC(hat, level)	\
	atomic_inc_ulong(&(hat)->hat_pages_mapped[level])
#define	PGCNT_DEC(hat, level)	\
	atomic_dec_ulong(&(hat)->hat_pages_mapped[level])
116 
117 /*
118  * Flags for the hat_flags field. For more information, please see the big
119  * theory statement on the HAT design in uts/i86pc/vm/hat_i86.c.
120  *
121  * HAT_FREEING - set when HAT is being destroyed - mostly used to detect that
122  *	demap()s can be avoided.
123  *
124  * HAT_COPIED - Indicates this HAT is a source for per-cpu page tables: see the
125  *	big comment in hat_i86.c for a description.
126  *
127  * HAT_COPIED_32 - HAT_COPIED, but for an ILP32 process.
128  *
129  * HAT_VICTIM - This is set while a hat is being examined for page table
130  *	stealing and prevents it from being freed.
131  *
132  * HAT_SHARED - The hat has exported its page tables via hat_share().
133  *
134  * HAT_PINNED - On the hypervisor, indicates the top page table has been pinned.
135  *
136  * HAT_PCP - Used for the per-cpu user page table (i.e. associated with a CPU,
137  *	not a process).
138  */
139 #define	HAT_FREEING	(0x0001)
140 #define	HAT_VICTIM	(0x0002)
141 #define	HAT_SHARED	(0x0004)
142 #define	HAT_PINNED	(0x0008)
143 #define	HAT_COPIED	(0x0010)
144 #define	HAT_COPIED_32	(0x0020)
145 #define	HAT_PCP		(0x0040)
146 
147 /*
148  * Additional platform attribute for hat_devload() to force no caching.
149  */
150 #define	HAT_PLAT_NOCACHE	(0x100000)
151 
152 /*
153  * Simple statistics for the HAT. These are plain counters; HATSTAT_INC()
154  * bumps them with a non-atomic increment, and only on DEBUG kernels. They
155  * can be reset directly from the kernel debugger.
156  */
157 struct hatstats {
158 	ulong_t	hs_reap_attempts;
159 	ulong_t	hs_reaped;
160 	ulong_t	hs_steals;
161 	ulong_t	hs_ptable_allocs;
162 	ulong_t	hs_ptable_frees;
163 	ulong_t	hs_htable_rgets;	/* allocs from reserve */
164 	ulong_t	hs_htable_rputs;	/* putbacks to reserve */
165 	ulong_t	hs_htable_shared;	/* number of htables shared */
166 	ulong_t	hs_htable_unshared;	/* number of htables unshared */
167 	ulong_t	hs_hm_alloc;
168 	ulong_t	hs_hm_free;
169 	ulong_t	hs_hm_put_reserve;
170 	ulong_t	hs_hm_get_reserve;
171 	ulong_t	hs_hm_steals;
172 	ulong_t	hs_hm_steal_exam;
173 	ulong_t hs_tlb_inval_delayed;
174 	ulong_t hs_hat_copied64;
175 	ulong_t hs_hat_copied32;
176 	ulong_t hs_hat_normal64;
177 };
178 extern struct hatstats hatstat;
179 #ifdef DEBUG
180 #define	HATSTAT_INC(x)	(++hatstat.x)	/* x is a hatstats member name */
181 #else
182 #define	HATSTAT_INC(x)	(0)		/* statistics compiled out */
183 #endif
184 
185 #if defined(_KERNEL)
186 
187 /*
188  * Macros to page-align, or test the page alignment of, hat_XXX() addresses.
189  */
190 #define	ALIGN2PAGE(a)		((uintptr_t)(a) & MMU_PAGEMASK)
191 #define	IS_PAGEALIGNED(a)	(((uintptr_t)(a) & MMU_PAGEOFFSET) == 0)
192 
193 extern uint_t	khat_running;	/* set at end of hat_kern_setup() */
194 extern cpuset_t khat_cpuset;	/* cpuset for kernel address demap Xcalls */
195 extern kmutex_t hat_list_lock;
196 extern kcondvar_t hat_list_cv;
197 
198 
199 
200 /*
201  * Interfaces to set up a cpu private mapping (i.e. preemption disabled).
202  * The attr and flags arguments are the same as for hat_devload().
203  * setup() must be called once, then any number of calls to remap(),
204  * followed by a final call to release().
205  *
206  * Used by ppcopy(), page_zero(), the memscrubber, and the kernel debugger.
207  */
208 typedef paddr_t hat_mempte_t;				/* phys addr of PTE */
209 extern hat_mempte_t hat_mempte_setup(caddr_t addr);
210 extern void hat_mempte_remap(pfn_t, caddr_t, hat_mempte_t,
211 	uint_t attr, uint_t flags);
212 extern void hat_mempte_release(caddr_t addr, hat_mempte_t);
213 
214 /*
215  * Interfaces to manage which thread has access to htable and hment reserves.
216  * The USE_HAT_RESERVES macro should always be recomputed in full. Its value
217  * (due to curthread) can change after any call into kmem/vmem.
218  */
219 extern uint_t can_steal_post_boot;
220 extern uint_t use_boot_reserve;
221 #define	USE_HAT_RESERVES()					\
222 	(use_boot_reserve || curthread->t_hatdepth > 1 ||	\
223 	panicstr != NULL || vmem_is_populator())
224 
225 /*
226  * Initialization routines needed by startup, mp_startup...
227  */
228 extern void hat_cpu_online(struct cpu *);
229 extern void hat_cpu_offline(struct cpu *);
230 extern void setup_vaddr_for_ppcopy(struct cpu *);
231 extern void teardown_vaddr_for_ppcopy(struct cpu *);
232 extern void clear_boot_mappings(uintptr_t, uintptr_t);
233 
234 /*
235  * Magic value to indicate that all TLB entries should be demapped.
236  */
237 #define	DEMAP_ALL_ADDR	(~(uintptr_t)0)
238 
239 /*
240  * NOTE(review): halt() apparently has no prototype in any include file.
241  */
242 extern void halt(char *fmt);
243 
244 /*
245  * x86 specific routines for use only in setup or i86pc/vm files
246  */
247 extern void hat_kern_alloc(caddr_t segmap_base, size_t segmap_size,
248 	caddr_t ekernelheap);
249 extern void hat_kern_setup(void);
250 extern void hat_pte_unmap(htable_t *ht, uint_t entry, uint_t flags,
251 	x86pte_t old_pte, void *pte_ptr, boolean_t tlb);
252 extern void hat_init_finish(void);
253 extern caddr_t hat_kpm_pfn2va(pfn_t pfn);
254 extern pfn_t hat_kpm_va2pfn(caddr_t);
255 extern page_t *hat_kpm_vaddr2page(caddr_t);
256 extern uintptr_t hat_kernelbase(uintptr_t);
257 extern void hat_kmap_init(uintptr_t base, size_t len);
258 
259 extern hment_t *hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry);
260 
261 extern void mmu_calc_user_slots(void);
262 extern void hat_tlb_inval(struct hat *hat, uintptr_t va);
263 extern void hat_switch(struct hat *hat);
264 
265 #define	TLB_RANGE_LEN(r)	((r)->tr_cnt << LEVEL_SHIFT((r)->tr_level))
266 
267 /*
268  * A range of virtual pages to demap; TLB_RANGE_LEN() scales tr_cnt by level.
269  */
270 typedef struct tlb_range {
271 	uintptr_t tr_va; 	/* address of page */
272 	ulong_t	tr_cnt; 	/* number of pages in range */
273 	int8_t	tr_level; 	/* page table level */
274 } tlb_range_t;
275 
276 #if defined(__xpv)
277 
#define	XPV_DISALLOW_MIGRATE()	xen_block_migrate()
#define	XPV_ALLOW_MIGRATE()	xen_allow_migrate()

/*
 * Under the hypervisor both single-page flushes expand to mmu_invlpg().
 * The argument is parenthesized before the cast so that an expression
 * argument (e.g. "va & MMU_PAGEMASK") is converted as a whole rather than
 * having the cast bind to its first operand only.
 */
#define	mmu_flush_tlb_page(va)	mmu_invlpg((caddr_t)(va))
#define	mmu_flush_tlb_kpage(va)	mmu_invlpg((caddr_t)(va))
283 
284 /*
285  * Interfaces to call around code that maps or unmaps grant table references.
286  */
287 extern void hat_prepare_mapping(hat_t *, caddr_t, uint64_t *);
288 extern void hat_release_mapping(hat_t *, caddr_t);
289 
290 #else
291 
292 #define	XPV_DISALLOW_MIGRATE()	/* nothing: no-op when not built for Xen */
293 #define	XPV_ALLOW_MIGRATE()	/* nothing: no-op when not built for Xen */
294 
295 #define	pfn_is_foreign(pfn)	__lintzero	/* presumably zero -- confirm */
296 
297 typedef enum flush_tlb_type {
298 	FLUSH_TLB_ALL = 1,
299 	FLUSH_TLB_NONGLOBAL = 2,
300 	FLUSH_TLB_RANGE = 3,	/* NOTE(review): presumably uses the range arg */
301 } flush_tlb_type_t;
302 
303 extern void mmu_flush_tlb(flush_tlb_type_t, tlb_range_t *);
304 extern void mmu_flush_tlb_kpage(uintptr_t);
305 extern void mmu_flush_tlb_page(uintptr_t);
306 
307 extern void hati_cpu_punchin(cpu_t *cpu, uintptr_t va, uint_t attrs);
308 
309 /*
310  * Routines to deal with delayed TLB invalidations for idle CPUs.
311  */
312 extern void tlb_going_idle(void);
313 extern void tlb_service(void);
314 
315 #endif /* !__xpv */
316 
317 #endif	/* _KERNEL */
318 
319 #ifdef	__cplusplus
320 }
321 #endif
322 
323 #endif	/* _VM_HAT_I86_H */
324