xref: /illumos-gate/usr/src/uts/i86pc/vm/i86_mmu.c (revision 6def3553daaea99d3558cb94db34178e1617bfe4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/t_lock.h>
29 #include <sys/memlist.h>
30 #include <sys/cpuvar.h>
31 #include <sys/vmem.h>
32 #include <sys/mman.h>
33 #include <sys/vm.h>
34 #include <sys/kmem.h>
35 #include <sys/cmn_err.h>
36 #include <sys/debug.h>
37 #include <sys/vm_machparam.h>
38 #include <sys/tss.h>
39 #include <sys/vnode.h>
40 #include <vm/hat.h>
41 #include <vm/anon.h>
42 #include <vm/as.h>
43 #include <vm/page.h>
44 #include <vm/seg.h>
45 #include <vm/seg_kmem.h>
46 #include <vm/seg_map.h>
47 #include <vm/hat_i86.h>
48 #include <sys/promif.h>
49 #include <sys/x86_archext.h>
50 #include <sys/systm.h>
51 #include <sys/archsystm.h>
52 #include <sys/sunddi.h>
53 #include <sys/ddidmareq.h>
54 #include <sys/controlregs.h>
55 #include <sys/reboot.h>
56 #include <sys/kdi.h>
57 #include <sys/bootconf.h>
58 #include <sys/bootsvcs.h>
59 #include <sys/bootinfo.h>
60 #include <vm/kboot_mmu.h>
61 
62 caddr_t
63 i86devmap(pfn_t pf, pgcnt_t pgcnt, uint_t prot)
64 {
65 	caddr_t addr;
66 	caddr_t addr1;
67 	page_t *pp;
68 
69 	addr1 = addr = vmem_alloc(heap_arena, mmu_ptob(pgcnt), VM_SLEEP);
70 
71 	for (; pgcnt != 0; addr += MMU_PAGESIZE, ++pf, --pgcnt) {
72 		pp = page_numtopp_nolock(pf);
73 		if (pp == NULL) {
74 			hat_devload(kas.a_hat, addr, MMU_PAGESIZE, pf,
75 			    prot | HAT_NOSYNC, HAT_LOAD_LOCK);
76 		} else {
77 			hat_memload(kas.a_hat, addr, pp,
78 			    prot | HAT_NOSYNC, HAT_LOAD_LOCK);
79 		}
80 	}
81 
82 	return (addr1);
83 }
84 
85 /*
86  * This routine is like page_numtopp, but accepts only free pages, which
87  * it allocates (unfrees) and returns with the exclusive lock held.
88  * It is used by machdep.c/dma_init() to find contiguous free pages.
89  *
90  * XXX this and some others should probably be in vm_machdep.c
91  */
92 page_t *
93 page_numtopp_alloc(pfn_t pfnum)
94 {
95 	page_t *pp;
96 
97 retry:
98 	pp = page_numtopp_nolock(pfnum);
99 	if (pp == NULL) {
100 		return (NULL);
101 	}
102 
103 	if (!page_trylock(pp, SE_EXCL)) {
104 		return (NULL);
105 	}
106 
107 	if (page_pptonum(pp) != pfnum) {
108 		page_unlock(pp);
109 		goto retry;
110 	}
111 
112 	if (!PP_ISFREE(pp)) {
113 		page_unlock(pp);
114 		return (NULL);
115 	}
116 	if (pp->p_szc) {
117 		page_demote_free_pages(pp);
118 		page_unlock(pp);
119 		goto retry;
120 	}
121 
122 	/* If associated with a vnode, destroy mappings */
123 
124 	if (pp->p_vnode) {
125 
126 		page_destroy_free(pp);
127 
128 		if (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_NO_RECLAIM)) {
129 			return (NULL);
130 		}
131 
132 		if (page_pptonum(pp) != pfnum) {
133 			page_unlock(pp);
134 			goto retry;
135 		}
136 	}
137 
138 	if (!PP_ISFREE(pp) || !page_reclaim(pp, (kmutex_t *)NULL)) {
139 		page_unlock(pp);
140 		return (NULL);
141 	}
142 
143 	return (pp);
144 }
145 
/*
 * Flag is not set early in boot. Once it is set we are no longer
 * using boot's page tables.
 *
 * hat_kern_setup() sets it to 1; va_to_pfn() panics if it is called
 * after the flag has been set.
 */
uint_t khat_running = 0;
151 
152 /*
153  * This procedure is callable only while the boot loader is in charge of the
154  * MMU. It assumes that PA == VA for page table pointers.  It doesn't live in
155  * kboot_mmu.c since it's used from common code.
156  */
157 pfn_t
158 va_to_pfn(void *vaddr)
159 {
160 	uintptr_t	des_va = ALIGN2PAGE(vaddr);
161 	uintptr_t	va = des_va;
162 	size_t		len;
163 	uint_t		prot;
164 	pfn_t		pfn;
165 
166 	if (khat_running)
167 		panic("va_to_pfn(): called too late\n");
168 
169 	if (kbm_probe(&va, &len, &pfn, &prot) == 0)
170 		return (PFN_INVALID);
171 	if (va > des_va)
172 		return (PFN_INVALID);
173 	if (va < des_va)
174 		pfn += mmu_btop(des_va - va);
175 	return (pfn);
176 }
177 
178 /*
179  * Initialize a special area in the kernel that always holds some PTEs for
180  * faster performance. This always holds segmap's PTEs.
181  * In the 32 bit kernel this maps the kernel heap too.
182  */
183 void
184 hat_kmap_init(uintptr_t base, size_t len)
185 {
186 	uintptr_t map_addr;	/* base rounded down to large page size */
187 	uintptr_t map_eaddr;	/* base + len rounded up */
188 	size_t map_len;
189 	caddr_t ptes;		/* mapping area in kernel for kmap ptes */
190 	size_t window_size;	/* size of mapping area for ptes */
191 	ulong_t htable_cnt;	/* # of page tables to cover map_len */
192 	ulong_t i;
193 	htable_t *ht;
194 	uintptr_t va;
195 
196 	/*
197 	 * We have to map in an area that matches an entire page table.
198 	 */
199 	map_addr = base & LEVEL_MASK(1);
200 	map_eaddr = (base + len + LEVEL_SIZE(1) - 1) & LEVEL_MASK(1);
201 	map_len = map_eaddr - map_addr;
202 	window_size = mmu_btop(map_len) * mmu.pte_size;
203 	window_size = (window_size + LEVEL_SIZE(1)) & LEVEL_MASK(1);
204 	htable_cnt = map_len >> LEVEL_SHIFT(1);
205 
206 	/*
207 	 * allocate vmem for the kmap_ptes
208 	 */
209 	ptes = vmem_xalloc(heap_arena, window_size, LEVEL_SIZE(1), 0,
210 	    0, NULL, NULL, VM_SLEEP);
211 	mmu.kmap_htables =
212 	    kmem_alloc(htable_cnt * sizeof (htable_t *), KM_SLEEP);
213 
214 	/*
215 	 * Map the page tables that cover kmap into the allocated range.
216 	 * Note we don't ever htable_release() the kmap page tables - they
217 	 * can't ever be stolen, freed, etc.
218 	 */
219 	for (va = map_addr, i = 0; i < htable_cnt; va += LEVEL_SIZE(1), ++i) {
220 		ht = htable_create(kas.a_hat, va, 0, NULL);
221 		if (ht == NULL)
222 			panic("hat_kmap_init: ht == NULL");
223 		mmu.kmap_htables[i] = ht;
224 
225 		hat_devload(kas.a_hat, ptes + i * MMU_PAGESIZE,
226 		    MMU_PAGESIZE, ht->ht_pfn,
227 		    PROT_READ | PROT_WRITE | HAT_NOSYNC | HAT_UNORDERED_OK,
228 		    HAT_LOAD | HAT_LOAD_NOCONSIST);
229 	}
230 
231 	/*
232 	 * set information in mmu to activate handling of kmap
233 	 */
234 	mmu.kmap_addr = map_addr;
235 	mmu.kmap_eaddr = map_eaddr;
236 	mmu.kmap_ptes = (x86pte_t *)ptes;
237 }
238 
/*
 * Defined outside this file.  When kpm_size is non-zero, hat_kern_alloc()
 * maps each physical address paddr at virtual address kpm_vbase + paddr.
 */
extern caddr_t	kpm_vbase;
extern size_t	kpm_size;
241 
242 /*
243  * Routine to pre-allocate data structures for hat_kern_setup(). It computes
244  * how many pagetables it needs by walking the boot loader's page tables.
245  */
246 /*ARGSUSED*/
247 void
248 hat_kern_alloc(
249 	caddr_t	segmap_base,
250 	size_t	segmap_size,
251 	caddr_t	ekernelheap)
252 {
253 	uintptr_t	last_va = (uintptr_t)-1;	/* catch 1st time */
254 	uintptr_t	va = 0;
255 	size_t		size;
256 	pfn_t		pfn;
257 	uint_t		prot;
258 	uint_t		table_cnt = 1;
259 	uint_t		mapping_cnt;
260 	level_t		start_level;
261 	level_t		l;
262 	struct memlist	*pmem;
263 	level_t		lpagel = mmu.max_page_level;
264 	uint64_t	paddr;
265 	int64_t		psize;
266 
267 
268 	if (kpm_size > 0) {
269 		/*
270 		 * Create the kpm page tables.
271 		 */
272 		for (pmem = phys_install; pmem; pmem = pmem->next) {
273 			paddr = pmem->address;
274 			psize = pmem->size;
275 			while (psize >= MMU_PAGESIZE) {
276 				if ((paddr & LEVEL_OFFSET(lpagel)) == 0 &&
277 				    psize > LEVEL_SIZE(lpagel))
278 					l = lpagel;
279 				else
280 					l = 0;
281 				kbm_map((uintptr_t)kpm_vbase + paddr, paddr,
282 				    l, 1);
283 				paddr += LEVEL_SIZE(l);
284 				psize -= LEVEL_SIZE(l);
285 			}
286 		}
287 	} else {
288 		/*
289 		 * Create the page windows and 1 page of VA in
290 		 * which we map the PTEs of those windows.
291 		 */
292 		mmu.pwin_base = vmem_xalloc(heap_arena, 2 * NCPU * MMU_PAGESIZE,
293 		    LEVEL_SIZE(1), 0, 0, NULL, NULL, VM_SLEEP);
294 		ASSERT(NCPU * 2 <= MMU_PAGESIZE / mmu.pte_size);
295 		mmu.pwin_pte_va = vmem_xalloc(heap_arena, MMU_PAGESIZE,
296 		    MMU_PAGESIZE, 0, 0, NULL, NULL, VM_SLEEP);
297 
298 		/*
299 		 * Find/Create the page table window mappings.
300 		 */
301 		paddr = 0;
302 		(void) find_pte((uintptr_t)mmu.pwin_base, &paddr, 0, 0);
303 		ASSERT(paddr != 0);
304 		ASSERT((paddr & MMU_PAGEOFFSET) == 0);
305 		mmu.pwin_pte_pa = paddr;
306 		kbm_map((uintptr_t)mmu.pwin_pte_va, mmu.pwin_pte_pa, 0, 1);
307 	}
308 
309 	/*
310 	 * Walk the boot loader's page tables and figure out
311 	 * how many tables and page mappings there will be.
312 	 */
313 	while (kbm_probe(&va, &size, &pfn, &prot) != 0) {
314 		/*
315 		 * At each level, if the last_va falls into a new htable,
316 		 * increment table_cnt. We can stop at the 1st level where
317 		 * they are in the same htable.
318 		 */
319 		if (size == MMU_PAGESIZE)
320 			start_level = 0;
321 		else
322 			start_level = 1;
323 
324 		for (l = start_level; l < mmu.max_level; ++l) {
325 			if (va >> LEVEL_SHIFT(l + 1) ==
326 			    last_va >> LEVEL_SHIFT(l + 1))
327 				break;
328 			++table_cnt;
329 		}
330 		last_va = va;
331 		va = (va & LEVEL_MASK(1)) + LEVEL_SIZE(1);
332 	}
333 
334 	/*
335 	 * Besides the boot loader mappings, we're going to fill in
336 	 * the entire top level page table for the kernel. Make sure there's
337 	 * enough reserve for that too.
338 	 */
339 	table_cnt += mmu.top_level_count - ((kernelbase >>
340 	    LEVEL_SHIFT(mmu.max_level)) & (mmu.top_level_count - 1));
341 
342 #if defined(__i386)
343 	/*
344 	 * The 32 bit PAE hat allocates tables one level below the top when
345 	 * kernelbase isn't 1 Gig aligned. We'll just be sloppy and allocate
346 	 * a bunch more to the reserve. Any unused will be returned later.
347 	 * Note we've already counted these mappings, just not the extra
348 	 * pagetables.
349 	 */
350 	if (mmu.pae_hat != 0 && (kernelbase & LEVEL_OFFSET(mmu.max_level)) != 0)
351 		table_cnt += mmu.ptes_per_table -
352 		    ((kernelbase & LEVEL_OFFSET(mmu.max_level)) >>
353 		    LEVEL_SHIFT(mmu.max_level - 1));
354 #endif
355 
356 	/*
357 	 * Add 1/4 more into table_cnt for extra slop.  The unused
358 	 * slop is freed back when we htable_adjust_reserve() later.
359 	 */
360 	table_cnt += table_cnt >> 2;
361 
362 	/*
363 	 * We only need mapping entries (hments) for shared pages.
364 	 * This should be far, far fewer than the total possible,
365 	 * We'll allocate enough for 1/16 of all possible PTEs.
366 	 */
367 	mapping_cnt = (table_cnt * mmu.ptes_per_table) >> 4;
368 
369 	/*
370 	 * Now create the initial htable/hment reserves
371 	 */
372 	htable_initial_reserve(table_cnt);
373 	hment_reserve(mapping_cnt);
374 	x86pte_cpu_init(CPU);
375 }
376 
377 
378 /*
379  * This routine handles the work of creating the kernel's initial mappings
380  * by deciphering the mappings in the page tables created by the boot program.
381  *
382  * We maintain large page mappings, but only to a level 1 pagesize.
383  * The boot loader can only add new mappings once this function starts.
384  * In particular it can not change the pagesize used for any existing
385  * mappings or this code breaks!
386  */
387 
388 void
389 hat_kern_setup(void)
390 {
391 	/*
392 	 * Attach htables to the existing pagetables
393 	 */
394 	htable_attach(kas.a_hat, 0, mmu.max_level, NULL,
395 	    mmu_btop(getcr3()));
396 
397 #if defined(__i386)
398 	CPU->cpu_tss->tss_cr3 = dftss0.tss_cr3 = getcr3();
399 #endif /* __i386 */
400 
401 	/*
402 	 * The kernel HAT is now officially open for business.
403 	 */
404 	khat_running = 1;
405 
406 	CPUSET_ATOMIC_ADD(kas.a_hat->hat_cpus, CPU->cpu_id);
407 	CPU->cpu_current_hat = kas.a_hat;
408 }
409