xref: /titanic_41/usr/src/uts/i86xpv/os/xen_mmu.c (revision 275c9da86e89f8abf71135cf63d9fc23671b2e60)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/mach_mmu.h>
30 #include <sys/machsystm.h>
31 #include <sys/cmn_err.h>
32 #include <sys/promif.h>
33 #include <sys/hypervisor.h>
34 #include <sys/bootconf.h>
35 #include <sys/ontrap.h>
36 #include <sys/rwlock.h>
37 #include <sys/sysmacros.h>
38 #include <vm/seg_kmem.h>
39 #include <vm/kboot_mmu.h>
40 #include <vm/hat_pte.h>
41 #include <vm/hat.h>
42 #include <vm/htable.h>
43 #include <vm/hat_i86.h>
44 
start_info_t *xen_info;		/* Xen's start-of-day info page for this domain */
ulong_t mfn_count;		/* number of entries in mfn_list[] */
mfn_t *mfn_list;		/* PFN -> MFN table (indexed by PFN) */
mfn_t *mfn_list_pages;		/* pages that make a table of mfn's */
				/* that make up the pa_to_ma table */
mfn_t *mfn_list_pages_page;	/* page of mfn's for mfn_list_pages */
mfn_t cached_max_mfn;		/* cached XENMEM_maximum_ram_page; 0 == stale */
uintptr_t xen_virt_start;	/* NOTE(review): not set in this file; */
				/* presumably base of Xen-reserved VA -- confirm */
pfn_t *mfn_to_pfn_mapping;	/* sparse MFN -> PFN table; may fault on access */
caddr_t xb_addr;		/* virtual addr for the store_mfn page */
55 
56 
57 /*
58  * We need to prevent migration or suspension of a domU while it's
59  * manipulating MFN values, as the MFN values will spontaneously
60  * change. The next 4 routines provide a mechanism for that.
61  * The basic idea is to use reader/writer mutex, readers are any thread
62  * that is manipulating MFNs. Only the thread which is going to actually call
63  * HYPERVISOR_suspend() will become a writer.
64  *
65  * Since various places need to manipulate MFNs and also call the HAT,
66  * we track if a thread acquires reader status and allow it to recursively
67  * do so again. This prevents deadlocks if a migration request
68  * is started and waits for some reader, but then the previous reader needs
69  * to call into the HAT.
70  */
#define	NUM_M2P_LOCKS 128

/*
 * Hashed array of reader/writer locks; each lock is padded out to its
 * own 64-byte cache line so different hash buckets don't false-share.
 */
static struct {
	krwlock_t m2p_rwlock;
	char m2p_pad[64 - sizeof (krwlock_t)];	/* 64 byte cache line size */
} m2p_lock[NUM_M2P_LOCKS];

/* Hash the current thread to one of the NUM_M2P_LOCKS buckets by thread id */
#define	XM2P_HASH	((uintptr_t)curthread->t_tid & (NUM_M2P_LOCKS - 1))
78 
79 void
80 xen_block_migrate(void)
81 {
82 	if (!DOMAIN_IS_INITDOMAIN(xen_info) &&
83 	    ++curthread->t_xpvcntr == 1)
84 		rw_enter(&m2p_lock[XM2P_HASH].m2p_rwlock, RW_READER);
85 }
86 
87 void
88 xen_allow_migrate(void)
89 {
90 	if (!DOMAIN_IS_INITDOMAIN(xen_info) &&
91 	    --curthread->t_xpvcntr == 0)
92 		rw_exit(&m2p_lock[XM2P_HASH].m2p_rwlock);
93 }
94 
95 void
96 xen_start_migrate(void)
97 {
98 	int i;
99 
100 	ASSERT(curthread->t_xpvcntr == 0);
101 	++curthread->t_xpvcntr; /* this allows calls into HAT */
102 	for (i = 0; i < NUM_M2P_LOCKS; ++i)
103 		rw_enter(&m2p_lock[i].m2p_rwlock, RW_WRITER);
104 }
105 
106 void
107 xen_end_migrate(void)
108 {
109 	int i;
110 
111 	for (i = 0; i < NUM_M2P_LOCKS; ++i)
112 		rw_exit(&m2p_lock[i].m2p_rwlock);
113 	ASSERT(curthread->t_xpvcntr == 1);
114 	--curthread->t_xpvcntr;
115 }
116 
117 /*ARGSUSED*/
118 void
119 set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
120 {
121 	mmu_update_t t;
122 	maddr_t mtable = pa_to_ma(table);
123 	int retcnt;
124 
125 	t.ptr = (mtable + index * pte_size) | MMU_NORMAL_PT_UPDATE;
126 	t.val = pteval;
127 	if (HYPERVISOR_mmu_update(&t, 1, &retcnt, DOMID_SELF) || retcnt != 1)
128 		bop_panic("HYPERVISOR_mmu_update() failed");
129 }
130 
131 /*
132  * The start_info_t and mfn_list are initially mapped in low "boot" memory.
133  * Each has a page aligned address and size. We relocate them up into the
134  * kernel's normal address space at this point in time. We also create
135  * the arrays that let the hypervisor suspend/resume a domain.
136  */
137 void
138 xen_relocate_start_info(void)
139 {
140 	maddr_t mach_addr;
141 	size_t sz;
142 	size_t sz2;
143 	offset_t off;
144 	uintptr_t addr;
145 	uintptr_t old;
146 	int i, j;
147 
148 	/*
149 	 * In dom0, we have to account for the console_info structure
150 	 * which might immediately follow the start_info in memory.
151 	 */
152 	sz = sizeof (start_info_t);
153 	if (DOMAIN_IS_INITDOMAIN(xen_info) &&
154 	    xen_info->console.dom0.info_off >= sizeof (start_info_t)) {
155 		sz += xen_info->console.dom0.info_off - sizeof (start_info_t) +
156 		    xen_info->console.dom0.info_size;
157 	}
158 	sz = P2ROUNDUP(sz, MMU_PAGESIZE);
159 	addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
160 	for (off = 0; off < sz; off += MMU_PAGESIZE) {
161 		mach_addr = pa_to_ma(pfn_to_pa(va_to_pfn(
162 		    (caddr_t)xen_info + off)));
163 		kbm_map_ma(mach_addr + off, addr + off, 0);
164 	}
165 	boot_mapin((caddr_t)addr, sz);
166 	old = (uintptr_t)xen_info;
167 	xen_info = (start_info_t *)addr;
168 	for (off = 0; off < sz; off += MMU_PAGESIZE)
169 		kbm_unmap(old + off);
170 
171 	/*
172 	 * Relocate the mfn_list, any number of pages.
173 	 */
174 	sz = P2ROUNDUP(mfn_count * sizeof (mfn_t), MMU_PAGESIZE);
175 	addr = (uintptr_t)vmem_xalloc(heap_arena, sz, MMU_PAGESIZE, 0,
176 	    0, 0, 0, VM_SLEEP);
177 	for (off = 0; off < sz; off += MMU_PAGESIZE) {
178 		mach_addr =
179 		    pa_to_ma(pfn_to_pa(va_to_pfn((caddr_t)mfn_list + off)));
180 		kbm_map_ma(mach_addr, addr + off, 0);
181 	}
182 	boot_mapin((caddr_t)addr, sz);
183 	old = (uintptr_t)mfn_list;
184 	mfn_list = (mfn_t *)addr;
185 	xen_info->mfn_list = (mfn_t)addr;
186 	for (off = 0; off < sz; off += MMU_PAGESIZE)
187 		kbm_unmap(old + off);
188 
189 	/*
190 	 * Create the lists of mfn_list pages needed by suspend/resume.
191 	 * Note we skip this for domain 0 as it can't suspend/resume.
192 	 */
193 	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
194 		sz2 = P2ROUNDUP(mmu_btop(sz) * sizeof (mfn_t), MMU_PAGESIZE);
195 		mfn_list_pages = kmem_zalloc(sz2, VM_SLEEP);
196 		mfn_list_pages_page = kmem_zalloc(MMU_PAGESIZE, VM_SLEEP);
197 		i = 0;
198 		for (off = 0; off < sz; off += MMU_PAGESIZE) {
199 			j = mmu_btop(off);
200 			if (((j * sizeof (mfn_t)) & MMU_PAGEOFFSET) == 0) {
201 				mfn_list_pages_page[i++] =
202 				    pfn_to_mfn(va_to_pfn(&mfn_list_pages[j]));
203 			}
204 			mfn_list_pages[j] =
205 			    pfn_to_mfn(va_to_pfn((caddr_t)mfn_list + off));
206 		}
207 		HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
208 		    pfn_to_mfn(va_to_pfn(mfn_list_pages_page));
209 		HYPERVISOR_shared_info->arch.max_pfn = xen_info->nr_pages;
210 	}
211 
212 	/*
213 	 * Remap the shared info (for I/O) into high memory, too.
214 	 */
215 	sz = MMU_PAGESIZE;
216 	addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
217 	kbm_map_ma(xen_info->shared_info, addr, 0);
218 	/* shared info has no PFN so don't do: boot_mapin((caddr_t)addr, sz) */
219 	old = (uintptr_t)HYPERVISOR_shared_info;
220 	HYPERVISOR_shared_info = (void *)addr;
221 	kbm_unmap(old);
222 
223 	/*
224 	 * Remap the console info into high memory, too.
225 	 */
226 	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
227 		sz = MMU_PAGESIZE;
228 		addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
229 		kbm_map_ma(pfn_to_pa(xen_info->console.domU.mfn), addr, 0);
230 		boot_mapin((caddr_t)addr, sz);
231 		old = (uintptr_t)HYPERVISOR_console_page;
232 		HYPERVISOR_console_page = (void *)addr;
233 		kbm_unmap(old);
234 	} else {
235 		HYPERVISOR_console_page = NULL;
236 	}
237 
238 	/*
239 	 * On domUs we need to have the xenbus page (store_mfn) mapped into
240 	 * the kernel. This is referenced as xb_addr.
241 	 */
242 	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
243 		xb_addr = vmem_alloc(heap_arena, MMU_PAGESIZE, VM_SLEEP);
244 		kbm_map_ma(mfn_to_ma(xen_info->store_mfn),
245 		    (uintptr_t)xb_addr, 0);
246 		boot_mapin(xb_addr, MMU_PAGESIZE);
247 	}
248 }
249 
250 /*
251  * Generate the pfn value to use for a foreign mfn.
252  */
253 pfn_t
254 xen_assign_pfn(mfn_t mfn)
255 {
256 	pfn_t pfn;
257 
258 #ifdef DEBUG
259 	/*
260 	 * make sure this MFN isn't in our list of MFNs
261 	 */
262 	on_trap_data_t otd;
263 	uint_t	on_trap_ready = (t0.t_stk != NULL);
264 
265 	if (on_trap_ready) {
266 		if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
267 			pfn = mfn_to_pfn_mapping[mfn];
268 			if (pfn < mfn_count && mfn_list[pfn] == mfn)
269 				panic("xen_assign_pfn() mfn belongs to us");
270 		}
271 		no_trap();
272 	}
273 #endif /* DEBUG */
274 
275 	if (mfn == MFN_INVALID)
276 		panic("xen_assign_pfn(MFN_INVALID) not allowed");
277 	pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
278 	if (pfn == mfn)
279 		panic("xen_assign_pfn(mfn) PFN_IS_FOREIGN_MFN bit already set");
280 	return (pfn);
281 }
282 
283 void
284 xen_release_pfn(pfn_t pfn)
285 {
286 	if (pfn == PFN_INVALID)
287 		panic("xen_release_pfn(PFN_INVALID) not allowed");
288 	if ((pfn & PFN_IS_FOREIGN_MFN) == 0)
289 		panic("mfn high bit not set");
290 }
291 
292 uint_t
293 pfn_is_foreign(pfn_t pfn)
294 {
295 	if (pfn == PFN_INVALID)
296 		return (0);
297 	return ((pfn & PFN_IS_FOREIGN_MFN) != 0);
298 }
299 
300 pfn_t
301 pte2pfn(x86pte_t pte, level_t l)
302 {
303 	mfn_t mfn = PTE2MFN(pte, l);
304 
305 	if ((pte & PT_SOFTWARE) >= PT_FOREIGN)
306 		return ((pfn_t)mfn | PFN_IS_FOREIGN_MFN);
307 	return (mfn_to_pfn(mfn));
308 }
309 
310 mfn_t
311 pfn_to_mfn(pfn_t pfn)
312 {
313 	if (pfn == PFN_INVALID)
314 		panic("pfn_to_mfn(PFN_INVALID) not allowed");
315 
316 	if (pfn & PFN_IS_FOREIGN_MFN)
317 		return (pfn & ~PFN_IS_FOREIGN_MFN);
318 
319 	if (pfn >= mfn_count)
320 		panic("pfn_to_mfn(): illegal PFN 0x%lx", pfn);
321 
322 	return (mfn_list[pfn]);
323 }
324 
/*
 * This routine translates an MFN back into the corresponding PFN value.
 * It has to be careful since the mfn_to_pfn_mapping[] might fault
 * as that table is sparse. It also has to check for non-faulting, but out of
 * range that exceed the table.
 */
pfn_t
mfn_to_pfn(mfn_t mfn)
{
	pfn_t pfn;
	on_trap_data_t otd;
	/* t0.t_stk != NULL presumably means boot is far enough along for */
	/* on_trap() to work -- NOTE(review): confirm */
	uint_t	on_trap_ready = (t0.t_stk != NULL);

	/*
	 * Cleared at a suspend or migrate
	 */
	if (cached_max_mfn == 0)
		cached_max_mfn =
		    HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);

	/* MFNs beyond the machine's RAM can't be ours: report as foreign */
	if (cached_max_mfn < mfn)
		return ((pfn_t)mfn | PFN_IS_FOREIGN_MFN);

	/*
	 * Probe the sparse table under on_trap() protection; a data-access
	 * fault means the entry isn't mapped, i.e. the MFN is foreign.
	 */
	if (on_trap_ready && on_trap(&otd, OT_DATA_ACCESS)) {
		pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
	} else {
		pfn = mfn_to_pfn_mapping[mfn];

		/* cross-check: a valid local pfn must map back to this mfn */
		if (pfn == PFN_INVALID || pfn >= mfn_count ||
		    pfn_to_mfn(pfn) != mfn)
			pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
	}

	if (on_trap_ready)
		no_trap();

	/*
	 * If khat_running is set then we should be checking
	 * in domUs that migration is blocked while using the
	 * mfn_to_pfn_mapping[] table.
	 */
	ASSERT(!khat_running || DOMAIN_IS_INITDOMAIN(xen_info) ||
	    rw_read_held(&m2p_lock[XM2P_HASH].m2p_rwlock));

	return (pfn);
}
371 
372 /*
373  * From a pseudo-physical address, find the corresponding machine address.
374  */
375 maddr_t
376 pa_to_ma(paddr_t pa)
377 {
378 	mfn_t mfn = pfn_to_mfn(mmu_btop(pa));
379 
380 	if (mfn == MFN_INVALID)
381 		panic("pa_to_ma() got MFN_INVALID");
382 	return (mfn_to_ma(mfn) + (pa & MMU_PAGEOFFSET));
383 }
384 
385 /*
386  * From a machine address, find the corresponding pseudo-physical address.
387  */
388 paddr_t
389 ma_to_pa(maddr_t ma)
390 {
391 	pfn_t pfn = mfn_to_pfn(mmu_btop(ma));
392 
393 	if (pfn == PFN_INVALID)
394 		panic("ma_to_pa() got PFN_INVALID");
395 	return (pfn_to_pa(pfn) + (ma & MMU_PAGEOFFSET));
396 }
397 
/*
 * When calling reassign_pfn(), the page must be (at least) read locked
 * to make sure swrand does not try to grab it.
 */
#ifdef DEBUG
/*
 * DEBUG-only: panic if the page_t for pfn exists and is not locked.
 * Pfns without a page_t are skipped.
 */
#define	CHECK_PAGE_LOCK(pfn)	{			\
	page_t *pp = page_numtopp_nolock(pfn);		\
	if ((pp != NULL) && (!PAGE_LOCKED(pp))) {	\
		panic("reassign_pfn() called with unlocked page (pfn 0x%lx)", \
		    pfn);				\
	}						\
}
#else	/* DEBUG */
#define	CHECK_PAGE_LOCK(pfn)
#endif	/* DEBUG */
413 
/*
 * Reassign a new machine page to back a physical address.
 * mfn == MFN_INVALID gives the page away (no hypervisor M2P update);
 * otherwise the new MFN is installed and the hypervisor is told.
 */
void
reassign_pfn(pfn_t pfn, mfn_t mfn)
{
	int mmu_update_return;
	mmu_update_t t;
	extern void update_contig_pfnlist(pfn_t, mfn_t, mfn_t);

	ASSERT(pfn != PFN_INVALID);
	ASSERT(!pfn_is_foreign(pfn));

	ASSERT(pfn < mfn_count);
	/* keep the contiguous-pfn bookkeeping consistent with the change */
	update_contig_pfnlist(pfn, mfn_list[pfn], mfn);
	if (mfn == MFN_INVALID) {
		/* giving the page away: drop any kpm mapping first */
		CHECK_PAGE_LOCK(pfn);
		if (kpm_vbase != NULL && xen_kpm_page(pfn, 0) < 0)
			panic("reassign_pfn(): failed to remove kpm mapping");
		mfn_list[pfn] = mfn;
		return;
	}

	/*
	 * Verify that previously given away pages are still page locked.
	 */
	if (mfn_list[pfn] == MFN_INVALID) {
		CHECK_PAGE_LOCK(pfn);
	}
	/* record the new backing frame before telling the hypervisor */
	mfn_list[pfn] = mfn;

	t.ptr = mfn_to_ma(mfn) | MMU_MACHPHYS_UPDATE;
	t.val = pfn;

	if (HYPERVISOR_mmu_update(&t, 1, &mmu_update_return, DOMID_SELF))
		panic("HYPERVISOR_mmu_update() failed");
	ASSERT(mmu_update_return == 1);

	/* re-establish a writable kpm mapping for the new page */
	if (kpm_vbase != NULL && xen_kpm_page(pfn, PT_VALID | PT_WRITABLE) < 0)
		panic("reassign_pfn(): failed to enable kpm mapping");
}
455 
456 /*
457  * XXPV code to work around problems with GNTTABOP_map_grant_ref
458  * Hopefully we can remove this when GNTTABOP_map_grant_ref is fixed.
459  */
460 void
461 xen_fix_foreign(uint64_t va)
462 {
463 	uintptr_t v = va;
464 	htable_t *ht;
465 	uint_t entry;
466 	x86pte_t pte;
467 
468 	/*
469 	 * Look up the PTE for VA. If it is not marked foreign,
470 	 * add the appropriate soft bits and reinstall the new PTE.
471 	 */
472 	ht = htable_getpage(kas.a_hat, v, &entry);
473 	if (ht == NULL) {
474 		panic("xen_fix_foreign(va=0x%p) htable not found", (void *)v);
475 		return;
476 	}
477 	pte = x86pte_get(ht, entry);
478 	if ((pte & PT_SOFTWARE) < PT_FOREIGN) {
479 		pte |= PT_FOREIGN;
480 		if (HYPERVISOR_update_va_mapping(v, pte, UVMF_NONE) != 0)
481 			panic("xen_fix_foreign(va=0x%p) failed, pte=" FMT_PTE,
482 			    (void *)v, pte);
483 	}
484 	htable_release(ht);
485 }
486