xref: /titanic_50/usr/src/uts/i86xpv/os/xen_mmu.c (revision 7ec363dc481bba196d724969022171de4687989f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/mach_mmu.h>
30 #include <sys/machsystm.h>
31 #include <sys/cmn_err.h>
32 #include <sys/promif.h>
33 #include <sys/hypervisor.h>
34 #include <sys/bootconf.h>
35 #include <sys/ontrap.h>
36 #include <sys/rwlock.h>
37 #include <sys/sysmacros.h>
38 #include <vm/seg_kmem.h>
39 #include <vm/kboot_mmu.h>
40 #include <vm/hat_pte.h>
41 #include <vm/hat.h>
42 #include <vm/htable.h>
43 #include <vm/hat_i86.h>
44 
45 start_info_t *xen_info;
46 ulong_t mfn_count;
47 mfn_t *mfn_list;
48 mfn_t *mfn_list_pages;		/* pages that make a table of mfn's */
49 				/* that make up the pa_to_ma table */
50 mfn_t *mfn_list_pages_page;	/* page of mfn's for mfn_list_pages */
51 mfn_t cached_max_mfn;
52 uintptr_t xen_virt_start;
53 pfn_t *mfn_to_pfn_mapping;
54 caddr_t xb_addr;		/* virtual addr for the store_mfn page */
55 
56 
57 /*
58  * Running on the hypervisor, we need to prevent migration while holding
59  * PTE values that we might do PTE2PFN() or pa_to_ma() on, as the
60  * mfn_to_pfn_mapping and mfn_list[] translation tables might change.
61  *
62  * As the suspend process uses the HAT, we need to check we don't already own
63  * the lock as a writer before we try to take it as a reader.
64  */
65 #define	NUM_M2P_LOCKS 128
66 static struct {
67 	krwlock_t m2p_rwlock;
68 	char m2p_pad[64 - sizeof (krwlock_t)];	/* 64 byte cache line size */
69 } m2p_lock[NUM_M2P_LOCKS];
70 
71 #define	XM2P_HASH	((uintptr_t)curthread->t_tid & (NUM_M2P_LOCKS - 1))
72 
73 void
74 xen_block_migrate(void)
75 {
76 	if (!DOMAIN_IS_INITDOMAIN(xen_info) &&
77 	    rw_owner(&m2p_lock[XM2P_HASH].m2p_rwlock) != curthread)
78 		rw_enter(&m2p_lock[XM2P_HASH].m2p_rwlock, RW_READER);
79 }
80 
81 void
82 xen_allow_migrate(void)
83 {
84 	if (!DOMAIN_IS_INITDOMAIN(xen_info) &&
85 	    rw_owner(&m2p_lock[XM2P_HASH].m2p_rwlock) != curthread)
86 		rw_exit(&m2p_lock[XM2P_HASH].m2p_rwlock);
87 }
88 
89 void
90 xen_start_migrate(void)
91 {
92 	int i;
93 
94 	for (i = 0; i < NUM_M2P_LOCKS; ++i)
95 		rw_enter(&m2p_lock[i].m2p_rwlock, RW_WRITER);
96 }
97 
98 void
99 xen_end_migrate(void)
100 {
101 	int i;
102 
103 	for (i = 0; i < NUM_M2P_LOCKS; ++i)
104 		rw_exit(&m2p_lock[i].m2p_rwlock);
105 }
106 
107 /*ARGSUSED*/
108 void
109 set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
110 {
111 	mmu_update_t t;
112 	maddr_t mtable = pa_to_ma(table);
113 	int retcnt;
114 
115 	t.ptr = (mtable + index * pte_size) | MMU_NORMAL_PT_UPDATE;
116 	t.val = pteval;
117 	if (HYPERVISOR_mmu_update(&t, 1, &retcnt, DOMID_SELF) || retcnt != 1)
118 		bop_panic("HYPERVISOR_mmu_update() failed");
119 }
120 
121 /*
122  * The start_info_t and mfn_list are initially mapped in low "boot" memory.
123  * Each has a page aligned address and size. We relocate them up into the
124  * kernel's normal address space at this point in time. We also create
125  * the arrays that let the hypervisor suspend/resume a domain.
126  */
127 void
128 xen_relocate_start_info(void)
129 {
130 	maddr_t mach_addr;
131 	size_t sz;
132 	size_t sz2;
133 	offset_t off;
134 	uintptr_t addr;
135 	uintptr_t old;
136 	int i, j;
137 
138 	/*
139 	 * In dom0, we have to account for the console_info structure
140 	 * which might immediately follow the start_info in memory.
141 	 */
142 	sz = sizeof (start_info_t);
143 	if (DOMAIN_IS_INITDOMAIN(xen_info) &&
144 	    xen_info->console.dom0.info_off >= sizeof (start_info_t)) {
145 		sz += xen_info->console.dom0.info_off - sizeof (start_info_t) +
146 		    xen_info->console.dom0.info_size;
147 	}
148 	sz = P2ROUNDUP(sz, MMU_PAGESIZE);
149 	addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
150 	for (off = 0; off < sz; off += MMU_PAGESIZE) {
151 		mach_addr = pa_to_ma(pfn_to_pa(va_to_pfn(
152 		    (caddr_t)xen_info + off)));
153 		kbm_map_ma(mach_addr + off, addr + off, 0);
154 	}
155 	boot_mapin((caddr_t)addr, sz);
156 	old = (uintptr_t)xen_info;
157 	xen_info = (start_info_t *)addr;
158 	for (off = 0; off < sz; off += MMU_PAGESIZE)
159 		kbm_unmap(old + off);
160 
161 	/*
162 	 * Relocate the mfn_list, any number of pages.
163 	 */
164 	sz = P2ROUNDUP(mfn_count * sizeof (mfn_t), MMU_PAGESIZE);
165 	addr = (uintptr_t)vmem_xalloc(heap_arena, sz, MMU_PAGESIZE, 0,
166 	    0, 0, 0, VM_SLEEP);
167 	for (off = 0; off < sz; off += MMU_PAGESIZE) {
168 		mach_addr =
169 		    pa_to_ma(pfn_to_pa(va_to_pfn((caddr_t)mfn_list + off)));
170 		kbm_map_ma(mach_addr, addr + off, 0);
171 	}
172 	boot_mapin((caddr_t)addr, sz);
173 	old = (uintptr_t)mfn_list;
174 	mfn_list = (mfn_t *)addr;
175 	xen_info->mfn_list = (mfn_t)addr;
176 	for (off = 0; off < sz; off += MMU_PAGESIZE)
177 		kbm_unmap(old + off);
178 
179 	/*
180 	 * Create the lists of mfn_list pages needed by suspend/resume.
181 	 * Note we skip this for domain 0 as it can't suspend/resume.
182 	 */
183 	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
184 		sz2 = P2ROUNDUP(mmu_btop(sz) * sizeof (mfn_t), MMU_PAGESIZE);
185 		mfn_list_pages = kmem_zalloc(sz2, VM_SLEEP);
186 		mfn_list_pages_page = kmem_zalloc(MMU_PAGESIZE, VM_SLEEP);
187 		i = 0;
188 		for (off = 0; off < sz; off += MMU_PAGESIZE) {
189 			j = mmu_btop(off);
190 			if (((j * sizeof (mfn_t)) & MMU_PAGEOFFSET) == 0) {
191 				mfn_list_pages_page[i++] =
192 				    pfn_to_mfn(va_to_pfn(&mfn_list_pages[j]));
193 			}
194 			mfn_list_pages[j] =
195 			    pfn_to_mfn(va_to_pfn((caddr_t)mfn_list + off));
196 		}
197 		HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
198 		    pfn_to_mfn(va_to_pfn(mfn_list_pages_page));
199 		HYPERVISOR_shared_info->arch.max_pfn = xen_info->nr_pages;
200 	}
201 
202 	/*
203 	 * Remap the shared info (for I/O) into high memory, too.
204 	 */
205 	sz = MMU_PAGESIZE;
206 	addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
207 	kbm_map_ma(xen_info->shared_info, addr, 0);
208 	/* shared info has no PFN so don't do: boot_mapin((caddr_t)addr, sz) */
209 	old = (uintptr_t)HYPERVISOR_shared_info;
210 	HYPERVISOR_shared_info = (void *)addr;
211 	kbm_unmap(old);
212 
213 	/*
214 	 * Remap the console info into high memory, too.
215 	 */
216 	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
217 		sz = MMU_PAGESIZE;
218 		addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
219 		kbm_map_ma(pfn_to_pa(xen_info->console.domU.mfn), addr, 0);
220 		boot_mapin((caddr_t)addr, sz);
221 		old = (uintptr_t)HYPERVISOR_console_page;
222 		HYPERVISOR_console_page = (void *)addr;
223 		kbm_unmap(old);
224 	} else {
225 		HYPERVISOR_console_page = NULL;
226 	}
227 
228 	/*
229 	 * On domUs we need to have the xenbus page (store_mfn) mapped into
230 	 * the kernel. This is referenced as xb_addr.
231 	 */
232 	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
233 		xb_addr = vmem_alloc(heap_arena, MMU_PAGESIZE, VM_SLEEP);
234 		kbm_map_ma(mfn_to_ma(xen_info->store_mfn),
235 		    (uintptr_t)xb_addr, 0);
236 		boot_mapin(xb_addr, MMU_PAGESIZE);
237 	}
238 }
239 
240 /*
241  * Generate the pfn value to use for a foreign mfn.
242  */
243 pfn_t
244 xen_assign_pfn(mfn_t mfn)
245 {
246 	pfn_t pfn;
247 
248 #ifdef DEBUG
249 	/*
250 	 * make sure this MFN isn't in our list of MFNs
251 	 */
252 	on_trap_data_t otd;
253 	uint_t	on_trap_ready = (t0.t_stk != NULL);
254 
255 	if (on_trap_ready) {
256 		if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
257 			pfn = mfn_to_pfn_mapping[mfn];
258 			if (pfn < mfn_count && mfn_list[pfn] == mfn)
259 				panic("xen_assign_pfn() mfn belongs to us");
260 		}
261 		no_trap();
262 	}
263 #endif /* DEBUG */
264 
265 	if (mfn == MFN_INVALID)
266 		panic("xen_assign_pfn(MFN_INVALID) not allowed");
267 	pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
268 	if (pfn == mfn)
269 		panic("xen_assign_pfn(mfn) PFN_IS_FOREIGN_MFN bit already set");
270 	return (pfn);
271 }
272 
273 void
274 xen_release_pfn(pfn_t pfn)
275 {
276 	if (pfn == PFN_INVALID)
277 		panic("xen_release_pfn(PFN_INVALID) not allowed");
278 	if ((pfn & PFN_IS_FOREIGN_MFN) == 0)
279 		panic("mfn high bit not set");
280 }
281 
282 uint_t
283 pfn_is_foreign(pfn_t pfn)
284 {
285 	if (pfn == PFN_INVALID)
286 		return (0);
287 	return ((pfn & PFN_IS_FOREIGN_MFN) != 0);
288 }
289 
290 pfn_t
291 pte2pfn(x86pte_t pte, level_t l)
292 {
293 	mfn_t mfn = PTE2MFN(pte, l);
294 
295 	if ((pte & PT_SOFTWARE) >= PT_FOREIGN)
296 		return ((pfn_t)mfn | PFN_IS_FOREIGN_MFN);
297 	return (mfn_to_pfn(mfn));
298 }
299 
300 mfn_t
301 pfn_to_mfn(pfn_t pfn)
302 {
303 	if (pfn == PFN_INVALID)
304 		panic("pfn_to_mfn(PFN_INVALID) not allowed");
305 
306 	if (pfn & PFN_IS_FOREIGN_MFN)
307 		return (pfn & ~PFN_IS_FOREIGN_MFN);
308 
309 	if (pfn >= mfn_count)
310 		panic("pfn_to_mfn(): illegal PFN 0x%lx", pfn);
311 
312 	return (mfn_list[pfn]);
313 }
314 
315 /*
316  * This routine translates an MFN back into the corresponding PFN value.
317  * It has to be careful since the mfn_to_pfn_mapping[] might fault
318  * as that table is sparse. It also has to check for non-faulting, but out of
319  * range that exceed the table.
320  */
321 pfn_t
322 mfn_to_pfn(mfn_t mfn)
323 {
324 	pfn_t pfn;
325 	on_trap_data_t otd;
326 	uint_t	on_trap_ready = (t0.t_stk != NULL);
327 
328 	/*
329 	 * Cleared at a suspend or migrate
330 	 */
331 	if (cached_max_mfn == 0)
332 		cached_max_mfn =
333 		    HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
334 
335 	if (cached_max_mfn < mfn)
336 		return ((pfn_t)mfn | PFN_IS_FOREIGN_MFN);
337 
338 	if (on_trap_ready && on_trap(&otd, OT_DATA_ACCESS)) {
339 		pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
340 	} else {
341 		pfn = mfn_to_pfn_mapping[mfn];
342 
343 		if (pfn == PFN_INVALID || pfn >= mfn_count ||
344 		    pfn_to_mfn(pfn) != mfn)
345 			pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
346 	}
347 
348 	if (on_trap_ready)
349 		no_trap();
350 
351 	/*
352 	 * If khat_running is set then we should be checking
353 	 * in domUs that migration is blocked while using the
354 	 * mfn_to_pfn_mapping[] table.
355 	 */
356 	ASSERT(!khat_running || DOMAIN_IS_INITDOMAIN(xen_info) ||
357 	    rw_read_held(&m2p_lock[XM2P_HASH].m2p_rwlock));
358 
359 	return (pfn);
360 }
361 
362 /*
363  * From a pseudo-physical address, find the corresponding machine address.
364  */
365 maddr_t
366 pa_to_ma(paddr_t pa)
367 {
368 	mfn_t mfn = pfn_to_mfn(mmu_btop(pa));
369 
370 	if (mfn == MFN_INVALID)
371 		panic("pa_to_ma() got MFN_INVALID");
372 	return (mfn_to_ma(mfn) + (pa & MMU_PAGEOFFSET));
373 }
374 
375 /*
376  * From a machine address, find the corresponding pseudo-physical address.
377  */
378 paddr_t
379 ma_to_pa(maddr_t ma)
380 {
381 	pfn_t pfn = mfn_to_pfn(mmu_btop(ma));
382 
383 	if (pfn == PFN_INVALID)
384 		panic("ma_to_pa() got PFN_INVALID");
385 	return (pfn_to_pa(pfn) + (ma & MMU_PAGEOFFSET));
386 }
387 
/*
 * When calling reassign_pfn(), the page must be (at least) read locked
 * to make sure swrand does not try to grab it.
 *
 * CHECK_PAGE_LOCK() enforces that in DEBUG kernels: if the pfn has a
 * page_t and that page is not locked, it panics.  In non-DEBUG kernels
 * the macro expands to nothing.
 */
#ifndef DEBUG
#define	CHECK_PAGE_LOCK(pfn)
#else	/* DEBUG */
#define	CHECK_PAGE_LOCK(pfn)	{					\
	page_t *chk_pp = page_numtopp_nolock(pfn);			\
									\
	if (chk_pp != NULL && !PAGE_LOCKED(chk_pp)) {			\
		panic("reassign_pfn() called with unlocked page (pfn 0x%lx)", \
		    (pfn));						\
	}								\
}
#endif	/* DEBUG */
403 
404 /*
405  * Reassign a new machine page to back a physical address.
406  */
407 void
408 reassign_pfn(pfn_t pfn, mfn_t mfn)
409 {
410 	int mmu_update_return;
411 	mmu_update_t t;
412 	extern void update_contig_pfnlist(pfn_t, mfn_t, mfn_t);
413 
414 	ASSERT(pfn != PFN_INVALID);
415 	ASSERT(!pfn_is_foreign(pfn));
416 
417 	ASSERT(pfn < mfn_count);
418 	update_contig_pfnlist(pfn, mfn_list[pfn], mfn);
419 	if (mfn == MFN_INVALID) {
420 		CHECK_PAGE_LOCK(pfn);
421 		if (kpm_vbase != NULL && xen_kpm_page(pfn, 0) < 0)
422 			panic("reassign_pfn(): failed to remove kpm mapping");
423 		mfn_list[pfn] = mfn;
424 		return;
425 	}
426 
427 	/*
428 	 * Verify that previously given away pages are still page locked.
429 	 */
430 	if (mfn_list[pfn] == MFN_INVALID) {
431 		CHECK_PAGE_LOCK(pfn);
432 	}
433 	mfn_list[pfn] = mfn;
434 
435 	t.ptr = mfn_to_ma(mfn) | MMU_MACHPHYS_UPDATE;
436 	t.val = pfn;
437 
438 	if (HYPERVISOR_mmu_update(&t, 1, &mmu_update_return, DOMID_SELF))
439 		panic("HYPERVISOR_mmu_update() failed");
440 	ASSERT(mmu_update_return == 1);
441 
442 	if (kpm_vbase != NULL && xen_kpm_page(pfn, PT_VALID | PT_WRITABLE) < 0)
443 		panic("reassign_pfn(): failed to enable kpm mapping");
444 }
445 
446 /*
447  * XXPV code to work around problems with GNTTABOP_map_grant_ref
448  * Hopefully we can remove this when GNTTABOP_map_grant_ref is fixed.
449  */
450 void
451 xen_fix_foreign(uint64_t va)
452 {
453 	uintptr_t v = va;
454 	htable_t *ht;
455 	uint_t entry;
456 	x86pte_t pte;
457 
458 	/*
459 	 * Look up the PTE for VA. If it is not marked foreign,
460 	 * add the appropriate soft bits and reinstall the new PTE.
461 	 */
462 	ht = htable_getpage(kas.a_hat, v, &entry);
463 	if (ht == NULL) {
464 		panic("xen_fix_foreign(va=0x%p) htable not found", (void *)v);
465 		return;
466 	}
467 	pte = x86pte_get(ht, entry);
468 	if ((pte & PT_SOFTWARE) < PT_FOREIGN) {
469 		pte |= PT_FOREIGN;
470 		if (HYPERVISOR_update_va_mapping(v, pte, UVMF_NONE) != 0)
471 			panic("xen_fix_foreign(va=0x%p) failed, pte=" FMT_PTE,
472 			    (void *)v, pte);
473 	}
474 	htable_release(ht);
475 }
476