1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27
28 #include <sys/mach_mmu.h>
29 #include <sys/machsystm.h>
30 #include <sys/cmn_err.h>
31 #include <sys/promif.h>
32 #include <sys/hypervisor.h>
33 #include <sys/bootconf.h>
34 #include <sys/ontrap.h>
35 #include <sys/rwlock.h>
36 #include <sys/sysmacros.h>
37 #include <vm/seg_kmem.h>
38 #include <vm/kboot_mmu.h>
39 #include <vm/hat_pte.h>
40 #include <vm/hat.h>
41 #include <vm/htable.h>
42 #include <vm/hat_i86.h>
43
start_info_t *xen_info;		/* Xen boot parameters (relocated at boot) */
ulong_t mfn_count;		/* number of entries in mfn_list[] */
mfn_t *mfn_list;		/* pfn -> mfn table for this domain */
mfn_t *mfn_list_pages;		/* pages that make a table of mfn's */
				/* that make up the pa_to_ma table */
mfn_t *mfn_list_pages_page;	/* page of mfn's for mfn_list_pages */
mfn_t cached_max_mfn;		/* cached max RAM mfn; 0 forces a refresh */
uintptr_t xen_virt_start;	/* NOTE(review): unused in this file; presumably Xen VA base */
pfn_t *mfn_to_pfn_mapping;	/* hypervisor's sparse mfn -> pfn table */
caddr_t xb_addr;		/* virtual addr for the store_mfn page */
54
55
/*
 * We need to prevent migration or suspension of a domU while it's
 * manipulating MFN values, as the MFN values will spontaneously
 * change. The next 4 routines provide a mechanism for that.
 * The basic idea is to use a reader/writer lock; readers are any thread
 * that is manipulating MFNs. Only the thread which is going to actually call
 * HYPERVISOR_suspend() will become a writer.
 *
 * Since various places need to manipulate MFNs and also call the HAT,
 * we track if a thread acquires reader status and allow it to recursively
 * do so again. This prevents deadlocks if a migration request
 * is started and waits for some reader, but then the previous reader needs
 * to call into the HAT.
 */
#define NUM_M2P_LOCKS 128	/* must stay a power of 2; see XM2P_HASH */
static struct {
	krwlock_t m2p_rwlock;
	char m2p_pad[64 - sizeof (krwlock_t)];	/* 64 byte cache line size */
} m2p_lock[NUM_M2P_LOCKS];

/* pick this thread's lock: hash t_tid into the power-of-2 lock array */
#define XM2P_HASH ((uintptr_t)curthread->t_tid & (NUM_M2P_LOCKS - 1))
77
78 void
xen_block_migrate(void)79 xen_block_migrate(void)
80 {
81 if (!DOMAIN_IS_INITDOMAIN(xen_info) &&
82 ++curthread->t_xpvcntr == 1)
83 rw_enter(&m2p_lock[XM2P_HASH].m2p_rwlock, RW_READER);
84 }
85
86 void
xen_allow_migrate(void)87 xen_allow_migrate(void)
88 {
89 if (!DOMAIN_IS_INITDOMAIN(xen_info) &&
90 --curthread->t_xpvcntr == 0)
91 rw_exit(&m2p_lock[XM2P_HASH].m2p_rwlock);
92 }
93
94 void
xen_start_migrate(void)95 xen_start_migrate(void)
96 {
97 int i;
98
99 ASSERT(curthread->t_xpvcntr == 0);
100 ++curthread->t_xpvcntr; /* this allows calls into HAT */
101 for (i = 0; i < NUM_M2P_LOCKS; ++i)
102 rw_enter(&m2p_lock[i].m2p_rwlock, RW_WRITER);
103 }
104
105 void
xen_end_migrate(void)106 xen_end_migrate(void)
107 {
108 int i;
109
110 for (i = 0; i < NUM_M2P_LOCKS; ++i)
111 rw_exit(&m2p_lock[i].m2p_rwlock);
112 ASSERT(curthread->t_xpvcntr == 1);
113 --curthread->t_xpvcntr;
114 }
115
116 /*ARGSUSED*/
117 void
set_pteval(paddr_t table,uint_t index,uint_t level,x86pte_t pteval)118 set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
119 {
120 mmu_update_t t;
121 maddr_t mtable = pa_to_ma(table);
122 int retcnt;
123
124 t.ptr = (mtable + index * pte_size) | MMU_NORMAL_PT_UPDATE;
125 t.val = pteval;
126 if (HYPERVISOR_mmu_update(&t, 1, &retcnt, DOMID_SELF) || retcnt != 1)
127 bop_panic("HYPERVISOR_mmu_update() failed");
128 }
129
/*
 * The start_info_t and mfn_list are initially mapped in low "boot" memory.
 * Each has a page aligned address and size. We relocate them up into the
 * kernel's normal address space at this point in time. We also create
 * the arrays that let the hypervisor suspend/resume a domain.
 */
void
xen_relocate_start_info(void)
{
	maddr_t mach_addr;
	size_t sz;		/* byte size of the object being relocated */
	size_t sz2;
	offset_t off;
	uintptr_t addr;		/* new high-memory VA */
	uintptr_t old;		/* old low-memory VA, unmapped when done */
	int i, j;

	/*
	 * In dom0, we have to account for the console_info structure
	 * which might immediately follow the start_info in memory.
	 */
	sz = sizeof (start_info_t);
	if (DOMAIN_IS_INITDOMAIN(xen_info) &&
	    xen_info->console.dom0.info_off >= sizeof (start_info_t)) {
		sz += xen_info->console.dom0.info_off - sizeof (start_info_t) +
		    xen_info->console.dom0.info_size;
	}
	sz = P2ROUNDUP(sz, MMU_PAGESIZE);
	addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
	/*
	 * NOTE(review): mach_addr is recomputed for the page at "off", so
	 * the extra "+ off" passed to kbm_map_ma() looks doubly offset
	 * (compare the mfn_list loop below, which passes mach_addr alone).
	 * Harmless when sz is a single page (off is always 0) -- verify
	 * the multi-page dom0 console_info case.
	 */
	for (off = 0; off < sz; off += MMU_PAGESIZE) {
		mach_addr = pa_to_ma(pfn_to_pa(va_to_pfn(
		    (caddr_t)xen_info + off)));
		kbm_map_ma(mach_addr + off, addr + off, 0);
	}
	boot_mapin((caddr_t)addr, sz);
	old = (uintptr_t)xen_info;
	xen_info = (start_info_t *)addr;
	for (off = 0; off < sz; off += MMU_PAGESIZE)
		kbm_unmap(old + off);

	/*
	 * Relocate the mfn_list, any number of pages.
	 */
	sz = P2ROUNDUP(mfn_count * sizeof (mfn_t), MMU_PAGESIZE);
	addr = (uintptr_t)vmem_xalloc(heap_arena, sz, MMU_PAGESIZE, 0,
	    0, 0, 0, VM_SLEEP);
	for (off = 0; off < sz; off += MMU_PAGESIZE) {
		mach_addr =
		    pa_to_ma(pfn_to_pa(va_to_pfn((caddr_t)mfn_list + off)));
		kbm_map_ma(mach_addr, addr + off, 0);
	}
	boot_mapin((caddr_t)addr, sz);
	old = (uintptr_t)mfn_list;
	mfn_list = (mfn_t *)addr;
	xen_info->mfn_list = (mfn_t)addr;
	for (off = 0; off < sz; off += MMU_PAGESIZE)
		kbm_unmap(old + off);

	/*
	 * Create the lists of mfn_list pages needed by suspend/resume.
	 * Note we skip this for domain 0 as it can't suspend/resume.
	 */
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		sz2 = P2ROUNDUP(mmu_btop(sz) * sizeof (mfn_t), MMU_PAGESIZE);
		/*
		 * NOTE(review): kmem_zalloc() conventionally takes KM_SLEEP;
		 * VM_SLEEP is a vmem flag.  Works only if the two values
		 * coincide -- confirm against sys/kmem.h and sys/vmem.h.
		 */
		mfn_list_pages = kmem_zalloc(sz2, VM_SLEEP);
		mfn_list_pages_page = kmem_zalloc(MMU_PAGESIZE, VM_SLEEP);
		i = 0;
		for (off = 0; off < sz; off += MMU_PAGESIZE) {
			j = mmu_btop(off);
			/* crossing into a new page of mfn_list_pages[]? */
			if (((j * sizeof (mfn_t)) & MMU_PAGEOFFSET) == 0) {
				mfn_list_pages_page[i++] =
				    pfn_to_mfn(va_to_pfn(&mfn_list_pages[j]));
			}
			mfn_list_pages[j] =
			    pfn_to_mfn(va_to_pfn((caddr_t)mfn_list + off));
		}
		HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
		    pfn_to_mfn(va_to_pfn(mfn_list_pages_page));
		HYPERVISOR_shared_info->arch.max_pfn = xen_info->nr_pages;
	}

	/*
	 * Remap the shared info (for I/O) into high memory, too.
	 */
	sz = MMU_PAGESIZE;
	addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
	kbm_map_ma(xen_info->shared_info, addr, 0);
	/* shared info has no PFN so don't do: boot_mapin((caddr_t)addr, sz) */
	old = (uintptr_t)HYPERVISOR_shared_info;
	HYPERVISOR_shared_info = (void *)addr;
	kbm_unmap(old);

	/*
	 * Remap the console info into high memory, too.
	 */
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		sz = MMU_PAGESIZE;
		addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
		/*
		 * NOTE(review): console.domU.mfn is an MFN, yet pfn_to_pa()
		 * is used as the page shift here -- mfn_to_ma() (as in the
		 * store_mfn mapping below) would say what is meant; verify
		 * the two are the same arithmetic.
		 */
		kbm_map_ma(pfn_to_pa(xen_info->console.domU.mfn), addr, 0);
		boot_mapin((caddr_t)addr, sz);
		old = (uintptr_t)HYPERVISOR_console_page;
		HYPERVISOR_console_page = (void *)addr;
		kbm_unmap(old);
	} else {
		HYPERVISOR_console_page = NULL;
	}

	/*
	 * On domUs we need to have the xenbus page (store_mfn) mapped into
	 * the kernel. This is referenced as xb_addr.
	 */
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		xb_addr = vmem_alloc(heap_arena, MMU_PAGESIZE, VM_SLEEP);
		kbm_map_ma(mfn_to_ma(xen_info->store_mfn),
		    (uintptr_t)xb_addr, 0);
		boot_mapin(xb_addr, MMU_PAGESIZE);
	}
}
248
249 /*
250 * Generate the pfn value to use for a foreign mfn.
251 */
252 pfn_t
xen_assign_pfn(mfn_t mfn)253 xen_assign_pfn(mfn_t mfn)
254 {
255 pfn_t pfn;
256
257 #ifdef DEBUG
258 /*
259 * make sure this MFN isn't in our list of MFNs
260 */
261 on_trap_data_t otd;
262 uint_t on_trap_ready = (t0.t_stk != NULL);
263
264 if (on_trap_ready) {
265 if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
266 pfn = mfn_to_pfn_mapping[mfn];
267 if (pfn < mfn_count && mfn_list[pfn] == mfn)
268 panic("xen_assign_pfn() mfn belongs to us");
269 }
270 no_trap();
271 }
272 #endif /* DEBUG */
273
274 if (mfn == MFN_INVALID)
275 panic("xen_assign_pfn(MFN_INVALID) not allowed");
276 pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
277 if (pfn == mfn)
278 panic("xen_assign_pfn(mfn) PFN_IS_FOREIGN_MFN bit already set");
279 return (pfn);
280 }
281
282 void
xen_release_pfn(pfn_t pfn)283 xen_release_pfn(pfn_t pfn)
284 {
285 if (pfn == PFN_INVALID)
286 panic("xen_release_pfn(PFN_INVALID) not allowed");
287 if ((pfn & PFN_IS_FOREIGN_MFN) == 0)
288 panic("mfn high bit not set");
289 }
290
291 uint_t
pfn_is_foreign(pfn_t pfn)292 pfn_is_foreign(pfn_t pfn)
293 {
294 if (pfn == PFN_INVALID)
295 return (0);
296 return ((pfn & PFN_IS_FOREIGN_MFN) != 0);
297 }
298
299 pfn_t
pte2pfn(x86pte_t pte,level_t l)300 pte2pfn(x86pte_t pte, level_t l)
301 {
302 mfn_t mfn = PTE2MFN(pte, l);
303
304 if ((pte & PT_SOFTWARE) >= PT_FOREIGN)
305 return ((pfn_t)mfn | PFN_IS_FOREIGN_MFN);
306 return (mfn_to_pfn(mfn));
307 }
308
309 mfn_t
pfn_to_mfn(pfn_t pfn)310 pfn_to_mfn(pfn_t pfn)
311 {
312 if (pfn == PFN_INVALID)
313 panic("pfn_to_mfn(PFN_INVALID) not allowed");
314
315 if (pfn & PFN_IS_FOREIGN_MFN)
316 return (pfn & ~PFN_IS_FOREIGN_MFN);
317
318 if (pfn >= mfn_count)
319 panic("pfn_to_mfn(): illegal PFN 0x%lx", pfn);
320
321 return (mfn_list[pfn]);
322 }
323
/*
 * This routine translates an MFN back into the corresponding PFN value.
 * It has to be careful since the mfn_to_pfn_mapping[] might fault
 * as that table is sparse. It also has to check for non-faulting mfn
 * values that are out of range and exceed the table.
 */
pfn_t
mfn_to_pfn(mfn_t mfn)
{
	pfn_t pfn;
	on_trap_data_t otd;
	/* on_trap() can only be used once t0's stack is initialized */
	uint_t on_trap_ready = (t0.t_stk != NULL);

	/*
	 * Cleared at a suspend or migrate
	 */
	if (cached_max_mfn == 0)
		cached_max_mfn =
		    HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);

	/* past the machine's last RAM page: must be a foreign mfn */
	if (cached_max_mfn < mfn)
		return ((pfn_t)mfn | PFN_IS_FOREIGN_MFN);

	if (on_trap_ready && on_trap(&otd, OT_DATA_ACCESS)) {
		/* faulted probing the sparse table: treat as foreign */
		pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
	} else {
		pfn = mfn_to_pfn_mapping[mfn];

		/* cross-check the reverse mapping before trusting it */
		if (pfn == PFN_INVALID || pfn >= mfn_count ||
		    pfn_to_mfn(pfn) != mfn)
			pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
	}

	if (on_trap_ready)
		no_trap();

	/*
	 * If khat_running is set then we should be checking
	 * in domUs that migration is blocked while using the
	 * mfn_to_pfn_mapping[] table.
	 */
	ASSERT(!khat_running || DOMAIN_IS_INITDOMAIN(xen_info) ||
	    rw_read_held(&m2p_lock[XM2P_HASH].m2p_rwlock));

	return (pfn);
}
370
371 /*
372 * From a pseudo-physical address, find the corresponding machine address.
373 */
374 maddr_t
pa_to_ma(paddr_t pa)375 pa_to_ma(paddr_t pa)
376 {
377 mfn_t mfn = pfn_to_mfn(mmu_btop(pa));
378
379 if (mfn == MFN_INVALID)
380 panic("pa_to_ma() got MFN_INVALID");
381 return (mfn_to_ma(mfn) + (pa & MMU_PAGEOFFSET));
382 }
383
384 /*
385 * From a machine address, find the corresponding pseudo-physical address.
386 */
387 paddr_t
ma_to_pa(maddr_t ma)388 ma_to_pa(maddr_t ma)
389 {
390 pfn_t pfn = mfn_to_pfn(mmu_btop(ma));
391
392 if (pfn == PFN_INVALID)
393 panic("ma_to_pa() got PFN_INVALID");
394 return (pfn_to_pa(pfn) + (ma & MMU_PAGEOFFSET));
395 }
396
/*
 * When calling reassign_pfn(), the page must be (at least) read locked
 * to make sure swrand does not try to grab it.
 *
 * CHECK_PAGE_LOCK() enforces this on DEBUG kernels, panicking when the
 * pfn has a page_t that isn't locked; non-DEBUG builds compile it away.
 */
#ifdef DEBUG
#define CHECK_PAGE_LOCK(pfn) { \
	page_t *pp = page_numtopp_nolock(pfn); \
	if ((pp != NULL) && (!PAGE_LOCKED(pp))) { \
		panic("reassign_pfn() called with unlocked page (pfn 0x%lx)", \
		    pfn); \
	} \
}
#else /* DEBUG */
#define CHECK_PAGE_LOCK(pfn)
#endif /* DEBUG */
412
/*
 * Reassign a new machine page to back a physical address.
 * mfn == MFN_INVALID means the page is being given away: only the
 * local bookkeeping (mfn_list[], kpm) is updated.  Otherwise the
 * hypervisor's machine-to-physical table is updated too.
 */
void
reassign_pfn(pfn_t pfn, mfn_t mfn)
{
	int mmu_update_return;
	mmu_update_t t;
	extern void update_contig_pfnlist(pfn_t, mfn_t, mfn_t);

	ASSERT(pfn != PFN_INVALID);
	ASSERT(!pfn_is_foreign(pfn));

	ASSERT(pfn < mfn_count);
	/* keep the contig pfn list in sync with the old -> new mfn change */
	update_contig_pfnlist(pfn, mfn_list[pfn], mfn);
	/*
	 * Giving the page away: drop any writable kpm mapping and just
	 * record the loss; no hypervisor m2p update is issued.
	 */
	if (mfn == MFN_INVALID) {
		CHECK_PAGE_LOCK(pfn);
		if (kpm_vbase != NULL && xen_kpm_page(pfn, 0) < 0)
			panic("reassign_pfn(): failed to remove kpm mapping");
		mfn_list[pfn] = mfn;
		return;
	}

	/*
	 * Verify that previously given away pages are still page locked.
	 */
	if (mfn_list[pfn] == MFN_INVALID) {
		CHECK_PAGE_LOCK(pfn);
	}
	mfn_list[pfn] = mfn;

	/* ask the hypervisor to update its machine-to-physical table */
	t.ptr = mfn_to_ma(mfn) | MMU_MACHPHYS_UPDATE;
	t.val = pfn;

	if (HYPERVISOR_mmu_update(&t, 1, &mmu_update_return, DOMID_SELF))
		panic("HYPERVISOR_mmu_update() failed");
	ASSERT(mmu_update_return == 1);

	/* re-establish a valid, writable kpm mapping for the new page */
	if (kpm_vbase != NULL && xen_kpm_page(pfn, PT_VALID | PT_WRITABLE) < 0)
		panic("reassign_pfn(): failed to enable kpm mapping");
}
454