/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/mach_mmu.h>
#include <sys/machsystm.h>
#include <sys/cmn_err.h>
#include <sys/promif.h>
#include <sys/hypervisor.h>
#include <sys/bootconf.h>
#include <sys/ontrap.h>
#include <sys/rwlock.h>
#include <sys/sysmacros.h>
#include <vm/seg_kmem.h>
#include <vm/kboot_mmu.h>
#include <vm/hat_pte.h>
#include <vm/hat.h>
#include <vm/htable.h>
#include <vm/hat_i86.h>

start_info_t *xen_info;
ulong_t mfn_count;
mfn_t *mfn_list;
mfn_t *mfn_list_pages;		/* pages that make a table of mfn's */
				/* that make up the pa_to_ma table */
mfn_t *mfn_list_pages_page;	/* page of mfn's for mfn_list_pages */
mfn_t cached_max_mfn;
uintptr_t xen_virt_start;
pfn_t *mfn_to_pfn_mapping;
caddr_t xb_addr;		/* virtual addr for the store_mfn page */


/*
 * We need to prevent migration or suspension of a domU while it's
 * manipulating MFN values, as the MFN values will spontaneously
 * change.  The next 4 routines provide a mechanism for that.
 * The basic idea is to use a reader/writer lock: readers are any
 * threads that are manipulating MFNs.  Only the thread that is going
 * to actually call HYPERVISOR_suspend() becomes a writer.
 *
 * Since various places need to manipulate MFNs and also call the HAT,
 * we track whether a thread has acquired reader status and allow it to
 * recursively do so again.  This prevents deadlocks if a migration
 * request is started and waits for some reader, but that reader then
 * needs to call into the HAT.
 */
#define	NUM_M2P_LOCKS 128
static struct {
	krwlock_t m2p_rwlock;
	char m2p_pad[64 - sizeof (krwlock_t)];	/* 64 byte cache line size */
} m2p_lock[NUM_M2P_LOCKS];

#define	XM2P_HASH	((uintptr_t)curthread->t_tid & (NUM_M2P_LOCKS - 1))

void
xen_block_migrate(void)
{
	if (!DOMAIN_IS_INITDOMAIN(xen_info) &&
	    ++curthread->t_xpvcntr == 1)
		rw_enter(&m2p_lock[XM2P_HASH].m2p_rwlock, RW_READER);
}

void
xen_allow_migrate(void)
{
	if (!DOMAIN_IS_INITDOMAIN(xen_info) &&
	    --curthread->t_xpvcntr == 0)
		rw_exit(&m2p_lock[XM2P_HASH].m2p_rwlock);
}

void
xen_start_migrate(void)
{
	int i;

	ASSERT(curthread->t_xpvcntr == 0);
	++curthread->t_xpvcntr; /* this allows calls into HAT */
	for (i = 0; i < NUM_M2P_LOCKS; ++i)
		rw_enter(&m2p_lock[i].m2p_rwlock, RW_WRITER);
}

void
xen_end_migrate(void)
{
	int i;

	for (i = 0; i < NUM_M2P_LOCKS; ++i)
		rw_exit(&m2p_lock[i].m2p_rwlock);
	ASSERT(curthread->t_xpvcntr == 1);
	--curthread->t_xpvcntr;
}
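/*
 * A minimal usage sketch of the routines above (illustrative only and
 * compiled out; the helper name and its argument are hypothetical, not
 * part of this file).  Any domU code that reads or writes MFN values
 * brackets the access with xen_block_migrate()/xen_allow_migrate() so
 * that a concurrent suspend can't invalidate the MFN mid-use:
 */
#ifdef XEN_MMU_EXAMPLE		/* never defined; illustrative only */
static mfn_t
example_stable_pfn_to_mfn(pfn_t pfn)
{
	mfn_t mfn;

	xen_block_migrate();	/* acquire (possibly recursive) reader */
	mfn = pfn_to_mfn(pfn);	/* MFN can't change during the lookup */
	xen_allow_migrate();	/* release reader status */
	return (mfn);
}
#endif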
/*ARGSUSED*/
void
set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
{
	mmu_update_t t;
	maddr_t mtable = pa_to_ma(table);
	int retcnt;

	t.ptr = (mtable + index * pte_size) | MMU_NORMAL_PT_UPDATE;
	t.val = pteval;
	if (HYPERVISOR_mmu_update(&t, 1, &retcnt, DOMID_SELF) || retcnt != 1)
		bop_panic("HYPERVISOR_mmu_update() failed");
}

/*
 * The start_info_t and mfn_list are initially mapped in low "boot" memory.
 * Each has a page aligned address and size.  We relocate them up into the
 * kernel's normal address space at this point in time.  We also create
 * the arrays that let the hypervisor suspend/resume a domain.
 */
void
xen_relocate_start_info(void)
{
	maddr_t mach_addr;
	size_t sz;
	size_t sz2;
	offset_t off;
	uintptr_t addr;
	uintptr_t old;
	int i, j;

	/*
	 * In dom0, we have to account for the console_info structure
	 * which might immediately follow the start_info in memory.
	 */
	sz = sizeof (start_info_t);
	if (DOMAIN_IS_INITDOMAIN(xen_info) &&
	    xen_info->console.dom0.info_off >= sizeof (start_info_t)) {
		sz += xen_info->console.dom0.info_off - sizeof (start_info_t) +
		    xen_info->console.dom0.info_size;
	}
	sz = P2ROUNDUP(sz, MMU_PAGESIZE);
	addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
	for (off = 0; off < sz; off += MMU_PAGESIZE) {
		mach_addr = pa_to_ma(pfn_to_pa(va_to_pfn(
		    (caddr_t)xen_info + off)));
		kbm_map_ma(mach_addr, addr + off, 0);
	}
	boot_mapin((caddr_t)addr, sz);
	old = (uintptr_t)xen_info;
	xen_info = (start_info_t *)addr;
	for (off = 0; off < sz; off += MMU_PAGESIZE)
		kbm_unmap(old + off);

	/*
	 * Relocate the mfn_list, any number of pages.
	 */
	sz = P2ROUNDUP(mfn_count * sizeof (mfn_t), MMU_PAGESIZE);
	addr = (uintptr_t)vmem_xalloc(heap_arena, sz, MMU_PAGESIZE, 0,
	    0, 0, 0, VM_SLEEP);
	for (off = 0; off < sz; off += MMU_PAGESIZE) {
		mach_addr =
		    pa_to_ma(pfn_to_pa(va_to_pfn((caddr_t)mfn_list + off)));
		kbm_map_ma(mach_addr, addr + off, 0);
	}
	boot_mapin((caddr_t)addr, sz);
	old = (uintptr_t)mfn_list;
	mfn_list = (mfn_t *)addr;
	xen_info->mfn_list = (mfn_t)addr;
	for (off = 0; off < sz; off += MMU_PAGESIZE)
		kbm_unmap(old + off);

	/*
	 * Create the lists of mfn_list pages needed by suspend/resume.
	 * Note we skip this for domain 0 as it can't suspend/resume.
	 */
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		sz2 = P2ROUNDUP(mmu_btop(sz) * sizeof (mfn_t), MMU_PAGESIZE);
		mfn_list_pages = kmem_zalloc(sz2, KM_SLEEP);
		mfn_list_pages_page = kmem_zalloc(MMU_PAGESIZE, KM_SLEEP);
		i = 0;
		for (off = 0; off < sz; off += MMU_PAGESIZE) {
			j = mmu_btop(off);
			if (((j * sizeof (mfn_t)) & MMU_PAGEOFFSET) == 0) {
				mfn_list_pages_page[i++] =
				    pfn_to_mfn(va_to_pfn(&mfn_list_pages[j]));
			}
			mfn_list_pages[j] =
			    pfn_to_mfn(va_to_pfn((caddr_t)mfn_list + off));
		}
		HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
		    pfn_to_mfn(va_to_pfn(mfn_list_pages_page));
		HYPERVISOR_shared_info->arch.max_pfn = xen_info->nr_pages;
	}

	/*
	 * Remap the shared info (for I/O) into high memory, too.
	 */
	sz = MMU_PAGESIZE;
	addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
	kbm_map_ma(xen_info->shared_info, addr, 0);
	/* shared info has no PFN so don't do: boot_mapin((caddr_t)addr, sz) */
	old = (uintptr_t)HYPERVISOR_shared_info;
	HYPERVISOR_shared_info = (void *)addr;
	kbm_unmap(old);

	/*
	 * Remap the console info into high memory, too.
	 */
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		sz = MMU_PAGESIZE;
		addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
		kbm_map_ma(mfn_to_ma(xen_info->console.domU.mfn), addr, 0);
		boot_mapin((caddr_t)addr, sz);
		old = (uintptr_t)HYPERVISOR_console_page;
		HYPERVISOR_console_page = (void *)addr;
		kbm_unmap(old);
	} else {
		HYPERVISOR_console_page = NULL;
	}

	/*
	 * On domUs we need to have the xenbus page (store_mfn) mapped into
	 * the kernel.  This is referenced as xb_addr.
	 */
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		xb_addr = vmem_alloc(heap_arena, MMU_PAGESIZE, VM_SLEEP);
		kbm_map_ma(mfn_to_ma(xen_info->store_mfn),
		    (uintptr_t)xb_addr, 0);
		boot_mapin(xb_addr, MMU_PAGESIZE);
	}
}

/*
 * Generate the pfn value to use for a foreign mfn.
 */
pfn_t
xen_assign_pfn(mfn_t mfn)
{
	pfn_t pfn;

#ifdef DEBUG
	/*
	 * make sure this MFN isn't in our list of MFNs
	 */
	on_trap_data_t otd;
	uint_t on_trap_ready = (t0.t_stk != NULL);

	if (on_trap_ready) {
		if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
			pfn = mfn_to_pfn_mapping[mfn];
			if (pfn < mfn_count && mfn_list[pfn] == mfn)
				panic("xen_assign_pfn() mfn belongs to us");
		}
		no_trap();
	}
#endif /* DEBUG */

	if (mfn == MFN_INVALID)
		panic("xen_assign_pfn(MFN_INVALID) not allowed");
	pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
	if (pfn == mfn)
		panic("xen_assign_pfn(mfn) PFN_IS_FOREIGN_MFN bit already set");
	return (pfn);
}
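/*
 * A minimal sketch of the foreign PFN encoding (illustrative only and
 * compiled out; the function name is hypothetical).  xen_assign_pfn()
 * tags a foreign MFN with PFN_IS_FOREIGN_MFN; pfn_is_foreign() tests
 * that tag, and pfn_to_mfn() strips it to recover the original MFN:
 */
#ifdef XEN_MMU_EXAMPLE		/* never defined; illustrative only */
static void
example_foreign_pfn_round_trip(mfn_t foreign_mfn)
{
	pfn_t pfn = xen_assign_pfn(foreign_mfn);

	ASSERT(pfn_is_foreign(pfn));		/* high bit is set */
	ASSERT(pfn_to_mfn(pfn) == foreign_mfn);	/* bit strips back off */
	xen_release_pfn(pfn);	/* sanity checks only; nothing to free */
}
#endif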
void
xen_release_pfn(pfn_t pfn)
{
	if (pfn == PFN_INVALID)
		panic("xen_release_pfn(PFN_INVALID) not allowed");
	if ((pfn & PFN_IS_FOREIGN_MFN) == 0)
		panic("mfn high bit not set");
}

uint_t
pfn_is_foreign(pfn_t pfn)
{
	if (pfn == PFN_INVALID)
		return (0);
	return ((pfn & PFN_IS_FOREIGN_MFN) != 0);
}

pfn_t
pte2pfn(x86pte_t pte, level_t l)
{
	mfn_t mfn = PTE2MFN(pte, l);

	if ((pte & PT_SOFTWARE) >= PT_FOREIGN)
		return ((pfn_t)mfn | PFN_IS_FOREIGN_MFN);
	return (mfn_to_pfn(mfn));
}

mfn_t
pfn_to_mfn(pfn_t pfn)
{
	if (pfn == PFN_INVALID)
		panic("pfn_to_mfn(PFN_INVALID) not allowed");

	if (pfn & PFN_IS_FOREIGN_MFN)
		return (pfn & ~PFN_IS_FOREIGN_MFN);

	if (pfn >= mfn_count)
		panic("pfn_to_mfn(): illegal PFN 0x%lx", pfn);

	return (mfn_list[pfn]);
}

/*
 * This routine translates an MFN back into the corresponding PFN value.
 * It has to be careful, since a lookup in mfn_to_pfn_mapping[] might
 * fault, as that table is sparse.  It also has to check for MFNs that
 * don't fault but lie beyond the end of the table.
 */
pfn_t
mfn_to_pfn(mfn_t mfn)
{
	pfn_t pfn;
	on_trap_data_t otd;
	uint_t on_trap_ready = (t0.t_stk != NULL);

	/*
	 * Cleared at a suspend or migrate
	 */
	if (cached_max_mfn == 0)
		cached_max_mfn =
		    HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);

	if (cached_max_mfn < mfn)
		return ((pfn_t)mfn | PFN_IS_FOREIGN_MFN);

	if (on_trap_ready && on_trap(&otd, OT_DATA_ACCESS)) {
		pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
	} else {
		pfn = mfn_to_pfn_mapping[mfn];

		if (pfn == PFN_INVALID || pfn >= mfn_count ||
		    pfn_to_mfn(pfn) != mfn)
			pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
	}

	if (on_trap_ready)
		no_trap();

	/*
	 * If khat_running is set then we should be checking
	 * in domUs that migration is blocked while using the
	 * mfn_to_pfn_mapping[] table.
	 */
	ASSERT(!khat_running || DOMAIN_IS_INITDOMAIN(xen_info) ||
	    rw_read_held(&m2p_lock[XM2P_HASH].m2p_rwlock));

	return (pfn);
}

/*
 * From a pseudo-physical address, find the corresponding machine address.
 */
maddr_t
pa_to_ma(paddr_t pa)
{
	mfn_t mfn = pfn_to_mfn(mmu_btop(pa));

	if (mfn == MFN_INVALID)
		panic("pa_to_ma() got MFN_INVALID");
	return (mfn_to_ma(mfn) + (pa & MMU_PAGEOFFSET));
}

/*
 * From a machine address, find the corresponding pseudo-physical address.
 */
paddr_t
ma_to_pa(maddr_t ma)
{
	pfn_t pfn = mfn_to_pfn(mmu_btop(ma));

	if (pfn == PFN_INVALID)
		panic("ma_to_pa() got PFN_INVALID");
	return (pfn_to_pa(pfn) + (ma & MMU_PAGEOFFSET));
}
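/*
 * A minimal sketch of the translations above (illustrative only and
 * compiled out; the function name is hypothetical).  For an address
 * backed by one of our own (non-foreign) pages, pa_to_ma() and
 * ma_to_pa() are inverses, and both preserve the intra-page offset:
 */
#ifdef XEN_MMU_EXAMPLE		/* never defined; illustrative only */
static void
example_pa_ma_round_trip(paddr_t pa)
{
	maddr_t ma = pa_to_ma(pa);

	ASSERT((ma & MMU_PAGEOFFSET) == (pa & MMU_PAGEOFFSET));
	ASSERT(ma_to_pa(ma) == pa);
}
#endif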
/*
 * When calling reassign_pfn(), the page must be (at least) read locked
 * to make sure swrand does not try to grab it.
 */
#ifdef DEBUG
#define	CHECK_PAGE_LOCK(pfn)	{				\
	page_t *pp = page_numtopp_nolock(pfn);			\
	if ((pp != NULL) && (!PAGE_LOCKED(pp))) {		\
		panic("reassign_pfn() called with unlocked page (pfn 0x%lx)", \
		    pfn);					\
	}							\
}
#else	/* DEBUG */
#define	CHECK_PAGE_LOCK(pfn)
#endif	/* DEBUG */

/*
 * Reassign a new machine page to back a physical address.
 */
void
reassign_pfn(pfn_t pfn, mfn_t mfn)
{
	int mmu_update_return;
	mmu_update_t t;
	extern void update_contig_pfnlist(pfn_t, mfn_t, mfn_t);

	ASSERT(pfn != PFN_INVALID);
	ASSERT(!pfn_is_foreign(pfn));

	ASSERT(pfn < mfn_count);
	update_contig_pfnlist(pfn, mfn_list[pfn], mfn);
	if (mfn == MFN_INVALID) {
		CHECK_PAGE_LOCK(pfn);
		if (kpm_vbase != NULL && xen_kpm_page(pfn, 0) < 0)
			panic("reassign_pfn(): failed to remove kpm mapping");
		mfn_list[pfn] = mfn;
		return;
	}

	/*
	 * Verify that previously given away pages are still page locked.
	 */
	if (mfn_list[pfn] == MFN_INVALID) {
		CHECK_PAGE_LOCK(pfn);
	}
	mfn_list[pfn] = mfn;

	t.ptr = mfn_to_ma(mfn) | MMU_MACHPHYS_UPDATE;
	t.val = pfn;

	if (HYPERVISOR_mmu_update(&t, 1, &mmu_update_return, DOMID_SELF))
		panic("HYPERVISOR_mmu_update() failed");
	ASSERT(mmu_update_return == 1);

	if (kpm_vbase != NULL && xen_kpm_page(pfn, PT_VALID | PT_WRITABLE) < 0)
		panic("reassign_pfn(): failed to enable kpm mapping");
}

/*
 * XXPV code to work around problems with GNTTABOP_map_grant_ref.
 * Hopefully we can remove this when GNTTABOP_map_grant_ref is fixed.
 */
void
xen_fix_foreign(uint64_t va)
{
	uintptr_t v = va;
	htable_t *ht;
	uint_t entry;
	x86pte_t pte;

	/*
	 * Look up the PTE for VA.  If it is not marked foreign,
	 * add the appropriate soft bits and reinstall the new PTE.
	 */
	ht = htable_getpage(kas.a_hat, v, &entry);
	if (ht == NULL)
		panic("xen_fix_foreign(va=0x%p) htable not found", (void *)v);
	pte = x86pte_get(ht, entry);
	if ((pte & PT_SOFTWARE) < PT_FOREIGN) {
		pte |= PT_FOREIGN;
		if (HYPERVISOR_update_va_mapping(v, pte, UVMF_NONE) != 0)
			panic("xen_fix_foreign(va=0x%p) failed, pte=" FMT_PTE,
			    (void *)v, pte);
	}
	htable_release(ht);
}
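/*
 * A minimal sketch of the reassign_pfn() protocol (illustrative only
 * and compiled out; the function name is hypothetical).  Per the
 * comment above CHECK_PAGE_LOCK(), the caller holds the page at least
 * read locked; passing MFN_INVALID gives the backing machine page
 * away, and a later call installs a replacement:
 */
#ifdef XEN_MMU_EXAMPLE		/* never defined; illustrative only */
static void
example_replace_backing_page(pfn_t pfn, mfn_t new_mfn)
{
	ASSERT(PAGE_LOCKED(page_numtopp_nolock(pfn)));

	reassign_pfn(pfn, MFN_INVALID);	/* drop the old machine page */
	reassign_pfn(pfn, new_mfn);	/* back pfn with the new page */
}
#endif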