/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


#include <sys/mach_mmu.h>
#include <sys/machsystm.h>
#include <sys/cmn_err.h>
#include <sys/promif.h>
#include <sys/hypervisor.h>
#include <sys/bootconf.h>
#include <sys/ontrap.h>
#include <sys/rwlock.h>
#include <sys/sysmacros.h>
#include <vm/seg_kmem.h>
#include <vm/kboot_mmu.h>
#include <vm/hat_pte.h>
#include <vm/hat.h>
#include <vm/htable.h>
#include <vm/hat_i86.h>

start_info_t *xen_info;
ulong_t mfn_count;
mfn_t *mfn_list;
mfn_t *mfn_list_pages;		/* pages that make a table of mfn's */
				/* that make up the pa_to_ma table */
mfn_t *mfn_list_pages_page;	/* page of mfn's for mfn_list_pages */
mfn_t cached_max_mfn;
uintptr_t xen_virt_start;
pfn_t *mfn_to_pfn_mapping;
caddr_t xb_addr;		/* virtual addr for the store_mfn page */
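
/*
 * Illustrative sketch, not part of the original file: mfn_list[] is the
 * PFN -> MFN (p2m) table and mfn_to_pfn_mapping[] is the hypervisor's
 * MFN -> PFN (m2p) table, so for any machine page we own the two tables
 * should agree:
 *
 *	mfn_to_pfn_mapping[mfn_list[pfn]] == pfn
 *
 * Real code has to allow for mfn_to_pfn_mapping[] faulting, since the
 * m2p table is sparse (see mfn_to_pfn() below); this hypothetical check
 * ignores that for clarity.
 */
#ifdef DEBUG
static void
p2m_m2p_check(pfn_t pfn)	/* hypothetical helper */
{
	mfn_t mfn;

	ASSERT(pfn < mfn_count);
	mfn = mfn_list[pfn];
	if (mfn != MFN_INVALID)
		ASSERT(mfn_to_pfn_mapping[mfn] == pfn);
}
#endif	/* DEBUG */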

/*
 * We need to prevent migration or suspension of a domU while it's
 * manipulating MFN values, as the MFN values will spontaneously
 * change. The next 4 routines provide a mechanism for that.
 * The basic idea is to use a reader/writer lock: readers are any threads
 * that are manipulating MFNs. Only the thread that is going to actually
 * call HYPERVISOR_suspend() becomes a writer.
 *
 * Since various places need to manipulate MFNs and also call the HAT,
 * we track whether a thread has acquired reader status and allow it to
 * recursively do so again. This prevents deadlocks if a migration request
 * is started and waits for some reader, but then an existing reader needs
 * to call into the HAT.
 */
#define	NUM_M2P_LOCKS	128
static struct {
	krwlock_t m2p_rwlock;
	char m2p_pad[64 - sizeof (krwlock_t)];	/* 64 byte cache line size */
} m2p_lock[NUM_M2P_LOCKS];

#define	XM2P_HASH	((uintptr_t)curthread->t_tid & (NUM_M2P_LOCKS - 1))

void
xen_block_migrate(void)
{
	if (!DOMAIN_IS_INITDOMAIN(xen_info) &&
	    ++curthread->t_xpvcntr == 1)
		rw_enter(&m2p_lock[XM2P_HASH].m2p_rwlock, RW_READER);
}

void
xen_allow_migrate(void)
{
	if (!DOMAIN_IS_INITDOMAIN(xen_info) &&
	    --curthread->t_xpvcntr == 0)
		rw_exit(&m2p_lock[XM2P_HASH].m2p_rwlock);
}

void
xen_start_migrate(void)
{
	int i;

	ASSERT(curthread->t_xpvcntr == 0);
	++curthread->t_xpvcntr;		/* this allows calls into HAT */
	for (i = 0; i < NUM_M2P_LOCKS; ++i)
		rw_enter(&m2p_lock[i].m2p_rwlock, RW_WRITER);
}

void
xen_end_migrate(void)
{
	int i;

	for (i = 0; i < NUM_M2P_LOCKS; ++i)
		rw_exit(&m2p_lock[i].m2p_rwlock);
	ASSERT(curthread->t_xpvcntr == 1);
	--curthread->t_xpvcntr;
}
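
/*
 * Illustrative usage sketch, not part of the original file: domU code
 * that reads an MFN and then acts on it should hold the migrate block
 * across the whole operation, since the MFN is only stable while the
 * reader side is held. The function below is hypothetical.
 */
static void
example_use_mfn(pfn_t pfn)	/* hypothetical helper */
{
	mfn_t mfn;

	ASSERT(pfn < mfn_count);

	xen_block_migrate();	/* reader side; recursive entry is fine */
	mfn = mfn_list[pfn];
	/* ... hand mfn to the hypervisor, build a PTE with it, etc ... */
	xen_allow_migrate();	/* mfn may spontaneously change after this */
}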

/*ARGSUSED*/
void
set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
{
	mmu_update_t t;
	maddr_t mtable = pa_to_ma(table);
	int retcnt;

	t.ptr = (mtable + index * pte_size) | MMU_NORMAL_PT_UPDATE;
	t.val = pteval;
	if (HYPERVISOR_mmu_update(&t, 1, &retcnt, DOMID_SELF) || retcnt != 1)
		bop_panic("HYPERVISOR_mmu_update() failed");
}

/*
 * The start_info_t and mfn_list are initially mapped in low "boot" memory.
 * Each has a page aligned address and size. We relocate them up into the
 * kernel's normal address space at this point. We also create the arrays
 * that let the hypervisor suspend/resume a domain.
 */
void
xen_relocate_start_info(void)
{
	maddr_t mach_addr;
	size_t sz;
	size_t sz2;
	offset_t off;
	uintptr_t addr;
	uintptr_t old;
	int i, j;

	/*
	 * In dom0, we have to account for the console_info structure
	 * which might immediately follow the start_info in memory.
	 */
	sz = sizeof (start_info_t);
	if (DOMAIN_IS_INITDOMAIN(xen_info) &&
	    xen_info->console.dom0.info_off >= sizeof (start_info_t)) {
		sz += xen_info->console.dom0.info_off - sizeof (start_info_t) +
		    xen_info->console.dom0.info_size;
	}
	sz = P2ROUNDUP(sz, MMU_PAGESIZE);
	addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
	for (off = 0; off < sz; off += MMU_PAGESIZE) {
		mach_addr = pa_to_ma(pfn_to_pa(va_to_pfn(
		    (caddr_t)xen_info + off)));
		kbm_map_ma(mach_addr + off, addr + off, 0);
	}
	boot_mapin((caddr_t)addr, sz);
	old = (uintptr_t)xen_info;
	xen_info = (start_info_t *)addr;
	for (off = 0; off < sz; off += MMU_PAGESIZE)
		kbm_unmap(old + off);

	/*
	 * Relocate the mfn_list, any number of pages.
	 */
	sz = P2ROUNDUP(mfn_count * sizeof (mfn_t), MMU_PAGESIZE);
	addr = (uintptr_t)vmem_xalloc(heap_arena, sz, MMU_PAGESIZE, 0,
	    0, 0, 0, VM_SLEEP);
	for (off = 0; off < sz; off += MMU_PAGESIZE) {
		mach_addr =
		    pa_to_ma(pfn_to_pa(va_to_pfn((caddr_t)mfn_list + off)));
		kbm_map_ma(mach_addr, addr + off, 0);
	}
	boot_mapin((caddr_t)addr, sz);
	old = (uintptr_t)mfn_list;
	mfn_list = (mfn_t *)addr;
	xen_info->mfn_list = (mfn_t)addr;
	for (off = 0; off < sz; off += MMU_PAGESIZE)
		kbm_unmap(old + off);

	/*
	 * Create the lists of mfn_list pages needed by suspend/resume.
	 * Note we skip this for domain 0 as it can't suspend/resume.
	 */
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		sz2 = P2ROUNDUP(mmu_btop(sz) * sizeof (mfn_t), MMU_PAGESIZE);
		mfn_list_pages = kmem_zalloc(sz2, KM_SLEEP);
		mfn_list_pages_page = kmem_zalloc(MMU_PAGESIZE, KM_SLEEP);
		i = 0;
		for (off = 0; off < sz; off += MMU_PAGESIZE) {
			j = mmu_btop(off);
			if (((j * sizeof (mfn_t)) & MMU_PAGEOFFSET) == 0) {
				mfn_list_pages_page[i++] =
				    pfn_to_mfn(va_to_pfn(&mfn_list_pages[j]));
			}
			mfn_list_pages[j] =
			    pfn_to_mfn(va_to_pfn((caddr_t)mfn_list + off));
		}
		HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
		    pfn_to_mfn(va_to_pfn(mfn_list_pages_page));
		HYPERVISOR_shared_info->arch.max_pfn = xen_info->nr_pages;
	}

	/*
	 * Remap the shared info (for I/O) into high memory, too.
	 */
	sz = MMU_PAGESIZE;
	addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
	kbm_map_ma(xen_info->shared_info, addr, 0);
	/* shared info has no PFN so don't do: boot_mapin((caddr_t)addr, sz) */
	old = (uintptr_t)HYPERVISOR_shared_info;
	HYPERVISOR_shared_info = (void *)addr;
	kbm_unmap(old);

	/*
	 * Remap the console info into high memory, too.
	 */
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		sz = MMU_PAGESIZE;
		addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
		kbm_map_ma(mfn_to_ma(xen_info->console.domU.mfn), addr, 0);
		boot_mapin((caddr_t)addr, sz);
		old = (uintptr_t)HYPERVISOR_console_page;
		HYPERVISOR_console_page = (void *)addr;
		kbm_unmap(old);
	} else {
		HYPERVISOR_console_page = NULL;
	}

	/*
	 * On domUs we need to have the xenbus page (store_mfn) mapped into
	 * the kernel. This is referenced as xb_addr.
	 */
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		xb_addr = vmem_alloc(heap_arena, MMU_PAGESIZE, VM_SLEEP);
		kbm_map_ma(mfn_to_ma(xen_info->store_mfn),
		    (uintptr_t)xb_addr, 0);
		boot_mapin(xb_addr, MMU_PAGESIZE);
	}
}
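
/*
 * Illustrative sketch, not part of the original file: the suspend/resume
 * tables built above form a three-level tree rooted in the shared info:
 *
 *	pfn_to_mfn_frame_list_list -> the page behind mfn_list_pages_page
 *	mfn_list_pages_page[i]	   -> the i-th page of mfn_list_pages
 *	mfn_list_pages[j]	   -> the j-th page of mfn_list itself
 *
 * A debug-only recheck of those invariants might look like this; the
 * function is hypothetical.
 */
#ifdef DEBUG
static void
verify_mfn_frame_lists(void)	/* hypothetical helper */
{
	ulong_t per_page = MMU_PAGESIZE / sizeof (mfn_t);
	ulong_t npages =
	    mmu_btop(P2ROUNDUP(mfn_count * sizeof (mfn_t), MMU_PAGESIZE));
	ulong_t j;

	for (j = 0; j < npages; j++) {
		/* every per_page-th entry also appears in the top page */
		if ((j % per_page) == 0) {
			ASSERT(mfn_list_pages_page[j / per_page] ==
			    pfn_to_mfn(va_to_pfn(&mfn_list_pages[j])));
		}
		ASSERT(mfn_list_pages[j] ==
		    pfn_to_mfn(va_to_pfn((caddr_t)mfn_list + mmu_ptob(j))));
	}
}
#endif	/* DEBUG */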

/*
 * Generate the pfn value to use for a foreign mfn.
 */
pfn_t
xen_assign_pfn(mfn_t mfn)
{
	pfn_t pfn;

#ifdef DEBUG
	/*
	 * make sure this MFN isn't in our list of MFNs
	 */
	on_trap_data_t otd;
	uint_t on_trap_ready = (t0.t_stk != NULL);

	if (on_trap_ready) {
		if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
			pfn = mfn_to_pfn_mapping[mfn];
			if (pfn < mfn_count && mfn_list[pfn] == mfn)
				panic("xen_assign_pfn() mfn belongs to us");
		}
		no_trap();
	}
#endif /* DEBUG */

	if (mfn == MFN_INVALID)
		panic("xen_assign_pfn(MFN_INVALID) not allowed");
	pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
	if (pfn == mfn)
		panic("xen_assign_pfn(mfn) PFN_IS_FOREIGN_MFN bit already set");
	return (pfn);
}

void
xen_release_pfn(pfn_t pfn)
{
	if (pfn == PFN_INVALID)
		panic("xen_release_pfn(PFN_INVALID) not allowed");
	if ((pfn & PFN_IS_FOREIGN_MFN) == 0)
		panic("mfn high bit not set");
}

uint_t
pfn_is_foreign(pfn_t pfn)
{
	if (pfn == PFN_INVALID)
		return (0);
	return ((pfn & PFN_IS_FOREIGN_MFN) != 0);
}

pfn_t
pte2pfn(x86pte_t pte, level_t l)
{
	mfn_t mfn = PTE2MFN(pte, l);

	if ((pte & PT_SOFTWARE) >= PT_FOREIGN)
		return ((pfn_t)mfn | PFN_IS_FOREIGN_MFN);
	return (mfn_to_pfn(mfn));
}

mfn_t
pfn_to_mfn(pfn_t pfn)
{
	if (pfn == PFN_INVALID)
		panic("pfn_to_mfn(PFN_INVALID) not allowed");

	if (pfn & PFN_IS_FOREIGN_MFN)
		return (pfn & ~PFN_IS_FOREIGN_MFN);

	if (pfn >= mfn_count)
		panic("pfn_to_mfn(): illegal PFN 0x%lx", pfn);

	return (mfn_list[pfn]);
}

/*
 * This routine translates an MFN back into the corresponding PFN value.
 * It has to be careful since mfn_to_pfn_mapping[] might fault, as that
 * table is sparse. It also has to check for non-faulting MFN values that
 * are out of range, i.e. that exceed the end of the table.
 */
pfn_t
mfn_to_pfn(mfn_t mfn)
{
	pfn_t pfn;
	on_trap_data_t otd;
	uint_t on_trap_ready = (t0.t_stk != NULL);

	/*
	 * Cleared at a suspend or migrate
	 */
	if (cached_max_mfn == 0)
		cached_max_mfn =
		    HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);

	if (cached_max_mfn < mfn)
		return ((pfn_t)mfn | PFN_IS_FOREIGN_MFN);

	if (on_trap_ready && on_trap(&otd, OT_DATA_ACCESS)) {
		pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
	} else {
		pfn = mfn_to_pfn_mapping[mfn];

		if (pfn == PFN_INVALID || pfn >= mfn_count ||
		    pfn_to_mfn(pfn) != mfn)
			pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
	}

	if (on_trap_ready)
		no_trap();

	/*
	 * If khat_running is set then we should be checking
	 * in domUs that migration is blocked while using the
	 * mfn_to_pfn_mapping[] table.
	 */
	ASSERT(!khat_running || DOMAIN_IS_INITDOMAIN(xen_info) ||
	    rw_read_held(&m2p_lock[XM2P_HASH].m2p_rwlock));

	return (pfn);
}

/*
 * From a pseudo-physical address, find the corresponding machine address.
 */
maddr_t
pa_to_ma(paddr_t pa)
{
	mfn_t mfn = pfn_to_mfn(mmu_btop(pa));

	if (mfn == MFN_INVALID)
		panic("pa_to_ma() got MFN_INVALID");
	return (mfn_to_ma(mfn) + (pa & MMU_PAGEOFFSET));
}
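
/*
 * Illustrative sketch, not part of the original file: the expected life
 * cycle of a foreign MFN, e.g. one backing a grant mapping. The function
 * below is hypothetical; xen_assign_pfn() tags the MFN so it can travel
 * through code that expects PFNs, pfn_to_mfn() strips the tag again, and
 * xen_release_pfn() only sanity-checks the tag on the way out.
 */
static void
example_foreign_round_trip(mfn_t foreign_mfn)	/* hypothetical helper */
{
	pfn_t pfn = xen_assign_pfn(foreign_mfn);

	ASSERT(pfn_is_foreign(pfn));
	ASSERT(pfn_to_mfn(pfn) == foreign_mfn);

	/* ... use pfn wherever a PFN is required, e.g. to build a PTE ... */

	xen_release_pfn(pfn);
}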

/*
 * From a machine address, find the corresponding pseudo-physical address.
 */
paddr_t
ma_to_pa(maddr_t ma)
{
	pfn_t pfn = mfn_to_pfn(mmu_btop(ma));

	if (pfn == PFN_INVALID)
		panic("ma_to_pa() got PFN_INVALID");
	return (pfn_to_pa(pfn) + (ma & MMU_PAGEOFFSET));
}

/*
 * When calling reassign_pfn(), the page must be (at least) read locked
 * to make sure swrand does not try to grab it.
 */
#ifdef DEBUG
#define	CHECK_PAGE_LOCK(pfn)	{					\
	page_t *pp = page_numtopp_nolock(pfn);				\
	if ((pp != NULL) && (!PAGE_LOCKED(pp))) {			\
		panic("reassign_pfn() called with unlocked page (pfn 0x%lx)", \
		    pfn);						\
	}								\
}
#else /* DEBUG */
#define	CHECK_PAGE_LOCK(pfn)
#endif /* DEBUG */

/*
 * Reassign a new machine page to back a physical address.
 */
void
reassign_pfn(pfn_t pfn, mfn_t mfn)
{
	int mmu_update_return;
	mmu_update_t t;
	extern void update_contig_pfnlist(pfn_t, mfn_t, mfn_t);

	ASSERT(pfn != PFN_INVALID);
	ASSERT(!pfn_is_foreign(pfn));

	ASSERT(pfn < mfn_count);
	update_contig_pfnlist(pfn, mfn_list[pfn], mfn);
	if (mfn == MFN_INVALID) {
		CHECK_PAGE_LOCK(pfn);
		if (kpm_vbase != NULL && xen_kpm_page(pfn, 0) < 0)
			panic("reassign_pfn(): failed to remove kpm mapping");
		mfn_list[pfn] = mfn;
		return;
	}

	/*
	 * Verify that previously given away pages are still page locked.
	 */
	if (mfn_list[pfn] == MFN_INVALID) {
		CHECK_PAGE_LOCK(pfn);
	}
	mfn_list[pfn] = mfn;

	t.ptr = mfn_to_ma(mfn) | MMU_MACHPHYS_UPDATE;
	t.val = pfn;

	if (HYPERVISOR_mmu_update(&t, 1, &mmu_update_return, DOMID_SELF))
		panic("HYPERVISOR_mmu_update() failed");
	ASSERT(mmu_update_return == 1);

	if (kpm_vbase != NULL && xen_kpm_page(pfn, PT_VALID | PT_WRITABLE) < 0)
		panic("reassign_pfn(): failed to enable kpm mapping");
}

/*
 * XXPV code to work around problems with GNTTABOP_map_grant_ref.
 * Hopefully we can remove this when GNTTABOP_map_grant_ref is fixed.
 */
void
xen_fix_foreign(struct hat *hat, uint64_t va)
{
	uintptr_t v = va;
	htable_t *ht;
	uint_t entry;
	x86pte_t pte;

	/*
	 * Look up the PTE for VA. If it is not marked foreign,
	 * add the appropriate soft bits and reinstall the new PTE.
	 */
	ht = htable_getpage(hat, v, &entry);
	if (ht == NULL) {
		cmn_err(CE_WARN, "xen_fix_foreign(va=0x%p) htable not found",
		    (void *)v);
		return;
	}

	pte = x86pte_get(ht, entry);
	if ((pte & PT_SOFTWARE) < PT_FOREIGN) {
		pte |= PT_FOREIGN;
		if (HYPERVISOR_update_va_mapping(v, pte, UVMF_NONE) != 0)
			cmn_err(CE_WARN, "xen_fix_foreign(va=0x%p) failed, "
			    "pte=" FMT_PTE, (void *)v, pte);
	}
	htable_release(ht);
}
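
/*
 * Illustrative sketch, not part of the original file: the two-step
 * pattern a balloon-style driver might use with reassign_pfn(). The
 * page must be locked (see CHECK_PAGE_LOCK above), and new_mfn stands
 * in for a machine page obtained from the hypervisor; the function
 * itself is hypothetical.
 */
static void
example_replace_backing_page(page_t *pp, mfn_t new_mfn)	/* hypothetical */
{
	pfn_t pfn = pp->p_pagenum;
	mfn_t old_mfn = mfn_list[pfn];

	ASSERT(PAGE_LOCKED(pp));

	/* give the current machine page away; pfn now has no backing */
	reassign_pfn(pfn, MFN_INVALID);

	/* ... old_mfn could now be handed back to the hypervisor ... */

	/* back pfn with the new machine page (also redoes kpm mapping) */
	reassign_pfn(pfn, new_mfn);
}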