/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/mach_mmu.h>
#include <sys/machsystm.h>
#include <sys/cmn_err.h>
#include <sys/promif.h>
#include <sys/hypervisor.h>
#include <sys/bootconf.h>
#include <sys/ontrap.h>
#include <sys/rwlock.h>
#include <sys/sysmacros.h>
#include <vm/seg_kmem.h>
#include <vm/kboot_mmu.h>
#include <vm/hat_pte.h>
#include <vm/hat.h>
#include <vm/htable.h>
#include <vm/hat_i86.h>

start_info_t *xen_info;
ulong_t mfn_count;
mfn_t *mfn_list;
mfn_t *mfn_list_pages;		/* pages that make a table of mfn's */
				/* that make up the pa_to_ma table */
mfn_t *mfn_list_pages_page;	/* page of mfn's for mfn_list_pages */
mfn_t cached_max_mfn;
uintptr_t xen_virt_start;
pfn_t *mfn_to_pfn_mapping;
caddr_t xb_addr;		/* virtual addr for the store_mfn page */

/*
 * Running on the hypervisor, we need to prevent migration while holding
 * PTE values that we might do PTE2PFN() or pa_to_ma() on, as the
 * mfn_to_pfn_mapping and mfn_list[] translation tables might change.
 *
 * As the suspend process uses the HAT, we need to check that we don't
 * already own the lock as a writer before we try to take it as a reader.
 */
#define	NUM_M2P_LOCKS 128
static struct {
	krwlock_t m2p_rwlock;
	char m2p_pad[64 - sizeof (krwlock_t)];	/* 64 byte cache line size */
} m2p_lock[NUM_M2P_LOCKS];

#define	XM2P_HASH	((uintptr_t)curthread->t_tid & (NUM_M2P_LOCKS - 1))

void
xen_block_migrate(void)
{
	if (!DOMAIN_IS_INITDOMAIN(xen_info) &&
	    rw_owner(&m2p_lock[XM2P_HASH].m2p_rwlock) != curthread)
		rw_enter(&m2p_lock[XM2P_HASH].m2p_rwlock, RW_READER);
}

void
xen_allow_migrate(void)
{
	if (!DOMAIN_IS_INITDOMAIN(xen_info) &&
	    rw_owner(&m2p_lock[XM2P_HASH].m2p_rwlock) != curthread)
		rw_exit(&m2p_lock[XM2P_HASH].m2p_rwlock);
}

void
xen_start_migrate(void)
{
	int i;

	for (i = 0; i < NUM_M2P_LOCKS; ++i)
		rw_enter(&m2p_lock[i].m2p_rwlock, RW_WRITER);
}

void
xen_end_migrate(void)
{
	int i;

	for (i = 0; i < NUM_M2P_LOCKS; ++i)
		rw_exit(&m2p_lock[i].m2p_rwlock);
}
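
/*
 * Illustrative only: a hedged sketch (this helper is hypothetical and
 * not part of the original file) of how a domU caller is expected to
 * use the lock pairing above, so that mfn_list[] and
 * mfn_to_pfn_mapping[] stay stable across its translation.
 */
static maddr_t
example_stable_pa_to_ma(paddr_t pa)
{
	maddr_t ma;

	xen_block_migrate();	/* reader on this thread's hashed lock */
	ma = pa_to_ma(pa);	/* translation tables can't shift under us */
	xen_allow_migrate();	/* drop the reader hold */

	return (ma);
}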

/*ARGSUSED*/
void
set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
{
	mmu_update_t t;
	maddr_t mtable = pa_to_ma(table);
	int retcnt;

	t.ptr = (mtable + index * pte_size) | MMU_NORMAL_PT_UPDATE;
	t.val = pteval;
	if (HYPERVISOR_mmu_update(&t, 1, &retcnt, DOMID_SELF) || retcnt != 1)
		bop_panic("HYPERVISOR_mmu_update() failed");
}

/*
 * The start_info_t and mfn_list are initially mapped in low "boot" memory.
 * Each has a page aligned address and size. We now relocate them up into
 * the kernel's normal address space. We also create the arrays that let
 * the hypervisor suspend/resume a domain.
 */
void
xen_relocate_start_info(void)
{
	maddr_t mach_addr;
	size_t sz;
	size_t sz2;
	offset_t off;
	uintptr_t addr;
	uintptr_t old;
	int i, j;

	/*
	 * In dom0, we have to account for the console_info structure
	 * which might immediately follow the start_info in memory.
	 */
	sz = sizeof (start_info_t);
	if (DOMAIN_IS_INITDOMAIN(xen_info) &&
	    xen_info->console.dom0.info_off >= sizeof (start_info_t)) {
		sz += xen_info->console.dom0.info_off - sizeof (start_info_t) +
		    xen_info->console.dom0.info_size;
	}
	sz = P2ROUNDUP(sz, MMU_PAGESIZE);
	addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
	for (off = 0; off < sz; off += MMU_PAGESIZE) {
		mach_addr = pa_to_ma(pfn_to_pa(va_to_pfn(
		    (caddr_t)xen_info + off)));
		kbm_map_ma(mach_addr, addr + off, 0);
	}
	boot_mapin((caddr_t)addr, sz);
	old = (uintptr_t)xen_info;
	xen_info = (start_info_t *)addr;
	for (off = 0; off < sz; off += MMU_PAGESIZE)
		kbm_unmap(old + off);

	/*
	 * Relocate the mfn_list, any number of pages.
	 */
	sz = P2ROUNDUP(mfn_count * sizeof (mfn_t), MMU_PAGESIZE);
	addr = (uintptr_t)vmem_xalloc(heap_arena, sz, MMU_PAGESIZE, 0,
	    0, 0, 0, VM_SLEEP);
	for (off = 0; off < sz; off += MMU_PAGESIZE) {
		mach_addr =
		    pa_to_ma(pfn_to_pa(va_to_pfn((caddr_t)mfn_list + off)));
		kbm_map_ma(mach_addr, addr + off, 0);
	}
	boot_mapin((caddr_t)addr, sz);
	old = (uintptr_t)mfn_list;
	mfn_list = (mfn_t *)addr;
	xen_info->mfn_list = (mfn_t)addr;
	for (off = 0; off < sz; off += MMU_PAGESIZE)
		kbm_unmap(old + off);

	/*
	 * Create the lists of mfn_list pages needed by suspend/resume.
	 * Note we skip this for domain 0, as it can't suspend/resume.
	 */
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		sz2 = P2ROUNDUP(mmu_btop(sz) * sizeof (mfn_t), MMU_PAGESIZE);
		mfn_list_pages = kmem_zalloc(sz2, KM_SLEEP);
		mfn_list_pages_page = kmem_zalloc(MMU_PAGESIZE, KM_SLEEP);
		i = 0;
		for (off = 0; off < sz; off += MMU_PAGESIZE) {
			j = mmu_btop(off);
			if (((j * sizeof (mfn_t)) & MMU_PAGEOFFSET) == 0) {
				mfn_list_pages_page[i++] =
				    pfn_to_mfn(va_to_pfn(&mfn_list_pages[j]));
			}
			mfn_list_pages[j] =
			    pfn_to_mfn(va_to_pfn((caddr_t)mfn_list + off));
		}
		HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
		    pfn_to_mfn(va_to_pfn(mfn_list_pages_page));
		HYPERVISOR_shared_info->arch.max_pfn = xen_info->nr_pages;
	}

	/*
	 * Remap the shared info (for I/O) into high memory, too.
	 */
	sz = MMU_PAGESIZE;
	addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
	kbm_map_ma(xen_info->shared_info, addr, 0);
	/* shared info has no PFN so don't do: boot_mapin((caddr_t)addr, sz) */
	old = (uintptr_t)HYPERVISOR_shared_info;
	HYPERVISOR_shared_info = (void *)addr;
	kbm_unmap(old);

	/*
	 * Remap the console info into high memory, too.
	 */
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		sz = MMU_PAGESIZE;
		addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
		kbm_map_ma(pfn_to_pa(xen_info->console.domU.mfn), addr, 0);
		boot_mapin((caddr_t)addr, sz);
		old = (uintptr_t)HYPERVISOR_console_page;
		HYPERVISOR_console_page = (void *)addr;
		kbm_unmap(old);
	} else {
		HYPERVISOR_console_page = NULL;
	}

	/*
	 * On domUs we need to have the xenbus page (store_mfn) mapped into
	 * the kernel. This is referenced as xb_addr.
	 */
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		xb_addr = vmem_alloc(heap_arena, MMU_PAGESIZE, VM_SLEEP);
		kbm_map_ma(mfn_to_ma(xen_info->store_mfn),
		    (uintptr_t)xb_addr, 0);
		boot_mapin(xb_addr, MMU_PAGESIZE);
	}
}
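
/*
 * The three relocations above share one pattern: allocate kernel VA,
 * remap each page by machine address, tell the VM system about the new
 * mapping, then tear down the old boot-time mapping. A hedged sketch of
 * that pattern as a hypothetical helper (not in the original file):
 */
static uintptr_t
example_relocate_pages(uintptr_t old, size_t sz)
{
	uintptr_t addr;
	offset_t off;
	maddr_t ma;

	ASSERT((sz & MMU_PAGEOFFSET) == 0);
	addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
	for (off = 0; off < sz; off += MMU_PAGESIZE) {
		ma = pa_to_ma(pfn_to_pa(va_to_pfn((caddr_t)old + off)));
		kbm_map_ma(ma, addr + off, 0);	/* new VA, same MFN */
	}
	boot_mapin((caddr_t)addr, sz);		/* hand pages to the VM system */
	for (off = 0; off < sz; off += MMU_PAGESIZE)
		kbm_unmap(old + off);		/* retire the boot mapping */
	return (addr);
}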

/*
 * Generate the pfn value to use for a foreign mfn.
 */
pfn_t
xen_assign_pfn(mfn_t mfn)
{
	pfn_t pfn;

#ifdef DEBUG
	/*
	 * make sure this MFN isn't in our list of MFNs
	 */
	on_trap_data_t otd;
	uint_t on_trap_ready = (t0.t_stk != NULL);

	if (on_trap_ready) {
		if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
			pfn = mfn_to_pfn_mapping[mfn];
			if (pfn < mfn_count && mfn_list[pfn] == mfn)
				panic("xen_assign_pfn() mfn belongs to us");
		}
		no_trap();
	}
#endif /* DEBUG */

	if (mfn == MFN_INVALID)
		panic("xen_assign_pfn(MFN_INVALID) not allowed");
	pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
	if (pfn == mfn)
		panic("xen_assign_pfn(mfn) PFN_IS_FOREIGN_MFN bit already set");
	return (pfn);
}

void
xen_release_pfn(pfn_t pfn)
{
	if (pfn == PFN_INVALID)
		panic("xen_release_pfn(PFN_INVALID) not allowed");
	if ((pfn & PFN_IS_FOREIGN_MFN) == 0)
		panic("mfn high bit not set");
}

uint_t
pfn_is_foreign(pfn_t pfn)
{
	if (pfn == PFN_INVALID)
		return (0);
	return ((pfn & PFN_IS_FOREIGN_MFN) != 0);
}

pfn_t
pte2pfn(x86pte_t pte, level_t l)
{
	mfn_t mfn = PTE2MFN(pte, l);

	if ((pte & PT_SOFTWARE) >= PT_FOREIGN)
		return ((pfn_t)mfn | PFN_IS_FOREIGN_MFN);
	return (mfn_to_pfn(mfn));
}

mfn_t
pfn_to_mfn(pfn_t pfn)
{
	if (pfn == PFN_INVALID)
		panic("pfn_to_mfn(PFN_INVALID) not allowed");

	if (pfn & PFN_IS_FOREIGN_MFN)
		return (pfn & ~PFN_IS_FOREIGN_MFN);

	if (pfn >= mfn_count)
		panic("pfn_to_mfn(): illegal PFN 0x%lx", pfn);

	return (mfn_list[pfn]);
}
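
/*
 * Illustrative only: a hedged sketch (hypothetical, not in the original
 * file) of the foreign-PFN encoding round trip. xen_assign_pfn() tags a
 * granted MFN with PFN_IS_FOREIGN_MFN, and pfn_to_mfn() strips the tag
 * back off without consulting mfn_list[].
 */
static void
example_foreign_round_trip(mfn_t foreign_mfn)
{
	pfn_t pfn = xen_assign_pfn(foreign_mfn);

	ASSERT(pfn_is_foreign(pfn));
	ASSERT(pfn_to_mfn(pfn) == foreign_mfn);

	xen_release_pfn(pfn);	/* today this only sanity checks the bit */
}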

/*
 * This routine translates an MFN back into the corresponding PFN value.
 * It has to be careful, since a lookup in mfn_to_pfn_mapping[] might
 * fault, as that table is sparse. It also has to check for non-faulting
 * lookups that are nonetheless out of range, i.e. MFNs that exceed the
 * table.
 */
pfn_t
mfn_to_pfn(mfn_t mfn)
{
	pfn_t pfn;
	on_trap_data_t otd;
	uint_t on_trap_ready = (t0.t_stk != NULL);

	/*
	 * Cleared at a suspend or migrate
	 */
	if (cached_max_mfn == 0)
		cached_max_mfn =
		    HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);

	if (cached_max_mfn < mfn)
		return ((pfn_t)mfn | PFN_IS_FOREIGN_MFN);

	if (on_trap_ready && on_trap(&otd, OT_DATA_ACCESS)) {
		pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
	} else {
		pfn = mfn_to_pfn_mapping[mfn];

		if (pfn == PFN_INVALID || pfn >= mfn_count ||
		    pfn_to_mfn(pfn) != mfn)
			pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
	}

	if (on_trap_ready)
		no_trap();

	/*
	 * If khat_running is set, then we should be checking that, in
	 * domUs, migration is blocked while the mfn_to_pfn_mapping[]
	 * table is in use.
	 */
	ASSERT(!khat_running || DOMAIN_IS_INITDOMAIN(xen_info) ||
	    rw_read_held(&m2p_lock[XM2P_HASH].m2p_rwlock));

	return (pfn);
}

/*
 * From a pseudo-physical address, find the corresponding machine address.
 */
maddr_t
pa_to_ma(paddr_t pa)
{
	mfn_t mfn = pfn_to_mfn(mmu_btop(pa));

	if (mfn == MFN_INVALID)
		panic("pa_to_ma() got MFN_INVALID");
	return (mfn_to_ma(mfn) + (pa & MMU_PAGEOFFSET));
}

/*
 * From a machine address, find the corresponding pseudo-physical address.
 */
paddr_t
ma_to_pa(maddr_t ma)
{
	pfn_t pfn = mfn_to_pfn(mmu_btop(ma));

	if (pfn == PFN_INVALID)
		panic("ma_to_pa() got PFN_INVALID");
	return (pfn_to_pa(pfn) + (ma & MMU_PAGEOFFSET));
}
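
/*
 * Illustrative only: a hedged sketch (hypothetical, not in the original
 * file) showing that pa_to_ma() and ma_to_pa() are inverses for any
 * non-foreign page, and that both preserve the byte offset in the page.
 */
static void
example_pa_ma_round_trip(paddr_t pa)
{
	maddr_t ma = pa_to_ma(pa);

	ASSERT((ma & MMU_PAGEOFFSET) == (pa & MMU_PAGEOFFSET));
	ASSERT(ma_to_pa(ma) == pa);
}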

/*
 * When calling reassign_pfn(), the page must be (at least) read locked
 * to make sure swrand does not try to grab it.
 */
#ifdef DEBUG
#define	CHECK_PAGE_LOCK(pfn)	{				\
	page_t *pp = page_numtopp_nolock(pfn);			\
	if ((pp != NULL) && (!PAGE_LOCKED(pp))) {		\
		panic("reassign_pfn() called with unlocked page (pfn 0x%lx)", \
		    pfn);					\
	}							\
}
#else /* DEBUG */
#define	CHECK_PAGE_LOCK(pfn)
#endif /* DEBUG */

/*
 * Reassign a new machine page to back a physical address.
 */
void
reassign_pfn(pfn_t pfn, mfn_t mfn)
{
	int mmu_update_return;
	mmu_update_t t;
	extern void update_contig_pfnlist(pfn_t, mfn_t, mfn_t);

	ASSERT(pfn != PFN_INVALID);
	ASSERT(!pfn_is_foreign(pfn));

	ASSERT(pfn < mfn_count);
	update_contig_pfnlist(pfn, mfn_list[pfn], mfn);
	if (mfn == MFN_INVALID) {
		CHECK_PAGE_LOCK(pfn);
		if (kpm_vbase != NULL && xen_kpm_page(pfn, 0) < 0)
			panic("reassign_pfn(): failed to remove kpm mapping");
		mfn_list[pfn] = mfn;
		return;
	}

	/*
	 * Verify that previously given away pages are still page locked.
	 */
	if (mfn_list[pfn] == MFN_INVALID) {
		CHECK_PAGE_LOCK(pfn);
	}
	mfn_list[pfn] = mfn;

	t.ptr = mfn_to_ma(mfn) | MMU_MACHPHYS_UPDATE;
	t.val = pfn;

	if (HYPERVISOR_mmu_update(&t, 1, &mmu_update_return, DOMID_SELF))
		panic("HYPERVISOR_mmu_update() failed");
	ASSERT(mmu_update_return == 1);

	if (kpm_vbase != NULL && xen_kpm_page(pfn, PT_VALID | PT_WRITABLE) < 0)
		panic("reassign_pfn(): failed to enable kpm mapping");
}

/*
 * XXPV: code to work around problems with GNTTABOP_map_grant_ref.
 * Hopefully we can remove this when GNTTABOP_map_grant_ref is fixed.
 */
void
xen_fix_foreign(uint64_t va)
{
	uintptr_t v = va;
	htable_t *ht;
	uint_t entry;
	x86pte_t pte;

	/*
	 * Look up the PTE for VA. If it is not marked foreign,
	 * add the appropriate soft bits and reinstall the new PTE.
	 */
	ht = htable_getpage(kas.a_hat, v, &entry);
	if (ht == NULL)
		panic("xen_fix_foreign(va=0x%p) htable not found", (void *)v);
	pte = x86pte_get(ht, entry);
	if ((pte & PT_SOFTWARE) < PT_FOREIGN) {
		pte |= PT_FOREIGN;
		if (HYPERVISOR_update_va_mapping(v, pte, UVMF_NONE) != 0)
			panic("xen_fix_foreign(va=0x%p) failed, pte=" FMT_PTE,
			    (void *)v, pte);
	}
	htable_release(ht);
}
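
/*
 * set_pteval() and reassign_pfn() above each issue a single-entry
 * HYPERVISOR_mmu_update(). A hedged sketch (hypothetical, not in the
 * original file) of amortizing the hypercall over several consecutive
 * PTE slots in one page table, using the same MMU_NORMAL_PT_UPDATE
 * encoding as set_pteval():
 */
#define	EXAMPLE_BATCH	16	/* hypothetical batch size */

static void
example_set_ptevals(paddr_t table, uint_t first, x86pte_t *ptevals,
    uint_t cnt)
{
	mmu_update_t t[EXAMPLE_BATCH];
	maddr_t mtable = pa_to_ma(table);
	int retcnt;
	uint_t i;

	ASSERT(cnt <= EXAMPLE_BATCH);
	for (i = 0; i < cnt; ++i) {
		t[i].ptr = (mtable + (first + i) * pte_size) |
		    MMU_NORMAL_PT_UPDATE;
		t[i].val = ptevals[i];
	}
	if (HYPERVISOR_mmu_update(t, cnt, &retcnt, DOMID_SELF) != 0 ||
	    retcnt != (int)cnt)
		panic("example_set_ptevals(): HYPERVISOR_mmu_update() failed");
}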