/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright 2018 Joyent, Inc.
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/debug.h>
#include <sys/bootconf.h>
#include <sys/bootsvcs.h>
#include <sys/bootinfo.h>
#include <sys/mman.h>
#include <sys/cmn_err.h>
#include <sys/param.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/promif.h>
#include <sys/kobj.h>
#ifdef __xpv
#include <sys/hypervisor.h>
#endif
#include <vm/kboot_mmu.h>
#include <vm/hat_pte.h>
#include <vm/hat_i86.h>
#include <vm/seg_kmem.h>

#if 0
/*
 * Joe's debug printing
 */
#define	DBG(x) \
	bop_printf(NULL, "kboot_mmu.c: %s is %" PRIx64 "\n", #x, (uint64_t)(x));
#else
#define	DBG(x)	/* naught */
#endif
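
/*
 * Illustrative note: with the debug block above enabled (change the
 * "#if 0" to "#if 1"), DBG(expr) prints the expression's source text
 * and value through bop_printf(), e.g. DBG(window) produces a line of
 * the form:
 *
 *	kboot_mmu.c: window is <hex value>
 */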
63 */ 64 static caddr_t window; 65 static caddr_t pte_to_window; 66 67 /* 68 * this are needed by mmu_init() 69 */ 70 int kbm_nx_support = 0; /* NX bit in PTEs is in use */ 71 int kbm_pae_support = 0; /* PAE is 64 bit Page table entries */ 72 int kbm_pge_support = 0; /* PGE is Page table global bit enabled */ 73 int kbm_largepage_support = 0; 74 uint_t kbm_nucleus_size = 0; 75 76 #define BOOT_SHIFT(l) (shift_amt[l]) 77 #define BOOT_SZ(l) ((size_t)1 << BOOT_SHIFT(l)) 78 #define BOOT_OFFSET(l) (BOOT_SZ(l) - 1) 79 #define BOOT_MASK(l) (~BOOT_OFFSET(l)) 80 81 /* 82 * Initialize memory management parameters for boot time page table management 83 */ 84 void 85 kbm_init(struct xboot_info *bi) 86 { 87 /* 88 * configure mmu information 89 */ 90 kbm_nucleus_size = (uintptr_t)bi->bi_kseg_size; 91 kbm_largepage_support = bi->bi_use_largepage; 92 kbm_nx_support = bi->bi_use_nx; 93 kbm_pae_support = bi->bi_use_pae; 94 kbm_pge_support = bi->bi_use_pge; 95 window = bi->bi_pt_window; 96 DBG(window); 97 pte_to_window = bi->bi_pte_to_pt_window; 98 DBG(pte_to_window); 99 if (kbm_pae_support) { 100 shift_amt = shift_amt_pae; 101 ptes_per_table = 512; 102 pte_size = 8; 103 lpagesize = TWO_MEG; 104 top_level = 3; 105 } else { 106 shift_amt = shift_amt_nopae; 107 ptes_per_table = 1024; 108 pte_size = 4; 109 lpagesize = FOUR_MEG; 110 top_level = 1; 111 } 112 113 #ifdef __xpv 114 xen_info = bi->bi_xen_start_info; 115 mfn_list = (mfn_t *)xen_info->mfn_list; 116 DBG(mfn_list); 117 mfn_count = xen_info->nr_pages; 118 DBG(mfn_count); 119 #endif 120 top_page_table = bi->bi_top_page_table; 121 DBG(top_page_table); 122 } 123 124 /* 125 * Change the addressible page table window to point at a given page 126 */ 127 /*ARGSUSED*/ 128 void * 129 kbm_remap_window(paddr_t physaddr, int writeable) 130 { 131 x86pte_t pt_bits = PT_NOCONSIST | PT_VALID | PT_WRITABLE; 132 133 DBG(physaddr); 134 135 #ifdef __xpv 136 if (!writeable) 137 pt_bits &= ~PT_WRITABLE; 138 if (HYPERVISOR_update_va_mapping((uintptr_t)window, 139 pa_to_ma(physaddr) | pt_bits, UVMF_INVLPG | UVMF_LOCAL) < 0) 140 bop_panic("HYPERVISOR_update_va_mapping() failed"); 141 #else 142 if (kbm_pae_support) 143 *((x86pte_t *)pte_to_window) = physaddr | pt_bits; 144 else 145 *((x86pte32_t *)pte_to_window) = physaddr | pt_bits; 146 mmu_invlpg(window); 147 #endif 148 DBG(window); 149 return (window); 150 } 151 152 /* 153 * Add a mapping for the physical page at the given virtual address. 154 */ 155 void 156 kbm_map(uintptr_t va, paddr_t pa, uint_t level, uint_t is_kernel) 157 { 158 x86pte_t *ptep; 159 paddr_t pte_physaddr; 160 x86pte_t pteval; 161 162 if (khat_running) 163 panic("kbm_map() called too late"); 164 165 pteval = pa_to_ma(pa) | PT_NOCONSIST | PT_VALID | PT_WRITABLE; 166 if (level >= 1) 167 pteval |= PT_PAGESIZE; 168 if (kbm_pge_support && is_kernel) 169 pteval |= PT_GLOBAL; 170 171 #ifdef __xpv 172 /* 173 * try update_va_mapping first - fails if page table is missing. 174 */ 175 if (HYPERVISOR_update_va_mapping(va, pteval, 176 UVMF_INVLPG | UVMF_LOCAL) == 0) 177 return; 178 #endif 179 180 /* 181 * Find the pte that will map this address. This creates any 182 * missing intermediate level page tables. 
183 */ 184 ptep = find_pte(va, &pte_physaddr, level, 0); 185 if (ptep == NULL) 186 bop_panic("kbm_map: find_pte returned NULL"); 187 188 #ifdef __xpv 189 if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL)) 190 bop_panic("HYPERVISOR_update_va_mapping() failed"); 191 #else 192 if (kbm_pae_support) 193 *ptep = pteval; 194 else 195 *((x86pte32_t *)ptep) = pteval; 196 mmu_invlpg((caddr_t)va); 197 #endif 198 } 199 200 #ifdef __xpv 201 202 /* 203 * Add a mapping for the machine page at the given virtual address. 204 */ 205 void 206 kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level) 207 { 208 paddr_t pte_physaddr; 209 x86pte_t pteval; 210 211 pteval = ma | PT_NOCONSIST | PT_VALID | PT_REF | PT_WRITABLE; 212 if (level == 1) 213 pteval |= PT_PAGESIZE; 214 215 /* 216 * try update_va_mapping first - fails if page table is missing. 217 */ 218 if (HYPERVISOR_update_va_mapping(va, 219 pteval, UVMF_INVLPG | UVMF_LOCAL) == 0) 220 return; 221 222 /* 223 * Find the pte that will map this address. This creates any 224 * missing intermediate level page tables 225 */ 226 (void) find_pte(va, &pte_physaddr, level, 0); 227 228 if (HYPERVISOR_update_va_mapping(va, 229 pteval, UVMF_INVLPG | UVMF_LOCAL) != 0) 230 bop_panic("HYPERVISOR_update_va_mapping failed"); 231 } 232 233 #endif /* __xpv */ 234 235 236 /* 237 * Probe the boot time page tables to find the first mapping 238 * including va (or higher) and return non-zero if one is found. 239 * va is updated to the starting address and len to the pagesize. 240 * pp will be set to point to the 1st page_t of the mapped page(s). 241 * 242 * Note that if va is in the middle of a large page, the returned va 243 * will be less than what was asked for. 244 */ 245 int 246 kbm_probe(uintptr_t *va, size_t *len, pfn_t *pfn, uint_t *prot) 247 { 248 uintptr_t probe_va; 249 x86pte_t *ptep; 250 paddr_t pte_physaddr; 251 x86pte_t pte_val; 252 level_t l; 253 254 if (khat_running) 255 panic("kbm_probe() called too late"); 256 *len = 0; 257 *pfn = PFN_INVALID; 258 *prot = 0; 259 probe_va = *va; 260 restart_new_va: 261 l = top_level; 262 for (;;) { 263 if (IN_VA_HOLE(probe_va)) 264 probe_va = mmu.hole_end; 265 266 if (IN_HYPERVISOR_VA(probe_va)) 267 #if defined(__xpv) 268 probe_va = HYPERVISOR_VIRT_END; 269 #else 270 return (0); 271 #endif 272 273 /* 274 * If we don't have a valid PTP/PTE at this level 275 * then we can bump VA by this level's pagesize and try again. 276 * When the probe_va wraps around, we are done. 277 */ 278 ptep = find_pte(probe_va, &pte_physaddr, l, 1); 279 if (ptep == NULL) 280 bop_panic("kbm_probe: find_pte returned NULL"); 281 if (kbm_pae_support) 282 pte_val = *ptep; 283 else 284 pte_val = *((x86pte32_t *)ptep); 285 if (!PTE_ISVALID(pte_val)) { 286 probe_va = (probe_va & BOOT_MASK(l)) + BOOT_SZ(l); 287 if (probe_va <= *va) 288 return (0); 289 goto restart_new_va; 290 } 291 292 /* 293 * If this entry is a pointer to a lower level page table 294 * go down to it. 295 */ 296 if (!PTE_ISPAGE(pte_val, l)) { 297 ASSERT(l > 0); 298 --l; 299 continue; 300 } 301 302 /* 303 * We found a boot level page table entry 304 */ 305 *len = BOOT_SZ(l); 306 *va = probe_va & ~(*len - 1); 307 *pfn = PTE2PFN(pte_val, l); 308 309 310 *prot = PROT_READ | PROT_EXEC; 311 if (PTE_GET(pte_val, PT_WRITABLE)) 312 *prot |= PROT_WRITE; 313 314 /* 315 * pt_nx is cleared if processor doesn't support NX bit 316 */ 317 if (PTE_GET(pte_val, mmu.pt_nx)) 318 *prot &= ~PROT_EXEC; 319 320 return (1); 321 } 322 } 323 324 325 /* 326 * Destroy a boot loader page table 4K mapping. 
327 */ 328 void 329 kbm_unmap(uintptr_t va) 330 { 331 if (khat_running) 332 panic("kbm_unmap() called too late"); 333 else { 334 #ifdef __xpv 335 (void) HYPERVISOR_update_va_mapping(va, 0, 336 UVMF_INVLPG | UVMF_LOCAL); 337 #else 338 x86pte_t *ptep; 339 level_t level = 0; 340 uint_t probe_only = 1; 341 342 ptep = find_pte(va, NULL, level, probe_only); 343 if (ptep == NULL) 344 return; 345 346 if (kbm_pae_support) 347 *ptep = 0; 348 else 349 *((x86pte32_t *)ptep) = 0; 350 mmu_invlpg((caddr_t)va); 351 #endif 352 } 353 } 354 355 356 /* 357 * Change a boot loader page table 4K mapping. 358 * Returns the pfn of the old mapping. 359 */ 360 pfn_t 361 kbm_remap(uintptr_t va, pfn_t pfn) 362 { 363 x86pte_t *ptep; 364 level_t level = 0; 365 uint_t probe_only = 1; 366 x86pte_t pte_val = pa_to_ma(pfn_to_pa(pfn)) | PT_WRITABLE | 367 PT_NOCONSIST | PT_VALID; 368 x86pte_t old_pte; 369 370 if (khat_running) 371 panic("kbm_remap() called too late"); 372 ptep = find_pte(va, NULL, level, probe_only); 373 if (ptep == NULL) 374 bop_panic("kbm_remap: find_pte returned NULL"); 375 376 if (kbm_pae_support) 377 old_pte = *ptep; 378 else 379 old_pte = *((x86pte32_t *)ptep); 380 381 #ifdef __xpv 382 if (HYPERVISOR_update_va_mapping(va, pte_val, UVMF_INVLPG | UVMF_LOCAL)) 383 bop_panic("HYPERVISOR_update_va_mapping() failed"); 384 #else 385 if (kbm_pae_support) 386 *((x86pte_t *)ptep) = pte_val; 387 else 388 *((x86pte32_t *)ptep) = pte_val; 389 mmu_invlpg((caddr_t)va); 390 #endif 391 392 if (!(old_pte & PT_VALID) || ma_to_pa(old_pte) == -1) 393 return (PFN_INVALID); 394 return (mmu_btop(ma_to_pa(old_pte))); 395 } 396 397 398 /* 399 * Change a boot loader page table 4K mapping to read only. 400 */ 401 void 402 kbm_read_only(uintptr_t va, paddr_t pa) 403 { 404 x86pte_t pte_val = pa_to_ma(pa) | 405 PT_NOCONSIST | PT_REF | PT_MOD | PT_VALID; 406 407 #ifdef __xpv 408 if (HYPERVISOR_update_va_mapping(va, pte_val, UVMF_INVLPG | UVMF_LOCAL)) 409 bop_panic("HYPERVISOR_update_va_mapping() failed"); 410 #else 411 x86pte_t *ptep; 412 level_t level = 0; 413 414 ptep = find_pte(va, NULL, level, 0); 415 if (ptep == NULL) 416 bop_panic("kbm_read_only: find_pte returned NULL"); 417 418 if (kbm_pae_support) 419 *ptep = pte_val; 420 else 421 *((x86pte32_t *)ptep) = pte_val; 422 mmu_invlpg((caddr_t)va); 423 #endif 424 } 425 426 /* 427 * interfaces for kernel debugger to access physical memory 428 */ 429 static x86pte_t save_pte; 430 431 void * 432 kbm_push(paddr_t pa) 433 { 434 static int first_time = 1; 435 436 if (first_time) { 437 first_time = 0; 438 return (window); 439 } 440 441 if (kbm_pae_support) 442 save_pte = *((x86pte_t *)pte_to_window); 443 else 444 save_pte = *((x86pte32_t *)pte_to_window); 445 return (kbm_remap_window(pa, 0)); 446 } 447 448 void 449 kbm_pop(void) 450 { 451 #ifdef __xpv 452 if (HYPERVISOR_update_va_mapping((uintptr_t)window, save_pte, 453 UVMF_INVLPG | UVMF_LOCAL) < 0) 454 bop_panic("HYPERVISOR_update_va_mapping() failed"); 455 #else 456 if (kbm_pae_support) 457 *((x86pte_t *)pte_to_window) = save_pte; 458 else 459 *((x86pte32_t *)pte_to_window) = save_pte; 460 mmu_invlpg(window); 461 #endif 462 } 463 464 x86pte_t 465 get_pteval(paddr_t table, uint_t index) 466 { 467 void *table_ptr = kbm_remap_window(table, 0); 468 469 if (kbm_pae_support) 470 return (((x86pte_t *)table_ptr)[index]); 471 return (((x86pte32_t *)table_ptr)[index]); 472 } 473 474 #ifndef __xpv 475 void 476 set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval) 477 { 478 void *table_ptr = kbm_remap_window(table, 0); 479 if 
	if (kbm_pae_support)
		((x86pte_t *)table_ptr)[index] = pteval;
	else
		((x86pte32_t *)table_ptr)[index] = pteval;
	if (level == top_level && level == 2)
		reload_cr3();
}
#endif

paddr_t
make_ptable(x86pte_t *pteval, uint_t level)
{
	paddr_t new_table;
	void *table_ptr;

	new_table = do_bop_phys_alloc(MMU_PAGESIZE, MMU_PAGESIZE);
	table_ptr = kbm_remap_window(new_table, 1);
	bzero(table_ptr, MMU_PAGESIZE);
#ifdef __xpv
	/* Remove write permission to the new page table. */
	(void) kbm_remap_window(new_table, 0);
#endif

	if (level == top_level && level == 2)
		*pteval = pa_to_ma(new_table) | PT_VALID;
	else
		*pteval = pa_to_ma(new_table) |
		    PT_VALID | PT_REF | PT_USER | PT_WRITABLE;

	return (new_table);
}

x86pte_t *
map_pte(paddr_t table, uint_t index)
{
	void *table_ptr = kbm_remap_window(table, 0);
	return ((x86pte_t *)((caddr_t)table_ptr + index * pte_size));
}
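
/*
 * Note that get_pteval(), set_pteval(), make_ptable() and map_pte() all
 * share the single page table window above, so a pointer returned by
 * map_pte() is only valid until the next kbm_remap_window() call.
 */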