/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/debug.h>
#include <sys/bootconf.h>
#include <sys/bootsvcs.h>
#include <sys/bootinfo.h>
#include <sys/mman.h>
#include <sys/cmn_err.h>
#include <sys/param.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/promif.h>
#include <sys/kobj.h>
#ifdef __xpv
#include <sys/hypervisor.h>
#endif
#include <vm/kboot_mmu.h>
#include <vm/hat_pte.h>
#include <vm/hat_i86.h>
#include <vm/seg_kmem.h>

#if 0
/*
 * Joe's debug printing
 */
#define	DBG(x)    \
	bop_printf(NULL, "%M%: %s is %" PRIx64 "\n", #x, (uint64_t)(x));
#else
#define	DBG(x)	/* naught */
#endif

/*
 * Page table and memory stuff.
 */
static caddr_t window;
static caddr_t pte_to_window;

/*
 * these are needed by mmu_init()
 */
int kbm_nx_support = 0;		/* NX bit in PTEs is in use */
int kbm_pae_support = 0;	/* PAE (64 bit page table entries) in use */
int kbm_pge_support = 0;	/* PGE (page table global bit) enabled */
int kbm_largepage_support = 0;
uint_t kbm_nucleus_size = 0;

#define	BOOT_SHIFT(l)	(shift_amt[l])
#define	BOOT_SZ(l)	((size_t)1 << BOOT_SHIFT(l))
#define	BOOT_OFFSET(l)	(BOOT_SZ(l) - 1)
#define	BOOT_MASK(l)	(~BOOT_OFFSET(l))

/*
 * Initialize memory management parameters for boot time page table management
 */
void
kbm_init(struct xboot_info *bi)
{
	/*
	 * configure mmu information
	 */
	kbm_nucleus_size = (uintptr_t)bi->bi_kseg_size;
	kbm_largepage_support = bi->bi_use_largepage;
	kbm_nx_support = bi->bi_use_nx;
	kbm_pae_support = bi->bi_use_pae;
	kbm_pge_support = bi->bi_use_pge;
	window = bi->bi_pt_window;
	DBG(window);
	pte_to_window = bi->bi_pte_to_pt_window;
	DBG(pte_to_window);

	if (kbm_pae_support) {
		shift_amt = shift_amt_pae;
		ptes_per_table = 512;
		pte_size = 8;
		lpagesize = TWO_MEG;
#ifdef __amd64
		top_level = 3;
#else
		top_level = 2;
#endif
	} else {
		shift_amt = shift_amt_nopae;
		ptes_per_table = 1024;
		pte_size = 4;
		lpagesize = FOUR_MEG;
		top_level = 1;
	}

#ifdef __xpv
	xen_info = bi->bi_xen_start_info;
	mfn_list = (mfn_t *)xen_info->mfn_list;
	DBG(mfn_list);
	mfn_count = xen_info->nr_pages;
	DBG(mfn_count);
#endif
	top_page_table = bi->bi_top_page_table;
	DBG(top_page_table);
}

/*
 * Change the addressable page table window to point at a given page
 */
/*ARGSUSED*/
void *
kbm_remap_window(paddr_t physaddr, int writeable)
{
	x86pte_t pt_bits = PT_NOCONSIST | PT_VALID | PT_WRITABLE;

	DBG(physaddr);

#ifdef __xpv
	if (!writeable)
		pt_bits &= ~PT_WRITABLE;
	if (HYPERVISOR_update_va_mapping((uintptr_t)window,
	    pa_to_ma(physaddr) | pt_bits, UVMF_INVLPG | UVMF_LOCAL) < 0)
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	if (kbm_pae_support)
		*((x86pte_t *)pte_to_window) = physaddr | pt_bits;
	else
		*((x86pte32_t *)pte_to_window) = physaddr | pt_bits;
	mmu_tlbflush_entry(window);
#endif
	DBG(window);
	return (window);
}
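
#if 0
/*
 * Illustrative sketch only (not compiled): zeroing an arbitrary
 * physical page through the remap window. The window is a single
 * boot-time VA that each kbm_remap_window() call retargets, so only
 * one physical page is addressable at a time; make_ptable() below
 * uses this same pattern. zero_phys_page() is a hypothetical helper.
 */
static void
zero_phys_page(paddr_t pa)
{
	void *p = kbm_remap_window(pa, 1);	/* 1 == writeable */

	bzero(p, MMU_PAGESIZE);
}
#endif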

/*
 * Add a mapping for the physical page at the given virtual address.
 */
void
kbm_map(uintptr_t va, paddr_t pa, uint_t level, uint_t is_kernel)
{
	x86pte_t *ptep;
	paddr_t pte_physaddr;
	x86pte_t pteval;

	if (khat_running)
		panic("kbm_map() called too late");

	pteval = pa_to_ma(pa) | PT_NOCONSIST | PT_VALID | PT_WRITABLE;
	if (level == 1)
		pteval |= PT_PAGESIZE;
	if (kbm_pge_support && is_kernel)
		pteval |= PT_GLOBAL;

#ifdef __xpv
	/*
	 * try update_va_mapping first - fails if page table is missing.
	 */
	if (HYPERVISOR_update_va_mapping(va, pteval,
	    UVMF_INVLPG | UVMF_LOCAL) == 0)
		return;
#endif

	/*
	 * Find the pte that will map this address. This creates any
	 * missing intermediate level page tables.
	 */
	ptep = find_pte(va, &pte_physaddr, level, 0);
	if (ptep == NULL)
		bop_panic("kbm_map: find_pte returned NULL");

#ifdef __xpv
	if (HYPERVISOR_update_va_mapping(va, pteval,
	    UVMF_INVLPG | UVMF_LOCAL))
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	if (kbm_pae_support)
		*ptep = pteval;
	else
		*((x86pte32_t *)ptep) = pteval;
	mmu_tlbflush_entry((caddr_t)va);
#endif
}

#ifdef __xpv
/*
 * Add a mapping for the machine page at the given virtual address.
 */
void
kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level)
{
	paddr_t pte_physaddr;
	x86pte_t pteval;

	pteval = ma | PT_NOCONSIST | PT_VALID | PT_REF | PT_WRITABLE;
	if (level == 1)
		pteval |= PT_PAGESIZE;

	/*
	 * try update_va_mapping first - fails if page table is missing.
	 */
	if (HYPERVISOR_update_va_mapping(va, pteval,
	    UVMF_INVLPG | UVMF_LOCAL) == 0)
		return;

	/*
	 * Find the pte that will map this address. This creates any
	 * missing intermediate level page tables.
	 */
	(void) find_pte(va, &pte_physaddr, level, 0);

	if (HYPERVISOR_update_va_mapping(va, pteval,
	    UVMF_INVLPG | UVMF_LOCAL) != 0)
		bop_panic("HYPERVISOR_update_va_mapping failed");
}
#endif /* __xpv */
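
#if 0
/*
 * Illustrative sketch only (not compiled): establishing boot-time
 * mappings with kbm_map(). A level of 0 maps a 4K page; level 1 maps
 * a large page (2M with PAE, 4M without). A non-zero is_kernel asks
 * for PT_GLOBAL when the processor supports PGE. map_example() and
 * the addresses used here are hypothetical.
 */
static void
map_example(void)
{
	kbm_map(0xc0000000, 0x12345000, 0, 1);	/* one 4K kernel page */
	kbm_map(0xc0400000, 0x00400000, 1, 1);	/* one large kernel page */
}
#endif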

/*
 * Probe the boot time page tables to find the first mapping
 * including va (or higher) and return non-zero if one is found.
 * va is updated to the starting address and len to the pagesize.
 * pfn will be set to the pfn of the 1st mapped page.
 *
 * Note that if va is in the middle of a large page, the returned va
 * will be less than what was asked for.
 */
int
kbm_probe(uintptr_t *va, size_t *len, pfn_t *pfn, uint_t *prot)
{
	uintptr_t	probe_va;
	x86pte_t	*ptep;
	paddr_t		pte_physaddr;
	x86pte_t	pte_val;
	level_t		l;

	if (khat_running)
		panic("kbm_probe() called too late");
	*len = 0;
	*pfn = PFN_INVALID;
	*prot = 0;
	probe_va = *va;
restart_new_va:
	l = top_level;
	for (;;) {
		if (IN_VA_HOLE(probe_va))
			probe_va = mmu.hole_end;

		if (IN_HYPERVISOR_VA(probe_va))
#if defined(__amd64) && defined(__xpv)
			probe_va = HYPERVISOR_VIRT_END;
#else
			return (0);
#endif

		/*
		 * If we don't have a valid PTP/PTE at this level
		 * then we can bump VA by this level's pagesize and try again.
		 * When the probe_va wraps around, we are done.
		 */
		ptep = find_pte(probe_va, &pte_physaddr, l, 1);
		if (ptep == NULL)
			bop_panic("kbm_probe: find_pte returned NULL");
		if (kbm_pae_support)
			pte_val = *ptep;
		else
			pte_val = *((x86pte32_t *)ptep);
		if (!PTE_ISVALID(pte_val)) {
			probe_va = (probe_va & BOOT_MASK(l)) + BOOT_SZ(l);
			if (probe_va <= *va)
				return (0);
			goto restart_new_va;
		}

		/*
		 * If this entry is a pointer to a lower level page table
		 * go down to it.
		 */
		if (!PTE_ISPAGE(pte_val, l)) {
			ASSERT(l > 0);
			--l;
			continue;
		}

		/*
		 * We found a boot level page table entry
		 */
		*len = BOOT_SZ(l);
		*va = probe_va & ~(*len - 1);
		*pfn = PTE2PFN(pte_val, l);

		*prot = PROT_READ | PROT_EXEC;
		if (PTE_GET(pte_val, PT_WRITABLE))
			*prot |= PROT_WRITE;

		/*
		 * pt_nx is cleared if processor doesn't support NX bit
		 */
		if (PTE_GET(pte_val, mmu.pt_nx))
			*prot &= ~PROT_EXEC;

		return (1);
	}
}

/*
 * Destroy a boot loader page table 4K mapping.
 */
void
kbm_unmap(uintptr_t va)
{
	if (khat_running)
		panic("kbm_unmap() called too late");
	else {
#ifdef __xpv
		(void) HYPERVISOR_update_va_mapping(va, 0,
		    UVMF_INVLPG | UVMF_LOCAL);
#else
		x86pte_t *ptep;
		level_t	level = 0;
		uint_t	probe_only = 1;

		ptep = find_pte(va, NULL, level, probe_only);
		if (ptep == NULL)
			return;

		if (kbm_pae_support)
			*ptep = 0;
		else
			*((x86pte32_t *)ptep) = 0;
		mmu_tlbflush_entry((caddr_t)va);
#endif
	}
}

/*
 * Change a boot loader page table 4K mapping.
 * Returns the pfn of the old mapping.
 */
pfn_t
kbm_remap(uintptr_t va, pfn_t pfn)
{
	x86pte_t *ptep;
	level_t	level = 0;
	uint_t	probe_only = 1;
	x86pte_t pte_val = pa_to_ma(pfn_to_pa(pfn)) | PT_WRITABLE |
	    PT_NOCONSIST | PT_VALID;
	x86pte_t old_pte;

	if (khat_running)
		panic("kbm_remap() called too late");
	ptep = find_pte(va, NULL, level, probe_only);
	if (ptep == NULL)
		bop_panic("kbm_remap: find_pte returned NULL");

	if (kbm_pae_support)
		old_pte = *ptep;
	else
		old_pte = *((x86pte32_t *)ptep);

#ifdef __xpv
	if (HYPERVISOR_update_va_mapping(va, pte_val,
	    UVMF_INVLPG | UVMF_LOCAL))
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	if (kbm_pae_support)
		*((x86pte_t *)ptep) = pte_val;
	else
		*((x86pte32_t *)ptep) = pte_val;
	mmu_tlbflush_entry((caddr_t)va);
#endif

	if (!(old_pte & PT_VALID) || ma_to_pa(old_pte) == -1)
		return (PFN_INVALID);
	return (mmu_btop(ma_to_pa(old_pte)));
}
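
#if 0
/*
 * Illustrative sketch only (not compiled): walking every boot-time
 * mapping at or above `low' with kbm_probe(). Startup code can use
 * this pattern to inspect or tear down the boot loader's mappings
 * before the kernel HAT takes over. walk_boot_mappings() is a
 * hypothetical helper.
 */
static void
walk_boot_mappings(uintptr_t low)
{
	uintptr_t va = low;
	size_t len;
	pfn_t pfn;
	uint_t prot;

	while (kbm_probe(&va, &len, &pfn, &prot) != 0) {
		/* [va, va + len) maps pfn with protections in prot */
		va += len;
	}
}
#endif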

/*
 * Change a boot loader page table 4K mapping to read only.
 */
void
kbm_read_only(uintptr_t va, paddr_t pa)
{
	x86pte_t pte_val = pa_to_ma(pa) | PT_NOCONSIST |
	    PT_REF | PT_MOD | PT_VALID;

#ifdef __xpv
	if (HYPERVISOR_update_va_mapping(va, pte_val,
	    UVMF_INVLPG | UVMF_LOCAL))
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	x86pte_t *ptep;
	level_t	level = 0;

	ptep = find_pte(va, NULL, level, 0);
	if (ptep == NULL)
		bop_panic("kbm_read_only: find_pte returned NULL");

	if (kbm_pae_support)
		*ptep = pte_val;
	else
		*((x86pte32_t *)ptep) = pte_val;
	mmu_tlbflush_entry((caddr_t)va);
#endif
}

/*
 * interfaces for kernel debugger to access physical memory
 */
static x86pte_t save_pte;

void *
kbm_push(paddr_t pa)
{
	static int first_time = 1;

	if (first_time) {
		first_time = 0;
		return (window);
	}

	if (kbm_pae_support)
		save_pte = *((x86pte_t *)pte_to_window);
	else
		save_pte = *((x86pte32_t *)pte_to_window);
	return (kbm_remap_window(pa, 0));
}

void
kbm_pop(void)
{
#ifdef __xpv
	if (HYPERVISOR_update_va_mapping((uintptr_t)window,
	    save_pte, UVMF_INVLPG | UVMF_LOCAL) < 0)
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	if (kbm_pae_support)
		*((x86pte_t *)pte_to_window) = save_pte;
	else
		*((x86pte32_t *)pte_to_window) = save_pte;
	mmu_tlbflush_entry(window);
#endif
}

x86pte_t
get_pteval(paddr_t table, uint_t index)
{
	void *table_ptr = kbm_remap_window(table, 0);

	if (kbm_pae_support)
		return (((x86pte_t *)table_ptr)[index]);
	return (((x86pte32_t *)table_ptr)[index]);
}

#ifndef __xpv
void
set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
{
	void *table_ptr = kbm_remap_window(table, 0);

	if (kbm_pae_support)
		((x86pte_t *)table_ptr)[index] = pteval;
	else
		((x86pte32_t *)table_ptr)[index] = pteval;
	if (level == top_level && level == 2)
		reload_cr3();
}
#endif

paddr_t
make_ptable(x86pte_t *pteval, uint_t level)
{
	paddr_t new_table;
	void *table_ptr;

	new_table = do_bop_phys_alloc(MMU_PAGESIZE, MMU_PAGESIZE);
	table_ptr = kbm_remap_window(new_table, 1);
	bzero(table_ptr, MMU_PAGESIZE);
#ifdef __xpv
	/* Remove write permission to the new page table. */
	(void) kbm_remap_window(new_table, 0);
#endif

	if (level == top_level && level == 2)
		*pteval = pa_to_ma(new_table) | PT_VALID;
	else
		*pteval = pa_to_ma(new_table) | PT_VALID |
		    PT_REF | PT_USER | PT_WRITABLE;

	return (new_table);
}

x86pte_t *
map_pte(paddr_t table, uint_t index)
{
	void *table_ptr = kbm_remap_window(table, 0);

	return ((x86pte_t *)((caddr_t)table_ptr + index * pte_size));
}
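
#if 0
/*
 * Illustrative sketch only (not compiled): a debugger-style consumer
 * bracketing physical memory access with kbm_push()/kbm_pop(). The
 * current window PTE is saved by kbm_push() and restored by kbm_pop(),
 * so the calls must be strictly paired. peek_phys() is a hypothetical
 * helper.
 */
static uint8_t
peek_phys(paddr_t pa)
{
	uint8_t *p = kbm_push(pa & MMU_PAGEMASK);
	uint8_t val = p[pa & MMU_PAGEOFFSET];

	kbm_pop();
	return (val);
}
#endif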