/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1989, 1992, 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software developed by the Computer Systems
 * Engineering group at Lawrence Berkeley Laboratory under DARPA contract
 * BG 91-66 and contributed to Berkeley.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
35f95a0250SRodney W. Grimes
36f95a0250SRodney W. Grimes
/*
 * AMD64 machine dependent routines for kvm. Hopefully, the forthcoming
 * vm code will one day obsolete this module.
 */
41f95a0250SRodney W. Grimes
42f95a0250SRodney W. Grimes #include <sys/param.h>
437f911abeSJohn Baldwin #include <sys/endian.h>
447f911abeSJohn Baldwin #include <stdint.h>
4551295a4dSJordan K. Hubbard #include <stdlib.h>
46953e4134SEd Schouten #include <string.h>
47f95a0250SRodney W. Grimes #include <unistd.h>
488baaf913SWill Andrews #include <vm/vm.h>
49f95a0250SRodney W. Grimes #include <kvm.h>
50f95a0250SRodney W. Grimes
51f95a0250SRodney W. Grimes #include <limits.h>
52f95a0250SRodney W. Grimes
53f95a0250SRodney W. Grimes #include "kvm_private.h"
547f911abeSJohn Baldwin #include "kvm_amd64.h"
55f95a0250SRodney W. Grimes
56f95a0250SRodney W. Grimes struct vmstate {
577f911abeSJohn Baldwin size_t phnum;
587f911abeSJohn Baldwin GElf_Phdr *phdr;
597f911abeSJohn Baldwin amd64_pml4e_t *PML4;
60f95a0250SRodney W. Grimes };
61f95a0250SRodney W. Grimes
62e55a0cd8SPeter Wemm /*
63e55a0cd8SPeter Wemm * Translate a physical memory address to a file-offset in the crash-dump.
64e55a0cd8SPeter Wemm */
65e55a0cd8SPeter Wemm static size_t
_kvm_pa2off(kvm_t * kd,uint64_t pa,off_t * ofs)66e55a0cd8SPeter Wemm _kvm_pa2off(kvm_t *kd, uint64_t pa, off_t *ofs)
67e55a0cd8SPeter Wemm {
687f911abeSJohn Baldwin struct vmstate *vm = kd->vmst;
697f911abeSJohn Baldwin GElf_Phdr *p;
707f911abeSJohn Baldwin size_t n;
71e55a0cd8SPeter Wemm
72d7dc9f76SHidetoshi Shimokawa if (kd->rawdump) {
73d7dc9f76SHidetoshi Shimokawa *ofs = pa;
747f911abeSJohn Baldwin return (AMD64_PAGE_SIZE - (pa & AMD64_PAGE_MASK));
75d7dc9f76SHidetoshi Shimokawa }
76d7dc9f76SHidetoshi Shimokawa
777f911abeSJohn Baldwin p = vm->phdr;
787f911abeSJohn Baldwin n = vm->phnum;
79e55a0cd8SPeter Wemm while (n && (pa < p->p_paddr || pa >= p->p_paddr + p->p_memsz))
80e55a0cd8SPeter Wemm p++, n--;
81e55a0cd8SPeter Wemm if (n == 0)
82e55a0cd8SPeter Wemm return (0);
83e55a0cd8SPeter Wemm *ofs = (pa - p->p_paddr) + p->p_offset;
847f911abeSJohn Baldwin return (AMD64_PAGE_SIZE - (pa & AMD64_PAGE_MASK));
85e55a0cd8SPeter Wemm }
86e55a0cd8SPeter Wemm
877f911abeSJohn Baldwin static void
_amd64_freevtop(kvm_t * kd)887f911abeSJohn Baldwin _amd64_freevtop(kvm_t *kd)
892f85bf6eSPeter Wemm {
90e55a0cd8SPeter Wemm struct vmstate *vm = kd->vmst;
91e55a0cd8SPeter Wemm
92e55a0cd8SPeter Wemm if (vm->PML4)
93e55a0cd8SPeter Wemm free(vm->PML4);
947f911abeSJohn Baldwin free(vm->phdr);
95e55a0cd8SPeter Wemm free(vm);
96e55a0cd8SPeter Wemm kd->vmst = NULL;
9721d54b07SRodney W. Grimes }
98f95a0250SRodney W. Grimes
997f911abeSJohn Baldwin static int
_amd64_probe(kvm_t * kd)1007f911abeSJohn Baldwin _amd64_probe(kvm_t *kd)
1012f85bf6eSPeter Wemm {
102e9ca6fe4SPeter Wemm
1037f911abeSJohn Baldwin return (_kvm_probe_elf_kernel(kd, ELFCLASS64, EM_X86_64) &&
1047f911abeSJohn Baldwin !_kvm_is_minidump(kd));
1057f911abeSJohn Baldwin }
1067f911abeSJohn Baldwin
1077f911abeSJohn Baldwin static int
_amd64_initvtop(kvm_t * kd)1087f911abeSJohn Baldwin _amd64_initvtop(kvm_t *kd)
1097f911abeSJohn Baldwin {
1107f911abeSJohn Baldwin struct kvm_nlist nl[2];
1117f911abeSJohn Baldwin amd64_physaddr_t pa;
1127f911abeSJohn Baldwin kvaddr_t kernbase;
1137f911abeSJohn Baldwin amd64_pml4e_t *PML4;
114f95a0250SRodney W. Grimes
115e55a0cd8SPeter Wemm kd->vmst = (struct vmstate *)_kvm_malloc(kd, sizeof(*kd->vmst));
116fb0e1892SEnji Cooper if (kd->vmst == NULL) {
11721d54b07SRodney W. Grimes _kvm_err(kd, kd->program, "cannot allocate vm");
118f95a0250SRodney W. Grimes return (-1);
11921d54b07SRodney W. Grimes }
120e55a0cd8SPeter Wemm kd->vmst->PML4 = 0;
121e55a0cd8SPeter Wemm
122d7dc9f76SHidetoshi Shimokawa if (kd->rawdump == 0) {
1237f911abeSJohn Baldwin if (_kvm_read_core_phdrs(kd, &kd->vmst->phnum,
1247f911abeSJohn Baldwin &kd->vmst->phdr) == -1)
125e55a0cd8SPeter Wemm return (-1);
126d7dc9f76SHidetoshi Shimokawa }
127f95a0250SRodney W. Grimes
128c10970ddSUlrich Spörlein nl[0].n_name = "kernbase";
129c10970ddSUlrich Spörlein nl[1].n_name = 0;
130f85f3040SPeter Wemm
1317f911abeSJohn Baldwin if (kvm_nlist2(kd, nl) != 0) {
132f2b29125SPeter Wemm _kvm_err(kd, kd->program, "bad namelist - no kernbase");
133f2b29125SPeter Wemm return (-1);
134f2b29125SPeter Wemm }
135c10970ddSUlrich Spörlein kernbase = nl[0].n_value;
136f85f3040SPeter Wemm
137c10970ddSUlrich Spörlein nl[0].n_name = "KPML4phys";
138c10970ddSUlrich Spörlein nl[1].n_name = 0;
139f95a0250SRodney W. Grimes
1407f911abeSJohn Baldwin if (kvm_nlist2(kd, nl) != 0) {
141f2b29125SPeter Wemm _kvm_err(kd, kd->program, "bad namelist - no KPML4phys");
142f95a0250SRodney W. Grimes return (-1);
143f95a0250SRodney W. Grimes }
1447f911abeSJohn Baldwin if (kvm_read2(kd, (nl[0].n_value - kernbase), &pa, sizeof(pa)) !=
145f85f3040SPeter Wemm sizeof(pa)) {
146f2b29125SPeter Wemm _kvm_err(kd, kd->program, "cannot read KPML4phys");
147f95a0250SRodney W. Grimes return (-1);
148f95a0250SRodney W. Grimes }
1497f911abeSJohn Baldwin pa = le64toh(pa);
1507f911abeSJohn Baldwin PML4 = _kvm_malloc(kd, AMD64_PAGE_SIZE);
151fb0e1892SEnji Cooper if (PML4 == NULL) {
152fb0e1892SEnji Cooper _kvm_err(kd, kd->program, "cannot allocate PML4");
153fb0e1892SEnji Cooper return (-1);
154fb0e1892SEnji Cooper }
1557f911abeSJohn Baldwin if (kvm_read2(kd, pa, PML4, AMD64_PAGE_SIZE) != AMD64_PAGE_SIZE) {
156f2b29125SPeter Wemm _kvm_err(kd, kd->program, "cannot read KPML4phys");
1578b4e5ab9SEnji Cooper free(PML4);
158f95a0250SRodney W. Grimes return (-1);
159f95a0250SRodney W. Grimes }
160e55a0cd8SPeter Wemm kd->vmst->PML4 = PML4;
161f95a0250SRodney W. Grimes return (0);
162f95a0250SRodney W. Grimes }
163f95a0250SRodney W. Grimes
164f95a0250SRodney W. Grimes static int
_amd64_vatop(kvm_t * kd,kvaddr_t va,off_t * pa)1657f911abeSJohn Baldwin _amd64_vatop(kvm_t *kd, kvaddr_t va, off_t *pa)
1662f85bf6eSPeter Wemm {
1672f85bf6eSPeter Wemm struct vmstate *vm;
1687f911abeSJohn Baldwin amd64_physaddr_t offset;
1697f911abeSJohn Baldwin amd64_physaddr_t pdpe_pa;
1707f911abeSJohn Baldwin amd64_physaddr_t pde_pa;
1717f911abeSJohn Baldwin amd64_physaddr_t pte_pa;
1727f911abeSJohn Baldwin amd64_pml4e_t pml4e;
1737f911abeSJohn Baldwin amd64_pdpe_t pdpe;
1747f911abeSJohn Baldwin amd64_pde_t pde;
1757f911abeSJohn Baldwin amd64_pte_t pte;
1767f911abeSJohn Baldwin kvaddr_t pml4eindex;
1777f911abeSJohn Baldwin kvaddr_t pdpeindex;
1787f911abeSJohn Baldwin kvaddr_t pdeindex;
1797f911abeSJohn Baldwin kvaddr_t pteindex;
1807f911abeSJohn Baldwin amd64_physaddr_t a;
181e55a0cd8SPeter Wemm off_t ofs;
182e55a0cd8SPeter Wemm size_t s;
1832f85bf6eSPeter Wemm
1842f85bf6eSPeter Wemm vm = kd->vmst;
1857f911abeSJohn Baldwin offset = va & AMD64_PAGE_MASK;
1862f85bf6eSPeter Wemm
1872f85bf6eSPeter Wemm /*
1882f85bf6eSPeter Wemm * If we are initializing (kernel page table descriptor pointer
1892f85bf6eSPeter Wemm * not yet set) then return pa == va to avoid infinite recursion.
1902f85bf6eSPeter Wemm */
191fb0e1892SEnji Cooper if (vm->PML4 == NULL) {
192e55a0cd8SPeter Wemm s = _kvm_pa2off(kd, va, pa);
193e55a0cd8SPeter Wemm if (s == 0) {
194e55a0cd8SPeter Wemm _kvm_err(kd, kd->program,
1957f911abeSJohn Baldwin "_amd64_vatop: bootstrap data not in dump");
196e55a0cd8SPeter Wemm goto invalid;
197e55a0cd8SPeter Wemm } else
1987f911abeSJohn Baldwin return (AMD64_PAGE_SIZE - offset);
1992f85bf6eSPeter Wemm }
2002f85bf6eSPeter Wemm
2017f911abeSJohn Baldwin pml4eindex = (va >> AMD64_PML4SHIFT) & (AMD64_NPML4EPG - 1);
2027f911abeSJohn Baldwin pml4e = le64toh(vm->PML4[pml4eindex]);
2037f911abeSJohn Baldwin if ((pml4e & AMD64_PG_V) == 0) {
2047f911abeSJohn Baldwin _kvm_err(kd, kd->program, "_amd64_vatop: pml4e not valid");
205f2b29125SPeter Wemm goto invalid;
206e55a0cd8SPeter Wemm }
207f2b29125SPeter Wemm
2087f911abeSJohn Baldwin pdpeindex = (va >> AMD64_PDPSHIFT) & (AMD64_NPDPEPG - 1);
2097f911abeSJohn Baldwin pdpe_pa = (pml4e & AMD64_PG_FRAME) + (pdpeindex * sizeof(amd64_pdpe_t));
210f2b29125SPeter Wemm
211e55a0cd8SPeter Wemm s = _kvm_pa2off(kd, pdpe_pa, &ofs);
2127f911abeSJohn Baldwin if (s < sizeof(pdpe)) {
2137f911abeSJohn Baldwin _kvm_err(kd, kd->program, "_amd64_vatop: pdpe_pa not found");
214e55a0cd8SPeter Wemm goto invalid;
215e55a0cd8SPeter Wemm }
2167f911abeSJohn Baldwin if (pread(kd->pmfd, &pdpe, sizeof(pdpe), ofs) != sizeof(pdpe)) {
2177f911abeSJohn Baldwin _kvm_syserr(kd, kd->program, "_amd64_vatop: read pdpe");
218f2b29125SPeter Wemm goto invalid;
219f2b29125SPeter Wemm }
2207f911abeSJohn Baldwin pdpe = le64toh(pdpe);
2217f911abeSJohn Baldwin if ((pdpe & AMD64_PG_V) == 0) {
2227f911abeSJohn Baldwin _kvm_err(kd, kd->program, "_amd64_vatop: pdpe not valid");
223f2b29125SPeter Wemm goto invalid;
224e55a0cd8SPeter Wemm }
225f2b29125SPeter Wemm
2267f911abeSJohn Baldwin if (pdpe & AMD64_PG_PS) {
2274afb0d5aSTor Egge /*
2287f911abeSJohn Baldwin * No next-level page table; pdpe describes one 1GB page.
2294afb0d5aSTor Egge */
230ad3ecc20SJohn Baldwin a = (pdpe & AMD64_PG_1GB_FRAME) + (va & AMD64_PDPMASK);
231e55a0cd8SPeter Wemm s = _kvm_pa2off(kd, a, pa);
232e55a0cd8SPeter Wemm if (s == 0) {
233e55a0cd8SPeter Wemm _kvm_err(kd, kd->program,
2347f911abeSJohn Baldwin "_amd64_vatop: 1GB page address not in dump");
235e55a0cd8SPeter Wemm goto invalid;
236e55a0cd8SPeter Wemm } else
2377f911abeSJohn Baldwin return (AMD64_NBPDP - (va & AMD64_PDPMASK));
2384afb0d5aSTor Egge }
2394afb0d5aSTor Egge
2407f911abeSJohn Baldwin pdeindex = (va >> AMD64_PDRSHIFT) & (AMD64_NPDEPG - 1);
2417f911abeSJohn Baldwin pde_pa = (pdpe & AMD64_PG_FRAME) + (pdeindex * sizeof(amd64_pde_t));
2422f85bf6eSPeter Wemm
2437f911abeSJohn Baldwin s = _kvm_pa2off(kd, pde_pa, &ofs);
2447f911abeSJohn Baldwin if (s < sizeof(pde)) {
2457f911abeSJohn Baldwin _kvm_syserr(kd, kd->program, "_amd64_vatop: pde_pa not found");
246e55a0cd8SPeter Wemm goto invalid;
247e55a0cd8SPeter Wemm }
2487f911abeSJohn Baldwin if (pread(kd->pmfd, &pde, sizeof(pde), ofs) != sizeof(pde)) {
2497f911abeSJohn Baldwin _kvm_syserr(kd, kd->program, "_amd64_vatop: read pde");
2502f85bf6eSPeter Wemm goto invalid;
2512f85bf6eSPeter Wemm }
2527f911abeSJohn Baldwin pde = le64toh(pde);
2537f911abeSJohn Baldwin if ((pde & AMD64_PG_V) == 0) {
2547f911abeSJohn Baldwin _kvm_err(kd, kd->program, "_amd64_vatop: pde not valid");
2552f85bf6eSPeter Wemm goto invalid;
256e55a0cd8SPeter Wemm }
2572f85bf6eSPeter Wemm
2587f911abeSJohn Baldwin if (pde & AMD64_PG_PS) {
2597f911abeSJohn Baldwin /*
2607f911abeSJohn Baldwin * No final-level page table; pde describes one 2MB page.
2617f911abeSJohn Baldwin */
2627f911abeSJohn Baldwin a = (pde & AMD64_PG_PS_FRAME) + (va & AMD64_PDRMASK);
263e55a0cd8SPeter Wemm s = _kvm_pa2off(kd, a, pa);
264e55a0cd8SPeter Wemm if (s == 0) {
2657f911abeSJohn Baldwin _kvm_err(kd, kd->program,
2667f911abeSJohn Baldwin "_amd64_vatop: 2MB page address not in dump");
267e55a0cd8SPeter Wemm goto invalid;
268e55a0cd8SPeter Wemm } else
2697f911abeSJohn Baldwin return (AMD64_NBPDR - (va & AMD64_PDRMASK));
2707f911abeSJohn Baldwin }
2717f911abeSJohn Baldwin
2727f911abeSJohn Baldwin pteindex = (va >> AMD64_PAGE_SHIFT) & (AMD64_NPTEPG - 1);
2737f911abeSJohn Baldwin pte_pa = (pde & AMD64_PG_FRAME) + (pteindex * sizeof(amd64_pte_t));
2747f911abeSJohn Baldwin
2757f911abeSJohn Baldwin s = _kvm_pa2off(kd, pte_pa, &ofs);
2767f911abeSJohn Baldwin if (s < sizeof(pte)) {
2777f911abeSJohn Baldwin _kvm_err(kd, kd->program, "_amd64_vatop: pte_pa not found");
2787f911abeSJohn Baldwin goto invalid;
2797f911abeSJohn Baldwin }
2807f911abeSJohn Baldwin if (pread(kd->pmfd, &pte, sizeof(pte), ofs) != sizeof(pte)) {
2817f911abeSJohn Baldwin _kvm_syserr(kd, kd->program, "_amd64_vatop: read");
2827f911abeSJohn Baldwin goto invalid;
2837f911abeSJohn Baldwin }
2847f911abeSJohn Baldwin if ((pte & AMD64_PG_V) == 0) {
2857f911abeSJohn Baldwin _kvm_err(kd, kd->program, "_amd64_vatop: pte not valid");
2867f911abeSJohn Baldwin goto invalid;
2877f911abeSJohn Baldwin }
2887f911abeSJohn Baldwin
2897f911abeSJohn Baldwin a = (pte & AMD64_PG_FRAME) + offset;
2907f911abeSJohn Baldwin s = _kvm_pa2off(kd, a, pa);
2917f911abeSJohn Baldwin if (s == 0) {
2927f911abeSJohn Baldwin _kvm_err(kd, kd->program, "_amd64_vatop: address not in dump");
2937f911abeSJohn Baldwin goto invalid;
2947f911abeSJohn Baldwin } else
2957f911abeSJohn Baldwin return (AMD64_PAGE_SIZE - offset);
2962f85bf6eSPeter Wemm
2972f85bf6eSPeter Wemm invalid:
2987f911abeSJohn Baldwin _kvm_err(kd, 0, "invalid address (0x%jx)", (uintmax_t)va);
2992f85bf6eSPeter Wemm return (0);
300f95a0250SRodney W. Grimes }
301f95a0250SRodney W. Grimes
3027f911abeSJohn Baldwin static int
_amd64_kvatop(kvm_t * kd,kvaddr_t va,off_t * pa)3037f911abeSJohn Baldwin _amd64_kvatop(kvm_t *kd, kvaddr_t va, off_t *pa)
3042f85bf6eSPeter Wemm {
305e55a0cd8SPeter Wemm
306e55a0cd8SPeter Wemm if (ISALIVE(kd)) {
307e55a0cd8SPeter Wemm _kvm_err(kd, 0, "kvm_kvatop called in live kernel!");
308e55a0cd8SPeter Wemm return (0);
309e55a0cd8SPeter Wemm }
3107f911abeSJohn Baldwin return (_amd64_vatop(kd, va, pa));
311f95a0250SRodney W. Grimes }
3127f911abeSJohn Baldwin
3137f911abeSJohn Baldwin int
_amd64_native(kvm_t * kd __unused)314881b0edbSEnji Cooper _amd64_native(kvm_t *kd __unused)
3157f911abeSJohn Baldwin {
3167f911abeSJohn Baldwin
3177f911abeSJohn Baldwin #ifdef __amd64__
3187f911abeSJohn Baldwin return (1);
3197f911abeSJohn Baldwin #else
3207f911abeSJohn Baldwin return (0);
3217f911abeSJohn Baldwin #endif
3227f911abeSJohn Baldwin }
3237f911abeSJohn Baldwin
324881b0edbSEnji Cooper static struct kvm_arch kvm_amd64 = {
3257f911abeSJohn Baldwin .ka_probe = _amd64_probe,
3267f911abeSJohn Baldwin .ka_initvtop = _amd64_initvtop,
3277f911abeSJohn Baldwin .ka_freevtop = _amd64_freevtop,
3287f911abeSJohn Baldwin .ka_kvatop = _amd64_kvatop,
3297f911abeSJohn Baldwin .ka_native = _amd64_native,
3307f911abeSJohn Baldwin };
3317f911abeSJohn Baldwin
3327f911abeSJohn Baldwin KVM_ARCH(kvm_amd64);
333