1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * KVM backend for hypervisor domain dumps. We don't use libkvm for
28 * such dumps, since they do not have a namelist file or the typical
29 * dump structures we expect to aid bootstrapping. Instead, we
30 * bootstrap based upon a debug_info structure at a known VA, using the
31 * guest's own page tables to resolve to physical addresses, and
32 * construct the namelist in a manner similar to ksyms_snapshot().
33 *
34 * Note that there are two formats understood by this module: the older,
35 * ad hoc format, which we call 'core' within this file, and an
36 * ELF-based format, known as 'elf'.
37 *
38 * We only support the older format generated on Solaris dom0: before we
39 * fixed it, core dump files were broken whenever a PFN didn't map a
40 * real MFN (!).
41 */
42
43 #include <strings.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <stddef.h>
47 #include <stdarg.h>
48 #include <unistd.h>
49 #include <fcntl.h>
50 #include <gelf.h>
51 #include <errno.h>
52
53 #include <sys/mman.h>
54 #include <sys/stat.h>
55 #include <sys/debug_info.h>
56 #include <sys/xen_mmu.h>
57 #include <sys/elf.h>
58 #include <sys/machelf.h>
59 #include <sys/modctl.h>
60 #include <sys/kobj.h>
61 #include <sys/kobj_impl.h>
62 #include <sys/sysmacros.h>
63 #include <sys/privmregs.h>
64 #include <vm/as.h>
65
66 #include <mdb/mdb_io.h>
67 #include <mdb/mdb_kb.h>
68 #include <mdb/mdb_target_impl.h>
69
70 #include <xen/public/xen.h>
71 #include <xen/public/version.h>
72 #include <xen/public/elfnote.h>
73
74 #define XKB_SHDR_NULL 0
75 #define XKB_SHDR_SYMTAB 1
76 #define XKB_SHDR_STRTAB 2
77 #define XKB_SHDR_SHSTRTAB 3
78 #define XKB_SHDR_NUM 4
79
80 #define XKB_WALK_LOCAL 0x1
81 #define XKB_WALK_GLOBAL 0x2
82 #define XKB_WALK_STR 0x4
83 #define XKB_WALK_ALL (XKB_WALK_LOCAL | XKB_WALK_GLOBAL | XKB_WALK_STR)
84
85 #if defined(__i386)
86 #define DEBUG_INFO 0xf4bff000
87 #define DEBUG_INFO_HVM 0xfe7ff000
88 #elif defined(__amd64)
89 #define DEBUG_INFO 0xfffffffffb7ff000
90 #define DEBUG_INFO_HVM 0xfffffffffb7ff000
91 #endif
92
93 #define PAGE_SIZE 0x1000
94 #define PAGE_SHIFT 12
95 #define PAGE_OFFSET(a) ((a) & (PAGE_SIZE - 1))
96 #define PAGE_MASK(a) ((a) & ~(PAGE_SIZE - 1))
97 #define PAGE_ALIGNED(a) (((a) & (PAGE_SIZE -1)) == 0)
98 #define PT_PADDR_LGPG 0x000fffffffffe000ull
99 #define PT_PADDR 0x000ffffffffff000ull
100 #define PT_VALID 0x1
101 #define PT_PAGESIZE 0x080
102 #define PTE_IS_LGPG(p, l) ((l) > 0 && ((p) & PT_PAGESIZE))
103
104 #define XC_CORE_MAGIC 0xF00FEBED
105 #define XC_CORE_MAGIC_HVM 0xF00FEBEE
106
107 #define VGCF_HVM_GUEST (1<<1)
108
/*
 * On-disk header of the old-style ('core') dump format.  All fields,
 * including the file offsets, are 32 bits wide.
 */
typedef struct xc_core_header {
	unsigned int xch_magic;		/* XC_CORE_MAGIC or XC_CORE_MAGIC_HVM */
	unsigned int xch_nr_vcpus;
	unsigned int xch_nr_pages;
	unsigned int xch_ctxt_offset;	/* file offset of VCPU contexts */
	unsigned int xch_index_offset;	/* file offset of the p2m table */
	unsigned int xch_pages_offset;	/* file offset of the page data */
} xc_core_header_t;

/* Payload of the XEN_ELFNOTE_DUMPCORE_HEADER note in the ELF format. */
struct xc_elf_header {
	uint64_t xeh_magic;		/* XC_CORE_MAGIC or XC_CORE_MAGIC_HVM */
	uint64_t xeh_nr_vcpus;
	uint64_t xeh_nr_pages;
	uint64_t xeh_page_size;
};

/*
 * Payload of the XEN_ELFNOTE_DUMPCORE_XEN_VERSION note.  NOTE(review):
 * layout differs between 32- and 64-bit dumps past xev_capabilities
 * (see xkb_identify()).
 */
struct xc_elf_version {
	uint64_t xev_major;
	uint64_t xev_minor;
	xen_extraversion_t xev_extra;
	xen_compile_info_t xev_compile_info;
	xen_capabilities_info_t xev_capabilities;
	xen_changeset_info_t xev_changeset;
	xen_platform_parameters_t xev_platform_parameters;
	uint64_t xev_pagesize;
};

/*
 * Either an old-style (3.0.4) core format, or the ELF format.
 */
typedef enum {
	XKB_FORMAT_UNKNOWN = 0,
	XKB_FORMAT_CORE = 1,
	XKB_FORMAT_ELF = 2
} xkb_type_t;

/* One cached mapping of an MFN's page contents. */
typedef struct mfn_map {
	mfn_t mm_mfn;
	char *mm_map;		/* page contents; mmap()ed when windowed */
} mfn_map_t;

/* Guest page-table geometry, derived from the dump's word size/PAE-ness. */
typedef struct mmu_info {
	size_t mi_max;		/* index of the top page-table level */
	size_t mi_shift[4];	/* VA shift for each level's table index */
	size_t mi_ptes;		/* number of PTEs per table */
	size_t mi_ptesize;	/* bytes per PTE: 4 or 8 */
} mmu_info_t;

/* State private to the old 'core' format. */
typedef struct xkb_core {
	xc_core_header_t xc_hdr;
	void *xc_p2m_buf;	/* page-aligned mmap() holding the p2m table */
} xkb_core_t;

/* State private to the ELF format. */
typedef struct xkb_elf {
	mdb_gelf_file_t *xe_gelf;
	size_t *xe_off;		/* pfn -> page index in dump; (size_t)-1 if absent */
	struct xc_elf_header xe_hdr;
	struct xc_elf_version xe_version;
} xkb_elf_t;

typedef struct xkb {
	char *xkb_path;		/* path to the dump file */
	int xkb_fd;
	int xkb_is_hvm;

	xkb_type_t xkb_type;	/* which of the two dump formats this is */
	xkb_core_t xkb_core;
	xkb_elf_t xkb_elf;

	size_t xkb_nr_vcpus;
	size_t xkb_nr_pages;
	size_t xkb_pages_off;	/* file offset of the page data */
	xen_pfn_t xkb_max_pfn;
	mfn_t xkb_max_mfn;
	int xkb_is_pae;

	mmu_info_t xkb_mmu;
	debug_info_t xkb_info;	/* bootstrap info read from the DEBUG_INFO VA */

	void *xkb_vcpu_data;	/* raw VCPU context area */
	size_t xkb_vcpu_data_sz;
	struct vcpu_guest_context **xkb_vcpus;

	char *xkb_pages;	/* all pages mapped at once; NULL if windowed */
	mfn_t *xkb_p2m;		/* pfn -> mfn (identity for HVM) */
	xen_pfn_t *xkb_m2p;	/* mfn -> pfn */
	mfn_map_t xkb_pt_map[4];	/* per-level page-table page cache */
	mfn_map_t xkb_map;	/* general single-page window */

	char *xkb_namelist;	/* fabricated ksyms ELF image */
	size_t xkb_namesize;
} xkb_t;

static const char xkb_shstrtab[] = "\0.symtab\0.strtab\0.shstrtab\0";

/*
 * Header of the fabricated namelist image: ELF header, program and
 * section headers, and the section-name strings.  The symbol table and
 * string table data follow this structure directly in xkb_namelist.
 */
typedef struct xkb_namelist {
	Ehdr	kh_elf_hdr;
	Phdr	kh_text_phdr;
	Phdr	kh_data_phdr;
	Shdr	kh_shdr[XKB_SHDR_NUM];
	char	shstrings[sizeof (xkb_shstrtab)];
} xkb_namelist_t;
211
212 static int xkb_build_ksyms(xkb_t *);
213 static offset_t xkb_mfn_to_offset(xkb_t *, mfn_t);
214 static mfn_t xkb_va_to_mfn(xkb_t *, uintptr_t, mfn_t);
215 static ssize_t xkb_read(xkb_t *, uintptr_t, void *, size_t);
216 static int xkb_read_word(xkb_t *, uintptr_t, uintptr_t *);
217 static char *xkb_map_mfn(xkb_t *, mfn_t, mfn_map_t *);
218 static int xkb_close(xkb_t *);
219
/*
 * Jump through the hoops we need to correctly identify a core file
 * of either the old or new format.  Returns 1 on a successful
 * identification (setting *longmode to the guest's word size), 0 if
 * the file isn't recognized, and -1 if it can't be opened.
 */
int
xkb_identify(const char *file, int *longmode)
{
	xc_core_header_t header;
	mdb_gelf_file_t *gf = NULL;
	mdb_gelf_sect_t *sect = NULL;
	mdb_io_t *io = NULL;
	char *notes = NULL;
	char *pos;
	int ret = 0;
	size_t sz;
	int fd;

	if ((fd = open64(file, O_RDONLY)) == -1)
		return (-1);

	if (pread64(fd, &header, sizeof (header), 0) != sizeof (header)) {
		(void) close(fd);
		return (0);
	}

	(void) close(fd);

	if (header.xch_magic == XC_CORE_MAGIC) {
		*longmode = 0;

		/*
		 * The old format has no explicit word-size marker.  The
		 * area between xch_ctxt_offset and xch_index_offset
		 * holds xch_nr_vcpus VCPU contexts, so compare its size
		 * against our own (native-sized) struct
		 * vcpu_guest_context: a match means the dump's word
		 * size equals this binary's.
		 */
		sz = header.xch_index_offset - header.xch_ctxt_offset;
#ifdef _LP64
		if (sizeof (struct vcpu_guest_context) *
		    header.xch_nr_vcpus == sz)
			*longmode = 1;
#else
		if (sizeof (struct vcpu_guest_context) *
		    header.xch_nr_vcpus != sz)
			*longmode = 1;
#endif /* _LP64 */

		return (1);
	}

	/*
	 * Not the old format; check for an ELF-format dump by looking
	 * for its .note.Xen section.
	 */
	if ((io = mdb_fdio_create_path(NULL, file, O_RDONLY, 0)) == NULL)
		return (-1);

	if ((gf = mdb_gelf_create(io, ET_NONE, GF_FILE)) == NULL)
		goto out;

	if ((sect = mdb_gelf_sect_by_name(gf, ".note.Xen")) == NULL)
		goto out;

	if ((notes = mdb_gelf_sect_load(gf, sect)) == NULL)
		goto out;

	/* Walk every ELF note in the section; names are padded to 4 bytes. */
	for (pos = notes; pos < notes + sect->gs_shdr.sh_size; ) {
		struct xc_elf_version *vers;
		/* LINTED - alignment */
		Elf64_Nhdr *nhdr = (Elf64_Nhdr *)pos;
		char *desc;
		char *name;

		name = pos + sizeof (*nhdr);
		desc = (char *)P2ROUNDUP((uintptr_t)name + nhdr->n_namesz, 4);

		pos = desc + nhdr->n_descsz;

		if (nhdr->n_type != XEN_ELFNOTE_DUMPCORE_XEN_VERSION)
			continue;

		/*
		 * The contents of this struct differ between 32 and 64
		 * bit; however, not until past the 'xev_capabilities'
		 * member, so we can just about get away with this.
		 */

		/* LINTED - alignment */
		vers = (struct xc_elf_version *)desc;

		if (strstr(vers->xev_capabilities, "x86_64")) {
			/*
			 * 64-bit hypervisor, but it can still be
			 * a 32-bit domain core. 32-bit domain cores
			 * are also dumped in Elf64 format, but they
			 * have e_machine set to EM_386, not EM_AMD64.
			 */
			if (gf->gf_ehdr.e_machine == EM_386)
				*longmode = 0;
			else
				*longmode = 1;
		} else if (strstr(vers->xev_capabilities, "x86_32") ||
		    strstr(vers->xev_capabilities, "x86_32p")) {
			/*
			 * 32-bit hypervisor, can only be a 32-bit core.
			 */
			*longmode = 0;
		} else {
			mdb_warn("couldn't derive word size of dump; "
			    "assuming 64-bit");
			*longmode = 1;
		}
	}

	ret = 1;

out:
	/*
	 * NOTE(review): destroying the gelf file presumably tears down
	 * the underlying io as well; only destroy io directly when no
	 * gelf file was created.
	 */
	if (gf != NULL)
		mdb_gelf_destroy(gf);
	else if (io != NULL)
		mdb_io_destroy(io);
	return (ret);
}
336
337 static void *
xkb_fail(xkb_t * xkb,const char * msg,...)338 xkb_fail(xkb_t *xkb, const char *msg, ...)
339 {
340 va_list args;
341
342 va_start(args, msg);
343 if (xkb != NULL)
344 (void) fprintf(stderr, "%s: ", xkb->xkb_path);
345 (void) vfprintf(stderr, msg, args);
346 (void) fprintf(stderr, "\n");
347 va_end(args);
348 if (xkb != NULL)
349 (void) xkb_close(xkb);
350
351 errno = ENOEXEC;
352
353 return (NULL);
354 }
355
356 static int
xkb_build_m2p(xkb_t * xkb)357 xkb_build_m2p(xkb_t *xkb)
358 {
359 size_t i;
360
361 for (i = 0; i <= xkb->xkb_max_pfn; i++) {
362 if (xkb->xkb_p2m[i] != MFN_INVALID &&
363 xkb->xkb_p2m[i] > xkb->xkb_max_mfn)
364 xkb->xkb_max_mfn = xkb->xkb_p2m[i];
365 }
366
367 xkb->xkb_m2p = mdb_alloc((xkb->xkb_max_mfn + 1) * sizeof (xen_pfn_t),
368 UM_SLEEP);
369
370 for (i = 0; i <= xkb->xkb_max_mfn; i++)
371 xkb->xkb_m2p[i] = PFN_INVALID;
372
373 for (i = 0; i <= xkb->xkb_max_pfn; i++) {
374 if (xkb->xkb_p2m[i] != MFN_INVALID)
375 xkb->xkb_m2p[xkb->xkb_p2m[i]] = i;
376 }
377
378 return (1);
379 }
380
/*
 * With FORMAT_CORE, we can use the table in the dump file directly.
 * Just to make things fun, they've not page-aligned the p2m table.
 */
static int
xkb_map_p2m(xkb_t *xkb)
{
	offset_t off;
	size_t size;
	xkb_core_t *xc = &xkb->xkb_core;
	size_t count = xkb->xkb_nr_pages;
	size_t boff = xc->xc_hdr.xch_index_offset;

	/*
	 * mmap() must start at a page-aligned offset, so we map from
	 * the page boundary below boff.  Adding two extra pages before
	 * rounding the size down covers both the alignment slop at the
	 * front and the table's tail.
	 */
	size = (sizeof (mfn_t) * count) + (PAGE_SIZE * 2);
	size = PAGE_MASK(size);
	off = PAGE_MASK(boff);

	/* LINTED - alignment */
	xc->xc_p2m_buf = (mfn_t *)mmap(NULL, size, PROT_READ,
	    MAP_SHARED, xkb->xkb_fd, off);

	if (xc->xc_p2m_buf == (xen_pfn_t *)MAP_FAILED) {
		(void) xkb_fail(xkb, "cannot map p2m table");
		return (0);
	}

	/* Point xkb_p2m at the (unaligned) table inside the mapping. */
	/* LINTED - alignment */
	xkb->xkb_p2m = (mfn_t *)((char *)xc->xc_p2m_buf +
	    PAGE_OFFSET(boff));

	return (1);
}
413
414 /*
415 * With FORMAT_ELF, we have a set of <pfn,mfn> pairs, which we convert
416 * into a linear array indexed by pfn for convenience. We also need to
417 * track the mapping between mfn and the offset in the file: a pfn with
418 * no mfn will not appear in the core file.
419 */
420 static int
xkb_build_p2m(xkb_t * xkb)421 xkb_build_p2m(xkb_t *xkb)
422 {
423 xkb_elf_t *xe = &xkb->xkb_elf;
424 mdb_gelf_sect_t *sect;
425 size_t size;
426 size_t i;
427
428 struct elf_p2m {
429 uint64_t pfn;
430 uint64_t gmfn;
431 } *p2m;
432
433 sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_p2m");
434
435 if (sect == NULL) {
436 (void) xkb_fail(xkb, "cannot find section .xen_p2m");
437 return (0);
438 }
439
440 if ((p2m = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL) {
441 (void) xkb_fail(xkb, "couldn't read .xen_p2m");
442 return (0);
443 }
444
445 for (i = 0; i < xkb->xkb_nr_pages; i++) {
446 if (p2m[i].pfn > xkb->xkb_max_pfn)
447 xkb->xkb_max_pfn = p2m[i].pfn;
448 }
449
450 size = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);
451 xkb->xkb_p2m = mdb_alloc(size, UM_SLEEP);
452 size = sizeof (size_t) * (xkb->xkb_max_pfn + 1);
453 xe->xe_off = mdb_alloc(size, UM_SLEEP);
454
455 for (i = 0; i <= xkb->xkb_max_pfn; i++) {
456 xkb->xkb_p2m[i] = PFN_INVALID;
457 xe->xe_off[i] = (size_t)-1;
458 }
459
460 for (i = 0; i < xkb->xkb_nr_pages; i++) {
461 xkb->xkb_p2m[p2m[i].pfn] = p2m[i].gmfn;
462 xe->xe_off[p2m[i].pfn] = i;
463 }
464
465 return (1);
466 }
467
468 /*
469 * For HVM images, we don't have the corresponding MFN list; the table
470 * is just a mapping from page index in the dump to the corresponding
471 * PFN. To simplify the other code, we'll pretend that these PFNs are
472 * really MFNs as well, by populating xkb_p2m.
473 */
474 static int
xkb_build_fake_p2m(xkb_t * xkb)475 xkb_build_fake_p2m(xkb_t *xkb)
476 {
477 xkb_elf_t *xe = &xkb->xkb_elf;
478 mdb_gelf_sect_t *sect;
479 size_t size;
480 size_t i;
481
482 uint64_t *p2pfn;
483
484 sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_pfn");
485
486 if (sect == NULL) {
487 (void) xkb_fail(xkb, "cannot find section .xen_pfn");
488 return (0);
489 }
490
491 if ((p2pfn = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL) {
492 (void) xkb_fail(xkb, "couldn't read .xen_pfn");
493 return (0);
494 }
495
496 for (i = 0; i < xkb->xkb_nr_pages; i++) {
497 if (p2pfn[i] != PFN_INVALID && p2pfn[i] > xkb->xkb_max_pfn)
498 xkb->xkb_max_pfn = p2pfn[i];
499 }
500
501 size = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);
502 xkb->xkb_p2m = mdb_alloc(size, UM_SLEEP);
503
504 size = sizeof (size_t) * (xkb->xkb_max_pfn + 1);
505 xe->xe_off = mdb_alloc(size, UM_SLEEP);
506
507 for (i = 0; i <= xkb->xkb_max_pfn; i++) {
508 xkb->xkb_p2m[i] = PFN_INVALID;
509 xe->xe_off[i] = (size_t)-1;
510 }
511
512 for (i = 0; i < xkb->xkb_nr_pages; i++) {
513 if (p2pfn[i] == PFN_INVALID)
514 continue;
515 xkb->xkb_p2m[p2pfn[i]] = p2pfn[i];
516 xe->xe_off[p2pfn[i]] = i;
517 }
518
519 return (1);
520 }
521
522 /*
523 * Return the MFN of the top-level page table for the given as.
524 */
525 static mfn_t
xkb_as_to_mfn(xkb_t * xkb,struct as * as)526 xkb_as_to_mfn(xkb_t *xkb, struct as *as)
527 {
528 uintptr_t asp = (uintptr_t)as;
529 uintptr_t hatp;
530 uintptr_t htablep;
531 uintptr_t pfn;
532
533 if (!xkb_read_word(xkb, asp + offsetof(struct as, a_hat), &hatp))
534 return (MFN_INVALID);
535 if (!xkb_read_word(xkb, hatp + xkb->xkb_info.di_hat_htable_off,
536 &htablep))
537 return (MFN_INVALID);
538 if (!xkb_read_word(xkb, htablep + xkb->xkb_info.di_ht_pfn_off,
539 &pfn))
540 return (MFN_INVALID);
541
542 if (pfn > xkb->xkb_max_pfn)
543 return (MFN_INVALID);
544
545 return (xkb->xkb_p2m[pfn]);
546 }
547
548 static mfn_t
xkb_cr3_to_pfn(xkb_t * xkb)549 xkb_cr3_to_pfn(xkb_t *xkb)
550 {
551 uint64_t cr3 = xkb->xkb_vcpus[0]->ctrlreg[3];
552 if (xkb->xkb_is_hvm)
553 return (cr3 >> PAGE_SHIFT);
554 return (xen_cr3_to_pfn(cr3));
555 }
556
557 static ssize_t
xkb_read_helper(xkb_t * xkb,struct as * as,int phys,uint64_t addr,void * buf,size_t size)558 xkb_read_helper(xkb_t *xkb, struct as *as, int phys, uint64_t addr,
559 void *buf, size_t size)
560 {
561 size_t left = size;
562 int windowed = (xkb->xkb_pages == NULL);
563 mfn_t tlmfn = xkb_cr3_to_pfn(xkb);
564
565 if (as != NULL && (tlmfn = xkb_as_to_mfn(xkb, as)) == MFN_INVALID)
566 return (-1);
567
568 while (left) {
569 uint64_t pos = addr + (size - left);
570 char *outpos = (char *)buf + (size - left);
571 size_t pageoff = PAGE_OFFSET(pos);
572 size_t sz = MIN(left, PAGE_SIZE - pageoff);
573 mfn_t mfn;
574
575 if (!phys) {
576 mfn = xkb_va_to_mfn(xkb, pos, tlmfn);
577 if (mfn == MFN_INVALID)
578 return (-1);
579 } else {
580 xen_pfn_t pfn = pos >> PAGE_SHIFT;
581 if (pfn > xkb->xkb_max_pfn)
582 return (-1);
583 mfn = xkb->xkb_p2m[pfn];
584 if (mfn == MFN_INVALID)
585 return (-1);
586 }
587
588 /*
589 * If we're windowed then pread() is much faster.
590 */
591 if (windowed) {
592 offset_t off = xkb_mfn_to_offset(xkb, mfn);
593 int ret;
594
595 if (off == ~1ULL)
596 return (-1);
597
598 off += pageoff;
599
600 ret = pread64(xkb->xkb_fd, outpos, sz, off);
601 if (ret == -1)
602 return (-1);
603 if (ret != sz)
604 return ((size - left) + ret);
605
606 left -= ret;
607 } else {
608 if (xkb_map_mfn(xkb, mfn, &xkb->xkb_map) == NULL)
609 return (-1);
610
611 bcopy(xkb->xkb_map.mm_map + pageoff, outpos, sz);
612
613 left -= sz;
614 }
615 }
616
617 return (size);
618 }
619
/*
 * Read 'size' bytes from physical address 'addr' into 'buf'.
 */
static ssize_t
xkb_pread(xkb_t *xkb, uint64_t addr, void *buf, size_t size)
{
	return (xkb_read_helper(xkb, NULL, 1, addr, buf, size));
}
625
/*
 * Read 'size' bytes from virtual address 'addr' in the context of the
 * given address space (kernel context via VCPU 0's %cr3 if as == NULL).
 */
static ssize_t
xkb_aread(xkb_t *xkb, uintptr_t addr, void *buf, size_t size, struct as *as)
{
	return (xkb_read_helper(xkb, as, 0, addr, buf, size));
}
631
/*
 * Read 'size' bytes from kernel virtual address 'addr' into 'buf'.
 */
static ssize_t
xkb_read(xkb_t *xkb, uintptr_t addr, void *buf, size_t size)
{
	return (xkb_aread(xkb, addr, buf, size, NULL));
}
637
638 static int
xkb_read_word(xkb_t * xkb,uintptr_t addr,uintptr_t * buf)639 xkb_read_word(xkb_t *xkb, uintptr_t addr, uintptr_t *buf)
640 {
641 if (xkb_read(xkb, addr, buf, sizeof (uintptr_t)) !=
642 sizeof (uintptr_t))
643 return (0);
644 return (1);
645 }
646
647 static char *
xkb_readstr(xkb_t * xkb,uintptr_t addr)648 xkb_readstr(xkb_t *xkb, uintptr_t addr)
649 {
650 char *str = mdb_alloc(1024, UM_SLEEP);
651 size_t i;
652
653 for (i = 0; i < 1024; i++) {
654 if (xkb_read(xkb, addr + i, &str[i], 1) != 1) {
655 mdb_free(str, 1024);
656 return (NULL);
657 }
658
659 if (str[i] == '\0')
660 break;
661 }
662
663 if (i == 1024) {
664 mdb_free(str, 1024);
665 return (NULL);
666 }
667
668 return (str);
669 }
670
671 static offset_t
xkb_pfn_to_off(xkb_t * xkb,xen_pfn_t pfn)672 xkb_pfn_to_off(xkb_t *xkb, xen_pfn_t pfn)
673 {
674 if (pfn == PFN_INVALID || pfn > xkb->xkb_max_pfn)
675 return (-1ULL);
676
677 if (xkb->xkb_type == XKB_FORMAT_CORE)
678 return (PAGE_SIZE * pfn);
679
680 return (PAGE_SIZE * (xkb->xkb_elf.xe_off[pfn]));
681 }
682
683 static offset_t
xkb_mfn_to_offset(xkb_t * xkb,mfn_t mfn)684 xkb_mfn_to_offset(xkb_t *xkb, mfn_t mfn)
685 {
686 xen_pfn_t pfn;
687
688 if (mfn > xkb->xkb_max_mfn)
689 return (-1ULL);
690
691 pfn = xkb->xkb_m2p[mfn];
692
693 if (pfn == PFN_INVALID)
694 return (-1ULL);
695
696 return (xkb->xkb_pages_off + xkb_pfn_to_off(xkb, pfn));
697 }
698
/*
 * Return a pointer to the contents of the page for the given MFN,
 * caching the result in 'mm'.  In windowed mode each page is mmap()ed
 * on demand (replacing the cache's previous mapping); otherwise we
 * simply point into the single big xkb_pages mapping.  Returns NULL
 * if the MFN can't be resolved to a page in the dump.
 */
static char *
xkb_map_mfn(xkb_t *xkb, mfn_t mfn, mfn_map_t *mm)
{
	int windowed = (xkb->xkb_pages == NULL);
	offset_t off;

	/* Cache hit: same MFN as last time. */
	if (mm->mm_mfn == mfn)
		return (mm->mm_map);

	mm->mm_mfn = mfn;

	if (windowed) {
		/* Drop the previous window before mapping the new page. */
		if (mm->mm_map != (char *)MAP_FAILED) {
			(void) munmap(mm->mm_map, PAGE_SIZE);
			mm->mm_map = (void *)MAP_FAILED;
		}

		if ((off = xkb_mfn_to_offset(xkb, mfn)) == (-1ULL))
			return (NULL);

		mm->mm_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED,
		    xkb->xkb_fd, off);

		if (mm->mm_map == (char *)MAP_FAILED)
			return (NULL);
	} else {
		xen_pfn_t pfn;

		/*
		 * Invalidate the cache first so a failed lookup isn't
		 * mistaken for a valid cached mapping later.
		 */
		mm->mm_map = NULL;

		if (mfn > xkb->xkb_max_mfn)
			return (NULL);

		pfn = xkb->xkb_m2p[mfn];

		if (pfn == PFN_INVALID)
			return (NULL);

		mm->mm_map = xkb->xkb_pages + xkb_pfn_to_off(xkb, pfn);
	}

	return (mm->mm_map);
}
742
743 static uint64_t
xkb_get_pte(mmu_info_t * mmu,char * ptep)744 xkb_get_pte(mmu_info_t *mmu, char *ptep)
745 {
746 uint64_t pte = 0;
747
748 if (mmu->mi_ptesize == 8) {
749 /* LINTED - alignment */
750 pte = *((uint64_t *)ptep);
751 } else {
752 /* LINTED - alignment */
753 pte = *((uint32_t *)ptep);
754 }
755
756 return (pte);
757 }
758
759 static mfn_t
xkb_pte_to_base_mfn(uint64_t pte,size_t level)760 xkb_pte_to_base_mfn(uint64_t pte, size_t level)
761 {
762 if (PTE_IS_LGPG(pte, level)) {
763 pte &= PT_PADDR_LGPG;
764 } else {
765 pte &= PT_PADDR;
766 }
767
768 return (pte >> PAGE_SHIFT);
769 }
770
/*
 * Resolve the given VA into an MFN, using the provided mfn as a top-level page
 * table.
 */
static mfn_t
xkb_va_to_mfn(xkb_t *xkb, uintptr_t va, mfn_t mfn)
{
	mmu_info_t *mmu = &xkb->xkb_mmu;
	uint64_t pte;
	size_t level;

	/* Walk downwards from the top page-table level to the leaf. */
	for (level = mmu->mi_max; ; --level) {
		size_t entry;

		/* Map this level's table page (cached per level). */
		if (xkb_map_mfn(xkb, mfn, &xkb->xkb_pt_map[level]) == NULL)
			return (MFN_INVALID);

		/* Index of the PTE for 'va' within this table. */
		entry = (va >> mmu->mi_shift[level]) & (mmu->mi_ptes - 1);

		pte = xkb_get_pte(mmu, (char *)xkb->xkb_pt_map[level].mm_map +
		    entry * mmu->mi_ptesize);

		if ((mfn = xkb_pte_to_base_mfn(pte, level)) == MFN_INVALID)
			return (MFN_INVALID);

		if (level == 0)
			break;

		/*
		 * Currently 'mfn' refers to the base MFN of the
		 * large-page mapping. Add on the 4K-sized index into
		 * the large-page mapping to get the right MFN within
		 * the mapping.
		 */
		if (PTE_IS_LGPG(pte, level)) {
			mfn += (va & ((1 << mmu->mi_shift[level]) - 1)) >>
			    PAGE_SHIFT;
			break;
		}
	}

	return (mfn);
}
814
815 static int
xkb_read_module(xkb_t * xkb,uintptr_t modulep,struct module * module,uintptr_t * sym_addr,uintptr_t * sym_count,uintptr_t * str_addr)816 xkb_read_module(xkb_t *xkb, uintptr_t modulep, struct module *module,
817 uintptr_t *sym_addr, uintptr_t *sym_count, uintptr_t *str_addr)
818 {
819 if (xkb_read(xkb, modulep, module, sizeof (struct module)) !=
820 sizeof (struct module))
821 return (0);
822
823 if (!xkb_read_word(xkb, (uintptr_t)module->symhdr +
824 offsetof(Shdr, sh_addr), sym_addr))
825 return (0);
826
827 if (!xkb_read_word(xkb, (uintptr_t)module->strhdr +
828 offsetof(Shdr, sh_addr), str_addr))
829 return (0);
830
831 if (!xkb_read_word(xkb, (uintptr_t)module->symhdr +
832 offsetof(Shdr, sh_size), sym_count))
833 return (0);
834 *sym_count /= sizeof (Sym);
835
836 return (1);
837 }
838
839 static int
xkb_read_modsyms(xkb_t * xkb,char ** buf,size_t * sizes,int types,uintptr_t sym_addr,uintptr_t str_addr,uintptr_t sym_count)840 xkb_read_modsyms(xkb_t *xkb, char **buf, size_t *sizes, int types,
841 uintptr_t sym_addr, uintptr_t str_addr, uintptr_t sym_count)
842 {
843 size_t i;
844
845 for (i = 0; i < sym_count; i++) {
846 Sym sym;
847 char *name;
848 size_t sz;
849 int type = XKB_WALK_GLOBAL;
850
851 if (xkb_read(xkb, sym_addr + i * sizeof (sym), &sym,
852 sizeof (sym)) != sizeof (sym))
853 return (0);
854
855 if (GELF_ST_BIND(sym.st_info) == STB_LOCAL)
856 type = XKB_WALK_LOCAL;
857
858 name = xkb_readstr(xkb, str_addr + sym.st_name);
859
860 sym.st_shndx = SHN_ABS;
861 sym.st_name = sizes[XKB_WALK_STR];
862
863 sizes[type] += sizeof (sym);
864 sz = strlen(name) + 1;
865 sizes[XKB_WALK_STR] += sz;
866
867 if (buf != NULL) {
868 if (types & type) {
869 bcopy(&sym, *buf, sizeof (sym));
870 *buf += sizeof (sym);
871 }
872 if (types & XKB_WALK_STR) {
873 bcopy(name, *buf, sz);
874 *buf += sz;
875 }
876 }
877
878 mdb_free(name, 1024);
879 }
880
881 return (1);
882 }
883
/*
 * Walk the circular modctl list headed at 'modhead', accumulating the
 * symbol/string sizes of every module into 'sizes' and, when buf is
 * non-NULL, copying out the entries selected by 'types'.  Called once
 * to size the namelist buffers and then again per section to fill
 * them (see xkb_build_ksyms()).
 */
static int
xkb_walk_syms(xkb_t *xkb, uintptr_t modhead, char **buf,
    size_t *sizes, int types)
{
	uintptr_t modctl = modhead;
	uintptr_t modulep;
	struct module module;
	uintptr_t sym_count;
	uintptr_t sym_addr;
	uintptr_t str_addr;
	size_t max_iter = 500;

	bzero(sizes, sizeof (*sizes) * (XKB_WALK_STR + 1));

	/*
	 * empty first symbol
	 */
	sizes[XKB_WALK_LOCAL] += sizeof (Sym);
	sizes[XKB_WALK_STR] += 1;

	if (buf != NULL) {
		if (types & XKB_WALK_LOCAL) {
			Sym tmp;
			bzero(&tmp, sizeof (tmp));
			bcopy(&tmp, *buf, sizeof (tmp));
			*buf += sizeof (tmp);
		}
		if (types & XKB_WALK_STR) {
			**buf = '\0';
			(*buf)++;
		}
	}

	for (;;) {
		if (!xkb_read_word(xkb,
		    modctl + offsetof(struct modctl, mod_mp), &modulep))
			return (0);

		/* Skip modctls with no module loaded. */
		if (modulep == NULL)
			goto next;

		if (!xkb_read_module(xkb, modulep, &module, &sym_addr,
		    &sym_count, &str_addr))
			return (0);

		/* Modules that asked to be excluded from ksyms. */
		if ((module.flags & KOBJ_NOKSYMS))
			goto next;

		if (!xkb_read_modsyms(xkb, buf, sizes, types, sym_addr,
		    str_addr, sym_count))
			return (0);

next:
		if (!xkb_read_word(xkb,
		    modctl + offsetof(struct modctl, mod_next), &modctl))
			return (0);

		/* The list is circular: stop when back at the head. */
		if (modctl == modhead)
			break;
		/*
		 * Try and prevent us looping forever if we have a broken list.
		 */
		if (--max_iter == 0)
			break;
	}

	return (1);
}
952
/*
 * Userspace equivalent of ksyms_snapshot(). Since we don't have a namelist
 * file for hypervisor images, we fabricate one here using code similar
 * to that of /dev/ksyms.
 */
static int
xkb_build_ksyms(xkb_t *xkb)
{
	debug_info_t *info = &xkb->xkb_info;
	size_t sizes[XKB_WALK_STR + 1];
	xkb_namelist_t *hdr;
	char *buf;
	struct modctl modules;
	uintptr_t module;
	Shdr *shp;

	if (xkb_read(xkb, info->di_modules, &modules,
	    sizeof (struct modctl)) != sizeof (struct modctl))
		return (0);

	module = (uintptr_t)modules.mod_mp;

	/* Sizing pass: find out how big each section must be. */
	if (!xkb_walk_syms(xkb, info->di_modules, NULL, sizes,
	    XKB_WALK_LOCAL | XKB_WALK_GLOBAL | XKB_WALK_STR))
		return (0);

	/* Header structure, then symtab (locals + globals), then strtab. */
	xkb->xkb_namesize = sizeof (xkb_namelist_t);
	xkb->xkb_namesize += sizes[XKB_WALK_LOCAL];
	xkb->xkb_namesize += sizes[XKB_WALK_GLOBAL];
	xkb->xkb_namesize += sizes[XKB_WALK_STR];

	if ((xkb->xkb_namelist = mdb_zalloc(xkb->xkb_namesize, UM_SLEEP))
	    == NULL)
		return (0);

	/* LINTED - alignment */
	hdr = (xkb_namelist_t *)xkb->xkb_namelist;

	/* Start from the kernel's own ELF header, then fix up offsets. */
	if (xkb_read(xkb, module + offsetof(struct module, hdr),
	    &hdr->kh_elf_hdr, sizeof (Ehdr)) != sizeof (Ehdr))
		return (0);

	hdr->kh_elf_hdr.e_phoff = offsetof(xkb_namelist_t, kh_text_phdr);
	hdr->kh_elf_hdr.e_shoff = offsetof(xkb_namelist_t, kh_shdr);
	hdr->kh_elf_hdr.e_phnum = 2;
	hdr->kh_elf_hdr.e_shnum = XKB_SHDR_NUM;
	hdr->kh_elf_hdr.e_shstrndx = XKB_SHDR_SHSTRTAB;

	hdr->kh_text_phdr.p_type = PT_LOAD;
	hdr->kh_text_phdr.p_vaddr = (Addr)info->di_s_text;
	hdr->kh_text_phdr.p_memsz = (Word)(info->di_e_text - info->di_s_text);
	hdr->kh_text_phdr.p_flags = PF_R | PF_X;

	hdr->kh_data_phdr.p_type = PT_LOAD;
	hdr->kh_data_phdr.p_vaddr = (Addr)info->di_s_data;
	hdr->kh_data_phdr.p_memsz = (Word)(info->di_e_data - info->di_s_data);
	hdr->kh_data_phdr.p_flags = PF_R | PF_W | PF_X;

	shp = &hdr->kh_shdr[XKB_SHDR_SYMTAB];
	shp->sh_name = 1;	/* xkb_shstrtab[1] = ".symtab" */
	shp->sh_type = SHT_SYMTAB;
	shp->sh_offset = sizeof (xkb_namelist_t);
	shp->sh_size = sizes[XKB_WALK_LOCAL] + sizes[XKB_WALK_GLOBAL];
	shp->sh_link = XKB_SHDR_STRTAB;
	/* sh_info = index of the first non-local symbol, per the ELF spec */
	shp->sh_info = sizes[XKB_WALK_LOCAL] / sizeof (Sym);
	shp->sh_addralign = sizeof (Addr);
	shp->sh_entsize = sizeof (Sym);
	shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);


	shp = &hdr->kh_shdr[XKB_SHDR_STRTAB];
	shp->sh_name = 9;	/* xkb_shstrtab[9] = ".strtab" */
	shp->sh_type = SHT_STRTAB;
	shp->sh_offset = sizeof (xkb_namelist_t) +
	    sizes[XKB_WALK_LOCAL] + sizes[XKB_WALK_GLOBAL];
	shp->sh_size = sizes[XKB_WALK_STR];
	shp->sh_addralign = 1;
	shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);


	shp = &hdr->kh_shdr[XKB_SHDR_SHSTRTAB];
	shp->sh_name = 17;	/* xkb_shstrtab[17] = ".shstrtab" */
	shp->sh_type = SHT_STRTAB;
	shp->sh_offset = offsetof(xkb_namelist_t, shstrings);
	shp->sh_size = sizeof (xkb_shstrtab);
	shp->sh_addralign = 1;
	shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);

	bcopy(xkb_shstrtab, hdr->shstrings, sizeof (xkb_shstrtab));

	/* Fill passes: locals, then globals, then the string table. */
	buf = xkb->xkb_namelist + sizeof (xkb_namelist_t);

	if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
	    XKB_WALK_LOCAL))
		return (0);
	if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
	    XKB_WALK_GLOBAL))
		return (0);
	if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
	    XKB_WALK_STR))
		return (0);

	return (1);
}
1057
/*
 * Open and bootstrap an old-style ('core') format dump: read the
 * header, load the VCPU contexts, try to map all the page data in one
 * go, and hook up the p2m table directly from the file.  Returns xkb
 * on success, NULL (via xkb_fail(), which tears down xkb) on failure.
 */
static xkb_t *
xkb_open_core(xkb_t *xkb)
{
	xkb_core_t *xc = &xkb->xkb_core;
	size_t sz;
	int i;
	struct vcpu_guest_context *vcp;

	xkb->xkb_type = XKB_FORMAT_CORE;

	if ((xkb->xkb_fd = open64(xkb->xkb_path, O_RDONLY)) == -1)
		return (xkb_fail(xkb, "cannot open %s", xkb->xkb_path));

	if (pread64(xkb->xkb_fd, &xc->xc_hdr, sizeof (xc->xc_hdr), 0) !=
	    sizeof (xc->xc_hdr))
		return (xkb_fail(xkb, "invalid dump file"));

	/* Old-format HVM dumps are not supported (see file comment). */
	if (xc->xc_hdr.xch_magic == XC_CORE_MAGIC_HVM)
		return (xkb_fail(xkb, "cannot process HVM images"));

	if (xc->xc_hdr.xch_magic != XC_CORE_MAGIC) {
		return (xkb_fail(xkb, "invalid magic %d",
		    xc->xc_hdr.xch_magic));
	}

	/*
	 * With FORMAT_CORE, all pages are in the dump (non-existing
	 * ones are zeroed out).
	 */
	xkb->xkb_nr_pages = xc->xc_hdr.xch_nr_pages;
	xkb->xkb_pages_off = xc->xc_hdr.xch_pages_offset;
	xkb->xkb_max_pfn = xc->xc_hdr.xch_nr_pages - 1;
	xkb->xkb_nr_vcpus = xc->xc_hdr.xch_nr_vcpus;

	/* Load all VCPU contexts, then build an array of pointers to them. */
	sz = xkb->xkb_nr_vcpus * sizeof (struct vcpu_guest_context);
	xkb->xkb_vcpu_data_sz = sz;
	xkb->xkb_vcpu_data = mdb_alloc(sz, UM_SLEEP);

	if (pread64(xkb->xkb_fd, xkb->xkb_vcpu_data, sz,
	    xc->xc_hdr.xch_ctxt_offset) != sz)
		return (xkb_fail(xkb, "cannot read VCPU contexts"));

	sz = xkb->xkb_nr_vcpus * sizeof (struct vcpu_guest_context *);
	xkb->xkb_vcpus = mdb_alloc(sz, UM_SLEEP);

	vcp = xkb->xkb_vcpu_data;
	for (i = 0; i < xkb->xkb_nr_vcpus; i++)
		xkb->xkb_vcpus[i] = &vcp[i];

	/*
	 * Try to map all the data pages. If we can't, fall back to the
	 * window/pread() approach, which is significantly slower.
	 */
	xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_nr_pages,
	    PROT_READ, MAP_SHARED, xkb->xkb_fd, xc->xc_hdr.xch_pages_offset);

	if (xkb->xkb_pages == (char *)MAP_FAILED)
		xkb->xkb_pages = NULL;

	/*
	 * We'd like to adapt for correctness' sake, but we have no way of
	 * detecting a PAE guest, since cr4 writes are disallowed.
	 */
	xkb->xkb_is_pae = 1;

	if (!xkb_map_p2m(xkb))
		return (NULL);

	return (xkb);
}
1128
/*
 * Attempt to interpret the dump as the ELF-based dump-core format.
 * Returns xkb both on success and when the file turns out not to be an
 * ELF hypervisor dump at all -- in the latter case xkb_type is left at
 * XKB_FORMAT_UNKNOWN so the caller can fall back to the older 'core'
 * format.  Returns NULL (via xkb_fail() or a failed p2m build) on an
 * unrecoverable error.
 */
static xkb_t *
xkb_open_elf(xkb_t *xkb)
{
	xkb_elf_t *xe = &xkb->xkb_elf;
	mdb_gelf_sect_t *sect;
	char *notes;
	char *pos;
	mdb_io_t *io;
	size_t sz;
	int i;
	void *dp;

	if ((io = mdb_fdio_create_path(NULL, xkb->xkb_path,
	    O_RDONLY, 0)) == NULL)
		return (xkb_fail(xkb, "failed to open"));

	xe->xe_gelf = mdb_gelf_create(io, ET_NONE, GF_FILE);

	/*
	 * Not an ELF file at all: destroy the io and let the caller try
	 * the older format (xkb_type stays XKB_FORMAT_UNKNOWN).
	 */
	if (xe->xe_gelf == NULL) {
		mdb_io_destroy(io);
		return (xkb);
	}

	xkb->xkb_fd = mdb_fdio_fileno(io);

	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".note.Xen");

	/* ELF, but not a Xen dump: again, let the caller fall back. */
	if (sect == NULL)
		return (xkb);

	if ((notes = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL)
		return (xkb);

	/*
	 * Now we know this is indeed a hypervisor core dump, even if
	 * it's corrupted.
	 */
	xkb->xkb_type = XKB_FORMAT_ELF;

	/*
	 * Walk the notes in .note.Xen.  Each entry is an Elf64_Nhdr
	 * followed by the name, then the descriptor data rounded up to
	 * a 4-byte boundary.
	 */
	for (pos = notes; pos < notes + sect->gs_shdr.sh_size; ) {
		/* LINTED - alignment */
		Elf64_Nhdr *nhdr = (Elf64_Nhdr *)pos;
		uint64_t vers;
		char *desc;
		char *name;

		name = pos + sizeof (*nhdr);
		desc = (char *)P2ROUNDUP((uintptr_t)name + nhdr->n_namesz, 4);

		pos = desc + nhdr->n_descsz;

		switch (nhdr->n_type) {
		case XEN_ELFNOTE_DUMPCORE_NONE:
			break;

		case XEN_ELFNOTE_DUMPCORE_HEADER:
			if (nhdr->n_descsz != sizeof (struct xc_elf_header)) {
				return (xkb_fail(xkb, "invalid ELF note "
				    "XEN_ELFNOTE_DUMPCORE_HEADER\n"));
			}

			bcopy(desc, &xe->xe_hdr,
			    sizeof (struct xc_elf_header));
			break;

		case XEN_ELFNOTE_DUMPCORE_XEN_VERSION:
			if (nhdr->n_descsz < sizeof (struct xc_elf_version)) {
				return (xkb_fail(xkb, "invalid ELF note "
				    "XEN_ELFNOTE_DUMPCORE_XEN_VERSION\n"));
			}

			bcopy(desc, &xe->xe_version,
			    sizeof (struct xc_elf_version));
			break;

		case XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION:
			/*
			 * Only major version 0 is understood; an
			 * unexpected minor version is merely warned
			 * about.
			 */
			/* LINTED - alignment */
			vers = *((uint64_t *)desc);
			if ((vers >> 32) != 0) {
				return (xkb_fail(xkb, "unknown major "
				    "version %d (expected 0)\n",
				    (int)(vers >> 32)));
			}

			if ((vers & 0xffffffff) != 1) {
				mdb_warn("unexpected dump minor number "
				    "version %d (expected 1)\n",
				    (int)(vers & 0xffffffff));
			}
			break;

		default:
			mdb_warn("unknown ELF note %d(%s)\n",
			    nhdr->n_type, name);
			break;
		}
	}

	xkb->xkb_is_hvm = xe->xe_hdr.xeh_magic == XC_CORE_MAGIC_HVM;

	if (xe->xe_hdr.xeh_magic != XC_CORE_MAGIC &&
	    xe->xe_hdr.xeh_magic != XC_CORE_MAGIC_HVM) {
		return (xkb_fail(xkb, "invalid magic %d",
		    xe->xe_hdr.xeh_magic));
	}

	xkb->xkb_nr_pages = xe->xe_hdr.xeh_nr_pages;
	/* The Xen capabilities string contains "x86_32p" for PAE guests. */
	xkb->xkb_is_pae = (strstr(xe->xe_version.xev_capabilities,
	    "x86_32p") != NULL);

	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_prstatus");

	if (sect == NULL)
		return (xkb_fail(xkb, "cannot find section .xen_prstatus"));

	if (sect->gs_shdr.sh_entsize < sizeof (vcpu_guest_context_t))
		return (xkb_fail(xkb, "invalid section .xen_prstatus"));

	xkb->xkb_nr_vcpus = sect->gs_shdr.sh_size / sect->gs_shdr.sh_entsize;

	xkb->xkb_vcpu_data = mdb_gelf_sect_load(xe->xe_gelf, sect);
	if (xkb->xkb_vcpu_data == NULL)
		return (xkb_fail(xkb, "cannot load section .xen_prstatus"));
	xkb->xkb_vcpu_data_sz = sect->gs_shdr.sh_size;

	/*
	 * The vcpu_guest_context structures saved in the core file
	 * are actually unions of the 64-bit and 32-bit versions.
	 * Don't rely on the entry size to match the size of
	 * the structure, but set up an array of pointers.
	 */
	sz = xkb->xkb_nr_vcpus * sizeof (struct vcpu_guest_context *);
	xkb->xkb_vcpus = mdb_alloc(sz, UM_SLEEP);
	for (i = 0; i < xkb->xkb_nr_vcpus; i++) {
		dp = ((char *)xkb->xkb_vcpu_data +
		    i * sect->gs_shdr.sh_entsize);
		xkb->xkb_vcpus[i] = dp;
	}

	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_pages");

	if (sect == NULL)
		return (xkb_fail(xkb, "cannot find section .xen_pages"));

	if (!PAGE_ALIGNED(sect->gs_shdr.sh_offset))
		return (xkb_fail(xkb, ".xen_pages is not page aligned"));

	if (sect->gs_shdr.sh_entsize != PAGE_SIZE)
		return (xkb_fail(xkb, "invalid section .xen_pages"));

	xkb->xkb_pages_off = sect->gs_shdr.sh_offset;

	/*
	 * Try to map all the data pages. If we can't, fall back to the
	 * window/pread() approach, which is significantly slower.
	 */
	xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_nr_pages,
	    PROT_READ, MAP_SHARED, xkb->xkb_fd, xkb->xkb_pages_off);

	if (xkb->xkb_pages == (char *)MAP_FAILED)
		xkb->xkb_pages = NULL;

	/*
	 * HVM dumps are handled by xkb_build_fake_p2m(); PV dumps carry
	 * a real p2m handled by xkb_build_p2m().  NOTE(review): the
	 * builders are presumed to report their own failure before
	 * returning 0 -- confirm against their definitions.
	 */
	if (xkb->xkb_is_hvm) {
		if (!xkb_build_fake_p2m(xkb))
			return (NULL);
	} else {
		if (!xkb_build_p2m(xkb))
			return (NULL);
	}

	return (xkb);
}
1301
1302 static void
xkb_init_mmu(xkb_t * xkb)1303 xkb_init_mmu(xkb_t *xkb)
1304 {
1305 #if defined(__amd64)
1306 xkb->xkb_mmu.mi_max = 3;
1307 xkb->xkb_mmu.mi_shift[0] = 12;
1308 xkb->xkb_mmu.mi_shift[1] = 21;
1309 xkb->xkb_mmu.mi_shift[2] = 30;
1310 xkb->xkb_mmu.mi_shift[3] = 39;
1311 xkb->xkb_mmu.mi_ptes = 512;
1312 xkb->xkb_mmu.mi_ptesize = 8;
1313 #elif defined(__i386)
1314 if (xkb->xkb_is_pae) {
1315 xkb->xkb_mmu.mi_max = 2;
1316 xkb->xkb_mmu.mi_shift[0] = 12;
1317 xkb->xkb_mmu.mi_shift[1] = 21;
1318 xkb->xkb_mmu.mi_shift[2] = 30;
1319 xkb->xkb_mmu.mi_ptes = 512;
1320 xkb->xkb_mmu.mi_ptesize = 8;
1321 } else {
1322 xkb->xkb_mmu.mi_max = 1;
1323 xkb->xkb_mmu.mi_shift[0] = 12;
1324 xkb->xkb_mmu.mi_shift[1] = 22;
1325 xkb->xkb_mmu.mi_ptes = 1024;
1326 xkb->xkb_mmu.mi_ptesize = 4;
1327 }
1328 #endif
1329 }
1330
1331 /*ARGSUSED*/
1332 xkb_t *
xkb_open(const char * namelist,const char * corefile,const char * swapfile,int flag,const char * err)1333 xkb_open(const char *namelist, const char *corefile, const char *swapfile,
1334 int flag, const char *err)
1335 {
1336 uintptr_t debug_info = DEBUG_INFO;
1337 struct stat64 corestat;
1338 xkb_t *xkb = NULL;
1339 size_t i;
1340
1341 if (stat64(corefile, &corestat) == -1)
1342 return (xkb_fail(xkb, "cannot stat %s", corefile));
1343
1344 if (flag != O_RDONLY)
1345 return (xkb_fail(xkb, "invalid open flags"));
1346
1347 xkb = mdb_zalloc(sizeof (*xkb), UM_SLEEP);
1348
1349 for (i = 0; i < 4; i++) {
1350 xkb->xkb_pt_map[i].mm_mfn = MFN_INVALID;
1351 xkb->xkb_pt_map[i].mm_map = (char *)MAP_FAILED;
1352 }
1353
1354 xkb->xkb_type = XKB_FORMAT_UNKNOWN;
1355 xkb->xkb_map.mm_mfn = MFN_INVALID;
1356 xkb->xkb_map.mm_map = (char *)MAP_FAILED;
1357 xkb->xkb_core.xc_p2m_buf = (char *)MAP_FAILED;
1358 xkb->xkb_fd = -1;
1359
1360 xkb->xkb_path = strdup(corefile);
1361
1362 if ((xkb = xkb_open_elf(xkb)) == NULL)
1363 return (NULL);
1364
1365 if (xkb->xkb_type == XKB_FORMAT_UNKNOWN) {
1366 if (!xkb_open_core(xkb))
1367 return (NULL);
1368 }
1369
1370 xkb_init_mmu(xkb);
1371
1372 if (!xkb_build_m2p(xkb))
1373 return (NULL);
1374
1375 if (xkb->xkb_is_hvm)
1376 debug_info = DEBUG_INFO_HVM;
1377
1378 if (xkb_read(xkb, debug_info, &xkb->xkb_info,
1379 sizeof (xkb->xkb_info)) != sizeof (xkb->xkb_info))
1380 return (xkb_fail(xkb, "cannot read debug_info"));
1381
1382 if (xkb->xkb_info.di_magic != DEBUG_INFO_MAGIC) {
1383 return (xkb_fail(xkb, "invalid debug info magic %d",
1384 xkb->xkb_info.di_magic));
1385 }
1386
1387 if (xkb->xkb_info.di_version != DEBUG_INFO_VERSION) {
1388 return (xkb_fail(xkb, "unknown debug info version %d",
1389 xkb->xkb_info.di_version));
1390 }
1391
1392 if (!xkb_build_ksyms(xkb))
1393 return (xkb_fail(xkb, "cannot construct namelist"));
1394
1395 return (xkb);
1396 }
1397
/*
 * Tear down an xkb_t: unmap whatever dump pages were mapped, free the
 * m2p table, the constructed namelist, format-specific state, the
 * vcpu pointer array, and the handle itself.  Safe to call on a NULL
 * or partially-constructed handle.  Always returns 0.
 */
int
xkb_close(xkb_t *xkb)
{
	size_t i, sz;

	if (xkb == NULL)
		return (0);

	if (xkb->xkb_m2p != NULL) {
		mdb_free(xkb->xkb_m2p,
		    (xkb->xkb_max_mfn + 1) * sizeof (xen_pfn_t));
	}

	/*
	 * Either all data pages were mapped in one mmap() (xkb_pages),
	 * or we fell back to single-page windows; unmap whichever set
	 * is live.
	 */
	if (xkb->xkb_pages != NULL) {
		(void) munmap((void *)xkb->xkb_pages,
		    PAGE_SIZE * xkb->xkb_nr_pages);
	} else {
		for (i = 0; i < 4; i++) {
			char *addr = xkb->xkb_pt_map[i].mm_map;
			if (addr != (char *)MAP_FAILED)
				(void) munmap((void *)addr, PAGE_SIZE);
		}
		if (xkb->xkb_map.mm_map != (char *)MAP_FAILED) {
			(void) munmap((void *)xkb->xkb_map.mm_map,
			    PAGE_SIZE);
		}
	}

	if (xkb->xkb_namelist != NULL)
		mdb_free(xkb->xkb_namelist, xkb->xkb_namesize);

	if (xkb->xkb_type == XKB_FORMAT_ELF) {
		xkb_elf_t *xe = &xkb->xkb_elf;

		/*
		 * NOTE(review): xkb_fd is only close()d explicitly in
		 * the core-format branch below; here it is presumed to
		 * be owned (and released) by the gelf's fdio.
		 */
		if (xe->xe_gelf != NULL)
			mdb_gelf_destroy(xe->xe_gelf);

		sz = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);

		if (xkb->xkb_p2m != NULL)
			mdb_free(xkb->xkb_p2m, sz);

		sz = sizeof (size_t) * (xkb->xkb_max_pfn + 1);

		if (xe->xe_off != NULL)
			mdb_free(xe->xe_off, sz);

	} else if (xkb->xkb_type == XKB_FORMAT_CORE) {
		xkb_core_t *xc = &xkb->xkb_core;

		if (xkb->xkb_fd != -1)
			(void) close(xkb->xkb_fd);

		/*
		 * Recompute the p2m buffer size the same way it was
		 * (presumably) computed when mapped in xkb_open_core.
		 */
		sz = (xkb->xkb_nr_pages * sizeof (mfn_t)) + (PAGE_SIZE * 2);
		sz = PAGE_MASK(sz);

		if (xc->xc_p2m_buf != (xen_pfn_t *)MAP_FAILED)
			(void) munmap(xc->xc_p2m_buf, sz);

		if (xkb->xkb_vcpu_data != NULL)
			mdb_free(xkb->xkb_vcpu_data, xkb->xkb_vcpu_data_sz);
	}

	if (xkb->xkb_vcpus != NULL) {
		sz = sizeof (struct vcpu_guest_context *) *
		    xkb->xkb_nr_vcpus;
		mdb_free(xkb->xkb_vcpus, sz);
	}

	/* xkb_path came from strdup(), hence free() rather than mdb_free(). */
	free(xkb->xkb_path);

	mdb_free(xkb, sizeof (*xkb));
	return (0);
}
1472
1473 /*ARGSUSED*/
1474 static mdb_io_t *
xkb_sym_io(xkb_t * xkb,const char * symfile)1475 xkb_sym_io(xkb_t *xkb, const char *symfile)
1476 {
1477 mdb_io_t *io = mdb_memio_create(xkb->xkb_namelist, xkb->xkb_namesize);
1478
1479 if (io == NULL)
1480 mdb_warn("failed to create namelist from %s", xkb->xkb_path);
1481
1482 return (io);
1483 }
1484
1485 uint64_t
xkb_vtop(xkb_t * xkb,struct as * as,uintptr_t addr)1486 xkb_vtop(xkb_t *xkb, struct as *as, uintptr_t addr)
1487 {
1488 mfn_t tlmfn = xkb_cr3_to_pfn(xkb);
1489 mfn_t mfn;
1490
1491 if (as != NULL && (tlmfn = xkb_as_to_mfn(xkb, as)) == MFN_INVALID)
1492 return (-1ULL);
1493
1494 mfn = xkb_va_to_mfn(xkb, addr, tlmfn);
1495
1496 if (mfn == MFN_INVALID || mfn > xkb->xkb_max_mfn)
1497 return (-1ULL);
1498
1499 return (((uint64_t)xkb->xkb_m2p[mfn] << PAGE_SHIFT)
1500 | PAGE_OFFSET(addr));
1501 }
1502
/*
 * Fill in *mregs with the saved register state of the given virtual
 * CPU, translating from the Xen vcpu_guest_context layout to the
 * native privmregs layout.  Returns 0 on success, or -1 with errno
 * set to EINVAL if cpu is out of range.
 */
static int
xkb_getmregs(xkb_t *xkb, uint_t cpu, struct privmregs *mregs)
{
	struct vcpu_guest_context *vcpu;
	struct cpu_user_regs *ur;
	struct regs *regs;

	if (cpu >= xkb->xkb_nr_vcpus) {
		errno = EINVAL;
		return (-1);
	}

	bzero(mregs, sizeof (*mregs));

	vcpu = xkb->xkb_vcpus[cpu];
	ur = &vcpu->user_regs;
	regs = &mregs->pm_gregs;

	/* Segment and trap state are common to both ISAs. */
	regs->r_ss = ur->ss;
	regs->r_cs = ur->cs;
	regs->r_ds = ur->ds;
	regs->r_es = ur->es;
	regs->r_fs = ur->fs;
	regs->r_gs = ur->gs;
	regs->r_trapno = ur->entry_vector;
	regs->r_err = ur->error_code;
#ifdef __amd64
	/* Saved frame pointer/PC mirror rbp/rip for stack walking. */
	regs->r_savfp = ur->rbp;
	regs->r_savpc = ur->rip;
	regs->r_rdi = ur->rdi;
	regs->r_rsi = ur->rsi;
	regs->r_rdx = ur->rdx;
	regs->r_rcx = ur->rcx;
	regs->r_r8 = ur->r8;
	regs->r_r9 = ur->r9;
	regs->r_rax = ur->rax;
	regs->r_rbx = ur->rbx;
	regs->r_rbp = ur->rbp;
	regs->r_r10 = ur->r10;
	regs->r_r11 = ur->r11;
	regs->r_r12 = ur->r12;
	regs->r_r13 = ur->r13;
	regs->r_r14 = ur->r14;
	regs->r_r15 = ur->r15;
	regs->r_rip = ur->rip;
	regs->r_rfl = ur->rflags;
	regs->r_rsp = ur->rsp;
#else
	regs->r_savfp = ur->ebp;
	regs->r_savpc = ur->eip;
	regs->r_edi = ur->edi;
	regs->r_esi = ur->esi;
	regs->r_ebp = ur->ebp;
	regs->r_esp = ur->esp;
	regs->r_ebx = ur->ebx;
	regs->r_edx = ur->edx;
	regs->r_ecx = ur->ecx;
	regs->r_eax = ur->eax;
	regs->r_eip = ur->eip;
	regs->r_efl = ur->eflags;
	regs->r_uesp = 0;
#endif

	/* Copy all eight control and debug registers wholesale. */
	bcopy(&vcpu->ctrlreg, &mregs->pm_cr, 8 * sizeof (ulong_t));
	bcopy(&vcpu->debugreg, &mregs->pm_dr, 8 * sizeof (ulong_t));

	mregs->pm_flags = PM_GREGS | PM_CRREGS | PM_DRREGS;

	return (0);
}
1573
/*
 * Operations vector handed to mdb's kernel-backend layer.  All write
 * operations are unsupported (mdb_tgt_notsup) since dumps are
 * read-only.
 */
static mdb_kb_ops_t xpv_kb_ops = {
	.kb_open = (void *(*)())xkb_open,
	.kb_close = (int (*)())xkb_close,
	.kb_sym_io = (mdb_io_t *(*)())xkb_sym_io,
	.kb_kread = (ssize_t (*)())xkb_read,
	.kb_kwrite = (ssize_t (*)())mdb_tgt_notsup,
	.kb_aread = (ssize_t (*)())xkb_aread,
	.kb_awrite = (ssize_t (*)())mdb_tgt_notsup,
	.kb_pread = (ssize_t (*)())xkb_pread,
	.kb_pwrite = (ssize_t (*)())mdb_tgt_notsup,
	.kb_vtop = (uint64_t (*)())xkb_vtop,
	.kb_getmregs = (int (*)())xkb_getmregs
};
1587
/*
 * Entry point by which mdb retrieves this backend's ops vector.
 */
mdb_kb_ops_t *
mdb_kb_ops(void)
{
	return (&xpv_kb_ops);
}
1593
/* This module registers no dcmds or walkers; it exists only as a KVM backend. */
static const mdb_dcmd_t dcmds[] = { NULL, };
static const mdb_walker_t walkers[] = { NULL, };
static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };
1597
/*
 * mdb module linkage: return the (empty) module information.
 */
const mdb_modinfo_t *
_mdb_init(void)
{
	return (&modinfo);
}
1603
/*
 * Module unload hook: no module-level teardown is required.
 */
void
_mdb_fini(void)
{
}
1608