1843e1988Sjohnlev /*
2843e1988Sjohnlev * CDDL HEADER START
3843e1988Sjohnlev *
4843e1988Sjohnlev * The contents of this file are subject to the terms of the
5843e1988Sjohnlev * Common Development and Distribution License (the "License").
6843e1988Sjohnlev * You may not use this file except in compliance with the License.
7843e1988Sjohnlev *
8843e1988Sjohnlev * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9843e1988Sjohnlev * or http://www.opensolaris.org/os/licensing.
10843e1988Sjohnlev * See the License for the specific language governing permissions
11843e1988Sjohnlev * and limitations under the License.
12843e1988Sjohnlev *
13843e1988Sjohnlev * When distributing Covered Code, include this CDDL HEADER in each
14843e1988Sjohnlev * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15843e1988Sjohnlev * If applicable, add the following below this CDDL HEADER, with the
16843e1988Sjohnlev * fields enclosed by brackets "[]" replaced with your own identifying
17843e1988Sjohnlev * information: Portions Copyright [yyyy] [name of copyright owner]
18843e1988Sjohnlev *
19843e1988Sjohnlev * CDDL HEADER END
20843e1988Sjohnlev */
21843e1988Sjohnlev /*
22*0d928757SGary Mills * Copyright (c) 2012 Gary Mills
23*0d928757SGary Mills *
2441afdfa7SKrishnendu Sadhukhan - Sun Microsystems * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
25843e1988Sjohnlev */
26843e1988Sjohnlev
27843e1988Sjohnlev #include <sys/types.h>
28843e1988Sjohnlev #include <sys/clock.h>
29843e1988Sjohnlev #include <sys/psm.h>
30843e1988Sjohnlev #include <sys/archsystm.h>
31843e1988Sjohnlev #include <sys/machsystm.h>
32843e1988Sjohnlev #include <sys/compress.h>
33843e1988Sjohnlev #include <sys/modctl.h>
34843e1988Sjohnlev #include <sys/trap.h>
35843e1988Sjohnlev #include <sys/panic.h>
36843e1988Sjohnlev #include <sys/regset.h>
37843e1988Sjohnlev #include <sys/frame.h>
38843e1988Sjohnlev #include <sys/kobj.h>
39843e1988Sjohnlev #include <sys/apic.h>
4041afdfa7SKrishnendu Sadhukhan - Sun Microsystems #include <sys/apic_timer.h>
41843e1988Sjohnlev #include <sys/dumphdr.h>
42843e1988Sjohnlev #include <sys/mem.h>
43843e1988Sjohnlev #include <sys/x86_archext.h>
44843e1988Sjohnlev #include <sys/xpv_panic.h>
45843e1988Sjohnlev #include <sys/boot_console.h>
46843e1988Sjohnlev #include <sys/bootsvcs.h>
47843e1988Sjohnlev #include <sys/consdev.h>
48843e1988Sjohnlev #include <vm/hat_pte.h>
49843e1988Sjohnlev #include <vm/hat_i86.h>
50843e1988Sjohnlev
/* XXX: need to add a PAE version too, if we ever support both PAE and non */
#if defined(__i386)
#define	XPV_FILENAME	"/boot/xen-syms"
#else
#define	XPV_FILENAME	"/boot/amd64/xen-syms"
#endif
#define	XPV_MODNAME	"xpv"

/*
 * Bumped on entry to xpv_do_panic(); a second entry finds it non-zero and
 * panics immediately.
 */
int xpv_panicking = 0;

/*
 * NOTE(review): neither pointer is referenced in this part of the file;
 * presumably they describe the module created for XPV_FILENAME - confirm
 * against the code that assigns them.
 */
struct module *xpv_module;
struct modctl *xpv_modctl;

/*
 * Round x up to the next multiple of a.  The mask arithmetic is only
 * correct when a is a power of two; an alignment of 0 yields x unchanged.
 */
#define	ALIGN(x, a)	((a) == 0 ? (uintptr_t)(x) : \
	(((uintptr_t)(x) + (uintptr_t)(a) - 1l) & ~((uintptr_t)(a) - 1l)))

/* Pointer to the xpv_panic_info structure handed to us by Xen. */
static struct panic_info *xpv_panic_info = NULL;

/*
 * Timer support
 *
 * NSEC_SHIFT is the scaling shift shared by xpv_panic_time_init() and
 * xpv_panic_gethrtime().  T_XPV_TIMER is the IDT vector the APIC timer is
 * programmed to raise.  xpv_apicadr is indexed with the APIC_* register
 * numbers; it is not assigned in this part of the file.
 */
#define	NSEC_SHIFT	5
#define	T_XPV_TIMER	0xd1
#define	XPV_TIMER_INTERVAL	1000	/* 1000 microseconds */
static uint32_t *xpv_apicadr = NULL;
static uint_t nsec_scale;

/* IDT support */
#pragma align 16(xpv_panic_idt)
static gate_desc_t xpv_panic_idt[NIDT];	/* interrupt descriptor table */

/*
 * Xen pagetables mapped into our HAT's ptable windows.  Entry 'l' records
 * which pfn xpv_va_walk() last mapped into the level 'l' window, so an
 * unchanged table is not remapped.
 */
static pfn_t ptable_pfn[MAX_NUM_LEVEL];

/*
 * Number of MMU_PAGESIZE pages we're adding to the Solaris dump.  Reset and
 * counted by dump_xpv_addr(), then consumed by dump_xpv_pfn().
 */
static int xpv_dump_pages;

/*
 * There are up to two large swathes of RAM that we don't want to include
 * in the dump: those that comprise the Xen version of segkpm.  On 32-bit
 * systems there is no such region of memory.  On 64-bit systems, there
 * should be just a single contiguous region that corresponds to all of
 * physical memory.  The tricky bit is that Xen's heap sometimes lives in
 * the middle of their segkpm, and is mapped using only kpm-like addresses.
 * In that case, we need to skip the swathes before and after Xen's heap.
 */
uintptr_t kpm1_low = 0;
uintptr_t kpm1_high = 0;
uintptr_t kpm2_low = 0;
uintptr_t kpm2_high = 0;

/*
 * Some commonly used values that we don't want to recompute over and over.
 * xpv_end is the upper bound of the Xen VA range and is set by
 * xpv_do_panic(); the other two are not assigned in this part of the file.
 */
static int xpv_panic_nptes[MAX_NUM_LEVEL];
static ulong_t xpv_panic_cr3;
static uintptr_t xpv_end;

/*
 * Console output.  xpv_panic_printf starts out pointing at the formatter
 * below, which stages its text in console_buffer.  use_polledio records
 * whether xpv_do_panic() chose the polled I/O putchar routine over the
 * boot console.
 */
static void xpv_panic_console_print(const char *fmt, ...);
static void (*xpv_panic_printf)(const char *, ...) = xpv_panic_console_print;

#define	CONSOLE_BUF_SIZE	256
static char console_buffer[CONSOLE_BUF_SIZE];
static boolean_t use_polledio;

/*
 * Pointers to machine check panic info (if any).
 */
xpv_mca_panic_data_t *xpv_mca_panic_data = NULL;
119e4b86885SCheng Sean Ye
120843e1988Sjohnlev static void
xpv_panic_putc(int m)121843e1988Sjohnlev xpv_panic_putc(int m)
122843e1988Sjohnlev {
123843e1988Sjohnlev struct cons_polledio *c = cons_polledio;
124843e1988Sjohnlev
125843e1988Sjohnlev /* This really shouldn't happen */
126*0d928757SGary Mills if (boot_console_type(NULL) == CONS_HYPERVISOR)
127843e1988Sjohnlev return;
128843e1988Sjohnlev
129843e1988Sjohnlev if (use_polledio == B_TRUE)
130843e1988Sjohnlev c->cons_polledio_putchar(c->cons_polledio_argument, m);
131843e1988Sjohnlev else
132843e1988Sjohnlev bcons_putchar(m);
133843e1988Sjohnlev }
134843e1988Sjohnlev
135843e1988Sjohnlev static void
xpv_panic_puts(char * msg)136843e1988Sjohnlev xpv_panic_puts(char *msg)
137843e1988Sjohnlev {
138843e1988Sjohnlev char *m;
139843e1988Sjohnlev
140843e1988Sjohnlev dump_timeleft = dump_timeout;
141843e1988Sjohnlev for (m = msg; *m; m++)
142843e1988Sjohnlev xpv_panic_putc((int)*m);
143843e1988Sjohnlev }
144843e1988Sjohnlev
145843e1988Sjohnlev static void
xpv_panic_console_print(const char * fmt,...)146843e1988Sjohnlev xpv_panic_console_print(const char *fmt, ...)
147843e1988Sjohnlev {
148843e1988Sjohnlev va_list ap;
149843e1988Sjohnlev
150843e1988Sjohnlev va_start(ap, fmt);
151843e1988Sjohnlev (void) vsnprintf(console_buffer, sizeof (console_buffer), fmt, ap);
152843e1988Sjohnlev va_end(ap);
153843e1988Sjohnlev
154843e1988Sjohnlev xpv_panic_puts(console_buffer);
155843e1988Sjohnlev }
156843e1988Sjohnlev
157843e1988Sjohnlev static void
xpv_panic_map(int level,pfn_t pfn)158843e1988Sjohnlev xpv_panic_map(int level, pfn_t pfn)
159843e1988Sjohnlev {
160843e1988Sjohnlev x86pte_t pte, *pteptr;
161843e1988Sjohnlev
162843e1988Sjohnlev /*
163843e1988Sjohnlev * The provided pfn represents a level 'level' page table. Map it
164843e1988Sjohnlev * into the 'level' slot in the list of page table windows.
165843e1988Sjohnlev */
166843e1988Sjohnlev pteptr = (x86pte_t *)PWIN_PTE_VA(level);
167843e1988Sjohnlev pte = pfn_to_pa(pfn) | PT_VALID;
168843e1988Sjohnlev
169843e1988Sjohnlev XPV_ALLOW_PAGETABLE_UPDATES();
170843e1988Sjohnlev if (mmu.pae_hat)
171843e1988Sjohnlev *pteptr = pte;
172843e1988Sjohnlev else
173843e1988Sjohnlev *(x86pte32_t *)pteptr = pte;
174843e1988Sjohnlev XPV_DISALLOW_PAGETABLE_UPDATES();
175843e1988Sjohnlev
176843e1988Sjohnlev mmu_tlbflush_entry(PWIN_VA(level));
177843e1988Sjohnlev }
178843e1988Sjohnlev
/*
 * Walk the page tables to find the pfn mapped by the given va.
 *
 * The search starts at *vaddr and moves upward through the Xen VA range
 * until it finds a page-level mapping that is not the APIC page and does
 * not fall inside either kpm-like region.
 *
 * On success *vaddr is updated to the VA of the page that was found and the
 * return value is that page's frame number with PFN_IS_FOREIGN_MFN set.
 * PFN_INVALID is returned when the scan reaches xpv_end, or when va drops
 * below the starting address (presumably because it wrapped), without
 * finding such a page.
 */
static pfn_t
xpv_va_walk(uintptr_t *vaddr)
{
	int l, idx;
	pfn_t pfn;
	x86pte_t pte;
	x86pte_t *ptep;
	uintptr_t va = *vaddr;
	uintptr_t scan_va;
	caddr_t ptable_window;
	static pfn_t toplevel_pfn;
	static uintptr_t lastva;	/* VA returned by the previous call */

	/*
	 * If we do anything other than a simple scan through memory, don't
	 * trust the mapped page tables.
	 */
	if (va != lastva + MMU_PAGESIZE)
		for (l = mmu.max_level; l >= 0; l--)
			ptable_pfn[l] = PFN_INVALID;

	toplevel_pfn = mmu_btop(xpv_panic_cr3);

	/*
	 * Each pass of this loop restarts the descent from the top-level
	 * table with the current va.  The inner loop 'break's back out to
	 * here whenever it has advanced va past something to be skipped.
	 */
	while (va < xpv_end && va >= *vaddr) {
		/* Find the lowest table with any entry for va */
		pfn = toplevel_pfn;
		for (l = mmu.max_level; l >= 0; l--) {
			/* Only remap the window if the table has changed. */
			if (ptable_pfn[l] != pfn) {
				xpv_panic_map(l, pfn);
				ptable_pfn[l] = pfn;
			}

			/*
			 * Search this pagetable for any mapping to an
			 * address >= va.
			 */
			ptable_window = PWIN_VA(l);
			/*
			 * With a PAE hat the top-level table is addressed at
			 * cr3's offset within the mapped page.
			 */
			if (l == mmu.max_level && mmu.pae_hat)
				ptable_window +=
				    (xpv_panic_cr3 & MMU_PAGEOFFSET);

			idx = (va >> LEVEL_SHIFT(l)) & (xpv_panic_nptes[l] - 1);
			scan_va = va;
			while (idx < xpv_panic_nptes[l] && scan_va < xpv_end &&
			    scan_va >= *vaddr) {
				ptep = (x86pte_t *)(ptable_window +
				    (idx << mmu.pte_size_shift));
				pte = GET_PTE(ptep);
				if (pte & PTE_VALID)
					break;
				idx++;
				scan_va += mmu.level_size[l];
			}

			/*
			 * If there are no valid mappings in this table, we
			 * can skip to the end of the VA range it covers.
			 */
			if (idx == xpv_panic_nptes[l]) {
				va = NEXT_ENTRY_VA(va, l + 1);
				break;
			}

			va = scan_va;
			/*
			 * See if we've hit the end of the range.
			 */
			if (va >= xpv_end || va < *vaddr)
				break;

			/*
			 * If this mapping is for a pagetable, we drop down
			 * to the next level in the hierarchy and look for
			 * a mapping in it.
			 */
			pfn = PTE2MFN(pte, l);
			if (!PTE_ISPAGE(pte, l))
				continue;

			/*
			 * The APIC page is magic.  Nothing to see here;
			 * move along.
			 */
			if (((uintptr_t)xpv_apicadr & MMU_PAGEMASK) ==
			    (va & MMU_PAGEMASK)) {
				va += MMU_PAGESIZE;
				break;
			}

			/*
			 * See if the address is within one of the two
			 * kpm-like regions we want to skip.
			 */
			if (va >= kpm1_low && va < kpm1_high) {
				va = kpm1_high;
				break;
			}
			if (va >= kpm2_low && va < kpm2_high) {
				va = kpm2_high;
				break;
			}

			/*
			 * The Xen panic code only handles small pages.  If
			 * this mapping is for a large page, we need to
			 * identify the constituent page that covers the
			 * specific VA we were looking for.
			 */
			if (l > 0) {
				if (l > 1)
					panic("Xen panic can't cope with "
					    "giant pages.");
				idx = (va >> LEVEL_SHIFT(0)) &
				    (xpv_panic_nptes[0] - 1);
				pfn += idx;
			}

			/* Report the page and remember it for the next call. */
			*vaddr = va;
			lastva = va;
			return (pfn | PFN_IS_FOREIGN_MFN);
		}
	}
	return (PFN_INVALID);
}
306843e1988Sjohnlev
307843e1988Sjohnlev /*
308843e1988Sjohnlev * Walk through the Xen VA space, finding pages that are mapped in.
309843e1988Sjohnlev *
310843e1988Sjohnlev * These pages all have MFNs rather than PFNs, meaning they may be outside
311843e1988Sjohnlev * the physical address space the kernel knows about, or they may collide
312843e1988Sjohnlev * with PFNs the kernel is using.
313843e1988Sjohnlev *
314843e1988Sjohnlev * The obvious trick of just adding the PFN_IS_FOREIGN_MFN bit to the MFNs
315843e1988Sjohnlev * to avoid collisions doesn't work. The pages need to be written to disk
316843e1988Sjohnlev * in PFN-order or savecore gets confused. We can't allocate memory to
317843e1988Sjohnlev * contruct a sorted pfn->VA reverse mapping, so we have to write the pages
318843e1988Sjohnlev * to disk in VA order.
319843e1988Sjohnlev *
320843e1988Sjohnlev * To square this circle, we simply make up PFNs for each of Xen's pages.
321843e1988Sjohnlev * We assign each mapped page a fake PFN in ascending order. These fake
322843e1988Sjohnlev * PFNs each have the FOREIGN bit set, ensuring that they fall outside the
323843e1988Sjohnlev * range of Solaris PFNs written by the kernel.
324843e1988Sjohnlev */
325843e1988Sjohnlev int
dump_xpv_addr()326843e1988Sjohnlev dump_xpv_addr()
327843e1988Sjohnlev {
328843e1988Sjohnlev uintptr_t va;
329843e1988Sjohnlev mem_vtop_t mem_vtop;
330843e1988Sjohnlev
331843e1988Sjohnlev xpv_dump_pages = 0;
332843e1988Sjohnlev va = xen_virt_start;
333843e1988Sjohnlev
334843e1988Sjohnlev while (xpv_va_walk(&va) != PFN_INVALID) {
335843e1988Sjohnlev mem_vtop.m_as = &kas;
336843e1988Sjohnlev mem_vtop.m_va = (void *)va;
337843e1988Sjohnlev mem_vtop.m_pfn = (pfn_t)xpv_dump_pages | PFN_IS_FOREIGN_MFN;
338843e1988Sjohnlev
339843e1988Sjohnlev dumpvp_write(&mem_vtop, sizeof (mem_vtop_t));
340843e1988Sjohnlev xpv_dump_pages++;
341843e1988Sjohnlev
342843e1988Sjohnlev va += MMU_PAGESIZE;
343843e1988Sjohnlev }
344843e1988Sjohnlev
345843e1988Sjohnlev /*
346843e1988Sjohnlev * Add the shared_info page. This page actually ends up in the
347843e1988Sjohnlev * dump twice: once for the Xen va and once for the Solaris va.
348843e1988Sjohnlev * This isn't ideal, but we don't know the address Xen is using for
349843e1988Sjohnlev * the page, so we can't share it.
350843e1988Sjohnlev */
351843e1988Sjohnlev mem_vtop.m_as = &kas;
352843e1988Sjohnlev mem_vtop.m_va = HYPERVISOR_shared_info;
353843e1988Sjohnlev mem_vtop.m_pfn = (pfn_t)xpv_dump_pages | PFN_IS_FOREIGN_MFN;
354843e1988Sjohnlev dumpvp_write(&mem_vtop, sizeof (mem_vtop_t));
355843e1988Sjohnlev xpv_dump_pages++;
356843e1988Sjohnlev
357843e1988Sjohnlev return (xpv_dump_pages);
358843e1988Sjohnlev }
359843e1988Sjohnlev
360843e1988Sjohnlev void
dump_xpv_pfn()361843e1988Sjohnlev dump_xpv_pfn()
362843e1988Sjohnlev {
363843e1988Sjohnlev pfn_t pfn;
364843e1988Sjohnlev int cnt;
365843e1988Sjohnlev
366843e1988Sjohnlev for (cnt = 0; cnt < xpv_dump_pages; cnt++) {
367843e1988Sjohnlev pfn = (pfn_t)cnt | PFN_IS_FOREIGN_MFN;
368843e1988Sjohnlev dumpvp_write(&pfn, sizeof (pfn));
369843e1988Sjohnlev }
370843e1988Sjohnlev }
371843e1988Sjohnlev
372843e1988Sjohnlev int
dump_xpv_data(void * dump_cbuf)373843e1988Sjohnlev dump_xpv_data(void *dump_cbuf)
374843e1988Sjohnlev {
375843e1988Sjohnlev uintptr_t va;
376843e1988Sjohnlev uint32_t csize;
377843e1988Sjohnlev int cnt = 0;
378843e1988Sjohnlev
379843e1988Sjohnlev /*
380843e1988Sjohnlev * XXX: we should probably run this data through a UE check. The
381843e1988Sjohnlev * catch is that the UE code relies on on_trap() and getpfnum()
382843e1988Sjohnlev * working.
383843e1988Sjohnlev */
384843e1988Sjohnlev va = xen_virt_start;
385843e1988Sjohnlev
386843e1988Sjohnlev while (xpv_va_walk(&va) != PFN_INVALID) {
387843e1988Sjohnlev csize = (uint32_t)compress((void *)va, dump_cbuf, PAGESIZE);
388843e1988Sjohnlev dumpvp_write(&csize, sizeof (uint32_t));
389843e1988Sjohnlev dumpvp_write(dump_cbuf, csize);
390843e1988Sjohnlev if (dump_ioerr) {
391843e1988Sjohnlev dumphdr->dump_flags &= ~DF_COMPLETE;
392843e1988Sjohnlev return (cnt);
393843e1988Sjohnlev }
394843e1988Sjohnlev cnt++;
395843e1988Sjohnlev va += MMU_PAGESIZE;
396843e1988Sjohnlev }
397843e1988Sjohnlev
398843e1988Sjohnlev /*
399843e1988Sjohnlev * Finally, dump the shared_info page
400843e1988Sjohnlev */
401843e1988Sjohnlev csize = (uint32_t)compress((void *)HYPERVISOR_shared_info, dump_cbuf,
402843e1988Sjohnlev PAGESIZE);
403843e1988Sjohnlev dumpvp_write(&csize, sizeof (uint32_t));
404843e1988Sjohnlev dumpvp_write(dump_cbuf, csize);
405843e1988Sjohnlev if (dump_ioerr)
406843e1988Sjohnlev dumphdr->dump_flags &= ~DF_COMPLETE;
407843e1988Sjohnlev cnt++;
408843e1988Sjohnlev
409843e1988Sjohnlev return (cnt);
410843e1988Sjohnlev }
411843e1988Sjohnlev
412843e1988Sjohnlev static void *
showstack(void * fpreg,int xpv_only)413843e1988Sjohnlev showstack(void *fpreg, int xpv_only)
414843e1988Sjohnlev {
415843e1988Sjohnlev struct frame *fpp;
416843e1988Sjohnlev ulong_t off;
417843e1988Sjohnlev char *sym;
418843e1988Sjohnlev uintptr_t pc, fp, lastfp;
419843e1988Sjohnlev uintptr_t minaddr = min(KERNELBASE, xen_virt_start);
420843e1988Sjohnlev
421843e1988Sjohnlev fp = (uintptr_t)fpreg;
422843e1988Sjohnlev if (fp < minaddr) {
423843e1988Sjohnlev xpv_panic_printf("Bad frame ptr: 0x%p\n", fpreg);
424843e1988Sjohnlev return (fpreg);
425843e1988Sjohnlev }
426843e1988Sjohnlev
427843e1988Sjohnlev do {
428843e1988Sjohnlev fpp = (struct frame *)fp;
429843e1988Sjohnlev pc = fpp->fr_savpc;
430843e1988Sjohnlev
431843e1988Sjohnlev if ((xpv_only != 0) &&
432843e1988Sjohnlev (fp > xpv_end || fp < xen_virt_start))
433843e1988Sjohnlev break;
434843e1988Sjohnlev if ((sym = kobj_getsymname(pc, &off)) != NULL)
435843e1988Sjohnlev xpv_panic_printf("%08lx %s:%s+%lx\n", fp,
436843e1988Sjohnlev mod_containing_pc((caddr_t)pc), sym, off);
437843e1988Sjohnlev else if ((pc >= xen_virt_start) && (pc <= xpv_end))
438843e1988Sjohnlev xpv_panic_printf("%08lx 0x%lx (in Xen)\n", fp, pc);
439843e1988Sjohnlev else
440843e1988Sjohnlev xpv_panic_printf("%08lx %lx\n", fp, pc);
441843e1988Sjohnlev
442843e1988Sjohnlev lastfp = fp;
443843e1988Sjohnlev fp = fpp->fr_savfp;
444843e1988Sjohnlev
445843e1988Sjohnlev /*
446843e1988Sjohnlev * Xen marks an exception frame by inverting the frame
447843e1988Sjohnlev * pointer.
448843e1988Sjohnlev */
449843e1988Sjohnlev if (fp < lastfp) {
450843e1988Sjohnlev if ((~fp > minaddr) && ((~fp) ^ lastfp) < 0xfff)
451843e1988Sjohnlev fp = ~fp;
452843e1988Sjohnlev }
453843e1988Sjohnlev } while (fp > lastfp);
454843e1988Sjohnlev return ((void *)fp);
455843e1988Sjohnlev }
456843e1988Sjohnlev
/*
 * Print the Xen portion of the stack starting at 'fpreg' and return the
 * frame pointer at which showstack() stopped.
 */
void *
xpv_traceback(void *fpreg)
{
	const int xen_frames_only = 1;

	return (showstack(fpreg, xen_frames_only));
}
462843e1988Sjohnlev
463843e1988Sjohnlev #if defined(__amd64)
464843e1988Sjohnlev static void
xpv_panic_hypercall(ulong_t call)465843e1988Sjohnlev xpv_panic_hypercall(ulong_t call)
466843e1988Sjohnlev {
467843e1988Sjohnlev panic("Illegally issued hypercall %d during panic!\n", (int)call);
468843e1988Sjohnlev }
469843e1988Sjohnlev #endif
470843e1988Sjohnlev
471843e1988Sjohnlev void
xpv_die(struct regs * rp)472843e1988Sjohnlev xpv_die(struct regs *rp)
473843e1988Sjohnlev {
474843e1988Sjohnlev struct panic_trap_info ti;
475843e1988Sjohnlev struct cregs creg;
476843e1988Sjohnlev
477843e1988Sjohnlev ti.trap_regs = rp;
478843e1988Sjohnlev ti.trap_type = rp->r_trapno;
479843e1988Sjohnlev
480843e1988Sjohnlev curthread->t_panic_trap = &ti;
481843e1988Sjohnlev if (ti.trap_type == T_PGFLT) {
482843e1988Sjohnlev getcregs(&creg);
483843e1988Sjohnlev ti.trap_addr = (caddr_t)creg.cr_cr2;
484843e1988Sjohnlev panic("Fatal pagefault at 0x%lx. fault addr=0x%p rp=0x%p",
485903a11ebSrh87107 rp->r_pc, (void *)ti.trap_addr, (void *)rp);
486843e1988Sjohnlev } else {
487843e1988Sjohnlev ti.trap_addr = (caddr_t)rp->r_pc;
488843e1988Sjohnlev panic("Fatal trap %ld at 0x%lx. rp=0x%p", rp->r_trapno,
489903a11ebSrh87107 rp->r_pc, (void *)rp);
490843e1988Sjohnlev }
491843e1988Sjohnlev }
492843e1988Sjohnlev
493843e1988Sjohnlev /*
494843e1988Sjohnlev * Build IDT to handle a Xen panic
495843e1988Sjohnlev */
496843e1988Sjohnlev static void
switch_to_xpv_panic_idt()497843e1988Sjohnlev switch_to_xpv_panic_idt()
498843e1988Sjohnlev {
499843e1988Sjohnlev int i;
500843e1988Sjohnlev desctbr_t idtr;
501843e1988Sjohnlev gate_desc_t *idt = xpv_panic_idt;
502843e1988Sjohnlev selector_t cs = get_cs_register();
503843e1988Sjohnlev
504843e1988Sjohnlev for (i = 0; i < 32; i++)
5059844da31SSeth Goldberg set_gatesegd(&idt[i], &xpv_invaltrap, cs, SDT_SYSIGT, TRP_XPL,
5069844da31SSeth Goldberg 0);
507843e1988Sjohnlev
5089844da31SSeth Goldberg set_gatesegd(&idt[T_ZERODIV], &xpv_div0trap, cs, SDT_SYSIGT, TRP_XPL,
5099844da31SSeth Goldberg 0);
5109844da31SSeth Goldberg set_gatesegd(&idt[T_SGLSTP], &xpv_dbgtrap, cs, SDT_SYSIGT, TRP_XPL, 0);
5119844da31SSeth Goldberg set_gatesegd(&idt[T_NMIFLT], &xpv_nmiint, cs, SDT_SYSIGT, TRP_XPL, 0);
512843e1988Sjohnlev set_gatesegd(&idt[T_BOUNDFLT], &xpv_boundstrap, cs, SDT_SYSIGT,
5139844da31SSeth Goldberg TRP_XPL, 0);
5149844da31SSeth Goldberg set_gatesegd(&idt[T_ILLINST], &xpv_invoptrap, cs, SDT_SYSIGT, TRP_XPL,
5159844da31SSeth Goldberg 0);
5169844da31SSeth Goldberg set_gatesegd(&idt[T_NOEXTFLT], &xpv_ndptrap, cs, SDT_SYSIGT, TRP_XPL,
5179844da31SSeth Goldberg 0);
5189844da31SSeth Goldberg set_gatesegd(&idt[T_TSSFLT], &xpv_invtsstrap, cs, SDT_SYSIGT, TRP_XPL,
5199844da31SSeth Goldberg 0);
5209844da31SSeth Goldberg set_gatesegd(&idt[T_SEGFLT], &xpv_segnptrap, cs, SDT_SYSIGT, TRP_XPL,
5219844da31SSeth Goldberg 0);
5229844da31SSeth Goldberg set_gatesegd(&idt[T_STKFLT], &xpv_stktrap, cs, SDT_SYSIGT, TRP_XPL, 0);
5239844da31SSeth Goldberg set_gatesegd(&idt[T_GPFLT], &xpv_gptrap, cs, SDT_SYSIGT, TRP_XPL, 0);
5249844da31SSeth Goldberg set_gatesegd(&idt[T_PGFLT], &xpv_pftrap, cs, SDT_SYSIGT, TRP_XPL, 0);
5259844da31SSeth Goldberg set_gatesegd(&idt[T_EXTERRFLT], &xpv_ndperr, cs, SDT_SYSIGT, TRP_XPL,
5269844da31SSeth Goldberg 0);
5279844da31SSeth Goldberg set_gatesegd(&idt[T_ALIGNMENT], &xpv_achktrap, cs, SDT_SYSIGT, TRP_XPL,
5289844da31SSeth Goldberg 0);
5299844da31SSeth Goldberg set_gatesegd(&idt[T_MCE], &xpv_mcetrap, cs, SDT_SYSIGT, TRP_XPL, 0);
5309844da31SSeth Goldberg set_gatesegd(&idt[T_SIMDFPE], &xpv_xmtrap, cs, SDT_SYSIGT, TRP_XPL, 0);
531843e1988Sjohnlev
532843e1988Sjohnlev /*
533843e1988Sjohnlev * We have no double fault handler. Any single fault represents a
534843e1988Sjohnlev * catastrophic failure for us, so there is no attempt to handle
535843e1988Sjohnlev * them cleanly: we just print a message and reboot. If we
536843e1988Sjohnlev * encounter a second fault while doing that, there is nothing
537843e1988Sjohnlev * else we can do.
538843e1988Sjohnlev */
539843e1988Sjohnlev
540843e1988Sjohnlev /*
541843e1988Sjohnlev * Be prepared to absorb any stray device interrupts received
542843e1988Sjohnlev * while writing the core to disk.
543843e1988Sjohnlev */
544843e1988Sjohnlev for (i = 33; i < NIDT; i++)
545843e1988Sjohnlev set_gatesegd(&idt[i], &xpv_surprise_intr, cs, SDT_SYSIGT,
5469844da31SSeth Goldberg TRP_XPL, 0);
547843e1988Sjohnlev
548843e1988Sjohnlev /* The one interrupt we expect to get is from the APIC timer. */
549843e1988Sjohnlev set_gatesegd(&idt[T_XPV_TIMER], &xpv_timer_trap, cs, SDT_SYSIGT,
5509844da31SSeth Goldberg TRP_XPL, 0);
551843e1988Sjohnlev
552843e1988Sjohnlev idtr.dtr_base = (uintptr_t)xpv_panic_idt;
553843e1988Sjohnlev idtr.dtr_limit = sizeof (xpv_panic_idt) - 1;
554843e1988Sjohnlev wr_idtr(&idtr);
555843e1988Sjohnlev
556843e1988Sjohnlev #if defined(__amd64)
557843e1988Sjohnlev /* Catch any hypercalls. */
558843e1988Sjohnlev wrmsr(MSR_AMD_LSTAR, (uintptr_t)xpv_panic_hypercall);
559843e1988Sjohnlev wrmsr(MSR_AMD_CSTAR, (uintptr_t)xpv_panic_hypercall);
560843e1988Sjohnlev #endif
561843e1988Sjohnlev }
562843e1988Sjohnlev
563843e1988Sjohnlev static void
xpv_apic_clkinit()564843e1988Sjohnlev xpv_apic_clkinit()
565843e1988Sjohnlev {
566843e1988Sjohnlev uint_t apic_ticks = 0;
567843e1988Sjohnlev
568843e1988Sjohnlev /*
569843e1988Sjohnlev * Measure how many APIC ticks there are within a fixed time
570843e1988Sjohnlev * period. We're going to be fairly coarse here. This timer is
571843e1988Sjohnlev * just being used to detect a stalled panic, so as long as we have
572843e1988Sjohnlev * the right order of magnitude, everything should be fine.
573843e1988Sjohnlev */
574843e1988Sjohnlev xpv_apicadr[APIC_SPUR_INT_REG] = AV_UNIT_ENABLE | APIC_SPUR_INTR;
575843e1988Sjohnlev xpv_apicadr[APIC_LOCAL_TIMER] = AV_MASK;
576843e1988Sjohnlev xpv_apicadr[APIC_INT_VECT0] = AV_MASK; /* local intr reg 0 */
577843e1988Sjohnlev
578843e1988Sjohnlev xpv_apicadr[APIC_DIVIDE_REG] = 0;
579843e1988Sjohnlev xpv_apicadr[APIC_INIT_COUNT] = APIC_MAXVAL;
580843e1988Sjohnlev drv_usecwait(XPV_TIMER_INTERVAL);
581843e1988Sjohnlev apic_ticks = APIC_MAXVAL - xpv_apicadr[APIC_CURR_COUNT];
582843e1988Sjohnlev
583843e1988Sjohnlev /*
584843e1988Sjohnlev * apic_ticks now represents roughly how many apic ticks comprise
585843e1988Sjohnlev * one timeout interval. Program the timer to send us an interrupt
586843e1988Sjohnlev * every time that interval expires.
587843e1988Sjohnlev */
58841afdfa7SKrishnendu Sadhukhan - Sun Microsystems xpv_apicadr[APIC_LOCAL_TIMER] = T_XPV_TIMER | AV_PERIODIC;
589843e1988Sjohnlev xpv_apicadr[APIC_INIT_COUNT] = apic_ticks;
590843e1988Sjohnlev xpv_apicadr[APIC_EOI_REG] = 0;
591843e1988Sjohnlev }
592843e1988Sjohnlev
593843e1988Sjohnlev void
xpv_timer_tick(void)594843e1988Sjohnlev xpv_timer_tick(void)
595843e1988Sjohnlev {
596843e1988Sjohnlev static int ticks = 0;
597843e1988Sjohnlev
598843e1988Sjohnlev if (ticks++ >= MICROSEC / XPV_TIMER_INTERVAL) {
599843e1988Sjohnlev ticks = 0;
600843e1988Sjohnlev if (dump_timeleft && (--dump_timeleft == 0))
601843e1988Sjohnlev panic("Xen panic timeout\n");
602843e1988Sjohnlev }
603843e1988Sjohnlev xpv_apicadr[APIC_EOI_REG] = 0;
604843e1988Sjohnlev }
605843e1988Sjohnlev
606843e1988Sjohnlev void
xpv_interrupt(void)607843e1988Sjohnlev xpv_interrupt(void)
608843e1988Sjohnlev {
609843e1988Sjohnlev #ifdef DEBUG
610843e1988Sjohnlev static int cnt = 0;
611843e1988Sjohnlev
612843e1988Sjohnlev if (cnt++ < 10)
613843e1988Sjohnlev xpv_panic_printf("Unexpected interrupt received.\n");
614843e1988Sjohnlev if ((cnt < 1000) && ((cnt % 100) == 0))
615843e1988Sjohnlev xpv_panic_printf("%d unexpected interrupts received.\n", cnt);
616843e1988Sjohnlev #endif
617843e1988Sjohnlev
618843e1988Sjohnlev xpv_apicadr[APIC_EOI_REG] = 0;
619843e1988Sjohnlev }
620843e1988Sjohnlev
621843e1988Sjohnlev /*
622843e1988Sjohnlev * Managing time in panic context is trivial. We only have a single CPU,
623843e1988Sjohnlev * we never get rescheduled, we never get suspended. We just need to
624843e1988Sjohnlev * convert clock ticks into nanoseconds.
625843e1988Sjohnlev */
626843e1988Sjohnlev static hrtime_t
xpv_panic_gethrtime(void)627843e1988Sjohnlev xpv_panic_gethrtime(void)
628843e1988Sjohnlev {
629843e1988Sjohnlev hrtime_t tsc, hrt;
630843e1988Sjohnlev unsigned int *l = (unsigned int *)&(tsc);
631843e1988Sjohnlev
632843e1988Sjohnlev tsc = __rdtsc_insn();
633843e1988Sjohnlev hrt = (mul32(l[1], nsec_scale) << NSEC_SHIFT) +
634843e1988Sjohnlev (mul32(l[0], nsec_scale) >> (32 - NSEC_SHIFT));
635843e1988Sjohnlev
636843e1988Sjohnlev return (hrt);
637843e1988Sjohnlev }
638843e1988Sjohnlev
639843e1988Sjohnlev static void
xpv_panic_time_init()640843e1988Sjohnlev xpv_panic_time_init()
641843e1988Sjohnlev {
642843e1988Sjohnlev nsec_scale =
643843e1988Sjohnlev CPU->cpu_m.mcpu_vcpu_info->time.tsc_to_system_mul >> NSEC_SHIFT;
644843e1988Sjohnlev
645843e1988Sjohnlev gethrtimef = xpv_panic_gethrtime;
646843e1988Sjohnlev }
647843e1988Sjohnlev
/*
 * Varargs wrapper around panicsys(): bundles the caller's format arguments
 * into a va_list and passes them on together with the saved register set
 * 'rp'.
 */
static void
xpv_panicsys(struct regs *rp, char *fmt, ...)
{
	extern void panicsys(const char *, va_list, struct regs *, int);
	va_list args;

	va_start(args, fmt);
	panicsys(fmt, args, rp, 1);
	va_end(args);
}
658843e1988Sjohnlev
659843e1988Sjohnlev void
xpv_do_panic(void * arg)660843e1988Sjohnlev xpv_do_panic(void *arg)
661843e1988Sjohnlev {
662843e1988Sjohnlev struct panic_info *pip = (struct panic_info *)arg;
663843e1988Sjohnlev int l;
664843e1988Sjohnlev struct cregs creg;
665843e1988Sjohnlev #if defined(__amd64)
666843e1988Sjohnlev extern uintptr_t postbootkernelbase;
667843e1988Sjohnlev #endif
668843e1988Sjohnlev
669843e1988Sjohnlev if (xpv_panicking++ > 0)
670843e1988Sjohnlev panic("multiple calls to xpv_do_panic()");
671843e1988Sjohnlev
672843e1988Sjohnlev /*
673843e1988Sjohnlev * Indicate to the underlying panic framework that a panic has been
674843e1988Sjohnlev * initiated. This is ordinarily done as part of vpanic(). Since
675843e1988Sjohnlev * we already have all the register state saved by the hypervisor,
676843e1988Sjohnlev * we skip that and jump straight into the panic processing code.
677e4b86885SCheng Sean Ye *
678e4b86885SCheng Sean Ye * XXX If another thread grabs and wins the panic_quiesce trigger
679e4b86885SCheng Sean Ye * then we'll have two threads in panicsys believing they are in
680e4b86885SCheng Sean Ye * charge of the panic attempt!
681843e1988Sjohnlev */
682843e1988Sjohnlev (void) panic_trigger(&panic_quiesce);
683843e1988Sjohnlev
684843e1988Sjohnlev #if defined(__amd64)
685843e1988Sjohnlev /*
686843e1988Sjohnlev * bzero() and bcopy() get unhappy when asked to operate on
687843e1988Sjohnlev * addresses outside of the kernel. At this point Xen is really a
688843e1988Sjohnlev * part of the kernel, so we update the routines' notion of where
689843e1988Sjohnlev * the kernel starts.
690843e1988Sjohnlev */
691843e1988Sjohnlev postbootkernelbase = xen_virt_start;
692843e1988Sjohnlev #endif
693843e1988Sjohnlev
694843e1988Sjohnlev #if defined(HYPERVISOR_VIRT_END)
695843e1988Sjohnlev xpv_end = HYPERVISOR_VIRT_END;
696843e1988Sjohnlev #else
697843e1988Sjohnlev xpv_end = (uintptr_t)UINTPTR_MAX - sizeof (uintptr_t);
698843e1988Sjohnlev #endif
699843e1988Sjohnlev
700843e1988Sjohnlev /*
701843e1988Sjohnlev * If we were redirecting console output to the hypervisor, we have
702843e1988Sjohnlev * to stop.
703843e1988Sjohnlev */
704843e1988Sjohnlev use_polledio = B_FALSE;
705*0d928757SGary Mills if (boot_console_type(NULL) == CONS_HYPERVISOR) {
706843e1988Sjohnlev bcons_device_change(CONS_HYPERVISOR);
707843e1988Sjohnlev } else if (cons_polledio != NULL &&
708843e1988Sjohnlev cons_polledio->cons_polledio_putchar != NULL) {
709843e1988Sjohnlev if (cons_polledio->cons_polledio_enter != NULL)
710843e1988Sjohnlev cons_polledio->cons_polledio_enter(
711843e1988Sjohnlev cons_polledio->cons_polledio_argument);
712843e1988Sjohnlev use_polledio = 1;
713843e1988Sjohnlev }
714843e1988Sjohnlev
715843e1988Sjohnlev /* Make sure we handle all console output from here on. */
716843e1988Sjohnlev sysp->bsvc_putchar = xpv_panic_putc;
717843e1988Sjohnlev
718843e1988Sjohnlev /*
719843e1988Sjohnlev * If we find an unsupported panic_info structure, there's not much
720843e1988Sjohnlev * we can do other than complain, plow on, and hope for the best.
721843e1988Sjohnlev */
722843e1988Sjohnlev if (pip->pi_version != PANIC_INFO_VERSION)
723843e1988Sjohnlev xpv_panic_printf("Warning: Xen is using an unsupported "
724843e1988Sjohnlev "version of the panic_info structure.\n");
725843e1988Sjohnlev
726843e1988Sjohnlev xpv_panic_info = pip;
727843e1988Sjohnlev
728a576ab5bSrab #if defined(__amd64)
729a576ab5bSrab kpm1_low = (uintptr_t)xpv_panic_info->pi_ram_start;
730a576ab5bSrab if (xpv_panic_info->pi_xen_start == NULL) {
731a576ab5bSrab kpm1_high = (uintptr_t)xpv_panic_info->pi_ram_end;
732a576ab5bSrab } else {
733a576ab5bSrab kpm1_high = (uintptr_t)xpv_panic_info->pi_xen_start;
734a576ab5bSrab kpm2_low = (uintptr_t)xpv_panic_info->pi_xen_end;
735a576ab5bSrab kpm2_high = (uintptr_t)xpv_panic_info->pi_ram_end;
736a576ab5bSrab }
737a576ab5bSrab #endif
738a576ab5bSrab
739843e1988Sjohnlev /*
740843e1988Sjohnlev * Make sure we are running on the Solaris %gs. The Xen panic code
741843e1988Sjohnlev * should already have set up the GDT properly.
742843e1988Sjohnlev */
743843e1988Sjohnlev xpv_panic_resetgs();
744843e1988Sjohnlev #if defined(__amd64)
745843e1988Sjohnlev wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);
746843e1988Sjohnlev #endif
747843e1988Sjohnlev
748843e1988Sjohnlev xpv_panic_time_init();
749843e1988Sjohnlev
750843e1988Sjohnlev /*
751843e1988Sjohnlev * Switch to our own IDT, avoiding any accidental returns to Xen
752843e1988Sjohnlev * world.
753843e1988Sjohnlev */
754843e1988Sjohnlev switch_to_xpv_panic_idt();
755843e1988Sjohnlev
756843e1988Sjohnlev /*
757843e1988Sjohnlev * Initialize the APIC timer, which is used to detect a hung dump
758843e1988Sjohnlev * attempt.
759843e1988Sjohnlev */
760843e1988Sjohnlev xpv_apicadr = pip->pi_apic;
761843e1988Sjohnlev xpv_apic_clkinit();
762843e1988Sjohnlev
763843e1988Sjohnlev /*
764843e1988Sjohnlev * Set up a few values that we'll need repeatedly.
765843e1988Sjohnlev */
766843e1988Sjohnlev getcregs(&creg);
767843e1988Sjohnlev xpv_panic_cr3 = creg.cr_cr3;
768843e1988Sjohnlev for (l = mmu.max_level; l >= 0; l--)
769843e1988Sjohnlev xpv_panic_nptes[l] = mmu.ptes_per_table;
770843e1988Sjohnlev #ifdef __i386
771843e1988Sjohnlev if (mmu.pae_hat)
772843e1988Sjohnlev xpv_panic_nptes[mmu.max_level] = 4;
773843e1988Sjohnlev #endif
774843e1988Sjohnlev
775843e1988Sjohnlev /* Add the fake Xen module to the module list */
776843e1988Sjohnlev if (xpv_module != NULL) {
777843e1988Sjohnlev extern int last_module_id;
778843e1988Sjohnlev
779843e1988Sjohnlev xpv_modctl->mod_id = last_module_id++;
780843e1988Sjohnlev xpv_modctl->mod_next = &modules;
781843e1988Sjohnlev xpv_modctl->mod_prev = modules.mod_prev;
782843e1988Sjohnlev modules.mod_prev->mod_next = xpv_modctl;
783843e1988Sjohnlev modules.mod_prev = xpv_modctl;
784843e1988Sjohnlev }
785e4b86885SCheng Sean Ye
786e4b86885SCheng Sean Ye if (pip->pi_mca.mpd_magic == MCA_PANICDATA_MAGIC)
787e4b86885SCheng Sean Ye xpv_mca_panic_data = &pip->pi_mca;
788e4b86885SCheng Sean Ye
789843e1988Sjohnlev xpv_panic_printf = printf;
790843e1988Sjohnlev xpv_panicsys((struct regs *)pip->pi_regs, pip->pi_panicstr);
791843e1988Sjohnlev xpv_panic_printf("Failed to reboot following panic.\n");
792843e1988Sjohnlev for (;;)
793843e1988Sjohnlev ;
794843e1988Sjohnlev }
795843e1988Sjohnlev
796843e1988Sjohnlev /*
797843e1988Sjohnlev * Set up the necessary data structures to pretend that the Xen hypervisor
798843e1988Sjohnlev * is a loadable module, allowing mdb to find the Xen symbols in a crash
799843e1988Sjohnlev * dump. Since these symbols all map to VA space Solaris doesn't normally
800843e1988Sjohnlev * have access to, we don't link these structures into the kernel's lists
801843e1988Sjohnlev * until/unless we hit a Xen panic.
802843e1988Sjohnlev *
803843e1988Sjohnlev * The observant reader will note a striking amount of overlap between this
804843e1988Sjohnlev * code and that found in krtld. While it would be handy if we could just
805843e1988Sjohnlev * ask krtld to do this work for us, it's not that simple. Among the
806843e1988Sjohnlev * complications: we're not actually loading the text here (grub did it at
807843e1988Sjohnlev * boot), the .text section is writable, there are no relocations to do,
808843e1988Sjohnlev * none of the module text/data is in readable memory, etc. Training krtld
809843e1988Sjohnlev * to deal with this weird module is as complicated, and more risky, than
810843e1988Sjohnlev * reimplementing the necessary subset of it here.
811843e1988Sjohnlev */
812843e1988Sjohnlev static void
init_xen_module()813843e1988Sjohnlev init_xen_module()
814843e1988Sjohnlev {
815843e1988Sjohnlev struct _buf *file = NULL;
816843e1988Sjohnlev struct module *mp;
817843e1988Sjohnlev struct modctl *mcp;
818843e1988Sjohnlev int i, shn;
819843e1988Sjohnlev Shdr *shp, *ctf_shp;
820843e1988Sjohnlev char *names = NULL;
821843e1988Sjohnlev size_t n, namesize, text_align, data_align;
822843e1988Sjohnlev #if defined(__amd64)
823843e1988Sjohnlev const char machine = EM_AMD64;
824843e1988Sjohnlev #else
825843e1988Sjohnlev const char machine = EM_386;
826843e1988Sjohnlev #endif
827843e1988Sjohnlev
828843e1988Sjohnlev /* Allocate and init the module structure */
829843e1988Sjohnlev mp = kmem_zalloc(sizeof (*mp), KM_SLEEP);
830843e1988Sjohnlev mp->filename = kobj_zalloc(strlen(XPV_FILENAME) + 1, KM_SLEEP);
831843e1988Sjohnlev (void) strcpy(mp->filename, XPV_FILENAME);
832843e1988Sjohnlev
833843e1988Sjohnlev /* Allocate and init the modctl structure */
834843e1988Sjohnlev mcp = kmem_zalloc(sizeof (*mcp), KM_SLEEP);
835843e1988Sjohnlev mcp->mod_modname = kobj_zalloc(strlen(XPV_MODNAME) + 1, KM_SLEEP);
836843e1988Sjohnlev (void) strcpy(mcp->mod_modname, XPV_MODNAME);
837843e1988Sjohnlev mcp->mod_filename = kobj_zalloc(strlen(XPV_FILENAME) + 1, KM_SLEEP);
838843e1988Sjohnlev (void) strcpy(mcp->mod_filename, XPV_FILENAME);
839843e1988Sjohnlev mcp->mod_inprogress_thread = (kthread_id_t)-1;
840843e1988Sjohnlev mcp->mod_ref = 1;
841843e1988Sjohnlev mcp->mod_loaded = 1;
842843e1988Sjohnlev mcp->mod_loadcnt = 1;
843843e1988Sjohnlev mcp->mod_mp = mp;
844843e1988Sjohnlev
845843e1988Sjohnlev /*
846843e1988Sjohnlev * Try to open a Xen image that hasn't had its symbol and CTF
847843e1988Sjohnlev * information stripped off.
848843e1988Sjohnlev */
849843e1988Sjohnlev file = kobj_open_file(XPV_FILENAME);
850843e1988Sjohnlev if (file == (struct _buf *)-1) {
851843e1988Sjohnlev file = NULL;
852843e1988Sjohnlev goto err;
853843e1988Sjohnlev }
854843e1988Sjohnlev
855843e1988Sjohnlev /*
856843e1988Sjohnlev * Read the header and ensure that this is an ELF file for the
857843e1988Sjohnlev * proper ISA. If it's not, somebody has done something very
858843e1988Sjohnlev * stupid. Why bother? See Mencken.
859843e1988Sjohnlev */
860843e1988Sjohnlev if (kobj_read_file(file, (char *)&mp->hdr, sizeof (mp->hdr), 0) < 0)
861843e1988Sjohnlev goto err;
862843e1988Sjohnlev for (i = 0; i < SELFMAG; i++)
863843e1988Sjohnlev if (mp->hdr.e_ident[i] != ELFMAG[i])
864843e1988Sjohnlev goto err;
865843e1988Sjohnlev if ((mp->hdr.e_ident[EI_DATA] != ELFDATA2LSB) ||
866843e1988Sjohnlev (mp->hdr.e_machine != machine))
867843e1988Sjohnlev goto err;
868843e1988Sjohnlev
869843e1988Sjohnlev /* Read in the section headers */
870843e1988Sjohnlev n = mp->hdr.e_shentsize * mp->hdr.e_shnum;
871843e1988Sjohnlev mp->shdrs = kmem_zalloc(n, KM_SLEEP);
872843e1988Sjohnlev if (kobj_read_file(file, mp->shdrs, n, mp->hdr.e_shoff) < 0)
873843e1988Sjohnlev goto err;
874843e1988Sjohnlev
875843e1988Sjohnlev /* Read the section names */
876843e1988Sjohnlev shp = (Shdr *)(mp->shdrs + mp->hdr.e_shstrndx * mp->hdr.e_shentsize);
877843e1988Sjohnlev namesize = shp->sh_size;
878843e1988Sjohnlev names = kmem_zalloc(shp->sh_size, KM_SLEEP);
879843e1988Sjohnlev if (kobj_read_file(file, names, shp->sh_size, shp->sh_offset) < 0)
880843e1988Sjohnlev goto err;
881843e1988Sjohnlev
882843e1988Sjohnlev /*
883843e1988Sjohnlev * Fill in the text and data size fields.
884843e1988Sjohnlev */
885843e1988Sjohnlev ctf_shp = NULL;
886843e1988Sjohnlev text_align = data_align = 0;
887843e1988Sjohnlev for (shn = 1; shn < mp->hdr.e_shnum; shn++) {
888843e1988Sjohnlev shp = (Shdr *)(mp->shdrs + shn * mp->hdr.e_shentsize);
889843e1988Sjohnlev
890843e1988Sjohnlev /* Sanity check the offset of the section name */
891843e1988Sjohnlev if (shp->sh_name >= namesize)
892843e1988Sjohnlev continue;
893843e1988Sjohnlev
894843e1988Sjohnlev /* If we find the symtab section, remember it for later. */
895843e1988Sjohnlev if (shp->sh_type == SHT_SYMTAB) {
896843e1988Sjohnlev mp->symtbl_section = shn;
897843e1988Sjohnlev mp->symhdr = shp;
898843e1988Sjohnlev continue;
899843e1988Sjohnlev }
900843e1988Sjohnlev
901843e1988Sjohnlev /* If we find the CTF section, remember it for later. */
902843e1988Sjohnlev if ((shp->sh_size != 0) &&
903843e1988Sjohnlev (strcmp(names + shp->sh_name, ".SUNW_ctf") == 0)) {
904843e1988Sjohnlev ctf_shp = shp;
905843e1988Sjohnlev continue;
906843e1988Sjohnlev }
907843e1988Sjohnlev
908843e1988Sjohnlev if (!(shp->sh_flags & SHF_ALLOC))
909843e1988Sjohnlev continue;
910843e1988Sjohnlev
911843e1988Sjohnlev /*
912843e1988Sjohnlev * Xen marks its text section as writable, so we need to
913843e1988Sjohnlev * look for the name - not just the flag.
914843e1988Sjohnlev */
915843e1988Sjohnlev if ((strcmp(&names[shp->sh_name], ".text") != NULL) &&
916843e1988Sjohnlev (shp->sh_flags & SHF_WRITE) != 0) {
917843e1988Sjohnlev if (shp->sh_addralign > data_align)
918843e1988Sjohnlev data_align = shp->sh_addralign;
919843e1988Sjohnlev mp->data_size = ALIGN(mp->data_size, data_align);
920843e1988Sjohnlev mp->data_size += ALIGN(shp->sh_size, 8);
921843e1988Sjohnlev if (mp->data == NULL || mp->data > (char *)shp->sh_addr)
922843e1988Sjohnlev mp->data = (char *)shp->sh_addr;
923843e1988Sjohnlev } else {
924843e1988Sjohnlev if (shp->sh_addralign > text_align)
925843e1988Sjohnlev text_align = shp->sh_addralign;
926843e1988Sjohnlev mp->text_size = ALIGN(mp->text_size, text_align);
927843e1988Sjohnlev mp->text_size += ALIGN(shp->sh_size, 8);
928843e1988Sjohnlev if (mp->text == NULL || mp->text > (char *)shp->sh_addr)
929843e1988Sjohnlev mp->text = (char *)shp->sh_addr;
930843e1988Sjohnlev }
931843e1988Sjohnlev }
932843e1988Sjohnlev kmem_free(names, namesize);
933843e1988Sjohnlev names = NULL;
9348cdfbd11Snn35248 shp = NULL;
935843e1988Sjohnlev mcp->mod_text = mp->text;
936843e1988Sjohnlev mcp->mod_text_size = mp->text_size;
937843e1988Sjohnlev
938843e1988Sjohnlev /*
939843e1988Sjohnlev * If we have symbol table and string table sections, read them in
940843e1988Sjohnlev * now. If we don't, we just plow on. We'll still get a valid
941843e1988Sjohnlev * core dump, but finding anything useful will be just a bit
942843e1988Sjohnlev * harder.
943843e1988Sjohnlev *
944843e1988Sjohnlev * Note: we don't bother with a hash table. We'll never do a
945843e1988Sjohnlev * symbol lookup unless we crash, and then mdb creates its own. We
946843e1988Sjohnlev * also don't try to perform any relocations. Xen should be loaded
947843e1988Sjohnlev * exactly where the ELF file indicates, and the symbol information
948843e1988Sjohnlev * in the file should be complete and correct already. Static
949843e1988Sjohnlev * linking ain't all bad.
950843e1988Sjohnlev */
951843e1988Sjohnlev if ((mp->symhdr != NULL) && (mp->symhdr->sh_link < mp->hdr.e_shnum)) {
952843e1988Sjohnlev mp->strhdr = (Shdr *)
953843e1988Sjohnlev (mp->shdrs + mp->symhdr->sh_link * mp->hdr.e_shentsize);
954843e1988Sjohnlev mp->nsyms = mp->symhdr->sh_size / mp->symhdr->sh_entsize;
955843e1988Sjohnlev
956843e1988Sjohnlev /* Allocate space for the symbol table and strings. */
957843e1988Sjohnlev mp->symsize = mp->symhdr->sh_size +
958843e1988Sjohnlev mp->nsyms * sizeof (symid_t) + mp->strhdr->sh_size;
959843e1988Sjohnlev mp->symspace = kmem_zalloc(mp->symsize, KM_SLEEP);
960843e1988Sjohnlev mp->symtbl = mp->symspace;
961843e1988Sjohnlev mp->strings = (char *)(mp->symtbl + mp->symhdr->sh_size);
962843e1988Sjohnlev
963843e1988Sjohnlev if ((kobj_read_file(file, mp->symtbl,
964843e1988Sjohnlev mp->symhdr->sh_size, mp->symhdr->sh_offset) < 0) ||
965843e1988Sjohnlev (kobj_read_file(file, mp->strings,
966843e1988Sjohnlev mp->strhdr->sh_size, mp->strhdr->sh_offset) < 0))
967843e1988Sjohnlev goto err;
968843e1988Sjohnlev }
969843e1988Sjohnlev
970843e1988Sjohnlev /*
971843e1988Sjohnlev * Read in the CTF section
972843e1988Sjohnlev */
973843e1988Sjohnlev if ((ctf_shp != NULL) && ((moddebug & MODDEBUG_NOCTF) == 0)) {
9748cdfbd11Snn35248 mp->ctfdata = kmem_zalloc(ctf_shp->sh_size, KM_SLEEP);
975843e1988Sjohnlev mp->ctfsize = ctf_shp->sh_size;
976843e1988Sjohnlev if (kobj_read_file(file, mp->ctfdata, mp->ctfsize,
977843e1988Sjohnlev ctf_shp->sh_offset) < 0)
978843e1988Sjohnlev goto err;
979843e1988Sjohnlev }
980843e1988Sjohnlev
981843e1988Sjohnlev kobj_close_file(file);
982843e1988Sjohnlev
983843e1988Sjohnlev xpv_module = mp;
984843e1988Sjohnlev xpv_modctl = mcp;
985843e1988Sjohnlev return;
986843e1988Sjohnlev
987843e1988Sjohnlev err:
988843e1988Sjohnlev cmn_err(CE_WARN, "Failed to initialize xpv module.");
989843e1988Sjohnlev if (file != NULL)
990843e1988Sjohnlev kobj_close_file(file);
991843e1988Sjohnlev
992843e1988Sjohnlev kmem_free(mp->filename, strlen(XPV_FILENAME) + 1);
993843e1988Sjohnlev if (mp->shdrs != NULL)
994843e1988Sjohnlev kmem_free(mp->shdrs, mp->hdr.e_shentsize * mp->hdr.e_shnum);
995843e1988Sjohnlev if (mp->symspace != NULL)
996843e1988Sjohnlev kmem_free(mp->symspace, mp->symsize);
997843e1988Sjohnlev if (mp->ctfdata != NULL)
998843e1988Sjohnlev kmem_free(mp->ctfdata, mp->ctfsize);
999843e1988Sjohnlev kmem_free(mp, sizeof (*mp));
1000843e1988Sjohnlev kmem_free(mcp->mod_filename, strlen(XPV_FILENAME) + 1);
1001843e1988Sjohnlev kmem_free(mcp->mod_modname, strlen(XPV_MODNAME) + 1);
1002843e1988Sjohnlev kmem_free(mcp, sizeof (*mcp));
1003843e1988Sjohnlev if (names != NULL)
1004843e1988Sjohnlev kmem_free(names, namesize);
1005843e1988Sjohnlev }
1006843e1988Sjohnlev
1007843e1988Sjohnlev void
xpv_panic_init()1008843e1988Sjohnlev xpv_panic_init()
1009843e1988Sjohnlev {
1010843e1988Sjohnlev xen_platform_op_t op;
1011843e1988Sjohnlev int i;
1012843e1988Sjohnlev
1013843e1988Sjohnlev ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
1014843e1988Sjohnlev
1015843e1988Sjohnlev for (i = 0; i < mmu.num_level; i++)
1016843e1988Sjohnlev ptable_pfn[i] = PFN_INVALID;
1017843e1988Sjohnlev
1018843e1988Sjohnlev /* Let Xen know where to jump if/when it panics. */
1019843e1988Sjohnlev op.cmd = XENPF_panic_init;
1020843e1988Sjohnlev op.interface_version = XENPF_INTERFACE_VERSION;
1021843e1988Sjohnlev op.u.panic_init.panic_addr = (unsigned long)xpv_panic_hdlr;
1022843e1988Sjohnlev
1023843e1988Sjohnlev (void) HYPERVISOR_platform_op(&op);
1024843e1988Sjohnlev
1025843e1988Sjohnlev init_xen_module();
1026843e1988Sjohnlev }
1027