xref: /freebsd/sys/contrib/xen/arch-x86/xen.h (revision e6bfd18d21b225af6a0ed67ceeaf1293b7b9eba5)
1 /******************************************************************************
2  * arch-x86/xen.h
3  *
4  * Guest OS interface to x86 Xen.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  *
24  * Copyright (c) 2004-2006, K A Fraser
25  */
26 
27 #include "../xen.h"
28 
29 #ifndef __XEN_PUBLIC_ARCH_X86_XEN_H__
30 #define __XEN_PUBLIC_ARCH_X86_XEN_H__
31 
32 /*
 * Structural guest handles introduced in 0x00030201.
 *
 * ___DEFINE_XEN_GUEST_HANDLE(name, type) declares the typedef
 * __guest_handle_<name>.  For interface versions >= 0x00030201 the handle is
 * a struct wrapping a single `type *p` member; for older versions it is a
 * bare `type *`.
 */
33 #if __XEN_INTERFACE_VERSION__ >= 0x00030201
34 #define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
35     typedef struct { type *p; } __guest_handle_ ## name
36 #else
37 #define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
38     typedef type * __guest_handle_ ## name
39 #endif
40 
41 /*
42  * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field
43  * in a struct in memory.
44  * XEN_GUEST_HANDLE_PARAM represents a guest pointer, when passed as a
45  * hypercall argument.
46  * XEN_GUEST_HANDLE_PARAM and XEN_GUEST_HANDLE are the same on X86 but
47  * they might not be on other architectures.
48  */
/*
 * __DEFINE_XEN_GUEST_HANDLE(name, type) declares both __guest_handle_<name>
 * and the const-qualified __guest_handle_const_<name>.
 * DEFINE_XEN_GUEST_HANDLE(name) is the shorthand for a type whose own name
 * is also used as the handle name.
 * set_xen_guest_handle_raw() assigns through the handle's `p` member, so it
 * only compiles against the struct-wrapped handle form; `val` is expanded
 * without surrounding parentheses.
 */
49 #define __DEFINE_XEN_GUEST_HANDLE(name, type) \
50     ___DEFINE_XEN_GUEST_HANDLE(name, type);   \
51     ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type)
52 #define DEFINE_XEN_GUEST_HANDLE(name)   __DEFINE_XEN_GUEST_HANDLE(name, name)
53 #define __XEN_GUEST_HANDLE(name)        __guest_handle_ ## name
54 #define XEN_GUEST_HANDLE(name)          __XEN_GUEST_HANDLE(name)
55 #define XEN_GUEST_HANDLE_PARAM(name)    XEN_GUEST_HANDLE(name)
56 #define set_xen_guest_handle_raw(hnd, val)  do { (hnd).p = val; } while (0)
57 #define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val)
58 
/*
 * Pull in the sub-architecture header: xen-x86_32.h for __i386__,
 * xen-x86_64.h for __x86_64__.  When __XEN__ is defined, the i386 path sets
 * up __DECL_REG_LO8/__DECL_REG_LO16 before the include and redefines them
 * afterwards.
 * NOTE(review): __DeFiNe__/__UnDeF__ are not C preprocessor directives;
 * presumably a header-generation step rewrites them to #define/#undef --
 * confirm against the build before consuming this file directly.
 */
59 #if defined(__i386__)
60 # ifdef __XEN__
61 __DeFiNe__ __DECL_REG_LO8(which) uint32_t e ## which ## x
62 __DeFiNe__ __DECL_REG_LO16(name) union { uint32_t e ## name; }
63 # endif
64 #include "xen-x86_32.h"
65 # ifdef __XEN__
66 __UnDeF__ __DECL_REG_LO8
67 __UnDeF__ __DECL_REG_LO16
68 __DeFiNe__ __DECL_REG_LO8(which) e ## which ## x
69 __DeFiNe__ __DECL_REG_LO16(name) e ## name
70 # endif
71 #elif defined(__x86_64__)
72 #include "xen-x86_64.h"
73 #endif
74 
/*
 * xen_pfn_t is an unsigned long and is hidden from assembly sources.
 * PRI_xen_pfn ("lx") and PRIu_xen_pfn ("lu") are format-string fragments for
 * printing it in hexadecimal and unsigned decimal respectively.
 */
75 #ifndef __ASSEMBLY__
76 typedef unsigned long xen_pfn_t;
77 #define PRI_xen_pfn "lx"
78 #define PRIu_xen_pfn "lu"
79 #endif
80 
/*
 * Feature-presence markers, both defined to 1.
 * NOTE(review): the names suggest that PV guest entry points and a PV upcall
 * mask exist on x86 -- confirm against the code that tests these macros.
 */
81 #define XEN_HAVE_PV_GUEST_ENTRY 1
82 
83 #define XEN_HAVE_PV_UPCALL_MASK 1
84 
85 /*
86  * `incontents 200 segdesc Segment Descriptor Tables
87  */
88 /*
89  * ` enum neg_errnoval
90  * ` HYPERVISOR_set_gdt(const xen_pfn_t frames[], unsigned int entries);
91  * `
92  */
93 /*
94  * A number of GDT entries are reserved by Xen. These are not situated at the
95  * start of the GDT because some stupid OSes export hard-coded selector values
96  * in their ABI. These hard-coded values are always near the start of the GDT,
97  * so Xen places itself out of the way, at the far end of the GDT.
98  *
99  * NB The LDT is set using the MMUEXT_SET_LDT op of HYPERVISOR_mmuext_op
100  */
/*
 * Start of the Xen-reserved GDT region, expressed three ways: page index 14,
 * byte offset 14 * 4096 = 57344, and entry index 57344 / 8 = 7168.
 * NOTE(review): 4096 and 8 are presumably the page size and the size of one
 * descriptor in bytes -- confirm.
 */
101 #define FIRST_RESERVED_GDT_PAGE  14
102 #define FIRST_RESERVED_GDT_BYTE  (FIRST_RESERVED_GDT_PAGE * 4096)
103 #define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8)
104 
105 
106 /*
107  * ` enum neg_errnoval
108  * ` HYPERVISOR_update_descriptor(u64 pa, u64 desc);
109  * `
110  * ` @pa   The machine physical address of the descriptor to
111  * `       update. Must be either a descriptor page or writable.
112  * ` @desc The descriptor value to update, in the same format as a
113  * `       native descriptor table entry.
114  */
115 
116 /* Maximum number of virtual CPUs in legacy multi-processor guests. */
117 #define XEN_LEGACY_MAX_VCPUS 32
118 
119 #ifndef __ASSEMBLY__
120 
/*
 * xen_ulong_t is a plain unsigned long on x86; PRI_xen_ulong ("lx") is the
 * format-string fragment for printing it in hexadecimal.
 */
121 typedef unsigned long xen_ulong_t;
122 #define PRI_xen_ulong "lx"
123 
124 /*
125  * ` enum neg_errnoval
126  * ` HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp);
127  * `
128  * Sets the stack segment and pointer for the current vcpu.
129  */
130 
131 /*
132  * ` enum neg_errnoval
133  * ` HYPERVISOR_set_trap_table(const struct trap_info traps[]);
134  * `
135  */
136 /*
137  * Send an array of these to HYPERVISOR_set_trap_table().
138  * Terminate the array with a sentinel entry, with traps[].address==0.
139  * The privilege level specifies which modes may enter a trap via a software
140  * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate
141  * privilege levels as follows:
142  *  Level == 0: No one may enter
143  *  Level == 1: Kernel may enter
144  *  Level == 2: Kernel may enter
145  *  Level == 3: Everyone may enter
146  *
147  * Note: For compatibility with kernels not setting up exception handlers
148  *       early enough, Xen will avoid trying to inject #GP (and hence crash
149  *       the domain) when an RDMSR would require this, but no handler was
150  *       set yet. The precise conditions are implementation specific, and
151  *       new code may not rely on such behavior anyway.
152  */
/*
 * Accessors for trap_info.flags (_ti is a pointer to struct trap_info):
 *  TI_GET_DPL  yields the low two bits (privilege level, 0-3).
 *  TI_GET_IF   yields 0 or 4 (bit 2), not a normalised 0/1.
 *  TI_SET_DPL  ORs _dpl into flags unmasked; it neither clears a previously
 *              set level nor restricts _dpl to two bits.
 *  TI_SET_IF   ORs bit 2 in when _if is non-zero; it never clears the bit.
 */
153 #define TI_GET_DPL(_ti)      ((_ti)->flags & 3)
154 #define TI_GET_IF(_ti)       ((_ti)->flags & 4)
155 #define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
156 #define TI_SET_IF(_ti,_if)   ((_ti)->flags |= ((!!(_if))<<2))
157 struct trap_info {
158     uint8_t       vector;  /* exception vector                              */
159     uint8_t       flags;   /* bits 0-1: privilege level (0-3); bit 2 (value 4): clear event enable? */
160     uint16_t      cs;      /* code selector                                 */
161     unsigned long address; /* code offset                                   */
162 };
163 typedef struct trap_info trap_info_t;
164 DEFINE_XEN_GUEST_HANDLE(trap_info_t);
165 
166 typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
167 
168 /*
169  * The following is all CPU context. Note that the fpu_ctxt block is filled
170  * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
171  *
172  * Also note that when calling DOMCTL_setvcpucontext for HVM guests, not all
173  * information in this structure is updated, the fields read include: fpu_ctxt
174  * (if VGCF_I387_VALID is set), flags, user_regs and debugreg[*].
175  *
176  * Note: VCPUOP_initialise for HVM guests is not symmetric with
177  * DOMCTL_setvcpucontext, and uses struct vcpu_hvm_context from hvm/hvm_vcpu.h
178  */
179 struct vcpu_guest_context {
180     /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
181     struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers     */
    /*
     * Bit values for the flags field below.  Each _VGCF_* is the bit position
     * of the matching VGCF_* mask.  The upper-case VGCF_I387_VALID and
     * VGCF_IN_KERNEL have the same values as VGCF_i387_valid and
     * VGCF_in_kernel.
     */
182 #define VGCF_I387_VALID                (1<<0)
183 #define VGCF_IN_KERNEL                 (1<<2)
184 #define _VGCF_i387_valid               0
185 #define VGCF_i387_valid                (1<<_VGCF_i387_valid)
186 #define _VGCF_in_kernel                2
187 #define VGCF_in_kernel                 (1<<_VGCF_in_kernel)
188 #define _VGCF_failsafe_disables_events 3
189 #define VGCF_failsafe_disables_events  (1<<_VGCF_failsafe_disables_events)
190 #define _VGCF_syscall_disables_events  4
191 #define VGCF_syscall_disables_events   (1<<_VGCF_syscall_disables_events)
192 #define _VGCF_online                   5
193 #define VGCF_online                    (1<<_VGCF_online)
194     unsigned long flags;                    /* VGCF_* flags                 */
195     struct cpu_user_regs user_regs;         /* User-level CPU registers     */
196     struct trap_info trap_ctxt[256];        /* Virtual IDT                  */
197     unsigned long ldt_base, ldt_ents;       /* LDT (linear address, # ents) */
198     unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
199     unsigned long kernel_ss, kernel_sp;     /* Virtual TSS (only SS1/SP1)   */
200     /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
201     unsigned long ctrlreg[8];               /* CR0-CR7 (control registers)  */
202     unsigned long debugreg[8];              /* DB0-DB7 (debug registers)    */
    /*
     * Callback entry points.  The i386 layout carries an explicit CS for the
     * event and failsafe callbacks; the other layout has only the EIP values
     * plus syscall_callback_eip.
     */
203 #ifdef __i386__
204     unsigned long event_callback_cs;        /* CS:EIP of event callback     */
205     unsigned long event_callback_eip;
206     unsigned long failsafe_callback_cs;     /* CS:EIP of failsafe callback  */
207     unsigned long failsafe_callback_eip;
208 #else
209     unsigned long event_callback_eip;
210     unsigned long failsafe_callback_eip;
    /*
     * Under __XEN__ the syscall_callback_eip slot is overlaid, through an
     * anonymous union, with the two unsigned int compat callback selectors.
     */
211 #ifdef __XEN__
212     union {
213         unsigned long syscall_callback_eip;
214         struct {
215             unsigned int event_callback_cs;    /* compat CS of event cb     */
216             unsigned int failsafe_callback_cs; /* compat CS of failsafe cb  */
217         };
218     };
219 #else
220     unsigned long syscall_callback_eip;
221 #endif
222 #endif
223     unsigned long vm_assist;                /* VMASST_TYPE_* bitmap */
224 #ifdef __x86_64__
225     /* Segment base addresses. */
226     uint64_t      fs_base;
227     uint64_t      gs_base_kernel;
228     uint64_t      gs_base_user;
229 #endif
230 };
231 typedef struct vcpu_guest_context vcpu_guest_context_t;
232 DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
233 
/*
 * x86-specific shared-info fields: the anchors of the guest's p2m
 * (pseudo-physical to machine) tables, plus nmi_reason.
 * NOTE(review): presumably embedded in the generic shared-info structure
 * declared by the including header -- confirm.
 */
234 struct arch_shared_info {
235     /*
236      * Number of valid entries in the p2m table(s) anchored at
237      * pfn_to_mfn_frame_list_list and/or p2m_vaddr.
238      */
239     unsigned long max_pfn;
240     /*
241      * Frame containing list of mfns containing list of mfns containing p2m.
242      * A value of 0 indicates it has not yet been set up, ~0 indicates it has
243      * been set to invalid e.g. due to the p2m being too large for the 3-level
244      * p2m tree. In this case the linear mapper p2m list anchored at p2m_vaddr
245      * is to be used.
246      */
247     xen_pfn_t     pfn_to_mfn_frame_list_list;
248     unsigned long nmi_reason;
249     /*
250      * Following three fields are valid if p2m_cr3 contains a value different
251      * from 0.
252      * p2m_cr3 is the root of the address space where p2m_vaddr is valid.
253      * p2m_cr3 is in the same format as a cr3 value in the vcpu register state
254      * and holds the folded machine frame number (via xen_pfn_to_cr3) of a
255      * L3 or L4 page table.
256      * p2m_vaddr holds the virtual address of the linear p2m list. All entries
257      * in the range [0...max_pfn[ are accessible via this pointer.
258      * p2m_generation will be incremented by the guest before and after each
259      * change of the mappings of the p2m list. p2m_generation starts at 0 and
260      * a value with the least significant bit set indicates that a mapping
261      * update is in progress. This allows guest external software (e.g. in Dom0)
262      * to verify that read mappings are consistent and whether they have changed
263      * since the last check.
264      * Modifying a p2m element in the linear p2m list is allowed via an atomic
265      * write only.
266      */
267     unsigned long p2m_cr3;         /* cr3 value of the p2m address space */
268     unsigned long p2m_vaddr;       /* virtual address of the p2m list */
269     unsigned long p2m_generation;  /* generation count of p2m mapping */
270 #ifdef __i386__
271     /* There's no room for this field in the generic structure. */
    /* NOTE(review): presumably the high 32 bits of the wallclock seconds -- confirm. */
272     uint32_t wc_sec_hi;
273 #endif
274 };
275 typedef struct arch_shared_info arch_shared_info_t;
276 
277 #if defined(__XEN__) || defined(__XEN_TOOLS__)
278 /*
279  * struct xen_arch_domainconfig's ABI is covered by
280  * XEN_DOMCTL_INTERFACE_VERSION.
281  */
282 struct xen_arch_domainconfig {
    /*
     * Bits for emulation_flags: each _XEN_X86_EMU_* is a bit position and the
     * matching XEN_X86_EMU_* is its mask.  XEN_X86_EMU_ALL is the OR of all
     * eleven masks defined here (bits 0-10).
     */
283 #define _XEN_X86_EMU_LAPIC          0
284 #define XEN_X86_EMU_LAPIC           (1U<<_XEN_X86_EMU_LAPIC)
285 #define _XEN_X86_EMU_HPET           1
286 #define XEN_X86_EMU_HPET            (1U<<_XEN_X86_EMU_HPET)
287 #define _XEN_X86_EMU_PM             2
288 #define XEN_X86_EMU_PM              (1U<<_XEN_X86_EMU_PM)
289 #define _XEN_X86_EMU_RTC            3
290 #define XEN_X86_EMU_RTC             (1U<<_XEN_X86_EMU_RTC)
291 #define _XEN_X86_EMU_IOAPIC         4
292 #define XEN_X86_EMU_IOAPIC          (1U<<_XEN_X86_EMU_IOAPIC)
293 #define _XEN_X86_EMU_PIC            5
294 #define XEN_X86_EMU_PIC             (1U<<_XEN_X86_EMU_PIC)
295 #define _XEN_X86_EMU_VGA            6
296 #define XEN_X86_EMU_VGA             (1U<<_XEN_X86_EMU_VGA)
297 #define _XEN_X86_EMU_IOMMU          7
298 #define XEN_X86_EMU_IOMMU           (1U<<_XEN_X86_EMU_IOMMU)
299 #define _XEN_X86_EMU_PIT            8
300 #define XEN_X86_EMU_PIT             (1U<<_XEN_X86_EMU_PIT)
301 #define _XEN_X86_EMU_USE_PIRQ       9
302 #define XEN_X86_EMU_USE_PIRQ        (1U<<_XEN_X86_EMU_USE_PIRQ)
303 #define _XEN_X86_EMU_VPCI           10
304 #define XEN_X86_EMU_VPCI            (1U<<_XEN_X86_EMU_VPCI)
305 
306 #define XEN_X86_EMU_ALL             (XEN_X86_EMU_LAPIC | XEN_X86_EMU_HPET |  \
307                                      XEN_X86_EMU_PM | XEN_X86_EMU_RTC |      \
308                                      XEN_X86_EMU_IOAPIC | XEN_X86_EMU_PIC |  \
309                                      XEN_X86_EMU_VGA | XEN_X86_EMU_IOMMU |   \
310                                      XEN_X86_EMU_PIT | XEN_X86_EMU_USE_PIRQ |\
311                                      XEN_X86_EMU_VPCI)
312     uint32_t emulation_flags;
313 
314 /*
315  * Select whether to use a relaxed behavior for accesses to MSRs not explicitly
316  * handled by Xen instead of injecting a #GP to the guest. Note this option
317  * doesn't allow the guest to read or write to the underlying MSR.
318  */
319 #define XEN_X86_MSR_RELAXED (1u << 0)
320     uint32_t misc_flags;
321 };
322 
323 /* Location of online VCPU bitmap. */
324 #define XEN_ACPI_CPU_MAP             0xaf00
/*
 * Number of bytes needed to hold HVM_MAX_VCPUS bits, rounded up.
 * HVM_MAX_VCPUS is not defined in this header and must come from elsewhere.
 */
325 #define XEN_ACPI_CPU_MAP_LEN         ((HVM_MAX_VCPUS + 7) / 8)
326 
327 /* GPE0 bit set during CPU hotplug */
328 #define XEN_ACPI_GPE0_CPUHP_BIT      2
329 #endif
330 
331 /*
332  * Representations of architectural CPUID and MSR information.  Used as the
333  * serialised version of Xen's internal representation.
334  */
/*
 * One CPUID entry: the (leaf, subleaf) selector followed by four 32-bit
 * result words a, b, c, d.
 * NOTE(review): a-d presumably mirror EAX, EBX, ECX and EDX, and
 * XEN_CPUID_NO_SUBLEAF (all ones) presumably marks leaves that take no
 * subleaf input -- confirm with the consumers of this structure.
 */
335 typedef struct xen_cpuid_leaf {
336 #define XEN_CPUID_NO_SUBLEAF 0xffffffffu
337     uint32_t leaf, subleaf;
338     uint32_t a, b, c, d;
339 } xen_cpuid_leaf_t;
340 DEFINE_XEN_GUEST_HANDLE(xen_cpuid_leaf_t);
341 
/*
 * One serialised MSR entry: a 32-bit idx, a reserved flags word that must be
 * zero, and the 64-bit val.
 * NOTE(review): idx is presumably the MSR index -- confirm.
 */
342 typedef struct xen_msr_entry {
343     uint32_t idx;
344     uint32_t flags; /* Reserved MBZ. */
345     uint64_t val;
346 } xen_msr_entry_t;
347 DEFINE_XEN_GUEST_HANDLE(xen_msr_entry_t);
348 
349 #endif /* !__ASSEMBLY__ */
350 
351 /*
352  * ` enum neg_errnoval
353  * ` HYPERVISOR_fpu_taskswitch(int set);
354  * `
355  * Sets (if set!=0) or clears (if set==0) CR0.TS.
356  */
357 
358 /*
359  * ` enum neg_errnoval
360  * ` HYPERVISOR_set_debugreg(int regno, unsigned long value);
361  *
362  * ` unsigned long
363  * ` HYPERVISOR_get_debugreg(int regno);
364  * For 0<=regno<=7, returns the debug register value.
365  * For other values of regno, returns ((unsigned long)-EINVAL).
366  * (Unfortunately, this interface is defective: the error return cannot be
 * distinguished from a register that holds the same value.)
367  */
368 
369 /*
370  * Prefix forces emulation of some non-trapping instructions.
371  * Currently only CPUID.
372  */
/*
 * The prefix is the five bytes 0x0f 0x0b (the UD2 opcode) followed by
 * 0x78 0x65 0x6e (ASCII "xen").  Assembly sources get bare directive text;
 * C sources get string literals, so XEN_CPUID there is the concatenated
 * string ".byte 0x0f,0x0b,0x78,0x65,0x6e ; cpuid".
 * NOTE(review): the string form is presumably meant for inline asm
 * statements -- confirm.
 */
373 #ifdef __ASSEMBLY__
374 #define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ;
375 #define XEN_CPUID          XEN_EMULATE_PREFIX cpuid
376 #else
377 #define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; "
378 #define XEN_CPUID          XEN_EMULATE_PREFIX "cpuid"
379 #endif
380 
381 /*
382  * Debug console IO port, also called "port E9 hack". Each character written
383  * to this IO port will be printed on the hypervisor console, subject to log
384  * level restrictions.
385  */
386 #define XEN_HVM_DEBUGCONS_IOPORT 0xe9
387 
388 #endif /* __XEN_PUBLIC_ARCH_X86_XEN_H__ */
389 
390 /*
391  * Local variables:
392  * mode: C
393  * c-file-style: "BSD"
394  * c-basic-offset: 4
395  * tab-width: 4
396  * indent-tabs-mode: nil
397  * End:
398  */
399