/*
 * Machine specific setup for xen
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */

#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/memblock.h>

#include <asm/elf.h>
#include <asm/vdso.h>
#include <asm/e820.h>
#include <asm/setup.h>
#include <asm/acpi.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/page.h>
#include <xen/interface/callback.h>
#include <xen/interface/physdev.h>
#include <xen/interface/memory.h>
#include <xen/features.h>

#include "xen-ops.h"
#include "vdso.h"

/* These are code, but not functions.  Defined in entry.S */
extern const char xen_hypervisor_callback[];
extern const char xen_failsafe_callback[];
extern void xen_sysenter_target(void);
extern void xen_syscall_target(void);
extern void xen_syscall32_target(void);

static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
					      phys_addr_t end_addr)
{
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid = DOMID_SELF
	};
	unsigned long start, end;
	unsigned long len = 0;
	unsigned long pfn;
	int ret;

	start = PFN_UP(start_addr);
	end = PFN_DOWN(end_addr);

	if (end <= start)
		return 0;

	printk(KERN_INFO "xen_release_chunk: looking at area pfn %lx-%lx: ",
	       start, end);
	for (pfn = start; pfn < end; pfn++) {
		unsigned long mfn = pfn_to_mfn(pfn);

		/* Make sure pfn exists to start with */
		if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
			continue;

		set_xen_guest_handle(reservation.extent_start, &mfn);
		reservation.nr_extents = 1;

		ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					   &reservation);
		WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
		     start, end, ret);
		if (ret == 1) {
			set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
			len++;
		}
	}
	printk(KERN_CONT "%ld pages freed\n", len);

	return len;
}

static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
						     const struct e820map *e820)
{
	phys_addr_t max_addr = PFN_PHYS(max_pfn);
	phys_addr_t last_end = 0;
	unsigned long released = 0;
	int i;

	for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
		phys_addr_t end = e820->map[i].addr;
		end = min(max_addr, end);

		released += xen_release_chunk(last_end, end);
		last_end = e820->map[i].addr + e820->map[i].size;
	}

	if (last_end < max_addr)
		released += xen_release_chunk(last_end, max_addr);

	printk(KERN_INFO "released %ld pages of unused memory\n", released);
	return released;
}
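/*
 * Worked example (illustrative only, not from the original file): with
 * max_pfn = 0x40000 (1GB of pseudo-physical memory) and an e820 map of
 *
 *	[0x0000000000000000, 0x000000000009f000) E820_RAM
 *	[0x0000000000100000, 0x0000000020000000) E820_RAM
 *
 * the loop above calls xen_release_chunk(0, 0) (empty range, returns 0),
 * then xen_release_chunk(0x9f000, 0x100000) for the hole below 1MB, and
 * the trailing call releases [0x20000000, 0x40000000), the gap between
 * the last map entry and max_addr.  In other words, only the holes the
 * e820 map does not cover are handed back to Xen, and within each hole
 * only pages that still have a valid p2m entry are actually released.
 */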
/**
 * machine_specific_memory_setup - Hook for machine specific memory setup.
 **/

char * __init xen_memory_setup(void)
{
	unsigned long max_pfn = xen_start_info->nr_pages;

	max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);

	e820.nr_map = 0;

	e820_add_region(0, PFN_PHYS((u64)max_pfn), E820_RAM);

	/*
	 * Even though this is normal, usable memory under Xen, reserve
	 * ISA memory anyway because too many things think they can poke
	 * about in there.
	 */
	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
			E820_RESERVED);

	/*
	 * Reserve Xen bits:
	 *  - mfn_list
	 *  - xen_start_info
	 * See comment above "struct start_info" in <xen/interface/xen.h>
	 */
	memblock_x86_reserve_range(__pa(xen_start_info->mfn_list),
				   __pa(xen_start_info->pt_base),
				   "XEN START INFO");

	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);

	xen_return_unused_memory(xen_start_info->nr_pages, &e820);

	return "Xen";
}

static void xen_idle(void)
{
	local_irq_disable();

	if (need_resched())
		local_irq_enable();
	else {
		/*
		 * Clear the polling flag so the scheduler sends a real
		 * IPI to wake us, then halt with events re-enabled.
		 */
		current_thread_info()->status &= ~TS_POLLING;
		smp_mb__after_clear_bit();
		safe_halt();
		current_thread_info()->status |= TS_POLLING;
	}
}

/*
 * Set the bit indicating "nosegneg" library variants should be used.
 * We only need to bother in pure 32-bit mode; compat 32-bit processes
 * can have un-truncated segments, so wrapping around is allowed.
 */
static void __init fiddle_vdso(void)
{
#ifdef CONFIG_X86_32
	u32 *mask;
	mask = VDSO32_SYMBOL(&vdso32_int80_start, NOTE_MASK);
	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
	mask = VDSO32_SYMBOL(&vdso32_sysenter_start, NOTE_MASK);
	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
#endif
}

static __cpuinit int register_callback(unsigned type, const void *func)
{
	struct callback_register callback = {
		.type = type,
		.address = XEN_CALLBACK(__KERNEL_CS, func),
		.flags = CALLBACKF_mask_events,
	};

	return HYPERVISOR_callback_op(CALLBACKOP_register, &callback);
}

void __cpuinit xen_enable_sysenter(void)
{
	int ret;
	unsigned sysenter_feature;

#ifdef CONFIG_X86_32
	sysenter_feature = X86_FEATURE_SEP;
#else
	sysenter_feature = X86_FEATURE_SYSENTER32;
#endif

	if (!boot_cpu_has(sysenter_feature))
		return;

	ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
	if (ret != 0)
		setup_clear_cpu_cap(sysenter_feature);
}
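/*
 * Minimal sketch of the registration pattern (the handler name below is
 * hypothetical, for illustration only).  Every callback in this file is
 * wired up through the same register_callback() helper:
 *
 *	extern const char xen_example_callback[];	<- asm entry stub
 *
 *	if (register_callback(CALLBACKTYPE_event, xen_example_callback))
 *		BUG();
 *
 * HYPERVISOR_callback_op() returns 0 on success; on failure the callers
 * here either BUG() (the event and failsafe callbacks are mandatory) or
 * clear the matching CPU feature bit, as above, so the corresponding
 * fast system-call instruction is never advertised to userspace.
 */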
void __cpuinit xen_enable_syscall(void)
{
#ifdef CONFIG_X86_64
	int ret;

	ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
	if (ret != 0) {
		printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
		/* Pretty fatal; 64-bit userspace has no other
		   mechanism for syscalls. */
	}

	if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
		ret = register_callback(CALLBACKTYPE_syscall32,
					xen_syscall32_target);
		if (ret != 0)
			setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
	}
#endif /* CONFIG_X86_64 */
}

void __init xen_arch_setup(void)
{
	struct physdev_set_iopl set_iopl;
	int rc;

	xen_panic_handler_init();

	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);

	if (!xen_feature(XENFEAT_auto_translated_physmap))
		HYPERVISOR_vm_assist(VMASST_CMD_enable,
				     VMASST_TYPE_pae_extended_cr3);

	if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
	    register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
		BUG();

	xen_enable_sysenter();
	xen_enable_syscall();

	set_iopl.iopl = 1;
	rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
	if (rc != 0)
		printk(KERN_INFO "physdev_op failed %d\n", rc);

#ifdef CONFIG_ACPI
	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
		printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
		disable_acpi();
	}
#endif

	memcpy(boot_command_line, xen_start_info->cmd_line,
	       MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
	       COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);

	pm_idle = xen_idle;

	paravirt_disable_iospace();

	fiddle_vdso();
}
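/*
 * Note (assumption, based on this file alone): xen_memory_setup() is
 * expected to be installed as the machine-specific memory-setup hook
 * named in its comment, and xen_arch_setup() to run once during early
 * PV boot.  xen_enable_sysenter() and xen_enable_syscall() are also
 * safe to re-run on each CPU as it is brought up, which is why they
 * are marked __cpuinit rather than __init.
 */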