/*
 * Machine specific setup for xen
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */

#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/pm.h>

#include <asm/elf.h>
#include <asm/vdso.h>
#include <asm/e820.h>
#include <asm/setup.h>
#include <asm/acpi.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/page.h>
#include <xen/interface/callback.h>
#include <xen/interface/physdev.h>
#include <xen/interface/memory.h>
#include <xen/features.h>

#include "xen-ops.h"
#include "vdso.h"

/* These are code, but not functions. Defined in entry.S */
extern const char xen_hypervisor_callback[];
extern const char xen_failsafe_callback[];
extern void xen_sysenter_target(void);
extern void xen_syscall_target(void);
extern void xen_syscall32_target(void);
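
/*
 * Return to the hypervisor any pages in [start_addr, end_addr) that the
 * domain holds but cannot use.  Each page is handed back one extent at a
 * time via a XENMEM_decrease_reservation hypercall, and its p2m entry is
 * invalidated so the kernel no longer treats the machine frame as ours.
 */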
static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
					      phys_addr_t end_addr)
{
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid = DOMID_SELF
	};
	unsigned long start, end;
	unsigned long len = 0;
	unsigned long pfn;
	int ret;

	start = PFN_UP(start_addr);
	end = PFN_DOWN(end_addr);

	if (end <= start)
		return 0;

	printk(KERN_INFO "xen_release_chunk: looking at area pfn %lx-%lx: ",
	       start, end);
	for (pfn = start; pfn < end; pfn++) {
		unsigned long mfn = pfn_to_mfn(pfn);

		/* Make sure pfn exists to start with */
		if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
			continue;

		set_xen_guest_handle(reservation.extent_start, &mfn);
		reservation.nr_extents = 1;

		ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					   &reservation);
		WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
		     start, end, ret);
		if (ret == 1) {
			set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
			len++;
		}
	}
	printk(KERN_CONT "%ld pages freed\n", len);

	return len;
}

static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
						     const struct e820map *e820)
{
	phys_addr_t max_addr = PFN_PHYS(max_pfn);
	phys_addr_t last_end = 0;
	unsigned long released = 0;
	int i;

	for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
		phys_addr_t end = e820->map[i].addr;
		end = min(max_addr, end);

		released += xen_release_chunk(last_end, end);
		last_end = e820->map[i].addr + e820->map[i].size;
	}

	if (last_end < max_addr)
		released += xen_release_chunk(last_end, max_addr);

	printk(KERN_INFO "released %ld pages of unused memory\n", released);
	return released;
}

/**
 * machine_specific_memory_setup - Hook for machine specific memory setup.
 **/
char * __init xen_memory_setup(void)
{
	unsigned long max_pfn = xen_start_info->nr_pages;

	max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);

	e820.nr_map = 0;

	e820_add_region(0, PFN_PHYS((u64)max_pfn), E820_RAM);

	/*
	 * Even though this is normal, usable memory under Xen, reserve
	 * ISA memory anyway because too many things think they can poke
	 * about in there.
	 */
	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
			E820_RESERVED);

	/*
	 * Reserve Xen bits:
	 *  - mfn_list
	 *  - xen_start_info
	 * See comment above "struct start_info" in <xen/interface/xen.h>
	 */
	reserve_early(__pa(xen_start_info->mfn_list),
		      __pa(xen_start_info->pt_base),
		      "XEN START INFO");

	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);

	xen_return_unused_memory(xen_start_info->nr_pages, &e820);

	return "Xen";
}
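
/*
 * Idle by blocking the VCPU in the hypervisor (safe_halt() is
 * paravirtualized under Xen) rather than executing a native hlt.
 * TS_POLLING is cleared first so the scheduler knows it must send an
 * interrupt to wake this CPU; the barrier makes the cleared flag
 * visible to other CPUs before we block.
 */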
static void xen_idle(void)
{
	local_irq_disable();

	if (need_resched())
		local_irq_enable();
	else {
		current_thread_info()->status &= ~TS_POLLING;
		smp_mb__after_clear_bit();
		safe_halt();
		current_thread_info()->status |= TS_POLLING;
	}
}

/*
 * Set the bit indicating "nosegneg" library variants should be used.
 * We only need to bother in pure 32-bit mode; compat 32-bit processes
 * can have un-truncated segments, so wrapping around is allowed.
 */
static void __init fiddle_vdso(void)
{
#ifdef CONFIG_X86_32
	u32 *mask;
	mask = VDSO32_SYMBOL(&vdso32_int80_start, NOTE_MASK);
	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
	mask = VDSO32_SYMBOL(&vdso32_sysenter_start, NOTE_MASK);
	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
#endif
}

static __cpuinit int register_callback(unsigned type, const void *func)
{
	struct callback_register callback = {
		.type = type,
		.address = XEN_CALLBACK(__KERNEL_CS, func),
		.flags = CALLBACKF_mask_events,
	};

	return HYPERVISOR_callback_op(CALLBACKOP_register, &callback);
}

void __cpuinit xen_enable_sysenter(void)
{
	int ret;
	unsigned sysenter_feature;

#ifdef CONFIG_X86_32
	sysenter_feature = X86_FEATURE_SEP;
#else
	sysenter_feature = X86_FEATURE_SYSENTER32;
#endif

	if (!boot_cpu_has(sysenter_feature))
		return;

	ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
	if (ret != 0)
		setup_clear_cpu_cap(sysenter_feature);
}

void __cpuinit xen_enable_syscall(void)
{
#ifdef CONFIG_X86_64
	int ret;

	ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
	if (ret != 0) {
		printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
		/* Pretty fatal; 64-bit userspace has no other
		   mechanism for syscalls. */
	}

	if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
		ret = register_callback(CALLBACKTYPE_syscall32,
					xen_syscall32_target);
		if (ret != 0)
			setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
	}
#endif /* CONFIG_X86_64 */
}

void __init xen_arch_setup(void)
{
	struct physdev_set_iopl set_iopl;
	int rc;

	xen_panic_handler_init();

	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);

	if (!xen_feature(XENFEAT_auto_translated_physmap))
		HYPERVISOR_vm_assist(VMASST_CMD_enable,
				     VMASST_TYPE_pae_extended_cr3);

	if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
	    register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
		BUG();

	xen_enable_sysenter();
	xen_enable_syscall();

	set_iopl.iopl = 1;
	rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
	if (rc != 0)
		printk(KERN_INFO "physdev_op failed %d\n", rc);

#ifdef CONFIG_ACPI
	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
		printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
		disable_acpi();
	}
#endif

	memcpy(boot_command_line, xen_start_info->cmd_line,
	       MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
	       COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);

	pm_idle = xen_idle;

	paravirt_disable_iospace();

	fiddle_vdso();
}