1 // SPDX-License-Identifier: GPL-2.0 2 3 #include <linux/console.h> 4 #include <linux/cpu.h> 5 #include <linux/kexec.h> 6 #include <linux/memblock.h> 7 #include <linux/slab.h> 8 #include <linux/panic_notifier.h> 9 10 #include <xen/xen.h> 11 #include <xen/features.h> 12 #include <xen/interface/sched.h> 13 #include <xen/interface/version.h> 14 #include <xen/page.h> 15 16 #include <asm/xen/hypercall.h> 17 #include <asm/xen/hypervisor.h> 18 #include <asm/cpu.h> 19 #include <asm/e820/api.h> 20 #include <asm/setup.h> 21 22 #include "xen-ops.h" 23 #include "smp.h" 24 #include "pmu.h" 25 26 EXPORT_SYMBOL_GPL(hypercall_page); 27 28 /* 29 * Pointer to the xen_vcpu_info structure or 30 * &HYPERVISOR_shared_info->vcpu_info[cpu]. See xen_hvm_init_shared_info 31 * and xen_vcpu_setup for details. By default it points to share_info->vcpu_info 32 * but during boot it is switched to point to xen_vcpu_info. 33 * The pointer is used in xen_evtchn_do_upcall to acknowledge pending events. 34 * Make sure that xen_vcpu_info doesn't cross a page boundary by making it 35 * cache-line aligned (the struct is guaranteed to have a size of 64 bytes, 36 * which matches the cache line size of 64-bit x86 processors). 37 */ 38 DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); 39 DEFINE_PER_CPU_ALIGNED(struct vcpu_info, xen_vcpu_info); 40 41 /* Linux <-> Xen vCPU id mapping */ 42 DEFINE_PER_CPU(uint32_t, xen_vcpu_id); 43 EXPORT_PER_CPU_SYMBOL(xen_vcpu_id); 44 45 unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; 46 EXPORT_SYMBOL(machine_to_phys_mapping); 47 unsigned long machine_to_phys_nr; 48 EXPORT_SYMBOL(machine_to_phys_nr); 49 50 struct start_info *xen_start_info; 51 EXPORT_SYMBOL_GPL(xen_start_info); 52 53 struct shared_info xen_dummy_shared_info; 54 55 __read_mostly bool xen_have_vector_callback = true; 56 EXPORT_SYMBOL_GPL(xen_have_vector_callback); 57 58 /* 59 * NB: These need to live in .data or alike because they're used by 60 * xen_prepare_pvh() which runs before clearing the bss. 61 */ 62 enum xen_domain_type __ro_after_init xen_domain_type = XEN_NATIVE; 63 EXPORT_SYMBOL_GPL(xen_domain_type); 64 uint32_t __ro_after_init xen_start_flags; 65 EXPORT_SYMBOL(xen_start_flags); 66 67 /* 68 * Point at some empty memory to start with. We map the real shared_info 69 * page as soon as fixmap is up and running. 70 */ 71 struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info; 72 73 static int xen_cpu_up_online(unsigned int cpu) 74 { 75 xen_init_lock_cpu(cpu); 76 return 0; 77 } 78 79 int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int), 80 int (*cpu_dead_cb)(unsigned int)) 81 { 82 int rc; 83 84 rc = cpuhp_setup_state_nocalls(CPUHP_XEN_PREPARE, 85 "x86/xen/guest:prepare", 86 cpu_up_prepare_cb, cpu_dead_cb); 87 if (rc >= 0) { 88 rc = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, 89 "x86/xen/guest:online", 90 xen_cpu_up_online, NULL); 91 if (rc < 0) 92 cpuhp_remove_state_nocalls(CPUHP_XEN_PREPARE); 93 } 94 95 return rc >= 0 ? 0 : rc; 96 } 97 98 static void xen_vcpu_setup_restore(int cpu) 99 { 100 /* Any per_cpu(xen_vcpu) is stale, so reset it */ 101 xen_vcpu_info_reset(cpu); 102 103 /* 104 * For PVH and PVHVM, setup online VCPUs only. The rest will 105 * be handled by hotplug. 106 */ 107 if (xen_pv_domain() || 108 (xen_hvm_domain() && cpu_online(cpu))) 109 xen_vcpu_setup(cpu); 110 } 111 112 /* 113 * On restore, set the vcpu placement up again. 114 * If it fails, then we're in a bad state, since 115 * we can't back out from using it... 116 */ 117 void xen_vcpu_restore(void) 118 { 119 int cpu; 120 121 for_each_possible_cpu(cpu) { 122 bool other_cpu = (cpu != smp_processor_id()); 123 bool is_up; 124 125 if (xen_vcpu_nr(cpu) == XEN_VCPU_ID_INVALID) 126 continue; 127 128 /* Only Xen 4.5 and higher support this. */ 129 is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, 130 xen_vcpu_nr(cpu), NULL) > 0; 131 132 if (other_cpu && is_up && 133 HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL)) 134 BUG(); 135 136 if (xen_pv_domain() || xen_feature(XENFEAT_hvm_safe_pvclock)) 137 xen_setup_runstate_info(cpu); 138 139 xen_vcpu_setup_restore(cpu); 140 141 if (other_cpu && is_up && 142 HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL)) 143 BUG(); 144 } 145 } 146 147 void xen_vcpu_info_reset(int cpu) 148 { 149 if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS) { 150 per_cpu(xen_vcpu, cpu) = 151 &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)]; 152 } else { 153 /* Set to NULL so that if somebody accesses it we get an OOPS */ 154 per_cpu(xen_vcpu, cpu) = NULL; 155 } 156 } 157 158 void xen_vcpu_setup(int cpu) 159 { 160 struct vcpu_register_vcpu_info info; 161 int err; 162 struct vcpu_info *vcpup; 163 164 BUILD_BUG_ON(sizeof(*vcpup) > SMP_CACHE_BYTES); 165 BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); 166 167 /* 168 * This path is called on PVHVM at bootup (xen_hvm_smp_prepare_boot_cpu) 169 * and at restore (xen_vcpu_restore). Also called for hotplugged 170 * VCPUs (cpu_init -> xen_hvm_cpu_prepare_hvm). 171 * However, the hypercall can only be done once (see below) so if a VCPU 172 * is offlined and comes back online then let's not redo the hypercall. 173 * 174 * For PV it is called during restore (xen_vcpu_restore) and bootup 175 * (xen_setup_vcpu_info_placement). The hotplug mechanism does not 176 * use this function. 177 */ 178 if (xen_hvm_domain()) { 179 if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu)) 180 return; 181 } 182 183 vcpup = &per_cpu(xen_vcpu_info, cpu); 184 info.mfn = arbitrary_virt_to_mfn(vcpup); 185 info.offset = offset_in_page(vcpup); 186 187 /* 188 * N.B. This hypercall can _only_ be called once per CPU. 189 * Subsequent calls will error out with -EINVAL. This is due to 190 * the fact that hypervisor has no unregister variant and this 191 * hypercall does not allow to over-write info.mfn and 192 * info.offset. 193 */ 194 err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu), 195 &info); 196 if (err) 197 panic("register_vcpu_info failed: cpu=%d err=%d\n", cpu, err); 198 199 per_cpu(xen_vcpu, cpu) = vcpup; 200 } 201 202 void __init xen_banner(void) 203 { 204 unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL); 205 struct xen_extraversion extra; 206 207 HYPERVISOR_xen_version(XENVER_extraversion, &extra); 208 209 pr_info("Booting kernel on %s\n", pv_info.name); 210 pr_info("Xen version: %u.%u%s%s\n", 211 version >> 16, version & 0xffff, extra.extraversion, 212 xen_feature(XENFEAT_mmu_pt_update_preserve_ad) 213 ? " (preserve-AD)" : ""); 214 } 215 216 /* Check if running on Xen version (major, minor) or later */ 217 bool xen_running_on_version_or_later(unsigned int major, unsigned int minor) 218 { 219 unsigned int version; 220 221 if (!xen_domain()) 222 return false; 223 224 version = HYPERVISOR_xen_version(XENVER_version, NULL); 225 if ((((version >> 16) == major) && ((version & 0xffff) >= minor)) || 226 ((version >> 16) > major)) 227 return true; 228 return false; 229 } 230 231 void __init xen_add_preferred_consoles(void) 232 { 233 add_preferred_console("xenboot", 0, NULL); 234 if (!boot_params.screen_info.orig_video_isVGA) 235 add_preferred_console("tty", 0, NULL); 236 add_preferred_console("hvc", 0, NULL); 237 if (boot_params.screen_info.orig_video_isVGA) 238 add_preferred_console("tty", 0, NULL); 239 } 240 241 void xen_reboot(int reason) 242 { 243 struct sched_shutdown r = { .reason = reason }; 244 int cpu; 245 246 for_each_online_cpu(cpu) 247 xen_pmu_finish(cpu); 248 249 if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r)) 250 BUG(); 251 } 252 253 static int reboot_reason = SHUTDOWN_reboot; 254 static bool xen_legacy_crash; 255 void xen_emergency_restart(void) 256 { 257 xen_reboot(reboot_reason); 258 } 259 260 static int 261 xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) 262 { 263 if (!kexec_crash_loaded()) { 264 if (xen_legacy_crash) 265 xen_reboot(SHUTDOWN_crash); 266 267 reboot_reason = SHUTDOWN_crash; 268 269 /* 270 * If panic_timeout==0 then we are supposed to wait forever. 271 * However, to preserve original dom0 behavior we have to drop 272 * into hypervisor. (domU behavior is controlled by its 273 * config file) 274 */ 275 if (panic_timeout == 0) 276 panic_timeout = -1; 277 } 278 return NOTIFY_DONE; 279 } 280 281 static int __init parse_xen_legacy_crash(char *arg) 282 { 283 xen_legacy_crash = true; 284 return 0; 285 } 286 early_param("xen_legacy_crash", parse_xen_legacy_crash); 287 288 static struct notifier_block xen_panic_block = { 289 .notifier_call = xen_panic_event, 290 .priority = INT_MIN 291 }; 292 293 int xen_panic_handler_init(void) 294 { 295 atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); 296 return 0; 297 } 298 299 void xen_pin_vcpu(int cpu) 300 { 301 static bool disable_pinning; 302 struct sched_pin_override pin_override; 303 int ret; 304 305 if (disable_pinning) 306 return; 307 308 pin_override.pcpu = cpu; 309 ret = HYPERVISOR_sched_op(SCHEDOP_pin_override, &pin_override); 310 311 /* Ignore errors when removing override. */ 312 if (cpu < 0) 313 return; 314 315 switch (ret) { 316 case -ENOSYS: 317 pr_warn("Unable to pin on physical cpu %d. In case of problems consider vcpu pinning.\n", 318 cpu); 319 disable_pinning = true; 320 break; 321 case -EPERM: 322 WARN(1, "Trying to pin vcpu without having privilege to do so\n"); 323 disable_pinning = true; 324 break; 325 case -EINVAL: 326 case -EBUSY: 327 pr_warn("Physical cpu %d not available for pinning. Check Xen cpu configuration.\n", 328 cpu); 329 break; 330 case 0: 331 break; 332 default: 333 WARN(1, "rc %d while trying to pin vcpu\n", ret); 334 disable_pinning = true; 335 } 336 } 337 338 #ifdef CONFIG_HOTPLUG_CPU 339 void xen_arch_register_cpu(int num) 340 { 341 arch_register_cpu(num); 342 } 343 EXPORT_SYMBOL(xen_arch_register_cpu); 344 345 void xen_arch_unregister_cpu(int num) 346 { 347 arch_unregister_cpu(num); 348 } 349 EXPORT_SYMBOL(xen_arch_unregister_cpu); 350 #endif 351 352 /* Amount of extra memory space we add to the e820 ranges */ 353 struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; 354 355 void __init xen_add_extra_mem(unsigned long start_pfn, unsigned long n_pfns) 356 { 357 unsigned int i; 358 359 /* 360 * No need to check for zero size, should happen rarely and will only 361 * write a new entry regarded to be unused due to zero size. 362 */ 363 for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { 364 /* Add new region. */ 365 if (xen_extra_mem[i].n_pfns == 0) { 366 xen_extra_mem[i].start_pfn = start_pfn; 367 xen_extra_mem[i].n_pfns = n_pfns; 368 break; 369 } 370 /* Append to existing region. */ 371 if (xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns == 372 start_pfn) { 373 xen_extra_mem[i].n_pfns += n_pfns; 374 break; 375 } 376 } 377 if (i == XEN_EXTRA_MEM_MAX_REGIONS) 378 printk(KERN_WARNING "Warning: not enough extra memory regions\n"); 379 380 memblock_reserve(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns)); 381 } 382