// SPDX-License-Identifier: GPL-2.0-only
/*
 * Local APIC handling, local APIC timers
 *
 * (c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
 *
 * Fixes
 * Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
 *					thanks to Eric Gilmore
 *					and Rolf G. Tews
 *					for testing these extensively.
 * Maciej W. Rozycki	:	Various updates and fixes.
 * Mikael Pettersson	:	Power Management for UP-APIC.
 * Pavel Machek and
 * Mikael Pettersson	:	PM converted to driver model.
 */

#include <linux/perf_event.h>
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/acpi_pmtmr.h>
#include <linux/clockchips.h>
#include <linux/interrupt.h>
#include <linux/memblock.h>
#include <linux/ftrace.h>
#include <linux/ioport.h>
#include <linux/export.h>
#include <linux/syscore_ops.h>
#include <linux/delay.h>
#include <linux/timex.h>
#include <linux/i8253.h>
#include <linux/dmar.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/dmi.h>
#include <linux/smp.h>
#include <linux/mm.h>

#include <asm/trace/irq_vectors.h>
#include <asm/irq_remapping.h>
#include <asm/pc-conf-reg.h>
#include <asm/perf_event.h>
#include <asm/x86_init.h>
#include <linux/atomic.h>
#include <asm/barrier.h>
#include <asm/mpspec.h>
#include <asm/i8259.h>
#include <asm/proto.h>
#include <asm/traps.h>
#include <asm/apic.h>
#include <asm/acpi.h>
#include <asm/io_apic.h>
#include <asm/desc.h>
#include <asm/hpet.h>
#include <asm/mtrr.h>
#include <asm/time.h>
#include <asm/smp.h>
#include <asm/mce.h>
#include <asm/tsc.h>
#include <asm/hypervisor.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/irq_regs.h>
#include <asm/cpu.h>

unsigned int num_processors;

unsigned disabled_cpus;

/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid __ro_after_init = -1U;
EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);

u8 boot_cpu_apic_version __ro_after_init;

/*
 * The highest APIC ID seen during enumeration.
 */
static unsigned int max_physical_apicid;

/*
 * Bitmask of physically existing CPUs:
 */
physid_mask_t phys_cpu_present_map;

/*
 * Processor to be disabled specified by kernel parameter
 * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to
 * avoid undefined behaviour caused by sending INIT from AP to BSP.
 */
static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID;

/*
 * This variable controls which CPUs receive external NMIs.  By default,
 * external NMIs are delivered only to the BSP.
 */
static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP;

/*
 * Hypervisor supports 15 bits of APIC ID in MSI Extended Destination ID
 */
static bool virt_ext_dest_id __ro_after_init;

/*
 * Map cpu index to physical APIC ID
 */
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);
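
/*
 * Illustrative usage sketch: before the per-cpu areas are set up, these
 * maps are read through the early accessor, e.g.:
 *
 *	u16 apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
 *
 * Once the per-cpu areas are live, the same data is reachable via
 * per_cpu(x86_cpu_to_apicid, cpu).
 */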

#ifdef CONFIG_X86_32

/*
 * On x86_32, the mapping between cpu and logical apicid may vary
 * depending on apic in use. The following early percpu variable is
 * used for the mapping. This is where the behaviors of x86_64 and 32
 * actually diverge. Let's keep it ugly for now.
 */
DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);

/* Local APIC was disabled by the BIOS and enabled by the kernel */
static int enabled_via_apicbase __ro_after_init;

/*
 * Handle interrupt mode configuration register (IMCR).
 * This register controls whether the interrupt signals
 * that reach the BSP come from the master PIC or from the
 * local APIC. Before entering Symmetric I/O Mode, either
 * the BIOS or the operating system must switch out of
 * PIC Mode by changing the IMCR.
 */
static inline void imcr_pic_to_apic(void)
{
	/* NMI and 8259 INTR go through APIC */
	pc_conf_set(PC_CONF_MPS_IMCR, 0x01);
}

static inline void imcr_apic_to_pic(void)
{
	/* NMI and 8259 INTR go directly to BSP */
	pc_conf_set(PC_CONF_MPS_IMCR, 0x00);
}
#endif

/*
 * Knob to control our willingness to enable the local APIC.
 *
 * +1=force-enable
 */
static int force_enable_local_apic __initdata;

/*
 * APIC command line parameters
 */
static int __init parse_lapic(char *arg)
{
	if (IS_ENABLED(CONFIG_X86_32) && !arg)
		force_enable_local_apic = 1;
	else if (arg && !strncmp(arg, "notscdeadline", 13))
		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
	return 0;
}
early_param("lapic", parse_lapic);

#ifdef CONFIG_X86_64
static int apic_calibrate_pmtmr __initdata;
static __init int setup_apicpmtimer(char *s)
{
	apic_calibrate_pmtmr = 1;
	notsc_setup(NULL);
	return 1;
}
__setup("apicpmtimer", setup_apicpmtimer);
#endif

unsigned long mp_lapic_addr __ro_after_init;
int disable_apic __ro_after_init;
/* Disable local APIC timer from the kernel commandline or via dmi quirk */
static int disable_apic_timer __initdata;
/* Local APIC timer works in C2 */
int local_apic_timer_c2_ok __ro_after_init;
EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);

/*
 * Debug level, exported for io_apic.c
 */
int apic_verbosity __ro_after_init;

int pic_mode __ro_after_init;

/* Have we found an MP table */
int smp_found_config __ro_after_init;

static struct resource lapic_resource = {
	.name	= "Local APIC",
	.flags	= IORESOURCE_MEM | IORESOURCE_BUSY,
};

unsigned int lapic_timer_period = 0;

static void apic_pm_activate(void);

static unsigned long apic_phys __ro_after_init;

/*
 * Get the LAPIC version
 */
static inline int lapic_get_version(void)
{
	return GET_APIC_VERSION(apic_read(APIC_LVR));
}

/*
 * Check whether the APIC is integrated or a separate chip
 */
static inline int lapic_is_integrated(void)
{
	return APIC_INTEGRATED(lapic_get_version());
}

/*
 * Check whether this is a modern or a first generation APIC
 */
static int modern_apic(void)
{
	/* AMD systems use old APIC versions, so check the CPU */
	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
	    boot_cpu_data.x86 >= 0xf)
		return 1;

	/* Hygon systems use modern APIC */
	if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
		return 1;

	return lapic_get_version() >= 0x14;
}
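
/*
 * Illustrative sketch of the version decoding used above: both helpers
 * parse the Local APIC Version Register, roughly:
 *
 *	u32 lvr     = apic_read(APIC_LVR);
 *	int version = GET_APIC_VERSION(lvr);	// 0x0X: 82489DX, 0x1X: integrated
 *	int maxlvt  = GET_APIC_MAXLVT(lvr);	// highest LVT entry index
 */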

/*
 * Right after this call the APIC becomes NOOP driven,
 * so apic->write()/read() don't do anything.
 */
static void __init apic_disable(void)
{
	pr_info("APIC: switched to apic NOOP\n");
	apic = &apic_noop;
}

void native_apic_wait_icr_idle(void)
{
	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
		cpu_relax();
}

u32 native_safe_apic_wait_icr_idle(void)
{
	u32 send_status;
	int timeout;

	timeout = 0;
	do {
		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
		if (!send_status)
			break;
		inc_irq_stat(icr_read_retry_count);
		udelay(100);
	} while (timeout++ < 1000);

	return send_status;
}

void native_apic_icr_write(u32 low, u32 id)
{
	unsigned long flags;

	local_irq_save(flags);
	apic_write(APIC_ICR2, SET_XAPIC_DEST_FIELD(id));
	apic_write(APIC_ICR, low);
	local_irq_restore(flags);
}

u64 native_apic_icr_read(void)
{
	u32 icr1, icr2;

	icr2 = apic_read(APIC_ICR2);
	icr1 = apic_read(APIC_ICR);

	return icr1 | ((u64)icr2 << 32);
}

#ifdef CONFIG_X86_32
/**
 * get_physical_broadcast - Get number of physical broadcast IDs
 */
int get_physical_broadcast(void)
{
	return modern_apic() ? 0xff : 0xf;
}
#endif

/**
 * lapic_get_maxlvt - get the maximum number of local vector table entries
 */
int lapic_get_maxlvt(void)
{
	/*
	 * - we always have APIC integrated on 64bit mode
	 * - 82489DXs do not report # of LVT entries
	 */
	return lapic_is_integrated() ? GET_APIC_MAXLVT(apic_read(APIC_LVR)) : 2;
}

/*
 * Local APIC timer
 */

/* Clock divisor */
#define APIC_DIVISOR 16
#define TSC_DIVISOR  8

/* i82489DX specific */
#define		I82489DX_BASE_DIVIDER		(((0x2) << 18))

/*
 * This function sets up the local APIC timer, with a timeout of
 * 'clocks' APIC bus clock. During calibration we actually call
 * this function twice on the boot CPU, once with a bogus timeout
 * value, second time for real. The other (noncalibrating) CPUs
 * call this function only once, with the real, calibrated value.
 *
 * We do reads before writes even if unnecessary, to get around the
 * P5 APIC double write bug.
 */
static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
{
	unsigned int lvtt_value, tmp_value;

	lvtt_value = LOCAL_TIMER_VECTOR;
	if (!oneshot)
		lvtt_value |= APIC_LVT_TIMER_PERIODIC;
	else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
		lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE;

	/*
	 * The i82489DX APIC uses bits 18 and 19 for the base divider. This
	 * overlaps with bit 18 on integrated APICs, but is not documented
	 * in the SDM. No problem though. i82489DX equipped systems do not
	 * have TSC deadline timer.
	 */
	if (!lapic_is_integrated())
		lvtt_value |= I82489DX_BASE_DIVIDER;

	if (!irqen)
		lvtt_value |= APIC_LVT_MASKED;

	apic_write(APIC_LVTT, lvtt_value);

	if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
		/*
		 * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode,
		 * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized.
		 * According to Intel, MFENCE can do the serialization here.
		 */
		asm volatile("mfence" : : : "memory");
		return;
	}

	/*
	 * Divide PICLK by 16
	 */
	tmp_value = apic_read(APIC_TDCR);
	apic_write(APIC_TDCR,
		(tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
		APIC_TDR_DIV_16);

	if (!oneshot)
		apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
}
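
/*
 * Illustrative example: with the divide configuration register set to
 * divide-by-16 above, the current-count register decrements once every
 * 16 bus clocks, so a periodic setup of
 *
 *	__setup_APIC_LVTT(lapic_timer_period, 0, 1);
 *
 * fires LOCAL_TIMER_VECTOR every lapic_timer_period bus clocks, i.e.
 * once per jiffy with the calibrated value computed further below.
 */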

/*
 * Setup extended LVT, AMD specific
 *
 * Software should use the LVT offsets the BIOS provides.  The offsets
 * are determined by the subsystems using it like those for MCE
 * threshold or IBS.  On K8 only offset 0 (APIC500) and MCE interrupts
 * are supported.  Beginning with family 10h at least 4 offsets are
 * available.
 *
 * Since the offsets must be consistent for all cores, we keep track
 * of the LVT offsets in software and reserve the offset for the same
 * vector also to be used on other cores.  An offset is freed by
 * setting the entry to APIC_EILVT_MASKED.
 *
 * If the BIOS is right, there should be no conflicts.  Otherwise a
 * "[Firmware Bug]: ..." error message is generated.  However, if
 * software does not properly determine the offsets, it is not
 * necessarily a BIOS bug.
 */

static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX];

static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
{
	return (old & APIC_EILVT_MASKED)
		|| (new == APIC_EILVT_MASKED)
		|| ((new & ~APIC_EILVT_MASKED) == old);
}

static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
{
	unsigned int rsvd, vector;

	if (offset >= APIC_EILVT_NR_MAX)
		return ~0;

	rsvd = atomic_read(&eilvt_offsets[offset]);
	do {
		vector = rsvd & ~APIC_EILVT_MASKED;	/* 0: unassigned */
		if (vector && !eilvt_entry_is_changeable(vector, new))
			/* may not change if vectors are different */
			return rsvd;
	} while (!atomic_try_cmpxchg(&eilvt_offsets[offset], &rsvd, new));

	rsvd = new & ~APIC_EILVT_MASKED;
	if (rsvd && rsvd != vector)
		pr_info("LVT offset %d assigned for vector 0x%02x\n",
			offset, rsvd);

	return new;
}

/*
 * If mask=1, the LVT entry does not generate interrupts while mask=0
 * enables the vector. See also the BKDGs. Must be called with
 * preemption disabled.
 */

int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
{
	unsigned long reg = APIC_EILVTn(offset);
	unsigned int new, old, reserved;

	new = (mask << 16) | (msg_type << 8) | vector;
	old = apic_read(reg);
	reserved = reserve_eilvt_offset(offset, new);

	if (reserved != new) {
		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
		       "vector 0x%x, but the register is already in use for "
		       "vector 0x%x on another cpu\n",
		       smp_processor_id(), reg, offset, new, reserved);
		return -EINVAL;
	}

	if (!eilvt_entry_is_changeable(old, new)) {
		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
		       "vector 0x%x, but the register is already in use for "
		       "vector 0x%x on this cpu\n",
		       smp_processor_id(), reg, offset, new, old);
		return -EBUSY;
	}

	apic_write(reg, new);

	return 0;
}
EXPORT_SYMBOL_GPL(setup_APIC_eilvt);
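
/*
 * Illustrative usage sketch: a consumer such as the AMD IBS driver
 * claims a BIOS-provided offset for NMI delivery with a zero vector,
 * roughly:
 *
 *	if (setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
 *		pr_err("LVT offset %d not available\n", offset);
 *
 * and frees it again with setup_APIC_eilvt(offset, 0, 0, 1), i.e. a
 * masked entry.
 */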

/*
 * Program the next event, relative to now
 */
static int lapic_next_event(unsigned long delta,
			    struct clock_event_device *evt)
{
	apic_write(APIC_TMICT, delta);
	return 0;
}

static int lapic_next_deadline(unsigned long delta,
			       struct clock_event_device *evt)
{
	u64 tsc;

	/* This MSR is special and needs a special fence: */
	weak_wrmsr_fence();

	tsc = rdtsc();
	wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
	return 0;
}

static int lapic_timer_shutdown(struct clock_event_device *evt)
{
	unsigned int v;

	/* Lapic used as dummy for broadcast ? */
	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
		return 0;

	v = apic_read(APIC_LVTT);
	v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
	apic_write(APIC_LVTT, v);
	apic_write(APIC_TMICT, 0);
	return 0;
}

static inline int
lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot)
{
	/* Lapic used as dummy for broadcast ? */
	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
		return 0;

	__setup_APIC_LVTT(lapic_timer_period, oneshot, 1);
	return 0;
}

static int lapic_timer_set_periodic(struct clock_event_device *evt)
{
	return lapic_timer_set_periodic_oneshot(evt, false);
}

static int lapic_timer_set_oneshot(struct clock_event_device *evt)
{
	return lapic_timer_set_periodic_oneshot(evt, true);
}

/*
 * Local APIC timer broadcast function
 */
static void lapic_timer_broadcast(const struct cpumask *mask)
{
#ifdef CONFIG_SMP
	apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
#endif
}
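
/*
 * Note on the scaling above: the deadline clockevent is registered at
 * tsc_khz * (1000 / TSC_DIVISOR) Hz, so the 'delta' handed to
 * lapic_next_deadline() is in units of TSC_DIVISOR TSC cycles. E.g.
 * with tsc_khz = 2000000 (2 GHz), a 1ms event is delta = 250000 and the
 * MSR is armed at rdtsc() + 250000 * 8 = 2000000 TSC cycles ahead.
 */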

/*
 * The local apic timer can be used for any function which is CPU local.
 */
static struct clock_event_device lapic_clockevent = {
	.name				= "lapic",
	.features			= CLOCK_EVT_FEAT_PERIODIC |
					  CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP
					  | CLOCK_EVT_FEAT_DUMMY,
	.shift				= 32,
	.set_state_shutdown		= lapic_timer_shutdown,
	.set_state_periodic		= lapic_timer_set_periodic,
	.set_state_oneshot		= lapic_timer_set_oneshot,
	.set_state_oneshot_stopped	= lapic_timer_shutdown,
	.set_next_event			= lapic_next_event,
	.broadcast			= lapic_timer_broadcast,
	.rating				= 100,
	.irq				= -1,
};
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);

static const struct x86_cpu_id deadline_match[] __initconst = {
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x2, 0x2), 0x3a), /* EP */
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x4, 0x4), 0x0f), /* EX */

	X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X,	0x0b000020),

	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x2, 0x2), 0x00000011),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x3), 0x0700000e),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x4, 0x4), 0x0f00000c),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x5, 0x5), 0x0e000003),

	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x3, 0x3), 0x01000136),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x4, 0x4), 0x02000014),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x5, 0xf), 0),

	X86_MATCH_INTEL_FAM6_MODEL( HASWELL,		0x22),
	X86_MATCH_INTEL_FAM6_MODEL( HASWELL_L,		0x20),
	X86_MATCH_INTEL_FAM6_MODEL( HASWELL_G,		0x17),

	X86_MATCH_INTEL_FAM6_MODEL( BROADWELL,		0x25),
	X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_G,	0x17),

	X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE_L,		0xb2),
	X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE,		0xb2),

	X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE_L,		0x52),
	X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE,		0x52),

	{},
};

static __init bool apic_validate_deadline_timer(void)
{
	const struct x86_cpu_id *m;
	u32 rev;

	if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
		return false;
	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return true;

	m = x86_match_cpu(deadline_match);
	if (!m)
		return true;

	rev = (u32)m->driver_data;

	if (boot_cpu_data.microcode >= rev)
		return true;

	setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
	pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
	       "please update microcode to version: 0x%x (or later)\n", rev);
	return false;
}
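
/*
 * Illustrative reading of the table above: entries are keyed by
 * family/model/stepping with the minimum good microcode revision in
 * driver_data. E.g. a SKYLAKE_X part at stepping 4 needs
 * boot_cpu_data.microcode >= 0x02000014, otherwise
 * X86_FEATURE_TSC_DEADLINE_TIMER is cleared and the timer falls back
 * to regular oneshot mode.
 */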

/*
 * Setup the local APIC timer for this CPU. Copy the initialized values
 * of the boot CPU and register the clock event in the framework.
 */
static void setup_APIC_timer(void)
{
	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);

	if (this_cpu_has(X86_FEATURE_ARAT)) {
		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
		/* Make LAPIC timer preferable over percpu HPET */
		lapic_clockevent.rating = 150;
	}

	memcpy(levt, &lapic_clockevent, sizeof(*levt));
	levt->cpumask = cpumask_of(smp_processor_id());

	if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
		levt->name = "lapic-deadline";
		levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |
				    CLOCK_EVT_FEAT_DUMMY);
		levt->set_next_event = lapic_next_deadline;
		clockevents_config_and_register(levt,
						tsc_khz * (1000 / TSC_DIVISOR),
						0xF, ~0UL);
	} else
		clockevents_register_device(levt);
}

/*
 * Install the updated TSC frequency from recalibration at the TSC
 * deadline clockevent devices.
 */
static void __lapic_update_tsc_freq(void *info)
{
	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);

	if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
		return;

	clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR));
}

void lapic_update_tsc_freq(void)
{
	/*
	 * The clockevent device's ->mult and ->shift can both be
	 * changed. In order to avoid races, schedule the frequency
	 * update code on each CPU.
	 */
	on_each_cpu(__lapic_update_tsc_freq, NULL, 0);
}

/*
 * In this function we calibrate APIC bus clocks to the external timer.
 *
 * We want to do the calibration only once since we want to have local timer
 * irqs synchronous. CPUs connected by the same APIC bus have the very same bus
 * frequency.
 *
 * This was previously done by reading the PIT/HPET and waiting for a wrap
 * around to find out that a tick has elapsed. I have a box where the PIT
 * readout is broken, so it never gets out of the wait loop again. This was
 * also reported by others.
 *
 * Monitoring the jiffies value is inaccurate and the clockevents
 * infrastructure allows us to do a simple substitution of the interrupt
 * handler.
 *
 * The calibration routine also uses the pm_timer when possible, as the PIT
 * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
 * back to normal later in the boot process).
 */

#define LAPIC_CAL_LOOPS		(HZ/10)

static __initdata int lapic_cal_loops = -1;
static __initdata long lapic_cal_t1, lapic_cal_t2;
static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
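
/*
 * Worked example for the calibration window: LAPIC_CAL_LOOPS = HZ/10
 * ticks is 100ms regardless of HZ. The APIC count delta over that
 * window, scaled back by APIC_DIVISOR and divided by the number of
 * ticks,
 *
 *	lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
 *
 * yields bus clocks per jiffy, which periodic mode converts back via
 * clocks / APIC_DIVISOR when programming TMICT in __setup_APIC_LVTT().
 */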

/*
 * Temporary interrupt handler and polled calibration function.
 */
static void __init lapic_cal_handler(struct clock_event_device *dev)
{
	unsigned long long tsc = 0;
	long tapic = apic_read(APIC_TMCCT);
	unsigned long pm = acpi_pm_read_early();

	if (boot_cpu_has(X86_FEATURE_TSC))
		tsc = rdtsc();

	switch (lapic_cal_loops++) {
	case 0:
		lapic_cal_t1 = tapic;
		lapic_cal_tsc1 = tsc;
		lapic_cal_pm1 = pm;
		lapic_cal_j1 = jiffies;
		break;

	case LAPIC_CAL_LOOPS:
		lapic_cal_t2 = tapic;
		lapic_cal_tsc2 = tsc;
		if (pm < lapic_cal_pm1)
			pm += ACPI_PM_OVRRUN;
		lapic_cal_pm2 = pm;
		lapic_cal_j2 = jiffies;
		break;
	}
}

static int __init
calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
{
	const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
	const long pm_thresh = pm_100ms / 100;
	unsigned long mult;
	u64 res;

#ifndef CONFIG_X86_PM_TIMER
	return -1;
#endif

	apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm);

	/* Check if the PM timer is available */
	if (!deltapm)
		return -1;

	mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);

	if (deltapm > (pm_100ms - pm_thresh) &&
	    deltapm < (pm_100ms + pm_thresh)) {
		apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n");
		return 0;
	}

	res = (((u64)deltapm) * mult) >> 22;
	do_div(res, 1000000);
	pr_warn("APIC calibration not consistent "
		"with PM-Timer: %ldms instead of 100ms\n", (long)res);

	/* Correct the lapic counter value */
	res = (((u64)(*delta)) * pm_100ms);
	do_div(res, deltapm);
	pr_info("APIC delta adjusted to PM-Timer: "
		"%lu (%ld)\n", (unsigned long)res, *delta);
	*delta = (long)res;

	/* Correct the tsc counter value */
	if (boot_cpu_has(X86_FEATURE_TSC)) {
		res = (((u64)(*deltatsc)) * pm_100ms);
		do_div(res, deltapm);
		apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
			    "PM-Timer: %lu (%ld)\n",
			    (unsigned long)res, *deltatsc);
		*deltatsc = (long)res;
	}

	return 0;
}

static int __init lapic_init_clockevent(void)
{
	if (!lapic_timer_period)
		return -1;

	/* Calculate the scaled math multiplication factor */
	lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR,
				       TICK_NSEC, lapic_clockevent.shift);
	lapic_clockevent.max_delta_ns =
		clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
	lapic_clockevent.max_delta_ticks = 0x7FFFFFFF;
	lapic_clockevent.min_delta_ns =
		clockevent_delta2ns(0xF, &lapic_clockevent);
	lapic_clockevent.min_delta_ticks = 0xF;

	return 0;
}
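
/*
 * Note on the mult/shift math above: this is the standard clockevents
 * fixed-point conversion. With shift = 32 the core converts a
 * nanosecond delta into timer ticks as
 *
 *	ticks = (ns * mult) >> 32;
 *
 * where mult = (ticks per jiffy << 32) / TICK_NSEC, i.e. the divided
 * timer frequency expressed in ticks per nanosecond, scaled by 2^32.
 */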

bool __init apic_needs_pit(void)
{
	/*
	 * If the frequencies are not known, PIT is required for both TSC
	 * and apic timer calibration.
	 */
	if (!tsc_khz || !cpu_khz)
		return true;

	/* Is there an APIC at all or is it disabled? */
	if (!boot_cpu_has(X86_FEATURE_APIC) || disable_apic)
		return true;

	/*
	 * If interrupt delivery mode is legacy PIC or virtual wire without
	 * configuration, the local APIC timer won't be set up. Make sure
	 * that the PIT is initialized.
	 */
	if (apic_intr_mode == APIC_PIC ||
	    apic_intr_mode == APIC_VIRTUAL_WIRE_NO_CONFIG)
		return true;

	/* Virt guests may lack ARAT, but still have DEADLINE */
	if (!boot_cpu_has(X86_FEATURE_ARAT))
		return true;

	/* Deadline timer is based on TSC so no further PIT action required */
	if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
		return false;

	/* APIC timer disabled? */
	if (disable_apic_timer)
		return true;
	/*
	 * The APIC timer frequency is known already, no PIT calibration
	 * required. If unknown, let the PIT be initialized.
	 */
	return lapic_timer_period == 0;
}

static int __init calibrate_APIC_clock(void)
{
	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
	u64 tsc_perj = 0, tsc_start = 0;
	unsigned long jif_start;
	unsigned long deltaj;
	long delta, deltatsc;
	int pm_referenced = 0;

	if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
		return 0;

	/*
	 * Check if lapic timer has already been calibrated by platform
	 * specific routine, such as tsc calibration code. If so just fill
	 * in the clockevent structure and return.
	 */
	if (!lapic_init_clockevent()) {
		apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
			    lapic_timer_period);
		/*
		 * Direct calibration methods must have an always running
		 * local APIC timer, no need for broadcast timer.
		 */
		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
		return 0;
	}

	apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
		    "calibrating APIC timer ...\n");

	/*
	 * There are platforms w/o global clockevent devices. Instead of
	 * making the calibration conditional on that, use a polling based
	 * approach everywhere.
	 */
	local_irq_disable();

	/*
	 * Setup the APIC counter to maximum. There is no way the lapic
	 * can underflow in the 100ms detection time frame.
	 */
	__setup_APIC_LVTT(0xffffffff, 0, 0);

	/*
	 * Methods to terminate the calibration loop:
	 *  1) Global clockevent if available (jiffies)
	 *  2) TSC if available and frequency is known
	 */
	jif_start = READ_ONCE(jiffies);

	if (tsc_khz) {
		tsc_start = rdtsc();
		tsc_perj = div_u64((u64)tsc_khz * 1000, HZ);
	}

	/*
	 * Enable interrupts so the tick can fire, if a global
	 * clockevent device is available
	 */
	local_irq_enable();

	while (lapic_cal_loops <= LAPIC_CAL_LOOPS) {
		/* Wait for a tick to elapse */
		while (1) {
			if (tsc_khz) {
				u64 tsc_now = rdtsc();
				if ((tsc_now - tsc_start) >= tsc_perj) {
					tsc_start += tsc_perj;
					break;
				}
			} else {
				unsigned long jif_now = READ_ONCE(jiffies);

				if (time_after(jif_now, jif_start)) {
					jif_start = jif_now;
					break;
				}
			}
			cpu_relax();
		}

		/* Invoke the calibration routine */
		local_irq_disable();
		lapic_cal_handler(NULL);
		local_irq_enable();
	}

	local_irq_disable();

	/* Build delta t1-t2 as apic timer counts down */
	delta = lapic_cal_t1 - lapic_cal_t2;
	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);

	deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);

	/* we trust the PM based calibration if possible */
	pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
					      &delta, &deltatsc);

	lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
	lapic_init_clockevent();

	apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
	apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
		    lapic_timer_period);

	if (boot_cpu_has(X86_FEATURE_TSC)) {
		apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
			    "%ld.%04ld MHz.\n",
			    (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
			    (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
	}

	apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
		    "%u.%04u MHz.\n",
		    lapic_timer_period / (1000000 / HZ),
		    lapic_timer_period % (1000000 / HZ));

	/*
	 * Do a sanity check on the APIC calibration result
	 */
	if (lapic_timer_period < (1000000 / HZ)) {
		local_irq_enable();
		pr_warn("APIC frequency too slow, disabling apic timer\n");
		return -1;
	}

	levt->features &= ~CLOCK_EVT_FEAT_DUMMY;

	/*
	 * PM timer calibration failed or not turned on, so let's try APIC
	 * timer based calibration, if a global clockevent device is
	 * available.
	 */
	if (!pm_referenced && global_clock_event) {
		apic_printk(APIC_VERBOSE, "... verify APIC timer\n");

		/*
		 * Setup the apic timer manually
		 */
		levt->event_handler = lapic_cal_handler;
		lapic_timer_set_periodic(levt);
		lapic_cal_loops = -1;

		/* Let the interrupts run */
		local_irq_enable();

		while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
			cpu_relax();

		/* Stop the lapic timer */
		local_irq_disable();
		lapic_timer_shutdown(levt);

		/* Jiffies delta */
		deltaj = lapic_cal_j2 - lapic_cal_j1;
		apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);

		/* Check if the jiffies result is consistent */
		if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
			apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
		else
			levt->features |= CLOCK_EVT_FEAT_DUMMY;
	}
	local_irq_enable();

	if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
		pr_warn("APIC timer disabled due to verification failure\n");
		return -1;
	}

	return 0;
}
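
/*
 * Worked example for the sanity check above: a lapic_timer_period below
 * (1000000 / HZ) counts per jiffy corresponds to a bus clock under
 * 1 MHz, e.g. with HZ=250 that is fewer than 4000 counts per 4ms tick.
 * The MHz printouts divide by the same (1000000 / HZ) factor to turn
 * counts per jiffy into MHz.
 */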

/*
 * Setup the boot APIC
 *
 * Calibrate and verify the result.
 */
void __init setup_boot_APIC_clock(void)
{
	/*
	 * The local apic timer can be disabled via the kernel
	 * commandline or from the CPU detection code. Register the lapic
	 * timer as a dummy clock event source on SMP systems, so the
	 * broadcast mechanism is used. On UP systems simply ignore it.
	 */
	if (disable_apic_timer) {
		pr_info("Disabling APIC timer\n");
		/* No broadcast on UP ! */
		if (num_possible_cpus() > 1) {
			lapic_clockevent.mult = 1;
			setup_APIC_timer();
		}
		return;
	}

	if (calibrate_APIC_clock()) {
		/* No broadcast on UP ! */
		if (num_possible_cpus() > 1)
			setup_APIC_timer();
		return;
	}

	/*
	 * If nmi_watchdog is set to IO_APIC, we need the
	 * PIT/HPET going.  Otherwise register lapic as a dummy
	 * device.
	 */
	lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;

	/* Setup the lapic or request the broadcast */
	setup_APIC_timer();
	amd_e400_c1e_apic_setup();
}

void setup_secondary_APIC_clock(void)
{
	setup_APIC_timer();
	amd_e400_c1e_apic_setup();
}

/*
 * The guts of the apic timer interrupt
 */
static void local_apic_timer_interrupt(void)
{
	struct clock_event_device *evt = this_cpu_ptr(&lapic_events);

	/*
	 * Normally we should not be here till the LAPIC has been
	 * initialized, but in some cases like kdump it's possible that a
	 * pending LAPIC timer interrupt from the previous kernel's context
	 * is delivered in the new kernel the moment interrupts are enabled.
	 *
	 * Interrupts are enabled early and the LAPIC is set up much later,
	 * hence it's possible that when we get here evt->event_handler is
	 * NULL. Check for event_handler being NULL and discard the
	 * interrupt as spurious.
	 */
	if (!evt->event_handler) {
		pr_warn("Spurious LAPIC timer interrupt on cpu %d\n",
			smp_processor_id());
		/* Switch it off */
		lapic_timer_shutdown(evt);
		return;
	}

	/*
	 * the NMI deadlock-detector uses this.
	 */
	inc_irq_stat(apic_timer_irqs);

	evt->event_handler(evt);
}

/*
 * Local APIC timer interrupt. This is the most natural way for doing
 * local interrupts, but local timer interrupts can be emulated by
 * broadcast interrupts too. [in case the hw doesn't support APIC timers]
 *
 * [ if a single-CPU system runs an SMP kernel then we call the local
 *   interrupt as well. Thus we cannot inline the local irq ... ]
 */
DEFINE_IDTENTRY_SYSVEC(sysvec_apic_timer_interrupt)
{
	struct pt_regs *old_regs = set_irq_regs(regs);

	ack_APIC_irq();
	trace_local_timer_entry(LOCAL_TIMER_VECTOR);
	local_apic_timer_interrupt();
	trace_local_timer_exit(LOCAL_TIMER_VECTOR);

	set_irq_regs(old_regs);
}

/*
 * Local APIC start and shutdown
 */

/**
 * clear_local_APIC - shutdown the local APIC
 *
 * This is called when a CPU is disabled and before rebooting, so the state
 * of the local APIC has no dangling leftovers. Also used to clean out any
 * BIOS leftovers during boot.
 */
void clear_local_APIC(void)
{
	int maxlvt;
	u32 v;

	/* APIC hasn't been mapped yet */
	if (!x2apic_mode && !apic_phys)
		return;

	maxlvt = lapic_get_maxlvt();
	/*
	 * Masking an LVT entry can trigger a local APIC error
	 * if the vector is zero. Mask LVTERR first to prevent this.
	 */
	if (maxlvt >= 3) {
		v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
		apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
	}
	/*
	 * Careful: we have to set masks only first to deassert
	 * any level-triggered sources.
	 */
	v = apic_read(APIC_LVTT);
	apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
	v = apic_read(APIC_LVT0);
	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
	v = apic_read(APIC_LVT1);
	apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
	if (maxlvt >= 4) {
		v = apic_read(APIC_LVTPC);
		apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
	}

	/* let's not touch this if we didn't frob it */
#ifdef CONFIG_X86_THERMAL_VECTOR
	if (maxlvt >= 5) {
		v = apic_read(APIC_LVTTHMR);
		apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
	}
#endif
#ifdef CONFIG_X86_MCE_INTEL
	if (maxlvt >= 6) {
		v = apic_read(APIC_LVTCMCI);
		if (!(v & APIC_LVT_MASKED))
			apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED);
	}
#endif

	/*
	 * Clean APIC state for other OSs:
	 */
	apic_write(APIC_LVTT, APIC_LVT_MASKED);
	apic_write(APIC_LVT0, APIC_LVT_MASKED);
	apic_write(APIC_LVT1, APIC_LVT_MASKED);
	if (maxlvt >= 3)
		apic_write(APIC_LVTERR, APIC_LVT_MASKED);
	if (maxlvt >= 4)
		apic_write(APIC_LVTPC, APIC_LVT_MASKED);

	/* Integrated APIC (!82489DX) ? */
	if (lapic_is_integrated()) {
		if (maxlvt > 3)
			/* Clear ESR due to Pentium errata 3AP and 11AP */
			apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
	}
}

/**
 * apic_soft_disable - Clears and software disables the local APIC on hotplug
 *
 * Contrary to disable_local_APIC() this does not touch the enable bit in
 * MSR_IA32_APICBASE. Clearing that bit on systems based on the 3 wire APIC
 * bus would require a hardware reset as the APIC would lose track of bus
 * arbitration. On systems with FSB delivery APICBASE could be disabled,
 * but it has to be guaranteed that no interrupt is sent to the APIC while
 * in that state and it's not clear from the SDM whether it still responds
 * to INIT/SIPI messages. Stay on the safe side and use software disable.
 */
void apic_soft_disable(void)
{
	u32 value;

	clear_local_APIC();

	/* Soft disable APIC (implies clearing of registers for 82489DX!). */
	value = apic_read(APIC_SPIV);
	value &= ~APIC_SPIV_APIC_ENABLED;
	apic_write(APIC_SPIV, value);
}

/**
 * disable_local_APIC - clear and disable the local APIC
 */
void disable_local_APIC(void)
{
	/* APIC hasn't been mapped yet */
	if (!x2apic_mode && !apic_phys)
		return;

	apic_soft_disable();

#ifdef CONFIG_X86_32
	/*
	 * When LAPIC was disabled by the BIOS and enabled by the kernel,
	 * restore the disabled state.
	 */
	if (enabled_via_apicbase) {
		unsigned int l, h;

		rdmsr(MSR_IA32_APICBASE, l, h);
		l &= ~MSR_IA32_APICBASE_ENABLE;
		wrmsr(MSR_IA32_APICBASE, l, h);
	}
#endif
}
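
/*
 * Note on the two disable flavours above: they touch different bits.
 * apic_soft_disable() clears APIC_SPIV_APIC_ENABLED (bit 8 of the
 * spurious interrupt vector register), which software can always undo.
 * disable_local_APIC() additionally clears MSR_IA32_APICBASE_ENABLE
 * (bit 11 of the APICBASE MSR), and only when the kernel itself had set
 * it via apic_force_enable().
 */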

/*
 * If Linux enabled the LAPIC against the BIOS default, disable it before
 * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused
 * and not power-off. Additionally clear all LVT entries before
 * disable_local_APIC() for the case where Linux didn't enable the LAPIC.
 */
void lapic_shutdown(void)
{
	unsigned long flags;

	if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
		return;

	local_irq_save(flags);

#ifdef CONFIG_X86_32
	if (!enabled_via_apicbase)
		clear_local_APIC();
	else
#endif
		disable_local_APIC();


	local_irq_restore(flags);
}

/**
 * sync_Arb_IDs - synchronize APIC bus arbitration IDs
 */
void __init sync_Arb_IDs(void)
{
	/*
	 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1.
	 * Not needed on AMD.
	 */
	if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		return;

	/*
	 * Wait for idle.
	 */
	apic_wait_icr_idle();

	apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
	apic_write(APIC_ICR, APIC_DEST_ALLINC |
			APIC_INT_LEVELTRIG | APIC_DM_INIT);
}

enum apic_intr_mode_id apic_intr_mode __ro_after_init;

static int __init __apic_intr_mode_select(void)
{
	/* Check kernel option */
	if (disable_apic) {
		pr_info("APIC disabled via kernel command line\n");
		return APIC_PIC;
	}

	/* Check BIOS */
#ifdef CONFIG_X86_64
	/* On 64-bit, the APIC must be integrated. Check the local APIC only. */
	if (!boot_cpu_has(X86_FEATURE_APIC)) {
		disable_apic = 1;
		pr_info("APIC disabled by BIOS\n");
		return APIC_PIC;
	}
#else
	/* On 32-bit, the APIC may be integrated APIC or 82489DX */

	/* Neither 82489DX nor integrated APIC ? */
	if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) {
		disable_apic = 1;
		return APIC_PIC;
	}

	/* Does the BIOS pretend there is an integrated APIC ? */
	if (!boot_cpu_has(X86_FEATURE_APIC) &&
	    APIC_INTEGRATED(boot_cpu_apic_version)) {
		disable_apic = 1;
		pr_err(FW_BUG "Local APIC %d not detected, force emulation\n",
		       boot_cpu_physical_apicid);
		return APIC_PIC;
	}
#endif

	/* Check MP table or ACPI MADT configuration */
	if (!smp_found_config) {
		disable_ioapic_support();
		if (!acpi_lapic) {
			pr_info("APIC: ACPI MADT or MP tables are not detected\n");
			return APIC_VIRTUAL_WIRE_NO_CONFIG;
		}
		return APIC_VIRTUAL_WIRE;
	}

#ifdef CONFIG_SMP
	/* If SMP should be disabled, then really disable it! */
	if (!setup_max_cpus) {
		pr_info("APIC: SMP mode deactivated\n");
		return APIC_SYMMETRIC_IO_NO_ROUTING;
	}

	if (read_apic_id() != boot_cpu_physical_apicid) {
		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
		      read_apic_id(), boot_cpu_physical_apicid);
		/* Or can we switch back to PIC here? */
	}
#endif

	return APIC_SYMMETRIC_IO;
}

/* Select the interrupt delivery mode for the BSP */
void __init apic_intr_mode_select(void)
{
	apic_intr_mode = __apic_intr_mode_select();
}

/*
 * An initial setup of the virtual wire mode.
 */
void __init init_bsp_APIC(void)
{
	unsigned int value;

	/*
	 * Don't do the setup now if we have a SMP BIOS as the
	 * through-I/O-APIC virtual wire mode might be active.
	 */
	if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
		return;

	/*
	 * Do not trust the local APIC being empty at bootup.
	 */
	clear_local_APIC();

	/*
	 * Enable APIC.
	 */
	value = apic_read(APIC_SPIV);
	value &= ~APIC_VECTOR_MASK;
	value |= APIC_SPIV_APIC_ENABLED;

#ifdef CONFIG_X86_32
	/* This bit is reserved on P4/Xeon and should be cleared */
	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
	    (boot_cpu_data.x86 == 15))
		value &= ~APIC_SPIV_FOCUS_DISABLED;
	else
#endif
		value |= APIC_SPIV_FOCUS_DISABLED;
	value |= SPURIOUS_APIC_VECTOR;
	apic_write(APIC_SPIV, value);

	/*
	 * Set up the virtual wire mode.
	 */
	apic_write(APIC_LVT0, APIC_DM_EXTINT);
	value = APIC_DM_NMI;
	if (!lapic_is_integrated())		/* 82489DX */
		value |= APIC_LVT_LEVEL_TRIGGER;
	if (apic_extnmi == APIC_EXTNMI_NONE)
		value |= APIC_LVT_MASKED;
	apic_write(APIC_LVT1, value);
}

static void __init apic_bsp_setup(bool upmode);

/* Init the interrupt delivery mode for the BSP */
void __init apic_intr_mode_init(void)
{
	bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT);

	switch (apic_intr_mode) {
	case APIC_PIC:
		pr_info("APIC: Keep in PIC mode(8259)\n");
		return;
	case APIC_VIRTUAL_WIRE:
		pr_info("APIC: Switch to virtual wire mode setup\n");
		break;
	case APIC_VIRTUAL_WIRE_NO_CONFIG:
		pr_info("APIC: Switch to virtual wire mode setup with no configuration\n");
		upmode = true;
		break;
	case APIC_SYMMETRIC_IO:
		pr_info("APIC: Switch to symmetric I/O mode setup\n");
		break;
	case APIC_SYMMETRIC_IO_NO_ROUTING:
		pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n");
		break;
	}

	default_setup_apic_routing();

	if (x86_platform.apic_post_init)
		x86_platform.apic_post_init();

	apic_bsp_setup(upmode);
}

static void lapic_setup_esr(void)
{
	unsigned int oldvalue, value, maxlvt;

	if (!lapic_is_integrated()) {
		pr_info("No ESR for 82489DX.\n");
		return;
	}

	if (apic->disable_esr) {
		/*
		 * Something untraceable is creating bad interrupts on
		 * secondary quads ... for the moment, just leave the
		 * ESR disabled - we can't do anything useful with the
		 * errors anyway - mbligh
		 */
		pr_info("Leaving ESR disabled.\n");
		return;
	}

	maxlvt = lapic_get_maxlvt();
	if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
		apic_write(APIC_ESR, 0);
	oldvalue = apic_read(APIC_ESR);

	/* enables sending errors */
	value = ERROR_APIC_VECTOR;
	apic_write(APIC_LVTERR, value);

	/*
	 * spec says clear errors after enabling vector.
	 */
	if (maxlvt > 3)
		apic_write(APIC_ESR, 0);
	value = apic_read(APIC_ESR);
	if (value != oldvalue)
		apic_printk(APIC_VERBOSE, "ESR value before enabling "
			    "vector: 0x%08x  after: 0x%08x\n",
			    oldvalue, value);
}

#define APIC_IR_REGS		APIC_ISR_NR
#define APIC_IR_BITS		(APIC_IR_REGS * 32)
#define APIC_IR_MAPSIZE		(APIC_IR_BITS / BITS_PER_LONG)

union apic_ir {
	unsigned long	map[APIC_IR_MAPSIZE];
	u32		regs[APIC_IR_REGS];
};
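
/*
 * Layout note: IRR and ISR are each APIC_ISR_NR (8) 32-bit registers
 * spaced 0x10 apart, covering 256 vector bits. A single vector is
 * tested as, e.g.:
 *
 *	u32 reg = apic_read(APIC_ISR + (vector >> 5) * 0x10);
 *	bool pending = reg & (1U << (vector & 0x1f));
 *
 * which matches the indexing used by handle_spurious_interrupt() below.
 */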

static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr)
{
	int i, bit;

	/* Read the IRRs */
	for (i = 0; i < APIC_IR_REGS; i++)
		irr->regs[i] = apic_read(APIC_IRR + i * 0x10);

	/* Read the ISRs */
	for (i = 0; i < APIC_IR_REGS; i++)
		isr->regs[i] = apic_read(APIC_ISR + i * 0x10);

	/*
	 * If the ISR map is not empty, ACK the APIC and run another round
	 * to verify whether a pending IRR has been unblocked and turned
	 * into an ISR.
	 */
	if (!bitmap_empty(isr->map, APIC_IR_BITS)) {
		/*
		 * There can be multiple ISR bits set when a high priority
		 * interrupt preempted a lower priority one. Issue an ACK
		 * per set bit.
		 */
		for_each_set_bit(bit, isr->map, APIC_IR_BITS)
			ack_APIC_irq();
		return true;
	}

	return !bitmap_empty(irr->map, APIC_IR_BITS);
}

/*
 * After a crash, we no longer service the interrupts and a pending
 * interrupt from the previous kernel might still have the ISR bit set.
 *
 * Most probably by now the CPU has serviced that pending interrupt and
 * it might not have done the ack_APIC_irq() because it thought the
 * interrupt came from i8259 as ExtInt. The LAPIC did not get an EOI, so
 * it does not clear the ISR bit and the cpu thinks it has already
 * serviced the interrupt. Hence a vector might get locked. It was
 * noticed for the timer irq (vector 0x31). Issue an extra EOI to clear
 * the ISR.
 *
 * If there are pending IRR bits they turn into ISR bits after a higher
 * priority ISR bit has been acked.
 */
static void apic_pending_intr_clear(void)
{
	union apic_ir irr, isr;
	unsigned int i;

	/* 512 loops are way oversized and give the APIC a chance to obey. */
	for (i = 0; i < 512; i++) {
		if (!apic_check_and_ack(&irr, &isr))
			return;
	}
	/* Dump the IRR/ISR content if that failed */
	pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map);
}

/**
 * setup_local_APIC - setup the local APIC
 *
 * Used to setup local APIC while initializing BSP or bringing up APs.
 * Always called with preemption disabled.
 */
static void setup_local_APIC(void)
{
	int cpu = smp_processor_id();
	unsigned int value;

	if (disable_apic) {
		disable_ioapic_support();
		return;
	}

	/*
	 * If this comes from kexec/kcrash the APIC might be enabled in
	 * SPIV. Soft disable it before doing further initialization.
	 */
	value = apic_read(APIC_SPIV);
	value &= ~APIC_SPIV_APIC_ENABLED;
	apic_write(APIC_SPIV, value);

#ifdef CONFIG_X86_32
	/* Pound the ESR really hard over the head with a big hammer - mbligh */
	if (lapic_is_integrated() && apic->disable_esr) {
		apic_write(APIC_ESR, 0);
		apic_write(APIC_ESR, 0);
		apic_write(APIC_ESR, 0);
		apic_write(APIC_ESR, 0);
	}
#endif
	/*
	 * Double-check whether this APIC is really registered.
	 * This is meaningless in clustered apic mode, so we skip it.
	 */
	BUG_ON(!apic->apic_id_registered());

	/*
	 * Intel recommends to set DFR, LDR and TPR before enabling
	 * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
	 * document number 292116).  So here it goes...
	 */
	apic->init_apic_ldr();

#ifdef CONFIG_X86_32
	if (apic->dest_mode_logical) {
		int logical_apicid, ldr_apicid;

		/*
		 * APIC LDR is initialized. If logical_apicid mapping was
		 * initialized during get_smp_config(), make sure it matches
		 * the actual value.
		 */
		logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
		ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
		if (logical_apicid != BAD_APICID)
			WARN_ON(logical_apicid != ldr_apicid);
		/* Always use the value from LDR. */
		early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
	}
#endif

	/*
	 * Set Task Priority to 'accept all except vectors 0-31'.  An APIC
	 * vector in the 16-31 range could be delivered if TPR == 0, but we
	 * would think it's an exception and terrible things will happen.  We
	 * never change this later on.
	 */
	value = apic_read(APIC_TASKPRI);
	value &= ~APIC_TPRI_MASK;
	value |= 0x10;
	apic_write(APIC_TASKPRI, value);

	/* Clear eventually stale ISR/IRR bits */
	apic_pending_intr_clear();

	/*
	 * Now that we are all set up, enable the APIC
	 */
	value = apic_read(APIC_SPIV);
	value &= ~APIC_VECTOR_MASK;
	/*
	 * Enable APIC
	 */
	value |= APIC_SPIV_APIC_ENABLED;

#ifdef CONFIG_X86_32
	/*
	 * Some unknown Intel IO/APIC (or APIC) errata is biting us with
	 * certain networking cards. If high frequency interrupts are
	 * happening on a particular IOAPIC pin, plus the IOAPIC routing
	 * entry is masked/unmasked at a high rate as well then sooner or
	 * later IOAPIC line gets 'stuck', no more interrupts are received
	 * from the device. If focus CPU is disabled then the hang goes
	 * away, oh well :-(
	 *
	 * [ This bug can be reproduced easily with a level-triggered
	 *   PCI Ne2000 networking cards and PII/PIII processors, dual
	 *   BX chipset. ]
	 */
	/*
	 * Actually disabling the focus CPU check just makes the hang less
	 * frequent as it makes the interrupt distribution model be more
	 * like LRU than MRU (the short-term load is more even across CPUs).
	 */

	/*
	 * - enable focus processor (bit==0)
	 * - 64bit mode always uses processor focus
	 *   so no need to set it
	 */
	value &= ~APIC_SPIV_FOCUS_DISABLED;
#endif

	/*
	 * Set spurious IRQ vector
	 */
	value |= SPURIOUS_APIC_VECTOR;
	apic_write(APIC_SPIV, value);

	perf_events_lapic_init();

	/*
	 * Set up LVT0, LVT1:
	 *
	 * set up through-local-APIC on the boot CPU's LINT0. This is not
	 * strictly necessary in pure symmetric-IO mode, but sometimes
	 * we delegate interrupts to the 8259A.
	 */
	/*
	 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
	 */
	value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
	if (!cpu && (pic_mode || !value || skip_ioapic_setup)) {
		value = APIC_DM_EXTINT;
		apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
	} else {
		value = APIC_DM_EXTINT | APIC_LVT_MASKED;
		apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu);
	}
	apic_write(APIC_LVT0, value);

	/*
	 * Only the BSP sees the LINT1 NMI signal by default. This can be
	 * modified by apic_extnmi= boot option.
	 */
	if ((!cpu && apic_extnmi != APIC_EXTNMI_NONE) ||
	    apic_extnmi == APIC_EXTNMI_ALL)
		value = APIC_DM_NMI;
	else
		value = APIC_DM_NMI | APIC_LVT_MASKED;

	/* Is 82489DX ? */
	if (!lapic_is_integrated())
		value |= APIC_LVT_LEVEL_TRIGGER;
	apic_write(APIC_LVT1, value);

#ifdef CONFIG_X86_MCE_INTEL
	/* Recheck CMCI information after local APIC is up on CPU #0 */
	if (!cpu)
		cmci_recheck();
#endif
}

static void end_local_APIC_setup(void)
{
	lapic_setup_esr();

#ifdef CONFIG_X86_32
	{
		unsigned int value;
		/* Disable the local apic timer */
		value = apic_read(APIC_LVTT);
		value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
		apic_write(APIC_LVTT, value);
	}
#endif

	apic_pm_activate();
}

/*
 * APIC setup function for application processors. Called from smpboot.c
 */
void apic_ap_setup(void)
{
	setup_local_APIC();
	end_local_APIC_setup();
}

#ifdef CONFIG_X86_X2APIC
int x2apic_mode;
EXPORT_SYMBOL_GPL(x2apic_mode);

enum {
	X2APIC_OFF,
	X2APIC_DISABLED,
	/* All states below here have X2APIC enabled */
	X2APIC_ON,
	X2APIC_ON_LOCKED
};
static int x2apic_state;

static bool x2apic_hw_locked(void)
{
	u64 ia32_cap;
	u64 msr;

	ia32_cap = x86_read_arch_cap_msr();
	if (ia32_cap & ARCH_CAP_XAPIC_DISABLE) {
		rdmsrl(MSR_IA32_XAPIC_DISABLE_STATUS, msr);
		return (msr & LEGACY_XAPIC_DISABLED);
	}
	return false;
}

static void __x2apic_disable(void)
{
	u64 msr;

	if (!boot_cpu_has(X86_FEATURE_APIC))
		return;

	rdmsrl(MSR_IA32_APICBASE, msr);
	if (!(msr & X2APIC_ENABLE))
		return;
	/* Disable xapic and x2apic first and then reenable xapic mode */
	wrmsrl(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
	wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
	printk_once(KERN_INFO "x2apic disabled\n");
}

static void __x2apic_enable(void)
{
	u64 msr;

	rdmsrl(MSR_IA32_APICBASE, msr);
	if (msr & X2APIC_ENABLE)
		return;
	wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
	printk_once(KERN_INFO "x2apic enabled\n");
}

static int __init setup_nox2apic(char *str)
{
	if (x2apic_enabled()) {
		int apicid = native_apic_msr_read(APIC_ID);

		if (apicid >= 255) {
			pr_warn("Apicid: %08x, cannot enforce nox2apic\n",
				apicid);
			return 0;
		}
		if (x2apic_hw_locked()) {
			pr_warn("APIC locked in x2apic mode, can't disable\n");
			return 0;
		}
		pr_warn("x2apic already enabled.\n");
		__x2apic_disable();
	}
	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
	x2apic_state = X2APIC_DISABLED;
	x2apic_mode = 0;
	return 0;
}
early_param("nox2apic", setup_nox2apic);
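
/*
 * Note on the MSR bits used above: in MSR_IA32_APICBASE, XAPIC_ENABLE
 * is bit 11 and X2APIC_ENABLE is bit 10. Switching from x2APIC mode
 * (11=1, 10=1) directly to xAPIC mode (11=1, 10=0) is an illegal
 * transition, which is why __x2apic_disable() clears both bits first
 * and only then re-enables xAPIC mode.
 */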

/* Called from cpu_init() to enable x2apic on (secondary) cpus */
void x2apic_setup(void)
{
	/*
	 * Try to make the AP's APIC state match that of the BSP, but if the
	 * BSP is unlocked and the AP is locked then there is a state mismatch.
	 * Warn about the mismatch in case a GP fault occurs due to a locked AP
	 * trying to be turned off.
	 */
	if (x2apic_state != X2APIC_ON_LOCKED && x2apic_hw_locked())
		pr_warn("x2apic lock mismatch between BSP and AP.\n");
	/*
	 * If x2apic is not in ON or LOCKED state, disable it in case it
	 * was enabled by the BIOS.
	 */
	if (x2apic_state < X2APIC_ON) {
		__x2apic_disable();
		return;
	}
	__x2apic_enable();
}

static __init void x2apic_disable(void)
{
	u32 x2apic_id, state = x2apic_state;

	x2apic_mode = 0;
	x2apic_state = X2APIC_DISABLED;

	if (state != X2APIC_ON)
		return;

	x2apic_id = read_apic_id();
	if (x2apic_id >= 255)
		panic("Cannot disable x2apic, id: %08x\n", x2apic_id);

	if (x2apic_hw_locked()) {
		pr_warn("Cannot disable locked x2apic, id: %08x\n", x2apic_id);
		return;
	}

	__x2apic_disable();
	register_lapic_address(mp_lapic_addr);
}

static __init void x2apic_enable(void)
{
	if (x2apic_state != X2APIC_OFF)
		return;

	x2apic_mode = 1;
	x2apic_state = X2APIC_ON;
	__x2apic_enable();
}

static __init void try_to_enable_x2apic(int remap_mode)
{
	if (x2apic_state == X2APIC_DISABLED)
		return;

	if (remap_mode != IRQ_REMAP_X2APIC_MODE) {
		u32 apic_limit = 255;

		/*
		 * Using X2APIC without IR is not architecturally supported
		 * on bare metal but may be supported in guests.
		 */
		if (!x86_init.hyper.x2apic_available()) {
			pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
			x2apic_disable();
			return;
		}

		/*
		 * If the hypervisor supports extended destination ID in
		 * MSI, that increases the maximum APIC ID that can be
		 * used for non-remapped IRQ domains.
		 */
		if (x86_init.hyper.msi_ext_dest_id()) {
			virt_ext_dest_id = 1;
			apic_limit = 32767;
		}

		/*
		 * Without IR, all CPUs can be addressed by IOAPIC/MSI only
		 * in physical mode, and CPUs with an APIC ID that cannot
		 * be addressed must not be brought online.
		 */
		x2apic_set_max_apicid(apic_limit);
		x2apic_phys = 1;
	}
	x2apic_enable();
}

void __init check_x2apic(void)
{
	if (x2apic_enabled()) {
		pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n");
		x2apic_mode = 1;
		if (x2apic_hw_locked())
			x2apic_state = X2APIC_ON_LOCKED;
		else
			x2apic_state = X2APIC_ON;
	} else if (!boot_cpu_has(X86_FEATURE_X2APIC)) {
		x2apic_state = X2APIC_DISABLED;
	}
}
#else /* CONFIG_X86_X2APIC */
void __init check_x2apic(void)
{
	if (!apic_is_x2apic_enabled())
		return;
	/*
	 * Checkme: Can we simply turn off x2APIC here instead of
	 * disabling the APIC?
	 */
	pr_err("Kernel does not support x2APIC, please recompile with CONFIG_X86_X2APIC.\n");
	pr_err("Disabling APIC, expect reduced performance and functionality.\n");

	disable_apic = 1;
	setup_clear_cpu_cap(X86_FEATURE_APIC);
}

static inline void try_to_enable_x2apic(int remap_mode) { }
static inline void __x2apic_enable(void) { }
#endif /* !CONFIG_X86_X2APIC */

void __init enable_IR_x2apic(void)
{
	unsigned long flags;
	int ret, ir_stat;

	if (skip_ioapic_setup) {
		pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
		return;
	}

	ir_stat = irq_remapping_prepare();
	if (ir_stat < 0 && !x2apic_supported())
		return;

	ret = save_ioapic_entries();
	if (ret) {
		pr_info("Saving IO-APIC state failed: %d\n", ret);
		return;
	}

	local_irq_save(flags);
	legacy_pic->mask_all();
	mask_ioapic_entries();

	/* If irq_remapping_prepare() succeeded, try to enable it */
	if (ir_stat >= 0)
		ir_stat = irq_remapping_enable();
	/* ir_stat contains the remap mode or an error code */
	try_to_enable_x2apic(ir_stat);

	if (ir_stat < 0)
		restore_ioapic_entries();
	legacy_pic->restore_mask();
	local_irq_restore(flags);
}

#ifdef CONFIG_X86_64
/*
 * Detect and enable local APICs on non-SMP boards.
 * Original code written by Keir Fraser.
 * On AMD64 we trust the BIOS - if it says no APIC it is likely
 * not correctly set up (usually the APIC timer won't work etc.)
 */
static int __init detect_init_APIC(void)
{
	if (!boot_cpu_has(X86_FEATURE_APIC)) {
		pr_info("No local APIC present\n");
		return -1;
	}

	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
	return 0;
}
#else

static int __init apic_verify(void)
{
	u32 features, h, l;

	/*
	 * The APIC feature bit should now be enabled
	 * in `cpuid'
	 */
	features = cpuid_edx(1);
	if (!(features & (1 << X86_FEATURE_APIC))) {
		pr_warn("Could not enable APIC!\n");
		return -1;
	}
	set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;

	/* The BIOS may have set up the APIC at some other address */
	if (boot_cpu_data.x86 >= 6) {
		rdmsr(MSR_IA32_APICBASE, l, h);
		if (l & MSR_IA32_APICBASE_ENABLE)
			mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
	}

	pr_info("Found and enabled local APIC!\n");
	return 0;
}

int __init apic_force_enable(unsigned long addr)
{
	u32 h, l;

	if (disable_apic)
		return -1;

	/*
	 * Some BIOSes disable the local APIC in the APIC_BASE
	 * MSR. This can only be done in software for Intel P6 or later
	 * and AMD K7 (Model > 1) or later.
	 */
	if (boot_cpu_data.x86 >= 6) {
		rdmsr(MSR_IA32_APICBASE, l, h);
		if (!(l & MSR_IA32_APICBASE_ENABLE)) {
			pr_info("Local APIC disabled by BIOS -- reenabling.\n");
			l &= ~MSR_IA32_APICBASE_BASE;
			l |= MSR_IA32_APICBASE_ENABLE | addr;
			wrmsr(MSR_IA32_APICBASE, l, h);
			enabled_via_apicbase = 1;
		}
	}
	return apic_verify();
}
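
/*
 * Background for the detection helpers above: APIC presence is
 * CPUID.01H:EDX bit 9 (the cpuid_edx(1) test in apic_verify()), and
 * APIC_DEFAULT_PHYS_BASE is the architectural reset default 0xfee00000.
 * The "lapic" command line option funnels into
 * apic_force_enable(APIC_DEFAULT_PHYS_BASE) from detect_init_APIC()
 * below when the BIOS left the APIC disabled.
 */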
*/ 2064 if (disable_apic) 2065 return -1; 2066 2067 switch (boot_cpu_data.x86_vendor) { 2068 case X86_VENDOR_AMD: 2069 if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) || 2070 (boot_cpu_data.x86 >= 15)) 2071 break; 2072 goto no_apic; 2073 case X86_VENDOR_HYGON: 2074 break; 2075 case X86_VENDOR_INTEL: 2076 if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || 2077 (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC))) 2078 break; 2079 goto no_apic; 2080 default: 2081 goto no_apic; 2082 } 2083 2084 if (!boot_cpu_has(X86_FEATURE_APIC)) { 2085 /* 2086 * Override the BIOS and try to enable the local APIC only if 2087 * "lapic" was specified on the command line. 2088 */ 2089 if (!force_enable_local_apic) { 2090 pr_info("Local APIC disabled by BIOS -- " 2091 "you can enable it with \"lapic\"\n"); 2092 return -1; 2093 } 2094 if (apic_force_enable(APIC_DEFAULT_PHYS_BASE)) 2095 return -1; 2096 } else { 2097 if (apic_verify()) 2098 return -1; 2099 } 2100 2101 apic_pm_activate(); 2102 2103 return 0; 2104 2105 no_apic: 2106 pr_info("No local APIC present or hardware disabled\n"); 2107 return -1; 2108 } 2109 #endif 2110 2111 /** 2112 * init_apic_mappings - initialize APIC mappings 2113 */ 2114 void __init init_apic_mappings(void) 2115 { 2116 unsigned int new_apicid; 2117 2118 if (apic_validate_deadline_timer()) 2119 pr_info("TSC deadline timer available\n"); 2120 2121 if (x2apic_mode) { 2122 boot_cpu_physical_apicid = read_apic_id(); 2123 return; 2124 } 2125 2126 /* If no local APIC can be found, return early */ 2127 if (!smp_found_config && detect_init_APIC()) { 2128 /* NOP'ify the APIC operations */ 2129 pr_info("APIC: disable apic facility\n"); 2130 apic_disable(); 2131 } else { 2132 apic_phys = mp_lapic_addr; 2133 2134 /* 2135 * If the system has ACPI MADT tables or MP info, the LAPIC 2136 * address is already registered. 2137 */ 2138 if (!acpi_lapic && !smp_found_config) 2139 register_lapic_address(apic_phys); 2140 } 2141 2142 /* 2143 * Fetch the APIC ID of the BSP in case we have a 2144 * default configuration (or the MP table is broken). 2145 */ 2146 new_apicid = read_apic_id(); 2147 if (boot_cpu_physical_apicid != new_apicid) { 2148 boot_cpu_physical_apicid = new_apicid; 2149 /* 2150 * We knowingly report a possibly stale apic_version in case 2151 * the APIC was disabled via a boot option; that is not a 2152 * problem for an SMP-compiled kernel, since 2153 * apic_intr_mode_select() is prepared for such a case and 2154 * disables SMP mode. 2155 */ 2156 boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR)); 2157 } 2158 } 2159 2160 void __init register_lapic_address(unsigned long address) 2161 { 2162 mp_lapic_addr = address; 2163 2164 if (!x2apic_mode) { 2165 set_fixmap_nocache(FIX_APIC_BASE, address); 2166 apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", 2167 APIC_BASE, address); 2168 } 2169 if (boot_cpu_physical_apicid == -1U) { 2170 boot_cpu_physical_apicid = read_apic_id(); 2171 boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR)); 2172 } 2173 } 2174 2175 /* 2176 * Local APIC interrupts 2177 */ 2178 2179 /* 2180 * Common handling code for spurious_interrupt and spurious_vector entry 2181 * points below. No point in allowing the compiler to inline it twice.
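 *
 * For reference: the ISR is architecturally 256 bits wide, exposed as
 * eight 32-bit registers spaced 16 bytes apart. The expression
 * APIC_ISR + ((vector & ~0x1f) >> 1) below selects the register that
 * holds the vector's bit and (vector & 0x1f) the bit within it, e.g.
 * vector 0x27 maps to APIC_ISR + 0x10, bit 7.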
*/ 2183 static noinline void handle_spurious_interrupt(u8 vector) 2184 { 2185 u32 v; 2186 2187 trace_spurious_apic_entry(vector); 2188 2189 inc_irq_stat(irq_spurious_count); 2190 2191 /* 2192 * If this is a spurious interrupt then do not acknowledge it 2193 */ 2194 if (vector == SPURIOUS_APIC_VECTOR) { 2195 /* See SDM vol 3 */ 2196 pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n", 2197 smp_processor_id()); 2198 goto out; 2199 } 2200 2201 /* 2202 * If it is a vectored one, verify it's set in the ISR. If set, 2203 * acknowledge it. 2204 */ 2205 v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1)); 2206 if (v & (1 << (vector & 0x1f))) { 2207 pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n", 2208 vector, smp_processor_id()); 2209 ack_APIC_irq(); 2210 } else { 2211 pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n", 2212 vector, smp_processor_id()); 2213 } 2214 out: 2215 trace_spurious_apic_exit(vector); 2216 } 2217 2218 /** 2219 * spurious_interrupt - Catch all for interrupts raised on unused vectors 2220 * @regs: Pointer to pt_regs on stack 2221 * @vector: The vector number 2222 * 2223 * This is invoked from ASM entry code to catch all interrupts which 2224 * trigger on an entry routed to the common_spurious idtentry 2225 * point. 2226 */ 2227 DEFINE_IDTENTRY_IRQ(spurious_interrupt) 2228 { 2229 handle_spurious_interrupt(vector); 2230 } 2231 2232 DEFINE_IDTENTRY_SYSVEC(sysvec_spurious_apic_interrupt) 2233 { 2234 handle_spurious_interrupt(SPURIOUS_APIC_VECTOR); 2235 } 2236 2237 /* 2238 * This interrupt should never happen with our APIC/SMP architecture 2239 */ 2240 DEFINE_IDTENTRY_SYSVEC(sysvec_error_interrupt) 2241 { 2242 static const char * const error_interrupt_reason[] = { 2243 "Send CS error", /* APIC Error Bit 0 */ 2244 "Receive CS error", /* APIC Error Bit 1 */ 2245 "Send accept error", /* APIC Error Bit 2 */ 2246 "Receive accept error", /* APIC Error Bit 3 */ 2247 "Redirectable IPI", /* APIC Error Bit 4 */ 2248 "Send illegal vector", /* APIC Error Bit 5 */ 2249 "Received illegal vector", /* APIC Error Bit 6 */ 2250 "Illegal register address", /* APIC Error Bit 7 */ 2251 }; 2252 u32 v, i = 0; 2253 2254 trace_error_apic_entry(ERROR_APIC_VECTOR); 2255 2256 /* First tickle the hardware, only then report what went on. -- REW */ 2257 if (lapic_get_maxlvt() > 3) /* Due to the Pentium erratum 3AP. */ 2258 apic_write(APIC_ESR, 0); 2259 v = apic_read(APIC_ESR); 2260 ack_APIC_irq(); 2261 atomic_inc(&irq_err_count); 2262 2263 apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x", 2264 smp_processor_id(), v); 2265 2266 v &= 0xff; 2267 while (v) { 2268 if (v & 0x1) 2269 apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); 2270 i++; 2271 v >>= 1; 2272 } 2273 2274 apic_printk(APIC_DEBUG, KERN_CONT "\n"); 2275 2276 trace_error_apic_exit(ERROR_APIC_VECTOR); 2277 } 2278 2279 /** 2280 * connect_bsp_APIC - attach the APIC to the interrupt system 2281 */ 2282 static void __init connect_bsp_APIC(void) 2283 { 2284 #ifdef CONFIG_X86_32 2285 if (pic_mode) { 2286 /* 2287 * Do not trust the local APIC to be empty at bootup. 2288 */ 2289 clear_local_APIC(); 2290 /* 2291 * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's 2292 * local APIC to INT and NMI lines.
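 * (pic_mode is only set when the MP table's feature byte 2 advertises
 * the IMCRP bit, i.e. on boards that actually implement the IMCR.)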
*/ 2294 apic_printk(APIC_VERBOSE, "leaving PIC mode, " 2295 "enabling APIC mode.\n"); 2296 imcr_pic_to_apic(); 2297 } 2298 #endif 2299 } 2300 2301 /** 2302 * disconnect_bsp_APIC - detach the APIC from the interrupt system 2303 * @virt_wire_setup: indicates whether virtual wire mode is selected 2304 * 2305 * Virtual wire mode is necessary to deliver legacy interrupts even when the 2306 * APIC is disabled. 2307 */ 2308 void disconnect_bsp_APIC(int virt_wire_setup) 2309 { 2310 unsigned int value; 2311 2312 #ifdef CONFIG_X86_32 2313 if (pic_mode) { 2314 /* 2315 * Put the board back into PIC mode (has an effect only on 2316 * certain older boards). Note that APIC interrupts, including 2317 * IPIs, won't work beyond this point! The only exception is 2318 * INIT IPIs. 2319 */ 2320 apic_printk(APIC_VERBOSE, "disabling APIC mode, " 2321 "entering PIC mode.\n"); 2322 imcr_apic_to_pic(); 2323 return; 2324 } 2325 #endif 2326 2327 /* Go back to Virtual Wire compatibility mode */ 2328 2329 /* For the spurious interrupt use vector F, and enable it */ 2330 value = apic_read(APIC_SPIV); 2331 value &= ~APIC_VECTOR_MASK; 2332 value |= APIC_SPIV_APIC_ENABLED; 2333 value |= 0xf; 2334 apic_write(APIC_SPIV, value); 2335 2336 if (!virt_wire_setup) { 2337 /* 2338 * For LVT0 make it edge triggered, active high, 2339 * external and enabled 2340 */ 2341 value = apic_read(APIC_LVT0); 2342 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | 2343 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | 2344 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); 2345 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; 2346 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); 2347 apic_write(APIC_LVT0, value); 2348 } else { 2349 /* Disable LVT0 */ 2350 apic_write(APIC_LVT0, APIC_LVT_MASKED); 2351 } 2352 2353 /* 2354 * For LVT1 make it edge triggered, active high, 2355 * NMI and enabled 2356 */ 2357 value = apic_read(APIC_LVT1); 2358 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | 2359 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | 2360 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); 2361 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; 2362 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); 2363 apic_write(APIC_LVT1, value); 2364 } 2365 2366 /* 2367 * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated 2368 * contiguously, it equals the currently allocated maximum logical CPU ID plus 1. 2369 * All allocated CPU IDs should be in the [0, nr_logical_cpuids) range, 2370 * so the maximum of nr_logical_cpuids is nr_cpu_ids. 2371 * 2372 * NOTE: Reserve 0 for BSP. 2373 */ 2374 static int nr_logical_cpuids = 1; 2375 2376 /* 2377 * Used to store mapping between logical CPU IDs and APIC IDs. 2378 */ 2379 static int cpuid_to_apicid[] = { 2380 [0 ... NR_CPUS - 1] = -1, 2381 }; 2382 2383 bool arch_match_cpu_phys_id(int cpu, u64 phys_id) 2384 { 2385 return phys_id == cpuid_to_apicid[cpu]; 2386 } 2387 2388 #ifdef CONFIG_SMP 2389 /** 2390 * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread 2391 * @apicid: APIC ID to check 2392 */ 2393 bool apic_id_is_primary_thread(unsigned int apicid) 2394 { 2395 u32 mask; 2396 2397 if (smp_num_siblings == 1) 2398 return true; 2399 /* Isolate the SMT bit(s) in the APICID and check for 0 */ 2400 mask = (1U << (fls(smp_num_siblings) - 1)) - 1; 2401 return !(apicid & mask); 2402 } 2403 #endif 2404 2405 /* 2406 * This API should be used to allocate logical CPU IDs, keeping nr_logical_cpuids 2407 * and cpuid_to_apicid[] synchronized.
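 * Returns the existing cpuid if the apicid is already known, a freshly
 * allocated one otherwise, or -EINVAL once nr_cpu_ids is exhausted.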
*/ 2409 static int allocate_logical_cpuid(int apicid) 2410 { 2411 int i; 2412 2413 /* 2414 * cpuid <-> apicid mapping is persistent, so when a cpu is up, 2415 * check if the kernel has allocated a cpuid for it. 2416 */ 2417 for (i = 0; i < nr_logical_cpuids; i++) { 2418 if (cpuid_to_apicid[i] == apicid) 2419 return i; 2420 } 2421 2422 /* Allocate a new cpuid. */ 2423 if (nr_logical_cpuids >= nr_cpu_ids) { 2424 WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %u reached. " 2425 "Processor %d/0x%x and the rest are ignored.\n", 2426 nr_cpu_ids, nr_logical_cpuids, apicid); 2427 return -EINVAL; 2428 } 2429 2430 cpuid_to_apicid[nr_logical_cpuids] = apicid; 2431 return nr_logical_cpuids++; 2432 } 2433 2434 int generic_processor_info(int apicid, int version) 2435 { 2436 int cpu, max = nr_cpu_ids; 2437 bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid, 2438 phys_cpu_present_map); 2439 2440 /* 2441 * boot_cpu_physical_apicid is designed to have the apicid 2442 * returned by read_apic_id(), i.e., the apicid of the 2443 * currently booting-up processor. However, on some platforms, 2444 * it is temporarily modified by the apicid reported as BSP 2445 * through the MP table. Concretely: 2446 * 2447 * - arch/x86/kernel/mpparse.c: MP_processor_info() 2448 * - arch/x86/mm/amdtopology.c: amd_numa_init() 2449 * 2450 * This function is executed with the modified 2451 * boot_cpu_physical_apicid. So the disabled_cpu_apicid kernel 2452 * parameter doesn't work to disable APs in the kdump 2nd kernel. 2453 * 2454 * Since fixing handling of boot_cpu_physical_apicid requires 2455 * another discussion and tests on each platform, we leave it 2456 * for now and here we use read_apic_id() directly in this 2457 * function, generic_processor_info(). 2458 */ 2459 if (disabled_cpu_apicid != BAD_APICID && 2460 disabled_cpu_apicid != read_apic_id() && 2461 disabled_cpu_apicid == apicid) { 2462 int thiscpu = num_processors + disabled_cpus; 2463 2464 pr_warn("APIC: Disabling requested cpu." 2465 " Processor %d/0x%x ignored.\n", thiscpu, apicid); 2466 2467 disabled_cpus++; 2468 return -ENODEV; 2469 } 2470 2471 /* 2472 * If the boot cpu has not been detected yet, then only allow up to 2473 * nr_cpu_ids - 1 processors and keep one slot free for the boot cpu 2474 */ 2475 if (!boot_cpu_detected && num_processors >= nr_cpu_ids - 1 && 2476 apicid != boot_cpu_physical_apicid) { 2477 int thiscpu = max + disabled_cpus - 1; 2478 2479 pr_warn("APIC: NR_CPUS/possible_cpus limit of %i almost" 2480 " reached. Keeping one slot for boot cpu." 2481 " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); 2482 2483 disabled_cpus++; 2484 return -ENODEV; 2485 } 2486 2487 if (num_processors >= nr_cpu_ids) { 2488 int thiscpu = max + disabled_cpus; 2489 2490 pr_warn("APIC: NR_CPUS/possible_cpus limit of %i reached. " 2491 "Processor %d/0x%x ignored.\n", max, thiscpu, apicid); 2492 2493 disabled_cpus++; 2494 return -EINVAL; 2495 } 2496 2497 if (apicid == boot_cpu_physical_apicid) { 2498 /* 2499 * x86_bios_cpu_apicid is required to have processors listed 2500 * in the same order as logical cpu numbers. Hence the first 2501 * entry is the BSP, and so on. 2502 * boot_cpu_init() already holds bit 0 in cpu_present_mask 2503 * for the BSP. 2504 */ 2505 cpu = 0; 2506 2507 /* Logical cpuid 0 is reserved for BSP.
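 * Record its APIC ID in slot 0 so a later lookup through
 * allocate_logical_cpuid() resolves the BSP back to CPU 0.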
*/ 2508 cpuid_to_apicid[0] = apicid; 2509 } else { 2510 cpu = allocate_logical_cpuid(apicid); 2511 if (cpu < 0) { 2512 disabled_cpus++; 2513 return -EINVAL; 2514 } 2515 } 2516 2517 /* 2518 * Validate version 2519 */ 2520 if (version == 0x0) { 2521 pr_warn("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n", 2522 cpu, apicid); 2523 version = 0x10; 2524 } 2525 2526 if (version != boot_cpu_apic_version) { 2527 pr_warn("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n", 2528 boot_cpu_apic_version, cpu, version); 2529 } 2530 2531 if (apicid > max_physical_apicid) 2532 max_physical_apicid = apicid; 2533 2534 #if defined(CONFIG_SMP) || defined(CONFIG_X86_64) 2535 early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; 2536 early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; 2537 #endif 2538 #ifdef CONFIG_X86_32 2539 early_per_cpu(x86_cpu_to_logical_apicid, cpu) = 2540 apic->x86_32_early_logical_apicid(cpu); 2541 #endif 2542 set_cpu_possible(cpu, true); 2543 physid_set(apicid, phys_cpu_present_map); 2544 set_cpu_present(cpu, true); 2545 num_processors++; 2546 2547 return cpu; 2548 } 2549 2550 int hard_smp_processor_id(void) 2551 { 2552 return read_apic_id(); 2553 } 2554 2555 void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg, 2556 bool dmar) 2557 { 2558 memset(msg, 0, sizeof(*msg)); 2559 2560 msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW; 2561 msg->arch_addr_lo.dest_mode_logical = apic->dest_mode_logical; 2562 msg->arch_addr_lo.destid_0_7 = cfg->dest_apicid & 0xFF; 2563 2564 msg->arch_data.delivery_mode = APIC_DELIVERY_MODE_FIXED; 2565 msg->arch_data.vector = cfg->vector; 2566 2567 msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH; 2568 /* 2569 * Only the IOMMU itself can use the trick of putting destination 2570 * APIC ID into the high bits of the address. Anything else would 2571 * just be writing to memory if it tried that, and needs IR to 2572 * address APICs which can't be addressed in the normal 32-bit 2573 * address range at 0xFFExxxxx. That is typically just 8 bits, but 2574 * some hypervisors allow the extended destination ID field in bits 2575 * 5-11 to be used, giving support for 15 bits of APIC IDs in total. 2576 */ 2577 if (dmar) 2578 msg->arch_addr_hi.destid_8_31 = cfg->dest_apicid >> 8; 2579 else if (virt_ext_dest_id && cfg->dest_apicid < 0x8000) 2580 msg->arch_addr_lo.virt_destid_8_14 = cfg->dest_apicid >> 8; 2581 else 2582 WARN_ON_ONCE(cfg->dest_apicid > 0xFF); 2583 } 2584 2585 u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid) 2586 { 2587 u32 dest = msg->arch_addr_lo.destid_0_7; 2588 2589 if (extid) 2590 dest |= msg->arch_addr_hi.destid_8_31 << 8; 2591 return dest; 2592 } 2593 EXPORT_SYMBOL_GPL(x86_msi_msg_get_destid); 2594 2595 #ifdef CONFIG_X86_64 2596 void __init acpi_wake_cpu_handler_update(wakeup_cpu_handler handler) 2597 { 2598 struct apic **drv; 2599 2600 for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) 2601 (*drv)->wakeup_secondary_cpu_64 = handler; 2602 } 2603 #endif 2604 2605 /* 2606 * Override the generic EOI implementation with an optimized version. 2607 * Only called during early boot when only one CPU is active and with 2608 * interrupts disabled, so we know this does not race with actual APIC driver 2609 * use. 
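 *
 * (Known users include KVM's paravirtual EOI and Hyper-V's MSR-based
 * EOI, which both install their fast paths through this hook.)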
2610 */ 2611 void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) 2612 { 2613 struct apic **drv; 2614 2615 for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { 2616 /* Should happen once for each apic */ 2617 WARN_ON((*drv)->eoi_write == eoi_write); 2618 (*drv)->native_eoi_write = (*drv)->eoi_write; 2619 (*drv)->eoi_write = eoi_write; 2620 } 2621 } 2622 2623 static void __init apic_bsp_up_setup(void) 2624 { 2625 #ifdef CONFIG_X86_64 2626 apic_write(APIC_ID, apic->set_apic_id(boot_cpu_physical_apicid)); 2627 #else 2628 /* 2629 * Hack: In case of kdump, after a crash, kernel might be booting 2630 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid 2631 * might be zero if read from MP tables. Get it from LAPIC. 2632 */ 2633 # ifdef CONFIG_CRASH_DUMP 2634 boot_cpu_physical_apicid = read_apic_id(); 2635 # endif 2636 #endif 2637 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); 2638 } 2639 2640 /** 2641 * apic_bsp_setup - Setup function for local apic and io-apic 2642 * @upmode: Force UP mode (for APIC_init_uniprocessor) 2643 */ 2644 static void __init apic_bsp_setup(bool upmode) 2645 { 2646 connect_bsp_APIC(); 2647 if (upmode) 2648 apic_bsp_up_setup(); 2649 setup_local_APIC(); 2650 2651 enable_IO_APIC(); 2652 end_local_APIC_setup(); 2653 irq_remap_enable_fault_handling(); 2654 setup_IO_APIC(); 2655 lapic_update_legacy_vectors(); 2656 } 2657 2658 #ifdef CONFIG_UP_LATE_INIT 2659 void __init up_late_init(void) 2660 { 2661 if (apic_intr_mode == APIC_PIC) 2662 return; 2663 2664 /* Setup local timer */ 2665 x86_init.timers.setup_percpu_clockev(); 2666 } 2667 #endif 2668 2669 /* 2670 * Power management 2671 */ 2672 #ifdef CONFIG_PM 2673 2674 static struct { 2675 /* 2676 * 'active' is true if the local APIC was enabled by us and 2677 * not the BIOS; this signifies that we are also responsible 2678 * for disabling it before entering apm/acpi suspend 2679 */ 2680 int active; 2681 /* r/w apic fields */ 2682 unsigned int apic_id; 2683 unsigned int apic_taskpri; 2684 unsigned int apic_ldr; 2685 unsigned int apic_dfr; 2686 unsigned int apic_spiv; 2687 unsigned int apic_lvtt; 2688 unsigned int apic_lvtpc; 2689 unsigned int apic_lvt0; 2690 unsigned int apic_lvt1; 2691 unsigned int apic_lvterr; 2692 unsigned int apic_tmict; 2693 unsigned int apic_tdcr; 2694 unsigned int apic_thmr; 2695 unsigned int apic_cmci; 2696 } apic_pm_state; 2697 2698 static int lapic_suspend(void) 2699 { 2700 unsigned long flags; 2701 int maxlvt; 2702 2703 if (!apic_pm_state.active) 2704 return 0; 2705 2706 maxlvt = lapic_get_maxlvt(); 2707 2708 apic_pm_state.apic_id = apic_read(APIC_ID); 2709 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); 2710 apic_pm_state.apic_ldr = apic_read(APIC_LDR); 2711 apic_pm_state.apic_dfr = apic_read(APIC_DFR); 2712 apic_pm_state.apic_spiv = apic_read(APIC_SPIV); 2713 apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); 2714 if (maxlvt >= 4) 2715 apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); 2716 apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); 2717 apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); 2718 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); 2719 apic_pm_state.apic_tmict = apic_read(APIC_TMICT); 2720 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); 2721 #ifdef CONFIG_X86_THERMAL_VECTOR 2722 if (maxlvt >= 5) 2723 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); 2724 #endif 2725 #ifdef CONFIG_X86_MCE_INTEL 2726 if (maxlvt >= 6) 2727 apic_pm_state.apic_cmci = apic_read(APIC_LVTCMCI); 2728 #endif 2729 2730 local_irq_save(flags); 2731 2732 
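/*
 * All APIC registers of interest have been captured above; with
 * interrupts off, quiesce the IO-APIC, the local APIC and interrupt
 * remapping below before returning.
 */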
/* 2733 * Mask IOAPIC before disabling the local APIC to prevent stale IRR 2734 * entries on some implementations. 2735 */ 2736 mask_ioapic_entries(); 2737 2738 disable_local_APIC(); 2739 2740 irq_remapping_disable(); 2741 2742 local_irq_restore(flags); 2743 return 0; 2744 } 2745 2746 static void lapic_resume(void) 2747 { 2748 unsigned int l, h; 2749 unsigned long flags; 2750 int maxlvt; 2751 2752 if (!apic_pm_state.active) 2753 return; 2754 2755 local_irq_save(flags); 2756 2757 /* 2758 * IO-APIC and PIC have their own resume routines. 2759 * We just mask them here to make sure the interrupt 2760 * subsystem is completely quiet while we enable x2apic 2761 * and interrupt-remapping. 2762 */ 2763 mask_ioapic_entries(); 2764 legacy_pic->mask_all(); 2765 2766 if (x2apic_mode) { 2767 __x2apic_enable(); 2768 } else { 2769 /* 2770 * Make sure the APICBASE points to the right address 2771 * 2772 * FIXME! This will be wrong if we ever support suspend on 2773 * SMP! We'll need to do this as part of the CPU restore! 2774 */ 2775 if (boot_cpu_data.x86 >= 6) { 2776 rdmsr(MSR_IA32_APICBASE, l, h); 2777 l &= ~MSR_IA32_APICBASE_BASE; 2778 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; 2779 wrmsr(MSR_IA32_APICBASE, l, h); 2780 } 2781 } 2782 2783 maxlvt = lapic_get_maxlvt(); 2784 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); 2785 apic_write(APIC_ID, apic_pm_state.apic_id); 2786 apic_write(APIC_DFR, apic_pm_state.apic_dfr); 2787 apic_write(APIC_LDR, apic_pm_state.apic_ldr); 2788 apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); 2789 apic_write(APIC_SPIV, apic_pm_state.apic_spiv); 2790 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); 2791 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); 2792 #ifdef CONFIG_X86_THERMAL_VECTOR 2793 if (maxlvt >= 5) 2794 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); 2795 #endif 2796 #ifdef CONFIG_X86_MCE_INTEL 2797 if (maxlvt >= 6) 2798 apic_write(APIC_LVTCMCI, apic_pm_state.apic_cmci); 2799 #endif 2800 if (maxlvt >= 4) 2801 apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); 2802 apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); 2803 apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); 2804 apic_write(APIC_TMICT, apic_pm_state.apic_tmict); 2805 apic_write(APIC_ESR, 0); 2806 apic_read(APIC_ESR); 2807 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); 2808 apic_write(APIC_ESR, 0); 2809 apic_read(APIC_ESR); 2810 2811 irq_remapping_reenable(x2apic_mode); 2812 2813 local_irq_restore(flags); 2814 } 2815 2816 /* 2817 * This device has no shutdown method - fully functioning local APICs 2818 * are needed on every CPU up until machine_halt/restart/poweroff. 2819 */ 2820 2821 static struct syscore_ops lapic_syscore_ops = { 2822 .resume = lapic_resume, 2823 .suspend = lapic_suspend, 2824 }; 2825 2826 static void apic_pm_activate(void) 2827 { 2828 apic_pm_state.active = 1; 2829 } 2830 2831 static int __init init_lapic_sysfs(void) 2832 { 2833 /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ 2834 if (boot_cpu_has(X86_FEATURE_APIC)) 2835 register_syscore_ops(&lapic_syscore_ops); 2836 2837 return 0; 2838 } 2839 2840 /* local apic needs to resume before other devices access its registers. 
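 * syscore resume callbacks run in registration order, so registering at
 * core_initcall time puts the local APIC ahead of later-registered
 * users such as the IO-APIC.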
*/ 2841 core_initcall(init_lapic_sysfs); 2842 2843 #else /* CONFIG_PM */ 2844 2845 static void apic_pm_activate(void) { } 2846 2847 #endif /* CONFIG_PM */ 2848 2849 #ifdef CONFIG_X86_64 2850 2851 static int multi_checked; 2852 static int multi; 2853 2854 static int set_multi(const struct dmi_system_id *d) 2855 { 2856 if (multi) 2857 return 0; 2858 pr_info("APIC: %s detected, Multi Chassis\n", d->ident); 2859 multi = 1; 2860 return 0; 2861 } 2862 2863 static const struct dmi_system_id multi_dmi_table[] = { 2864 { 2865 .callback = set_multi, 2866 .ident = "IBM System Summit2", 2867 .matches = { 2868 DMI_MATCH(DMI_SYS_VENDOR, "IBM"), 2869 DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"), 2870 }, 2871 }, 2872 {} 2873 }; 2874 2875 static void dmi_check_multi(void) 2876 { 2877 if (multi_checked) 2878 return; 2879 2880 dmi_check_system(multi_dmi_table); 2881 multi_checked = 1; 2882 } 2883 2884 /* 2885 * apic_is_clustered_box() -- Check if we can expect good TSC 2886 * 2887 * Thus far, the major user of this is IBM's Summit2 series: 2888 * Clustered boxes may have unsynced TSC problems if they are 2889 * multi-chassis. 2890 * Use DMI to check for them. 2891 */ 2892 int apic_is_clustered_box(void) 2893 { 2894 dmi_check_multi(); 2895 return multi; 2896 } 2897 #endif 2898 2899 /* 2900 * APIC command line parameters 2901 */ 2902 static int __init setup_disableapic(char *arg) 2903 { 2904 disable_apic = 1; 2905 setup_clear_cpu_cap(X86_FEATURE_APIC); 2906 return 0; 2907 } 2908 early_param("disableapic", setup_disableapic); 2909 2910 /* same as disableapic, for compatibility */ 2911 static int __init setup_nolapic(char *arg) 2912 { 2913 return setup_disableapic(arg); 2914 } 2915 early_param("nolapic", setup_nolapic); 2916 2917 static int __init parse_lapic_timer_c2_ok(char *arg) 2918 { 2919 local_apic_timer_c2_ok = 1; 2920 return 0; 2921 } 2922 early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); 2923 2924 static int __init parse_disable_apic_timer(char *arg) 2925 { 2926 disable_apic_timer = 1; 2927 return 0; 2928 } 2929 early_param("noapictimer", parse_disable_apic_timer); 2930 2931 static int __init parse_nolapic_timer(char *arg) 2932 { 2933 disable_apic_timer = 1; 2934 return 0; 2935 } 2936 early_param("nolapic_timer", parse_nolapic_timer); 2937 2938 static int __init apic_set_verbosity(char *arg) 2939 { 2940 if (!arg) { 2941 #ifdef CONFIG_X86_64 2942 skip_ioapic_setup = 0; 2943 return 0; 2944 #endif 2945 return -EINVAL; 2946 } 2947 2948 if (strcmp("debug", arg) == 0) 2949 apic_verbosity = APIC_DEBUG; 2950 else if (strcmp("verbose", arg) == 0) 2951 apic_verbosity = APIC_VERBOSE; 2952 #ifdef CONFIG_X86_64 2953 else { 2954 pr_warn("APIC verbosity level %s not recognised," 2955 " use apic=verbose or apic=debug\n", arg); 2956 return -EINVAL; 2957 } 2958 #endif 2959 2960 return 0; 2961 } 2962 early_param("apic", apic_set_verbosity); 2963 2964 static int __init lapic_insert_resource(void) 2965 { 2966 if (!apic_phys) 2967 return -1; 2968 2969 /* Put local APIC into the resource map.
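 * The register window is a single page: [apic_phys, apic_phys + PAGE_SIZE).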
*/ 2970 lapic_resource.start = apic_phys; 2971 lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; 2972 insert_resource(&iomem_resource, &lapic_resource); 2973 2974 return 0; 2975 } 2976 2977 /* 2978 * This must be called after e820__reserve_resources(), which 2979 * uses request_resource(). 2980 */ 2981 late_initcall(lapic_insert_resource); 2982 2983 static int __init apic_set_disabled_cpu_apicid(char *arg) 2984 { 2985 if (!arg || !get_option(&arg, &disabled_cpu_apicid)) 2986 return -EINVAL; 2987 2988 return 0; 2989 } 2990 early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid); 2991 2992 static int __init apic_set_extnmi(char *arg) 2993 { 2994 if (!arg) 2995 return -EINVAL; 2996 2997 if (!strncmp("all", arg, 3)) 2998 apic_extnmi = APIC_EXTNMI_ALL; 2999 else if (!strncmp("none", arg, 4)) 3000 apic_extnmi = APIC_EXTNMI_NONE; 3001 else if (!strncmp("bsp", arg, 3)) 3002 apic_extnmi = APIC_EXTNMI_BSP; 3003 else { 3004 pr_warn("Unknown external NMI delivery mode `%s' ignored\n", arg); 3005 return -EINVAL; 3006 } 3007 3008 return 0; 3009 } 3010 early_param("apic_extnmi", apic_set_extnmi); 3011
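/*
 * Illustrative summary of the command line parameters handled in this
 * file (see the individual early_param()/__setup() handlers for the
 * authoritative behaviour):
 *
 *   disableapic / nolapic       - disable the local APIC completely
 *   lapic_timer_c2_ok           - trust the LAPIC timer to keep ticking in C2
 *   noapictimer / nolapic_timer - do not use the local APIC timer
 *   apic=verbose / apic=debug   - raise the APIC debug verbosity
 *   disable_cpu_apicid=<id>     - ignore the CPU with this APIC ID
 *   apic_extnmi=bsp|all|none    - select the target(s) of external NMIs
 */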