1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Local APIC handling, local APIC timers 4 * 5 * (c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com> 6 * 7 * Fixes 8 * Maciej W. Rozycki : Bits for genuine 82489DX APICs; 9 * thanks to Eric Gilmore 10 * and Rolf G. Tews 11 * for testing these extensively. 12 * Maciej W. Rozycki : Various updates and fixes. 13 * Mikael Pettersson : Power Management for UP-APIC. 14 * Pavel Machek and 15 * Mikael Pettersson : PM converted to driver model. 16 */ 17 18 #include <linux/perf_event.h> 19 #include <linux/kernel_stat.h> 20 #include <linux/mc146818rtc.h> 21 #include <linux/acpi_pmtmr.h> 22 #include <linux/clockchips.h> 23 #include <linux/interrupt.h> 24 #include <linux/memblock.h> 25 #include <linux/ftrace.h> 26 #include <linux/ioport.h> 27 #include <linux/export.h> 28 #include <linux/syscore_ops.h> 29 #include <linux/delay.h> 30 #include <linux/timex.h> 31 #include <linux/i8253.h> 32 #include <linux/dmar.h> 33 #include <linux/init.h> 34 #include <linux/cpu.h> 35 #include <linux/dmi.h> 36 #include <linux/smp.h> 37 #include <linux/mm.h> 38 39 #include <asm/trace/irq_vectors.h> 40 #include <asm/irq_remapping.h> 41 #include <asm/pc-conf-reg.h> 42 #include <asm/perf_event.h> 43 #include <asm/x86_init.h> 44 #include <linux/atomic.h> 45 #include <asm/barrier.h> 46 #include <asm/mpspec.h> 47 #include <asm/i8259.h> 48 #include <asm/proto.h> 49 #include <asm/traps.h> 50 #include <asm/apic.h> 51 #include <asm/acpi.h> 52 #include <asm/io_apic.h> 53 #include <asm/desc.h> 54 #include <asm/hpet.h> 55 #include <asm/mtrr.h> 56 #include <asm/time.h> 57 #include <asm/smp.h> 58 #include <asm/mce.h> 59 #include <asm/tsc.h> 60 #include <asm/hypervisor.h> 61 #include <asm/cpu_device_id.h> 62 #include <asm/intel-family.h> 63 #include <asm/irq_regs.h> 64 #include <asm/cpu.h> 65 66 unsigned int num_processors; 67 68 unsigned disabled_cpus; 69 70 /* Processor that is doing the boot up */ 71 unsigned int boot_cpu_physical_apicid __ro_after_init = -1U; 72 EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid); 73 74 u8 boot_cpu_apic_version __ro_after_init; 75 76 /* 77 * The highest APIC ID seen during enumeration. 78 */ 79 static unsigned int max_physical_apicid; 80 81 /* 82 * Bitmask of physically existing CPUs: 83 */ 84 physid_mask_t phys_cpu_present_map; 85 86 /* 87 * Processor to be disabled specified by kernel parameter 88 * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to 89 * avoid undefined behaviour caused by sending INIT from AP to BSP. 90 */ 91 static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID; 92 93 /* 94 * This variable controls which CPUs receive external NMIs. By default, 95 * external NMIs are delivered only to the BSP. 96 */ 97 static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP; 98 99 /* 100 * Hypervisor supports 15 bits of APIC ID in MSI Extended Destination ID 101 */ 102 static bool virt_ext_dest_id __ro_after_init; 103 104 /* 105 * Map cpu index to physical APIC ID 106 */ 107 DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID); 108 DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID); 109 DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX); 110 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); 111 EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); 112 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid); 113 114 #ifdef CONFIG_X86_32 115 116 /* 117 * On x86_32, the mapping between cpu and logical apicid may vary 118 * depending on apic in use. The following early percpu variable is 119 * used for the mapping. This is where the behaviors of x86_64 and 32 120 * actually diverge. Let's keep it ugly for now. 121 */ 122 DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID); 123 124 /* Local APIC was disabled by the BIOS and enabled by the kernel */ 125 static int enabled_via_apicbase __ro_after_init; 126 127 /* 128 * Handle interrupt mode configuration register (IMCR). 129 * This register controls whether the interrupt signals 130 * that reach the BSP come from the master PIC or from the 131 * local APIC. Before entering Symmetric I/O Mode, either 132 * the BIOS or the operating system must switch out of 133 * PIC Mode by changing the IMCR. 134 */ 135 static inline void imcr_pic_to_apic(void) 136 { 137 /* NMI and 8259 INTR go through APIC */ 138 pc_conf_set(PC_CONF_MPS_IMCR, 0x01); 139 } 140 141 static inline void imcr_apic_to_pic(void) 142 { 143 /* NMI and 8259 INTR go directly to BSP */ 144 pc_conf_set(PC_CONF_MPS_IMCR, 0x00); 145 } 146 #endif 147 148 /* 149 * Knob to control our willingness to enable the local APIC. 150 * 151 * +1=force-enable 152 */ 153 static int force_enable_local_apic __initdata; 154 155 /* 156 * APIC command line parameters 157 */ 158 static int __init parse_lapic(char *arg) 159 { 160 if (IS_ENABLED(CONFIG_X86_32) && !arg) 161 force_enable_local_apic = 1; 162 else if (arg && !strncmp(arg, "notscdeadline", 13)) 163 setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); 164 return 0; 165 } 166 early_param("lapic", parse_lapic); 167 168 #ifdef CONFIG_X86_64 169 static int apic_calibrate_pmtmr __initdata; 170 static __init int setup_apicpmtimer(char *s) 171 { 172 apic_calibrate_pmtmr = 1; 173 notsc_setup(NULL); 174 return 1; 175 } 176 __setup("apicpmtimer", setup_apicpmtimer); 177 #endif 178 179 unsigned long mp_lapic_addr __ro_after_init; 180 int disable_apic __ro_after_init; 181 /* Disable local APIC timer from the kernel commandline or via dmi quirk */ 182 static int disable_apic_timer __initdata; 183 /* Local APIC timer works in C2 */ 184 int local_apic_timer_c2_ok __ro_after_init; 185 EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); 186 187 /* 188 * Debug level, exported for io_apic.c 189 */ 190 int apic_verbosity __ro_after_init; 191 192 int pic_mode __ro_after_init; 193 194 /* Have we found an MP table */ 195 int smp_found_config __ro_after_init; 196 197 static struct resource lapic_resource = { 198 .name = "Local APIC", 199 .flags = IORESOURCE_MEM | IORESOURCE_BUSY, 200 }; 201 202 unsigned int lapic_timer_period = 0; 203 204 static void apic_pm_activate(void); 205 206 static unsigned long apic_phys __ro_after_init; 207 208 /* 209 * Get the LAPIC version 210 */ 211 static inline int lapic_get_version(void) 212 { 213 return GET_APIC_VERSION(apic_read(APIC_LVR)); 214 } 215 216 /* 217 * Check, if the APIC is integrated or a separate chip 218 */ 219 static inline int lapic_is_integrated(void) 220 { 221 return APIC_INTEGRATED(lapic_get_version()); 222 } 223 224 /* 225 * Check, whether this is a modern or a first generation APIC 226 */ 227 static int modern_apic(void) 228 { 229 /* AMD systems use old APIC versions, so check the CPU */ 230 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && 231 boot_cpu_data.x86 >= 0xf) 232 return 1; 233 234 /* Hygon systems use modern APIC */ 235 if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) 236 return 1; 237 238 return lapic_get_version() >= 0x14; 239 } 240 241 /* 242 * right after this call apic become NOOP driven 243 * so apic->write/read doesn't do anything 244 */ 245 static void __init apic_disable(void) 246 { 247 pr_info("APIC: switched to apic NOOP\n"); 248 apic = &apic_noop; 249 } 250 251 void native_apic_wait_icr_idle(void) 252 { 253 while (apic_read(APIC_ICR) & APIC_ICR_BUSY) 254 cpu_relax(); 255 } 256 257 u32 native_safe_apic_wait_icr_idle(void) 258 { 259 u32 send_status; 260 int timeout; 261 262 timeout = 0; 263 do { 264 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; 265 if (!send_status) 266 break; 267 inc_irq_stat(icr_read_retry_count); 268 udelay(100); 269 } while (timeout++ < 1000); 270 271 return send_status; 272 } 273 274 void native_apic_icr_write(u32 low, u32 id) 275 { 276 unsigned long flags; 277 278 local_irq_save(flags); 279 apic_write(APIC_ICR2, SET_XAPIC_DEST_FIELD(id)); 280 apic_write(APIC_ICR, low); 281 local_irq_restore(flags); 282 } 283 284 u64 native_apic_icr_read(void) 285 { 286 u32 icr1, icr2; 287 288 icr2 = apic_read(APIC_ICR2); 289 icr1 = apic_read(APIC_ICR); 290 291 return icr1 | ((u64)icr2 << 32); 292 } 293 294 #ifdef CONFIG_X86_32 295 /** 296 * get_physical_broadcast - Get number of physical broadcast IDs 297 */ 298 int get_physical_broadcast(void) 299 { 300 return modern_apic() ? 0xff : 0xf; 301 } 302 #endif 303 304 /** 305 * lapic_get_maxlvt - get the maximum number of local vector table entries 306 */ 307 int lapic_get_maxlvt(void) 308 { 309 /* 310 * - we always have APIC integrated on 64bit mode 311 * - 82489DXs do not report # of LVT entries 312 */ 313 return lapic_is_integrated() ? GET_APIC_MAXLVT(apic_read(APIC_LVR)) : 2; 314 } 315 316 /* 317 * Local APIC timer 318 */ 319 320 /* Clock divisor */ 321 #define APIC_DIVISOR 16 322 #define TSC_DIVISOR 8 323 324 /* i82489DX specific */ 325 #define I82489DX_BASE_DIVIDER (((0x2) << 18)) 326 327 /* 328 * This function sets up the local APIC timer, with a timeout of 329 * 'clocks' APIC bus clock. During calibration we actually call 330 * this function twice on the boot CPU, once with a bogus timeout 331 * value, second time for real. The other (noncalibrating) CPUs 332 * call this function only once, with the real, calibrated value. 333 * 334 * We do reads before writes even if unnecessary, to get around the 335 * P5 APIC double write bug. 336 */ 337 static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) 338 { 339 unsigned int lvtt_value, tmp_value; 340 341 lvtt_value = LOCAL_TIMER_VECTOR; 342 if (!oneshot) 343 lvtt_value |= APIC_LVT_TIMER_PERIODIC; 344 else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) 345 lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE; 346 347 /* 348 * The i82489DX APIC uses bit 18 and 19 for the base divider. This 349 * overlaps with bit 18 on integrated APICs, but is not documented 350 * in the SDM. No problem though. i82489DX equipped systems do not 351 * have TSC deadline timer. 352 */ 353 if (!lapic_is_integrated()) 354 lvtt_value |= I82489DX_BASE_DIVIDER; 355 356 if (!irqen) 357 lvtt_value |= APIC_LVT_MASKED; 358 359 apic_write(APIC_LVTT, lvtt_value); 360 361 if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) { 362 /* 363 * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode, 364 * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized. 365 * According to Intel, MFENCE can do the serialization here. 366 */ 367 asm volatile("mfence" : : : "memory"); 368 return; 369 } 370 371 /* 372 * Divide PICLK by 16 373 */ 374 tmp_value = apic_read(APIC_TDCR); 375 apic_write(APIC_TDCR, 376 (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | 377 APIC_TDR_DIV_16); 378 379 if (!oneshot) 380 apic_write(APIC_TMICT, clocks / APIC_DIVISOR); 381 } 382 383 /* 384 * Setup extended LVT, AMD specific 385 * 386 * Software should use the LVT offsets the BIOS provides. The offsets 387 * are determined by the subsystems using it like those for MCE 388 * threshold or IBS. On K8 only offset 0 (APIC500) and MCE interrupts 389 * are supported. Beginning with family 10h at least 4 offsets are 390 * available. 391 * 392 * Since the offsets must be consistent for all cores, we keep track 393 * of the LVT offsets in software and reserve the offset for the same 394 * vector also to be used on other cores. An offset is freed by 395 * setting the entry to APIC_EILVT_MASKED. 396 * 397 * If the BIOS is right, there should be no conflicts. Otherwise a 398 * "[Firmware Bug]: ..." error message is generated. However, if 399 * software does not properly determines the offsets, it is not 400 * necessarily a BIOS bug. 401 */ 402 403 static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX]; 404 405 static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new) 406 { 407 return (old & APIC_EILVT_MASKED) 408 || (new == APIC_EILVT_MASKED) 409 || ((new & ~APIC_EILVT_MASKED) == old); 410 } 411 412 static unsigned int reserve_eilvt_offset(int offset, unsigned int new) 413 { 414 unsigned int rsvd, vector; 415 416 if (offset >= APIC_EILVT_NR_MAX) 417 return ~0; 418 419 rsvd = atomic_read(&eilvt_offsets[offset]); 420 do { 421 vector = rsvd & ~APIC_EILVT_MASKED; /* 0: unassigned */ 422 if (vector && !eilvt_entry_is_changeable(vector, new)) 423 /* may not change if vectors are different */ 424 return rsvd; 425 rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new); 426 } while (rsvd != new); 427 428 rsvd &= ~APIC_EILVT_MASKED; 429 if (rsvd && rsvd != vector) 430 pr_info("LVT offset %d assigned for vector 0x%02x\n", 431 offset, rsvd); 432 433 return new; 434 } 435 436 /* 437 * If mask=1, the LVT entry does not generate interrupts while mask=0 438 * enables the vector. See also the BKDGs. Must be called with 439 * preemption disabled. 440 */ 441 442 int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask) 443 { 444 unsigned long reg = APIC_EILVTn(offset); 445 unsigned int new, old, reserved; 446 447 new = (mask << 16) | (msg_type << 8) | vector; 448 old = apic_read(reg); 449 reserved = reserve_eilvt_offset(offset, new); 450 451 if (reserved != new) { 452 pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for " 453 "vector 0x%x, but the register is already in use for " 454 "vector 0x%x on another cpu\n", 455 smp_processor_id(), reg, offset, new, reserved); 456 return -EINVAL; 457 } 458 459 if (!eilvt_entry_is_changeable(old, new)) { 460 pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for " 461 "vector 0x%x, but the register is already in use for " 462 "vector 0x%x on this cpu\n", 463 smp_processor_id(), reg, offset, new, old); 464 return -EBUSY; 465 } 466 467 apic_write(reg, new); 468 469 return 0; 470 } 471 EXPORT_SYMBOL_GPL(setup_APIC_eilvt); 472 473 /* 474 * Program the next event, relative to now 475 */ 476 static int lapic_next_event(unsigned long delta, 477 struct clock_event_device *evt) 478 { 479 apic_write(APIC_TMICT, delta); 480 return 0; 481 } 482 483 static int lapic_next_deadline(unsigned long delta, 484 struct clock_event_device *evt) 485 { 486 u64 tsc; 487 488 /* This MSR is special and need a special fence: */ 489 weak_wrmsr_fence(); 490 491 tsc = rdtsc(); 492 wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR)); 493 return 0; 494 } 495 496 static int lapic_timer_shutdown(struct clock_event_device *evt) 497 { 498 unsigned int v; 499 500 /* Lapic used as dummy for broadcast ? */ 501 if (evt->features & CLOCK_EVT_FEAT_DUMMY) 502 return 0; 503 504 v = apic_read(APIC_LVTT); 505 v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); 506 apic_write(APIC_LVTT, v); 507 apic_write(APIC_TMICT, 0); 508 return 0; 509 } 510 511 static inline int 512 lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot) 513 { 514 /* Lapic used as dummy for broadcast ? */ 515 if (evt->features & CLOCK_EVT_FEAT_DUMMY) 516 return 0; 517 518 __setup_APIC_LVTT(lapic_timer_period, oneshot, 1); 519 return 0; 520 } 521 522 static int lapic_timer_set_periodic(struct clock_event_device *evt) 523 { 524 return lapic_timer_set_periodic_oneshot(evt, false); 525 } 526 527 static int lapic_timer_set_oneshot(struct clock_event_device *evt) 528 { 529 return lapic_timer_set_periodic_oneshot(evt, true); 530 } 531 532 /* 533 * Local APIC timer broadcast function 534 */ 535 static void lapic_timer_broadcast(const struct cpumask *mask) 536 { 537 #ifdef CONFIG_SMP 538 apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR); 539 #endif 540 } 541 542 543 /* 544 * The local apic timer can be used for any function which is CPU local. 545 */ 546 static struct clock_event_device lapic_clockevent = { 547 .name = "lapic", 548 .features = CLOCK_EVT_FEAT_PERIODIC | 549 CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP 550 | CLOCK_EVT_FEAT_DUMMY, 551 .shift = 32, 552 .set_state_shutdown = lapic_timer_shutdown, 553 .set_state_periodic = lapic_timer_set_periodic, 554 .set_state_oneshot = lapic_timer_set_oneshot, 555 .set_state_oneshot_stopped = lapic_timer_shutdown, 556 .set_next_event = lapic_next_event, 557 .broadcast = lapic_timer_broadcast, 558 .rating = 100, 559 .irq = -1, 560 }; 561 static DEFINE_PER_CPU(struct clock_event_device, lapic_events); 562 563 static const struct x86_cpu_id deadline_match[] __initconst = { 564 X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x2, 0x2), 0x3a), /* EP */ 565 X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x4, 0x4), 0x0f), /* EX */ 566 567 X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X, 0x0b000020), 568 569 X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x2, 0x2), 0x00000011), 570 X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x3), 0x0700000e), 571 X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x4, 0x4), 0x0f00000c), 572 X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x5, 0x5), 0x0e000003), 573 574 X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x3, 0x3), 0x01000136), 575 X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x4, 0x4), 0x02000014), 576 X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x5, 0xf), 0), 577 578 X86_MATCH_INTEL_FAM6_MODEL( HASWELL, 0x22), 579 X86_MATCH_INTEL_FAM6_MODEL( HASWELL_L, 0x20), 580 X86_MATCH_INTEL_FAM6_MODEL( HASWELL_G, 0x17), 581 582 X86_MATCH_INTEL_FAM6_MODEL( BROADWELL, 0x25), 583 X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_G, 0x17), 584 585 X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE_L, 0xb2), 586 X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE, 0xb2), 587 588 X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE_L, 0x52), 589 X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE, 0x52), 590 591 {}, 592 }; 593 594 static __init bool apic_validate_deadline_timer(void) 595 { 596 const struct x86_cpu_id *m; 597 u32 rev; 598 599 if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) 600 return false; 601 if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) 602 return true; 603 604 m = x86_match_cpu(deadline_match); 605 if (!m) 606 return true; 607 608 rev = (u32)m->driver_data; 609 610 if (boot_cpu_data.microcode >= rev) 611 return true; 612 613 setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); 614 pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; " 615 "please update microcode to version: 0x%x (or later)\n", rev); 616 return false; 617 } 618 619 /* 620 * Setup the local APIC timer for this CPU. Copy the initialized values 621 * of the boot CPU and register the clock event in the framework. 622 */ 623 static void setup_APIC_timer(void) 624 { 625 struct clock_event_device *levt = this_cpu_ptr(&lapic_events); 626 627 if (this_cpu_has(X86_FEATURE_ARAT)) { 628 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP; 629 /* Make LAPIC timer preferable over percpu HPET */ 630 lapic_clockevent.rating = 150; 631 } 632 633 memcpy(levt, &lapic_clockevent, sizeof(*levt)); 634 levt->cpumask = cpumask_of(smp_processor_id()); 635 636 if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) { 637 levt->name = "lapic-deadline"; 638 levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC | 639 CLOCK_EVT_FEAT_DUMMY); 640 levt->set_next_event = lapic_next_deadline; 641 clockevents_config_and_register(levt, 642 tsc_khz * (1000 / TSC_DIVISOR), 643 0xF, ~0UL); 644 } else 645 clockevents_register_device(levt); 646 } 647 648 /* 649 * Install the updated TSC frequency from recalibration at the TSC 650 * deadline clockevent devices. 651 */ 652 static void __lapic_update_tsc_freq(void *info) 653 { 654 struct clock_event_device *levt = this_cpu_ptr(&lapic_events); 655 656 if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) 657 return; 658 659 clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR)); 660 } 661 662 void lapic_update_tsc_freq(void) 663 { 664 /* 665 * The clockevent device's ->mult and ->shift can both be 666 * changed. In order to avoid races, schedule the frequency 667 * update code on each CPU. 668 */ 669 on_each_cpu(__lapic_update_tsc_freq, NULL, 0); 670 } 671 672 /* 673 * In this functions we calibrate APIC bus clocks to the external timer. 674 * 675 * We want to do the calibration only once since we want to have local timer 676 * irqs synchronous. CPUs connected by the same APIC bus have the very same bus 677 * frequency. 678 * 679 * This was previously done by reading the PIT/HPET and waiting for a wrap 680 * around to find out, that a tick has elapsed. I have a box, where the PIT 681 * readout is broken, so it never gets out of the wait loop again. This was 682 * also reported by others. 683 * 684 * Monitoring the jiffies value is inaccurate and the clockevents 685 * infrastructure allows us to do a simple substitution of the interrupt 686 * handler. 687 * 688 * The calibration routine also uses the pm_timer when possible, as the PIT 689 * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes 690 * back to normal later in the boot process). 691 */ 692 693 #define LAPIC_CAL_LOOPS (HZ/10) 694 695 static __initdata int lapic_cal_loops = -1; 696 static __initdata long lapic_cal_t1, lapic_cal_t2; 697 static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2; 698 static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; 699 static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; 700 701 /* 702 * Temporary interrupt handler and polled calibration function. 703 */ 704 static void __init lapic_cal_handler(struct clock_event_device *dev) 705 { 706 unsigned long long tsc = 0; 707 long tapic = apic_read(APIC_TMCCT); 708 unsigned long pm = acpi_pm_read_early(); 709 710 if (boot_cpu_has(X86_FEATURE_TSC)) 711 tsc = rdtsc(); 712 713 switch (lapic_cal_loops++) { 714 case 0: 715 lapic_cal_t1 = tapic; 716 lapic_cal_tsc1 = tsc; 717 lapic_cal_pm1 = pm; 718 lapic_cal_j1 = jiffies; 719 break; 720 721 case LAPIC_CAL_LOOPS: 722 lapic_cal_t2 = tapic; 723 lapic_cal_tsc2 = tsc; 724 if (pm < lapic_cal_pm1) 725 pm += ACPI_PM_OVRRUN; 726 lapic_cal_pm2 = pm; 727 lapic_cal_j2 = jiffies; 728 break; 729 } 730 } 731 732 static int __init 733 calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) 734 { 735 const long pm_100ms = PMTMR_TICKS_PER_SEC / 10; 736 const long pm_thresh = pm_100ms / 100; 737 unsigned long mult; 738 u64 res; 739 740 #ifndef CONFIG_X86_PM_TIMER 741 return -1; 742 #endif 743 744 apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm); 745 746 /* Check, if the PM timer is available */ 747 if (!deltapm) 748 return -1; 749 750 mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); 751 752 if (deltapm > (pm_100ms - pm_thresh) && 753 deltapm < (pm_100ms + pm_thresh)) { 754 apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n"); 755 return 0; 756 } 757 758 res = (((u64)deltapm) * mult) >> 22; 759 do_div(res, 1000000); 760 pr_warn("APIC calibration not consistent " 761 "with PM-Timer: %ldms instead of 100ms\n", (long)res); 762 763 /* Correct the lapic counter value */ 764 res = (((u64)(*delta)) * pm_100ms); 765 do_div(res, deltapm); 766 pr_info("APIC delta adjusted to PM-Timer: " 767 "%lu (%ld)\n", (unsigned long)res, *delta); 768 *delta = (long)res; 769 770 /* Correct the tsc counter value */ 771 if (boot_cpu_has(X86_FEATURE_TSC)) { 772 res = (((u64)(*deltatsc)) * pm_100ms); 773 do_div(res, deltapm); 774 apic_printk(APIC_VERBOSE, "TSC delta adjusted to " 775 "PM-Timer: %lu (%ld)\n", 776 (unsigned long)res, *deltatsc); 777 *deltatsc = (long)res; 778 } 779 780 return 0; 781 } 782 783 static int __init lapic_init_clockevent(void) 784 { 785 if (!lapic_timer_period) 786 return -1; 787 788 /* Calculate the scaled math multiplication factor */ 789 lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR, 790 TICK_NSEC, lapic_clockevent.shift); 791 lapic_clockevent.max_delta_ns = 792 clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent); 793 lapic_clockevent.max_delta_ticks = 0x7FFFFFFF; 794 lapic_clockevent.min_delta_ns = 795 clockevent_delta2ns(0xF, &lapic_clockevent); 796 lapic_clockevent.min_delta_ticks = 0xF; 797 798 return 0; 799 } 800 801 bool __init apic_needs_pit(void) 802 { 803 /* 804 * If the frequencies are not known, PIT is required for both TSC 805 * and apic timer calibration. 806 */ 807 if (!tsc_khz || !cpu_khz) 808 return true; 809 810 /* Is there an APIC at all or is it disabled? */ 811 if (!boot_cpu_has(X86_FEATURE_APIC) || disable_apic) 812 return true; 813 814 /* 815 * If interrupt delivery mode is legacy PIC or virtual wire without 816 * configuration, the local APIC timer wont be set up. Make sure 817 * that the PIT is initialized. 818 */ 819 if (apic_intr_mode == APIC_PIC || 820 apic_intr_mode == APIC_VIRTUAL_WIRE_NO_CONFIG) 821 return true; 822 823 /* Virt guests may lack ARAT, but still have DEADLINE */ 824 if (!boot_cpu_has(X86_FEATURE_ARAT)) 825 return true; 826 827 /* Deadline timer is based on TSC so no further PIT action required */ 828 if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) 829 return false; 830 831 /* APIC timer disabled? */ 832 if (disable_apic_timer) 833 return true; 834 /* 835 * The APIC timer frequency is known already, no PIT calibration 836 * required. If unknown, let the PIT be initialized. 837 */ 838 return lapic_timer_period == 0; 839 } 840 841 static int __init calibrate_APIC_clock(void) 842 { 843 struct clock_event_device *levt = this_cpu_ptr(&lapic_events); 844 u64 tsc_perj = 0, tsc_start = 0; 845 unsigned long jif_start; 846 unsigned long deltaj; 847 long delta, deltatsc; 848 int pm_referenced = 0; 849 850 if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) 851 return 0; 852 853 /* 854 * Check if lapic timer has already been calibrated by platform 855 * specific routine, such as tsc calibration code. If so just fill 856 * in the clockevent structure and return. 857 */ 858 if (!lapic_init_clockevent()) { 859 apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n", 860 lapic_timer_period); 861 /* 862 * Direct calibration methods must have an always running 863 * local APIC timer, no need for broadcast timer. 864 */ 865 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; 866 return 0; 867 } 868 869 apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" 870 "calibrating APIC timer ...\n"); 871 872 /* 873 * There are platforms w/o global clockevent devices. Instead of 874 * making the calibration conditional on that, use a polling based 875 * approach everywhere. 876 */ 877 local_irq_disable(); 878 879 /* 880 * Setup the APIC counter to maximum. There is no way the lapic 881 * can underflow in the 100ms detection time frame 882 */ 883 __setup_APIC_LVTT(0xffffffff, 0, 0); 884 885 /* 886 * Methods to terminate the calibration loop: 887 * 1) Global clockevent if available (jiffies) 888 * 2) TSC if available and frequency is known 889 */ 890 jif_start = READ_ONCE(jiffies); 891 892 if (tsc_khz) { 893 tsc_start = rdtsc(); 894 tsc_perj = div_u64((u64)tsc_khz * 1000, HZ); 895 } 896 897 /* 898 * Enable interrupts so the tick can fire, if a global 899 * clockevent device is available 900 */ 901 local_irq_enable(); 902 903 while (lapic_cal_loops <= LAPIC_CAL_LOOPS) { 904 /* Wait for a tick to elapse */ 905 while (1) { 906 if (tsc_khz) { 907 u64 tsc_now = rdtsc(); 908 if ((tsc_now - tsc_start) >= tsc_perj) { 909 tsc_start += tsc_perj; 910 break; 911 } 912 } else { 913 unsigned long jif_now = READ_ONCE(jiffies); 914 915 if (time_after(jif_now, jif_start)) { 916 jif_start = jif_now; 917 break; 918 } 919 } 920 cpu_relax(); 921 } 922 923 /* Invoke the calibration routine */ 924 local_irq_disable(); 925 lapic_cal_handler(NULL); 926 local_irq_enable(); 927 } 928 929 local_irq_disable(); 930 931 /* Build delta t1-t2 as apic timer counts down */ 932 delta = lapic_cal_t1 - lapic_cal_t2; 933 apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); 934 935 deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); 936 937 /* we trust the PM based calibration if possible */ 938 pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, 939 &delta, &deltatsc); 940 941 lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; 942 lapic_init_clockevent(); 943 944 apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); 945 apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult); 946 apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", 947 lapic_timer_period); 948 949 if (boot_cpu_has(X86_FEATURE_TSC)) { 950 apic_printk(APIC_VERBOSE, "..... CPU clock speed is " 951 "%ld.%04ld MHz.\n", 952 (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ), 953 (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ)); 954 } 955 956 apic_printk(APIC_VERBOSE, "..... host bus clock speed is " 957 "%u.%04u MHz.\n", 958 lapic_timer_period / (1000000 / HZ), 959 lapic_timer_period % (1000000 / HZ)); 960 961 /* 962 * Do a sanity check on the APIC calibration result 963 */ 964 if (lapic_timer_period < (1000000 / HZ)) { 965 local_irq_enable(); 966 pr_warn("APIC frequency too slow, disabling apic timer\n"); 967 return -1; 968 } 969 970 levt->features &= ~CLOCK_EVT_FEAT_DUMMY; 971 972 /* 973 * PM timer calibration failed or not turned on so lets try APIC 974 * timer based calibration, if a global clockevent device is 975 * available. 976 */ 977 if (!pm_referenced && global_clock_event) { 978 apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); 979 980 /* 981 * Setup the apic timer manually 982 */ 983 levt->event_handler = lapic_cal_handler; 984 lapic_timer_set_periodic(levt); 985 lapic_cal_loops = -1; 986 987 /* Let the interrupts run */ 988 local_irq_enable(); 989 990 while (lapic_cal_loops <= LAPIC_CAL_LOOPS) 991 cpu_relax(); 992 993 /* Stop the lapic timer */ 994 local_irq_disable(); 995 lapic_timer_shutdown(levt); 996 997 /* Jiffies delta */ 998 deltaj = lapic_cal_j2 - lapic_cal_j1; 999 apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); 1000 1001 /* Check, if the jiffies result is consistent */ 1002 if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) 1003 apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); 1004 else 1005 levt->features |= CLOCK_EVT_FEAT_DUMMY; 1006 } 1007 local_irq_enable(); 1008 1009 if (levt->features & CLOCK_EVT_FEAT_DUMMY) { 1010 pr_warn("APIC timer disabled due to verification failure\n"); 1011 return -1; 1012 } 1013 1014 return 0; 1015 } 1016 1017 /* 1018 * Setup the boot APIC 1019 * 1020 * Calibrate and verify the result. 1021 */ 1022 void __init setup_boot_APIC_clock(void) 1023 { 1024 /* 1025 * The local apic timer can be disabled via the kernel 1026 * commandline or from the CPU detection code. Register the lapic 1027 * timer as a dummy clock event source on SMP systems, so the 1028 * broadcast mechanism is used. On UP systems simply ignore it. 1029 */ 1030 if (disable_apic_timer) { 1031 pr_info("Disabling APIC timer\n"); 1032 /* No broadcast on UP ! */ 1033 if (num_possible_cpus() > 1) { 1034 lapic_clockevent.mult = 1; 1035 setup_APIC_timer(); 1036 } 1037 return; 1038 } 1039 1040 if (calibrate_APIC_clock()) { 1041 /* No broadcast on UP ! */ 1042 if (num_possible_cpus() > 1) 1043 setup_APIC_timer(); 1044 return; 1045 } 1046 1047 /* 1048 * If nmi_watchdog is set to IO_APIC, we need the 1049 * PIT/HPET going. Otherwise register lapic as a dummy 1050 * device. 1051 */ 1052 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; 1053 1054 /* Setup the lapic or request the broadcast */ 1055 setup_APIC_timer(); 1056 amd_e400_c1e_apic_setup(); 1057 } 1058 1059 void setup_secondary_APIC_clock(void) 1060 { 1061 setup_APIC_timer(); 1062 amd_e400_c1e_apic_setup(); 1063 } 1064 1065 /* 1066 * The guts of the apic timer interrupt 1067 */ 1068 static void local_apic_timer_interrupt(void) 1069 { 1070 struct clock_event_device *evt = this_cpu_ptr(&lapic_events); 1071 1072 /* 1073 * Normally we should not be here till LAPIC has been initialized but 1074 * in some cases like kdump, its possible that there is a pending LAPIC 1075 * timer interrupt from previous kernel's context and is delivered in 1076 * new kernel the moment interrupts are enabled. 1077 * 1078 * Interrupts are enabled early and LAPIC is setup much later, hence 1079 * its possible that when we get here evt->event_handler is NULL. 1080 * Check for event_handler being NULL and discard the interrupt as 1081 * spurious. 1082 */ 1083 if (!evt->event_handler) { 1084 pr_warn("Spurious LAPIC timer interrupt on cpu %d\n", 1085 smp_processor_id()); 1086 /* Switch it off */ 1087 lapic_timer_shutdown(evt); 1088 return; 1089 } 1090 1091 /* 1092 * the NMI deadlock-detector uses this. 1093 */ 1094 inc_irq_stat(apic_timer_irqs); 1095 1096 evt->event_handler(evt); 1097 } 1098 1099 /* 1100 * Local APIC timer interrupt. This is the most natural way for doing 1101 * local interrupts, but local timer interrupts can be emulated by 1102 * broadcast interrupts too. [in case the hw doesn't support APIC timers] 1103 * 1104 * [ if a single-CPU system runs an SMP kernel then we call the local 1105 * interrupt as well. Thus we cannot inline the local irq ... ] 1106 */ 1107 DEFINE_IDTENTRY_SYSVEC(sysvec_apic_timer_interrupt) 1108 { 1109 struct pt_regs *old_regs = set_irq_regs(regs); 1110 1111 ack_APIC_irq(); 1112 trace_local_timer_entry(LOCAL_TIMER_VECTOR); 1113 local_apic_timer_interrupt(); 1114 trace_local_timer_exit(LOCAL_TIMER_VECTOR); 1115 1116 set_irq_regs(old_regs); 1117 } 1118 1119 /* 1120 * Local APIC start and shutdown 1121 */ 1122 1123 /** 1124 * clear_local_APIC - shutdown the local APIC 1125 * 1126 * This is called, when a CPU is disabled and before rebooting, so the state of 1127 * the local APIC has no dangling leftovers. Also used to cleanout any BIOS 1128 * leftovers during boot. 1129 */ 1130 void clear_local_APIC(void) 1131 { 1132 int maxlvt; 1133 u32 v; 1134 1135 /* APIC hasn't been mapped yet */ 1136 if (!x2apic_mode && !apic_phys) 1137 return; 1138 1139 maxlvt = lapic_get_maxlvt(); 1140 /* 1141 * Masking an LVT entry can trigger a local APIC error 1142 * if the vector is zero. Mask LVTERR first to prevent this. 1143 */ 1144 if (maxlvt >= 3) { 1145 v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ 1146 apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); 1147 } 1148 /* 1149 * Careful: we have to set masks only first to deassert 1150 * any level-triggered sources. 1151 */ 1152 v = apic_read(APIC_LVTT); 1153 apic_write(APIC_LVTT, v | APIC_LVT_MASKED); 1154 v = apic_read(APIC_LVT0); 1155 apic_write(APIC_LVT0, v | APIC_LVT_MASKED); 1156 v = apic_read(APIC_LVT1); 1157 apic_write(APIC_LVT1, v | APIC_LVT_MASKED); 1158 if (maxlvt >= 4) { 1159 v = apic_read(APIC_LVTPC); 1160 apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); 1161 } 1162 1163 /* lets not touch this if we didn't frob it */ 1164 #ifdef CONFIG_X86_THERMAL_VECTOR 1165 if (maxlvt >= 5) { 1166 v = apic_read(APIC_LVTTHMR); 1167 apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); 1168 } 1169 #endif 1170 #ifdef CONFIG_X86_MCE_INTEL 1171 if (maxlvt >= 6) { 1172 v = apic_read(APIC_LVTCMCI); 1173 if (!(v & APIC_LVT_MASKED)) 1174 apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED); 1175 } 1176 #endif 1177 1178 /* 1179 * Clean APIC state for other OSs: 1180 */ 1181 apic_write(APIC_LVTT, APIC_LVT_MASKED); 1182 apic_write(APIC_LVT0, APIC_LVT_MASKED); 1183 apic_write(APIC_LVT1, APIC_LVT_MASKED); 1184 if (maxlvt >= 3) 1185 apic_write(APIC_LVTERR, APIC_LVT_MASKED); 1186 if (maxlvt >= 4) 1187 apic_write(APIC_LVTPC, APIC_LVT_MASKED); 1188 1189 /* Integrated APIC (!82489DX) ? */ 1190 if (lapic_is_integrated()) { 1191 if (maxlvt > 3) 1192 /* Clear ESR due to Pentium errata 3AP and 11AP */ 1193 apic_write(APIC_ESR, 0); 1194 apic_read(APIC_ESR); 1195 } 1196 } 1197 1198 /** 1199 * apic_soft_disable - Clears and software disables the local APIC on hotplug 1200 * 1201 * Contrary to disable_local_APIC() this does not touch the enable bit in 1202 * MSR_IA32_APICBASE. Clearing that bit on systems based on the 3 wire APIC 1203 * bus would require a hardware reset as the APIC would lose track of bus 1204 * arbitration. On systems with FSB delivery APICBASE could be disabled, 1205 * but it has to be guaranteed that no interrupt is sent to the APIC while 1206 * in that state and it's not clear from the SDM whether it still responds 1207 * to INIT/SIPI messages. Stay on the safe side and use software disable. 1208 */ 1209 void apic_soft_disable(void) 1210 { 1211 u32 value; 1212 1213 clear_local_APIC(); 1214 1215 /* Soft disable APIC (implies clearing of registers for 82489DX!). */ 1216 value = apic_read(APIC_SPIV); 1217 value &= ~APIC_SPIV_APIC_ENABLED; 1218 apic_write(APIC_SPIV, value); 1219 } 1220 1221 /** 1222 * disable_local_APIC - clear and disable the local APIC 1223 */ 1224 void disable_local_APIC(void) 1225 { 1226 /* APIC hasn't been mapped yet */ 1227 if (!x2apic_mode && !apic_phys) 1228 return; 1229 1230 apic_soft_disable(); 1231 1232 #ifdef CONFIG_X86_32 1233 /* 1234 * When LAPIC was disabled by the BIOS and enabled by the kernel, 1235 * restore the disabled state. 1236 */ 1237 if (enabled_via_apicbase) { 1238 unsigned int l, h; 1239 1240 rdmsr(MSR_IA32_APICBASE, l, h); 1241 l &= ~MSR_IA32_APICBASE_ENABLE; 1242 wrmsr(MSR_IA32_APICBASE, l, h); 1243 } 1244 #endif 1245 } 1246 1247 /* 1248 * If Linux enabled the LAPIC against the BIOS default disable it down before 1249 * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and 1250 * not power-off. Additionally clear all LVT entries before disable_local_APIC 1251 * for the case where Linux didn't enable the LAPIC. 1252 */ 1253 void lapic_shutdown(void) 1254 { 1255 unsigned long flags; 1256 1257 if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config()) 1258 return; 1259 1260 local_irq_save(flags); 1261 1262 #ifdef CONFIG_X86_32 1263 if (!enabled_via_apicbase) 1264 clear_local_APIC(); 1265 else 1266 #endif 1267 disable_local_APIC(); 1268 1269 1270 local_irq_restore(flags); 1271 } 1272 1273 /** 1274 * sync_Arb_IDs - synchronize APIC bus arbitration IDs 1275 */ 1276 void __init sync_Arb_IDs(void) 1277 { 1278 /* 1279 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not 1280 * needed on AMD. 1281 */ 1282 if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) 1283 return; 1284 1285 /* 1286 * Wait for idle. 1287 */ 1288 apic_wait_icr_idle(); 1289 1290 apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); 1291 apic_write(APIC_ICR, APIC_DEST_ALLINC | 1292 APIC_INT_LEVELTRIG | APIC_DM_INIT); 1293 } 1294 1295 enum apic_intr_mode_id apic_intr_mode __ro_after_init; 1296 1297 static int __init __apic_intr_mode_select(void) 1298 { 1299 /* Check kernel option */ 1300 if (disable_apic) { 1301 pr_info("APIC disabled via kernel command line\n"); 1302 return APIC_PIC; 1303 } 1304 1305 /* Check BIOS */ 1306 #ifdef CONFIG_X86_64 1307 /* On 64-bit, the APIC must be integrated, Check local APIC only */ 1308 if (!boot_cpu_has(X86_FEATURE_APIC)) { 1309 disable_apic = 1; 1310 pr_info("APIC disabled by BIOS\n"); 1311 return APIC_PIC; 1312 } 1313 #else 1314 /* On 32-bit, the APIC may be integrated APIC or 82489DX */ 1315 1316 /* Neither 82489DX nor integrated APIC ? */ 1317 if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) { 1318 disable_apic = 1; 1319 return APIC_PIC; 1320 } 1321 1322 /* If the BIOS pretends there is an integrated APIC ? */ 1323 if (!boot_cpu_has(X86_FEATURE_APIC) && 1324 APIC_INTEGRATED(boot_cpu_apic_version)) { 1325 disable_apic = 1; 1326 pr_err(FW_BUG "Local APIC %d not detected, force emulation\n", 1327 boot_cpu_physical_apicid); 1328 return APIC_PIC; 1329 } 1330 #endif 1331 1332 /* Check MP table or ACPI MADT configuration */ 1333 if (!smp_found_config) { 1334 disable_ioapic_support(); 1335 if (!acpi_lapic) { 1336 pr_info("APIC: ACPI MADT or MP tables are not detected\n"); 1337 return APIC_VIRTUAL_WIRE_NO_CONFIG; 1338 } 1339 return APIC_VIRTUAL_WIRE; 1340 } 1341 1342 #ifdef CONFIG_SMP 1343 /* If SMP should be disabled, then really disable it! */ 1344 if (!setup_max_cpus) { 1345 pr_info("APIC: SMP mode deactivated\n"); 1346 return APIC_SYMMETRIC_IO_NO_ROUTING; 1347 } 1348 1349 if (read_apic_id() != boot_cpu_physical_apicid) { 1350 panic("Boot APIC ID in local APIC unexpected (%d vs %d)", 1351 read_apic_id(), boot_cpu_physical_apicid); 1352 /* Or can we switch back to PIC here? */ 1353 } 1354 #endif 1355 1356 return APIC_SYMMETRIC_IO; 1357 } 1358 1359 /* Select the interrupt delivery mode for the BSP */ 1360 void __init apic_intr_mode_select(void) 1361 { 1362 apic_intr_mode = __apic_intr_mode_select(); 1363 } 1364 1365 /* 1366 * An initial setup of the virtual wire mode. 1367 */ 1368 void __init init_bsp_APIC(void) 1369 { 1370 unsigned int value; 1371 1372 /* 1373 * Don't do the setup now if we have a SMP BIOS as the 1374 * through-I/O-APIC virtual wire mode might be active. 1375 */ 1376 if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC)) 1377 return; 1378 1379 /* 1380 * Do not trust the local APIC being empty at bootup. 1381 */ 1382 clear_local_APIC(); 1383 1384 /* 1385 * Enable APIC. 1386 */ 1387 value = apic_read(APIC_SPIV); 1388 value &= ~APIC_VECTOR_MASK; 1389 value |= APIC_SPIV_APIC_ENABLED; 1390 1391 #ifdef CONFIG_X86_32 1392 /* This bit is reserved on P4/Xeon and should be cleared */ 1393 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && 1394 (boot_cpu_data.x86 == 15)) 1395 value &= ~APIC_SPIV_FOCUS_DISABLED; 1396 else 1397 #endif 1398 value |= APIC_SPIV_FOCUS_DISABLED; 1399 value |= SPURIOUS_APIC_VECTOR; 1400 apic_write(APIC_SPIV, value); 1401 1402 /* 1403 * Set up the virtual wire mode. 1404 */ 1405 apic_write(APIC_LVT0, APIC_DM_EXTINT); 1406 value = APIC_DM_NMI; 1407 if (!lapic_is_integrated()) /* 82489DX */ 1408 value |= APIC_LVT_LEVEL_TRIGGER; 1409 if (apic_extnmi == APIC_EXTNMI_NONE) 1410 value |= APIC_LVT_MASKED; 1411 apic_write(APIC_LVT1, value); 1412 } 1413 1414 static void __init apic_bsp_setup(bool upmode); 1415 1416 /* Init the interrupt delivery mode for the BSP */ 1417 void __init apic_intr_mode_init(void) 1418 { 1419 bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT); 1420 1421 switch (apic_intr_mode) { 1422 case APIC_PIC: 1423 pr_info("APIC: Keep in PIC mode(8259)\n"); 1424 return; 1425 case APIC_VIRTUAL_WIRE: 1426 pr_info("APIC: Switch to virtual wire mode setup\n"); 1427 break; 1428 case APIC_VIRTUAL_WIRE_NO_CONFIG: 1429 pr_info("APIC: Switch to virtual wire mode setup with no configuration\n"); 1430 upmode = true; 1431 break; 1432 case APIC_SYMMETRIC_IO: 1433 pr_info("APIC: Switch to symmetric I/O mode setup\n"); 1434 break; 1435 case APIC_SYMMETRIC_IO_NO_ROUTING: 1436 pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n"); 1437 break; 1438 } 1439 1440 default_setup_apic_routing(); 1441 1442 if (x86_platform.apic_post_init) 1443 x86_platform.apic_post_init(); 1444 1445 apic_bsp_setup(upmode); 1446 } 1447 1448 static void lapic_setup_esr(void) 1449 { 1450 unsigned int oldvalue, value, maxlvt; 1451 1452 if (!lapic_is_integrated()) { 1453 pr_info("No ESR for 82489DX.\n"); 1454 return; 1455 } 1456 1457 if (apic->disable_esr) { 1458 /* 1459 * Something untraceable is creating bad interrupts on 1460 * secondary quads ... for the moment, just leave the 1461 * ESR disabled - we can't do anything useful with the 1462 * errors anyway - mbligh 1463 */ 1464 pr_info("Leaving ESR disabled.\n"); 1465 return; 1466 } 1467 1468 maxlvt = lapic_get_maxlvt(); 1469 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ 1470 apic_write(APIC_ESR, 0); 1471 oldvalue = apic_read(APIC_ESR); 1472 1473 /* enables sending errors */ 1474 value = ERROR_APIC_VECTOR; 1475 apic_write(APIC_LVTERR, value); 1476 1477 /* 1478 * spec says clear errors after enabling vector. 1479 */ 1480 if (maxlvt > 3) 1481 apic_write(APIC_ESR, 0); 1482 value = apic_read(APIC_ESR); 1483 if (value != oldvalue) 1484 apic_printk(APIC_VERBOSE, "ESR value before enabling " 1485 "vector: 0x%08x after: 0x%08x\n", 1486 oldvalue, value); 1487 } 1488 1489 #define APIC_IR_REGS APIC_ISR_NR 1490 #define APIC_IR_BITS (APIC_IR_REGS * 32) 1491 #define APIC_IR_MAPSIZE (APIC_IR_BITS / BITS_PER_LONG) 1492 1493 union apic_ir { 1494 unsigned long map[APIC_IR_MAPSIZE]; 1495 u32 regs[APIC_IR_REGS]; 1496 }; 1497 1498 static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr) 1499 { 1500 int i, bit; 1501 1502 /* Read the IRRs */ 1503 for (i = 0; i < APIC_IR_REGS; i++) 1504 irr->regs[i] = apic_read(APIC_IRR + i * 0x10); 1505 1506 /* Read the ISRs */ 1507 for (i = 0; i < APIC_IR_REGS; i++) 1508 isr->regs[i] = apic_read(APIC_ISR + i * 0x10); 1509 1510 /* 1511 * If the ISR map is not empty. ACK the APIC and run another round 1512 * to verify whether a pending IRR has been unblocked and turned 1513 * into a ISR. 1514 */ 1515 if (!bitmap_empty(isr->map, APIC_IR_BITS)) { 1516 /* 1517 * There can be multiple ISR bits set when a high priority 1518 * interrupt preempted a lower priority one. Issue an ACK 1519 * per set bit. 1520 */ 1521 for_each_set_bit(bit, isr->map, APIC_IR_BITS) 1522 ack_APIC_irq(); 1523 return true; 1524 } 1525 1526 return !bitmap_empty(irr->map, APIC_IR_BITS); 1527 } 1528 1529 /* 1530 * After a crash, we no longer service the interrupts and a pending 1531 * interrupt from previous kernel might still have ISR bit set. 1532 * 1533 * Most probably by now the CPU has serviced that pending interrupt and it 1534 * might not have done the ack_APIC_irq() because it thought, interrupt 1535 * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear 1536 * the ISR bit and cpu thinks it has already serviced the interrupt. Hence 1537 * a vector might get locked. It was noticed for timer irq (vector 1538 * 0x31). Issue an extra EOI to clear ISR. 1539 * 1540 * If there are pending IRR bits they turn into ISR bits after a higher 1541 * priority ISR bit has been acked. 1542 */ 1543 static void apic_pending_intr_clear(void) 1544 { 1545 union apic_ir irr, isr; 1546 unsigned int i; 1547 1548 /* 512 loops are way oversized and give the APIC a chance to obey. */ 1549 for (i = 0; i < 512; i++) { 1550 if (!apic_check_and_ack(&irr, &isr)) 1551 return; 1552 } 1553 /* Dump the IRR/ISR content if that failed */ 1554 pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map); 1555 } 1556 1557 /** 1558 * setup_local_APIC - setup the local APIC 1559 * 1560 * Used to setup local APIC while initializing BSP or bringing up APs. 1561 * Always called with preemption disabled. 1562 */ 1563 static void setup_local_APIC(void) 1564 { 1565 int cpu = smp_processor_id(); 1566 unsigned int value; 1567 1568 if (disable_apic) { 1569 disable_ioapic_support(); 1570 return; 1571 } 1572 1573 /* 1574 * If this comes from kexec/kcrash the APIC might be enabled in 1575 * SPIV. Soft disable it before doing further initialization. 1576 */ 1577 value = apic_read(APIC_SPIV); 1578 value &= ~APIC_SPIV_APIC_ENABLED; 1579 apic_write(APIC_SPIV, value); 1580 1581 #ifdef CONFIG_X86_32 1582 /* Pound the ESR really hard over the head with a big hammer - mbligh */ 1583 if (lapic_is_integrated() && apic->disable_esr) { 1584 apic_write(APIC_ESR, 0); 1585 apic_write(APIC_ESR, 0); 1586 apic_write(APIC_ESR, 0); 1587 apic_write(APIC_ESR, 0); 1588 } 1589 #endif 1590 /* 1591 * Double-check whether this APIC is really registered. 1592 * This is meaningless in clustered apic mode, so we skip it. 1593 */ 1594 BUG_ON(!apic->apic_id_registered()); 1595 1596 /* 1597 * Intel recommends to set DFR, LDR and TPR before enabling 1598 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel 1599 * document number 292116). So here it goes... 1600 */ 1601 apic->init_apic_ldr(); 1602 1603 #ifdef CONFIG_X86_32 1604 if (apic->dest_mode_logical) { 1605 int logical_apicid, ldr_apicid; 1606 1607 /* 1608 * APIC LDR is initialized. If logical_apicid mapping was 1609 * initialized during get_smp_config(), make sure it matches 1610 * the actual value. 1611 */ 1612 logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); 1613 ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); 1614 if (logical_apicid != BAD_APICID) 1615 WARN_ON(logical_apicid != ldr_apicid); 1616 /* Always use the value from LDR. */ 1617 early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid; 1618 } 1619 #endif 1620 1621 /* 1622 * Set Task Priority to 'accept all except vectors 0-31'. An APIC 1623 * vector in the 16-31 range could be delivered if TPR == 0, but we 1624 * would think it's an exception and terrible things will happen. We 1625 * never change this later on. 1626 */ 1627 value = apic_read(APIC_TASKPRI); 1628 value &= ~APIC_TPRI_MASK; 1629 value |= 0x10; 1630 apic_write(APIC_TASKPRI, value); 1631 1632 /* Clear eventually stale ISR/IRR bits */ 1633 apic_pending_intr_clear(); 1634 1635 /* 1636 * Now that we are all set up, enable the APIC 1637 */ 1638 value = apic_read(APIC_SPIV); 1639 value &= ~APIC_VECTOR_MASK; 1640 /* 1641 * Enable APIC 1642 */ 1643 value |= APIC_SPIV_APIC_ENABLED; 1644 1645 #ifdef CONFIG_X86_32 1646 /* 1647 * Some unknown Intel IO/APIC (or APIC) errata is biting us with 1648 * certain networking cards. If high frequency interrupts are 1649 * happening on a particular IOAPIC pin, plus the IOAPIC routing 1650 * entry is masked/unmasked at a high rate as well then sooner or 1651 * later IOAPIC line gets 'stuck', no more interrupts are received 1652 * from the device. If focus CPU is disabled then the hang goes 1653 * away, oh well :-( 1654 * 1655 * [ This bug can be reproduced easily with a level-triggered 1656 * PCI Ne2000 networking cards and PII/PIII processors, dual 1657 * BX chipset. ] 1658 */ 1659 /* 1660 * Actually disabling the focus CPU check just makes the hang less 1661 * frequent as it makes the interrupt distribution model be more 1662 * like LRU than MRU (the short-term load is more even across CPUs). 1663 */ 1664 1665 /* 1666 * - enable focus processor (bit==0) 1667 * - 64bit mode always use processor focus 1668 * so no need to set it 1669 */ 1670 value &= ~APIC_SPIV_FOCUS_DISABLED; 1671 #endif 1672 1673 /* 1674 * Set spurious IRQ vector 1675 */ 1676 value |= SPURIOUS_APIC_VECTOR; 1677 apic_write(APIC_SPIV, value); 1678 1679 perf_events_lapic_init(); 1680 1681 /* 1682 * Set up LVT0, LVT1: 1683 * 1684 * set up through-local-APIC on the boot CPU's LINT0. This is not 1685 * strictly necessary in pure symmetric-IO mode, but sometimes 1686 * we delegate interrupts to the 8259A. 1687 */ 1688 /* 1689 * TODO: set up through-local-APIC from through-I/O-APIC? --macro 1690 */ 1691 value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; 1692 if (!cpu && (pic_mode || !value || skip_ioapic_setup)) { 1693 value = APIC_DM_EXTINT; 1694 apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu); 1695 } else { 1696 value = APIC_DM_EXTINT | APIC_LVT_MASKED; 1697 apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu); 1698 } 1699 apic_write(APIC_LVT0, value); 1700 1701 /* 1702 * Only the BSP sees the LINT1 NMI signal by default. This can be 1703 * modified by apic_extnmi= boot option. 1704 */ 1705 if ((!cpu && apic_extnmi != APIC_EXTNMI_NONE) || 1706 apic_extnmi == APIC_EXTNMI_ALL) 1707 value = APIC_DM_NMI; 1708 else 1709 value = APIC_DM_NMI | APIC_LVT_MASKED; 1710 1711 /* Is 82489DX ? */ 1712 if (!lapic_is_integrated()) 1713 value |= APIC_LVT_LEVEL_TRIGGER; 1714 apic_write(APIC_LVT1, value); 1715 1716 #ifdef CONFIG_X86_MCE_INTEL 1717 /* Recheck CMCI information after local APIC is up on CPU #0 */ 1718 if (!cpu) 1719 cmci_recheck(); 1720 #endif 1721 } 1722 1723 static void end_local_APIC_setup(void) 1724 { 1725 lapic_setup_esr(); 1726 1727 #ifdef CONFIG_X86_32 1728 { 1729 unsigned int value; 1730 /* Disable the local apic timer */ 1731 value = apic_read(APIC_LVTT); 1732 value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); 1733 apic_write(APIC_LVTT, value); 1734 } 1735 #endif 1736 1737 apic_pm_activate(); 1738 } 1739 1740 /* 1741 * APIC setup function for application processors. Called from smpboot.c 1742 */ 1743 void apic_ap_setup(void) 1744 { 1745 setup_local_APIC(); 1746 end_local_APIC_setup(); 1747 } 1748 1749 #ifdef CONFIG_X86_X2APIC 1750 int x2apic_mode; 1751 EXPORT_SYMBOL_GPL(x2apic_mode); 1752 1753 enum { 1754 X2APIC_OFF, 1755 X2APIC_DISABLED, 1756 /* All states below here have X2APIC enabled */ 1757 X2APIC_ON, 1758 X2APIC_ON_LOCKED 1759 }; 1760 static int x2apic_state; 1761 1762 static bool x2apic_hw_locked(void) 1763 { 1764 u64 ia32_cap; 1765 u64 msr; 1766 1767 ia32_cap = x86_read_arch_cap_msr(); 1768 if (ia32_cap & ARCH_CAP_XAPIC_DISABLE) { 1769 rdmsrl(MSR_IA32_XAPIC_DISABLE_STATUS, msr); 1770 return (msr & LEGACY_XAPIC_DISABLED); 1771 } 1772 return false; 1773 } 1774 1775 static void __x2apic_disable(void) 1776 { 1777 u64 msr; 1778 1779 if (!boot_cpu_has(X86_FEATURE_APIC)) 1780 return; 1781 1782 rdmsrl(MSR_IA32_APICBASE, msr); 1783 if (!(msr & X2APIC_ENABLE)) 1784 return; 1785 /* Disable xapic and x2apic first and then reenable xapic mode */ 1786 wrmsrl(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE)); 1787 wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE); 1788 printk_once(KERN_INFO "x2apic disabled\n"); 1789 } 1790 1791 static void __x2apic_enable(void) 1792 { 1793 u64 msr; 1794 1795 rdmsrl(MSR_IA32_APICBASE, msr); 1796 if (msr & X2APIC_ENABLE) 1797 return; 1798 wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE); 1799 printk_once(KERN_INFO "x2apic enabled\n"); 1800 } 1801 1802 static int __init setup_nox2apic(char *str) 1803 { 1804 if (x2apic_enabled()) { 1805 int apicid = native_apic_msr_read(APIC_ID); 1806 1807 if (apicid >= 255) { 1808 pr_warn("Apicid: %08x, cannot enforce nox2apic\n", 1809 apicid); 1810 return 0; 1811 } 1812 if (x2apic_hw_locked()) { 1813 pr_warn("APIC locked in x2apic mode, can't disable\n"); 1814 return 0; 1815 } 1816 pr_warn("x2apic already enabled.\n"); 1817 __x2apic_disable(); 1818 } 1819 setup_clear_cpu_cap(X86_FEATURE_X2APIC); 1820 x2apic_state = X2APIC_DISABLED; 1821 x2apic_mode = 0; 1822 return 0; 1823 } 1824 early_param("nox2apic", setup_nox2apic); 1825 1826 /* Called from cpu_init() to enable x2apic on (secondary) cpus */ 1827 void x2apic_setup(void) 1828 { 1829 /* 1830 * Try to make the AP's APIC state match that of the BSP, but if the 1831 * BSP is unlocked and the AP is locked then there is a state mismatch. 1832 * Warn about the mismatch in case a GP fault occurs due to a locked AP 1833 * trying to be turned off. 1834 */ 1835 if (x2apic_state != X2APIC_ON_LOCKED && x2apic_hw_locked()) 1836 pr_warn("x2apic lock mismatch between BSP and AP.\n"); 1837 /* 1838 * If x2apic is not in ON or LOCKED state, disable it if already enabled 1839 * from BIOS. 1840 */ 1841 if (x2apic_state < X2APIC_ON) { 1842 __x2apic_disable(); 1843 return; 1844 } 1845 __x2apic_enable(); 1846 } 1847 1848 static __init void x2apic_disable(void) 1849 { 1850 u32 x2apic_id, state = x2apic_state; 1851 1852 x2apic_mode = 0; 1853 x2apic_state = X2APIC_DISABLED; 1854 1855 if (state != X2APIC_ON) 1856 return; 1857 1858 x2apic_id = read_apic_id(); 1859 if (x2apic_id >= 255) 1860 panic("Cannot disable x2apic, id: %08x\n", x2apic_id); 1861 1862 if (x2apic_hw_locked()) { 1863 pr_warn("Cannot disable locked x2apic, id: %08x\n", x2apic_id); 1864 return; 1865 } 1866 1867 __x2apic_disable(); 1868 register_lapic_address(mp_lapic_addr); 1869 } 1870 1871 static __init void x2apic_enable(void) 1872 { 1873 if (x2apic_state != X2APIC_OFF) 1874 return; 1875 1876 x2apic_mode = 1; 1877 x2apic_state = X2APIC_ON; 1878 __x2apic_enable(); 1879 } 1880 1881 static __init void try_to_enable_x2apic(int remap_mode) 1882 { 1883 if (x2apic_state == X2APIC_DISABLED) 1884 return; 1885 1886 if (remap_mode != IRQ_REMAP_X2APIC_MODE) { 1887 u32 apic_limit = 255; 1888 1889 /* 1890 * Using X2APIC without IR is not architecturally supported 1891 * on bare metal but may be supported in guests. 1892 */ 1893 if (!x86_init.hyper.x2apic_available()) { 1894 pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n"); 1895 x2apic_disable(); 1896 return; 1897 } 1898 1899 /* 1900 * If the hypervisor supports extended destination ID in 1901 * MSI, that increases the maximum APIC ID that can be 1902 * used for non-remapped IRQ domains. 1903 */ 1904 if (x86_init.hyper.msi_ext_dest_id()) { 1905 virt_ext_dest_id = 1; 1906 apic_limit = 32767; 1907 } 1908 1909 /* 1910 * Without IR, all CPUs can be addressed by IOAPIC/MSI only 1911 * in physical mode, and CPUs with an APIC ID that cannot 1912 * be addressed must not be brought online. 1913 */ 1914 x2apic_set_max_apicid(apic_limit); 1915 x2apic_phys = 1; 1916 } 1917 x2apic_enable(); 1918 } 1919 1920 void __init check_x2apic(void) 1921 { 1922 if (x2apic_enabled()) { 1923 pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n"); 1924 x2apic_mode = 1; 1925 if (x2apic_hw_locked()) 1926 x2apic_state = X2APIC_ON_LOCKED; 1927 else 1928 x2apic_state = X2APIC_ON; 1929 } else if (!boot_cpu_has(X86_FEATURE_X2APIC)) { 1930 x2apic_state = X2APIC_DISABLED; 1931 } 1932 } 1933 #else /* CONFIG_X86_X2APIC */ 1934 void __init check_x2apic(void) 1935 { 1936 if (!apic_is_x2apic_enabled()) 1937 return; 1938 /* 1939 * Checkme: Can we simply turn off x2APIC here instead of disabling the APIC? 1940 */ 1941 pr_err("Kernel does not support x2APIC, please recompile with CONFIG_X86_X2APIC.\n"); 1942 pr_err("Disabling APIC, expect reduced performance and functionality.\n"); 1943 1944 disable_apic = 1; 1945 setup_clear_cpu_cap(X86_FEATURE_APIC); 1946 } 1947 1948 static inline void try_to_enable_x2apic(int remap_mode) { } 1949 static inline void __x2apic_enable(void) { } 1950 #endif /* !CONFIG_X86_X2APIC */ 1951 1952 void __init enable_IR_x2apic(void) 1953 { 1954 unsigned long flags; 1955 int ret, ir_stat; 1956 1957 if (skip_ioapic_setup) { 1958 pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n"); 1959 return; 1960 } 1961 1962 ir_stat = irq_remapping_prepare(); 1963 if (ir_stat < 0 && !x2apic_supported()) 1964 return; 1965 1966 ret = save_ioapic_entries(); 1967 if (ret) { 1968 pr_info("Saving IO-APIC state failed: %d\n", ret); 1969 return; 1970 } 1971 1972 local_irq_save(flags); 1973 legacy_pic->mask_all(); 1974 mask_ioapic_entries(); 1975 1976 /* If irq_remapping_prepare() succeeded, try to enable it */ 1977 if (ir_stat >= 0) 1978 ir_stat = irq_remapping_enable(); 1979 /* ir_stat contains the remap mode or an error code */ 1980 try_to_enable_x2apic(ir_stat); 1981 1982 if (ir_stat < 0) 1983 restore_ioapic_entries(); 1984 legacy_pic->restore_mask(); 1985 local_irq_restore(flags); 1986 } 1987 1988 #ifdef CONFIG_X86_64 1989 /* 1990 * Detect and enable local APICs on non-SMP boards. 1991 * Original code written by Keir Fraser. 1992 * On AMD64 we trust the BIOS - if it says no APIC it is likely 1993 * not correctly set up (usually the APIC timer won't work etc.) 1994 */ 1995 static int __init detect_init_APIC(void) 1996 { 1997 if (!boot_cpu_has(X86_FEATURE_APIC)) { 1998 pr_info("No local APIC present\n"); 1999 return -1; 2000 } 2001 2002 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; 2003 return 0; 2004 } 2005 #else 2006 2007 static int __init apic_verify(void) 2008 { 2009 u32 features, h, l; 2010 2011 /* 2012 * The APIC feature bit should now be enabled 2013 * in `cpuid' 2014 */ 2015 features = cpuid_edx(1); 2016 if (!(features & (1 << X86_FEATURE_APIC))) { 2017 pr_warn("Could not enable APIC!\n"); 2018 return -1; 2019 } 2020 set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); 2021 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; 2022 2023 /* The BIOS may have set up the APIC at some other address */ 2024 if (boot_cpu_data.x86 >= 6) { 2025 rdmsr(MSR_IA32_APICBASE, l, h); 2026 if (l & MSR_IA32_APICBASE_ENABLE) 2027 mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; 2028 } 2029 2030 pr_info("Found and enabled local APIC!\n"); 2031 return 0; 2032 } 2033 2034 int __init apic_force_enable(unsigned long addr) 2035 { 2036 u32 h, l; 2037 2038 if (disable_apic) 2039 return -1; 2040 2041 /* 2042 * Some BIOSes disable the local APIC in the APIC_BASE 2043 * MSR. This can only be done in software for Intel P6 or later 2044 * and AMD K7 (Model > 1) or later. 2045 */ 2046 if (boot_cpu_data.x86 >= 6) { 2047 rdmsr(MSR_IA32_APICBASE, l, h); 2048 if (!(l & MSR_IA32_APICBASE_ENABLE)) { 2049 pr_info("Local APIC disabled by BIOS -- reenabling.\n"); 2050 l &= ~MSR_IA32_APICBASE_BASE; 2051 l |= MSR_IA32_APICBASE_ENABLE | addr; 2052 wrmsr(MSR_IA32_APICBASE, l, h); 2053 enabled_via_apicbase = 1; 2054 } 2055 } 2056 return apic_verify(); 2057 } 2058 2059 /* 2060 * Detect and initialize APIC 2061 */ 2062 static int __init detect_init_APIC(void) 2063 { 2064 /* Disabled by kernel option? */ 2065 if (disable_apic) 2066 return -1; 2067 2068 switch (boot_cpu_data.x86_vendor) { 2069 case X86_VENDOR_AMD: 2070 if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) || 2071 (boot_cpu_data.x86 >= 15)) 2072 break; 2073 goto no_apic; 2074 case X86_VENDOR_HYGON: 2075 break; 2076 case X86_VENDOR_INTEL: 2077 if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || 2078 (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC))) 2079 break; 2080 goto no_apic; 2081 default: 2082 goto no_apic; 2083 } 2084 2085 if (!boot_cpu_has(X86_FEATURE_APIC)) { 2086 /* 2087 * Over-ride BIOS and try to enable the local APIC only if 2088 * "lapic" specified. 2089 */ 2090 if (!force_enable_local_apic) { 2091 pr_info("Local APIC disabled by BIOS -- " 2092 "you can enable it with \"lapic\"\n"); 2093 return -1; 2094 } 2095 if (apic_force_enable(APIC_DEFAULT_PHYS_BASE)) 2096 return -1; 2097 } else { 2098 if (apic_verify()) 2099 return -1; 2100 } 2101 2102 apic_pm_activate(); 2103 2104 return 0; 2105 2106 no_apic: 2107 pr_info("No local APIC present or hardware disabled\n"); 2108 return -1; 2109 } 2110 #endif 2111 2112 /** 2113 * init_apic_mappings - initialize APIC mappings 2114 */ 2115 void __init init_apic_mappings(void) 2116 { 2117 unsigned int new_apicid; 2118 2119 if (apic_validate_deadline_timer()) 2120 pr_info("TSC deadline timer available\n"); 2121 2122 if (x2apic_mode) { 2123 boot_cpu_physical_apicid = read_apic_id(); 2124 return; 2125 } 2126 2127 /* If no local APIC can be found return early */ 2128 if (!smp_found_config && detect_init_APIC()) { 2129 /* lets NOP'ify apic operations */ 2130 pr_info("APIC: disable apic facility\n"); 2131 apic_disable(); 2132 } else { 2133 apic_phys = mp_lapic_addr; 2134 2135 /* 2136 * If the system has ACPI MADT tables or MP info, the LAPIC 2137 * address is already registered. 2138 */ 2139 if (!acpi_lapic && !smp_found_config) 2140 register_lapic_address(apic_phys); 2141 } 2142 2143 /* 2144 * Fetch the APIC ID of the BSP in case we have a 2145 * default configuration (or the MP table is broken). 2146 */ 2147 new_apicid = read_apic_id(); 2148 if (boot_cpu_physical_apicid != new_apicid) { 2149 boot_cpu_physical_apicid = new_apicid; 2150 /* 2151 * yeah -- we lie about apic_version 2152 * in case if apic was disabled via boot option 2153 * but it's not a problem for SMP compiled kernel 2154 * since apic_intr_mode_select is prepared for such 2155 * a case and disable smp mode 2156 */ 2157 boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR)); 2158 } 2159 } 2160 2161 void __init register_lapic_address(unsigned long address) 2162 { 2163 mp_lapic_addr = address; 2164 2165 if (!x2apic_mode) { 2166 set_fixmap_nocache(FIX_APIC_BASE, address); 2167 apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", 2168 APIC_BASE, address); 2169 } 2170 if (boot_cpu_physical_apicid == -1U) { 2171 boot_cpu_physical_apicid = read_apic_id(); 2172 boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR)); 2173 } 2174 } 2175 2176 /* 2177 * Local APIC interrupts 2178 */ 2179 2180 /* 2181 * Common handling code for spurious_interrupt and spurious_vector entry 2182 * points below. No point in allowing the compiler to inline it twice. 2183 */ 2184 static noinline void handle_spurious_interrupt(u8 vector) 2185 { 2186 u32 v; 2187 2188 trace_spurious_apic_entry(vector); 2189 2190 inc_irq_stat(irq_spurious_count); 2191 2192 /* 2193 * If this is a spurious interrupt then do not acknowledge 2194 */ 2195 if (vector == SPURIOUS_APIC_VECTOR) { 2196 /* See SDM vol 3 */ 2197 pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n", 2198 smp_processor_id()); 2199 goto out; 2200 } 2201 2202 /* 2203 * If it is a vectored one, verify it's set in the ISR. If set, 2204 * acknowledge it. 2205 */ 2206 v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1)); 2207 if (v & (1 << (vector & 0x1f))) { 2208 pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n", 2209 vector, smp_processor_id()); 2210 ack_APIC_irq(); 2211 } else { 2212 pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n", 2213 vector, smp_processor_id()); 2214 } 2215 out: 2216 trace_spurious_apic_exit(vector); 2217 } 2218 2219 /** 2220 * spurious_interrupt - Catch all for interrupts raised on unused vectors 2221 * @regs: Pointer to pt_regs on stack 2222 * @vector: The vector number 2223 * 2224 * This is invoked from ASM entry code to catch all interrupts which 2225 * trigger on an entry which is routed to the common_spurious idtentry 2226 * point. 2227 */ 2228 DEFINE_IDTENTRY_IRQ(spurious_interrupt) 2229 { 2230 handle_spurious_interrupt(vector); 2231 } 2232 2233 DEFINE_IDTENTRY_SYSVEC(sysvec_spurious_apic_interrupt) 2234 { 2235 handle_spurious_interrupt(SPURIOUS_APIC_VECTOR); 2236 } 2237 2238 /* 2239 * This interrupt should never happen with our APIC/SMP architecture 2240 */ 2241 DEFINE_IDTENTRY_SYSVEC(sysvec_error_interrupt) 2242 { 2243 static const char * const error_interrupt_reason[] = { 2244 "Send CS error", /* APIC Error Bit 0 */ 2245 "Receive CS error", /* APIC Error Bit 1 */ 2246 "Send accept error", /* APIC Error Bit 2 */ 2247 "Receive accept error", /* APIC Error Bit 3 */ 2248 "Redirectable IPI", /* APIC Error Bit 4 */ 2249 "Send illegal vector", /* APIC Error Bit 5 */ 2250 "Received illegal vector", /* APIC Error Bit 6 */ 2251 "Illegal register address", /* APIC Error Bit 7 */ 2252 }; 2253 u32 v, i = 0; 2254 2255 trace_error_apic_entry(ERROR_APIC_VECTOR); 2256 2257 /* First tickle the hardware, only then report what went on. -- REW */ 2258 if (lapic_get_maxlvt() > 3) /* Due to the Pentium erratum 3AP. */ 2259 apic_write(APIC_ESR, 0); 2260 v = apic_read(APIC_ESR); 2261 ack_APIC_irq(); 2262 atomic_inc(&irq_err_count); 2263 2264 apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x", 2265 smp_processor_id(), v); 2266 2267 v &= 0xff; 2268 while (v) { 2269 if (v & 0x1) 2270 apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); 2271 i++; 2272 v >>= 1; 2273 } 2274 2275 apic_printk(APIC_DEBUG, KERN_CONT "\n"); 2276 2277 trace_error_apic_exit(ERROR_APIC_VECTOR); 2278 } 2279 2280 /** 2281 * connect_bsp_APIC - attach the APIC to the interrupt system 2282 */ 2283 static void __init connect_bsp_APIC(void) 2284 { 2285 #ifdef CONFIG_X86_32 2286 if (pic_mode) { 2287 /* 2288 * Do not trust the local APIC being empty at bootup. 2289 */ 2290 clear_local_APIC(); 2291 /* 2292 * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's 2293 * local APIC to INT and NMI lines. 2294 */ 2295 apic_printk(APIC_VERBOSE, "leaving PIC mode, " 2296 "enabling APIC mode.\n"); 2297 imcr_pic_to_apic(); 2298 } 2299 #endif 2300 } 2301 2302 /** 2303 * disconnect_bsp_APIC - detach the APIC from the interrupt system 2304 * @virt_wire_setup: indicates, whether virtual wire mode is selected 2305 * 2306 * Virtual wire mode is necessary to deliver legacy interrupts even when the 2307 * APIC is disabled. 2308 */ 2309 void disconnect_bsp_APIC(int virt_wire_setup) 2310 { 2311 unsigned int value; 2312 2313 #ifdef CONFIG_X86_32 2314 if (pic_mode) { 2315 /* 2316 * Put the board back into PIC mode (has an effect only on 2317 * certain older boards). Note that APIC interrupts, including 2318 * IPIs, won't work beyond this point! The only exception are 2319 * INIT IPIs. 2320 */ 2321 apic_printk(APIC_VERBOSE, "disabling APIC mode, " 2322 "entering PIC mode.\n"); 2323 imcr_apic_to_pic(); 2324 return; 2325 } 2326 #endif 2327 2328 /* Go back to Virtual Wire compatibility mode */ 2329 2330 /* For the spurious interrupt use vector F, and enable it */ 2331 value = apic_read(APIC_SPIV); 2332 value &= ~APIC_VECTOR_MASK; 2333 value |= APIC_SPIV_APIC_ENABLED; 2334 value |= 0xf; 2335 apic_write(APIC_SPIV, value); 2336 2337 if (!virt_wire_setup) { 2338 /* 2339 * For LVT0 make it edge triggered, active high, 2340 * external and enabled 2341 */ 2342 value = apic_read(APIC_LVT0); 2343 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | 2344 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | 2345 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); 2346 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; 2347 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); 2348 apic_write(APIC_LVT0, value); 2349 } else { 2350 /* Disable LVT0 */ 2351 apic_write(APIC_LVT0, APIC_LVT_MASKED); 2352 } 2353 2354 /* 2355 * For LVT1 make it edge triggered, active high, 2356 * nmi and enabled 2357 */ 2358 value = apic_read(APIC_LVT1); 2359 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | 2360 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | 2361 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); 2362 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; 2363 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); 2364 apic_write(APIC_LVT1, value); 2365 } 2366 2367 /* 2368 * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated 2369 * contiguously, it equals to current allocated max logical CPU ID plus 1. 2370 * All allocated CPU IDs should be in the [0, nr_logical_cpuids) range, 2371 * so the maximum of nr_logical_cpuids is nr_cpu_ids. 2372 * 2373 * NOTE: Reserve 0 for BSP. 2374 */ 2375 static int nr_logical_cpuids = 1; 2376 2377 /* 2378 * Used to store mapping between logical CPU IDs and APIC IDs. 2379 */ 2380 static int cpuid_to_apicid[] = { 2381 [0 ... NR_CPUS - 1] = -1, 2382 }; 2383 2384 bool arch_match_cpu_phys_id(int cpu, u64 phys_id) 2385 { 2386 return phys_id == cpuid_to_apicid[cpu]; 2387 } 2388 2389 #ifdef CONFIG_SMP 2390 /** 2391 * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread 2392 * @apicid: APIC ID to check 2393 */ 2394 bool apic_id_is_primary_thread(unsigned int apicid) 2395 { 2396 u32 mask; 2397 2398 if (smp_num_siblings == 1) 2399 return true; 2400 /* Isolate the SMT bit(s) in the APICID and check for 0 */ 2401 mask = (1U << (fls(smp_num_siblings) - 1)) - 1; 2402 return !(apicid & mask); 2403 } 2404 #endif 2405 2406 /* 2407 * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids 2408 * and cpuid_to_apicid[] synchronized. 2409 */ 2410 static int allocate_logical_cpuid(int apicid) 2411 { 2412 int i; 2413 2414 /* 2415 * cpuid <-> apicid mapping is persistent, so when a cpu is up, 2416 * check if the kernel has allocated a cpuid for it. 2417 */ 2418 for (i = 0; i < nr_logical_cpuids; i++) { 2419 if (cpuid_to_apicid[i] == apicid) 2420 return i; 2421 } 2422 2423 /* Allocate a new cpuid. */ 2424 if (nr_logical_cpuids >= nr_cpu_ids) { 2425 WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %u reached. " 2426 "Processor %d/0x%x and the rest are ignored.\n", 2427 nr_cpu_ids, nr_logical_cpuids, apicid); 2428 return -EINVAL; 2429 } 2430 2431 cpuid_to_apicid[nr_logical_cpuids] = apicid; 2432 return nr_logical_cpuids++; 2433 } 2434 2435 int generic_processor_info(int apicid, int version) 2436 { 2437 int cpu, max = nr_cpu_ids; 2438 bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid, 2439 phys_cpu_present_map); 2440 2441 /* 2442 * boot_cpu_physical_apicid is designed to have the apicid 2443 * returned by read_apic_id(), i.e, the apicid of the 2444 * currently booting-up processor. However, on some platforms, 2445 * it is temporarily modified by the apicid reported as BSP 2446 * through MP table. Concretely: 2447 * 2448 * - arch/x86/kernel/mpparse.c: MP_processor_info() 2449 * - arch/x86/mm/amdtopology.c: amd_numa_init() 2450 * 2451 * This function is executed with the modified 2452 * boot_cpu_physical_apicid. So, disabled_cpu_apicid kernel 2453 * parameter doesn't work to disable APs on kdump 2nd kernel. 2454 * 2455 * Since fixing handling of boot_cpu_physical_apicid requires 2456 * another discussion and tests on each platform, we leave it 2457 * for now and here we use read_apic_id() directly in this 2458 * function, generic_processor_info(). 2459 */ 2460 if (disabled_cpu_apicid != BAD_APICID && 2461 disabled_cpu_apicid != read_apic_id() && 2462 disabled_cpu_apicid == apicid) { 2463 int thiscpu = num_processors + disabled_cpus; 2464 2465 pr_warn("APIC: Disabling requested cpu." 2466 " Processor %d/0x%x ignored.\n", thiscpu, apicid); 2467 2468 disabled_cpus++; 2469 return -ENODEV; 2470 } 2471 2472 /* 2473 * If boot cpu has not been detected yet, then only allow upto 2474 * nr_cpu_ids - 1 processors and keep one slot free for boot cpu 2475 */ 2476 if (!boot_cpu_detected && num_processors >= nr_cpu_ids - 1 && 2477 apicid != boot_cpu_physical_apicid) { 2478 int thiscpu = max + disabled_cpus - 1; 2479 2480 pr_warn("APIC: NR_CPUS/possible_cpus limit of %i almost" 2481 " reached. Keeping one slot for boot cpu." 2482 " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); 2483 2484 disabled_cpus++; 2485 return -ENODEV; 2486 } 2487 2488 if (num_processors >= nr_cpu_ids) { 2489 int thiscpu = max + disabled_cpus; 2490 2491 pr_warn("APIC: NR_CPUS/possible_cpus limit of %i reached. " 2492 "Processor %d/0x%x ignored.\n", max, thiscpu, apicid); 2493 2494 disabled_cpus++; 2495 return -EINVAL; 2496 } 2497 2498 if (apicid == boot_cpu_physical_apicid) { 2499 /* 2500 * x86_bios_cpu_apicid is required to have processors listed 2501 * in same order as logical cpu numbers. Hence the first 2502 * entry is BSP, and so on. 2503 * boot_cpu_init() already hold bit 0 in cpu_present_mask 2504 * for BSP. 2505 */ 2506 cpu = 0; 2507 2508 /* Logical cpuid 0 is reserved for BSP. */ 2509 cpuid_to_apicid[0] = apicid; 2510 } else { 2511 cpu = allocate_logical_cpuid(apicid); 2512 if (cpu < 0) { 2513 disabled_cpus++; 2514 return -EINVAL; 2515 } 2516 } 2517 2518 /* 2519 * Validate version 2520 */ 2521 if (version == 0x0) { 2522 pr_warn("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n", 2523 cpu, apicid); 2524 version = 0x10; 2525 } 2526 2527 if (version != boot_cpu_apic_version) { 2528 pr_warn("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n", 2529 boot_cpu_apic_version, cpu, version); 2530 } 2531 2532 if (apicid > max_physical_apicid) 2533 max_physical_apicid = apicid; 2534 2535 #if defined(CONFIG_SMP) || defined(CONFIG_X86_64) 2536 early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; 2537 early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; 2538 #endif 2539 #ifdef CONFIG_X86_32 2540 early_per_cpu(x86_cpu_to_logical_apicid, cpu) = 2541 apic->x86_32_early_logical_apicid(cpu); 2542 #endif 2543 set_cpu_possible(cpu, true); 2544 physid_set(apicid, phys_cpu_present_map); 2545 set_cpu_present(cpu, true); 2546 num_processors++; 2547 2548 return cpu; 2549 } 2550 2551 int hard_smp_processor_id(void) 2552 { 2553 return read_apic_id(); 2554 } 2555 2556 void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg, 2557 bool dmar) 2558 { 2559 memset(msg, 0, sizeof(*msg)); 2560 2561 msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW; 2562 msg->arch_addr_lo.dest_mode_logical = apic->dest_mode_logical; 2563 msg->arch_addr_lo.destid_0_7 = cfg->dest_apicid & 0xFF; 2564 2565 msg->arch_data.delivery_mode = APIC_DELIVERY_MODE_FIXED; 2566 msg->arch_data.vector = cfg->vector; 2567 2568 msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH; 2569 /* 2570 * Only the IOMMU itself can use the trick of putting destination 2571 * APIC ID into the high bits of the address. Anything else would 2572 * just be writing to memory if it tried that, and needs IR to 2573 * address APICs which can't be addressed in the normal 32-bit 2574 * address range at 0xFFExxxxx. That is typically just 8 bits, but 2575 * some hypervisors allow the extended destination ID field in bits 2576 * 5-11 to be used, giving support for 15 bits of APIC IDs in total. 2577 */ 2578 if (dmar) 2579 msg->arch_addr_hi.destid_8_31 = cfg->dest_apicid >> 8; 2580 else if (virt_ext_dest_id && cfg->dest_apicid < 0x8000) 2581 msg->arch_addr_lo.virt_destid_8_14 = cfg->dest_apicid >> 8; 2582 else 2583 WARN_ON_ONCE(cfg->dest_apicid > 0xFF); 2584 } 2585 2586 u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid) 2587 { 2588 u32 dest = msg->arch_addr_lo.destid_0_7; 2589 2590 if (extid) 2591 dest |= msg->arch_addr_hi.destid_8_31 << 8; 2592 return dest; 2593 } 2594 EXPORT_SYMBOL_GPL(x86_msi_msg_get_destid); 2595 2596 #ifdef CONFIG_X86_64 2597 void __init acpi_wake_cpu_handler_update(wakeup_cpu_handler handler) 2598 { 2599 struct apic **drv; 2600 2601 for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) 2602 (*drv)->wakeup_secondary_cpu_64 = handler; 2603 } 2604 #endif 2605 2606 /* 2607 * Override the generic EOI implementation with an optimized version. 2608 * Only called during early boot when only one CPU is active and with 2609 * interrupts disabled, so we know this does not race with actual APIC driver 2610 * use. 2611 */ 2612 void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) 2613 { 2614 struct apic **drv; 2615 2616 for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { 2617 /* Should happen once for each apic */ 2618 WARN_ON((*drv)->eoi_write == eoi_write); 2619 (*drv)->native_eoi_write = (*drv)->eoi_write; 2620 (*drv)->eoi_write = eoi_write; 2621 } 2622 } 2623 2624 static void __init apic_bsp_up_setup(void) 2625 { 2626 #ifdef CONFIG_X86_64 2627 apic_write(APIC_ID, apic->set_apic_id(boot_cpu_physical_apicid)); 2628 #else 2629 /* 2630 * Hack: In case of kdump, after a crash, kernel might be booting 2631 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid 2632 * might be zero if read from MP tables. Get it from LAPIC. 2633 */ 2634 # ifdef CONFIG_CRASH_DUMP 2635 boot_cpu_physical_apicid = read_apic_id(); 2636 # endif 2637 #endif 2638 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); 2639 } 2640 2641 /** 2642 * apic_bsp_setup - Setup function for local apic and io-apic 2643 * @upmode: Force UP mode (for APIC_init_uniprocessor) 2644 */ 2645 static void __init apic_bsp_setup(bool upmode) 2646 { 2647 connect_bsp_APIC(); 2648 if (upmode) 2649 apic_bsp_up_setup(); 2650 setup_local_APIC(); 2651 2652 enable_IO_APIC(); 2653 end_local_APIC_setup(); 2654 irq_remap_enable_fault_handling(); 2655 setup_IO_APIC(); 2656 lapic_update_legacy_vectors(); 2657 } 2658 2659 #ifdef CONFIG_UP_LATE_INIT 2660 void __init up_late_init(void) 2661 { 2662 if (apic_intr_mode == APIC_PIC) 2663 return; 2664 2665 /* Setup local timer */ 2666 x86_init.timers.setup_percpu_clockev(); 2667 } 2668 #endif 2669 2670 /* 2671 * Power management 2672 */ 2673 #ifdef CONFIG_PM 2674 2675 static struct { 2676 /* 2677 * 'active' is true if the local APIC was enabled by us and 2678 * not the BIOS; this signifies that we are also responsible 2679 * for disabling it before entering apm/acpi suspend 2680 */ 2681 int active; 2682 /* r/w apic fields */ 2683 unsigned int apic_id; 2684 unsigned int apic_taskpri; 2685 unsigned int apic_ldr; 2686 unsigned int apic_dfr; 2687 unsigned int apic_spiv; 2688 unsigned int apic_lvtt; 2689 unsigned int apic_lvtpc; 2690 unsigned int apic_lvt0; 2691 unsigned int apic_lvt1; 2692 unsigned int apic_lvterr; 2693 unsigned int apic_tmict; 2694 unsigned int apic_tdcr; 2695 unsigned int apic_thmr; 2696 unsigned int apic_cmci; 2697 } apic_pm_state; 2698 2699 static int lapic_suspend(void) 2700 { 2701 unsigned long flags; 2702 int maxlvt; 2703 2704 if (!apic_pm_state.active) 2705 return 0; 2706 2707 maxlvt = lapic_get_maxlvt(); 2708 2709 apic_pm_state.apic_id = apic_read(APIC_ID); 2710 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); 2711 apic_pm_state.apic_ldr = apic_read(APIC_LDR); 2712 apic_pm_state.apic_dfr = apic_read(APIC_DFR); 2713 apic_pm_state.apic_spiv = apic_read(APIC_SPIV); 2714 apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); 2715 if (maxlvt >= 4) 2716 apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); 2717 apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); 2718 apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); 2719 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); 2720 apic_pm_state.apic_tmict = apic_read(APIC_TMICT); 2721 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); 2722 #ifdef CONFIG_X86_THERMAL_VECTOR 2723 if (maxlvt >= 5) 2724 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); 2725 #endif 2726 #ifdef CONFIG_X86_MCE_INTEL 2727 if (maxlvt >= 6) 2728 apic_pm_state.apic_cmci = apic_read(APIC_LVTCMCI); 2729 #endif 2730 2731 local_irq_save(flags); 2732 2733 /* 2734 * Mask IOAPIC before disabling the local APIC to prevent stale IRR 2735 * entries on some implementations. 2736 */ 2737 mask_ioapic_entries(); 2738 2739 disable_local_APIC(); 2740 2741 irq_remapping_disable(); 2742 2743 local_irq_restore(flags); 2744 return 0; 2745 } 2746 2747 static void lapic_resume(void) 2748 { 2749 unsigned int l, h; 2750 unsigned long flags; 2751 int maxlvt; 2752 2753 if (!apic_pm_state.active) 2754 return; 2755 2756 local_irq_save(flags); 2757 2758 /* 2759 * IO-APIC and PIC have their own resume routines. 2760 * We just mask them here to make sure the interrupt 2761 * subsystem is completely quiet while we enable x2apic 2762 * and interrupt-remapping. 2763 */ 2764 mask_ioapic_entries(); 2765 legacy_pic->mask_all(); 2766 2767 if (x2apic_mode) { 2768 __x2apic_enable(); 2769 } else { 2770 /* 2771 * Make sure the APICBASE points to the right address 2772 * 2773 * FIXME! This will be wrong if we ever support suspend on 2774 * SMP! We'll need to do this as part of the CPU restore! 2775 */ 2776 if (boot_cpu_data.x86 >= 6) { 2777 rdmsr(MSR_IA32_APICBASE, l, h); 2778 l &= ~MSR_IA32_APICBASE_BASE; 2779 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; 2780 wrmsr(MSR_IA32_APICBASE, l, h); 2781 } 2782 } 2783 2784 maxlvt = lapic_get_maxlvt(); 2785 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); 2786 apic_write(APIC_ID, apic_pm_state.apic_id); 2787 apic_write(APIC_DFR, apic_pm_state.apic_dfr); 2788 apic_write(APIC_LDR, apic_pm_state.apic_ldr); 2789 apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); 2790 apic_write(APIC_SPIV, apic_pm_state.apic_spiv); 2791 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); 2792 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); 2793 #ifdef CONFIG_X86_THERMAL_VECTOR 2794 if (maxlvt >= 5) 2795 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); 2796 #endif 2797 #ifdef CONFIG_X86_MCE_INTEL 2798 if (maxlvt >= 6) 2799 apic_write(APIC_LVTCMCI, apic_pm_state.apic_cmci); 2800 #endif 2801 if (maxlvt >= 4) 2802 apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); 2803 apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); 2804 apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); 2805 apic_write(APIC_TMICT, apic_pm_state.apic_tmict); 2806 apic_write(APIC_ESR, 0); 2807 apic_read(APIC_ESR); 2808 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); 2809 apic_write(APIC_ESR, 0); 2810 apic_read(APIC_ESR); 2811 2812 irq_remapping_reenable(x2apic_mode); 2813 2814 local_irq_restore(flags); 2815 } 2816 2817 /* 2818 * This device has no shutdown method - fully functioning local APICs 2819 * are needed on every CPU up until machine_halt/restart/poweroff. 2820 */ 2821 2822 static struct syscore_ops lapic_syscore_ops = { 2823 .resume = lapic_resume, 2824 .suspend = lapic_suspend, 2825 }; 2826 2827 static void apic_pm_activate(void) 2828 { 2829 apic_pm_state.active = 1; 2830 } 2831 2832 static int __init init_lapic_sysfs(void) 2833 { 2834 /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ 2835 if (boot_cpu_has(X86_FEATURE_APIC)) 2836 register_syscore_ops(&lapic_syscore_ops); 2837 2838 return 0; 2839 } 2840 2841 /* local apic needs to resume before other devices access its registers. */ 2842 core_initcall(init_lapic_sysfs); 2843 2844 #else /* CONFIG_PM */ 2845 2846 static void apic_pm_activate(void) { } 2847 2848 #endif /* CONFIG_PM */ 2849 2850 #ifdef CONFIG_X86_64 2851 2852 static int multi_checked; 2853 static int multi; 2854 2855 static int set_multi(const struct dmi_system_id *d) 2856 { 2857 if (multi) 2858 return 0; 2859 pr_info("APIC: %s detected, Multi Chassis\n", d->ident); 2860 multi = 1; 2861 return 0; 2862 } 2863 2864 static const struct dmi_system_id multi_dmi_table[] = { 2865 { 2866 .callback = set_multi, 2867 .ident = "IBM System Summit2", 2868 .matches = { 2869 DMI_MATCH(DMI_SYS_VENDOR, "IBM"), 2870 DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"), 2871 }, 2872 }, 2873 {} 2874 }; 2875 2876 static void dmi_check_multi(void) 2877 { 2878 if (multi_checked) 2879 return; 2880 2881 dmi_check_system(multi_dmi_table); 2882 multi_checked = 1; 2883 } 2884 2885 /* 2886 * apic_is_clustered_box() -- Check if we can expect good TSC 2887 * 2888 * Thus far, the major user of this is IBM's Summit2 series: 2889 * Clustered boxes may have unsynced TSC problems if they are 2890 * multi-chassis. 2891 * Use DMI to check them 2892 */ 2893 int apic_is_clustered_box(void) 2894 { 2895 dmi_check_multi(); 2896 return multi; 2897 } 2898 #endif 2899 2900 /* 2901 * APIC command line parameters 2902 */ 2903 static int __init setup_disableapic(char *arg) 2904 { 2905 disable_apic = 1; 2906 setup_clear_cpu_cap(X86_FEATURE_APIC); 2907 return 0; 2908 } 2909 early_param("disableapic", setup_disableapic); 2910 2911 /* same as disableapic, for compatibility */ 2912 static int __init setup_nolapic(char *arg) 2913 { 2914 return setup_disableapic(arg); 2915 } 2916 early_param("nolapic", setup_nolapic); 2917 2918 static int __init parse_lapic_timer_c2_ok(char *arg) 2919 { 2920 local_apic_timer_c2_ok = 1; 2921 return 0; 2922 } 2923 early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); 2924 2925 static int __init parse_disable_apic_timer(char *arg) 2926 { 2927 disable_apic_timer = 1; 2928 return 0; 2929 } 2930 early_param("noapictimer", parse_disable_apic_timer); 2931 2932 static int __init parse_nolapic_timer(char *arg) 2933 { 2934 disable_apic_timer = 1; 2935 return 0; 2936 } 2937 early_param("nolapic_timer", parse_nolapic_timer); 2938 2939 static int __init apic_set_verbosity(char *arg) 2940 { 2941 if (!arg) { 2942 #ifdef CONFIG_X86_64 2943 skip_ioapic_setup = 0; 2944 return 0; 2945 #endif 2946 return -EINVAL; 2947 } 2948 2949 if (strcmp("debug", arg) == 0) 2950 apic_verbosity = APIC_DEBUG; 2951 else if (strcmp("verbose", arg) == 0) 2952 apic_verbosity = APIC_VERBOSE; 2953 #ifdef CONFIG_X86_64 2954 else { 2955 pr_warn("APIC Verbosity level %s not recognised" 2956 " use apic=verbose or apic=debug\n", arg); 2957 return -EINVAL; 2958 } 2959 #endif 2960 2961 return 0; 2962 } 2963 early_param("apic", apic_set_verbosity); 2964 2965 static int __init lapic_insert_resource(void) 2966 { 2967 if (!apic_phys) 2968 return -1; 2969 2970 /* Put local APIC into the resource map. */ 2971 lapic_resource.start = apic_phys; 2972 lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; 2973 insert_resource(&iomem_resource, &lapic_resource); 2974 2975 return 0; 2976 } 2977 2978 /* 2979 * need call insert after e820__reserve_resources() 2980 * that is using request_resource 2981 */ 2982 late_initcall(lapic_insert_resource); 2983 2984 static int __init apic_set_disabled_cpu_apicid(char *arg) 2985 { 2986 if (!arg || !get_option(&arg, &disabled_cpu_apicid)) 2987 return -EINVAL; 2988 2989 return 0; 2990 } 2991 early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid); 2992 2993 static int __init apic_set_extnmi(char *arg) 2994 { 2995 if (!arg) 2996 return -EINVAL; 2997 2998 if (!strncmp("all", arg, 3)) 2999 apic_extnmi = APIC_EXTNMI_ALL; 3000 else if (!strncmp("none", arg, 4)) 3001 apic_extnmi = APIC_EXTNMI_NONE; 3002 else if (!strncmp("bsp", arg, 3)) 3003 apic_extnmi = APIC_EXTNMI_BSP; 3004 else { 3005 pr_warn("Unknown external NMI delivery mode `%s' ignored\n", arg); 3006 return -EINVAL; 3007 } 3008 3009 return 0; 3010 } 3011 early_param("apic_extnmi", apic_set_extnmi); 3012