xref: /linux/arch/x86/kernel/apic/apic.c (revision ca55b2fef3a9373fcfc30f82fd26bc7fccbda732)
1 /*
2  *	Local APIC handling, local APIC timers
3  *
4  *	(c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
5  *
6  *	Fixes
7  *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
8  *					thanks to Eric Gilmore
9  *					and Rolf G. Tews
10  *					for testing these extensively.
11  *	Maciej W. Rozycki	:	Various updates and fixes.
12  *	Mikael Pettersson	:	Power Management for UP-APIC.
13  *	Pavel Machek and
14  *	Mikael Pettersson	:	PM converted to driver model.
15  */
16 
17 #include <linux/perf_event.h>
18 #include <linux/kernel_stat.h>
19 #include <linux/mc146818rtc.h>
20 #include <linux/acpi_pmtmr.h>
21 #include <linux/clockchips.h>
22 #include <linux/interrupt.h>
23 #include <linux/bootmem.h>
24 #include <linux/ftrace.h>
25 #include <linux/ioport.h>
26 #include <linux/module.h>
27 #include <linux/syscore_ops.h>
28 #include <linux/delay.h>
29 #include <linux/timex.h>
30 #include <linux/i8253.h>
31 #include <linux/dmar.h>
32 #include <linux/init.h>
33 #include <linux/cpu.h>
34 #include <linux/dmi.h>
35 #include <linux/smp.h>
36 #include <linux/mm.h>
37 
38 #include <asm/trace/irq_vectors.h>
39 #include <asm/irq_remapping.h>
40 #include <asm/perf_event.h>
41 #include <asm/x86_init.h>
42 #include <asm/pgalloc.h>
43 #include <linux/atomic.h>
44 #include <asm/mpspec.h>
45 #include <asm/i8259.h>
46 #include <asm/proto.h>
47 #include <asm/apic.h>
48 #include <asm/io_apic.h>
49 #include <asm/desc.h>
50 #include <asm/hpet.h>
51 #include <asm/idle.h>
52 #include <asm/mtrr.h>
53 #include <asm/time.h>
54 #include <asm/smp.h>
55 #include <asm/mce.h>
56 #include <asm/tsc.h>
57 #include <asm/hypervisor.h>
58 
59 unsigned int num_processors;
60 
61 unsigned disabled_cpus;
62 
63 /* Processor that is doing the boot up */
64 unsigned int boot_cpu_physical_apicid = -1U;
65 EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);
66 
67 /*
68  * The highest APIC ID seen during enumeration.
69  */
70 static unsigned int max_physical_apicid;
71 
72 /*
73  * Bitmask of physically existing CPUs:
74  */
75 physid_mask_t phys_cpu_present_map;
76 
77 /*
78  * Processor to be disabled specified by kernel parameter
79  * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to
80  * avoid undefined behaviour caused by sending INIT from AP to BSP.
81  */
82 static unsigned int disabled_cpu_apicid __read_mostly = BAD_APICID;
83 
84 /*
85  * Map cpu index to physical APIC ID
86  */
/* Early per-cpu maps of APIC IDs, each initialised to BAD_APICID and exported */
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);

DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
91 
92 #ifdef CONFIG_X86_32
93 
94 /*
95  * On x86_32, the mapping between cpu and logical apicid may vary
96  * depending on apic in use.  The following early percpu variable is
97  * used for the mapping.  This is where the behaviors of x86_64 and 32
98  * actually diverge.  Let's keep it ugly for now.
99  */
100 DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
101 
102 /* Local APIC was disabled by the BIOS and enabled by the kernel */
103 static int enabled_via_apicbase;
104 
105 /*
106  * Handle interrupt mode configuration register (IMCR).
107  * This register controls whether the interrupt signals
108  * that reach the BSP come from the master PIC or from the
109  * local APIC. Before entering Symmetric I/O Mode, either
110  * the BIOS or the operating system must switch out of
111  * PIC Mode by changing the IMCR.
112  */
static inline void imcr_pic_to_apic(void)
{
	outb(0x70, 0x22);	/* select the IMCR register */
	outb(0x01, 0x23);	/* NMI and 8259 INTR go through APIC */
}
120 
static inline void imcr_apic_to_pic(void)
{
	outb(0x70, 0x22);	/* select the IMCR register */
	outb(0x00, 0x23);	/* NMI and 8259 INTR go directly to BSP */
}
128 #endif
129 
130 /*
131  * Knob to control our willingness to enable the local APIC.
132  *
133  * +1=force-enable
134  */
135 static int force_enable_local_apic __initdata;
136 
137 /*
138  * APIC command line parameters
139  */
140 static int __init parse_lapic(char *arg)
141 {
142 	if (config_enabled(CONFIG_X86_32) && !arg)
143 		force_enable_local_apic = 1;
144 	else if (arg && !strncmp(arg, "notscdeadline", 13))
145 		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
146 	return 0;
147 }
148 early_param("lapic", parse_lapic);
149 
150 #ifdef CONFIG_X86_64
151 static int apic_calibrate_pmtmr __initdata;
152 static __init int setup_apicpmtimer(char *s)
153 {
154 	apic_calibrate_pmtmr = 1;
155 	notsc_setup(NULL);
156 	return 0;
157 }
158 __setup("apicpmtimer", setup_apicpmtimer);
159 #endif
160 
161 unsigned long mp_lapic_addr;
162 int disable_apic;
163 /* Disable local APIC timer from the kernel commandline or via dmi quirk */
164 static int disable_apic_timer __initdata;
165 /* Local APIC timer works in C2 */
166 int local_apic_timer_c2_ok;
167 EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
168 
169 int first_system_vector = FIRST_SYSTEM_VECTOR;
170 
171 /*
172  * Debug level, exported for io_apic.c
173  */
174 unsigned int apic_verbosity;
175 
176 int pic_mode;
177 
178 /* Have we found an MP table */
179 int smp_found_config;
180 
181 static struct resource lapic_resource = {
182 	.name = "Local APIC",
183 	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
184 };
185 
186 unsigned int lapic_timer_frequency = 0;
187 
188 static void apic_pm_activate(void);
189 
190 static unsigned long apic_phys;
191 
192 /*
193  * Get the LAPIC version
194  */
195 static inline int lapic_get_version(void)
196 {
197 	return GET_APIC_VERSION(apic_read(APIC_LVR));
198 }
199 
200 /*
201  * Check, if the APIC is integrated or a separate chip
202  */
static inline int lapic_is_integrated(void)
{
#ifndef CONFIG_X86_64
	/* 32-bit: decide from the version reported by the APIC itself */
	return APIC_INTEGRATED(lapic_get_version());
#else
	/* 64-bit: always treated as integrated */
	return 1;
#endif
}
211 
212 /*
213  * Check, whether this is a modern or a first generation APIC
214  */
215 static int modern_apic(void)
216 {
217 	/* AMD systems use old APIC versions, so check the CPU */
218 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
219 	    boot_cpu_data.x86 >= 0xf)
220 		return 1;
221 	return lapic_get_version() >= 0x14;
222 }
223 
/*
 * Right after this call the apic becomes NOOP driven,
 * so apic->write/read doesn't do anything.
 */
228 static void __init apic_disable(void)
229 {
230 	pr_info("APIC: switched to apic NOOP\n");
231 	apic = &apic_noop;
232 }
233 
234 void native_apic_wait_icr_idle(void)
235 {
236 	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
237 		cpu_relax();
238 }
239 
240 u32 native_safe_apic_wait_icr_idle(void)
241 {
242 	u32 send_status;
243 	int timeout;
244 
245 	timeout = 0;
246 	do {
247 		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
248 		if (!send_status)
249 			break;
250 		inc_irq_stat(icr_read_retry_count);
251 		udelay(100);
252 	} while (timeout++ < 1000);
253 
254 	return send_status;
255 }
256 
257 void native_apic_icr_write(u32 low, u32 id)
258 {
259 	unsigned long flags;
260 
261 	local_irq_save(flags);
262 	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
263 	apic_write(APIC_ICR, low);
264 	local_irq_restore(flags);
265 }
266 
267 u64 native_apic_icr_read(void)
268 {
269 	u32 icr1, icr2;
270 
271 	icr2 = apic_read(APIC_ICR2);
272 	icr1 = apic_read(APIC_ICR);
273 
274 	return icr1 | ((u64)icr2 << 32);
275 }
276 
277 #ifdef CONFIG_X86_32
278 /**
279  * get_physical_broadcast - Get number of physical broadcast IDs
280  */
int get_physical_broadcast(void)
{
	/* 0xff for a modern APIC, 0xf for a first generation one */
	if (modern_apic())
		return 0xff;

	return 0xf;
}
285 #endif
286 
287 /**
288  * lapic_get_maxlvt - get the maximum number of local vector table entries
289  */
290 int lapic_get_maxlvt(void)
291 {
292 	unsigned int v;
293 
294 	v = apic_read(APIC_LVR);
295 	/*
296 	 * - we always have APIC integrated on 64bit mode
297 	 * - 82489DXs do not report # of LVT entries
298 	 */
299 	return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
300 }
301 
302 /*
303  * Local APIC timer
304  */
305 
/*
 * Clock divisors.
 *
 * APIC_DIVISOR: the timer count programmed in periodic mode is
 * 'clocks / APIC_DIVISOR', and the divide configuration register is set
 * to divide-by-16 to match.
 *
 * TSC_DIVISOR: in TSC-deadline mode a clockevent delta is multiplied by
 * this value to get TSC cycles, and the device is registered with a
 * frequency of tsc_khz / TSC_DIVISOR (in kHz).
 */
#define APIC_DIVISOR 16
#define TSC_DIVISOR  32
309 
310 /*
311  * This function sets up the local APIC timer, with a timeout of
312  * 'clocks' APIC bus clock. During calibration we actually call
313  * this function twice on the boot CPU, once with a bogus timeout
314  * value, second time for real. The other (noncalibrating) CPUs
315  * call this function only once, with the real, calibrated value.
316  *
317  * We do reads before writes even if unnecessary, to get around the
318  * P5 APIC double write bug.
319  */
320 static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
321 {
322 	unsigned int lvtt_value, tmp_value;
323 
324 	lvtt_value = LOCAL_TIMER_VECTOR;
325 	if (!oneshot)
326 		lvtt_value |= APIC_LVT_TIMER_PERIODIC;
327 	else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
328 		lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE;
329 
330 	if (!lapic_is_integrated())
331 		lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
332 
333 	if (!irqen)
334 		lvtt_value |= APIC_LVT_MASKED;
335 
336 	apic_write(APIC_LVTT, lvtt_value);
337 
338 	if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
339 		/*
340 		 * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode,
341 		 * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized.
342 		 * According to Intel, MFENCE can do the serialization here.
343 		 */
344 		asm volatile("mfence" : : : "memory");
345 
346 		printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
347 		return;
348 	}
349 
350 	/*
351 	 * Divide PICLK by 16
352 	 */
353 	tmp_value = apic_read(APIC_TDCR);
354 	apic_write(APIC_TDCR,
355 		(tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
356 		APIC_TDR_DIV_16);
357 
358 	if (!oneshot)
359 		apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
360 }
361 
/*
 * Setup extended LVT, AMD specific
 *
 * Software should use the LVT offsets the BIOS provides.  The offsets
 * are determined by the subsystems using them, such as those for MCE
 * threshold or IBS.  On K8 only offset 0 (APIC500) and MCE interrupts
 * are supported. Beginning with family 10h at least 4 offsets are
 * available.
 *
 * Since the offsets must be consistent for all cores, we keep track
 * of the LVT offsets in software and reserve the offset for the same
 * vector also to be used on other cores. An offset is freed by
 * setting the entry to APIC_EILVT_MASKED.
 *
 * If the BIOS is right, there should be no conflicts. Otherwise a
 * "[Firmware Bug]: ..." error message is generated. However, if
 * software does not properly determine the offsets, it is not
 * necessarily a BIOS bug.
 */
381 
382 static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX];
383 
384 static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
385 {
386 	return (old & APIC_EILVT_MASKED)
387 		|| (new == APIC_EILVT_MASKED)
388 		|| ((new & ~APIC_EILVT_MASKED) == old);
389 }
390 
391 static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
392 {
393 	unsigned int rsvd, vector;
394 
395 	if (offset >= APIC_EILVT_NR_MAX)
396 		return ~0;
397 
398 	rsvd = atomic_read(&eilvt_offsets[offset]);
399 	do {
400 		vector = rsvd & ~APIC_EILVT_MASKED;	/* 0: unassigned */
401 		if (vector && !eilvt_entry_is_changeable(vector, new))
402 			/* may not change if vectors are different */
403 			return rsvd;
404 		rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
405 	} while (rsvd != new);
406 
407 	rsvd &= ~APIC_EILVT_MASKED;
408 	if (rsvd && rsvd != vector)
409 		pr_info("LVT offset %d assigned for vector 0x%02x\n",
410 			offset, rsvd);
411 
412 	return new;
413 }
414 
415 /*
416  * If mask=1, the LVT entry does not generate interrupts while mask=0
417  * enables the vector. See also the BKDGs. Must be called with
418  * preemption disabled.
419  */
420 
421 int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
422 {
423 	unsigned long reg = APIC_EILVTn(offset);
424 	unsigned int new, old, reserved;
425 
426 	new = (mask << 16) | (msg_type << 8) | vector;
427 	old = apic_read(reg);
428 	reserved = reserve_eilvt_offset(offset, new);
429 
430 	if (reserved != new) {
431 		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
432 		       "vector 0x%x, but the register is already in use for "
433 		       "vector 0x%x on another cpu\n",
434 		       smp_processor_id(), reg, offset, new, reserved);
435 		return -EINVAL;
436 	}
437 
438 	if (!eilvt_entry_is_changeable(old, new)) {
439 		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
440 		       "vector 0x%x, but the register is already in use for "
441 		       "vector 0x%x on this cpu\n",
442 		       smp_processor_id(), reg, offset, new, old);
443 		return -EBUSY;
444 	}
445 
446 	apic_write(reg, new);
447 
448 	return 0;
449 }
450 EXPORT_SYMBOL_GPL(setup_APIC_eilvt);
451 
452 /*
453  * Program the next event, relative to now
454  */
455 static int lapic_next_event(unsigned long delta,
456 			    struct clock_event_device *evt)
457 {
458 	apic_write(APIC_TMICT, delta);
459 	return 0;
460 }
461 
462 static int lapic_next_deadline(unsigned long delta,
463 			       struct clock_event_device *evt)
464 {
465 	u64 tsc;
466 
467 	tsc = rdtsc();
468 	wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
469 	return 0;
470 }
471 
472 static int lapic_timer_shutdown(struct clock_event_device *evt)
473 {
474 	unsigned int v;
475 
476 	/* Lapic used as dummy for broadcast ? */
477 	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
478 		return 0;
479 
480 	v = apic_read(APIC_LVTT);
481 	v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
482 	apic_write(APIC_LVTT, v);
483 	apic_write(APIC_TMICT, 0);
484 	return 0;
485 }
486 
487 static inline int
488 lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot)
489 {
490 	/* Lapic used as dummy for broadcast ? */
491 	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
492 		return 0;
493 
494 	__setup_APIC_LVTT(lapic_timer_frequency, oneshot, 1);
495 	return 0;
496 }
497 
498 static int lapic_timer_set_periodic(struct clock_event_device *evt)
499 {
500 	return lapic_timer_set_periodic_oneshot(evt, false);
501 }
502 
503 static int lapic_timer_set_oneshot(struct clock_event_device *evt)
504 {
505 	return lapic_timer_set_periodic_oneshot(evt, true);
506 }
507 
508 /*
509  * Local APIC timer broadcast function
510  */
static void lapic_timer_broadcast(const struct cpumask *mask)
{
	/* Only SMP builds send the IPI; otherwise this is an empty function */
#ifdef CONFIG_SMP
	apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
#endif
}
517 
518 
519 /*
520  * The local apic timer can be used for any function which is CPU local.
521  */
522 static struct clock_event_device lapic_clockevent = {
523 	.name			= "lapic",
524 	.features		= CLOCK_EVT_FEAT_PERIODIC |
525 				  CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP
526 				  | CLOCK_EVT_FEAT_DUMMY,
527 	.shift			= 32,
528 	.set_state_shutdown	= lapic_timer_shutdown,
529 	.set_state_periodic	= lapic_timer_set_periodic,
530 	.set_state_oneshot	= lapic_timer_set_oneshot,
531 	.set_next_event		= lapic_next_event,
532 	.broadcast		= lapic_timer_broadcast,
533 	.rating			= 100,
534 	.irq			= -1,
535 };
536 static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
537 
538 /*
539  * Setup the local APIC timer for this CPU. Copy the initialized values
540  * of the boot CPU and register the clock event in the framework.
541  */
542 static void setup_APIC_timer(void)
543 {
544 	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
545 
546 	if (this_cpu_has(X86_FEATURE_ARAT)) {
547 		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
548 		/* Make LAPIC timer preferrable over percpu HPET */
549 		lapic_clockevent.rating = 150;
550 	}
551 
552 	memcpy(levt, &lapic_clockevent, sizeof(*levt));
553 	levt->cpumask = cpumask_of(smp_processor_id());
554 
555 	if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
556 		levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |
557 				    CLOCK_EVT_FEAT_DUMMY);
558 		levt->set_next_event = lapic_next_deadline;
559 		clockevents_config_and_register(levt,
560 						(tsc_khz / TSC_DIVISOR) * 1000,
561 						0xF, ~0UL);
562 	} else
563 		clockevents_register_device(levt);
564 }
565 
/*
 * In this function we calibrate APIC bus clocks to the external timer.
 *
 * We want to do the calibration only once since we want to have local timer
 * irqs in sync. CPUs connected by the same APIC bus have the very same bus
 * frequency.
 *
 * This was previously done by reading the PIT/HPET and waiting for a wrap
 * around to find out that a tick has elapsed. I have a box where the PIT
 * readout is broken, so it never gets out of the wait loop again. This was
 * also reported by others.
 *
 * Monitoring the jiffies value is inaccurate and the clockevents
 * infrastructure allows us to do a simple substitution of the interrupt
 * handler.
 *
 * The calibration routine also uses the pm_timer when possible, as the PIT
 * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
 * back to normal later in the boot process).
 */
586 
587 #define LAPIC_CAL_LOOPS		(HZ/10)
588 
589 static __initdata int lapic_cal_loops = -1;
590 static __initdata long lapic_cal_t1, lapic_cal_t2;
591 static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
592 static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
593 static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
594 
595 /*
596  * Temporary interrupt handler.
597  */
598 static void __init lapic_cal_handler(struct clock_event_device *dev)
599 {
600 	unsigned long long tsc = 0;
601 	long tapic = apic_read(APIC_TMCCT);
602 	unsigned long pm = acpi_pm_read_early();
603 
604 	if (cpu_has_tsc)
605 		tsc = rdtsc();
606 
607 	switch (lapic_cal_loops++) {
608 	case 0:
609 		lapic_cal_t1 = tapic;
610 		lapic_cal_tsc1 = tsc;
611 		lapic_cal_pm1 = pm;
612 		lapic_cal_j1 = jiffies;
613 		break;
614 
615 	case LAPIC_CAL_LOOPS:
616 		lapic_cal_t2 = tapic;
617 		lapic_cal_tsc2 = tsc;
618 		if (pm < lapic_cal_pm1)
619 			pm += ACPI_PM_OVRRUN;
620 		lapic_cal_pm2 = pm;
621 		lapic_cal_j2 = jiffies;
622 		break;
623 	}
624 }
625 
626 static int __init
627 calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
628 {
629 	const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
630 	const long pm_thresh = pm_100ms / 100;
631 	unsigned long mult;
632 	u64 res;
633 
634 #ifndef CONFIG_X86_PM_TIMER
635 	return -1;
636 #endif
637 
638 	apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm);
639 
640 	/* Check, if the PM timer is available */
641 	if (!deltapm)
642 		return -1;
643 
644 	mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
645 
646 	if (deltapm > (pm_100ms - pm_thresh) &&
647 	    deltapm < (pm_100ms + pm_thresh)) {
648 		apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n");
649 		return 0;
650 	}
651 
652 	res = (((u64)deltapm) *  mult) >> 22;
653 	do_div(res, 1000000);
654 	pr_warning("APIC calibration not consistent "
655 		   "with PM-Timer: %ldms instead of 100ms\n",(long)res);
656 
657 	/* Correct the lapic counter value */
658 	res = (((u64)(*delta)) * pm_100ms);
659 	do_div(res, deltapm);
660 	pr_info("APIC delta adjusted to PM-Timer: "
661 		"%lu (%ld)\n", (unsigned long)res, *delta);
662 	*delta = (long)res;
663 
664 	/* Correct the tsc counter value */
665 	if (cpu_has_tsc) {
666 		res = (((u64)(*deltatsc)) * pm_100ms);
667 		do_div(res, deltapm);
668 		apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
669 					  "PM-Timer: %lu (%ld)\n",
670 					(unsigned long)res, *deltatsc);
671 		*deltatsc = (long)res;
672 	}
673 
674 	return 0;
675 }
676 
677 static int __init calibrate_APIC_clock(void)
678 {
679 	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
680 	void (*real_handler)(struct clock_event_device *dev);
681 	unsigned long deltaj;
682 	long delta, deltatsc;
683 	int pm_referenced = 0;
684 
685 	/**
686 	 * check if lapic timer has already been calibrated by platform
687 	 * specific routine, such as tsc calibration code. if so, we just fill
688 	 * in the clockevent structure and return.
689 	 */
690 
691 	if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
692 		return 0;
693 	} else if (lapic_timer_frequency) {
694 		apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
695 				lapic_timer_frequency);
696 		lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR,
697 					TICK_NSEC, lapic_clockevent.shift);
698 		lapic_clockevent.max_delta_ns =
699 			clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
700 		lapic_clockevent.min_delta_ns =
701 			clockevent_delta2ns(0xF, &lapic_clockevent);
702 		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
703 		return 0;
704 	}
705 
706 	apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
707 		    "calibrating APIC timer ...\n");
708 
709 	local_irq_disable();
710 
711 	/* Replace the global interrupt handler */
712 	real_handler = global_clock_event->event_handler;
713 	global_clock_event->event_handler = lapic_cal_handler;
714 
715 	/*
716 	 * Setup the APIC counter to maximum. There is no way the lapic
717 	 * can underflow in the 100ms detection time frame
718 	 */
719 	__setup_APIC_LVTT(0xffffffff, 0, 0);
720 
721 	/* Let the interrupts run */
722 	local_irq_enable();
723 
724 	while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
725 		cpu_relax();
726 
727 	local_irq_disable();
728 
729 	/* Restore the real event handler */
730 	global_clock_event->event_handler = real_handler;
731 
732 	/* Build delta t1-t2 as apic timer counts down */
733 	delta = lapic_cal_t1 - lapic_cal_t2;
734 	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
735 
736 	deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
737 
738 	/* we trust the PM based calibration if possible */
739 	pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
740 					&delta, &deltatsc);
741 
742 	/* Calculate the scaled math multiplication factor */
743 	lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
744 				       lapic_clockevent.shift);
745 	lapic_clockevent.max_delta_ns =
746 		clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
747 	lapic_clockevent.min_delta_ns =
748 		clockevent_delta2ns(0xF, &lapic_clockevent);
749 
750 	lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
751 
752 	apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
753 	apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
754 	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
755 		    lapic_timer_frequency);
756 
757 	if (cpu_has_tsc) {
758 		apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
759 			    "%ld.%04ld MHz.\n",
760 			    (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
761 			    (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
762 	}
763 
764 	apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
765 		    "%u.%04u MHz.\n",
766 		    lapic_timer_frequency / (1000000 / HZ),
767 		    lapic_timer_frequency % (1000000 / HZ));
768 
769 	/*
770 	 * Do a sanity check on the APIC calibration result
771 	 */
772 	if (lapic_timer_frequency < (1000000 / HZ)) {
773 		local_irq_enable();
774 		pr_warning("APIC frequency too slow, disabling apic timer\n");
775 		return -1;
776 	}
777 
778 	levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
779 
780 	/*
781 	 * PM timer calibration failed or not turned on
782 	 * so lets try APIC timer based calibration
783 	 */
784 	if (!pm_referenced) {
785 		apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
786 
787 		/*
788 		 * Setup the apic timer manually
789 		 */
790 		levt->event_handler = lapic_cal_handler;
791 		lapic_timer_set_periodic(levt);
792 		lapic_cal_loops = -1;
793 
794 		/* Let the interrupts run */
795 		local_irq_enable();
796 
797 		while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
798 			cpu_relax();
799 
800 		/* Stop the lapic timer */
801 		local_irq_disable();
802 		lapic_timer_shutdown(levt);
803 
804 		/* Jiffies delta */
805 		deltaj = lapic_cal_j2 - lapic_cal_j1;
806 		apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
807 
808 		/* Check, if the jiffies result is consistent */
809 		if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
810 			apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
811 		else
812 			levt->features |= CLOCK_EVT_FEAT_DUMMY;
813 	}
814 	local_irq_enable();
815 
816 	if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
817 		pr_warning("APIC timer disabled due to verification failure\n");
818 			return -1;
819 	}
820 
821 	return 0;
822 }
823 
824 /*
825  * Setup the boot APIC
826  *
827  * Calibrate and verify the result.
828  */
829 void __init setup_boot_APIC_clock(void)
830 {
831 	/*
832 	 * The local apic timer can be disabled via the kernel
833 	 * commandline or from the CPU detection code. Register the lapic
834 	 * timer as a dummy clock event source on SMP systems, so the
835 	 * broadcast mechanism is used. On UP systems simply ignore it.
836 	 */
837 	if (disable_apic_timer) {
838 		pr_info("Disabling APIC timer\n");
839 		/* No broadcast on UP ! */
840 		if (num_possible_cpus() > 1) {
841 			lapic_clockevent.mult = 1;
842 			setup_APIC_timer();
843 		}
844 		return;
845 	}
846 
847 	if (calibrate_APIC_clock()) {
848 		/* No broadcast on UP ! */
849 		if (num_possible_cpus() > 1)
850 			setup_APIC_timer();
851 		return;
852 	}
853 
854 	/*
855 	 * If nmi_watchdog is set to IO_APIC, we need the
856 	 * PIT/HPET going.  Otherwise register lapic as a dummy
857 	 * device.
858 	 */
859 	lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
860 
861 	/* Setup the lapic or request the broadcast */
862 	setup_APIC_timer();
863 }
864 
/*
 * Timer setup entry point for the other cpus: no calibration is done
 * here, the per-cpu device is just set up and registered.
 */
void setup_secondary_APIC_clock(void)
{
	setup_APIC_timer();
}
869 
870 /*
871  * The guts of the apic timer interrupt
872  */
873 static void local_apic_timer_interrupt(void)
874 {
875 	int cpu = smp_processor_id();
876 	struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
877 
878 	/*
879 	 * Normally we should not be here till LAPIC has been initialized but
880 	 * in some cases like kdump, its possible that there is a pending LAPIC
881 	 * timer interrupt from previous kernel's context and is delivered in
882 	 * new kernel the moment interrupts are enabled.
883 	 *
884 	 * Interrupts are enabled early and LAPIC is setup much later, hence
885 	 * its possible that when we get here evt->event_handler is NULL.
886 	 * Check for event_handler being NULL and discard the interrupt as
887 	 * spurious.
888 	 */
889 	if (!evt->event_handler) {
890 		pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu);
891 		/* Switch it off */
892 		lapic_timer_shutdown(evt);
893 		return;
894 	}
895 
896 	/*
897 	 * the NMI deadlock-detector uses this.
898 	 */
899 	inc_irq_stat(apic_timer_irqs);
900 
901 	evt->event_handler(evt);
902 }
903 
904 /*
905  * Local APIC timer interrupt. This is the most natural way for doing
906  * local interrupts, but local timer interrupts can be emulated by
907  * broadcast interrupts too. [in case the hw doesn't support APIC timers]
908  *
909  * [ if a single-CPU system runs an SMP kernel then we call the local
910  *   interrupt as well. Thus we cannot inline the local irq ... ]
911  */
912 __visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
913 {
914 	struct pt_regs *old_regs = set_irq_regs(regs);
915 
916 	/*
917 	 * NOTE! We'd better ACK the irq immediately,
918 	 * because timer handling can be slow.
919 	 *
920 	 * update_process_times() expects us to have done irq_enter().
921 	 * Besides, if we don't timer interrupts ignore the global
922 	 * interrupt lock, which is the WrongThing (tm) to do.
923 	 */
924 	entering_ack_irq();
925 	local_apic_timer_interrupt();
926 	exiting_irq();
927 
928 	set_irq_regs(old_regs);
929 }
930 
931 __visible void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs)
932 {
933 	struct pt_regs *old_regs = set_irq_regs(regs);
934 
935 	/*
936 	 * NOTE! We'd better ACK the irq immediately,
937 	 * because timer handling can be slow.
938 	 *
939 	 * update_process_times() expects us to have done irq_enter().
940 	 * Besides, if we don't timer interrupts ignore the global
941 	 * interrupt lock, which is the WrongThing (tm) to do.
942 	 */
943 	entering_ack_irq();
944 	trace_local_timer_entry(LOCAL_TIMER_VECTOR);
945 	local_apic_timer_interrupt();
946 	trace_local_timer_exit(LOCAL_TIMER_VECTOR);
947 	exiting_irq();
948 
949 	set_irq_regs(old_regs);
950 }
951 
/*
 * Changing the profiling timer multiplier is not supported here:
 * every request is rejected with -EINVAL, whatever @multiplier is.
 */
int setup_profiling_timer(unsigned int multiplier)
{
	(void)multiplier;

	return -EINVAL;
}
956 
957 /*
958  * Local APIC start and shutdown
959  */
960 
961 /**
962  * clear_local_APIC - shutdown the local APIC
963  *
964  * This is called, when a CPU is disabled and before rebooting, so the state of
965  * the local APIC has no dangling leftovers. Also used to cleanout any BIOS
966  * leftovers during boot.
967  */
968 void clear_local_APIC(void)
969 {
970 	int maxlvt;
971 	u32 v;
972 
973 	/* APIC hasn't been mapped yet */
974 	if (!x2apic_mode && !apic_phys)
975 		return;
976 
977 	maxlvt = lapic_get_maxlvt();
978 	/*
979 	 * Masking an LVT entry can trigger a local APIC error
980 	 * if the vector is zero. Mask LVTERR first to prevent this.
981 	 */
982 	if (maxlvt >= 3) {
983 		v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
984 		apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
985 	}
986 	/*
987 	 * Careful: we have to set masks only first to deassert
988 	 * any level-triggered sources.
989 	 */
990 	v = apic_read(APIC_LVTT);
991 	apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
992 	v = apic_read(APIC_LVT0);
993 	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
994 	v = apic_read(APIC_LVT1);
995 	apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
996 	if (maxlvt >= 4) {
997 		v = apic_read(APIC_LVTPC);
998 		apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
999 	}
1000 
1001 	/* lets not touch this if we didn't frob it */
1002 #ifdef CONFIG_X86_THERMAL_VECTOR
1003 	if (maxlvt >= 5) {
1004 		v = apic_read(APIC_LVTTHMR);
1005 		apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
1006 	}
1007 #endif
1008 #ifdef CONFIG_X86_MCE_INTEL
1009 	if (maxlvt >= 6) {
1010 		v = apic_read(APIC_LVTCMCI);
1011 		if (!(v & APIC_LVT_MASKED))
1012 			apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED);
1013 	}
1014 #endif
1015 
1016 	/*
1017 	 * Clean APIC state for other OSs:
1018 	 */
1019 	apic_write(APIC_LVTT, APIC_LVT_MASKED);
1020 	apic_write(APIC_LVT0, APIC_LVT_MASKED);
1021 	apic_write(APIC_LVT1, APIC_LVT_MASKED);
1022 	if (maxlvt >= 3)
1023 		apic_write(APIC_LVTERR, APIC_LVT_MASKED);
1024 	if (maxlvt >= 4)
1025 		apic_write(APIC_LVTPC, APIC_LVT_MASKED);
1026 
1027 	/* Integrated APIC (!82489DX) ? */
1028 	if (lapic_is_integrated()) {
1029 		if (maxlvt > 3)
1030 			/* Clear ESR due to Pentium errata 3AP and 11AP */
1031 			apic_write(APIC_ESR, 0);
1032 		apic_read(APIC_ESR);
1033 	}
1034 }
1035 
1036 /**
1037  * disable_local_APIC - clear and disable the local APIC
1038  */
1039 void disable_local_APIC(void)
1040 {
1041 	unsigned int value;
1042 
1043 	/* APIC hasn't been mapped yet */
1044 	if (!x2apic_mode && !apic_phys)
1045 		return;
1046 
1047 	clear_local_APIC();
1048 
1049 	/*
1050 	 * Disable APIC (implies clearing of registers
1051 	 * for 82489DX!).
1052 	 */
1053 	value = apic_read(APIC_SPIV);
1054 	value &= ~APIC_SPIV_APIC_ENABLED;
1055 	apic_write(APIC_SPIV, value);
1056 
1057 #ifdef CONFIG_X86_32
1058 	/*
1059 	 * When LAPIC was disabled by the BIOS and enabled by the kernel,
1060 	 * restore the disabled state.
1061 	 */
1062 	if (enabled_via_apicbase) {
1063 		unsigned int l, h;
1064 
1065 		rdmsr(MSR_IA32_APICBASE, l, h);
1066 		l &= ~MSR_IA32_APICBASE_ENABLE;
1067 		wrmsr(MSR_IA32_APICBASE, l, h);
1068 	}
1069 #endif
1070 }
1071 
1072 /*
1073  * If Linux enabled the LAPIC against the BIOS default disable it down before
1074  * re-entering the BIOS on shutdown.  Otherwise the BIOS may get confused and
1075  * not power-off.  Additionally clear all LVT entries before disable_local_APIC
1076  * for the case where Linux didn't enable the LAPIC.
1077  */
1078 void lapic_shutdown(void)
1079 {
1080 	unsigned long flags;
1081 
1082 	if (!cpu_has_apic && !apic_from_smp_config())
1083 		return;
1084 
1085 	local_irq_save(flags);
1086 
1087 #ifdef CONFIG_X86_32
1088 	if (!enabled_via_apicbase)
1089 		clear_local_APIC();
1090 	else
1091 #endif
1092 		disable_local_APIC();
1093 
1094 
1095 	local_irq_restore(flags);
1096 }
1097 
1098 /**
1099  * sync_Arb_IDs - synchronize APIC bus arbitration IDs
1100  */
1101 void __init sync_Arb_IDs(void)
1102 {
1103 	/*
1104 	 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
1105 	 * needed on AMD.
1106 	 */
1107 	if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
1108 		return;
1109 
1110 	/*
1111 	 * Wait for idle.
1112 	 */
1113 	apic_wait_icr_idle();
1114 
1115 	apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
1116 	apic_write(APIC_ICR, APIC_DEST_ALLINC |
1117 			APIC_INT_LEVELTRIG | APIC_DM_INIT);
1118 }
1119 
1120 /*
1121  * An initial setup of the virtual wire mode.
1122  */
1123 void __init init_bsp_APIC(void)
1124 {
1125 	unsigned int value;
1126 
1127 	/*
1128 	 * Don't do the setup now if we have a SMP BIOS as the
1129 	 * through-I/O-APIC virtual wire mode might be active.
1130 	 */
1131 	if (smp_found_config || !cpu_has_apic)
1132 		return;
1133 
1134 	/*
1135 	 * Do not trust the local APIC being empty at bootup.
1136 	 */
1137 	clear_local_APIC();
1138 
1139 	/*
1140 	 * Enable APIC.
1141 	 */
1142 	value = apic_read(APIC_SPIV);
1143 	value &= ~APIC_VECTOR_MASK;
1144 	value |= APIC_SPIV_APIC_ENABLED;
1145 
1146 #ifdef CONFIG_X86_32
1147 	/* This bit is reserved on P4/Xeon and should be cleared */
1148 	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
1149 	    (boot_cpu_data.x86 == 15))
1150 		value &= ~APIC_SPIV_FOCUS_DISABLED;
1151 	else
1152 #endif
1153 		value |= APIC_SPIV_FOCUS_DISABLED;
1154 	value |= SPURIOUS_APIC_VECTOR;
1155 	apic_write(APIC_SPIV, value);
1156 
1157 	/*
1158 	 * Set up the virtual wire mode.
1159 	 */
1160 	apic_write(APIC_LVT0, APIC_DM_EXTINT);
1161 	value = APIC_DM_NMI;
1162 	if (!lapic_is_integrated())		/* 82489DX */
1163 		value |= APIC_LVT_LEVEL_TRIGGER;
1164 	apic_write(APIC_LVT1, value);
1165 }
1166 
1167 static void lapic_setup_esr(void)
1168 {
1169 	unsigned int oldvalue, value, maxlvt;
1170 
1171 	if (!lapic_is_integrated()) {
1172 		pr_info("No ESR for 82489DX.\n");
1173 		return;
1174 	}
1175 
1176 	if (apic->disable_esr) {
1177 		/*
1178 		 * Something untraceable is creating bad interrupts on
1179 		 * secondary quads ... for the moment, just leave the
1180 		 * ESR disabled - we can't do anything useful with the
1181 		 * errors anyway - mbligh
1182 		 */
1183 		pr_info("Leaving ESR disabled.\n");
1184 		return;
1185 	}
1186 
1187 	maxlvt = lapic_get_maxlvt();
1188 	if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
1189 		apic_write(APIC_ESR, 0);
1190 	oldvalue = apic_read(APIC_ESR);
1191 
1192 	/* enables sending errors */
1193 	value = ERROR_APIC_VECTOR;
1194 	apic_write(APIC_LVTERR, value);
1195 
1196 	/*
1197 	 * spec says clear errors after enabling vector.
1198 	 */
1199 	if (maxlvt > 3)
1200 		apic_write(APIC_ESR, 0);
1201 	value = apic_read(APIC_ESR);
1202 	if (value != oldvalue)
1203 		apic_printk(APIC_VERBOSE, "ESR value before enabling "
1204 			"vector: 0x%08x  after: 0x%08x\n",
1205 			oldvalue, value);
1206 }
1207 
1208 /**
1209  * setup_local_APIC - setup the local APIC
1210  *
1211  * Used to setup local APIC while initializing BSP or bringin up APs.
1212  * Always called with preemption disabled.
1213  */
1214 void setup_local_APIC(void)
1215 {
1216 	int cpu = smp_processor_id();
1217 	unsigned int value, queued;
1218 	int i, j, acked = 0;
1219 	unsigned long long tsc = 0, ntsc;
1220 	long long max_loops = cpu_khz ? cpu_khz : 1000000;
1221 
1222 	if (cpu_has_tsc)
1223 		tsc = rdtsc();
1224 
1225 	if (disable_apic) {
1226 		disable_ioapic_support();
1227 		return;
1228 	}
1229 
1230 #ifdef CONFIG_X86_32
1231 	/* Pound the ESR really hard over the head with a big hammer - mbligh */
1232 	if (lapic_is_integrated() && apic->disable_esr) {
1233 		apic_write(APIC_ESR, 0);
1234 		apic_write(APIC_ESR, 0);
1235 		apic_write(APIC_ESR, 0);
1236 		apic_write(APIC_ESR, 0);
1237 	}
1238 #endif
1239 	perf_events_lapic_init();
1240 
1241 	/*
1242 	 * Double-check whether this APIC is really registered.
1243 	 * This is meaningless in clustered apic mode, so we skip it.
1244 	 */
1245 	BUG_ON(!apic->apic_id_registered());
1246 
1247 	/*
1248 	 * Intel recommends to set DFR, LDR and TPR before enabling
1249 	 * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
1250 	 * document number 292116).  So here it goes...
1251 	 */
1252 	apic->init_apic_ldr();
1253 
1254 #ifdef CONFIG_X86_32
1255 	/*
1256 	 * APIC LDR is initialized.  If logical_apicid mapping was
1257 	 * initialized during get_smp_config(), make sure it matches the
1258 	 * actual value.
1259 	 */
1260 	i = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
1261 	WARN_ON(i != BAD_APICID && i != logical_smp_processor_id());
1262 	/* always use the value from LDR */
1263 	early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
1264 		logical_smp_processor_id();
1265 #endif
1266 
1267 	/*
1268 	 * Set Task Priority to 'accept all'. We never change this
1269 	 * later on.
1270 	 */
1271 	value = apic_read(APIC_TASKPRI);
1272 	value &= ~APIC_TPRI_MASK;
1273 	apic_write(APIC_TASKPRI, value);
1274 
1275 	/*
1276 	 * After a crash, we no longer service the interrupts and a pending
1277 	 * interrupt from previous kernel might still have ISR bit set.
1278 	 *
1279 	 * Most probably by now CPU has serviced that pending interrupt and
1280 	 * it might not have done the ack_APIC_irq() because it thought,
1281 	 * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
1282 	 * does not clear the ISR bit and cpu thinks it has already serivced
1283 	 * the interrupt. Hence a vector might get locked. It was noticed
1284 	 * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
1285 	 */
1286 	do {
1287 		queued = 0;
1288 		for (i = APIC_ISR_NR - 1; i >= 0; i--)
1289 			queued |= apic_read(APIC_IRR + i*0x10);
1290 
1291 		for (i = APIC_ISR_NR - 1; i >= 0; i--) {
1292 			value = apic_read(APIC_ISR + i*0x10);
1293 			for (j = 31; j >= 0; j--) {
1294 				if (value & (1<<j)) {
1295 					ack_APIC_irq();
1296 					acked++;
1297 				}
1298 			}
1299 		}
1300 		if (acked > 256) {
1301 			printk(KERN_ERR "LAPIC pending interrupts after %d EOI\n",
1302 			       acked);
1303 			break;
1304 		}
1305 		if (queued) {
1306 			if (cpu_has_tsc && cpu_khz) {
1307 				ntsc = rdtsc();
1308 				max_loops = (cpu_khz << 10) - (ntsc - tsc);
1309 			} else
1310 				max_loops--;
1311 		}
1312 	} while (queued && max_loops > 0);
1313 	WARN_ON(max_loops <= 0);
1314 
1315 	/*
1316 	 * Now that we are all set up, enable the APIC
1317 	 */
1318 	value = apic_read(APIC_SPIV);
1319 	value &= ~APIC_VECTOR_MASK;
1320 	/*
1321 	 * Enable APIC
1322 	 */
1323 	value |= APIC_SPIV_APIC_ENABLED;
1324 
1325 #ifdef CONFIG_X86_32
1326 	/*
1327 	 * Some unknown Intel IO/APIC (or APIC) errata is biting us with
1328 	 * certain networking cards. If high frequency interrupts are
1329 	 * happening on a particular IOAPIC pin, plus the IOAPIC routing
1330 	 * entry is masked/unmasked at a high rate as well then sooner or
1331 	 * later IOAPIC line gets 'stuck', no more interrupts are received
1332 	 * from the device. If focus CPU is disabled then the hang goes
1333 	 * away, oh well :-(
1334 	 *
1335 	 * [ This bug can be reproduced easily with a level-triggered
1336 	 *   PCI Ne2000 networking cards and PII/PIII processors, dual
1337 	 *   BX chipset. ]
1338 	 */
1339 	/*
1340 	 * Actually disabling the focus CPU check just makes the hang less
1341 	 * frequent as it makes the interrupt distributon model be more
1342 	 * like LRU than MRU (the short-term load is more even across CPUs).
1343 	 * See also the comment in end_level_ioapic_irq().  --macro
1344 	 */
1345 
1346 	/*
1347 	 * - enable focus processor (bit==0)
1348 	 * - 64bit mode always use processor focus
1349 	 *   so no need to set it
1350 	 */
1351 	value &= ~APIC_SPIV_FOCUS_DISABLED;
1352 #endif
1353 
1354 	/*
1355 	 * Set spurious IRQ vector
1356 	 */
1357 	value |= SPURIOUS_APIC_VECTOR;
1358 	apic_write(APIC_SPIV, value);
1359 
1360 	/*
1361 	 * Set up LVT0, LVT1:
1362 	 *
1363 	 * set up through-local-APIC on the BP's LINT0. This is not
1364 	 * strictly necessary in pure symmetric-IO mode, but sometimes
1365 	 * we delegate interrupts to the 8259A.
1366 	 */
1367 	/*
1368 	 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
1369 	 */
1370 	value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
1371 	if (!cpu && (pic_mode || !value)) {
1372 		value = APIC_DM_EXTINT;
1373 		apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
1374 	} else {
1375 		value = APIC_DM_EXTINT | APIC_LVT_MASKED;
1376 		apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu);
1377 	}
1378 	apic_write(APIC_LVT0, value);
1379 
1380 	/*
1381 	 * only the BP should see the LINT1 NMI signal, obviously.
1382 	 */
1383 	if (!cpu)
1384 		value = APIC_DM_NMI;
1385 	else
1386 		value = APIC_DM_NMI | APIC_LVT_MASKED;
1387 	if (!lapic_is_integrated())		/* 82489DX */
1388 		value |= APIC_LVT_LEVEL_TRIGGER;
1389 	apic_write(APIC_LVT1, value);
1390 
1391 #ifdef CONFIG_X86_MCE_INTEL
1392 	/* Recheck CMCI information after local APIC is up on CPU #0 */
1393 	if (!cpu)
1394 		cmci_recheck();
1395 #endif
1396 }
1397 
/*
 * Final stage of local APIC setup: program the error LVT, on 32-bit leave
 * the local APIC timer LVT masked with its vector set, and activate APIC
 * power management.
 */
static void end_local_APIC_setup(void)
{
	lapic_setup_esr();

#ifdef CONFIG_X86_32
	{
		/* Disable the local apic timer: mask it, vector in place */
		unsigned int lvtt = apic_read(APIC_LVTT);

		lvtt |= APIC_LVT_MASKED | LOCAL_TIMER_VECTOR;
		apic_write(APIC_LVTT, lvtt);
	}
#endif

	apic_pm_activate();
}
1414 
/*
 * Local APIC bring-up for application processors, called from smpboot.c:
 * runs the main setup followed by the final setup stage.
 */
void apic_ap_setup(void)
{
	setup_local_APIC();
	end_local_APIC_setup();
}
1423 
1424 #ifdef CONFIG_X86_X2APIC
/* Set to 1 while the kernel operates the local APIC in x2apic mode */
int x2apic_mode;

/* Values held by x2apic_state */
enum {
	X2APIC_OFF	= 0,	/* not enabled, may still be enabled */
	X2APIC_ON	= 1,	/* enabled */
	X2APIC_DISABLED	= 2,	/* must not be enabled */
};
static int x2apic_state;
1433 
1434 static inline void __x2apic_disable(void)
1435 {
1436 	u64 msr;
1437 
1438 	if (!cpu_has_apic)
1439 		return;
1440 
1441 	rdmsrl(MSR_IA32_APICBASE, msr);
1442 	if (!(msr & X2APIC_ENABLE))
1443 		return;
1444 	/* Disable xapic and x2apic first and then reenable xapic mode */
1445 	wrmsrl(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
1446 	wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
1447 	printk_once(KERN_INFO "x2apic disabled\n");
1448 }
1449 
1450 static inline void __x2apic_enable(void)
1451 {
1452 	u64 msr;
1453 
1454 	rdmsrl(MSR_IA32_APICBASE, msr);
1455 	if (msr & X2APIC_ENABLE)
1456 		return;
1457 	wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
1458 	printk_once(KERN_INFO "x2apic enabled\n");
1459 }
1460 
1461 static int __init setup_nox2apic(char *str)
1462 {
1463 	if (x2apic_enabled()) {
1464 		int apicid = native_apic_msr_read(APIC_ID);
1465 
1466 		if (apicid >= 255) {
1467 			pr_warning("Apicid: %08x, cannot enforce nox2apic\n",
1468 				   apicid);
1469 			return 0;
1470 		}
1471 		pr_warning("x2apic already enabled.\n");
1472 		__x2apic_disable();
1473 	}
1474 	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
1475 	x2apic_state = X2APIC_DISABLED;
1476 	x2apic_mode = 0;
1477 	return 0;
1478 }
1479 early_param("nox2apic", setup_nox2apic);
1480 
1481 /* Called from cpu_init() to enable x2apic on (secondary) cpus */
1482 void x2apic_setup(void)
1483 {
1484 	/*
1485 	 * If x2apic is not in ON state, disable it if already enabled
1486 	 * from BIOS.
1487 	 */
1488 	if (x2apic_state != X2APIC_ON) {
1489 		__x2apic_disable();
1490 		return;
1491 	}
1492 	__x2apic_enable();
1493 }
1494 
1495 static __init void x2apic_disable(void)
1496 {
1497 	u32 x2apic_id, state = x2apic_state;
1498 
1499 	x2apic_mode = 0;
1500 	x2apic_state = X2APIC_DISABLED;
1501 
1502 	if (state != X2APIC_ON)
1503 		return;
1504 
1505 	x2apic_id = read_apic_id();
1506 	if (x2apic_id >= 255)
1507 		panic("Cannot disable x2apic, id: %08x\n", x2apic_id);
1508 
1509 	__x2apic_disable();
1510 	register_lapic_address(mp_lapic_addr);
1511 }
1512 
1513 static __init void x2apic_enable(void)
1514 {
1515 	if (x2apic_state != X2APIC_OFF)
1516 		return;
1517 
1518 	x2apic_mode = 1;
1519 	x2apic_state = X2APIC_ON;
1520 	__x2apic_enable();
1521 }
1522 
1523 static __init void try_to_enable_x2apic(int remap_mode)
1524 {
1525 	if (x2apic_state == X2APIC_DISABLED)
1526 		return;
1527 
1528 	if (remap_mode != IRQ_REMAP_X2APIC_MODE) {
1529 		/* IR is required if there is APIC ID > 255 even when running
1530 		 * under KVM
1531 		 */
1532 		if (max_physical_apicid > 255 ||
1533 		    !hypervisor_x2apic_available()) {
1534 			pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
1535 			x2apic_disable();
1536 			return;
1537 		}
1538 
1539 		/*
1540 		 * without IR all CPUs can be addressed by IOAPIC/MSI
1541 		 * only in physical mode
1542 		 */
1543 		x2apic_phys = 1;
1544 	}
1545 	x2apic_enable();
1546 }
1547 
1548 void __init check_x2apic(void)
1549 {
1550 	if (x2apic_enabled()) {
1551 		pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n");
1552 		x2apic_mode = 1;
1553 		x2apic_state = X2APIC_ON;
1554 	} else if (!cpu_has_x2apic) {
1555 		x2apic_state = X2APIC_DISABLED;
1556 	}
1557 }
1558 #else /* CONFIG_X86_X2APIC */
1559 static int __init validate_x2apic(void)
1560 {
1561 	if (!apic_is_x2apic_enabled())
1562 		return 0;
1563 	/*
1564 	 * Checkme: Can we simply turn off x2apic here instead of panic?
1565 	 */
1566 	panic("BIOS has enabled x2apic but kernel doesn't support x2apic, please disable x2apic in BIOS.\n");
1567 }
1568 early_initcall(validate_x2apic);
1569 
/* Empty stand-ins used when the kernel is built without x2apic support */
static inline void try_to_enable_x2apic(int remap_mode)
{
}

static inline void __x2apic_enable(void)
{
}
1572 #endif /* !CONFIG_X86_X2APIC */
1573 
1574 static int __init try_to_enable_IR(void)
1575 {
1576 #ifdef CONFIG_X86_IO_APIC
1577 	if (!x2apic_enabled() && skip_ioapic_setup) {
1578 		pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
1579 		return -1;
1580 	}
1581 #endif
1582 	return irq_remapping_enable();
1583 }
1584 
1585 void __init enable_IR_x2apic(void)
1586 {
1587 	unsigned long flags;
1588 	int ret, ir_stat;
1589 
1590 	ir_stat = irq_remapping_prepare();
1591 	if (ir_stat < 0 && !x2apic_supported())
1592 		return;
1593 
1594 	ret = save_ioapic_entries();
1595 	if (ret) {
1596 		pr_info("Saving IO-APIC state failed: %d\n", ret);
1597 		return;
1598 	}
1599 
1600 	local_irq_save(flags);
1601 	legacy_pic->mask_all();
1602 	mask_ioapic_entries();
1603 
1604 	/* If irq_remapping_prepare() succeded, try to enable it */
1605 	if (ir_stat >= 0)
1606 		ir_stat = try_to_enable_IR();
1607 	/* ir_stat contains the remap mode or an error code */
1608 	try_to_enable_x2apic(ir_stat);
1609 
1610 	if (ir_stat < 0)
1611 		restore_ioapic_entries();
1612 	legacy_pic->restore_mask();
1613 	local_irq_restore(flags);
1614 }
1615 
1616 #ifdef CONFIG_X86_64
1617 /*
1618  * Detect and enable local APICs on non-SMP boards.
1619  * Original code written by Keir Fraser.
1620  * On AMD64 we trust the BIOS - if it says no APIC it is likely
1621  * not correctly set up (usually the APIC timer won't work etc.)
1622  */
1623 static int __init detect_init_APIC(void)
1624 {
1625 	if (!cpu_has_apic) {
1626 		pr_info("No local APIC present\n");
1627 		return -1;
1628 	}
1629 
1630 	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1631 	return 0;
1632 }
1633 #else
1634 
1635 static int __init apic_verify(void)
1636 {
1637 	u32 features, h, l;
1638 
1639 	/*
1640 	 * The APIC feature bit should now be enabled
1641 	 * in `cpuid'
1642 	 */
1643 	features = cpuid_edx(1);
1644 	if (!(features & (1 << X86_FEATURE_APIC))) {
1645 		pr_warning("Could not enable APIC!\n");
1646 		return -1;
1647 	}
1648 	set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
1649 	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1650 
1651 	/* The BIOS may have set up the APIC at some other address */
1652 	if (boot_cpu_data.x86 >= 6) {
1653 		rdmsr(MSR_IA32_APICBASE, l, h);
1654 		if (l & MSR_IA32_APICBASE_ENABLE)
1655 			mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
1656 	}
1657 
1658 	pr_info("Found and enabled local APIC!\n");
1659 	return 0;
1660 }
1661 
1662 int __init apic_force_enable(unsigned long addr)
1663 {
1664 	u32 h, l;
1665 
1666 	if (disable_apic)
1667 		return -1;
1668 
1669 	/*
1670 	 * Some BIOSes disable the local APIC in the APIC_BASE
1671 	 * MSR. This can only be done in software for Intel P6 or later
1672 	 * and AMD K7 (Model > 1) or later.
1673 	 */
1674 	if (boot_cpu_data.x86 >= 6) {
1675 		rdmsr(MSR_IA32_APICBASE, l, h);
1676 		if (!(l & MSR_IA32_APICBASE_ENABLE)) {
1677 			pr_info("Local APIC disabled by BIOS -- reenabling.\n");
1678 			l &= ~MSR_IA32_APICBASE_BASE;
1679 			l |= MSR_IA32_APICBASE_ENABLE | addr;
1680 			wrmsr(MSR_IA32_APICBASE, l, h);
1681 			enabled_via_apicbase = 1;
1682 		}
1683 	}
1684 	return apic_verify();
1685 }
1686 
1687 /*
1688  * Detect and initialize APIC
1689  */
1690 static int __init detect_init_APIC(void)
1691 {
1692 	/* Disabled by kernel option? */
1693 	if (disable_apic)
1694 		return -1;
1695 
1696 	switch (boot_cpu_data.x86_vendor) {
1697 	case X86_VENDOR_AMD:
1698 		if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
1699 		    (boot_cpu_data.x86 >= 15))
1700 			break;
1701 		goto no_apic;
1702 	case X86_VENDOR_INTEL:
1703 		if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
1704 		    (boot_cpu_data.x86 == 5 && cpu_has_apic))
1705 			break;
1706 		goto no_apic;
1707 	default:
1708 		goto no_apic;
1709 	}
1710 
1711 	if (!cpu_has_apic) {
1712 		/*
1713 		 * Over-ride BIOS and try to enable the local APIC only if
1714 		 * "lapic" specified.
1715 		 */
1716 		if (!force_enable_local_apic) {
1717 			pr_info("Local APIC disabled by BIOS -- "
1718 				"you can enable it with \"lapic\"\n");
1719 			return -1;
1720 		}
1721 		if (apic_force_enable(APIC_DEFAULT_PHYS_BASE))
1722 			return -1;
1723 	} else {
1724 		if (apic_verify())
1725 			return -1;
1726 	}
1727 
1728 	apic_pm_activate();
1729 
1730 	return 0;
1731 
1732 no_apic:
1733 	pr_info("No local APIC present or hardware disabled\n");
1734 	return -1;
1735 }
1736 #endif
1737 
1738 /**
1739  * init_apic_mappings - initialize APIC mappings
1740  */
1741 void __init init_apic_mappings(void)
1742 {
1743 	unsigned int new_apicid;
1744 
1745 	if (x2apic_mode) {
1746 		boot_cpu_physical_apicid = read_apic_id();
1747 		return;
1748 	}
1749 
1750 	/* If no local APIC can be found return early */
1751 	if (!smp_found_config && detect_init_APIC()) {
1752 		/* lets NOP'ify apic operations */
1753 		pr_info("APIC: disable apic facility\n");
1754 		apic_disable();
1755 	} else {
1756 		apic_phys = mp_lapic_addr;
1757 
1758 		/*
1759 		 * acpi lapic path already maps that address in
1760 		 * acpi_register_lapic_address()
1761 		 */
1762 		if (!acpi_lapic && !smp_found_config)
1763 			register_lapic_address(apic_phys);
1764 	}
1765 
1766 	/*
1767 	 * Fetch the APIC ID of the BSP in case we have a
1768 	 * default configuration (or the MP table is broken).
1769 	 */
1770 	new_apicid = read_apic_id();
1771 	if (boot_cpu_physical_apicid != new_apicid) {
1772 		boot_cpu_physical_apicid = new_apicid;
1773 		/*
1774 		 * yeah -- we lie about apic_version
1775 		 * in case if apic was disabled via boot option
1776 		 * but it's not a problem for SMP compiled kernel
1777 		 * since smp_sanity_check is prepared for such a case
1778 		 * and disable smp mode
1779 		 */
1780 		apic_version[new_apicid] =
1781 			 GET_APIC_VERSION(apic_read(APIC_LVR));
1782 	}
1783 }
1784 
1785 void __init register_lapic_address(unsigned long address)
1786 {
1787 	mp_lapic_addr = address;
1788 
1789 	if (!x2apic_mode) {
1790 		set_fixmap_nocache(FIX_APIC_BASE, address);
1791 		apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
1792 			    APIC_BASE, mp_lapic_addr);
1793 	}
1794 	if (boot_cpu_physical_apicid == -1U) {
1795 		boot_cpu_physical_apicid  = read_apic_id();
1796 		apic_version[boot_cpu_physical_apicid] =
1797 			 GET_APIC_VERSION(apic_read(APIC_LVR));
1798 	}
1799 }
1800 
/* Local APIC version of each processor, indexed by physical APIC ID */
int apic_version[MAX_LOCAL_APIC];
1802 
1803 /*
1804  * Local APIC interrupts
1805  */
1806 
1807 /*
1808  * This interrupt should _never_ happen with our APIC/SMP architecture
1809  */
1810 static inline void __smp_spurious_interrupt(u8 vector)
1811 {
1812 	u32 v;
1813 
1814 	/*
1815 	 * Check if this really is a spurious interrupt and ACK it
1816 	 * if it is a vectored one.  Just in case...
1817 	 * Spurious interrupts should not be ACKed.
1818 	 */
1819 	v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
1820 	if (v & (1 << (vector & 0x1f)))
1821 		ack_APIC_irq();
1822 
1823 	inc_irq_stat(irq_spurious_count);
1824 
1825 	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
1826 	pr_info("spurious APIC interrupt through vector %02x on CPU#%d, "
1827 		"should never happen.\n", vector, smp_processor_id());
1828 }
1829 
1830 __visible void smp_spurious_interrupt(struct pt_regs *regs)
1831 {
1832 	entering_irq();
1833 	__smp_spurious_interrupt(~regs->orig_ax);
1834 	exiting_irq();
1835 }
1836 
1837 __visible void smp_trace_spurious_interrupt(struct pt_regs *regs)
1838 {
1839 	u8 vector = ~regs->orig_ax;
1840 
1841 	entering_irq();
1842 	trace_spurious_apic_entry(vector);
1843 	__smp_spurious_interrupt(vector);
1844 	trace_spurious_apic_exit(vector);
1845 	exiting_irq();
1846 }
1847 
1848 /*
1849  * This interrupt should never happen with our APIC/SMP architecture
1850  */
1851 static inline void __smp_error_interrupt(struct pt_regs *regs)
1852 {
1853 	u32 v;
1854 	u32 i = 0;
1855 	static const char * const error_interrupt_reason[] = {
1856 		"Send CS error",		/* APIC Error Bit 0 */
1857 		"Receive CS error",		/* APIC Error Bit 1 */
1858 		"Send accept error",		/* APIC Error Bit 2 */
1859 		"Receive accept error",		/* APIC Error Bit 3 */
1860 		"Redirectable IPI",		/* APIC Error Bit 4 */
1861 		"Send illegal vector",		/* APIC Error Bit 5 */
1862 		"Received illegal vector",	/* APIC Error Bit 6 */
1863 		"Illegal register address",	/* APIC Error Bit 7 */
1864 	};
1865 
1866 	/* First tickle the hardware, only then report what went on. -- REW */
1867 	if (lapic_get_maxlvt() > 3)	/* Due to the Pentium erratum 3AP. */
1868 		apic_write(APIC_ESR, 0);
1869 	v = apic_read(APIC_ESR);
1870 	ack_APIC_irq();
1871 	atomic_inc(&irq_err_count);
1872 
1873 	apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x",
1874 		    smp_processor_id(), v);
1875 
1876 	v &= 0xff;
1877 	while (v) {
1878 		if (v & 0x1)
1879 			apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
1880 		i++;
1881 		v >>= 1;
1882 	}
1883 
1884 	apic_printk(APIC_DEBUG, KERN_CONT "\n");
1885 
1886 }
1887 
1888 __visible void smp_error_interrupt(struct pt_regs *regs)
1889 {
1890 	entering_irq();
1891 	__smp_error_interrupt(regs);
1892 	exiting_irq();
1893 }
1894 
1895 __visible void smp_trace_error_interrupt(struct pt_regs *regs)
1896 {
1897 	entering_irq();
1898 	trace_error_apic_entry(ERROR_APIC_VECTOR);
1899 	__smp_error_interrupt(regs);
1900 	trace_error_apic_exit(ERROR_APIC_VECTOR);
1901 	exiting_irq();
1902 }
1903 
1904 /**
1905  * connect_bsp_APIC - attach the APIC to the interrupt system
1906  */
1907 static void __init connect_bsp_APIC(void)
1908 {
1909 #ifdef CONFIG_X86_32
1910 	if (pic_mode) {
1911 		/*
1912 		 * Do not trust the local APIC being empty at bootup.
1913 		 */
1914 		clear_local_APIC();
1915 		/*
1916 		 * PIC mode, enable APIC mode in the IMCR, i.e.  connect BSP's
1917 		 * local APIC to INT and NMI lines.
1918 		 */
1919 		apic_printk(APIC_VERBOSE, "leaving PIC mode, "
1920 				"enabling APIC mode.\n");
1921 		imcr_pic_to_apic();
1922 	}
1923 #endif
1924 }
1925 
1926 /**
1927  * disconnect_bsp_APIC - detach the APIC from the interrupt system
1928  * @virt_wire_setup:	indicates, whether virtual wire mode is selected
1929  *
1930  * Virtual wire mode is necessary to deliver legacy interrupts even when the
1931  * APIC is disabled.
1932  */
1933 void disconnect_bsp_APIC(int virt_wire_setup)
1934 {
1935 	unsigned int value;
1936 
1937 #ifdef CONFIG_X86_32
1938 	if (pic_mode) {
1939 		/*
1940 		 * Put the board back into PIC mode (has an effect only on
1941 		 * certain older boards).  Note that APIC interrupts, including
1942 		 * IPIs, won't work beyond this point!  The only exception are
1943 		 * INIT IPIs.
1944 		 */
1945 		apic_printk(APIC_VERBOSE, "disabling APIC mode, "
1946 				"entering PIC mode.\n");
1947 		imcr_apic_to_pic();
1948 		return;
1949 	}
1950 #endif
1951 
1952 	/* Go back to Virtual Wire compatibility mode */
1953 
1954 	/* For the spurious interrupt use vector F, and enable it */
1955 	value = apic_read(APIC_SPIV);
1956 	value &= ~APIC_VECTOR_MASK;
1957 	value |= APIC_SPIV_APIC_ENABLED;
1958 	value |= 0xf;
1959 	apic_write(APIC_SPIV, value);
1960 
1961 	if (!virt_wire_setup) {
1962 		/*
1963 		 * For LVT0 make it edge triggered, active high,
1964 		 * external and enabled
1965 		 */
1966 		value = apic_read(APIC_LVT0);
1967 		value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1968 			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1969 			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
1970 		value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1971 		value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
1972 		apic_write(APIC_LVT0, value);
1973 	} else {
1974 		/* Disable LVT0 */
1975 		apic_write(APIC_LVT0, APIC_LVT_MASKED);
1976 	}
1977 
1978 	/*
1979 	 * For LVT1 make it edge triggered, active high,
1980 	 * nmi and enabled
1981 	 */
1982 	value = apic_read(APIC_LVT1);
1983 	value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1984 			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1985 			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
1986 	value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1987 	value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
1988 	apic_write(APIC_LVT1, value);
1989 }
1990 
1991 int generic_processor_info(int apicid, int version)
1992 {
1993 	int cpu, max = nr_cpu_ids;
1994 	bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
1995 				phys_cpu_present_map);
1996 
1997 	/*
1998 	 * boot_cpu_physical_apicid is designed to have the apicid
1999 	 * returned by read_apic_id(), i.e, the apicid of the
2000 	 * currently booting-up processor. However, on some platforms,
2001 	 * it is temporarily modified by the apicid reported as BSP
2002 	 * through MP table. Concretely:
2003 	 *
2004 	 * - arch/x86/kernel/mpparse.c: MP_processor_info()
2005 	 * - arch/x86/mm/amdtopology.c: amd_numa_init()
2006 	 *
2007 	 * This function is executed with the modified
2008 	 * boot_cpu_physical_apicid. So, disabled_cpu_apicid kernel
2009 	 * parameter doesn't work to disable APs on kdump 2nd kernel.
2010 	 *
2011 	 * Since fixing handling of boot_cpu_physical_apicid requires
2012 	 * another discussion and tests on each platform, we leave it
2013 	 * for now and here we use read_apic_id() directly in this
2014 	 * function, generic_processor_info().
2015 	 */
2016 	if (disabled_cpu_apicid != BAD_APICID &&
2017 	    disabled_cpu_apicid != read_apic_id() &&
2018 	    disabled_cpu_apicid == apicid) {
2019 		int thiscpu = num_processors + disabled_cpus;
2020 
2021 		pr_warning("APIC: Disabling requested cpu."
2022 			   " Processor %d/0x%x ignored.\n",
2023 			   thiscpu, apicid);
2024 
2025 		disabled_cpus++;
2026 		return -ENODEV;
2027 	}
2028 
2029 	/*
2030 	 * If boot cpu has not been detected yet, then only allow upto
2031 	 * nr_cpu_ids - 1 processors and keep one slot free for boot cpu
2032 	 */
2033 	if (!boot_cpu_detected && num_processors >= nr_cpu_ids - 1 &&
2034 	    apicid != boot_cpu_physical_apicid) {
2035 		int thiscpu = max + disabled_cpus - 1;
2036 
2037 		pr_warning(
2038 			"ACPI: NR_CPUS/possible_cpus limit of %i almost"
2039 			" reached. Keeping one slot for boot cpu."
2040 			"  Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
2041 
2042 		disabled_cpus++;
2043 		return -ENODEV;
2044 	}
2045 
2046 	if (num_processors >= nr_cpu_ids) {
2047 		int thiscpu = max + disabled_cpus;
2048 
2049 		pr_warning(
2050 			"ACPI: NR_CPUS/possible_cpus limit of %i reached."
2051 			"  Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
2052 
2053 		disabled_cpus++;
2054 		return -EINVAL;
2055 	}
2056 
2057 	num_processors++;
2058 	if (apicid == boot_cpu_physical_apicid) {
2059 		/*
2060 		 * x86_bios_cpu_apicid is required to have processors listed
2061 		 * in same order as logical cpu numbers. Hence the first
2062 		 * entry is BSP, and so on.
2063 		 * boot_cpu_init() already hold bit 0 in cpu_present_mask
2064 		 * for BSP.
2065 		 */
2066 		cpu = 0;
2067 	} else
2068 		cpu = cpumask_next_zero(-1, cpu_present_mask);
2069 
2070 	/*
2071 	 * Validate version
2072 	 */
2073 	if (version == 0x0) {
2074 		pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
2075 			   cpu, apicid);
2076 		version = 0x10;
2077 	}
2078 	apic_version[apicid] = version;
2079 
2080 	if (version != apic_version[boot_cpu_physical_apicid]) {
2081 		pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
2082 			apic_version[boot_cpu_physical_apicid], cpu, version);
2083 	}
2084 
2085 	physid_set(apicid, phys_cpu_present_map);
2086 	if (apicid > max_physical_apicid)
2087 		max_physical_apicid = apicid;
2088 
2089 #if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
2090 	early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
2091 	early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
2092 #endif
2093 #ifdef CONFIG_X86_32
2094 	early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
2095 		apic->x86_32_early_logical_apicid(cpu);
2096 #endif
2097 	set_cpu_possible(cpu, true);
2098 	set_cpu_present(cpu, true);
2099 
2100 	return cpu;
2101 }
2102 
/* Return the APIC ID of the executing CPU as reported by read_apic_id() */
int hard_smp_processor_id(void)
{
	return read_apic_id();
}
2107 
2108 void default_init_apic_ldr(void)
2109 {
2110 	unsigned long val;
2111 
2112 	apic_write(APIC_DFR, APIC_DFR_VALUE);
2113 	val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
2114 	val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
2115 	apic_write(APIC_LDR, val);
2116 }
2117 
2118 int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
2119 				   const struct cpumask *andmask,
2120 				   unsigned int *apicid)
2121 {
2122 	unsigned int cpu;
2123 
2124 	for_each_cpu_and(cpu, cpumask, andmask) {
2125 		if (cpumask_test_cpu(cpu, cpu_online_mask))
2126 			break;
2127 	}
2128 
2129 	if (likely(cpu < nr_cpu_ids)) {
2130 		*apicid = per_cpu(x86_cpu_to_apicid, cpu);
2131 		return 0;
2132 	}
2133 
2134 	return -EINVAL;
2135 }
2136 
2137 /*
2138  * Override the generic EOI implementation with an optimized version.
2139  * Only called during early boot when only one CPU is active and with
2140  * interrupts disabled, so we know this does not race with actual APIC driver
2141  * use.
2142  */
2143 void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v))
2144 {
2145 	struct apic **drv;
2146 
2147 	for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
2148 		/* Should happen once for each apic */
2149 		WARN_ON((*drv)->eoi_write == eoi_write);
2150 		(*drv)->eoi_write = eoi_write;
2151 	}
2152 }
2153 
2154 static void __init apic_bsp_up_setup(void)
2155 {
2156 #ifdef CONFIG_X86_64
2157 	apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
2158 #else
2159 	/*
2160 	 * Hack: In case of kdump, after a crash, kernel might be booting
2161 	 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
2162 	 * might be zero if read from MP tables. Get it from LAPIC.
2163 	 */
2164 # ifdef CONFIG_CRASH_DUMP
2165 	boot_cpu_physical_apicid = read_apic_id();
2166 # endif
2167 #endif
2168 	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
2169 }
2170 
2171 /**
2172  * apic_bsp_setup - Setup function for local apic and io-apic
2173  * @upmode:		Force UP mode (for APIC_init_uniprocessor)
2174  *
2175  * Returns:
2176  * apic_id of BSP APIC
2177  */
2178 int __init apic_bsp_setup(bool upmode)
2179 {
2180 	int id;
2181 
2182 	connect_bsp_APIC();
2183 	if (upmode)
2184 		apic_bsp_up_setup();
2185 	setup_local_APIC();
2186 
2187 	if (x2apic_mode)
2188 		id = apic_read(APIC_LDR);
2189 	else
2190 		id = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
2191 
2192 	enable_IO_APIC();
2193 	end_local_APIC_setup();
2194 	irq_remap_enable_fault_handling();
2195 	setup_IO_APIC();
2196 	/* Setup local timer */
2197 	x86_init.timers.setup_percpu_clockev();
2198 	return id;
2199 }
2200 
2201 /*
2202  * This initializes the IO-APIC and APIC hardware if this is
2203  * a UP kernel.
2204  */
2205 int __init APIC_init_uniprocessor(void)
2206 {
2207 	if (disable_apic) {
2208 		pr_info("Apic disabled\n");
2209 		return -1;
2210 	}
2211 #ifdef CONFIG_X86_64
2212 	if (!cpu_has_apic) {
2213 		disable_apic = 1;
2214 		pr_info("Apic disabled by BIOS\n");
2215 		return -1;
2216 	}
2217 #else
2218 	if (!smp_found_config && !cpu_has_apic)
2219 		return -1;
2220 
2221 	/*
2222 	 * Complain if the BIOS pretends there is one.
2223 	 */
2224 	if (!cpu_has_apic &&
2225 	    APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
2226 		pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
2227 			boot_cpu_physical_apicid);
2228 		return -1;
2229 	}
2230 #endif
2231 
2232 	if (!smp_found_config)
2233 		disable_ioapic_support();
2234 
2235 	default_setup_apic_routing();
2236 	apic_bsp_setup(true);
2237 	return 0;
2238 }
2239 
2240 #ifdef CONFIG_UP_LATE_INIT
2241 void __init up_late_init(void)
2242 {
2243 	APIC_init_uniprocessor();
2244 }
2245 #endif
2246 
2247 /*
2248  * Power management
2249  */
2250 #ifdef CONFIG_PM
2251 
/*
 * Local APIC register snapshot filled in by lapic_suspend() and written
 * back by lapic_resume().
 */
static struct {
	/*
	 * 'active' is true if the local APIC was enabled by us and
	 * not the BIOS; this signifies that we are also responsible
	 * for disabling it before entering apm/acpi suspend
	 */
	int active;

	/* r/w apic fields, one per saved register */
	unsigned int apic_id;		/* APIC_ID */
	unsigned int apic_taskpri;	/* APIC_TASKPRI */
	unsigned int apic_ldr;		/* APIC_LDR */
	unsigned int apic_dfr;		/* APIC_DFR */
	unsigned int apic_spiv;		/* APIC_SPIV */
	unsigned int apic_lvtt;		/* APIC_LVTT */
	unsigned int apic_lvtpc;	/* APIC_LVTPC, only when maxlvt >= 4 */
	unsigned int apic_lvt0;		/* APIC_LVT0 */
	unsigned int apic_lvt1;		/* APIC_LVT1 */
	unsigned int apic_lvterr;	/* APIC_LVTERR */
	unsigned int apic_tmict;	/* APIC_TMICT */
	unsigned int apic_tdcr;		/* APIC_TDCR */
	unsigned int apic_thmr;		/* APIC_LVTTHMR, only when maxlvt >= 5 */
} apic_pm_state;

2275 static int lapic_suspend(void)
2276 {
2277 	unsigned long flags;
2278 	int maxlvt;
2279 
2280 	if (!apic_pm_state.active)
2281 		return 0;
2282 
2283 	maxlvt = lapic_get_maxlvt();
2284 
2285 	apic_pm_state.apic_id = apic_read(APIC_ID);
2286 	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
2287 	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
2288 	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
2289 	apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
2290 	apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
2291 	if (maxlvt >= 4)
2292 		apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
2293 	apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
2294 	apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
2295 	apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
2296 	apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
2297 	apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
2298 #ifdef CONFIG_X86_THERMAL_VECTOR
2299 	if (maxlvt >= 5)
2300 		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
2301 #endif
2302 
2303 	local_irq_save(flags);
2304 	disable_local_APIC();
2305 
2306 	irq_remapping_disable();
2307 
2308 	local_irq_restore(flags);
2309 	return 0;
2310 }
2311 
2312 static void lapic_resume(void)
2313 {
2314 	unsigned int l, h;
2315 	unsigned long flags;
2316 	int maxlvt;
2317 
2318 	if (!apic_pm_state.active)
2319 		return;
2320 
2321 	local_irq_save(flags);
2322 
2323 	/*
2324 	 * IO-APIC and PIC have their own resume routines.
2325 	 * We just mask them here to make sure the interrupt
2326 	 * subsystem is completely quiet while we enable x2apic
2327 	 * and interrupt-remapping.
2328 	 */
2329 	mask_ioapic_entries();
2330 	legacy_pic->mask_all();
2331 
2332 	if (x2apic_mode) {
2333 		__x2apic_enable();
2334 	} else {
2335 		/*
2336 		 * Make sure the APICBASE points to the right address
2337 		 *
2338 		 * FIXME! This will be wrong if we ever support suspend on
2339 		 * SMP! We'll need to do this as part of the CPU restore!
2340 		 */
2341 		if (boot_cpu_data.x86 >= 6) {
2342 			rdmsr(MSR_IA32_APICBASE, l, h);
2343 			l &= ~MSR_IA32_APICBASE_BASE;
2344 			l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
2345 			wrmsr(MSR_IA32_APICBASE, l, h);
2346 		}
2347 	}
2348 
2349 	maxlvt = lapic_get_maxlvt();
2350 	apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
2351 	apic_write(APIC_ID, apic_pm_state.apic_id);
2352 	apic_write(APIC_DFR, apic_pm_state.apic_dfr);
2353 	apic_write(APIC_LDR, apic_pm_state.apic_ldr);
2354 	apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
2355 	apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
2356 	apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
2357 	apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
2358 #if defined(CONFIG_X86_MCE_INTEL)
2359 	if (maxlvt >= 5)
2360 		apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
2361 #endif
2362 	if (maxlvt >= 4)
2363 		apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
2364 	apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
2365 	apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
2366 	apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
2367 	apic_write(APIC_ESR, 0);
2368 	apic_read(APIC_ESR);
2369 	apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
2370 	apic_write(APIC_ESR, 0);
2371 	apic_read(APIC_ESR);
2372 
2373 	irq_remapping_reenable(x2apic_mode);
2374 
2375 	local_irq_restore(flags);
2376 }
2377 
2378 /*
2379  * This device has no shutdown method - fully functioning local APICs
2380  * are needed on every CPU up until machine_halt/restart/poweroff.
2381  */
2382 
2383 static struct syscore_ops lapic_syscore_ops = {
2384 	.resume		= lapic_resume,
2385 	.suspend	= lapic_suspend,
2386 };
2387 
2388 static void apic_pm_activate(void)
2389 {
2390 	apic_pm_state.active = 1;
2391 }
2392 
2393 static int __init init_lapic_sysfs(void)
2394 {
2395 	/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
2396 	if (cpu_has_apic)
2397 		register_syscore_ops(&lapic_syscore_ops);
2398 
2399 	return 0;
2400 }
2401 
2402 /* local apic needs to resume before other devices access its registers. */
2403 core_initcall(init_lapic_sysfs);
2404 
2405 #else	/* CONFIG_PM */
2406 
/* No-op stub for builds without CONFIG_PM. */
static void apic_pm_activate(void)
{
}
2408 
2409 #endif	/* CONFIG_PM */
2410 
2411 #ifdef CONFIG_X86_64
2412 
2413 static int multi_checked;
2414 static int multi;
2415 
2416 static int set_multi(const struct dmi_system_id *d)
2417 {
2418 	if (multi)
2419 		return 0;
2420 	pr_info("APIC: %s detected, Multi Chassis\n", d->ident);
2421 	multi = 1;
2422 	return 0;
2423 }
2424 
2425 static const struct dmi_system_id multi_dmi_table[] = {
2426 	{
2427 		.callback = set_multi,
2428 		.ident = "IBM System Summit2",
2429 		.matches = {
2430 			DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
2431 			DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"),
2432 		},
2433 	},
2434 	{}
2435 };
2436 
2437 static void dmi_check_multi(void)
2438 {
2439 	if (multi_checked)
2440 		return;
2441 
2442 	dmi_check_system(multi_dmi_table);
2443 	multi_checked = 1;
2444 }
2445 
2446 /*
2447  * apic_is_clustered_box() -- Check if we can expect good TSC
2448  *
2449  * Thus far, the major user of this is IBM's Summit2 series:
2450  * Clustered boxes may have unsynced TSC problems if they are
2451  * multi-chassis.
2452  * Use DMI to check them
2453  */
2454 int apic_is_clustered_box(void)
2455 {
2456 	dmi_check_multi();
2457 	return multi;
2458 }
2459 #endif
2460 
2461 /*
2462  * APIC command line parameters
2463  */
2464 static int __init setup_disableapic(char *arg)
2465 {
2466 	disable_apic = 1;
2467 	setup_clear_cpu_cap(X86_FEATURE_APIC);
2468 	return 0;
2469 }
2470 early_param("disableapic", setup_disableapic);
2471 
2472 /* same as disableapic, for compatibility */
2473 static int __init setup_nolapic(char *arg)
2474 {
2475 	return setup_disableapic(arg);
2476 }
2477 early_param("nolapic", setup_nolapic);
2478 
2479 static int __init parse_lapic_timer_c2_ok(char *arg)
2480 {
2481 	local_apic_timer_c2_ok = 1;
2482 	return 0;
2483 }
2484 early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
2485 
2486 static int __init parse_disable_apic_timer(char *arg)
2487 {
2488 	disable_apic_timer = 1;
2489 	return 0;
2490 }
2491 early_param("noapictimer", parse_disable_apic_timer);
2492 
2493 static int __init parse_nolapic_timer(char *arg)
2494 {
2495 	disable_apic_timer = 1;
2496 	return 0;
2497 }
2498 early_param("nolapic_timer", parse_nolapic_timer);
2499 
2500 static int __init apic_set_verbosity(char *arg)
2501 {
2502 	if (!arg)  {
2503 #ifdef CONFIG_X86_64
2504 		skip_ioapic_setup = 0;
2505 		return 0;
2506 #endif
2507 		return -EINVAL;
2508 	}
2509 
2510 	if (strcmp("debug", arg) == 0)
2511 		apic_verbosity = APIC_DEBUG;
2512 	else if (strcmp("verbose", arg) == 0)
2513 		apic_verbosity = APIC_VERBOSE;
2514 	else {
2515 		pr_warning("APIC Verbosity level %s not recognised"
2516 			" use apic=verbose or apic=debug\n", arg);
2517 		return -EINVAL;
2518 	}
2519 
2520 	return 0;
2521 }
2522 early_param("apic", apic_set_verbosity);
2523 
2524 static int __init lapic_insert_resource(void)
2525 {
2526 	if (!apic_phys)
2527 		return -1;
2528 
2529 	/* Put local APIC into the resource map. */
2530 	lapic_resource.start = apic_phys;
2531 	lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
2532 	insert_resource(&iomem_resource, &lapic_resource);
2533 
2534 	return 0;
2535 }
2536 
2537 /*
2538  * need call insert after e820_reserve_resources()
2539  * that is using request_resource
2540  */
2541 late_initcall(lapic_insert_resource);
2542 
2543 static int __init apic_set_disabled_cpu_apicid(char *arg)
2544 {
2545 	if (!arg || !get_option(&arg, &disabled_cpu_apicid))
2546 		return -EINVAL;
2547 
2548 	return 0;
2549 }
2550 early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid);
2551