/*
 *	Intel IO-APIC support for multi-Pentium hosts.
 *
 *	Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
 *
 *	Many thanks to Stig Venaas for trying out countless experimental
 *	patches and reporting/debugging problems patiently!
 *
 *	(c) 1999, Multiple IO-APIC support, developed by
 *	Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
 *      Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
 *	further tested and cleaned up by Zach Brown <zab@redhat.com>
 *	and Ingo Molnar <mingo@redhat.com>
 *
 *	Fixes
 *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
 *					thanks to Eric Gilmore
 *					and Rolf G. Tews
 *					for testing these extensively
 *	Paul Diefenbaugh	:	Added full ACPI support
 */

#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/pci.h>
#include <linux/mc146818rtc.h>
#include <linux/compiler.h>
#include <linux/acpi.h>
#include <linux/module.h>
#include <linux/syscore_ops.h>
#include <linux/msi.h>
#include <linux/htirq.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/jiffies.h>	/* time_after() */
#include <linux/slab.h>
#ifdef CONFIG_ACPI
#include <acpi/acpi_bus.h>
#endif
#include <linux/bootmem.h>
#include <linux/dmar.h>
#include <linux/hpet.h>

#include <asm/idle.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/cpu.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/acpi.h>
#include <asm/dma.h>
#include <asm/timer.h>
#include <asm/i8259.h>
#include <asm/msidef.h>
#include <asm/hypertransport.h>
#include <asm/setup.h>
#include <asm/irq_remapping.h>
#include <asm/hpet.h>
#include <asm/hw_irq.h>

#include <asm/apic.h>

#define __apicdebuginit(type) static type __init
#define for_each_irq_pin(entry, head) \
	for (entry = head; entry; entry = entry->next)
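
/*
 * Illustrative use of the iterator above (a sketch mirroring callers
 * later in this file): walk every IO-APIC pin feeding one irq_cfg:
 *
 *	struct irq_pin_list *entry;
 *
 *	for_each_irq_pin(entry, cfg->irq_2_pin)
 *		pr_debug("apic %d pin %d\n", entry->apic, entry->pin);
 */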

/*
 *      Is the SiS APIC rmw bug present?
 *      -1 = don't know, 0 = no, 1 = yes
 */
int sis_apic_bug = -1;

static DEFINE_RAW_SPINLOCK(ioapic_lock);
static DEFINE_RAW_SPINLOCK(vector_lock);

static struct ioapic {
	/*
	 * # of IRQ routing registers
	 */
	int nr_registers;
	/*
	 * Saved state during suspend/resume, or while enabling intr-remap.
	 */
	struct IO_APIC_route_entry *saved_registers;
	/* I/O APIC config */
	struct mpc_ioapic mp_config;
	/* IO APIC gsi routing info */
	struct mp_ioapic_gsi  gsi_config;
	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
} ioapics[MAX_IO_APICS];

#define mpc_ioapic_ver(ioapic_idx)	ioapics[ioapic_idx].mp_config.apicver

int mpc_ioapic_id(int ioapic_idx)
{
	return ioapics[ioapic_idx].mp_config.apicid;
}

unsigned int mpc_ioapic_addr(int ioapic_idx)
{
	return ioapics[ioapic_idx].mp_config.apicaddr;
}

struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx)
{
	return &ioapics[ioapic_idx].gsi_config;
}

int nr_ioapics;

/* One past the highest gsi number used */
u32 gsi_top;

/* MP IRQ source entries */
struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];

/* # of MP IRQ source entries */
int mp_irq_entries;

/* GSI interrupts */
static int nr_irqs_gsi = NR_IRQS_LEGACY;

#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
int mp_bus_id_to_type[MAX_MP_BUSSES];
#endif

DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);

int skip_ioapic_setup;

/**
 * disable_ioapic_support() - disables ioapic support at runtime
 */
void disable_ioapic_support(void)
{
#ifdef CONFIG_PCI
	noioapicquirk = 1;
	noioapicreroute = -1;
#endif
	skip_ioapic_setup = 1;
}

static int __init parse_noapic(char *str)
{
	/* disable IO-APIC */
	disable_ioapic_support();
	return 0;
}
early_param("noapic", parse_noapic);

static int io_apic_setup_irq_pin(unsigned int irq, int node,
				 struct io_apic_irq_attr *attr);

/* Will be called from the mpparse/acpi/sfi code to save IRQ info */
void mp_save_irq(struct mpc_intsrc *m)
{
	int i;

	apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
		" IRQ %02x, APIC ID %x, APIC INT %02x\n",
		m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
		m->srcbusirq, m->dstapic, m->dstirq);

	for (i = 0; i < mp_irq_entries; i++) {
		if (!memcmp(&mp_irqs[i], m, sizeof(*m)))
			return;
	}

	memcpy(&mp_irqs[mp_irq_entries], m, sizeof(*m));
	if (++mp_irq_entries == MAX_IRQ_SOURCES)
		panic("Max # of irq sources exceeded!!\n");
}

struct irq_pin_list {
	int apic, pin;
	struct irq_pin_list *next;
};

static struct irq_pin_list *alloc_irq_pin_list(int node)
{
	return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
}

/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];

int __init arch_early_irq_init(void)
{
	struct irq_cfg *cfg;
	int count, node, i;

	if (!legacy_pic->nr_legacy_irqs)
		io_apic_irqs = ~0UL;

	for (i = 0; i < nr_ioapics; i++) {
		ioapics[i].saved_registers =
			kzalloc(sizeof(struct IO_APIC_route_entry) *
				ioapics[i].nr_registers, GFP_KERNEL);
		if (!ioapics[i].saved_registers)
			pr_err("IOAPIC %d: suspend/resume impossible!\n", i);
	}

	cfg = irq_cfgx;
	count = ARRAY_SIZE(irq_cfgx);
	node = cpu_to_node(0);

	/* Make sure the legacy interrupts are marked in the bitmap */
	irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs);

	for (i = 0; i < count; i++) {
		irq_set_chip_data(i, &cfg[i]);
		zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
		zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
		/*
		 * For legacy IRQs, start by assigning irq0 to irq15 to
		 * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
		 */
		if (i < legacy_pic->nr_legacy_irqs) {
			cfg[i].vector = IRQ0_VECTOR + i;
			cpumask_set_cpu(0, cfg[i].domain);
		}
	}

	return 0;
}

static struct irq_cfg *irq_cfg(unsigned int irq)
{
	return irq_get_chip_data(irq);
}

static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
{
	struct irq_cfg *cfg;

	cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node);
	if (!cfg)
		return NULL;
	if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node))
		goto out_cfg;
	if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node))
		goto out_domain;
	return cfg;
out_domain:
	free_cpumask_var(cfg->domain);
out_cfg:
	kfree(cfg);
	return NULL;
}

static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
{
	if (!cfg)
		return;
	irq_set_chip_data(at, NULL);
	free_cpumask_var(cfg->domain);
	free_cpumask_var(cfg->old_domain);
	kfree(cfg);
}

static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
{
	int res = irq_alloc_desc_at(at, node);
	struct irq_cfg *cfg;

	if (res < 0) {
		if (res != -EEXIST)
			return NULL;
		cfg = irq_get_chip_data(at);
		if (cfg)
			return cfg;
	}

	cfg = alloc_irq_cfg(at, node);
	if (cfg)
		irq_set_chip_data(at, cfg);
	else
		irq_free_desc(at);
	return cfg;
}

static int alloc_irq_from(unsigned int from, int node)
{
	return irq_alloc_desc_from(from, node);
}

static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
{
	free_irq_cfg(at, cfg);
	irq_free_desc(at);
}

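/*
 * Memory-mapped IO-APIC register window. The field offsets below match
 * the datasheet: the index register sits at offset 0x00, the data
 * window at 0x10, and (on version 0x20+ parts) the EOI register at
 * 0x40; the unused[] padding arrays exist purely to produce those
 * offsets.
 */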
struct io_apic {
	unsigned int index;
	unsigned int unused[3];
	unsigned int data;
	unsigned int unused2[11];
	unsigned int eoi;
};

static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
{
	return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
		+ (mpc_ioapic_addr(idx) & ~PAGE_MASK);
}

static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
{
	struct io_apic __iomem *io_apic = io_apic_base(apic);
	writel(vector, &io_apic->eoi);
}

static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
{
	struct io_apic __iomem *io_apic = io_apic_base(apic);
	writel(reg, &io_apic->index);
	return readl(&io_apic->data);
}

static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
{
	struct io_apic __iomem *io_apic = io_apic_base(apic);
	writel(reg, &io_apic->index);
	writel(value, &io_apic->data);
}

/*
 * Re-write a value: to be used for read-modify-write
 * cycles where the read already set up the index register.
 *
 * Older SiS APICs require that we rewrite the index register.
 */
static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
{
	struct io_apic __iomem *io_apic = io_apic_base(apic);

	if (sis_apic_bug)
		writel(reg, &io_apic->index);
	writel(value, &io_apic->data);
}

static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
{
	struct irq_pin_list *entry;
	unsigned long flags;

	raw_spin_lock_irqsave(&ioapic_lock, flags);
	for_each_irq_pin(entry, cfg->irq_2_pin) {
		unsigned int reg;
		int pin;

		pin = entry->pin;
		reg = io_apic_read(entry->apic, 0x10 + pin*2);
		/* Is the remote IRR bit set? */
		if (reg & IO_APIC_REDIR_REMOTE_IRR) {
			raw_spin_unlock_irqrestore(&ioapic_lock, flags);
			return true;
		}
	}
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);

	return false;
}

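/*
 * A routing-table entry is 64 bits wide, but the data window is only
 * 32 bits, so each RTE is accessed as two words: register 0x10 + 2*pin
 * holds the low word (w1) and 0x11 + 2*pin the high word (w2). The
 * union below converts between the two views.
 */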
union entry_union {
	struct { u32 w1, w2; };
	struct IO_APIC_route_entry entry;
};

static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin)
{
	union entry_union eu;

	eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
	eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
	return eu.entry;
}

static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
{
	union entry_union eu;
	unsigned long flags;
	raw_spin_lock_irqsave(&ioapic_lock, flags);
	eu.entry = __ioapic_read_entry(apic, pin);
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
	return eu.entry;
}

/*
 * When we write a new IO APIC routing entry, we need to write the high
 * word first! If the mask bit in the low word is clear, we will enable
 * the interrupt, and we need to make sure the entry is fully populated
 * before that happens.
 */
static void
__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
	union entry_union eu = {{0, 0}};

	eu.entry = e;
	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
}

static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
	unsigned long flags;
	raw_spin_lock_irqsave(&ioapic_lock, flags);
	__ioapic_write_entry(apic, pin, e);
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}

/*
 * When we mask an IO APIC routing entry, we need to write the low
 * word first, in order to set the mask bit before we change the
 * high bits!
 */
static void ioapic_mask_entry(int apic, int pin)
{
	unsigned long flags;
	union entry_union eu = { .entry.mask = 1 };

	raw_spin_lock_irqsave(&ioapic_lock, flags);
	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}

/*
 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
 * shared ISA-space IRQs, so we have to support them. We are super
 * fast in the common case, and fast for shared ISA-space IRQs.
 */
static int
__add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
{
	struct irq_pin_list **last, *entry;

	/* don't allow duplicates */
	last = &cfg->irq_2_pin;
	for_each_irq_pin(entry, cfg->irq_2_pin) {
		if (entry->apic == apic && entry->pin == pin)
			return 0;
		last = &entry->next;
	}

	entry = alloc_irq_pin_list(node);
	if (!entry) {
		printk(KERN_ERR "cannot alloc irq_pin_list (%d,%d,%d)\n",
				node, apic, pin);
		return -ENOMEM;
	}
	entry->apic = apic;
	entry->pin = pin;

	*last = entry;
	return 0;
}

static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
{
	if (__add_pin_to_irq_node(cfg, node, apic, pin))
		panic("IO-APIC: failed to add irq-pin. Cannot proceed\n");
}

/*
 * Reroute an IRQ to a different pin.
 */
static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
					   int oldapic, int oldpin,
					   int newapic, int newpin)
{
	struct irq_pin_list *entry;

	for_each_irq_pin(entry, cfg->irq_2_pin) {
		if (entry->apic == oldapic && entry->pin == oldpin) {
			entry->apic = newapic;
			entry->pin = newpin;
			/* every one is different, right? */
			return;
		}
	}

	/* old apic/pin didn't exist, so just add new ones */
	add_pin_to_irq_node(cfg, node, newapic, newpin);
}

static void __io_apic_modify_irq(struct irq_pin_list *entry,
				 int mask_and, int mask_or,
				 void (*final)(struct irq_pin_list *entry))
{
	unsigned int reg, pin;

	pin = entry->pin;
	reg = io_apic_read(entry->apic, 0x10 + pin * 2);
	reg &= mask_and;
	reg |= mask_or;
	io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
	if (final)
		final(entry);
}

static void io_apic_modify_irq(struct irq_cfg *cfg,
			       int mask_and, int mask_or,
			       void (*final)(struct irq_pin_list *entry))
{
	struct irq_pin_list *entry;

	for_each_irq_pin(entry, cfg->irq_2_pin)
		__io_apic_modify_irq(entry, mask_and, mask_or, final);
}

static void io_apic_sync(struct irq_pin_list *entry)
{
	/*
	 * Synchronize the IO-APIC and the CPU by doing
	 * a dummy read from the IO-APIC
	 */
	struct io_apic __iomem *io_apic;
	io_apic = io_apic_base(entry->apic);
	readl(&io_apic->data);
}

static void mask_ioapic(struct irq_cfg *cfg)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&ioapic_lock, flags);
	io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}

static void mask_ioapic_irq(struct irq_data *data)
{
	mask_ioapic(data->chip_data);
}

static void __unmask_ioapic(struct irq_cfg *cfg)
{
	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
}

static void unmask_ioapic(struct irq_cfg *cfg)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&ioapic_lock, flags);
	__unmask_ioapic(cfg);
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}

static void unmask_ioapic_irq(struct irq_data *data)
{
	unmask_ioapic(data->chip_data);
}

/*
 * IO-APIC versions below 0x20 don't support the EOI register.
 * For the record, here is the information about the various versions:
 *     0Xh     82489DX
 *     1Xh     I/OAPIC or I/O(x)APIC which are not PCI 2.2 compliant
 *     2Xh     I/O(x)APIC which is PCI 2.2 compliant
 *     30h-FFh Reserved
 *
 * Some of the Intel ICH specs (ICH2 to ICH5) document the io-apic
 * version as 0x2. This is a documentation error: these ICH chips
 * use io-apics of version 0x20.
 *
 * For IO-APICs with an EOI register, we use it to do an explicit EOI.
 * Otherwise, we simulate the EOI message manually by changing the trigger
 * mode to edge and then back to level, with the RTE being masked during
 * this.
 */
static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg)
{
	if (mpc_ioapic_ver(apic) >= 0x20) {
		/*
		 * Intr-remapping uses the pin number as the virtual vector
		 * in the RTE. The actual vector is programmed in the
		 * intr-remapping table entry. Hence for the io-apic
		 * EOI we use the pin number.
		 */
		if (cfg && irq_remapped(cfg))
			io_apic_eoi(apic, pin);
		else
			io_apic_eoi(apic, vector);
	} else {
		struct IO_APIC_route_entry entry, entry1;

		entry = entry1 = __ioapic_read_entry(apic, pin);

		/*
		 * Mask the entry and change the trigger mode to edge.
		 */
		entry1.mask = 1;
		entry1.trigger = IOAPIC_EDGE;

		__ioapic_write_entry(apic, pin, entry1);

		/*
		 * Restore the previous level triggered entry.
		 */
		__ioapic_write_entry(apic, pin, entry);
	}
}

static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
{
	struct irq_pin_list *entry;
	unsigned long flags;

	raw_spin_lock_irqsave(&ioapic_lock, flags);
	for_each_irq_pin(entry, cfg->irq_2_pin)
		__eoi_ioapic_pin(entry->apic, entry->pin, cfg->vector, cfg);
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}

static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
{
	struct IO_APIC_route_entry entry;

	/* Check delivery_mode to be sure we're not clearing an SMI pin */
	entry = ioapic_read_entry(apic, pin);
	if (entry.delivery_mode == dest_SMI)
		return;

	/*
	 * Make sure the entry is masked and re-read the contents to check
	 * if it is a level triggered pin and if the remote-IRR is set.
	 */
	if (!entry.mask) {
		entry.mask = 1;
		ioapic_write_entry(apic, pin, entry);
		entry = ioapic_read_entry(apic, pin);
	}

	if (entry.irr) {
		unsigned long flags;

		/*
		 * Make sure the trigger mode is set to level. Explicit EOI
		 * doesn't clear the remote-IRR if the trigger mode is not
		 * set to level.
		 */
		if (!entry.trigger) {
			entry.trigger = IOAPIC_LEVEL;
			ioapic_write_entry(apic, pin, entry);
		}

		raw_spin_lock_irqsave(&ioapic_lock, flags);
		__eoi_ioapic_pin(apic, pin, entry.vector, NULL);
		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
	}

	/*
	 * Clear the rest of the bits in the IO-APIC RTE except for the mask
	 * bit.
	 */
	ioapic_mask_entry(apic, pin);
	entry = ioapic_read_entry(apic, pin);
	if (entry.irr)
		printk(KERN_ERR "Unable to reset IRR for apic: %d, pin: %d\n",
		       mpc_ioapic_id(apic), pin);
}

static void clear_IO_APIC(void)
{
	int apic, pin;

	for (apic = 0; apic < nr_ioapics; apic++)
		for (pin = 0; pin < ioapics[apic].nr_registers; pin++)
			clear_IO_APIC_pin(apic, pin);
}

#ifdef CONFIG_X86_32
/*
 * Support for broken MP BIOSes: enables hand-redirection of PIRQ0-7 to
 * specific CPU-side IRQs.
 */

#define MAX_PIRQS 8
static int pirq_entries[MAX_PIRQS] = {
	[0 ... MAX_PIRQS - 1] = -1
};

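/*
 * Illustration (not from the source): booting with "pirq=15,11" stores
 * IRQ 15 in pirq_entries[7] (PIRQ7) and IRQ 11 in pirq_entries[6]
 * (PIRQ6), since the table is filled from the top down. A stored value
 * of 0 makes pin_2_irq() below log the PIRQ as disabled instead of
 * redirecting it.
 */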
static int __init ioapic_pirq_setup(char *str)
{
	int i, max;
	int ints[MAX_PIRQS+1];

	get_options(str, ARRAY_SIZE(ints), ints);

	apic_printk(APIC_VERBOSE, KERN_INFO
			"PIRQ redirection, working around broken MP-BIOS.\n");
	max = MAX_PIRQS;
	if (ints[0] < MAX_PIRQS)
		max = ints[0];

	for (i = 0; i < max; i++) {
		apic_printk(APIC_VERBOSE, KERN_DEBUG
				"... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
		/*
		 * PIRQs are mapped upside down, usually.
		 */
		pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
	}
	return 1;
}

__setup("pirq=", ioapic_pirq_setup);
#endif /* CONFIG_X86_32 */

/*
 * Save all the IO-APIC RTEs.
 */
int save_ioapic_entries(void)
{
	int apic, pin;
	int err = 0;

	for (apic = 0; apic < nr_ioapics; apic++) {
		if (!ioapics[apic].saved_registers) {
			err = -ENOMEM;
			continue;
		}

		for (pin = 0; pin < ioapics[apic].nr_registers; pin++)
			ioapics[apic].saved_registers[pin] =
				ioapic_read_entry(apic, pin);
	}

	return err;
}

/*
 * Mask all IO APIC entries.
 */
void mask_ioapic_entries(void)
{
	int apic, pin;

	for (apic = 0; apic < nr_ioapics; apic++) {
		if (!ioapics[apic].saved_registers)
			continue;

		for (pin = 0; pin < ioapics[apic].nr_registers; pin++) {
			struct IO_APIC_route_entry entry;

			entry = ioapics[apic].saved_registers[pin];
			if (!entry.mask) {
				entry.mask = 1;
				ioapic_write_entry(apic, pin, entry);
			}
		}
	}
}

/*
 * Restore the IO APIC entries which were saved in the ioapic structure.
 */
int restore_ioapic_entries(void)
{
	int apic, pin;

	for (apic = 0; apic < nr_ioapics; apic++) {
		if (!ioapics[apic].saved_registers)
			continue;

		for (pin = 0; pin < ioapics[apic].nr_registers; pin++)
			ioapic_write_entry(apic, pin,
					   ioapics[apic].saved_registers[pin]);
	}
	return 0;
}

/*
 * Find the IRQ entry number of a certain pin.
 */
static int find_irq_entry(int ioapic_idx, int pin, int type)
{
	int i;

	for (i = 0; i < mp_irq_entries; i++)
		if (mp_irqs[i].irqtype == type &&
		    (mp_irqs[i].dstapic == mpc_ioapic_id(ioapic_idx) ||
		     mp_irqs[i].dstapic == MP_APIC_ALL) &&
		    mp_irqs[i].dstirq == pin)
			return i;

	return -1;
}

/*
 * Find the pin to which IRQ[irq] (ISA) is connected
 */
static int __init find_isa_irq_pin(int irq, int type)
{
	int i;

	for (i = 0; i < mp_irq_entries; i++) {
		int lbus = mp_irqs[i].srcbus;

		if (test_bit(lbus, mp_bus_not_pci) &&
		    (mp_irqs[i].irqtype == type) &&
		    (mp_irqs[i].srcbusirq == irq))

			return mp_irqs[i].dstirq;
	}
	return -1;
}

static int __init find_isa_irq_apic(int irq, int type)
{
	int i;

	for (i = 0; i < mp_irq_entries; i++) {
		int lbus = mp_irqs[i].srcbus;

		if (test_bit(lbus, mp_bus_not_pci) &&
		    (mp_irqs[i].irqtype == type) &&
		    (mp_irqs[i].srcbusirq == irq))
			break;
	}

	if (i < mp_irq_entries) {
		int ioapic_idx;

		for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
			if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic)
				return ioapic_idx;
	}

	return -1;
}

#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
/*
 * EISA Edge/Level control register, ELCR
 */
static int EISA_ELCR(unsigned int irq)
{
	if (irq < legacy_pic->nr_legacy_irqs) {
		unsigned int port = 0x4d0 + (irq >> 3);
		return (inb(port) >> (irq & 7)) & 1;
	}
	apic_printk(APIC_VERBOSE, KERN_INFO
			"Broken MPtable reports ISA irq %d\n", irq);
	return 0;
}
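
/*
 * Worked example: for irq 9 the ELCR byte lives at port
 * 0x4d0 + (9 >> 3) = 0x4d1 and the trigger bit is bit (9 & 7) = 1;
 * a set bit means level triggered, a clear bit means edge triggered.
 */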

#endif

/* ISA interrupts are always polarity zero, edge triggered,
 * when listed as conforming in the MP table. */

#define default_ISA_trigger(idx)	(0)
#define default_ISA_polarity(idx)	(0)

/* EISA interrupts are always polarity zero and can be edge or level
 * triggered depending on the ELCR value.  If an interrupt is listed as
 * EISA conforming in the MP table, that means its trigger type must
 * be read in from the ELCR. */

#define default_EISA_trigger(idx)	(EISA_ELCR(mp_irqs[idx].srcbusirq))
#define default_EISA_polarity(idx)	default_ISA_polarity(idx)

/* PCI interrupts are always polarity one, level triggered,
 * when listed as conforming in the MP table. */

#define default_PCI_trigger(idx)	(1)
#define default_PCI_polarity(idx)	(1)

/* MCA interrupts are always polarity zero, level triggered,
 * when listed as conforming in the MP table. */

#define default_MCA_trigger(idx)	(1)
#define default_MCA_polarity(idx)	default_ISA_polarity(idx)

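/*
 * MP-table irqflag encoding, per the MultiProcessor spec: bits 1:0
 * carry the polarity and bits 3:2 the trigger mode, each using
 * 00 = conforms to bus, 01 = active high/edge, 10 = reserved,
 * 11 = active low/level. The two decoders below walk exactly this
 * table.
 */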
static int irq_polarity(int idx)
{
	int bus = mp_irqs[idx].srcbus;
	int polarity;

	/*
	 * Determine IRQ line polarity (high active or low active):
	 */
	switch (mp_irqs[idx].irqflag & 3)
	{
		case 0: /* conforms, i.e. bus-type dependent polarity */
			if (test_bit(bus, mp_bus_not_pci))
				polarity = default_ISA_polarity(idx);
			else
				polarity = default_PCI_polarity(idx);
			break;
		case 1: /* high active */
		{
			polarity = 0;
			break;
		}
		case 2: /* reserved */
		{
			printk(KERN_WARNING "broken BIOS!!\n");
			polarity = 1;
			break;
		}
		case 3: /* low active */
		{
			polarity = 1;
			break;
		}
		default: /* invalid */
		{
			printk(KERN_WARNING "broken BIOS!!\n");
			polarity = 1;
			break;
		}
	}
	return polarity;
}

static int irq_trigger(int idx)
{
	int bus = mp_irqs[idx].srcbus;
	int trigger;

	/*
	 * Determine IRQ trigger mode (edge or level sensitive):
	 */
	switch ((mp_irqs[idx].irqflag>>2) & 3)
	{
		case 0: /* conforms, i.e. bus-type dependent */
			if (test_bit(bus, mp_bus_not_pci))
				trigger = default_ISA_trigger(idx);
			else
				trigger = default_PCI_trigger(idx);
#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
			switch (mp_bus_id_to_type[bus]) {
				case MP_BUS_ISA: /* ISA pin */
				{
					/* set before the switch */
					break;
				}
				case MP_BUS_EISA: /* EISA pin */
				{
					trigger = default_EISA_trigger(idx);
					break;
				}
				case MP_BUS_PCI: /* PCI pin */
				{
					/* set before the switch */
					break;
				}
				case MP_BUS_MCA: /* MCA pin */
				{
					trigger = default_MCA_trigger(idx);
					break;
				}
				default:
				{
					printk(KERN_WARNING "broken BIOS!!\n");
					trigger = 1;
					break;
				}
			}
#endif
			break;
		case 1: /* edge */
		{
			trigger = 0;
			break;
		}
		case 2: /* reserved */
		{
			printk(KERN_WARNING "broken BIOS!!\n");
			trigger = 1;
			break;
		}
		case 3: /* level */
		{
			trigger = 1;
			break;
		}
		default: /* invalid */
		{
			printk(KERN_WARNING "broken BIOS!!\n");
			trigger = 0;
			break;
		}
	}
	return trigger;
}

static int pin_2_irq(int idx, int apic, int pin)
{
	int irq;
	int bus = mp_irqs[idx].srcbus;
	struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(apic);

	/*
	 * Debugging check; we are in big trouble if this message pops up!
	 */
	if (mp_irqs[idx].dstirq != pin)
		printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");

	if (test_bit(bus, mp_bus_not_pci)) {
		irq = mp_irqs[idx].srcbusirq;
	} else {
		u32 gsi = gsi_cfg->gsi_base + pin;

		if (gsi >= NR_IRQS_LEGACY)
			irq = gsi;
		else
			irq = gsi_top + gsi;
	}

#ifdef CONFIG_X86_32
	/*
	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
	 */
	if ((pin >= 16) && (pin <= 23)) {
		if (pirq_entries[pin-16] != -1) {
			if (!pirq_entries[pin-16]) {
				apic_printk(APIC_VERBOSE, KERN_DEBUG
						"disabling PIRQ%d\n", pin-16);
			} else {
				irq = pirq_entries[pin-16];
				apic_printk(APIC_VERBOSE, KERN_DEBUG
						"using PIRQ%d -> IRQ %d\n",
						pin-16, irq);
			}
		}
	}
#endif

	return irq;
}
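
/*
 * Worked example (illustrative values): with gsi_base = 0 and
 * gsi_top = 24, a PCI pin whose GSI is 20 simply becomes IRQ 20, while
 * a PCI pin whose GSI is 4 would collide with the legacy ISA IRQ range
 * and is remapped above gsi_top, to IRQ 24 + 4 = 28.
 */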

/*
 * Find a specific PCI IRQ entry.
 * Not an __init, possibly needed by modules
 */
int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
				struct io_apic_irq_attr *irq_attr)
{
	int ioapic_idx, i, best_guess = -1;

	apic_printk(APIC_DEBUG,
		    "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
		    bus, slot, pin);
	if (test_bit(bus, mp_bus_not_pci)) {
		apic_printk(APIC_VERBOSE,
			    "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
		return -1;
	}
	for (i = 0; i < mp_irq_entries; i++) {
		int lbus = mp_irqs[i].srcbus;

		for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
			if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic ||
			    mp_irqs[i].dstapic == MP_APIC_ALL)
				break;

		if (!test_bit(lbus, mp_bus_not_pci) &&
		    !mp_irqs[i].irqtype &&
		    (bus == lbus) &&
		    (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
			int irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq);

			if (!(ioapic_idx || IO_APIC_IRQ(irq)))
				continue;

			if (pin == (mp_irqs[i].srcbusirq & 3)) {
				set_io_apic_irq_attr(irq_attr, ioapic_idx,
						     mp_irqs[i].dstirq,
						     irq_trigger(i),
						     irq_polarity(i));
				return irq;
			}
			/*
			 * Use the first all-but-pin matching entry as a
			 * best-guess fuzzy result for broken mptables.
			 */
			if (best_guess < 0) {
				set_io_apic_irq_attr(irq_attr, ioapic_idx,
						     mp_irqs[i].dstirq,
						     irq_trigger(i),
						     irq_polarity(i));
				best_guess = irq;
			}
		}
	}
	return best_guess;
}
EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);

void lock_vector_lock(void)
{
	/* Used to ensure that the online set of cpus does not change
	 * during assign_irq_vector.
	 */
	raw_spin_lock(&vector_lock);
}

void unlock_vector_lock(void)
{
	raw_spin_unlock(&vector_lock);
}

static int
__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
{
	/*
	 * NOTE! The local APIC isn't very good at handling
	 * multiple interrupts at the same interrupt level.
	 * As the interrupt level is determined by taking the
	 * vector number and shifting that right by 4, we
	 * want to spread these out a bit so that they don't
	 * all fall in the same interrupt level.
	 *
	 * Also, we've got to be careful not to trash gate
	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
	 */
	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
	static int current_offset = VECTOR_OFFSET_START % 8;
	unsigned int old_vector;
	int cpu, err;
	cpumask_var_t tmp_mask;

	if (cfg->move_in_progress)
		return -EBUSY;

	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
		return -ENOMEM;

	old_vector = cfg->vector;
	if (old_vector) {
		cpumask_and(tmp_mask, mask, cpu_online_mask);
		cpumask_and(tmp_mask, cfg->domain, tmp_mask);
		if (!cpumask_empty(tmp_mask)) {
			free_cpumask_var(tmp_mask);
			return 0;
		}
	}

	/* Only try to allocate irqs on cpus that are present */
	err = -ENOSPC;
	for_each_cpu_and(cpu, mask, cpu_online_mask) {
		int new_cpu;
		int vector, offset;

		apic->vector_allocation_domain(cpu, tmp_mask);

		vector = current_vector;
		offset = current_offset;
next:
		vector += 8;
		if (vector >= first_system_vector) {
			/* If out of vectors on large boxen, must share them. */
			offset = (offset + 1) % 8;
			vector = FIRST_EXTERNAL_VECTOR + offset;
		}
		if (unlikely(current_vector == vector))
			continue;

		if (test_bit(vector, used_vectors))
			goto next;

		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
				goto next;
		/* Found one! */
		current_vector = vector;
		current_offset = offset;
		if (old_vector) {
			cfg->move_in_progress = 1;
			cpumask_copy(cfg->old_domain, cfg->domain);
		}
		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
			per_cpu(vector_irq, new_cpu)[vector] = irq;
		cfg->vector = vector;
		cpumask_copy(cfg->domain, tmp_mask);
		err = 0;
		break;
	}
	free_cpumask_var(tmp_mask);
	return err;
}
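
/*
 * Illustration of the walk above (a sketch; the exact start depends on
 * VECTOR_OFFSET_START): candidate vectors are visited in steps of 8,
 * e.g. 0x29, 0x31, 0x39, ..., wrapping at first_system_vector and
 * bumping the offset, so allocations spread across priority classes
 * (class = vector >> 4); anything set in used_vectors is skipped,
 * e.g. the 0x80 syscall gate that the comment above worries about.
 */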

int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
{
	int err;
	unsigned long flags;

	raw_spin_lock_irqsave(&vector_lock, flags);
	err = __assign_irq_vector(irq, cfg, mask);
	raw_spin_unlock_irqrestore(&vector_lock, flags);
	return err;
}

static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
{
	int cpu, vector;

	BUG_ON(!cfg->vector);

	vector = cfg->vector;
	for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
		per_cpu(vector_irq, cpu)[vector] = -1;

	cfg->vector = 0;
	cpumask_clear(cfg->domain);

	if (likely(!cfg->move_in_progress))
		return;
	for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
								vector++) {
			if (per_cpu(vector_irq, cpu)[vector] != irq)
				continue;
			per_cpu(vector_irq, cpu)[vector] = -1;
			break;
		}
	}
	cfg->move_in_progress = 0;
}

void __setup_vector_irq(int cpu)
{
	/* Initialize vector_irq on a new cpu */
	int irq, vector;
	struct irq_cfg *cfg;

	/*
	 * vector_lock will make sure that we don't run into irq vector
	 * assignments that might be happening on another cpu in parallel,
	 * while we setup our initial vector to irq mappings.
	 */
	raw_spin_lock(&vector_lock);
	/* Mark the inuse vectors */
	for_each_active_irq(irq) {
		cfg = irq_get_chip_data(irq);
		if (!cfg)
			continue;
		/*
		 * If it is a legacy IRQ handled by the legacy PIC, this cpu
		 * will be part of the irq_cfg's domain.
		 */
		if (irq < legacy_pic->nr_legacy_irqs && !IO_APIC_IRQ(irq))
			cpumask_set_cpu(cpu, cfg->domain);

		if (!cpumask_test_cpu(cpu, cfg->domain))
			continue;
		vector = cfg->vector;
		per_cpu(vector_irq, cpu)[vector] = irq;
	}
	/* Mark the free vectors */
	for (vector = 0; vector < NR_VECTORS; ++vector) {
		irq = per_cpu(vector_irq, cpu)[vector];
		if (irq < 0)
			continue;

		cfg = irq_cfg(irq);
		if (!cpumask_test_cpu(cpu, cfg->domain))
			per_cpu(vector_irq, cpu)[vector] = -1;
	}
	raw_spin_unlock(&vector_lock);
}

static struct irq_chip ioapic_chip;

#ifdef CONFIG_X86_32
static inline int IO_APIC_irq_trigger(int irq)
{
	int apic, idx, pin;

	for (apic = 0; apic < nr_ioapics; apic++) {
		for (pin = 0; pin < ioapics[apic].nr_registers; pin++) {
			idx = find_irq_entry(apic, pin, mp_INT);
			if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
				return irq_trigger(idx);
		}
	}
	/*
	 * nonexistent IRQs are edge default
	 */
	return 0;
}
#else
static inline int IO_APIC_irq_trigger(int irq)
{
	return 1;
}
#endif

static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
				 unsigned long trigger)
{
	struct irq_chip *chip = &ioapic_chip;
	irq_flow_handler_t hdl;
	bool fasteoi;

	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
	    trigger == IOAPIC_LEVEL) {
		irq_set_status_flags(irq, IRQ_LEVEL);
		fasteoi = true;
	} else {
		irq_clear_status_flags(irq, IRQ_LEVEL);
		fasteoi = false;
	}

	if (irq_remapped(cfg)) {
		irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
		irq_remap_modify_chip_defaults(chip);
		fasteoi = trigger != 0;
	}

	hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq;
	irq_set_chip_and_handler_name(irq, chip, hdl,
				      fasteoi ? "fasteoi" : "edge");
}


static int setup_ir_ioapic_entry(int irq,
			      struct IR_IO_APIC_route_entry *entry,
			      unsigned int destination, int vector,
			      struct io_apic_irq_attr *attr)
{
	int index;
	struct irte irte;
	int ioapic_id = mpc_ioapic_id(attr->ioapic);
	struct intel_iommu *iommu = map_ioapic_to_ir(ioapic_id);

	if (!iommu) {
		pr_warn("No mapping iommu for ioapic %d\n", ioapic_id);
		return -ENODEV;
	}

	index = alloc_irte(iommu, irq, 1);
	if (index < 0) {
		pr_warn("Failed to allocate IRTE for ioapic %d\n", ioapic_id);
		return -ENOMEM;
	}

	prepare_irte(&irte, vector, destination);

	/* Set source-id of interrupt request */
	set_ioapic_sid(&irte, ioapic_id);

	modify_irte(irq, &irte);

	apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: "
		"Set IRTE entry (P:%d FPD:%d Dst_Mode:%d "
		"Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X "
		"Avail:%X Vector:%02X Dest:%08X "
		"SID:%04X SQ:%X SVT:%X)\n",
		attr->ioapic, irte.present, irte.fpd, irte.dst_mode,
		irte.redir_hint, irte.trigger_mode, irte.dlvry_mode,
		irte.avail, irte.vector, irte.dest_id,
		irte.sid, irte.sq, irte.svt);

	memset(entry, 0, sizeof(*entry));

	entry->index2	= (index >> 15) & 0x1;
	entry->zero	= 0;
	entry->format	= 1;
	entry->index	= (index & 0x7fff);
	/*
	 * The IO-APIC RTE will be configured with the virtual vector.
	 * The irq handler will do the explicit EOI to the io-apic.
	 */
	entry->vector	= attr->ioapic_pin;
	entry->mask	= 0;			/* enable IRQ */
	entry->trigger	= attr->trigger;
	entry->polarity	= attr->polarity;

	/*
	 * Mask level triggered irqs.
	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
	 */
	if (attr->trigger)
		entry->mask = 1;

	return 0;
}

static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
			       unsigned int destination, int vector,
			       struct io_apic_irq_attr *attr)
{
	if (intr_remapping_enabled)
		return setup_ir_ioapic_entry(irq,
			 (struct IR_IO_APIC_route_entry *)entry,
			 destination, vector, attr);

	memset(entry, 0, sizeof(*entry));

	entry->delivery_mode = apic->irq_delivery_mode;
	entry->dest_mode     = apic->irq_dest_mode;
	entry->dest	     = destination;
	entry->vector	     = vector;
	entry->mask	     = 0;			/* enable IRQ */
	entry->trigger	     = attr->trigger;
	entry->polarity	     = attr->polarity;

	/*
	 * Mask level triggered irqs.
	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
	 */
	if (attr->trigger)
		entry->mask = 1;

	return 0;
}

static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
				struct io_apic_irq_attr *attr)
{
	struct IO_APIC_route_entry entry;
	unsigned int dest;

	if (!IO_APIC_IRQ(irq))
		return;
	/*
	 * For legacy irqs, cfg->domain starts with cpu 0 for legacy
	 * controllers like the 8259. Now that the IO-APIC can handle this
	 * irq, update cfg->domain.
	 */
	if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain))
		apic->vector_allocation_domain(0, cfg->domain);

	if (assign_irq_vector(irq, cfg, apic->target_cpus()))
		return;

	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());

	apic_printk(APIC_VERBOSE, KERN_DEBUG
		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
		    "IRQ %d Mode:%i Active:%i Dest:%d)\n",
		    attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin,
		    cfg->vector, irq, attr->trigger, attr->polarity, dest);

	if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) {
		pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n",
			mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
		__clear_irq_vector(irq, cfg);

		return;
	}

	ioapic_register_intr(irq, cfg, attr->trigger);
	if (irq < legacy_pic->nr_legacy_irqs)
		legacy_pic->mask(irq);

	ioapic_write_entry(attr->ioapic, attr->ioapic_pin, entry);
}

static bool __init io_apic_pin_not_connected(int idx, int ioapic_idx, int pin)
{
	if (idx != -1)
		return false;

	apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n",
		    mpc_ioapic_id(ioapic_idx), pin);
	return true;
}

static void __init __io_apic_setup_irqs(unsigned int ioapic_idx)
{
	int idx, node = cpu_to_node(0);
	struct io_apic_irq_attr attr;
	unsigned int pin, irq;

	for (pin = 0; pin < ioapics[ioapic_idx].nr_registers; pin++) {
		idx = find_irq_entry(ioapic_idx, pin, mp_INT);
		if (io_apic_pin_not_connected(idx, ioapic_idx, pin))
			continue;

		irq = pin_2_irq(idx, ioapic_idx, pin);

		if ((ioapic_idx > 0) && (irq > 16))
			continue;

		/*
		 * Skip the timer IRQ if there's a quirk handler
		 * installed and if it returns 1:
		 */
		if (apic->multi_timer_check &&
		    apic->multi_timer_check(ioapic_idx, irq))
			continue;

		set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx),
				     irq_polarity(idx));

		io_apic_setup_irq_pin(irq, node, &attr);
	}
}

static void __init setup_IO_APIC_irqs(void)
{
	unsigned int ioapic_idx;

	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");

	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
		__io_apic_setup_irqs(ioapic_idx);
}

/*
 * For GSIs that are not on the first ioapic and could not be registered
 * with acpi_register_gsi(), such as the special SCI on the IBM x3330.
 */
void setup_IO_APIC_irq_extra(u32 gsi)
{
	int ioapic_idx = 0, pin, idx, irq, node = cpu_to_node(0);
	struct io_apic_irq_attr attr;

	/*
	 * Convert 'gsi' to 'ioapic.pin'.
	 */
	ioapic_idx = mp_find_ioapic(gsi);
	if (ioapic_idx < 0)
		return;

	pin = mp_find_ioapic_pin(ioapic_idx, gsi);
	idx = find_irq_entry(ioapic_idx, pin, mp_INT);
	if (idx == -1)
		return;

	irq = pin_2_irq(idx, ioapic_idx, pin);

	/* Only handle the non legacy irqs on secondary ioapics */
	if (ioapic_idx == 0 || irq < NR_IRQS_LEGACY)
		return;

	set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx),
			     irq_polarity(idx));

	io_apic_setup_irq_pin_once(irq, node, &attr);
}

/*
 * Set up the timer pin, possibly with the 8259A master behind it.
 */
static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
					 unsigned int pin, int vector)
{
	struct IO_APIC_route_entry entry;

	if (intr_remapping_enabled)
		return;

	memset(&entry, 0, sizeof(entry));

	/*
	 * We use logical delivery to get the timer IRQ
	 * to the first CPU.
	 */
	entry.dest_mode = apic->irq_dest_mode;
	entry.mask = 0;			/* don't mask IRQ for edge */
	entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus());
	entry.delivery_mode = apic->irq_delivery_mode;
	entry.polarity = 0;
	entry.trigger = 0;
	entry.vector = vector;

	/*
	 * The timer IRQ doesn't have to know that behind the
	 * scenes we may have an 8259A master in AEOI mode ...
	 */
	irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
				      "edge");

	/*
	 * Add it to the IO-APIC irq-routing table:
	 */
	ioapic_write_entry(ioapic_idx, pin, entry);
}

__apicdebuginit(void) print_IO_APIC(int ioapic_idx)
{
	int i;
	union IO_APIC_reg_00 reg_00;
	union IO_APIC_reg_01 reg_01;
	union IO_APIC_reg_02 reg_02;
	union IO_APIC_reg_03 reg_03;
	unsigned long flags;

	raw_spin_lock_irqsave(&ioapic_lock, flags);
	reg_00.raw = io_apic_read(ioapic_idx, 0);
	reg_01.raw = io_apic_read(ioapic_idx, 1);
	if (reg_01.bits.version >= 0x10)
		reg_02.raw = io_apic_read(ioapic_idx, 2);
	if (reg_01.bits.version >= 0x20)
		reg_03.raw = io_apic_read(ioapic_idx, 3);
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);

	printk("\n");
	printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx));
	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);

	printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
	printk(KERN_DEBUG ".......     : max redirection entries: %02X\n",
		reg_01.bits.entries);

	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
	printk(KERN_DEBUG ".......     : IO APIC version: %02X\n",
		reg_01.bits.version);

	/*
	 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
	 * but reg_02 reads back as the previously read register value, so
	 * ignore it if reg_02 == reg_01.
	 */
	if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
		printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
	}

	/*
	 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
	 * or reg_03, but reg_0[23] reads back as the previously read register
	 * value, so ignore it if reg_03 == reg_0[12].
	 */
	if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
	    reg_03.raw != reg_01.raw) {
		printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
	}

	printk(KERN_DEBUG ".... IRQ redirection table:\n");

	if (intr_remapping_enabled) {
		printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR"
			" Pol Stat Indx2 Zero Vect:\n");
	} else {
		printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
			" Stat Dmod Deli Vect:\n");
	}

	for (i = 0; i <= reg_01.bits.entries; i++) {
		if (intr_remapping_enabled) {
			struct IO_APIC_route_entry entry;
			struct IR_IO_APIC_route_entry *ir_entry;

			entry = ioapic_read_entry(ioapic_idx, i);
			ir_entry = (struct IR_IO_APIC_route_entry *) &entry;
			printk(KERN_DEBUG " %02x %04X ",
				i,
				ir_entry->index
			);
			printk("%1d   %1d    %1d    %1d   %1d   "
				"%1d    %1d     %X    %02X\n",
				ir_entry->format,
				ir_entry->mask,
				ir_entry->trigger,
				ir_entry->irr,
				ir_entry->polarity,
				ir_entry->delivery_status,
				ir_entry->index2,
				ir_entry->zero,
				ir_entry->vector
			);
		} else {
			struct IO_APIC_route_entry entry;

			entry = ioapic_read_entry(ioapic_idx, i);
			printk(KERN_DEBUG " %02x %02X  ",
				i,
				entry.dest
			);
			printk("%1d    %1d    %1d   %1d   %1d    "
				"%1d    %1d    %02X\n",
				entry.mask,
				entry.trigger,
				entry.irr,
				entry.polarity,
				entry.delivery_status,
				entry.dest_mode,
				entry.delivery_mode,
				entry.vector
			);
		}
	}
}

__apicdebuginit(void) print_IO_APICs(void)
{
	int ioapic_idx;
	struct irq_cfg *cfg;
	unsigned int irq;
	struct irq_chip *chip;

	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
		       mpc_ioapic_id(ioapic_idx),
		       ioapics[ioapic_idx].nr_registers);

	/*
	 * We are a bit conservative about what we expect.  We have to
	 * know about every hardware change ASAP.
	 */
	printk(KERN_INFO "testing the IO APIC.......................\n");

	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
		print_IO_APIC(ioapic_idx);

	printk(KERN_DEBUG "IRQ to pin mappings:\n");
	for_each_active_irq(irq) {
		struct irq_pin_list *entry;

		chip = irq_get_chip(irq);
		if (chip != &ioapic_chip)
			continue;

		cfg = irq_get_chip_data(irq);
		if (!cfg)
			continue;
		entry = cfg->irq_2_pin;
		if (!entry)
			continue;
		printk(KERN_DEBUG "IRQ%d ", irq);
		for_each_irq_pin(entry, cfg->irq_2_pin)
			printk("-> %d:%d", entry->apic, entry->pin);
		printk("\n");
	}

	printk(KERN_INFO ".................................... done.\n");
}

__apicdebuginit(void) print_APIC_field(int base)
{
	int i;

	printk(KERN_DEBUG);

	for (i = 0; i < 8; i++)
		printk(KERN_CONT "%08x", apic_read(base + i*0x10));

	printk(KERN_CONT "\n");
}

__apicdebuginit(void) print_local_APIC(void *dummy)
{
	unsigned int i, v, ver, maxlvt;
	u64 icr;

	printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
		smp_processor_id(), hard_smp_processor_id());
	v = apic_read(APIC_ID);
	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
	v = apic_read(APIC_LVR);
	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
	ver = GET_APIC_VERSION(v);
	maxlvt = lapic_get_maxlvt();

	v = apic_read(APIC_TASKPRI);
	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);

	if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
		if (!APIC_XAPIC(ver)) {
			v = apic_read(APIC_ARBPRI);
			printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
			       v & APIC_ARBPRI_MASK);
		}
		v = apic_read(APIC_PROCPRI);
		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
	}

	/*
	 * Remote read is supported only by the 82489DX and the local APIC
	 * of Pentium processors.
	 */
	if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
		v = apic_read(APIC_RRR);
		printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
	}

	v = apic_read(APIC_LDR);
	printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
	if (!x2apic_enabled()) {
		v = apic_read(APIC_DFR);
		printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
	}
	v = apic_read(APIC_SPIV);
	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);

	printk(KERN_DEBUG "... APIC ISR field:\n");
	print_APIC_field(APIC_ISR);
	printk(KERN_DEBUG "... APIC TMR field:\n");
	print_APIC_field(APIC_TMR);
	printk(KERN_DEBUG "... APIC IRR field:\n");
	print_APIC_field(APIC_IRR);

	if (APIC_INTEGRATED(ver)) {             /* !82489DX */
		if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
			apic_write(APIC_ESR, 0);

		v = apic_read(APIC_ESR);
		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
	}

	icr = apic_icr_read();
	printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
	printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));

	v = apic_read(APIC_LVTT);
	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);

	if (maxlvt > 3) {                       /* PC is LVT#4. */
		v = apic_read(APIC_LVTPC);
		printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
	}
	v = apic_read(APIC_LVT0);
	printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
	v = apic_read(APIC_LVT1);
	printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);

	if (maxlvt > 2) {			/* ERR is LVT#3. */
		v = apic_read(APIC_LVTERR);
		printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
	}

	v = apic_read(APIC_TMICT);
	printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
	v = apic_read(APIC_TMCCT);
	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
	v = apic_read(APIC_TDCR);
	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);

	if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
		v = apic_read(APIC_EFEAT);
		maxlvt = (v >> 16) & 0xff;
		printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v);
		v = apic_read(APIC_ECTRL);
		printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v);
		for (i = 0; i < maxlvt; i++) {
			v = apic_read(APIC_EILVTn(i));
			printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);
		}
	}
	printk("\n");
}

__apicdebuginit(void) print_local_APICs(int maxcpu)
{
	int cpu;

	if (!maxcpu)
		return;

	preempt_disable();
	for_each_online_cpu(cpu) {
		if (cpu >= maxcpu)
			break;
		smp_call_function_single(cpu, print_local_APIC, NULL, 1);
	}
	preempt_enable();
}

__apicdebuginit(void) print_PIC(void)
{
	unsigned int v;
	unsigned long flags;

	if (!legacy_pic->nr_legacy_irqs)
		return;

	printk(KERN_DEBUG "\nprinting PIC contents\n");

	raw_spin_lock_irqsave(&i8259A_lock, flags);

	v = inb(0xa1) << 8 | inb(0x21);
	printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);

	v = inb(0xa0) << 8 | inb(0x20);
	printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);

	outb(0x0b, 0xa0);	/* OCW3: select the ISR for the next read */
	outb(0x0b, 0x20);
	v = inb(0xa0) << 8 | inb(0x20);
	outb(0x0a, 0xa0);	/* OCW3: switch back to reading the IRR */
	outb(0x0a, 0x20);

	raw_spin_unlock_irqrestore(&i8259A_lock, flags);

	printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);

	v = inb(0x4d1) << 8 | inb(0x4d0);
	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
}

static int __initdata show_lapic = 1;
static __init int setup_show_lapic(char *arg)
{
	int num = -1;

	if (strcmp(arg, "all") == 0) {
		show_lapic = CONFIG_NR_CPUS;
	} else {
		get_option(&arg, &num);
		if (num >= 0)
			show_lapic = num;
	}

	return 1;
}
__setup("show_lapic=", setup_show_lapic);

__apicdebuginit(int) print_ICs(void)
{
	if (apic_verbosity == APIC_QUIET)
		return 0;

	print_PIC();

	/* don't print out if apic is not there */
	if (!cpu_has_apic && !apic_from_smp_config())
		return 0;

	print_local_APICs(show_lapic);
	print_IO_APICs();

	return 0;
}

late_initcall(print_ICs);
1934 
1935 
1936 /* Where if anywhere is the i8259 connect in external int mode */
1937 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
1938 
1939 void __init enable_IO_APIC(void)
1940 {
1941 	int i8259_apic, i8259_pin;
1942 	int apic;
1943 
1944 	if (!legacy_pic->nr_legacy_irqs)
1945 		return;
1946 
1947 	for(apic = 0; apic < nr_ioapics; apic++) {
1948 		int pin;
1949 		/* See if any of the pins is in ExtINT mode */
1950 		for (pin = 0; pin < ioapics[apic].nr_registers; pin++) {
1951 			struct IO_APIC_route_entry entry;
1952 			entry = ioapic_read_entry(apic, pin);
1953 
1954 			/* If the interrupt line is enabled and in ExtInt mode
1955 			 * I have found the pin where the i8259 is connected.
1956 			 */
1957 			if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
1958 				ioapic_i8259.apic = apic;
1959 				ioapic_i8259.pin  = pin;
1960 				goto found_i8259;
1961 			}
1962 		}
1963 	}
1964  found_i8259:
1965 	/* Look to see if the MP table has reported the ExtINT */
1966 	/* If we could not find the appropriate pin by looking at the ioapic,
1967 	 * the i8259 is probably not connected to the ioapic, but give the
1968 	 * mptable a chance anyway.
1969 	 */
1970 	i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
1971 	i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
1972 	/* Trust the MP table if nothing is setup in the hardware */
1973 	if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
1974 		printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
1975 		ioapic_i8259.pin  = i8259_pin;
1976 		ioapic_i8259.apic = i8259_apic;
1977 	}
1978 	/* Complain if the MP table and the hardware disagree */
1979 	if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
1980 		(i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
1981 	{
1982 		printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
1983 	}
1984 
1985 	/*
1986 	 * Do not trust the IO-APIC being empty at bootup
1987 	 */
1988 	clear_IO_APIC();
1989 }
1990 
1991 /*
1992  * Not an __init, needed by the reboot code
1993  */
1994 void disable_IO_APIC(void)
1995 {
1996 	/*
1997 	 * Clear the IO-APIC before rebooting:
1998 	 */
1999 	clear_IO_APIC();
2000 
2001 	if (!legacy_pic->nr_legacy_irqs)
2002 		return;
2003 
2004 	/*
2005 	 * If the i8259 is routed through an IOAPIC,
2006 	 * put that IOAPIC in virtual wire mode
2007 	 * so legacy interrupts can be delivered.
2008 	 *
2009 	 * With interrupt-remapping, for now we will use virtual wire A mode,
2010 	 * as virtual wire B is a little more complex (we need to configure
2011 	 * both the IOAPIC RTE and the interrupt-remapping table entry).
2012 	 * As this gets called during crash dump, keep this simple for now.
2013 	 */
2014 	if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) {
2015 		struct IO_APIC_route_entry entry;
2016 
2017 		memset(&entry, 0, sizeof(entry));
2018 		entry.mask            = 0; /* Enabled */
2019 		entry.trigger         = 0; /* Edge */
2020 		entry.irr             = 0;
2021 		entry.polarity        = 0; /* High */
2022 		entry.delivery_status = 0;
2023 		entry.dest_mode       = 0; /* Physical */
2024 		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
2025 		entry.vector          = 0;
2026 		entry.dest            = read_apic_id();
2027 
2028 		/*
2029 		 * Add it to the IO-APIC irq-routing table:
2030 		 */
2031 		ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
2032 	}
2033 
2034 	/*
2035 	 * Use virtual wire A mode when interrupt remapping is enabled.
2036 	 */
2037 	if (cpu_has_apic || apic_from_smp_config())
2038 		disconnect_bsp_APIC(!intr_remapping_enabled &&
2039 				ioapic_i8259.pin != -1);
2040 }
2041 
2042 #ifdef CONFIG_X86_32
2043 /*
2044  * function to set the IO-APIC physical IDs based on the
2045  * values stored in the MPC table.
2046  *
2047  * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
2048  */
2049 void __init setup_ioapic_ids_from_mpc_nocheck(void)
2050 {
2051 	union IO_APIC_reg_00 reg_00;
2052 	physid_mask_t phys_id_present_map;
2053 	int ioapic_idx;
2054 	int i;
2055 	unsigned char old_id;
2056 	unsigned long flags;
2057 
2058 	/*
2059 	 * This is broken; anything with a real cpu count has to
2060 	 * circumvent this idiocy regardless.
2061 	 */
2062 	apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map);
2063 
2064 	/*
2065 	 * Set the IOAPIC ID to the value stored in the MPC table.
2066 	 */
2067 	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
2068 		/* Read the register 0 value */
2069 		raw_spin_lock_irqsave(&ioapic_lock, flags);
2070 		reg_00.raw = io_apic_read(ioapic_idx, 0);
2071 		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2072 
2073 		old_id = mpc_ioapic_id(ioapic_idx);
2074 
2075 		if (mpc_ioapic_id(ioapic_idx) >= get_physical_broadcast()) {
2076 			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
2077 				ioapic_idx, mpc_ioapic_id(ioapic_idx));
2078 			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
2079 				reg_00.bits.ID);
2080 			ioapics[ioapic_idx].mp_config.apicid = reg_00.bits.ID;
2081 		}
2082 
2083 		/*
2084 		 * Sanity check, is the ID really free? Every APIC in a
2085 		 * system must have a unique ID or we get lots of nice
2086 		 * 'stuck on smp_invalidate_needed IPI wait' messages.
2087 		 */
2088 		if (apic->check_apicid_used(&phys_id_present_map,
2089 					    mpc_ioapic_id(ioapic_idx))) {
2090 			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
2091 				ioapic_idx, mpc_ioapic_id(ioapic_idx));
2092 			for (i = 0; i < get_physical_broadcast(); i++)
2093 				if (!physid_isset(i, phys_id_present_map))
2094 					break;
2095 			if (i >= get_physical_broadcast())
2096 				panic("Max APIC ID exceeded!\n");
2097 			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
2098 				i);
2099 			physid_set(i, phys_id_present_map);
2100 			ioapics[ioapic_idx].mp_config.apicid = i;
2101 		} else {
2102 			physid_mask_t tmp;
2103 			apic->apicid_to_cpu_present(mpc_ioapic_id(ioapic_idx),
2104 						    &tmp);
2105 			apic_printk(APIC_VERBOSE, "Setting %d in the "
2106 					"phys_id_present_map\n",
2107 					mpc_ioapic_id(ioapic_idx));
2108 			physids_or(phys_id_present_map, phys_id_present_map, tmp);
2109 		}
2110 
2111 		/*
2112 		 * We need to adjust the IRQ routing table
2113 		 * if the ID changed.
2114 		 */
2115 		if (old_id != mpc_ioapic_id(ioapic_idx))
2116 			for (i = 0; i < mp_irq_entries; i++)
2117 				if (mp_irqs[i].dstapic == old_id)
2118 					mp_irqs[i].dstapic
2119 						= mpc_ioapic_id(ioapic_idx);
2120 
2121 		/*
2122 		 * Update the ID register according to the right value
2123 		 * from the MPC table if they are different.
2124 		 */
2125 		if (mpc_ioapic_id(ioapic_idx) == reg_00.bits.ID)
2126 			continue;
2127 
2128 		apic_printk(APIC_VERBOSE, KERN_INFO
2129 			"...changing IO-APIC physical APIC ID to %d ...",
2130 			mpc_ioapic_id(ioapic_idx));
2131 
2132 		reg_00.bits.ID = mpc_ioapic_id(ioapic_idx);
2133 		raw_spin_lock_irqsave(&ioapic_lock, flags);
2134 		io_apic_write(ioapic_idx, 0, reg_00.raw);
2135 		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2136 
2137 		/*
2138 		 * Sanity check
2139 		 */
2140 		raw_spin_lock_irqsave(&ioapic_lock, flags);
2141 		reg_00.raw = io_apic_read(ioapic_idx, 0);
2142 		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2143 		if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx))
2144 			printk(KERN_ERR "could not set ID!\n");
2145 		else
2146 			apic_printk(APIC_VERBOSE, " ok.\n");
2147 	}
2148 }
2149 
2150 void __init setup_ioapic_ids_from_mpc(void)
2151 {
2152 
2153 	if (acpi_ioapic)
2154 		return;
2155 	/*
2156 	 * Don't check I/O APIC IDs for xAPIC systems.  They have
2157 	 * no meaning without the serial APIC bus.
2158 	 */
2159 	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
2160 		|| APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
2161 		return;
2162 	setup_ioapic_ids_from_mpc_nocheck();
2163 }
2164 #endif
2165 
2166 int no_timer_check __initdata;
2167 
2168 static int __init notimercheck(char *s)
2169 {
2170 	no_timer_check = 1;
2171 	return 1;
2172 }
2173 __setup("no_timer_check", notimercheck);
2174 
2175 /*
2176  * There is a nasty bug in some older SMP boards, their mptable lies
2177  * about the timer IRQ. We do the following to work around the situation:
2178  *
2179  *	- timer IRQ defaults to IO-APIC IRQ
2180  *	- if this function detects that timer IRQs are defunct, then we fall
2181  *	  back to ISA timer IRQs
2182  */
2183 static int __init timer_irq_works(void)
2184 {
2185 	unsigned long t1 = jiffies;
2186 	unsigned long flags;
2187 
2188 	if (no_timer_check)
2189 		return 1;
2190 
2191 	local_save_flags(flags);
2192 	local_irq_enable();
2193 	/* Let ten ticks pass... */
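	/* (mdelay() takes milliseconds and one tick lasts 1000/HZ ms, so this waits ten tick periods) */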
2194 	mdelay((10 * 1000) / HZ);
2195 	local_irq_restore(flags);
2196 
2197 	/*
2198 	 * Expect a few ticks at least, to be sure some possible
2199 	 * glue logic does not lock up after the first one or two
2200 	 * ticks in a non-ExtINT mode.  Also the local APIC
2201 	 * might have cached one ExtINT interrupt.  Finally, at
2202 	 * least one tick may be lost due to delays.
2203 	 */
2204 
2205 	/* Did at least four ticks elapse? (time_after() is safe against jiffies wraparound) */
2206 	if (time_after(jiffies, t1 + 4))
2207 		return 1;
2208 	return 0;
2209 }
2210 
2211 /*
2212  * In the SMP+IOAPIC case it might happen that there are an unspecified
2213  * number of pending IRQ events unhandled. These cases are very rare,
2214  * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
2215  * better to do it this way, as we then do not have to be aware of
2216  * 'pending' interrupts in the IRQ path, except at this point.
2217  */
2218 /*
2219  * Edge triggered needs to resend any interrupt
2220  * that was delayed but this is now handled in the device
2221  * independent code.
2222  */
2223 
2224 /*
2225  * Starting up an edge-triggered IO-APIC interrupt is
2226  * nasty - we need to make sure that we get the edge.
2227  * If it is already asserted for some reason, we need to
2228  * return 1 to indicate that it was pending.
2229  *
2230  * This is not complete - we should be able to fake
2231  * an edge even if it isn't on the 8259A...
2232  */
2233 
2234 static unsigned int startup_ioapic_irq(struct irq_data *data)
2235 {
2236 	int was_pending = 0, irq = data->irq;
2237 	unsigned long flags;
2238 
2239 	raw_spin_lock_irqsave(&ioapic_lock, flags);
2240 	if (irq < legacy_pic->nr_legacy_irqs) {
2241 		legacy_pic->mask(irq);
2242 		if (legacy_pic->irq_pending(irq))
2243 			was_pending = 1;
2244 	}
2245 	__unmask_ioapic(data->chip_data);
2246 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2247 
2248 	return was_pending;
2249 }
2250 
2251 static int ioapic_retrigger_irq(struct irq_data *data)
2252 {
2253 	struct irq_cfg *cfg = data->chip_data;
2254 	unsigned long flags;
2255 
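	/*
	 * Retrigger by sending the irq's vector as an IPI to the first
	 * CPU in its vector domain.
	 */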
2256 	raw_spin_lock_irqsave(&vector_lock, flags);
2257 	apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
2258 	raw_spin_unlock_irqrestore(&vector_lock, flags);
2259 
2260 	return 1;
2261 }
2262 
2263 /*
2264  * Level and edge triggered IO-APIC interrupts need different handling,
2265  * so we use two separate IRQ descriptors. Edge triggered IRQs can be
2266  * handled with the level-triggered descriptor, but that one has slightly
2267  * more overhead. Level-triggered interrupts cannot be handled with the
2268  * edge-triggered handler, without risking IRQ storms and other ugly
2269  * races.
2270  */
2271 
2272 #ifdef CONFIG_SMP
2273 void send_cleanup_vector(struct irq_cfg *cfg)
2274 {
2275 	cpumask_var_t cleanup_mask;
2276 
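	/*
	 * If no temporary cpumask can be allocated, fall back to sending
	 * the cleanup IPI to each online CPU of the old domain one by one.
	 */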
2277 	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
2278 		unsigned int i;
2279 		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
2280 			apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
2281 	} else {
2282 		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
2283 		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2284 		free_cpumask_var(cleanup_mask);
2285 	}
2286 	cfg->move_in_progress = 0;
2287 }
2288 
2289 static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
2290 {
2291 	int apic, pin;
2292 	struct irq_pin_list *entry;
2293 	u8 vector = cfg->vector;
2294 
2295 	for_each_irq_pin(entry, cfg->irq_2_pin) {
2296 		unsigned int reg;
2297 
2298 		apic = entry->apic;
2299 		pin = entry->pin;
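		/*
		 * Each redirection table entry is a pair of 32-bit
		 * registers: 0x10 + pin*2 holds the vector and control
		 * bits, 0x11 + pin*2 the destination.
		 */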
2300 		/*
2301 		 * With interrupt-remapping, destination information comes
2302 		 * from interrupt-remapping table entry.
2303 		 */
2304 		if (!irq_remapped(cfg))
2305 			io_apic_write(apic, 0x11 + pin*2, dest);
2306 		reg = io_apic_read(apic, 0x10 + pin*2);
2307 		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
2308 		reg |= vector;
2309 		io_apic_modify(apic, 0x10 + pin*2, reg);
2310 	}
2311 }
2312 
2313 /*
2314  * Either sets data->affinity to a valid value and returns the
2315  * ->cpu_mask_to_apicid of it in *dest_id, or returns -1 and
2316  * leaves data->affinity untouched.
2317  */
2318 int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2319 			  unsigned int *dest_id)
2320 {
2321 	struct irq_cfg *cfg = data->chip_data;
2322 
2323 	if (!cpumask_intersects(mask, cpu_online_mask))
2324 		return -1;
2325 
2326 	if (assign_irq_vector(data->irq, data->chip_data, mask))
2327 		return -1;
2328 
2329 	cpumask_copy(data->affinity, mask);
2330 
2331 	*dest_id = apic->cpu_mask_to_apicid_and(mask, cfg->domain);
2332 	return 0;
2333 }
2334 
2335 static int
2336 ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2337 		    bool force)
2338 {
2339 	unsigned int dest, irq = data->irq;
2340 	unsigned long flags;
2341 	int ret;
2342 
2343 	raw_spin_lock_irqsave(&ioapic_lock, flags);
2344 	ret = __ioapic_set_affinity(data, mask, &dest);
2345 	if (!ret) {
2346 		/* Only the high 8 bits are valid. */
2347 		dest = SET_APIC_LOGICAL_ID(dest);
2348 		__target_IO_APIC_irq(irq, dest, data->chip_data);
2349 	}
2350 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2351 	return ret;
2352 }
2353 
2354 #ifdef CONFIG_IRQ_REMAP
2355 
2356 /*
2357  * Migrate the IO-APIC irq in the presence of intr-remapping.
2358  *
2359  * For both level and edge triggered, irq migration is a simple atomic
2360  * update (of vector and cpu destination) of the IRTE and a hardware cache flush.
2361  *
2362  * For level triggered, we eliminate the io-apic RTE modification (with the
2363  * updated vector information) by using a virtual vector (io-apic pin number).
2364  * The real vector that is used for interrupting the cpu comes from
2365  * the interrupt-remapping table entry.
2366  *
2367  * As the migration is a simple atomic update of IRTE, the same mechanism
2368  * is used to migrate MSI irq's in the presence of interrupt-remapping.
2369  */
2370 static int
2371 ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2372 		       bool force)
2373 {
2374 	struct irq_cfg *cfg = data->chip_data;
2375 	unsigned int dest, irq = data->irq;
2376 	struct irte irte;
2377 
2378 	if (!cpumask_intersects(mask, cpu_online_mask))
2379 		return -EINVAL;
2380 
2381 	if (get_irte(irq, &irte))
2382 		return -EBUSY;
2383 
2384 	if (assign_irq_vector(irq, cfg, mask))
2385 		return -EBUSY;
2386 
2387 	dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
2388 
2389 	irte.vector = cfg->vector;
2390 	irte.dest_id = IRTE_DEST(dest);
2391 
2392 	/*
2393 	 * Atomically updates the IRTE with the new destination, vector
2394 	 * and flushes the interrupt entry cache.
2395 	 */
2396 	modify_irte(irq, &irte);
2397 
2398 	/*
2399 	 * After this point, all the interrupts will start arriving
2400 	 * at the new destination. So, time to cleanup the previous
2401 	 * vector allocation.
2402 	 */
2403 	if (cfg->move_in_progress)
2404 		send_cleanup_vector(cfg);
2405 
2406 	cpumask_copy(data->affinity, mask);
2407 	return 0;
2408 }
2409 
2410 #else
2411 static inline int
2412 ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2413 		       bool force)
2414 {
2415 	return 0;
2416 }
2417 #endif
2418 
2419 asmlinkage void smp_irq_move_cleanup_interrupt(void)
2420 {
2421 	unsigned vector, me;
2422 
2423 	ack_APIC_irq();
2424 	irq_enter();
2425 	exit_idle();
2426 
2427 	me = smp_processor_id();
2428 	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
2429 		unsigned int irq;
2430 		unsigned int irr;
2431 		struct irq_desc *desc;
2432 		struct irq_cfg *cfg;
2433 		irq = __this_cpu_read(vector_irq[vector]);
2434 
2435 		if (irq == -1)
2436 			continue;
2437 
2438 		desc = irq_to_desc(irq);
2439 		if (!desc)
2440 			continue;
2441 
2442 		cfg = irq_cfg(irq);
2443 		raw_spin_lock(&desc->lock);
2444 
2445 		/*
2446 		 * Check if the irq migration is in progress. If so, we
2447 		 * haven't received the cleanup request yet for this irq.
2448 		 */
2449 		if (cfg->move_in_progress)
2450 			goto unlock;
2451 
2452 		if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
2453 			goto unlock;
2454 
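		/* The IRR is eight 32-bit registers spaced 0x10 apart; pick the word holding this vector. */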
2455 		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
2456 		/*
2457 		 * Check if the vector that needs to be cleaned up is
2458 		 * registered at the cpu's IRR. If so, then this is not
2459 		 * the best time to clean it up. Let's clean it up in the
2460 		 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
2461 		 * to myself.
2462 		 */
2463 		if (irr  & (1 << (vector % 32))) {
2464 			apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
2465 			goto unlock;
2466 		}
2467 		__this_cpu_write(vector_irq[vector], -1);
2468 unlock:
2469 		raw_spin_unlock(&desc->lock);
2470 	}
2471 
2472 	irq_exit();
2473 }
2474 
2475 static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
2476 {
2477 	unsigned me;
2478 
2479 	if (likely(!cfg->move_in_progress))
2480 		return;
2481 
2482 	me = smp_processor_id();
2483 
2484 	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
2485 		send_cleanup_vector(cfg);
2486 }
2487 
2488 static void irq_complete_move(struct irq_cfg *cfg)
2489 {
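	/* The x86 irq entry code stores the complemented vector number in orig_ax. */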
2490 	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
2491 }
2492 
2493 void irq_force_complete_move(int irq)
2494 {
2495 	struct irq_cfg *cfg = irq_get_chip_data(irq);
2496 
2497 	if (!cfg)
2498 		return;
2499 
2500 	__irq_complete_move(cfg, cfg->vector);
2501 }
2502 #else
2503 static inline void irq_complete_move(struct irq_cfg *cfg) { }
2504 #endif
2505 
2506 static void ack_apic_edge(struct irq_data *data)
2507 {
2508 	irq_complete_move(data->chip_data);
2509 	irq_move_irq(data);
2510 	ack_APIC_irq();
2511 }
2512 
2513 atomic_t irq_mis_count;
2514 
2515 static void ack_apic_level(struct irq_data *data)
2516 {
2517 	struct irq_cfg *cfg = data->chip_data;
2518 	int i, do_unmask_irq = 0, irq = data->irq;
2519 	unsigned long v;
2520 
2521 	irq_complete_move(cfg);
2522 #ifdef CONFIG_GENERIC_PENDING_IRQ
2523 	/* If we are moving the irq we need to mask it */
2524 	if (unlikely(irqd_is_setaffinity_pending(data))) {
2525 		do_unmask_irq = 1;
2526 		mask_ioapic(cfg);
2527 	}
2528 #endif
2529 
2530 	/*
2531 	 * It appears there is an erratum which affects at least version 0x11
2532 	 * of I/O APIC (that's the 82093AA and cores integrated into various
2533 	 * chipsets).  Under certain conditions a level-triggered interrupt is
2534 	 * erroneously delivered as edge-triggered one but the respective IRR
2535 	 * bit gets set nevertheless.  As a result the I/O unit expects an EOI
2536 	 * message but it will never arrive and further interrupts are blocked
2537 	 * from the source.  The exact reason is so far unknown, but the
2538 	 * phenomenon was observed when two consecutive interrupt requests
2539 	 * from a given source get delivered to the same CPU and the source is
2540 	 * temporarily disabled in between.
2541 	 *
2542 	 * A workaround is to simulate an EOI message manually.  We achieve it
2543 	 * by setting the trigger mode to edge and then to level when the edge
2544 	 * trigger mode gets detected in the TMR of a local APIC for a
2545 	 * level-triggered interrupt.  We mask the source for the time of the
2546 	 * operation to prevent an edge-triggered interrupt escaping meanwhile.
2547 	 * The idea is from Manfred Spraul.  --macro
2548 	 *
2549 	 * Also, when a cpu goes offline, fixup_irqs() will forward
2550 	 * any unhandled interrupt on the offlined cpu to the new cpu
2551 	 * destination that is handling the corresponding interrupt. This
2552 	 * interrupt forwarding is done via IPIs. Hence, in this case a
2553 	 * level-triggered io-apic interrupt will also be seen as an edge
2554 	 * interrupt in the IRR. And we can't rely on the cpu's EOI
2555 	 * being broadcast to the IO-APICs, which would clear the remote IRR
2556 	 * corresponding to the level-triggered interrupt. Hence on IO-APICs
2557 	 * supporting the EOI register, we do an explicit EOI to clear the
2558 	 * remote IRR, and on IO-APICs which don't have an EOI register,
2559 	 * we use the above logic (mask+edge followed by unmask+level) from
2560 	 * Manfred Spraul to clear the remote IRR.
2561 	 */
2562 	i = cfg->vector;
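	/* The TMR is eight 32-bit registers spaced 0x10 apart: ((i & ~0x1f) >> 1) == (i / 32) * 0x10. */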
2563 	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
2564 
2565 	/*
2566 	 * We must acknowledge the irq before we move it or the acknowledge will
2567 	 * not propagate properly.
2568 	 */
2569 	ack_APIC_irq();
2570 
2571 	/*
2572 	 * Tail end of clearing remote IRR bit (either by delivering the EOI
2573 	 * message via io-apic EOI register write or simulating it using
2574 	 * mask+edge followed by unmask+level logic) manually when the
2575 	 * level triggered interrupt is seen as the edge triggered interrupt
2576 	 * at the cpu.
2577 	 */
2578 	if (!(v & (1 << (i & 0x1f)))) {
2579 		atomic_inc(&irq_mis_count);
2580 
2581 		eoi_ioapic_irq(irq, cfg);
2582 	}
2583 
2584 	/* Now we can move and re-enable the irq */
2585 	if (unlikely(do_unmask_irq)) {
2586 		/* Only migrate the irq if the ack has been received.
2587 		 *
2588 		 * On rare occasions the broadcast level triggered ack gets
2589 		 * delayed going to ioapics, and if we reprogram the
2590 		 * vector while Remote IRR is still set the irq will never
2591 		 * fire again.
2592 		 *
2593 		 * To prevent this scenario we read the Remote IRR bit
2594 		 * of the ioapic.  This has two effects.
2595 		 * - On any sane system the read of the ioapic will
2596 		 *   flush writes (and acks) going to the ioapic from
2597 		 *   this cpu.
2598 		 * - We get to see if the ACK has actually been delivered.
2599 		 *
2600 		 * Based on failed experiments of reprogramming the
2601 		 * ioapic entry from outside of irq context (starting
2602 		 * with masking the ioapic entry and then polling until
2603 		 * Remote IRR was clear before reprogramming the
2604 		 * ioapic), I don't trust the Remote IRR bit to be
2605 		 * completely accurate.
2606 		 *
2607 		 * However there appears to be no other way to plug
2608 		 * this race, so if the Remote IRR bit is not
2609 		 * accurate and is causing problems then it is a hardware bug
2610 		 * and you can go talk to the chipset vendor about it.
2611 		 */
2612 		if (!io_apic_level_ack_pending(cfg))
2613 			irq_move_masked_irq(data);
2614 		unmask_ioapic(cfg);
2615 	}
2616 }
2617 
2618 #ifdef CONFIG_IRQ_REMAP
2619 static void ir_ack_apic_edge(struct irq_data *data)
2620 {
2621 	ack_APIC_irq();
2622 }
2623 
2624 static void ir_ack_apic_level(struct irq_data *data)
2625 {
2626 	ack_APIC_irq();
2627 	eoi_ioapic_irq(data->irq, data->chip_data);
2628 }
2629 
2630 static void ir_print_prefix(struct irq_data *data, struct seq_file *p)
2631 {
2632 	seq_printf(p, " IR-%s", data->chip->name);
2633 }
2634 
2635 static void irq_remap_modify_chip_defaults(struct irq_chip *chip)
2636 {
2637 	chip->irq_print_chip = ir_print_prefix;
2638 	chip->irq_ack = ir_ack_apic_edge;
2639 	chip->irq_eoi = ir_ack_apic_level;
2640 
2641 #ifdef CONFIG_SMP
2642 	chip->irq_set_affinity = ir_ioapic_set_affinity;
2643 #endif
2644 }
2645 #endif /* CONFIG_IRQ_REMAP */
2646 
2647 static struct irq_chip ioapic_chip __read_mostly = {
2648 	.name			= "IO-APIC",
2649 	.irq_startup		= startup_ioapic_irq,
2650 	.irq_mask		= mask_ioapic_irq,
2651 	.irq_unmask		= unmask_ioapic_irq,
2652 	.irq_ack		= ack_apic_edge,
2653 	.irq_eoi		= ack_apic_level,
2654 #ifdef CONFIG_SMP
2655 	.irq_set_affinity	= ioapic_set_affinity,
2656 #endif
2657 	.irq_retrigger		= ioapic_retrigger_irq,
2658 };
2659 
2660 static inline void init_IO_APIC_traps(void)
2661 {
2662 	struct irq_cfg *cfg;
2663 	unsigned int irq;
2664 
2665 	/*
2666 	 * NOTE! The local APIC isn't very good at handling
2667 	 * multiple interrupts at the same interrupt level.
2668 	 * As the interrupt level is determined by taking the
2669 	 * vector number and shifting that right by 4, we
2670 	 * want to spread these out a bit so that they don't
2671 	 * all fall in the same interrupt level.
2672 	 *
2673 	 * Also, we've got to be careful not to trash gate
2674 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2675 	 */
2676 	for_each_active_irq(irq) {
2677 		cfg = irq_get_chip_data(irq);
2678 		if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
2679 			/*
2680 			 * Hmm.. We don't have an entry for this,
2681 			 * so default to an old-fashioned 8259
2682 			 * interrupt if we can..
2683 			 */
2684 			if (irq < legacy_pic->nr_legacy_irqs)
2685 				legacy_pic->make_irq(irq);
2686 			else
2687 				/* Strange. Oh, well.. */
2688 				irq_set_chip(irq, &no_irq_chip);
2689 		}
2690 	}
2691 }
2692 
2693 /*
2694  * The local APIC irq-chip implementation:
2695  */
2696 
2697 static void mask_lapic_irq(struct irq_data *data)
2698 {
2699 	unsigned long v;
2700 
2701 	v = apic_read(APIC_LVT0);
2702 	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
2703 }
2704 
2705 static void unmask_lapic_irq(struct irq_data *data)
2706 {
2707 	unsigned long v;
2708 
2709 	v = apic_read(APIC_LVT0);
2710 	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
2711 }
2712 
2713 static void ack_lapic_irq(struct irq_data *data)
2714 {
2715 	ack_APIC_irq();
2716 }
2717 
2718 static struct irq_chip lapic_chip __read_mostly = {
2719 	.name		= "local-APIC",
2720 	.irq_mask	= mask_lapic_irq,
2721 	.irq_unmask	= unmask_lapic_irq,
2722 	.irq_ack	= ack_lapic_irq,
2723 };
2724 
2725 static void lapic_register_intr(int irq)
2726 {
2727 	irq_clear_status_flags(irq, IRQ_LEVEL);
2728 	irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
2729 				      "edge");
2730 }
2731 
2732 /*
2733  * This looks a bit hackish but it's about the only way of sending
2734  * a few INTA cycles to 8259As and any associated glue logic.  ICR does
2735  * not support the ExtINT mode, unfortunately.  We need to send these
2736  * cycles as some i82489DX-based boards have glue logic that keeps the
2737  * 8259A interrupt line asserted until INTA.  --macro
2738  */
2739 static inline void __init unlock_ExtINT_logic(void)
2740 {
2741 	int apic, pin, i;
2742 	struct IO_APIC_route_entry entry0, entry1;
2743 	unsigned char save_control, save_freq_select;
2744 
2745 	pin  = find_isa_irq_pin(8, mp_INT);
2746 	if (pin == -1) {
2747 		WARN_ON_ONCE(1);
2748 		return;
2749 	}
2750 	apic = find_isa_irq_apic(8, mp_INT);
2751 	if (apic == -1) {
2752 		WARN_ON_ONCE(1);
2753 		return;
2754 	}
2755 
2756 	entry0 = ioapic_read_entry(apic, pin);
2757 	clear_IO_APIC_pin(apic, pin);
2758 
2759 	memset(&entry1, 0, sizeof(entry1));
2760 
2761 	entry1.dest_mode = 0;			/* physical delivery */
2762 	entry1.mask = 0;			/* unmask IRQ now */
2763 	entry1.dest = hard_smp_processor_id();
2764 	entry1.delivery_mode = dest_ExtINT;
2765 	entry1.polarity = entry0.polarity;
2766 	entry1.trigger = 0;
2767 	entry1.vector = 0;
2768 
2769 	ioapic_write_entry(apic, pin, entry1);
2770 
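	/*
	 * Program the RTC for periodic interrupts (rate select 6 is
	 * 1024 Hz) so that IRQ8 fires and generates the INTA cycles.
	 */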
2771 	save_control = CMOS_READ(RTC_CONTROL);
2772 	save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
2773 	CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
2774 		   RTC_FREQ_SELECT);
2775 	CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
2776 
2777 	i = 100;
2778 	while (i-- > 0) {
2779 		mdelay(10);
2780 		if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
2781 			i -= 10;
2782 	}
2783 
2784 	CMOS_WRITE(save_control, RTC_CONTROL);
2785 	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
2786 	clear_IO_APIC_pin(apic, pin);
2787 
2788 	ioapic_write_entry(apic, pin, entry0);
2789 }
2790 
2791 static int disable_timer_pin_1 __initdata;
2792 /* Actually the next is obsolete, but keep it for paranoid reasons -AK */
2793 static int __init disable_timer_pin_setup(char *arg)
2794 {
2795 	disable_timer_pin_1 = 1;
2796 	return 0;
2797 }
2798 early_param("disable_timer_pin_1", disable_timer_pin_setup);
2799 
2800 int timer_through_8259 __initdata;
2801 
2802 /*
2803  * This code may look a bit paranoid, but it's supposed to cooperate with
2804  * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
2805  * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
2806  * fanatically on his truly buggy board.
2807  *
2808  * FIXME: really need to revamp this for all platforms.
2809  */
2810 static inline void __init check_timer(void)
2811 {
2812 	struct irq_cfg *cfg = irq_get_chip_data(0);
2813 	int node = cpu_to_node(0);
2814 	int apic1, pin1, apic2, pin2;
2815 	unsigned long flags;
2816 	int no_pin1 = 0;
2817 
2818 	local_irq_save(flags);
2819 
2820 	/*
2821 	 * get/set the timer IRQ vector:
2822 	 */
2823 	legacy_pic->mask(0);
2824 	assign_irq_vector(0, cfg, apic->target_cpus());
2825 
2826 	/*
2827 	 * As IRQ0 is to be enabled in the 8259A, the virtual
2828 	 * wire has to be disabled in the local APIC.  Also
2829 	 * timer interrupts need to be acknowledged manually in
2830 	 * the 8259A for the i82489DX when using the NMI
2831 	 * watchdog as that APIC treats NMIs as level-triggered.
2832 	 * The AEOI mode will finish them in the 8259A
2833 	 * automatically.
2834 	 */
2835 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2836 	legacy_pic->init(1);
2837 
2838 	pin1  = find_isa_irq_pin(0, mp_INT);
2839 	apic1 = find_isa_irq_apic(0, mp_INT);
2840 	pin2  = ioapic_i8259.pin;
2841 	apic2 = ioapic_i8259.apic;
2842 
2843 	apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
2844 		    "apic1=%d pin1=%d apic2=%d pin2=%d\n",
2845 		    cfg->vector, apic1, pin1, apic2, pin2);
2846 
2847 	/*
2848 	 * Some BIOS writers are clueless and report the ExtINTA
2849 	 * I/O APIC input from the cascaded 8259A as the timer
2850 	 * interrupt input.  So just in case, if only one pin
2851 	 * was found above, try it both directly and through the
2852 	 * 8259A.
2853 	 */
2854 	if (pin1 == -1) {
2855 		if (intr_remapping_enabled)
2856 			panic("BIOS bug: timer not connected to IO-APIC");
2857 		pin1 = pin2;
2858 		apic1 = apic2;
2859 		no_pin1 = 1;
2860 	} else if (pin2 == -1) {
2861 		pin2 = pin1;
2862 		apic2 = apic1;
2863 	}
2864 
2865 	if (pin1 != -1) {
2866 		/*
2867 		 * Ok, does IRQ0 through the IOAPIC work?
2868 		 */
2869 		if (no_pin1) {
2870 			add_pin_to_irq_node(cfg, node, apic1, pin1);
2871 			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
2872 		} else {
2873 			/* For edge trigger, setup_ioapic_irq already
2874 			 * leaves it unmasked, so we only need to unmask
2875 			 * if it is level-triggered.
2876 			 * Do we really have a level-triggered timer?
2877 			 */
2878 			int idx;
2879 			idx = find_irq_entry(apic1, pin1, mp_INT);
2880 			if (idx != -1 && irq_trigger(idx))
2881 				unmask_ioapic(cfg);
2882 		}
2883 		if (timer_irq_works()) {
2884 			if (disable_timer_pin_1 > 0)
2885 				clear_IO_APIC_pin(0, pin1);
2886 			goto out;
2887 		}
2888 		if (intr_remapping_enabled)
2889 			panic("timer doesn't work through Interrupt-remapped IO-APIC");
2890 		local_irq_disable();
2891 		clear_IO_APIC_pin(apic1, pin1);
2892 		if (!no_pin1)
2893 			apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
2894 				    "8254 timer not connected to IO-APIC\n");
2895 
2896 		apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
2897 			    "(IRQ0) through the 8259A ...\n");
2898 		apic_printk(APIC_QUIET, KERN_INFO
2899 			    "..... (found apic %d pin %d) ...\n", apic2, pin2);
2900 		/*
2901 		 * legacy devices should be connected to IO APIC #0
2902 		 */
2903 		replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
2904 		setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
2905 		legacy_pic->unmask(0);
2906 		if (timer_irq_works()) {
2907 			apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
2908 			timer_through_8259 = 1;
2909 			goto out;
2910 		}
2911 		/*
2912 		 * Cleanup, just in case ...
2913 		 */
2914 		local_irq_disable();
2915 		legacy_pic->mask(0);
2916 		clear_IO_APIC_pin(apic2, pin2);
2917 		apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
2918 	}
2919 
2920 	apic_printk(APIC_QUIET, KERN_INFO
2921 		    "...trying to set up timer as Virtual Wire IRQ...\n");
2922 
2923 	lapic_register_intr(0);
2924 	apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);	/* Fixed mode */
2925 	legacy_pic->unmask(0);
2926 
2927 	if (timer_irq_works()) {
2928 		apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
2929 		goto out;
2930 	}
2931 	local_irq_disable();
2932 	legacy_pic->mask(0);
2933 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
2934 	apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
2935 
2936 	apic_printk(APIC_QUIET, KERN_INFO
2937 		    "...trying to set up timer as ExtINT IRQ...\n");
2938 
2939 	legacy_pic->init(0);
2940 	legacy_pic->make_irq(0);
2941 	apic_write(APIC_LVT0, APIC_DM_EXTINT);
2942 
2943 	unlock_ExtINT_logic();
2944 
2945 	if (timer_irq_works()) {
2946 		apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
2947 		goto out;
2948 	}
2949 	local_irq_disable();
2950 	apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
2951 	if (x2apic_preenabled)
2952 		apic_printk(APIC_QUIET, KERN_INFO
2953 			    "Perhaps a problem with the pre-enabled x2apic mode\n"
2954 			    "Try booting with x2apic and interrupt-remapping disabled in the bios.\n");
2955 	panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
2956 		"report.  Then try booting with the 'noapic' option.\n");
2957 out:
2958 	local_irq_restore(flags);
2959 }
2960 
2961 /*
2962  * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
2963  * to devices.  However there may be an I/O APIC pin available for
2964  * this interrupt regardless.  The pin may be left unconnected, but
2965  * typically it will be reused as an ExtINT cascade interrupt for
2966  * the master 8259A.  In the MPS case such a pin will normally be
2967  * reported as an ExtINT interrupt in the MP table.  With ACPI
2968  * there is no provision for ExtINT interrupts, and in the absence
2969  * of an override it would be treated as an ordinary ISA I/O APIC
2970  * interrupt, that is edge-triggered and unmasked by default.  We
2971  * used to do this, but it caused problems on some systems because
2972  * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
2973  * the same ExtINT cascade interrupt to drive the local APIC of the
2974  * bootstrap processor.  Therefore we refrain from routing IRQ2 to
2975  * the I/O APIC in all cases now.  No actual device should request
2976  * it anyway.  --macro
2977  */
2978 #define PIC_IRQS	(1UL << PIC_CASCADE_IR)
2979 
2980 void __init setup_IO_APIC(void)
2981 {
2982 
2983 	/*
2984 	 * Calling enable_IO_APIC() has been moved to setup_local_APIC() for the BP.
2985 	 */
2986 	io_apic_irqs = legacy_pic->nr_legacy_irqs ? ~PIC_IRQS : ~0UL;
2987 
2988 	apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
2989 	/*
2990 	 * Set up IO-APIC IRQ routing.
2991 	 */
2992 	x86_init.mpparse.setup_ioapic_ids();
2993 
2994 	sync_Arb_IDs();
2995 	setup_IO_APIC_irqs();
2996 	init_IO_APIC_traps();
2997 	if (legacy_pic->nr_legacy_irqs)
2998 		check_timer();
2999 }
3000 
3001 /*
3002  *      Called after all the initialization is done. If we didn't find any
3003  *      APIC bugs, then we can allow the modify fast path.
3004  */
3005 
3006 static int __init io_apic_bug_finalize(void)
3007 {
3008 	if (sis_apic_bug == -1)
3009 		sis_apic_bug = 0;
3010 	return 0;
3011 }
3012 
3013 late_initcall(io_apic_bug_finalize);
3014 
3015 static void resume_ioapic_id(int ioapic_idx)
3016 {
3017 	unsigned long flags;
3018 	union IO_APIC_reg_00 reg_00;
3019 
3020 	raw_spin_lock_irqsave(&ioapic_lock, flags);
3021 	reg_00.raw = io_apic_read(ioapic_idx, 0);
3022 	if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) {
3023 		reg_00.bits.ID = mpc_ioapic_id(ioapic_idx);
3024 		io_apic_write(ioapic_idx, 0, reg_00.raw);
3025 	}
3026 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
3027 }
3028 
3029 static void ioapic_resume(void)
3030 {
3031 	int ioapic_idx;
3032 
3033 	for (ioapic_idx = nr_ioapics - 1; ioapic_idx >= 0; ioapic_idx--)
3034 		resume_ioapic_id(ioapic_idx);
3035 
3036 	restore_ioapic_entries();
3037 }
3038 
3039 static struct syscore_ops ioapic_syscore_ops = {
3040 	.suspend = save_ioapic_entries,
3041 	.resume = ioapic_resume,
3042 };
3043 
3044 static int __init ioapic_init_ops(void)
3045 {
3046 	register_syscore_ops(&ioapic_syscore_ops);
3047 
3048 	return 0;
3049 }
3050 
3051 device_initcall(ioapic_init_ops);
3052 
3053 /*
3054  * Dynamic irq allocate and deallocation
3055  */
3056 unsigned int create_irq_nr(unsigned int from, int node)
3057 {
3058 	struct irq_cfg *cfg;
3059 	unsigned long flags;
3060 	unsigned int ret = 0;
3061 	int irq;
3062 
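	/* Dynamically allocated irqs live above the GSI range; 0 is returned on failure. */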
3063 	if (from < nr_irqs_gsi)
3064 		from = nr_irqs_gsi;
3065 
3066 	irq = alloc_irq_from(from, node);
3067 	if (irq < 0)
3068 		return 0;
3069 	cfg = alloc_irq_cfg(irq, node);
3070 	if (!cfg) {
3071 		free_irq_at(irq, NULL);
3072 		return 0;
3073 	}
3074 
3075 	raw_spin_lock_irqsave(&vector_lock, flags);
3076 	if (!__assign_irq_vector(irq, cfg, apic->target_cpus()))
3077 		ret = irq;
3078 	raw_spin_unlock_irqrestore(&vector_lock, flags);
3079 
3080 	if (ret) {
3081 		irq_set_chip_data(irq, cfg);
3082 		irq_clear_status_flags(irq, IRQ_NOREQUEST);
3083 	} else {
3084 		free_irq_at(irq, cfg);
3085 	}
3086 	return ret;
3087 }
3088 
3089 int create_irq(void)
3090 {
3091 	int node = cpu_to_node(0);
3092 	unsigned int irq_want;
3093 	int irq;
3094 
3095 	irq_want = nr_irqs_gsi;
3096 	irq = create_irq_nr(irq_want, node);
3097 
3098 	if (irq == 0)
3099 		irq = -1;
3100 
3101 	return irq;
3102 }
3103 
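/*
 * Illustrative use of the dynamic irq API above (a sketch, not taken
 * from any in-tree caller):
 *
 *	int irq = create_irq();
 *	if (irq < 0)
 *		return irq;
 *	...
 *	destroy_irq(irq);
 */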
3104 void destroy_irq(unsigned int irq)
3105 {
3106 	struct irq_cfg *cfg = irq_get_chip_data(irq);
3107 	unsigned long flags;
3108 
3109 	irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
3110 
3111 	if (irq_remapped(cfg))
3112 		free_irte(irq);
3113 	raw_spin_lock_irqsave(&vector_lock, flags);
3114 	__clear_irq_vector(irq, cfg);
3115 	raw_spin_unlock_irqrestore(&vector_lock, flags);
3116 	free_irq_at(irq, cfg);
3117 }
3118 
3119 /*
3120  * MSI message composition
3121  */
3122 #ifdef CONFIG_PCI_MSI
3123 static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
3124 			   struct msi_msg *msg, u8 hpet_id)
3125 {
3126 	struct irq_cfg *cfg;
3127 	int err;
3128 	unsigned dest;
3129 
3130 	if (disable_apic)
3131 		return -ENXIO;
3132 
3133 	cfg = irq_cfg(irq);
3134 	err = assign_irq_vector(irq, cfg, apic->target_cpus());
3135 	if (err)
3136 		return err;
3137 
3138 	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
3139 
3140 	if (irq_remapped(cfg)) {
3141 		struct irte irte;
3142 		int ir_index;
3143 		u16 sub_handle;
3144 
3145 		ir_index = map_irq_to_irte_handle(irq, &sub_handle);
3146 		BUG_ON(ir_index == -1);
3147 
3148 		prepare_irte(&irte, cfg->vector, dest);
3149 
3150 		/* Set source-id of interrupt request */
3151 		if (pdev)
3152 			set_msi_sid(&irte, pdev);
3153 		else
3154 			set_hpet_sid(&irte, hpet_id);
3155 
3156 		modify_irte(irq, &irte);
3157 
3158 		msg->address_hi = MSI_ADDR_BASE_HI;
3159 		msg->data = sub_handle;
3160 		msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
3161 				  MSI_ADDR_IR_SHV |
3162 				  MSI_ADDR_IR_INDEX1(ir_index) |
3163 				  MSI_ADDR_IR_INDEX2(ir_index);
3164 	} else {
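		/*
		 * Classic (non-remapped) MSI: the address word selects the
		 * target APIC and the data word carries the vector.
		 */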
3165 		if (x2apic_enabled())
3166 			msg->address_hi = MSI_ADDR_BASE_HI |
3167 					  MSI_ADDR_EXT_DEST_ID(dest);
3168 		else
3169 			msg->address_hi = MSI_ADDR_BASE_HI;
3170 
3171 		msg->address_lo =
3172 			MSI_ADDR_BASE_LO |
3173 			((apic->irq_dest_mode == 0) ?
3174 				MSI_ADDR_DEST_MODE_PHYSICAL:
3175 				MSI_ADDR_DEST_MODE_LOGICAL) |
3176 			((apic->irq_delivery_mode != dest_LowestPrio) ?
3177 				MSI_ADDR_REDIRECTION_CPU:
3178 				MSI_ADDR_REDIRECTION_LOWPRI) |
3179 			MSI_ADDR_DEST_ID(dest);
3180 
3181 		msg->data =
3182 			MSI_DATA_TRIGGER_EDGE |
3183 			MSI_DATA_LEVEL_ASSERT |
3184 			((apic->irq_delivery_mode != dest_LowestPrio) ?
3185 				MSI_DATA_DELIVERY_FIXED:
3186 				MSI_DATA_DELIVERY_LOWPRI) |
3187 			MSI_DATA_VECTOR(cfg->vector);
3188 	}
3189 	return err;
3190 }
3191 
3192 #ifdef CONFIG_SMP
3193 static int
3194 msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
3195 {
3196 	struct irq_cfg *cfg = data->chip_data;
3197 	struct msi_msg msg;
3198 	unsigned int dest;
3199 
3200 	if (__ioapic_set_affinity(data, mask, &dest))
3201 		return -1;
3202 
3203 	__get_cached_msi_msg(data->msi_desc, &msg);
3204 
3205 	msg.data &= ~MSI_DATA_VECTOR_MASK;
3206 	msg.data |= MSI_DATA_VECTOR(cfg->vector);
3207 	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3208 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3209 
3210 	__write_msi_msg(data->msi_desc, &msg);
3211 
3212 	return 0;
3213 }
3214 #endif /* CONFIG_SMP */
3215 
3216 /*
3217  * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
3218  * which implement the MSI or MSI-X Capability Structure.
3219  */
3220 static struct irq_chip msi_chip = {
3221 	.name			= "PCI-MSI",
3222 	.irq_unmask		= unmask_msi_irq,
3223 	.irq_mask		= mask_msi_irq,
3224 	.irq_ack		= ack_apic_edge,
3225 #ifdef CONFIG_SMP
3226 	.irq_set_affinity	= msi_set_affinity,
3227 #endif
3228 	.irq_retrigger		= ioapic_retrigger_irq,
3229 };
3230 
3231 /*
3232  * Map the PCI dev to the corresponding remapping hardware unit
3233  * and allocate 'nvec' consecutive interrupt-remapping table entries
3234  * in it.
3235  */
3236 static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
3237 {
3238 	struct intel_iommu *iommu;
3239 	int index;
3240 
3241 	iommu = map_dev_to_ir(dev);
3242 	if (!iommu) {
3243 		printk(KERN_ERR
3244 		       "Unable to map PCI %s to iommu\n", pci_name(dev));
3245 		return -ENOENT;
3246 	}
3247 
3248 	index = alloc_irte(iommu, irq, nvec);
3249 	if (index < 0) {
3250 		printk(KERN_ERR
3251 		       "Unable to allocate %d IRTE for PCI %s\n", nvec,
3252 		       pci_name(dev));
3253 		return -ENOSPC;
3254 	}
3255 	return index;
3256 }
3257 
3258 static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3259 {
3260 	struct irq_chip *chip = &msi_chip;
3261 	struct msi_msg msg;
3262 	int ret;
3263 
3264 	ret = msi_compose_msg(dev, irq, &msg, -1);
3265 	if (ret < 0)
3266 		return ret;
3267 
3268 	irq_set_msi_desc(irq, msidesc);
3269 	write_msi_msg(irq, &msg);
3270 
3271 	if (irq_remapped(irq_get_chip_data(irq))) {
3272 		irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
3273 		irq_remap_modify_chip_defaults(chip);
3274 	}
3275 
3276 	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
3277 
3278 	dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
3279 
3280 	return 0;
3281 }
3282 
3283 int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3284 {
3285 	int node, ret, sub_handle, index = 0;
3286 	unsigned int irq, irq_want;
3287 	struct msi_desc *msidesc;
3288 	struct intel_iommu *iommu = NULL;
3289 
3290 	/* x86 doesn't support multiple MSI yet */
3291 	if (type == PCI_CAP_ID_MSI && nvec > 1)
3292 		return 1;
3293 
3294 	node = dev_to_node(&dev->dev);
3295 	irq_want = nr_irqs_gsi;
3296 	sub_handle = 0;
3297 	list_for_each_entry(msidesc, &dev->msi_list, list) {
3298 		irq = create_irq_nr(irq_want, node);
3299 		if (irq == 0)
3300 			return -1;
3301 		irq_want = irq + 1;
3302 		if (!intr_remapping_enabled)
3303 			goto no_ir;
3304 
3305 		if (!sub_handle) {
3306 			/*
3307 			 * allocate the consecutive block of IRTEs
3308 			 * for 'nvec'
3309 			 */
3310 			index = msi_alloc_irte(dev, irq, nvec);
3311 			if (index < 0) {
3312 				ret = index;
3313 				goto error;
3314 			}
3315 		} else {
3316 			iommu = map_dev_to_ir(dev);
3317 			if (!iommu) {
3318 				ret = -ENOENT;
3319 				goto error;
3320 			}
3321 			/*
3322 			 * Set up the mapping between the irq and the IRTE
3323 			 * base index, with the sub_handle pointing to the
3324 			 * appropriate interrupt remap table entry.
3325 			 */
3326 			set_irte_irq(irq, iommu, index, sub_handle);
3327 		}
3328 no_ir:
3329 		ret = setup_msi_irq(dev, msidesc, irq);
3330 		if (ret < 0)
3331 			goto error;
3332 		sub_handle++;
3333 	}
3334 	return 0;
3335 
3336 error:
3337 	destroy_irq(irq);
3338 	return ret;
3339 }
3340 
3341 void native_teardown_msi_irq(unsigned int irq)
3342 {
3343 	destroy_irq(irq);
3344 }
3345 
3346 #ifdef CONFIG_DMAR_TABLE
3347 #ifdef CONFIG_SMP
3348 static int
3349 dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
3350 		      bool force)
3351 {
3352 	struct irq_cfg *cfg = data->chip_data;
3353 	unsigned int dest, irq = data->irq;
3354 	struct msi_msg msg;
3355 
3356 	if (__ioapic_set_affinity(data, mask, &dest))
3357 		return -1;
3358 
3359 	dmar_msi_read(irq, &msg);
3360 
3361 	msg.data &= ~MSI_DATA_VECTOR_MASK;
3362 	msg.data |= MSI_DATA_VECTOR(cfg->vector);
3363 	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3364 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3365 	msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
3366 
3367 	dmar_msi_write(irq, &msg);
3368 
3369 	return 0;
3370 }
3371 
3372 #endif /* CONFIG_SMP */
3373 
3374 static struct irq_chip dmar_msi_type = {
3375 	.name			= "DMAR_MSI",
3376 	.irq_unmask		= dmar_msi_unmask,
3377 	.irq_mask		= dmar_msi_mask,
3378 	.irq_ack		= ack_apic_edge,
3379 #ifdef CONFIG_SMP
3380 	.irq_set_affinity	= dmar_msi_set_affinity,
3381 #endif
3382 	.irq_retrigger		= ioapic_retrigger_irq,
3383 };
3384 
3385 int arch_setup_dmar_msi(unsigned int irq)
3386 {
3387 	int ret;
3388 	struct msi_msg msg;
3389 
3390 	ret = msi_compose_msg(NULL, irq, &msg, -1);
3391 	if (ret < 0)
3392 		return ret;
3393 	dmar_msi_write(irq, &msg);
3394 	irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
3395 				      "edge");
3396 	return 0;
3397 }
3398 #endif
3399 
3400 #ifdef CONFIG_HPET_TIMER
3401 
3402 #ifdef CONFIG_SMP
3403 static int hpet_msi_set_affinity(struct irq_data *data,
3404 				 const struct cpumask *mask, bool force)
3405 {
3406 	struct irq_cfg *cfg = data->chip_data;
3407 	struct msi_msg msg;
3408 	unsigned int dest;
3409 
3410 	if (__ioapic_set_affinity(data, mask, &dest))
3411 		return -1;
3412 
3413 	hpet_msi_read(data->handler_data, &msg);
3414 
3415 	msg.data &= ~MSI_DATA_VECTOR_MASK;
3416 	msg.data |= MSI_DATA_VECTOR(cfg->vector);
3417 	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3418 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3419 
3420 	hpet_msi_write(data->handler_data, &msg);
3421 
3422 	return 0;
3423 }
3424 
3425 #endif /* CONFIG_SMP */
3426 
3427 static struct irq_chip hpet_msi_type = {
3428 	.name = "HPET_MSI",
3429 	.irq_unmask = hpet_msi_unmask,
3430 	.irq_mask = hpet_msi_mask,
3431 	.irq_ack = ack_apic_edge,
3432 #ifdef CONFIG_SMP
3433 	.irq_set_affinity = hpet_msi_set_affinity,
3434 #endif
3435 	.irq_retrigger = ioapic_retrigger_irq,
3436 };
3437 
3438 int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
3439 {
3440 	struct irq_chip *chip = &hpet_msi_type;
3441 	struct msi_msg msg;
3442 	int ret;
3443 
3444 	if (intr_remapping_enabled) {
3445 		struct intel_iommu *iommu = map_hpet_to_ir(id);
3446 		int index;
3447 
3448 		if (!iommu)
3449 			return -1;
3450 
3451 		index = alloc_irte(iommu, irq, 1);
3452 		if (index < 0)
3453 			return -1;
3454 	}
3455 
3456 	ret = msi_compose_msg(NULL, irq, &msg, id);
3457 	if (ret < 0)
3458 		return ret;
3459 
3460 	hpet_msi_write(irq_get_handler_data(irq), &msg);
3461 	irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
3462 	if (irq_remapped(irq_get_chip_data(irq)))
3463 		irq_remap_modify_chip_defaults(chip);
3464 
3465 	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
3466 	return 0;
3467 }
3468 #endif
3469 
3470 #endif /* CONFIG_PCI_MSI */
3471 /*
3472  * Hypertransport interrupt support
3473  */
3474 #ifdef CONFIG_HT_IRQ
3475 
3476 #ifdef CONFIG_SMP
3477 
3478 static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
3479 {
3480 	struct ht_irq_msg msg;
3481 	fetch_ht_irq_msg(irq, &msg);
3482 
3483 	msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
3484 	msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
3485 
3486 	msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
3487 	msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
3488 
3489 	write_ht_irq_msg(irq, &msg);
3490 }
3491 
3492 static int
3493 ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
3494 {
3495 	struct irq_cfg *cfg = data->chip_data;
3496 	unsigned int dest;
3497 
3498 	if (__ioapic_set_affinity(data, mask, &dest))
3499 		return -1;
3500 
3501 	target_ht_irq(data->irq, dest, cfg->vector);
3502 	return 0;
3503 }
3504 
3505 #endif
3506 
3507 static struct irq_chip ht_irq_chip = {
3508 	.name			= "PCI-HT",
3509 	.irq_mask		= mask_ht_irq,
3510 	.irq_unmask		= unmask_ht_irq,
3511 	.irq_ack		= ack_apic_edge,
3512 #ifdef CONFIG_SMP
3513 	.irq_set_affinity	= ht_set_affinity,
3514 #endif
3515 	.irq_retrigger		= ioapic_retrigger_irq,
3516 };
3517 
3518 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3519 {
3520 	struct irq_cfg *cfg;
3521 	int err;
3522 
3523 	if (disable_apic)
3524 		return -ENXIO;
3525 
3526 	cfg = irq_cfg(irq);
3527 	err = assign_irq_vector(irq, cfg, apic->target_cpus());
3528 	if (!err) {
3529 		struct ht_irq_msg msg;
3530 		unsigned dest;
3531 
3532 		dest = apic->cpu_mask_to_apicid_and(cfg->domain,
3533 						    apic->target_cpus());
3534 
3535 		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
3536 
3537 		msg.address_lo =
3538 			HT_IRQ_LOW_BASE |
3539 			HT_IRQ_LOW_DEST_ID(dest) |
3540 			HT_IRQ_LOW_VECTOR(cfg->vector) |
3541 			((apic->irq_dest_mode == 0) ?
3542 				HT_IRQ_LOW_DM_PHYSICAL :
3543 				HT_IRQ_LOW_DM_LOGICAL) |
3544 			HT_IRQ_LOW_RQEOI_EDGE |
3545 			((apic->irq_delivery_mode != dest_LowestPrio) ?
3546 				HT_IRQ_LOW_MT_FIXED :
3547 				HT_IRQ_LOW_MT_ARBITRATED) |
3548 			HT_IRQ_LOW_IRQ_MASKED;
3549 
3550 		write_ht_irq_msg(irq, &msg);
3551 
3552 		irq_set_chip_and_handler_name(irq, &ht_irq_chip,
3553 					      handle_edge_irq, "edge");
3554 
3555 		dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
3556 	}
3557 	return err;
3558 }
3559 #endif /* CONFIG_HT_IRQ */
3560 
3561 static int
3562 io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
3563 {
3564 	struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
3565 	int ret;
3566 
3567 	if (!cfg)
3568 		return -EINVAL;
3569 	ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin);
3570 	if (!ret)
3571 		setup_ioapic_irq(irq, cfg, attr);
3572 	return ret;
3573 }
3574 
3575 int io_apic_setup_irq_pin_once(unsigned int irq, int node,
3576 			       struct io_apic_irq_attr *attr)
3577 {
3578 	unsigned int ioapic_idx = attr->ioapic, pin = attr->ioapic_pin;
3579 	int ret;
3580 
3581 	/* Avoid redundant programming */
3582 	if (test_bit(pin, ioapics[ioapic_idx].pin_programmed)) {
3583 		pr_debug("Pin %d-%d already programmed\n",
3584 			 mpc_ioapic_id(ioapic_idx), pin);
3585 		return 0;
3586 	}
3587 	ret = io_apic_setup_irq_pin(irq, node, attr);
3588 	if (!ret)
3589 		set_bit(pin, ioapics[ioapic_idx].pin_programmed);
3590 	return ret;
3591 }
3592 
3593 static int __init io_apic_get_redir_entries(int ioapic)
3594 {
3595 	union IO_APIC_reg_01	reg_01;
3596 	unsigned long flags;
3597 
3598 	raw_spin_lock_irqsave(&ioapic_lock, flags);
3599 	reg_01.raw = io_apic_read(ioapic, 1);
3600 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
3601 
3602 	/* The register returns the maximum redirection index
3603 	 * supported, which is one less than the total number of redir
3604 	 * entries.
3605 	 */
3606 	return reg_01.bits.entries + 1;
3607 }
3608 
3609 static void __init probe_nr_irqs_gsi(void)
3610 {
3611 	int nr;
3612 
3613 	nr = gsi_top + NR_IRQS_LEGACY;
3614 	if (nr > nr_irqs_gsi)
3615 		nr_irqs_gsi = nr;
3616 
3617 	printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
3618 }
3619 
3620 int get_nr_irqs_gsi(void)
3621 {
3622 	return nr_irqs_gsi;
3623 }
3624 
3625 int __init arch_probe_nr_irqs(void)
3626 {
3627 	int nr;
3628 
3629 	if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
3630 		nr_irqs = NR_VECTORS * nr_cpu_ids;
3631 
3632 	nr = nr_irqs_gsi + 8 * nr_cpu_ids;
3633 #if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
3634 	/*
3635 	 * room for dynamically allocated MSI and HT irqs
3636 	 */
3637 	nr += nr_irqs_gsi * 16;
3638 #endif
3639 	if (nr < nr_irqs)
3640 		nr_irqs = nr;
3641 
3642 	return NR_IRQS_LEGACY;
3643 }
3644 
3645 int io_apic_set_pci_routing(struct device *dev, int irq,
3646 			    struct io_apic_irq_attr *irq_attr)
3647 {
3648 	int node;
3649 
3650 	if (!IO_APIC_IRQ(irq)) {
3651 		apic_printk(APIC_QUIET, KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
3652 			    irq_attr->ioapic);
3653 		return -EINVAL;
3654 	}
3655 
3656 	node = dev ? dev_to_node(dev) : cpu_to_node(0);
3657 
3658 	return io_apic_setup_irq_pin_once(irq, node, irq_attr);
3659 }
3660 
3661 #ifdef CONFIG_X86_32
3662 static int __init io_apic_get_unique_id(int ioapic, int apic_id)
3663 {
3664 	union IO_APIC_reg_00 reg_00;
3665 	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
3666 	physid_mask_t tmp;
3667 	unsigned long flags;
3668 	int i = 0;
3669 
3670 	/*
3671 	 * The P4 platform supports up to 256 APIC IDs on two separate APIC
3672 	 * buses (one for LAPICs, one for IOAPICs), whereas predecessors only
3673 	 * support up to 16 on one shared APIC bus.
3674 	 *
3675 	 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
3676 	 *      advantage of new APIC bus architecture.
3677 	 */
3678 
3679 	if (physids_empty(apic_id_map))
3680 		apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);
3681 
3682 	raw_spin_lock_irqsave(&ioapic_lock, flags);
3683 	reg_00.raw = io_apic_read(ioapic, 0);
3684 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
3685 
3686 	if (apic_id >= get_physical_broadcast()) {
3687 		printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
3688 			"%d\n", ioapic, apic_id, reg_00.bits.ID);
3689 		apic_id = reg_00.bits.ID;
3690 	}
3691 
3692 	/*
3693 	 * Every APIC in a system must have a unique ID or we get lots of nice
3694 	 * 'stuck on smp_invalidate_needed IPI wait' messages.
3695 	 */
3696 	if (apic->check_apicid_used(&apic_id_map, apic_id)) {
3697 
3698 		for (i = 0; i < get_physical_broadcast(); i++) {
3699 			if (!apic->check_apicid_used(&apic_id_map, i))
3700 				break;
3701 		}
3702 
3703 		if (i == get_physical_broadcast())
3704 			panic("Max apic_id exceeded!\n");
3705 
3706 		printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
3707 			"trying %d\n", ioapic, apic_id, i);
3708 
3709 		apic_id = i;
3710 	}
3711 
3712 	apic->apicid_to_cpu_present(apic_id, &tmp);
3713 	physids_or(apic_id_map, apic_id_map, tmp);
3714 
3715 	if (reg_00.bits.ID != apic_id) {
3716 		reg_00.bits.ID = apic_id;
3717 
3718 		raw_spin_lock_irqsave(&ioapic_lock, flags);
3719 		io_apic_write(ioapic, 0, reg_00.raw);
3720 		reg_00.raw = io_apic_read(ioapic, 0);
3721 		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
3722 
3723 		/* Sanity check */
3724 		if (reg_00.bits.ID != apic_id) {
3725 			printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
3726 			return -1;
3727 		}
3728 	}
3729 
3730 	apic_printk(APIC_VERBOSE, KERN_INFO
3731 			"IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
3732 
3733 	return apic_id;
3734 }
3735 
3736 static u8 __init io_apic_unique_id(u8 id)
3737 {
3738 	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
3739 	    !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
3740 		return io_apic_get_unique_id(nr_ioapics, id);
3741 	else
3742 		return id;
3743 }
3744 #else
3745 static u8 __init io_apic_unique_id(u8 id)
3746 {
3747 	int i;
3748 	DECLARE_BITMAP(used, 256);
3749 
3750 	bitmap_zero(used, 256);
3751 	for (i = 0; i < nr_ioapics; i++) {
3752 		__set_bit(mpc_ioapic_id(i), used);
3753 	}
3754 	if (!test_bit(id, used))
3755 		return id;
3756 	return find_first_zero_bit(used, 256);
3757 }
3758 #endif
3759 
3760 static int __init io_apic_get_version(int ioapic)
3761 {
3762 	union IO_APIC_reg_01	reg_01;
3763 	unsigned long flags;
3764 
3765 	raw_spin_lock_irqsave(&ioapic_lock, flags);
3766 	reg_01.raw = io_apic_read(ioapic, 1);
3767 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
3768 
3769 	return reg_01.bits.version;
3770 }
3771 
3772 int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity)
3773 {
3774 	int ioapic, pin, idx;
3775 
3776 	if (skip_ioapic_setup)
3777 		return -1;
3778 
3779 	ioapic = mp_find_ioapic(gsi);
3780 	if (ioapic < 0)
3781 		return -1;
3782 
3783 	pin = mp_find_ioapic_pin(ioapic, gsi);
3784 	if (pin < 0)
3785 		return -1;
3786 
3787 	idx = find_irq_entry(ioapic, pin, mp_INT);
3788 	if (idx < 0)
3789 		return -1;
3790 
3791 	*trigger = irq_trigger(idx);
3792 	*polarity = irq_polarity(idx);
3793 	return 0;
3794 }
3795 
3796 /*
3797  * This function is currently only a helper for the i386 smp boot process, where
3798  * we need to reprogram the ioredtbls to cater for the cpus which have come online,
3799  * so the mask in all cases should simply be apic->target_cpus().
3800  */
3801 #ifdef CONFIG_SMP
3802 void __init setup_ioapic_dest(void)
3803 {
3804 	int pin, ioapic, irq, irq_entry;
3805 	const struct cpumask *mask;
3806 	struct irq_data *idata;
3807 
3808 	if (skip_ioapic_setup == 1)
3809 		return;
3810 
3811 	for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
3812 	for (pin = 0; pin < ioapics[ioapic].nr_registers; pin++) {
3813 		irq_entry = find_irq_entry(ioapic, pin, mp_INT);
3814 		if (irq_entry == -1)
3815 			continue;
3816 		irq = pin_2_irq(irq_entry, ioapic, pin);
3817 
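		/*
		 * On secondary IO-APICs, only pins routed to the low
		 * (legacy-range) IRQs are reprogrammed here; everything
		 * else is set up later, when a driver requests it.
		 */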
3818 		if ((ioapic > 0) && (irq > 16))
3819 			continue;
3820 
3821 		idata = irq_get_irq_data(irq);
3822 
3823 		/*
3824 		 * Honour affinities which have been set in early boot
3825 		 */
3826 		if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata))
3827 			mask = idata->affinity;
3828 		else
3829 			mask = apic->target_cpus();
3830 
3831 		if (intr_remapping_enabled)
3832 			ir_ioapic_set_affinity(idata, mask, false);
3833 		else
3834 			ioapic_set_affinity(idata, mask, false);
3835 	}
3836 
3837 }
3838 #endif
3839 
3840 #define IOAPIC_RESOURCE_NAME_SIZE 11
3841 
3842 static struct resource *ioapic_resources;
3843 
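/*
 * Carve the resource descriptors and their name strings out of a single
 * bootmem allocation: nr_ioapics 'struct resource' entries first,
 * followed by one IOAPIC_RESOURCE_NAME_SIZE name buffer per IO-APIC,
 * e.g. for two IO-APICs:
 *
 *	[res 0][res 1]["IOAPIC 0\0"]["IOAPIC 1\0"]
 */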
3844 static struct resource * __init ioapic_setup_resources(int nr_ioapics)
3845 {
3846 	unsigned long n;
3847 	struct resource *res;
3848 	char *mem;
3849 	int i;
3850 
3851 	if (nr_ioapics <= 0)
3852 		return NULL;
3853 
3854 	n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
3855 	n *= nr_ioapics;
3856 
3857 	mem = alloc_bootmem(n);
3858 	res = (void *)mem;
3859 
3860 	mem += sizeof(struct resource) * nr_ioapics;
3861 
3862 	for (i = 0; i < nr_ioapics; i++) {
3863 		res[i].name = mem;
3864 		res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
3865 		snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
3866 		mem += IOAPIC_RESOURCE_NAME_SIZE;
3867 	}
3868 
3869 	ioapic_resources = res;
3870 
3871 	return res;
3872 }
3873 
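/*
 * Map each IO-APIC's register window through its own fixmap slot
 * (uncached, since these are device registers) and record the physical
 * range so it can later be claimed in the iomem resource tree.
 */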
3874 void __init ioapic_and_gsi_init(void)
3875 {
3876 	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
3877 	struct resource *ioapic_res;
3878 	int i;
3879 
3880 	ioapic_res = ioapic_setup_resources(nr_ioapics);
3881 	for (i = 0; i < nr_ioapics; i++) {
3882 		if (smp_found_config) {
3883 			ioapic_phys = mpc_ioapic_addr(i);
3884 #ifdef CONFIG_X86_32
3885 			if (!ioapic_phys) {
				printk(KERN_ERR
				       "WARNING: bogus zero IO-APIC "
				       "address found in MPTABLE, "
				       "disabling IO-APIC support!\n");
3890 				smp_found_config = 0;
3891 				skip_ioapic_setup = 1;
3892 				goto fake_ioapic_page;
3893 			}
3894 #endif
3895 		} else {
3896 #ifdef CONFIG_X86_32
3897 fake_ioapic_page:
3898 #endif
3899 			ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
3900 			ioapic_phys = __pa(ioapic_phys);
3901 		}
3902 		set_fixmap_nocache(idx, ioapic_phys);
3903 		apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
3904 			__fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK),
3905 			ioapic_phys);
3906 		idx++;
3907 
3908 		ioapic_res->start = ioapic_phys;
3909 		ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
3910 		ioapic_res++;
3911 	}
3912 
3913 	probe_nr_irqs_gsi();
3914 }
3915 
3916 void __init ioapic_insert_resources(void)
3917 {
3918 	int i;
3919 	struct resource *r = ioapic_resources;
3920 
3921 	if (!r) {
3922 		if (nr_ioapics > 0)
3923 			printk(KERN_ERR
3924 				"IO APIC resources couldn't be allocated.\n");
3925 		return;
3926 	}
3927 
3928 	for (i = 0; i < nr_ioapics; i++) {
3929 		insert_resource(&iomem_resource, r);
3930 		r++;
3931 	}
3932 }
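
/*
 * Once inserted, the register windows appear as busy "IOAPIC <n>"
 * entries under /proc/iomem.
 */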
3933 
3934 int mp_find_ioapic(u32 gsi)
3935 {
	int i;
3937 
3938 	if (nr_ioapics == 0)
3939 		return -1;
3940 
3941 	/* Find the IOAPIC that manages this GSI. */
3942 	for (i = 0; i < nr_ioapics; i++) {
3943 		struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(i);
3944 		if ((gsi >= gsi_cfg->gsi_base)
3945 		    && (gsi <= gsi_cfg->gsi_end))
3946 			return i;
3947 	}
3948 
	printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %u\n", gsi);
3950 	return -1;
3951 }
3952 
3953 int mp_find_ioapic_pin(int ioapic, u32 gsi)
3954 {
3955 	struct mp_ioapic_gsi *gsi_cfg;
3956 
3957 	if (WARN_ON(ioapic == -1))
3958 		return -1;
3959 
3960 	gsi_cfg = mp_ioapic_gsi_routing(ioapic);
	if (WARN_ON(gsi < gsi_cfg->gsi_base || gsi > gsi_cfg->gsi_end))
		return -1;
3963 
3964 	return gsi - gsi_cfg->gsi_base;
3965 }
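
/*
 * A usage sketch with hypothetical values: if IO-APIC 0 covers GSI 0-23
 * and IO-APIC 1 covers GSI 24-47, mp_find_ioapic(30) returns 1 and
 * mp_find_ioapic_pin(1, 30) returns 30 - 24 == 6, i.e. pin 6 of the
 * second IO-APIC.
 */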
3966 
3967 static __init int bad_ioapic(unsigned long address)
3968 {
3969 	if (nr_ioapics >= MAX_IO_APICS) {
3970 		printk(KERN_WARNING "WARNING: Max # of I/O APICs (%d) exceeded "
3971 		       "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics);
3972 		return 1;
3973 	}
3974 	if (!address) {
3975 		printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address"
3976 		       " found in table, skipping!\n");
3977 		return 1;
3978 	}
3979 	return 0;
3980 }
3981 
3982 void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
3983 {
3984 	int idx = 0;
3985 	int entries;
3986 	struct mp_ioapic_gsi *gsi_cfg;
3987 
3988 	if (bad_ioapic(address))
3989 		return;
3990 
3991 	idx = nr_ioapics;
3992 
3993 	ioapics[idx].mp_config.type = MP_IOAPIC;
3994 	ioapics[idx].mp_config.flags = MPC_APIC_USABLE;
3995 	ioapics[idx].mp_config.apicaddr = address;
3996 
3997 	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
3998 	ioapics[idx].mp_config.apicid = io_apic_unique_id(id);
3999 	ioapics[idx].mp_config.apicver = io_apic_get_version(idx);
4000 
4001 	/*
4002 	 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
4003 	 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
4004 	 */
4005 	entries = io_apic_get_redir_entries(idx);
4006 	gsi_cfg = mp_ioapic_gsi_routing(idx);
4007 	gsi_cfg->gsi_base = gsi_base;
4008 	gsi_cfg->gsi_end = gsi_base + entries - 1;
4009 
4010 	/*
4011 	 * The number of IO-APIC IRQ registers (== #pins):
4012 	 */
4013 	ioapics[idx].nr_registers = entries;
4014 
4015 	if (gsi_cfg->gsi_end >= gsi_top)
4016 		gsi_top = gsi_cfg->gsi_end + 1;
4017 
4018 	printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
4019 	       "GSI %d-%d\n", idx, mpc_ioapic_id(idx),
4020 	       mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
4021 	       gsi_cfg->gsi_base, gsi_cfg->gsi_end);
4022 
4023 	nr_ioapics++;
4024 }
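
/*
 * A worked registration example (hypothetical firmware values):
 * mp_register_ioapic(2, 0xfec00000, 0) on a 24-pin IO-APIC yields
 * gsi_base == 0, gsi_end == 23 and nr_registers == 24, and bumps
 * gsi_top to 24, one past the highest GSI seen so far.
 */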
4025 
4026 /* Enable IOAPIC early just for system timer */
4027 void __init pre_init_apic_IRQ0(void)
4028 {
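	/* All-zero attr: IO-APIC 0, pin 0, edge-triggered, active high */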
4029 	struct io_apic_irq_attr attr = { 0, 0, 0, 0 };
4030 
	printk(KERN_INFO "Early APIC setup for system timer (IRQ0)\n");
4032 #ifndef CONFIG_SMP
4033 	physid_set_mask_of_physid(boot_cpu_physical_apicid,
4034 					 &phys_cpu_present_map);
4035 #endif
4036 	setup_local_APIC();
4037 
4038 	io_apic_setup_irq_pin(0, 0, &attr);
4039 	irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
4040 				      "edge");
4041 }
4042