xref: /freebsd/sys/kern/subr_smp.c (revision 4cf49a43559ed9fdad601bdcccd2c55963008675)
1 /*
2  * Copyright (c) 1996, by Steve Passe
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. The name of the developer may NOT be used to endorse or promote products
11  *    derived from this software without specific prior written permission.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27 
28 #include "opt_smp.h"
29 #include "opt_cpu.h"
30 #include "opt_user_ldt.h"
31 
32 #ifdef SMP
33 #include <machine/smptests.h>
34 #else
35 #error
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/kernel.h>
41 #include <sys/proc.h>
42 #include <sys/sysctl.h>
43 #include <sys/malloc.h>
44 #include <sys/memrange.h>
45 #ifdef BETTER_CLOCK
46 #include <sys/dkstat.h>
47 #endif
48 #include <sys/cons.h>	/* cngetc() */
49 
50 #include <vm/vm.h>
51 #include <vm/vm_param.h>
52 #include <vm/pmap.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_extern.h>
55 #ifdef BETTER_CLOCK
56 #include <sys/lock.h>
57 #include <vm/vm_map.h>
58 #include <sys/user.h>
59 #ifdef GPROF
60 #include <sys/gmon.h>
61 #endif
62 #endif
63 
64 #include <machine/smp.h>
65 #include <machine/apic.h>
66 #include <machine/atomic.h>
67 #include <machine/cpufunc.h>
68 #include <machine/mpapic.h>
69 #include <machine/psl.h>
70 #include <machine/segments.h>
71 #include <machine/smptests.h>	/** TEST_DEFAULT_CONFIG, TEST_TEST1 */
72 #include <machine/tss.h>
73 #include <machine/specialreg.h>
74 #include <machine/globaldata.h>
75 
76 #if defined(APIC_IO)
77 #include <machine/md_var.h>		/* setidt() */
78 #include <i386/isa/icu.h>		/* IPIs */
79 #include <i386/isa/intr_machdep.h>	/* IPIs */
80 #endif	/* APIC_IO */
81 
82 #if defined(TEST_DEFAULT_CONFIG)
83 #define MPFPS_MPFB1	TEST_DEFAULT_CONFIG
84 #else
85 #define MPFPS_MPFB1	mpfps->mpfb1
86 #endif  /* TEST_DEFAULT_CONFIG */
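
/*
 * MP feature byte 1: zero means a full MP configuration table is present;
 * a non-zero value selects one of the "default" system configurations
 * described in chapter 5 of the Intel MP specification.
 */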
87 
88 #define WARMBOOT_TARGET		0
89 #define WARMBOOT_OFF		(KERNBASE + 0x0467)
90 #define WARMBOOT_SEG		(KERNBASE + 0x0469)
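
/*
 * 0x40:0x67 in the BIOS data area holds the warm-boot (reset) vector:
 * the offset word lives at physical 0x467 and the segment word at 0x469.
 * The AP trampoline address is planted here before each AP is started.
 */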
91 
92 #ifdef PC98
93 #define BIOS_BASE		(0xe8000)
94 #define BIOS_SIZE		(0x18000)
95 #else
96 #define BIOS_BASE		(0xf0000)
97 #define BIOS_SIZE		(0x10000)
98 #endif
99 #define BIOS_COUNT		(BIOS_SIZE/4)
100 
101 #define CMOS_REG		(0x70)
102 #define CMOS_DATA		(0x71)
103 #define BIOS_RESET		(0x0f)
104 #define BIOS_WARM		(0x0a)
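
/*
 * CMOS_REG/CMOS_DATA are the RTC index and data ports.  BIOS_RESET is the
 * shutdown status byte; writing BIOS_WARM there makes the BIOS skip POST
 * and jump through the 0x40:0x67 warm-boot vector on the next (re)start.
 */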
105 
106 #define PROCENTRY_FLAG_EN	0x01
107 #define PROCENTRY_FLAG_BP	0x02
108 #define IOAPICENTRY_FLAG_EN	0x01
109 
110 
111 /* MP Floating Pointer Structure */
112 typedef struct MPFPS {
113 	char    signature[4];
114 	void   *pap;
115 	u_char  length;
116 	u_char  spec_rev;
117 	u_char  checksum;
118 	u_char  mpfb1;
119 	u_char  mpfb2;
120 	u_char  mpfb3;
121 	u_char  mpfb4;
122 	u_char  mpfb5;
123 }      *mpfps_t;
124 
125 /* MP Configuration Table Header */
126 typedef struct MPCTH {
127 	char    signature[4];
128 	u_short base_table_length;
129 	u_char  spec_rev;
130 	u_char  checksum;
131 	u_char  oem_id[8];
132 	u_char  product_id[12];
133 	void   *oem_table_pointer;
134 	u_short oem_table_size;
135 	u_short entry_count;
136 	void   *apic_address;
137 	u_short extended_table_length;
138 	u_char  extended_table_checksum;
139 	u_char  reserved;
140 }      *mpcth_t;
141 
142 
143 typedef struct PROCENTRY {
144 	u_char  type;
145 	u_char  apic_id;
146 	u_char  apic_version;
147 	u_char  cpu_flags;
148 	u_long  cpu_signature;
149 	u_long  feature_flags;
150 	u_long  reserved1;
151 	u_long  reserved2;
152 }      *proc_entry_ptr;
153 
154 typedef struct BUSENTRY {
155 	u_char  type;
156 	u_char  bus_id;
157 	char    bus_type[6];
158 }      *bus_entry_ptr;
159 
160 typedef struct IOAPICENTRY {
161 	u_char  type;
162 	u_char  apic_id;
163 	u_char  apic_version;
164 	u_char  apic_flags;
165 	void   *apic_address;
166 }      *io_apic_entry_ptr;
167 
168 typedef struct INTENTRY {
169 	u_char  type;
170 	u_char  int_type;
171 	u_short int_flags;
172 	u_char  src_bus_id;
173 	u_char  src_bus_irq;
174 	u_char  dst_apic_id;
175 	u_char  dst_apic_int;
176 }      *int_entry_ptr;
177 
178 /* descriptions of MP basetable entries */
179 typedef struct BASETABLE_ENTRY {
180 	u_char  type;
181 	u_char  length;
182 	char    name[16];
183 }       basetable_entry;
184 
185 /*
186  * This code MUST be enabled both here and in mpboot.s.
187  * It traces the very early stages of AP boot by placing values in CMOS RAM.
188  * It is NORMALLY never needed, hence the primitive method of enabling it.
189  *
190 #define CHECK_POINTS
191  */
192 
193 #if defined(CHECK_POINTS) && !defined(PC98)
194 #define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
195 #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
196 
197 #define CHECK_INIT(D);				\
198 	CHECK_WRITE(0x34, (D));			\
199 	CHECK_WRITE(0x35, (D));			\
200 	CHECK_WRITE(0x36, (D));			\
201 	CHECK_WRITE(0x37, (D));			\
202 	CHECK_WRITE(0x38, (D));			\
203 	CHECK_WRITE(0x39, (D));
204 
205 #define CHECK_PRINT(S);				\
206 	printf("%s: %d, %d, %d, %d, %d, %d\n",	\
207 	   (S),					\
208 	   CHECK_READ(0x34),			\
209 	   CHECK_READ(0x35),			\
210 	   CHECK_READ(0x36),			\
211 	   CHECK_READ(0x37),			\
212 	   CHECK_READ(0x38),			\
213 	   CHECK_READ(0x39));
214 
215 #else				/* CHECK_POINTS */
216 
217 #define CHECK_INIT(D)
218 #define CHECK_PRINT(S)
219 
220 #endif				/* CHECK_POINTS */
221 
222 /*
223  * Values to send to the POST hardware.
224  */
225 #define MP_BOOTADDRESS_POST	0x10
226 #define MP_PROBE_POST		0x11
227 #define MPTABLE_PASS1_POST	0x12
228 
229 #define MP_START_POST		0x13
230 #define MP_ENABLE_POST		0x14
231 #define MPTABLE_PASS2_POST	0x15
232 
233 #define START_ALL_APS_POST	0x16
234 #define INSTALL_AP_TRAMP_POST	0x17
235 #define START_AP_POST		0x18
236 
237 #define MP_ANNOUNCE_POST	0x19
238 
239 
240 /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
241 int	current_postcode;
242 
243 /** XXX FIXME: what system files declare these??? */
244 extern struct region_descriptor r_gdt, r_idt;
245 
246 int	bsp_apic_ready = 0;	/* flags usability of the BSP APIC */
247 int	mp_ncpus;		/* # of CPUs, including BSP */
248 int	mp_naps;		/* # of Application Processors */
249 int	mp_nbusses;		/* # of busses */
250 int	mp_napics;		/* # of IO APICs */
251 int	boot_cpu_id;		/* designated BSP */
252 vm_offset_t cpu_apic_address;
253 vm_offset_t io_apic_address[NAPICID];	/* NAPICID is more than enough */
254 extern	int nkpt;
255 
256 u_int32_t cpu_apic_versions[NCPU];
257 u_int32_t io_apic_versions[NAPIC];
258 
259 #ifdef APIC_INTR_DIAGNOSTIC
260 int apic_itrace_enter[32];
261 int apic_itrace_tryisrlock[32];
262 int apic_itrace_gotisrlock[32];
263 int apic_itrace_active[32];
264 int apic_itrace_masked[32];
265 int apic_itrace_noisrlock[32];
266 int apic_itrace_masked2[32];
267 int apic_itrace_unmask[32];
268 int apic_itrace_noforward[32];
269 int apic_itrace_leave[32];
270 int apic_itrace_enter2[32];
271 int apic_itrace_doreti[32];
272 int apic_itrace_splz[32];
273 int apic_itrace_eoi[32];
274 #ifdef APIC_INTR_DIAGNOSTIC_IRQ
275 unsigned short apic_itrace_debugbuffer[32768];
276 int apic_itrace_debugbuffer_idx;
277 struct simplelock apic_itrace_debuglock;
278 #endif
279 #endif
280 
281 #ifdef APIC_INTR_REORDER
282 struct {
283 	volatile int *location;
284 	int bit;
285 } apic_isrbit_location[32];
286 #endif
287 
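/*
 * Maps each allocated IRQ to the IO APIC, pin and redirection-table index
 * that delivers it; filled in by assign_apic_irq().
 */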
288 struct apic_intmapinfo	int_to_apicintpin[APIC_INTMAPSIZE];
289 
290 /*
291  * APIC ID logical/physical mapping structures.
292  * We oversize these to simplify boot-time config.
293  */
294 int     cpu_num_to_apic_id[NAPICID];
295 int     io_num_to_apic_id[NAPICID];
296 int     apic_id_to_logical[NAPICID];
297 
298 
299 /* Bitmap of all available CPUs */
300 u_int	all_cpus;
301 
302 /* AP uses this during bootstrap.  Do not staticize.  */
303 char *bootSTK;
304 static int bootAP;
305 
306 /* Hotwire a 0->4MB V==P mapping */
307 extern pt_entry_t *KPTphys;
308 
309 /* SMP page table page */
310 extern pt_entry_t *SMPpt;
311 
312 struct pcb stoppcbs[NCPU];
313 
314 int smp_started;		/* has the system started? */
315 
316 /*
317  * Local data and functions.
318  */
319 
320 static int	mp_capable;
321 static u_int	boot_address;
322 static u_int	base_memory;
323 
324 static int	picmode;		/* 0: virtual wire mode, 1: PIC mode */
325 static mpfps_t	mpfps;
326 static int	search_for_sig(u_int32_t target, int count);
327 static void	mp_enable(u_int boot_addr);
328 
329 static int	mptable_pass1(void);
330 static int	mptable_pass2(void);
331 static void	default_mp_table(int type);
332 static void	fix_mp_table(void);
333 static void	setup_apic_irq_mapping(void);
334 static void	init_locks(void);
335 static int	start_all_aps(u_int boot_addr);
336 static void	install_ap_tramp(u_int boot_addr);
337 static int	start_ap(int logicalCpu, u_int boot_addr);
338 
339 /*
340  * Calculate usable address in base memory for AP trampoline code.
341  */
342 u_int
343 mp_bootaddress(u_int basemem)
344 {
345 	POSTCODE(MP_BOOTADDRESS_POST);
346 
347 	base_memory = basemem * 1024;	/* convert to bytes */
348 
349 	boot_address = base_memory & ~0xfff;	/* round down to 4k boundary */
350 	if ((base_memory - boot_address) < bootMP_size)
351 		boot_address -= 4096;	/* not enough, lower by 4k */
352 
353 	return boot_address;
354 }
355 
356 
357 /*
358  * Look for an Intel MP spec table (i.e., SMP-capable hardware).
359  */
360 int
361 mp_probe(void)
362 {
363 	int     x;
364 	u_long  segment;
365 	u_int32_t target;
366 
367 	POSTCODE(MP_PROBE_POST);
368 
369 	/* see if EBDA exists */
370 	if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
371 		/* search first 1K of EBDA */
372 		target = (u_int32_t) (segment << 4);
373 		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
374 			goto found;
375 	} else {
376 		/* last 1K of base memory, effective 'top of base' passed in */
377 		target = (u_int32_t) (base_memory - 0x400);
378 		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
379 			goto found;
380 	}
381 
382 	/* search the BIOS */
383 	target = (u_int32_t) BIOS_BASE;
384 	if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
385 		goto found;
386 
387 	/* nothing found */
388 	mpfps = (mpfps_t)0;
389 	mp_capable = 0;
390 	return 0;
391 
392 found:
393 	/* calculate needed resources */
394 	mpfps = (mpfps_t)x;
395 	if (mptable_pass1())
396 		panic("you must reconfigure your kernel");
397 
398 	/* flag the fact that this is an MP capable system */
399 	mp_capable = 1;
400 	return 1;
401 }
402 
403 
404 /*
405  * Startup the SMP processors.
406  */
407 void
408 mp_start(void)
409 {
410 	POSTCODE(MP_START_POST);
411 
412 	/* look for MP capable motherboard */
413 	if (mp_capable)
414 		mp_enable(boot_address);
415 	else
416 		panic("MP hardware not found!");
417 }
418 
419 
420 /*
421  * Print various information about the SMP system hardware and setup.
422  */
423 void
424 mp_announce(void)
425 {
426 	int     x;
427 
428 	POSTCODE(MP_ANNOUNCE_POST);
429 
430 	printf("FreeBSD/SMP: Multiprocessor motherboard\n");
431 	printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
432 	printf(", version: 0x%08x", cpu_apic_versions[0]);
433 	printf(", at 0x%08x\n", cpu_apic_address);
434 	for (x = 1; x <= mp_naps; ++x) {
435 		printf(" cpu%d (AP):  apic id: %2d", x, CPU_TO_ID(x));
436 		printf(", version: 0x%08x", cpu_apic_versions[x]);
437 		printf(", at 0x%08x\n", cpu_apic_address);
438 	}
439 
440 #if defined(APIC_IO)
441 	for (x = 0; x < mp_napics; ++x) {
442 		printf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x));
443 		printf(", version: 0x%08x", io_apic_versions[x]);
444 		printf(", at 0x%08x\n", io_apic_address[x]);
445 	}
446 #else
447 	printf(" Warning: APIC I/O disabled\n");
448 #endif	/* APIC_IO */
449 }
450 
451 /*
452  * AP CPUs call this to sync up protected mode.
453  */
454 void
455 init_secondary(void)
456 {
457 	int	gsel_tss;
458 	int	x, myid = bootAP;
459 
460 	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
461 	gdt_segs[GPROC0_SEL].ssd_base =
462 		(int) &SMP_prvspace[myid].globaldata.gd_common_tss;
463 	SMP_prvspace[myid].globaldata.gd_prvspace = &SMP_prvspace[myid];
464 
465 	for (x = 0; x < NGDT; x++) {
466 		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
467 	}
468 
469 	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
470 	r_gdt.rd_base = (int) &gdt[myid * NGDT];
471 	lgdt(&r_gdt);			/* does magic intra-segment return */
472 
473 	lidt(&r_idt);
474 
475 	lldt(_default_ldt);
476 #ifdef USER_LDT
477 	currentldt = _default_ldt;
478 #endif
479 
480 	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
481 	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
482 	common_tss.tss_esp0 = 0;	/* not used until after switch */
483 	common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
484 	common_tss.tss_ioopt = (sizeof common_tss) << 16;
485 	tss_gdt = &gdt[myid * NGDT + GPROC0_SEL].sd;
486 	common_tssd = *tss_gdt;
487 	ltr(gsel_tss);
488 
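	/* CR0 0x8005003b == PG | AM | WP | NE | ET | TS | MP | PE */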
489 	load_cr0(0x8005003b);		/* XXX! */
490 
491 	pmap_set_opt();
492 }
493 
494 
495 #if defined(APIC_IO)
496 /*
497  * Final configuration of the BSP's local APIC:
498  *  - disable 'pic mode'.
499  *  - disable 'virtual wire mode'.
500  *  - enable NMI.
501  */
502 void
503 bsp_apic_configure(void)
504 {
505 	u_char		byte;
506 	u_int32_t	temp;
507 
508 	/* leave 'pic mode' if necessary */
509 	if (picmode) {
510 		outb(0x22, 0x70);	/* select IMCR */
511 		byte = inb(0x23);	/* current contents */
512 		byte |= 0x01;		/* mask external INTR */
513 		outb(0x23, byte);	/* disconnect 8259s/NMI */
514 	}
515 
516 	/* mask lint0 (the 8259 'virtual wire' connection) */
517 	temp = lapic.lvt_lint0;
518 	temp |= APIC_LVT_M;		/* set the mask */
519 	lapic.lvt_lint0 = temp;
520 
521 	/* setup lint1 to handle NMI */
522 	temp = lapic.lvt_lint1;
523 	temp &= ~APIC_LVT_M;		/* clear the mask */
524 	lapic.lvt_lint1 = temp;
525 
526 	if (bootverbose)
527 		apic_dump("bsp_apic_configure()");
528 }
529 #endif  /* APIC_IO */
530 
531 
532 /*******************************************************************
533  * local functions and data
534  */
535 
536 /*
537  * start the SMP system
538  */
539 static void
540 mp_enable(u_int boot_addr)
541 {
542 	int     x;
543 #if defined(APIC_IO)
544 	int     apic;
545 	u_int   ux;
546 #endif	/* APIC_IO */
547 
548 	POSTCODE(MP_ENABLE_POST);
549 
550 	/* turn on 4MB of V == P addressing so we can get to MP table */
551 	*(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
552 	invltlb();
553 
554 	/* examine the MP table for needed info, uses physical addresses */
555 	x = mptable_pass2();
556 
557 	*(int *)PTD = 0;
558 	invltlb();
559 
560 	/* can't process default configs till the CPU APIC is pmapped */
561 	if (x)
562 		default_mp_table(x);
563 
564 	/* post scan cleanup */
565 	fix_mp_table();
566 	setup_apic_irq_mapping();
567 
568 #if defined(APIC_IO)
569 
570 	/* fill the LOGICAL io_apic_versions table */
571 	for (apic = 0; apic < mp_napics; ++apic) {
572 		ux = io_apic_read(apic, IOAPIC_VER);
573 		io_apic_versions[apic] = ux;
574 	}
575 
576 	/* program each IO APIC in the system */
577 	for (apic = 0; apic < mp_napics; ++apic)
578 		if (io_apic_setup(apic) < 0)
579 			panic("IO APIC setup failure");
580 
581 	/* install a 'Spurious INTerrupt' vector */
582 	setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
583 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
584 
585 	/* install an inter-CPU IPI for TLB invalidation */
586 	setidt(XINVLTLB_OFFSET, Xinvltlb,
587 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
588 
589 #ifdef BETTER_CLOCK
590 	/* install an inter-CPU IPI for reading processor state */
591 	setidt(XCPUCHECKSTATE_OFFSET, Xcpucheckstate,
592 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
593 #endif
594 
595 	/* install an inter-CPU IPI for all-CPU rendezvous */
596 	setidt(XRENDEZVOUS_OFFSET, Xrendezvous,
597 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
598 
599 	/* install an inter-CPU IPI for forcing an additional software trap */
600 	setidt(XCPUAST_OFFSET, Xcpuast,
601 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
602 
603 	/* install an inter-CPU IPI for interrupt forwarding */
604 	setidt(XFORWARD_IRQ_OFFSET, Xforward_irq,
605 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
606 
607 	/* install an inter-CPU IPI for CPU stop/restart */
608 	setidt(XCPUSTOP_OFFSET, Xcpustop,
609 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
610 
611 #if defined(TEST_TEST1)
612 	/* install a "fake hardware INTerrupt" vector */
613 	setidt(XTEST1_OFFSET, Xtest1,
614 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
615 #endif  /** TEST_TEST1 */
616 
617 #endif	/* APIC_IO */
618 
619 	/* initialize all SMP locks */
620 	init_locks();
621 
622 	/* start each Application Processor */
623 	start_all_aps(boot_addr);
624 
625 	/*
626 	 * The init process might be started on a different CPU now,
627 	 * and the boot CPU might not call prepare_usermode to get
628 	 * cr0 correctly configured. Thus we initialize cr0 here.
629 	 */
630 	load_cr0(rcr0() | CR0_WP | CR0_AM);
631 }
632 
633 
634 /*
635  * look for the MP spec signature
636  */
637 
638 /* string defined by the Intel MP Spec as identifying the MP table */
639 #define MP_SIG		0x5f504d5f	/* _MP_ */
640 #define NEXT(X)		((X) += 4)
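/*
 * The MP Floating Pointer Structure must sit on a 16-byte boundary, so
 * NEXT() advances four 32-bit words (16 bytes) per probe.
 */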
641 static int
642 search_for_sig(u_int32_t target, int count)
643 {
644 	int     x;
645 	u_int32_t *addr = (u_int32_t *) (KERNBASE + target);
646 
647 	for (x = 0; x < count; NEXT(x))
648 		if (addr[x] == MP_SIG)
649 			/* make array index a byte index */
650 			return (target + (x * sizeof(u_int32_t)));
651 
652 	return -1;
653 }
654 
655 
656 static basetable_entry basetable_entry_types[] =
657 {
658 	{0, 20, "Processor"},
659 	{1, 8, "Bus"},
660 	{2, 8, "I/O APIC"},
661 	{3, 8, "I/O INT"},
662 	{4, 8, "Local INT"}
663 };
664 
665 typedef struct BUSDATA {
666 	u_char  bus_id;
667 	enum busTypes bus_type;
668 }       bus_datum;
669 
670 typedef struct INTDATA {
671 	u_char  int_type;
672 	u_short int_flags;
673 	u_char  src_bus_id;
674 	u_char  src_bus_irq;
675 	u_char  dst_apic_id;
676 	u_char  dst_apic_int;
677 	u_char	int_vector;
678 }       io_int, local_int;
679 
680 typedef struct BUSTYPENAME {
681 	u_char  type;
682 	char    name[7];
683 }       bus_type_name;
684 
685 static bus_type_name bus_type_table[] =
686 {
687 	{CBUS, "CBUS"},
688 	{CBUSII, "CBUSII"},
689 	{EISA, "EISA"},
690 	{UNKNOWN_BUSTYPE, "---"},
691 	{UNKNOWN_BUSTYPE, "---"},
692 	{ISA, "ISA"},
693 	{UNKNOWN_BUSTYPE, "---"},
694 	{UNKNOWN_BUSTYPE, "---"},
695 	{UNKNOWN_BUSTYPE, "---"},
696 	{UNKNOWN_BUSTYPE, "---"},
697 	{UNKNOWN_BUSTYPE, "---"},
698 	{UNKNOWN_BUSTYPE, "---"},
699 	{PCI, "PCI"},
700 	{UNKNOWN_BUSTYPE, "---"},
701 	{UNKNOWN_BUSTYPE, "---"},
702 	{UNKNOWN_BUSTYPE, "---"},
703 	{UNKNOWN_BUSTYPE, "---"},
704 	{XPRESS, "XPRESS"},
705 	{UNKNOWN_BUSTYPE, "---"}
706 };
707 /* from MP spec v1.4, table 5-1 */
708 static int default_data[7][5] =
709 {
710 /*   nbus, id0, type0, id1, type1 */
711 	{1, 0, ISA, 255, 255},
712 	{1, 0, EISA, 255, 255},
713 	{1, 0, EISA, 255, 255},
714 	{0, 255, 255, 255, 255},/* MCA not supported */
715 	{2, 0, ISA, 1, PCI},
716 	{2, 0, EISA, 1, PCI},
717 	{0, 255, 255, 255, 255}	/* MCA not supported */
718 };
719 
720 
721 /* the bus data */
722 static bus_datum bus_data[NBUS];
723 
724 /* the IO INT data, one entry per possible APIC INTerrupt */
725 static io_int  io_apic_ints[NINTR];
726 
727 static int nintrs;
728 
729 static int processor_entry	__P((proc_entry_ptr entry, int cpu));
730 static int bus_entry		__P((bus_entry_ptr entry, int bus));
731 static int io_apic_entry	__P((io_apic_entry_ptr entry, int apic));
732 static int int_entry		__P((int_entry_ptr entry, int intr));
733 static int lookup_bus_type	__P((char *name));
734 
735 
736 /*
737  * 1st pass on motherboard's Intel MP specification table.
738  *
739  * initializes:
740  *	mp_ncpus = 1
741  *
742  * determines:
743  *	cpu_apic_address (common to all CPUs)
744  *	io_apic_address[N]
745  *	mp_naps
746  *	mp_nbusses
747  *	mp_napics
748  *	nintrs
749  */
750 static int
751 mptable_pass1(void)
752 {
753 	int	x;
754 	mpcth_t	cth;
755 	int	totalSize;
756 	void*	position;
757 	int	count;
758 	int	type;
759 	int	mustpanic;
760 
761 	POSTCODE(MPTABLE_PASS1_POST);
762 
763 	mustpanic = 0;
764 
765 	/* clear various tables */
766 	for (x = 0; x < NAPICID; ++x) {
767 		io_apic_address[x] = ~0;	/* IO APIC address table */
768 	}
769 
770 	/* init everything to empty */
771 	mp_naps = 0;
772 	mp_nbusses = 0;
773 	mp_napics = 0;
774 	nintrs = 0;
775 
776 	/* check for use of 'default' configuration */
777 	if (MPFPS_MPFB1 != 0) {
778 		/* use default addresses */
779 		cpu_apic_address = DEFAULT_APIC_BASE;
780 		io_apic_address[0] = DEFAULT_IO_APIC_BASE;
781 
782 		/* fill in with defaults */
783 		mp_naps = 2;		/* includes BSP */
784 		mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
785 #if defined(APIC_IO)
786 		mp_napics = 1;
787 		nintrs = 16;
788 #endif	/* APIC_IO */
789 	}
790 	else {
791 		if ((cth = mpfps->pap) == 0)
792 			panic("MP Configuration Table Header MISSING!");
793 
794 		cpu_apic_address = (vm_offset_t) cth->apic_address;
795 
796 		/* walk the table, recording info of interest */
797 		totalSize = cth->base_table_length - sizeof(struct MPCTH);
798 		position = (u_char *) cth + sizeof(struct MPCTH);
799 		count = cth->entry_count;
800 
801 		while (count--) {
802 			switch (type = *(u_char *) position) {
803 			case 0: /* processor_entry */
804 				if (((proc_entry_ptr)position)->cpu_flags
805 					& PROCENTRY_FLAG_EN)
806 					++mp_naps;
807 				break;
808 			case 1: /* bus_entry */
809 				++mp_nbusses;
810 				break;
811 			case 2: /* io_apic_entry */
812 				if (((io_apic_entry_ptr)position)->apic_flags
813 					& IOAPICENTRY_FLAG_EN)
814 					io_apic_address[mp_napics++] =
815 					    (vm_offset_t)((io_apic_entry_ptr)
816 						position)->apic_address;
817 				break;
818 			case 3: /* int_entry */
819 				++nintrs;
820 				break;
821 			case 4:	/* local interrupt entry */
822 				break;
823 			default:
824 				panic("mpfps Base Table HOSED!");
825 				/* NOTREACHED */
826 			}
827 
828 			totalSize -= basetable_entry_types[type].length;
829 			position = (u_char *)position + basetable_entry_types[type].length;
830 		}
831 	}
832 
833 	/* qualify the numbers */
834 	if (mp_naps > NCPU) {
835 		printf("Warning: only using %d of %d available CPUs!\n",
836 			NCPU, mp_naps);
837 		mp_naps = NCPU;
838 	}
839 	if (mp_nbusses > NBUS) {
840 		printf("found %d busses, increase NBUS\n", mp_nbusses);
841 		mustpanic = 1;
842 	}
843 	if (mp_napics > NAPIC) {
844 		printf("found %d apics, increase NAPIC\n", mp_napics);
845 		mustpanic = 1;
846 	}
847 	if (nintrs > NINTR) {
848 		printf("found %d intrs, increase NINTR\n", nintrs);
849 		mustpanic = 1;
850 	}
851 
852 	/*
853 	 * Count the BSP.
854 	 * This is also used as a counter while starting the APs.
855 	 */
856 	mp_ncpus = 1;
857 
858 	--mp_naps;	/* subtract the BSP */
859 
860 	return mustpanic;
861 }
862 
863 
864 /*
865  * 2nd pass on motherboard's Intel MP specification table.
866  *
867  * sets:
868  *	boot_cpu_id
869  *	ID_TO_IO(N), phy APIC ID to log CPU/IO table
870  *	CPU_TO_ID(N), logical CPU to APIC ID table
871  *	IO_TO_ID(N), logical IO to APIC ID table
872  *	bus_data[N]
873  *	io_apic_ints[N]
874  */
875 static int
876 mptable_pass2(void)
877 {
878 	int     x;
879 	mpcth_t cth;
880 	int     totalSize;
881 	void*   position;
882 	int     count;
883 	int     type;
884 	int     apic, bus, cpu, intr;
885 
886 	POSTCODE(MPTABLE_PASS2_POST);
887 
888 	/* clear various tables */
889 	for (x = 0; x < NAPICID; ++x) {
890 		ID_TO_IO(x) = -1;	/* phy APIC ID to log CPU/IO table */
891 		CPU_TO_ID(x) = -1;	/* logical CPU to APIC ID table */
892 		IO_TO_ID(x) = -1;	/* logical IO to APIC ID table */
893 	}
894 
895 	/* clear bus data table */
896 	for (x = 0; x < NBUS; ++x)
897 		bus_data[x].bus_id = 0xff;
898 
899 	/* clear IO APIC INT table */
900 	for (x = 0; x < NINTR; ++x) {
901 		io_apic_ints[x].int_type = 0xff;
902 		io_apic_ints[x].int_vector = 0xff;
903 	}
904 
905 	/* setup the cpu/apic mapping arrays */
906 	boot_cpu_id = -1;
907 
908 	/* record whether PIC or virtual-wire mode */
909 	picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
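	/*
	 * mpfb2 bit 7 (IMCRP) set means an IMCR is present and the system
	 * comes up in PIC mode; clear means virtual wire mode.
	 */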
910 
911 	/* check for use of 'default' configuration */
912 	if (MPFPS_MPFB1 != 0)
913 		return MPFPS_MPFB1;	/* return default configuration type */
914 
915 	if ((cth = mpfps->pap) == 0)
916 		panic("MP Configuration Table Header MISSING!");
917 
918 	/* walk the table, recording info of interest */
919 	totalSize = cth->base_table_length - sizeof(struct MPCTH);
920 	position = (u_char *) cth + sizeof(struct MPCTH);
921 	count = cth->entry_count;
922 	apic = bus = intr = 0;
923 	cpu = 1;				/* pre-count the BSP */
924 
925 	while (count--) {
926 		switch (type = *(u_char *) position) {
927 		case 0:
928 			if (processor_entry(position, cpu))
929 				++cpu;
930 			break;
931 		case 1:
932 			if (bus_entry(position, bus))
933 				++bus;
934 			break;
935 		case 2:
936 			if (io_apic_entry(position, apic))
937 				++apic;
938 			break;
939 		case 3:
940 			if (int_entry(position, intr))
941 				++intr;
942 			break;
943 		case 4:
944 			/* int_entry(position); */
945 			break;
946 		default:
947 			panic("mpfps Base Table HOSED!");
948 			/* NOTREACHED */
949 		}
950 
951 		totalSize -= basetable_entry_types[type].length;
952 		position = (u_char *)position + basetable_entry_types[type].length;
953 	}
954 
955 	if (boot_cpu_id == -1)
956 		panic("NO BSP found!");
957 
958 	/* report the fact that it's NOT a default configuration */
959 	return 0;
960 }
961 
962 
963 static void
964 assign_apic_irq(int apic, int intpin, int irq)
965 {
966 	int x;
967 
968 	if (int_to_apicintpin[irq].ioapic != -1)
969 		panic("assign_apic_irq: inconsistent table");
970 
971 	int_to_apicintpin[irq].ioapic = apic;
972 	int_to_apicintpin[irq].int_pin = intpin;
973 	int_to_apicintpin[irq].apic_address = ioapic[apic];
974 	int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
975 
976 	for (x = 0; x < nintrs; x++) {
977 		if ((io_apic_ints[x].int_type == 0 ||
978 		     io_apic_ints[x].int_type == 3) &&
979 		    io_apic_ints[x].int_vector == 0xff &&
980 		    io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
981 		    io_apic_ints[x].dst_apic_int == intpin)
982 			io_apic_ints[x].int_vector = irq;
983 	}
984 }
985 
986 /*
987  * parse an Intel MP specification table
988  */
989 static void
990 fix_mp_table(void)
991 {
992 	int	x;
993 	int	id;
994 	int	bus_0 = 0;	/* Stop GCC warning */
995 	int	bus_pci = 0;	/* Stop GCC warning */
996 	int	num_pci_bus;
997 
998 	/*
999 	 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
1000 	 * did it wrong.  The MP spec says that when more than 1 PCI bus
1001 	 * exists the BIOS must begin with bus entries for the PCI bus and use
1002 	 * actual PCI bus numbering.  This implies that when only 1 PCI bus
1003 	 * exists the BIOS can choose to ignore this ordering, and indeed many
1004 	 * MP motherboards do ignore it.  This causes a problem when the PCI
1005 	 * sub-system makes requests of the MP sub-system based on PCI bus
1006 	 * numbers.	So here we look for the situation and renumber the
1007 	 * busses and associated INTs in an effort to "make it right".
1008 	 */
1009 
1010 	/* find bus 0, PCI bus, count the number of PCI busses */
1011 	for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
1012 		if (bus_data[x].bus_id == 0) {
1013 			bus_0 = x;
1014 		}
1015 		if (bus_data[x].bus_type == PCI) {
1016 			++num_pci_bus;
1017 			bus_pci = x;
1018 		}
1019 	}
1020 	/*
1021 	 * bus_0 == slot of bus with ID of 0
1022 	 * bus_pci == slot of last PCI bus encountered
1023 	 */
1024 
1025 	/* check the 1 PCI bus case for sanity */
1026 	if (num_pci_bus == 1) {
1027 
1028 		/* if it is number 0 all is well */
1029 		if (bus_data[bus_pci].bus_id == 0)
1030 			return;
1031 
1032 		/* mis-numbered, swap with whichever bus uses slot 0 */
1033 
1034 		/* swap the bus entry types */
1035 		bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
1036 		bus_data[bus_0].bus_type = PCI;
1037 
1038 		/* swap each relevant INTerrupt entry */
1039 		id = bus_data[bus_pci].bus_id;
1040 		for (x = 0; x < nintrs; ++x) {
1041 			if (io_apic_ints[x].src_bus_id == id) {
1042 				io_apic_ints[x].src_bus_id = 0;
1043 			}
1044 			else if (io_apic_ints[x].src_bus_id == 0) {
1045 				io_apic_ints[x].src_bus_id = id;
1046 			}
1047 		}
1048 	}
1049 }
1050 
1051 
1052 static void
1053 setup_apic_irq_mapping(void)
1054 {
1055 	int	x;
1056 	int	int_vector;
1057 
1058 	/* Assign low level interrupt handlers */
1059 	for (x = 0; x < APIC_INTMAPSIZE; x++) {
1060 		int_to_apicintpin[x].ioapic = -1;
1061 		int_to_apicintpin[x].int_pin = 0;
1062 		int_to_apicintpin[x].apic_address = NULL;
1063 		int_to_apicintpin[x].redirindex = 0;
1064 	}
1065 	for (x = 0; x < nintrs; x++) {
1066 		if (io_apic_ints[x].dst_apic_int < APIC_INTMAPSIZE &&
1067 		    io_apic_ints[x].dst_apic_id == IO_TO_ID(0) &&
1068 		    io_apic_ints[x].int_vector == 0xff &&
1069 		    (io_apic_ints[x].int_type == 0 ||
1070 		     io_apic_ints[x].int_type == 3)) {
1071 			assign_apic_irq(0,
1072 					io_apic_ints[x].dst_apic_int,
1073 					io_apic_ints[x].dst_apic_int);
1074 		}
1075 	}
1076 	int_vector = 0;
1077 	while (int_vector < APIC_INTMAPSIZE &&
1078 	       int_to_apicintpin[int_vector].ioapic != -1)
1079 		int_vector++;
1080 	for (x = 0; x < nintrs && int_vector < APIC_INTMAPSIZE; x++) {
1081 		if ((io_apic_ints[x].int_type == 0 ||
1082 		     io_apic_ints[x].int_type == 3) &&
1083 		    io_apic_ints[x].int_vector == 0xff) {
1084 			assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id),
1085 					io_apic_ints[x].dst_apic_int,
1086 					int_vector);
1087 			int_vector++;
1088 			while (int_vector < APIC_INTMAPSIZE &&
1089 			       int_to_apicintpin[int_vector].ioapic != -1)
1090 				int_vector++;
1091 		}
1092 	}
1093 }
1094 
1095 
1096 static int
1097 processor_entry(proc_entry_ptr entry, int cpu)
1098 {
1099 	/* check for usability */
1100 	if ((cpu >= NCPU) || !(entry->cpu_flags & PROCENTRY_FLAG_EN))
1101 		return 0;
1102 
1103 	/* check for BSP flag */
1104 	if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
1105 		boot_cpu_id = entry->apic_id;
1106 		CPU_TO_ID(0) = entry->apic_id;
1107 		ID_TO_CPU(entry->apic_id) = 0;
1108 		return 0;	/* it's already been counted */
1109 	}
1110 
1111 	/* add another AP to list, if less than max number of CPUs */
1112 	else {
1113 		CPU_TO_ID(cpu) = entry->apic_id;
1114 		ID_TO_CPU(entry->apic_id) = cpu;
1115 		return 1;
1116 	}
1117 }
1118 
1119 
1120 static int
1121 bus_entry(bus_entry_ptr entry, int bus)
1122 {
1123 	int     x;
1124 	char    c, name[8];
1125 
1126 	/* encode the name into an index */
1127 	for (x = 0; x < 6; ++x) {
1128 		if ((c = entry->bus_type[x]) == ' ')
1129 			break;
1130 		name[x] = c;
1131 	}
1132 	name[x] = '\0';
1133 
1134 	if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE)
1135 		panic("unknown bus type: '%s'", name);
1136 
1137 	bus_data[bus].bus_id = entry->bus_id;
1138 	bus_data[bus].bus_type = x;
1139 
1140 	return 1;
1141 }
1142 
1143 
1144 static int
1145 io_apic_entry(io_apic_entry_ptr entry, int apic)
1146 {
1147 	if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
1148 		return 0;
1149 
1150 	IO_TO_ID(apic) = entry->apic_id;
1151 	ID_TO_IO(entry->apic_id) = apic;
1152 
1153 	return 1;
1154 }
1155 
1156 
1157 static int
1158 lookup_bus_type(char *name)
1159 {
1160 	int     x;
1161 
1162 	for (x = 0; x < MAX_BUSTYPE; ++x)
1163 		if (strcmp(bus_type_table[x].name, name) == 0)
1164 			return bus_type_table[x].type;
1165 
1166 	return UNKNOWN_BUSTYPE;
1167 }
1168 
1169 
1170 static int
1171 int_entry(int_entry_ptr entry, int intr)
1172 {
1173 	int apic;
1174 
1175 	io_apic_ints[intr].int_type = entry->int_type;
1176 	io_apic_ints[intr].int_flags = entry->int_flags;
1177 	io_apic_ints[intr].src_bus_id = entry->src_bus_id;
1178 	io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
1179 	if (entry->dst_apic_id == 255) {
1180 		/* This signal goes to all IO APICs.  Select an IO APIC
1181 		   with a sufficient number of interrupt pins */
1182 		for (apic = 0; apic < mp_napics; apic++)
1183 			if (((io_apic_read(apic, IOAPIC_VER) &
1184 			      IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >=
1185 			    entry->dst_apic_int)
1186 				break;
1187 		if (apic < mp_napics)
1188 			io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic);
1189 		else
1190 			io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1191 	} else
1192 		io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1193 	io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
1194 
1195 	return 1;
1196 }
1197 
1198 
1199 static int
1200 apic_int_is_bus_type(int intr, int bus_type)
1201 {
1202 	int     bus;
1203 
1204 	for (bus = 0; bus < mp_nbusses; ++bus)
1205 		if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
1206 		    && ((int) bus_data[bus].bus_type == bus_type))
1207 			return 1;
1208 
1209 	return 0;
1210 }
1211 
1212 
1213 /*
1214  * Given a traditional ISA INT mask, return an APIC mask.
1215  */
1216 u_int
1217 isa_apic_mask(u_int isa_mask)
1218 {
1219 	int isa_irq;
1220 	int apic_pin;
1221 
1222 #if defined(SKIP_IRQ15_REDIRECT)
1223 	if (isa_mask == (1 << 15)) {
1224 		printf("skipping ISA IRQ15 redirect\n");
1225 		return isa_mask;
1226 	}
1227 #endif  /* SKIP_IRQ15_REDIRECT */
1228 
1229 	isa_irq = ffs(isa_mask);		/* find its bit position */
1230 	if (isa_irq == 0)			/* doesn't exist */
1231 		return 0;
1232 	--isa_irq;				/* make it zero based */
1233 
1234 	apic_pin = isa_apic_irq(isa_irq);	/* look for APIC connection */
1235 	if (apic_pin == -1)
1236 		return 0;
1237 
1238 	return (1 << apic_pin);			/* convert pin# to a mask */
1239 }
1240 
1241 
1242 /*
1243  * Determine which APIC pin an ISA/EISA INT is attached to.
1244  */
1245 #define INTTYPE(I)	(io_apic_ints[(I)].int_type)
1246 #define INTPIN(I)	(io_apic_ints[(I)].dst_apic_int)
1247 #define INTIRQ(I)	(io_apic_ints[(I)].int_vector)
1248 #define INTAPIC(I)	(ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
1249 
1250 #define SRCBUSIRQ(I)	(io_apic_ints[(I)].src_bus_irq)
1251 int
1252 isa_apic_irq(int isa_irq)
1253 {
1254 	int     intr;
1255 
1256 	for (intr = 0; intr < nintrs; ++intr) {		/* check each record */
1257 		if (INTTYPE(intr) == 0) {		/* standard INT */
1258 			if (SRCBUSIRQ(intr) == isa_irq) {
1259 				if (apic_int_is_bus_type(intr, ISA) ||
1260 			            apic_int_is_bus_type(intr, EISA))
1261 					return INTIRQ(intr);	/* found */
1262 			}
1263 		}
1264 	}
1265 	return -1;					/* NOT found */
1266 }
1267 
1268 
1269 /*
1270  * Determine which APIC pin a PCI INT is attached to.
1271  */
1272 #define SRCBUSID(I)	(io_apic_ints[(I)].src_bus_id)
1273 #define SRCBUSDEVICE(I)	((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
1274 #define SRCBUSLINE(I)	(io_apic_ints[(I)].src_bus_irq & 0x03)
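
/*
 * For PCI buses the MP table packs the source bus IRQ as the device number
 * in bits 2..6 and the INT line (INTA#..INTD# == 0..3) in bits 0..1, which
 * is what SRCBUSDEVICE() and SRCBUSLINE() extract.
 */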
1275 int
1276 pci_apic_irq(int pciBus, int pciDevice, int pciInt)
1277 {
1278 	int     intr;
1279 
1280 	--pciInt;					/* zero based */
1281 
1282 	for (intr = 0; intr < nintrs; ++intr)		/* check each record */
1283 		if ((INTTYPE(intr) == 0)		/* standard INT */
1284 		    && (SRCBUSID(intr) == pciBus)
1285 		    && (SRCBUSDEVICE(intr) == pciDevice)
1286 		    && (SRCBUSLINE(intr) == pciInt))	/* a candidate IRQ */
1287 			if (apic_int_is_bus_type(intr, PCI))
1288 				return INTIRQ(intr);	/* exact match */
1289 
1290 	return -1;					/* NOT found */
1291 }
1292 
1293 int
1294 next_apic_irq(int irq)
1295 {
1296 	int intr, ointr;
1297 	int bus, bustype;
1298 
1299 	bus = 0;
1300 	bustype = 0;
1301 	for (intr = 0; intr < nintrs; intr++) {
1302 		if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
1303 			continue;
1304 		bus = SRCBUSID(intr);
1305 		bustype = apic_bus_type(bus);
1306 		if (bustype != ISA &&
1307 		    bustype != EISA &&
1308 		    bustype != PCI)
1309 			continue;
1310 		break;
1311 	}
1312 	if (intr >= nintrs) {
1313 		return -1;
1314 	}
1315 	for (ointr = intr + 1; ointr < nintrs; ointr++) {
1316 		if (INTTYPE(ointr) != 0)
1317 			continue;
1318 		if (bus != SRCBUSID(ointr))
1319 			continue;
1320 		if (bustype == PCI) {
1321 			if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
1322 				continue;
1323 			if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
1324 				continue;
1325 		}
1326 		if (bustype == ISA || bustype == EISA) {
1327 			if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
1328 				continue;
1329 		}
1330 		if (INTPIN(intr) == INTPIN(ointr))
1331 			continue;
1332 		break;
1333 	}
1334 	if (ointr >= nintrs) {
1335 		return -1;
1336 	}
1337 	return INTIRQ(ointr);
1338 }
1339 #undef SRCBUSLINE
1340 #undef SRCBUSDEVICE
1341 #undef SRCBUSID
1342 #undef SRCBUSIRQ
1343 
1344 #undef INTPIN
1345 #undef INTIRQ
1346 #undef INTAPIC
1347 #undef INTTYPE
1348 
1349 
1350 /*
1351  * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
1352  *
1353  * XXX FIXME:
1354  *  Exactly what this means is unclear at this point.  It is a solution
1355  *  for motherboards that redirect the MBIRQ0 pin.  Generically a motherboard
1356  *  could route any of the ISA INTs to upper (>15) IRQ values.  But most would
1357  *  NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
1358  *  option.
1359  */
1360 int
1361 undirect_isa_irq(int rirq)
1362 {
1363 #if defined(READY)
1364 	if (bootverbose)
1365 	    printf("Freeing redirected ISA irq %d.\n", rirq);
1366 	/** FIXME: tickle the MB redirector chip */
1367 	return ???;
1368 #else
1369 	if (bootverbose)
1370 	    printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
1371 	return 0;
1372 #endif  /* READY */
1373 }
1374 
1375 
1376 /*
1377  * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
1378  */
1379 int
1380 undirect_pci_irq(int rirq)
1381 {
1382 #if defined(READY)
1383 	if (bootverbose)
1384 		printf("Freeing redirected PCI irq %d.\n", rirq);
1385 
1386 	/** FIXME: tickle the MB redirector chip */
1387 	return ???;
1388 #else
1389 	if (bootverbose)
1390 		printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
1391 		       rirq);
1392 	return 0;
1393 #endif  /* READY */
1394 }
1395 
1396 
1397 /*
1398  * given a bus ID, return:
1399  *  the bus type if found
1400  *  -1 if NOT found
1401  */
1402 int
1403 apic_bus_type(int id)
1404 {
1405 	int     x;
1406 
1407 	for (x = 0; x < mp_nbusses; ++x)
1408 		if (bus_data[x].bus_id == id)
1409 			return bus_data[x].bus_type;
1410 
1411 	return -1;
1412 }
1413 
1414 
1415 /*
1416  * given a LOGICAL APIC# and pin#, return:
1417  *  the associated src bus ID if found
1418  *  -1 if NOT found
1419  */
1420 int
1421 apic_src_bus_id(int apic, int pin)
1422 {
1423 	int     x;
1424 
1425 	/* search each of the possible INTerrupt sources */
1426 	for (x = 0; x < nintrs; ++x)
1427 		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1428 		    (pin == io_apic_ints[x].dst_apic_int))
1429 			return (io_apic_ints[x].src_bus_id);
1430 
1431 	return -1;		/* NOT found */
1432 }
1433 
1434 
1435 /*
1436  * given a LOGICAL APIC# and pin#, return:
1437  *  the associated src bus IRQ if found
1438  *  -1 if NOT found
1439  */
1440 int
1441 apic_src_bus_irq(int apic, int pin)
1442 {
1443 	int     x;
1444 
1445 	for (x = 0; x < nintrs; x++)
1446 		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1447 		    (pin == io_apic_ints[x].dst_apic_int))
1448 			return (io_apic_ints[x].src_bus_irq);
1449 
1450 	return -1;		/* NOT found */
1451 }
1452 
1453 
1454 /*
1455  * given a LOGICAL APIC# and pin#, return:
1456  *  the associated INTerrupt type if found
1457  *  -1 if NOT found
1458  */
1459 int
1460 apic_int_type(int apic, int pin)
1461 {
1462 	int     x;
1463 
1464 	/* search each of the possible INTerrupt sources */
1465 	for (x = 0; x < nintrs; ++x)
1466 		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1467 		    (pin == io_apic_ints[x].dst_apic_int))
1468 			return (io_apic_ints[x].int_type);
1469 
1470 	return -1;		/* NOT found */
1471 }
1472 
1473 int
1474 apic_irq(int apic, int pin)
1475 {
1476 	int x;
1477 	int res;
1478 
1479 	for (x = 0; x < nintrs; ++x)
1480 		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1481 		    (pin == io_apic_ints[x].dst_apic_int)) {
1482 			res = io_apic_ints[x].int_vector;
1483 			if (res == 0xff)
1484 				return -1;
1485 			if (apic != int_to_apicintpin[res].ioapic)
1486 				panic("apic_irq: inconsistent table");
1487 			if (pin != int_to_apicintpin[res].int_pin)
1488 				panic("apic_irq inconsistent table (2)");
1489 			return res;
1490 		}
1491 	return -1;
1492 }
1493 
1494 
1495 /*
1496  * given a LOGICAL APIC# and pin#, return:
1497  *  the associated trigger mode if found
1498  *  -1 if NOT found
1499  */
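/*
 * MP INT entry flags encode the polarity (PO) in bits 0-1 and the trigger
 * mode (EL) in bits 2-3: 00 conforms to the bus, 01 is active-high/edge
 * and 11 is active-low/level.
 */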
1500 int
1501 apic_trigger(int apic, int pin)
1502 {
1503 	int     x;
1504 
1505 	/* search each of the possible INTerrupt sources */
1506 	for (x = 0; x < nintrs; ++x)
1507 		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1508 		    (pin == io_apic_ints[x].dst_apic_int))
1509 			return ((io_apic_ints[x].int_flags >> 2) & 0x03);
1510 
1511 	return -1;		/* NOT found */
1512 }
1513 
1514 
1515 /*
1516  * given a LOGICAL APIC# and pin#, return:
1517  *  the associated 'active' level if found
1518  *  -1 if NOT found
1519  */
1520 int
1521 apic_polarity(int apic, int pin)
1522 {
1523 	int     x;
1524 
1525 	/* search each of the possible INTerrupt sources */
1526 	for (x = 0; x < nintrs; ++x)
1527 		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1528 		    (pin == io_apic_ints[x].dst_apic_int))
1529 			return (io_apic_ints[x].int_flags & 0x03);
1530 
1531 	return -1;		/* NOT found */
1532 }
1533 
1534 
1535 /*
1536  * set data according to MP defaults
1537  * FIXME: probably not complete yet...
1538  */
1539 static void
1540 default_mp_table(int type)
1541 {
1542 	int     ap_cpu_id;
1543 #if defined(APIC_IO)
1544 	u_int32_t ux;
1545 	int     io_apic_id;
1546 	int     pin;
1547 #endif	/* APIC_IO */
1548 
1549 #if 0
1550 	printf("  MP default config type: %d\n", type);
1551 	switch (type) {
1552 	case 1:
1553 		printf("   bus: ISA, APIC: 82489DX\n");
1554 		break;
1555 	case 2:
1556 		printf("   bus: EISA, APIC: 82489DX\n");
1557 		break;
1558 	case 3:
1559 		printf("   bus: EISA, APIC: 82489DX\n");
1560 		break;
1561 	case 4:
1562 		printf("   bus: MCA, APIC: 82489DX\n");
1563 		break;
1564 	case 5:
1565 		printf("   bus: ISA+PCI, APIC: Integrated\n");
1566 		break;
1567 	case 6:
1568 		printf("   bus: EISA+PCI, APIC: Integrated\n");
1569 		break;
1570 	case 7:
1571 		printf("   bus: MCA+PCI, APIC: Integrated\n");
1572 		break;
1573 	default:
1574 		printf("   future type\n");
1575 		break;
1576 		/* NOTREACHED */
1577 	}
1578 #endif	/* 0 */
1579 
1580 	boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
1581 	ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;
1582 
1583 	/* BSP */
1584 	CPU_TO_ID(0) = boot_cpu_id;
1585 	ID_TO_CPU(boot_cpu_id) = 0;
1586 
1587 	/* one and only AP */
1588 	CPU_TO_ID(1) = ap_cpu_id;
1589 	ID_TO_CPU(ap_cpu_id) = 1;
1590 
1591 #if defined(APIC_IO)
1592 	/* one and only IO APIC */
1593 	io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
1594 
1595 	/*
1596 	 * sanity check, refer to MP spec section 3.6.6, last paragraph
1597 	 * necessary as some hardware isn't properly setting up the IO APIC
1598 	 */
1599 #if defined(REALLY_ANAL_IOAPICID_VALUE)
1600 	if (io_apic_id != 2) {
1601 #else
1602 	if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
1603 #endif	/* REALLY_ANAL_IOAPICID_VALUE */
1604 		ux = io_apic_read(0, IOAPIC_ID);	/* get current contents */
1605 		ux &= ~APIC_ID_MASK;	/* clear the ID field */
1606 		ux |= 0x02000000;	/* set it to '2' */
1607 		io_apic_write(0, IOAPIC_ID, ux);	/* write new value */
1608 		ux = io_apic_read(0, IOAPIC_ID);	/* re-read && test */
1609 		if ((ux & APIC_ID_MASK) != 0x02000000)
1610 			panic("can't control IO APIC ID, reg: 0x%08x", ux);
1611 		io_apic_id = 2;
1612 	}
1613 	IO_TO_ID(0) = io_apic_id;
1614 	ID_TO_IO(io_apic_id) = 0;
1615 #endif	/* APIC_IO */
1616 
1617 	/* fill out bus entries */
1618 	switch (type) {
1619 	case 1:
1620 	case 2:
1621 	case 3:
1622 	case 5:
1623 	case 6:
1624 		bus_data[0].bus_id = default_data[type - 1][1];
1625 		bus_data[0].bus_type = default_data[type - 1][2];
1626 		bus_data[1].bus_id = default_data[type - 1][3];
1627 		bus_data[1].bus_type = default_data[type - 1][4];
1628 		break;
1629 
1630 	/* case 4: case 7:		   MCA NOT supported */
1631 	default:		/* illegal/reserved */
1632 		panic("BAD default MP config: %d", type);
1633 		/* NOTREACHED */
1634 	}
1635 
1636 #if defined(APIC_IO)
1637 	/* general cases from MP v1.4, table 5-2 */
1638 	for (pin = 0; pin < 16; ++pin) {
1639 		io_apic_ints[pin].int_type = 0;
1640 		io_apic_ints[pin].int_flags = 0x05;	/* edge/active-hi */
1641 		io_apic_ints[pin].src_bus_id = 0;
1642 		io_apic_ints[pin].src_bus_irq = pin;	/* IRQ2 caught below */
1643 		io_apic_ints[pin].dst_apic_id = io_apic_id;
1644 		io_apic_ints[pin].dst_apic_int = pin;	/* 1-to-1 */
1645 	}
1646 
1647 	/* special cases from MP v1.4, table 5-2 */
1648 	if (type == 2) {
1649 		io_apic_ints[2].int_type = 0xff;	/* N/C */
1650 		io_apic_ints[13].int_type = 0xff;	/* N/C */
1651 #if !defined(APIC_MIXED_MODE)
1652 		/** FIXME: ??? */
1653 		panic("sorry, can't support type 2 default yet");
1654 #endif	/* APIC_MIXED_MODE */
1655 	}
1656 	else
1657 		io_apic_ints[2].src_bus_irq = 0;	/* ISA IRQ0 is on APIC INT 2 */
1658 
1659 	if (type == 7)
1660 		io_apic_ints[0].int_type = 0xff;	/* N/C */
1661 	else
1662 		io_apic_ints[0].int_type = 3;	/* vectored 8259 */
1663 #endif	/* APIC_IO */
1664 }
1665 
1666 
1667 /*
1668  * initialize all the SMP locks
1669  */
1670 
1671 /* critical region around IO APIC, apic_imen */
1672 struct simplelock	imen_lock;
1673 
1674 /* critical region around splxx(), cpl, cml, cil, ipending */
1675 struct simplelock	cpl_lock;
1676 
1677 /* Make FAST_INTR() routines sequential */
1678 struct simplelock	fast_intr_lock;
1679 
1680 /* critical region around INTR() routines */
1681 struct simplelock	intr_lock;
1682 
1683 /* lock regions protected in UP kernel via cli/sti */
1684 struct simplelock	mpintr_lock;
1685 
1686 /* lock region used by kernel profiling */
1687 struct simplelock	mcount_lock;
1688 
1689 #ifdef USE_COMLOCK
1690 /* locks com (tty) data/hardware accesses: a FASTINTR() */
1691 struct simplelock	com_lock;
1692 #endif /* USE_COMLOCK */
1693 
1694 #ifdef USE_CLOCKLOCK
1695 /* lock regions around the clock hardware */
1696 struct simplelock	clock_lock;
1697 #endif /* USE_CLOCKLOCK */
1698 
1699 /* lock around the MP rendezvous */
1700 static struct simplelock smp_rv_lock;
1701 
1702 static void
1703 init_locks(void)
1704 {
1705 	/*
1706 	 * Get the initial mp_lock with a count of 1 for the BSP.
1707 	 * This uses a LOGICAL cpu ID, ie BSP == 0.
1708 	 */
1709 	mp_lock = 0x00000001;
1710 
1711 	/* ISR uses its own "giant lock" */
1712 	isr_lock = FREE_LOCK;
1713 
1714 #if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
1715 	s_lock_init((struct simplelock*)&apic_itrace_debuglock);
1716 #endif
1717 
1718 	s_lock_init((struct simplelock*)&mpintr_lock);
1719 
1720 	s_lock_init((struct simplelock*)&mcount_lock);
1721 
1722 	s_lock_init((struct simplelock*)&fast_intr_lock);
1723 	s_lock_init((struct simplelock*)&intr_lock);
1724 	s_lock_init((struct simplelock*)&imen_lock);
1725 	s_lock_init((struct simplelock*)&cpl_lock);
1726 	s_lock_init(&smp_rv_lock);
1727 
1728 #ifdef USE_COMLOCK
1729 	s_lock_init((struct simplelock*)&com_lock);
1730 #endif /* USE_COMLOCK */
1731 #ifdef USE_CLOCKLOCK
1732 	s_lock_init((struct simplelock*)&clock_lock);
1733 #endif /* USE_CLOCKLOCK */
1734 }
1735 
1736 
1737 /* Wait for all APs to be fully initialized */
1738 extern int wait_ap(unsigned int);
1739 
1740 /*
1741  * start each AP in our list
1742  */
1743 static int
1744 start_all_aps(u_int boot_addr)
1745 {
1746 	int     x, i, pg;
1747 	u_char  mpbiosreason;
1748 	u_long  mpbioswarmvec;
1749 	struct globaldata *gd;
1750 	char *stack;
1751 
1752 	POSTCODE(START_ALL_APS_POST);
1753 
1754 	/* initialize BSP's local APIC */
1755 	apic_initialize();
1756 	bsp_apic_ready = 1;
1757 
1758 	/* install the AP 1st level boot code */
1759 	install_ap_tramp(boot_addr);
1760 
1761 
1762 	/* save the current value of the warm-start vector */
1763 	mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
1764 #ifndef PC98
1765 	outb(CMOS_REG, BIOS_RESET);
1766 	mpbiosreason = inb(CMOS_DATA);
1767 #endif
1768 
1769 	/* record BSP in CPU map */
1770 	all_cpus = 1;
1771 
1772 	/* set up 0 -> 4MB P==V mapping for AP boot */
1773 	*(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
1774 	invltlb();
1775 
1776 	/* start each AP */
1777 	for (x = 1; x <= mp_naps; ++x) {
1778 
1779 		/* This is a bit verbose, it will go away soon.  */
1780 
1781 		/* first page of AP's private space */
1782 		pg = x * i386_btop(sizeof(struct privatespace));
1783 
1784 		/* allocate a new private data page */
1785 		gd = (struct globaldata *)kmem_alloc(kernel_map, PAGE_SIZE);
1786 
1787 		/* wire it into the private page table page */
1788 		SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(gd));
1789 
1790 		/* allocate and set up an idle stack data page */
1791 		stack = (char *)kmem_alloc(kernel_map, UPAGES*PAGE_SIZE);
1792 		for (i = 0; i < UPAGES; i++)
1793 			SMPpt[pg + 5 + i] = (pt_entry_t)
1794 			    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
1795 
1796 		SMPpt[pg + 1] = 0;		/* *prv_CMAP1 */
1797 		SMPpt[pg + 2] = 0;		/* *prv_CMAP2 */
1798 		SMPpt[pg + 3] = 0;		/* *prv_CMAP3 */
1799 		SMPpt[pg + 4] = 0;		/* *prv_PMAP1 */
1800 
1801 		/* prime data page for it to use */
1802 		gd->gd_cpuid = x;
1803 		gd->gd_cpu_lockid = x << 24;
1804 		gd->gd_prv_CMAP1 = &SMPpt[pg + 1];
1805 		gd->gd_prv_CMAP2 = &SMPpt[pg + 2];
1806 		gd->gd_prv_CMAP3 = &SMPpt[pg + 3];
1807 		gd->gd_prv_PMAP1 = &SMPpt[pg + 4];
1808 		gd->gd_prv_CADDR1 = SMP_prvspace[x].CPAGE1;
1809 		gd->gd_prv_CADDR2 = SMP_prvspace[x].CPAGE2;
1810 		gd->gd_prv_CADDR3 = SMP_prvspace[x].CPAGE3;
1811 		gd->gd_prv_PADDR1 = (unsigned *)SMP_prvspace[x].PPAGE1;
1812 
1813 		/* setup a vector to our boot code */
1814 		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
1815 		*((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
1816 #ifndef PC98
1817 		outb(CMOS_REG, BIOS_RESET);
1818 		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
1819 #endif
1820 
1821 		bootSTK = &SMP_prvspace[x].idlestack[UPAGES*PAGE_SIZE];
1822 		bootAP = x;
1823 
1824 		/* attempt to start the Application Processor */
1825 		CHECK_INIT(99);	/* setup checkpoints */
1826 		if (!start_ap(x, boot_addr)) {
1827 			printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
1828 			CHECK_PRINT("trace");	/* show checkpoints */
1829 			/* better panic as the AP may be running loose */
1830 			printf("panic y/n? [y] ");
1831 			if (cngetc() != 'n')
1832 				panic("bye-bye");
1833 		}
1834 		CHECK_PRINT("trace");		/* show checkpoints */
1835 
1836 		/* record its version info */
1837 		cpu_apic_versions[x] = cpu_apic_versions[0];
1838 
1839 		all_cpus |= (1 << x);		/* record AP in CPU map */
1840 	}
1841 
1842 	/* build our map of 'other' CPUs */
1843 	other_cpus = all_cpus & ~(1 << cpuid);
1844 
1845 	/* fill in our (BSP) APIC version */
1846 	cpu_apic_versions[0] = lapic.version;
1847 
1848 	/* restore the warmstart vector */
1849 	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
1850 #ifndef PC98
1851 	outb(CMOS_REG, BIOS_RESET);
1852 	outb(CMOS_DATA, mpbiosreason);
1853 #endif
1854 
1855 	/*
1856 	 * Set up the idle context for the BSP.  Similar to above except
1857 	 * that some was done by locore, some by pmap.c and some is implicit
1858 	 * because the BSP is cpu#0 and the page is initially zero, and also
1859 	 * because we can refer to variables by name on the BSP.
1860 	 */
1861 
1862 	/* Allocate and setup BSP idle stack */
1863 	stack = (char *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE);
1864 	for (i = 0; i < UPAGES; i++)
1865 		SMPpt[5 + i] = (pt_entry_t)
1866 		    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
1867 
1868 	*(int *)PTD = 0;
1869 	pmap_set_opt();
1870 
1871 	/* number of APs actually started */
1872 	return mp_ncpus - 1;
1873 }
1874 
1875 
1876 /*
1877  * load the 1st level AP boot code into base memory.
1878  */
1879 
1880 /* targets for relocation */
1881 extern void bigJump(void);
1882 extern void bootCodeSeg(void);
1883 extern void bootDataSeg(void);
1884 extern void MPentry(void);
1885 extern u_int MP_GDT;
1886 extern u_int mp_gdtbase;
1887 
1888 static void
1889 install_ap_tramp(u_int boot_addr)
1890 {
1891 	int     x;
1892 	int     size = *(int *) ((u_long) & bootMP_size);
1893 	u_char *src = (u_char *) ((u_long) bootMP);
1894 	u_char *dst = (u_char *) boot_addr + KERNBASE;
1895 	u_int   boot_base = (u_int) bootMP;
1896 	u_int8_t *dst8;
1897 	u_int16_t *dst16;
1898 	u_int32_t *dst32;
1899 
1900 	POSTCODE(INSTALL_AP_TRAMP_POST);
1901 
1902 	for (x = 0; x < size; ++x)
1903 		*dst++ = *src++;
1904 
1905 	/*
1906 	 * modify addresses in code we just moved to basemem. unfortunately we
1907 	 * need fairly detailed info about mpboot.s for this to work.  changes
1908 	 * to mpboot.s might require changes here.
1909 	 */
1910 
1911 	/* boot code is located in KERNEL space */
1912 	dst = (u_char *) boot_addr + KERNBASE;
1913 
1914 	/* modify the lgdt arg */
1915 	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
1916 	*dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
1917 
1918 	/* modify the ljmp target for MPentry() */
1919 	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
1920 	*dst32 = ((u_int) MPentry - KERNBASE);
1921 
1922 	/* modify the target for boot code segment */
1923 	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
1924 	dst8 = (u_int8_t *) (dst16 + 1);
1925 	*dst16 = (u_int) boot_addr & 0xffff;
1926 	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
1927 
1928 	/* modify the target for boot data segment */
1929 	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
1930 	dst8 = (u_int8_t *) (dst16 + 1);
1931 	*dst16 = (u_int) boot_addr & 0xffff;
1932 	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
1933 }
1934 
1935 
1936 /*
1937  * This function starts the AP (application processor) whose logical
1938  * cpu id is 'logical_cpu'.  It does quite a "song and dance"
1939  * to accomplish this.  This is necessary because of the nuances
1940  * of the different hardware we might encounter.  It ain't pretty,
1941  * but it seems to work.
1942  */
1943 static int
1944 start_ap(int logical_cpu, u_int boot_addr)
1945 {
1946 	int     physical_cpu;
1947 	int     vector;
1948 	int     cpus;
1949 	u_long  icr_lo, icr_hi;
1950 
1951 	POSTCODE(START_AP_POST);
1952 
1953 	/* get the PHYSICAL APIC ID# */
1954 	physical_cpu = CPU_TO_ID(logical_cpu);
1955 
1956 	/* calculate the vector */
1957 	vector = (boot_addr >> 12) & 0xff;
1958 
1959 	/* used as a watchpoint to signal AP startup */
1960 	cpus = mp_ncpus;
1961 
1962 	/*
1963 	 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
1964 	 * and running the target CPU. OR this INIT IPI might be latched (P5
1965 	 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
1966 	 * ignored.
1967 	 */
1968 
1969 	/* setup the address for the target AP */
1970 	icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
1971 	icr_hi |= (physical_cpu << 24);
1972 	lapic.icr_hi = icr_hi;
1973 
1974 	/* do an INIT IPI: assert RESET */
1975 	icr_lo = lapic.icr_lo & 0xfff00000;
1976 	lapic.icr_lo = icr_lo | 0x0000c500;
1977 
1978 	/* wait for pending status end */
1979 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
1980 		 /* spin */ ;
1981 
1982 	/* do an INIT IPI: deassert RESET */
1983 	lapic.icr_lo = icr_lo | 0x00008500;
1984 
1985 	/* wait for pending status end */
1986 	u_sleep(10000);		/* wait ~10mS */
1987 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
1988 		 /* spin */ ;
1989 
1990 	/*
1991 	 * Next we do a STARTUP IPI.  The previous INIT IPI might still be
1992 	 * latched (P5 bug); this first STARTUP would then terminate
1993 	 * immediately, and the previously started INIT IPI would continue.
1994 	 * OR the previous INIT IPI has already run, and this STARTUP IPI
1995 	 * will run.  OR the previous INIT IPI was ignored, and this STARTUP
1996 	 * IPI will run.
1997 	 */
1998 
1999 	/* do a STARTUP IPI */
2000 	lapic.icr_lo = icr_lo | 0x00000600 | vector;
2001 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
2002 		 /* spin */ ;
2003 	u_sleep(200);		/* wait ~200 us */
2004 
2005 	/*
2006 	 * Finally we do a second STARTUP IPI.  It should run IF the previous
2007 	 * STARTUP IPI was cancelled by a latched INIT IPI; otherwise it will
2008 	 * be ignored, as only ONE STARTUP IPI is recognized after a hardware
2009 	 * RESET or INIT IPI.
2010 	 */
2011 
2012 	lapic.icr_lo = icr_lo | 0x00000600 | vector;
2013 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
2014 		 /* spin */ ;
2015 	u_sleep(200);		/* wait ~200 us */
2016 
2017 	/* wait for it to start */
2018 	set_apic_timer(5000000);	/* == 5 seconds */
2019 	while (read_apic_timer())
2020 		if (mp_ncpus > cpus)
2021 			return 1;	/* return SUCCESS */
2022 
2023 	return 0;		/* return FAILURE */
2024 }
2025 
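/*
 * Editor's note: a hypothetical caller-side sketch (not the code this
 * file actually uses) showing how start_ap() might be driven: try each
 * logical CPU in turn and count the ones that came up.  'boot_addr' is
 * assumed to be the trampoline address installed by install_ap_tramp().
 */
#if 0	/* illustrative sketch only; not compiled */
static int
start_aps_sketch(int ncpus, u_int boot_addr)
{
	int	cpu, started;

	started = 0;
	for (cpu = 1; cpu < ncpus; cpu++) {	/* CPU 0 is the BSP */
		if (start_ap(cpu, boot_addr))
			started++;
		else
			printf("SMP: AP #%d failed to start\n", cpu);
	}
	return started;
}
#endif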
2026 
2027 /*
2028  * Flush the TLB on all other CPUs.
2029  *
2030  * XXX: Needs to handshake and wait for completion before proceeding.
2031  */
2032 void
2033 smp_invltlb(void)
2034 {
2035 #if defined(APIC_IO)
2036 	if (smp_started && invltlb_ok)
2037 		all_but_self_ipi(XINVLTLB_OFFSET);
2038 #endif  /* APIC_IO */
2039 }
2040 
2041 void
2042 invlpg(u_int addr)
2043 {
2044 	__asm   __volatile("invlpg (%0)"::"r"(addr):"memory");
2045 
2046 	/* send a message to the other CPUs */
2047 	smp_invltlb();
2048 }
2049 
2050 void
2051 invltlb(void)
2052 {
2053 	u_long  temp;
2054 
2055 	/*
2056 	 * This should be implemented as load_cr3(rcr3()) when load_cr3() is
2057 	 * inlined.
2058 	 */
2059 	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
2060 
2061 	/* send a message to the other CPUs */
2062 	smp_invltlb();
2063 }
2064 
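/*
 * Editor's note: the comment inside invltlb() above says the reload
 * should eventually be written as load_cr3(rcr3()).  A sketch of that
 * variant, assuming the load_cr3()/rcr3() helpers from
 * <machine/cpufunc.h>:
 */
#if 0	/* illustrative sketch only; not compiled */
static void
invltlb_sketch(void)
{
	load_cr3(rcr3());	/* rewriting %cr3 flushes the local TLB */
	smp_invltlb();		/* then notify the other CPUs */
}
#endif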
2065 
2066 /*
2067  * When called the executing CPU will send an IPI to all other CPUs
2068  *  requesting that they halt execution.
2069  *
2070  * Usually (but not necessarily) called with 'other_cpus' as its arg.
2071  *
2072  *  - Signals all CPUs in map to stop.
2073  *  - Waits for each to stop.
2074  *
2075  * Returns:
2076  *  -1: error
2077  *   0: NA
2078  *   1: ok
2079  *
2080  * XXX FIXME: this is not MP-safe; it needs a lock to prevent multiple
2081  *            CPUs from executing it at the same time.
2082  */
2083 int
2084 stop_cpus(u_int map)
2085 {
2086 	if (!smp_started)
2087 		return 0;
2088 
2089 	/* send the Xcpustop IPI to all CPUs in map */
2090 	selected_apic_ipi(map, XCPUSTOP_OFFSET, APIC_DELMODE_FIXED);
2091 
2092 	while ((stopped_cpus & map) != map)
2093 		/* spin */ ;
2094 
2095 	return 1;
2096 }
2097 
2098 
2099 /*
2100  * Called by a CPU to restart stopped CPUs.
2101  *
2102  * Usually (but not necessarily) called with 'stopped_cpus' as its arg.
2103  *
2104  *  - Signals all CPUs in map to restart.
2105  *  - Waits for each to restart.
2106  *
2107  * Returns:
2108  *  -1: error
2109  *   0: NA
2110  *   1: ok
2111  */
2112 int
2113 restart_cpus(u_int map)
2114 {
2115 	if (!smp_started)
2116 		return 0;
2117 
2118 	started_cpus = map;		/* signal other cpus to restart */
2119 
2120 	while ((stopped_cpus & map) != 0) /* wait for each to clear its bit */
2121 		/* spin */ ;
2122 
2123 	return 1;
2124 }
2125 
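/*
 * Editor's note: a hypothetical usage sketch of the stop/restart pair
 * above.  A caller that needs the machine quiesced (a debugger entry,
 * for instance) would typically stop every other CPU, do its work, and
 * then release exactly the CPUs it stopped.  The helper name is
 * invented for illustration.
 */
#if 0	/* illustrative sketch only; not compiled */
static void
run_quiesced_sketch(void (*work)(void))
{
	u_int	quiesced;

	if (stop_cpus(other_cpus)) {	/* halt everyone else and wait */
		quiesced = stopped_cpus;
		work();			/* now effectively single-threaded */
		restart_cpus(quiesced);	/* wake only the CPUs we stopped */
	} else {
		work();			/* SMP not started; nothing to stop */
	}
}
#endif
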
2126 int smp_active = 0;	/* are the APs allowed to run? */
2127 SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, "");
2128 
2129 /* XXX maybe should be hw.ncpu */
2130 static int smp_cpus = 1;	/* how many CPUs are running */
2131 SYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, "");
2132 
2133 int invltlb_ok = 0;	/* throttle smp_invltlb() until it is safe */
2134 SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, "");
2135 
2136 /* Warning: Do not staticize.  Used from swtch.s */
2137 int do_page_zero_idle = 1; /* bzero pages for fun and profit in idleloop */
2138 SYSCTL_INT(_machdep, OID_AUTO, do_page_zero_idle, CTLFLAG_RW,
2139 	   &do_page_zero_idle, 0, "");
2140 
2141 /* Is forwarding of an interrupt to the CPU holding the ISR lock enabled? */
2142 int forward_irq_enabled = 1;
2143 SYSCTL_INT(_machdep, OID_AUTO, forward_irq_enabled, CTLFLAG_RW,
2144 	   &forward_irq_enabled, 0, "");
2145 
2146 /* Enable forwarding of a signal to a process running on a different CPU */
2147 static int forward_signal_enabled = 1;
2148 SYSCTL_INT(_machdep, OID_AUTO, forward_signal_enabled, CTLFLAG_RW,
2149 	   &forward_signal_enabled, 0, "");
2150 
2151 /* Enable forwarding of roundrobin to all other CPUs */
2152 static int forward_roundrobin_enabled = 1;
2153 SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
2154 	   &forward_roundrobin_enabled, 0, "");
2155 
2156 /*
2157  * This is called once the rest of the system is up and running and we're
2158  * ready to let the APs out of the pen.
2159  */
2160 void ap_init(void);
2161 
2162 void
2163 ap_init()
2164 {
2165 	u_int	apic_id;
2166 
2167 	/* BSP may have changed PTD while we're waiting for the lock */
2168 	cpu_invltlb();
2169 
2170 	smp_cpus++;
2171 
2172 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
2173 	lidt(&r_idt);
2174 #endif
2175 
2176 	/* Build our map of 'other' CPUs. */
2177 	other_cpus = all_cpus & ~(1 << cpuid);
2178 
2179 	printf("SMP: AP CPU #%d Launched!\n", cpuid);
2180 
2181 	/* XXX FIXME: i386 specific, and redundant: Setup the FPU. */
2182 	load_cr0((rcr0() & ~CR0_EM) | CR0_MP | CR0_NE | CR0_TS);
2183 
2184 	/* set up FPU state on the AP */
2185 	npxinit(__INITIAL_NPXCW__);
2186 
2187 	/* A quick check from sanity claus */
2188 	apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
2189 	if (cpuid != apic_id) {
2190 		printf("SMP: cpuid = %d\n", cpuid);
2191 		printf("SMP: apic_id = %d\n", apic_id);
2192 		printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
2193 		panic("cpuid mismatch! boom!!");
2194 	}
2195 
2196 	/* Init local apic for irq's */
2197 	apic_initialize();
2198 
2199 	/* Set memory range attributes for this CPU to match the BSP */
2200 	mem_range_AP_init();
2201 
2202 	/*
2203 	 * Activate smp_invltlb, although strictly speaking, this isn't
2204 	 * quite correct yet.  We should have a bitfield of CPUs willing
2205 	 * to accept TLB flush IPIs, or something like that, and sync them.
2206 	 */
2207 	if (smp_cpus == mp_ncpus) {
2208 		invltlb_ok = 1;
2209 		smp_started = 1; /* enable IPIs, TLB shootdowns, freezes, etc. */
2210 		smp_active = 1;	 /* historic */
2211 	}
2212 }
2213 
2214 #ifdef BETTER_CLOCK
2215 
2216 #define CHECKSTATE_USER	0
2217 #define CHECKSTATE_SYS	1
2218 #define CHECKSTATE_INTR	2
2219 
2220 /* Do not staticize.  Used from apic_vector.s */
2221 struct proc*	checkstate_curproc[NCPU];
2222 int		checkstate_cpustate[NCPU];
2223 u_long		checkstate_pc[NCPU];
2224 
2225 extern long	cp_time[CPUSTATES];
2226 
2227 #define PC_TO_INDEX(pc, prof)				\
2228         ((int)(((u_quad_t)((pc) - (prof)->pr_off) *	\
2229             (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)
2230 
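/*
 * Editor's note: a worked example of PC_TO_INDEX() with invented
 * values.  With prof->pr_off = 0x1000 and prof->pr_scale = 0x8000
 * (half of the 1:1 scale 0x10000), a pc of 0x1400 yields
 * ((0x400 * 0x8000) >> 16) & ~1 = 0x200, i.e. an even 512-byte offset
 * into the profiling buffer.
 */
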
2231 static void
2232 addupc_intr_forwarded(struct proc *p, int id, int *astmap)
2233 {
2234 	int i;
2235 	struct uprof *prof;
2236 	u_long pc;
2237 
2238 	pc = checkstate_pc[id];
2239 	prof = &p->p_stats->p_prof;
2240 	if (pc >= prof->pr_off &&
2241 	    (i = PC_TO_INDEX(pc, prof)) < prof->pr_size) {
2242 		if ((p->p_flag & P_OWEUPC) == 0) {
2243 			prof->pr_addr = pc;
2244 			prof->pr_ticks = 1;
2245 			p->p_flag |= P_OWEUPC;
2246 		}
2247 		*astmap |= (1 << id);
2248 	}
2249 }
2250 
2251 static void
2252 forwarded_statclock(int id, int pscnt, int *astmap)
2253 {
2254 	struct pstats *pstats;
2255 	long rss;
2256 	struct rusage *ru;
2257 	struct vmspace *vm;
2258 	int cpustate;
2259 	struct proc *p;
2260 #ifdef GPROF
2261 	register struct gmonparam *g;
2262 	int i;
2263 #endif
2264 
2265 	p = checkstate_curproc[id];
2266 	cpustate = checkstate_cpustate[id];
2267 
2268 	switch (cpustate) {
2269 	case CHECKSTATE_USER:
2270 		if (p->p_flag & P_PROFIL)
2271 			addupc_intr_forwarded(p, id, astmap);
2272 		if (pscnt > 1)
2273 			return;
2274 		p->p_uticks++;
2275 		if (p->p_nice > NZERO)
2276 			cp_time[CP_NICE]++;
2277 		else
2278 			cp_time[CP_USER]++;
2279 		break;
2280 	case CHECKSTATE_SYS:
2281 #ifdef GPROF
2282 		/*
2283 		 * Kernel statistics are just like addupc_intr, only easier.
2284 		 */
2285 		g = &_gmonparam;
2286 		if (g->state == GMON_PROF_ON) {
2287 			i = checkstate_pc[id] - g->lowpc;
2288 			if (i < g->textsize) {
2289 				i /= HISTFRACTION * sizeof(*g->kcount);
2290 				g->kcount[i]++;
2291 			}
2292 		}
2293 #endif
2294 		if (pscnt > 1)
2295 			return;
2296 
2297 		if (!p)
2298 			cp_time[CP_IDLE]++;
2299 		else {
2300 			p->p_sticks++;
2301 			cp_time[CP_SYS]++;
2302 		}
2303 		break;
2304 	case CHECKSTATE_INTR:
2305 	default:
2306 #ifdef GPROF
2307 		/*
2308 		 * Kernel statistics are just like addupc_intr, only easier.
2309 		 */
2310 		g = &_gmonparam;
2311 		if (g->state == GMON_PROF_ON) {
2312 			i = checkstate_pc[id] - g->lowpc;
2313 			if (i < g->textsize) {
2314 				i /= HISTFRACTION * sizeof(*g->kcount);
2315 				g->kcount[i]++;
2316 			}
2317 		}
2318 #endif
2319 		if (pscnt > 1)
2320 			return;
2321 		if (p)
2322 			p->p_iticks++;
2323 		cp_time[CP_INTR]++;
2324 	}
2325 	if (p != NULL) {
2326 		p->p_cpticks++;
2327 		if (++p->p_estcpu == 0)
2328 			p->p_estcpu--;
2329 		if ((p->p_estcpu & 3) == 0) {
2330 			resetpriority(p);
2331 			if (p->p_priority >= PUSER)
2332 				p->p_priority = p->p_usrpri;
2333 		}
2334 
2335 		/* Update resource usage integrals and maximums. */
2336 		if ((pstats = p->p_stats) != NULL &&
2337 		    (ru = &pstats->p_ru) != NULL &&
2338 		    (vm = p->p_vmspace) != NULL) {
2339 			ru->ru_ixrss += pgtok(vm->vm_tsize);
2340 			ru->ru_idrss += pgtok(vm->vm_dsize);
2341 			ru->ru_isrss += pgtok(vm->vm_ssize);
2342 			rss = pgtok(vmspace_resident_count(vm));
2343 			if (ru->ru_maxrss < rss)
2344 				ru->ru_maxrss = rss;
2345 		}
2346 	}
2347 }
2348 
2349 void
2350 forward_statclock(int pscnt)
2351 {
2352 	int map;
2353 	int id;
2354 	int i;
2355 
2356 	/* Kludge. We don't yet have separate locks for the interrupts
2357 	 * and the kernel. This means that we cannot let the other processors
2358 	 * handle complex interrupts while inhibiting them from entering
2359 	 * the kernel in a non-interrupt context.
2360 	 *
2361 	 * What we can do, without changing the locking mechanisms yet,
2362 	 * is to let the other processors handle a very simple interrupt
2363 	 * (which determines the processor states) and do the main
2364 	 * work ourselves.
2365 	 */
2366 
2367 	if (!smp_started || !invltlb_ok || cold || panicstr)
2368 		return;
2369 
2370 	/* Step 1: Probe state (user, cpu, interrupt, spinlock, idle) */
2371 
2372 	map = other_cpus & ~stopped_cpus;
2373 	checkstate_probed_cpus = 0;
2374 	if (map != 0)
2375 		selected_apic_ipi(map,
2376 				  XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED);
2377 
2378 	i = 0;
2379 	while (checkstate_probed_cpus != map) {
2380 		/* spin */
2381 		i++;
2382 		if (i == 100000) {
2383 #ifdef BETTER_CLOCK_DIAGNOSTIC
2384 			printf("forward_statclock: checkstate %x\n",
2385 			       checkstate_probed_cpus);
2386 #endif
2387 			break;
2388 		}
2389 	}
2390 
2391 	/*
2392 	 * Step 2: walk through the other processors' processes, update ticks and
2393 	 * profiling info.
2394 	 */
2395 
2396 	map = 0;
2397 	for (id = 0; id < mp_ncpus; id++) {
2398 		if (id == cpuid)
2399 			continue;
2400 		if (((1 << id) & checkstate_probed_cpus) == 0)
2401 			continue;
2402 		forwarded_statclock(id, pscnt, &map);
2403 	}
2404 	if (map != 0) {
2405 		checkstate_need_ast |= map;
2406 		selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
2407 		i = 0;
2408 		while ((checkstate_need_ast & map) != 0) {
2409 			/* spin */
2410 			i++;
2411 			if (i > 100000) {
2412 #ifdef BETTER_CLOCK_DIAGNOSTIC
2413 				printf("forward_statclock: dropped ast 0x%x\n",
2414 				       checkstate_need_ast & map);
2415 #endif
2416 				break;
2417 			}
2418 		}
2419 	}
2420 }
2421 
2422 void
2423 forward_hardclock(int pscnt)
2424 {
2425 	int map;
2426 	int id;
2427 	struct proc *p;
2428 	struct pstats *pstats;
2429 	int i;
2430 
2431 	/* Kludge. We don't yet have separate locks for the interrupts
2432 	 * and the kernel. This means that we cannot let the other processors
2433 	 * handle complex interrupts while inhibiting them from entering
2434 	 * the kernel in a non-interrupt context.
2435 	 *
2436 	 * What we can do, without changing the locking mechanisms yet,
2437 	 * is to let the other processors handle a very simple interrupt
2438 	 * (which determines the processor states) and do the main
2439 	 * work ourselves.
2440 	 */
2441 
2442 	if (!smp_started || !invltlb_ok || cold || panicstr)
2443 		return;
2444 
2445 	/* Step 1: Probe state (user, cpu, interrupt, spinlock, idle) */
2446 
2447 	map = other_cpus & ~stopped_cpus;
2448 	checkstate_probed_cpus = 0;
2449 	if (map != 0)
2450 		selected_apic_ipi(map,
2451 				  XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED);
2452 
2453 	i = 0;
2454 	while (checkstate_probed_cpus != map) {
2455 		/* spin */
2456 		i++;
2457 		if (i == 100000) {
2458 #ifdef BETTER_CLOCK_DIAGNOSTIC
2459 			printf("forward_hardclock: checkstate %x\n",
2460 			       checkstate_probed_cpus);
2461 #endif
2462 			break;
2463 		}
2464 	}
2465 
2466 	/*
2467 	 * Step 2: walk through the other processors' processes, update the
2468 	 * virtual and profiling timers.  If stathz == 0, also update ticks and
2469 	 * profiling info.
2470 	 */
2471 
2472 	map = 0;
2473 	for (id = 0; id < mp_ncpus; id++) {
2474 		if (id == cpuid)
2475 			continue;
2476 		if (((1 << id) & checkstate_probed_cpus) == 0)
2477 			continue;
2478 		p = checkstate_curproc[id];
2479 		if (p) {
2480 			pstats = p->p_stats;
2481 			if (checkstate_cpustate[id] == CHECKSTATE_USER &&
2482 			    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
2483 			    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) {
2484 				psignal(p, SIGVTALRM);
2485 				map |= (1 << id);
2486 			}
2487 			if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
2488 			    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) {
2489 				psignal(p, SIGPROF);
2490 				map |= (1 << id);
2491 			}
2492 		}
2493 		if (stathz == 0) {
2494 			forwarded_statclock(id, pscnt, &map);
2495 		}
2496 	}
2497 	if (map != 0) {
2498 		checkstate_need_ast |= map;
2499 		selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
2500 		i = 0;
2501 		while ((checkstate_need_ast & map) != 0) {
2502 			/* spin */
2503 			i++;
2504 			if (i > 100000) {
2505 #ifdef BETTER_CLOCK_DIAGNOSTIC
2506 				printf("forward_hardclock: dropped ast 0x%x\n",
2507 				       checkstate_need_ast & map);
2508 #endif
2509 				break;
2510 			}
2511 		}
2512 	}
2513 }
2514 
2515 #endif /* BETTER_CLOCK */
2516 
2517 void
2518 forward_signal(struct proc *p)
2519 {
2520 	int map;
2521 	int id;
2522 	int i;
2523 
2524 	/* Kludge. We don't yet have separate locks for the interrupts
2525 	 * and the kernel. This means that we cannot let the other processors
2526 	 * handle complex interrupts while inhibiting them from entering
2527 	 * the kernel in a non-interrupt context.
2528 	 *
2529 	 * What we can do, without changing the locking mechanisms yet,
2530 	 * is to let the other processors handle a very simple interrupt
2531 	 * (which determines the processor states) and do the main
2532 	 * work ourselves.
2533 	 */
2534 
2535 	if (!smp_started || !invltlb_ok || cold || panicstr)
2536 		return;
2537 	if (!forward_signal_enabled)
2538 		return;
2539 	while (1) {
2540 		if (p->p_stat != SRUN)
2541 			return;
2542 		id = p->p_oncpu;
2543 		if (id == 0xff)
2544 			return;
2545 		map = (1<<id);
2546 		checkstate_need_ast |= map;
2547 		selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
2548 		i = 0;
2549 		while ((checkstate_need_ast & map) != 0) {
2550 			/* spin */
2551 			i++;
2552 			if (i > 100000) {
2553 #if 0
2554 				printf("forward_signal: dropped ast 0x%x\n",
2555 				       checkstate_need_ast & map);
2556 #endif
2557 				break;
2558 			}
2559 		}
2560 		if (id == p->p_oncpu)
2561 			return;
2562 	}
2563 }
2564 
2565 void
2566 forward_roundrobin(void)
2567 {
2568 	u_int map;
2569 	int i;
2570 
2571 	if (!smp_started || !invltlb_ok || cold || panicstr)
2572 		return;
2573 	if (!forward_roundrobin_enabled)
2574 		return;
2575 	resched_cpus |= other_cpus;
2576 	map = other_cpus & ~stopped_cpus;
2577 #if 1
2578 	selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
2579 #else
2580 	(void) all_but_self_ipi(XCPUAST_OFFSET);
2581 #endif
2582 	i = 0;
2583 	while ((checkstate_need_ast & map) != 0) {
2584 		/* spin */
2585 		i++;
2586 		if (i > 100000) {
2587 #if 0
2588 			printf("forward_roundrobin: dropped ast 0x%x\n",
2589 			       checkstate_need_ast & map);
2590 #endif
2591 			break;
2592 		}
2593 	}
2594 }
2595 
2596 
2597 #ifdef APIC_INTR_REORDER
2598 /*
2599  *	Maintain the mapping from softintr vector to ISR bit in the local APIC.
2600  */
2601 void
2602 set_lapic_isrloc(int intr, int vector)
2603 {
2604 	if (intr < 0 || intr > 32)
2605 		panic("set_apic_isrloc: bad intr argument: %d",intr);
2606 	if (vector < ICU_OFFSET || vector > 255)
2607 		panic("set_apic_isrloc: bad vector argument: %d",vector);
2608 	apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2);
2609 	apic_isrbit_location[intr].bit = (1<<(vector & 31));
2610 }
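
/*
 * Editor's note: a worked example of the mapping above, using an
 * invented vector.  For vector 0x48 (72): (72 >> 5) << 2 = 8, and
 * adding 8 to the 32-bit-wide pointer &lapic.isr0 advances 8 * 4 = 32
 * bytes, i.e. two of the local APIC's 16-byte-spaced ISR registers,
 * landing on the ISR word that covers vectors 64-95.  The bit within
 * that word is 1 << (72 & 31), i.e. bit 8.
 */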
2611 #endif
2612 
2613 /*
2614  * All-CPU rendezvous.  CPUs are signalled, all execute the setup function
2615  * (if specified), rendezvous, execute the action function (if specified),
2616  * rendezvous again, execute the teardown function (if specified), and then
2617  * resume.
2618  *
2619  * Note that the supplied external functions _must_ be reentrant and aware
2620  * that they are running in parallel and in an unknown lock context.
2621  */
2622 static void (*smp_rv_setup_func)(void *arg);
2623 static void (*smp_rv_action_func)(void *arg);
2624 static void (*smp_rv_teardown_func)(void *arg);
2625 static void *smp_rv_func_arg;
2626 static volatile int smp_rv_waiters[2];
2627 
2628 void
2629 smp_rendezvous_action(void)
2630 {
2631 	/* setup function */
2632 	if (smp_rv_setup_func != NULL)
2633 		smp_rv_setup_func(smp_rv_func_arg);
2634 	/* spin on entry rendezvous */
2635 	atomic_add_int(&smp_rv_waiters[0], 1);
2636 	while (smp_rv_waiters[0] < mp_ncpus)
2637 		;
2638 	/* action function */
2639 	if (smp_rv_action_func != NULL)
2640 		smp_rv_action_func(smp_rv_func_arg);
2641 	/* spin on exit rendezvous */
2642 	atomic_add_int(&smp_rv_waiters[1], 1);
2643 	while (smp_rv_waiters[1] < mp_ncpus)
2644 		;
2645 	/* teardown function */
2646 	if (smp_rv_teardown_func != NULL)
2647 		smp_rv_teardown_func(smp_rv_func_arg);
2648 }
2649 
2650 void
2651 smp_rendezvous(void (* setup_func)(void *),
2652 	       void (* action_func)(void *),
2653 	       void (* teardown_func)(void *),
2654 	       void *arg)
2655 {
2656 	u_int	efl;
2657 
2658 	/* obtain rendezvous lock */
2659 	s_lock(&smp_rv_lock);		/* XXX sleep here? NOWAIT flag? */
2660 
2661 	/* set static function pointers */
2662 	smp_rv_setup_func = setup_func;
2663 	smp_rv_action_func = action_func;
2664 	smp_rv_teardown_func = teardown_func;
2665 	smp_rv_func_arg = arg;
2666 	smp_rv_waiters[0] = 0;
2667 	smp_rv_waiters[1] = 0;
2668 
2669 	/* disable interrupts on this CPU, save interrupt status */
2670 	efl = read_eflags();
2671 	write_eflags(efl & ~PSL_I);
2672 
2673 	/* signal other processors, which will enter the IPI with interrupts off */
2674 	all_but_self_ipi(XRENDEZVOUS_OFFSET);
2675 
2676 	/* call executor function */
2677 	smp_rendezvous_action();
2678 
2679 	/* restore interrupt flag */
2680 	write_eflags(efl);
2681 
2682 	/* release lock */
2683 	s_unlock(&smp_rv_lock);
2684 }
2685
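/*
 * Editor's note: a hypothetical usage sketch of smp_rendezvous().  The
 * action function runs on every CPU (including the caller) with
 * interrupts disabled; here it simply flushes the local TLB, giving a
 * handshaked alternative to smp_invltlb().  The helper names are
 * invented for illustration.
 */
#if 0	/* illustrative sketch only; not compiled */
static void
rv_flush_tlb(void *arg)
{
	cpu_invltlb();		/* each CPU flushes its own TLB */
}

static void
flush_all_tlbs_sketch(void)
{
	/* no setup or teardown step is needed for this action */
	smp_rendezvous(NULL, rv_flush_tlb, NULL, NULL);
}
#endif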