xref: /freebsd/sys/kern/subr_smp.c (revision f35e5d0ef0a10ebda81a076bbd838d12b916dab5)
1 /*
2  * Copyright (c) 1996, by Steve Passe
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. The name of the developer may NOT be used to endorse or promote products
11  *    derived from this software without specific prior written permission.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27 
28 #include "opt_smp.h"
29 #include "opt_cpu.h"
30 #include "opt_user_ldt.h"
31 
32 #ifdef SMP
33 #include <machine/smptests.h>
34 #else
35 #error "the SMP kernel option is required to build this file"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/kernel.h>
41 #include <sys/proc.h>
42 #include <sys/sysctl.h>
43 #include <sys/malloc.h>
44 #include <sys/memrange.h>
45 #ifdef BETTER_CLOCK
46 #include <sys/dkstat.h>
47 #endif
48 #include <sys/cons.h>	/* cngetc() */
49 
50 #include <vm/vm.h>
51 #include <vm/vm_param.h>
52 #include <vm/pmap.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_extern.h>
55 #ifdef BETTER_CLOCK
56 #include <sys/lock.h>
57 #include <vm/vm_map.h>
58 #include <sys/user.h>
59 #ifdef GPROF
60 #include <sys/gmon.h>
61 #endif
62 #endif
63 
64 #include <machine/smp.h>
65 #include <machine/apic.h>
66 #include <machine/atomic.h>
67 #include <machine/cpufunc.h>
68 #include <machine/mpapic.h>
69 #include <machine/psl.h>
70 #include <machine/segments.h>
71 #include <machine/smptests.h>	/** TEST_DEFAULT_CONFIG, TEST_TEST1 */
72 #include <machine/tss.h>
73 #include <machine/specialreg.h>
74 #include <machine/cputypes.h>
76 #include <machine/globaldata.h>
77 
78 #if defined(APIC_IO)
79 #include <machine/md_var.h>		/* setidt() */
80 #include <i386/isa/icu.h>		/* IPIs */
81 #include <i386/isa/intr_machdep.h>	/* IPIs */
82 #endif	/* APIC_IO */
83 
84 #if defined(TEST_DEFAULT_CONFIG)
85 #define MPFPS_MPFB1	TEST_DEFAULT_CONFIG
86 #else
87 #define MPFPS_MPFB1	mpfps->mpfb1
88 #endif  /* TEST_DEFAULT_CONFIG */
89 
90 #define WARMBOOT_TARGET		0
91 #define WARMBOOT_OFF		(KERNBASE + 0x0467)
92 #define WARMBOOT_SEG		(KERNBASE + 0x0469)
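
/*
 * Note (editorial): 0x40:0x67 in the BIOS data area holds the warm-boot
 * ("reset") vector as offset (0x467) and segment (0x469).  Writing
 * BIOS_WARM (0x0a) into CMOS shutdown-status register 0x0f asks the BIOS
 * to jump through that vector after the next reset/INIT.  start_all_aps()
 * points the vector at the AP trampoline, per the MP spec's universal
 * startup algorithm (historically needed for 82489DX-style APICs that
 * ignore STARTUP IPIs).
 */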
93 
94 #ifdef PC98
95 #define BIOS_BASE		(0xe8000)
96 #define BIOS_SIZE		(0x18000)
97 #else
98 #define BIOS_BASE		(0xf0000)
99 #define BIOS_SIZE		(0x10000)
100 #endif
101 #define BIOS_COUNT		(BIOS_SIZE/4)
102 
103 #define CMOS_REG		(0x70)
104 #define CMOS_DATA		(0x71)
105 #define BIOS_RESET		(0x0f)
106 #define BIOS_WARM		(0x0a)
107 
108 #define PROCENTRY_FLAG_EN	0x01
109 #define PROCENTRY_FLAG_BP	0x02
110 #define IOAPICENTRY_FLAG_EN	0x01
111 
112 
113 /* MP Floating Pointer Structure */
114 typedef struct MPFPS {
115 	char    signature[4];
116 	void   *pap;
117 	u_char  length;
118 	u_char  spec_rev;
119 	u_char  checksum;
120 	u_char  mpfb1;
121 	u_char  mpfb2;
122 	u_char  mpfb3;
123 	u_char  mpfb4;
124 	u_char  mpfb5;
125 }      *mpfps_t;
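
/*
 * Roughly, per MP spec v1.4: 'signature' is "_MP_", 'length' counts
 * 16-byte paragraphs, and 'checksum' makes all bytes of the structure
 * sum to zero (mod 256).  A non-zero 'mpfb1' selects one of the spec's
 * default configurations (see MPFPS_MPFB1 above); bit 7 of 'mpfb2' set
 * means the IMCR is present and the system starts in PIC mode (see the
 * 'picmode' handling in mptable_pass2() and bsp_apic_configure()).
 */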
126 
127 /* MP Configuration Table Header */
128 typedef struct MPCTH {
129 	char    signature[4];
130 	u_short base_table_length;
131 	u_char  spec_rev;
132 	u_char  checksum;
133 	u_char  oem_id[8];
134 	u_char  product_id[12];
135 	void   *oem_table_pointer;
136 	u_short oem_table_size;
137 	u_short entry_count;
138 	void   *apic_address;
139 	u_short extended_table_length;
140 	u_char  extended_table_checksum;
141 	u_char  reserved;
142 }      *mpcth_t;
143 
144 
145 typedef struct PROCENTRY {
146 	u_char  type;
147 	u_char  apic_id;
148 	u_char  apic_version;
149 	u_char  cpu_flags;
150 	u_long  cpu_signature;
151 	u_long  feature_flags;
152 	u_long  reserved1;
153 	u_long  reserved2;
154 }      *proc_entry_ptr;
155 
156 typedef struct BUSENTRY {
157 	u_char  type;
158 	u_char  bus_id;
159 	char    bus_type[6];
160 }      *bus_entry_ptr;
161 
162 typedef struct IOAPICENTRY {
163 	u_char  type;
164 	u_char  apic_id;
165 	u_char  apic_version;
166 	u_char  apic_flags;
167 	void   *apic_address;
168 }      *io_apic_entry_ptr;
169 
170 typedef struct INTENTRY {
171 	u_char  type;
172 	u_char  int_type;
173 	u_short int_flags;
174 	u_char  src_bus_id;
175 	u_char  src_bus_irq;
176 	u_char  dst_apic_id;
177 	u_char  dst_apic_int;
178 }      *int_entry_ptr;
179 
180 /* descriptions of MP basetable entries */
181 typedef struct BASETABLE_ENTRY {
182 	u_char  type;
183 	u_char  length;
184 	char    name[16];
185 }       basetable_entry;
186 
187 /*
188  * This code MUST be enabled both here and in mpboot.s.
189  * It follows the very early stages of AP boot by placing values in CMOS RAM.
190  * It is NORMALLY never needed, hence the primitive method of enabling it.
191  *
192 #define CHECK_POINTS
193  */
194 
195 #if defined(CHECK_POINTS) && !defined(PC98)
196 #define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
197 #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
198 
199 #define CHECK_INIT(D);				\
200 	CHECK_WRITE(0x34, (D));			\
201 	CHECK_WRITE(0x35, (D));			\
202 	CHECK_WRITE(0x36, (D));			\
203 	CHECK_WRITE(0x37, (D));			\
204 	CHECK_WRITE(0x38, (D));			\
205 	CHECK_WRITE(0x39, (D));
206 
207 #define CHECK_PRINT(S);				\
208 	printf("%s: %d, %d, %d, %d, %d, %d\n",	\
209 	   (S),					\
210 	   CHECK_READ(0x34),			\
211 	   CHECK_READ(0x35),			\
212 	   CHECK_READ(0x36),			\
213 	   CHECK_READ(0x37),			\
214 	   CHECK_READ(0x38),			\
215 	   CHECK_READ(0x39));
216 
217 #else				/* CHECK_POINTS */
218 
219 #define CHECK_INIT(D)
220 #define CHECK_PRINT(S)
221 
222 #endif				/* CHECK_POINTS */
223 
224 /*
225  * Values to send to the POST hardware.
226  */
227 #define MP_BOOTADDRESS_POST	0x10
228 #define MP_PROBE_POST		0x11
229 #define MPTABLE_PASS1_POST	0x12
230 
231 #define MP_START_POST		0x13
232 #define MP_ENABLE_POST		0x14
233 #define MPTABLE_PASS2_POST	0x15
234 
235 #define START_ALL_APS_POST	0x16
236 #define INSTALL_AP_TRAMP_POST	0x17
237 #define START_AP_POST		0x18
238 
239 #define MP_ANNOUNCE_POST	0x19
240 
241 
242 /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
243 int	current_postcode;
244 
245 /** XXX FIXME: what system files declare these??? */
246 extern struct region_descriptor r_gdt, r_idt;
247 
248 int	bsp_apic_ready = 0;	/* flags usability of the BSP APIC */
249 int	mp_ncpus;		/* # of CPUs, including BSP */
250 int	mp_naps;		/* # of Application Processors */
251 int	mp_nbusses;		/* # of busses */
252 int	mp_napics;		/* # of IO APICs */
253 int	boot_cpu_id;		/* designated BSP */
254 vm_offset_t cpu_apic_address;
255 vm_offset_t io_apic_address[NAPICID];	/* NAPICID is more than enough */
256 extern	int nkpt;
257 
258 u_int32_t cpu_apic_versions[NCPU];
259 u_int32_t io_apic_versions[NAPIC];
260 
261 #ifdef APIC_INTR_DIAGNOSTIC
262 int apic_itrace_enter[32];
263 int apic_itrace_tryisrlock[32];
264 int apic_itrace_gotisrlock[32];
265 int apic_itrace_active[32];
266 int apic_itrace_masked[32];
267 int apic_itrace_noisrlock[32];
268 int apic_itrace_masked2[32];
269 int apic_itrace_unmask[32];
270 int apic_itrace_noforward[32];
271 int apic_itrace_leave[32];
272 int apic_itrace_enter2[32];
273 int apic_itrace_doreti[32];
274 int apic_itrace_splz[32];
275 int apic_itrace_eoi[32];
276 #ifdef APIC_INTR_DIAGNOSTIC_IRQ
277 unsigned short apic_itrace_debugbuffer[32768];
278 int apic_itrace_debugbuffer_idx;
279 struct simplelock apic_itrace_debuglock;
280 #endif
281 #endif
282 
283 #ifdef APIC_INTR_REORDER
284 struct {
285 	volatile int *location;
286 	int bit;
287 } apic_isrbit_location[32];
288 #endif
289 
290 struct apic_intmapinfo	int_to_apicintpin[APIC_INTMAPSIZE];
291 
292 /*
293  * APIC ID logical/physical mapping structures.
294  * We oversize these to simplify boot-time config.
295  */
296 int     cpu_num_to_apic_id[NAPICID];
297 int     io_num_to_apic_id[NAPICID];
298 int     apic_id_to_logical[NAPICID];
299 
300 
301 /* Bitmap of all available CPUs */
302 u_int	all_cpus;
303 
304 /* AP uses this during bootstrap.  Do not staticize.  */
305 char *bootSTK;
306 static int bootAP;
307 
308 /* Hotwire a 0->4MB V==P mapping */
309 extern pt_entry_t *KPTphys;
310 
311 /* SMP page table page */
312 extern pt_entry_t *SMPpt;
313 
314 struct pcb stoppcbs[NCPU];
315 
316 int smp_started;		/* has the system started? */
317 
318 /*
319  * Local data and functions.
320  */
321 
322 static int	mp_capable;
323 static u_int	boot_address;
324 static u_int	base_memory;
325 
326 static int	picmode;		/* 0: virtual wire mode, 1: PIC mode */
327 static mpfps_t	mpfps;
328 static int	search_for_sig(u_int32_t target, int count);
329 static void	mp_enable(u_int boot_addr);
330 
331 static int	mptable_pass1(void);
332 static int	mptable_pass2(void);
333 static void	default_mp_table(int type);
334 static void	fix_mp_table(void);
335 static void	setup_apic_irq_mapping(void);
336 static void	init_locks(void);
337 static int	start_all_aps(u_int boot_addr);
338 static void	install_ap_tramp(u_int boot_addr);
339 static int	start_ap(int logicalCpu, u_int boot_addr);
340 
341 /*
342  * Calculate usable address in base memory for AP trampoline code.
343  */
344 u_int
345 mp_bootaddress(u_int basemem)
346 {
347 	POSTCODE(MP_BOOTADDRESS_POST);
348 
349 	base_memory = basemem * 1024;	/* convert to bytes */
350 
351 	boot_address = base_memory & ~0xfff;	/* round down to 4k boundary */
352 	if ((base_memory - boot_address) < bootMP_size)
353 		boot_address -= 4096;	/* not enough, lower by 4k */
354 
355 	return boot_address;
356 }
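
/*
 * Worked example (illustrative): with basemem == 639 (KB),
 * base_memory = 639 * 1024 = 0x9fc00; rounding down to a 4K boundary
 * gives boot_address = 0x9f000, leaving 0xc00 bytes.  If bootMP_size
 * exceeds that, the address is lowered one more page to 0x9e000.
 */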
357 
358 
359 /*
360  * Look for an Intel MP spec table (ie, SMP capable hardware).
361  */
362 int
363 mp_probe(void)
364 {
365 	int     x;
366 	u_long  segment;
367 	u_int32_t target;
368 
369 	POSTCODE(MP_PROBE_POST);
370 
371 	/* see if EBDA exists */
372 	if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
373 		/* search first 1K of EBDA */
374 		target = (u_int32_t) (segment << 4);
375 		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
376 			goto found;
377 	} else {
378 		/* last 1K of base memory, effective 'top of base' passed in */
379 		target = (u_int32_t) (base_memory - 0x400);
380 		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
381 			goto found;
382 	}
383 
384 	/* search the BIOS */
385 	target = (u_int32_t) BIOS_BASE;
386 	if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
387 		goto found;
388 
389 	/* nothing found */
390 	mpfps = (mpfps_t)0;
391 	mp_capable = 0;
392 	return 0;
393 
394 found:
395 	/* calculate needed resources */
396 	mpfps = (mpfps_t)x;
397 	if (mptable_pass1())
398 		panic("you must reconfigure your kernel");
399 
400 	/* flag fact that we are running multiple processors */
401 	mp_capable = 1;
402 	return 1;
403 }
404 
405 
406 /*
407  * Startup the SMP processors.
408  */
409 void
410 mp_start(void)
411 {
412 	POSTCODE(MP_START_POST);
413 
414 	/* look for MP capable motherboard */
415 	if (mp_capable)
416 		mp_enable(boot_address);
417 	else
418 		panic("MP hardware not found!");
419 }
420 
421 
422 /*
423  * Print various information about the SMP system hardware and setup.
424  */
425 void
426 mp_announce(void)
427 {
428 	int     x;
429 
430 	POSTCODE(MP_ANNOUNCE_POST);
431 
432 	printf("FreeBSD/SMP: Multiprocessor motherboard\n");
433 	printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
434 	printf(", version: 0x%08x", cpu_apic_versions[0]);
435 	printf(", at 0x%08x\n", cpu_apic_address);
436 	for (x = 1; x <= mp_naps; ++x) {
437 		printf(" cpu%d (AP):  apic id: %2d", x, CPU_TO_ID(x));
438 		printf(", version: 0x%08x", cpu_apic_versions[x]);
439 		printf(", at 0x%08x\n", cpu_apic_address);
440 	}
441 
442 #if defined(APIC_IO)
443 	for (x = 0; x < mp_napics; ++x) {
444 		printf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x));
445 		printf(", version: 0x%08x", io_apic_versions[x]);
446 		printf(", at 0x%08x\n", io_apic_address[x]);
447 	}
448 #else
449 	printf(" Warning: APIC I/O disabled\n");
450 #endif	/* APIC_IO */
451 }
452 
453 /*
454  * AP CPUs call this to sync up protected mode.
455  */
456 void
457 init_secondary(void)
458 {
459 	int	gsel_tss;
460 	int	x, myid = bootAP;
461 
462 	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
463 	gdt_segs[GPROC0_SEL].ssd_base =
464 		(int) &SMP_prvspace[myid].globaldata.gd_common_tss;
465 	SMP_prvspace[myid].globaldata.gd_prvspace = &SMP_prvspace[myid];
466 
467 	for (x = 0; x < NGDT; x++) {
468 		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
469 	}
470 
471 	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
472 	r_gdt.rd_base = (int) &gdt[myid * NGDT];
473 	lgdt(&r_gdt);			/* does magic intra-segment return */
474 
475 	lidt(&r_idt);
476 
477 	lldt(_default_ldt);
478 #ifdef USER_LDT
479 	currentldt = _default_ldt;
480 #endif
481 
482 	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
483 	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
484 	common_tss.tss_esp0 = 0;	/* not used until after switch */
485 	common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
486 	common_tss.tss_ioopt = (sizeof common_tss) << 16;
487 	tss_gdt = &gdt[myid * NGDT + GPROC0_SEL].sd;
488 	common_tssd = *tss_gdt;
489 	ltr(gsel_tss);
490 
491 	load_cr0(0x8005003b);		/* XXX! */
492 
493 	pmap_set_opt();
494 }
495 
496 
497 #if defined(APIC_IO)
498 /*
499  * Final configuration of the BSP's local APIC:
500  *  - disable 'pic mode'.
501  *  - disable 'virtual wire mode'.
502  *  - enable NMI.
503  */
504 void
505 bsp_apic_configure(void)
506 {
507 	u_char		byte;
508 	u_int32_t	temp;
509 
510 	/* leave 'pic mode' if necessary */
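	/*
	 * Background (per MP spec section 3.6.2.1): the IMCR is reached
	 * through I/O ports 0x22 (index, select with 0x70) and 0x23 (data).
	 * Setting bit 0 disconnects the 8259 INTR and NMI pins from the CPU
	 * so those signals are delivered through the local APIC, i.e. it
	 * takes the system out of PIC mode.
	 */
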
511 	if (picmode) {
512 		outb(0x22, 0x70);	/* select IMCR */
513 		byte = inb(0x23);	/* current contents */
514 		byte |= 0x01;		/* mask external INTR */
515 		outb(0x23, byte);	/* disconnect 8259s/NMI */
516 	}
517 
518 	/* mask lint0 (the 8259 'virtual wire' connection) */
519 	temp = lapic.lvt_lint0;
520 	temp |= APIC_LVT_M;		/* set the mask */
521 	lapic.lvt_lint0 = temp;
522 
523         /* setup lint1 to handle NMI */
524         temp = lapic.lvt_lint1;
525         temp &= ~APIC_LVT_M;		/* clear the mask */
526         lapic.lvt_lint1 = temp;
527 
528 	if (bootverbose)
529 		apic_dump("bsp_apic_configure()");
530 }
531 #endif  /* APIC_IO */
532 
533 
534 /*******************************************************************
535  * local functions and data
536  */
537 
538 /*
539  * start the SMP system
540  */
541 static void
542 mp_enable(u_int boot_addr)
543 {
544 	int     x;
545 #if defined(APIC_IO)
546 	int     apic;
547 	u_int   ux;
548 #endif	/* APIC_IO */
549 
550 	POSTCODE(MP_ENABLE_POST);
551 
552 	/* turn on 4MB of V == P addressing so we can get to MP table */
553 	*(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
554 	invltlb();
555 
556 	/* examine the MP table for needed info, uses physical addresses */
557 	x = mptable_pass2();
558 
559 	*(int *)PTD = 0;
560 	invltlb();
561 
562 	/* can't process default configs till the CPU APIC is pmapped */
563 	if (x)
564 		default_mp_table(x);
565 
566 	/* post scan cleanup */
567 	fix_mp_table();
568 	setup_apic_irq_mapping();
569 
570 #if defined(APIC_IO)
571 
572 	/* fill the LOGICAL io_apic_versions table */
573 	for (apic = 0; apic < mp_napics; ++apic) {
574 		ux = io_apic_read(apic, IOAPIC_VER);
575 		io_apic_versions[apic] = ux;
576 	}
577 
578 	/* program each IO APIC in the system */
579 	for (apic = 0; apic < mp_napics; ++apic)
580 		if (io_apic_setup(apic) < 0)
581 			panic("IO APIC setup failure");
582 
583 	/* install a 'Spurious INTerrupt' vector */
584 	setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
585 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
586 
587 	/* install an inter-CPU IPI for TLB invalidation */
588 	setidt(XINVLTLB_OFFSET, Xinvltlb,
589 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
590 
591 #ifdef BETTER_CLOCK
592 	/* install an inter-CPU IPI for reading processor state */
593 	setidt(XCPUCHECKSTATE_OFFSET, Xcpucheckstate,
594 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
595 #endif
596 
597 	/* install an inter-CPU IPI for all-CPU rendezvous */
598 	setidt(XRENDEZVOUS_OFFSET, Xrendezvous,
599 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
600 
601 	/* install an inter-CPU IPI for forcing an additional software trap */
602 	setidt(XCPUAST_OFFSET, Xcpuast,
603 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
604 
605 	/* install an inter-CPU IPI for interrupt forwarding */
606 	setidt(XFORWARD_IRQ_OFFSET, Xforward_irq,
607 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
608 
609 	/* install an inter-CPU IPI for CPU stop/restart */
610 	setidt(XCPUSTOP_OFFSET, Xcpustop,
611 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
612 
613 #if defined(TEST_TEST1)
614 	/* install a "fake hardware INTerrupt" vector */
615 	setidt(XTEST1_OFFSET, Xtest1,
616 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
617 #endif  /** TEST_TEST1 */
618 
619 #endif	/* APIC_IO */
620 
621 	/* initialize all SMP locks */
622 	init_locks();
623 
624 	/* start each Application Processor */
625 	start_all_aps(boot_addr);
626 
627 	/*
628 	 * The init process might be started on a different CPU now,
629 	 * and the boot CPU might not call prepare_usermode to get
630 	 * cr0 correctly configured. Thus we initialize cr0 here.
631 	 */
632 	load_cr0(rcr0() | CR0_WP | CR0_AM);
633 }
634 
635 
636 /*
637  * look for the MP spec signature
638  */
639 
640 /* string defined by the Intel MP Spec as identifying the MP table */
641 #define MP_SIG		0x5f504d5f	/* _MP_ */
642 #define NEXT(X)		((X) += 4)
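/*
 * Note: 0x5f504d5f is "_MP_" ('_' 0x5f, 'M' 0x4d, 'P' 0x50, '_' 0x5f)
 * read as a little-endian 32-bit value, so the scan below can compare
 * whole 32-bit words on 4-byte boundaries.
 */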
643 static int
644 search_for_sig(u_int32_t target, int count)
645 {
646 	int     x;
647 	u_int32_t *addr = (u_int32_t *) (KERNBASE + target);
648 
649 	for (x = 0; x < count; NEXT(x))
650 		if (addr[x] == MP_SIG)
651 			/* make array index a byte index */
652 			return (target + (x * sizeof(u_int32_t)));
653 
654 	return -1;
655 }
656 
657 
658 static basetable_entry basetable_entry_types[] =
659 {
660 	{0, 20, "Processor"},
661 	{1, 8, "Bus"},
662 	{2, 8, "I/O APIC"},
663 	{3, 8, "I/O INT"},
664 	{4, 8, "Local INT"}
665 };
666 
667 typedef struct BUSDATA {
668 	u_char  bus_id;
669 	enum busTypes bus_type;
670 }       bus_datum;
671 
672 typedef struct INTDATA {
673 	u_char  int_type;
674 	u_short int_flags;
675 	u_char  src_bus_id;
676 	u_char  src_bus_irq;
677 	u_char  dst_apic_id;
678 	u_char  dst_apic_int;
679 	u_char	int_vector;
680 }       io_int, local_int;
681 
682 typedef struct BUSTYPENAME {
683 	u_char  type;
684 	char    name[7];
685 }       bus_type_name;
686 
687 static bus_type_name bus_type_table[] =
688 {
689 	{CBUS, "CBUS"},
690 	{CBUSII, "CBUSII"},
691 	{EISA, "EISA"},
692 	{UNKNOWN_BUSTYPE, "---"},
693 	{UNKNOWN_BUSTYPE, "---"},
694 	{ISA, "ISA"},
695 	{UNKNOWN_BUSTYPE, "---"},
696 	{UNKNOWN_BUSTYPE, "---"},
697 	{UNKNOWN_BUSTYPE, "---"},
698 	{UNKNOWN_BUSTYPE, "---"},
699 	{UNKNOWN_BUSTYPE, "---"},
700 	{UNKNOWN_BUSTYPE, "---"},
701 	{PCI, "PCI"},
702 	{UNKNOWN_BUSTYPE, "---"},
703 	{UNKNOWN_BUSTYPE, "---"},
704 	{UNKNOWN_BUSTYPE, "---"},
705 	{UNKNOWN_BUSTYPE, "---"},
706 	{XPRESS, "XPRESS"},
707 	{UNKNOWN_BUSTYPE, "---"}
708 };
709 /* from MP spec v1.4, table 5-1 */
710 static int default_data[7][5] =
711 {
712 /*   nbus, id0, type0, id1, type1 */
713 	{1, 0, ISA, 255, 255},
714 	{1, 0, EISA, 255, 255},
715 	{1, 0, EISA, 255, 255},
716 	{0, 255, 255, 255, 255},/* MCA not supported */
717 	{2, 0, ISA, 1, PCI},
718 	{2, 0, EISA, 1, PCI},
719 	{0, 255, 255, 255, 255}	/* MCA not supported */
720 };
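
/*
 * Example (illustrative): default config type 5 ("ISA + PCI, integrated
 * APIC") maps to default_data[5 - 1] = {2, 0, ISA, 1, PCI}: two busses,
 * bus id 0 is ISA and bus id 1 is PCI.  default_mp_table() indexes this
 * table with (type - 1).
 */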
721 
722 
723 /* the bus data */
724 static bus_datum bus_data[NBUS];
725 
726 /* the IO INT data, one entry per possible APIC INTerrupt */
727 static io_int  io_apic_ints[NINTR];
728 
729 static int nintrs;
730 
731 static int processor_entry	__P((proc_entry_ptr entry, int cpu));
732 static int bus_entry		__P((bus_entry_ptr entry, int bus));
733 static int io_apic_entry	__P((io_apic_entry_ptr entry, int apic));
734 static int int_entry		__P((int_entry_ptr entry, int intr));
735 static int lookup_bus_type	__P((char *name));
736 
737 
738 /*
739  * 1st pass on motherboard's Intel MP specification table.
740  *
741  * initializes:
742  *	mp_ncpus = 1
743  *
744  * determines:
745  *	cpu_apic_address (common to all CPUs)
746  *	io_apic_address[N]
747  *	mp_naps
748  *	mp_nbusses
749  *	mp_napics
750  *	nintrs
751  */
752 static int
753 mptable_pass1(void)
754 {
755 	int	x;
756 	mpcth_t	cth;
757 	int	totalSize;
758 	void*	position;
759 	int	count;
760 	int	type;
761 	int	mustpanic;
762 
763 	POSTCODE(MPTABLE_PASS1_POST);
764 
765 	mustpanic = 0;
766 
767 	/* clear various tables */
768 	for (x = 0; x < NAPICID; ++x) {
769 		io_apic_address[x] = ~0;	/* IO APIC address table */
770 	}
771 
772 	/* init everything to empty */
773 	mp_naps = 0;
774 	mp_nbusses = 0;
775 	mp_napics = 0;
776 	nintrs = 0;
777 
778 	/* check for use of 'default' configuration */
779 	if (MPFPS_MPFB1 != 0) {
780 		/* use default addresses */
781 		cpu_apic_address = DEFAULT_APIC_BASE;
782 		io_apic_address[0] = DEFAULT_IO_APIC_BASE;
783 
784 		/* fill in with defaults */
785 		mp_naps = 2;		/* includes BSP */
786 		mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
787 #if defined(APIC_IO)
788 		mp_napics = 1;
789 		nintrs = 16;
790 #endif	/* APIC_IO */
791 	}
792 	else {
793 		if ((cth = mpfps->pap) == 0)
794 			panic("MP Configuration Table Header MISSING!");
795 
796 		cpu_apic_address = (vm_offset_t) cth->apic_address;
797 
798 		/* walk the table, recording info of interest */
799 		totalSize = cth->base_table_length - sizeof(struct MPCTH);
800 		position = (u_char *) cth + sizeof(struct MPCTH);
801 		count = cth->entry_count;
802 
803 		while (count--) {
804 			switch (type = *(u_char *) position) {
805 			case 0: /* processor_entry */
806 				if (((proc_entry_ptr)position)->cpu_flags
807 					& PROCENTRY_FLAG_EN)
808 					++mp_naps;
809 				break;
810 			case 1: /* bus_entry */
811 				++mp_nbusses;
812 				break;
813 			case 2: /* io_apic_entry */
814 				if (((io_apic_entry_ptr)position)->apic_flags
815 					& IOAPICENTRY_FLAG_EN)
816 					io_apic_address[mp_napics++] =
817 					    (vm_offset_t)((io_apic_entry_ptr)
818 						position)->apic_address;
819 				break;
820 			case 3: /* int_entry */
821 				++nintrs;
822 				break;
823 			case 4:	/* local_int_entry */
824 				break;
825 			default:
826 				panic("mpfps Base Table HOSED!");
827 				/* NOTREACHED */
828 			}
829 
830 			totalSize -= basetable_entry_types[type].length;
831 			position = (u_char *)position + basetable_entry_types[type].length;
832 		}
833 	}
834 
835 	/* qualify the numbers */
836 	if (mp_naps > NCPU) {
837 		printf("Warning: only using %d of %d available CPUs!\n",
838 			NCPU, mp_naps);
839 		mp_naps = NCPU;
840 	}
841 	if (mp_nbusses > NBUS) {
842 		printf("found %d busses, increase NBUS\n", mp_nbusses);
843 		mustpanic = 1;
844 	}
845 	if (mp_napics > NAPIC) {
846 		printf("found %d apics, increase NAPIC\n", mp_napics);
847 		mustpanic = 1;
848 	}
849 	if (nintrs > NINTR) {
850 		printf("found %d intrs, increase NINTR\n", nintrs);
851 		mustpanic = 1;
852 	}
853 
854 	/*
855 	 * Count the BSP.
856 	 * This is also used as a counter while starting the APs.
857 	 */
858 	mp_ncpus = 1;
859 
860 	--mp_naps;	/* subtract the BSP */
861 
862 	return mustpanic;
863 }
864 
865 
866 /*
867  * 2nd pass on motherboard's Intel MP specification table.
868  *
869  * sets:
870  *	boot_cpu_id
871  *	ID_TO_IO(N), phy APIC ID to log CPU/IO table
872  *	CPU_TO_ID(N), logical CPU to APIC ID table
873  *	IO_TO_ID(N), logical IO to APIC ID table
874  *	bus_data[N]
875  *	io_apic_ints[N]
876  */
877 static int
878 mptable_pass2(void)
879 {
880 	int     x;
881 	mpcth_t cth;
882 	int     totalSize;
883 	void*   position;
884 	int     count;
885 	int     type;
886 	int     apic, bus, cpu, intr;
887 
888 	POSTCODE(MPTABLE_PASS2_POST);
889 
890 	/* clear various tables */
891 	for (x = 0; x < NAPICID; ++x) {
892 		ID_TO_IO(x) = -1;	/* phy APIC ID to log CPU/IO table */
893 		CPU_TO_ID(x) = -1;	/* logical CPU to APIC ID table */
894 		IO_TO_ID(x) = -1;	/* logical IO to APIC ID table */
895 	}
896 
897 	/* clear bus data table */
898 	for (x = 0; x < NBUS; ++x)
899 		bus_data[x].bus_id = 0xff;
900 
901 	/* clear IO APIC INT table */
902 	for (x = 0; x < NINTR; ++x) {
903 		io_apic_ints[x].int_type = 0xff;
904 		io_apic_ints[x].int_vector = 0xff;
905 	}
906 
907 	/* setup the cpu/apic mapping arrays */
908 	boot_cpu_id = -1;
909 
910 	/* record whether PIC or virtual-wire mode */
911 	picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
912 
913 	/* check for use of 'default' configuration */
914 	if (MPFPS_MPFB1 != 0)
915 		return MPFPS_MPFB1;	/* return default configuration type */
916 
917 	if ((cth = mpfps->pap) == 0)
918 		panic("MP Configuration Table Header MISSING!");
919 
920 	/* walk the table, recording info of interest */
921 	totalSize = cth->base_table_length - sizeof(struct MPCTH);
922 	position = (u_char *) cth + sizeof(struct MPCTH);
923 	count = cth->entry_count;
924 	apic = bus = intr = 0;
925 	cpu = 1;				/* pre-count the BSP */
926 
927 	while (count--) {
928 		switch (type = *(u_char *) position) {
929 		case 0:
930 			if (processor_entry(position, cpu))
931 				++cpu;
932 			break;
933 		case 1:
934 			if (bus_entry(position, bus))
935 				++bus;
936 			break;
937 		case 2:
938 			if (io_apic_entry(position, apic))
939 				++apic;
940 			break;
941 		case 3:
942 			if (int_entry(position, intr))
943 				++intr;
944 			break;
945 		case 4:
946 			/* int_entry(position); */
947 			break;
948 		default:
949 			panic("mpfps Base Table HOSED!");
950 			/* NOTREACHED */
951 		}
952 
953 		totalSize -= basetable_entry_types[type].length;
954 		position = (u_char *) position + basetable_entry_types[type].length;
955 	}
956 
957 	if (boot_cpu_id == -1)
958 		panic("NO BSP found!");
959 
960 	/* report the fact that it's NOT a default configuration */
961 	return 0;
962 }
963 
964 
965 static void
966 assign_apic_irq(int apic, int intpin, int irq)
967 {
968 	int x;
969 
970 	if (int_to_apicintpin[irq].ioapic != -1)
971 		panic("assign_apic_irq: inconsistent table");
972 
973 	int_to_apicintpin[irq].ioapic = apic;
974 	int_to_apicintpin[irq].int_pin = intpin;
975 	int_to_apicintpin[irq].apic_address = ioapic[apic];
976 	int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
977 
978 	for (x = 0; x < nintrs; x++) {
979 		if ((io_apic_ints[x].int_type == 0 ||
980 		     io_apic_ints[x].int_type == 3) &&
981 		    io_apic_ints[x].int_vector == 0xff &&
982 		    io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
983 		    io_apic_ints[x].dst_apic_int == intpin)
984 			io_apic_ints[x].int_vector = irq;
985 	}
986 }
987 
988 /*
989  * parse an Intel MP specification table
990  */
991 static void
992 fix_mp_table(void)
993 {
994 	int	x;
995 	int	id;
996 	int	bus_0 = 0;	/* Stop GCC warning */
997 	int	bus_pci = 0;	/* Stop GCC warning */
998 	int	num_pci_bus;
999 
1000 	/*
1001 	 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
1002 	 * did it wrong.  The MP spec says that when more than 1 PCI bus
1003 	 * exists the BIOS must begin with bus entries for the PCI bus and use
1004 	 * actual PCI bus numbering.  This implies that when only 1 PCI bus
1005 	 * exists the BIOS can choose to ignore this ordering, and indeed many
1006 	 * MP motherboards do ignore it.  This causes a problem when the PCI
1007 	 * sub-system makes requests of the MP sub-system based on PCI bus
1008 	 * numbers.	So here we look for the situation and renumber the
1009 	 * busses and associated INTs in an effort to "make it right".
1010 	 */
1011 
1012 	/* find bus 0, PCI bus, count the number of PCI busses */
1013 	for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
1014 		if (bus_data[x].bus_id == 0) {
1015 			bus_0 = x;
1016 		}
1017 		if (bus_data[x].bus_type == PCI) {
1018 			++num_pci_bus;
1019 			bus_pci = x;
1020 		}
1021 	}
1022 	/*
1023 	 * bus_0 == slot of bus with ID of 0
1024 	 * bus_pci == slot of last PCI bus encountered
1025 	 */
1026 
1027 	/* check the 1 PCI bus case for sanity */
1028 	if (num_pci_bus == 1) {
1029 
1030 		/* if it is number 0 all is well */
1031 		if (bus_data[bus_pci].bus_id == 0)
1032 			return;
1033 
1034 		/* mis-numbered, swap with whichever bus uses slot 0 */
1035 
1036 		/* swap the bus entry types */
1037 		bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
1038 		bus_data[bus_0].bus_type = PCI;
1039 
1040 		/* swap each relevant INTerrupt entry */
1041 		id = bus_data[bus_pci].bus_id;
1042 		for (x = 0; x < nintrs; ++x) {
1043 			if (io_apic_ints[x].src_bus_id == id) {
1044 				io_apic_ints[x].src_bus_id = 0;
1045 			}
1046 			else if (io_apic_ints[x].src_bus_id == 0) {
1047 				io_apic_ints[x].src_bus_id = id;
1048 			}
1049 		}
1050 	}
1051 	/* sanity check if more than 1 PCI bus */
1052 	else if (num_pci_bus > 1) {
1053 		for (x = 0; x < mp_nbusses; ++x) {
1054 			if (bus_data[x].bus_type != PCI)
1055 				continue;
1056 		}
1057 	}
1058 }
1059 
1060 
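/*
 * Summary of the strategy below: first give every usable pin on the
 * first IO APIC whose destination interrupt number is below
 * APIC_INTMAPSIZE an identity IRQ assignment, so ISA interrupt numbers
 * are preserved; then hand out the remaining free IRQ slots to any INT
 * entries that are still unassigned.
 */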
1061 static void
1062 setup_apic_irq_mapping(void)
1063 {
1064 	int	x;
1065 	int	int_vector;
1066 
1067 	/* Assign low level interrupt handlers */
1068 	for (x = 0; x < APIC_INTMAPSIZE; x++) {
1069 		int_to_apicintpin[x].ioapic = -1;
1070 		int_to_apicintpin[x].int_pin = 0;
1071 		int_to_apicintpin[x].apic_address = NULL;
1072 		int_to_apicintpin[x].redirindex = 0;
1073 	}
1074 	for (x = 0; x < nintrs; x++) {
1075 		if (io_apic_ints[x].dst_apic_int < APIC_INTMAPSIZE &&
1076 		    io_apic_ints[x].dst_apic_id == IO_TO_ID(0) &&
1077 		    io_apic_ints[x].int_vector == 0xff &&
1078 		    (io_apic_ints[x].int_type == 0 ||
1079 		     io_apic_ints[x].int_type == 3)) {
1080 			assign_apic_irq(0,
1081 					io_apic_ints[x].dst_apic_int,
1082 					io_apic_ints[x].dst_apic_int);
1083 		}
1084 	}
1085 	int_vector = 0;
1086 	while (int_vector < APIC_INTMAPSIZE &&
1087 	       int_to_apicintpin[int_vector].ioapic != -1)
1088 		int_vector++;
1089 	for (x = 0; x < nintrs && int_vector < APIC_INTMAPSIZE; x++) {
1090 		if ((io_apic_ints[x].int_type == 0 ||
1091 		     io_apic_ints[x].int_type == 3) &&
1092 		    io_apic_ints[x].int_vector == 0xff) {
1093 			assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id),
1094 					io_apic_ints[x].dst_apic_int,
1095 					int_vector);
1096 			int_vector++;
1097 			while (int_vector < APIC_INTMAPSIZE &&
1098 			       int_to_apicintpin[int_vector].ioapic != -1)
1099 				int_vector++;
1100 		}
1101 	}
1102 }
1103 
1104 
1105 static int
1106 processor_entry(proc_entry_ptr entry, int cpu)
1107 {
1108 	/* check for usability */
1109 	if ((cpu >= NCPU) || !(entry->cpu_flags & PROCENTRY_FLAG_EN))
1110 		return 0;
1111 
1112 	/* check for BSP flag */
1113 	if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
1114 		boot_cpu_id = entry->apic_id;
1115 		CPU_TO_ID(0) = entry->apic_id;
1116 		ID_TO_CPU(entry->apic_id) = 0;
1117 		return 0;	/* it's already been counted */
1118 	}
1119 
1120 	/* add another AP to list, if less than max number of CPUs */
1121 	else {
1122 		CPU_TO_ID(cpu) = entry->apic_id;
1123 		ID_TO_CPU(entry->apic_id) = cpu;
1124 		return 1;
1125 	}
1126 }
1127 
1128 
1129 static int
1130 bus_entry(bus_entry_ptr entry, int bus)
1131 {
1132 	int     x;
1133 	char    c, name[8];
1134 
1135 	/* encode the name into an index */
1136 	for (x = 0; x < 6; ++x) {
1137 		if ((c = entry->bus_type[x]) == ' ')
1138 			break;
1139 		name[x] = c;
1140 	}
1141 	name[x] = '\0';
1142 
1143 	if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE)
1144 		panic("unknown bus type: '%s'", name);
1145 
1146 	bus_data[bus].bus_id = entry->bus_id;
1147 	bus_data[bus].bus_type = x;
1148 
1149 	return 1;
1150 }
1151 
1152 
1153 static int
1154 io_apic_entry(io_apic_entry_ptr entry, int apic)
1155 {
1156 	if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
1157 		return 0;
1158 
1159 	IO_TO_ID(apic) = entry->apic_id;
1160 	ID_TO_IO(entry->apic_id) = apic;
1161 
1162 	return 1;
1163 }
1164 
1165 
1166 static int
1167 lookup_bus_type(char *name)
1168 {
1169 	int     x;
1170 
1171 	for (x = 0; x < MAX_BUSTYPE; ++x)
1172 		if (strcmp(bus_type_table[x].name, name) == 0)
1173 			return bus_type_table[x].type;
1174 
1175 	return UNKNOWN_BUSTYPE;
1176 }
1177 
1178 
1179 static int
1180 int_entry(int_entry_ptr entry, int intr)
1181 {
1182 	int apic;
1183 
1184 	io_apic_ints[intr].int_type = entry->int_type;
1185 	io_apic_ints[intr].int_flags = entry->int_flags;
1186 	io_apic_ints[intr].src_bus_id = entry->src_bus_id;
1187 	io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
1188 	if (entry->dst_apic_id == 255) {
1189 		/* This signal goes to all IO APICs.  Select an IO APIC
1190 		   with a sufficient number of interrupt pins */
1191 		for (apic = 0; apic < mp_napics; apic++)
1192 			if (((io_apic_read(apic, IOAPIC_VER) &
1193 			      IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >=
1194 			    entry->dst_apic_int)
1195 				break;
1196 		if (apic < mp_napics)
1197 			io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic);
1198 		else
1199 			io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1200 	} else
1201 		io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1202 	io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
1203 
1204 	return 1;
1205 }
1206 
1207 
1208 static int
1209 apic_int_is_bus_type(int intr, int bus_type)
1210 {
1211 	int     bus;
1212 
1213 	for (bus = 0; bus < mp_nbusses; ++bus)
1214 		if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
1215 		    && ((int) bus_data[bus].bus_type == bus_type))
1216 			return 1;
1217 
1218 	return 0;
1219 }
1220 
1221 
1222 /*
1223  * Given a traditional ISA INT mask, return an APIC mask.
1224  */
1225 u_int
1226 isa_apic_mask(u_int isa_mask)
1227 {
1228 	int isa_irq;
1229 	int apic_pin;
1230 
1231 #if defined(SKIP_IRQ15_REDIRECT)
1232 	if (isa_mask == (1 << 15)) {
1233 		printf("skipping ISA IRQ15 redirect\n");
1234 		return isa_mask;
1235 	}
1236 #endif  /* SKIP_IRQ15_REDIRECT */
1237 
1238 	isa_irq = ffs(isa_mask);		/* find its bit position */
1239 	if (isa_irq == 0)			/* doesn't exist */
1240 		return 0;
1241 	--isa_irq;				/* make it zero based */
1242 
1243 	apic_pin = isa_apic_irq(isa_irq);	/* look for APIC connection */
1244 	if (apic_pin == -1)
1245 		return 0;
1246 
1247 	return (1 << apic_pin);			/* convert pin# to a mask */
1248 }
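
/*
 * Example (illustrative): isa_apic_mask(1 << 6) looks up the interrupt
 * number that ISA IRQ 6 was routed to by the APIC mapping and returns a
 * mask with only that bit set (bit 6 again under the usual 1:1 mapping),
 * or 0 if IRQ 6 has no APIC connection.
 */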
1249 
1250 
1251 /*
1252  * Determine which APIC pin an ISA/EISA INT is attached to.
1253  */
1254 #define INTTYPE(I)	(io_apic_ints[(I)].int_type)
1255 #define INTPIN(I)	(io_apic_ints[(I)].dst_apic_int)
1256 #define INTIRQ(I)	(io_apic_ints[(I)].int_vector)
1257 #define INTAPIC(I)	(ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
1258 
1259 #define SRCBUSIRQ(I)	(io_apic_ints[(I)].src_bus_irq)
1260 int
1261 isa_apic_irq(int isa_irq)
1262 {
1263 	int     intr;
1264 
1265 	for (intr = 0; intr < nintrs; ++intr) {		/* check each record */
1266 		if (INTTYPE(intr) == 0) {		/* standard INT */
1267 			if (SRCBUSIRQ(intr) == isa_irq) {
1268 				if (apic_int_is_bus_type(intr, ISA) ||
1269 			            apic_int_is_bus_type(intr, EISA))
1270 					return INTIRQ(intr);	/* found */
1271 			}
1272 		}
1273 	}
1274 	return -1;					/* NOT found */
1275 }
1276 
1277 
1278 /*
1279  * Determine which APIC pin a PCI INT is attached to.
1280  */
1281 #define SRCBUSID(I)	(io_apic_ints[(I)].src_bus_id)
1282 #define SRCBUSDEVICE(I)	((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
1283 #define SRCBUSLINE(I)	(io_apic_ints[(I)].src_bus_irq & 0x03)
1284 int
1285 pci_apic_irq(int pciBus, int pciDevice, int pciInt)
1286 {
1287 	int     intr;
1288 
1289 	--pciInt;					/* zero based */
1290 
1291 	for (intr = 0; intr < nintrs; ++intr)		/* check each record */
1292 		if ((INTTYPE(intr) == 0)		/* standard INT */
1293 		    && (SRCBUSID(intr) == pciBus)
1294 		    && (SRCBUSDEVICE(intr) == pciDevice)
1295 		    && (SRCBUSLINE(intr) == pciInt))	/* a candidate IRQ */
1296 			if (apic_int_is_bus_type(intr, PCI))
1297 				return INTIRQ(intr);	/* exact match */
1298 
1299 	return -1;					/* NOT found */
1300 }
1301 
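/*
 * Given an IRQ, look for another IO APIC pin wired to the same interrupt
 * source (same source bus; same device/line for PCI, same IRQ for
 * ISA/EISA) and return that pin's IRQ, or -1 if no alternate pin exists.
 */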
1302 int
1303 next_apic_irq(int irq)
1304 {
1305 	int intr, ointr;
1306 	int bus, bustype;
1307 
1308 	bus = 0;
1309 	bustype = 0;
1310 	for (intr = 0; intr < nintrs; intr++) {
1311 		if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
1312 			continue;
1313 		bus = SRCBUSID(intr);
1314 		bustype = apic_bus_type(bus);
1315 		if (bustype != ISA &&
1316 		    bustype != EISA &&
1317 		    bustype != PCI)
1318 			continue;
1319 		break;
1320 	}
1321 	if (intr >= nintrs) {
1322 		return -1;
1323 	}
1324 	for (ointr = intr + 1; ointr < nintrs; ointr++) {
1325 		if (INTTYPE(ointr) != 0)
1326 			continue;
1327 		if (bus != SRCBUSID(ointr))
1328 			continue;
1329 		if (bustype == PCI) {
1330 			if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
1331 				continue;
1332 			if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
1333 				continue;
1334 		}
1335 		if (bustype == ISA || bustype == EISA) {
1336 			if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
1337 				continue;
1338 		}
1339 		if (INTPIN(intr) == INTPIN(ointr))
1340 			continue;
1341 		break;
1342 	}
1343 	if (ointr >= nintrs) {
1344 		return -1;
1345 	}
1346 	return INTIRQ(ointr);
1347 }
1348 #undef SRCBUSLINE
1349 #undef SRCBUSDEVICE
1350 #undef SRCBUSID
1351 #undef SRCBUSIRQ
1352 
1353 #undef INTPIN
1354 #undef INTIRQ
1355 #undef INTAPIC
1356 #undef INTTYPE
1357 
1358 
1359 /*
1360  * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
1361  *
1362  * XXX FIXME:
1363  *  Exactly what this means is unclear at this point.  It is a solution
1364  *  for motherboards that redirect the MBIRQ0 pin.  Generically a motherboard
1365  *  could route any of the ISA INTs to upper (>15) IRQ values.  But most would
1366  *  NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
1367  *  option.
1368  */
1369 int
1370 undirect_isa_irq(int rirq)
1371 {
1372 #if defined(READY)
1373 	if (bootverbose)
1374 	    printf("Freeing redirected ISA irq %d.\n", rirq);
1375 	/** FIXME: tickle the MB redirector chip */
1376 	return ???;
1377 #else
1378 	if (bootverbose)
1379 	    printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
1380 	return 0;
1381 #endif  /* READY */
1382 }
1383 
1384 
1385 /*
1386  * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
1387  */
1388 int
1389 undirect_pci_irq(int rirq)
1390 {
1391 #if defined(READY)
1392 	if (bootverbose)
1393 		printf("Freeing redirected PCI irq %d.\n", rirq);
1394 
1395 	/** FIXME: tickle the MB redirector chip */
1396 	return ???;
1397 #else
1398 	if (bootverbose)
1399 		printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
1400 		       rirq);
1401 	return 0;
1402 #endif  /* READY */
1403 }
1404 
1405 
1406 /*
1407  * given a bus ID, return:
1408  *  the bus type if found
1409  *  -1 if NOT found
1410  */
1411 int
1412 apic_bus_type(int id)
1413 {
1414 	int     x;
1415 
1416 	for (x = 0; x < mp_nbusses; ++x)
1417 		if (bus_data[x].bus_id == id)
1418 			return bus_data[x].bus_type;
1419 
1420 	return -1;
1421 }
1422 
1423 
1424 /*
1425  * given a LOGICAL APIC# and pin#, return:
1426  *  the associated src bus ID if found
1427  *  -1 if NOT found
1428  */
1429 int
1430 apic_src_bus_id(int apic, int pin)
1431 {
1432 	int     x;
1433 
1434 	/* search each of the possible INTerrupt sources */
1435 	for (x = 0; x < nintrs; ++x)
1436 		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1437 		    (pin == io_apic_ints[x].dst_apic_int))
1438 			return (io_apic_ints[x].src_bus_id);
1439 
1440 	return -1;		/* NOT found */
1441 }
1442 
1443 
1444 /*
1445  * given a LOGICAL APIC# and pin#, return:
1446  *  the associated src bus IRQ if found
1447  *  -1 if NOT found
1448  */
1449 int
1450 apic_src_bus_irq(int apic, int pin)
1451 {
1452 	int     x;
1453 
1454 	for (x = 0; x < nintrs; x++)
1455 		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1456 		    (pin == io_apic_ints[x].dst_apic_int))
1457 			return (io_apic_ints[x].src_bus_irq);
1458 
1459 	return -1;		/* NOT found */
1460 }
1461 
1462 
1463 /*
1464  * given a LOGICAL APIC# and pin#, return:
1465  *  the associated INTerrupt type if found
1466  *  -1 if NOT found
1467  */
1468 int
1469 apic_int_type(int apic, int pin)
1470 {
1471 	int     x;
1472 
1473 	/* search each of the possible INTerrupt sources */
1474 	for (x = 0; x < nintrs; ++x)
1475 		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1476 		    (pin == io_apic_ints[x].dst_apic_int))
1477 			return (io_apic_ints[x].int_type);
1478 
1479 	return -1;		/* NOT found */
1480 }
1481 
1482 int
1483 apic_irq(int apic, int pin)
1484 {
1485 	int x;
1486 	int res;
1487 
1488 	for (x = 0; x < nintrs; ++x)
1489 		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1490 		    (pin == io_apic_ints[x].dst_apic_int)) {
1491 			res = io_apic_ints[x].int_vector;
1492 			if (res == 0xff)
1493 				return -1;
1494 			if (apic != int_to_apicintpin[res].ioapic)
1495 				panic("apic_irq: inconsistent table");
1496 			if (pin != int_to_apicintpin[res].int_pin)
1497 				panic("apic_irq inconsistent table (2)");
1498 			return res;
1499 		}
1500 	return -1;
1501 }
1502 
1503 
1504 /*
1505  * given a LOGICAL APIC# and pin#, return:
1506  *  the associated trigger mode if found
1507  *  -1 if NOT found
1508  */
1509 int
1510 apic_trigger(int apic, int pin)
1511 {
1512 	int     x;
1513 
1514 	/* search each of the possible INTerrupt sources */
1515 	for (x = 0; x < nintrs; ++x)
1516 		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1517 		    (pin == io_apic_ints[x].dst_apic_int))
1518 			return ((io_apic_ints[x].int_flags >> 2) & 0x03);
1519 
1520 	return -1;		/* NOT found */
1521 }
1522 
1523 
1524 /*
1525  * given a LOGICAL APIC# and pin#, return:
1526  *  the associated 'active' level if found
1527  *  -1 if NOT found
1528  */
1529 int
1530 apic_polarity(int apic, int pin)
1531 {
1532 	int     x;
1533 
1534 	/* search each of the possible INTerrupt sources */
1535 	for (x = 0; x < nintrs; ++x)
1536 		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1537 		    (pin == io_apic_ints[x].dst_apic_int))
1538 			return (io_apic_ints[x].int_flags & 0x03);
1539 
1540 	return -1;		/* NOT found */
1541 }
1542 
1543 
1544 /*
1545  * set data according to MP defaults
1546  * FIXME: probably not complete yet...
1547  */
1548 static void
1549 default_mp_table(int type)
1550 {
1551 	int     ap_cpu_id;
1552 #if defined(APIC_IO)
1553 	u_int32_t ux;
1554 	int     io_apic_id;
1555 	int     pin;
1556 #endif	/* APIC_IO */
1557 
1558 #if 0
1559 	printf("  MP default config type: %d\n", type);
1560 	switch (type) {
1561 	case 1:
1562 		printf("   bus: ISA, APIC: 82489DX\n");
1563 		break;
1564 	case 2:
1565 		printf("   bus: EISA, APIC: 82489DX\n");
1566 		break;
1567 	case 3:
1568 		printf("   bus: EISA, APIC: 82489DX\n");
1569 		break;
1570 	case 4:
1571 		printf("   bus: MCA, APIC: 82489DX\n");
1572 		break;
1573 	case 5:
1574 		printf("   bus: ISA+PCI, APIC: Integrated\n");
1575 		break;
1576 	case 6:
1577 		printf("   bus: EISA+PCI, APIC: Integrated\n");
1578 		break;
1579 	case 7:
1580 		printf("   bus: MCA+PCI, APIC: Integrated\n");
1581 		break;
1582 	default:
1583 		printf("   future type\n");
1584 		break;
1585 		/* NOTREACHED */
1586 	}
1587 #endif	/* 0 */
1588 
1589 	boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
1590 	ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;
1591 
1592 	/* BSP */
1593 	CPU_TO_ID(0) = boot_cpu_id;
1594 	ID_TO_CPU(boot_cpu_id) = 0;
1595 
1596 	/* one and only AP */
1597 	CPU_TO_ID(1) = ap_cpu_id;
1598 	ID_TO_CPU(ap_cpu_id) = 1;
1599 
1600 #if defined(APIC_IO)
1601 	/* one and only IO APIC */
1602 	io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
1603 
1604 	/*
1605 	 * sanity check; refer to MP spec section 3.6.6, last paragraph.
1606 	 * This is necessary as some hardware doesn't set up the IO APIC properly.
1607 	 */
1608 #if defined(REALLY_ANAL_IOAPICID_VALUE)
1609 	if (io_apic_id != 2) {
1610 #else
1611 	if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
1612 #endif	/* REALLY_ANAL_IOAPICID_VALUE */
1613 		ux = io_apic_read(0, IOAPIC_ID);	/* get current contents */
1614 		ux &= ~APIC_ID_MASK;	/* clear the ID field */
1615 		ux |= 0x02000000;	/* set it to '2' */
1616 		io_apic_write(0, IOAPIC_ID, ux);	/* write new value */
1617 		ux = io_apic_read(0, IOAPIC_ID);	/* re-read && test */
1618 		if ((ux & APIC_ID_MASK) != 0x02000000)
1619 			panic("can't control IO APIC ID, reg: 0x%08x", ux);
1620 		io_apic_id = 2;
1621 	}
1622 	IO_TO_ID(0) = io_apic_id;
1623 	ID_TO_IO(io_apic_id) = 0;
1624 #endif	/* APIC_IO */
1625 
1626 	/* fill out bus entries */
1627 	switch (type) {
1628 	case 1:
1629 	case 2:
1630 	case 3:
1631 	case 5:
1632 	case 6:
1633 		bus_data[0].bus_id = default_data[type - 1][1];
1634 		bus_data[0].bus_type = default_data[type - 1][2];
1635 		bus_data[1].bus_id = default_data[type - 1][3];
1636 		bus_data[1].bus_type = default_data[type - 1][4];
1637 		break;
1638 
1639 	/* case 4: case 7:		   MCA NOT supported */
1640 	default:		/* illegal/reserved */
1641 		panic("BAD default MP config: %d", type);
1642 		/* NOTREACHED */
1643 	}
1644 
1645 #if defined(APIC_IO)
1646 	/* general cases from MP v1.4, table 5-2 */
1647 	for (pin = 0; pin < 16; ++pin) {
1648 		io_apic_ints[pin].int_type = 0;
1649 		io_apic_ints[pin].int_flags = 0x05;	/* edge/active-hi */
1650 		io_apic_ints[pin].src_bus_id = 0;
1651 		io_apic_ints[pin].src_bus_irq = pin;	/* IRQ2 caught below */
1652 		io_apic_ints[pin].dst_apic_id = io_apic_id;
1653 		io_apic_ints[pin].dst_apic_int = pin;	/* 1-to-1 */
1654 	}
1655 
1656 	/* special cases from MP v1.4, table 5-2 */
1657 	if (type == 2) {
1658 		io_apic_ints[2].int_type = 0xff;	/* N/C */
1659 		io_apic_ints[13].int_type = 0xff;	/* N/C */
1660 #if !defined(APIC_MIXED_MODE)
1661 		/** FIXME: ??? */
1662 		panic("sorry, can't support type 2 default yet");
1663 #endif	/* APIC_MIXED_MODE */
1664 	}
1665 	else
1666 		io_apic_ints[2].src_bus_irq = 0;	/* ISA IRQ0 is on APIC INT 2 */
1667 
1668 	if (type == 7)
1669 		io_apic_ints[0].int_type = 0xff;	/* N/C */
1670 	else
1671 		io_apic_ints[0].int_type = 3;	/* vectored 8259 */
1672 #endif	/* APIC_IO */
1673 }
1674 
1675 
1676 /*
1677  * initialize all the SMP locks
1678  */
1679 
1680 /* critical region around IO APIC, apic_imen */
1681 struct simplelock	imen_lock;
1682 
1683 /* critical region around splxx(), cpl, cml, cil, ipending */
1684 struct simplelock	cpl_lock;
1685 
1686 /* Make FAST_INTR() routines sequential */
1687 struct simplelock	fast_intr_lock;
1688 
1689 /* critical region around INTR() routines */
1690 struct simplelock	intr_lock;
1691 
1692 /* lock regions protected in UP kernel via cli/sti */
1693 struct simplelock	mpintr_lock;
1694 
1695 /* lock region used by kernel profiling */
1696 struct simplelock	mcount_lock;
1697 
1698 #ifdef USE_COMLOCK
1699 /* locks com (tty) data/hardware accesses: a FASTINTR() */
1700 struct simplelock	com_lock;
1701 #endif /* USE_COMLOCK */
1702 
1703 #ifdef USE_CLOCKLOCK
1704 /* lock regions around the clock hardware */
1705 struct simplelock	clock_lock;
1706 #endif /* USE_CLOCKLOCK */
1707 
1708 /* lock around the MP rendezvous */
1709 static struct simplelock smp_rv_lock;
1710 
1711 static void
1712 init_locks(void)
1713 {
1714 	/*
1715 	 * Get the initial mp_lock with a count of 1 for the BSP.
1716 	 * This uses a LOGICAL cpu ID, ie BSP == 0.
1717 	 */
1718 	mp_lock = 0x00000001;
1719 
1720 	/* ISR uses its own "giant lock" */
1721 	isr_lock = FREE_LOCK;
1722 
1723 #if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
1724 	s_lock_init((struct simplelock*)&apic_itrace_debuglock);
1725 #endif
1726 
1727 	s_lock_init((struct simplelock*)&mpintr_lock);
1728 
1729 	s_lock_init((struct simplelock*)&mcount_lock);
1730 
1731 	s_lock_init((struct simplelock*)&fast_intr_lock);
1732 	s_lock_init((struct simplelock*)&intr_lock);
1733 	s_lock_init((struct simplelock*)&imen_lock);
1734 	s_lock_init((struct simplelock*)&cpl_lock);
1735 	s_lock_init(&smp_rv_lock);
1736 
1737 #ifdef USE_COMLOCK
1738 	s_lock_init((struct simplelock*)&com_lock);
1739 #endif /* USE_COMLOCK */
1740 #ifdef USE_CLOCKLOCK
1741 	s_lock_init((struct simplelock*)&clock_lock);
1742 #endif /* USE_CLOCKLOCK */
1743 }
1744 
1745 
1746 /* Wait for all APs to be fully initialized */
1747 extern int wait_ap(unsigned int);
1748 
1749 /*
1750  * start each AP in our list
1751  */
1752 static int
1753 start_all_aps(u_int boot_addr)
1754 {
1755 	int     x, i, pg;
1756 	u_char  mpbiosreason;
1757 	u_long  mpbioswarmvec;
1758 	struct globaldata *gd;
1759 	char *stack;
1760 
1761 	POSTCODE(START_ALL_APS_POST);
1762 
1763 	/* initialize BSP's local APIC */
1764 	apic_initialize();
1765 	bsp_apic_ready = 1;
1766 
1767 	/* install the AP 1st level boot code */
1768 	install_ap_tramp(boot_addr);
1769 
1770 
1771 	/* save the current value of the warm-start vector */
1772 	mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
1773 #ifndef PC98
1774 	outb(CMOS_REG, BIOS_RESET);
1775 	mpbiosreason = inb(CMOS_DATA);
1776 #endif
1777 
1778 	/* record BSP in CPU map */
1779 	all_cpus = 1;
1780 
1781 	/* set up 0 -> 4MB P==V mapping for AP boot */
1782 	*(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
1783 	invltlb();
1784 
1785 	/* start each AP */
1786 	for (x = 1; x <= mp_naps; ++x) {
1787 
1788 		/* This is a bit verbose; it will go away soon.  */
1789 
1790 		/* first page of AP's private space */
1791 		pg = x * i386_btop(sizeof(struct privatespace));
1792 
1793 		/* allocate a new private data page */
1794 		gd = (struct globaldata *)kmem_alloc(kernel_map, PAGE_SIZE);
1795 
1796 		/* wire it into the private page table page */
1797 		SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(gd));
1798 
1799 		/* allocate and set up an idle stack data page */
1800 		stack = (char *)kmem_alloc(kernel_map, UPAGES*PAGE_SIZE);
1801 		for (i = 0; i < UPAGES; i++)
1802 			SMPpt[pg + 5 + i] = (pt_entry_t)
1803 			    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
1804 
1805 		SMPpt[pg + 1] = 0;		/* *prv_CMAP1 */
1806 		SMPpt[pg + 2] = 0;		/* *prv_CMAP2 */
1807 		SMPpt[pg + 3] = 0;		/* *prv_CMAP3 */
1808 		SMPpt[pg + 4] = 0;		/* *prv_PMAP1 */
1809 
1810 		/* prime data page for it to use */
1811 		gd->gd_cpuid = x;
1812 		gd->gd_cpu_lockid = x << 24;
1813 		gd->gd_prv_CMAP1 = &SMPpt[pg + 1];
1814 		gd->gd_prv_CMAP2 = &SMPpt[pg + 2];
1815 		gd->gd_prv_CMAP3 = &SMPpt[pg + 3];
1816 		gd->gd_prv_PMAP1 = &SMPpt[pg + 4];
1817 		gd->gd_prv_CADDR1 = SMP_prvspace[x].CPAGE1;
1818 		gd->gd_prv_CADDR2 = SMP_prvspace[x].CPAGE2;
1819 		gd->gd_prv_CADDR3 = SMP_prvspace[x].CPAGE3;
1820 		gd->gd_prv_PADDR1 = (unsigned *)SMP_prvspace[x].PPAGE1;
1821 
1822 		/* setup a vector to our boot code */
1823 		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
1824 		*((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
1825 #ifndef PC98
1826 		outb(CMOS_REG, BIOS_RESET);
1827 		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
1828 #endif
1829 
1830 		bootSTK = &SMP_prvspace[x].idlestack[UPAGES*PAGE_SIZE];
1831 		bootAP = x;
1832 
1833 		/* attempt to start the Application Processor */
1834 		CHECK_INIT(99);	/* setup checkpoints */
1835 		if (!start_ap(x, boot_addr)) {
1836 			printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
1837 			CHECK_PRINT("trace");	/* show checkpoints */
1838 			/* better panic as the AP may be running loose */
1839 			printf("panic y/n? [y] ");
1840 			if (cngetc() != 'n')
1841 				panic("bye-bye");
1842 		}
1843 		CHECK_PRINT("trace");		/* show checkpoints */
1844 
1845 		/* record its version info */
1846 		cpu_apic_versions[x] = cpu_apic_versions[0];
1847 
1848 		all_cpus |= (1 << x);		/* record AP in CPU map */
1849 	}
1850 
1851 	/* build our map of 'other' CPUs */
1852 	other_cpus = all_cpus & ~(1 << cpuid);
1853 
1854 	/* fill in our (BSP) APIC version */
1855 	cpu_apic_versions[0] = lapic.version;
1856 
1857 	/* restore the warmstart vector */
1858 	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
1859 #ifndef PC98
1860 	outb(CMOS_REG, BIOS_RESET);
1861 	outb(CMOS_DATA, mpbiosreason);
1862 #endif
1863 
1864 	/*
1865 	 * Set up the idle context for the BSP.  Similar to above except
1866 	 * that some of it was done by locore, some by pmap.c, and some is
1867 	 * implicit because the BSP is cpu#0 and the page is initially zero,
1868 	 * and also because we can refer to variables by name on the BSP.
1869 	 */
1870 
1871 	/* Allocate and setup BSP idle stack */
1872 	stack = (char *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE);
1873 	for (i = 0; i < UPAGES; i++)
1874 		SMPpt[5 + i] = (pt_entry_t)
1875 		    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
1876 
1877 	*(int *)PTD = 0;
1878 	pmap_set_opt();
1879 
1880 	/* number of APs actually started */
1881 	return mp_ncpus - 1;
1882 }
1883 
1884 
1885 /*
1886  * load the 1st level AP boot code into base memory.
1887  */
1888 
1889 /* targets for relocation */
1890 extern void bigJump(void);
1891 extern void bootCodeSeg(void);
1892 extern void bootDataSeg(void);
1893 extern void MPentry(void);
1894 extern u_int MP_GDT;
1895 extern u_int mp_gdtbase;
1896 
1897 static void
1898 install_ap_tramp(u_int boot_addr)
1899 {
1900 	int     x;
1901 	int     size = *(int *) ((u_long) & bootMP_size);
1902 	u_char *src = (u_char *) ((u_long) bootMP);
1903 	u_char *dst = (u_char *) boot_addr + KERNBASE;
1904 	u_int   boot_base = (u_int) bootMP;
1905 	u_int8_t *dst8;
1906 	u_int16_t *dst16;
1907 	u_int32_t *dst32;
1908 
1909 	POSTCODE(INSTALL_AP_TRAMP_POST);
1910 
1911 	for (x = 0; x < size; ++x)
1912 		*dst++ = *src++;
1913 
1914 	/*
1915 	 * Modify addresses in the code we just moved to basemem.  Unfortunately
1916 	 * we need fairly detailed info about mpboot.s for this to work; changes
1917 	 * to mpboot.s might require changes here.
1918 	 */
1919 
1920 	/* boot code is located in KERNEL space */
1921 	dst = (u_char *) boot_addr + KERNBASE;
1922 
1923 	/* modify the lgdt arg */
1924 	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
1925 	*dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
1926 
1927 	/* modify the ljmp target for MPentry() */
1928 	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
1929 	*dst32 = ((u_int) MPentry - KERNBASE);
1930 
1931 	/* modify the target for boot code segment */
1932 	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
1933 	dst8 = (u_int8_t *) (dst16 + 1);
1934 	*dst16 = (u_int) boot_addr & 0xffff;
1935 	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
1936 
1937 	/* modify the target for boot data segment */
1938 	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
1939 	dst8 = (u_int8_t *) (dst16 + 1);
1940 	*dst16 = (u_int) boot_addr & 0xffff;
1941 	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
1942 }
1943 
1944 
1945 /*
1946  * this function starts the AP (application processor) whose physical
1947  * APIC ID is looked up from 'logical_cpu'.  It does quite a "song and dance"
1948  * to accomplish this.  This is necessary because of the nuances
1949  * of the different hardware we might encounter.  It ain't pretty,
1950  * but it seems to work.
1951  */
1952 static int
1953 start_ap(int logical_cpu, u_int boot_addr)
1954 {
1955 	int     physical_cpu;
1956 	int     vector;
1957 	int     cpus;
1958 	u_long  icr_lo, icr_hi;
1959 
1960 	POSTCODE(START_AP_POST);
1961 
1962 	/* get the PHYSICAL APIC ID# */
1963 	physical_cpu = CPU_TO_ID(logical_cpu);
1964 
1965 	/* calculate the vector */
1966 	vector = (boot_addr >> 12) & 0xff;
1967 
1968 	/* used as a watchpoint to signal AP startup */
1969 	cpus = mp_ncpus;
1970 
1971 	/*
1972 	 * first we do an INIT/RESET IPI.  This INIT IPI might be run, resetting
1973 	 * and running the target CPU, OR it might be latched (P5 bug), leaving
1974 	 * the CPU waiting for a STARTUP IPI, OR it might be ignored.
1976 	 */
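
	/*
	 * For reference, the ICR low-word values used below decode as:
	 * 0x0000c500 = INIT delivery mode, level trigger, level asserted;
	 * 0x00008500 = INIT, level trigger, level deasserted;
	 * 0x00000600 | vector = STARTUP delivery mode, where 'vector' is
	 * the 4K page number of the trampoline (boot_addr >> 12).
	 */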
1977 
1978 	/* setup the address for the target AP */
1979 	icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
1980 	icr_hi |= (physical_cpu << 24);
1981 	lapic.icr_hi = icr_hi;
1982 
1983 	/* do an INIT IPI: assert RESET */
1984 	icr_lo = lapic.icr_lo & 0xfff00000;
1985 	lapic.icr_lo = icr_lo | 0x0000c500;
1986 
1987 	/* wait for pending status end */
1988 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
1989 		 /* spin */ ;
1990 
1991 	/* do an INIT IPI: deassert RESET */
1992 	lapic.icr_lo = icr_lo | 0x00008500;
1993 
1994 	/* wait for pending status end */
1995 	u_sleep(10000);		/* wait ~10mS */
1996 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
1997 		 /* spin */ ;
1998 
1999 	/*
2000 	 * Next we send a STARTUP IPI.  The previous INIT IPI might still be
2001 	 * latched (P5 bug), in which case this first STARTUP terminates
2002 	 * immediately and the previously latched INIT IPI continues; OR
2003 	 * the previous INIT IPI has already run and this STARTUP IPI will
2004 	 * run; OR the previous INIT IPI was ignored and this STARTUP IPI
2005 	 * will run.
2006 	 */
2007 
2008 	/* do a STARTUP IPI */
2009 	lapic.icr_lo = icr_lo | 0x00000600 | vector;
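	/* 0x00000600 = STARTUP delivery mode; 'vector' is the boot page number */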
2010 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
2011 		 /* spin */ ;
2012 	u_sleep(200);		/* wait ~200uS */
2013 
2014 	/*
2015 	 * Finally we send a 2nd STARTUP IPI.  It runs IF the previous
2016 	 * STARTUP IPI was cancelled by a latched INIT IPI; otherwise it is
2017 	 * ignored, as only ONE STARTUP IPI is recognized after a hardware
2018 	 * RESET or an INIT IPI.
2019 	 */
2020 
2021 	lapic.icr_lo = icr_lo | 0x00000600 | vector;
2022 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
2023 		 /* spin */ ;
2024 	u_sleep(200);		/* wait ~200uS */
2025 
2026 	/* wait for it to start */
2027 	set_apic_timer(5000000);	/* == 5 seconds */
2028 	while (read_apic_timer())
2029 		if (mp_ncpus > cpus)
2030 			return 1;	/* return SUCCESS */
2031 
2032 	return 0;		/* return FAILURE */
2033 }
2034 
2035 
2036 /*
2037  * Flush the TLB on all other CPUs.
2038  *
2039  * XXX: Needs to handshake and wait for completion before proceeding.
2040  */
2041 void
2042 smp_invltlb(void)
2043 {
2044 #if defined(APIC_IO)
2045 	if (smp_started && invltlb_ok)
2046 		all_but_self_ipi(XINVLTLB_OFFSET);
2047 #endif  /* APIC_IO */
2048 }
2049 
2050 void
2051 invlpg(u_int addr)
2052 {
2053 	__asm   __volatile("invlpg (%0)"::"r"(addr):"memory");
2054 
2055 	/* send a message to the other CPUs */
2056 	smp_invltlb();
2057 }
2058 
2059 void
2060 invltlb(void)
2061 {
2062 	u_long  temp;
2063 
2064 	/*
2065 	 * This should be implemented as load_cr3(rcr3()) when load_cr3() is
2066 	 * inlined.
2067 	 */
2068 	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
2069 
2070 	/* send a message to the other CPUs */
2071 	smp_invltlb();
2072 }
2073 
2074 
2075 /*
2076  * When called the executing CPU will send an IPI to all other CPUs
2077  *  requesting that they halt execution.
2078  *
2079  * Usually (but not necessarily) called with 'other_cpus' as its arg.
2080  *
2081  *  - Signals all CPUs in map to stop.
2082  *  - Waits for each to stop.
2083  *
2084  * Returns:
2085  *  -1: error
2086  *   0: NA
2087  *   1: ok
2088  *
2089  * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs
2090  *            from executing it at the same time.
2091  */
2092 int
2093 stop_cpus(u_int map)
2094 {
2095 	if (!smp_started)
2096 		return 0;
2097 
2098 	/* send the Xcpustop IPI to all CPUs in map */
2099 	selected_apic_ipi(map, XCPUSTOP_OFFSET, APIC_DELMODE_FIXED);
2100 
2101 	while ((stopped_cpus & map) != map)
2102 		/* spin */ ;
2103 
2104 	return 1;
2105 }
2106 
2107 
2108 /*
2109  * Called by a CPU to restart stopped CPUs.
2110  *
2111  * Usually (but not necessarily) called with 'stopped_cpus' as its arg.
2112  *
2113  *  - Signals all CPUs in map to restart.
2114  *  - Waits for each to restart.
2115  *
2116  * Returns:
2117  *  -1: error
2118  *   0: NA
2119  *   1: ok
2120  */
2121 int
2122 restart_cpus(u_int map)
2123 {
2124 	if (!smp_started)
2125 		return 0;
2126 
2127 	started_cpus = map;		/* signal other cpus to restart */
2128 
2129 	while ((stopped_cpus & map) != 0) /* wait for each to clear its bit */
2130 		/* spin */ ;
2131 
2132 	return 1;
2133 }
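/*
 * A minimal usage sketch (hypothetical caller): freeze the other CPUs
 * around work that must run on a single processor, then release them.
 *
 *	if (stop_cpus(other_cpus)) {
 *		... single-processor work ...
 *		restart_cpus(stopped_cpus);
 *	}
 */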
2134 
2135 int smp_active = 0;	/* are the APs allowed to run? */
2136 SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, "");
2137 
2138 /* XXX maybe should be hw.ncpu */
2139 static int smp_cpus = 1;	/* how many CPUs are running */
2140 SYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, "");
2141 
2142 int invltlb_ok = 0;	/* throttle smp_invltlb() until it is safe */
2143 SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, "");
2144 
2145 /* Warning: Do not staticize.  Used from swtch.s */
2146 int do_page_zero_idle = 1; /* bzero pages for fun and profit in idleloop */
2147 SYSCTL_INT(_machdep, OID_AUTO, do_page_zero_idle, CTLFLAG_RW,
2148 	   &do_page_zero_idle, 0, "");
2149 
2150 /* Is forwarding of an interrupt to the CPU holding the ISR lock enabled? */
2151 int forward_irq_enabled = 1;
2152 SYSCTL_INT(_machdep, OID_AUTO, forward_irq_enabled, CTLFLAG_RW,
2153 	   &forward_irq_enabled, 0, "");
2154 
2155 /* Enable forwarding of a signal to a process running on a different CPU */
2156 static int forward_signal_enabled = 1;
2157 SYSCTL_INT(_machdep, OID_AUTO, forward_signal_enabled, CTLFLAG_RW,
2158 	   &forward_signal_enabled, 0, "");
2159 
2160 /* Enable forwarding of roundrobin to all other cpus */
2161 static int forward_roundrobin_enabled = 1;
2162 SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
2163 	   &forward_roundrobin_enabled, 0, "");
2164 
2165 /*
2166  * This is called once the rest of the system is up and running and we're
2167  * ready to let the APs out of the pen.
2168  */
2169 void ap_init(void);
2170 
2171 void
2172 ap_init()
2173 {
2174 	u_int	apic_id;
2175 
2176 	/* BSP may have changed PTD while we're waiting for the lock */
2177 	cpu_invltlb();
2178 
2179 	smp_cpus++;
2180 
2181 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
2182 	lidt(&r_idt);
2183 #endif
2184 
2185 	/* Build our map of 'other' CPUs. */
2186 	other_cpus = all_cpus & ~(1 << cpuid);
2187 
2188 	printf("SMP: AP CPU #%d Launched!\n", cpuid);
2189 
2190 	/* XXX FIXME: i386 specific, and redundant: Setup the FPU. */
2191 	load_cr0((rcr0() & ~CR0_EM) | CR0_MP | CR0_NE | CR0_TS);
2192 
2193 	/* set up FPU state on the AP */
2194 	npxinit(__INITIAL_NPXCW__);
2195 
2196 	/* A quick check from sanity claus */
2197 	apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
2198 	if (cpuid != apic_id) {
2199 		printf("SMP: cpuid = %d\n", cpuid);
2200 		printf("SMP: apic_id = %d\n", apic_id);
2201 		printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
2202 		panic("cpuid mismatch! boom!!");
2203 	}
2204 
2205 	/* Init local apic for irq's */
2206 	apic_initialize();
2207 
2208 	/* Set memory range attributes for this CPU to match the BSP */
2209 	mem_range_AP_init();
2210 
2211 	/*
2212 	 * Activate smp_invltlb, although strictly speaking, this isn't
2213 	 * quite correct yet.  We should have a bitfield of CPUs willing
2214 	 * to accept TLB flush IPIs, or something similar, and sync them.
2215 	 */
2216 	if (smp_cpus == mp_ncpus) {
2217 		invltlb_ok = 1;
2218 		smp_started = 1; /* enable IPIs, TLB shootdowns, freezes, etc. */
2219 		smp_active = 1;	 /* historic */
2220 	}
2221 }
2222 
2223 #ifdef BETTER_CLOCK
2224 
2225 #define CHECKSTATE_USER	0
2226 #define CHECKSTATE_SYS	1
2227 #define CHECKSTATE_INTR	2
2228 
2229 /* Do not staticize.  Used from apic_vector.s */
2230 struct proc*	checkstate_curproc[NCPU];
2231 int		checkstate_cpustate[NCPU];
2232 u_long		checkstate_pc[NCPU];
2233 
2234 extern long	cp_time[CPUSTATES];
2235 
2236 #define PC_TO_INDEX(pc, prof)				\
2237         ((int)(((u_quad_t)((pc) - (prof)->pr_off) *	\
2238             (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)
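/*
 * PC_TO_INDEX maps a user PC to an offset in the profil(2) buffer: the
 * distance from pr_off is scaled by pr_scale (a fixed-point fraction,
 * hence the >> 16) and forced even because the counters are 16 bits wide.
 */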
2239 
2240 static void
2241 addupc_intr_forwarded(struct proc *p, int id, int *astmap)
2242 {
2243 	int i;
2244 	struct uprof *prof;
2245 	u_long pc;
2246 
2247 	pc = checkstate_pc[id];
2248 	prof = &p->p_stats->p_prof;
2249 	if (pc >= prof->pr_off &&
2250 	    (i = PC_TO_INDEX(pc, prof)) < prof->pr_size) {
2251 		if ((p->p_flag & P_OWEUPC) == 0) {
2252 			prof->pr_addr = pc;
2253 			prof->pr_ticks = 1;
2254 			p->p_flag |= P_OWEUPC;
2255 		}
2256 		*astmap |= (1 << id);
2257 	}
2258 }
2259 
2260 static void
2261 forwarded_statclock(int id, int pscnt, int *astmap)
2262 {
2263 	struct pstats *pstats;
2264 	long rss;
2265 	struct rusage *ru;
2266 	struct vmspace *vm;
2267 	int cpustate;
2268 	struct proc *p;
2269 #ifdef GPROF
2270 	register struct gmonparam *g;
2271 	int i;
2272 #endif
2273 
2274 	p = checkstate_curproc[id];
2275 	cpustate = checkstate_cpustate[id];
2276 
2277 	switch (cpustate) {
2278 	case CHECKSTATE_USER:
2279 		if (p->p_flag & P_PROFIL)
2280 			addupc_intr_forwarded(p, id, astmap);
2281 		if (pscnt > 1)
2282 			return;
2283 		p->p_uticks++;
2284 		if (p->p_nice > NZERO)
2285 			cp_time[CP_NICE]++;
2286 		else
2287 			cp_time[CP_USER]++;
2288 		break;
2289 	case CHECKSTATE_SYS:
2290 #ifdef GPROF
2291 		/*
2292 		 * Kernel statistics are just like addupc_intr, only easier.
2293 		 */
2294 		g = &_gmonparam;
2295 		if (g->state == GMON_PROF_ON) {
2296 			i = checkstate_pc[id] - g->lowpc;
2297 			if (i < g->textsize) {
2298 				i /= HISTFRACTION * sizeof(*g->kcount);
2299 				g->kcount[i]++;
2300 			}
2301 		}
2302 #endif
2303 		if (pscnt > 1)
2304 			return;
2305 
2306 		if (!p)
2307 			cp_time[CP_IDLE]++;
2308 		else {
2309 			p->p_sticks++;
2310 			cp_time[CP_SYS]++;
2311 		}
2312 		break;
2313 	case CHECKSTATE_INTR:
2314 	default:
2315 #ifdef GPROF
2316 		/*
2317 		 * Kernel statistics are just like addupc_intr, only easier.
2318 		 */
2319 		g = &_gmonparam;
2320 		if (g->state == GMON_PROF_ON) {
2321 			i = checkstate_pc[id] - g->lowpc;
2322 			if (i < g->textsize) {
2323 				i /= HISTFRACTION * sizeof(*g->kcount);
2324 				g->kcount[i]++;
2325 			}
2326 		}
2327 #endif
2328 		if (pscnt > 1)
2329 			return;
2330 		if (p)
2331 			p->p_iticks++;
2332 		cp_time[CP_INTR]++;
2333 	}
2334 	if (p != NULL) {
2335 		p->p_cpticks++;
2336 		if (++p->p_estcpu == 0)
2337 			p->p_estcpu--;
2338 		if ((p->p_estcpu & 3) == 0) {
2339 			resetpriority(p);
2340 			if (p->p_priority >= PUSER)
2341 				p->p_priority = p->p_usrpri;
2342 		}
2343 
2344 		/* Update resource usage integrals and maximums. */
2345 		if ((pstats = p->p_stats) != NULL &&
2346 		    (ru = &pstats->p_ru) != NULL &&
2347 		    (vm = p->p_vmspace) != NULL) {
2348 			ru->ru_ixrss += pgtok(vm->vm_tsize);
2349 			ru->ru_idrss += pgtok(vm->vm_dsize);
2350 			ru->ru_isrss += pgtok(vm->vm_ssize);
2351 			rss = pgtok(vmspace_resident_count(vm));
2352 			if (ru->ru_maxrss < rss)
2353 				ru->ru_maxrss = rss;
2354 		}
2355 	}
2356 }
2357 
2358 void
2359 forward_statclock(int pscnt)
2360 {
2361 	int map;
2362 	int id;
2363 	int i;
2364 
2365 	/* Kludge. We don't yet have separate locks for the interrupts
2366 	 * and the kernel. This means that we cannot let the other processors
2367 	 * handle complex interrupts while inhibiting them from entering
2368 	 * the kernel in a non-interrupt context.
2369 	 *
2370 	 * What we can do, without changing the locking mechanisms yet,
2371 	 * is to let the other processors handle a very simple interrupt
2372 	 * (which determines the processor states), and to do the main
2373 	 * work ourselves.
2374 	 */
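	/*
	 * In outline: step 1 IPIs the other CPUs with XCPUCHECKSTATE so each
	 * snapshots its state into the checkstate_* arrays (filled in from
	 * apic_vector.s); step 2 does the accounting here and sends XCPUAST
	 * to any CPU whose current process needs an AST posted.
	 */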
2375 
2376 	if (!smp_started || !invltlb_ok || cold || panicstr)
2377 		return;
2378 
2379 	/* Step 1: Probe state   (user, cpu, interrupt, spinlock, idle ) */
2380 
2381 	map = other_cpus & ~stopped_cpus;
2382 	checkstate_probed_cpus = 0;
2383 	if (map != 0)
2384 		selected_apic_ipi(map,
2385 				  XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED);
2386 
2387 	i = 0;
2388 	while (checkstate_probed_cpus != map) {
2389 		/* spin */
2390 		i++;
2391 		if (i == 100000) {
2392 #ifdef BETTER_CLOCK_DIAGNOSTIC
2393 			printf("forward_statclock: checkstate %x\n",
2394 			       checkstate_probed_cpus);
2395 #endif
2396 			break;
2397 		}
2398 	}
2399 
2400 	/*
2401 	 * Step 2: walk through the other processors' processes, updating
2402 	 * ticks and profiling info.
2403 	 */
2404 
2405 	map = 0;
2406 	for (id = 0; id < mp_ncpus; id++) {
2407 		if (id == cpuid)
2408 			continue;
2409 		if (((1 << id) & checkstate_probed_cpus) == 0)
2410 			continue;
2411 		forwarded_statclock(id, pscnt, &map);
2412 	}
2413 	if (map != 0) {
2414 		checkstate_need_ast |= map;
2415 		selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
2416 		i = 0;
2417 		while ((checkstate_need_ast & map) != 0) {
2418 			/* spin */
2419 			i++;
2420 			if (i > 100000) {
2421 #ifdef BETTER_CLOCK_DIAGNOSTIC
2422 				printf("forward_statclock: dropped ast 0x%x\n",
2423 				       checkstate_need_ast & map);
2424 #endif
2425 				break;
2426 			}
2427 		}
2428 	}
2429 }
2430 
2431 void
2432 forward_hardclock(int pscnt)
2433 {
2434 	int map;
2435 	int id;
2436 	struct proc *p;
2437 	struct pstats *pstats;
2438 	int i;
2439 
2440 	/* Kludge. We don't yet have separate locks for the interrupts
2441 	 * and the kernel. This means that we cannot let the other processors
2442 	 * handle complex interrupts while inhibiting them from entering
2443 	 * the kernel in a non-interrupt context.
2444 	 *
2445 	 * What we can do, without changing the locking mechanisms yet,
2446 	 * is to let the other processors handle a very simple interrupt
2447 	 * (which determines the processor states), and to do the main
2448 	 * work ourselves.
2449 	 */
2450 
2451 	if (!smp_started || !invltlb_ok || cold || panicstr)
2452 		return;
2453 
2454 	/* Step 1: Probe state   (user, cpu, interrupt, spinlock, idle) */
2455 
2456 	map = other_cpus & ~stopped_cpus;
2457 	checkstate_probed_cpus = 0;
2458 	if (map != 0)
2459 		selected_apic_ipi(map,
2460 				  XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED);
2461 
2462 	i = 0;
2463 	while (checkstate_probed_cpus != map) {
2464 		/* spin */
2465 		i++;
2466 		if (i == 100000) {
2467 #ifdef BETTER_CLOCK_DIAGNOSTIC
2468 			printf("forward_hardclock: checkstate %x\n",
2469 			       checkstate_probed_cpus);
2470 #endif
2471 			break;
2472 		}
2473 	}
2474 
2475 	/*
2476 	 * Step 2: walk through the other processors' processes, updating the
2477 	 * virtual timer and profiling timer.  If stathz == 0, also update
2478 	 * ticks and profiling info.
2479 	 */
2480 
2481 	map = 0;
2482 	for (id = 0; id < mp_ncpus; id++) {
2483 		if (id == cpuid)
2484 			continue;
2485 		if (((1 << id) & checkstate_probed_cpus) == 0)
2486 			continue;
2487 		p = checkstate_curproc[id];
2488 		if (p) {
2489 			pstats = p->p_stats;
2490 			if (checkstate_cpustate[id] == CHECKSTATE_USER &&
2491 			    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
2492 			    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) {
2493 				psignal(p, SIGVTALRM);
2494 				map |= (1 << id);
2495 			}
2496 			if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
2497 			    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) {
2498 				psignal(p, SIGPROF);
2499 				map |= (1 << id);
2500 			}
2501 		}
2502 		if (stathz == 0) {
2503 			forwarded_statclock(id, pscnt, &map);
2504 		}
2505 	}
2506 	if (map != 0) {
2507 		checkstate_need_ast |= map;
2508 		selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
2509 		i = 0;
2510 		while ((checkstate_need_ast & map) != 0) {
2511 			/* spin */
2512 			i++;
2513 			if (i > 100000) {
2514 #ifdef BETTER_CLOCK_DIAGNOSTIC
2515 				printf("forward_hardclock: dropped ast 0x%x\n",
2516 				       checkstate_need_ast & map);
2517 #endif
2518 				break;
2519 			}
2520 		}
2521 	}
2522 }
2523 
2524 #endif /* BETTER_CLOCK */
2525 
2526 void
2527 forward_signal(struct proc *p)
2528 {
2529 	int map;
2530 	int id;
2531 	int i;
2532 
2533 	/* Kludge. We don't yet have separate locks for the interrupts
2534 	 * and the kernel. This means that we cannot let the other processors
2535 	 * handle complex interrupts while inhibiting them from entering
2536 	 * the kernel in a non-interrupt context.
2537 	 *
2538 	 * What we can do, without changing the locking mechanisms yet,
2539 	 * is to let the other processors handle a very simple interrupt
2540 	 * (which determines the processor states), and to do the main
2541 	 * work ourselves.
2542 	 */
2543 
2544 	if (!smp_started || !invltlb_ok || cold || panicstr)
2545 		return;
2546 	if (!forward_signal_enabled)
2547 		return;
2548 	while (1) {
2549 		if (p->p_stat != SRUN)
2550 			return;
2551 		id = p->p_oncpu;
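		/* p_oncpu of 0xff appears to mean "not resident on any CPU" */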
2552 		if (id == 0xff)
2553 			return;
2554 		map = (1<<id);
2555 		checkstate_need_ast |= map;
2556 		selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
2557 		i = 0;
2558 		while ((checkstate_need_ast & map) != 0) {
2559 			/* spin */
2560 			i++;
2561 			if (i > 100000) {
2562 #if 0
2563 				printf("forward_signal: dropped ast 0x%x\n",
2564 				       checkstate_need_ast & map);
2565 #endif
2566 				break;
2567 			}
2568 		}
2569 		if (id == p->p_oncpu)
2570 			return;
2571 	}
2572 }
2573 
2574 void
2575 forward_roundrobin(void)
2576 {
2577 	u_int map;
2578 	int i;
2579 
2580 	if (!smp_started || !invltlb_ok || cold || panicstr)
2581 		return;
2582 	if (!forward_roundrobin_enabled)
2583 		return;
2584 	resched_cpus |= other_cpus;
2585 	map = other_cpus & ~stopped_cpus;
2586 #if 1
2587 	selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
2588 #else
2589 	(void) all_but_self_ipi(XCPUAST_OFFSET);
2590 #endif
2591 	i = 0;
2592 	while ((checkstate_need_ast & map) != 0) {
2593 		/* spin */
2594 		i++;
2595 		if (i > 100000) {
2596 #if 0
2597 			printf("forward_roundrobin: dropped ast 0x%x\n",
2598 			       checkstate_need_ast & map);
2599 #endif
2600 			break;
2601 		}
2602 	}
2603 }
2604 
2605 
2606 #ifdef APIC_INTR_REORDER
2607 /*
2608  *	Maintain mapping from softintr vector to isr bit in local apic.
2609  */
2610 void
2611 set_lapic_isrloc(int intr, int vector)
2612 {
2613 	if (intr < 0 || intr > 32)
2614 		panic("set_lapic_isrloc: bad intr argument: %d", intr);
2615 	if (vector < ICU_OFFSET || vector > 255)
2616 		panic("set_lapic_isrloc: bad vector argument: %d", vector);
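	/*
	 * Each 32-bit ISR word of the local APIC sits in a 16-byte-aligned
	 * slot (four u_int32_t's in the lapic layout), hence the <<2 when
	 * stepping from isr0 to the word that holds this vector's bit.
	 */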
2617 	apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2);
2618 	apic_isrbit_location[intr].bit = (1<<(vector & 31));
2619 }
2620 #endif
2621 
2622 /*
2623  * All-CPU rendezvous.  CPUs are signalled, all execute the setup function
2624  * (if specified), rendezvous, execute the action function (if specified),
2625  * rendezvous again, execute the teardown function (if specified), and then
2626  * resume.
2627  *
2628  * Note that the supplied external functions _must_ be reentrant and aware
2629  * that they are running in parallel and in an unknown lock context.
2630  */
2631 static void (*smp_rv_setup_func)(void *arg);
2632 static void (*smp_rv_action_func)(void *arg);
2633 static void (*smp_rv_teardown_func)(void *arg);
2634 static void *smp_rv_func_arg;
2635 static volatile int smp_rv_waiters[2];
2636 
2637 void
2638 smp_rendezvous_action(void)
2639 {
2640 	/* setup function */
2641 	if (smp_rv_setup_func != NULL)
2642 		smp_rv_setup_func(smp_rv_func_arg);
2643 	/* spin on entry rendezvous */
2644 	atomic_add_int(&smp_rv_waiters[0], 1);
2645 	while (smp_rv_waiters[0] < mp_ncpus)
2646 		;
2647 	/* action function */
2648 	if (smp_rv_action_func != NULL)
2649 		smp_rv_action_func(smp_rv_func_arg);
2650 	/* spin on exit rendezvous */
2651 	atomic_add_int(&smp_rv_waiters[1], 1);
2652 	while (smp_rv_waiters[1] < mp_ncpus)
2653 		;
2654 	/* teardown function */
2655 	if (smp_rv_teardown_func != NULL)
2656 		smp_rv_teardown_func(smp_rv_func_arg);
2657 }
2658 
2659 void
2660 smp_rendezvous(void (* setup_func)(void *),
2661 	       void (* action_func)(void *),
2662 	       void (* teardown_func)(void *),
2663 	       void *arg)
2664 {
2665 	u_int	efl;
2666 
2667 	/* obtain rendezvous lock */
2668 	s_lock(&smp_rv_lock);		/* XXX sleep here? NOWAIT flag? */
2669 
2670 	/* set static function pointers */
2671 	smp_rv_setup_func = setup_func;
2672 	smp_rv_action_func = action_func;
2673 	smp_rv_teardown_func = teardown_func;
2674 	smp_rv_func_arg = arg;
2675 	smp_rv_waiters[0] = 0;
2676 	smp_rv_waiters[1] = 0;
2677 
2678 	/* disable interrupts on this CPU, save interrupt status */
2679 	efl = read_eflags();
2680 	write_eflags(efl & ~PSL_I);
2681 
2682 	/* signal the other CPUs; they enter the IPI handler with interrupts off */
2683 	all_but_self_ipi(XRENDEZVOUS_OFFSET);
2684 
2685 	/* call executor function */
2686 	smp_rendezvous_action();
2687 
2688 	/* restore interrupt flag */
2689 	write_eflags(efl);
2690 
2691 	/* release lock */
2692 	s_unlock(&smp_rv_lock);
2693 }
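/*
 * Example (sketch, hypothetical caller): run an action on every CPU at
 * once, e.g. writing back and invalidating the caches:
 *
 *	static void
 *	wbinvd_action(void *dummy)
 *	{
 *		wbinvd();
 *	}
 *
 *	smp_rendezvous(NULL, wbinvd_action, NULL, NULL);
 *
 * The action also runs on the calling CPU via smp_rendezvous_action()
 * above; only the other CPUs are reached through the IPI.
 */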
2694