xref: /linux/arch/x86/kernel/smpboot.c (revision f7511d5f66f01fc451747b24e79f3ada7a3af9af)
1 /*
2  *	x86 SMP booting functions
3  *
4  *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
5  *	(c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
6  *	Copyright 2001 Andi Kleen, SuSE Labs.
7  *
8  *	Much of the core SMP work is based on previous work by Thomas Radke, to
9  *	whom a great many thanks are extended.
10  *
11  *	Thanks to Intel for making available several different Pentium,
12  *	Pentium Pro and Pentium-II/Xeon MP machines.
13  *	Original development of Linux SMP code supported by Caldera.
14  *
15  *	This code is released under the GNU General Public License version 2 or
16  *	later.
17  *
18  *	Fixes
19  *		Felix Koop	:	NR_CPUS used properly
20  *		Jose Renau	:	Handle single CPU case.
21  *		Alan Cox	:	By repeated request 8) - Total BogoMIPS report.
22  *		Greg Wright	:	Fix for kernel stacks panic.
23  *		Erich Boleyn	:	MP v1.4 and additional changes.
24  *	Matthias Sattler	:	Changes for 2.1 kernel map.
25  *	Michel Lespinasse	:	Changes for 2.1 kernel map.
26  *	Michael Chastain	:	Change trampoline.S to gnu as.
27  *		Alan Cox	:	Dumb bug: 'B' step PPro's are fine
28  *		Ingo Molnar	:	Added APIC timers, based on code
29  *					from Jose Renau
30  *		Ingo Molnar	:	various cleanups and rewrites
31  *		Tigran Aivazian	:	fixed "0.00 in /proc/uptime on SMP" bug.
32  *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs
33  *	Andi Kleen		:	Changed for SMP boot into long mode.
34  *		Martin J. Bligh	: 	Added support for multi-quad systems
35  *		Dave Jones	:	Report invalid combinations of Athlon CPUs.
36  *		Rusty Russell	:	Hacked into shape for new "hotplug" boot process.
37  *      Andi Kleen              :       Converted to new state machine.
38  *	Ashok Raj		: 	CPU hotplug support
39  *	Glauber Costa		:	i386 and x86_64 integration
40  */
41 
42 #include <linux/init.h>
43 #include <linux/smp.h>
44 #include <linux/module.h>
45 #include <linux/sched.h>
46 #include <linux/percpu.h>
47 #include <linux/bootmem.h>
48 #include <linux/err.h>
49 #include <linux/nmi.h>
50 
51 #include <asm/acpi.h>
52 #include <asm/desc.h>
53 #include <asm/nmi.h>
54 #include <asm/irq.h>
55 #include <asm/smp.h>
56 #include <asm/trampoline.h>
57 #include <asm/cpu.h>
58 #include <asm/numa.h>
59 #include <asm/pgtable.h>
60 #include <asm/tlbflush.h>
61 #include <asm/mtrr.h>
62 #include <asm/nmi.h>
63 #include <asm/vmi.h>
64 #include <asm/genapic.h>
65 #include <linux/mc146818rtc.h>
66 
67 #include <mach_apic.h>
68 #include <mach_wakecpu.h>
69 #include <smpboot_hooks.h>
70 
71 /*
72  * FIXME: For x86_64, those are defined in other files. But moving them here,
73  * would make the setup areas dependent on smp, which is a loss. When we
74  * integrate apic between arches, we can probably do a better job, but
75  * right now, they'll stay here -- glommer
76  */
77 
78 /* which logical CPU number maps to which CPU (physical APIC ID) */
79 u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata =
80 			{ [0 ... NR_CPUS-1] = BAD_APICID };
81 void *x86_cpu_to_apicid_early_ptr;
82 
83 u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata
84 				= { [0 ... NR_CPUS-1] = BAD_APICID };
85 void *x86_bios_cpu_apicid_early_ptr;
86 
87 #ifdef CONFIG_X86_32
88 u8 apicid_2_node[MAX_APICID];
89 #endif
90 
91 /* State of each CPU */
92 DEFINE_PER_CPU(int, cpu_state) = { 0 };
93 
/*
 * Store all idle threads so that they can be reused instead of creating
 * a new thread. This also avoids complicated thread destroy functionality
 * for idle threads.
 */
98 #ifdef CONFIG_HOTPLUG_CPU
99 /*
100  * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is
101  * removed after init for !CONFIG_HOTPLUG_CPU.
102  */
103 static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
104 #define get_idle_for_cpu(x)      (per_cpu(idle_thread_array, x))
105 #define set_idle_for_cpu(x, p)   (per_cpu(idle_thread_array, x) = (p))
106 #else
107 struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
108 #define get_idle_for_cpu(x)      (idle_thread_array[(x)])
109 #define set_idle_for_cpu(x, p)   (idle_thread_array[(x)] = (p))
110 #endif
111 
112 /* Number of siblings per CPU package */
113 int smp_num_siblings = 1;
114 EXPORT_SYMBOL(smp_num_siblings);
115 
116 /* Last level cache ID of each logical CPU */
117 DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
118 
119 /* bitmap of online cpus */
120 cpumask_t cpu_online_map __read_mostly;
121 EXPORT_SYMBOL(cpu_online_map);
122 
123 cpumask_t cpu_callin_map;
124 cpumask_t cpu_callout_map;
125 cpumask_t cpu_possible_map;
126 EXPORT_SYMBOL(cpu_possible_map);
127 
128 /* representing HT siblings of each logical CPU */
129 DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
130 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
131 
132 /* representing HT and core siblings of each logical CPU */
133 DEFINE_PER_CPU(cpumask_t, cpu_core_map);
134 EXPORT_PER_CPU_SYMBOL(cpu_core_map);
135 
136 /* Per CPU bogomips and other parameters */
137 DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
138 EXPORT_PER_CPU_SYMBOL(cpu_info);
139 
140 static atomic_t init_deasserted;
141 
142 static int boot_cpu_logical_apicid;
143 
144 /* representing cpus for which sibling maps can be computed */
145 static cpumask_t cpu_sibling_setup_map;
146 
147 /* Set if we find a B stepping CPU */
148 int __cpuinitdata smp_b_stepping;
149 
150 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
151 
152 /* which logical CPUs are on which nodes */
153 cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly =
154 				{ [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
155 EXPORT_SYMBOL(node_to_cpumask_map);
156 /* which node each logical CPU is on */
157 int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
158 EXPORT_SYMBOL(cpu_to_node_map);
159 
160 /* set up a mapping between cpu and node. */
161 static void map_cpu_to_node(int cpu, int node)
162 {
163 	printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node);
164 	cpu_set(cpu, node_to_cpumask_map[node]);
165 	cpu_to_node_map[cpu] = node;
166 }
167 
168 /* undo a mapping between cpu and node. */
169 static void unmap_cpu_to_node(int cpu)
170 {
171 	int node;
172 
173 	printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu);
174 	for (node = 0; node < MAX_NUMNODES; node++)
175 		cpu_clear(cpu, node_to_cpumask_map[node]);
176 	cpu_to_node_map[cpu] = 0;
177 }
178 #else /* !(CONFIG_NUMA && CONFIG_X86_32) */
179 #define map_cpu_to_node(cpu, node)	({})
180 #define unmap_cpu_to_node(cpu)	({})
181 #endif
182 
183 #ifdef CONFIG_X86_32
184 u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
185 					{ [0 ... NR_CPUS-1] = BAD_APICID };
186 
187 static void map_cpu_to_logical_apicid(void)
188 {
189 	int cpu = smp_processor_id();
190 	int apicid = logical_smp_processor_id();
191 	int node = apicid_to_node(apicid);
192 
193 	if (!node_online(node))
194 		node = first_online_node;
195 
196 	cpu_2_logical_apicid[cpu] = apicid;
197 	map_cpu_to_node(cpu, node);
198 }
199 
200 static void unmap_cpu_to_logical_apicid(int cpu)
201 {
202 	cpu_2_logical_apicid[cpu] = BAD_APICID;
203 	unmap_cpu_to_node(cpu);
204 }
205 #else
206 #define unmap_cpu_to_logical_apicid(cpu) do {} while (0)
207 #define map_cpu_to_logical_apicid()  do {} while (0)
208 #endif
209 
210 /*
211  * Report back to the Boot Processor.
212  * Running on AP.
213  */
214 static void __cpuinit smp_callin(void)
215 {
216 	int cpuid, phys_id;
217 	unsigned long timeout;
218 
219 	/*
220 	 * If waken up by an INIT in an 82489DX configuration
221 	 * we may get here before an INIT-deassert IPI reaches
222 	 * our local APIC.  We have to wait for the IPI or we'll
223 	 * lock up on an APIC access.
224 	 */
225 	wait_for_init_deassert(&init_deasserted);
226 
227 	/*
228 	 * (This works even if the APIC is not enabled.)
229 	 */
230 	phys_id = GET_APIC_ID(read_apic_id());
231 	cpuid = smp_processor_id();
232 	if (cpu_isset(cpuid, cpu_callin_map)) {
233 		panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
234 					phys_id, cpuid);
235 	}
236 	Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
237 
238 	/*
239 	 * STARTUP IPIs are fragile beasts as they might sometimes
240 	 * trigger some glue motherboard logic. Complete APIC bus
241 	 * silence for 1 second, this overestimates the time the
242 	 * boot CPU is spending to send the up to 2 STARTUP IPIs
243 	 * by a factor of two. This should be enough.
244 	 */
245 
246 	/*
247 	 * Waiting 2s total for startup (udelay is not yet working)
248 	 */
249 	timeout = jiffies + 2*HZ;
250 	while (time_before(jiffies, timeout)) {
251 		/*
252 		 * Has the boot CPU finished it's STARTUP sequence?
253 		 */
254 		if (cpu_isset(cpuid, cpu_callout_map))
255 			break;
256 		cpu_relax();
257 	}
258 
259 	if (!time_before(jiffies, timeout)) {
260 		panic("%s: CPU%d started up but did not get a callout!\n",
261 		      __func__, cpuid);
262 	}
263 
264 	/*
265 	 * the boot CPU has finished the init stage and is spinning
266 	 * on callin_map until we finish. We are free to set up this
267 	 * CPU, first the APIC. (this is probably redundant on most
268 	 * boards)
269 	 */
270 
271 	Dprintk("CALLIN, before setup_local_APIC().\n");
272 	smp_callin_clear_local_apic();
273 	setup_local_APIC();
274 	end_local_APIC_setup();
275 	map_cpu_to_logical_apicid();
276 
277 	/*
278 	 * Get our bogomips.
279 	 *
280 	 * Need to enable IRQs because it can take longer and then
281 	 * the NMI watchdog might kill us.
282 	 */
283 	local_irq_enable();
284 	calibrate_delay();
285 	local_irq_disable();
286 	Dprintk("Stack at about %p\n", &cpuid);
287 
288 	/*
289 	 * Save our processor parameters
290 	 */
291 	smp_store_cpu_info(cpuid);
292 
293 	/*
294 	 * Allow the master to continue.
295 	 */
296 	cpu_set(cpuid, cpu_callin_map);
297 }
298 
299 /*
300  * Activate a secondary processor.
301  */
302 void __cpuinit start_secondary(void *unused)
303 {
304 	/*
305 	 * Don't put *anything* before cpu_init(), SMP booting is too
306 	 * fragile that we want to limit the things done here to the
307 	 * most necessary things.
308 	 */
309 #ifdef CONFIG_VMI
310 	vmi_bringup();
311 #endif
312 	cpu_init();
313 	preempt_disable();
314 	smp_callin();
315 
316 	/* otherwise gcc will move up smp_processor_id before the cpu_init */
317 	barrier();
318 	/*
319 	 * Check TSC synchronization with the BP:
320 	 */
321 	check_tsc_sync_target();
322 
323 	if (nmi_watchdog == NMI_IO_APIC) {
324 		disable_8259A_irq(0);
325 		enable_NMI_through_LVT0();
326 		enable_8259A_irq(0);
327 	}
328 
329 	/* This must be done before setting cpu_online_map */
330 	set_cpu_sibling_map(raw_smp_processor_id());
331 	wmb();
332 
333 	/*
334 	 * We need to hold call_lock, so there is no inconsistency
335 	 * between the time smp_call_function() determines number of
336 	 * IPI recipients, and the time when the determination is made
337 	 * for which cpus receive the IPI. Holding this
338 	 * lock helps us to not include this cpu in a currently in progress
339 	 * smp_call_function().
340 	 */
341 	lock_ipi_call_lock();
342 #ifdef CONFIG_X86_64
343 	spin_lock(&vector_lock);
344 
345 	/* Setup the per cpu irq handling data structures */
346 	__setup_vector_irq(smp_processor_id());
347 	/*
348 	 * Allow the master to continue.
349 	 */
350 	spin_unlock(&vector_lock);
351 #endif
352 	cpu_set(smp_processor_id(), cpu_online_map);
353 	unlock_ipi_call_lock();
354 	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
355 
356 	setup_secondary_clock();
357 
358 	wmb();
359 	cpu_idle();
360 }
361 
362 #ifdef CONFIG_X86_32
363 /*
364  * Everything has been set up for the secondary
365  * CPUs - they just need to reload everything
366  * from the task structure
367  * This function must not return.
368  */
369 void __devinit initialize_secondary(void)
370 {
371 	/*
372 	 * We don't actually need to load the full TSS,
373 	 * basically just the stack pointer and the ip.
374 	 */
375 
376 	asm volatile(
377 		"movl %0,%%esp\n\t"
378 		"jmp *%1"
379 		:
380 		:"m" (current->thread.sp), "m" (current->thread.ip));
381 }
382 #endif
383 
384 static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c)
385 {
386 #ifdef CONFIG_X86_32
387 	/*
388 	 * Mask B, Pentium, but not Pentium MMX
389 	 */
390 	if (c->x86_vendor == X86_VENDOR_INTEL &&
391 	    c->x86 == 5 &&
392 	    c->x86_mask >= 1 && c->x86_mask <= 4 &&
393 	    c->x86_model <= 3)
394 		/*
395 		 * Remember we have B step Pentia with bugs
396 		 */
397 		smp_b_stepping = 1;
398 
399 	/*
400 	 * Certain Athlons might work (for various values of 'work') in SMP
401 	 * but they are not certified as MP capable.
402 	 */
403 	if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
404 
405 		if (num_possible_cpus() == 1)
406 			goto valid_k7;
407 
408 		/* Athlon 660/661 is valid. */
409 		if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
410 		    (c->x86_mask == 1)))
411 			goto valid_k7;
412 
413 		/* Duron 670 is valid */
414 		if ((c->x86_model == 7) && (c->x86_mask == 0))
415 			goto valid_k7;
416 
417 		/*
418 		 * Athlon 662, Duron 671, and Athlon >model 7 have capability
419 		 * bit. It's worth noting that the A5 stepping (662) of some
420 		 * Athlon XP's have the MP bit set.
421 		 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
422 		 * more.
423 		 */
424 		if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
425 		    ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
426 		     (c->x86_model > 7))
427 			if (cpu_has_mp)
428 				goto valid_k7;
429 
430 		/* If we get here, not a certified SMP capable AMD system. */
431 		add_taint(TAINT_UNSAFE_SMP);
432 	}
433 
434 valid_k7:
435 	;
436 #endif
437 }
438 
439 static void __cpuinit smp_checks(void)
440 {
441 	if (smp_b_stepping)
442 		printk(KERN_WARNING "WARNING: SMP operation may be unreliable"
443 				    "with B stepping processors.\n");
444 
445 	/*
446 	 * Don't taint if we are running SMP kernel on a single non-MP
447 	 * approved Athlon
448 	 */
449 	if (tainted & TAINT_UNSAFE_SMP) {
450 		if (num_online_cpus())
451 			printk(KERN_INFO "WARNING: This combination of AMD"
452 				"processors is not suitable for SMP.\n");
453 		else
454 			tainted &= ~TAINT_UNSAFE_SMP;
455 	}
456 }
457 
458 /*
459  * The bootstrap kernel entry code has set these up. Save them for
460  * a given CPU
461  */
462 
463 void __cpuinit smp_store_cpu_info(int id)
464 {
465 	struct cpuinfo_x86 *c = &cpu_data(id);
466 
467 	*c = boot_cpu_data;
468 	c->cpu_index = id;
469 	if (id != 0)
470 		identify_secondary_cpu(c);
471 	smp_apply_quirks(c);
472 }
473 
474 
475 void __cpuinit set_cpu_sibling_map(int cpu)
476 {
477 	int i;
478 	struct cpuinfo_x86 *c = &cpu_data(cpu);
479 
480 	cpu_set(cpu, cpu_sibling_setup_map);
481 
482 	if (smp_num_siblings > 1) {
483 		for_each_cpu_mask(i, cpu_sibling_setup_map) {
484 			if (c->phys_proc_id == cpu_data(i).phys_proc_id &&
485 			    c->cpu_core_id == cpu_data(i).cpu_core_id) {
486 				cpu_set(i, per_cpu(cpu_sibling_map, cpu));
487 				cpu_set(cpu, per_cpu(cpu_sibling_map, i));
488 				cpu_set(i, per_cpu(cpu_core_map, cpu));
489 				cpu_set(cpu, per_cpu(cpu_core_map, i));
490 				cpu_set(i, c->llc_shared_map);
491 				cpu_set(cpu, cpu_data(i).llc_shared_map);
492 			}
493 		}
494 	} else {
495 		cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
496 	}
497 
498 	cpu_set(cpu, c->llc_shared_map);
499 
500 	if (current_cpu_data.x86_max_cores == 1) {
501 		per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu);
502 		c->booted_cores = 1;
503 		return;
504 	}
505 
506 	for_each_cpu_mask(i, cpu_sibling_setup_map) {
507 		if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
508 		    per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
509 			cpu_set(i, c->llc_shared_map);
510 			cpu_set(cpu, cpu_data(i).llc_shared_map);
511 		}
512 		if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
513 			cpu_set(i, per_cpu(cpu_core_map, cpu));
514 			cpu_set(cpu, per_cpu(cpu_core_map, i));
515 			/*
516 			 *  Does this new cpu bringup a new core?
517 			 */
518 			if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) {
519 				/*
520 				 * for each core in package, increment
521 				 * the booted_cores for this new cpu
522 				 */
523 				if (first_cpu(per_cpu(cpu_sibling_map, i)) == i)
524 					c->booted_cores++;
525 				/*
526 				 * increment the core count for all
527 				 * the other cpus in this package
528 				 */
529 				if (i != cpu)
530 					cpu_data(i).booted_cores++;
531 			} else if (i != cpu && !c->booted_cores)
532 				c->booted_cores = cpu_data(i).booted_cores;
533 		}
534 	}
535 }
536 
537 /* maps the cpu to the sched domain representing multi-core */
538 cpumask_t cpu_coregroup_map(int cpu)
539 {
540 	struct cpuinfo_x86 *c = &cpu_data(cpu);
541 	/*
542 	 * For perf, we return last level cache shared map.
543 	 * And for power savings, we return cpu_core_map
544 	 */
545 	if (sched_mc_power_savings || sched_smt_power_savings)
546 		return per_cpu(cpu_core_map, cpu);
547 	else
548 		return c->llc_shared_map;
549 }
550 
551 #ifdef CONFIG_X86_32
552 /*
553  * We are called very early to get the low memory for the
554  * SMP bootup trampoline page.
555  */
556 void __init smp_alloc_memory(void)
557 {
558 	trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);
559 	/*
560 	 * Has to be in very low memory so we can execute
561 	 * real-mode AP code.
562 	 */
563 	if (__pa(trampoline_base) >= 0x9F000)
564 		BUG();
565 }
566 #endif
567 
568 static void impress_friends(void)
569 {
570 	int cpu;
571 	unsigned long bogosum = 0;
572 	/*
573 	 * Allow the user to impress friends.
574 	 */
575 	Dprintk("Before bogomips.\n");
576 	for_each_possible_cpu(cpu)
577 		if (cpu_isset(cpu, cpu_callout_map))
578 			bogosum += cpu_data(cpu).loops_per_jiffy;
579 	printk(KERN_INFO
580 		"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
581 		num_online_cpus(),
582 		bogosum/(500000/HZ),
583 		(bogosum/(5000/HZ))%100);
584 
585 	Dprintk("Before bogocount - setting activated=1.\n");
586 }
587 
588 static inline void __inquire_remote_apic(int apicid)
589 {
590 	unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
591 	char *names[] = { "ID", "VERSION", "SPIV" };
592 	int timeout;
593 	u32 status;
594 
595 	printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);
596 
597 	for (i = 0; i < ARRAY_SIZE(regs); i++) {
598 		printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]);
599 
600 		/*
601 		 * Wait for idle.
602 		 */
603 		status = safe_apic_wait_icr_idle();
604 		if (status)
605 			printk(KERN_CONT
606 			       "a previous APIC delivery may have failed\n");
607 
608 		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
609 		apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
610 
611 		timeout = 0;
612 		do {
613 			udelay(100);
614 			status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
615 		} while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
616 
617 		switch (status) {
618 		case APIC_ICR_RR_VALID:
619 			status = apic_read(APIC_RRR);
620 			printk(KERN_CONT "%08x\n", status);
621 			break;
622 		default:
623 			printk(KERN_CONT "failed\n");
624 		}
625 	}
626 }
627 
628 #ifdef WAKE_SECONDARY_VIA_NMI
629 /*
630  * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
631  * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
632  * won't ... remember to clear down the APIC, etc later.
633  */
634 static int __devinit
635 wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
636 {
637 	unsigned long send_status, accept_status = 0;
638 	int maxlvt;
639 
640 	/* Target chip */
641 	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
642 
643 	/* Boot on the stack */
644 	/* Kick the second */
645 	apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
646 
647 	Dprintk("Waiting for send to finish...\n");
648 	send_status = safe_apic_wait_icr_idle();
649 
650 	/*
651 	 * Give the other CPU some time to accept the IPI.
652 	 */
653 	udelay(200);
654 	/*
655 	 * Due to the Pentium erratum 3AP.
656 	 */
657 	maxlvt = lapic_get_maxlvt();
658 	if (maxlvt > 3) {
659 		apic_read_around(APIC_SPIV);
660 		apic_write(APIC_ESR, 0);
661 	}
662 	accept_status = (apic_read(APIC_ESR) & 0xEF);
663 	Dprintk("NMI sent.\n");
664 
665 	if (send_status)
666 		printk(KERN_ERR "APIC never delivered???\n");
667 	if (accept_status)
668 		printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);
669 
670 	return (send_status | accept_status);
671 }
672 #endif	/* WAKE_SECONDARY_VIA_NMI */
673 
674 #ifdef WAKE_SECONDARY_VIA_INIT
675 static int __devinit
676 wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
677 {
678 	unsigned long send_status, accept_status = 0;
679 	int maxlvt, num_starts, j;
680 
681 	if (get_uv_system_type() == UV_NON_UNIQUE_APIC) {
682 		send_status = uv_wakeup_secondary(phys_apicid, start_eip);
683 		atomic_set(&init_deasserted, 1);
684 		return send_status;
685 	}
686 
687 	/*
688 	 * Be paranoid about clearing APIC errors.
689 	 */
690 	if (APIC_INTEGRATED(apic_version[phys_apicid])) {
691 		apic_read_around(APIC_SPIV);
692 		apic_write(APIC_ESR, 0);
693 		apic_read(APIC_ESR);
694 	}
695 
696 	Dprintk("Asserting INIT.\n");
697 
698 	/*
699 	 * Turn INIT on target chip
700 	 */
701 	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
702 
703 	/*
704 	 * Send IPI
705 	 */
706 	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
707 				| APIC_DM_INIT);
708 
709 	Dprintk("Waiting for send to finish...\n");
710 	send_status = safe_apic_wait_icr_idle();
711 
712 	mdelay(10);
713 
714 	Dprintk("Deasserting INIT.\n");
715 
716 	/* Target chip */
717 	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
718 
719 	/* Send IPI */
720 	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
721 
722 	Dprintk("Waiting for send to finish...\n");
723 	send_status = safe_apic_wait_icr_idle();
724 
725 	mb();
726 	atomic_set(&init_deasserted, 1);
727 
728 	/*
729 	 * Should we send STARTUP IPIs ?
730 	 *
731 	 * Determine this based on the APIC version.
732 	 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
733 	 */
734 	if (APIC_INTEGRATED(apic_version[phys_apicid]))
735 		num_starts = 2;
736 	else
737 		num_starts = 0;
738 
739 	/*
740 	 * Paravirt / VMI wants a startup IPI hook here to set up the
741 	 * target processor state.
742 	 */
743 	startup_ipi_hook(phys_apicid, (unsigned long) start_secondary,
744 #ifdef CONFIG_X86_64
745 			 (unsigned long)init_rsp);
746 #else
747 			 (unsigned long)stack_start.sp);
748 #endif
749 
750 	/*
751 	 * Run STARTUP IPI loop.
752 	 */
753 	Dprintk("#startup loops: %d.\n", num_starts);
754 
755 	maxlvt = lapic_get_maxlvt();
756 
757 	for (j = 1; j <= num_starts; j++) {
758 		Dprintk("Sending STARTUP #%d.\n", j);
759 		apic_read_around(APIC_SPIV);
760 		apic_write(APIC_ESR, 0);
761 		apic_read(APIC_ESR);
762 		Dprintk("After apic_write.\n");
763 
764 		/*
765 		 * STARTUP IPI
766 		 */
767 
768 		/* Target chip */
769 		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
770 
771 		/* Boot on the stack */
772 		/* Kick the second */
773 		apic_write_around(APIC_ICR, APIC_DM_STARTUP
774 					| (start_eip >> 12));
775 
776 		/*
777 		 * Give the other CPU some time to accept the IPI.
778 		 */
779 		udelay(300);
780 
781 		Dprintk("Startup point 1.\n");
782 
783 		Dprintk("Waiting for send to finish...\n");
784 		send_status = safe_apic_wait_icr_idle();
785 
786 		/*
787 		 * Give the other CPU some time to accept the IPI.
788 		 */
789 		udelay(200);
790 		/*
791 		 * Due to the Pentium erratum 3AP.
792 		 */
793 		if (maxlvt > 3) {
794 			apic_read_around(APIC_SPIV);
795 			apic_write(APIC_ESR, 0);
796 		}
797 		accept_status = (apic_read(APIC_ESR) & 0xEF);
798 		if (send_status || accept_status)
799 			break;
800 	}
801 	Dprintk("After Startup.\n");
802 
803 	if (send_status)
804 		printk(KERN_ERR "APIC never delivered???\n");
805 	if (accept_status)
806 		printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);
807 
808 	return (send_status | accept_status);
809 }
810 #endif	/* WAKE_SECONDARY_VIA_INIT */
811 
/* Request/result record used to fork the idle task for one CPU. */
struct create_idle {
	struct work_struct work;	/* work item run by do_fork_idle() */
	struct task_struct *idle;	/* result of fork_idle(); checked with IS_ERR() */
	struct completion done;		/* completed by do_fork_idle() once idle is set */
	int cpu;			/* CPU number passed to fork_idle() */
};
818 
819 static void __cpuinit do_fork_idle(struct work_struct *work)
820 {
821 	struct create_idle *c_idle =
822 		container_of(work, struct create_idle, work);
823 
824 	c_idle->idle = fork_idle(c_idle->cpu);
825 	complete(&c_idle->done);
826 }
827 
828 static int __cpuinit do_boot_cpu(int apicid, int cpu)
829 /*
830  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
831  * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
832  * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
833  */
834 {
835 	unsigned long boot_error = 0;
836 	int timeout;
837 	unsigned long start_ip;
838 	unsigned short nmi_high = 0, nmi_low = 0;
839 	struct create_idle c_idle = {
840 		.cpu = cpu,
841 		.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
842 	};
843 	INIT_WORK(&c_idle.work, do_fork_idle);
844 #ifdef CONFIG_X86_64
845 	/* allocate memory for gdts of secondary cpus. Hotplug is considered */
846 	if (!cpu_gdt_descr[cpu].address &&
847 		!(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
848 		printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu);
849 		return -1;
850 	}
851 
852 	/* Allocate node local memory for AP pdas */
853 	if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) {
854 		struct x8664_pda *newpda, *pda;
855 		int node = cpu_to_node(cpu);
856 		pda = cpu_pda(cpu);
857 		newpda = kmalloc_node(sizeof(struct x8664_pda), GFP_ATOMIC,
858 				      node);
859 		if (newpda) {
860 			memcpy(newpda, pda, sizeof(struct x8664_pda));
861 			cpu_pda(cpu) = newpda;
862 		} else
863 			printk(KERN_ERR
864 		"Could not allocate node local PDA for CPU %d on node %d\n",
865 				cpu, node);
866 	}
867 #endif
868 
869 	alternatives_smp_switch(1);
870 
871 	c_idle.idle = get_idle_for_cpu(cpu);
872 
873 	/*
874 	 * We can't use kernel_thread since we must avoid to
875 	 * reschedule the child.
876 	 */
877 	if (c_idle.idle) {
878 		c_idle.idle->thread.sp = (unsigned long) (((struct pt_regs *)
879 			(THREAD_SIZE +  task_stack_page(c_idle.idle))) - 1);
880 		init_idle(c_idle.idle, cpu);
881 		goto do_rest;
882 	}
883 
884 	if (!keventd_up() || current_is_keventd())
885 		c_idle.work.func(&c_idle.work);
886 	else {
887 		schedule_work(&c_idle.work);
888 		wait_for_completion(&c_idle.done);
889 	}
890 
891 	if (IS_ERR(c_idle.idle)) {
892 		printk("failed fork for CPU %d\n", cpu);
893 		return PTR_ERR(c_idle.idle);
894 	}
895 
896 	set_idle_for_cpu(cpu, c_idle.idle);
897 do_rest:
898 #ifdef CONFIG_X86_32
899 	per_cpu(current_task, cpu) = c_idle.idle;
900 	init_gdt(cpu);
901 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
902 	c_idle.idle->thread.ip = (unsigned long) start_secondary;
903 	/* Stack for startup_32 can be just as for start_secondary onwards */
904 	stack_start.sp = (void *) c_idle.idle->thread.sp;
905 	irq_ctx_init(cpu);
906 #else
907 	cpu_pda(cpu)->pcurrent = c_idle.idle;
908 	init_rsp = c_idle.idle->thread.sp;
909 	load_sp0(&per_cpu(init_tss, cpu), &c_idle.idle->thread);
910 	initial_code = (unsigned long)start_secondary;
911 	clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
912 #endif
913 
914 	/* start_ip had better be page-aligned! */
915 	start_ip = setup_trampoline();
916 
917 	/* So we see what's up   */
918 	printk(KERN_INFO "Booting processor %d/%d ip %lx\n",
919 			  cpu, apicid, start_ip);
920 
921 	/*
922 	 * This grunge runs the startup process for
923 	 * the targeted processor.
924 	 */
925 
926 	atomic_set(&init_deasserted, 0);
927 
928 	if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
929 
930 		Dprintk("Setting warm reset code and vector.\n");
931 
932 		store_NMI_vector(&nmi_high, &nmi_low);
933 
934 		smpboot_setup_warm_reset_vector(start_ip);
935 		/*
936 		 * Be paranoid about clearing APIC errors.
937 	 	*/
938 		apic_write(APIC_ESR, 0);
939 		apic_read(APIC_ESR);
940 	}
941 
942 	/*
943 	 * Starting actual IPI sequence...
944 	 */
945 	boot_error = wakeup_secondary_cpu(apicid, start_ip);
946 
947 	if (!boot_error) {
948 		/*
949 		 * allow APs to start initializing.
950 		 */
951 		Dprintk("Before Callout %d.\n", cpu);
952 		cpu_set(cpu, cpu_callout_map);
953 		Dprintk("After Callout %d.\n", cpu);
954 
955 		/*
956 		 * Wait 5s total for a response
957 		 */
958 		for (timeout = 0; timeout < 50000; timeout++) {
959 			if (cpu_isset(cpu, cpu_callin_map))
960 				break;	/* It has booted */
961 			udelay(100);
962 		}
963 
964 		if (cpu_isset(cpu, cpu_callin_map)) {
965 			/* number CPUs logically, starting from 1 (BSP is 0) */
966 			Dprintk("OK.\n");
967 			printk(KERN_INFO "CPU%d: ", cpu);
968 			print_cpu_info(&cpu_data(cpu));
969 			Dprintk("CPU has booted.\n");
970 		} else {
971 			boot_error = 1;
972 			if (*((volatile unsigned char *)trampoline_base)
973 					== 0xA5)
974 				/* trampoline started but...? */
975 				printk(KERN_ERR "Stuck ??\n");
976 			else
977 				/* trampoline code not run */
978 				printk(KERN_ERR "Not responding.\n");
979 			if (get_uv_system_type() != UV_NON_UNIQUE_APIC)
980 				inquire_remote_apic(apicid);
981 		}
982 	}
983 
984 	if (boot_error) {
985 		/* Try to put things back the way they were before ... */
986 		unmap_cpu_to_logical_apicid(cpu);
987 #ifdef CONFIG_X86_64
988 		clear_node_cpumask(cpu); /* was set by numa_add_cpu */
989 #endif
990 		cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */
991 		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
992 		cpu_clear(cpu, cpu_possible_map);
993 		cpu_clear(cpu, cpu_present_map);
994 		per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
995 	}
996 
997 	/* mark "stuck" area as not stuck */
998 	*((volatile unsigned long *)trampoline_base) = 0;
999 
1000 	/*
1001 	 * Cleanup possible dangling ends...
1002 	 */
1003 	smpboot_restore_warm_reset_vector();
1004 
1005 	return boot_error;
1006 }
1007 
1008 int __cpuinit native_cpu_up(unsigned int cpu)
1009 {
1010 	int apicid = cpu_present_to_apicid(cpu);
1011 	unsigned long flags;
1012 	int err;
1013 
1014 	WARN_ON(irqs_disabled());
1015 
1016 	Dprintk("++++++++++++++++++++=_---CPU UP  %u\n", cpu);
1017 
1018 	if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
1019 	    !physid_isset(apicid, phys_cpu_present_map)) {
1020 		printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu);
1021 		return -EINVAL;
1022 	}
1023 
1024 	/*
1025 	 * Already booted CPU?
1026 	 */
1027 	if (cpu_isset(cpu, cpu_callin_map)) {
1028 		Dprintk("do_boot_cpu %d Already started\n", cpu);
1029 		return -ENOSYS;
1030 	}
1031 
1032 	/*
1033 	 * Save current MTRR state in case it was changed since early boot
1034 	 * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
1035 	 */
1036 	mtrr_save_state();
1037 
1038 	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
1039 
1040 #ifdef CONFIG_X86_32
1041 	/* init low mem mapping */
1042 	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
1043 			min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
1044 	flush_tlb_all();
1045 #endif
1046 
1047 	err = do_boot_cpu(apicid, cpu);
1048 	if (err < 0) {
1049 		Dprintk("do_boot_cpu failed %d\n", err);
1050 		return err;
1051 	}
1052 
1053 	/*
1054 	 * Check TSC synchronization with the AP (keep irqs disabled
1055 	 * while doing so):
1056 	 */
1057 	local_irq_save(flags);
1058 	check_tsc_sync_source(cpu);
1059 	local_irq_restore(flags);
1060 
1061 	while (!cpu_online(cpu)) {
1062 		cpu_relax();
1063 		touch_nmi_watchdog();
1064 	}
1065 
1066 	return 0;
1067 }
1068 
1069 /*
1070  * Fall back to non SMP mode after errors.
1071  *
1072  * RED-PEN audit/test this more. I bet there is more state messed up here.
1073  */
1074 static __init void disable_smp(void)
1075 {
1076 	cpu_present_map = cpumask_of_cpu(0);
1077 	cpu_possible_map = cpumask_of_cpu(0);
1078 #ifdef CONFIG_X86_32
1079 	smpboot_clear_io_apic_irqs();
1080 #endif
1081 	if (smp_found_config)
1082 		phys_cpu_present_map =
1083 				physid_mask_of_physid(boot_cpu_physical_apicid);
1084 	else
1085 		phys_cpu_present_map = physid_mask_of_physid(0);
1086 	map_cpu_to_logical_apicid();
1087 	cpu_set(0, per_cpu(cpu_sibling_map, 0));
1088 	cpu_set(0, per_cpu(cpu_core_map, 0));
1089 }
1090 
1091 /*
1092  * Various sanity checks.
1093  */
1094 static int __init smp_sanity_check(unsigned max_cpus)
1095 {
1096 	preempt_disable();
1097 	if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
1098 		printk(KERN_WARNING "weird, boot CPU (#%d) not listed"
1099 				    "by the BIOS.\n", hard_smp_processor_id());
1100 		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
1101 	}
1102 
1103 	/*
1104 	 * If we couldn't find an SMP configuration at boot time,
1105 	 * get out of here now!
1106 	 */
1107 	if (!smp_found_config && !acpi_lapic) {
1108 		preempt_enable();
1109 		printk(KERN_NOTICE "SMP motherboard not detected.\n");
1110 		disable_smp();
1111 		if (APIC_init_uniprocessor())
1112 			printk(KERN_NOTICE "Local APIC not detected."
1113 					   " Using dummy APIC emulation.\n");
1114 		return -1;
1115 	}
1116 
1117 	/*
1118 	 * Should not be necessary because the MP table should list the boot
1119 	 * CPU too, but we do it for the sake of robustness anyway.
1120 	 */
1121 	if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
1122 		printk(KERN_NOTICE
1123 			"weird, boot CPU (#%d) not listed by the BIOS.\n",
1124 			boot_cpu_physical_apicid);
1125 		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
1126 	}
1127 	preempt_enable();
1128 
1129 	/*
1130 	 * If we couldn't find a local APIC, then get out of here now!
1131 	 */
1132 	if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
1133 	    !cpu_has_apic) {
1134 		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
1135 			boot_cpu_physical_apicid);
1136 		printk(KERN_ERR "... forcing use of dummy APIC emulation."
1137 				"(tell your hw vendor)\n");
1138 		smpboot_clear_io_apic();
1139 		return -1;
1140 	}
1141 
1142 	verify_local_APIC();
1143 
1144 	/*
1145 	 * If SMP should be disabled, then really disable it!
1146 	 */
1147 	if (!max_cpus) {
1148 		printk(KERN_INFO "SMP mode deactivated,"
1149 				 "forcing use of dummy APIC emulation.\n");
1150 		smpboot_clear_io_apic();
1151 #ifdef CONFIG_X86_32
1152 		connect_bsp_APIC();
1153 #endif
1154 		setup_local_APIC();
1155 		end_local_APIC_setup();
1156 		return -1;
1157 	}
1158 
1159 	return 0;
1160 }
1161 
1162 static void __init smp_cpu_index_default(void)
1163 {
1164 	int i;
1165 	struct cpuinfo_x86 *c;
1166 
1167 	for_each_possible_cpu(i) {
1168 		c = &cpu_data(i);
1169 		/* mark all to hotplug */
1170 		c->cpu_index = NR_CPUS;
1171 	}
1172 }
1173 
1174 /*
1175  * Prepare for SMP bootup.  The MP table or ACPI has been read
1176  * earlier.  Just do some sanity checking here and enable APIC mode.
1177  */
1178 void __init native_smp_prepare_cpus(unsigned int max_cpus)
1179 {
1180 	nmi_watchdog_default();
1181 	smp_cpu_index_default();
1182 	current_cpu_data = boot_cpu_data;
1183 	cpu_callin_map = cpumask_of_cpu(0);
1184 	mb();
1185 	/*
1186 	 * Setup boot CPU information
1187 	 */
1188 	smp_store_cpu_info(0); /* Final full version of the data */
1189 	boot_cpu_logical_apicid = logical_smp_processor_id();
1190 	current_thread_info()->cpu = 0;  /* needed? */
1191 	set_cpu_sibling_map(0);
1192 
1193 	if (smp_sanity_check(max_cpus) < 0) {
1194 		printk(KERN_INFO "SMP disabled\n");
1195 		disable_smp();
1196 		return;
1197 	}
1198 
1199 	preempt_disable();
1200 	if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) {
1201 		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
1202 		     GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid);
1203 		/* Or can we switch back to PIC here? */
1204 	}
1205 	preempt_enable();
1206 
1207 #ifdef CONFIG_X86_32
1208 	connect_bsp_APIC();
1209 #endif
1210 	/*
1211 	 * Switch from PIC to APIC mode.
1212 	 */
1213 	setup_local_APIC();
1214 
1215 #ifdef CONFIG_X86_64
1216 	/*
1217 	 * Enable IO APIC before setting up error vector
1218 	 */
1219 	if (!skip_ioapic_setup && nr_ioapics)
1220 		enable_IO_APIC();
1221 #endif
1222 	end_local_APIC_setup();
1223 
1224 	map_cpu_to_logical_apicid();
1225 
1226 	setup_portio_remap();
1227 
1228 	smpboot_setup_io_apic();
1229 	/*
1230 	 * Set up local APIC timer on boot CPU.
1231 	 */
1232 
1233 	printk(KERN_INFO "CPU%d: ", 0);
1234 	print_cpu_info(&cpu_data(0));
1235 	setup_boot_clock();
1236 }
1237 /*
1238  * Early setup to make printk work.
1239  */
1240 void __init native_smp_prepare_boot_cpu(void)
1241 {
1242 	int me = smp_processor_id();
1243 #ifdef CONFIG_X86_32
1244 	init_gdt(me);
1245 	switch_to_new_gdt();
1246 #endif
1247 	/* already set me in cpu_online_map in boot_cpu_init() */
1248 	cpu_set(me, cpu_callout_map);
1249 	per_cpu(cpu_state, me) = CPU_ONLINE;
1250 }
1251 
1252 void __init native_smp_cpus_done(unsigned int max_cpus)
1253 {
1254 	Dprintk("Boot done.\n");
1255 
1256 	impress_friends();
1257 	smp_checks();
1258 #ifdef CONFIG_X86_IO_APIC
1259 	setup_ioapic_dest();
1260 #endif
1261 	check_nmi_watchdog();
1262 #ifdef CONFIG_X86_32
1263 	zap_low_mappings();
1264 #endif
1265 }
1266 
1267 #ifdef CONFIG_HOTPLUG_CPU
1268 
1269 #  ifdef CONFIG_X86_32
1270 void cpu_exit_clear(void)
1271 {
1272 	int cpu = raw_smp_processor_id();
1273 
1274 	idle_task_exit();
1275 
1276 	cpu_uninit();
1277 	irq_ctx_exit(cpu);
1278 
1279 	cpu_clear(cpu, cpu_callout_map);
1280 	cpu_clear(cpu, cpu_callin_map);
1281 
1282 	unmap_cpu_to_logical_apicid(cpu);
1283 }
1284 #  endif /* CONFIG_X86_32 */
1285 
1286 static void remove_siblinginfo(int cpu)
1287 {
1288 	int sibling;
1289 	struct cpuinfo_x86 *c = &cpu_data(cpu);
1290 
1291 	for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) {
1292 		cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
1293 		/*/
1294 		 * last thread sibling in this cpu core going down
1295 		 */
1296 		if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
1297 			cpu_data(sibling).booted_cores--;
1298 	}
1299 
1300 	for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu))
1301 		cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
1302 	cpus_clear(per_cpu(cpu_sibling_map, cpu));
1303 	cpus_clear(per_cpu(cpu_core_map, cpu));
1304 	c->phys_proc_id = 0;
1305 	c->cpu_core_id = 0;
1306 	cpu_clear(cpu, cpu_sibling_setup_map);
1307 }
1308 
1309 int additional_cpus __initdata = -1;
1310 
1311 static __init int setup_additional_cpus(char *s)
1312 {
1313 	return s && get_option(&s, &additional_cpus) ? 0 : -EINVAL;
1314 }
1315 early_param("additional_cpus", setup_additional_cpus);
1316 
1317 /*
1318  * cpu_possible_map should be static, it cannot change as cpu's
1319  * are onlined, or offlined. The reason is per-cpu data-structures
1320  * are allocated by some modules at init time, and dont expect to
1321  * do this dynamically on cpu arrival/departure.
1322  * cpu_present_map on the other hand can change dynamically.
1323  * In case when cpu_hotplug is not compiled, then we resort to current
1324  * behaviour, which is cpu_possible == cpu_present.
1325  * - Ashok Raj
1326  *
1327  * Three ways to find out the number of additional hotplug CPUs:
1328  * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
1329  * - The user can overwrite it with additional_cpus=NUM
1330  * - Otherwise don't reserve additional CPUs.
1331  * We do this because additional CPUs waste a lot of memory.
1332  * -AK
1333  */
1334 __init void prefill_possible_map(void)
1335 {
1336 	int i;
1337 	int possible;
1338 
1339 	if (additional_cpus == -1) {
1340 		if (disabled_cpus > 0)
1341 			additional_cpus = disabled_cpus;
1342 		else
1343 			additional_cpus = 0;
1344 	}
1345 	possible = num_processors + additional_cpus;
1346 	if (possible > NR_CPUS)
1347 		possible = NR_CPUS;
1348 
1349 	printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
1350 		possible, max_t(int, possible - num_processors, 0));
1351 
1352 	for (i = 0; i < possible; i++)
1353 		cpu_set(i, cpu_possible_map);
1354 }
1355 
1356 static void __ref remove_cpu_from_maps(int cpu)
1357 {
1358 	cpu_clear(cpu, cpu_online_map);
1359 #ifdef CONFIG_X86_64
1360 	cpu_clear(cpu, cpu_callout_map);
1361 	cpu_clear(cpu, cpu_callin_map);
1362 	/* was set by cpu_init() */
1363 	clear_bit(cpu, (unsigned long *)&cpu_initialized);
1364 	clear_node_cpumask(cpu);
1365 #endif
1366 }
1367 
1368 int __cpu_disable(void)
1369 {
1370 	int cpu = smp_processor_id();
1371 
1372 	/*
1373 	 * Perhaps use cpufreq to drop frequency, but that could go
1374 	 * into generic code.
1375 	 *
1376 	 * We won't take down the boot processor on i386 due to some
1377 	 * interrupts only being able to be serviced by the BSP.
1378 	 * Especially so if we're not using an IOAPIC	-zwane
1379 	 */
1380 	if (cpu == 0)
1381 		return -EBUSY;
1382 
1383 	if (nmi_watchdog == NMI_LOCAL_APIC)
1384 		stop_apic_nmi_watchdog(NULL);
1385 	clear_local_APIC();
1386 
1387 	/*
1388 	 * HACK:
1389 	 * Allow any queued timer interrupts to get serviced
1390 	 * This is only a temporary solution until we cleanup
1391 	 * fixup_irqs as we do for IA64.
1392 	 */
1393 	local_irq_enable();
1394 	mdelay(1);
1395 
1396 	local_irq_disable();
1397 	remove_siblinginfo(cpu);
1398 
1399 	/* It's now safe to remove this processor from the online map */
1400 	remove_cpu_from_maps(cpu);
1401 	fixup_irqs(cpu_online_map);
1402 	return 0;
1403 }
1404 
1405 void __cpu_die(unsigned int cpu)
1406 {
1407 	/* We don't do anything here: idle task is faking death itself. */
1408 	unsigned int i;
1409 
1410 	for (i = 0; i < 10; i++) {
1411 		/* They ack this in play_dead by setting CPU_DEAD */
1412 		if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
1413 			printk(KERN_INFO "CPU %d is now offline\n", cpu);
1414 			if (1 == num_online_cpus())
1415 				alternatives_smp_switch(0);
1416 			return;
1417 		}
1418 		msleep(100);
1419 	}
1420 	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
1421 }
1422 #else /* ... !CONFIG_HOTPLUG_CPU */
/* Without CONFIG_HOTPLUG_CPU a CPU can never be disabled: always refuse. */
int __cpu_disable(void)
{
	return -ENOSYS;
}
1427 
/*
 * Must never be reached without CONFIG_HOTPLUG_CPU, because
 * __cpu_disable() always says "no" in that configuration.
 */
void __cpu_die(unsigned int cpu)
{
	BUG();
}
1433 #endif
1434 
1435 /*
1436  * If the BIOS enumerates physical processors before logical,
1437  * maxcpus=N at enumeration-time can be used to disable HT.
1438  */
1439 static int __init parse_maxcpus(char *arg)
1440 {
1441 	extern unsigned int maxcpus;
1442 
1443 	maxcpus = simple_strtoul(arg, NULL, 0);
1444 	return 0;
1445 }
1446 early_param("maxcpus", parse_maxcpus);
1447