xref: /linux/kernel/smp.c (revision c4ee0af3fa0dc65f690fc908f02b8355f9576ea0)
1 /*
2  * Generic helpers for smp ipi calls
3  *
4  * (C) Jens Axboe <jens.axboe@oracle.com> 2008
5  */
6 #include <linux/rcupdate.h>
7 #include <linux/rculist.h>
8 #include <linux/kernel.h>
9 #include <linux/export.h>
10 #include <linux/percpu.h>
11 #include <linux/init.h>
12 #include <linux/gfp.h>
13 #include <linux/smp.h>
14 #include <linux/cpu.h>
15 
16 #include "smpboot.h"
17 
/* Bits kept in call_single_data->flags. */
enum {
	/* Set by csd_lock(), cleared by csd_unlock(); csd_lock_wait() spins on it. */
	CSD_FLAG_LOCK		= 0x01,
	/* Set by generic_exec_single() when its caller asked to wait. */
	CSD_FLAG_WAIT		= 0x02,
};
22 
/*
 * Per-cpu bookkeeping used by smp_call_function_many() on the sending CPU.
 * The members are allocated and freed by hotplug_cfd().
 */
struct call_function_data {
	/* One call_single_data per target CPU. */
	struct call_single_data	__percpu *csd;
	/* Target CPUs of the current call: online subset of the mask, minus self. */
	cpumask_var_t		cpumask;
	/* Copy of ->cpumask that is handed to arch_send_call_function_ipi_mask(). */
	cpumask_var_t		cpumask_ipi;
};

static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data);
30 
/*
 * Per-cpu queue of pending call_single_data entries. Senders append to
 * ->list while holding ->lock; the owning CPU drains it in
 * generic_smp_call_function_single_interrupt().
 */
struct call_single_queue {
	struct list_head	list;
	raw_spinlock_t		lock;
};

static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_queue, call_single_queue);
37 
38 static int
39 hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
40 {
41 	long cpu = (long)hcpu;
42 	struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
43 
44 	switch (action) {
45 	case CPU_UP_PREPARE:
46 	case CPU_UP_PREPARE_FROZEN:
47 		if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
48 				cpu_to_node(cpu)))
49 			return notifier_from_errno(-ENOMEM);
50 		if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL,
51 				cpu_to_node(cpu))) {
52 			free_cpumask_var(cfd->cpumask);
53 			return notifier_from_errno(-ENOMEM);
54 		}
55 		cfd->csd = alloc_percpu(struct call_single_data);
56 		if (!cfd->csd) {
57 			free_cpumask_var(cfd->cpumask_ipi);
58 			free_cpumask_var(cfd->cpumask);
59 			return notifier_from_errno(-ENOMEM);
60 		}
61 		break;
62 
63 #ifdef CONFIG_HOTPLUG_CPU
64 	case CPU_UP_CANCELED:
65 	case CPU_UP_CANCELED_FROZEN:
66 
67 	case CPU_DEAD:
68 	case CPU_DEAD_FROZEN:
69 		free_cpumask_var(cfd->cpumask);
70 		free_cpumask_var(cfd->cpumask_ipi);
71 		free_percpu(cfd->csd);
72 		break;
73 #endif
74 	};
75 
76 	return NOTIFY_OK;
77 }
78 
/* Notifier block routing CPU hotplug events to hotplug_cfd(). */
static struct notifier_block hotplug_cfd_notifier = {
	.notifier_call		= hotplug_cfd,
};
82 
83 void __init call_function_init(void)
84 {
85 	void *cpu = (void *)(long)smp_processor_id();
86 	int i;
87 
88 	for_each_possible_cpu(i) {
89 		struct call_single_queue *q = &per_cpu(call_single_queue, i);
90 
91 		raw_spin_lock_init(&q->lock);
92 		INIT_LIST_HEAD(&q->list);
93 	}
94 
95 	hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu);
96 	register_cpu_notifier(&hotplug_cfd_notifier);
97 }
98 
/*
 * csd_lock/csd_unlock used to serialize access to per-cpu csd resources
 *
 * For non-synchronous ipi calls the csd can still be in use by the
 * previous function call. For multi-cpu calls it's even more interesting
 * as we'll have to ensure no other cpu is observing our csd.
 *
 * csd_lock_wait() busy-waits, calling cpu_relax() on every iteration,
 * until CSD_FLAG_LOCK is no longer set in @csd->flags.
 */
static void csd_lock_wait(struct call_single_data *csd)
{
	while (csd->flags & CSD_FLAG_LOCK)
		cpu_relax();
}
111 
/*
 * Take ownership of @csd: wait until CSD_FLAG_LOCK is clear, then set it.
 * The trailing full barrier orders the flag update before any later
 * stores the caller makes to the other fields of @csd.
 */
static void csd_lock(struct call_single_data *csd)
{
	csd_lock_wait(csd);
	csd->flags |= CSD_FLAG_LOCK;

	/*
	 * prevent CPU from reordering the above assignment
	 * to ->flags with any subsequent assignments to other
	 * fields of the specified call_single_data structure:
	 */
	smp_mb();
}
124 
/*
 * Release @csd by clearing CSD_FLAG_LOCK, after a full barrier so that
 * everything done before is complete first. Warns if CSD_FLAG_WAIT is set
 * while CSD_FLAG_LOCK is not.
 */
static void csd_unlock(struct call_single_data *csd)
{
	WARN_ON((csd->flags & CSD_FLAG_WAIT) && !(csd->flags & CSD_FLAG_LOCK));

	/*
	 * ensure we're all done before releasing data:
	 */
	smp_mb();

	csd->flags &= ~CSD_FLAG_LOCK;
}
136 
137 /*
138  * Insert a previously allocated call_single_data element
139  * for execution on the given CPU. data must already have
140  * ->func, ->info, and ->flags set.
141  */
142 static void generic_exec_single(int cpu, struct call_single_data *csd, int wait)
143 {
144 	struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
145 	unsigned long flags;
146 	int ipi;
147 
148 	if (wait)
149 		csd->flags |= CSD_FLAG_WAIT;
150 
151 	raw_spin_lock_irqsave(&dst->lock, flags);
152 	ipi = list_empty(&dst->list);
153 	list_add_tail(&csd->list, &dst->list);
154 	raw_spin_unlock_irqrestore(&dst->lock, flags);
155 
156 	/*
157 	 * The list addition should be visible before sending the IPI
158 	 * handler locks the list to pull the entry off it because of
159 	 * normal cache coherency rules implied by spinlocks.
160 	 *
161 	 * If IPIs can go out of order to the cache coherency protocol
162 	 * in an architecture, sufficient synchronisation should be added
163 	 * to arch code to make it appear to obey cache coherency WRT
164 	 * locking and barrier primitives. Generic code isn't really
165 	 * equipped to do the right thing...
166 	 */
167 	if (ipi)
168 		arch_send_call_function_single_ipi(cpu);
169 
170 	if (wait)
171 		csd_lock_wait(csd);
172 }
173 
174 /*
175  * Invoked by arch to handle an IPI for call function single. Must be
176  * called from the arch with interrupts disabled.
177  */
178 void generic_smp_call_function_single_interrupt(void)
179 {
180 	struct call_single_queue *q = &__get_cpu_var(call_single_queue);
181 	LIST_HEAD(list);
182 
183 	/*
184 	 * Shouldn't receive this interrupt on a cpu that is not yet online.
185 	 */
186 	WARN_ON_ONCE(!cpu_online(smp_processor_id()));
187 
188 	raw_spin_lock(&q->lock);
189 	list_replace_init(&q->list, &list);
190 	raw_spin_unlock(&q->lock);
191 
192 	while (!list_empty(&list)) {
193 		struct call_single_data *csd;
194 
195 		csd = list_entry(list.next, struct call_single_data, list);
196 		list_del(&csd->list);
197 
198 		csd->func(csd->info);
199 
200 		csd_unlock(csd);
201 	}
202 }
203 
/* Per-cpu csd used by smp_call_function_single() for calls that do not wait. */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data);
205 
206 /*
207  * smp_call_function_single - Run a function on a specific CPU
208  * @func: The function to run. This must be fast and non-blocking.
209  * @info: An arbitrary pointer to pass to the function.
210  * @wait: If true, wait until function has completed on other CPUs.
211  *
212  * Returns 0 on success, else a negative status code.
213  */
214 int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
215 			     int wait)
216 {
217 	struct call_single_data d = {
218 		.flags = 0,
219 	};
220 	unsigned long flags;
221 	int this_cpu;
222 	int err = 0;
223 
224 	/*
225 	 * prevent preemption and reschedule on another processor,
226 	 * as well as CPU removal
227 	 */
228 	this_cpu = get_cpu();
229 
230 	/*
231 	 * Can deadlock when called with interrupts disabled.
232 	 * We allow cpu's that are not yet online though, as no one else can
233 	 * send smp call function interrupt to this cpu and as such deadlocks
234 	 * can't happen.
235 	 */
236 	WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
237 		     && !oops_in_progress);
238 
239 	if (cpu == this_cpu) {
240 		local_irq_save(flags);
241 		func(info);
242 		local_irq_restore(flags);
243 	} else {
244 		if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
245 			struct call_single_data *csd = &d;
246 
247 			if (!wait)
248 				csd = &__get_cpu_var(csd_data);
249 
250 			csd_lock(csd);
251 
252 			csd->func = func;
253 			csd->info = info;
254 			generic_exec_single(cpu, csd, wait);
255 		} else {
256 			err = -ENXIO;	/* CPU not online */
257 		}
258 	}
259 
260 	put_cpu();
261 
262 	return err;
263 }
264 EXPORT_SYMBOL(smp_call_function_single);
265 
266 /*
267  * smp_call_function_any - Run a function on any of the given cpus
268  * @mask: The mask of cpus it can run on.
269  * @func: The function to run. This must be fast and non-blocking.
270  * @info: An arbitrary pointer to pass to the function.
271  * @wait: If true, wait until function has completed.
272  *
273  * Returns 0 on success, else a negative status code (if no cpus were online).
274  *
275  * Selection preference:
276  *	1) current cpu if in @mask
277  *	2) any cpu of current node if in @mask
278  *	3) any other online cpu in @mask
279  */
280 int smp_call_function_any(const struct cpumask *mask,
281 			  smp_call_func_t func, void *info, int wait)
282 {
283 	unsigned int cpu;
284 	const struct cpumask *nodemask;
285 	int ret;
286 
287 	/* Try for same CPU (cheapest) */
288 	cpu = get_cpu();
289 	if (cpumask_test_cpu(cpu, mask))
290 		goto call;
291 
292 	/* Try for same node. */
293 	nodemask = cpumask_of_node(cpu_to_node(cpu));
294 	for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids;
295 	     cpu = cpumask_next_and(cpu, nodemask, mask)) {
296 		if (cpu_online(cpu))
297 			goto call;
298 	}
299 
300 	/* Any online will do: smp_call_function_single handles nr_cpu_ids. */
301 	cpu = cpumask_any_and(mask, cpu_online_mask);
302 call:
303 	ret = smp_call_function_single(cpu, func, info, wait);
304 	put_cpu();
305 	return ret;
306 }
307 EXPORT_SYMBOL_GPL(smp_call_function_any);
308 
309 /**
310  * __smp_call_function_single(): Run a function on a specific CPU
311  * @cpu: The CPU to run on.
312  * @data: Pre-allocated and setup data structure
313  * @wait: If true, wait until function has completed on specified CPU.
314  *
315  * Like smp_call_function_single(), but allow caller to pass in a
316  * pre-allocated data structure. Useful for embedding @data inside
317  * other structures, for instance.
318  */
319 void __smp_call_function_single(int cpu, struct call_single_data *csd,
320 				int wait)
321 {
322 	unsigned int this_cpu;
323 	unsigned long flags;
324 
325 	this_cpu = get_cpu();
326 	/*
327 	 * Can deadlock when called with interrupts disabled.
328 	 * We allow cpu's that are not yet online though, as no one else can
329 	 * send smp call function interrupt to this cpu and as such deadlocks
330 	 * can't happen.
331 	 */
332 	WARN_ON_ONCE(cpu_online(smp_processor_id()) && wait && irqs_disabled()
333 		     && !oops_in_progress);
334 
335 	if (cpu == this_cpu) {
336 		local_irq_save(flags);
337 		csd->func(csd->info);
338 		local_irq_restore(flags);
339 	} else {
340 		csd_lock(csd);
341 		generic_exec_single(cpu, csd, wait);
342 	}
343 	put_cpu();
344 }
345 EXPORT_SYMBOL_GPL(__smp_call_function_single);
346 
347 /**
348  * smp_call_function_many(): Run a function on a set of other CPUs.
349  * @mask: The set of cpus to run on (only runs on online subset).
350  * @func: The function to run. This must be fast and non-blocking.
351  * @info: An arbitrary pointer to pass to the function.
352  * @wait: If true, wait (atomically) until function has completed
353  *        on other CPUs.
354  *
355  * If @wait is true, then returns once @func has returned.
356  *
357  * You must not call this function with disabled interrupts or from a
358  * hardware interrupt handler or from a bottom half handler. Preemption
359  * must be disabled when calling this function.
360  */
361 void smp_call_function_many(const struct cpumask *mask,
362 			    smp_call_func_t func, void *info, bool wait)
363 {
364 	struct call_function_data *cfd;
365 	int cpu, next_cpu, this_cpu = smp_processor_id();
366 
367 	/*
368 	 * Can deadlock when called with interrupts disabled.
369 	 * We allow cpu's that are not yet online though, as no one else can
370 	 * send smp call function interrupt to this cpu and as such deadlocks
371 	 * can't happen.
372 	 */
373 	WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
374 		     && !oops_in_progress && !early_boot_irqs_disabled);
375 
376 	/* Try to fastpath.  So, what's a CPU they want? Ignoring this one. */
377 	cpu = cpumask_first_and(mask, cpu_online_mask);
378 	if (cpu == this_cpu)
379 		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
380 
381 	/* No online cpus?  We're done. */
382 	if (cpu >= nr_cpu_ids)
383 		return;
384 
385 	/* Do we have another CPU which isn't us? */
386 	next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
387 	if (next_cpu == this_cpu)
388 		next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
389 
390 	/* Fastpath: do that cpu by itself. */
391 	if (next_cpu >= nr_cpu_ids) {
392 		smp_call_function_single(cpu, func, info, wait);
393 		return;
394 	}
395 
396 	cfd = &__get_cpu_var(cfd_data);
397 
398 	cpumask_and(cfd->cpumask, mask, cpu_online_mask);
399 	cpumask_clear_cpu(this_cpu, cfd->cpumask);
400 
401 	/* Some callers race with other cpus changing the passed mask */
402 	if (unlikely(!cpumask_weight(cfd->cpumask)))
403 		return;
404 
405 	/*
406 	 * After we put an entry into the list, cfd->cpumask may be cleared
407 	 * again when another CPU sends another IPI for a SMP function call, so
408 	 * cfd->cpumask will be zero.
409 	 */
410 	cpumask_copy(cfd->cpumask_ipi, cfd->cpumask);
411 
412 	for_each_cpu(cpu, cfd->cpumask) {
413 		struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu);
414 		struct call_single_queue *dst =
415 					&per_cpu(call_single_queue, cpu);
416 		unsigned long flags;
417 
418 		csd_lock(csd);
419 		csd->func = func;
420 		csd->info = info;
421 
422 		raw_spin_lock_irqsave(&dst->lock, flags);
423 		list_add_tail(&csd->list, &dst->list);
424 		raw_spin_unlock_irqrestore(&dst->lock, flags);
425 	}
426 
427 	/* Send a message to all CPUs in the map */
428 	arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
429 
430 	if (wait) {
431 		for_each_cpu(cpu, cfd->cpumask) {
432 			struct call_single_data *csd;
433 
434 			csd = per_cpu_ptr(cfd->csd, cpu);
435 			csd_lock_wait(csd);
436 		}
437 	}
438 }
439 EXPORT_SYMBOL(smp_call_function_many);
440 
441 /**
442  * smp_call_function(): Run a function on all other CPUs.
443  * @func: The function to run. This must be fast and non-blocking.
444  * @info: An arbitrary pointer to pass to the function.
445  * @wait: If true, wait (atomically) until function has completed
446  *        on other CPUs.
447  *
448  * Returns 0.
449  *
450  * If @wait is true, then returns once @func has returned; otherwise
451  * it returns just before the target cpu calls @func.
452  *
453  * You must not call this function with disabled interrupts or from a
454  * hardware interrupt handler or from a bottom half handler.
455  */
456 int smp_call_function(smp_call_func_t func, void *info, int wait)
457 {
458 	preempt_disable();
459 	smp_call_function_many(cpu_online_mask, func, info, wait);
460 	preempt_enable();
461 
462 	return 0;
463 }
464 EXPORT_SYMBOL(smp_call_function);
465 
/*
 * Setup configured maximum number of CPUs to activate.
 * Defaults to NR_CPUS; set to 0 by "nosmp" and overwritten by "maxcpus=".
 * smp_init() stops bringing up CPUs once this many are online.
 */
unsigned int setup_max_cpus = NR_CPUS;
EXPORT_SYMBOL(setup_max_cpus);
469 
470 
/*
 * Setup routine for controlling SMP activation
 *
 * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
 * activation entirely (the MPS table probe still happens, though).
 *
 * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
 * greater than 0, limits the maximum number of CPUs activated in
 * SMP mode to <NUM>.
 */

/* Weak no-op default; called by nosmp() and by maxcpus() when the limit is 0. */
void __weak arch_disable_smp_support(void) { }
483 
/*
 * Handler for the "nosmp" early parameter: sets setup_max_cpus to 0 and
 * calls arch_disable_smp_support(). @str is not used. Always returns 0.
 */
static int __init nosmp(char *str)
{
	setup_max_cpus = 0;
	arch_disable_smp_support();

	return 0;
}

early_param("nosmp", nosmp);
493 
494 /* this is hard limit */
495 static int __init nrcpus(char *str)
496 {
497 	int nr_cpus;
498 
499 	get_option(&str, &nr_cpus);
500 	if (nr_cpus > 0 && nr_cpus < nr_cpu_ids)
501 		nr_cpu_ids = nr_cpus;
502 
503 	return 0;
504 }
505 
506 early_param("nr_cpus", nrcpus);
507 
/*
 * Handler for the "maxcpus" early parameter: parses the value into
 * setup_max_cpus and, if the result is 0, calls
 * arch_disable_smp_support(). Always returns 0.
 *
 * NOTE(review): setup_max_cpus is unsigned int; confirm how get_option()
 * treats it for negative input.
 */
static int __init maxcpus(char *str)
{
	get_option(&str, &setup_max_cpus);
	if (setup_max_cpus == 0)
		arch_disable_smp_support();

	return 0;
}

early_param("maxcpus", maxcpus);
518 
519 /* Setup number of possible processor ids */
520 int nr_cpu_ids __read_mostly = NR_CPUS;
521 EXPORT_SYMBOL(nr_cpu_ids);
522 
523 /* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
524 void __init setup_nr_cpu_ids(void)
525 {
526 	nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
527 }
528 
/* Weak default: log the number of CPUs that are online. */
void __weak smp_announce(void)
{
	printk(KERN_INFO "Brought up %d CPUs\n", num_online_cpus());
}
533 
534 /* Called by boot processor to activate the rest. */
535 void __init smp_init(void)
536 {
537 	unsigned int cpu;
538 
539 	idle_threads_init();
540 
541 	/* FIXME: This should be done in userspace --RR */
542 	for_each_present_cpu(cpu) {
543 		if (num_online_cpus() >= setup_max_cpus)
544 			break;
545 		if (!cpu_online(cpu))
546 			cpu_up(cpu);
547 	}
548 
549 	/* Any cleanup work */
550 	smp_announce();
551 	smp_cpus_done(setup_max_cpus);
552 }
553 
/*
 * Call a function on all processors: first on every other online CPU via
 * smp_call_function(), then locally with interrupts disabled.
 *
 * May be used during early boot while early_boot_irqs_disabled is set,
 * which is why the local call uses local_irq_save/restore() instead of
 * local_irq_disable/enable().
 *
 * Returns the result of smp_call_function().
 */
int on_each_cpu(void (*func) (void *info), void *info, int wait)
{
	unsigned long irq_state;
	int err;

	preempt_disable();

	err = smp_call_function(func, info, wait);

	local_irq_save(irq_state);
	func(info);
	local_irq_restore(irq_state);

	preempt_enable();

	return err;
}
EXPORT_SYMBOL(on_each_cpu);
573 
574 /**
575  * on_each_cpu_mask(): Run a function on processors specified by
576  * cpumask, which may include the local processor.
577  * @mask: The set of cpus to run on (only runs on online subset).
578  * @func: The function to run. This must be fast and non-blocking.
579  * @info: An arbitrary pointer to pass to the function.
580  * @wait: If true, wait (atomically) until function has completed
581  *        on other CPUs.
582  *
583  * If @wait is true, then returns once @func has returned.
584  *
585  * You must not call this function with disabled interrupts or from a
586  * hardware interrupt handler or from a bottom half handler.  The
587  * exception is that it may be used during early boot while
588  * early_boot_irqs_disabled is set.
589  */
590 void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
591 			void *info, bool wait)
592 {
593 	int cpu = get_cpu();
594 
595 	smp_call_function_many(mask, func, info, wait);
596 	if (cpumask_test_cpu(cpu, mask)) {
597 		unsigned long flags;
598 		local_irq_save(flags);
599 		func(info);
600 		local_irq_restore(flags);
601 	}
602 	put_cpu();
603 }
604 EXPORT_SYMBOL(on_each_cpu_mask);
605 
606 /*
607  * on_each_cpu_cond(): Call a function on each processor for which
608  * the supplied function cond_func returns true, optionally waiting
609  * for all the required CPUs to finish. This may include the local
610  * processor.
611  * @cond_func:	A callback function that is passed a cpu id and
612  *		the the info parameter. The function is called
613  *		with preemption disabled. The function should
614  *		return a blooean value indicating whether to IPI
615  *		the specified CPU.
616  * @func:	The function to run on all applicable CPUs.
617  *		This must be fast and non-blocking.
618  * @info:	An arbitrary pointer to pass to both functions.
619  * @wait:	If true, wait (atomically) until function has
620  *		completed on other CPUs.
621  * @gfp_flags:	GFP flags to use when allocating the cpumask
622  *		used internally by the function.
623  *
624  * The function might sleep if the GFP flags indicates a non
625  * atomic allocation is allowed.
626  *
627  * Preemption is disabled to protect against CPUs going offline but not online.
628  * CPUs going online during the call will not be seen or sent an IPI.
629  *
630  * You must not call this function with disabled interrupts or
631  * from a hardware interrupt handler or from a bottom half handler.
632  */
633 void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
634 			smp_call_func_t func, void *info, bool wait,
635 			gfp_t gfp_flags)
636 {
637 	cpumask_var_t cpus;
638 	int cpu, ret;
639 
640 	might_sleep_if(gfp_flags & __GFP_WAIT);
641 
642 	if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) {
643 		preempt_disable();
644 		for_each_online_cpu(cpu)
645 			if (cond_func(cpu, info))
646 				cpumask_set_cpu(cpu, cpus);
647 		on_each_cpu_mask(cpus, func, info, wait);
648 		preempt_enable();
649 		free_cpumask_var(cpus);
650 	} else {
651 		/*
652 		 * No free cpumask, bother. No matter, we'll
653 		 * just have to IPI them one by one.
654 		 */
655 		preempt_disable();
656 		for_each_online_cpu(cpu)
657 			if (cond_func(cpu, info)) {
658 				ret = smp_call_function_single(cpu, func,
659 								info, wait);
660 				WARN_ON_ONCE(!ret);
661 			}
662 		preempt_enable();
663 	}
664 }
665 EXPORT_SYMBOL(on_each_cpu_cond);
666 
/* Empty callback; kick_all_cpus_sync() uses it as the function to run. */
static void do_nothing(void *unused)
{
}
670 
/**
 * kick_all_cpus_sync - Force all cpus out of idle
 *
 * Used to synchronize the update of pm_idle function pointer. It's
 * called after the pointer is updated and returns after the dummy
 * callback function has been executed on all cpus. The execution of
 * the function can only happen on the remote cpus after they have
 * left the idle function which had been called via pm_idle function
 * pointer. So it's guaranteed that nothing uses the previous pointer
 * anymore.
 */
void kick_all_cpus_sync(void)
{
	/* Make sure the change is visible before we kick the cpus */
	smp_mb();
	/* Run the empty callback on all other online cpus and wait for it. */
	smp_call_function(do_nothing, NULL, 1);
}
EXPORT_SYMBOL_GPL(kick_all_cpus_sync);
689