xref: /linux/kernel/stop_machine.c (revision 860a0ffaa3e1a9cf0ebb5f43d6a2a2ce67463e93)
11142d810STejun Heo /*
21142d810STejun Heo  * kernel/stop_machine.c
31142d810STejun Heo  *
41142d810STejun Heo  * Copyright (C) 2008, 2005	IBM Corporation.
51142d810STejun Heo  * Copyright (C) 2008, 2005	Rusty Russell rusty@rustcorp.com.au
61142d810STejun Heo  * Copyright (C) 2010		SUSE Linux Products GmbH
71142d810STejun Heo  * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
81142d810STejun Heo  *
91142d810STejun Heo  * This file is released under the GPLv2 and any later version.
10e5582ca2SRusty Russell  */
111142d810STejun Heo #include <linux/completion.h>
121da177e4SLinus Torvalds #include <linux/cpu.h>
131142d810STejun Heo #include <linux/init.h>
14ee527cd3SPrarit Bhargava #include <linux/kthread.h>
159984de1aSPaul Gortmaker #include <linux/export.h>
161142d810STejun Heo #include <linux/percpu.h>
17ee527cd3SPrarit Bhargava #include <linux/sched.h>
18ee527cd3SPrarit Bhargava #include <linux/stop_machine.h>
19a12bb444SBenjamin Herrenschmidt #include <linux/interrupt.h>
201142d810STejun Heo #include <linux/kallsyms.h>
21a12bb444SBenjamin Herrenschmidt 
2260063497SArun Sharma #include <linux/atomic.h>
231142d810STejun Heo 
/*
 * Structure to determine completion condition and record errors.  May
 * be shared by works on different cpus (see queue_stop_cpus_work()).
 */
struct cpu_stop_done {
	atomic_t		nr_todo;	/* nr left to execute */
	bool			executed;	/* actually executed? */
	int			ret;		/* collected return value; the
						 * last non-zero fn() result
						 * wins (cpu_stopper_thread) */
	struct completion	completion;	/* fired if nr_todo reaches 0 */
};
341142d810STejun Heo 
/* the actual stopper, one per every possible cpu, enabled on online cpus */
struct cpu_stopper {
	spinlock_t		lock;		/* protects ->enabled and ->works */
	bool			enabled;	/* is this stopper enabled? */
	struct list_head	works;		/* list of pending works */
};
411142d810STejun Heo 
static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
/* the "migration/%d" kthread servicing each cpu's stopper */
static DEFINE_PER_CPU(struct task_struct *, cpu_stopper_task);
/* set once in cpu_stop_init(); __stop_machine() special-cases early boot */
static bool stop_machine_initialized = false;
451142d810STejun Heo 
461142d810STejun Heo static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
471142d810STejun Heo {
481142d810STejun Heo 	memset(done, 0, sizeof(*done));
491142d810STejun Heo 	atomic_set(&done->nr_todo, nr_todo);
501142d810STejun Heo 	init_completion(&done->completion);
511142d810STejun Heo }
521142d810STejun Heo 
531142d810STejun Heo /* signal completion unless @done is NULL */
541142d810STejun Heo static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed)
551142d810STejun Heo {
561142d810STejun Heo 	if (done) {
571142d810STejun Heo 		if (executed)
581142d810STejun Heo 			done->executed = true;
591142d810STejun Heo 		if (atomic_dec_and_test(&done->nr_todo))
601142d810STejun Heo 			complete(&done->completion);
611142d810STejun Heo 	}
621142d810STejun Heo }
631142d810STejun Heo 
/*
 * queue @work to @cpu's stopper and kick its kthread.  If the stopper
 * is disabled (cpu offline or going down), @work is completed
 * immediately as "not executed" so waiters see -ENOENT.
 */
static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	struct task_struct *p = per_cpu(cpu_stopper_task, cpu);

	unsigned long flags;

	/* ->lock serializes against cpu_stopper_thread() and hotplug */
	spin_lock_irqsave(&stopper->lock, flags);

	if (stopper->enabled) {
		list_add_tail(&work->list, &stopper->works);
		wake_up_process(p);
	} else
		cpu_stop_signal_done(work->done, false);

	spin_unlock_irqrestore(&stopper->lock, flags);
}
821142d810STejun Heo 
831142d810STejun Heo /**
841142d810STejun Heo  * stop_one_cpu - stop a cpu
851142d810STejun Heo  * @cpu: cpu to stop
861142d810STejun Heo  * @fn: function to execute
871142d810STejun Heo  * @arg: argument to @fn
881142d810STejun Heo  *
891142d810STejun Heo  * Execute @fn(@arg) on @cpu.  @fn is run in a process context with
901142d810STejun Heo  * the highest priority preempting any task on the cpu and
911142d810STejun Heo  * monopolizing it.  This function returns after the execution is
921142d810STejun Heo  * complete.
931142d810STejun Heo  *
941142d810STejun Heo  * This function doesn't guarantee @cpu stays online till @fn
951142d810STejun Heo  * completes.  If @cpu goes down in the middle, execution may happen
961142d810STejun Heo  * partially or fully on different cpus.  @fn should either be ready
971142d810STejun Heo  * for that or the caller should ensure that @cpu stays online until
981142d810STejun Heo  * this function completes.
991142d810STejun Heo  *
1001142d810STejun Heo  * CONTEXT:
1011142d810STejun Heo  * Might sleep.
1021142d810STejun Heo  *
1031142d810STejun Heo  * RETURNS:
1041142d810STejun Heo  * -ENOENT if @fn(@arg) was not executed because @cpu was offline;
1051142d810STejun Heo  * otherwise, the return value of @fn.
1061142d810STejun Heo  */
1071142d810STejun Heo int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
1081142d810STejun Heo {
1091142d810STejun Heo 	struct cpu_stop_done done;
1101142d810STejun Heo 	struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };
1111142d810STejun Heo 
1121142d810STejun Heo 	cpu_stop_init_done(&done, 1);
113*860a0ffaSThomas Gleixner 	cpu_stop_queue_work(cpu, &work);
1141142d810STejun Heo 	wait_for_completion(&done.completion);
1151142d810STejun Heo 	return done.executed ? done.ret : -ENOENT;
1161142d810STejun Heo }
1171142d810STejun Heo 
1181142d810STejun Heo /**
1191142d810STejun Heo  * stop_one_cpu_nowait - stop a cpu but don't wait for completion
1201142d810STejun Heo  * @cpu: cpu to stop
1211142d810STejun Heo  * @fn: function to execute
1221142d810STejun Heo  * @arg: argument to @fn
1231142d810STejun Heo  *
1241142d810STejun Heo  * Similar to stop_one_cpu() but doesn't wait for completion.  The
1251142d810STejun Heo  * caller is responsible for ensuring @work_buf is currently unused
1261142d810STejun Heo  * and will remain untouched until stopper starts executing @fn.
1271142d810STejun Heo  *
1281142d810STejun Heo  * CONTEXT:
1291142d810STejun Heo  * Don't care.
1301142d810STejun Heo  */
1311142d810STejun Heo void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
1321142d810STejun Heo 			struct cpu_stop_work *work_buf)
1331142d810STejun Heo {
1341142d810STejun Heo 	*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
135*860a0ffaSThomas Gleixner 	cpu_stop_queue_work(cpu, work_buf);
1361142d810STejun Heo }
1371142d810STejun Heo 
/* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex);	/* serializes all stop_cpus() users */
static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
1411142d810STejun Heo 
/*
 * Queue @fn/@arg on every cpu in @cpumask using the static per-cpu
 * works.  @done may be shared by all queued works and must have been
 * initialized for cpumask_weight(@cpumask) invocations.  Caller must
 * hold stop_cpus_mutex (the static works admit one request at a time).
 */
static void queue_stop_cpus_work(const struct cpumask *cpumask,
				 cpu_stop_fn_t fn, void *arg,
				 struct cpu_stop_done *done)
{
	struct cpu_stop_work *work;
	unsigned int cpu;

	/* initialize works and done */
	for_each_cpu(cpu, cpumask) {
		work = &per_cpu(stop_cpus_work, cpu);
		work->fn = fn;
		work->arg = arg;
		work->done = done;
	}

	/*
	 * Disable preemption while queueing to avoid getting
	 * preempted by a stopper which might wait for other stoppers
	 * to enter @fn which can lead to deadlock.
	 */
	preempt_disable();
	for_each_cpu(cpu, cpumask)
		cpu_stop_queue_work(cpu, &per_cpu(stop_cpus_work, cpu));
	preempt_enable();
}
1671142d810STejun Heo 
168fd7355baSTejun Heo static int __stop_cpus(const struct cpumask *cpumask,
169fd7355baSTejun Heo 		       cpu_stop_fn_t fn, void *arg)
170fd7355baSTejun Heo {
171fd7355baSTejun Heo 	struct cpu_stop_done done;
172fd7355baSTejun Heo 
173fd7355baSTejun Heo 	cpu_stop_init_done(&done, cpumask_weight(cpumask));
174fd7355baSTejun Heo 	queue_stop_cpus_work(cpumask, fn, arg, &done);
1751142d810STejun Heo 	wait_for_completion(&done.completion);
1761142d810STejun Heo 	return done.executed ? done.ret : -ENOENT;
1771142d810STejun Heo }
1781142d810STejun Heo 
1791142d810STejun Heo /**
1801142d810STejun Heo  * stop_cpus - stop multiple cpus
1811142d810STejun Heo  * @cpumask: cpus to stop
1821142d810STejun Heo  * @fn: function to execute
1831142d810STejun Heo  * @arg: argument to @fn
1841142d810STejun Heo  *
1851142d810STejun Heo  * Execute @fn(@arg) on online cpus in @cpumask.  On each target cpu,
1861142d810STejun Heo  * @fn is run in a process context with the highest priority
1871142d810STejun Heo  * preempting any task on the cpu and monopolizing it.  This function
1881142d810STejun Heo  * returns after all executions are complete.
1891142d810STejun Heo  *
1901142d810STejun Heo  * This function doesn't guarantee the cpus in @cpumask stay online
1911142d810STejun Heo  * till @fn completes.  If some cpus go down in the middle, execution
1921142d810STejun Heo  * on the cpu may happen partially or fully on different cpus.  @fn
1931142d810STejun Heo  * should either be ready for that or the caller should ensure that
1941142d810STejun Heo  * the cpus stay online until this function completes.
1951142d810STejun Heo  *
1961142d810STejun Heo  * All stop_cpus() calls are serialized making it safe for @fn to wait
1971142d810STejun Heo  * for all cpus to start executing it.
1981142d810STejun Heo  *
1991142d810STejun Heo  * CONTEXT:
2001142d810STejun Heo  * Might sleep.
2011142d810STejun Heo  *
2021142d810STejun Heo  * RETURNS:
2031142d810STejun Heo  * -ENOENT if @fn(@arg) was not executed at all because all cpus in
2041142d810STejun Heo  * @cpumask were offline; otherwise, 0 if all executions of @fn
2051142d810STejun Heo  * returned 0, any non zero return value if any returned non zero.
2061142d810STejun Heo  */
2071142d810STejun Heo int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
2081142d810STejun Heo {
2091142d810STejun Heo 	int ret;
2101142d810STejun Heo 
2111142d810STejun Heo 	/* static works are used, process one request at a time */
2121142d810STejun Heo 	mutex_lock(&stop_cpus_mutex);
2131142d810STejun Heo 	ret = __stop_cpus(cpumask, fn, arg);
2141142d810STejun Heo 	mutex_unlock(&stop_cpus_mutex);
2151142d810STejun Heo 	return ret;
2161142d810STejun Heo }
2171142d810STejun Heo 
2181142d810STejun Heo /**
2191142d810STejun Heo  * try_stop_cpus - try to stop multiple cpus
2201142d810STejun Heo  * @cpumask: cpus to stop
2211142d810STejun Heo  * @fn: function to execute
2221142d810STejun Heo  * @arg: argument to @fn
2231142d810STejun Heo  *
2241142d810STejun Heo  * Identical to stop_cpus() except that it fails with -EAGAIN if
2251142d810STejun Heo  * someone else is already using the facility.
2261142d810STejun Heo  *
2271142d810STejun Heo  * CONTEXT:
2281142d810STejun Heo  * Might sleep.
2291142d810STejun Heo  *
2301142d810STejun Heo  * RETURNS:
2311142d810STejun Heo  * -EAGAIN if someone else is already stopping cpus, -ENOENT if
2321142d810STejun Heo  * @fn(@arg) was not executed at all because all cpus in @cpumask were
2331142d810STejun Heo  * offline; otherwise, 0 if all executions of @fn returned 0, any non
2341142d810STejun Heo  * zero return value if any returned non zero.
2351142d810STejun Heo  */
2361142d810STejun Heo int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
2371142d810STejun Heo {
2381142d810STejun Heo 	int ret;
2391142d810STejun Heo 
2401142d810STejun Heo 	/* static works are used, process one request at a time */
2411142d810STejun Heo 	if (!mutex_trylock(&stop_cpus_mutex))
2421142d810STejun Heo 		return -EAGAIN;
2431142d810STejun Heo 	ret = __stop_cpus(cpumask, fn, arg);
2441142d810STejun Heo 	mutex_unlock(&stop_cpus_mutex);
2451142d810STejun Heo 	return ret;
2461142d810STejun Heo }
2471142d810STejun Heo 
2481142d810STejun Heo static int cpu_stopper_thread(void *data)
2491142d810STejun Heo {
2501142d810STejun Heo 	struct cpu_stopper *stopper = data;
2511142d810STejun Heo 	struct cpu_stop_work *work;
2521142d810STejun Heo 	int ret;
2531142d810STejun Heo 
2541142d810STejun Heo repeat:
2551142d810STejun Heo 	set_current_state(TASK_INTERRUPTIBLE);	/* mb paired w/ kthread_stop */
2561142d810STejun Heo 
2571142d810STejun Heo 	if (kthread_should_stop()) {
2581142d810STejun Heo 		__set_current_state(TASK_RUNNING);
2591142d810STejun Heo 		return 0;
2601142d810STejun Heo 	}
2611142d810STejun Heo 
2621142d810STejun Heo 	work = NULL;
2631142d810STejun Heo 	spin_lock_irq(&stopper->lock);
2641142d810STejun Heo 	if (!list_empty(&stopper->works)) {
2651142d810STejun Heo 		work = list_first_entry(&stopper->works,
2661142d810STejun Heo 					struct cpu_stop_work, list);
2671142d810STejun Heo 		list_del_init(&work->list);
2681142d810STejun Heo 	}
2691142d810STejun Heo 	spin_unlock_irq(&stopper->lock);
2701142d810STejun Heo 
2711142d810STejun Heo 	if (work) {
2721142d810STejun Heo 		cpu_stop_fn_t fn = work->fn;
2731142d810STejun Heo 		void *arg = work->arg;
2741142d810STejun Heo 		struct cpu_stop_done *done = work->done;
275ca51c5a7SRakib Mullick 		char ksym_buf[KSYM_NAME_LEN] __maybe_unused;
2761142d810STejun Heo 
2771142d810STejun Heo 		__set_current_state(TASK_RUNNING);
2781142d810STejun Heo 
2791142d810STejun Heo 		/* cpu stop callbacks are not allowed to sleep */
2801142d810STejun Heo 		preempt_disable();
2811142d810STejun Heo 
2821142d810STejun Heo 		ret = fn(arg);
2831142d810STejun Heo 		if (ret)
2841142d810STejun Heo 			done->ret = ret;
2851142d810STejun Heo 
2861142d810STejun Heo 		/* restore preemption and check it's still balanced */
2871142d810STejun Heo 		preempt_enable();
2881142d810STejun Heo 		WARN_ONCE(preempt_count(),
2891142d810STejun Heo 			  "cpu_stop: %s(%p) leaked preempt count\n",
2901142d810STejun Heo 			  kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL,
2911142d810STejun Heo 					  ksym_buf), arg);
2921142d810STejun Heo 
2931142d810STejun Heo 		cpu_stop_signal_done(done, true);
2941142d810STejun Heo 	} else
2951142d810STejun Heo 		schedule();
2961142d810STejun Heo 
2971142d810STejun Heo 	goto repeat;
2981142d810STejun Heo }
2991142d810STejun Heo 
extern void sched_set_stop_task(int cpu, struct task_struct *stop);

/*
 * CPU hotplug callback managing the per-cpu stopper kthread, mostly
 * lifted from sched migration thread mgmt.
 *
 * UP_PREPARE creates and binds the kthread and registers it with the
 * scheduler as @cpu's stop task; ONLINE marks the stopper enabled so
 * cpu_stop_queue_work() starts accepting works; UP_CANCELED/POST_DEAD
 * tears the thread down and fails any works still queued.
 */
static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
					   unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	struct task_struct *p = per_cpu(cpu_stopper_task, cpu);

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_UP_PREPARE:
		/* a previous teardown must have fully cleaned up */
		BUG_ON(p || stopper->enabled || !list_empty(&stopper->works));
		p = kthread_create_on_node(cpu_stopper_thread,
					   stopper,
					   cpu_to_node(cpu),
					   "migration/%d", cpu);
		if (IS_ERR(p))
			return notifier_from_errno(PTR_ERR(p));
		/* hold a ref until teardown releases it below */
		get_task_struct(p);
		kthread_bind(p, cpu);
		sched_set_stop_task(cpu, p);
		per_cpu(cpu_stopper_task, cpu) = p;
		break;

	case CPU_ONLINE:
		/* strictly unnecessary, as first user will wake it */
		wake_up_process(p);
		/* mark enabled */
		spin_lock_irq(&stopper->lock);
		stopper->enabled = true;
		spin_unlock_irq(&stopper->lock);
		break;

#ifdef CONFIG_HOTPLUG_CPU
	case CPU_UP_CANCELED:
	case CPU_POST_DEAD:
	{
		struct cpu_stop_work *work;

		sched_set_stop_task(cpu, NULL);
		/* kill the stopper */
		kthread_stop(p);
		/* drain remaining works; signal "not executed" to waiters */
		spin_lock_irq(&stopper->lock);
		list_for_each_entry(work, &stopper->works, list)
			cpu_stop_signal_done(work->done, false);
		stopper->enabled = false;
		spin_unlock_irq(&stopper->lock);
		/* release the stopper */
		put_task_struct(p);
		per_cpu(cpu_stopper_task, cpu) = NULL;
		break;
	}
#endif
	}

	return NOTIFY_OK;
}
3591142d810STejun Heo 
/*
 * Give it a higher priority so that cpu stopper is available to other
 * cpu notifiers.  It currently shares the same priority as sched
 * migration_notifier.
 */
static struct notifier_block __cpuinitdata cpu_stop_cpu_notifier = {
	.notifier_call	= cpu_stop_cpu_callback,
	.priority	= 10,
};
3691142d810STejun Heo 
/*
 * Initialize all per-cpu stoppers, start the boot cpu's stopper by
 * hand (hotplug notifications for it already happened) and register
 * the hotplug callback for the rest.
 */
static int __init cpu_stop_init(void)
{
	void *bcpu = (void *)(long)smp_processor_id();
	unsigned int cpu;
	int err;

	for_each_possible_cpu(cpu) {
		struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

		spin_lock_init(&stopper->lock);
		INIT_LIST_HEAD(&stopper->works);
	}

	/* start one for the boot cpu */
	err = cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_UP_PREPARE,
				    bcpu);
	BUG_ON(err != NOTIFY_OK);
	cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu);
	register_cpu_notifier(&cpu_stop_cpu_notifier);

	/* from now on __stop_machine() may use the stopper machinery */
	stop_machine_initialized = true;

	return 0;
}
early_initcall(cpu_stop_init);
3951da177e4SLinus Torvalds 
396bbf1bb3eSTejun Heo #ifdef CONFIG_STOP_MACHINE
397bbf1bb3eSTejun Heo 
/*
 * This controls the threads on each CPU.  States are advanced in
 * declaration order (see ack_state(): set_state(smdata->state + 1)).
 */
enum stopmachine_state {
	/* Dummy starting state for thread. */
	STOPMACHINE_NONE,
	/* Awaiting everyone to be scheduled. */
	STOPMACHINE_PREPARE,
	/* Disable interrupts. */
	STOPMACHINE_DISABLE_IRQ,
	/* Run the function */
	STOPMACHINE_RUN,
	/* Exit */
	STOPMACHINE_EXIT,
};
4111da177e4SLinus Torvalds 
/* shared state driving one stop_machine() invocation */
struct stop_machine_data {
	int			(*fn)(void *);	/* callback to run */
	void			*data;		/* argument to ->fn */
	/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
	unsigned int		num_threads;
	const struct cpumask	*active_cpus;	/* cpus to run ->fn on;
						 * NULL = first online cpu */

	enum stopmachine_state	state;		/* current state, see set_state() */
	atomic_t		thread_ack;	/* threads yet to ack ->state */
};
4225c2aed62SJason Baron 
/* Reset the ack counter and publish @newstate to all stopper threads. */
static void set_state(struct stop_machine_data *smdata,
		      enum stopmachine_state newstate)
{
	/* Reset ack counter. */
	atomic_set(&smdata->thread_ack, smdata->num_threads);
	/* order the counter reset before the new state becomes visible */
	smp_wmb();
	smdata->state = newstate;
}
4311da177e4SLinus Torvalds 
432ffdb5976SRusty Russell /* Last one to ack a state moves to the next state. */
4333fc1f1e2STejun Heo static void ack_state(struct stop_machine_data *smdata)
434ffdb5976SRusty Russell {
4353fc1f1e2STejun Heo 	if (atomic_dec_and_test(&smdata->thread_ack))
4363fc1f1e2STejun Heo 		set_state(smdata, smdata->state + 1);
437ffdb5976SRusty Russell }
438d8cb7c1dSAndrew Morton 
/*
 * This is the cpu_stop function which stops the CPU.  Every
 * participating cpu spins through the stopmachine_state machine in
 * lockstep: all cpus ack each state before the last acker advances to
 * the next one (see ack_state()).  Only "active" cpus run @smdata->fn.
 */
static int stop_machine_cpu_stop(void *data)
{
	struct stop_machine_data *smdata = data;
	enum stopmachine_state curstate = STOPMACHINE_NONE;
	int cpu = smp_processor_id(), err = 0;
	unsigned long flags;
	bool is_active;

	/*
	 * When called from stop_machine_from_inactive_cpu(), irq might
	 * already be disabled.  Save the state and restore it on exit.
	 */
	local_save_flags(flags);

	/* NULL ->active_cpus means "run @fn on the first online cpu only" */
	if (!smdata->active_cpus)
		is_active = cpu == cpumask_first(cpu_online_mask);
	else
		is_active = cpumask_test_cpu(cpu, smdata->active_cpus);

	/* Simple state machine */
	do {
		/* Chill out and ensure we re-read stopmachine_state. */
		cpu_relax();
		if (smdata->state != curstate) {
			curstate = smdata->state;
			switch (curstate) {
			case STOPMACHINE_DISABLE_IRQ:
				local_irq_disable();
				hard_irq_disable();
				break;
			case STOPMACHINE_RUN:
				if (is_active)
					err = smdata->fn(smdata->data);
				break;
			default:
				break;
			}
			/* tell the others we've seen this state */
			ack_state(smdata);
		}
	} while (curstate != STOPMACHINE_EXIT);

	local_irq_restore(flags);
	return err;
}
4841da177e4SLinus Torvalds 
/*
 * Core of stop_machine(): run @fn with every online cpu spinning with
 * irqs disabled.  Caller must exclude cpu hotplug (see stop_machine()).
 */
int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
	struct stop_machine_data smdata = { .fn = fn, .data = data,
					    .num_threads = num_online_cpus(),
					    .active_cpus = cpus };

	if (!stop_machine_initialized) {
		/*
		 * Handle the case where stop_machine() is called
		 * early in boot before stop_machine() has been
		 * initialized.  Only one cpu can be running, so just
		 * call @fn directly with irqs off.
		 */
		unsigned long flags;
		int ret;

		WARN_ON_ONCE(smdata.num_threads != 1);

		local_irq_save(flags);
		hard_irq_disable();
		ret = (*fn)(data);
		local_irq_restore(flags);

		return ret;
	}

	/* Set the initial state and stop all online cpus. */
	set_state(&smdata, STOPMACHINE_PREPARE);
	return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
}
5141da177e4SLinus Torvalds 
/*
 * stop_machine - freeze the machine and run @fn on @cpus
 *
 * Wrapper around __stop_machine() that excludes cpu hotplug for the
 * duration.  See __stop_machine() for the semantics of @cpus.
 */
int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
	int err;

	/* no CPUs can come up or down while we're stopping the machine */
	get_online_cpus();
	err = __stop_machine(fn, data, cpus);
	put_online_cpus();

	return err;
}
EXPORT_SYMBOL_GPL(stop_machine);
526bbf1bb3eSTejun Heo 
/**
 * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU
 * @fn: the function to run
 * @data: the data ptr for the @fn()
 * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
 *
 * This is identical to stop_machine() but can be called from a CPU which
 * is not active.  The local CPU is in the process of hotplug (so no other
 * CPU hotplug can start) and not marked active and doesn't have enough
 * context to sleep.
 *
 * This function provides stop_machine() functionality for such state by
 * using busy-wait for synchronization and executing @fn directly for local
 * CPU.
 *
 * CONTEXT:
 * Local CPU is inactive.  Temporarily stops all active CPUs.
 *
 * RETURNS:
 * 0 if all executions of @fn returned 0, any non zero return value if any
 * returned non zero.
 */
int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
				  const struct cpumask *cpus)
{
	struct stop_machine_data smdata = { .fn = fn, .data = data,
					    .active_cpus = cpus };
	struct cpu_stop_done done;
	int ret;

	/* Local CPU must be inactive and CPU hotplug in progress. */
	BUG_ON(cpu_active(raw_smp_processor_id()));
	smdata.num_threads = num_active_cpus() + 1;	/* +1 for local */

	/* No proper task established and can't sleep - busy wait for lock. */
	while (!mutex_trylock(&stop_cpus_mutex))
		cpu_relax();

	/* Schedule work on other CPUs and execute directly for local CPU */
	set_state(&smdata, STOPMACHINE_PREPARE);
	cpu_stop_init_done(&done, num_active_cpus());
	queue_stop_cpus_work(cpu_active_mask, stop_machine_cpu_stop, &smdata,
			     &done);
	/* local CPU participates directly, rather than via its stopper */
	ret = stop_machine_cpu_stop(&smdata);

	/* Busy wait for completion. */
	while (!completion_done(&done.completion))
		cpu_relax();

	mutex_unlock(&stop_cpus_mutex);
	/* local error takes precedence over the collected one */
	return ret ?: done.ret;
}
579f740e6cdSTejun Heo 
580bbf1bb3eSTejun Heo #endif	/* CONFIG_STOP_MACHINE */
581