// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * kernel/stop_machine.c
 *
 * Copyright (C) 2008, 2005	IBM Corporation.
 * Copyright (C) 2008, 2005	Rusty Russell rusty@rustcorp.com.au
 * Copyright (C) 2010		SUSE Linux Products GmbH
 * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
 */
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/stop_machine.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/smpboot.h>
#include <linux/atomic.h>
#include <linux/nmi.h>
#include <linux/sched/wake_q.h>

/*
 * Structure to determine completion condition and record errors.  May
 * be shared by works on different cpus.
 */
struct cpu_stop_done {
	atomic_t		nr_todo;	/* nr left to execute */
	int			ret;		/* collected return value */
	struct completion	completion;	/* fired if nr_todo reaches 0 */
};

/* the actual stopper: one per possible CPU, enabled while its CPU is online */
struct cpu_stopper {
	struct task_struct	*thread;

	raw_spinlock_t		lock;
	bool			enabled;	/* is this stopper enabled? */
	struct list_head	works;		/* list of pending works */

	struct cpu_stop_work	stop_work;	/* for stop_cpus */
};

static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
static bool stop_machine_initialized = false;

/* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex);
static bool stop_cpus_in_progress;

static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
{
	memset(done, 0, sizeof(*done));
	atomic_set(&done->nr_todo, nr_todo);
	init_completion(&done->completion);
}

/* signal completion; callers must ensure @done is non-NULL */
static void cpu_stop_signal_done(struct cpu_stop_done *done)
{
	if (atomic_dec_and_test(&done->nr_todo))
		complete(&done->completion);
}

static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
					struct cpu_stop_work *work,
					struct wake_q_head *wakeq)
{
	list_add_tail(&work->list, &stopper->works);
	wake_q_add(wakeq, stopper->thread);
}

/* queue @work to @stopper.  if offline, @work is completed immediately */
static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	DEFINE_WAKE_Q(wakeq);
	unsigned long flags;
	bool enabled;

	preempt_disable();
	raw_spin_lock_irqsave(&stopper->lock, flags);
	enabled = stopper->enabled;
	if (enabled)
		__cpu_stop_queue_work(stopper, work, &wakeq);
	else if (work->done)
		cpu_stop_signal_done(work->done);
	raw_spin_unlock_irqrestore(&stopper->lock, flags);

	wake_up_q(&wakeq);
	preempt_enable();

	return enabled;
}

/**
 * stop_one_cpu - stop a cpu
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on @cpu.  @fn is run in a process context with
 * the highest priority preempting any task on the cpu and
 * monopolizing it.  This function returns after the execution is
 * complete.
 *
 * This function doesn't guarantee @cpu stays online till @fn
 * completes.  If @cpu goes down in the middle, execution may happen
 * partially or fully on different cpus.  @fn should either be ready
 * for that or the caller should ensure that @cpu stays online until
 * this function completes.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed because @cpu was offline;
 * otherwise, the return value of @fn.
 */
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_done done;
	struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };

	cpu_stop_init_done(&done, 1);
	if (!cpu_stop_queue_work(cpu, &work))
		return -ENOENT;
	/*
	 * In case @cpu == smp_processor_id() we can avoid a sleep+wakeup
	 * cycle by doing a preemption:
	 */
	cond_resched();
	wait_for_completion(&done.completion);
	return done.ret;
}
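
/*
 * Usage sketch (illustrative, not part of this file): read a per-CPU
 * counter on a remote CPU without racing against that CPU's local
 * updates.  read_hits(), the hits counter and the target CPU number
 * are all hypothetical.
 *
 *	static DEFINE_PER_CPU(u64, hits);
 *
 *	static int read_hits(void *arg)
 *	{
 *		u64 *sum = arg;
 *
 *		*sum = this_cpu_read(hits);	// runs on the target CPU
 *		return 0;			// becomes stop_one_cpu()'s return value
 *	}
 *
 *	u64 sum;
 *	int err = stop_one_cpu(3, read_hits, &sum);	// -ENOENT if CPU 3 is offline
 */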

/* This controls the threads on each CPU. */
enum multi_stop_state {
	/* Dummy starting state for thread. */
	MULTI_STOP_NONE,
	/* Awaiting everyone to be scheduled. */
	MULTI_STOP_PREPARE,
	/* Disable interrupts. */
	MULTI_STOP_DISABLE_IRQ,
	/* Run the function */
	MULTI_STOP_RUN,
	/* Exit */
	MULTI_STOP_EXIT,
};

struct multi_stop_data {
	cpu_stop_fn_t		fn;
	void			*data;
	/* Like num_online_cpus(), but CPU hotplug itself uses us, so we need this. */
	unsigned int		num_threads;
	const struct cpumask	*active_cpus;

	enum multi_stop_state	state;
	atomic_t		thread_ack;
};

static void set_state(struct multi_stop_data *msdata,
		      enum multi_stop_state newstate)
{
	/* Reset ack counter. */
	atomic_set(&msdata->thread_ack, msdata->num_threads);
	smp_wmb();
	msdata->state = newstate;
}

/* Last one to ack a state moves to the next state. */
static void ack_state(struct multi_stop_data *msdata)
{
	if (atomic_dec_and_test(&msdata->thread_ack))
		set_state(msdata, msdata->state + 1);
}

/* This is the cpu_stop function which stops the CPU. */
static int multi_cpu_stop(void *data)
{
	struct multi_stop_data *msdata = data;
	enum multi_stop_state curstate = MULTI_STOP_NONE;
	int cpu = smp_processor_id(), err = 0;
	const struct cpumask *cpumask;
	unsigned long flags;
	bool is_active;

	/*
	 * When called from stop_machine_from_inactive_cpu(), irq might
	 * already be disabled.  Save the state and restore it on exit.
	 */
	local_save_flags(flags);

	if (!msdata->active_cpus) {
		cpumask = cpu_online_mask;
		is_active = cpu == cpumask_first(cpumask);
	} else {
		cpumask = msdata->active_cpus;
		is_active = cpumask_test_cpu(cpu, cpumask);
	}

	/* Simple state machine */
	do {
		/* Chill out and ensure we re-read multi_stop_state. */
		cpu_relax_yield(cpumask);
		if (msdata->state != curstate) {
			curstate = msdata->state;
			switch (curstate) {
			case MULTI_STOP_DISABLE_IRQ:
				local_irq_disable();
				hard_irq_disable();
				break;
			case MULTI_STOP_RUN:
				if (is_active)
					err = msdata->fn(msdata->data);
				break;
			default:
				break;
			}
			ack_state(msdata);
		} else if (curstate > MULTI_STOP_PREPARE) {
			/*
			 * At this stage all other CPUs we depend on must spin
			 * in the same loop. Any reason for hard-lockup should
			 * be detected and reported on their side.
			 */
			touch_nmi_watchdog();
		}
	} while (curstate != MULTI_STOP_EXIT);

	local_irq_restore(flags);
	return err;
}
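
/*
 * The loop above is in effect a sequence of barriers: set_state() arms
 * thread_ack with num_threads, each stopper that observes the new state
 * acts on it and calls ack_state(), and the last one to ack advances
 * msdata->state.  A minimal model of one transition (illustrative only,
 * not the exact memory-ordering scheme used above):
 *
 *	while (READ_ONCE(state) != next)	// all CPUs spin here...
 *		cpu_relax();
 *	// ...act on @next, then:
 *	if (atomic_dec_and_test(&ack))		// last CPU to ack
 *		set_state(msdata, next + 1);	// re-arms ack, publishes state
 *
 * So no CPU can, for example, reach MULTI_STOP_RUN while another still
 * has interrupts enabled in MULTI_STOP_DISABLE_IRQ.
 */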

static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
				    int cpu2, struct cpu_stop_work *work2)
{
	struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
	struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
	DEFINE_WAKE_Q(wakeq);
	int err;

retry:
	/*
	 * The waking up of stopper threads has to happen in the same
	 * scheduling context as the queueing.  Otherwise, there is a
	 * possibility of one of the above stoppers being woken up by another
	 * CPU, and preempting us; we would then never wake up the other
	 * stopper.
	 */
	preempt_disable();
	raw_spin_lock_irq(&stopper1->lock);
	raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);

	if (!stopper1->enabled || !stopper2->enabled) {
		err = -ENOENT;
		goto unlock;
	}

	/*
	 * Ensure that if we race with __stop_cpus() the stoppers won't get
	 * queued up in reverse order, leading to system deadlock.
	 *
	 * We can't miss stop_cpus_in_progress if queue_stop_cpus_work() has
	 * queued a work on cpu1 but not on cpu2: we hold both locks.
	 *
	 * The flag can be falsely true, but it is safe to spin until it is
	 * cleared; queue_stop_cpus_work() does everything under
	 * preempt_disable().
	 */
	if (unlikely(stop_cpus_in_progress)) {
		err = -EDEADLK;
		goto unlock;
	}

	err = 0;
	__cpu_stop_queue_work(stopper1, work1, &wakeq);
	__cpu_stop_queue_work(stopper2, work2, &wakeq);

unlock:
	raw_spin_unlock(&stopper2->lock);
	raw_spin_unlock_irq(&stopper1->lock);

	if (unlikely(err == -EDEADLK)) {
		preempt_enable();

		while (stop_cpus_in_progress)
			cpu_relax();

		goto retry;
	}

	wake_up_q(&wakeq);
	preempt_enable();

	return err;
}
/**
 * stop_two_cpus - stops two cpus
 * @cpu1: the cpu to stop
 * @cpu2: the other cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Stops both the current and specified CPU and runs @fn on one of them.
 *
 * Returns when both have completed.
 */
int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_done done;
	struct cpu_stop_work work1, work2;
	struct multi_stop_data msdata;

	msdata = (struct multi_stop_data){
		.fn = fn,
		.data = arg,
		.num_threads = 2,
		.active_cpus = cpumask_of(cpu1),
	};

	work1 = work2 = (struct cpu_stop_work){
		.fn = multi_cpu_stop,
		.arg = &msdata,
		.done = &done
	};

	cpu_stop_init_done(&done, 2);
	set_state(&msdata, MULTI_STOP_PREPARE);

	if (cpu1 > cpu2)
		swap(cpu1, cpu2);
	if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2))
		return -ENOENT;

	wait_for_completion(&done.completion);
	return done.ret;
}
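
/*
 * Usage sketch (illustrative): run a callback while two specific CPUs
 * spin with interrupts disabled, e.g. to exchange per-CPU state.  Note
 * that @fn executes on @cpu1 only, since active_cpus above is
 * cpumask_of(cpu1).  swap_state() and struct swap_args are hypothetical.
 *
 *	struct swap_args { int src_cpu, dst_cpu; };
 *
 *	static int swap_state(void *data)
 *	{
 *		struct swap_args *args = data;
 *
 *		// both CPUs are held in multi_cpu_stop() with IRQs off;
 *		// this body runs on args->src_cpu only.
 *		return 0;
 *	}
 *
 *	struct swap_args args = { .src_cpu = 1, .dst_cpu = 2 };
 *	int err = stop_two_cpus(args.src_cpu, args.dst_cpu, swap_state, &args);
 */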

/**
 * stop_one_cpu_nowait - stop a cpu but don't wait for completion
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 * @work_buf: pointer to cpu_stop_work structure
 *
 * Similar to stop_one_cpu() but doesn't wait for completion.  The
 * caller is responsible for ensuring @work_buf is currently unused
 * and will remain untouched until stopper starts executing @fn.
 *
 * CONTEXT:
 * Don't care.
 *
 * RETURNS:
 * true if cpu_stop_work was queued successfully and @fn will be called,
 * false otherwise.
 */
bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
			struct cpu_stop_work *work_buf)
{
	*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
	return cpu_stop_queue_work(cpu, work_buf);
}
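
/*
 * Usage sketch (illustrative): fire-and-forget from atomic context.  The
 * work buffer must stay untouched until the stopper has run, so a static
 * per-CPU buffer is the usual pattern.  kick_fn() and kick_work are
 * hypothetical.
 *
 *	static DEFINE_PER_CPU(struct cpu_stop_work, kick_work);
 *
 *	static int kick_fn(void *arg)
 *	{
 *		return 0;	// e.g. force a reschedule on this CPU
 *	}
 *
 *	// safe from IRQ or preempt-disabled context:
 *	stop_one_cpu_nowait(cpu, kick_fn, NULL, &per_cpu(kick_work, cpu));
 */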

static bool queue_stop_cpus_work(const struct cpumask *cpumask,
				 cpu_stop_fn_t fn, void *arg,
				 struct cpu_stop_done *done)
{
	struct cpu_stop_work *work;
	unsigned int cpu;
	bool queued = false;

	/*
	 * Disable preemption while queueing to avoid getting
	 * preempted by a stopper which might wait for other stoppers
	 * to enter @fn which can lead to deadlock.
	 */
	preempt_disable();
	stop_cpus_in_progress = true;
	for_each_cpu(cpu, cpumask) {
		work = &per_cpu(cpu_stopper.stop_work, cpu);
		work->fn = fn;
		work->arg = arg;
		work->done = done;
		if (cpu_stop_queue_work(cpu, work))
			queued = true;
	}
	stop_cpus_in_progress = false;
	preempt_enable();

	return queued;
}

static int __stop_cpus(const struct cpumask *cpumask,
		       cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_done done;

	cpu_stop_init_done(&done, cpumask_weight(cpumask));
	if (!queue_stop_cpus_work(cpumask, fn, arg, &done))
		return -ENOENT;
	wait_for_completion(&done.completion);
	return done.ret;
}

/**
 * stop_cpus - stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on online cpus in @cpumask.  On each target cpu,
 * @fn is run in a process context with the highest priority
 * preempting any task on the cpu and monopolizing it.  This function
 * returns after all executions are complete.
 *
 * This function doesn't guarantee the cpus in @cpumask stay online
 * till @fn completes.  If some cpus go down in the middle, execution
 * on the cpu may happen partially or fully on different cpus.  @fn
 * should either be ready for that or the caller should ensure that
 * the cpus stay online until this function completes.
 *
 * All stop_cpus() calls are serialized making it safe for @fn to wait
 * for all cpus to start executing it.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed at all because all cpus in
 * @cpumask were offline; otherwise, 0 if all executions of @fn
 * returned 0, any non zero return value if any returned non zero.
 */
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	int ret;

	/* static works are used, process one request at a time */
	mutex_lock(&stop_cpus_mutex);
	ret = __stop_cpus(cpumask, fn, arg);
	mutex_unlock(&stop_cpus_mutex);
	return ret;
}
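
/*
 * Usage sketch (illustrative): run flush_fn() on every online CPU at
 * once, each invocation monopolizing its CPU.  flush_fn() is
 * hypothetical; because all stop_cpus() calls are serialized, it may
 * safely spin waiting for its peers to start executing.
 *
 *	static int flush_fn(void *arg)
 *	{
 *		// runs concurrently on each online CPU in the mask
 *		return 0;
 *	}
 *
 *	int ret = stop_cpus(cpu_online_mask, flush_fn, NULL);
 */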

/**
 * try_stop_cpus - try to stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Identical to stop_cpus() except that it fails with -EAGAIN if
 * someone else is already using the facility.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -EAGAIN if someone else is already stopping cpus, -ENOENT if
 * @fn(@arg) was not executed at all because all cpus in @cpumask were
 * offline; otherwise, 0 if all executions of @fn returned 0, any non
 * zero return value if any returned non zero.
 */
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	int ret;

	/* static works are used, process one request at a time */
	if (!mutex_trylock(&stop_cpus_mutex))
		return -EAGAIN;
	ret = __stop_cpus(cpumask, fn, arg);
	mutex_unlock(&stop_cpus_mutex);
	return ret;
}
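
/*
 * Usage sketch (illustrative): opportunistic variant for callers with a
 * fallback path, e.g. periodic work that can simply try again later.
 * poke_fn() is hypothetical.
 *
 *	int ret = try_stop_cpus(cpu_online_mask, poke_fn, NULL);
 *	if (ret == -EAGAIN)
 *		return ret;	// someone else is stopping cpus; retry later
 */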

static int cpu_stop_should_run(unsigned int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	unsigned long flags;
	int run;

	raw_spin_lock_irqsave(&stopper->lock, flags);
	run = !list_empty(&stopper->works);
	raw_spin_unlock_irqrestore(&stopper->lock, flags);
	return run;
}

static void cpu_stopper_thread(unsigned int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	struct cpu_stop_work *work;

repeat:
	work = NULL;
	raw_spin_lock_irq(&stopper->lock);
	if (!list_empty(&stopper->works)) {
		work = list_first_entry(&stopper->works,
					struct cpu_stop_work, list);
		list_del_init(&work->list);
	}
	raw_spin_unlock_irq(&stopper->lock);

	if (work) {
		cpu_stop_fn_t fn = work->fn;
		void *arg = work->arg;
		struct cpu_stop_done *done = work->done;
		int ret;

		/* cpu stop callbacks must not sleep, make in_atomic() == T */
		preempt_count_inc();
		ret = fn(arg);
		if (done) {
			if (ret)
				done->ret = ret;
			cpu_stop_signal_done(done);
		}
		preempt_count_dec();
		WARN_ONCE(preempt_count(),
			  "cpu_stop: %ps(%p) leaked preempt count\n", fn, arg);
		goto repeat;
	}
}

void stop_machine_park(int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	/*
	 * Lockless. cpu_stopper_thread() will take stopper->lock and flush
	 * the pending works before it parks, until then it is fine to queue
	 * the new works.
	 */
	stopper->enabled = false;
	kthread_park(stopper->thread);
}

extern void sched_set_stop_task(int cpu, struct task_struct *stop);

static void cpu_stop_create(unsigned int cpu)
{
	sched_set_stop_task(cpu, per_cpu(cpu_stopper.thread, cpu));
}

static void cpu_stop_park(unsigned int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

	WARN_ON(!list_empty(&stopper->works));
}

void stop_machine_unpark(int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

	stopper->enabled = true;
	kthread_unpark(stopper->thread);
}

static struct smp_hotplug_thread cpu_stop_threads = {
	.store			= &cpu_stopper.thread,
	.thread_should_run	= cpu_stop_should_run,
	.thread_fn		= cpu_stopper_thread,
	.thread_comm		= "migration/%u",
	.create			= cpu_stop_create,
	.park			= cpu_stop_park,
	.selfparking		= true,
};

static int __init cpu_stop_init(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

		raw_spin_lock_init(&stopper->lock);
		INIT_LIST_HEAD(&stopper->works);
	}

	BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
	stop_machine_unpark(raw_smp_processor_id());
	stop_machine_initialized = true;
	return 0;
}
early_initcall(cpu_stop_init);

int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
			    const struct cpumask *cpus)
{
	struct multi_stop_data msdata = {
		.fn = fn,
		.data = data,
		.num_threads = num_online_cpus(),
		.active_cpus = cpus,
	};

	lockdep_assert_cpus_held();

	if (!stop_machine_initialized) {
		/*
		 * Handle the case where stop_machine() is called
		 * early in boot before stop_machine() has been
		 * initialized.
		 */
		unsigned long flags;
		int ret;

		WARN_ON_ONCE(msdata.num_threads != 1);

		local_irq_save(flags);
		hard_irq_disable();
		ret = (*fn)(data);
		local_irq_restore(flags);

		return ret;
	}

	/* Set the initial state and stop all online cpus. */
	set_state(&msdata, MULTI_STOP_PREPARE);
	return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata);
}

int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
{
	int ret;

	/* No CPUs can come up or down during this. */
	cpus_read_lock();
	ret = stop_machine_cpuslocked(fn, data, cpus);
	cpus_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);
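
/*
 * Usage sketch (illustrative): the classic stop_machine() caller updates
 * state that no other CPU may observe half-changed.  patch_fn() and
 * struct patch_desc are hypothetical.
 *
 *	struct patch_desc { void *site, *new; };
 *
 *	static int patch_fn(void *data)
 *	{
 *		struct patch_desc *p = data;
 *
 *		// every online CPU spins with IRQs off in multi_cpu_stop();
 *		// with @cpus == NULL this runs on the first online CPU only.
 *		return 0;
 *	}
 *
 *	int err = stop_machine(patch_fn, &desc, NULL);	// might sleep
 */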

/**
 * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU
 * @fn: the function to run
 * @data: the data ptr for the @fn()
 * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
 *
 * This is identical to stop_machine() but can be called from a CPU which
 * is not active.  The local CPU is in the process of hotplug (so no other
 * CPU hotplug can start) and not marked active and doesn't have enough
 * context to sleep.
 *
 * This function provides stop_machine() functionality for such state by
 * using busy-wait for synchronization and executing @fn directly for local
 * CPU.
 *
 * CONTEXT:
 * Local CPU is inactive.  Temporarily stops all active CPUs.
 *
 * RETURNS:
 * 0 if all executions of @fn returned 0, any non zero return value if any
 * returned non zero.
 */
int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
				  const struct cpumask *cpus)
{
	struct multi_stop_data msdata = { .fn = fn, .data = data,
					    .active_cpus = cpus };
	struct cpu_stop_done done;
	int ret;

	/* Local CPU must be inactive and CPU hotplug in progress. */
	BUG_ON(cpu_active(raw_smp_processor_id()));
	msdata.num_threads = num_active_cpus() + 1;	/* +1 for local */

	/* No proper task established and can't sleep - busy wait for lock. */
	while (!mutex_trylock(&stop_cpus_mutex))
		cpu_relax();

	/* Schedule work on other CPUs and execute directly for local CPU */
	set_state(&msdata, MULTI_STOP_PREPARE);
	cpu_stop_init_done(&done, num_active_cpus());
	queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata,
			     &done);
	ret = multi_cpu_stop(&msdata);

	/* Busy wait for completion. */
	while (!completion_done(&done.completion))
		cpu_relax();

	mutex_unlock(&stop_cpus_mutex);
	return ret ?: done.ret;
}
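
/*
 * Usage sketch (illustrative): the typical caller is CPU-online code
 * that must synchronize global resources before the local CPU is marked
 * active (the x86 MTRR rendezvous is the in-tree example).  sync_fn()
 * here is hypothetical.
 *
 *	// on a CPU that is online but not yet active:
 *	int ret = stop_machine_from_inactive_cpu(sync_fn, NULL, NULL);
 */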