xref: /linux/kernel/cgroup/freezer.c (revision 4f2c0a4acffbec01079c28f839422e64ddeff004)
176f969e8SRoman Gushchin // SPDX-License-Identifier: GPL-2.0
276f969e8SRoman Gushchin #include <linux/cgroup.h>
376f969e8SRoman Gushchin #include <linux/sched.h>
476f969e8SRoman Gushchin #include <linux/sched/task.h>
576f969e8SRoman Gushchin #include <linux/sched/signal.h>
676f969e8SRoman Gushchin 
776f969e8SRoman Gushchin #include "cgroup-internal.h"
876f969e8SRoman Gushchin 
94c476d8cSRoman Gushchin #include <trace/events/cgroup.h>
104c476d8cSRoman Gushchin 
1176f969e8SRoman Gushchin /*
1276f969e8SRoman Gushchin  * Propagate the cgroup frozen state upwards by the cgroup tree.
1376f969e8SRoman Gushchin  */
cgroup_propagate_frozen(struct cgroup * cgrp,bool frozen)1476f969e8SRoman Gushchin static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
1576f969e8SRoman Gushchin {
1676f969e8SRoman Gushchin 	int desc = 1;
1776f969e8SRoman Gushchin 
1876f969e8SRoman Gushchin 	/*
1976f969e8SRoman Gushchin 	 * If the new state is frozen, some freezing ancestor cgroups may change
2076f969e8SRoman Gushchin 	 * their state too, depending on if all their descendants are frozen.
2176f969e8SRoman Gushchin 	 *
2276f969e8SRoman Gushchin 	 * Otherwise, all ancestor cgroups are forced into the non-frozen state.
2376f969e8SRoman Gushchin 	 */
2476f969e8SRoman Gushchin 	while ((cgrp = cgroup_parent(cgrp))) {
2576f969e8SRoman Gushchin 		if (frozen) {
2676f969e8SRoman Gushchin 			cgrp->freezer.nr_frozen_descendants += desc;
2776f969e8SRoman Gushchin 			if (!test_bit(CGRP_FROZEN, &cgrp->flags) &&
2876f969e8SRoman Gushchin 			    test_bit(CGRP_FREEZE, &cgrp->flags) &&
2976f969e8SRoman Gushchin 			    cgrp->freezer.nr_frozen_descendants ==
3076f969e8SRoman Gushchin 			    cgrp->nr_descendants) {
3176f969e8SRoman Gushchin 				set_bit(CGRP_FROZEN, &cgrp->flags);
3276f969e8SRoman Gushchin 				cgroup_file_notify(&cgrp->events_file);
334c476d8cSRoman Gushchin 				TRACE_CGROUP_PATH(notify_frozen, cgrp, 1);
3476f969e8SRoman Gushchin 				desc++;
3576f969e8SRoman Gushchin 			}
3676f969e8SRoman Gushchin 		} else {
3776f969e8SRoman Gushchin 			cgrp->freezer.nr_frozen_descendants -= desc;
3876f969e8SRoman Gushchin 			if (test_bit(CGRP_FROZEN, &cgrp->flags)) {
3976f969e8SRoman Gushchin 				clear_bit(CGRP_FROZEN, &cgrp->flags);
4076f969e8SRoman Gushchin 				cgroup_file_notify(&cgrp->events_file);
414c476d8cSRoman Gushchin 				TRACE_CGROUP_PATH(notify_frozen, cgrp, 0);
4276f969e8SRoman Gushchin 				desc++;
4376f969e8SRoman Gushchin 			}
4476f969e8SRoman Gushchin 		}
4576f969e8SRoman Gushchin 	}
4676f969e8SRoman Gushchin }
4776f969e8SRoman Gushchin 
4876f969e8SRoman Gushchin /*
4976f969e8SRoman Gushchin  * Revisit the cgroup frozen state.
5076f969e8SRoman Gushchin  * Checks if the cgroup is really frozen and perform all state transitions.
5176f969e8SRoman Gushchin  */
cgroup_update_frozen(struct cgroup * cgrp)5276f969e8SRoman Gushchin void cgroup_update_frozen(struct cgroup *cgrp)
5376f969e8SRoman Gushchin {
5476f969e8SRoman Gushchin 	bool frozen;
5576f969e8SRoman Gushchin 
5676f969e8SRoman Gushchin 	lockdep_assert_held(&css_set_lock);
5776f969e8SRoman Gushchin 
5876f969e8SRoman Gushchin 	/*
5976f969e8SRoman Gushchin 	 * If the cgroup has to be frozen (CGRP_FREEZE bit set),
6076f969e8SRoman Gushchin 	 * and all tasks are frozen and/or stopped, let's consider
6176f969e8SRoman Gushchin 	 * the cgroup frozen. Otherwise it's not frozen.
6276f969e8SRoman Gushchin 	 */
6376f969e8SRoman Gushchin 	frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
6476f969e8SRoman Gushchin 		cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);
6576f969e8SRoman Gushchin 
6676f969e8SRoman Gushchin 	if (frozen) {
6776f969e8SRoman Gushchin 		/* Already there? */
6876f969e8SRoman Gushchin 		if (test_bit(CGRP_FROZEN, &cgrp->flags))
6976f969e8SRoman Gushchin 			return;
7076f969e8SRoman Gushchin 
7176f969e8SRoman Gushchin 		set_bit(CGRP_FROZEN, &cgrp->flags);
7276f969e8SRoman Gushchin 	} else {
7376f969e8SRoman Gushchin 		/* Already there? */
7476f969e8SRoman Gushchin 		if (!test_bit(CGRP_FROZEN, &cgrp->flags))
7576f969e8SRoman Gushchin 			return;
7676f969e8SRoman Gushchin 
7776f969e8SRoman Gushchin 		clear_bit(CGRP_FROZEN, &cgrp->flags);
7876f969e8SRoman Gushchin 	}
7976f969e8SRoman Gushchin 	cgroup_file_notify(&cgrp->events_file);
804c476d8cSRoman Gushchin 	TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen);
8176f969e8SRoman Gushchin 
8276f969e8SRoman Gushchin 	/* Update the state of ancestor cgroups. */
8376f969e8SRoman Gushchin 	cgroup_propagate_frozen(cgrp, frozen);
8476f969e8SRoman Gushchin }
8576f969e8SRoman Gushchin 
8676f969e8SRoman Gushchin /*
8776f969e8SRoman Gushchin  * Increment cgroup's nr_frozen_tasks.
8876f969e8SRoman Gushchin  */
cgroup_inc_frozen_cnt(struct cgroup * cgrp)8976f969e8SRoman Gushchin static void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
9076f969e8SRoman Gushchin {
9176f969e8SRoman Gushchin 	cgrp->freezer.nr_frozen_tasks++;
9276f969e8SRoman Gushchin }
9376f969e8SRoman Gushchin 
9476f969e8SRoman Gushchin /*
9576f969e8SRoman Gushchin  * Decrement cgroup's nr_frozen_tasks.
9676f969e8SRoman Gushchin  */
cgroup_dec_frozen_cnt(struct cgroup * cgrp)9776f969e8SRoman Gushchin static void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
9876f969e8SRoman Gushchin {
9976f969e8SRoman Gushchin 	cgrp->freezer.nr_frozen_tasks--;
10076f969e8SRoman Gushchin 	WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
10176f969e8SRoman Gushchin }
10276f969e8SRoman Gushchin 
10376f969e8SRoman Gushchin /*
10476f969e8SRoman Gushchin  * Enter frozen/stopped state, if not yet there. Update cgroup's counters,
10576f969e8SRoman Gushchin  * and revisit the state of the cgroup, if necessary.
10676f969e8SRoman Gushchin  */
cgroup_enter_frozen(void)10776f969e8SRoman Gushchin void cgroup_enter_frozen(void)
10876f969e8SRoman Gushchin {
10976f969e8SRoman Gushchin 	struct cgroup *cgrp;
11076f969e8SRoman Gushchin 
11176f969e8SRoman Gushchin 	if (current->frozen)
11276f969e8SRoman Gushchin 		return;
11376f969e8SRoman Gushchin 
11476f969e8SRoman Gushchin 	spin_lock_irq(&css_set_lock);
11576f969e8SRoman Gushchin 	current->frozen = true;
11676f969e8SRoman Gushchin 	cgrp = task_dfl_cgroup(current);
11776f969e8SRoman Gushchin 	cgroup_inc_frozen_cnt(cgrp);
11876f969e8SRoman Gushchin 	cgroup_update_frozen(cgrp);
11976f969e8SRoman Gushchin 	spin_unlock_irq(&css_set_lock);
12076f969e8SRoman Gushchin }
12176f969e8SRoman Gushchin 
12276f969e8SRoman Gushchin /*
12376f969e8SRoman Gushchin  * Conditionally leave frozen/stopped state. Update cgroup's counters,
12476f969e8SRoman Gushchin  * and revisit the state of the cgroup, if necessary.
12576f969e8SRoman Gushchin  *
12676f969e8SRoman Gushchin  * If always_leave is not set, and the cgroup is freezing,
12776f969e8SRoman Gushchin  * we're racing with the cgroup freezing. In this case, we don't
12876f969e8SRoman Gushchin  * drop the frozen counter to avoid a transient switch to
12976f969e8SRoman Gushchin  * the unfrozen state.
13076f969e8SRoman Gushchin  */
cgroup_leave_frozen(bool always_leave)13176f969e8SRoman Gushchin void cgroup_leave_frozen(bool always_leave)
13276f969e8SRoman Gushchin {
13376f969e8SRoman Gushchin 	struct cgroup *cgrp;
13476f969e8SRoman Gushchin 
13576f969e8SRoman Gushchin 	spin_lock_irq(&css_set_lock);
13676f969e8SRoman Gushchin 	cgrp = task_dfl_cgroup(current);
13776f969e8SRoman Gushchin 	if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
13876f969e8SRoman Gushchin 		cgroup_dec_frozen_cnt(cgrp);
13976f969e8SRoman Gushchin 		cgroup_update_frozen(cgrp);
14076f969e8SRoman Gushchin 		WARN_ON_ONCE(!current->frozen);
14176f969e8SRoman Gushchin 		current->frozen = false;
142cb2c4cd8SRoman Gushchin 	} else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) {
143cb2c4cd8SRoman Gushchin 		spin_lock(&current->sighand->siglock);
144cb2c4cd8SRoman Gushchin 		current->jobctl |= JOBCTL_TRAP_FREEZE;
145cb2c4cd8SRoman Gushchin 		set_thread_flag(TIF_SIGPENDING);
146cb2c4cd8SRoman Gushchin 		spin_unlock(&current->sighand->siglock);
14776f969e8SRoman Gushchin 	}
14876f969e8SRoman Gushchin 	spin_unlock_irq(&css_set_lock);
14976f969e8SRoman Gushchin }
15076f969e8SRoman Gushchin 
15176f969e8SRoman Gushchin /*
15276f969e8SRoman Gushchin  * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
15376f969e8SRoman Gushchin  * jobctl bit.
15476f969e8SRoman Gushchin  */
cgroup_freeze_task(struct task_struct * task,bool freeze)15576f969e8SRoman Gushchin static void cgroup_freeze_task(struct task_struct *task, bool freeze)
15676f969e8SRoman Gushchin {
15776f969e8SRoman Gushchin 	unsigned long flags;
15876f969e8SRoman Gushchin 
15976f969e8SRoman Gushchin 	/* If the task is about to die, don't bother with freezing it. */
16076f969e8SRoman Gushchin 	if (!lock_task_sighand(task, &flags))
16176f969e8SRoman Gushchin 		return;
16276f969e8SRoman Gushchin 
16376f969e8SRoman Gushchin 	if (freeze) {
16476f969e8SRoman Gushchin 		task->jobctl |= JOBCTL_TRAP_FREEZE;
16576f969e8SRoman Gushchin 		signal_wake_up(task, false);
16676f969e8SRoman Gushchin 	} else {
16776f969e8SRoman Gushchin 		task->jobctl &= ~JOBCTL_TRAP_FREEZE;
16876f969e8SRoman Gushchin 		wake_up_process(task);
16976f969e8SRoman Gushchin 	}
17076f969e8SRoman Gushchin 
17176f969e8SRoman Gushchin 	unlock_task_sighand(task, &flags);
17276f969e8SRoman Gushchin }
17376f969e8SRoman Gushchin 
17476f969e8SRoman Gushchin /*
17576f969e8SRoman Gushchin  * Freeze or unfreeze all tasks in the given cgroup.
17676f969e8SRoman Gushchin  */
cgroup_do_freeze(struct cgroup * cgrp,bool freeze)17776f969e8SRoman Gushchin static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze)
17876f969e8SRoman Gushchin {
17976f969e8SRoman Gushchin 	struct css_task_iter it;
18076f969e8SRoman Gushchin 	struct task_struct *task;
18176f969e8SRoman Gushchin 
18276f969e8SRoman Gushchin 	lockdep_assert_held(&cgroup_mutex);
18376f969e8SRoman Gushchin 
18476f969e8SRoman Gushchin 	spin_lock_irq(&css_set_lock);
18576f969e8SRoman Gushchin 	if (freeze)
18676f969e8SRoman Gushchin 		set_bit(CGRP_FREEZE, &cgrp->flags);
18776f969e8SRoman Gushchin 	else
18876f969e8SRoman Gushchin 		clear_bit(CGRP_FREEZE, &cgrp->flags);
18976f969e8SRoman Gushchin 	spin_unlock_irq(&css_set_lock);
19076f969e8SRoman Gushchin 
1914c476d8cSRoman Gushchin 	if (freeze)
1924c476d8cSRoman Gushchin 		TRACE_CGROUP_PATH(freeze, cgrp);
1934c476d8cSRoman Gushchin 	else
1944c476d8cSRoman Gushchin 		TRACE_CGROUP_PATH(unfreeze, cgrp);
1954c476d8cSRoman Gushchin 
19676f969e8SRoman Gushchin 	css_task_iter_start(&cgrp->self, 0, &it);
19776f969e8SRoman Gushchin 	while ((task = css_task_iter_next(&it))) {
19876f969e8SRoman Gushchin 		/*
19976f969e8SRoman Gushchin 		 * Ignore kernel threads here. Freezing cgroups containing
20076f969e8SRoman Gushchin 		 * kthreads isn't supported.
20176f969e8SRoman Gushchin 		 */
20276f969e8SRoman Gushchin 		if (task->flags & PF_KTHREAD)
20376f969e8SRoman Gushchin 			continue;
20476f969e8SRoman Gushchin 		cgroup_freeze_task(task, freeze);
20576f969e8SRoman Gushchin 	}
20676f969e8SRoman Gushchin 	css_task_iter_end(&it);
20776f969e8SRoman Gushchin 
20876f969e8SRoman Gushchin 	/*
20976f969e8SRoman Gushchin 	 * Cgroup state should be revisited here to cover empty leaf cgroups
21076f969e8SRoman Gushchin 	 * and cgroups which descendants are already in the desired state.
21176f969e8SRoman Gushchin 	 */
21276f969e8SRoman Gushchin 	spin_lock_irq(&css_set_lock);
21376f969e8SRoman Gushchin 	if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
21476f969e8SRoman Gushchin 		cgroup_update_frozen(cgrp);
21576f969e8SRoman Gushchin 	spin_unlock_irq(&css_set_lock);
21676f969e8SRoman Gushchin }
21776f969e8SRoman Gushchin 
21876f969e8SRoman Gushchin /*
21976f969e8SRoman Gushchin  * Adjust the task state (freeze or unfreeze) and revisit the state of
22076f969e8SRoman Gushchin  * source and destination cgroups.
22176f969e8SRoman Gushchin  */
cgroup_freezer_migrate_task(struct task_struct * task,struct cgroup * src,struct cgroup * dst)22276f969e8SRoman Gushchin void cgroup_freezer_migrate_task(struct task_struct *task,
22376f969e8SRoman Gushchin 				 struct cgroup *src, struct cgroup *dst)
22476f969e8SRoman Gushchin {
22576f969e8SRoman Gushchin 	lockdep_assert_held(&css_set_lock);
22676f969e8SRoman Gushchin 
22776f969e8SRoman Gushchin 	/*
22876f969e8SRoman Gushchin 	 * Kernel threads are not supposed to be frozen at all.
22976f969e8SRoman Gushchin 	 */
23076f969e8SRoman Gushchin 	if (task->flags & PF_KTHREAD)
23176f969e8SRoman Gushchin 		return;
23276f969e8SRoman Gushchin 
23376f969e8SRoman Gushchin 	/*
234*742e8cd3SHonglei Wang 	 * It's not necessary to do changes if both of the src and dst cgroups
235*742e8cd3SHonglei Wang 	 * are not freezing and task is not frozen.
236*742e8cd3SHonglei Wang 	 */
237*742e8cd3SHonglei Wang 	if (!test_bit(CGRP_FREEZE, &src->flags) &&
238*742e8cd3SHonglei Wang 	    !test_bit(CGRP_FREEZE, &dst->flags) &&
239*742e8cd3SHonglei Wang 	    !task->frozen)
240*742e8cd3SHonglei Wang 		return;
241*742e8cd3SHonglei Wang 
242*742e8cd3SHonglei Wang 	/*
24376f969e8SRoman Gushchin 	 * Adjust counters of freezing and frozen tasks.
24476f969e8SRoman Gushchin 	 * Note, that if the task is frozen, but the destination cgroup is not
24576f969e8SRoman Gushchin 	 * frozen, we bump both counters to keep them balanced.
24676f969e8SRoman Gushchin 	 */
24776f969e8SRoman Gushchin 	if (task->frozen) {
24876f969e8SRoman Gushchin 		cgroup_inc_frozen_cnt(dst);
24976f969e8SRoman Gushchin 		cgroup_dec_frozen_cnt(src);
25076f969e8SRoman Gushchin 	}
25176f969e8SRoman Gushchin 	cgroup_update_frozen(dst);
25276f969e8SRoman Gushchin 	cgroup_update_frozen(src);
25376f969e8SRoman Gushchin 
25476f969e8SRoman Gushchin 	/*
25576f969e8SRoman Gushchin 	 * Force the task to the desired state.
25676f969e8SRoman Gushchin 	 */
25776f969e8SRoman Gushchin 	cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
25876f969e8SRoman Gushchin }
25976f969e8SRoman Gushchin 
/*
 * Record a new freeze setting for @cgrp and apply it to @cgrp and all of
 * its live descendants.
 *
 * @cgrp:   cgroup whose freeze setting is being changed
 * @freeze: true to freeze, false to unfreeze
 *
 * Each visited cgroup keeps an effective freeze counter (e_freeze); its
 * actual state is only changed when that counter goes from 0 to 1 (freeze)
 * or from 1 to 0 (unfreeze).
 *
 * The caller must hold cgroup_mutex.
 */
void cgroup_freeze(struct cgroup *cgrp, bool freeze)
{
	struct cgroup_subsys_state *pos;
	struct cgroup *dsct;
	bool applied = false;

	lockdep_assert_held(&cgroup_mutex);

	/* The requested setting is already in place. */
	if (cgrp->freezer.freeze == freeze)
		return;

	cgrp->freezer.freeze = freeze;

	/* Push the change down the cgroup tree. */
	css_for_each_descendant_pre(pos, &cgrp->self) {
		dsct = pos->cgroup;

		if (cgroup_is_dead(dsct))
			continue;

		if (!freeze) {
			/* Still frozen because of an ancestor's setting? */
			if (--dsct->freezer.e_freeze > 0)
				continue;

			WARN_ON_ONCE(dsct->freezer.e_freeze < 0);
		} else {
			/* Already frozen because of an ancestor's setting? */
			if (++dsct->freezer.e_freeze > 1)
				continue;
		}

		/* The effective state really changes: freeze or unfreeze. */
		cgroup_do_freeze(dsct, freeze);
		applied = true;
	}

	if (applied)
		return;

	/*
	 * Notify the user even though no actual state was changed. The
	 * state can be enforced by an ancestor cgroup: this cgroup may
	 * already be in the desired state, or it may be locked in the
	 * opposite state so that the transition will never happen. Either
	 * way there is nothing for the user to wait for.
	 */
	TRACE_CGROUP_PATH(notify_frozen, cgrp,
			  test_bit(CGRP_FROZEN, &cgrp->flags));
	cgroup_file_notify(&cgrp->events_file);
}
324