176f969e8SRoman Gushchin // SPDX-License-Identifier: GPL-2.0
276f969e8SRoman Gushchin #include <linux/cgroup.h>
376f969e8SRoman Gushchin #include <linux/sched.h>
476f969e8SRoman Gushchin #include <linux/sched/task.h>
576f969e8SRoman Gushchin #include <linux/sched/signal.h>
676f969e8SRoman Gushchin
776f969e8SRoman Gushchin #include "cgroup-internal.h"
876f969e8SRoman Gushchin
94c476d8cSRoman Gushchin #include <trace/events/cgroup.h>
104c476d8cSRoman Gushchin
1176f969e8SRoman Gushchin /*
1276f969e8SRoman Gushchin * Propagate the cgroup frozen state upwards by the cgroup tree.
1376f969e8SRoman Gushchin */
cgroup_propagate_frozen(struct cgroup * cgrp,bool frozen)1476f969e8SRoman Gushchin static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
1576f969e8SRoman Gushchin {
1676f969e8SRoman Gushchin int desc = 1;
1776f969e8SRoman Gushchin
1876f969e8SRoman Gushchin /*
1976f969e8SRoman Gushchin * If the new state is frozen, some freezing ancestor cgroups may change
2076f969e8SRoman Gushchin * their state too, depending on if all their descendants are frozen.
2176f969e8SRoman Gushchin *
2276f969e8SRoman Gushchin * Otherwise, all ancestor cgroups are forced into the non-frozen state.
2376f969e8SRoman Gushchin */
2476f969e8SRoman Gushchin while ((cgrp = cgroup_parent(cgrp))) {
2576f969e8SRoman Gushchin if (frozen) {
2676f969e8SRoman Gushchin cgrp->freezer.nr_frozen_descendants += desc;
2776f969e8SRoman Gushchin if (!test_bit(CGRP_FROZEN, &cgrp->flags) &&
2876f969e8SRoman Gushchin test_bit(CGRP_FREEZE, &cgrp->flags) &&
2976f969e8SRoman Gushchin cgrp->freezer.nr_frozen_descendants ==
3076f969e8SRoman Gushchin cgrp->nr_descendants) {
3176f969e8SRoman Gushchin set_bit(CGRP_FROZEN, &cgrp->flags);
3276f969e8SRoman Gushchin cgroup_file_notify(&cgrp->events_file);
334c476d8cSRoman Gushchin TRACE_CGROUP_PATH(notify_frozen, cgrp, 1);
3476f969e8SRoman Gushchin desc++;
3576f969e8SRoman Gushchin }
3676f969e8SRoman Gushchin } else {
3776f969e8SRoman Gushchin cgrp->freezer.nr_frozen_descendants -= desc;
3876f969e8SRoman Gushchin if (test_bit(CGRP_FROZEN, &cgrp->flags)) {
3976f969e8SRoman Gushchin clear_bit(CGRP_FROZEN, &cgrp->flags);
4076f969e8SRoman Gushchin cgroup_file_notify(&cgrp->events_file);
414c476d8cSRoman Gushchin TRACE_CGROUP_PATH(notify_frozen, cgrp, 0);
4276f969e8SRoman Gushchin desc++;
4376f969e8SRoman Gushchin }
4476f969e8SRoman Gushchin }
4576f969e8SRoman Gushchin }
4676f969e8SRoman Gushchin }
4776f969e8SRoman Gushchin
4876f969e8SRoman Gushchin /*
4976f969e8SRoman Gushchin * Revisit the cgroup frozen state.
5076f969e8SRoman Gushchin * Checks if the cgroup is really frozen and perform all state transitions.
5176f969e8SRoman Gushchin */
cgroup_update_frozen(struct cgroup * cgrp)5276f969e8SRoman Gushchin void cgroup_update_frozen(struct cgroup *cgrp)
5376f969e8SRoman Gushchin {
5476f969e8SRoman Gushchin bool frozen;
5576f969e8SRoman Gushchin
5676f969e8SRoman Gushchin lockdep_assert_held(&css_set_lock);
5776f969e8SRoman Gushchin
5876f969e8SRoman Gushchin /*
5976f969e8SRoman Gushchin * If the cgroup has to be frozen (CGRP_FREEZE bit set),
6076f969e8SRoman Gushchin * and all tasks are frozen and/or stopped, let's consider
6176f969e8SRoman Gushchin * the cgroup frozen. Otherwise it's not frozen.
6276f969e8SRoman Gushchin */
6376f969e8SRoman Gushchin frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
6476f969e8SRoman Gushchin cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);
6576f969e8SRoman Gushchin
6676f969e8SRoman Gushchin if (frozen) {
6776f969e8SRoman Gushchin /* Already there? */
6876f969e8SRoman Gushchin if (test_bit(CGRP_FROZEN, &cgrp->flags))
6976f969e8SRoman Gushchin return;
7076f969e8SRoman Gushchin
7176f969e8SRoman Gushchin set_bit(CGRP_FROZEN, &cgrp->flags);
7276f969e8SRoman Gushchin } else {
7376f969e8SRoman Gushchin /* Already there? */
7476f969e8SRoman Gushchin if (!test_bit(CGRP_FROZEN, &cgrp->flags))
7576f969e8SRoman Gushchin return;
7676f969e8SRoman Gushchin
7776f969e8SRoman Gushchin clear_bit(CGRP_FROZEN, &cgrp->flags);
7876f969e8SRoman Gushchin }
7976f969e8SRoman Gushchin cgroup_file_notify(&cgrp->events_file);
804c476d8cSRoman Gushchin TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen);
8176f969e8SRoman Gushchin
8276f969e8SRoman Gushchin /* Update the state of ancestor cgroups. */
8376f969e8SRoman Gushchin cgroup_propagate_frozen(cgrp, frozen);
8476f969e8SRoman Gushchin }
8576f969e8SRoman Gushchin
8676f969e8SRoman Gushchin /*
8776f969e8SRoman Gushchin * Increment cgroup's nr_frozen_tasks.
8876f969e8SRoman Gushchin */
cgroup_inc_frozen_cnt(struct cgroup * cgrp)8976f969e8SRoman Gushchin static void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
9076f969e8SRoman Gushchin {
9176f969e8SRoman Gushchin cgrp->freezer.nr_frozen_tasks++;
9276f969e8SRoman Gushchin }
9376f969e8SRoman Gushchin
9476f969e8SRoman Gushchin /*
9576f969e8SRoman Gushchin * Decrement cgroup's nr_frozen_tasks.
9676f969e8SRoman Gushchin */
cgroup_dec_frozen_cnt(struct cgroup * cgrp)9776f969e8SRoman Gushchin static void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
9876f969e8SRoman Gushchin {
9976f969e8SRoman Gushchin cgrp->freezer.nr_frozen_tasks--;
10076f969e8SRoman Gushchin WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
10176f969e8SRoman Gushchin }
10276f969e8SRoman Gushchin
10376f969e8SRoman Gushchin /*
10476f969e8SRoman Gushchin * Enter frozen/stopped state, if not yet there. Update cgroup's counters,
10576f969e8SRoman Gushchin * and revisit the state of the cgroup, if necessary.
10676f969e8SRoman Gushchin */
cgroup_enter_frozen(void)10776f969e8SRoman Gushchin void cgroup_enter_frozen(void)
10876f969e8SRoman Gushchin {
10976f969e8SRoman Gushchin struct cgroup *cgrp;
11076f969e8SRoman Gushchin
11176f969e8SRoman Gushchin if (current->frozen)
11276f969e8SRoman Gushchin return;
11376f969e8SRoman Gushchin
11476f969e8SRoman Gushchin spin_lock_irq(&css_set_lock);
11576f969e8SRoman Gushchin current->frozen = true;
11676f969e8SRoman Gushchin cgrp = task_dfl_cgroup(current);
11776f969e8SRoman Gushchin cgroup_inc_frozen_cnt(cgrp);
11876f969e8SRoman Gushchin cgroup_update_frozen(cgrp);
11976f969e8SRoman Gushchin spin_unlock_irq(&css_set_lock);
12076f969e8SRoman Gushchin }
12176f969e8SRoman Gushchin
12276f969e8SRoman Gushchin /*
12376f969e8SRoman Gushchin * Conditionally leave frozen/stopped state. Update cgroup's counters,
12476f969e8SRoman Gushchin * and revisit the state of the cgroup, if necessary.
12576f969e8SRoman Gushchin *
12676f969e8SRoman Gushchin * If always_leave is not set, and the cgroup is freezing,
12776f969e8SRoman Gushchin * we're racing with the cgroup freezing. In this case, we don't
12876f969e8SRoman Gushchin * drop the frozen counter to avoid a transient switch to
12976f969e8SRoman Gushchin * the unfrozen state.
13076f969e8SRoman Gushchin */
cgroup_leave_frozen(bool always_leave)13176f969e8SRoman Gushchin void cgroup_leave_frozen(bool always_leave)
13276f969e8SRoman Gushchin {
13376f969e8SRoman Gushchin struct cgroup *cgrp;
13476f969e8SRoman Gushchin
13576f969e8SRoman Gushchin spin_lock_irq(&css_set_lock);
13676f969e8SRoman Gushchin cgrp = task_dfl_cgroup(current);
13776f969e8SRoman Gushchin if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
13876f969e8SRoman Gushchin cgroup_dec_frozen_cnt(cgrp);
13976f969e8SRoman Gushchin cgroup_update_frozen(cgrp);
14076f969e8SRoman Gushchin WARN_ON_ONCE(!current->frozen);
14176f969e8SRoman Gushchin current->frozen = false;
142cb2c4cd8SRoman Gushchin } else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) {
143cb2c4cd8SRoman Gushchin spin_lock(¤t->sighand->siglock);
144cb2c4cd8SRoman Gushchin current->jobctl |= JOBCTL_TRAP_FREEZE;
145cb2c4cd8SRoman Gushchin set_thread_flag(TIF_SIGPENDING);
146cb2c4cd8SRoman Gushchin spin_unlock(¤t->sighand->siglock);
14776f969e8SRoman Gushchin }
14876f969e8SRoman Gushchin spin_unlock_irq(&css_set_lock);
14976f969e8SRoman Gushchin }
15076f969e8SRoman Gushchin
15176f969e8SRoman Gushchin /*
15276f969e8SRoman Gushchin * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
15376f969e8SRoman Gushchin * jobctl bit.
15476f969e8SRoman Gushchin */
cgroup_freeze_task(struct task_struct * task,bool freeze)15576f969e8SRoman Gushchin static void cgroup_freeze_task(struct task_struct *task, bool freeze)
15676f969e8SRoman Gushchin {
15776f969e8SRoman Gushchin unsigned long flags;
15876f969e8SRoman Gushchin
15976f969e8SRoman Gushchin /* If the task is about to die, don't bother with freezing it. */
16076f969e8SRoman Gushchin if (!lock_task_sighand(task, &flags))
16176f969e8SRoman Gushchin return;
16276f969e8SRoman Gushchin
16376f969e8SRoman Gushchin if (freeze) {
16476f969e8SRoman Gushchin task->jobctl |= JOBCTL_TRAP_FREEZE;
16576f969e8SRoman Gushchin signal_wake_up(task, false);
16676f969e8SRoman Gushchin } else {
16776f969e8SRoman Gushchin task->jobctl &= ~JOBCTL_TRAP_FREEZE;
16876f969e8SRoman Gushchin wake_up_process(task);
16976f969e8SRoman Gushchin }
17076f969e8SRoman Gushchin
17176f969e8SRoman Gushchin unlock_task_sighand(task, &flags);
17276f969e8SRoman Gushchin }
17376f969e8SRoman Gushchin
17476f969e8SRoman Gushchin /*
17576f969e8SRoman Gushchin * Freeze or unfreeze all tasks in the given cgroup.
17676f969e8SRoman Gushchin */
cgroup_do_freeze(struct cgroup * cgrp,bool freeze)17776f969e8SRoman Gushchin static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze)
17876f969e8SRoman Gushchin {
17976f969e8SRoman Gushchin struct css_task_iter it;
18076f969e8SRoman Gushchin struct task_struct *task;
18176f969e8SRoman Gushchin
18276f969e8SRoman Gushchin lockdep_assert_held(&cgroup_mutex);
18376f969e8SRoman Gushchin
18476f969e8SRoman Gushchin spin_lock_irq(&css_set_lock);
18576f969e8SRoman Gushchin if (freeze)
18676f969e8SRoman Gushchin set_bit(CGRP_FREEZE, &cgrp->flags);
18776f969e8SRoman Gushchin else
18876f969e8SRoman Gushchin clear_bit(CGRP_FREEZE, &cgrp->flags);
18976f969e8SRoman Gushchin spin_unlock_irq(&css_set_lock);
19076f969e8SRoman Gushchin
1914c476d8cSRoman Gushchin if (freeze)
1924c476d8cSRoman Gushchin TRACE_CGROUP_PATH(freeze, cgrp);
1934c476d8cSRoman Gushchin else
1944c476d8cSRoman Gushchin TRACE_CGROUP_PATH(unfreeze, cgrp);
1954c476d8cSRoman Gushchin
19676f969e8SRoman Gushchin css_task_iter_start(&cgrp->self, 0, &it);
19776f969e8SRoman Gushchin while ((task = css_task_iter_next(&it))) {
19876f969e8SRoman Gushchin /*
19976f969e8SRoman Gushchin * Ignore kernel threads here. Freezing cgroups containing
20076f969e8SRoman Gushchin * kthreads isn't supported.
20176f969e8SRoman Gushchin */
20276f969e8SRoman Gushchin if (task->flags & PF_KTHREAD)
20376f969e8SRoman Gushchin continue;
20476f969e8SRoman Gushchin cgroup_freeze_task(task, freeze);
20576f969e8SRoman Gushchin }
20676f969e8SRoman Gushchin css_task_iter_end(&it);
20776f969e8SRoman Gushchin
20876f969e8SRoman Gushchin /*
20976f969e8SRoman Gushchin * Cgroup state should be revisited here to cover empty leaf cgroups
21076f969e8SRoman Gushchin * and cgroups which descendants are already in the desired state.
21176f969e8SRoman Gushchin */
21276f969e8SRoman Gushchin spin_lock_irq(&css_set_lock);
21376f969e8SRoman Gushchin if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
21476f969e8SRoman Gushchin cgroup_update_frozen(cgrp);
21576f969e8SRoman Gushchin spin_unlock_irq(&css_set_lock);
21676f969e8SRoman Gushchin }
21776f969e8SRoman Gushchin
21876f969e8SRoman Gushchin /*
21976f969e8SRoman Gushchin * Adjust the task state (freeze or unfreeze) and revisit the state of
22076f969e8SRoman Gushchin * source and destination cgroups.
22176f969e8SRoman Gushchin */
cgroup_freezer_migrate_task(struct task_struct * task,struct cgroup * src,struct cgroup * dst)22276f969e8SRoman Gushchin void cgroup_freezer_migrate_task(struct task_struct *task,
22376f969e8SRoman Gushchin struct cgroup *src, struct cgroup *dst)
22476f969e8SRoman Gushchin {
22576f969e8SRoman Gushchin lockdep_assert_held(&css_set_lock);
22676f969e8SRoman Gushchin
22776f969e8SRoman Gushchin /*
22876f969e8SRoman Gushchin * Kernel threads are not supposed to be frozen at all.
22976f969e8SRoman Gushchin */
23076f969e8SRoman Gushchin if (task->flags & PF_KTHREAD)
23176f969e8SRoman Gushchin return;
23276f969e8SRoman Gushchin
23376f969e8SRoman Gushchin /*
234*742e8cd3SHonglei Wang * It's not necessary to do changes if both of the src and dst cgroups
235*742e8cd3SHonglei Wang * are not freezing and task is not frozen.
236*742e8cd3SHonglei Wang */
237*742e8cd3SHonglei Wang if (!test_bit(CGRP_FREEZE, &src->flags) &&
238*742e8cd3SHonglei Wang !test_bit(CGRP_FREEZE, &dst->flags) &&
239*742e8cd3SHonglei Wang !task->frozen)
240*742e8cd3SHonglei Wang return;
241*742e8cd3SHonglei Wang
242*742e8cd3SHonglei Wang /*
24376f969e8SRoman Gushchin * Adjust counters of freezing and frozen tasks.
24476f969e8SRoman Gushchin * Note, that if the task is frozen, but the destination cgroup is not
24576f969e8SRoman Gushchin * frozen, we bump both counters to keep them balanced.
24676f969e8SRoman Gushchin */
24776f969e8SRoman Gushchin if (task->frozen) {
24876f969e8SRoman Gushchin cgroup_inc_frozen_cnt(dst);
24976f969e8SRoman Gushchin cgroup_dec_frozen_cnt(src);
25076f969e8SRoman Gushchin }
25176f969e8SRoman Gushchin cgroup_update_frozen(dst);
25276f969e8SRoman Gushchin cgroup_update_frozen(src);
25376f969e8SRoman Gushchin
25476f969e8SRoman Gushchin /*
25576f969e8SRoman Gushchin * Force the task to the desired state.
25676f969e8SRoman Gushchin */
25776f969e8SRoman Gushchin cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
25876f969e8SRoman Gushchin }
25976f969e8SRoman Gushchin
/*
 * Change the freeze setting of @cgrp and apply it to its subtree.
 *
 * @cgrp:   cgroup whose freeze setting is being changed
 * @freeze: the requested setting
 *
 * Each live cgroup in the subtree keeps an effective freeze counter
 * (e_freeze).  A cgroup is actually frozen when the counter goes from 0
 * to 1 and actually unfrozen when it drops back to 0; any other value
 * means the state is still dictated by some ancestor's setting.
 *
 * Must be called with cgroup_mutex held.
 */
void cgroup_freeze(struct cgroup *cgrp, bool freeze)
{
	struct cgroup_subsys_state *pos;
	struct cgroup *desc;
	bool state_changed = false;

	lockdep_assert_held(&cgroup_mutex);

	/* Setting is unchanged: nothing to do. */
	if (cgrp->freezer.freeze == freeze)
		return;

	cgrp->freezer.freeze = freeze;

	/* Propagate the change down the cgroup tree. */
	css_for_each_descendant_pre(pos, &cgrp->self) {
		desc = pos->cgroup;

		if (cgroup_is_dead(desc))
			continue;

		if (freeze) {
			/* Already frozen because of an ancestor's setting? */
			if (++desc->freezer.e_freeze > 1)
				continue;
		} else {
			/* Still frozen because of an ancestor's setting? */
			if (--desc->freezer.e_freeze > 0)
				continue;

			WARN_ON_ONCE(desc->freezer.e_freeze < 0);
		}

		/* The effective state really changes: freeze or unfreeze. */
		cgroup_do_freeze(desc, freeze);
		state_changed = true;
	}

	if (state_changed)
		return;

	/*
	 * Notify the user even though no actual state has changed.  The
	 * state can be enforced by an ancestor cgroup: this cgroup may
	 * already be in the desired state, or it may be locked in the
	 * opposite one so that the transition will never happen.  Either
	 * way the user should learn that there is nothing to wait for.
	 */
	TRACE_CGROUP_PATH(notify_frozen, cgrp,
			  test_bit(CGRP_FROZEN, &cgrp->flags));
	cgroup_file_notify(&cgrp->events_file);
}
324