xref: /linux/kernel/cgroup/freezer.c (revision c8b90d40d5bba8e6fba457b8a7c10d3c0d467e37)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/cgroup.h>
3 #include <linux/sched.h>
4 #include <linux/sched/task.h>
5 #include <linux/sched/signal.h>
6 
7 #include "cgroup-internal.h"
8 
9 #include <trace/events/cgroup.h>
10 
11 /*
12  * Update CGRP_FROZEN of cgroup.flag
13  * Return true if flags is updated; false if flags has no change
14  */
15 static bool cgroup_update_frozen_flag(struct cgroup *cgrp, bool frozen)
16 {
17 	lockdep_assert_held(&css_set_lock);
18 
19 	/* Already there? */
20 	if (test_bit(CGRP_FROZEN, &cgrp->flags) == frozen)
21 		return false;
22 
23 	if (frozen)
24 		set_bit(CGRP_FROZEN, &cgrp->flags);
25 	else
26 		clear_bit(CGRP_FROZEN, &cgrp->flags);
27 
28 	cgroup_file_notify(&cgrp->events_file);
29 	TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen);
30 	return true;
31 }
32 
33 /*
34  * Propagate the cgroup frozen state upwards by the cgroup tree.
35  */
36 static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
37 {
38 	int desc = 1;
39 
40 	/*
41 	 * If the new state is frozen, some freezing ancestor cgroups may change
42 	 * their state too, depending on if all their descendants are frozen.
43 	 *
44 	 * Otherwise, all ancestor cgroups are forced into the non-frozen state.
45 	 */
46 	while ((cgrp = cgroup_parent(cgrp))) {
47 		if (frozen) {
48 			cgrp->freezer.nr_frozen_descendants += desc;
49 			if (!test_bit(CGRP_FREEZE, &cgrp->flags) ||
50 			    (cgrp->freezer.nr_frozen_descendants !=
51 			    cgrp->nr_descendants))
52 				continue;
53 		} else {
54 			cgrp->freezer.nr_frozen_descendants -= desc;
55 		}
56 
57 		if (cgroup_update_frozen_flag(cgrp, frozen))
58 			desc++;
59 	}
60 }
61 
62 /*
63  * Revisit the cgroup frozen state.
64  * Checks if the cgroup is really frozen and perform all state transitions.
65  */
66 void cgroup_update_frozen(struct cgroup *cgrp)
67 {
68 	bool frozen;
69 
70 	/*
71 	 * If the cgroup has to be frozen (CGRP_FREEZE bit set),
72 	 * and all tasks are frozen and/or stopped, let's consider
73 	 * the cgroup frozen. Otherwise it's not frozen.
74 	 */
75 	frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
76 		cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);
77 
78 	/* If flags is updated, update the state of ancestor cgroups. */
79 	if (cgroup_update_frozen_flag(cgrp, frozen))
80 		cgroup_propagate_frozen(cgrp, frozen);
81 }
82 
83 /*
84  * Increment cgroup's nr_frozen_tasks.
85  */
86 static void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
87 {
88 	cgrp->freezer.nr_frozen_tasks++;
89 }
90 
91 /*
92  * Decrement cgroup's nr_frozen_tasks.
93  */
94 static void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
95 {
96 	cgrp->freezer.nr_frozen_tasks--;
97 	WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
98 }
99 
100 /*
101  * Enter frozen/stopped state, if not yet there. Update cgroup's counters,
102  * and revisit the state of the cgroup, if necessary.
103  */
104 void cgroup_enter_frozen(void)
105 {
106 	struct cgroup *cgrp;
107 
108 	if (current->frozen)
109 		return;
110 
111 	spin_lock_irq(&css_set_lock);
112 	current->frozen = true;
113 	cgrp = task_dfl_cgroup(current);
114 	cgroup_inc_frozen_cnt(cgrp);
115 	cgroup_update_frozen(cgrp);
116 	spin_unlock_irq(&css_set_lock);
117 }
118 
119 /*
120  * Conditionally leave frozen/stopped state. Update cgroup's counters,
121  * and revisit the state of the cgroup, if necessary.
122  *
123  * If always_leave is not set, and the cgroup is freezing,
124  * we're racing with the cgroup freezing. In this case, we don't
125  * drop the frozen counter to avoid a transient switch to
126  * the unfrozen state.
127  */
128 void cgroup_leave_frozen(bool always_leave)
129 {
130 	struct cgroup *cgrp;
131 
132 	spin_lock_irq(&css_set_lock);
133 	cgrp = task_dfl_cgroup(current);
134 	if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
135 		cgroup_dec_frozen_cnt(cgrp);
136 		cgroup_update_frozen(cgrp);
137 		WARN_ON_ONCE(!current->frozen);
138 		current->frozen = false;
139 	} else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) {
140 		spin_lock(&current->sighand->siglock);
141 		current->jobctl |= JOBCTL_TRAP_FREEZE;
142 		set_thread_flag(TIF_SIGPENDING);
143 		spin_unlock(&current->sighand->siglock);
144 	}
145 	spin_unlock_irq(&css_set_lock);
146 }
147 
148 /*
149  * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
150  * jobctl bit.
151  */
152 static void cgroup_freeze_task(struct task_struct *task, bool freeze)
153 {
154 	unsigned long flags;
155 
156 	/* If the task is about to die, don't bother with freezing it. */
157 	if (!lock_task_sighand(task, &flags))
158 		return;
159 
160 	if (freeze) {
161 		task->jobctl |= JOBCTL_TRAP_FREEZE;
162 		signal_wake_up(task, false);
163 	} else {
164 		task->jobctl &= ~JOBCTL_TRAP_FREEZE;
165 		wake_up_process(task);
166 	}
167 
168 	unlock_task_sighand(task, &flags);
169 }
170 
171 /*
172  * Freeze or unfreeze all tasks in the given cgroup.
173  */
174 static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze)
175 {
176 	struct css_task_iter it;
177 	struct task_struct *task;
178 
179 	lockdep_assert_held(&cgroup_mutex);
180 
181 	spin_lock_irq(&css_set_lock);
182 	if (freeze)
183 		set_bit(CGRP_FREEZE, &cgrp->flags);
184 	else
185 		clear_bit(CGRP_FREEZE, &cgrp->flags);
186 	spin_unlock_irq(&css_set_lock);
187 
188 	if (freeze)
189 		TRACE_CGROUP_PATH(freeze, cgrp);
190 	else
191 		TRACE_CGROUP_PATH(unfreeze, cgrp);
192 
193 	css_task_iter_start(&cgrp->self, 0, &it);
194 	while ((task = css_task_iter_next(&it))) {
195 		/*
196 		 * Ignore kernel threads here. Freezing cgroups containing
197 		 * kthreads isn't supported.
198 		 */
199 		if (task->flags & PF_KTHREAD)
200 			continue;
201 		cgroup_freeze_task(task, freeze);
202 	}
203 	css_task_iter_end(&it);
204 
205 	/*
206 	 * Cgroup state should be revisited here to cover empty leaf cgroups
207 	 * and cgroups which descendants are already in the desired state.
208 	 */
209 	spin_lock_irq(&css_set_lock);
210 	if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
211 		cgroup_update_frozen(cgrp);
212 	spin_unlock_irq(&css_set_lock);
213 }
214 
215 /*
216  * Adjust the task state (freeze or unfreeze) and revisit the state of
217  * source and destination cgroups.
218  */
219 void cgroup_freezer_migrate_task(struct task_struct *task,
220 				 struct cgroup *src, struct cgroup *dst)
221 {
222 	lockdep_assert_held(&css_set_lock);
223 
224 	/*
225 	 * Kernel threads are not supposed to be frozen at all.
226 	 */
227 	if (task->flags & PF_KTHREAD)
228 		return;
229 
230 	/*
231 	 * It's not necessary to do changes if both of the src and dst cgroups
232 	 * are not freezing and task is not frozen.
233 	 */
234 	if (!test_bit(CGRP_FREEZE, &src->flags) &&
235 	    !test_bit(CGRP_FREEZE, &dst->flags) &&
236 	    !task->frozen)
237 		return;
238 
239 	/*
240 	 * Adjust counters of freezing and frozen tasks.
241 	 * Note, that if the task is frozen, but the destination cgroup is not
242 	 * frozen, we bump both counters to keep them balanced.
243 	 */
244 	if (task->frozen) {
245 		cgroup_inc_frozen_cnt(dst);
246 		cgroup_dec_frozen_cnt(src);
247 	}
248 	cgroup_update_frozen(dst);
249 	cgroup_update_frozen(src);
250 
251 	/*
252 	 * Force the task to the desired state.
253 	 */
254 	cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
255 }
256 
257 void cgroup_freeze(struct cgroup *cgrp, bool freeze)
258 {
259 	struct cgroup_subsys_state *css;
260 	struct cgroup *parent;
261 	struct cgroup *dsct;
262 	bool applied = false;
263 	bool old_e;
264 
265 	lockdep_assert_held(&cgroup_mutex);
266 
267 	/*
268 	 * Nothing changed? Just exit.
269 	 */
270 	if (cgrp->freezer.freeze == freeze)
271 		return;
272 
273 	cgrp->freezer.freeze = freeze;
274 
275 	/*
276 	 * Propagate changes downwards the cgroup tree.
277 	 */
278 	css_for_each_descendant_pre(css, &cgrp->self) {
279 		dsct = css->cgroup;
280 
281 		if (cgroup_is_dead(dsct))
282 			continue;
283 
284 		/*
285 		 * e_freeze is affected by parent's e_freeze and dst's freeze.
286 		 * If old e_freeze eq new e_freeze, no change, its children
287 		 * will not be affected. So do nothing and skip the subtree
288 		 */
289 		old_e = dsct->freezer.e_freeze;
290 		parent = cgroup_parent(dsct);
291 		dsct->freezer.e_freeze = (dsct->freezer.freeze ||
292 					  parent->freezer.e_freeze);
293 		if (dsct->freezer.e_freeze == old_e) {
294 			css = css_rightmost_descendant(css);
295 			continue;
296 		}
297 
298 		/*
299 		 * Do change actual state: freeze or unfreeze.
300 		 */
301 		cgroup_do_freeze(dsct, freeze);
302 		applied = true;
303 	}
304 
305 	/*
306 	 * Even if the actual state hasn't changed, let's notify a user.
307 	 * The state can be enforced by an ancestor cgroup: the cgroup
308 	 * can already be in the desired state or it can be locked in the
309 	 * opposite state, so that the transition will never happen.
310 	 * In both cases it's better to notify a user, that there is
311 	 * nothing to wait for.
312 	 */
313 	if (!applied) {
314 		TRACE_CGROUP_PATH(notify_frozen, cgrp,
315 				  test_bit(CGRP_FROZEN, &cgrp->flags));
316 		cgroup_file_notify(&cgrp->events_file);
317 	}
318 }
319