// SPDX-License-Identifier: GPL-2.0
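/*
 * cgroup v2 freezer: freezes and unfreezes the tasks of a cgroup by
 * trapping them in the signal delivery path (JOBCTL_TRAP_FREEZE),
 * backing the cgroup.freeze control file.
 */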
#include <linux/cgroup.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/signal.h>

#include "cgroup-internal.h"

#include <trace/events/cgroup.h>

/*
 * Update CGRP_FROZEN in cgrp->flags.
 * Return true if the flag was updated; false if the cgroup was already
 * in the requested state.
 */
static bool cgroup_update_frozen_flag(struct cgroup *cgrp, bool frozen)
{
	lockdep_assert_held(&css_set_lock);

	/* Already there? */
	if (test_bit(CGRP_FROZEN, &cgrp->flags) == frozen)
		return false;

	if (frozen)
		set_bit(CGRP_FROZEN, &cgrp->flags);
	else
		clear_bit(CGRP_FROZEN, &cgrp->flags);

	cgroup_file_notify(&cgrp->events_file);
	TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen);
	return true;
}

/*
 * Propagate the cgroup frozen state upwards through the cgroup tree.
 */
static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
{
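	/*
	 * desc is the number of cgroups in the just-updated subtree whose
	 * CGRP_FROZEN state has changed: the origin cgroup itself, plus
	 * every ancestor whose flag flips on the way up. Ancestors count
	 * all frozen descendants (not only direct children), so the
	 * adjustment grows as flags flip.
	 */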
	int desc = 1;

	/*
	 * If the new state is frozen, some freezing ancestor cgroups may
	 * change their state too, depending on whether all their descendants
	 * are frozen.
	 *
	 * Otherwise, all ancestor cgroups are forced into the non-frozen
	 * state.
	 */
	while ((cgrp = cgroup_parent(cgrp))) {
		if (frozen) {
			cgrp->freezer.nr_frozen_descendants += desc;
			if (!test_bit(CGRP_FREEZE, &cgrp->flags) ||
			    (cgrp->freezer.nr_frozen_descendants !=
			     cgrp->nr_descendants))
				continue;
		} else {
			cgrp->freezer.nr_frozen_descendants -= desc;
		}

		if (cgroup_update_frozen_flag(cgrp, frozen))
			desc++;
	}
}

/*
 * Revisit the cgroup frozen state.
 * Check whether the cgroup is really frozen and perform any resulting
 * state transitions.
 */
void cgroup_update_frozen(struct cgroup *cgrp)
{
	bool frozen;

	/*
	 * If the cgroup has to be frozen (CGRP_FREEZE bit set),
	 * and all tasks are frozen and/or stopped, let's consider
	 * the cgroup frozen. Otherwise it's not frozen.
	 */
	frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
		 cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);

	/* If the flag was updated, update the state of ancestor cgroups. */
	if (cgroup_update_frozen_flag(cgrp, frozen))
		cgroup_propagate_frozen(cgrp, frozen);
}

/*
 * Increment cgroup's nr_frozen_tasks.
 */
static void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
{
	cgrp->freezer.nr_frozen_tasks++;
}

/*
 * Decrement cgroup's nr_frozen_tasks.
 */
static void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
{
	cgrp->freezer.nr_frozen_tasks--;
	WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
}

/*
 * Enter frozen/stopped state, if not yet there. Update cgroup's counters,
 * and revisit the state of the cgroup, if necessary.
 */
void cgroup_enter_frozen(void)
{
	struct cgroup *cgrp;

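	/*
	 * Already accounted as frozen from an earlier pass through the
	 * freezer trap or a ptrace stop? Nothing to update.
	 */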
	if (current->frozen)
		return;

	spin_lock_irq(&css_set_lock);
	current->frozen = true;
	cgrp = task_dfl_cgroup(current);
	cgroup_inc_frozen_cnt(cgrp);
	cgroup_update_frozen(cgrp);
	spin_unlock_irq(&css_set_lock);
}

/*
 * Conditionally leave frozen/stopped state. Update cgroup's counters,
 * and revisit the state of the cgroup, if necessary.
 *
 * If always_leave is not set and the cgroup is still freezing,
 * we're racing with a concurrent freeze. In this case, don't drop
 * the frozen counter, to avoid a transient switch to the unfrozen
 * state.
 */
void cgroup_leave_frozen(bool always_leave)
{
	struct cgroup *cgrp;

	spin_lock_irq(&css_set_lock);
	cgrp = task_dfl_cgroup(current);
	if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
		cgroup_dec_frozen_cnt(cgrp);
		cgroup_update_frozen(cgrp);
		WARN_ON_ONCE(!current->frozen);
		current->frozen = false;
	} else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) {
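		/*
		 * The cgroup is still freezing: keep the task accounted as
		 * frozen and re-arm JOBCTL_TRAP_FREEZE with a faked pending
		 * signal, so the task re-enters the freezer trap on its way
		 * back to userspace.
		 */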
		spin_lock(&current->sighand->siglock);
		current->jobctl |= JOBCTL_TRAP_FREEZE;
		set_thread_flag(TIF_SIGPENDING);
		spin_unlock(&current->sighand->siglock);
	}
	spin_unlock_irq(&css_set_lock);
}

/*
 * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
 * jobctl bit.
 */
static void cgroup_freeze_task(struct task_struct *task, bool freeze)
{
	unsigned long flags;

	/* If the task is about to die, don't bother with freezing it. */
	if (!lock_task_sighand(task, &flags))
		return;

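	/*
	 * Freezing rides the signal delivery path: signal_wake_up() sets
	 * TIF_SIGPENDING so the task enters get_signal() and hits the
	 * freezer trap. Unfreezing only needs a plain wakeup, after which
	 * the task notices the cleared bit and leaves the trap.
	 */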
	if (freeze) {
		task->jobctl |= JOBCTL_TRAP_FREEZE;
		signal_wake_up(task, false);
	} else {
		task->jobctl &= ~JOBCTL_TRAP_FREEZE;
		wake_up_process(task);
	}

	unlock_task_sighand(task, &flags);
}

/*
 * Freeze or unfreeze all tasks in the given cgroup.
 */
static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze, u64 ts_nsec)
{
	struct css_task_iter it;
	struct task_struct *task;

	lockdep_assert_held(&cgroup_mutex);

	spin_lock_irq(&css_set_lock);
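	/*
	 * freeze_seq guards freeze_start_nsec and frozen_nsec so that
	 * readers (e.g. frozen-time reporting) can take a consistent
	 * snapshot without holding css_set_lock.
	 */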
	write_seqcount_begin(&cgrp->freezer.freeze_seq);
	if (freeze) {
		set_bit(CGRP_FREEZE, &cgrp->flags);
		cgrp->freezer.freeze_start_nsec = ts_nsec;
	} else {
		clear_bit(CGRP_FREEZE, &cgrp->flags);
		cgrp->freezer.frozen_nsec += (ts_nsec -
					      cgrp->freezer.freeze_start_nsec);
	}
	write_seqcount_end(&cgrp->freezer.freeze_seq);
	spin_unlock_irq(&css_set_lock);

	if (freeze)
		TRACE_CGROUP_PATH(freeze, cgrp);
	else
		TRACE_CGROUP_PATH(unfreeze, cgrp);

	css_task_iter_start(&cgrp->self, 0, &it);
	while ((task = css_task_iter_next(&it))) {
		/*
		 * Ignore kernel threads here. Freezing cgroups containing
		 * kthreads isn't supported.
		 */
		if (task->flags & PF_KTHREAD)
			continue;
		cgroup_freeze_task(task, freeze);
	}
	css_task_iter_end(&it);

	/*
	 * Cgroup state should be revisited here to cover empty leaf cgroups
	 * and cgroups whose descendants are already in the desired state.
	 */
	spin_lock_irq(&css_set_lock);
	if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
		cgroup_update_frozen(cgrp);
	spin_unlock_irq(&css_set_lock);
}

/*
 * Adjust the task state (freeze or unfreeze) and revisit the state of
 * source and destination cgroups.
 */
void cgroup_freezer_migrate_task(struct task_struct *task,
				 struct cgroup *src, struct cgroup *dst)
{
	lockdep_assert_held(&css_set_lock);

	/*
	 * Kernel threads are not supposed to be frozen at all.
	 */
	if (task->flags & PF_KTHREAD)
		return;

	/*
	 * Nothing to do if neither the src nor the dst cgroup is freezing
	 * and the task is not frozen.
	 */
	if (!test_bit(CGRP_FREEZE, &src->flags) &&
	    !test_bit(CGRP_FREEZE, &dst->flags) &&
	    !task->frozen)
		return;

	/*
	 * Adjust the counters of freezing and frozen tasks.
	 * Note that if the task is frozen but the destination cgroup is
	 * not frozen, we adjust both counters to keep them balanced.
	 */
	if (task->frozen) {
		cgroup_inc_frozen_cnt(dst);
		cgroup_dec_frozen_cnt(src);
	}
	cgroup_update_frozen(dst);
	cgroup_update_frozen(src);

	/*
	 * Force the task to the desired state.
	 */
	cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
}

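/*
 * Apply a new freeze state to @cgrp, typically in response to a write to
 * the cgroup.freeze control file, and propagate the resulting effective
 * state down the subtree.
 */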
void cgroup_freeze(struct cgroup *cgrp, bool freeze)
{
	struct cgroup_subsys_state *css;
	struct cgroup *parent;
	struct cgroup *dsct;
	bool applied = false;
	u64 ts_nsec;
	bool old_e;

	lockdep_assert_held(&cgroup_mutex);

	/*
	 * Nothing changed? Just exit.
	 */
	if (cgrp->freezer.freeze == freeze)
		return;

	cgrp->freezer.freeze = freeze;
	ts_nsec = ktime_get_ns();

	/*
	 * Propagate changes down the cgroup tree.
	 */
	css_for_each_descendant_pre(css, &cgrp->self) {
		dsct = css->cgroup;

		if (cgroup_is_dead(dsct))
			continue;

		/*
		 * The effective freezer state (e_freeze) depends on the
		 * parent's e_freeze and the cgroup's own freeze setting.
		 * If e_freeze didn't change, the children can't be affected
		 * either, so skip the whole subtree.
		 */
		old_e = dsct->freezer.e_freeze;
		parent = cgroup_parent(dsct);
		dsct->freezer.e_freeze = (dsct->freezer.freeze ||
					  parent->freezer.e_freeze);
		if (dsct->freezer.e_freeze == old_e) {
			css = css_rightmost_descendant(css);
			continue;
		}

		/*
		 * Change the actual state: freeze or unfreeze.
		 */
		cgroup_do_freeze(dsct, freeze, ts_nsec);
		applied = true;
	}

	/*
	 * Even if the actual state hasn't changed, let's notify the user.
	 * The state can be enforced by an ancestor cgroup: the cgroup
	 * can already be in the desired state, or it can be locked in the
	 * opposite state, so that the transition will never happen.
	 * In both cases it's better to notify the user that there is
	 * nothing to wait for.
	 */
	if (!applied) {
		TRACE_CGROUP_PATH(notify_frozen, cgrp,
				  test_bit(CGRP_FROZEN, &cgrp->flags));
		cgroup_file_notify(&cgrp->events_file);
	}
}