1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/cgroup.h> 3 #include <linux/sched.h> 4 #include <linux/sched/task.h> 5 #include <linux/sched/signal.h> 6 7 #include "cgroup-internal.h" 8 9 #include <trace/events/cgroup.h> 10 11 /* 12 * Update CGRP_FROZEN of cgroup.flag 13 * Return true if flags is updated; false if flags has no change 14 */ 15 static bool cgroup_update_frozen_flag(struct cgroup *cgrp, bool frozen) 16 { 17 lockdep_assert_held(&css_set_lock); 18 19 /* Already there? */ 20 if (test_bit(CGRP_FROZEN, &cgrp->flags) == frozen) 21 return false; 22 23 if (frozen) 24 set_bit(CGRP_FROZEN, &cgrp->flags); 25 else 26 clear_bit(CGRP_FROZEN, &cgrp->flags); 27 28 cgroup_file_notify(&cgrp->events_file); 29 TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen); 30 return true; 31 } 32 33 /* 34 * Propagate the cgroup frozen state upwards by the cgroup tree. 35 */ 36 static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen) 37 { 38 int desc = 1; 39 40 /* 41 * If the new state is frozen, some freezing ancestor cgroups may change 42 * their state too, depending on if all their descendants are frozen. 43 * 44 * Otherwise, all ancestor cgroups are forced into the non-frozen state. 45 */ 46 while ((cgrp = cgroup_parent(cgrp))) { 47 if (frozen) { 48 cgrp->freezer.nr_frozen_descendants += desc; 49 if (!test_bit(CGRP_FREEZE, &cgrp->flags) || 50 (cgrp->freezer.nr_frozen_descendants != 51 cgrp->nr_descendants)) 52 continue; 53 } else { 54 cgrp->freezer.nr_frozen_descendants -= desc; 55 } 56 57 if (cgroup_update_frozen_flag(cgrp, frozen)) 58 desc++; 59 } 60 } 61 62 /* 63 * Revisit the cgroup frozen state. 64 * Checks if the cgroup is really frozen and perform all state transitions. 65 */ 66 void cgroup_update_frozen(struct cgroup *cgrp) 67 { 68 bool frozen; 69 70 /* 71 * If the cgroup has to be frozen (CGRP_FREEZE bit set), 72 * and all tasks are frozen and/or stopped, let's consider 73 * the cgroup frozen. Otherwise it's not frozen. 74 */ 75 frozen = test_bit(CGRP_FREEZE, &cgrp->flags) && 76 cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp); 77 78 /* If flags is updated, update the state of ancestor cgroups. */ 79 if (cgroup_update_frozen_flag(cgrp, frozen)) 80 cgroup_propagate_frozen(cgrp, frozen); 81 } 82 83 /* 84 * Increment cgroup's nr_frozen_tasks. 85 */ 86 static void cgroup_inc_frozen_cnt(struct cgroup *cgrp) 87 { 88 cgrp->freezer.nr_frozen_tasks++; 89 } 90 91 /* 92 * Decrement cgroup's nr_frozen_tasks. 93 */ 94 static void cgroup_dec_frozen_cnt(struct cgroup *cgrp) 95 { 96 cgrp->freezer.nr_frozen_tasks--; 97 WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0); 98 } 99 100 /* 101 * Enter frozen/stopped state, if not yet there. Update cgroup's counters, 102 * and revisit the state of the cgroup, if necessary. 103 */ 104 void cgroup_enter_frozen(void) 105 { 106 struct cgroup *cgrp; 107 108 if (current->frozen) 109 return; 110 111 spin_lock_irq(&css_set_lock); 112 current->frozen = true; 113 cgrp = task_dfl_cgroup(current); 114 cgroup_inc_frozen_cnt(cgrp); 115 cgroup_update_frozen(cgrp); 116 spin_unlock_irq(&css_set_lock); 117 } 118 119 /* 120 * Conditionally leave frozen/stopped state. Update cgroup's counters, 121 * and revisit the state of the cgroup, if necessary. 122 * 123 * If always_leave is not set, and the cgroup is freezing, 124 * we're racing with the cgroup freezing. In this case, we don't 125 * drop the frozen counter to avoid a transient switch to 126 * the unfrozen state. 127 */ 128 void cgroup_leave_frozen(bool always_leave) 129 { 130 struct cgroup *cgrp; 131 132 spin_lock_irq(&css_set_lock); 133 cgrp = task_dfl_cgroup(current); 134 if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) { 135 cgroup_dec_frozen_cnt(cgrp); 136 cgroup_update_frozen(cgrp); 137 WARN_ON_ONCE(!current->frozen); 138 current->frozen = false; 139 } else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) { 140 spin_lock(¤t->sighand->siglock); 141 current->jobctl |= JOBCTL_TRAP_FREEZE; 142 set_thread_flag(TIF_SIGPENDING); 143 spin_unlock(¤t->sighand->siglock); 144 } 145 spin_unlock_irq(&css_set_lock); 146 } 147 148 /* 149 * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE 150 * jobctl bit. 151 */ 152 static void cgroup_freeze_task(struct task_struct *task, bool freeze) 153 { 154 unsigned long flags; 155 156 /* If the task is about to die, don't bother with freezing it. */ 157 if (!lock_task_sighand(task, &flags)) 158 return; 159 160 if (freeze) { 161 task->jobctl |= JOBCTL_TRAP_FREEZE; 162 signal_wake_up(task, false); 163 } else { 164 task->jobctl &= ~JOBCTL_TRAP_FREEZE; 165 wake_up_process(task); 166 } 167 168 unlock_task_sighand(task, &flags); 169 } 170 171 /* 172 * Freeze or unfreeze all tasks in the given cgroup. 173 */ 174 static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze) 175 { 176 struct css_task_iter it; 177 struct task_struct *task; 178 179 lockdep_assert_held(&cgroup_mutex); 180 181 spin_lock_irq(&css_set_lock); 182 if (freeze) 183 set_bit(CGRP_FREEZE, &cgrp->flags); 184 else 185 clear_bit(CGRP_FREEZE, &cgrp->flags); 186 spin_unlock_irq(&css_set_lock); 187 188 if (freeze) 189 TRACE_CGROUP_PATH(freeze, cgrp); 190 else 191 TRACE_CGROUP_PATH(unfreeze, cgrp); 192 193 css_task_iter_start(&cgrp->self, 0, &it); 194 while ((task = css_task_iter_next(&it))) { 195 /* 196 * Ignore kernel threads here. Freezing cgroups containing 197 * kthreads isn't supported. 198 */ 199 if (task->flags & PF_KTHREAD) 200 continue; 201 cgroup_freeze_task(task, freeze); 202 } 203 css_task_iter_end(&it); 204 205 /* 206 * Cgroup state should be revisited here to cover empty leaf cgroups 207 * and cgroups which descendants are already in the desired state. 208 */ 209 spin_lock_irq(&css_set_lock); 210 if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants) 211 cgroup_update_frozen(cgrp); 212 spin_unlock_irq(&css_set_lock); 213 } 214 215 /* 216 * Adjust the task state (freeze or unfreeze) and revisit the state of 217 * source and destination cgroups. 218 */ 219 void cgroup_freezer_migrate_task(struct task_struct *task, 220 struct cgroup *src, struct cgroup *dst) 221 { 222 lockdep_assert_held(&css_set_lock); 223 224 /* 225 * Kernel threads are not supposed to be frozen at all. 226 */ 227 if (task->flags & PF_KTHREAD) 228 return; 229 230 /* 231 * It's not necessary to do changes if both of the src and dst cgroups 232 * are not freezing and task is not frozen. 233 */ 234 if (!test_bit(CGRP_FREEZE, &src->flags) && 235 !test_bit(CGRP_FREEZE, &dst->flags) && 236 !task->frozen) 237 return; 238 239 /* 240 * Adjust counters of freezing and frozen tasks. 241 * Note, that if the task is frozen, but the destination cgroup is not 242 * frozen, we bump both counters to keep them balanced. 243 */ 244 if (task->frozen) { 245 cgroup_inc_frozen_cnt(dst); 246 cgroup_dec_frozen_cnt(src); 247 } 248 cgroup_update_frozen(dst); 249 cgroup_update_frozen(src); 250 251 /* 252 * Force the task to the desired state. 253 */ 254 cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags)); 255 } 256 257 void cgroup_freeze(struct cgroup *cgrp, bool freeze) 258 { 259 struct cgroup_subsys_state *css; 260 struct cgroup *parent; 261 struct cgroup *dsct; 262 bool applied = false; 263 bool old_e; 264 265 lockdep_assert_held(&cgroup_mutex); 266 267 /* 268 * Nothing changed? Just exit. 269 */ 270 if (cgrp->freezer.freeze == freeze) 271 return; 272 273 cgrp->freezer.freeze = freeze; 274 275 /* 276 * Propagate changes downwards the cgroup tree. 277 */ 278 css_for_each_descendant_pre(css, &cgrp->self) { 279 dsct = css->cgroup; 280 281 if (cgroup_is_dead(dsct)) 282 continue; 283 284 /* 285 * e_freeze is affected by parent's e_freeze and dst's freeze. 286 * If old e_freeze eq new e_freeze, no change, its children 287 * will not be affected. So do nothing and skip the subtree 288 */ 289 old_e = dsct->freezer.e_freeze; 290 parent = cgroup_parent(dsct); 291 dsct->freezer.e_freeze = (dsct->freezer.freeze || 292 parent->freezer.e_freeze); 293 if (dsct->freezer.e_freeze == old_e) { 294 css = css_rightmost_descendant(css); 295 continue; 296 } 297 298 /* 299 * Do change actual state: freeze or unfreeze. 300 */ 301 cgroup_do_freeze(dsct, freeze); 302 applied = true; 303 } 304 305 /* 306 * Even if the actual state hasn't changed, let's notify a user. 307 * The state can be enforced by an ancestor cgroup: the cgroup 308 * can already be in the desired state or it can be locked in the 309 * opposite state, so that the transition will never happen. 310 * In both cases it's better to notify a user, that there is 311 * nothing to wait for. 312 */ 313 if (!applied) { 314 TRACE_CGROUP_PATH(notify_frozen, cgrp, 315 test_bit(CGRP_FROZEN, &cgrp->flags)); 316 cgroup_file_notify(&cgrp->events_file); 317 } 318 } 319