// SPDX-License-Identifier: GPL-2.0
#include <linux/cgroup.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/signal.h>

#include "cgroup-internal.h"

#include <trace/events/cgroup.h>

/*
 * Update CGRP_FROZEN in cgrp->flags.
 * Return true if the flag was updated; false if there was no change.
 */
static bool cgroup_update_frozen_flag(struct cgroup *cgrp, bool frozen)
{
        lockdep_assert_held(&css_set_lock);

        /* Already there? */
        if (test_bit(CGRP_FROZEN, &cgrp->flags) == frozen)
                return false;

        if (frozen)
                set_bit(CGRP_FROZEN, &cgrp->flags);
        else
                clear_bit(CGRP_FROZEN, &cgrp->flags);

        cgroup_file_notify(&cgrp->events_file);
        TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen);
        return true;
}

/*
 * Propagate the cgroup frozen state upwards through the cgroup tree.
 */
static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
{
        int desc = 1;

        /*
         * If the new state is frozen, some freezing ancestor cgroups may change
         * their state too, depending on whether all their descendants are frozen.
         *
         * Otherwise, all ancestor cgroups are forced into the non-frozen state.
         */
        while ((cgrp = cgroup_parent(cgrp))) {
                if (frozen) {
                        cgrp->freezer.nr_frozen_descendants += desc;
                        if (!test_bit(CGRP_FREEZE, &cgrp->flags) ||
                            (cgrp->freezer.nr_frozen_descendants !=
                             cgrp->nr_descendants))
                                continue;
                } else {
                        cgrp->freezer.nr_frozen_descendants -= desc;
                }

                if (cgroup_update_frozen_flag(cgrp, frozen))
                        desc++;
        }
}

/*
 * Revisit the cgroup frozen state.
 * Check whether the cgroup is really frozen and perform all state transitions.
 */
void cgroup_update_frozen(struct cgroup *cgrp)
{
        bool frozen;

        /*
         * If the cgroup has to be frozen (CGRP_FREEZE bit set),
         * and all tasks are frozen and/or stopped, let's consider
         * the cgroup frozen. Otherwise it's not frozen.
         */
        frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
                 cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);

        /* If the flag was updated, update the state of ancestor cgroups. */
        if (cgroup_update_frozen_flag(cgrp, frozen))
                cgroup_propagate_frozen(cgrp, frozen);
}

/*
 * Increment cgroup's nr_frozen_tasks.
 */
static void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
{
        cgrp->freezer.nr_frozen_tasks++;
}

/*
 * Decrement cgroup's nr_frozen_tasks.
 */
static void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
{
        cgrp->freezer.nr_frozen_tasks--;
        WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
}

/*
 * Enter frozen/stopped state, if not yet there. Update cgroup's counters,
 * and revisit the state of the cgroup, if necessary.
 */
void cgroup_enter_frozen(void)
{
        struct cgroup *cgrp;

        if (current->frozen)
                return;

        spin_lock_irq(&css_set_lock);
        current->frozen = true;
        cgrp = task_dfl_cgroup(current);
        cgroup_inc_frozen_cnt(cgrp);
        cgroup_update_frozen(cgrp);
        spin_unlock_irq(&css_set_lock);
}
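
/*
 * Illustrative sketch, not part of the original file: cgroup_enter_frozen()
 * is reached from the signal delivery path once JOBCTL_TRAP_FREEZE is
 * noticed. Roughly, and simplified from do_freezer_trap() in kernel/signal.c
 * (the helper name below is hypothetical), the task parks itself like this:
 */
#if 0
static void freezer_trap_sketch(void)
{
        /* Sleep interruptibly and freezably; siglock is held on entry. */
        __set_current_state(TASK_INTERRUPTIBLE | TASK_FREEZABLE);
        clear_thread_flag(TIF_SIGPENDING);
        spin_unlock_irq(&current->sighand->siglock);

        /* Account the task as frozen in its cgroup, then go to sleep. */
        cgroup_enter_frozen();
        schedule();
}
#endif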

/*
 * Conditionally leave frozen/stopped state. Update cgroup's counters,
 * and revisit the state of the cgroup, if necessary.
 *
 * If always_leave is not set, and the cgroup is freezing,
 * we're racing with the cgroup freezing. In this case, we don't
 * drop the frozen counter to avoid a transient switch to
 * the unfrozen state.
 */
void cgroup_leave_frozen(bool always_leave)
{
        struct cgroup *cgrp;

        spin_lock_irq(&css_set_lock);
        cgrp = task_dfl_cgroup(current);
        if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
                cgroup_dec_frozen_cnt(cgrp);
                cgroup_update_frozen(cgrp);
                WARN_ON_ONCE(!current->frozen);
                current->frozen = false;
        } else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) {
                spin_lock(&current->sighand->siglock);
                current->jobctl |= JOBCTL_TRAP_FREEZE;
                set_thread_flag(TIF_SIGPENDING);
                spin_unlock(&current->sighand->siglock);
        }
        spin_unlock_irq(&css_set_lock);
}

/*
 * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
 * jobctl bit.
 */
static void cgroup_freeze_task(struct task_struct *task, bool freeze)
{
        unsigned long flags;

        /* If the task is about to die, don't bother with freezing it. */
        if (!lock_task_sighand(task, &flags))
                return;

        if (freeze) {
                task->jobctl |= JOBCTL_TRAP_FREEZE;
                signal_wake_up(task, false);
        } else {
                task->jobctl &= ~JOBCTL_TRAP_FREEZE;
                wake_up_process(task);
        }

        unlock_task_sighand(task, &flags);
}

/*
 * Freeze or unfreeze all tasks in the given cgroup.
 */
static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze, u64 ts_nsec)
{
        struct css_task_iter it;
        struct task_struct *task;

        lockdep_assert_held(&cgroup_mutex);

        spin_lock_irq(&css_set_lock);
        write_seqcount_begin(&cgrp->freezer.freeze_seq);
        if (freeze) {
                set_bit(CGRP_FREEZE, &cgrp->flags);
                cgrp->freezer.freeze_start_nsec = ts_nsec;
        } else {
                clear_bit(CGRP_FREEZE, &cgrp->flags);
                cgrp->freezer.frozen_nsec += (ts_nsec -
                                              cgrp->freezer.freeze_start_nsec);
        }
        write_seqcount_end(&cgrp->freezer.freeze_seq);
        spin_unlock_irq(&css_set_lock);

        if (freeze)
                TRACE_CGROUP_PATH(freeze, cgrp);
        else
                TRACE_CGROUP_PATH(unfreeze, cgrp);

        css_task_iter_start(&cgrp->self, 0, &it);
        while ((task = css_task_iter_next(&it))) {
                /*
                 * Ignore kernel threads here. Freezing cgroups containing
                 * kthreads isn't supported.
                 */
                if (task->flags & PF_KTHREAD)
                        continue;
                cgroup_freeze_task(task, freeze);
        }
        css_task_iter_end(&it);

        /*
         * Cgroup state should be revisited here to cover empty leaf cgroups
         * and cgroups whose descendants are already in the desired state.
         */
        spin_lock_irq(&css_set_lock);
        if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
                cgroup_update_frozen(cgrp);
        spin_unlock_irq(&css_set_lock);
}
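
/*
 * Illustrative sketch, not part of the original file: freeze_seq is written
 * above so that readers can take a consistent snapshot of the accumulated
 * frozen time. A consumer is assumed to look roughly like the hypothetical
 * helper below, folding in the currently running freeze interval while
 * CGRP_FREEZE is still set:
 */
#if 0
static u64 cgroup_frozen_nsec_sketch(struct cgroup *cgrp)
{
        unsigned int seq;
        u64 nsec;

        do {
                seq = read_seqcount_begin(&cgrp->freezer.freeze_seq);
                nsec = cgrp->freezer.frozen_nsec;
                /* Add the time spent in the still-open freeze interval. */
                if (test_bit(CGRP_FREEZE, &cgrp->flags))
                        nsec += ktime_get_ns() -
                                cgrp->freezer.freeze_start_nsec;
        } while (read_seqcount_retry(&cgrp->freezer.freeze_seq, seq));

        return nsec;
}
#endif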

/*
 * Adjust the task state (freeze or unfreeze) and revisit the state of
 * source and destination cgroups.
 */
void cgroup_freezer_migrate_task(struct task_struct *task,
                                 struct cgroup *src, struct cgroup *dst)
{
        lockdep_assert_held(&css_set_lock);

        /*
         * Kernel threads are not supposed to be frozen at all.
         */
        if (task->flags & PF_KTHREAD)
                return;

        /*
         * There is nothing to do if neither the src nor the dst cgroup is
         * freezing and the task is not frozen.
         */
        if (!test_bit(CGRP_FREEZE, &src->flags) &&
            !test_bit(CGRP_FREEZE, &dst->flags) &&
            !task->frozen)
                return;

        /*
         * Adjust counters of freezing and frozen tasks.
         * Note that if the task is frozen, but the destination cgroup is not
         * frozen, we bump both counters to keep them balanced.
         */
        if (task->frozen) {
                cgroup_inc_frozen_cnt(dst);
                cgroup_dec_frozen_cnt(src);
        }
        cgroup_update_frozen(dst);
        cgroup_update_frozen(src);

        /*
         * Force the task to the desired state.
         */
        cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
}

void cgroup_freeze(struct cgroup *cgrp, bool freeze)
{
        struct cgroup_subsys_state *css;
        struct cgroup *parent;
        struct cgroup *dsct;
        bool applied = false;
        u64 ts_nsec;
        bool old_e;

        lockdep_assert_held(&cgroup_mutex);

        /*
         * Nothing changed? Just exit.
         */
        if (cgrp->freezer.freeze == freeze)
                return;

        cgrp->freezer.freeze = freeze;
        ts_nsec = ktime_get_ns();

        /*
         * Propagate changes down the cgroup tree.
         */
        css_for_each_descendant_pre(css, &cgrp->self) {
                dsct = css->cgroup;

                if (cgroup_is_dead(dsct))
                        continue;

                /*
                 * e_freeze is affected by the parent's e_freeze and dsct's own
                 * freeze. If the old e_freeze equals the new e_freeze, nothing
                 * changes and its children will not be affected either, so do
                 * nothing and skip the whole subtree.
                 */
                old_e = dsct->freezer.e_freeze;
                parent = cgroup_parent(dsct);
                dsct->freezer.e_freeze = (dsct->freezer.freeze ||
                                          parent->freezer.e_freeze);
                if (dsct->freezer.e_freeze == old_e) {
                        css = css_rightmost_descendant(css);
                        continue;
                }

                /*
                 * Change the actual state: freeze or unfreeze.
                 */
                cgroup_do_freeze(dsct, freeze, ts_nsec);
                applied = true;
        }

        /*
         * Even if the actual state hasn't changed, let's notify the user.
         * The state can be enforced by an ancestor cgroup: the cgroup
         * can already be in the desired state or it can be locked in the
         * opposite state, so that the transition will never happen.
         * In both cases it's better to notify the user that there is
         * nothing to wait for.
         */
        if (!applied) {
                TRACE_CGROUP_PATH(notify_frozen, cgrp,
                                  test_bit(CGRP_FROZEN, &cgrp->flags));
                cgroup_file_notify(&cgrp->events_file);
        }
}
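
/*
 * Illustrative sketch, not part of the original file: cgroup_freeze() is
 * driven by writes to the cgroup.freeze interface file. Simplified from
 * cgroup_freeze_write() in kernel/cgroup/cgroup.c (the helper name below is
 * hypothetical), the write handler is assumed to look roughly like this:
 */
#if 0
static ssize_t cgroup_freeze_write_sketch(struct kernfs_open_file *of,
                                          char *buf, size_t nbytes,
                                          loff_t off)
{
        struct cgroup *cgrp;
        int freeze, ret;

        /* Only "0" and "1" are accepted. */
        ret = kstrtoint(strstrip(buf), 0, &freeze);
        if (ret)
                return ret;

        if (freeze < 0 || freeze > 1)
                return -ERANGE;

        /* Takes cgroup_mutex, which cgroup_freeze() asserts above. */
        cgrp = cgroup_kn_lock_live(of->kn, false);
        if (!cgrp)
                return -ENOENT;

        cgroup_freeze(cgrp, freeze);

        cgroup_kn_unlock(of->kn);
        return nbytes;
}
#endif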