1 //SPDX-License-Identifier: GPL-2.0 2 #include <linux/cgroup.h> 3 #include <linux/sched.h> 4 #include <linux/sched/task.h> 5 #include <linux/sched/signal.h> 6 7 #include "cgroup-internal.h" 8 9 #include <trace/events/cgroup.h> 10 11 /* 12 * Propagate the cgroup frozen state upwards by the cgroup tree. 13 */ 14 static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen) 15 { 16 int desc = 1; 17 18 /* 19 * If the new state is frozen, some freezing ancestor cgroups may change 20 * their state too, depending on if all their descendants are frozen. 21 * 22 * Otherwise, all ancestor cgroups are forced into the non-frozen state. 23 */ 24 while ((cgrp = cgroup_parent(cgrp))) { 25 if (frozen) { 26 cgrp->freezer.nr_frozen_descendants += desc; 27 if (!test_bit(CGRP_FROZEN, &cgrp->flags) && 28 test_bit(CGRP_FREEZE, &cgrp->flags) && 29 cgrp->freezer.nr_frozen_descendants == 30 cgrp->nr_descendants) { 31 set_bit(CGRP_FROZEN, &cgrp->flags); 32 cgroup_file_notify(&cgrp->events_file); 33 TRACE_CGROUP_PATH(notify_frozen, cgrp, 1); 34 desc++; 35 } 36 } else { 37 cgrp->freezer.nr_frozen_descendants -= desc; 38 if (test_bit(CGRP_FROZEN, &cgrp->flags)) { 39 clear_bit(CGRP_FROZEN, &cgrp->flags); 40 cgroup_file_notify(&cgrp->events_file); 41 TRACE_CGROUP_PATH(notify_frozen, cgrp, 0); 42 desc++; 43 } 44 } 45 } 46 } 47 48 /* 49 * Revisit the cgroup frozen state. 50 * Checks if the cgroup is really frozen and perform all state transitions. 51 */ 52 void cgroup_update_frozen(struct cgroup *cgrp) 53 { 54 bool frozen; 55 56 lockdep_assert_held(&css_set_lock); 57 58 /* 59 * If the cgroup has to be frozen (CGRP_FREEZE bit set), 60 * and all tasks are frozen and/or stopped, let's consider 61 * the cgroup frozen. Otherwise it's not frozen. 62 */ 63 frozen = test_bit(CGRP_FREEZE, &cgrp->flags) && 64 cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp); 65 66 if (frozen) { 67 /* Already there? */ 68 if (test_bit(CGRP_FROZEN, &cgrp->flags)) 69 return; 70 71 set_bit(CGRP_FROZEN, &cgrp->flags); 72 } else { 73 /* Already there? */ 74 if (!test_bit(CGRP_FROZEN, &cgrp->flags)) 75 return; 76 77 clear_bit(CGRP_FROZEN, &cgrp->flags); 78 } 79 cgroup_file_notify(&cgrp->events_file); 80 TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen); 81 82 /* Update the state of ancestor cgroups. */ 83 cgroup_propagate_frozen(cgrp, frozen); 84 } 85 86 /* 87 * Increment cgroup's nr_frozen_tasks. 88 */ 89 static void cgroup_inc_frozen_cnt(struct cgroup *cgrp) 90 { 91 cgrp->freezer.nr_frozen_tasks++; 92 } 93 94 /* 95 * Decrement cgroup's nr_frozen_tasks. 96 */ 97 static void cgroup_dec_frozen_cnt(struct cgroup *cgrp) 98 { 99 cgrp->freezer.nr_frozen_tasks--; 100 WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0); 101 } 102 103 /* 104 * Enter frozen/stopped state, if not yet there. Update cgroup's counters, 105 * and revisit the state of the cgroup, if necessary. 106 */ 107 void cgroup_enter_frozen(void) 108 { 109 struct cgroup *cgrp; 110 111 if (current->frozen) 112 return; 113 114 spin_lock_irq(&css_set_lock); 115 current->frozen = true; 116 cgrp = task_dfl_cgroup(current); 117 cgroup_inc_frozen_cnt(cgrp); 118 cgroup_update_frozen(cgrp); 119 spin_unlock_irq(&css_set_lock); 120 } 121 122 /* 123 * Conditionally leave frozen/stopped state. Update cgroup's counters, 124 * and revisit the state of the cgroup, if necessary. 125 * 126 * If always_leave is not set, and the cgroup is freezing, 127 * we're racing with the cgroup freezing. In this case, we don't 128 * drop the frozen counter to avoid a transient switch to 129 * the unfrozen state. 130 */ 131 void cgroup_leave_frozen(bool always_leave) 132 { 133 struct cgroup *cgrp; 134 135 spin_lock_irq(&css_set_lock); 136 cgrp = task_dfl_cgroup(current); 137 if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) { 138 cgroup_dec_frozen_cnt(cgrp); 139 cgroup_update_frozen(cgrp); 140 WARN_ON_ONCE(!current->frozen); 141 current->frozen = false; 142 } else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) { 143 spin_lock(¤t->sighand->siglock); 144 current->jobctl |= JOBCTL_TRAP_FREEZE; 145 set_thread_flag(TIF_SIGPENDING); 146 spin_unlock(¤t->sighand->siglock); 147 } 148 spin_unlock_irq(&css_set_lock); 149 } 150 151 /* 152 * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE 153 * jobctl bit. 154 */ 155 static void cgroup_freeze_task(struct task_struct *task, bool freeze) 156 { 157 unsigned long flags; 158 159 /* If the task is about to die, don't bother with freezing it. */ 160 if (!lock_task_sighand(task, &flags)) 161 return; 162 163 if (freeze) { 164 task->jobctl |= JOBCTL_TRAP_FREEZE; 165 signal_wake_up(task, false); 166 } else { 167 task->jobctl &= ~JOBCTL_TRAP_FREEZE; 168 wake_up_process(task); 169 } 170 171 unlock_task_sighand(task, &flags); 172 } 173 174 /* 175 * Freeze or unfreeze all tasks in the given cgroup. 176 */ 177 static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze) 178 { 179 struct css_task_iter it; 180 struct task_struct *task; 181 182 lockdep_assert_held(&cgroup_mutex); 183 184 spin_lock_irq(&css_set_lock); 185 if (freeze) 186 set_bit(CGRP_FREEZE, &cgrp->flags); 187 else 188 clear_bit(CGRP_FREEZE, &cgrp->flags); 189 spin_unlock_irq(&css_set_lock); 190 191 if (freeze) 192 TRACE_CGROUP_PATH(freeze, cgrp); 193 else 194 TRACE_CGROUP_PATH(unfreeze, cgrp); 195 196 css_task_iter_start(&cgrp->self, 0, &it); 197 while ((task = css_task_iter_next(&it))) { 198 /* 199 * Ignore kernel threads here. Freezing cgroups containing 200 * kthreads isn't supported. 201 */ 202 if (task->flags & PF_KTHREAD) 203 continue; 204 cgroup_freeze_task(task, freeze); 205 } 206 css_task_iter_end(&it); 207 208 /* 209 * Cgroup state should be revisited here to cover empty leaf cgroups 210 * and cgroups which descendants are already in the desired state. 211 */ 212 spin_lock_irq(&css_set_lock); 213 if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants) 214 cgroup_update_frozen(cgrp); 215 spin_unlock_irq(&css_set_lock); 216 } 217 218 /* 219 * Adjust the task state (freeze or unfreeze) and revisit the state of 220 * source and destination cgroups. 221 */ 222 void cgroup_freezer_migrate_task(struct task_struct *task, 223 struct cgroup *src, struct cgroup *dst) 224 { 225 lockdep_assert_held(&css_set_lock); 226 227 /* 228 * Kernel threads are not supposed to be frozen at all. 229 */ 230 if (task->flags & PF_KTHREAD) 231 return; 232 233 /* 234 * Adjust counters of freezing and frozen tasks. 235 * Note, that if the task is frozen, but the destination cgroup is not 236 * frozen, we bump both counters to keep them balanced. 237 */ 238 if (task->frozen) { 239 cgroup_inc_frozen_cnt(dst); 240 cgroup_dec_frozen_cnt(src); 241 } 242 cgroup_update_frozen(dst); 243 cgroup_update_frozen(src); 244 245 /* 246 * Force the task to the desired state. 247 */ 248 cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags)); 249 } 250 251 void cgroup_freeze(struct cgroup *cgrp, bool freeze) 252 { 253 struct cgroup_subsys_state *css; 254 struct cgroup *dsct; 255 bool applied = false; 256 257 lockdep_assert_held(&cgroup_mutex); 258 259 /* 260 * Nothing changed? Just exit. 261 */ 262 if (cgrp->freezer.freeze == freeze) 263 return; 264 265 cgrp->freezer.freeze = freeze; 266 267 /* 268 * Propagate changes downwards the cgroup tree. 269 */ 270 css_for_each_descendant_pre(css, &cgrp->self) { 271 dsct = css->cgroup; 272 273 if (cgroup_is_dead(dsct)) 274 continue; 275 276 if (freeze) { 277 dsct->freezer.e_freeze++; 278 /* 279 * Already frozen because of ancestor's settings? 280 */ 281 if (dsct->freezer.e_freeze > 1) 282 continue; 283 } else { 284 dsct->freezer.e_freeze--; 285 /* 286 * Still frozen because of ancestor's settings? 287 */ 288 if (dsct->freezer.e_freeze > 0) 289 continue; 290 291 WARN_ON_ONCE(dsct->freezer.e_freeze < 0); 292 } 293 294 /* 295 * Do change actual state: freeze or unfreeze. 296 */ 297 cgroup_do_freeze(dsct, freeze); 298 applied = true; 299 } 300 301 /* 302 * Even if the actual state hasn't changed, let's notify a user. 303 * The state can be enforced by an ancestor cgroup: the cgroup 304 * can already be in the desired state or it can be locked in the 305 * opposite state, so that the transition will never happen. 306 * In both cases it's better to notify a user, that there is 307 * nothing to wait for. 308 */ 309 if (!applied) { 310 TRACE_CGROUP_PATH(notify_frozen, cgrp, 311 test_bit(CGRP_FROZEN, &cgrp->flags)); 312 cgroup_file_notify(&cgrp->events_file); 313 } 314 } 315