// SPDX-License-Identifier: GPL-2.0
/*
 * Auto-group scheduling implementation:
 */
#include <linux/nospec.h>
#include "sched.h"

unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
static struct autogroup autogroup_default;
static atomic_t autogroup_seq_nr;

void __init autogroup_init(struct task_struct *init_task)
{
	autogroup_default.tg = &root_task_group;
	kref_init(&autogroup_default.kref);
	init_rwsem(&autogroup_default.lock);
	init_task->signal->autogroup = &autogroup_default;
}

void autogroup_free(struct task_group *tg)
{
	kfree(tg->autogroup);
}

static inline void autogroup_destroy(struct kref *kref)
{
	struct autogroup *ag = container_of(kref, struct autogroup, kref);

#ifdef CONFIG_RT_GROUP_SCHED
	/* We've redirected RT tasks to the root task group... */
	ag->tg->rt_se = NULL;
	ag->tg->rt_rq = NULL;
#endif
	sched_release_group(ag->tg);
	sched_destroy_group(ag->tg);
}

static inline void autogroup_kref_put(struct autogroup *ag)
{
	kref_put(&ag->kref, autogroup_destroy);
}

static inline struct autogroup *autogroup_kref_get(struct autogroup *ag)
{
	kref_get(&ag->kref);
	return ag;
}

static inline struct autogroup *autogroup_task_get(struct task_struct *p)
{
	struct autogroup *ag;
	unsigned long flags;

	if (!lock_task_sighand(p, &flags))
		return autogroup_kref_get(&autogroup_default);

	ag = autogroup_kref_get(p->signal->autogroup);
	unlock_task_sighand(p, &flags);

	return ag;
}

static inline struct autogroup *autogroup_create(void)
{
	struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL);
	struct task_group *tg;

	if (!ag)
		goto out_fail;

	tg = sched_create_group(&root_task_group);
	if (IS_ERR(tg))
		goto out_free;

	kref_init(&ag->kref);
	init_rwsem(&ag->lock);
	ag->id = atomic_inc_return(&autogroup_seq_nr);
	ag->tg = tg;
#ifdef CONFIG_RT_GROUP_SCHED
	/*
	 * Autogroup RT tasks are redirected to the root task group
	 * so we don't have to move tasks around upon policy change,
	 * or flail around trying to allocate bandwidth on the fly.
	 * A bandwidth exception in __sched_setscheduler() allows
	 * the policy change to proceed.
	 */
	free_rt_sched_group(tg);
	tg->rt_se = root_task_group.rt_se;
	tg->rt_rq = root_task_group.rt_rq;
#endif
	tg->autogroup = ag;

	sched_online_group(tg, &root_task_group);
	return ag;

out_free:
	kfree(ag);
out_fail:
	if (printk_ratelimit()) {
		printk(KERN_WARNING "autogroup_create: %s failure.\n",
			ag ? "sched_create_group()" : "kzalloc()");
	}

	return autogroup_kref_get(&autogroup_default);
}

bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
{
	if (tg != &root_task_group)
		return false;
	/*
	 * If we race with autogroup_move_group() the caller can use the old
	 * value of signal->autogroup but in this case sched_move_task() will
	 * be called again before autogroup_kref_put().
	 *
	 * However, there is no way sched_autogroup_exit_task() could tell us
	 * to avoid autogroup->tg, so we abuse PF_EXITING flag for this case.
	 */
	if (p->flags & PF_EXITING)
		return false;

	return true;
}

void sched_autogroup_exit_task(struct task_struct *p)
{
	/*
	 * We are going to call exit_notify() and autogroup_move_group() can't
	 * see this thread after that: we can no longer use signal->autogroup.
	 * See the PF_EXITING check in task_wants_autogroup().
	 */
	sched_move_task(p);
}

static void
autogroup_move_group(struct task_struct *p, struct autogroup *ag)
{
	struct autogroup *prev;
	struct task_struct *t;
	unsigned long flags;

	BUG_ON(!lock_task_sighand(p, &flags));

	prev = p->signal->autogroup;
	if (prev == ag) {
		unlock_task_sighand(p, &flags);
		return;
	}

	p->signal->autogroup = autogroup_kref_get(ag);
	/*
	 * We can't avoid sched_move_task() after we changed signal->autogroup,
	 * this process can already run with task_group() == prev->tg or we can
	 * race with cgroup code which can read autogroup = prev under rq->lock.
	 * In the latter case for_each_thread() can not miss a migrating thread,
	 * cpu_cgroup_attach() must not be possible after cgroup_exit() and it
	 * can't be removed from thread list, we hold ->siglock.
	 *
	 * If an exiting thread was already removed from thread list we rely on
	 * sched_autogroup_exit_task().
	 */
	for_each_thread(p, t)
		sched_move_task(t);

	unlock_task_sighand(p, &flags);
	autogroup_kref_put(prev);
}

/* Allocates GFP_KERNEL, cannot be called under any spinlock: */
void sched_autogroup_create_attach(struct task_struct *p)
{
	struct autogroup *ag = autogroup_create();

	autogroup_move_group(p, ag);

	/* Drop extra reference added by autogroup_create(): */
	autogroup_kref_put(ag);
}
EXPORT_SYMBOL(sched_autogroup_create_attach);

/* Cannot be called under siglock. Currently has no users: */
void sched_autogroup_detach(struct task_struct *p)
{
	autogroup_move_group(p, &autogroup_default);
}
EXPORT_SYMBOL(sched_autogroup_detach);

void sched_autogroup_fork(struct signal_struct *sig)
{
	sig->autogroup = autogroup_task_get(current);
}

void sched_autogroup_exit(struct signal_struct *sig)
{
	autogroup_kref_put(sig->autogroup);
}

static int __init setup_autogroup(char *str)
{
	sysctl_sched_autogroup_enabled = 0;

	return 1;
}
__setup("noautogroup", setup_autogroup);

#ifdef CONFIG_PROC_FS

int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
{
	static unsigned long next = INITIAL_JIFFIES;
	struct autogroup *ag;
	unsigned long shares;
	int err, idx;

	if (nice < MIN_NICE || nice > MAX_NICE)
		return -EINVAL;

	err = security_task_setnice(current, nice);
	if (err)
		return err;

	if (nice < 0 && !can_nice(current, nice))
		return -EPERM;

	/* This is a heavy operation, taking global locks.. */
	if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next))
		return -EAGAIN;

	next = HZ / 10 + jiffies;
	ag = autogroup_task_get(p);

	idx = array_index_nospec(nice + 20, 40);
	shares = scale_load(sched_prio_to_weight[idx]);

	down_write(&ag->lock);
	err = sched_group_set_shares(ag->tg, shares);
	if (!err)
		ag->nice = nice;
	up_write(&ag->lock);

	autogroup_kref_put(ag);

	return err;
}

void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
{
	struct autogroup *ag = autogroup_task_get(p);

	if (!task_group_is_autogroup(ag->tg))
		goto out;

	down_read(&ag->lock);
	seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice);
	up_read(&ag->lock);

out:
	autogroup_kref_put(ag);
}
#endif /* CONFIG_PROC_FS */

int autogroup_path(struct task_group *tg, char *buf, int buflen)
{
	if (!task_group_is_autogroup(tg))
		return 0;

	return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
}