1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Auto-group scheduling implementation: 5 */ 6 7 unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1; 8 static struct autogroup autogroup_default; 9 static atomic_t autogroup_seq_nr; 10 11 #ifdef CONFIG_SYSCTL 12 static struct ctl_table sched_autogroup_sysctls[] = { 13 { 14 .procname = "sched_autogroup_enabled", 15 .data = &sysctl_sched_autogroup_enabled, 16 .maxlen = sizeof(unsigned int), 17 .mode = 0644, 18 .proc_handler = proc_dointvec_minmax, 19 .extra1 = SYSCTL_ZERO, 20 .extra2 = SYSCTL_ONE, 21 }, 22 {} 23 }; 24 25 static void __init sched_autogroup_sysctl_init(void) 26 { 27 register_sysctl_init("kernel", sched_autogroup_sysctls); 28 } 29 #else 30 #define sched_autogroup_sysctl_init() do { } while (0) 31 #endif 32 33 void __init autogroup_init(struct task_struct *init_task) 34 { 35 autogroup_default.tg = &root_task_group; 36 kref_init(&autogroup_default.kref); 37 init_rwsem(&autogroup_default.lock); 38 init_task->signal->autogroup = &autogroup_default; 39 sched_autogroup_sysctl_init(); 40 } 41 42 void autogroup_free(struct task_group *tg) 43 { 44 kfree(tg->autogroup); 45 } 46 47 static inline void autogroup_destroy(struct kref *kref) 48 { 49 struct autogroup *ag = container_of(kref, struct autogroup, kref); 50 51 #ifdef CONFIG_RT_GROUP_SCHED 52 /* We've redirected RT tasks to the root task group... */ 53 ag->tg->rt_se = NULL; 54 ag->tg->rt_rq = NULL; 55 #endif 56 sched_release_group(ag->tg); 57 sched_destroy_group(ag->tg); 58 } 59 60 static inline void autogroup_kref_put(struct autogroup *ag) 61 { 62 kref_put(&ag->kref, autogroup_destroy); 63 } 64 65 static inline struct autogroup *autogroup_kref_get(struct autogroup *ag) 66 { 67 kref_get(&ag->kref); 68 return ag; 69 } 70 71 static inline struct autogroup *autogroup_task_get(struct task_struct *p) 72 { 73 struct autogroup *ag; 74 unsigned long flags; 75 76 if (!lock_task_sighand(p, &flags)) 77 return autogroup_kref_get(&autogroup_default); 78 79 ag = autogroup_kref_get(p->signal->autogroup); 80 unlock_task_sighand(p, &flags); 81 82 return ag; 83 } 84 85 static inline struct autogroup *autogroup_create(void) 86 { 87 struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL); 88 struct task_group *tg; 89 90 if (!ag) 91 goto out_fail; 92 93 tg = sched_create_group(&root_task_group); 94 if (IS_ERR(tg)) 95 goto out_free; 96 97 kref_init(&ag->kref); 98 init_rwsem(&ag->lock); 99 ag->id = atomic_inc_return(&autogroup_seq_nr); 100 ag->tg = tg; 101 #ifdef CONFIG_RT_GROUP_SCHED 102 /* 103 * Autogroup RT tasks are redirected to the root task group 104 * so we don't have to move tasks around upon policy change, 105 * or flail around trying to allocate bandwidth on the fly. 106 * A bandwidth exception in __sched_setscheduler() allows 107 * the policy change to proceed. 108 */ 109 free_rt_sched_group(tg); 110 tg->rt_se = root_task_group.rt_se; 111 tg->rt_rq = root_task_group.rt_rq; 112 #endif 113 tg->autogroup = ag; 114 115 sched_online_group(tg, &root_task_group); 116 return ag; 117 118 out_free: 119 kfree(ag); 120 out_fail: 121 if (printk_ratelimit()) { 122 printk(KERN_WARNING "autogroup_create: %s failure.\n", 123 ag ? "sched_create_group()" : "kzalloc()"); 124 } 125 126 return autogroup_kref_get(&autogroup_default); 127 } 128 129 bool task_wants_autogroup(struct task_struct *p, struct task_group *tg) 130 { 131 if (tg != &root_task_group) 132 return false; 133 /* 134 * If we race with autogroup_move_group() the caller can use the old 135 * value of signal->autogroup but in this case sched_move_task() will 136 * be called again before autogroup_kref_put(). 137 * 138 * However, there is no way sched_autogroup_exit_task() could tell us 139 * to avoid autogroup->tg, so we abuse PF_EXITING flag for this case. 140 */ 141 if (p->flags & PF_EXITING) 142 return false; 143 144 return true; 145 } 146 147 void sched_autogroup_exit_task(struct task_struct *p) 148 { 149 /* 150 * We are going to call exit_notify() and autogroup_move_group() can't 151 * see this thread after that: we can no longer use signal->autogroup. 152 * See the PF_EXITING check in task_wants_autogroup(). 153 */ 154 sched_move_task(p); 155 } 156 157 static void 158 autogroup_move_group(struct task_struct *p, struct autogroup *ag) 159 { 160 struct autogroup *prev; 161 struct task_struct *t; 162 unsigned long flags; 163 164 if (WARN_ON_ONCE(!lock_task_sighand(p, &flags))) 165 return; 166 167 prev = p->signal->autogroup; 168 if (prev == ag) { 169 unlock_task_sighand(p, &flags); 170 return; 171 } 172 173 p->signal->autogroup = autogroup_kref_get(ag); 174 /* 175 * We can't avoid sched_move_task() after we changed signal->autogroup, 176 * this process can already run with task_group() == prev->tg or we can 177 * race with cgroup code which can read autogroup = prev under rq->lock. 178 * In the latter case for_each_thread() can not miss a migrating thread, 179 * cpu_cgroup_attach() must not be possible after cgroup_exit() and it 180 * can't be removed from thread list, we hold ->siglock. 181 * 182 * If an exiting thread was already removed from thread list we rely on 183 * sched_autogroup_exit_task(). 184 */ 185 for_each_thread(p, t) 186 sched_move_task(t); 187 188 unlock_task_sighand(p, &flags); 189 autogroup_kref_put(prev); 190 } 191 192 /* Allocates GFP_KERNEL, cannot be called under any spinlock: */ 193 void sched_autogroup_create_attach(struct task_struct *p) 194 { 195 struct autogroup *ag = autogroup_create(); 196 197 autogroup_move_group(p, ag); 198 199 /* Drop extra reference added by autogroup_create(): */ 200 autogroup_kref_put(ag); 201 } 202 EXPORT_SYMBOL(sched_autogroup_create_attach); 203 204 /* Cannot be called under siglock. Currently has no users: */ 205 void sched_autogroup_detach(struct task_struct *p) 206 { 207 autogroup_move_group(p, &autogroup_default); 208 } 209 EXPORT_SYMBOL(sched_autogroup_detach); 210 211 void sched_autogroup_fork(struct signal_struct *sig) 212 { 213 sig->autogroup = autogroup_task_get(current); 214 } 215 216 void sched_autogroup_exit(struct signal_struct *sig) 217 { 218 autogroup_kref_put(sig->autogroup); 219 } 220 221 static int __init setup_autogroup(char *str) 222 { 223 sysctl_sched_autogroup_enabled = 0; 224 225 return 1; 226 } 227 __setup("noautogroup", setup_autogroup); 228 229 #ifdef CONFIG_PROC_FS 230 231 int proc_sched_autogroup_set_nice(struct task_struct *p, int nice) 232 { 233 static unsigned long next = INITIAL_JIFFIES; 234 struct autogroup *ag; 235 unsigned long shares; 236 int err, idx; 237 238 if (nice < MIN_NICE || nice > MAX_NICE) 239 return -EINVAL; 240 241 err = security_task_setnice(current, nice); 242 if (err) 243 return err; 244 245 if (nice < 0 && !can_nice(current, nice)) 246 return -EPERM; 247 248 /* This is a heavy operation, taking global locks.. */ 249 if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next)) 250 return -EAGAIN; 251 252 next = HZ / 10 + jiffies; 253 ag = autogroup_task_get(p); 254 255 idx = array_index_nospec(nice + 20, 40); 256 shares = scale_load(sched_prio_to_weight[idx]); 257 258 down_write(&ag->lock); 259 err = sched_group_set_shares(ag->tg, shares); 260 if (!err) 261 ag->nice = nice; 262 up_write(&ag->lock); 263 264 autogroup_kref_put(ag); 265 266 return err; 267 } 268 269 void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m) 270 { 271 struct autogroup *ag = autogroup_task_get(p); 272 273 if (!task_group_is_autogroup(ag->tg)) 274 goto out; 275 276 down_read(&ag->lock); 277 seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice); 278 up_read(&ag->lock); 279 280 out: 281 autogroup_kref_put(ag); 282 } 283 #endif /* CONFIG_PROC_FS */ 284 285 int autogroup_path(struct task_group *tg, char *buf, int buflen) 286 { 287 if (!task_group_is_autogroup(tg)) 288 return 0; 289 290 return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); 291 } 292