1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Auto-group scheduling implementation: 5 */ 6 7 unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1; 8 static struct autogroup autogroup_default; 9 static atomic_t autogroup_seq_nr; 10 11 #ifdef CONFIG_SYSCTL 12 static struct ctl_table sched_autogroup_sysctls[] = { 13 { 14 .procname = "sched_autogroup_enabled", 15 .data = &sysctl_sched_autogroup_enabled, 16 .maxlen = sizeof(unsigned int), 17 .mode = 0644, 18 .proc_handler = proc_dointvec_minmax, 19 .extra1 = SYSCTL_ZERO, 20 .extra2 = SYSCTL_ONE, 21 }, 22 }; 23 24 static void __init sched_autogroup_sysctl_init(void) 25 { 26 register_sysctl_init("kernel", sched_autogroup_sysctls); 27 } 28 #else 29 #define sched_autogroup_sysctl_init() do { } while (0) 30 #endif 31 32 void __init autogroup_init(struct task_struct *init_task) 33 { 34 autogroup_default.tg = &root_task_group; 35 kref_init(&autogroup_default.kref); 36 init_rwsem(&autogroup_default.lock); 37 init_task->signal->autogroup = &autogroup_default; 38 sched_autogroup_sysctl_init(); 39 } 40 41 void autogroup_free(struct task_group *tg) 42 { 43 kfree(tg->autogroup); 44 } 45 46 static inline void autogroup_destroy(struct kref *kref) 47 { 48 struct autogroup *ag = container_of(kref, struct autogroup, kref); 49 50 #ifdef CONFIG_RT_GROUP_SCHED 51 /* We've redirected RT tasks to the root task group... */ 52 ag->tg->rt_se = NULL; 53 ag->tg->rt_rq = NULL; 54 #endif 55 sched_release_group(ag->tg); 56 sched_destroy_group(ag->tg); 57 } 58 59 static inline void autogroup_kref_put(struct autogroup *ag) 60 { 61 kref_put(&ag->kref, autogroup_destroy); 62 } 63 64 static inline struct autogroup *autogroup_kref_get(struct autogroup *ag) 65 { 66 kref_get(&ag->kref); 67 return ag; 68 } 69 70 static inline struct autogroup *autogroup_task_get(struct task_struct *p) 71 { 72 struct autogroup *ag; 73 unsigned long flags; 74 75 if (!lock_task_sighand(p, &flags)) 76 return autogroup_kref_get(&autogroup_default); 77 78 ag = autogroup_kref_get(p->signal->autogroup); 79 unlock_task_sighand(p, &flags); 80 81 return ag; 82 } 83 84 static inline struct autogroup *autogroup_create(void) 85 { 86 struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL); 87 struct task_group *tg; 88 89 if (!ag) 90 goto out_fail; 91 92 tg = sched_create_group(&root_task_group); 93 if (IS_ERR(tg)) 94 goto out_free; 95 96 kref_init(&ag->kref); 97 init_rwsem(&ag->lock); 98 ag->id = atomic_inc_return(&autogroup_seq_nr); 99 ag->tg = tg; 100 #ifdef CONFIG_RT_GROUP_SCHED 101 /* 102 * Autogroup RT tasks are redirected to the root task group 103 * so we don't have to move tasks around upon policy change, 104 * or flail around trying to allocate bandwidth on the fly. 105 * A bandwidth exception in __sched_setscheduler() allows 106 * the policy change to proceed. 107 */ 108 free_rt_sched_group(tg); 109 tg->rt_se = root_task_group.rt_se; 110 tg->rt_rq = root_task_group.rt_rq; 111 #endif 112 tg->autogroup = ag; 113 114 sched_online_group(tg, &root_task_group); 115 return ag; 116 117 out_free: 118 kfree(ag); 119 out_fail: 120 if (printk_ratelimit()) { 121 printk(KERN_WARNING "autogroup_create: %s failure.\n", 122 ag ? "sched_create_group()" : "kzalloc()"); 123 } 124 125 return autogroup_kref_get(&autogroup_default); 126 } 127 128 bool task_wants_autogroup(struct task_struct *p, struct task_group *tg) 129 { 130 if (tg != &root_task_group) 131 return false; 132 /* 133 * If we race with autogroup_move_group() the caller can use the old 134 * value of signal->autogroup but in this case sched_move_task() will 135 * be called again before autogroup_kref_put(). 136 * 137 * However, there is no way sched_autogroup_exit_task() could tell us 138 * to avoid autogroup->tg, so we abuse PF_EXITING flag for this case. 139 */ 140 if (p->flags & PF_EXITING) 141 return false; 142 143 return true; 144 } 145 146 void sched_autogroup_exit_task(struct task_struct *p) 147 { 148 /* 149 * We are going to call exit_notify() and autogroup_move_group() can't 150 * see this thread after that: we can no longer use signal->autogroup. 151 * See the PF_EXITING check in task_wants_autogroup(). 152 */ 153 sched_move_task(p); 154 } 155 156 static void 157 autogroup_move_group(struct task_struct *p, struct autogroup *ag) 158 { 159 struct autogroup *prev; 160 struct task_struct *t; 161 unsigned long flags; 162 163 if (WARN_ON_ONCE(!lock_task_sighand(p, &flags))) 164 return; 165 166 prev = p->signal->autogroup; 167 if (prev == ag) { 168 unlock_task_sighand(p, &flags); 169 return; 170 } 171 172 p->signal->autogroup = autogroup_kref_get(ag); 173 /* 174 * We can't avoid sched_move_task() after we changed signal->autogroup, 175 * this process can already run with task_group() == prev->tg or we can 176 * race with cgroup code which can read autogroup = prev under rq->lock. 177 * In the latter case for_each_thread() can not miss a migrating thread, 178 * cpu_cgroup_attach() must not be possible after cgroup_exit() and it 179 * can't be removed from thread list, we hold ->siglock. 180 * 181 * If an exiting thread was already removed from thread list we rely on 182 * sched_autogroup_exit_task(). 183 */ 184 for_each_thread(p, t) 185 sched_move_task(t); 186 187 unlock_task_sighand(p, &flags); 188 autogroup_kref_put(prev); 189 } 190 191 /* Allocates GFP_KERNEL, cannot be called under any spinlock: */ 192 void sched_autogroup_create_attach(struct task_struct *p) 193 { 194 struct autogroup *ag = autogroup_create(); 195 196 autogroup_move_group(p, ag); 197 198 /* Drop extra reference added by autogroup_create(): */ 199 autogroup_kref_put(ag); 200 } 201 EXPORT_SYMBOL(sched_autogroup_create_attach); 202 203 /* Cannot be called under siglock. Currently has no users: */ 204 void sched_autogroup_detach(struct task_struct *p) 205 { 206 autogroup_move_group(p, &autogroup_default); 207 } 208 EXPORT_SYMBOL(sched_autogroup_detach); 209 210 void sched_autogroup_fork(struct signal_struct *sig) 211 { 212 sig->autogroup = autogroup_task_get(current); 213 } 214 215 void sched_autogroup_exit(struct signal_struct *sig) 216 { 217 autogroup_kref_put(sig->autogroup); 218 } 219 220 static int __init setup_autogroup(char *str) 221 { 222 sysctl_sched_autogroup_enabled = 0; 223 224 return 1; 225 } 226 __setup("noautogroup", setup_autogroup); 227 228 #ifdef CONFIG_PROC_FS 229 230 int proc_sched_autogroup_set_nice(struct task_struct *p, int nice) 231 { 232 static unsigned long next = INITIAL_JIFFIES; 233 struct autogroup *ag; 234 unsigned long shares; 235 int err, idx; 236 237 if (nice < MIN_NICE || nice > MAX_NICE) 238 return -EINVAL; 239 240 err = security_task_setnice(current, nice); 241 if (err) 242 return err; 243 244 if (nice < 0 && !can_nice(current, nice)) 245 return -EPERM; 246 247 /* This is a heavy operation, taking global locks.. */ 248 if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next)) 249 return -EAGAIN; 250 251 next = HZ / 10 + jiffies; 252 ag = autogroup_task_get(p); 253 254 idx = array_index_nospec(nice + 20, 40); 255 shares = scale_load(sched_prio_to_weight[idx]); 256 257 down_write(&ag->lock); 258 err = sched_group_set_shares(ag->tg, shares); 259 if (!err) 260 ag->nice = nice; 261 up_write(&ag->lock); 262 263 autogroup_kref_put(ag); 264 265 return err; 266 } 267 268 void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m) 269 { 270 struct autogroup *ag = autogroup_task_get(p); 271 272 if (!task_group_is_autogroup(ag->tg)) 273 goto out; 274 275 down_read(&ag->lock); 276 seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice); 277 up_read(&ag->lock); 278 279 out: 280 autogroup_kref_put(ag); 281 } 282 #endif /* CONFIG_PROC_FS */ 283 284 int autogroup_path(struct task_group *tg, char *buf, int buflen) 285 { 286 if (!task_group_is_autogroup(tg)) 287 return 0; 288 289 return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); 290 } 291