// SPDX-License-Identifier: GPL-2.0-only

/*
 * A simple wrapper around refcount. An allocated sched_core_cookie's
 * address is used to compute the cookie of the task.
 */
#include "sched.h"

struct sched_core_cookie {
	refcount_t refcnt;
};

static unsigned long sched_core_alloc_cookie(void)
{
	struct sched_core_cookie *ck = kmalloc(sizeof(*ck), GFP_KERNEL);
	if (!ck)
		return 0;

	refcount_set(&ck->refcnt, 1);
	sched_core_get();

	return (unsigned long)ck;
}

static void sched_core_put_cookie(unsigned long cookie)
{
	struct sched_core_cookie *ptr = (void *)cookie;

	if (ptr && refcount_dec_and_test(&ptr->refcnt)) {
		kfree(ptr);
		sched_core_put();
	}
}

static unsigned long sched_core_get_cookie(unsigned long cookie)
{
	struct sched_core_cookie *ptr = (void *)cookie;

	if (ptr)
		refcount_inc(&ptr->refcnt);

	return cookie;
}

/*
 * sched_core_update_cookie - replace the cookie on a task
 * @p: the task to update
 * @cookie: the new cookie
 *
 * Effectively exchange the task cookie; caller is responsible for lifetimes on
 * both ends.
 *
 * Returns: the old cookie
 */
static unsigned long sched_core_update_cookie(struct task_struct *p,
					      unsigned long cookie)
{
	unsigned long old_cookie;
	struct rq_flags rf;
	struct rq *rq;

	rq = task_rq_lock(p, &rf);

	/*
	 * Since creating a cookie implies sched_core_get(), and we cannot set
	 * a cookie until after we've created it (and, similarly, cannot
	 * destroy a cookie until after we've removed it), core scheduling
	 * must be enabled here.
	 */
	WARN_ON_ONCE((p->core_cookie || cookie) && !sched_core_enabled(rq));

	if (sched_core_enqueued(p))
		sched_core_dequeue(rq, p, DEQUEUE_SAVE);

	old_cookie = p->core_cookie;
	p->core_cookie = cookie;

	/*
	 * Consider the cases: !old_cookie and !cookie.
	 *
	 * If the task had no cookie it was never in the core tree, so there
	 * was nothing to dequeue above; if the new cookie is zero, the task
	 * must not be put back into the core tree here.
	 */
	if (cookie && task_on_rq_queued(p))
		sched_core_enqueue(rq, p);

	/*
	 * If task is currently running, it may not be compatible anymore after
	 * the cookie change, so enter the scheduler on its CPU to schedule it
	 * away.
	 *
	 * Note that it is possible that as a result of this cookie change, the
	 * core has now entered/left forced idle state. Defer accounting to the
	 * next scheduling edge, rather than always forcing a reschedule here.
	 */
	if (task_on_cpu(rq, p))
		resched_curr(rq);

	task_rq_unlock(rq, p, &rf);

	return old_cookie;
}
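
/*
 * Illustrative sketch of the reference discipline expected of callers
 * (this mirrors __sched_core_set() and the PR_SCHED_CORE_CREATE path
 * below; it is not additional API):
 *
 *	cookie = sched_core_alloc_cookie();	// new cookie, refcnt == 1
 *	old = sched_core_update_cookie(p, sched_core_get_cookie(cookie));
 *						// p now owns one reference
 *	sched_core_put_cookie(old);		// drop p's previous cookie
 *	sched_core_put_cookie(cookie);		// drop the local reference
 */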

static unsigned long sched_core_clone_cookie(struct task_struct *p)
{
	unsigned long cookie, flags;

	raw_spin_lock_irqsave(&p->pi_lock, flags);
	cookie = sched_core_get_cookie(p->core_cookie);
	raw_spin_unlock_irqrestore(&p->pi_lock, flags);

	return cookie;
}

void sched_core_fork(struct task_struct *p)
{
	RB_CLEAR_NODE(&p->core_node);
	p->core_cookie = sched_core_clone_cookie(current);
}

void sched_core_free(struct task_struct *p)
{
	sched_core_put_cookie(p->core_cookie);
}

static void __sched_core_set(struct task_struct *p, unsigned long cookie)
{
	cookie = sched_core_get_cookie(cookie);
	cookie = sched_core_update_cookie(p, cookie);
	sched_core_put_cookie(cookie);
}
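
/*
 * Userspace usage sketch (illustrative only, not part of this file): the
 * prctl arguments map to (PR_SCHED_CORE, cmd, pid, type, uaddr), see
 * Documentation/admin-guide/hw-vuln/core-scheduling.rst:
 *
 *	// create a fresh cookie for <pid> and its whole thread group
 *	prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, pid,
 *	      PR_SCHED_CORE_SCOPE_THREAD_GROUP, 0);
 *
 *	// read back an opaque (hashed) cookie id for the current thread
 *	unsigned long long id;
 *	prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, 0,
 *	      PR_SCHED_CORE_SCOPE_THREAD, (unsigned long)&id);
 */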

/* Called from prctl interface: PR_SCHED_CORE */
int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
			 unsigned long uaddr)
{
	unsigned long cookie = 0, id = 0;
	struct task_struct *task, *p;
	struct pid *grp;
	int err = 0;

	if (!static_branch_likely(&sched_smt_present))
		return -ENODEV;

	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD != PIDTYPE_PID);
	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD_GROUP != PIDTYPE_TGID);
	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_PROCESS_GROUP != PIDTYPE_PGID);

	if (type > PIDTYPE_PGID || cmd >= PR_SCHED_CORE_MAX || pid < 0 ||
	    (cmd != PR_SCHED_CORE_GET && uaddr))
		return -EINVAL;

	rcu_read_lock();
	if (pid == 0) {
		task = current;
	} else {
		task = find_task_by_vpid(pid);
		if (!task) {
			rcu_read_unlock();
			return -ESRCH;
		}
	}
	get_task_struct(task);
	rcu_read_unlock();

	/*
	 * Check if this process has the right to modify the specified
	 * process. Use the regular "ptrace_may_access()" checks.
	 */
	if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
		err = -EPERM;
		goto out;
	}

	switch (cmd) {
	case PR_SCHED_CORE_GET:
		if (type != PIDTYPE_PID || uaddr & 7) {
			err = -EINVAL;
			goto out;
		}
		cookie = sched_core_clone_cookie(task);
		if (cookie) {
			/* XXX improve ? */
			ptr_to_hashval((void *)cookie, &id);
		}
		err = put_user(id, (u64 __user *)uaddr);
		goto out;

	case PR_SCHED_CORE_CREATE:
		cookie = sched_core_alloc_cookie();
		if (!cookie) {
			err = -ENOMEM;
			goto out;
		}
		break;

	case PR_SCHED_CORE_SHARE_TO:
		cookie = sched_core_clone_cookie(current);
		break;

	case PR_SCHED_CORE_SHARE_FROM:
		if (type != PIDTYPE_PID) {
			err = -EINVAL;
			goto out;
		}
		cookie = sched_core_clone_cookie(task);
		__sched_core_set(current, cookie);
		goto out;

	default:
		err = -EINVAL;
		goto out;
	}

	if (type == PIDTYPE_PID) {
		__sched_core_set(task, cookie);
		goto out;
	}

	read_lock(&tasklist_lock);
	grp = task_pid_type(task, type);

	do_each_pid_thread(grp, type, p) {
		if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) {
			err = -EPERM;
			goto out_tasklist;
		}
	} while_each_pid_thread(grp, type, p);

	do_each_pid_thread(grp, type, p) {
		__sched_core_set(p, cookie);
	} while_each_pid_thread(grp, type, p);
out_tasklist:
	read_unlock(&tasklist_lock);

out:
	sched_core_put_cookie(cookie);
	put_task_struct(task);
	return err;
}

#ifdef CONFIG_SCHEDSTATS

/* REQUIRES: rq->core's clock recently updated. */
void __sched_core_account_forceidle(struct rq *rq)
{
	const struct cpumask *smt_mask = cpu_smt_mask(cpu_of(rq));
	u64 delta, now = rq_clock(rq->core);
	struct rq *rq_i;
	struct task_struct *p;
	int i;

	lockdep_assert_rq_held(rq);

	WARN_ON_ONCE(!rq->core->core_forceidle_count);

	if (rq->core->core_forceidle_start == 0)
		return;

	delta = now - rq->core->core_forceidle_start;
	if (unlikely((s64)delta <= 0))
		return;

	rq->core->core_forceidle_start = now;

	if (WARN_ON_ONCE(!rq->core->core_forceidle_occupation)) {
		/* can't be forced idle without a running task */
	} else if (rq->core->core_forceidle_count > 1 ||
		   rq->core->core_forceidle_occupation > 1) {
		/*
		 * For larger SMT configurations, we need to scale the charged
		 * forced idle amount since there can be more than one forced
		 * idle sibling and more than one running cookied task.
		 */
		delta *= rq->core->core_forceidle_count;
		delta = div_u64(delta, rq->core->core_forceidle_occupation);
	}

	for_each_cpu(i, smt_mask) {
		rq_i = cpu_rq(i);
		p = rq_i->core_pick ?: rq_i->curr;

		if (p == rq_i->idle)
			continue;

		/*
		 * Note: this will account forceidle to the current CPU, even
		 * if it comes from our SMT sibling.
		 */
		__account_forceidle_time(p, delta);
	}
}

void __sched_core_tick(struct rq *rq)
{
	if (!rq->core->core_forceidle_count)
		return;

	if (rq != rq->core)
		update_rq_clock(rq->core);

	__sched_core_account_forceidle(rq);
}

#endif /* CONFIG_SCHEDSTATS */
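
/*
 * Worked example for the scaling in __sched_core_account_forceidle() above
 * (illustrative numbers, not taken from the code): on an SMT-4 core with
 * three siblings forced idle (core_forceidle_count == 3) and one cookied
 * task running (core_forceidle_occupation == 1), each interval of length
 * delta charges 3 * delta of forced-idle time to that one task; with two
 * forced-idle siblings and two runners, each runner is charged delta.
 * Either way, to a first approximation, the total charged matches the
 * sibling CPU time actually lost to forced idle during the interval.
 */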