1 // SPDX-License-Identifier: GPL-2.0-only 2 3 /* 4 * A simple wrapper around refcount. An allocated sched_core_cookie's 5 * address is used to compute the cookie of the task. 6 */ 7 struct sched_core_cookie { 8 refcount_t refcnt; 9 }; 10 11 static unsigned long sched_core_alloc_cookie(void) 12 { 13 struct sched_core_cookie *ck = kmalloc(sizeof(*ck), GFP_KERNEL); 14 if (!ck) 15 return 0; 16 17 refcount_set(&ck->refcnt, 1); 18 sched_core_get(); 19 20 return (unsigned long)ck; 21 } 22 23 static void sched_core_put_cookie(unsigned long cookie) 24 { 25 struct sched_core_cookie *ptr = (void *)cookie; 26 27 if (ptr && refcount_dec_and_test(&ptr->refcnt)) { 28 kfree(ptr); 29 sched_core_put(); 30 } 31 } 32 33 static unsigned long sched_core_get_cookie(unsigned long cookie) 34 { 35 struct sched_core_cookie *ptr = (void *)cookie; 36 37 if (ptr) 38 refcount_inc(&ptr->refcnt); 39 40 return cookie; 41 } 42 43 /* 44 * sched_core_update_cookie - replace the cookie on a task 45 * @p: the task to update 46 * @cookie: the new cookie 47 * 48 * Effectively exchange the task cookie; caller is responsible for lifetimes on 49 * both ends. 50 * 51 * Returns: the old cookie 52 */ 53 static unsigned long sched_core_update_cookie(struct task_struct *p, 54 unsigned long cookie) 55 { 56 unsigned long old_cookie; 57 struct rq_flags rf; 58 struct rq *rq; 59 60 rq = task_rq_lock(p, &rf); 61 62 /* 63 * Since creating a cookie implies sched_core_get(), and we cannot set 64 * a cookie until after we've created it, similarly, we cannot destroy 65 * a cookie until after we've removed it, we must have core scheduling 66 * enabled here. 67 */ 68 SCHED_WARN_ON((p->core_cookie || cookie) && !sched_core_enabled(rq)); 69 70 if (sched_core_enqueued(p)) 71 sched_core_dequeue(rq, p, DEQUEUE_SAVE); 72 73 old_cookie = p->core_cookie; 74 p->core_cookie = cookie; 75 76 /* 77 * Consider the cases: !prev_cookie and !cookie. 
78 */ 79 if (cookie && task_on_rq_queued(p)) 80 sched_core_enqueue(rq, p); 81 82 /* 83 * If task is currently running, it may not be compatible anymore after 84 * the cookie change, so enter the scheduler on its CPU to schedule it 85 * away. 86 * 87 * Note that it is possible that as a result of this cookie change, the 88 * core has now entered/left forced idle state. Defer accounting to the 89 * next scheduling edge, rather than always forcing a reschedule here. 90 */ 91 if (task_on_cpu(rq, p)) 92 resched_curr(rq); 93 94 task_rq_unlock(rq, p, &rf); 95 96 return old_cookie; 97 } 98 99 static unsigned long sched_core_clone_cookie(struct task_struct *p) 100 { 101 unsigned long cookie, flags; 102 103 raw_spin_lock_irqsave(&p->pi_lock, flags); 104 cookie = sched_core_get_cookie(p->core_cookie); 105 raw_spin_unlock_irqrestore(&p->pi_lock, flags); 106 107 return cookie; 108 } 109 110 void sched_core_fork(struct task_struct *p) 111 { 112 RB_CLEAR_NODE(&p->core_node); 113 p->core_cookie = sched_core_clone_cookie(current); 114 } 115 116 void sched_core_free(struct task_struct *p) 117 { 118 sched_core_put_cookie(p->core_cookie); 119 } 120 121 static void __sched_core_set(struct task_struct *p, unsigned long cookie) 122 { 123 cookie = sched_core_get_cookie(cookie); 124 cookie = sched_core_update_cookie(p, cookie); 125 sched_core_put_cookie(cookie); 126 } 127 128 /* Called from prctl interface: PR_SCHED_CORE */ 129 int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type, 130 unsigned long uaddr) 131 { 132 unsigned long cookie = 0, id = 0; 133 struct task_struct *task, *p; 134 struct pid *grp; 135 int err = 0; 136 137 if (!static_branch_likely(&sched_smt_present)) 138 return -ENODEV; 139 140 BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD != PIDTYPE_PID); 141 BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD_GROUP != PIDTYPE_TGID); 142 BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_PROCESS_GROUP != PIDTYPE_PGID); 143 144 if (type > PIDTYPE_PGID || cmd >= PR_SCHED_CORE_MAX || pid < 0 || 145 
(cmd != PR_SCHED_CORE_GET && uaddr)) 146 return -EINVAL; 147 148 rcu_read_lock(); 149 if (pid == 0) { 150 task = current; 151 } else { 152 task = find_task_by_vpid(pid); 153 if (!task) { 154 rcu_read_unlock(); 155 return -ESRCH; 156 } 157 } 158 get_task_struct(task); 159 rcu_read_unlock(); 160 161 /* 162 * Check if this process has the right to modify the specified 163 * process. Use the regular "ptrace_may_access()" checks. 164 */ 165 if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) { 166 err = -EPERM; 167 goto out; 168 } 169 170 switch (cmd) { 171 case PR_SCHED_CORE_GET: 172 if (type != PIDTYPE_PID || uaddr & 7) { 173 err = -EINVAL; 174 goto out; 175 } 176 cookie = sched_core_clone_cookie(task); 177 if (cookie) { 178 /* XXX improve ? */ 179 ptr_to_hashval((void *)cookie, &id); 180 } 181 err = put_user(id, (u64 __user *)uaddr); 182 goto out; 183 184 case PR_SCHED_CORE_CREATE: 185 cookie = sched_core_alloc_cookie(); 186 if (!cookie) { 187 err = -ENOMEM; 188 goto out; 189 } 190 break; 191 192 case PR_SCHED_CORE_SHARE_TO: 193 cookie = sched_core_clone_cookie(current); 194 break; 195 196 case PR_SCHED_CORE_SHARE_FROM: 197 if (type != PIDTYPE_PID) { 198 err = -EINVAL; 199 goto out; 200 } 201 cookie = sched_core_clone_cookie(task); 202 __sched_core_set(current, cookie); 203 goto out; 204 205 default: 206 err = -EINVAL; 207 goto out; 208 } 209 210 if (type == PIDTYPE_PID) { 211 __sched_core_set(task, cookie); 212 goto out; 213 } 214 215 read_lock(&tasklist_lock); 216 grp = task_pid_type(task, type); 217 218 do_each_pid_thread(grp, type, p) { 219 if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) { 220 err = -EPERM; 221 goto out_tasklist; 222 } 223 } while_each_pid_thread(grp, type, p); 224 225 do_each_pid_thread(grp, type, p) { 226 __sched_core_set(p, cookie); 227 } while_each_pid_thread(grp, type, p); 228 out_tasklist: 229 read_unlock(&tasklist_lock); 230 231 out: 232 sched_core_put_cookie(cookie); 233 put_task_struct(task); 234 return err; 235 } 236 237 
#ifdef CONFIG_SCHEDSTATS

/*
 * __sched_core_account_forceidle - charge forced-idle time to running tasks
 * @rq: a runqueue of the core being accounted; its lock must be held
 *
 * REQUIRES: rq->core's clock recently updated.
 *
 * Computes the time elapsed since rq->core->core_forceidle_start, restarts
 * that timestamp at the current core clock, and passes the (possibly scaled)
 * amount to __account_forceidle_time() for the picked-or-current task of
 * every CPU in @rq's SMT mask that is not running its idle task.
 */
void __sched_core_account_forceidle(struct rq *rq)
{
	const struct cpumask *smt_mask = cpu_smt_mask(cpu_of(rq));
	u64 delta, now = rq_clock(rq->core);
	struct rq *rq_i;
	struct task_struct *p;
	int i;

	lockdep_assert_rq_held(rq);

	WARN_ON_ONCE(!rq->core->core_forceidle_count);

	/* A start timestamp of 0 means there is nothing to account. */
	if (rq->core->core_forceidle_start == 0)
		return;

	/* Skip a zero or negative interval. */
	delta = now - rq->core->core_forceidle_start;
	if (unlikely((s64)delta <= 0))
		return;

	rq->core->core_forceidle_start = now;

	if (WARN_ON_ONCE(!rq->core->core_forceidle_occupation)) {
		/* can't be forced idle without a running task */
	} else if (rq->core->core_forceidle_count > 1 ||
		   rq->core->core_forceidle_occupation > 1) {
		/*
		 * For larger SMT configurations, we need to scale the charged
		 * forced idle amount since there can be more than one forced
		 * idle sibling and more than one running cookied task.
		 */
		delta *= rq->core->core_forceidle_count;
		delta = div_u64(delta, rq->core->core_forceidle_occupation);
	}

	for_each_cpu(i, smt_mask) {
		rq_i = cpu_rq(i);
		/* Prefer the task already picked for this CPU, else its current one. */
		p = rq_i->core_pick ?: rq_i->curr;

		if (p == rq_i->idle)
			continue;

		/*
		 * Note: this will account forceidle to the current cpu, even
		 * if it comes from our SMT sibling.
		 */
		__account_forceidle_time(p, delta);
	}
}

/*
 * __sched_core_tick - periodic forced-idle accounting hook
 * @rq: the runqueue being ticked
 *
 * Does nothing unless the core currently has a non-zero forced-idle count.
 * Otherwise refreshes the core runqueue's clock when @rq is not itself the
 * core runqueue, then accounts the forced-idle time accumulated so far.
 */
void __sched_core_tick(struct rq *rq)
{
	if (!rq->core->core_forceidle_count)
		return;

	if (rq != rq->core)
		update_rq_clock(rq->core);

	__sched_core_account_forceidle(rq);
}

#endif /* CONFIG_SCHEDSTATS */