1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright 2018 Linaro Limited 4 * 5 * Author: Daniel Lezcano <daniel.lezcano@linaro.org> 6 * 7 * The idle injection framework provides a way to force CPUs to enter idle 8 * states for a specified fraction of time over a specified period. 9 * 10 * It relies on the smpboot kthreads feature providing common code for CPU 11 * hotplug and thread [un]parking. 12 * 13 * All of the kthreads used for idle injection are created at init time. 14 * 15 * Next, the users of the idle injection framework provide a cpumask via 16 * its register function. The kthreads will be synchronized with respect to 17 * this cpumask. 18 * 19 * The idle + run duration is specified via separate helpers and that allows 20 * idle injection to be started. 21 * 22 * The idle injection kthreads will call play_idle_precise() with the idle 23 * duration and max allowed latency specified as per the above. 24 * 25 * After all of them have been woken up, a timer is set to start the next idle 26 * injection cycle. 27 * 28 * The timer interrupt handler will wake up the idle injection kthreads for 29 * all of the CPUs in the cpumask provided by the user. 30 * 31 * Idle injection is stopped synchronously and no leftover idle injection 32 * kthread activity after its completion is guaranteed. 33 * 34 * It is up to the user of this framework to provide a lock for higher-level 35 * synchronization to prevent race conditions like starting idle injection 36 * while unregistering from the framework. 37 */ 38 #define pr_fmt(fmt) "ii_dev: " fmt 39 40 #include <linux/cpu.h> 41 #include <linux/hrtimer.h> 42 #include <linux/kthread.h> 43 #include <linux/sched.h> 44 #include <linux/slab.h> 45 #include <linux/smpboot.h> 46 #include <linux/idle_inject.h> 47 48 #include <uapi/linux/sched/types.h> 49 50 /** 51 * struct idle_inject_thread - task on/off switch structure 52 * @tsk: task injecting the idle cycles 53 * @should_run: whether or not to run the task (for the smpboot kthread API) 54 */ 55 struct idle_inject_thread { 56 struct task_struct *tsk; 57 int should_run; 58 }; 59 60 /** 61 * struct idle_inject_device - idle injection data 62 * @timer: idle injection period timer 63 * @idle_duration_us: duration of CPU idle time to inject 64 * @run_duration_us: duration of CPU run time to allow 65 * @latency_us: max allowed latency 66 * @update: Optional callback deciding whether or not to skip idle 67 * injection in the given cycle. 68 * @cpumask: mask of CPUs affected by idle injection 69 * 70 * This structure is used to define per instance idle inject device data. Each 71 * instance has an idle duration, a run duration and mask of CPUs to inject 72 * idle. 73 * 74 * Actual CPU idle time is injected by calling kernel scheduler interface 75 * play_idle_precise(). There is one optional callback that can be registered 76 * by calling idle_inject_register_full(): 77 * 78 * update() - This callback is invoked just before waking up CPUs to inject 79 * idle. If it returns false, CPUs are not woken up to inject idle in the given 80 * cycle. It also allows the caller to readjust the idle and run duration by 81 * calling idle_inject_set_duration() for the next cycle. 82 */ 83 struct idle_inject_device { 84 struct hrtimer timer; 85 unsigned int idle_duration_us; 86 unsigned int run_duration_us; 87 unsigned int latency_us; 88 bool (*update)(void); 89 unsigned long cpumask[]; 90 }; 91 92 static DEFINE_PER_CPU(struct idle_inject_thread, idle_inject_thread); 93 static DEFINE_PER_CPU(struct idle_inject_device *, idle_inject_device); 94 95 /** 96 * idle_inject_wakeup - Wake up idle injection threads 97 * @ii_dev: target idle injection device 98 * 99 * Every idle injection task associated with the given idle injection device 100 * and running on an online CPU will be woken up. 101 */ 102 static void idle_inject_wakeup(struct idle_inject_device *ii_dev) 103 { 104 struct idle_inject_thread *iit; 105 unsigned int cpu; 106 107 for_each_cpu_and(cpu, to_cpumask(ii_dev->cpumask), cpu_online_mask) { 108 iit = per_cpu_ptr(&idle_inject_thread, cpu); 109 iit->should_run = 1; 110 wake_up_process(iit->tsk); 111 } 112 } 113 114 /** 115 * idle_inject_timer_fn - idle injection timer function 116 * @timer: idle injection hrtimer 117 * 118 * This function is called when the idle injection timer expires. It wakes up 119 * idle injection tasks associated with the timer and they, in turn, invoke 120 * play_idle_precise() to inject a specified amount of CPU idle time. 121 * 122 * Return: HRTIMER_RESTART. 123 */ 124 static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer) 125 { 126 unsigned int duration_us; 127 struct idle_inject_device *ii_dev = 128 container_of(timer, struct idle_inject_device, timer); 129 130 if (!ii_dev->update || (ii_dev->update && ii_dev->update())) 131 idle_inject_wakeup(ii_dev); 132 133 duration_us = READ_ONCE(ii_dev->run_duration_us); 134 duration_us += READ_ONCE(ii_dev->idle_duration_us); 135 136 hrtimer_forward_now(timer, ns_to_ktime(duration_us * NSEC_PER_USEC)); 137 138 return HRTIMER_RESTART; 139 } 140 141 /** 142 * idle_inject_fn - idle injection work function 143 * @cpu: the CPU owning the task 144 * 145 * This function calls play_idle_precise() to inject a specified amount of CPU 146 * idle time. 147 */ 148 static void idle_inject_fn(unsigned int cpu) 149 { 150 struct idle_inject_device *ii_dev; 151 struct idle_inject_thread *iit; 152 153 ii_dev = per_cpu(idle_inject_device, cpu); 154 iit = per_cpu_ptr(&idle_inject_thread, cpu); 155 156 /* 157 * Let the smpboot main loop know that the task should not run again. 158 */ 159 iit->should_run = 0; 160 161 play_idle_precise(READ_ONCE(ii_dev->idle_duration_us) * NSEC_PER_USEC, 162 READ_ONCE(ii_dev->latency_us) * NSEC_PER_USEC); 163 } 164 165 /** 166 * idle_inject_set_duration - idle and run duration update helper 167 * @ii_dev: idle injection control device structure 168 * @run_duration_us: CPU run time to allow in microseconds 169 * @idle_duration_us: CPU idle time to inject in microseconds 170 */ 171 void idle_inject_set_duration(struct idle_inject_device *ii_dev, 172 unsigned int run_duration_us, 173 unsigned int idle_duration_us) 174 { 175 if (run_duration_us && idle_duration_us) { 176 WRITE_ONCE(ii_dev->run_duration_us, run_duration_us); 177 WRITE_ONCE(ii_dev->idle_duration_us, idle_duration_us); 178 } 179 } 180 EXPORT_SYMBOL_NS_GPL(idle_inject_set_duration, IDLE_INJECT); 181 182 /** 183 * idle_inject_get_duration - idle and run duration retrieval helper 184 * @ii_dev: idle injection control device structure 185 * @run_duration_us: memory location to store the current CPU run time 186 * @idle_duration_us: memory location to store the current CPU idle time 187 */ 188 void idle_inject_get_duration(struct idle_inject_device *ii_dev, 189 unsigned int *run_duration_us, 190 unsigned int *idle_duration_us) 191 { 192 *run_duration_us = READ_ONCE(ii_dev->run_duration_us); 193 *idle_duration_us = READ_ONCE(ii_dev->idle_duration_us); 194 } 195 EXPORT_SYMBOL_NS_GPL(idle_inject_get_duration, IDLE_INJECT); 196 197 /** 198 * idle_inject_set_latency - set the maximum latency allowed 199 * @ii_dev: idle injection control device structure 200 * @latency_us: set the latency requirement for the idle state 201 */ 202 void idle_inject_set_latency(struct idle_inject_device *ii_dev, 203 unsigned int latency_us) 204 { 205 WRITE_ONCE(ii_dev->latency_us, latency_us); 206 } 207 EXPORT_SYMBOL_NS_GPL(idle_inject_set_latency, IDLE_INJECT); 208 209 /** 210 * idle_inject_start - start idle injections 211 * @ii_dev: idle injection control device structure 212 * 213 * The function starts idle injection by first waking up all of the idle 214 * injection kthreads associated with @ii_dev to let them inject CPU idle time 215 * sets up a timer to start the next idle injection period. 216 * 217 * Return: -EINVAL if the CPU idle or CPU run time is not set or 0 on success. 218 */ 219 int idle_inject_start(struct idle_inject_device *ii_dev) 220 { 221 unsigned int idle_duration_us = READ_ONCE(ii_dev->idle_duration_us); 222 unsigned int run_duration_us = READ_ONCE(ii_dev->run_duration_us); 223 224 if (!idle_duration_us || !run_duration_us) 225 return -EINVAL; 226 227 pr_debug("Starting injecting idle cycles on CPUs '%*pbl'\n", 228 cpumask_pr_args(to_cpumask(ii_dev->cpumask))); 229 230 idle_inject_wakeup(ii_dev); 231 232 hrtimer_start(&ii_dev->timer, 233 ns_to_ktime((idle_duration_us + run_duration_us) * 234 NSEC_PER_USEC), 235 HRTIMER_MODE_REL); 236 237 return 0; 238 } 239 EXPORT_SYMBOL_NS_GPL(idle_inject_start, IDLE_INJECT); 240 241 /** 242 * idle_inject_stop - stops idle injections 243 * @ii_dev: idle injection control device structure 244 * 245 * The function stops idle injection and waits for the threads to finish work. 246 * If CPU idle time is being injected when this function runs, then it will 247 * wait until the end of the cycle. 248 * 249 * When it returns, there is no more idle injection kthread activity. The 250 * kthreads are scheduled out and the periodic timer is off. 251 */ 252 void idle_inject_stop(struct idle_inject_device *ii_dev) 253 { 254 struct idle_inject_thread *iit; 255 unsigned int cpu; 256 257 pr_debug("Stopping idle injection on CPUs '%*pbl'\n", 258 cpumask_pr_args(to_cpumask(ii_dev->cpumask))); 259 260 hrtimer_cancel(&ii_dev->timer); 261 262 /* 263 * Stopping idle injection requires all of the idle injection kthreads 264 * associated with the given cpumask to be parked and stay that way, so 265 * prevent CPUs from going online at this point. Any CPUs going online 266 * after the loop below will be covered by clearing the should_run flag 267 * that will cause the smpboot main loop to schedule them out. 268 */ 269 cpu_hotplug_disable(); 270 271 /* 272 * Iterate over all (online + offline) CPUs here in case one of them 273 * goes offline with the should_run flag set so as to prevent its idle 274 * injection kthread from running when the CPU goes online again after 275 * the ii_dev has been freed. 276 */ 277 for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) { 278 iit = per_cpu_ptr(&idle_inject_thread, cpu); 279 iit->should_run = 0; 280 281 wait_task_inactive(iit->tsk, TASK_ANY); 282 } 283 284 cpu_hotplug_enable(); 285 } 286 EXPORT_SYMBOL_NS_GPL(idle_inject_stop, IDLE_INJECT); 287 288 /** 289 * idle_inject_setup - prepare the current task for idle injection 290 * @cpu: not used 291 * 292 * Called once, this function is in charge of setting the current task's 293 * scheduler parameters to make it an RT task. 294 */ 295 static void idle_inject_setup(unsigned int cpu) 296 { 297 sched_set_fifo(current); 298 } 299 300 /** 301 * idle_inject_should_run - function helper for the smpboot API 302 * @cpu: CPU the kthread is running on 303 * 304 * Return: whether or not the thread can run. 305 */ 306 static int idle_inject_should_run(unsigned int cpu) 307 { 308 struct idle_inject_thread *iit = 309 per_cpu_ptr(&idle_inject_thread, cpu); 310 311 return iit->should_run; 312 } 313 314 /** 315 * idle_inject_register_full - initialize idle injection on a set of CPUs 316 * @cpumask: CPUs to be affected by idle injection 317 * @update: This callback is called just before waking up CPUs to inject 318 * idle 319 * 320 * This function creates an idle injection control device structure for the 321 * given set of CPUs and initializes the timer associated with it. This 322 * function also allows to register update()callback. 323 * It does not start any injection cycles. 324 * 325 * Return: NULL if memory allocation fails, idle injection control device 326 * pointer on success. 327 */ 328 329 struct idle_inject_device *idle_inject_register_full(struct cpumask *cpumask, 330 bool (*update)(void)) 331 { 332 struct idle_inject_device *ii_dev; 333 int cpu, cpu_rb; 334 335 ii_dev = kzalloc(sizeof(*ii_dev) + cpumask_size(), GFP_KERNEL); 336 if (!ii_dev) 337 return NULL; 338 339 cpumask_copy(to_cpumask(ii_dev->cpumask), cpumask); 340 hrtimer_init(&ii_dev->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 341 ii_dev->timer.function = idle_inject_timer_fn; 342 ii_dev->latency_us = UINT_MAX; 343 ii_dev->update = update; 344 345 for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) { 346 347 if (per_cpu(idle_inject_device, cpu)) { 348 pr_err("cpu%d is already registered\n", cpu); 349 goto out_rollback; 350 } 351 352 per_cpu(idle_inject_device, cpu) = ii_dev; 353 } 354 355 return ii_dev; 356 357 out_rollback: 358 for_each_cpu(cpu_rb, to_cpumask(ii_dev->cpumask)) { 359 if (cpu == cpu_rb) 360 break; 361 per_cpu(idle_inject_device, cpu_rb) = NULL; 362 } 363 364 kfree(ii_dev); 365 366 return NULL; 367 } 368 EXPORT_SYMBOL_NS_GPL(idle_inject_register_full, IDLE_INJECT); 369 370 /** 371 * idle_inject_register - initialize idle injection on a set of CPUs 372 * @cpumask: CPUs to be affected by idle injection 373 * 374 * This function creates an idle injection control device structure for the 375 * given set of CPUs and initializes the timer associated with it. It does not 376 * start any injection cycles. 377 * 378 * Return: NULL if memory allocation fails, idle injection control device 379 * pointer on success. 380 */ 381 struct idle_inject_device *idle_inject_register(struct cpumask *cpumask) 382 { 383 return idle_inject_register_full(cpumask, NULL); 384 } 385 EXPORT_SYMBOL_NS_GPL(idle_inject_register, IDLE_INJECT); 386 387 /** 388 * idle_inject_unregister - unregister idle injection control device 389 * @ii_dev: idle injection control device to unregister 390 * 391 * The function stops idle injection for the given control device, 392 * unregisters its kthreads and frees memory allocated when that device was 393 * created. 394 */ 395 void idle_inject_unregister(struct idle_inject_device *ii_dev) 396 { 397 unsigned int cpu; 398 399 idle_inject_stop(ii_dev); 400 401 for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) 402 per_cpu(idle_inject_device, cpu) = NULL; 403 404 kfree(ii_dev); 405 } 406 EXPORT_SYMBOL_NS_GPL(idle_inject_unregister, IDLE_INJECT); 407 408 static struct smp_hotplug_thread idle_inject_threads = { 409 .store = &idle_inject_thread.tsk, 410 .setup = idle_inject_setup, 411 .thread_fn = idle_inject_fn, 412 .thread_comm = "idle_inject/%u", 413 .thread_should_run = idle_inject_should_run, 414 }; 415 416 static int __init idle_inject_init(void) 417 { 418 return smpboot_register_percpu_thread(&idle_inject_threads); 419 } 420 early_initcall(idle_inject_init); 421