1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright 2018 Linaro Limited 4 * 5 * Author: Daniel Lezcano <daniel.lezcano@linaro.org> 6 * 7 * The idle injection framework provides a way to force CPUs to enter idle 8 * states for a specified fraction of time over a specified period. 9 * 10 * It relies on the smpboot kthreads feature providing common code for CPU 11 * hotplug and thread [un]parking. 12 * 13 * All of the kthreads used for idle injection are created at init time. 14 * 15 * Next, the users of the idle injection framework provide a cpumask via 16 * its register function. The kthreads will be synchronized with respect to 17 * this cpumask. 18 * 19 * The idle + run duration is specified via separate helpers and that allows 20 * idle injection to be started. 21 * 22 * The idle injection kthreads will call play_idle_precise() with the idle 23 * duration and max allowed latency specified as per the above. 24 * 25 * After all of them have been woken up, a timer is set to start the next idle 26 * injection cycle. 27 * 28 * The timer interrupt handler will wake up the idle injection kthreads for 29 * all of the CPUs in the cpumask provided by the user. 30 * 31 * Idle injection is stopped synchronously and no leftover idle injection 32 * kthread activity after its completion is guaranteed. 33 * 34 * It is up to the user of this framework to provide a lock for higher-level 35 * synchronization to prevent race conditions like starting idle injection 36 * while unregistering from the framework. 37 */ 38 #define pr_fmt(fmt) "ii_dev: " fmt 39 40 #include <linux/cpu.h> 41 #include <linux/hrtimer.h> 42 #include <linux/kthread.h> 43 #include <linux/sched.h> 44 #include <linux/slab.h> 45 #include <linux/smpboot.h> 46 #include <linux/idle_inject.h> 47 48 #include <uapi/linux/sched/types.h> 49 50 /** 51 * struct idle_inject_thread - task on/off switch structure 52 * @tsk: task injecting the idle cycles 53 * @should_run: whether or not to run the task (for the smpboot kthread API) 54 */ 55 struct idle_inject_thread { 56 struct task_struct *tsk; 57 int should_run; 58 }; 59 60 /** 61 * struct idle_inject_device - idle injection data 62 * @timer: idle injection period timer 63 * @idle_duration_us: duration of CPU idle time to inject 64 * @run_duration_us: duration of CPU run time to allow 65 * @latency_us: max allowed latency 66 * @update: Optional callback deciding whether or not to skip idle 67 * injection in the given cycle. 68 * @cpumask: mask of CPUs affected by idle injection 69 * 70 * This structure is used to define per instance idle inject device data. Each 71 * instance has an idle duration, a run duration and mask of CPUs to inject 72 * idle. 73 * 74 * Actual CPU idle time is injected by calling kernel scheduler interface 75 * play_idle_precise(). There is one optional callback that can be registered 76 * by calling idle_inject_register_full(): 77 * 78 * update() - This callback is invoked just before waking up CPUs to inject 79 * idle. If it returns false, CPUs are not woken up to inject idle in the given 80 * cycle. It also allows the caller to readjust the idle and run duration by 81 * calling idle_inject_set_duration() for the next cycle. 82 */ 83 struct idle_inject_device { 84 struct hrtimer timer; 85 unsigned int idle_duration_us; 86 unsigned int run_duration_us; 87 unsigned int latency_us; 88 bool (*update)(void); 89 unsigned long cpumask[]; 90 }; 91 92 static DEFINE_PER_CPU(struct idle_inject_thread, idle_inject_thread); 93 static DEFINE_PER_CPU(struct idle_inject_device *, idle_inject_device); 94 95 /** 96 * idle_inject_wakeup - Wake up idle injection threads 97 * @ii_dev: target idle injection device 98 * 99 * Every idle injection task associated with the given idle injection device 100 * and running on an online CPU will be woken up. 101 */ 102 static void idle_inject_wakeup(struct idle_inject_device *ii_dev) 103 { 104 struct idle_inject_thread *iit; 105 unsigned int cpu; 106 107 for_each_cpu_and(cpu, to_cpumask(ii_dev->cpumask), cpu_online_mask) { 108 iit = per_cpu_ptr(&idle_inject_thread, cpu); 109 iit->should_run = 1; 110 wake_up_process(iit->tsk); 111 } 112 } 113 114 /** 115 * idle_inject_timer_fn - idle injection timer function 116 * @timer: idle injection hrtimer 117 * 118 * This function is called when the idle injection timer expires. It wakes up 119 * idle injection tasks associated with the timer and they, in turn, invoke 120 * play_idle_precise() to inject a specified amount of CPU idle time. 121 * 122 * Return: HRTIMER_RESTART. 123 */ 124 static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer) 125 { 126 unsigned int duration_us; 127 struct idle_inject_device *ii_dev = 128 container_of(timer, struct idle_inject_device, timer); 129 130 if (!ii_dev->update || ii_dev->update()) 131 idle_inject_wakeup(ii_dev); 132 133 duration_us = READ_ONCE(ii_dev->run_duration_us); 134 duration_us += READ_ONCE(ii_dev->idle_duration_us); 135 136 hrtimer_forward_now(timer, us_to_ktime(duration_us)); 137 138 return HRTIMER_RESTART; 139 } 140 141 /** 142 * idle_inject_fn - idle injection work function 143 * @cpu: the CPU owning the task 144 * 145 * This function calls play_idle_precise() to inject a specified amount of CPU 146 * idle time. 147 */ 148 static void idle_inject_fn(unsigned int cpu) 149 { 150 struct idle_inject_device *ii_dev; 151 struct idle_inject_thread *iit; 152 153 ii_dev = per_cpu(idle_inject_device, cpu); 154 iit = per_cpu_ptr(&idle_inject_thread, cpu); 155 156 /* 157 * Let the smpboot main loop know that the task should not run again. 158 */ 159 iit->should_run = 0; 160 161 play_idle_precise(READ_ONCE(ii_dev->idle_duration_us) * NSEC_PER_USEC, 162 READ_ONCE(ii_dev->latency_us) * NSEC_PER_USEC); 163 } 164 165 /** 166 * idle_inject_set_duration - idle and run duration update helper 167 * @ii_dev: idle injection control device structure 168 * @run_duration_us: CPU run time to allow in microseconds 169 * @idle_duration_us: CPU idle time to inject in microseconds 170 */ 171 void idle_inject_set_duration(struct idle_inject_device *ii_dev, 172 unsigned int run_duration_us, 173 unsigned int idle_duration_us) 174 { 175 if (run_duration_us + idle_duration_us) { 176 WRITE_ONCE(ii_dev->run_duration_us, run_duration_us); 177 WRITE_ONCE(ii_dev->idle_duration_us, idle_duration_us); 178 } 179 if (!run_duration_us) 180 pr_debug("CPU is forced to 100 percent idle\n"); 181 } 182 EXPORT_SYMBOL_NS_GPL(idle_inject_set_duration, "IDLE_INJECT"); 183 184 /** 185 * idle_inject_get_duration - idle and run duration retrieval helper 186 * @ii_dev: idle injection control device structure 187 * @run_duration_us: memory location to store the current CPU run time 188 * @idle_duration_us: memory location to store the current CPU idle time 189 */ 190 void idle_inject_get_duration(struct idle_inject_device *ii_dev, 191 unsigned int *run_duration_us, 192 unsigned int *idle_duration_us) 193 { 194 *run_duration_us = READ_ONCE(ii_dev->run_duration_us); 195 *idle_duration_us = READ_ONCE(ii_dev->idle_duration_us); 196 } 197 EXPORT_SYMBOL_NS_GPL(idle_inject_get_duration, "IDLE_INJECT"); 198 199 /** 200 * idle_inject_set_latency - set the maximum latency allowed 201 * @ii_dev: idle injection control device structure 202 * @latency_us: set the latency requirement for the idle state 203 */ 204 void idle_inject_set_latency(struct idle_inject_device *ii_dev, 205 unsigned int latency_us) 206 { 207 WRITE_ONCE(ii_dev->latency_us, latency_us); 208 } 209 EXPORT_SYMBOL_NS_GPL(idle_inject_set_latency, "IDLE_INJECT"); 210 211 /** 212 * idle_inject_start - start idle injections 213 * @ii_dev: idle injection control device structure 214 * 215 * The function starts idle injection by first waking up all of the idle 216 * injection kthreads associated with @ii_dev to let them inject CPU idle time 217 * sets up a timer to start the next idle injection period. 218 * 219 * Return: -EINVAL if the CPU idle or CPU run time is not set or 0 on success. 220 */ 221 int idle_inject_start(struct idle_inject_device *ii_dev) 222 { 223 unsigned int idle_duration_us = READ_ONCE(ii_dev->idle_duration_us); 224 unsigned int run_duration_us = READ_ONCE(ii_dev->run_duration_us); 225 226 if (!(idle_duration_us + run_duration_us)) 227 return -EINVAL; 228 229 pr_debug("Starting injecting idle cycles on CPUs '%*pbl'\n", 230 cpumask_pr_args(to_cpumask(ii_dev->cpumask))); 231 232 idle_inject_wakeup(ii_dev); 233 234 hrtimer_start(&ii_dev->timer, 235 us_to_ktime(idle_duration_us + run_duration_us), 236 HRTIMER_MODE_REL); 237 238 return 0; 239 } 240 EXPORT_SYMBOL_NS_GPL(idle_inject_start, "IDLE_INJECT"); 241 242 /** 243 * idle_inject_stop - stops idle injections 244 * @ii_dev: idle injection control device structure 245 * 246 * The function stops idle injection and waits for the threads to finish work. 247 * If CPU idle time is being injected when this function runs, then it will 248 * wait until the end of the cycle. 249 * 250 * When it returns, there is no more idle injection kthread activity. The 251 * kthreads are scheduled out and the periodic timer is off. 252 */ 253 void idle_inject_stop(struct idle_inject_device *ii_dev) 254 { 255 struct idle_inject_thread *iit; 256 unsigned int cpu; 257 258 pr_debug("Stopping idle injection on CPUs '%*pbl'\n", 259 cpumask_pr_args(to_cpumask(ii_dev->cpumask))); 260 261 hrtimer_cancel(&ii_dev->timer); 262 263 /* 264 * Stopping idle injection requires all of the idle injection kthreads 265 * associated with the given cpumask to be parked and stay that way, so 266 * prevent CPUs from going online at this point. Any CPUs going online 267 * after the loop below will be covered by clearing the should_run flag 268 * that will cause the smpboot main loop to schedule them out. 269 */ 270 cpu_hotplug_disable(); 271 272 /* 273 * Iterate over all (online + offline) CPUs here in case one of them 274 * goes offline with the should_run flag set so as to prevent its idle 275 * injection kthread from running when the CPU goes online again after 276 * the ii_dev has been freed. 277 */ 278 for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) { 279 iit = per_cpu_ptr(&idle_inject_thread, cpu); 280 iit->should_run = 0; 281 282 wait_task_inactive(iit->tsk, TASK_ANY); 283 } 284 285 cpu_hotplug_enable(); 286 } 287 EXPORT_SYMBOL_NS_GPL(idle_inject_stop, "IDLE_INJECT"); 288 289 /** 290 * idle_inject_setup - prepare the current task for idle injection 291 * @cpu: not used 292 * 293 * Called once, this function is in charge of setting the current task's 294 * scheduler parameters to make it an RT task. 295 */ 296 static void idle_inject_setup(unsigned int cpu) 297 { 298 sched_set_fifo(current); 299 } 300 301 /** 302 * idle_inject_should_run - function helper for the smpboot API 303 * @cpu: CPU the kthread is running on 304 * 305 * Return: whether or not the thread can run. 306 */ 307 static int idle_inject_should_run(unsigned int cpu) 308 { 309 struct idle_inject_thread *iit = 310 per_cpu_ptr(&idle_inject_thread, cpu); 311 312 return iit->should_run; 313 } 314 315 /** 316 * idle_inject_register_full - initialize idle injection on a set of CPUs 317 * @cpumask: CPUs to be affected by idle injection 318 * @update: This callback is called just before waking up CPUs to inject 319 * idle 320 * 321 * This function creates an idle injection control device structure for the 322 * given set of CPUs and initializes the timer associated with it. This 323 * function also allows to register update()callback. 324 * It does not start any injection cycles. 325 * 326 * Return: NULL if memory allocation fails, idle injection control device 327 * pointer on success. 328 */ 329 330 struct idle_inject_device *idle_inject_register_full(struct cpumask *cpumask, 331 bool (*update)(void)) 332 { 333 struct idle_inject_device *ii_dev; 334 int cpu, cpu_rb; 335 336 ii_dev = kzalloc(sizeof(*ii_dev) + cpumask_size(), GFP_KERNEL); 337 if (!ii_dev) 338 return NULL; 339 340 cpumask_copy(to_cpumask(ii_dev->cpumask), cpumask); 341 hrtimer_setup(&ii_dev->timer, idle_inject_timer_fn, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 342 ii_dev->latency_us = UINT_MAX; 343 ii_dev->update = update; 344 345 for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) { 346 347 if (per_cpu(idle_inject_device, cpu)) { 348 pr_err("cpu%d is already registered\n", cpu); 349 goto out_rollback; 350 } 351 352 per_cpu(idle_inject_device, cpu) = ii_dev; 353 } 354 355 return ii_dev; 356 357 out_rollback: 358 for_each_cpu(cpu_rb, to_cpumask(ii_dev->cpumask)) { 359 if (cpu == cpu_rb) 360 break; 361 per_cpu(idle_inject_device, cpu_rb) = NULL; 362 } 363 364 kfree(ii_dev); 365 366 return NULL; 367 } 368 EXPORT_SYMBOL_NS_GPL(idle_inject_register_full, "IDLE_INJECT"); 369 370 /** 371 * idle_inject_register - initialize idle injection on a set of CPUs 372 * @cpumask: CPUs to be affected by idle injection 373 * 374 * This function creates an idle injection control device structure for the 375 * given set of CPUs and initializes the timer associated with it. It does not 376 * start any injection cycles. 377 * 378 * Return: NULL if memory allocation fails, idle injection control device 379 * pointer on success. 380 */ 381 struct idle_inject_device *idle_inject_register(struct cpumask *cpumask) 382 { 383 return idle_inject_register_full(cpumask, NULL); 384 } 385 EXPORT_SYMBOL_NS_GPL(idle_inject_register, "IDLE_INJECT"); 386 387 /** 388 * idle_inject_unregister - unregister idle injection control device 389 * @ii_dev: idle injection control device to unregister 390 * 391 * The function stops idle injection for the given control device, 392 * unregisters its kthreads and frees memory allocated when that device was 393 * created. 394 */ 395 void idle_inject_unregister(struct idle_inject_device *ii_dev) 396 { 397 unsigned int cpu; 398 399 idle_inject_stop(ii_dev); 400 401 for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) 402 per_cpu(idle_inject_device, cpu) = NULL; 403 404 kfree(ii_dev); 405 } 406 EXPORT_SYMBOL_NS_GPL(idle_inject_unregister, "IDLE_INJECT"); 407 408 static struct smp_hotplug_thread idle_inject_threads = { 409 .store = &idle_inject_thread.tsk, 410 .setup = idle_inject_setup, 411 .thread_fn = idle_inject_fn, 412 .thread_comm = "idle_inject/%u", 413 .thread_should_run = idle_inject_should_run, 414 }; 415 416 static int __init idle_inject_init(void) 417 { 418 return smpboot_register_percpu_thread(&idle_inject_threads); 419 } 420 early_initcall(idle_inject_init); 421