1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Kernel internal schedule timeout and sleeping functions 4 */ 5 6 #include <linux/delay.h> 7 #include <linux/jiffies.h> 8 #include <linux/timer.h> 9 #include <linux/sched/signal.h> 10 #include <linux/sched/debug.h> 11 12 #include "tick-internal.h" 13 14 /* 15 * Since schedule_timeout()'s timer is defined on the stack, it must store 16 * the target task on the stack as well. 17 */ 18 struct process_timer { 19 struct timer_list timer; 20 struct task_struct *task; 21 }; 22 23 static void process_timeout(struct timer_list *t) 24 { 25 struct process_timer *timeout = from_timer(timeout, t, timer); 26 27 wake_up_process(timeout->task); 28 } 29 30 /** 31 * schedule_timeout - sleep until timeout 32 * @timeout: timeout value in jiffies 33 * 34 * Make the current task sleep until @timeout jiffies have elapsed. 35 * The function behavior depends on the current task state 36 * (see also set_current_state() description): 37 * 38 * %TASK_RUNNING - the scheduler is called, but the task does not sleep 39 * at all. That happens because sched_submit_work() does nothing for 40 * tasks in %TASK_RUNNING state. 41 * 42 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to 43 * pass before the routine returns unless the current task is explicitly 44 * woken up, (e.g. by wake_up_process()). 45 * 46 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is 47 * delivered to the current task or the current task is explicitly woken 48 * up. 49 * 50 * The current task state is guaranteed to be %TASK_RUNNING when this 51 * routine returns. 52 * 53 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule 54 * the CPU away without a bound on the timeout. In this case the return 55 * value will be %MAX_SCHEDULE_TIMEOUT. 56 * 57 * Returns: 0 when the timer has expired otherwise the remaining time in 58 * jiffies will be returned. In all cases the return value is guaranteed 59 * to be non-negative. 60 */ 61 signed long __sched schedule_timeout(signed long timeout) 62 { 63 struct process_timer timer; 64 unsigned long expire; 65 66 switch (timeout) { 67 case MAX_SCHEDULE_TIMEOUT: 68 /* 69 * These two special cases are useful to be comfortable 70 * in the caller. Nothing more. We could take 71 * MAX_SCHEDULE_TIMEOUT from one of the negative value 72 * but I' d like to return a valid offset (>=0) to allow 73 * the caller to do everything it want with the retval. 74 */ 75 schedule(); 76 goto out; 77 default: 78 /* 79 * Another bit of PARANOID. Note that the retval will be 80 * 0 since no piece of kernel is supposed to do a check 81 * for a negative retval of schedule_timeout() (since it 82 * should never happens anyway). You just have the printk() 83 * that will tell you if something is gone wrong and where. 84 */ 85 if (timeout < 0) { 86 pr_err("%s: wrong timeout value %lx\n", __func__, timeout); 87 dump_stack(); 88 __set_current_state(TASK_RUNNING); 89 goto out; 90 } 91 } 92 93 expire = timeout + jiffies; 94 95 timer.task = current; 96 timer_setup_on_stack(&timer.timer, process_timeout, 0); 97 timer.timer.expires = expire; 98 add_timer(&timer.timer); 99 schedule(); 100 del_timer_sync(&timer.timer); 101 102 /* Remove the timer from the object tracker */ 103 destroy_timer_on_stack(&timer.timer); 104 105 timeout = expire - jiffies; 106 107 out: 108 return timeout < 0 ? 0 : timeout; 109 } 110 EXPORT_SYMBOL(schedule_timeout); 111 112 /* 113 * __set_current_state() can be used in schedule_timeout_*() functions, because 114 * schedule_timeout() calls schedule() unconditionally. 115 */ 116 117 /** 118 * schedule_timeout_interruptible - sleep until timeout (interruptible) 119 * @timeout: timeout value in jiffies 120 * 121 * See schedule_timeout() for details. 122 * 123 * Task state is set to TASK_INTERRUPTIBLE before starting the timeout. 124 */ 125 signed long __sched schedule_timeout_interruptible(signed long timeout) 126 { 127 __set_current_state(TASK_INTERRUPTIBLE); 128 return schedule_timeout(timeout); 129 } 130 EXPORT_SYMBOL(schedule_timeout_interruptible); 131 132 /** 133 * schedule_timeout_killable - sleep until timeout (killable) 134 * @timeout: timeout value in jiffies 135 * 136 * See schedule_timeout() for details. 137 * 138 * Task state is set to TASK_KILLABLE before starting the timeout. 139 */ 140 signed long __sched schedule_timeout_killable(signed long timeout) 141 { 142 __set_current_state(TASK_KILLABLE); 143 return schedule_timeout(timeout); 144 } 145 EXPORT_SYMBOL(schedule_timeout_killable); 146 147 /** 148 * schedule_timeout_uninterruptible - sleep until timeout (uninterruptible) 149 * @timeout: timeout value in jiffies 150 * 151 * See schedule_timeout() for details. 152 * 153 * Task state is set to TASK_UNINTERRUPTIBLE before starting the timeout. 154 */ 155 signed long __sched schedule_timeout_uninterruptible(signed long timeout) 156 { 157 __set_current_state(TASK_UNINTERRUPTIBLE); 158 return schedule_timeout(timeout); 159 } 160 EXPORT_SYMBOL(schedule_timeout_uninterruptible); 161 162 /** 163 * schedule_timeout_idle - sleep until timeout (idle) 164 * @timeout: timeout value in jiffies 165 * 166 * See schedule_timeout() for details. 167 * 168 * Task state is set to TASK_IDLE before starting the timeout. It is similar to 169 * schedule_timeout_uninterruptible(), except this task will not contribute to 170 * load average. 171 */ 172 signed long __sched schedule_timeout_idle(signed long timeout) 173 { 174 __set_current_state(TASK_IDLE); 175 return schedule_timeout(timeout); 176 } 177 EXPORT_SYMBOL(schedule_timeout_idle); 178 179 /** 180 * schedule_hrtimeout_range_clock - sleep until timeout 181 * @expires: timeout value (ktime_t) 182 * @delta: slack in expires timeout (ktime_t) 183 * @mode: timer mode 184 * @clock_id: timer clock to be used 185 * 186 * Details are explained in schedule_hrtimeout_range() function description as 187 * this function is commonly used. 188 */ 189 int __sched schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, 190 const enum hrtimer_mode mode, clockid_t clock_id) 191 { 192 struct hrtimer_sleeper t; 193 194 /* 195 * Optimize when a zero timeout value is given. It does not 196 * matter whether this is an absolute or a relative time. 197 */ 198 if (expires && *expires == 0) { 199 __set_current_state(TASK_RUNNING); 200 return 0; 201 } 202 203 /* 204 * A NULL parameter means "infinite" 205 */ 206 if (!expires) { 207 schedule(); 208 return -EINTR; 209 } 210 211 hrtimer_setup_sleeper_on_stack(&t, clock_id, mode); 212 hrtimer_set_expires_range_ns(&t.timer, *expires, delta); 213 hrtimer_sleeper_start_expires(&t, mode); 214 215 if (likely(t.task)) 216 schedule(); 217 218 hrtimer_cancel(&t.timer); 219 destroy_hrtimer_on_stack(&t.timer); 220 221 __set_current_state(TASK_RUNNING); 222 223 return !t.task ? 0 : -EINTR; 224 } 225 EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock); 226 227 /** 228 * schedule_hrtimeout_range - sleep until timeout 229 * @expires: timeout value (ktime_t) 230 * @delta: slack in expires timeout (ktime_t) 231 * @mode: timer mode 232 * 233 * Make the current task sleep until the given expiry time has 234 * elapsed. The routine will return immediately unless 235 * the current task state has been set (see set_current_state()). 236 * 237 * The @delta argument gives the kernel the freedom to schedule the 238 * actual wakeup to a time that is both power and performance friendly 239 * for regular (non RT/DL) tasks. 240 * The kernel give the normal best effort behavior for "@expires+@delta", 241 * but may decide to fire the timer earlier, but no earlier than @expires. 242 * 243 * You can set the task state as follows - 244 * 245 * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to 246 * pass before the routine returns unless the current task is explicitly 247 * woken up, (e.g. by wake_up_process()). 248 * 249 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is 250 * delivered to the current task or the current task is explicitly woken 251 * up. 252 * 253 * The current task state is guaranteed to be TASK_RUNNING when this 254 * routine returns. 255 * 256 * Returns: 0 when the timer has expired. If the task was woken before the 257 * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or 258 * by an explicit wakeup, it returns -EINTR. 259 */ 260 int __sched schedule_hrtimeout_range(ktime_t *expires, u64 delta, 261 const enum hrtimer_mode mode) 262 { 263 return schedule_hrtimeout_range_clock(expires, delta, mode, 264 CLOCK_MONOTONIC); 265 } 266 EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); 267 268 /** 269 * schedule_hrtimeout - sleep until timeout 270 * @expires: timeout value (ktime_t) 271 * @mode: timer mode 272 * 273 * See schedule_hrtimeout_range() for details. @delta argument of 274 * schedule_hrtimeout_range() is set to 0 and has therefore no impact. 275 */ 276 int __sched schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode) 277 { 278 return schedule_hrtimeout_range(expires, 0, mode); 279 } 280 EXPORT_SYMBOL_GPL(schedule_hrtimeout); 281 282 /** 283 * msleep - sleep safely even with waitqueue interruptions 284 * @msecs: Requested sleep duration in milliseconds 285 * 286 * msleep() uses jiffy based timeouts for the sleep duration. Because of the 287 * design of the timer wheel, the maximum additional percentage delay (slack) is 288 * 12.5%. This is only valid for timers which will end up in level 1 or a higher 289 * level of the timer wheel. For explanation of those 12.5% please check the 290 * detailed description about the basics of the timer wheel. 291 * 292 * The slack of timers which will end up in level 0 depends on sleep duration 293 * (msecs) and HZ configuration and can be calculated in the following way (with 294 * the timer wheel design restriction that the slack is not less than 12.5%): 295 * 296 * ``slack = MSECS_PER_TICK / msecs`` 297 * 298 * When the allowed slack of the callsite is known, the calculation could be 299 * turned around to find the minimal allowed sleep duration to meet the 300 * constraints. For example: 301 * 302 * * ``HZ=1000`` with ``slack=25%``: ``MSECS_PER_TICK / slack = 1 / (1/4) = 4``: 303 * all sleep durations greater or equal 4ms will meet the constraints. 304 * * ``HZ=1000`` with ``slack=12.5%``: ``MSECS_PER_TICK / slack = 1 / (1/8) = 8``: 305 * all sleep durations greater or equal 8ms will meet the constraints. 306 * * ``HZ=250`` with ``slack=25%``: ``MSECS_PER_TICK / slack = 4 / (1/4) = 16``: 307 * all sleep durations greater or equal 16ms will meet the constraints. 308 * * ``HZ=250`` with ``slack=12.5%``: ``MSECS_PER_TICK / slack = 4 / (1/8) = 32``: 309 * all sleep durations greater or equal 32ms will meet the constraints. 310 * 311 * See also the signal aware variant msleep_interruptible(). 312 */ 313 void msleep(unsigned int msecs) 314 { 315 unsigned long timeout = msecs_to_jiffies(msecs); 316 317 while (timeout) 318 timeout = schedule_timeout_uninterruptible(timeout); 319 } 320 EXPORT_SYMBOL(msleep); 321 322 /** 323 * msleep_interruptible - sleep waiting for signals 324 * @msecs: Requested sleep duration in milliseconds 325 * 326 * See msleep() for some basic information. 327 * 328 * The difference between msleep() and msleep_interruptible() is that the sleep 329 * could be interrupted by a signal delivery and then returns early. 330 * 331 * Returns: The remaining time of the sleep duration transformed to msecs (see 332 * schedule_timeout() for details). 333 */ 334 unsigned long msleep_interruptible(unsigned int msecs) 335 { 336 unsigned long timeout = msecs_to_jiffies(msecs); 337 338 while (timeout && !signal_pending(current)) 339 timeout = schedule_timeout_interruptible(timeout); 340 return jiffies_to_msecs(timeout); 341 } 342 EXPORT_SYMBOL(msleep_interruptible); 343 344 /** 345 * usleep_range_state - Sleep for an approximate time in a given state 346 * @min: Minimum time in usecs to sleep 347 * @max: Maximum time in usecs to sleep 348 * @state: State of the current task that will be while sleeping 349 * 350 * usleep_range_state() sleeps at least for the minimum specified time but not 351 * longer than the maximum specified amount of time. The range might reduce 352 * power usage by allowing hrtimers to coalesce an already scheduled interrupt 353 * with this hrtimer. In the worst case, an interrupt is scheduled for the upper 354 * bound. 355 * 356 * The sleeping task is set to the specified state before starting the sleep. 357 * 358 * In non-atomic context where the exact wakeup time is flexible, use 359 * usleep_range() or its variants instead of udelay(). The sleep improves 360 * responsiveness by avoiding the CPU-hogging busy-wait of udelay(). 361 */ 362 void __sched usleep_range_state(unsigned long min, unsigned long max, unsigned int state) 363 { 364 ktime_t exp = ktime_add_us(ktime_get(), min); 365 u64 delta = (u64)(max - min) * NSEC_PER_USEC; 366 367 if (WARN_ON_ONCE(max < min)) 368 delta = 0; 369 370 for (;;) { 371 __set_current_state(state); 372 /* Do not return before the requested sleep time has elapsed */ 373 if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) 374 break; 375 } 376 } 377 EXPORT_SYMBOL(usleep_range_state); 378