xref: /linux/io_uring/timeout.c (revision 6dfafbd0299a60bfb5d5e277fdf100037c7ded07)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/kernel.h>
3 #include <linux/errno.h>
4 #include <linux/file.h>
5 #include <linux/io_uring.h>
6 
7 #include <trace/events/io_uring.h>
8 
9 #include <uapi/linux/io_uring.h>
10 
11 #include "io_uring.h"
12 #include "refs.h"
13 #include "cancel.h"
14 #include "timeout.h"
15 
16 struct io_timeout {
17 	struct file			*file;
18 	u32				off;
19 	u32				target_seq;
20 	u32				repeats;
21 	struct list_head		list;
22 	/* head of the link, used by linked timeouts only */
23 	struct io_kiocb			*head;
24 	/* for linked completions */
25 	struct io_kiocb			*prev;
26 };
27 
28 struct io_timeout_rem {
29 	struct file			*file;
30 	u64				addr;
31 
32 	/* timeout update */
33 	struct timespec64		ts;
34 	u32				flags;
35 	bool				ltimeout;
36 };
37 
38 static struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req,
39 						   struct io_kiocb *link);
40 
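/*
 * A timeout with no event offset (off == 0), or any multishot timeout, is
 * not tied to CQ progress: io_timeout() queues it at the tail of
 * ->timeout_list and io_flush_timeouts() stops scanning when it reaches one.
 */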
41 static inline bool io_is_timeout_noseq(struct io_kiocb *req)
42 {
43 	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
44 	struct io_timeout_data *data = req->async_data;
45 
46 	return !timeout->off || data->flags & IORING_TIMEOUT_MULTISHOT;
47 }
48 
49 static inline void io_put_req(struct io_kiocb *req)
50 {
51 	if (req_ref_put_and_test(req)) {
52 		io_queue_next(req);
53 		io_free_req(req);
54 	}
55 }
56 
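/*
 * Decide whether a fired timeout is done. Single-shot timeouts always are;
 * a multishot timeout keeps going while it has no event offset, or while
 * it still has fixed repeats left.
 */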
57 static inline bool io_timeout_finish(struct io_timeout *timeout,
58 				     struct io_timeout_data *data)
59 {
60 	if (!(data->flags & IORING_TIMEOUT_MULTISHOT))
61 		return true;
62 
63 	if (!timeout->off || (timeout->repeats && --timeout->repeats))
64 		return false;
65 
66 	return true;
67 }
68 
69 static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer);
70 
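/*
 * Task-work completion for an expired timeout. A multishot timeout that
 * isn't finished posts a CQE flagged IORING_CQE_F_MORE, is put back on
 * ->timeout_list and re-arms its hrtimer; everything else completes here.
 */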
71 static void io_timeout_complete(struct io_tw_req tw_req, io_tw_token_t tw)
72 {
73 	struct io_kiocb *req = tw_req.req;
74 	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
75 	struct io_timeout_data *data = req->async_data;
76 	struct io_ring_ctx *ctx = req->ctx;
77 
78 	if (!io_timeout_finish(timeout, data)) {
79 		if (io_req_post_cqe(req, -ETIME, IORING_CQE_F_MORE)) {
80 			/* re-arm timer */
81 			raw_spin_lock_irq(&ctx->timeout_lock);
82 			list_add(&timeout->list, ctx->timeout_list.prev);
83 			hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
84 			raw_spin_unlock_irq(&ctx->timeout_lock);
85 			return;
86 		}
87 	}
88 
89 	io_req_task_complete(tw_req, tw);
90 }
91 
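/*
 * Complete, outside of ->timeout_lock, every timeout that io_kill_timeout()
 * moved onto the local @list. A non-zero @err becomes each request's result
 * and marks it failed. Returns true if anything was flushed.
 */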
92 static __cold bool io_flush_killed_timeouts(struct list_head *list, int err)
93 {
94 	if (list_empty(list))
95 		return false;
96 
97 	while (!list_empty(list)) {
98 		struct io_timeout *timeout;
99 		struct io_kiocb *req;
100 
101 		timeout = list_first_entry(list, struct io_timeout, list);
102 		list_del_init(&timeout->list);
103 		req = cmd_to_io_kiocb(timeout);
104 		if (err)
105 			req_set_fail(req);
106 		io_req_queue_tw_complete(req, err);
107 	}
108 
109 	return true;
110 }
111 
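/*
 * Try to cancel the timeout's hrtimer. If its callback isn't currently
 * running, account the timeout in ->cq_timeouts and move it to @list for
 * the caller to complete via io_flush_killed_timeouts().
 */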
112 static void io_kill_timeout(struct io_kiocb *req, struct list_head *list)
113 	__must_hold(&req->ctx->timeout_lock)
114 {
115 	struct io_timeout_data *io = req->async_data;
116 
117 	if (hrtimer_try_to_cancel(&io->timer) != -1) {
118 		struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
119 
120 		atomic_set(&req->ctx->cq_timeouts,
121 			atomic_read(&req->ctx->cq_timeouts) + 1);
122 		list_move_tail(&timeout->list, list);
123 	}
124 }
125 
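/*
 * Scan the sequence-sorted ->timeout_list and kill every sequenced timeout
 * whose target CQ sequence has been reached, then complete the killed
 * entries with a result of 0 once ->timeout_lock has been dropped.
 */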
126 __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
127 {
128 	struct io_timeout *timeout, *tmp;
129 	LIST_HEAD(list);
130 	u32 seq;
131 
132 	raw_spin_lock_irq(&ctx->timeout_lock);
133 	seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
134 
135 	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
136 		struct io_kiocb *req = cmd_to_io_kiocb(timeout);
137 		u32 events_needed, events_got;
138 
139 		if (io_is_timeout_noseq(req))
140 			break;
141 
142 		/*
143 		 * Since seq can easily wrap around over time, subtract
144 		 * the last seq at which timeouts were flushed before comparing.
145 		 * Assuming not more than 2^31-1 events have happened since,
146 		 * these subtractions won't have wrapped, so we can check if
147 		 * target is in [last_seq, current_seq] by comparing the two.
148 		 */
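		/*
		 * Illustrative values: with cq_last_tm_flush == 0xfffffffe, a
		 * target_seq of 0xffffffff (one event away) and seq wrapped to
		 * 2, a naive seq >= target_seq test would fail, while here
		 * events_got (4) >= events_needed (1) fires the timeout.
		 */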
149 		events_needed = timeout->target_seq - ctx->cq_last_tm_flush;
150 		events_got = seq - ctx->cq_last_tm_flush;
151 		if (events_got < events_needed)
152 			break;
153 
154 		io_kill_timeout(req, &list);
155 	}
156 	ctx->cq_last_tm_flush = seq;
157 	raw_spin_unlock_irq(&ctx->timeout_lock);
158 	io_flush_killed_timeouts(&list, 0);
159 }
160 
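/*
 * Task-work handler that completes a failed link chain: each request gets
 * -ECANCELED unless it already failed with a result of its own.
 */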
161 static void io_req_tw_fail_links(struct io_tw_req tw_req, io_tw_token_t tw)
162 {
163 	struct io_kiocb *link = tw_req.req;
164 
165 	io_tw_lock(link->ctx, tw);
166 	while (link) {
167 		struct io_kiocb *nxt = link->link;
168 		long res = -ECANCELED;
169 
170 		if (link->flags & REQ_F_FAIL)
171 			res = link->cqe.res;
172 		link->link = NULL;
173 		io_req_set_res(link, res, 0);
174 		io_req_task_complete((struct io_tw_req){link}, tw);
175 		link = nxt;
176 	}
177 }
178 
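/*
 * Queue task-work to cancel every request linked behind @req, posting or
 * suppressing their CQEs according to REQ_F_SKIP_LINK_CQES on @req.
 */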
179 static void io_fail_links(struct io_kiocb *req)
180 	__must_hold(&req->ctx->completion_lock)
181 {
182 	struct io_kiocb *link = req->link;
183 	bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES;
184 
185 	if (!link)
186 		return;
187 
188 	while (link) {
189 		if (ignore_cqes)
190 			link->flags |= REQ_F_CQE_SKIP;
191 		else
192 			link->flags &= ~REQ_F_CQE_SKIP;
193 		trace_io_uring_fail_link(req, link);
194 		link = link->link;
195 	}
196 
197 	link = req->link;
198 	link->io_task_work.func = io_req_tw_fail_links;
199 	io_req_task_work_add(link);
200 	req->link = NULL;
201 }
202 
203 static inline void io_remove_next_linked(struct io_kiocb *req)
204 {
205 	struct io_kiocb *nxt = req->link;
206 
207 	req->link = nxt->link;
208 	nxt->link = NULL;
209 }
210 
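/*
 * Disarm a linked timeout hanging off @req: one that was never armed
 * (REQ_F_ARM_LTIMEOUT) or one whose hrtimer is pending (REQ_F_LINK_TIMEOUT)
 * is completed with -ECANCELED. If @req itself failed and isn't a hardlink,
 * the rest of its chain is failed as well.
 */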
211 void io_disarm_next(struct io_kiocb *req)
212 	__must_hold(&req->ctx->completion_lock)
213 {
214 	struct io_kiocb *link = NULL;
215 
216 	if (req->flags & REQ_F_ARM_LTIMEOUT) {
217 		link = req->link;
218 		req->flags &= ~REQ_F_ARM_LTIMEOUT;
219 		if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
220 			io_remove_next_linked(req);
221 			io_req_queue_tw_complete(link, -ECANCELED);
222 		}
223 	} else if (req->flags & REQ_F_LINK_TIMEOUT) {
224 		struct io_ring_ctx *ctx = req->ctx;
225 
226 		raw_spin_lock_irq(&ctx->timeout_lock);
227 		if (req->link && req->link->opcode == IORING_OP_LINK_TIMEOUT)
228 			link = __io_disarm_linked_timeout(req, req->link);
229 
230 		raw_spin_unlock_irq(&ctx->timeout_lock);
231 		if (link)
232 			io_req_queue_tw_complete(link, -ECANCELED);
233 	}
234 	if (unlikely((req->flags & REQ_F_FAIL) &&
235 		     !(req->flags & REQ_F_HARDLINK)))
236 		io_fail_links(req);
237 }
238 
239 static struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req,
240 						   struct io_kiocb *link)
241 	__must_hold(&req->ctx->completion_lock)
242 	__must_hold(&req->ctx->timeout_lock)
243 {
244 	struct io_timeout_data *io = link->async_data;
245 	struct io_timeout *timeout = io_kiocb_to_cmd(link, struct io_timeout);
246 
247 	io_remove_next_linked(req);
248 	timeout->head = NULL;
249 	if (hrtimer_try_to_cancel(&io->timer) != -1) {
250 		list_del(&timeout->list);
251 		return link;
252 	}
253 
254 	return NULL;
255 }
256 
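/*
 * hrtimer callback for a normal timeout: drop it from ->timeout_list,
 * account it in ->cq_timeouts and punt the -ETIME completion to task work
 * via io_timeout_complete().
 */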
257 static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
258 {
259 	struct io_timeout_data *data = container_of(timer,
260 						struct io_timeout_data, timer);
261 	struct io_kiocb *req = data->req;
262 	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
263 	struct io_ring_ctx *ctx = req->ctx;
264 	unsigned long flags;
265 
266 	raw_spin_lock_irqsave(&ctx->timeout_lock, flags);
267 	list_del_init(&timeout->list);
268 	atomic_set(&req->ctx->cq_timeouts,
269 		atomic_read(&req->ctx->cq_timeouts) + 1);
270 	raw_spin_unlock_irqrestore(&ctx->timeout_lock, flags);
271 
272 	if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS))
273 		req_set_fail(req);
274 
275 	io_req_set_res(req, -ETIME, 0);
276 	req->io_task_work.func = io_timeout_complete;
277 	io_req_task_work_add(req);
278 	return HRTIMER_NORESTART;
279 }
280 
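/*
 * Find a timeout matching @cd and detach it from ->timeout_list. Returns
 * the request, or ERR_PTR(-ENOENT) if nothing matched or ERR_PTR(-EALREADY)
 * if the hrtimer callback is already running.
 */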
281 static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
282 					   struct io_cancel_data *cd)
283 	__must_hold(&ctx->timeout_lock)
284 {
285 	struct io_timeout *timeout;
286 	struct io_timeout_data *io;
287 	struct io_kiocb *req = NULL;
288 
289 	list_for_each_entry(timeout, &ctx->timeout_list, list) {
290 		struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);
291 
292 		if (io_cancel_req_match(tmp, cd)) {
293 			req = tmp;
294 			break;
295 		}
296 	}
297 	if (!req)
298 		return ERR_PTR(-ENOENT);
299 
300 	io = req->async_data;
301 	if (hrtimer_try_to_cancel(&io->timer) == -1)
302 		return ERR_PTR(-EALREADY);
303 	timeout = io_kiocb_to_cmd(req, struct io_timeout);
304 	list_del_init(&timeout->list);
305 	return req;
306 }
307 
308 int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
309 	__must_hold(&ctx->completion_lock)
310 {
311 	struct io_kiocb *req;
312 
313 	raw_spin_lock_irq(&ctx->timeout_lock);
314 	req = io_timeout_extract(ctx, cd);
315 	raw_spin_unlock_irq(&ctx->timeout_lock);
316 
317 	if (IS_ERR(req))
318 		return PTR_ERR(req);
319 	io_req_task_queue_fail(req, -ECANCELED);
320 	return 0;
321 }
322 
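/*
 * Task-work side of a fired linked timeout: unless task-work itself is
 * being cancelled, try to cancel the request the timeout was guarding,
 * then complete the timeout with the cancellation result or -ETIME.
 */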
323 static void io_req_task_link_timeout(struct io_tw_req tw_req, io_tw_token_t tw)
324 {
325 	struct io_kiocb *req = tw_req.req;
326 	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
327 	struct io_kiocb *prev = timeout->prev;
328 	int ret;
329 
330 	if (prev) {
331 		if (!tw.cancel) {
332 			struct io_cancel_data cd = {
333 				.ctx		= req->ctx,
334 				.data		= prev->cqe.user_data,
335 			};
336 
337 			ret = io_try_cancel(req->tctx, &cd, 0);
338 		} else {
339 			ret = -ECANCELED;
340 		}
341 		io_req_set_res(req, ret ?: -ETIME, 0);
342 		io_req_task_complete(tw_req, tw);
343 		io_put_req(prev);
344 	} else {
345 		io_req_set_res(req, -ETIME, 0);
346 		io_req_task_complete(tw_req, tw);
347 	}
348 }
349 
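/*
 * hrtimer callback for a linked timeout: detach the request it guards,
 * take a reference to it and hand the actual cancellation off to
 * io_req_task_link_timeout() in task-work context.
 */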
350 static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
351 {
352 	struct io_timeout_data *data = container_of(timer,
353 						struct io_timeout_data, timer);
354 	struct io_kiocb *prev, *req = data->req;
355 	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
356 	struct io_ring_ctx *ctx = req->ctx;
357 	unsigned long flags;
358 
359 	raw_spin_lock_irqsave(&ctx->timeout_lock, flags);
360 	prev = timeout->head;
361 	timeout->head = NULL;
362 
363 	/*
364 	 * We don't expect the head to be cleared already; that only happens
365 	 * if we race with completion of the linked work.
366 	 */
367 	if (prev) {
368 		io_remove_next_linked(prev);
369 		if (!req_ref_inc_not_zero(prev))
370 			prev = NULL;
371 	}
372 	list_del(&timeout->list);
373 	timeout->prev = prev;
374 	raw_spin_unlock_irqrestore(&ctx->timeout_lock, flags);
375 
376 	req->io_task_work.func = io_req_task_link_timeout;
377 	io_req_task_work_add(req);
378 	return HRTIMER_NORESTART;
379 }
380 
381 static clockid_t io_timeout_get_clock(struct io_timeout_data *data)
382 {
383 	switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) {
384 	case IORING_TIMEOUT_BOOTTIME:
385 		return CLOCK_BOOTTIME;
386 	case IORING_TIMEOUT_REALTIME:
387 		return CLOCK_REALTIME;
388 	default:
389 		/* can't happen, vetted at prep time */
390 		WARN_ON_ONCE(1);
391 		fallthrough;
392 	case 0:
393 		return CLOCK_MONOTONIC;
394 	}
395 }
396 
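/*
 * IORING_TIMEOUT_UPDATE for a linked timeout: look it up by user_data on
 * ->ltimeout_list and, if its timer hasn't fired yet, re-arm it with the
 * new expiry.
 */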
397 static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
398 				    struct timespec64 *ts, enum hrtimer_mode mode)
399 	__must_hold(&ctx->timeout_lock)
400 {
401 	struct io_timeout_data *io;
402 	struct io_timeout *timeout;
403 	struct io_kiocb *req = NULL;
404 
405 	list_for_each_entry(timeout, &ctx->ltimeout_list, list) {
406 		struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);
407 
408 		if (user_data == tmp->cqe.user_data) {
409 			req = tmp;
410 			break;
411 		}
412 	}
413 	if (!req)
414 		return -ENOENT;
415 
416 	io = req->async_data;
417 	if (hrtimer_try_to_cancel(&io->timer) == -1)
418 		return -EALREADY;
419 	hrtimer_setup(&io->timer, io_link_timeout_fn, io_timeout_get_clock(io), mode);
420 	hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode);
421 	return 0;
422 }
423 
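/*
 * IORING_TIMEOUT_UPDATE for a normal timeout: extract it, clear its event
 * offset so it becomes a pure timer, and re-arm it with the new expiry.
 */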
424 static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
425 			     struct timespec64 *ts, enum hrtimer_mode mode)
426 	__must_hold(&ctx->timeout_lock)
427 {
428 	struct io_cancel_data cd = { .ctx = ctx, .data = user_data, };
429 	struct io_kiocb *req = io_timeout_extract(ctx, &cd);
430 	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
431 	struct io_timeout_data *data;
432 
433 	if (IS_ERR(req))
434 		return PTR_ERR(req);
435 
436 	timeout->off = 0; /* noseq */
437 	data = req->async_data;
438 	data->ts = *ts;
439 
440 	list_add_tail(&timeout->list, &ctx->timeout_list);
441 	hrtimer_setup(&data->timer, io_timeout_fn, io_timeout_get_clock(data), mode);
442 	hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), mode);
443 	return 0;
444 }
445 
446 int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
447 {
448 	struct io_timeout_rem *tr = io_kiocb_to_cmd(req, struct io_timeout_rem);
449 
450 	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
451 		return -EINVAL;
452 	if (sqe->buf_index || sqe->len || sqe->splice_fd_in)
453 		return -EINVAL;
454 
455 	tr->ltimeout = false;
456 	tr->addr = READ_ONCE(sqe->addr);
457 	tr->flags = READ_ONCE(sqe->timeout_flags);
458 	if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) {
459 		if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
460 			return -EINVAL;
461 		if (tr->flags & IORING_LINK_TIMEOUT_UPDATE)
462 			tr->ltimeout = true;
463 		if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS))
464 			return -EINVAL;
465 		if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2)))
466 			return -EFAULT;
467 		if (tr->ts.tv_sec < 0 || tr->ts.tv_nsec < 0)
468 			return -EINVAL;
469 	} else if (tr->flags) {
470 		/* timeout removal doesn't support flags */
471 		return -EINVAL;
472 	}
473 
474 	return 0;
475 }
476 
477 static inline enum hrtimer_mode io_translate_timeout_mode(unsigned int flags)
478 {
479 	return (flags & IORING_TIMEOUT_ABS) ? HRTIMER_MODE_ABS
480 					    : HRTIMER_MODE_REL;
481 }
482 
483 /*
484  * Remove or update an existing timeout command
485  */
486 int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
487 {
488 	struct io_timeout_rem *tr = io_kiocb_to_cmd(req, struct io_timeout_rem);
489 	struct io_ring_ctx *ctx = req->ctx;
490 	int ret;
491 
492 	if (!(tr->flags & IORING_TIMEOUT_UPDATE)) {
493 		struct io_cancel_data cd = { .ctx = ctx, .data = tr->addr, };
494 
495 		spin_lock(&ctx->completion_lock);
496 		ret = io_timeout_cancel(ctx, &cd);
497 		spin_unlock(&ctx->completion_lock);
498 	} else {
499 		enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags);
500 
501 		raw_spin_lock_irq(&ctx->timeout_lock);
502 		if (tr->ltimeout)
503 			ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode);
504 		else
505 			ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode);
506 		raw_spin_unlock_irq(&ctx->timeout_lock);
507 	}
508 
509 	if (ret < 0)
510 		req_set_fail(req);
511 	io_req_set_res(req, ret, 0);
512 	return IOU_COMPLETE;
513 }
514 
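/*
 * Common prep for IORING_OP_TIMEOUT and IORING_OP_LINK_TIMEOUT: validate
 * the SQE, allocate the async timeout data, copy in the timespec and set
 * up (but don't start) the hrtimer. A linked timeout also attaches itself
 * to the tail of the current submission link.
 */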
515 static int __io_timeout_prep(struct io_kiocb *req,
516 			     const struct io_uring_sqe *sqe,
517 			     bool is_timeout_link)
518 {
519 	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
520 	struct io_timeout_data *data;
521 	unsigned flags;
522 	u32 off = READ_ONCE(sqe->off);
523 
524 	if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in)
525 		return -EINVAL;
526 	if (off && is_timeout_link)
527 		return -EINVAL;
528 	flags = READ_ONCE(sqe->timeout_flags);
529 	if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK |
530 		      IORING_TIMEOUT_ETIME_SUCCESS |
531 		      IORING_TIMEOUT_MULTISHOT))
532 		return -EINVAL;
533 	/* more than one clock specified is invalid, obviously */
534 	if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
535 		return -EINVAL;
536 	/* multishot requests only make sense with rel values */
537 	if (!(~flags & (IORING_TIMEOUT_MULTISHOT | IORING_TIMEOUT_ABS)))
538 		return -EINVAL;
539 
540 	INIT_LIST_HEAD(&timeout->list);
541 	timeout->off = off;
542 	if (unlikely(off && !req->ctx->off_timeout_used))
543 		req->ctx->off_timeout_used = true;
544 	/*
545 	 * For multishot requests with a fixed number of repeats, 'repeats'
546 	 * tracks how many expirations remain.
547 	 */
548 	timeout->repeats = 0;
549 	if ((flags & IORING_TIMEOUT_MULTISHOT) && off > 0)
550 		timeout->repeats = off;
551 
552 	if (WARN_ON_ONCE(req_has_async_data(req)))
553 		return -EFAULT;
554 	data = io_uring_alloc_async_data(NULL, req);
555 	if (!data)
556 		return -ENOMEM;
557 	data->req = req;
558 	data->flags = flags;
559 
560 	if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
561 		return -EFAULT;
562 
563 	if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0)
564 		return -EINVAL;
565 
566 	data->mode = io_translate_timeout_mode(flags);
567 
568 	if (is_timeout_link) {
569 		struct io_submit_link *link = &req->ctx->submit_state.link;
570 
571 		if (!link->head)
572 			return -EINVAL;
573 		if (link->last->opcode == IORING_OP_LINK_TIMEOUT)
574 			return -EINVAL;
575 		timeout->head = link->last;
576 		link->last->flags |= REQ_F_ARM_LTIMEOUT;
577 		hrtimer_setup(&data->timer, io_link_timeout_fn, io_timeout_get_clock(data),
578 			      data->mode);
579 	} else {
580 		hrtimer_setup(&data->timer, io_timeout_fn, io_timeout_get_clock(data), data->mode);
581 	}
582 	return 0;
583 }
584 
585 int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
586 {
587 	return __io_timeout_prep(req, sqe, false);
588 }
589 
590 int io_link_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
591 {
592 	return __io_timeout_prep(req, sqe, true);
593 }
594 
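/*
 * Arm a timeout at issue time: sequenced timeouts are insertion-sorted by
 * how many CQ events they still need, noseq and multishot timeouts go to
 * the tail, and the hrtimer is started under ->timeout_lock.
 */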
595 int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
596 {
597 	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
598 	struct io_ring_ctx *ctx = req->ctx;
599 	struct io_timeout_data *data = req->async_data;
600 	struct list_head *entry;
601 	u32 tail, off = timeout->off;
602 
603 	raw_spin_lock_irq(&ctx->timeout_lock);
604 
605 	/*
606 	 * sqe->off holds how many events need to occur for this timeout
607 	 * to fire. If it isn't set, then this is a pure timeout request
608 	 * and the sequence isn't used.
609 	 */
610 	if (io_is_timeout_noseq(req)) {
611 		entry = ctx->timeout_list.prev;
612 		goto add;
613 	}
614 
615 	tail = data_race(ctx->cached_cq_tail) - atomic_read(&ctx->cq_timeouts);
616 	timeout->target_seq = tail + off;
617 
618 	/* Update the last seq here in case io_flush_timeouts() hasn't.
619 	 * This is safe because ->timeout_lock is held, and io_flush_timeouts()
620 	 * only updates ->cq_last_tm_flush under that same lock.
621 	 */
622 	ctx->cq_last_tm_flush = tail;
623 
624 	/*
625 	 * Insertion sort, ensuring the first entry in the list is always
626 	 * the one we need first.
627 	 */
628 	list_for_each_prev(entry, &ctx->timeout_list) {
629 		struct io_timeout *nextt = list_entry(entry, struct io_timeout, list);
630 		struct io_kiocb *nxt = cmd_to_io_kiocb(nextt);
631 
632 		if (io_is_timeout_noseq(nxt))
633 			continue;
634 		/* nextt->target_seq is behind @tail, otherwise it would've completed */
635 		if (off >= nextt->target_seq - tail)
636 			break;
637 	}
638 add:
639 	list_add(&timeout->list, entry);
640 	hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
641 	raw_spin_unlock_irq(&ctx->timeout_lock);
642 	return IOU_ISSUE_SKIP_COMPLETE;
643 }
644 
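/*
 * Start a previously prepared linked timeout, unless the request it guards
 * has already completed and cleared ->head. The submission reference is
 * dropped either way.
 */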
645 void io_queue_linked_timeout(struct io_kiocb *req)
646 {
647 	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
648 	struct io_ring_ctx *ctx = req->ctx;
649 
650 	raw_spin_lock_irq(&ctx->timeout_lock);
651 	/*
652 	 * If the back reference is NULL, then our linked request finished
653 	 * before we got a chance to set up the timer.
654 	 */
655 	if (timeout->head) {
656 		struct io_timeout_data *data = req->async_data;
657 
658 		hrtimer_start(&data->timer, timespec64_to_ktime(data->ts),
659 				data->mode);
660 		list_add_tail(&timeout->list, &ctx->ltimeout_list);
661 	}
662 	raw_spin_unlock_irq(&ctx->timeout_lock);
663 	/* drop submission reference */
664 	io_put_req(req);
665 }
666 
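/*
 * Match @head against @tctx for cancellation: the owning task must match
 * (a NULL @tctx matches any), and then either @cancel_all is set or some
 * request in the link chain is marked REQ_F_INFLIGHT.
 */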
667 static bool io_match_task(struct io_kiocb *head, struct io_uring_task *tctx,
668 			  bool cancel_all)
669 	__must_hold(&head->ctx->timeout_lock)
670 {
671 	struct io_kiocb *req;
672 
673 	if (tctx && head->tctx != tctx)
674 		return false;
675 	if (cancel_all)
676 		return true;
677 
678 	io_for_each_link(req, head) {
679 		if (req->flags & REQ_F_INFLIGHT)
680 			return true;
681 	}
682 	return false;
683 }
684 
685 /* Returns true if we found and killed one or more timeouts */
686 __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
687 			     bool cancel_all)
688 {
689 	struct io_timeout *timeout, *tmp;
690 	LIST_HEAD(list);
691 
692 	/*
693 	 * completion_lock is needed for io_match_task(). Take it before
694 	 * timeout_lock to keep the locking order.
695 	 */
696 	spin_lock(&ctx->completion_lock);
697 	raw_spin_lock_irq(&ctx->timeout_lock);
698 	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
699 		struct io_kiocb *req = cmd_to_io_kiocb(timeout);
700 
701 		if (io_match_task(req, tctx, cancel_all))
702 			io_kill_timeout(req, &list);
703 	}
704 	raw_spin_unlock_irq(&ctx->timeout_lock);
705 	spin_unlock(&ctx->completion_lock);
706 
707 	return io_flush_killed_timeouts(&list, -ECANCELED);
708 }
709