xref: /linux/io_uring/timeout.c (revision 2c142b63c8ee982cdfdba49a616027c266294838)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/kernel.h>
3 #include <linux/errno.h>
4 #include <linux/file.h>
5 #include <linux/io_uring.h>
6 #include <linux/time_namespace.h>
7 
8 #include <trace/events/io_uring.h>
9 
10 #include <uapi/linux/io_uring.h>
11 
12 #include "io_uring.h"
13 #include "refs.h"
14 #include "cancel.h"
15 #include "timeout.h"
16 
17 struct io_timeout {
18 	struct file			*file;
19 	u32				off;
20 	u32				target_seq;
21 	u32				repeats;
22 	struct list_head		list;
23 	/* head of the link, used by linked timeouts only */
24 	struct io_kiocb			*head;
25 	/* for linked completions */
26 	struct io_kiocb			*prev;
27 };
28 
29 struct io_timeout_rem {
30 	struct file			*file;
31 	u64				addr;
32 
33 	/* timeout update */
34 	ktime_t				time;
35 	u32				flags;
36 	bool				ltimeout;
37 };
38 
io_flags_to_clock(unsigned flags)39 static clockid_t io_flags_to_clock(unsigned flags)
40 {
41 	switch (flags & IORING_TIMEOUT_CLOCK_MASK) {
42 	case IORING_TIMEOUT_BOOTTIME:
43 		return CLOCK_BOOTTIME;
44 	case IORING_TIMEOUT_REALTIME:
45 		return CLOCK_REALTIME;
46 	default:
47 		/* can't happen, vetted at prep time */
48 		WARN_ON_ONCE(1);
49 		fallthrough;
50 	case 0:
51 		return CLOCK_MONOTONIC;
52 	}
53 }
54 
/*
 * Turn the user supplied timeout argument into a ktime_t stored in @time.
 *
 * With IORING_TIMEOUT_IMMEDIATE_ARG, @arg itself is the value in nanoseconds;
 * otherwise @arg is a user pointer to a timespec. Absolute timeouts are
 * additionally passed through timens_ktime_to_host() for the selected clock.
 *
 * Returns 0 on success, -EFAULT if the timespec can't be copied in, or
 * -EINVAL for a negative time.
 */
static int io_parse_user_time(ktime_t *time, u64 arg, unsigned flags)
{
	if (flags & IORING_TIMEOUT_IMMEDIATE_ARG) {
		*time = ns_to_ktime(arg);
		if (*time < 0)
			return -EINVAL;
	} else {
		struct timespec64 ts;

		if (get_timespec64(&ts, u64_to_user_ptr(arg)))
			return -EFAULT;
		if (ts.tv_sec < 0 || ts.tv_nsec < 0)
			return -EINVAL;
		*time = timespec64_to_ktime(ts);
	}

	if (flags & IORING_TIMEOUT_ABS)
		*time = timens_ktime_to_host(io_flags_to_clock(flags), *time);
	return 0;
}
76 
77 static struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req,
78 						   struct io_kiocb *link);
79 
io_is_timeout_noseq(struct io_kiocb * req)80 static inline bool io_is_timeout_noseq(struct io_kiocb *req)
81 {
82 	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
83 	struct io_timeout_data *data = req->async_data;
84 
85 	return !timeout->off || data->flags & IORING_TIMEOUT_MULTISHOT;
86 }
87 
/*
 * Drop one reference to @req. When that was the last one, queue whatever
 * follows it and free the request.
 */
static inline void io_put_req(struct io_kiocb *req)
{
	if (!req_ref_put_and_test(req))
		return;

	io_queue_next(req);
	io_free_req(req);
}
95 
io_timeout_finish(struct io_timeout * timeout,struct io_timeout_data * data)96 static inline bool io_timeout_finish(struct io_timeout *timeout,
97 				     struct io_timeout_data *data)
98 {
99 	if (!(data->flags & IORING_TIMEOUT_MULTISHOT))
100 		return true;
101 
102 	if (!timeout->off || (timeout->repeats && --timeout->repeats))
103 		return false;
104 
105 	return true;
106 }
107 
108 static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer);
109 
/*
 * Task work installed by io_timeout_fn() once a timeout has fired.
 *
 * An unfinished multishot timeout posts a CQE flagged IORING_CQE_F_MORE and
 * is put back on the timeout list with its timer restarted. In all other
 * cases, including failure to post that CQE, the request is completed.
 */
static void io_timeout_complete(struct io_tw_req tw_req, io_tw_token_t tw)
{
	struct io_kiocb *req = tw_req.req;
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_timeout_data *data = req->async_data;
	struct io_ring_ctx *ctx = req->ctx;
	bool rearm;

	/* the CQE is only posted if the timeout isn't finished yet */
	rearm = !io_timeout_finish(timeout, data) &&
		io_req_post_cqe(req, -ETIME, IORING_CQE_F_MORE);
	if (!rearm) {
		io_req_task_complete(tw_req, tw);
		return;
	}

	/* re-arm timer, queueing the request behind the current last entry */
	raw_spin_lock_irq(&ctx->timeout_lock);
	list_add(&timeout->list, ctx->timeout_list.prev);
	hrtimer_start(&data->timer, data->time, data->mode);
	raw_spin_unlock_irq(&ctx->timeout_lock);
}
130 
io_flush_killed_timeouts(struct list_head * list,int err)131 static __cold bool io_flush_killed_timeouts(struct list_head *list, int err)
132 {
133 	if (list_empty(list))
134 		return false;
135 
136 	while (!list_empty(list)) {
137 		struct io_timeout *timeout;
138 		struct io_kiocb *req;
139 
140 		timeout = list_first_entry(list, struct io_timeout, list);
141 		list_del_init(&timeout->list);
142 		req = cmd_to_io_kiocb(timeout);
143 		if (err)
144 			req_set_fail(req);
145 		io_req_queue_tw_complete(req, err);
146 	}
147 
148 	return true;
149 }
150 
io_kill_timeout(struct io_kiocb * req,struct list_head * list)151 static void io_kill_timeout(struct io_kiocb *req, struct list_head *list)
152 	__must_hold(&req->ctx->timeout_lock)
153 {
154 	struct io_timeout_data *io = req->async_data;
155 
156 	if (hrtimer_try_to_cancel(&io->timer) != -1) {
157 		struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
158 
159 		atomic_set(&req->ctx->cq_timeouts,
160 			atomic_read(&req->ctx->cq_timeouts) + 1);
161 		list_move_tail(&timeout->list, list);
162 	}
163 }
164 
io_flush_timeouts(struct io_ring_ctx * ctx)165 __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
166 {
167 	struct io_timeout *timeout, *tmp;
168 	LIST_HEAD(list);
169 	u32 seq;
170 
171 	raw_spin_lock_irq(&ctx->timeout_lock);
172 	seq = READ_ONCE(ctx->cached_cq_tail) - atomic_read(&ctx->cq_timeouts);
173 
174 	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
175 		struct io_kiocb *req = cmd_to_io_kiocb(timeout);
176 		u32 events_needed, events_got;
177 
178 		if (io_is_timeout_noseq(req))
179 			break;
180 
181 		/*
182 		 * Since seq can easily wrap around over time, subtract
183 		 * the last seq at which timeouts were flushed before comparing.
184 		 * Assuming not more than 2^31-1 events have happened since,
185 		 * these subtractions won't have wrapped, so we can check if
186 		 * target is in [last_seq, current_seq] by comparing the two.
187 		 */
188 		events_needed = timeout->target_seq - ctx->cq_last_tm_flush;
189 		events_got = seq - ctx->cq_last_tm_flush;
190 		if (events_got < events_needed)
191 			break;
192 
193 		io_kill_timeout(req, &list);
194 	}
195 	ctx->cq_last_tm_flush = seq;
196 	raw_spin_unlock_irq(&ctx->timeout_lock);
197 	io_flush_killed_timeouts(&list, 0);
198 }
199 
/*
 * Task work that completes a whole chain of linked requests, starting with
 * the request the work was queued on. A request already marked REQ_F_FAIL
 * keeps its own result, all others complete with -ECANCELED.
 */
static void io_req_tw_fail_links(struct io_tw_req tw_req, io_tw_token_t tw)
{
	struct io_kiocb *cur = tw_req.req;
	struct io_kiocb *next;

	io_tw_lock(cur->ctx, tw);
	for (; cur; cur = next) {
		long res;

		/* grab the successor first, the request is unlinked below */
		next = cur->link;
		res = (cur->flags & REQ_F_FAIL) ? cur->cqe.res : -ECANCELED;

		cur->link = NULL;
		io_req_set_res(cur, res, 0);
		io_req_task_complete((struct io_tw_req){cur}, tw);
	}
}
217 
io_fail_links(struct io_kiocb * req)218 static void io_fail_links(struct io_kiocb *req)
219 	__must_hold(&req->ctx->completion_lock)
220 {
221 	struct io_kiocb *link = req->link;
222 	bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES;
223 
224 	if (!link)
225 		return;
226 
227 	while (link) {
228 		if (ignore_cqes)
229 			link->flags |= REQ_F_CQE_SKIP;
230 		else
231 			link->flags &= ~REQ_F_CQE_SKIP;
232 		trace_io_uring_fail_link(req, link);
233 		link = link->link;
234 	}
235 
236 	link = req->link;
237 	link->io_task_work.func = io_req_tw_fail_links;
238 	io_req_task_work_add(link);
239 	req->link = NULL;
240 }
241 
io_remove_next_linked(struct io_kiocb * req)242 static inline void io_remove_next_linked(struct io_kiocb *req)
243 {
244 	struct io_kiocb *nxt = req->link;
245 
246 	req->link = nxt->link;
247 	nxt->link = NULL;
248 }
249 
io_disarm_next(struct io_kiocb * req)250 void io_disarm_next(struct io_kiocb *req)
251 	__must_hold(&req->ctx->completion_lock)
252 {
253 	struct io_kiocb *link = NULL;
254 
255 	if (req->flags & REQ_F_ARM_LTIMEOUT) {
256 		link = req->link;
257 		req->flags &= ~REQ_F_ARM_LTIMEOUT;
258 		if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
259 			io_remove_next_linked(req);
260 			io_req_queue_tw_complete(link, -ECANCELED);
261 		}
262 	} else if (req->flags & REQ_F_LINK_TIMEOUT) {
263 		struct io_ring_ctx *ctx = req->ctx;
264 
265 		raw_spin_lock_irq(&ctx->timeout_lock);
266 		if (req->link && req->link->opcode == IORING_OP_LINK_TIMEOUT)
267 			link = __io_disarm_linked_timeout(req, req->link);
268 
269 		raw_spin_unlock_irq(&ctx->timeout_lock);
270 		if (link)
271 			io_req_queue_tw_complete(link, -ECANCELED);
272 	}
273 	if (unlikely((req->flags & REQ_F_FAIL) &&
274 		     !(req->flags & REQ_F_HARDLINK)))
275 		io_fail_links(req);
276 }
277 
__io_disarm_linked_timeout(struct io_kiocb * req,struct io_kiocb * link)278 static struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req,
279 						   struct io_kiocb *link)
280 	__must_hold(&req->ctx->completion_lock)
281 	__must_hold(&req->ctx->timeout_lock)
282 {
283 	struct io_timeout_data *io = link->async_data;
284 	struct io_timeout *timeout = io_kiocb_to_cmd(link, struct io_timeout);
285 
286 	io_remove_next_linked(req);
287 
288 	/* If this is NULL, then timer already claimed it and will complete it */
289 	if (!timeout->head)
290 		return NULL;
291 	timeout->head = NULL;
292 	if (hrtimer_try_to_cancel(&io->timer) != -1) {
293 		list_del(&timeout->list);
294 		return link;
295 	}
296 
297 	return NULL;
298 }
299 
io_timeout_fn(struct hrtimer * timer)300 static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
301 {
302 	struct io_timeout_data *data = container_of(timer,
303 						struct io_timeout_data, timer);
304 	struct io_kiocb *req = data->req;
305 	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
306 	struct io_ring_ctx *ctx = req->ctx;
307 	unsigned long flags;
308 
309 	raw_spin_lock_irqsave(&ctx->timeout_lock, flags);
310 	list_del_init(&timeout->list);
311 	atomic_set(&ctx->cq_timeouts,
312 		atomic_read(&ctx->cq_timeouts) + 1);
313 	raw_spin_unlock_irqrestore(&ctx->timeout_lock, flags);
314 
315 	if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS))
316 		req_set_fail(req);
317 
318 	io_req_set_res(req, -ETIME, 0);
319 	req->io_task_work.func = io_timeout_complete;
320 	io_req_task_work_add(req);
321 	return HRTIMER_NORESTART;
322 }
323 
io_timeout_extract(struct io_ring_ctx * ctx,struct io_cancel_data * cd)324 static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
325 					   struct io_cancel_data *cd)
326 	__must_hold(&ctx->timeout_lock)
327 {
328 	struct io_timeout *timeout;
329 	struct io_timeout_data *io;
330 	struct io_kiocb *req = NULL;
331 
332 	list_for_each_entry(timeout, &ctx->timeout_list, list) {
333 		struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);
334 
335 		if (io_cancel_req_match(tmp, cd)) {
336 			req = tmp;
337 			break;
338 		}
339 	}
340 	if (!req)
341 		return ERR_PTR(-ENOENT);
342 
343 	io = req->async_data;
344 	if (hrtimer_try_to_cancel(&io->timer) == -1)
345 		return ERR_PTR(-EALREADY);
346 	timeout = io_kiocb_to_cmd(req, struct io_timeout);
347 	list_del_init(&timeout->list);
348 	return req;
349 }
350 
io_timeout_cancel(struct io_ring_ctx * ctx,struct io_cancel_data * cd)351 int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
352 	__must_hold(&ctx->completion_lock)
353 {
354 	struct io_kiocb *req;
355 
356 	raw_spin_lock_irq(&ctx->timeout_lock);
357 	req = io_timeout_extract(ctx, cd);
358 	raw_spin_unlock_irq(&ctx->timeout_lock);
359 
360 	if (IS_ERR(req))
361 		return PTR_ERR(req);
362 	io_req_task_queue_fail(req, -ECANCELED);
363 	return 0;
364 }
365 
/*
 * Task work installed by io_link_timeout_fn() once a linked timeout fired.
 *
 * With a referenced head request (timeout->prev), try to cancel that request
 * and complete the timeout with the cancelation result, or -ETIME if the
 * result was 0. Without one, just complete the timeout with -ETIME.
 */
static void io_req_task_link_timeout(struct io_tw_req tw_req, io_tw_token_t tw)
{
	struct io_kiocb *req = tw_req.req;
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_kiocb *prev = timeout->prev;
	int ret;

	if (!prev) {
		io_req_set_res(req, -ETIME, 0);
		io_req_task_complete(tw_req, tw);
		return;
	}

	/*
	 * splice the linked timeout out of prev's chain if the regular
	 * completion path didn't already do it.
	 */
	if (prev->link == req)
		prev->link = req->link;
	req->link = NULL;

	if (tw.cancel) {
		ret = -ECANCELED;
	} else {
		struct io_cancel_data cd = {
			.ctx		= req->ctx,
			.data		= prev->cqe.user_data,
		};

		ret = io_try_cancel(req->tctx, &cd, 0);
	}

	io_req_set_res(req, ret ? ret : -ETIME, 0);
	io_req_task_complete(tw_req, tw);
	/* drops the reference io_link_timeout_fn() took on prev */
	io_put_req(prev);
}
400 
io_link_timeout_fn(struct hrtimer * timer)401 static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
402 {
403 	struct io_timeout_data *data = container_of(timer,
404 						struct io_timeout_data, timer);
405 	struct io_kiocb *prev, *req = data->req;
406 	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
407 	struct io_ring_ctx *ctx = req->ctx;
408 	unsigned long flags;
409 
410 	raw_spin_lock_irqsave(&ctx->timeout_lock, flags);
411 	prev = timeout->head;
412 	timeout->head = NULL;
413 
414 	/*
415 	 * We don't expect the list to be empty, that will only happen if we
416 	 * race with the completion of the linked work. Splice of prev is
417 	 * done in io_req_task_link_timeout(), if needed.
418 	 */
419 	if (prev) {
420 		if (!req_ref_inc_not_zero(prev)) {
421 			io_remove_next_linked(prev);
422 			prev = NULL;
423 		}
424 	}
425 	list_del(&timeout->list);
426 	timeout->prev = prev;
427 	raw_spin_unlock_irqrestore(&ctx->timeout_lock, flags);
428 
429 	req->io_task_work.func = io_req_task_link_timeout;
430 	io_req_task_work_add(req);
431 	return HRTIMER_NORESTART;
432 }
433 
io_timeout_get_clock(struct io_timeout_data * data)434 static clockid_t io_timeout_get_clock(struct io_timeout_data *data)
435 {
436 	return io_flags_to_clock(data->flags);
437 }
438 
/*
 * Restart the linked timeout identified by @user_data with the new expiry
 * @ts, interpreted according to @mode.
 *
 * Returns 0 on success, -ENOENT if no linked timeout on ctx->ltimeout_list
 * has that user_data, or -EALREADY if its timer could not be cancelled.
 */
static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
				    ktime_t ts, enum hrtimer_mode mode)
	__must_hold(&ctx->timeout_lock)
{
	struct io_timeout_data *data;
	struct io_timeout *timeout;
	struct io_kiocb *found = NULL;

	list_for_each_entry(timeout, &ctx->ltimeout_list, list) {
		struct io_kiocb *candidate = cmd_to_io_kiocb(timeout);

		if (candidate->cqe.user_data != user_data)
			continue;
		found = candidate;
		break;
	}
	if (!found)
		return -ENOENT;

	data = found->async_data;
	if (hrtimer_try_to_cancel(&data->timer) == -1)
		return -EALREADY;

	/* set the timer up again, @mode may differ from the original one */
	hrtimer_setup(&data->timer, io_link_timeout_fn,
		      io_timeout_get_clock(data), mode);
	hrtimer_start(&data->timer, ts, mode);
	return 0;
}
465 
/*
 * Re-arm the regular timeout identified by @user_data with the new expiry
 * @time, interpreted according to @mode.
 *
 * The timeout is pulled off the timeout list, turned into a pure timeout
 * (no completion sequence), appended to the end of the list and its timer
 * is set up and started again.
 *
 * Returns 0 on success or the error io_timeout_extract() reported
 * (-ENOENT, -EALREADY).
 */
static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
			     ktime_t time, enum hrtimer_mode mode)
	__must_hold(&ctx->timeout_lock)
{
	struct io_cancel_data cd = { .ctx = ctx, .data = user_data, };
	struct io_kiocb *req = io_timeout_extract(ctx, &cd);
	struct io_timeout_data *data;
	struct io_timeout *timeout;

	if (IS_ERR(req))
		return PTR_ERR(req);

	/*
	 * Only derive the command pointer once req is known to be a real
	 * request and not an error pointer.
	 */
	timeout = io_kiocb_to_cmd(req, struct io_timeout);
	timeout->off = 0; /* noseq */
	data = req->async_data;
	data->time = time;

	list_add_tail(&timeout->list, &ctx->timeout_list);
	hrtimer_setup(&data->timer, io_timeout_fn, io_timeout_get_clock(data), mode);
	hrtimer_start(&data->timer, data->time, mode);
	return 0;
}
487 
io_timeout_remove_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)488 int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
489 {
490 	struct io_timeout_rem *tr = io_kiocb_to_cmd(req, struct io_timeout_rem);
491 	int ret;
492 
493 	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
494 		return -EINVAL;
495 	if (sqe->addr3 || sqe->__pad2[0])
496 		return -EINVAL;
497 	if (sqe->buf_index || sqe->len || sqe->splice_fd_in)
498 		return -EINVAL;
499 
500 	tr->ltimeout = false;
501 	tr->addr = READ_ONCE(sqe->addr);
502 	tr->flags = READ_ONCE(sqe->timeout_flags);
503 	if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) {
504 		if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
505 			return -EINVAL;
506 		if (tr->flags & IORING_LINK_TIMEOUT_UPDATE)
507 			tr->ltimeout = true;
508 		if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK |
509 				  IORING_TIMEOUT_ABS |
510 				  IORING_TIMEOUT_IMMEDIATE_ARG))
511 			return -EINVAL;
512 		ret = io_parse_user_time(&tr->time, READ_ONCE(sqe->addr2), tr->flags);
513 		if (ret)
514 			return ret;
515 	} else if (tr->flags) {
516 		/* timeout removal doesn't support flags */
517 		return -EINVAL;
518 	}
519 
520 	return 0;
521 }
522 
io_translate_timeout_mode(unsigned int flags)523 static inline enum hrtimer_mode io_translate_timeout_mode(unsigned int flags)
524 {
525 	return (flags & IORING_TIMEOUT_ABS) ? HRTIMER_MODE_ABS
526 					    : HRTIMER_MODE_REL;
527 }
528 
529 /*
530  * Remove or update an existing timeout command
531  */
io_timeout_remove(struct io_kiocb * req,unsigned int issue_flags)532 int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
533 {
534 	struct io_timeout_rem *tr = io_kiocb_to_cmd(req, struct io_timeout_rem);
535 	struct io_ring_ctx *ctx = req->ctx;
536 	int ret;
537 
538 	if (!(tr->flags & IORING_TIMEOUT_UPDATE)) {
539 		struct io_cancel_data cd = { .ctx = ctx, .data = tr->addr, };
540 
541 		spin_lock(&ctx->completion_lock);
542 		ret = io_timeout_cancel(ctx, &cd);
543 		spin_unlock(&ctx->completion_lock);
544 	} else {
545 		enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags);
546 
547 		raw_spin_lock_irq(&ctx->timeout_lock);
548 		if (tr->ltimeout)
549 			ret = io_linked_timeout_update(ctx, tr->addr, tr->time, mode);
550 		else
551 			ret = io_timeout_update(ctx, tr->addr, tr->time, mode);
552 		raw_spin_unlock_irq(&ctx->timeout_lock);
553 	}
554 
555 	if (ret < 0)
556 		req_set_fail(req);
557 	io_req_set_res(req, ret, 0);
558 	return IOU_COMPLETE;
559 }
560 
/*
 * Common prep for regular and linked timeouts.
 *
 * Validates the SQE, records the offset/repeat count, allocates the async
 * timeout data, parses the expiry from sqe->addr and sets up (but does not
 * start) the hrtimer. A linked timeout is additionally tied to the last
 * request of the link currently being submitted.
 *
 * Returns 0 on success; -EINVAL for bad SQE fields, flags or link state;
 * -EFAULT if the request already has async data; -ENOMEM if the allocation
 * fails; or the error from io_parse_user_time().
 */
static int __io_timeout_prep(struct io_kiocb *req,
			     const struct io_uring_sqe *sqe,
			     bool is_timeout_link)
{
	const unsigned multishot_abs = IORING_TIMEOUT_MULTISHOT |
				       IORING_TIMEOUT_ABS;
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_submit_link *link;
	struct io_timeout_data *data;
	unsigned flags;
	u32 off = READ_ONCE(sqe->off);
	int ret;

	if (sqe->addr3 || sqe->__pad2[0] ||
	    sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in)
		return -EINVAL;
	/* linked timeouts don't take an offset */
	if (is_timeout_link && off)
		return -EINVAL;

	flags = READ_ONCE(sqe->timeout_flags);
	if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK |
		      IORING_TIMEOUT_ETIME_SUCCESS |
		      IORING_TIMEOUT_MULTISHOT |
		      IORING_TIMEOUT_IMMEDIATE_ARG))
		return -EINVAL;
	/* more than one clock specified is invalid, obviously */
	if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
		return -EINVAL;
	/* multishot requests only make sense with rel values */
	if ((flags & multishot_abs) == multishot_abs)
		return -EINVAL;

	INIT_LIST_HEAD(&timeout->list);
	timeout->off = off;
	if (unlikely(off && !(req->ctx->int_flags & IO_RING_F_OFF_TIMEOUT_USED)))
		req->ctx->int_flags |= IO_RING_F_OFF_TIMEOUT_USED;

	/*
	 * for multishot reqs w/ fixed nr of repeats, repeats tracks the
	 * remaining nr
	 */
	timeout->repeats = 0;
	if ((flags & IORING_TIMEOUT_MULTISHOT) && off > 0)
		timeout->repeats = off;

	if (WARN_ON_ONCE(req_has_async_data(req)))
		return -EFAULT;
	data = io_uring_alloc_async_data(NULL, req);
	if (!data)
		return -ENOMEM;
	data->req = req;
	data->flags = flags;

	ret = io_parse_user_time(&data->time, READ_ONCE(sqe->addr), flags);
	if (ret)
		return ret;

	data->mode = io_translate_timeout_mode(flags);

	if (!is_timeout_link) {
		hrtimer_setup(&data->timer, io_timeout_fn,
			      io_timeout_get_clock(data), data->mode);
		return 0;
	}

	/* a linked timeout needs a preceding request that isn't one itself */
	link = &req->ctx->submit_state.link;
	if (!link->head)
		return -EINVAL;
	if (link->last->opcode == IORING_OP_LINK_TIMEOUT)
		return -EINVAL;

	timeout->head = link->last;
	link->last->flags |= REQ_F_ARM_LTIMEOUT;
	hrtimer_setup(&data->timer, io_link_timeout_fn,
		      io_timeout_get_clock(data), data->mode);
	return 0;
}
632 
io_timeout_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)633 int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
634 {
635 	return __io_timeout_prep(req, sqe, false);
636 }
637 
io_link_timeout_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)638 int io_link_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
639 {
640 	return __io_timeout_prep(req, sqe, true);
641 }
642 
io_timeout(struct io_kiocb * req,unsigned int issue_flags)643 int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
644 {
645 	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
646 	struct io_ring_ctx *ctx = req->ctx;
647 	struct io_timeout_data *data = req->async_data;
648 	struct list_head *entry;
649 	u32 tail, off = timeout->off;
650 
651 	raw_spin_lock_irq(&ctx->timeout_lock);
652 
653 	/*
654 	 * sqe->off holds how many events that need to occur for this
655 	 * timeout event to be satisfied. If it isn't set, then this is
656 	 * a pure timeout request, sequence isn't used.
657 	 */
658 	if (io_is_timeout_noseq(req)) {
659 		entry = ctx->timeout_list.prev;
660 		goto add;
661 	}
662 
663 	tail = data_race(ctx->cached_cq_tail) - atomic_read(&ctx->cq_timeouts);
664 	timeout->target_seq = tail + off;
665 
666 	/* Update the last seq here in case io_flush_timeouts() hasn't.
667 	 * This is safe because ->completion_lock is held, and submissions
668 	 * and completions are never mixed in the same ->completion_lock section.
669 	 */
670 	ctx->cq_last_tm_flush = tail;
671 
672 	/*
673 	 * Insertion sort, ensuring the first entry in the list is always
674 	 * the one we need first.
675 	 */
676 	list_for_each_prev(entry, &ctx->timeout_list) {
677 		struct io_timeout *nextt = list_entry(entry, struct io_timeout, list);
678 		struct io_kiocb *nxt = cmd_to_io_kiocb(nextt);
679 
680 		if (io_is_timeout_noseq(nxt))
681 			continue;
682 		/* nxt.seq is behind @tail, otherwise would've been completed */
683 		if (off >= nextt->target_seq - tail)
684 			break;
685 	}
686 add:
687 	list_add(&timeout->list, entry);
688 	hrtimer_start(&data->timer, data->time, data->mode);
689 	raw_spin_unlock_irq(&ctx->timeout_lock);
690 	return IOU_ISSUE_SKIP_COMPLETE;
691 }
692 
io_queue_linked_timeout(struct io_kiocb * req)693 void io_queue_linked_timeout(struct io_kiocb *req)
694 {
695 	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
696 	struct io_ring_ctx *ctx = req->ctx;
697 
698 	raw_spin_lock_irq(&ctx->timeout_lock);
699 	/*
700 	 * If the back reference is NULL, then our linked request finished
701 	 * before we got a chance to setup the timer
702 	 */
703 	if (timeout->head) {
704 		struct io_timeout_data *data = req->async_data;
705 
706 		hrtimer_start(&data->timer, data->time, data->mode);
707 		list_add_tail(&timeout->list, &ctx->ltimeout_list);
708 	}
709 	raw_spin_unlock_irq(&ctx->timeout_lock);
710 	/* drop submission reference */
711 	io_put_req(req);
712 }
713 
/*
 * Should the link headed by @head be cancelled on behalf of @tctx?
 *
 * A NULL @tctx matches any task. With @cancel_all every request of a
 * matching task qualifies; otherwise only links containing at least one
 * request flagged REQ_F_INFLIGHT do.
 */
static bool io_match_task(struct io_kiocb *head, struct io_uring_task *tctx,
			  bool cancel_all)
	__must_hold(&head->ctx->timeout_lock)
{
	struct io_kiocb *cur;

	if (tctx && head->tctx != tctx)
		return false;
	if (cancel_all)
		return true;

	io_for_each_link(cur, head) {
		if (cur->flags & REQ_F_INFLIGHT)
			return true;
	}
	return false;
}
731 
732 /* Returns true if we found and killed one or more timeouts */
io_kill_timeouts(struct io_ring_ctx * ctx,struct io_uring_task * tctx,bool cancel_all)733 __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
734 			     bool cancel_all)
735 {
736 	struct io_timeout *timeout, *tmp;
737 	LIST_HEAD(list);
738 
739 	/*
740 	 * completion_lock is needed for io_match_task(). Take it before
741 	 * timeout_lockfirst to keep locking ordering.
742 	 */
743 	spin_lock(&ctx->completion_lock);
744 	raw_spin_lock_irq(&ctx->timeout_lock);
745 	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
746 		struct io_kiocb *req = cmd_to_io_kiocb(timeout);
747 
748 		if (io_match_task(req, tctx, cancel_all))
749 			io_kill_timeout(req, &list);
750 	}
751 	raw_spin_unlock_irq(&ctx->timeout_lock);
752 	spin_unlock(&ctx->completion_lock);
753 
754 	return io_flush_killed_timeouts(&list, -ECANCELED);
755 }
756