xref: /linux/io_uring/cancel.c (revision 6dfafbd0299a60bfb5d5e277fdf100037c7ded07)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/kernel.h>
3 #include <linux/errno.h>
4 #include <linux/fs.h>
5 #include <linux/file.h>
6 #include <linux/mm.h>
7 #include <linux/slab.h>
8 #include <linux/namei.h>
9 #include <linux/nospec.h>
10 #include <linux/io_uring.h>
11 
12 #include <uapi/linux/io_uring.h>
13 
14 #include "filetable.h"
15 #include "io_uring.h"
16 #include "tctx.h"
17 #include "sqpoll.h"
18 #include "uring_cmd.h"
19 #include "poll.h"
20 #include "timeout.h"
21 #include "waitid.h"
22 #include "futex.h"
23 #include "cancel.h"
24 
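/* Per-request state for IORING_OP_ASYNC_CANCEL, filled in at prep time. */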
25 struct io_cancel {
26 	struct file			*file;
27 	u64				addr;
28 	u32				flags;
29 	s32				fd;
30 	u8				opcode;
31 };
32 
33 #define CANCEL_FLAGS	(IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \
34 			 IORING_ASYNC_CANCEL_ANY | IORING_ASYNC_CANCEL_FD_FIXED | \
35 			 IORING_ASYNC_CANCEL_USERDATA | IORING_ASYNC_CANCEL_OP)
36 
37 /*
38  * Returns true if the request matches the criteria outlined by 'cd'.
39  */
40 bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd)
41 {
42 	bool match_user_data = cd->flags & IORING_ASYNC_CANCEL_USERDATA;
43 
44 	if (req->ctx != cd->ctx)
45 		return false;
46 
47 	if (!(cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP)))
48 		match_user_data = true;
49 
50 	if (cd->flags & IORING_ASYNC_CANCEL_ANY)
51 		goto check_seq;
52 	if (cd->flags & IORING_ASYNC_CANCEL_FD) {
53 		if (req->file != cd->file)
54 			return false;
55 	}
56 	if (cd->flags & IORING_ASYNC_CANCEL_OP) {
57 		if (req->opcode != cd->opcode)
58 			return false;
59 	}
60 	if (match_user_data && req->cqe.user_data != cd->data)
61 		return false;
62 	if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
63 check_seq:
64 		if (io_cancel_match_sequence(req, cd->seq))
65 			return false;
66 	}
67 
68 	return true;
69 }
70 
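/*
 * io-wq cancelation callback: returns true if the queued work item's request
 * matches the criteria in 'data' (a struct io_cancel_data).
 */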
71 static bool io_cancel_cb(struct io_wq_work *work, void *data)
72 {
73 	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
74 	struct io_cancel_data *cd = data;
75 
76 	return io_cancel_req_match(req, cd);
77 }
78 
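/*
 * Try to cancel matching work on the given task's io-wq. Returns 0 if pending
 * work was canceled, -EALREADY if matching work is already running, or
 * -ENOENT if nothing matched (or the task has no io-wq).
 */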
79 static int io_async_cancel_one(struct io_uring_task *tctx,
80 			       struct io_cancel_data *cd)
81 {
82 	enum io_wq_cancel cancel_ret;
83 	int ret = 0;
84 	bool all;
85 
86 	if (!tctx || !tctx->io_wq)
87 		return -ENOENT;
88 
89 	all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
90 	cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, cd, all);
91 	switch (cancel_ret) {
92 	case IO_WQ_CANCEL_OK:
93 		ret = 0;
94 		break;
95 	case IO_WQ_CANCEL_RUNNING:
96 		ret = -EALREADY;
97 		break;
98 	case IO_WQ_CANCEL_NOTFOUND:
99 		ret = -ENOENT;
100 		break;
101 	}
102 
103 	return ret;
104 }
105 
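/*
 * Try to cancel a request matching 'cd' on behalf of 'tctx': first via the
 * task's io-wq, then by checking poll, waitid, futex and, unless an fd match
 * was requested, timeout requests. Returns 0 on success, or a negative error
 * such as -ENOENT if no matching request was found.
 */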
106 int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd,
107 		  unsigned issue_flags)
108 {
109 	struct io_ring_ctx *ctx = cd->ctx;
110 	int ret;
111 
112 	WARN_ON_ONCE(!io_wq_current_is_worker() && tctx != current->io_uring);
113 
114 	ret = io_async_cancel_one(tctx, cd);
115 	/*
116 	 * Fall through even for -EALREADY, as we may have a poll armed
117 	 * that needs unarming.
118 	 */
119 	if (!ret)
120 		return 0;
121 
122 	ret = io_poll_cancel(ctx, cd, issue_flags);
123 	if (ret != -ENOENT)
124 		return ret;
125 
126 	ret = io_waitid_cancel(ctx, cd, issue_flags);
127 	if (ret != -ENOENT)
128 		return ret;
129 
130 	ret = io_futex_cancel(ctx, cd, issue_flags);
131 	if (ret != -ENOENT)
132 		return ret;
133 
134 	spin_lock(&ctx->completion_lock);
135 	if (!(cd->flags & IORING_ASYNC_CANCEL_FD))
136 		ret = io_timeout_cancel(ctx, cd);
137 	spin_unlock(&ctx->completion_lock);
138 	return ret;
139 }
140 
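/*
 * Prepare IORING_OP_ASYNC_CANCEL: read the user_data match key, the cancel
 * flags and, depending on those flags, the target fd or opcode from the SQE,
 * rejecting invalid flag combinations.
 */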
141 int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
142 {
143 	struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel);
144 
145 	if (unlikely(req->flags & REQ_F_BUFFER_SELECT))
146 		return -EINVAL;
147 	if (sqe->off || sqe->splice_fd_in)
148 		return -EINVAL;
149 
150 	cancel->addr = READ_ONCE(sqe->addr);
151 	cancel->flags = READ_ONCE(sqe->cancel_flags);
152 	if (cancel->flags & ~CANCEL_FLAGS)
153 		return -EINVAL;
154 	if (cancel->flags & IORING_ASYNC_CANCEL_FD) {
155 		if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
156 			return -EINVAL;
157 		cancel->fd = READ_ONCE(sqe->fd);
158 	}
159 	if (cancel->flags & IORING_ASYNC_CANCEL_OP) {
160 		if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
161 			return -EINVAL;
162 		cancel->opcode = READ_ONCE(sqe->len);
163 	}
164 
165 	return 0;
166 }
167 
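/*
 * Core of async cancelation: try the issuing task's context first, then fall
 * back to walking every task attached to this ring under the uring_lock.
 * With IORING_ASYNC_CANCEL_ALL/ANY set, returns the number of matched
 * requests; otherwise returns the first non -ENOENT result, or -ENOENT.
 */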
168 static int __io_async_cancel(struct io_cancel_data *cd,
169 			     struct io_uring_task *tctx,
170 			     unsigned int issue_flags)
171 {
172 	bool all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
173 	struct io_ring_ctx *ctx = cd->ctx;
174 	struct io_tctx_node *node;
175 	int ret, nr = 0;
176 
177 	do {
178 		ret = io_try_cancel(tctx, cd, issue_flags);
179 		if (ret == -ENOENT)
180 			break;
181 		if (!all)
182 			return ret;
183 		nr++;
184 	} while (1);
185 
186 	/* slow path, try all io-wq's */
187 	io_ring_submit_lock(ctx, issue_flags);
188 	ret = -ENOENT;
189 	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
190 		ret = io_async_cancel_one(node->task->io_uring, cd);
191 		if (ret != -ENOENT) {
192 			if (!all)
193 				break;
194 			nr++;
195 		}
196 	}
197 	io_ring_submit_unlock(ctx, issue_flags);
198 	return all ? nr : ret;
199 }
200 
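/*
 * Issue IORING_OP_ASYNC_CANCEL. If matching by fd, resolve the normal or
 * fixed target file first, then run the cancelation via __io_async_cancel().
 */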
201 int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
202 {
203 	struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel);
204 	struct io_cancel_data cd = {
205 		.ctx	= req->ctx,
206 		.data	= cancel->addr,
207 		.flags	= cancel->flags,
208 		.opcode	= cancel->opcode,
209 		.seq	= atomic_inc_return(&req->ctx->cancel_seq),
210 	};
211 	struct io_uring_task *tctx = req->tctx;
212 	int ret;
213 
214 	if (cd.flags & IORING_ASYNC_CANCEL_FD) {
215 		if (req->flags & REQ_F_FIXED_FILE ||
216 		    cd.flags & IORING_ASYNC_CANCEL_FD_FIXED) {
217 			req->flags |= REQ_F_FIXED_FILE;
218 			req->file = io_file_get_fixed(req, cancel->fd,
219 							issue_flags);
220 		} else {
221 			req->file = io_file_get_normal(req, cancel->fd);
222 		}
223 		if (!req->file) {
224 			ret = -EBADF;
225 			goto done;
226 		}
227 		cd.file = req->file;
228 	}
229 
230 	ret = __io_async_cancel(&cd, tctx, issue_flags);
231 done:
232 	if (ret < 0)
233 		req_set_fail(req);
234 	io_req_set_res(req, ret, 0);
235 	return IOU_COMPLETE;
236 }
237 
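/*
 * One synchronous cancelation pass. A fixed target file is re-resolved on
 * every call, as the uring_lock may have been dropped since the last attempt.
 */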
238 static int __io_sync_cancel(struct io_uring_task *tctx,
239 			    struct io_cancel_data *cd, int fd)
240 {
241 	struct io_ring_ctx *ctx = cd->ctx;
242 
243 	/* the fixed file must be grabbed every time since we drop the uring_lock */
244 	if ((cd->flags & IORING_ASYNC_CANCEL_FD) &&
245 	    (cd->flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
246 		struct io_rsrc_node *node;
247 
248 		node = io_rsrc_node_lookup(&ctx->file_table.data, fd);
249 		if (unlikely(!node))
250 			return -EBADF;
251 		cd->file = io_slot_file(node);
252 		if (!cd->file)
253 			return -EBADF;
254 	}
255 
256 	return __io_async_cancel(cd, tctx, 0);
257 }
258 
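/*
 * Handle IORING_REGISTER_SYNC_CANCEL: copy the registration struct from
 * userspace, then retry the cancelation until it no longer returns -EALREADY,
 * a signal is pending, or the optional timeout expires.
 */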
259 int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg)
260 	__must_hold(&ctx->uring_lock)
261 {
262 	struct io_cancel_data cd = {
263 		.ctx	= ctx,
264 		.seq	= atomic_inc_return(&ctx->cancel_seq),
265 	};
266 	ktime_t timeout = KTIME_MAX;
267 	struct io_uring_sync_cancel_reg sc;
268 	struct file *file = NULL;
269 	DEFINE_WAIT(wait);
270 	int ret, i;
271 
272 	if (copy_from_user(&sc, arg, sizeof(sc)))
273 		return -EFAULT;
274 	if (sc.flags & ~CANCEL_FLAGS)
275 		return -EINVAL;
276 	for (i = 0; i < ARRAY_SIZE(sc.pad); i++)
277 		if (sc.pad[i])
278 			return -EINVAL;
279 	for (i = 0; i < ARRAY_SIZE(sc.pad2); i++)
280 		if (sc.pad2[i])
281 			return -EINVAL;
282 
283 	cd.data = sc.addr;
284 	cd.flags = sc.flags;
285 	cd.opcode = sc.opcode;
286 
287 	/* we can grab a normal file descriptor upfront */
288 	if ((cd.flags & IORING_ASYNC_CANCEL_FD) &&
289 	   !(cd.flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
290 		file = fget(sc.fd);
291 		if (!file)
292 			return -EBADF;
293 		cd.file = file;
294 	}
295 
296 	ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);
297 
298 	/* found something, done! */
299 	if (ret != -EALREADY)
300 		goto out;
301 
302 	if (sc.timeout.tv_sec != -1UL || sc.timeout.tv_nsec != -1UL) {
303 		struct timespec64 ts = {
304 			.tv_sec		= sc.timeout.tv_sec,
305 			.tv_nsec	= sc.timeout.tv_nsec
306 		};
307 
308 		timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns());
309 	}
310 
311 	/*
312 	 * Keep looking until we get -ENOENT. We'll get woken every time
313 	 * a request completes and will retry the cancelation.
314 	 */
315 	do {
316 		cd.seq = atomic_inc_return(&ctx->cancel_seq);
317 
318 		prepare_to_wait(&ctx->cq_wait, &wait, TASK_INTERRUPTIBLE);
319 
320 		ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);
321 
322 		mutex_unlock(&ctx->uring_lock);
323 		if (ret != -EALREADY)
324 			break;
325 
326 		ret = io_run_task_work_sig(ctx);
327 		if (ret < 0)
328 			break;
329 		ret = schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS);
330 		if (!ret) {
331 			ret = -ETIME;
332 			break;
333 		}
334 		mutex_lock(&ctx->uring_lock);
335 	} while (1);
336 
337 	finish_wait(&ctx->cq_wait, &wait);
338 	mutex_lock(&ctx->uring_lock);
339 
340 	if (ret == -ENOENT || ret > 0)
341 		ret = 0;
342 out:
343 	if (file)
344 		fput(file);
345 	return ret;
346 }
347 
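/*
 * Remove and cancel all requests on 'list' owned by the given task (or every
 * request if 'cancel_all' is set), using the opcode-specific 'cancel'
 * callback. Returns true if at least one request was canceled.
 */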
348 bool io_cancel_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
349 			  struct hlist_head *list, bool cancel_all,
350 			  bool (*cancel)(struct io_kiocb *))
351 {
352 	struct hlist_node *tmp;
353 	struct io_kiocb *req;
354 	bool found = false;
355 
356 	lockdep_assert_held(&ctx->uring_lock);
357 
358 	hlist_for_each_entry_safe(req, tmp, list, hash_node) {
359 		if (!io_match_task_safe(req, tctx, cancel_all))
360 			continue;
361 		hlist_del_init(&req->hash_node);
362 		if (cancel(req))
363 			found = true;
364 	}
365 
366 	return found;
367 }
368 
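/*
 * Cancel requests on 'list' matching 'cd', stopping after the first match
 * unless IORING_ASYNC_CANCEL_ALL is set. Returns the number of canceled
 * requests, or -ENOENT if none matched.
 */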
369 int io_cancel_remove(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
370 		     unsigned int issue_flags, struct hlist_head *list,
371 		     bool (*cancel)(struct io_kiocb *))
372 {
373 	struct hlist_node *tmp;
374 	struct io_kiocb *req;
375 	int nr = 0;
376 
377 	io_ring_submit_lock(ctx, issue_flags);
378 	hlist_for_each_entry_safe(req, tmp, list, hash_node) {
379 		if (!io_cancel_req_match(req, cd))
380 			continue;
381 		if (cancel(req))
382 			nr++;
383 		if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
384 			break;
385 	}
386 	io_ring_submit_unlock(ctx, issue_flags);
387 	return nr ?: -ENOENT;
388 }
389 
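/* Returns true if any request in the link chain has REQ_F_INFLIGHT set. */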
390 static bool io_match_linked(struct io_kiocb *head)
391 {
392 	struct io_kiocb *req;
393 
394 	io_for_each_link(req, head) {
395 		if (req->flags & REQ_F_INFLIGHT)
396 			return true;
397 	}
398 	return false;
399 }
400 
401 /*
402  * As io_match_task() but protected against racing with linked timeouts.
403  * User must not hold timeout_lock.
404  */
405 bool io_match_task_safe(struct io_kiocb *head, struct io_uring_task *tctx,
406 			bool cancel_all)
407 {
408 	bool matched;
409 
410 	if (tctx && head->tctx != tctx)
411 		return false;
412 	if (cancel_all)
413 		return true;
414 
415 	if (head->flags & REQ_F_LINK_TIMEOUT) {
416 		struct io_ring_ctx *ctx = head->ctx;
417 
418 		/* protect against races with linked timeouts */
419 		raw_spin_lock_irq(&ctx->timeout_lock);
420 		matched = io_match_linked(head);
421 		raw_spin_unlock_irq(&ctx->timeout_lock);
422 	} else {
423 		matched = io_match_linked(head);
424 	}
425 	return matched;
426 }
427 
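/*
 * Cancel the current task's io_uring requests on exit or exec, dropping any
 * registered ring fds first.
 */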
428 void __io_uring_cancel(bool cancel_all)
429 {
430 	io_uring_unreg_ringfd();
431 	io_uring_cancel_generic(cancel_all, NULL);
432 }
433 
434 struct io_task_cancel {
435 	struct io_uring_task *tctx;
436 	bool all;
437 };
438 
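/* io-wq cancelation callback: match queued work against a specific task. */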
439 static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
440 {
441 	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
442 	struct io_task_cancel *cancel = data;
443 
444 	return io_match_task_safe(req, cancel->tctx, cancel->all);
445 }
446 
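/*
 * Cancel deferred (drain) requests for the given task: everything queued up
 * to and including the last matching entry is failed with -ECANCELED.
 * Returns true if any entries were removed.
 */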
447 static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx,
448 					 struct io_uring_task *tctx,
449 					 bool cancel_all)
450 {
451 	struct io_defer_entry *de;
452 	LIST_HEAD(list);
453 
454 	list_for_each_entry_reverse(de, &ctx->defer_list, list) {
455 		if (io_match_task_safe(de->req, tctx, cancel_all)) {
456 			list_cut_position(&list, &ctx->defer_list, &de->list);
457 			break;
458 		}
459 	}
460 	if (list_empty(&list))
461 		return false;
462 
463 	while (!list_empty(&list)) {
464 		de = list_first_entry(&list, struct io_defer_entry, list);
465 		list_del_init(&de->list);
466 		ctx->nr_drained -= io_linked_nr(de->req);
467 		io_req_task_queue_fail(de->req, -ECANCELED);
468 		kfree(de);
469 	}
470 	return true;
471 }
472 
473 __cold bool io_cancel_ctx_cb(struct io_wq_work *work, void *data)
474 {
475 	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
476 
477 	return req->ctx == data;
478 }
479 
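/*
 * Walk every task attached to the ring and cancel io-wq work belonging to
 * 'ctx'. Returns true if any matching work was found.
 */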
480 static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx)
481 {
482 	struct io_tctx_node *node;
483 	enum io_wq_cancel cret;
484 	bool ret = false;
485 
486 	mutex_lock(&ctx->uring_lock);
487 	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
488 		struct io_uring_task *tctx = node->task->io_uring;
489 
490 		/*
491 		 * io_wq will stay alive while we hold uring_lock, because it's
492 		 * killed after the ctx nodes, which requires taking the lock.
493 		 */
494 		if (!tctx || !tctx->io_wq)
495 			continue;
496 		cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true);
497 		ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
498 	}
499 	mutex_unlock(&ctx->uring_lock);
500 
501 	return ret;
502 }
503 
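/*
 * One cancelation pass over 'ctx' for the given task (or all tasks if 'tctx'
 * is NULL): io-wq work, iopoll, deferred, poll, waitid, futex, uring_cmd and
 * timeout requests, plus pending local/task work. Returns true if progress
 * was made and the caller should retry.
 */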
504 __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
505 					 struct io_uring_task *tctx,
506 					 bool cancel_all, bool is_sqpoll_thread)
507 {
508 	struct io_task_cancel cancel = { .tctx = tctx, .all = cancel_all, };
509 	enum io_wq_cancel cret;
510 	bool ret = false;
511 
512 	/* set it so that io_req_local_work_add() will wake us up */
513 	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
514 		atomic_set(&ctx->cq_wait_nr, 1);
515 		smp_mb();
516 	}
517 
518 	/* if the ring failed during init, it couldn't have issued any requests */
519 	if (!ctx->rings)
520 		return false;
521 
522 	if (!tctx) {
523 		ret |= io_uring_try_cancel_iowq(ctx);
524 	} else if (tctx->io_wq) {
525 		/*
526 		 * This cancels requests from all rings, not only @ctx, but
527 		 * that's fine as the task is exiting or exec'ing.
528 		 */
529 		cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb,
530 				       &cancel, true);
531 		ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
532 	}
533 
534 	/* SQPOLL thread does its own polling */
535 	if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) ||
536 	    is_sqpoll_thread) {
537 		while (!wq_list_empty(&ctx->iopoll_list)) {
538 			io_iopoll_try_reap_events(ctx);
539 			ret = true;
540 			cond_resched();
541 		}
542 	}
543 
544 	if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
545 	    io_allowed_defer_tw_run(ctx))
546 		ret |= io_run_local_work(ctx, INT_MAX, INT_MAX) > 0;
547 	mutex_lock(&ctx->uring_lock);
548 	ret |= io_cancel_defer_files(ctx, tctx, cancel_all);
549 	ret |= io_poll_remove_all(ctx, tctx, cancel_all);
550 	ret |= io_waitid_remove_all(ctx, tctx, cancel_all);
551 	ret |= io_futex_remove_all(ctx, tctx, cancel_all);
552 	ret |= io_uring_try_cancel_uring_cmd(ctx, tctx, cancel_all);
553 	mutex_unlock(&ctx->uring_lock);
554 	ret |= io_kill_timeouts(ctx, tctx, cancel_all);
555 	if (tctx)
556 		ret |= io_run_task_work() > 0;
557 	else
558 		ret |= flush_delayed_work(&ctx->fallback_work);
559 	return ret;
560 }
561 
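/* Number of inflight requests for the task, or just the tracked ones. */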
562 static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
563 {
564 	if (tracked)
565 		return atomic_read(&tctx->inflight_tracked);
566 	return percpu_counter_sum(&tctx->inflight);
567 }
568 
569 /*
570  * Find any io_uring ctx that this task has registered or done IO on, and cancel
571  * requests. @sqd must be non-NULL iff this is an SQPOLL thread cancellation.
572  */
573 __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
574 {
575 	struct io_uring_task *tctx = current->io_uring;
576 	struct io_ring_ctx *ctx;
577 	struct io_tctx_node *node;
578 	unsigned long index;
579 	s64 inflight;
580 	DEFINE_WAIT(wait);
581 
582 	WARN_ON_ONCE(sqd && sqpoll_task_locked(sqd) != current);
583 
584 	if (!current->io_uring)
585 		return;
586 	if (tctx->io_wq)
587 		io_wq_exit_start(tctx->io_wq);
588 
589 	atomic_inc(&tctx->in_cancel);
590 	do {
591 		bool loop = false;
592 
593 		io_uring_drop_tctx_refs(current);
594 		if (!tctx_inflight(tctx, !cancel_all))
595 			break;
596 
597 		/* read completions before cancelations */
598 		inflight = tctx_inflight(tctx, false);
599 		if (!inflight)
600 			break;
601 
602 		if (!sqd) {
603 			xa_for_each(&tctx->xa, index, node) {
604 				/* sqpoll task will cancel all its requests */
605 				if (node->ctx->sq_data)
606 					continue;
607 				loop |= io_uring_try_cancel_requests(node->ctx,
608 							current->io_uring,
609 							cancel_all,
610 							false);
611 			}
612 		} else {
613 			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
614 				loop |= io_uring_try_cancel_requests(ctx,
615 								     current->io_uring,
616 								     cancel_all,
617 								     true);
618 		}
619 
620 		if (loop) {
621 			cond_resched();
622 			continue;
623 		}
624 
625 		prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE);
626 		io_run_task_work();
627 		io_uring_drop_tctx_refs(current);
628 		xa_for_each(&tctx->xa, index, node) {
629 			if (io_local_work_pending(node->ctx)) {
630 				WARN_ON_ONCE(node->ctx->submitter_task &&
631 					     node->ctx->submitter_task != current);
632 				goto end_wait;
633 			}
634 		}
635 		/*
636 		 * If we've seen completions, retry without waiting. This
637 		 * avoids a race where a completion comes in before we did
638 		 * prepare_to_wait().
639 		 */
640 		if (inflight == tctx_inflight(tctx, !cancel_all))
641 			schedule();
642 end_wait:
643 		finish_wait(&tctx->wait, &wait);
644 	} while (1);
645 
646 	io_uring_clean_tctx(tctx);
647 	if (cancel_all) {
648 		/*
649 		 * We shouldn't run task_works after cancel, so just leave
650 		 * ->in_cancel set for normal exit.
651 		 */
652 		atomic_dec(&tctx->in_cancel);
653 		/* for exec, all of current's requests should be gone, kill the tctx */
654 		__io_uring_free(current);
655 	}
656 }
657