xref: /linux/io_uring/cancel.c (revision f5d4feed174ce9fb3c42886a3c36038fd5a43e25)
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "filetable.h"
#include "io_uring.h"
#include "tctx.h"
#include "sqpoll.h"
#include "uring_cmd.h"
#include "poll.h"
#include "timeout.h"
#include "waitid.h"
#include "futex.h"
#include "cancel.h"
#include "wait.h"

struct io_cancel {
	struct file			*file;
	u64				addr;
	u32				flags;
	s32				fd;
	u8				opcode;
};

#define CANCEL_FLAGS	(IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \
			 IORING_ASYNC_CANCEL_ANY | IORING_ASYNC_CANCEL_FD_FIXED | \
			 IORING_ASYNC_CANCEL_USERDATA | IORING_ASYNC_CANCEL_OP)

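/*
 * Editorial illustration, not part of the kernel sources: a minimal sketch of
 * how these cancel flags are typically driven from userspace, assuming
 * liburing and its io_uring_prep_cancel64() helper (and an already
 * initialized 'ring'). Treat it as a sketch, not an API reference.
 *
 *	// Cancel all pending requests submitted with user_data == 0x1234.
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_cancel64(sqe, 0x1234, IORING_ASYNC_CANCEL_ALL);
 *	io_uring_submit(&ring);
 *
 *	// The cancel request posts its own CQE: res >= 0 is the number of
 *	// requests found and canceled when IORING_ASYNC_CANCEL_ALL is set;
 *	// without it, 0 means one match was canceled, -ENOENT that nothing
 *	// matched, and -EALREADY that the match was already running and may
 *	// not get canceled.
 */
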
/*
 * Returns true if the request matches the criteria outlined by 'cd'.
 */
bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd)
{
	bool match_user_data = cd->flags & IORING_ASYNC_CANCEL_USERDATA;

	if (req->ctx != cd->ctx)
		return false;

	if (!(cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP)))
		match_user_data = true;

	if (cd->flags & IORING_ASYNC_CANCEL_ANY)
		goto check_seq;
	if (cd->flags & IORING_ASYNC_CANCEL_FD) {
		if (req->file != cd->file)
			return false;
	}
	if (cd->flags & IORING_ASYNC_CANCEL_OP) {
		if (req->opcode != cd->opcode)
			return false;
	}
	if (match_user_data && req->cqe.user_data != cd->data)
		return false;
	if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
check_seq:
		if (io_cancel_match_sequence(req, cd->seq))
			return false;
	}

	return true;
}

static bool io_cancel_cb(struct io_wq_work *work, void *data)
{
	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
	struct io_cancel_data *cd = data;

	return io_cancel_req_match(req, cd);
}

static int io_async_cancel_one(struct io_uring_task *tctx,
			       struct io_cancel_data *cd)
{
	enum io_wq_cancel cancel_ret;
	int ret = 0;
	bool all;

	if (!tctx || !tctx->io_wq)
		return -ENOENT;

	all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
	cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, cd, all);
	switch (cancel_ret) {
	case IO_WQ_CANCEL_OK:
		ret = 0;
		break;
	case IO_WQ_CANCEL_RUNNING:
		ret = -EALREADY;
		break;
	case IO_WQ_CANCEL_NOTFOUND:
		ret = -ENOENT;
		break;
	}

	return ret;
}

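/*
 * Attempt to cancel the request described by 'cd' in every place it may be
 * queued: the io-wq of the given task context first, then the poll, waitid
 * and futex cancelation lists, and finally the timeouts (unless the caller
 * is matching by file descriptor). Returns 0 if a queued match was canceled,
 * -ENOENT if nothing matched, or a negative error from the last matching
 * attempt (e.g. -EALREADY for work that is already executing).
 */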
int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd,
		  unsigned issue_flags)
{
	struct io_ring_ctx *ctx = cd->ctx;
	int ret;

	WARN_ON_ONCE(!io_wq_current_is_worker() && tctx != current->io_uring);

	ret = io_async_cancel_one(tctx, cd);
	/*
	 * Fall through even for -EALREADY, as we may have a poll request
	 * armed that needs unarming.
	 */
	if (!ret)
		return 0;

	ret = io_poll_cancel(ctx, cd, issue_flags);
	if (ret != -ENOENT)
		return ret;

	ret = io_waitid_cancel(ctx, cd, issue_flags);
	if (ret != -ENOENT)
		return ret;

	ret = io_futex_cancel(ctx, cd, issue_flags);
	if (ret != -ENOENT)
		return ret;

	spin_lock(&ctx->completion_lock);
	if (!(cd->flags & IORING_ASYNC_CANCEL_FD))
		ret = io_timeout_cancel(ctx, cd);
	spin_unlock(&ctx->completion_lock);
	return ret;
}

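/*
 * Prepare an IORING_OP_ASYNC_CANCEL request: the target user_data comes from
 * sqe->addr, the cancel flags from sqe->cancel_flags, the file descriptor
 * (for IORING_ASYNC_CANCEL_FD) from sqe->fd and the opcode (for
 * IORING_ASYNC_CANCEL_OP) from sqe->len. Matching by fd or opcode cannot be
 * combined with IORING_ASYNC_CANCEL_ANY.
 */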
int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel);

	if (unlikely(req->flags & REQ_F_BUFFER_SELECT))
		return -EINVAL;
	if (sqe->off || sqe->splice_fd_in)
		return -EINVAL;

	cancel->addr = READ_ONCE(sqe->addr);
	cancel->flags = READ_ONCE(sqe->cancel_flags);
	if (cancel->flags & ~CANCEL_FLAGS)
		return -EINVAL;
	if (cancel->flags & IORING_ASYNC_CANCEL_FD) {
		if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
			return -EINVAL;
		cancel->fd = READ_ONCE(sqe->fd);
	}
	if (cancel->flags & IORING_ASYNC_CANCEL_OP) {
		if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
			return -EINVAL;
		cancel->opcode = READ_ONCE(sqe->len);
	}

	return 0;
}

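/*
 * Core of async cancelation: repeatedly try the given task context (the
 * common case) until nothing more is found, then fall back to walking every
 * task that has attached to this ring and poking its io-wq. With
 * IORING_ASYNC_CANCEL_ALL or IORING_ASYNC_CANCEL_ANY the return value is the
 * number of requests canceled, otherwise it is the result of the first match
 * found (or -ENOENT if there is none).
 */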
static int __io_async_cancel(struct io_cancel_data *cd,
			     struct io_uring_task *tctx,
			     unsigned int issue_flags)
{
	bool all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
	struct io_ring_ctx *ctx = cd->ctx;
	struct io_tctx_node *node;
	int ret, nr = 0;

	do {
		ret = io_try_cancel(tctx, cd, issue_flags);
		if (ret == -ENOENT)
			break;
		if (!all)
			return ret;
		nr++;
	} while (1);

	/* slow path, try all io-wq's */
	__set_current_state(TASK_RUNNING);
	io_ring_submit_lock(ctx, issue_flags);
	mutex_lock(&ctx->tctx_lock);
	ret = -ENOENT;
	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
		ret = io_async_cancel_one(node->task->io_uring, cd);
		if (ret != -ENOENT) {
			if (!all)
				break;
			nr++;
		}
	}
	mutex_unlock(&ctx->tctx_lock);
	io_ring_submit_unlock(ctx, issue_flags);
	return all ? nr : ret;
}

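/*
 * Issue IORING_OP_ASYNC_CANCEL. When matching by file descriptor the target
 * file is resolved first (fixed or normal fd table), then the actual work is
 * done by __io_async_cancel(). The result is posted as this request's CQE.
 */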
int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel);
	struct io_cancel_data cd = {
		.ctx	= req->ctx,
		.data	= cancel->addr,
		.flags	= cancel->flags,
		.opcode	= cancel->opcode,
		.seq	= atomic_inc_return(&req->ctx->cancel_seq),
	};
	struct io_uring_task *tctx = req->tctx;
	int ret;

	if (cd.flags & IORING_ASYNC_CANCEL_FD) {
		if (req->flags & REQ_F_FIXED_FILE ||
		    cd.flags & IORING_ASYNC_CANCEL_FD_FIXED) {
			req->flags |= REQ_F_FIXED_FILE;
			req->file = io_file_get_fixed(req, cancel->fd,
							issue_flags);
		} else {
			req->file = io_file_get_normal(req, cancel->fd);
		}
		if (!req->file) {
			ret = -EBADF;
			goto done;
		}
		cd.file = req->file;
	}

	ret = __io_async_cancel(&cd, tctx, issue_flags);
done:
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

static int __io_sync_cancel(struct io_uring_task *tctx,
			    struct io_cancel_data *cd, int fd)
{
	struct io_ring_ctx *ctx = cd->ctx;

	/* fixed must be grabbed every time since we drop the uring_lock */
	if ((cd->flags & IORING_ASYNC_CANCEL_FD) &&
	    (cd->flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
		struct io_rsrc_node *node;

		node = io_rsrc_node_lookup(&ctx->file_table.data, fd);
		if (unlikely(!node))
			return -EBADF;
		cd->file = io_slot_file(node);
		if (!cd->file)
			return -EBADF;
	}

	return __io_async_cancel(cd, tctx, 0);
}

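/*
 * Synchronous cancelation via io_uring_register(2) with
 * IORING_REGISTER_SYNC_CANCEL: copy a struct io_uring_sync_cancel_reg from
 * userspace, issue the cancelation, and if matching requests are still
 * running keep retrying, waiting on cq_wait, until nothing matches, the
 * optional timeout expires or a signal arrives.
 *
 * Editorial illustration, not part of the kernel sources: a userspace
 * sketch, assuming liburing's io_uring_register_sync_cancel() wrapper exists
 * in the form shown here.
 *
 *	struct io_uring_sync_cancel_reg reg = {
 *		.addr		= 0x1234,	// user_data to match
 *		.flags		= IORING_ASYNC_CANCEL_ALL,
 *		// tv_sec/tv_nsec of -1 mean "no timeout", see the code below
 *		.timeout	= { .tv_sec = -1, .tv_nsec = -1 },
 *	};
 *
 *	int ret = io_uring_register_sync_cancel(&ring, &reg);
 */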
int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg)
	__must_hold(&ctx->uring_lock)
{
	struct io_cancel_data cd = {
		.ctx	= ctx,
		.seq	= atomic_inc_return(&ctx->cancel_seq),
	};
	ktime_t timeout = KTIME_MAX;
	struct io_uring_sync_cancel_reg sc;
	struct file *file = NULL;
	DEFINE_WAIT(wait);
	int ret, i;

	if (copy_from_user(&sc, arg, sizeof(sc)))
		return -EFAULT;
	if (sc.flags & ~CANCEL_FLAGS)
		return -EINVAL;
	for (i = 0; i < ARRAY_SIZE(sc.pad); i++)
		if (sc.pad[i])
			return -EINVAL;
	for (i = 0; i < ARRAY_SIZE(sc.pad2); i++)
		if (sc.pad2[i])
			return -EINVAL;

	cd.data = sc.addr;
	cd.flags = sc.flags;
	cd.opcode = sc.opcode;

	/* we can grab a normal file descriptor upfront */
	if ((cd.flags & IORING_ASYNC_CANCEL_FD) &&
	   !(cd.flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
		file = fget(sc.fd);
		if (!file)
			return -EBADF;
		cd.file = file;
	}

	ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);

	/* found something, done! */
	if (ret != -EALREADY)
		goto out;

	if (sc.timeout.tv_sec != -1UL || sc.timeout.tv_nsec != -1UL) {
		struct timespec64 ts = {
			.tv_sec		= sc.timeout.tv_sec,
			.tv_nsec	= sc.timeout.tv_nsec
		};

		timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns());
	}

	/*
	 * Keep looking until we get -ENOENT. We'll get woken every time
	 * a request completes and will retry the cancelation.
	 */
	do {
		cd.seq = atomic_inc_return(&ctx->cancel_seq);

		prepare_to_wait(&ctx->cq_wait, &wait, TASK_INTERRUPTIBLE);

		ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);

		mutex_unlock(&ctx->uring_lock);
		if (ret != -EALREADY)
			break;

		ret = io_run_task_work_sig(ctx);
		if (ret < 0)
			break;
		ret = schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS);
		if (!ret) {
			ret = -ETIME;
			break;
		}
		mutex_lock(&ctx->uring_lock);
	} while (1);

	finish_wait(&ctx->cq_wait, &wait);
	mutex_lock(&ctx->uring_lock);

	if (ret == -ENOENT || ret > 0)
		ret = 0;
out:
	if (file)
		fput(file);
	return ret;
}

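/*
 * Cancel every request on 'list' (an opcode's per-ctx cancelation hash list)
 * that matches 'tctx' and 'cancel_all' according to io_match_task_safe(),
 * using the opcode-specific 'cancel' callback. Returns true if at least one
 * request was canceled.
 */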
bool io_cancel_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
			  struct hlist_head *list, bool cancel_all,
			  bool (*cancel)(struct io_kiocb *))
{
	struct hlist_node *tmp;
	struct io_kiocb *req;
	bool found = false;

	lockdep_assert_held(&ctx->uring_lock);

	hlist_for_each_entry_safe(req, tmp, list, hash_node) {
		if (!io_match_task_safe(req, tctx, cancel_all))
			continue;
		hlist_del_init(&req->hash_node);
		if (cancel(req))
			found = true;
	}

	return found;
}

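/*
 * Cancel requests on 'list' that match 'cd', stopping after the first match
 * unless IORING_ASYNC_CANCEL_ALL is set. Returns the number of canceled
 * requests, or -ENOENT if none matched.
 */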
int io_cancel_remove(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
		     unsigned int issue_flags, struct hlist_head *list,
		     bool (*cancel)(struct io_kiocb *))
{
	struct hlist_node *tmp;
	struct io_kiocb *req;
	int nr = 0;

	io_ring_submit_lock(ctx, issue_flags);
	hlist_for_each_entry_safe(req, tmp, list, hash_node) {
		if (!io_cancel_req_match(req, cd))
			continue;
		if (cancel(req))
			nr++;
		if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
			break;
	}
	io_ring_submit_unlock(ctx, issue_flags);
	return nr ?: -ENOENT;
}

static bool io_match_linked(struct io_kiocb *head)
{
	struct io_kiocb *req;

	io_for_each_link(req, head) {
		if (req->flags & REQ_F_INFLIGHT)
			return true;
	}
	return false;
}

/*
 * As io_match_task() but protected against racing with linked timeouts.
 * User must not hold timeout_lock.
 */
bool io_match_task_safe(struct io_kiocb *head, struct io_uring_task *tctx,
			bool cancel_all)
{
	bool matched;

	if (tctx && head->tctx != tctx)
		return false;
	if (cancel_all)
		return true;

	if (head->flags & REQ_F_LINK_TIMEOUT) {
		struct io_ring_ctx *ctx = head->ctx;

		/* protect against races with linked timeouts */
		raw_spin_lock_irq(&ctx->timeout_lock);
		matched = io_match_linked(head);
		raw_spin_unlock_irq(&ctx->timeout_lock);
	} else {
		matched = io_match_linked(head);
	}
	return matched;
}

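/*
 * Entry point for task-level cancelation, called from the exit/exec paths
 * via the io_uring_files_cancel()/io_uring_task_cancel() helpers: drop any
 * registered ring fds, then cancel this task's requests on every ring it
 * has used.
 */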
void __io_uring_cancel(bool cancel_all)
{
	io_uring_unreg_ringfd();
	io_uring_cancel_generic(cancel_all, NULL);
}

struct io_task_cancel {
	struct io_uring_task *tctx;
	bool all;
};

static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
{
	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
	struct io_task_cancel *cancel = data;

	return io_match_task_safe(req, cancel->tctx, cancel->all);
}

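/*
 * Cancel deferred (drain) requests. The defer list is scanned from the tail
 * for the newest entry matching 'tctx'/'cancel_all'; that entry and
 * everything queued before it are removed and failed with -ECANCELED.
 */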
static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx,
					 struct io_uring_task *tctx,
					 bool cancel_all)
{
	struct io_defer_entry *de;
	LIST_HEAD(list);

	list_for_each_entry_reverse(de, &ctx->defer_list, list) {
		if (io_match_task_safe(de->req, tctx, cancel_all)) {
			list_cut_position(&list, &ctx->defer_list, &de->list);
			break;
		}
	}
	if (list_empty(&list))
		return false;

	while (!list_empty(&list)) {
		de = list_first_entry(&list, struct io_defer_entry, list);
		list_del_init(&de->list);
		ctx->nr_drained -= io_linked_nr(de->req);
		io_req_task_queue_fail(de->req, -ECANCELED);
		kfree(de);
	}
	return true;
}

__cold bool io_cancel_ctx_cb(struct io_wq_work *work, void *data)
{
	struct io_kiocb *req = container_of(work, struct io_kiocb, work);

	return req->ctx == data;
}

static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx)
{
	struct io_tctx_node *node;
	enum io_wq_cancel cret;
	bool ret = false;

	mutex_lock(&ctx->uring_lock);
	mutex_lock(&ctx->tctx_lock);
	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
		struct io_uring_task *tctx = node->task->io_uring;

		/*
		 * io_wq will stay alive while we hold uring_lock, because it's
		 * killed after the ctx nodes, which requires taking the lock.
		 */
		if (!tctx || !tctx->io_wq)
			continue;
		cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true);
		ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
	}
	mutex_unlock(&ctx->tctx_lock);
	mutex_unlock(&ctx->uring_lock);

	return ret;
}

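/*
 * One pass of request cancelation for 'ctx': io-wq work, deferred requests,
 * poll/waitid/futex entries, uring_cmds, timeouts and pending task work.
 * Returns true if anything was found or canceled, in which case the caller
 * is expected to loop and call it again.
 */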
__cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
					 struct io_uring_task *tctx,
					 bool cancel_all, bool is_sqpoll_thread)
{
	struct io_task_cancel cancel = { .tctx = tctx, .all = cancel_all, };
	enum io_wq_cancel cret;
	bool ret = false;

	/* set it so io_req_local_work_add() would wake us up */
	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
		atomic_set(&ctx->cq_wait_nr, 1);
		smp_mb();
	}

	/* failed during ring init, it couldn't have issued any requests */
	if (!ctx->rings)
		return false;

	if (!tctx) {
		ret |= io_uring_try_cancel_iowq(ctx);
	} else if (tctx->io_wq) {
		/*
		 * Cancels requests of all rings, not only @ctx, but
		 * it's fine as the task is in exit/exec.
		 */
		cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb,
				       &cancel, true);
		ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
	}

	/* SQPOLL thread does its own polling */
	if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) ||
	    is_sqpoll_thread) {
		while (!list_empty(&ctx->iopoll_list)) {
			io_iopoll_try_reap_events(ctx);
			ret = true;
			cond_resched();
		}
	}

	if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
	    io_allowed_defer_tw_run(ctx))
		ret |= io_run_local_work(ctx, INT_MAX, INT_MAX) > 0;
	mutex_lock(&ctx->uring_lock);
	ret |= io_cancel_defer_files(ctx, tctx, cancel_all);
	ret |= io_poll_remove_all(ctx, tctx, cancel_all);
	ret |= io_waitid_remove_all(ctx, tctx, cancel_all);
	ret |= io_futex_remove_all(ctx, tctx, cancel_all);
	ret |= io_uring_try_cancel_uring_cmd(ctx, tctx, cancel_all);
	mutex_unlock(&ctx->uring_lock);
	ret |= io_kill_timeouts(ctx, tctx, cancel_all);
	if (tctx)
		ret |= io_run_task_work() > 0;
	else
		ret |= flush_delayed_work(&ctx->fallback_work);
	return ret;
}

static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
{
	if (tracked)
		return atomic_read(&tctx->inflight_tracked);
	return percpu_counter_sum(&tctx->inflight);
}

/*
 * Find any io_uring ctx that this task has registered or done IO on, and
 * cancel its requests. @sqd should be non-NULL iff this is an SQPOLL thread
 * cancellation.
 */
__cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
{
	struct io_uring_task *tctx = current->io_uring;
	struct io_ring_ctx *ctx;
	struct io_tctx_node *node;
	unsigned long index;
	s64 inflight;
	DEFINE_WAIT(wait);

	WARN_ON_ONCE(sqd && sqpoll_task_locked(sqd) != current);

	if (!current->io_uring)
		return;
	if (tctx->io_wq)
		io_wq_exit_start(tctx->io_wq);

	atomic_inc(&tctx->in_cancel);
	do {
		bool loop = false;

		io_uring_drop_tctx_refs(current);
		if (!tctx_inflight(tctx, !cancel_all))
			break;

		/* read completions before cancelations */
		inflight = tctx_inflight(tctx, false);
		if (!inflight)
			break;

		if (!sqd) {
			xa_for_each(&tctx->xa, index, node) {
				/* sqpoll task will cancel all its requests */
				if (node->ctx->sq_data)
					continue;
				loop |= io_uring_try_cancel_requests(node->ctx,
							current->io_uring,
							cancel_all,
							false);
			}
		} else {
			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
				loop |= io_uring_try_cancel_requests(ctx,
								     current->io_uring,
								     cancel_all,
								     true);
		}

		if (loop) {
			cond_resched();
			continue;
		}

		prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE);
		io_run_task_work();
		io_uring_drop_tctx_refs(current);
		xa_for_each(&tctx->xa, index, node) {
			if (io_local_work_pending(node->ctx)) {
				WARN_ON_ONCE(node->ctx->submitter_task &&
					     node->ctx->submitter_task != current);
				goto end_wait;
			}
		}
		/*
		 * If we've seen completions, retry without waiting. This
		 * avoids a race where a completion comes in before we did
		 * prepare_to_wait().
		 */
		if (inflight == tctx_inflight(tctx, !cancel_all))
			schedule();
end_wait:
		finish_wait(&tctx->wait, &wait);
	} while (1);

	io_uring_clean_tctx(tctx);
	if (cancel_all) {
		/*
		 * We shouldn't run task_works after cancel, so just leave
		 * ->in_cancel set for normal exit.
		 */
		atomic_dec(&tctx->in_cancel);
		/* for exec all current's requests should be gone, kill tctx */
		__io_uring_free(current);
	}
}
661