// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/nospec.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "filetable.h"
#include "io_uring.h"
#include "tctx.h"
#include "sqpoll.h"
#include "uring_cmd.h"
#include "poll.h"
#include "timeout.h"
#include "waitid.h"
#include "futex.h"
#include "cancel.h"

struct io_cancel {
        struct file *file;
        u64 addr;
        u32 flags;
        s32 fd;
        u8 opcode;
};

#define CANCEL_FLAGS    (IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \
                         IORING_ASYNC_CANCEL_ANY | IORING_ASYNC_CANCEL_FD_FIXED | \
                         IORING_ASYNC_CANCEL_USERDATA | IORING_ASYNC_CANCEL_OP)

/*
 * Returns true if the request matches the criteria outlined by 'cd'.
 */
bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd)
{
        bool match_user_data = cd->flags & IORING_ASYNC_CANCEL_USERDATA;

        if (req->ctx != cd->ctx)
                return false;

        if (!(cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP)))
                match_user_data = true;

        if (cd->flags & IORING_ASYNC_CANCEL_ANY)
                goto check_seq;
        if (cd->flags & IORING_ASYNC_CANCEL_FD) {
                if (req->file != cd->file)
                        return false;
        }
        if (cd->flags & IORING_ASYNC_CANCEL_OP) {
                if (req->opcode != cd->opcode)
                        return false;
        }
        if (match_user_data && req->cqe.user_data != cd->data)
                return false;
        if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
check_seq:
                if (io_cancel_match_sequence(req, cd->seq))
                        return false;
        }

        return true;
}

static bool io_cancel_cb(struct io_wq_work *work, void *data)
{
        struct io_kiocb *req = container_of(work, struct io_kiocb, work);
        struct io_cancel_data *cd = data;

        return io_cancel_req_match(req, cd);
}

static int io_async_cancel_one(struct io_uring_task *tctx,
                               struct io_cancel_data *cd)
{
        enum io_wq_cancel cancel_ret;
        int ret = 0;
        bool all;

        if (!tctx || !tctx->io_wq)
                return -ENOENT;

        all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
        cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, cd, all);
        switch (cancel_ret) {
        case IO_WQ_CANCEL_OK:
                ret = 0;
                break;
        case IO_WQ_CANCEL_RUNNING:
                ret = -EALREADY;
                break;
        case IO_WQ_CANCEL_NOTFOUND:
                ret = -ENOENT;
                break;
        }

        return ret;
}

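/*
 * Attempt to cancel a request matching @cd. The task's io-wq is tried
 * first, then the poll, waitid and futex hash lists, and finally the
 * timeout list (unless matching by file descriptor). -ENOENT means no
 * matching request was found anywhere.
 */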
int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd,
                  unsigned issue_flags)
{
        struct io_ring_ctx *ctx = cd->ctx;
        int ret;

        WARN_ON_ONCE(!io_wq_current_is_worker() && tctx != current->io_uring);

        ret = io_async_cancel_one(tctx, cd);
        /*
         * Fall-through even for -EALREADY, as we may have a poll request
         * armed that needs unarming.
         */
        if (!ret)
                return 0;

        ret = io_poll_cancel(ctx, cd, issue_flags);
        if (ret != -ENOENT)
                return ret;

        ret = io_waitid_cancel(ctx, cd, issue_flags);
        if (ret != -ENOENT)
                return ret;

        ret = io_futex_cancel(ctx, cd, issue_flags);
        if (ret != -ENOENT)
                return ret;

        spin_lock(&ctx->completion_lock);
        if (!(cd->flags & IORING_ASYNC_CANCEL_FD))
                ret = io_timeout_cancel(ctx, cd);
        spin_unlock(&ctx->completion_lock);
        return ret;
}

int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
        struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel);

        if (unlikely(req->flags & REQ_F_BUFFER_SELECT))
                return -EINVAL;
        if (sqe->off || sqe->splice_fd_in)
                return -EINVAL;

        cancel->addr = READ_ONCE(sqe->addr);
        cancel->flags = READ_ONCE(sqe->cancel_flags);
        if (cancel->flags & ~CANCEL_FLAGS)
                return -EINVAL;
        if (cancel->flags & IORING_ASYNC_CANCEL_FD) {
                if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
                        return -EINVAL;
                cancel->fd = READ_ONCE(sqe->fd);
        }
        if (cancel->flags & IORING_ASYNC_CANCEL_OP) {
                if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
                        return -EINVAL;
                cancel->opcode = READ_ONCE(sqe->len);
        }

        return 0;
}

static int __io_async_cancel(struct io_cancel_data *cd,
                             struct io_uring_task *tctx,
                             unsigned int issue_flags)
{
        bool all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
        struct io_ring_ctx *ctx = cd->ctx;
        struct io_tctx_node *node;
        int ret, nr = 0;

        do {
                ret = io_try_cancel(tctx, cd, issue_flags);
                if (ret == -ENOENT)
                        break;
                if (!all)
                        return ret;
                nr++;
        } while (1);

        /* slow path, try all io-wq's */
        io_ring_submit_lock(ctx, issue_flags);
        ret = -ENOENT;
        list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
                ret = io_async_cancel_one(node->task->io_uring, cd);
                if (ret != -ENOENT) {
                        if (!all)
                                break;
                        nr++;
                }
        }
        io_ring_submit_unlock(ctx, issue_flags);
        return all ? nr : ret;
}

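/*
 * Issue side of IORING_OP_ASYNC_CANCEL: resolve the target file if matching
 * by (fixed) file descriptor, then run the cancelation against the
 * submitting task's context.
 */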
int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
{
        struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel);
        struct io_cancel_data cd = {
                .ctx = req->ctx,
                .data = cancel->addr,
                .flags = cancel->flags,
                .opcode = cancel->opcode,
                .seq = atomic_inc_return(&req->ctx->cancel_seq),
        };
        struct io_uring_task *tctx = req->tctx;
        int ret;

        if (cd.flags & IORING_ASYNC_CANCEL_FD) {
                if (req->flags & REQ_F_FIXED_FILE ||
                    cd.flags & IORING_ASYNC_CANCEL_FD_FIXED) {
                        req->flags |= REQ_F_FIXED_FILE;
                        req->file = io_file_get_fixed(req, cancel->fd,
                                                      issue_flags);
                } else {
                        req->file = io_file_get_normal(req, cancel->fd);
                }
                if (!req->file) {
                        ret = -EBADF;
                        goto done;
                }
                cd.file = req->file;
        }

        ret = __io_async_cancel(&cd, tctx, issue_flags);
done:
        if (ret < 0)
                req_set_fail(req);
        io_req_set_res(req, ret, 0);
        return IOU_COMPLETE;
}

static int __io_sync_cancel(struct io_uring_task *tctx,
                            struct io_cancel_data *cd, int fd)
{
        struct io_ring_ctx *ctx = cd->ctx;

        /* fixed must be grabbed every time since we drop the uring_lock */
        if ((cd->flags & IORING_ASYNC_CANCEL_FD) &&
            (cd->flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
                struct io_rsrc_node *node;

                node = io_rsrc_node_lookup(&ctx->file_table.data, fd);
                if (unlikely(!node))
                        return -EBADF;
                cd->file = io_slot_file(node);
                if (!cd->file)
                        return -EBADF;
        }

        return __io_async_cancel(cd, tctx, 0);
}

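/*
 * Synchronous cancelation, invoked via io_uring_register(). Unlike the async
 * opcode, this may wait (optionally with a timeout) until the matched
 * requests have been canceled or have completed.
 */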
int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg)
        __must_hold(&ctx->uring_lock)
{
        struct io_cancel_data cd = {
                .ctx = ctx,
                .seq = atomic_inc_return(&ctx->cancel_seq),
        };
        ktime_t timeout = KTIME_MAX;
        struct io_uring_sync_cancel_reg sc;
        struct file *file = NULL;
        DEFINE_WAIT(wait);
        int ret, i;

        if (copy_from_user(&sc, arg, sizeof(sc)))
                return -EFAULT;
        if (sc.flags & ~CANCEL_FLAGS)
                return -EINVAL;
        for (i = 0; i < ARRAY_SIZE(sc.pad); i++)
                if (sc.pad[i])
                        return -EINVAL;
        for (i = 0; i < ARRAY_SIZE(sc.pad2); i++)
                if (sc.pad2[i])
                        return -EINVAL;

        cd.data = sc.addr;
        cd.flags = sc.flags;
        cd.opcode = sc.opcode;

        /* we can grab a normal file descriptor upfront */
        if ((cd.flags & IORING_ASYNC_CANCEL_FD) &&
           !(cd.flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
                file = fget(sc.fd);
                if (!file)
                        return -EBADF;
                cd.file = file;
        }

        ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);

        /* found something, done! */
        if (ret != -EALREADY)
                goto out;

        if (sc.timeout.tv_sec != -1UL || sc.timeout.tv_nsec != -1UL) {
                struct timespec64 ts = {
                        .tv_sec = sc.timeout.tv_sec,
                        .tv_nsec = sc.timeout.tv_nsec
                };

                timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns());
        }

        /*
         * Keep looking until we get -ENOENT. We'll get woken every time a
         * request completes and will retry the cancelation.
         */
        do {
                cd.seq = atomic_inc_return(&ctx->cancel_seq);

                prepare_to_wait(&ctx->cq_wait, &wait, TASK_INTERRUPTIBLE);

                ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);

                mutex_unlock(&ctx->uring_lock);
                if (ret != -EALREADY)
                        break;

                ret = io_run_task_work_sig(ctx);
                if (ret < 0)
                        break;
                ret = schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS);
                if (!ret) {
                        ret = -ETIME;
                        break;
                }
                mutex_lock(&ctx->uring_lock);
        } while (1);

        finish_wait(&ctx->cq_wait, &wait);
        mutex_lock(&ctx->uring_lock);

        if (ret == -ENOENT || ret > 0)
                ret = 0;
out:
        if (file)
                fput(file);
        return ret;
}

bool io_cancel_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
                          struct hlist_head *list, bool cancel_all,
                          bool (*cancel)(struct io_kiocb *))
{
        struct hlist_node *tmp;
        struct io_kiocb *req;
        bool found = false;

        lockdep_assert_held(&ctx->uring_lock);

        hlist_for_each_entry_safe(req, tmp, list, hash_node) {
                if (!io_match_task_safe(req, tctx, cancel_all))
                        continue;
                hlist_del_init(&req->hash_node);
                if (cancel(req))
                        found = true;
        }

        return found;
}

int io_cancel_remove(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
                     unsigned int issue_flags, struct hlist_head *list,
                     bool (*cancel)(struct io_kiocb *))
{
        struct hlist_node *tmp;
        struct io_kiocb *req;
        int nr = 0;

        io_ring_submit_lock(ctx, issue_flags);
        hlist_for_each_entry_safe(req, tmp, list, hash_node) {
                if (!io_cancel_req_match(req, cd))
                        continue;
                if (cancel(req))
                        nr++;
                if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
                        break;
        }
        io_ring_submit_unlock(ctx, issue_flags);
        return nr ?: -ENOENT;
}

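/* Returns true if any request in @head's link chain is marked REQ_F_INFLIGHT. */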
static bool io_match_linked(struct io_kiocb *head)
{
        struct io_kiocb *req;

        io_for_each_link(req, head) {
                if (req->flags & REQ_F_INFLIGHT)
                        return true;
        }
        return false;
}

/*
 * As io_match_task() but protected against racing with linked timeouts.
 * User must not hold timeout_lock.
 */
bool io_match_task_safe(struct io_kiocb *head, struct io_uring_task *tctx,
                        bool cancel_all)
{
        bool matched;

        if (tctx && head->tctx != tctx)
                return false;
        if (cancel_all)
                return true;

        if (head->flags & REQ_F_LINK_TIMEOUT) {
                struct io_ring_ctx *ctx = head->ctx;

                /* protect against races with linked timeouts */
                raw_spin_lock_irq(&ctx->timeout_lock);
                matched = io_match_linked(head);
                raw_spin_unlock_irq(&ctx->timeout_lock);
        } else {
                matched = io_match_linked(head);
        }
        return matched;
}

void __io_uring_cancel(bool cancel_all)
{
        io_uring_unreg_ringfd();
        io_uring_cancel_generic(cancel_all, NULL);
}

struct io_task_cancel {
        struct io_uring_task *tctx;
        bool all;
};

static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
{
        struct io_kiocb *req = container_of(work, struct io_kiocb, work);
        struct io_task_cancel *cancel = data;

        return io_match_task_safe(req, cancel->tctx, cancel->all);
}

static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx,
                                         struct io_uring_task *tctx,
                                         bool cancel_all)
{
        struct io_defer_entry *de;
        LIST_HEAD(list);

        list_for_each_entry_reverse(de, &ctx->defer_list, list) {
                if (io_match_task_safe(de->req, tctx, cancel_all)) {
                        list_cut_position(&list, &ctx->defer_list, &de->list);
                        break;
                }
        }
        if (list_empty(&list))
                return false;

        while (!list_empty(&list)) {
                de = list_first_entry(&list, struct io_defer_entry, list);
                list_del_init(&de->list);
                ctx->nr_drained -= io_linked_nr(de->req);
                io_req_task_queue_fail(de->req, -ECANCELED);
                kfree(de);
        }
        return true;
}

__cold bool io_cancel_ctx_cb(struct io_wq_work *work, void *data)
{
        struct io_kiocb *req = container_of(work, struct io_kiocb, work);

        return req->ctx == data;
}

static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx)
{
        struct io_tctx_node *node;
        enum io_wq_cancel cret;
        bool ret = false;

        mutex_lock(&ctx->uring_lock);
        list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
                struct io_uring_task *tctx = node->task->io_uring;

                /*
                 * io_wq will stay alive while we hold uring_lock, because it's
                 * killed after ctx nodes, which requires taking the lock.
                 */
                if (!tctx || !tctx->io_wq)
                        continue;
                cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true);
                ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
        }
        mutex_unlock(&ctx->uring_lock);

        return ret;
}

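/*
 * Try to cancel as much as possible for @ctx on behalf of @tctx (or for any
 * task if @tctx is NULL): io-wq work, deferred/drained requests, poll,
 * waitid, futex and uring_cmd entries, and timeouts. Returns true if any
 * progress was made, so callers can keep retrying.
 */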
__cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
                                         struct io_uring_task *tctx,
                                         bool cancel_all, bool is_sqpoll_thread)
{
        struct io_task_cancel cancel = { .tctx = tctx, .all = cancel_all, };
        enum io_wq_cancel cret;
        bool ret = false;

        /* set it so io_req_local_work_add() would wake us up */
        if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
                atomic_set(&ctx->cq_wait_nr, 1);
                smp_mb();
        }

        /* failed during ring init, it couldn't have issued any requests */
        if (!ctx->rings)
                return false;

        if (!tctx) {
                ret |= io_uring_try_cancel_iowq(ctx);
        } else if (tctx->io_wq) {
                /*
                 * Cancels requests of all rings, not only @ctx, but
                 * it's fine as the task is in exit/exec.
                 */
                cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb,
                                       &cancel, true);
                ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
        }

        /* SQPOLL thread does its own polling */
        if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) ||
            is_sqpoll_thread) {
                while (!wq_list_empty(&ctx->iopoll_list)) {
                        io_iopoll_try_reap_events(ctx);
                        ret = true;
                        cond_resched();
                }
        }

        if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
            io_allowed_defer_tw_run(ctx))
                ret |= io_run_local_work(ctx, INT_MAX, INT_MAX) > 0;
        mutex_lock(&ctx->uring_lock);
        ret |= io_cancel_defer_files(ctx, tctx, cancel_all);
        ret |= io_poll_remove_all(ctx, tctx, cancel_all);
        ret |= io_waitid_remove_all(ctx, tctx, cancel_all);
        ret |= io_futex_remove_all(ctx, tctx, cancel_all);
        ret |= io_uring_try_cancel_uring_cmd(ctx, tctx, cancel_all);
        mutex_unlock(&ctx->uring_lock);
        ret |= io_kill_timeouts(ctx, tctx, cancel_all);
        if (tctx)
                ret |= io_run_task_work() > 0;
        else
                ret |= flush_delayed_work(&ctx->fallback_work);
        return ret;
}

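/*
 * Number of requests the task currently has in flight; if @tracked, only
 * count requests marked as inflight-tracked (REQ_F_INFLIGHT).
 */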
static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
{
        if (tracked)
                return atomic_read(&tctx->inflight_tracked);
        return percpu_counter_sum(&tctx->inflight);
}

/*
 * Find any io_uring ctx that this task has registered or done IO on, and cancel
 * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation.
 */
__cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
{
        struct io_uring_task *tctx = current->io_uring;
        struct io_ring_ctx *ctx;
        struct io_tctx_node *node;
        unsigned long index;
        s64 inflight;
        DEFINE_WAIT(wait);

        WARN_ON_ONCE(sqd && sqpoll_task_locked(sqd) != current);

        if (!current->io_uring)
                return;
        if (tctx->io_wq)
                io_wq_exit_start(tctx->io_wq);

        atomic_inc(&tctx->in_cancel);
        do {
                bool loop = false;

                io_uring_drop_tctx_refs(current);
                if (!tctx_inflight(tctx, !cancel_all))
                        break;

                /* read completions before cancelations */
                inflight = tctx_inflight(tctx, false);
                if (!inflight)
                        break;

                if (!sqd) {
                        xa_for_each(&tctx->xa, index, node) {
                                /* sqpoll task will cancel all its requests */
                                if (node->ctx->sq_data)
                                        continue;
                                loop |= io_uring_try_cancel_requests(node->ctx,
                                                        current->io_uring,
                                                        cancel_all,
                                                        false);
                        }
                } else {
                        list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
                                loop |= io_uring_try_cancel_requests(ctx,
                                                        current->io_uring,
                                                        cancel_all,
                                                        true);
                }

                if (loop) {
                        cond_resched();
                        continue;
                }

                prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE);
                io_run_task_work();
                io_uring_drop_tctx_refs(current);
                xa_for_each(&tctx->xa, index, node) {
                        if (io_local_work_pending(node->ctx)) {
                                WARN_ON_ONCE(node->ctx->submitter_task &&
                                             node->ctx->submitter_task != current);
                                goto end_wait;
                        }
                }
                /*
                 * If we've seen completions, retry without waiting. This
                 * avoids a race where a completion comes in before we did
                 * prepare_to_wait().
                 */
                if (inflight == tctx_inflight(tctx, !cancel_all))
                        schedule();
end_wait:
                finish_wait(&tctx->wait, &wait);
        } while (1);

        io_uring_clean_tctx(tctx);
        if (cancel_all) {
                /*
                 * We shouldn't run task_works after cancel, so just leave
                 * ->in_cancel set for normal exit.
                 */
                atomic_dec(&tctx->in_cancel);
                /* for exec all current's requests should be gone, kill tctx */
                __io_uring_free(current);
        }
}