// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "filetable.h"
#include "io_uring.h"
#include "tctx.h"
#include "sqpoll.h"
#include "uring_cmd.h"
#include "poll.h"
#include "timeout.h"
#include "waitid.h"
#include "futex.h"
#include "cancel.h"
#include "wait.h"

struct io_cancel {
	struct file			*file;
	u64				addr;
	u32				flags;
	s32				fd;
	u8				opcode;
};

#define CANCEL_FLAGS	(IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \
			 IORING_ASYNC_CANCEL_ANY | IORING_ASYNC_CANCEL_FD_FIXED | \
			 IORING_ASYNC_CANCEL_USERDATA | IORING_ASYNC_CANCEL_OP)

/*
 * Returns true if the request matches the criteria outlined by 'cd'.
 */
bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd)
{
	bool match_user_data = cd->flags & IORING_ASYNC_CANCEL_USERDATA;

	if (req->ctx != cd->ctx)
		return false;

	if (!(cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP)))
		match_user_data = true;

	if (cd->flags & IORING_ASYNC_CANCEL_ANY)
		goto check_seq;
	if (cd->flags & IORING_ASYNC_CANCEL_FD) {
		if (req->file != cd->file)
			return false;
	}
	if (cd->flags & IORING_ASYNC_CANCEL_OP) {
		if (req->opcode != cd->opcode)
			return false;
	}
	if (match_user_data && req->cqe.user_data != cd->data)
		return false;
	if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
check_seq:
		if (io_cancel_match_sequence(req, cd->seq))
			return false;
	}

	return true;
}

static bool io_cancel_cb(struct io_wq_work *work, void *data)
{
	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
	struct io_cancel_data *cd = data;

	return io_cancel_req_match(req, cd);
}

static int io_async_cancel_one(struct io_uring_task *tctx,
			       struct io_cancel_data *cd)
{
	enum io_wq_cancel cancel_ret;
	int ret = 0;
	bool all;

	if (!tctx || !tctx->io_wq)
		return -ENOENT;

	all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
	cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, cd, all);
	switch (cancel_ret) {
	case IO_WQ_CANCEL_OK:
		ret = 0;
		break;
	case IO_WQ_CANCEL_RUNNING:
		ret = -EALREADY;
		break;
	case IO_WQ_CANCEL_NOTFOUND:
		ret = -ENOENT;
		break;
	}

	return ret;
}
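/*
 * io_try_cancel() walks the places a request may currently live, in
 * order: the task's io-wq, armed poll requests, waitid and futex waits,
 * and finally timeouts. -ENOENT means "not here, keep looking", and even
 * -EALREADY from io-wq falls through, since the request may also have a
 * poll handler armed that needs unarming.
 */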
int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd,
		  unsigned issue_flags)
{
	struct io_ring_ctx *ctx = cd->ctx;
	int ret;

	WARN_ON_ONCE(!io_wq_current_is_worker() && tctx != current->io_uring);

	ret = io_async_cancel_one(tctx, cd);
	/*
	 * Fall through even for -EALREADY, as we may have a poll request
	 * armed that needs unarming.
	 */
	if (!ret)
		return 0;

	ret = io_poll_cancel(ctx, cd, issue_flags);
	if (ret != -ENOENT)
		return ret;

	ret = io_waitid_cancel(ctx, cd, issue_flags);
	if (ret != -ENOENT)
		return ret;

	ret = io_futex_cancel(ctx, cd, issue_flags);
	if (ret != -ENOENT)
		return ret;

	spin_lock(&ctx->completion_lock);
	if (!(cd->flags & IORING_ASYNC_CANCEL_FD))
		ret = io_timeout_cancel(ctx, cd);
	spin_unlock(&ctx->completion_lock);
	return ret;
}

int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel);

	if (unlikely(req->flags & REQ_F_BUFFER_SELECT))
		return -EINVAL;
	if (sqe->off || sqe->splice_fd_in)
		return -EINVAL;

	cancel->addr = READ_ONCE(sqe->addr);
	cancel->flags = READ_ONCE(sqe->cancel_flags);
	if (cancel->flags & ~CANCEL_FLAGS)
		return -EINVAL;
	if (cancel->flags & IORING_ASYNC_CANCEL_FD) {
		if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
			return -EINVAL;
		cancel->fd = READ_ONCE(sqe->fd);
	}
	if (cancel->flags & IORING_ASYNC_CANCEL_OP) {
		u32 op;

		if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
			return -EINVAL;

		op = READ_ONCE(sqe->len);
		if (op >= IORING_OP_LAST)
			return -EINVAL;

		cancel->opcode = op;
	}

	return 0;
}

static int __io_async_cancel(struct io_cancel_data *cd,
			     struct io_uring_task *tctx,
			     unsigned int issue_flags)
{
	bool all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
	struct io_ring_ctx *ctx = cd->ctx;
	struct io_tctx_node *node;
	int ret, nr = 0;

	do {
		ret = io_try_cancel(tctx, cd, issue_flags);
		if (ret == -ENOENT)
			break;
		if (!all)
			return ret;
		nr++;
	} while (1);

	/* slow path, try all io-wq's */
	__set_current_state(TASK_RUNNING);
	io_ring_submit_lock(ctx, issue_flags);
	mutex_lock(&ctx->tctx_lock);
	ret = -ENOENT;
	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
		ret = io_async_cancel_one(node->task->io_uring, cd);
		if (ret != -ENOENT) {
			if (!all)
				break;
			nr++;
		}
	}
	mutex_unlock(&ctx->tctx_lock);
	io_ring_submit_unlock(ctx, issue_flags);
	return all ? nr : ret;
}
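/*
 * Issue side of IORING_OP_ASYNC_CANCEL. As an illustrative, liburing-style
 * sketch of the userspace counterpart (with 'ring', 'fd' and 'tag' being
 * application-provided), cancelling everything pending on one file:
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_cancel_fd(sqe, fd, IORING_ASYNC_CANCEL_ALL);
 *	io_uring_submit(&ring);
 *
 * or one request by the user_data it was submitted with:
 *
 *	io_uring_prep_cancel64(sqe, tag, 0);
 *
 * With IORING_ASYNC_CANCEL_ALL (or _ANY) set, the cancel CQE's res is the
 * number of requests found and canceled; otherwise it is 0, -ENOENT or
 * -EALREADY, mirroring the return of __io_async_cancel() above.
 */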
int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel);
	struct io_cancel_data cd = {
		.ctx = req->ctx,
		.data = cancel->addr,
		.flags = cancel->flags,
		.opcode = cancel->opcode,
		.seq = atomic_inc_return(&req->ctx->cancel_seq),
	};
	struct io_uring_task *tctx = req->tctx;
	int ret;

	if (cd.flags & IORING_ASYNC_CANCEL_FD) {
		if (req->flags & REQ_F_FIXED_FILE ||
		    cd.flags & IORING_ASYNC_CANCEL_FD_FIXED) {
			req->flags |= REQ_F_FIXED_FILE;
			req->file = io_file_get_fixed(req, cancel->fd,
						      issue_flags);
		} else {
			req->file = io_file_get_normal(req, cancel->fd);
		}
		if (!req->file) {
			ret = -EBADF;
			goto done;
		}
		cd.file = req->file;
	}

	ret = __io_async_cancel(&cd, tctx, issue_flags);
done:
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}
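/*
 * The synchronous flavour: __io_sync_cancel() and io_sync_cancel() back
 * the IORING_REGISTER_SYNC_CANCEL registration op, which posts no CQE but
 * blocks (optionally up to a timeout) until matching requests have gone
 * away. A liburing-style sketch of the caller side, assuming 'ring' and
 * 'tag' come from the application:
 *
 *	struct io_uring_sync_cancel_reg reg = {
 *		.addr = tag,
 *		.timeout = { .tv_sec = -1UL, .tv_nsec = -1UL },
 *	};
 *	int ret = io_uring_register_sync_cancel(&ring, &reg);
 *
 * A timeout of all ones means "wait forever", matching the -1UL checks in
 * io_sync_cancel() below.
 */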
static int __io_sync_cancel(struct io_uring_task *tctx,
			    struct io_cancel_data *cd, int fd)
{
	struct io_ring_ctx *ctx = cd->ctx;

	/* fixed must be grabbed every time since we drop the uring_lock */
	if ((cd->flags & IORING_ASYNC_CANCEL_FD) &&
	    (cd->flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
		struct io_rsrc_node *node;

		node = io_rsrc_node_lookup(&ctx->file_table.data, fd);
		if (unlikely(!node))
			return -EBADF;
		cd->file = io_slot_file(node);
		if (!cd->file)
			return -EBADF;
	}

	return __io_async_cancel(cd, tctx, 0);
}

int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg)
	__must_hold(&ctx->uring_lock)
{
	struct io_cancel_data cd = {
		.ctx = ctx,
		.seq = atomic_inc_return(&ctx->cancel_seq),
	};
	ktime_t timeout = KTIME_MAX;
	struct io_uring_sync_cancel_reg sc;
	struct file *file = NULL;
	DEFINE_WAIT(wait);
	int ret, i;

	if (copy_from_user(&sc, arg, sizeof(sc)))
		return -EFAULT;
	if (sc.flags & ~CANCEL_FLAGS)
		return -EINVAL;
	for (i = 0; i < ARRAY_SIZE(sc.pad); i++)
		if (sc.pad[i])
			return -EINVAL;
	for (i = 0; i < ARRAY_SIZE(sc.pad2); i++)
		if (sc.pad2[i])
			return -EINVAL;

	cd.data = sc.addr;
	cd.flags = sc.flags;
	cd.opcode = sc.opcode;

	/* we can grab a normal file descriptor upfront */
	if ((cd.flags & IORING_ASYNC_CANCEL_FD) &&
	   !(cd.flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
		file = fget(sc.fd);
		if (!file)
			return -EBADF;
		cd.file = file;
	}

	ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);

	/* found something, done! */
	if (ret != -EALREADY)
		goto out;

	if (sc.timeout.tv_sec != -1UL || sc.timeout.tv_nsec != -1UL) {
		struct timespec64 ts = {
			.tv_sec = sc.timeout.tv_sec,
			.tv_nsec = sc.timeout.tv_nsec
		};

		timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns());
	}

	/*
	 * Keep looking until we get -ENOENT. We'll get woken every time a
	 * request completes and will retry the cancelation.
	 */
	do {
		cd.seq = atomic_inc_return(&ctx->cancel_seq);

		prepare_to_wait(&ctx->cq_wait, &wait, TASK_INTERRUPTIBLE);

		ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);

		mutex_unlock(&ctx->uring_lock);
		if (ret != -EALREADY)
			break;

		ret = io_run_task_work_sig(ctx);
		if (ret < 0)
			break;
		ret = schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS);
		if (!ret) {
			ret = -ETIME;
			break;
		}
		mutex_lock(&ctx->uring_lock);
	} while (1);

	finish_wait(&ctx->cq_wait, &wait);
	mutex_lock(&ctx->uring_lock);

	if (ret == -ENOENT || ret > 0)
		ret = 0;
out:
	if (file)
		fput(file);
	return ret;
}

bool io_cancel_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
			  struct hlist_head *list, bool cancel_all,
			  bool (*cancel)(struct io_kiocb *))
{
	struct hlist_node *tmp;
	struct io_kiocb *req;
	bool found = false;

	lockdep_assert_held(&ctx->uring_lock);

	hlist_for_each_entry_safe(req, tmp, list, hash_node) {
		if (!io_match_task_safe(req, tctx, cancel_all))
			continue;
		hlist_del_init(&req->hash_node);
		if (cancel(req))
			found = true;
	}

	return found;
}

int io_cancel_remove(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
		     unsigned int issue_flags, struct hlist_head *list,
		     bool (*cancel)(struct io_kiocb *))
{
	struct hlist_node *tmp;
	struct io_kiocb *req;
	int nr = 0;

	io_ring_submit_lock(ctx, issue_flags);
	hlist_for_each_entry_safe(req, tmp, list, hash_node) {
		if (!io_cancel_req_match(req, cd))
			continue;
		if (cancel(req))
			nr++;
		if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
			break;
	}
	io_ring_submit_unlock(ctx, issue_flags);
	return nr ?: -ENOENT;
}
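/*
 * When not cancelling everything, only requests whose link chain carries
 * REQ_F_INFLIGHT are matched. io_match_linked() does the raw chain scan;
 * io_match_task_safe() wraps it with timeout_lock where a linked timeout
 * could mutate the chain underneath us.
 */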
static bool io_match_linked(struct io_kiocb *head)
{
	struct io_kiocb *req;

	io_for_each_link(req, head) {
		if (req->flags & REQ_F_INFLIGHT)
			return true;
	}
	return false;
}

/*
 * As io_match_task() but protected against racing with linked timeouts.
 * The caller must not hold timeout_lock.
 */
bool io_match_task_safe(struct io_kiocb *head, struct io_uring_task *tctx,
			bool cancel_all)
{
	bool matched;

	if (tctx && head->tctx != tctx)
		return false;
	if (cancel_all)
		return true;

	if (head->flags & REQ_F_LINK_TIMEOUT) {
		struct io_ring_ctx *ctx = head->ctx;

		/* protect against races with linked timeouts */
		raw_spin_lock_irq(&ctx->timeout_lock);
		matched = io_match_linked(head);
		raw_spin_unlock_irq(&ctx->timeout_lock);
	} else {
		matched = io_match_linked(head);
	}
	return matched;
}

void __io_uring_cancel(bool cancel_all)
{
	io_uring_unreg_ringfd();
	io_uring_cancel_generic(cancel_all, NULL);
}

struct io_task_cancel {
	struct io_uring_task *tctx;
	bool all;
};

static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
{
	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
	struct io_task_cancel *cancel = data;

	return io_match_task_safe(req, cancel->tctx, cancel->all);
}

static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx,
					 struct io_uring_task *tctx,
					 bool cancel_all)
{
	struct io_defer_entry *de;
	LIST_HEAD(list);

	list_for_each_entry_reverse(de, &ctx->defer_list, list) {
		if (io_match_task_safe(de->req, tctx, cancel_all)) {
			list_cut_position(&list, &ctx->defer_list, &de->list);
			break;
		}
	}
	if (list_empty(&list))
		return false;

	while (!list_empty(&list)) {
		de = list_first_entry(&list, struct io_defer_entry, list);
		list_del_init(&de->list);
		ctx->nr_drained -= io_linked_nr(de->req);
		io_req_task_queue_fail(de->req, -ECANCELED);
		kfree(de);
	}
	return true;
}

__cold bool io_cancel_ctx_cb(struct io_wq_work *work, void *data)
{
	struct io_kiocb *req = container_of(work, struct io_kiocb, work);

	return req->ctx == data;
}

static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx)
{
	struct io_tctx_node *node;
	enum io_wq_cancel cret;
	bool ret = false;

	mutex_lock(&ctx->uring_lock);
	mutex_lock(&ctx->tctx_lock);
	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
		struct io_uring_task *tctx = node->task->io_uring;

		/*
		 * io_wq will stay alive while we hold uring_lock, because it's
		 * killed after ctx nodes, which requires taking the lock.
		 */
		if (!tctx || !tctx->io_wq)
			continue;
		cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true);
		ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
	}
	mutex_unlock(&ctx->tctx_lock);
	mutex_unlock(&ctx->uring_lock);

	return ret;
}
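/*
 * One cancelation pass over a single ring. The return value means "did we
 * find anything" rather than success or failure: callers loop as long as
 * this returns true and only go to sleep once a full pass comes back
 * empty.
 */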
__cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
					 struct io_uring_task *tctx,
					 bool cancel_all,
					 bool is_sqpoll_thread)
{
	struct io_task_cancel cancel = { .tctx = tctx, .all = cancel_all, };
	enum io_wq_cancel cret;
	bool ret = false;

	/* set it so io_req_local_work_add() would wake us up */
	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
		atomic_set(&ctx->cq_wait_nr, 1);
		smp_mb();
	}

	/* failed during ring init, it couldn't have issued any requests */
	if (!ctx->rings)
		return false;

	if (!tctx) {
		ret |= io_uring_try_cancel_iowq(ctx);
	} else if (tctx->io_wq) {
		/*
		 * Cancels requests of all rings, not only @ctx, but
		 * it's fine as the task is in exit/exec.
		 */
		cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb,
				       &cancel, true);
		ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
	}

	/* SQPOLL thread does its own polling */
	if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) ||
	    is_sqpoll_thread) {
		while (!list_empty(&ctx->iopoll_list)) {
			io_iopoll_try_reap_events(ctx);
			ret = true;
			cond_resched();
		}
	}

	if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
	    io_allowed_defer_tw_run(ctx))
		ret |= io_run_local_work(ctx, INT_MAX, INT_MAX) > 0;
	mutex_lock(&ctx->uring_lock);
	ret |= io_cancel_defer_files(ctx, tctx, cancel_all);
	ret |= io_poll_remove_all(ctx, tctx, cancel_all);
	ret |= io_waitid_remove_all(ctx, tctx, cancel_all);
	ret |= io_futex_remove_all(ctx, tctx, cancel_all);
	ret |= io_uring_try_cancel_uring_cmd(ctx, tctx, cancel_all);
	mutex_unlock(&ctx->uring_lock);
	ret |= io_kill_timeouts(ctx, tctx, cancel_all);
	if (tctx)
		ret |= io_run_task_work() > 0;
	else
		ret |= flush_delayed_work(&ctx->fallback_work);
	return ret;
}
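/*
 * tctx_inflight() is the task's outstanding-request count: just the
 * explicitly tracked (REQ_F_INFLIGHT) subset when only those have to
 * drain, or the summed percpu counter when every request must be waited
 * out (cancel_all).
 */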
static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
{
	if (tracked)
		return atomic_read(&tctx->inflight_tracked);
	return percpu_counter_sum(&tctx->inflight);
}

/*
 * Find any io_uring ctx that this task has registered or done IO on, and cancel
 * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation.
 */
__cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
{
	struct io_uring_task *tctx = current->io_uring;
	struct io_ring_ctx *ctx;
	struct io_tctx_node *node;
	unsigned long index;
	s64 inflight;
	DEFINE_WAIT(wait);

	WARN_ON_ONCE(sqd && sqpoll_task_locked(sqd) != current);

	if (!current->io_uring)
		return;
	if (tctx->io_wq)
		io_wq_exit_start(tctx->io_wq);

	atomic_inc(&tctx->in_cancel);
	do {
		bool loop = false;

		io_uring_drop_tctx_refs(current);
		if (!tctx_inflight(tctx, !cancel_all))
			break;

		/* read completions before cancelations */
		inflight = tctx_inflight(tctx, false);
		if (!inflight)
			break;

		if (!sqd) {
			xa_for_each(&tctx->xa, index, node) {
				/* sqpoll task will cancel all its requests */
				if (node->ctx->sq_data)
					continue;
				loop |= io_uring_try_cancel_requests(node->ctx,
							current->io_uring,
							cancel_all, false);
			}
		} else {
			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
				loop |= io_uring_try_cancel_requests(ctx,
							current->io_uring,
							cancel_all, true);
		}

		if (loop) {
			cond_resched();
			continue;
		}

		prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE);
		io_run_task_work();
		io_uring_drop_tctx_refs(current);
		xa_for_each(&tctx->xa, index, node) {
			if (io_local_work_pending(node->ctx)) {
				WARN_ON_ONCE(node->ctx->submitter_task &&
					     node->ctx->submitter_task != current);
				goto end_wait;
			}
		}
		/*
		 * If we've seen completions, retry without waiting. This
		 * avoids a race where a completion comes in before we did
		 * prepare_to_wait().
		 */
		if (inflight == tctx_inflight(tctx, !cancel_all))
			schedule();
end_wait:
		finish_wait(&tctx->wait, &wait);
	} while (1);

	io_uring_clean_tctx(tctx);
	if (cancel_all) {
		/*
		 * We shouldn't run task_works after cancel, so just leave
		 * ->in_cancel set for normal exit.
		 */
		atomic_dec(&tctx->in_cancel);
		/* for exec all current's requests should be gone, kill tctx */
		__io_uring_free(current);
	}
}