// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "filetable.h"
#include "io_uring.h"
#include "tctx.h"
#include "sqpoll.h"
#include "uring_cmd.h"
#include "poll.h"
#include "timeout.h"
#include "waitid.h"
#include "futex.h"
#include "cancel.h"
#include "wait.h"

struct io_cancel {
	struct file			*file;
	u64				addr;
	u32				flags;
	s32				fd;
	u8				opcode;
};

#define CANCEL_FLAGS	(IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \
			 IORING_ASYNC_CANCEL_ANY | IORING_ASYNC_CANCEL_FD_FIXED | \
			 IORING_ASYNC_CANCEL_USERDATA | IORING_ASYNC_CANCEL_OP)

/*
 * Returns true if the request matches the criteria outlined by 'cd'.
 */
bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd)
{
	bool match_user_data = cd->flags & IORING_ASYNC_CANCEL_USERDATA;

	if (req->ctx != cd->ctx)
		return false;

	if (!(cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP)))
		match_user_data = true;

	if (cd->flags & IORING_ASYNC_CANCEL_ANY)
		goto check_seq;
	if (cd->flags & IORING_ASYNC_CANCEL_FD) {
		if (req->file != cd->file)
			return false;
	}
	if (cd->flags & IORING_ASYNC_CANCEL_OP) {
		if (req->opcode != cd->opcode)
			return false;
	}
	if (match_user_data && req->cqe.user_data != cd->data)
		return false;
	if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
check_seq:
		if (io_cancel_match_sequence(req, cd->seq))
			return false;
	}

	return true;
}

static bool io_cancel_cb(struct io_wq_work *work, void *data)
{
	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
	struct io_cancel_data *cd = data;

	return io_cancel_req_match(req, cd);
}

static int io_async_cancel_one(struct io_uring_task *tctx,
			       struct io_cancel_data *cd)
{
	enum io_wq_cancel cancel_ret;
	int ret = 0;
	bool all;

	if (!tctx || !tctx->io_wq)
		return -ENOENT;

	all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
	cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, cd, all);
	switch (cancel_ret) {
	case IO_WQ_CANCEL_OK:
		ret = 0;
		break;
	case IO_WQ_CANCEL_RUNNING:
		ret = -EALREADY;
		break;
	case IO_WQ_CANCEL_NOTFOUND:
		ret = -ENOENT;
		break;
	}

	return ret;
}

int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd,
		  unsigned issue_flags)
{
	struct io_ring_ctx *ctx = cd->ctx;
	int ret;

	WARN_ON_ONCE(!io_wq_current_is_worker() && tctx != current->io_uring);

	ret = io_async_cancel_one(tctx, cd);
	/*
	 * Fall-through even for -EALREADY, as we may have a poll armed
	 * that needs unarming.
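	 * Only a successful io-wq cancelation is final here; any other
	 * result falls through to the poll, waitid, futex and (for non-fd
	 * cancelations) timeout cancel paths below.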
	 */
	if (!ret)
		return 0;

	ret = io_poll_cancel(ctx, cd, issue_flags);
	if (ret != -ENOENT)
		return ret;

	ret = io_waitid_cancel(ctx, cd, issue_flags);
	if (ret != -ENOENT)
		return ret;

	ret = io_futex_cancel(ctx, cd, issue_flags);
	if (ret != -ENOENT)
		return ret;

	spin_lock(&ctx->completion_lock);
	if (!(cd->flags & IORING_ASYNC_CANCEL_FD))
		ret = io_timeout_cancel(ctx, cd);
	spin_unlock(&ctx->completion_lock);
	return ret;
}

int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel);

	if (unlikely(req->flags & REQ_F_BUFFER_SELECT))
		return -EINVAL;
	if (sqe->off || sqe->splice_fd_in)
		return -EINVAL;

	cancel->addr = READ_ONCE(sqe->addr);
	cancel->flags = READ_ONCE(sqe->cancel_flags);
	if (cancel->flags & ~CANCEL_FLAGS)
		return -EINVAL;
	if (cancel->flags & IORING_ASYNC_CANCEL_FD) {
		if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
			return -EINVAL;
		cancel->fd = READ_ONCE(sqe->fd);
	}
	if (cancel->flags & IORING_ASYNC_CANCEL_OP) {
		if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
			return -EINVAL;
		cancel->opcode = READ_ONCE(sqe->len);
	}

	return 0;
}

static int __io_async_cancel(struct io_cancel_data *cd,
			     struct io_uring_task *tctx,
			     unsigned int issue_flags)
{
	bool all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
	struct io_ring_ctx *ctx = cd->ctx;
	struct io_tctx_node *node;
	int ret, nr = 0;

	do {
		ret = io_try_cancel(tctx, cd, issue_flags);
		if (ret == -ENOENT)
			break;
		if (!all)
			return ret;
		nr++;
	} while (1);

	/* slow path, try all io-wq's */
	__set_current_state(TASK_RUNNING);
	io_ring_submit_lock(ctx, issue_flags);
	mutex_lock(&ctx->tctx_lock);
	ret = -ENOENT;
	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
		ret = io_async_cancel_one(node->task->io_uring, cd);
		if (ret != -ENOENT) {
			if (!all)
				break;
			nr++;
		}
	}
	mutex_unlock(&ctx->tctx_lock);
	io_ring_submit_unlock(ctx, issue_flags);
	return all ? nr : ret;
}

int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel);
	struct io_cancel_data cd = {
		.ctx	= req->ctx,
		.data	= cancel->addr,
		.flags	= cancel->flags,
		.opcode	= cancel->opcode,
		.seq	= atomic_inc_return(&req->ctx->cancel_seq),
	};
	struct io_uring_task *tctx = req->tctx;
	int ret;

	if (cd.flags & IORING_ASYNC_CANCEL_FD) {
		if (req->flags & REQ_F_FIXED_FILE ||
		    cd.flags & IORING_ASYNC_CANCEL_FD_FIXED) {
			req->flags |= REQ_F_FIXED_FILE;
			req->file = io_file_get_fixed(req, cancel->fd,
							issue_flags);
		} else {
			req->file = io_file_get_normal(req, cancel->fd);
		}
		if (!req->file) {
			ret = -EBADF;
			goto done;
		}
		cd.file = req->file;
	}

	ret = __io_async_cancel(&cd, tctx, issue_flags);
done:
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

static int __io_sync_cancel(struct io_uring_task *tctx,
			    struct io_cancel_data *cd, int fd)
{
	struct io_ring_ctx *ctx = cd->ctx;

	/* fixed must be grabbed every time since we drop the uring_lock */
	if ((cd->flags & IORING_ASYNC_CANCEL_FD) &&
	    (cd->flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
		struct io_rsrc_node *node;

		node = io_rsrc_node_lookup(&ctx->file_table.data, fd);
		if (unlikely(!node))
			return -EBADF;
		cd->file = io_slot_file(node);
		if (!cd->file)
			return -EBADF;
	}

	return __io_async_cancel(cd, tctx, 0);
}

int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg)
	__must_hold(&ctx->uring_lock)
{
	struct io_cancel_data cd = {
		.ctx	= ctx,
		.seq	= atomic_inc_return(&ctx->cancel_seq),
	};
	ktime_t timeout = KTIME_MAX;
	struct io_uring_sync_cancel_reg sc;
	struct file *file = NULL;
	DEFINE_WAIT(wait);
	int ret, i;

	if (copy_from_user(&sc, arg, sizeof(sc)))
		return -EFAULT;
	if (sc.flags & ~CANCEL_FLAGS)
		return -EINVAL;
	for (i = 0; i < ARRAY_SIZE(sc.pad); i++)
		if (sc.pad[i])
			return -EINVAL;
	for (i = 0; i < ARRAY_SIZE(sc.pad2); i++)
		if (sc.pad2[i])
			return -EINVAL;

	cd.data = sc.addr;
	cd.flags = sc.flags;
	cd.opcode = sc.opcode;

	/* we can grab a normal file descriptor upfront */
	if ((cd.flags & IORING_ASYNC_CANCEL_FD) &&
	   !(cd.flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
		file = fget(sc.fd);
		if (!file)
			return -EBADF;
		cd.file = file;
	}

	ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);

	/* found something, done! */
	if (ret != -EALREADY)
		goto out;

	if (sc.timeout.tv_sec != -1UL || sc.timeout.tv_nsec != -1UL) {
		struct timespec64 ts = {
			.tv_sec		= sc.timeout.tv_sec,
			.tv_nsec	= sc.timeout.tv_nsec
		};

		timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns());
	}

	/*
	 * Keep looking until we get -ENOENT. We'll get woken every time a
	 * request completes and will retry the cancelation.
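	 * The wait is on ctx->cq_wait; the uring_lock is dropped across the
	 * sleep and re-taken before retrying, and an expired timeout ends
	 * the loop with -ETIME.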
	 */
	do {
		cd.seq = atomic_inc_return(&ctx->cancel_seq);

		prepare_to_wait(&ctx->cq_wait, &wait, TASK_INTERRUPTIBLE);

		ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);

		mutex_unlock(&ctx->uring_lock);
		if (ret != -EALREADY)
			break;

		ret = io_run_task_work_sig(ctx);
		if (ret < 0)
			break;
		ret = schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS);
		if (!ret) {
			ret = -ETIME;
			break;
		}
		mutex_lock(&ctx->uring_lock);
	} while (1);

	finish_wait(&ctx->cq_wait, &wait);
	mutex_lock(&ctx->uring_lock);

	if (ret == -ENOENT || ret > 0)
		ret = 0;
out:
	if (file)
		fput(file);
	return ret;
}

bool io_cancel_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
			  struct hlist_head *list, bool cancel_all,
			  bool (*cancel)(struct io_kiocb *))
{
	struct hlist_node *tmp;
	struct io_kiocb *req;
	bool found = false;

	lockdep_assert_held(&ctx->uring_lock);

	hlist_for_each_entry_safe(req, tmp, list, hash_node) {
		if (!io_match_task_safe(req, tctx, cancel_all))
			continue;
		hlist_del_init(&req->hash_node);
		if (cancel(req))
			found = true;
	}

	return found;
}

int io_cancel_remove(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
		     unsigned int issue_flags, struct hlist_head *list,
		     bool (*cancel)(struct io_kiocb *))
{
	struct hlist_node *tmp;
	struct io_kiocb *req;
	int nr = 0;

	io_ring_submit_lock(ctx, issue_flags);
	hlist_for_each_entry_safe(req, tmp, list, hash_node) {
		if (!io_cancel_req_match(req, cd))
			continue;
		if (cancel(req))
			nr++;
		if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
			break;
	}
	io_ring_submit_unlock(ctx, issue_flags);
	return nr ?: -ENOENT;
}

static bool io_match_linked(struct io_kiocb *head)
{
	struct io_kiocb *req;

	io_for_each_link(req, head) {
		if (req->flags & REQ_F_INFLIGHT)
			return true;
	}
	return false;
}

/*
 * As io_match_task() but protected against racing with linked timeouts.
 * User must not hold timeout_lock.
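 * A NULL @tctx matches requests from any task, and @cancel_all matches
 * requests regardless of REQ_F_INFLIGHT.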
 */
bool io_match_task_safe(struct io_kiocb *head, struct io_uring_task *tctx,
			bool cancel_all)
{
	bool matched;

	if (tctx && head->tctx != tctx)
		return false;
	if (cancel_all)
		return true;

	if (head->flags & REQ_F_LINK_TIMEOUT) {
		struct io_ring_ctx *ctx = head->ctx;

		/* protect against races with linked timeouts */
		raw_spin_lock_irq(&ctx->timeout_lock);
		matched = io_match_linked(head);
		raw_spin_unlock_irq(&ctx->timeout_lock);
	} else {
		matched = io_match_linked(head);
	}
	return matched;
}

void __io_uring_cancel(bool cancel_all)
{
	io_uring_unreg_ringfd();
	io_uring_cancel_generic(cancel_all, NULL);
}

struct io_task_cancel {
	struct io_uring_task *tctx;
	bool all;
};

static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
{
	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
	struct io_task_cancel *cancel = data;

	return io_match_task_safe(req, cancel->tctx, cancel->all);
}

static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx,
					 struct io_uring_task *tctx,
					 bool cancel_all)
{
	struct io_defer_entry *de;
	LIST_HEAD(list);

	list_for_each_entry_reverse(de, &ctx->defer_list, list) {
		if (io_match_task_safe(de->req, tctx, cancel_all)) {
			list_cut_position(&list, &ctx->defer_list, &de->list);
			break;
		}
	}
	if (list_empty(&list))
		return false;

	while (!list_empty(&list)) {
		de = list_first_entry(&list, struct io_defer_entry, list);
		list_del_init(&de->list);
		ctx->nr_drained -= io_linked_nr(de->req);
		io_req_task_queue_fail(de->req, -ECANCELED);
		kfree(de);
	}
	return true;
}

__cold bool io_cancel_ctx_cb(struct io_wq_work *work, void *data)
{
	struct io_kiocb *req = container_of(work, struct io_kiocb, work);

	return req->ctx == data;
}

static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx)
{
	struct io_tctx_node *node;
	enum io_wq_cancel cret;
	bool ret = false;

	mutex_lock(&ctx->uring_lock);
	mutex_lock(&ctx->tctx_lock);
	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
		struct io_uring_task *tctx = node->task->io_uring;

		/*
		 * io_wq will stay alive while we hold uring_lock, because it's
		 * killed after ctx nodes, which requires taking the lock.
		 */
		if (!tctx || !tctx->io_wq)
			continue;
		cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true);
		ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
	}
	mutex_unlock(&ctx->tctx_lock);
	mutex_unlock(&ctx->uring_lock);

	return ret;
}

__cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
					 struct io_uring_task *tctx,
					 bool cancel_all, bool is_sqpoll_thread)
{
	struct io_task_cancel cancel = { .tctx = tctx, .all = cancel_all, };
	enum io_wq_cancel cret;
	bool ret = false;

	/* set it so io_req_local_work_add() would wake us up */
	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
		atomic_set(&ctx->cq_wait_nr, 1);
		smp_mb();
	}

	/* failed during ring init, it couldn't have issued any requests */
	if (!ctx->rings)
		return false;

	if (!tctx) {
		ret |= io_uring_try_cancel_iowq(ctx);
	} else if (tctx->io_wq) {
		/*
		 * Cancels requests of all rings, not only @ctx, but
		 * it's fine as the task is in exit/exec.
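		 * (io_cancel_task_cb() matches purely on the owning task,
		 * not on the ring a request belongs to.)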
		 */
		cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb,
				       &cancel, true);
		ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
	}

	/* SQPOLL thread does its own polling */
	if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) ||
	    is_sqpoll_thread) {
		while (!list_empty(&ctx->iopoll_list)) {
			io_iopoll_try_reap_events(ctx);
			ret = true;
			cond_resched();
		}
	}

	if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
	    io_allowed_defer_tw_run(ctx))
		ret |= io_run_local_work(ctx, INT_MAX, INT_MAX) > 0;
	mutex_lock(&ctx->uring_lock);
	ret |= io_cancel_defer_files(ctx, tctx, cancel_all);
	ret |= io_poll_remove_all(ctx, tctx, cancel_all);
	ret |= io_waitid_remove_all(ctx, tctx, cancel_all);
	ret |= io_futex_remove_all(ctx, tctx, cancel_all);
	ret |= io_uring_try_cancel_uring_cmd(ctx, tctx, cancel_all);
	mutex_unlock(&ctx->uring_lock);
	ret |= io_kill_timeouts(ctx, tctx, cancel_all);
	if (tctx)
		ret |= io_run_task_work() > 0;
	else
		ret |= flush_delayed_work(&ctx->fallback_work);
	return ret;
}

static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
{
	if (tracked)
		return atomic_read(&tctx->inflight_tracked);
	return percpu_counter_sum(&tctx->inflight);
}

/*
 * Find any io_uring ctx that this task has registered or done IO on, and
 * cancel requests. @sqd should be non-NULL iff this is an SQPOLL thread
 * cancellation.
 */
__cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
{
	struct io_uring_task *tctx = current->io_uring;
	struct io_ring_ctx *ctx;
	struct io_tctx_node *node;
	unsigned long index;
	s64 inflight;
	DEFINE_WAIT(wait);

	WARN_ON_ONCE(sqd && sqpoll_task_locked(sqd) != current);

	if (!current->io_uring)
		return;
	if (tctx->io_wq)
		io_wq_exit_start(tctx->io_wq);

	atomic_inc(&tctx->in_cancel);
	do {
		bool loop = false;

		io_uring_drop_tctx_refs(current);
		if (!tctx_inflight(tctx, !cancel_all))
			break;

		/* read completions before cancelations */
		inflight = tctx_inflight(tctx, false);
		if (!inflight)
			break;

		if (!sqd) {
			xa_for_each(&tctx->xa, index, node) {
				/* sqpoll task will cancel all its requests */
				if (node->ctx->sq_data)
					continue;
				loop |= io_uring_try_cancel_requests(node->ctx,
								     current->io_uring,
								     cancel_all,
								     false);
			}
		} else {
			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
				loop |= io_uring_try_cancel_requests(ctx,
								     current->io_uring,
								     cancel_all,
								     true);
		}

		if (loop) {
			cond_resched();
			continue;
		}

		prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE);
		io_run_task_work();
		io_uring_drop_tctx_refs(current);
		xa_for_each(&tctx->xa, index, node) {
			if (io_local_work_pending(node->ctx)) {
				WARN_ON_ONCE(node->ctx->submitter_task &&
					     node->ctx->submitter_task != current);
				goto end_wait;
			}
		}
		/*
		 * If we've seen completions, retry without waiting. This
		 * avoids a race where a completion comes in before we did
		 * prepare_to_wait().
		 */
		if (inflight == tctx_inflight(tctx, !cancel_all))
			schedule();
end_wait:
		finish_wait(&tctx->wait, &wait);
	} while (1);

	io_uring_clean_tctx(tctx);
	if (cancel_all) {
		/*
		 * We shouldn't run task_works after cancel, so just leave
		 * ->in_cancel set for normal exit.
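		 * The atomic_dec() below pairs with the atomic_inc() done at
		 * the top of this function.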
		 */
		atomic_dec(&tctx->in_cancel);
		/* for exec all current's requests should be gone, kill tctx */
		__io_uring_free(current);
	}
}