Lines Matching "de", "-", "spread"

1 // SPDX-License-Identifier: GPL-2.0
14 * through a control-dependency in io_get_cqe (smp_store_release to
39 * Copyright (C) 2018-2019 Jens Axboe
40 * Copyright (c) 2018-2019 Christoph Hellwig
80 #include "io-wq.h"
137 * so that tests against ->cq_wait_nr would fail and skip wake_up().
139 #define IO_CQ_WAKE_INIT (-1U)
140 /* Forced wake up if there is a waiter regardless of ->cq_wait_nr */
157 static int __read_mostly sysctl_io_uring_group = -1;
182 req->ctx = IO_URING_PTR_POISON; in io_poison_cached_req()
183 req->tctx = IO_URING_PTR_POISON; in io_poison_cached_req()
184 req->file = IO_URING_PTR_POISON; in io_poison_cached_req()
185 req->creds = IO_URING_PTR_POISON; in io_poison_cached_req()
186 req->io_task_work.func = IO_URING_PTR_POISON; in io_poison_cached_req()
187 req->apoll = IO_URING_PTR_POISON; in io_poison_cached_req()
193 req->async_data = IO_URING_PTR_POISON; in io_poison_req()
194 req->kbuf = IO_URING_PTR_POISON; in io_poison_req()
195 req->comp_list.next = IO_URING_PTR_POISON; in io_poison_req()
196 req->file_node = IO_URING_PTR_POISON; in io_poison_req()
197 req->link = IO_URING_PTR_POISON; in io_poison_req()
202 return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head); in __io_cqring_events()
207 return READ_ONCE(ctx->rings->cq.tail) - READ_ONCE(ctx->rings->cq.head); in __io_cqring_events_user()
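The two helpers above count pending completions as the unsigned difference between the CQ tail and head, which stays correct across index wraparound. Below is a minimal userspace-side sketch of the same arithmetic; the struct and field names (cq_view, khead, ktail) are illustrative, not taken from the kernel or liburing.

#include <stdint.h>

/* Illustrative view of a CQ ring's shared indices; names are made up. */
struct cq_view {
        const uint32_t *khead;  /* last entry consumed by userspace */
        const uint32_t *ktail;  /* published by the kernel with a release store */
};

/* Number of CQEs ready to reap: tail - head, unsigned wraparound intended. */
static inline uint32_t cq_ready(const struct cq_view *cq)
{
        uint32_t head = *cq->khead;
        uint32_t tail = __atomic_load_n(cq->ktail, __ATOMIC_ACQUIRE);

        return tail - head;
}

The acquire load on the tail mirrors the ordering the header comment above alludes to: entries are only read after the index that publishes them.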
215 if (req->flags & REQ_F_INFLIGHT) in io_match_linked()
230 if (tctx && head->tctx != tctx) in io_match_task_safe()
235 if (head->flags & REQ_F_LINK_TIMEOUT) { in io_match_task_safe()
236 struct io_ring_ctx *ctx = head->ctx; in io_match_task_safe()
239 raw_spin_lock_irq(&ctx->timeout_lock); in io_match_task_safe()
241 raw_spin_unlock_irq(&ctx->timeout_lock); in io_match_task_safe()
258 wq_stack_add_head(&req->comp_list, &ctx->submit_state.free_list); in io_req_add_to_cache()
265 complete(&ctx->ref_comp); in io_ring_ctx_ref_free()
272 struct llist_node *node = llist_del_all(&ctx->fallback_llist); in io_fallback_req_func()
276 percpu_ref_get(&ctx->refs); in io_fallback_req_func()
277 mutex_lock(&ctx->uring_lock); in io_fallback_req_func()
279 req->io_task_work.func(req, ts); in io_fallback_req_func()
281 mutex_unlock(&ctx->uring_lock); in io_fallback_req_func()
282 percpu_ref_put(&ctx->refs); in io_fallback_req_func()
292 table->hbs = kvmalloc_array(hash_buckets, sizeof(table->hbs[0]), in io_alloc_hash_table()
294 if (table->hbs) in io_alloc_hash_table()
297 return -ENOMEM; in io_alloc_hash_table()
298 bits--; in io_alloc_hash_table()
301 table->hash_bits = bits; in io_alloc_hash_table()
303 INIT_HLIST_HEAD(&table->hbs[i].list); in io_alloc_hash_table()
309 io_alloc_cache_free(&ctx->apoll_cache, kfree); in io_free_alloc_caches()
310 io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free); in io_free_alloc_caches()
311 io_alloc_cache_free(&ctx->rw_cache, io_rw_cache_free); in io_free_alloc_caches()
312 io_alloc_cache_free(&ctx->cmd_cache, io_cmd_cache_free); in io_free_alloc_caches()
327 xa_init(&ctx->io_bl_xa); in io_ring_ctx_alloc()
331 * 32 entries per hash list if totally full and uniformly spread, but in io_ring_ctx_alloc()
334 hash_bits = ilog2(p->cq_entries) - 5; in io_ring_ctx_alloc()
336 if (io_alloc_hash_table(&ctx->cancel_table, hash_bits)) in io_ring_ctx_alloc()
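io_alloc_hash_table() above retries with ever fewer buckets until kvmalloc_array() succeeds, and the caller picks hash_bits = ilog2(cq_entries) - 5 so a completely full CQ spreads to roughly 32 entries per bucket. A standalone sketch of that sizing-and-shrink pattern follows; the names and the lower clamp are illustrative, not the kernel's.

#include <stdlib.h>

struct demo_bucket { void *first; };
struct demo_table  { struct demo_bucket *hbs; unsigned hash_bits; };

/* Aim for ~32 entries per bucket, then halve the table until the
 * allocation succeeds; give up at a single bucket bit. */
static int demo_alloc_table(struct demo_table *t, unsigned cq_entries)
{
        unsigned bits = 0;

        while ((32u << bits) < cq_entries)
                bits++;                 /* bits ~= ilog2(cq_entries) - 5 */
        if (!bits)
                bits = 1;

        do {
                t->hbs = calloc(1ul << bits, sizeof(t->hbs[0]));
                if (t->hbs) {
                        t->hash_bits = bits;
                        return 0;
                }
        } while (--bits);

        return -1;                      /* the kernel path returns -ENOMEM */
}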
338 if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free, in io_ring_ctx_alloc()
342 ctx->flags = p->flags; in io_ring_ctx_alloc()
343 ctx->hybrid_poll_time = LLONG_MAX; in io_ring_ctx_alloc()
344 atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT); in io_ring_ctx_alloc()
345 init_waitqueue_head(&ctx->sqo_sq_wait); in io_ring_ctx_alloc()
346 INIT_LIST_HEAD(&ctx->sqd_list); in io_ring_ctx_alloc()
347 INIT_LIST_HEAD(&ctx->cq_overflow_list); in io_ring_ctx_alloc()
348 ret = io_alloc_cache_init(&ctx->apoll_cache, IO_POLL_ALLOC_CACHE_MAX, in io_ring_ctx_alloc()
350 ret |= io_alloc_cache_init(&ctx->netmsg_cache, IO_ALLOC_CACHE_MAX, in io_ring_ctx_alloc()
353 ret |= io_alloc_cache_init(&ctx->rw_cache, IO_ALLOC_CACHE_MAX, in io_ring_ctx_alloc()
356 ret |= io_alloc_cache_init(&ctx->cmd_cache, IO_ALLOC_CACHE_MAX, in io_ring_ctx_alloc()
363 init_completion(&ctx->ref_comp); in io_ring_ctx_alloc()
364 xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1); in io_ring_ctx_alloc()
365 mutex_init(&ctx->uring_lock); in io_ring_ctx_alloc()
366 init_waitqueue_head(&ctx->cq_wait); in io_ring_ctx_alloc()
367 init_waitqueue_head(&ctx->poll_wq); in io_ring_ctx_alloc()
368 spin_lock_init(&ctx->completion_lock); in io_ring_ctx_alloc()
369 raw_spin_lock_init(&ctx->timeout_lock); in io_ring_ctx_alloc()
370 INIT_WQ_LIST(&ctx->iopoll_list); in io_ring_ctx_alloc()
371 INIT_LIST_HEAD(&ctx->defer_list); in io_ring_ctx_alloc()
372 INIT_LIST_HEAD(&ctx->timeout_list); in io_ring_ctx_alloc()
373 INIT_LIST_HEAD(&ctx->ltimeout_list); in io_ring_ctx_alloc()
374 init_llist_head(&ctx->work_llist); in io_ring_ctx_alloc()
375 INIT_LIST_HEAD(&ctx->tctx_list); in io_ring_ctx_alloc()
376 ctx->submit_state.free_list.next = NULL; in io_ring_ctx_alloc()
377 INIT_HLIST_HEAD(&ctx->waitid_list); in io_ring_ctx_alloc()
378 xa_init_flags(&ctx->zcrx_ctxs, XA_FLAGS_ALLOC); in io_ring_ctx_alloc()
380 INIT_HLIST_HEAD(&ctx->futex_list); in io_ring_ctx_alloc()
382 INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func); in io_ring_ctx_alloc()
383 INIT_WQ_LIST(&ctx->submit_state.compl_reqs); in io_ring_ctx_alloc()
384 INIT_HLIST_HEAD(&ctx->cancelable_uring_cmd); in io_ring_ctx_alloc()
386 mutex_init(&ctx->mmap_lock); in io_ring_ctx_alloc()
391 percpu_ref_exit(&ctx->refs); in io_ring_ctx_alloc()
394 kvfree(ctx->cancel_table.hbs); in io_ring_ctx_alloc()
395 xa_destroy(&ctx->io_bl_xa); in io_ring_ctx_alloc()
402 if (unlikely(req->flags & REQ_F_BUFFER_SELECTED)) in io_clean_op()
405 if (req->flags & REQ_F_NEED_CLEANUP) { in io_clean_op()
406 const struct io_cold_def *def = &io_cold_defs[req->opcode]; in io_clean_op()
408 if (def->cleanup) in io_clean_op()
409 def->cleanup(req); in io_clean_op()
411 if (req->flags & REQ_F_INFLIGHT) in io_clean_op()
412 atomic_dec(&req->tctx->inflight_tracked); in io_clean_op()
413 if (req->flags & REQ_F_CREDS) in io_clean_op()
414 put_cred(req->creds); in io_clean_op()
415 if (req->flags & REQ_F_ASYNC_DATA) { in io_clean_op()
416 kfree(req->async_data); in io_clean_op()
417 req->async_data = NULL; in io_clean_op()
419 req->flags &= ~IO_REQ_CLEAN_FLAGS; in io_clean_op()
425 * relies on ->mm being alive for the duration of the request.
429 if (!(req->flags & REQ_F_INFLIGHT)) { in io_req_track_inflight()
430 req->flags |= REQ_F_INFLIGHT; in io_req_track_inflight()
431 atomic_inc(&req->tctx->inflight_tracked); in io_req_track_inflight()
437 if (WARN_ON_ONCE(!req->link)) in __io_prep_linked_timeout()
440 req->flags &= ~REQ_F_ARM_LTIMEOUT; in __io_prep_linked_timeout()
441 req->flags |= REQ_F_LINK_TIMEOUT; in __io_prep_linked_timeout()
445 __io_req_set_refcount(req->link, 2); in __io_prep_linked_timeout()
446 return req->link; in __io_prep_linked_timeout()
451 const struct io_issue_def *def = &io_issue_defs[req->opcode]; in io_prep_async_work()
452 struct io_ring_ctx *ctx = req->ctx; in io_prep_async_work()
454 if (!(req->flags & REQ_F_CREDS)) { in io_prep_async_work()
455 req->flags |= REQ_F_CREDS; in io_prep_async_work()
456 req->creds = get_current_cred(); in io_prep_async_work()
459 req->work.list.next = NULL; in io_prep_async_work()
460 atomic_set(&req->work.flags, 0); in io_prep_async_work()
461 if (req->flags & REQ_F_FORCE_ASYNC) in io_prep_async_work()
462 atomic_or(IO_WQ_WORK_CONCURRENT, &req->work.flags); in io_prep_async_work()
464 if (req->file && !(req->flags & REQ_F_FIXED_FILE)) in io_prep_async_work()
465 req->flags |= io_file_get_flags(req->file); in io_prep_async_work()
467 if (req->file && (req->flags & REQ_F_ISREG)) { in io_prep_async_work()
468 bool should_hash = def->hash_reg_file; in io_prep_async_work()
471 if (should_hash && (req->file->f_flags & O_DIRECT) && in io_prep_async_work()
472 (req->file->f_op->fop_flags & FOP_DIO_PARALLEL_WRITE)) in io_prep_async_work()
474 if (should_hash || (ctx->flags & IORING_SETUP_IOPOLL)) in io_prep_async_work()
475 io_wq_hash_work(&req->work, file_inode(req->file)); in io_prep_async_work()
476 } else if (!req->file || !S_ISBLK(file_inode(req->file)->i_mode)) { in io_prep_async_work()
477 if (def->unbound_nonreg_file) in io_prep_async_work()
478 atomic_or(IO_WQ_WORK_UNBOUND, &req->work.flags); in io_prep_async_work()
486 if (req->flags & REQ_F_LINK_TIMEOUT) { in io_prep_async_link()
487 struct io_ring_ctx *ctx = req->ctx; in io_prep_async_link()
489 raw_spin_lock_irq(&ctx->timeout_lock); in io_prep_async_link()
492 raw_spin_unlock_irq(&ctx->timeout_lock); in io_prep_async_link()
501 struct io_uring_task *tctx = req->tctx; in io_queue_iowq()
505 if ((current->flags & PF_KTHREAD) || !tctx->io_wq) { in io_queue_iowq()
506 io_req_task_queue_fail(req, -ECANCELED); in io_queue_iowq()
510 /* init ->work of the whole link before punting */ in io_queue_iowq()
516 * canceled. That will make io-wq go through the usual work cancel in io_queue_iowq()
520 if (WARN_ON_ONCE(!same_thread_group(tctx->task, current))) in io_queue_iowq()
521 atomic_or(IO_WQ_WORK_CANCEL, &req->work.flags); in io_queue_iowq()
523 trace_io_uring_queue_async_work(req, io_wq_is_hashed(&req->work)); in io_queue_iowq()
524 io_wq_enqueue(tctx->io_wq, &req->work); in io_queue_iowq()
534 req->io_task_work.func = io_req_queue_iowq_tw; in io_req_queue_iowq()
552 lockdep_assert_held(&ctx->uring_lock); in io_queue_deferred()
555 while (!list_empty(&ctx->defer_list)) { in io_queue_deferred()
556 struct io_defer_entry *de = list_first_entry(&ctx->defer_list, in io_queue_deferred() local
559 drain_seen |= de->req->flags & REQ_F_IO_DRAIN; in io_queue_deferred()
560 if ((drain_seen || first) && ctx->nr_req_allocated != ctx->nr_drained) in io_queue_deferred()
563 list_del_init(&de->list); in io_queue_deferred()
564 ctx->nr_drained -= io_linked_nr(de->req); in io_queue_deferred()
565 io_req_task_queue(de->req); in io_queue_deferred()
566 kfree(de); in io_queue_deferred()
573 if (ctx->poll_activated) in __io_commit_cqring_flush()
575 if (ctx->off_timeout_used) in __io_commit_cqring_flush()
577 if (ctx->has_evfd) in __io_commit_cqring_flush()
583 if (!ctx->lockless_cq) in __io_cq_lock()
584 spin_lock(&ctx->completion_lock); in __io_cq_lock()
588 __acquires(ctx->completion_lock) in io_cq_lock()
590 spin_lock(&ctx->completion_lock); in io_cq_lock()
596 if (!ctx->task_complete) { in __io_cq_unlock_post()
597 if (!ctx->lockless_cq) in __io_cq_unlock_post()
598 spin_unlock(&ctx->completion_lock); in __io_cq_unlock_post()
600 if (!ctx->syscall_iopoll) in __io_cq_unlock_post()
607 __releases(ctx->completion_lock) in io_cq_unlock_post()
610 spin_unlock(&ctx->completion_lock); in io_cq_unlock_post()
617 lockdep_assert_held(&ctx->uring_lock); in __io_cqring_overflow_flush()
620 if (!dying && __io_cqring_events(ctx) == ctx->cq_entries) in __io_cqring_overflow_flush()
624 while (!list_empty(&ctx->cq_overflow_list)) { in __io_cqring_overflow_flush()
630 ocqe = list_first_entry(&ctx->cq_overflow_list, in __io_cqring_overflow_flush()
632 if (ocqe->cqe.flags & IORING_CQE_F_32 || in __io_cqring_overflow_flush()
633 ctx->flags & IORING_SETUP_CQE32) { in __io_cqring_overflow_flush()
641 memcpy(cqe, &ocqe->cqe, cqe_size); in __io_cqring_overflow_flush()
643 list_del(&ocqe->list); in __io_cqring_overflow_flush()
650 * Ideally we'd have a non-posting unlock for this, but hard in __io_cqring_overflow_flush()
651 * to care for a non-real case. in __io_cqring_overflow_flush()
654 ctx->cqe_sentinel = ctx->cqe_cached; in __io_cqring_overflow_flush()
656 mutex_unlock(&ctx->uring_lock); in __io_cqring_overflow_flush()
658 mutex_lock(&ctx->uring_lock); in __io_cqring_overflow_flush()
663 if (list_empty(&ctx->cq_overflow_list)) { in __io_cqring_overflow_flush()
664 clear_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq); in __io_cqring_overflow_flush()
665 atomic_andnot(IORING_SQ_CQ_OVERFLOW, &ctx->rings->sq_flags); in __io_cqring_overflow_flush()
672 if (ctx->rings) in io_cqring_overflow_kill()
678 mutex_lock(&ctx->uring_lock); in io_cqring_do_overflow_flush()
680 mutex_unlock(&ctx->uring_lock); in io_cqring_do_overflow_flush()
686 struct io_uring_task *tctx = req->tctx; in io_put_task()
688 if (likely(tctx->task == current)) { in io_put_task()
689 tctx->cached_refs++; in io_put_task()
691 percpu_counter_sub(&tctx->inflight, 1); in io_put_task()
692 if (unlikely(atomic_read(&tctx->in_cancel))) in io_put_task()
693 wake_up(&tctx->wait); in io_put_task()
694 put_task_struct(tctx->task); in io_put_task()
700 unsigned int refill = -tctx->cached_refs + IO_TCTX_REFS_CACHE_NR; in io_task_refs_refill()
702 percpu_counter_add(&tctx->inflight, refill); in io_task_refs_refill()
703 refcount_add(refill, &current->usage); in io_task_refs_refill()
704 tctx->cached_refs += refill; in io_task_refs_refill()
709 struct io_uring_task *tctx = task->io_uring; in io_uring_drop_tctx_refs()
710 unsigned int refs = tctx->cached_refs; in io_uring_drop_tctx_refs()
713 tctx->cached_refs = 0; in io_uring_drop_tctx_refs()
714 percpu_counter_sub(&tctx->inflight, refs); in io_uring_drop_tctx_refs()
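io_task_refs_refill() and io_uring_drop_tctx_refs() above implement batched reference counting: references are taken from the shared inflight counter in bulk, spent one at a time from a per-task cache, and any unused remainder is returned in a single update. A minimal single-process analogue is sketched below; the counter, batch size, and helper names are illustrative.

#include <stdatomic.h>

#define DEMO_REFS_CACHE_NR 128                  /* stands in for IO_TCTX_REFS_CACHE_NR */

static atomic_long demo_inflight;               /* shared counter (percpu in the kernel) */
static _Thread_local long demo_cached;          /* per-task cache of pre-taken refs */

static void demo_refs_refill(void)
{
        /* top the cache back up to the batch size, as in io_task_refs_refill() */
        long refill = DEMO_REFS_CACHE_NR - demo_cached;

        atomic_fetch_add(&demo_inflight, refill);
        demo_cached += refill;
}

static void demo_ref_get(void)
{
        if (demo_cached <= 0)
                demo_refs_refill();
        demo_cached--;
}

static void demo_refs_drop(void)
{
        long refs = demo_cached;

        if (refs) {
                demo_cached = 0;
                atomic_fetch_sub(&demo_inflight, refs);
        }
}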
722 lockdep_assert_held(&ctx->completion_lock); in io_cqring_add_overflow()
725 struct io_rings *r = ctx->rings; in io_cqring_add_overflow()
732 WRITE_ONCE(r->cq_overflow, READ_ONCE(r->cq_overflow) + 1); in io_cqring_add_overflow()
733 set_bit(IO_CHECK_CQ_DROPPED_BIT, &ctx->check_cq); in io_cqring_add_overflow()
736 if (list_empty(&ctx->cq_overflow_list)) { in io_cqring_add_overflow()
737 set_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq); in io_cqring_add_overflow()
738 atomic_or(IORING_SQ_CQ_OVERFLOW, &ctx->rings->sq_flags); in io_cqring_add_overflow()
741 list_add_tail(&ocqe->list, &ctx->cq_overflow_list); in io_cqring_add_overflow()
753 if (cqe->flags & IORING_CQE_F_32 || ctx->flags & IORING_SETUP_CQE32) { in io_alloc_ocqe()
759 trace_io_uring_cqe_overflow(ctx, cqe->user_data, cqe->res, cqe->flags, ocqe); in io_alloc_ocqe()
761 ocqe->cqe.user_data = cqe->user_data; in io_alloc_ocqe()
762 ocqe->cqe.res = cqe->res; in io_alloc_ocqe()
763 ocqe->cqe.flags = cqe->flags; in io_alloc_ocqe()
765 ocqe->cqe.big_cqe[0] = big_cqe->extra1; in io_alloc_ocqe()
766 ocqe->cqe.big_cqe[1] = big_cqe->extra2; in io_alloc_ocqe()
770 big_cqe->extra1 = big_cqe->extra2 = 0; in io_alloc_ocqe()
780 if (__io_cqring_events(ctx) < ctx->cq_entries) { in io_fill_nop_cqe()
781 struct io_uring_cqe *cqe = &ctx->rings->cqes[off]; in io_fill_nop_cqe()
783 cqe->user_data = 0; in io_fill_nop_cqe()
784 cqe->res = 0; in io_fill_nop_cqe()
785 cqe->flags = IORING_CQE_F_SKIP; in io_fill_nop_cqe()
786 ctx->cached_cq_tail++; in io_fill_nop_cqe()
799 struct io_rings *rings = ctx->rings; in io_cqe_cache_refill()
800 unsigned int off = ctx->cached_cq_tail & (ctx->cq_entries - 1); in io_cqe_cache_refill()
808 if (!overflow && (ctx->check_cq & BIT(IO_CHECK_CQ_OVERFLOW_BIT))) in io_cqe_cache_refill()
815 if (cqe32 && off + 1 == ctx->cq_entries) { in io_cqe_cache_refill()
822 queued = min(__io_cqring_events(ctx), ctx->cq_entries); in io_cqe_cache_refill()
823 free = ctx->cq_entries - queued; in io_cqe_cache_refill()
825 len = min(free, ctx->cq_entries - off); in io_cqe_cache_refill()
829 if (ctx->flags & IORING_SETUP_CQE32) { in io_cqe_cache_refill()
834 ctx->cqe_cached = &rings->cqes[off]; in io_cqe_cache_refill()
835 ctx->cqe_sentinel = ctx->cqe_cached + len; in io_cqe_cache_refill()
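io_cqe_cache_refill() above turns the tail into a slot index with tail & (cq_entries - 1) and then caches the longest run of free slots that does not cross the end of the ring. The helper below redoes that arithmetic in isolation, assuming a power-of-two ring size; the names are illustrative.

/* Longest contiguous run of free slots starting at the producer index,
 * for a power-of-two sized ring. Unsigned subtraction handles wraparound. */
static unsigned int ring_contig_free(unsigned int head, unsigned int tail,
                                     unsigned int entries)
{
        unsigned int off    = tail & (entries - 1);     /* next slot to fill */
        unsigned int queued = tail - head;              /* entries currently in flight */
        unsigned int free   = entries - queued;
        unsigned int to_end = entries - off;            /* slots before the index wraps */

        return free < to_end ? free : to_end;
}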
844 if (WARN_ON_ONCE(!(ctx->flags & (IORING_SETUP_CQE32|IORING_SETUP_CQE_MIXED)))) in io_fill_cqe_aux32()
861 WRITE_ONCE(cqe->user_data, user_data); in io_fill_cqe_aux()
862 WRITE_ONCE(cqe->res, res); in io_fill_cqe_aux()
863 WRITE_ONCE(cqe->flags, cflags); in io_fill_cqe_aux()
866 WRITE_ONCE(cqe->big_cqe[0], 0); in io_fill_cqe_aux()
867 WRITE_ONCE(cqe->big_cqe[1], 0); in io_fill_cqe_aux()
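io_fill_cqe_aux() above writes user_data, res and flags into the next CQE slot with WRITE_ONCE(); the consumer on the other side of the ring copies the entry out and only then advances the head so the slot may be reused. A self-contained sketch of that consumer step follows, with illustrative names and the default 16-byte CQE layout assumed.

#include <stdint.h>
#include <linux/io_uring.h>

struct cq_reader {
        uint32_t *khead;                        /* shared; userspace advances it */
        const uint32_t *ktail;                  /* shared; the kernel advances it */
        const struct io_uring_cqe *cqes;
        unsigned ring_mask;                     /* cq_entries - 1 */
};

/* Pop one CQE if available; returns 1 on success, 0 if the ring is empty. */
static int cq_pop(struct cq_reader *cq, struct io_uring_cqe *out)
{
        uint32_t head = *cq->khead;
        uint32_t tail = __atomic_load_n(cq->ktail, __ATOMIC_ACQUIRE);

        if (head == tail)
                return 0;
        *out = cq->cqes[head & cq->ring_mask];
        __atomic_store_n(cq->khead, head + 1, __ATOMIC_RELEASE);
        return 1;
}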
887 spin_lock(&ctx->completion_lock); in io_cqe_overflow()
889 spin_unlock(&ctx->completion_lock); in io_cqe_overflow()
919 * and obviously with ctx->uring_lock held (tw always has that).
923 lockdep_assert_held(&ctx->uring_lock); in io_add_aux_cqe()
924 lockdep_assert(ctx->lockless_cq); in io_add_aux_cqe()
931 ctx->submit_state.cq_flush = true; in io_add_aux_cqe()
940 struct io_ring_ctx *ctx = req->ctx; in io_req_post_cqe()
948 if (!wq_list_empty(&ctx->submit_state.compl_reqs)) in io_req_post_cqe()
952 lockdep_assert_held(&ctx->uring_lock); in io_req_post_cqe()
954 if (!ctx->lockless_cq) { in io_req_post_cqe()
955 spin_lock(&ctx->completion_lock); in io_req_post_cqe()
956 posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags); in io_req_post_cqe()
957 spin_unlock(&ctx->completion_lock); in io_req_post_cqe()
959 posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags); in io_req_post_cqe()
962 ctx->submit_state.cq_flush = true; in io_req_post_cqe()
972 struct io_ring_ctx *ctx = req->ctx; in io_req_post_cqe32()
976 lockdep_assert_held(&ctx->uring_lock); in io_req_post_cqe32()
978 cqe[0].user_data = req->cqe.user_data; in io_req_post_cqe32()
979 if (!ctx->lockless_cq) { in io_req_post_cqe32()
980 spin_lock(&ctx->completion_lock); in io_req_post_cqe32()
982 spin_unlock(&ctx->completion_lock); in io_req_post_cqe32()
987 ctx->submit_state.cq_flush = true; in io_req_post_cqe32()
993 struct io_ring_ctx *ctx = req->ctx; in io_req_complete_post()
997 * All execution paths but io-wq use the deferred completions by in io_req_complete_post()
1007 if (ctx->lockless_cq || (req->flags & REQ_F_REISSUE)) { in io_req_complete_post()
1009 req->io_task_work.func = io_req_task_complete; in io_req_complete_post()
1015 if (!(req->flags & REQ_F_CQE_SKIP)) in io_req_complete_post()
1024 * io-wq only, which holds a reference, so it cannot be the last put. in io_req_complete_post()
1030 __must_hold(&ctx->uring_lock) in io_req_defer_failed()
1032 const struct io_cold_def *def = &io_cold_defs[req->opcode]; in io_req_defer_failed()
1034 lockdep_assert_held(&req->ctx->uring_lock); in io_req_defer_failed()
1038 if (def->fail) in io_req_defer_failed()
1039 def->fail(req); in io_req_defer_failed()
1046 * Because of that, io_alloc_req() should be called only under ->uring_lock
1050 __must_hold(&ctx->uring_lock) in __io_alloc_req_refill()
1059 * Bulk alloc is all-or-nothing. If we fail to get a batch, in __io_alloc_req_refill()
1069 percpu_ref_get_many(&ctx->refs, ret); in __io_alloc_req_refill()
1070 ctx->nr_req_allocated += ret; in __io_alloc_req_refill()
1072 while (ret--) { in __io_alloc_req_refill()
1083 req->flags &= ~REQ_F_REFCOUNT; in io_free_req()
1085 req->flags |= REQ_F_CQE_SKIP; in io_free_req()
1086 req->io_task_work.func = io_req_task_complete; in io_free_req()
1092 struct io_ring_ctx *ctx = req->ctx; in __io_req_find_next_prep()
1094 spin_lock(&ctx->completion_lock); in __io_req_find_next_prep()
1096 spin_unlock(&ctx->completion_lock); in __io_req_find_next_prep()
1109 if (unlikely(req->flags & IO_DISARM_MASK)) in io_req_find_next()
1111 nxt = req->link; in io_req_find_next()
1112 req->link = NULL; in io_req_find_next()
1120 if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) in ctx_flush_and_put()
1121 atomic_andnot(IORING_SQ_TASKRUN, &ctx->rings->sq_flags); in ctx_flush_and_put()
1124 mutex_unlock(&ctx->uring_lock); in ctx_flush_and_put()
1125 percpu_ref_put(&ctx->refs); in ctx_flush_and_put()
1141 struct llist_node *next = node->next; in io_handle_tw_list()
1145 if (req->ctx != ctx) { in io_handle_tw_list()
1147 ctx = req->ctx; in io_handle_tw_list()
1148 mutex_lock(&ctx->uring_lock); in io_handle_tw_list()
1149 percpu_ref_get(&ctx->refs); in io_handle_tw_list()
1151 INDIRECT_CALL_2(req->io_task_work.func, in io_handle_tw_list()
1174 node = node->next; in __io_fallback_tw()
1175 if (last_ctx != req->ctx) { in __io_fallback_tw()
1178 flush_delayed_work(&last_ctx->fallback_work); in __io_fallback_tw()
1179 percpu_ref_put(&last_ctx->refs); in __io_fallback_tw()
1181 last_ctx = req->ctx; in __io_fallback_tw()
1182 percpu_ref_get(&last_ctx->refs); in __io_fallback_tw()
1184 if (llist_add(&req->io_task_work.node, &last_ctx->fallback_llist)) in __io_fallback_tw()
1185 schedule_delayed_work(&last_ctx->fallback_work, 1); in __io_fallback_tw()
1190 flush_delayed_work(&last_ctx->fallback_work); in __io_fallback_tw()
1191 percpu_ref_put(&last_ctx->refs); in __io_fallback_tw()
1197 struct llist_node *node = llist_del_all(&tctx->task_list); in io_fallback_tw()
1208 if (unlikely(current->flags & PF_EXITING)) { in tctx_task_work_run()
1213 node = llist_del_all(&tctx->task_list); in tctx_task_work_run()
1219 /* relaxed read is enough as only the task itself sets ->in_cancel */ in tctx_task_work_run()
1220 if (unlikely(atomic_read(&tctx->in_cancel))) in tctx_task_work_run()
1241 struct io_ring_ctx *ctx = req->ctx; in io_req_local_work_add()
1250 * they can even be queued lazily, fall back to non-lazy. in io_req_local_work_add()
1252 if (req->flags & IO_REQ_LINK_FLAGS) in io_req_local_work_add()
1257 head = READ_ONCE(ctx->work_llist.first); in io_req_local_work_add()
1268 nr_tw_prev = READ_ONCE(first_req->nr_tw); in io_req_local_work_add()
1279 req->nr_tw = nr_tw; in io_req_local_work_add()
1280 req->io_task_work.node.next = head; in io_req_local_work_add()
1281 } while (!try_cmpxchg(&ctx->work_llist.first, &head, in io_req_local_work_add()
1282 &req->io_task_work.node)); in io_req_local_work_add()
1287 * to ensure that either we see updated ->cq_wait_nr, or waiters in io_req_local_work_add()
1293 if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) in io_req_local_work_add()
1294 atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags); in io_req_local_work_add()
1295 if (ctx->has_evfd) in io_req_local_work_add()
1299 nr_wait = atomic_read(&ctx->cq_wait_nr); in io_req_local_work_add()
1306 wake_up_state(ctx->submitter_task, TASK_INTERRUPTIBLE); in io_req_local_work_add()
1311 struct io_uring_task *tctx = req->tctx; in io_req_normal_work_add()
1312 struct io_ring_ctx *ctx = req->ctx; in io_req_normal_work_add()
1315 if (!llist_add(&req->io_task_work.node, &tctx->task_list)) in io_req_normal_work_add()
1318 if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) in io_req_normal_work_add()
1319 atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags); in io_req_normal_work_add()
1322 if (ctx->flags & IORING_SETUP_SQPOLL) { in io_req_normal_work_add()
1323 __set_notify_signal(tctx->task); in io_req_normal_work_add()
1327 if (likely(!task_work_add(tctx->task, &tctx->task_work, ctx->notify_method))) in io_req_normal_work_add()
1335 if (req->ctx->flags & IORING_SETUP_DEFER_TASKRUN) in __io_req_task_work_add()
1343 if (WARN_ON_ONCE(!(req->ctx->flags & IORING_SETUP_DEFER_TASKRUN))) in io_req_task_work_add_remote()
1350 struct llist_node *node = llist_del_all(&ctx->work_llist); in io_move_task_work_from_local()
1353 node = llist_del_all(&ctx->retry_llist); in io_move_task_work_from_local()
1364 if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) in io_run_local_work_continue()
1365 atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags); in io_run_local_work_continue()
1376 struct llist_node *next = (*node)->next; in __io_run_local_work_loop()
1379 INDIRECT_CALL_2(req->io_task_work.func, in __io_run_local_work_loop()
1397 if (WARN_ON_ONCE(ctx->submitter_task != current)) in __io_run_local_work()
1398 return -EEXIST; in __io_run_local_work()
1399 if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) in __io_run_local_work()
1400 atomic_andnot(IORING_SQ_TASKRUN, &ctx->rings->sq_flags); in __io_run_local_work()
1402 min_events -= ret; in __io_run_local_work()
1403 ret = __io_run_local_work_loop(&ctx->retry_llist.first, tw, max_events); in __io_run_local_work()
1404 if (ctx->retry_llist.first) in __io_run_local_work()
1411 node = llist_reverse_order(llist_del_all(&ctx->work_llist)); in __io_run_local_work()
1412 ret += __io_run_local_work_loop(&node, tw, max_events - ret); in __io_run_local_work()
1413 ctx->retry_llist.first = node; in __io_run_local_work()
1444 mutex_lock(&ctx->uring_lock); in io_run_local_work()
1446 mutex_unlock(&ctx->uring_lock); in io_run_local_work()
1452 io_tw_lock(req->ctx, tw); in io_req_task_cancel()
1453 io_req_defer_failed(req, req->cqe.res); in io_req_task_cancel()
1458 struct io_ring_ctx *ctx = req->ctx; in io_req_task_submit()
1462 io_req_defer_failed(req, -EFAULT); in io_req_task_submit()
1463 else if (req->flags & REQ_F_FORCE_ASYNC) in io_req_task_submit()
1472 req->io_task_work.func = io_req_task_cancel; in io_req_task_queue_fail()
1478 req->io_task_work.func = io_req_task_submit; in io_req_task_queue()
1492 if (req->file_node) { in io_req_put_rsrc_nodes()
1493 io_put_rsrc_node(req->ctx, req->file_node); in io_req_put_rsrc_nodes()
1494 req->file_node = NULL; in io_req_put_rsrc_nodes()
1496 if (req->flags & REQ_F_BUF_NODE) in io_req_put_rsrc_nodes()
1497 io_put_rsrc_node(req->ctx, req->buf_node); in io_req_put_rsrc_nodes()
1502 __must_hold(&ctx->uring_lock) in io_free_batch_list()
1508 if (unlikely(req->flags & IO_REQ_CLEAN_SLOW_FLAGS)) { in io_free_batch_list()
1509 if (req->flags & REQ_F_REISSUE) { in io_free_batch_list()
1510 node = req->comp_list.next; in io_free_batch_list()
1511 req->flags &= ~REQ_F_REISSUE; in io_free_batch_list()
1515 if (req->flags & REQ_F_REFCOUNT) { in io_free_batch_list()
1516 node = req->comp_list.next; in io_free_batch_list()
1520 if ((req->flags & REQ_F_POLLED) && req->apoll) { in io_free_batch_list()
1521 struct async_poll *apoll = req->apoll; in io_free_batch_list()
1523 if (apoll->double_poll) in io_free_batch_list()
1524 kfree(apoll->double_poll); in io_free_batch_list()
1525 io_cache_free(&ctx->apoll_cache, apoll); in io_free_batch_list()
1526 req->flags &= ~REQ_F_POLLED; in io_free_batch_list()
1528 if (req->flags & IO_REQ_LINK_FLAGS) in io_free_batch_list()
1530 if (unlikely(req->flags & IO_REQ_CLEAN_FLAGS)) in io_free_batch_list()
1537 node = req->comp_list.next; in io_free_batch_list()
1543 __must_hold(&ctx->uring_lock) in __io_submit_flush_completions()
1545 struct io_submit_state *state = &ctx->submit_state; in __io_submit_flush_completions()
1549 __wq_list_for_each(node, &state->compl_reqs) { in __io_submit_flush_completions()
1555 * will go through the io-wq retry machinery and post one in __io_submit_flush_completions()
1558 if (!(req->flags & (REQ_F_CQE_SKIP | REQ_F_REISSUE)) && in __io_submit_flush_completions()
1560 if (ctx->lockless_cq) in __io_submit_flush_completions()
1561 io_cqe_overflow(ctx, &req->cqe, &req->big_cqe); in __io_submit_flush_completions()
1563 io_cqe_overflow_locked(ctx, &req->cqe, &req->big_cqe); in __io_submit_flush_completions()
1568 if (!wq_list_empty(&state->compl_reqs)) { in __io_submit_flush_completions()
1569 io_free_batch_list(ctx, state->compl_reqs.first); in __io_submit_flush_completions()
1570 INIT_WQ_LIST(&state->compl_reqs); in __io_submit_flush_completions()
1573 if (unlikely(ctx->drain_active)) in __io_submit_flush_completions()
1576 ctx->submit_state.cq_flush = false; in __io_submit_flush_completions()
1592 if (!(ctx->flags & IORING_SETUP_IOPOLL)) in io_iopoll_try_reap_events()
1595 mutex_lock(&ctx->uring_lock); in io_iopoll_try_reap_events()
1596 while (!wq_list_empty(&ctx->iopoll_list)) { in io_iopoll_try_reap_events()
1601 * Ensure we allow local-to-the-cpu processing to take place, in io_iopoll_try_reap_events()
1606 mutex_unlock(&ctx->uring_lock); in io_iopoll_try_reap_events()
1608 mutex_lock(&ctx->uring_lock); in io_iopoll_try_reap_events()
1611 mutex_unlock(&ctx->uring_lock); in io_iopoll_try_reap_events()
1613 if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) in io_iopoll_try_reap_events()
1622 min_events = min(min_events, ctx->cq_entries); in io_iopoll_check()
1624 lockdep_assert_held(&ctx->uring_lock); in io_iopoll_check()
1627 return -EEXIST; in io_iopoll_check()
1629 check_cq = READ_ONCE(ctx->check_cq); in io_iopoll_check()
1638 return -EBADR; in io_iopoll_check()
1661 if (wq_list_empty(&ctx->iopoll_list) || in io_iopoll_check()
1663 u32 tail = ctx->cached_cq_tail; in io_iopoll_check()
1668 wq_list_empty(&ctx->iopoll_list)) { in io_iopoll_check()
1669 mutex_unlock(&ctx->uring_lock); in io_iopoll_check()
1671 mutex_lock(&ctx->uring_lock); in io_iopoll_check()
1674 if (tail != ctx->cached_cq_tail || in io_iopoll_check()
1675 wq_list_empty(&ctx->iopoll_list)) in io_iopoll_check()
1683 return -EINTR; in io_iopoll_check()
1706 struct io_ring_ctx *ctx = req->ctx; in io_iopoll_req_issued()
1711 mutex_lock(&ctx->uring_lock); in io_iopoll_req_issued()
1718 if (wq_list_empty(&ctx->iopoll_list)) { in io_iopoll_req_issued()
1719 ctx->poll_multi_queue = false; in io_iopoll_req_issued()
1720 } else if (!ctx->poll_multi_queue) { in io_iopoll_req_issued()
1723 list_req = container_of(ctx->iopoll_list.first, struct io_kiocb, in io_iopoll_req_issued()
1725 if (list_req->file != req->file) in io_iopoll_req_issued()
1726 ctx->poll_multi_queue = true; in io_iopoll_req_issued()
1733 if (READ_ONCE(req->iopoll_completed)) in io_iopoll_req_issued()
1734 wq_list_add_head(&req->comp_list, &ctx->iopoll_list); in io_iopoll_req_issued()
1736 wq_list_add_tail(&req->comp_list, &ctx->iopoll_list); in io_iopoll_req_issued()
1745 if ((ctx->flags & IORING_SETUP_SQPOLL) && in io_iopoll_req_issued()
1746 wq_has_sleeper(&ctx->sq_data->wait)) in io_iopoll_req_issued()
1747 wake_up(&ctx->sq_data->wait); in io_iopoll_req_issued()
1749 mutex_unlock(&ctx->uring_lock); in io_iopoll_req_issued()
1759 if (S_ISREG(file_inode(file)->i_mode)) in io_file_get_flags()
1761 if ((file->f_flags & O_NONBLOCK) || (file->f_mode & FMODE_NOWAIT)) in io_file_get_flags()
1767 __must_hold(&ctx->uring_lock) in io_drain_req()
1769 struct io_ring_ctx *ctx = req->ctx; in io_drain_req()
1770 bool drain = req->flags & IOSQE_IO_DRAIN; in io_drain_req()
1771 struct io_defer_entry *de; in io_drain_req() local
1773 de = kmalloc(sizeof(*de), GFP_KERNEL_ACCOUNT); in io_drain_req()
1774 if (!de) { in io_drain_req()
1775 io_req_defer_failed(req, -ENOMEM); in io_drain_req()
1781 de->req = req; in io_drain_req()
1783 ctx->nr_drained += io_linked_nr(req); in io_drain_req()
1784 list_add_tail(&de->list, &ctx->defer_list); in io_drain_req()
1786 if (!drain && list_empty(&ctx->defer_list)) in io_drain_req()
1787 ctx->drain_active = false; in io_drain_req()
1793 if (req->file || !def->needs_file) in io_assign_file()
1796 if (req->flags & REQ_F_FIXED_FILE) in io_assign_file()
1797 req->file = io_file_get_fixed(req, req->cqe.fd, issue_flags); in io_assign_file()
1799 req->file = io_file_get_normal(req, req->cqe.fd); in io_assign_file()
1801 return !!req->file; in io_assign_file()
1814 if (unlikely(req->flags & REQ_ISSUE_SLOW_FLAGS)) { in __io_issue_sqe()
1815 if ((req->flags & REQ_F_CREDS) && req->creds != current_cred()) in __io_issue_sqe()
1816 creds = override_creds(req->creds); in __io_issue_sqe()
1817 if (req->flags & REQ_F_ARM_LTIMEOUT) in __io_issue_sqe()
1821 if (!def->audit_skip) in __io_issue_sqe()
1822 audit_uring_entry(req->opcode); in __io_issue_sqe()
1824 ret = def->issue(req, issue_flags); in __io_issue_sqe()
1826 if (!def->audit_skip) in __io_issue_sqe()
1841 const struct io_issue_def *def = &io_issue_defs[req->opcode]; in io_issue_sqe()
1845 return -EBADF; in io_issue_sqe()
1862 if ((req->ctx->flags & IORING_SETUP_IOPOLL) && def->iopoll_queue) in io_issue_sqe()
1875 io_tw_lock(req->ctx, tw); in io_poll_issue()
1877 WARN_ON_ONCE(!req->file); in io_poll_issue()
1878 if (WARN_ON_ONCE(req->ctx->flags & IORING_SETUP_IOPOLL)) in io_poll_issue()
1879 return -EFAULT; in io_poll_issue()
1881 ret = __io_issue_sqe(req, issue_flags, &io_issue_defs[req->opcode]); in io_poll_issue()
1893 if (req->flags & IO_REQ_LINK_FLAGS) in io_wq_free_work()
1897 return nxt ? &nxt->work : NULL; in io_wq_free_work()
1903 const struct io_issue_def *def = &io_issue_defs[req->opcode]; in io_wq_submit_work()
1906 int ret = 0, err = -ECANCELED; in io_wq_submit_work()
1908 /* one will be dropped by io_wq_free_work() after returning to io-wq */ in io_wq_submit_work()
1909 if (!(req->flags & REQ_F_REFCOUNT)) in io_wq_submit_work()
1914 /* either cancelled or io-wq is dying, so don't touch tctx->iowq */ in io_wq_submit_work()
1915 if (atomic_read(&work->flags) & IO_WQ_WORK_CANCEL) { in io_wq_submit_work()
1921 err = -EBADF; in io_wq_submit_work()
1922 atomic_or(IO_WQ_WORK_CANCEL, &work->flags); in io_wq_submit_work()
1931 * Don't allow any multishot execution from io-wq. It's more restrictive in io_wq_submit_work()
1934 if (req->flags & (REQ_F_MULTISHOT|REQ_F_APOLL_MULTISHOT)) { in io_wq_submit_work()
1935 err = -EBADFD; in io_wq_submit_work()
1938 if (req->file->f_flags & O_NONBLOCK || in io_wq_submit_work()
1939 req->file->f_mode & FMODE_NOWAIT) { in io_wq_submit_work()
1940 err = -ECANCELED; in io_wq_submit_work()
1945 req->flags &= ~(REQ_F_APOLL_MULTISHOT|REQ_F_MULTISHOT); in io_wq_submit_work()
1949 if (req->flags & REQ_F_FORCE_ASYNC) { in io_wq_submit_work()
1950 bool opcode_poll = def->pollin || def->pollout; in io_wq_submit_work()
1960 if (ret != -EAGAIN) in io_wq_submit_work()
1965 * poll. -EAGAIN is final for that case. in io_wq_submit_work()
1967 if (req->flags & REQ_F_NOWAIT) in io_wq_submit_work()
1976 if (!(req->ctx->flags & IORING_SETUP_IOPOLL)) in io_wq_submit_work()
1999 struct io_ring_ctx *ctx = req->ctx; in io_file_get_fixed()
2004 node = io_rsrc_node_lookup(&ctx->file_table.data, fd); in io_file_get_fixed()
2006 node->refs++; in io_file_get_fixed()
2007 req->file_node = node; in io_file_get_fixed()
2008 req->flags |= io_slot_flags(node); in io_file_get_fixed()
2029 const struct io_cold_def *def = &io_cold_defs[req->opcode]; in io_req_sqe_copy()
2031 if (req->flags & REQ_F_SQE_COPIED) in io_req_sqe_copy()
2033 req->flags |= REQ_F_SQE_COPIED; in io_req_sqe_copy()
2034 if (!def->sqe_copy) in io_req_sqe_copy()
2037 return -EFAULT; in io_req_sqe_copy()
2038 def->sqe_copy(req); in io_req_sqe_copy()
2043 __must_hold(&req->ctx->uring_lock) in io_queue_async()
2045 if (ret != -EAGAIN || (req->flags & REQ_F_NOWAIT)) { in io_queue_async()
2068 __must_hold(&req->ctx->uring_lock) in io_queue_sqe()
2078 * doesn't support non-blocking read/write attempts in io_queue_sqe()
2085 __must_hold(&req->ctx->uring_lock) in io_queue_sqe_fallback()
2087 if (unlikely(req->flags & REQ_F_FAIL)) { in io_queue_sqe_fallback()
2092 req->flags &= ~REQ_F_HARDLINK; in io_queue_sqe_fallback()
2093 req->flags |= REQ_F_LINK; in io_queue_sqe_fallback()
2094 io_req_defer_failed(req, req->cqe.res); in io_queue_sqe_fallback()
2098 if (unlikely(req->ctx->drain_active)) in io_queue_sqe_fallback()
2114 if (!test_bit(req->opcode, ctx->restrictions.sqe_op)) in io_check_restriction()
2117 if ((sqe_flags & ctx->restrictions.sqe_flags_required) != in io_check_restriction()
2118 ctx->restrictions.sqe_flags_required) in io_check_restriction()
2121 if (sqe_flags & ~(ctx->restrictions.sqe_flags_allowed | in io_check_restriction()
2122 ctx->restrictions.sqe_flags_required)) in io_check_restriction()
2130 struct io_kiocb *head = ctx->submit_state.link.head; in io_init_drain()
2132 ctx->drain_active = true; in io_init_drain()
2141 head->flags |= REQ_F_IO_DRAIN | REQ_F_FORCE_ASYNC; in io_init_drain()
2142 ctx->drain_next = true; in io_init_drain()
2148 /* ensure per-opcode data is cleared if we fail before prep */ in io_init_fail_req()
2149 memset(&req->cmd.data, 0, sizeof(req->cmd.data)); in io_init_fail_req()
2155 __must_hold(&ctx->uring_lock) in io_init_req()
2162 req->ctx = ctx; in io_init_req()
2163 req->opcode = opcode = READ_ONCE(sqe->opcode); in io_init_req()
2165 sqe_flags = READ_ONCE(sqe->flags); in io_init_req()
2166 req->flags = (__force io_req_flags_t) sqe_flags; in io_init_req()
2167 req->cqe.user_data = READ_ONCE(sqe->user_data); in io_init_req()
2168 req->file = NULL; in io_init_req()
2169 req->tctx = current->io_uring; in io_init_req()
2170 req->cancel_seq_set = false; in io_init_req()
2171 req->async_data = NULL; in io_init_req()
2174 req->opcode = 0; in io_init_req()
2175 return io_init_fail_req(req, -EINVAL); in io_init_req()
2183 return io_init_fail_req(req, -EINVAL); in io_init_req()
2185 if (!def->buffer_select) in io_init_req()
2186 return io_init_fail_req(req, -EOPNOTSUPP); in io_init_req()
2187 req->buf_index = READ_ONCE(sqe->buf_group); in io_init_req()
2190 ctx->drain_disabled = true; in io_init_req()
2192 if (ctx->drain_disabled) in io_init_req()
2193 return io_init_fail_req(req, -EOPNOTSUPP); in io_init_req()
2197 if (unlikely(ctx->restricted || ctx->drain_active || ctx->drain_next)) { in io_init_req()
2198 if (ctx->restricted && !io_check_restriction(ctx, req, sqe_flags)) in io_init_req()
2199 return io_init_fail_req(req, -EACCES); in io_init_req()
2201 if (ctx->drain_active) in io_init_req()
2202 req->flags |= REQ_F_FORCE_ASYNC; in io_init_req()
2204 if (unlikely(ctx->drain_next) && !ctx->submit_state.link.head) { in io_init_req()
2205 ctx->drain_next = false; in io_init_req()
2206 ctx->drain_active = true; in io_init_req()
2207 req->flags |= REQ_F_IO_DRAIN | REQ_F_FORCE_ASYNC; in io_init_req()
2211 if (!def->ioprio && sqe->ioprio) in io_init_req()
2212 return io_init_fail_req(req, -EINVAL); in io_init_req()
2213 if (!def->iopoll && (ctx->flags & IORING_SETUP_IOPOLL)) in io_init_req()
2214 return io_init_fail_req(req, -EINVAL); in io_init_req()
2216 if (def->needs_file) { in io_init_req()
2217 struct io_submit_state *state = &ctx->submit_state; in io_init_req()
2219 req->cqe.fd = READ_ONCE(sqe->fd); in io_init_req()
2225 if (state->need_plug && def->plug) { in io_init_req()
2226 state->plug_started = true; in io_init_req()
2227 state->need_plug = false; in io_init_req()
2228 blk_start_plug_nr_ios(&state->plug, state->submit_nr); in io_init_req()
2232 personality = READ_ONCE(sqe->personality); in io_init_req()
2236 req->creds = xa_load(&ctx->personalities, personality); in io_init_req()
2237 if (!req->creds) in io_init_req()
2238 return io_init_fail_req(req, -EINVAL); in io_init_req()
2239 get_cred(req->creds); in io_init_req()
2240 ret = security_uring_override_creds(req->creds); in io_init_req()
2242 put_cred(req->creds); in io_init_req()
2245 req->flags |= REQ_F_CREDS; in io_init_req()
2248 return def->prep(req, sqe); in io_init_req()
2254 struct io_ring_ctx *ctx = req->ctx; in io_submit_fail_init()
2255 struct io_submit_link *link = &ctx->submit_state.link; in io_submit_fail_init()
2256 struct io_kiocb *head = link->head; in io_submit_fail_init()
2267 if (head && !(head->flags & REQ_F_FAIL)) in io_submit_fail_init()
2268 req_fail_link_node(head, -ECANCELED); in io_submit_fail_init()
2270 if (!(req->flags & IO_REQ_LINK_FLAGS)) { in io_submit_fail_init()
2272 link->last->link = req; in io_submit_fail_init()
2273 link->head = NULL; in io_submit_fail_init()
2281 link->last->link = req; in io_submit_fail_init()
2283 link->head = req; in io_submit_fail_init()
2284 link->last = req; in io_submit_fail_init()
2290 __must_hold(&ctx->uring_lock) in io_submit_sqe()
2292 struct io_submit_link *link = &ctx->submit_state.link; in io_submit_sqe()
2308 if (unlikely(link->head)) { in io_submit_sqe()
2309 trace_io_uring_link(req, link->last); in io_submit_sqe()
2311 link->last->link = req; in io_submit_sqe()
2312 link->last = req; in io_submit_sqe()
2314 if (req->flags & IO_REQ_LINK_FLAGS) in io_submit_sqe()
2317 req = link->head; in io_submit_sqe()
2318 link->head = NULL; in io_submit_sqe()
2319 if (req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)) in io_submit_sqe()
2322 } else if (unlikely(req->flags & (IO_REQ_LINK_FLAGS | in io_submit_sqe()
2324 if (req->flags & IO_REQ_LINK_FLAGS) { in io_submit_sqe()
2325 link->head = req; in io_submit_sqe()
2326 link->last = req; in io_submit_sqe()
2343 struct io_submit_state *state = &ctx->submit_state; in io_submit_state_end()
2345 if (unlikely(state->link.head)) in io_submit_state_end()
2346 io_queue_sqe_fallback(state->link.head); in io_submit_state_end()
2349 if (state->plug_started) in io_submit_state_end()
2350 blk_finish_plug(&state->plug); in io_submit_state_end()
2359 state->plug_started = false; in io_submit_state_start()
2360 state->need_plug = max_ios > 2; in io_submit_state_start()
2361 state->submit_nr = max_ios; in io_submit_state_start()
2363 state->link.head = NULL; in io_submit_state_start()
2368 struct io_rings *rings = ctx->rings; in io_commit_sqring()
2375 smp_store_release(&rings->sq.head, ctx->cached_sq_head); in io_commit_sqring()
2384 * prevent a re-load down the line.
2388 unsigned mask = ctx->sq_entries - 1; in io_get_sqe()
2389 unsigned head = ctx->cached_sq_head++ & mask; in io_get_sqe()
2392 (!(ctx->flags & IORING_SETUP_NO_SQARRAY))) { in io_get_sqe()
2393 head = READ_ONCE(ctx->sq_array[head]); in io_get_sqe()
2394 if (unlikely(head >= ctx->sq_entries)) { in io_get_sqe()
2395 WRITE_ONCE(ctx->rings->sq_dropped, in io_get_sqe()
2396 READ_ONCE(ctx->rings->sq_dropped) + 1); in io_get_sqe()
2399 head = array_index_nospec(head, ctx->sq_entries); in io_get_sqe()
2411 /* double index for 128-byte SQEs, twice as long */ in io_get_sqe()
2412 if (ctx->flags & IORING_SETUP_SQE128) in io_get_sqe()
2414 *sqe = &ctx->sq_sqes[head]; in io_get_sqe()
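io_get_sqe() above consumes the SQ the way userspace produces it: mask the index with sq_entries - 1, optionally indirect through sq_array, and double the index when 128-byte SQEs are in use; io_commit_sqring() then publishes the consumed head with smp_store_release(). A hedged sketch of the matching producer side as seen from userspace follows; the struct and its fields are illustrative, not liburing's API.

#include <stdint.h>
#include <stddef.h>

struct sq_view {
        uint32_t *ktail;        /* tail index shared with the kernel */
        uint32_t *array;        /* sq_array; NULL with IORING_SETUP_NO_SQARRAY */
        void     *sqes;         /* SQE array, 64 or 128 bytes per entry */
        unsigned  ring_mask;    /* sq_entries - 1 */
        unsigned  sqe_shift;    /* 0 normally, 1 with IORING_SETUP_SQE128 */
        uint32_t  tail;         /* local, not yet published */
};

/* Claim the next SQE slot; the identity mapping into sq_array is the
 * common case when userspace does not reorder submissions. */
static void *sq_claim_sqe(struct sq_view *sq)
{
        unsigned idx = sq->tail & sq->ring_mask;

        if (sq->array)
                sq->array[idx] = idx;
        sq->tail++;
        return (char *)sq->sqes + ((size_t)idx << (6 + sq->sqe_shift));
}

/* Publish the new tail so the kernel's READ_ONCE()/acquire side only
 * sees fully written SQEs. */
static void sq_publish(struct sq_view *sq)
{
        __atomic_store_n(sq->ktail, sq->tail, __ATOMIC_RELEASE);
}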
2419 __must_hold(&ctx->uring_lock) in io_submit_sqes()
2430 io_submit_state_start(&ctx->submit_state, left); in io_submit_sqes()
2448 !(ctx->flags & IORING_SETUP_SUBMIT_ALL)) { in io_submit_sqes()
2449 left--; in io_submit_sqes()
2452 } while (--left); in io_submit_sqes()
2455 ret -= left; in io_submit_sqes()
2458 ret = -EAGAIN; in io_submit_sqes()
2459 current->io_uring->cached_refs += left; in io_submit_sqes()
2477 if (io_should_wake(iowq) || io_has_work(iowq->ctx)) in io_wake_function()
2479 return -1; in io_wake_function()
2492 return -EINTR; in io_run_task_work_sig()
2498 struct io_uring_task *tctx = current->io_uring; in current_pending_io()
2502 return percpu_counter_read_positive(&tctx->inflight); in current_pending_io()
2509 WRITE_ONCE(iowq->hit_timeout, 1); in io_cqring_timer_wakeup()
2510 iowq->min_timeout = 0; in io_cqring_timer_wakeup()
2511 wake_up_process(iowq->wq.private); in io_cqring_timer_wakeup()
2523 struct io_ring_ctx *ctx = iowq->ctx; in io_cqring_min_timer_wakeup()
2526 if (iowq->timeout == KTIME_MAX || in io_cqring_min_timer_wakeup()
2527 ktime_compare(iowq->min_timeout, iowq->timeout) >= 0) in io_cqring_min_timer_wakeup()
2533 if (iowq->cq_min_tail != READ_ONCE(ctx->rings->cq.tail)) in io_cqring_min_timer_wakeup()
2545 if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) { in io_cqring_min_timer_wakeup()
2546 atomic_set(&ctx->cq_wait_nr, 1); in io_cqring_min_timer_wakeup()
2548 if (!llist_empty(&ctx->work_llist)) in io_cqring_min_timer_wakeup()
2552 hrtimer_update_function(&iowq->t, io_cqring_timer_wakeup); in io_cqring_min_timer_wakeup()
2553 hrtimer_set_expires(timer, iowq->timeout); in io_cqring_min_timer_wakeup()
2564 if (iowq->min_timeout) { in io_cqring_schedule_timeout()
2565 timeout = ktime_add_ns(iowq->min_timeout, start_time); in io_cqring_schedule_timeout()
2566 hrtimer_setup_on_stack(&iowq->t, io_cqring_min_timer_wakeup, clock_id, in io_cqring_schedule_timeout()
2569 timeout = iowq->timeout; in io_cqring_schedule_timeout()
2570 hrtimer_setup_on_stack(&iowq->t, io_cqring_timer_wakeup, clock_id, in io_cqring_schedule_timeout()
2574 hrtimer_set_expires_range_ns(&iowq->t, timeout, 0); in io_cqring_schedule_timeout()
2575 hrtimer_start_expires(&iowq->t, HRTIMER_MODE_ABS); in io_cqring_schedule_timeout()
2577 if (!READ_ONCE(iowq->hit_timeout)) in io_cqring_schedule_timeout()
2580 hrtimer_cancel(&iowq->t); in io_cqring_schedule_timeout()
2581 destroy_hrtimer_on_stack(&iowq->t); in io_cqring_schedule_timeout()
2584 return READ_ONCE(iowq->hit_timeout) ? -ETIME : 0; in io_cqring_schedule_timeout()
2605 * can take into account that the task is waiting for IO - turns out in __io_cqring_wait_schedule()
2608 if (ext_arg->iowait && current_pending_io()) in __io_cqring_wait_schedule()
2609 current->in_iowait = 1; in __io_cqring_wait_schedule()
2610 if (iowq->timeout != KTIME_MAX || iowq->min_timeout) in __io_cqring_wait_schedule()
2611 ret = io_cqring_schedule_timeout(iowq, ctx->clockid, start_time); in __io_cqring_wait_schedule()
2614 current->in_iowait = 0; in __io_cqring_wait_schedule()
2624 if (unlikely(READ_ONCE(ctx->check_cq))) in io_cqring_wait_schedule()
2631 return -EINTR; in io_cqring_wait_schedule()
2646 struct io_rings *rings = ctx->rings; in io_cqring_wait()
2650 min_events = min_t(int, min_events, ctx->cq_entries); in io_cqring_wait()
2653 return -EEXIST; in io_cqring_wait()
2659 if (unlikely(test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq))) in io_cqring_wait()
2668 iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events; in io_cqring_wait()
2669 iowq.cq_min_tail = READ_ONCE(ctx->rings->cq.tail); in io_cqring_wait()
2670 iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts); in io_cqring_wait()
2672 iowq.min_timeout = ext_arg->min_time; in io_cqring_wait()
2676 if (ext_arg->ts_set) { in io_cqring_wait()
2677 iowq.timeout = timespec64_to_ktime(ext_arg->ts); in io_cqring_wait()
2682 if (ext_arg->sig) { in io_cqring_wait()
2685 ret = set_compat_user_sigmask((const compat_sigset_t __user *)ext_arg->sig, in io_cqring_wait()
2686 ext_arg->argsz); in io_cqring_wait()
2689 ret = set_user_sigmask(ext_arg->sig, ext_arg->argsz); in io_cqring_wait()
2704 nr_wait = (int) iowq.cq_tail - in io_cqring_wait()
2705 READ_ONCE(ctx->rings->cq.tail); in io_cqring_wait()
2709 if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) { in io_cqring_wait()
2710 atomic_set(&ctx->cq_wait_nr, nr_wait); in io_cqring_wait()
2713 prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq, in io_cqring_wait()
2719 atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT); in io_cqring_wait()
2731 * Non-local task_work will be run on exit to userspace, but in io_cqring_wait()
2742 check_cq = READ_ONCE(ctx->check_cq); in io_cqring_wait()
2748 ret = -EBADR; in io_cqring_wait()
2760 if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN)) in io_cqring_wait()
2761 finish_wait(&ctx->cq_wait, &iowq.wq); in io_cqring_wait()
2762 restore_saved_sigmask_unless(ret == -EINTR); in io_cqring_wait()
2764 return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0; in io_cqring_wait()
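io_cqring_wait() above is reached from io_uring_enter(2) when the caller asks to block until at least min_events completions are available. As a minimal sketch of that calling convention, here is a raw-syscall wrapper with no liburing dependency; error handling is left to the caller.

#include <unistd.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

/* Block until at least min_complete CQEs are available (or a signal). */
static long wait_for_cqes(int ring_fd, unsigned int min_complete)
{
        return syscall(__NR_io_uring_enter, ring_fd, 0 /* to_submit */,
                       min_complete, IORING_ENTER_GETEVENTS, NULL, 0);
}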
2769 io_free_region(ctx, &ctx->sq_region); in io_rings_free()
2770 io_free_region(ctx, &ctx->ring_region); in io_rings_free()
2771 ctx->rings = NULL; in io_rings_free()
2772 ctx->sq_sqes = NULL; in io_rings_free()
2828 ctx->nr_req_allocated -= nr; in __io_req_caches_free()
2829 percpu_ref_put_many(&ctx->refs, nr); in __io_req_caches_free()
2835 guard(mutex)(&ctx->uring_lock); in io_req_caches_free()
2843 mutex_lock(&ctx->uring_lock); in io_ring_ctx_free()
2851 io_free_region(ctx, &ctx->param_region); in io_ring_ctx_free()
2852 mutex_unlock(&ctx->uring_lock); in io_ring_ctx_free()
2853 if (ctx->sq_creds) in io_ring_ctx_free()
2854 put_cred(ctx->sq_creds); in io_ring_ctx_free()
2855 if (ctx->submitter_task) in io_ring_ctx_free()
2856 put_task_struct(ctx->submitter_task); in io_ring_ctx_free()
2858 WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); in io_ring_ctx_free()
2860 if (ctx->mm_account) { in io_ring_ctx_free()
2861 mmdrop(ctx->mm_account); in io_ring_ctx_free()
2862 ctx->mm_account = NULL; in io_ring_ctx_free()
2866 if (!(ctx->flags & IORING_SETUP_NO_SQARRAY)) in io_ring_ctx_free()
2869 percpu_ref_exit(&ctx->refs); in io_ring_ctx_free()
2870 free_uid(ctx->user); in io_ring_ctx_free()
2873 WARN_ON_ONCE(ctx->nr_req_allocated); in io_ring_ctx_free()
2875 if (ctx->hash_map) in io_ring_ctx_free()
2876 io_wq_put_hash(ctx->hash_map); in io_ring_ctx_free()
2878 kvfree(ctx->cancel_table.hbs); in io_ring_ctx_free()
2879 xa_destroy(&ctx->io_bl_xa); in io_ring_ctx_free()
2888 mutex_lock(&ctx->uring_lock); in io_activate_pollwq_cb()
2889 ctx->poll_activated = true; in io_activate_pollwq_cb()
2890 mutex_unlock(&ctx->uring_lock); in io_activate_pollwq_cb()
2896 wake_up_all(&ctx->poll_wq); in io_activate_pollwq_cb()
2897 percpu_ref_put(&ctx->refs); in io_activate_pollwq_cb()
2902 spin_lock(&ctx->completion_lock); in io_activate_pollwq()
2904 if (ctx->poll_activated || ctx->poll_wq_task_work.func) in io_activate_pollwq()
2906 if (WARN_ON_ONCE(!ctx->task_complete)) in io_activate_pollwq()
2908 if (!ctx->submitter_task) in io_activate_pollwq()
2911 * with ->submitter_task only the submitter task completes requests, we in io_activate_pollwq()
2914 init_task_work(&ctx->poll_wq_task_work, io_activate_pollwq_cb); in io_activate_pollwq()
2915 percpu_ref_get(&ctx->refs); in io_activate_pollwq()
2916 if (task_work_add(ctx->submitter_task, &ctx->poll_wq_task_work, TWA_SIGNAL)) in io_activate_pollwq()
2917 percpu_ref_put(&ctx->refs); in io_activate_pollwq()
2919 spin_unlock(&ctx->completion_lock); in io_activate_pollwq()
2924 struct io_ring_ctx *ctx = file->private_data; in io_uring_poll()
2927 if (unlikely(!ctx->poll_activated)) in io_uring_poll()
2933 poll_wait(file, &ctx->poll_wq, wait); in io_uring_poll()
2942 * ---- ---- in io_uring_poll()
2943 * lock(&ctx->uring_lock); in io_uring_poll()
2944 * lock(&ep->mtx); in io_uring_poll()
2945 * lock(&ctx->uring_lock); in io_uring_poll()
2946 * lock(&ep->mtx); in io_uring_poll()
2966 struct io_uring_task *tctx = current->io_uring; in io_tctx_exit_cb()
2976 if (tctx && !atomic_read(&tctx->in_cancel)) in io_tctx_exit_cb()
2977 io_uring_del_tctx_node((unsigned long)work->ctx); in io_tctx_exit_cb()
2978 complete(&work->completion); in io_tctx_exit_cb()
2985 return req->ctx == data; in io_cancel_ctx_cb()
2999 * submitted async (out-of-line), then completions can come in while in io_ring_exit_work()
3004 if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) { in io_ring_exit_work()
3005 mutex_lock(&ctx->uring_lock); in io_ring_exit_work()
3007 mutex_unlock(&ctx->uring_lock); in io_ring_exit_work()
3009 if (!xa_empty(&ctx->zcrx_ctxs)) { in io_ring_exit_work()
3010 mutex_lock(&ctx->uring_lock); in io_ring_exit_work()
3012 mutex_unlock(&ctx->uring_lock); in io_ring_exit_work()
3015 if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) in io_ring_exit_work()
3022 if (ctx->sq_data) { in io_ring_exit_work()
3023 struct io_sq_data *sqd = ctx->sq_data; in io_ring_exit_work()
3028 if (tsk && tsk->io_uring && tsk->io_uring->io_wq) in io_ring_exit_work()
3029 io_wq_cancel_cb(tsk->io_uring->io_wq, in io_ring_exit_work()
3051 } while (!wait_for_completion_interruptible_timeout(&ctx->ref_comp, interval)); in io_ring_exit_work()
3057 mutex_lock(&ctx->uring_lock); in io_ring_exit_work()
3058 while (!list_empty(&ctx->tctx_list)) { in io_ring_exit_work()
3061 node = list_first_entry(&ctx->tctx_list, struct io_tctx_node, in io_ring_exit_work()
3064 list_rotate_left(&ctx->tctx_list); in io_ring_exit_work()
3065 ret = task_work_add(node->task, &exit.task_work, TWA_SIGNAL); in io_ring_exit_work()
3069 mutex_unlock(&ctx->uring_lock); in io_ring_exit_work()
3076 mutex_lock(&ctx->uring_lock); in io_ring_exit_work()
3078 mutex_unlock(&ctx->uring_lock); in io_ring_exit_work()
3079 spin_lock(&ctx->completion_lock); in io_ring_exit_work()
3080 spin_unlock(&ctx->completion_lock); in io_ring_exit_work()
3083 if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) in io_ring_exit_work()
3094 mutex_lock(&ctx->uring_lock); in io_ring_ctx_wait_and_kill()
3095 percpu_ref_kill(&ctx->refs); in io_ring_ctx_wait_and_kill()
3096 xa_for_each(&ctx->personalities, index, creds) in io_ring_ctx_wait_and_kill()
3098 mutex_unlock(&ctx->uring_lock); in io_ring_ctx_wait_and_kill()
3100 flush_delayed_work(&ctx->fallback_work); in io_ring_ctx_wait_and_kill()
3102 INIT_WORK(&ctx->exit_work, io_ring_exit_work); in io_ring_ctx_wait_and_kill()
3109 queue_work(iou_wq, &ctx->exit_work); in io_ring_ctx_wait_and_kill()
3114 struct io_ring_ctx *ctx = file->private_data; in io_uring_release()
3116 file->private_data = NULL; in io_uring_release()
3131 return io_match_task_safe(req, cancel->tctx, cancel->all); in io_cancel_task_cb()
3138 struct io_defer_entry *de; in io_cancel_defer_files() local
3141 list_for_each_entry_reverse(de, &ctx->defer_list, list) { in io_cancel_defer_files()
3142 if (io_match_task_safe(de->req, tctx, cancel_all)) { in io_cancel_defer_files()
3143 list_cut_position(&list, &ctx->defer_list, &de->list); in io_cancel_defer_files()
3151 de = list_first_entry(&list, struct io_defer_entry, list); in io_cancel_defer_files()
3152 list_del_init(&de->list); in io_cancel_defer_files()
3153 ctx->nr_drained -= io_linked_nr(de->req); in io_cancel_defer_files()
3154 io_req_task_queue_fail(de->req, -ECANCELED); in io_cancel_defer_files()
3155 kfree(de); in io_cancel_defer_files()
3166 mutex_lock(&ctx->uring_lock); in io_uring_try_cancel_iowq()
3167 list_for_each_entry(node, &ctx->tctx_list, ctx_node) { in io_uring_try_cancel_iowq()
3168 struct io_uring_task *tctx = node->task->io_uring; in io_uring_try_cancel_iowq()
3174 if (!tctx || !tctx->io_wq) in io_uring_try_cancel_iowq()
3176 cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true); in io_uring_try_cancel_iowq()
3179 mutex_unlock(&ctx->uring_lock); in io_uring_try_cancel_iowq()
3194 if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) { in io_uring_try_cancel_requests()
3195 atomic_set(&ctx->cq_wait_nr, 1); in io_uring_try_cancel_requests()
3200 if (!ctx->rings) in io_uring_try_cancel_requests()
3205 } else if (tctx->io_wq) { in io_uring_try_cancel_requests()
3210 cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb, in io_uring_try_cancel_requests()
3216 if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) || in io_uring_try_cancel_requests()
3218 while (!wq_list_empty(&ctx->iopoll_list)) { in io_uring_try_cancel_requests()
3225 if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) && in io_uring_try_cancel_requests()
3228 mutex_lock(&ctx->uring_lock); in io_uring_try_cancel_requests()
3234 mutex_unlock(&ctx->uring_lock); in io_uring_try_cancel_requests()
3239 ret |= flush_delayed_work(&ctx->fallback_work); in io_uring_try_cancel_requests()
3246 return atomic_read(&tctx->inflight_tracked); in tctx_inflight()
3247 return percpu_counter_sum(&tctx->inflight); in tctx_inflight()
3252 * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation.
3256 struct io_uring_task *tctx = current->io_uring; in io_uring_cancel_generic()
3265 if (!current->io_uring) in io_uring_cancel_generic()
3267 if (tctx->io_wq) in io_uring_cancel_generic()
3268 io_wq_exit_start(tctx->io_wq); in io_uring_cancel_generic()
3270 atomic_inc(&tctx->in_cancel); in io_uring_cancel_generic()
3284 xa_for_each(&tctx->xa, index, node) { in io_uring_cancel_generic()
3286 if (node->ctx->sq_data) in io_uring_cancel_generic()
3288 loop |= io_uring_try_cancel_requests(node->ctx, in io_uring_cancel_generic()
3289 current->io_uring, in io_uring_cancel_generic()
3294 list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) in io_uring_cancel_generic()
3296 current->io_uring, in io_uring_cancel_generic()
3306 prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE); in io_uring_cancel_generic()
3309 xa_for_each(&tctx->xa, index, node) { in io_uring_cancel_generic()
3310 if (io_local_work_pending(node->ctx)) { in io_uring_cancel_generic()
3311 WARN_ON_ONCE(node->ctx->submitter_task && in io_uring_cancel_generic()
3312 node->ctx->submitter_task != current); in io_uring_cancel_generic()
3324 finish_wait(&tctx->wait, &wait); in io_uring_cancel_generic()
3331 * ->in_cancel set for normal exit. in io_uring_cancel_generic()
3333 atomic_dec(&tctx->in_cancel); in io_uring_cancel_generic()
3353 return ERR_PTR(-EFAULT); in io_get_ext_arg_reg()
3355 /* also protects from NULL ->cq_wait_arg as the size would be 0 */ in io_get_ext_arg_reg()
3357 end > ctx->cq_wait_size)) in io_get_ext_arg_reg()
3358 return ERR_PTR(-EFAULT); in io_get_ext_arg_reg()
3360 offset = array_index_nospec(offset, ctx->cq_wait_size - size); in io_get_ext_arg_reg()
3361 return ctx->cq_wait_arg + offset; in io_get_ext_arg_reg()
3372 return -EINVAL; in io_validate_ext_arg()
3374 return -EINVAL; in io_validate_ext_arg()
3376 return -EFAULT; in io_validate_ext_arg()
3386 ext_arg->iowait = !(flags & IORING_ENTER_NO_IOWAIT); in io_get_ext_arg()
3393 ext_arg->sig = (const sigset_t __user *) argp; in io_get_ext_arg()
3400 if (ext_arg->argsz != sizeof(struct io_uring_reg_wait)) in io_get_ext_arg()
3401 return -EINVAL; in io_get_ext_arg()
3406 if (w->flags & ~IORING_REG_WAIT_TS) in io_get_ext_arg()
3407 return -EINVAL; in io_get_ext_arg()
3408 ext_arg->min_time = READ_ONCE(w->min_wait_usec) * NSEC_PER_USEC; in io_get_ext_arg()
3409 ext_arg->sig = u64_to_user_ptr(READ_ONCE(w->sigmask)); in io_get_ext_arg()
3410 ext_arg->argsz = READ_ONCE(w->sigmask_sz); in io_get_ext_arg()
3411 if (w->flags & IORING_REG_WAIT_TS) { in io_get_ext_arg()
3412 ext_arg->ts.tv_sec = READ_ONCE(w->ts.tv_sec); in io_get_ext_arg()
3413 ext_arg->ts.tv_nsec = READ_ONCE(w->ts.tv_nsec); in io_get_ext_arg()
3414 ext_arg->ts_set = true; in io_get_ext_arg()
3420 * EXT_ARG is set - ensure we agree on the size of it and copy in our in io_get_ext_arg()
3423 if (ext_arg->argsz != sizeof(arg)) in io_get_ext_arg()
3424 return -EINVAL; in io_get_ext_arg()
3427 return -EFAULT; in io_get_ext_arg()
3428 unsafe_get_user(arg.sigmask, &uarg->sigmask, uaccess_end); in io_get_ext_arg()
3429 unsafe_get_user(arg.sigmask_sz, &uarg->sigmask_sz, uaccess_end); in io_get_ext_arg()
3430 unsafe_get_user(arg.min_wait_usec, &uarg->min_wait_usec, uaccess_end); in io_get_ext_arg()
3431 unsafe_get_user(arg.ts, &uarg->ts, uaccess_end); in io_get_ext_arg()
3435 return -EFAULT; in io_get_ext_arg()
3437 ext_arg->min_time = arg.min_wait_usec * NSEC_PER_USEC; in io_get_ext_arg()
3438 ext_arg->sig = u64_to_user_ptr(arg.sigmask); in io_get_ext_arg()
3439 ext_arg->argsz = arg.sigmask_sz; in io_get_ext_arg()
3441 if (get_timespec64(&ext_arg->ts, u64_to_user_ptr(arg.ts))) in io_get_ext_arg()
3442 return -EFAULT; in io_get_ext_arg()
3443 ext_arg->ts_set = true; in io_get_ext_arg()
3449 return -EFAULT; in io_get_ext_arg()
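/*
 * Illustrative userspace sketch (assumes a kernel with IORING_ENTER_EXT_ARG):
 * with that flag the last two io_uring_enter() arguments become a pointer to
 * struct io_uring_getevents_arg and its size, which is exactly what
 * io_get_ext_arg() above copies and validates. Helper name is made up;
 * ts == 0 is taken to mean "no timeout", matching the arg.ts check above.
 */
#include <linux/io_uring.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>
#include <string.h>

static int example_wait_with_timeout(int ring_fd, unsigned min_complete,
                                     const struct __kernel_timespec *ts)
{
        struct io_uring_getevents_arg arg;

        memset(&arg, 0, sizeof(arg));           /* no sigmask, no min wait */
        arg.ts = (unsigned long long)(uintptr_t)ts;

        return (int)syscall(__NR_io_uring_enter, ring_fd, 0, min_complete,
                            IORING_ENTER_GETEVENTS | IORING_ENTER_EXT_ARG,
                            &arg, sizeof(arg));
}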
3462 return -EINVAL; in SYSCALL_DEFINE6()
3469 struct io_uring_task *tctx = current->io_uring; in SYSCALL_DEFINE6()
3472 return -EINVAL; in SYSCALL_DEFINE6()
3474 file = tctx->registered_rings[fd]; in SYSCALL_DEFINE6()
3476 return -EBADF; in SYSCALL_DEFINE6()
3480 return -EBADF; in SYSCALL_DEFINE6()
3481 ret = -EOPNOTSUPP; in SYSCALL_DEFINE6()
3486 ctx = file->private_data; in SYSCALL_DEFINE6()
3487 ret = -EBADFD; in SYSCALL_DEFINE6()
3488 if (unlikely(ctx->flags & IORING_SETUP_R_DISABLED)) in SYSCALL_DEFINE6()
3497 if (ctx->flags & IORING_SETUP_SQPOLL) { in SYSCALL_DEFINE6()
3498 if (unlikely(ctx->sq_data->thread == NULL)) { in SYSCALL_DEFINE6()
3499 ret = -EOWNERDEAD; in SYSCALL_DEFINE6()
3503 wake_up(&ctx->sq_data->wait); in SYSCALL_DEFINE6()
3513 mutex_lock(&ctx->uring_lock); in SYSCALL_DEFINE6()
3516 mutex_unlock(&ctx->uring_lock); in SYSCALL_DEFINE6()
3520 if (ctx->syscall_iopoll) in SYSCALL_DEFINE6()
3526 if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) in SYSCALL_DEFINE6()
3529 mutex_unlock(&ctx->uring_lock); in SYSCALL_DEFINE6()
3535 if (ctx->syscall_iopoll) { in SYSCALL_DEFINE6()
3542 mutex_lock(&ctx->uring_lock); in SYSCALL_DEFINE6()
3547 mutex_unlock(&ctx->uring_lock); in SYSCALL_DEFINE6()
3565 if (unlikely(ret2 == -EBADR)) in SYSCALL_DEFINE6()
3567 &ctx->check_cq); in SYSCALL_DEFINE6()
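/*
 * Illustrative userspace sketch (hypothetical helper names): the fd handed to
 * io_uring_enter() may be an index into the task's registered-ring table
 * rather than a real descriptor, which is the tctx->registered_rings[] lookup
 * seen above. The index comes from IORING_REGISTER_RING_FDS; offset == -1U
 * asks the kernel to pick a free slot.
 */
#include <linux/io_uring.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stddef.h>

static int example_register_ring_fd(int ring_fd)
{
        struct io_uring_rsrc_update upd = {
                .offset = -1U,                          /* any free slot */
                .data   = (unsigned long long)ring_fd,
        };
        int ret;

        ret = (int)syscall(__NR_io_uring_register, ring_fd,
                           IORING_REGISTER_RING_FDS, &upd, 1);
        return ret == 1 ? (int)upd.offset : -1;         /* slot index */
}

static int example_enter_registered(unsigned ring_index, unsigned to_submit)
{
        return (int)syscall(__NR_io_uring_enter, ring_index, to_submit, 0,
                            IORING_ENTER_REGISTERED_RING, NULL, 0);
}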
3591 return file->f_op == &io_uring_fops; in io_is_uring_fops()
3603 ctx->sq_entries = p->sq_entries; in io_allocate_scq_urings()
3604 ctx->cq_entries = p->cq_entries; in io_allocate_scq_urings()
3606 size = rings_size(ctx->flags, p->sq_entries, p->cq_entries, in io_allocate_scq_urings()
3609 return -EOVERFLOW; in io_allocate_scq_urings()
3613 if (ctx->flags & IORING_SETUP_NO_MMAP) { in io_allocate_scq_urings()
3614 rd.user_addr = p->cq_off.user_addr; in io_allocate_scq_urings()
3617 ret = io_create_region(ctx, &ctx->ring_region, &rd, IORING_OFF_CQ_RING); in io_allocate_scq_urings()
3620 ctx->rings = rings = io_region_get_ptr(&ctx->ring_region); in io_allocate_scq_urings()
3622 if (!(ctx->flags & IORING_SETUP_NO_SQARRAY)) in io_allocate_scq_urings()
3623 ctx->sq_array = (u32 *)((char *)rings + sq_array_offset); in io_allocate_scq_urings()
3624 rings->sq_ring_mask = p->sq_entries - 1; in io_allocate_scq_urings()
3625 rings->cq_ring_mask = p->cq_entries - 1; in io_allocate_scq_urings()
3626 rings->sq_ring_entries = p->sq_entries; in io_allocate_scq_urings()
3627 rings->cq_ring_entries = p->cq_entries; in io_allocate_scq_urings()
3629 if (p->flags & IORING_SETUP_SQE128) in io_allocate_scq_urings()
3630 size = array_size(2 * sizeof(struct io_uring_sqe), p->sq_entries); in io_allocate_scq_urings()
3632 size = array_size(sizeof(struct io_uring_sqe), p->sq_entries); in io_allocate_scq_urings()
3635 return -EOVERFLOW; in io_allocate_scq_urings()
3640 if (ctx->flags & IORING_SETUP_NO_MMAP) { in io_allocate_scq_urings()
3641 rd.user_addr = p->sq_off.user_addr; in io_allocate_scq_urings()
3644 ret = io_create_region(ctx, &ctx->sq_region, &rd, IORING_OFF_SQES); in io_allocate_scq_urings()
3649 ctx->sq_sqes = io_region_get_ptr(&ctx->sq_region); in io_allocate_scq_urings()
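/*
 * Illustrative userspace sketch (assumes IORING_SETUP_NO_MMAP support): with
 * this flag the application hands pre-allocated, page-aligned memory (often
 * huge-page backed) to the kernel via cq_off.user_addr for the SQ/CQ rings
 * and sq_off.user_addr for the SQE array, mirroring the rd.user_addr
 * assignments in io_allocate_scq_urings() above. The caller is assumed to
 * have sized the two buffers appropriately; helper name is made up.
 */
#include <linux/io_uring.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>
#include <string.h>

static int example_setup_no_mmap(unsigned entries, struct io_uring_params *p,
                                 void *ring_mem, void *sqe_mem)
{
        memset(p, 0, sizeof(*p));
        p->flags = IORING_SETUP_NO_MMAP;
        p->cq_off.user_addr = (unsigned long long)(uintptr_t)ring_mem;
        p->sq_off.user_addr = (unsigned long long)(uintptr_t)sqe_mem;

        return (int)syscall(__NR_io_uring_setup, entries, p);
}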
3678 unsigned flags = p->flags; in io_uring_sanitise_params()
3683 return -EINVAL; in io_uring_sanitise_params()
3690 return -EINVAL; in io_uring_sanitise_params()
3696 return -EINVAL; in io_uring_sanitise_params()
3701 return -EINVAL; in io_uring_sanitise_params()
3709 return -EINVAL; in io_uring_sanitise_params()
3717 return -EINVAL; in io_uring_sanitise_params()
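/*
 * Illustrative userspace sketch: a flag combination that passes the checks in
 * io_uring_sanitise_params(). IORING_SETUP_DEFER_TASKRUN is only accepted
 * together with IORING_SETUP_SINGLE_ISSUER, and the task-run flags are
 * rejected when combined with IORING_SETUP_SQPOLL; this is one common
 * configuration, not the only valid one, and the helper name is made up.
 */
#include <linux/io_uring.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>

static int example_setup_single_issuer(unsigned entries)
{
        struct io_uring_params p;

        memset(&p, 0, sizeof(p));
        p.flags = IORING_SETUP_SINGLE_ISSUER |  /* one submitting task */
                  IORING_SETUP_DEFER_TASKRUN;   /* run task work at wait time */

        return (int)syscall(__NR_io_uring_setup, entries, &p);
}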
3725 return -EINVAL; in io_uring_fill_params()
3727 if (!(p->flags & IORING_SETUP_CLAMP)) in io_uring_fill_params()
3728 return -EINVAL; in io_uring_fill_params()
3740 p->sq_entries = roundup_pow_of_two(entries); in io_uring_fill_params()
3741 if (p->flags & IORING_SETUP_CQSIZE) { in io_uring_fill_params()
3744 * to a power-of-two, if it isn't already. We do NOT impose in io_uring_fill_params()
3747 if (!p->cq_entries) in io_uring_fill_params()
3748 return -EINVAL; in io_uring_fill_params()
3749 if (p->cq_entries > IORING_MAX_CQ_ENTRIES) { in io_uring_fill_params()
3750 if (!(p->flags & IORING_SETUP_CLAMP)) in io_uring_fill_params()
3751 return -EINVAL; in io_uring_fill_params()
3752 p->cq_entries = IORING_MAX_CQ_ENTRIES; in io_uring_fill_params()
3754 p->cq_entries = roundup_pow_of_two(p->cq_entries); in io_uring_fill_params()
3755 if (p->cq_entries < p->sq_entries) in io_uring_fill_params()
3756 return -EINVAL; in io_uring_fill_params()
3758 p->cq_entries = 2 * p->sq_entries; in io_uring_fill_params()
3761 p->sq_off.head = offsetof(struct io_rings, sq.head); in io_uring_fill_params()
3762 p->sq_off.tail = offsetof(struct io_rings, sq.tail); in io_uring_fill_params()
3763 p->sq_off.ring_mask = offsetof(struct io_rings, sq_ring_mask); in io_uring_fill_params()
3764 p->sq_off.ring_entries = offsetof(struct io_rings, sq_ring_entries); in io_uring_fill_params()
3765 p->sq_off.flags = offsetof(struct io_rings, sq_flags); in io_uring_fill_params()
3766 p->sq_off.dropped = offsetof(struct io_rings, sq_dropped); in io_uring_fill_params()
3767 p->sq_off.resv1 = 0; in io_uring_fill_params()
3768 if (!(p->flags & IORING_SETUP_NO_MMAP)) in io_uring_fill_params()
3769 p->sq_off.user_addr = 0; in io_uring_fill_params()
3771 p->cq_off.head = offsetof(struct io_rings, cq.head); in io_uring_fill_params()
3772 p->cq_off.tail = offsetof(struct io_rings, cq.tail); in io_uring_fill_params()
3773 p->cq_off.ring_mask = offsetof(struct io_rings, cq_ring_mask); in io_uring_fill_params()
3774 p->cq_off.ring_entries = offsetof(struct io_rings, cq_ring_entries); in io_uring_fill_params()
3775 p->cq_off.overflow = offsetof(struct io_rings, cq_overflow); in io_uring_fill_params()
3776 p->cq_off.cqes = offsetof(struct io_rings, cqes); in io_uring_fill_params()
3777 p->cq_off.flags = offsetof(struct io_rings, cq_flags); in io_uring_fill_params()
3778 p->cq_off.resv1 = 0; in io_uring_fill_params()
3779 if (!(p->flags & IORING_SETUP_NO_MMAP)) in io_uring_fill_params()
3780 p->cq_off.user_addr = 0; in io_uring_fill_params()
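/*
 * Illustrative userspace sketch (no liburing): once io_uring_setup() has
 * filled the sq_off/cq_off fields above, the application maps the rings at
 * the fixed mmap offsets and adds each offset to the mapped base to locate
 * the ring fields. Assumes the single-mmap layout (IORING_FEAT_SINGLE_MMAP)
 * and the default SQ array layout (no IORING_SETUP_NO_SQARRAY); error
 * handling is omitted and the struct/helper names are made up.
 */
#include <linux/io_uring.h>
#include <sys/mman.h>
#include <stddef.h>
#include <stdint.h>

struct example_ring {
        unsigned *sq_head, *sq_tail, *sq_mask, *sq_array;
        unsigned *cq_head, *cq_tail, *cq_mask;
        struct io_uring_cqe *cqes;
        struct io_uring_sqe *sqes;
};

static void example_map_rings(int ring_fd, const struct io_uring_params *p,
                              struct example_ring *r)
{
        size_t sq_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned);
        size_t cq_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe);
        size_t sz = sq_sz > cq_sz ? sq_sz : cq_sz;
        uint8_t *ring;

        ring = mmap(NULL, sz, PROT_READ | PROT_WRITE, MAP_SHARED,
                    ring_fd, IORING_OFF_SQ_RING);

        r->sq_head  = (unsigned *)(ring + p->sq_off.head);
        r->sq_tail  = (unsigned *)(ring + p->sq_off.tail);
        r->sq_mask  = (unsigned *)(ring + p->sq_off.ring_mask);
        r->sq_array = (unsigned *)(ring + p->sq_off.array);
        r->cq_head  = (unsigned *)(ring + p->cq_off.head);
        r->cq_tail  = (unsigned *)(ring + p->cq_off.tail);
        r->cq_mask  = (unsigned *)(ring + p->cq_off.ring_mask);
        r->cqes     = (struct io_uring_cqe *)(ring + p->cq_off.cqes);

        r->sqes = mmap(NULL, p->sq_entries * sizeof(struct io_uring_sqe),
                       PROT_READ | PROT_WRITE, MAP_SHARED,
                       ring_fd, IORING_OFF_SQES);
}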
3803 return -ENOMEM; in io_uring_create()
3805 ctx->clockid = CLOCK_MONOTONIC; in io_uring_create()
3806 ctx->clock_offset = 0; in io_uring_create()
3808 if (!(ctx->flags & IORING_SETUP_NO_SQARRAY)) in io_uring_create()
3811 if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) && in io_uring_create()
3812 !(ctx->flags & IORING_SETUP_IOPOLL) && in io_uring_create()
3813 !(ctx->flags & IORING_SETUP_SQPOLL)) in io_uring_create()
3814 ctx->task_complete = true; in io_uring_create()
3816 if (ctx->task_complete || (ctx->flags & IORING_SETUP_IOPOLL)) in io_uring_create()
3817 ctx->lockless_cq = true; in io_uring_create()
3820 * lazy poll_wq activation relies on ->task_complete for synchronisation in io_uring_create()
3823 if (!ctx->task_complete) in io_uring_create()
3824 ctx->poll_activated = true; in io_uring_create()
3832 if (ctx->flags & IORING_SETUP_IOPOLL && in io_uring_create()
3833 !(ctx->flags & IORING_SETUP_SQPOLL)) in io_uring_create()
3834 ctx->syscall_iopoll = 1; in io_uring_create()
3836 ctx->compat = in_compat_syscall(); in io_uring_create()
3838 ctx->user = get_uid(current_user()); in io_uring_create()
3844 if (ctx->flags & (IORING_SETUP_SQPOLL|IORING_SETUP_COOP_TASKRUN)) in io_uring_create()
3845 ctx->notify_method = TWA_SIGNAL_NO_IPI; in io_uring_create()
3847 ctx->notify_method = TWA_SIGNAL; in io_uring_create()
3855 mmgrab(current->mm); in io_uring_create()
3856 ctx->mm_account = current->mm; in io_uring_create()
3862 if (!(p->flags & IORING_SETUP_NO_SQARRAY)) in io_uring_create()
3863 p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings; in io_uring_create()
3869 p->features = IORING_FEAT_FLAGS; in io_uring_create()
3872 ret = -EFAULT; in io_uring_create()
3876 if (ctx->flags & IORING_SETUP_SINGLE_ISSUER in io_uring_create()
3877 && !(ctx->flags & IORING_SETUP_R_DISABLED)) { in io_uring_create()
3882 ctx->submitter_task = get_task_struct(current); in io_uring_create()
3894 tctx = current->io_uring; in io_uring_create()
3900 if (p->flags & IORING_SETUP_REGISTERED_FD_ONLY) in io_uring_create()
3907 trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags); in io_uring_create()
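/*
 * Illustrative userspace sketch: with IORING_SETUP_R_DISABLED the ring starts
 * disabled and io_uring_create() above defers recording submitter_task; the
 * task that later enables the ring via IORING_REGISTER_ENABLE_RINGS becomes
 * the single issuer. Helper names and the create-then-hand-off flow are
 * assumptions for illustration.
 */
#include <linux/io_uring.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>

static int example_create_disabled(unsigned entries)
{
        struct io_uring_params p;

        memset(&p, 0, sizeof(p));
        p.flags = IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_R_DISABLED;
        return (int)syscall(__NR_io_uring_setup, entries, &p);
}

/* called by the thread that will own submissions from now on */
static int example_enable_ring(int ring_fd)
{
        return (int)syscall(__NR_io_uring_register, ring_fd,
                            IORING_REGISTER_ENABLE_RINGS, NULL, 0);
}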
3928 return -EFAULT; in io_uring_setup()
3931 return -EINVAL; in io_uring_setup()
3935 return -EINVAL; in io_uring_setup()
3945 return -EPERM; in io_uring_allowed()
3952 return -EPERM; in io_uring_allowed()
3955 return -EPERM; in io_uring_allowed()
4044 /* ->buf_index is u16 */ in io_uring_init()
4063 /* imu->dir is u8 */ in io_uring_init()
4067 * Allow user copy in the per-command field, which starts after the in io_uring_init()