// SPDX-License-Identifier: GPL-2.0
/*
 * FUSE: Filesystem in Userspace
 * Copyright (c) 2023-2024 DataDirect Networks.
 */

#include "fuse_i.h"
#include "dev_uring_i.h"
#include "fuse_dev_i.h"

#include <linux/fs.h>
#include <linux/io_uring/cmd.h>

static bool __read_mostly enable_uring;
module_param(enable_uring, bool, 0644);
MODULE_PARM_DESC(enable_uring,
		 "Enable userspace communication through io-uring");

#define FUSE_URING_IOV_SEGS 2 /* header and payload */


bool fuse_uring_enabled(void)
{
	return enable_uring;
}

struct fuse_uring_pdu {
	struct fuse_ring_ent *ent;
};

static const struct fuse_iqueue_ops fuse_io_uring_ops;

static void uring_cmd_set_ring_ent(struct io_uring_cmd *cmd,
				   struct fuse_ring_ent *ring_ent)
{
	struct fuse_uring_pdu *pdu =
		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu);

	pdu->ent = ring_ent;
}

static struct fuse_ring_ent *uring_cmd_to_ring_ent(struct io_uring_cmd *cmd)
{
	struct fuse_uring_pdu *pdu =
		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu);

	return pdu->ent;
}

static void fuse_uring_flush_bg(struct fuse_ring_queue *queue)
{
	struct fuse_ring *ring = queue->ring;
	struct fuse_conn *fc = ring->fc;

	lockdep_assert_held(&queue->lock);
	lockdep_assert_held(&fc->bg_lock);

	/*
	 * Allow one bg request per queue, ignoring global fc limits.
	 * This prevents a single queue from consuming all resources and
	 * eliminates the need for remote queue wake-ups when global
	 * limits are met but this queue has no more waiting requests.
	 */
	while ((fc->active_background < fc->max_background ||
		!queue->active_background) &&
	       (!list_empty(&queue->fuse_req_bg_queue))) {
		struct fuse_req *req;

		req = list_first_entry(&queue->fuse_req_bg_queue,
				       struct fuse_req, list);
		fc->active_background++;
		queue->active_background++;

		list_move_tail(&req->list, &queue->fuse_req_queue);
	}
}

static void fuse_uring_req_end(struct fuse_ring_ent *ent, struct fuse_req *req,
			       int error)
{
	struct fuse_ring_queue *queue = ent->queue;
	struct fuse_ring *ring = queue->ring;
	struct fuse_conn *fc = ring->fc;

	lockdep_assert_not_held(&queue->lock);
	spin_lock(&queue->lock);
	ent->fuse_req = NULL;
	if (test_bit(FR_BACKGROUND, &req->flags)) {
		queue->active_background--;
		spin_lock(&fc->bg_lock);
		fuse_uring_flush_bg(queue);
		spin_unlock(&fc->bg_lock);
	}

	spin_unlock(&queue->lock);

	if (error)
		req->out.h.error = error;

	clear_bit(FR_SENT, &req->flags);
	fuse_request_end(req);
}

/* Abort all requests queued on the given ring queue */
static void fuse_uring_abort_end_queue_requests(struct fuse_ring_queue *queue)
{
	struct fuse_req *req;
	LIST_HEAD(req_list);

	spin_lock(&queue->lock);
	list_for_each_entry(req, &queue->fuse_req_queue, list)
		clear_bit(FR_PENDING, &req->flags);
	list_splice_init(&queue->fuse_req_queue, &req_list);
	spin_unlock(&queue->lock);

	/* must not hold queue lock to avoid order issues with fi->lock */
	fuse_dev_end_requests(&req_list);
}

void fuse_uring_abort_end_requests(struct fuse_ring *ring)
{
	int qid;
	struct fuse_ring_queue *queue;
	struct fuse_conn *fc = ring->fc;

	for (qid = 0; qid < ring->nr_queues; qid++) {
		queue = READ_ONCE(ring->queues[qid]);
		if (!queue)
			continue;

		queue->stopped = true;

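		/*
		 * The abort path is expected to have lifted the bg limit
		 * (fc->max_background == UINT_MAX, checked below), so
		 * fuse_uring_flush_bg() can move all waiting bg requests.
		 */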
		WARN_ON_ONCE(ring->fc->max_background != UINT_MAX);
		spin_lock(&queue->lock);
		spin_lock(&fc->bg_lock);
		fuse_uring_flush_bg(queue);
		spin_unlock(&fc->bg_lock);
		spin_unlock(&queue->lock);
		fuse_uring_abort_end_queue_requests(queue);
	}
}

void fuse_uring_destruct(struct fuse_conn *fc)
{
	struct fuse_ring *ring = fc->ring;
	int qid;

	if (!ring)
		return;

	for (qid = 0; qid < ring->nr_queues; qid++) {
		struct fuse_ring_queue *queue = ring->queues[qid];
		struct fuse_ring_ent *ent, *next;

		if (!queue)
			continue;

		WARN_ON(!list_empty(&queue->ent_avail_queue));
		WARN_ON(!list_empty(&queue->ent_w_req_queue));
		WARN_ON(!list_empty(&queue->ent_commit_queue));
		WARN_ON(!list_empty(&queue->ent_in_userspace));

		list_for_each_entry_safe(ent, next, &queue->ent_released,
					 list) {
			list_del_init(&ent->list);
			kfree(ent);
		}

		kfree(queue->fpq.processing);
		kfree(queue);
		ring->queues[qid] = NULL;
	}

	kfree(ring->queues);
	kfree(ring);
	fc->ring = NULL;
}

/*
 * Basic ring setup for this connection based on the provided configuration
 */
static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc)
{
	struct fuse_ring *ring;
	size_t nr_queues = num_possible_cpus();
	struct fuse_ring *res = NULL;
	size_t max_payload_size;

	ring = kzalloc(sizeof(*fc->ring), GFP_KERNEL_ACCOUNT);
	if (!ring)
		return NULL;

	ring->queues = kcalloc(nr_queues, sizeof(struct fuse_ring_queue *),
			       GFP_KERNEL_ACCOUNT);
	if (!ring->queues)
		goto out_err;

	max_payload_size = max(FUSE_MIN_READ_BUFFER, fc->max_write);
	max_payload_size = max(max_payload_size, fc->max_pages * PAGE_SIZE);

	spin_lock(&fc->lock);
	if (fc->ring) {
		/* race, another thread created the ring in the meantime */
		spin_unlock(&fc->lock);
		res = fc->ring;
		goto out_err;
	}

	init_waitqueue_head(&ring->stop_waitq);

	ring->nr_queues = nr_queues;
	ring->fc = fc;
	ring->max_payload_sz = max_payload_size;
	atomic_set(&ring->queue_refs, 0);
	smp_store_release(&fc->ring, ring);

	spin_unlock(&fc->lock);
	return ring;

out_err:
	kfree(ring->queues);
	kfree(ring);
	return res;
}

static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring,
						       int qid)
{
	struct fuse_conn *fc = ring->fc;
	struct fuse_ring_queue *queue;
	struct list_head *pq;

	queue = kzalloc(sizeof(*queue), GFP_KERNEL_ACCOUNT);
	if (!queue)
		return NULL;
	pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
	if (!pq) {
		kfree(queue);
		return NULL;
	}

	queue->qid = qid;
	queue->ring = ring;
	spin_lock_init(&queue->lock);

	INIT_LIST_HEAD(&queue->ent_avail_queue);
	INIT_LIST_HEAD(&queue->ent_commit_queue);
	INIT_LIST_HEAD(&queue->ent_w_req_queue);
	INIT_LIST_HEAD(&queue->ent_in_userspace);
	INIT_LIST_HEAD(&queue->fuse_req_queue);
	INIT_LIST_HEAD(&queue->fuse_req_bg_queue);
	INIT_LIST_HEAD(&queue->ent_released);

	queue->fpq.processing = pq;
	fuse_pqueue_init(&queue->fpq);

	spin_lock(&fc->lock);
	if (ring->queues[qid]) {
		spin_unlock(&fc->lock);
		kfree(queue->fpq.processing);
		kfree(queue);
		return ring->queues[qid];
	}

	/*
	 * write_once and lock as the caller mostly doesn't take the lock at all
	 */
	WRITE_ONCE(ring->queues[qid], queue);
	spin_unlock(&fc->lock);

	return queue;
}

static void fuse_uring_stop_fuse_req_end(struct fuse_req *req)
{
	clear_bit(FR_SENT, &req->flags);
	req->out.h.error = -ECONNABORTED;
	fuse_request_end(req);
}

/*
 * Release a request/entry on connection tear down
 */
static void fuse_uring_entry_teardown(struct fuse_ring_ent *ent)
{
	struct fuse_req *req;
	struct io_uring_cmd *cmd;

	struct fuse_ring_queue *queue = ent->queue;

	spin_lock(&queue->lock);
	cmd = ent->cmd;
	ent->cmd = NULL;
	req = ent->fuse_req;
	ent->fuse_req = NULL;
	if (req) {
		/* remove entry from queue->fpq->processing */
		list_del_init(&req->list);
	}

	/*
	 * The entry must not be freed immediately, because IO_URING_F_CANCEL
	 * handling accesses entries through direct pointers - there is a risk
	 * of a race with daemon termination (which triggers IO_URING_F_CANCEL)
	 * accessing entries without checking the list state first.
	 */
	list_move(&ent->list, &queue->ent_released);
	ent->state = FRRS_RELEASED;
	spin_unlock(&queue->lock);

	if (cmd)
		io_uring_cmd_done(cmd, -ENOTCONN, 0, IO_URING_F_UNLOCKED);

	if (req)
		fuse_uring_stop_fuse_req_end(req);
}

static void fuse_uring_stop_list_entries(struct list_head *head,
					 struct fuse_ring_queue *queue,
					 enum fuse_ring_req_state exp_state)
{
	struct fuse_ring *ring = queue->ring;
	struct fuse_ring_ent *ent, *next;
	ssize_t queue_refs = SSIZE_MAX;
	LIST_HEAD(to_teardown);

	spin_lock(&queue->lock);
	list_for_each_entry_safe(ent, next, head, list) {
		if (ent->state != exp_state) {
			pr_warn("entry teardown qid=%d state=%d expected=%d",
				queue->qid, ent->state, exp_state);
			continue;
		}

		ent->state = FRRS_TEARDOWN;
		list_move(&ent->list, &to_teardown);
	}
	spin_unlock(&queue->lock);

	/* no queue lock to avoid lock order issues */
	list_for_each_entry_safe(ent, next, &to_teardown, list) {
		fuse_uring_entry_teardown(ent);
		queue_refs = atomic_dec_return(&ring->queue_refs);
		WARN_ON_ONCE(queue_refs < 0);
	}
}
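
/*
 * Tear down idle entries and entries currently handed out to userspace.
 * Entries in other states are left for the async teardown worker, which
 * retries until all queue references are gone.
 */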
static void fuse_uring_teardown_entries(struct fuse_ring_queue *queue)
{
	fuse_uring_stop_list_entries(&queue->ent_in_userspace, queue,
				     FRRS_USERSPACE);
	fuse_uring_stop_list_entries(&queue->ent_avail_queue, queue,
				     FRRS_AVAILABLE);
}

/*
 * Log state debug info
 */
static void fuse_uring_log_ent_state(struct fuse_ring *ring)
{
	int qid;
	struct fuse_ring_ent *ent;

	for (qid = 0; qid < ring->nr_queues; qid++) {
		struct fuse_ring_queue *queue = ring->queues[qid];

		if (!queue)
			continue;

		spin_lock(&queue->lock);
		/*
		 * Log entries from the intermediate queue, the other queues
		 * should be empty
		 */
		list_for_each_entry(ent, &queue->ent_w_req_queue, list) {
			pr_info(" ent-req-queue ring=%p qid=%d ent=%p state=%d\n",
				ring, qid, ent, ent->state);
		}
		list_for_each_entry(ent, &queue->ent_commit_queue, list) {
			pr_info(" ent-commit-queue ring=%p qid=%d ent=%p state=%d\n",
				ring, qid, ent, ent->state);
		}
		spin_unlock(&queue->lock);
	}
	ring->stop_debug_log = 1;
}

static void fuse_uring_async_stop_queues(struct work_struct *work)
{
	int qid;
	struct fuse_ring *ring =
		container_of(work, struct fuse_ring, async_teardown_work.work);

	/* XXX code dup */
	for (qid = 0; qid < ring->nr_queues; qid++) {
		struct fuse_ring_queue *queue = READ_ONCE(ring->queues[qid]);

		if (!queue)
			continue;

		fuse_uring_teardown_entries(queue);
	}

	/*
	 * Some ring entries might be in the middle of IO operations,
	 * i.e. in the process of being handled by file_operations::uring_cmd
	 * or on the way to userspace - we could handle that with conditions in
	 * run-time code, but it is easier/cleaner to have an async tear-down
	 * handler if there are still queue references left.
	 */
	if (atomic_read(&ring->queue_refs) > 0) {
		if (time_after(jiffies,
			       ring->teardown_time + FUSE_URING_TEARDOWN_TIMEOUT))
			fuse_uring_log_ent_state(ring);

		schedule_delayed_work(&ring->async_teardown_work,
				      FUSE_URING_TEARDOWN_INTERVAL);
	} else {
		wake_up_all(&ring->stop_waitq);
	}
}

/*
 * Stop the ring queues
 */
void fuse_uring_stop_queues(struct fuse_ring *ring)
{
	int qid;

	for (qid = 0; qid < ring->nr_queues; qid++) {
		struct fuse_ring_queue *queue = READ_ONCE(ring->queues[qid]);

		if (!queue)
			continue;

		fuse_uring_teardown_entries(queue);
	}

	if (atomic_read(&ring->queue_refs) > 0) {
		ring->teardown_time = jiffies;
		INIT_DELAYED_WORK(&ring->async_teardown_work,
				  fuse_uring_async_stop_queues);
		schedule_delayed_work(&ring->async_teardown_work,
				      FUSE_URING_TEARDOWN_INTERVAL);
	} else {
		wake_up_all(&ring->stop_waitq);
	}
}

/*
 * Handle IO_URING_F_CANCEL, which typically arrives on daemon termination.
 *
 * Releasing the last entry should trigger fuse_dev_release() if
 * the daemon was terminated
 */
static void fuse_uring_cancel(struct io_uring_cmd *cmd,
			      unsigned int issue_flags)
{
	struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
	struct fuse_ring_queue *queue;
	bool need_cmd_done = false;

	/*
	 * ent is accessed directly here - it must not be destructed as long
	 * as IO_URING_F_CANCEL might still come up
	 */
	queue = ent->queue;
	spin_lock(&queue->lock);
	if (ent->state == FRRS_AVAILABLE) {
		ent->state = FRRS_USERSPACE;
		list_move(&ent->list, &queue->ent_in_userspace);
		need_cmd_done = true;
		ent->cmd = NULL;
	}
	spin_unlock(&queue->lock);

	if (need_cmd_done) {
		/* no queue lock to avoid lock order issues */
		io_uring_cmd_done(cmd, -ENOTCONN, 0, issue_flags);
	}
}

static void fuse_uring_prepare_cancel(struct io_uring_cmd *cmd, int issue_flags,
				      struct fuse_ring_ent *ring_ent)
{
	uring_cmd_set_ring_ent(cmd, ring_ent);
	io_uring_cmd_mark_cancelable(cmd, issue_flags);
}

/*
 * Check for errors and store them in the request
 */
static int fuse_uring_out_header_has_err(struct fuse_out_header *oh,
					 struct fuse_req *req,
					 struct fuse_conn *fc)
{
	int err;

	err = -EINVAL;
	if (oh->unique == 0) {
		/* Not supported through io-uring yet */
		pr_warn_once("notify through fuse-io-uring not supported\n");
		goto err;
	}

	if (oh->error <= -ERESTARTSYS || oh->error > 0)
		goto err;

	if (oh->error) {
		err = oh->error;
		goto err;
	}

	err = -ENOENT;
	if ((oh->unique & ~FUSE_INT_REQ_BIT) != req->in.h.unique) {
		pr_warn_ratelimited("unique mismatch, expected: %llu got %llu\n",
				    req->in.h.unique,
				    oh->unique & ~FUSE_INT_REQ_BIT);
		goto err;
	}

	/*
	 * Is it an interrupt reply ID?
	 * XXX: Not supported through fuse-io-uring yet, it should not even
	 * find the request - should not happen.
	 */
	WARN_ON_ONCE(oh->unique & FUSE_INT_REQ_BIT);

	err = 0;
err:
	return err;
}

static int fuse_uring_copy_from_ring(struct fuse_ring *ring,
				     struct fuse_req *req,
				     struct fuse_ring_ent *ent)
{
	struct fuse_copy_state cs;
	struct fuse_args *args = req->args;
	struct iov_iter iter;
	int err;
	struct fuse_uring_ent_in_out ring_in_out;

	err = copy_from_user(&ring_in_out, &ent->headers->ring_ent_in_out,
			     sizeof(ring_in_out));
	if (err)
		return -EFAULT;

	err = import_ubuf(ITER_SOURCE, ent->payload, ring->max_payload_sz,
			  &iter);
	if (err)
		return err;

	fuse_copy_init(&cs, 0, &iter);
	cs.is_uring = 1;
	cs.req = req;

	return fuse_copy_out_args(&cs, args, ring_in_out.payload_sz);
}

/*
 * Copy data from the req to the ring buffer
 */
static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req,
				   struct fuse_ring_ent *ent)
{
	struct fuse_copy_state cs;
	struct fuse_args *args = req->args;
	struct fuse_in_arg *in_args = args->in_args;
	int num_args = args->in_numargs;
	int err;
	struct iov_iter iter;
	struct fuse_uring_ent_in_out ent_in_out = {
		.flags = 0,
		.commit_id = req->in.h.unique,
	};

	err = import_ubuf(ITER_DEST, ent->payload, ring->max_payload_sz, &iter);
	if (err) {
		pr_info_ratelimited("fuse: Import of user buffer failed\n");
		return err;
	}

	fuse_copy_init(&cs, 1, &iter);
	cs.is_uring = 1;
	cs.req = req;

	if (num_args > 0) {
		/*
		 * The first argument is expected to be the per-opcode header.
		 * Some opcodes have a zero-size header.
		 */
		if (args->in_args[0].size > 0) {
			err = copy_to_user(&ent->headers->op_in, in_args->value,
					   in_args->size);
			if (err) {
				pr_info_ratelimited(
					"Copying the header failed.\n");
				return -EFAULT;
			}
		}
		in_args++;
		num_args--;
	}

	/* copy the payload */
	err = fuse_copy_args(&cs, num_args, args->in_pages,
			     (struct fuse_arg *)in_args, 0);
	if (err) {
		pr_info_ratelimited("%s fuse_copy_args failed\n", __func__);
		return err;
	}

	ent_in_out.payload_sz = cs.ring.copied_sz;
	err = copy_to_user(&ent->headers->ring_ent_in_out, &ent_in_out,
			   sizeof(ent_in_out));
	return err ? -EFAULT : 0;
}

static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent,
				   struct fuse_req *req)
{
	struct fuse_ring_queue *queue = ent->queue;
	struct fuse_ring *ring = queue->ring;
	int err;

	err = -EIO;
	if (WARN_ON(ent->state != FRRS_FUSE_REQ)) {
		pr_err("qid=%d ring-req=%p invalid state %d on send\n",
		       queue->qid, ent, ent->state);
		return err;
	}

	err = -EINVAL;
	if (WARN_ON(req->in.h.unique == 0))
		return err;

	/* copy the request */
	err = fuse_uring_args_to_ring(ring, req, ent);
	if (unlikely(err)) {
		pr_info_ratelimited("Copy to ring failed: %d\n", err);
		return err;
	}

	/* copy fuse_in_header */
	err = copy_to_user(&ent->headers->in_out, &req->in.h,
			   sizeof(req->in.h));
	if (err) {
		err = -EFAULT;
		return err;
	}

	return 0;
}

static int fuse_uring_prepare_send(struct fuse_ring_ent *ent,
				   struct fuse_req *req)
{
	int err;

	err = fuse_uring_copy_to_ring(ent, req);
	if (!err)
		set_bit(FR_SENT, &req->flags);
	else
		fuse_uring_req_end(ent, req, err);

	return err;
}

/*
 * Write data to the ring buffer and send the request to userspace,
 * userspace will read it.
 * This is comparable to a classical read(/dev/fuse).
 */
static int fuse_uring_send_next_to_ring(struct fuse_ring_ent *ent,
					struct fuse_req *req,
					unsigned int issue_flags)
{
	struct fuse_ring_queue *queue = ent->queue;
	int err;
	struct io_uring_cmd *cmd;

	err = fuse_uring_prepare_send(ent, req);
	if (err)
		return err;

	spin_lock(&queue->lock);
	cmd = ent->cmd;
	ent->cmd = NULL;
	ent->state = FRRS_USERSPACE;
	list_move(&ent->list, &queue->ent_in_userspace);
	spin_unlock(&queue->lock);

	io_uring_cmd_done(cmd, 0, 0, issue_flags);
	return 0;
}

/*
 * Make a ring entry available for fuse_req assignment
 */
static void fuse_uring_ent_avail(struct fuse_ring_ent *ent,
				 struct fuse_ring_queue *queue)
{
	WARN_ON_ONCE(!ent->cmd);
	list_move(&ent->list, &queue->ent_avail_queue);
	ent->state = FRRS_AVAILABLE;
}

/* Used to find the request on SQE commit */
static void fuse_uring_add_to_pq(struct fuse_ring_ent *ent,
				 struct fuse_req *req)
{
	struct fuse_ring_queue *queue = ent->queue;
	struct fuse_pqueue *fpq = &queue->fpq;
	unsigned int hash;

	req->ring_entry = ent;
	hash = fuse_req_hash(req->in.h.unique);
	list_move_tail(&req->list, &fpq->processing[hash]);
}

/*
 * Assign a fuse queue entry to the given entry
 */
static void fuse_uring_add_req_to_ring_ent(struct fuse_ring_ent *ent,
					   struct fuse_req *req)
{
	struct fuse_ring_queue *queue = ent->queue;
	struct fuse_conn *fc = req->fm->fc;
	struct fuse_iqueue *fiq = &fc->iq;

	lockdep_assert_held(&queue->lock);

	if (WARN_ON_ONCE(ent->state != FRRS_AVAILABLE &&
			 ent->state != FRRS_COMMIT)) {
		pr_warn("%s qid=%d state=%d\n", __func__, ent->queue->qid,
			ent->state);
	}

	spin_lock(&fiq->lock);
	clear_bit(FR_PENDING, &req->flags);
	spin_unlock(&fiq->lock);
	ent->fuse_req = req;
	ent->state = FRRS_FUSE_REQ;
	list_move(&ent->list, &queue->ent_w_req_queue);
	fuse_uring_add_to_pq(ent, req);
}

/* Fetch the next fuse request if available */
static struct fuse_req *fuse_uring_ent_assign_req(struct fuse_ring_ent *ent)
	__must_hold(&queue->lock)
{
	struct fuse_req *req;
	struct fuse_ring_queue *queue = ent->queue;
	struct list_head *req_queue = &queue->fuse_req_queue;

	lockdep_assert_held(&queue->lock);

	/* get and assign the next entry while it is still holding the lock */
	req = list_first_entry_or_null(req_queue, struct fuse_req, list);
	if (req)
		fuse_uring_add_req_to_ring_ent(ent, req);

	return req;
}

/*
 * Read data from the ring buffer, which user space has written to.
 * This is comparable to handling of a classical write(/dev/fuse).
 * Also make the ring request available again for new fuse requests.
 */
static void fuse_uring_commit(struct fuse_ring_ent *ent, struct fuse_req *req,
			      unsigned int issue_flags)
{
	struct fuse_ring *ring = ent->queue->ring;
	struct fuse_conn *fc = ring->fc;
	ssize_t err = 0;

	err = copy_from_user(&req->out.h, &ent->headers->in_out,
			     sizeof(req->out.h));
	if (err) {
		req->out.h.error = -EFAULT;
		goto out;
	}

	err = fuse_uring_out_header_has_err(&req->out.h, req, fc);
	if (err) {
		/* req->out.h.error already set */
		goto out;
	}

	err = fuse_uring_copy_from_ring(ring, req, ent);
out:
	fuse_uring_req_end(ent, req, err);
}

/*
 * Get the next fuse req and send it
 */
static void fuse_uring_next_fuse_req(struct fuse_ring_ent *ent,
				     struct fuse_ring_queue *queue,
				     unsigned int issue_flags)
{
	int err;
	struct fuse_req *req;

retry:
	spin_lock(&queue->lock);
	fuse_uring_ent_avail(ent, queue);
	req = fuse_uring_ent_assign_req(ent);
	spin_unlock(&queue->lock);

	if (req) {
		err = fuse_uring_send_next_to_ring(ent, req, issue_flags);
		if (err)
			goto retry;
	}
}

static int fuse_ring_ent_set_commit(struct fuse_ring_ent *ent)
{
	struct fuse_ring_queue *queue = ent->queue;

	lockdep_assert_held(&queue->lock);

	if (WARN_ON_ONCE(ent->state != FRRS_USERSPACE))
		return -EIO;

	ent->state = FRRS_COMMIT;
	list_move(&ent->list, &queue->ent_commit_queue);

	return 0;
}

/* FUSE_URING_CMD_COMMIT_AND_FETCH handler */
static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
				   struct fuse_conn *fc)
{
	const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe);
	struct fuse_ring_ent *ent;
	int err;
	struct fuse_ring *ring = fc->ring;
	struct fuse_ring_queue *queue;
	uint64_t commit_id = READ_ONCE(cmd_req->commit_id);
	unsigned int qid = READ_ONCE(cmd_req->qid);
	struct fuse_pqueue *fpq;
	struct fuse_req *req;

	err = -ENOTCONN;
	if (!ring)
		return err;

	if (qid >= ring->nr_queues)
		return -EINVAL;

	queue = ring->queues[qid];
	if (!queue)
		return err;
	fpq = &queue->fpq;

	if (!READ_ONCE(fc->connected) || READ_ONCE(queue->stopped))
		return err;

	spin_lock(&queue->lock);
	/* Find a request based on the unique ID of the fuse request.
	 * This should get revised, as it needs a hash calculation and list
	 * search. And a full struct fuse_pqueue is needed (memory overhead),
	 * as well as the link from req to ring_ent.
	 */
	req = fuse_request_find(fpq, commit_id);
	err = -ENOENT;
	if (!req) {
		pr_info("qid=%d commit_id %llu not found\n", queue->qid,
			commit_id);
		spin_unlock(&queue->lock);
		return err;
	}
	list_del_init(&req->list);
	ent = req->ring_entry;
	req->ring_entry = NULL;

	err = fuse_ring_ent_set_commit(ent);
	if (err != 0) {
		pr_info_ratelimited("qid=%d commit_id %llu state %d",
				    queue->qid, commit_id, ent->state);
		spin_unlock(&queue->lock);
		req->out.h.error = err;
		clear_bit(FR_SENT, &req->flags);
		fuse_request_end(req);
		return err;
	}

	ent->cmd = cmd;
	spin_unlock(&queue->lock);

	/* without the queue lock, as other locks are taken */
	fuse_uring_prepare_cancel(cmd, issue_flags, ent);
	fuse_uring_commit(ent, req, issue_flags);

	/*
	 * Fetching the next request is absolutely required as queued
	 * fuse requests would otherwise not get processed - committing
	 * and fetching is done in one step vs legacy fuse, which has separated
	 * read (fetch request) and write (commit result).
	 */
	fuse_uring_next_fuse_req(ent, queue, issue_flags);
	return 0;
}

static bool is_ring_ready(struct fuse_ring *ring, int current_qid)
{
	int qid;
	struct fuse_ring_queue *queue;
	bool ready = true;

	for (qid = 0; qid < ring->nr_queues && ready; qid++) {
		if (current_qid == qid)
			continue;

		queue = ring->queues[qid];
		if (!queue) {
			ready = false;
			break;
		}

		spin_lock(&queue->lock);
		if (list_empty(&queue->ent_avail_queue))
			ready = false;
		spin_unlock(&queue->lock);
	}

	return ready;
}

/*
 * fuse_uring_req_fetch command handling
 */
static void fuse_uring_do_register(struct fuse_ring_ent *ent,
				   struct io_uring_cmd *cmd,
				   unsigned int issue_flags)
{
	struct fuse_ring_queue *queue = ent->queue;
	struct fuse_ring *ring = queue->ring;
	struct fuse_conn *fc = ring->fc;
	struct fuse_iqueue *fiq = &fc->iq;

	fuse_uring_prepare_cancel(cmd, issue_flags, ent);

	spin_lock(&queue->lock);
	ent->cmd = cmd;
	fuse_uring_ent_avail(ent, queue);
	spin_unlock(&queue->lock);

	if (!ring->ready) {
		bool ready = is_ring_ready(ring, queue->qid);

		if (ready) {
			WRITE_ONCE(fiq->ops, &fuse_io_uring_ops);
			WRITE_ONCE(ring->ready, true);
			wake_up_all(&fc->blocked_waitq);
		}
	}
}

/*
 * sqe->addr is a ptr to an iovec array, iov[0] has the headers, iov[1]
 * the payload
 */
static int fuse_uring_get_iovec_from_sqe(const struct io_uring_sqe *sqe,
					 struct iovec iov[FUSE_URING_IOV_SEGS])
{
	struct iovec __user *uiov = u64_to_user_ptr(READ_ONCE(sqe->addr));
	struct iov_iter iter;
	ssize_t ret;

	if (sqe->len != FUSE_URING_IOV_SEGS)
		return -EINVAL;

	/*
	 * The direction for buffer access will actually be READ and WRITE;
	 * using WRITE for the import includes READ access as well.
	 */
	ret = import_iovec(WRITE, uiov, FUSE_URING_IOV_SEGS,
			   FUSE_URING_IOV_SEGS, &iov, &iter);
	if (ret < 0)
		return ret;

	return 0;
}

static struct fuse_ring_ent *
fuse_uring_create_ring_ent(struct io_uring_cmd *cmd,
			   struct fuse_ring_queue *queue)
{
	struct fuse_ring *ring = queue->ring;
	struct fuse_ring_ent *ent;
	size_t payload_size;
	struct iovec iov[FUSE_URING_IOV_SEGS];
	int err;

	err = fuse_uring_get_iovec_from_sqe(cmd->sqe, iov);
	if (err) {
		pr_info_ratelimited("Failed to get iovec from sqe, err=%d\n",
				    err);
		return ERR_PTR(err);
	}

	err = -EINVAL;
	if (iov[0].iov_len < sizeof(struct fuse_uring_req_header)) {
		pr_info_ratelimited("Invalid header len %zu\n", iov[0].iov_len);
		return ERR_PTR(err);
	}

	payload_size = iov[1].iov_len;
	if (payload_size < ring->max_payload_sz) {
		pr_info_ratelimited("Invalid req payload len %zu\n",
				    payload_size);
		return ERR_PTR(err);
	}

	err = -ENOMEM;
	ent = kzalloc(sizeof(*ent), GFP_KERNEL_ACCOUNT);
	if (!ent)
		return ERR_PTR(err);

	INIT_LIST_HEAD(&ent->list);

	ent->queue = queue;
	ent->headers = iov[0].iov_base;
	ent->payload = iov[1].iov_base;

	atomic_inc(&ring->queue_refs);
	return ent;
}

/*
 * Register the header and payload buffer with the kernel and put the
 * entry onto the queue as "ready to get fuse requests"
 */
static int fuse_uring_register(struct io_uring_cmd *cmd,
			       unsigned int issue_flags, struct fuse_conn *fc)
{
	const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe);
	struct fuse_ring *ring = smp_load_acquire(&fc->ring);
	struct fuse_ring_queue *queue;
	struct fuse_ring_ent *ent;
	int err;
	unsigned int qid = READ_ONCE(cmd_req->qid);

	err = -ENOMEM;
	if (!ring) {
		ring = fuse_uring_create(fc);
		if (!ring)
			return err;
	}

	if (qid >= ring->nr_queues) {
		pr_info_ratelimited("fuse: Invalid ring qid %u\n", qid);
		return -EINVAL;
	}

	queue = ring->queues[qid];
	if (!queue) {
		queue = fuse_uring_create_queue(ring, qid);
		if (!queue)
			return err;
	}

	/*
	 * The queue created above does not need to be destructed if entry
	 * creation fails below; that is done at ring destruction time.
	 */

	ent = fuse_uring_create_ring_ent(cmd, queue);
	if (IS_ERR(ent))
		return PTR_ERR(ent);

	fuse_uring_do_register(ent, cmd, issue_flags);

	return 0;
}

/*
 * Entry function from io_uring to handle the given passthrough command
 * (op code IORING_OP_URING_CMD)
 */
int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
	struct fuse_dev *fud;
	struct fuse_conn *fc;
	u32 cmd_op = cmd->cmd_op;
	int err;

	if ((unlikely(issue_flags & IO_URING_F_CANCEL))) {
		fuse_uring_cancel(cmd, issue_flags);
		return 0;
	}

	/* This extra SQE size holds struct fuse_uring_cmd_req */
	if (!(issue_flags & IO_URING_F_SQE128))
		return -EINVAL;

	fud = fuse_get_dev(cmd->file);
	if (!fud) {
		pr_info_ratelimited("No fuse device found\n");
		return -ENOTCONN;
	}
	fc = fud->fc;

	/* Once a connection has io-uring enabled on it, it can't be disabled */
	if (!enable_uring && !fc->io_uring) {
		pr_info_ratelimited("fuse-io-uring is disabled\n");
		return -EOPNOTSUPP;
	}

	if (fc->aborted)
		return -ECONNABORTED;
	if (!fc->connected)
		return -ENOTCONN;

	/*
	 * fuse_uring_register() needs the ring to be initialized, as we
	 * need to know the max payload size
	 */
	if (!fc->initialized)
		return -EAGAIN;

	switch (cmd_op) {
	case FUSE_IO_URING_CMD_REGISTER:
		err = fuse_uring_register(cmd, issue_flags, fc);
		if (err) {
			pr_info_once("FUSE_IO_URING_CMD_REGISTER failed err=%d\n",
				     err);
			fc->io_uring = 0;
			wake_up_all(&fc->blocked_waitq);
			return err;
		}
		break;
	case FUSE_IO_URING_CMD_COMMIT_AND_FETCH:
		err = fuse_uring_commit_fetch(cmd, issue_flags, fc);
		if (err) {
			pr_info_once("FUSE_IO_URING_COMMIT_AND_FETCH failed err=%d\n",
				     err);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	return -EIOCBQUEUED;
}

static void fuse_uring_send(struct fuse_ring_ent *ent, struct io_uring_cmd *cmd,
			    ssize_t ret, unsigned int issue_flags)
{
	struct fuse_ring_queue *queue = ent->queue;

	spin_lock(&queue->lock);
	ent->state = FRRS_USERSPACE;
	list_move(&ent->list, &queue->ent_in_userspace);
	ent->cmd = NULL;
	spin_unlock(&queue->lock);

	io_uring_cmd_done(cmd, ret, 0, issue_flags);
}

/*
 * This prepares and sends the ring request in fuse-uring task context.
 * User buffers are not mapped yet - the application does not have permission
 * to write to them - this has to be executed in ring task context.
1154c2c9af9aSBernd Schubert static void fuse_uring_send(struct fuse_ring_ent *ent, struct io_uring_cmd *cmd,
1155c2c9af9aSBernd Schubert ssize_t ret, unsigned int issue_flags)
1156c2c9af9aSBernd Schubert {
1157c2c9af9aSBernd Schubert struct fuse_ring_queue *queue = ent->queue;
1158c2c9af9aSBernd Schubert 
1159c2c9af9aSBernd Schubert spin_lock(&queue->lock);
1160c2c9af9aSBernd Schubert ent->state = FRRS_USERSPACE;
1161c2c9af9aSBernd Schubert list_move(&ent->list, &queue->ent_in_userspace);
1162c2c9af9aSBernd Schubert ent->cmd = NULL;
1163c2c9af9aSBernd Schubert spin_unlock(&queue->lock);
1164c2c9af9aSBernd Schubert 
1165c2c9af9aSBernd Schubert io_uring_cmd_done(cmd, ret, 0, issue_flags);
1166c2c9af9aSBernd Schubert }
1167c2c9af9aSBernd Schubert 
1168c2c9af9aSBernd Schubert /*
1169c2c9af9aSBernd Schubert  * This prepares and sends the ring request in the fuse-uring task context.
1170c2c9af9aSBernd Schubert  * User buffers are not mapped yet - writing to them is only permitted
1171c2c9af9aSBernd Schubert  * from the ring task's own context, so this has to run there.
1172c2c9af9aSBernd Schubert  */
1173c2c9af9aSBernd Schubert static void fuse_uring_send_in_task(struct io_uring_cmd *cmd,
1174c2c9af9aSBernd Schubert unsigned int issue_flags)
1175c2c9af9aSBernd Schubert {
1176c2c9af9aSBernd Schubert struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
1177c2c9af9aSBernd Schubert struct fuse_ring_queue *queue = ent->queue;
1178c2c9af9aSBernd Schubert int err;
1179c2c9af9aSBernd Schubert 
1180c2c9af9aSBernd Schubert if (!(issue_flags & IO_URING_F_TASK_DEAD)) {
1181c2c9af9aSBernd Schubert err = fuse_uring_prepare_send(ent, ent->fuse_req);
1182c2c9af9aSBernd Schubert if (err) {
1183c2c9af9aSBernd Schubert fuse_uring_next_fuse_req(ent, queue, issue_flags);
1184c2c9af9aSBernd Schubert return;
1185c2c9af9aSBernd Schubert }
1186c2c9af9aSBernd Schubert } else {
1187c2c9af9aSBernd Schubert err = -ECANCELED;
1188c2c9af9aSBernd Schubert }
1189c2c9af9aSBernd Schubert 
1190c2c9af9aSBernd Schubert fuse_uring_send(ent, cmd, err, issue_flags);
1191c2c9af9aSBernd Schubert }
1192c2c9af9aSBernd Schubert 
1193c2c9af9aSBernd Schubert static struct fuse_ring_queue *fuse_uring_task_to_queue(struct fuse_ring *ring)
1194c2c9af9aSBernd Schubert {
1195c2c9af9aSBernd Schubert unsigned int qid;
1196c2c9af9aSBernd Schubert struct fuse_ring_queue *queue;
1197c2c9af9aSBernd Schubert 
1198c2c9af9aSBernd Schubert qid = task_cpu(current);
1199c2c9af9aSBernd Schubert 
1200c2c9af9aSBernd Schubert if (WARN_ONCE(qid >= ring->nr_queues,
1201c2c9af9aSBernd Schubert "Core number (%u) exceeds nr queues (%zu)\n", qid,
1202c2c9af9aSBernd Schubert ring->nr_queues))
1203c2c9af9aSBernd Schubert qid = 0;
1204c2c9af9aSBernd Schubert 
1205c2c9af9aSBernd Schubert queue = ring->queues[qid];
1206c2c9af9aSBernd Schubert WARN_ONCE(!queue, "Missing queue for qid %d\n", qid);
1207c2c9af9aSBernd Schubert 
1208c2c9af9aSBernd Schubert return queue;
1209c2c9af9aSBernd Schubert }
1210c2c9af9aSBernd Schubert 
1211c2c9af9aSBernd Schubert static void fuse_uring_dispatch_ent(struct fuse_ring_ent *ent)
1212c2c9af9aSBernd Schubert {
1213c2c9af9aSBernd Schubert struct io_uring_cmd *cmd = ent->cmd;
1214c2c9af9aSBernd Schubert 
1215c2c9af9aSBernd Schubert uring_cmd_set_ring_ent(cmd, ent);
1216c2c9af9aSBernd Schubert io_uring_cmd_complete_in_task(cmd, fuse_uring_send_in_task);
1217c2c9af9aSBernd Schubert }
1218c2c9af9aSBernd Schubert 
1219c2c9af9aSBernd Schubert /* queue a fuse request and send it if a ring entry is available */
1220c2c9af9aSBernd Schubert void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req)
1221c2c9af9aSBernd Schubert {
1222c2c9af9aSBernd Schubert struct fuse_conn *fc = req->fm->fc;
1223c2c9af9aSBernd Schubert struct fuse_ring *ring = fc->ring;
1224c2c9af9aSBernd Schubert struct fuse_ring_queue *queue;
1225c2c9af9aSBernd Schubert struct fuse_ring_ent *ent = NULL;
1226c2c9af9aSBernd Schubert int err;
1227c2c9af9aSBernd Schubert 
1228c2c9af9aSBernd Schubert err = -EINVAL;
1229c2c9af9aSBernd Schubert queue = fuse_uring_task_to_queue(ring);
1230c2c9af9aSBernd Schubert if (!queue)
1231c2c9af9aSBernd Schubert goto err;
1232c2c9af9aSBernd Schubert 
1233c2c9af9aSBernd Schubert if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
1234c2c9af9aSBernd Schubert req->in.h.unique = fuse_get_unique(fiq);
1235c2c9af9aSBernd Schubert 
1236c2c9af9aSBernd Schubert spin_lock(&queue->lock);
1237c2c9af9aSBernd Schubert err = -ENOTCONN;
1238c2c9af9aSBernd Schubert if (unlikely(queue->stopped))
1239c2c9af9aSBernd Schubert goto err_unlock;
1240c2c9af9aSBernd Schubert 
1241c2c9af9aSBernd Schubert ent = list_first_entry_or_null(&queue->ent_avail_queue,
1242c2c9af9aSBernd Schubert struct fuse_ring_ent, list);
1243c2c9af9aSBernd Schubert if (ent)
1244c2c9af9aSBernd Schubert fuse_uring_add_req_to_ring_ent(ent, req);
1245c2c9af9aSBernd Schubert else
1246c2c9af9aSBernd Schubert list_add_tail(&req->list, &queue->fuse_req_queue);
1247c2c9af9aSBernd Schubert spin_unlock(&queue->lock);
1248c2c9af9aSBernd Schubert 
1249c2c9af9aSBernd Schubert if (ent)
1250c2c9af9aSBernd Schubert fuse_uring_dispatch_ent(ent);
1251c2c9af9aSBernd Schubert 
1252c2c9af9aSBernd Schubert return;
1253c2c9af9aSBernd Schubert 
1254c2c9af9aSBernd Schubert err_unlock:
1255c2c9af9aSBernd Schubert spin_unlock(&queue->lock);
1256c2c9af9aSBernd Schubert err:
1257c2c9af9aSBernd Schubert req->out.h.error = err;
1258c2c9af9aSBernd Schubert clear_bit(FR_PENDING, &req->flags);
1259c2c9af9aSBernd Schubert fuse_request_end(req);
1260c2c9af9aSBernd Schubert }
1261c2c9af9aSBernd Schubert 
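/*
 * Note on queue selection: fuse_uring_task_to_queue() above derives the
 * queue id from the CPU of the submitting task, so a server is expected to
 * register one queue per CPU and will typically pin the thread serving a
 * queue to the matching CPU. The pinning itself is a userspace design
 * choice, not something this file enforces; the sketch below is purely
 * illustrative.
 */
#if 0	/* illustrative userspace sketch, not built with this file */
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>

/* Pin the current worker thread to the CPU matching its queue id. */
static int fuse_server_bind_queue_worker(unsigned int qid)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(qid, &set);
	return pthread_setaffinity_np(pthread_self(), sizeof(set), &set);
}
#endif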
1262857b0263SBernd Schubert bool fuse_uring_queue_bq_req(struct fuse_req *req)
1263857b0263SBernd Schubert {
1264857b0263SBernd Schubert struct fuse_conn *fc = req->fm->fc;
1265857b0263SBernd Schubert struct fuse_ring *ring = fc->ring;
1266857b0263SBernd Schubert struct fuse_ring_queue *queue;
1267857b0263SBernd Schubert struct fuse_ring_ent *ent = NULL;
1268857b0263SBernd Schubert 
1269857b0263SBernd Schubert queue = fuse_uring_task_to_queue(ring);
1270857b0263SBernd Schubert if (!queue)
1271857b0263SBernd Schubert return false;
1272857b0263SBernd Schubert 
1273857b0263SBernd Schubert spin_lock(&queue->lock);
1274857b0263SBernd Schubert if (unlikely(queue->stopped)) {
1275857b0263SBernd Schubert spin_unlock(&queue->lock);
1276857b0263SBernd Schubert return false;
1277857b0263SBernd Schubert }
1278857b0263SBernd Schubert 
1279857b0263SBernd Schubert list_add_tail(&req->list, &queue->fuse_req_bg_queue);
1280857b0263SBernd Schubert 
1281857b0263SBernd Schubert ent = list_first_entry_or_null(&queue->ent_avail_queue,
1282857b0263SBernd Schubert struct fuse_ring_ent, list);
1283857b0263SBernd Schubert spin_lock(&fc->bg_lock);
1284857b0263SBernd Schubert fc->num_background++;
1285857b0263SBernd Schubert if (fc->num_background == fc->max_background)
1286857b0263SBernd Schubert fc->blocked = 1;
1287857b0263SBernd Schubert fuse_uring_flush_bg(queue);
1288857b0263SBernd Schubert spin_unlock(&fc->bg_lock);
1289857b0263SBernd Schubert 
1290857b0263SBernd Schubert /*
1291857b0263SBernd Schubert  * Due to the bg_queue flush limits, other background requests in
1292857b0263SBernd Schubert  * the queue may need to be handled first, or no further request
1293857b0263SBernd Schubert  * may be available.
1294857b0263SBernd Schubert  */
1295857b0263SBernd Schubert req = list_first_entry_or_null(&queue->fuse_req_queue, struct fuse_req,
1296857b0263SBernd Schubert list);
1297857b0263SBernd Schubert if (ent && req) {
1298857b0263SBernd Schubert fuse_uring_add_req_to_ring_ent(ent, req);
1299857b0263SBernd Schubert spin_unlock(&queue->lock);
1300857b0263SBernd Schubert 
1301857b0263SBernd Schubert fuse_uring_dispatch_ent(ent);
1302857b0263SBernd Schubert } else {
1303857b0263SBernd Schubert spin_unlock(&queue->lock);
1304857b0263SBernd Schubert }
1305857b0263SBernd Schubert 
1306857b0263SBernd Schubert return true;
1307857b0263SBernd Schubert }
1308857b0263SBernd Schubert 
1309c2c9af9aSBernd Schubert static const struct fuse_iqueue_ops fuse_io_uring_ops = {
1310c2c9af9aSBernd Schubert /* forgets could be sent over io-uring as a future enhancement */
1311c2c9af9aSBernd Schubert .send_forget = fuse_dev_queue_forget,
1312c2c9af9aSBernd Schubert 
1313c2c9af9aSBernd Schubert /*
1314c2c9af9aSBernd Schubert  * Interrupts could also be sent over io-uring, but they should be
1315c2c9af9aSBernd Schubert  * rare enough that it is not worth the added complexity.
1316c2c9af9aSBernd Schubert  */
1317c2c9af9aSBernd Schubert .send_interrupt = fuse_dev_queue_interrupt,
1318c2c9af9aSBernd Schubert .send_req = fuse_uring_queue_fuse_req,
1319c2c9af9aSBernd Schubert };
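/*
 * To complete the picture, a rough userspace sketch of the commit step that
 * fuse_uring_commit_fetch() (FUSE_IO_URING_CMD_COMMIT_AND_FETCH above)
 * consumes: after processing a fetched request, the server resubmits the
 * same entry so the kernel can complete the answered request and re-arm the
 * entry for the next one in a single round trip. Illustrative only; the
 * commit_id field used to identify the answered request is an assumption
 * taken from the fuse uapi header and is not visible in this file. As with
 * registration, the CQE is only posted once the next request has been
 * handed to the entry.
 */
#if 0	/* illustrative userspace sketch, not built with this file */
#include <liburing.h>
#include <linux/fuse.h>
#include <stdint.h>
#include <string.h>

static int fuse_server_commit_and_fetch(struct io_uring *uring, int fuse_dev_fd,
					unsigned int qid, uint64_t commit_id)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(uring);
	struct fuse_uring_cmd_req *cmd_req;

	if (!sqe)
		return -EAGAIN;

	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->fd = fuse_dev_fd;
	sqe->cmd_op = FUSE_IO_URING_CMD_COMMIT_AND_FETCH;

	cmd_req = (struct fuse_uring_cmd_req *)sqe->cmd;
	memset(cmd_req, 0, sizeof(*cmd_req));
	cmd_req->qid = qid;
	cmd_req->commit_id = commit_id;	/* assumed field: id of the answered request */

	return io_uring_submit(uring);
}
#endif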