// SPDX-License-Identifier: GPL-2.0
/*
 * FUSE: Filesystem in Userspace
 * Copyright (c) 2023-2024 DataDirect Networks.
 */

#include "fuse_i.h"
#include "dev_uring_i.h"
#include "fuse_dev_i.h"

#include <linux/fs.h>
#include <linux/io_uring/cmd.h>

static bool __read_mostly enable_uring;
module_param(enable_uring, bool, 0644);
MODULE_PARM_DESC(enable_uring,
		 "Enable userspace communication through io-uring");

#define FUSE_URING_IOV_SEGS 2 /* header and payload */


bool fuse_uring_enabled(void)
{
	return enable_uring;
}

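/*
 * Per-command private data, stored in the io_uring command pdu. It links the
 * io_uring command to the fuse ring entry, so that command completion and
 * IO_URING_F_CANCEL handling can find the entry again.
 */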
struct fuse_uring_pdu {
	struct fuse_ring_ent *ent;
};

static const struct fuse_iqueue_ops fuse_io_uring_ops;

static void uring_cmd_set_ring_ent(struct io_uring_cmd *cmd,
				   struct fuse_ring_ent *ring_ent)
{
	struct fuse_uring_pdu *pdu =
		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu);

	pdu->ent = ring_ent;
}

static struct fuse_ring_ent *uring_cmd_to_ring_ent(struct io_uring_cmd *cmd)
{
	struct fuse_uring_pdu *pdu =
		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu);

	return pdu->ent;
}

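/*
 * Move queued background requests from the per-queue background list to the
 * regular request queue, updating per-connection and per-queue background
 * accounting. Called with both the queue lock and fc->bg_lock held.
 */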
static void fuse_uring_flush_bg(struct fuse_ring_queue *queue)
{
	struct fuse_ring *ring = queue->ring;
	struct fuse_conn *fc = ring->fc;

	lockdep_assert_held(&queue->lock);
	lockdep_assert_held(&fc->bg_lock);

	/*
	 * Allow one bg request per queue, ignoring global fc limits.
	 * This prevents a single queue from consuming all resources and
	 * eliminates the need for remote queue wake-ups when global
	 * limits are met but this queue has no more waiting requests.
	 */
	while ((fc->active_background < fc->max_background ||
		!queue->active_background) &&
	       (!list_empty(&queue->fuse_req_bg_queue))) {
		struct fuse_req *req;

		req = list_first_entry(&queue->fuse_req_bg_queue,
				       struct fuse_req, list);
		fc->active_background++;
		queue->active_background++;

		list_move_tail(&req->list, &queue->fuse_req_queue);
	}
}

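/*
 * End a request that was processed over io-uring: detach it from the ring
 * entry, update background accounting and refill the queue from the
 * background list if needed, then complete the request.
 */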
static void fuse_uring_req_end(struct fuse_ring_ent *ent, struct fuse_req *req,
			       int error)
{
	struct fuse_ring_queue *queue = ent->queue;
	struct fuse_ring *ring = queue->ring;
	struct fuse_conn *fc = ring->fc;

	lockdep_assert_not_held(&queue->lock);
	spin_lock(&queue->lock);
	ent->fuse_req = NULL;
	if (test_bit(FR_BACKGROUND, &req->flags)) {
		queue->active_background--;
		spin_lock(&fc->bg_lock);
		fuse_uring_flush_bg(queue);
		spin_unlock(&fc->bg_lock);
	}

	spin_unlock(&queue->lock);

	if (error)
		req->out.h.error = error;

	clear_bit(FR_SENT, &req->flags);
	fuse_request_end(req);
}

/* Abort all requests queued on the given ring queue */
static void fuse_uring_abort_end_queue_requests(struct fuse_ring_queue *queue)
{
	struct fuse_req *req;
	LIST_HEAD(req_list);

	spin_lock(&queue->lock);
	list_for_each_entry(req, &queue->fuse_req_queue, list)
		clear_bit(FR_PENDING, &req->flags);
	list_splice_init(&queue->fuse_req_queue, &req_list);
	spin_unlock(&queue->lock);

	/* must not hold queue lock to avoid order issues with fi->lock */
	fuse_dev_end_requests(&req_list);
}


void fuse_uring_abort_end_requests(struct fuse_ring *ring)
{
	int qid;
	struct fuse_ring_queue *queue;
	struct fuse_conn *fc = ring->fc;

	for (qid = 0; qid < ring->nr_queues; qid++) {
		queue = READ_ONCE(ring->queues[qid]);
		if (!queue)
			continue;

		queue->stopped = true;

		WARN_ON_ONCE(ring->fc->max_background != UINT_MAX);
		spin_lock(&queue->lock);
		spin_lock(&fc->bg_lock);
		fuse_uring_flush_bg(queue);
		spin_unlock(&fc->bg_lock);
		spin_unlock(&queue->lock);
		fuse_uring_abort_end_queue_requests(queue);
	}
}

void fuse_uring_destruct(struct fuse_conn *fc)
{
	struct fuse_ring *ring = fc->ring;
	int qid;

	if (!ring)
		return;

	for (qid = 0; qid < ring->nr_queues; qid++) {
		struct fuse_ring_queue *queue = ring->queues[qid];
		struct fuse_ring_ent *ent, *next;

		if (!queue)
			continue;

		WARN_ON(!list_empty(&queue->ent_avail_queue));
		WARN_ON(!list_empty(&queue->ent_w_req_queue));
		WARN_ON(!list_empty(&queue->ent_commit_queue));
		WARN_ON(!list_empty(&queue->ent_in_userspace));

		list_for_each_entry_safe(ent, next, &queue->ent_released,
					 list) {
			list_del_init(&ent->list);
			kfree(ent);
		}

		kfree(queue->fpq.processing);
		kfree(queue);
		ring->queues[qid] = NULL;
	}

	kfree(ring->queues);
	kfree(ring);
	fc->ring = NULL;
}

/*
 * Basic ring setup for this connection based on the provided configuration
 */
static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc)
{
	struct fuse_ring *ring;
	size_t nr_queues = num_possible_cpus();
	struct fuse_ring *res = NULL;
	size_t max_payload_size;

	ring = kzalloc(sizeof(*fc->ring), GFP_KERNEL_ACCOUNT);
	if (!ring)
		return NULL;

	ring->queues = kcalloc(nr_queues, sizeof(struct fuse_ring_queue *),
			       GFP_KERNEL_ACCOUNT);
	if (!ring->queues)
		goto out_err;

	max_payload_size = max(FUSE_MIN_READ_BUFFER, fc->max_write);
	max_payload_size = max(max_payload_size, fc->max_pages * PAGE_SIZE);

	spin_lock(&fc->lock);
	if (fc->ring) {
		/* race, another thread created the ring in the meantime */
		spin_unlock(&fc->lock);
		res = fc->ring;
		goto out_err;
	}

	init_waitqueue_head(&ring->stop_waitq);

	ring->nr_queues = nr_queues;
	ring->fc = fc;
	ring->max_payload_sz = max_payload_size;
	atomic_set(&ring->queue_refs, 0);
	smp_store_release(&fc->ring, ring);

	spin_unlock(&fc->lock);
	return ring;

out_err:
	kfree(ring->queues);
	kfree(ring);
	return res;
}

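/*
 * Create the queue for the given qid. If another task created it
 * concurrently, free the local allocation and return the existing queue.
 */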
static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring,
							int qid)
{
	struct fuse_conn *fc = ring->fc;
	struct fuse_ring_queue *queue;
	struct list_head *pq;

	queue = kzalloc(sizeof(*queue), GFP_KERNEL_ACCOUNT);
	if (!queue)
		return NULL;
	pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
	if (!pq) {
		kfree(queue);
		return NULL;
	}

	queue->qid = qid;
	queue->ring = ring;
	spin_lock_init(&queue->lock);

	INIT_LIST_HEAD(&queue->ent_avail_queue);
	INIT_LIST_HEAD(&queue->ent_commit_queue);
	INIT_LIST_HEAD(&queue->ent_w_req_queue);
	INIT_LIST_HEAD(&queue->ent_in_userspace);
	INIT_LIST_HEAD(&queue->fuse_req_queue);
	INIT_LIST_HEAD(&queue->fuse_req_bg_queue);
	INIT_LIST_HEAD(&queue->ent_released);

	queue->fpq.processing = pq;
	fuse_pqueue_init(&queue->fpq);

	spin_lock(&fc->lock);
	if (ring->queues[qid]) {
		spin_unlock(&fc->lock);
		kfree(queue->fpq.processing);
		kfree(queue);
		return ring->queues[qid];
	}

	/*
	 * WRITE_ONCE, as readers mostly access ring->queues[qid] without
	 * taking the lock at all
	 */
	WRITE_ONCE(ring->queues[qid], queue);
	spin_unlock(&fc->lock);

	return queue;
}

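/* End a request with -ECONNABORTED while the ring is being torn down */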
static void fuse_uring_stop_fuse_req_end(struct fuse_req *req)
{
	clear_bit(FR_SENT, &req->flags);
	req->out.h.error = -ECONNABORTED;
	fuse_request_end(req);
}

/*
 * Release a request/entry on connection tear down
 */
static void fuse_uring_entry_teardown(struct fuse_ring_ent *ent)
{
	struct fuse_req *req;
	struct io_uring_cmd *cmd;

	struct fuse_ring_queue *queue = ent->queue;

	spin_lock(&queue->lock);
	cmd = ent->cmd;
	ent->cmd = NULL;
	req = ent->fuse_req;
	ent->fuse_req = NULL;
	if (req) {
		/* remove entry from queue->fpq->processing */
		list_del_init(&req->list);
	}

	/*
	 * The entry must not be freed immediately, because IO_URING_F_CANCEL
	 * handling accesses entries through direct pointers - there is a risk
	 * of a race with daemon termination (which triggers IO_URING_F_CANCEL)
	 * accessing entries without checking the list state first.
	 */
	list_move(&ent->list, &queue->ent_released);
	ent->state = FRRS_RELEASED;
	spin_unlock(&queue->lock);

	if (cmd)
		io_uring_cmd_done(cmd, -ENOTCONN, 0, IO_URING_F_UNLOCKED);

	if (req)
		fuse_uring_stop_fuse_req_end(req);
}

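/*
 * Move all entries in the expected state from the given list onto a local
 * teardown list and release them outside of the queue lock, dropping one
 * ring->queue_refs reference per entry.
 */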
static void fuse_uring_stop_list_entries(struct list_head *head,
					 struct fuse_ring_queue *queue,
					 enum fuse_ring_req_state exp_state)
{
	struct fuse_ring *ring = queue->ring;
	struct fuse_ring_ent *ent, *next;
	ssize_t queue_refs = SSIZE_MAX;
	LIST_HEAD(to_teardown);

	spin_lock(&queue->lock);
	list_for_each_entry_safe(ent, next, head, list) {
		if (ent->state != exp_state) {
			pr_warn("entry teardown qid=%d state=%d expected=%d",
				queue->qid, ent->state, exp_state);
			continue;
		}

		ent->state = FRRS_TEARDOWN;
		list_move(&ent->list, &to_teardown);
	}
	spin_unlock(&queue->lock);

	/* no queue lock to avoid lock order issues */
	list_for_each_entry_safe(ent, next, &to_teardown, list) {
		fuse_uring_entry_teardown(ent);
		queue_refs = atomic_dec_return(&ring->queue_refs);
		WARN_ON_ONCE(queue_refs < 0);
	}
}

static void fuse_uring_teardown_entries(struct fuse_ring_queue *queue)
{
	fuse_uring_stop_list_entries(&queue->ent_in_userspace, queue,
				     FRRS_USERSPACE);
	fuse_uring_stop_list_entries(&queue->ent_avail_queue, queue,
				     FRRS_AVAILABLE);
}

/*
 * Log state debug info
 */
static void fuse_uring_log_ent_state(struct fuse_ring *ring)
{
	int qid;
	struct fuse_ring_ent *ent;

	for (qid = 0; qid < ring->nr_queues; qid++) {
		struct fuse_ring_queue *queue = ring->queues[qid];

		if (!queue)
			continue;

		spin_lock(&queue->lock);
		/*
		 * Log entries from the intermediate queue, the other queues
		 * should be empty
		 */
		list_for_each_entry(ent, &queue->ent_w_req_queue, list) {
			pr_info(" ent-req-queue ring=%p qid=%d ent=%p state=%d\n",
				ring, qid, ent, ent->state);
		}
		list_for_each_entry(ent, &queue->ent_commit_queue, list) {
			pr_info(" ent-commit-queue ring=%p qid=%d ent=%p state=%d\n",
				ring, qid, ent, ent->state);
		}
		spin_unlock(&queue->lock);
	}
	ring->stop_debug_log = 1;
}

static void fuse_uring_async_stop_queues(struct work_struct *work)
{
	int qid;
	struct fuse_ring *ring =
		container_of(work, struct fuse_ring, async_teardown_work.work);

	/* XXX code dup */
	for (qid = 0; qid < ring->nr_queues; qid++) {
		struct fuse_ring_queue *queue = READ_ONCE(ring->queues[qid]);

		if (!queue)
			continue;

		fuse_uring_teardown_entries(queue);
	}

	/*
	 * Some ring entries might be in the middle of IO operations,
	 * i.e. in the process of getting handled by file_operations::uring_cmd
	 * or on the way to userspace - we could handle that with conditions in
	 * run time code, but it is easier/cleaner to have an async teardown
	 * handler that reschedules itself while queue references are left.
	 */
	if (atomic_read(&ring->queue_refs) > 0) {
		if (time_after(jiffies,
			       ring->teardown_time + FUSE_URING_TEARDOWN_TIMEOUT))
			fuse_uring_log_ent_state(ring);

		schedule_delayed_work(&ring->async_teardown_work,
				      FUSE_URING_TEARDOWN_INTERVAL);
	} else {
		wake_up_all(&ring->stop_waitq);
	}
}

/*
 * Stop the ring queues
 */
void fuse_uring_stop_queues(struct fuse_ring *ring)
{
	int qid;

	for (qid = 0; qid < ring->nr_queues; qid++) {
		struct fuse_ring_queue *queue = READ_ONCE(ring->queues[qid]);

		if (!queue)
			continue;

		fuse_uring_teardown_entries(queue);
	}

	if (atomic_read(&ring->queue_refs) > 0) {
		ring->teardown_time = jiffies;
		INIT_DELAYED_WORK(&ring->async_teardown_work,
				  fuse_uring_async_stop_queues);
		schedule_delayed_work(&ring->async_teardown_work,
				      FUSE_URING_TEARDOWN_INTERVAL);
	} else {
		wake_up_all(&ring->stop_waitq);
	}
}

/*
 * Handle IO_URING_F_CANCEL, which typically comes on daemon termination.
 *
 * Releasing the last entry should trigger fuse_dev_release() if
 * the daemon was terminated
 */
static void fuse_uring_cancel(struct io_uring_cmd *cmd,
			      unsigned int issue_flags)
{
	struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
	struct fuse_ring_queue *queue;
	bool need_cmd_done = false;

	/*
	 * direct access on ent - it must not be destructed as long as
	 * IO_URING_F_CANCEL might come up
	 */
	queue = ent->queue;
	spin_lock(&queue->lock);
	if (ent->state == FRRS_AVAILABLE) {
		ent->state = FRRS_USERSPACE;
		list_move(&ent->list, &queue->ent_in_userspace);
		need_cmd_done = true;
		ent->cmd = NULL;
	}
	spin_unlock(&queue->lock);

	if (need_cmd_done) {
		/* no queue lock to avoid lock order issues */
		io_uring_cmd_done(cmd, -ENOTCONN, 0, issue_flags);
	}
}

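/*
 * Store the ring entry in the command pdu and mark the command cancelable,
 * so that fuse_uring_cancel() can find the entry on IO_URING_F_CANCEL.
 */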
static void fuse_uring_prepare_cancel(struct io_uring_cmd *cmd, int issue_flags,
				      struct fuse_ring_ent *ring_ent)
{
	uring_cmd_set_ring_ent(cmd, ring_ent);
	io_uring_cmd_mark_cancelable(cmd, issue_flags);
}

/*
 * Check the out header for errors and store the result in the request
 */
static int fuse_uring_out_header_has_err(struct fuse_out_header *oh,
					 struct fuse_req *req,
					 struct fuse_conn *fc)
{
	int err;

	err = -EINVAL;
	if (oh->unique == 0) {
		/* Not supported through io-uring yet */
		pr_warn_once("notify through fuse-io-uring not supported\n");
		goto err;
	}

	if (oh->error <= -ERESTARTSYS || oh->error > 0)
		goto err;

	if (oh->error) {
		err = oh->error;
		goto err;
	}

	err = -ENOENT;
	if ((oh->unique & ~FUSE_INT_REQ_BIT) != req->in.h.unique) {
		pr_warn_ratelimited("unique mismatch, expected: %llu got %llu\n",
				    req->in.h.unique,
				    oh->unique & ~FUSE_INT_REQ_BIT);
		goto err;
	}

	/*
	 * Is it an interrupt reply ID?
	 * XXX: Not supported through fuse-io-uring yet, it should not even
	 * find the request - should not happen.
	 */
	WARN_ON_ONCE(oh->unique & FUSE_INT_REQ_BIT);

	err = 0;
err:
	return err;
}

static int fuse_uring_copy_from_ring(struct fuse_ring *ring,
				     struct fuse_req *req,
				     struct fuse_ring_ent *ent)
{
	struct fuse_copy_state cs;
	struct fuse_args *args = req->args;
	struct iov_iter iter;
	int err;
	struct fuse_uring_ent_in_out ring_in_out;

	err = copy_from_user(&ring_in_out, &ent->headers->ring_ent_in_out,
			     sizeof(ring_in_out));
	if (err)
		return -EFAULT;

	err = import_ubuf(ITER_SOURCE, ent->payload, ring->max_payload_sz,
			  &iter);
	if (err)
		return err;

	fuse_copy_init(&cs, 0, &iter);
	cs.is_uring = 1;
	cs.req = req;

	return fuse_copy_out_args(&cs, args, ring_in_out.payload_sz);
}

/*
 * Copy data from the req to the ring buffer
 */
static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req,
				   struct fuse_ring_ent *ent)
{
	struct fuse_copy_state cs;
	struct fuse_args *args = req->args;
	struct fuse_in_arg *in_args = args->in_args;
	int num_args = args->in_numargs;
	int err;
	struct iov_iter iter;
	struct fuse_uring_ent_in_out ent_in_out = {
		.flags = 0,
		.commit_id = req->in.h.unique,
	};

	err = import_ubuf(ITER_DEST, ent->payload, ring->max_payload_sz, &iter);
	if (err) {
		pr_info_ratelimited("fuse: Import of user buffer failed\n");
		return err;
	}

	fuse_copy_init(&cs, 1, &iter);
	cs.is_uring = 1;
	cs.req = req;

	if (num_args > 0) {
		/*
		 * The expectation is that the first argument is the per-opcode
		 * header. Some opcodes have that as zero size.
		 */
		if (args->in_args[0].size > 0) {
			err = copy_to_user(&ent->headers->op_in, in_args->value,
					   in_args->size);
			if (err) {
				pr_info_ratelimited(
					"Copying the header failed.\n");
				return -EFAULT;
			}
		}
		in_args++;
		num_args--;
	}

	/* copy the payload */
	err = fuse_copy_args(&cs, num_args, args->in_pages,
			     (struct fuse_arg *)in_args, 0);
	if (err) {
		pr_info_ratelimited("%s fuse_copy_args failed\n", __func__);
		return err;
	}

	ent_in_out.payload_sz = cs.ring.copied_sz;
	err = copy_to_user(&ent->headers->ring_ent_in_out, &ent_in_out,
			   sizeof(ent_in_out));
	return err ? -EFAULT : 0;
}

static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent,
				   struct fuse_req *req)
{
	struct fuse_ring_queue *queue = ent->queue;
	struct fuse_ring *ring = queue->ring;
	int err;

	err = -EIO;
	if (WARN_ON(ent->state != FRRS_FUSE_REQ)) {
		pr_err("qid=%d ring-req=%p invalid state %d on send\n",
		       queue->qid, ent, ent->state);
		return err;
	}

	err = -EINVAL;
	if (WARN_ON(req->in.h.unique == 0))
		return err;

	/* copy the request */
	err = fuse_uring_args_to_ring(ring, req, ent);
	if (unlikely(err)) {
		pr_info_ratelimited("Copy to ring failed: %d\n", err);
		return err;
	}

	/* copy fuse_in_header */
	err = copy_to_user(&ent->headers->in_out, &req->in.h,
			   sizeof(req->in.h));
	if (err) {
		err = -EFAULT;
		return err;
	}

	return 0;
}

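/*
 * Copy the request into the ring buffers; mark it as sent on success,
 * otherwise end it right away.
 */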
static int fuse_uring_prepare_send(struct fuse_ring_ent *ent,
				   struct fuse_req *req)
{
	int err;

	err = fuse_uring_copy_to_ring(ent, req);
	if (!err)
		set_bit(FR_SENT, &req->flags);
	else
		fuse_uring_req_end(ent, req, err);

	return err;
}

/*
 * Write data to the ring buffer and send the request to userspace,
 * userspace will read it.
 * This is comparable with a classical read(/dev/fuse).
 */
static int fuse_uring_send_next_to_ring(struct fuse_ring_ent *ent,
					struct fuse_req *req,
					unsigned int issue_flags)
{
	struct fuse_ring_queue *queue = ent->queue;
	int err;
	struct io_uring_cmd *cmd;

	err = fuse_uring_prepare_send(ent, req);
	if (err)
		return err;

	spin_lock(&queue->lock);
	cmd = ent->cmd;
	ent->cmd = NULL;
	ent->state = FRRS_USERSPACE;
	list_move(&ent->list, &queue->ent_in_userspace);
	spin_unlock(&queue->lock);

	io_uring_cmd_done(cmd, 0, 0, issue_flags);
	return 0;
}

/*
 * Make a ring entry available for fuse_req assignment
 */
static void fuse_uring_ent_avail(struct fuse_ring_ent *ent,
				 struct fuse_ring_queue *queue)
{
	WARN_ON_ONCE(!ent->cmd);
	list_move(&ent->list, &queue->ent_avail_queue);
	ent->state = FRRS_AVAILABLE;
}

/* Used to find the request on SQE commit */
static void fuse_uring_add_to_pq(struct fuse_ring_ent *ent,
				 struct fuse_req *req)
{
	struct fuse_ring_queue *queue = ent->queue;
	struct fuse_pqueue *fpq = &queue->fpq;
	unsigned int hash;

	req->ring_entry = ent;
	hash = fuse_req_hash(req->in.h.unique);
	list_move_tail(&req->list, &fpq->processing[hash]);
}

/*
 * Assign a fuse request to the given ring entry
 */
static void fuse_uring_add_req_to_ring_ent(struct fuse_ring_ent *ent,
					   struct fuse_req *req)
{
	struct fuse_ring_queue *queue = ent->queue;
	struct fuse_conn *fc = req->fm->fc;
	struct fuse_iqueue *fiq = &fc->iq;

	lockdep_assert_held(&queue->lock);

	if (WARN_ON_ONCE(ent->state != FRRS_AVAILABLE &&
			 ent->state != FRRS_COMMIT)) {
		pr_warn("%s qid=%d state=%d\n", __func__, ent->queue->qid,
			ent->state);
	}

	spin_lock(&fiq->lock);
	clear_bit(FR_PENDING, &req->flags);
	spin_unlock(&fiq->lock);
	ent->fuse_req = req;
	ent->state = FRRS_FUSE_REQ;
	list_move(&ent->list, &queue->ent_w_req_queue);
	fuse_uring_add_to_pq(ent, req);
}

/* Fetch the next fuse request if available */
static struct fuse_req *fuse_uring_ent_assign_req(struct fuse_ring_ent *ent)
	__must_hold(&queue->lock)
{
	struct fuse_req *req;
	struct fuse_ring_queue *queue = ent->queue;
	struct list_head *req_queue = &queue->fuse_req_queue;

	lockdep_assert_held(&queue->lock);

	/* get and assign the next entry while it is still holding the lock */
	req = list_first_entry_or_null(req_queue, struct fuse_req, list);
	if (req)
		fuse_uring_add_req_to_ring_ent(ent, req);

	return req;
}

/*
 * Read data from the ring buffer, which user space has written to.
 * This is comparable with the handling of a classical write(/dev/fuse).
 * Also make the ring request available again for new fuse requests.
 */
static void fuse_uring_commit(struct fuse_ring_ent *ent, struct fuse_req *req,
			      unsigned int issue_flags)
{
	struct fuse_ring *ring = ent->queue->ring;
	struct fuse_conn *fc = ring->fc;
	ssize_t err = 0;

	err = copy_from_user(&req->out.h, &ent->headers->in_out,
			     sizeof(req->out.h));
	if (err) {
		req->out.h.error = -EFAULT;
		goto out;
	}

	err = fuse_uring_out_header_has_err(&req->out.h, req, fc);
	if (err) {
		/* req->out.h.error already set */
		goto out;
	}

	err = fuse_uring_copy_from_ring(ring, req, ent);
out:
	fuse_uring_req_end(ent, req, err);
}

/*
 * Get the next fuse req and send it
 */
static void fuse_uring_next_fuse_req(struct fuse_ring_ent *ent,
				     struct fuse_ring_queue *queue,
				     unsigned int issue_flags)
{
	int err;
	struct fuse_req *req;

retry:
	spin_lock(&queue->lock);
	fuse_uring_ent_avail(ent, queue);
	req = fuse_uring_ent_assign_req(ent);
	spin_unlock(&queue->lock);

	if (req) {
		err = fuse_uring_send_next_to_ring(ent, req, issue_flags);
		if (err)
			goto retry;
	}
}

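/*
 * The entry just came back from userspace with a commit; move it from the
 * userspace list to the commit queue.
 */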
static int fuse_ring_ent_set_commit(struct fuse_ring_ent *ent)
{
	struct fuse_ring_queue *queue = ent->queue;

	lockdep_assert_held(&queue->lock);

	if (WARN_ON_ONCE(ent->state != FRRS_USERSPACE))
		return -EIO;

	ent->state = FRRS_COMMIT;
	list_move(&ent->list, &queue->ent_commit_queue);

	return 0;
}

/* FUSE_URING_CMD_COMMIT_AND_FETCH handler */
static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
				   struct fuse_conn *fc)
{
	const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe);
	struct fuse_ring_ent *ent;
	int err;
	struct fuse_ring *ring = fc->ring;
	struct fuse_ring_queue *queue;
	uint64_t commit_id = READ_ONCE(cmd_req->commit_id);
	unsigned int qid = READ_ONCE(cmd_req->qid);
	struct fuse_pqueue *fpq;
	struct fuse_req *req;

	err = -ENOTCONN;
	if (!ring)
		return err;

	if (qid >= ring->nr_queues)
		return -EINVAL;

	queue = ring->queues[qid];
	if (!queue)
		return err;
	fpq = &queue->fpq;

	if (!READ_ONCE(fc->connected) || READ_ONCE(queue->stopped))
		return err;

	spin_lock(&queue->lock);
	/*
	 * Find the request based on the unique ID of the fuse request.
	 * This should get revised, as it needs a hash calculation and list
	 * search. And a full struct fuse_pqueue is needed (memory overhead).
	 * As well as the link from req to ring_ent.
	 */
	req = fuse_request_find(fpq, commit_id);
	err = -ENOENT;
	if (!req) {
		pr_info("qid=%d commit_id %llu not found\n", queue->qid,
			commit_id);
		spin_unlock(&queue->lock);
		return err;
	}
	list_del_init(&req->list);
	ent = req->ring_entry;
	req->ring_entry = NULL;

	err = fuse_ring_ent_set_commit(ent);
	if (err != 0) {
		pr_info_ratelimited("qid=%d commit_id %llu state %d",
				    queue->qid, commit_id, ent->state);
		spin_unlock(&queue->lock);
		req->out.h.error = err;
		clear_bit(FR_SENT, &req->flags);
		fuse_request_end(req);
		return err;
	}

	ent->cmd = cmd;
	spin_unlock(&queue->lock);

	/* without the queue lock, as other locks are taken */
	fuse_uring_prepare_cancel(cmd, issue_flags, ent);
	fuse_uring_commit(ent, req, issue_flags);

	/*
	 * Fetching the next request is absolutely required as queued
	 * fuse requests would otherwise not get processed - committing
	 * and fetching is done in one step vs legacy fuse, which has separate
	 * read (fetch request) and write (commit result) calls.
	 */
	fuse_uring_next_fuse_req(ent, queue, issue_flags);
	return 0;
}

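/*
 * The ring is ready once every queue has been created and has at least one
 * available entry (the queue currently registering is skipped, as its entry
 * is being added right now).
 */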
static bool is_ring_ready(struct fuse_ring *ring, int current_qid)
{
	int qid;
	struct fuse_ring_queue *queue;
	bool ready = true;

	for (qid = 0; qid < ring->nr_queues && ready; qid++) {
		if (current_qid == qid)
			continue;

		queue = ring->queues[qid];
		if (!queue) {
			ready = false;
			break;
		}

		spin_lock(&queue->lock);
		if (list_empty(&queue->ent_avail_queue))
			ready = false;
		spin_unlock(&queue->lock);
	}

	return ready;
}

/*
 * fuse_uring_req_fetch command handling
 */
static void fuse_uring_do_register(struct fuse_ring_ent *ent,
				   struct io_uring_cmd *cmd,
				   unsigned int issue_flags)
{
	struct fuse_ring_queue *queue = ent->queue;
	struct fuse_ring *ring = queue->ring;
	struct fuse_conn *fc = ring->fc;
	struct fuse_iqueue *fiq = &fc->iq;

	fuse_uring_prepare_cancel(cmd, issue_flags, ent);

	spin_lock(&queue->lock);
	ent->cmd = cmd;
	fuse_uring_ent_avail(ent, queue);
	spin_unlock(&queue->lock);

	if (!ring->ready) {
		bool ready = is_ring_ready(ring, queue->qid);

		if (ready) {
			WRITE_ONCE(fiq->ops, &fuse_io_uring_ops);
			WRITE_ONCE(ring->ready, true);
			wake_up_all(&fc->blocked_waitq);
		}
	}
}

/*
 * sqe->addr is a ptr to an iovec array, iov[0] has the headers, iov[1]
 * the payload
 */
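/*
 * Illustrative userspace setup matching this layout (sketch only, names of
 * the local variables are hypothetical):
 *
 *	struct fuse_uring_req_header hdr;
 *	char payload[PAYLOAD_SZ];	// >= ring->max_payload_sz
 *	struct iovec iov[FUSE_URING_IOV_SEGS] = {
 *		{ .iov_base = &hdr,    .iov_len = sizeof(hdr) },
 *		{ .iov_base = payload, .iov_len = sizeof(payload) },
 *	};
 *	sqe->addr = (__u64)(uintptr_t)iov;
 *	sqe->len  = FUSE_URING_IOV_SEGS;
 */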
static int fuse_uring_get_iovec_from_sqe(const struct io_uring_sqe *sqe,
					 struct iovec iov[FUSE_URING_IOV_SEGS])
{
	struct iovec __user *uiov = u64_to_user_ptr(READ_ONCE(sqe->addr));
	struct iov_iter iter;
	ssize_t ret;

	if (sqe->len != FUSE_URING_IOV_SEGS)
		return -EINVAL;

	/*
	 * Direction for buffer access will actually be READ and WRITE,
	 * using write for the import should include READ access as well.
	 */
	ret = import_iovec(WRITE, uiov, FUSE_URING_IOV_SEGS,
			   FUSE_URING_IOV_SEGS, &iov, &iter);
	if (ret < 0)
		return ret;

	return 0;
}

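/*
 * Allocate a ring entry for this command and attach the userspace header and
 * payload buffers taken from the SQE iovec, after validating their sizes.
 */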
static struct fuse_ring_ent *
fuse_uring_create_ring_ent(struct io_uring_cmd *cmd,
			   struct fuse_ring_queue *queue)
{
	struct fuse_ring *ring = queue->ring;
	struct fuse_ring_ent *ent;
	size_t payload_size;
	struct iovec iov[FUSE_URING_IOV_SEGS];
	int err;

	err = fuse_uring_get_iovec_from_sqe(cmd->sqe, iov);
	if (err) {
		pr_info_ratelimited("Failed to get iovec from sqe, err=%d\n",
				    err);
		return ERR_PTR(err);
	}

	err = -EINVAL;
	if (iov[0].iov_len < sizeof(struct fuse_uring_req_header)) {
		pr_info_ratelimited("Invalid header len %zu\n", iov[0].iov_len);
		return ERR_PTR(err);
	}

	payload_size = iov[1].iov_len;
	if (payload_size < ring->max_payload_sz) {
		pr_info_ratelimited("Invalid req payload len %zu\n",
				    payload_size);
		return ERR_PTR(err);
	}

	err = -ENOMEM;
	ent = kzalloc(sizeof(*ent), GFP_KERNEL_ACCOUNT);
	if (!ent)
		return ERR_PTR(err);

	INIT_LIST_HEAD(&ent->list);

	ent->queue = queue;
	ent->headers = iov[0].iov_base;
	ent->payload = iov[1].iov_base;

	atomic_inc(&ring->queue_refs);
	return ent;
}

/*
 * Register header and payload buffer with the kernel and put the
 * entry as "ready to get fuse requests" on the queue
 */
static int fuse_uring_register(struct io_uring_cmd *cmd,
			       unsigned int issue_flags, struct fuse_conn *fc)
{
	const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe);
	struct fuse_ring *ring = smp_load_acquire(&fc->ring);
	struct fuse_ring_queue *queue;
	struct fuse_ring_ent *ent;
	int err;
	unsigned int qid = READ_ONCE(cmd_req->qid);

	err = -ENOMEM;
	if (!ring) {
		ring = fuse_uring_create(fc);
		if (!ring)
			return err;
	}

	if (qid >= ring->nr_queues) {
		pr_info_ratelimited("fuse: Invalid ring qid %u\n", qid);
		return -EINVAL;
	}

	queue = ring->queues[qid];
	if (!queue) {
		queue = fuse_uring_create_queue(ring, qid);
		if (!queue)
			return err;
	}

	/*
	 * The created queue above does not need to be destructed in
	 * case of entry errors below, will be done at ring destruction time.
	 */

	ent = fuse_uring_create_ring_ent(cmd, queue);
	if (IS_ERR(ent))
		return PTR_ERR(ent);

	fuse_uring_do_register(ent, cmd, issue_flags);

	return 0;
}

108324fe962cSBernd Schubert /*
108424fe962cSBernd Schubert * Entry function from io_uring to handle the given passthrough command
108524fe962cSBernd Schubert * (op code IORING_OP_URING_CMD)
108624fe962cSBernd Schubert */
fuse_uring_cmd(struct io_uring_cmd * cmd,unsigned int issue_flags)1087786412a7SBernd Schubert int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
108824fe962cSBernd Schubert {
108924fe962cSBernd Schubert struct fuse_dev *fud;
109024fe962cSBernd Schubert struct fuse_conn *fc;
109124fe962cSBernd Schubert u32 cmd_op = cmd->cmd_op;
109224fe962cSBernd Schubert int err;
109324fe962cSBernd Schubert
1094b6236c84SBernd Schubert if (unlikely(issue_flags & IO_URING_F_CANCEL)) {
1095b6236c84SBernd Schubert fuse_uring_cancel(cmd, issue_flags);
1096b6236c84SBernd Schubert return 0;
1097b6236c84SBernd Schubert }
1098b6236c84SBernd Schubert
109924fe962cSBernd Schubert /* This extra SQE size holds struct fuse_uring_cmd_req */
110024fe962cSBernd Schubert if (!(issue_flags & IO_URING_F_SQE128))
110124fe962cSBernd Schubert return -EINVAL;
110224fe962cSBernd Schubert
110324fe962cSBernd Schubert fud = fuse_get_dev(cmd->file);
110424fe962cSBernd Schubert if (!fud) {
110524fe962cSBernd Schubert pr_info_ratelimited("No fuse device found\n");
110624fe962cSBernd Schubert return -ENOTCONN;
110724fe962cSBernd Schubert }
110824fe962cSBernd Schubert fc = fud->fc;
110924fe962cSBernd Schubert
11102d4fde59SBernd Schubert /* Once a connection has io-uring enabled on it, it can't be disabled */
11112d4fde59SBernd Schubert if (!enable_uring && !fc->io_uring) {
11122d4fde59SBernd Schubert pr_info_ratelimited("fuse-io-uring is disabled\n");
11132d4fde59SBernd Schubert return -EOPNOTSUPP;
11142d4fde59SBernd Schubert }
11152d4fde59SBernd Schubert
111624fe962cSBernd Schubert if (fc->aborted)
111724fe962cSBernd Schubert return -ECONNABORTED;
111824fe962cSBernd Schubert if (!fc->connected)
111924fe962cSBernd Schubert return -ENOTCONN;
112024fe962cSBernd Schubert
112124fe962cSBernd Schubert /*
112224fe962cSBernd Schubert * fuse_uring_register() needs the connection to be initialized first,
112324fe962cSBernd Schubert * as we need to know the max payload size
112424fe962cSBernd Schubert */
112524fe962cSBernd Schubert if (!fc->initialized)
112624fe962cSBernd Schubert return -EAGAIN;
112724fe962cSBernd Schubert
112824fe962cSBernd Schubert switch (cmd_op) {
112924fe962cSBernd Schubert case FUSE_IO_URING_CMD_REGISTER:
113024fe962cSBernd Schubert err = fuse_uring_register(cmd, issue_flags, fc);
113124fe962cSBernd Schubert if (err) {
113224fe962cSBernd Schubert pr_info_once("FUSE_IO_URING_CMD_REGISTER failed err=%d\n",
113324fe962cSBernd Schubert err);
11343393ff96SBernd Schubert fc->io_uring = 0;
11353393ff96SBernd Schubert wake_up_all(&fc->blocked_waitq);
113624fe962cSBernd Schubert return err;
113724fe962cSBernd Schubert }
113824fe962cSBernd Schubert break;
1139c090c8abSBernd Schubert case FUSE_IO_URING_CMD_COMMIT_AND_FETCH:
1140c090c8abSBernd Schubert err = fuse_uring_commit_fetch(cmd, issue_flags, fc);
1141c090c8abSBernd Schubert if (err) {
1142c090c8abSBernd Schubert pr_info_once("FUSE_IO_URING_COMMIT_AND_FETCH failed err=%d\n",
1143c090c8abSBernd Schubert err);
1144c090c8abSBernd Schubert return err;
1145c090c8abSBernd Schubert }
1146c090c8abSBernd Schubert break;
114724fe962cSBernd Schubert default:
114824fe962cSBernd Schubert return -EINVAL;
114924fe962cSBernd Schubert }
115024fe962cSBernd Schubert
115124fe962cSBernd Schubert return -EIOCBQUEUED;
115224fe962cSBernd Schubert }
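
/*
 * Illustration only (not part of this file): after handling a request the
 * server commits the reply and fetches the next request with a single
 * FUSE_IO_URING_CMD_COMMIT_AND_FETCH command. A hedged sketch, reusing
 * the invented names from the registration example above; it assumes the
 * commit id is carried in struct fuse_uring_cmd_req and was provided by
 * the kernel in the per-entry header when the request was handed out:
 *
 *	sqe->opcode = IORING_OP_URING_CMD;
 *	sqe->fd = fuse_dev_fd;
 *	sqe->cmd_op = FUSE_IO_URING_CMD_COMMIT_AND_FETCH;
 *	sqe->addr = (unsigned long)iov;		// same header/payload buffers
 *	sqe->len = FUSE_URING_IOV_SEGS;
 *	req->qid = qid;
 *	req->commit_id = commit_id;		// request being answered
 *	io_uring_submit(&ring);
 *
 * As with registration the call returns -EIOCBQUEUED and the CQE is
 * posted once the next fuse request is ready in the buffers.
 */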
1153c2c9af9aSBernd Schubert
1154c2c9af9aSBernd Schubert static void fuse_uring_send(struct fuse_ring_ent *ent, struct io_uring_cmd *cmd,
1155c2c9af9aSBernd Schubert ssize_t ret, unsigned int issue_flags)
1156c2c9af9aSBernd Schubert {
1157c2c9af9aSBernd Schubert struct fuse_ring_queue *queue = ent->queue;
1158c2c9af9aSBernd Schubert
1159c2c9af9aSBernd Schubert spin_lock(&queue->lock);
1160c2c9af9aSBernd Schubert ent->state = FRRS_USERSPACE;
1161c2c9af9aSBernd Schubert list_move(&ent->list, &queue->ent_in_userspace);
1162c2c9af9aSBernd Schubert ent->cmd = NULL;
1163c2c9af9aSBernd Schubert spin_unlock(&queue->lock);
1164c2c9af9aSBernd Schubert
1165c2c9af9aSBernd Schubert io_uring_cmd_done(cmd, ret, 0, issue_flags);
1166c2c9af9aSBernd Schubert }
1167c2c9af9aSBernd Schubert
1168c2c9af9aSBernd Schubert /*
1169c2c9af9aSBernd Schubert * This prepares and sends the ring request in fuse-uring task context.
1170c2c9af9aSBernd Schubert * User buffers are not mapped yet - the application does not have permission
1171c2c9af9aSBernd Schubert * to write to them - so this has to be executed in ring task context.
1172c2c9af9aSBernd Schubert */
1173c2c9af9aSBernd Schubert static void fuse_uring_send_in_task(struct io_uring_cmd *cmd,
1174c2c9af9aSBernd Schubert unsigned int issue_flags)
1175c2c9af9aSBernd Schubert {
1176c2c9af9aSBernd Schubert struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
1177c2c9af9aSBernd Schubert struct fuse_ring_queue *queue = ent->queue;
1178c2c9af9aSBernd Schubert int err;
1179c2c9af9aSBernd Schubert
1180c2c9af9aSBernd Schubert if (!(issue_flags & IO_URING_F_TASK_DEAD)) {
1181c2c9af9aSBernd Schubert err = fuse_uring_prepare_send(ent, ent->fuse_req);
1182c2c9af9aSBernd Schubert if (err) {
1183c2c9af9aSBernd Schubert fuse_uring_next_fuse_req(ent, queue, issue_flags);
1184c2c9af9aSBernd Schubert return;
1185c2c9af9aSBernd Schubert }
1186c2c9af9aSBernd Schubert } else {
1187c2c9af9aSBernd Schubert err = -ECANCELED;
1188c2c9af9aSBernd Schubert }
1189c2c9af9aSBernd Schubert
1190c2c9af9aSBernd Schubert fuse_uring_send(ent, cmd, err, issue_flags);
1191c2c9af9aSBernd Schubert }
1192c2c9af9aSBernd Schubert
1193c2c9af9aSBernd Schubert static struct fuse_ring_queue *fuse_uring_task_to_queue(struct fuse_ring *ring)
1194c2c9af9aSBernd Schubert {
1195c2c9af9aSBernd Schubert unsigned int qid;
1196c2c9af9aSBernd Schubert struct fuse_ring_queue *queue;
1197c2c9af9aSBernd Schubert
1198c2c9af9aSBernd Schubert qid = task_cpu(current);
1199c2c9af9aSBernd Schubert
1200c2c9af9aSBernd Schubert if (WARN_ONCE(qid >= ring->nr_queues,
1201c2c9af9aSBernd Schubert "Core number (%u) exceeds nr queues (%zu)\n", qid,
1202c2c9af9aSBernd Schubert ring->nr_queues))
1203c2c9af9aSBernd Schubert qid = 0;
1204c2c9af9aSBernd Schubert
1205c2c9af9aSBernd Schubert queue = ring->queues[qid];
1206c2c9af9aSBernd Schubert WARN_ONCE(!queue, "Missing queue for qid %u\n", qid);
1207c2c9af9aSBernd Schubert
1208c2c9af9aSBernd Schubert return queue;
1209c2c9af9aSBernd Schubert }
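
/*
 * Illustration only (not part of this file): the qid == task_cpu()
 * mapping above assumes the server registers one queue per CPU and
 * typically pins one worker thread per queue, so a request submitted on
 * CPU n is served by the entries registered for queue n without remote
 * wake-ups. A hypothetical userspace setup (names invented):
 *
 *	for (unsigned int cpu = 0; cpu < nr_cpus; cpu++) {
 *		cpu_set_t set;
 *
 *		CPU_ZERO(&set);
 *		CPU_SET(cpu, &set);
 *		pthread_create(&worker[cpu], NULL, queue_worker, &queues[cpu]);
 *		pthread_setaffinity_np(worker[cpu], sizeof(set), &set);
 *		// queue_worker() registers its entries with qid == cpu
 *	}
 */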
1210c2c9af9aSBernd Schubert
1211c2c9af9aSBernd Schubert static void fuse_uring_dispatch_ent(struct fuse_ring_ent *ent)
1212c2c9af9aSBernd Schubert {
1213c2c9af9aSBernd Schubert struct io_uring_cmd *cmd = ent->cmd;
1214c2c9af9aSBernd Schubert
1215c2c9af9aSBernd Schubert uring_cmd_set_ring_ent(cmd, ent);
1216c2c9af9aSBernd Schubert io_uring_cmd_complete_in_task(cmd, fuse_uring_send_in_task);
1217c2c9af9aSBernd Schubert }
1218c2c9af9aSBernd Schubert
1219c2c9af9aSBernd Schubert /* queue a fuse request and send it if a ring entry is available */
1220c2c9af9aSBernd Schubert void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req)
1221c2c9af9aSBernd Schubert {
1222c2c9af9aSBernd Schubert struct fuse_conn *fc = req->fm->fc;
1223c2c9af9aSBernd Schubert struct fuse_ring *ring = fc->ring;
1224c2c9af9aSBernd Schubert struct fuse_ring_queue *queue;
1225c2c9af9aSBernd Schubert struct fuse_ring_ent *ent = NULL;
1226c2c9af9aSBernd Schubert int err;
1227c2c9af9aSBernd Schubert
1228c2c9af9aSBernd Schubert err = -EINVAL;
1229c2c9af9aSBernd Schubert queue = fuse_uring_task_to_queue(ring);
1230c2c9af9aSBernd Schubert if (!queue)
1231c2c9af9aSBernd Schubert goto err;
1232c2c9af9aSBernd Schubert
1233c2c9af9aSBernd Schubert if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
1234c2c9af9aSBernd Schubert req->in.h.unique = fuse_get_unique(fiq);
1235c2c9af9aSBernd Schubert
1236c2c9af9aSBernd Schubert spin_lock(&queue->lock);
1237c2c9af9aSBernd Schubert err = -ENOTCONN;
1238c2c9af9aSBernd Schubert if (unlikely(queue->stopped))
1239c2c9af9aSBernd Schubert goto err_unlock;
1240c2c9af9aSBernd Schubert
1241c2c9af9aSBernd Schubert ent = list_first_entry_or_null(&queue->ent_avail_queue,
1242c2c9af9aSBernd Schubert struct fuse_ring_ent, list);
1243c2c9af9aSBernd Schubert if (ent)
1244c2c9af9aSBernd Schubert fuse_uring_add_req_to_ring_ent(ent, req);
1245c2c9af9aSBernd Schubert else
1246c2c9af9aSBernd Schubert list_add_tail(&req->list, &queue->fuse_req_queue);
1247c2c9af9aSBernd Schubert spin_unlock(&queue->lock);
1248c2c9af9aSBernd Schubert
1249c2c9af9aSBernd Schubert if (ent)
1250c2c9af9aSBernd Schubert fuse_uring_dispatch_ent(ent);
1251c2c9af9aSBernd Schubert
1252c2c9af9aSBernd Schubert return;
1253c2c9af9aSBernd Schubert
1254c2c9af9aSBernd Schubert err_unlock:
1255c2c9af9aSBernd Schubert spin_unlock(&queue->lock);
1256c2c9af9aSBernd Schubert err:
1257c2c9af9aSBernd Schubert req->out.h.error = err;
1258c2c9af9aSBernd Schubert clear_bit(FR_PENDING, &req->flags);
1259c2c9af9aSBernd Schubert fuse_request_end(req);
1260c2c9af9aSBernd Schubert }
1261c2c9af9aSBernd Schubert
1262857b0263SBernd Schubert bool fuse_uring_queue_bq_req(struct fuse_req *req)
1263857b0263SBernd Schubert {
1264857b0263SBernd Schubert struct fuse_conn *fc = req->fm->fc;
1265857b0263SBernd Schubert struct fuse_ring *ring = fc->ring;
1266857b0263SBernd Schubert struct fuse_ring_queue *queue;
1267857b0263SBernd Schubert struct fuse_ring_ent *ent = NULL;
1268857b0263SBernd Schubert
1269857b0263SBernd Schubert queue = fuse_uring_task_to_queue(ring);
1270857b0263SBernd Schubert if (!queue)
1271857b0263SBernd Schubert return false;
1272857b0263SBernd Schubert
1273857b0263SBernd Schubert spin_lock(&queue->lock);
1274857b0263SBernd Schubert if (unlikely(queue->stopped)) {
1275857b0263SBernd Schubert spin_unlock(&queue->lock);
1276857b0263SBernd Schubert return false;
1277857b0263SBernd Schubert }
1278857b0263SBernd Schubert
1279857b0263SBernd Schubert list_add_tail(&req->list, &queue->fuse_req_bg_queue);
1280857b0263SBernd Schubert
1281857b0263SBernd Schubert ent = list_first_entry_or_null(&queue->ent_avail_queue,
1282857b0263SBernd Schubert struct fuse_ring_ent, list);
1283857b0263SBernd Schubert spin_lock(&fc->bg_lock);
1284857b0263SBernd Schubert fc->num_background++;
1285857b0263SBernd Schubert if (fc->num_background == fc->max_background)
1286857b0263SBernd Schubert fc->blocked = 1;
1287857b0263SBernd Schubert fuse_uring_flush_bg(queue);
1288857b0263SBernd Schubert spin_unlock(&fc->bg_lock);
1289857b0263SBernd Schubert
1290857b0263SBernd Schubert /*
1291857b0263SBernd Schubert * Due to bg_queue flush limits there might be other bg requests
1292857b0263SBernd Schubert * in the queue that need to be handled first, or no further
1293857b0263SBernd Schubert * request might be available at all.
1294857b0263SBernd Schubert */
1295857b0263SBernd Schubert req = list_first_entry_or_null(&queue->fuse_req_queue, struct fuse_req,
1296857b0263SBernd Schubert list);
1297857b0263SBernd Schubert if (ent && req) {
1298857b0263SBernd Schubert fuse_uring_add_req_to_ring_ent(ent, req);
1299857b0263SBernd Schubert spin_unlock(&queue->lock);
1300857b0263SBernd Schubert
1301857b0263SBernd Schubert fuse_uring_dispatch_ent(ent);
1302857b0263SBernd Schubert } else {
1303857b0263SBernd Schubert spin_unlock(&queue->lock);
1304857b0263SBernd Schubert }
1305857b0263SBernd Schubert
1306857b0263SBernd Schubert return true;
1307857b0263SBernd Schubert }
1308857b0263SBernd Schubert
1309c2c9af9aSBernd Schubert static const struct fuse_iqueue_ops fuse_io_uring_ops = {
1310c2c9af9aSBernd Schubert /* should be sent over io-uring as an enhancement */
1311c2c9af9aSBernd Schubert .send_forget = fuse_dev_queue_forget,
1312c2c9af9aSBernd Schubert
1313c2c9af9aSBernd Schubert /*
1314c2c9af9aSBernd Schubert * could be sent over io-uring, but interrupts should be rare,
1315c2c9af9aSBernd Schubert * so there is no need to make the code complex
1316c2c9af9aSBernd Schubert */
1317c2c9af9aSBernd Schubert .send_interrupt = fuse_dev_queue_interrupt,
1318c2c9af9aSBernd Schubert .send_req = fuse_uring_queue_fuse_req,
1319c2c9af9aSBernd Schubert };
1320