xref: /linux/fs/fuse/dev_uring.c (revision b5329d5a35582abbef57562f9fb6cb26a643f252)
// SPDX-License-Identifier: GPL-2.0
/*
 * FUSE: Filesystem in Userspace
 * Copyright (c) 2023-2024 DataDirect Networks.
 */

#include "fuse_i.h"
#include "dev_uring_i.h"
#include "fuse_dev_i.h"

#include <linux/fs.h>
#include <linux/io_uring/cmd.h>

static bool __read_mostly enable_uring;
module_param(enable_uring, bool, 0644);
MODULE_PARM_DESC(enable_uring,
		 "Enable userspace communication through io-uring");

#define FUSE_URING_IOV_SEGS 2 /* header and payload */


bool fuse_uring_enabled(void)
{
	return enable_uring;
}

struct fuse_uring_pdu {
	struct fuse_ring_ent *ent;
};

static const struct fuse_iqueue_ops fuse_io_uring_ops;

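/*
 * The ring entry is stashed in the io_uring command's pdu so that it can
 * be looked up again later, e.g. from the IO_URING_F_CANCEL path.
 */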
static void uring_cmd_set_ring_ent(struct io_uring_cmd *cmd,
				   struct fuse_ring_ent *ring_ent)
{
	struct fuse_uring_pdu *pdu =
		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu);

	pdu->ent = ring_ent;
}

static struct fuse_ring_ent *uring_cmd_to_ring_ent(struct io_uring_cmd *cmd)
{
	struct fuse_uring_pdu *pdu =
		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu);

	return pdu->ent;
}

static void fuse_uring_flush_bg(struct fuse_ring_queue *queue)
{
	struct fuse_ring *ring = queue->ring;
	struct fuse_conn *fc = ring->fc;

	lockdep_assert_held(&queue->lock);
	lockdep_assert_held(&fc->bg_lock);

	/*
	 * Allow one bg request per queue, ignoring global fc limits.
	 * This prevents a single queue from consuming all resources and
	 * eliminates the need for remote queue wake-ups when global
	 * limits are met but this queue has no more waiting requests.
	 */
	while ((fc->active_background < fc->max_background ||
		!queue->active_background) &&
	       (!list_empty(&queue->fuse_req_bg_queue))) {
		struct fuse_req *req;

		req = list_first_entry(&queue->fuse_req_bg_queue,
				       struct fuse_req, list);
		fc->active_background++;
		queue->active_background++;

		list_move_tail(&req->list, &queue->fuse_req_queue);
	}
}

static void fuse_uring_req_end(struct fuse_ring_ent *ent, struct fuse_req *req,
			       int error)
{
	struct fuse_ring_queue *queue = ent->queue;
	struct fuse_ring *ring = queue->ring;
	struct fuse_conn *fc = ring->fc;

	lockdep_assert_not_held(&queue->lock);
	spin_lock(&queue->lock);
	ent->fuse_req = NULL;
	if (test_bit(FR_BACKGROUND, &req->flags)) {
		queue->active_background--;
		spin_lock(&fc->bg_lock);
		fuse_uring_flush_bg(queue);
		spin_unlock(&fc->bg_lock);
	}

	spin_unlock(&queue->lock);

	if (error)
		req->out.h.error = error;

	clear_bit(FR_SENT, &req->flags);
	fuse_request_end(req);
}

/* Abort all queued requests on the given ring queue */
static void fuse_uring_abort_end_queue_requests(struct fuse_ring_queue *queue)
{
	struct fuse_req *req;
	LIST_HEAD(req_list);

	spin_lock(&queue->lock);
	list_for_each_entry(req, &queue->fuse_req_queue, list)
		clear_bit(FR_PENDING, &req->flags);
	list_splice_init(&queue->fuse_req_queue, &req_list);
	spin_unlock(&queue->lock);

	/* must not hold queue lock to avoid order issues with fi->lock */
	fuse_dev_end_requests(&req_list);
}

void fuse_uring_abort_end_requests(struct fuse_ring *ring)
{
	int qid;
	struct fuse_ring_queue *queue;
	struct fuse_conn *fc = ring->fc;

	for (qid = 0; qid < ring->nr_queues; qid++) {
		queue = READ_ONCE(ring->queues[qid]);
		if (!queue)
			continue;

		queue->stopped = true;

		WARN_ON_ONCE(ring->fc->max_background != UINT_MAX);
		spin_lock(&queue->lock);
		spin_lock(&fc->bg_lock);
		fuse_uring_flush_bg(queue);
		spin_unlock(&fc->bg_lock);
		spin_unlock(&queue->lock);
		fuse_uring_abort_end_queue_requests(queue);
	}
}

void fuse_uring_destruct(struct fuse_conn *fc)
{
	struct fuse_ring *ring = fc->ring;
	int qid;

	if (!ring)
		return;

	for (qid = 0; qid < ring->nr_queues; qid++) {
		struct fuse_ring_queue *queue = ring->queues[qid];
		struct fuse_ring_ent *ent, *next;

		if (!queue)
			continue;

		WARN_ON(!list_empty(&queue->ent_avail_queue));
		WARN_ON(!list_empty(&queue->ent_w_req_queue));
		WARN_ON(!list_empty(&queue->ent_commit_queue));
		WARN_ON(!list_empty(&queue->ent_in_userspace));

		list_for_each_entry_safe(ent, next, &queue->ent_released,
					 list) {
			list_del_init(&ent->list);
			kfree(ent);
		}

		kfree(queue->fpq.processing);
		kfree(queue);
		ring->queues[qid] = NULL;
	}

	kfree(ring->queues);
	kfree(ring);
	fc->ring = NULL;
}

/*
 * Basic ring setup for this connection based on the provided configuration
 */
static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc)
{
	struct fuse_ring *ring;
	size_t nr_queues = num_possible_cpus();
	struct fuse_ring *res = NULL;
	size_t max_payload_size;

	ring = kzalloc(sizeof(*fc->ring), GFP_KERNEL_ACCOUNT);
	if (!ring)
		return NULL;

	ring->queues = kcalloc(nr_queues, sizeof(struct fuse_ring_queue *),
			       GFP_KERNEL_ACCOUNT);
	if (!ring->queues)
		goto out_err;

	max_payload_size = max(FUSE_MIN_READ_BUFFER, fc->max_write);
	max_payload_size = max(max_payload_size, fc->max_pages * PAGE_SIZE);

	spin_lock(&fc->lock);
	if (fc->ring) {
		/* race, another thread created the ring in the meantime */
		spin_unlock(&fc->lock);
		res = fc->ring;
		goto out_err;
	}

	init_waitqueue_head(&ring->stop_waitq);

	ring->nr_queues = nr_queues;
	ring->fc = fc;
	ring->max_payload_sz = max_payload_size;
	atomic_set(&ring->queue_refs, 0);
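	/* pairs with smp_load_acquire() in fuse_uring_register() */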
	smp_store_release(&fc->ring, ring);

	spin_unlock(&fc->lock);
	return ring;

out_err:
	kfree(ring->queues);
	kfree(ring);
	return res;
}

static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring,
						       int qid)
{
	struct fuse_conn *fc = ring->fc;
	struct fuse_ring_queue *queue;
	struct list_head *pq;

	queue = kzalloc(sizeof(*queue), GFP_KERNEL_ACCOUNT);
	if (!queue)
		return NULL;
	pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
	if (!pq) {
		kfree(queue);
		return NULL;
	}

	queue->qid = qid;
	queue->ring = ring;
	spin_lock_init(&queue->lock);

	INIT_LIST_HEAD(&queue->ent_avail_queue);
	INIT_LIST_HEAD(&queue->ent_commit_queue);
	INIT_LIST_HEAD(&queue->ent_w_req_queue);
	INIT_LIST_HEAD(&queue->ent_in_userspace);
	INIT_LIST_HEAD(&queue->fuse_req_queue);
	INIT_LIST_HEAD(&queue->fuse_req_bg_queue);
	INIT_LIST_HEAD(&queue->ent_released);

	queue->fpq.processing = pq;
	fuse_pqueue_init(&queue->fpq);

	spin_lock(&fc->lock);
	if (ring->queues[qid]) {
		spin_unlock(&fc->lock);
		kfree(queue->fpq.processing);
		kfree(queue);
		return ring->queues[qid];
	}

	/*
	 * WRITE_ONCE() under fc->lock, as readers of ring->queues[] mostly
	 * don't take the lock at all.
	 */
	WRITE_ONCE(ring->queues[qid], queue);
	spin_unlock(&fc->lock);

	return queue;
}

static void fuse_uring_stop_fuse_req_end(struct fuse_req *req)
{
	clear_bit(FR_SENT, &req->flags);
	req->out.h.error = -ECONNABORTED;
	fuse_request_end(req);
}

/*
 * Release a request/entry on connection tear down
 */
static void fuse_uring_entry_teardown(struct fuse_ring_ent *ent)
{
	struct fuse_req *req;
	struct io_uring_cmd *cmd;

	struct fuse_ring_queue *queue = ent->queue;

	spin_lock(&queue->lock);
	cmd = ent->cmd;
	ent->cmd = NULL;
	req = ent->fuse_req;
	ent->fuse_req = NULL;
	if (req) {
		/* remove entry from queue->fpq->processing */
		list_del_init(&req->list);
	}

	/*
	 * The entry must not be freed immediately, due to direct pointer
	 * access of entries through IO_URING_F_CANCEL - there is a risk of
	 * a race with daemon termination (which triggers IO_URING_F_CANCEL
	 * and accesses entries without checking the list state first).
	 */
	list_move(&ent->list, &queue->ent_released);
	ent->state = FRRS_RELEASED;
	spin_unlock(&queue->lock);

	if (cmd)
		io_uring_cmd_done(cmd, -ENOTCONN, 0, IO_URING_F_UNLOCKED);

	if (req)
		fuse_uring_stop_fuse_req_end(req);
}

static void fuse_uring_stop_list_entries(struct list_head *head,
					 struct fuse_ring_queue *queue,
					 enum fuse_ring_req_state exp_state)
{
	struct fuse_ring *ring = queue->ring;
	struct fuse_ring_ent *ent, *next;
	ssize_t queue_refs = SSIZE_MAX;
	LIST_HEAD(to_teardown);

	spin_lock(&queue->lock);
	list_for_each_entry_safe(ent, next, head, list) {
		if (ent->state != exp_state) {
			pr_warn("entry teardown qid=%d state=%d expected=%d",
				queue->qid, ent->state, exp_state);
			continue;
		}

		ent->state = FRRS_TEARDOWN;
		list_move(&ent->list, &to_teardown);
	}
	spin_unlock(&queue->lock);

	/* no queue lock to avoid lock order issues */
	list_for_each_entry_safe(ent, next, &to_teardown, list) {
		fuse_uring_entry_teardown(ent);
		queue_refs = atomic_dec_return(&ring->queue_refs);
		WARN_ON_ONCE(queue_refs < 0);
	}
}

static void fuse_uring_teardown_entries(struct fuse_ring_queue *queue)
{
	fuse_uring_stop_list_entries(&queue->ent_in_userspace, queue,
				     FRRS_USERSPACE);
	fuse_uring_stop_list_entries(&queue->ent_avail_queue, queue,
				     FRRS_AVAILABLE);
}

/*
 * Log state debug info
 */
static void fuse_uring_log_ent_state(struct fuse_ring *ring)
{
	int qid;
	struct fuse_ring_ent *ent;

	for (qid = 0; qid < ring->nr_queues; qid++) {
		struct fuse_ring_queue *queue = ring->queues[qid];

		if (!queue)
			continue;

		spin_lock(&queue->lock);
		/*
		 * Log entries from the intermediate queue, the other queues
		 * should be empty
		 */
		list_for_each_entry(ent, &queue->ent_w_req_queue, list) {
			pr_info(" ent-req-queue ring=%p qid=%d ent=%p state=%d\n",
				ring, qid, ent, ent->state);
		}
		list_for_each_entry(ent, &queue->ent_commit_queue, list) {
			pr_info(" ent-commit-queue ring=%p qid=%d ent=%p state=%d\n",
				ring, qid, ent, ent->state);
		}
		spin_unlock(&queue->lock);
	}
	ring->stop_debug_log = 1;
}

static void fuse_uring_async_stop_queues(struct work_struct *work)
{
	int qid;
	struct fuse_ring *ring =
		container_of(work, struct fuse_ring, async_teardown_work.work);

	/* XXX code dup */
	for (qid = 0; qid < ring->nr_queues; qid++) {
		struct fuse_ring_queue *queue = READ_ONCE(ring->queues[qid]);

		if (!queue)
			continue;

		fuse_uring_teardown_entries(queue);
	}

	/*
	 * Some ring entries might be in the middle of IO operations, i.e.
	 * about to get handled by file_operations::uring_cmd or on the way
	 * to userspace - we could handle that with conditions in run time
	 * code, but it is easier/cleaner to have an async tear down handler
	 * if there are still queue references left.
	 */
	if (atomic_read(&ring->queue_refs) > 0) {
		if (time_after(jiffies,
			       ring->teardown_time + FUSE_URING_TEARDOWN_TIMEOUT))
			fuse_uring_log_ent_state(ring);

		schedule_delayed_work(&ring->async_teardown_work,
				      FUSE_URING_TEARDOWN_INTERVAL);
	} else {
		wake_up_all(&ring->stop_waitq);
	}
}

/*
 * Stop the ring queues
 */
void fuse_uring_stop_queues(struct fuse_ring *ring)
{
	int qid;

	for (qid = 0; qid < ring->nr_queues; qid++) {
		struct fuse_ring_queue *queue = READ_ONCE(ring->queues[qid]);

		if (!queue)
			continue;

		fuse_uring_teardown_entries(queue);
	}

	if (atomic_read(&ring->queue_refs) > 0) {
		ring->teardown_time = jiffies;
		INIT_DELAYED_WORK(&ring->async_teardown_work,
				  fuse_uring_async_stop_queues);
		schedule_delayed_work(&ring->async_teardown_work,
				      FUSE_URING_TEARDOWN_INTERVAL);
	} else {
		wake_up_all(&ring->stop_waitq);
	}
}

/*
 * Handle IO_URING_F_CANCEL, which typically comes on daemon termination.
 *
 * Releasing the last entry should trigger fuse_dev_release() if
 * the daemon was terminated.
 */
static void fuse_uring_cancel(struct io_uring_cmd *cmd,
			      unsigned int issue_flags)
{
	struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
	struct fuse_ring_queue *queue;
	bool need_cmd_done = false;

	/*
	 * direct access on ent - it must not be destructed as long as
	 * IO_URING_F_CANCEL might come up
	 */
	queue = ent->queue;
	spin_lock(&queue->lock);
	if (ent->state == FRRS_AVAILABLE) {
		ent->state = FRRS_USERSPACE;
		list_move(&ent->list, &queue->ent_in_userspace);
		need_cmd_done = true;
		ent->cmd = NULL;
	}
	spin_unlock(&queue->lock);

	if (need_cmd_done) {
		/* no queue lock to avoid lock order issues */
		io_uring_cmd_done(cmd, -ENOTCONN, 0, issue_flags);
	}
}

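/*
 * Mark the command as cancelable and remember which ring entry it belongs
 * to, so that fuse_uring_cancel() can find it again.
 */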
static void fuse_uring_prepare_cancel(struct io_uring_cmd *cmd, int issue_flags,
				      struct fuse_ring_ent *ring_ent)
{
	uring_cmd_set_ring_ent(cmd, ring_ent);
	io_uring_cmd_mark_cancelable(cmd, issue_flags);
}

/*
 * Check for errors and store them in the request
 */
static int fuse_uring_out_header_has_err(struct fuse_out_header *oh,
					 struct fuse_req *req,
					 struct fuse_conn *fc)
{
	int err;

	err = -EINVAL;
	if (oh->unique == 0) {
		/* Not supported through io-uring yet */
		pr_warn_once("notify through fuse-io-uring not supported\n");
		goto err;
	}

	if (oh->error <= -ERESTARTSYS || oh->error > 0)
		goto err;

	if (oh->error) {
		err = oh->error;
		goto err;
	}

	err = -ENOENT;
	if ((oh->unique & ~FUSE_INT_REQ_BIT) != req->in.h.unique) {
		pr_warn_ratelimited("unique mismatch, expected: %llu got %llu\n",
				    req->in.h.unique,
				    oh->unique & ~FUSE_INT_REQ_BIT);
		goto err;
	}

	/*
	 * Is it an interrupt reply ID?
	 * XXX: Not supported through fuse-io-uring yet, it should not even
	 *      find the request - should not happen.
	 */
	WARN_ON_ONCE(oh->unique & FUSE_INT_REQ_BIT);

	err = 0;
err:
	return err;
}

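/*
 * Copy the reply payload that userspace wrote into the ring buffer back
 * into the output arguments of the fuse request.
 */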
static int fuse_uring_copy_from_ring(struct fuse_ring *ring,
				     struct fuse_req *req,
				     struct fuse_ring_ent *ent)
{
	struct fuse_copy_state cs;
	struct fuse_args *args = req->args;
	struct iov_iter iter;
	int err;
	struct fuse_uring_ent_in_out ring_in_out;

	err = copy_from_user(&ring_in_out, &ent->headers->ring_ent_in_out,
			     sizeof(ring_in_out));
	if (err)
		return -EFAULT;

	err = import_ubuf(ITER_SOURCE, ent->payload, ring->max_payload_sz,
			  &iter);
	if (err)
		return err;

	fuse_copy_init(&cs, 0, &iter);
	cs.is_uring = 1;
	cs.req = req;

	return fuse_copy_out_args(&cs, args, ring_in_out.payload_sz);
}

/*
 * Copy data from the req to the ring buffer
 */
static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req,
				   struct fuse_ring_ent *ent)
{
	struct fuse_copy_state cs;
	struct fuse_args *args = req->args;
	struct fuse_in_arg *in_args = args->in_args;
	int num_args = args->in_numargs;
	int err;
	struct iov_iter iter;
	struct fuse_uring_ent_in_out ent_in_out = {
		.flags = 0,
		.commit_id = req->in.h.unique,
	};

	err = import_ubuf(ITER_DEST, ent->payload, ring->max_payload_sz, &iter);
	if (err) {
		pr_info_ratelimited("fuse: Import of user buffer failed\n");
		return err;
	}

	fuse_copy_init(&cs, 1, &iter);
	cs.is_uring = 1;
	cs.req = req;

	if (num_args > 0) {
		/*
		 * Expectation is that the first argument is the per-op
		 * header. Some opcodes have that as zero size.
		 */
		if (args->in_args[0].size > 0) {
			err = copy_to_user(&ent->headers->op_in, in_args->value,
					   in_args->size);
			if (err) {
				pr_info_ratelimited(
					"Copying the header failed.\n");
				return -EFAULT;
			}
		}
		in_args++;
		num_args--;
	}

	/* copy the payload */
	err = fuse_copy_args(&cs, num_args, args->in_pages,
			     (struct fuse_arg *)in_args, 0);
	if (err) {
		pr_info_ratelimited("%s fuse_copy_args failed\n", __func__);
		return err;
	}

	ent_in_out.payload_sz = cs.ring.copied_sz;
	err = copy_to_user(&ent->headers->ring_ent_in_out, &ent_in_out,
			   sizeof(ent_in_out));
	return err ? -EFAULT : 0;
}

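/*
 * Copy the request headers and arguments to the userspace-provided ring
 * buffers (headers and payload).
 */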
static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent,
				   struct fuse_req *req)
{
	struct fuse_ring_queue *queue = ent->queue;
	struct fuse_ring *ring = queue->ring;
	int err;

	err = -EIO;
	if (WARN_ON(ent->state != FRRS_FUSE_REQ)) {
		pr_err("qid=%d ring-req=%p invalid state %d on send\n",
		       queue->qid, ent, ent->state);
		return err;
	}

	err = -EINVAL;
	if (WARN_ON(req->in.h.unique == 0))
		return err;

	/* copy the request */
	err = fuse_uring_args_to_ring(ring, req, ent);
	if (unlikely(err)) {
		pr_info_ratelimited("Copy to ring failed: %d\n", err);
		return err;
	}

	/* copy fuse_in_header */
	err = copy_to_user(&ent->headers->in_out, &req->in.h,
			   sizeof(req->in.h));
	if (err) {
		err = -EFAULT;
		return err;
	}

	return 0;
}

static int fuse_uring_prepare_send(struct fuse_ring_ent *ent,
				   struct fuse_req *req)
{
	int err;

	err = fuse_uring_copy_to_ring(ent, req);
	if (!err)
		set_bit(FR_SENT, &req->flags);
	else
		fuse_uring_req_end(ent, req, err);

	return err;
}

/*
 * Write data to the ring buffer and send the request to userspace;
 * userspace will read it.
 * This is comparable with classical read(/dev/fuse).
 */
static int fuse_uring_send_next_to_ring(struct fuse_ring_ent *ent,
					struct fuse_req *req,
					unsigned int issue_flags)
{
	struct fuse_ring_queue *queue = ent->queue;
	int err;
	struct io_uring_cmd *cmd;

	err = fuse_uring_prepare_send(ent, req);
	if (err)
		return err;

	spin_lock(&queue->lock);
	cmd = ent->cmd;
	ent->cmd = NULL;
	ent->state = FRRS_USERSPACE;
	list_move(&ent->list, &queue->ent_in_userspace);
	spin_unlock(&queue->lock);

	io_uring_cmd_done(cmd, 0, 0, issue_flags);
	return 0;
}

/*
 * Make a ring entry available for fuse_req assignment
 */
static void fuse_uring_ent_avail(struct fuse_ring_ent *ent,
				 struct fuse_ring_queue *queue)
{
	WARN_ON_ONCE(!ent->cmd);
	list_move(&ent->list, &queue->ent_avail_queue);
	ent->state = FRRS_AVAILABLE;
}

/* Used to find the request on SQE commit */
static void fuse_uring_add_to_pq(struct fuse_ring_ent *ent,
				 struct fuse_req *req)
{
	struct fuse_ring_queue *queue = ent->queue;
	struct fuse_pqueue *fpq = &queue->fpq;
	unsigned int hash;

	req->ring_entry = ent;
	hash = fuse_req_hash(req->in.h.unique);
	list_move_tail(&req->list, &fpq->processing[hash]);
}

/*
 * Assign a fuse request to the given ring entry
 */
static void fuse_uring_add_req_to_ring_ent(struct fuse_ring_ent *ent,
					   struct fuse_req *req)
{
	struct fuse_ring_queue *queue = ent->queue;
	struct fuse_conn *fc = req->fm->fc;
	struct fuse_iqueue *fiq = &fc->iq;

	lockdep_assert_held(&queue->lock);

	if (WARN_ON_ONCE(ent->state != FRRS_AVAILABLE &&
			 ent->state != FRRS_COMMIT)) {
		pr_warn("%s qid=%d state=%d\n", __func__, ent->queue->qid,
			ent->state);
	}

	spin_lock(&fiq->lock);
	clear_bit(FR_PENDING, &req->flags);
	spin_unlock(&fiq->lock);
	ent->fuse_req = req;
	ent->state = FRRS_FUSE_REQ;
	list_move(&ent->list, &queue->ent_w_req_queue);
	fuse_uring_add_to_pq(ent, req);
}

/* Fetch the next fuse request if available */
static struct fuse_req *fuse_uring_ent_assign_req(struct fuse_ring_ent *ent)
	__must_hold(&queue->lock)
{
	struct fuse_req *req;
	struct fuse_ring_queue *queue = ent->queue;
	struct list_head *req_queue = &queue->fuse_req_queue;

	lockdep_assert_held(&queue->lock);

	/* get and assign the next entry while it is still holding the lock */
	req = list_first_entry_or_null(req_queue, struct fuse_req, list);
	if (req)
		fuse_uring_add_req_to_ring_ent(ent, req);

	return req;
}

/*
 * Read data from the ring buffer, which user space has written to.
 * This is comparable with handling of classical write(/dev/fuse).
 * Also make the ring request available again for new fuse requests.
 */
static void fuse_uring_commit(struct fuse_ring_ent *ent, struct fuse_req *req,
			      unsigned int issue_flags)
{
	struct fuse_ring *ring = ent->queue->ring;
	struct fuse_conn *fc = ring->fc;
	ssize_t err = 0;

	err = copy_from_user(&req->out.h, &ent->headers->in_out,
			     sizeof(req->out.h));
	if (err) {
		req->out.h.error = -EFAULT;
		goto out;
	}

	err = fuse_uring_out_header_has_err(&req->out.h, req, fc);
	if (err) {
		/* req->out.h.error already set */
		goto out;
	}

	err = fuse_uring_copy_from_ring(ring, req, ent);
out:
	fuse_uring_req_end(ent, req, err);
}

/*
 * Get the next fuse req and send it
 */
static void fuse_uring_next_fuse_req(struct fuse_ring_ent *ent,
				     struct fuse_ring_queue *queue,
				     unsigned int issue_flags)
{
	int err;
	struct fuse_req *req;

retry:
	spin_lock(&queue->lock);
	fuse_uring_ent_avail(ent, queue);
	req = fuse_uring_ent_assign_req(ent);
	spin_unlock(&queue->lock);

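	/*
	 * If sending fails, the request was already ended in
	 * fuse_uring_prepare_send(); retry with the next queued request.
	 */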
	if (req) {
		err = fuse_uring_send_next_to_ring(ent, req, issue_flags);
		if (err)
			goto retry;
	}
}

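/* Move an entry returned by userspace from FRRS_USERSPACE into the commit state */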
static int fuse_ring_ent_set_commit(struct fuse_ring_ent *ent)
{
	struct fuse_ring_queue *queue = ent->queue;

	lockdep_assert_held(&queue->lock);

	if (WARN_ON_ONCE(ent->state != FRRS_USERSPACE))
		return -EIO;

	ent->state = FRRS_COMMIT;
	list_move(&ent->list, &queue->ent_commit_queue);

	return 0;
}

/* FUSE_URING_CMD_COMMIT_AND_FETCH handler */
static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
				   struct fuse_conn *fc)
{
	const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe);
	struct fuse_ring_ent *ent;
	int err;
	struct fuse_ring *ring = fc->ring;
	struct fuse_ring_queue *queue;
	uint64_t commit_id = READ_ONCE(cmd_req->commit_id);
	unsigned int qid = READ_ONCE(cmd_req->qid);
	struct fuse_pqueue *fpq;
	struct fuse_req *req;

	err = -ENOTCONN;
	if (!ring)
		return err;

	if (qid >= ring->nr_queues)
		return -EINVAL;

	queue = ring->queues[qid];
	if (!queue)
		return err;
	fpq = &queue->fpq;

	if (!READ_ONCE(fc->connected) || READ_ONCE(queue->stopped))
		return err;

	spin_lock(&queue->lock);
	/*
	 * Find the request based on the unique ID of the fuse request.
	 * This should get revised, as it needs a hash calculation and list
	 * search, as well as a full struct fuse_pqueue (memory overhead)
	 * and the link from req to ring_ent.
	 */
	req = fuse_request_find(fpq, commit_id);
	err = -ENOENT;
	if (!req) {
		pr_info("qid=%d commit_id %llu not found\n", queue->qid,
			commit_id);
		spin_unlock(&queue->lock);
		return err;
	}
	list_del_init(&req->list);
	ent = req->ring_entry;
	req->ring_entry = NULL;

	err = fuse_ring_ent_set_commit(ent);
	if (err != 0) {
		pr_info_ratelimited("qid=%d commit_id %llu state %d",
				    queue->qid, commit_id, ent->state);
		spin_unlock(&queue->lock);
		req->out.h.error = err;
		clear_bit(FR_SENT, &req->flags);
		fuse_request_end(req);
		return err;
	}

	ent->cmd = cmd;
	spin_unlock(&queue->lock);

	/* without the queue lock, as other locks are taken */
	fuse_uring_prepare_cancel(cmd, issue_flags, ent);
	fuse_uring_commit(ent, req, issue_flags);

	/*
	 * Fetching the next request is absolutely required as queued
	 * fuse requests would otherwise not get processed - committing
	 * and fetching is done in one step vs legacy fuse, which has separated
	 * read (fetch request) and write (commit result).
	 */
	fuse_uring_next_fuse_req(ent, queue, issue_flags);
	return 0;
}

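/*
 * The ring is considered ready once every queue has at least one
 * available entry registered.
 */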
static bool is_ring_ready(struct fuse_ring *ring, int current_qid)
{
	int qid;
	struct fuse_ring_queue *queue;
	bool ready = true;

	for (qid = 0; qid < ring->nr_queues && ready; qid++) {
		if (current_qid == qid)
			continue;

		queue = ring->queues[qid];
		if (!queue) {
			ready = false;
			break;
		}

		spin_lock(&queue->lock);
		if (list_empty(&queue->ent_avail_queue))
			ready = false;
		spin_unlock(&queue->lock);
	}

	return ready;
}

/*
 * fuse_uring_req_fetch command handling
 */
static void fuse_uring_do_register(struct fuse_ring_ent *ent,
				   struct io_uring_cmd *cmd,
				   unsigned int issue_flags)
{
	struct fuse_ring_queue *queue = ent->queue;
	struct fuse_ring *ring = queue->ring;
	struct fuse_conn *fc = ring->fc;
	struct fuse_iqueue *fiq = &fc->iq;

	fuse_uring_prepare_cancel(cmd, issue_flags, ent);

	spin_lock(&queue->lock);
	ent->cmd = cmd;
	fuse_uring_ent_avail(ent, queue);
	spin_unlock(&queue->lock);

	if (!ring->ready) {
		bool ready = is_ring_ready(ring, queue->qid);

		if (ready) {
			WRITE_ONCE(fiq->ops, &fuse_io_uring_ops);
			WRITE_ONCE(ring->ready, true);
			wake_up_all(&fc->blocked_waitq);
		}
	}
}

/*
 * sqe->addr is a ptr to an iovec array, iov[0] has the headers, iov[1]
 * the payload
 */
static int fuse_uring_get_iovec_from_sqe(const struct io_uring_sqe *sqe,
					 struct iovec iov[FUSE_URING_IOV_SEGS])
{
	struct iovec __user *uiov = u64_to_user_ptr(READ_ONCE(sqe->addr));
	struct iov_iter iter;
	ssize_t ret;

	if (sqe->len != FUSE_URING_IOV_SEGS)
		return -EINVAL;

	/*
	 * Direction for buffer access will actually be READ and WRITE,
	 * using write for the import should include READ access as well.
	 */
	ret = import_iovec(WRITE, uiov, FUSE_URING_IOV_SEGS,
			   FUSE_URING_IOV_SEGS, &iov, &iter);
	if (ret < 0)
		return ret;

	return 0;
}

static struct fuse_ring_ent *
fuse_uring_create_ring_ent(struct io_uring_cmd *cmd,
			   struct fuse_ring_queue *queue)
{
	struct fuse_ring *ring = queue->ring;
	struct fuse_ring_ent *ent;
	size_t payload_size;
	struct iovec iov[FUSE_URING_IOV_SEGS];
	int err;

	err = fuse_uring_get_iovec_from_sqe(cmd->sqe, iov);
	if (err) {
		pr_info_ratelimited("Failed to get iovec from sqe, err=%d\n",
				    err);
		return ERR_PTR(err);
	}

	err = -EINVAL;
	if (iov[0].iov_len < sizeof(struct fuse_uring_req_header)) {
		pr_info_ratelimited("Invalid header len %zu\n", iov[0].iov_len);
		return ERR_PTR(err);
	}

	payload_size = iov[1].iov_len;
	if (payload_size < ring->max_payload_sz) {
		pr_info_ratelimited("Invalid req payload len %zu\n",
				    payload_size);
		return ERR_PTR(err);
	}

	err = -ENOMEM;
	ent = kzalloc(sizeof(*ent), GFP_KERNEL_ACCOUNT);
	if (!ent)
		return ERR_PTR(err);

	INIT_LIST_HEAD(&ent->list);

	ent->queue = queue;
	ent->headers = iov[0].iov_base;
	ent->payload = iov[1].iov_base;

	atomic_inc(&ring->queue_refs);
	return ent;
}

/*
 * Register the header and payload buffer with the kernel and put the
 * entry as "ready to get fuse requests" on the queue
 */
static int fuse_uring_register(struct io_uring_cmd *cmd,
			       unsigned int issue_flags, struct fuse_conn *fc)
{
	const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe);
	struct fuse_ring *ring = smp_load_acquire(&fc->ring);
	struct fuse_ring_queue *queue;
	struct fuse_ring_ent *ent;
	int err;
	unsigned int qid = READ_ONCE(cmd_req->qid);

	err = -ENOMEM;
	if (!ring) {
		ring = fuse_uring_create(fc);
		if (!ring)
			return err;
	}

	if (qid >= ring->nr_queues) {
		pr_info_ratelimited("fuse: Invalid ring qid %u\n", qid);
		return -EINVAL;
	}

	queue = ring->queues[qid];
	if (!queue) {
		queue = fuse_uring_create_queue(ring, qid);
		if (!queue)
			return err;
	}

106924fe962cSBernd Schubert 	/*
107024fe962cSBernd Schubert  * The queue created above does not need to be destructed in case of
107124fe962cSBernd Schubert  * entry errors below; that will be done at ring destruction time.
107224fe962cSBernd Schubert 	 */
107324fe962cSBernd Schubert 
107424fe962cSBernd Schubert 	ent = fuse_uring_create_ring_ent(cmd, queue);
107524fe962cSBernd Schubert 	if (IS_ERR(ent))
107624fe962cSBernd Schubert 		return PTR_ERR(ent);
107724fe962cSBernd Schubert 
107824fe962cSBernd Schubert 	fuse_uring_do_register(ent, cmd, issue_flags);
107924fe962cSBernd Schubert 
108024fe962cSBernd Schubert 	return 0;
108124fe962cSBernd Schubert }
108224fe962cSBernd Schubert 
108324fe962cSBernd Schubert /*
108424fe962cSBernd Schubert  * Entry function from io_uring to handle the given passthrough command
108524fe962cSBernd Schubert  * (op code IORING_OP_URING_CMD)
108624fe962cSBernd Schubert  */
1087786412a7SBernd Schubert int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
108824fe962cSBernd Schubert {
108924fe962cSBernd Schubert 	struct fuse_dev *fud;
109024fe962cSBernd Schubert 	struct fuse_conn *fc;
109124fe962cSBernd Schubert 	u32 cmd_op = cmd->cmd_op;
109224fe962cSBernd Schubert 	int err;
109324fe962cSBernd Schubert 
1094b6236c84SBernd Schubert 	if (unlikely(issue_flags & IO_URING_F_CANCEL)) {
1095b6236c84SBernd Schubert 		fuse_uring_cancel(cmd, issue_flags);
1096b6236c84SBernd Schubert 		return 0;
1097b6236c84SBernd Schubert 	}
1098b6236c84SBernd Schubert 
109924fe962cSBernd Schubert 	/* This extra SQE size holds struct fuse_uring_cmd_req */
110024fe962cSBernd Schubert 	if (!(issue_flags & IO_URING_F_SQE128))
110124fe962cSBernd Schubert 		return -EINVAL;
110224fe962cSBernd Schubert 
110324fe962cSBernd Schubert 	fud = fuse_get_dev(cmd->file);
110424fe962cSBernd Schubert 	if (!fud) {
110524fe962cSBernd Schubert 		pr_info_ratelimited("No fuse device found\n");
110624fe962cSBernd Schubert 		return -ENOTCONN;
110724fe962cSBernd Schubert 	}
110824fe962cSBernd Schubert 	fc = fud->fc;
110924fe962cSBernd Schubert 
11102d4fde59SBernd Schubert 	/* Once a connection has io-uring enabled on it, it can't be disabled */
11112d4fde59SBernd Schubert 	if (!enable_uring && !fc->io_uring) {
11122d4fde59SBernd Schubert 		pr_info_ratelimited("fuse-io-uring is disabled\n");
11132d4fde59SBernd Schubert 		return -EOPNOTSUPP;
11142d4fde59SBernd Schubert 	}
11152d4fde59SBernd Schubert 
111624fe962cSBernd Schubert 	if (fc->aborted)
111724fe962cSBernd Schubert 		return -ECONNABORTED;
111824fe962cSBernd Schubert 	if (!fc->connected)
111924fe962cSBernd Schubert 		return -ENOTCONN;
112024fe962cSBernd Schubert 
112124fe962cSBernd Schubert 	/*
112224fe962cSBernd Schubert 	 * fuse_uring_register() needs the ring to be initialized, as we
112324fe962cSBernd Schubert 	 * need to know the max payload size
112424fe962cSBernd Schubert 	 */
112524fe962cSBernd Schubert 	if (!fc->initialized)
112624fe962cSBernd Schubert 		return -EAGAIN;
112724fe962cSBernd Schubert 
112824fe962cSBernd Schubert 	switch (cmd_op) {
112924fe962cSBernd Schubert 	case FUSE_IO_URING_CMD_REGISTER:
113024fe962cSBernd Schubert 		err = fuse_uring_register(cmd, issue_flags, fc);
113124fe962cSBernd Schubert 		if (err) {
113224fe962cSBernd Schubert 			pr_info_once("FUSE_IO_URING_CMD_REGISTER failed err=%d\n",
113324fe962cSBernd Schubert 				     err);
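			/*
			 * Registration failed: mark io-uring as unusable for
			 * this connection and wake any task waiting for the
			 * ring to become ready.
			 */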
11343393ff96SBernd Schubert 			fc->io_uring = 0;
11353393ff96SBernd Schubert 			wake_up_all(&fc->blocked_waitq);
113624fe962cSBernd Schubert 			return err;
113724fe962cSBernd Schubert 		}
113824fe962cSBernd Schubert 		break;
1139c090c8abSBernd Schubert 	case FUSE_IO_URING_CMD_COMMIT_AND_FETCH:
1140c090c8abSBernd Schubert 		err = fuse_uring_commit_fetch(cmd, issue_flags, fc);
1141c090c8abSBernd Schubert 		if (err) {
1142c090c8abSBernd Schubert 			pr_info_once("FUSE_IO_URING_CMD_COMMIT_AND_FETCH failed err=%d\n",
1143c090c8abSBernd Schubert 				     err);
1144c090c8abSBernd Schubert 			return err;
1145c090c8abSBernd Schubert 		}
1146c090c8abSBernd Schubert 		break;
114724fe962cSBernd Schubert 	default:
114824fe962cSBernd Schubert 		return -EINVAL;
114924fe962cSBernd Schubert 	}
115024fe962cSBernd Schubert 
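	/* The command is completed asynchronously via io_uring_cmd_done() */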
115124fe962cSBernd Schubert 	return -EIOCBQUEUED;
115224fe962cSBernd Schubert }
1153c2c9af9aSBernd Schubert 
1154c2c9af9aSBernd Schubert static void fuse_uring_send(struct fuse_ring_ent *ent, struct io_uring_cmd *cmd,
1155c2c9af9aSBernd Schubert 			    ssize_t ret, unsigned int issue_flags)
1156c2c9af9aSBernd Schubert {
1157c2c9af9aSBernd Schubert 	struct fuse_ring_queue *queue = ent->queue;
1158c2c9af9aSBernd Schubert 
1159c2c9af9aSBernd Schubert 	spin_lock(&queue->lock);
1160c2c9af9aSBernd Schubert 	ent->state = FRRS_USERSPACE;
1161c2c9af9aSBernd Schubert 	list_move(&ent->list, &queue->ent_in_userspace);
1162c2c9af9aSBernd Schubert 	ent->cmd = NULL;
1163c2c9af9aSBernd Schubert 	spin_unlock(&queue->lock);
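	/* Posting the completion hands the request over to the fuse server */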
1164c2c9af9aSBernd Schubert 
1165c2c9af9aSBernd Schubert 	io_uring_cmd_done(cmd, ret, 0, issue_flags);
1166c2c9af9aSBernd Schubert }
1167c2c9af9aSBernd Schubert 
1168c2c9af9aSBernd Schubert /*
1169c2c9af9aSBernd Schubert  * This prepares and sends the ring request in fuse-uring task context.
1170c2c9af9aSBernd Schubert  * User buffers are not mapped yet - the application does not have permission
1171c2c9af9aSBernd Schubert  * to write to them - this has to be executed in ring task context.
1172c2c9af9aSBernd Schubert  */
1173c2c9af9aSBernd Schubert static void fuse_uring_send_in_task(struct io_uring_cmd *cmd,
1174c2c9af9aSBernd Schubert 				    unsigned int issue_flags)
1175c2c9af9aSBernd Schubert {
1176c2c9af9aSBernd Schubert 	struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
1177c2c9af9aSBernd Schubert 	struct fuse_ring_queue *queue = ent->queue;
1178c2c9af9aSBernd Schubert 	int err;
1179c2c9af9aSBernd Schubert 
1180c2c9af9aSBernd Schubert 	if (!(issue_flags & IO_URING_F_TASK_DEAD)) {
1181c2c9af9aSBernd Schubert 		err = fuse_uring_prepare_send(ent, ent->fuse_req);
1182c2c9af9aSBernd Schubert 		if (err) {
1183c2c9af9aSBernd Schubert 			fuse_uring_next_fuse_req(ent, queue, issue_flags);
1184c2c9af9aSBernd Schubert 			return;
1185c2c9af9aSBernd Schubert 		}
1186c2c9af9aSBernd Schubert 	} else {
1187c2c9af9aSBernd Schubert 		err = -ECANCELED;
1188c2c9af9aSBernd Schubert 	}
1189c2c9af9aSBernd Schubert 
1190c2c9af9aSBernd Schubert 	fuse_uring_send(ent, cmd, err, issue_flags);
1191c2c9af9aSBernd Schubert }
1192c2c9af9aSBernd Schubert 
1193c2c9af9aSBernd Schubert static struct fuse_ring_queue *fuse_uring_task_to_queue(struct fuse_ring *ring)
1194c2c9af9aSBernd Schubert {
1195c2c9af9aSBernd Schubert 	unsigned int qid;
1196c2c9af9aSBernd Schubert 	struct fuse_ring_queue *queue;
1197c2c9af9aSBernd Schubert 
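	/* Ring queues are per-CPU; pick the queue of the submitting CPU */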
1198c2c9af9aSBernd Schubert 	qid = task_cpu(current);
1199c2c9af9aSBernd Schubert 
1200c2c9af9aSBernd Schubert 	if (WARN_ONCE(qid >= ring->nr_queues,
1201c2c9af9aSBernd Schubert 		      "Core number (%u) exceeds nr queues (%zu)\n", qid,
1202c2c9af9aSBernd Schubert 		      ring->nr_queues))
1203c2c9af9aSBernd Schubert 		qid = 0;
1204c2c9af9aSBernd Schubert 
1205c2c9af9aSBernd Schubert 	queue = ring->queues[qid];
1206c2c9af9aSBernd Schubert 	WARN_ONCE(!queue, "Missing queue for qid %u\n", qid);
1207c2c9af9aSBernd Schubert 
1208c2c9af9aSBernd Schubert 	return queue;
1209c2c9af9aSBernd Schubert }
1210c2c9af9aSBernd Schubert 
1211c2c9af9aSBernd Schubert static void fuse_uring_dispatch_ent(struct fuse_ring_ent *ent)
1212c2c9af9aSBernd Schubert {
1213c2c9af9aSBernd Schubert 	struct io_uring_cmd *cmd = ent->cmd;
1214c2c9af9aSBernd Schubert 
1215c2c9af9aSBernd Schubert 	uring_cmd_set_ring_ent(cmd, ent);
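	/* Defer the actual send to the ring task context, see fuse_uring_send_in_task() */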
1216c2c9af9aSBernd Schubert 	io_uring_cmd_complete_in_task(cmd, fuse_uring_send_in_task);
1217c2c9af9aSBernd Schubert }
1218c2c9af9aSBernd Schubert 
1219c2c9af9aSBernd Schubert /* queue a fuse request and send it if a ring entry is available */
1220c2c9af9aSBernd Schubert void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req)
1221c2c9af9aSBernd Schubert {
1222c2c9af9aSBernd Schubert 	struct fuse_conn *fc = req->fm->fc;
1223c2c9af9aSBernd Schubert 	struct fuse_ring *ring = fc->ring;
1224c2c9af9aSBernd Schubert 	struct fuse_ring_queue *queue;
1225c2c9af9aSBernd Schubert 	struct fuse_ring_ent *ent = NULL;
1226c2c9af9aSBernd Schubert 	int err;
1227c2c9af9aSBernd Schubert 
1228c2c9af9aSBernd Schubert 	err = -EINVAL;
1229c2c9af9aSBernd Schubert 	queue = fuse_uring_task_to_queue(ring);
1230c2c9af9aSBernd Schubert 	if (!queue)
1231c2c9af9aSBernd Schubert 		goto err;
1232c2c9af9aSBernd Schubert 
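	/* FUSE_NOTIFY_REPLY already carries the unique id of the notification it answers */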
1233c2c9af9aSBernd Schubert 	if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
1234c2c9af9aSBernd Schubert 		req->in.h.unique = fuse_get_unique(fiq);
1235c2c9af9aSBernd Schubert 
1236c2c9af9aSBernd Schubert 	spin_lock(&queue->lock);
1237c2c9af9aSBernd Schubert 	err = -ENOTCONN;
1238c2c9af9aSBernd Schubert 	if (unlikely(queue->stopped))
1239c2c9af9aSBernd Schubert 		goto err_unlock;
1240c2c9af9aSBernd Schubert 
1241c2c9af9aSBernd Schubert 	ent = list_first_entry_or_null(&queue->ent_avail_queue,
1242c2c9af9aSBernd Schubert 				       struct fuse_ring_ent, list);
1243c2c9af9aSBernd Schubert 	if (ent)
1244c2c9af9aSBernd Schubert 		fuse_uring_add_req_to_ring_ent(ent, req);
1245c2c9af9aSBernd Schubert 	else
1246c2c9af9aSBernd Schubert 		list_add_tail(&req->list, &queue->fuse_req_queue);
1247c2c9af9aSBernd Schubert 	spin_unlock(&queue->lock);
1248c2c9af9aSBernd Schubert 
1249c2c9af9aSBernd Schubert 	if (ent)
1250c2c9af9aSBernd Schubert 		fuse_uring_dispatch_ent(ent);
1251c2c9af9aSBernd Schubert 
1252c2c9af9aSBernd Schubert 	return;
1253c2c9af9aSBernd Schubert 
1254c2c9af9aSBernd Schubert err_unlock:
1255c2c9af9aSBernd Schubert 	spin_unlock(&queue->lock);
1256c2c9af9aSBernd Schubert err:
1257c2c9af9aSBernd Schubert 	req->out.h.error = err;
1258c2c9af9aSBernd Schubert 	clear_bit(FR_PENDING, &req->flags);
1259c2c9af9aSBernd Schubert 	fuse_request_end(req);
1260c2c9af9aSBernd Schubert }
1261c2c9af9aSBernd Schubert 
1262857b0263SBernd Schubert bool fuse_uring_queue_bq_req(struct fuse_req *req)
1263857b0263SBernd Schubert {
1264857b0263SBernd Schubert 	struct fuse_conn *fc = req->fm->fc;
1265857b0263SBernd Schubert 	struct fuse_ring *ring = fc->ring;
1266857b0263SBernd Schubert 	struct fuse_ring_queue *queue;
1267857b0263SBernd Schubert 	struct fuse_ring_ent *ent = NULL;
1268857b0263SBernd Schubert 
1269857b0263SBernd Schubert 	queue = fuse_uring_task_to_queue(ring);
1270857b0263SBernd Schubert 	if (!queue)
1271857b0263SBernd Schubert 		return false;
1272857b0263SBernd Schubert 
1273857b0263SBernd Schubert 	spin_lock(&queue->lock);
1274857b0263SBernd Schubert 	if (unlikely(queue->stopped)) {
1275857b0263SBernd Schubert 		spin_unlock(&queue->lock);
1276857b0263SBernd Schubert 		return false;
1277857b0263SBernd Schubert 	}
1278857b0263SBernd Schubert 
1279857b0263SBernd Schubert 	list_add_tail(&req->list, &queue->fuse_req_bg_queue);
1280857b0263SBernd Schubert 
1281857b0263SBernd Schubert 	ent = list_first_entry_or_null(&queue->ent_avail_queue,
1282857b0263SBernd Schubert 				       struct fuse_ring_ent, list);
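	/* Background accounting and per-queue bg flushing happen under fc->bg_lock */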
1283857b0263SBernd Schubert 	spin_lock(&fc->bg_lock);
1284857b0263SBernd Schubert 	fc->num_background++;
1285857b0263SBernd Schubert 	if (fc->num_background == fc->max_background)
1286857b0263SBernd Schubert 		fc->blocked = 1;
1287857b0263SBernd Schubert 	fuse_uring_flush_bg(queue);
1288857b0263SBernd Schubert 	spin_unlock(&fc->bg_lock);
1289857b0263SBernd Schubert 
1290857b0263SBernd Schubert 	/*
1291857b0263SBernd Schubert 	 * Due to bg_queue flush limits, there might be other bg requests
1292857b0263SBernd Schubert 	 * in the queue that need to be handled first, or no further
1293857b0263SBernd Schubert 	 * request might be available at all.
1294857b0263SBernd Schubert 	 */
1295857b0263SBernd Schubert 	req = list_first_entry_or_null(&queue->fuse_req_queue, struct fuse_req,
1296857b0263SBernd Schubert 				       list);
1297857b0263SBernd Schubert 	if (ent && req) {
1298857b0263SBernd Schubert 		fuse_uring_add_req_to_ring_ent(ent, req);
1299857b0263SBernd Schubert 		spin_unlock(&queue->lock);
1300857b0263SBernd Schubert 
1301857b0263SBernd Schubert 		fuse_uring_dispatch_ent(ent);
1302857b0263SBernd Schubert 	} else {
1303857b0263SBernd Schubert 		spin_unlock(&queue->lock);
1304857b0263SBernd Schubert 	}
1305857b0263SBernd Schubert 
1306857b0263SBernd Schubert 	return true;
1307857b0263SBernd Schubert }
1308857b0263SBernd Schubert 
1309c2c9af9aSBernd Schubert static const struct fuse_iqueue_ops fuse_io_uring_ops = {
1310c2c9af9aSBernd Schubert 	/* should be sent over io-uring as an enhancement */
1311c2c9af9aSBernd Schubert 	.send_forget = fuse_dev_queue_forget,
1312c2c9af9aSBernd Schubert 
1313c2c9af9aSBernd Schubert 	/*
1314c2c9af9aSBernd Schubert 	 * could be sent over io-uring, but interrupts should be rare,
1315c2c9af9aSBernd Schubert 	 * no need to make the code complex
1316c2c9af9aSBernd Schubert 	 */
1317c2c9af9aSBernd Schubert 	.send_interrupt = fuse_dev_queue_interrupt,
1318c2c9af9aSBernd Schubert 	.send_req = fuse_uring_queue_fuse_req,
1319c2c9af9aSBernd Schubert };
1320