xref: /linux/fs/fuse/dev_uring.c (revision 6238729bfce13f94b701766996a5d116d2df8bff)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * FUSE: Filesystem in Userspace
4  * Copyright (c) 2023-2024 DataDirect Networks.
5  */
6 
7 #include "fuse_i.h"
8 #include "dev_uring_i.h"
9 #include "fuse_dev_i.h"
10 #include "fuse_trace.h"
11 
12 #include <linux/fs.h>
13 #include <linux/io_uring/cmd.h>
14 
15 static bool __read_mostly enable_uring;
16 module_param(enable_uring, bool, 0644);
17 MODULE_PARM_DESC(enable_uring,
18 		 "Enable userspace communication through io-uring");
19 
20 #define FUSE_URING_IOV_SEGS 2 /* header and payload */
21 
22 
23 bool fuse_uring_enabled(void)
24 {
25 	return enable_uring;
26 }
27 
28 struct fuse_uring_pdu {
29 	struct fuse_ring_ent *ent;
30 };
31 
32 static const struct fuse_iqueue_ops fuse_io_uring_ops;
33 
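/*
 * Stash/retrieve the ring entry pointer in the io_uring command's private
 * per-command data (pdu), so the entry can be found again in the command's
 * task-work and cancel paths.
 */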
34 static void uring_cmd_set_ring_ent(struct io_uring_cmd *cmd,
35 				   struct fuse_ring_ent *ring_ent)
36 {
37 	struct fuse_uring_pdu *pdu =
38 		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu);
39 
40 	pdu->ent = ring_ent;
41 }
42 
43 static struct fuse_ring_ent *uring_cmd_to_ring_ent(struct io_uring_cmd *cmd)
44 {
45 	struct fuse_uring_pdu *pdu =
46 		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu);
47 
48 	return pdu->ent;
49 }
50 
51 static void fuse_uring_flush_bg(struct fuse_ring_queue *queue)
52 {
53 	struct fuse_ring *ring = queue->ring;
54 	struct fuse_conn *fc = ring->fc;
55 
56 	lockdep_assert_held(&queue->lock);
57 	lockdep_assert_held(&fc->bg_lock);
58 
59 	/*
60 	 * Allow one bg request per queue, ignoring global fc limits.
61 	 * This prevents a single queue from consuming all resources and
62 	 * eliminates the need for remote queue wake-ups when global
63 	 * limits are met but this queue has no more waiting requests.
64 	 */
65 	while ((fc->active_background < fc->max_background ||
66 		!queue->active_background) &&
67 	       (!list_empty(&queue->fuse_req_bg_queue))) {
68 		struct fuse_req *req;
69 
70 		req = list_first_entry(&queue->fuse_req_bg_queue,
71 				       struct fuse_req, list);
72 		fc->active_background++;
73 		queue->active_background++;
74 
75 		list_move_tail(&req->list, &queue->fuse_req_queue);
76 	}
77 }
78 
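/*
 * Finish a request that was handled through the ring: detach it from the
 * entry, update background accounting if needed and complete it towards
 * the fuse core.
 */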
79 static void fuse_uring_req_end(struct fuse_ring_ent *ent, struct fuse_req *req,
80 			       int error)
81 {
82 	struct fuse_ring_queue *queue = ent->queue;
83 	struct fuse_ring *ring = queue->ring;
84 	struct fuse_conn *fc = ring->fc;
85 
86 	lockdep_assert_not_held(&queue->lock);
87 	spin_lock(&queue->lock);
88 	ent->fuse_req = NULL;
89 	if (test_bit(FR_BACKGROUND, &req->flags)) {
90 		queue->active_background--;
91 		spin_lock(&fc->bg_lock);
92 		fuse_uring_flush_bg(queue);
93 		spin_unlock(&fc->bg_lock);
94 	}
95 
96 	spin_unlock(&queue->lock);
97 
98 	if (error)
99 		req->out.h.error = error;
100 
101 	clear_bit(FR_SENT, &req->flags);
102 	fuse_request_end(req);
103 }
104 
105 /* Abort all list-queued requests on the given ring queue */
106 static void fuse_uring_abort_end_queue_requests(struct fuse_ring_queue *queue)
107 {
108 	struct fuse_req *req;
109 	LIST_HEAD(req_list);
110 
111 	spin_lock(&queue->lock);
112 	list_for_each_entry(req, &queue->fuse_req_queue, list)
113 		clear_bit(FR_PENDING, &req->flags);
114 	list_splice_init(&queue->fuse_req_queue, &req_list);
115 	spin_unlock(&queue->lock);
116 
117 	/* must not hold queue lock to avoid order issues with fi->lock */
118 	fuse_dev_end_requests(&req_list);
119 }
120 
121 void fuse_uring_abort_end_requests(struct fuse_ring *ring)
122 {
123 	int qid;
124 	struct fuse_ring_queue *queue;
125 	struct fuse_conn *fc = ring->fc;
126 
127 	for (qid = 0; qid < ring->nr_queues; qid++) {
128 		queue = READ_ONCE(ring->queues[qid]);
129 		if (!queue)
130 			continue;
131 
132 		queue->stopped = true;
133 
134 		WARN_ON_ONCE(ring->fc->max_background != UINT_MAX);
135 		spin_lock(&queue->lock);
136 		spin_lock(&fc->bg_lock);
137 		fuse_uring_flush_bg(queue);
138 		spin_unlock(&fc->bg_lock);
139 		spin_unlock(&queue->lock);
140 		fuse_uring_abort_end_queue_requests(queue);
141 	}
142 }
143 
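/*
 * Requests in these lists are expected to be ordered by age, so checking
 * the first (oldest) entry is sufficient for timeout detection.
 */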
144 static bool ent_list_request_expired(struct fuse_conn *fc, struct list_head *list)
145 {
146 	struct fuse_ring_ent *ent;
147 	struct fuse_req *req;
148 
149 	ent = list_first_entry_or_null(list, struct fuse_ring_ent, list);
150 	if (!ent)
151 		return false;
152 
153 	req = ent->fuse_req;
154 
155 	return time_is_before_jiffies(req->create_time +
156 				      fc->timeout.req_timeout);
157 }
158 
159 bool fuse_uring_request_expired(struct fuse_conn *fc)
160 {
161 	struct fuse_ring *ring = fc->ring;
162 	struct fuse_ring_queue *queue;
163 	int qid;
164 
165 	if (!ring)
166 		return false;
167 
168 	for (qid = 0; qid < ring->nr_queues; qid++) {
169 		queue = READ_ONCE(ring->queues[qid]);
170 		if (!queue)
171 			continue;
172 
173 		spin_lock(&queue->lock);
174 		if (fuse_request_expired(fc, &queue->fuse_req_queue) ||
175 		    fuse_request_expired(fc, &queue->fuse_req_bg_queue) ||
176 		    ent_list_request_expired(fc, &queue->ent_w_req_queue) ||
177 		    ent_list_request_expired(fc, &queue->ent_in_userspace)) {
178 			spin_unlock(&queue->lock);
179 			return true;
180 		}
181 		spin_unlock(&queue->lock);
182 	}
183 
184 	return false;
185 }
186 
187 void fuse_uring_destruct(struct fuse_conn *fc)
188 {
189 	struct fuse_ring *ring = fc->ring;
190 	int qid;
191 
192 	if (!ring)
193 		return;
194 
195 	for (qid = 0; qid < ring->nr_queues; qid++) {
196 		struct fuse_ring_queue *queue = ring->queues[qid];
197 		struct fuse_ring_ent *ent, *next;
198 
199 		if (!queue)
200 			continue;
201 
202 		WARN_ON(!list_empty(&queue->ent_avail_queue));
203 		WARN_ON(!list_empty(&queue->ent_w_req_queue));
204 		WARN_ON(!list_empty(&queue->ent_commit_queue));
205 		WARN_ON(!list_empty(&queue->ent_in_userspace));
206 
207 		list_for_each_entry_safe(ent, next, &queue->ent_released,
208 					 list) {
209 			list_del_init(&ent->list);
210 			kfree(ent);
211 		}
212 
213 		kfree(queue->fpq.processing);
214 		kfree(queue);
215 		ring->queues[qid] = NULL;
216 	}
217 
218 	kfree(ring->queues);
219 	kfree(ring);
220 	fc->ring = NULL;
221 }
222 
223 /*
224  * Basic ring setup for this connection based on the provided configuration
225  */
226 static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc)
227 {
228 	struct fuse_ring *ring;
229 	size_t nr_queues = num_possible_cpus();
230 	struct fuse_ring *res = NULL;
231 	size_t max_payload_size;
232 
233 	ring = kzalloc(sizeof(*fc->ring), GFP_KERNEL_ACCOUNT);
234 	if (!ring)
235 		return NULL;
236 
237 	ring->queues = kcalloc(nr_queues, sizeof(struct fuse_ring_queue *),
238 			       GFP_KERNEL_ACCOUNT);
239 	if (!ring->queues)
240 		goto out_err;
241 
242 	max_payload_size = max(FUSE_MIN_READ_BUFFER, fc->max_write);
243 	max_payload_size = max(max_payload_size, fc->max_pages * PAGE_SIZE);
244 
245 	spin_lock(&fc->lock);
246 	if (fc->ring) {
247 		/* race, another thread created the ring in the meantime */
248 		spin_unlock(&fc->lock);
249 		res = fc->ring;
250 		goto out_err;
251 	}
252 
253 	init_waitqueue_head(&ring->stop_waitq);
254 
255 	ring->nr_queues = nr_queues;
256 	ring->fc = fc;
257 	ring->max_payload_sz = max_payload_size;
258 	smp_store_release(&fc->ring, ring);
259 
260 	spin_unlock(&fc->lock);
261 	return ring;
262 
263 out_err:
264 	kfree(ring->queues);
265 	kfree(ring);
266 	return res;
267 }
268 
269 static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring,
270 						       int qid)
271 {
272 	struct fuse_conn *fc = ring->fc;
273 	struct fuse_ring_queue *queue;
274 	struct list_head *pq;
275 
276 	queue = kzalloc(sizeof(*queue), GFP_KERNEL_ACCOUNT);
277 	if (!queue)
278 		return NULL;
279 	pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
280 	if (!pq) {
281 		kfree(queue);
282 		return NULL;
283 	}
284 
285 	queue->qid = qid;
286 	queue->ring = ring;
287 	spin_lock_init(&queue->lock);
288 
289 	INIT_LIST_HEAD(&queue->ent_avail_queue);
290 	INIT_LIST_HEAD(&queue->ent_commit_queue);
291 	INIT_LIST_HEAD(&queue->ent_w_req_queue);
292 	INIT_LIST_HEAD(&queue->ent_in_userspace);
293 	INIT_LIST_HEAD(&queue->fuse_req_queue);
294 	INIT_LIST_HEAD(&queue->fuse_req_bg_queue);
295 	INIT_LIST_HEAD(&queue->ent_released);
296 
297 	queue->fpq.processing = pq;
298 	fuse_pqueue_init(&queue->fpq);
299 
300 	spin_lock(&fc->lock);
301 	if (ring->queues[qid]) {
302 		spin_unlock(&fc->lock);
303 		kfree(queue->fpq.processing);
304 		kfree(queue);
305 		return ring->queues[qid];
306 	}
307 
308 	/*
309 	 * WRITE_ONCE and lock, as readers of ring->queues[] mostly don't take the lock at all
310 	 */
311 	WRITE_ONCE(ring->queues[qid], queue);
312 	spin_unlock(&fc->lock);
313 
314 	return queue;
315 }
316 
317 static void fuse_uring_stop_fuse_req_end(struct fuse_req *req)
318 {
319 	clear_bit(FR_SENT, &req->flags);
320 	req->out.h.error = -ECONNABORTED;
321 	fuse_request_end(req);
322 }
323 
324 /*
325  * Release a request/entry on connection tear down
326  */
327 static void fuse_uring_entry_teardown(struct fuse_ring_ent *ent)
328 {
329 	struct fuse_req *req;
330 	struct io_uring_cmd *cmd;
331 
332 	struct fuse_ring_queue *queue = ent->queue;
333 
334 	spin_lock(&queue->lock);
335 	cmd = ent->cmd;
336 	ent->cmd = NULL;
337 	req = ent->fuse_req;
338 	ent->fuse_req = NULL;
339 	if (req) {
340 		/* remove entry from queue->fpq->processing */
341 		list_del_init(&req->list);
342 	}
343 
344 	/*
345 	 * The entry must not be freed immediately, as it is accessed through
346 	 * direct pointers by IO_URING_F_CANCEL - there is a risk of a race
347 	 * with daemon termination, which triggers IO_URING_F_CANCEL and
348 	 * accesses entries without checking the list state first.
349 	 */
350 	list_move(&ent->list, &queue->ent_released);
351 	ent->state = FRRS_RELEASED;
352 	spin_unlock(&queue->lock);
353 
354 	if (cmd)
355 		io_uring_cmd_done(cmd, -ENOTCONN, IO_URING_F_UNLOCKED);
356 
357 	if (req)
358 		fuse_uring_stop_fuse_req_end(req);
359 }
360 
361 static void fuse_uring_stop_list_entries(struct list_head *head,
362 					 struct fuse_ring_queue *queue,
363 					 enum fuse_ring_req_state exp_state)
364 {
365 	struct fuse_ring *ring = queue->ring;
366 	struct fuse_ring_ent *ent, *next;
367 	ssize_t queue_refs = SSIZE_MAX;
368 	LIST_HEAD(to_teardown);
369 
370 	spin_lock(&queue->lock);
371 	list_for_each_entry_safe(ent, next, head, list) {
372 		if (ent->state != exp_state) {
373 			pr_warn("entry teardown qid=%d state=%d expected=%d\n",
374 				queue->qid, ent->state, exp_state);
375 			continue;
376 		}
377 
378 		ent->state = FRRS_TEARDOWN;
379 		list_move(&ent->list, &to_teardown);
380 	}
381 	spin_unlock(&queue->lock);
382 
383 	/* no queue lock to avoid lock order issues */
384 	list_for_each_entry_safe(ent, next, &to_teardown, list) {
385 		fuse_uring_entry_teardown(ent);
386 		queue_refs = atomic_dec_return(&ring->queue_refs);
387 		WARN_ON_ONCE(queue_refs < 0);
388 	}
389 }
390 
391 static void fuse_uring_teardown_entries(struct fuse_ring_queue *queue)
392 {
393 	fuse_uring_stop_list_entries(&queue->ent_in_userspace, queue,
394 				     FRRS_USERSPACE);
395 	fuse_uring_stop_list_entries(&queue->ent_avail_queue, queue,
396 				     FRRS_AVAILABLE);
397 }
398 
399 /*
400  * Log state debug info
401  */
402 static void fuse_uring_log_ent_state(struct fuse_ring *ring)
403 {
404 	int qid;
405 	struct fuse_ring_ent *ent;
406 
407 	for (qid = 0; qid < ring->nr_queues; qid++) {
408 		struct fuse_ring_queue *queue = ring->queues[qid];
409 
410 		if (!queue)
411 			continue;
412 
413 		spin_lock(&queue->lock);
414 		/*
415 		 * Log entries from the intermediate queues, the other queues
416 		 * should be empty
417 		 */
418 		list_for_each_entry(ent, &queue->ent_w_req_queue, list) {
419 			pr_info(" ent-req-queue ring=%p qid=%d ent=%p state=%d\n",
420 				ring, qid, ent, ent->state);
421 		}
422 		list_for_each_entry(ent, &queue->ent_commit_queue, list) {
423 			pr_info(" ent-commit-queue ring=%p qid=%d ent=%p state=%d\n",
424 				ring, qid, ent, ent->state);
425 		}
426 		spin_unlock(&queue->lock);
427 	}
428 	ring->stop_debug_log = 1;
429 }
430 
431 static void fuse_uring_async_stop_queues(struct work_struct *work)
432 {
433 	int qid;
434 	struct fuse_ring *ring =
435 		container_of(work, struct fuse_ring, async_teardown_work.work);
436 
437 	/* XXX code dup */
438 	for (qid = 0; qid < ring->nr_queues; qid++) {
439 		struct fuse_ring_queue *queue = READ_ONCE(ring->queues[qid]);
440 
441 		if (!queue)
442 			continue;
443 
444 		fuse_uring_teardown_entries(queue);
445 	}
446 
447 	/*
448 	 * Some ring entries might be in the middle of IO operations,
449 	 * i.e. about to get handled by file_operations::uring_cmd
450 	 * or on the way to userspace - we could handle that with conditions
451 	 * in run-time code, but it is easier/cleaner to have an async
452 	 * teardown handler if there are still queue references left.
453 	 */
454 	if (atomic_read(&ring->queue_refs) > 0) {
455 		if (time_after(jiffies,
456 			       ring->teardown_time + FUSE_URING_TEARDOWN_TIMEOUT))
457 			fuse_uring_log_ent_state(ring);
458 
459 		schedule_delayed_work(&ring->async_teardown_work,
460 				      FUSE_URING_TEARDOWN_INTERVAL);
461 	} else {
462 		wake_up_all(&ring->stop_waitq);
463 	}
464 }
465 
466 /*
467  * Stop the ring queues
468  */
469 void fuse_uring_stop_queues(struct fuse_ring *ring)
470 {
471 	int qid;
472 
473 	for (qid = 0; qid < ring->nr_queues; qid++) {
474 		struct fuse_ring_queue *queue = READ_ONCE(ring->queues[qid]);
475 
476 		if (!queue)
477 			continue;
478 
479 		fuse_uring_teardown_entries(queue);
480 	}
481 
482 	if (atomic_read(&ring->queue_refs) > 0) {
483 		ring->teardown_time = jiffies;
484 		INIT_DELAYED_WORK(&ring->async_teardown_work,
485 				  fuse_uring_async_stop_queues);
486 		schedule_delayed_work(&ring->async_teardown_work,
487 				      FUSE_URING_TEARDOWN_INTERVAL);
488 	} else {
489 		wake_up_all(&ring->stop_waitq);
490 	}
491 }
492 
493 /*
494  * Handle IO_URING_F_CANCEL, which typically comes on daemon termination.
495  *
496  * Releasing the last entry should trigger fuse_dev_release() if
497  * the daemon was terminated
498  */
499 static void fuse_uring_cancel(struct io_uring_cmd *cmd,
500 			      unsigned int issue_flags)
501 {
502 	struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
503 	struct fuse_ring_queue *queue;
504 	bool need_cmd_done = false;
505 
506 	/*
507 	 * direct access on ent - it must not be destructed as long as
508 	 * IO_URING_F_CANCEL might come up
509 	 */
510 	queue = ent->queue;
511 	spin_lock(&queue->lock);
512 	if (ent->state == FRRS_AVAILABLE) {
513 		ent->state = FRRS_USERSPACE;
514 		list_move_tail(&ent->list, &queue->ent_in_userspace);
515 		need_cmd_done = true;
516 		ent->cmd = NULL;
517 	}
518 	spin_unlock(&queue->lock);
519 
520 	if (need_cmd_done) {
521 		/* no queue lock to avoid lock order issues */
522 		io_uring_cmd_done(cmd, -ENOTCONN, issue_flags);
523 	}
524 }
525 
526 static void fuse_uring_prepare_cancel(struct io_uring_cmd *cmd, int issue_flags,
527 				      struct fuse_ring_ent *ring_ent)
528 {
529 	uring_cmd_set_ring_ent(cmd, ring_ent);
530 	io_uring_cmd_mark_cancelable(cmd, issue_flags);
531 }
532 
533 /*
534  * Check for errors and store them in the request
535  */
536 static int fuse_uring_out_header_has_err(struct fuse_out_header *oh,
537 					 struct fuse_req *req,
538 					 struct fuse_conn *fc)
539 {
540 	int err;
541 
542 	err = -EINVAL;
543 	if (oh->unique == 0) {
544 		/* Not supported through io-uring yet */
545 		pr_warn_once("notify through fuse-io-uring not supported\n");
546 		goto err;
547 	}
548 
549 	if (oh->error <= -ERESTARTSYS || oh->error > 0)
550 		goto err;
551 
552 	if (oh->error) {
553 		err = oh->error;
554 		goto err;
555 	}
556 
557 	err = -ENOENT;
558 	if ((oh->unique & ~FUSE_INT_REQ_BIT) != req->in.h.unique) {
559 		pr_warn_ratelimited("unique mismatch, expected: %llu got %llu\n",
560 				    req->in.h.unique,
561 				    oh->unique & ~FUSE_INT_REQ_BIT);
562 		goto err;
563 	}
564 
565 	/*
566 	 * Is it an interrupt reply ID?
567 	 * XXX: Not supported through fuse-io-uring yet, it should not even
568 	 *      find the request - should not happen.
569 	 */
570 	WARN_ON_ONCE(oh->unique & FUSE_INT_REQ_BIT);
571 
572 	err = 0;
573 err:
574 	return err;
575 }
576 
577 static int fuse_uring_copy_from_ring(struct fuse_ring *ring,
578 				     struct fuse_req *req,
579 				     struct fuse_ring_ent *ent)
580 {
581 	struct fuse_copy_state cs;
582 	struct fuse_args *args = req->args;
583 	struct iov_iter iter;
584 	int err;
585 	struct fuse_uring_ent_in_out ring_in_out;
586 
587 	err = copy_from_user(&ring_in_out, &ent->headers->ring_ent_in_out,
588 			     sizeof(ring_in_out));
589 	if (err)
590 		return -EFAULT;
591 
592 	err = import_ubuf(ITER_SOURCE, ent->payload, ring->max_payload_sz,
593 			  &iter);
594 	if (err)
595 		return err;
596 
597 	fuse_copy_init(&cs, false, &iter);
598 	cs.is_uring = true;
599 	cs.req = req;
600 
601 	return fuse_copy_out_args(&cs, args, ring_in_out.payload_sz);
602 }
603 
604 /*
605  * Copy data from the req to the ring buffer
606  */
607 static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req,
608 				   struct fuse_ring_ent *ent)
609 {
610 	struct fuse_copy_state cs;
611 	struct fuse_args *args = req->args;
612 	struct fuse_in_arg *in_args = args->in_args;
613 	int num_args = args->in_numargs;
614 	int err;
615 	struct iov_iter iter;
616 	struct fuse_uring_ent_in_out ent_in_out = {
617 		.flags = 0,
618 		.commit_id = req->in.h.unique,
619 	};
620 
621 	err = import_ubuf(ITER_DEST, ent->payload, ring->max_payload_sz, &iter);
622 	if (err) {
623 		pr_info_ratelimited("fuse: Import of user buffer failed\n");
624 		return err;
625 	}
626 
627 	fuse_copy_init(&cs, true, &iter);
628 	cs.is_uring = true;
629 	cs.req = req;
630 
631 	if (num_args > 0) {
632 		/*
633 		 * The expectation is that the first argument is the per-op header.
634 		 * Some opcodes have a zero-size header.
635 		 */
636 		if (args->in_args[0].size > 0) {
637 			err = copy_to_user(&ent->headers->op_in, in_args->value,
638 					   in_args->size);
639 			if (err) {
640 				pr_info_ratelimited(
641 					"Copying the header failed.\n");
642 				return -EFAULT;
643 			}
644 		}
645 		in_args++;
646 		num_args--;
647 	}
648 
649 	/* copy the payload */
650 	err = fuse_copy_args(&cs, num_args, args->in_pages,
651 			     (struct fuse_arg *)in_args, 0);
652 	if (err) {
653 		pr_info_ratelimited("%s fuse_copy_args failed\n", __func__);
654 		return err;
655 	}
656 
657 	ent_in_out.payload_sz = cs.ring.copied_sz;
658 	err = copy_to_user(&ent->headers->ring_ent_in_out, &ent_in_out,
659 			   sizeof(ent_in_out));
660 	return err ? -EFAULT : 0;
661 }
662 
663 static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent,
664 				   struct fuse_req *req)
665 {
666 	struct fuse_ring_queue *queue = ent->queue;
667 	struct fuse_ring *ring = queue->ring;
668 	int err;
669 
670 	err = -EIO;
671 	if (WARN_ON(ent->state != FRRS_FUSE_REQ)) {
672 		pr_err("qid=%d ring-req=%p invalid state %d on send\n",
673 		       queue->qid, ent, ent->state);
674 		return err;
675 	}
676 
677 	err = -EINVAL;
678 	if (WARN_ON(req->in.h.unique == 0))
679 		return err;
680 
681 	/* copy the request */
682 	err = fuse_uring_args_to_ring(ring, req, ent);
683 	if (unlikely(err)) {
684 		pr_info_ratelimited("Copy to ring failed: %d\n", err);
685 		return err;
686 	}
687 
688 	/* copy fuse_in_header */
689 	err = copy_to_user(&ent->headers->in_out, &req->in.h,
690 			   sizeof(req->in.h));
691 	if (err) {
692 		err = -EFAULT;
693 		return err;
694 	}
695 
696 	return 0;
697 }
698 
699 static int fuse_uring_prepare_send(struct fuse_ring_ent *ent,
700 				   struct fuse_req *req)
701 {
702 	int err;
703 
704 	err = fuse_uring_copy_to_ring(ent, req);
705 	if (!err)
706 		set_bit(FR_SENT, &req->flags);
707 	else
708 		fuse_uring_req_end(ent, req, err);
709 
710 	return err;
711 }
712 
713 /*
714  * Write data to the ring buffer and send the request to userspace,
715  * userspace will read it.
716  * This is comparable to a classical read(/dev/fuse).
717  */
718 static int fuse_uring_send_next_to_ring(struct fuse_ring_ent *ent,
719 					struct fuse_req *req,
720 					unsigned int issue_flags)
721 {
722 	struct fuse_ring_queue *queue = ent->queue;
723 	int err;
724 	struct io_uring_cmd *cmd;
725 
726 	err = fuse_uring_prepare_send(ent, req);
727 	if (err)
728 		return err;
729 
730 	spin_lock(&queue->lock);
731 	cmd = ent->cmd;
732 	ent->cmd = NULL;
733 	ent->state = FRRS_USERSPACE;
734 	list_move_tail(&ent->list, &queue->ent_in_userspace);
735 	spin_unlock(&queue->lock);
736 
737 	io_uring_cmd_done(cmd, 0, issue_flags);
738 	return 0;
739 }
740 
741 /*
742  * Make a ring entry available for fuse_req assignment
743  */
744 static void fuse_uring_ent_avail(struct fuse_ring_ent *ent,
745 				 struct fuse_ring_queue *queue)
746 {
747 	WARN_ON_ONCE(!ent->cmd);
748 	list_move(&ent->list, &queue->ent_avail_queue);
749 	ent->state = FRRS_AVAILABLE;
750 }
751 
752 /* Used to find the request on SQE commit */
753 static void fuse_uring_add_to_pq(struct fuse_ring_ent *ent,
754 				 struct fuse_req *req)
755 {
756 	struct fuse_ring_queue *queue = ent->queue;
757 	struct fuse_pqueue *fpq = &queue->fpq;
758 	unsigned int hash;
759 
760 	req->ring_entry = ent;
761 	hash = fuse_req_hash(req->in.h.unique);
762 	list_move_tail(&req->list, &fpq->processing[hash]);
763 }
764 
765 /*
766  * Assign a fuse request to the given ring entry
767  */
768 static void fuse_uring_add_req_to_ring_ent(struct fuse_ring_ent *ent,
769 					   struct fuse_req *req)
770 {
771 	struct fuse_ring_queue *queue = ent->queue;
772 
773 	lockdep_assert_held(&queue->lock);
774 
775 	if (WARN_ON_ONCE(ent->state != FRRS_AVAILABLE &&
776 			 ent->state != FRRS_COMMIT)) {
777 		pr_warn("%s qid=%d state=%d\n", __func__, ent->queue->qid,
778 			ent->state);
779 	}
780 
781 	clear_bit(FR_PENDING, &req->flags);
782 	ent->fuse_req = req;
783 	ent->state = FRRS_FUSE_REQ;
784 	list_move_tail(&ent->list, &queue->ent_w_req_queue);
785 	fuse_uring_add_to_pq(ent, req);
786 }
787 
788 /* Fetch the next fuse request if available */
789 static struct fuse_req *fuse_uring_ent_assign_req(struct fuse_ring_ent *ent)
790 	__must_hold(&queue->lock)
791 {
792 	struct fuse_req *req;
793 	struct fuse_ring_queue *queue = ent->queue;
794 	struct list_head *req_queue = &queue->fuse_req_queue;
795 
796 	lockdep_assert_held(&queue->lock);
797 
798 	/* get and assign the next request while still holding the lock */
799 	req = list_first_entry_or_null(req_queue, struct fuse_req, list);
800 	if (req)
801 		fuse_uring_add_req_to_ring_ent(ent, req);
802 
803 	return req;
804 }
805 
806 /*
807  * Read data from the ring buffer, which user space has written to.
808  * This is comparable to the handling of a classical write(/dev/fuse).
809  * Also make the ring request available again for new fuse requests.
810  */
811 static void fuse_uring_commit(struct fuse_ring_ent *ent, struct fuse_req *req,
812 			      unsigned int issue_flags)
813 {
814 	struct fuse_ring *ring = ent->queue->ring;
815 	struct fuse_conn *fc = ring->fc;
816 	ssize_t err = 0;
817 
818 	err = copy_from_user(&req->out.h, &ent->headers->in_out,
819 			     sizeof(req->out.h));
820 	if (err) {
821 		req->out.h.error = -EFAULT;
822 		goto out;
823 	}
824 
825 	err = fuse_uring_out_header_has_err(&req->out.h, req, fc);
826 	if (err) {
827 		/* req->out.h.error already set */
828 		goto out;
829 	}
830 
831 	err = fuse_uring_copy_from_ring(ring, req, ent);
832 out:
833 	fuse_uring_req_end(ent, req, err);
834 }
835 
836 /*
837  * Get the next fuse req and send it
838  */
839 static void fuse_uring_next_fuse_req(struct fuse_ring_ent *ent,
840 				     struct fuse_ring_queue *queue,
841 				     unsigned int issue_flags)
842 {
843 	int err;
844 	struct fuse_req *req;
845 
846 retry:
847 	spin_lock(&queue->lock);
848 	fuse_uring_ent_avail(ent, queue);
849 	req = fuse_uring_ent_assign_req(ent);
850 	spin_unlock(&queue->lock);
851 
852 	if (req) {
853 		err = fuse_uring_send_next_to_ring(ent, req, issue_flags);
854 		if (err)
855 			goto retry;
856 	}
857 }
858 
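/* Mark an entry returned by userspace as committing its result */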
859 static int fuse_ring_ent_set_commit(struct fuse_ring_ent *ent)
860 {
861 	struct fuse_ring_queue *queue = ent->queue;
862 
863 	lockdep_assert_held(&queue->lock);
864 
865 	if (WARN_ON_ONCE(ent->state != FRRS_USERSPACE))
866 		return -EIO;
867 
868 	ent->state = FRRS_COMMIT;
869 	list_move(&ent->list, &queue->ent_commit_queue);
870 
871 	return 0;
872 }
873 
874 /* FUSE_URING_CMD_COMMIT_AND_FETCH handler */
875 static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
876 				   struct fuse_conn *fc)
877 {
878 	const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe);
879 	struct fuse_ring_ent *ent;
880 	int err;
881 	struct fuse_ring *ring = fc->ring;
882 	struct fuse_ring_queue *queue;
883 	uint64_t commit_id = READ_ONCE(cmd_req->commit_id);
884 	unsigned int qid = READ_ONCE(cmd_req->qid);
885 	struct fuse_pqueue *fpq;
886 	struct fuse_req *req;
887 
888 	err = -ENOTCONN;
889 	if (!ring)
890 		return err;
891 
892 	if (qid >= ring->nr_queues)
893 		return -EINVAL;
894 
895 	queue = ring->queues[qid];
896 	if (!queue)
897 		return err;
898 	fpq = &queue->fpq;
899 
900 	if (!READ_ONCE(fc->connected) || READ_ONCE(queue->stopped))
901 		return err;
902 
903 	spin_lock(&queue->lock);
904 	/* Find a request based on the unique ID of the fuse request.
905 	 * This should get revised, as it needs a hash calculation and list
906 	 * search, and the full struct fuse_pqueue is needed (memory overhead),
907 	 * as well as the link from req to ring_ent.
908 	 */
909 	req = fuse_request_find(fpq, commit_id);
910 	err = -ENOENT;
911 	if (!req) {
912 		pr_info("qid=%d commit_id %llu not found\n", queue->qid,
913 			commit_id);
914 		spin_unlock(&queue->lock);
915 		return err;
916 	}
917 	list_del_init(&req->list);
918 	ent = req->ring_entry;
919 	req->ring_entry = NULL;
920 
921 	err = fuse_ring_ent_set_commit(ent);
922 	if (err != 0) {
923 		pr_info_ratelimited("qid=%d commit_id %llu state %d\n",
924 				    queue->qid, commit_id, ent->state);
925 		spin_unlock(&queue->lock);
926 		req->out.h.error = err;
927 		clear_bit(FR_SENT, &req->flags);
928 		fuse_request_end(req);
929 		return err;
930 	}
931 
932 	ent->cmd = cmd;
933 	spin_unlock(&queue->lock);
934 
935 	/* without the queue lock, as other locks are taken */
936 	fuse_uring_prepare_cancel(cmd, issue_flags, ent);
937 	fuse_uring_commit(ent, req, issue_flags);
938 
939 	/*
940 	 * Fetching the next request is absolutely required as queued
941 	 * fuse requests would otherwise not get processed - committing
942 	 * and fetching is done in one step vs legacy fuse, which has separate
943 	 * read (fetch request) and write (commit result).
944 	 */
945 	fuse_uring_next_fuse_req(ent, queue, issue_flags);
946 	return 0;
947 }
948 
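/*
 * The ring is considered ready once every queue has at least one available
 * entry (the caller's queue is skipped, as it is just getting one).
 */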
949 static bool is_ring_ready(struct fuse_ring *ring, int current_qid)
950 {
951 	int qid;
952 	struct fuse_ring_queue *queue;
953 	bool ready = true;
954 
955 	for (qid = 0; qid < ring->nr_queues && ready; qid++) {
956 		if (current_qid == qid)
957 			continue;
958 
959 		queue = ring->queues[qid];
960 		if (!queue) {
961 			ready = false;
962 			break;
963 		}
964 
965 		spin_lock(&queue->lock);
966 		if (list_empty(&queue->ent_avail_queue))
967 			ready = false;
968 		spin_unlock(&queue->lock);
969 	}
970 
971 	return ready;
972 }
973 
974 /*
975  * fuse_uring_req_fetch command handling
976  */
977 static void fuse_uring_do_register(struct fuse_ring_ent *ent,
978 				   struct io_uring_cmd *cmd,
979 				   unsigned int issue_flags)
980 {
981 	struct fuse_ring_queue *queue = ent->queue;
982 	struct fuse_ring *ring = queue->ring;
983 	struct fuse_conn *fc = ring->fc;
984 	struct fuse_iqueue *fiq = &fc->iq;
985 
986 	fuse_uring_prepare_cancel(cmd, issue_flags, ent);
987 
988 	spin_lock(&queue->lock);
989 	ent->cmd = cmd;
990 	fuse_uring_ent_avail(ent, queue);
991 	spin_unlock(&queue->lock);
992 
993 	if (!ring->ready) {
994 		bool ready = is_ring_ready(ring, queue->qid);
995 
996 		if (ready) {
997 			WRITE_ONCE(fiq->ops, &fuse_io_uring_ops);
998 			WRITE_ONCE(ring->ready, true);
999 			wake_up_all(&fc->blocked_waitq);
1000 		}
1001 	}
1002 }
1003 
1004 /*
1005  * sqe->addr is a ptr to an iovec array, iov[0] has the headers, iov[1]
1006  * the payload
1007  */
1008 static int fuse_uring_get_iovec_from_sqe(const struct io_uring_sqe *sqe,
1009 					 struct iovec iov[FUSE_URING_IOV_SEGS])
1010 {
1011 	struct iovec __user *uiov = u64_to_user_ptr(READ_ONCE(sqe->addr));
1012 	struct iov_iter iter;
1013 	ssize_t ret;
1014 
1015 	if (sqe->len != FUSE_URING_IOV_SEGS)
1016 		return -EINVAL;
1017 
1018 	/*
1019 	 * The direction for buffer access will actually be READ and WRITE;
1020 	 * using WRITE for the import should include READ access as well.
1021 	 */
1022 	ret = import_iovec(WRITE, uiov, FUSE_URING_IOV_SEGS,
1023 			   FUSE_URING_IOV_SEGS, &iov, &iter);
1024 	if (ret < 0)
1025 		return ret;
1026 
1027 	return 0;
1028 }
1029 
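/*
 * Allocate a ring entry for a registration SQE and record the userspace
 * header and payload buffer addresses taken from the SQE iovec.
 */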
1030 static struct fuse_ring_ent *
1031 fuse_uring_create_ring_ent(struct io_uring_cmd *cmd,
1032 			   struct fuse_ring_queue *queue)
1033 {
1034 	struct fuse_ring *ring = queue->ring;
1035 	struct fuse_ring_ent *ent;
1036 	size_t payload_size;
1037 	struct iovec iov[FUSE_URING_IOV_SEGS];
1038 	int err;
1039 
1040 	err = fuse_uring_get_iovec_from_sqe(cmd->sqe, iov);
1041 	if (err) {
1042 		pr_info_ratelimited("Failed to get iovec from sqe, err=%d\n",
1043 				    err);
1044 		return ERR_PTR(err);
1045 	}
1046 
1047 	err = -EINVAL;
1048 	if (iov[0].iov_len < sizeof(struct fuse_uring_req_header)) {
1049 		pr_info_ratelimited("Invalid header len %zu\n", iov[0].iov_len);
1050 		return ERR_PTR(err);
1051 	}
1052 
1053 	payload_size = iov[1].iov_len;
1054 	if (payload_size < ring->max_payload_sz) {
1055 		pr_info_ratelimited("Invalid req payload len %zu\n",
1056 				    payload_size);
1057 		return ERR_PTR(err);
1058 	}
1059 
1060 	err = -ENOMEM;
1061 	ent = kzalloc(sizeof(*ent), GFP_KERNEL_ACCOUNT);
1062 	if (!ent)
1063 		return ERR_PTR(err);
1064 
1065 	INIT_LIST_HEAD(&ent->list);
1066 
1067 	ent->queue = queue;
1068 	ent->headers = iov[0].iov_base;
1069 	ent->payload = iov[1].iov_base;
1070 
1071 	atomic_inc(&ring->queue_refs);
1072 	return ent;
1073 }
1074 
1075 /*
1076  * Register the header and payload buffer with the kernel and put the
1077  * entry on the queue as "ready to get fuse requests"
1078  */
1079 static int fuse_uring_register(struct io_uring_cmd *cmd,
1080 			       unsigned int issue_flags, struct fuse_conn *fc)
1081 {
1082 	const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe);
1083 	struct fuse_ring *ring = smp_load_acquire(&fc->ring);
1084 	struct fuse_ring_queue *queue;
1085 	struct fuse_ring_ent *ent;
1086 	int err;
1087 	unsigned int qid = READ_ONCE(cmd_req->qid);
1088 
1089 	err = -ENOMEM;
1090 	if (!ring) {
1091 		ring = fuse_uring_create(fc);
1092 		if (!ring)
1093 			return err;
1094 	}
1095 
1096 	if (qid >= ring->nr_queues) {
1097 		pr_info_ratelimited("fuse: Invalid ring qid %u\n", qid);
1098 		return -EINVAL;
1099 	}
1100 
1101 	queue = ring->queues[qid];
1102 	if (!queue) {
1103 		queue = fuse_uring_create_queue(ring, qid);
1104 		if (!queue)
1105 			return err;
1106 	}
1107 
1108 	/*
1109 	 * The queue created above does not need to be destructed in case
1110 	 * of entry errors below; that will be done at ring destruction time.
1111 	 */
1112 
1113 	ent = fuse_uring_create_ring_ent(cmd, queue);
1114 	if (IS_ERR(ent))
1115 		return PTR_ERR(ent);
1116 
1117 	fuse_uring_do_register(ent, cmd, issue_flags);
1118 
1119 	return 0;
1120 }
1121 
1122 /*
1123  * Entry function from io_uring to handle the given passthrough command
1124  * (op code IORING_OP_URING_CMD)
1125  */
1126 int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
1127 {
1128 	struct fuse_dev *fud;
1129 	struct fuse_conn *fc;
1130 	u32 cmd_op = cmd->cmd_op;
1131 	int err;
1132 
1133 	if ((unlikely(issue_flags & IO_URING_F_CANCEL))) {
1134 		fuse_uring_cancel(cmd, issue_flags);
1135 		return 0;
1136 	}
1137 
1138 	/* This extra SQE size holds struct fuse_uring_cmd_req */
1139 	if (!(issue_flags & IO_URING_F_SQE128))
1140 		return -EINVAL;
1141 
1142 	fud = fuse_get_dev(cmd->file);
1143 	if (IS_ERR(fud)) {
1144 		pr_info_ratelimited("No fuse device found\n");
1145 		return PTR_ERR(fud);
1146 	}
1147 	fc = fud->fc;
1148 
1149 	/* Once a connection has io-uring enabled on it, it can't be disabled */
1150 	if (!enable_uring && !fc->io_uring) {
1151 		pr_info_ratelimited("fuse-io-uring is disabled\n");
1152 		return -EOPNOTSUPP;
1153 	}
1154 
1155 	if (fc->aborted)
1156 		return -ECONNABORTED;
1157 	if (!fc->connected)
1158 		return -ENOTCONN;
1159 
1160 	/*
1161 	 * fuse_uring_register() needs the connection to be initialized,
1162 	 * as we need to know the max payload size.
1163 	 */
1164 	if (!fc->initialized)
1165 		return -EAGAIN;
1166 
1167 	switch (cmd_op) {
1168 	case FUSE_IO_URING_CMD_REGISTER:
1169 		err = fuse_uring_register(cmd, issue_flags, fc);
1170 		if (err) {
1171 			pr_info_once("FUSE_IO_URING_CMD_REGISTER failed err=%d\n",
1172 				     err);
1173 			fc->io_uring = 0;
1174 			wake_up_all(&fc->blocked_waitq);
1175 			return err;
1176 		}
1177 		break;
1178 	case FUSE_IO_URING_CMD_COMMIT_AND_FETCH:
1179 		err = fuse_uring_commit_fetch(cmd, issue_flags, fc);
1180 		if (err) {
1181 			pr_info_once("FUSE_IO_URING_COMMIT_AND_FETCH failed err=%d\n",
1182 				     err);
1183 			return err;
1184 		}
1185 		break;
1186 	default:
1187 		return -EINVAL;
1188 	}
1189 
1190 	return -EIOCBQUEUED;
1191 }
1192 
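/*
 * Hand an entry over to userspace: mark it as being in userspace and
 * complete the io_uring command with the given result.
 */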
1193 static void fuse_uring_send(struct fuse_ring_ent *ent, struct io_uring_cmd *cmd,
1194 			    ssize_t ret, unsigned int issue_flags)
1195 {
1196 	struct fuse_ring_queue *queue = ent->queue;
1197 
1198 	spin_lock(&queue->lock);
1199 	ent->state = FRRS_USERSPACE;
1200 	list_move_tail(&ent->list, &queue->ent_in_userspace);
1201 	ent->cmd = NULL;
1202 	spin_unlock(&queue->lock);
1203 
1204 	io_uring_cmd_done(cmd, ret, issue_flags);
1205 }
1206 
1207 /*
1208  * This prepares and sends the ring request in fuse-uring task context.
1209  * User buffers are not mapped yet - the application does not have permission
1210  * to write to them - this has to be executed in ring task context.
1211  */
1212 static void fuse_uring_send_in_task(struct io_uring_cmd *cmd,
1213 				    unsigned int issue_flags)
1214 {
1215 	struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
1216 	struct fuse_ring_queue *queue = ent->queue;
1217 	int err;
1218 
1219 	if (!(issue_flags & IO_URING_F_TASK_DEAD)) {
1220 		err = fuse_uring_prepare_send(ent, ent->fuse_req);
1221 		if (err) {
1222 			fuse_uring_next_fuse_req(ent, queue, issue_flags);
1223 			return;
1224 		}
1225 	} else {
1226 		err = -ECANCELED;
1227 	}
1228 
1229 	fuse_uring_send(ent, cmd, err, issue_flags);
1230 }
1231 
1232 static struct fuse_ring_queue *fuse_uring_task_to_queue(struct fuse_ring *ring)
1233 {
1234 	unsigned int qid;
1235 	struct fuse_ring_queue *queue;
1236 
1237 	qid = task_cpu(current);
1238 
1239 	if (WARN_ONCE(qid >= ring->nr_queues,
1240 		      "Core number (%u) exceeds nr queues (%zu)\n", qid,
1241 		      ring->nr_queues))
1242 		qid = 0;
1243 
1244 	queue = ring->queues[qid];
1245 	WARN_ONCE(!queue, "Missing queue for qid %d\n", qid);
1246 
1247 	return queue;
1248 }
1249 
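/*
 * Defer sending through io_uring task work, so that the copy to the ring
 * buffers runs in the daemon's task context (see fuse_uring_send_in_task()).
 */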
1250 static void fuse_uring_dispatch_ent(struct fuse_ring_ent *ent)
1251 {
1252 	struct io_uring_cmd *cmd = ent->cmd;
1253 
1254 	uring_cmd_set_ring_ent(cmd, ent);
1255 	io_uring_cmd_complete_in_task(cmd, fuse_uring_send_in_task);
1256 }
1257 
1258 /* queue a fuse request and send it if a ring entry is available */
1259 void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req)
1260 {
1261 	struct fuse_conn *fc = req->fm->fc;
1262 	struct fuse_ring *ring = fc->ring;
1263 	struct fuse_ring_queue *queue;
1264 	struct fuse_ring_ent *ent = NULL;
1265 	int err;
1266 
1267 	err = -EINVAL;
1268 	queue = fuse_uring_task_to_queue(ring);
1269 	if (!queue)
1270 		goto err;
1271 
1272 	fuse_request_assign_unique(fiq, req);
1273 
1274 	spin_lock(&queue->lock);
1275 	err = -ENOTCONN;
1276 	if (unlikely(queue->stopped))
1277 		goto err_unlock;
1278 
1279 	set_bit(FR_URING, &req->flags);
1280 	req->ring_queue = queue;
1281 	ent = list_first_entry_or_null(&queue->ent_avail_queue,
1282 				       struct fuse_ring_ent, list);
1283 	if (ent)
1284 		fuse_uring_add_req_to_ring_ent(ent, req);
1285 	else
1286 		list_add_tail(&req->list, &queue->fuse_req_queue);
1287 	spin_unlock(&queue->lock);
1288 
1289 	if (ent)
1290 		fuse_uring_dispatch_ent(ent);
1291 
1292 	return;
1293 
1294 err_unlock:
1295 	spin_unlock(&queue->lock);
1296 err:
1297 	req->out.h.error = err;
1298 	clear_bit(FR_PENDING, &req->flags);
1299 	fuse_request_end(req);
1300 }
1301 
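/*
 * Queue a background request through io-uring; returns false if no usable
 * ring queue is available.
 */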
1302 bool fuse_uring_queue_bq_req(struct fuse_req *req)
1303 {
1304 	struct fuse_conn *fc = req->fm->fc;
1305 	struct fuse_ring *ring = fc->ring;
1306 	struct fuse_ring_queue *queue;
1307 	struct fuse_ring_ent *ent = NULL;
1308 
1309 	queue = fuse_uring_task_to_queue(ring);
1310 	if (!queue)
1311 		return false;
1312 
1313 	spin_lock(&queue->lock);
1314 	if (unlikely(queue->stopped)) {
1315 		spin_unlock(&queue->lock);
1316 		return false;
1317 	}
1318 
1319 	set_bit(FR_URING, &req->flags);
1320 	req->ring_queue = queue;
1321 	list_add_tail(&req->list, &queue->fuse_req_bg_queue);
1322 
1323 	ent = list_first_entry_or_null(&queue->ent_avail_queue,
1324 				       struct fuse_ring_ent, list);
1325 	spin_lock(&fc->bg_lock);
1326 	fc->num_background++;
1327 	if (fc->num_background == fc->max_background)
1328 		fc->blocked = 1;
1329 	fuse_uring_flush_bg(queue);
1330 	spin_unlock(&fc->bg_lock);
1331 
1332 	/*
1333 	 * Due to bg_queue flush limits there might be other bg requests
1334 	 * in the queue that need to be handled first. Or no further req
1335 	 * might be available.
1336 	 */
1337 	req = list_first_entry_or_null(&queue->fuse_req_queue, struct fuse_req,
1338 				       list);
1339 	if (ent && req) {
1340 		fuse_uring_add_req_to_ring_ent(ent, req);
1341 		spin_unlock(&queue->lock);
1342 
1343 		fuse_uring_dispatch_ent(ent);
1344 	} else {
1345 		spin_unlock(&queue->lock);
1346 	}
1347 
1348 	return true;
1349 }
1350 
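/* Remove a request that is still pending on its assigned ring queue */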
1351 bool fuse_uring_remove_pending_req(struct fuse_req *req)
1352 {
1353 	struct fuse_ring_queue *queue = req->ring_queue;
1354 
1355 	return fuse_remove_pending_req(req, &queue->lock);
1356 }
1357 
1358 static const struct fuse_iqueue_ops fuse_io_uring_ops = {
1359 	/* should be sent over io-uring as an enhancement */
1360 	.send_forget = fuse_dev_queue_forget,
1361 
1362 	/*
1363 	 * could be sent over io-uring, but interrupts should be rare,
1364 	 * no need to make the code complex
1365 	 */
1366 	.send_interrupt = fuse_dev_queue_interrupt,
1367 	.send_req = fuse_uring_queue_fuse_req,
1368 };
1369