xref: /linux/fs/fuse/dev_uring.c (revision 8934827db5403eae57d4537114a9ff88b0a8460f)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * FUSE: Filesystem in Userspace
4  * Copyright (c) 2023-2024 DataDirect Networks.
5  */
6 
7 #include "fuse_i.h"
8 #include "dev_uring_i.h"
9 #include "fuse_dev_i.h"
10 #include "fuse_trace.h"
11 
12 #include <linux/fs.h>
13 #include <linux/io_uring/cmd.h>
14 
15 static bool __read_mostly enable_uring;
16 module_param(enable_uring, bool, 0644);
17 MODULE_PARM_DESC(enable_uring,
18 		 "Enable userspace communication through io-uring");
19 
20 #define FUSE_URING_IOV_SEGS 2 /* header and payload */
21 
22 
fuse_uring_enabled(void)23 bool fuse_uring_enabled(void)
24 {
25 	return enable_uring;
26 }
27 
28 struct fuse_uring_pdu {
29 	struct fuse_ring_ent *ent;
30 };
31 
32 static const struct fuse_iqueue_ops fuse_io_uring_ops;
33 
uring_cmd_set_ring_ent(struct io_uring_cmd * cmd,struct fuse_ring_ent * ring_ent)34 static void uring_cmd_set_ring_ent(struct io_uring_cmd *cmd,
35 				   struct fuse_ring_ent *ring_ent)
36 {
37 	struct fuse_uring_pdu *pdu =
38 		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu);
39 
40 	pdu->ent = ring_ent;
41 }
42 
uring_cmd_to_ring_ent(struct io_uring_cmd * cmd)43 static struct fuse_ring_ent *uring_cmd_to_ring_ent(struct io_uring_cmd *cmd)
44 {
45 	struct fuse_uring_pdu *pdu =
46 		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu);
47 
48 	return pdu->ent;
49 }
50 
fuse_uring_flush_bg(struct fuse_ring_queue * queue)51 static void fuse_uring_flush_bg(struct fuse_ring_queue *queue)
52 {
53 	struct fuse_ring *ring = queue->ring;
54 	struct fuse_conn *fc = ring->fc;
55 
56 	lockdep_assert_held(&queue->lock);
57 	lockdep_assert_held(&fc->bg_lock);
58 
59 	/*
60 	 * Allow one bg request per queue, ignoring global fc limits.
61 	 * This prevents a single queue from consuming all resources and
62 	 * eliminates the need for remote queue wake-ups when global
63 	 * limits are met but this queue has no more waiting requests.
64 	 */
65 	while ((fc->active_background < fc->max_background ||
66 		!queue->active_background) &&
67 	       (!list_empty(&queue->fuse_req_bg_queue))) {
68 		struct fuse_req *req;
69 
70 		req = list_first_entry(&queue->fuse_req_bg_queue,
71 				       struct fuse_req, list);
72 		fc->active_background++;
73 		queue->active_background++;
74 
75 		list_move_tail(&req->list, &queue->fuse_req_queue);
76 	}
77 }
78 
fuse_uring_req_end(struct fuse_ring_ent * ent,struct fuse_req * req,int error)79 static void fuse_uring_req_end(struct fuse_ring_ent *ent, struct fuse_req *req,
80 			       int error)
81 {
82 	struct fuse_ring_queue *queue = ent->queue;
83 	struct fuse_ring *ring = queue->ring;
84 	struct fuse_conn *fc = ring->fc;
85 
86 	lockdep_assert_not_held(&queue->lock);
87 	spin_lock(&queue->lock);
88 	ent->fuse_req = NULL;
89 	list_del_init(&req->list);
90 	if (test_bit(FR_BACKGROUND, &req->flags)) {
91 		queue->active_background--;
92 		spin_lock(&fc->bg_lock);
93 		fuse_uring_flush_bg(queue);
94 		spin_unlock(&fc->bg_lock);
95 	}
96 
97 	spin_unlock(&queue->lock);
98 
99 	if (error)
100 		req->out.h.error = error;
101 
102 	clear_bit(FR_SENT, &req->flags);
103 	fuse_request_end(req);
104 }
105 
106 /* Abort all list queued request on the given ring queue */
fuse_uring_abort_end_queue_requests(struct fuse_ring_queue * queue)107 static void fuse_uring_abort_end_queue_requests(struct fuse_ring_queue *queue)
108 {
109 	struct fuse_req *req;
110 	LIST_HEAD(req_list);
111 
112 	spin_lock(&queue->lock);
113 	list_for_each_entry(req, &queue->fuse_req_queue, list)
114 		clear_bit(FR_PENDING, &req->flags);
115 	list_splice_init(&queue->fuse_req_queue, &req_list);
116 	spin_unlock(&queue->lock);
117 
118 	/* must not hold queue lock to avoid order issues with fi->lock */
119 	fuse_dev_end_requests(&req_list);
120 }
121 
fuse_uring_abort_end_requests(struct fuse_ring * ring)122 void fuse_uring_abort_end_requests(struct fuse_ring *ring)
123 {
124 	int qid;
125 	struct fuse_ring_queue *queue;
126 	struct fuse_conn *fc = ring->fc;
127 
128 	for (qid = 0; qid < ring->nr_queues; qid++) {
129 		queue = READ_ONCE(ring->queues[qid]);
130 		if (!queue)
131 			continue;
132 
133 		queue->stopped = true;
134 
135 		WARN_ON_ONCE(ring->fc->max_background != UINT_MAX);
136 		spin_lock(&queue->lock);
137 		spin_lock(&fc->bg_lock);
138 		fuse_uring_flush_bg(queue);
139 		spin_unlock(&fc->bg_lock);
140 		spin_unlock(&queue->lock);
141 		fuse_uring_abort_end_queue_requests(queue);
142 	}
143 }
144 
ent_list_request_expired(struct fuse_conn * fc,struct list_head * list)145 static bool ent_list_request_expired(struct fuse_conn *fc, struct list_head *list)
146 {
147 	struct fuse_ring_ent *ent;
148 	struct fuse_req *req;
149 
150 	ent = list_first_entry_or_null(list, struct fuse_ring_ent, list);
151 	if (!ent)
152 		return false;
153 
154 	req = ent->fuse_req;
155 
156 	return time_is_before_jiffies(req->create_time +
157 				      fc->timeout.req_timeout);
158 }
159 
fuse_uring_request_expired(struct fuse_conn * fc)160 bool fuse_uring_request_expired(struct fuse_conn *fc)
161 {
162 	struct fuse_ring *ring = fc->ring;
163 	struct fuse_ring_queue *queue;
164 	int qid;
165 
166 	if (!ring)
167 		return false;
168 
169 	for (qid = 0; qid < ring->nr_queues; qid++) {
170 		queue = READ_ONCE(ring->queues[qid]);
171 		if (!queue)
172 			continue;
173 
174 		spin_lock(&queue->lock);
175 		if (fuse_request_expired(fc, &queue->fuse_req_queue) ||
176 		    fuse_request_expired(fc, &queue->fuse_req_bg_queue) ||
177 		    ent_list_request_expired(fc, &queue->ent_w_req_queue) ||
178 		    ent_list_request_expired(fc, &queue->ent_in_userspace)) {
179 			spin_unlock(&queue->lock);
180 			return true;
181 		}
182 		spin_unlock(&queue->lock);
183 	}
184 
185 	return false;
186 }
187 
fuse_uring_destruct(struct fuse_conn * fc)188 void fuse_uring_destruct(struct fuse_conn *fc)
189 {
190 	struct fuse_ring *ring = fc->ring;
191 	int qid;
192 
193 	if (!ring)
194 		return;
195 
196 	for (qid = 0; qid < ring->nr_queues; qid++) {
197 		struct fuse_ring_queue *queue = ring->queues[qid];
198 		struct fuse_ring_ent *ent, *next;
199 
200 		if (!queue)
201 			continue;
202 
203 		WARN_ON(!list_empty(&queue->ent_avail_queue));
204 		WARN_ON(!list_empty(&queue->ent_w_req_queue));
205 		WARN_ON(!list_empty(&queue->ent_commit_queue));
206 		WARN_ON(!list_empty(&queue->ent_in_userspace));
207 
208 		list_for_each_entry_safe(ent, next, &queue->ent_released,
209 					 list) {
210 			list_del_init(&ent->list);
211 			kfree(ent);
212 		}
213 
214 		kfree(queue->fpq.processing);
215 		kfree(queue);
216 		ring->queues[qid] = NULL;
217 	}
218 
219 	kfree(ring->queues);
220 	kfree(ring);
221 	fc->ring = NULL;
222 }
223 
224 /*
225  * Basic ring setup for this connection based on the provided configuration
226  */
fuse_uring_create(struct fuse_conn * fc)227 static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc)
228 {
229 	struct fuse_ring *ring;
230 	size_t nr_queues = num_possible_cpus();
231 	struct fuse_ring *res = NULL;
232 	size_t max_payload_size;
233 
234 	ring = kzalloc_obj(*fc->ring, GFP_KERNEL_ACCOUNT);
235 	if (!ring)
236 		return NULL;
237 
238 	ring->queues = kzalloc_objs(struct fuse_ring_queue *, nr_queues,
239 				    GFP_KERNEL_ACCOUNT);
240 	if (!ring->queues)
241 		goto out_err;
242 
243 	max_payload_size = max(FUSE_MIN_READ_BUFFER, fc->max_write);
244 	max_payload_size = max(max_payload_size, fc->max_pages * PAGE_SIZE);
245 
246 	spin_lock(&fc->lock);
247 	if (fc->ring) {
248 		/* race, another thread created the ring in the meantime */
249 		spin_unlock(&fc->lock);
250 		res = fc->ring;
251 		goto out_err;
252 	}
253 
254 	init_waitqueue_head(&ring->stop_waitq);
255 
256 	ring->nr_queues = nr_queues;
257 	ring->fc = fc;
258 	ring->max_payload_sz = max_payload_size;
259 	smp_store_release(&fc->ring, ring);
260 
261 	spin_unlock(&fc->lock);
262 	return ring;
263 
264 out_err:
265 	kfree(ring->queues);
266 	kfree(ring);
267 	return res;
268 }
269 
fuse_uring_create_queue(struct fuse_ring * ring,int qid)270 static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring,
271 						       int qid)
272 {
273 	struct fuse_conn *fc = ring->fc;
274 	struct fuse_ring_queue *queue;
275 	struct list_head *pq;
276 
277 	queue = kzalloc_obj(*queue, GFP_KERNEL_ACCOUNT);
278 	if (!queue)
279 		return NULL;
280 	pq = kzalloc_objs(struct list_head, FUSE_PQ_HASH_SIZE, GFP_KERNEL);
281 	if (!pq) {
282 		kfree(queue);
283 		return NULL;
284 	}
285 
286 	queue->qid = qid;
287 	queue->ring = ring;
288 	spin_lock_init(&queue->lock);
289 
290 	INIT_LIST_HEAD(&queue->ent_avail_queue);
291 	INIT_LIST_HEAD(&queue->ent_commit_queue);
292 	INIT_LIST_HEAD(&queue->ent_w_req_queue);
293 	INIT_LIST_HEAD(&queue->ent_in_userspace);
294 	INIT_LIST_HEAD(&queue->fuse_req_queue);
295 	INIT_LIST_HEAD(&queue->fuse_req_bg_queue);
296 	INIT_LIST_HEAD(&queue->ent_released);
297 
298 	queue->fpq.processing = pq;
299 	fuse_pqueue_init(&queue->fpq);
300 
301 	spin_lock(&fc->lock);
302 	if (ring->queues[qid]) {
303 		spin_unlock(&fc->lock);
304 		kfree(queue->fpq.processing);
305 		kfree(queue);
306 		return ring->queues[qid];
307 	}
308 
309 	/*
310 	 * write_once and lock as the caller mostly doesn't take the lock at all
311 	 */
312 	WRITE_ONCE(ring->queues[qid], queue);
313 	spin_unlock(&fc->lock);
314 
315 	return queue;
316 }
317 
fuse_uring_stop_fuse_req_end(struct fuse_req * req)318 static void fuse_uring_stop_fuse_req_end(struct fuse_req *req)
319 {
320 	clear_bit(FR_SENT, &req->flags);
321 	req->out.h.error = -ECONNABORTED;
322 	fuse_request_end(req);
323 }
324 
325 /*
326  * Release a request/entry on connection tear down
327  */
fuse_uring_entry_teardown(struct fuse_ring_ent * ent)328 static void fuse_uring_entry_teardown(struct fuse_ring_ent *ent)
329 {
330 	struct fuse_req *req;
331 	struct io_uring_cmd *cmd;
332 
333 	struct fuse_ring_queue *queue = ent->queue;
334 
335 	spin_lock(&queue->lock);
336 	cmd = ent->cmd;
337 	ent->cmd = NULL;
338 	req = ent->fuse_req;
339 	ent->fuse_req = NULL;
340 	if (req) {
341 		/* remove entry from queue->fpq->processing */
342 		list_del_init(&req->list);
343 	}
344 
345 	/*
346 	 * The entry must not be freed immediately, due to access of direct
347 	 * pointer access of entries through IO_URING_F_CANCEL - there is a risk
348 	 * of race between daemon termination (which triggers IO_URING_F_CANCEL
349 	 * and accesses entries without checking the list state first
350 	 */
351 	list_move(&ent->list, &queue->ent_released);
352 	ent->state = FRRS_RELEASED;
353 	spin_unlock(&queue->lock);
354 
355 	if (cmd)
356 		io_uring_cmd_done(cmd, -ENOTCONN, IO_URING_F_UNLOCKED);
357 
358 	if (req)
359 		fuse_uring_stop_fuse_req_end(req);
360 }
361 
fuse_uring_stop_list_entries(struct list_head * head,struct fuse_ring_queue * queue,enum fuse_ring_req_state exp_state)362 static void fuse_uring_stop_list_entries(struct list_head *head,
363 					 struct fuse_ring_queue *queue,
364 					 enum fuse_ring_req_state exp_state)
365 {
366 	struct fuse_ring *ring = queue->ring;
367 	struct fuse_ring_ent *ent, *next;
368 	ssize_t queue_refs = SSIZE_MAX;
369 	LIST_HEAD(to_teardown);
370 
371 	spin_lock(&queue->lock);
372 	list_for_each_entry_safe(ent, next, head, list) {
373 		if (ent->state != exp_state) {
374 			pr_warn("entry teardown qid=%d state=%d expected=%d",
375 				queue->qid, ent->state, exp_state);
376 			continue;
377 		}
378 
379 		ent->state = FRRS_TEARDOWN;
380 		list_move(&ent->list, &to_teardown);
381 	}
382 	spin_unlock(&queue->lock);
383 
384 	/* no queue lock to avoid lock order issues */
385 	list_for_each_entry_safe(ent, next, &to_teardown, list) {
386 		fuse_uring_entry_teardown(ent);
387 		queue_refs = atomic_dec_return(&ring->queue_refs);
388 		WARN_ON_ONCE(queue_refs < 0);
389 	}
390 }
391 
fuse_uring_teardown_entries(struct fuse_ring_queue * queue)392 static void fuse_uring_teardown_entries(struct fuse_ring_queue *queue)
393 {
394 	fuse_uring_stop_list_entries(&queue->ent_in_userspace, queue,
395 				     FRRS_USERSPACE);
396 	fuse_uring_stop_list_entries(&queue->ent_avail_queue, queue,
397 				     FRRS_AVAILABLE);
398 }
399 
400 /*
401  * Log state debug info
402  */
fuse_uring_log_ent_state(struct fuse_ring * ring)403 static void fuse_uring_log_ent_state(struct fuse_ring *ring)
404 {
405 	int qid;
406 	struct fuse_ring_ent *ent;
407 
408 	for (qid = 0; qid < ring->nr_queues; qid++) {
409 		struct fuse_ring_queue *queue = ring->queues[qid];
410 
411 		if (!queue)
412 			continue;
413 
414 		spin_lock(&queue->lock);
415 		/*
416 		 * Log entries from the intermediate queue, the other queues
417 		 * should be empty
418 		 */
419 		list_for_each_entry(ent, &queue->ent_w_req_queue, list) {
420 			pr_info(" ent-req-queue ring=%p qid=%d ent=%p state=%d\n",
421 				ring, qid, ent, ent->state);
422 		}
423 		list_for_each_entry(ent, &queue->ent_commit_queue, list) {
424 			pr_info(" ent-commit-queue ring=%p qid=%d ent=%p state=%d\n",
425 				ring, qid, ent, ent->state);
426 		}
427 		spin_unlock(&queue->lock);
428 	}
429 	ring->stop_debug_log = 1;
430 }
431 
fuse_uring_async_stop_queues(struct work_struct * work)432 static void fuse_uring_async_stop_queues(struct work_struct *work)
433 {
434 	int qid;
435 	struct fuse_ring *ring =
436 		container_of(work, struct fuse_ring, async_teardown_work.work);
437 
438 	/* XXX code dup */
439 	for (qid = 0; qid < ring->nr_queues; qid++) {
440 		struct fuse_ring_queue *queue = READ_ONCE(ring->queues[qid]);
441 
442 		if (!queue)
443 			continue;
444 
445 		fuse_uring_teardown_entries(queue);
446 	}
447 
448 	/*
449 	 * Some ring entries might be in the middle of IO operations,
450 	 * i.e. in process to get handled by file_operations::uring_cmd
451 	 * or on the way to userspace - we could handle that with conditions in
452 	 * run time code, but easier/cleaner to have an async tear down handler
453 	 * If there are still queue references left
454 	 */
455 	if (atomic_read(&ring->queue_refs) > 0) {
456 		if (time_after(jiffies,
457 			       ring->teardown_time + FUSE_URING_TEARDOWN_TIMEOUT))
458 			fuse_uring_log_ent_state(ring);
459 
460 		schedule_delayed_work(&ring->async_teardown_work,
461 				      FUSE_URING_TEARDOWN_INTERVAL);
462 	} else {
463 		wake_up_all(&ring->stop_waitq);
464 	}
465 }
466 
467 /*
468  * Stop the ring queues
469  */
fuse_uring_stop_queues(struct fuse_ring * ring)470 void fuse_uring_stop_queues(struct fuse_ring *ring)
471 {
472 	int qid;
473 
474 	for (qid = 0; qid < ring->nr_queues; qid++) {
475 		struct fuse_ring_queue *queue = READ_ONCE(ring->queues[qid]);
476 
477 		if (!queue)
478 			continue;
479 
480 		fuse_uring_teardown_entries(queue);
481 	}
482 
483 	if (atomic_read(&ring->queue_refs) > 0) {
484 		ring->teardown_time = jiffies;
485 		INIT_DELAYED_WORK(&ring->async_teardown_work,
486 				  fuse_uring_async_stop_queues);
487 		schedule_delayed_work(&ring->async_teardown_work,
488 				      FUSE_URING_TEARDOWN_INTERVAL);
489 	} else {
490 		wake_up_all(&ring->stop_waitq);
491 	}
492 }
493 
494 /*
495  * Handle IO_URING_F_CANCEL, typically should come on daemon termination.
496  *
497  * Releasing the last entry should trigger fuse_dev_release() if
498  * the daemon was terminated
499  */
fuse_uring_cancel(struct io_uring_cmd * cmd,unsigned int issue_flags)500 static void fuse_uring_cancel(struct io_uring_cmd *cmd,
501 			      unsigned int issue_flags)
502 {
503 	struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
504 	struct fuse_ring_queue *queue;
505 	bool need_cmd_done = false;
506 
507 	/*
508 	 * direct access on ent - it must not be destructed as long as
509 	 * IO_URING_F_CANCEL might come up
510 	 */
511 	queue = ent->queue;
512 	spin_lock(&queue->lock);
513 	if (ent->state == FRRS_AVAILABLE) {
514 		ent->state = FRRS_USERSPACE;
515 		list_move_tail(&ent->list, &queue->ent_in_userspace);
516 		need_cmd_done = true;
517 		ent->cmd = NULL;
518 	}
519 	spin_unlock(&queue->lock);
520 
521 	if (need_cmd_done) {
522 		/* no queue lock to avoid lock order issues */
523 		io_uring_cmd_done(cmd, -ENOTCONN, issue_flags);
524 	}
525 }
526 
/*
 * Store @ring_ent in the private data of @cmd, then mark the command as
 * cancelable. uring_cmd_to_ring_ent() reads the entry back from there.
 */
static void fuse_uring_prepare_cancel(struct io_uring_cmd *cmd, int issue_flags,
				      struct fuse_ring_ent *ring_ent)
{
	uring_cmd_set_ring_ent(cmd, ring_ent);

	io_uring_cmd_mark_cancelable(cmd, issue_flags);
}
533 
534 /*
535  * Checks for errors and stores it into the request
536  */
fuse_uring_out_header_has_err(struct fuse_out_header * oh,struct fuse_req * req,struct fuse_conn * fc)537 static int fuse_uring_out_header_has_err(struct fuse_out_header *oh,
538 					 struct fuse_req *req,
539 					 struct fuse_conn *fc)
540 {
541 	int err;
542 
543 	err = -EINVAL;
544 	if (oh->unique == 0) {
545 		/* Not supported through io-uring yet */
546 		pr_warn_once("notify through fuse-io-uring not supported\n");
547 		goto err;
548 	}
549 
550 	if (oh->error <= -ERESTARTSYS || oh->error > 0)
551 		goto err;
552 
553 	if (oh->error) {
554 		err = oh->error;
555 		goto err;
556 	}
557 
558 	err = -ENOENT;
559 	if ((oh->unique & ~FUSE_INT_REQ_BIT) != req->in.h.unique) {
560 		pr_warn_ratelimited("unique mismatch, expected: %llu got %llu\n",
561 				    req->in.h.unique,
562 				    oh->unique & ~FUSE_INT_REQ_BIT);
563 		goto err;
564 	}
565 
566 	/*
567 	 * Is it an interrupt reply ID?
568 	 * XXX: Not supported through fuse-io-uring yet, it should not even
569 	 *      find the request - should not happen.
570 	 */
571 	WARN_ON_ONCE(oh->unique & FUSE_INT_REQ_BIT);
572 
573 	err = 0;
574 err:
575 	return err;
576 }
577 
fuse_uring_copy_from_ring(struct fuse_ring * ring,struct fuse_req * req,struct fuse_ring_ent * ent)578 static int fuse_uring_copy_from_ring(struct fuse_ring *ring,
579 				     struct fuse_req *req,
580 				     struct fuse_ring_ent *ent)
581 {
582 	struct fuse_copy_state cs;
583 	struct fuse_args *args = req->args;
584 	struct iov_iter iter;
585 	int err;
586 	struct fuse_uring_ent_in_out ring_in_out;
587 
588 	err = copy_from_user(&ring_in_out, &ent->headers->ring_ent_in_out,
589 			     sizeof(ring_in_out));
590 	if (err)
591 		return -EFAULT;
592 
593 	err = import_ubuf(ITER_SOURCE, ent->payload, ring->max_payload_sz,
594 			  &iter);
595 	if (err)
596 		return err;
597 
598 	fuse_copy_init(&cs, false, &iter);
599 	cs.is_uring = true;
600 	cs.req = req;
601 
602 	err = fuse_copy_out_args(&cs, args, ring_in_out.payload_sz);
603 	fuse_copy_finish(&cs);
604 	return err;
605 }
606 
607 /*
608  * Copy data from the req to the ring buffer
609  */
fuse_uring_args_to_ring(struct fuse_ring * ring,struct fuse_req * req,struct fuse_ring_ent * ent)610 static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req,
611 				   struct fuse_ring_ent *ent)
612 {
613 	struct fuse_copy_state cs;
614 	struct fuse_args *args = req->args;
615 	struct fuse_in_arg *in_args = args->in_args;
616 	int num_args = args->in_numargs;
617 	int err;
618 	struct iov_iter iter;
619 	struct fuse_uring_ent_in_out ent_in_out = {
620 		.flags = 0,
621 		.commit_id = req->in.h.unique,
622 	};
623 
624 	err = import_ubuf(ITER_DEST, ent->payload, ring->max_payload_sz, &iter);
625 	if (err) {
626 		pr_info_ratelimited("fuse: Import of user buffer failed\n");
627 		return err;
628 	}
629 
630 	fuse_copy_init(&cs, true, &iter);
631 	cs.is_uring = true;
632 	cs.req = req;
633 
634 	if (num_args > 0) {
635 		/*
636 		 * Expectation is that the first argument is the per op header.
637 		 * Some op code have that as zero size.
638 		 */
639 		if (args->in_args[0].size > 0) {
640 			err = copy_to_user(&ent->headers->op_in, in_args->value,
641 					   in_args->size);
642 			if (err) {
643 				pr_info_ratelimited(
644 					"Copying the header failed.\n");
645 				return -EFAULT;
646 			}
647 		}
648 		in_args++;
649 		num_args--;
650 	}
651 
652 	/* copy the payload */
653 	err = fuse_copy_args(&cs, num_args, args->in_pages,
654 			     (struct fuse_arg *)in_args, 0);
655 	fuse_copy_finish(&cs);
656 	if (err) {
657 		pr_info_ratelimited("%s fuse_copy_args failed\n", __func__);
658 		return err;
659 	}
660 
661 	ent_in_out.payload_sz = cs.ring.copied_sz;
662 	err = copy_to_user(&ent->headers->ring_ent_in_out, &ent_in_out,
663 			   sizeof(ent_in_out));
664 	return err ? -EFAULT : 0;
665 }
666 
fuse_uring_copy_to_ring(struct fuse_ring_ent * ent,struct fuse_req * req)667 static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent,
668 				   struct fuse_req *req)
669 {
670 	struct fuse_ring_queue *queue = ent->queue;
671 	struct fuse_ring *ring = queue->ring;
672 	int err;
673 
674 	err = -EIO;
675 	if (WARN_ON(ent->state != FRRS_FUSE_REQ)) {
676 		pr_err("qid=%d ring-req=%p invalid state %d on send\n",
677 		       queue->qid, ent, ent->state);
678 		return err;
679 	}
680 
681 	err = -EINVAL;
682 	if (WARN_ON(req->in.h.unique == 0))
683 		return err;
684 
685 	/* copy the request */
686 	err = fuse_uring_args_to_ring(ring, req, ent);
687 	if (unlikely(err)) {
688 		pr_info_ratelimited("Copy to ring failed: %d\n", err);
689 		return err;
690 	}
691 
692 	/* copy fuse_in_header */
693 	err = copy_to_user(&ent->headers->in_out, &req->in.h,
694 			   sizeof(req->in.h));
695 	if (err) {
696 		err = -EFAULT;
697 		return err;
698 	}
699 
700 	return 0;
701 }
702 
fuse_uring_prepare_send(struct fuse_ring_ent * ent,struct fuse_req * req)703 static int fuse_uring_prepare_send(struct fuse_ring_ent *ent,
704 				   struct fuse_req *req)
705 {
706 	int err;
707 
708 	err = fuse_uring_copy_to_ring(ent, req);
709 	if (!err)
710 		set_bit(FR_SENT, &req->flags);
711 	else
712 		fuse_uring_req_end(ent, req, err);
713 
714 	return err;
715 }
716 
717 /*
718  * Write data to the ring buffer and send the request to userspace,
719  * userspace will read it
720  * This is comparable with classical read(/dev/fuse)
721  */
fuse_uring_send_next_to_ring(struct fuse_ring_ent * ent,struct fuse_req * req,unsigned int issue_flags)722 static int fuse_uring_send_next_to_ring(struct fuse_ring_ent *ent,
723 					struct fuse_req *req,
724 					unsigned int issue_flags)
725 {
726 	struct fuse_ring_queue *queue = ent->queue;
727 	int err;
728 	struct io_uring_cmd *cmd;
729 
730 	err = fuse_uring_prepare_send(ent, req);
731 	if (err)
732 		return err;
733 
734 	spin_lock(&queue->lock);
735 	cmd = ent->cmd;
736 	ent->cmd = NULL;
737 	ent->state = FRRS_USERSPACE;
738 	list_move_tail(&ent->list, &queue->ent_in_userspace);
739 	spin_unlock(&queue->lock);
740 
741 	io_uring_cmd_done(cmd, 0, issue_flags);
742 	return 0;
743 }
744 
745 /*
746  * Make a ring entry available for fuse_req assignment
747  */
fuse_uring_ent_avail(struct fuse_ring_ent * ent,struct fuse_ring_queue * queue)748 static void fuse_uring_ent_avail(struct fuse_ring_ent *ent,
749 				 struct fuse_ring_queue *queue)
750 {
751 	WARN_ON_ONCE(!ent->cmd);
752 	list_move(&ent->list, &queue->ent_avail_queue);
753 	ent->state = FRRS_AVAILABLE;
754 }
755 
756 /* Used to find the request on SQE commit */
fuse_uring_add_to_pq(struct fuse_ring_ent * ent,struct fuse_req * req)757 static void fuse_uring_add_to_pq(struct fuse_ring_ent *ent,
758 				 struct fuse_req *req)
759 {
760 	struct fuse_ring_queue *queue = ent->queue;
761 	struct fuse_pqueue *fpq = &queue->fpq;
762 	unsigned int hash;
763 
764 	req->ring_entry = ent;
765 	hash = fuse_req_hash(req->in.h.unique);
766 	list_move_tail(&req->list, &fpq->processing[hash]);
767 }
768 
769 /*
770  * Assign a fuse queue entry to the given entry
771  */
fuse_uring_add_req_to_ring_ent(struct fuse_ring_ent * ent,struct fuse_req * req)772 static void fuse_uring_add_req_to_ring_ent(struct fuse_ring_ent *ent,
773 					   struct fuse_req *req)
774 {
775 	struct fuse_ring_queue *queue = ent->queue;
776 
777 	lockdep_assert_held(&queue->lock);
778 
779 	if (WARN_ON_ONCE(ent->state != FRRS_AVAILABLE &&
780 			 ent->state != FRRS_COMMIT)) {
781 		pr_warn("%s qid=%d state=%d\n", __func__, ent->queue->qid,
782 			ent->state);
783 	}
784 
785 	clear_bit(FR_PENDING, &req->flags);
786 	ent->fuse_req = req;
787 	ent->state = FRRS_FUSE_REQ;
788 	list_move_tail(&ent->list, &queue->ent_w_req_queue);
789 	fuse_uring_add_to_pq(ent, req);
790 }
791 
792 /* Fetch the next fuse request if available */
fuse_uring_ent_assign_req(struct fuse_ring_ent * ent)793 static struct fuse_req *fuse_uring_ent_assign_req(struct fuse_ring_ent *ent)
794 	__must_hold(&queue->lock)
795 {
796 	struct fuse_req *req;
797 	struct fuse_ring_queue *queue = ent->queue;
798 	struct list_head *req_queue = &queue->fuse_req_queue;
799 
800 	lockdep_assert_held(&queue->lock);
801 
802 	/* get and assign the next entry while it is still holding the lock */
803 	req = list_first_entry_or_null(req_queue, struct fuse_req, list);
804 	if (req)
805 		fuse_uring_add_req_to_ring_ent(ent, req);
806 
807 	return req;
808 }
809 
810 /*
811  * Read data from the ring buffer, which user space has written to
812  * This is comparible with handling of classical write(/dev/fuse).
813  * Also make the ring request available again for new fuse requests.
814  */
fuse_uring_commit(struct fuse_ring_ent * ent,struct fuse_req * req,unsigned int issue_flags)815 static void fuse_uring_commit(struct fuse_ring_ent *ent, struct fuse_req *req,
816 			      unsigned int issue_flags)
817 {
818 	struct fuse_ring *ring = ent->queue->ring;
819 	struct fuse_conn *fc = ring->fc;
820 	ssize_t err = 0;
821 
822 	err = copy_from_user(&req->out.h, &ent->headers->in_out,
823 			     sizeof(req->out.h));
824 	if (err) {
825 		req->out.h.error = -EFAULT;
826 		goto out;
827 	}
828 
829 	err = fuse_uring_out_header_has_err(&req->out.h, req, fc);
830 	if (err) {
831 		/* req->out.h.error already set */
832 		goto out;
833 	}
834 
835 	err = fuse_uring_copy_from_ring(ring, req, ent);
836 out:
837 	fuse_uring_req_end(ent, req, err);
838 }
839 
840 /*
841  * Get the next fuse req and send it
842  */
fuse_uring_next_fuse_req(struct fuse_ring_ent * ent,struct fuse_ring_queue * queue,unsigned int issue_flags)843 static void fuse_uring_next_fuse_req(struct fuse_ring_ent *ent,
844 				     struct fuse_ring_queue *queue,
845 				     unsigned int issue_flags)
846 {
847 	int err;
848 	struct fuse_req *req;
849 
850 retry:
851 	spin_lock(&queue->lock);
852 	fuse_uring_ent_avail(ent, queue);
853 	req = fuse_uring_ent_assign_req(ent);
854 	spin_unlock(&queue->lock);
855 
856 	if (req) {
857 		err = fuse_uring_send_next_to_ring(ent, req, issue_flags);
858 		if (err)
859 			goto retry;
860 	}
861 }
862 
fuse_ring_ent_set_commit(struct fuse_ring_ent * ent)863 static int fuse_ring_ent_set_commit(struct fuse_ring_ent *ent)
864 {
865 	struct fuse_ring_queue *queue = ent->queue;
866 
867 	lockdep_assert_held(&queue->lock);
868 
869 	if (WARN_ON_ONCE(ent->state != FRRS_USERSPACE))
870 		return -EIO;
871 
872 	ent->state = FRRS_COMMIT;
873 	list_move(&ent->list, &queue->ent_commit_queue);
874 
875 	return 0;
876 }
877 
878 /* FUSE_URING_CMD_COMMIT_AND_FETCH handler */
fuse_uring_commit_fetch(struct io_uring_cmd * cmd,int issue_flags,struct fuse_conn * fc)879 static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
880 				   struct fuse_conn *fc)
881 {
882 	const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe128_cmd(cmd->sqe,
883 								       struct fuse_uring_cmd_req);
884 	struct fuse_ring_ent *ent;
885 	int err;
886 	struct fuse_ring *ring = fc->ring;
887 	struct fuse_ring_queue *queue;
888 	uint64_t commit_id = READ_ONCE(cmd_req->commit_id);
889 	unsigned int qid = READ_ONCE(cmd_req->qid);
890 	struct fuse_pqueue *fpq;
891 	struct fuse_req *req;
892 
893 	err = -ENOTCONN;
894 	if (!ring)
895 		return err;
896 
897 	if (qid >= ring->nr_queues)
898 		return -EINVAL;
899 
900 	queue = ring->queues[qid];
901 	if (!queue)
902 		return err;
903 	fpq = &queue->fpq;
904 
905 	if (!READ_ONCE(fc->connected) || READ_ONCE(queue->stopped))
906 		return err;
907 
908 	spin_lock(&queue->lock);
909 	/* Find a request based on the unique ID of the fuse request
910 	 * This should get revised, as it needs a hash calculation and list
911 	 * search. And full struct fuse_pqueue is needed (memory overhead).
912 	 * As well as the link from req to ring_ent.
913 	 */
914 	req = fuse_request_find(fpq, commit_id);
915 	err = -ENOENT;
916 	if (!req) {
917 		pr_info("qid=%d commit_id %llu not found\n", queue->qid,
918 			commit_id);
919 		spin_unlock(&queue->lock);
920 		return err;
921 	}
922 	list_del_init(&req->list);
923 	ent = req->ring_entry;
924 	req->ring_entry = NULL;
925 
926 	err = fuse_ring_ent_set_commit(ent);
927 	if (err != 0) {
928 		pr_info_ratelimited("qid=%d commit_id %llu state %d",
929 				    queue->qid, commit_id, ent->state);
930 		spin_unlock(&queue->lock);
931 		req->out.h.error = err;
932 		clear_bit(FR_SENT, &req->flags);
933 		fuse_request_end(req);
934 		return err;
935 	}
936 
937 	ent->cmd = cmd;
938 	spin_unlock(&queue->lock);
939 
940 	/* without the queue lock, as other locks are taken */
941 	fuse_uring_prepare_cancel(cmd, issue_flags, ent);
942 	fuse_uring_commit(ent, req, issue_flags);
943 
944 	/*
945 	 * Fetching the next request is absolutely required as queued
946 	 * fuse requests would otherwise not get processed - committing
947 	 * and fetching is done in one step vs legacy fuse, which has separated
948 	 * read (fetch request) and write (commit result).
949 	 */
950 	fuse_uring_next_fuse_req(ent, queue, issue_flags);
951 	return 0;
952 }
953 
is_ring_ready(struct fuse_ring * ring,int current_qid)954 static bool is_ring_ready(struct fuse_ring *ring, int current_qid)
955 {
956 	int qid;
957 	struct fuse_ring_queue *queue;
958 	bool ready = true;
959 
960 	for (qid = 0; qid < ring->nr_queues && ready; qid++) {
961 		if (current_qid == qid)
962 			continue;
963 
964 		queue = ring->queues[qid];
965 		if (!queue) {
966 			ready = false;
967 			break;
968 		}
969 
970 		spin_lock(&queue->lock);
971 		if (list_empty(&queue->ent_avail_queue))
972 			ready = false;
973 		spin_unlock(&queue->lock);
974 	}
975 
976 	return ready;
977 }
978 
979 /*
980  * fuse_uring_req_fetch command handling
981  */
fuse_uring_do_register(struct fuse_ring_ent * ent,struct io_uring_cmd * cmd,unsigned int issue_flags)982 static void fuse_uring_do_register(struct fuse_ring_ent *ent,
983 				   struct io_uring_cmd *cmd,
984 				   unsigned int issue_flags)
985 {
986 	struct fuse_ring_queue *queue = ent->queue;
987 	struct fuse_ring *ring = queue->ring;
988 	struct fuse_conn *fc = ring->fc;
989 	struct fuse_iqueue *fiq = &fc->iq;
990 
991 	fuse_uring_prepare_cancel(cmd, issue_flags, ent);
992 
993 	spin_lock(&queue->lock);
994 	ent->cmd = cmd;
995 	fuse_uring_ent_avail(ent, queue);
996 	spin_unlock(&queue->lock);
997 
998 	if (!ring->ready) {
999 		bool ready = is_ring_ready(ring, queue->qid);
1000 
1001 		if (ready) {
1002 			WRITE_ONCE(fiq->ops, &fuse_io_uring_ops);
1003 			WRITE_ONCE(ring->ready, true);
1004 			wake_up_all(&fc->blocked_waitq);
1005 		}
1006 	}
1007 }
1008 
1009 /*
1010  * sqe->addr is a ptr to an iovec array, iov[0] has the headers, iov[1]
1011  * the payload
1012  */
fuse_uring_get_iovec_from_sqe(const struct io_uring_sqe * sqe,struct iovec iov[FUSE_URING_IOV_SEGS])1013 static int fuse_uring_get_iovec_from_sqe(const struct io_uring_sqe *sqe,
1014 					 struct iovec iov[FUSE_URING_IOV_SEGS])
1015 {
1016 	struct iovec __user *uiov = u64_to_user_ptr(READ_ONCE(sqe->addr));
1017 	struct iov_iter iter;
1018 	ssize_t ret;
1019 
1020 	if (sqe->len != FUSE_URING_IOV_SEGS)
1021 		return -EINVAL;
1022 
1023 	/*
1024 	 * Direction for buffer access will actually be READ and WRITE,
1025 	 * using write for the import should include READ access as well.
1026 	 */
1027 	ret = import_iovec(WRITE, uiov, FUSE_URING_IOV_SEGS,
1028 			   FUSE_URING_IOV_SEGS, &iov, &iter);
1029 	if (ret < 0)
1030 		return ret;
1031 
1032 	return 0;
1033 }
1034 
1035 static struct fuse_ring_ent *
fuse_uring_create_ring_ent(struct io_uring_cmd * cmd,struct fuse_ring_queue * queue)1036 fuse_uring_create_ring_ent(struct io_uring_cmd *cmd,
1037 			   struct fuse_ring_queue *queue)
1038 {
1039 	struct fuse_ring *ring = queue->ring;
1040 	struct fuse_ring_ent *ent;
1041 	size_t payload_size;
1042 	struct iovec iov[FUSE_URING_IOV_SEGS];
1043 	int err;
1044 
1045 	err = fuse_uring_get_iovec_from_sqe(cmd->sqe, iov);
1046 	if (err) {
1047 		pr_info_ratelimited("Failed to get iovec from sqe, err=%d\n",
1048 				    err);
1049 		return ERR_PTR(err);
1050 	}
1051 
1052 	err = -EINVAL;
1053 	if (iov[0].iov_len < sizeof(struct fuse_uring_req_header)) {
1054 		pr_info_ratelimited("Invalid header len %zu\n", iov[0].iov_len);
1055 		return ERR_PTR(err);
1056 	}
1057 
1058 	payload_size = iov[1].iov_len;
1059 	if (payload_size < ring->max_payload_sz) {
1060 		pr_info_ratelimited("Invalid req payload len %zu\n",
1061 				    payload_size);
1062 		return ERR_PTR(err);
1063 	}
1064 
1065 	err = -ENOMEM;
1066 	ent = kzalloc_obj(*ent, GFP_KERNEL_ACCOUNT);
1067 	if (!ent)
1068 		return ERR_PTR(err);
1069 
1070 	INIT_LIST_HEAD(&ent->list);
1071 
1072 	ent->queue = queue;
1073 	ent->headers = iov[0].iov_base;
1074 	ent->payload = iov[1].iov_base;
1075 
1076 	atomic_inc(&ring->queue_refs);
1077 	return ent;
1078 }
1079 
1080 /*
1081  * Register header and payload buffer with the kernel and puts the
1082  * entry as "ready to get fuse requests" on the queue
1083  */
fuse_uring_register(struct io_uring_cmd * cmd,unsigned int issue_flags,struct fuse_conn * fc)1084 static int fuse_uring_register(struct io_uring_cmd *cmd,
1085 			       unsigned int issue_flags, struct fuse_conn *fc)
1086 {
1087 	const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe128_cmd(cmd->sqe,
1088 								       struct fuse_uring_cmd_req);
1089 	struct fuse_ring *ring = smp_load_acquire(&fc->ring);
1090 	struct fuse_ring_queue *queue;
1091 	struct fuse_ring_ent *ent;
1092 	int err;
1093 	unsigned int qid = READ_ONCE(cmd_req->qid);
1094 
1095 	err = -ENOMEM;
1096 	if (!ring) {
1097 		ring = fuse_uring_create(fc);
1098 		if (!ring)
1099 			return err;
1100 	}
1101 
1102 	if (qid >= ring->nr_queues) {
1103 		pr_info_ratelimited("fuse: Invalid ring qid %u\n", qid);
1104 		return -EINVAL;
1105 	}
1106 
1107 	queue = ring->queues[qid];
1108 	if (!queue) {
1109 		queue = fuse_uring_create_queue(ring, qid);
1110 		if (!queue)
1111 			return err;
1112 	}
1113 
1114 	/*
1115 	 * The created queue above does not need to be destructed in
1116 	 * case of entry errors below, will be done at ring destruction time.
1117 	 */
1118 
1119 	ent = fuse_uring_create_ring_ent(cmd, queue);
1120 	if (IS_ERR(ent))
1121 		return PTR_ERR(ent);
1122 
1123 	fuse_uring_do_register(ent, cmd, issue_flags);
1124 
1125 	return 0;
1126 }
1127 
1128 /*
1129  * Entry function from io_uring to handle the given passthrough command
1130  * (op code IORING_OP_URING_CMD)
1131  */
fuse_uring_cmd(struct io_uring_cmd * cmd,unsigned int issue_flags)1132 int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
1133 {
1134 	struct fuse_dev *fud;
1135 	struct fuse_conn *fc;
1136 	u32 cmd_op = cmd->cmd_op;
1137 	int err;
1138 
1139 	if ((unlikely(issue_flags & IO_URING_F_CANCEL))) {
1140 		fuse_uring_cancel(cmd, issue_flags);
1141 		return 0;
1142 	}
1143 
1144 	/* This extra SQE size holds struct fuse_uring_cmd_req */
1145 	if (!(issue_flags & IO_URING_F_SQE128))
1146 		return -EINVAL;
1147 
1148 	fud = fuse_get_dev(cmd->file);
1149 	if (IS_ERR(fud)) {
1150 		pr_info_ratelimited("No fuse device found\n");
1151 		return PTR_ERR(fud);
1152 	}
1153 	fc = fud->fc;
1154 
1155 	/* Once a connection has io-uring enabled on it, it can't be disabled */
1156 	if (!enable_uring && !fc->io_uring) {
1157 		pr_info_ratelimited("fuse-io-uring is disabled\n");
1158 		return -EOPNOTSUPP;
1159 	}
1160 
1161 	if (fc->aborted)
1162 		return -ECONNABORTED;
1163 	if (!fc->connected)
1164 		return -ENOTCONN;
1165 
1166 	/*
1167 	 * fuse_uring_register() needs the ring to be initialized,
1168 	 * we need to know the max payload size
1169 	 */
1170 	if (!fc->initialized)
1171 		return -EAGAIN;
1172 
1173 	switch (cmd_op) {
1174 	case FUSE_IO_URING_CMD_REGISTER:
1175 		err = fuse_uring_register(cmd, issue_flags, fc);
1176 		if (err) {
1177 			pr_info_once("FUSE_IO_URING_CMD_REGISTER failed err=%d\n",
1178 				     err);
1179 			fc->io_uring = 0;
1180 			wake_up_all(&fc->blocked_waitq);
1181 			return err;
1182 		}
1183 		break;
1184 	case FUSE_IO_URING_CMD_COMMIT_AND_FETCH:
1185 		err = fuse_uring_commit_fetch(cmd, issue_flags, fc);
1186 		if (err) {
1187 			pr_info_once("FUSE_IO_URING_COMMIT_AND_FETCH failed err=%d\n",
1188 				     err);
1189 			return err;
1190 		}
1191 		break;
1192 	default:
1193 		return -EINVAL;
1194 	}
1195 
1196 	return -EIOCBQUEUED;
1197 }
1198 
/*
 * Move @ent to the queue's ent_in_userspace list in state FRRS_USERSPACE,
 * detach the command from the entry and complete @cmd with result @ret.
 * The completion is issued after the queue lock has been dropped.
 */
static void fuse_uring_send(struct fuse_ring_ent *ent, struct io_uring_cmd *cmd,
			    ssize_t ret, unsigned int issue_flags)
{
	struct fuse_ring_queue *q = ent->queue;

	spin_lock(&q->lock);
	ent->state = FRRS_USERSPACE;
	list_move_tail(&ent->list, &q->ent_in_userspace);
	ent->cmd = NULL;
	spin_unlock(&q->lock);

	io_uring_cmd_done(cmd, ret, issue_flags);
}
1212 
1213 /*
1214  * This prepares and sends the ring request in fuse-uring task context.
1215  * User buffers are not mapped yet - the application does not have permission
1216  * to write to it - this has to be executed in ring task context.
1217  */
fuse_uring_send_in_task(struct io_tw_req tw_req,io_tw_token_t tw)1218 static void fuse_uring_send_in_task(struct io_tw_req tw_req, io_tw_token_t tw)
1219 {
1220 	unsigned int issue_flags = IO_URING_CMD_TASK_WORK_ISSUE_FLAGS;
1221 	struct io_uring_cmd *cmd = io_uring_cmd_from_tw(tw_req);
1222 	struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
1223 	struct fuse_ring_queue *queue = ent->queue;
1224 	int err;
1225 
1226 	if (!tw.cancel) {
1227 		err = fuse_uring_prepare_send(ent, ent->fuse_req);
1228 		if (err) {
1229 			fuse_uring_next_fuse_req(ent, queue, issue_flags);
1230 			return;
1231 		}
1232 	} else {
1233 		err = -ECANCELED;
1234 	}
1235 
1236 	fuse_uring_send(ent, cmd, err, issue_flags);
1237 }
1238 
fuse_uring_task_to_queue(struct fuse_ring * ring)1239 static struct fuse_ring_queue *fuse_uring_task_to_queue(struct fuse_ring *ring)
1240 {
1241 	unsigned int qid;
1242 	struct fuse_ring_queue *queue;
1243 
1244 	qid = task_cpu(current);
1245 
1246 	if (WARN_ONCE(qid >= ring->nr_queues,
1247 		      "Core number (%u) exceeds nr queues (%zu)\n", qid,
1248 		      ring->nr_queues))
1249 		qid = 0;
1250 
1251 	queue = ring->queues[qid];
1252 	WARN_ONCE(!queue, "Missing queue for qid %d\n", qid);
1253 
1254 	return queue;
1255 }
1256 
fuse_uring_dispatch_ent(struct fuse_ring_ent * ent)1257 static void fuse_uring_dispatch_ent(struct fuse_ring_ent *ent)
1258 {
1259 	struct io_uring_cmd *cmd = ent->cmd;
1260 
1261 	uring_cmd_set_ring_ent(cmd, ent);
1262 	io_uring_cmd_complete_in_task(cmd, fuse_uring_send_in_task);
1263 }
1264 
1265 /* queue a fuse request and send it if a ring entry is available */
fuse_uring_queue_fuse_req(struct fuse_iqueue * fiq,struct fuse_req * req)1266 void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req)
1267 {
1268 	struct fuse_conn *fc = req->fm->fc;
1269 	struct fuse_ring *ring = fc->ring;
1270 	struct fuse_ring_queue *queue;
1271 	struct fuse_ring_ent *ent = NULL;
1272 	int err;
1273 
1274 	err = -EINVAL;
1275 	queue = fuse_uring_task_to_queue(ring);
1276 	if (!queue)
1277 		goto err;
1278 
1279 	fuse_request_assign_unique(fiq, req);
1280 
1281 	spin_lock(&queue->lock);
1282 	err = -ENOTCONN;
1283 	if (unlikely(queue->stopped))
1284 		goto err_unlock;
1285 
1286 	set_bit(FR_URING, &req->flags);
1287 	req->ring_queue = queue;
1288 	ent = list_first_entry_or_null(&queue->ent_avail_queue,
1289 				       struct fuse_ring_ent, list);
1290 	if (ent)
1291 		fuse_uring_add_req_to_ring_ent(ent, req);
1292 	else
1293 		list_add_tail(&req->list, &queue->fuse_req_queue);
1294 	spin_unlock(&queue->lock);
1295 
1296 	if (ent)
1297 		fuse_uring_dispatch_ent(ent);
1298 
1299 	return;
1300 
1301 err_unlock:
1302 	spin_unlock(&queue->lock);
1303 err:
1304 	req->out.h.error = err;
1305 	clear_bit(FR_PENDING, &req->flags);
1306 	fuse_request_end(req);
1307 }
1308 
fuse_uring_queue_bq_req(struct fuse_req * req)1309 bool fuse_uring_queue_bq_req(struct fuse_req *req)
1310 {
1311 	struct fuse_conn *fc = req->fm->fc;
1312 	struct fuse_ring *ring = fc->ring;
1313 	struct fuse_ring_queue *queue;
1314 	struct fuse_ring_ent *ent = NULL;
1315 
1316 	queue = fuse_uring_task_to_queue(ring);
1317 	if (!queue)
1318 		return false;
1319 
1320 	spin_lock(&queue->lock);
1321 	if (unlikely(queue->stopped)) {
1322 		spin_unlock(&queue->lock);
1323 		return false;
1324 	}
1325 
1326 	set_bit(FR_URING, &req->flags);
1327 	req->ring_queue = queue;
1328 	list_add_tail(&req->list, &queue->fuse_req_bg_queue);
1329 
1330 	ent = list_first_entry_or_null(&queue->ent_avail_queue,
1331 				       struct fuse_ring_ent, list);
1332 	spin_lock(&fc->bg_lock);
1333 	fc->num_background++;
1334 	if (fc->num_background == fc->max_background)
1335 		fc->blocked = 1;
1336 	fuse_uring_flush_bg(queue);
1337 	spin_unlock(&fc->bg_lock);
1338 
1339 	/*
1340 	 * Due to bg_queue flush limits there might be other bg requests
1341 	 * in the queue that need to be handled first. Or no further req
1342 	 * might be available.
1343 	 */
1344 	req = list_first_entry_or_null(&queue->fuse_req_queue, struct fuse_req,
1345 				       list);
1346 	if (ent && req) {
1347 		fuse_uring_add_req_to_ring_ent(ent, req);
1348 		spin_unlock(&queue->lock);
1349 
1350 		fuse_uring_dispatch_ent(ent);
1351 	} else {
1352 		spin_unlock(&queue->lock);
1353 	}
1354 
1355 	return true;
1356 }
1357 
fuse_uring_remove_pending_req(struct fuse_req * req)1358 bool fuse_uring_remove_pending_req(struct fuse_req *req)
1359 {
1360 	struct fuse_ring_queue *queue = req->ring_queue;
1361 
1362 	return fuse_remove_pending_req(req, &queue->lock);
1363 }
1364 
1365 static const struct fuse_iqueue_ops fuse_io_uring_ops = {
1366 	/* should be send over io-uring as enhancement */
1367 	.send_forget = fuse_dev_queue_forget,
1368 
1369 	/*
1370 	 * could be send over io-uring, but interrupts should be rare,
1371 	 * no need to make the code complex
1372 	 */
1373 	.send_interrupt = fuse_dev_queue_interrupt,
1374 	.send_req = fuse_uring_queue_fuse_req,
1375 };
1376