xref: /linux/io_uring/kbuf.c (revision cfc4ca8986bb1f6182da6cd7bb57f228590b4643)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/kernel.h>
3 #include <linux/errno.h>
4 #include <linux/fs.h>
5 #include <linux/file.h>
6 #include <linux/mm.h>
7 #include <linux/slab.h>
8 #include <linux/namei.h>
9 #include <linux/poll.h>
10 #include <linux/vmalloc.h>
11 #include <linux/io_uring.h>
12 
13 #include <uapi/linux/io_uring.h>
14 
15 #include "io_uring.h"
16 #include "opdef.h"
17 #include "kbuf.h"
18 #include "memmap.h"
19 
20 /* BIDs are addressed by a 16-bit field in a CQE */
21 #define MAX_BIDS_PER_BGID (1 << 16)
22 
23 /* Mapped buffer ring, return io_uring_buf from head */
24 #define io_ring_head_to_buf(br, head, mask)	&(br)->bufs[(head) & (mask)]
25 
26 struct io_provide_buf {
27 	struct file			*file;
28 	__u64				addr;
29 	__u32				len;
30 	__u32				bgid;
31 	__u32				nbufs;
32 	__u16				bid;
33 };
34 
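/*
 * Incremental (IOBL_INC) commit: consume @len bytes starting at the ring
 * head. Fully used buffers advance the head; a partially used buffer has
 * its addr/len adjusted in place and stays at the head. Returns true if
 * the last buffer touched was fully consumed, false if it still has room
 * left.
 */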
35 static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len)
36 {
37 	while (len) {
38 		struct io_uring_buf *buf;
39 		u32 this_len;
40 
41 		buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask);
42 		this_len = min_t(int, len, buf->len);
43 		buf->len -= this_len;
44 		if (buf->len) {
45 			buf->addr += this_len;
46 			return false;
47 		}
48 		bl->head++;
49 		len -= this_len;
50 	}
51 	return true;
52 }
53 
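/*
 * Commit the ring buffers a request consumed. This is a no-op unless
 * REQ_F_BUFFERS_COMMIT is set; the flag is cleared here. A false return
 * means the head buffer of an incremental ring was only partially
 * consumed, which the completion path reports via IORING_CQE_F_BUF_MORE.
 */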
54 bool io_kbuf_commit(struct io_kiocb *req,
55 		    struct io_buffer_list *bl, int len, int nr)
56 {
57 	if (unlikely(!(req->flags & REQ_F_BUFFERS_COMMIT)))
58 		return true;
59 
60 	req->flags &= ~REQ_F_BUFFERS_COMMIT;
61 
62 	if (unlikely(len < 0))
63 		return true;
64 	if (bl->flags & IOBL_INC)
65 		return io_kbuf_inc_commit(bl, len);
66 	bl->head += nr;
67 	return true;
68 }
69 
70 static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
71 							unsigned int bgid)
72 {
73 	lockdep_assert_held(&ctx->uring_lock);
74 
75 	return xa_load(&ctx->io_bl_xa, bgid);
76 }
77 
78 static int io_buffer_add_list(struct io_ring_ctx *ctx,
79 			      struct io_buffer_list *bl, unsigned int bgid)
80 {
81 	/*
82 	 * Store buffer group ID and finally mark the list as visible.
83 	 * The normal lookup doesn't care about the visibility as we're
84 	 * always under the ->uring_lock, but lookups from mmap do.
85 	 */
86 	bl->bgid = bgid;
87 	guard(mutex)(&ctx->mmap_lock);
88 	return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL));
89 }
90 
91 void io_kbuf_drop_legacy(struct io_kiocb *req)
92 {
93 	if (WARN_ON_ONCE(!(req->flags & REQ_F_BUFFER_SELECTED)))
94 		return;
95 	req->flags &= ~REQ_F_BUFFER_SELECTED;
96 	kfree(req->kbuf);
97 	req->kbuf = NULL;
98 }
99 
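/*
 * Return a legacy selected buffer to its group so it can be handed out
 * again. Safe to call from both locked and unlocked issue contexts,
 * io_ring_submit_lock() handles either.
 */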
100 bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags)
101 {
102 	struct io_ring_ctx *ctx = req->ctx;
103 	struct io_buffer_list *bl;
104 	struct io_buffer *buf;
105 
106 	io_ring_submit_lock(ctx, issue_flags);
107 
108 	buf = req->kbuf;
109 	bl = io_buffer_get_list(ctx, buf->bgid);
110 	list_add(&buf->list, &bl->buf_list);
111 	req->flags &= ~REQ_F_BUFFER_SELECTED;
112 
113 	io_ring_submit_unlock(ctx, issue_flags);
114 	return true;
115 }
116 
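/*
 * Legacy (classic) provided buffers: pop the first buffer off the group
 * list, clamp *len to the buffer size, and mark the request as having a
 * selected buffer. Returns the userspace address, or NULL if the group
 * is empty.
 */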
117 static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len,
118 					      struct io_buffer_list *bl)
119 {
120 	if (!list_empty(&bl->buf_list)) {
121 		struct io_buffer *kbuf;
122 
123 		kbuf = list_first_entry(&bl->buf_list, struct io_buffer, list);
124 		list_del(&kbuf->list);
125 		if (*len == 0 || *len > kbuf->len)
126 			*len = kbuf->len;
127 		if (list_empty(&bl->buf_list))
128 			req->flags |= REQ_F_BL_EMPTY;
129 		req->flags |= REQ_F_BUFFER_SELECTED;
130 		req->kbuf = kbuf;
131 		req->buf_index = kbuf->bid;
132 		return u64_to_user_ptr(kbuf->addr);
133 	}
134 	return NULL;
135 }
136 
137 static int io_provided_buffers_select(struct io_kiocb *req, size_t *len,
138 				      struct io_buffer_list *bl,
139 				      struct iovec *iov)
140 {
141 	void __user *buf;
142 
143 	buf = io_provided_buffer_select(req, len, bl);
144 	if (unlikely(!buf))
145 		return -ENOBUFS;
146 
147 	iov[0].iov_base = buf;
148 	iov[0].iov_len = *len;
149 	return 1;
150 }
151 
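/*
 * Ring-mapped provided buffers: pick the buffer at the current head. The
 * tail is written by userspace, so it is read with acquire semantics to
 * pair with the application's release store of the new tail. If we are
 * called unlocked (io-wq) or the file cannot be polled, the buffer is
 * committed immediately, as nothing else guarantees it stays reserved
 * for this request.
 */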
152 static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
153 					  struct io_buffer_list *bl,
154 					  unsigned int issue_flags)
155 {
156 	struct io_uring_buf_ring *br = bl->buf_ring;
157 	__u16 tail, head = bl->head;
158 	struct io_uring_buf *buf;
159 	void __user *ret;
160 
161 	tail = smp_load_acquire(&br->tail);
162 	if (unlikely(tail == head))
163 		return NULL;
164 
165 	if (head + 1 == tail)
166 		req->flags |= REQ_F_BL_EMPTY;
167 
168 	buf = io_ring_head_to_buf(br, head, bl->mask);
169 	if (*len == 0 || *len > buf->len)
170 		*len = buf->len;
171 	req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT;
172 	req->buf_list = bl;
173 	req->buf_index = buf->bid;
174 	ret = u64_to_user_ptr(buf->addr);
175 
176 	if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) {
177 		/*
178 		 * If we came in unlocked, we have no choice but to consume the
179 		 * buffer here, otherwise nothing ensures that the buffer won't
180 		 * get used by others. This does mean it'll be pinned until the
181 		 * I/O completes; coming in unlocked means we're being called
182 		 * from io-wq context and there may be further retries in async
183 		 * hybrid mode. For the locked case, the caller must call commit
184 		 * when the transfer completes (or if we get -EAGAIN and must
185 		 * poll or retry).
186 		 */
187 		io_kbuf_commit(req, bl, *len, 1);
188 		req->buf_list = NULL;
189 	}
190 	return ret;
191 }
192 
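/*
 * Select a single provided buffer for @req from group @buf_group,
 * dispatching on whether the group is ring-mapped or a legacy list.
 */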
193 void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
194 			      unsigned buf_group, unsigned int issue_flags)
195 {
196 	struct io_ring_ctx *ctx = req->ctx;
197 	struct io_buffer_list *bl;
198 	void __user *ret = NULL;
199 
200 	io_ring_submit_lock(req->ctx, issue_flags);
201 
202 	bl = io_buffer_get_list(ctx, buf_group);
203 	if (likely(bl)) {
204 		if (bl->flags & IOBL_BUF_RING)
205 			ret = io_ring_buffer_select(req, len, bl, issue_flags);
206 		else
207 			ret = io_provided_buffer_select(req, len, bl);
208 	}
209 	io_ring_submit_unlock(req->ctx, issue_flags);
210 	return ret;
211 }
212 
213 /* cap it at a reasonable 256; 256 iovecs (16 bytes each on 64-bit) fill exactly one 4K page */
214 #define PEEK_MAX_IMPORT		256
215 
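/*
 * Map up to the available ring buffers into @arg->iovs without advancing
 * the ring head. The count is bounded by what is needed to satisfy
 * arg->max_len (capped at PEEK_MAX_IMPORT), by UIO_MAXIOV, and by the
 * iovec array size unless KBUF_MODE_EXPAND allows allocating a larger
 * one. Returns the number of iovecs filled, or a negative error.
 */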
216 static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
217 				struct io_buffer_list *bl)
218 {
219 	struct io_uring_buf_ring *br = bl->buf_ring;
220 	struct iovec *iov = arg->iovs;
221 	int nr_iovs = arg->nr_iovs;
222 	__u16 nr_avail, tail, head;
223 	struct io_uring_buf *buf;
224 
225 	tail = smp_load_acquire(&br->tail);
226 	head = bl->head;
227 	nr_avail = min_t(__u16, tail - head, UIO_MAXIOV);
228 	if (unlikely(!nr_avail))
229 		return -ENOBUFS;
230 
231 	buf = io_ring_head_to_buf(br, head, bl->mask);
232 	if (arg->max_len) {
233 		u32 len = READ_ONCE(buf->len);
234 		size_t needed;
235 
236 		if (unlikely(!len))
237 			return -ENOBUFS;
238 		needed = (arg->max_len + len - 1) / len;
239 		needed = min_not_zero(needed, (size_t) PEEK_MAX_IMPORT);
240 		if (nr_avail > needed)
241 			nr_avail = needed;
242 	}
243 
244 	/*
245 	 * Only allocate a bigger array if we know we have data to map, e.g.
246 	 * not a speculative peek operation.
247 	 */
248 	if (arg->mode & KBUF_MODE_EXPAND && nr_avail > nr_iovs && arg->max_len) {
249 		iov = kmalloc_array(nr_avail, sizeof(struct iovec), GFP_KERNEL);
250 		if (unlikely(!iov))
251 			return -ENOMEM;
252 		if (arg->mode & KBUF_MODE_FREE)
253 			kfree(arg->iovs);
254 		arg->iovs = iov;
255 		nr_iovs = nr_avail;
256 	} else if (nr_avail < nr_iovs) {
257 		nr_iovs = nr_avail;
258 	}
259 
260 	/* set it to max, if not set, so we can use it unconditionally */
261 	if (!arg->max_len)
262 		arg->max_len = INT_MAX;
263 
264 	req->buf_index = buf->bid;
265 	do {
266 		u32 len = buf->len;
267 
268 		/* truncate end piece, if needed, for non-partial buffers */
269 		if (len > arg->max_len) {
270 			len = arg->max_len;
271 			if (!(bl->flags & IOBL_INC))
272 				buf->len = len;
273 		}
274 
275 		iov->iov_base = u64_to_user_ptr(buf->addr);
276 		iov->iov_len = len;
277 		iov++;
278 
279 		arg->out_len += len;
280 		arg->max_len -= len;
281 		if (!arg->max_len)
282 			break;
283 
284 		buf = io_ring_head_to_buf(br, ++head, bl->mask);
285 	} while (--nr_iovs);
286 
287 	if (head == tail)
288 		req->flags |= REQ_F_BL_EMPTY;
289 
290 	req->flags |= REQ_F_BUFFER_RING;
291 	req->buf_list = bl;
292 	return iov - arg->iovs;
293 }
294 
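/*
 * Select one or more buffers for a request. Ring-mapped buffers are
 * committed immediately and flagged REQ_F_BL_NO_RECYCLE, so a short
 * transfer cannot return them; legacy groups fall back to single-buffer
 * selection.
 */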
295 int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg,
296 		      unsigned int issue_flags)
297 {
298 	struct io_ring_ctx *ctx = req->ctx;
299 	struct io_buffer_list *bl;
300 	int ret = -ENOENT;
301 
302 	io_ring_submit_lock(ctx, issue_flags);
303 	bl = io_buffer_get_list(ctx, arg->buf_group);
304 	if (unlikely(!bl))
305 		goto out_unlock;
306 
307 	if (bl->flags & IOBL_BUF_RING) {
308 		ret = io_ring_buffers_peek(req, arg, bl);
309 		/*
310 		 * Don't recycle these buffers if we need to go through poll.
311 		 * Nobody else can use them anyway, and holding on to provided
312 		 * buffers for a send/write operation would happen on the app
313 		 * side anyway with normal buffers. Besides, we have already
314 		 * committed them; they cannot be put back in the queue.
315 		 */
316 		if (ret > 0) {
317 			req->flags |= REQ_F_BUFFERS_COMMIT | REQ_F_BL_NO_RECYCLE;
318 			io_kbuf_commit(req, bl, arg->out_len, ret);
319 		}
320 	} else {
321 		ret = io_provided_buffers_select(req, &arg->out_len, bl, arg->iovs);
322 	}
323 out_unlock:
324 	io_ring_submit_unlock(ctx, issue_flags);
325 	return ret;
326 }
327 
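/*
 * Like io_buffers_select(), but only peeks: ring buffers are not
 * committed here, the caller commits (or recycles) once it knows how
 * much data was actually transferred. Requires the uring_lock.
 */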
328 int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg)
329 {
330 	struct io_ring_ctx *ctx = req->ctx;
331 	struct io_buffer_list *bl;
332 	int ret;
333 
334 	lockdep_assert_held(&ctx->uring_lock);
335 
336 	bl = io_buffer_get_list(ctx, arg->buf_group);
337 	if (unlikely(!bl))
338 		return -ENOENT;
339 
340 	if (bl->flags & IOBL_BUF_RING) {
341 		ret = io_ring_buffers_peek(req, arg, bl);
342 		if (ret > 0)
343 			req->flags |= REQ_F_BUFFERS_COMMIT;
344 		return ret;
345 	}
346 
347 	/* don't support multiple buffer selections for legacy */
348 	return io_provided_buffers_select(req, &arg->max_len, bl, arg->iovs);
349 }
350 
351 static inline bool __io_put_kbuf_ring(struct io_kiocb *req, int len, int nr)
352 {
353 	struct io_buffer_list *bl = req->buf_list;
354 	bool ret = true;
355 
356 	if (bl)
357 		ret = io_kbuf_commit(req, bl, len, nr);
358 
359 	req->flags &= ~REQ_F_BUFFER_RING;
360 	return ret;
361 }
362 
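/*
 * Release the buffer(s) attached to a completed request and build the
 * CQE flags: the buffer ID goes in the upper bits, and BUF_MORE is set
 * when an incremental ring buffer still has space left.
 */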
363 unsigned int __io_put_kbufs(struct io_kiocb *req, int len, int nbufs)
364 {
365 	unsigned int ret;
366 
367 	ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT);
368 
369 	if (unlikely(!(req->flags & REQ_F_BUFFER_RING))) {
370 		io_kbuf_drop_legacy(req);
371 		return ret;
372 	}
373 
374 	if (!__io_put_kbuf_ring(req, len, nbufs))
375 		ret |= IORING_CQE_F_BUF_MORE;
376 	return ret;
377 }
378 
379 static int io_remove_buffers_legacy(struct io_ring_ctx *ctx,
380 				    struct io_buffer_list *bl,
381 				    unsigned long nbufs)
382 {
383 	unsigned long i = 0;
384 	struct io_buffer *nxt;
385 
386 	/* protects the legacy buf_list */
387 	lockdep_assert_held(&ctx->uring_lock);
388 	WARN_ON_ONCE(bl->flags & IOBL_BUF_RING);
389 
390 	for (i = 0; i < nbufs && !list_empty(&bl->buf_list); i++) {
391 		nxt = list_first_entry(&bl->buf_list, struct io_buffer, list);
392 		list_del(&nxt->list);
393 		kfree(nxt);
394 		cond_resched();
395 	}
396 	return i;
397 }
398 
399 static void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
400 {
401 	if (bl->flags & IOBL_BUF_RING)
402 		io_free_region(ctx, &bl->region);
403 	else
404 		io_remove_buffers_legacy(ctx, bl, -1U);
405 
406 	kfree(bl);
407 }
408 
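/*
 * Free every buffer group on ring teardown. Each group is unlinked from
 * the xarray under the mmap_lock before being freed, so concurrent mmap
 * lookups cannot observe a stale entry.
 */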
409 void io_destroy_buffers(struct io_ring_ctx *ctx)
410 {
411 	struct io_buffer_list *bl;
412 
413 	while (1) {
414 		unsigned long index = 0;
415 
416 		scoped_guard(mutex, &ctx->mmap_lock) {
417 			bl = xa_find(&ctx->io_bl_xa, &index, ULONG_MAX, XA_PRESENT);
418 			if (bl)
419 				xa_erase(&ctx->io_bl_xa, bl->bgid);
420 		}
421 		if (!bl)
422 			break;
423 		io_put_bl(ctx, bl);
424 	}
425 }
426 
427 static void io_destroy_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
428 {
429 	scoped_guard(mutex, &ctx->mmap_lock)
430 		WARN_ON_ONCE(xa_erase(&ctx->io_bl_xa, bl->bgid) != bl);
431 	io_put_bl(ctx, bl);
432 }
433 
434 int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
435 {
436 	struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
437 	u64 tmp;
438 
439 	if (sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
440 	    sqe->splice_fd_in)
441 		return -EINVAL;
442 
443 	tmp = READ_ONCE(sqe->fd);
444 	if (!tmp || tmp > MAX_BIDS_PER_BGID)
445 		return -EINVAL;
446 
447 	memset(p, 0, sizeof(*p));
448 	p->nbufs = tmp;
449 	p->bgid = READ_ONCE(sqe->buf_group);
450 	return 0;
451 }
452 
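/*
 * Prepare IORING_OP_PROVIDE_BUFFERS: sqe->fd carries the buffer count,
 * sqe->addr/len the base address and per-buffer length, sqe->off the
 * starting buffer ID, and sqe->buf_group the group ID. Overflow and
 * access_ok() checks are done up front.
 */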
453 int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
454 {
455 	unsigned long size, tmp_check;
456 	struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
457 	u64 tmp;
458 
459 	if (sqe->rw_flags || sqe->splice_fd_in)
460 		return -EINVAL;
461 
462 	tmp = READ_ONCE(sqe->fd);
463 	if (!tmp || tmp > MAX_BIDS_PER_BGID)
464 		return -E2BIG;
465 	p->nbufs = tmp;
466 	p->addr = READ_ONCE(sqe->addr);
467 	p->len = READ_ONCE(sqe->len);
468 	if (!p->len)
469 		return -EINVAL;
470 
471 	if (check_mul_overflow((unsigned long)p->len, (unsigned long)p->nbufs,
472 				&size))
473 		return -EOVERFLOW;
474 	if (check_add_overflow((unsigned long)p->addr, size, &tmp_check))
475 		return -EOVERFLOW;
476 	if (!access_ok(u64_to_user_ptr(p->addr), size))
477 		return -EFAULT;
478 
479 	p->bgid = READ_ONCE(sqe->buf_group);
480 	tmp = READ_ONCE(sqe->off);
481 	if (tmp > USHRT_MAX)
482 		return -E2BIG;
483 	if (tmp + p->nbufs > MAX_BIDS_PER_BGID)
484 		return -EINVAL;
485 	p->bid = tmp;
486 	return 0;
487 }
488 
489 static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
490 			  struct io_buffer_list *bl)
491 {
492 	struct io_buffer *buf;
493 	u64 addr = pbuf->addr;
494 	int i, bid = pbuf->bid;
495 
496 	for (i = 0; i < pbuf->nbufs; i++) {
497 		buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
498 		if (!buf)
499 			break;
500 
501 		list_add_tail(&buf->list, &bl->buf_list);
502 		buf->addr = addr;
503 		buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
504 		buf->bid = bid;
505 		buf->bgid = pbuf->bgid;
506 		addr += pbuf->len;
507 		bid++;
508 		cond_resched();
509 	}
510 
511 	return i ? 0 : -ENOMEM;
512 }
513 
514 static int __io_manage_buffers_legacy(struct io_kiocb *req,
515 					struct io_buffer_list *bl)
516 {
517 	struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
518 	int ret;
519 
520 	if (!bl) {
521 		if (req->opcode != IORING_OP_PROVIDE_BUFFERS)
522 			return -ENOENT;
523 		bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT);
524 		if (!bl)
525 			return -ENOMEM;
526 
527 		INIT_LIST_HEAD(&bl->buf_list);
528 		ret = io_buffer_add_list(req->ctx, bl, p->bgid);
529 		if (ret) {
530 			kfree(bl);
531 			return ret;
532 		}
533 	}
534 	/* can't use provide/remove buffers command on mapped buffers */
535 	if (bl->flags & IOBL_BUF_RING)
536 		return -EINVAL;
537 	if (req->opcode == IORING_OP_PROVIDE_BUFFERS)
538 		return io_add_buffers(req->ctx, p, bl);
539 	return io_remove_buffers_legacy(req->ctx, bl, p->nbufs);
540 }
541 
542 int io_manage_buffers_legacy(struct io_kiocb *req, unsigned int issue_flags)
543 {
544 	struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
545 	struct io_ring_ctx *ctx = req->ctx;
546 	struct io_buffer_list *bl;
547 	int ret;
548 
549 	io_ring_submit_lock(ctx, issue_flags);
550 	bl = io_buffer_get_list(ctx, p->bgid);
551 	ret = __io_manage_buffers_legacy(req, bl);
552 	io_ring_submit_unlock(ctx, issue_flags);
553 
554 	if (ret < 0)
555 		req_set_fail(req);
556 	io_req_set_res(req, ret, 0);
557 	return IOU_COMPLETE;
558 }
559 
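/*
 * Register a ring-mapped provided buffer group (IORING_REGISTER_PBUF_RING).
 * The ring memory is either supplied by the application via reg.ring_addr,
 * or allocated by the kernel and mmap()ed by the application when
 * IOU_PBUF_RING_MMAP is set.
 *
 * Illustrative userspace sketch (raw io_uring_register(2) syscall, error
 * handling omitted; ring_fd and buf_ring are placeholders for the
 * application's io_uring fd and its page-aligned ring mapping):
 *
 *	struct io_uring_buf_reg reg = {
 *		.ring_addr	= (unsigned long) buf_ring,
 *		.ring_entries	= 8,
 *		.bgid		= 0,
 *	};
 *
 *	io_uring_register(ring_fd, IORING_REGISTER_PBUF_RING, &reg, 1);
 */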
560 int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
561 {
562 	struct io_uring_buf_reg reg;
563 	struct io_buffer_list *bl;
564 	struct io_uring_region_desc rd;
565 	struct io_uring_buf_ring *br;
566 	unsigned long mmap_offset;
567 	unsigned long ring_size;
568 	int ret;
569 
570 	lockdep_assert_held(&ctx->uring_lock);
571 
572 	if (copy_from_user(&reg, arg, sizeof(reg)))
573 		return -EFAULT;
574 	if (!mem_is_zero(reg.resv, sizeof(reg.resv)))
575 		return -EINVAL;
576 	if (reg.flags & ~(IOU_PBUF_RING_MMAP | IOU_PBUF_RING_INC))
577 		return -EINVAL;
578 	if (!is_power_of_2(reg.ring_entries))
579 		return -EINVAL;
580 	/* cannot disambiguate full vs empty due to head/tail size */
581 	if (reg.ring_entries >= 65536)
582 		return -EINVAL;
583 
584 	bl = io_buffer_get_list(ctx, reg.bgid);
585 	if (bl) {
586 		/* if mapped buffer ring OR classic exists, don't allow */
587 		/* if a mapped buffer ring OR classic provided buffers exist, don't allow */
588 			return -EEXIST;
589 		io_destroy_bl(ctx, bl);
590 	}
591 
592 	bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT);
593 	if (!bl)
594 		return -ENOMEM;
595 
596 	mmap_offset = (unsigned long)reg.bgid << IORING_OFF_PBUF_SHIFT;
597 	ring_size = flex_array_size(br, bufs, reg.ring_entries);
598 
599 	memset(&rd, 0, sizeof(rd));
600 	rd.size = PAGE_ALIGN(ring_size);
601 	if (!(reg.flags & IOU_PBUF_RING_MMAP)) {
602 		rd.user_addr = reg.ring_addr;
603 		rd.flags |= IORING_MEM_REGION_TYPE_USER;
604 	}
605 	ret = io_create_region_mmap_safe(ctx, &bl->region, &rd, mmap_offset);
606 	if (ret)
607 		goto fail;
608 	br = io_region_get_ptr(&bl->region);
609 
610 #ifdef SHM_COLOUR
611 	/*
612 	 * On platforms that have specific aliasing requirements, SHM_COLOUR
613 	 * is set and we must guarantee that the kernel and user side align
614 	 * nicely. We cannot do that if IOU_PBUF_RING_MMAP isn't set and
615 	 * the application mmaps the provided ring buffer. Fail the request
616 	 * if we, by chance, don't end up with aligned addresses. The app
617 	 * should use IOU_PBUF_RING_MMAP instead, and liburing will handle
618 	 * this transparently.
619 	 */
620 	if (!(reg.flags & IOU_PBUF_RING_MMAP) &&
621 	    ((reg.ring_addr | (unsigned long)br) & (SHM_COLOUR - 1))) {
622 		ret = -EINVAL;
623 		goto fail;
624 	}
625 #endif
626 
627 	bl->nr_entries = reg.ring_entries;
628 	bl->mask = reg.ring_entries - 1;
629 	bl->flags |= IOBL_BUF_RING;
630 	bl->buf_ring = br;
631 	if (reg.flags & IOU_PBUF_RING_INC)
632 		bl->flags |= IOBL_INC;
633 	io_buffer_add_list(ctx, bl, reg.bgid);
634 	return 0;
635 fail:
636 	io_free_region(ctx, &bl->region);
637 	kfree(bl);
638 	return ret;
639 }
640 
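/*
 * Tear down a ring-mapped buffer group (IORING_UNREGISTER_PBUF_RING).
 * Only IOBL_BUF_RING groups can be removed this way; legacy groups are
 * managed via IORING_OP_REMOVE_BUFFERS.
 */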
641 int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
642 {
643 	struct io_uring_buf_reg reg;
644 	struct io_buffer_list *bl;
645 
646 	lockdep_assert_held(&ctx->uring_lock);
647 
648 	if (copy_from_user(&reg, arg, sizeof(reg)))
649 		return -EFAULT;
650 	if (!mem_is_zero(reg.resv, sizeof(reg.resv)) || reg.flags)
651 		return -EINVAL;
652 
653 	bl = io_buffer_get_list(ctx, reg.bgid);
654 	if (!bl)
655 		return -ENOENT;
656 	if (!(bl->flags & IOBL_BUF_RING))
657 		return -EINVAL;
658 
659 	scoped_guard(mutex, &ctx->mmap_lock)
660 		xa_erase(&ctx->io_bl_xa, bl->bgid);
661 
662 	io_put_bl(ctx, bl);
663 	return 0;
664 }
665 
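/*
 * IORING_REGISTER_PBUF_STATUS: report the kernel's current head for a
 * ring-mapped buffer group so the application can tell how many buffers
 * have been consumed.
 */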
666 int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg)
667 {
668 	struct io_uring_buf_status buf_status;
669 	struct io_buffer_list *bl;
670 
671 	if (copy_from_user(&buf_status, arg, sizeof(buf_status)))
672 		return -EFAULT;
673 	if (!mem_is_zero(buf_status.resv, sizeof(buf_status.resv)))
674 		return -EINVAL;
675 
676 	bl = io_buffer_get_list(ctx, buf_status.buf_group);
677 	if (!bl)
678 		return -ENOENT;
679 	if (!(bl->flags & IOBL_BUF_RING))
680 		return -EINVAL;
681 
682 	buf_status.head = bl->head;
683 	if (copy_to_user(arg, &buf_status, sizeof(buf_status)))
684 		return -EFAULT;
685 
686 	return 0;
687 }
688 
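/*
 * mmap() support: look up the mapped region backing a ring-mapped buffer
 * group. Called with the mmap_lock held, so this lookup does not rely on
 * the uring_lock.
 */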
689 struct io_mapped_region *io_pbuf_get_region(struct io_ring_ctx *ctx,
690 					    unsigned int bgid)
691 {
692 	struct io_buffer_list *bl;
693 
694 	lockdep_assert_held(&ctx->mmap_lock);
695 
696 	bl = xa_load(&ctx->io_bl_xa, bgid);
697 	if (!bl || !(bl->flags & IOBL_BUF_RING))
698 		return NULL;
699 	return &bl->region;
700 }
701