// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/poll.h>
#include <linux/vmalloc.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "opdef.h"
#include "kbuf.h"
#include "memmap.h"

/* BIDs are addressed by a 16-bit field in a CQE */
#define MAX_BIDS_PER_BGID (1 << 16)

/* Mapped buffer ring, return io_uring_buf from head */
#define io_ring_head_to_buf(br, head, mask) &(br)->bufs[(head) & (mask)]

struct io_provide_buf {
        struct file *file;
        __u64 addr;
        __u32 len;
        __u32 bgid;
        __u32 nbufs;
        __u16 bid;
};

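/*
 * Commit @len bytes to an incrementally consumed (IOBL_INC) buffer ring.
 * Fully used buffers are retired by advancing the ring head; a partially
 * used buffer has its addr/len adjusted in place and stays at the head.
 * Returns true if every touched buffer was fully consumed, false if the
 * final buffer was only partially used (or had an invalid length of 0).
 */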
static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len)
{
        while (len) {
                struct io_uring_buf *buf;
                u32 buf_len, this_len;

                buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask);
                buf_len = READ_ONCE(buf->len);
                this_len = min_t(u32, len, buf_len);
                buf_len -= this_len;
                /* Stop looping for invalid buffer length of 0 */
                if (buf_len || !this_len) {
                        buf->addr += this_len;
                        buf->len = buf_len;
                        return false;
                }
                buf->len = 0;
                bl->head++;
                len -= this_len;
        }
        return true;
}

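/*
 * Commit ring-provided buffers for @req: advance the ring head by @nr
 * buffers, or consume @len bytes for incremental (IOBL_INC) rings. A
 * return of false means the current buffer was only partially consumed,
 * which the completion side reports via IORING_CQE_F_BUF_MORE.
 */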
bool io_kbuf_commit(struct io_kiocb *req,
                    struct io_buffer_list *bl, int len, int nr)
{
        if (unlikely(!(req->flags & REQ_F_BUFFERS_COMMIT)))
                return true;

        req->flags &= ~REQ_F_BUFFERS_COMMIT;

        if (unlikely(len < 0))
                return true;
        if (bl->flags & IOBL_INC)
                return io_kbuf_inc_commit(bl, len);
        bl->head += nr;
        return true;
}

static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
                                                        unsigned int bgid)
{
        lockdep_assert_held(&ctx->uring_lock);

        return xa_load(&ctx->io_bl_xa, bgid);
}

static int io_buffer_add_list(struct io_ring_ctx *ctx,
                              struct io_buffer_list *bl, unsigned int bgid)
{
        /*
         * Store buffer group ID and finally mark the list as visible.
         * The normal lookup doesn't care about the visibility as we're
         * always under the ->uring_lock, but lookups from mmap do.
         */
        bl->bgid = bgid;
        guard(mutex)(&ctx->mmap_lock);
        return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL));
}

void io_kbuf_drop_legacy(struct io_kiocb *req)
{
        if (WARN_ON_ONCE(!(req->flags & REQ_F_BUFFER_SELECTED)))
                return;
        req->flags &= ~REQ_F_BUFFER_SELECTED;
        kfree(req->kbuf);
        req->kbuf = NULL;
}

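/*
 * Return a legacy (classic provided) buffer that was selected but not
 * consumed back to its buffer list, so it can be handed out again.
 */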
bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags)
{
        struct io_ring_ctx *ctx = req->ctx;
        struct io_buffer_list *bl;
        struct io_buffer *buf;

        io_ring_submit_lock(ctx, issue_flags);

        buf = req->kbuf;
        bl = io_buffer_get_list(ctx, buf->bgid);
        list_add(&buf->list, &bl->buf_list);
        bl->nbufs++;
        req->flags &= ~REQ_F_BUFFER_SELECTED;

        io_ring_submit_unlock(ctx, issue_flags);
        return true;
}

static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len,
                                              struct io_buffer_list *bl)
{
        if (!list_empty(&bl->buf_list)) {
                struct io_buffer *kbuf;

                kbuf = list_first_entry(&bl->buf_list, struct io_buffer, list);
                list_del(&kbuf->list);
                bl->nbufs--;
                if (*len == 0 || *len > kbuf->len)
                        *len = kbuf->len;
                if (list_empty(&bl->buf_list))
                        req->flags |= REQ_F_BL_EMPTY;
                req->flags |= REQ_F_BUFFER_SELECTED;
                req->kbuf = kbuf;
                req->buf_index = kbuf->bid;
                return u64_to_user_ptr(kbuf->addr);
        }
        return NULL;
}

static int io_provided_buffers_select(struct io_kiocb *req, size_t *len,
                                      struct io_buffer_list *bl,
                                      struct iovec *iov)
{
        void __user *buf;

        buf = io_provided_buffer_select(req, len, bl);
        if (unlikely(!buf))
                return -ENOBUFS;

        iov[0].iov_base = buf;
        iov[0].iov_len = *len;
        return 1;
}

static bool io_should_commit(struct io_kiocb *req, unsigned int issue_flags)
{
        /*
         * If we came in unlocked, we have no choice but to consume the
         * buffer here, otherwise nothing ensures that the buffer won't
         * get used by others. This does mean it'll be pinned until the
         * IO completes, coming in unlocked means we're being called from
         * io-wq context and there may be further retries in async hybrid
         * mode. For the locked case, the caller must call commit when
         * the transfer completes (or if we get -EAGAIN and must poll or
         * retry).
         */
        if (issue_flags & IO_URING_F_UNLOCKED)
                return true;

        /* uring_cmd commits kbuf upfront, no need to auto-commit */
        if (!io_file_can_poll(req) && req->opcode != IORING_OP_URING_CMD)
                return true;
        return false;
}

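/*
 * Select the buffer at the current head of a ring-mapped buffer group.
 * If io_should_commit() decides the buffer must be consumed right away,
 * the commit happens here and sel.buf_list is returned as NULL;
 * otherwise the caller commits once the transfer result is known.
 */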
static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len,
                                              struct io_buffer_list *bl,
                                              unsigned int issue_flags)
{
        struct io_uring_buf_ring *br = bl->buf_ring;
        __u16 tail, head = bl->head;
        struct io_br_sel sel = { };
        struct io_uring_buf *buf;
        u32 buf_len;

        tail = smp_load_acquire(&br->tail);
        if (unlikely(tail == head))
                return sel;

        if (head + 1 == tail)
                req->flags |= REQ_F_BL_EMPTY;

        buf = io_ring_head_to_buf(br, head, bl->mask);
        buf_len = READ_ONCE(buf->len);
        if (*len == 0 || *len > buf_len)
                *len = buf_len;
        req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT;
        req->buf_index = buf->bid;
        sel.buf_list = bl;
        sel.addr = u64_to_user_ptr(buf->addr);

        if (io_should_commit(req, issue_flags)) {
                io_kbuf_commit(req, sel.buf_list, *len, 1);
                sel.buf_list = NULL;
        }
        return sel;
}

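/*
 * Select a single buffer for a request from group @buf_group, using
 * either the ring-mapped or the legacy provided-buffer path. sel.addr
 * is NULL if no buffer is available.
 */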
struct io_br_sel io_buffer_select(struct io_kiocb *req, size_t *len,
                                  unsigned buf_group, unsigned int issue_flags)
{
        struct io_ring_ctx *ctx = req->ctx;
        struct io_br_sel sel = { };
        struct io_buffer_list *bl;

        io_ring_submit_lock(req->ctx, issue_flags);

        bl = io_buffer_get_list(ctx, buf_group);
        if (likely(bl)) {
                if (bl->flags & IOBL_BUF_RING)
                        sel = io_ring_buffer_select(req, len, bl, issue_flags);
                else
                        sel.addr = io_provided_buffer_select(req, len, bl);
        }
        io_ring_submit_unlock(req->ctx, issue_flags);
        return sel;
}

/* cap it at a reasonable 256, will be one page even for 4K */
#define PEEK_MAX_IMPORT 256

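/*
 * Peek up to arg->nr_iovs buffers (expanding the iovec array if
 * KBUF_MODE_EXPAND allows it) from a ring-mapped group into arg->iovs,
 * without committing them. Returns the number of iovecs filled, or a
 * negative error if no usable buffers are available.
 */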
static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
                                struct io_buffer_list *bl)
{
        struct io_uring_buf_ring *br = bl->buf_ring;
        struct iovec *iov = arg->iovs;
        int nr_iovs = arg->nr_iovs;
        __u16 nr_avail, tail, head;
        struct io_uring_buf *buf;

        tail = smp_load_acquire(&br->tail);
        head = bl->head;
        nr_avail = min_t(__u16, tail - head, UIO_MAXIOV);
        if (unlikely(!nr_avail))
                return -ENOBUFS;

        buf = io_ring_head_to_buf(br, head, bl->mask);
        if (arg->max_len) {
                u32 len = READ_ONCE(buf->len);
                size_t needed;

                if (unlikely(!len))
                        return -ENOBUFS;
                needed = (arg->max_len + len - 1) / len;
                needed = min_not_zero(needed, (size_t) PEEK_MAX_IMPORT);
                if (nr_avail > needed)
                        nr_avail = needed;
        }

        /*
         * only alloc a bigger array if we know we have data to map, e.g. not
         * a speculative peek operation.
         */
        if (arg->mode & KBUF_MODE_EXPAND && nr_avail > nr_iovs && arg->max_len) {
                iov = kmalloc_array(nr_avail, sizeof(struct iovec), GFP_KERNEL);
                if (unlikely(!iov))
                        return -ENOMEM;
                if (arg->mode & KBUF_MODE_FREE)
                        kfree(arg->iovs);
                arg->iovs = iov;
                nr_iovs = nr_avail;
        } else if (nr_avail < nr_iovs) {
                nr_iovs = nr_avail;
        }

        /* set it to max, if not set, so we can use it unconditionally */
        if (!arg->max_len)
                arg->max_len = INT_MAX;

        req->buf_index = buf->bid;
        do {
                u32 len = READ_ONCE(buf->len);

                /* truncate end piece, if needed, for non-partial buffers */
                if (len > arg->max_len) {
                        len = arg->max_len;
                        if (!(bl->flags & IOBL_INC)) {
                                arg->partial_map = 1;
                                if (iov != arg->iovs)
                                        break;
                                buf->len = len;
                        }
                }

                iov->iov_base = u64_to_user_ptr(buf->addr);
                iov->iov_len = len;
                iov++;

                arg->out_len += len;
                arg->max_len -= len;
                if (!arg->max_len)
                        break;

                buf = io_ring_head_to_buf(br, ++head, bl->mask);
        } while (--nr_iovs);

        if (head == tail)
                req->flags |= REQ_F_BL_EMPTY;

        req->flags |= REQ_F_BUFFER_RING;
        return iov - arg->iovs;
}

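/*
 * Select and immediately commit multiple ring buffers (or a single
 * legacy buffer) for a request. Ring buffers picked here are also marked
 * REQ_F_BL_NO_RECYCLE, since once committed they cannot be put back.
 */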
int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg,
                      struct io_br_sel *sel, unsigned int issue_flags)
{
        struct io_ring_ctx *ctx = req->ctx;
        int ret = -ENOENT;

        io_ring_submit_lock(ctx, issue_flags);
        sel->buf_list = io_buffer_get_list(ctx, arg->buf_group);
        if (unlikely(!sel->buf_list))
                goto out_unlock;

        if (sel->buf_list->flags & IOBL_BUF_RING) {
                ret = io_ring_buffers_peek(req, arg, sel->buf_list);
                /*
                 * Don't recycle these buffers if we need to go through poll.
                 * Nobody else can use them anyway, and holding on to provided
                 * buffers for a send/write operation would happen on the app
                 * side anyway with normal buffers. Besides, we already
                 * committed them, they cannot be put back in the queue.
                 */
                if (ret > 0) {
                        req->flags |= REQ_F_BUFFERS_COMMIT | REQ_F_BL_NO_RECYCLE;
                        io_kbuf_commit(req, sel->buf_list, arg->out_len, ret);
                }
        } else {
                ret = io_provided_buffers_select(req, &arg->out_len, sel->buf_list, arg->iovs);
        }
out_unlock:
        if (issue_flags & IO_URING_F_UNLOCKED) {
                sel->buf_list = NULL;
                mutex_unlock(&ctx->uring_lock);
        }
        return ret;
}

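/*
 * Like io_buffers_select(), but called with the uring_lock already held
 * and without committing ring buffers: the caller is responsible for
 * committing them.
 */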
int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
                    struct io_br_sel *sel)
{
        struct io_ring_ctx *ctx = req->ctx;
        struct io_buffer_list *bl;
        int ret;

        lockdep_assert_held(&ctx->uring_lock);

        bl = io_buffer_get_list(ctx, arg->buf_group);
        if (unlikely(!bl))
                return -ENOENT;

        if (bl->flags & IOBL_BUF_RING) {
                ret = io_ring_buffers_peek(req, arg, bl);
                if (ret > 0)
                        req->flags |= REQ_F_BUFFERS_COMMIT;
                sel->buf_list = bl;
                return ret;
        }

        /* don't support multiple buffer selections for legacy */
        sel->buf_list = NULL;
        return io_provided_buffers_select(req, &arg->max_len, bl, arg->iovs);
}

static inline bool __io_put_kbuf_ring(struct io_kiocb *req,
                                      struct io_buffer_list *bl, int len, int nr)
{
        bool ret = true;

        if (bl)
                ret = io_kbuf_commit(req, bl, len, nr);

        req->flags &= ~REQ_F_BUFFER_RING;
        return ret;
}

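/*
 * Release @nbufs buffers covering @len bytes and return the CQE flags
 * for the completion: IORING_CQE_F_BUFFER plus the buffer ID, and
 * IORING_CQE_F_BUF_MORE if a ring buffer was only partially consumed.
 */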
unsigned int __io_put_kbufs(struct io_kiocb *req, struct io_buffer_list *bl,
                            int len, int nbufs)
{
        unsigned int ret;

        ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT);

        if (unlikely(!(req->flags & REQ_F_BUFFER_RING))) {
                io_kbuf_drop_legacy(req);
                return ret;
        }

        if (!__io_put_kbuf_ring(req, bl, len, nbufs))
                ret |= IORING_CQE_F_BUF_MORE;
        return ret;
}

static int io_remove_buffers_legacy(struct io_ring_ctx *ctx,
                                    struct io_buffer_list *bl,
                                    unsigned long nbufs)
{
        unsigned long i = 0;
        struct io_buffer *nxt;

        /* protects io_buffers_cache */
        lockdep_assert_held(&ctx->uring_lock);
        WARN_ON_ONCE(bl->flags & IOBL_BUF_RING);

        for (i = 0; i < nbufs && !list_empty(&bl->buf_list); i++) {
                nxt = list_first_entry(&bl->buf_list, struct io_buffer, list);
                list_del(&nxt->list);
                bl->nbufs--;
                kfree(nxt);
                cond_resched();
        }
        return i;
}

static void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
{
        if (bl->flags & IOBL_BUF_RING)
                io_free_region(ctx, &bl->region);
        else
                io_remove_buffers_legacy(ctx, bl, -1U);

        kfree(bl);
}

void io_destroy_buffers(struct io_ring_ctx *ctx)
{
        struct io_buffer_list *bl;

        while (1) {
                unsigned long index = 0;

                scoped_guard(mutex, &ctx->mmap_lock) {
                        bl = xa_find(&ctx->io_bl_xa, &index, ULONG_MAX, XA_PRESENT);
                        if (bl)
                                xa_erase(&ctx->io_bl_xa, bl->bgid);
                }
                if (!bl)
                        break;
                io_put_bl(ctx, bl);
        }
}

static void io_destroy_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
{
        scoped_guard(mutex, &ctx->mmap_lock)
                WARN_ON_ONCE(xa_erase(&ctx->io_bl_xa, bl->bgid) != bl);
        io_put_bl(ctx, bl);
}

int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
        struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
        u64 tmp;

        if (sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
            sqe->splice_fd_in)
                return -EINVAL;

        tmp = READ_ONCE(sqe->fd);
        if (!tmp || tmp > MAX_BIDS_PER_BGID)
                return -EINVAL;

        memset(p, 0, sizeof(*p));
        p->nbufs = tmp;
        p->bgid = READ_ONCE(sqe->buf_group);
        return 0;
}

int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
        unsigned long size, tmp_check;
        struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
        u64 tmp;

        if (sqe->rw_flags || sqe->splice_fd_in)
                return -EINVAL;

        tmp = READ_ONCE(sqe->fd);
        if (!tmp || tmp > MAX_BIDS_PER_BGID)
                return -E2BIG;
        p->nbufs = tmp;
        p->addr = READ_ONCE(sqe->addr);
        p->len = READ_ONCE(sqe->len);
        if (!p->len)
                return -EINVAL;

        if (check_mul_overflow((unsigned long)p->len, (unsigned long)p->nbufs,
                               &size))
                return -EOVERFLOW;
        if (check_add_overflow((unsigned long)p->addr, size, &tmp_check))
                return -EOVERFLOW;
        if (!access_ok(u64_to_user_ptr(p->addr), size))
                return -EFAULT;

        p->bgid = READ_ONCE(sqe->buf_group);
        tmp = READ_ONCE(sqe->off);
        if (tmp > USHRT_MAX)
                return -E2BIG;
        if (tmp + p->nbufs > MAX_BIDS_PER_BGID)
                return -EINVAL;
        p->bid = tmp;
        return 0;
}

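/*
 * Add the @pbuf->nbufs legacy buffers described by an
 * IORING_OP_PROVIDE_BUFFERS request to @bl, assigning consecutive buffer
 * IDs starting at @pbuf->bid. Partial success is reported as success.
 */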
static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
                          struct io_buffer_list *bl)
{
        struct io_buffer *buf;
        u64 addr = pbuf->addr;
        int ret = -ENOMEM, i, bid = pbuf->bid;

        for (i = 0; i < pbuf->nbufs; i++) {
                /*
                 * Nonsensical to have more buffers in a list than the 16-bit
                 * bid can address, as the application then has no way of
                 * knowing which buffer a duplicate bid refers to.
                 */
                if (bl->nbufs == USHRT_MAX) {
                        ret = -EOVERFLOW;
                        break;
                }
                buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
                if (!buf)
                        break;

                list_add_tail(&buf->list, &bl->buf_list);
                bl->nbufs++;
                buf->addr = addr;
                buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
                buf->bid = bid;
                buf->bgid = pbuf->bgid;
                addr += pbuf->len;
                bid++;
                cond_resched();
        }

        return i ? 0 : ret;
}

static int __io_manage_buffers_legacy(struct io_kiocb *req,
                                      struct io_buffer_list *bl)
{
        struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
        int ret;

        if (!bl) {
                if (req->opcode != IORING_OP_PROVIDE_BUFFERS)
                        return -ENOENT;
                bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT);
                if (!bl)
                        return -ENOMEM;

                INIT_LIST_HEAD(&bl->buf_list);
                ret = io_buffer_add_list(req->ctx, bl, p->bgid);
                if (ret) {
                        kfree(bl);
                        return ret;
                }
        }
        /* can't use provide/remove buffers command on mapped buffers */
        if (bl->flags & IOBL_BUF_RING)
                return -EINVAL;
        if (req->opcode == IORING_OP_PROVIDE_BUFFERS)
                return io_add_buffers(req->ctx, p, bl);
        return io_remove_buffers_legacy(req->ctx, bl, p->nbufs);
}

int io_manage_buffers_legacy(struct io_kiocb *req, unsigned int issue_flags)
{
        struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
        struct io_ring_ctx *ctx = req->ctx;
        struct io_buffer_list *bl;
        int ret;

        io_ring_submit_lock(ctx, issue_flags);
        bl = io_buffer_get_list(ctx, p->bgid);
        ret = __io_manage_buffers_legacy(req, bl);
        io_ring_submit_unlock(ctx, issue_flags);

        if (ret < 0)
                req_set_fail(req);
        io_req_set_res(req, ret, 0);
        return IOU_COMPLETE;
}

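/*
 * IORING_REGISTER_PBUF_RING: register a ring-mapped provided buffer
 * group. The ring memory is either supplied by the application
 * (reg.ring_addr) or, with IOU_PBUF_RING_MMAP, allocated by the kernel
 * and later mmap'ed by the application at the bgid-based offset.
 */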
int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
{
        struct io_uring_buf_reg reg;
        struct io_buffer_list *bl;
        struct io_uring_region_desc rd;
        struct io_uring_buf_ring *br;
        unsigned long mmap_offset;
        unsigned long ring_size;
        int ret;

        lockdep_assert_held(&ctx->uring_lock);

        if (copy_from_user(&reg, arg, sizeof(reg)))
                return -EFAULT;
        if (!mem_is_zero(reg.resv, sizeof(reg.resv)))
                return -EINVAL;
        if (reg.flags & ~(IOU_PBUF_RING_MMAP | IOU_PBUF_RING_INC))
                return -EINVAL;
        if (!is_power_of_2(reg.ring_entries))
                return -EINVAL;
        /* cannot disambiguate full vs empty due to head/tail size */
        if (reg.ring_entries >= 65536)
                return -EINVAL;

        bl = io_buffer_get_list(ctx, reg.bgid);
        if (bl) {
                /* if mapped buffer ring OR classic exists, don't allow */
                if (bl->flags & IOBL_BUF_RING || !list_empty(&bl->buf_list))
                        return -EEXIST;
                io_destroy_bl(ctx, bl);
        }

        bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT);
        if (!bl)
                return -ENOMEM;

        mmap_offset = (unsigned long)reg.bgid << IORING_OFF_PBUF_SHIFT;
        ring_size = flex_array_size(br, bufs, reg.ring_entries);

        memset(&rd, 0, sizeof(rd));
        rd.size = PAGE_ALIGN(ring_size);
        if (!(reg.flags & IOU_PBUF_RING_MMAP)) {
                rd.user_addr = reg.ring_addr;
                rd.flags |= IORING_MEM_REGION_TYPE_USER;
        }
        ret = io_create_region_mmap_safe(ctx, &bl->region, &rd, mmap_offset);
        if (ret)
                goto fail;
        br = io_region_get_ptr(&bl->region);

#ifdef SHM_COLOUR
        /*
         * On platforms that have specific aliasing requirements, SHM_COLOUR
         * is set and we must guarantee that the kernel and user side align
         * nicely. We cannot do that if IOU_PBUF_RING_MMAP isn't set and
         * the application mmap's the provided ring buffer. Fail the request
         * if we, by chance, don't end up with aligned addresses. The app
         * should use IOU_PBUF_RING_MMAP instead, and liburing will handle
         * this transparently.
         */
        if (!(reg.flags & IOU_PBUF_RING_MMAP) &&
            ((reg.ring_addr | (unsigned long)br) & (SHM_COLOUR - 1))) {
                ret = -EINVAL;
                goto fail;
        }
#endif

        bl->nr_entries = reg.ring_entries;
        bl->mask = reg.ring_entries - 1;
        bl->flags |= IOBL_BUF_RING;
        bl->buf_ring = br;
        if (reg.flags & IOU_PBUF_RING_INC)
                bl->flags |= IOBL_INC;
        io_buffer_add_list(ctx, bl, reg.bgid);
        return 0;
fail:
        io_free_region(ctx, &bl->region);
        kfree(bl);
        return ret;
}

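/*
 * IORING_UNREGISTER_PBUF_RING: tear down a previously registered
 * ring-mapped buffer group and release its region.
 */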
int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
{
        struct io_uring_buf_reg reg;
        struct io_buffer_list *bl;

        lockdep_assert_held(&ctx->uring_lock);

        if (copy_from_user(&reg, arg, sizeof(reg)))
                return -EFAULT;
        if (!mem_is_zero(reg.resv, sizeof(reg.resv)) || reg.flags)
                return -EINVAL;

        bl = io_buffer_get_list(ctx, reg.bgid);
        if (!bl)
                return -ENOENT;
        if (!(bl->flags & IOBL_BUF_RING))
                return -EINVAL;

        scoped_guard(mutex, &ctx->mmap_lock)
                xa_erase(&ctx->io_bl_xa, bl->bgid);

        io_put_bl(ctx, bl);
        return 0;
}

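/*
 * IORING_REGISTER_PBUF_STATUS: report the kernel's current head for a
 * ring-mapped buffer group back to the application.
 */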
int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg)
{
        struct io_uring_buf_status buf_status;
        struct io_buffer_list *bl;

        if (copy_from_user(&buf_status, arg, sizeof(buf_status)))
                return -EFAULT;
        if (!mem_is_zero(buf_status.resv, sizeof(buf_status.resv)))
                return -EINVAL;

        bl = io_buffer_get_list(ctx, buf_status.buf_group);
        if (!bl)
                return -ENOENT;
        if (!(bl->flags & IOBL_BUF_RING))
                return -EINVAL;

        buf_status.head = bl->head;
        if (copy_to_user(arg, &buf_status, sizeof(buf_status)))
                return -EFAULT;

        return 0;
}

struct io_mapped_region *io_pbuf_get_region(struct io_ring_ctx *ctx,
                                            unsigned int bgid)
{
        struct io_buffer_list *bl;

        lockdep_assert_held(&ctx->mmap_lock);

        bl = xa_load(&ctx->io_bl_xa, bgid);
        if (!bl || !(bl->flags & IOBL_BUF_RING))
                return NULL;
        return &bl->region;
}