// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/poll.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "opdef.h"
#include "kbuf.h"

#define IO_BUFFER_LIST_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct io_uring_buf))

#define BGID_ARRAY	64

/* BIDs are addressed by a 16-bit field in a CQE */
#define MAX_BIDS_PER_BGID (1 << 16)

struct kmem_cache *io_buf_cachep;

struct io_provide_buf {
	struct file			*file;
	__u64				addr;
	__u32				len;
	__u32				bgid;
	__u32				nbufs;
	__u16				bid;
};

struct io_buf_free {
	struct hlist_node		list;
	void				*mem;
	size_t				size;
	int				inuse;
};

static struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx,
						   struct io_buffer_list *bl,
						   unsigned int bgid)
{
	if (bl && bgid < BGID_ARRAY)
		return &bl[bgid];

	return xa_load(&ctx->io_bl_xa, bgid);
}

static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
							unsigned int bgid)
{
	lockdep_assert_held(&ctx->uring_lock);

	return __io_buffer_get_list(ctx, ctx->io_bl, bgid);
}

static int io_buffer_add_list(struct io_ring_ctx *ctx,
			      struct io_buffer_list *bl, unsigned int bgid)
{
	/*
	 * Store buffer group ID and finally mark the list as visible.
	 * The normal lookup doesn't care about the visibility as we're
	 * always under the ->uring_lock, but the RCU lookup from mmap does.
	 */
	bl->bgid = bgid;
	smp_store_release(&bl->is_ready, 1);

	if (bgid < BGID_ARRAY)
		return 0;

	return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL));
}

bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_buffer_list *bl;
	struct io_buffer *buf;

	io_ring_submit_lock(ctx, issue_flags);

	buf = req->kbuf;
	bl = io_buffer_get_list(ctx, buf->bgid);
	list_add(&buf->list, &bl->buf_list);
	req->flags &= ~REQ_F_BUFFER_SELECTED;
	req->buf_index = buf->bgid;

	io_ring_submit_unlock(ctx, issue_flags);
	return true;
}

void __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags)
{
	/*
	 * We can add this buffer back to two lists:
	 *
	 * 1) The io_buffers_cache list. This one is protected by the
	 *    ctx->uring_lock. If we already hold this lock, add back to this
	 *    list as we can grab it from issue as well.
	 * 2) The io_buffers_comp list. This one is protected by the
	 *    ctx->completion_lock.
	 *
	 * We migrate buffers from the comp_list to the issue cache list
	 * when we need one.
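	 *
	 * Which path is taken below is decided solely by whether the issue
	 * side told us it does not hold the uring_lock (IO_URING_F_UNLOCKED).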
	 */
	if (issue_flags & IO_URING_F_UNLOCKED) {
		struct io_ring_ctx *ctx = req->ctx;

		spin_lock(&ctx->completion_lock);
		__io_put_kbuf_list(req, &ctx->io_buffers_comp);
		spin_unlock(&ctx->completion_lock);
	} else {
		lockdep_assert_held(&req->ctx->uring_lock);

		__io_put_kbuf_list(req, &req->ctx->io_buffers_cache);
	}
}

static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len,
					      struct io_buffer_list *bl)
{
	if (!list_empty(&bl->buf_list)) {
		struct io_buffer *kbuf;

		kbuf = list_first_entry(&bl->buf_list, struct io_buffer, list);
		list_del(&kbuf->list);
		if (*len == 0 || *len > kbuf->len)
			*len = kbuf->len;
		if (list_empty(&bl->buf_list))
			req->flags |= REQ_F_BL_EMPTY;
		req->flags |= REQ_F_BUFFER_SELECTED;
		req->kbuf = kbuf;
		req->buf_index = kbuf->bid;
		return u64_to_user_ptr(kbuf->addr);
	}
	return NULL;
}

static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
					  struct io_buffer_list *bl,
					  unsigned int issue_flags)
{
	struct io_uring_buf_ring *br = bl->buf_ring;
	__u16 tail, head = bl->head;
	struct io_uring_buf *buf;

	tail = smp_load_acquire(&br->tail);
	if (unlikely(tail == head))
		return NULL;

	if (head + 1 == tail)
		req->flags |= REQ_F_BL_EMPTY;

	head &= bl->mask;
	/* mmaped buffers are always contig */
	if (bl->is_mmap || head < IO_BUFFER_LIST_BUF_PER_PAGE) {
		buf = &br->bufs[head];
	} else {
		int off = head & (IO_BUFFER_LIST_BUF_PER_PAGE - 1);
		int index = head / IO_BUFFER_LIST_BUF_PER_PAGE;
		buf = page_address(bl->buf_pages[index]);
		buf += off;
	}
	if (*len == 0 || *len > buf->len)
		*len = buf->len;
	req->flags |= REQ_F_BUFFER_RING;
	req->buf_list = bl;
	req->buf_index = buf->bid;

	if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) {
		/*
		 * If we came in unlocked, we have no choice but to consume the
		 * buffer here, otherwise nothing ensures that the buffer won't
		 * get used by others. This does mean it'll be pinned until the
		 * IO completes, coming in unlocked means we're being called from
		 * io-wq context and there may be further retries in async hybrid
		 * mode. For the locked case, the caller must call commit when
		 * the transfer completes (or if we get -EAGAIN and must poll or
		 * retry).
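		 *
		 * Advancing bl->head and clearing req->buf_list below is what
		 * marks the buffer as consumed for this request.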
		 */
		req->buf_list = NULL;
		bl->head++;
	}
	return u64_to_user_ptr(buf->addr);
}

void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
			      unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_buffer_list *bl;
	void __user *ret = NULL;

	io_ring_submit_lock(req->ctx, issue_flags);

	bl = io_buffer_get_list(ctx, req->buf_index);
	if (likely(bl)) {
		if (bl->is_buf_ring)
			ret = io_ring_buffer_select(req, len, bl, issue_flags);
		else
			ret = io_provided_buffer_select(req, len, bl);
	}
	io_ring_submit_unlock(req->ctx, issue_flags);
	return ret;
}

static __cold int io_init_bl_list(struct io_ring_ctx *ctx)
{
	struct io_buffer_list *bl;
	int i;

	bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), GFP_KERNEL);
	if (!bl)
		return -ENOMEM;

	for (i = 0; i < BGID_ARRAY; i++) {
		INIT_LIST_HEAD(&bl[i].buf_list);
		bl[i].bgid = i;
	}

	smp_store_release(&ctx->io_bl, bl);
	return 0;
}

/*
 * Mark the given mapped range as free for reuse
 */
static void io_kbuf_mark_free(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
{
	struct io_buf_free *ibf;

	hlist_for_each_entry(ibf, &ctx->io_buf_list, list) {
		if (bl->buf_ring == ibf->mem) {
			ibf->inuse = 0;
			return;
		}
	}

	/* can't happen... */
	WARN_ON_ONCE(1);
}

static int __io_remove_buffers(struct io_ring_ctx *ctx,
			       struct io_buffer_list *bl, unsigned nbufs)
{
	unsigned i = 0;

	/* shouldn't happen */
	if (!nbufs)
		return 0;

	if (bl->is_buf_ring) {
		i = bl->buf_ring->tail - bl->head;
		if (bl->is_mmap) {
			/*
			 * io_kbuf_mmap_list_free() will free the page(s) at
			 * ->release() time.
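			 * Here we only flag the io_buf_free entry as unused so
			 * the region can be handed out again by
			 * io_lookup_buf_free_entry().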
			 */
			io_kbuf_mark_free(ctx, bl);
			bl->buf_ring = NULL;
			bl->is_mmap = 0;
		} else if (bl->buf_nr_pages) {
			int j;

			for (j = 0; j < bl->buf_nr_pages; j++)
				unpin_user_page(bl->buf_pages[j]);
			kvfree(bl->buf_pages);
			bl->buf_pages = NULL;
			bl->buf_nr_pages = 0;
		}
		/* make sure it's seen as empty */
		INIT_LIST_HEAD(&bl->buf_list);
		bl->is_buf_ring = 0;
		return i;
	}

	/* protects io_buffers_cache */
	lockdep_assert_held(&ctx->uring_lock);

	while (!list_empty(&bl->buf_list)) {
		struct io_buffer *nxt;

		nxt = list_first_entry(&bl->buf_list, struct io_buffer, list);
		list_move(&nxt->list, &ctx->io_buffers_cache);
		if (++i == nbufs)
			return i;
		cond_resched();
	}

	return i;
}

void io_destroy_buffers(struct io_ring_ctx *ctx)
{
	struct io_buffer_list *bl;
	struct list_head *item, *tmp;
	struct io_buffer *buf;
	unsigned long index;
	int i;

	for (i = 0; i < BGID_ARRAY; i++) {
		if (!ctx->io_bl)
			break;
		__io_remove_buffers(ctx, &ctx->io_bl[i], -1U);
	}

	xa_for_each(&ctx->io_bl_xa, index, bl) {
		xa_erase(&ctx->io_bl_xa, bl->bgid);
		__io_remove_buffers(ctx, bl, -1U);
		kfree_rcu(bl, rcu);
	}

	/*
	 * Move deferred locked entries to cache before pruning
	 */
	spin_lock(&ctx->completion_lock);
	if (!list_empty(&ctx->io_buffers_comp))
		list_splice_init(&ctx->io_buffers_comp, &ctx->io_buffers_cache);
	spin_unlock(&ctx->completion_lock);

	list_for_each_safe(item, tmp, &ctx->io_buffers_cache) {
		buf = list_entry(item, struct io_buffer, list);
		kmem_cache_free(io_buf_cachep, buf);
	}
}

int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
	u64 tmp;

	if (sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
	    sqe->splice_fd_in)
		return -EINVAL;

	tmp = READ_ONCE(sqe->fd);
	if (!tmp || tmp > MAX_BIDS_PER_BGID)
		return -EINVAL;

	memset(p, 0, sizeof(*p));
	p->nbufs = tmp;
	p->bgid = READ_ONCE(sqe->buf_group);
	return 0;
}

int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_buffer_list *bl;
	int ret = 0;

	io_ring_submit_lock(ctx, issue_flags);

	ret = -ENOENT;
	bl = io_buffer_get_list(ctx, p->bgid);
	if (bl) {
		ret = -EINVAL;
		/* can't use provide/remove buffers command on mapped buffers */
		if (!bl->is_buf_ring)
			ret = __io_remove_buffers(ctx, bl, p->nbufs);
	}
	io_ring_submit_unlock(ctx, issue_flags);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	unsigned long size, tmp_check;
	struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
	u64 tmp;

	if (sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	tmp = READ_ONCE(sqe->fd);
	if (!tmp || tmp > MAX_BIDS_PER_BGID)
		return -E2BIG;
	p->nbufs = tmp;
	p->addr = READ_ONCE(sqe->addr);
	p->len = READ_ONCE(sqe->len);

	if (check_mul_overflow((unsigned long)p->len, (unsigned long)p->nbufs,
				&size))
		return -EOVERFLOW;
	if (check_add_overflow((unsigned long)p->addr, size, &tmp_check))
		return -EOVERFLOW;

	size = (unsigned long)p->len * p->nbufs;
	if (!access_ok(u64_to_user_ptr(p->addr), size))
		return -EFAULT;

	p->bgid = READ_ONCE(sqe->buf_group);
	tmp = READ_ONCE(sqe->off);
	if (tmp > USHRT_MAX)
		return -E2BIG;
	if (tmp + p->nbufs > MAX_BIDS_PER_BGID)
		return -EINVAL;
	p->bid = tmp;
	return 0;
}

#define IO_BUFFER_ALLOC_BATCH 64

static int io_refill_buffer_cache(struct io_ring_ctx *ctx)
{
	struct io_buffer *bufs[IO_BUFFER_ALLOC_BATCH];
	int allocated;

	/*
	 * Completions that don't happen inline (eg not under uring_lock) will
	 * add to ->io_buffers_comp. If we don't have any free buffers, check
	 * the completion list and splice those entries first.
	 */
	if (!list_empty_careful(&ctx->io_buffers_comp)) {
		spin_lock(&ctx->completion_lock);
		if (!list_empty(&ctx->io_buffers_comp)) {
			list_splice_init(&ctx->io_buffers_comp,
					 &ctx->io_buffers_cache);
			spin_unlock(&ctx->completion_lock);
			return 0;
		}
		spin_unlock(&ctx->completion_lock);
	}

	/*
	 * No free buffers and no completion entries either. Allocate a new
	 * batch of buffer entries and add those to our freelist.
	 */

	allocated = kmem_cache_alloc_bulk(io_buf_cachep, GFP_KERNEL_ACCOUNT,
					  ARRAY_SIZE(bufs), (void **) bufs);
	if (unlikely(!allocated)) {
		/*
		 * Bulk alloc is all-or-nothing. If we fail to get a batch,
		 * retry single alloc to be on the safe side.
		 */
		bufs[0] = kmem_cache_alloc(io_buf_cachep, GFP_KERNEL);
		if (!bufs[0])
			return -ENOMEM;
		allocated = 1;
	}

	while (allocated)
		list_add_tail(&bufs[--allocated]->list, &ctx->io_buffers_cache);

	return 0;
}

static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
			  struct io_buffer_list *bl)
{
	struct io_buffer *buf;
	u64 addr = pbuf->addr;
	int i, bid = pbuf->bid;

	for (i = 0; i < pbuf->nbufs; i++) {
		if (list_empty(&ctx->io_buffers_cache) &&
		    io_refill_buffer_cache(ctx))
			break;
		buf = list_first_entry(&ctx->io_buffers_cache, struct io_buffer,
					list);
		list_move_tail(&buf->list, &bl->buf_list);
		buf->addr = addr;
		buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
		buf->bid = bid;
		buf->bgid = pbuf->bgid;
		addr += pbuf->len;
		bid++;
		cond_resched();
	}

	return i ? 0 : -ENOMEM;
}

int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_buffer_list *bl;
	int ret = 0;

	io_ring_submit_lock(ctx, issue_flags);

	if (unlikely(p->bgid < BGID_ARRAY && !ctx->io_bl)) {
		ret = io_init_bl_list(ctx);
		if (ret)
			goto err;
	}

	bl = io_buffer_get_list(ctx, p->bgid);
	if (unlikely(!bl)) {
		bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT);
		if (!bl) {
			ret = -ENOMEM;
			goto err;
		}
		INIT_LIST_HEAD(&bl->buf_list);
		ret = io_buffer_add_list(ctx, bl, p->bgid);
		if (ret) {
			/*
			 * Doesn't need rcu free as it was never visible, but
			 * let's keep it consistent throughout. Also can't
			 * be a lower indexed array group, as adding one
			 * where lookup failed cannot happen.
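			 * (For bgid < BGID_ARRAY, io_buffer_add_list() returns
			 * 0 without touching the xarray, hence the WARN below.)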
			 */
			if (p->bgid >= BGID_ARRAY)
				kfree_rcu(bl, rcu);
			else
				WARN_ON_ONCE(1);
			goto err;
		}
	}
	/* can't add buffers via this command for a mapped buffer ring */
	if (bl->is_buf_ring) {
		ret = -EINVAL;
		goto err;
	}

	ret = io_add_buffers(ctx, p, bl);
err:
	io_ring_submit_unlock(ctx, issue_flags);

	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
			    struct io_buffer_list *bl)
{
	struct io_uring_buf_ring *br;
	struct page **pages;
	int i, nr_pages;

	pages = io_pin_pages(reg->ring_addr,
			     flex_array_size(br, bufs, reg->ring_entries),
			     &nr_pages);
	if (IS_ERR(pages))
		return PTR_ERR(pages);

	/*
	 * Apparently some 32-bit boxes (ARM) will return highmem pages,
	 * which then need to be mapped. We could support that, but it'd
	 * complicate the code and slow down the common cases quite a bit.
	 * So just error out, returning -EINVAL just like we did on kernels
	 * that didn't support mapped buffer rings.
	 */
	for (i = 0; i < nr_pages; i++)
		if (PageHighMem(pages[i]))
			goto error_unpin;

	br = page_address(pages[0]);
#ifdef SHM_COLOUR
	/*
	 * On platforms that have specific aliasing requirements, SHM_COLOUR
	 * is set and we must guarantee that the kernel and user side align
	 * nicely. We cannot do that if IOU_PBUF_RING_MMAP isn't set and
	 * the application mmap's the provided ring buffer. Fail the request
	 * if we, by chance, don't end up with aligned addresses. The app
	 * should use IOU_PBUF_RING_MMAP instead, and liburing will handle
	 * this transparently.
	 */
	if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1))
		goto error_unpin;
#endif
	bl->buf_pages = pages;
	bl->buf_nr_pages = nr_pages;
	bl->buf_ring = br;
	bl->is_buf_ring = 1;
	bl->is_mmap = 0;
	return 0;
error_unpin:
	for (i = 0; i < nr_pages; i++)
		unpin_user_page(pages[i]);
	kvfree(pages);
	return -EINVAL;
}

/*
 * See if we have a suitable region that we can reuse, rather than allocate
 * both a new io_buf_free and mem region again. We leave it on the list as
 * even a reused entry will need freeing at ring release.
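 *
 * Best fit: pick the smallest free region that still covers ring_size,
 * stopping early on an exact match.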
 */
static struct io_buf_free *io_lookup_buf_free_entry(struct io_ring_ctx *ctx,
						    size_t ring_size)
{
	struct io_buf_free *ibf, *best = NULL;
	size_t best_dist;

	hlist_for_each_entry(ibf, &ctx->io_buf_list, list) {
		size_t dist;

		if (ibf->inuse || ibf->size < ring_size)
			continue;
		dist = ibf->size - ring_size;
		if (!best || dist < best_dist) {
			best = ibf;
			if (!dist)
				break;
			best_dist = dist;
		}
	}

	return best;
}

static int io_alloc_pbuf_ring(struct io_ring_ctx *ctx,
			      struct io_uring_buf_reg *reg,
			      struct io_buffer_list *bl)
{
	struct io_buf_free *ibf;
	size_t ring_size;
	void *ptr;

	ring_size = reg->ring_entries * sizeof(struct io_uring_buf_ring);

	/* Reuse existing entry, if we can */
	ibf = io_lookup_buf_free_entry(ctx, ring_size);
	if (!ibf) {
		ptr = io_mem_alloc(ring_size);
		if (IS_ERR(ptr))
			return PTR_ERR(ptr);

		/* Allocate and store deferred free entry */
		ibf = kmalloc(sizeof(*ibf), GFP_KERNEL_ACCOUNT);
		if (!ibf) {
			io_mem_free(ptr);
			return -ENOMEM;
		}
		ibf->mem = ptr;
		ibf->size = ring_size;
		hlist_add_head(&ibf->list, &ctx->io_buf_list);
	}
	ibf->inuse = 1;
	bl->buf_ring = ibf->mem;
	bl->is_buf_ring = 1;
	bl->is_mmap = 1;
	return 0;
}

int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
{
	struct io_uring_buf_reg reg;
	struct io_buffer_list *bl, *free_bl = NULL;
	int ret;

	lockdep_assert_held(&ctx->uring_lock);

	if (copy_from_user(&reg, arg, sizeof(reg)))
		return -EFAULT;

	if (reg.resv[0] || reg.resv[1] || reg.resv[2])
		return -EINVAL;
	if (reg.flags & ~IOU_PBUF_RING_MMAP)
		return -EINVAL;
	if (!(reg.flags & IOU_PBUF_RING_MMAP)) {
		if (!reg.ring_addr)
			return -EFAULT;
		if (reg.ring_addr & ~PAGE_MASK)
			return -EINVAL;
	} else {
		if (reg.ring_addr)
			return -EINVAL;
	}

	if (!is_power_of_2(reg.ring_entries))
		return -EINVAL;

	/* cannot disambiguate full vs empty due to head/tail size */
	if (reg.ring_entries >= 65536)
		return -EINVAL;

	if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) {
		int ret = io_init_bl_list(ctx);
		if (ret)
			return ret;
	}

	bl = io_buffer_get_list(ctx, reg.bgid);
	if (bl) {
		/* if mapped buffer ring OR classic exists, don't allow */
		if (bl->is_buf_ring || !list_empty(&bl->buf_list))
			return -EEXIST;
	} else {
		free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL);
		if (!bl)
			return -ENOMEM;
	}

	if (!(reg.flags & IOU_PBUF_RING_MMAP))
		ret = io_pin_pbuf_ring(&reg, bl);
	else
		ret = io_alloc_pbuf_ring(ctx, &reg, bl);

	if (!ret) {
		bl->nr_entries = reg.ring_entries;
		bl->mask = reg.ring_entries - 1;

		io_buffer_add_list(ctx, bl, reg.bgid);
		return 0;
	}

	kfree_rcu(free_bl, rcu);
	return ret;
}

int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
{
	struct io_uring_buf_reg reg;
	struct io_buffer_list *bl;

	lockdep_assert_held(&ctx->uring_lock);

	if (copy_from_user(&reg, arg, sizeof(reg)))
		return -EFAULT;
	if (reg.resv[0] || reg.resv[1] || reg.resv[2])
		return -EINVAL;
	if (reg.flags)
		return -EINVAL;

	bl = io_buffer_get_list(ctx, reg.bgid);
	if (!bl)
		return -ENOENT;
	if (!bl->is_buf_ring)
		return -EINVAL;

	__io_remove_buffers(ctx, bl, -1U);
	if (bl->bgid >= BGID_ARRAY) {
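		/*
		 * Only xarray indexed groups were allocated separately and
		 * need erasing and freeing here; groups below BGID_ARRAY live
		 * in the ctx->io_bl array and are torn down at ring exit.
		 */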
		xa_erase(&ctx->io_bl_xa, bl->bgid);
		kfree_rcu(bl, rcu);
	}
	return 0;
}

int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg)
{
	struct io_uring_buf_status buf_status;
	struct io_buffer_list *bl;
	int i;

	if (copy_from_user(&buf_status, arg, sizeof(buf_status)))
		return -EFAULT;

	for (i = 0; i < ARRAY_SIZE(buf_status.resv); i++)
		if (buf_status.resv[i])
			return -EINVAL;

	bl = io_buffer_get_list(ctx, buf_status.buf_group);
	if (!bl)
		return -ENOENT;
	if (!bl->is_buf_ring)
		return -EINVAL;

	buf_status.head = bl->head;
	if (copy_to_user(arg, &buf_status, sizeof(buf_status)))
		return -EFAULT;

	return 0;
}

void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid)
{
	struct io_buffer_list *bl;

	bl = __io_buffer_get_list(ctx, smp_load_acquire(&ctx->io_bl), bgid);

	if (!bl || !bl->is_mmap)
		return NULL;
	/*
	 * Ensure the list is fully setup. Only strictly needed for RCU lookup
	 * via mmap, and in that case only for the array indexed groups. For
	 * the xarray lookups, it's either visible and ready, or not at all.
	 */
	if (!smp_load_acquire(&bl->is_ready))
		return NULL;

	return bl->buf_ring;
}

/*
 * Called at or after ->release(), free the mmap'ed buffers that we used
 * for memory mapped provided buffer rings.
 */
void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx)
{
	struct io_buf_free *ibf;
	struct hlist_node *tmp;

	hlist_for_each_entry_safe(ibf, tmp, &ctx->io_buf_list, list) {
		hlist_del(&ibf->list);
		io_mem_free(ibf->mem);
		kfree(ibf);
	}
}