// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/poll.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "opdef.h"
#include "kbuf.h"

#define IO_BUFFER_LIST_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct io_uring_buf))

/* BIDs are addressed by a 16-bit field in a CQE */
#define MAX_BIDS_PER_BGID (1 << 16)

struct kmem_cache *io_buf_cachep;

struct io_provide_buf {
	struct file			*file;
	__u64				addr;
	__u32				len;
	__u32				bgid;
	__u32				nbufs;
	__u16				bid;
};

struct io_buf_free {
	struct hlist_node		list;
	void				*mem;
	size_t				size;
	int				inuse;
};

static inline struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx,
							   unsigned int bgid)
{
	return xa_load(&ctx->io_bl_xa, bgid);
}

static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
							 unsigned int bgid)
{
	lockdep_assert_held(&ctx->uring_lock);

	return __io_buffer_get_list(ctx, bgid);
}

static int io_buffer_add_list(struct io_ring_ctx *ctx,
			      struct io_buffer_list *bl, unsigned int bgid)
{
	/*
	 * Store buffer group ID and finally mark the list as visible.
	 * The normal lookup doesn't care about the visibility as we're
	 * always under the ->uring_lock, but the RCU lookup from mmap does.
	 */
	bl->bgid = bgid;
	atomic_set(&bl->refs, 1);
	return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL));
}

bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_buffer_list *bl;
	struct io_buffer *buf;

	io_ring_submit_lock(ctx, issue_flags);

	buf = req->kbuf;
	bl = io_buffer_get_list(ctx, buf->bgid);
	list_add(&buf->list, &bl->buf_list);
	req->flags &= ~REQ_F_BUFFER_SELECTED;
	req->buf_index = buf->bgid;

	io_ring_submit_unlock(ctx, issue_flags);
	return true;
}
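
/*
 * Illustrative sketch, not part of the kernel source: how a request that
 * consumed a provided buffer reports it back to userspace. The helper name
 * io_kbuf_cqe_flags_example is hypothetical; the real encoding lives in the
 * io_put_kbuf() helpers in kbuf.h. The 16-bit shift is the reason for the
 * MAX_BIDS_PER_BGID limit above.
 */
static inline __u32 io_kbuf_cqe_flags_example(__u16 bid)
{
	/* low flag bit says "a buffer was used", upper 16 bits carry the BID */
	return IORING_CQE_F_BUFFER | ((__u32)bid << IORING_CQE_BUFFER_SHIFT);
}
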
void __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags)
{
	/*
	 * We can add this buffer back to two lists:
	 *
	 * 1) The io_buffers_cache list. This one is protected by the
	 *    ctx->uring_lock. If we already hold this lock, add back to this
	 *    list as we can grab it from issue as well.
	 * 2) The io_buffers_comp list. This one is protected by the
	 *    ctx->completion_lock.
	 *
	 * We migrate buffers from the comp_list to the issue cache list
	 * when we need one.
	 */
	if (issue_flags & IO_URING_F_UNLOCKED) {
		struct io_ring_ctx *ctx = req->ctx;

		spin_lock(&ctx->completion_lock);
		__io_put_kbuf_list(req, &ctx->io_buffers_comp);
		spin_unlock(&ctx->completion_lock);
	} else {
		lockdep_assert_held(&req->ctx->uring_lock);

		__io_put_kbuf_list(req, &req->ctx->io_buffers_cache);
	}
}

static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len,
					      struct io_buffer_list *bl)
{
	if (!list_empty(&bl->buf_list)) {
		struct io_buffer *kbuf;

		kbuf = list_first_entry(&bl->buf_list, struct io_buffer, list);
		list_del(&kbuf->list);
		if (*len == 0 || *len > kbuf->len)
			*len = kbuf->len;
		if (list_empty(&bl->buf_list))
			req->flags |= REQ_F_BL_EMPTY;
		req->flags |= REQ_F_BUFFER_SELECTED;
		req->kbuf = kbuf;
		req->buf_index = kbuf->bid;
		return u64_to_user_ptr(kbuf->addr);
	}
	return NULL;
}

static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
					  struct io_buffer_list *bl,
					  unsigned int issue_flags)
{
	struct io_uring_buf_ring *br = bl->buf_ring;
	__u16 tail, head = bl->head;
	struct io_uring_buf *buf;

	tail = smp_load_acquire(&br->tail);
	if (unlikely(tail == head))
		return NULL;

	if (head + 1 == tail)
		req->flags |= REQ_F_BL_EMPTY;

	head &= bl->mask;
	/* mmap'ed buffers are always contiguous */
	if (bl->is_mmap || head < IO_BUFFER_LIST_BUF_PER_PAGE) {
		buf = &br->bufs[head];
	} else {
		int off = head & (IO_BUFFER_LIST_BUF_PER_PAGE - 1);
		int index = head / IO_BUFFER_LIST_BUF_PER_PAGE;
		buf = page_address(bl->buf_pages[index]);
		buf += off;
	}
	if (*len == 0 || *len > buf->len)
		*len = buf->len;
	req->flags |= REQ_F_BUFFER_RING;
	req->buf_list = bl;
	req->buf_index = buf->bid;

	if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) {
		/*
		 * If we came in unlocked, we have no choice but to consume the
		 * buffer here, otherwise nothing ensures that the buffer won't
		 * get used by others. This does mean it'll be pinned until the
		 * IO completes. Coming in unlocked means we're being called from
		 * io-wq context and there may be further retries in async hybrid
		 * mode. For the locked case, the caller must call commit when
		 * the transfer completes (or if we get -EAGAIN and must poll or
		 * retry).
		 */
		req->buf_list = NULL;
		bl->head++;
	}
	return u64_to_user_ptr(buf->addr);
}

void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
			      unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_buffer_list *bl;
	void __user *ret = NULL;

	io_ring_submit_lock(req->ctx, issue_flags);

	bl = io_buffer_get_list(ctx, req->buf_index);
	if (likely(bl)) {
		if (bl->is_buf_ring)
			ret = io_ring_buffer_select(req, len, bl, issue_flags);
		else
			ret = io_provided_buffer_select(req, len, bl);
	}
	io_ring_submit_unlock(req->ctx, issue_flags);
	return ret;
}
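
/*
 * Userspace-side sketch, shown here for reference only (it is not kernel
 * code and the helper name is made up): publishing a buffer into the ring
 * that io_ring_buffer_select() consumes. The descriptor must be written
 * before the new tail becomes visible, which is why the kernel pairs its
 * smp_load_acquire(&br->tail) with a release store on this side; liburing's
 * io_uring_buf_ring_add()/io_uring_buf_ring_advance() do the same thing.
 */
static void app_buf_ring_add_example(struct io_uring_buf_ring *br,
				     unsigned int mask, __u16 *tail_shadow,
				     void *addr, unsigned int len, __u16 bid)
{
	struct io_uring_buf *buf = &br->bufs[*tail_shadow & mask];

	buf->addr = (unsigned long) addr;
	buf->len = len;
	buf->bid = bid;
	/* release store: make the filled descriptor visible before the tail */
	__atomic_store_n(&br->tail, ++(*tail_shadow), __ATOMIC_RELEASE);
}
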
/*
 * Mark the given mapped range as free for reuse
 */
static void io_kbuf_mark_free(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
{
	struct io_buf_free *ibf;

	hlist_for_each_entry(ibf, &ctx->io_buf_list, list) {
		if (bl->buf_ring == ibf->mem) {
			ibf->inuse = 0;
			return;
		}
	}

	/* can't happen... */
	WARN_ON_ONCE(1);
}

static int __io_remove_buffers(struct io_ring_ctx *ctx,
			       struct io_buffer_list *bl, unsigned nbufs)
{
	unsigned i = 0;

	/* shouldn't happen */
	if (!nbufs)
		return 0;

	if (bl->is_buf_ring) {
		i = bl->buf_ring->tail - bl->head;
		if (bl->is_mmap) {
			/*
			 * io_kbuf_list_free() will free the page(s) at
			 * ->release() time.
			 */
			io_kbuf_mark_free(ctx, bl);
			bl->buf_ring = NULL;
			bl->is_mmap = 0;
		} else if (bl->buf_nr_pages) {
			int j;

			for (j = 0; j < bl->buf_nr_pages; j++)
				unpin_user_page(bl->buf_pages[j]);
			kvfree(bl->buf_pages);
			bl->buf_pages = NULL;
			bl->buf_nr_pages = 0;
		}
		/* make sure it's seen as empty */
		INIT_LIST_HEAD(&bl->buf_list);
		bl->is_buf_ring = 0;
		return i;
	}

	/* protects io_buffers_cache */
	lockdep_assert_held(&ctx->uring_lock);

	while (!list_empty(&bl->buf_list)) {
		struct io_buffer *nxt;

		nxt = list_first_entry(&bl->buf_list, struct io_buffer, list);
		list_move(&nxt->list, &ctx->io_buffers_cache);
		if (++i == nbufs)
			return i;
		cond_resched();
	}

	return i;
}

void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
{
	if (atomic_dec_and_test(&bl->refs)) {
		__io_remove_buffers(ctx, bl, -1U);
		kfree_rcu(bl, rcu);
	}
}

void io_destroy_buffers(struct io_ring_ctx *ctx)
{
	struct io_buffer_list *bl;
	struct list_head *item, *tmp;
	struct io_buffer *buf;
	unsigned long index;

	xa_for_each(&ctx->io_bl_xa, index, bl) {
		xa_erase(&ctx->io_bl_xa, bl->bgid);
		io_put_bl(ctx, bl);
	}

	/*
	 * Move deferred locked entries to cache before pruning
	 */
	spin_lock(&ctx->completion_lock);
	if (!list_empty(&ctx->io_buffers_comp))
		list_splice_init(&ctx->io_buffers_comp, &ctx->io_buffers_cache);
	spin_unlock(&ctx->completion_lock);

	list_for_each_safe(item, tmp, &ctx->io_buffers_cache) {
		buf = list_entry(item, struct io_buffer, list);
		kmem_cache_free(io_buf_cachep, buf);
	}
}

int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
	u64 tmp;

	if (sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
	    sqe->splice_fd_in)
		return -EINVAL;

	tmp = READ_ONCE(sqe->fd);
	if (!tmp || tmp > MAX_BIDS_PER_BGID)
		return -EINVAL;

	memset(p, 0, sizeof(*p));
	p->nbufs = tmp;
	p->bgid = READ_ONCE(sqe->buf_group);
	return 0;
}

int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_buffer_list *bl;
	int ret = 0;

	io_ring_submit_lock(ctx, issue_flags);

	ret = -ENOENT;
	bl = io_buffer_get_list(ctx, p->bgid);
	if (bl) {
		ret = -EINVAL;
		/* can't use provide/remove buffers command on mapped buffers */
		if (!bl->is_buf_ring)
			ret = __io_remove_buffers(ctx, bl, p->nbufs);
	}
	io_ring_submit_unlock(ctx, issue_flags);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
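
/*
 * Userspace-side sketch, for reference only (not kernel code, hypothetical
 * helper name): the raw SQE layout that io_remove_buffers_prep() above
 * expects. liburing's io_uring_prep_remove_buffers() fills the same fields.
 */
static void app_prep_remove_buffers_example(struct io_uring_sqe *sqe,
					    int nr, __u16 bgid)
{
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_REMOVE_BUFFERS;
	sqe->fd = nr;			/* number of buffers to remove */
	sqe->buf_group = bgid;		/* buffer group to remove them from */
}
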
int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	unsigned long size, tmp_check;
	struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
	u64 tmp;

	if (sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	tmp = READ_ONCE(sqe->fd);
	if (!tmp || tmp > MAX_BIDS_PER_BGID)
		return -E2BIG;
	p->nbufs = tmp;
	p->addr = READ_ONCE(sqe->addr);
	p->len = READ_ONCE(sqe->len);

	if (check_mul_overflow((unsigned long)p->len, (unsigned long)p->nbufs,
				&size))
		return -EOVERFLOW;
	if (check_add_overflow((unsigned long)p->addr, size, &tmp_check))
		return -EOVERFLOW;

	size = (unsigned long)p->len * p->nbufs;
	if (!access_ok(u64_to_user_ptr(p->addr), size))
		return -EFAULT;

	p->bgid = READ_ONCE(sqe->buf_group);
	tmp = READ_ONCE(sqe->off);
	if (tmp > USHRT_MAX)
		return -E2BIG;
	if (tmp + p->nbufs > MAX_BIDS_PER_BGID)
		return -EINVAL;
	p->bid = tmp;
	return 0;
}

#define IO_BUFFER_ALLOC_BATCH 64

static int io_refill_buffer_cache(struct io_ring_ctx *ctx)
{
	struct io_buffer *bufs[IO_BUFFER_ALLOC_BATCH];
	int allocated;

	/*
	 * Completions that don't happen inline (eg not under uring_lock) will
	 * add to ->io_buffers_comp. If we don't have any free buffers, check
	 * the completion list and splice those entries first.
	 */
	if (!list_empty_careful(&ctx->io_buffers_comp)) {
		spin_lock(&ctx->completion_lock);
		if (!list_empty(&ctx->io_buffers_comp)) {
			list_splice_init(&ctx->io_buffers_comp,
					 &ctx->io_buffers_cache);
			spin_unlock(&ctx->completion_lock);
			return 0;
		}
		spin_unlock(&ctx->completion_lock);
	}

	/*
	 * No free buffers and no completion entries either. Allocate a new
	 * batch of buffer entries and add those to our freelist.
	 */
	allocated = kmem_cache_alloc_bulk(io_buf_cachep, GFP_KERNEL_ACCOUNT,
					  ARRAY_SIZE(bufs), (void **) bufs);
	if (unlikely(!allocated)) {
		/*
		 * Bulk alloc is all-or-nothing. If we fail to get a batch,
		 * retry single alloc to be on the safe side.
		 */
		bufs[0] = kmem_cache_alloc(io_buf_cachep, GFP_KERNEL);
		if (!bufs[0])
			return -ENOMEM;
		allocated = 1;
	}

	while (allocated)
		list_add_tail(&bufs[--allocated]->list, &ctx->io_buffers_cache);

	return 0;
}

static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
			  struct io_buffer_list *bl)
{
	struct io_buffer *buf;
	u64 addr = pbuf->addr;
	int i, bid = pbuf->bid;

	for (i = 0; i < pbuf->nbufs; i++) {
		if (list_empty(&ctx->io_buffers_cache) &&
		    io_refill_buffer_cache(ctx))
			break;
		buf = list_first_entry(&ctx->io_buffers_cache, struct io_buffer,
				       list);
		list_move_tail(&buf->list, &bl->buf_list);
		buf->addr = addr;
		buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
		buf->bid = bid;
		buf->bgid = pbuf->bgid;
		addr += pbuf->len;
		bid++;
		cond_resched();
	}

	return i ? 0 : -ENOMEM;
}
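
/*
 * Userspace-side sketch, for reference only (not kernel code, hypothetical
 * helper name): the raw SQE layout that io_provide_buffers_prep() above
 * expects. One submission donates 'nr' buffers of 'len' bytes each, carved
 * out of a single contiguous region; io_add_buffers() above walks them with
 * exactly this addr/bid stride.
 */
static void app_prep_provide_buffers_example(struct io_uring_sqe *sqe,
					     void *addr, __u32 len, int nr,
					     __u16 bgid, __u16 start_bid)
{
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_PROVIDE_BUFFERS;
	sqe->fd = nr;				/* number of buffers */
	sqe->addr = (unsigned long) addr;	/* base of the region */
	sqe->len = len;				/* length of each buffer */
	sqe->off = start_bid;			/* first buffer ID */
	sqe->buf_group = bgid;			/* group the buffers join */
}
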
int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_buffer_list *bl;
	int ret = 0;

	io_ring_submit_lock(ctx, issue_flags);

	bl = io_buffer_get_list(ctx, p->bgid);
	if (unlikely(!bl)) {
		bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT);
		if (!bl) {
			ret = -ENOMEM;
			goto err;
		}
		INIT_LIST_HEAD(&bl->buf_list);
		ret = io_buffer_add_list(ctx, bl, p->bgid);
		if (ret) {
			/*
			 * Doesn't need rcu free as it was never visible, but
			 * let's keep it consistent throughout.
			 */
			kfree_rcu(bl, rcu);
			goto err;
		}
	}
	/* can't add buffers via this command for a mapped buffer ring */
	if (bl->is_buf_ring) {
		ret = -EINVAL;
		goto err;
	}

	ret = io_add_buffers(ctx, p, bl);
err:
	io_ring_submit_unlock(ctx, issue_flags);

	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
			    struct io_buffer_list *bl)
{
	struct io_uring_buf_ring *br;
	struct page **pages;
	int i, nr_pages;

	pages = io_pin_pages(reg->ring_addr,
			     flex_array_size(br, bufs, reg->ring_entries),
			     &nr_pages);
	if (IS_ERR(pages))
		return PTR_ERR(pages);

	/*
	 * Apparently some 32-bit boxes (ARM) will return highmem pages,
	 * which then need to be mapped. We could support that, but it'd
	 * complicate the code and slow down the common cases quite a bit.
	 * So just error out, returning -EINVAL just like we did on kernels
	 * that didn't support mapped buffer rings.
	 */
	for (i = 0; i < nr_pages; i++)
		if (PageHighMem(pages[i]))
			goto error_unpin;

	br = page_address(pages[0]);
#ifdef SHM_COLOUR
	/*
	 * On platforms that have specific aliasing requirements, SHM_COLOUR
	 * is set and we must guarantee that the kernel and user side align
	 * nicely. We cannot do that if IOU_PBUF_RING_MMAP isn't set and
	 * the application mmap's the provided ring buffer. Fail the request
	 * if we, by chance, don't end up with aligned addresses. The app
	 * should use IOU_PBUF_RING_MMAP instead, and liburing will handle
	 * this transparently.
	 */
	if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1))
		goto error_unpin;
#endif
	bl->buf_pages = pages;
	bl->buf_nr_pages = nr_pages;
	bl->buf_ring = br;
	bl->is_buf_ring = 1;
	bl->is_mmap = 0;
	return 0;
error_unpin:
	for (i = 0; i < nr_pages; i++)
		unpin_user_page(pages[i]);
	kvfree(pages);
	return -EINVAL;
}
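
/*
 * Userspace-side sketch, for reference only (not kernel code, hypothetical
 * helper, 4 KiB page size and the usual userspace headers assumed):
 * allocating the ring memory that io_pin_pbuf_ring() above pins when
 * IOU_PBUF_RING_MMAP is not used. The registration path below requires a
 * page-aligned address and a power-of-2 entry count below 65536; the ring
 * is an array of struct io_uring_buf with the tail overlaid on the first
 * entry's reserved space.
 */
static struct io_uring_buf_ring *app_alloc_buf_ring_example(unsigned int entries)
{
	size_t size = entries * sizeof(struct io_uring_buf);
	void *mem = NULL;

	if (posix_memalign(&mem, 4096, size))
		return NULL;
	memset(mem, 0, size);		/* head and tail both start at 0 */
	return mem;
}
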
/*
 * See if we have a suitable region that we can reuse, rather than allocate
 * both a new io_buf_free and mem region again. We leave it on the list as
 * even a reused entry will need freeing at ring release.
 */
static struct io_buf_free *io_lookup_buf_free_entry(struct io_ring_ctx *ctx,
						    size_t ring_size)
{
	struct io_buf_free *ibf, *best = NULL;
	size_t best_dist;

	hlist_for_each_entry(ibf, &ctx->io_buf_list, list) {
		size_t dist;

		if (ibf->inuse || ibf->size < ring_size)
			continue;
		dist = ibf->size - ring_size;
		if (!best || dist < best_dist) {
			best = ibf;
			if (!dist)
				break;
			best_dist = dist;
		}
	}

	return best;
}

static int io_alloc_pbuf_ring(struct io_ring_ctx *ctx,
			      struct io_uring_buf_reg *reg,
			      struct io_buffer_list *bl)
{
	struct io_buf_free *ibf;
	size_t ring_size;
	void *ptr;

	ring_size = reg->ring_entries * sizeof(struct io_uring_buf_ring);

	/* Reuse existing entry, if we can */
	ibf = io_lookup_buf_free_entry(ctx, ring_size);
	if (!ibf) {
		ptr = io_mem_alloc(ring_size);
		if (IS_ERR(ptr))
			return PTR_ERR(ptr);

		/* Allocate and store deferred free entry */
		ibf = kmalloc(sizeof(*ibf), GFP_KERNEL_ACCOUNT);
		if (!ibf) {
			io_mem_free(ptr);
			return -ENOMEM;
		}
		ibf->mem = ptr;
		ibf->size = ring_size;
		hlist_add_head(&ibf->list, &ctx->io_buf_list);
	}
	ibf->inuse = 1;
	bl->buf_ring = ibf->mem;
	bl->is_buf_ring = 1;
	bl->is_mmap = 1;
	return 0;
}

int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
{
	struct io_uring_buf_reg reg;
	struct io_buffer_list *bl, *free_bl = NULL;
	int ret;

	lockdep_assert_held(&ctx->uring_lock);

	if (copy_from_user(&reg, arg, sizeof(reg)))
		return -EFAULT;

	if (reg.resv[0] || reg.resv[1] || reg.resv[2])
		return -EINVAL;
	if (reg.flags & ~IOU_PBUF_RING_MMAP)
		return -EINVAL;
	if (!(reg.flags & IOU_PBUF_RING_MMAP)) {
		if (!reg.ring_addr)
			return -EFAULT;
		if (reg.ring_addr & ~PAGE_MASK)
			return -EINVAL;
	} else {
		if (reg.ring_addr)
			return -EINVAL;
	}

	if (!is_power_of_2(reg.ring_entries))
		return -EINVAL;

	/* cannot disambiguate full vs empty due to head/tail size */
	if (reg.ring_entries >= 65536)
		return -EINVAL;

	bl = io_buffer_get_list(ctx, reg.bgid);
	if (bl) {
		/* if mapped buffer ring OR classic exists, don't allow */
		if (bl->is_buf_ring || !list_empty(&bl->buf_list))
			return -EEXIST;
	} else {
		free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL);
		if (!bl)
			return -ENOMEM;
	}

	if (!(reg.flags & IOU_PBUF_RING_MMAP))
		ret = io_pin_pbuf_ring(&reg, bl);
	else
		ret = io_alloc_pbuf_ring(ctx, &reg, bl);

	if (!ret) {
		bl->nr_entries = reg.ring_entries;
		bl->mask = reg.ring_entries - 1;

		io_buffer_add_list(ctx, bl, reg.bgid);
		return 0;
	}

	kfree_rcu(free_bl, rcu);
	return ret;
}

int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
{
	struct io_uring_buf_reg reg;
	struct io_buffer_list *bl;

	lockdep_assert_held(&ctx->uring_lock);

	if (copy_from_user(&reg, arg, sizeof(reg)))
		return -EFAULT;
	if (reg.resv[0] || reg.resv[1] || reg.resv[2])
		return -EINVAL;
	if (reg.flags)
		return -EINVAL;

	bl = io_buffer_get_list(ctx, reg.bgid);
	if (!bl)
		return -ENOENT;
	if (!bl->is_buf_ring)
		return -EINVAL;

	xa_erase(&ctx->io_bl_xa, bl->bgid);
	io_put_bl(ctx, bl);
	return 0;
}
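
/*
 * Userspace-side sketch, for reference only (not kernel code, hypothetical
 * helper name, assumes the usual userspace syscall headers): registering a
 * provided buffer ring, which lands in io_register_pbuf_ring() above. With
 * IOU_PBUF_RING_MMAP set the kernel allocates the memory itself and
 * ring_addr must be zero; liburing wraps this call as
 * io_uring_register_buf_ring().
 */
static int app_register_buf_ring_example(int ring_fd, void *ring_addr,
					 unsigned int entries, __u16 bgid)
{
	struct io_uring_buf_reg reg = {
		.ring_addr	= (unsigned long) ring_addr,
		.ring_entries	= entries,
		.bgid		= bgid,
	};

	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_PBUF_RING, &reg, 1);
}
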
int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg)
{
	struct io_uring_buf_status buf_status;
	struct io_buffer_list *bl;
	int i;

	if (copy_from_user(&buf_status, arg, sizeof(buf_status)))
		return -EFAULT;

	for (i = 0; i < ARRAY_SIZE(buf_status.resv); i++)
		if (buf_status.resv[i])
			return -EINVAL;

	bl = io_buffer_get_list(ctx, buf_status.buf_group);
	if (!bl)
		return -ENOENT;
	if (!bl->is_buf_ring)
		return -EINVAL;

	buf_status.head = bl->head;
	if (copy_to_user(arg, &buf_status, sizeof(buf_status)))
		return -EFAULT;

	return 0;
}

struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
				      unsigned long bgid)
{
	struct io_buffer_list *bl;
	bool ret;

	/*
	 * We have to be a bit careful here - we're inside mmap and cannot grab
	 * the uring_lock. This means the buffer_list could be simultaneously
	 * going away, if someone is trying to be sneaky. Look it up under rcu
	 * so we know it's not going away, and attempt to grab a reference to
	 * it. If the ref is already zero, then fail the mapping. If successful,
	 * the caller will call io_put_bl() to drop the reference at the end.
	 * This may then safely free the buffer_list (and drop the pages) at
	 * that point; vm_insert_pages() would've already grabbed the
	 * necessary vma references.
	 */
	rcu_read_lock();
	bl = xa_load(&ctx->io_bl_xa, bgid);
	/* must be a mmap'able buffer ring and have pages */
	ret = false;
	if (bl && bl->is_mmap)
		ret = atomic_inc_not_zero(&bl->refs);
	rcu_read_unlock();

	if (ret)
		return bl;

	return ERR_PTR(-EINVAL);
}

/*
 * Called at or after ->release(), free the mmap'ed buffers that we used
 * for memory mapped provided buffer rings.
 */
void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx)
{
	struct io_buf_free *ibf;
	struct hlist_node *tmp;

	hlist_for_each_entry_safe(ibf, tmp, &ctx->io_buf_list, list) {
		hlist_del(&ibf->list);
		io_mem_free(ibf->mem);
		kfree(ibf);
	}
}
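
/*
 * Userspace-side sketch, for reference only (not kernel code, hypothetical
 * helper name, assumes <sys/mman.h> and a 64-bit off_t): mapping a
 * kernel-allocated ring that was registered with IOU_PBUF_RING_MMAP. The
 * mmap offset encodes the buffer group ID; on the kernel side the mmap
 * handler resolves it via io_pbuf_get_bl() above.
 */
static struct io_uring_buf_ring *app_mmap_buf_ring_example(int ring_fd,
							   unsigned int entries,
							   __u16 bgid)
{
	off_t off = IORING_OFF_PBUF_RING |
		    ((unsigned long long)bgid << IORING_OFF_PBUF_SHIFT);
	size_t size = entries * sizeof(struct io_uring_buf);
	void *ptr;

	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
		   MAP_SHARED | MAP_POPULATE, ring_fd, off);
	return ptr == MAP_FAILED ? NULL : ptr;
}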