// SPDX-License-Identifier: GPL-2.0
/*
 * Code related to the io_uring_register() syscall
 *
 * Copyright (C) 2023 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/syscalls.h>
#include <linux/refcount.h>
#include <linux/bits.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/nospec.h>
#include <linux/compat.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>

#include "io_uring.h"
#include "opdef.h"
#include "tctx.h"
#include "rsrc.h"
#include "sqpoll.h"
#include "register.h"
#include "cancel.h"
#include "kbuf.h"
#include "napi.h"
#include "eventfd.h"
#include "msg_ring.h"
#include "memmap.h"

#define IORING_MAX_RESTRICTIONS	(IORING_RESTRICTION_LAST + \
				 IORING_REGISTER_LAST + IORING_OP_LAST)

static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg,
			   unsigned nr_args)
{
	struct io_uring_probe *p;
	size_t size;
	int i, ret;

	if (nr_args > IORING_OP_LAST)
		nr_args = IORING_OP_LAST;

	size = struct_size(p, ops, nr_args);
	p = kzalloc(size, GFP_KERNEL);
	if (!p)
		return -ENOMEM;

	ret = -EFAULT;
	if (copy_from_user(p, arg, size))
		goto out;
	ret = -EINVAL;
	if (memchr_inv(p, 0, size))
		goto out;

	p->last_op = IORING_OP_LAST - 1;

	for (i = 0; i < nr_args; i++) {
		p->ops[i].op = i;
		if (io_uring_op_supported(i))
			p->ops[i].flags = IO_URING_OP_SUPPORTED;
	}
	p->ops_len = i;

	ret = 0;
	if (copy_to_user(arg, p, size))
		ret = -EFAULT;
out:
	kfree(p);
	return ret;
}
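
/*
 * Userspace usage sketch for IORING_REGISTER_PROBE (illustrative only, not
 * part of the kernel build): pass a zeroed io_uring_probe with room for up
 * to 256 ops (the cap enforced by the register dispatcher below) and test
 * IO_URING_OP_SUPPORTED per opcode. Assumes an already set-up ring_fd and a
 * raw syscall wrapper (<sys/syscall.h>, <unistd.h>, <stdio.h>, <stdlib.h>,
 * <linux/io_uring.h>); liburing's io_uring_get_probe() wraps the same call.
 *
 *	struct io_uring_probe *p;
 *
 *	p = calloc(1, sizeof(*p) + 256 * sizeof(p->ops[0]));
 *	if (p && !syscall(__NR_io_uring_register, ring_fd,
 *			  IORING_REGISTER_PROBE, p, 256)) {
 *		for (int i = 0; i < p->ops_len; i++)
 *			if (p->ops[i].flags & IO_URING_OP_SUPPORTED)
 *				printf("opcode %u supported\n", p->ops[i].op);
 *	}
 *	free(p);
 */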

int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
{
	const struct cred *creds;

	creds = xa_erase(&ctx->personalities, id);
	if (creds) {
		put_cred(creds);
		return 0;
	}

	return -EINVAL;
}

static int io_register_personality(struct io_ring_ctx *ctx)
{
	const struct cred *creds;
	u32 id;
	int ret;

	creds = get_current_cred();

	ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
			XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
	if (ret < 0) {
		put_cred(creds);
		return ret;
	}
	return id;
}

static __cold int io_register_restrictions(struct io_ring_ctx *ctx,
					   void __user *arg, unsigned int nr_args)
{
	struct io_uring_restriction *res;
	size_t size;
	int i, ret;

	/* Restrictions allowed only if rings started disabled */
	if (!(ctx->flags & IORING_SETUP_R_DISABLED))
		return -EBADFD;

	/* We allow only a single restrictions registration */
	if (ctx->restrictions.registered)
		return -EBUSY;

	if (!arg || nr_args > IORING_MAX_RESTRICTIONS)
		return -EINVAL;

	size = array_size(nr_args, sizeof(*res));
	if (size == SIZE_MAX)
		return -EOVERFLOW;

	res = memdup_user(arg, size);
	if (IS_ERR(res))
		return PTR_ERR(res);

	ret = 0;

	for (i = 0; i < nr_args; i++) {
		switch (res[i].opcode) {
		case IORING_RESTRICTION_REGISTER_OP:
			if (res[i].register_op >= IORING_REGISTER_LAST) {
				ret = -EINVAL;
				goto out;
			}

			__set_bit(res[i].register_op,
				  ctx->restrictions.register_op);
			break;
		case IORING_RESTRICTION_SQE_OP:
			if (res[i].sqe_op >= IORING_OP_LAST) {
				ret = -EINVAL;
				goto out;
			}

			__set_bit(res[i].sqe_op, ctx->restrictions.sqe_op);
			break;
		case IORING_RESTRICTION_SQE_FLAGS_ALLOWED:
			ctx->restrictions.sqe_flags_allowed = res[i].sqe_flags;
			break;
		case IORING_RESTRICTION_SQE_FLAGS_REQUIRED:
			ctx->restrictions.sqe_flags_required = res[i].sqe_flags;
			break;
		default:
			ret = -EINVAL;
			goto out;
		}
	}

out:
	/* Reset all restrictions if an error happened */
	if (ret != 0)
		memset(&ctx->restrictions, 0, sizeof(ctx->restrictions));
	else
		ctx->restrictions.registered = true;

	kfree(res);
	return ret;
}
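
/*
 * Usage sketch: restrictions can only be installed while the ring is still
 * disabled (IORING_SETUP_R_DISABLED, checked above) and take effect once the
 * ring is enabled. A minimal, hedged example that only permits readv/writev
 * submissions plus the enable-rings register op; ring_fd is assumed to have
 * been created with IORING_SETUP_R_DISABLED.
 *
 *	struct io_uring_restriction res[3] = {};
 *
 *	res[0].opcode      = IORING_RESTRICTION_SQE_OP;
 *	res[0].sqe_op      = IORING_OP_READV;
 *	res[1].opcode      = IORING_RESTRICTION_SQE_OP;
 *	res[1].sqe_op      = IORING_OP_WRITEV;
 *	res[2].opcode      = IORING_RESTRICTION_REGISTER_OP;
 *	res[2].register_op = IORING_REGISTER_ENABLE_RINGS;
 *
 *	syscall(__NR_io_uring_register, ring_fd,
 *		IORING_REGISTER_RESTRICTIONS, res, 3);
 *	syscall(__NR_io_uring_register, ring_fd,
 *		IORING_REGISTER_ENABLE_RINGS, NULL, 0);
 */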

static int io_register_enable_rings(struct io_ring_ctx *ctx)
{
	if (!(ctx->flags & IORING_SETUP_R_DISABLED))
		return -EBADFD;

	if (ctx->flags & IORING_SETUP_SINGLE_ISSUER && !ctx->submitter_task) {
		WRITE_ONCE(ctx->submitter_task, get_task_struct(current));
		/*
		 * Lazy activation attempts would fail if it was polled before
		 * submitter_task is set.
		 */
		if (wq_has_sleeper(&ctx->poll_wq))
			io_activate_pollwq(ctx);
	}

	if (ctx->restrictions.registered)
		ctx->restricted = 1;

	ctx->flags &= ~IORING_SETUP_R_DISABLED;
	if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait))
		wake_up(&ctx->sq_data->wait);
	return 0;
}

static __cold int __io_register_iowq_aff(struct io_ring_ctx *ctx,
					 cpumask_var_t new_mask)
{
	int ret;

	if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
		ret = io_wq_cpu_affinity(current->io_uring, new_mask);
	} else {
		mutex_unlock(&ctx->uring_lock);
		ret = io_sqpoll_wq_cpu_affinity(ctx, new_mask);
		mutex_lock(&ctx->uring_lock);
	}

	return ret;
}

static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx,
				       void __user *arg, unsigned len)
{
	cpumask_var_t new_mask;
	int ret;

	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
		return -ENOMEM;

	cpumask_clear(new_mask);
	if (len > cpumask_size())
		len = cpumask_size();

#ifdef CONFIG_COMPAT
	if (in_compat_syscall())
		ret = compat_get_bitmap(cpumask_bits(new_mask),
					(const compat_ulong_t __user *)arg,
					len * 8 /* CHAR_BIT */);
	else
#endif
		ret = copy_from_user(new_mask, arg, len);

	if (ret) {
		free_cpumask_var(new_mask);
		return -EFAULT;
	}

	ret = __io_register_iowq_aff(ctx, new_mask);
	free_cpumask_var(new_mask);
	return ret;
}

static __cold int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
{
	return __io_register_iowq_aff(ctx, NULL);
}

static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
					       void __user *arg)
	__must_hold(&ctx->uring_lock)
{
	struct io_tctx_node *node;
	struct io_uring_task *tctx = NULL;
	struct io_sq_data *sqd = NULL;
	__u32 new_count[2];
	int i, ret;

	if (copy_from_user(new_count, arg, sizeof(new_count)))
		return -EFAULT;
	for (i = 0; i < ARRAY_SIZE(new_count); i++)
		if (new_count[i] > INT_MAX)
			return -EINVAL;

	if (ctx->flags & IORING_SETUP_SQPOLL) {
		sqd = ctx->sq_data;
		if (sqd) {
			/*
			 * Observe the correct sqd->lock -> ctx->uring_lock
			 * ordering. Fine to drop uring_lock here, we hold
			 * a ref to the ctx.
			 */
			refcount_inc(&sqd->refs);
			mutex_unlock(&ctx->uring_lock);
			mutex_lock(&sqd->lock);
			mutex_lock(&ctx->uring_lock);
			if (sqd->thread)
				tctx = sqd->thread->io_uring;
		}
	} else {
		tctx = current->io_uring;
	}

	BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits));

	for (i = 0; i < ARRAY_SIZE(new_count); i++)
		if (new_count[i])
			ctx->iowq_limits[i] = new_count[i];
	ctx->iowq_limits_set = true;

	if (tctx && tctx->io_wq) {
		ret = io_wq_max_workers(tctx->io_wq, new_count);
		if (ret)
			goto err;
	} else {
		memset(new_count, 0, sizeof(new_count));
	}

	if (sqd) {
		mutex_unlock(&ctx->uring_lock);
		mutex_unlock(&sqd->lock);
		io_put_sq_data(sqd);
		mutex_lock(&ctx->uring_lock);
	}

	if (copy_to_user(arg, new_count, sizeof(new_count)))
		return -EFAULT;

	/* that's it for SQPOLL, only the SQPOLL task creates requests */
	if (sqd)
		return 0;

	/* now propagate the restriction to all registered users */
	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
		tctx = node->task->io_uring;
		if (WARN_ON_ONCE(!tctx->io_wq))
			continue;

		for (i = 0; i < ARRAY_SIZE(new_count); i++)
			new_count[i] = ctx->iowq_limits[i];
		/* ignore errors, it always returns zero anyway */
		(void)io_wq_max_workers(tctx->io_wq, new_count);
	}
	return 0;
err:
	if (sqd) {
		mutex_unlock(&ctx->uring_lock);
		mutex_unlock(&sqd->lock);
		io_put_sq_data(sqd);
		mutex_lock(&ctx->uring_lock);
	}
	return ret;
}

static int io_register_clock(struct io_ring_ctx *ctx,
			     struct io_uring_clock_register __user *arg)
{
	struct io_uring_clock_register reg;

	if (copy_from_user(&reg, arg, sizeof(reg)))
		return -EFAULT;
	if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
		return -EINVAL;

	switch (reg.clockid) {
	case CLOCK_MONOTONIC:
		ctx->clock_offset = 0;
		break;
	case CLOCK_BOOTTIME:
		ctx->clock_offset = TK_OFFS_BOOT;
		break;
	default:
		return -EINVAL;
	}

	ctx->clockid = reg.clockid;
	return 0;
}
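
/*
 * Usage sketch for IORING_REGISTER_IOWQ_MAX_WORKERS: new_count[0] caps
 * bounded (regular file) io-wq workers, new_count[1] caps unbounded workers.
 * A zero leaves that limit unchanged, and the previous values are copied
 * back to userspace; nr_args must be 2, matching the dispatcher check below.
 * Hedged, illustrative only.
 *
 *	unsigned int counts[2] = { 4, 8 };	// bounded, unbounded
 *
 *	if (!syscall(__NR_io_uring_register, ring_fd,
 *		     IORING_REGISTER_IOWQ_MAX_WORKERS, counts, 2))
 *		printf("previous limits: %u bounded, %u unbounded\n",
 *		       counts[0], counts[1]);
 */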

/*
 * State to maintain until we can swap. Both new and old state, used for
 * either mapping or freeing.
 */
struct io_ring_ctx_rings {
	unsigned short n_ring_pages;
	unsigned short n_sqe_pages;
	struct page **ring_pages;
	struct page **sqe_pages;
	struct io_uring_sqe *sq_sqes;
	struct io_rings *rings;
};

static void io_register_free_rings(struct io_uring_params *p,
				   struct io_ring_ctx_rings *r)
{
	if (!(p->flags & IORING_SETUP_NO_MMAP)) {
		io_pages_unmap(r->rings, &r->ring_pages, &r->n_ring_pages,
				true);
		io_pages_unmap(r->sq_sqes, &r->sqe_pages, &r->n_sqe_pages,
				true);
	} else {
		io_pages_free(&r->ring_pages, r->n_ring_pages);
		io_pages_free(&r->sqe_pages, r->n_sqe_pages);
		vunmap(r->rings);
		vunmap(r->sq_sqes);
	}
}

#define swap_old(ctx, o, n, field)		\
	do {					\
		(o).field = (ctx)->field;	\
		(ctx)->field = (n).field;	\
	} while (0)

#define RESIZE_FLAGS	(IORING_SETUP_CQSIZE | IORING_SETUP_CLAMP)
#define COPY_FLAGS	(IORING_SETUP_NO_SQARRAY | IORING_SETUP_SQE128 | \
			 IORING_SETUP_CQE32 | IORING_SETUP_NO_MMAP)

static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
{
	struct io_ring_ctx_rings o = { }, n = { }, *to_free = NULL;
	size_t size, sq_array_offset;
	struct io_uring_params p;
	unsigned i, tail;
	void *ptr;
	int ret;

	/* for single issuer, must be owner resizing */
	if (ctx->flags & IORING_SETUP_SINGLE_ISSUER &&
	    current != ctx->submitter_task)
		return -EEXIST;
	/* limited to DEFER_TASKRUN for now */
	if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
		return -EINVAL;
	if (copy_from_user(&p, arg, sizeof(p)))
		return -EFAULT;
	if (p.flags & ~RESIZE_FLAGS)
		return -EINVAL;

	/* properties that are always inherited */
	p.flags |= (ctx->flags & COPY_FLAGS);

	ret = io_uring_fill_params(p.sq_entries, &p);
	if (unlikely(ret))
		return ret;

	/* nothing to do, but copy params back */
	if (p.sq_entries == ctx->sq_entries && p.cq_entries == ctx->cq_entries) {
		if (copy_to_user(arg, &p, sizeof(p)))
			return -EFAULT;
		return 0;
	}

	size = rings_size(p.flags, p.sq_entries, p.cq_entries,
			  &sq_array_offset);
	if (size == SIZE_MAX)
		return -EOVERFLOW;

	if (!(p.flags & IORING_SETUP_NO_MMAP))
		n.rings = io_pages_map(&n.ring_pages, &n.n_ring_pages, size);
	else
		n.rings = __io_uaddr_map(&n.ring_pages, &n.n_ring_pages,
					 p.cq_off.user_addr, size);
	if (IS_ERR(n.rings))
		return PTR_ERR(n.rings);

	n.rings->sq_ring_mask = p.sq_entries - 1;
	n.rings->cq_ring_mask = p.cq_entries - 1;
	n.rings->sq_ring_entries = p.sq_entries;
	n.rings->cq_ring_entries = p.cq_entries;

	if (copy_to_user(arg, &p, sizeof(p))) {
		io_register_free_rings(&p, &n);
		return -EFAULT;
	}

	if (p.flags & IORING_SETUP_SQE128)
		size = array_size(2 * sizeof(struct io_uring_sqe), p.sq_entries);
	else
		size = array_size(sizeof(struct io_uring_sqe), p.sq_entries);
	if (size == SIZE_MAX) {
		io_register_free_rings(&p, &n);
		return -EOVERFLOW;
	}

	if (!(p.flags & IORING_SETUP_NO_MMAP))
		ptr = io_pages_map(&n.sqe_pages, &n.n_sqe_pages, size);
	else
		ptr = __io_uaddr_map(&n.sqe_pages, &n.n_sqe_pages,
				     p.sq_off.user_addr,
				     size);
	if (IS_ERR(ptr)) {
		io_register_free_rings(&p, &n);
		return PTR_ERR(ptr);
	}

	/*
	 * If using SQPOLL, park the thread
	 */
	if (ctx->sq_data) {
		mutex_unlock(&ctx->uring_lock);
		io_sq_thread_park(ctx->sq_data);
		mutex_lock(&ctx->uring_lock);
	}

	/*
	 * We'll do the swap. Grab the ctx->resize_lock, which will exclude
	 * any new mmap's on the ring fd. Clear out existing mappings to prevent
	 * mmap from seeing them, as we'll unmap them. Any attempt to mmap
	 * existing rings beyond this point will fail. Not that it could proceed
	 * at this point anyway, as the io_uring mmap side needs to grab the
	 * ctx->resize_lock as well. Likewise, hold the completion lock over the
	 * duration of the actual swap.
	 */
	mutex_lock(&ctx->resize_lock);
	spin_lock(&ctx->completion_lock);
	o.rings = ctx->rings;
	ctx->rings = NULL;
	o.sq_sqes = ctx->sq_sqes;
	ctx->sq_sqes = NULL;

	/*
	 * Now copy SQ and CQ entries, if any. If either of the destination
	 * rings can't hold what is already there, then fail the operation.
	 */
	n.sq_sqes = ptr;
	tail = o.rings->sq.tail;
	if (tail - o.rings->sq.head > p.sq_entries)
		goto overflow;
	for (i = o.rings->sq.head; i < tail; i++) {
		unsigned src_head = i & (ctx->sq_entries - 1);
		unsigned dst_head = i & n.rings->sq_ring_mask;

		n.sq_sqes[dst_head] = o.sq_sqes[src_head];
	}
	n.rings->sq.head = o.rings->sq.head;
	n.rings->sq.tail = o.rings->sq.tail;

	tail = o.rings->cq.tail;
	if (tail - o.rings->cq.head > p.cq_entries) {
overflow:
		/* restore old rings, and return -EOVERFLOW via cleanup path */
		ctx->rings = o.rings;
		ctx->sq_sqes = o.sq_sqes;
		to_free = &n;
		ret = -EOVERFLOW;
		goto out;
	}
	for (i = o.rings->cq.head; i < tail; i++) {
		unsigned src_head = i & (ctx->cq_entries - 1);
		unsigned dst_head = i & n.rings->cq_ring_mask;

		n.rings->cqes[dst_head] = o.rings->cqes[src_head];
	}
	n.rings->cq.head = o.rings->cq.head;
	n.rings->cq.tail = o.rings->cq.tail;
	/* invalidate cached cqe refill */
	ctx->cqe_cached = ctx->cqe_sentinel = NULL;

	n.rings->sq_dropped = o.rings->sq_dropped;
	n.rings->sq_flags = o.rings->sq_flags;
	n.rings->cq_flags = o.rings->cq_flags;
	n.rings->cq_overflow = o.rings->cq_overflow;

	/* all done, store old pointers and assign new ones */
	if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
		ctx->sq_array = (u32 *)((char *)n.rings + sq_array_offset);

	ctx->sq_entries = p.sq_entries;
	ctx->cq_entries = p.cq_entries;

	ctx->rings = n.rings;
	ctx->sq_sqes = n.sq_sqes;
	swap_old(ctx, o, n, n_ring_pages);
	swap_old(ctx, o, n, n_sqe_pages);
	swap_old(ctx, o, n, ring_pages);
	swap_old(ctx, o, n, sqe_pages);
	to_free = &o;
	ret = 0;
out:
	spin_unlock(&ctx->completion_lock);
	mutex_unlock(&ctx->resize_lock);
	io_register_free_rings(&p, to_free);

	if (ctx->sq_data)
		io_sq_thread_unpark(ctx->sq_data);

	return ret;
}
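
/*
 * Usage sketch for IORING_REGISTER_RESIZE_RINGS (hedged; as checked above,
 * the ring must have been set up with IORING_SETUP_DEFER_TASKRUN and, for
 * single-issuer rings, the call must come from the submitter task). Only the
 * new SQ/CQ sizes and the RESIZE_FLAGS may be supplied; everything in
 * COPY_FLAGS is inherited from the existing ring, and the updated
 * io_uring_params is copied back for userspace to re-establish its mappings.
 *
 *	struct io_uring_params p = {};
 *
 *	p.sq_entries = 256;
 *	p.flags      = IORING_SETUP_CQSIZE;
 *	p.cq_entries = 1024;
 *	if (!syscall(__NR_io_uring_register, ring_fd,
 *		     IORING_REGISTER_RESIZE_RINGS, &p, 1)) {
 *		// p now reflects the resized rings
 *	}
 */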

static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)
{
	struct io_uring_mem_region_reg __user *reg_uptr = uarg;
	struct io_uring_mem_region_reg reg;
	struct io_uring_region_desc __user *rd_uptr;
	struct io_uring_region_desc rd;
	int ret;

	if (io_region_is_set(&ctx->param_region))
		return -EBUSY;
	if (copy_from_user(&reg, reg_uptr, sizeof(reg)))
		return -EFAULT;
	rd_uptr = u64_to_user_ptr(reg.region_uptr);
	if (copy_from_user(&rd, rd_uptr, sizeof(rd)))
		return -EFAULT;

	if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
		return -EINVAL;
	if (reg.flags & ~IORING_MEM_REGION_REG_WAIT_ARG)
		return -EINVAL;

	/*
	 * This ensures there are no waiters. Waiters are unlocked and it's
	 * hard to synchronise with them, especially if we need to initialise
	 * the region.
	 */
	if ((reg.flags & IORING_MEM_REGION_REG_WAIT_ARG) &&
	    !(ctx->flags & IORING_SETUP_R_DISABLED))
		return -EINVAL;

	ret = io_create_region(ctx, &ctx->param_region, &rd);
	if (ret)
		return ret;
	if (copy_to_user(rd_uptr, &rd, sizeof(rd))) {
		io_free_region(ctx, &ctx->param_region);
		return -EFAULT;
	}

	if (reg.flags & IORING_MEM_REGION_REG_WAIT_ARG) {
		ctx->cq_wait_arg = io_region_get_ptr(&ctx->param_region);
		ctx->cq_wait_size = rd.size;
	}
	return 0;
}
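
/*
 * Usage sketch for IORING_REGISTER_MEM_REGION (hedged): the caller hands in
 * a memory region which the kernel maps and, with
 * IORING_MEM_REGION_REG_WAIT_ARG, uses for registered wait arguments. As
 * checked above, the wait-arg flag is only accepted while the ring is still
 * R_DISABLED. Field names and IORING_MEM_REGION_TYPE_USER below follow my
 * reading of the uapi structs referenced in this file and should be treated
 * as assumptions.
 *
 *	void *buf = aligned_alloc(sysconf(_SC_PAGESIZE), 4096);
 *	struct io_uring_region_desc rd = {
 *		.user_addr	= (unsigned long)buf,
 *		.size		= 4096,
 *		.flags		= IORING_MEM_REGION_TYPE_USER,
 *	};
 *	struct io_uring_mem_region_reg reg = {
 *		.region_uptr	= (unsigned long)&rd,
 *		.flags		= IORING_MEM_REGION_REG_WAIT_ARG,
 *	};
 *
 *	syscall(__NR_io_uring_register, ring_fd,
 *		IORING_REGISTER_MEM_REGION, &reg, 1);
 */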

static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
			       void __user *arg, unsigned nr_args)
	__releases(ctx->uring_lock)
	__acquires(ctx->uring_lock)
{
	int ret;

	/*
	 * We don't quiesce the refs for register anymore and so it can't be
	 * dying as we're holding a file ref here.
	 */
	if (WARN_ON_ONCE(percpu_ref_is_dying(&ctx->refs)))
		return -ENXIO;

	if (ctx->submitter_task && ctx->submitter_task != current)
		return -EEXIST;

	if (ctx->restricted) {
		opcode = array_index_nospec(opcode, IORING_REGISTER_LAST);
		if (!test_bit(opcode, ctx->restrictions.register_op))
			return -EACCES;
	}

	switch (opcode) {
	case IORING_REGISTER_BUFFERS:
		ret = -EFAULT;
		if (!arg)
			break;
		ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL);
		break;
	case IORING_UNREGISTER_BUFFERS:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_sqe_buffers_unregister(ctx);
		break;
	case IORING_REGISTER_FILES:
		ret = -EFAULT;
		if (!arg)
			break;
		ret = io_sqe_files_register(ctx, arg, nr_args, NULL);
		break;
	case IORING_UNREGISTER_FILES:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_sqe_files_unregister(ctx);
		break;
	case IORING_REGISTER_FILES_UPDATE:
		ret = io_register_files_update(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_EVENTFD:
		ret = -EINVAL;
		if (nr_args != 1)
			break;
		ret = io_eventfd_register(ctx, arg, 0);
		break;
	case IORING_REGISTER_EVENTFD_ASYNC:
		ret = -EINVAL;
		if (nr_args != 1)
			break;
		ret = io_eventfd_register(ctx, arg, 1);
		break;
	case IORING_UNREGISTER_EVENTFD:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_eventfd_unregister(ctx);
		break;
	case IORING_REGISTER_PROBE:
		ret = -EINVAL;
		if (!arg || nr_args > 256)
			break;
		ret = io_probe(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_PERSONALITY:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_register_personality(ctx);
		break;
	case IORING_UNREGISTER_PERSONALITY:
		ret = -EINVAL;
		if (arg)
			break;
		ret = io_unregister_personality(ctx, nr_args);
		break;
	case IORING_REGISTER_ENABLE_RINGS:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_register_enable_rings(ctx);
		break;
	case IORING_REGISTER_RESTRICTIONS:
		ret = io_register_restrictions(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_FILES2:
		ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_FILE);
		break;
	case IORING_REGISTER_FILES_UPDATE2:
		ret = io_register_rsrc_update(ctx, arg, nr_args,
					      IORING_RSRC_FILE);
		break;
	case IORING_REGISTER_BUFFERS2:
		ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_BUFFER);
		break;
	case IORING_REGISTER_BUFFERS_UPDATE:
		ret = io_register_rsrc_update(ctx, arg, nr_args,
					      IORING_RSRC_BUFFER);
		break;
	case IORING_REGISTER_IOWQ_AFF:
		ret = -EINVAL;
		if (!arg || !nr_args)
			break;
		ret = io_register_iowq_aff(ctx, arg, nr_args);
		break;
	case IORING_UNREGISTER_IOWQ_AFF:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_unregister_iowq_aff(ctx);
		break;
	case IORING_REGISTER_IOWQ_MAX_WORKERS:
		ret = -EINVAL;
		if (!arg || nr_args != 2)
			break;
		ret = io_register_iowq_max_workers(ctx, arg);
		break;
	case IORING_REGISTER_RING_FDS:
		ret = io_ringfd_register(ctx, arg, nr_args);
		break;
	case IORING_UNREGISTER_RING_FDS:
		ret = io_ringfd_unregister(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_PBUF_RING:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_pbuf_ring(ctx, arg);
		break;
	case IORING_UNREGISTER_PBUF_RING:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_unregister_pbuf_ring(ctx, arg);
		break;
	case IORING_REGISTER_SYNC_CANCEL:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_sync_cancel(ctx, arg);
		break;
	case IORING_REGISTER_FILE_ALLOC_RANGE:
		ret = -EINVAL;
		if (!arg || nr_args)
			break;
		ret = io_register_file_alloc_range(ctx, arg);
		break;
	case IORING_REGISTER_PBUF_STATUS:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_pbuf_status(ctx, arg);
		break;
	case IORING_REGISTER_NAPI:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_napi(ctx, arg);
		break;
	case IORING_UNREGISTER_NAPI:
		ret = -EINVAL;
		if (nr_args != 1)
			break;
		ret = io_unregister_napi(ctx, arg);
		break;
	case IORING_REGISTER_CLOCK:
		ret = -EINVAL;
		if (!arg || nr_args)
			break;
		ret = io_register_clock(ctx, arg);
		break;
	case IORING_REGISTER_CLONE_BUFFERS:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_clone_buffers(ctx, arg);
		break;
	case IORING_REGISTER_RESIZE_RINGS:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_resize_rings(ctx, arg);
		break;
	case IORING_REGISTER_MEM_REGION:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_mem_region(ctx, arg);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}
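
/*
 * Usage sketch for one of the simpler opcodes dispatched above, taking
 * IORING_REGISTER_EVENTFD as the example: arg points at the eventfd file
 * descriptor and nr_args must be 1. Completions then signal the eventfd;
 * IORING_REGISTER_EVENTFD_ASYNC limits that to completions posted out of
 * line. Hedged, illustrative only (assumes <sys/eventfd.h>).
 *
 *	int efd = eventfd(0, EFD_CLOEXEC);
 *
 *	syscall(__NR_io_uring_register, ring_fd,
 *		IORING_REGISTER_EVENTFD, &efd, 1);
 *	// ... submit and wait ...
 *	syscall(__NR_io_uring_register, ring_fd,
 *		IORING_UNREGISTER_EVENTFD, NULL, 0);
 */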

/*
 * Given an 'fd' value, return the ctx associated with it. If 'registered' is
 * true, then the registered index is used. Otherwise, the normal fd table.
 * Caller must call fput() on the returned file, unless it's an ERR_PTR.
 */
struct file *io_uring_register_get_file(unsigned int fd, bool registered)
{
	struct file *file;

	if (registered) {
		/*
		 * Ring fd has been registered via IORING_REGISTER_RING_FDS, we
		 * need only dereference our task private array to find it.
		 */
		struct io_uring_task *tctx = current->io_uring;

		if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX))
			return ERR_PTR(-EINVAL);
		fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
		file = tctx->registered_rings[fd];
	} else {
		file = fget(fd);
	}

	if (unlikely(!file))
		return ERR_PTR(-EBADF);
	if (io_is_uring_fops(file))
		return file;
	fput(file);
	return ERR_PTR(-EOPNOTSUPP);
}

/*
 * "blind" registration opcodes are ones where there's no ring given, and
 * hence the source fd must be -1.
 */
static int io_uring_register_blind(unsigned int opcode, void __user *arg,
				   unsigned int nr_args)
{
	switch (opcode) {
	case IORING_REGISTER_SEND_MSG_RING: {
		struct io_uring_sqe sqe;

		if (!arg || nr_args != 1)
			return -EINVAL;
		if (copy_from_user(&sqe, arg, sizeof(sqe)))
			return -EFAULT;
		/* no flags supported */
		if (sqe.flags)
			return -EINVAL;
		if (sqe.opcode == IORING_OP_MSG_RING)
			return io_uring_sync_msg_ring(&sqe);
	}
	}

	return -EINVAL;
}
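
/*
 * Usage sketch for the one blind opcode handled above,
 * IORING_REGISTER_SEND_MSG_RING: the caller has no ring of its own, so fd is
 * -1 and the target ring is named inside the SQE, much as it would be for an
 * IORING_OP_MSG_RING submission. Hedged sketch; see io_uring_sync_msg_ring()
 * for the fields actually consumed.
 *
 *	struct io_uring_sqe sqe = {};
 *
 *	sqe.opcode	= IORING_OP_MSG_RING;
 *	sqe.fd		= target_ring_fd;	// ring to post the CQE to
 *	sqe.off		= 0xcafe;		// user_data of the posted CQE
 *	sqe.len		= 0;			// res value of the posted CQE
 *
 *	syscall(__NR_io_uring_register, -1,
 *		IORING_REGISTER_SEND_MSG_RING, &sqe, 1);
 */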

SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
		void __user *, arg, unsigned int, nr_args)
{
	struct io_ring_ctx *ctx;
	long ret = -EBADF;
	struct file *file;
	bool use_registered_ring;

	use_registered_ring = !!(opcode & IORING_REGISTER_USE_REGISTERED_RING);
	opcode &= ~IORING_REGISTER_USE_REGISTERED_RING;

	if (opcode >= IORING_REGISTER_LAST)
		return -EINVAL;

	if (fd == -1)
		return io_uring_register_blind(opcode, arg, nr_args);

	file = io_uring_register_get_file(fd, use_registered_ring);
	if (IS_ERR(file))
		return PTR_ERR(file);
	ctx = file->private_data;

	mutex_lock(&ctx->uring_lock);
	ret = __io_uring_register(ctx, opcode, arg, nr_args);

	trace_io_uring_register(ctx, opcode, ctx->file_table.data.nr,
				ctx->buf_table.nr, ret);
	mutex_unlock(&ctx->uring_lock);
	if (!use_registered_ring)
		fput(file);
	return ret;
}
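
/*
 * Usage sketch for IORING_REGISTER_USE_REGISTERED_RING: once a ring fd has
 * been registered via IORING_REGISTER_RING_FDS, later register calls can OR
 * the flag into the opcode and pass the registered index instead of a real
 * fd, skipping the per-call fget()/fput() above. Hedged example registering
 * a single ring at a kernel-chosen slot.
 *
 *	struct io_uring_rsrc_update upd = {
 *		.offset	= -1U,		// let the kernel pick a slot
 *		.data	= ring_fd,
 *	};
 *
 *	if (syscall(__NR_io_uring_register, ring_fd,
 *		    IORING_REGISTER_RING_FDS, &upd, 1) == 1) {
 *		// upd.offset now holds the registered index
 *		syscall(__NR_io_uring_register, upd.offset,
 *			IORING_UNREGISTER_EVENTFD |
 *			IORING_REGISTER_USE_REGISTERED_RING, NULL, 0);
 *	}
 */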