// SPDX-License-Identifier: GPL-2.0
/*
 * Code related to the io_uring_register() syscall
 *
 * Copyright (C) 2023 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/syscalls.h>
#include <linux/refcount.h>
#include <linux/bits.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/nospec.h>
#include <linux/compat.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>

#include "io_uring.h"
#include "opdef.h"
#include "tctx.h"
#include "rsrc.h"
#include "sqpoll.h"
#include "register.h"
#include "cancel.h"
#include "kbuf.h"
#include "napi.h"
#include "eventfd.h"
#include "msg_ring.h"
#include "memmap.h"

#define IORING_MAX_RESTRICTIONS	(IORING_RESTRICTION_LAST + \
				 IORING_REGISTER_LAST + IORING_OP_LAST)

static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg,
			   unsigned nr_args)
{
	struct io_uring_probe *p;
	size_t size;
	int i, ret;

	if (nr_args > IORING_OP_LAST)
		nr_args = IORING_OP_LAST;

	size = struct_size(p, ops, nr_args);
	p = kzalloc(size, GFP_KERNEL);
	if (!p)
		return -ENOMEM;

	ret = -EFAULT;
	if (copy_from_user(p, arg, size))
		goto out;
	ret = -EINVAL;
	if (memchr_inv(p, 0, size))
		goto out;

	p->last_op = IORING_OP_LAST - 1;

	for (i = 0; i < nr_args; i++) {
		p->ops[i].op = i;
		if (io_uring_op_supported(i))
			p->ops[i].flags = IO_URING_OP_SUPPORTED;
	}
	p->ops_len = i;

	ret = 0;
	if (copy_to_user(arg, p, size))
		ret = -EFAULT;
out:
	kfree(p);
	return ret;
}

int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
{
	const struct cred *creds;

	creds = xa_erase(&ctx->personalities, id);
	if (creds) {
		put_cred(creds);
		return 0;
	}

	return -EINVAL;
}

static int io_register_personality(struct io_ring_ctx *ctx)
{
	const struct cred *creds;
	u32 id;
	int ret;

	creds = get_current_cred();

	ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
			XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
	if (ret < 0) {
		put_cred(creds);
		return ret;
	}
	return id;
}

static __cold int io_register_restrictions(struct io_ring_ctx *ctx,
					   void __user *arg, unsigned int nr_args)
{
	struct io_uring_restriction *res;
	size_t size;
	int i, ret;

	/* Restrictions allowed only if rings started disabled */
	if (!(ctx->flags & IORING_SETUP_R_DISABLED))
		return -EBADFD;

	/* We allow only a single restrictions registration */
	if (ctx->restrictions.registered)
		return -EBUSY;

	if (!arg || nr_args > IORING_MAX_RESTRICTIONS)
		return -EINVAL;

	size = array_size(nr_args, sizeof(*res));
	if (size == SIZE_MAX)
		return -EOVERFLOW;

	res = memdup_user(arg, size);
	if (IS_ERR(res))
		return PTR_ERR(res);

	ret = 0;

	for (i = 0; i < nr_args; i++) {
		switch (res[i].opcode) {
		case IORING_RESTRICTION_REGISTER_OP:
			if (res[i].register_op >= IORING_REGISTER_LAST) {
				ret = -EINVAL;
				goto out;
			}

			__set_bit(res[i].register_op,
				  ctx->restrictions.register_op);
			break;
		case IORING_RESTRICTION_SQE_OP:
			if (res[i].sqe_op >= IORING_OP_LAST) {
				ret = -EINVAL;
				goto out;
			}

			__set_bit(res[i].sqe_op, ctx->restrictions.sqe_op);
			break;
		case IORING_RESTRICTION_SQE_FLAGS_ALLOWED:
			ctx->restrictions.sqe_flags_allowed = res[i].sqe_flags;
			break;
		case IORING_RESTRICTION_SQE_FLAGS_REQUIRED:
			ctx->restrictions.sqe_flags_required = res[i].sqe_flags;
			break;
		default:
			ret = -EINVAL;
			goto out;
		}
	}

out:
	/* Reset all restrictions if an error happened */
	if (ret != 0)
		memset(&ctx->restrictions, 0, sizeof(ctx->restrictions));
	else
		ctx->restrictions.registered = true;

	kfree(res);
	return ret;
}

static int io_register_enable_rings(struct io_ring_ctx *ctx)
{
	if (!(ctx->flags & IORING_SETUP_R_DISABLED))
		return -EBADFD;

	if (ctx->flags & IORING_SETUP_SINGLE_ISSUER && !ctx->submitter_task) {
		WRITE_ONCE(ctx->submitter_task, get_task_struct(current));
		/*
		 * Lazy activation attempts would fail if it was polled before
		 * submitter_task is set.
		 */
		if (wq_has_sleeper(&ctx->poll_wq))
			io_activate_pollwq(ctx);
	}

	if (ctx->restrictions.registered)
		ctx->restricted = 1;

	ctx->flags &= ~IORING_SETUP_R_DISABLED;
	if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait))
		wake_up(&ctx->sq_data->wait);
	return 0;
}

static __cold int __io_register_iowq_aff(struct io_ring_ctx *ctx,
					 cpumask_var_t new_mask)
{
	int ret;

	if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
		ret = io_wq_cpu_affinity(current->io_uring, new_mask);
	} else {
		mutex_unlock(&ctx->uring_lock);
		ret = io_sqpoll_wq_cpu_affinity(ctx, new_mask);
		mutex_lock(&ctx->uring_lock);
	}

	return ret;
}

static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx,
				       void __user *arg, unsigned len)
{
	cpumask_var_t new_mask;
	int ret;

	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
		return -ENOMEM;

	cpumask_clear(new_mask);
	if (len > cpumask_size())
		len = cpumask_size();

#ifdef CONFIG_COMPAT
	if (in_compat_syscall())
		ret = compat_get_bitmap(cpumask_bits(new_mask),
					(const compat_ulong_t __user *)arg,
					len * 8 /* CHAR_BIT */);
	else
#endif
		ret = copy_from_user(new_mask, arg, len);

	if (ret) {
		free_cpumask_var(new_mask);
		return -EFAULT;
	}

	ret = __io_register_iowq_aff(ctx, new_mask);
	free_cpumask_var(new_mask);
	return ret;
}

static __cold int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
{
	return __io_register_iowq_aff(ctx, NULL);
}
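
/*
 * Illustrative userspace sketch (not part of the kernel build, an assumed
 * example): pinning a ring's io-wq workers to CPUs 0-1 via
 * IORING_REGISTER_IOWQ_AFF, handled by io_register_iowq_aff() above. The
 * argument is a cpu_set_t and nr_args is its size in bytes; calling
 * IORING_UNREGISTER_IOWQ_AFF with no argument reverts to the default
 * affinity.
 *
 *	#define _GNU_SOURCE
 *	#include <sched.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *	#include <linux/io_uring.h>
 *
 *	static int pin_iowq(int ring_fd)
 *	{
 *		cpu_set_t mask;
 *
 *		CPU_ZERO(&mask);
 *		CPU_SET(0, &mask);
 *		CPU_SET(1, &mask);
 *		// nr_args carries the byte size of the mask
 *		return syscall(__NR_io_uring_register, ring_fd,
 *			       IORING_REGISTER_IOWQ_AFF, &mask, sizeof(mask));
 *	}
 */
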
static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
					       void __user *arg)
	__must_hold(&ctx->uring_lock)
{
	struct io_tctx_node *node;
	struct io_uring_task *tctx = NULL;
	struct io_sq_data *sqd = NULL;
	__u32 new_count[2];
	int i, ret;

	if (copy_from_user(new_count, arg, sizeof(new_count)))
		return -EFAULT;
	for (i = 0; i < ARRAY_SIZE(new_count); i++)
		if (new_count[i] > INT_MAX)
			return -EINVAL;

	if (ctx->flags & IORING_SETUP_SQPOLL) {
		sqd = ctx->sq_data;
		if (sqd) {
			/*
			 * Observe the correct sqd->lock -> ctx->uring_lock
			 * ordering. Fine to drop uring_lock here, we hold
			 * a ref to the ctx.
			 */
			refcount_inc(&sqd->refs);
			mutex_unlock(&ctx->uring_lock);
			mutex_lock(&sqd->lock);
			mutex_lock(&ctx->uring_lock);
			if (sqd->thread)
				tctx = sqd->thread->io_uring;
		}
	} else {
		tctx = current->io_uring;
	}

	BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits));

	for (i = 0; i < ARRAY_SIZE(new_count); i++)
		if (new_count[i])
			ctx->iowq_limits[i] = new_count[i];
	ctx->iowq_limits_set = true;

	if (tctx && tctx->io_wq) {
		ret = io_wq_max_workers(tctx->io_wq, new_count);
		if (ret)
			goto err;
	} else {
		memset(new_count, 0, sizeof(new_count));
	}

	if (sqd) {
		mutex_unlock(&ctx->uring_lock);
		mutex_unlock(&sqd->lock);
		io_put_sq_data(sqd);
		mutex_lock(&ctx->uring_lock);
	}

	if (copy_to_user(arg, new_count, sizeof(new_count)))
		return -EFAULT;

	/* that's it for SQPOLL, only the SQPOLL task creates requests */
	if (sqd)
		return 0;

	/* now propagate the restriction to all registered users */
	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
		tctx = node->task->io_uring;
		if (WARN_ON_ONCE(!tctx->io_wq))
			continue;

		for (i = 0; i < ARRAY_SIZE(new_count); i++)
			new_count[i] = ctx->iowq_limits[i];
		/* ignore errors, it always returns zero anyway */
		(void)io_wq_max_workers(tctx->io_wq, new_count);
	}
	return 0;
err:
	if (sqd) {
		mutex_unlock(&ctx->uring_lock);
		mutex_unlock(&sqd->lock);
		io_put_sq_data(sqd);
		mutex_lock(&ctx->uring_lock);
	}
	return ret;
}

static int io_register_clock(struct io_ring_ctx *ctx,
			     struct io_uring_clock_register __user *arg)
{
	struct io_uring_clock_register reg;

	if (copy_from_user(&reg, arg, sizeof(reg)))
		return -EFAULT;
	if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
		return -EINVAL;

	switch (reg.clockid) {
	case CLOCK_MONOTONIC:
		ctx->clock_offset = 0;
		break;
	case CLOCK_BOOTTIME:
		ctx->clock_offset = TK_OFFS_BOOT;
		break;
	default:
		return -EINVAL;
	}

	ctx->clockid = reg.clockid;
	return 0;
}

/*
 * State to maintain until we can swap. Both new and old state, used for
 * either mapping or freeing.
 */
struct io_ring_ctx_rings {
	unsigned short n_ring_pages;
	unsigned short n_sqe_pages;
	struct page **ring_pages;
	struct page **sqe_pages;
	struct io_uring_sqe *sq_sqes;
	struct io_rings *rings;
};

static void io_register_free_rings(struct io_uring_params *p,
				   struct io_ring_ctx_rings *r)
{
	if (!(p->flags & IORING_SETUP_NO_MMAP)) {
		io_pages_unmap(r->rings, &r->ring_pages, &r->n_ring_pages,
				true);
		io_pages_unmap(r->sq_sqes, &r->sqe_pages, &r->n_sqe_pages,
				true);
	} else {
		io_pages_free(&r->ring_pages, r->n_ring_pages);
		io_pages_free(&r->sqe_pages, r->n_sqe_pages);
		vunmap(r->rings);
		vunmap(r->sq_sqes);
	}
}

#define swap_old(ctx, o, n, field)		\
	do {					\
		(o).field = (ctx)->field;	\
		(ctx)->field = (n).field;	\
	} while (0)

#define RESIZE_FLAGS	(IORING_SETUP_CQSIZE | IORING_SETUP_CLAMP)
#define COPY_FLAGS	(IORING_SETUP_NO_SQARRAY | IORING_SETUP_SQE128 | \
			 IORING_SETUP_CQE32 | IORING_SETUP_NO_MMAP)

static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
{
	struct io_ring_ctx_rings o = { }, n = { }, *to_free = NULL;
	size_t size, sq_array_offset;
	struct io_uring_params p;
	unsigned i, tail;
	void *ptr;
	int ret;

	/* for single issuer, must be owner resizing */
	if (ctx->flags & IORING_SETUP_SINGLE_ISSUER &&
	    current != ctx->submitter_task)
		return -EEXIST;
	if (copy_from_user(&p, arg, sizeof(p)))
		return -EFAULT;
	if (p.flags & ~RESIZE_FLAGS)
		return -EINVAL;

	/* properties that are always inherited */
	p.flags |= (ctx->flags & COPY_FLAGS);

	ret = io_uring_fill_params(p.sq_entries, &p);
	if (unlikely(ret))
		return ret;

	/* nothing to do, but copy params back */
	if (p.sq_entries == ctx->sq_entries && p.cq_entries == ctx->cq_entries) {
		if (copy_to_user(arg, &p, sizeof(p)))
			return -EFAULT;
		return 0;
	}

	size = rings_size(p.flags, p.sq_entries, p.cq_entries,
			  &sq_array_offset);
	if (size == SIZE_MAX)
		return -EOVERFLOW;

	if (!(p.flags & IORING_SETUP_NO_MMAP))
		n.rings = io_pages_map(&n.ring_pages, &n.n_ring_pages, size);
	else
		n.rings = __io_uaddr_map(&n.ring_pages, &n.n_ring_pages,
					 p.cq_off.user_addr, size);
	if (IS_ERR(n.rings))
		return PTR_ERR(n.rings);

	n.rings->sq_ring_mask = p.sq_entries - 1;
	n.rings->cq_ring_mask = p.cq_entries - 1;
	n.rings->sq_ring_entries = p.sq_entries;
	n.rings->cq_ring_entries = p.cq_entries;

	if (copy_to_user(arg, &p, sizeof(p))) {
		io_register_free_rings(&p, &n);
		return -EFAULT;
	}

	if (p.flags & IORING_SETUP_SQE128)
		size = array_size(2 * sizeof(struct io_uring_sqe), p.sq_entries);
	else
		size = array_size(sizeof(struct io_uring_sqe), p.sq_entries);
	if (size == SIZE_MAX) {
		io_register_free_rings(&p, &n);
		return -EOVERFLOW;
	}

	if (!(p.flags & IORING_SETUP_NO_MMAP))
		ptr = io_pages_map(&n.sqe_pages, &n.n_sqe_pages, size);
	else
		ptr = __io_uaddr_map(&n.sqe_pages, &n.n_sqe_pages,
				     p.sq_off.user_addr,
				     size);
	if (IS_ERR(ptr)) {
		io_register_free_rings(&p, &n);
		return PTR_ERR(ptr);
	}

	/*
	 * If using SQPOLL, park the thread
	 */
	if (ctx->sq_data) {
		mutex_unlock(&ctx->uring_lock);
		io_sq_thread_park(ctx->sq_data);
		mutex_lock(&ctx->uring_lock);
	}

	/*
	 * We'll do the swap. Grab the ctx->resize_lock, which will exclude
	 * any new mmap's on the ring fd. Clear out existing mappings to prevent
	 * mmap from seeing them, as we'll unmap them. Any attempt to mmap
	 * existing rings beyond this point will fail. Not that it could proceed
	 * at this point anyway, as the io_uring mmap side needs to go grab the
	 * ctx->resize_lock as well. Likewise, hold the completion lock over the
	 * duration of the actual swap.
	 */
	mutex_lock(&ctx->resize_lock);
	spin_lock(&ctx->completion_lock);
	o.rings = ctx->rings;
	ctx->rings = NULL;
	o.sq_sqes = ctx->sq_sqes;
	ctx->sq_sqes = NULL;

	/*
	 * Now copy SQ and CQ entries, if any. If either of the destination
	 * rings can't hold what is already there, then fail the operation.
	 */
	n.sq_sqes = ptr;
	tail = o.rings->sq.tail;
	if (tail - o.rings->sq.head > p.sq_entries)
		goto overflow;
	for (i = o.rings->sq.head; i < tail; i++) {
		unsigned src_head = i & (ctx->sq_entries - 1);
		unsigned dst_head = i & n.rings->sq_ring_mask;

		n.sq_sqes[dst_head] = o.sq_sqes[src_head];
	}
	n.rings->sq.head = o.rings->sq.head;
	n.rings->sq.tail = o.rings->sq.tail;

	tail = o.rings->cq.tail;
	if (tail - o.rings->cq.head > p.cq_entries) {
overflow:
		/* restore old rings, and return -EOVERFLOW via cleanup path */
		ctx->rings = o.rings;
		ctx->sq_sqes = o.sq_sqes;
		to_free = &n;
		ret = -EOVERFLOW;
		goto out;
	}
	for (i = o.rings->cq.head; i < tail; i++) {
		unsigned src_head = i & (ctx->cq_entries - 1);
		unsigned dst_head = i & n.rings->cq_ring_mask;

		n.rings->cqes[dst_head] = o.rings->cqes[src_head];
	}
	n.rings->cq.head = o.rings->cq.head;
	n.rings->cq.tail = o.rings->cq.tail;
	/* invalidate cached cqe refill */
	ctx->cqe_cached = ctx->cqe_sentinel = NULL;

	n.rings->sq_dropped = o.rings->sq_dropped;
	n.rings->sq_flags = o.rings->sq_flags;
	n.rings->cq_flags = o.rings->cq_flags;
	n.rings->cq_overflow = o.rings->cq_overflow;

	/* all done, store old pointers and assign new ones */
	if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
		ctx->sq_array = (u32 *)((char *)n.rings + sq_array_offset);

	ctx->sq_entries = p.sq_entries;
	ctx->cq_entries = p.cq_entries;

	ctx->rings = n.rings;
	ctx->sq_sqes = n.sq_sqes;
	swap_old(ctx, o, n, n_ring_pages);
	swap_old(ctx, o, n, n_sqe_pages);
	swap_old(ctx, o, n, ring_pages);
	swap_old(ctx, o, n, sqe_pages);
	to_free = &o;
	ret = 0;
out:
	spin_unlock(&ctx->completion_lock);
	mutex_unlock(&ctx->resize_lock);
	io_register_free_rings(&p, to_free);

	if (ctx->sq_data)
		io_sq_thread_unpark(ctx->sq_data);

	return ret;
}

static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)
{
	struct io_uring_mem_region_reg __user *reg_uptr = uarg;
	struct io_uring_mem_region_reg reg;
	struct io_uring_region_desc __user *rd_uptr;
	struct io_uring_region_desc rd;
	int ret;

	if (io_region_is_set(&ctx->param_region))
		return -EBUSY;
	if (copy_from_user(&reg, reg_uptr, sizeof(reg)))
		return -EFAULT;
	rd_uptr = u64_to_user_ptr(reg.region_uptr);
	if (copy_from_user(&rd, rd_uptr, sizeof(rd)))
		return -EFAULT;

	if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
		return -EINVAL;
	if (reg.flags & ~IORING_MEM_REGION_REG_WAIT_ARG)
		return -EINVAL;

	/*
	 * This ensures there are no waiters. Waiters are unlocked and it's
	 * hard to synchronise with them, especially if we need to initialise
	 * the region.
	 */
	if ((reg.flags & IORING_MEM_REGION_REG_WAIT_ARG) &&
	    !(ctx->flags & IORING_SETUP_R_DISABLED))
		return -EINVAL;

	ret = io_create_region(ctx, &ctx->param_region, &rd);
	if (ret)
		return ret;
	if (copy_to_user(rd_uptr, &rd, sizeof(rd))) {
		io_free_region(ctx, &ctx->param_region);
		return -EFAULT;
	}

	if (reg.flags & IORING_MEM_REGION_REG_WAIT_ARG) {
		ctx->cq_wait_arg = io_region_get_ptr(&ctx->param_region);
		ctx->cq_wait_size = rd.size;
	}
	return 0;
}

static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
			       void __user *arg, unsigned nr_args)
	__releases(ctx->uring_lock)
	__acquires(ctx->uring_lock)
{
	int ret;

	/*
	 * We don't quiesce the refs for register anymore and so it can't be
	 * dying as we're holding a file ref here.
	 */
	if (WARN_ON_ONCE(percpu_ref_is_dying(&ctx->refs)))
		return -ENXIO;

	if (ctx->submitter_task && ctx->submitter_task != current)
		return -EEXIST;

	if (ctx->restricted) {
		opcode = array_index_nospec(opcode, IORING_REGISTER_LAST);
		if (!test_bit(opcode, ctx->restrictions.register_op))
			return -EACCES;
	}

	switch (opcode) {
	case IORING_REGISTER_BUFFERS:
		ret = -EFAULT;
		if (!arg)
			break;
		ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL);
		break;
	case IORING_UNREGISTER_BUFFERS:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_sqe_buffers_unregister(ctx);
		break;
	case IORING_REGISTER_FILES:
		ret = -EFAULT;
		if (!arg)
			break;
		ret = io_sqe_files_register(ctx, arg, nr_args, NULL);
		break;
	case IORING_UNREGISTER_FILES:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_sqe_files_unregister(ctx);
		break;
	case IORING_REGISTER_FILES_UPDATE:
		ret = io_register_files_update(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_EVENTFD:
		ret = -EINVAL;
		if (nr_args != 1)
			break;
		ret = io_eventfd_register(ctx, arg, 0);
		break;
	case IORING_REGISTER_EVENTFD_ASYNC:
		ret = -EINVAL;
		if (nr_args != 1)
			break;
		ret = io_eventfd_register(ctx, arg, 1);
		break;
	case IORING_UNREGISTER_EVENTFD:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_eventfd_unregister(ctx);
		break;
	case IORING_REGISTER_PROBE:
		ret = -EINVAL;
		if (!arg || nr_args > 256)
			break;
		ret = io_probe(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_PERSONALITY:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_register_personality(ctx);
		break;
	case IORING_UNREGISTER_PERSONALITY:
		ret = -EINVAL;
		if (arg)
			break;
		ret = io_unregister_personality(ctx, nr_args);
		break;
	case IORING_REGISTER_ENABLE_RINGS:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_register_enable_rings(ctx);
		break;
	case IORING_REGISTER_RESTRICTIONS:
		ret = io_register_restrictions(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_FILES2:
		ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_FILE);
		break;
	case IORING_REGISTER_FILES_UPDATE2:
		ret = io_register_rsrc_update(ctx, arg, nr_args,
					      IORING_RSRC_FILE);
		break;
	case IORING_REGISTER_BUFFERS2:
		ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_BUFFER);
		break;
	case IORING_REGISTER_BUFFERS_UPDATE:
		ret = io_register_rsrc_update(ctx, arg, nr_args,
					      IORING_RSRC_BUFFER);
		break;
	case IORING_REGISTER_IOWQ_AFF:
		ret = -EINVAL;
		if (!arg || !nr_args)
			break;
		ret = io_register_iowq_aff(ctx, arg, nr_args);
		break;
	case IORING_UNREGISTER_IOWQ_AFF:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_unregister_iowq_aff(ctx);
		break;
	case IORING_REGISTER_IOWQ_MAX_WORKERS:
		ret = -EINVAL;
		if (!arg || nr_args != 2)
			break;
		ret = io_register_iowq_max_workers(ctx, arg);
		break;
	case IORING_REGISTER_RING_FDS:
		ret = io_ringfd_register(ctx, arg, nr_args);
		break;
	case IORING_UNREGISTER_RING_FDS:
		ret = io_ringfd_unregister(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_PBUF_RING:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_pbuf_ring(ctx, arg);
		break;
	case IORING_UNREGISTER_PBUF_RING:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_unregister_pbuf_ring(ctx, arg);
		break;
	case IORING_REGISTER_SYNC_CANCEL:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_sync_cancel(ctx, arg);
		break;
	case IORING_REGISTER_FILE_ALLOC_RANGE:
		ret = -EINVAL;
		if (!arg || nr_args)
			break;
		ret = io_register_file_alloc_range(ctx, arg);
		break;
	case IORING_REGISTER_PBUF_STATUS:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_pbuf_status(ctx, arg);
		break;
	case IORING_REGISTER_NAPI:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_napi(ctx, arg);
		break;
	case IORING_UNREGISTER_NAPI:
		ret = -EINVAL;
		if (nr_args != 1)
			break;
		ret = io_unregister_napi(ctx, arg);
		break;
	case IORING_REGISTER_CLOCK:
		ret = -EINVAL;
		if (!arg || nr_args)
			break;
		ret = io_register_clock(ctx, arg);
		break;
	case IORING_REGISTER_CLONE_BUFFERS:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_clone_buffers(ctx, arg);
		break;
	case IORING_REGISTER_RESIZE_RINGS:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_resize_rings(ctx, arg);
		break;
	case IORING_REGISTER_MEM_REGION:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_mem_region(ctx, arg);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

/*
 * Given an 'fd' value, return the ctx associated with it. If 'registered' is
 * true, then the registered index is used. Otherwise, the normal fd table.
 * Caller must call fput() on the returned file, unless it's an ERR_PTR.
 */
struct file *io_uring_register_get_file(unsigned int fd, bool registered)
{
	struct file *file;

	if (registered) {
		/*
		 * Ring fd has been registered via IORING_REGISTER_RING_FDS, we
		 * need only dereference our task private array to find it.
		 */
		struct io_uring_task *tctx = current->io_uring;

		if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX))
			return ERR_PTR(-EINVAL);
		fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
		file = tctx->registered_rings[fd];
	} else {
		file = fget(fd);
	}

	if (unlikely(!file))
		return ERR_PTR(-EBADF);
	if (io_is_uring_fops(file))
		return file;
	fput(file);
	return ERR_PTR(-EOPNOTSUPP);
}

/*
 * "blind" registration opcodes are ones where there's no ring given, and
 * hence the source fd must be -1.
 */
static int io_uring_register_blind(unsigned int opcode, void __user *arg,
				   unsigned int nr_args)
{
	switch (opcode) {
	case IORING_REGISTER_SEND_MSG_RING: {
		struct io_uring_sqe sqe;

		if (!arg || nr_args != 1)
			return -EINVAL;
		if (copy_from_user(&sqe, arg, sizeof(sqe)))
			return -EFAULT;
		/* no flags supported */
		if (sqe.flags)
			return -EINVAL;
		if (sqe.opcode == IORING_OP_MSG_RING)
			return io_uring_sync_msg_ring(&sqe);
		}
	}

	return -EINVAL;
}

SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
		void __user *, arg, unsigned int, nr_args)
{
	struct io_ring_ctx *ctx;
	long ret = -EBADF;
	struct file *file;
	bool use_registered_ring;

	use_registered_ring = !!(opcode & IORING_REGISTER_USE_REGISTERED_RING);
	opcode &= ~IORING_REGISTER_USE_REGISTERED_RING;

	if (opcode >= IORING_REGISTER_LAST)
		return -EINVAL;

	if (fd == -1)
		return io_uring_register_blind(opcode, arg, nr_args);

	file = io_uring_register_get_file(fd, use_registered_ring);
	if (IS_ERR(file))
		return PTR_ERR(file);
	ctx = file->private_data;

	mutex_lock(&ctx->uring_lock);
	ret = __io_uring_register(ctx, opcode, arg, nr_args);

	trace_io_uring_register(ctx, opcode, ctx->file_table.data.nr,
				ctx->buf_table.nr, ret);
	mutex_unlock(&ctx->uring_lock);
	if (!use_registered_ring)
		fput(file);
	return ret;
}
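
/*
 * Illustrative userspace sketch (not part of the kernel build, an assumed
 * example): querying which opcodes the running kernel supports via
 * IORING_REGISTER_PROBE, the handler for which is io_probe() above. It
 * assumes a ring fd obtained from io_uring_setup() and uses the raw
 * syscall; liburing's io_uring_register_probe() wraps the same operation.
 * The probe buffer must be zeroed, per the memchr_inv() check in io_probe().
 *
 *	#include <linux/io_uring.h>
 *	#include <sys/syscall.h>
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *	#include <unistd.h>
 *
 *	static void probe_ops(int ring_fd)
 *	{
 *		struct io_uring_probe *p;
 *		size_t len = sizeof(*p) + 256 * sizeof(struct io_uring_probe_op);
 *
 *		p = calloc(1, len);	// zeroed, as io_probe() requires
 *		if (!p)
 *			return;
 *		if (syscall(__NR_io_uring_register, ring_fd,
 *			    IORING_REGISTER_PROBE, p, 256) < 0) {
 *			perror("IORING_REGISTER_PROBE");
 *		} else {
 *			for (int i = 0; i < p->ops_len; i++)
 *				if (p->ops[i].flags & IO_URING_OP_SUPPORTED)
 *					printf("opcode %d supported\n", p->ops[i].op);
 *		}
 *		free(p);
 *	}
 */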