// SPDX-License-Identifier: GPL-2.0
/*
 * Code related to the io_uring_register() syscall
 *
 * Copyright (C) 2023 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/syscalls.h>
#include <linux/refcount.h>
#include <linux/bits.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/nospec.h>
#include <linux/compat.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>

#include "io_uring.h"
#include "opdef.h"
#include "tctx.h"
#include "rsrc.h"
#include "sqpoll.h"
#include "register.h"
#include "cancel.h"
#include "kbuf.h"
#include "napi.h"
#include "eventfd.h"
#include "msg_ring.h"
#include "memmap.h"

#define IORING_MAX_RESTRICTIONS	(IORING_RESTRICTION_LAST + \
				 IORING_REGISTER_LAST + IORING_OP_LAST)

static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg,
			   unsigned nr_args)
{
	struct io_uring_probe *p;
	size_t size;
	int i, ret;

	if (nr_args > IORING_OP_LAST)
		nr_args = IORING_OP_LAST;

	size = struct_size(p, ops, nr_args);
	p = kzalloc(size, GFP_KERNEL);
	if (!p)
		return -ENOMEM;

	ret = -EFAULT;
	if (copy_from_user(p, arg, size))
		goto out;
	ret = -EINVAL;
	if (memchr_inv(p, 0, size))
		goto out;

	p->last_op = IORING_OP_LAST - 1;

	for (i = 0; i < nr_args; i++) {
		p->ops[i].op = i;
		if (io_uring_op_supported(i))
			p->ops[i].flags = IO_URING_OP_SUPPORTED;
	}
	p->ops_len = i;

	ret = 0;
	if (copy_to_user(arg, p, size))
		ret = -EFAULT;
out:
	kfree(p);
	return ret;
}

int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
{
	const struct cred *creds;

	creds = xa_erase(&ctx->personalities, id);
	if (creds) {
		put_cred(creds);
		return 0;
	}

	return -EINVAL;
}

static int io_register_personality(struct io_ring_ctx *ctx)
{
	const struct cred *creds;
	u32 id;
	int ret;

	creds = get_current_cred();

	ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
			XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
	if (ret < 0) {
		put_cred(creds);
		return ret;
	}
	return id;
}

static __cold int io_parse_restrictions(void __user *arg, unsigned int nr_args,
					struct io_restriction *restrictions)
{
	struct io_uring_restriction *res;
	size_t size;
	int i, ret;

	if (!arg || nr_args > IORING_MAX_RESTRICTIONS)
		return -EINVAL;

	size = array_size(nr_args, sizeof(*res));
	if (size == SIZE_MAX)
		return -EOVERFLOW;

	res = memdup_user(arg, size);
	if (IS_ERR(res))
		return PTR_ERR(res);

	ret = -EINVAL;

	for (i = 0; i < nr_args; i++) {
		switch (res[i].opcode) {
		case IORING_RESTRICTION_REGISTER_OP:
			if (res[i].register_op >= IORING_REGISTER_LAST)
				goto err;
			__set_bit(res[i].register_op, restrictions->register_op);
			break;
		case IORING_RESTRICTION_SQE_OP:
			if (res[i].sqe_op >= IORING_OP_LAST)
				goto err;
			__set_bit(res[i].sqe_op, restrictions->sqe_op);
			break;
		case IORING_RESTRICTION_SQE_FLAGS_ALLOWED:
			restrictions->sqe_flags_allowed = res[i].sqe_flags;
			break;
		case IORING_RESTRICTION_SQE_FLAGS_REQUIRED:
			restrictions->sqe_flags_required = res[i].sqe_flags;
			break;
		default:
			goto err;
		}
	}

	ret = 0;

err:
	kfree(res);
	return ret;
}
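
/*
 * Restrictions can only be installed while the ring is still disabled
 * (IORING_SETUP_R_DISABLED), and only once; they take effect when the ring
 * is enabled. Roughly, userspace might do something like:
 *
 *	struct io_uring_restriction res[] = {
 *		{ .opcode = IORING_RESTRICTION_SQE_OP,
 *		  .sqe_op = IORING_OP_READV },
 *		{ .opcode = IORING_RESTRICTION_REGISTER_OP,
 *		  .register_op = IORING_REGISTER_ENABLE_RINGS },
 *	};
 *
 *	io_uring_register(ring_fd, IORING_REGISTER_RESTRICTIONS, res, 2);
 *	io_uring_register(ring_fd, IORING_REGISTER_ENABLE_RINGS, NULL, 0);
 */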
static __cold int io_register_restrictions(struct io_ring_ctx *ctx,
					   void __user *arg, unsigned int nr_args)
{
	int ret;

	/* Restrictions allowed only if rings started disabled */
	if (!(ctx->flags & IORING_SETUP_R_DISABLED))
		return -EBADFD;

	/* We allow only a single restrictions registration */
	if (ctx->restrictions.registered)
		return -EBUSY;

	ret = io_parse_restrictions(arg, nr_args, &ctx->restrictions);
	/* Reset all restrictions if an error happened */
	if (ret != 0)
		memset(&ctx->restrictions, 0, sizeof(ctx->restrictions));
	else
		ctx->restrictions.registered = true;
	return ret;
}

static int io_register_enable_rings(struct io_ring_ctx *ctx)
{
	if (!(ctx->flags & IORING_SETUP_R_DISABLED))
		return -EBADFD;

	if (ctx->flags & IORING_SETUP_SINGLE_ISSUER && !ctx->submitter_task) {
		WRITE_ONCE(ctx->submitter_task, get_task_struct(current));
		/*
		 * Lazy activation attempts would fail if it was polled before
		 * submitter_task is set.
		 */
		if (wq_has_sleeper(&ctx->poll_wq))
			io_activate_pollwq(ctx);
	}

	if (ctx->restrictions.registered)
		ctx->restricted = 1;

	ctx->flags &= ~IORING_SETUP_R_DISABLED;
	if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait))
		wake_up(&ctx->sq_data->wait);
	return 0;
}

static __cold int __io_register_iowq_aff(struct io_ring_ctx *ctx,
					 cpumask_var_t new_mask)
{
	int ret;

	if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
		ret = io_wq_cpu_affinity(current->io_uring, new_mask);
	} else {
		mutex_unlock(&ctx->uring_lock);
		ret = io_sqpoll_wq_cpu_affinity(ctx, new_mask);
		mutex_lock(&ctx->uring_lock);
	}

	return ret;
}

static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx,
				       void __user *arg, unsigned len)
{
	cpumask_var_t new_mask;
	int ret;

	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
		return -ENOMEM;

	cpumask_clear(new_mask);
	if (len > cpumask_size())
		len = cpumask_size();

#ifdef CONFIG_COMPAT
	if (in_compat_syscall())
		ret = compat_get_bitmap(cpumask_bits(new_mask),
					(const compat_ulong_t __user *)arg,
					len * 8 /* CHAR_BIT */);
	else
#endif
		ret = copy_from_user(new_mask, arg, len);

	if (ret) {
		free_cpumask_var(new_mask);
		return -EFAULT;
	}

	ret = __io_register_iowq_aff(ctx, new_mask);
	free_cpumask_var(new_mask);
	return ret;
}

static __cold int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
{
	return __io_register_iowq_aff(ctx, NULL);
}
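
/*
 * IORING_REGISTER_IOWQ_MAX_WORKERS takes an array of two values: the maximum
 * number of bounded and unbounded io-wq workers, in that order. A zero leaves
 * the corresponding limit untouched, and the previous values are copied back
 * to userspace. A minimal sketch of the userspace side:
 *
 *	unsigned int counts[2] = { 8, 64 };	/* bounded, unbounded */
 *
 *	io_uring_register(ring_fd, IORING_REGISTER_IOWQ_MAX_WORKERS, counts, 2);
 */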
static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
					       void __user *arg)
	__must_hold(&ctx->uring_lock)
{
	struct io_tctx_node *node;
	struct io_uring_task *tctx = NULL;
	struct io_sq_data *sqd = NULL;
	__u32 new_count[2];
	int i, ret;

	if (copy_from_user(new_count, arg, sizeof(new_count)))
		return -EFAULT;
	for (i = 0; i < ARRAY_SIZE(new_count); i++)
		if (new_count[i] > INT_MAX)
			return -EINVAL;

	if (ctx->flags & IORING_SETUP_SQPOLL) {
		sqd = ctx->sq_data;
		if (sqd) {
			/*
			 * Observe the correct sqd->lock -> ctx->uring_lock
			 * ordering. Fine to drop uring_lock here, we hold
			 * a ref to the ctx.
			 */
			refcount_inc(&sqd->refs);
			mutex_unlock(&ctx->uring_lock);
			mutex_lock(&sqd->lock);
			mutex_lock(&ctx->uring_lock);
			if (sqd->thread)
				tctx = sqd->thread->io_uring;
		}
	} else {
		tctx = current->io_uring;
	}

	BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits));

	for (i = 0; i < ARRAY_SIZE(new_count); i++)
		if (new_count[i])
			ctx->iowq_limits[i] = new_count[i];
	ctx->iowq_limits_set = true;

	if (tctx && tctx->io_wq) {
		ret = io_wq_max_workers(tctx->io_wq, new_count);
		if (ret)
			goto err;
	} else {
		memset(new_count, 0, sizeof(new_count));
	}

	if (sqd) {
		mutex_unlock(&ctx->uring_lock);
		mutex_unlock(&sqd->lock);
		io_put_sq_data(sqd);
		mutex_lock(&ctx->uring_lock);
	}

	if (copy_to_user(arg, new_count, sizeof(new_count)))
		return -EFAULT;

	/* that's it for SQPOLL, only the SQPOLL task creates requests */
	if (sqd)
		return 0;

	/* now propagate the restriction to all registered users */
	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
		tctx = node->task->io_uring;
		if (WARN_ON_ONCE(!tctx->io_wq))
			continue;

		for (i = 0; i < ARRAY_SIZE(new_count); i++)
			new_count[i] = ctx->iowq_limits[i];
		/* ignore errors, it always returns zero anyway */
		(void)io_wq_max_workers(tctx->io_wq, new_count);
	}
	return 0;
err:
	if (sqd) {
		mutex_unlock(&ctx->uring_lock);
		mutex_unlock(&sqd->lock);
		io_put_sq_data(sqd);
		mutex_lock(&ctx->uring_lock);
	}
	return ret;
}
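
/*
 * IORING_REGISTER_CLOCK selects which clock the ring uses for subsequent CQ
 * wait timeouts. Only CLOCK_MONOTONIC (the default) and CLOCK_BOOTTIME are
 * accepted, and nr_args must be 0. For illustration, userspace might do:
 *
 *	struct io_uring_clock_register reg = { .clockid = CLOCK_BOOTTIME };
 *
 *	io_uring_register(ring_fd, IORING_REGISTER_CLOCK, &reg, 0);
 */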
static int io_register_clock(struct io_ring_ctx *ctx,
			     struct io_uring_clock_register __user *arg)
{
	struct io_uring_clock_register reg;

	if (copy_from_user(&reg, arg, sizeof(reg)))
		return -EFAULT;
	if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
		return -EINVAL;

	switch (reg.clockid) {
	case CLOCK_MONOTONIC:
		ctx->clock_offset = 0;
		break;
	case CLOCK_BOOTTIME:
		ctx->clock_offset = TK_OFFS_BOOT;
		break;
	default:
		return -EINVAL;
	}

	ctx->clockid = reg.clockid;
	return 0;
}

/*
 * State to maintain until we can swap. Both new and old state, used for
 * either mapping or freeing.
 */
struct io_ring_ctx_rings {
	struct io_rings *rings;
	struct io_uring_sqe *sq_sqes;

	struct io_mapped_region sq_region;
	struct io_mapped_region ring_region;
};

static void io_register_free_rings(struct io_ring_ctx *ctx,
				   struct io_uring_params *p,
				   struct io_ring_ctx_rings *r)
{
	io_free_region(ctx, &r->sq_region);
	io_free_region(ctx, &r->ring_region);
}

#define swap_old(ctx, o, n, field)		\
	do {					\
		(o).field = (ctx)->field;	\
		(ctx)->field = (n).field;	\
	} while (0)

#define RESIZE_FLAGS	(IORING_SETUP_CQSIZE | IORING_SETUP_CLAMP)
#define COPY_FLAGS	(IORING_SETUP_NO_SQARRAY | IORING_SETUP_SQE128 | \
			 IORING_SETUP_CQE32 | IORING_SETUP_NO_MMAP)
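
/*
 * Resizing takes a struct io_uring_params, like ring setup does, but only the
 * flags in RESIZE_FLAGS may be passed in; the flags in COPY_FLAGS are
 * inherited from the existing ring. It is currently limited to
 * IORING_SETUP_DEFER_TASKRUN rings, and with IORING_SETUP_SINGLE_ISSUER only
 * the submitter task may resize. A rough sketch of the userspace side:
 *
 *	struct io_uring_params p = {
 *		.sq_entries = 256,
 *		.cq_entries = 512,
 *		.flags = IORING_SETUP_CQSIZE,
 *	};
 *
 *	io_uring_register(ring_fd, IORING_REGISTER_RESIZE_RINGS, &p, 1);
 */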
static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
{
	struct io_uring_region_desc rd;
	struct io_ring_ctx_rings o = { }, n = { }, *to_free = NULL;
	size_t size, sq_array_offset;
	unsigned i, tail, old_head;
	struct io_uring_params p;
	int ret;

	/* for single issuer, must be owner resizing */
	if (ctx->flags & IORING_SETUP_SINGLE_ISSUER &&
	    current != ctx->submitter_task)
		return -EEXIST;
	/* limited to DEFER_TASKRUN for now */
	if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
		return -EINVAL;
	if (copy_from_user(&p, arg, sizeof(p)))
		return -EFAULT;
	if (p.flags & ~RESIZE_FLAGS)
		return -EINVAL;

	/* properties that are always inherited */
	p.flags |= (ctx->flags & COPY_FLAGS);

	ret = io_uring_fill_params(p.sq_entries, &p);
	if (unlikely(ret))
		return ret;

	/* nothing to do, but copy params back */
	if (p.sq_entries == ctx->sq_entries && p.cq_entries == ctx->cq_entries) {
		if (copy_to_user(arg, &p, sizeof(p)))
			return -EFAULT;
		return 0;
	}

	size = rings_size(p.flags, p.sq_entries, p.cq_entries,
			  &sq_array_offset);
	if (size == SIZE_MAX)
		return -EOVERFLOW;

	memset(&rd, 0, sizeof(rd));
	rd.size = PAGE_ALIGN(size);
	if (p.flags & IORING_SETUP_NO_MMAP) {
		rd.user_addr = p.cq_off.user_addr;
		rd.flags |= IORING_MEM_REGION_TYPE_USER;
	}
	ret = io_create_region_mmap_safe(ctx, &n.ring_region, &rd, IORING_OFF_CQ_RING);
	if (ret) {
		io_register_free_rings(ctx, &p, &n);
		return ret;
	}
	n.rings = io_region_get_ptr(&n.ring_region);

	/*
	 * At this point n.rings is shared with userspace, just like o.rings
	 * is as well. While we don't expect userspace to modify it while
	 * a resize is in progress, and it's most likely that userspace will
	 * shoot itself in the foot if it does, we can't always assume good
	 * intent... Use read/write once helpers from here on to indicate the
	 * shared nature of it.
	 */
	WRITE_ONCE(n.rings->sq_ring_mask, p.sq_entries - 1);
	WRITE_ONCE(n.rings->cq_ring_mask, p.cq_entries - 1);
	WRITE_ONCE(n.rings->sq_ring_entries, p.sq_entries);
	WRITE_ONCE(n.rings->cq_ring_entries, p.cq_entries);

	if (copy_to_user(arg, &p, sizeof(p))) {
		io_register_free_rings(ctx, &p, &n);
		return -EFAULT;
	}

	if (p.flags & IORING_SETUP_SQE128)
		size = array_size(2 * sizeof(struct io_uring_sqe), p.sq_entries);
	else
		size = array_size(sizeof(struct io_uring_sqe), p.sq_entries);
	if (size == SIZE_MAX) {
		io_register_free_rings(ctx, &p, &n);
		return -EOVERFLOW;
	}

	memset(&rd, 0, sizeof(rd));
	rd.size = PAGE_ALIGN(size);
	if (p.flags & IORING_SETUP_NO_MMAP) {
		rd.user_addr = p.sq_off.user_addr;
		rd.flags |= IORING_MEM_REGION_TYPE_USER;
	}
	ret = io_create_region_mmap_safe(ctx, &n.sq_region, &rd, IORING_OFF_SQES);
	if (ret) {
		io_register_free_rings(ctx, &p, &n);
		return ret;
	}
	n.sq_sqes = io_region_get_ptr(&n.sq_region);

	/*
	 * If using SQPOLL, park the thread
	 */
	if (ctx->sq_data) {
		mutex_unlock(&ctx->uring_lock);
		io_sq_thread_park(ctx->sq_data);
		mutex_lock(&ctx->uring_lock);
	}

	/*
	 * We'll do the swap. Grab the ctx->mmap_lock, which will exclude
	 * any new mmaps on the ring fd. Clear out existing mappings to prevent
	 * mmap from seeing them, as we'll unmap them. Any attempt to mmap
	 * existing rings beyond this point will fail. Not that it could proceed
	 * at this point anyway, as the io_uring mmap side needs to grab the
	 * ctx->mmap_lock as well. Likewise, hold the completion lock over the
	 * duration of the actual swap.
	 */
	mutex_lock(&ctx->mmap_lock);
	spin_lock(&ctx->completion_lock);
	o.rings = ctx->rings;
	ctx->rings = NULL;
	o.sq_sqes = ctx->sq_sqes;
	ctx->sq_sqes = NULL;

	/*
	 * Now copy SQ and CQ entries, if any. If either of the destination
	 * rings can't hold what is already there, then fail the operation.
	 */
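	/*
	 * Entries are copied at their absolute index masked by each ring's
	 * size, so the existing head/tail values remain valid for the new
	 * rings and no in-flight indices need to be renumbered.
	 */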
	tail = READ_ONCE(o.rings->sq.tail);
	old_head = READ_ONCE(o.rings->sq.head);
	if (tail - old_head > p.sq_entries)
		goto overflow;
	for (i = old_head; i < tail; i++) {
		unsigned src_head = i & (ctx->sq_entries - 1);
		unsigned dst_head = i & (p.sq_entries - 1);

		n.sq_sqes[dst_head] = o.sq_sqes[src_head];
	}
	WRITE_ONCE(n.rings->sq.head, old_head);
	WRITE_ONCE(n.rings->sq.tail, tail);

	tail = READ_ONCE(o.rings->cq.tail);
	old_head = READ_ONCE(o.rings->cq.head);
	if (tail - old_head > p.cq_entries) {
overflow:
		/* restore old rings, and return -EOVERFLOW via cleanup path */
		ctx->rings = o.rings;
		ctx->sq_sqes = o.sq_sqes;
		to_free = &n;
		ret = -EOVERFLOW;
		goto out;
	}
	for (i = old_head; i < tail; i++) {
		unsigned src_head = i & (ctx->cq_entries - 1);
		unsigned dst_head = i & (p.cq_entries - 1);

		n.rings->cqes[dst_head] = o.rings->cqes[src_head];
	}
	WRITE_ONCE(n.rings->cq.head, old_head);
	WRITE_ONCE(n.rings->cq.tail, tail);
	/* invalidate cached cqe refill */
	ctx->cqe_cached = ctx->cqe_sentinel = NULL;

	WRITE_ONCE(n.rings->sq_dropped, READ_ONCE(o.rings->sq_dropped));
	WRITE_ONCE(n.rings->sq_flags, READ_ONCE(o.rings->sq_flags));
	WRITE_ONCE(n.rings->cq_flags, READ_ONCE(o.rings->cq_flags));
	WRITE_ONCE(n.rings->cq_overflow, READ_ONCE(o.rings->cq_overflow));

	/* all done, store old pointers and assign new ones */
	if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
		ctx->sq_array = (u32 *)((char *)n.rings + sq_array_offset);

	ctx->sq_entries = p.sq_entries;
	ctx->cq_entries = p.cq_entries;

	ctx->rings = n.rings;
	ctx->sq_sqes = n.sq_sqes;
	swap_old(ctx, o, n, ring_region);
	swap_old(ctx, o, n, sq_region);
	to_free = &o;
	ret = 0;
out:
	spin_unlock(&ctx->completion_lock);
	mutex_unlock(&ctx->mmap_lock);
	io_register_free_rings(ctx, &p, to_free);

	if (ctx->sq_data)
		io_sq_thread_unpark(ctx->sq_data);

	return ret;
}
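
/*
 * IORING_REGISTER_MEM_REGION registers a chunk of memory, described by a
 * struct io_uring_region_desc, for kernel use. With
 * IORING_MEM_REGION_REG_WAIT_ARG the region is used for registered wait
 * arguments, which is only accepted while the ring is still disabled. A rough
 * sketch of the userspace side, assuming a page-aligned buffer 'buf' of
 * 'size' bytes:
 *
 *	struct io_uring_region_desc rd = {
 *		.user_addr = (__u64)(unsigned long)buf,
 *		.size = size,
 *		.flags = IORING_MEM_REGION_TYPE_USER,
 *	};
 *	struct io_uring_mem_region_reg mr = {
 *		.region_uptr = (__u64)(unsigned long)&rd,
 *		.flags = IORING_MEM_REGION_REG_WAIT_ARG,
 *	};
 *
 *	io_uring_register(ring_fd, IORING_REGISTER_MEM_REGION, &mr, 1);
 */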
static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)
{
	struct io_uring_mem_region_reg __user *reg_uptr = uarg;
	struct io_uring_mem_region_reg reg;
	struct io_uring_region_desc __user *rd_uptr;
	struct io_uring_region_desc rd;
	int ret;

	if (io_region_is_set(&ctx->param_region))
		return -EBUSY;
	if (copy_from_user(&reg, reg_uptr, sizeof(reg)))
		return -EFAULT;
	rd_uptr = u64_to_user_ptr(reg.region_uptr);
	if (copy_from_user(&rd, rd_uptr, sizeof(rd)))
		return -EFAULT;
	if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
		return -EINVAL;
	if (reg.flags & ~IORING_MEM_REGION_REG_WAIT_ARG)
		return -EINVAL;

	/*
	 * This ensures there are no waiters. Waiters are unlocked and it's
	 * hard to synchronise with them, especially if we need to initialise
	 * the region.
	 */
	if ((reg.flags & IORING_MEM_REGION_REG_WAIT_ARG) &&
	    !(ctx->flags & IORING_SETUP_R_DISABLED))
		return -EINVAL;

	ret = io_create_region_mmap_safe(ctx, &ctx->param_region, &rd,
					 IORING_MAP_OFF_PARAM_REGION);
	if (ret)
		return ret;
	if (copy_to_user(rd_uptr, &rd, sizeof(rd))) {
		io_free_region(ctx, &ctx->param_region);
		return -EFAULT;
	}

	if (reg.flags & IORING_MEM_REGION_REG_WAIT_ARG) {
		ctx->cq_wait_arg = io_region_get_ptr(&ctx->param_region);
		ctx->cq_wait_size = rd.size;
	}
	return 0;
}
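
/*
 * Main opcode dispatcher, called with ctx->uring_lock held (and possibly
 * dropping and reacquiring it for some opcodes). If a submitter task has been
 * registered (e.g. IORING_SETUP_SINGLE_ISSUER), only that task may perform
 * registrations, and if restrictions are active the opcode must be in the
 * allowed set.
 */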
static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
			       void __user *arg, unsigned nr_args)
	__releases(ctx->uring_lock)
	__acquires(ctx->uring_lock)
{
	int ret;

	/*
	 * We don't quiesce the refs for register anymore and so it can't be
	 * dying as we're holding a file ref here.
	 */
	if (WARN_ON_ONCE(percpu_ref_is_dying(&ctx->refs)))
		return -ENXIO;

	if (ctx->submitter_task && ctx->submitter_task != current)
		return -EEXIST;

	if (ctx->restricted) {
		opcode = array_index_nospec(opcode, IORING_REGISTER_LAST);
		if (!test_bit(opcode, ctx->restrictions.register_op))
			return -EACCES;
	}

	switch (opcode) {
	case IORING_REGISTER_BUFFERS:
		ret = -EFAULT;
		if (!arg)
			break;
		ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL);
		break;
	case IORING_UNREGISTER_BUFFERS:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_sqe_buffers_unregister(ctx);
		break;
	case IORING_REGISTER_FILES:
		ret = -EFAULT;
		if (!arg)
			break;
		ret = io_sqe_files_register(ctx, arg, nr_args, NULL);
		break;
	case IORING_UNREGISTER_FILES:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_sqe_files_unregister(ctx);
		break;
	case IORING_REGISTER_FILES_UPDATE:
		ret = io_register_files_update(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_EVENTFD:
		ret = -EINVAL;
		if (nr_args != 1)
			break;
		ret = io_eventfd_register(ctx, arg, 0);
		break;
	case IORING_REGISTER_EVENTFD_ASYNC:
		ret = -EINVAL;
		if (nr_args != 1)
			break;
		ret = io_eventfd_register(ctx, arg, 1);
		break;
	case IORING_UNREGISTER_EVENTFD:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_eventfd_unregister(ctx);
		break;
	case IORING_REGISTER_PROBE:
		ret = -EINVAL;
		if (!arg || nr_args > 256)
			break;
		ret = io_probe(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_PERSONALITY:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_register_personality(ctx);
		break;
	case IORING_UNREGISTER_PERSONALITY:
		ret = -EINVAL;
		if (arg)
			break;
		ret = io_unregister_personality(ctx, nr_args);
		break;
	case IORING_REGISTER_ENABLE_RINGS:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_register_enable_rings(ctx);
		break;
	case IORING_REGISTER_RESTRICTIONS:
		ret = io_register_restrictions(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_FILES2:
		ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_FILE);
		break;
	case IORING_REGISTER_FILES_UPDATE2:
		ret = io_register_rsrc_update(ctx, arg, nr_args,
					      IORING_RSRC_FILE);
		break;
	case IORING_REGISTER_BUFFERS2:
		ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_BUFFER);
		break;
	case IORING_REGISTER_BUFFERS_UPDATE:
		ret = io_register_rsrc_update(ctx, arg, nr_args,
					      IORING_RSRC_BUFFER);
		break;
	case IORING_REGISTER_IOWQ_AFF:
		ret = -EINVAL;
		if (!arg || !nr_args)
			break;
		ret = io_register_iowq_aff(ctx, arg, nr_args);
		break;
	case IORING_UNREGISTER_IOWQ_AFF:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_unregister_iowq_aff(ctx);
		break;
	case IORING_REGISTER_IOWQ_MAX_WORKERS:
		ret = -EINVAL;
		if (!arg || nr_args != 2)
			break;
		ret = io_register_iowq_max_workers(ctx, arg);
		break;
	case IORING_REGISTER_RING_FDS:
		ret = io_ringfd_register(ctx, arg, nr_args);
		break;
	case IORING_UNREGISTER_RING_FDS:
		ret = io_ringfd_unregister(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_PBUF_RING:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_pbuf_ring(ctx, arg);
		break;
	case IORING_UNREGISTER_PBUF_RING:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_unregister_pbuf_ring(ctx, arg);
		break;
	case IORING_REGISTER_SYNC_CANCEL:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_sync_cancel(ctx, arg);
		break;
	case IORING_REGISTER_FILE_ALLOC_RANGE:
		ret = -EINVAL;
		if (!arg || nr_args)
			break;
		ret = io_register_file_alloc_range(ctx, arg);
		break;
	case IORING_REGISTER_PBUF_STATUS:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_pbuf_status(ctx, arg);
		break;
	case IORING_REGISTER_NAPI:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_napi(ctx, arg);
		break;
	case IORING_UNREGISTER_NAPI:
		ret = -EINVAL;
		if (nr_args != 1)
			break;
		ret = io_unregister_napi(ctx, arg);
		break;
	case IORING_REGISTER_CLOCK:
		ret = -EINVAL;
		if (!arg || nr_args)
			break;
		ret = io_register_clock(ctx, arg);
		break;
	case IORING_REGISTER_CLONE_BUFFERS:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_clone_buffers(ctx, arg);
		break;
	case IORING_REGISTER_RESIZE_RINGS:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_resize_rings(ctx, arg);
		break;
	case IORING_REGISTER_MEM_REGION:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_mem_region(ctx, arg);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

/*
 * Given an 'fd' value, return the ctx associated with it. If 'registered' is
 * true, then the registered index is used. Otherwise, the normal fd table.
 * Caller must call fput() on the returned file, unless it's an ERR_PTR.
 */
struct file *io_uring_register_get_file(unsigned int fd, bool registered)
{
	struct file *file;

	if (registered) {
		/*
		 * Ring fd has been registered via IORING_REGISTER_RING_FDS, we
		 * need only dereference our task private array to find it.
		 */
		struct io_uring_task *tctx = current->io_uring;

		if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX))
			return ERR_PTR(-EINVAL);
		fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
		file = tctx->registered_rings[fd];
	} else {
		file = fget(fd);
	}

	if (unlikely(!file))
		return ERR_PTR(-EBADF);
	if (io_is_uring_fops(file))
		return file;
	fput(file);
	return ERR_PTR(-EOPNOTSUPP);
}

/*
 * "blind" registration opcodes are ones where there's no ring given, and
 * hence the source fd must be -1.
 */
static int io_uring_register_blind(unsigned int opcode, void __user *arg,
				   unsigned int nr_args)
{
	switch (opcode) {
	case IORING_REGISTER_SEND_MSG_RING: {
		struct io_uring_sqe sqe;

		if (!arg || nr_args != 1)
			return -EINVAL;
		if (copy_from_user(&sqe, arg, sizeof(sqe)))
			return -EFAULT;
		/* no flags supported */
		if (sqe.flags)
			return -EINVAL;
		if (sqe.opcode == IORING_OP_MSG_RING)
			return io_uring_sync_msg_ring(&sqe);
		}
	}

	return -EINVAL;
}
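
/*
 * Syscall entry point. Passing fd == -1 selects the "blind" path above; for
 * IORING_REGISTER_SEND_MSG_RING, userspace might for example fill out an SQE
 * as it would for IORING_OP_MSG_RING (target ring fd in sqe->fd, sqe->flags
 * left at zero) and do:
 *
 *	io_uring_register(-1, IORING_REGISTER_SEND_MSG_RING, &sqe, 1);
 */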
SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
		void __user *, arg, unsigned int, nr_args)
{
	struct io_ring_ctx *ctx;
	long ret = -EBADF;
	struct file *file;
	bool use_registered_ring;

	use_registered_ring = !!(opcode & IORING_REGISTER_USE_REGISTERED_RING);
	opcode &= ~IORING_REGISTER_USE_REGISTERED_RING;

	if (opcode >= IORING_REGISTER_LAST)
		return -EINVAL;

	if (fd == -1)
		return io_uring_register_blind(opcode, arg, nr_args);

	file = io_uring_register_get_file(fd, use_registered_ring);
	if (IS_ERR(file))
		return PTR_ERR(file);
	ctx = file->private_data;

	mutex_lock(&ctx->uring_lock);
	ret = __io_uring_register(ctx, opcode, arg, nr_args);

	trace_io_uring_register(ctx, opcode, ctx->file_table.data.nr,
				ctx->buf_table.nr, ret);
	mutex_unlock(&ctx->uring_lock);
	if (!use_registered_ring)
		fput(file);
	return ret;
}