// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"

#if defined(CONFIG_NET)
struct io_shutdown {
	struct file *file;
	int how;
};

struct io_accept {
	struct file *file;
	struct sockaddr __user *addr;
	int __user *addr_len;
	int flags;
	int iou_flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_socket {
	struct file *file;
	int domain;
	int type;
	int protocol;
	int flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_connect {
	struct file *file;
	struct sockaddr __user *addr;
	int addr_len;
	bool in_progress;
	bool seen_econnaborted;
};

struct io_bind {
	struct file *file;
	int addr_len;
};

struct io_listen {
	struct file *file;
	int backlog;
};

struct io_sr_msg {
	struct file *file;
	union {
		struct compat_msghdr __user *umsg_compat;
		struct user_msghdr __user *umsg;
		void __user *buf;
	};
	int len;
	unsigned done_io;
	unsigned msg_flags;
	unsigned nr_multishot_loops;
	u16 flags;
	/* initialised and used only by !msg send variants */
	u16 addr_len;
	u16 buf_group;
	void __user *addr;
	void __user *msg_control;
	/* used only for send zerocopy */
	struct io_kiocb *notif;
};

/*
 * Number of times we'll try and do receives if there's more data. If we
 * exceed this limit, then add us to the back of the queue and retry from
 * there. This helps fairness between flooding clients.
 */
#define MULTISHOT_MAX_RETRY	32

int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
	struct socket *sock;
	int ret;

	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}

static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
{
	if (kmsg->free_iov) {
		kfree(kmsg->free_iov);
		kmsg->free_iov_nr = 0;
		kmsg->free_iov = NULL;
	}
}

static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;
	struct iovec *iov;

	/* can't recycle, ensure we free the iovec if we have one */
	if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
		io_netmsg_iovec_free(hdr);
		return;
	}

	/* Let normal cleanup path reap it if we fail adding to the cache */
	iov = hdr->free_iov;
	if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) {
		if (iov)
			kasan_mempool_poison_object(iov);
		req->async_data = NULL;
		req->flags &= ~REQ_F_ASYNC_DATA;
	}
}

static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_async_msghdr *hdr;

	hdr = io_alloc_cache_get(&ctx->netmsg_cache);
	if (hdr) {
		if (hdr->free_iov) {
			kasan_mempool_unpoison_object(hdr->free_iov,
				hdr->free_iov_nr * sizeof(struct iovec));
			req->flags |= REQ_F_NEED_CLEANUP;
		}
		req->flags |= REQ_F_ASYNC_DATA;
		req->async_data = hdr;
		return hdr;
	}

	if (!io_alloc_async_data(req)) {
		hdr = req->async_data;
		hdr->free_iov_nr = 0;
		hdr->free_iov = NULL;
		return hdr;
	}
	return NULL;
}

/* assign new iovec to kmsg, if we need to */
static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
			     struct iovec *iov)
{
	if (iov) {
		req->flags |= REQ_F_NEED_CLEANUP;
		kmsg->free_iov_nr = kmsg->msg.msg_iter.nr_segs;
		if (kmsg->free_iov)
			kfree(kmsg->free_iov);
		kmsg->free_iov = iov;
	}
	return 0;
}

static inline void io_mshot_prep_retry(struct io_kiocb *req,
				       struct io_async_msghdr *kmsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	req->flags &= ~REQ_F_BL_EMPTY;
	sr->done_io = 0;
	sr->len = 0; /* get from the provided buffer */
	req->buf_index = sr->buf_group;
}

#ifdef CONFIG_COMPAT
static int io_compat_msg_copy_hdr(struct io_kiocb *req,
				  struct io_async_msghdr *iomsg,
				  struct compat_msghdr *msg, int ddir)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_iovec __user *uiov;
	struct iovec *iov;
	int ret, nr_segs;

	if (iomsg->free_iov) {
		nr_segs = iomsg->free_iov_nr;
		iov = iomsg->free_iov;
	} else {
		iov = &iomsg->fast_iov;
		nr_segs = 1;
	}

	if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
		return -EFAULT;

	uiov = compat_ptr(msg->msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		compat_ssize_t clen;

		if (msg->msg_iovlen == 0) {
			sr->len = iov->iov_len = 0;
			iov->iov_base = NULL;
		} else if (msg->msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (!access_ok(uiov, sizeof(*uiov)))
				return -EFAULT;
			if (__get_user(clen, &uiov->iov_len))
				return -EFAULT;
			if (clen < 0)
				return -EINVAL;
			sr->len = clen;
		}

		return 0;
	}

	ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen,
			     nr_segs, &iov, &iomsg->msg.msg_iter, true);
	if (unlikely(ret < 0))
		return ret;

	return io_net_vec_assign(req, iomsg, iov);
}
#endif

static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			   struct user_msghdr *msg, int ddir)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct iovec *iov;
	int ret, nr_segs;

	if (iomsg->free_iov) {
		nr_segs = iomsg->free_iov_nr;
		iov = iomsg->free_iov;
	} else {
		iov = &iomsg->fast_iov;
		nr_segs = 1;
	}

	if (!user_access_begin(sr->umsg, sizeof(*sr->umsg)))
		return -EFAULT;

	ret = -EFAULT;
	unsafe_get_user(msg->msg_name, &sr->umsg->msg_name, ua_end);
	unsafe_get_user(msg->msg_namelen, &sr->umsg->msg_namelen, ua_end);
	unsafe_get_user(msg->msg_iov, &sr->umsg->msg_iov, ua_end);
	unsafe_get_user(msg->msg_iovlen, &sr->umsg->msg_iovlen, ua_end);
	unsafe_get_user(msg->msg_control, &sr->umsg->msg_control, ua_end);
	unsafe_get_user(msg->msg_controllen, &sr->umsg->msg_controllen, ua_end);
	msg->msg_flags = 0;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg->msg_iovlen == 0) {
			sr->len = iov->iov_len = 0;
			iov->iov_base = NULL;
		} else if (msg->msg_iovlen > 1) {
			ret = -EINVAL;
			goto ua_end;
		} else {
			/* we only need the length for provided buffers */
			if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t)))
				goto ua_end;
			unsafe_get_user(iov->iov_len, &msg->msg_iov[0].iov_len,
					ua_end);
			sr->len = iov->iov_len;
		}
		ret = 0;
ua_end:
		user_access_end();
		return ret;
	}

	user_access_end();
	ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, nr_segs,
			     &iov, &iomsg->msg.msg_iter, false);
	if (unlikely(ret < 0))
		return ret;

	return io_net_vec_assign(req, iomsg, iov);
}

static int io_sendmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr msg;
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

#ifdef CONFIG_COMPAT
	if (unlikely(req->ctx->compat)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE);
		if (unlikely(ret))
			return ret;

		return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL);
	}
#endif

	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE);
	if (unlikely(ret))
		return ret;

	ret = __copy_msghdr(&iomsg->msg, &msg, NULL);

	/* save msg_control as sys_sendmsg() overwrites it */
	sr->msg_control = iomsg->msg.msg_control_user;
	return ret;
}

void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	io_netmsg_iovec_free(io);
}

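/*
 * Set up a plain (non-msghdr) send: copy in the optional destination
 * address and, unless a provided buffer will be selected at issue time,
 * map the user buffer for transmission.
 */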
static int io_send_setup(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	int ret;

	kmsg->msg.msg_name = NULL;
	kmsg->msg.msg_namelen = 0;
	kmsg->msg.msg_control = NULL;
	kmsg->msg.msg_controllen = 0;
	kmsg->msg.msg_ubuf = NULL;

	if (sr->addr) {
		ret = move_addr_to_kernel(sr->addr, sr->addr_len, &kmsg->addr);
		if (unlikely(ret < 0))
			return ret;
		kmsg->msg.msg_name = &kmsg->addr;
		kmsg->msg.msg_namelen = sr->addr_len;
	}
	if (!io_do_buffer_select(req)) {
		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret < 0))
			return ret;
	}
	return 0;
}

static int io_sendmsg_prep_setup(struct io_kiocb *req, int is_msg)
{
	struct io_async_msghdr *kmsg;
	int ret;

	kmsg = io_msg_alloc_async(req);
	if (unlikely(!kmsg))
		return -ENOMEM;
	if (!is_msg)
		return io_send_setup(req);
	ret = io_sendmsg_copy_hdr(req, kmsg);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE)

int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;

	if (req->opcode == IORING_OP_SEND) {
		if (READ_ONCE(sqe->__pad3[0]))
			return -EINVAL;
		sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
		sr->addr_len = READ_ONCE(sqe->addr_len);
	} else if (sqe->addr2 || sqe->file_index) {
		return -EINVAL;
	}

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~SENDMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_SENDMSG)
			return -EINVAL;
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		sr->msg_flags |= MSG_WAITALL;
		sr->buf_group = req->buf_index;
		req->buf_list = NULL;
	}
	if (req->flags & REQ_F_BUFFER_SELECT && sr->len)
		return -EINVAL;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	return io_sendmsg_prep_setup(req, req->opcode == IORING_OP_SENDMSG);
}

static void io_req_msg_cleanup(struct io_kiocb *req,
			       unsigned int issue_flags)
{
	req->flags &= ~REQ_F_NEED_CLEANUP;
	io_netmsg_recycle(req, issue_flags);
}

/*
 * For bundle completions, we need to figure out how many segments we consumed.
 * A bundle could be using a single ITER_UBUF if that's all we mapped, or it
 * could be using an ITER_IOVEC. If the latter, then if we consumed all of
 * the segments, then it's a trivial question to answer. If we have residual
 * data in the iter, then loop the segments to figure out how much we
 * transferred.
 */
static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
{
	struct iovec *iov;
	int nbufs;

	/* no data is always zero segments, and a ubuf is always 1 segment */
	if (ret <= 0)
		return 0;
	if (iter_is_ubuf(&kmsg->msg.msg_iter))
		return 1;

	iov = kmsg->free_iov;
	if (!iov)
		iov = &kmsg->fast_iov;

	/* if all data was transferred, it's basic pointer math */
	if (!iov_iter_count(&kmsg->msg.msg_iter))
		return iter_iov(&kmsg->msg.msg_iter) - iov;

	/* short transfer, count segments */
	nbufs = 0;
	do {
		int this_len = min_t(int, iov[nbufs].iov_len, ret);

		nbufs++;
		ret -= this_len;
	} while (ret);

	return nbufs;
}

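/*
 * Post the completion for a (bundle) send. Returns true if the request is
 * done, or false if a bundle send should be retried to pick up the next
 * provided buffer(s).
 */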
static inline bool io_send_finish(struct io_kiocb *req, int *ret,
				  struct io_async_msghdr *kmsg,
				  unsigned issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	bool bundle_finished = *ret <= 0;
	unsigned int cflags;

	if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
		cflags = io_put_kbuf(req, issue_flags);
		goto finish;
	}

	cflags = io_put_kbufs(req, io_bundle_nbufs(kmsg, *ret), issue_flags);

	if (bundle_finished || req->flags & REQ_F_BL_EMPTY)
		goto finish;

	/*
	 * Fill CQE for this send and see if we should keep trying to
	 * send from this socket.
	 */
	if (io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
		io_mshot_prep_retry(req, kmsg);
		return false;
	}

	/* Otherwise stop bundle and use the current result. */
finish:
	io_req_set_res(req, *ret, cflags);
	*ret = IOU_OK;
	return true;
}

int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret > 0 && io_net_retry(sock, flags)) {
			kmsg->msg.msg_controllen = 0;
			kmsg->msg.msg_control = NULL;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	io_req_msg_cleanup(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;

retry_bundle:
	if (io_do_buffer_select(req)) {
		struct buf_sel_arg arg = {
			.iovs = &kmsg->fast_iov,
			.max_len = INT_MAX,
			.nr_iovs = 1,
			.mode = KBUF_MODE_EXPAND,
		};

		if (kmsg->free_iov) {
			arg.nr_iovs = kmsg->free_iov_nr;
			arg.iovs = kmsg->free_iov;
			arg.mode |= KBUF_MODE_FREE;
		}

		if (!(sr->flags & IORING_RECVSEND_BUNDLE))
			arg.nr_iovs = 1;

		ret = io_buffers_select(req, &arg, issue_flags);
		if (unlikely(ret < 0))
			return ret;

		sr->len = arg.out_len;
		iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE, arg.iovs, ret,
			      arg.out_len);
		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
			kmsg->free_iov_nr = ret;
			kmsg->free_iov = arg.iovs;
		}
	}

	/*
	 * If MSG_WAITALL is set, or this is a bundle send, then we need
	 * the full amount. If just bundle is set, if we do a short send
	 * then we complete the bundle sequence rather than continue on.
	 */
	if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
	kmsg->msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &kmsg->msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	if (!io_send_finish(req, &ret, kmsg, issue_flags))
		goto retry_bundle;

	io_req_msg_cleanup(req, issue_flags);
	return ret;
}

static int io_recvmsg_mshot_prep(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg,
				 int namelen, size_t controllen)
{
	if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) ==
	    (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) {
		int hdr;

		if (unlikely(namelen < 0))
			return -EOVERFLOW;
		if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
				       namelen, &hdr))
			return -EOVERFLOW;
		if (check_add_overflow(hdr, controllen, &hdr))
			return -EOVERFLOW;

		iomsg->namelen = namelen;
		iomsg->controllen = controllen;
		return 0;
	}

	return 0;
}

static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct user_msghdr msg;
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

#ifdef CONFIG_COMPAT
	if (unlikely(req->ctx->compat)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST);
		if (unlikely(ret))
			return ret;

		ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr);
		if (unlikely(ret))
			return ret;

		return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen,
					     cmsg.msg_controllen);
	}
#endif

	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST);
	if (unlikely(ret))
		return ret;

	ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (unlikely(ret))
		return ret;

	return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
				     msg.msg_controllen);
}

static int io_recvmsg_prep_setup(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg;
	int ret;

	kmsg = io_msg_alloc_async(req);
	if (unlikely(!kmsg))
		return -ENOMEM;

	if (req->opcode == IORING_OP_RECV) {
		kmsg->msg.msg_name = NULL;
		kmsg->msg.msg_namelen = 0;
		kmsg->msg.msg_control = NULL;
		kmsg->msg.msg_get_inq = 1;
		kmsg->msg.msg_controllen = 0;
		kmsg->msg.msg_iocb = NULL;
		kmsg->msg.msg_ubuf = NULL;

		if (!io_do_buffer_select(req)) {
			ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
					  &kmsg->msg.msg_iter);
			if (unlikely(ret))
				return ret;
		}
		return 0;
	}

	ret = io_recvmsg_copy_hdr(req, kmsg);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \
			IORING_RECVSEND_BUNDLE)

int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~RECVMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags);
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (req->flags & REQ_F_BUFFER_SELECT) {
		/*
		 * Store the buffer group for this multishot receive separately,
		 * as if we end up doing an io-wq based issue that selects a
		 * buffer, it has to be committed immediately and that will
		 * clear ->buf_list. This means we lose the link to the buffer
		 * list, and the eventual buffer put on completion then cannot
		 * restore it.
		 */
		sr->buf_group = req->buf_index;
		req->buf_list = NULL;
	}
	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		if (req->opcode == IORING_OP_RECV && sr->len)
			return -EINVAL;
		req->flags |= REQ_F_APOLL_MULTISHOT;
	}
	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_RECVMSG)
			return -EINVAL;
	}

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->nr_multishot_loops = 0;
	return io_recvmsg_prep_setup(req);
}

/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
				  struct io_async_msghdr *kmsg,
				  bool mshot_finished, unsigned issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	unsigned int cflags = 0;

	if (kmsg->msg.msg_inq > 0)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		cflags |= io_put_kbufs(req, io_bundle_nbufs(kmsg, *ret),
				       issue_flags);
		/* bundle with no more immediate buffers, we're done */
		if (req->flags & REQ_F_BL_EMPTY)
			goto finish;
	} else {
		cflags |= io_put_kbuf(req, issue_flags);
	}

	/*
	 * Fill CQE for this receive and see if we should keep trying to
	 * receive from this socket.
	 */
	if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
	    io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
		int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;

		io_mshot_prep_retry(req, kmsg);
		/* Known not-empty or unknown state, retry */
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
				return false;
			/* mshot retries exceeded, force a requeue */
			sr->nr_multishot_loops = 0;
			mshot_retry_ret = IOU_REQUEUE;
		}
		if (issue_flags & IO_URING_F_MULTISHOT)
			*ret = mshot_retry_ret;
		else
			*ret = -EAGAIN;
		return true;
	}

	/* Finish the request / stop multishot. */
finish:
	io_req_set_res(req, *ret, cflags);

	if (issue_flags & IO_URING_F_MULTISHOT)
		*ret = IOU_STOP_MULTISHOT;
	else
		*ret = IOU_OK;
	io_req_msg_cleanup(req, issue_flags);
	return true;
}

static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}

struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};

/*
 * Multishot recvmsg: receive directly into the selected provided buffer,
 * laying out struct io_uring_recvmsg_out followed by the (possibly
 * truncated) name, then the control data and payload.
 */
static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 *	"fromlen shall refer to the value before truncation.."
	 *			1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}

int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;
		size_t len = sr->len;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
			if (ret) {
				io_kbuf_recycle(req, issue_flags);
				return ret;
			}
		}

		iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);
	}

	kmsg->msg.msg_get_inq = 1;
	kmsg->msg.msg_inq = -1;
	if (req->flags & REQ_F_APOLL_MULTISHOT) {
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	} else {
		/* disable partial retry for recvmsg with cmsg attached */
		if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
			min_ret = iov_iter_count(&kmsg->msg.msg_iter);

		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);
	}

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}
			return -EAGAIN;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
		goto retry_multishot;

	return ret;
}

static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
			      size_t *len, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	int ret;

	/*
	 * If the ring isn't locked, then don't use the peek interface
	 * to grab multiple buffers as we will lock/unlock between
	 * this selection and posting the buffers.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
	    sr->flags & IORING_RECVSEND_BUNDLE) {
		struct buf_sel_arg arg = {
			.iovs = &kmsg->fast_iov,
			.nr_iovs = 1,
			.mode = KBUF_MODE_EXPAND,
		};

		if (kmsg->free_iov) {
			arg.nr_iovs = kmsg->free_iov_nr;
			arg.iovs = kmsg->free_iov;
			arg.mode |= KBUF_MODE_FREE;
		}

		if (kmsg->msg.msg_inq > 0)
			arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq);

		ret = io_buffers_peek(req, &arg);
		if (unlikely(ret < 0))
			return ret;

		/* special case 1 vec, can be a fast path */
		if (ret == 1) {
			sr->buf = arg.iovs[0].iov_base;
			sr->len = arg.iovs[0].iov_len;
			goto map_ubuf;
		}
		iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
			      arg.out_len);
		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
			kmsg->free_iov_nr = ret;
			kmsg->free_iov = arg.iovs;
		}
	} else {
		void __user *buf;

		*len = sr->len;
		buf = io_buffer_select(req, len, issue_flags);
		if (!buf)
			return -ENOBUFS;
		sr->buf = buf;
		sr->len = *len;
map_ubuf:
		ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
	}

	return 0;
}

int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	size_t len = sr->len;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	if (io_do_buffer_select(req)) {
		ret = io_recv_buf_select(req, kmsg, &len, issue_flags);
		if (unlikely(ret)) {
			kmsg->msg.msg_inq = -1;
			goto out_free;
		}
		sr->buf = NULL;
	}

	kmsg->msg.msg_flags = 0;
	kmsg->msg.msg_inq = -1;

	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = sock_recvmsg(sock, &kmsg->msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}

			return -EAGAIN;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	if (!io_recv_finish(req, &ret, kmsg, ret <= 0, issue_flags))
		goto retry_multishot;

	return ret;
}

void io_send_zc_cleanup(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io = req->async_data;

	if (req_has_async_data(req))
		io_netmsg_iovec_free(io);
	if (zc->notif) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
	}
}

#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
#define IO_ZC_FLAGS_VALID (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE)

int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_kiocb *notif;

	zc->done_io = 0;
	req->flags |= REQ_F_POLL_NO_LAZY;

	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	notif->cqe.user_data = req->cqe.user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	req->flags |= REQ_F_NEED_CLEANUP;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
		if (zc->flags & ~IO_ZC_FLAGS_VALID)
			return -EINVAL;
		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
			struct io_notif_data *nd = io_notif_to_data(notif);

			nd->zc_report = true;
			nd->zc_used = false;
			nd->zc_copied = false;
		}
	}

	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		unsigned idx = READ_ONCE(sqe->buf_index);

		if (unlikely(idx >= ctx->nr_user_bufs))
			return -EFAULT;
		idx = array_index_nospec(idx, ctx->nr_user_bufs);
		req->imu = READ_ONCE(ctx->user_bufs[idx]);
		io_req_set_rsrc_node(notif, ctx, 0);
	}

	if (req->opcode == IORING_OP_SEND_ZC) {
		if (READ_ONCE(sqe->__pad3[0]))
			return -EINVAL;
		zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
		zc->addr_len = READ_ONCE(sqe->addr_len);
	} else {
		if (unlikely(sqe->addr2 || sqe->file_index))
			return -EINVAL;
		if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
			return -EINVAL;
	}

	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		zc->msg_flags |= MSG_CMSG_COMPAT;
#endif
	return io_sendmsg_prep_setup(req, req->opcode == IORING_OP_SENDMSG_ZC);
}

static int io_sg_from_iter_iovec(struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	skb_zcopy_downgrade_managed(skb);
	return zerocopy_fill_skb_from_iter(skb, from, length);
}

static int io_sg_from_iter(struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	if (!frag)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return zerocopy_fill_skb_from_iter(skb, from, length);

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	if (bi.bi_size)
		ret = -EMSGSIZE;

	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;
	return ret;
}

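/*
 * Import the source buffer for a zerocopy send, from either a registered
 * (fixed) buffer or a plain user address, and pick the matching
 * sg_from_iter callback for filling the skb.
 */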
static int io_send_zc_import(struct io_kiocb *req, struct io_async_msghdr *kmsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	int ret;

	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
		ret = io_import_fixed(ITER_SOURCE, &kmsg->msg.msg_iter, req->imu,
				      (u64)(uintptr_t)sr->buf, sr->len);
		if (unlikely(ret))
			return ret;
		kmsg->msg.sg_from_iter = io_sg_from_iter;
	} else {
		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
		ret = io_notif_account_mem(sr->notif, sr->len);
		if (unlikely(ret))
			return ret;
		kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
	}

	return ret;
}

int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned msg_flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	if (!zc->done_io) {
		ret = io_send_zc_import(req, kmsg);
		if (unlikely(ret))
			return ret;
	}

	msg_flags = zc->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
	msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;

	kmsg->msg.msg_flags = msg_flags;
	kmsg->msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
	ret = sock_sendmsg(sock, &kmsg->msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) {
			zc->len -= ret;
			zc->buf += ret;
			zc->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += zc->done_io;
	else if (zc->done_io)
		ret = zc->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(zc->notif);
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}

int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;
	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
	kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(sr->notif);
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}

void io_sendrecv_fail(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (sr->done_io)
		req->cqe.res = sr->done_io;

	if ((req->flags & REQ_F_NEED_CLEANUP) &&
	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
		req->cqe.flags |= IORING_CQE_F_MORE;
}

#define ACCEPT_FLAGS (IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \
		      IORING_ACCEPT_POLL_FIRST)

int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);

	if (sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	accept->iou_flags = READ_ONCE(sqe->ioprio);
	if (accept->iou_flags & ~ACCEPT_FLAGS)
		return -EINVAL;

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		if (accept->iou_flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (accept->iou_flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	if (accept->iou_flags & IORING_ACCEPT_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	return 0;
}

int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool fixed = !!accept->file_slot;
	struct proto_accept_arg arg = {
		.flags = force_nonblock ? O_NONBLOCK : 0,
	};
	struct file *file;
	unsigned cflags;
	int ret, fd;

	if (!(req->flags & REQ_F_POLLED) &&
	    accept->iou_flags & IORING_ACCEPT_POLL_FIRST)
		return -EAGAIN;

retry:
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	arg.err = 0;
	arg.is_empty = -1;
	file = do_accept(req->file, &arg, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock &&
		    !(accept->iou_flags & IORING_ACCEPT_DONTWAIT)) {
			/*
			 * if it's multishot and polled, we don't need to
			 * return EAGAIN to arm the poll infra since it
			 * has already been done
			 */
			if (issue_flags & IO_URING_F_MULTISHOT)
				return IOU_ISSUE_SKIP_COMPLETE;
			return ret;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  accept->file_slot);
	}

	cflags = 0;
	if (!arg.is_empty)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, ret, cflags);
		return IOU_OK;
	}

	if (ret < 0)
		return ret;
	if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1)
			goto retry;
		if (issue_flags & IO_URING_F_MULTISHOT)
			return IOU_ISSUE_SKIP_COMPLETE;
		return -EAGAIN;
	}

	io_req_set_res(req, ret, cflags);
	return IOU_STOP_MULTISHOT;
}

int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
		return -EINVAL;
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	return 0;
}

int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
	bool fixed = !!sock->file_slot;
	struct file *file;
	int ret, fd;

	if (!fixed) {
		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  sock->file_slot);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	conn->in_progress = conn->seen_econnaborted = false;

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr);
}

int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io = req->async_data;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->addr, connect->addr_len,
				 file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
	    && force_nonblock) {
		if (ret == -EINPROGRESS) {
			connect->in_progress = true;
		} else if (ret == -ECONNABORTED) {
			if (connect->seen_econnaborted)
				goto out;
			connect->seen_econnaborted = true;
		}
		return -EAGAIN;
	}
	if (connect->in_progress) {
		/*
		 * At least bluetooth will return -EBADFD on a re-connect
		 * attempt, and it's (supposedly) also valid to get -EISCONN
		 * which means the previous result is good. For both of these,
		 * grab the sock_error() and use that for the completion.
		 */
		if (ret == -EBADFD || ret == -EISCONN)
			ret = sock_error(sock_from_file(req->file)->sk);
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	io_req_msg_cleanup(req, issue_flags);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct sockaddr __user *uaddr;
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	bind->addr_len = READ_ONCE(sqe->addr2);

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;
	return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr);
}

int io_bind(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct io_async_msghdr *io = req->async_data;
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_bind_socket(sock, &io->addr, bind->addr_len);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}

int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);

	if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2)
		return -EINVAL;

	listen->backlog = READ_ONCE(sqe->len);
	return 0;
}

int io_listen(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_listen_socket(sock, listen->backlog);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}

void io_netmsg_cache_free(const void *entry)
{
	struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;

	if (kmsg->free_iov) {
		kasan_mempool_unpoison_object(kmsg->free_iov,
				kmsg->free_iov_nr * sizeof(struct iovec));
		io_netmsg_iovec_free(kmsg);
	}
	kfree(kmsg);
}
#endif