1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/kernel.h> 3 #include <linux/errno.h> 4 #include <linux/file.h> 5 #include <linux/slab.h> 6 #include <linux/net.h> 7 #include <linux/compat.h> 8 #include <net/compat.h> 9 #include <linux/io_uring.h> 10 11 #include <uapi/linux/io_uring.h> 12 13 #include "io_uring.h" 14 #include "kbuf.h" 15 #include "alloc_cache.h" 16 #include "net.h" 17 #include "notif.h" 18 #include "rsrc.h" 19 20 #if defined(CONFIG_NET) 21 struct io_shutdown { 22 struct file *file; 23 int how; 24 }; 25 26 struct io_accept { 27 struct file *file; 28 struct sockaddr __user *addr; 29 int __user *addr_len; 30 int flags; 31 int iou_flags; 32 u32 file_slot; 33 unsigned long nofile; 34 }; 35 36 struct io_socket { 37 struct file *file; 38 int domain; 39 int type; 40 int protocol; 41 int flags; 42 u32 file_slot; 43 unsigned long nofile; 44 }; 45 46 struct io_connect { 47 struct file *file; 48 struct sockaddr __user *addr; 49 int addr_len; 50 bool in_progress; 51 bool seen_econnaborted; 52 }; 53 54 struct io_bind { 55 struct file *file; 56 int addr_len; 57 }; 58 59 struct io_listen { 60 struct file *file; 61 int backlog; 62 }; 63 64 struct io_sr_msg { 65 struct file *file; 66 union { 67 struct compat_msghdr __user *umsg_compat; 68 struct user_msghdr __user *umsg; 69 void __user *buf; 70 }; 71 int len; 72 unsigned done_io; 73 unsigned msg_flags; 74 unsigned nr_multishot_loops; 75 u16 flags; 76 /* initialised and used only by !msg send variants */ 77 u16 addr_len; 78 u16 buf_group; 79 void __user *addr; 80 void __user *msg_control; 81 /* used only for send zerocopy */ 82 struct io_kiocb *notif; 83 }; 84 85 /* 86 * Number of times we'll try and do receives if there's more data. If we 87 * exceed this limit, then add us to the back of the queue and retry from 88 * there. This helps fairness between flooding clients. 89 */ 90 #define MULTISHOT_MAX_RETRY 32 91 92 int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 93 { 94 struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown); 95 96 if (unlikely(sqe->off || sqe->addr || sqe->rw_flags || 97 sqe->buf_index || sqe->splice_fd_in)) 98 return -EINVAL; 99 100 shutdown->how = READ_ONCE(sqe->len); 101 req->flags |= REQ_F_FORCE_ASYNC; 102 return 0; 103 } 104 105 int io_shutdown(struct io_kiocb *req, unsigned int issue_flags) 106 { 107 struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown); 108 struct socket *sock; 109 int ret; 110 111 WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); 112 113 sock = sock_from_file(req->file); 114 if (unlikely(!sock)) 115 return -ENOTSOCK; 116 117 ret = __sys_shutdown_sock(sock, shutdown->how); 118 io_req_set_res(req, ret, 0); 119 return IOU_OK; 120 } 121 122 static bool io_net_retry(struct socket *sock, int flags) 123 { 124 if (!(flags & MSG_WAITALL)) 125 return false; 126 return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET; 127 } 128 129 static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg) 130 { 131 if (kmsg->free_iov) { 132 kfree(kmsg->free_iov); 133 kmsg->free_iov_nr = 0; 134 kmsg->free_iov = NULL; 135 } 136 } 137 138 static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags) 139 { 140 struct io_async_msghdr *hdr = req->async_data; 141 struct iovec *iov; 142 143 /* can't recycle, ensure we free the iovec if we have one */ 144 if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) { 145 io_netmsg_iovec_free(hdr); 146 return; 147 } 148 149 /* Let normal cleanup path reap it if we fail adding to the cache */ 150 iov = hdr->free_iov; 151 if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) { 152 if (iov) 153 kasan_mempool_poison_object(iov); 154 req->async_data = NULL; 155 req->flags &= ~REQ_F_ASYNC_DATA; 156 } 157 } 158 159 static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req) 160 { 161 struct io_ring_ctx *ctx = req->ctx; 162 struct io_async_msghdr *hdr; 163 164 hdr = io_alloc_cache_get(&ctx->netmsg_cache); 165 if (hdr) { 166 if (hdr->free_iov) { 167 kasan_mempool_unpoison_object(hdr->free_iov, 168 hdr->free_iov_nr * sizeof(struct iovec)); 169 req->flags |= REQ_F_NEED_CLEANUP; 170 } 171 req->flags |= REQ_F_ASYNC_DATA; 172 req->async_data = hdr; 173 return hdr; 174 } 175 176 if (!io_alloc_async_data(req)) { 177 hdr = req->async_data; 178 hdr->free_iov_nr = 0; 179 hdr->free_iov = NULL; 180 return hdr; 181 } 182 return NULL; 183 } 184 185 /* assign new iovec to kmsg, if we need to */ 186 static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg, 187 struct iovec *iov) 188 { 189 if (iov) { 190 req->flags |= REQ_F_NEED_CLEANUP; 191 kmsg->free_iov_nr = kmsg->msg.msg_iter.nr_segs; 192 if (kmsg->free_iov) 193 kfree(kmsg->free_iov); 194 kmsg->free_iov = iov; 195 } 196 return 0; 197 } 198 199 static inline void io_mshot_prep_retry(struct io_kiocb *req, 200 struct io_async_msghdr *kmsg) 201 { 202 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 203 204 req->flags &= ~REQ_F_BL_EMPTY; 205 sr->done_io = 0; 206 sr->len = 0; /* get from the provided buffer */ 207 req->buf_index = sr->buf_group; 208 } 209 210 #ifdef CONFIG_COMPAT 211 static int io_compat_msg_copy_hdr(struct io_kiocb *req, 212 struct io_async_msghdr *iomsg, 213 struct compat_msghdr *msg, int ddir) 214 { 215 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 216 struct compat_iovec __user *uiov; 217 struct iovec *iov; 218 int ret, nr_segs; 219 220 if (iomsg->free_iov) { 221 nr_segs = iomsg->free_iov_nr; 222 iov = iomsg->free_iov; 223 } else { 224 iov = &iomsg->fast_iov; 225 nr_segs = 1; 226 } 227 228 if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg))) 229 return -EFAULT; 230 231 uiov = compat_ptr(msg->msg_iov); 232 if (req->flags & REQ_F_BUFFER_SELECT) { 233 compat_ssize_t clen; 234 235 if (msg->msg_iovlen == 0) { 236 sr->len = iov->iov_len = 0; 237 iov->iov_base = NULL; 238 } else if (msg->msg_iovlen > 1) { 239 return -EINVAL; 240 } else { 241 if (!access_ok(uiov, sizeof(*uiov))) 242 return -EFAULT; 243 if (__get_user(clen, &uiov->iov_len)) 244 return -EFAULT; 245 if (clen < 0) 246 return -EINVAL; 247 sr->len = clen; 248 } 249 250 return 0; 251 } 252 253 ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen, 254 nr_segs, &iov, &iomsg->msg.msg_iter, true); 255 if (unlikely(ret < 0)) 256 return ret; 257 258 return io_net_vec_assign(req, iomsg, iov); 259 } 260 #endif 261 262 static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, 263 struct user_msghdr *msg, int ddir) 264 { 265 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 266 struct iovec *iov; 267 int ret, nr_segs; 268 269 if (iomsg->free_iov) { 270 nr_segs = iomsg->free_iov_nr; 271 iov = iomsg->free_iov; 272 } else { 273 iov = &iomsg->fast_iov; 274 nr_segs = 1; 275 } 276 277 if (!user_access_begin(sr->umsg, sizeof(*sr->umsg))) 278 return -EFAULT; 279 280 ret = -EFAULT; 281 unsafe_get_user(msg->msg_name, &sr->umsg->msg_name, ua_end); 282 unsafe_get_user(msg->msg_namelen, &sr->umsg->msg_namelen, ua_end); 283 unsafe_get_user(msg->msg_iov, &sr->umsg->msg_iov, ua_end); 284 unsafe_get_user(msg->msg_iovlen, &sr->umsg->msg_iovlen, ua_end); 285 unsafe_get_user(msg->msg_control, &sr->umsg->msg_control, ua_end); 286 unsafe_get_user(msg->msg_controllen, &sr->umsg->msg_controllen, ua_end); 287 msg->msg_flags = 0; 288 289 if (req->flags & REQ_F_BUFFER_SELECT) { 290 if (msg->msg_iovlen == 0) { 291 sr->len = iov->iov_len = 0; 292 iov->iov_base = NULL; 293 } else if (msg->msg_iovlen > 1) { 294 ret = -EINVAL; 295 goto ua_end; 296 } else { 297 /* we only need the length for provided buffers */ 298 if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t))) 299 goto ua_end; 300 unsafe_get_user(iov->iov_len, &msg->msg_iov[0].iov_len, 301 ua_end); 302 sr->len = iov->iov_len; 303 } 304 ret = 0; 305 ua_end: 306 user_access_end(); 307 return ret; 308 } 309 310 user_access_end(); 311 ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, nr_segs, 312 &iov, &iomsg->msg.msg_iter, false); 313 if (unlikely(ret < 0)) 314 return ret; 315 316 return io_net_vec_assign(req, iomsg, iov); 317 } 318 319 static int io_sendmsg_copy_hdr(struct io_kiocb *req, 320 struct io_async_msghdr *iomsg) 321 { 322 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 323 struct user_msghdr msg; 324 int ret; 325 326 iomsg->msg.msg_name = &iomsg->addr; 327 iomsg->msg.msg_iter.nr_segs = 0; 328 329 #ifdef CONFIG_COMPAT 330 if (unlikely(req->ctx->compat)) { 331 struct compat_msghdr cmsg; 332 333 ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE); 334 if (unlikely(ret)) 335 return ret; 336 337 return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL); 338 } 339 #endif 340 341 ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE); 342 if (unlikely(ret)) 343 return ret; 344 345 ret = __copy_msghdr(&iomsg->msg, &msg, NULL); 346 347 /* save msg_control as sys_sendmsg() overwrites it */ 348 sr->msg_control = iomsg->msg.msg_control_user; 349 return ret; 350 } 351 352 void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req) 353 { 354 struct io_async_msghdr *io = req->async_data; 355 356 io_netmsg_iovec_free(io); 357 } 358 359 static int io_send_setup(struct io_kiocb *req) 360 { 361 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 362 struct io_async_msghdr *kmsg = req->async_data; 363 int ret; 364 365 kmsg->msg.msg_name = NULL; 366 kmsg->msg.msg_namelen = 0; 367 kmsg->msg.msg_control = NULL; 368 kmsg->msg.msg_controllen = 0; 369 kmsg->msg.msg_ubuf = NULL; 370 371 if (sr->addr) { 372 ret = move_addr_to_kernel(sr->addr, sr->addr_len, &kmsg->addr); 373 if (unlikely(ret < 0)) 374 return ret; 375 kmsg->msg.msg_name = &kmsg->addr; 376 kmsg->msg.msg_namelen = sr->addr_len; 377 } 378 if (!io_do_buffer_select(req)) { 379 ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, 380 &kmsg->msg.msg_iter); 381 if (unlikely(ret < 0)) 382 return ret; 383 } 384 return 0; 385 } 386 387 static int io_sendmsg_prep_setup(struct io_kiocb *req, int is_msg) 388 { 389 struct io_async_msghdr *kmsg; 390 int ret; 391 392 kmsg = io_msg_alloc_async(req); 393 if (unlikely(!kmsg)) 394 return -ENOMEM; 395 if (!is_msg) 396 return io_send_setup(req); 397 ret = io_sendmsg_copy_hdr(req, kmsg); 398 if (!ret) 399 req->flags |= REQ_F_NEED_CLEANUP; 400 return ret; 401 } 402 403 #define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE) 404 405 int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 406 { 407 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 408 409 sr->done_io = 0; 410 411 if (req->opcode == IORING_OP_SEND) { 412 if (READ_ONCE(sqe->__pad3[0])) 413 return -EINVAL; 414 sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2)); 415 sr->addr_len = READ_ONCE(sqe->addr_len); 416 } else if (sqe->addr2 || sqe->file_index) { 417 return -EINVAL; 418 } 419 420 sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); 421 sr->len = READ_ONCE(sqe->len); 422 sr->flags = READ_ONCE(sqe->ioprio); 423 if (sr->flags & ~SENDMSG_FLAGS) 424 return -EINVAL; 425 sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; 426 if (sr->msg_flags & MSG_DONTWAIT) 427 req->flags |= REQ_F_NOWAIT; 428 if (sr->flags & IORING_RECVSEND_BUNDLE) { 429 if (req->opcode == IORING_OP_SENDMSG) 430 return -EINVAL; 431 if (!(req->flags & REQ_F_BUFFER_SELECT)) 432 return -EINVAL; 433 sr->msg_flags |= MSG_WAITALL; 434 sr->buf_group = req->buf_index; 435 req->buf_list = NULL; 436 } 437 438 #ifdef CONFIG_COMPAT 439 if (req->ctx->compat) 440 sr->msg_flags |= MSG_CMSG_COMPAT; 441 #endif 442 return io_sendmsg_prep_setup(req, req->opcode == IORING_OP_SENDMSG); 443 } 444 445 static void io_req_msg_cleanup(struct io_kiocb *req, 446 unsigned int issue_flags) 447 { 448 req->flags &= ~REQ_F_NEED_CLEANUP; 449 io_netmsg_recycle(req, issue_flags); 450 } 451 452 /* 453 * For bundle completions, we need to figure out how many segments we consumed. 454 * A bundle could be using a single ITER_UBUF if that's all we mapped, or it 455 * could be using an ITER_IOVEC. If the latter, then if we consumed all of 456 * the segments, then it's a trivial questiont o answer. If we have residual 457 * data in the iter, then loop the segments to figure out how much we 458 * transferred. 459 */ 460 static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret) 461 { 462 struct iovec *iov; 463 int nbufs; 464 465 /* no data is always zero segments, and a ubuf is always 1 segment */ 466 if (ret <= 0) 467 return 0; 468 if (iter_is_ubuf(&kmsg->msg.msg_iter)) 469 return 1; 470 471 iov = kmsg->free_iov; 472 if (!iov) 473 iov = &kmsg->fast_iov; 474 475 /* if all data was transferred, it's basic pointer math */ 476 if (!iov_iter_count(&kmsg->msg.msg_iter)) 477 return iter_iov(&kmsg->msg.msg_iter) - iov; 478 479 /* short transfer, count segments */ 480 nbufs = 0; 481 do { 482 int this_len = min_t(int, iov[nbufs].iov_len, ret); 483 484 nbufs++; 485 ret -= this_len; 486 } while (ret); 487 488 return nbufs; 489 } 490 491 static inline bool io_send_finish(struct io_kiocb *req, int *ret, 492 struct io_async_msghdr *kmsg, 493 unsigned issue_flags) 494 { 495 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 496 bool bundle_finished = *ret <= 0; 497 unsigned int cflags; 498 499 if (!(sr->flags & IORING_RECVSEND_BUNDLE)) { 500 cflags = io_put_kbuf(req, *ret, issue_flags); 501 goto finish; 502 } 503 504 cflags = io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret), issue_flags); 505 506 if (bundle_finished || req->flags & REQ_F_BL_EMPTY) 507 goto finish; 508 509 /* 510 * Fill CQE for this receive and see if we should keep trying to 511 * receive from this socket. 512 */ 513 if (io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) { 514 io_mshot_prep_retry(req, kmsg); 515 return false; 516 } 517 518 /* Otherwise stop bundle and use the current result. */ 519 finish: 520 io_req_set_res(req, *ret, cflags); 521 *ret = IOU_OK; 522 return true; 523 } 524 525 int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) 526 { 527 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 528 struct io_async_msghdr *kmsg = req->async_data; 529 struct socket *sock; 530 unsigned flags; 531 int min_ret = 0; 532 int ret; 533 534 sock = sock_from_file(req->file); 535 if (unlikely(!sock)) 536 return -ENOTSOCK; 537 538 if (!(req->flags & REQ_F_POLLED) && 539 (sr->flags & IORING_RECVSEND_POLL_FIRST)) 540 return -EAGAIN; 541 542 flags = sr->msg_flags; 543 if (issue_flags & IO_URING_F_NONBLOCK) 544 flags |= MSG_DONTWAIT; 545 if (flags & MSG_WAITALL) 546 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 547 548 kmsg->msg.msg_control_user = sr->msg_control; 549 550 ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); 551 552 if (ret < min_ret) { 553 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 554 return -EAGAIN; 555 if (ret > 0 && io_net_retry(sock, flags)) { 556 kmsg->msg.msg_controllen = 0; 557 kmsg->msg.msg_control = NULL; 558 sr->done_io += ret; 559 req->flags |= REQ_F_BL_NO_RECYCLE; 560 return -EAGAIN; 561 } 562 if (ret == -ERESTARTSYS) 563 ret = -EINTR; 564 req_set_fail(req); 565 } 566 io_req_msg_cleanup(req, issue_flags); 567 if (ret >= 0) 568 ret += sr->done_io; 569 else if (sr->done_io) 570 ret = sr->done_io; 571 io_req_set_res(req, ret, 0); 572 return IOU_OK; 573 } 574 575 int io_send(struct io_kiocb *req, unsigned int issue_flags) 576 { 577 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 578 struct io_async_msghdr *kmsg = req->async_data; 579 struct socket *sock; 580 unsigned flags; 581 int min_ret = 0; 582 int ret; 583 584 sock = sock_from_file(req->file); 585 if (unlikely(!sock)) 586 return -ENOTSOCK; 587 588 if (!(req->flags & REQ_F_POLLED) && 589 (sr->flags & IORING_RECVSEND_POLL_FIRST)) 590 return -EAGAIN; 591 592 flags = sr->msg_flags; 593 if (issue_flags & IO_URING_F_NONBLOCK) 594 flags |= MSG_DONTWAIT; 595 596 retry_bundle: 597 if (io_do_buffer_select(req)) { 598 struct buf_sel_arg arg = { 599 .iovs = &kmsg->fast_iov, 600 .max_len = min_not_zero(sr->len, INT_MAX), 601 .nr_iovs = 1, 602 }; 603 604 if (kmsg->free_iov) { 605 arg.nr_iovs = kmsg->free_iov_nr; 606 arg.iovs = kmsg->free_iov; 607 arg.mode = KBUF_MODE_FREE; 608 } 609 610 if (!(sr->flags & IORING_RECVSEND_BUNDLE)) 611 arg.nr_iovs = 1; 612 else 613 arg.mode |= KBUF_MODE_EXPAND; 614 615 ret = io_buffers_select(req, &arg, issue_flags); 616 if (unlikely(ret < 0)) 617 return ret; 618 619 if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) { 620 kmsg->free_iov_nr = ret; 621 kmsg->free_iov = arg.iovs; 622 req->flags |= REQ_F_NEED_CLEANUP; 623 } 624 sr->len = arg.out_len; 625 626 if (ret == 1) { 627 sr->buf = arg.iovs[0].iov_base; 628 ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, 629 &kmsg->msg.msg_iter); 630 if (unlikely(ret)) 631 return ret; 632 } else { 633 iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE, 634 arg.iovs, ret, arg.out_len); 635 } 636 } 637 638 /* 639 * If MSG_WAITALL is set, or this is a bundle send, then we need 640 * the full amount. If just bundle is set, if we do a short send 641 * then we complete the bundle sequence rather than continue on. 642 */ 643 if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE) 644 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 645 646 flags &= ~MSG_INTERNAL_SENDMSG_FLAGS; 647 kmsg->msg.msg_flags = flags; 648 ret = sock_sendmsg(sock, &kmsg->msg); 649 if (ret < min_ret) { 650 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 651 return -EAGAIN; 652 653 if (ret > 0 && io_net_retry(sock, flags)) { 654 sr->len -= ret; 655 sr->buf += ret; 656 sr->done_io += ret; 657 req->flags |= REQ_F_BL_NO_RECYCLE; 658 return -EAGAIN; 659 } 660 if (ret == -ERESTARTSYS) 661 ret = -EINTR; 662 req_set_fail(req); 663 } 664 if (ret >= 0) 665 ret += sr->done_io; 666 else if (sr->done_io) 667 ret = sr->done_io; 668 669 if (!io_send_finish(req, &ret, kmsg, issue_flags)) 670 goto retry_bundle; 671 672 io_req_msg_cleanup(req, issue_flags); 673 return ret; 674 } 675 676 static int io_recvmsg_mshot_prep(struct io_kiocb *req, 677 struct io_async_msghdr *iomsg, 678 int namelen, size_t controllen) 679 { 680 if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) == 681 (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) { 682 int hdr; 683 684 if (unlikely(namelen < 0)) 685 return -EOVERFLOW; 686 if (check_add_overflow(sizeof(struct io_uring_recvmsg_out), 687 namelen, &hdr)) 688 return -EOVERFLOW; 689 if (check_add_overflow(hdr, controllen, &hdr)) 690 return -EOVERFLOW; 691 692 iomsg->namelen = namelen; 693 iomsg->controllen = controllen; 694 return 0; 695 } 696 697 return 0; 698 } 699 700 static int io_recvmsg_copy_hdr(struct io_kiocb *req, 701 struct io_async_msghdr *iomsg) 702 { 703 struct user_msghdr msg; 704 int ret; 705 706 iomsg->msg.msg_name = &iomsg->addr; 707 iomsg->msg.msg_iter.nr_segs = 0; 708 709 #ifdef CONFIG_COMPAT 710 if (unlikely(req->ctx->compat)) { 711 struct compat_msghdr cmsg; 712 713 ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST); 714 if (unlikely(ret)) 715 return ret; 716 717 ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr); 718 if (unlikely(ret)) 719 return ret; 720 721 return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen, 722 cmsg.msg_controllen); 723 } 724 #endif 725 726 ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST); 727 if (unlikely(ret)) 728 return ret; 729 730 ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr); 731 if (unlikely(ret)) 732 return ret; 733 734 return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen, 735 msg.msg_controllen); 736 } 737 738 static int io_recvmsg_prep_setup(struct io_kiocb *req) 739 { 740 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 741 struct io_async_msghdr *kmsg; 742 int ret; 743 744 kmsg = io_msg_alloc_async(req); 745 if (unlikely(!kmsg)) 746 return -ENOMEM; 747 748 if (req->opcode == IORING_OP_RECV) { 749 kmsg->msg.msg_name = NULL; 750 kmsg->msg.msg_namelen = 0; 751 kmsg->msg.msg_control = NULL; 752 kmsg->msg.msg_get_inq = 1; 753 kmsg->msg.msg_controllen = 0; 754 kmsg->msg.msg_iocb = NULL; 755 kmsg->msg.msg_ubuf = NULL; 756 757 if (!io_do_buffer_select(req)) { 758 ret = import_ubuf(ITER_DEST, sr->buf, sr->len, 759 &kmsg->msg.msg_iter); 760 if (unlikely(ret)) 761 return ret; 762 } 763 return 0; 764 } 765 766 ret = io_recvmsg_copy_hdr(req, kmsg); 767 if (!ret) 768 req->flags |= REQ_F_NEED_CLEANUP; 769 return ret; 770 } 771 772 #define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \ 773 IORING_RECVSEND_BUNDLE) 774 775 int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 776 { 777 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 778 779 sr->done_io = 0; 780 781 if (unlikely(sqe->file_index || sqe->addr2)) 782 return -EINVAL; 783 784 sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); 785 sr->len = READ_ONCE(sqe->len); 786 sr->flags = READ_ONCE(sqe->ioprio); 787 if (sr->flags & ~RECVMSG_FLAGS) 788 return -EINVAL; 789 sr->msg_flags = READ_ONCE(sqe->msg_flags); 790 if (sr->msg_flags & MSG_DONTWAIT) 791 req->flags |= REQ_F_NOWAIT; 792 if (sr->msg_flags & MSG_ERRQUEUE) 793 req->flags |= REQ_F_CLEAR_POLLIN; 794 if (req->flags & REQ_F_BUFFER_SELECT) { 795 /* 796 * Store the buffer group for this multishot receive separately, 797 * as if we end up doing an io-wq based issue that selects a 798 * buffer, it has to be committed immediately and that will 799 * clear ->buf_list. This means we lose the link to the buffer 800 * list, and the eventual buffer put on completion then cannot 801 * restore it. 802 */ 803 sr->buf_group = req->buf_index; 804 req->buf_list = NULL; 805 } 806 if (sr->flags & IORING_RECV_MULTISHOT) { 807 if (!(req->flags & REQ_F_BUFFER_SELECT)) 808 return -EINVAL; 809 if (sr->msg_flags & MSG_WAITALL) 810 return -EINVAL; 811 if (req->opcode == IORING_OP_RECV && sr->len) 812 return -EINVAL; 813 req->flags |= REQ_F_APOLL_MULTISHOT; 814 } 815 if (sr->flags & IORING_RECVSEND_BUNDLE) { 816 if (req->opcode == IORING_OP_RECVMSG) 817 return -EINVAL; 818 } 819 820 #ifdef CONFIG_COMPAT 821 if (req->ctx->compat) 822 sr->msg_flags |= MSG_CMSG_COMPAT; 823 #endif 824 sr->nr_multishot_loops = 0; 825 return io_recvmsg_prep_setup(req); 826 } 827 828 /* 829 * Finishes io_recv and io_recvmsg. 830 * 831 * Returns true if it is actually finished, or false if it should run 832 * again (for multishot). 833 */ 834 static inline bool io_recv_finish(struct io_kiocb *req, int *ret, 835 struct io_async_msghdr *kmsg, 836 bool mshot_finished, unsigned issue_flags) 837 { 838 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 839 unsigned int cflags = 0; 840 841 if (kmsg->msg.msg_inq > 0) 842 cflags |= IORING_CQE_F_SOCK_NONEMPTY; 843 844 if (sr->flags & IORING_RECVSEND_BUNDLE) { 845 cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret), 846 issue_flags); 847 /* bundle with no more immediate buffers, we're done */ 848 if (req->flags & REQ_F_BL_EMPTY) 849 goto finish; 850 } else { 851 cflags |= io_put_kbuf(req, *ret, issue_flags); 852 } 853 854 /* 855 * Fill CQE for this receive and see if we should keep trying to 856 * receive from this socket. 857 */ 858 if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished && 859 io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) { 860 int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE; 861 862 io_mshot_prep_retry(req, kmsg); 863 /* Known not-empty or unknown state, retry */ 864 if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) { 865 if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY) 866 return false; 867 /* mshot retries exceeded, force a requeue */ 868 sr->nr_multishot_loops = 0; 869 mshot_retry_ret = IOU_REQUEUE; 870 } 871 if (issue_flags & IO_URING_F_MULTISHOT) 872 *ret = mshot_retry_ret; 873 else 874 *ret = -EAGAIN; 875 return true; 876 } 877 878 /* Finish the request / stop multishot. */ 879 finish: 880 io_req_set_res(req, *ret, cflags); 881 882 if (issue_flags & IO_URING_F_MULTISHOT) 883 *ret = IOU_STOP_MULTISHOT; 884 else 885 *ret = IOU_OK; 886 io_req_msg_cleanup(req, issue_flags); 887 return true; 888 } 889 890 static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg, 891 struct io_sr_msg *sr, void __user **buf, 892 size_t *len) 893 { 894 unsigned long ubuf = (unsigned long) *buf; 895 unsigned long hdr; 896 897 hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen + 898 kmsg->controllen; 899 if (*len < hdr) 900 return -EFAULT; 901 902 if (kmsg->controllen) { 903 unsigned long control = ubuf + hdr - kmsg->controllen; 904 905 kmsg->msg.msg_control_user = (void __user *) control; 906 kmsg->msg.msg_controllen = kmsg->controllen; 907 } 908 909 sr->buf = *buf; /* stash for later copy */ 910 *buf = (void __user *) (ubuf + hdr); 911 kmsg->payloadlen = *len = *len - hdr; 912 return 0; 913 } 914 915 struct io_recvmsg_multishot_hdr { 916 struct io_uring_recvmsg_out msg; 917 struct sockaddr_storage addr; 918 }; 919 920 static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io, 921 struct io_async_msghdr *kmsg, 922 unsigned int flags, bool *finished) 923 { 924 int err; 925 int copy_len; 926 struct io_recvmsg_multishot_hdr hdr; 927 928 if (kmsg->namelen) 929 kmsg->msg.msg_name = &hdr.addr; 930 kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); 931 kmsg->msg.msg_namelen = 0; 932 933 if (sock->file->f_flags & O_NONBLOCK) 934 flags |= MSG_DONTWAIT; 935 936 err = sock_recvmsg(sock, &kmsg->msg, flags); 937 *finished = err <= 0; 938 if (err < 0) 939 return err; 940 941 hdr.msg = (struct io_uring_recvmsg_out) { 942 .controllen = kmsg->controllen - kmsg->msg.msg_controllen, 943 .flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT 944 }; 945 946 hdr.msg.payloadlen = err; 947 if (err > kmsg->payloadlen) 948 err = kmsg->payloadlen; 949 950 copy_len = sizeof(struct io_uring_recvmsg_out); 951 if (kmsg->msg.msg_namelen > kmsg->namelen) 952 copy_len += kmsg->namelen; 953 else 954 copy_len += kmsg->msg.msg_namelen; 955 956 /* 957 * "fromlen shall refer to the value before truncation.." 958 * 1003.1g 959 */ 960 hdr.msg.namelen = kmsg->msg.msg_namelen; 961 962 /* ensure that there is no gap between hdr and sockaddr_storage */ 963 BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) != 964 sizeof(struct io_uring_recvmsg_out)); 965 if (copy_to_user(io->buf, &hdr, copy_len)) { 966 *finished = true; 967 return -EFAULT; 968 } 969 970 return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen + 971 kmsg->controllen + err; 972 } 973 974 int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) 975 { 976 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 977 struct io_async_msghdr *kmsg = req->async_data; 978 struct socket *sock; 979 unsigned flags; 980 int ret, min_ret = 0; 981 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 982 bool mshot_finished = true; 983 984 sock = sock_from_file(req->file); 985 if (unlikely(!sock)) 986 return -ENOTSOCK; 987 988 if (!(req->flags & REQ_F_POLLED) && 989 (sr->flags & IORING_RECVSEND_POLL_FIRST)) 990 return -EAGAIN; 991 992 flags = sr->msg_flags; 993 if (force_nonblock) 994 flags |= MSG_DONTWAIT; 995 996 retry_multishot: 997 if (io_do_buffer_select(req)) { 998 void __user *buf; 999 size_t len = sr->len; 1000 1001 buf = io_buffer_select(req, &len, issue_flags); 1002 if (!buf) 1003 return -ENOBUFS; 1004 1005 if (req->flags & REQ_F_APOLL_MULTISHOT) { 1006 ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len); 1007 if (ret) { 1008 io_kbuf_recycle(req, issue_flags); 1009 return ret; 1010 } 1011 } 1012 1013 iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len); 1014 } 1015 1016 kmsg->msg.msg_get_inq = 1; 1017 kmsg->msg.msg_inq = -1; 1018 if (req->flags & REQ_F_APOLL_MULTISHOT) { 1019 ret = io_recvmsg_multishot(sock, sr, kmsg, flags, 1020 &mshot_finished); 1021 } else { 1022 /* disable partial retry for recvmsg with cmsg attached */ 1023 if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen) 1024 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 1025 1026 ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, 1027 kmsg->uaddr, flags); 1028 } 1029 1030 if (ret < min_ret) { 1031 if (ret == -EAGAIN && force_nonblock) { 1032 if (issue_flags & IO_URING_F_MULTISHOT) { 1033 io_kbuf_recycle(req, issue_flags); 1034 return IOU_ISSUE_SKIP_COMPLETE; 1035 } 1036 return -EAGAIN; 1037 } 1038 if (ret > 0 && io_net_retry(sock, flags)) { 1039 sr->done_io += ret; 1040 req->flags |= REQ_F_BL_NO_RECYCLE; 1041 return -EAGAIN; 1042 } 1043 if (ret == -ERESTARTSYS) 1044 ret = -EINTR; 1045 req_set_fail(req); 1046 } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { 1047 req_set_fail(req); 1048 } 1049 1050 if (ret > 0) 1051 ret += sr->done_io; 1052 else if (sr->done_io) 1053 ret = sr->done_io; 1054 else 1055 io_kbuf_recycle(req, issue_flags); 1056 1057 if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags)) 1058 goto retry_multishot; 1059 1060 return ret; 1061 } 1062 1063 static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg, 1064 size_t *len, unsigned int issue_flags) 1065 { 1066 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1067 int ret; 1068 1069 /* 1070 * If the ring isn't locked, then don't use the peek interface 1071 * to grab multiple buffers as we will lock/unlock between 1072 * this selection and posting the buffers. 1073 */ 1074 if (!(issue_flags & IO_URING_F_UNLOCKED) && 1075 sr->flags & IORING_RECVSEND_BUNDLE) { 1076 struct buf_sel_arg arg = { 1077 .iovs = &kmsg->fast_iov, 1078 .nr_iovs = 1, 1079 .mode = KBUF_MODE_EXPAND, 1080 }; 1081 1082 if (kmsg->free_iov) { 1083 arg.nr_iovs = kmsg->free_iov_nr; 1084 arg.iovs = kmsg->free_iov; 1085 arg.mode |= KBUF_MODE_FREE; 1086 } 1087 1088 if (kmsg->msg.msg_inq > 0) 1089 arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq); 1090 1091 ret = io_buffers_peek(req, &arg); 1092 if (unlikely(ret < 0)) 1093 return ret; 1094 1095 /* special case 1 vec, can be a fast path */ 1096 if (ret == 1) { 1097 sr->buf = arg.iovs[0].iov_base; 1098 sr->len = arg.iovs[0].iov_len; 1099 goto map_ubuf; 1100 } 1101 iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret, 1102 arg.out_len); 1103 if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) { 1104 kmsg->free_iov_nr = ret; 1105 kmsg->free_iov = arg.iovs; 1106 req->flags |= REQ_F_NEED_CLEANUP; 1107 } 1108 } else { 1109 void __user *buf; 1110 1111 *len = sr->len; 1112 buf = io_buffer_select(req, len, issue_flags); 1113 if (!buf) 1114 return -ENOBUFS; 1115 sr->buf = buf; 1116 sr->len = *len; 1117 map_ubuf: 1118 ret = import_ubuf(ITER_DEST, sr->buf, sr->len, 1119 &kmsg->msg.msg_iter); 1120 if (unlikely(ret)) 1121 return ret; 1122 } 1123 1124 return 0; 1125 } 1126 1127 int io_recv(struct io_kiocb *req, unsigned int issue_flags) 1128 { 1129 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1130 struct io_async_msghdr *kmsg = req->async_data; 1131 struct socket *sock; 1132 unsigned flags; 1133 int ret, min_ret = 0; 1134 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 1135 size_t len = sr->len; 1136 bool mshot_finished; 1137 1138 if (!(req->flags & REQ_F_POLLED) && 1139 (sr->flags & IORING_RECVSEND_POLL_FIRST)) 1140 return -EAGAIN; 1141 1142 sock = sock_from_file(req->file); 1143 if (unlikely(!sock)) 1144 return -ENOTSOCK; 1145 1146 flags = sr->msg_flags; 1147 if (force_nonblock) 1148 flags |= MSG_DONTWAIT; 1149 1150 retry_multishot: 1151 if (io_do_buffer_select(req)) { 1152 ret = io_recv_buf_select(req, kmsg, &len, issue_flags); 1153 if (unlikely(ret)) { 1154 kmsg->msg.msg_inq = -1; 1155 goto out_free; 1156 } 1157 sr->buf = NULL; 1158 } 1159 1160 kmsg->msg.msg_flags = 0; 1161 kmsg->msg.msg_inq = -1; 1162 1163 if (flags & MSG_WAITALL) 1164 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 1165 1166 ret = sock_recvmsg(sock, &kmsg->msg, flags); 1167 if (ret < min_ret) { 1168 if (ret == -EAGAIN && force_nonblock) { 1169 if (issue_flags & IO_URING_F_MULTISHOT) { 1170 io_kbuf_recycle(req, issue_flags); 1171 return IOU_ISSUE_SKIP_COMPLETE; 1172 } 1173 1174 return -EAGAIN; 1175 } 1176 if (ret > 0 && io_net_retry(sock, flags)) { 1177 sr->len -= ret; 1178 sr->buf += ret; 1179 sr->done_io += ret; 1180 req->flags |= REQ_F_BL_NO_RECYCLE; 1181 return -EAGAIN; 1182 } 1183 if (ret == -ERESTARTSYS) 1184 ret = -EINTR; 1185 req_set_fail(req); 1186 } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { 1187 out_free: 1188 req_set_fail(req); 1189 } 1190 1191 mshot_finished = ret <= 0; 1192 if (ret > 0) 1193 ret += sr->done_io; 1194 else if (sr->done_io) 1195 ret = sr->done_io; 1196 else 1197 io_kbuf_recycle(req, issue_flags); 1198 1199 if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags)) 1200 goto retry_multishot; 1201 1202 return ret; 1203 } 1204 1205 void io_send_zc_cleanup(struct io_kiocb *req) 1206 { 1207 struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); 1208 struct io_async_msghdr *io = req->async_data; 1209 1210 if (req_has_async_data(req)) 1211 io_netmsg_iovec_free(io); 1212 if (zc->notif) { 1213 io_notif_flush(zc->notif); 1214 zc->notif = NULL; 1215 } 1216 } 1217 1218 #define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF) 1219 #define IO_ZC_FLAGS_VALID (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE) 1220 1221 int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1222 { 1223 struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); 1224 struct io_ring_ctx *ctx = req->ctx; 1225 struct io_kiocb *notif; 1226 1227 zc->done_io = 0; 1228 req->flags |= REQ_F_POLL_NO_LAZY; 1229 1230 if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3))) 1231 return -EINVAL; 1232 /* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */ 1233 if (req->flags & REQ_F_CQE_SKIP) 1234 return -EINVAL; 1235 1236 notif = zc->notif = io_alloc_notif(ctx); 1237 if (!notif) 1238 return -ENOMEM; 1239 notif->cqe.user_data = req->cqe.user_data; 1240 notif->cqe.res = 0; 1241 notif->cqe.flags = IORING_CQE_F_NOTIF; 1242 req->flags |= REQ_F_NEED_CLEANUP; 1243 1244 zc->flags = READ_ONCE(sqe->ioprio); 1245 if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) { 1246 if (zc->flags & ~IO_ZC_FLAGS_VALID) 1247 return -EINVAL; 1248 if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) { 1249 struct io_notif_data *nd = io_notif_to_data(notif); 1250 1251 nd->zc_report = true; 1252 nd->zc_used = false; 1253 nd->zc_copied = false; 1254 } 1255 } 1256 1257 if (zc->flags & IORING_RECVSEND_FIXED_BUF) { 1258 unsigned idx = READ_ONCE(sqe->buf_index); 1259 1260 if (unlikely(idx >= ctx->nr_user_bufs)) 1261 return -EFAULT; 1262 idx = array_index_nospec(idx, ctx->nr_user_bufs); 1263 req->imu = READ_ONCE(ctx->user_bufs[idx]); 1264 io_req_set_rsrc_node(notif, ctx, 0); 1265 } 1266 1267 if (req->opcode == IORING_OP_SEND_ZC) { 1268 if (READ_ONCE(sqe->__pad3[0])) 1269 return -EINVAL; 1270 zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2)); 1271 zc->addr_len = READ_ONCE(sqe->addr_len); 1272 } else { 1273 if (unlikely(sqe->addr2 || sqe->file_index)) 1274 return -EINVAL; 1275 if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF)) 1276 return -EINVAL; 1277 } 1278 1279 zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr)); 1280 zc->len = READ_ONCE(sqe->len); 1281 zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY; 1282 if (zc->msg_flags & MSG_DONTWAIT) 1283 req->flags |= REQ_F_NOWAIT; 1284 1285 #ifdef CONFIG_COMPAT 1286 if (req->ctx->compat) 1287 zc->msg_flags |= MSG_CMSG_COMPAT; 1288 #endif 1289 return io_sendmsg_prep_setup(req, req->opcode == IORING_OP_SENDMSG_ZC); 1290 } 1291 1292 static int io_sg_from_iter_iovec(struct sk_buff *skb, 1293 struct iov_iter *from, size_t length) 1294 { 1295 skb_zcopy_downgrade_managed(skb); 1296 return zerocopy_fill_skb_from_iter(skb, from, length); 1297 } 1298 1299 static int io_sg_from_iter(struct sk_buff *skb, 1300 struct iov_iter *from, size_t length) 1301 { 1302 struct skb_shared_info *shinfo = skb_shinfo(skb); 1303 int frag = shinfo->nr_frags; 1304 int ret = 0; 1305 struct bvec_iter bi; 1306 ssize_t copied = 0; 1307 unsigned long truesize = 0; 1308 1309 if (!frag) 1310 shinfo->flags |= SKBFL_MANAGED_FRAG_REFS; 1311 else if (unlikely(!skb_zcopy_managed(skb))) 1312 return zerocopy_fill_skb_from_iter(skb, from, length); 1313 1314 bi.bi_size = min(from->count, length); 1315 bi.bi_bvec_done = from->iov_offset; 1316 bi.bi_idx = 0; 1317 1318 while (bi.bi_size && frag < MAX_SKB_FRAGS) { 1319 struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi); 1320 1321 copied += v.bv_len; 1322 truesize += PAGE_ALIGN(v.bv_len + v.bv_offset); 1323 __skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page, 1324 v.bv_offset, v.bv_len); 1325 bvec_iter_advance_single(from->bvec, &bi, v.bv_len); 1326 } 1327 if (bi.bi_size) 1328 ret = -EMSGSIZE; 1329 1330 shinfo->nr_frags = frag; 1331 from->bvec += bi.bi_idx; 1332 from->nr_segs -= bi.bi_idx; 1333 from->count -= copied; 1334 from->iov_offset = bi.bi_bvec_done; 1335 1336 skb->data_len += copied; 1337 skb->len += copied; 1338 skb->truesize += truesize; 1339 return ret; 1340 } 1341 1342 static int io_send_zc_import(struct io_kiocb *req, struct io_async_msghdr *kmsg) 1343 { 1344 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1345 int ret; 1346 1347 if (sr->flags & IORING_RECVSEND_FIXED_BUF) { 1348 ret = io_import_fixed(ITER_SOURCE, &kmsg->msg.msg_iter, req->imu, 1349 (u64)(uintptr_t)sr->buf, sr->len); 1350 if (unlikely(ret)) 1351 return ret; 1352 kmsg->msg.sg_from_iter = io_sg_from_iter; 1353 } else { 1354 ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter); 1355 if (unlikely(ret)) 1356 return ret; 1357 ret = io_notif_account_mem(sr->notif, sr->len); 1358 if (unlikely(ret)) 1359 return ret; 1360 kmsg->msg.sg_from_iter = io_sg_from_iter_iovec; 1361 } 1362 1363 return ret; 1364 } 1365 1366 int io_send_zc(struct io_kiocb *req, unsigned int issue_flags) 1367 { 1368 struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); 1369 struct io_async_msghdr *kmsg = req->async_data; 1370 struct socket *sock; 1371 unsigned msg_flags; 1372 int ret, min_ret = 0; 1373 1374 sock = sock_from_file(req->file); 1375 if (unlikely(!sock)) 1376 return -ENOTSOCK; 1377 if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags)) 1378 return -EOPNOTSUPP; 1379 1380 if (!(req->flags & REQ_F_POLLED) && 1381 (zc->flags & IORING_RECVSEND_POLL_FIRST)) 1382 return -EAGAIN; 1383 1384 if (!zc->done_io) { 1385 ret = io_send_zc_import(req, kmsg); 1386 if (unlikely(ret)) 1387 return ret; 1388 } 1389 1390 msg_flags = zc->msg_flags; 1391 if (issue_flags & IO_URING_F_NONBLOCK) 1392 msg_flags |= MSG_DONTWAIT; 1393 if (msg_flags & MSG_WAITALL) 1394 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 1395 msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS; 1396 1397 kmsg->msg.msg_flags = msg_flags; 1398 kmsg->msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg; 1399 ret = sock_sendmsg(sock, &kmsg->msg); 1400 1401 if (unlikely(ret < min_ret)) { 1402 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 1403 return -EAGAIN; 1404 1405 if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) { 1406 zc->len -= ret; 1407 zc->buf += ret; 1408 zc->done_io += ret; 1409 req->flags |= REQ_F_BL_NO_RECYCLE; 1410 return -EAGAIN; 1411 } 1412 if (ret == -ERESTARTSYS) 1413 ret = -EINTR; 1414 req_set_fail(req); 1415 } 1416 1417 if (ret >= 0) 1418 ret += zc->done_io; 1419 else if (zc->done_io) 1420 ret = zc->done_io; 1421 1422 /* 1423 * If we're in io-wq we can't rely on tw ordering guarantees, defer 1424 * flushing notif to io_send_zc_cleanup() 1425 */ 1426 if (!(issue_flags & IO_URING_F_UNLOCKED)) { 1427 io_notif_flush(zc->notif); 1428 io_req_msg_cleanup(req, 0); 1429 } 1430 io_req_set_res(req, ret, IORING_CQE_F_MORE); 1431 return IOU_OK; 1432 } 1433 1434 int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) 1435 { 1436 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1437 struct io_async_msghdr *kmsg = req->async_data; 1438 struct socket *sock; 1439 unsigned flags; 1440 int ret, min_ret = 0; 1441 1442 sock = sock_from_file(req->file); 1443 if (unlikely(!sock)) 1444 return -ENOTSOCK; 1445 if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags)) 1446 return -EOPNOTSUPP; 1447 1448 if (!(req->flags & REQ_F_POLLED) && 1449 (sr->flags & IORING_RECVSEND_POLL_FIRST)) 1450 return -EAGAIN; 1451 1452 flags = sr->msg_flags; 1453 if (issue_flags & IO_URING_F_NONBLOCK) 1454 flags |= MSG_DONTWAIT; 1455 if (flags & MSG_WAITALL) 1456 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 1457 1458 kmsg->msg.msg_control_user = sr->msg_control; 1459 kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg; 1460 kmsg->msg.sg_from_iter = io_sg_from_iter_iovec; 1461 ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); 1462 1463 if (unlikely(ret < min_ret)) { 1464 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 1465 return -EAGAIN; 1466 1467 if (ret > 0 && io_net_retry(sock, flags)) { 1468 sr->done_io += ret; 1469 req->flags |= REQ_F_BL_NO_RECYCLE; 1470 return -EAGAIN; 1471 } 1472 if (ret == -ERESTARTSYS) 1473 ret = -EINTR; 1474 req_set_fail(req); 1475 } 1476 1477 if (ret >= 0) 1478 ret += sr->done_io; 1479 else if (sr->done_io) 1480 ret = sr->done_io; 1481 1482 /* 1483 * If we're in io-wq we can't rely on tw ordering guarantees, defer 1484 * flushing notif to io_send_zc_cleanup() 1485 */ 1486 if (!(issue_flags & IO_URING_F_UNLOCKED)) { 1487 io_notif_flush(sr->notif); 1488 io_req_msg_cleanup(req, 0); 1489 } 1490 io_req_set_res(req, ret, IORING_CQE_F_MORE); 1491 return IOU_OK; 1492 } 1493 1494 void io_sendrecv_fail(struct io_kiocb *req) 1495 { 1496 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1497 1498 if (sr->done_io) 1499 req->cqe.res = sr->done_io; 1500 1501 if ((req->flags & REQ_F_NEED_CLEANUP) && 1502 (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC)) 1503 req->cqe.flags |= IORING_CQE_F_MORE; 1504 } 1505 1506 #define ACCEPT_FLAGS (IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \ 1507 IORING_ACCEPT_POLL_FIRST) 1508 1509 int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1510 { 1511 struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept); 1512 1513 if (sqe->len || sqe->buf_index) 1514 return -EINVAL; 1515 1516 accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); 1517 accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2)); 1518 accept->flags = READ_ONCE(sqe->accept_flags); 1519 accept->nofile = rlimit(RLIMIT_NOFILE); 1520 accept->iou_flags = READ_ONCE(sqe->ioprio); 1521 if (accept->iou_flags & ~ACCEPT_FLAGS) 1522 return -EINVAL; 1523 1524 accept->file_slot = READ_ONCE(sqe->file_index); 1525 if (accept->file_slot) { 1526 if (accept->flags & SOCK_CLOEXEC) 1527 return -EINVAL; 1528 if (accept->iou_flags & IORING_ACCEPT_MULTISHOT && 1529 accept->file_slot != IORING_FILE_INDEX_ALLOC) 1530 return -EINVAL; 1531 } 1532 if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1533 return -EINVAL; 1534 if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK)) 1535 accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1536 if (accept->iou_flags & IORING_ACCEPT_MULTISHOT) 1537 req->flags |= REQ_F_APOLL_MULTISHOT; 1538 if (accept->iou_flags & IORING_ACCEPT_DONTWAIT) 1539 req->flags |= REQ_F_NOWAIT; 1540 return 0; 1541 } 1542 1543 int io_accept(struct io_kiocb *req, unsigned int issue_flags) 1544 { 1545 struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept); 1546 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 1547 bool fixed = !!accept->file_slot; 1548 struct proto_accept_arg arg = { 1549 .flags = force_nonblock ? O_NONBLOCK : 0, 1550 }; 1551 struct file *file; 1552 unsigned cflags; 1553 int ret, fd; 1554 1555 if (!(req->flags & REQ_F_POLLED) && 1556 accept->iou_flags & IORING_ACCEPT_POLL_FIRST) 1557 return -EAGAIN; 1558 1559 retry: 1560 if (!fixed) { 1561 fd = __get_unused_fd_flags(accept->flags, accept->nofile); 1562 if (unlikely(fd < 0)) 1563 return fd; 1564 } 1565 arg.err = 0; 1566 arg.is_empty = -1; 1567 file = do_accept(req->file, &arg, accept->addr, accept->addr_len, 1568 accept->flags); 1569 if (IS_ERR(file)) { 1570 if (!fixed) 1571 put_unused_fd(fd); 1572 ret = PTR_ERR(file); 1573 if (ret == -EAGAIN && force_nonblock && 1574 !(accept->iou_flags & IORING_ACCEPT_DONTWAIT)) { 1575 /* 1576 * if it's multishot and polled, we don't need to 1577 * return EAGAIN to arm the poll infra since it 1578 * has already been done 1579 */ 1580 if (issue_flags & IO_URING_F_MULTISHOT) 1581 return IOU_ISSUE_SKIP_COMPLETE; 1582 return ret; 1583 } 1584 if (ret == -ERESTARTSYS) 1585 ret = -EINTR; 1586 req_set_fail(req); 1587 } else if (!fixed) { 1588 fd_install(fd, file); 1589 ret = fd; 1590 } else { 1591 ret = io_fixed_fd_install(req, issue_flags, file, 1592 accept->file_slot); 1593 } 1594 1595 cflags = 0; 1596 if (!arg.is_empty) 1597 cflags |= IORING_CQE_F_SOCK_NONEMPTY; 1598 1599 if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { 1600 io_req_set_res(req, ret, cflags); 1601 return IOU_OK; 1602 } 1603 1604 if (ret < 0) 1605 return ret; 1606 if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) { 1607 if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1) 1608 goto retry; 1609 if (issue_flags & IO_URING_F_MULTISHOT) 1610 return IOU_ISSUE_SKIP_COMPLETE; 1611 return -EAGAIN; 1612 } 1613 1614 io_req_set_res(req, ret, cflags); 1615 return IOU_STOP_MULTISHOT; 1616 } 1617 1618 int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1619 { 1620 struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket); 1621 1622 if (sqe->addr || sqe->rw_flags || sqe->buf_index) 1623 return -EINVAL; 1624 1625 sock->domain = READ_ONCE(sqe->fd); 1626 sock->type = READ_ONCE(sqe->off); 1627 sock->protocol = READ_ONCE(sqe->len); 1628 sock->file_slot = READ_ONCE(sqe->file_index); 1629 sock->nofile = rlimit(RLIMIT_NOFILE); 1630 1631 sock->flags = sock->type & ~SOCK_TYPE_MASK; 1632 if (sock->file_slot && (sock->flags & SOCK_CLOEXEC)) 1633 return -EINVAL; 1634 if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1635 return -EINVAL; 1636 return 0; 1637 } 1638 1639 int io_socket(struct io_kiocb *req, unsigned int issue_flags) 1640 { 1641 struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket); 1642 bool fixed = !!sock->file_slot; 1643 struct file *file; 1644 int ret, fd; 1645 1646 if (!fixed) { 1647 fd = __get_unused_fd_flags(sock->flags, sock->nofile); 1648 if (unlikely(fd < 0)) 1649 return fd; 1650 } 1651 file = __sys_socket_file(sock->domain, sock->type, sock->protocol); 1652 if (IS_ERR(file)) { 1653 if (!fixed) 1654 put_unused_fd(fd); 1655 ret = PTR_ERR(file); 1656 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 1657 return -EAGAIN; 1658 if (ret == -ERESTARTSYS) 1659 ret = -EINTR; 1660 req_set_fail(req); 1661 } else if (!fixed) { 1662 fd_install(fd, file); 1663 ret = fd; 1664 } else { 1665 ret = io_fixed_fd_install(req, issue_flags, file, 1666 sock->file_slot); 1667 } 1668 io_req_set_res(req, ret, 0); 1669 return IOU_OK; 1670 } 1671 1672 int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1673 { 1674 struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect); 1675 struct io_async_msghdr *io; 1676 1677 if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) 1678 return -EINVAL; 1679 1680 conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); 1681 conn->addr_len = READ_ONCE(sqe->addr2); 1682 conn->in_progress = conn->seen_econnaborted = false; 1683 1684 io = io_msg_alloc_async(req); 1685 if (unlikely(!io)) 1686 return -ENOMEM; 1687 1688 return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr); 1689 } 1690 1691 int io_connect(struct io_kiocb *req, unsigned int issue_flags) 1692 { 1693 struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect); 1694 struct io_async_msghdr *io = req->async_data; 1695 unsigned file_flags; 1696 int ret; 1697 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 1698 1699 file_flags = force_nonblock ? O_NONBLOCK : 0; 1700 1701 ret = __sys_connect_file(req->file, &io->addr, connect->addr_len, 1702 file_flags); 1703 if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED) 1704 && force_nonblock) { 1705 if (ret == -EINPROGRESS) { 1706 connect->in_progress = true; 1707 } else if (ret == -ECONNABORTED) { 1708 if (connect->seen_econnaborted) 1709 goto out; 1710 connect->seen_econnaborted = true; 1711 } 1712 return -EAGAIN; 1713 } 1714 if (connect->in_progress) { 1715 /* 1716 * At least bluetooth will return -EBADFD on a re-connect 1717 * attempt, and it's (supposedly) also valid to get -EISCONN 1718 * which means the previous result is good. For both of these, 1719 * grab the sock_error() and use that for the completion. 1720 */ 1721 if (ret == -EBADFD || ret == -EISCONN) 1722 ret = sock_error(sock_from_file(req->file)->sk); 1723 } 1724 if (ret == -ERESTARTSYS) 1725 ret = -EINTR; 1726 out: 1727 if (ret < 0) 1728 req_set_fail(req); 1729 io_req_msg_cleanup(req, issue_flags); 1730 io_req_set_res(req, ret, 0); 1731 return IOU_OK; 1732 } 1733 1734 int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1735 { 1736 struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind); 1737 struct sockaddr __user *uaddr; 1738 struct io_async_msghdr *io; 1739 1740 if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) 1741 return -EINVAL; 1742 1743 uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr)); 1744 bind->addr_len = READ_ONCE(sqe->addr2); 1745 1746 io = io_msg_alloc_async(req); 1747 if (unlikely(!io)) 1748 return -ENOMEM; 1749 return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr); 1750 } 1751 1752 int io_bind(struct io_kiocb *req, unsigned int issue_flags) 1753 { 1754 struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind); 1755 struct io_async_msghdr *io = req->async_data; 1756 struct socket *sock; 1757 int ret; 1758 1759 sock = sock_from_file(req->file); 1760 if (unlikely(!sock)) 1761 return -ENOTSOCK; 1762 1763 ret = __sys_bind_socket(sock, &io->addr, bind->addr_len); 1764 if (ret < 0) 1765 req_set_fail(req); 1766 io_req_set_res(req, ret, 0); 1767 return 0; 1768 } 1769 1770 int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1771 { 1772 struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen); 1773 1774 if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2) 1775 return -EINVAL; 1776 1777 listen->backlog = READ_ONCE(sqe->len); 1778 return 0; 1779 } 1780 1781 int io_listen(struct io_kiocb *req, unsigned int issue_flags) 1782 { 1783 struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen); 1784 struct socket *sock; 1785 int ret; 1786 1787 sock = sock_from_file(req->file); 1788 if (unlikely(!sock)) 1789 return -ENOTSOCK; 1790 1791 ret = __sys_listen_socket(sock, listen->backlog); 1792 if (ret < 0) 1793 req_set_fail(req); 1794 io_req_set_res(req, ret, 0); 1795 return 0; 1796 } 1797 1798 void io_netmsg_cache_free(const void *entry) 1799 { 1800 struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry; 1801 1802 if (kmsg->free_iov) { 1803 kasan_mempool_unpoison_object(kmsg->free_iov, 1804 kmsg->free_iov_nr * sizeof(struct iovec)); 1805 io_netmsg_iovec_free(kmsg); 1806 } 1807 kfree(kmsg); 1808 } 1809 #endif 1810