// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"

#if defined(CONFIG_NET)
struct io_shutdown {
	struct file *file;
	int how;
};

struct io_accept {
	struct file *file;
	struct sockaddr __user *addr;
	int __user *addr_len;
	int flags;
	int iou_flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_socket {
	struct file *file;
	int domain;
	int type;
	int protocol;
	int flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_connect {
	struct file *file;
	struct sockaddr __user *addr;
	int addr_len;
	bool in_progress;
	bool seen_econnaborted;
};

struct io_bind {
	struct file *file;
	int addr_len;
};

struct io_listen {
	struct file *file;
	int backlog;
};

struct io_sr_msg {
	struct file *file;
	union {
		struct compat_msghdr __user *umsg_compat;
		struct user_msghdr __user *umsg;
		void __user *buf;
	};
	int len;
	unsigned done_io;
	unsigned msg_flags;
	unsigned nr_multishot_loops;
	u16 flags;
	/* initialised and used only by !msg send variants */
	u16 addr_len;
	u16 buf_group;
	void __user *addr;
	void __user *msg_control;
	/* used only for send zerocopy */
	struct io_kiocb *notif;
};
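/*
 * The structs above hold the per-opcode command state for each request type;
 * the prep and issue helpers below recover them from the request with
 * io_kiocb_to_cmd(req, struct ...).
 */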
/*
 * Number of times we'll try and do receives if there's more data. If we
 * exceed this limit, then add us to the back of the queue and retry from
 * there. This helps fairness between flooding clients.
 */
#define MULTISHOT_MAX_RETRY	32

int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
	struct socket *sock;
	int ret;

	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}

static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
{
	if (kmsg->free_iov) {
		kfree(kmsg->free_iov);
		kmsg->free_iov_nr = 0;
		kmsg->free_iov = NULL;
	}
}

static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;
	struct iovec *iov;

	/* can't recycle, ensure we free the iovec if we have one */
	if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
		io_netmsg_iovec_free(hdr);
		return;
	}

	/* Let normal cleanup path reap it if we fail adding to the cache */
	iov = hdr->free_iov;
	if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) {
		if (iov)
			kasan_mempool_poison_object(iov);
		req->async_data = NULL;
		req->flags &= ~REQ_F_ASYNC_DATA;
	}
}

static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_async_msghdr *hdr;

	hdr = io_alloc_cache_get(&ctx->netmsg_cache);
	if (hdr) {
		if (hdr->free_iov) {
			kasan_mempool_unpoison_object(hdr->free_iov,
				hdr->free_iov_nr * sizeof(struct iovec));
			req->flags |= REQ_F_NEED_CLEANUP;
		}
		req->flags |= REQ_F_ASYNC_DATA;
		req->async_data = hdr;
		return hdr;
	}

	if (!io_alloc_async_data(req)) {
		hdr = req->async_data;
		hdr->free_iov_nr = 0;
		hdr->free_iov = NULL;
		return hdr;
	}
	return NULL;
}

/* assign new iovec to kmsg, if we need to */
static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
			     struct iovec *iov)
{
	if (iov) {
		req->flags |= REQ_F_NEED_CLEANUP;
		kmsg->free_iov_nr = kmsg->msg.msg_iter.nr_segs;
		if (kmsg->free_iov)
			kfree(kmsg->free_iov);
		kmsg->free_iov = iov;
	}
	return 0;
}

static inline void io_mshot_prep_retry(struct io_kiocb *req,
				       struct io_async_msghdr *kmsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	req->flags &= ~REQ_F_BL_EMPTY;
	sr->done_io = 0;
	sr->len = 0; /* get from the provided buffer */
	req->buf_index = sr->buf_group;
}
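/*
 * The helpers below import the user-supplied msghdr in either native or
 * compat layout. With provided buffers (REQ_F_BUFFER_SELECT) at most one
 * iovec is allowed and only its length is read here, since the actual buffer
 * address comes from the selected buffer at issue time.
 */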
#ifdef CONFIG_COMPAT
static int io_compat_msg_copy_hdr(struct io_kiocb *req,
				  struct io_async_msghdr *iomsg,
				  struct compat_msghdr *msg, int ddir)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_iovec __user *uiov;
	struct iovec *iov;
	int ret, nr_segs;

	if (iomsg->free_iov) {
		nr_segs = iomsg->free_iov_nr;
		iov = iomsg->free_iov;
	} else {
		iov = &iomsg->fast_iov;
		nr_segs = 1;
	}

	if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
		return -EFAULT;

	uiov = compat_ptr(msg->msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		compat_ssize_t clen;

		if (msg->msg_iovlen == 0) {
			sr->len = iov->iov_len = 0;
			iov->iov_base = NULL;
		} else if (msg->msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (!access_ok(uiov, sizeof(*uiov)))
				return -EFAULT;
			if (__get_user(clen, &uiov->iov_len))
				return -EFAULT;
			if (clen < 0)
				return -EINVAL;
			sr->len = clen;
		}

		return 0;
	}

	ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen,
			     nr_segs, &iov, &iomsg->msg.msg_iter, true);
	if (unlikely(ret < 0))
		return ret;

	return io_net_vec_assign(req, iomsg, iov);
}
#endif

static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			   struct user_msghdr *msg, int ddir)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct iovec *iov;
	int ret, nr_segs;

	if (iomsg->free_iov) {
		nr_segs = iomsg->free_iov_nr;
		iov = iomsg->free_iov;
	} else {
		iov = &iomsg->fast_iov;
		nr_segs = 1;
	}

	if (!user_access_begin(sr->umsg, sizeof(*sr->umsg)))
		return -EFAULT;

	ret = -EFAULT;
	unsafe_get_user(msg->msg_name, &sr->umsg->msg_name, ua_end);
	unsafe_get_user(msg->msg_namelen, &sr->umsg->msg_namelen, ua_end);
	unsafe_get_user(msg->msg_iov, &sr->umsg->msg_iov, ua_end);
	unsafe_get_user(msg->msg_iovlen, &sr->umsg->msg_iovlen, ua_end);
	unsafe_get_user(msg->msg_control, &sr->umsg->msg_control, ua_end);
	unsafe_get_user(msg->msg_controllen, &sr->umsg->msg_controllen, ua_end);
	msg->msg_flags = 0;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg->msg_iovlen == 0) {
			sr->len = iov->iov_len = 0;
			iov->iov_base = NULL;
		} else if (msg->msg_iovlen > 1) {
			ret = -EINVAL;
			goto ua_end;
		} else {
			/* we only need the length for provided buffers */
			if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t)))
				goto ua_end;
			unsafe_get_user(iov->iov_len, &msg->msg_iov[0].iov_len,
					ua_end);
			sr->len = iov->iov_len;
		}
		ret = 0;
ua_end:
		user_access_end();
		return ret;
	}

	user_access_end();
	ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, nr_segs,
			     &iov, &iomsg->msg.msg_iter, false);
	if (unlikely(ret < 0))
		return ret;

	return io_net_vec_assign(req, iomsg, iov);
}

static int io_sendmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr msg;
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

#ifdef CONFIG_COMPAT
	if (unlikely(req->ctx->compat)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE);
		if (unlikely(ret))
			return ret;

		return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL);
	}
#endif

	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE);
	if (unlikely(ret))
		return ret;

	ret = __copy_msghdr(&iomsg->msg, &msg, NULL);

	/* save msg_control as sys_sendmsg() overwrites it */
	sr->msg_control = iomsg->msg.msg_control_user;
	return ret;
}
void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	io_netmsg_iovec_free(io);
}

static int io_send_setup(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	int ret;

	kmsg->msg.msg_name = NULL;
	kmsg->msg.msg_namelen = 0;
	kmsg->msg.msg_control = NULL;
	kmsg->msg.msg_controllen = 0;
	kmsg->msg.msg_ubuf = NULL;

	if (sr->addr) {
		ret = move_addr_to_kernel(sr->addr, sr->addr_len, &kmsg->addr);
		if (unlikely(ret < 0))
			return ret;
		kmsg->msg.msg_name = &kmsg->addr;
		kmsg->msg.msg_namelen = sr->addr_len;
	}
	if (!io_do_buffer_select(req)) {
		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret < 0))
			return ret;
	}
	return 0;
}

static int io_sendmsg_prep_setup(struct io_kiocb *req, int is_msg)
{
	struct io_async_msghdr *kmsg;
	int ret;

	kmsg = io_msg_alloc_async(req);
	if (unlikely(!kmsg))
		return -ENOMEM;
	if (!is_msg)
		return io_send_setup(req);
	ret = io_sendmsg_copy_hdr(req, kmsg);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE)

int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;

	if (req->opcode == IORING_OP_SEND) {
		if (READ_ONCE(sqe->__pad3[0]))
			return -EINVAL;
		sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
		sr->addr_len = READ_ONCE(sqe->addr_len);
	} else if (sqe->addr2 || sqe->file_index) {
		return -EINVAL;
	}

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~SENDMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_SENDMSG)
			return -EINVAL;
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		sr->msg_flags |= MSG_WAITALL;
		sr->buf_group = req->buf_index;
		req->buf_list = NULL;
	}
	if (req->flags & REQ_F_BUFFER_SELECT && sr->len)
		return -EINVAL;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	return io_sendmsg_prep_setup(req, req->opcode == IORING_OP_SENDMSG);
}
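/*
 * Illustrative userspace sketch (an assumption, not part of this file): a
 * bundled send as validated by io_sendmsg_prep() above uses IORING_OP_SEND
 * with provided buffers and a zero length, e.g. with raw liburing SQE setup:
 *
 *	io_uring_prep_send(sqe, sockfd, NULL, 0, 0);
 *	sqe->flags |= IOSQE_BUFFER_SELECT;
 *	sqe->buf_group = BGID;			// provided buffer group id
 *	sqe->ioprio = IORING_RECVSEND_BUNDLE;	// lands in sr->flags above
 */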
static void io_req_msg_cleanup(struct io_kiocb *req,
			       unsigned int issue_flags)
{
	req->flags &= ~REQ_F_NEED_CLEANUP;
	io_netmsg_recycle(req, issue_flags);
}

/*
 * For bundle completions, we need to figure out how many segments we consumed.
 * A bundle could be using a single ITER_UBUF if that's all we mapped, or it
 * could be using an ITER_IOVEC. If the latter, then if we consumed all of
 * the segments, then it's a trivial question to answer. If we have residual
 * data in the iter, then loop the segments to figure out how much we
 * transferred.
 */
static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
{
	struct iovec *iov;
	int nbufs;

	/* no data is always zero segments, and a ubuf is always 1 segment */
	if (ret <= 0)
		return 0;
	if (iter_is_ubuf(&kmsg->msg.msg_iter))
		return 1;

	iov = kmsg->free_iov;
	if (!iov)
		iov = &kmsg->fast_iov;

	/* if all data was transferred, it's basic pointer math */
	if (!iov_iter_count(&kmsg->msg.msg_iter))
		return iter_iov(&kmsg->msg.msg_iter) - iov;

	/* short transfer, count segments */
	nbufs = 0;
	do {
		int this_len = min_t(int, iov[nbufs].iov_len, ret);

		nbufs++;
		ret -= this_len;
	} while (ret);

	return nbufs;
}

static inline bool io_send_finish(struct io_kiocb *req, int *ret,
				  struct io_async_msghdr *kmsg,
				  unsigned issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	bool bundle_finished = *ret <= 0;
	unsigned int cflags;

	if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
		cflags = io_put_kbuf(req, issue_flags);
		goto finish;
	}

	cflags = io_put_kbufs(req, io_bundle_nbufs(kmsg, *ret), issue_flags);

	if (bundle_finished || req->flags & REQ_F_BL_EMPTY)
		goto finish;

	/*
	 * Fill CQE for this send and see if we should keep trying to
	 * send to this socket.
	 */
	if (io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
		io_mshot_prep_retry(req, kmsg);
		return false;
	}

	/* Otherwise stop bundle and use the current result. */
finish:
	io_req_set_res(req, *ret, cflags);
	*ret = IOU_OK;
	return true;
}

int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret > 0 && io_net_retry(sock, flags)) {
			kmsg->msg.msg_controllen = 0;
			kmsg->msg.msg_control = NULL;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	io_req_msg_cleanup(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;

retry_bundle:
	if (io_do_buffer_select(req)) {
		struct buf_sel_arg arg = {
			.iovs = &kmsg->fast_iov,
			.max_len = INT_MAX,
			.nr_iovs = 1,
		};

		if (kmsg->free_iov) {
			arg.nr_iovs = kmsg->free_iov_nr;
			arg.iovs = kmsg->free_iov;
			arg.mode = KBUF_MODE_FREE;
		}

		if (!(sr->flags & IORING_RECVSEND_BUNDLE))
			arg.nr_iovs = 1;
		else
			arg.mode |= KBUF_MODE_EXPAND;

		ret = io_buffers_select(req, &arg, issue_flags);
		if (unlikely(ret < 0))
			return ret;

		sr->len = arg.out_len;
		iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE, arg.iovs, ret,
			      arg.out_len);
		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
			kmsg->free_iov_nr = ret;
			kmsg->free_iov = arg.iovs;
			req->flags |= REQ_F_NEED_CLEANUP;
		}
	}

	/*
	 * If MSG_WAITALL is set, or this is a bundle send, then we need
	 * the full amount. If just the bundle flag is set and we do a short
	 * send, we complete the bundle sequence rather than continue on.
	 */
	if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
	kmsg->msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &kmsg->msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	if (!io_send_finish(req, &ret, kmsg, issue_flags))
		goto retry_bundle;

	io_req_msg_cleanup(req, issue_flags);
	return ret;
}

static int io_recvmsg_mshot_prep(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg,
				 int namelen, size_t controllen)
{
	if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) ==
	    (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) {
		int hdr;

		if (unlikely(namelen < 0))
			return -EOVERFLOW;
		if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
				       namelen, &hdr))
			return -EOVERFLOW;
		if (check_add_overflow(hdr, controllen, &hdr))
			return -EOVERFLOW;

		iomsg->namelen = namelen;
		iomsg->controllen = controllen;
		return 0;
	}

	return 0;
}

static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct user_msghdr msg;
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

#ifdef CONFIG_COMPAT
	if (unlikely(req->ctx->compat)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST);
		if (unlikely(ret))
			return ret;

		ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr);
		if (unlikely(ret))
			return ret;

		return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen,
					     cmsg.msg_controllen);
	}
#endif

	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST);
	if (unlikely(ret))
		return ret;

	ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (unlikely(ret))
		return ret;

	return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
				     msg.msg_controllen);
}
static int io_recvmsg_prep_setup(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg;
	int ret;

	kmsg = io_msg_alloc_async(req);
	if (unlikely(!kmsg))
		return -ENOMEM;

	if (req->opcode == IORING_OP_RECV) {
		kmsg->msg.msg_name = NULL;
		kmsg->msg.msg_namelen = 0;
		kmsg->msg.msg_control = NULL;
		kmsg->msg.msg_get_inq = 1;
		kmsg->msg.msg_controllen = 0;
		kmsg->msg.msg_iocb = NULL;
		kmsg->msg.msg_ubuf = NULL;

		if (!io_do_buffer_select(req)) {
			ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
					  &kmsg->msg.msg_iter);
			if (unlikely(ret))
				return ret;
		}
		return 0;
	}

	ret = io_recvmsg_copy_hdr(req, kmsg);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \
			IORING_RECVSEND_BUNDLE)

int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~RECVMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags);
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (req->flags & REQ_F_BUFFER_SELECT) {
		/*
		 * Store the buffer group for this multishot receive separately,
		 * as if we end up doing an io-wq based issue that selects a
		 * buffer, it has to be committed immediately and that will
		 * clear ->buf_list. This means we lose the link to the buffer
		 * list, and the eventual buffer put on completion then cannot
		 * restore it.
		 */
		sr->buf_group = req->buf_index;
		req->buf_list = NULL;
	}
	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		if (req->opcode == IORING_OP_RECV && sr->len)
			return -EINVAL;
		req->flags |= REQ_F_APOLL_MULTISHOT;
	}
	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_RECVMSG)
			return -EINVAL;
	}

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->nr_multishot_loops = 0;
	return io_recvmsg_prep_setup(req);
}
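/*
 * Illustrative userspace sketch (an assumption, not part of this file): the
 * multishot receive validated above is typically armed once with provided
 * buffers and then keeps posting CQEs until the buffer group runs dry, e.g.
 * with liburing (2.3 or newer assumed):
 *
 *	io_uring_prep_recv_multishot(sqe, sockfd, NULL, 0, 0);
 *	sqe->flags |= IOSQE_BUFFER_SELECT;
 *	sqe->buf_group = BGID;	// buffers supplied via a provided buffer ring
 *
 * Each completion carries IORING_CQE_F_MORE while the request stays armed;
 * a CQE without that flag means the multishot receive has terminated.
 */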
/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
				  struct io_async_msghdr *kmsg,
				  bool mshot_finished, unsigned issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	unsigned int cflags = 0;

	if (kmsg->msg.msg_inq > 0)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		cflags |= io_put_kbufs(req, io_bundle_nbufs(kmsg, *ret),
				       issue_flags);
		/* bundle with no more immediate buffers, we're done */
		if (req->flags & REQ_F_BL_EMPTY)
			goto finish;
	} else {
		cflags |= io_put_kbuf(req, issue_flags);
	}

	/*
	 * Fill CQE for this receive and see if we should keep trying to
	 * receive from this socket.
	 */
	if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
	    io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
		int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;

		io_mshot_prep_retry(req, kmsg);
		/* Known not-empty or unknown state, retry */
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
				return false;
			/* mshot retries exceeded, force a requeue */
			sr->nr_multishot_loops = 0;
			mshot_retry_ret = IOU_REQUEUE;
		}
		if (issue_flags & IO_URING_F_MULTISHOT)
			*ret = mshot_retry_ret;
		else
			*ret = -EAGAIN;
		return true;
	}

	/* Finish the request / stop multishot. */
finish:
	io_req_set_res(req, *ret, cflags);

	if (issue_flags & IO_URING_F_MULTISHOT)
		*ret = IOU_STOP_MULTISHOT;
	else
		*ret = IOU_OK;
	io_req_msg_cleanup(req, issue_flags);
	return true;
}

static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}

struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};
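/*
 * For multishot recvmsg, each selected provided buffer is laid out as
 * io_recvmsg_prep_multishot() arranges it and io_recvmsg_multishot() fills
 * it in: a struct io_uring_recvmsg_out header, then namelen bytes of source
 * address, then controllen bytes of cmsg data, then the payload itself.
 */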
static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 *	"fromlen shall refer to the value before truncation.."
	 *			1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}

int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;
		size_t len = sr->len;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
			if (ret) {
				io_kbuf_recycle(req, issue_flags);
				return ret;
			}
		}

		iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);
	}

	kmsg->msg.msg_get_inq = 1;
	kmsg->msg.msg_inq = -1;
	if (req->flags & REQ_F_APOLL_MULTISHOT) {
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	} else {
		/* disable partial retry for recvmsg with cmsg attached */
		if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
			min_ret = iov_iter_count(&kmsg->msg.msg_iter);

		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);
	}

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}
			return -EAGAIN;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
		goto retry_multishot;

	return ret;
}
static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
			      size_t *len, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	int ret;

	/*
	 * If the ring isn't locked, then don't use the peek interface
	 * to grab multiple buffers as we will lock/unlock between
	 * this selection and posting the buffers.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
	    sr->flags & IORING_RECVSEND_BUNDLE) {
		struct buf_sel_arg arg = {
			.iovs = &kmsg->fast_iov,
			.nr_iovs = 1,
			.mode = KBUF_MODE_EXPAND,
		};

		if (kmsg->free_iov) {
			arg.nr_iovs = kmsg->free_iov_nr;
			arg.iovs = kmsg->free_iov;
			arg.mode |= KBUF_MODE_FREE;
		}

		if (kmsg->msg.msg_inq > 0)
			arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq);

		ret = io_buffers_peek(req, &arg);
		if (unlikely(ret < 0))
			return ret;

		/* special case 1 vec, can be a fast path */
		if (ret == 1) {
			sr->buf = arg.iovs[0].iov_base;
			sr->len = arg.iovs[0].iov_len;
			goto map_ubuf;
		}
		iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
			      arg.out_len);
		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
			kmsg->free_iov_nr = ret;
			kmsg->free_iov = arg.iovs;
			req->flags |= REQ_F_NEED_CLEANUP;
		}
	} else {
		void __user *buf;

		*len = sr->len;
		buf = io_buffer_select(req, len, issue_flags);
		if (!buf)
			return -ENOBUFS;
		sr->buf = buf;
		sr->len = *len;
map_ubuf:
		ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
	}

	return 0;
}

int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	size_t len = sr->len;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	if (io_do_buffer_select(req)) {
		ret = io_recv_buf_select(req, kmsg, &len, issue_flags);
		if (unlikely(ret)) {
			kmsg->msg.msg_inq = -1;
			goto out_free;
		}
		sr->buf = NULL;
	}

	kmsg->msg.msg_flags = 0;
	kmsg->msg.msg_inq = -1;

	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = sock_recvmsg(sock, &kmsg->msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}

			return -EAGAIN;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	if (!io_recv_finish(req, &ret, kmsg, ret <= 0, issue_flags))
		goto retry_multishot;

	return ret;
}
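/*
 * The zero-copy sends below complete in two steps: the request CQE carries
 * the byte count with IORING_CQE_F_MORE set, and the linked notification
 * posts a second CQE with IORING_CQE_F_NOTIF once the kernel no longer
 * references the user pages. Illustrative userspace sketch (an assumption,
 * not part of this file), using liburing:
 *
 *	io_uring_prep_send_zc(sqe, sockfd, buf, len, 0, 0);
 *	// submit, then reap two CQEs; keep buf stable until the
 *	// IORING_CQE_F_NOTIF completion arrives.
 */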
void io_send_zc_cleanup(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io = req->async_data;

	if (req_has_async_data(req))
		io_netmsg_iovec_free(io);
	if (zc->notif) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
	}
}

#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
#define IO_ZC_FLAGS_VALID  (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE)

int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_kiocb *notif;

	zc->done_io = 0;
	req->flags |= REQ_F_POLL_NO_LAZY;

	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	notif->cqe.user_data = req->cqe.user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	req->flags |= REQ_F_NEED_CLEANUP;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
		if (zc->flags & ~IO_ZC_FLAGS_VALID)
			return -EINVAL;
		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
			struct io_notif_data *nd = io_notif_to_data(notif);

			nd->zc_report = true;
			nd->zc_used = false;
			nd->zc_copied = false;
		}
	}

	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		unsigned idx = READ_ONCE(sqe->buf_index);

		if (unlikely(idx >= ctx->nr_user_bufs))
			return -EFAULT;
		idx = array_index_nospec(idx, ctx->nr_user_bufs);
		req->imu = READ_ONCE(ctx->user_bufs[idx]);
		io_req_set_rsrc_node(notif, ctx, 0);
	}

	if (req->opcode == IORING_OP_SEND_ZC) {
		if (READ_ONCE(sqe->__pad3[0]))
			return -EINVAL;
		zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
		zc->addr_len = READ_ONCE(sqe->addr_len);
	} else {
		if (unlikely(sqe->addr2 || sqe->file_index))
			return -EINVAL;
		if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
			return -EINVAL;
	}

	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		zc->msg_flags |= MSG_CMSG_COMPAT;
#endif
	return io_sendmsg_prep_setup(req, req->opcode == IORING_OP_SENDMSG_ZC);
}

static int io_sg_from_iter_iovec(struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	skb_zcopy_downgrade_managed(skb);
	return zerocopy_fill_skb_from_iter(skb, from, length);
}
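/*
 * Map the source bvec pages straight into skb frags via
 * __skb_fill_page_desc_noacc() instead of copying; the skb is marked
 * SKBFL_MANAGED_FRAG_REFS so no per-page references are taken here. If the
 * skb already carries unmanaged frags, fall back to the copying path.
 */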
static int io_sg_from_iter(struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	if (!frag)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return zerocopy_fill_skb_from_iter(skb, from, length);

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	if (bi.bi_size)
		ret = -EMSGSIZE;

	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;
	return ret;
}

static int io_send_zc_import(struct io_kiocb *req, struct io_async_msghdr *kmsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	int ret;

	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
		ret = io_import_fixed(ITER_SOURCE, &kmsg->msg.msg_iter, req->imu,
				      (u64)(uintptr_t)sr->buf, sr->len);
		if (unlikely(ret))
			return ret;
		kmsg->msg.sg_from_iter = io_sg_from_iter;
	} else {
		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
		ret = io_notif_account_mem(sr->notif, sr->len);
		if (unlikely(ret))
			return ret;
		kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
	}

	return ret;
}

int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned msg_flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	if (!zc->done_io) {
		ret = io_send_zc_import(req, kmsg);
		if (unlikely(ret))
			return ret;
	}

	msg_flags = zc->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
	msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;

	kmsg->msg.msg_flags = msg_flags;
	kmsg->msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
	ret = sock_sendmsg(sock, &kmsg->msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) {
			zc->len -= ret;
			zc->buf += ret;
			zc->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += zc->done_io;
	else if (zc->done_io)
		ret = zc->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(zc->notif);
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}
int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;
	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
	kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(sr->notif);
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}

void io_sendrecv_fail(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (sr->done_io)
		req->cqe.res = sr->done_io;

	if ((req->flags & REQ_F_NEED_CLEANUP) &&
	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
		req->cqe.flags |= IORING_CQE_F_MORE;
}

#define ACCEPT_FLAGS	(IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \
			 IORING_ACCEPT_POLL_FIRST)

int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);

	if (sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	accept->iou_flags = READ_ONCE(sqe->ioprio);
	if (accept->iou_flags & ~ACCEPT_FLAGS)
		return -EINVAL;

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		if (accept->iou_flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (accept->iou_flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	if (accept->iou_flags & IORING_ACCEPT_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	return 0;
}
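/*
 * Illustrative userspace sketch (an assumption, not part of this file): the
 * multishot accept path below is normally armed once and then produces one
 * CQE per accepted connection, e.g. with liburing:
 *
 *	io_uring_prep_multishot_accept(sqe, listen_fd, NULL, NULL, 0);
 *
 * Each completion carries the new file descriptor in cqe->res and keeps
 * IORING_CQE_F_MORE set for as long as the request stays armed.
 */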
int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool fixed = !!accept->file_slot;
	struct proto_accept_arg arg = {
		.flags = force_nonblock ? O_NONBLOCK : 0,
	};
	struct file *file;
	unsigned cflags;
	int ret, fd;

	if (!(req->flags & REQ_F_POLLED) &&
	    accept->iou_flags & IORING_ACCEPT_POLL_FIRST)
		return -EAGAIN;

retry:
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	arg.err = 0;
	arg.is_empty = -1;
	file = do_accept(req->file, &arg, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock &&
		    !(accept->iou_flags & IORING_ACCEPT_DONTWAIT)) {
			/*
			 * if it's multishot and polled, we don't need to
			 * return EAGAIN to arm the poll infra since it
			 * has already been done
			 */
			if (issue_flags & IO_URING_F_MULTISHOT)
				return IOU_ISSUE_SKIP_COMPLETE;
			return ret;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  accept->file_slot);
	}

	cflags = 0;
	if (!arg.is_empty)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, ret, cflags);
		return IOU_OK;
	}

	if (ret < 0)
		return ret;
	if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1)
			goto retry;
		if (issue_flags & IO_URING_F_MULTISHOT)
			return IOU_ISSUE_SKIP_COMPLETE;
		return -EAGAIN;
	}

	io_req_set_res(req, ret, cflags);
	return IOU_STOP_MULTISHOT;
}

int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
		return -EINVAL;
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	return 0;
}

int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
	bool fixed = !!sock->file_slot;
	struct file *file;
	int ret, fd;

	if (!fixed) {
		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  sock->file_slot);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	conn->in_progress = conn->seen_econnaborted = false;

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr);
}

int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io = req->async_data;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->addr, connect->addr_len,
				 file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
	    && force_nonblock) {
		if (ret == -EINPROGRESS) {
			connect->in_progress = true;
		} else if (ret == -ECONNABORTED) {
			if (connect->seen_econnaborted)
				goto out;
			connect->seen_econnaborted = true;
		}
		return -EAGAIN;
	}
	if (connect->in_progress) {
		/*
		 * At least bluetooth will return -EBADFD on a re-connect
		 * attempt, and it's (supposedly) also valid to get -EISCONN
		 * which means the previous result is good. For both of these,
		 * grab the sock_error() and use that for the completion.
		 */
		if (ret == -EBADFD || ret == -EISCONN)
			ret = sock_error(sock_from_file(req->file)->sk);
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	io_req_msg_cleanup(req, issue_flags);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct sockaddr __user *uaddr;
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	bind->addr_len = READ_ONCE(sqe->addr2);

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;
	return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr);
}

int io_bind(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct io_async_msghdr *io = req->async_data;
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_bind_socket(sock, &io->addr, bind->addr_len);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}

int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);

	if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2)
		return -EINVAL;

	listen->backlog = READ_ONCE(sqe->len);
	return 0;
}
int io_listen(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_listen_socket(sock, listen->backlog);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}

void io_netmsg_cache_free(const void *entry)
{
	struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;

	if (kmsg->free_iov) {
		kasan_mempool_unpoison_object(kmsg->free_iov,
				kmsg->free_iov_nr * sizeof(struct iovec));
		io_netmsg_iovec_free(kmsg);
	}
	kfree(kmsg);
}
#endif