// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"

#if defined(CONFIG_NET)
struct io_shutdown {
	struct file			*file;
	int				how;
};

struct io_accept {
	struct file			*file;
	struct sockaddr __user		*addr;
	int __user			*addr_len;
	int				flags;
	int				iou_flags;
	u32				file_slot;
	unsigned long			nofile;
};

struct io_socket {
	struct file			*file;
	int				domain;
	int				type;
	int				protocol;
	int				flags;
	u32				file_slot;
	unsigned long			nofile;
};

struct io_connect {
	struct file			*file;
	struct sockaddr __user		*addr;
	int				addr_len;
	bool				in_progress;
	bool				seen_econnaborted;
};

struct io_bind {
	struct file			*file;
	int				addr_len;
};

struct io_listen {
	struct file			*file;
	int				backlog;
};

struct io_sr_msg {
	struct file			*file;
	union {
		struct compat_msghdr __user	*umsg_compat;
		struct user_msghdr __user	*umsg;
		void __user			*buf;
	};
	int				len;
	unsigned			done_io;
	unsigned			msg_flags;
	unsigned			nr_multishot_loops;
	u16				flags;
	/* initialised and used only by !msg send variants */
	u16				buf_group;
	u16				buf_index;
	void __user			*msg_control;
	/* used only for send zerocopy */
	struct io_kiocb			*notif;
};

/*
 * Number of times we'll try and do receives if there's more data. If we
 * exceed this limit, then add us to the back of the queue and retry from
 * there. This helps fairness between flooding clients.
 */
#define MULTISHOT_MAX_RETRY	32

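/*
 * Prep for IORING_OP_SHUTDOWN: only sqe->len (the "how" argument) is used,
 * and the request is forced to async issue since shutdown is always run
 * from a blocking context.
 */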
int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
	struct socket *sock;
	int ret;

	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}

static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
{
	if (kmsg->free_iov) {
		kfree(kmsg->free_iov);
		kmsg->free_iov_nr = 0;
		kmsg->free_iov = NULL;
	}
}

static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;
	struct iovec *iov;

	/* can't recycle, ensure we free the iovec if we have one */
	if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
		io_netmsg_iovec_free(hdr);
		return;
	}

	/* Let normal cleanup path reap it if we fail adding to the cache */
	iov = hdr->free_iov;
	if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) {
		if (iov)
			kasan_mempool_poison_object(iov);
		req->async_data = NULL;
		req->flags &= ~REQ_F_ASYNC_DATA;
	}
}

static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_async_msghdr *hdr;

	hdr = io_alloc_cache_get(&ctx->netmsg_cache);
	if (hdr) {
		if (hdr->free_iov) {
			kasan_mempool_unpoison_object(hdr->free_iov,
				hdr->free_iov_nr * sizeof(struct iovec));
			req->flags |= REQ_F_NEED_CLEANUP;
		}
		req->flags |= REQ_F_ASYNC_DATA;
		req->async_data = hdr;
		return hdr;
	}

	if (!io_alloc_async_data(req)) {
		hdr = req->async_data;
		hdr->free_iov_nr = 0;
		hdr->free_iov = NULL;
		return hdr;
	}
	return NULL;
}

/* assign new iovec to kmsg, if we need to */
static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
			     struct iovec *iov)
{
	if (iov) {
		req->flags |= REQ_F_NEED_CLEANUP;
		kmsg->free_iov_nr = kmsg->msg.msg_iter.nr_segs;
		if (kmsg->free_iov)
			kfree(kmsg->free_iov);
		kmsg->free_iov = iov;
	}
	return 0;
}

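/*
 * Reset send/recv state so a multishot or bundle request can be reissued:
 * clear the "buffer list empty" marker, forget any partial progress, and
 * re-arm provided buffer selection from the stored buffer group.
 */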
static inline void io_mshot_prep_retry(struct io_kiocb *req,
				       struct io_async_msghdr *kmsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	req->flags &= ~REQ_F_BL_EMPTY;
	sr->done_io = 0;
	sr->len = 0; /* get from the provided buffer */
	req->buf_index = sr->buf_group;
}

#ifdef CONFIG_COMPAT
static int io_compat_msg_copy_hdr(struct io_kiocb *req,
				  struct io_async_msghdr *iomsg,
				  struct compat_msghdr *msg, int ddir)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_iovec __user *uiov;
	struct iovec *iov;
	int ret, nr_segs;

	if (iomsg->free_iov) {
		nr_segs = iomsg->free_iov_nr;
		iov = iomsg->free_iov;
	} else {
		iov = &iomsg->fast_iov;
		nr_segs = 1;
	}

	if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
		return -EFAULT;

	uiov = compat_ptr(msg->msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		compat_ssize_t clen;

		if (msg->msg_iovlen == 0) {
			sr->len = iov->iov_len = 0;
			iov->iov_base = NULL;
		} else if (msg->msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (!access_ok(uiov, sizeof(*uiov)))
				return -EFAULT;
			if (__get_user(clen, &uiov->iov_len))
				return -EFAULT;
			if (clen < 0)
				return -EINVAL;
			sr->len = clen;
		}

		return 0;
	}

	ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen,
			     nr_segs, &iov, &iomsg->msg.msg_iter, true);
	if (unlikely(ret < 0))
		return ret;

	return io_net_vec_assign(req, iomsg, iov);
}
#endif

static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			   struct user_msghdr *msg, int ddir)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr __user *umsg = sr->umsg;
	struct iovec *iov;
	int ret, nr_segs;

	if (iomsg->free_iov) {
		nr_segs = iomsg->free_iov_nr;
		iov = iomsg->free_iov;
	} else {
		iov = &iomsg->fast_iov;
		nr_segs = 1;
	}

	if (!user_access_begin(umsg, sizeof(*umsg)))
		return -EFAULT;

	ret = -EFAULT;
	unsafe_get_user(msg->msg_name, &umsg->msg_name, ua_end);
	unsafe_get_user(msg->msg_namelen, &umsg->msg_namelen, ua_end);
	unsafe_get_user(msg->msg_iov, &umsg->msg_iov, ua_end);
	unsafe_get_user(msg->msg_iovlen, &umsg->msg_iovlen, ua_end);
	unsafe_get_user(msg->msg_control, &umsg->msg_control, ua_end);
	unsafe_get_user(msg->msg_controllen, &umsg->msg_controllen, ua_end);
	msg->msg_flags = 0;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg->msg_iovlen == 0) {
			sr->len = iov->iov_len = 0;
			iov->iov_base = NULL;
		} else if (msg->msg_iovlen > 1) {
			ret = -EINVAL;
			goto ua_end;
		} else {
			/* we only need the length for provided buffers */
			if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t)))
				goto ua_end;
			unsafe_get_user(iov->iov_len, &msg->msg_iov[0].iov_len,
					ua_end);
			sr->len = iov->iov_len;
		}
		ret = 0;
ua_end:
		user_access_end();
		return ret;
	}

	user_access_end();
	ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, nr_segs,
			     &iov, &iomsg->msg.msg_iter, false);
	if (unlikely(ret < 0))
		return ret;

	return io_net_vec_assign(req, iomsg, iov);
}

static int io_sendmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr msg;
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

#ifdef CONFIG_COMPAT
	if (unlikely(req->ctx->compat)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE);
		if (unlikely(ret))
			return ret;

		return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL);
	}
#endif

	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE);
	if (unlikely(ret))
		return ret;

	ret = __copy_msghdr(&iomsg->msg, &msg, NULL);

	/* save msg_control as sys_sendmsg() overwrites it */
	sr->msg_control = iomsg->msg.msg_control_user;
	return ret;
}

void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	io_netmsg_iovec_free(io);
}

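/*
 * Setup for the non-msghdr send variants: stash the user buffer, copy in an
 * optional destination address from sqe->addr2, and map the buffer into the
 * iterator unless provided buffer selection is in use.
 */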
static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	void __user *addr;
	u16 addr_len;
	int ret;

	sr->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));

	if (READ_ONCE(sqe->__pad3[0]))
		return -EINVAL;

	kmsg->msg.msg_name = NULL;
	kmsg->msg.msg_namelen = 0;
	kmsg->msg.msg_control = NULL;
	kmsg->msg.msg_controllen = 0;
	kmsg->msg.msg_ubuf = NULL;

	addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	addr_len = READ_ONCE(sqe->addr_len);
	if (addr) {
		ret = move_addr_to_kernel(addr, addr_len, &kmsg->addr);
		if (unlikely(ret < 0))
			return ret;
		kmsg->msg.msg_name = &kmsg->addr;
		kmsg->msg.msg_namelen = addr_len;
	}
	if (!io_do_buffer_select(req)) {
		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret < 0))
			return ret;
	}
	return 0;
}

static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	int ret;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));

	ret = io_sendmsg_copy_hdr(req, kmsg);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE)

int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;

	if (req->opcode != IORING_OP_SEND) {
		if (sqe->addr2 || sqe->file_index)
			return -EINVAL;
	}

	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~SENDMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_SENDMSG)
			return -EINVAL;
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		sr->msg_flags |= MSG_WAITALL;
		sr->buf_group = req->buf_index;
		req->buf_list = NULL;
	}

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	if (unlikely(!io_msg_alloc_async(req)))
		return -ENOMEM;
	if (req->opcode != IORING_OP_SENDMSG)
		return io_send_setup(req, sqe);
	return io_sendmsg_setup(req, sqe);
}

static void io_req_msg_cleanup(struct io_kiocb *req,
			       unsigned int issue_flags)
{
	req->flags &= ~REQ_F_NEED_CLEANUP;
	io_netmsg_recycle(req, issue_flags);
}

/*
 * For bundle completions, we need to figure out how many segments we consumed.
 * A bundle could be using a single ITER_UBUF if that's all we mapped, or it
 * could be using an ITER_IOVEC. If the latter, then if we consumed all of
 * the segments, then it's a trivial question to answer. If we have residual
 * data in the iter, then loop the segments to figure out how much we
 * transferred.
 */
static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
{
	struct iovec *iov;
	int nbufs;

	/* no data is always zero segments, and a ubuf is always 1 segment */
	if (ret <= 0)
		return 0;
	if (iter_is_ubuf(&kmsg->msg.msg_iter))
		return 1;

	iov = kmsg->free_iov;
	if (!iov)
		iov = &kmsg->fast_iov;

	/* if all data was transferred, it's basic pointer math */
	if (!iov_iter_count(&kmsg->msg.msg_iter))
		return iter_iov(&kmsg->msg.msg_iter) - iov;

	/* short transfer, count segments */
	nbufs = 0;
	do {
		int this_len = min_t(int, iov[nbufs].iov_len, ret);

		nbufs++;
		ret -= this_len;
	} while (ret);

	return nbufs;
}

static inline bool io_send_finish(struct io_kiocb *req, int *ret,
				  struct io_async_msghdr *kmsg,
				  unsigned issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	bool bundle_finished = *ret <= 0;
	unsigned int cflags;

	if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
		cflags = io_put_kbuf(req, *ret, issue_flags);
		goto finish;
	}

	cflags = io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret), issue_flags);

	if (bundle_finished || req->flags & REQ_F_BL_EMPTY)
		goto finish;

	/*
	 * Fill CQE for this receive and see if we should keep trying to
	 * receive from this socket.
	 */
	if (io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
		io_mshot_prep_retry(req, kmsg);
		return false;
	}

	/* Otherwise stop bundle and use the current result. */
finish:
	io_req_set_res(req, *ret, cflags);
	*ret = IOU_OK;
	return true;
}

int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret > 0 && io_net_retry(sock, flags)) {
			kmsg->msg.msg_controllen = 0;
			kmsg->msg.msg_control = NULL;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	io_req_msg_cleanup(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

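/*
 * Issue path for IORING_OP_SEND, including provided-buffer bundles: when
 * bundling, one or more provided buffers are selected into the iterator
 * before each sock_sendmsg() attempt, and io_send_finish() decides whether
 * to post a CQE and go around again.
 */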
int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;

retry_bundle:
	if (io_do_buffer_select(req)) {
		struct buf_sel_arg arg = {
			.iovs = &kmsg->fast_iov,
			.max_len = min_not_zero(sr->len, INT_MAX),
			.nr_iovs = 1,
		};

		if (kmsg->free_iov) {
			arg.nr_iovs = kmsg->free_iov_nr;
			arg.iovs = kmsg->free_iov;
			arg.mode = KBUF_MODE_FREE;
		}

		if (!(sr->flags & IORING_RECVSEND_BUNDLE))
			arg.nr_iovs = 1;
		else
			arg.mode |= KBUF_MODE_EXPAND;

		ret = io_buffers_select(req, &arg, issue_flags);
		if (unlikely(ret < 0))
			return ret;

		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
			kmsg->free_iov_nr = ret;
			kmsg->free_iov = arg.iovs;
			req->flags |= REQ_F_NEED_CLEANUP;
		}
		sr->len = arg.out_len;

		if (ret == 1) {
			sr->buf = arg.iovs[0].iov_base;
			ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
					  &kmsg->msg.msg_iter);
			if (unlikely(ret))
				return ret;
		} else {
			iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE,
				      arg.iovs, ret, arg.out_len);
		}
	}

	/*
	 * If MSG_WAITALL is set, or this is a bundle send, then we need
	 * the full amount. If just bundle is set, if we do a short send
	 * then we complete the bundle sequence rather than continue on.
	 */
	if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
	kmsg->msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &kmsg->msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	if (!io_send_finish(req, &ret, kmsg, issue_flags))
		goto retry_bundle;

	io_req_msg_cleanup(req, issue_flags);
	return ret;
}

static int io_recvmsg_mshot_prep(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg,
				 int namelen, size_t controllen)
{
	if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) ==
	    (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) {
		int hdr;

		if (unlikely(namelen < 0))
			return -EOVERFLOW;
		if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
					namelen, &hdr))
			return -EOVERFLOW;
		if (check_add_overflow(hdr, controllen, &hdr))
			return -EOVERFLOW;

		iomsg->namelen = namelen;
		iomsg->controllen = controllen;
		return 0;
	}

	return 0;
}

static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct user_msghdr msg;
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

#ifdef CONFIG_COMPAT
	if (unlikely(req->ctx->compat)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST);
		if (unlikely(ret))
			return ret;

		ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr);
		if (unlikely(ret))
			return ret;

		return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen,
						cmsg.msg_controllen);
	}
#endif

	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST);
	if (unlikely(ret))
		return ret;

	ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (unlikely(ret))
		return ret;

	return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
					msg.msg_controllen);
}

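/*
 * Allocate the async msghdr at prep time and import the receive target:
 * a plain user buffer for IORING_OP_RECV, or the copied-in user msghdr
 * for IORING_OP_RECVMSG.
 */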
static int io_recvmsg_prep_setup(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg;
	int ret;

	kmsg = io_msg_alloc_async(req);
	if (unlikely(!kmsg))
		return -ENOMEM;

	if (req->opcode == IORING_OP_RECV) {
		kmsg->msg.msg_name = NULL;
		kmsg->msg.msg_namelen = 0;
		kmsg->msg.msg_control = NULL;
		kmsg->msg.msg_get_inq = 1;
		kmsg->msg.msg_controllen = 0;
		kmsg->msg.msg_iocb = NULL;
		kmsg->msg.msg_ubuf = NULL;

		if (!io_do_buffer_select(req)) {
			ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
					  &kmsg->msg.msg_iter);
			if (unlikely(ret))
				return ret;
		}
		return 0;
	}

	ret = io_recvmsg_copy_hdr(req, kmsg);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \
			IORING_RECVSEND_BUNDLE)

int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~RECVMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags);
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (req->flags & REQ_F_BUFFER_SELECT) {
		/*
		 * Store the buffer group for this multishot receive separately,
		 * as if we end up doing an io-wq based issue that selects a
		 * buffer, it has to be committed immediately and that will
		 * clear ->buf_list. This means we lose the link to the buffer
		 * list, and the eventual buffer put on completion then cannot
		 * restore it.
		 */
		sr->buf_group = req->buf_index;
		req->buf_list = NULL;
	}
	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		if (req->opcode == IORING_OP_RECV && sr->len)
			return -EINVAL;
		req->flags |= REQ_F_APOLL_MULTISHOT;
	}
	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_RECVMSG)
			return -EINVAL;
	}

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->nr_multishot_loops = 0;
	return io_recvmsg_prep_setup(req);
}

/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
				  struct io_async_msghdr *kmsg,
				  bool mshot_finished, unsigned issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	unsigned int cflags = 0;

	if (kmsg->msg.msg_inq > 0)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret),
				       issue_flags);
		/* bundle with no more immediate buffers, we're done */
		if (req->flags & REQ_F_BL_EMPTY)
			goto finish;
	} else {
		cflags |= io_put_kbuf(req, *ret, issue_flags);
	}

	/*
	 * Fill CQE for this receive and see if we should keep trying to
	 * receive from this socket.
	 */
	if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
	    io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
		int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;

		io_mshot_prep_retry(req, kmsg);
		/* Known not-empty or unknown state, retry */
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
				return false;
			/* mshot retries exceeded, force a requeue */
			sr->nr_multishot_loops = 0;
			mshot_retry_ret = IOU_REQUEUE;
		}
		if (issue_flags & IO_URING_F_MULTISHOT)
			*ret = mshot_retry_ret;
		else
			*ret = -EAGAIN;
		return true;
	}

	/* Finish the request / stop multishot. */
finish:
	io_req_set_res(req, *ret, cflags);

	if (issue_flags & IO_URING_F_MULTISHOT)
		*ret = IOU_STOP_MULTISHOT;
	else
		*ret = IOU_OK;
	io_req_msg_cleanup(req, issue_flags);
	return true;
}

static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}

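/*
 * On-stack staging for the header of a multishot recvmsg completion: the
 * io_uring_recvmsg_out fields followed by the source address, copied as one
 * block to the start of the selected buffer.
 */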
struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};

static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 * "fromlen shall refer to the value before truncation.."
	 *			1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}

int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;
		size_t len = sr->len;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
			if (ret) {
				io_kbuf_recycle(req, issue_flags);
				return ret;
			}
		}

		iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);
	}

	kmsg->msg.msg_get_inq = 1;
	kmsg->msg.msg_inq = -1;
	if (req->flags & REQ_F_APOLL_MULTISHOT) {
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	} else {
		/* disable partial retry for recvmsg with cmsg attached */
		if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
			min_ret = iov_iter_count(&kmsg->msg.msg_iter);

		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);
	}

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}
			return -EAGAIN;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
		goto retry_multishot;

	return ret;
}

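/*
 * Pick the receive buffer(s): for bundle receives issued with the ring lock
 * held, peek multiple provided buffers into an iovec; otherwise select a
 * single provided buffer and map it as an ITER_UBUF.
 */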
static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
			      size_t *len, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	int ret;

	/*
	 * If the ring isn't locked, then don't use the peek interface
	 * to grab multiple buffers as we will lock/unlock between
	 * this selection and posting the buffers.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
	    sr->flags & IORING_RECVSEND_BUNDLE) {
		struct buf_sel_arg arg = {
			.iovs = &kmsg->fast_iov,
			.nr_iovs = 1,
			.mode = KBUF_MODE_EXPAND,
		};

		if (kmsg->free_iov) {
			arg.nr_iovs = kmsg->free_iov_nr;
			arg.iovs = kmsg->free_iov;
			arg.mode |= KBUF_MODE_FREE;
		}

		if (kmsg->msg.msg_inq > 0)
			arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq);

		ret = io_buffers_peek(req, &arg);
		if (unlikely(ret < 0))
			return ret;

		/* special case 1 vec, can be a fast path */
		if (ret == 1) {
			sr->buf = arg.iovs[0].iov_base;
			sr->len = arg.iovs[0].iov_len;
			goto map_ubuf;
		}
		iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
				arg.out_len);
		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
			kmsg->free_iov_nr = ret;
			kmsg->free_iov = arg.iovs;
			req->flags |= REQ_F_NEED_CLEANUP;
		}
	} else {
		void __user *buf;

		*len = sr->len;
		buf = io_buffer_select(req, len, issue_flags);
		if (!buf)
			return -ENOBUFS;
		sr->buf = buf;
		sr->len = *len;
map_ubuf:
		ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
	}

	return 0;
}

int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	size_t len = sr->len;
	bool mshot_finished;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	if (io_do_buffer_select(req)) {
		ret = io_recv_buf_select(req, kmsg, &len, issue_flags);
		if (unlikely(ret)) {
			kmsg->msg.msg_inq = -1;
			goto out_free;
		}
		sr->buf = NULL;
	}

	kmsg->msg.msg_flags = 0;
	kmsg->msg.msg_inq = -1;

	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = sock_recvmsg(sock, &kmsg->msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}

			return -EAGAIN;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		req_set_fail(req);
	}

	mshot_finished = ret <= 0;
	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
		goto retry_multishot;

	return ret;
}

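/*
 * Cleanup for zero-copy sends: drop any iovec held by the async msghdr and
 * flush the notification if it hasn't been handed over yet.
 */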
void io_send_zc_cleanup(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io = req->async_data;

	if (req_has_async_data(req))
		io_netmsg_iovec_free(io);
	if (zc->notif) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
	}
}

#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
#define IO_ZC_FLAGS_VALID  (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE)

int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_kiocb *notif;

	zc->done_io = 0;
	req->flags |= REQ_F_POLL_NO_LAZY;

	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	notif->cqe.user_data = req->cqe.user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	req->flags |= REQ_F_NEED_CLEANUP;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
		if (zc->flags & ~IO_ZC_FLAGS_VALID)
			return -EINVAL;
		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
			struct io_notif_data *nd = io_notif_to_data(notif);

			nd->zc_report = true;
			nd->zc_used = false;
			nd->zc_copied = false;
		}
	}

	if (req->opcode != IORING_OP_SEND_ZC) {
		if (unlikely(sqe->addr2 || sqe->file_index))
			return -EINVAL;
		if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
			return -EINVAL;
	}

	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
	zc->buf_index = READ_ONCE(sqe->buf_index);
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		zc->msg_flags |= MSG_CMSG_COMPAT;
#endif
	if (unlikely(!io_msg_alloc_async(req)))
		return -ENOMEM;
	if (req->opcode != IORING_OP_SENDMSG_ZC)
		return io_send_setup(req, sqe);
	return io_sendmsg_setup(req, sqe);
}

static int io_sg_from_iter_iovec(struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	skb_zcopy_downgrade_managed(skb);
	return zerocopy_fill_skb_from_iter(skb, from, length);
}

static int io_sg_from_iter(struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	if (!frag)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return zerocopy_fill_skb_from_iter(skb, from, length);

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	if (bi.bi_size)
		ret = -EMSGSIZE;

	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;
	return ret;
}

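/*
 * Map the source data for a zero-copy send: either a registered (fixed)
 * buffer looked up in the ring's buffer table, or a plain user buffer that
 * is accounted against the notification.
 */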
static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	int ret;

	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
		struct io_ring_ctx *ctx = req->ctx;
		struct io_rsrc_node *node;

		ret = -EFAULT;
		io_ring_submit_lock(ctx, issue_flags);
		node = io_rsrc_node_lookup(&ctx->buf_table, sr->buf_index);
		if (node) {
			io_req_assign_buf_node(sr->notif, node);
			ret = 0;
		}
		io_ring_submit_unlock(ctx, issue_flags);

		if (unlikely(ret))
			return ret;

		ret = io_import_fixed(ITER_SOURCE, &kmsg->msg.msg_iter,
					node->buf, (u64)(uintptr_t)sr->buf,
					sr->len);
		if (unlikely(ret))
			return ret;
		kmsg->msg.sg_from_iter = io_sg_from_iter;
	} else {
		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
		ret = io_notif_account_mem(sr->notif, sr->len);
		if (unlikely(ret))
			return ret;
		kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
	}

	return ret;
}

int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned msg_flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	if (!zc->done_io) {
		ret = io_send_zc_import(req, issue_flags);
		if (unlikely(ret))
			return ret;
	}

	msg_flags = zc->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
	msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;

	kmsg->msg.msg_flags = msg_flags;
	kmsg->msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
	ret = sock_sendmsg(sock, &kmsg->msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) {
			zc->len -= ret;
			zc->buf += ret;
			zc->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += zc->done_io;
	else if (zc->done_io)
		ret = zc->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(zc->notif);
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}

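/*
 * msghdr variant of zero-copy send: mirrors io_sendmsg(), but attaches the
 * notification's ubuf_info so the separate notification CQE can be posted
 * once the kernel is done with the pages.
 */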
int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;
	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
	kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(sr->notif);
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}

void io_sendrecv_fail(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (sr->done_io)
		req->cqe.res = sr->done_io;

	if ((req->flags & REQ_F_NEED_CLEANUP) &&
	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
		req->cqe.flags |= IORING_CQE_F_MORE;
}

#define ACCEPT_FLAGS	(IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \
			 IORING_ACCEPT_POLL_FIRST)

int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);

	if (sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	accept->iou_flags = READ_ONCE(sqe->ioprio);
	if (accept->iou_flags & ~ACCEPT_FLAGS)
		return -EINVAL;

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		if (accept->iou_flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (accept->iou_flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	if (accept->iou_flags & IORING_ACCEPT_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	return 0;
}

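/*
 * Handles both one-shot and multishot accept. For multishot, a CQE with
 * IORING_CQE_F_MORE is posted per accepted connection and the request stays
 * armed until an error occurs or posting the CQE fails.
 */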
int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool fixed = !!accept->file_slot;
	struct proto_accept_arg arg = {
		.flags = force_nonblock ? O_NONBLOCK : 0,
	};
	struct file *file;
	unsigned cflags;
	int ret, fd;

	if (!(req->flags & REQ_F_POLLED) &&
	    accept->iou_flags & IORING_ACCEPT_POLL_FIRST)
		return -EAGAIN;

retry:
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	arg.err = 0;
	arg.is_empty = -1;
	file = do_accept(req->file, &arg, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock &&
		    !(accept->iou_flags & IORING_ACCEPT_DONTWAIT)) {
			/*
			 * if it's multishot and polled, we don't need to
			 * return EAGAIN to arm the poll infra since it
			 * has already been done
			 */
			if (issue_flags & IO_URING_F_MULTISHOT)
				return IOU_ISSUE_SKIP_COMPLETE;
			return ret;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
						accept->file_slot);
	}

	cflags = 0;
	if (!arg.is_empty)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, ret, cflags);
		return IOU_OK;
	}

	if (ret < 0)
		return ret;
	if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1)
			goto retry;
		if (issue_flags & IO_URING_F_MULTISHOT)
			return IOU_ISSUE_SKIP_COMPLETE;
		return -EAGAIN;
	}

	io_req_set_res(req, ret, cflags);
	return IOU_STOP_MULTISHOT;
}

int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
		return -EINVAL;
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	return 0;
}

int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
	bool fixed = !!sock->file_slot;
	struct file *file;
	int ret, fd;

	if (!fixed) {
		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
						sock->file_slot);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

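/*
 * Connect copies the destination address into the async data at prep time;
 * the issue side handles -EINPROGRESS and -ECONNABORTED so nonblocking
 * connects can be retried via poll.
 */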
int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	conn->in_progress = conn->seen_econnaborted = false;

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr);
}

int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io = req->async_data;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->addr, connect->addr_len,
				 file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
	    && force_nonblock) {
		if (ret == -EINPROGRESS) {
			connect->in_progress = true;
		} else if (ret == -ECONNABORTED) {
			if (connect->seen_econnaborted)
				goto out;
			connect->seen_econnaborted = true;
		}
		return -EAGAIN;
	}
	if (connect->in_progress) {
		/*
		 * At least bluetooth will return -EBADFD on a re-connect
		 * attempt, and it's (supposedly) also valid to get -EISCONN
		 * which means the previous result is good. For both of these,
		 * grab the sock_error() and use that for the completion.
		 */
		if (ret == -EBADFD || ret == -EISCONN)
			ret = sock_error(sock_from_file(req->file)->sk);
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	io_req_msg_cleanup(req, issue_flags);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct sockaddr __user *uaddr;
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	bind->addr_len = READ_ONCE(sqe->addr2);

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;
	return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr);
}

int io_bind(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct io_async_msghdr *io = req->async_data;
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_bind_socket(sock, &io->addr, bind->addr_len);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}

int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);

	if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2)
		return -EINVAL;

	listen->backlog = READ_ONCE(sqe->len);
	return 0;
}

int io_listen(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_listen_socket(sock, listen->backlog);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}

void io_netmsg_cache_free(const void *entry)
{
	struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;

	if (kmsg->free_iov) {
		kasan_mempool_unpoison_object(kmsg->free_iov,
				kmsg->free_iov_nr * sizeof(struct iovec));
		io_netmsg_iovec_free(kmsg);
	}
	kfree(kmsg);
}
#endif