1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/kernel.h> 3 #include <linux/errno.h> 4 #include <linux/file.h> 5 #include <linux/slab.h> 6 #include <linux/net.h> 7 #include <linux/compat.h> 8 #include <net/compat.h> 9 #include <linux/io_uring.h> 10 11 #include <uapi/linux/io_uring.h> 12 13 #include "io_uring.h" 14 #include "kbuf.h" 15 #include "alloc_cache.h" 16 #include "net.h" 17 #include "notif.h" 18 #include "rsrc.h" 19 20 #if defined(CONFIG_NET) 21 struct io_shutdown { 22 struct file *file; 23 int how; 24 }; 25 26 struct io_accept { 27 struct file *file; 28 struct sockaddr __user *addr; 29 int __user *addr_len; 30 int flags; 31 int iou_flags; 32 u32 file_slot; 33 unsigned long nofile; 34 }; 35 36 struct io_socket { 37 struct file *file; 38 int domain; 39 int type; 40 int protocol; 41 int flags; 42 u32 file_slot; 43 unsigned long nofile; 44 }; 45 46 struct io_connect { 47 struct file *file; 48 struct sockaddr __user *addr; 49 int addr_len; 50 bool in_progress; 51 bool seen_econnaborted; 52 }; 53 54 struct io_bind { 55 struct file *file; 56 int addr_len; 57 }; 58 59 struct io_listen { 60 struct file *file; 61 int backlog; 62 }; 63 64 struct io_sr_msg { 65 struct file *file; 66 union { 67 struct compat_msghdr __user *umsg_compat; 68 struct user_msghdr __user *umsg; 69 void __user *buf; 70 }; 71 int len; 72 unsigned done_io; 73 unsigned msg_flags; 74 unsigned nr_multishot_loops; 75 u16 flags; 76 /* initialised and used only by !msg send variants */ 77 u16 buf_group; 78 u16 buf_index; 79 void __user *msg_control; 80 /* used only for send zerocopy */ 81 struct io_kiocb *notif; 82 }; 83 84 /* 85 * Number of times we'll try and do receives if there's more data. If we 86 * exceed this limit, then add us to the back of the queue and retry from 87 * there. This helps fairness between flooding clients. 88 */ 89 #define MULTISHOT_MAX_RETRY 32 90 91 int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 92 { 93 struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown); 94 95 if (unlikely(sqe->off || sqe->addr || sqe->rw_flags || 96 sqe->buf_index || sqe->splice_fd_in)) 97 return -EINVAL; 98 99 shutdown->how = READ_ONCE(sqe->len); 100 req->flags |= REQ_F_FORCE_ASYNC; 101 return 0; 102 } 103 104 int io_shutdown(struct io_kiocb *req, unsigned int issue_flags) 105 { 106 struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown); 107 struct socket *sock; 108 int ret; 109 110 WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); 111 112 sock = sock_from_file(req->file); 113 if (unlikely(!sock)) 114 return -ENOTSOCK; 115 116 ret = __sys_shutdown_sock(sock, shutdown->how); 117 io_req_set_res(req, ret, 0); 118 return IOU_OK; 119 } 120 121 static bool io_net_retry(struct socket *sock, int flags) 122 { 123 if (!(flags & MSG_WAITALL)) 124 return false; 125 return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET; 126 } 127 128 static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg) 129 { 130 if (kmsg->free_iov) { 131 kfree(kmsg->free_iov); 132 kmsg->free_iov_nr = 0; 133 kmsg->free_iov = NULL; 134 } 135 } 136 137 static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags) 138 { 139 struct io_async_msghdr *hdr = req->async_data; 140 struct iovec *iov; 141 142 /* can't recycle, ensure we free the iovec if we have one */ 143 if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) { 144 io_netmsg_iovec_free(hdr); 145 return; 146 } 147 148 /* Let normal cleanup path reap it if we fail adding to the cache */ 149 iov = hdr->free_iov; 150 if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) { 151 if (iov) 152 kasan_mempool_poison_object(iov); 153 req->async_data = NULL; 154 req->flags &= ~REQ_F_ASYNC_DATA; 155 } 156 } 157 158 static void io_msg_async_data_init(void *obj) 159 { 160 struct io_async_msghdr *hdr = (struct io_async_msghdr *)obj; 161 162 hdr->free_iov = NULL; 163 hdr->free_iov_nr = 0; 164 } 165 166 static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req) 167 { 168 struct io_ring_ctx *ctx = req->ctx; 169 struct io_async_msghdr *hdr; 170 171 hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req, 172 io_msg_async_data_init); 173 if (!hdr) 174 return NULL; 175 176 /* If the async data was cached, we might have an iov cached inside. */ 177 if (hdr->free_iov) { 178 kasan_mempool_unpoison_object(hdr->free_iov, 179 hdr->free_iov_nr * sizeof(struct iovec)); 180 req->flags |= REQ_F_NEED_CLEANUP; 181 } 182 return hdr; 183 } 184 185 /* assign new iovec to kmsg, if we need to */ 186 static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg, 187 struct iovec *iov) 188 { 189 if (iov) { 190 req->flags |= REQ_F_NEED_CLEANUP; 191 kmsg->free_iov_nr = kmsg->msg.msg_iter.nr_segs; 192 if (kmsg->free_iov) 193 kfree(kmsg->free_iov); 194 kmsg->free_iov = iov; 195 } 196 return 0; 197 } 198 199 static inline void io_mshot_prep_retry(struct io_kiocb *req, 200 struct io_async_msghdr *kmsg) 201 { 202 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 203 204 req->flags &= ~REQ_F_BL_EMPTY; 205 sr->done_io = 0; 206 sr->len = 0; /* get from the provided buffer */ 207 req->buf_index = sr->buf_group; 208 } 209 210 #ifdef CONFIG_COMPAT 211 static int io_compat_msg_copy_hdr(struct io_kiocb *req, 212 struct io_async_msghdr *iomsg, 213 struct compat_msghdr *msg, int ddir) 214 { 215 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 216 struct compat_iovec __user *uiov; 217 struct iovec *iov; 218 int ret, nr_segs; 219 220 if (iomsg->free_iov) { 221 nr_segs = iomsg->free_iov_nr; 222 iov = iomsg->free_iov; 223 } else { 224 iov = &iomsg->fast_iov; 225 nr_segs = 1; 226 } 227 228 if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg))) 229 return -EFAULT; 230 231 uiov = compat_ptr(msg->msg_iov); 232 if (req->flags & REQ_F_BUFFER_SELECT) { 233 compat_ssize_t clen; 234 235 if (msg->msg_iovlen == 0) { 236 sr->len = iov->iov_len = 0; 237 iov->iov_base = NULL; 238 } else if (msg->msg_iovlen > 1) { 239 return -EINVAL; 240 } else { 241 if (!access_ok(uiov, sizeof(*uiov))) 242 return -EFAULT; 243 if (__get_user(clen, &uiov->iov_len)) 244 return -EFAULT; 245 if (clen < 0) 246 return -EINVAL; 247 sr->len = clen; 248 } 249 250 return 0; 251 } 252 253 ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen, 254 nr_segs, &iov, &iomsg->msg.msg_iter, true); 255 if (unlikely(ret < 0)) 256 return ret; 257 258 return io_net_vec_assign(req, iomsg, iov); 259 } 260 #endif 261 262 static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, 263 struct user_msghdr *msg, int ddir) 264 { 265 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 266 struct user_msghdr __user *umsg = sr->umsg; 267 struct iovec *iov; 268 int ret, nr_segs; 269 270 if (iomsg->free_iov) { 271 nr_segs = iomsg->free_iov_nr; 272 iov = iomsg->free_iov; 273 } else { 274 iov = &iomsg->fast_iov; 275 nr_segs = 1; 276 } 277 278 if (!user_access_begin(umsg, sizeof(*umsg))) 279 return -EFAULT; 280 281 ret = -EFAULT; 282 unsafe_get_user(msg->msg_name, &umsg->msg_name, ua_end); 283 unsafe_get_user(msg->msg_namelen, &umsg->msg_namelen, ua_end); 284 unsafe_get_user(msg->msg_iov, &umsg->msg_iov, ua_end); 285 unsafe_get_user(msg->msg_iovlen, &umsg->msg_iovlen, ua_end); 286 unsafe_get_user(msg->msg_control, &umsg->msg_control, ua_end); 287 unsafe_get_user(msg->msg_controllen, &umsg->msg_controllen, ua_end); 288 msg->msg_flags = 0; 289 290 if (req->flags & REQ_F_BUFFER_SELECT) { 291 if (msg->msg_iovlen == 0) { 292 sr->len = iov->iov_len = 0; 293 iov->iov_base = NULL; 294 } else if (msg->msg_iovlen > 1) { 295 ret = -EINVAL; 296 goto ua_end; 297 } else { 298 /* we only need the length for provided buffers */ 299 if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t))) 300 goto ua_end; 301 unsafe_get_user(iov->iov_len, &msg->msg_iov[0].iov_len, 302 ua_end); 303 sr->len = iov->iov_len; 304 } 305 ret = 0; 306 ua_end: 307 user_access_end(); 308 return ret; 309 } 310 311 user_access_end(); 312 ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, nr_segs, 313 &iov, &iomsg->msg.msg_iter, false); 314 if (unlikely(ret < 0)) 315 return ret; 316 317 return io_net_vec_assign(req, iomsg, iov); 318 } 319 320 static int io_sendmsg_copy_hdr(struct io_kiocb *req, 321 struct io_async_msghdr *iomsg) 322 { 323 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 324 struct user_msghdr msg; 325 int ret; 326 327 iomsg->msg.msg_name = &iomsg->addr; 328 iomsg->msg.msg_iter.nr_segs = 0; 329 330 #ifdef CONFIG_COMPAT 331 if (unlikely(req->ctx->compat)) { 332 struct compat_msghdr cmsg; 333 334 ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE); 335 if (unlikely(ret)) 336 return ret; 337 338 return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL); 339 } 340 #endif 341 342 ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE); 343 if (unlikely(ret)) 344 return ret; 345 346 ret = __copy_msghdr(&iomsg->msg, &msg, NULL); 347 348 /* save msg_control as sys_sendmsg() overwrites it */ 349 sr->msg_control = iomsg->msg.msg_control_user; 350 return ret; 351 } 352 353 void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req) 354 { 355 struct io_async_msghdr *io = req->async_data; 356 357 io_netmsg_iovec_free(io); 358 } 359 360 static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) 361 { 362 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 363 struct io_async_msghdr *kmsg = req->async_data; 364 void __user *addr; 365 u16 addr_len; 366 int ret; 367 368 sr->buf = u64_to_user_ptr(READ_ONCE(sqe->addr)); 369 370 if (READ_ONCE(sqe->__pad3[0])) 371 return -EINVAL; 372 373 kmsg->msg.msg_name = NULL; 374 kmsg->msg.msg_namelen = 0; 375 kmsg->msg.msg_control = NULL; 376 kmsg->msg.msg_controllen = 0; 377 kmsg->msg.msg_ubuf = NULL; 378 379 addr = u64_to_user_ptr(READ_ONCE(sqe->addr2)); 380 addr_len = READ_ONCE(sqe->addr_len); 381 if (addr) { 382 ret = move_addr_to_kernel(addr, addr_len, &kmsg->addr); 383 if (unlikely(ret < 0)) 384 return ret; 385 kmsg->msg.msg_name = &kmsg->addr; 386 kmsg->msg.msg_namelen = addr_len; 387 } 388 if (!io_do_buffer_select(req)) { 389 ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, 390 &kmsg->msg.msg_iter); 391 if (unlikely(ret < 0)) 392 return ret; 393 } 394 return 0; 395 } 396 397 static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) 398 { 399 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 400 struct io_async_msghdr *kmsg = req->async_data; 401 int ret; 402 403 sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); 404 405 ret = io_sendmsg_copy_hdr(req, kmsg); 406 if (!ret) 407 req->flags |= REQ_F_NEED_CLEANUP; 408 return ret; 409 } 410 411 #define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE) 412 413 int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 414 { 415 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 416 417 sr->done_io = 0; 418 419 if (req->opcode != IORING_OP_SEND) { 420 if (sqe->addr2 || sqe->file_index) 421 return -EINVAL; 422 } 423 424 sr->len = READ_ONCE(sqe->len); 425 sr->flags = READ_ONCE(sqe->ioprio); 426 if (sr->flags & ~SENDMSG_FLAGS) 427 return -EINVAL; 428 sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; 429 if (sr->msg_flags & MSG_DONTWAIT) 430 req->flags |= REQ_F_NOWAIT; 431 if (sr->flags & IORING_RECVSEND_BUNDLE) { 432 if (req->opcode == IORING_OP_SENDMSG) 433 return -EINVAL; 434 if (!(req->flags & REQ_F_BUFFER_SELECT)) 435 return -EINVAL; 436 sr->msg_flags |= MSG_WAITALL; 437 sr->buf_group = req->buf_index; 438 req->buf_list = NULL; 439 } 440 441 #ifdef CONFIG_COMPAT 442 if (req->ctx->compat) 443 sr->msg_flags |= MSG_CMSG_COMPAT; 444 #endif 445 if (unlikely(!io_msg_alloc_async(req))) 446 return -ENOMEM; 447 if (req->opcode != IORING_OP_SENDMSG) 448 return io_send_setup(req, sqe); 449 return io_sendmsg_setup(req, sqe); 450 } 451 452 static void io_req_msg_cleanup(struct io_kiocb *req, 453 unsigned int issue_flags) 454 { 455 req->flags &= ~REQ_F_NEED_CLEANUP; 456 io_netmsg_recycle(req, issue_flags); 457 } 458 459 /* 460 * For bundle completions, we need to figure out how many segments we consumed. 461 * A bundle could be using a single ITER_UBUF if that's all we mapped, or it 462 * could be using an ITER_IOVEC. If the latter, then if we consumed all of 463 * the segments, then it's a trivial questiont o answer. If we have residual 464 * data in the iter, then loop the segments to figure out how much we 465 * transferred. 466 */ 467 static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret) 468 { 469 struct iovec *iov; 470 int nbufs; 471 472 /* no data is always zero segments, and a ubuf is always 1 segment */ 473 if (ret <= 0) 474 return 0; 475 if (iter_is_ubuf(&kmsg->msg.msg_iter)) 476 return 1; 477 478 iov = kmsg->free_iov; 479 if (!iov) 480 iov = &kmsg->fast_iov; 481 482 /* if all data was transferred, it's basic pointer math */ 483 if (!iov_iter_count(&kmsg->msg.msg_iter)) 484 return iter_iov(&kmsg->msg.msg_iter) - iov; 485 486 /* short transfer, count segments */ 487 nbufs = 0; 488 do { 489 int this_len = min_t(int, iov[nbufs].iov_len, ret); 490 491 nbufs++; 492 ret -= this_len; 493 } while (ret); 494 495 return nbufs; 496 } 497 498 static inline bool io_send_finish(struct io_kiocb *req, int *ret, 499 struct io_async_msghdr *kmsg, 500 unsigned issue_flags) 501 { 502 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 503 bool bundle_finished = *ret <= 0; 504 unsigned int cflags; 505 506 if (!(sr->flags & IORING_RECVSEND_BUNDLE)) { 507 cflags = io_put_kbuf(req, *ret, issue_flags); 508 goto finish; 509 } 510 511 cflags = io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret), issue_flags); 512 513 if (bundle_finished || req->flags & REQ_F_BL_EMPTY) 514 goto finish; 515 516 /* 517 * Fill CQE for this receive and see if we should keep trying to 518 * receive from this socket. 519 */ 520 if (io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) { 521 io_mshot_prep_retry(req, kmsg); 522 return false; 523 } 524 525 /* Otherwise stop bundle and use the current result. */ 526 finish: 527 io_req_set_res(req, *ret, cflags); 528 *ret = IOU_OK; 529 return true; 530 } 531 532 int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) 533 { 534 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 535 struct io_async_msghdr *kmsg = req->async_data; 536 struct socket *sock; 537 unsigned flags; 538 int min_ret = 0; 539 int ret; 540 541 sock = sock_from_file(req->file); 542 if (unlikely(!sock)) 543 return -ENOTSOCK; 544 545 if (!(req->flags & REQ_F_POLLED) && 546 (sr->flags & IORING_RECVSEND_POLL_FIRST)) 547 return -EAGAIN; 548 549 flags = sr->msg_flags; 550 if (issue_flags & IO_URING_F_NONBLOCK) 551 flags |= MSG_DONTWAIT; 552 if (flags & MSG_WAITALL) 553 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 554 555 kmsg->msg.msg_control_user = sr->msg_control; 556 557 ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); 558 559 if (ret < min_ret) { 560 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 561 return -EAGAIN; 562 if (ret > 0 && io_net_retry(sock, flags)) { 563 kmsg->msg.msg_controllen = 0; 564 kmsg->msg.msg_control = NULL; 565 sr->done_io += ret; 566 req->flags |= REQ_F_BL_NO_RECYCLE; 567 return -EAGAIN; 568 } 569 if (ret == -ERESTARTSYS) 570 ret = -EINTR; 571 req_set_fail(req); 572 } 573 io_req_msg_cleanup(req, issue_flags); 574 if (ret >= 0) 575 ret += sr->done_io; 576 else if (sr->done_io) 577 ret = sr->done_io; 578 io_req_set_res(req, ret, 0); 579 return IOU_OK; 580 } 581 582 int io_send(struct io_kiocb *req, unsigned int issue_flags) 583 { 584 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 585 struct io_async_msghdr *kmsg = req->async_data; 586 struct socket *sock; 587 unsigned flags; 588 int min_ret = 0; 589 int ret; 590 591 sock = sock_from_file(req->file); 592 if (unlikely(!sock)) 593 return -ENOTSOCK; 594 595 if (!(req->flags & REQ_F_POLLED) && 596 (sr->flags & IORING_RECVSEND_POLL_FIRST)) 597 return -EAGAIN; 598 599 flags = sr->msg_flags; 600 if (issue_flags & IO_URING_F_NONBLOCK) 601 flags |= MSG_DONTWAIT; 602 603 retry_bundle: 604 if (io_do_buffer_select(req)) { 605 struct buf_sel_arg arg = { 606 .iovs = &kmsg->fast_iov, 607 .max_len = min_not_zero(sr->len, INT_MAX), 608 .nr_iovs = 1, 609 }; 610 611 if (kmsg->free_iov) { 612 arg.nr_iovs = kmsg->free_iov_nr; 613 arg.iovs = kmsg->free_iov; 614 arg.mode = KBUF_MODE_FREE; 615 } 616 617 if (!(sr->flags & IORING_RECVSEND_BUNDLE)) 618 arg.nr_iovs = 1; 619 else 620 arg.mode |= KBUF_MODE_EXPAND; 621 622 ret = io_buffers_select(req, &arg, issue_flags); 623 if (unlikely(ret < 0)) 624 return ret; 625 626 if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) { 627 kmsg->free_iov_nr = ret; 628 kmsg->free_iov = arg.iovs; 629 req->flags |= REQ_F_NEED_CLEANUP; 630 } 631 sr->len = arg.out_len; 632 633 if (ret == 1) { 634 sr->buf = arg.iovs[0].iov_base; 635 ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, 636 &kmsg->msg.msg_iter); 637 if (unlikely(ret)) 638 return ret; 639 } else { 640 iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE, 641 arg.iovs, ret, arg.out_len); 642 } 643 } 644 645 /* 646 * If MSG_WAITALL is set, or this is a bundle send, then we need 647 * the full amount. If just bundle is set, if we do a short send 648 * then we complete the bundle sequence rather than continue on. 649 */ 650 if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE) 651 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 652 653 flags &= ~MSG_INTERNAL_SENDMSG_FLAGS; 654 kmsg->msg.msg_flags = flags; 655 ret = sock_sendmsg(sock, &kmsg->msg); 656 if (ret < min_ret) { 657 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 658 return -EAGAIN; 659 660 if (ret > 0 && io_net_retry(sock, flags)) { 661 sr->len -= ret; 662 sr->buf += ret; 663 sr->done_io += ret; 664 req->flags |= REQ_F_BL_NO_RECYCLE; 665 return -EAGAIN; 666 } 667 if (ret == -ERESTARTSYS) 668 ret = -EINTR; 669 req_set_fail(req); 670 } 671 if (ret >= 0) 672 ret += sr->done_io; 673 else if (sr->done_io) 674 ret = sr->done_io; 675 676 if (!io_send_finish(req, &ret, kmsg, issue_flags)) 677 goto retry_bundle; 678 679 io_req_msg_cleanup(req, issue_flags); 680 return ret; 681 } 682 683 static int io_recvmsg_mshot_prep(struct io_kiocb *req, 684 struct io_async_msghdr *iomsg, 685 int namelen, size_t controllen) 686 { 687 if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) == 688 (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) { 689 int hdr; 690 691 if (unlikely(namelen < 0)) 692 return -EOVERFLOW; 693 if (check_add_overflow(sizeof(struct io_uring_recvmsg_out), 694 namelen, &hdr)) 695 return -EOVERFLOW; 696 if (check_add_overflow(hdr, controllen, &hdr)) 697 return -EOVERFLOW; 698 699 iomsg->namelen = namelen; 700 iomsg->controllen = controllen; 701 return 0; 702 } 703 704 return 0; 705 } 706 707 static int io_recvmsg_copy_hdr(struct io_kiocb *req, 708 struct io_async_msghdr *iomsg) 709 { 710 struct user_msghdr msg; 711 int ret; 712 713 iomsg->msg.msg_name = &iomsg->addr; 714 iomsg->msg.msg_iter.nr_segs = 0; 715 716 #ifdef CONFIG_COMPAT 717 if (unlikely(req->ctx->compat)) { 718 struct compat_msghdr cmsg; 719 720 ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST); 721 if (unlikely(ret)) 722 return ret; 723 724 ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr); 725 if (unlikely(ret)) 726 return ret; 727 728 return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen, 729 cmsg.msg_controllen); 730 } 731 #endif 732 733 ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST); 734 if (unlikely(ret)) 735 return ret; 736 737 ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr); 738 if (unlikely(ret)) 739 return ret; 740 741 return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen, 742 msg.msg_controllen); 743 } 744 745 static int io_recvmsg_prep_setup(struct io_kiocb *req) 746 { 747 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 748 struct io_async_msghdr *kmsg; 749 int ret; 750 751 kmsg = io_msg_alloc_async(req); 752 if (unlikely(!kmsg)) 753 return -ENOMEM; 754 755 if (req->opcode == IORING_OP_RECV) { 756 kmsg->msg.msg_name = NULL; 757 kmsg->msg.msg_namelen = 0; 758 kmsg->msg.msg_inq = 0; 759 kmsg->msg.msg_control = NULL; 760 kmsg->msg.msg_get_inq = 1; 761 kmsg->msg.msg_controllen = 0; 762 kmsg->msg.msg_iocb = NULL; 763 kmsg->msg.msg_ubuf = NULL; 764 765 if (!io_do_buffer_select(req)) { 766 ret = import_ubuf(ITER_DEST, sr->buf, sr->len, 767 &kmsg->msg.msg_iter); 768 if (unlikely(ret)) 769 return ret; 770 } 771 return 0; 772 } 773 774 ret = io_recvmsg_copy_hdr(req, kmsg); 775 if (!ret) 776 req->flags |= REQ_F_NEED_CLEANUP; 777 return ret; 778 } 779 780 #define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \ 781 IORING_RECVSEND_BUNDLE) 782 783 int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 784 { 785 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 786 787 sr->done_io = 0; 788 789 if (unlikely(sqe->file_index || sqe->addr2)) 790 return -EINVAL; 791 792 sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); 793 sr->len = READ_ONCE(sqe->len); 794 sr->flags = READ_ONCE(sqe->ioprio); 795 if (sr->flags & ~RECVMSG_FLAGS) 796 return -EINVAL; 797 sr->msg_flags = READ_ONCE(sqe->msg_flags); 798 if (sr->msg_flags & MSG_DONTWAIT) 799 req->flags |= REQ_F_NOWAIT; 800 if (sr->msg_flags & MSG_ERRQUEUE) 801 req->flags |= REQ_F_CLEAR_POLLIN; 802 if (req->flags & REQ_F_BUFFER_SELECT) { 803 /* 804 * Store the buffer group for this multishot receive separately, 805 * as if we end up doing an io-wq based issue that selects a 806 * buffer, it has to be committed immediately and that will 807 * clear ->buf_list. This means we lose the link to the buffer 808 * list, and the eventual buffer put on completion then cannot 809 * restore it. 810 */ 811 sr->buf_group = req->buf_index; 812 req->buf_list = NULL; 813 } 814 if (sr->flags & IORING_RECV_MULTISHOT) { 815 if (!(req->flags & REQ_F_BUFFER_SELECT)) 816 return -EINVAL; 817 if (sr->msg_flags & MSG_WAITALL) 818 return -EINVAL; 819 if (req->opcode == IORING_OP_RECV && sr->len) 820 return -EINVAL; 821 req->flags |= REQ_F_APOLL_MULTISHOT; 822 } 823 if (sr->flags & IORING_RECVSEND_BUNDLE) { 824 if (req->opcode == IORING_OP_RECVMSG) 825 return -EINVAL; 826 } 827 828 #ifdef CONFIG_COMPAT 829 if (req->ctx->compat) 830 sr->msg_flags |= MSG_CMSG_COMPAT; 831 #endif 832 sr->nr_multishot_loops = 0; 833 return io_recvmsg_prep_setup(req); 834 } 835 836 /* 837 * Finishes io_recv and io_recvmsg. 838 * 839 * Returns true if it is actually finished, or false if it should run 840 * again (for multishot). 841 */ 842 static inline bool io_recv_finish(struct io_kiocb *req, int *ret, 843 struct io_async_msghdr *kmsg, 844 bool mshot_finished, unsigned issue_flags) 845 { 846 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 847 unsigned int cflags = 0; 848 849 if (kmsg->msg.msg_inq > 0) 850 cflags |= IORING_CQE_F_SOCK_NONEMPTY; 851 852 if (sr->flags & IORING_RECVSEND_BUNDLE) { 853 cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret), 854 issue_flags); 855 /* bundle with no more immediate buffers, we're done */ 856 if (req->flags & REQ_F_BL_EMPTY) 857 goto finish; 858 } else { 859 cflags |= io_put_kbuf(req, *ret, issue_flags); 860 } 861 862 /* 863 * Fill CQE for this receive and see if we should keep trying to 864 * receive from this socket. 865 */ 866 if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished && 867 io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) { 868 int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE; 869 870 io_mshot_prep_retry(req, kmsg); 871 /* Known not-empty or unknown state, retry */ 872 if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) { 873 if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY) 874 return false; 875 /* mshot retries exceeded, force a requeue */ 876 sr->nr_multishot_loops = 0; 877 mshot_retry_ret = IOU_REQUEUE; 878 } 879 if (issue_flags & IO_URING_F_MULTISHOT) 880 *ret = mshot_retry_ret; 881 else 882 *ret = -EAGAIN; 883 return true; 884 } 885 886 /* Finish the request / stop multishot. */ 887 finish: 888 io_req_set_res(req, *ret, cflags); 889 890 if (issue_flags & IO_URING_F_MULTISHOT) 891 *ret = IOU_STOP_MULTISHOT; 892 else 893 *ret = IOU_OK; 894 io_req_msg_cleanup(req, issue_flags); 895 return true; 896 } 897 898 static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg, 899 struct io_sr_msg *sr, void __user **buf, 900 size_t *len) 901 { 902 unsigned long ubuf = (unsigned long) *buf; 903 unsigned long hdr; 904 905 hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen + 906 kmsg->controllen; 907 if (*len < hdr) 908 return -EFAULT; 909 910 if (kmsg->controllen) { 911 unsigned long control = ubuf + hdr - kmsg->controllen; 912 913 kmsg->msg.msg_control_user = (void __user *) control; 914 kmsg->msg.msg_controllen = kmsg->controllen; 915 } 916 917 sr->buf = *buf; /* stash for later copy */ 918 *buf = (void __user *) (ubuf + hdr); 919 kmsg->payloadlen = *len = *len - hdr; 920 return 0; 921 } 922 923 struct io_recvmsg_multishot_hdr { 924 struct io_uring_recvmsg_out msg; 925 struct sockaddr_storage addr; 926 }; 927 928 static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io, 929 struct io_async_msghdr *kmsg, 930 unsigned int flags, bool *finished) 931 { 932 int err; 933 int copy_len; 934 struct io_recvmsg_multishot_hdr hdr; 935 936 if (kmsg->namelen) 937 kmsg->msg.msg_name = &hdr.addr; 938 kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); 939 kmsg->msg.msg_namelen = 0; 940 941 if (sock->file->f_flags & O_NONBLOCK) 942 flags |= MSG_DONTWAIT; 943 944 err = sock_recvmsg(sock, &kmsg->msg, flags); 945 *finished = err <= 0; 946 if (err < 0) 947 return err; 948 949 hdr.msg = (struct io_uring_recvmsg_out) { 950 .controllen = kmsg->controllen - kmsg->msg.msg_controllen, 951 .flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT 952 }; 953 954 hdr.msg.payloadlen = err; 955 if (err > kmsg->payloadlen) 956 err = kmsg->payloadlen; 957 958 copy_len = sizeof(struct io_uring_recvmsg_out); 959 if (kmsg->msg.msg_namelen > kmsg->namelen) 960 copy_len += kmsg->namelen; 961 else 962 copy_len += kmsg->msg.msg_namelen; 963 964 /* 965 * "fromlen shall refer to the value before truncation.." 966 * 1003.1g 967 */ 968 hdr.msg.namelen = kmsg->msg.msg_namelen; 969 970 /* ensure that there is no gap between hdr and sockaddr_storage */ 971 BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) != 972 sizeof(struct io_uring_recvmsg_out)); 973 if (copy_to_user(io->buf, &hdr, copy_len)) { 974 *finished = true; 975 return -EFAULT; 976 } 977 978 return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen + 979 kmsg->controllen + err; 980 } 981 982 int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) 983 { 984 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 985 struct io_async_msghdr *kmsg = req->async_data; 986 struct socket *sock; 987 unsigned flags; 988 int ret, min_ret = 0; 989 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 990 bool mshot_finished = true; 991 992 sock = sock_from_file(req->file); 993 if (unlikely(!sock)) 994 return -ENOTSOCK; 995 996 if (!(req->flags & REQ_F_POLLED) && 997 (sr->flags & IORING_RECVSEND_POLL_FIRST)) 998 return -EAGAIN; 999 1000 flags = sr->msg_flags; 1001 if (force_nonblock) 1002 flags |= MSG_DONTWAIT; 1003 1004 retry_multishot: 1005 if (io_do_buffer_select(req)) { 1006 void __user *buf; 1007 size_t len = sr->len; 1008 1009 buf = io_buffer_select(req, &len, issue_flags); 1010 if (!buf) 1011 return -ENOBUFS; 1012 1013 if (req->flags & REQ_F_APOLL_MULTISHOT) { 1014 ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len); 1015 if (ret) { 1016 io_kbuf_recycle(req, issue_flags); 1017 return ret; 1018 } 1019 } 1020 1021 iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len); 1022 } 1023 1024 kmsg->msg.msg_get_inq = 1; 1025 kmsg->msg.msg_inq = -1; 1026 if (req->flags & REQ_F_APOLL_MULTISHOT) { 1027 ret = io_recvmsg_multishot(sock, sr, kmsg, flags, 1028 &mshot_finished); 1029 } else { 1030 /* disable partial retry for recvmsg with cmsg attached */ 1031 if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen) 1032 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 1033 1034 ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, 1035 kmsg->uaddr, flags); 1036 } 1037 1038 if (ret < min_ret) { 1039 if (ret == -EAGAIN && force_nonblock) { 1040 if (issue_flags & IO_URING_F_MULTISHOT) { 1041 io_kbuf_recycle(req, issue_flags); 1042 return IOU_ISSUE_SKIP_COMPLETE; 1043 } 1044 return -EAGAIN; 1045 } 1046 if (ret > 0 && io_net_retry(sock, flags)) { 1047 sr->done_io += ret; 1048 req->flags |= REQ_F_BL_NO_RECYCLE; 1049 return -EAGAIN; 1050 } 1051 if (ret == -ERESTARTSYS) 1052 ret = -EINTR; 1053 req_set_fail(req); 1054 } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { 1055 req_set_fail(req); 1056 } 1057 1058 if (ret > 0) 1059 ret += sr->done_io; 1060 else if (sr->done_io) 1061 ret = sr->done_io; 1062 else 1063 io_kbuf_recycle(req, issue_flags); 1064 1065 if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags)) 1066 goto retry_multishot; 1067 1068 return ret; 1069 } 1070 1071 static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg, 1072 size_t *len, unsigned int issue_flags) 1073 { 1074 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1075 int ret; 1076 1077 /* 1078 * If the ring isn't locked, then don't use the peek interface 1079 * to grab multiple buffers as we will lock/unlock between 1080 * this selection and posting the buffers. 1081 */ 1082 if (!(issue_flags & IO_URING_F_UNLOCKED) && 1083 sr->flags & IORING_RECVSEND_BUNDLE) { 1084 struct buf_sel_arg arg = { 1085 .iovs = &kmsg->fast_iov, 1086 .nr_iovs = 1, 1087 .mode = KBUF_MODE_EXPAND, 1088 }; 1089 1090 if (kmsg->free_iov) { 1091 arg.nr_iovs = kmsg->free_iov_nr; 1092 arg.iovs = kmsg->free_iov; 1093 arg.mode |= KBUF_MODE_FREE; 1094 } 1095 1096 if (kmsg->msg.msg_inq > 0) 1097 arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq); 1098 1099 ret = io_buffers_peek(req, &arg); 1100 if (unlikely(ret < 0)) 1101 return ret; 1102 1103 /* special case 1 vec, can be a fast path */ 1104 if (ret == 1) { 1105 sr->buf = arg.iovs[0].iov_base; 1106 sr->len = arg.iovs[0].iov_len; 1107 goto map_ubuf; 1108 } 1109 iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret, 1110 arg.out_len); 1111 if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) { 1112 kmsg->free_iov_nr = ret; 1113 kmsg->free_iov = arg.iovs; 1114 req->flags |= REQ_F_NEED_CLEANUP; 1115 } 1116 } else { 1117 void __user *buf; 1118 1119 *len = sr->len; 1120 buf = io_buffer_select(req, len, issue_flags); 1121 if (!buf) 1122 return -ENOBUFS; 1123 sr->buf = buf; 1124 sr->len = *len; 1125 map_ubuf: 1126 ret = import_ubuf(ITER_DEST, sr->buf, sr->len, 1127 &kmsg->msg.msg_iter); 1128 if (unlikely(ret)) 1129 return ret; 1130 } 1131 1132 return 0; 1133 } 1134 1135 int io_recv(struct io_kiocb *req, unsigned int issue_flags) 1136 { 1137 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1138 struct io_async_msghdr *kmsg = req->async_data; 1139 struct socket *sock; 1140 unsigned flags; 1141 int ret, min_ret = 0; 1142 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 1143 size_t len = sr->len; 1144 bool mshot_finished; 1145 1146 if (!(req->flags & REQ_F_POLLED) && 1147 (sr->flags & IORING_RECVSEND_POLL_FIRST)) 1148 return -EAGAIN; 1149 1150 sock = sock_from_file(req->file); 1151 if (unlikely(!sock)) 1152 return -ENOTSOCK; 1153 1154 flags = sr->msg_flags; 1155 if (force_nonblock) 1156 flags |= MSG_DONTWAIT; 1157 1158 retry_multishot: 1159 if (io_do_buffer_select(req)) { 1160 ret = io_recv_buf_select(req, kmsg, &len, issue_flags); 1161 if (unlikely(ret)) { 1162 kmsg->msg.msg_inq = -1; 1163 goto out_free; 1164 } 1165 sr->buf = NULL; 1166 } 1167 1168 kmsg->msg.msg_flags = 0; 1169 kmsg->msg.msg_inq = -1; 1170 1171 if (flags & MSG_WAITALL) 1172 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 1173 1174 ret = sock_recvmsg(sock, &kmsg->msg, flags); 1175 if (ret < min_ret) { 1176 if (ret == -EAGAIN && force_nonblock) { 1177 if (issue_flags & IO_URING_F_MULTISHOT) { 1178 io_kbuf_recycle(req, issue_flags); 1179 return IOU_ISSUE_SKIP_COMPLETE; 1180 } 1181 1182 return -EAGAIN; 1183 } 1184 if (ret > 0 && io_net_retry(sock, flags)) { 1185 sr->len -= ret; 1186 sr->buf += ret; 1187 sr->done_io += ret; 1188 req->flags |= REQ_F_BL_NO_RECYCLE; 1189 return -EAGAIN; 1190 } 1191 if (ret == -ERESTARTSYS) 1192 ret = -EINTR; 1193 req_set_fail(req); 1194 } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { 1195 out_free: 1196 req_set_fail(req); 1197 } 1198 1199 mshot_finished = ret <= 0; 1200 if (ret > 0) 1201 ret += sr->done_io; 1202 else if (sr->done_io) 1203 ret = sr->done_io; 1204 else 1205 io_kbuf_recycle(req, issue_flags); 1206 1207 if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags)) 1208 goto retry_multishot; 1209 1210 return ret; 1211 } 1212 1213 void io_send_zc_cleanup(struct io_kiocb *req) 1214 { 1215 struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); 1216 struct io_async_msghdr *io = req->async_data; 1217 1218 if (req_has_async_data(req)) 1219 io_netmsg_iovec_free(io); 1220 if (zc->notif) { 1221 io_notif_flush(zc->notif); 1222 zc->notif = NULL; 1223 } 1224 } 1225 1226 #define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF) 1227 #define IO_ZC_FLAGS_VALID (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE) 1228 1229 int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1230 { 1231 struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); 1232 struct io_ring_ctx *ctx = req->ctx; 1233 struct io_kiocb *notif; 1234 1235 zc->done_io = 0; 1236 req->flags |= REQ_F_POLL_NO_LAZY; 1237 1238 if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3))) 1239 return -EINVAL; 1240 /* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */ 1241 if (req->flags & REQ_F_CQE_SKIP) 1242 return -EINVAL; 1243 1244 notif = zc->notif = io_alloc_notif(ctx); 1245 if (!notif) 1246 return -ENOMEM; 1247 notif->cqe.user_data = req->cqe.user_data; 1248 notif->cqe.res = 0; 1249 notif->cqe.flags = IORING_CQE_F_NOTIF; 1250 req->flags |= REQ_F_NEED_CLEANUP; 1251 1252 zc->flags = READ_ONCE(sqe->ioprio); 1253 if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) { 1254 if (zc->flags & ~IO_ZC_FLAGS_VALID) 1255 return -EINVAL; 1256 if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) { 1257 struct io_notif_data *nd = io_notif_to_data(notif); 1258 1259 nd->zc_report = true; 1260 nd->zc_used = false; 1261 nd->zc_copied = false; 1262 } 1263 } 1264 1265 if (req->opcode != IORING_OP_SEND_ZC) { 1266 if (unlikely(sqe->addr2 || sqe->file_index)) 1267 return -EINVAL; 1268 if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF)) 1269 return -EINVAL; 1270 } 1271 1272 zc->len = READ_ONCE(sqe->len); 1273 zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY; 1274 zc->buf_index = READ_ONCE(sqe->buf_index); 1275 if (zc->msg_flags & MSG_DONTWAIT) 1276 req->flags |= REQ_F_NOWAIT; 1277 1278 #ifdef CONFIG_COMPAT 1279 if (req->ctx->compat) 1280 zc->msg_flags |= MSG_CMSG_COMPAT; 1281 #endif 1282 if (unlikely(!io_msg_alloc_async(req))) 1283 return -ENOMEM; 1284 if (req->opcode != IORING_OP_SENDMSG_ZC) 1285 return io_send_setup(req, sqe); 1286 return io_sendmsg_setup(req, sqe); 1287 } 1288 1289 static int io_sg_from_iter_iovec(struct sk_buff *skb, 1290 struct iov_iter *from, size_t length) 1291 { 1292 skb_zcopy_downgrade_managed(skb); 1293 return zerocopy_fill_skb_from_iter(skb, from, length); 1294 } 1295 1296 static int io_sg_from_iter(struct sk_buff *skb, 1297 struct iov_iter *from, size_t length) 1298 { 1299 struct skb_shared_info *shinfo = skb_shinfo(skb); 1300 int frag = shinfo->nr_frags; 1301 int ret = 0; 1302 struct bvec_iter bi; 1303 ssize_t copied = 0; 1304 unsigned long truesize = 0; 1305 1306 if (!frag) 1307 shinfo->flags |= SKBFL_MANAGED_FRAG_REFS; 1308 else if (unlikely(!skb_zcopy_managed(skb))) 1309 return zerocopy_fill_skb_from_iter(skb, from, length); 1310 1311 bi.bi_size = min(from->count, length); 1312 bi.bi_bvec_done = from->iov_offset; 1313 bi.bi_idx = 0; 1314 1315 while (bi.bi_size && frag < MAX_SKB_FRAGS) { 1316 struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi); 1317 1318 copied += v.bv_len; 1319 truesize += PAGE_ALIGN(v.bv_len + v.bv_offset); 1320 __skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page, 1321 v.bv_offset, v.bv_len); 1322 bvec_iter_advance_single(from->bvec, &bi, v.bv_len); 1323 } 1324 if (bi.bi_size) 1325 ret = -EMSGSIZE; 1326 1327 shinfo->nr_frags = frag; 1328 from->bvec += bi.bi_idx; 1329 from->nr_segs -= bi.bi_idx; 1330 from->count -= copied; 1331 from->iov_offset = bi.bi_bvec_done; 1332 1333 skb->data_len += copied; 1334 skb->len += copied; 1335 skb->truesize += truesize; 1336 return ret; 1337 } 1338 1339 static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags) 1340 { 1341 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1342 struct io_async_msghdr *kmsg = req->async_data; 1343 int ret; 1344 1345 if (sr->flags & IORING_RECVSEND_FIXED_BUF) { 1346 struct io_ring_ctx *ctx = req->ctx; 1347 struct io_rsrc_node *node; 1348 1349 ret = -EFAULT; 1350 io_ring_submit_lock(ctx, issue_flags); 1351 node = io_rsrc_node_lookup(&ctx->buf_table, sr->buf_index); 1352 if (node) { 1353 io_req_assign_buf_node(sr->notif, node); 1354 ret = 0; 1355 } 1356 io_ring_submit_unlock(ctx, issue_flags); 1357 1358 if (unlikely(ret)) 1359 return ret; 1360 1361 ret = io_import_fixed(ITER_SOURCE, &kmsg->msg.msg_iter, 1362 node->buf, (u64)(uintptr_t)sr->buf, 1363 sr->len); 1364 if (unlikely(ret)) 1365 return ret; 1366 kmsg->msg.sg_from_iter = io_sg_from_iter; 1367 } else { 1368 ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter); 1369 if (unlikely(ret)) 1370 return ret; 1371 ret = io_notif_account_mem(sr->notif, sr->len); 1372 if (unlikely(ret)) 1373 return ret; 1374 kmsg->msg.sg_from_iter = io_sg_from_iter_iovec; 1375 } 1376 1377 return ret; 1378 } 1379 1380 int io_send_zc(struct io_kiocb *req, unsigned int issue_flags) 1381 { 1382 struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); 1383 struct io_async_msghdr *kmsg = req->async_data; 1384 struct socket *sock; 1385 unsigned msg_flags; 1386 int ret, min_ret = 0; 1387 1388 sock = sock_from_file(req->file); 1389 if (unlikely(!sock)) 1390 return -ENOTSOCK; 1391 if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags)) 1392 return -EOPNOTSUPP; 1393 1394 if (!(req->flags & REQ_F_POLLED) && 1395 (zc->flags & IORING_RECVSEND_POLL_FIRST)) 1396 return -EAGAIN; 1397 1398 if (!zc->done_io) { 1399 ret = io_send_zc_import(req, issue_flags); 1400 if (unlikely(ret)) 1401 return ret; 1402 } 1403 1404 msg_flags = zc->msg_flags; 1405 if (issue_flags & IO_URING_F_NONBLOCK) 1406 msg_flags |= MSG_DONTWAIT; 1407 if (msg_flags & MSG_WAITALL) 1408 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 1409 msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS; 1410 1411 kmsg->msg.msg_flags = msg_flags; 1412 kmsg->msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg; 1413 ret = sock_sendmsg(sock, &kmsg->msg); 1414 1415 if (unlikely(ret < min_ret)) { 1416 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 1417 return -EAGAIN; 1418 1419 if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) { 1420 zc->len -= ret; 1421 zc->buf += ret; 1422 zc->done_io += ret; 1423 req->flags |= REQ_F_BL_NO_RECYCLE; 1424 return -EAGAIN; 1425 } 1426 if (ret == -ERESTARTSYS) 1427 ret = -EINTR; 1428 req_set_fail(req); 1429 } 1430 1431 if (ret >= 0) 1432 ret += zc->done_io; 1433 else if (zc->done_io) 1434 ret = zc->done_io; 1435 1436 /* 1437 * If we're in io-wq we can't rely on tw ordering guarantees, defer 1438 * flushing notif to io_send_zc_cleanup() 1439 */ 1440 if (!(issue_flags & IO_URING_F_UNLOCKED)) { 1441 io_notif_flush(zc->notif); 1442 io_req_msg_cleanup(req, 0); 1443 } 1444 io_req_set_res(req, ret, IORING_CQE_F_MORE); 1445 return IOU_OK; 1446 } 1447 1448 int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) 1449 { 1450 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1451 struct io_async_msghdr *kmsg = req->async_data; 1452 struct socket *sock; 1453 unsigned flags; 1454 int ret, min_ret = 0; 1455 1456 sock = sock_from_file(req->file); 1457 if (unlikely(!sock)) 1458 return -ENOTSOCK; 1459 if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags)) 1460 return -EOPNOTSUPP; 1461 1462 if (!(req->flags & REQ_F_POLLED) && 1463 (sr->flags & IORING_RECVSEND_POLL_FIRST)) 1464 return -EAGAIN; 1465 1466 flags = sr->msg_flags; 1467 if (issue_flags & IO_URING_F_NONBLOCK) 1468 flags |= MSG_DONTWAIT; 1469 if (flags & MSG_WAITALL) 1470 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 1471 1472 kmsg->msg.msg_control_user = sr->msg_control; 1473 kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg; 1474 kmsg->msg.sg_from_iter = io_sg_from_iter_iovec; 1475 ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); 1476 1477 if (unlikely(ret < min_ret)) { 1478 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 1479 return -EAGAIN; 1480 1481 if (ret > 0 && io_net_retry(sock, flags)) { 1482 sr->done_io += ret; 1483 req->flags |= REQ_F_BL_NO_RECYCLE; 1484 return -EAGAIN; 1485 } 1486 if (ret == -ERESTARTSYS) 1487 ret = -EINTR; 1488 req_set_fail(req); 1489 } 1490 1491 if (ret >= 0) 1492 ret += sr->done_io; 1493 else if (sr->done_io) 1494 ret = sr->done_io; 1495 1496 /* 1497 * If we're in io-wq we can't rely on tw ordering guarantees, defer 1498 * flushing notif to io_send_zc_cleanup() 1499 */ 1500 if (!(issue_flags & IO_URING_F_UNLOCKED)) { 1501 io_notif_flush(sr->notif); 1502 io_req_msg_cleanup(req, 0); 1503 } 1504 io_req_set_res(req, ret, IORING_CQE_F_MORE); 1505 return IOU_OK; 1506 } 1507 1508 void io_sendrecv_fail(struct io_kiocb *req) 1509 { 1510 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1511 1512 if (sr->done_io) 1513 req->cqe.res = sr->done_io; 1514 1515 if ((req->flags & REQ_F_NEED_CLEANUP) && 1516 (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC)) 1517 req->cqe.flags |= IORING_CQE_F_MORE; 1518 } 1519 1520 #define ACCEPT_FLAGS (IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \ 1521 IORING_ACCEPT_POLL_FIRST) 1522 1523 int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1524 { 1525 struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept); 1526 1527 if (sqe->len || sqe->buf_index) 1528 return -EINVAL; 1529 1530 accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); 1531 accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2)); 1532 accept->flags = READ_ONCE(sqe->accept_flags); 1533 accept->nofile = rlimit(RLIMIT_NOFILE); 1534 accept->iou_flags = READ_ONCE(sqe->ioprio); 1535 if (accept->iou_flags & ~ACCEPT_FLAGS) 1536 return -EINVAL; 1537 1538 accept->file_slot = READ_ONCE(sqe->file_index); 1539 if (accept->file_slot) { 1540 if (accept->flags & SOCK_CLOEXEC) 1541 return -EINVAL; 1542 if (accept->iou_flags & IORING_ACCEPT_MULTISHOT && 1543 accept->file_slot != IORING_FILE_INDEX_ALLOC) 1544 return -EINVAL; 1545 } 1546 if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1547 return -EINVAL; 1548 if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK)) 1549 accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1550 if (accept->iou_flags & IORING_ACCEPT_MULTISHOT) 1551 req->flags |= REQ_F_APOLL_MULTISHOT; 1552 if (accept->iou_flags & IORING_ACCEPT_DONTWAIT) 1553 req->flags |= REQ_F_NOWAIT; 1554 return 0; 1555 } 1556 1557 int io_accept(struct io_kiocb *req, unsigned int issue_flags) 1558 { 1559 struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept); 1560 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 1561 bool fixed = !!accept->file_slot; 1562 struct proto_accept_arg arg = { 1563 .flags = force_nonblock ? O_NONBLOCK : 0, 1564 }; 1565 struct file *file; 1566 unsigned cflags; 1567 int ret, fd; 1568 1569 if (!(req->flags & REQ_F_POLLED) && 1570 accept->iou_flags & IORING_ACCEPT_POLL_FIRST) 1571 return -EAGAIN; 1572 1573 retry: 1574 if (!fixed) { 1575 fd = __get_unused_fd_flags(accept->flags, accept->nofile); 1576 if (unlikely(fd < 0)) 1577 return fd; 1578 } 1579 arg.err = 0; 1580 arg.is_empty = -1; 1581 file = do_accept(req->file, &arg, accept->addr, accept->addr_len, 1582 accept->flags); 1583 if (IS_ERR(file)) { 1584 if (!fixed) 1585 put_unused_fd(fd); 1586 ret = PTR_ERR(file); 1587 if (ret == -EAGAIN && force_nonblock && 1588 !(accept->iou_flags & IORING_ACCEPT_DONTWAIT)) { 1589 /* 1590 * if it's multishot and polled, we don't need to 1591 * return EAGAIN to arm the poll infra since it 1592 * has already been done 1593 */ 1594 if (issue_flags & IO_URING_F_MULTISHOT) 1595 return IOU_ISSUE_SKIP_COMPLETE; 1596 return ret; 1597 } 1598 if (ret == -ERESTARTSYS) 1599 ret = -EINTR; 1600 req_set_fail(req); 1601 } else if (!fixed) { 1602 fd_install(fd, file); 1603 ret = fd; 1604 } else { 1605 ret = io_fixed_fd_install(req, issue_flags, file, 1606 accept->file_slot); 1607 } 1608 1609 cflags = 0; 1610 if (!arg.is_empty) 1611 cflags |= IORING_CQE_F_SOCK_NONEMPTY; 1612 1613 if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { 1614 io_req_set_res(req, ret, cflags); 1615 return IOU_OK; 1616 } 1617 1618 if (ret < 0) 1619 return ret; 1620 if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) { 1621 if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1) 1622 goto retry; 1623 if (issue_flags & IO_URING_F_MULTISHOT) 1624 return IOU_ISSUE_SKIP_COMPLETE; 1625 return -EAGAIN; 1626 } 1627 1628 io_req_set_res(req, ret, cflags); 1629 return IOU_STOP_MULTISHOT; 1630 } 1631 1632 int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1633 { 1634 struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket); 1635 1636 if (sqe->addr || sqe->rw_flags || sqe->buf_index) 1637 return -EINVAL; 1638 1639 sock->domain = READ_ONCE(sqe->fd); 1640 sock->type = READ_ONCE(sqe->off); 1641 sock->protocol = READ_ONCE(sqe->len); 1642 sock->file_slot = READ_ONCE(sqe->file_index); 1643 sock->nofile = rlimit(RLIMIT_NOFILE); 1644 1645 sock->flags = sock->type & ~SOCK_TYPE_MASK; 1646 if (sock->file_slot && (sock->flags & SOCK_CLOEXEC)) 1647 return -EINVAL; 1648 if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1649 return -EINVAL; 1650 return 0; 1651 } 1652 1653 int io_socket(struct io_kiocb *req, unsigned int issue_flags) 1654 { 1655 struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket); 1656 bool fixed = !!sock->file_slot; 1657 struct file *file; 1658 int ret, fd; 1659 1660 if (!fixed) { 1661 fd = __get_unused_fd_flags(sock->flags, sock->nofile); 1662 if (unlikely(fd < 0)) 1663 return fd; 1664 } 1665 file = __sys_socket_file(sock->domain, sock->type, sock->protocol); 1666 if (IS_ERR(file)) { 1667 if (!fixed) 1668 put_unused_fd(fd); 1669 ret = PTR_ERR(file); 1670 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 1671 return -EAGAIN; 1672 if (ret == -ERESTARTSYS) 1673 ret = -EINTR; 1674 req_set_fail(req); 1675 } else if (!fixed) { 1676 fd_install(fd, file); 1677 ret = fd; 1678 } else { 1679 ret = io_fixed_fd_install(req, issue_flags, file, 1680 sock->file_slot); 1681 } 1682 io_req_set_res(req, ret, 0); 1683 return IOU_OK; 1684 } 1685 1686 int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1687 { 1688 struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect); 1689 struct io_async_msghdr *io; 1690 1691 if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) 1692 return -EINVAL; 1693 1694 conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); 1695 conn->addr_len = READ_ONCE(sqe->addr2); 1696 conn->in_progress = conn->seen_econnaborted = false; 1697 1698 io = io_msg_alloc_async(req); 1699 if (unlikely(!io)) 1700 return -ENOMEM; 1701 1702 return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr); 1703 } 1704 1705 int io_connect(struct io_kiocb *req, unsigned int issue_flags) 1706 { 1707 struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect); 1708 struct io_async_msghdr *io = req->async_data; 1709 unsigned file_flags; 1710 int ret; 1711 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 1712 1713 file_flags = force_nonblock ? O_NONBLOCK : 0; 1714 1715 ret = __sys_connect_file(req->file, &io->addr, connect->addr_len, 1716 file_flags); 1717 if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED) 1718 && force_nonblock) { 1719 if (ret == -EINPROGRESS) { 1720 connect->in_progress = true; 1721 } else if (ret == -ECONNABORTED) { 1722 if (connect->seen_econnaborted) 1723 goto out; 1724 connect->seen_econnaborted = true; 1725 } 1726 return -EAGAIN; 1727 } 1728 if (connect->in_progress) { 1729 /* 1730 * At least bluetooth will return -EBADFD on a re-connect 1731 * attempt, and it's (supposedly) also valid to get -EISCONN 1732 * which means the previous result is good. For both of these, 1733 * grab the sock_error() and use that for the completion. 1734 */ 1735 if (ret == -EBADFD || ret == -EISCONN) 1736 ret = sock_error(sock_from_file(req->file)->sk); 1737 } 1738 if (ret == -ERESTARTSYS) 1739 ret = -EINTR; 1740 out: 1741 if (ret < 0) 1742 req_set_fail(req); 1743 io_req_msg_cleanup(req, issue_flags); 1744 io_req_set_res(req, ret, 0); 1745 return IOU_OK; 1746 } 1747 1748 int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1749 { 1750 struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind); 1751 struct sockaddr __user *uaddr; 1752 struct io_async_msghdr *io; 1753 1754 if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) 1755 return -EINVAL; 1756 1757 uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr)); 1758 bind->addr_len = READ_ONCE(sqe->addr2); 1759 1760 io = io_msg_alloc_async(req); 1761 if (unlikely(!io)) 1762 return -ENOMEM; 1763 return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr); 1764 } 1765 1766 int io_bind(struct io_kiocb *req, unsigned int issue_flags) 1767 { 1768 struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind); 1769 struct io_async_msghdr *io = req->async_data; 1770 struct socket *sock; 1771 int ret; 1772 1773 sock = sock_from_file(req->file); 1774 if (unlikely(!sock)) 1775 return -ENOTSOCK; 1776 1777 ret = __sys_bind_socket(sock, &io->addr, bind->addr_len); 1778 if (ret < 0) 1779 req_set_fail(req); 1780 io_req_set_res(req, ret, 0); 1781 return 0; 1782 } 1783 1784 int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1785 { 1786 struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen); 1787 1788 if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2) 1789 return -EINVAL; 1790 1791 listen->backlog = READ_ONCE(sqe->len); 1792 return 0; 1793 } 1794 1795 int io_listen(struct io_kiocb *req, unsigned int issue_flags) 1796 { 1797 struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen); 1798 struct socket *sock; 1799 int ret; 1800 1801 sock = sock_from_file(req->file); 1802 if (unlikely(!sock)) 1803 return -ENOTSOCK; 1804 1805 ret = __sys_listen_socket(sock, listen->backlog); 1806 if (ret < 0) 1807 req_set_fail(req); 1808 io_req_set_res(req, ret, 0); 1809 return 0; 1810 } 1811 1812 void io_netmsg_cache_free(const void *entry) 1813 { 1814 struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry; 1815 1816 if (kmsg->free_iov) { 1817 kasan_mempool_unpoison_object(kmsg->free_iov, 1818 kmsg->free_iov_nr * sizeof(struct iovec)); 1819 io_netmsg_iovec_free(kmsg); 1820 } 1821 kfree(kmsg); 1822 } 1823 #endif 1824