// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"

#if defined(CONFIG_NET)
struct io_shutdown {
	struct file *file;
	int how;
};

struct io_accept {
	struct file *file;
	struct sockaddr __user *addr;
	int __user *addr_len;
	int flags;
	int iou_flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_socket {
	struct file *file;
	int domain;
	int type;
	int protocol;
	int flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_connect {
	struct file *file;
	struct sockaddr __user *addr;
	int addr_len;
	bool in_progress;
	bool seen_econnaborted;
};

struct io_bind {
	struct file *file;
	int addr_len;
};

struct io_listen {
	struct file *file;
	int backlog;
};

struct io_sr_msg {
	struct file *file;
	union {
		struct compat_msghdr __user *umsg_compat;
		struct user_msghdr __user *umsg;
		void __user *buf;
	};
	int len;
	unsigned done_io;
	unsigned msg_flags;
	unsigned nr_multishot_loops;
	u16 flags;
	/* initialised and used only by !msg send variants */
	u16 buf_group;
	u16 buf_index;
	void __user *msg_control;
	/* used only for send zerocopy */
	struct io_kiocb *notif;
};

/*
 * Number of times we'll try and do receives if there's more data. If we
 * exceed this limit, then add us to the back of the queue and retry from
 * there. This helps fairness between flooding clients.
 */
#define MULTISHOT_MAX_RETRY	32

int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
	struct socket *sock;
	int ret;

	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
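
/*
 * Illustrative userspace sketch (not part of the kernel build) of driving
 * IORING_OP_SHUTDOWN, assuming liburing's io_uring_prep_shutdown() helper.
 * io_shutdown() above always runs from io-wq (REQ_F_FORCE_ASYNC), so the
 * completion simply carries __sys_shutdown_sock()'s return value.
 *
 *	#include <liburing.h>
 *	#include <sys/socket.h>
 *
 *	static int ring_shutdown(struct io_uring *ring, int sockfd)
 *	{
 *		struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
 *		struct io_uring_cqe *cqe;
 *		int res;
 *
 *		io_uring_prep_shutdown(sqe, sockfd, SHUT_WR);
 *		io_uring_submit(ring);
 *		io_uring_wait_cqe(ring, &cqe);
 *		res = cqe->res;		// 0 or -errno, as set by io_req_set_res()
 *		io_uring_cqe_seen(ring, cqe);
 *		return res;
 *	}
 */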

static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}

static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
{
	if (kmsg->free_iov) {
		kfree(kmsg->free_iov);
		kmsg->free_iov_nr = 0;
		kmsg->free_iov = NULL;
	}
}

static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;
	struct iovec *iov;

	/* can't recycle, ensure we free the iovec if we have one */
	if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
		io_netmsg_iovec_free(hdr);
		return;
	}

	/* Let normal cleanup path reap it if we fail adding to the cache */
	iov = hdr->free_iov;
	if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) {
		if (iov)
			kasan_mempool_poison_object(iov);
		req->async_data = NULL;
		req->flags &= ~REQ_F_ASYNC_DATA;
	}
}

static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_async_msghdr *hdr;

	hdr = io_alloc_cache_get(&ctx->netmsg_cache);
	if (hdr) {
		if (hdr->free_iov) {
			kasan_mempool_unpoison_object(hdr->free_iov,
				hdr->free_iov_nr * sizeof(struct iovec));
			req->flags |= REQ_F_NEED_CLEANUP;
		}
		req->flags |= REQ_F_ASYNC_DATA;
		req->async_data = hdr;
		return hdr;
	}

	if (!io_alloc_async_data(req)) {
		hdr = req->async_data;
		hdr->free_iov_nr = 0;
		hdr->free_iov = NULL;
		return hdr;
	}
	return NULL;
}

/* assign new iovec to kmsg, if we need to */
static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
			     struct iovec *iov)
{
	if (iov) {
		req->flags |= REQ_F_NEED_CLEANUP;
		kmsg->free_iov_nr = kmsg->msg.msg_iter.nr_segs;
		if (kmsg->free_iov)
			kfree(kmsg->free_iov);
		kmsg->free_iov = iov;
	}
	return 0;
}

static inline void io_mshot_prep_retry(struct io_kiocb *req,
				       struct io_async_msghdr *kmsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	req->flags &= ~REQ_F_BL_EMPTY;
	sr->done_io = 0;
	sr->len = 0; /* get from the provided buffer */
	req->buf_index = sr->buf_group;
}

#ifdef CONFIG_COMPAT
static int io_compat_msg_copy_hdr(struct io_kiocb *req,
				  struct io_async_msghdr *iomsg,
				  struct compat_msghdr *msg, int ddir)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_iovec __user *uiov;
	struct iovec *iov;
	int ret, nr_segs;

	if (iomsg->free_iov) {
		nr_segs = iomsg->free_iov_nr;
		iov = iomsg->free_iov;
	} else {
		iov = &iomsg->fast_iov;
		nr_segs = 1;
	}

	if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
		return -EFAULT;

	uiov = compat_ptr(msg->msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		compat_ssize_t clen;

		if (msg->msg_iovlen == 0) {
			sr->len = iov->iov_len = 0;
			iov->iov_base = NULL;
		} else if (msg->msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (!access_ok(uiov, sizeof(*uiov)))
				return -EFAULT;
			if (__get_user(clen, &uiov->iov_len))
				return -EFAULT;
			if (clen < 0)
				return -EINVAL;
			sr->len = clen;
		}

		return 0;
	}

	ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen,
			     nr_segs, &iov, &iomsg->msg.msg_iter, true);
	if (unlikely(ret < 0))
		return ret;

	return io_net_vec_assign(req, iomsg, iov);
}
#endif

static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			   struct user_msghdr *msg, int ddir)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr __user *umsg = sr->umsg;
	struct iovec *iov;
	int ret, nr_segs;

	if (iomsg->free_iov) {
		nr_segs = iomsg->free_iov_nr;
		iov = iomsg->free_iov;
	} else {
		iov = &iomsg->fast_iov;
		nr_segs = 1;
	}

	if (!user_access_begin(umsg, sizeof(*umsg)))
		return -EFAULT;

	ret = -EFAULT;
	unsafe_get_user(msg->msg_name, &umsg->msg_name, ua_end);
	unsafe_get_user(msg->msg_namelen, &umsg->msg_namelen, ua_end);
	unsafe_get_user(msg->msg_iov, &umsg->msg_iov, ua_end);
	unsafe_get_user(msg->msg_iovlen, &umsg->msg_iovlen, ua_end);
	unsafe_get_user(msg->msg_control, &umsg->msg_control, ua_end);
	unsafe_get_user(msg->msg_controllen, &umsg->msg_controllen, ua_end);
	msg->msg_flags = 0;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg->msg_iovlen == 0) {
			sr->len = iov->iov_len = 0;
			iov->iov_base = NULL;
		} else if (msg->msg_iovlen > 1) {
			ret = -EINVAL;
			goto ua_end;
		} else {
			/* we only need the length for provided buffers */
			if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t)))
				goto ua_end;
			unsafe_get_user(iov->iov_len, &msg->msg_iov[0].iov_len,
					ua_end);
			sr->len = iov->iov_len;
		}
		ret = 0;
ua_end:
		user_access_end();
		return ret;
	}

	user_access_end();
	ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, nr_segs,
			     &iov, &iomsg->msg.msg_iter, false);
	if (unlikely(ret < 0))
		return ret;

	return io_net_vec_assign(req, iomsg, iov);
}

static int io_sendmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr msg;
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

#ifdef CONFIG_COMPAT
	if (unlikely(req->ctx->compat)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE);
		if (unlikely(ret))
			return ret;

		return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL);
	}
#endif

	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE);
	if (unlikely(ret))
		return ret;

	ret = __copy_msghdr(&iomsg->msg, &msg, NULL);

	/* save msg_control as sys_sendmsg() overwrites it */
	sr->msg_control = iomsg->msg.msg_control_user;
	return ret;
}

void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	io_netmsg_iovec_free(io);
}

static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	void __user *addr;
	u16 addr_len;
	int ret;

	sr->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));

	if (READ_ONCE(sqe->__pad3[0]))
		return -EINVAL;

	kmsg->msg.msg_name = NULL;
	kmsg->msg.msg_namelen = 0;
	kmsg->msg.msg_control = NULL;
	kmsg->msg.msg_controllen = 0;
	kmsg->msg.msg_ubuf = NULL;

	addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	addr_len = READ_ONCE(sqe->addr_len);
	if (addr) {
		ret = move_addr_to_kernel(addr, addr_len, &kmsg->addr);
		if (unlikely(ret < 0))
			return ret;
		kmsg->msg.msg_name = &kmsg->addr;
		kmsg->msg.msg_namelen = addr_len;
	}
	if (!io_do_buffer_select(req)) {
		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret < 0))
			return ret;
	}
	return 0;
}

static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	int ret;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));

	ret = io_sendmsg_copy_hdr(req, kmsg);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE)

int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;

	if (req->opcode != IORING_OP_SEND) {
		if (sqe->addr2 || sqe->file_index)
			return -EINVAL;
	}

	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~SENDMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_SENDMSG)
			return -EINVAL;
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		sr->msg_flags |= MSG_WAITALL;
		sr->buf_group = req->buf_index;
		req->buf_list = NULL;
	}

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	if (unlikely(!io_msg_alloc_async(req)))
		return -ENOMEM;
	if (req->opcode != IORING_OP_SENDMSG)
		return io_send_setup(req, sqe);
	return io_sendmsg_setup(req, sqe);
}

static void io_req_msg_cleanup(struct io_kiocb *req,
			       unsigned int issue_flags)
{
	req->flags &= ~REQ_F_NEED_CLEANUP;
	io_netmsg_recycle(req, issue_flags);
}

/*
 * For bundle completions, we need to figure out how many segments we consumed.
 * A bundle could be using a single ITER_UBUF if that's all we mapped, or it
 * could be using an ITER_IOVEC. If the latter, then if we consumed all of
 * the segments, then it's a trivial question to answer. If we have residual
 * data in the iter, then loop the segments to figure out how much we
 * transferred.
 */
static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
{
	struct iovec *iov;
	int nbufs;

	/* no data is always zero segments, and a ubuf is always 1 segment */
	if (ret <= 0)
		return 0;
	if (iter_is_ubuf(&kmsg->msg.msg_iter))
		return 1;

	iov = kmsg->free_iov;
	if (!iov)
		iov = &kmsg->fast_iov;

	/* if all data was transferred, it's basic pointer math */
	if (!iov_iter_count(&kmsg->msg.msg_iter))
		return iter_iov(&kmsg->msg.msg_iter) - iov;

	/* short transfer, count segments */
	nbufs = 0;
	do {
		int this_len = min_t(int, iov[nbufs].iov_len, ret);

		nbufs++;
		ret -= this_len;
	} while (ret);

	return nbufs;
}
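
/*
 * Worked example of the short-transfer path above, assuming a hypothetical
 * bundle mapped as three provided buffers of 256, 256 and 1024 bytes: a
 * result of 600 bytes consumes all of iov[0] (256), all of iov[1] (256) and
 * 88 bytes of iov[2], so the loop runs three times and io_bundle_nbufs()
 * reports 3 buffers to io_put_kbufs().
 */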

static inline bool io_send_finish(struct io_kiocb *req, int *ret,
				  struct io_async_msghdr *kmsg,
				  unsigned issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	bool bundle_finished = *ret <= 0;
	unsigned int cflags;

	if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
		cflags = io_put_kbuf(req, *ret, issue_flags);
		goto finish;
	}

	cflags = io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret), issue_flags);

	if (bundle_finished || req->flags & REQ_F_BL_EMPTY)
		goto finish;

	/*
	 * Fill CQE for this receive and see if we should keep trying to
	 * receive from this socket.
	 */
	if (io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
		io_mshot_prep_retry(req, kmsg);
		return false;
	}

	/* Otherwise stop bundle and use the current result. */
finish:
	io_req_set_res(req, *ret, cflags);
	*ret = IOU_OK;
	return true;
}

int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret > 0 && io_net_retry(sock, flags)) {
			kmsg->msg.msg_controllen = 0;
			kmsg->msg.msg_control = NULL;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	io_req_msg_cleanup(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;

retry_bundle:
	if (io_do_buffer_select(req)) {
		struct buf_sel_arg arg = {
			.iovs = &kmsg->fast_iov,
			.max_len = min_not_zero(sr->len, INT_MAX),
			.nr_iovs = 1,
		};

		if (kmsg->free_iov) {
			arg.nr_iovs = kmsg->free_iov_nr;
			arg.iovs = kmsg->free_iov;
			arg.mode = KBUF_MODE_FREE;
		}

		if (!(sr->flags & IORING_RECVSEND_BUNDLE))
			arg.nr_iovs = 1;
		else
			arg.mode |= KBUF_MODE_EXPAND;

		ret = io_buffers_select(req, &arg, issue_flags);
		if (unlikely(ret < 0))
			return ret;

		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
			kmsg->free_iov_nr = ret;
			kmsg->free_iov = arg.iovs;
			req->flags |= REQ_F_NEED_CLEANUP;
		}
		sr->len = arg.out_len;

		if (ret == 1) {
			sr->buf = arg.iovs[0].iov_base;
			ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
					  &kmsg->msg.msg_iter);
			if (unlikely(ret))
				return ret;
		} else {
			iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE,
				      arg.iovs, ret, arg.out_len);
		}
	}

	/*
	 * If MSG_WAITALL is set, or this is a bundle send, then we need
	 * the full amount. If just the bundle flag is set and we do a short
	 * send, then we complete the bundle sequence rather than continue on.
	 */
	if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
	kmsg->msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &kmsg->msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	if (!io_send_finish(req, &ret, kmsg, issue_flags))
		goto retry_bundle;

	io_req_msg_cleanup(req, issue_flags);
	return ret;
}
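
/*
 * Illustrative userspace sketch (not part of the kernel build) of a bundle
 * send as handled by io_send() above: IORING_RECVSEND_BUNDLE is passed in
 * sqe->ioprio and the data comes from a provided buffer group, so the sqe
 * carries no address, only the group id. Buffer-ring setup is assumed to
 * have been done elsewhere (e.g. with liburing's io_uring_setup_buf_ring()).
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
 *
 *	io_uring_prep_send(sqe, sockfd, NULL, 0, MSG_NOSIGNAL);
 *	sqe->flags |= IOSQE_BUFFER_SELECT;
 *	sqe->buf_group = BGID;			// provided buffer group id (assumed)
 *	sqe->ioprio |= IORING_RECVSEND_BUNDLE;	// allow multiple buffers per send
 *
 * On completion, cqe->res is the total byte count and the consumed buffer
 * ids start at cqe->flags >> IORING_CQE_BUFFER_SHIFT.
 */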

static int io_recvmsg_mshot_prep(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg,
				 int namelen, size_t controllen)
{
	if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) ==
	    (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) {
		int hdr;

		if (unlikely(namelen < 0))
			return -EOVERFLOW;
		if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
				       namelen, &hdr))
			return -EOVERFLOW;
		if (check_add_overflow(hdr, controllen, &hdr))
			return -EOVERFLOW;

		iomsg->namelen = namelen;
		iomsg->controllen = controllen;
		return 0;
	}

	return 0;
}

static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct user_msghdr msg;
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

#ifdef CONFIG_COMPAT
	if (unlikely(req->ctx->compat)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST);
		if (unlikely(ret))
			return ret;

		ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr);
		if (unlikely(ret))
			return ret;

		return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen,
					     cmsg.msg_controllen);
	}
#endif

	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST);
	if (unlikely(ret))
		return ret;

	ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (unlikely(ret))
		return ret;

	return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
				     msg.msg_controllen);
}

static int io_recvmsg_prep_setup(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg;
	int ret;

	kmsg = io_msg_alloc_async(req);
	if (unlikely(!kmsg))
		return -ENOMEM;

	if (req->opcode == IORING_OP_RECV) {
		kmsg->msg.msg_name = NULL;
		kmsg->msg.msg_namelen = 0;
		kmsg->msg.msg_inq = 0;
		kmsg->msg.msg_control = NULL;
		kmsg->msg.msg_get_inq = 1;
		kmsg->msg.msg_controllen = 0;
		kmsg->msg.msg_iocb = NULL;
		kmsg->msg.msg_ubuf = NULL;

		if (!io_do_buffer_select(req)) {
			ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
					  &kmsg->msg.msg_iter);
			if (unlikely(ret))
				return ret;
		}
		return 0;
	}

	ret = io_recvmsg_copy_hdr(req, kmsg);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \
			IORING_RECVSEND_BUNDLE)

int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~RECVMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags);
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (req->flags & REQ_F_BUFFER_SELECT) {
		/*
		 * Store the buffer group for this multishot receive separately,
		 * as if we end up doing an io-wq based issue that selects a
		 * buffer, it has to be committed immediately and that will
		 * clear ->buf_list. This means we lose the link to the buffer
		 * list, and the eventual buffer put on completion then cannot
		 * restore it.
		 */
		sr->buf_group = req->buf_index;
		req->buf_list = NULL;
	}
	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		if (req->opcode == IORING_OP_RECV && sr->len)
			return -EINVAL;
		req->flags |= REQ_F_APOLL_MULTISHOT;
	}
	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_RECVMSG)
			return -EINVAL;
	}

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->nr_multishot_loops = 0;
	return io_recvmsg_prep_setup(req);
}
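
/*
 * Illustrative userspace sketch (not part of the kernel build) of arming the
 * multishot receive validated above: IORING_RECV_MULTISHOT requires provided
 * buffers and a zero length, and each arriving chunk posts a CQE with
 * IORING_CQE_F_MORE set until the request terminates. Assumes liburing's
 * io_uring_prep_recv_multishot() helper and an existing buffer ring for BGID.
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
 *
 *	io_uring_prep_recv_multishot(sqe, sockfd, NULL, 0, 0);
 *	sqe->flags |= IOSQE_BUFFER_SELECT;
 *	sqe->buf_group = BGID;		// provided buffer group id (assumed)
 *	io_uring_submit(ring);
 */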

/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
				  struct io_async_msghdr *kmsg,
				  bool mshot_finished, unsigned issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	unsigned int cflags = 0;

	if (kmsg->msg.msg_inq > 0)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret),
				       issue_flags);
		/* bundle with no more immediate buffers, we're done */
		if (req->flags & REQ_F_BL_EMPTY)
			goto finish;
	} else {
		cflags |= io_put_kbuf(req, *ret, issue_flags);
	}

	/*
	 * Fill CQE for this receive and see if we should keep trying to
	 * receive from this socket.
	 */
	if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
	    io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
		int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;

		io_mshot_prep_retry(req, kmsg);
		/* Known not-empty or unknown state, retry */
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
				return false;
			/* mshot retries exceeded, force a requeue */
			sr->nr_multishot_loops = 0;
			mshot_retry_ret = IOU_REQUEUE;
		}
		if (issue_flags & IO_URING_F_MULTISHOT)
			*ret = mshot_retry_ret;
		else
			*ret = -EAGAIN;
		return true;
	}

	/* Finish the request / stop multishot. */
finish:
	io_req_set_res(req, *ret, cflags);

	if (issue_flags & IO_URING_F_MULTISHOT)
		*ret = IOU_STOP_MULTISHOT;
	else
		*ret = IOU_OK;
	io_req_msg_cleanup(req, issue_flags);
	return true;
}

static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}

struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};

static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 * "fromlen shall refer to the value before truncation.."
	 *			1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}
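
/*
 * Layout of one provided buffer as filled in by io_recvmsg_prep_multishot()
 * and io_recvmsg_multishot() above, for the namelen/controllen reserved at
 * prep time:
 *
 *	offset 0:				struct io_uring_recvmsg_out
 *	+ sizeof(io_uring_recvmsg_out):		source address (kmsg->namelen bytes)
 *	+ kmsg->namelen:			control data (kmsg->controllen bytes)
 *	+ kmsg->controllen:			payload (out.payloadlen bytes)
 *
 * The CQE result is the header area plus the payload bytes actually copied,
 * matching the final return statement above. liburing's
 * io_uring_recvmsg_validate() and related helpers are intended for walking
 * this layout from userspace.
 */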

int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;
		size_t len = sr->len;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
			if (ret) {
				io_kbuf_recycle(req, issue_flags);
				return ret;
			}
		}

		iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);
	}

	kmsg->msg.msg_get_inq = 1;
	kmsg->msg.msg_inq = -1;
	if (req->flags & REQ_F_APOLL_MULTISHOT) {
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	} else {
		/* disable partial retry for recvmsg with cmsg attached */
		if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
			min_ret = iov_iter_count(&kmsg->msg.msg_iter);

		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);
	}

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}
			return -EAGAIN;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
		goto retry_multishot;

	return ret;
}

static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
			      size_t *len, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	int ret;

	/*
	 * If the ring isn't locked, then don't use the peek interface
	 * to grab multiple buffers as we will lock/unlock between
	 * this selection and posting the buffers.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
	    sr->flags & IORING_RECVSEND_BUNDLE) {
		struct buf_sel_arg arg = {
			.iovs = &kmsg->fast_iov,
			.nr_iovs = 1,
			.mode = KBUF_MODE_EXPAND,
		};

		if (kmsg->free_iov) {
			arg.nr_iovs = kmsg->free_iov_nr;
			arg.iovs = kmsg->free_iov;
			arg.mode |= KBUF_MODE_FREE;
		}

		if (kmsg->msg.msg_inq > 0)
			arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq);

		ret = io_buffers_peek(req, &arg);
		if (unlikely(ret < 0))
			return ret;

		/* special case 1 vec, can be a fast path */
		if (ret == 1) {
			sr->buf = arg.iovs[0].iov_base;
			sr->len = arg.iovs[0].iov_len;
			goto map_ubuf;
		}
		iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
			      arg.out_len);
		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
			kmsg->free_iov_nr = ret;
			kmsg->free_iov = arg.iovs;
			req->flags |= REQ_F_NEED_CLEANUP;
		}
	} else {
		void __user *buf;

		*len = sr->len;
		buf = io_buffer_select(req, len, issue_flags);
		if (!buf)
			return -ENOBUFS;
		sr->buf = buf;
		sr->len = *len;
map_ubuf:
		ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
	}

	return 0;
}

int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	size_t len = sr->len;
	bool mshot_finished;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	if (io_do_buffer_select(req)) {
		ret = io_recv_buf_select(req, kmsg, &len, issue_flags);
		if (unlikely(ret)) {
			kmsg->msg.msg_inq = -1;
			goto out_free;
		}
		sr->buf = NULL;
	}

	kmsg->msg.msg_flags = 0;
	kmsg->msg.msg_inq = -1;

	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = sock_recvmsg(sock, &kmsg->msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}

			return -EAGAIN;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		req_set_fail(req);
	}

	mshot_finished = ret <= 0;
	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
		goto retry_multishot;

	return ret;
}
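
/*
 * Illustrative userspace sketch (not part of the kernel build) of draining
 * the completions that io_recv()/io_recv_finish() generate in multishot
 * mode. IORING_CQE_F_MORE indicates the request is still armed; the id of
 * the provided buffer that was selected is carried in the upper bits of
 * cqe->flags. consume(), rearm_recv() and bufs[] are hypothetical.
 *
 *	struct io_uring_cqe *cqe;
 *
 *	while (io_uring_peek_cqe(ring, &cqe) == 0) {
 *		if (cqe->res > 0 && (cqe->flags & IORING_CQE_F_BUFFER)) {
 *			unsigned bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
 *
 *			consume(bufs[bid], cqe->res);	// hypothetical consumer
 *		}
 *		if (!(cqe->flags & IORING_CQE_F_MORE))
 *			rearm_recv(ring, sockfd);	// hypothetical re-arm
 *		io_uring_cqe_seen(ring, cqe);
 *	}
 */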

void io_send_zc_cleanup(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io = req->async_data;

	if (req_has_async_data(req))
		io_netmsg_iovec_free(io);
	if (zc->notif) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
	}
}

#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
#define IO_ZC_FLAGS_VALID  (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE)

int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_kiocb *notif;

	zc->done_io = 0;
	req->flags |= REQ_F_POLL_NO_LAZY;

	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	notif->cqe.user_data = req->cqe.user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	req->flags |= REQ_F_NEED_CLEANUP;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
		if (zc->flags & ~IO_ZC_FLAGS_VALID)
			return -EINVAL;
		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
			struct io_notif_data *nd = io_notif_to_data(notif);

			nd->zc_report = true;
			nd->zc_used = false;
			nd->zc_copied = false;
		}
	}

	if (req->opcode != IORING_OP_SEND_ZC) {
		if (unlikely(sqe->addr2 || sqe->file_index))
			return -EINVAL;
		if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
			return -EINVAL;
	}

	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
	zc->buf_index = READ_ONCE(sqe->buf_index);
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		zc->msg_flags |= MSG_CMSG_COMPAT;
#endif
	if (unlikely(!io_msg_alloc_async(req)))
		return -ENOMEM;
	if (req->opcode != IORING_OP_SENDMSG_ZC)
		return io_send_setup(req, sqe);
	return io_sendmsg_setup(req, sqe);
}

static int io_sg_from_iter_iovec(struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	skb_zcopy_downgrade_managed(skb);
	return zerocopy_fill_skb_from_iter(skb, from, length);
}

static int io_sg_from_iter(struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	if (!frag)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return zerocopy_fill_skb_from_iter(skb, from, length);

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	if (bi.bi_size)
		ret = -EMSGSIZE;

	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;
	return ret;
}

static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	int ret;

	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
		struct io_ring_ctx *ctx = req->ctx;
		struct io_rsrc_node *node;

		ret = -EFAULT;
		io_ring_submit_lock(ctx, issue_flags);
		node = io_rsrc_node_lookup(&ctx->buf_table, sr->buf_index);
		if (node) {
			io_req_assign_buf_node(sr->notif, node);
			ret = 0;
		}
		io_ring_submit_unlock(ctx, issue_flags);

		if (unlikely(ret))
			return ret;

		ret = io_import_fixed(ITER_SOURCE, &kmsg->msg.msg_iter,
				      node->buf, (u64)(uintptr_t)sr->buf,
				      sr->len);
		if (unlikely(ret))
			return ret;
		kmsg->msg.sg_from_iter = io_sg_from_iter;
	} else {
		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
		ret = io_notif_account_mem(sr->notif, sr->len);
		if (unlikely(ret))
			return ret;
		kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
	}

	return ret;
}

int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned msg_flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	if (!zc->done_io) {
		ret = io_send_zc_import(req, issue_flags);
		if (unlikely(ret))
			return ret;
	}

	msg_flags = zc->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
	msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;

	kmsg->msg.msg_flags = msg_flags;
	kmsg->msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
	ret = sock_sendmsg(sock, &kmsg->msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) {
			zc->len -= ret;
			zc->buf += ret;
			zc->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += zc->done_io;
	else if (zc->done_io)
		ret = zc->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(zc->notif);
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}
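
/*
 * Illustrative userspace sketch (not part of the kernel build) of the
 * zero-copy send completed by io_send_zc() above. Note the two-step
 * completion: the request CQE carries IORING_CQE_F_MORE (set via
 * io_req_set_res() above), and a second CQE with IORING_CQE_F_NOTIF fires
 * once the kernel no longer references the user buffer, at which point it
 * may be reused. Assumes liburing's io_uring_prep_send_zc() helper.
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
 *
 *	io_uring_prep_send_zc(sqe, sockfd, buf, len, MSG_NOSIGNAL, 0);
 *	io_uring_submit(ring);
 *
 *	// first CQE: send result, IORING_CQE_F_MORE set
 *	// second CQE: res == 0, IORING_CQE_F_NOTIF set -> buf may be reused
 */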

int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;
	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
	kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(sr->notif);
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}

void io_sendrecv_fail(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (sr->done_io)
		req->cqe.res = sr->done_io;

	if ((req->flags & REQ_F_NEED_CLEANUP) &&
	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
		req->cqe.flags |= IORING_CQE_F_MORE;
}

#define ACCEPT_FLAGS	(IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \
			 IORING_ACCEPT_POLL_FIRST)

int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);

	if (sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	accept->iou_flags = READ_ONCE(sqe->ioprio);
	if (accept->iou_flags & ~ACCEPT_FLAGS)
		return -EINVAL;

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		if (accept->iou_flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (accept->iou_flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	if (accept->iou_flags & IORING_ACCEPT_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	return 0;
}

int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool fixed = !!accept->file_slot;
	struct proto_accept_arg arg = {
		.flags = force_nonblock ? O_NONBLOCK : 0,
	};
	struct file *file;
	unsigned cflags;
	int ret, fd;

	if (!(req->flags & REQ_F_POLLED) &&
	    accept->iou_flags & IORING_ACCEPT_POLL_FIRST)
		return -EAGAIN;

retry:
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	arg.err = 0;
	arg.is_empty = -1;
	file = do_accept(req->file, &arg, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock &&
		    !(accept->iou_flags & IORING_ACCEPT_DONTWAIT)) {
			/*
			 * if it's multishot and polled, we don't need to
			 * return EAGAIN to arm the poll infra since it
			 * has already been done
			 */
			if (issue_flags & IO_URING_F_MULTISHOT)
				return IOU_ISSUE_SKIP_COMPLETE;
			return ret;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  accept->file_slot);
	}

	cflags = 0;
	if (!arg.is_empty)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, ret, cflags);
		return IOU_OK;
	}

	if (ret < 0)
		return ret;
	if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1)
			goto retry;
		if (issue_flags & IO_URING_F_MULTISHOT)
			return IOU_ISSUE_SKIP_COMPLETE;
		return -EAGAIN;
	}

	io_req_set_res(req, ret, cflags);
	return IOU_STOP_MULTISHOT;
}
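
/*
 * Illustrative userspace sketch (not part of the kernel build) of the
 * multishot accept loop driven by io_accept() above: one SQE produces a CQE
 * per accepted connection, each with IORING_CQE_F_MORE set while the request
 * stays armed. Assumes liburing's io_uring_prep_multishot_accept() helper;
 * handle_client() is hypothetical.
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
 *	struct io_uring_cqe *cqe;
 *	bool more;
 *
 *	io_uring_prep_multishot_accept(sqe, listen_fd, NULL, NULL, 0);
 *	io_uring_submit(ring);
 *
 *	for (;;) {
 *		io_uring_wait_cqe(ring, &cqe);
 *		if (cqe->res >= 0)
 *			handle_client(cqe->res);	// new connection fd
 *		more = cqe->flags & IORING_CQE_F_MORE;
 *		io_uring_cqe_seen(ring, cqe);
 *		if (!more)
 *			break;				// re-arm or handle error
 *	}
 */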

int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
		return -EINVAL;
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	return 0;
}

int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
	bool fixed = !!sock->file_slot;
	struct file *file;
	int ret, fd;

	if (!fixed) {
		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  sock->file_slot);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	conn->in_progress = conn->seen_econnaborted = false;

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr);
}

int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io = req->async_data;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->addr, connect->addr_len,
				 file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
	    && force_nonblock) {
		if (ret == -EINPROGRESS) {
			connect->in_progress = true;
		} else if (ret == -ECONNABORTED) {
			if (connect->seen_econnaborted)
				goto out;
			connect->seen_econnaborted = true;
		}
		return -EAGAIN;
	}
	if (connect->in_progress) {
		/*
		 * At least bluetooth will return -EBADFD on a re-connect
		 * attempt, and it's (supposedly) also valid to get -EISCONN
		 * which means the previous result is good. For both of these,
		 * grab the sock_error() and use that for the completion.
		 */
		if (ret == -EBADFD || ret == -EISCONN)
			ret = sock_error(sock_from_file(req->file)->sk);
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	io_req_msg_cleanup(req, issue_flags);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct sockaddr __user *uaddr;
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	bind->addr_len = READ_ONCE(sqe->addr2);

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;
	return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr);
}

int io_bind(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct io_async_msghdr *io = req->async_data;
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_bind_socket(sock, &io->addr, bind->addr_len);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}

int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);

	if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2)
		return -EINVAL;

	listen->backlog = READ_ONCE(sqe->len);
	return 0;
}

int io_listen(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_listen_socket(sock, listen->backlog);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}
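
/*
 * Illustrative userspace sketch (not part of the kernel build) chaining the
 * socket setup handled by io_bind()/io_listen() above, assuming recent
 * liburing releases provide io_uring_prep_bind() and io_uring_prep_listen()
 * helpers for these opcodes. IOSQE_IO_LINK orders the two SQEs so the listen
 * only runs after the bind succeeds.
 *
 *	struct sockaddr_in addr = { .sin_family = AF_INET,
 *				    .sin_port = htons(PORT) };	// PORT assumed
 *	struct io_uring_sqe *sqe;
 *
 *	sqe = io_uring_get_sqe(ring);
 *	io_uring_prep_bind(sqe, sockfd, (struct sockaddr *)&addr, sizeof(addr));
 *	sqe->flags |= IOSQE_IO_LINK;
 *
 *	sqe = io_uring_get_sqe(ring);
 *	io_uring_prep_listen(sqe, sockfd, SOMAXCONN);
 *
 *	io_uring_submit(ring);
 */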

void io_netmsg_cache_free(const void *entry)
{
	struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;

	if (kmsg->free_iov) {
		kasan_mempool_unpoison_object(kmsg->free_iov,
				kmsg->free_iov_nr * sizeof(struct iovec));
		io_netmsg_iovec_free(kmsg);
	}
	kfree(kmsg);
}
#endif