// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"
#include "zcrx.h"

struct io_shutdown {
	struct file			*file;
	int				how;
};

struct io_accept {
	struct file			*file;
	struct sockaddr __user		*addr;
	int __user			*addr_len;
	int				flags;
	int				iou_flags;
	u32				file_slot;
	unsigned long			nofile;
};

struct io_socket {
	struct file			*file;
	int				domain;
	int				type;
	int				protocol;
	int				flags;
	u32				file_slot;
	unsigned long			nofile;
};

struct io_connect {
	struct file			*file;
	struct sockaddr __user		*addr;
	int				addr_len;
	bool				in_progress;
	bool				seen_econnaborted;
};

struct io_bind {
	struct file			*file;
	int				addr_len;
};

struct io_listen {
	struct file			*file;
	int				backlog;
};

struct io_sr_msg {
	struct file			*file;
	union {
		struct compat_msghdr __user	*umsg_compat;
		struct user_msghdr __user	*umsg;
		void __user			*buf;
	};
	int				len;
	unsigned			done_io;
	unsigned			msg_flags;
	unsigned			nr_multishot_loops;
	u16				flags;
	/* initialised and used only by !msg send variants */
	u16				buf_group;
	/* per-invocation mshot limit */
	unsigned			mshot_len;
	/* overall mshot byte limit */
	unsigned			mshot_total_len;
	void __user			*msg_control;
	/* used only for send zerocopy */
	struct io_kiocb			*notif;
};

/*
 * The UAPI flags are the lower 8 bits, as that's all sqe->ioprio will hold
 * anyway. Use the upper 8 bits for internal uses.
 */
enum sr_retry_flags {
	IORING_RECV_RETRY	= (1U << 15),
	IORING_RECV_PARTIAL_MAP	= (1U << 14),
	IORING_RECV_MSHOT_CAP	= (1U << 13),
	IORING_RECV_MSHOT_LIM	= (1U << 12),
	IORING_RECV_MSHOT_DONE	= (1U << 11),

	IORING_RECV_RETRY_CLEAR	= IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP,
	IORING_RECV_NO_RETRY	= IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP |
				  IORING_RECV_MSHOT_CAP | IORING_RECV_MSHOT_DONE,
};

/*
 * Number of times we'll try and do receives if there's more data. If we
 * exceed this limit, then add us to the back of the queue and retry from
 * there. This helps fairness between flooding clients.
 */
#define MULTISHOT_MAX_RETRY	32
struct io_recvzc {
	struct file			*file;
	unsigned			msg_flags;
	u16				flags;
	u32				len;
	struct io_zcrx_ifq		*ifq;
};

static int io_sg_from_iter_iovec(struct sk_buff *skb,
				 struct iov_iter *from, size_t length);
static int io_sg_from_iter(struct sk_buff *skb,
			   struct iov_iter *from, size_t length);

int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
	struct socket *sock;
	int ret;

	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}

static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
{
	if (kmsg->vec.iovec)
		io_vec_free(&kmsg->vec);
}

static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;

	/* can't recycle, ensure we free the iovec if we have one */
	if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
		io_netmsg_iovec_free(hdr);
		return;
	}

	/* Let normal cleanup path reap it if we fail adding to the cache */
	io_alloc_cache_vec_kasan(&hdr->vec);
	if (hdr->vec.nr > IO_VEC_CACHE_SOFT_CAP)
		io_vec_free(&hdr->vec);

	if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) {
		req->async_data = NULL;
		req->flags &= ~(REQ_F_ASYNC_DATA|REQ_F_NEED_CLEANUP);
	}
}

static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_async_msghdr *hdr;

	hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req);
	if (!hdr)
		return NULL;

	/* If the async data was cached, we might have an iov cached inside. */
	if (hdr->vec.iovec)
		req->flags |= REQ_F_NEED_CLEANUP;
	return hdr;
}
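/*
 * Reset the per-iteration state of a multishot request before it is
 * re-armed: drop REQ_F_BL_EMPTY, zero the partial-progress counter,
 * clear the internal retry flags and restore the length from the
 * per-invocation cap, so the next shot starts from a clean slate.
 */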
static inline void io_mshot_prep_retry(struct io_kiocb *req,
				       struct io_async_msghdr *kmsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	req->flags &= ~REQ_F_BL_EMPTY;
	sr->done_io = 0;
	sr->flags &= ~IORING_RECV_RETRY_CLEAR;
	sr->len = sr->mshot_len;
}

static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			     const struct iovec __user *uiov, unsigned uvec_seg,
			     int ddir)
{
	struct iovec *iov;
	int ret, nr_segs;

	if (iomsg->vec.iovec) {
		nr_segs = iomsg->vec.nr;
		iov = iomsg->vec.iovec;
	} else {
		nr_segs = 1;
		iov = &iomsg->fast_iov;
	}

	ret = __import_iovec(ddir, uiov, uvec_seg, nr_segs, &iov,
			     &iomsg->msg.msg_iter, io_is_compat(req->ctx));
	if (unlikely(ret < 0))
		return ret;

	if (iov) {
		req->flags |= REQ_F_NEED_CLEANUP;
		io_vec_reset_iovec(&iomsg->vec, iov, iomsg->msg.msg_iter.nr_segs);
	}
	return 0;
}

static int io_compat_msg_copy_hdr(struct io_kiocb *req,
				  struct io_async_msghdr *iomsg,
				  struct compat_msghdr *msg, int ddir,
				  struct sockaddr __user **save_addr)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_iovec __user *uiov;
	int ret;

	if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
		return -EFAULT;

	ret = __get_compat_msghdr(&iomsg->msg, msg, save_addr);
	if (ret)
		return ret;

	uiov = compat_ptr(msg->msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg->msg_iovlen == 0) {
			sr->len = 0;
		} else if (msg->msg_iovlen > 1) {
			return -EINVAL;
		} else {
			struct compat_iovec tmp_iov;

			if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
				return -EFAULT;
			sr->len = tmp_iov.iov_len;
		}
	}
	return 0;
}

static int io_copy_msghdr_from_user(struct user_msghdr *msg,
				    struct user_msghdr __user *umsg)
{
	if (!user_access_begin(umsg, sizeof(*umsg)))
		return -EFAULT;
	unsafe_get_user(msg->msg_name, &umsg->msg_name, ua_end);
	unsafe_get_user(msg->msg_namelen, &umsg->msg_namelen, ua_end);
	unsafe_get_user(msg->msg_iov, &umsg->msg_iov, ua_end);
	unsafe_get_user(msg->msg_iovlen, &umsg->msg_iovlen, ua_end);
	unsafe_get_user(msg->msg_control, &umsg->msg_control, ua_end);
	unsafe_get_user(msg->msg_controllen, &umsg->msg_controllen, ua_end);
	user_access_end();
	return 0;
ua_end:
	user_access_end();
	return -EFAULT;
}
static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			   struct user_msghdr *msg, int ddir,
			   struct sockaddr __user **save_addr)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr __user *umsg = sr->umsg;
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

	if (io_is_compat(req->ctx)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ddir, save_addr);
		if (ret)
			return ret;

		memset(msg, 0, sizeof(*msg));
		msg->msg_namelen = cmsg.msg_namelen;
		msg->msg_controllen = cmsg.msg_controllen;
		msg->msg_iov = compat_ptr(cmsg.msg_iov);
		msg->msg_iovlen = cmsg.msg_iovlen;
		return 0;
	}

	ret = io_copy_msghdr_from_user(msg, umsg);
	if (unlikely(ret))
		return ret;

	msg->msg_flags = 0;

	ret = __copy_msghdr(&iomsg->msg, msg, save_addr);
	if (ret)
		return ret;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg->msg_iovlen == 0) {
			sr->len = 0;
		} else if (msg->msg_iovlen > 1) {
			return -EINVAL;
		} else {
			struct iovec __user *uiov = msg->msg_iov;
			struct iovec tmp_iov;

			if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
				return -EFAULT;
			sr->len = tmp_iov.iov_len;
		}
	}
	return 0;
}

void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	io_netmsg_iovec_free(io);
}

static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	void __user *addr;
	u16 addr_len;
	int ret;

	sr->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));

	if (READ_ONCE(sqe->__pad3[0]))
		return -EINVAL;

	kmsg->msg.msg_name = NULL;
	kmsg->msg.msg_namelen = 0;
	kmsg->msg.msg_control = NULL;
	kmsg->msg.msg_controllen = 0;
	kmsg->msg.msg_ubuf = NULL;

	addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	addr_len = READ_ONCE(sqe->addr_len);
	if (addr) {
		ret = move_addr_to_kernel(addr, addr_len, &kmsg->addr);
		if (unlikely(ret < 0))
			return ret;
		kmsg->msg.msg_name = &kmsg->addr;
		kmsg->msg.msg_namelen = addr_len;
	}
	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
		req->flags |= REQ_F_IMPORT_BUFFER;
		return 0;
	}
	if (req->flags & REQ_F_BUFFER_SELECT)
		return 0;
	return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
}

static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct user_msghdr msg;
	int ret;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	ret = io_msg_copy_hdr(req, kmsg, &msg, ITER_SOURCE, NULL);
	if (unlikely(ret))
		return ret;
	/* save msg_control as sys_sendmsg() overwrites it */
	sr->msg_control = kmsg->msg.msg_control_user;

	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
		kmsg->msg.msg_iter.nr_segs = msg.msg_iovlen;
		return io_prep_reg_iovec(req, &kmsg->vec, msg.msg_iov,
					 msg.msg_iovlen);
	}
	if (req->flags & REQ_F_BUFFER_SELECT)
		return 0;
	return io_net_import_vec(req, kmsg, msg.msg_iov, msg.msg_iovlen, ITER_SOURCE);
}

#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE)

int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~SENDMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (req->flags & REQ_F_BUFFER_SELECT)
		sr->buf_group = req->buf_index;
	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_SENDMSG)
			return -EINVAL;
		sr->msg_flags |= MSG_WAITALL;
		req->buf_list = NULL;
		req->flags |= REQ_F_MULTISHOT;
	}

	if (io_is_compat(req->ctx))
		sr->msg_flags |= MSG_CMSG_COMPAT;

	if (unlikely(!io_msg_alloc_async(req)))
		return -ENOMEM;
	if (req->opcode != IORING_OP_SENDMSG)
		return io_send_setup(req, sqe);
	if (unlikely(sqe->addr2 || sqe->file_index))
		return -EINVAL;
	return io_sendmsg_setup(req, sqe);
}

static void io_req_msg_cleanup(struct io_kiocb *req,
			       unsigned int issue_flags)
{
	io_netmsg_recycle(req, issue_flags);
}
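/*
 * Worked example for the bundle accounting below (numbers are
 * illustrative only): a bundle that mapped three 4k buffers and then
 * transferred 6k has fully consumed the first buffer and half of the
 * second, so io_bundle_nbufs() reports 2 buffers consumed.
 */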
/*
 * For bundle completions, we need to figure out how many segments we consumed.
 * A bundle could be using a single ITER_UBUF if that's all we mapped, or it
 * could be using an ITER_IOVEC. If the latter, then if we consumed all of
 * the segments, then it's a trivial question to answer. If we have residual
 * data in the iter, then loop the segments to figure out how much we
 * transferred.
 */
static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
{
	struct iovec *iov;
	int nbufs;

	/* no data is always zero segments, and a ubuf is always 1 segment */
	if (ret <= 0)
		return 0;
	if (iter_is_ubuf(&kmsg->msg.msg_iter))
		return 1;

	iov = kmsg->vec.iovec;
	if (!iov)
		iov = &kmsg->fast_iov;

	/* if all data was transferred, it's basic pointer math */
	if (!iov_iter_count(&kmsg->msg.msg_iter))
		return iter_iov(&kmsg->msg.msg_iter) - iov;

	/* short transfer, count segments */
	nbufs = 0;
	do {
		int this_len = min_t(int, iov[nbufs].iov_len, ret);

		nbufs++;
		ret -= this_len;
	} while (ret);

	return nbufs;
}

static inline bool io_send_finish(struct io_kiocb *req, int *ret,
				  struct io_async_msghdr *kmsg,
				  unsigned issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	bool bundle_finished = *ret <= 0;
	unsigned int cflags;

	if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
		cflags = io_put_kbuf(req, *ret, issue_flags);
		goto finish;
	}

	cflags = io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret), issue_flags);

	if (bundle_finished || req->flags & REQ_F_BL_EMPTY)
		goto finish;

	/*
	 * Fill CQE for this receive and see if we should keep trying to
	 * receive from this socket.
	 */
	if (io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
		io_mshot_prep_retry(req, kmsg);
		return false;
	}

	/* Otherwise stop bundle and use the current result. */
finish:
	io_req_set_res(req, *ret, cflags);
	*ret = IOU_COMPLETE;
	return true;
}
int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret > 0 && io_net_retry(sock, flags)) {
			kmsg->msg.msg_controllen = 0;
			kmsg->msg.msg_control = NULL;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	io_req_msg_cleanup(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags,
				 struct io_async_msghdr *kmsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct buf_sel_arg arg = {
		.iovs = &kmsg->fast_iov,
		.max_len = min_not_zero(sr->len, INT_MAX),
		.nr_iovs = 1,
		.buf_group = sr->buf_group,
	};
	int ret;

	if (kmsg->vec.iovec) {
		arg.nr_iovs = kmsg->vec.nr;
		arg.iovs = kmsg->vec.iovec;
		arg.mode = KBUF_MODE_FREE;
	}

	if (!(sr->flags & IORING_RECVSEND_BUNDLE))
		arg.nr_iovs = 1;
	else
		arg.mode |= KBUF_MODE_EXPAND;

	ret = io_buffers_select(req, &arg, issue_flags);
	if (unlikely(ret < 0))
		return ret;

	if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
		kmsg->vec.nr = ret;
		kmsg->vec.iovec = arg.iovs;
		req->flags |= REQ_F_NEED_CLEANUP;
	}
	sr->len = arg.out_len;

	if (ret == 1) {
		sr->buf = arg.iovs[0].iov_base;
		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
	} else {
		iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE,
			      arg.iovs, ret, arg.out_len);
	}

	return 0;
}
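/*
 * Plain send. For bundle sends this loops via retry_bundle: each filled
 * buffer set posts an aux CQE with IORING_CQE_F_MORE and the request is
 * re-prepared, until io_send_finish() decides the bundle is done or no
 * more buffers are available.
 */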
int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;

retry_bundle:
	if (io_do_buffer_select(req)) {
		ret = io_send_select_buffer(req, issue_flags, kmsg);
		if (ret)
			return ret;
	}

	/*
	 * If MSG_WAITALL is set, or this is a bundle send, then we need
	 * the full amount. If just bundle is set, a short send completes
	 * the bundle sequence rather than continuing on.
	 */
	if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
	kmsg->msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &kmsg->msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	if (!io_send_finish(req, &ret, kmsg, issue_flags))
		goto retry_bundle;

	io_req_msg_cleanup(req, issue_flags);
	return ret;
}

static int io_recvmsg_mshot_prep(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg,
				 int namelen, size_t controllen)
{
	if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) ==
	    (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) {
		int hdr;

		if (unlikely(namelen < 0))
			return -EOVERFLOW;
		if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
				       namelen, &hdr))
			return -EOVERFLOW;
		if (check_add_overflow(hdr, controllen, &hdr))
			return -EOVERFLOW;

		iomsg->namelen = namelen;
		iomsg->controllen = controllen;
		return 0;
	}

	return 0;
}

static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct user_msghdr msg;
	int ret;

	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST, &iomsg->uaddr);
	if (unlikely(ret))
		return ret;

	if (!(req->flags & REQ_F_BUFFER_SELECT)) {
		ret = io_net_import_vec(req, iomsg, msg.msg_iov, msg.msg_iovlen,
					ITER_DEST);
		if (unlikely(ret))
			return ret;
	}
	return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
				     msg.msg_controllen);
}

static int io_recvmsg_prep_setup(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg;

	kmsg = io_msg_alloc_async(req);
	if (unlikely(!kmsg))
		return -ENOMEM;

	if (req->opcode == IORING_OP_RECV) {
		kmsg->msg.msg_name = NULL;
		kmsg->msg.msg_namelen = 0;
		kmsg->msg.msg_inq = 0;
		kmsg->msg.msg_control = NULL;
		kmsg->msg.msg_get_inq = 1;
		kmsg->msg.msg_controllen = 0;
		kmsg->msg.msg_iocb = NULL;
		kmsg->msg.msg_ubuf = NULL;

		if (req->flags & REQ_F_BUFFER_SELECT)
			return 0;
		return import_ubuf(ITER_DEST, sr->buf, sr->len,
				   &kmsg->msg.msg_iter);
	}

	return io_recvmsg_copy_hdr(req, kmsg);
}

#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \
			IORING_RECVSEND_BUNDLE)

int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;

	if (unlikely(sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~RECVMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags);
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (req->flags & REQ_F_BUFFER_SELECT) {
		/*
		 * Store the buffer group for this multishot receive separately,
		 * as if we end up doing an io-wq based issue that selects a
		 * buffer, it has to be committed immediately and that will
		 * clear ->buf_list. This means we lose the link to the buffer
		 * list, and the eventual buffer put on completion then cannot
		 * restore it.
		 */
		sr->buf_group = req->buf_index;
		req->buf_list = NULL;
	}
	sr->mshot_total_len = sr->mshot_len = 0;
	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		if (req->opcode == IORING_OP_RECV) {
			sr->mshot_len = sr->len;
			sr->mshot_total_len = READ_ONCE(sqe->optlen);
			if (sr->mshot_total_len)
				sr->flags |= IORING_RECV_MSHOT_LIM;
		} else if (sqe->optlen) {
			return -EINVAL;
		}
		req->flags |= REQ_F_APOLL_MULTISHOT;
	} else if (sqe->optlen) {
		return -EINVAL;
	}

	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_RECVMSG)
			return -EINVAL;
	}

	if (io_is_compat(req->ctx))
		sr->msg_flags |= MSG_CMSG_COMPAT;

	sr->nr_multishot_loops = 0;
	return io_recvmsg_prep_setup(req);
}
/* bits to clear in old and inherit in new cflags on bundle retry */
#define CQE_F_MASK	(IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE)

/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
				  struct io_async_msghdr *kmsg,
				  bool mshot_finished, unsigned issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	unsigned int cflags = 0;

	if (kmsg->msg.msg_inq > 0)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (*ret > 0 && sr->flags & IORING_RECV_MSHOT_LIM) {
		/*
		 * If sr->len hits zero, the limit has been reached. Mark
		 * mshot as finished, and flag MSHOT_DONE as well to prevent
		 * a potential bundle from being retried.
		 */
		sr->mshot_total_len -= min_t(int, *ret, sr->mshot_total_len);
		if (!sr->mshot_total_len) {
			sr->flags |= IORING_RECV_MSHOT_DONE;
			mshot_finished = true;
		}
	}

	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		size_t this_ret = *ret - sr->done_io;

		cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret),
				       issue_flags);
		if (sr->flags & IORING_RECV_RETRY)
			cflags = req->cqe.flags | (cflags & CQE_F_MASK);
		if (sr->mshot_len && *ret >= sr->mshot_len)
			sr->flags |= IORING_RECV_MSHOT_CAP;
		/* bundle with no more immediate buffers, we're done */
		if (req->flags & REQ_F_BL_EMPTY)
			goto finish;
		/*
		 * If more is available AND it was a full transfer, retry and
		 * append to this one
		 */
		if (!(sr->flags & IORING_RECV_NO_RETRY) &&
		    kmsg->msg.msg_inq > 1 && this_ret > 0 &&
		    !iov_iter_count(&kmsg->msg.msg_iter)) {
			req->cqe.flags = cflags & ~CQE_F_MASK;
			sr->len = kmsg->msg.msg_inq;
			sr->done_io += this_ret;
			sr->flags |= IORING_RECV_RETRY;
			return false;
		}
	} else {
		cflags |= io_put_kbuf(req, *ret, issue_flags);
	}

	/*
	 * Fill CQE for this receive and see if we should keep trying to
	 * receive from this socket.
	 */
	if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
	    io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
		*ret = IOU_RETRY;
		io_mshot_prep_retry(req, kmsg);
		/* Known not-empty or unknown state, retry */
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY &&
			    !(sr->flags & IORING_RECV_MSHOT_CAP)) {
				return false;
			}
			/* mshot retries exceeded, force a requeue */
			sr->nr_multishot_loops = 0;
			sr->flags &= ~IORING_RECV_MSHOT_CAP;
			if (issue_flags & IO_URING_F_MULTISHOT)
				*ret = IOU_REQUEUE;
		}
		return true;
	}

	/* Finish the request / stop multishot. */
finish:
	io_req_set_res(req, *ret, cflags);
	*ret = IOU_COMPLETE;
	io_req_msg_cleanup(req, issue_flags);
	return true;
}
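/*
 * Multishot recvmsg carves the selected buffer up front: a struct
 * io_uring_recvmsg_out header, then namelen bytes reserved for the source
 * address, then controllen bytes for control data, with the payload
 * following after. Buffers too small to hold that header fail with
 * -EFAULT.
 */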
static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}

struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};

static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 * "fromlen shall refer to the value before truncation.."
	 *			1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}
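/*
 * ->msg_inq is primed to -1 ("unknown") before each receive; protocols
 * that support it fill in how much data remains queued on the socket.
 * That value drives both IORING_CQE_F_SOCK_NONEMPTY and the multishot
 * retry decision in io_recv_finish().
 */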
int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;
		size_t len = sr->len;

		buf = io_buffer_select(req, &len, sr->buf_group, issue_flags);
		if (!buf)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
			if (ret) {
				io_kbuf_recycle(req, issue_flags);
				return ret;
			}
		}

		iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);
	}

	kmsg->msg.msg_get_inq = 1;
	kmsg->msg.msg_inq = -1;
	if (req->flags & REQ_F_APOLL_MULTISHOT) {
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	} else {
		/* disable partial retry for recvmsg with cmsg attached */
		if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
			min_ret = iov_iter_count(&kmsg->msg.msg_iter);

		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);
	}

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT)
				io_kbuf_recycle(req, issue_flags);

			return IOU_RETRY;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return IOU_RETRY;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
		goto retry_multishot;

	return ret;
}
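/*
 * For a bundle receive the buffer list is only peeked, not committed,
 * at this point. max_len comes from the request length when one was
 * given, otherwise from what is known to be queued on the socket; a
 * capped multishot is further clamped to its remaining byte budget, so
 * the peek doesn't claim more buffers than one receive can use.
 */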
static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
			      size_t *len, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	int ret;

	/*
	 * If the ring isn't locked, then don't use the peek interface
	 * to grab multiple buffers as we will lock/unlock between
	 * this selection and posting the buffers.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
	    sr->flags & IORING_RECVSEND_BUNDLE) {
		struct buf_sel_arg arg = {
			.iovs = &kmsg->fast_iov,
			.nr_iovs = 1,
			.mode = KBUF_MODE_EXPAND,
			.buf_group = sr->buf_group,
		};

		if (kmsg->vec.iovec) {
			arg.nr_iovs = kmsg->vec.nr;
			arg.iovs = kmsg->vec.iovec;
			arg.mode |= KBUF_MODE_FREE;
		}

		if (*len)
			arg.max_len = *len;
		else if (kmsg->msg.msg_inq > 1)
			arg.max_len = min_not_zero(*len, (size_t) kmsg->msg.msg_inq);

		/* if mshot limited, ensure we don't go over */
		if (sr->flags & IORING_RECV_MSHOT_LIM)
			arg.max_len = min_not_zero(arg.max_len, sr->mshot_total_len);
		ret = io_buffers_peek(req, &arg);
		if (unlikely(ret < 0))
			return ret;

		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
			kmsg->vec.nr = ret;
			kmsg->vec.iovec = arg.iovs;
			req->flags |= REQ_F_NEED_CLEANUP;
		}
		if (arg.partial_map)
			sr->flags |= IORING_RECV_PARTIAL_MAP;

		/* special case 1 vec, can be a fast path */
		if (ret == 1) {
			sr->buf = arg.iovs[0].iov_base;
			sr->len = arg.iovs[0].iov_len;
			goto map_ubuf;
		}
		iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
			      arg.out_len);
	} else {
		void __user *buf;

		*len = sr->len;
		buf = io_buffer_select(req, len, sr->buf_group, issue_flags);
		if (!buf)
			return -ENOBUFS;
		sr->buf = buf;
		sr->len = *len;
map_ubuf:
		ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
	}

	return 0;
}

int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	size_t len = sr->len;
	bool mshot_finished;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	if (io_do_buffer_select(req)) {
		ret = io_recv_buf_select(req, kmsg, &len, issue_flags);
		if (unlikely(ret)) {
			kmsg->msg.msg_inq = -1;
			goto out_free;
		}
		sr->buf = NULL;
	}

	kmsg->msg.msg_flags = 0;
	kmsg->msg.msg_inq = -1;

	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = sock_recvmsg(sock, &kmsg->msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT)
				io_kbuf_recycle(req, issue_flags);

			return IOU_RETRY;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		req_set_fail(req);
	}

	mshot_finished = ret <= 0;
	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
		goto retry_multishot;

	return ret;
}
int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
	unsigned ifq_idx;

	if (unlikely(sqe->addr2 || sqe->addr || sqe->addr3))
		return -EINVAL;

	ifq_idx = READ_ONCE(sqe->zcrx_ifq_idx);
	zc->ifq = xa_load(&req->ctx->zcrx_ctxs, ifq_idx);
	if (!zc->ifq)
		return -EINVAL;

	zc->len = READ_ONCE(sqe->len);
	zc->flags = READ_ONCE(sqe->ioprio);
	zc->msg_flags = READ_ONCE(sqe->msg_flags);
	if (zc->msg_flags)
		return -EINVAL;
	if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT))
		return -EINVAL;
	/* multishot required */
	if (!(zc->flags & IORING_RECV_MULTISHOT))
		return -EINVAL;
	/* All data completions are posted as aux CQEs. */
	req->flags |= REQ_F_APOLL_MULTISHOT;

	return 0;
}

int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
	struct socket *sock;
	unsigned int len;
	int ret;

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	len = zc->len;
	ret = io_zcrx_recv(req, zc->ifq, sock, zc->msg_flags | MSG_DONTWAIT,
			   issue_flags, &zc->len);
	if (len && zc->len == 0) {
		io_req_set_res(req, 0, 0);

		return IOU_COMPLETE;
	}
	if (unlikely(ret <= 0) && ret != -EAGAIN) {
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret == IOU_REQUEUE)
			return IOU_REQUEUE;

		req_set_fail(req);
		io_req_set_res(req, ret, 0);
		return IOU_COMPLETE;
	}
	return IOU_RETRY;
}

void io_send_zc_cleanup(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io = req->async_data;

	if (req_has_async_data(req))
		io_netmsg_iovec_free(io);
	if (zc->notif) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
	}
}
#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
#define IO_ZC_FLAGS_VALID  (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE)

int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_async_msghdr *iomsg;
	struct io_kiocb *notif;
	int ret;

	zc->done_io = 0;

	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	notif->cqe.user_data = req->cqe.user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	req->flags |= REQ_F_NEED_CLEANUP | REQ_F_POLL_NO_LAZY;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
		if (zc->flags & ~IO_ZC_FLAGS_VALID)
			return -EINVAL;
		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
			struct io_notif_data *nd = io_notif_to_data(notif);

			nd->zc_report = true;
			nd->zc_used = false;
			nd->zc_copied = false;
		}
	}

	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
	req->buf_index = READ_ONCE(sqe->buf_index);
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

	if (io_is_compat(req->ctx))
		zc->msg_flags |= MSG_CMSG_COMPAT;

	iomsg = io_msg_alloc_async(req);
	if (unlikely(!iomsg))
		return -ENOMEM;

	if (req->opcode == IORING_OP_SEND_ZC) {
		ret = io_send_setup(req, sqe);
	} else {
		if (unlikely(sqe->addr2 || sqe->file_index))
			return -EINVAL;
		ret = io_sendmsg_setup(req, sqe);
	}
	if (unlikely(ret))
		return ret;

	if (!(zc->flags & IORING_RECVSEND_FIXED_BUF)) {
		iomsg->msg.sg_from_iter = io_sg_from_iter_iovec;
		return io_notif_account_mem(zc->notif, iomsg->msg.msg_iter.count);
	}
	iomsg->msg.sg_from_iter = io_sg_from_iter;
	return 0;
}

static int io_sg_from_iter_iovec(struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	skb_zcopy_downgrade_managed(skb);
	return zerocopy_fill_skb_from_iter(skb, from, length);
}
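/*
 * Fixed-buffer zero-copy path: the registered pages are already pinned,
 * so rather than copying, each bvec segment is attached directly as an
 * skb frag and the notification CQE later signals when the kernel is
 * done with the pages. If we run out of frag slots (MAX_SKB_FRAGS), the
 * remainder is reported as -EMSGSIZE.
 */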
static int io_sg_from_iter(struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	if (!frag)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return zerocopy_fill_skb_from_iter(skb, from, length);

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	if (bi.bi_size)
		ret = -EMSGSIZE;

	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;
	return ret;
}

static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;

	WARN_ON_ONCE(!(sr->flags & IORING_RECVSEND_FIXED_BUF));

	sr->notif->buf_index = req->buf_index;
	return io_import_reg_buf(sr->notif, &kmsg->msg.msg_iter,
				 (u64)(uintptr_t)sr->buf, sr->len,
				 ITER_SOURCE, issue_flags);
}

int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned msg_flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	if (req->flags & REQ_F_IMPORT_BUFFER) {
		req->flags &= ~REQ_F_IMPORT_BUFFER;
		ret = io_send_zc_import(req, issue_flags);
		if (unlikely(ret))
			return ret;
	}

	msg_flags = zc->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
	msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;

	kmsg->msg.msg_flags = msg_flags;
	kmsg->msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
	ret = sock_sendmsg(sock, &kmsg->msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) {
			zc->len -= ret;
			zc->buf += ret;
			zc->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += zc->done_io;
	else if (zc->done_io)
		ret = zc->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_COMPLETE;
}
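/*
 * Deferred import: when REQ_F_IMPORT_BUFFER is set by prep, the
 * registered vector is resolved here at issue time instead, using the
 * issue_flags-aware io_import_reg_vec(), before the send proceeds.
 */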
int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;

	if (req->flags & REQ_F_IMPORT_BUFFER) {
		unsigned uvec_segs = kmsg->msg.msg_iter.nr_segs;
		int ret;

		ret = io_import_reg_vec(ITER_SOURCE, &kmsg->msg.msg_iter, req,
					&kmsg->vec, uvec_segs, issue_flags);
		if (unlikely(ret))
			return ret;
		req->flags &= ~REQ_F_IMPORT_BUFFER;
	}

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;
	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(sr->notif);
		sr->notif = NULL;
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_COMPLETE;
}

void io_sendrecv_fail(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (sr->done_io)
		req->cqe.res = sr->done_io;

	if ((req->flags & REQ_F_NEED_CLEANUP) &&
	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
		req->cqe.flags |= IORING_CQE_F_MORE;
}

#define ACCEPT_FLAGS	(IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \
			 IORING_ACCEPT_POLL_FIRST)

int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);

	if (sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	accept->iou_flags = READ_ONCE(sqe->ioprio);
	if (accept->iou_flags & ~ACCEPT_FLAGS)
		return -EINVAL;

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		if (accept->iou_flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (accept->iou_flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	if (accept->iou_flags & IORING_ACCEPT_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	return 0;
}
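/*
 * Multishot accept: each successful accept posts an aux CQE with
 * IORING_CQE_F_MORE and loops back to accept again while the socket may
 * still have pending connections (tracked via arg.is_empty); the final
 * CQE is posted without F_MORE, terminating the multishot.
 */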
int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool fixed = !!accept->file_slot;
	struct proto_accept_arg arg = {
		.flags = force_nonblock ? O_NONBLOCK : 0,
	};
	struct file *file;
	unsigned cflags;
	int ret, fd;

	if (!(req->flags & REQ_F_POLLED) &&
	    accept->iou_flags & IORING_ACCEPT_POLL_FIRST)
		return -EAGAIN;

retry:
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	arg.err = 0;
	arg.is_empty = -1;
	file = do_accept(req->file, &arg, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock &&
		    !(accept->iou_flags & IORING_ACCEPT_DONTWAIT))
			return IOU_RETRY;

		if (ret == -ERESTARTSYS)
			ret = -EINTR;
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  accept->file_slot);
	}

	cflags = 0;
	if (!arg.is_empty)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (ret >= 0 && (req->flags & REQ_F_APOLL_MULTISHOT) &&
	    io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1)
			goto retry;
		return IOU_RETRY;
	}

	io_req_set_res(req, ret, cflags);
	if (ret < 0)
		req_set_fail(req);
	return IOU_COMPLETE;
}

int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
		return -EINVAL;
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	return 0;
}

int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
	bool fixed = !!sock->file_slot;
	struct file *file;
	int ret, fd;

	if (!fixed) {
		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  sock->file_slot);
	}
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	conn->in_progress = conn->seen_econnaborted = false;

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr);
}
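/*
 * A nonblocking connect typically returns -EINPROGRESS on first issue;
 * the request is then retried, and once poll reports EPOLLERR, or the
 * protocol returns -EBADFD/-EISCONN on the re-connect attempt, the final
 * status is fetched with sock_error() instead.
 */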
int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io = req->async_data;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	if (connect->in_progress) {
		struct poll_table_struct pt = { ._key = EPOLLERR };

		if (vfs_poll(req->file, &pt) & EPOLLERR)
			goto get_sock_err;
	}

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->addr, connect->addr_len,
				 file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
	    && force_nonblock) {
		if (ret == -EINPROGRESS) {
			connect->in_progress = true;
		} else if (ret == -ECONNABORTED) {
			if (connect->seen_econnaborted)
				goto out;
			connect->seen_econnaborted = true;
		}
		return -EAGAIN;
	}
	if (connect->in_progress) {
		/*
		 * At least bluetooth will return -EBADFD on a re-connect
		 * attempt, and it's (supposedly) also valid to get -EISCONN
		 * which means the previous result is good. For both of these,
		 * grab the sock_error() and use that for the completion.
		 */
		if (ret == -EBADFD || ret == -EISCONN) {
get_sock_err:
			ret = sock_error(sock_from_file(req->file)->sk);
		}
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	io_req_msg_cleanup(req, issue_flags);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct sockaddr __user *uaddr;
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	bind->addr_len = READ_ONCE(sqe->addr2);

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;
	return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr);
}

int io_bind(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct io_async_msghdr *io = req->async_data;
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_bind_socket(sock, &io->addr, bind->addr_len);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}

int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);

	if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2)
		return -EINVAL;

	listen->backlog = READ_ONCE(sqe->len);
	return 0;
}

int io_listen(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_listen_socket(sock, listen->backlog);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}
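/*
 * Callback for tearing down the netmsg alloc cache: free any iovec still
 * attached to a cached async msghdr before freeing the entry itself.
 */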
void io_netmsg_cache_free(const void *entry)
{
	struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;

	io_vec_free(&kmsg->vec);
	kfree(kmsg);
}