// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"
#include "zcrx.h"

struct io_shutdown {
	struct file *file;
	int how;
};

struct io_accept {
	struct file *file;
	struct sockaddr __user *addr;
	int __user *addr_len;
	int flags;
	int iou_flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_socket {
	struct file *file;
	int domain;
	int type;
	int protocol;
	int flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_connect {
	struct file *file;
	struct sockaddr __user *addr;
	int addr_len;
	bool in_progress;
	bool seen_econnaborted;
};

struct io_bind {
	struct file *file;
	int addr_len;
};

struct io_listen {
	struct file *file;
	int backlog;
};

struct io_sr_msg {
	struct file *file;
	union {
		struct compat_msghdr __user *umsg_compat;
		struct user_msghdr __user *umsg;
		void __user *buf;
	};
	int len;
	unsigned done_io;
	unsigned msg_flags;
	unsigned nr_multishot_loops;
	u16 flags;
	/* initialised and used only by !msg send variants */
	u16 buf_group;
	/* per-invocation mshot limit */
	unsigned mshot_len;
	/* overall mshot byte limit */
	unsigned mshot_total_len;
	void __user *msg_control;
	/* used only for send zerocopy */
	struct io_kiocb *notif;
};

/*
 * The UAPI flags are the lower 8 bits, as that's all sqe->ioprio will hold
 * anyway. Use the upper 8 bits for internal uses.
 */
enum sr_retry_flags {
	IORING_RECV_RETRY	= (1U << 15),
	IORING_RECV_PARTIAL_MAP	= (1U << 14),
	IORING_RECV_MSHOT_CAP	= (1U << 13),
	IORING_RECV_MSHOT_LIM	= (1U << 12),
	IORING_RECV_MSHOT_DONE	= (1U << 11),

	IORING_RECV_RETRY_CLEAR	= IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP,
	IORING_RECV_NO_RETRY	= IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP |
				  IORING_RECV_MSHOT_CAP | IORING_RECV_MSHOT_DONE,
};

/*
 * Number of times we'll try and do receives if there's more data. If we
 * exceed this limit, then add us to the back of the queue and retry from
 * there. This helps fairness between flooding clients.
 */
#define MULTISHOT_MAX_RETRY	32

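/*
 * State for zero copy receive: the target interface queue (ifq) is resolved
 * from sqe->zcrx_ifq_idx at prep time and reused for every issue of the
 * request.
 */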
struct io_recvzc {
	struct file *file;
	unsigned msg_flags;
	u16 flags;
	u32 len;
	struct io_zcrx_ifq *ifq;
};

static int io_sg_from_iter_iovec(struct sk_buff *skb,
				 struct iov_iter *from, size_t length);
static int io_sg_from_iter(struct sk_buff *skb,
			   struct iov_iter *from, size_t length);

int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
	struct socket *sock;
	int ret;

	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}

static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
{
	if (kmsg->vec.iovec)
		io_vec_free(&kmsg->vec);
}

static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;

	/* can't recycle, ensure we free the iovec if we have one */
	if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
		io_netmsg_iovec_free(hdr);
		return;
	}

	/* Let normal cleanup path reap it if we fail adding to the cache */
	io_alloc_cache_vec_kasan(&hdr->vec);
	if (hdr->vec.nr > IO_VEC_CACHE_SOFT_CAP)
		io_vec_free(&hdr->vec);

	if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) {
		req->async_data = NULL;
		req->flags &= ~(REQ_F_ASYNC_DATA|REQ_F_NEED_CLEANUP);
	}
}

static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_async_msghdr *hdr;

	hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req);
	if (!hdr)
		return NULL;

	/* If the async data was cached, we might have an iov cached inside. */
	if (hdr->vec.iovec)
		req->flags |= REQ_F_NEED_CLEANUP;
	return hdr;
}

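/*
 * Reset the per-iteration state of a multishot or bundle request so that the
 * next attempt starts clean: forget partial progress, clear the internal
 * retry flags and restore the per-invocation length cap.
 */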
static inline void io_mshot_prep_retry(struct io_kiocb *req,
				       struct io_async_msghdr *kmsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	req->flags &= ~REQ_F_BL_EMPTY;
	sr->done_io = 0;
	sr->flags &= ~IORING_RECV_RETRY_CLEAR;
	sr->len = sr->mshot_len;
}

static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			     const struct iovec __user *uiov, unsigned uvec_seg,
			     int ddir)
{
	struct iovec *iov;
	int ret, nr_segs;

	if (iomsg->vec.iovec) {
		nr_segs = iomsg->vec.nr;
		iov = iomsg->vec.iovec;
	} else {
		nr_segs = 1;
		iov = &iomsg->fast_iov;
	}

	ret = __import_iovec(ddir, uiov, uvec_seg, nr_segs, &iov,
			     &iomsg->msg.msg_iter, io_is_compat(req->ctx));
	if (unlikely(ret < 0))
		return ret;

	if (iov) {
		req->flags |= REQ_F_NEED_CLEANUP;
		io_vec_reset_iovec(&iomsg->vec, iov, iomsg->msg.msg_iter.nr_segs);
	}
	return 0;
}

static int io_compat_msg_copy_hdr(struct io_kiocb *req,
				  struct io_async_msghdr *iomsg,
				  struct compat_msghdr *msg, int ddir,
				  struct sockaddr __user **save_addr)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_iovec __user *uiov;
	int ret;

	if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
		return -EFAULT;

	ret = __get_compat_msghdr(&iomsg->msg, msg, save_addr);
	if (ret)
		return ret;

	uiov = compat_ptr(msg->msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg->msg_iovlen == 0) {
			sr->len = 0;
		} else if (msg->msg_iovlen > 1) {
			return -EINVAL;
		} else {
			struct compat_iovec tmp_iov;

			if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
				return -EFAULT;
			sr->len = tmp_iov.iov_len;
		}
	}
	return 0;
}

static int io_copy_msghdr_from_user(struct user_msghdr *msg,
				    struct user_msghdr __user *umsg)
{
	if (!user_access_begin(umsg, sizeof(*umsg)))
		return -EFAULT;
	unsafe_get_user(msg->msg_name, &umsg->msg_name, ua_end);
	unsafe_get_user(msg->msg_namelen, &umsg->msg_namelen, ua_end);
	unsafe_get_user(msg->msg_iov, &umsg->msg_iov, ua_end);
	unsafe_get_user(msg->msg_iovlen, &umsg->msg_iovlen, ua_end);
	unsafe_get_user(msg->msg_control, &umsg->msg_control, ua_end);
	unsafe_get_user(msg->msg_controllen, &umsg->msg_controllen, ua_end);
	user_access_end();
	return 0;
ua_end:
	user_access_end();
	return -EFAULT;
}

static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			   struct user_msghdr *msg, int ddir,
			   struct sockaddr __user **save_addr)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr __user *umsg = sr->umsg;
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

	if (io_is_compat(req->ctx)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ddir, save_addr);
		if (ret)
			return ret;

		memset(msg, 0, sizeof(*msg));
		msg->msg_namelen = cmsg.msg_namelen;
		msg->msg_controllen = cmsg.msg_controllen;
		msg->msg_iov = compat_ptr(cmsg.msg_iov);
		msg->msg_iovlen = cmsg.msg_iovlen;
		return 0;
	}

	ret = io_copy_msghdr_from_user(msg, umsg);
	if (unlikely(ret))
		return ret;

	msg->msg_flags = 0;

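	/* translate the copied-in user_msghdr into the kernel msghdr in iomsg */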
	ret = __copy_msghdr(&iomsg->msg, msg, save_addr);
	if (ret)
		return ret;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg->msg_iovlen == 0) {
			sr->len = 0;
		} else if (msg->msg_iovlen > 1) {
			return -EINVAL;
		} else {
			struct iovec __user *uiov = msg->msg_iov;
			struct iovec tmp_iov;

			if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
				return -EFAULT;
			sr->len = tmp_iov.iov_len;
		}
	}
	return 0;
}

void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	io_netmsg_iovec_free(io);
}

static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	void __user *addr;
	u16 addr_len;
	int ret;

	sr->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));

	if (READ_ONCE(sqe->__pad3[0]))
		return -EINVAL;

	kmsg->msg.msg_name = NULL;
	kmsg->msg.msg_namelen = 0;
	kmsg->msg.msg_control = NULL;
	kmsg->msg.msg_controllen = 0;
	kmsg->msg.msg_ubuf = NULL;

	addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	addr_len = READ_ONCE(sqe->addr_len);
	if (addr) {
		ret = move_addr_to_kernel(addr, addr_len, &kmsg->addr);
		if (unlikely(ret < 0))
			return ret;
		kmsg->msg.msg_name = &kmsg->addr;
		kmsg->msg.msg_namelen = addr_len;
	}
	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
		req->flags |= REQ_F_IMPORT_BUFFER;
		return 0;
	}
	if (req->flags & REQ_F_BUFFER_SELECT)
		return 0;

	if (sr->flags & IORING_SEND_VECTORIZED)
		return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE);

	return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
}

static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct user_msghdr msg;
	int ret;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	ret = io_msg_copy_hdr(req, kmsg, &msg, ITER_SOURCE, NULL);
	if (unlikely(ret))
		return ret;
	/* save msg_control as sys_sendmsg() overwrites it */
	sr->msg_control = kmsg->msg.msg_control_user;

	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
		kmsg->msg.msg_iter.nr_segs = msg.msg_iovlen;
		return io_prep_reg_iovec(req, &kmsg->vec, msg.msg_iov,
					 msg.msg_iovlen);
	}
	if (req->flags & REQ_F_BUFFER_SELECT)
		return 0;
	return io_net_import_vec(req, kmsg, msg.msg_iov, msg.msg_iovlen, ITER_SOURCE);
}

#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE | IORING_SEND_VECTORIZED)

int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~SENDMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (req->flags & REQ_F_BUFFER_SELECT)
		sr->buf_group = req->buf_index;
	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_SENDMSG)
			return -EINVAL;
		sr->msg_flags |= MSG_WAITALL;
		req->buf_list = NULL;
		req->flags |= REQ_F_MULTISHOT;
	}

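	/* compat tasks pass compat control messages, flag that for the net stack */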
	if (io_is_compat(req->ctx))
		sr->msg_flags |= MSG_CMSG_COMPAT;

	if (unlikely(!io_msg_alloc_async(req)))
		return -ENOMEM;
	if (req->opcode != IORING_OP_SENDMSG)
		return io_send_setup(req, sqe);
	if (unlikely(sqe->addr2 || sqe->file_index))
		return -EINVAL;
	return io_sendmsg_setup(req, sqe);
}

static void io_req_msg_cleanup(struct io_kiocb *req,
			       unsigned int issue_flags)
{
	io_netmsg_recycle(req, issue_flags);
}

/*
 * For bundle completions, we need to figure out how many segments we consumed.
 * A bundle could be using a single ITER_UBUF if that's all we mapped, or it
 * could be using an ITER_IOVEC. If the latter, then if we consumed all of
 * the segments, then it's a trivial question to answer. If we have residual
 * data in the iter, then loop the segments to figure out how much we
 * transferred.
 */
static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
{
	struct iovec *iov;
	int nbufs;

	/* no data is always zero segments, and a ubuf is always 1 segment */
	if (ret <= 0)
		return 0;
	if (iter_is_ubuf(&kmsg->msg.msg_iter))
		return 1;

	iov = kmsg->vec.iovec;
	if (!iov)
		iov = &kmsg->fast_iov;

	/* if all data was transferred, it's basic pointer math */
	if (!iov_iter_count(&kmsg->msg.msg_iter))
		return iter_iov(&kmsg->msg.msg_iter) - iov;

	/* short transfer, count segments */
	nbufs = 0;
	do {
		int this_len = min_t(int, iov[nbufs].iov_len, ret);

		nbufs++;
		ret -= this_len;
	} while (ret);

	return nbufs;
}

static int io_net_kbuf_recyle(struct io_kiocb *req,
			      struct io_async_msghdr *kmsg, int len)
{
	req->flags |= REQ_F_BL_NO_RECYCLE;
	if (req->flags & REQ_F_BUFFERS_COMMIT)
		io_kbuf_commit(req, req->buf_list, len, io_bundle_nbufs(kmsg, len));
	return IOU_RETRY;
}

static inline bool io_send_finish(struct io_kiocb *req, int *ret,
				  struct io_async_msghdr *kmsg,
				  unsigned issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	bool bundle_finished = *ret <= 0;
	unsigned int cflags;

	if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
		cflags = io_put_kbuf(req, *ret, issue_flags);
		goto finish;
	}

	cflags = io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret), issue_flags);

	if (bundle_finished || req->flags & REQ_F_BL_EMPTY)
		goto finish;

	/*
	 * Fill CQE for this receive and see if we should keep trying to
	 * receive from this socket.
	 */
	if (io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
		io_mshot_prep_retry(req, kmsg);
		return false;
	}

	/* Otherwise stop bundle and use the current result. */
finish:
	io_req_set_res(req, *ret, cflags);
	*ret = IOU_COMPLETE;
	return true;
}

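/*
 * Issue side of IORING_OP_SENDMSG. A short send with MSG_WAITALL set stashes
 * the progress in sr->done_io and asks for a retry; the final completion
 * then includes any bytes sent by earlier attempts.
 */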
int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret > 0 && io_net_retry(sock, flags)) {
			kmsg->msg.msg_controllen = 0;
			kmsg->msg.msg_control = NULL;
			sr->done_io += ret;
			return io_net_kbuf_recyle(req, kmsg, ret);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	io_req_msg_cleanup(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags,
				 struct io_async_msghdr *kmsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	int ret;
	struct buf_sel_arg arg = {
		.iovs = &kmsg->fast_iov,
		.max_len = min_not_zero(sr->len, INT_MAX),
		.nr_iovs = 1,
		.buf_group = sr->buf_group,
	};

	if (kmsg->vec.iovec) {
		arg.nr_iovs = kmsg->vec.nr;
		arg.iovs = kmsg->vec.iovec;
		arg.mode = KBUF_MODE_FREE;
	}

	if (!(sr->flags & IORING_RECVSEND_BUNDLE))
		arg.nr_iovs = 1;
	else
		arg.mode |= KBUF_MODE_EXPAND;

	ret = io_buffers_select(req, &arg, issue_flags);
	if (unlikely(ret < 0))
		return ret;

	if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
		kmsg->vec.nr = ret;
		kmsg->vec.iovec = arg.iovs;
		req->flags |= REQ_F_NEED_CLEANUP;
	}
	sr->len = arg.out_len;

	if (ret == 1) {
		sr->buf = arg.iovs[0].iov_base;
		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
	} else {
		iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE,
			      arg.iovs, ret, arg.out_len);
	}

	return 0;
}

int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;

retry_bundle:
	if (io_do_buffer_select(req)) {
		ret = io_send_select_buffer(req, issue_flags, kmsg);
		if (ret)
			return ret;
	}

	/*
	 * If MSG_WAITALL is set, or this is a bundle send, then we need
	 * the full amount. If just bundle is set, if we do a short send
	 * then we complete the bundle sequence rather than continue on.
	 */
	if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

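	/* never pass kernel-internal sendmsg flags through to the socket */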
	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
	kmsg->msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &kmsg->msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			return io_net_kbuf_recyle(req, kmsg, ret);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	if (!io_send_finish(req, &ret, kmsg, issue_flags))
		goto retry_bundle;

	io_req_msg_cleanup(req, issue_flags);
	return ret;
}

static int io_recvmsg_mshot_prep(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg,
				 int namelen, size_t controllen)
{
	if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) ==
	    (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) {
		int hdr;

		if (unlikely(namelen < 0))
			return -EOVERFLOW;
		if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
				       namelen, &hdr))
			return -EOVERFLOW;
		if (check_add_overflow(hdr, controllen, &hdr))
			return -EOVERFLOW;

		iomsg->namelen = namelen;
		iomsg->controllen = controllen;
		return 0;
	}

	return 0;
}

static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct user_msghdr msg;
	int ret;

	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST, &iomsg->uaddr);
	if (unlikely(ret))
		return ret;

	if (!(req->flags & REQ_F_BUFFER_SELECT)) {
		ret = io_net_import_vec(req, iomsg, msg.msg_iov, msg.msg_iovlen,
					ITER_DEST);
		if (unlikely(ret))
			return ret;
	}
	return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
				     msg.msg_controllen);
}

static int io_recvmsg_prep_setup(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg;

	kmsg = io_msg_alloc_async(req);
	if (unlikely(!kmsg))
		return -ENOMEM;

	if (req->opcode == IORING_OP_RECV) {
		kmsg->msg.msg_name = NULL;
		kmsg->msg.msg_namelen = 0;
		kmsg->msg.msg_inq = 0;
		kmsg->msg.msg_control = NULL;
		kmsg->msg.msg_get_inq = 1;
		kmsg->msg.msg_controllen = 0;
		kmsg->msg.msg_iocb = NULL;
		kmsg->msg.msg_ubuf = NULL;

		if (req->flags & REQ_F_BUFFER_SELECT)
			return 0;
		return import_ubuf(ITER_DEST, sr->buf, sr->len,
				   &kmsg->msg.msg_iter);
	}

	return io_recvmsg_copy_hdr(req, kmsg);
}

#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \
			IORING_RECVSEND_BUNDLE)

int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;

	if (unlikely(sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~RECVMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags);
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
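	/* error queue data doesn't trigger POLLIN, so don't poll for it */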
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (req->flags & REQ_F_BUFFER_SELECT) {
		/*
		 * Store the buffer group for this multishot receive separately,
		 * as if we end up doing an io-wq based issue that selects a
		 * buffer, it has to be committed immediately and that will
		 * clear ->buf_list. This means we lose the link to the buffer
		 * list, and the eventual buffer put on completion then cannot
		 * restore it.
		 */
		sr->buf_group = req->buf_index;
		req->buf_list = NULL;
	}
	sr->mshot_total_len = sr->mshot_len = 0;
	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		if (req->opcode == IORING_OP_RECV) {
			sr->mshot_len = sr->len;
			sr->mshot_total_len = READ_ONCE(sqe->optlen);
			if (sr->mshot_total_len)
				sr->flags |= IORING_RECV_MSHOT_LIM;
		} else if (sqe->optlen) {
			return -EINVAL;
		}
		req->flags |= REQ_F_APOLL_MULTISHOT;
	} else if (sqe->optlen) {
		return -EINVAL;
	}

	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_RECVMSG)
			return -EINVAL;
	}

	if (io_is_compat(req->ctx))
		sr->msg_flags |= MSG_CMSG_COMPAT;

	sr->nr_multishot_loops = 0;
	return io_recvmsg_prep_setup(req);
}

/* bits to clear in old and inherit in new cflags on bundle retry */
#define CQE_F_MASK	(IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE)

/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
				  struct io_async_msghdr *kmsg,
				  bool mshot_finished, unsigned issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	unsigned int cflags = 0;

	if (kmsg->msg.msg_inq > 0)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (*ret > 0 && sr->flags & IORING_RECV_MSHOT_LIM) {
		/*
		 * If sr->len hits zero, the limit has been reached. Mark
		 * mshot as finished, and flag MSHOT_DONE as well to prevent
		 * a potential bundle from being retried.
		 */
		sr->mshot_total_len -= min_t(int, *ret, sr->mshot_total_len);
		if (!sr->mshot_total_len) {
			sr->flags |= IORING_RECV_MSHOT_DONE;
			mshot_finished = true;
		}
	}

	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		size_t this_ret = *ret - sr->done_io;

		cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret),
				       issue_flags);
		if (sr->flags & IORING_RECV_RETRY)
			cflags = req->cqe.flags | (cflags & CQE_F_MASK);
		if (sr->mshot_len && *ret >= sr->mshot_len)
			sr->flags |= IORING_RECV_MSHOT_CAP;
		/* bundle with no more immediate buffers, we're done */
		if (req->flags & REQ_F_BL_EMPTY)
			goto finish;
		/*
		 * If more is available AND it was a full transfer, retry and
		 * append to this one
		 */
		if (!(sr->flags & IORING_RECV_NO_RETRY) &&
		    kmsg->msg.msg_inq > 1 && this_ret > 0 &&
		    !iov_iter_count(&kmsg->msg.msg_iter)) {
			req->cqe.flags = cflags & ~CQE_F_MASK;
			sr->len = kmsg->msg.msg_inq;
			sr->done_io += this_ret;
			sr->flags |= IORING_RECV_RETRY;
			return false;
		}
	} else {
		cflags |= io_put_kbuf(req, *ret, issue_flags);
	}

	/*
	 * Fill CQE for this receive and see if we should keep trying to
	 * receive from this socket.
	 */
	if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
	    io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
		*ret = IOU_RETRY;
		io_mshot_prep_retry(req, kmsg);
		/* Known not-empty or unknown state, retry */
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY &&
			    !(sr->flags & IORING_RECV_MSHOT_CAP)) {
				return false;
			}
			/* mshot retries exceeded, force a requeue */
			sr->nr_multishot_loops = 0;
			sr->flags &= ~IORING_RECV_MSHOT_CAP;
			if (issue_flags & IO_URING_F_MULTISHOT)
				*ret = IOU_REQUEUE;
		}
		return true;
	}

	/* Finish the request / stop multishot. */
finish:
	io_req_set_res(req, *ret, cflags);
	*ret = IOU_COMPLETE;
	io_req_msg_cleanup(req, issue_flags);
	return true;
}

static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}

struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};

static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 * "fromlen shall refer to the value before truncation.."
	 *			1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}

int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;
		size_t len = sr->len;

		buf = io_buffer_select(req, &len, sr->buf_group, issue_flags);
		if (!buf)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
			if (ret) {
				io_kbuf_recycle(req, issue_flags);
				return ret;
			}
		}

		iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);
	}

	kmsg->msg.msg_get_inq = 1;
	kmsg->msg.msg_inq = -1;
	if (req->flags & REQ_F_APOLL_MULTISHOT) {
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	} else {
		/* disable partial retry for recvmsg with cmsg attached */
		if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
			min_ret = iov_iter_count(&kmsg->msg.msg_iter);

		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);
	}

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT)
				io_kbuf_recycle(req, issue_flags);

			return IOU_RETRY;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			return io_net_kbuf_recyle(req, kmsg, ret);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
		goto retry_multishot;

	return ret;
}

static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
			      size_t *len, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	int ret;

	/*
	 * If the ring isn't locked, then don't use the peek interface
	 * to grab multiple buffers as we will lock/unlock between
	 * this selection and posting the buffers.
1113 */ 1114 if (!(issue_flags & IO_URING_F_UNLOCKED) && 1115 sr->flags & IORING_RECVSEND_BUNDLE) { 1116 struct buf_sel_arg arg = { 1117 .iovs = &kmsg->fast_iov, 1118 .nr_iovs = 1, 1119 .mode = KBUF_MODE_EXPAND, 1120 .buf_group = sr->buf_group, 1121 }; 1122 1123 if (kmsg->vec.iovec) { 1124 arg.nr_iovs = kmsg->vec.nr; 1125 arg.iovs = kmsg->vec.iovec; 1126 arg.mode |= KBUF_MODE_FREE; 1127 } 1128 1129 if (*len) 1130 arg.max_len = *len; 1131 else if (kmsg->msg.msg_inq > 1) 1132 arg.max_len = min_not_zero(*len, (size_t) kmsg->msg.msg_inq); 1133 1134 /* if mshot limited, ensure we don't go over */ 1135 if (sr->flags & IORING_RECV_MSHOT_LIM) 1136 arg.max_len = min_not_zero(arg.max_len, sr->mshot_total_len); 1137 ret = io_buffers_peek(req, &arg); 1138 if (unlikely(ret < 0)) 1139 return ret; 1140 1141 if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) { 1142 kmsg->vec.nr = ret; 1143 kmsg->vec.iovec = arg.iovs; 1144 req->flags |= REQ_F_NEED_CLEANUP; 1145 } 1146 if (arg.partial_map) 1147 sr->flags |= IORING_RECV_PARTIAL_MAP; 1148 1149 /* special case 1 vec, can be a fast path */ 1150 if (ret == 1) { 1151 sr->buf = arg.iovs[0].iov_base; 1152 sr->len = arg.iovs[0].iov_len; 1153 goto map_ubuf; 1154 } 1155 iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret, 1156 arg.out_len); 1157 } else { 1158 void __user *buf; 1159 1160 *len = sr->len; 1161 buf = io_buffer_select(req, len, sr->buf_group, issue_flags); 1162 if (!buf) 1163 return -ENOBUFS; 1164 sr->buf = buf; 1165 sr->len = *len; 1166 map_ubuf: 1167 ret = import_ubuf(ITER_DEST, sr->buf, sr->len, 1168 &kmsg->msg.msg_iter); 1169 if (unlikely(ret)) 1170 return ret; 1171 } 1172 1173 return 0; 1174 } 1175 1176 int io_recv(struct io_kiocb *req, unsigned int issue_flags) 1177 { 1178 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1179 struct io_async_msghdr *kmsg = req->async_data; 1180 struct socket *sock; 1181 unsigned flags; 1182 int ret, min_ret = 0; 1183 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 1184 size_t len = sr->len; 1185 bool mshot_finished; 1186 1187 if (!(req->flags & REQ_F_POLLED) && 1188 (sr->flags & IORING_RECVSEND_POLL_FIRST)) 1189 return -EAGAIN; 1190 1191 sock = sock_from_file(req->file); 1192 if (unlikely(!sock)) 1193 return -ENOTSOCK; 1194 1195 flags = sr->msg_flags; 1196 if (force_nonblock) 1197 flags |= MSG_DONTWAIT; 1198 1199 retry_multishot: 1200 if (io_do_buffer_select(req)) { 1201 ret = io_recv_buf_select(req, kmsg, &len, issue_flags); 1202 if (unlikely(ret)) { 1203 kmsg->msg.msg_inq = -1; 1204 goto out_free; 1205 } 1206 sr->buf = NULL; 1207 } 1208 1209 kmsg->msg.msg_flags = 0; 1210 kmsg->msg.msg_inq = -1; 1211 1212 if (flags & MSG_WAITALL) 1213 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 1214 1215 ret = sock_recvmsg(sock, &kmsg->msg, flags); 1216 if (ret < min_ret) { 1217 if (ret == -EAGAIN && force_nonblock) { 1218 if (issue_flags & IO_URING_F_MULTISHOT) 1219 io_kbuf_recycle(req, issue_flags); 1220 1221 return IOU_RETRY; 1222 } 1223 if (ret > 0 && io_net_retry(sock, flags)) { 1224 sr->len -= ret; 1225 sr->buf += ret; 1226 sr->done_io += ret; 1227 return io_net_kbuf_recyle(req, kmsg, ret); 1228 } 1229 if (ret == -ERESTARTSYS) 1230 ret = -EINTR; 1231 req_set_fail(req); 1232 } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { 1233 out_free: 1234 req_set_fail(req); 1235 } 1236 1237 mshot_finished = ret <= 0; 1238 if (ret > 0) 1239 ret += sr->done_io; 1240 else if (sr->done_io) 1241 ret = sr->done_io; 1242 else 1243 io_kbuf_recycle(req, 
int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
	unsigned ifq_idx;

	if (unlikely(sqe->addr2 || sqe->addr || sqe->addr3))
		return -EINVAL;

	ifq_idx = READ_ONCE(sqe->zcrx_ifq_idx);
	zc->ifq = xa_load(&req->ctx->zcrx_ctxs, ifq_idx);
	if (!zc->ifq)
		return -EINVAL;

	zc->len = READ_ONCE(sqe->len);
	zc->flags = READ_ONCE(sqe->ioprio);
	zc->msg_flags = READ_ONCE(sqe->msg_flags);
	if (zc->msg_flags)
		return -EINVAL;
	if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT))
		return -EINVAL;
	/* multishot required */
	if (!(zc->flags & IORING_RECV_MULTISHOT))
		return -EINVAL;
	/* All data completions are posted as aux CQEs. */
	req->flags |= REQ_F_APOLL_MULTISHOT;

	return 0;
}

int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
	struct socket *sock;
	unsigned int len;
	int ret;

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	len = zc->len;
	ret = io_zcrx_recv(req, zc->ifq, sock, zc->msg_flags | MSG_DONTWAIT,
			   issue_flags, &zc->len);
	if (len && zc->len == 0) {
		io_req_set_res(req, 0, 0);

		return IOU_COMPLETE;
	}
	if (unlikely(ret <= 0) && ret != -EAGAIN) {
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret == IOU_REQUEUE)
			return IOU_REQUEUE;

		req_set_fail(req);
		io_req_set_res(req, ret, 0);
		return IOU_COMPLETE;
	}
	return IOU_RETRY;
}

void io_send_zc_cleanup(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io = req->async_data;

	if (req_has_async_data(req))
		io_netmsg_iovec_free(io);
	if (zc->notif) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
	}
}

#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
#define IO_ZC_FLAGS_VALID  (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE | \
			    IORING_SEND_VECTORIZED)

int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_async_msghdr *iomsg;
	struct io_kiocb *notif;
	int ret;

	zc->done_io = 0;

	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	notif->cqe.user_data = req->cqe.user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	req->flags |= REQ_F_NEED_CLEANUP | REQ_F_POLL_NO_LAZY;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
		if (zc->flags & ~IO_ZC_FLAGS_VALID)
			return -EINVAL;
		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
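			/* have the notification report whether data was copied or sent zerocopy */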
			struct io_notif_data *nd = io_notif_to_data(notif);

			nd->zc_report = true;
			nd->zc_used = false;
			nd->zc_copied = false;
		}
	}

	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
	req->buf_index = READ_ONCE(sqe->buf_index);
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

	if (io_is_compat(req->ctx))
		zc->msg_flags |= MSG_CMSG_COMPAT;

	iomsg = io_msg_alloc_async(req);
	if (unlikely(!iomsg))
		return -ENOMEM;

	if (req->opcode == IORING_OP_SEND_ZC) {
		ret = io_send_setup(req, sqe);
	} else {
		if (unlikely(sqe->addr2 || sqe->file_index))
			return -EINVAL;
		ret = io_sendmsg_setup(req, sqe);
	}
	if (unlikely(ret))
		return ret;

	if (!(zc->flags & IORING_RECVSEND_FIXED_BUF)) {
		iomsg->msg.sg_from_iter = io_sg_from_iter_iovec;
		return io_notif_account_mem(zc->notif, iomsg->msg.msg_iter.count);
	}
	iomsg->msg.sg_from_iter = io_sg_from_iter;
	return 0;
}

static int io_sg_from_iter_iovec(struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	skb_zcopy_downgrade_managed(skb);
	return zerocopy_fill_skb_from_iter(skb, from, length);
}

static int io_sg_from_iter(struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	if (!frag)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return zerocopy_fill_skb_from_iter(skb, from, length);

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	if (bi.bi_size)
		ret = -EMSGSIZE;

	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;
	return ret;
}

static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;

	WARN_ON_ONCE(!(sr->flags & IORING_RECVSEND_FIXED_BUF));

	sr->notif->buf_index = req->buf_index;
	return io_import_reg_buf(sr->notif, &kmsg->msg.msg_iter,
				 (u64)(uintptr_t)sr->buf, sr->len,
				 ITER_SOURCE, issue_flags);
}

int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned msg_flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

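	/* prep deferred the registered buffer import, resolve it before sending */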
	if (req->flags & REQ_F_IMPORT_BUFFER) {
		req->flags &= ~REQ_F_IMPORT_BUFFER;
		ret = io_send_zc_import(req, issue_flags);
		if (unlikely(ret))
			return ret;
	}

	msg_flags = zc->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
	msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;

	kmsg->msg.msg_flags = msg_flags;
	kmsg->msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
	ret = sock_sendmsg(sock, &kmsg->msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) {
			zc->len -= ret;
			zc->buf += ret;
			zc->done_io += ret;
			return io_net_kbuf_recyle(req, kmsg, ret);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += zc->done_io;
	else if (zc->done_io)
		ret = zc->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_COMPLETE;
}

int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;

	if (req->flags & REQ_F_IMPORT_BUFFER) {
		unsigned uvec_segs = kmsg->msg.msg_iter.nr_segs;
		int ret;

		ret = io_import_reg_vec(ITER_SOURCE, &kmsg->msg.msg_iter, req,
					&kmsg->vec, uvec_segs, issue_flags);
		if (unlikely(ret))
			return ret;
		req->flags &= ~REQ_F_IMPORT_BUFFER;
	}

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;
	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			return io_net_kbuf_recyle(req, kmsg, ret);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(sr->notif);
		sr->notif = NULL;
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_COMPLETE;
}

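/*
 * ->fail() handler for send/recv requests: report partial progress if there
 * was any, and for zerocopy sends keep IORING_CQE_F_MORE set as the
 * notification CQE is still expected.
 */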
void io_sendrecv_fail(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (sr->done_io)
		req->cqe.res = sr->done_io;

	if ((req->flags & REQ_F_NEED_CLEANUP) &&
	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
		req->cqe.flags |= IORING_CQE_F_MORE;
}

#define ACCEPT_FLAGS	(IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \
			 IORING_ACCEPT_POLL_FIRST)

int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);

	if (sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	accept->iou_flags = READ_ONCE(sqe->ioprio);
	if (accept->iou_flags & ~ACCEPT_FLAGS)
		return -EINVAL;

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		if (accept->iou_flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (accept->iou_flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	if (accept->iou_flags & IORING_ACCEPT_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	return 0;
}

int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool fixed = !!accept->file_slot;
	struct proto_accept_arg arg = {
		.flags = force_nonblock ? O_NONBLOCK : 0,
	};
	struct file *file;
	unsigned cflags;
	int ret, fd;

	if (!(req->flags & REQ_F_POLLED) &&
	    accept->iou_flags & IORING_ACCEPT_POLL_FIRST)
		return -EAGAIN;

retry:
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	arg.err = 0;
	arg.is_empty = -1;
	file = do_accept(req->file, &arg, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock &&
		    !(accept->iou_flags & IORING_ACCEPT_DONTWAIT))
			return IOU_RETRY;

		if (ret == -ERESTARTSYS)
			ret = -EINTR;
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  accept->file_slot);
	}

	cflags = 0;
	if (!arg.is_empty)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (ret >= 0 && (req->flags & REQ_F_APOLL_MULTISHOT) &&
	    io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1)
			goto retry;
		return IOU_RETRY;
	}

	io_req_set_res(req, ret, cflags);
	if (ret < 0)
		req_set_fail(req);
	return IOU_COMPLETE;
}

int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
		return -EINVAL;
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	return 0;
}

int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
	bool fixed = !!sock->file_slot;
	struct file *file;
	int ret, fd;

	if (!fixed) {
		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  sock->file_slot);
	}
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	conn->in_progress = conn->seen_econnaborted = false;

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;

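	/* copy the address in at prep time so issue never re-reads user memory */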
	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr);
}

int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io = req->async_data;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	if (connect->in_progress) {
		struct poll_table_struct pt = { ._key = EPOLLERR };

		if (vfs_poll(req->file, &pt) & EPOLLERR)
			goto get_sock_err;
	}

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->addr, connect->addr_len,
				 file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
	    && force_nonblock) {
		if (ret == -EINPROGRESS) {
			connect->in_progress = true;
		} else if (ret == -ECONNABORTED) {
			if (connect->seen_econnaborted)
				goto out;
			connect->seen_econnaborted = true;
		}
		return -EAGAIN;
	}
	if (connect->in_progress) {
		/*
		 * At least bluetooth will return -EBADFD on a re-connect
		 * attempt, and it's (supposedly) also valid to get -EISCONN
		 * which means the previous result is good. For both of these,
		 * grab the sock_error() and use that for the completion.
		 */
		if (ret == -EBADFD || ret == -EISCONN) {
get_sock_err:
			ret = sock_error(sock_from_file(req->file)->sk);
		}
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	io_req_msg_cleanup(req, issue_flags);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct sockaddr __user *uaddr;
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	bind->addr_len = READ_ONCE(sqe->addr2);

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;
	return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr);
}

int io_bind(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct io_async_msghdr *io = req->async_data;
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_bind_socket(sock, &io->addr, bind->addr_len);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}

int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);

	if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2)
		return -EINVAL;

	listen->backlog = READ_ONCE(sqe->len);
	return 0;
}

int io_listen(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_listen_socket(sock, listen->backlog);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}

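/* alloc cache destructor: free any cached iovec before the msghdr itself */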
void io_netmsg_cache_free(const void *entry)
{
	struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;

	io_vec_free(&kmsg->vec);
	kfree(kmsg);
}