// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "filetable.h"
#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"
#include "zcrx.h"

struct io_shutdown {
	struct file			*file;
	int				how;
};

struct io_accept {
	struct file			*file;
	struct sockaddr __user		*addr;
	int __user			*addr_len;
	int				flags;
	int				iou_flags;
	u32				file_slot;
	unsigned long			nofile;
};

struct io_socket {
	struct file			*file;
	int				domain;
	int				type;
	int				protocol;
	int				flags;
	u32				file_slot;
	unsigned long			nofile;
};

struct io_connect {
	struct file			*file;
	struct sockaddr __user		*addr;
	int				addr_len;
	bool				in_progress;
	bool				seen_econnaborted;
};

struct io_bind {
	struct file			*file;
	int				addr_len;
};

struct io_listen {
	struct file			*file;
	int				backlog;
};

struct io_sr_msg {
	struct file			*file;
	union {
		struct compat_msghdr __user	*umsg_compat;
		struct user_msghdr __user	*umsg;
		void __user			*buf;
	};
	int				len;
	unsigned			done_io;
	unsigned			msg_flags;
	unsigned			nr_multishot_loops;
	u16				flags;
	/* initialised and used only by !msg send variants */
	u16				buf_group;
	/* per-invocation mshot limit */
	unsigned			mshot_len;
	/* overall mshot byte limit */
	unsigned			mshot_total_len;
	void __user			*msg_control;
	/* used only for send zerocopy */
	struct io_kiocb			*notif;
};

/*
 * The UAPI flags live in the lower bits of the 16-bit sr->flags field
 * (set from sqe->ioprio); the upper bits are reserved for internal state.
 */
enum sr_retry_flags {
	IORING_RECV_RETRY	= (1U << 15),
	IORING_RECV_PARTIAL_MAP	= (1U << 14),
	IORING_RECV_MSHOT_CAP	= (1U << 13),
	IORING_RECV_MSHOT_LIM	= (1U << 12),
	IORING_RECV_MSHOT_DONE	= (1U << 11),

	IORING_RECV_RETRY_CLEAR	= IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP,
	IORING_RECV_NO_RETRY	= IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP |
				  IORING_RECV_MSHOT_CAP | IORING_RECV_MSHOT_DONE,
};

/*
 * Number of times we'll try to do receives if there's more data. If we
 * exceed this limit, then add us to the back of the queue and retry from
 * there. This helps fairness between flooding clients.
 */
#define MULTISHOT_MAX_RETRY	32
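/*
 * Worked example of the cap: a peer that always has more data queued would
 * otherwise keep a multishot receive looping in io_recv_finish() forever.
 * After MULTISHOT_MAX_RETRY consecutive completions the request is instead
 * requeued (IOU_REQUEUE), giving other requests on the ring a turn.
 */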
struct io_recvzc {
	struct file			*file;
	u16				flags;
	u32				len;
	struct io_zcrx_ifq		*ifq;
};

static int io_sg_from_iter_iovec(struct sk_buff *skb,
				 struct iov_iter *from, size_t length);
static int io_sg_from_iter(struct sk_buff *skb,
			   struct iov_iter *from, size_t length);

int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
	struct socket *sock;
	int ret;

	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}

static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
{
	if (kmsg->vec.iovec)
		io_vec_free(&kmsg->vec);
}

static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;

	/* can't recycle, ensure we free the iovec if we have one */
	if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
		io_netmsg_iovec_free(hdr);
		return;
	}

	/* Let normal cleanup path reap it if we fail adding to the cache */
	io_alloc_cache_vec_kasan(&hdr->vec);
	if (hdr->vec.nr > IO_VEC_CACHE_SOFT_CAP)
		io_vec_free(&hdr->vec);

	if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr))
		io_req_async_data_clear(req, REQ_F_NEED_CLEANUP);
}

static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_async_msghdr *hdr;

	hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req);
	if (!hdr)
		return NULL;

	/* If the async data was cached, we might have an iov cached inside. */
	if (hdr->vec.iovec)
		req->flags |= REQ_F_NEED_CLEANUP;
	return hdr;
}
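/*
 * Async msghdr lifetime, roughly: io_msg_alloc_async() pulls a struct from
 * ctx->netmsg_cache (or allocates a fresh one), the request uses it across
 * any retries, and io_netmsg_recycle() returns it to the cache on
 * completion. A cached entry may keep its iovec allocation for reuse, up
 * to IO_VEC_CACHE_SOFT_CAP segments.
 */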
static inline void io_mshot_prep_retry(struct io_kiocb *req,
				       struct io_async_msghdr *kmsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	req->flags &= ~REQ_F_BL_EMPTY;
	sr->done_io = 0;
	sr->flags &= ~IORING_RECV_RETRY_CLEAR;
	sr->len = sr->mshot_len;
}

static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			     const struct iovec __user *uiov, unsigned uvec_seg,
			     int ddir)
{
	struct iovec *iov;
	int ret, nr_segs;

	if (iomsg->vec.iovec) {
		nr_segs = iomsg->vec.nr;
		iov = iomsg->vec.iovec;
	} else {
		nr_segs = 1;
		iov = &iomsg->fast_iov;
	}

	ret = __import_iovec(ddir, uiov, uvec_seg, nr_segs, &iov,
			     &iomsg->msg.msg_iter, io_is_compat(req->ctx));
	if (unlikely(ret < 0))
		return ret;

	if (iov) {
		req->flags |= REQ_F_NEED_CLEANUP;
		io_vec_reset_iovec(&iomsg->vec, iov, iomsg->msg.msg_iter.nr_segs);
	}
	return 0;
}

static int io_compat_msg_copy_hdr(struct io_kiocb *req,
				  struct io_async_msghdr *iomsg,
				  struct compat_msghdr *msg, int ddir,
				  struct sockaddr __user **save_addr)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_iovec __user *uiov;
	int ret;

	if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
		return -EFAULT;

	ret = __get_compat_msghdr(&iomsg->msg, msg, save_addr);
	if (ret)
		return ret;

	uiov = compat_ptr(msg->msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg->msg_iovlen == 0) {
			sr->len = 0;
		} else if (msg->msg_iovlen > 1) {
			return -EINVAL;
		} else {
			struct compat_iovec tmp_iov;

			if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
				return -EFAULT;
			sr->len = tmp_iov.iov_len;
		}
	}
	return 0;
}

static int io_copy_msghdr_from_user(struct user_msghdr *msg,
				    struct user_msghdr __user *umsg)
{
	if (!user_access_begin(umsg, sizeof(*umsg)))
		return -EFAULT;
	unsafe_get_user(msg->msg_name, &umsg->msg_name, ua_end);
	unsafe_get_user(msg->msg_namelen, &umsg->msg_namelen, ua_end);
	unsafe_get_user(msg->msg_iov, &umsg->msg_iov, ua_end);
	unsafe_get_user(msg->msg_iovlen, &umsg->msg_iovlen, ua_end);
	unsafe_get_user(msg->msg_control, &umsg->msg_control, ua_end);
	unsafe_get_user(msg->msg_controllen, &umsg->msg_controllen, ua_end);
	user_access_end();
	return 0;
ua_end:
	user_access_end();
	return -EFAULT;
}

static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			   struct user_msghdr *msg, int ddir,
			   struct sockaddr __user **save_addr)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr __user *umsg = sr->umsg;
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

	if (io_is_compat(req->ctx)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ddir, save_addr);
		if (ret)
			return ret;

		memset(msg, 0, sizeof(*msg));
		msg->msg_namelen = cmsg.msg_namelen;
		msg->msg_controllen = cmsg.msg_controllen;
		msg->msg_iov = compat_ptr(cmsg.msg_iov);
		msg->msg_iovlen = cmsg.msg_iovlen;
		return 0;
	}

	ret = io_copy_msghdr_from_user(msg, umsg);
	if (unlikely(ret))
		return ret;

	msg->msg_flags = 0;

	ret = __copy_msghdr(&iomsg->msg, msg, save_addr);
	if (ret)
		return ret;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg->msg_iovlen == 0) {
			sr->len = 0;
		} else if (msg->msg_iovlen > 1) {
			return -EINVAL;
		} else {
			struct iovec __user *uiov = msg->msg_iov;
			struct iovec tmp_iov;

			if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
				return -EFAULT;
			sr->len = tmp_iov.iov_len;
		}
	}
	return 0;
}
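/*
 * Note the provided-buffer rule enforced in both copy_hdr paths above:
 * with REQ_F_BUFFER_SELECT the caller may pass at most one iovec, and only
 * its length is consumed (it caps sr->len); the base address comes from
 * the buffer selected at issue time. E.g. msg_iovlen == 2 fails with
 * -EINVAL even if both iovecs are valid.
 */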
void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	io_netmsg_iovec_free(io);
}

static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	void __user *addr;
	u16 addr_len;
	int ret;

	sr->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));

	if (READ_ONCE(sqe->__pad3[0]))
		return -EINVAL;

	kmsg->msg.msg_name = NULL;
	kmsg->msg.msg_namelen = 0;
	kmsg->msg.msg_control = NULL;
	kmsg->msg.msg_controllen = 0;
	kmsg->msg.msg_ubuf = NULL;

	addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	addr_len = READ_ONCE(sqe->addr_len);
	if (addr) {
		ret = move_addr_to_kernel(addr, addr_len, &kmsg->addr);
		if (unlikely(ret < 0))
			return ret;
		kmsg->msg.msg_name = &kmsg->addr;
		kmsg->msg.msg_namelen = addr_len;
	}
	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
		if (sr->flags & IORING_SEND_VECTORIZED)
			return -EINVAL;
		req->flags |= REQ_F_IMPORT_BUFFER;
		return 0;
	}
	if (req->flags & REQ_F_BUFFER_SELECT)
		return 0;

	if (sr->flags & IORING_SEND_VECTORIZED)
		return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE);

	return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
}

static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct user_msghdr msg;
	int ret;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	ret = io_msg_copy_hdr(req, kmsg, &msg, ITER_SOURCE, NULL);
	if (unlikely(ret))
		return ret;
	/* save msg_control as sys_sendmsg() overwrites it */
	sr->msg_control = kmsg->msg.msg_control_user;

	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
		kmsg->msg.msg_iter.nr_segs = msg.msg_iovlen;
		return io_prep_reg_iovec(req, &kmsg->vec, msg.msg_iov,
					 msg.msg_iovlen);
	}
	if (req->flags & REQ_F_BUFFER_SELECT)
		return 0;
	return io_net_import_vec(req, kmsg, msg.msg_iov, msg.msg_iovlen, ITER_SOURCE);
}

#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE | \
		       IORING_SEND_VECTORIZED)
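/*
 * A bundle send (IORING_RECVSEND_BUNDLE) picks as many buffers from the
 * provided-buffer group as one sendmsg call can take and completes them
 * with a single CQE, re-arming for another bundle if buffers remain; see
 * io_send_select_buffer() and io_send_finish() below for the selection
 * and completion halves.
 */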
int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;
	sr->len = READ_ONCE(sqe->len);
	if (unlikely(sr->len < 0))
		return -EINVAL;
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~SENDMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (req->flags & REQ_F_BUFFER_SELECT)
		sr->buf_group = req->buf_index;
	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_SENDMSG)
			return -EINVAL;
		sr->msg_flags |= MSG_WAITALL;
		req->flags |= REQ_F_MULTISHOT;
	}

	if (io_is_compat(req->ctx))
		sr->msg_flags |= MSG_CMSG_COMPAT;

	if (unlikely(!io_msg_alloc_async(req)))
		return -ENOMEM;
	if (req->opcode != IORING_OP_SENDMSG)
		return io_send_setup(req, sqe);
	if (unlikely(sqe->addr2 || sqe->file_index))
		return -EINVAL;
	return io_sendmsg_setup(req, sqe);
}

static void io_req_msg_cleanup(struct io_kiocb *req,
			       unsigned int issue_flags)
{
	io_netmsg_recycle(req, issue_flags);
}

/*
 * For bundle completions, we need to figure out how many segments we
 * consumed. A bundle could be using a single ITER_UBUF if that's all we
 * mapped, or it could be using an ITER_IOVEC. If the latter, and we
 * consumed all of the segments, then it's a trivial question to answer.
 * If we have residual data in the iter, then loop the segments to figure
 * out how much we transferred.
 */
static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
{
	struct iovec *iov;
	int nbufs;

	/* no data is always zero segments, and a ubuf is always 1 segment */
	if (ret <= 0)
		return 0;
	if (iter_is_ubuf(&kmsg->msg.msg_iter))
		return 1;

	iov = kmsg->vec.iovec;
	if (!iov)
		iov = &kmsg->fast_iov;

	/* if all data was transferred, it's basic pointer math */
	if (!iov_iter_count(&kmsg->msg.msg_iter))
		return iter_iov(&kmsg->msg.msg_iter) - iov;

	/* short transfer, count segments */
	nbufs = 0;
	do {
		int this_len = min_t(int, iov[nbufs].iov_len, ret);

		nbufs++;
		ret -= this_len;
	} while (ret);

	return nbufs;
}

static int io_net_kbuf_recycle(struct io_kiocb *req, struct io_buffer_list *bl,
			       struct io_async_msghdr *kmsg, int len)
{
	req->flags |= REQ_F_BL_NO_RECYCLE;
	if (req->flags & REQ_F_BUFFERS_COMMIT)
		io_kbuf_commit(req, bl, len, io_bundle_nbufs(kmsg, len));
	return IOU_RETRY;
}
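/*
 * io_bundle_nbufs() worked example: a bundle mapped as three iovecs of
 * lengths { 4096, 4096, 4096 } that transferred ret == 6144 bytes walks
 * the segments and counts nbufs == 2 (4096 from the first buffer, the
 * remaining 2048 from the second). A full transfer skips the loop and
 * derives the count from how far the iterator advanced.
 */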
static inline bool io_send_finish(struct io_kiocb *req,
				  struct io_async_msghdr *kmsg,
				  struct io_br_sel *sel)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	bool bundle_finished = sel->val <= 0;
	unsigned int cflags;

	if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
		cflags = io_put_kbuf(req, sel->val, sel->buf_list);
		goto finish;
	}

	cflags = io_put_kbufs(req, sel->val, sel->buf_list,
			      io_bundle_nbufs(kmsg, sel->val));

	/*
	 * Don't start new bundles if the buffer list is empty, or if the
	 * current operation needed to go through polling to complete.
	 */
	if (bundle_finished || req->flags & (REQ_F_BL_EMPTY | REQ_F_POLLED))
		goto finish;

	/*
	 * Fill CQE for this send and see if we should keep trying to
	 * send on this socket.
	 */
	if (io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) {
		io_mshot_prep_retry(req, kmsg);
		return false;
	}

	/* Otherwise stop bundle and use the current result. */
finish:
	io_req_set_res(req, sel->val, cflags);
	sel->val = IOU_COMPLETE;
	return true;
}

int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret > 0 && io_net_retry(sock, flags)) {
			kmsg->msg.msg_controllen = 0;
			kmsg->msg.msg_control = NULL;
			sr->done_io += ret;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	io_req_msg_cleanup(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags,
				 struct io_br_sel *sel, struct io_async_msghdr *kmsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct buf_sel_arg arg = {
		.iovs = &kmsg->fast_iov,
		.max_len = min_not_zero(sr->len, INT_MAX),
		.nr_iovs = 1,
		.buf_group = sr->buf_group,
	};
	int ret;

	if (kmsg->vec.iovec) {
		arg.nr_iovs = kmsg->vec.nr;
		arg.iovs = kmsg->vec.iovec;
		arg.mode = KBUF_MODE_FREE;
	}

	if (!(sr->flags & IORING_RECVSEND_BUNDLE))
		arg.nr_iovs = 1;
	else
		arg.mode |= KBUF_MODE_EXPAND;

	ret = io_buffers_select(req, &arg, sel, issue_flags);
	if (unlikely(ret < 0))
		return ret;

	if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
		kmsg->vec.nr = ret;
		kmsg->vec.iovec = arg.iovs;
		req->flags |= REQ_F_NEED_CLEANUP;
	}
	sr->len = arg.out_len;

	if (ret == 1) {
		sr->buf = arg.iovs[0].iov_base;
		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
	} else {
		iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE,
			      arg.iovs, ret, arg.out_len);
	}

	return 0;
}
int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct io_br_sel sel = { };
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;

retry_bundle:
	sel.buf_list = NULL;
	if (io_do_buffer_select(req)) {
		ret = io_send_select_buffer(req, issue_flags, &sel, kmsg);
		if (ret)
			return ret;
	}

	/*
	 * If MSG_WAITALL is set, or this is a bundle send, then we need
	 * the full amount. If just bundle is set, a short send completes
	 * the bundle sequence rather than continuing on.
	 */
	if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
	kmsg->msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &kmsg->msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			return io_net_kbuf_recycle(req, sel.buf_list, kmsg, ret);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	sel.val = ret;
	if (!io_send_finish(req, kmsg, &sel))
		goto retry_bundle;

	io_req_msg_cleanup(req, issue_flags);
	return sel.val;
}

static int io_recvmsg_mshot_prep(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg,
				 int namelen, size_t controllen)
{
	if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) ==
	    (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) {
		int hdr;

		if (unlikely(namelen < 0))
			return -EOVERFLOW;
		if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
				       namelen, &hdr))
			return -EOVERFLOW;
		if (check_add_overflow(hdr, controllen, &hdr))
			return -EOVERFLOW;

		iomsg->namelen = namelen;
		iomsg->controllen = controllen;
		return 0;
	}

	return 0;
}

static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct user_msghdr msg;
	int ret;

	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST, &iomsg->uaddr);
	if (unlikely(ret))
		return ret;

	if (!(req->flags & REQ_F_BUFFER_SELECT)) {
		ret = io_net_import_vec(req, iomsg, msg.msg_iov, msg.msg_iovlen,
					ITER_DEST);
		if (unlikely(ret))
			return ret;
	}
	return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
				     msg.msg_controllen);
}

static int io_recvmsg_prep_setup(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg;

	kmsg = io_msg_alloc_async(req);
	if (unlikely(!kmsg))
		return -ENOMEM;

	if (req->opcode == IORING_OP_RECV) {
		kmsg->msg.msg_name = NULL;
		kmsg->msg.msg_namelen = 0;
		kmsg->msg.msg_inq = 0;
		kmsg->msg.msg_control = NULL;
		kmsg->msg.msg_get_inq = 1;
		kmsg->msg.msg_controllen = 0;
		kmsg->msg.msg_iocb = NULL;
		kmsg->msg.msg_ubuf = NULL;

		if (req->flags & REQ_F_BUFFER_SELECT)
			return 0;
		return import_ubuf(ITER_DEST, sr->buf, sr->len,
				   &kmsg->msg.msg_iter);
	}

	return io_recvmsg_copy_hdr(req, kmsg);
}

#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \
		       IORING_RECVSEND_BUNDLE)
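/*
 * Userspace sketch of a multishot receive (liburing), assuming a
 * provided-buffer ring registered under group 0:
 *
 *	io_uring_prep_recv_multishot(sqe, sockfd, NULL, 0, 0);
 *	sqe->buf_group = 0;
 *	sqe->flags |= IOSQE_BUFFER_SELECT;
 *
 * Each received chunk posts a CQE with IORING_CQE_F_MORE set; the request
 * stays armed until it errors, is cancelled, or the buffer group runs dry.
 */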
int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;

	if (unlikely(sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	if (unlikely(sr->len < 0))
		return -EINVAL;
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~RECVMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags);
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (req->flags & REQ_F_BUFFER_SELECT)
		sr->buf_group = req->buf_index;
	sr->mshot_total_len = sr->mshot_len = 0;
	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		if (req->opcode == IORING_OP_RECV) {
			sr->mshot_len = sr->len;
			sr->mshot_total_len = READ_ONCE(sqe->optlen);
			if (sr->mshot_total_len)
				sr->flags |= IORING_RECV_MSHOT_LIM;
		} else if (sqe->optlen) {
			return -EINVAL;
		}
		req->flags |= REQ_F_APOLL_MULTISHOT;
	} else if (sqe->optlen) {
		return -EINVAL;
	}

	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_RECVMSG)
			return -EINVAL;
	}

	if (io_is_compat(req->ctx))
		sr->msg_flags |= MSG_CMSG_COMPAT;

	sr->nr_multishot_loops = 0;
	return io_recvmsg_prep_setup(req);
}

/* bits to clear in old and inherit in new cflags on bundle retry */
#define CQE_F_MASK	(IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE)
/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req,
				  struct io_async_msghdr *kmsg,
				  struct io_br_sel *sel, bool mshot_finished,
				  unsigned issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	unsigned int cflags = 0;

	if (kmsg->msg.msg_inq > 0)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (sel->val > 0 && sr->flags & IORING_RECV_MSHOT_LIM) {
		/*
		 * If mshot_total_len hits zero, the limit has been reached.
		 * Mark mshot as finished, and flag MSHOT_DONE as well to
		 * prevent a potential bundle from being retried.
		 */
		sr->mshot_total_len -= min_t(int, sel->val, sr->mshot_total_len);
		if (!sr->mshot_total_len) {
			sr->flags |= IORING_RECV_MSHOT_DONE;
			mshot_finished = true;
		}
	}

	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		size_t this_ret = sel->val - sr->done_io;

		cflags |= io_put_kbufs(req, this_ret, sel->buf_list,
				       io_bundle_nbufs(kmsg, this_ret));
		if (sr->flags & IORING_RECV_RETRY)
			cflags = req->cqe.flags | (cflags & CQE_F_MASK);
		if (sr->mshot_len && sel->val >= sr->mshot_len)
			sr->flags |= IORING_RECV_MSHOT_CAP;
		/* bundle with no more immediate buffers, we're done */
		if (req->flags & REQ_F_BL_EMPTY)
			goto finish;
		/*
		 * If more is available AND it was a full transfer, retry and
		 * append to this one
		 */
		if (!(sr->flags & IORING_RECV_NO_RETRY) &&
		    kmsg->msg.msg_inq > 1 && this_ret > 0 &&
		    !iov_iter_count(&kmsg->msg.msg_iter)) {
			req->cqe.flags = cflags & ~CQE_F_MASK;
			sr->len = kmsg->msg.msg_inq;
			sr->done_io += this_ret;
			sr->flags |= IORING_RECV_RETRY;
			return false;
		}
	} else {
		cflags |= io_put_kbuf(req, sel->val, sel->buf_list);
	}

	/*
	 * Fill CQE for this receive and see if we should keep trying to
	 * receive from this socket.
	 */
	if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
	    io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) {
		sel->val = IOU_RETRY;
		io_mshot_prep_retry(req, kmsg);
		/* Known not-empty or unknown state, retry */
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY &&
			    !(sr->flags & IORING_RECV_MSHOT_CAP))
				return false;
			/* mshot retries exceeded, force a requeue */
			sr->nr_multishot_loops = 0;
			sr->flags &= ~IORING_RECV_MSHOT_CAP;
			if (issue_flags & IO_URING_F_MULTISHOT)
				sel->val = IOU_REQUEUE;
		}
		return true;
	}

	/* Finish the request / stop multishot. */
finish:
	io_req_set_res(req, sel->val, cflags);
	sel->val = IOU_COMPLETE;
	io_req_msg_cleanup(req, issue_flags);
	return true;
}

static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}

struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};
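/*
 * Layout of a selected buffer for multishot recvmsg, as carved out by
 * io_recvmsg_prep_multishot() above:
 *
 *	[ io_uring_recvmsg_out ][ name (namelen) ][ control ][ payload ]
 *
 * The header and address are copied out together via the hdr struct below
 * once the receive completes; the control area is written by the socket
 * layer, and the remaining space holds the payload itself.
 */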
static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 * "fromlen shall refer to the value before truncation.."
	 *					1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}

int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct io_br_sel sel = { };
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	sel.buf_list = NULL;
	if (io_do_buffer_select(req)) {
		size_t len = sr->len;

		sel = io_buffer_select(req, &len, sr->buf_group, issue_flags);
		if (!sel.addr)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &sel.addr, &len);
			if (ret) {
				io_kbuf_recycle(req, sel.buf_list, issue_flags);
				return ret;
			}
		}

		iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, sel.addr, len);
	}

	kmsg->msg.msg_get_inq = 1;
	kmsg->msg.msg_inq = -1;
	if (req->flags & REQ_F_APOLL_MULTISHOT) {
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	} else {
		/* disable partial retry for recvmsg with cmsg attached */
		if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
			min_ret = iov_iter_count(&kmsg->msg.msg_iter);

		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);
	}

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			io_kbuf_recycle(req, sel.buf_list, issue_flags);
			return IOU_RETRY;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			return io_net_kbuf_recycle(req, sel.buf_list, kmsg, ret);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) &&
		   (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, sel.buf_list, issue_flags);

	sel.val = ret;
	if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
		goto retry_multishot;

	return sel.val;
}
static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
			      struct io_br_sel *sel, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	int ret;

	/*
	 * If the ring isn't locked, then don't use the peek interface
	 * to grab multiple buffers as we will lock/unlock between
	 * this selection and posting the buffers.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
	    sr->flags & IORING_RECVSEND_BUNDLE) {
		struct buf_sel_arg arg = {
			.iovs = &kmsg->fast_iov,
			.nr_iovs = 1,
			.mode = KBUF_MODE_EXPAND,
			.buf_group = sr->buf_group,
		};

		if (kmsg->vec.iovec) {
			arg.nr_iovs = kmsg->vec.nr;
			arg.iovs = kmsg->vec.iovec;
			arg.mode |= KBUF_MODE_FREE;
		}

		if (sel->val)
			arg.max_len = sel->val;
		else if (kmsg->msg.msg_inq > 1)
			arg.max_len = min_not_zero(sel->val,
						   (ssize_t) kmsg->msg.msg_inq);

		/* if mshot limited, ensure we don't go over */
		if (sr->flags & IORING_RECV_MSHOT_LIM)
			arg.max_len = min_not_zero(arg.max_len,
						   sr->mshot_total_len);
		ret = io_buffers_peek(req, &arg, sel);
		if (unlikely(ret < 0))
			return ret;

		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
			kmsg->vec.nr = ret;
			kmsg->vec.iovec = arg.iovs;
			req->flags |= REQ_F_NEED_CLEANUP;
		}
		if (arg.partial_map)
			sr->flags |= IORING_RECV_PARTIAL_MAP;

		/* special case 1 vec, can be a fast path */
		if (ret == 1) {
			sr->buf = arg.iovs[0].iov_base;
			sr->len = arg.iovs[0].iov_len;
			goto map_ubuf;
		}
		iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
			      arg.out_len);
	} else {
		size_t len = sel->val;

		*sel = io_buffer_select(req, &len, sr->buf_group, issue_flags);
		if (!sel->addr)
			return -ENOBUFS;
		sr->buf = sel->addr;
		sr->len = len;
map_ubuf:
		ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
	}

	return 0;
}

int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct io_br_sel sel;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	sel.buf_list = NULL;
	if (io_do_buffer_select(req)) {
		sel.val = sr->len;
		ret = io_recv_buf_select(req, kmsg, &sel, issue_flags);
		if (unlikely(ret < 0)) {
			kmsg->msg.msg_inq = -1;
			goto out_free;
		}
		sr->buf = NULL;
	}

	kmsg->msg.msg_flags = 0;
	kmsg->msg.msg_inq = -1;

	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = sock_recvmsg(sock, &kmsg->msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			io_kbuf_recycle(req, sel.buf_list, issue_flags);
			return IOU_RETRY;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			return io_net_kbuf_recycle(req, sel.buf_list, kmsg, ret);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) &&
		   (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		req_set_fail(req);
	}

	mshot_finished = ret <= 0;
	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, sel.buf_list, issue_flags);

	sel.val = ret;
	if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
		goto retry_multishot;

	return sel.val;
}
int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
	unsigned ifq_idx;

	if (unlikely(sqe->addr2 || sqe->addr || sqe->addr3))
		return -EINVAL;

	ifq_idx = READ_ONCE(sqe->zcrx_ifq_idx);
	zc->ifq = xa_load(&req->ctx->zcrx_ctxs, ifq_idx);
	if (!zc->ifq)
		return -EINVAL;

	zc->len = READ_ONCE(sqe->len);
	zc->flags = READ_ONCE(sqe->ioprio);
	if (READ_ONCE(sqe->msg_flags))
		return -EINVAL;
	if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT))
		return -EINVAL;
	/* multishot required */
	if (!(zc->flags & IORING_RECV_MULTISHOT))
		return -EINVAL;
	/* All data completions are posted as aux CQEs. */
	req->flags |= REQ_F_APOLL_MULTISHOT;

	return 0;
}

int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
	struct socket *sock;
	unsigned int len;
	int ret;

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	len = zc->len;
	ret = io_zcrx_recv(req, zc->ifq, sock, 0, issue_flags, &zc->len);
	if (len && zc->len == 0) {
		io_req_set_res(req, 0, 0);
		return IOU_COMPLETE;
	}
	if (unlikely(ret <= 0) && ret != -EAGAIN) {
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret == IOU_REQUEUE)
			return IOU_REQUEUE;

		req_set_fail(req);
		io_req_set_res(req, ret, 0);
		return IOU_COMPLETE;
	}
	return IOU_RETRY;
}

void io_send_zc_cleanup(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io = req->async_data;

	if (req_has_async_data(req))
		io_netmsg_iovec_free(io);
	if (zc->notif) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
	}
}

#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | \
			    IORING_RECVSEND_FIXED_BUF)
#define IO_ZC_FLAGS_VALID  (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE | \
			    IORING_SEND_VECTORIZED)
int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_async_msghdr *iomsg;
	struct io_kiocb *notif;
	int ret;

	zc->done_io = 0;

	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	notif->cqe.user_data = req->cqe.user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	req->flags |= REQ_F_NEED_CLEANUP | REQ_F_POLL_NO_LAZY;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
		if (zc->flags & ~IO_ZC_FLAGS_VALID)
			return -EINVAL;
		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
			struct io_notif_data *nd = io_notif_to_data(notif);

			nd->zc_report = true;
			nd->zc_used = false;
			nd->zc_copied = false;
		}
	}

	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
	req->buf_index = READ_ONCE(sqe->buf_index);
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

	if (io_is_compat(req->ctx))
		zc->msg_flags |= MSG_CMSG_COMPAT;

	iomsg = io_msg_alloc_async(req);
	if (unlikely(!iomsg))
		return -ENOMEM;

	if (req->opcode == IORING_OP_SEND_ZC) {
		ret = io_send_setup(req, sqe);
	} else {
		if (unlikely(sqe->addr2 || sqe->file_index))
			return -EINVAL;
		ret = io_sendmsg_setup(req, sqe);
	}
	if (unlikely(ret))
		return ret;

	if (!(zc->flags & IORING_RECVSEND_FIXED_BUF)) {
		iomsg->msg.sg_from_iter = io_sg_from_iter_iovec;
		return io_notif_account_mem(zc->notif, iomsg->msg.msg_iter.count);
	}
	iomsg->msg.sg_from_iter = io_sg_from_iter;
	return 0;
}

static int io_sg_from_iter_iovec(struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	skb_zcopy_downgrade_managed(skb);
	return zerocopy_fill_skb_from_iter(skb, from, length);
}

static int io_sg_from_iter(struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	if (!frag)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return zerocopy_fill_skb_from_iter(skb, from, length);

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	if (bi.bi_size)
		ret = -EMSGSIZE;

	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;
	return ret;
}

static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;

	WARN_ON_ONCE(!(sr->flags & IORING_RECVSEND_FIXED_BUF));

	sr->notif->buf_index = req->buf_index;
	return io_import_reg_buf(sr->notif, &kmsg->msg.msg_iter,
				 (u64)(uintptr_t)sr->buf, sr->len,
				 ITER_SOURCE, issue_flags);
}
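/*
 * Zerocopy send completion model: the send request itself completes with
 * IORING_CQE_F_MORE set once the data has been handed to the network
 * stack, and a second CQE flagged IORING_CQE_F_NOTIF (same user_data)
 * fires later, when the kernel is done with the user pages and they may
 * safely be reused or freed.
 */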
int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned msg_flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	if (req->flags & REQ_F_IMPORT_BUFFER) {
		req->flags &= ~REQ_F_IMPORT_BUFFER;
		ret = io_send_zc_import(req, issue_flags);
		if (unlikely(ret))
			return ret;
	}

	msg_flags = zc->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
	msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;

	kmsg->msg.msg_flags = msg_flags;
	kmsg->msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
	ret = sock_sendmsg(sock, &kmsg->msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) {
			zc->done_io += ret;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += zc->done_io;
	else if (zc->done_io)
		ret = zc->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_COMPLETE;
}

int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;

	if (req->flags & REQ_F_IMPORT_BUFFER) {
		unsigned uvec_segs = kmsg->msg.msg_iter.nr_segs;
		int ret;

		sr->notif->buf_index = req->buf_index;
		ret = io_import_reg_vec(ITER_SOURCE, &kmsg->msg.msg_iter,
					sr->notif, &kmsg->vec, uvec_segs,
					issue_flags);
		if (unlikely(ret))
			return ret;
		req->flags &= ~REQ_F_IMPORT_BUFFER;
	}

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;
	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(sr->notif);
		sr->notif = NULL;
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_COMPLETE;
}
void io_sendrecv_fail(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (sr->done_io)
		req->cqe.res = sr->done_io;

	if ((req->flags & REQ_F_NEED_CLEANUP) &&
	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
		req->cqe.flags |= IORING_CQE_F_MORE;
}

#define ACCEPT_FLAGS (IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \
		      IORING_ACCEPT_POLL_FIRST)

int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);

	if (sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	accept->iou_flags = READ_ONCE(sqe->ioprio);
	if (accept->iou_flags & ~ACCEPT_FLAGS)
		return -EINVAL;

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		if (accept->iou_flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (accept->iou_flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	if (accept->iou_flags & IORING_ACCEPT_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	return 0;
}
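/*
 * Userspace sketch of a multishot accept (liburing), posting one CQE per
 * accepted connection without re-arming:
 *
 *	io_uring_prep_multishot_accept(sqe, listen_fd, NULL, NULL, 0);
 *
 * Each CQE carries the new fd in cqe->res and keeps IORING_CQE_F_MORE set
 * for as long as the request stays armed.
 */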
int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool fixed = !!accept->file_slot;
	struct proto_accept_arg arg = {
		.flags = force_nonblock ? O_NONBLOCK : 0,
	};
	struct file *file;
	unsigned cflags;
	int ret, fd;

	if (!(req->flags & REQ_F_POLLED) &&
	    accept->iou_flags & IORING_ACCEPT_POLL_FIRST)
		return -EAGAIN;

retry:
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	arg.err = 0;
	arg.is_empty = -1;
	file = do_accept(req->file, &arg, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock &&
		    !(accept->iou_flags & IORING_ACCEPT_DONTWAIT))
			return IOU_RETRY;

		if (ret == -ERESTARTSYS)
			ret = -EINTR;
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  accept->file_slot);
	}

	cflags = 0;
	if (!arg.is_empty)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (ret >= 0 && (req->flags & REQ_F_APOLL_MULTISHOT) &&
	    io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1)
			goto retry;
		return IOU_RETRY;
	}

	io_req_set_res(req, ret, cflags);
	if (ret < 0)
		req_set_fail(req);
	return IOU_COMPLETE;
}

void io_socket_bpf_populate(struct io_uring_bpf_ctx *bctx, struct io_kiocb *req)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	bctx->socket.family = sock->domain;
	bctx->socket.type = sock->type;
	bctx->socket.protocol = sock->protocol;
}

int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
		return -EINVAL;
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	return 0;
}

int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
	bool fixed = !!sock->file_slot;
	struct file *file;
	int ret, fd;

	if (!fixed) {
		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  sock->file_slot);
	}
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}
int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	conn->in_progress = conn->seen_econnaborted = false;

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr);
}

int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io = req->async_data;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	if (connect->in_progress) {
		struct poll_table_struct pt = { ._key = EPOLLERR };

		if (vfs_poll(req->file, &pt) & EPOLLERR)
			goto get_sock_err;
	}

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->addr, connect->addr_len,
				 file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
	    && force_nonblock) {
		if (ret == -EINPROGRESS) {
			connect->in_progress = true;
		} else if (ret == -ECONNABORTED) {
			if (connect->seen_econnaborted)
				goto out;
			connect->seen_econnaborted = true;
		}
		return -EAGAIN;
	}
	if (connect->in_progress) {
		/*
		 * At least bluetooth will return -EBADFD on a re-connect
		 * attempt, and it's (supposedly) also valid to get -EISCONN
		 * which means the previous result is good. For both of these,
		 * grab the sock_error() and use that for the completion.
		 */
		if (ret == -EBADFD || ret == -EISCONN) {
get_sock_err:
			ret = sock_error(sock_from_file(req->file)->sk);
		}
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	io_req_msg_cleanup(req, issue_flags);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct sockaddr __user *uaddr;
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	bind->addr_len = READ_ONCE(sqe->addr2);

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;
	return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr);
}

int io_bind(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct io_async_msghdr *io = req->async_data;
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_bind_socket(sock, &io->addr, bind->addr_len);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}

int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);

	if (sqe->addr || sqe->buf_index || sqe->rw_flags ||
	    sqe->splice_fd_in || sqe->addr2)
		return -EINVAL;

	listen->backlog = READ_ONCE(sqe->len);
	return 0;
}
int io_listen(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_listen_socket(sock, listen->backlog);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}

void io_netmsg_cache_free(const void *entry)
{
	struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;

	io_vec_free(&kmsg->vec);
	kfree(kmsg);
}