// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "filetable.h"
#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"
#include "zcrx.h"

struct io_shutdown {
	struct file *file;
	int how;
};

struct io_accept {
	struct file *file;
	struct sockaddr __user *addr;
	int __user *addr_len;
	int flags;
	int iou_flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_socket {
	struct file *file;
	int domain;
	int type;
	int protocol;
	int flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_connect {
	struct file *file;
	struct sockaddr __user *addr;
	int addr_len;
	bool in_progress;
	bool seen_econnaborted;
};

struct io_bind {
	struct file *file;
	int addr_len;
};

struct io_listen {
	struct file *file;
	int backlog;
};

struct io_sr_msg {
	struct file *file;
	union {
		struct compat_msghdr __user *umsg_compat;
		struct user_msghdr __user *umsg;
		void __user *buf;
	};
	int len;
	unsigned done_io;
	unsigned msg_flags;
	unsigned nr_multishot_loops;
	u16 flags;
	/* initialised and used only by !msg send variants */
	u16 buf_group;
	/* per-invocation mshot limit */
	unsigned mshot_len;
	/* overall mshot byte limit */
	unsigned mshot_total_len;
	void __user *msg_control;
	/* used only for send zerocopy */
	struct io_kiocb *notif;
};

/*
 * The UAPI flags are the lower 8 bits, as that's all sqe->ioprio will hold
 * anyway. Use the upper 8 bits for internal uses.
 */
enum sr_retry_flags {
	IORING_RECV_RETRY	= (1U << 15),
	IORING_RECV_PARTIAL_MAP	= (1U << 14),
	IORING_RECV_MSHOT_CAP	= (1U << 13),
	IORING_RECV_MSHOT_LIM	= (1U << 12),
	IORING_RECV_MSHOT_DONE	= (1U << 11),

	IORING_RECV_RETRY_CLEAR	= IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP,
	IORING_RECV_NO_RETRY	= IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP |
				  IORING_RECV_MSHOT_CAP | IORING_RECV_MSHOT_DONE,
};

/*
 * Number of times we'll try and do receives if there's more data. If we
 * exceed this limit, then add us to the back of the queue and retry from
 * there. This helps fairness between flooding clients.
 */
#define MULTISHOT_MAX_RETRY	32

struct io_recvzc {
	struct file *file;
	u16 flags;
	u32 len;
	struct io_zcrx_ifq *ifq;
};

static int io_sg_from_iter_iovec(struct sk_buff *skb,
				 struct iov_iter *from, size_t length);
static int io_sg_from_iter(struct sk_buff *skb,
			   struct iov_iter *from, size_t length);

int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
	struct socket *sock;
	int ret;

	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}

static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
{
	if (kmsg->vec.iovec)
		io_vec_free(&kmsg->vec);
}

static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;

	/* can't recycle, ensure we free the iovec if we have one */
	if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
		io_netmsg_iovec_free(hdr);
		return;
	}

	/* Let normal cleanup path reap it if we fail adding to the cache */
	io_alloc_cache_vec_kasan(&hdr->vec);
	if (hdr->vec.nr > IO_VEC_CACHE_SOFT_CAP)
		io_vec_free(&hdr->vec);

	if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr))
		io_req_async_data_clear(req, REQ_F_NEED_CLEANUP);
}

static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_async_msghdr *hdr;

	hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req);
	if (!hdr)
		return NULL;

	/* If the async data was cached, we might have an iov cached inside. */
	if (hdr->vec.iovec)
		req->flags |= REQ_F_NEED_CLEANUP;
	return hdr;
}

static inline void io_mshot_prep_retry(struct io_kiocb *req,
				       struct io_async_msghdr *kmsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	req->flags &= ~REQ_F_BL_EMPTY;
	sr->done_io = 0;
	sr->flags &= ~IORING_RECV_RETRY_CLEAR;
	sr->len = sr->mshot_len;
}

static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			     const struct iovec __user *uiov, unsigned uvec_seg,
			     int ddir)
{
	struct iovec *iov;
	int ret, nr_segs;

	if (iomsg->vec.iovec) {
		nr_segs = iomsg->vec.nr;
		iov = iomsg->vec.iovec;
	} else {
		nr_segs = 1;
		iov = &iomsg->fast_iov;
	}

	ret = __import_iovec(ddir, uiov, uvec_seg, nr_segs, &iov,
			     &iomsg->msg.msg_iter, io_is_compat(req->ctx));
	if (unlikely(ret < 0))
		return ret;

	if (iov) {
		req->flags |= REQ_F_NEED_CLEANUP;
		io_vec_reset_iovec(&iomsg->vec, iov, iomsg->msg.msg_iter.nr_segs);
	}
	return 0;
}

static int io_compat_msg_copy_hdr(struct io_kiocb *req,
				  struct io_async_msghdr *iomsg,
				  struct compat_msghdr *msg, int ddir,
				  struct sockaddr __user **save_addr)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_iovec __user *uiov;
	int ret;

	if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
		return -EFAULT;

	ret = __get_compat_msghdr(&iomsg->msg, msg, save_addr);
	if (ret)
		return ret;

	uiov = compat_ptr(msg->msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg->msg_iovlen == 0) {
			sr->len = 0;
		} else if (msg->msg_iovlen > 1) {
			return -EINVAL;
		} else {
			struct compat_iovec tmp_iov;

			if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
				return -EFAULT;
			sr->len = tmp_iov.iov_len;
		}
	}
	return 0;
}

static int io_copy_msghdr_from_user(struct user_msghdr *msg,
				    struct user_msghdr __user *umsg)
{
	if (!user_access_begin(umsg, sizeof(*umsg)))
		return -EFAULT;
	unsafe_get_user(msg->msg_name, &umsg->msg_name, ua_end);
	unsafe_get_user(msg->msg_namelen, &umsg->msg_namelen, ua_end);
	unsafe_get_user(msg->msg_iov, &umsg->msg_iov, ua_end);
	unsafe_get_user(msg->msg_iovlen, &umsg->msg_iovlen, ua_end);
	unsafe_get_user(msg->msg_control, &umsg->msg_control, ua_end);
	unsafe_get_user(msg->msg_controllen, &umsg->msg_controllen, ua_end);
	user_access_end();
	return 0;
ua_end:
	user_access_end();
	return -EFAULT;
}

static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			   struct user_msghdr *msg, int ddir,
			   struct sockaddr __user **save_addr)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr __user *umsg = sr->umsg;
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

	if (io_is_compat(req->ctx)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ddir, save_addr);
		if (ret)
			return ret;

		memset(msg, 0, sizeof(*msg));
		msg->msg_namelen = cmsg.msg_namelen;
		msg->msg_controllen = cmsg.msg_controllen;
		msg->msg_iov = compat_ptr(cmsg.msg_iov);
		msg->msg_iovlen = cmsg.msg_iovlen;
		return 0;
	}

	ret = io_copy_msghdr_from_user(msg, umsg);
	if (unlikely(ret))
		return ret;

	msg->msg_flags = 0;

	ret = __copy_msghdr(&iomsg->msg, msg, save_addr);
	if (ret)
		return ret;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg->msg_iovlen == 0) {
			sr->len = 0;
		} else if (msg->msg_iovlen > 1) {
			return -EINVAL;
		} else {
			struct iovec __user *uiov = msg->msg_iov;
			struct iovec tmp_iov;

			if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
				return -EFAULT;
			sr->len = tmp_iov.iov_len;
		}
	}
	return 0;
}

void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	io_netmsg_iovec_free(io);
}

static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	void __user *addr;
	u16 addr_len;
	int ret;

	sr->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));

	if (READ_ONCE(sqe->__pad3[0]))
		return -EINVAL;

	kmsg->msg.msg_name = NULL;
	kmsg->msg.msg_namelen = 0;
	kmsg->msg.msg_control = NULL;
	kmsg->msg.msg_controllen = 0;
	kmsg->msg.msg_ubuf = NULL;

	addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	addr_len = READ_ONCE(sqe->addr_len);
	if (addr) {
		ret = move_addr_to_kernel(addr, addr_len, &kmsg->addr);
		if (unlikely(ret < 0))
			return ret;
		kmsg->msg.msg_name = &kmsg->addr;
		kmsg->msg.msg_namelen = addr_len;
	}
	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
		if (sr->flags & IORING_SEND_VECTORIZED)
			return -EINVAL;
		req->flags |= REQ_F_IMPORT_BUFFER;
		return 0;
	}
	if (req->flags & REQ_F_BUFFER_SELECT)
		return 0;

	if (sr->flags & IORING_SEND_VECTORIZED)
		return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE);

	return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
}

static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct user_msghdr msg;
	int ret;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	ret = io_msg_copy_hdr(req, kmsg, &msg, ITER_SOURCE, NULL);
	if (unlikely(ret))
		return ret;
	/* save msg_control as sys_sendmsg() overwrites it */
	sr->msg_control = kmsg->msg.msg_control_user;

	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
		kmsg->msg.msg_iter.nr_segs = msg.msg_iovlen;
		return io_prep_reg_iovec(req, &kmsg->vec, msg.msg_iov,
					 msg.msg_iovlen);
	}
	if (req->flags & REQ_F_BUFFER_SELECT)
		return 0;
	return io_net_import_vec(req, kmsg, msg.msg_iov, msg.msg_iovlen, ITER_SOURCE);
}

#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE | IORING_SEND_VECTORIZED)

int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~SENDMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (req->flags & REQ_F_BUFFER_SELECT)
		sr->buf_group = req->buf_index;
	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_SENDMSG)
			return -EINVAL;
		sr->msg_flags |= MSG_WAITALL;
		req->flags |= REQ_F_MULTISHOT;
	}

	if (io_is_compat(req->ctx))
		sr->msg_flags |= MSG_CMSG_COMPAT;

	if (unlikely(!io_msg_alloc_async(req)))
		return -ENOMEM;
	if (req->opcode != IORING_OP_SENDMSG)
		return io_send_setup(req, sqe);
	if (unlikely(sqe->addr2 || sqe->file_index))
		return -EINVAL;
	return io_sendmsg_setup(req, sqe);
}

static void io_req_msg_cleanup(struct io_kiocb *req,
			       unsigned int issue_flags)
{
	io_netmsg_recycle(req, issue_flags);
}

/*
 * For bundle completions, we need to figure out how many segments we consumed.
 * A bundle could be using a single ITER_UBUF if that's all we mapped, or it
 * could be using an ITER_IOVEC. If the latter, then if we consumed all of
 * the segments, then it's a trivial question to answer. If we have residual
 * data in the iter, then loop the segments to figure out how much we
 * transferred.
 */
static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
{
	struct iovec *iov;
	int nbufs;

	/* no data is always zero segments, and a ubuf is always 1 segment */
	if (ret <= 0)
		return 0;
	if (iter_is_ubuf(&kmsg->msg.msg_iter))
		return 1;

	iov = kmsg->vec.iovec;
	if (!iov)
		iov = &kmsg->fast_iov;

	/* if all data was transferred, it's basic pointer math */
	if (!iov_iter_count(&kmsg->msg.msg_iter))
		return iter_iov(&kmsg->msg.msg_iter) - iov;

	/* short transfer, count segments */
	nbufs = 0;
	do {
		int this_len = min_t(int, iov[nbufs].iov_len, ret);

		nbufs++;
		ret -= this_len;
	} while (ret);

	return nbufs;
}

static int io_net_kbuf_recyle(struct io_kiocb *req, struct io_buffer_list *bl,
			      struct io_async_msghdr *kmsg, int len)
{
	req->flags |= REQ_F_BL_NO_RECYCLE;
	if (req->flags & REQ_F_BUFFERS_COMMIT)
		io_kbuf_commit(req, bl, len, io_bundle_nbufs(kmsg, len));
	return IOU_RETRY;
}

static inline bool io_send_finish(struct io_kiocb *req,
				  struct io_async_msghdr *kmsg,
				  struct io_br_sel *sel)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	bool bundle_finished = sel->val <= 0;
	unsigned int cflags;

	if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
		cflags = io_put_kbuf(req, sel->val, sel->buf_list);
		goto finish;
	}

	cflags = io_put_kbufs(req, sel->val, sel->buf_list, io_bundle_nbufs(kmsg, sel->val));

	/*
	 * Don't start new bundles if the buffer list is empty, or if the
	 * current operation needed to go through polling to complete.
	 */
	if (bundle_finished || req->flags & (REQ_F_BL_EMPTY | REQ_F_POLLED))
		goto finish;

	/*
	 * Fill CQE for this receive and see if we should keep trying to
	 * receive from this socket.
	 */
	if (io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) {
		io_mshot_prep_retry(req, kmsg);
		return false;
	}

	/* Otherwise stop bundle and use the current result. */
finish:
	io_req_set_res(req, sel->val, cflags);
	sel->val = IOU_COMPLETE;
	return true;
}

int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret > 0 && io_net_retry(sock, flags)) {
			kmsg->msg.msg_controllen = 0;
			kmsg->msg.msg_control = NULL;
			sr->done_io += ret;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	io_req_msg_cleanup(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags,
				 struct io_br_sel *sel, struct io_async_msghdr *kmsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct buf_sel_arg arg = {
		.iovs = &kmsg->fast_iov,
		.max_len = min_not_zero(sr->len, INT_MAX),
		.nr_iovs = 1,
		.buf_group = sr->buf_group,
	};
	int ret;

	if (kmsg->vec.iovec) {
		arg.nr_iovs = kmsg->vec.nr;
		arg.iovs = kmsg->vec.iovec;
		arg.mode = KBUF_MODE_FREE;
	}

	if (!(sr->flags & IORING_RECVSEND_BUNDLE))
		arg.nr_iovs = 1;
	else
		arg.mode |= KBUF_MODE_EXPAND;

	ret = io_buffers_select(req, &arg, sel, issue_flags);
	if (unlikely(ret < 0))
		return ret;

	if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
		kmsg->vec.nr = ret;
		kmsg->vec.iovec = arg.iovs;
		req->flags |= REQ_F_NEED_CLEANUP;
	}
	sr->len = arg.out_len;

	if (ret == 1) {
		sr->buf = arg.iovs[0].iov_base;
		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
	} else {
		iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE,
			      arg.iovs, ret, arg.out_len);
	}

	return 0;
}

int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct io_br_sel sel = { };
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;

retry_bundle:
	sel.buf_list = NULL;
	if (io_do_buffer_select(req)) {
		ret = io_send_select_buffer(req, issue_flags, &sel, kmsg);
		if (ret)
			return ret;
	}

	/*
	 * If MSG_WAITALL is set, or this is a bundle send, then we need
	 * the full amount. If just bundle is set, if we do a short send
	 * then we complete the bundle sequence rather than continue on.
	 */
	if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
	kmsg->msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &kmsg->msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	sel.val = ret;
	if (!io_send_finish(req, kmsg, &sel))
		goto retry_bundle;

	io_req_msg_cleanup(req, issue_flags);
	return sel.val;
}

static int io_recvmsg_mshot_prep(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg,
				 int namelen, size_t controllen)
{
	if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) ==
	    (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) {
		int hdr;

		if (unlikely(namelen < 0))
			return -EOVERFLOW;
		if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
				       namelen, &hdr))
			return -EOVERFLOW;
		if (check_add_overflow(hdr, controllen, &hdr))
			return -EOVERFLOW;

		iomsg->namelen = namelen;
		iomsg->controllen = controllen;
		return 0;
	}

	return 0;
}

static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct user_msghdr msg;
	int ret;

	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST, &iomsg->uaddr);
	if (unlikely(ret))
		return ret;

	if (!(req->flags & REQ_F_BUFFER_SELECT)) {
		ret = io_net_import_vec(req, iomsg, msg.msg_iov, msg.msg_iovlen,
					ITER_DEST);
		if (unlikely(ret))
			return ret;
	}
	return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
				     msg.msg_controllen);
}

static int io_recvmsg_prep_setup(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg;

	kmsg = io_msg_alloc_async(req);
	if (unlikely(!kmsg))
		return -ENOMEM;

	if (req->opcode == IORING_OP_RECV) {
		kmsg->msg.msg_name = NULL;
		kmsg->msg.msg_namelen = 0;
		kmsg->msg.msg_inq = 0;
		kmsg->msg.msg_control = NULL;
		kmsg->msg.msg_get_inq = 1;
		kmsg->msg.msg_controllen = 0;
		kmsg->msg.msg_iocb = NULL;
		kmsg->msg.msg_ubuf = NULL;

		if (req->flags & REQ_F_BUFFER_SELECT)
			return 0;
		return import_ubuf(ITER_DEST, sr->buf, sr->len,
				   &kmsg->msg.msg_iter);
	}

	return io_recvmsg_copy_hdr(req, kmsg);
}

#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \
			IORING_RECVSEND_BUNDLE)

int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;

	if (unlikely(sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~RECVMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags);
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
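	/*
	 * Error queue reads aren't signalled by POLLIN, so don't include
	 * POLLIN in the poll mask when this request gets poll-armed.
	 */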
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (req->flags & REQ_F_BUFFER_SELECT)
		sr->buf_group = req->buf_index;
	sr->mshot_total_len = sr->mshot_len = 0;
	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		if (req->opcode == IORING_OP_RECV) {
			sr->mshot_len = sr->len;
			sr->mshot_total_len = READ_ONCE(sqe->optlen);
			if (sr->mshot_total_len)
				sr->flags |= IORING_RECV_MSHOT_LIM;
		} else if (sqe->optlen) {
			return -EINVAL;
		}
		req->flags |= REQ_F_APOLL_MULTISHOT;
	} else if (sqe->optlen) {
		return -EINVAL;
	}

	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_RECVMSG)
			return -EINVAL;
	}

	if (io_is_compat(req->ctx))
		sr->msg_flags |= MSG_CMSG_COMPAT;

	sr->nr_multishot_loops = 0;
	return io_recvmsg_prep_setup(req);
}

/* bits to clear in old and inherit in new cflags on bundle retry */
#define CQE_F_MASK	(IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE)

/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req,
				  struct io_async_msghdr *kmsg,
				  struct io_br_sel *sel, bool mshot_finished,
				  unsigned issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	unsigned int cflags = 0;

	if (kmsg->msg.msg_inq > 0)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (sel->val > 0 && sr->flags & IORING_RECV_MSHOT_LIM) {
		/*
		 * If mshot_total_len hits zero, the overall byte limit has
		 * been reached. Mark mshot as finished, and flag MSHOT_DONE
		 * as well to prevent a potential bundle from being retried.
		 */
		sr->mshot_total_len -= min_t(int, sel->val, sr->mshot_total_len);
		if (!sr->mshot_total_len) {
			sr->flags |= IORING_RECV_MSHOT_DONE;
			mshot_finished = true;
		}
	}

	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		size_t this_ret = sel->val - sr->done_io;

		cflags |= io_put_kbufs(req, this_ret, sel->buf_list, io_bundle_nbufs(kmsg, this_ret));
		if (sr->flags & IORING_RECV_RETRY)
			cflags = req->cqe.flags | (cflags & CQE_F_MASK);
		if (sr->mshot_len && sel->val >= sr->mshot_len)
			sr->flags |= IORING_RECV_MSHOT_CAP;
		/* bundle with no more immediate buffers, we're done */
		if (req->flags & REQ_F_BL_EMPTY)
			goto finish;
		/*
		 * If more is available AND it was a full transfer, retry and
		 * append to this one
		 */
		if (!(sr->flags & IORING_RECV_NO_RETRY) &&
		    kmsg->msg.msg_inq > 1 && this_ret > 0 &&
		    !iov_iter_count(&kmsg->msg.msg_iter)) {
			req->cqe.flags = cflags & ~CQE_F_MASK;
			sr->len = kmsg->msg.msg_inq;
			sr->done_io += this_ret;
			sr->flags |= IORING_RECV_RETRY;
			return false;
		}
	} else {
		cflags |= io_put_kbuf(req, sel->val, sel->buf_list);
	}

	/*
	 * Fill CQE for this receive and see if we should keep trying to
	 * receive from this socket.
	 */
	if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
	    io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) {
		sel->val = IOU_RETRY;
		io_mshot_prep_retry(req, kmsg);
		/* Known not-empty or unknown state, retry */
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY &&
			    !(sr->flags & IORING_RECV_MSHOT_CAP)) {
				return false;
			}
			/* mshot retries exceeded, force a requeue */
			sr->nr_multishot_loops = 0;
			sr->flags &= ~IORING_RECV_MSHOT_CAP;
			if (issue_flags & IO_URING_F_MULTISHOT)
				sel->val = IOU_REQUEUE;
		}
		return true;
	}

	/* Finish the request / stop multishot. */
finish:
	io_req_set_res(req, sel->val, cflags);
	sel->val = IOU_COMPLETE;
	io_req_msg_cleanup(req, issue_flags);
	return true;
}

static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}

struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};

static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 * "fromlen shall refer to the value before truncation.."
	 *					1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}

int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct io_br_sel sel = { };
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	sel.buf_list = NULL;
	if (io_do_buffer_select(req)) {
		size_t len = sr->len;

		sel = io_buffer_select(req, &len, sr->buf_group, issue_flags);
		if (!sel.addr)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &sel.addr, &len);
			if (ret) {
				io_kbuf_recycle(req, sel.buf_list, issue_flags);
				return ret;
			}
		}

		iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, sel.addr, len);
	}

	kmsg->msg.msg_get_inq = 1;
	kmsg->msg.msg_inq = -1;
	if (req->flags & REQ_F_APOLL_MULTISHOT) {
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	} else {
		/* disable partial retry for recvmsg with cmsg attached */
		if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
			min_ret = iov_iter_count(&kmsg->msg.msg_iter);

		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);
	}

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			io_kbuf_recycle(req, sel.buf_list, issue_flags);
			return IOU_RETRY;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, sel.buf_list, issue_flags);

	sel.val = ret;
	if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
		goto retry_multishot;

	return sel.val;
}

static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
			      struct io_br_sel *sel, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	int ret;

	/*
	 * If the ring isn't locked, then don't use the peek interface
	 * to grab multiple buffers as we will lock/unlock between
	 * this selection and posting the buffers.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
	    sr->flags & IORING_RECVSEND_BUNDLE) {
		struct buf_sel_arg arg = {
			.iovs = &kmsg->fast_iov,
			.nr_iovs = 1,
			.mode = KBUF_MODE_EXPAND,
			.buf_group = sr->buf_group,
		};

		if (kmsg->vec.iovec) {
			arg.nr_iovs = kmsg->vec.nr;
			arg.iovs = kmsg->vec.iovec;
			arg.mode |= KBUF_MODE_FREE;
		}

		if (sel->val)
			arg.max_len = sel->val;
		else if (kmsg->msg.msg_inq > 1)
			arg.max_len = min_not_zero(sel->val, (ssize_t) kmsg->msg.msg_inq);

		/* if mshot limited, ensure we don't go over */
		if (sr->flags & IORING_RECV_MSHOT_LIM)
			arg.max_len = min_not_zero(arg.max_len, sr->mshot_total_len);
		ret = io_buffers_peek(req, &arg, sel);
		if (unlikely(ret < 0))
			return ret;

		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
			kmsg->vec.nr = ret;
			kmsg->vec.iovec = arg.iovs;
			req->flags |= REQ_F_NEED_CLEANUP;
		}
		if (arg.partial_map)
			sr->flags |= IORING_RECV_PARTIAL_MAP;

		/* special case 1 vec, can be a fast path */
		if (ret == 1) {
			sr->buf = arg.iovs[0].iov_base;
			sr->len = arg.iovs[0].iov_len;
			goto map_ubuf;
		}
		iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
			      arg.out_len);
	} else {
		size_t len = sel->val;

		*sel = io_buffer_select(req, &len, sr->buf_group, issue_flags);
		if (!sel->addr)
			return -ENOBUFS;
		sr->buf = sel->addr;
		sr->len = len;
map_ubuf:
		ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
	}

	return 0;
}

int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct io_br_sel sel;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	sel.buf_list = NULL;
	if (io_do_buffer_select(req)) {
		sel.val = sr->len;
		ret = io_recv_buf_select(req, kmsg, &sel, issue_flags);
		if (unlikely(ret < 0)) {
			kmsg->msg.msg_inq = -1;
			goto out_free;
		}
		sr->buf = NULL;
	}

	kmsg->msg.msg_flags = 0;
	kmsg->msg.msg_inq = -1;

	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = sock_recvmsg(sock, &kmsg->msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			io_kbuf_recycle(req, sel.buf_list, issue_flags);
			return IOU_RETRY;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		req_set_fail(req);
	}

	mshot_finished = ret <= 0;
	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, sel.buf_list, issue_flags);

	sel.val = ret;
	if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
		goto retry_multishot;

	return sel.val;
}

int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
	unsigned ifq_idx;

	if (unlikely(sqe->addr2 || sqe->addr || sqe->addr3))
		return -EINVAL;

	ifq_idx = READ_ONCE(sqe->zcrx_ifq_idx);
	zc->ifq = xa_load(&req->ctx->zcrx_ctxs, ifq_idx);
	if (!zc->ifq)
		return -EINVAL;

	zc->len = READ_ONCE(sqe->len);
	zc->flags = READ_ONCE(sqe->ioprio);
	if (READ_ONCE(sqe->msg_flags))
		return -EINVAL;
	if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT))
		return -EINVAL;
	/* multishot required */
	if (!(zc->flags & IORING_RECV_MULTISHOT))
		return -EINVAL;
	/* All data completions are posted as aux CQEs. */
	req->flags |= REQ_F_APOLL_MULTISHOT;

	return 0;
}

int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
	struct socket *sock;
	unsigned int len;
	int ret;

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	len = zc->len;
	ret = io_zcrx_recv(req, zc->ifq, sock, 0, issue_flags, &zc->len);
	if (len && zc->len == 0) {
		io_req_set_res(req, 0, 0);

		return IOU_COMPLETE;
	}
	if (unlikely(ret <= 0) && ret != -EAGAIN) {
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret == IOU_REQUEUE)
			return IOU_REQUEUE;

		req_set_fail(req);
		io_req_set_res(req, ret, 0);
		return IOU_COMPLETE;
	}
	return IOU_RETRY;
}

void io_send_zc_cleanup(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io = req->async_data;

	if (req_has_async_data(req))
		io_netmsg_iovec_free(io);
	if (zc->notif) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
	}
}

#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
#define IO_ZC_FLAGS_VALID  (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE | \
			    IORING_SEND_VECTORIZED)

int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_async_msghdr *iomsg;
	struct io_kiocb *notif;
	int ret;

	zc->done_io = 0;

	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	notif->cqe.user_data = req->cqe.user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	req->flags |= REQ_F_NEED_CLEANUP | REQ_F_POLL_NO_LAZY;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
		if (zc->flags & ~IO_ZC_FLAGS_VALID)
			return -EINVAL;
		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
			struct io_notif_data *nd = io_notif_to_data(notif);

			nd->zc_report = true;
			nd->zc_used = false;
			nd->zc_copied = false;
		}
	}

	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
	req->buf_index = READ_ONCE(sqe->buf_index);
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

	if (io_is_compat(req->ctx))
		zc->msg_flags |= MSG_CMSG_COMPAT;

	iomsg = io_msg_alloc_async(req);
	if (unlikely(!iomsg))
		return -ENOMEM;

	if (req->opcode == IORING_OP_SEND_ZC) {
		ret = io_send_setup(req, sqe);
	} else {
		if (unlikely(sqe->addr2 || sqe->file_index))
			return -EINVAL;
		ret = io_sendmsg_setup(req, sqe);
	}
	if (unlikely(ret))
		return ret;

	if (!(zc->flags & IORING_RECVSEND_FIXED_BUF)) {
		iomsg->msg.sg_from_iter = io_sg_from_iter_iovec;
		return io_notif_account_mem(zc->notif, iomsg->msg.msg_iter.count);
	}
	iomsg->msg.sg_from_iter = io_sg_from_iter;
	return 0;
}

static int io_sg_from_iter_iovec(struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	skb_zcopy_downgrade_managed(skb);
	return zerocopy_fill_skb_from_iter(skb, from, length);
}

static int io_sg_from_iter(struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	if (!frag)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return zerocopy_fill_skb_from_iter(skb, from, length);

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	if (bi.bi_size)
		ret = -EMSGSIZE;

	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;
	return ret;
}

static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;

	WARN_ON_ONCE(!(sr->flags & IORING_RECVSEND_FIXED_BUF));

	sr->notif->buf_index = req->buf_index;
	return io_import_reg_buf(sr->notif, &kmsg->msg.msg_iter,
				 (u64)(uintptr_t)sr->buf, sr->len,
				 ITER_SOURCE, issue_flags);
}

int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned msg_flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	if (req->flags & REQ_F_IMPORT_BUFFER) {
		req->flags &= ~REQ_F_IMPORT_BUFFER;
		ret = io_send_zc_import(req, issue_flags);
		if (unlikely(ret))
			return ret;
	}

	msg_flags = zc->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
	msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;

	kmsg->msg.msg_flags = msg_flags;
	kmsg->msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
	ret = sock_sendmsg(sock, &kmsg->msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) {
			zc->done_io += ret;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += zc->done_io;
	else if (zc->done_io)
		ret = zc->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_COMPLETE;
}

int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;

	if (req->flags & REQ_F_IMPORT_BUFFER) {
		unsigned uvec_segs = kmsg->msg.msg_iter.nr_segs;
		int ret;

		sr->notif->buf_index = req->buf_index;
		ret = io_import_reg_vec(ITER_SOURCE, &kmsg->msg.msg_iter,
					sr->notif, &kmsg->vec, uvec_segs,
					issue_flags);
		if (unlikely(ret))
			return ret;
		req->flags &= ~REQ_F_IMPORT_BUFFER;
	}

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;
	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(sr->notif);
		sr->notif = NULL;
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_COMPLETE;
}

void io_sendrecv_fail(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (sr->done_io)
		req->cqe.res = sr->done_io;

	if ((req->flags & REQ_F_NEED_CLEANUP) &&
	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
		req->cqe.flags |= IORING_CQE_F_MORE;
}

#define ACCEPT_FLAGS	(IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \
			 IORING_ACCEPT_POLL_FIRST)

int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);

	if (sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	accept->iou_flags = READ_ONCE(sqe->ioprio);
	if (accept->iou_flags & ~ACCEPT_FLAGS)
		return -EINVAL;

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		if (accept->iou_flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (accept->iou_flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	if (accept->iou_flags & IORING_ACCEPT_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	return 0;
}

int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool fixed = !!accept->file_slot;
	struct proto_accept_arg arg = {
		.flags = force_nonblock ? O_NONBLOCK : 0,
	};
	struct file *file;
	unsigned cflags;
	int ret, fd;

	if (!(req->flags & REQ_F_POLLED) &&
	    accept->iou_flags & IORING_ACCEPT_POLL_FIRST)
		return -EAGAIN;

retry:
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	arg.err = 0;
	arg.is_empty = -1;
	file = do_accept(req->file, &arg, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock &&
		    !(accept->iou_flags & IORING_ACCEPT_DONTWAIT))
			return IOU_RETRY;

		if (ret == -ERESTARTSYS)
			ret = -EINTR;
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  accept->file_slot);
	}

	cflags = 0;
	if (!arg.is_empty)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (ret >= 0 && (req->flags & REQ_F_APOLL_MULTISHOT) &&
	    io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1)
			goto retry;
		return IOU_RETRY;
	}

	io_req_set_res(req, ret, cflags);
	if (ret < 0)
		req_set_fail(req);
	return IOU_COMPLETE;
}

void io_socket_bpf_populate(struct io_uring_bpf_ctx *bctx, struct io_kiocb *req)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	bctx->socket.family = sock->domain;
	bctx->socket.type = sock->type;
	bctx->socket.protocol = sock->protocol;
}

int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
		return -EINVAL;
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	return 0;
}

int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
	bool fixed = !!sock->file_slot;
	struct file *file;
	int ret, fd;

	if (!fixed) {
		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  sock->file_slot);
	}
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	conn->in_progress = conn->seen_econnaborted = false;

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr);
}

int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io = req->async_data;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	if (connect->in_progress) {
		struct poll_table_struct pt = { ._key = EPOLLERR };

		if (vfs_poll(req->file, &pt) & EPOLLERR)
			goto get_sock_err;
	}

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->addr, connect->addr_len,
				 file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
	    && force_nonblock) {
		if (ret == -EINPROGRESS) {
			connect->in_progress = true;
		} else if (ret == -ECONNABORTED) {
			if (connect->seen_econnaborted)
				goto out;
			connect->seen_econnaborted = true;
		}
		return -EAGAIN;
	}
	if (connect->in_progress) {
		/*
		 * At least bluetooth will return -EBADFD on a re-connect
		 * attempt, and it's (supposedly) also valid to get -EISCONN
		 * which means the previous result is good. For both of these,
		 * grab the sock_error() and use that for the completion.
		 */
		if (ret == -EBADFD || ret == -EISCONN) {
get_sock_err:
			ret = sock_error(sock_from_file(req->file)->sk);
		}
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	io_req_msg_cleanup(req, issue_flags);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct sockaddr __user *uaddr;
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	bind->addr_len = READ_ONCE(sqe->addr2);

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;
	return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr);
}

int io_bind(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct io_async_msghdr *io = req->async_data;
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_bind_socket(sock, &io->addr, bind->addr_len);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}

int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);

	if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2)
		return -EINVAL;

	listen->backlog = READ_ONCE(sqe->len);
	return 0;
}

int io_listen(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_listen_socket(sock, listen->backlog);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}

void io_netmsg_cache_free(const void *entry)
{
	struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;

	io_vec_free(&kmsg->vec);
	kfree(kmsg);
}