1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/kernel.h> 3 #include <linux/errno.h> 4 #include <linux/file.h> 5 #include <linux/slab.h> 6 #include <linux/net.h> 7 #include <linux/un.h> 8 #include <linux/compat.h> 9 #include <net/compat.h> 10 #include <linux/io_uring.h> 11 12 #include <uapi/linux/io_uring.h> 13 14 #include "filetable.h" 15 #include "io_uring.h" 16 #include "kbuf.h" 17 #include "alloc_cache.h" 18 #include "net.h" 19 #include "notif.h" 20 #include "rsrc.h" 21 #include "zcrx.h" 22 23 struct io_shutdown { 24 struct file *file; 25 int how; 26 }; 27 28 struct io_accept { 29 struct file *file; 30 struct sockaddr __user *addr; 31 int __user *addr_len; 32 int flags; 33 int iou_flags; 34 u32 file_slot; 35 unsigned long nofile; 36 }; 37 38 struct io_socket { 39 struct file *file; 40 int domain; 41 int type; 42 int protocol; 43 int flags; 44 u32 file_slot; 45 unsigned long nofile; 46 }; 47 48 struct io_connect { 49 struct file *file; 50 struct sockaddr __user *addr; 51 int addr_len; 52 bool in_progress; 53 bool seen_econnaborted; 54 }; 55 56 struct io_bind { 57 struct file *file; 58 int addr_len; 59 }; 60 61 struct io_listen { 62 struct file *file; 63 int backlog; 64 }; 65 66 struct io_sr_msg { 67 struct file *file; 68 union { 69 struct compat_msghdr __user *umsg_compat; 70 struct user_msghdr __user *umsg; 71 void __user *buf; 72 }; 73 int len; 74 unsigned done_io; 75 unsigned msg_flags; 76 unsigned nr_multishot_loops; 77 u16 flags; 78 /* initialised and used only by !msg send variants */ 79 u16 buf_group; 80 /* per-invocation mshot limit */ 81 unsigned mshot_len; 82 /* overall mshot byte limit */ 83 unsigned mshot_total_len; 84 void __user *msg_control; 85 /* used only for send zerocopy */ 86 struct io_kiocb *notif; 87 }; 88 89 /* 90 * The UAPI flags are the lower 8 bits, as that's all sqe->ioprio will hold 91 * anyway. Use the upper 8 bits for internal uses. 
92 */ 93 enum sr_retry_flags { 94 IORING_RECV_RETRY = (1U << 15), 95 IORING_RECV_PARTIAL_MAP = (1U << 14), 96 IORING_RECV_MSHOT_CAP = (1U << 13), 97 IORING_RECV_MSHOT_LIM = (1U << 12), 98 IORING_RECV_MSHOT_DONE = (1U << 11), 99 100 IORING_RECV_RETRY_CLEAR = IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP, 101 IORING_RECV_NO_RETRY = IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP | 102 IORING_RECV_MSHOT_CAP | IORING_RECV_MSHOT_DONE, 103 }; 104 105 /* 106 * Number of times we'll try and do receives if there's more data. If we 107 * exceed this limit, then add us to the back of the queue and retry from 108 * there. This helps fairness between flooding clients. 109 */ 110 #define MULTISHOT_MAX_RETRY 32 111 112 struct io_recvzc { 113 struct file *file; 114 u16 flags; 115 u32 len; 116 struct io_zcrx_ifq *ifq; 117 }; 118 119 static int io_sg_from_iter_iovec(struct sk_buff *skb, 120 struct iov_iter *from, size_t length); 121 static int io_sg_from_iter(struct sk_buff *skb, 122 struct iov_iter *from, size_t length); 123 124 int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 125 { 126 struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown); 127 128 if (unlikely(sqe->off || sqe->addr || sqe->rw_flags || 129 sqe->buf_index || sqe->splice_fd_in)) 130 return -EINVAL; 131 132 shutdown->how = READ_ONCE(sqe->len); 133 req->flags |= REQ_F_FORCE_ASYNC; 134 return 0; 135 } 136 137 int io_shutdown(struct io_kiocb *req, unsigned int issue_flags) 138 { 139 struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown); 140 struct socket *sock; 141 int ret; 142 143 WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); 144 145 sock = sock_from_file(req->file); 146 if (unlikely(!sock)) 147 return -ENOTSOCK; 148 149 ret = __sys_shutdown_sock(sock, shutdown->how); 150 io_req_set_res(req, ret, 0); 151 return IOU_COMPLETE; 152 } 153 154 static bool io_net_retry(struct socket *sock, int flags) 155 { 156 if (!(flags & MSG_WAITALL)) 157 return false; 158 return 
sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET; 159 } 160 161 static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg) 162 { 163 if (kmsg->vec.iovec) 164 io_vec_free(&kmsg->vec); 165 } 166 167 static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags) 168 { 169 struct io_async_msghdr *hdr = req->async_data; 170 171 /* can't recycle, ensure we free the iovec if we have one */ 172 if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) { 173 io_netmsg_iovec_free(hdr); 174 return; 175 } 176 177 /* Let normal cleanup path reap it if we fail adding to the cache */ 178 io_alloc_cache_vec_kasan(&hdr->vec); 179 if (hdr->vec.nr > IO_VEC_CACHE_SOFT_CAP) 180 io_vec_free(&hdr->vec); 181 182 if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) 183 io_req_async_data_clear(req, REQ_F_NEED_CLEANUP); 184 } 185 186 static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req) 187 { 188 struct io_ring_ctx *ctx = req->ctx; 189 struct io_async_msghdr *hdr; 190 191 hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req); 192 if (!hdr) 193 return NULL; 194 195 /* If the async data was cached, we might have an iov cached inside. 
*/ 196 if (hdr->vec.iovec) 197 req->flags |= REQ_F_NEED_CLEANUP; 198 return hdr; 199 } 200 201 static inline void io_mshot_prep_retry(struct io_kiocb *req, 202 struct io_async_msghdr *kmsg) 203 { 204 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 205 206 req->flags &= ~REQ_F_BL_EMPTY; 207 sr->done_io = 0; 208 sr->flags &= ~IORING_RECV_RETRY_CLEAR; 209 sr->len = sr->mshot_len; 210 } 211 212 static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg, 213 const struct iovec __user *uiov, unsigned uvec_seg, 214 int ddir) 215 { 216 struct iovec *iov; 217 int ret, nr_segs; 218 219 if (iomsg->vec.iovec) { 220 nr_segs = iomsg->vec.nr; 221 iov = iomsg->vec.iovec; 222 } else { 223 nr_segs = 1; 224 iov = &iomsg->fast_iov; 225 } 226 227 ret = __import_iovec(ddir, uiov, uvec_seg, nr_segs, &iov, 228 &iomsg->msg.msg_iter, io_is_compat(req->ctx)); 229 if (unlikely(ret < 0)) 230 return ret; 231 232 if (iov) { 233 req->flags |= REQ_F_NEED_CLEANUP; 234 io_vec_reset_iovec(&iomsg->vec, iov, iomsg->msg.msg_iter.nr_segs); 235 } 236 return 0; 237 } 238 239 static int io_compat_msg_copy_hdr(struct io_kiocb *req, 240 struct io_async_msghdr *iomsg, 241 struct compat_msghdr *msg, int ddir, 242 struct sockaddr __user **save_addr) 243 { 244 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 245 struct compat_iovec __user *uiov; 246 int ret; 247 248 if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg))) 249 return -EFAULT; 250 251 ret = __get_compat_msghdr(&iomsg->msg, msg, save_addr); 252 if (ret) 253 return ret; 254 255 uiov = compat_ptr(msg->msg_iov); 256 if (req->flags & REQ_F_BUFFER_SELECT) { 257 if (msg->msg_iovlen == 0) { 258 sr->len = 0; 259 } else if (msg->msg_iovlen > 1) { 260 return -EINVAL; 261 } else { 262 struct compat_iovec tmp_iov; 263 264 if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov))) 265 return -EFAULT; 266 sr->len = tmp_iov.iov_len; 267 } 268 } 269 return 0; 270 } 271 272 static int io_copy_msghdr_from_user(struct 
user_msghdr *msg, 273 struct user_msghdr __user *umsg) 274 { 275 if (!user_access_begin(umsg, sizeof(*umsg))) 276 return -EFAULT; 277 unsafe_get_user(msg->msg_name, &umsg->msg_name, ua_end); 278 unsafe_get_user(msg->msg_namelen, &umsg->msg_namelen, ua_end); 279 unsafe_get_user(msg->msg_iov, &umsg->msg_iov, ua_end); 280 unsafe_get_user(msg->msg_iovlen, &umsg->msg_iovlen, ua_end); 281 unsafe_get_user(msg->msg_control, &umsg->msg_control, ua_end); 282 unsafe_get_user(msg->msg_controllen, &umsg->msg_controllen, ua_end); 283 user_access_end(); 284 return 0; 285 ua_end: 286 user_access_end(); 287 return -EFAULT; 288 } 289 290 static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, 291 struct user_msghdr *msg, int ddir, 292 struct sockaddr __user **save_addr) 293 { 294 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 295 struct user_msghdr __user *umsg = sr->umsg; 296 int ret; 297 298 iomsg->msg.msg_name = &iomsg->addr; 299 iomsg->msg.msg_iter.nr_segs = 0; 300 301 if (io_is_compat(req->ctx)) { 302 struct compat_msghdr cmsg; 303 304 ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ddir, save_addr); 305 if (ret) 306 return ret; 307 308 memset(msg, 0, sizeof(*msg)); 309 msg->msg_namelen = cmsg.msg_namelen; 310 msg->msg_controllen = cmsg.msg_controllen; 311 msg->msg_iov = compat_ptr(cmsg.msg_iov); 312 msg->msg_iovlen = cmsg.msg_iovlen; 313 return 0; 314 } 315 316 ret = io_copy_msghdr_from_user(msg, umsg); 317 if (unlikely(ret)) 318 return ret; 319 320 msg->msg_flags = 0; 321 322 ret = __copy_msghdr(&iomsg->msg, msg, save_addr); 323 if (ret) 324 return ret; 325 326 if (req->flags & REQ_F_BUFFER_SELECT) { 327 if (msg->msg_iovlen == 0) { 328 sr->len = 0; 329 } else if (msg->msg_iovlen > 1) { 330 return -EINVAL; 331 } else { 332 struct iovec __user *uiov = msg->msg_iov; 333 struct iovec tmp_iov; 334 335 if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov))) 336 return -EFAULT; 337 sr->len = tmp_iov.iov_len; 338 } 339 } 340 return 0; 341 } 342 
343 void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req) 344 { 345 struct io_async_msghdr *io = req->async_data; 346 347 io_netmsg_iovec_free(io); 348 } 349 350 static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) 351 { 352 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 353 struct io_async_msghdr *kmsg = req->async_data; 354 void __user *addr; 355 u16 addr_len; 356 int ret; 357 358 sr->buf = u64_to_user_ptr(READ_ONCE(sqe->addr)); 359 360 if (READ_ONCE(sqe->__pad3[0])) 361 return -EINVAL; 362 363 kmsg->msg.msg_name = NULL; 364 kmsg->msg.msg_namelen = 0; 365 kmsg->msg.msg_control = NULL; 366 kmsg->msg.msg_controllen = 0; 367 kmsg->msg.msg_ubuf = NULL; 368 369 addr = u64_to_user_ptr(READ_ONCE(sqe->addr2)); 370 addr_len = READ_ONCE(sqe->addr_len); 371 if (addr) { 372 ret = move_addr_to_kernel(addr, addr_len, &kmsg->addr); 373 if (unlikely(ret < 0)) 374 return ret; 375 kmsg->msg.msg_name = &kmsg->addr; 376 kmsg->msg.msg_namelen = addr_len; 377 } 378 if (sr->flags & IORING_RECVSEND_FIXED_BUF) { 379 if (!(sr->flags & IORING_SEND_VECTORIZED)) { 380 req->flags |= REQ_F_IMPORT_BUFFER; 381 return 0; 382 } 383 384 kmsg->msg.msg_iter.nr_segs = sr->len; 385 return io_prep_reg_iovec(req, &kmsg->vec, sr->buf, sr->len); 386 } 387 if (req->flags & REQ_F_BUFFER_SELECT) 388 return 0; 389 390 if (sr->flags & IORING_SEND_VECTORIZED) 391 return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE); 392 393 return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter); 394 } 395 396 static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) 397 { 398 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 399 struct io_async_msghdr *kmsg = req->async_data; 400 struct user_msghdr msg; 401 int ret; 402 403 sr->flags |= IORING_SEND_VECTORIZED; 404 sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); 405 ret = io_msg_copy_hdr(req, kmsg, &msg, ITER_SOURCE, NULL); 406 if (unlikely(ret)) 407 return ret; 408 /* 
save msg_control as sys_sendmsg() overwrites it */ 409 sr->msg_control = kmsg->msg.msg_control_user; 410 411 if (sr->flags & IORING_RECVSEND_FIXED_BUF) { 412 kmsg->msg.msg_iter.nr_segs = msg.msg_iovlen; 413 return io_prep_reg_iovec(req, &kmsg->vec, msg.msg_iov, 414 msg.msg_iovlen); 415 } 416 if (req->flags & REQ_F_BUFFER_SELECT) 417 return 0; 418 return io_net_import_vec(req, kmsg, msg.msg_iov, msg.msg_iovlen, ITER_SOURCE); 419 } 420 421 #define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE | \ 422 IORING_SEND_VECTORIZED | IORING_RECVSEND_FIXED_BUF) 423 424 int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 425 { 426 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 427 428 sr->done_io = 0; 429 sr->len = READ_ONCE(sqe->len); 430 if (unlikely(sr->len < 0)) 431 return -EINVAL; 432 sr->flags = READ_ONCE(sqe->ioprio); 433 if (sr->flags & ~SENDMSG_FLAGS) 434 return -EINVAL; 435 if (sr->flags & IORING_RECVSEND_FIXED_BUF) { 436 /* registered buffer send only supported for plain IORING_OP_SEND */ 437 if (req->opcode != IORING_OP_SEND || 438 (req->flags & REQ_F_BUFFER_SELECT) || 439 (sr->flags & (IORING_RECVSEND_BUNDLE|IORING_SEND_VECTORIZED))) 440 return -EINVAL; 441 req->buf_index = READ_ONCE(sqe->buf_index); 442 } 443 sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; 444 if (sr->msg_flags & MSG_DONTWAIT) 445 req->flags |= REQ_F_NOWAIT; 446 if (req->flags & REQ_F_BUFFER_SELECT) 447 sr->buf_group = req->buf_index; 448 if (sr->flags & IORING_RECVSEND_BUNDLE) { 449 if (req->opcode == IORING_OP_SENDMSG) 450 return -EINVAL; 451 sr->msg_flags |= MSG_WAITALL; 452 req->flags |= REQ_F_MULTISHOT; 453 } 454 455 if (io_is_compat(req->ctx)) 456 sr->msg_flags |= MSG_CMSG_COMPAT; 457 458 if (unlikely(!io_msg_alloc_async(req))) 459 return -ENOMEM; 460 if (req->opcode != IORING_OP_SENDMSG) 461 return io_send_setup(req, sqe); 462 if (unlikely(sqe->addr2 || sqe->file_index)) 463 return -EINVAL; 464 return io_sendmsg_setup(req, 
sqe); 465 } 466 467 static void io_req_msg_cleanup(struct io_kiocb *req, 468 unsigned int issue_flags) 469 { 470 io_netmsg_recycle(req, issue_flags); 471 } 472 473 /* 474 * For bundle completions, we need to figure out how many segments we consumed. 475 * A bundle could be using a single ITER_UBUF if that's all we mapped, or it 476 * could be using an ITER_IOVEC. If the latter, then if we consumed all of 477 * the segments, then it's a trivial questiont o answer. If we have residual 478 * data in the iter, then loop the segments to figure out how much we 479 * transferred. 480 */ 481 static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret) 482 { 483 struct iovec *iov; 484 int nbufs; 485 486 /* no data is always zero segments, and a ubuf is always 1 segment */ 487 if (ret <= 0) 488 return 0; 489 if (iter_is_ubuf(&kmsg->msg.msg_iter)) 490 return 1; 491 492 iov = kmsg->vec.iovec; 493 if (!iov) 494 iov = &kmsg->fast_iov; 495 496 /* if all data was transferred, it's basic pointer math */ 497 if (!iov_iter_count(&kmsg->msg.msg_iter)) 498 return iter_iov(&kmsg->msg.msg_iter) - iov; 499 500 /* short transfer, count segments */ 501 nbufs = 0; 502 do { 503 int this_len = min_t(int, iov[nbufs].iov_len, ret); 504 505 nbufs++; 506 ret -= this_len; 507 } while (ret); 508 509 return nbufs; 510 } 511 512 static int io_net_kbuf_recyle(struct io_kiocb *req, struct io_buffer_list *bl, 513 struct io_async_msghdr *kmsg, int len) 514 { 515 req->flags |= REQ_F_BL_NO_RECYCLE; 516 if (req->flags & REQ_F_BUFFERS_COMMIT) 517 io_kbuf_commit(req, bl, len, io_bundle_nbufs(kmsg, len)); 518 return IOU_RETRY; 519 } 520 521 static inline bool io_send_finish(struct io_kiocb *req, 522 struct io_async_msghdr *kmsg, 523 struct io_br_sel *sel) 524 { 525 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 526 bool bundle_finished = sel->val <= 0; 527 unsigned int cflags; 528 529 if (!(sr->flags & IORING_RECVSEND_BUNDLE)) { 530 cflags = io_put_kbuf(req, sel->val, sel->buf_list); 531 
goto finish; 532 } 533 534 cflags = io_put_kbufs(req, sel->val, sel->buf_list, io_bundle_nbufs(kmsg, sel->val)); 535 536 /* 537 * Don't start new bundles if the buffer list is empty, or if the 538 * current operation needed to go through polling to complete. 539 */ 540 if (bundle_finished || req->flags & (REQ_F_BL_EMPTY | REQ_F_POLLED)) 541 goto finish; 542 543 /* 544 * Fill CQE for this receive and see if we should keep trying to 545 * receive from this socket. 546 */ 547 if (io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) { 548 io_mshot_prep_retry(req, kmsg); 549 return false; 550 } 551 552 /* Otherwise stop bundle and use the current result. */ 553 finish: 554 io_req_set_res(req, sel->val, cflags); 555 sel->val = IOU_COMPLETE; 556 return true; 557 } 558 559 int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) 560 { 561 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 562 struct io_async_msghdr *kmsg = req->async_data; 563 struct socket *sock; 564 unsigned flags; 565 int min_ret = 0; 566 int ret; 567 568 sock = sock_from_file(req->file); 569 if (unlikely(!sock)) 570 return -ENOTSOCK; 571 572 if (!(req->flags & REQ_F_POLLED) && 573 (sr->flags & IORING_RECVSEND_POLL_FIRST)) 574 return -EAGAIN; 575 576 flags = sr->msg_flags; 577 if (issue_flags & IO_URING_F_NONBLOCK) 578 flags |= MSG_DONTWAIT; 579 if (flags & MSG_WAITALL) 580 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 581 582 kmsg->msg.msg_control_user = sr->msg_control; 583 584 ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); 585 586 if (ret < min_ret) { 587 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 588 return -EAGAIN; 589 if (ret > 0 && io_net_retry(sock, flags)) { 590 kmsg->msg.msg_controllen = 0; 591 kmsg->msg.msg_control = NULL; 592 sr->done_io += ret; 593 return -EAGAIN; 594 } 595 if (ret == -ERESTARTSYS) 596 ret = -EINTR; 597 req_set_fail(req); 598 } 599 io_req_msg_cleanup(req, issue_flags); 600 if (ret >= 0) 601 ret += sr->done_io; 602 else if 
(sr->done_io) 603 ret = sr->done_io; 604 io_req_set_res(req, ret, 0); 605 return IOU_COMPLETE; 606 } 607 608 static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags, 609 struct io_br_sel *sel, struct io_async_msghdr *kmsg) 610 { 611 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 612 struct buf_sel_arg arg = { 613 .iovs = &kmsg->fast_iov, 614 .max_len = min_not_zero(sr->len, INT_MAX), 615 .nr_iovs = 1, 616 .buf_group = sr->buf_group, 617 }; 618 int ret; 619 620 if (kmsg->vec.iovec) { 621 arg.nr_iovs = kmsg->vec.nr; 622 arg.iovs = kmsg->vec.iovec; 623 arg.mode = KBUF_MODE_FREE; 624 } 625 626 if (!(sr->flags & IORING_RECVSEND_BUNDLE)) 627 arg.nr_iovs = 1; 628 else 629 arg.mode |= KBUF_MODE_EXPAND; 630 631 ret = io_buffers_select(req, &arg, sel, issue_flags); 632 if (unlikely(ret < 0)) 633 return ret; 634 635 if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) { 636 kmsg->vec.nr = ret; 637 kmsg->vec.iovec = arg.iovs; 638 req->flags |= REQ_F_NEED_CLEANUP; 639 } 640 sr->len = arg.out_len; 641 642 if (ret == 1) { 643 sr->buf = arg.iovs[0].iov_base; 644 ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, 645 &kmsg->msg.msg_iter); 646 if (unlikely(ret)) 647 return ret; 648 } else { 649 iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE, 650 arg.iovs, ret, arg.out_len); 651 } 652 653 return 0; 654 } 655 656 int io_send(struct io_kiocb *req, unsigned int issue_flags) 657 { 658 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 659 struct io_async_msghdr *kmsg = req->async_data; 660 struct io_br_sel sel = { }; 661 struct socket *sock; 662 unsigned flags; 663 int min_ret = 0; 664 int ret; 665 666 sock = sock_from_file(req->file); 667 if (unlikely(!sock)) 668 return -ENOTSOCK; 669 670 if (!(req->flags & REQ_F_POLLED) && 671 (sr->flags & IORING_RECVSEND_POLL_FIRST)) 672 return -EAGAIN; 673 674 if (req->flags & REQ_F_IMPORT_BUFFER) { 675 ret = io_import_reg_buf(req, &kmsg->msg.msg_iter, 676 (u64)(uintptr_t)sr->buf, sr->len, 
677 ITER_SOURCE, issue_flags); 678 if (unlikely(ret)) 679 return ret; 680 req->flags &= ~REQ_F_IMPORT_BUFFER; 681 } 682 683 flags = sr->msg_flags; 684 if (issue_flags & IO_URING_F_NONBLOCK) 685 flags |= MSG_DONTWAIT; 686 687 retry_bundle: 688 sel.buf_list = NULL; 689 if (io_do_buffer_select(req)) { 690 ret = io_send_select_buffer(req, issue_flags, &sel, kmsg); 691 if (ret) 692 return ret; 693 } 694 695 /* 696 * If MSG_WAITALL is set, or this is a bundle send, then we need 697 * the full amount. If just bundle is set, if we do a short send 698 * then we complete the bundle sequence rather than continue on. 699 */ 700 if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE) 701 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 702 703 flags &= ~MSG_INTERNAL_SENDMSG_FLAGS; 704 kmsg->msg.msg_flags = flags; 705 ret = sock_sendmsg(sock, &kmsg->msg); 706 if (ret < min_ret) { 707 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 708 return -EAGAIN; 709 710 if (ret > 0 && io_net_retry(sock, flags)) { 711 sr->len -= ret; 712 sr->buf += ret; 713 sr->done_io += ret; 714 return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret); 715 } 716 if (ret == -ERESTARTSYS) 717 ret = -EINTR; 718 req_set_fail(req); 719 } 720 if (ret >= 0) 721 ret += sr->done_io; 722 else if (sr->done_io) 723 ret = sr->done_io; 724 725 sel.val = ret; 726 if (!io_send_finish(req, kmsg, &sel)) 727 goto retry_bundle; 728 729 io_req_msg_cleanup(req, issue_flags); 730 return sel.val; 731 } 732 733 static int io_recvmsg_mshot_prep(struct io_kiocb *req, 734 struct io_async_msghdr *iomsg, 735 int namelen, size_t controllen) 736 { 737 if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) == 738 (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) { 739 int hdr; 740 741 if (unlikely(namelen < 0)) 742 return -EOVERFLOW; 743 if (check_add_overflow(sizeof(struct io_uring_recvmsg_out), 744 namelen, &hdr)) 745 return -EOVERFLOW; 746 if (check_add_overflow(hdr, controllen, &hdr)) 747 return -EOVERFLOW; 748 749 
iomsg->namelen = namelen; 750 iomsg->controllen = controllen; 751 return 0; 752 } 753 754 return 0; 755 } 756 757 static int io_recvmsg_copy_hdr(struct io_kiocb *req, 758 struct io_async_msghdr *iomsg) 759 { 760 struct user_msghdr msg; 761 int ret; 762 763 ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST, &iomsg->uaddr); 764 if (unlikely(ret)) 765 return ret; 766 767 if (!(req->flags & REQ_F_BUFFER_SELECT)) { 768 ret = io_net_import_vec(req, iomsg, msg.msg_iov, msg.msg_iovlen, 769 ITER_DEST); 770 if (unlikely(ret)) 771 return ret; 772 } 773 return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen, 774 msg.msg_controllen); 775 } 776 777 static int io_recvmsg_prep_setup(struct io_kiocb *req) 778 { 779 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 780 struct io_async_msghdr *kmsg; 781 782 kmsg = io_msg_alloc_async(req); 783 if (unlikely(!kmsg)) 784 return -ENOMEM; 785 786 if (req->opcode == IORING_OP_RECV) { 787 kmsg->msg.msg_name = NULL; 788 kmsg->msg.msg_namelen = 0; 789 kmsg->msg.msg_inq = 0; 790 kmsg->msg.msg_control = NULL; 791 kmsg->msg.msg_get_inq = 1; 792 kmsg->msg.msg_controllen = 0; 793 kmsg->msg.msg_ubuf = NULL; 794 795 if (req->flags & REQ_F_BUFFER_SELECT) 796 return 0; 797 if (sr->flags & IORING_RECVSEND_FIXED_BUF) { 798 req->flags |= REQ_F_IMPORT_BUFFER; 799 return 0; 800 } 801 return import_ubuf(ITER_DEST, sr->buf, sr->len, 802 &kmsg->msg.msg_iter); 803 } 804 805 return io_recvmsg_copy_hdr(req, kmsg); 806 } 807 808 #define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \ 809 IORING_RECVSEND_BUNDLE | IORING_RECVSEND_FIXED_BUF) 810 811 int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 812 { 813 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 814 815 sr->done_io = 0; 816 817 if (unlikely(sqe->addr2)) 818 return -EINVAL; 819 820 sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); 821 sr->len = READ_ONCE(sqe->len); 822 if (unlikely(sr->len < 0)) 823 return -EINVAL; 824 sr->flags = 
READ_ONCE(sqe->ioprio); 825 if (sr->flags & ~RECVMSG_FLAGS) 826 return -EINVAL; 827 if (sr->flags & IORING_RECVSEND_FIXED_BUF) { 828 /* registered buffer recv only for plain IORING_OP_RECV */ 829 if (req->opcode != IORING_OP_RECV || 830 (req->flags & REQ_F_BUFFER_SELECT) || 831 (sr->flags & (IORING_RECV_MULTISHOT | IORING_RECVSEND_BUNDLE))) 832 return -EINVAL; 833 req->buf_index = READ_ONCE(sqe->buf_index); 834 } 835 sr->msg_flags = READ_ONCE(sqe->msg_flags); 836 if (sr->msg_flags & MSG_DONTWAIT) 837 req->flags |= REQ_F_NOWAIT; 838 if (sr->msg_flags & MSG_ERRQUEUE) 839 req->flags |= REQ_F_CLEAR_POLLIN; 840 if (req->flags & REQ_F_BUFFER_SELECT) 841 sr->buf_group = req->buf_index; 842 sr->mshot_total_len = sr->mshot_len = 0; 843 if (sr->flags & IORING_RECV_MULTISHOT) { 844 if (!(req->flags & REQ_F_BUFFER_SELECT)) 845 return -EINVAL; 846 if (sr->msg_flags & MSG_WAITALL) 847 return -EINVAL; 848 if (req->opcode == IORING_OP_RECV) { 849 sr->mshot_len = sr->len; 850 sr->mshot_total_len = READ_ONCE(sqe->optlen); 851 if (sr->mshot_total_len) 852 sr->flags |= IORING_RECV_MSHOT_LIM; 853 } else if (sqe->optlen) { 854 return -EINVAL; 855 } 856 req->flags |= REQ_F_APOLL_MULTISHOT; 857 } else if (sqe->optlen) { 858 return -EINVAL; 859 } 860 861 if (sr->flags & IORING_RECVSEND_BUNDLE) { 862 if (req->opcode == IORING_OP_RECVMSG) 863 return -EINVAL; 864 } 865 866 if (io_is_compat(req->ctx)) 867 sr->msg_flags |= MSG_CMSG_COMPAT; 868 869 sr->nr_multishot_loops = 0; 870 return io_recvmsg_prep_setup(req); 871 } 872 873 /* bits to clear in old and inherit in new cflags on bundle retry */ 874 #define CQE_F_MASK (IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE|\ 875 IORING_CQE_F_BUF_MORE) 876 877 /* 878 * Finishes io_recv and io_recvmsg. 879 * 880 * Returns true if it is actually finished, or false if it should run 881 * again (for multishot). 
882 */ 883 static inline bool io_recv_finish(struct io_kiocb *req, 884 struct io_async_msghdr *kmsg, 885 struct io_br_sel *sel, bool mshot_finished, 886 unsigned issue_flags) 887 { 888 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 889 unsigned int cflags = 0; 890 891 if (kmsg->msg.msg_inq > 0) 892 cflags |= IORING_CQE_F_SOCK_NONEMPTY; 893 894 if (sel->val > 0 && sr->flags & IORING_RECV_MSHOT_LIM) { 895 /* 896 * If sr->len hits zero, the limit has been reached. Mark 897 * mshot as finished, and flag MSHOT_DONE as well to prevent 898 * a potential bundle from being retried. 899 */ 900 sr->mshot_total_len -= min_t(int, sel->val, sr->mshot_total_len); 901 if (!sr->mshot_total_len) { 902 sr->flags |= IORING_RECV_MSHOT_DONE; 903 mshot_finished = true; 904 } 905 } 906 907 if (sr->flags & IORING_RECVSEND_BUNDLE) { 908 size_t this_ret = sel->val - sr->done_io; 909 910 cflags |= io_put_kbufs(req, this_ret, sel->buf_list, io_bundle_nbufs(kmsg, this_ret)); 911 if (sr->flags & IORING_RECV_RETRY) 912 cflags = req->cqe.flags | (cflags & CQE_F_MASK); 913 if (sr->mshot_len && sel->val >= sr->mshot_len) 914 sr->flags |= IORING_RECV_MSHOT_CAP; 915 /* bundle with no more immediate buffers, we're done */ 916 if (req->flags & REQ_F_BL_EMPTY) 917 goto finish; 918 /* 919 * If more is available AND it was a full transfer, retry and 920 * append to this one 921 */ 922 if (!(sr->flags & IORING_RECV_NO_RETRY) && 923 kmsg->msg.msg_inq > 1 && this_ret > 0 && 924 !iov_iter_count(&kmsg->msg.msg_iter)) { 925 req->cqe.flags = cflags & ~CQE_F_MASK; 926 sr->len = kmsg->msg.msg_inq; 927 sr->done_io += this_ret; 928 sr->flags |= IORING_RECV_RETRY; 929 return false; 930 } 931 } else { 932 cflags |= io_put_kbuf(req, sel->val, sel->buf_list); 933 } 934 935 /* 936 * Fill CQE for this receive and see if we should keep trying to 937 * receive from this socket. 
938 */ 939 if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished && 940 io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) { 941 sel->val = IOU_RETRY; 942 io_mshot_prep_retry(req, kmsg); 943 /* Known not-empty or unknown state, retry */ 944 if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) { 945 if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY && 946 !(sr->flags & IORING_RECV_MSHOT_CAP)) { 947 return false; 948 } 949 /* mshot retries exceeded, force a requeue */ 950 sr->nr_multishot_loops = 0; 951 sr->flags &= ~IORING_RECV_MSHOT_CAP; 952 if (issue_flags & IO_URING_F_MULTISHOT) 953 sel->val = IOU_REQUEUE; 954 } 955 return true; 956 } 957 958 /* Finish the request / stop multishot. */ 959 finish: 960 io_req_set_res(req, sel->val, cflags); 961 sel->val = IOU_COMPLETE; 962 io_req_msg_cleanup(req, issue_flags); 963 return true; 964 } 965 966 static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg, 967 struct io_sr_msg *sr, void __user **buf, 968 size_t *len) 969 { 970 unsigned long ubuf = (unsigned long) *buf; 971 unsigned long hdr; 972 973 hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen + 974 kmsg->controllen; 975 if (*len < hdr) 976 return -EFAULT; 977 978 if (kmsg->controllen) { 979 unsigned long control = ubuf + hdr - kmsg->controllen; 980 981 kmsg->msg.msg_control_user = (void __user *) control; 982 kmsg->msg.msg_controllen = kmsg->controllen; 983 } 984 985 sr->buf = *buf; /* stash for later copy */ 986 *buf = (void __user *) (ubuf + hdr); 987 kmsg->payloadlen = *len = *len - hdr; 988 return 0; 989 } 990 991 struct io_recvmsg_multishot_hdr { 992 struct io_uring_recvmsg_out msg; 993 struct sockaddr_storage addr; 994 }; 995 996 static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io, 997 struct io_async_msghdr *kmsg, 998 unsigned int flags, bool *finished) 999 { 1000 int err; 1001 int copy_len; 1002 struct io_recvmsg_multishot_hdr hdr; 1003 1004 if (kmsg->namelen) 1005 kmsg->msg.msg_name = 
&hdr.addr; 1006 kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); 1007 kmsg->msg.msg_namelen = 0; 1008 1009 if (sock->file->f_flags & O_NONBLOCK) 1010 flags |= MSG_DONTWAIT; 1011 1012 err = sock_recvmsg(sock, &kmsg->msg, flags); 1013 *finished = err <= 0; 1014 if (err < 0) 1015 return err; 1016 1017 hdr.msg = (struct io_uring_recvmsg_out) { 1018 .controllen = kmsg->controllen - kmsg->msg.msg_controllen, 1019 .flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT 1020 }; 1021 1022 hdr.msg.payloadlen = err; 1023 if (err > kmsg->payloadlen) 1024 err = kmsg->payloadlen; 1025 1026 copy_len = sizeof(struct io_uring_recvmsg_out); 1027 if (kmsg->msg.msg_namelen > kmsg->namelen) 1028 copy_len += kmsg->namelen; 1029 else 1030 copy_len += kmsg->msg.msg_namelen; 1031 1032 /* 1033 * "fromlen shall refer to the value before truncation.." 1034 * 1003.1g 1035 */ 1036 hdr.msg.namelen = kmsg->msg.msg_namelen; 1037 1038 /* ensure that there is no gap between hdr and sockaddr_storage */ 1039 BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) != 1040 sizeof(struct io_uring_recvmsg_out)); 1041 if (copy_to_user(io->buf, &hdr, copy_len)) { 1042 *finished = true; 1043 return -EFAULT; 1044 } 1045 1046 return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen + 1047 kmsg->controllen + err; 1048 } 1049 1050 int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) 1051 { 1052 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1053 struct io_async_msghdr *kmsg = req->async_data; 1054 struct io_br_sel sel = { }; 1055 struct socket *sock; 1056 unsigned flags; 1057 int ret, min_ret = 0; 1058 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 1059 bool mshot_finished = true; 1060 1061 sock = sock_from_file(req->file); 1062 if (unlikely(!sock)) 1063 return -ENOTSOCK; 1064 1065 if (!(req->flags & REQ_F_POLLED) && 1066 (sr->flags & IORING_RECVSEND_POLL_FIRST)) 1067 return -EAGAIN; 1068 1069 flags = sr->msg_flags; 1070 if (force_nonblock) 1071 flags |= 
MSG_DONTWAIT; 1072 1073 retry_multishot: 1074 sel.buf_list = NULL; 1075 if (io_do_buffer_select(req)) { 1076 size_t len = sr->len; 1077 1078 sel = io_buffer_select(req, &len, sr->buf_group, issue_flags); 1079 if (!sel.addr) 1080 return -ENOBUFS; 1081 1082 if (req->flags & REQ_F_APOLL_MULTISHOT) { 1083 ret = io_recvmsg_prep_multishot(kmsg, sr, &sel.addr, &len); 1084 if (ret) { 1085 io_kbuf_recycle(req, sel.buf_list, issue_flags); 1086 return ret; 1087 } 1088 } 1089 1090 iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, sel.addr, len); 1091 } 1092 1093 kmsg->msg.msg_get_inq = 1; 1094 kmsg->msg.msg_inq = -1; 1095 if (req->flags & REQ_F_APOLL_MULTISHOT) { 1096 ret = io_recvmsg_multishot(sock, sr, kmsg, flags, 1097 &mshot_finished); 1098 } else { 1099 /* disable partial retry for recvmsg with cmsg attached */ 1100 if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen) 1101 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 1102 1103 ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, 1104 kmsg->uaddr, flags); 1105 } 1106 1107 if (ret < min_ret) { 1108 if (ret == -EAGAIN && force_nonblock) { 1109 io_kbuf_recycle(req, sel.buf_list, issue_flags); 1110 return IOU_RETRY; 1111 } 1112 if (ret > 0 && io_net_retry(sock, flags)) { 1113 sr->done_io += ret; 1114 return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret); 1115 } 1116 if (ret == -ERESTARTSYS) 1117 ret = -EINTR; 1118 req_set_fail(req); 1119 } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { 1120 req_set_fail(req); 1121 } 1122 1123 if (ret > 0) 1124 ret += sr->done_io; 1125 else if (sr->done_io) 1126 ret = sr->done_io; 1127 else 1128 io_kbuf_recycle(req, sel.buf_list, issue_flags); 1129 1130 sel.val = ret; 1131 if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags)) 1132 goto retry_multishot; 1133 1134 return sel.val; 1135 } 1136 1137 static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg, 1138 struct io_br_sel *sel, unsigned int issue_flags) 1139 { 1140 
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1141 int ret; 1142 1143 /* 1144 * If the ring isn't locked, then don't use the peek interface 1145 * to grab multiple buffers as we will lock/unlock between 1146 * this selection and posting the buffers. 1147 */ 1148 if (!(issue_flags & IO_URING_F_UNLOCKED) && 1149 sr->flags & IORING_RECVSEND_BUNDLE) { 1150 /* start from the single inline iovec (kmsg->fast_iov) */ struct buf_sel_arg arg = { 1151 .iovs = &kmsg->fast_iov, 1152 .nr_iovs = 1, 1153 .mode = KBUF_MODE_EXPAND, 1154 .buf_group = sr->buf_group, 1155 }; 1156 1157 /* an iovec array is already attached to kmsg: offer that one instead */ if (kmsg->vec.iovec) { 1158 arg.nr_iovs = kmsg->vec.nr; 1159 arg.iovs = kmsg->vec.iovec; 1160 arg.mode |= KBUF_MODE_FREE; 1161 } 1162 1163 if (sel->val) 1164 arg.max_len = sel->val; 1165 /* sel->val is 0 on this branch - NOTE(review): min_not_zero() presumably reduces to msg_inq here, confirm */ else if (kmsg->msg.msg_inq > 1) 1166 arg.max_len = min_not_zero(sel->val, (ssize_t) kmsg->msg.msg_inq); 1167 1168 /* if mshot limited, ensure we don't go over */ 1169 if (sr->flags & IORING_RECV_MSHOT_LIM) 1170 arg.max_len = min_not_zero(arg.max_len, sr->mshot_total_len); 1171 ret = io_buffers_peek(req, &arg, sel); 1172 if (unlikely(ret < 0)) 1173 return ret; 1174 1175 /* peek handed back a different iovec array: store it in kmsg->vec and flag the request for cleanup */ if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) { 1176 kmsg->vec.nr = ret; 1177 kmsg->vec.iovec = arg.iovs; 1178 req->flags |= REQ_F_NEED_CLEANUP; 1179 } 1180 if (arg.partial_map) 1181 sr->flags |= IORING_RECV_PARTIAL_MAP; 1182 1183 /* special case 1 vec, can be a fast path */ 1184 if (ret == 1) { 1185 sr->buf = arg.iovs[0].iov_base; 1186 sr->len = arg.iovs[0].iov_len; 1187 goto map_ubuf; 1188 } 1189 iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret, 1190 arg.out_len); 1191 } else { /* single-buffer path */ 1192 size_t len = sel->val; 1193 1194 *sel = io_buffer_select(req, &len, sr->buf_group, issue_flags); 1195 if (!sel->addr) 1196 return -ENOBUFS; 1197 sr->buf = sel->addr; 1198 sr->len = len; 1199 /* also reached from the one-iovec case of the peek path above */ map_ubuf: 1200 ret = import_ubuf(ITER_DEST, sr->buf, sr->len, 1201 &kmsg->msg.msg_iter); 1202 if (unlikely(ret)) 1203 return ret; 1204 } 1205 1206 return 0; 1207 } 1208 /* io_recv() - issue handler for a recv request. Returns -ENOTSOCK when req->file is not a socket, -EAGAIN when IORING_RECVSEND_POLL_FIRST is set and the request has not been polled, IOU_RETRY when a nonblocking attempt hits -EAGAIN, the result of io_net_kbuf_recyle() when io_net_retry() allows retrying a short receive, and otherwise sel.val once io_recv_finish() returns true. */ 1209 int io_recv(struct
io_kiocb *req, unsigned int issue_flags) 1210 { 1211 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1212 struct io_async_msghdr *kmsg = req->async_data; 1213 struct io_br_sel sel; 1214 struct socket *sock; 1215 unsigned flags; 1216 int ret, min_ret = 0; 1217 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 1218 bool mshot_finished; 1219 1220 sock = sock_from_file(req->file); 1221 if (unlikely(!sock)) 1222 return -ENOTSOCK; 1223 1224 /* POLL_FIRST: do not attempt the receive until the request has been polled */ if (!(req->flags & REQ_F_POLLED) && 1225 (sr->flags & IORING_RECVSEND_POLL_FIRST)) 1226 return -EAGAIN; 1227 1228 flags = sr->msg_flags; 1229 if (force_nonblock) 1230 flags |= MSG_DONTWAIT; 1231 1232 /* buffer import still pending (REQ_F_IMPORT_BUFFER): build msg_iter via io_import_reg_buf(), then clear the flag so it is done only once */ if (req->flags & REQ_F_IMPORT_BUFFER) { 1233 ret = io_import_reg_buf(req, &kmsg->msg.msg_iter, 1234 (u64)(uintptr_t)sr->buf, sr->len, 1235 ITER_DEST, issue_flags); 1236 if (unlikely(ret)) { /* import failed: jump to the failure path with no buffer list selected */ 1237 kmsg->msg.msg_inq = -1; 1238 sel.buf_list = NULL; 1239 goto out_free; 1240 } 1241 req->flags &= ~REQ_F_IMPORT_BUFFER; 1242 } 1243 /* jumped back to whenever io_recv_finish() returns false */ 1244 retry_multishot: 1245 sel.buf_list = NULL; 1246 /* buffer-select mode: sr->len is handed over as the requested length */ if (io_do_buffer_select(req)) { 1247 sel.val = sr->len; 1248 ret = io_recv_buf_select(req, kmsg, &sel, issue_flags); 1249 if (unlikely(ret < 0)) { 1250 kmsg->msg.msg_inq = -1; 1251 goto out_free; 1252 } 1253 sr->buf = NULL; 1254 } 1255 1256 kmsg->msg.msg_flags = 0; 1257 kmsg->msg.msg_inq = -1; 1258 1259 /* MSG_WAITALL: anything below the full iterator length counts as short */ if (flags & MSG_WAITALL) 1260 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 1261 1262 ret = sock_recvmsg(sock, &kmsg->msg, flags); 1263 if (ret < min_ret) { 1264 if (ret == -EAGAIN && force_nonblock) { 1265 io_kbuf_recycle(req, sel.buf_list, issue_flags); 1266 return IOU_RETRY; 1267 } 1268 /* short receive that may be retried: advance buf/len and remember the bytes done */ if (ret > 0 && io_net_retry(sock, flags)) { 1269 sr->len -= ret; 1270 sr->buf += ret; 1271 sr->done_io += ret; 1272 return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret); 1273 } 1274 /* -ERESTARTSYS is reported as -EINTR */ if (ret == -ERESTARTSYS) 1275 ret = -EINTR; 1276 req_set_fail(req); 1277 } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { /* truncated MSG_WAITALL receive; the label below is also the target of the setup-failure gotos above */ 1278 out_free:
1279 req_set_fail(req); 1280 } 1281 1282 /* an error or zero bytes marks the multishot as finished for io_recv_finish() */ mshot_finished = ret <= 0; 1283 /* add bytes from earlier partial receives; otherwise report prior progress, or recycle the buffer if there was none */ if (ret > 0) 1284 ret += sr->done_io; 1285 else if (sr->done_io) 1286 ret = sr->done_io; 1287 else 1288 io_kbuf_recycle(req, sel.buf_list, issue_flags); 1289 1290 sel.val = ret; 1291 if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags)) 1292 goto retry_multishot; 1293 1294 return sel.val; 1295 } 1296 /* io_recvzc_prep() - read and validate the SQE for a recvzc request. Returns -EINVAL when addr, addr2, addr3 or msg_flags is non-zero, when no entry is registered in ctx->zcrx_ctxs under sqe->zcrx_ifq_idx, when ioprio carries flags other than IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT, or when IORING_RECV_MULTISHOT is missing. On success stores len/flags/ifq in the command and sets REQ_F_APOLL_MULTISHOT. */ 1297 int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1298 { 1299 struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc); 1300 unsigned ifq_idx; 1301 1302 if (unlikely(sqe->addr2 || sqe->addr || sqe->addr3)) 1303 return -EINVAL; 1304 1305 /* look up the io_zcrx_ifq registered under the index given in the SQE */ ifq_idx = READ_ONCE(sqe->zcrx_ifq_idx); 1306 zc->ifq = xa_load(&req->ctx->zcrx_ctxs, ifq_idx); 1307 if (!zc->ifq) 1308 return -EINVAL; 1309 1310 zc->len = READ_ONCE(sqe->len); 1311 zc->flags = READ_ONCE(sqe->ioprio); 1312 if (READ_ONCE(sqe->msg_flags)) 1313 return -EINVAL; 1314 if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT)) 1315 return -EINVAL; 1316 /* multishot required */ 1317 if (!(zc->flags & IORING_RECV_MULTISHOT)) 1318 return -EINVAL; 1319 /* All data completions are posted as aux CQEs.
*/ 1320 req->flags |= REQ_F_APOLL_MULTISHOT; 1321 1322 return 0; 1323 } 1324 /* io_recvzc() - issue handler for a recvzc request; the receive itself is done by io_zcrx_recv(). Returns -ENOTSOCK when req->file is not a socket and -EAGAIN when IORING_RECVSEND_POLL_FIRST is set but the request has not been polled. Completes with result 0 when a non-zero zc->len has dropped to 0 after the call; completes as failed for ret <= 0 other than -EAGAIN (IOU_REQUEUE is returned as-is); otherwise returns IOU_RETRY. */ 1325 int io_recvzc(struct io_kiocb *req, unsigned int issue_flags) 1326 { 1327 struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc); 1328 struct socket *sock; 1329 unsigned int len; 1330 int ret; 1331 1332 sock = sock_from_file(req->file); 1333 if (unlikely(!sock)) 1334 return -ENOTSOCK; 1335 1336 if (!(req->flags & REQ_F_POLLED) && 1337 (zc->flags & IORING_RECVSEND_POLL_FIRST)) 1338 return -EAGAIN; 1339 1340 /* keep the length from before the call: zc->len is passed by pointer and compared afterwards */ len = zc->len; 1341 ret = io_zcrx_recv(req, zc->ifq, sock, 0, issue_flags, &zc->len); 1342 if (len && zc->len == 0) { 1343 io_req_set_res(req, 0, 0); 1344 1345 return IOU_COMPLETE; 1346 } 1347 if (unlikely(ret <= 0) && ret != -EAGAIN) { 1348 /* -ERESTARTSYS is reported as -EINTR */ if (ret == -ERESTARTSYS) 1349 ret = -EINTR; 1350 if (ret == IOU_REQUEUE) 1351 return IOU_REQUEUE; 1352 1353 req_set_fail(req); 1354 io_req_set_res(req, ret, 0); 1355 return IOU_COMPLETE; 1356 } 1357 return IOU_RETRY; 1358 } 1359 /* io_send_zc_cleanup() - release what a zero-copy send request still holds: the iovec of its async msghdr (only if the request has async data) and the notification, which is flushed and then cleared from the command. */ 1360 void io_send_zc_cleanup(struct io_kiocb *req) 1361 { 1362 struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); 1363 struct io_async_msghdr *io = req->async_data; 1364 1365 if (req_has_async_data(req)) 1366 io_netmsg_iovec_free(io); 1367 if (zc->notif) { 1368 io_notif_flush(zc->notif); 1369 zc->notif = NULL; 1370 } 1371 } 1372 /* sqe->ioprio flag bits checked by io_send_zc_prep(): the COMMON set is the fast-path mask, VALID is everything accepted */ 1373 #define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF) 1374 #define IO_ZC_FLAGS_VALID (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE | \ 1375 IORING_SEND_VECTORIZED) 1376 /* io_send_zc_prep() - prepare a zero-copy send request (IORING_OP_SEND_ZC, or the sendmsg variant handled in the else branch): allocates the notification, reads flags/len/msg_flags/buf_index from the SQE, allocates the async msghdr and runs the opcode-specific setup. Returns 0 on success, -EINVAL for an invalid SQE, -ENOMEM on allocation failure, or the error from the setup / memory-accounting helpers. */ 1377 int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1378 { 1379 struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); 1380 struct io_ring_ctx *ctx = req->ctx; 1381 struct io_async_msghdr *iomsg; 1382 struct io_kiocb *notif; 1383 u64 user_data; 1384 int ret; 1385 1386 zc->done_io = 0; 1387 1388 if (unlikely(READ_ONCE(sqe->__pad2[0]))) 1389 return -EINVAL; 1390 /* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */ 1391 if (req->flags &
REQ_F_CQE_SKIP) 1392 return -EINVAL; 1393 1394 /* the notification request is stored in the command; allocation failure is -ENOMEM */ notif = zc->notif = io_alloc_notif(ctx); 1395 if (!notif) 1396 return -ENOMEM; 1397 /* notif CQE user_data: sqe->addr3 if non-zero, else the request's own user_data */ user_data = READ_ONCE(sqe->addr3); 1398 if (!user_data) 1399 user_data = req->cqe.user_data; 1400 1401 notif->cqe.user_data = user_data; 1402 notif->cqe.res = 0; 1403 notif->cqe.flags = IORING_CQE_F_NOTIF; 1404 req->flags |= REQ_F_NEED_CLEANUP | REQ_F_POLL_NO_LAZY; 1405 1406 zc->flags = READ_ONCE(sqe->ioprio); 1407 /* flags beyond the common set: validate against IO_ZC_FLAGS_VALID and initialise usage reporting if requested */ if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) { 1408 if (zc->flags & ~IO_ZC_FLAGS_VALID) 1409 return -EINVAL; 1410 if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) { 1411 struct io_notif_data *nd = io_notif_to_data(notif); 1412 1413 nd->zc_report = true; 1414 nd->zc_used = false; 1415 nd->zc_copied = false; 1416 } 1417 } 1418 1419 zc->len = READ_ONCE(sqe->len); 1420 /* MSG_NOSIGNAL and MSG_ZEROCOPY are always added to the user's msg_flags */ zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY; 1421 req->buf_index = READ_ONCE(sqe->buf_index); 1422 if (zc->msg_flags & MSG_DONTWAIT) 1423 req->flags |= REQ_F_NOWAIT; 1424 1425 if (io_is_compat(ctx)) 1426 zc->msg_flags |= MSG_CMSG_COMPAT; 1427 1428 iomsg = io_msg_alloc_async(req); 1429 if (unlikely(!iomsg)) 1430 return -ENOMEM; 1431 1432 /* SEND_ZC uses the plain send setup; the other opcode rejects non-zero addr2/file_index before the sendmsg setup */ if (req->opcode == IORING_OP_SEND_ZC) { 1433 ret = io_send_setup(req, sqe); 1434 } else { 1435 if (unlikely(sqe->addr2 || sqe->file_index)) 1436 return -EINVAL; 1437 ret = io_sendmsg_setup(req, sqe); 1438 } 1439 if (unlikely(ret)) 1440 return ret; 1441 1442 /* without IORING_RECVSEND_FIXED_BUF: install the iovec fill callback and account msg_iter.count bytes to the notif; with it: install io_sg_from_iter() */ if (!(zc->flags & IORING_RECVSEND_FIXED_BUF)) { 1443 iomsg->msg.sg_from_iter = io_sg_from_iter_iovec; 1444 return io_notif_account_mem(zc->notif, iomsg->msg.msg_iter.count); 1445 } 1446 iomsg->msg.sg_from_iter = io_sg_from_iter; 1447 return 0; 1448 } 1449 /* io_sg_from_iter_iovec() - sg_from_iter callback installed for sends without IORING_RECVSEND_FIXED_BUF: calls skb_zcopy_downgrade_managed() on the skb and then fills it through zerocopy_fill_skb_from_iter(), whose result is returned. */ 1450 static int io_sg_from_iter_iovec(struct sk_buff *skb, 1451 struct iov_iter *from, size_t length) 1452 { 1453 skb_zcopy_downgrade_managed(skb); 1454 return zerocopy_fill_skb_from_iter(skb, from, length); 1455 } 1456 /* io_sg_from_iter() - sg_from_iter callback installed for IORING_RECVSEND_FIXED_BUF sends: attaches up to @length bytes of the bvec iterator @from to @skb as page frags and advances @from accordingly. Returns 0, or -EMSGSIZE when the frag slots ran out before all data was attached. An skb that already holds frags and is not zcopy-managed is handed to zerocopy_fill_skb_from_iter() instead. */ 1457 static int io_sg_from_iter(struct sk_buff *skb, 1458 struct iov_iter *from,
size_t length) 1459 { 1460 struct skb_shared_info *shinfo = skb_shinfo(skb); 1461 int frag = shinfo->nr_frags; 1462 int ret = 0; 1463 struct bvec_iter bi; 1464 ssize_t copied = 0; 1465 unsigned long truesize = 0; 1466 1467 /* no frags yet: set SKBFL_MANAGED_FRAG_REFS; frags present but skb not zcopy-managed: use the generic fill helper */ if (!frag) 1468 shinfo->flags |= SKBFL_MANAGED_FRAG_REFS; 1469 else if (unlikely(!skb_zcopy_managed(skb))) 1470 return zerocopy_fill_skb_from_iter(skb, from, length); 1471 1472 /* walk at most min(from->count, length) bytes, starting at the iterator's current offset */ bi.bi_size = min(from->count, length); 1473 bi.bi_bvec_done = from->iov_offset; 1474 bi.bi_idx = 0; 1475 1476 /* one frag per bvec segment until the bytes are consumed or MAX_SKB_FRAGS is reached */ while (bi.bi_size && frag < MAX_SKB_FRAGS) { 1477 struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi); 1478 1479 copied += v.bv_len; 1480 truesize += PAGE_ALIGN(v.bv_len + v.bv_offset); 1481 __skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page, 1482 v.bv_offset, v.bv_len); 1483 bvec_iter_advance_single(from->bvec, &bi, v.bv_len); 1484 } 1485 /* loop stopped on the frag limit with bytes still left */ if (bi.bi_size) 1486 ret = -EMSGSIZE; 1487 1488 /* store the new frag count and advance the source iterator by what was attached */ shinfo->nr_frags = frag; 1489 from->bvec += bi.bi_idx; 1490 from->nr_segs -= bi.bi_idx; 1491 from->count -= copied; 1492 from->iov_offset = bi.bi_bvec_done; 1493 1494 /* add the attached bytes to the skb's length and truesize accounting */ skb->data_len += copied; 1495 skb->len += copied; 1496 skb->truesize += truesize; 1497 return ret; 1498 } 1499 /* io_send_zc_import() - set up kmsg->msg.msg_iter for a fixed-buffer zero-copy send. The notif (with buf_index copied from req) is what gets passed to the import helper: io_import_reg_buf() for a single range, or io_import_reg_vec() when IORING_SEND_VECTORIZED is set. Clears REQ_F_IMPORT_BUFFER on success and returns 0, otherwise returns the import error. Warns once if IORING_RECVSEND_FIXED_BUF is not set. */ 1500 static int io_send_zc_import(struct io_kiocb *req, 1501 struct io_async_msghdr *kmsg, 1502 unsigned int issue_flags) 1503 { 1504 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1505 struct io_kiocb *notif = sr->notif; 1506 int ret; 1507 1508 WARN_ON_ONCE(!(sr->flags & IORING_RECVSEND_FIXED_BUF)); 1509 1510 notif->buf_index = req->buf_index; 1511 1512 if (!(sr->flags & IORING_SEND_VECTORIZED)) { 1513 ret = io_import_reg_buf(notif, &kmsg->msg.msg_iter, 1514 (u64)(uintptr_t)sr->buf, sr->len, 1515 ITER_SOURCE, issue_flags); 1516 } else { 1517 /* segment count is taken from the iterator as it stands before the import */ unsigned uvec_segs = kmsg->msg.msg_iter.nr_segs; 1518 1519 ret = io_import_reg_vec(ITER_SOURCE, &kmsg->msg.msg_iter, 1520 notif, &kmsg->vec, uvec_segs, 1521 issue_flags); 1522 } 1523 1524 if (unlikely(ret)) 1525 return ret; 1526 req->flags &=
~REQ_F_IMPORT_BUFFER; 1527 return 0; 1528 } 1529 /* io_sendmsg_zc() - issue handler for zero-copy sends: IORING_OP_SEND_ZC goes through sock_sendmsg(), any other opcode through __sys_sendmsg_sock(). Returns -ENOTSOCK when req->file is not a socket, -EOPNOTSUPP when the socket lacks SOCK_SUPPORT_ZC, -EAGAIN for poll-first not yet polled, for a nonblocking -EAGAIN, or for a short send that io_net_retry() allows retrying, or the buffer-import error. Otherwise the result is set with IORING_CQE_F_MORE in the CQE flags and IOU_COMPLETE is returned. */ 1530 int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) 1531 { 1532 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1533 struct io_async_msghdr *kmsg = req->async_data; 1534 struct socket *sock; 1535 unsigned msg_flags; 1536 int ret, min_ret = 0; 1537 1538 sock = sock_from_file(req->file); 1539 if (unlikely(!sock)) 1540 return -ENOTSOCK; 1541 if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags)) 1542 return -EOPNOTSUPP; 1543 if (!(req->flags & REQ_F_POLLED) && 1544 (sr->flags & IORING_RECVSEND_POLL_FIRST)) 1545 return -EAGAIN; 1546 1547 /* fixed buffer not imported yet: do it now */ if (req->flags & REQ_F_IMPORT_BUFFER) { 1548 ret = io_send_zc_import(req, kmsg, issue_flags); 1549 if (unlikely(ret)) 1550 return ret; 1551 } 1552 1553 msg_flags = sr->msg_flags; 1554 if (issue_flags & IO_URING_F_NONBLOCK) 1555 msg_flags |= MSG_DONTWAIT; 1556 /* MSG_WAITALL: anything below the full iterator length counts as short */ if (msg_flags & MSG_WAITALL) 1557 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 1558 1559 /* attach the notif's uarg to the message */ kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg; 1560 1561 if (req->opcode == IORING_OP_SEND_ZC) { 1562 /* plain send: strip MSG_INTERNAL_SENDMSG_FLAGS before calling sock_sendmsg() */ msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS; 1563 kmsg->msg.msg_flags = msg_flags; 1564 ret = sock_sendmsg(sock, &kmsg->msg); 1565 } else { 1566 kmsg->msg.msg_control_user = sr->msg_control; 1567 ret = __sys_sendmsg_sock(sock, &kmsg->msg, msg_flags); 1568 } 1569 1570 if (unlikely(ret < min_ret)) { 1571 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 1572 return -EAGAIN; 1573 1574 /* short send that may be retried: remember the bytes done */ if (ret > 0 && io_net_retry(sock, sr->msg_flags)) { 1575 sr->done_io += ret; 1576 return -EAGAIN; 1577 } 1578 /* -ERESTARTSYS is reported as -EINTR */ if (ret == -ERESTARTSYS) 1579 ret = -EINTR; 1580 req_set_fail(req); 1581 } 1582 1583 /* add bytes from earlier partial sends; an error after partial progress reports the progress instead */ if (ret >= 0) 1584 ret += sr->done_io; 1585 else if (sr->done_io) 1586 ret = sr->done_io; 1587 1588 /* 1589 * If we're in io-wq we can't rely on tw ordering guarantees, defer 1590 * flushing notif to io_send_zc_cleanup() 1591 */ 1592 if (!(issue_flags & IO_URING_F_UNLOCKED)) { 1593 io_notif_flush(sr->notif); 1594 sr->notif = NULL;
1595 io_req_msg_cleanup(req, 0); 1596 } 1597 io_req_set_res(req, ret, IORING_CQE_F_MORE); 1598 return IOU_COMPLETE; 1599 } 1600 /* io_sendrecv_fail() - adjust the CQE of a send/recv request: if some bytes were already transferred (sr->done_io), they become the result; zero-copy send opcodes that still have REQ_F_NEED_CLEANUP set additionally get IORING_CQE_F_MORE. */ 1601 void io_sendrecv_fail(struct io_kiocb *req) 1602 { 1603 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1604 1605 if (sr->done_io) 1606 req->cqe.res = sr->done_io; 1607 1608 if ((req->flags & REQ_F_NEED_CLEANUP) && 1609 (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC)) 1610 req->cqe.flags |= IORING_CQE_F_MORE; 1611 } 1612 /* sqe->ioprio bits accepted by io_accept_prep() */ 1613 #define ACCEPT_FLAGS (IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \ 1614 IORING_ACCEPT_POLL_FIRST) 1615 /* io_accept_prep() - read and validate the SQE for an accept request. Returns -EINVAL for non-zero len/buf_index, ioprio bits outside ACCEPT_FLAGS, SOCK_CLOEXEC combined with a fixed file slot, multishot combined with a fixed slot other than IORING_FILE_INDEX_ALLOC, or accept flags outside SOCK_CLOEXEC | SOCK_NONBLOCK. Sets REQ_F_APOLL_MULTISHOT / REQ_F_NOWAIT according to the ioprio flags and returns 0. */ 1616 int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1617 { 1618 struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept); 1619 1620 if (sqe->len || sqe->buf_index) 1621 return -EINVAL; 1622 1623 accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); 1624 accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2)); 1625 accept->flags = READ_ONCE(sqe->accept_flags); 1626 /* RLIMIT_NOFILE is sampled here, at prep time, and used later when reserving the fd */ accept->nofile = rlimit(RLIMIT_NOFILE); 1627 accept->iou_flags = READ_ONCE(sqe->ioprio); 1628 if (accept->iou_flags & ~ACCEPT_FLAGS) 1629 return -EINVAL; 1630 1631 accept->file_slot = READ_ONCE(sqe->file_index); 1632 if (accept->file_slot) { 1633 if (accept->flags & SOCK_CLOEXEC) 1634 return -EINVAL; 1635 if (accept->iou_flags & IORING_ACCEPT_MULTISHOT && 1636 accept->file_slot != IORING_FILE_INDEX_ALLOC) 1637 return -EINVAL; 1638 } 1639 if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1640 return -EINVAL; 1641 /* where the two constants differ, translate SOCK_NONBLOCK into O_NONBLOCK */ if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK)) 1642 accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1643 if (accept->iou_flags & IORING_ACCEPT_MULTISHOT) 1644 req->flags |= REQ_F_APOLL_MULTISHOT; 1645 if (accept->iou_flags & IORING_ACCEPT_DONTWAIT) 1646 req->flags |= REQ_F_NOWAIT; 1647 return 0; 1648 } 1649 /* io_accept() - issue handler for an accept request. The accepted file goes into a newly reserved fd, or into a fixed file slot when accept->file_slot is non-zero. Returns -EAGAIN for poll-first not yet polled, the fd-reservation error, IOU_RETRY when the accept should be attempted again, or IOU_COMPLETE with the result (fd, fixed-install result or error) set on the request. */ 1650 int io_accept(struct io_kiocb *req, unsigned int issue_flags) 1651 { 1652 struct io_accept *accept =
io_kiocb_to_cmd(req, struct io_accept); 1653 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 1654 bool fixed = !!accept->file_slot; 1655 struct proto_accept_arg arg = { 1656 .flags = force_nonblock ? O_NONBLOCK : 0, 1657 }; 1658 struct file *file; 1659 unsigned cflags; 1660 int ret, fd; 1661 1662 /* POLL_FIRST: do not attempt the accept until the request has been polled */ if (!(req->flags & REQ_F_POLLED) && 1663 accept->iou_flags & IORING_ACCEPT_POLL_FIRST) 1664 return -EAGAIN; 1665 /* the multishot path below jumps back here to accept again */ 1666 retry: 1667 /* no fixed slot: reserve a regular fd first */ if (!fixed) { 1668 fd = __get_unused_fd_flags(accept->flags, accept->nofile); 1669 if (unlikely(fd < 0)) 1670 return fd; 1671 } 1672 arg.err = 0; 1673 /* preset to -1 so that an untouched value can be told apart from 0 afterwards */ arg.is_empty = -1; 1674 file = do_accept(req->file, &arg, accept->addr, accept->addr_len, 1675 accept->flags); 1676 if (IS_ERR(file)) { 1677 /* accept failed: give back the reserved fd */ if (!fixed) 1678 put_unused_fd(fd); 1679 ret = PTR_ERR(file); 1680 /* -EAGAIN on a nonblocking attempt is retried unless IORING_ACCEPT_DONTWAIT was requested */ if (ret == -EAGAIN && force_nonblock && 1681 !(accept->iou_flags & IORING_ACCEPT_DONTWAIT)) 1682 return IOU_RETRY; 1683 1684 if (ret == -ERESTARTSYS) 1685 ret = -EINTR; 1686 } else if (!fixed) { 1687 fd_install(fd, file); 1688 ret = fd; 1689 } else { 1690 ret = io_fixed_fd_install(req, issue_flags, file, 1691 accept->file_slot); 1692 } 1693 1694 cflags = 0; 1695 /* is_empty == 0 after do_accept() is reported as IORING_CQE_F_SOCK_NONEMPTY */ if (!arg.is_empty) 1696 cflags |= IORING_CQE_F_SOCK_NONEMPTY; 1697 1698 /* multishot success: post this result with IORING_CQE_F_MORE; accept again right away if the socket is non-empty or is_empty is still -1, else return IOU_RETRY */ if (ret >= 0 && (req->flags & REQ_F_APOLL_MULTISHOT) && 1699 io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) { 1700 if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1) 1701 goto retry; 1702 return IOU_RETRY; 1703 } 1704 1705 io_req_set_res(req, ret, cflags); 1706 if (ret < 0) 1707 req_set_fail(req); 1708 return IOU_COMPLETE; 1709 } 1710 /* io_socket_bpf_populate() - copy the socket request's domain, type and protocol into bctx->socket. */ 1711 void io_socket_bpf_populate(struct io_uring_bpf_ctx *bctx, struct io_kiocb *req) 1712 { 1713 struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket); 1714 1715 bctx->socket.family = sock->domain; 1716 bctx->socket.type = sock->type; 1717 bctx->socket.protocol = sock->protocol; 1718 } 1719 /* io_connect_bpf_populate() - fill bctx->connect from the connect request's kernel copy of the address (req->async_data): the family always, plus port and address for AF_INET / AF_INET6 when addr_len covers the full per-family sockaddr. Nothing is written when addr_len is smaller than sa_family_t. */ 1720 void io_connect_bpf_populate(struct io_uring_bpf_ctx *bctx, struct io_kiocb *req) 1721 { 1722 struct
io_connect *conn = io_kiocb_to_cmd(req, struct io_connect); 1723 struct sockaddr_storage *ss = req->async_data; 1724 1725 /* 1726 * move_addr_to_kernel() skips the copy for addr_len == 0, so 1727 * iomsg->addr may hold stale data from a prior CONNECT. Bail 1728 * unless addr_len covers the family discriminator. 1729 */ 1730 if (conn->addr_len < (int)sizeof(sa_family_t)) 1731 return; 1732 1733 bctx->connect.family = ss->ss_family; 1734 switch (ss->ss_family) { 1735 case AF_INET: { 1736 struct sockaddr_in *sin = (struct sockaddr_in *)ss; 1737 1738 /* address shorter than a full sockaddr_in: leave port/address unset */ if (conn->addr_len < (int)sizeof(*sin)) 1739 break; 1740 bctx->connect.port = sin->sin_port; 1741 bctx->connect.v4_addr = sin->sin_addr.s_addr; 1742 break; 1743 } 1744 case AF_INET6: { 1745 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss; 1746 1747 /* address shorter than a full sockaddr_in6: leave port/address unset */ if (conn->addr_len < (int)sizeof(*sin6)) 1748 break; 1749 bctx->connect.port = sin6->sin6_port; 1750 memcpy(bctx->connect.v6_addr, &sin6->sin6_addr, 1751 sizeof(bctx->connect.v6_addr)); 1752 break; 1753 } 1754 default: 1755 /* family is set; per-family fields stay zero - family-only filtering */ 1756 break; 1757 } 1758 } 1759 /* io_socket_prep() - read and validate the SQE for a socket request: domain from sqe->fd, type from sqe->off, protocol from sqe->len, fixed slot from sqe->file_index. Returns -EINVAL for non-zero addr/rw_flags/buf_index, for SOCK_CLOEXEC combined with a fixed file slot, or for flag bits other than SOCK_CLOEXEC | SOCK_NONBLOCK; 0 otherwise. */ 1760 int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1761 { 1762 struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket); 1763 1764 if (sqe->addr || sqe->rw_flags || sqe->buf_index) 1765 return -EINVAL; 1766 1767 sock->domain = READ_ONCE(sqe->fd); 1768 sock->type = READ_ONCE(sqe->off); 1769 sock->protocol = READ_ONCE(sqe->len); 1770 sock->file_slot = READ_ONCE(sqe->file_index); 1771 /* RLIMIT_NOFILE is sampled here, at prep time */ sock->nofile = rlimit(RLIMIT_NOFILE); 1772 1773 /* bits of the type word outside SOCK_TYPE_MASK are treated as flags */ sock->flags = sock->type & ~SOCK_TYPE_MASK; 1774 if (sock->file_slot && (sock->flags & SOCK_CLOEXEC)) 1775 return -EINVAL; 1776 if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1777 return -EINVAL; 1778 return 0; 1779 } 1780 /* io_socket() - issue handler for a socket request: creates the socket file with __sys_socket_file() and installs it into a newly reserved fd, or into the fixed file slot when one was given. Returns the fd-reservation error, -EAGAIN when creation reports -EAGAIN on a nonblocking issue, or IOU_COMPLETE with the result set on the request. */ 1781 int io_socket(struct io_kiocb *req, unsigned int issue_flags) 1782 { 1783 struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket); 1784 bool fixed =
!!sock->file_slot; 1785 struct file *file; 1786 int ret, fd; 1787 1788 /* no fixed slot: reserve a regular fd first */ if (!fixed) { 1789 fd = __get_unused_fd_flags(sock->flags, sock->nofile); 1790 if (unlikely(fd < 0)) 1791 return fd; 1792 } 1793 file = __sys_socket_file(sock->domain, sock->type, sock->protocol); 1794 if (IS_ERR(file)) { 1795 /* creation failed: give back the reserved fd */ if (!fixed) 1796 put_unused_fd(fd); 1797 ret = PTR_ERR(file); 1798 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 1799 return -EAGAIN; 1800 /* -ERESTARTSYS is reported as -EINTR */ if (ret == -ERESTARTSYS) 1801 ret = -EINTR; 1802 req_set_fail(req); 1803 } else if (!fixed) { 1804 fd_install(fd, file); 1805 ret = fd; 1806 } else { 1807 ret = io_fixed_fd_install(req, issue_flags, file, 1808 sock->file_slot); 1809 } 1810 io_req_set_res(req, ret, 0); 1811 return IOU_COMPLETE; 1812 } 1813 /* io_connect_prep() - read and validate the SQE for a connect request and copy the user address into kernel memory. Returns -EINVAL for non-zero len/buf_index/rw_flags/splice_fd_in, -ENOMEM when the async data cannot be allocated, otherwise the result of move_addr_to_kernel(). */ 1814 int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1815 { 1816 struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect); 1817 struct sockaddr_storage *addr; 1818 1819 if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) 1820 return -EINVAL; 1821 1822 conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); 1823 conn->addr_len = READ_ONCE(sqe->addr2); 1824 conn->in_progress = conn->seen_econnaborted = false; 1825 1826 /* the kernel copy of the address lives in the request's async data */ addr = io_uring_alloc_async_data(NULL, req); 1827 if (unlikely(!addr)) 1828 return -ENOMEM; 1829 1830 return move_addr_to_kernel(conn->addr, conn->addr_len, addr); 1831 } 1832 /* io_connect() - issue handler for a connect request, using the address stored in req->async_data. Returns -EAGAIN when a nonblocking attempt reports -EAGAIN, -EINPROGRESS or a first -ECONNABORTED; otherwise sets the result on the request (marking it failed when negative) and returns IOU_COMPLETE. */ 1833 int io_connect(struct io_kiocb *req, unsigned int issue_flags) 1834 { 1835 struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect); 1836 struct sockaddr_storage *addr = req->async_data; 1837 unsigned file_flags; 1838 int ret; 1839 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 1840 1841 /* an earlier attempt returned -EINPROGRESS: if the socket polls as EPOLLERR, go straight to fetching the socket error */ if (connect->in_progress) { 1842 struct poll_table_struct pt = { ._key = EPOLLERR }; 1843 1844 if (vfs_poll(req->file, &pt) & EPOLLERR) 1845 goto get_sock_err; 1846 } 1847 1848 file_flags = force_nonblock ?
O_NONBLOCK : 0; 1849 1850 ret = __sys_connect_file(req->file, addr, connect->addr_len, file_flags); 1851 /* nonblocking attempt that must be retried: record -EINPROGRESS / the first -ECONNABORTED and return -EAGAIN; a second -ECONNABORTED completes the request with that error */ if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED) 1852 && force_nonblock) { 1853 if (ret == -EINPROGRESS) { 1854 connect->in_progress = true; 1855 } else if (ret == -ECONNABORTED) { 1856 if (connect->seen_econnaborted) 1857 goto out; 1858 connect->seen_econnaborted = true; 1859 } 1860 return -EAGAIN; 1861 } 1862 if (connect->in_progress) { 1863 /* 1864 * At least bluetooth will return -EBADFD on a re-connect 1865 * attempt, and it's (supposedly) also valid to get -EISCONN 1866 * which means the previous result is good. For both of these, 1867 * grab the sock_error() and use that for the completion. 1868 */ 1869 if (ret == -EBADFD || ret == -EISCONN) { 1870 get_sock_err: 1871 ret = sock_error(sock_from_file(req->file)->sk); 1872 } 1873 } 1874 /* -ERESTARTSYS is reported as -EINTR */ if (ret == -ERESTARTSYS) 1875 ret = -EINTR; 1876 out: 1877 if (ret < 0) 1878 req_set_fail(req); 1879 io_req_set_res(req, ret, 0); 1880 return IOU_COMPLETE; 1881 } 1882 1883 /* 1884 * Check if bind request would potentially end up with filename_create(), 1885 * which in turn ends up in mnt_want_write() which will grab the fs 1886 * percpu start write sem. This can trigger a lockdep warning.
1887 */ 1888 static int io_bind_file_create(const struct sockaddr_storage *addr, int addr_len) 1889 { 1890 const struct sockaddr_un *sun; 1891 1892 /* only AF_UNIX addresses are considered */ if (addr->ss_family != AF_UNIX) 1893 return 0; 1894 /* address too short to contain any sun_path bytes */ if (addr_len <= offsetof(struct sockaddr_un, sun_path)) 1895 return 0; 1896 sun = (const struct sockaddr_un *) addr; 1897 /* non-zero only when the path does not start with a NUL byte */ return sun->sun_path[0] != '\0'; 1898 } 1899 /* io_bind_prep() - read and validate the SQE for a bind request and copy the user address into the request's async data. Returns -EINVAL for non-zero len/buf_index/rw_flags/splice_fd_in, -ENOMEM when the async data cannot be allocated, or the move_addr_to_kernel() error. Sets REQ_F_FORCE_ASYNC when io_bind_file_create() reports that the bind may create a file. */ 1900 int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1901 { 1902 struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind); 1903 struct sockaddr __user *uaddr; 1904 struct sockaddr_storage *addr; 1905 int ret; 1906 1907 if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) 1908 return -EINVAL; 1909 1910 uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr)); 1911 bind->addr_len = READ_ONCE(sqe->addr2); 1912 1913 addr = io_uring_alloc_async_data(NULL, req); 1914 if (unlikely(!addr)) 1915 return -ENOMEM; 1916 ret = move_addr_to_kernel(uaddr, bind->addr_len, addr); 1917 if (unlikely(ret)) 1918 return ret; 1919 if (io_bind_file_create(addr, bind->addr_len)) 1920 req->flags |= REQ_F_FORCE_ASYNC; 1921 return 0; 1922 } 1923 1924 /* io_bind() - issue handler for a bind request: binds the socket behind req->file to the address held in req->async_data via __sys_bind_socket(). Returns -ENOTSOCK when req->file is not a socket; otherwise stores the bind result on the request (marking it failed when negative) and returns 0. */ 1925 int io_bind(struct io_kiocb *req, unsigned int issue_flags) 1926 { 1927 struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind); 1928 struct sockaddr_storage *addr = req->async_data; 1929 struct socket *sock; 1930 int ret; 1931 1932 sock = sock_from_file(req->file); 1933 if (unlikely(!sock)) 1934 return -ENOTSOCK; 1935 1936 ret = __sys_bind_socket(sock, addr, bind->addr_len); 1937 if (ret < 0) 1938 req_set_fail(req); 1939 io_req_set_res(req, ret, 0); 1940 return 0; 1941 } 1942 /* io_listen_prep() - read and validate the SQE for a listen request: the backlog comes from sqe->len. Returns -EINVAL when addr, buf_index, rw_flags, splice_fd_in or addr2 is non-zero; 0 otherwise. */ 1943 int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1944 { 1945 struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen); 1946 1947 if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2) 1948 return -EINVAL; 1949 1950 listen->backlog = READ_ONCE(sqe->len); 1951 return 0; 1952 } 1953 /* io_listen() - issue handler for a listen request: calls __sys_listen_socket() with the stored backlog. Returns -ENOTSOCK when req->file is not a socket; otherwise stores the result on the request (marking it failed when negative) and returns 0. */ 1954 int io_listen(struct
io_kiocb *req, unsigned int issue_flags) 1955 { 1956 struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen); 1957 struct socket *sock; 1958 int ret; 1959 1960 sock = sock_from_file(req->file); 1961 if (unlikely(!sock)) 1962 return -ENOTSOCK; 1963 1964 ret = __sys_listen_socket(sock, listen->backlog); 1965 /* a negative result marks the request as failed; the result is stored either way */ if (ret < 0) 1966 req_set_fail(req); 1967 io_req_set_res(req, ret, 0); 1968 return 0; 1969 } 1970 /* io_netmsg_cache_free() - free one io_async_msghdr cache entry: its iovec is released through io_vec_free(), then the entry itself with kfree(). The const is cast away because the entry is being destroyed. */ 1971 void io_netmsg_cache_free(const void *entry) 1972 { 1973 struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry; 1974 1975 io_vec_free(&kmsg->vec); 1976 kfree(kmsg); 1977 } 1978