// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/un.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "filetable.h"
#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"
#include "zcrx.h"

/*
 * Per-request command data for shutdown. 'how' is read from sqe->len in
 * io_shutdown_prep() and handed to __sys_shutdown_sock() in io_shutdown().
 */
struct io_shutdown {
	struct file			*file;
	int				how;
};

struct io_accept {
	struct file			*file;
	struct sockaddr __user		*addr;
	int __user			*addr_len;
	int				flags;
	int				iou_flags;
	u32				file_slot;
	unsigned long			nofile;
};

struct io_socket {
	struct file			*file;
	int				domain;
	int				type;
	int				protocol;
	int				flags;
	u32				file_slot;
	unsigned long			nofile;
};

struct io_connect {
	struct file			*file;
	struct sockaddr __user		*addr;
	int				addr_len;
	bool				in_progress;
	bool				seen_econnaborted;
};

struct io_bind {
	struct file			*file;
	int				addr_len;
};

struct io_listen {
	struct file			*file;
	int				backlog;
};

/*
 * Per-request command data shared by the send, sendmsg, recv and recvmsg
 * handlers in this file.
 */
struct io_sr_msg {
	struct file			*file;
	/*
	 * User pointer taken from sqe->addr: a msghdr for the *msg opcodes
	 * (compat or native), or the plain data buffer for send/recv.
	 */
	union {
		struct compat_msghdr __user	*umsg_compat;
		struct user_msghdr __user	*umsg;
		void __user			*buf;
	};
	/* transfer length, initially read from sqe->len */
	int				len;
	/* bytes already transferred by earlier partial attempts */
	unsigned			done_io;
	/* MSG_* flags, from sqe->msg_flags plus flags added at prep time */
	unsigned			msg_flags;
	/* retry counter, compared against MULTISHOT_MAX_RETRY */
	unsigned			nr_multishot_loops;
	/* sqe->ioprio flags plus the internal IORING_RECV_* bits below */
	u16				flags;
	/*
	 * provided-buffer group, copied from req->buf_index by both the send
	 * and recv prep handlers when REQ_F_BUFFER_SELECT is set
	 */
	u16				buf_group;
	/* per-invocation mshot limit */
	unsigned			mshot_len;
	/* overall mshot byte limit */
	unsigned			mshot_total_len;
	/* user control pointer saved at sendmsg prep, restored at issue */
	void __user			*msg_control;
	/* used only for send zerocopy */
	struct io_kiocb			*notif;
};

/*
 * The UAPI flags are the lower 8 bits, as that's all sqe->ioprio will hold
 * anyway. Use the upper 8 bits for internal uses.
 *
 * As used in this file:
 *  RETRY        - set by io_recv_finish() when a bundle receive loops to
 *                 append more data to the current completion
 *  PARTIAL_MAP  - set by io_recv_buf_select() when the buffer peek reports
 *                 arg.partial_map
 *  MSHOT_CAP    - set once a completion reaches sr->mshot_len
 *  MSHOT_LIM    - set at prep when an overall byte limit (sqe->optlen) is
 *                 given for multishot recv
 *  MSHOT_DONE   - set when sr->mshot_total_len has been used up
 *
 *  RETRY_CLEAR  - bits cleared by io_mshot_prep_retry()
 *  NO_RETRY     - any of these bits stops a bundle from being retried
 */
enum sr_retry_flags {
	IORING_RECV_RETRY	= (1U << 15),
	IORING_RECV_PARTIAL_MAP	= (1U << 14),
	IORING_RECV_MSHOT_CAP	= (1U << 13),
	IORING_RECV_MSHOT_LIM	= (1U << 12),
	IORING_RECV_MSHOT_DONE	= (1U << 11),

	IORING_RECV_RETRY_CLEAR	= IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP,
	IORING_RECV_NO_RETRY	= IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP |
				  IORING_RECV_MSHOT_CAP | IORING_RECV_MSHOT_DONE,
};

/*
 * Number of times we'll try and do receives if there's more data. If we
 * exceed this limit, then add us to the back of the queue and retry from
 * there. This helps fairness between flooding clients.
 */
#define MULTISHOT_MAX_RETRY	32

/*
 * Per-request command data for zero-copy receive. 'flags' comes from
 * sqe->ioprio, 'len' from sqe->len, and 'ifq' is looked up by
 * sqe->zcrx_ifq_idx in io_recvzc_prep().
 */
struct io_recvzc {
	struct file			*file;
	u16				flags;
	u32				len;
	struct io_zcrx_ifq		*ifq;
};

static int io_sg_from_iter_iovec(struct sk_buff *skb,
				 struct iov_iter *from, size_t length);
static int io_sg_from_iter(struct sk_buff *skb,
			   struct iov_iter *from, size_t length);

/*
 * Prepare a shutdown request.
 *
 * Rejects the SQE with -EINVAL if any of off, addr, rw_flags, buf_index or
 * splice_fd_in is set. Otherwise reads 'how' from sqe->len and marks the
 * request REQ_F_FORCE_ASYNC. Returns 0 on success.
 */
int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

/*
 * Issue a shutdown request.
 *
 * Returns -ENOTSOCK if req->file is not a socket. Otherwise the result of
 * __sys_shutdown_sock() becomes the request result and IOU_COMPLETE is
 * returned.
 */
int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
	struct socket *sock;
	int ret;

	/* prep forced async, so a nonblocking issue is not expected here */
	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

/*
 * Decide whether a short transfer should be retried: true only if
 * MSG_WAITALL is set in @flags and the socket type is SOCK_STREAM or
 * SOCK_SEQPACKET.
 */
static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}

/* Free the iovec attached to @kmsg, if there is one. */
static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
{
	if (kmsg->vec.iovec)
		io_vec_free(&kmsg->vec);
}

/*
 * Try to hand the request's async msghdr back to the per-ring netmsg cache.
 *
 * With IO_URING_F_UNLOCKED set the cache is not used and only the iovec is
 * freed. Otherwise an iovec larger than IO_VEC_CACHE_SOFT_CAP entries is
 * freed first, and if the header is accepted by the cache the request's
 * async data is cleared via io_req_async_data_clear().
 */
static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;

	/* can't recycle, ensure we free the iovec if we have one */
	if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
		io_netmsg_iovec_free(hdr);
		return;
	}

	/* Let normal cleanup path reap it if we fail adding to the cache */
	io_alloc_cache_vec_kasan(&hdr->vec);
	if (hdr->vec.nr > IO_VEC_CACHE_SOFT_CAP)
		io_vec_free(&hdr->vec);

	if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr))
		io_req_async_data_clear(req, REQ_F_NEED_CLEANUP);
}

/*
 * Allocate the async msghdr for @req, going through the ring's netmsg
 * cache. Returns NULL on allocation failure. If the returned header already
 * carries an iovec, REQ_F_NEED_CLEANUP is set on the request.
 */
static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_async_msghdr *hdr;

	hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req);
	if (!hdr)
		return NULL;

	/* If the async data was cached, we might have an iov cached inside. */
	if (hdr->vec.iovec)
		req->flags |= REQ_F_NEED_CLEANUP;
	return hdr;
}

/*
 * Reset per-attempt state before the next multishot/bundle iteration:
 * clear REQ_F_BL_EMPTY, zero done_io, drop the RETRY/PARTIAL_MAP bits and
 * restore sr->len to sr->mshot_len. @kmsg is not used here.
 */
static inline void io_mshot_prep_retry(struct io_kiocb *req,
				       struct io_async_msghdr *kmsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	req->flags &= ~REQ_F_BL_EMPTY;
	sr->done_io = 0;
	sr->flags &= ~IORING_RECV_RETRY_CLEAR;
	sr->len = sr->mshot_len;
}

/*
 * Import a user iovec array of @uvec_seg entries into iomsg->msg.msg_iter.
 *
 * The iovec already attached to @iomsg is offered to __import_iovec() as
 * the initial storage; without one, the single inline fast_iov is offered.
 * If a non-NULL iovec comes back it is stored in iomsg->vec and the request
 * is flagged REQ_F_NEED_CLEANUP.
 *
 * Returns 0 on success or the negative error from __import_iovec().
 */
static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			     const struct iovec __user *uiov, unsigned uvec_seg,
			     int ddir)
{
	struct iovec *iov;
	int ret, nr_segs;

	if (iomsg->vec.iovec) {
		nr_segs = iomsg->vec.nr;
		iov = iomsg->vec.iovec;
	} else {
		nr_segs = 1;
		iov = &iomsg->fast_iov;
	}

	ret = __import_iovec(ddir, uiov, uvec_seg, nr_segs, &iov,
			     &iomsg->msg.msg_iter, io_is_compat(req->ctx));
	if (unlikely(ret < 0))
		return ret;

	if (iov) {
		req->flags |= REQ_F_NEED_CLEANUP;
		io_vec_reset_iovec(&iomsg->vec, iov, iomsg->msg.msg_iter.nr_segs);
	}
	return 0;
}

/*
 * Compat variant of the msghdr copy: copy the compat header from
 * sr->umsg_compat into @msg and translate it into iomsg->msg.
 *
 * For buffer-select requests at most one iovec is allowed (-EINVAL
 * otherwise); its iov_len, or 0 when there is no iovec, becomes sr->len.
 * @ddir is not used here.
 *
 * Returns 0 on success, -EFAULT on a failed user copy, or the error from
 * __get_compat_msghdr().
 */
static int io_compat_msg_copy_hdr(struct io_kiocb *req,
				  struct io_async_msghdr *iomsg,
				  struct compat_msghdr *msg, int ddir,
				  struct sockaddr __user **save_addr)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_iovec __user *uiov;
	int ret;

	if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
		return -EFAULT;

	ret = __get_compat_msghdr(&iomsg->msg, msg, save_addr);
	if (ret)
		return ret;

	uiov = compat_ptr(msg->msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg->msg_iovlen == 0) {
			sr->len = 0;
		} else if (msg->msg_iovlen > 1) {
			return -EINVAL;
		} else {
			struct compat_iovec tmp_iov;

			if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
				return -EFAULT;
			sr->len = tmp_iov.iov_len;
		}
	}
	return 0;
}

/*
 * Copy a native user_msghdr field by field inside a single
 * user_access_begin()/user_access_end() section. msg_flags is not copied;
 * the caller in this file sets it to 0 afterwards.
 *
 * Returns 0 on success or -EFAULT.
 */
static int io_copy_msghdr_from_user(struct user_msghdr *msg,
				    struct user_msghdr __user *umsg)
{
	if (!user_access_begin(umsg, sizeof(*umsg)))
		return -EFAULT;
	unsafe_get_user(msg->msg_name, &umsg->msg_name, ua_end);
	unsafe_get_user(msg->msg_namelen, &umsg->msg_namelen, ua_end);
	unsafe_get_user(msg->msg_iov, &umsg->msg_iov, ua_end);
	unsafe_get_user(msg->msg_iovlen, &umsg->msg_iovlen, ua_end);
	unsafe_get_user(msg->msg_control, &umsg->msg_control, ua_end);
	unsafe_get_user(msg->msg_controllen, &umsg->msg_controllen, ua_end);
	user_access_end();
	return 0;
ua_end:
	user_access_end();
	return -EFAULT;
}

/*
 * Copy the user msghdr for a sendmsg/recvmsg request into @msg and set up
 * iomsg->msg from it.
 *
 * On compat rings the compat header is copied and only namelen,
 * controllen, iov and iovlen are carried over into the otherwise zeroed
 * @msg. On native rings the header is copied, msg_flags is cleared and
 * __copy_msghdr() fills iomsg->msg.
 *
 * For buffer-select requests at most one iovec is allowed (-EINVAL
 * otherwise); its iov_len, or 0 when there is no iovec, becomes sr->len.
 * The iovec array itself is not imported here.
 *
 * Returns 0 on success or a negative error.
 */
static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			   struct user_msghdr *msg, int ddir,
			   struct sockaddr __user **save_addr)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr __user *umsg = sr->umsg;
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

	if (io_is_compat(req->ctx)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ddir, save_addr);
		if (ret)
			return ret;

		memset(msg, 0, sizeof(*msg));
		msg->msg_namelen = cmsg.msg_namelen;
		msg->msg_controllen = cmsg.msg_controllen;
		msg->msg_iov = compat_ptr(cmsg.msg_iov);
		msg->msg_iovlen = cmsg.msg_iovlen;
		return 0;
	}

	ret = io_copy_msghdr_from_user(msg, umsg);
	if (unlikely(ret))
		return ret;

	msg->msg_flags = 0;

	ret = __copy_msghdr(&iomsg->msg, msg, save_addr);
	if (ret)
		return ret;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg->msg_iovlen == 0) {
			sr->len = 0;
		} else if (msg->msg_iovlen > 1) {
			return -EINVAL;
		} else {
			struct iovec __user *uiov = msg->msg_iov;
			struct iovec tmp_iov;

			if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
				return -EFAULT;
			sr->len = tmp_iov.iov_len;
		}
	}
	return 0;
}

343 void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req) 344 { 345 struct io_async_msghdr *io = req->async_data; 346 347 io_netmsg_iovec_free(io); 348 } 349 350 static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) 351 { 352 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 353 struct io_async_msghdr *kmsg = req->async_data; 354 void __user *addr; 355 u16 addr_len; 356 int ret; 357 358 sr->buf = u64_to_user_ptr(READ_ONCE(sqe->addr)); 359 360 if (READ_ONCE(sqe->__pad3[0])) 361 return -EINVAL; 362 363 kmsg->msg.msg_name = NULL; 364 kmsg->msg.msg_namelen = 0; 365 kmsg->msg.msg_control = NULL; 366 kmsg->msg.msg_controllen = 0; 367 kmsg->msg.msg_ubuf = NULL; 368 369 addr = u64_to_user_ptr(READ_ONCE(sqe->addr2)); 370 addr_len = READ_ONCE(sqe->addr_len); 371 if (addr) { 372 ret = move_addr_to_kernel(addr, addr_len, &kmsg->addr); 373 if (unlikely(ret < 0)) 374 return ret; 375 kmsg->msg.msg_name = &kmsg->addr; 376 kmsg->msg.msg_namelen = addr_len; 377 } 378 if (sr->flags & IORING_RECVSEND_FIXED_BUF) { 379 if (!(sr->flags & IORING_SEND_VECTORIZED)) { 380 req->flags |= REQ_F_IMPORT_BUFFER; 381 return 0; 382 } 383 384 kmsg->msg.msg_iter.nr_segs = sr->len; 385 return io_prep_reg_iovec(req, &kmsg->vec, sr->buf, sr->len); 386 } 387 if (req->flags & REQ_F_BUFFER_SELECT) 388 return 0; 389 390 if (sr->flags & IORING_SEND_VECTORIZED) 391 return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE); 392 393 return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter); 394 } 395 396 static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) 397 { 398 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 399 struct io_async_msghdr *kmsg = req->async_data; 400 struct user_msghdr msg; 401 int ret; 402 403 sr->flags |= IORING_SEND_VECTORIZED; 404 sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); 405 ret = io_msg_copy_hdr(req, kmsg, &msg, ITER_SOURCE, NULL); 406 if (unlikely(ret)) 407 return ret; 408 /* 
save msg_control as sys_sendmsg() overwrites it */ 409 sr->msg_control = kmsg->msg.msg_control_user; 410 411 if (sr->flags & IORING_RECVSEND_FIXED_BUF) { 412 kmsg->msg.msg_iter.nr_segs = msg.msg_iovlen; 413 return io_prep_reg_iovec(req, &kmsg->vec, msg.msg_iov, 414 msg.msg_iovlen); 415 } 416 if (req->flags & REQ_F_BUFFER_SELECT) 417 return 0; 418 return io_net_import_vec(req, kmsg, msg.msg_iov, msg.msg_iovlen, ITER_SOURCE); 419 } 420 421 #define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE | IORING_SEND_VECTORIZED) 422 423 int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 424 { 425 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 426 427 sr->done_io = 0; 428 sr->len = READ_ONCE(sqe->len); 429 if (unlikely(sr->len < 0)) 430 return -EINVAL; 431 sr->flags = READ_ONCE(sqe->ioprio); 432 if (sr->flags & ~SENDMSG_FLAGS) 433 return -EINVAL; 434 sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; 435 if (sr->msg_flags & MSG_DONTWAIT) 436 req->flags |= REQ_F_NOWAIT; 437 if (req->flags & REQ_F_BUFFER_SELECT) 438 sr->buf_group = req->buf_index; 439 if (sr->flags & IORING_RECVSEND_BUNDLE) { 440 if (req->opcode == IORING_OP_SENDMSG) 441 return -EINVAL; 442 sr->msg_flags |= MSG_WAITALL; 443 req->flags |= REQ_F_MULTISHOT; 444 } 445 446 if (io_is_compat(req->ctx)) 447 sr->msg_flags |= MSG_CMSG_COMPAT; 448 449 if (unlikely(!io_msg_alloc_async(req))) 450 return -ENOMEM; 451 if (req->opcode != IORING_OP_SENDMSG) 452 return io_send_setup(req, sqe); 453 if (unlikely(sqe->addr2 || sqe->file_index)) 454 return -EINVAL; 455 return io_sendmsg_setup(req, sqe); 456 } 457 458 static void io_req_msg_cleanup(struct io_kiocb *req, 459 unsigned int issue_flags) 460 { 461 io_netmsg_recycle(req, issue_flags); 462 } 463 464 /* 465 * For bundle completions, we need to figure out how many segments we consumed. 466 * A bundle could be using a single ITER_UBUF if that's all we mapped, or it 467 * could be using an ITER_IOVEC. 
If the latter, then if we consumed all of 468 * the segments, then it's a trivial questiont o answer. If we have residual 469 * data in the iter, then loop the segments to figure out how much we 470 * transferred. 471 */ 472 static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret) 473 { 474 struct iovec *iov; 475 int nbufs; 476 477 /* no data is always zero segments, and a ubuf is always 1 segment */ 478 if (ret <= 0) 479 return 0; 480 if (iter_is_ubuf(&kmsg->msg.msg_iter)) 481 return 1; 482 483 iov = kmsg->vec.iovec; 484 if (!iov) 485 iov = &kmsg->fast_iov; 486 487 /* if all data was transferred, it's basic pointer math */ 488 if (!iov_iter_count(&kmsg->msg.msg_iter)) 489 return iter_iov(&kmsg->msg.msg_iter) - iov; 490 491 /* short transfer, count segments */ 492 nbufs = 0; 493 do { 494 int this_len = min_t(int, iov[nbufs].iov_len, ret); 495 496 nbufs++; 497 ret -= this_len; 498 } while (ret); 499 500 return nbufs; 501 } 502 503 static int io_net_kbuf_recyle(struct io_kiocb *req, struct io_buffer_list *bl, 504 struct io_async_msghdr *kmsg, int len) 505 { 506 req->flags |= REQ_F_BL_NO_RECYCLE; 507 if (req->flags & REQ_F_BUFFERS_COMMIT) 508 io_kbuf_commit(req, bl, len, io_bundle_nbufs(kmsg, len)); 509 return IOU_RETRY; 510 } 511 512 static inline bool io_send_finish(struct io_kiocb *req, 513 struct io_async_msghdr *kmsg, 514 struct io_br_sel *sel) 515 { 516 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 517 bool bundle_finished = sel->val <= 0; 518 unsigned int cflags; 519 520 if (!(sr->flags & IORING_RECVSEND_BUNDLE)) { 521 cflags = io_put_kbuf(req, sel->val, sel->buf_list); 522 goto finish; 523 } 524 525 cflags = io_put_kbufs(req, sel->val, sel->buf_list, io_bundle_nbufs(kmsg, sel->val)); 526 527 /* 528 * Don't start new bundles if the buffer list is empty, or if the 529 * current operation needed to go through polling to complete. 
530 */ 531 if (bundle_finished || req->flags & (REQ_F_BL_EMPTY | REQ_F_POLLED)) 532 goto finish; 533 534 /* 535 * Fill CQE for this receive and see if we should keep trying to 536 * receive from this socket. 537 */ 538 if (io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) { 539 io_mshot_prep_retry(req, kmsg); 540 return false; 541 } 542 543 /* Otherwise stop bundle and use the current result. */ 544 finish: 545 io_req_set_res(req, sel->val, cflags); 546 sel->val = IOU_COMPLETE; 547 return true; 548 } 549 550 int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) 551 { 552 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 553 struct io_async_msghdr *kmsg = req->async_data; 554 struct socket *sock; 555 unsigned flags; 556 int min_ret = 0; 557 int ret; 558 559 sock = sock_from_file(req->file); 560 if (unlikely(!sock)) 561 return -ENOTSOCK; 562 563 if (!(req->flags & REQ_F_POLLED) && 564 (sr->flags & IORING_RECVSEND_POLL_FIRST)) 565 return -EAGAIN; 566 567 flags = sr->msg_flags; 568 if (issue_flags & IO_URING_F_NONBLOCK) 569 flags |= MSG_DONTWAIT; 570 if (flags & MSG_WAITALL) 571 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 572 573 kmsg->msg.msg_control_user = sr->msg_control; 574 575 ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); 576 577 if (ret < min_ret) { 578 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 579 return -EAGAIN; 580 if (ret > 0 && io_net_retry(sock, flags)) { 581 kmsg->msg.msg_controllen = 0; 582 kmsg->msg.msg_control = NULL; 583 sr->done_io += ret; 584 return -EAGAIN; 585 } 586 if (ret == -ERESTARTSYS) 587 ret = -EINTR; 588 req_set_fail(req); 589 } 590 io_req_msg_cleanup(req, issue_flags); 591 if (ret >= 0) 592 ret += sr->done_io; 593 else if (sr->done_io) 594 ret = sr->done_io; 595 io_req_set_res(req, ret, 0); 596 return IOU_COMPLETE; 597 } 598 599 static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags, 600 struct io_br_sel *sel, struct io_async_msghdr *kmsg) 601 { 602 struct 
io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 603 struct buf_sel_arg arg = { 604 .iovs = &kmsg->fast_iov, 605 .max_len = min_not_zero(sr->len, INT_MAX), 606 .nr_iovs = 1, 607 .buf_group = sr->buf_group, 608 }; 609 int ret; 610 611 if (kmsg->vec.iovec) { 612 arg.nr_iovs = kmsg->vec.nr; 613 arg.iovs = kmsg->vec.iovec; 614 arg.mode = KBUF_MODE_FREE; 615 } 616 617 if (!(sr->flags & IORING_RECVSEND_BUNDLE)) 618 arg.nr_iovs = 1; 619 else 620 arg.mode |= KBUF_MODE_EXPAND; 621 622 ret = io_buffers_select(req, &arg, sel, issue_flags); 623 if (unlikely(ret < 0)) 624 return ret; 625 626 if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) { 627 kmsg->vec.nr = ret; 628 kmsg->vec.iovec = arg.iovs; 629 req->flags |= REQ_F_NEED_CLEANUP; 630 } 631 sr->len = arg.out_len; 632 633 if (ret == 1) { 634 sr->buf = arg.iovs[0].iov_base; 635 ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, 636 &kmsg->msg.msg_iter); 637 if (unlikely(ret)) 638 return ret; 639 } else { 640 iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE, 641 arg.iovs, ret, arg.out_len); 642 } 643 644 return 0; 645 } 646 647 int io_send(struct io_kiocb *req, unsigned int issue_flags) 648 { 649 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 650 struct io_async_msghdr *kmsg = req->async_data; 651 struct io_br_sel sel = { }; 652 struct socket *sock; 653 unsigned flags; 654 int min_ret = 0; 655 int ret; 656 657 sock = sock_from_file(req->file); 658 if (unlikely(!sock)) 659 return -ENOTSOCK; 660 661 if (!(req->flags & REQ_F_POLLED) && 662 (sr->flags & IORING_RECVSEND_POLL_FIRST)) 663 return -EAGAIN; 664 665 flags = sr->msg_flags; 666 if (issue_flags & IO_URING_F_NONBLOCK) 667 flags |= MSG_DONTWAIT; 668 669 retry_bundle: 670 sel.buf_list = NULL; 671 if (io_do_buffer_select(req)) { 672 ret = io_send_select_buffer(req, issue_flags, &sel, kmsg); 673 if (ret) 674 return ret; 675 } 676 677 /* 678 * If MSG_WAITALL is set, or this is a bundle send, then we need 679 * the full amount. 
If just bundle is set, if we do a short send 680 * then we complete the bundle sequence rather than continue on. 681 */ 682 if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE) 683 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 684 685 flags &= ~MSG_INTERNAL_SENDMSG_FLAGS; 686 kmsg->msg.msg_flags = flags; 687 ret = sock_sendmsg(sock, &kmsg->msg); 688 if (ret < min_ret) { 689 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 690 return -EAGAIN; 691 692 if (ret > 0 && io_net_retry(sock, flags)) { 693 sr->len -= ret; 694 sr->buf += ret; 695 sr->done_io += ret; 696 return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret); 697 } 698 if (ret == -ERESTARTSYS) 699 ret = -EINTR; 700 req_set_fail(req); 701 } 702 if (ret >= 0) 703 ret += sr->done_io; 704 else if (sr->done_io) 705 ret = sr->done_io; 706 707 sel.val = ret; 708 if (!io_send_finish(req, kmsg, &sel)) 709 goto retry_bundle; 710 711 io_req_msg_cleanup(req, issue_flags); 712 return sel.val; 713 } 714 715 static int io_recvmsg_mshot_prep(struct io_kiocb *req, 716 struct io_async_msghdr *iomsg, 717 int namelen, size_t controllen) 718 { 719 if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) == 720 (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) { 721 int hdr; 722 723 if (unlikely(namelen < 0)) 724 return -EOVERFLOW; 725 if (check_add_overflow(sizeof(struct io_uring_recvmsg_out), 726 namelen, &hdr)) 727 return -EOVERFLOW; 728 if (check_add_overflow(hdr, controllen, &hdr)) 729 return -EOVERFLOW; 730 731 iomsg->namelen = namelen; 732 iomsg->controllen = controllen; 733 return 0; 734 } 735 736 return 0; 737 } 738 739 static int io_recvmsg_copy_hdr(struct io_kiocb *req, 740 struct io_async_msghdr *iomsg) 741 { 742 struct user_msghdr msg; 743 int ret; 744 745 ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST, &iomsg->uaddr); 746 if (unlikely(ret)) 747 return ret; 748 749 if (!(req->flags & REQ_F_BUFFER_SELECT)) { 750 ret = io_net_import_vec(req, iomsg, msg.msg_iov, msg.msg_iovlen, 751 ITER_DEST); 
752 if (unlikely(ret)) 753 return ret; 754 } 755 return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen, 756 msg.msg_controllen); 757 } 758 759 static int io_recvmsg_prep_setup(struct io_kiocb *req) 760 { 761 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 762 struct io_async_msghdr *kmsg; 763 764 kmsg = io_msg_alloc_async(req); 765 if (unlikely(!kmsg)) 766 return -ENOMEM; 767 768 if (req->opcode == IORING_OP_RECV) { 769 kmsg->msg.msg_name = NULL; 770 kmsg->msg.msg_namelen = 0; 771 kmsg->msg.msg_inq = 0; 772 kmsg->msg.msg_control = NULL; 773 kmsg->msg.msg_get_inq = 1; 774 kmsg->msg.msg_controllen = 0; 775 kmsg->msg.msg_iocb = NULL; 776 kmsg->msg.msg_ubuf = NULL; 777 778 if (req->flags & REQ_F_BUFFER_SELECT) 779 return 0; 780 return import_ubuf(ITER_DEST, sr->buf, sr->len, 781 &kmsg->msg.msg_iter); 782 } 783 784 return io_recvmsg_copy_hdr(req, kmsg); 785 } 786 787 #define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \ 788 IORING_RECVSEND_BUNDLE) 789 790 int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 791 { 792 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 793 794 sr->done_io = 0; 795 796 if (unlikely(sqe->addr2)) 797 return -EINVAL; 798 799 sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); 800 sr->len = READ_ONCE(sqe->len); 801 if (unlikely(sr->len < 0)) 802 return -EINVAL; 803 sr->flags = READ_ONCE(sqe->ioprio); 804 if (sr->flags & ~RECVMSG_FLAGS) 805 return -EINVAL; 806 sr->msg_flags = READ_ONCE(sqe->msg_flags); 807 if (sr->msg_flags & MSG_DONTWAIT) 808 req->flags |= REQ_F_NOWAIT; 809 if (sr->msg_flags & MSG_ERRQUEUE) 810 req->flags |= REQ_F_CLEAR_POLLIN; 811 if (req->flags & REQ_F_BUFFER_SELECT) 812 sr->buf_group = req->buf_index; 813 sr->mshot_total_len = sr->mshot_len = 0; 814 if (sr->flags & IORING_RECV_MULTISHOT) { 815 if (!(req->flags & REQ_F_BUFFER_SELECT)) 816 return -EINVAL; 817 if (sr->msg_flags & MSG_WAITALL) 818 return -EINVAL; 819 if (req->opcode == IORING_OP_RECV) { 
820 sr->mshot_len = sr->len; 821 sr->mshot_total_len = READ_ONCE(sqe->optlen); 822 if (sr->mshot_total_len) 823 sr->flags |= IORING_RECV_MSHOT_LIM; 824 } else if (sqe->optlen) { 825 return -EINVAL; 826 } 827 req->flags |= REQ_F_APOLL_MULTISHOT; 828 } else if (sqe->optlen) { 829 return -EINVAL; 830 } 831 832 if (sr->flags & IORING_RECVSEND_BUNDLE) { 833 if (req->opcode == IORING_OP_RECVMSG) 834 return -EINVAL; 835 } 836 837 if (io_is_compat(req->ctx)) 838 sr->msg_flags |= MSG_CMSG_COMPAT; 839 840 sr->nr_multishot_loops = 0; 841 return io_recvmsg_prep_setup(req); 842 } 843 844 /* bits to clear in old and inherit in new cflags on bundle retry */ 845 #define CQE_F_MASK (IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE|\ 846 IORING_CQE_F_BUF_MORE) 847 848 /* 849 * Finishes io_recv and io_recvmsg. 850 * 851 * Returns true if it is actually finished, or false if it should run 852 * again (for multishot). 853 */ 854 static inline bool io_recv_finish(struct io_kiocb *req, 855 struct io_async_msghdr *kmsg, 856 struct io_br_sel *sel, bool mshot_finished, 857 unsigned issue_flags) 858 { 859 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 860 unsigned int cflags = 0; 861 862 if (kmsg->msg.msg_inq > 0) 863 cflags |= IORING_CQE_F_SOCK_NONEMPTY; 864 865 if (sel->val > 0 && sr->flags & IORING_RECV_MSHOT_LIM) { 866 /* 867 * If sr->len hits zero, the limit has been reached. Mark 868 * mshot as finished, and flag MSHOT_DONE as well to prevent 869 * a potential bundle from being retried. 
870 */ 871 sr->mshot_total_len -= min_t(int, sel->val, sr->mshot_total_len); 872 if (!sr->mshot_total_len) { 873 sr->flags |= IORING_RECV_MSHOT_DONE; 874 mshot_finished = true; 875 } 876 } 877 878 if (sr->flags & IORING_RECVSEND_BUNDLE) { 879 size_t this_ret = sel->val - sr->done_io; 880 881 cflags |= io_put_kbufs(req, this_ret, sel->buf_list, io_bundle_nbufs(kmsg, this_ret)); 882 if (sr->flags & IORING_RECV_RETRY) 883 cflags = req->cqe.flags | (cflags & CQE_F_MASK); 884 if (sr->mshot_len && sel->val >= sr->mshot_len) 885 sr->flags |= IORING_RECV_MSHOT_CAP; 886 /* bundle with no more immediate buffers, we're done */ 887 if (req->flags & REQ_F_BL_EMPTY) 888 goto finish; 889 /* 890 * If more is available AND it was a full transfer, retry and 891 * append to this one 892 */ 893 if (!(sr->flags & IORING_RECV_NO_RETRY) && 894 kmsg->msg.msg_inq > 1 && this_ret > 0 && 895 !iov_iter_count(&kmsg->msg.msg_iter)) { 896 req->cqe.flags = cflags & ~CQE_F_MASK; 897 sr->len = kmsg->msg.msg_inq; 898 sr->done_io += this_ret; 899 sr->flags |= IORING_RECV_RETRY; 900 return false; 901 } 902 } else { 903 cflags |= io_put_kbuf(req, sel->val, sel->buf_list); 904 } 905 906 /* 907 * Fill CQE for this receive and see if we should keep trying to 908 * receive from this socket. 909 */ 910 if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished && 911 io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) { 912 sel->val = IOU_RETRY; 913 io_mshot_prep_retry(req, kmsg); 914 /* Known not-empty or unknown state, retry */ 915 if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) { 916 if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY && 917 !(sr->flags & IORING_RECV_MSHOT_CAP)) { 918 return false; 919 } 920 /* mshot retries exceeded, force a requeue */ 921 sr->nr_multishot_loops = 0; 922 sr->flags &= ~IORING_RECV_MSHOT_CAP; 923 if (issue_flags & IO_URING_F_MULTISHOT) 924 sel->val = IOU_REQUEUE; 925 } 926 return true; 927 } 928 929 /* Finish the request / stop multishot. 
*/ 930 finish: 931 io_req_set_res(req, sel->val, cflags); 932 sel->val = IOU_COMPLETE; 933 io_req_msg_cleanup(req, issue_flags); 934 return true; 935 } 936 937 static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg, 938 struct io_sr_msg *sr, void __user **buf, 939 size_t *len) 940 { 941 unsigned long ubuf = (unsigned long) *buf; 942 unsigned long hdr; 943 944 hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen + 945 kmsg->controllen; 946 if (*len < hdr) 947 return -EFAULT; 948 949 if (kmsg->controllen) { 950 unsigned long control = ubuf + hdr - kmsg->controllen; 951 952 kmsg->msg.msg_control_user = (void __user *) control; 953 kmsg->msg.msg_controllen = kmsg->controllen; 954 } 955 956 sr->buf = *buf; /* stash for later copy */ 957 *buf = (void __user *) (ubuf + hdr); 958 kmsg->payloadlen = *len = *len - hdr; 959 return 0; 960 } 961 962 struct io_recvmsg_multishot_hdr { 963 struct io_uring_recvmsg_out msg; 964 struct sockaddr_storage addr; 965 }; 966 967 static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io, 968 struct io_async_msghdr *kmsg, 969 unsigned int flags, bool *finished) 970 { 971 int err; 972 int copy_len; 973 struct io_recvmsg_multishot_hdr hdr; 974 975 if (kmsg->namelen) 976 kmsg->msg.msg_name = &hdr.addr; 977 kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); 978 kmsg->msg.msg_namelen = 0; 979 980 if (sock->file->f_flags & O_NONBLOCK) 981 flags |= MSG_DONTWAIT; 982 983 err = sock_recvmsg(sock, &kmsg->msg, flags); 984 *finished = err <= 0; 985 if (err < 0) 986 return err; 987 988 hdr.msg = (struct io_uring_recvmsg_out) { 989 .controllen = kmsg->controllen - kmsg->msg.msg_controllen, 990 .flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT 991 }; 992 993 hdr.msg.payloadlen = err; 994 if (err > kmsg->payloadlen) 995 err = kmsg->payloadlen; 996 997 copy_len = sizeof(struct io_uring_recvmsg_out); 998 if (kmsg->msg.msg_namelen > kmsg->namelen) 999 copy_len += kmsg->namelen; 1000 else 1001 copy_len += 
kmsg->msg.msg_namelen; 1002 1003 /* 1004 * "fromlen shall refer to the value before truncation.." 1005 * 1003.1g 1006 */ 1007 hdr.msg.namelen = kmsg->msg.msg_namelen; 1008 1009 /* ensure that there is no gap between hdr and sockaddr_storage */ 1010 BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) != 1011 sizeof(struct io_uring_recvmsg_out)); 1012 if (copy_to_user(io->buf, &hdr, copy_len)) { 1013 *finished = true; 1014 return -EFAULT; 1015 } 1016 1017 return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen + 1018 kmsg->controllen + err; 1019 } 1020 1021 int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) 1022 { 1023 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1024 struct io_async_msghdr *kmsg = req->async_data; 1025 struct io_br_sel sel = { }; 1026 struct socket *sock; 1027 unsigned flags; 1028 int ret, min_ret = 0; 1029 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 1030 bool mshot_finished = true; 1031 1032 sock = sock_from_file(req->file); 1033 if (unlikely(!sock)) 1034 return -ENOTSOCK; 1035 1036 if (!(req->flags & REQ_F_POLLED) && 1037 (sr->flags & IORING_RECVSEND_POLL_FIRST)) 1038 return -EAGAIN; 1039 1040 flags = sr->msg_flags; 1041 if (force_nonblock) 1042 flags |= MSG_DONTWAIT; 1043 1044 retry_multishot: 1045 sel.buf_list = NULL; 1046 if (io_do_buffer_select(req)) { 1047 size_t len = sr->len; 1048 1049 sel = io_buffer_select(req, &len, sr->buf_group, issue_flags); 1050 if (!sel.addr) 1051 return -ENOBUFS; 1052 1053 if (req->flags & REQ_F_APOLL_MULTISHOT) { 1054 ret = io_recvmsg_prep_multishot(kmsg, sr, &sel.addr, &len); 1055 if (ret) { 1056 io_kbuf_recycle(req, sel.buf_list, issue_flags); 1057 return ret; 1058 } 1059 } 1060 1061 iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, sel.addr, len); 1062 } 1063 1064 kmsg->msg.msg_get_inq = 1; 1065 kmsg->msg.msg_inq = -1; 1066 if (req->flags & REQ_F_APOLL_MULTISHOT) { 1067 ret = io_recvmsg_multishot(sock, sr, kmsg, flags, 1068 &mshot_finished); 1069 } else { 1070 
/* disable partial retry for recvmsg with cmsg attached */ 1071 if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen) 1072 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 1073 1074 ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, 1075 kmsg->uaddr, flags); 1076 } 1077 1078 if (ret < min_ret) { 1079 if (ret == -EAGAIN && force_nonblock) { 1080 io_kbuf_recycle(req, sel.buf_list, issue_flags); 1081 return IOU_RETRY; 1082 } 1083 if (ret > 0 && io_net_retry(sock, flags)) { 1084 sr->done_io += ret; 1085 return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret); 1086 } 1087 if (ret == -ERESTARTSYS) 1088 ret = -EINTR; 1089 req_set_fail(req); 1090 } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { 1091 req_set_fail(req); 1092 } 1093 1094 if (ret > 0) 1095 ret += sr->done_io; 1096 else if (sr->done_io) 1097 ret = sr->done_io; 1098 else 1099 io_kbuf_recycle(req, sel.buf_list, issue_flags); 1100 1101 sel.val = ret; 1102 if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags)) 1103 goto retry_multishot; 1104 1105 return sel.val; 1106 } 1107 1108 static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg, 1109 struct io_br_sel *sel, unsigned int issue_flags) 1110 { 1111 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1112 int ret; 1113 1114 /* 1115 * If the ring isn't locked, then don't use the peek interface 1116 * to grab multiple buffers as we will lock/unlock between 1117 * this selection and posting the buffers. 
*/
	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
	    sr->flags & IORING_RECVSEND_BUNDLE) {
		struct buf_sel_arg arg = {
			.iovs = &kmsg->fast_iov,
			.nr_iovs = 1,
			.mode = KBUF_MODE_EXPAND,
			.buf_group = sr->buf_group,
		};

		if (kmsg->vec.iovec) {
			arg.nr_iovs = kmsg->vec.nr;
			arg.iovs = kmsg->vec.iovec;
			arg.mode |= KBUF_MODE_FREE;
		}

		if (sel->val)
			arg.max_len = sel->val;
		else if (kmsg->msg.msg_inq > 1)
			arg.max_len = min_not_zero(sel->val, (ssize_t) kmsg->msg.msg_inq);

		/* if mshot limited, ensure we don't go over */
		if (sr->flags & IORING_RECV_MSHOT_LIM)
			arg.max_len = min_not_zero(arg.max_len, sr->mshot_total_len);
		ret = io_buffers_peek(req, &arg, sel);
		if (unlikely(ret < 0))
			return ret;

		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
			kmsg->vec.nr = ret;
			kmsg->vec.iovec = arg.iovs;
			req->flags |= REQ_F_NEED_CLEANUP;
		}
		if (arg.partial_map)
			sr->flags |= IORING_RECV_PARTIAL_MAP;

		/* special case 1 vec, can be a fast path */
		if (ret == 1) {
			sr->buf = arg.iovs[0].iov_base;
			sr->len = arg.iovs[0].iov_len;
			goto map_ubuf;
		}
		iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
				arg.out_len);
	} else {
		size_t len = sel->val;

		*sel = io_buffer_select(req, &len, sr->buf_group, issue_flags);
		if (!sel->addr)
			return -ENOBUFS;
		sr->buf = sel->addr;
		sr->len = len;
map_ubuf:
		ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
	}

	return 0;
}

/*
 * Issue a receive on req->file, using the io_sr_msg command data and the
 * io_async_msghdr stored in req->async_data.
 *
 * Returns -EAGAIN without touching the socket when IORING_RECVSEND_POLL_FIRST
 * is set and the request has not been polled yet (REQ_F_POLLED clear), and
 * -ENOTSOCK when the file is not a socket. A nonblocking attempt that gets
 * -EAGAIN recycles the selected buffer and returns IOU_RETRY. A short but
 * positive receive that io_net_retry() accepts returns whatever
 * io_net_kbuf_recyle() returns. In all other cases the result is handed to
 * io_recv_finish() in sel.val: if that returns false the receive is attempted
 * again from retry_multishot, otherwise sel.val is returned.
 */
int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct io_br_sel sel;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	sel.buf_list = NULL;
	if (io_do_buffer_select(req)) {
		sel.val = sr->len;
		ret = io_recv_buf_select(req, kmsg, &sel, issue_flags);
		if (unlikely(ret < 0)) {
			kmsg->msg.msg_inq = -1;
			/* skip the receive entirely; ret carries the selection error */
			goto out_free;
		}
		sr->buf = NULL;
	}

	kmsg->msg.msg_flags = 0;
	kmsg->msg.msg_inq = -1;

	/* with MSG_WAITALL, anything short of the whole iterator counts as short */
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = sock_recvmsg(sock, &kmsg->msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			io_kbuf_recycle(req, sel.buf_list, issue_flags);
			return IOU_RETRY;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			/* partial progress: step past the received bytes, remember them */
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		/* out_free is also the target of the buffer-selection failure above */
out_free:
		req_set_fail(req);
	}

	/*
	 * Bytes from earlier attempts (sr->done_io) are added to a positive
	 * result and replace a zero/negative one. Only when nothing at all was
	 * transferred is the selected buffer recycled.
	 */
	mshot_finished = ret <= 0;
	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, sel.buf_list, issue_flags);

	sel.val = ret;
	if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
		goto retry_multishot;

	return sel.val;
}

/*
 * Prepare a zero-copy receive request from @sqe.
 *
 * Returns -EINVAL for non-zero addr/addr2/addr3, for an ifq index that has no
 * entry in ctx->zcrx_ctxs, for non-zero msg_flags, for ioprio flags other than
 * IORING_RECVSEND_POLL_FIRST and IORING_RECV_MULTISHOT, and when
 * IORING_RECV_MULTISHOT is missing. On success the ifq, length and flags are
 * stored in the io_recvzc command data and REQ_F_APOLL_MULTISHOT is set.
 */
int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
	unsigned ifq_idx;

	if (unlikely(sqe->addr2 ||
sqe->addr || sqe->addr3))
		return -EINVAL;

	ifq_idx = READ_ONCE(sqe->zcrx_ifq_idx);
	zc->ifq = xa_load(&req->ctx->zcrx_ctxs, ifq_idx);
	if (!zc->ifq)
		return -EINVAL;

	zc->len = READ_ONCE(sqe->len);
	zc->flags = READ_ONCE(sqe->ioprio);
	if (READ_ONCE(sqe->msg_flags))
		return -EINVAL;
	if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT))
		return -EINVAL;
	/* multishot required */
	if (!(zc->flags & IORING_RECV_MULTISHOT))
		return -EINVAL;
	/* All data completions are posted as aux CQEs. */
	req->flags |= REQ_F_APOLL_MULTISHOT;

	return 0;
}

/*
 * Issue handler for zero-copy receive: passes the socket to io_zcrx_recv().
 *
 * Returns -EAGAIN if IORING_RECVSEND_POLL_FIRST is set and the request has not
 * been polled yet, -ENOTSOCK if req->file is not a socket, IOU_REQUEUE if
 * io_zcrx_recv() returned that, IOU_COMPLETE once a result has been set, and
 * IOU_RETRY otherwise (positive return or -EAGAIN).
 */
int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
	struct socket *sock;
	unsigned int len;
	int ret;

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	/* remember the starting length; io_zcrx_recv() is given &zc->len */
	len = zc->len;
	ret = io_zcrx_recv(req, zc->ifq, sock, 0, issue_flags, &zc->len);
	/*
	 * A non-zero starting length that is now zero completes the request
	 * with res 0. This is checked before ret is looked at.
	 */
	if (len && zc->len == 0) {
		io_req_set_res(req, 0, 0);

		return IOU_COMPLETE;
	}
	/* zero and every error except -EAGAIN end the request here */
	if (unlikely(ret <= 0) && ret != -EAGAIN) {
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret == IOU_REQUEUE)
			return IOU_REQUEUE;

		req_set_fail(req);
		io_req_set_res(req, ret, 0);
		return IOU_COMPLETE;
	}
	return IOU_RETRY;
}

/*
 * Release what a zero-copy send request still holds: the async msghdr's iovec
 * (via io_netmsg_iovec_free(), only if the request has async data) and the
 * notification, which is flushed and cleared if still attached.
 */
void io_send_zc_cleanup(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io = req->async_data;

	if (req_has_async_data(req))
		io_netmsg_iovec_free(io);
	if (zc->notif) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
	}
}

/*
 * IO_ZC_FLAGS_COMMON: flags io_send_zc_prep() accepts without further checks.
 * IO_ZC_FLAGS_VALID: every flag io_send_zc_prep() accepts at all.
 */
#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
#define IO_ZC_FLAGS_VALID (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE | \
				IORING_SEND_VECTORIZED)

/*
 * Prepare a zero-copy send request. IORING_OP_SEND_ZC is set up through
 * io_send_setup(), any other opcode routed here through io_sendmsg_setup().
 *
 * Allocates the notification request with io_alloc_notif() and fills in its
 * CQE: user_data from sqe->addr3 (falling back to the request's own
 * user_data), res 0, flags IORING_CQE_F_NOTIF.
 *
 * Returns -EINVAL for a non-zero sqe->__pad2[0], for REQ_F_CQE_SKIP, for flags
 * outside IO_ZC_FLAGS_VALID, and (sendmsg variant) for non-zero
 * addr2/file_index; -ENOMEM if the notif or the async msghdr cannot be
 * allocated; the setup helper's error if it fails. Without
 * IORING_RECVSEND_FIXED_BUF the return value is that of
 * io_notif_account_mem(); with it, 0.
 */
int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_async_msghdr *iomsg;
	struct io_kiocb *notif;
	u64 user_data;
	int ret;

	zc->done_io = 0;

	if (unlikely(READ_ONCE(sqe->__pad2[0])))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	user_data = READ_ONCE(sqe->addr3);
	if (!user_data)
		user_data = req->cqe.user_data;

	notif->cqe.user_data = user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	/*
	 * NOTE(review): REQ_F_NEED_CLEANUP is presumably what gets
	 * io_send_zc_cleanup() called for the error returns below, so that the
	 * notif allocated above is flushed - confirm against the opcode table.
	 */
	req->flags |= REQ_F_NEED_CLEANUP | REQ_F_POLL_NO_LAZY;

	zc->flags = READ_ONCE(sqe->ioprio);
	/* only look closer when something beyond the common flags is set */
	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
		if (zc->flags & ~IO_ZC_FLAGS_VALID)
			return -EINVAL;
		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
			struct io_notif_data *nd = io_notif_to_data(notif);

			nd->zc_report = true;
			nd->zc_used = false;
			nd->zc_copied = false;
		}
	}

	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
	req->buf_index = READ_ONCE(sqe->buf_index);
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

	if (io_is_compat(ctx))
		zc->msg_flags |= MSG_CMSG_COMPAT;

	iomsg = io_msg_alloc_async(req);
	if (unlikely(!iomsg))
		return -ENOMEM;

	if (req->opcode == IORING_OP_SEND_ZC) {
		ret = io_send_setup(req, sqe);
	} else {
		if (unlikely(sqe->addr2 || sqe->file_index))
			return -EINVAL;
		ret = io_sendmsg_setup(req, sqe);
	}
	if (unlikely(ret))
return ret;

	/* pick the skb fill callback according to the buffer type */
	if (!(zc->flags & IORING_RECVSEND_FIXED_BUF)) {
		iomsg->msg.sg_from_iter = io_sg_from_iter_iovec;
		return io_notif_account_mem(zc->notif, iomsg->msg.msg_iter.count);
	}
	iomsg->msg.sg_from_iter = io_sg_from_iter;
	return 0;
}

/*
 * sg_from_iter callback installed by io_send_zc_prep() when
 * IORING_RECVSEND_FIXED_BUF is not set: calls skb_zcopy_downgrade_managed()
 * on the skb and then fills it through zerocopy_fill_skb_from_iter().
 */
static int io_sg_from_iter_iovec(struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	skb_zcopy_downgrade_managed(skb);
	return zerocopy_fill_skb_from_iter(skb, from, length);
}

/*
 * sg_from_iter callback installed by io_send_zc_prep() when
 * IORING_RECVSEND_FIXED_BUF is set. Attaches segments of @from directly as
 * page frags of @skb, up to min(from->count, length) bytes or MAX_SKB_FRAGS
 * frags, then advances @from and updates the skb length accounting.
 *
 * Assumes @from is bvec-backed (it dereferences from->bvec). Returns 0, or
 * -EMSGSIZE if the frag slots ran out before all bytes were attached; the
 * frags attached so far stay accounted in that case. An skb that already has
 * frags but is not zerocopy-managed is handed to
 * zerocopy_fill_skb_from_iter() instead.
 */
static int io_sg_from_iter(struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	/* no frags yet: set SKBFL_MANAGED_FRAG_REFS; else require a managed skb */
	if (!frag)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return zerocopy_fill_skb_from_iter(skb, from, length);

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	/* attach segments until the byte budget or the frag slots run out */
	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	/* bytes left over means MAX_SKB_FRAGS was reached first */
	if (bi.bi_size)
		ret = -EMSGSIZE;

	/* advance the caller's iterator past what was attached */
	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	/* account the attached bytes on the skb */
	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;
	return ret;
}

/*
 * Import the registered buffer of a zero-copy send into kmsg->msg.msg_iter.
 *
 * Uses io_import_reg_buf() on sr->buf/sr->len, or io_import_reg_vec() on
 * kmsg->vec when IORING_SEND_VECTORIZED is set. Both imports are made against
 * the notification request, whose buf_index is copied from @req first.
 * NOTE(review): presumably this ties the buffer reference to the notif rather
 * than to the send request - confirm.
 *
 * Warns once if called without IORING_RECVSEND_FIXED_BUF. Returns the import
 * error, or 0 after clearing REQ_F_IMPORT_BUFFER.
 */
static int io_send_zc_import(struct io_kiocb *req,
			     struct io_async_msghdr *kmsg,
			     unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_kiocb *notif = sr->notif;
	int ret;

	WARN_ON_ONCE(!(sr->flags & IORING_RECVSEND_FIXED_BUF));

	notif->buf_index = req->buf_index;

	if (!(sr->flags & IORING_SEND_VECTORIZED)) {
		ret = io_import_reg_buf(notif, &kmsg->msg.msg_iter,
					(u64)(uintptr_t)sr->buf, sr->len,
					ITER_SOURCE, issue_flags);
	} else {
		unsigned uvec_segs = kmsg->msg.msg_iter.nr_segs;

		ret = io_import_reg_vec(ITER_SOURCE, &kmsg->msg.msg_iter,
					notif, &kmsg->vec, uvec_segs,
					issue_flags);
	}

	if (unlikely(ret))
		return ret;
	req->flags &= ~REQ_F_IMPORT_BUFFER;
	return 0;
}

/*
 * Issue handler for zero-copy sends. IORING_OP_SEND_ZC goes through
 * sock_sendmsg(); any other opcode routed here goes through
 * __sys_sendmsg_sock() with the saved user control pointer.
 *
 * Returns -ENOTSOCK for a non-socket file and -EOPNOTSUPP if the socket lacks
 * SOCK_SUPPORT_ZC. Returns -EAGAIN for poll-first, for a nonblocking -EAGAIN,
 * and after a short send that io_net_retry() accepts (bytes sent so far are
 * kept in sr->done_io). Returns the import error if importing the registered
 * buffer fails. Otherwise sets the CQE result with IORING_CQE_F_MORE and
 * returns IOU_COMPLETE.
 */
int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned msg_flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;
	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	/* registered buffer import is deferred to issue time */
	if (req->flags & REQ_F_IMPORT_BUFFER) {
		ret = io_send_zc_import(req, kmsg, issue_flags);
		if (unlikely(ret))
			return ret;
	}

	msg_flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	/* with MSG_WAITALL, anything short of the whole iterator counts as short */
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;

	if (req->opcode == IORING_OP_SEND_ZC) {
		msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
		kmsg->msg.msg_flags = msg_flags;
		ret = sock_sendmsg(sock, &kmsg->msg);
	} else {
		kmsg->msg.msg_control_user = sr->msg_control;
		ret = __sys_sendmsg_sock(sock, &kmsg->msg, msg_flags);
	}

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags &
IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, sr->msg_flags)) {
			/* partial send: remember the bytes sent so far */
			sr->done_io += ret;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	/* bytes from earlier partial sends are added in, or replace an error */
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(sr->notif);
		sr->notif = NULL;
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_COMPLETE;
}

/*
 * Adjust the CQE of a failed send/recv request: if some bytes were already
 * transferred (sr->done_io), report that count as the result. Zero-copy send
 * opcodes that still have REQ_F_NEED_CLEANUP set also get IORING_CQE_F_MORE.
 * NOTE(review): presumably because a notification CQE will still follow -
 * confirm.
 */
void io_sendrecv_fail(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (sr->done_io)
		req->cqe.res = sr->done_io;

	if ((req->flags & REQ_F_NEED_CLEANUP) &&
	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
		req->cqe.flags |= IORING_CQE_F_MORE;
}

/* sqe->ioprio flags accepted by io_accept_prep() */
#define ACCEPT_FLAGS	(IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \
			 IORING_ACCEPT_POLL_FIRST)

/*
 * Prepare an accept request from @sqe.
 *
 * sqe->addr and sqe->addr2 supply the user sockaddr and length pointers,
 * sqe->accept_flags the SOCK_* flags, sqe->ioprio the IORING_ACCEPT_* flags
 * and sqe->file_index an optional fixed file slot. The RLIMIT_NOFILE value is
 * sampled here and stored for the issue path.
 *
 * Returns -EINVAL for non-zero len/buf_index, for unknown flags of either
 * kind, for SOCK_CLOEXEC combined with a fixed slot, and for multishot with a
 * fixed slot other than IORING_FILE_INDEX_ALLOC.
 */
int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);

	if (sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	accept->iou_flags = READ_ONCE(sqe->ioprio);
	if (accept->iou_flags & ~ACCEPT_FLAGS)
		return -EINVAL;

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		if (accept->iou_flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	/* store O_NONBLOCK instead of SOCK_NONBLOCK where the two values differ */
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (accept->iou_flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	if (accept->iou_flags & IORING_ACCEPT_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	return 0;
}

/*
 * Issue handler for accept. The accepted file is installed either in a newly
 * reserved fd or, when a file slot was given, through io_fixed_fd_install().
 *
 * Returns -EAGAIN for poll-first, the fd reservation error if that fails, and
 * IOU_RETRY when a nonblocking accept gets -EAGAIN and
 * IORING_ACCEPT_DONTWAIT is not set.
 *
 * With REQ_F_APOLL_MULTISHOT each success is posted via io_req_post_cqe()
 * with IORING_CQE_F_MORE. If that post succeeds the handler either loops back
 * to accept again or returns IOU_RETRY. Errors, single-shot results and a
 * failed post complete the request with IOU_COMPLETE.
 */
int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool fixed = !!accept->file_slot;
	struct proto_accept_arg arg = {
		.flags = force_nonblock ? O_NONBLOCK : 0,
	};
	struct file *file;
	unsigned cflags;
	int ret, fd;

	if (!(req->flags & REQ_F_POLLED) &&
	    accept->iou_flags & IORING_ACCEPT_POLL_FIRST)
		return -EAGAIN;

retry:
	/* a normal fd is reserved up front and released again if accept fails */
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	arg.err = 0;
	/* preset to -1 so an untouched value can be told apart afterwards */
	arg.is_empty = -1;
	file = do_accept(req->file, &arg, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock &&
		    !(accept->iou_flags & IORING_ACCEPT_DONTWAIT))
			return IOU_RETRY;

		if (ret == -ERESTARTSYS)
			ret = -EINTR;
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
						accept->file_slot);
	}

	cflags = 0;
	if (!arg.is_empty)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (ret >= 0 && (req->flags & REQ_F_APOLL_MULTISHOT) &&
	    io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
		/* accept again right away if more is queued or the state is unknown */
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty
== -1)
			goto retry;
		return IOU_RETRY;
	}

	io_req_set_res(req, ret, cflags);
	if (ret < 0)
		req_set_fail(req);
	return IOU_COMPLETE;
}

/*
 * Copy the domain, type and protocol of a socket request into bctx->socket
 * (as family, type and protocol respectively).
 */
void io_socket_bpf_populate(struct io_uring_bpf_ctx *bctx, struct io_kiocb *req)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	bctx->socket.family = sock->domain;
	bctx->socket.type = sock->type;
	bctx->socket.protocol = sock->protocol;
}

/*
 * Prepare a socket-creation request from @sqe.
 *
 * sqe->fd carries the domain, sqe->off the type (including SOCK_* flag bits),
 * sqe->len the protocol and sqe->file_index an optional fixed file slot. The
 * RLIMIT_NOFILE value is sampled here and stored for the issue path.
 *
 * Returns -EINVAL for non-zero addr/rw_flags/buf_index, for SOCK_CLOEXEC
 * combined with a fixed slot, and for flag bits other than SOCK_CLOEXEC and
 * SOCK_NONBLOCK.
 */
int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	/* whatever lies outside SOCK_TYPE_MASK is treated as flags */
	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
		return -EINVAL;
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	return 0;
}

/*
 * Issue handler for socket creation. The new file is installed either in a
 * newly reserved fd or, when a file slot was given, through
 * io_fixed_fd_install().
 *
 * Returns the fd reservation error if that fails, -EAGAIN if
 * __sys_socket_file() returned -EAGAIN on a nonblocking issue, and otherwise
 * IOU_COMPLETE with the fd, the io_fixed_fd_install() result or the error set
 * as the CQE result.
 */
int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
	bool fixed = !!sock->file_slot;
	struct file *file;
	int ret, fd;

	/* a normal fd is reserved up front and released again on failure */
	if (!fixed) {
		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					    sock->file_slot);
	}
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

/*
 * Prepare a connect request from @sqe: sqe->addr is the user sockaddr and
 * sqe->addr2 its length. The address is copied into the async msghdr's addr
 * field with move_addr_to_kernel(), and the in_progress/seen_econnaborted
 * state is reset.
 *
 * Returns -EINVAL for non-zero len/buf_index/rw_flags/splice_fd_in, -ENOMEM if
 * the async msghdr cannot be allocated, else the move_addr_to_kernel() result.
 */
int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	conn->in_progress = conn->seen_econnaborted = false;

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr);
}

/*
 * Issue handler for connect.
 *
 * A nonblocking attempt that gets -EAGAIN, -EINPROGRESS or -ECONNABORTED
 * returns -EAGAIN to the caller. -EINPROGRESS is remembered in
 * connect->in_progress, and a second -ECONNABORTED is treated as the final
 * result. When called again with in_progress set, a pending EPOLLERR, or a
 * -EBADFD/-EISCONN result from the repeated connect, is resolved through
 * sock_error(). Every other path completes with IOU_COMPLETE after
 * io_req_msg_cleanup().
 */
int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io = req->async_data;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	/* earlier attempt was in progress: an error condition short-cuts the retry */
	if (connect->in_progress) {
		struct poll_table_struct pt = { ._key = EPOLLERR };

		if (vfs_poll(req->file, &pt) & EPOLLERR)
			goto get_sock_err;
	}

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->addr, connect->addr_len,
				 file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
	    && force_nonblock) {
		if (ret == -EINPROGRESS) {
			connect->in_progress = true;
		} else if (ret == -ECONNABORTED) {
			/* only the first -ECONNABORTED is retried */
			if (connect->seen_econnaborted)
				goto out;
			connect->seen_econnaborted = true;
		}
		return -EAGAIN;
	}
	if (connect->in_progress) {
		/*
		 * At least bluetooth will return -EBADFD on a re-connect
		 * attempt, and it's (supposedly) also valid to get -EISCONN
		 * which means the previous result is good. For both of these,
		 * grab the sock_error() and use that for the completion.
1788 */ 1789 if (ret == -EBADFD || ret == -EISCONN) { 1790 get_sock_err: 1791 ret = sock_error(sock_from_file(req->file)->sk); 1792 } 1793 } 1794 if (ret == -ERESTARTSYS) 1795 ret = -EINTR; 1796 out: 1797 if (ret < 0) 1798 req_set_fail(req); 1799 io_req_msg_cleanup(req, issue_flags); 1800 io_req_set_res(req, ret, 0); 1801 return IOU_COMPLETE; 1802 } 1803 1804 /* 1805 * Check if bind request would potentially end up with filename_create(), 1806 * which in turn end up in mnt_want_write() which will grab the fs 1807 * percpu start write sem. This can trigger a lockdep warning. 1808 */ 1809 static int io_bind_file_create(const struct io_async_msghdr *io, int addr_len) 1810 { 1811 const struct sockaddr_un *sun; 1812 1813 if (io->addr.ss_family != AF_UNIX) 1814 return 0; 1815 if (addr_len <= offsetof(struct sockaddr_un, sun_path)) 1816 return 0; 1817 sun = (const struct sockaddr_un *) &io->addr; 1818 return sun->sun_path[0] != '\0'; 1819 } 1820 1821 int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1822 { 1823 struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind); 1824 struct sockaddr __user *uaddr; 1825 struct io_async_msghdr *io; 1826 int ret; 1827 1828 if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) 1829 return -EINVAL; 1830 1831 uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr)); 1832 bind->addr_len = READ_ONCE(sqe->addr2); 1833 1834 io = io_msg_alloc_async(req); 1835 if (unlikely(!io)) 1836 return -ENOMEM; 1837 ret = move_addr_to_kernel(uaddr, bind->addr_len, &io->addr); 1838 if (unlikely(ret)) 1839 return ret; 1840 if (io_bind_file_create(io, bind->addr_len)) 1841 req->flags |= REQ_F_FORCE_ASYNC; 1842 return 0; 1843 } 1844 1845 int io_bind(struct io_kiocb *req, unsigned int issue_flags) 1846 { 1847 struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind); 1848 struct io_async_msghdr *io = req->async_data; 1849 struct socket *sock; 1850 int ret; 1851 1852 sock = sock_from_file(req->file); 1853 if (unlikely(!sock)) 1854 
return -ENOTSOCK; 1855 1856 ret = __sys_bind_socket(sock, &io->addr, bind->addr_len); 1857 if (ret < 0) 1858 req_set_fail(req); 1859 io_req_set_res(req, ret, 0); 1860 return 0; 1861 } 1862 1863 int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1864 { 1865 struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen); 1866 1867 if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2) 1868 return -EINVAL; 1869 1870 listen->backlog = READ_ONCE(sqe->len); 1871 return 0; 1872 } 1873 1874 int io_listen(struct io_kiocb *req, unsigned int issue_flags) 1875 { 1876 struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen); 1877 struct socket *sock; 1878 int ret; 1879 1880 sock = sock_from_file(req->file); 1881 if (unlikely(!sock)) 1882 return -ENOTSOCK; 1883 1884 ret = __sys_listen_socket(sock, listen->backlog); 1885 if (ret < 0) 1886 req_set_fail(req); 1887 io_req_set_res(req, ret, 0); 1888 return 0; 1889 } 1890 1891 void io_netmsg_cache_free(const void *entry) 1892 { 1893 struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry; 1894 1895 io_vec_free(&kmsg->vec); 1896 kfree(kmsg); 1897 } 1898