// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "filetable.h"
#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"
#include "zcrx.h"

struct io_shutdown {
	struct file			*file;
	int				how;
};

struct io_accept {
	struct file			*file;
	struct sockaddr __user		*addr;
	int __user			*addr_len;
	int				flags;
	int				iou_flags;
	u32				file_slot;
	unsigned long			nofile;
};

struct io_socket {
	struct file			*file;
	int				domain;
	int				type;
	int				protocol;
	int				flags;
	u32				file_slot;
	unsigned long			nofile;
};

struct io_connect {
	struct file			*file;
	struct sockaddr __user		*addr;
	int				addr_len;
	bool				in_progress;
	bool				seen_econnaborted;
};

struct io_bind {
	struct file			*file;
	int				addr_len;
};

struct io_listen {
	struct file			*file;
	int				backlog;
};

struct io_sr_msg {
	struct file			*file;
	union {
		struct compat_msghdr __user	*umsg_compat;
		struct user_msghdr __user	*umsg;
		void __user			*buf;
	};
	int				len;
	unsigned			done_io;
	unsigned			msg_flags;
	unsigned			nr_multishot_loops;
	u16				flags;
	/* initialised and used only by !msg send variants */
	u16				buf_group;
	/* per-invocation mshot limit */
	unsigned			mshot_len;
	/* overall mshot byte limit */
	unsigned			mshot_total_len;
	void __user			*msg_control;
	/* used only for send zerocopy */
	struct io_kiocb			*notif;
};

/*
 * The UAPI flags are the lower 8 bits, as that's all sqe->ioprio will hold
 * anyway. Use the upper 8 bits for internal uses.
 */
enum sr_retry_flags {
	IORING_RECV_RETRY	= (1U << 15),
	IORING_RECV_PARTIAL_MAP	= (1U << 14),
	IORING_RECV_MSHOT_CAP	= (1U << 13),
	IORING_RECV_MSHOT_LIM	= (1U << 12),
	IORING_RECV_MSHOT_DONE	= (1U << 11),

	IORING_RECV_RETRY_CLEAR	= IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP,
	IORING_RECV_NO_RETRY	= IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP |
				  IORING_RECV_MSHOT_CAP | IORING_RECV_MSHOT_DONE,
};

/*
 * Number of times we'll try and do receives if there's more data. If we
 * exceed this limit, then add us to the back of the queue and retry from
 * there. This helps fairness between flooding clients.
 */
#define MULTISHOT_MAX_RETRY	32
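/*
 * Illustrative sanity check (a sketch, not part of this file): per the
 * comment above, the internal sr_retry_flags must stay clear of the UAPI
 * byte that arrives via sqe->ioprio, which a compile-time assertion could
 * pin down:
 *
 *	BUILD_BUG_ON((IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP |
 *		      IORING_RECV_MSHOT_CAP | IORING_RECV_MSHOT_LIM |
 *		      IORING_RECV_MSHOT_DONE) & 0xff);
 */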
struct io_recvzc {
	struct file			*file;
	u16				flags;
	u32				len;
	struct io_zcrx_ifq		*ifq;
};

static int io_sg_from_iter_iovec(struct sk_buff *skb,
				 struct iov_iter *from, size_t length);
static int io_sg_from_iter(struct sk_buff *skb,
			   struct iov_iter *from, size_t length);

int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
	struct socket *sock;
	int ret;

	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}

static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
{
	if (kmsg->vec.iovec)
		io_vec_free(&kmsg->vec);
}

static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;

	/* can't recycle, ensure we free the iovec if we have one */
	if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
		io_netmsg_iovec_free(hdr);
		return;
	}

	/* Let normal cleanup path reap it if we fail adding to the cache */
	io_alloc_cache_vec_kasan(&hdr->vec);
	if (hdr->vec.nr > IO_VEC_CACHE_SOFT_CAP)
		io_vec_free(&hdr->vec);

	if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr))
		io_req_async_data_clear(req, REQ_F_NEED_CLEANUP);
}

static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_async_msghdr *hdr;

	hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req);
	if (!hdr)
		return NULL;

	/* If the async data was cached, we might have an iov cached inside. */
	if (hdr->vec.iovec)
		req->flags |= REQ_F_NEED_CLEANUP;
	return hdr;
}
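/*
 * Lifecycle note (descriptive, added for clarity): requests obtain their
 * msghdr through io_msg_alloc_async() at prep time and hand it back via
 * io_netmsg_recycle() on completion. With the ring lock held, the header,
 * including a modestly sized cached iovec, returns to ctx->netmsg_cache;
 * in the unlocked (io-wq) case the iovec is freed immediately instead.
 */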
static inline void io_mshot_prep_retry(struct io_kiocb *req,
				       struct io_async_msghdr *kmsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	req->flags &= ~REQ_F_BL_EMPTY;
	sr->done_io = 0;
	sr->flags &= ~IORING_RECV_RETRY_CLEAR;
	sr->len = sr->mshot_len;
}

static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			     const struct iovec __user *uiov, unsigned uvec_seg,
			     int ddir)
{
	struct iovec *iov;
	int ret, nr_segs;

	if (iomsg->vec.iovec) {
		nr_segs = iomsg->vec.nr;
		iov = iomsg->vec.iovec;
	} else {
		nr_segs = 1;
		iov = &iomsg->fast_iov;
	}

	ret = __import_iovec(ddir, uiov, uvec_seg, nr_segs, &iov,
			     &iomsg->msg.msg_iter, io_is_compat(req->ctx));
	if (unlikely(ret < 0))
		return ret;

	if (iov) {
		req->flags |= REQ_F_NEED_CLEANUP;
		io_vec_reset_iovec(&iomsg->vec, iov, iomsg->msg.msg_iter.nr_segs);
	}
	return 0;
}

static int io_compat_msg_copy_hdr(struct io_kiocb *req,
				  struct io_async_msghdr *iomsg,
				  struct compat_msghdr *msg, int ddir,
				  struct sockaddr __user **save_addr)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_iovec __user *uiov;
	int ret;

	if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
		return -EFAULT;

	ret = __get_compat_msghdr(&iomsg->msg, msg, save_addr);
	if (ret)
		return ret;

	uiov = compat_ptr(msg->msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg->msg_iovlen == 0) {
			sr->len = 0;
		} else if (msg->msg_iovlen > 1) {
			return -EINVAL;
		} else {
			struct compat_iovec tmp_iov;

			if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
				return -EFAULT;
			sr->len = tmp_iov.iov_len;
		}
	}
	return 0;
}

static int io_copy_msghdr_from_user(struct user_msghdr *msg,
				    struct user_msghdr __user *umsg)
{
	if (!user_access_begin(umsg, sizeof(*umsg)))
		return -EFAULT;
	unsafe_get_user(msg->msg_name, &umsg->msg_name, ua_end);
	unsafe_get_user(msg->msg_namelen, &umsg->msg_namelen, ua_end);
	unsafe_get_user(msg->msg_iov, &umsg->msg_iov, ua_end);
	unsafe_get_user(msg->msg_iovlen, &umsg->msg_iovlen, ua_end);
	unsafe_get_user(msg->msg_control, &umsg->msg_control, ua_end);
	unsafe_get_user(msg->msg_controllen, &umsg->msg_controllen, ua_end);
	user_access_end();
	return 0;
ua_end:
	user_access_end();
	return -EFAULT;
}
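/*
 * Note (descriptive, added for clarity): the unsafe_get_user() run above
 * is the usual user_access_begin() pattern: one user-access window covers
 * all six fields, and any fault jumps to the shared ua_end label, which
 * closes the window before returning -EFAULT.
 */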
static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			   struct user_msghdr *msg, int ddir,
			   struct sockaddr __user **save_addr)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr __user *umsg = sr->umsg;
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

	if (io_is_compat(req->ctx)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ddir, save_addr);
		if (ret)
			return ret;

		memset(msg, 0, sizeof(*msg));
		msg->msg_namelen = cmsg.msg_namelen;
		msg->msg_controllen = cmsg.msg_controllen;
		msg->msg_iov = compat_ptr(cmsg.msg_iov);
		msg->msg_iovlen = cmsg.msg_iovlen;
		return 0;
	}

	ret = io_copy_msghdr_from_user(msg, umsg);
	if (unlikely(ret))
		return ret;

	msg->msg_flags = 0;

	ret = __copy_msghdr(&iomsg->msg, msg, save_addr);
	if (ret)
		return ret;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg->msg_iovlen == 0) {
			sr->len = 0;
		} else if (msg->msg_iovlen > 1) {
			return -EINVAL;
		} else {
			struct iovec __user *uiov = msg->msg_iov;
			struct iovec tmp_iov;

			if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
				return -EFAULT;
			sr->len = tmp_iov.iov_len;
		}
	}
	return 0;
}

void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	io_netmsg_iovec_free(io);
}

static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	void __user *addr;
	u16 addr_len;
	int ret;

	sr->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));

	if (READ_ONCE(sqe->__pad3[0]))
		return -EINVAL;

	kmsg->msg.msg_name = NULL;
	kmsg->msg.msg_namelen = 0;
	kmsg->msg.msg_control = NULL;
	kmsg->msg.msg_controllen = 0;
	kmsg->msg.msg_ubuf = NULL;

	addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	addr_len = READ_ONCE(sqe->addr_len);
	if (addr) {
		ret = move_addr_to_kernel(addr, addr_len, &kmsg->addr);
		if (unlikely(ret < 0))
			return ret;
		kmsg->msg.msg_name = &kmsg->addr;
		kmsg->msg.msg_namelen = addr_len;
	}
	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
		if (!(sr->flags & IORING_SEND_VECTORIZED)) {
			req->flags |= REQ_F_IMPORT_BUFFER;
			return 0;
		}

		kmsg->msg.msg_iter.nr_segs = sr->len;
		return io_prep_reg_iovec(req, &kmsg->vec, sr->buf, sr->len);
	}
	if (req->flags & REQ_F_BUFFER_SELECT)
		return 0;

	if (sr->flags & IORING_SEND_VECTORIZED)
		return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE);

	return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
}

static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct user_msghdr msg;
	int ret;

	sr->flags |= IORING_SEND_VECTORIZED;
	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	ret = io_msg_copy_hdr(req, kmsg, &msg, ITER_SOURCE, NULL);
	if (unlikely(ret))
		return ret;
	/* save msg_control as sys_sendmsg() overwrites it */
	sr->msg_control = kmsg->msg.msg_control_user;

	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
		kmsg->msg.msg_iter.nr_segs = msg.msg_iovlen;
		return io_prep_reg_iovec(req, &kmsg->vec, msg.msg_iov,
					 msg.msg_iovlen);
	}
	if (req->flags & REQ_F_BUFFER_SELECT)
		return 0;
	return io_net_import_vec(req, kmsg, msg.msg_iov, msg.msg_iovlen, ITER_SOURCE);
}
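/*
 * Userspace sketch (illustrative, liburing-style; not part of this file):
 * io_send_setup() above is what lets a plain send carry an optional
 * destination in sqe->addr2/sqe->addr_len, i.e. a sendto(2) equivalent:
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_send(sqe, sockfd, buf, len, 0);
 *	io_uring_prep_send_set_addr(sqe, (struct sockaddr *)&dst, dst_len);
 */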
#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE | IORING_SEND_VECTORIZED)

int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;
	sr->len = READ_ONCE(sqe->len);
	if (unlikely(sr->len < 0))
		return -EINVAL;
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~SENDMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (req->flags & REQ_F_BUFFER_SELECT)
		sr->buf_group = req->buf_index;
	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_SENDMSG)
			return -EINVAL;
		sr->msg_flags |= MSG_WAITALL;
		req->flags |= REQ_F_MULTISHOT;
	}

	if (io_is_compat(req->ctx))
		sr->msg_flags |= MSG_CMSG_COMPAT;

	if (unlikely(!io_msg_alloc_async(req)))
		return -ENOMEM;
	if (req->opcode != IORING_OP_SENDMSG)
		return io_send_setup(req, sqe);
	if (unlikely(sqe->addr2 || sqe->file_index))
		return -EINVAL;
	return io_sendmsg_setup(req, sqe);
}

static void io_req_msg_cleanup(struct io_kiocb *req,
			       unsigned int issue_flags)
{
	io_netmsg_recycle(req, issue_flags);
}

/*
 * For bundle completions, we need to figure out how many segments we consumed.
 * A bundle could be using a single ITER_UBUF if that's all we mapped, or it
 * could be using an ITER_IOVEC. If the latter, then if we consumed all of
 * the segments, then it's a trivial question to answer. If we have residual
 * data in the iter, then loop the segments to figure out how much we
 * transferred.
 */
static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
{
	struct iovec *iov;
	int nbufs;

	/* no data is always zero segments, and a ubuf is always 1 segment */
	if (ret <= 0)
		return 0;
	if (iter_is_ubuf(&kmsg->msg.msg_iter))
		return 1;

	iov = kmsg->vec.iovec;
	if (!iov)
		iov = &kmsg->fast_iov;

	/* if all data was transferred, it's basic pointer math */
	if (!iov_iter_count(&kmsg->msg.msg_iter))
		return iter_iov(&kmsg->msg.msg_iter) - iov;

	/* short transfer, count segments */
	nbufs = 0;
	do {
		int this_len = min_t(int, iov[nbufs].iov_len, ret);

		nbufs++;
		ret -= this_len;
	} while (ret);

	return nbufs;
}
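/*
 * Worked example (illustrative): with a bundle mapped as iovecs of
 * {4096, 4096, 4096} bytes and a short transfer of ret == 6144, the loop
 * above charges 4096 to iov[0] and the remaining 2048 to iov[1], so
 * io_bundle_nbufs() reports 2 consumed buffers.
 */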
static int io_net_kbuf_recycle(struct io_kiocb *req, struct io_buffer_list *bl,
			       struct io_async_msghdr *kmsg, int len)
{
	req->flags |= REQ_F_BL_NO_RECYCLE;
	if (req->flags & REQ_F_BUFFERS_COMMIT)
		io_kbuf_commit(req, bl, len, io_bundle_nbufs(kmsg, len));
	return IOU_RETRY;
}

static inline bool io_send_finish(struct io_kiocb *req,
				  struct io_async_msghdr *kmsg,
				  struct io_br_sel *sel)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	bool bundle_finished = sel->val <= 0;
	unsigned int cflags;

	if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
		cflags = io_put_kbuf(req, sel->val, sel->buf_list);
		goto finish;
	}

	cflags = io_put_kbufs(req, sel->val, sel->buf_list, io_bundle_nbufs(kmsg, sel->val));

	/*
	 * Don't start new bundles if the buffer list is empty, or if the
	 * current operation needed to go through polling to complete.
	 */
	if (bundle_finished || req->flags & (REQ_F_BL_EMPTY | REQ_F_POLLED))
		goto finish;

	/*
	 * Fill CQE for this receive and see if we should keep trying to
	 * receive from this socket.
	 */
	if (io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) {
		io_mshot_prep_retry(req, kmsg);
		return false;
	}

	/* Otherwise stop bundle and use the current result. */
finish:
	io_req_set_res(req, sel->val, cflags);
	sel->val = IOU_COMPLETE;
	return true;
}

int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret > 0 && io_net_retry(sock, flags)) {
			kmsg->msg.msg_controllen = 0;
			kmsg->msg.msg_control = NULL;
			sr->done_io += ret;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	io_req_msg_cleanup(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}
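/*
 * Retry example (illustrative): a MSG_WAITALL sendmsg of 8192 bytes that
 * gets a partial 4096 back from __sys_sendmsg_sock() takes the
 * io_net_retry() branch above: done_io accumulates the 4096, the request
 * is reissued with the iterator already advanced, and the eventual CQE
 * carries the full 8192.
 */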
static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags,
				 struct io_br_sel *sel, struct io_async_msghdr *kmsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct buf_sel_arg arg = {
		.iovs = &kmsg->fast_iov,
		.max_len = min_not_zero(sr->len, INT_MAX),
		.nr_iovs = 1,
		.buf_group = sr->buf_group,
	};
	int ret;

	if (kmsg->vec.iovec) {
		arg.nr_iovs = kmsg->vec.nr;
		arg.iovs = kmsg->vec.iovec;
		arg.mode = KBUF_MODE_FREE;
	}

	if (!(sr->flags & IORING_RECVSEND_BUNDLE))
		arg.nr_iovs = 1;
	else
		arg.mode |= KBUF_MODE_EXPAND;

	ret = io_buffers_select(req, &arg, sel, issue_flags);
	if (unlikely(ret < 0))
		return ret;

	if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
		kmsg->vec.nr = ret;
		kmsg->vec.iovec = arg.iovs;
		req->flags |= REQ_F_NEED_CLEANUP;
	}
	sr->len = arg.out_len;

	if (ret == 1) {
		sr->buf = arg.iovs[0].iov_base;
		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
	} else {
		iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE,
			      arg.iovs, ret, arg.out_len);
	}

	return 0;
}

int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct io_br_sel sel = { };
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;

retry_bundle:
	sel.buf_list = NULL;
	if (io_do_buffer_select(req)) {
		ret = io_send_select_buffer(req, issue_flags, &sel, kmsg);
		if (ret)
			return ret;
	}

	/*
	 * If MSG_WAITALL is set, or this is a bundle send, then we need
	 * the full amount. If just bundle is set, if we do a short send
	 * then we complete the bundle sequence rather than continue on.
	 */
	if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
	kmsg->msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &kmsg->msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			return io_net_kbuf_recycle(req, sel.buf_list, kmsg, ret);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	sel.val = ret;
	if (!io_send_finish(req, kmsg, &sel))
		goto retry_bundle;

	io_req_msg_cleanup(req, issue_flags);
	return sel.val;
}

static int io_recvmsg_mshot_prep(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg,
				 int namelen, size_t controllen)
{
	if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) ==
	    (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) {
		int hdr;

		if (unlikely(namelen < 0))
			return -EOVERFLOW;
		if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
				       namelen, &hdr))
			return -EOVERFLOW;
		if (check_add_overflow(hdr, controllen, &hdr))
			return -EOVERFLOW;

		iomsg->namelen = namelen;
		iomsg->controllen = controllen;
		return 0;
	}

	return 0;
}

static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct user_msghdr msg;
	int ret;

	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST, &iomsg->uaddr);
	if (unlikely(ret))
		return ret;

	if (!(req->flags & REQ_F_BUFFER_SELECT)) {
		ret = io_net_import_vec(req, iomsg, msg.msg_iov, msg.msg_iovlen,
					ITER_DEST);
		if (unlikely(ret))
			return ret;
	}
	return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
				     msg.msg_controllen);
}

static int io_recvmsg_prep_setup(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg;

	kmsg = io_msg_alloc_async(req);
	if (unlikely(!kmsg))
		return -ENOMEM;

	if (req->opcode == IORING_OP_RECV) {
		kmsg->msg.msg_name = NULL;
		kmsg->msg.msg_namelen = 0;
		kmsg->msg.msg_inq = 0;
		kmsg->msg.msg_control = NULL;
		kmsg->msg.msg_get_inq = 1;
		kmsg->msg.msg_controllen = 0;
		kmsg->msg.msg_iocb = NULL;
		kmsg->msg.msg_ubuf = NULL;

		if (req->flags & REQ_F_BUFFER_SELECT)
			return 0;
		return import_ubuf(ITER_DEST, sr->buf, sr->len,
				   &kmsg->msg.msg_iter);
	}

	return io_recvmsg_copy_hdr(req, kmsg);
}

#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \
			IORING_RECVSEND_BUNDLE)

int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;

	if (unlikely(sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	if (unlikely(sr->len < 0))
		return -EINVAL;
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~RECVMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags);
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (req->flags & REQ_F_BUFFER_SELECT)
		sr->buf_group = req->buf_index;
	sr->mshot_total_len = sr->mshot_len = 0;
	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		if (req->opcode == IORING_OP_RECV) {
			sr->mshot_len = sr->len;
			sr->mshot_total_len = READ_ONCE(sqe->optlen);
			if (sr->mshot_total_len)
				sr->flags |= IORING_RECV_MSHOT_LIM;
		} else if (sqe->optlen) {
			return -EINVAL;
		}
		req->flags |= REQ_F_APOLL_MULTISHOT;
	} else if (sqe->optlen) {
		return -EINVAL;
	}

	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_RECVMSG)
			return -EINVAL;
	}

	if (io_is_compat(req->ctx))
		sr->msg_flags |= MSG_CMSG_COMPAT;

	sr->nr_multishot_loops = 0;
	return io_recvmsg_prep_setup(req);
}
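/*
 * Userspace sketch (illustrative, liburing-style; not part of this file):
 * arm a multishot receive against provided-buffer group BGID and keep
 * reaping CQEs until one arrives without IORING_CQE_F_MORE set:
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_recv_multishot(sqe, sockfd, NULL, 0, 0);
 *	sqe->flags |= IOSQE_BUFFER_SELECT;
 *	sqe->buf_group = BGID;
 */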
/* bits to clear in old and inherit in new cflags on bundle retry */
#define CQE_F_MASK	(IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE)

/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req,
				  struct io_async_msghdr *kmsg,
				  struct io_br_sel *sel, bool mshot_finished,
				  unsigned issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	unsigned int cflags = 0;

	if (kmsg->msg.msg_inq > 0)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (sel->val > 0 && sr->flags & IORING_RECV_MSHOT_LIM) {
		/*
		 * If sr->len hits zero, the limit has been reached. Mark
		 * mshot as finished, and flag MSHOT_DONE as well to prevent
		 * a potential bundle from being retried.
		 */
		sr->mshot_total_len -= min_t(int, sel->val, sr->mshot_total_len);
		if (!sr->mshot_total_len) {
			sr->flags |= IORING_RECV_MSHOT_DONE;
			mshot_finished = true;
		}
	}

	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		size_t this_ret = sel->val - sr->done_io;

		cflags |= io_put_kbufs(req, this_ret, sel->buf_list, io_bundle_nbufs(kmsg, this_ret));
		if (sr->flags & IORING_RECV_RETRY)
			cflags = req->cqe.flags | (cflags & CQE_F_MASK);
		if (sr->mshot_len && sel->val >= sr->mshot_len)
			sr->flags |= IORING_RECV_MSHOT_CAP;
		/* bundle with no more immediate buffers, we're done */
		if (req->flags & REQ_F_BL_EMPTY)
			goto finish;
		/*
		 * If more is available AND it was a full transfer, retry and
		 * append to this one
		 */
		if (!(sr->flags & IORING_RECV_NO_RETRY) &&
		    kmsg->msg.msg_inq > 1 && this_ret > 0 &&
		    !iov_iter_count(&kmsg->msg.msg_iter)) {
			req->cqe.flags = cflags & ~CQE_F_MASK;
			sr->len = kmsg->msg.msg_inq;
			sr->done_io += this_ret;
			sr->flags |= IORING_RECV_RETRY;
			return false;
		}
	} else {
		cflags |= io_put_kbuf(req, sel->val, sel->buf_list);
	}

	/*
	 * Fill CQE for this receive and see if we should keep trying to
	 * receive from this socket.
	 */
	if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
	    io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) {
		sel->val = IOU_RETRY;
		io_mshot_prep_retry(req, kmsg);
		/* Known not-empty or unknown state, retry */
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY &&
			    !(sr->flags & IORING_RECV_MSHOT_CAP)) {
				return false;
			}
			/* mshot retries exceeded, force a requeue */
			sr->nr_multishot_loops = 0;
			sr->flags &= ~IORING_RECV_MSHOT_CAP;
			if (issue_flags & IO_URING_F_MULTISHOT)
				sel->val = IOU_REQUEUE;
		}
		return true;
	}

	/* Finish the request / stop multishot. */
finish:
	io_req_set_res(req, sel->val, cflags);
	sel->val = IOU_COMPLETE;
	io_req_msg_cleanup(req, issue_flags);
	return true;
}
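/*
 * Note (descriptive, added for clarity): the IOU_REQUEUE above is the
 * fairness valve for MULTISHOT_MAX_RETRY; after 32 back-to-back inline
 * receives the request is bounced to the back of the queue rather than
 * looping, so one flooding socket cannot starve other requests.
 */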
static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}
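/*
 * Layout sketch (illustrative; region sizes depend on the request): after
 * io_recvmsg_prep_multishot(), the selected buffer is carved up as
 *
 *	ubuf                                       ubuf + hdr
 *	|                                          |
 *	+----------------------+------+------------+---------+
 *	| io_uring_recvmsg_out | name | cmsg       | payload |
 *	+----------------------+------+------------+---------+
 *	                                             ^-- *buf, *len bytes
 *
 * with the name region sized kmsg->namelen and the control region sized
 * kmsg->controllen.
 */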
struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};

static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 * "fromlen shall refer to the value before truncation.."
	 *			1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}

int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct io_br_sel sel = { };
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	sel.buf_list = NULL;
	if (io_do_buffer_select(req)) {
		size_t len = sr->len;

		sel = io_buffer_select(req, &len, sr->buf_group, issue_flags);
		if (!sel.addr)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &sel.addr, &len);
			if (ret) {
				io_kbuf_recycle(req, sel.buf_list, issue_flags);
				return ret;
			}
		}

		iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, sel.addr, len);
	}

	kmsg->msg.msg_get_inq = 1;
	kmsg->msg.msg_inq = -1;
	if (req->flags & REQ_F_APOLL_MULTISHOT) {
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	} else {
		/* disable partial retry for recvmsg with cmsg attached */
		if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
			min_ret = iov_iter_count(&kmsg->msg.msg_iter);

		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);
	}

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			io_kbuf_recycle(req, sel.buf_list, issue_flags);
			return IOU_RETRY;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			return io_net_kbuf_recycle(req, sel.buf_list, kmsg, ret);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, sel.buf_list, issue_flags);

	sel.val = ret;
	if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
		goto retry_multishot;

	return sel.val;
}
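/*
 * Note (descriptive, added for clarity): msg_inq is primed to -1
 * ("unknown") before each receive above; sockets that support it report
 * how many bytes are still queued, which io_recv_finish() surfaces as
 * IORING_CQE_F_SOCK_NONEMPTY and the bundle path below uses to size the
 * next buffer pick.
 */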
static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
			      struct io_br_sel *sel, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	int ret;

	/*
	 * If the ring isn't locked, then don't use the peek interface
	 * to grab multiple buffers as we will lock/unlock between
	 * this selection and posting the buffers.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
	    sr->flags & IORING_RECVSEND_BUNDLE) {
		struct buf_sel_arg arg = {
			.iovs = &kmsg->fast_iov,
			.nr_iovs = 1,
			.mode = KBUF_MODE_EXPAND,
			.buf_group = sr->buf_group,
		};

		if (kmsg->vec.iovec) {
			arg.nr_iovs = kmsg->vec.nr;
			arg.iovs = kmsg->vec.iovec;
			arg.mode |= KBUF_MODE_FREE;
		}

		if (sel->val)
			arg.max_len = sel->val;
		else if (kmsg->msg.msg_inq > 1)
			arg.max_len = min_not_zero(sel->val, (ssize_t) kmsg->msg.msg_inq);

		/* if mshot limited, ensure we don't go over */
		if (sr->flags & IORING_RECV_MSHOT_LIM)
			arg.max_len = min_not_zero(arg.max_len, sr->mshot_total_len);
		ret = io_buffers_peek(req, &arg, sel);
		if (unlikely(ret < 0))
			return ret;

		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
			kmsg->vec.nr = ret;
			kmsg->vec.iovec = arg.iovs;
			req->flags |= REQ_F_NEED_CLEANUP;
		}
		if (arg.partial_map)
			sr->flags |= IORING_RECV_PARTIAL_MAP;

		/* special case 1 vec, can be a fast path */
		if (ret == 1) {
			sr->buf = arg.iovs[0].iov_base;
			sr->len = arg.iovs[0].iov_len;
			goto map_ubuf;
		}
		iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
			      arg.out_len);
	} else {
		size_t len = sel->val;

		*sel = io_buffer_select(req, &len, sr->buf_group, issue_flags);
		if (!sel->addr)
			return -ENOBUFS;
		sr->buf = sel->addr;
		sr->len = len;
map_ubuf:
		ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
	}

	return 0;
}

int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct io_br_sel sel;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	sel.buf_list = NULL;
	if (io_do_buffer_select(req)) {
		sel.val = sr->len;
		ret = io_recv_buf_select(req, kmsg, &sel, issue_flags);
		if (unlikely(ret < 0)) {
			kmsg->msg.msg_inq = -1;
			goto out_free;
		}
		sr->buf = NULL;
	}

	kmsg->msg.msg_flags = 0;
	kmsg->msg.msg_inq = -1;

	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = sock_recvmsg(sock, &kmsg->msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			io_kbuf_recycle(req, sel.buf_list, issue_flags);
			return IOU_RETRY;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			return io_net_kbuf_recycle(req, sel.buf_list, kmsg, ret);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		req_set_fail(req);
	}

	mshot_finished = ret <= 0;
	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, sel.buf_list, issue_flags);

	sel.val = ret;
	if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
		goto retry_multishot;

	return sel.val;
}
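/*
 * The zero-copy receive path below differs from io_recv() above in that
 * payload is never copied into a selected buffer; completions instead
 * reference the registered zcrx area (descriptive note; the area and
 * refill mechanics live in zcrx.c).
 */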
int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
	unsigned ifq_idx;

	if (unlikely(sqe->addr2 || sqe->addr || sqe->addr3))
		return -EINVAL;

	ifq_idx = READ_ONCE(sqe->zcrx_ifq_idx);
	zc->ifq = xa_load(&req->ctx->zcrx_ctxs, ifq_idx);
	if (!zc->ifq)
		return -EINVAL;

	zc->len = READ_ONCE(sqe->len);
	zc->flags = READ_ONCE(sqe->ioprio);
	if (READ_ONCE(sqe->msg_flags))
		return -EINVAL;
	if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT))
		return -EINVAL;
	/* multishot required */
	if (!(zc->flags & IORING_RECV_MULTISHOT))
		return -EINVAL;
	/* All data completions are posted as aux CQEs. */
	req->flags |= REQ_F_APOLL_MULTISHOT;

	return 0;
}

int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
	struct socket *sock;
	unsigned int len;
	int ret;

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	len = zc->len;
	ret = io_zcrx_recv(req, zc->ifq, sock, 0, issue_flags, &zc->len);
	if (len && zc->len == 0) {
		io_req_set_res(req, 0, 0);

		return IOU_COMPLETE;
	}
	if (unlikely(ret <= 0) && ret != -EAGAIN) {
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret == IOU_REQUEUE)
			return IOU_REQUEUE;

		req_set_fail(req);
		io_req_set_res(req, ret, 0);
		return IOU_COMPLETE;
	}
	return IOU_RETRY;
}

void io_send_zc_cleanup(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io = req->async_data;

	if (req_has_async_data(req))
		io_netmsg_iovec_free(io);
	if (zc->notif) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
	}
}
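/*
 * Userspace sketch (illustrative, liburing-style; not part of this file):
 * a zero-copy send as prepared below yields two completions, first the
 * send result with IORING_CQE_F_MORE set, then an IORING_CQE_F_NOTIF CQE
 * once the kernel is done with the pages and they may be reused:
 *
 *	io_uring_prep_send_zc(sqe, sockfd, buf, len, 0, 0);
 */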
#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
#define IO_ZC_FLAGS_VALID  (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE | \
			    IORING_SEND_VECTORIZED)

int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_async_msghdr *iomsg;
	struct io_kiocb *notif;
	u64 user_data;
	int ret;

	zc->done_io = 0;

	if (unlikely(READ_ONCE(sqe->__pad2[0])))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	user_data = READ_ONCE(sqe->addr3);
	if (!user_data)
		user_data = req->cqe.user_data;

	notif->cqe.user_data = user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	req->flags |= REQ_F_NEED_CLEANUP | REQ_F_POLL_NO_LAZY;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
		if (zc->flags & ~IO_ZC_FLAGS_VALID)
			return -EINVAL;
		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
			struct io_notif_data *nd = io_notif_to_data(notif);

			nd->zc_report = true;
			nd->zc_used = false;
			nd->zc_copied = false;
		}
	}

	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
	req->buf_index = READ_ONCE(sqe->buf_index);
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

	if (io_is_compat(ctx))
		zc->msg_flags |= MSG_CMSG_COMPAT;

	iomsg = io_msg_alloc_async(req);
	if (unlikely(!iomsg))
		return -ENOMEM;

	if (req->opcode == IORING_OP_SEND_ZC) {
		ret = io_send_setup(req, sqe);
	} else {
		if (unlikely(sqe->addr2 || sqe->file_index))
			return -EINVAL;
		ret = io_sendmsg_setup(req, sqe);
	}
	if (unlikely(ret))
		return ret;

	if (!(zc->flags & IORING_RECVSEND_FIXED_BUF)) {
		iomsg->msg.sg_from_iter = io_sg_from_iter_iovec;
		return io_notif_account_mem(zc->notif, iomsg->msg.msg_iter.count);
	}
	iomsg->msg.sg_from_iter = io_sg_from_iter;
	return 0;
}

static int io_sg_from_iter_iovec(struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	skb_zcopy_downgrade_managed(skb);
	return zerocopy_fill_skb_from_iter(skb, from, length);
}

static int io_sg_from_iter(struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	if (!frag)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return zerocopy_fill_skb_from_iter(skb, from, length);

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	if (bi.bi_size)
		ret = -EMSGSIZE;

	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;
	return ret;
}
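/*
 * Note (descriptive, added for clarity): io_sg_from_iter() takes no page
 * references of its own; it points skb frags straight at the registered
 * buffer's pages and marks the skb SKBFL_MANAGED_FRAG_REFS, while the
 * plain-iovec variant above downgrades to the generic copy helper.
 */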
static int io_send_zc_import(struct io_kiocb *req,
			     struct io_async_msghdr *kmsg,
			     unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_kiocb *notif = sr->notif;
	int ret;

	WARN_ON_ONCE(!(sr->flags & IORING_RECVSEND_FIXED_BUF));

	notif->buf_index = req->buf_index;

	if (!(sr->flags & IORING_SEND_VECTORIZED)) {
		ret = io_import_reg_buf(notif, &kmsg->msg.msg_iter,
					(u64)(uintptr_t)sr->buf, sr->len,
					ITER_SOURCE, issue_flags);
	} else {
		unsigned uvec_segs = kmsg->msg.msg_iter.nr_segs;

		ret = io_import_reg_vec(ITER_SOURCE, &kmsg->msg.msg_iter,
					notif, &kmsg->vec, uvec_segs,
					issue_flags);
	}

	if (unlikely(ret))
		return ret;
	req->flags &= ~REQ_F_IMPORT_BUFFER;
	return 0;
}

int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned msg_flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;
	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	if (req->flags & REQ_F_IMPORT_BUFFER) {
		ret = io_send_zc_import(req, kmsg, issue_flags);
		if (unlikely(ret))
			return ret;
	}

	msg_flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;

	if (req->opcode == IORING_OP_SEND_ZC) {
		msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
		kmsg->msg.msg_flags = msg_flags;
		ret = sock_sendmsg(sock, &kmsg->msg);
	} else {
		kmsg->msg.msg_control_user = sr->msg_control;
		ret = __sys_sendmsg_sock(sock, &kmsg->msg, msg_flags);
	}

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, sr->msg_flags)) {
			sr->done_io += ret;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(sr->notif);
		sr->notif = NULL;
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_COMPLETE;
}

void io_sendrecv_fail(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (sr->done_io)
		req->cqe.res = sr->done_io;

	if ((req->flags & REQ_F_NEED_CLEANUP) &&
	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
		req->cqe.flags |= IORING_CQE_F_MORE;
}
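/*
 * Userspace sketch (illustrative, liburing-style; not part of this file):
 * the multishot accept handled below posts one CQE per accepted
 * connection, each with IORING_CQE_F_MORE set while the request stays
 * armed:
 *
 *	io_uring_prep_multishot_accept(sqe, listen_fd, NULL, NULL, 0);
 */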
#define ACCEPT_FLAGS	(IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \
			 IORING_ACCEPT_POLL_FIRST)

int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);

	if (sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	accept->iou_flags = READ_ONCE(sqe->ioprio);
	if (accept->iou_flags & ~ACCEPT_FLAGS)
		return -EINVAL;

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		if (accept->iou_flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (accept->iou_flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	if (accept->iou_flags & IORING_ACCEPT_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	return 0;
}

int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool fixed = !!accept->file_slot;
	struct proto_accept_arg arg = {
		.flags = force_nonblock ? O_NONBLOCK : 0,
	};
	struct file *file;
	unsigned cflags;
	int ret, fd;

	if (!(req->flags & REQ_F_POLLED) &&
	    accept->iou_flags & IORING_ACCEPT_POLL_FIRST)
		return -EAGAIN;

retry:
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	arg.err = 0;
	arg.is_empty = -1;
	file = do_accept(req->file, &arg, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock &&
		    !(accept->iou_flags & IORING_ACCEPT_DONTWAIT))
			return IOU_RETRY;

		if (ret == -ERESTARTSYS)
			ret = -EINTR;
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  accept->file_slot);
	}

	cflags = 0;
	if (!arg.is_empty)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (ret >= 0 && (req->flags & REQ_F_APOLL_MULTISHOT) &&
	    io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1)
			goto retry;
		return IOU_RETRY;
	}

	io_req_set_res(req, ret, cflags);
	if (ret < 0)
		req_set_fail(req);
	return IOU_COMPLETE;
}

void io_socket_bpf_populate(struct io_uring_bpf_ctx *bctx, struct io_kiocb *req)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	bctx->socket.family = sock->domain;
	bctx->socket.type = sock->type;
	bctx->socket.protocol = sock->protocol;
}

int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
		return -EINVAL;
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	return 0;
}
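/*
 * Example (illustrative): as with socket(2), sqe->off carries the type
 * and the creation flags together, e.g. SOCK_STREAM | SOCK_NONBLOCK;
 * io_socket_prep() above splits the flag bits off via ~SOCK_TYPE_MASK
 * before io_socket() below creates the file.
 */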
int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
	bool fixed = !!sock->file_slot;
	struct file *file;
	int ret, fd;

	if (!fixed) {
		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  sock->file_slot);
	}
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	conn->in_progress = conn->seen_econnaborted = false;

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr);
}

int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io = req->async_data;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	if (connect->in_progress) {
		struct poll_table_struct pt = { ._key = EPOLLERR };

		if (vfs_poll(req->file, &pt) & EPOLLERR)
			goto get_sock_err;
	}

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->addr, connect->addr_len,
				 file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
	    && force_nonblock) {
		if (ret == -EINPROGRESS) {
			connect->in_progress = true;
		} else if (ret == -ECONNABORTED) {
			if (connect->seen_econnaborted)
				goto out;
			connect->seen_econnaborted = true;
		}
		return -EAGAIN;
	}
	if (connect->in_progress) {
		/*
		 * At least bluetooth will return -EBADFD on a re-connect
		 * attempt, and it's (supposedly) also valid to get -EISCONN
		 * which means the previous result is good. For both of these,
		 * grab the sock_error() and use that for the completion.
		 */
		if (ret == -EBADFD || ret == -EISCONN) {
get_sock_err:
			ret = sock_error(sock_from_file(req->file)->sk);
		}
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	io_req_msg_cleanup(req, issue_flags);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}
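/*
 * Flow example (illustrative): a nonblocking connect that gets
 * -EINPROGRESS from __sys_connect_file() sets in_progress and returns
 * -EAGAIN above, arming poll. On the reissue, either vfs_poll() reports
 * EPOLLERR or the second connect attempt returns -EISCONN/-EBADFD; both
 * roads end in sock_error(), which is 0 for a successful handshake.
 */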
int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct sockaddr __user *uaddr;
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	bind->addr_len = READ_ONCE(sqe->addr2);

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;
	return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr);
}

int io_bind(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct io_async_msghdr *io = req->async_data;
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_bind_socket(sock, &io->addr, bind->addr_len);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}

int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);

	if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2)
		return -EINVAL;

	listen->backlog = READ_ONCE(sqe->len);
	return 0;
}

int io_listen(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_listen_socket(sock, listen->backlog);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}

void io_netmsg_cache_free(const void *entry)
{
	struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;

	io_vec_free(&kmsg->vec);
	kfree(kmsg);
}