// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"

#if defined(CONFIG_NET)
struct io_shutdown {
	struct file *file;
	int how;
};

struct io_accept {
	struct file *file;
	struct sockaddr __user *addr;
	int __user *addr_len;
	int flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_socket {
	struct file *file;
	int domain;
	int type;
	int protocol;
	int flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_connect {
	struct file *file;
	struct sockaddr __user *addr;
	int addr_len;
	bool in_progress;
};

struct io_sr_msg {
	struct file *file;
	union {
		struct compat_msghdr __user *umsg_compat;
		struct user_msghdr __user *umsg;
		void __user *buf;
	};
	unsigned len;
	unsigned done_io;
	unsigned msg_flags;
	u16 flags;
	/* initialised and used only by !msg send variants */
	u16 addr_len;
	void __user *addr;
	/* used only for send zerocopy */
	struct io_kiocb *notif;
};

#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)

int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	return 0;
}

int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
	struct socket *sock;
	int ret;

	if (issue_flags & IO_URING_F_NONBLOCK)
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}

static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;

	if (!req_has_async_data(req) || issue_flags & IO_URING_F_UNLOCKED)
		return;

	/* Let normal cleanup path reap it if we fail adding to the cache */
	if (io_alloc_cache_put(&req->ctx->netmsg_cache, &hdr->cache)) {
		req->async_data = NULL;
		req->flags &= ~REQ_F_ASYNC_DATA;
	}
}

static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req,
						  unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_cache_entry *entry;
	struct io_async_msghdr *hdr;

	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
	    (entry = io_alloc_cache_get(&ctx->netmsg_cache)) != NULL) {
		hdr = container_of(entry, struct io_async_msghdr, cache);
		hdr->free_iov = NULL;
		req->flags |= REQ_F_ASYNC_DATA;
		req->async_data = hdr;
		return hdr;
	}

	if (!io_alloc_async_data(req)) {
		hdr = req->async_data;
		hdr->free_iov = NULL;
		return hdr;
	}
	return NULL;
}

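/*
 * Note on the helpers above: io_netmsg_recycle() and io_msg_alloc_async()
 * recycle the async msghdr through ctx->netmsg_cache rather than freeing
 * and reallocating it for every deferred send/recv. Both skip the cache and
 * fall back to plain allocation and the normal cleanup path when issue_flags
 * contains IO_URING_F_UNLOCKED. The prep-time wrapper below passes
 * issue_flags == 0, since ->prep_async always runs from the submission
 * context.
 */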
static inline struct io_async_msghdr *io_msg_alloc_async_prep(struct io_kiocb *req)
{
	/* ->prep_async is always called from the submission context */
	return io_msg_alloc_async(req, 0);
}

static int io_setup_async_msg(struct io_kiocb *req,
			      struct io_async_msghdr *kmsg,
			      unsigned int issue_flags)
{
	struct io_async_msghdr *async_msg;

	if (req_has_async_data(req))
		return -EAGAIN;
	async_msg = io_msg_alloc_async(req, issue_flags);
	if (!async_msg) {
		kfree(kmsg->free_iov);
		return -ENOMEM;
	}
	req->flags |= REQ_F_NEED_CLEANUP;
	memcpy(async_msg, kmsg, sizeof(*kmsg));
	if (async_msg->msg.msg_name)
		async_msg->msg.msg_name = &async_msg->addr;
	/* if we're using fast_iov, set it to the new one */
	if (!kmsg->free_iov) {
		size_t fast_idx = kmsg->msg.msg_iter.iov - kmsg->fast_iov;
		async_msg->msg.msg_iter.iov = &async_msg->fast_iov[fast_idx];
	}

	return -EAGAIN;
}

static int io_sendmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->free_iov = iomsg->fast_iov;
	return sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
				   &iomsg->free_iov);
}

int io_send_prep_async(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io;
	int ret;

	if (!zc->addr || req_has_async_data(req))
		return 0;
	io = io_msg_alloc_async_prep(req);
	if (!io)
		return -ENOMEM;
	ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr);
	return ret;
}

static int io_setup_async_addr(struct io_kiocb *req,
			       struct sockaddr_storage *addr_storage,
			       unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io;

	if (!sr->addr || req_has_async_data(req))
		return -EAGAIN;
	io = io_msg_alloc_async(req, issue_flags);
	if (!io)
		return -ENOMEM;
	memcpy(&io->addr, addr_storage, sizeof(io->addr));
	return -EAGAIN;
}

int io_sendmsg_prep_async(struct io_kiocb *req)
{
	int ret;

	if (!io_msg_alloc_async_prep(req))
		return -ENOMEM;
	ret = io_sendmsg_copy_hdr(req, req->async_data);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	kfree(io->free_iov);
}

int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (req->opcode == IORING_OP_SEND) {
		if (READ_ONCE(sqe->__pad3[0]))
			return -EINVAL;
		sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
		sr->addr_len = READ_ONCE(sqe->addr_len);
	} else if (sqe->addr2 || sqe->file_index) {
		return -EINVAL;
	}

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->done_io = 0;
	return 0;
}

int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_sendmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_msg(req, kmsg, issue_flags);
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	/* fast path, check for non-NULL to avoid function call */
	if (kmsg->free_iov)
		kfree(kmsg->free_iov);
	req->flags &= ~REQ_F_NEED_CLEANUP;
	io_netmsg_recycle(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct sockaddr_storage __address;
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct iovec iov;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;
	msg.msg_ubuf = NULL;

	if (sr->addr) {
		if (req_has_async_data(req)) {
			struct io_async_msghdr *io = req->async_data;

			msg.msg_name = &io->addr;
		} else {
			ret = move_addr_to_kernel(sr->addr, sr->addr_len, &__address);
			if (unlikely(ret < 0))
				return ret;
			msg.msg_name = (struct sockaddr *)&__address;
		}
		msg.msg_namelen = sr->addr_len;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_addr(req, &__address, issue_flags);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter);
	if (unlikely(ret))
		return ret;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_addr(req, &__address, issue_flags);

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_addr(req, &__address, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg)
{
	int hdr;

	if (iomsg->namelen < 0)
		return true;
	if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out),
			       iomsg->namelen, &hdr))
		return true;
	if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr))
		return true;

	return false;
}

static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr msg;
	int ret;

	if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg)))
		return -EFAULT;

	ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (ret)
		return ret;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg.msg_iovlen == 0) {
			sr->len = iomsg->fast_iov[0].iov_len = 0;
			iomsg->fast_iov[0].iov_base = NULL;
			iomsg->free_iov = NULL;
		} else if (msg.msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (copy_from_user(iomsg->fast_iov, msg.msg_iov, sizeof(*msg.msg_iov)))
				return -EFAULT;
			sr->len = iomsg->fast_iov[0].iov_len;
			iomsg->free_iov = NULL;
		}

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			iomsg->namelen = msg.msg_namelen;
			iomsg->controllen = msg.msg_controllen;
			if (io_recvmsg_multishot_overflow(iomsg))
				return -EOVERFLOW;
		}
	} else {
		iomsg->free_iov = iomsg->fast_iov;
		ret = __import_iovec(READ, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV,
				     &iomsg->free_iov, &iomsg->msg.msg_iter,
				     false);
		if (ret > 0)
			ret = 0;
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
					struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_msghdr msg;
	struct compat_iovec __user *uiov;
	int ret;

	if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg)))
		return -EFAULT;

	ret = __get_compat_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (ret)
		return ret;

	uiov = compat_ptr(msg.msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		compat_ssize_t clen;

		if (msg.msg_iovlen == 0) {
			sr->len = 0;
		} else if (msg.msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (!access_ok(uiov, sizeof(*uiov)))
				return -EFAULT;
			if (__get_user(clen, &uiov->iov_len))
				return -EFAULT;
			if (clen < 0)
				return -EINVAL;
			sr->len = clen;
		}

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			iomsg->namelen = msg.msg_namelen;
			iomsg->controllen = msg.msg_controllen;
			if (io_recvmsg_multishot_overflow(iomsg))
				return -EOVERFLOW;
		}
	} else {
		iomsg->free_iov = iomsg->fast_iov;
		ret = __import_iovec(READ, (struct iovec __user *)uiov, msg.msg_iovlen,
				     UIO_FASTIOV, &iomsg->free_iov,
				     &iomsg->msg.msg_iter, true);
		if (ret < 0)
			return ret;
	}

	return 0;
}
#endif

static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	iomsg->msg.msg_name = &iomsg->addr;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		return __io_compat_recvmsg_copy_hdr(req, iomsg);
#endif

	return __io_recvmsg_copy_hdr(req, iomsg);
}

int io_recvmsg_prep_async(struct io_kiocb *req)
{
	int ret;

	if (!io_msg_alloc_async_prep(req))
		return -ENOMEM;
	ret = io_recvmsg_copy_hdr(req, req->async_data);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT)

int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~(RECVMSG_FLAGS))
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		if (req->opcode == IORING_OP_RECV && sr->len)
			return -EINVAL;
		req->flags |= REQ_F_APOLL_MULTISHOT;
	}

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->done_io = 0;
	return 0;
}

static inline void io_recv_prep_retry(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;
	sr->len = 0; /* get from the provided buffer */
}

/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
				  unsigned int cflags, bool mshot_finished)
{
	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, *ret, cflags);
		*ret = IOU_OK;
		return true;
	}

	if (!mshot_finished) {
		if (io_post_aux_cqe(req->ctx, req->cqe.user_data, *ret,
				    cflags | IORING_CQE_F_MORE, false)) {
			io_recv_prep_retry(req);
			return false;
		}
		/*
		 * Otherwise stop multishot but use the current result.
		 * Probably will end up going into overflow, but this means
		 * we cannot trust the ordering anymore
		 */
	}

	io_req_set_res(req, *ret, cflags);

	if (req->flags & REQ_F_POLLED)
		*ret = IOU_STOP_MULTISHOT;
	else
		*ret = IOU_OK;
	return true;
}

static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}

struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};

static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 * "fromlen shall refer to the value before truncation.."
	 *			1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}

int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned int cflags;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_recvmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;
		size_t len = sr->len;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
			if (ret) {
				io_kbuf_recycle(req, issue_flags);
				return ret;
			}
		}

		kmsg->fast_iov[0].iov_base = buf;
		kmsg->fast_iov[0].iov_len = len;
		iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 1,
			      len);
	}

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_get_inq = 1;
	if (req->flags & REQ_F_APOLL_MULTISHOT)
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	else
		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			ret = io_setup_async_msg(req, kmsg, issue_flags);
			if (ret == -EAGAIN && (req->flags & IO_APOLL_MULTI_POLLED) ==
					       IO_APOLL_MULTI_POLLED) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}
			return ret;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	cflags = io_put_kbuf(req, issue_flags);
	if (kmsg->msg.msg_inq)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!io_recv_finish(req, &ret, cflags, mshot_finished))
		goto retry_multishot;

	if (mshot_finished) {
		io_netmsg_recycle(req, issue_flags);
		/* fast path, check for non-NULL to avoid function call */
		if (kmsg->free_iov)
			kfree(kmsg->free_iov);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}

	return ret;
}

int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct socket *sock;
	struct iovec iov;
	unsigned int cflags;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	size_t len = sr->len;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;
		sr->buf = buf;
	}

	ret = import_single_range(READ, sr->buf, len, &iov, &msg.msg_iter);
	if (unlikely(ret))
		goto out_free;

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = NULL;
	msg.msg_get_inq = 1;
	msg.msg_flags = 0;
	msg.msg_controllen = 0;
	msg.msg_iocb = NULL;
	msg.msg_ubuf = NULL;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	ret = sock_recvmsg(sock, &msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if ((req->flags & IO_APOLL_MULTI_POLLED) == IO_APOLL_MULTI_POLLED) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}

			return -EAGAIN;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	cflags = io_put_kbuf(req, issue_flags);
	if (msg.msg_inq)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!io_recv_finish(req, &ret, cflags, ret <= 0))
		goto retry_multishot;

	return ret;
}

void io_send_zc_cleanup(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io;

	if (req_has_async_data(req)) {
		io = req->async_data;
		/* might be ->fast_iov if *msg_copy_hdr failed */
		if (io->free_iov != io->fast_iov)
			kfree(io->free_iov);
	}
	if (zc->notif) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
	}
}

int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_kiocb *notif;

	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST |
			  IORING_RECVSEND_FIXED_BUF))
		return -EINVAL;
	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	notif->cqe.user_data = req->cqe.user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	req->flags |= REQ_F_NEED_CLEANUP;
	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		unsigned idx = READ_ONCE(sqe->buf_index);

		if (unlikely(idx >= ctx->nr_user_bufs))
			return -EFAULT;
		idx = array_index_nospec(idx, ctx->nr_user_bufs);
		req->imu = READ_ONCE(ctx->user_bufs[idx]);
		io_req_set_rsrc_node(notif, ctx, 0);
	}

	if (req->opcode == IORING_OP_SEND_ZC) {
		if (READ_ONCE(sqe->__pad3[0]))
			return -EINVAL;
		zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
		zc->addr_len = READ_ONCE(sqe->addr_len);
	} else {
		if (unlikely(sqe->addr2 || sqe->file_index))
			return -EINVAL;
		if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
			return -EINVAL;
	}

	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

	zc->done_io = 0;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		zc->msg_flags |= MSG_CMSG_COMPAT;
#endif
	return 0;
}

static int io_sg_from_iter_iovec(struct sock *sk, struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	skb_zcopy_downgrade_managed(skb);
	return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
}

static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	if (!frag)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	if (bi.bi_size)
		ret = -EMSGSIZE;

	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;

	if (sk && sk->sk_type == SOCK_STREAM) {
		sk_wmem_queued_add(sk, truesize);
		if (!skb_zcopy_pure(skb))
			sk_mem_charge(sk, truesize);
	} else {
		refcount_add(truesize, &skb->sk->sk_wmem_alloc);
	}
	return ret;
}

int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct sockaddr_storage __address;
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct iovec iov;
	struct socket *sock;
	unsigned msg_flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;

	if (zc->addr) {
		if (req_has_async_data(req)) {
			struct io_async_msghdr *io = req->async_data;

			msg.msg_name = &io->addr;
		} else {
			ret = move_addr_to_kernel(zc->addr, zc->addr_len,
						  &__address);
			if (unlikely(ret < 0))
				return ret;
			msg.msg_name = (struct sockaddr *)&__address;
		}
		msg.msg_namelen = zc->addr_len;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_addr(req, &__address, issue_flags);

	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		ret = io_import_fixed(WRITE, &msg.msg_iter, req->imu,
				      (u64)(uintptr_t)zc->buf, zc->len);
		if (unlikely(ret))
			return ret;
		msg.sg_from_iter = io_sg_from_iter;
	} else {
		ret = import_single_range(WRITE, zc->buf, zc->len, &iov,
					  &msg.msg_iter);
		if (unlikely(ret))
			return ret;
		ret = io_notif_account_mem(zc->notif, zc->len);
		if (unlikely(ret))
			return ret;
		msg.sg_from_iter = io_sg_from_iter_iovec;
	}

	msg_flags = zc->msg_flags | MSG_ZEROCOPY;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	msg.msg_flags = msg_flags;
	msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
	ret = sock_sendmsg(sock, &msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_addr(req, &__address, issue_flags);

		if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
			zc->len -= ret;
			zc->buf += ret;
			zc->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_addr(req, &__address, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += zc->done_io;
	else if (zc->done_io)
		ret = zc->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(zc->notif);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}

int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_sendmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

	flags = sr->msg_flags | MSG_ZEROCOPY;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
	kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_msg(req, kmsg, issue_flags);

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	/* fast path, check for non-NULL to avoid function call */
	if (kmsg->free_iov) {
		kfree(kmsg->free_iov);
		kmsg->free_iov = NULL;
	}

	io_netmsg_recycle(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(sr->notif);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}

void io_sendrecv_fail(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (req->flags & REQ_F_PARTIAL_IO)
		req->cqe.res = sr->done_io;

	if ((req->flags & REQ_F_NEED_CLEANUP) &&
	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
		req->cqe.flags |= IORING_CQE_F_MORE;
}

int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	unsigned flags;

	if (sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	flags = READ_ONCE(sqe->ioprio);
	if (flags & ~IORING_ACCEPT_MULTISHOT)
		return -EINVAL;

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		if (flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	return 0;
}

int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
	bool fixed = !!accept->file_slot;
	struct file *file;
	int ret, fd;

retry:
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock) {
			/*
			 * if it's multishot and polled, we don't need to
			 * return EAGAIN to arm the poll infra since it
			 * has already been done
			 */
			if ((req->flags & IO_APOLL_MULTI_POLLED) ==
			    IO_APOLL_MULTI_POLLED)
				ret = IOU_ISSUE_SKIP_COMPLETE;
			return ret;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  accept->file_slot);
	}

	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, ret, 0);
		return IOU_OK;
	}

	if (ret >= 0 &&
	    io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE, false))
		goto retry;

	io_req_set_res(req, ret, 0);
	if (req->flags & REQ_F_POLLED)
		return IOU_STOP_MULTISHOT;
	return IOU_OK;
}

int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
		return -EINVAL;
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	return 0;
}

int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
	bool fixed = !!sock->file_slot;
	struct file *file;
	int ret, fd;

	if (!fixed) {
		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  sock->file_slot);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_connect_prep_async(struct io_kiocb *req)
{
	struct io_async_connect *io = req->async_data;
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
}

int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	conn->in_progress = false;
	return 0;
}

int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_connect __io, *io;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	if (connect->in_progress) {
		struct socket *socket;

		ret = -ENOTSOCK;
		socket = sock_from_file(req->file);
		if (socket)
			ret = sock_error(socket->sk);
		goto out;
	}

	if (req_has_async_data(req)) {
		io = req->async_data;
	} else {
		ret = move_addr_to_kernel(connect->addr,
					  connect->addr_len,
					  &__io.address);
		if (ret)
			goto out;
		io = &__io;
	}

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->address,
				 connect->addr_len, file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
		if (ret == -EINPROGRESS) {
			connect->in_progress = true;
		} else {
			if (req_has_async_data(req))
				return -EAGAIN;
			if (io_alloc_async_data(req)) {
				ret = -ENOMEM;
				goto out;
			}
			memcpy(req->async_data, &__io, sizeof(__io));
		}
		return -EAGAIN;
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

void io_netmsg_cache_free(struct io_cache_entry *entry)
{
	kfree(container_of(entry, struct io_async_msghdr, cache));
}
#endif