1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/kernel.h>
3 #include <linux/errno.h>
4 #include <linux/file.h>
5 #include <linux/slab.h>
6 #include <linux/net.h>
7 #include <linux/compat.h>
8 #include <net/compat.h>
9 #include <linux/io_uring.h>
10
11 #include <uapi/linux/io_uring.h>
12
13 #include "filetable.h"
14 #include "io_uring.h"
15 #include "kbuf.h"
16 #include "alloc_cache.h"
17 #include "net.h"
18 #include "notif.h"
19 #include "rsrc.h"
20 #include "zcrx.h"
21
22 struct io_shutdown {
23 struct file *file;
24 int how;
25 };
26
27 struct io_accept {
28 struct file *file;
29 struct sockaddr __user *addr;
30 int __user *addr_len;
31 int flags;
32 int iou_flags;
33 u32 file_slot;
34 unsigned long nofile;
35 };
36
37 struct io_socket {
38 struct file *file;
39 int domain;
40 int type;
41 int protocol;
42 int flags;
43 u32 file_slot;
44 unsigned long nofile;
45 };
46
47 struct io_connect {
48 struct file *file;
49 struct sockaddr __user *addr;
50 int addr_len;
51 bool in_progress;
52 bool seen_econnaborted;
53 };
54
55 struct io_bind {
56 struct file *file;
57 int addr_len;
58 };
59
60 struct io_listen {
61 struct file *file;
62 int backlog;
63 };
64
65 struct io_sr_msg {
66 struct file *file;
67 union {
68 struct compat_msghdr __user *umsg_compat;
69 struct user_msghdr __user *umsg;
70 void __user *buf;
71 };
72 int len;
73 unsigned done_io;
74 unsigned msg_flags;
75 unsigned nr_multishot_loops;
76 u16 flags;
77 /* initialised and used only by !msg send variants */
78 u16 buf_group;
79 /* per-invocation mshot limit */
80 unsigned mshot_len;
81 /* overall mshot byte limit */
82 unsigned mshot_total_len;
83 void __user *msg_control;
84 /* used only for send zerocopy */
85 struct io_kiocb *notif;
86 };
87
88 /*
89 * The UAPI flags are the lower 8 bits, as that's all sqe->ioprio will hold
90 * anyway. Use the upper 8 bits for internal uses.
91 */
92 enum sr_retry_flags {
93 IORING_RECV_RETRY = (1U << 15),
94 IORING_RECV_PARTIAL_MAP = (1U << 14),
95 IORING_RECV_MSHOT_CAP = (1U << 13),
96 IORING_RECV_MSHOT_LIM = (1U << 12),
97 IORING_RECV_MSHOT_DONE = (1U << 11),
98
99 IORING_RECV_RETRY_CLEAR = IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP,
100 IORING_RECV_NO_RETRY = IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP |
101 IORING_RECV_MSHOT_CAP | IORING_RECV_MSHOT_DONE,
102 };
103
104 /*
105 * Number of times we'll try and do receives if there's more data. If we
106 * exceed this limit, then add us to the back of the queue and retry from
107 * there. This helps fairness between flooding clients.
108 */
109 #define MULTISHOT_MAX_RETRY 32
110
111 struct io_recvzc {
112 struct file *file;
113 u16 flags;
114 u32 len;
115 struct io_zcrx_ifq *ifq;
116 };
117
118 static int io_sg_from_iter_iovec(struct sk_buff *skb,
119 struct iov_iter *from, size_t length);
120 static int io_sg_from_iter(struct sk_buff *skb,
121 struct iov_iter *from, size_t length);
122
123 int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
124 {
125 struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
126
127 if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
128 sqe->buf_index || sqe->splice_fd_in))
129 return -EINVAL;
130
131 shutdown->how = READ_ONCE(sqe->len);
132 req->flags |= REQ_F_FORCE_ASYNC;
133 return 0;
134 }
135
136 int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
137 {
138 struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
139 struct socket *sock;
140 int ret;
141
142 WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
143
144 sock = sock_from_file(req->file);
145 if (unlikely(!sock))
146 return -ENOTSOCK;
147
148 ret = __sys_shutdown_sock(sock, shutdown->how);
149 io_req_set_res(req, ret, 0);
150 return IOU_COMPLETE;
151 }
152
153 static bool io_net_retry(struct socket *sock, int flags)
154 {
155 if (!(flags & MSG_WAITALL))
156 return false;
157 return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
158 }
159
160 static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
161 {
162 if (kmsg->vec.iovec)
163 io_vec_free(&kmsg->vec);
164 }
165
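/*
 * Try to stash the async msghdr in the per-ring netmsg cache for reuse by a
 * later request. If the ring lock isn't held (io-wq), just free the iovec.
 */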
166 static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
167 {
168 struct io_async_msghdr *hdr = req->async_data;
169
170 /* can't recycle, ensure we free the iovec if we have one */
171 if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
172 io_netmsg_iovec_free(hdr);
173 return;
174 }
175
176 /* Let normal cleanup path reap it if we fail adding to the cache */
177 io_alloc_cache_vec_kasan(&hdr->vec);
178 if (hdr->vec.nr > IO_VEC_CACHE_SOFT_CAP)
179 io_vec_free(&hdr->vec);
180
181 if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr))
182 io_req_async_data_clear(req, REQ_F_NEED_CLEANUP);
183 }
184
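/*
 * Allocate (or pull from the cache) the async msghdr for this request. A
 * cached entry may still carry an iovec, in which case cleanup is flagged.
 */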
185 static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
186 {
187 struct io_ring_ctx *ctx = req->ctx;
188 struct io_async_msghdr *hdr;
189
190 hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req);
191 if (!hdr)
192 return NULL;
193
194 /* If the async data was cached, we might have an iov cached inside. */
195 if (hdr->vec.iovec)
196 req->flags |= REQ_F_NEED_CLEANUP;
197 return hdr;
198 }
199
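/* Reset per-iteration state so a multishot/bundle request can be re-issued. */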
200 static inline void io_mshot_prep_retry(struct io_kiocb *req,
201 struct io_async_msghdr *kmsg)
202 {
203 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
204
205 req->flags &= ~REQ_F_BL_EMPTY;
206 sr->done_io = 0;
207 sr->flags &= ~IORING_RECV_RETRY_CLEAR;
208 sr->len = sr->mshot_len;
209 }
210
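/*
 * Import a user iovec into the request's iterator, reusing a previously
 * allocated vector or the inline fast_iov when a single segment suffices.
 */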
211 static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg,
212 const struct iovec __user *uiov, unsigned uvec_seg,
213 int ddir)
214 {
215 struct iovec *iov;
216 int ret, nr_segs;
217
218 if (iomsg->vec.iovec) {
219 nr_segs = iomsg->vec.nr;
220 iov = iomsg->vec.iovec;
221 } else {
222 nr_segs = 1;
223 iov = &iomsg->fast_iov;
224 }
225
226 ret = __import_iovec(ddir, uiov, uvec_seg, nr_segs, &iov,
227 &iomsg->msg.msg_iter, io_is_compat(req->ctx));
228 if (unlikely(ret < 0))
229 return ret;
230
231 if (iov) {
232 req->flags |= REQ_F_NEED_CLEANUP;
233 io_vec_reset_iovec(&iomsg->vec, iov, iomsg->msg.msg_iter.nr_segs);
234 }
235 return 0;
236 }
237
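/*
 * Copy and validate a compat msghdr from userspace; with provided buffers,
 * only a single iovec entry is allowed and its length is recorded.
 */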
238 static int io_compat_msg_copy_hdr(struct io_kiocb *req,
239 struct io_async_msghdr *iomsg,
240 struct compat_msghdr *msg, int ddir,
241 struct sockaddr __user **save_addr)
242 {
243 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
244 struct compat_iovec __user *uiov;
245 int ret;
246
247 if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
248 return -EFAULT;
249
250 ret = __get_compat_msghdr(&iomsg->msg, msg, save_addr);
251 if (ret)
252 return ret;
253
254 uiov = compat_ptr(msg->msg_iov);
255 if (req->flags & REQ_F_BUFFER_SELECT) {
256 if (msg->msg_iovlen == 0) {
257 sr->len = 0;
258 } else if (msg->msg_iovlen > 1) {
259 return -EINVAL;
260 } else {
261 struct compat_iovec tmp_iov;
262
263 if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
264 return -EFAULT;
265 sr->len = tmp_iov.iov_len;
266 }
267 }
268 return 0;
269 }
270
271 static int io_copy_msghdr_from_user(struct user_msghdr *msg,
272 struct user_msghdr __user *umsg)
273 {
274 if (!user_access_begin(umsg, sizeof(*umsg)))
275 return -EFAULT;
276 unsafe_get_user(msg->msg_name, &umsg->msg_name, ua_end);
277 unsafe_get_user(msg->msg_namelen, &umsg->msg_namelen, ua_end);
278 unsafe_get_user(msg->msg_iov, &umsg->msg_iov, ua_end);
279 unsafe_get_user(msg->msg_iovlen, &umsg->msg_iovlen, ua_end);
280 unsafe_get_user(msg->msg_control, &umsg->msg_control, ua_end);
281 unsafe_get_user(msg->msg_controllen, &umsg->msg_controllen, ua_end);
282 user_access_end();
283 return 0;
284 ua_end:
285 user_access_end();
286 return -EFAULT;
287 }
288
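/*
 * Copy the user msghdr (native or compat) and prime the async msghdr. With
 * provided buffers, only a single iovec entry is allowed and its length is
 * stashed in sr->len.
 */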
289 static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
290 struct user_msghdr *msg, int ddir,
291 struct sockaddr __user **save_addr)
292 {
293 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
294 struct user_msghdr __user *umsg = sr->umsg;
295 int ret;
296
297 iomsg->msg.msg_name = &iomsg->addr;
298 iomsg->msg.msg_iter.nr_segs = 0;
299
300 if (io_is_compat(req->ctx)) {
301 struct compat_msghdr cmsg;
302
303 ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ddir, save_addr);
304 if (ret)
305 return ret;
306
307 memset(msg, 0, sizeof(*msg));
308 msg->msg_namelen = cmsg.msg_namelen;
309 msg->msg_controllen = cmsg.msg_controllen;
310 msg->msg_iov = compat_ptr(cmsg.msg_iov);
311 msg->msg_iovlen = cmsg.msg_iovlen;
312 return 0;
313 }
314
315 ret = io_copy_msghdr_from_user(msg, umsg);
316 if (unlikely(ret))
317 return ret;
318
319 msg->msg_flags = 0;
320
321 ret = __copy_msghdr(&iomsg->msg, msg, save_addr);
322 if (ret)
323 return ret;
324
325 if (req->flags & REQ_F_BUFFER_SELECT) {
326 if (msg->msg_iovlen == 0) {
327 sr->len = 0;
328 } else if (msg->msg_iovlen > 1) {
329 return -EINVAL;
330 } else {
331 struct iovec __user *uiov = msg->msg_iov;
332 struct iovec tmp_iov;
333
334 if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
335 return -EFAULT;
336 sr->len = tmp_iov.iov_len;
337 }
338 }
339 return 0;
340 }
341
342 void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
343 {
344 struct io_async_msghdr *io = req->async_data;
345
346 io_netmsg_iovec_free(io);
347 }
348
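/*
 * Prep for the non-msghdr send variants: stash an optional destination
 * address and import the user buffer, unless it's a fixed buffer or a
 * provided buffer selected at issue time.
 */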
349 static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
350 {
351 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
352 struct io_async_msghdr *kmsg = req->async_data;
353 void __user *addr;
354 u16 addr_len;
355 int ret;
356
357 sr->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
358
359 if (READ_ONCE(sqe->__pad3[0]))
360 return -EINVAL;
361
362 kmsg->msg.msg_name = NULL;
363 kmsg->msg.msg_namelen = 0;
364 kmsg->msg.msg_control = NULL;
365 kmsg->msg.msg_controllen = 0;
366 kmsg->msg.msg_ubuf = NULL;
367
368 addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
369 addr_len = READ_ONCE(sqe->addr_len);
370 if (addr) {
371 ret = move_addr_to_kernel(addr, addr_len, &kmsg->addr);
372 if (unlikely(ret < 0))
373 return ret;
374 kmsg->msg.msg_name = &kmsg->addr;
375 kmsg->msg.msg_namelen = addr_len;
376 }
377 if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
378 if (sr->flags & IORING_SEND_VECTORIZED)
379 return -EINVAL;
380 req->flags |= REQ_F_IMPORT_BUFFER;
381 return 0;
382 }
383 if (req->flags & REQ_F_BUFFER_SELECT)
384 return 0;
385
386 if (sr->flags & IORING_SEND_VECTORIZED)
387 return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE);
388
389 return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
390 }
391
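/*
 * Prep for sendmsg-style sends: copy the user msghdr and import its iovec,
 * or register the iovec when fixed buffers are used.
 */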
392 static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
393 {
394 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
395 struct io_async_msghdr *kmsg = req->async_data;
396 struct user_msghdr msg;
397 int ret;
398
399 sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
400 ret = io_msg_copy_hdr(req, kmsg, &msg, ITER_SOURCE, NULL);
401 if (unlikely(ret))
402 return ret;
403 /* save msg_control as sys_sendmsg() overwrites it */
404 sr->msg_control = kmsg->msg.msg_control_user;
405
406 if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
407 kmsg->msg.msg_iter.nr_segs = msg.msg_iovlen;
408 return io_prep_reg_iovec(req, &kmsg->vec, msg.msg_iov,
409 msg.msg_iovlen);
410 }
411 if (req->flags & REQ_F_BUFFER_SELECT)
412 return 0;
413 return io_net_import_vec(req, kmsg, msg.msg_iov, msg.msg_iovlen, ITER_SOURCE);
414 }
415
416 #define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE | IORING_SEND_VECTORIZED)
417
418 int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
419 {
420 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
421
422 sr->done_io = 0;
423 sr->len = READ_ONCE(sqe->len);
424 if (unlikely(sr->len < 0))
425 return -EINVAL;
426 sr->flags = READ_ONCE(sqe->ioprio);
427 if (sr->flags & ~SENDMSG_FLAGS)
428 return -EINVAL;
429 sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
430 if (sr->msg_flags & MSG_DONTWAIT)
431 req->flags |= REQ_F_NOWAIT;
432 if (req->flags & REQ_F_BUFFER_SELECT)
433 sr->buf_group = req->buf_index;
434 if (sr->flags & IORING_RECVSEND_BUNDLE) {
435 if (req->opcode == IORING_OP_SENDMSG)
436 return -EINVAL;
437 sr->msg_flags |= MSG_WAITALL;
438 req->flags |= REQ_F_MULTISHOT;
439 }
440
441 if (io_is_compat(req->ctx))
442 sr->msg_flags |= MSG_CMSG_COMPAT;
443
444 if (unlikely(!io_msg_alloc_async(req)))
445 return -ENOMEM;
446 if (req->opcode != IORING_OP_SENDMSG)
447 return io_send_setup(req, sqe);
448 if (unlikely(sqe->addr2 || sqe->file_index))
449 return -EINVAL;
450 return io_sendmsg_setup(req, sqe);
451 }
452
453 static void io_req_msg_cleanup(struct io_kiocb *req,
454 unsigned int issue_flags)
455 {
456 io_netmsg_recycle(req, issue_flags);
457 }
458
459 /*
460 * For bundle completions, we need to figure out how many segments we consumed.
461 * A bundle could be using a single ITER_UBUF if that's all we mapped, or it
462 * could be using an ITER_IOVEC. If the latter, then if we consumed all of
463 * the segments, then it's a trivial question to answer. If we have residual
464 * data in the iter, then loop the segments to figure out how much we
465 * transferred.
466 */
467 static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
468 {
469 struct iovec *iov;
470 int nbufs;
471
472 /* no data is always zero segments, and a ubuf is always 1 segment */
473 if (ret <= 0)
474 return 0;
475 if (iter_is_ubuf(&kmsg->msg.msg_iter))
476 return 1;
477
478 iov = kmsg->vec.iovec;
479 if (!iov)
480 iov = &kmsg->fast_iov;
481
482 /* if all data was transferred, it's basic pointer math */
483 if (!iov_iter_count(&kmsg->msg.msg_iter))
484 return iter_iov(&kmsg->msg.msg_iter) - iov;
485
486 /* short transfer, count segments */
487 nbufs = 0;
488 do {
489 int this_len = min_t(int, iov[nbufs].iov_len, ret);
490
491 nbufs++;
492 ret -= this_len;
493 } while (ret);
494
495 return nbufs;
496 }
497
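/*
 * A partial transfer happened: commit the provided buffers consumed so far
 * and disable buffer recycling before asking for a retry.
 */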
498 static int io_net_kbuf_recyle(struct io_kiocb *req, struct io_buffer_list *bl,
499 struct io_async_msghdr *kmsg, int len)
500 {
501 req->flags |= REQ_F_BL_NO_RECYCLE;
502 if (req->flags & REQ_F_BUFFERS_COMMIT)
503 io_kbuf_commit(req, bl, len, io_bundle_nbufs(kmsg, len));
504 return IOU_RETRY;
505 }
506
507 static inline bool io_send_finish(struct io_kiocb *req,
508 struct io_async_msghdr *kmsg,
509 struct io_br_sel *sel)
510 {
511 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
512 bool bundle_finished = sel->val <= 0;
513 unsigned int cflags;
514
515 if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
516 cflags = io_put_kbuf(req, sel->val, sel->buf_list);
517 goto finish;
518 }
519
520 cflags = io_put_kbufs(req, sel->val, sel->buf_list, io_bundle_nbufs(kmsg, sel->val));
521
522 /*
523 * Don't start new bundles if the buffer list is empty, or if the
524 * current operation needed to go through polling to complete.
525 */
526 if (bundle_finished || req->flags & (REQ_F_BL_EMPTY | REQ_F_POLLED))
527 goto finish;
528
529 /*
530 * Fill CQE for this receive and see if we should keep trying to
531 * receive from this socket.
532 */
533 if (io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) {
534 io_mshot_prep_retry(req, kmsg);
535 return false;
536 }
537
538 /* Otherwise stop bundle and use the current result. */
539 finish:
540 io_req_set_res(req, sel->val, cflags);
541 sel->val = IOU_COMPLETE;
542 return true;
543 }
544
545 int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
546 {
547 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
548 struct io_async_msghdr *kmsg = req->async_data;
549 struct socket *sock;
550 unsigned flags;
551 int min_ret = 0;
552 int ret;
553
554 sock = sock_from_file(req->file);
555 if (unlikely(!sock))
556 return -ENOTSOCK;
557
558 if (!(req->flags & REQ_F_POLLED) &&
559 (sr->flags & IORING_RECVSEND_POLL_FIRST))
560 return -EAGAIN;
561
562 flags = sr->msg_flags;
563 if (issue_flags & IO_URING_F_NONBLOCK)
564 flags |= MSG_DONTWAIT;
565 if (flags & MSG_WAITALL)
566 min_ret = iov_iter_count(&kmsg->msg.msg_iter);
567
568 kmsg->msg.msg_control_user = sr->msg_control;
569
570 ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
571
572 if (ret < min_ret) {
573 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
574 return -EAGAIN;
575 if (ret > 0 && io_net_retry(sock, flags)) {
576 kmsg->msg.msg_controllen = 0;
577 kmsg->msg.msg_control = NULL;
578 sr->done_io += ret;
579 return -EAGAIN;
580 }
581 if (ret == -ERESTARTSYS)
582 ret = -EINTR;
583 req_set_fail(req);
584 }
585 io_req_msg_cleanup(req, issue_flags);
586 if (ret >= 0)
587 ret += sr->done_io;
588 else if (sr->done_io)
589 ret = sr->done_io;
590 io_req_set_res(req, ret, 0);
591 return IOU_COMPLETE;
592 }
593
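/*
 * Pick one or more provided buffers for a send and map them into the message
 * iterator; bundle sends may expand into multiple iovecs.
 */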
594 static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags,
595 struct io_br_sel *sel, struct io_async_msghdr *kmsg)
596 {
597 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
598 struct buf_sel_arg arg = {
599 .iovs = &kmsg->fast_iov,
600 .max_len = min_not_zero(sr->len, INT_MAX),
601 .nr_iovs = 1,
602 .buf_group = sr->buf_group,
603 };
604 int ret;
605
606 if (kmsg->vec.iovec) {
607 arg.nr_iovs = kmsg->vec.nr;
608 arg.iovs = kmsg->vec.iovec;
609 arg.mode = KBUF_MODE_FREE;
610 }
611
612 if (!(sr->flags & IORING_RECVSEND_BUNDLE))
613 arg.nr_iovs = 1;
614 else
615 arg.mode |= KBUF_MODE_EXPAND;
616
617 ret = io_buffers_select(req, &arg, sel, issue_flags);
618 if (unlikely(ret < 0))
619 return ret;
620
621 if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
622 kmsg->vec.nr = ret;
623 kmsg->vec.iovec = arg.iovs;
624 req->flags |= REQ_F_NEED_CLEANUP;
625 }
626 sr->len = arg.out_len;
627
628 if (ret == 1) {
629 sr->buf = arg.iovs[0].iov_base;
630 ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
631 &kmsg->msg.msg_iter);
632 if (unlikely(ret))
633 return ret;
634 } else {
635 iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE,
636 arg.iovs, ret, arg.out_len);
637 }
638
639 return 0;
640 }
641
642 int io_send(struct io_kiocb *req, unsigned int issue_flags)
643 {
644 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
645 struct io_async_msghdr *kmsg = req->async_data;
646 struct io_br_sel sel = { };
647 struct socket *sock;
648 unsigned flags;
649 int min_ret = 0;
650 int ret;
651
652 sock = sock_from_file(req->file);
653 if (unlikely(!sock))
654 return -ENOTSOCK;
655
656 if (!(req->flags & REQ_F_POLLED) &&
657 (sr->flags & IORING_RECVSEND_POLL_FIRST))
658 return -EAGAIN;
659
660 flags = sr->msg_flags;
661 if (issue_flags & IO_URING_F_NONBLOCK)
662 flags |= MSG_DONTWAIT;
663
664 retry_bundle:
665 sel.buf_list = NULL;
666 if (io_do_buffer_select(req)) {
667 ret = io_send_select_buffer(req, issue_flags, &sel, kmsg);
668 if (ret)
669 return ret;
670 }
671
672 /*
673 * If MSG_WAITALL is set, or this is a bundle send, then we need
674 * the full amount. If just bundle is set, if we do a short send
675 * then we complete the bundle sequence rather than continue on.
676 */
677 if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE)
678 min_ret = iov_iter_count(&kmsg->msg.msg_iter);
679
680 flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
681 kmsg->msg.msg_flags = flags;
682 ret = sock_sendmsg(sock, &kmsg->msg);
683 if (ret < min_ret) {
684 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
685 return -EAGAIN;
686
687 if (ret > 0 && io_net_retry(sock, flags)) {
688 sr->len -= ret;
689 sr->buf += ret;
690 sr->done_io += ret;
691 return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
692 }
693 if (ret == -ERESTARTSYS)
694 ret = -EINTR;
695 req_set_fail(req);
696 }
697 if (ret >= 0)
698 ret += sr->done_io;
699 else if (sr->done_io)
700 ret = sr->done_io;
701
702 sel.val = ret;
703 if (!io_send_finish(req, kmsg, &sel))
704 goto retry_bundle;
705
706 io_req_msg_cleanup(req, issue_flags);
707 return sel.val;
708 }
709
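/*
 * For multishot recvmsg with provided buffers, validate that the recvmsg_out
 * header fits and stash the name/control lengths for later use.
 */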
710 static int io_recvmsg_mshot_prep(struct io_kiocb *req,
711 struct io_async_msghdr *iomsg,
712 int namelen, size_t controllen)
713 {
714 if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) ==
715 (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) {
716 int hdr;
717
718 if (unlikely(namelen < 0))
719 return -EOVERFLOW;
720 if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
721 namelen, &hdr))
722 return -EOVERFLOW;
723 if (check_add_overflow(hdr, controllen, &hdr))
724 return -EOVERFLOW;
725
726 iomsg->namelen = namelen;
727 iomsg->controllen = controllen;
728 return 0;
729 }
730
731 return 0;
732 }
733
734 static int io_recvmsg_copy_hdr(struct io_kiocb *req,
735 struct io_async_msghdr *iomsg)
736 {
737 struct user_msghdr msg;
738 int ret;
739
740 ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST, &iomsg->uaddr);
741 if (unlikely(ret))
742 return ret;
743
744 if (!(req->flags & REQ_F_BUFFER_SELECT)) {
745 ret = io_net_import_vec(req, iomsg, msg.msg_iov, msg.msg_iovlen,
746 ITER_DEST);
747 if (unlikely(ret))
748 return ret;
749 }
750 return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
751 msg.msg_controllen);
752 }
753
754 static int io_recvmsg_prep_setup(struct io_kiocb *req)
755 {
756 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
757 struct io_async_msghdr *kmsg;
758
759 kmsg = io_msg_alloc_async(req);
760 if (unlikely(!kmsg))
761 return -ENOMEM;
762
763 if (req->opcode == IORING_OP_RECV) {
764 kmsg->msg.msg_name = NULL;
765 kmsg->msg.msg_namelen = 0;
766 kmsg->msg.msg_inq = 0;
767 kmsg->msg.msg_control = NULL;
768 kmsg->msg.msg_get_inq = 1;
769 kmsg->msg.msg_controllen = 0;
770 kmsg->msg.msg_iocb = NULL;
771 kmsg->msg.msg_ubuf = NULL;
772
773 if (req->flags & REQ_F_BUFFER_SELECT)
774 return 0;
775 return import_ubuf(ITER_DEST, sr->buf, sr->len,
776 &kmsg->msg.msg_iter);
777 }
778
779 return io_recvmsg_copy_hdr(req, kmsg);
780 }
781
782 #define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \
783 IORING_RECVSEND_BUNDLE)
784
785 int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
786 {
787 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
788
789 sr->done_io = 0;
790
791 if (unlikely(sqe->addr2))
792 return -EINVAL;
793
794 sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
795 sr->len = READ_ONCE(sqe->len);
796 if (unlikely(sr->len < 0))
797 return -EINVAL;
798 sr->flags = READ_ONCE(sqe->ioprio);
799 if (sr->flags & ~RECVMSG_FLAGS)
800 return -EINVAL;
801 sr->msg_flags = READ_ONCE(sqe->msg_flags);
802 if (sr->msg_flags & MSG_DONTWAIT)
803 req->flags |= REQ_F_NOWAIT;
804 if (sr->msg_flags & MSG_ERRQUEUE)
805 req->flags |= REQ_F_CLEAR_POLLIN;
806 if (req->flags & REQ_F_BUFFER_SELECT)
807 sr->buf_group = req->buf_index;
808 sr->mshot_total_len = sr->mshot_len = 0;
809 if (sr->flags & IORING_RECV_MULTISHOT) {
810 if (!(req->flags & REQ_F_BUFFER_SELECT))
811 return -EINVAL;
812 if (sr->msg_flags & MSG_WAITALL)
813 return -EINVAL;
814 if (req->opcode == IORING_OP_RECV) {
815 sr->mshot_len = sr->len;
816 sr->mshot_total_len = READ_ONCE(sqe->optlen);
817 if (sr->mshot_total_len)
818 sr->flags |= IORING_RECV_MSHOT_LIM;
819 } else if (sqe->optlen) {
820 return -EINVAL;
821 }
822 req->flags |= REQ_F_APOLL_MULTISHOT;
823 } else if (sqe->optlen) {
824 return -EINVAL;
825 }
826
827 if (sr->flags & IORING_RECVSEND_BUNDLE) {
828 if (req->opcode == IORING_OP_RECVMSG)
829 return -EINVAL;
830 }
831
832 if (io_is_compat(req->ctx))
833 sr->msg_flags |= MSG_CMSG_COMPAT;
834
835 sr->nr_multishot_loops = 0;
836 return io_recvmsg_prep_setup(req);
837 }
838
839 /* bits to clear in old and inherit in new cflags on bundle retry */
840 #define CQE_F_MASK (IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE)
841
842 /*
843 * Finishes io_recv and io_recvmsg.
844 *
845 * Returns true if it is actually finished, or false if it should run
846 * again (for multishot).
847 */
848 static inline bool io_recv_finish(struct io_kiocb *req,
849 struct io_async_msghdr *kmsg,
850 struct io_br_sel *sel, bool mshot_finished,
851 unsigned issue_flags)
852 {
853 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
854 unsigned int cflags = 0;
855
856 if (kmsg->msg.msg_inq > 0)
857 cflags |= IORING_CQE_F_SOCK_NONEMPTY;
858
859 if (sel->val > 0 && sr->flags & IORING_RECV_MSHOT_LIM) {
860 /*
861 * If sr->len hits zero, the limit has been reached. Mark
862 * mshot as finished, and flag MSHOT_DONE as well to prevent
863 * a potential bundle from being retried.
864 */
865 sr->mshot_total_len -= min_t(int, sel->val, sr->mshot_total_len);
866 if (!sr->mshot_total_len) {
867 sr->flags |= IORING_RECV_MSHOT_DONE;
868 mshot_finished = true;
869 }
870 }
871
872 if (sr->flags & IORING_RECVSEND_BUNDLE) {
873 size_t this_ret = sel->val - sr->done_io;
874
875 cflags |= io_put_kbufs(req, this_ret, sel->buf_list, io_bundle_nbufs(kmsg, this_ret));
876 if (sr->flags & IORING_RECV_RETRY)
877 cflags = req->cqe.flags | (cflags & CQE_F_MASK);
878 if (sr->mshot_len && sel->val >= sr->mshot_len)
879 sr->flags |= IORING_RECV_MSHOT_CAP;
880 /* bundle with no more immediate buffers, we're done */
881 if (req->flags & REQ_F_BL_EMPTY)
882 goto finish;
883 /*
884 * If more is available AND it was a full transfer, retry and
885 * append to this one
886 */
887 if (!(sr->flags & IORING_RECV_NO_RETRY) &&
888 kmsg->msg.msg_inq > 1 && this_ret > 0 &&
889 !iov_iter_count(&kmsg->msg.msg_iter)) {
890 req->cqe.flags = cflags & ~CQE_F_MASK;
891 sr->len = kmsg->msg.msg_inq;
892 sr->done_io += this_ret;
893 sr->flags |= IORING_RECV_RETRY;
894 return false;
895 }
896 } else {
897 cflags |= io_put_kbuf(req, sel->val, sel->buf_list);
898 }
899
900 /*
901 * Fill CQE for this receive and see if we should keep trying to
902 * receive from this socket.
903 */
904 if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
905 io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) {
906 sel->val = IOU_RETRY;
907 io_mshot_prep_retry(req, kmsg);
908 /* Known not-empty or unknown state, retry */
909 if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
910 if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY &&
911 !(sr->flags & IORING_RECV_MSHOT_CAP)) {
912 return false;
913 }
914 /* mshot retries exceeded, force a requeue */
915 sr->nr_multishot_loops = 0;
916 sr->flags &= ~IORING_RECV_MSHOT_CAP;
917 if (issue_flags & IO_URING_F_MULTISHOT)
918 sel->val = IOU_REQUEUE;
919 }
920 return true;
921 }
922
923 /* Finish the request / stop multishot. */
924 finish:
925 io_req_set_res(req, sel->val, cflags);
926 sel->val = IOU_COMPLETE;
927 io_req_msg_cleanup(req, issue_flags);
928 return true;
929 }
930
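/*
 * Carve the io_uring_recvmsg_out header (and optional control area) out of
 * the selected buffer, leaving the remainder for payload.
 */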
931 static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
932 struct io_sr_msg *sr, void __user **buf,
933 size_t *len)
934 {
935 unsigned long ubuf = (unsigned long) *buf;
936 unsigned long hdr;
937
938 hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
939 kmsg->controllen;
940 if (*len < hdr)
941 return -EFAULT;
942
943 if (kmsg->controllen) {
944 unsigned long control = ubuf + hdr - kmsg->controllen;
945
946 kmsg->msg.msg_control_user = (void __user *) control;
947 kmsg->msg.msg_controllen = kmsg->controllen;
948 }
949
950 sr->buf = *buf; /* stash for later copy */
951 *buf = (void __user *) (ubuf + hdr);
952 kmsg->payloadlen = *len = *len - hdr;
953 return 0;
954 }
955
956 struct io_recvmsg_multishot_hdr {
957 struct io_uring_recvmsg_out msg;
958 struct sockaddr_storage addr;
959 };
960
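/*
 * One multishot recvmsg iteration: receive into the payload area and copy
 * the io_uring_recvmsg_out header plus the (possibly truncated) source
 * address back to the selected buffer.
 */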
961 static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
962 struct io_async_msghdr *kmsg,
963 unsigned int flags, bool *finished)
964 {
965 int err;
966 int copy_len;
967 struct io_recvmsg_multishot_hdr hdr;
968
969 if (kmsg->namelen)
970 kmsg->msg.msg_name = &hdr.addr;
971 kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
972 kmsg->msg.msg_namelen = 0;
973
974 if (sock->file->f_flags & O_NONBLOCK)
975 flags |= MSG_DONTWAIT;
976
977 err = sock_recvmsg(sock, &kmsg->msg, flags);
978 *finished = err <= 0;
979 if (err < 0)
980 return err;
981
982 hdr.msg = (struct io_uring_recvmsg_out) {
983 .controllen = kmsg->controllen - kmsg->msg.msg_controllen,
984 .flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
985 };
986
987 hdr.msg.payloadlen = err;
988 if (err > kmsg->payloadlen)
989 err = kmsg->payloadlen;
990
991 copy_len = sizeof(struct io_uring_recvmsg_out);
992 if (kmsg->msg.msg_namelen > kmsg->namelen)
993 copy_len += kmsg->namelen;
994 else
995 copy_len += kmsg->msg.msg_namelen;
996
997 /*
998 * "fromlen shall refer to the value before truncation.."
999 * 1003.1g
1000 */
1001 hdr.msg.namelen = kmsg->msg.msg_namelen;
1002
1003 /* ensure that there is no gap between hdr and sockaddr_storage */
1004 BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
1005 sizeof(struct io_uring_recvmsg_out));
1006 if (copy_to_user(io->buf, &hdr, copy_len)) {
1007 *finished = true;
1008 return -EFAULT;
1009 }
1010
1011 return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
1012 kmsg->controllen + err;
1013 }
1014
1015 int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
1016 {
1017 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1018 struct io_async_msghdr *kmsg = req->async_data;
1019 struct io_br_sel sel = { };
1020 struct socket *sock;
1021 unsigned flags;
1022 int ret, min_ret = 0;
1023 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1024 bool mshot_finished = true;
1025
1026 sock = sock_from_file(req->file);
1027 if (unlikely(!sock))
1028 return -ENOTSOCK;
1029
1030 if (!(req->flags & REQ_F_POLLED) &&
1031 (sr->flags & IORING_RECVSEND_POLL_FIRST))
1032 return -EAGAIN;
1033
1034 flags = sr->msg_flags;
1035 if (force_nonblock)
1036 flags |= MSG_DONTWAIT;
1037
1038 retry_multishot:
1039 sel.buf_list = NULL;
1040 if (io_do_buffer_select(req)) {
1041 size_t len = sr->len;
1042
1043 sel = io_buffer_select(req, &len, sr->buf_group, issue_flags);
1044 if (!sel.addr)
1045 return -ENOBUFS;
1046
1047 if (req->flags & REQ_F_APOLL_MULTISHOT) {
1048 ret = io_recvmsg_prep_multishot(kmsg, sr, &sel.addr, &len);
1049 if (ret) {
1050 io_kbuf_recycle(req, sel.buf_list, issue_flags);
1051 return ret;
1052 }
1053 }
1054
1055 iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, sel.addr, len);
1056 }
1057
1058 kmsg->msg.msg_get_inq = 1;
1059 kmsg->msg.msg_inq = -1;
1060 if (req->flags & REQ_F_APOLL_MULTISHOT) {
1061 ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
1062 &mshot_finished);
1063 } else {
1064 /* disable partial retry for recvmsg with cmsg attached */
1065 if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
1066 min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1067
1068 ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
1069 kmsg->uaddr, flags);
1070 }
1071
1072 if (ret < min_ret) {
1073 if (ret == -EAGAIN && force_nonblock) {
1074 io_kbuf_recycle(req, sel.buf_list, issue_flags);
1075 return IOU_RETRY;
1076 }
1077 if (ret > 0 && io_net_retry(sock, flags)) {
1078 sr->done_io += ret;
1079 return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
1080 }
1081 if (ret == -ERESTARTSYS)
1082 ret = -EINTR;
1083 req_set_fail(req);
1084 } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
1085 req_set_fail(req);
1086 }
1087
1088 if (ret > 0)
1089 ret += sr->done_io;
1090 else if (sr->done_io)
1091 ret = sr->done_io;
1092 else
1093 io_kbuf_recycle(req, sel.buf_list, issue_flags);
1094
1095 sel.val = ret;
1096 if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
1097 goto retry_multishot;
1098
1099 return sel.val;
1100 }
1101
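/*
 * Select receive buffer(s): bundles peek multiple provided buffers while the
 * ring lock is held, otherwise fall back to picking a single buffer.
 */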
1102 static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
1103 struct io_br_sel *sel, unsigned int issue_flags)
1104 {
1105 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1106 int ret;
1107
1108 /*
1109 * If the ring isn't locked, then don't use the peek interface
1110 * to grab multiple buffers as we will lock/unlock between
1111 * this selection and posting the buffers.
1112 */
1113 if (!(issue_flags & IO_URING_F_UNLOCKED) &&
1114 sr->flags & IORING_RECVSEND_BUNDLE) {
1115 struct buf_sel_arg arg = {
1116 .iovs = &kmsg->fast_iov,
1117 .nr_iovs = 1,
1118 .mode = KBUF_MODE_EXPAND,
1119 .buf_group = sr->buf_group,
1120 };
1121
1122 if (kmsg->vec.iovec) {
1123 arg.nr_iovs = kmsg->vec.nr;
1124 arg.iovs = kmsg->vec.iovec;
1125 arg.mode |= KBUF_MODE_FREE;
1126 }
1127
1128 if (sel->val)
1129 arg.max_len = sel->val;
1130 else if (kmsg->msg.msg_inq > 1)
1131 arg.max_len = min_not_zero(sel->val, (ssize_t) kmsg->msg.msg_inq);
1132
1133 /* if mshot limited, ensure we don't go over */
1134 if (sr->flags & IORING_RECV_MSHOT_LIM)
1135 arg.max_len = min_not_zero(arg.max_len, sr->mshot_total_len);
1136 ret = io_buffers_peek(req, &arg, sel);
1137 if (unlikely(ret < 0))
1138 return ret;
1139
1140 if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
1141 kmsg->vec.nr = ret;
1142 kmsg->vec.iovec = arg.iovs;
1143 req->flags |= REQ_F_NEED_CLEANUP;
1144 }
1145 if (arg.partial_map)
1146 sr->flags |= IORING_RECV_PARTIAL_MAP;
1147
1148 /* special case 1 vec, can be a fast path */
1149 if (ret == 1) {
1150 sr->buf = arg.iovs[0].iov_base;
1151 sr->len = arg.iovs[0].iov_len;
1152 goto map_ubuf;
1153 }
1154 iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
1155 arg.out_len);
1156 } else {
1157 size_t len = sel->val;
1158
1159 *sel = io_buffer_select(req, &len, sr->buf_group, issue_flags);
1160 if (!sel->addr)
1161 return -ENOBUFS;
1162 sr->buf = sel->addr;
1163 sr->len = len;
1164 map_ubuf:
1165 ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
1166 &kmsg->msg.msg_iter);
1167 if (unlikely(ret))
1168 return ret;
1169 }
1170
1171 return 0;
1172 }
1173
1174 int io_recv(struct io_kiocb *req, unsigned int issue_flags)
1175 {
1176 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1177 struct io_async_msghdr *kmsg = req->async_data;
1178 struct io_br_sel sel;
1179 struct socket *sock;
1180 unsigned flags;
1181 int ret, min_ret = 0;
1182 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1183 bool mshot_finished;
1184
1185 if (!(req->flags & REQ_F_POLLED) &&
1186 (sr->flags & IORING_RECVSEND_POLL_FIRST))
1187 return -EAGAIN;
1188
1189 sock = sock_from_file(req->file);
1190 if (unlikely(!sock))
1191 return -ENOTSOCK;
1192
1193 flags = sr->msg_flags;
1194 if (force_nonblock)
1195 flags |= MSG_DONTWAIT;
1196
1197 retry_multishot:
1198 sel.buf_list = NULL;
1199 if (io_do_buffer_select(req)) {
1200 sel.val = sr->len;
1201 ret = io_recv_buf_select(req, kmsg, &sel, issue_flags);
1202 if (unlikely(ret < 0)) {
1203 kmsg->msg.msg_inq = -1;
1204 goto out_free;
1205 }
1206 sr->buf = NULL;
1207 }
1208
1209 kmsg->msg.msg_flags = 0;
1210 kmsg->msg.msg_inq = -1;
1211
1212 if (flags & MSG_WAITALL)
1213 min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1214
1215 ret = sock_recvmsg(sock, &kmsg->msg, flags);
1216 if (ret < min_ret) {
1217 if (ret == -EAGAIN && force_nonblock) {
1218 io_kbuf_recycle(req, sel.buf_list, issue_flags);
1219 return IOU_RETRY;
1220 }
1221 if (ret > 0 && io_net_retry(sock, flags)) {
1222 sr->len -= ret;
1223 sr->buf += ret;
1224 sr->done_io += ret;
1225 return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
1226 }
1227 if (ret == -ERESTARTSYS)
1228 ret = -EINTR;
1229 req_set_fail(req);
1230 } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
1231 out_free:
1232 req_set_fail(req);
1233 }
1234
1235 mshot_finished = ret <= 0;
1236 if (ret > 0)
1237 ret += sr->done_io;
1238 else if (sr->done_io)
1239 ret = sr->done_io;
1240 else
1241 io_kbuf_recycle(req, sel.buf_list, issue_flags);
1242
1243 sel.val = ret;
1244 if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
1245 goto retry_multishot;
1246
1247 return sel.val;
1248 }
1249
1250 int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1251 {
1252 struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
1253 unsigned ifq_idx;
1254
1255 if (unlikely(sqe->addr2 || sqe->addr || sqe->addr3))
1256 return -EINVAL;
1257
1258 ifq_idx = READ_ONCE(sqe->zcrx_ifq_idx);
1259 zc->ifq = xa_load(&req->ctx->zcrx_ctxs, ifq_idx);
1260 if (!zc->ifq)
1261 return -EINVAL;
1262
1263 zc->len = READ_ONCE(sqe->len);
1264 zc->flags = READ_ONCE(sqe->ioprio);
1265 if (READ_ONCE(sqe->msg_flags))
1266 return -EINVAL;
1267 if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT))
1268 return -EINVAL;
1269 /* multishot required */
1270 if (!(zc->flags & IORING_RECV_MULTISHOT))
1271 return -EINVAL;
1272 /* All data completions are posted as aux CQEs. */
1273 req->flags |= REQ_F_APOLL_MULTISHOT;
1274
1275 return 0;
1276 }
1277
1278 int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
1279 {
1280 struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
1281 struct socket *sock;
1282 unsigned int len;
1283 int ret;
1284
1285 if (!(req->flags & REQ_F_POLLED) &&
1286 (zc->flags & IORING_RECVSEND_POLL_FIRST))
1287 return -EAGAIN;
1288
1289 sock = sock_from_file(req->file);
1290 if (unlikely(!sock))
1291 return -ENOTSOCK;
1292
1293 len = zc->len;
1294 ret = io_zcrx_recv(req, zc->ifq, sock, 0, issue_flags, &zc->len);
1295 if (len && zc->len == 0) {
1296 io_req_set_res(req, 0, 0);
1297
1298 return IOU_COMPLETE;
1299 }
1300 if (unlikely(ret <= 0) && ret != -EAGAIN) {
1301 if (ret == -ERESTARTSYS)
1302 ret = -EINTR;
1303 if (ret == IOU_REQUEUE)
1304 return IOU_REQUEUE;
1305
1306 req_set_fail(req);
1307 io_req_set_res(req, ret, 0);
1308 return IOU_COMPLETE;
1309 }
1310 return IOU_RETRY;
1311 }
1312
1313 void io_send_zc_cleanup(struct io_kiocb *req)
1314 {
1315 struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1316 struct io_async_msghdr *io = req->async_data;
1317
1318 if (req_has_async_data(req))
1319 io_netmsg_iovec_free(io);
1320 if (zc->notif) {
1321 io_notif_flush(zc->notif);
1322 zc->notif = NULL;
1323 }
1324 }
1325
1326 #define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
1327 #define IO_ZC_FLAGS_VALID (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE | \
1328 IORING_SEND_VECTORIZED)
1329
1330 int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1331 {
1332 struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1333 struct io_ring_ctx *ctx = req->ctx;
1334 struct io_async_msghdr *iomsg;
1335 struct io_kiocb *notif;
1336 int ret;
1337
1338 zc->done_io = 0;
1339
1340 if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
1341 return -EINVAL;
1342 /* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
1343 if (req->flags & REQ_F_CQE_SKIP)
1344 return -EINVAL;
1345
1346 notif = zc->notif = io_alloc_notif(ctx);
1347 if (!notif)
1348 return -ENOMEM;
1349 notif->cqe.user_data = req->cqe.user_data;
1350 notif->cqe.res = 0;
1351 notif->cqe.flags = IORING_CQE_F_NOTIF;
1352 req->flags |= REQ_F_NEED_CLEANUP | REQ_F_POLL_NO_LAZY;
1353
1354 zc->flags = READ_ONCE(sqe->ioprio);
1355 if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
1356 if (zc->flags & ~IO_ZC_FLAGS_VALID)
1357 return -EINVAL;
1358 if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
1359 struct io_notif_data *nd = io_notif_to_data(notif);
1360
1361 nd->zc_report = true;
1362 nd->zc_used = false;
1363 nd->zc_copied = false;
1364 }
1365 }
1366
1367 zc->len = READ_ONCE(sqe->len);
1368 zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
1369 req->buf_index = READ_ONCE(sqe->buf_index);
1370 if (zc->msg_flags & MSG_DONTWAIT)
1371 req->flags |= REQ_F_NOWAIT;
1372
1373 if (io_is_compat(req->ctx))
1374 zc->msg_flags |= MSG_CMSG_COMPAT;
1375
1376 iomsg = io_msg_alloc_async(req);
1377 if (unlikely(!iomsg))
1378 return -ENOMEM;
1379
1380 if (req->opcode == IORING_OP_SEND_ZC) {
1381 ret = io_send_setup(req, sqe);
1382 } else {
1383 if (unlikely(sqe->addr2 || sqe->file_index))
1384 return -EINVAL;
1385 ret = io_sendmsg_setup(req, sqe);
1386 }
1387 if (unlikely(ret))
1388 return ret;
1389
1390 if (!(zc->flags & IORING_RECVSEND_FIXED_BUF)) {
1391 iomsg->msg.sg_from_iter = io_sg_from_iter_iovec;
1392 return io_notif_account_mem(zc->notif, iomsg->msg.msg_iter.count);
1393 }
1394 iomsg->msg.sg_from_iter = io_sg_from_iter;
1395 return 0;
1396 }
1397
1398 static int io_sg_from_iter_iovec(struct sk_buff *skb,
1399 struct iov_iter *from, size_t length)
1400 {
1401 skb_zcopy_downgrade_managed(skb);
1402 return zerocopy_fill_skb_from_iter(skb, from, length);
1403 }
1404
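/*
 * Fill skb frags straight from the bvec-backed iterator for zerocopy sends.
 * Falls back to the generic zerocopy fill if the skb already carries frags
 * whose references aren't managed by io_uring.
 */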
1405 static int io_sg_from_iter(struct sk_buff *skb,
1406 struct iov_iter *from, size_t length)
1407 {
1408 struct skb_shared_info *shinfo = skb_shinfo(skb);
1409 int frag = shinfo->nr_frags;
1410 int ret = 0;
1411 struct bvec_iter bi;
1412 ssize_t copied = 0;
1413 unsigned long truesize = 0;
1414
1415 if (!frag)
1416 shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
1417 else if (unlikely(!skb_zcopy_managed(skb)))
1418 return zerocopy_fill_skb_from_iter(skb, from, length);
1419
1420 bi.bi_size = min(from->count, length);
1421 bi.bi_bvec_done = from->iov_offset;
1422 bi.bi_idx = 0;
1423
1424 while (bi.bi_size && frag < MAX_SKB_FRAGS) {
1425 struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);
1426
1427 copied += v.bv_len;
1428 truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
1429 __skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
1430 v.bv_offset, v.bv_len);
1431 bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
1432 }
1433 if (bi.bi_size)
1434 ret = -EMSGSIZE;
1435
1436 shinfo->nr_frags = frag;
1437 from->bvec += bi.bi_idx;
1438 from->nr_segs -= bi.bi_idx;
1439 from->count -= copied;
1440 from->iov_offset = bi.bi_bvec_done;
1441
1442 skb->data_len += copied;
1443 skb->len += copied;
1444 skb->truesize += truesize;
1445 return ret;
1446 }
1447
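/* Resolve the registered buffer backing a fixed-buffer zerocopy send. */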
1448 static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
1449 {
1450 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1451 struct io_async_msghdr *kmsg = req->async_data;
1452
1453 WARN_ON_ONCE(!(sr->flags & IORING_RECVSEND_FIXED_BUF));
1454
1455 sr->notif->buf_index = req->buf_index;
1456 return io_import_reg_buf(sr->notif, &kmsg->msg.msg_iter,
1457 (u64)(uintptr_t)sr->buf, sr->len,
1458 ITER_SOURCE, issue_flags);
1459 }
1460
1461 int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
1462 {
1463 struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1464 struct io_async_msghdr *kmsg = req->async_data;
1465 struct socket *sock;
1466 unsigned msg_flags;
1467 int ret, min_ret = 0;
1468
1469 sock = sock_from_file(req->file);
1470 if (unlikely(!sock))
1471 return -ENOTSOCK;
1472 if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
1473 return -EOPNOTSUPP;
1474
1475 if (!(req->flags & REQ_F_POLLED) &&
1476 (zc->flags & IORING_RECVSEND_POLL_FIRST))
1477 return -EAGAIN;
1478
1479 if (req->flags & REQ_F_IMPORT_BUFFER) {
1480 req->flags &= ~REQ_F_IMPORT_BUFFER;
1481 ret = io_send_zc_import(req, issue_flags);
1482 if (unlikely(ret))
1483 return ret;
1484 }
1485
1486 msg_flags = zc->msg_flags;
1487 if (issue_flags & IO_URING_F_NONBLOCK)
1488 msg_flags |= MSG_DONTWAIT;
1489 if (msg_flags & MSG_WAITALL)
1490 min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1491 msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
1492
1493 kmsg->msg.msg_flags = msg_flags;
1494 kmsg->msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
1495 ret = sock_sendmsg(sock, &kmsg->msg);
1496
1497 if (unlikely(ret < min_ret)) {
1498 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1499 return -EAGAIN;
1500
1501 if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) {
1502 zc->done_io += ret;
1503 return -EAGAIN;
1504 }
1505 if (ret == -ERESTARTSYS)
1506 ret = -EINTR;
1507 req_set_fail(req);
1508 }
1509
1510 if (ret >= 0)
1511 ret += zc->done_io;
1512 else if (zc->done_io)
1513 ret = zc->done_io;
1514
1515 /*
1516 * If we're in io-wq we can't rely on tw ordering guarantees, defer
1517 * flushing notif to io_send_zc_cleanup()
1518 */
1519 if (!(issue_flags & IO_URING_F_UNLOCKED)) {
1520 io_notif_flush(zc->notif);
1521 zc->notif = NULL;
1522 io_req_msg_cleanup(req, 0);
1523 }
1524 io_req_set_res(req, ret, IORING_CQE_F_MORE);
1525 return IOU_COMPLETE;
1526 }
1527
1528 int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
1529 {
1530 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1531 struct io_async_msghdr *kmsg = req->async_data;
1532 struct socket *sock;
1533 unsigned flags;
1534 int ret, min_ret = 0;
1535
1536 if (req->flags & REQ_F_IMPORT_BUFFER) {
1537 unsigned uvec_segs = kmsg->msg.msg_iter.nr_segs;
1538 int ret;
1539
1540 sr->notif->buf_index = req->buf_index;
1541 ret = io_import_reg_vec(ITER_SOURCE, &kmsg->msg.msg_iter,
1542 sr->notif, &kmsg->vec, uvec_segs,
1543 issue_flags);
1544 if (unlikely(ret))
1545 return ret;
1546 req->flags &= ~REQ_F_IMPORT_BUFFER;
1547 }
1548
1549 sock = sock_from_file(req->file);
1550 if (unlikely(!sock))
1551 return -ENOTSOCK;
1552 if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
1553 return -EOPNOTSUPP;
1554
1555 if (!(req->flags & REQ_F_POLLED) &&
1556 (sr->flags & IORING_RECVSEND_POLL_FIRST))
1557 return -EAGAIN;
1558
1559 flags = sr->msg_flags;
1560 if (issue_flags & IO_URING_F_NONBLOCK)
1561 flags |= MSG_DONTWAIT;
1562 if (flags & MSG_WAITALL)
1563 min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1564
1565 kmsg->msg.msg_control_user = sr->msg_control;
1566 kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
1567 ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
1568
1569 if (unlikely(ret < min_ret)) {
1570 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1571 return -EAGAIN;
1572
1573 if (ret > 0 && io_net_retry(sock, flags)) {
1574 sr->done_io += ret;
1575 return -EAGAIN;
1576 }
1577 if (ret == -ERESTARTSYS)
1578 ret = -EINTR;
1579 req_set_fail(req);
1580 }
1581
1582 if (ret >= 0)
1583 ret += sr->done_io;
1584 else if (sr->done_io)
1585 ret = sr->done_io;
1586
1587 /*
1588 * If we're in io-wq we can't rely on tw ordering guarantees, defer
1589 * flushing notif to io_send_zc_cleanup()
1590 */
1591 if (!(issue_flags & IO_URING_F_UNLOCKED)) {
1592 io_notif_flush(sr->notif);
1593 sr->notif = NULL;
1594 io_req_msg_cleanup(req, 0);
1595 }
1596 io_req_set_res(req, ret, IORING_CQE_F_MORE);
1597 return IOU_COMPLETE;
1598 }
1599
1600 void io_sendrecv_fail(struct io_kiocb *req)
1601 {
1602 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1603
1604 if (sr->done_io)
1605 req->cqe.res = sr->done_io;
1606
1607 if ((req->flags & REQ_F_NEED_CLEANUP) &&
1608 (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
1609 req->cqe.flags |= IORING_CQE_F_MORE;
1610 }
1611
1612 #define ACCEPT_FLAGS (IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \
1613 IORING_ACCEPT_POLL_FIRST)
1614
1615 int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1616 {
1617 struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
1618
1619 if (sqe->len || sqe->buf_index)
1620 return -EINVAL;
1621
1622 accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1623 accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
1624 accept->flags = READ_ONCE(sqe->accept_flags);
1625 accept->nofile = rlimit(RLIMIT_NOFILE);
1626 accept->iou_flags = READ_ONCE(sqe->ioprio);
1627 if (accept->iou_flags & ~ACCEPT_FLAGS)
1628 return -EINVAL;
1629
1630 accept->file_slot = READ_ONCE(sqe->file_index);
1631 if (accept->file_slot) {
1632 if (accept->flags & SOCK_CLOEXEC)
1633 return -EINVAL;
1634 if (accept->iou_flags & IORING_ACCEPT_MULTISHOT &&
1635 accept->file_slot != IORING_FILE_INDEX_ALLOC)
1636 return -EINVAL;
1637 }
1638 if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1639 return -EINVAL;
1640 if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
1641 accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1642 if (accept->iou_flags & IORING_ACCEPT_MULTISHOT)
1643 req->flags |= REQ_F_APOLL_MULTISHOT;
1644 if (accept->iou_flags & IORING_ACCEPT_DONTWAIT)
1645 req->flags |= REQ_F_NOWAIT;
1646 return 0;
1647 }
1648
1649 int io_accept(struct io_kiocb *req, unsigned int issue_flags)
1650 {
1651 struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
1652 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1653 bool fixed = !!accept->file_slot;
1654 struct proto_accept_arg arg = {
1655 .flags = force_nonblock ? O_NONBLOCK : 0,
1656 };
1657 struct file *file;
1658 unsigned cflags;
1659 int ret, fd;
1660
1661 if (!(req->flags & REQ_F_POLLED) &&
1662 accept->iou_flags & IORING_ACCEPT_POLL_FIRST)
1663 return -EAGAIN;
1664
1665 retry:
1666 if (!fixed) {
1667 fd = __get_unused_fd_flags(accept->flags, accept->nofile);
1668 if (unlikely(fd < 0))
1669 return fd;
1670 }
1671 arg.err = 0;
1672 arg.is_empty = -1;
1673 file = do_accept(req->file, &arg, accept->addr, accept->addr_len,
1674 accept->flags);
1675 if (IS_ERR(file)) {
1676 if (!fixed)
1677 put_unused_fd(fd);
1678 ret = PTR_ERR(file);
1679 if (ret == -EAGAIN && force_nonblock &&
1680 !(accept->iou_flags & IORING_ACCEPT_DONTWAIT))
1681 return IOU_RETRY;
1682
1683 if (ret == -ERESTARTSYS)
1684 ret = -EINTR;
1685 } else if (!fixed) {
1686 fd_install(fd, file);
1687 ret = fd;
1688 } else {
1689 ret = io_fixed_fd_install(req, issue_flags, file,
1690 accept->file_slot);
1691 }
1692
1693 cflags = 0;
1694 if (!arg.is_empty)
1695 cflags |= IORING_CQE_F_SOCK_NONEMPTY;
1696
1697 if (ret >= 0 && (req->flags & REQ_F_APOLL_MULTISHOT) &&
1698 io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
1699 if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1)
1700 goto retry;
1701 return IOU_RETRY;
1702 }
1703
1704 io_req_set_res(req, ret, cflags);
1705 if (ret < 0)
1706 req_set_fail(req);
1707 return IOU_COMPLETE;
1708 }
1709
1710 void io_socket_bpf_populate(struct io_uring_bpf_ctx *bctx, struct io_kiocb *req)
1711 {
1712 struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1713
1714 bctx->socket.family = sock->domain;
1715 bctx->socket.type = sock->type;
1716 bctx->socket.protocol = sock->protocol;
1717 }
1718
1719 int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1720 {
1721 struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1722
1723 if (sqe->addr || sqe->rw_flags || sqe->buf_index)
1724 return -EINVAL;
1725
1726 sock->domain = READ_ONCE(sqe->fd);
1727 sock->type = READ_ONCE(sqe->off);
1728 sock->protocol = READ_ONCE(sqe->len);
1729 sock->file_slot = READ_ONCE(sqe->file_index);
1730 sock->nofile = rlimit(RLIMIT_NOFILE);
1731
1732 sock->flags = sock->type & ~SOCK_TYPE_MASK;
1733 if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
1734 return -EINVAL;
1735 if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1736 return -EINVAL;
1737 return 0;
1738 }
1739
1740 int io_socket(struct io_kiocb *req, unsigned int issue_flags)
1741 {
1742 struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1743 bool fixed = !!sock->file_slot;
1744 struct file *file;
1745 int ret, fd;
1746
1747 if (!fixed) {
1748 fd = __get_unused_fd_flags(sock->flags, sock->nofile);
1749 if (unlikely(fd < 0))
1750 return fd;
1751 }
1752 file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
1753 if (IS_ERR(file)) {
1754 if (!fixed)
1755 put_unused_fd(fd);
1756 ret = PTR_ERR(file);
1757 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1758 return -EAGAIN;
1759 if (ret == -ERESTARTSYS)
1760 ret = -EINTR;
1761 req_set_fail(req);
1762 } else if (!fixed) {
1763 fd_install(fd, file);
1764 ret = fd;
1765 } else {
1766 ret = io_fixed_fd_install(req, issue_flags, file,
1767 sock->file_slot);
1768 }
1769 io_req_set_res(req, ret, 0);
1770 return IOU_COMPLETE;
1771 }
1772
1773 int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1774 {
1775 struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
1776 struct io_async_msghdr *io;
1777
1778 if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
1779 return -EINVAL;
1780
1781 conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1782 conn->addr_len = READ_ONCE(sqe->addr2);
1783 conn->in_progress = conn->seen_econnaborted = false;
1784
1785 io = io_msg_alloc_async(req);
1786 if (unlikely(!io))
1787 return -ENOMEM;
1788
1789 return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr);
1790 }
1791
1792 int io_connect(struct io_kiocb *req, unsigned int issue_flags)
1793 {
1794 struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
1795 struct io_async_msghdr *io = req->async_data;
1796 unsigned file_flags;
1797 int ret;
1798 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1799
1800 if (connect->in_progress) {
1801 struct poll_table_struct pt = { ._key = EPOLLERR };
1802
1803 if (vfs_poll(req->file, &pt) & EPOLLERR)
1804 goto get_sock_err;
1805 }
1806
1807 file_flags = force_nonblock ? O_NONBLOCK : 0;
1808
1809 ret = __sys_connect_file(req->file, &io->addr, connect->addr_len,
1810 file_flags);
1811 if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
1812 && force_nonblock) {
1813 if (ret == -EINPROGRESS) {
1814 connect->in_progress = true;
1815 } else if (ret == -ECONNABORTED) {
1816 if (connect->seen_econnaborted)
1817 goto out;
1818 connect->seen_econnaborted = true;
1819 }
1820 return -EAGAIN;
1821 }
1822 if (connect->in_progress) {
1823 /*
1824 * At least bluetooth will return -EBADFD on a re-connect
1825 * attempt, and it's (supposedly) also valid to get -EISCONN
1826 * which means the previous result is good. For both of these,
1827 * grab the sock_error() and use that for the completion.
1828 */
1829 if (ret == -EBADFD || ret == -EISCONN) {
1830 get_sock_err:
1831 ret = sock_error(sock_from_file(req->file)->sk);
1832 }
1833 }
1834 if (ret == -ERESTARTSYS)
1835 ret = -EINTR;
1836 out:
1837 if (ret < 0)
1838 req_set_fail(req);
1839 io_req_msg_cleanup(req, issue_flags);
1840 io_req_set_res(req, ret, 0);
1841 return IOU_COMPLETE;
1842 }
1843
1844 int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1845 {
1846 struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
1847 struct sockaddr __user *uaddr;
1848 struct io_async_msghdr *io;
1849
1850 if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
1851 return -EINVAL;
1852
1853 uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1854 bind->addr_len = READ_ONCE(sqe->addr2);
1855
1856 io = io_msg_alloc_async(req);
1857 if (unlikely(!io))
1858 return -ENOMEM;
1859 return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr);
1860 }
1861
1862 int io_bind(struct io_kiocb *req, unsigned int issue_flags)
1863 {
1864 struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
1865 struct io_async_msghdr *io = req->async_data;
1866 struct socket *sock;
1867 int ret;
1868
1869 sock = sock_from_file(req->file);
1870 if (unlikely(!sock))
1871 return -ENOTSOCK;
1872
1873 ret = __sys_bind_socket(sock, &io->addr, bind->addr_len);
1874 if (ret < 0)
1875 req_set_fail(req);
1876 io_req_set_res(req, ret, 0);
1877 return 0;
1878 }
1879
1880 int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1881 {
1882 struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
1883
1884 if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2)
1885 return -EINVAL;
1886
1887 listen->backlog = READ_ONCE(sqe->len);
1888 return 0;
1889 }
1890
1891 int io_listen(struct io_kiocb *req, unsigned int issue_flags)
1892 {
1893 struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
1894 struct socket *sock;
1895 int ret;
1896
1897 sock = sock_from_file(req->file);
1898 if (unlikely(!sock))
1899 return -ENOTSOCK;
1900
1901 ret = __sys_listen_socket(sock, listen->backlog);
1902 if (ret < 0)
1903 req_set_fail(req);
1904 io_req_set_res(req, ret, 0);
1905 return 0;
1906 }
1907
1908 void io_netmsg_cache_free(const void *entry)
1909 {
1910 struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;
1911
1912 io_vec_free(&kmsg->vec);
1913 kfree(kmsg);
1914 }
1915